rx mtu discovery constrainment code

This code lets Rx keep working on a network whose MTU is below 1500 bytes.
A subsequent commit is needed to make it more effective: attempts
to grow the MTU must be scheduled, so that failing to discover the exact
MTU immediately does not force us into heavy brute-force probing.
For performance, we still want to grow back up to the real MTU.

Change-Id: If876c134efd4b0952035193ef00a6e780f7adc64
Reviewed-on: http://gerrit.openafs.org/2069
Reviewed-by: Derrick Brashear <shadow@dementia.org>
Tested-by: Derrick Brashear <shadow@dementia.org>
Tested-by: Jeffrey Altman <jaltman@openafs.org>
Reviewed-by: Jeffrey Altman <jaltman@openafs.org>
This commit is contained in:
Derrick Brashear 2010-06-01 13:10:32 -04:00 committed by Jeffrey Altman
parent 8be3eee95e
commit 67e8373746
3 changed files with 87 additions and 6 deletions

View File

@ -2493,6 +2493,14 @@ rxi_SetPeerMtu(struct rx_peer *peer, afs_uint32 host, afs_uint32 port, int mtu)
mtu = MAX(mtu, RX_MIN_PACKET_SIZE); mtu = MAX(mtu, RX_MIN_PACKET_SIZE);
peer->ifMTU=MIN(mtu, peer->ifMTU); peer->ifMTU=MIN(mtu, peer->ifMTU);
peer->natMTU = rxi_AdjustIfMTU(peer->ifMTU); peer->natMTU = rxi_AdjustIfMTU(peer->ifMTU);
/* if we tweaked this down, need to tune our peer MTU too */
peer->MTU = MIN(peer->MTU, peer->natMTU);
/* if we discovered a sub-1500 mtu, degrade */
if (peer->ifMTU < OLD_MAX_PACKET_SIZE)
peer->maxDgramPackets = 1;
/* We no longer have valid peer packet information */
if (peer->maxPacketSize-RX_IPUDP_SIZE > peer->ifMTU)
peer->maxPacketSize = 0;
MUTEX_EXIT(&peer->peer_lock); MUTEX_EXIT(&peer->peer_lock);
MUTEX_ENTER(&rx_peerHashTable_lock); MUTEX_ENTER(&rx_peerHashTable_lock);
@ -3810,6 +3818,7 @@ rxi_ReceiveAckPacket(struct rx_call *call, struct rx_packet *np,
int newAckCount = 0; int newAckCount = 0;
u_short maxMTU = 0; /* Set if peer supports AFS 3.4a jumbo datagrams */ u_short maxMTU = 0; /* Set if peer supports AFS 3.4a jumbo datagrams */
int maxDgramPackets = 0; /* Set if peer supports AFS 3.5 jumbo datagrams */ int maxDgramPackets = 0; /* Set if peer supports AFS 3.5 jumbo datagrams */
int pktsize = 0; /* Set if we need to update the peer mtu */
if (rx_stats_active) if (rx_stats_active)
rx_MutexIncrement(rx_stats.ackPacketsRead, rx_stats_mutex); rx_MutexIncrement(rx_stats.ackPacketsRead, rx_stats_mutex);
@ -3837,6 +3846,28 @@ rxi_ReceiveAckPacket(struct rx_call *call, struct rx_packet *np,
if (ap->reason == RX_ACK_PING_RESPONSE) if (ap->reason == RX_ACK_PING_RESPONSE)
rxi_UpdatePeerReach(conn, call); rxi_UpdatePeerReach(conn, call);
if (conn->lastPacketSizeSeq) {
MUTEX_ENTER(&conn->conn_data_lock);
if (first >= conn->lastPacketSizeSeq) {
pktsize = conn->lastPacketSize;
conn->lastPacketSize = conn->lastPacketSizeSeq = 0;
}
MUTEX_EXIT(&conn->conn_data_lock);
MUTEX_ENTER(&peer->peer_lock);
/* start somewhere */
if (!peer->maxPacketSize)
peer->maxPacketSize = np->length+RX_IPUDP_SIZE;
if (pktsize > peer->maxPacketSize) {
peer->maxPacketSize = pktsize;
if ((pktsize-RX_IPUDP_SIZE > peer->ifMTU)) {
peer->ifMTU=pktsize-RX_IPUDP_SIZE;
peer->natMTU = rxi_AdjustIfMTU(peer->ifMTU);
}
}
MUTEX_EXIT(&peer->peer_lock);
}
#ifdef RXDEBUG #ifdef RXDEBUG
#ifdef AFS_NT40_ENV #ifdef AFS_NT40_ENV
if (rxdebug_active) { if (rxdebug_active) {
@ -4234,8 +4265,13 @@ rxi_ReceiveAckPacket(struct rx_call *call, struct rx_packet *np,
} }
call->MTU = RX_HEADER_SIZE + RX_JUMBOBUFFERSIZE; call->MTU = RX_HEADER_SIZE + RX_JUMBOBUFFERSIZE;
} else if (call->MTU < peer->maxMTU) { } else if (call->MTU < peer->maxMTU) {
call->MTU += peer->natMTU; /* don't upgrade if we can't handle it */
call->MTU = MIN(call->MTU, peer->maxMTU); if ((call->nDgramPackets == 1) && (call->MTU >= peer->ifMTU))
call->MTU = peer->ifMTU;
else {
call->MTU += peer->natMTU;
call->MTU = MIN(call->MTU, peer->maxMTU);
}
} }
call->nAcks = 0; call->nAcks = 0;
} }
@ -5772,6 +5808,8 @@ rxi_CheckCall(struct rx_call *call)
struct rx_connection *conn = call->conn; struct rx_connection *conn = call->conn;
afs_uint32 now; afs_uint32 now;
afs_uint32 deadTime; afs_uint32 deadTime;
int cerror = 0;
int newmtu = 0;
#ifdef AFS_GLOBAL_RXLOCK_KERNEL #ifdef AFS_GLOBAL_RXLOCK_KERNEL
if (call->flags & RX_CALL_TQ_BUSY) { if (call->flags & RX_CALL_TQ_BUSY) {
@ -5819,8 +5857,8 @@ rxi_CheckCall(struct rx_call *call)
#endif #endif
#endif #endif
#endif /* ADAPT_PMTU */ #endif /* ADAPT_PMTU */
rxi_CallError(call, RX_CALL_DEAD); cerror = RX_CALL_DEAD;
return -1; goto mtuout;
} else { } else {
#ifdef RX_ENABLE_LOCKS #ifdef RX_ENABLE_LOCKS
/* Cancel pending events */ /* Cancel pending events */
@ -5867,6 +5905,31 @@ rxi_CheckCall(struct rx_call *call)
return -1; return -1;
} }
return 0; return 0;
mtuout:
if (call->conn->msgsizeRetryErr && cerror != RX_CALL_TIMEOUT) {
/* if we never succeeded, let the error pass out as-is */
if (call->conn->peer->maxPacketSize)
cerror = call->conn->msgsizeRetryErr;
/* if we thought we could send more, perhaps things got worse */
if (call->conn->peer->maxPacketSize > conn->lastPacketSize)
/* maxpacketsize will be cleared in rxi_SetPeerMtu */
newmtu = MAX(call->conn->peer->maxPacketSize-RX_IPUDP_SIZE,
conn->lastPacketSize-(128+RX_IPUDP_SIZE));
else
newmtu = conn->lastPacketSize-(128+RX_IPUDP_SIZE);
/* minimum capped in SetPeerMtu */
rxi_SetPeerMtu(call->conn->peer, 0, 0, newmtu);
/* clean up */
conn->lastPacketSize = 0;
/* needed so ResetCall doesn't clobber us. */
call->MTU = call->conn->peer->ifMTU;
}
rxi_CallError(call, cerror);
return -1;
} }
void void

View File

@ -175,6 +175,9 @@ int ntoh_syserr_conv(int error);
/* Set the overload threshold and the overload error */ /* Set the overload threshold and the overload error */
#define rx_SetBusyThreshold(threshold, code) (rx_BusyThreshold=(threshold),rx_BusyError=(code)) #define rx_SetBusyThreshold(threshold, code) (rx_BusyThreshold=(threshold),rx_BusyError=(code))
/* Set the error to use for retrying a connection during MTU tuning */
#define rx_SetMsgsizeRetryErr(conn, err) ((conn)->msgsizeRetryErr = (err))
/* If this flag is set,no new requests are processed by rx, all new requests are /* If this flag is set,no new requests are processed by rx, all new requests are
returned with an error code of RX_CALL_DEAD ( transient error ) */ returned with an error code of RX_CALL_DEAD ( transient error ) */
#define rx_SetRxTranquil() (rx_tranquil = 1) #define rx_SetRxTranquil() (rx_tranquil = 1)
@ -244,8 +247,9 @@ struct rx_connection {
afs_uint32 serial; /* Next outgoing packet serial number */ afs_uint32 serial; /* Next outgoing packet serial number */
afs_uint32 lastSerial; /* # of last packet received, for computing skew */ afs_uint32 lastSerial; /* # of last packet received, for computing skew */
afs_int32 maxSerial; /* largest serial number seen on incoming packets */ afs_int32 maxSerial; /* largest serial number seen on incoming packets */
/* afs_int32 maxPacketSize; max packet size should be per-connection since */ afs_int32 lastPacketSize; /* last >max attempt */
/* peer process could be restarted on us. Includes RX Header. */ afs_int32 lastPacketSizeSeq; /* seq number of attempt */
struct rxevent *challengeEvent; /* Scheduled when the server is challenging a */ struct rxevent *challengeEvent; /* Scheduled when the server is challenging a */
struct rxevent *delayedAbortEvent; /* Scheduled to throttle looping client */ struct rxevent *delayedAbortEvent; /* Scheduled to throttle looping client */
struct rxevent *checkReachEvent; /* Scheduled when checking reachability */ struct rxevent *checkReachEvent; /* Scheduled when checking reachability */
@ -274,6 +278,7 @@ struct rx_connection {
afs_int32 idleDeadErr; afs_int32 idleDeadErr;
afs_int32 secondsUntilNatPing; /* how often to ping conn */ afs_int32 secondsUntilNatPing; /* how often to ping conn */
struct rxevent *natKeepAliveEvent; /* Scheduled to keep connection open */ struct rxevent *natKeepAliveEvent; /* Scheduled to keep connection open */
afs_int32 msgsizeRetryErr;
int nSpecific; /* number entries in specific data */ int nSpecific; /* number entries in specific data */
void **specific; /* pointer to connection specific data */ void **specific; /* pointer to connection specific data */
}; };
@ -419,6 +424,7 @@ struct rx_peer {
afs_hyper_t bytesReceived; /* Number of bytes received from this peer */ afs_hyper_t bytesReceived; /* Number of bytes received from this peer */
struct rx_queue rpcStats; /* rpc statistic list */ struct rx_queue rpcStats; /* rpc statistic list */
int lastReachTime; /* Last time we verified reachability */ int lastReachTime; /* Last time we verified reachability */
afs_int32 maxPacketSize; /* peer packetsize hint */
}; };
#ifndef KDUMP_RX_LOCK #ifndef KDUMP_RX_LOCK

View File

@ -2217,6 +2217,10 @@ rxi_SendPacket(struct rx_call *call, struct rx_connection *conn,
* serial number means the packet was never sent. */ * serial number means the packet was never sent. */
MUTEX_ENTER(&conn->conn_data_lock); MUTEX_ENTER(&conn->conn_data_lock);
p->header.serial = ++conn->serial; p->header.serial = ++conn->serial;
if (p->length > conn->peer->maxPacketSize) {
conn->lastPacketSize = p->length;
conn->lastPacketSizeSeq = p->header.seq;
}
MUTEX_EXIT(&conn->conn_data_lock); MUTEX_EXIT(&conn->conn_data_lock);
/* This is so we can adjust retransmit time-outs better in the face of /* This is so we can adjust retransmit time-outs better in the face of
* rapidly changing round-trip times. RTO estimation is not a la Karn. * rapidly changing round-trip times. RTO estimation is not a la Karn.
@ -2366,6 +2370,14 @@ rxi_SendPacketList(struct rx_call *call, struct rx_connection *conn,
MUTEX_ENTER(&conn->conn_data_lock); MUTEX_ENTER(&conn->conn_data_lock);
serial = conn->serial; serial = conn->serial;
conn->serial += len; conn->serial += len;
for (i = 0; i < len; i++) {
p = list[i];
if ((p->length > conn->peer->maxPacketSize) &&
(p->length > conn->lastPacketSize)) {
conn->lastPacketSize = p->length;
conn->lastPacketSizeSeq = p->header.seq;
}
}
MUTEX_EXIT(&conn->conn_data_lock); MUTEX_EXIT(&conn->conn_data_lock);