mirror of
https://git.openafs.org/openafs.git
synced 2025-01-22 08:50:17 +00:00
rx mtu discovery constrainment code
This code makes us work in the face of a sub-1500 MTU network. A subsequent commit is needed to make it more effective: attempts to grow the MTU must be scheduled so that we aren't forced to brute-force heavily when we fail to discover the exact MTU immediately; for performance, we do want to grow to the real MTU.

Change-Id: If876c134efd4b0952035193ef00a6e780f7adc64
Reviewed-on: http://gerrit.openafs.org/2069
Reviewed-by: Derrick Brashear <shadow@dementia.org>
Tested-by: Derrick Brashear <shadow@dementia.org>
Tested-by: Jeffrey Altman <jaltman@openafs.org>
Reviewed-by: Jeffrey Altman <jaltman@openafs.org>
This commit is contained in:
parent
8be3eee95e
commit
67e8373746
71
src/rx/rx.c
71
src/rx/rx.c
@ -2493,6 +2493,14 @@ rxi_SetPeerMtu(struct rx_peer *peer, afs_uint32 host, afs_uint32 port, int mtu)
|
|||||||
mtu = MAX(mtu, RX_MIN_PACKET_SIZE);
|
mtu = MAX(mtu, RX_MIN_PACKET_SIZE);
|
||||||
peer->ifMTU=MIN(mtu, peer->ifMTU);
|
peer->ifMTU=MIN(mtu, peer->ifMTU);
|
||||||
peer->natMTU = rxi_AdjustIfMTU(peer->ifMTU);
|
peer->natMTU = rxi_AdjustIfMTU(peer->ifMTU);
|
||||||
|
/* if we tweaked this down, need to tune our peer MTU too */
|
||||||
|
peer->MTU = MIN(peer->MTU, peer->natMTU);
|
||||||
|
/* if we discovered a sub-1500 mtu, degrade */
|
||||||
|
if (peer->ifMTU < OLD_MAX_PACKET_SIZE)
|
||||||
|
peer->maxDgramPackets = 1;
|
||||||
|
/* We no longer have valid peer packet information */
|
||||||
|
if (peer->maxPacketSize-RX_IPUDP_SIZE > peer->ifMTU)
|
||||||
|
peer->maxPacketSize = 0;
|
||||||
MUTEX_EXIT(&peer->peer_lock);
|
MUTEX_EXIT(&peer->peer_lock);
|
||||||
|
|
||||||
MUTEX_ENTER(&rx_peerHashTable_lock);
|
MUTEX_ENTER(&rx_peerHashTable_lock);
|
||||||
@ -3810,6 +3818,7 @@ rxi_ReceiveAckPacket(struct rx_call *call, struct rx_packet *np,
|
|||||||
int newAckCount = 0;
|
int newAckCount = 0;
|
||||||
u_short maxMTU = 0; /* Set if peer supports AFS 3.4a jumbo datagrams */
|
u_short maxMTU = 0; /* Set if peer supports AFS 3.4a jumbo datagrams */
|
||||||
int maxDgramPackets = 0; /* Set if peer supports AFS 3.5 jumbo datagrams */
|
int maxDgramPackets = 0; /* Set if peer supports AFS 3.5 jumbo datagrams */
|
||||||
|
int pktsize = 0; /* Set if we need to update the peer mtu */
|
||||||
|
|
||||||
if (rx_stats_active)
|
if (rx_stats_active)
|
||||||
rx_MutexIncrement(rx_stats.ackPacketsRead, rx_stats_mutex);
|
rx_MutexIncrement(rx_stats.ackPacketsRead, rx_stats_mutex);
|
||||||
@ -3837,6 +3846,28 @@ rxi_ReceiveAckPacket(struct rx_call *call, struct rx_packet *np,
|
|||||||
if (ap->reason == RX_ACK_PING_RESPONSE)
|
if (ap->reason == RX_ACK_PING_RESPONSE)
|
||||||
rxi_UpdatePeerReach(conn, call);
|
rxi_UpdatePeerReach(conn, call);
|
||||||
|
|
||||||
|
if (conn->lastPacketSizeSeq) {
|
||||||
|
MUTEX_ENTER(&conn->conn_data_lock);
|
||||||
|
if (first >= conn->lastPacketSizeSeq) {
|
||||||
|
pktsize = conn->lastPacketSize;
|
||||||
|
conn->lastPacketSize = conn->lastPacketSizeSeq = 0;
|
||||||
|
}
|
||||||
|
MUTEX_EXIT(&conn->conn_data_lock);
|
||||||
|
MUTEX_ENTER(&peer->peer_lock);
|
||||||
|
/* start somewhere */
|
||||||
|
if (!peer->maxPacketSize)
|
||||||
|
peer->maxPacketSize = np->length+RX_IPUDP_SIZE;
|
||||||
|
|
||||||
|
if (pktsize > peer->maxPacketSize) {
|
||||||
|
peer->maxPacketSize = pktsize;
|
||||||
|
if ((pktsize-RX_IPUDP_SIZE > peer->ifMTU)) {
|
||||||
|
peer->ifMTU=pktsize-RX_IPUDP_SIZE;
|
||||||
|
peer->natMTU = rxi_AdjustIfMTU(peer->ifMTU);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
MUTEX_EXIT(&peer->peer_lock);
|
||||||
|
}
|
||||||
|
|
||||||
#ifdef RXDEBUG
|
#ifdef RXDEBUG
|
||||||
#ifdef AFS_NT40_ENV
|
#ifdef AFS_NT40_ENV
|
||||||
if (rxdebug_active) {
|
if (rxdebug_active) {
|
||||||
@ -4234,8 +4265,13 @@ rxi_ReceiveAckPacket(struct rx_call *call, struct rx_packet *np,
|
|||||||
}
|
}
|
||||||
call->MTU = RX_HEADER_SIZE + RX_JUMBOBUFFERSIZE;
|
call->MTU = RX_HEADER_SIZE + RX_JUMBOBUFFERSIZE;
|
||||||
} else if (call->MTU < peer->maxMTU) {
|
} else if (call->MTU < peer->maxMTU) {
|
||||||
call->MTU += peer->natMTU;
|
/* don't upgrade if we can't handle it */
|
||||||
call->MTU = MIN(call->MTU, peer->maxMTU);
|
if ((call->nDgramPackets == 1) && (call->MTU >= peer->ifMTU))
|
||||||
|
call->MTU = peer->ifMTU;
|
||||||
|
else {
|
||||||
|
call->MTU += peer->natMTU;
|
||||||
|
call->MTU = MIN(call->MTU, peer->maxMTU);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
call->nAcks = 0;
|
call->nAcks = 0;
|
||||||
}
|
}
|
||||||
@ -5772,6 +5808,8 @@ rxi_CheckCall(struct rx_call *call)
|
|||||||
struct rx_connection *conn = call->conn;
|
struct rx_connection *conn = call->conn;
|
||||||
afs_uint32 now;
|
afs_uint32 now;
|
||||||
afs_uint32 deadTime;
|
afs_uint32 deadTime;
|
||||||
|
int cerror = 0;
|
||||||
|
int newmtu = 0;
|
||||||
|
|
||||||
#ifdef AFS_GLOBAL_RXLOCK_KERNEL
|
#ifdef AFS_GLOBAL_RXLOCK_KERNEL
|
||||||
if (call->flags & RX_CALL_TQ_BUSY) {
|
if (call->flags & RX_CALL_TQ_BUSY) {
|
||||||
@ -5819,8 +5857,8 @@ rxi_CheckCall(struct rx_call *call)
|
|||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
#endif /* ADAPT_PMTU */
|
#endif /* ADAPT_PMTU */
|
||||||
rxi_CallError(call, RX_CALL_DEAD);
|
cerror = RX_CALL_DEAD;
|
||||||
return -1;
|
goto mtuout;
|
||||||
} else {
|
} else {
|
||||||
#ifdef RX_ENABLE_LOCKS
|
#ifdef RX_ENABLE_LOCKS
|
||||||
/* Cancel pending events */
|
/* Cancel pending events */
|
||||||
@ -5867,6 +5905,31 @@ rxi_CheckCall(struct rx_call *call)
|
|||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
|
mtuout:
|
||||||
|
if (call->conn->msgsizeRetryErr && cerror != RX_CALL_TIMEOUT) {
|
||||||
|
/* if we never succeeded, let the error pass out as-is */
|
||||||
|
if (call->conn->peer->maxPacketSize)
|
||||||
|
cerror = call->conn->msgsizeRetryErr;
|
||||||
|
|
||||||
|
/* if we thought we could send more, perhaps things got worse */
|
||||||
|
if (call->conn->peer->maxPacketSize > conn->lastPacketSize)
|
||||||
|
/* maxpacketsize will be cleared in rxi_SetPeerMtu */
|
||||||
|
newmtu = MAX(call->conn->peer->maxPacketSize-RX_IPUDP_SIZE,
|
||||||
|
conn->lastPacketSize-(128+RX_IPUDP_SIZE));
|
||||||
|
else
|
||||||
|
newmtu = conn->lastPacketSize-(128+RX_IPUDP_SIZE);
|
||||||
|
|
||||||
|
/* minimum capped in SetPeerMtu */
|
||||||
|
rxi_SetPeerMtu(call->conn->peer, 0, 0, newmtu);
|
||||||
|
|
||||||
|
/* clean up */
|
||||||
|
conn->lastPacketSize = 0;
|
||||||
|
|
||||||
|
/* needed so ResetCall doesn't clobber us. */
|
||||||
|
call->MTU = call->conn->peer->ifMTU;
|
||||||
|
}
|
||||||
|
rxi_CallError(call, cerror);
|
||||||
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
|
10
src/rx/rx.h
10
src/rx/rx.h
@ -175,6 +175,9 @@ int ntoh_syserr_conv(int error);
|
|||||||
/* Set the overload threshold and the overload error */
|
/* Set the overload threshold and the overload error */
|
||||||
#define rx_SetBusyThreshold(threshold, code) (rx_BusyThreshold=(threshold),rx_BusyError=(code))
|
#define rx_SetBusyThreshold(threshold, code) (rx_BusyThreshold=(threshold),rx_BusyError=(code))
|
||||||
|
|
||||||
|
/* Set the error to use for retrying a connection during MTU tuning */
|
||||||
|
#define rx_SetMsgsizeRetryErr(conn, err) ((conn)->msgsizeRetryErr = (err))
|
||||||
|
|
||||||
/* If this flag is set,no new requests are processed by rx, all new requests are
|
/* If this flag is set,no new requests are processed by rx, all new requests are
|
||||||
returned with an error code of RX_CALL_DEAD ( transient error ) */
|
returned with an error code of RX_CALL_DEAD ( transient error ) */
|
||||||
#define rx_SetRxTranquil() (rx_tranquil = 1)
|
#define rx_SetRxTranquil() (rx_tranquil = 1)
|
||||||
@ -244,8 +247,9 @@ struct rx_connection {
|
|||||||
afs_uint32 serial; /* Next outgoing packet serial number */
|
afs_uint32 serial; /* Next outgoing packet serial number */
|
||||||
afs_uint32 lastSerial; /* # of last packet received, for computing skew */
|
afs_uint32 lastSerial; /* # of last packet received, for computing skew */
|
||||||
afs_int32 maxSerial; /* largest serial number seen on incoming packets */
|
afs_int32 maxSerial; /* largest serial number seen on incoming packets */
|
||||||
/* afs_int32 maxPacketSize; max packet size should be per-connection since */
|
afs_int32 lastPacketSize; /* last >max attempt */
|
||||||
/* peer process could be restarted on us. Includes RX Header. */
|
afs_int32 lastPacketSizeSeq; /* seq number of attempt */
|
||||||
|
|
||||||
struct rxevent *challengeEvent; /* Scheduled when the server is challenging a */
|
struct rxevent *challengeEvent; /* Scheduled when the server is challenging a */
|
||||||
struct rxevent *delayedAbortEvent; /* Scheduled to throttle looping client */
|
struct rxevent *delayedAbortEvent; /* Scheduled to throttle looping client */
|
||||||
struct rxevent *checkReachEvent; /* Scheduled when checking reachability */
|
struct rxevent *checkReachEvent; /* Scheduled when checking reachability */
|
||||||
@ -274,6 +278,7 @@ struct rx_connection {
|
|||||||
afs_int32 idleDeadErr;
|
afs_int32 idleDeadErr;
|
||||||
afs_int32 secondsUntilNatPing; /* how often to ping conn */
|
afs_int32 secondsUntilNatPing; /* how often to ping conn */
|
||||||
struct rxevent *natKeepAliveEvent; /* Scheduled to keep connection open */
|
struct rxevent *natKeepAliveEvent; /* Scheduled to keep connection open */
|
||||||
|
afs_int32 msgsizeRetryErr;
|
||||||
int nSpecific; /* number entries in specific data */
|
int nSpecific; /* number entries in specific data */
|
||||||
void **specific; /* pointer to connection specific data */
|
void **specific; /* pointer to connection specific data */
|
||||||
};
|
};
|
||||||
@ -419,6 +424,7 @@ struct rx_peer {
|
|||||||
afs_hyper_t bytesReceived; /* Number of bytes received from this peer */
|
afs_hyper_t bytesReceived; /* Number of bytes received from this peer */
|
||||||
struct rx_queue rpcStats; /* rpc statistic list */
|
struct rx_queue rpcStats; /* rpc statistic list */
|
||||||
int lastReachTime; /* Last time we verified reachability */
|
int lastReachTime; /* Last time we verified reachability */
|
||||||
|
afs_int32 maxPacketSize; /* peer packetsize hint */
|
||||||
};
|
};
|
||||||
|
|
||||||
#ifndef KDUMP_RX_LOCK
|
#ifndef KDUMP_RX_LOCK
|
||||||
|
@ -2217,6 +2217,10 @@ rxi_SendPacket(struct rx_call *call, struct rx_connection *conn,
|
|||||||
* serial number means the packet was never sent. */
|
* serial number means the packet was never sent. */
|
||||||
MUTEX_ENTER(&conn->conn_data_lock);
|
MUTEX_ENTER(&conn->conn_data_lock);
|
||||||
p->header.serial = ++conn->serial;
|
p->header.serial = ++conn->serial;
|
||||||
|
if (p->length > conn->peer->maxPacketSize) {
|
||||||
|
conn->lastPacketSize = p->length;
|
||||||
|
conn->lastPacketSizeSeq = p->header.seq;
|
||||||
|
}
|
||||||
MUTEX_EXIT(&conn->conn_data_lock);
|
MUTEX_EXIT(&conn->conn_data_lock);
|
||||||
/* This is so we can adjust retransmit time-outs better in the face of
|
/* This is so we can adjust retransmit time-outs better in the face of
|
||||||
* rapidly changing round-trip times. RTO estimation is not a la Karn.
|
* rapidly changing round-trip times. RTO estimation is not a la Karn.
|
||||||
@ -2366,6 +2370,14 @@ rxi_SendPacketList(struct rx_call *call, struct rx_connection *conn,
|
|||||||
MUTEX_ENTER(&conn->conn_data_lock);
|
MUTEX_ENTER(&conn->conn_data_lock);
|
||||||
serial = conn->serial;
|
serial = conn->serial;
|
||||||
conn->serial += len;
|
conn->serial += len;
|
||||||
|
for (i = 0; i < len; i++) {
|
||||||
|
p = list[i];
|
||||||
|
if ((p->length > conn->peer->maxPacketSize) &&
|
||||||
|
(p->length > conn->lastPacketSize)) {
|
||||||
|
conn->lastPacketSize = p->length;
|
||||||
|
conn->lastPacketSizeSeq = p->header.seq;
|
||||||
|
}
|
||||||
|
}
|
||||||
MUTEX_EXIT(&conn->conn_data_lock);
|
MUTEX_EXIT(&conn->conn_data_lock);
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user