pmtu-and-prefetch-20080520

LICENSE IPL10

try hard to optimize using the wire for high latency connections
This commit is contained in:
Derrick Brashear 2008-05-21 05:22:13 +00:00
parent c761f24839
commit 1206e7538b
18 changed files with 516 additions and 67 deletions

View File

@ -37,6 +37,8 @@ AC_ARG_ENABLE( unix-sockets,
[ --enable-unix-sockets enable use of unix domain sockets for fssync],, enable_unix_sockets="yes")
AC_ARG_ENABLE( full-vos-listvol-switch,
[ --disable-full-vos-listvol-switch disable vos full listvol switch for formatted output],, enable_full_vos_listvol_switch="yes")
AC_ARG_ENABLE( icmp-pmtu-discovery,
[ --enable-icmp-pmtu-discovery enable path MTU discovery by decoding ICMP unreachable replies],, enable_icmp_pmtu_discovery="no")
AC_ARG_WITH(dux-kernel-headers,
[ --with-dux-kernel-headers=path use the kernel headers found at path(optional, defaults to first match in /usr/sys)]
)
@ -1069,6 +1071,19 @@ else
fi
AC_CACHE_VAL(ac_cv_setsockopt_iprecverr,
[
AC_MSG_CHECKING([for setsockopt(, SOL_IP, IP_RECVERR)])
AC_TRY_COMPILE( [#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>],
[int on=1;
setsockopt(0, SOL_IP, IP_RECVERR, &on, sizeof(on));], ac_cv_setsockopt_iprecverr=yes, ac_cv_setsockopt_iprecverr=no)
AC_MSG_RESULT($ac_cv_setsockopt_iprecverr)])
if test "$ac_cv_setsockopt_iprecverr" = "yes"; then
AC_DEFINE(ADAPT_PMTU_RECVERR, 1, [define if asynchronous socket errors can be received])
fi
PTHREAD_LIBS=error
if test "x$MKAFS_OSTYPE" = OBSD; then
PTHREAD_LIBS="-pthread"
@ -1158,6 +1173,12 @@ if test "$enable_full_vos_listvol_switch" = "yes"; then
AC_DEFINE(FULL_LISTVOL_SWITCH, 1, [define if you want to want listvol switch])
fi
if test "$enable_icmp_pmtu_discovery" = "yes"; then
if test "$ac_cv_setsockopt_iprecverr" = "yes"; then
AC_DEFINE(ADAPT_PMTU, 1, [define if you want to decode icmp unreachable packets to discover path mtu])
fi
fi
if test "$enable_bos_restricted_mode" = "yes"; then
AC_DEFINE(BOS_RESTRICTED_MODE, 1, [define if you want to want bos restricted mode])
fi
@ -1210,6 +1231,7 @@ AC_CHECK_HEADERS(mntent.h sys/vfs.h sys/param.h sys/fs_types.h sys/fstyp.h)
AC_CHECK_HEADERS(sys/mount.h strings.h termios.h signal.h poll.h)
AC_CHECK_HEADERS(windows.h malloc.h winsock2.h direct.h io.h sys/user.h)
AC_CHECK_HEADERS(security/pam_modules.h siad.h usersec.h ucontext.h regex.h values.h)
AC_CHECK_HEADERS(linux/errqueue.h,,,[#include <linux/types.h>])
dnl Don't build PAM on IRIX; the interface doesn't work for us.
if test "$ac_cv_header_security_pam_modules_h" = yes -a "$enable_pam" = yes; then

View File

@ -150,6 +150,35 @@ afs_open(struct vcache **avcp, afs_int32 aflags, struct AFS_UCRED *acred)
}
#endif
ReleaseReadLock(&tvc->lock);
if ((afs_preCache != 0) && (writing == 0) && (vType(tvc) != VDIR) &&
    (!afs_BBusy())) {
    /* Precaching is enabled, the file is being opened read-only, it is not
     * a directory and the background daemons are not busy: ask a background
     * daemon to start fetching the file from offset 0. */
    register struct dcache *tdc;
    afs_size_t offset, len;

    tdc = afs_GetDCache(tvc, 0, &treq, &offset, &len, 1);
    /* afs_GetDCache may fail and return NULL; without a dcache entry there
     * is nothing to lock or queue, so skip the prefetch in that case. */
    if (tdc) {
        ObtainSharedLock(&tdc->mflock, 865);
        if (!(tdc->mflags & DFFetchReq)) {
            struct brequest *bp;

            /* start the daemon (may already be running, however) */
            UpgradeSToWLock(&tdc->mflock, 666);
            tdc->mflags |= DFFetchReq;  /* guaranteed to be cleared by BKG or
                                         * GetDCache */
            /* last parm (1) tells bkg daemon to do an afs_PutDCache when it
             * is done, since we don't want to wait for it to finish before
             * doing so ourselves.
             */
            bp = afs_BQueue(BOP_FETCH, tvc, B_DONTWAIT, 0, acred,
                            (afs_size_t) 0, (afs_size_t) 1, tdc);
            if (!bp) {
                /* Request could not be queued; undo the flag so nobody
                 * waits for a fetch that will never happen. */
                tdc->mflags &= ~DFFetchReq;
            }
            ReleaseWriteLock(&tdc->mflock);
        } else {
            /* A fetch is already pending for this chunk. */
            ReleaseSharedLock(&tdc->mflock);
        }
        /* NOTE(review): when the request is not queued (bp == NULL, or
         * DFFetchReq was already set) nothing on this path releases the
         * dcache obtained above -- confirm whether an afs_PutDCache(tdc)
         * is required here. */
    }
}
done:
afs_PutFakeStat(&fakestate);
code = afs_CheckCode(code, &treq, 4); /* avoid AIX -O bug */

View File

@ -388,12 +388,16 @@ afs_MemRead(register struct vcache *avc, struct uio *auio,
*/
if (tdc) {
ReleaseReadLock(&tdc->lock);
#if !defined(AFS_VM_RDWR_ENV)
/* try to queue prefetch, if needed */
if (!noLock) {
if (!noLock &&
#ifndef AFS_VM_RDWR_ENV
afs_preCache
#else
1
#endif
) {
afs_PrefetchChunk(avc, tdc, acred, &treq);
}
#endif
afs_PutDCache(tdc);
}
if (!noLock)

View File

@ -52,6 +52,7 @@ afs_int32 afs_CheckServerDaemonStarted = 0;
afs_int32 afs_probe_interval = DEFAULT_PROBE_INTERVAL;
afs_int32 afs_probe_all_interval = 600;
afs_int32 afs_nat_probe_interval = 60;
afs_int32 afs_preCache = 0;
#define PROBE_WAIT() (1000 * (afs_probe_interval - ((afs_random() & 0x7fffffff) \
% (afs_probe_interval/2))))
@ -478,17 +479,22 @@ BPrefetch(register struct brequest *ab)
{
register struct dcache *tdc;
register struct vcache *tvc;
afs_size_t offset, len;
afs_size_t offset, len, abyte, totallen = 0;
struct vrequest treq;
AFS_STATCNT(BPrefetch);
if ((len = afs_InitReq(&treq, ab->cred)))
return;
abyte = ab->size_parm[0];
tvc = ab->vc;
tdc = afs_GetDCache(tvc, ab->size_parm[0], &treq, &offset, &len, 1);
if (tdc) {
afs_PutDCache(tdc);
}
do {
tdc = afs_GetDCache(tvc, abyte, &treq, &offset, &len, 1);
if (tdc) {
afs_PutDCache(tdc);
}
abyte+=len;
totallen += len;
} while ((totallen < afs_preCache) && tdc && (len > 0));
/* now, dude may be waiting for us to clear DFFetchReq bit; do so. Can't
* use tdc from GetDCache since afs_GetDCache may fail, but someone may
* be waiting for our wakeup anyway.

View File

@ -93,6 +93,7 @@ DECL_PIOCTL(PCallBackAddr);
DECL_PIOCTL(PDiscon);
DECL_PIOCTL(PNFSNukeCreds);
DECL_PIOCTL(PNewUuid);
DECL_PIOCTL(PPrecache);
/*
* A macro that says whether we're going to need HandleClientContext().
@ -201,6 +202,9 @@ static int (*(CpioctlSw[])) () = {
PBogus, /* 7 */
PBogus, /* 8 */
PNewUuid, /* 9 */
PBogus, /* 0 */
PBogus, /* 0 */
PPrecache, /* 12 */
};
static int (*(OpioctlSw[])) () = {
@ -2064,6 +2068,18 @@ DECL_PIOCTL(PViceAccess)
return EACCES;
}
/*
 * PPrecache - pioctl handler that sets the precache size.
 *
 * Input (ain): one afs_int32 giving the size in 1K blocks; 0 disables
 * precaching.  The value is stored in afs_preCache in bytes.
 *
 * Returns 0 on success, EACCES if the caller is not the superuser, and
 * EINVAL if the size is negative or would overflow afs_int32 once
 * converted to bytes.
 */
DECL_PIOCTL(PPrecache)
{
    afs_int32 newValue;

    /*AFS_STATCNT(PPrecache);*/
    if (!afs_osi_suser(*acred))
        return EACCES;
    memcpy((char *)&newValue, ain, sizeof(afs_int32));
    /* afs_preCache is a signed 32-bit byte count: refuse values that are
     * negative or that would overflow when multiplied by 1024. */
    if (newValue < 0 || newValue > 0x7fffffff / 1024)
        return EINVAL;
    afs_preCache = newValue * 1024;
    return 0;
}
DECL_PIOCTL(PSetCacheSize)
{
afs_int32 newValue;

View File

@ -199,6 +199,7 @@ extern afs_int32 afs_gcpags;
extern afs_int32 afs_gcpags_procsize;
extern afs_int32 afs_CheckServerDaemonStarted;
extern afs_int32 afs_probe_interval;
extern afs_int32 afs_preCache;
extern void afs_Daemon(void);
extern struct brequest *afs_BQueue(register short aopcode,

View File

@ -98,6 +98,7 @@
#define VIOC_CBADDR _CVICEIOCTL(3) /* push callback addr */
#define VIOC_DISCON _CVICEIOCTL(5) /* set/get discon mode */
#define VIOC_NEWUUID _CVICEIOCTL(9) /* new uuid */
#define VIOCPRECACHE _CVICEIOCTL(12) /* precache size */
/* OpenAFS-specific 'O' pioctl's */
#define VIOC_NFS_NUKE_CREDS _OVICEIOCTL(1) /* nuke creds for all PAG's */

View File

@ -25,6 +25,10 @@ RCSID
#include "h/smp_lock.h"
#endif
#include <asm/uaccess.h>
#ifdef ADAPT_PMTU
#include <linux/errqueue.h>
#include <linux/icmp.h>
#endif
/* rxk_NewSocket
* open and bind RX socket
@ -36,8 +40,12 @@ rxk_NewSocketHost(afs_uint32 ahost, short aport)
struct sockaddr_in myaddr;
int code;
KERNEL_SPACE_DECL;
#ifdef ADAPT_PMTU
int pmtu = IP_PMTUDISC_WANT;
int do_recverr = 1;
#else
int pmtu = IP_PMTUDISC_DONT;
#endif
/* We need a better test for this. if you need it back, tell us
* how to detect it.
@ -69,6 +77,10 @@ rxk_NewSocketHost(afs_uint32 ahost, short aport)
TO_USER_SPACE();
sockp->ops->setsockopt(sockp, SOL_IP, IP_MTU_DISCOVER, (char *)&pmtu,
sizeof(pmtu));
#ifdef ADAPT_PMTU
sockp->ops->setsockopt(sockp, SOL_IP, IP_RECVERR, (char *)&do_recverr,
sizeof(do_recverr));
#endif
TO_KERNEL_SPACE();
return (osi_socket *)sockp;
}
@ -87,6 +99,65 @@ rxk_FreeSocket(register struct socket *asocket)
return 0;
}
#ifdef ADAPT_PMTU
/* handle_socket_error
 *
 * Pull one entry off the socket's error queue (MSG_ERRQUEUE).  If it is an
 * ICMP "fragmentation needed" report, lower the MTU recorded for the rx
 * peer the offending datagram was addressed to.  Every other kind of error
 * is dequeued and dropped.
 *
 * so: the rx socket (a struct socket * in the Linux kernel).
 */
void
handle_socket_error(osi_socket so)
{
    KERNEL_SPACE_DECL;
    struct msghdr msg;
    struct cmsghdr *cmsg;
    struct sock_extended_err *err;
    struct sockaddr_in addr;
    char *controlmsgbuf;
    int code;
    struct socket *sop = (struct socket *)so;

    if (!(controlmsgbuf = rxi_Alloc(256)))
        return;

    /* Marked unspecified so a receive that does not fill in the name is
     * rejected by the family check below. */
    addr.sin_family = AF_UNSPEC;

    /* No payload wanted: only the destination address (msg_name) and the
     * control message carrying the extended error. */
    msg.msg_name = &addr;
    msg.msg_namelen = sizeof(addr);
    msg.msg_iov = NULL;
    msg.msg_iovlen = 0;
    msg.msg_control = controlmsgbuf;
    msg.msg_controllen = 256;
    msg.msg_flags = 0;

    TO_USER_SPACE();
    code = sock_recvmsg(sop, &msg, 256, MSG_ERRQUEUE|MSG_DONTWAIT|MSG_TRUNC);
    TO_KERNEL_SPACE();

    if (code < 0 || !(msg.msg_flags & MSG_ERRQUEUE))
        goto out;

    for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
        if (CMSG_OK(&msg, cmsg) && cmsg->cmsg_level == SOL_IP &&
            cmsg->cmsg_type == IP_RECVERR)
            break;
    }
    if (!cmsg)
        goto out;

    err = (struct sock_extended_err *)CMSG_DATA(cmsg);

    /* For error-queue reads msg_name holds the destination of the datagram
     * that triggered the error, i.e. the rx peer (see Linux ip(7)).  The
     * SO_EE_OFFENDER address is the node that reported the error (typically
     * an intermediate router, with no port), so it must not be used to look
     * up the peer. */
    if (addr.sin_family != AF_INET)
        goto out;

    if (err->ee_origin == SO_EE_ORIGIN_ICMP &&
        err->ee_type == ICMP_DEST_UNREACH &&
        err->ee_code == ICMP_FRAG_NEEDED &&
        err->ee_info >= 68) {   /* same floor the user-space handler uses */
        /* Peers are keyed by host/port in network byte order, and the peer
         * MTU excludes the IP/UDP headers, matching what the user-space
         * rxi_HandleSocketError passes. */
        rxi_SetPeerMtu(addr.sin_addr.s_addr, addr.sin_port,
                       err->ee_info - RX_IPUDP_SIZE);
    }
    /* other DEST_UNREACH's and TIME_EXCEEDED should be dealt with too */

out:
    rxi_Free(controlmsgbuf, 256);
    return;
}
#endif
/* osi_NetSend
*
@ -100,7 +171,22 @@ osi_NetSend(osi_socket sop, struct sockaddr_in *to, struct iovec *iovec,
{
KERNEL_SPACE_DECL;
struct msghdr msg;
int code;
int code, sockerr;
size_t esize;
#ifdef ADAPT_PMTU
while (1) {
sockerr=0;
esize = sizeof(sockerr);
TO_USER_SPACE();
sop->ops->getsockopt(sop, SOL_SOCKET, SO_ERROR, (char *)&sockerr,
&esize);
TO_KERNEL_SPACE();
if (sockerr == 0)
break;
handle_socket_error(sop);
}
#endif
msg.msg_iovlen = iovcnt;
msg.msg_iov = iovec;
@ -144,13 +230,27 @@ osi_NetReceive(osi_socket so, struct sockaddr_in *from, struct iovec *iov,
{
KERNEL_SPACE_DECL;
struct msghdr msg;
int code;
int code, sockerr;
size_t esize;
struct iovec tmpvec[RX_MAXWVECS + 2];
struct socket *sop = (struct socket *)so;
if (iovcnt > RX_MAXWVECS + 2) {
osi_Panic("Too many (%d) iovecs passed to osi_NetReceive\n", iovcnt);
}
#ifdef ADAPT_PMTU
while (1) {
sockerr=0;
esize = sizeof(sockerr);
TO_USER_SPACE();
sop->ops->getsockopt(sop, SOL_SOCKET, SO_ERROR, (char *)&sockerr,
&esize);
TO_KERNEL_SPACE();
if (sockerr == 0)
break;
handle_socket_error(so);
}
#endif
memcpy(tmpvec, iov, iovcnt * sizeof(struct iovec));
msg.msg_name = from;
msg.msg_iov = tmpvec;

View File

@ -33,6 +33,11 @@ RCSID
#include "h/socket.h"
#endif
#include "netinet/in.h"
#ifdef AFS_SUN57_ENV
#include "inet/common.h"
#include "inet/ip.h"
#include "inet/ip_ire.h"
#endif
#include "afs/afs_args.h"
#include "afs/afs_osi.h"
#ifdef RX_KERNEL_TRACE
@ -2321,6 +2326,43 @@ rxi_Free(void *addr, register size_t size)
osi_Free(addr, size);
}
/* Lower one peer's interface MTU to at most mtu and recompute the derived
 * natMTU.  Takes and releases the peer's own lock. */
static void
rxi_LowerPeerMtu(struct rx_peer *peer, int mtu)
{
    MUTEX_ENTER(&peer->peer_lock);
    peer->ifMTU = MIN(mtu, peer->ifMTU);
    peer->natMTU = rxi_AdjustIfMTU(peer->ifMTU);
    MUTEX_EXIT(&peer->peer_lock);
}

/* rxi_SetPeerMtu
 *
 * Record a newly learned (smaller) MTU for known peers.  The MTU is only
 * ever lowered, never raised.
 *
 * host: peer address, compared directly against peer->host.
 * port: peer port, compared directly against peer->port; 0 means "every
 *       known peer on this host", which requires walking the whole table.
 * mtu:  new upper bound for the peer's ifMTU.  Non-positive values are
 *       ignored, since storing one would leave the peer with an unusable
 *       MTU.
 *
 * Holds rx_peerHashTable_lock for the duration of the walk.
 */
void
rxi_SetPeerMtu(register afs_uint32 host, register afs_uint32 port, int mtu)
{
    struct rx_peer *peer;

    if (mtu <= 0)
        return;

    MUTEX_ENTER(&rx_peerHashTable_lock);
    if (port == 0) {
        struct rx_peer **bucket;
        struct rx_peer **bucket_end = &rx_peerHashTable[rx_hashTableSize];

        /* Port unknown: the hash includes the port, so scan every chain. */
        for (bucket = &rx_peerHashTable[0]; bucket < bucket_end; bucket++) {
            for (peer = *bucket; peer; peer = peer->next) {
                if (peer->host == host)
                    rxi_LowerPeerMtu(peer, mtu);
            }
        }
    } else {
        int hashIndex = PEER_HASH(host, port);

        for (peer = rx_peerHashTable[hashIndex]; peer; peer = peer->next) {
            if ((peer->host == host) && (peer->port == port))
                rxi_LowerPeerMtu(peer, mtu);
        }
    }
    MUTEX_EXIT(&rx_peerHashTable_lock);
}
/* Find the peer process represented by the supplied (host,port)
* combination. If there is no appropriate active peer structure, a
* new one will be allocated and initialized
@ -5492,6 +5534,32 @@ rxi_CheckCall(register struct rx_call *call)
* number of seconds. */
if (now > (call->lastReceiveTime + deadTime)) {
if (call->state == RX_STATE_ACTIVE) {
#ifdef ADAPT_PMTU
#if defined(KERNEL) && defined(AFS_SUN57_ENV)
ire_t *ire;
#if defined(AFS_SUN510_ENV) && defined(GLOBAL_NETSTACKID)
netstack_t *ns = netstack_find_by_stackid(GLOBAL_NETSTACKID);
ip_stack_t *ipst = ns->netstack_ip;
#endif
ire = ire_cache_lookup(call->conn->peer->host
#if defined(AFS_SUN510_ENV) && defined(ALL_ZONES)
, ALL_ZONES
#if defined(AFS_SUN510_ENV) && (defined(ICL_3_ARG) || defined(GLOBAL_NETSTACKID))
, NULL
#if defined(AFS_SUN510_ENV) && defined(GLOBAL_NETSTACKID)
, ipst
#endif
#endif
#endif
);
if (ire && ire->ire_max_frag > 0)
rxi_SetPeerMtu(call->conn->peer->host, 0, ire->ire_max_frag);
#if defined(GLOBAL_NETSTACKID)
netstack_rele(ns);
#endif
#endif
#endif /* ADAPT_PMTU */
rxi_CallError(call, RX_CALL_DEAD);
return -1;
} else {

View File

@ -397,7 +397,7 @@ EXT int rx_packetReclaims GLOBALSINIT(0);
* This is provided for backward compatibility with peers which may be unable
* to swallow anything larger. THIS MUST NEVER DECREASE WHILE AN APPLICATION
* IS RUNNING! */
EXT afs_uint32 rx_maxReceiveSize GLOBALSINIT(OLD_MAX_PACKET_SIZE * RX_MAX_FRAGS +
EXT afs_uint32 rx_maxReceiveSize GLOBALSINIT(_OLD_MAX_PACKET_SIZE * RX_MAX_FRAGS +
UDP_HDR_SIZE * (RX_MAX_FRAGS - 1));
/* this is the maximum packet size that the user wants us to receive */
@ -600,4 +600,5 @@ EXT int rx_enable_hot_thread GLOBALSINIT(0);
EXT int rx_max_clones_per_connection GLOBALSINIT(2);
EXT int RX_IPUDP_SIZE GLOBALSINIT(_RX_IPUDP_SIZE);
#endif /* AFS_RX_GLOBALS_H */

View File

@ -420,6 +420,10 @@ rxi_Listen(osi_socket sock)
/* Receive a datagram on an rx socket.  When path-MTU adaptation is compiled
 * in, pending socket errors are handled first, one at a time, until
 * rxi_HandleSocketError stops returning a positive value. */
int
rxi_Recvmsg(osi_socket socket, struct msghdr *msg_p, int flags)
{
#if defined(HAVE_LINUX_ERRQUEUE_H) && defined(ADAPT_PMTU)
    for (;;) {
        if (rxi_HandleSocketError(socket) <= 0)
            break;
    }
#endif
    return recvmsg(socket, msg_p, flags);
}
@ -443,6 +447,10 @@ rxi_Sendmsg(osi_socket socket, struct msghdr *msg_p, int flags)
}
FD_SET(socket, sfds);
}
#if defined(HAVE_LINUX_ERRQUEUE_H) && defined(ADAPT_PMTU)
while((rxi_HandleSocketError(socket)) > 0)
;
#endif
#ifdef AFS_NT40_ENV
if (WSAGetLastError())
#elif defined(AFS_LINUX22_ENV)

View File

@ -2630,6 +2630,8 @@ rxi_AdjustIfMTU(int mtu)
int adjMTU;
int frags;
if (rxi_nRecvFrags == 1 && rxi_nSendFrags == 1)
return mtu;
adjMTU = RX_HEADER_SIZE + RX_JUMBOBUFFERSIZE + RX_JUMBOHEADERSIZE;
if (mtu <= adjMTU) {
return mtu;

View File

@ -51,7 +51,7 @@
#define IPv6_FRAG_HDR_SIZE 8 /* IPv6 Fragment Header */
#define UDP_HDR_SIZE 8 /* UDP Header */
#define RX_IP_SIZE (IPv6_HDR_SIZE + IPv6_FRAG_HDR_SIZE)
#define RX_IPUDP_SIZE (RX_IP_SIZE + UDP_HDR_SIZE)
#define _RX_IPUDP_SIZE (RX_IP_SIZE + UDP_HDR_SIZE)
/* REMOTE_PACKET_SIZE is currently the same as local. This is because REMOTE
* is defined much too generally for my tastes, and includes the case of
@ -102,11 +102,15 @@
/* The minimum MTU for an IP network is 576 bytes including headers */
#define RX_MIN_PACKET_SIZE (576 - RX_IPUDP_SIZE)
#define RX_PP_PACKET_SIZE RX_MIN_PACKET_SIZE
#define _RX_MIN_PACKET_SIZE (576 - _RX_IPUDP_SIZE)
#define _RX_PP_PACKET_SIZE _RX_MIN_PACKET_SIZE
#define OLD_MAX_PACKET_SIZE (1500 - RX_IPUDP_SIZE)
#define _OLD_MAX_PACKET_SIZE (1500 - _RX_IPUDP_SIZE)
/* if the other guy is not on the local net, use this size */
#define RX_REMOTE_PACKET_SIZE (1500 - RX_IPUDP_SIZE)
#define _RX_REMOTE_PACKET_SIZE (1500 - _RX_IPUDP_SIZE)
/* for now, never send more data than this */
#define RX_MAX_PACKET_SIZE 16384

View File

@ -83,6 +83,8 @@ extern void rxi_FreeCall(register struct rx_call *call);
extern char *rxi_Alloc(register size_t size);
extern void rxi_Free(void *addr, register size_t size);
extern void rxi_SetPeerMtu(register afs_uint32 host, register afs_uint32 port,
int mtu);
extern struct rx_peer *rxi_FindPeer(register afs_uint32 host,
register u_short port,
struct rx_peer *origPeer, int create);
@ -584,6 +586,7 @@ extern osi_socket rxi_GetUDPSocket(u_short port);
extern void osi_AssertFailU(const char *expr, const char *file, int line);
extern int rx_getAllAddr(afs_int32 * buffer, int maxSize);
extern void rxi_InitPeerParams(struct rx_peer *pp);
extern int rxi_HandleSocketError(int socket);
#if defined(AFS_AIX32_ENV) && !defined(KERNEL)
extern void *osi_Alloc(afs_int32 x);

View File

@ -412,6 +412,10 @@ int
rxi_Recvmsg(osi_socket socket, struct msghdr *msg_p, int flags)
{
int ret;
#if defined(HAVE_LINUX_ERRQUEUE_H) && defined(ADAPT_PMTU)
while((rxi_HandleSocketError(socket)) > 0)
;
#endif
ret = recvmsg(socket, msg_p, flags);
return ret;
}

View File

@ -95,8 +95,20 @@ rxi_GetHostUDPSocket(u_int ahost, u_short port)
struct sockaddr_in taddr;
char *name = "rxi_GetUDPSocket: ";
#ifdef AFS_LINUX22_ENV
#if defined(ADAPT_PMTU)
int pmtu=IP_PMTUDISC_WANT;
int recverr=1;
#else
int pmtu=IP_PMTUDISC_DONT;
#endif
#endif
#if defined(HAVE_LINUX_ERRQUEUE_H) && defined(ADAPT_PMTU)
#include <linux/types.h>
#include <linux/errqueue.h>
#ifndef IP_MTU
#define IP_MTU 14
#endif
#endif
#if !defined(AFS_NT40_ENV)
if (ntohs(port) >= IPPORT_RESERVED && ntohs(port) < IPPORT_USERRESERVED) {
@ -178,8 +190,10 @@ rxi_GetHostUDPSocket(u_int ahost, u_short port)
#ifdef AFS_LINUX22_ENV
setsockopt(socketFd, SOL_IP, IP_MTU_DISCOVER, &pmtu, sizeof(pmtu));
#if defined(ADAPT_PMTU)
setsockopt(socketFd, SOL_IP, IP_RECVERR, &recverr, sizeof(recverr));
#endif
#endif
if (rxi_Listen(socketFd) < 0) {
goto error;
}
@ -605,6 +619,10 @@ rxi_InitPeerParams(struct rx_peer *pp)
afs_uint32 ppaddr;
u_short rxmtu;
int ix;
#if defined(ADAPT_PMTU) && defined(IP_MTU)
int sock;
struct sockaddr_in addr;
#endif
LOCK_IF_INIT;
if (!Inited) {
@ -654,6 +672,22 @@ rxi_InitPeerParams(struct rx_peer *pp)
pp->timeout.sec = 2;
pp->ifMTU = MIN(rx_MyMaxSendSize, OLD_MAX_PACKET_SIZE);
#endif /* ADAPT_MTU */
#if defined(ADAPT_PMTU) && defined(IP_MTU)
sock=socket(PF_INET, SOCK_DGRAM, IPPROTO_UDP);
if (sock >= 0) {
addr.sin_family = AF_INET;
addr.sin_addr.s_addr = pp->host;
addr.sin_port = pp->port;
if (connect(sock, (struct sockaddr *)&addr, sizeof(addr)) == 0) {
int mtu=0;
socklen_t s = sizeof(mtu);
if (getsockopt(sock, SOL_IP, IP_MTU, &mtu, &s)== 0) {
pp->ifMTU = MIN(mtu - RX_IPUDP_SIZE, pp->ifMTU);
}
}
close(sock);
}
#endif
pp->ifMTU = rxi_AdjustIfMTU(pp->ifMTU);
pp->maxMTU = OLD_MAX_PACKET_SIZE; /* for compatibility with old guys */
pp->natMTU = MIN((int)pp->ifMTU, OLD_MAX_PACKET_SIZE);
@ -686,3 +720,54 @@ rx_SetMaxMTU(int mtu)
{
rx_MyMaxSendSize = rx_maxReceiveSizeUser = rx_maxReceiveSize = mtu;
}
#if defined(HAVE_LINUX_ERRQUEUE_H) && defined(ADAPT_PMTU)
int
rxi_HandleSocketError(int socket)
{
struct msghdr msg;
struct cmsghdr *cmsg;
struct sock_extended_err *err;
struct sockaddr_in addr;
struct sockaddr *offender;
char controlmsgbuf[256];
int ret=0;
int code;
msg.msg_name = &addr;
msg.msg_namelen = sizeof(addr);
msg.msg_iov = NULL;
msg.msg_iovlen = 0;
msg.msg_control = controlmsgbuf;
msg.msg_controllen = 256;
msg.msg_flags = 0;
code = recvmsg(socket, &msg, MSG_ERRQUEUE|MSG_DONTWAIT|MSG_TRUNC);
if (code < 0 || !(msg.msg_flags & MSG_ERRQUEUE))
goto out;
for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
if ((char *)cmsg - controlmsgbuf > msg.msg_controllen - CMSG_SPACE(0) ||
(char *)cmsg - controlmsgbuf > msg.msg_controllen - CMSG_SPACE(cmsg->cmsg_len) ||
cmsg->cmsg_len == 0) {
cmsg = 0;
break;
}
if (cmsg->cmsg_level == SOL_IP && cmsg->cmsg_type == IP_RECVERR)
break;
}
if (!cmsg)
goto out;
ret=1;
err =(struct sock_extended_err *) CMSG_DATA(cmsg);
if (err->ee_errno == EMSGSIZE && err->ee_info >= 68) {
rxi_SetPeerMtu(addr.sin_addr.s_addr, addr.sin_port,
err->ee_info - RX_IPUDP_SIZE);
}
/* other DEST_UNREACH's and TIME_EXCEEDED should be dealt with too */
out:
return ret;
}
#endif

View File

@ -32,9 +32,7 @@
* SUCH DAMAGE.
*/
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include <afsconfig.h>
/*
nn * We are using getopt since we want it to be possible to link to
@ -57,9 +55,17 @@ RCSID("$Id$");
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#ifdef HAVE_STRING_H
#include <string.h>
#else
#ifdef HAVE_STRINGS_H
#include <strings.h>
#endif
#endif
#include <assert.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#include <signal.h>
#ifdef HAVE_ERRX
#include <err.h> /* not stricly right, but if we have a errx() there
@ -311,7 +317,7 @@ rxperf_ExecuteRequest(struct rx_call *call)
DBFPRINT(("got a request\n"));
if (rx_Read(call, &version, 4) != 4) {
if (rx_Read32(call, &version) != 4) {
warn("rx_Read failed to read version");
return -1;
}
@ -321,13 +327,13 @@ rxperf_ExecuteRequest(struct rx_call *call)
return -1;
}
if (rx_Read(call, &command, 4) != 4) {
if (rx_Read32(call, &command) != 4) {
warnx("rx_Read failed to read command");
return -1;
}
command = ntohl(command);
if (rx_Read(call, &data, 4) != 4) {
if (rx_Read32(call, &data) != 4) {
warnx("rx_Read failed to read size");
return -1;
}
@ -337,7 +343,7 @@ rxperf_ExecuteRequest(struct rx_call *call)
return -1;
}
if (rx_Read(call, &data, 4) != 4) {
if (rx_Read32(call, &data) != 4) {
warnx("rx_Read failed to write size");
return -1;
}
@ -351,7 +357,7 @@ rxperf_ExecuteRequest(struct rx_call *call)
case RX_PERF_SEND:
DBFPRINT(("got a send request\n"));
if (rx_Read(call, &bytes, 4) != 4) {
if (rx_Read32(call, &bytes) != 4) {
warnx("rx_Read failed to read bytes");
return -1;
}
@ -361,7 +367,7 @@ rxperf_ExecuteRequest(struct rx_call *call)
readbytes(call, bytes);
data = htonl(RXPERF_MAGIC_COOKIE);
if (rx_Write(call, &data, 4) != 4) {
if (rx_Write32(call, &data) != 4) {
warnx("rx_Write failed when sending back result");
return -1;
}
@ -371,12 +377,12 @@ rxperf_ExecuteRequest(struct rx_call *call)
case RX_PERF_RPC:
DBFPRINT(("got a rpc request, reading commands\n"));
if (rx_Read(call, &recvb, 4) != 4) {
if (rx_Read32(call, &recvb) != 4) {
warnx("rx_Read failed to read recvbytes");
return -1;
}
recvb = ntohl(recvb);
if (rx_Read(call, &sendb, 4) != 4) {
if (rx_Read32(call, &sendb) != 4) {
warnx("rx_Read failed to read sendbytes");
return -1;
}
@ -396,14 +402,14 @@ rxperf_ExecuteRequest(struct rx_call *call)
DBFPRINT(("done\n"));
data = htonl(RXPERF_MAGIC_COOKIE);
if (rx_Write(call, &data, 4) != 4) {
if (rx_Write32(call, &data) != 4) {
warnx("rx_Write failed when sending back magic cookie");
return -1;
}
break;
case RX_PERF_FILE:
if (rx_Read(call, &data, 4) != 4)
if (rx_Read32(call, &data) != 4)
errx(1, "failed to read num from client");
num = ntohl(data);
@ -436,7 +442,7 @@ rxperf_ExecuteRequest(struct rx_call *call)
case RX_PERF_RECV:
DBFPRINT(("got a recv request\n"));
if (rx_Read(call, &bytes, 4) != 4) {
if (rx_Read32(call, &bytes) != 4) {
warnx("rx_Read failed to read bytes");
return -1;
}
@ -446,7 +452,7 @@ rxperf_ExecuteRequest(struct rx_call *call)
sendbytes(call, bytes);
data = htonl(RXPERF_MAGIC_COOKIE);
if (rx_Write(call, &data, 4) != 4) {
if (rx_Write32(call, &data) != 4) {
warnx("rx_Write failed when sending back result");
return -1;
}
@ -467,7 +473,7 @@ rxperf_ExecuteRequest(struct rx_call *call)
*/
static void
do_server(int port)
do_server(int port, int nojumbo, int maxmtu)
{
struct rx_service *service;
struct rx_securityClass *secureobj;
@ -478,6 +484,10 @@ do_server(int port)
if (ret)
errx(1, "rx_Init failed");
if (nojumbo)
rx_SetNoJumbo();
if (maxmtu)
rx_SetMaxMTU(maxmtu);
get_sec(1, &secureobj, &secureindex);
service =
@ -547,7 +557,8 @@ readfile(const char *filename, u_int32_t ** readwrite, u_int32_t * size)
static void
do_client(const char *server, int port, char *filename, int32_t command,
int32_t times, int32_t bytes, int32_t sendtimes, int32_t recvtimes)
int32_t times, int32_t bytes, int32_t sendtimes, int32_t recvtimes,
int dumpstats, int nojumbo, int maxmtu)
{
struct rx_connection *conn;
struct rx_call *call;
@ -568,6 +579,10 @@ do_client(const char *server, int port, char *filename, int32_t command,
if (ret)
errx(1, "rx_Init failed");
if (nojumbo)
rx_SetNoJumbo();
if (maxmtu)
rx_SetMaxMTU(maxmtu);
get_sec(0, &secureobj, &secureindex);
conn = rx_NewConnection(addr, port, RX_SERVER_ID, secureobj, secureindex);
@ -587,19 +602,19 @@ do_client(const char *server, int port, char *filename, int32_t command,
errx(1, "rx_NewCall failed");
data = htonl(RX_PERF_VERSION);
if (rx_Write(call, &data, 4) != 4)
errx(1, "rx_Write failed to send version");
if (rx_Write32(call, &data) != 4)
errx(1, "rx_Write failed to send version (err %d)", rx_Error(call));
data = htonl(command);
if (rx_Write(call, &data, 4) != 4)
errx(1, "rx_Write failed to send command");
if (rx_Write32(call, &data) != 4)
errx(1, "rx_Write failed to send command (err %d)", rx_Error(call));
data = htonl(rxread_size);
if (rx_Write(call, &data, 4) != 4)
errx(1, "rx_Write failed to send read size");
if (rx_Write32(call, &data) != 4)
errx(1, "rx_Write failed to send read size (err %d)", rx_Error(call));
data = htonl(rxwrite_size);
if (rx_Write(call, &data, 4) != 4)
errx(1, "rx_Write failed to send write read");
if (rx_Write32(call, &data) != 4)
errx(1, "rx_Write failed to send write read (err %d)", rx_Error(call));
switch (command) {
@ -607,15 +622,15 @@ do_client(const char *server, int port, char *filename, int32_t command,
DBFPRINT(("command "));
data = htonl(bytes);
if (rx_Write(call, &data, 4) != 4)
errx(1, "rx_Write failed to send size");
if (rx_Write32(call, &data) != 4)
errx(1, "rx_Write failed to send size (err %d)", rx_Error(call));
DBFPRINT(("sending(%d) ", bytes));
if (readbytes(call, bytes))
errx(1, "sendbytes");
errx(1, "sendbytes (err %d)", rx_Error(call));
if (rx_Read(call, &data, 4) != 4)
errx(1, "failed to read result from server");
if (rx_Read32(call, &data) != 4)
errx(1, "failed to read result from server (err %d)", rx_Error(call));
if (data != htonl(RXPERF_MAGIC_COOKIE))
warn("server send wrong magic cookie in responce");
@ -627,15 +642,15 @@ do_client(const char *server, int port, char *filename, int32_t command,
DBFPRINT(("command "));
data = htonl(bytes);
if (rx_Write(call, &data, 4) != 4)
errx(1, "rx_Write failed to send size");
if (rx_Write32(call, &data) != 4)
errx(1, "rx_Write failed to send size (err %d)", rx_Error(call));
DBFPRINT(("sending(%d) ", bytes));
if (sendbytes(call, bytes))
errx(1, "sendbytes");
errx(1, "sendbytes (err %d)", rx_Error(call));
if (rx_Read(call, &data, 4) != 4)
errx(1, "failed to read result from server");
if (rx_Read32(call, &data) != 4)
errx(1, "failed to read result from server (err %d)", rx_Error(call));
if (data != htonl(RXPERF_MAGIC_COOKIE))
warn("server send wrong magic cookie in responce");
@ -647,21 +662,23 @@ do_client(const char *server, int port, char *filename, int32_t command,
DBFPRINT(("commands "));
data = htonl(sendtimes);
if (rx_Write(call, &data, 4) != 4)
errx(1, "rx_Write failed to send command");
if (rx_Write32(call, &data) != 4)
errx(1, "rx_Write failed to send command (err %d)", rx_Error(call));
data = htonl(recvtimes);
if (rx_Write(call, &data, 4) != 4)
errx(1, "rx_Write failed to send command");
if (rx_Write32(call, &data) != 4)
errx(1, "rx_Write failed to send command (err %d)", rx_Error(call));
DBFPRINT(("send(%d) ", sendtimes));
sendbytes(call, sendtimes);
if (sendbytes(call, sendtimes))
errx(1, "sendbytes (err %d)", rx_Error(call));
DBFPRINT(("recv(%d) ", recvtimes));
readbytes(call, recvtimes);
if (readbytes(call, recvtimes))
errx(1, "sendbytes (err %d)", rx_Error(call));
if (rx_Read(call, &bytes, 4) != 4)
errx(1, "failed to read result from server");
if (rx_Read32(call, &bytes) != 4)
errx(1, "failed to read result from server (err %d)", rx_Error(call));
if (bytes != htonl(RXPERF_MAGIC_COOKIE))
warn("server send wrong magic cookie in responce");
@ -673,12 +690,12 @@ do_client(const char *server, int port, char *filename, int32_t command,
readfile(filename, &readwrite, &num);
data = htonl(num);
if (rx_Write(call, &data, sizeof(data)) != 4)
errx(1, "rx_Write failed to send size");
if (rx_Write32(call, &data) != 4)
errx(1, "rx_Write failed to send size (err %d)", rx_Error(call));
if (rx_Write(call, readwrite, num * sizeof(u_int32_t))
!= num * sizeof(u_int32_t))
errx(1, "rx_Write failed to send list");
errx(1, "rx_Write failed to send list (err %d)", rx_Error(call));
for (i = 0; i < num; i++) {
if (readwrite[i] == 0)
@ -687,10 +704,12 @@ do_client(const char *server, int port, char *filename, int32_t command,
size = ntohl(readwrite[i]) * sizeof(u_int32_t);
if (readp) {
readbytes(call, size);
if (readbytes(call, size))
errx(1, "sendbytes (err %d)", rx_Error(call));
DBFPRINT(("read\n"));
} else {
sendbytes(call, size);
if (sendbytes(call, size))
errx(1, "sendbytes (err %d)", rx_Error(call));
DBFPRINT(("send\n"));
}
}
@ -705,6 +724,10 @@ do_client(const char *server, int port, char *filename, int32_t command,
end_and_print_timer(stamp);
DBFPRINT(("done for good\n"));
if (dumpstats) {
rx_PrintStats(stdout);
rx_PrintPeerStats(stdout, conn->peer);
}
rx_Finalize();
}
@ -721,7 +744,7 @@ usage()
fprintf(stderr, "usage: %s client -c file -f filename\n", __progname);
fprintf(stderr,
"%s: usage: common option to the client "
"-w <write-bytes> -r <read-bytes> -T times -p port -s server\n",
"-w <write-bytes> -r <read-bytes> -T times -p port -s server -D\n",
__progname);
fprintf(stderr, "usage: %s server -p port\n", __progname);
#undef COMMMON
@ -736,10 +759,12 @@ static int
rxperf_server(int argc, char **argv)
{
int port = DEFAULT_PORT;
int nojumbo = 0;
int maxmtu = 0;
char *ptr;
int ch;
while ((ch = getopt(argc, argv, "r:d:p:w:")) != -1) {
while ((ch = getopt(argc, argv, "r:d:p:w:jm:4")) != -1) {
switch (ch) {
case 'd':
#ifdef RXDEBUG
@ -771,6 +796,17 @@ rxperf_server(int argc, char **argv)
errx(1, "%d > sizeof(somebuf) (%d)", rxwrite_size,
sizeof(somebuf));
break;
case 'j':
nojumbo=1;
break;
case 'm':
maxmtu = strtol(optarg, &ptr, 0);
if (ptr && *ptr != '\0')
errx(1, "can't resolve rx maxmtu to use");
break;
case '4':
RX_IPUDP_SIZE = 28;
break;
default:
usage();
}
@ -779,7 +815,7 @@ rxperf_server(int argc, char **argv)
if (optind != argc)
usage();
do_server(htons(port));
do_server(htons(port), nojumbo, maxmtu);
return 0;
}
@ -799,12 +835,15 @@ rxperf_client(int argc, char **argv)
int sendtimes = 3;
int recvtimes = 30;
int times = 100;
int dumpstats = 0;
int nojumbo = 0;
int maxmtu = 0;
char *ptr;
int ch;
cmd = RX_PERF_UNKNOWN;
while ((ch = getopt(argc, argv, "T:S:R:b:c:d:p:r:s:w:f:")) != -1) {
while ((ch = getopt(argc, argv, "T:S:R:b:c:d:p:r:s:w:f:Djm:4")) != -1) {
switch (ch) {
case 'b':
bytes = strtol(optarg, &ptr, 0);
@ -876,6 +915,24 @@ rxperf_client(int argc, char **argv)
case 'f':
filename = optarg;
break;
case 'D':
#ifdef RXDEBUG
dumpstats = 1;
#else
errx(1, "compiled without RXDEBUG");
#endif
break;
case 'j':
nojumbo=1;
break;
case 'm':
maxmtu = strtol(optarg, &ptr, 0);
if (ptr && *ptr != '\0')
errx(1, "can't resolve rx maxmtu to use");
break;
case '4':
RX_IPUDP_SIZE = 28;
break;
default:
usage();
}
@ -888,7 +945,7 @@ rxperf_client(int argc, char **argv)
errx(1, "no command given to the client");
do_client(host, htons(port), filename, cmd, times, bytes, sendtimes,
recvtimes);
recvtimes, dumpstats, nojumbo, maxmtu);
return 0;
}

View File

@ -1995,6 +1995,39 @@ CheckVolumesCmd(struct cmd_syndesc *as, void *arock)
return 0;
}
/*
 * PreCacheCmd - "fs precache" handler: set the cache manager's precache
 * size through the VIOCPRECACHE pioctl.
 *
 * as:    parsed command line; parms[0] is -blocks, the size in 1K blocks
 *        (0 disables precaching).
 * arock: unused.
 *
 * Returns 0 on success, 1 on a usage error, a bad size, or a failed pioctl.
 */
static int
PreCacheCmd(struct cmd_syndesc *as, void *arock)
{
    afs_int32 code;
    struct ViceIoctl blob;
    afs_int32 temp;

    /* NOTE(review): only one parameter (-blocks) is registered for this
     * command in the visible setup code, so parms[1] is presumably never
     * set and omitting -blocks is reported as a syntax error -- confirm
     * that is the intended behaviour. */
    if (!as->parms[0].items && !as->parms[1].items) {
        fprintf(stderr, "%s: syntax error in precache cmd.\n", pn);
        return 1;
    }
    if (as->parms[0].items) {
        code = util_GetInt32(as->parms[0].items->data, &temp);
        /* The size is sent as a signed 32-bit count of 1K blocks: reject
         * negative values and values that cannot be expressed in bytes. */
        if (code || temp < 0 || temp > 0x7fffffff / 1024) {
            fprintf(stderr, "%s: bad integer specified for precache size.\n",
                    pn);
            return 1;
        }
    } else
        temp = 0;

    blob.in = (char *)&temp;
    blob.in_size = sizeof(afs_int32);
    blob.out = NULL;            /* no reply data expected */
    blob.out_size = 0;

    code = pioctl(0, VIOCPRECACHE, &blob, 1);
    if (code) {
        Die(errno, NULL);
        return 1;
    }

    printf("New precache size set.\n");
    return 0;
}
static int
SetCacheSizeCmd(struct cmd_syndesc *as, void *arock)
{
@ -3657,6 +3690,11 @@ defect 3069
ts = cmd_CreateSyntax("uuid", UuidCmd, NULL, "manage the UUID for the cache manager");
cmd_AddParm(ts, "-generate", CMD_FLAG, CMD_REQUIRED, "generate a new UUID");
ts = cmd_CreateSyntax("precache", PreCacheCmd, 0,
"set precache size");
cmd_AddParm(ts, "-blocks", CMD_SINGLE, CMD_OPTIONAL,
"size in 1K byte blocks (0 => disable)");
code = cmd_Dispatch(argc, argv);
if (rxInitDone)
rx_Finalize();