Windows: improved idle dead time handling

RX_CALL_IDLE has been treated the same as RX_CALL_DEAD which is
a fatal error that results in the server being marked down.  This
is not the appropriate behavior for an idle dead timeout error
which should not result in servers being marked down.

Idle dead timeouts are locally generated and are an indication
that the server:

 a. is severely overloaded and cannot process all
    incoming requests in a timely fashion.

 b. has a partition whose underlying disk (or iSCSI, etc) is
    failing and all I/O requests on that device are blocking.

 c. has a large number of threads blocking on a single vnode
    and cannot process requests for other vnodes as a result.

 d. is malicious.

RX_CALL_IDLE is distinct from RX_CALL_DEAD in that idle dead timeout
handling should permit timely failover to replicas when they exist,
but in the non-replica case should not be triggered until the hard
dead timeout.  If the request cannot be retried, it should fail with
an I/O error.  The client should not retry a request to the same
server as a result of an idle dead timeout.

In addition, RX_CALL_IDLE indicates that the client has abandoned
the call but the server has not.  Therefore, the client cannot determine
whether or not the RPC will eventually succeed and it must discard
any status information it has about the object of the RPC if the
RPC could have altered the object state upon success.

This patchset splits the RX_CALL_DEAD processing in cm_Analyze() to
clarify that only RX_CALL_DEAD errors result in the server being marked
down.  Since Rx idle dead timeout processing is per connection and
idle dead timeouts must differ depending upon whether or not replica
sites exist, cm_ConnBy*() are extended to select a connection based
upon whether or not replica sites exist.  A separate connection object
is used for RPCs to replicated objects as compared to RPCs to non-replicated
objects (volumes or vldb).

For non-replica connections the idle dead timeout is set to the hard
dead timeout.  For replica connections the idle dead timeout is set
to the configured idle dead timeout.

Idle dead timeout events and whether or not a retry was triggered
are logged to the Windows Event Log.

cm_Analyze() is given a new 'storeOp' parameter which is non-zero
when the executed RPC could modify the data on the file server.

Change-Id: Idef696b15a8161335aa48907c15a4dc37f918bdb
Reviewed-on: http://gerrit.openafs.org/6118
Reviewed-by: Jeffrey Altman <jaltman@secure-endpoints.com>
Tested-by: BuildBot <buildbot@rampaginggeek.com>
This commit is contained in:
Jeffrey Altman 2011-11-25 09:28:18 -05:00 committed by Jeffrey Altman
parent c7673f4fad
commit f768fb95f3
13 changed files with 288 additions and 90 deletions

View File

@ -263,6 +263,7 @@ LogEvent(WORD wEventType, DWORD dwEventID, ...)
case MSG_ALL_SERVERS_BUSY:
case MSG_ALL_SERVERS_OFFLINE:
case MSG_ALL_SERVERS_DOWN:
case MSG_RX_IDLE_DEAD_TIMEOUT:
wNumArgs = 2;
lpArgs[0] = va_arg(listArgs, LPTSTR);
StringCbPrintf(lpStrings[1],STRLEN,"%d",va_arg(listArgs,afs_int32));

View File

@ -446,4 +446,12 @@ Language=English
OpenAFS Shutdown Complete.
.
MessageId=
Severity=Warning
Facility=System
SymbolicName=MSG_RX_IDLE_DEAD_TIMEOUT
Language=English
Idle dead timeout when communicating with server %1.
.
;#endif /* __AFSD_EVENTMESSAGES_H_ 1 */

View File

@ -1858,7 +1858,7 @@ long cm_GetCallback(cm_scache_t *scp, struct cm_user *userp,
&afsStatus, &callback, &volSync);
rx_PutConnection(rxconnp);
} while (cm_Analyze(connp, userp, reqp, &sfid, &volSync, NULL,
} while (cm_Analyze(connp, userp, reqp, &sfid, 0, &volSync, NULL,
&cbr, code));
code = cm_MapRPCError(code, reqp);
if (code)
@ -2072,7 +2072,7 @@ cm_GiveUpAllCallbacks(cm_server_t *tsp, afs_int32 markDown)
if ((tsp->type == CM_SERVER_FILE) && !(tsp->flags & CM_SERVERFLAG_DOWN))
{
code = cm_ConnByServer(tsp, cm_rootUserp, &connp);
code = cm_ConnByServer(tsp, cm_rootUserp, FALSE, &connp);
if (code == 0) {
rxconnp = cm_GetRxConn(connp);
rx_SetConnDeadTime(rxconnp, 10);
@ -2167,7 +2167,7 @@ cm_GiveUpAllCallbacksAllServersMulti(afs_int32 markDown)
lock_ReleaseRead(&cm_serverLock);
serversp[nconns] = tsp;
code = cm_ConnByServer(tsp, cm_rootUserp, &conns[nconns]);
code = cm_ConnByServer(tsp, cm_rootUserp, FALSE, &conns[nconns]);
if (code) {
lock_ObtainRead(&cm_serverLock);
cm_PutServerNoLock(tsp);

View File

@ -30,6 +30,7 @@ DWORD RDRtimeout = CM_CONN_DEFAULTRDRTIMEOUT;
unsigned short ConnDeadtimeout = CM_CONN_CONNDEADTIME;
unsigned short HardDeadtimeout = CM_CONN_HARDDEADTIME;
unsigned short IdleDeadtimeout = CM_CONN_IDLEDEADTIME;
unsigned short ReplicaIdleDeadtimeout = CM_CONN_IDLEDEADTIME_REP;
unsigned short NatPingInterval = CM_CONN_NATPINGINTERVAL;
#define LANMAN_WKS_PARAM_KEY "SYSTEM\\CurrentControlSet\\Services\\lanmanworkstation\\parameters"
@ -127,6 +128,13 @@ void cm_InitConn(void)
afsi_log("IdleDeadTimeout is %d", IdleDeadtimeout);
}
dummyLen = sizeof(DWORD);
code = RegQueryValueEx(parmKey, "ReplicaIdleDeadTimeout", NULL, NULL,
(BYTE *) &dwValue, &dummyLen);
if (code == ERROR_SUCCESS) {
ReplicaIdleDeadtimeout = (unsigned short)dwValue;
afsi_log("ReplicaIdleDeadTimeout is %d", ReplicaIdleDeadtimeout);
}
dummyLen = sizeof(DWORD);
code = RegQueryValueEx(parmKey, "NatPingInterval", NULL, NULL,
(BYTE *) &dwValue, &dummyLen);
if (code == ERROR_SUCCESS) {
@ -146,6 +154,15 @@ void cm_InitConn(void)
*
* We base our values on those while making sure we leave
* enough time for overhead.
*
* To further complicate matters we need to take into account
* file server hard dead timeouts as they affect the length
* of time it takes the file server to give up when attempting
* to break callbacks to unresponsive clients. The file
* server hard dead timeout is 120 seconds.
*
* For SMB, we have no choice but to timeout quickly. For
* the AFS redirector, we can wait.
*/
if (smb_Enabled) {
afsi_log("lanmanworkstation : SessTimeout %u", RDRtimeout);
@ -161,6 +178,10 @@ void cm_InitConn(void)
IdleDeadtimeout = 10 * (unsigned short) HardDeadtimeout;
afsi_log("IdleDeadTimeout is %d", IdleDeadtimeout);
}
if (ReplicaIdleDeadtimeout == 0) {
ReplicaIdleDeadtimeout = (unsigned short) HardDeadtimeout;
afsi_log("ReplicaIdleDeadTimeout is %d", ReplicaIdleDeadtimeout);
}
} else {
if (ConnDeadtimeout == 0) {
ConnDeadtimeout = CM_CONN_IFS_CONNDEADTIME;
@ -174,6 +195,10 @@ void cm_InitConn(void)
IdleDeadtimeout = CM_CONN_IFS_IDLEDEADTIME;
afsi_log("IdleDeadTimeout is %d", IdleDeadtimeout);
}
if (IdleDeadtimeout == 0) {
ReplicaIdleDeadtimeout = CM_CONN_IFS_IDLEDEADTIME_REP;
afsi_log("ReplicaIdleDeadTimeout is %d", ReplicaIdleDeadtimeout);
}
}
osi_EndOnce(&once);
}
@ -186,10 +211,11 @@ void cm_InitReq(cm_req_t *reqp)
}
static long cm_GetServerList(struct cm_fid *fidp, struct cm_user *userp,
struct cm_req *reqp, cm_serverRef_t ***serversppp)
struct cm_req *reqp, afs_uint32 *replicated, cm_serverRef_t ***serversppp)
{
long code;
cm_volume_t *volp = NULL;
cm_vol_state_t *volstatep = NULL;
cm_cell_t *cellp = NULL;
if (!fidp) {
@ -205,6 +231,8 @@ static long cm_GetServerList(struct cm_fid *fidp, struct cm_user *userp,
if (code)
return code;
volstatep = cm_VolumeStateByID(volp, fidp->volume);
*replicated = (volstatep->flags & CM_VOL_STATE_FLAG_REPLICATED);
*serversppp = cm_GetVolServers(volp, fidp->volume, userp, reqp);
lock_ObtainRead(&cm_volumeLock);
@ -230,11 +258,15 @@ static long cm_GetServerList(struct cm_fid *fidp, struct cm_user *userp,
* volSyncp and/or cbrp may also be NULL.
*/
int
cm_Analyze(cm_conn_t *connp, cm_user_t *userp, cm_req_t *reqp,
cm_Analyze(cm_conn_t *connp,
cm_user_t *userp,
cm_req_t *reqp,
struct cm_fid *fidp,
afs_uint32 storeOp,
AFSVolSync *volSyncp,
cm_serverRef_t * serversp,
cm_callbackRequest_t *cbrp, long errorCode)
cm_callbackRequest_t *cbrp,
long errorCode)
{
cm_server_t *serverp = NULL;
cm_serverRef_t **serverspp = NULL;
@ -243,6 +275,8 @@ cm_Analyze(cm_conn_t *connp, cm_user_t *userp, cm_req_t *reqp,
cm_ucell_t *ucellp;
cm_volume_t * volp = NULL;
cm_vol_state_t *statep = NULL;
cm_scache_t * scp = NULL;
afs_uint32 replicated;
int retry = 0;
int free_svr_list = 0;
int dead_session;
@ -415,7 +449,7 @@ cm_Analyze(cm_conn_t *connp, cm_user_t *userp, cm_req_t *reqp,
retry = 1;
} else {
if (!serversp) {
code = cm_GetServerList(fidp, userp, reqp, &serverspp);
code = cm_GetServerList(fidp, userp, reqp, &replicated, &serverspp);
if (code == 0) {
serversp = *serverspp;
free_svr_list = 1;
@ -473,7 +507,7 @@ cm_Analyze(cm_conn_t *connp, cm_user_t *userp, cm_req_t *reqp,
/* special codes: VBUSY and VRESTARTING */
else if (errorCode == VBUSY || errorCode == VRESTARTING) {
if (!serversp && fidp) {
code = cm_GetServerList(fidp, userp, reqp, &serverspp);
code = cm_GetServerList(fidp, userp, reqp, &replicated, &serverspp);
if (code == 0) {
serversp = *serverspp;
free_svr_list = 1;
@ -624,7 +658,7 @@ cm_Analyze(cm_conn_t *connp, cm_user_t *userp, cm_req_t *reqp,
* from the server list if it was moved or is not present.
*/
if (!serversp || location_updated) {
code = cm_GetServerList(fidp, userp, reqp, &serverspp);
code = cm_GetServerList(fidp, userp, reqp, &replicated, &serverspp);
if (code == 0) {
serversp = *serverspp;
free_svr_list = 1;
@ -675,8 +709,6 @@ cm_Analyze(cm_conn_t *connp, cm_user_t *userp, cm_req_t *reqp,
retry = 1;
} else if ( errorCode == VNOVNODE ) {
if ( fidp ) {
cm_scache_t * scp;
osi_Log4(afsd_logp, "cm_Analyze passed VNOVNODE cell %u vol %u vn %u uniq %u.",
fidp->cell, fidp->volume, fidp->vnode, fidp->unique);
@ -742,6 +774,24 @@ cm_Analyze(cm_conn_t *connp, cm_user_t *userp, cm_req_t *reqp,
reqp->idleError++;
}
if (fidp && storeOp)
scp = cm_FindSCache(fidp);
if (scp) {
if (cm_HaveCallback(scp)) {
lock_ObtainWrite(&scp->rw);
cm_DiscardSCache(scp);
lock_ReleaseWrite(&scp->rw);
/*
* We really should notify the redirector that we discarded
* the status information but doing so in this case is not
* safe as it can result in a deadlock with extent release
* processing.
*/
}
cm_ReleaseSCache(scp);
}
if (timeLeft > 2) {
if (!fidp) { /* vldb */
retry = 1;
@ -794,6 +844,132 @@ cm_Analyze(cm_conn_t *connp, cm_user_t *userp, cm_req_t *reqp,
osi_LogSaveString(afsd_logp,addr));
retry = 1;
}
else if (errorCode == RX_CALL_IDLE) {
/*
* RPC failed because the server failed to respond with data
* within the idle dead timeout period. This could be for a variety
* of reasons:
* 1. The server could have a bad partition such as a failed
* disk or iSCSI target and all I/O to that partition is
* blocking on the server and will never complete.
*
* 2. The server vnode may be locked by another client request
* that is taking a very long time.
*
* 3. The server may have a very long queue of requests
* pending and is unable to process this request.
*
* 4. The server could be malicious and is performing a denial
* of service attack against the client.
*
* If this is a request against a .readonly with alternate sites
* the server should be marked down for this request and the
* client should fail over to another server. If this is a
* request against a single source, the client may retry once.
*/
if (serverp)
sprintf(addr, "%d.%d.%d.%d",
((serverp->addr.sin_addr.s_addr & 0xff)),
((serverp->addr.sin_addr.s_addr & 0xff00)>> 8),
((serverp->addr.sin_addr.s_addr & 0xff0000)>> 16),
((serverp->addr.sin_addr.s_addr & 0xff000000)>> 24));
if (fidp) {
code = cm_FindVolumeByID(cellp, fidp->volume, userp, reqp,
CM_GETVOL_FLAG_NO_LRU_UPDATE,
&volp);
if (code == 0) {
statep = cm_VolumeStateByID(volp, fidp->volume);
if (statep)
replicated = (statep->flags & CM_VOL_STATE_FLAG_REPLICATED);
lock_ObtainRead(&cm_volumeLock);
cm_PutVolume(volp);
lock_ReleaseRead(&cm_volumeLock);
volp = NULL;
}
if (storeOp)
scp = cm_FindSCache(fidp);
if (scp) {
if (cm_HaveCallback(scp)) {
lock_ObtainWrite(&scp->rw);
cm_DiscardSCache(scp);
lock_ReleaseWrite(&scp->rw);
/*
* We really should notify the redirector that we discarded
* the status information but doing so in this case is not
* safe as it can result in a deadlock with extent release
* processing.
*/
}
cm_ReleaseSCache(scp);
}
}
if (replicated && serverp) {
reqp->tokenIdleErrorServp = serverp;
reqp->tokenError = errorCode;
if (timeLeft > 2)
retry = 1;
}
LogEvent(EVENTLOG_WARNING_TYPE, MSG_RX_IDLE_DEAD_TIMEOUT, addr, retry);
osi_Log2(afsd_logp, "cm_Analyze: RPC failed due to idle dead timeout addr[%s] retry=%u",
osi_LogSaveString(afsd_logp,addr), retry);
}
else if (errorCode == RX_CALL_DEAD) {
/* mark server as down */
if (serverp)
sprintf(addr, "%d.%d.%d.%d",
((serverp->addr.sin_addr.s_addr & 0xff)),
((serverp->addr.sin_addr.s_addr & 0xff00)>> 8),
((serverp->addr.sin_addr.s_addr & 0xff0000)>> 16),
((serverp->addr.sin_addr.s_addr & 0xff000000)>> 24));
osi_Log2(afsd_logp, "cm_Analyze: Rx Call Dead addr[%s] forcedNew[%s]",
osi_LogSaveString(afsd_logp,addr),
(reqp->flags & CM_REQ_NEW_CONN_FORCED ? "yes" : "no"));
if (serverp) {
if ((reqp->flags & CM_REQ_NEW_CONN_FORCED)) {
lock_ObtainMutex(&serverp->mx);
if (!(serverp->flags & CM_SERVERFLAG_DOWN)) {
_InterlockedOr(&serverp->flags, CM_SERVERFLAG_DOWN);
serverp->downTime = time(NULL);
}
lock_ReleaseMutex(&serverp->mx);
} else {
reqp->flags |= CM_REQ_NEW_CONN_FORCED;
forcing_new = 1;
cm_ForceNewConnections(serverp);
}
}
if (fidp && storeOp)
scp = cm_FindSCache(fidp);
if (scp) {
if (cm_HaveCallback(scp)) {
lock_ObtainWrite(&scp->rw);
cm_DiscardSCache(scp);
lock_ReleaseWrite(&scp->rw);
/*
* We really should notify the redirector that we discarded
* the status information but doing so in this case is not
* safe as it can result in a deadlock with extent release
* processing.
*/
}
cm_ReleaseSCache(scp);
}
if ( timeLeft > 2 )
retry = 1;
}
else if (errorCode >= -64 && errorCode < 0) {
/* mark server as down */
if (serverp)
@ -803,35 +979,20 @@ cm_Analyze(cm_conn_t *connp, cm_user_t *userp, cm_req_t *reqp,
((serverp->addr.sin_addr.s_addr & 0xff0000)>> 16),
((serverp->addr.sin_addr.s_addr & 0xff000000)>> 24));
if (errorCode == RX_CALL_DEAD)
osi_Log2(afsd_logp, "cm_Analyze: Rx Call Dead addr[%s] forcedNew[%s]",
osi_LogSaveString(afsd_logp,addr),
(reqp->flags & CM_REQ_NEW_CONN_FORCED ? "yes" : "no"));
else
osi_Log3(afsd_logp, "cm_Analyze: Rx Misc Error[%d] addr[%s] forcedNew[%s]",
errorCode,
osi_LogSaveString(afsd_logp,addr),
(reqp->flags & CM_REQ_NEW_CONN_FORCED ? "yes" : "no"));
osi_Log3(afsd_logp, "cm_Analyze: Rx Misc Error[%d] addr[%s] forcedNew[%s]",
errorCode,
osi_LogSaveString(afsd_logp,addr),
(reqp->flags & CM_REQ_NEW_CONN_FORCED ? "yes" : "no"));
if (serverp) {
lock_ObtainMutex(&serverp->mx);
if (errorCode == RX_CALL_DEAD &&
(reqp->flags & CM_REQ_NEW_CONN_FORCED)) {
if (!(serverp->flags & CM_SERVERFLAG_DOWN)) {
_InterlockedOr(&serverp->flags, CM_SERVERFLAG_DOWN);
serverp->downTime = time(NULL);
}
if (reqp->flags & CM_REQ_NEW_CONN_FORCED) {
reqp->tokenIdleErrorServp = serverp;
reqp->tokenError = errorCode;
} else {
if (reqp->flags & CM_REQ_NEW_CONN_FORCED) {
reqp->tokenIdleErrorServp = serverp;
reqp->tokenError = errorCode;
} else {
reqp->flags |= CM_REQ_NEW_CONN_FORCED;
forcing_new = 1;
}
reqp->flags |= CM_REQ_NEW_CONN_FORCED;
forcing_new = 1;
cm_ForceNewConnections(serverp);
}
lock_ReleaseMutex(&serverp->mx);
cm_ForceNewConnections(serverp);
}
if ( timeLeft > 2 )
retry = 1;
@ -1042,8 +1203,8 @@ cm_Analyze(cm_conn_t *connp, cm_user_t *userp, cm_req_t *reqp,
return retry;
}
long cm_ConnByMServers(cm_serverRef_t *serversp, cm_user_t *usersp,
cm_req_t *reqp, cm_conn_t **connpp)
long cm_ConnByMServers(cm_serverRef_t *serversp, afs_uint32 replicated, cm_user_t *usersp,
cm_req_t *reqp, cm_conn_t **connpp)
{
long code;
cm_serverRef_t *tsrp;
@ -1099,7 +1260,7 @@ long cm_ConnByMServers(cm_serverRef_t *serversp, cm_user_t *usersp,
} else {
allOffline = 0;
allBusy = 0;
code = cm_ConnByServer(tsp, usersp, connpp);
code = cm_ConnByServer(tsp, usersp, replicated, connpp);
if (code == 0) { /* cm_CBS only returns 0 */
cm_PutServer(tsp);
#ifdef SET_RX_TIMEOUTS_TO_TIMELEFT
@ -1184,7 +1345,7 @@ void cm_GCConnections(cm_server_t *serverp)
}
static void cm_NewRXConnection(cm_conn_t *tcp, cm_ucell_t *ucellp,
cm_server_t *serverp)
cm_server_t *serverp, afs_uint32 replicated)
{
unsigned short port;
int serviceID;
@ -1240,7 +1401,12 @@ static void cm_NewRXConnection(cm_conn_t *tcp, cm_ucell_t *ucellp,
/*
* Setting idle dead timeout to a non-zero value activates RX_CALL_IDLE errors
*/
rx_SetConnIdleDeadTime(tcp->rxconnp, IdleDeadtimeout);
if (replicated) {
tcp->flags &= CM_CONN_FLAG_REPLICATION;
rx_SetConnIdleDeadTime(tcp->rxconnp, ReplicaIdleDeadtimeout);
} else {
rx_SetConnIdleDeadTime(tcp->rxconnp, IdleDeadtimeout);
}
/*
* Let the Rx library know that we can auto-retry if an
@ -1264,7 +1430,7 @@ static void cm_NewRXConnection(cm_conn_t *tcp, cm_ucell_t *ucellp,
rxs_Release(secObjp); /* Decrement the initial refCount */
}
long cm_ConnByServer(cm_server_t *serverp, cm_user_t *userp, cm_conn_t **connpp)
long cm_ConnByServer(cm_server_t *serverp, cm_user_t *userp, afs_uint32 replicated, cm_conn_t **connpp)
{
cm_conn_t *tcp;
cm_ucell_t *ucellp;
@ -1277,7 +1443,9 @@ long cm_ConnByServer(cm_server_t *serverp, cm_user_t *userp, cm_conn_t **connpp)
lock_ObtainMutex(&userp->mx);
lock_ObtainRead(&cm_connLock);
for (tcp = serverp->connsp; tcp; tcp=tcp->nextp) {
if (tcp->userp == userp)
if (tcp->userp == userp &&
(replicated && (tcp->flags & CM_CONN_FLAG_REPLICATION) ||
!replicated && !(tcp->flags & CM_CONN_FLAG_REPLICATION)))
break;
}
@ -1304,7 +1472,7 @@ long cm_ConnByServer(cm_server_t *serverp, cm_user_t *userp, cm_conn_t **connpp)
lock_ObtainMutex(&tcp->mx);
tcp->serverp = serverp;
tcp->cryptlevel = rxkad_clear;
cm_NewRXConnection(tcp, ucellp, serverp);
cm_NewRXConnection(tcp, ucellp, serverp, replicated);
tcp->refCount = 1;
lock_ReleaseMutex(&tcp->mx);
lock_ReleaseWrite(&cm_connLock);
@ -1325,7 +1493,7 @@ long cm_ConnByServer(cm_server_t *serverp, cm_user_t *userp, cm_conn_t **connpp)
tcp->flags &= ~CM_CONN_FLAG_FORCE_NEW;
rx_SetConnSecondsUntilNatPing(tcp->rxconnp, 0);
rx_DestroyConnection(tcp->rxconnp);
cm_NewRXConnection(tcp, ucellp, serverp);
cm_NewRXConnection(tcp, ucellp, serverp, replicated);
}
lock_ReleaseMutex(&tcp->mx);
}
@ -1346,10 +1514,11 @@ long cm_ServerAvailable(struct cm_fid *fidp, struct cm_user *userp)
cm_serverRef_t *tsrp;
cm_server_t *tsp;
int someBusy = 0, someOffline = 0, allOffline = 1, allBusy = 1, allDown = 1;
afs_uint32 replicated;
cm_InitReq(&req);
code = cm_GetServerList(fidp, userp, &req, &serverspp);
code = cm_GetServerList(fidp, userp, &req, &replicated, &serverspp);
if (code)
return 0;
@ -1394,15 +1563,15 @@ long cm_ConnFromFID(struct cm_fid *fidp, struct cm_user *userp, cm_req_t *reqp,
{
long code;
cm_serverRef_t **serverspp;
afs_uint32 replicated;
*connpp = NULL;
code = cm_GetServerList(fidp, userp, reqp, &serverspp);
if (code) {
code = cm_GetServerList(fidp, userp, reqp, &replicated, &serverspp);
if (code)
return code;
}
code = cm_ConnByMServers(*serverspp, userp, reqp, connpp);
code = cm_ConnByMServers(*serverspp, replicated, userp, reqp, connpp);
cm_FreeServerList(serverspp, 0);
return code;
}
@ -1413,12 +1582,16 @@ long cm_ConnFromVolume(struct cm_volume *volp, unsigned long volid, struct cm_us
{
long code;
cm_serverRef_t **serverspp;
afs_uint32 replicated;
cm_vol_state_t * volstatep;
*connpp = NULL;
volstatep = cm_VolumeStateByID(volp, volid);
replicated = (volstatep->flags & CM_VOL_STATE_FLAG_REPLICATED);
serverspp = cm_GetVolServers(volp, volid, userp, reqp);
code = cm_ConnByMServers(*serverspp, userp, reqp, connpp);
code = cm_ConnByMServers(*serverspp, replicated, userp, reqp, connpp);
cm_FreeServerList(serverspp, 0);
return code;
}

View File

@ -20,6 +20,9 @@
#ifndef CM_CONN_IDLEDEADTIME
#define CM_CONN_IDLEDEADTIME 0
#endif
#ifndef CM_CONN_IDLEDEADTIME_REP
#define CM_CONN_IDLEDEADTIME_REP 0
#endif
#ifndef CM_CONN_NATPINGINTERVAL
#define CM_CONN_NATPINGINTERVAL 0
#endif
@ -27,6 +30,7 @@
#define CM_CONN_IFS_HARDDEADTIME 120
#define CM_CONN_IFS_CONNDEADTIME 60
#define CM_CONN_IFS_IDLEDEADTIME 1200
#define CM_CONN_IFS_IDLEDEADTIME_REP 180 /* must be larger than file server hard dead timeout = 120 */
extern unsigned short ConnDeadtimeout;
extern unsigned short HardDeadtimeout;
@ -45,7 +49,8 @@ typedef struct cm_conn {
int cryptlevel; /* encrytion status */
} cm_conn_t;
#define CM_CONN_FLAG_FORCE_NEW 1
#define CM_CONN_FLAG_FORCE_NEW 1
#define CM_CONN_FLAG_REPLICATION 2
/*
* structure used for tracking RPC progress
@ -132,15 +137,16 @@ extern void cm_InitConn(void);
extern void cm_InitReq(cm_req_t *reqp);
extern int cm_Analyze(cm_conn_t *connp, struct cm_user *up, struct cm_req *reqp,
struct cm_fid *fidp,
struct AFSVolSync *volInfop,
cm_serverRef_t * serversp,
struct cm_callbackRequest *cbrp, long code);
struct cm_fid *fidp,
afs_uint32 storeOp,
struct AFSVolSync *volInfop,
cm_serverRef_t * serversp,
struct cm_callbackRequest *cbrp, long code);
extern long cm_ConnByMServers(struct cm_serverRef *, struct cm_user *,
extern long cm_ConnByMServers(struct cm_serverRef *, afs_uint32, struct cm_user *,
cm_req_t *, cm_conn_t **);
extern long cm_ConnByServer(struct cm_server *, struct cm_user *, cm_conn_t **);
extern long cm_ConnByServer(struct cm_server *, struct cm_user *, afs_uint32, cm_conn_t **);
extern long cm_ConnFromFID(struct cm_fid *, struct cm_user *, struct cm_req *,
cm_conn_t **);

View File

@ -371,7 +371,7 @@ long cm_BufWrite(void *vscp, osi_hyper_t *offsetp, long length, long flags,
osi_Log2(afsd_logp, "rx_EndCall converted 0x%x to 0x%x", code, code1);
code = code1;
}
} while (cm_Analyze(connp, userp, reqp, &scp->fid, &volSync, NULL, NULL, code));
} while (cm_Analyze(connp, userp, reqp, &scp->fid, 1, &volSync, NULL, NULL, code));
code = cm_MapRPCError(code, reqp);
@ -551,7 +551,7 @@ long cm_StoreMini(cm_scache_t *scp, cm_user_t *userp, cm_req_t *reqp)
/* prefer StoreData error over rx_EndCall error */
if (code == 0 && code1 != 0)
code = code1;
} while (cm_Analyze(connp, userp, reqp, &scp->fid, &volSync, NULL, NULL, code));
} while (cm_Analyze(connp, userp, reqp, &scp->fid, 1, &volSync, NULL, NULL, code));
code = cm_MapRPCError(code, reqp);
/* now, clean up our state */
@ -2117,7 +2117,7 @@ long cm_GetBuffer(cm_scache_t *scp, cm_buf_t *bufp, int *cpffp, cm_user_t *userp
code = code1;
osi_Log0(afsd_logp, "CALL FetchData DONE");
} while (cm_Analyze(connp, userp, reqp, &scp->fid, &volSync, NULL, NULL, code));
} while (cm_Analyze(connp, userp, reqp, &scp->fid, 0, &volSync, NULL, NULL, code));
fetchingcompleted:
code = cm_MapRPCError(code, reqp);
@ -2494,7 +2494,7 @@ long cm_GetData(cm_scache_t *scp, osi_hyper_t *offsetp, char *datap, int data_le
code = code1;
osi_Log0(afsd_logp, "CALL FetchData DONE");
} while (cm_Analyze(connp, userp, reqp, &scp->fid, &volSync, NULL, NULL, code));
} while (cm_Analyze(connp, userp, reqp, &scp->fid, 0, &volSync, NULL, NULL, code));
fetchingcompleted:
code = cm_MapRPCError(code, reqp);

View File

@ -439,7 +439,7 @@ cm_IoctlGetACL(cm_ioctl_t *ioctlp, cm_user_t *userp, cm_scache_t *scp, cm_req_t
code = RXAFS_FetchACL(rxconnp, &afid, &acl, &fileStatus, &volSync);
rx_PutConnection(rxconnp);
} while (cm_Analyze(connp, userp, reqp, &scp->fid, &volSync, NULL, NULL, code));
} while (cm_Analyze(connp, userp, reqp, &scp->fid, 0, &volSync, NULL, NULL, code));
code = cm_MapRPCError(code, reqp);
if (code)
@ -537,7 +537,7 @@ cm_IoctlSetACL(struct cm_ioctl *ioctlp, struct cm_user *userp, cm_scache_t *scp,
code = RXAFS_StoreACL(rxconnp, &fid, &acl, &fileStatus, &volSync);
rx_PutConnection(rxconnp);
} while (cm_Analyze(connp, userp, reqp, &scp->fid, &volSync, NULL, NULL, code));
} while (cm_Analyze(connp, userp, reqp, &scp->fid, 1, &volSync, NULL, NULL, code));
code = cm_MapRPCError(code, reqp);
/* invalidate cache info, since we just trashed the ACL cache */
@ -710,7 +710,7 @@ cm_IoctlSetVolumeStatus(struct cm_ioctl *ioctlp, struct cm_user *userp, cm_scach
&storeStat, volName, offLineMsg, motd);
rx_PutConnection(rxconnp);
} while (cm_Analyze(tcp, userp, reqp, &scp->fid, NULL, NULL, NULL, code));
} while (cm_Analyze(tcp, userp, reqp, &scp->fid, 1, NULL, NULL, NULL, code));
code = cm_MapRPCError(code, reqp);
}
@ -786,7 +786,7 @@ cm_IoctlGetVolumeStatus(struct cm_ioctl *ioctlp, struct cm_user *userp, cm_scach
&volStat, &Name, &OfflineMsg, &MOTD);
rx_PutConnection(rxconnp);
} while (cm_Analyze(connp, userp, reqp, &scp->fid, NULL, NULL, NULL, code));
} while (cm_Analyze(connp, userp, reqp, &scp->fid, 0, NULL, NULL, NULL, code));
code = cm_MapRPCError(code, reqp);
}

View File

@ -142,7 +142,7 @@ cm_PingServer(cm_server_t *tsp)
afs_inet_ntoa_r(tsp->addr.sin_addr.S_un.S_addr, hoststr);
lock_ReleaseMutex(&tsp->mx);
code = cm_ConnByServer(tsp, cm_rootUserp, &connp);
code = cm_ConnByServer(tsp, cm_rootUserp, FALSE, &connp);
if (code == 0) {
/* now call the appropriate ping call. Drop the timeout if
* the server is known to be down, so that we don't waste a
@ -418,7 +418,7 @@ static void cm_CheckServersMulti(afs_uint32 flags, cm_cell_t *cellp)
lock_ReleaseMutex(&tsp->mx);
serversp[nconns] = tsp;
code = cm_ConnByServer(tsp, cm_rootUserp, &conns[nconns]);
code = cm_ConnByServer(tsp, cm_rootUserp, FALSE, &conns[nconns]);
if (code) {
lock_ObtainRead(&cm_serverLock);
cm_PutServerNoLock(tsp);
@ -584,7 +584,7 @@ static void cm_CheckServersMulti(afs_uint32 flags, cm_cell_t *cellp)
lock_ReleaseMutex(&tsp->mx);
serversp[nconns] = tsp;
code = cm_ConnByServer(tsp, cm_rootUserp, &conns[nconns]);
code = cm_ConnByServer(tsp, cm_rootUserp, FALSE, &conns[nconns]);
if (code) {
lock_ObtainRead(&cm_serverLock);
cm_PutServerNoLock(tsp);

View File

@ -214,9 +214,10 @@ long cm_MapRPCError(long error, cm_req_t *reqp)
if (error == RX_CALL_DEAD ||
error == RX_CALL_TIMEOUT ||
error == RX_CALL_BUSY ||
error == RX_CALL_IDLE ||
error == RX_MSGSIZE)
error = CM_ERROR_RETRY;
else if (error == RX_CALL_IDLE)
error = EIO;
else if (error < 0)
error = CM_ERROR_UNKNOWN;
else if (error == EINVAL)

View File

@ -1679,7 +1679,7 @@ long cm_Unlink(cm_scache_t *dscp, fschar_t *fnamep, clientchar_t * cnamep,
&newDirStatus, &volSync);
rx_PutConnection(rxconnp);
} while (cm_Analyze(connp, userp, reqp, &dscp->fid, &volSync, NULL, NULL, code));
} while (cm_Analyze(connp, userp, reqp, &dscp->fid, 1, &volSync, NULL, NULL, code));
code = cm_MapRPCError(code, reqp);
if (code)
@ -2484,7 +2484,7 @@ cm_TryBulkStatRPC(cm_scache_t *dscp, cm_bulkStat_t *bbp, cm_user_t *userp, cm_re
code = (&bbp->stats[0])->errorCode;
}
}
} while (cm_Analyze(connp, userp, reqp, &tfid, &volSync, NULL, &cbReq, code));
} while (cm_Analyze(connp, userp, reqp, &tfid, 0, &volSync, NULL, &cbReq, code));
code = cm_MapRPCError(code, reqp);
/*
@ -2515,7 +2515,7 @@ cm_TryBulkStatRPC(cm_scache_t *dscp, cm_bulkStat_t *bbp, cm_user_t *userp, cm_re
if (inlinebulk && (&bbp->stats[j])->errorCode) {
cm_req_t treq = *reqp;
cm_Analyze(NULL, userp, &treq, &tfid, &volSync, NULL, &cbReq, (&bbp->stats[j])->errorCode);
cm_Analyze(NULL, userp, &treq, &tfid, 0, &volSync, NULL, &cbReq, (&bbp->stats[j])->errorCode);
} else {
code = cm_GetSCache(&tfid, &scp, userp, reqp);
if (code != 0)
@ -2805,7 +2805,7 @@ long cm_SetAttr(cm_scache_t *scp, cm_attr_t *attrp, cm_user_t *userp,
rx_PutConnection(rxconnp);
} while (cm_Analyze(connp, userp, reqp,
&scp->fid, &volSync, NULL, NULL, code));
&scp->fid, 1, &volSync, NULL, NULL, code));
code = cm_MapRPCError(code, reqp);
if (code)
@ -2917,7 +2917,7 @@ long cm_Create(cm_scache_t *dscp, clientchar_t *cnamep, long flags, cm_attr_t *a
rx_PutConnection(rxconnp);
} while (cm_Analyze(connp, userp, reqp,
&dscp->fid, &volSync, NULL, &cbReq, code));
&dscp->fid, 1, &volSync, NULL, &cbReq, code));
code = cm_MapRPCError(code, reqp);
if (code)
@ -3109,7 +3109,7 @@ long cm_MakeDir(cm_scache_t *dscp, clientchar_t *cnamep, long flags, cm_attr_t *
rx_PutConnection(rxconnp);
} while (cm_Analyze(connp, userp, reqp,
&dscp->fid, &volSync, NULL, &cbReq, code));
&dscp->fid, 1, &volSync, NULL, &cbReq, code));
code = cm_MapRPCError(code, reqp);
if (code)
@ -3240,8 +3240,7 @@ long cm_Link(cm_scache_t *dscp, clientchar_t *cnamep, cm_scache_t *sscp, long fl
rx_PutConnection(rxconnp);
osi_Log1(afsd_logp," RXAFS_Link returns 0x%x", code);
} while (cm_Analyze(connp, userp, reqp,
&dscp->fid, &volSync, NULL, NULL, code));
} while (cm_Analyze(connp, userp, reqp, &dscp->fid, 1, &volSync, NULL, NULL, code));
code = cm_MapRPCError(code, reqp);
@ -3357,7 +3356,7 @@ long cm_SymLink(cm_scache_t *dscp, clientchar_t *cnamep, fschar_t *contentsp, lo
rx_PutConnection(rxconnp);
} while (cm_Analyze(connp, userp, reqp,
&dscp->fid, &volSync, NULL, NULL, code));
&dscp->fid, 1, &volSync, NULL, NULL, code));
code = cm_MapRPCError(code, reqp);
if (code)
@ -3514,7 +3513,7 @@ long cm_RemoveDir(cm_scache_t *dscp, fschar_t *fnamep, clientchar_t *cnamep, cm_
rx_PutConnection(rxconnp);
} while (cm_Analyze(connp, userp, reqp,
&dscp->fid, &volSync, NULL, NULL, code));
&dscp->fid, 1, &volSync, NULL, NULL, code));
code = cm_MapRPCErrorRmdir(code, reqp);
if (code)
@ -3860,7 +3859,7 @@ long cm_Rename(cm_scache_t *oldDscp, fschar_t *oldNamep, clientchar_t *cOldNamep
&volSync);
rx_PutConnection(rxconnp);
} while (cm_Analyze(connp, userp, reqp, &oldDscp->fid,
} while (cm_Analyze(connp, userp, reqp, &oldDscp->fid, 1,
&volSync, NULL, NULL, code));
code = cm_MapRPCError(code, reqp);
@ -4556,7 +4555,7 @@ long cm_IntSetLock(cm_scache_t * scp, cm_user_t * userp, int lockType,
&volSync);
rx_PutConnection(rxconnp);
} while (cm_Analyze(connp, userp, reqp, &cfid, &volSync,
} while (cm_Analyze(connp, userp, reqp, &cfid, 1, &volSync,
NULL, NULL, code));
code = cm_MapRPCError(code, reqp);
@ -4618,7 +4617,7 @@ long cm_IntReleaseLock(cm_scache_t * scp, cm_user_t * userp,
code = RXAFS_ReleaseLock(rxconnp, &tfid, &volSync);
rx_PutConnection(rxconnp);
} while (cm_Analyze(connp, userp, reqp, &cfid, &volSync,
} while (cm_Analyze(connp, userp, reqp, &cfid, 1, &volSync,
NULL, NULL, code));
code = cm_MapRPCError(code, reqp);
if (code)
@ -5612,7 +5611,7 @@ void cm_CheckLocks()
osi_Log1(afsd_logp, " ExtendLock returns %d", code);
} while (cm_Analyze(connp, userp, &req,
&cfid, &volSync, NULL, NULL,
&cfid, 1, &volSync, NULL, NULL,
code));
code = cm_MapRPCError(code, &req);

View File

@ -184,7 +184,7 @@ cm_GetEntryByName( struct cm_cell *cellp, const char *name,
osi_LogSaveString(afsd_logp,name));
do {
code = cm_ConnByMServers(cellp->vlServersp, userp, reqp, &connp);
code = cm_ConnByMServers(cellp->vlServersp, FALSE, userp, reqp, &connp);
if (code)
continue;
@ -201,7 +201,7 @@ cm_GetEntryByName( struct cm_cell *cellp, const char *name,
*methodp = 0;
}
rx_PutConnection(rxconnp);
} while (cm_Analyze(connp, userp, reqp, NULL, NULL, cellp->vlServersp, NULL, code));
} while (cm_Analyze(connp, userp, reqp, NULL, 0, NULL, cellp->vlServersp, NULL, code));
code = cm_MapVLRPCError(code, reqp);
if ( code )
osi_Log3(afsd_logp, "CALL VL_GetEntryByName{UNO} name %s:%s FAILURE, code 0x%x",
@ -257,6 +257,7 @@ long cm_UpdateVolumeLocation(struct cm_cell *cellp, cm_user_t *userp, cm_req_t *
#endif
afs_uint32 volType;
time_t now;
int replicated = 0;
lock_AssertWrite(&volp->rw);
@ -388,6 +389,7 @@ long cm_UpdateVolumeLocation(struct cm_cell *cellp, cm_user_t *userp, cm_req_t *
case 0:
flags = vldbEntry.flags;
nServers = vldbEntry.nServers;
replicated = (nServers > 0);
rwID = vldbEntry.volumeId[0];
roID = vldbEntry.volumeId[1];
bkID = vldbEntry.volumeId[2];
@ -401,6 +403,7 @@ long cm_UpdateVolumeLocation(struct cm_cell *cellp, cm_user_t *userp, cm_req_t *
case 1:
flags = nvldbEntry.flags;
nServers = nvldbEntry.nServers;
replicated = (nServers > 0);
rwID = nvldbEntry.volumeId[0];
roID = nvldbEntry.volumeId[1];
bkID = nvldbEntry.volumeId[2];
@ -414,6 +417,7 @@ long cm_UpdateVolumeLocation(struct cm_cell *cellp, cm_user_t *userp, cm_req_t *
case 2:
flags = uvldbEntry.flags;
nServers = uvldbEntry.nServers;
replicated = (nServers > 0);
rwID = uvldbEntry.volumeId[0];
roID = uvldbEntry.volumeId[1];
bkID = uvldbEntry.volumeId[2];
@ -435,14 +439,14 @@ long cm_UpdateVolumeLocation(struct cm_cell *cellp, cm_user_t *userp, cm_req_t *
memset(&addrs, 0, sizeof(addrs));
do {
code = cm_ConnByMServers(cellp->vlServersp, userp, reqp, &connp);
code = cm_ConnByMServers(cellp->vlServersp, FALSE, userp, reqp, &connp);
if (code)
continue;
rxconnp = cm_GetRxConn(connp);
code = VL_GetAddrsU(rxconnp, &attrs, &uuid, &unique, &nentries, &addrs);
rx_PutConnection(rxconnp);
} while (cm_Analyze(connp, userp, reqp, NULL, NULL, cellp->vlServersp, NULL, code));
} while (cm_Analyze(connp, userp, reqp, NULL, 0, NULL, cellp->vlServersp, NULL, code));
if ( code ) {
code = cm_MapVLRPCError(code, reqp);
@ -520,6 +524,10 @@ long cm_UpdateVolumeLocation(struct cm_cell *cellp, cm_user_t *userp, cm_req_t *
volp->vol[ROVOL].ID = roID;
cm_AddVolumeToIDHashTable(volp, ROVOL);
}
if (replicated)
_InterlockedOr(&volp->vol[ROVOL].flags, CM_VOL_STATE_FLAG_REPLICATED);
else
_InterlockedAnd(&volp->vol[ROVOL].flags, ~CM_VOL_STATE_FLAG_REPLICATED);
} else {
if (volp->vol[ROVOL].qflags & CM_VOLUME_QFLAG_IN_HASH)
cm_RemoveVolumeFromIDHashTable(volp, ROVOL);
@ -1286,7 +1294,7 @@ cm_CheckOfflineVolumeState(cm_volume_t *volp, cm_vol_state_t *statep, afs_uint32
code = RXAFS_GetVolumeStatus(rxconnp, statep->ID,
&volStat, &Name, &OfflineMsg, &MOTD);
rx_PutConnection(rxconnp);
} while (cm_Analyze(connp, cm_rootUserp, &req, &fid, NULL, NULL, NULL, code));
} while (cm_Analyze(connp, cm_rootUserp, &req, &fid, 0, NULL, NULL, NULL, code));
code = cm_MapRPCError(code, &req);
lock_ObtainWrite(&volp->rw);

View File

@ -33,6 +33,8 @@ typedef struct cm_vol_state {
/* RWVOL, ROVOL, BACKVOL are defined in cm.h */
#define NUM_VOL_TYPES 3
#define CM_VOL_STATE_FLAG_REPLICATED 1
typedef struct cm_volume {
osi_queue_t q; /* LRU queue; cm_volumeLock */
afs_uint32 qflags; /* by cm_volumeLock */

View File

@ -5092,7 +5092,7 @@ RDR_GetVolumeInfo( IN cm_user_t *userp,
&volStat, &Name, &OfflineMsg, &MOTD);
rx_PutConnection(rxconnp);
} while (cm_Analyze(connp, userp, &req, &scp->fid, NULL, NULL, NULL, code));
} while (cm_Analyze(connp, userp, &req, &scp->fid, 0, NULL, NULL, NULL, code));
code = cm_MapRPCError(code, &req);
if (code == 0) {
pResultCB->TotalAllocationUnits.QuadPart = volStat.PartMaxBlocks;