From 5f7df409a5a8621db153f7c34049605372ec4bc7 Mon Sep 17 00:00:00 2001 From: Jeffrey Altman Date: Tue, 12 Jun 2007 16:38:43 +0000 Subject: [PATCH] DEVEL15-windows-volume-status-tracking-20070612 * re-write cm_Analyze to make better use of the known volume status. VL_Server queries cannot result in CM_ERROR_ALLOFFLINE messages. * renamed cm_CheckBusyVolumes to cm_CheckOfflineVolumes. busy volumes will be reset to srv_not_busy by the function but there is no mechanism for querying the busy state other than by attempting to access the resource. * cm_Analyze will query the state of an offline volume before deciding whether or not to retry when all volume instances are offline. (cherry picked from commit 86b3330c7148c25c0a7ee4ea8f15e3098695a667) --- src/WINNT/afsd/cm_conn.c | 160 +++++++++++++++++++-------------- src/WINNT/afsd/cm_daemon.c | 2 +- src/WINNT/afsd/cm_volume.c | 180 ++++++++++++++++++++++--------------- src/WINNT/afsd/cm_volume.h | 8 +- 4 files changed, 206 insertions(+), 144 deletions(-) diff --git a/src/WINNT/afsd/cm_conn.c b/src/WINNT/afsd/cm_conn.c index 4c6ca6b64e..37b93d690c 100644 --- a/src/WINNT/afsd/cm_conn.c +++ b/src/WINNT/afsd/cm_conn.c @@ -212,18 +212,22 @@ cm_Analyze(cm_conn_t *connp, cm_user_t *userp, cm_req_t *reqp, /* leave 5 seconds margin for sleep */ timeLeft = HardDeadtimeout - timeUsed; + /* get a pointer to the cell */ + if (errorCode) { + if (cellp == NULL && serverp) + cellp = serverp->cellp; + if (cellp == NULL && serversp) { + struct cm_serverRef * refp; + for ( refp=serversp ; cellp == NULL && refp != NULL; refp=refp->next) { + if ( refp->server ) + cellp = refp->server->cellp; + } + } + } + if (errorCode == CM_ERROR_TIMEDOUT) { if (timeLeft > 5 ) { thrd_Sleep(3000); - if (cellp == NULL && serverp) - cellp = serverp->cellp; - if (cellp == NULL && serversp) { - struct cm_serverRef * refp; - for ( refp=serversp ; cellp == NULL && refp != NULL; refp=refp->next) { - if ( refp->server ) - cellp = refp->server->cellp; - } - } 
cm_CheckServers(CM_FLAG_CHECKDOWNSERVERS, cellp); retry = 1; } @@ -259,80 +263,98 @@ cm_Analyze(cm_conn_t *connp, cm_user_t *userp, cm_req_t *reqp, /* Volume instances marked offline will be restored by the * background daemon thread as they become available */ -#if 0 - if (timeLeft > 7) { + if (timeLeft > 7 && fidp) { + cm_volume_t *volp; + cm_vol_state_t *statep; + thrd_Sleep(5000); - if (fidp) { /* Not a VLDB call */ - if (!serversp) { - code = cm_GetServerList(fidp, userp, reqp, &serverspp); - if (code == 0) { - serversp = *serverspp; - free_svr_list = 1; - } - } - if (serversp) { - lock_ObtainWrite(&cm_serverLock); - for (tsrp = serversp; tsrp; tsrp=tsrp->next) { - /* REDIRECT */ - tsrp->status = srv_not_busy; - } - lock_ReleaseWrite(&cm_serverLock); - if (free_svr_list) { - cm_FreeServerList(&serversp, 0); - *serverspp = serversp; - } - retry = 1; - } + code = cm_GetVolumeByID(cellp, fidp->volume, userp, reqp, + CM_GETVOL_FLAG_NO_LRU_UPDATE, + &volp); + if (code == 0) { + if (fidp->volume == volp->rw.ID) + statep = &volp->rw; + else if (fidp->volume == volp->ro.ID) + statep = &volp->ro; + else if (fidp->volume == volp->bk.ID) + statep = &volp->bk; - cm_ForceUpdateVolume(fidp, userp, reqp); - } else { /* VLDB call */ - if (serversp) { - lock_ObtainWrite(&cm_serverLock); - for (tsrp = serversp; tsrp; tsrp=tsrp->next) { - /* REDIRECT */ - tsrp->status = srv_not_busy; - } - lock_ReleaseWrite(&cm_serverLock); - if (free_svr_list) { - cm_FreeServerList(&serversp, 0); - *serverspp = serversp; - } - } - } + if (statep->state != vl_offline) { + retry = 1; + } else { + if (cm_CheckOfflineVolume(volp, statep->ID)) + retry = 1; + } + + cm_PutVolume(volp); + } } -#endif } else if (errorCode == CM_ERROR_ALLBUSY) { - /* Volume instances marked busy will be restored by the - * background daemon thread as they become available. + /* Volumes that are busy cannot be determined to be non-busy + * without actually attempting to access them. 
*/ osi_Log0(afsd_logp, "cm_Analyze passed CM_ERROR_ALLBUSY."); -#if 0 if (timeLeft > 7) { + cm_volume_t * volp = NULL; + cm_vol_state_t *statep; + thrd_Sleep(5000); - if (!serversp) { - code = cm_GetServerList(fidp, userp, reqp, &serverspp); + + if (fidp) { /* File Server query */ + code = cm_GetVolumeByID(cellp, fidp->volume, userp, reqp, + CM_GETVOL_FLAG_NO_LRU_UPDATE, + &volp); if (code == 0) { - serversp = *serverspp; - free_svr_list = 1; + if (fidp->volume == volp->rw.ID) + statep = &volp->rw; + else if (fidp->volume == volp->ro.ID) + statep = &volp->ro; + else if (fidp->volume == volp->bk.ID) + statep = &volp->bk; + + if (statep->state != vl_offline && statep->state != vl_busy) { + retry = 1; + } else { + if (!serversp) { + code = cm_GetServerList(fidp, userp, reqp, &serverspp); + if (code == 0) { + serversp = *serverspp; + free_svr_list = 1; + } + } + lock_ObtainWrite(&cm_serverLock); + for (tsrp = serversp; tsrp; tsrp=tsrp->next) { + if (tsrp->status == srv_busy) { + tsrp->status = srv_not_busy; + } + } + lock_ReleaseWrite(&cm_serverLock); + if (free_svr_list) { + cm_FreeServerList(&serversp, 0); + *serverspp = serversp; + } + + cm_UpdateVolumeStatus(volp, statep->ID); + retry = 1; + } + + cm_PutVolume(volp); + } + } else { /* VL Server query */ + if (serversp) { + lock_ObtainWrite(&cm_serverLock); + for (tsrp = serversp; tsrp; tsrp=tsrp->next) { + if (tsrp->status == srv_busy) { + tsrp->status = srv_not_busy; + } + } + lock_ReleaseWrite(&cm_serverLock); + retry = 1; } } - lock_ObtainWrite(&cm_serverLock); - for (tsrp = serversp; tsrp; tsrp=tsrp->next) { - if (tsrp->status == srv_busy) { - /* REDIRECT */ - tsrp->status = srv_not_busy; - } - } - lock_ReleaseWrite(&cm_serverLock); - if (free_svr_list) { - cm_FreeServerList(&serversp, 0); - *serverspp = serversp; - } - retry = 1; } -#endif } /* special codes: VBUSY and VRESTARTING */ diff --git a/src/WINNT/afsd/cm_daemon.c b/src/WINNT/afsd/cm_daemon.c index 0e6f97495d..759db2ffdc 100644 --- 
a/src/WINNT/afsd/cm_daemon.c +++ b/src/WINNT/afsd/cm_daemon.c @@ -395,7 +395,7 @@ void cm_Daemon(long parm) if (now > lastBusyVolCheck + cm_daemonCheckBusyVolInterval) { lastVolCheck = now; - cm_CheckBusyVolumes(); + cm_CheckOfflineVolumes(); now = osi_Time(); } diff --git a/src/WINNT/afsd/cm_volume.c b/src/WINNT/afsd/cm_volume.c index 4e2aaf492b..4d93529364 100644 --- a/src/WINNT/afsd/cm_volume.c +++ b/src/WINNT/afsd/cm_volume.c @@ -971,12 +971,15 @@ void cm_RefreshVolumes(void) } -/* called from the Daemon thread */ -void cm_CheckBusyVolumes(void) + +/* The return code is 0 if the volume is not online and + * 1 if the volume is online + */ +long +cm_CheckOfflineVolume(cm_volume_t *volp, afs_uint32 volID) { - cm_volume_t *volp; cm_conn_t *connp; - register long code; + long code; AFSFetchVolumeStatus volStat; char *Name; char *OfflineMsg; @@ -986,84 +989,119 @@ void cm_CheckBusyVolumes(void) char volName[32]; char offLineMsg[256]; char motd[256]; + long online = 0; + cm_serverRef_t *serversp; Name = volName; OfflineMsg = offLineMsg; MOTD = motd; + lock_ObtainMutex(&volp->mx); + + if (volp->rw.ID != 0 && (!volID || volID == volp->rw.ID) && + (volp->rw.state == vl_busy || volp->rw.state == vl_offline)) { + cm_InitReq(&req); + + for (serversp = volp->rw.serversp; serversp; serversp = serversp->next) { + if (serversp->status == srv_busy || serversp->status == srv_offline) + serversp->status = srv_not_busy; + } + + do { + code = cm_ConnFromVolume(volp, volp->rw.ID, cm_rootUserp, &req, &connp); + if (code) + continue; + + callp = cm_GetRxConn(connp); + code = RXAFS_GetVolumeStatus(callp, volp->rw.ID, + &volStat, &Name, &OfflineMsg, &MOTD); + rx_PutConnection(callp); + + } while (cm_Analyze(connp, cm_rootUserp, &req, NULL, NULL, NULL, NULL, code)); + code = cm_MapRPCError(code, &req); + + if (code == 0 && volStat.Online) { + cm_VolumeStatusNotification(volp, volp->rw.ID, volp->rw.state, vl_online); + volp->rw.state = vl_online; + online = 1; + } + } + + if (volp->ro.ID 
!= 0 && (!volID || volID == volp->ro.ID) && + (volp->ro.state == vl_busy || volp->ro.state == vl_offline)) { + cm_InitReq(&req); + + for (serversp = volp->ro.serversp; serversp; serversp = serversp->next) { + if (serversp->status == srv_busy || serversp->status == srv_offline) + serversp->status = srv_not_busy; + } + + do { + code = cm_ConnFromVolume(volp, volp->ro.ID, cm_rootUserp, &req, &connp); + if (code) + continue; + + callp = cm_GetRxConn(connp); + code = RXAFS_GetVolumeStatus(callp, volp->ro.ID, + &volStat, &Name, &OfflineMsg, &MOTD); + rx_PutConnection(callp); + + } while (cm_Analyze(connp, cm_rootUserp, &req, NULL, NULL, NULL, NULL, code)); + code = cm_MapRPCError(code, &req); + + if (code == 0 && volStat.Online) { + cm_VolumeStatusNotification(volp, volp->ro.ID, volp->ro.state, vl_online); + volp->ro.state = vl_online; + online = 1; + } + } + + if (volp->bk.ID != 0 && (!volID || volID == volp->bk.ID) && + (volp->bk.state == vl_busy || volp->bk.state == vl_offline)) { + cm_InitReq(&req); + + for (serversp = volp->bk.serversp; serversp; serversp = serversp->next) { + if (serversp->status == srv_busy || serversp->status == srv_offline) + serversp->status = srv_not_busy; + } + + do { + code = cm_ConnFromVolume(volp, volp->bk.ID, cm_rootUserp, &req, &connp); + if (code) + continue; + + callp = cm_GetRxConn(connp); + code = RXAFS_GetVolumeStatus(callp, volp->bk.ID, + &volStat, &Name, &OfflineMsg, &MOTD); + rx_PutConnection(callp); + + } while (cm_Analyze(connp, cm_rootUserp, &req, NULL, NULL, NULL, NULL, code)); + code = cm_MapRPCError(code, &req); + + if (code == 0 && volStat.Online) { + cm_VolumeStatusNotification(volp, volp->bk.ID, volp->bk.state, vl_online); + volp->bk.state = vl_online; + online = 1; + } + } + + lock_ReleaseMutex(&volp->mx); + return online; +} + + +/* called from the Daemon thread */ +void cm_CheckOfflineVolumes(void) +{ + cm_volume_t *volp; + lock_ObtainWrite(&cm_volumeLock); for (volp = cm_data.allVolumesp; volp; volp=volp->allNextp) { 
volp->refCount++; lock_ReleaseWrite(&cm_volumeLock); - lock_ObtainMutex(&volp->mx); - if (volp->rw.ID != 0 && (volp->rw.state == vl_busy || volp->rw.state == vl_offline)) { - cm_InitReq(&req); + cm_CheckOfflineVolume(volp, 0); - do { - code = cm_ConnFromVolume(volp, volp->rw.ID, cm_rootUserp, &req, &connp); - if (code) - continue; - - callp = cm_GetRxConn(connp); - code = RXAFS_GetVolumeStatus(callp, volp->rw.ID, - &volStat, &Name, &OfflineMsg, &MOTD); - rx_PutConnection(callp); - - } while (cm_Analyze(connp, cm_rootUserp, &req, NULL, NULL, NULL, NULL, code)); - code = cm_MapRPCError(code, &req); - - if (code == 0 && volStat.Online) { - cm_VolumeStatusNotification(volp, volp->rw.ID, volp->rw.state, vl_online); - volp->rw.state = vl_online; - } - } - - if (volp->ro.ID != 0 && (volp->ro.state == vl_busy || volp->ro.state == vl_offline)) { - cm_InitReq(&req); - - do { - code = cm_ConnFromVolume(volp, volp->ro.ID, cm_rootUserp, &req, &connp); - if (code) - continue; - - callp = cm_GetRxConn(connp); - code = RXAFS_GetVolumeStatus(callp, volp->ro.ID, - &volStat, &Name, &OfflineMsg, &MOTD); - rx_PutConnection(callp); - - } while (cm_Analyze(connp, cm_rootUserp, &req, NULL, NULL, NULL, NULL, code)); - code = cm_MapRPCError(code, &req); - - if (code == 0 && volStat.Online) { - cm_VolumeStatusNotification(volp, volp->ro.ID, volp->ro.state, vl_online); - volp->ro.state = vl_online; - } - } - - if (volp->bk.ID != 0 && (volp->bk.state == vl_busy || volp->bk.state == vl_offline)) { - cm_InitReq(&req); - - do { - code = cm_ConnFromVolume(volp, volp->bk.ID, cm_rootUserp, &req, &connp); - if (code) - continue; - - callp = cm_GetRxConn(connp); - code = RXAFS_GetVolumeStatus(callp, volp->bk.ID, - &volStat, &Name, &OfflineMsg, &MOTD); - rx_PutConnection(callp); - - } while (cm_Analyze(connp, cm_rootUserp, &req, NULL, NULL, NULL, NULL, code)); - code = cm_MapRPCError(code, &req); - - if (code == 0 && volStat.Online) { - cm_VolumeStatusNotification(volp, volp->bk.ID, volp->bk.state, 
vl_online); - volp->bk.state = vl_online; - } - } - - lock_ReleaseMutex(&volp->mx); lock_ObtainWrite(&cm_volumeLock); osi_assert(volp->refCount-- > 0); } diff --git a/src/WINNT/afsd/cm_volume.h b/src/WINNT/afsd/cm_volume.h index 1f89ddeab3..791fb267b1 100644 --- a/src/WINNT/afsd/cm_volume.h +++ b/src/WINNT/afsd/cm_volume.h @@ -16,13 +16,13 @@ enum volstatus {vl_online, vl_busy, vl_offline, vl_alldown, vl_unknown}; -struct cm_vol_state { +typedef struct cm_vol_state { afs_uint32 ID; /* by mx */ struct cm_volume *nextp; /* volumeIDHashTable; by cm_volumeLock */ cm_serverRef_t *serversp; /* by mx */ enum volstatus state; /* by mx */ afs_uint32 flags; /* by mx */ -}; +} cm_vol_state_t; typedef struct cm_volume { osi_queue_t q; /* LRU queue; cm_volumeLock */ @@ -110,7 +110,9 @@ extern void cm_AdjustVolumeLRU(cm_volume_t *volp); extern void cm_RemoveVolumeFromLRU(cm_volume_t *volp); -extern void cm_CheckBusyVolumes(void); +extern void cm_CheckOfflineVolumes(void); + +extern long cm_CheckOfflineVolume(cm_volume_t *volp, afs_uint32 volID); extern void cm_UpdateVolumeStatus(cm_volume_t *volp, afs_uint32 volID);