DEVEL15-windows-volume-status-tracking-20070612

* re-write cm_Analyze to make better use of the known volume
  status.  VL_Server queries cannot result in CM_ERROR_ALLOFFLINE
  messages.

* renamed cm_CheckBusyVolumes to cm_CheckOfflineVolumes.
  The server references of busy volumes will be reset to
  srv_not_busy by the function, but there is no mechanism for
  querying the busy state other than by attempting to access
  the resource.

* cm_Analyze will query the state of an offline volume before
  deciding whether or not to retry when all volume instances
  are offline.


(cherry picked from commit 86b3330c71)
This commit is contained in:
Jeffrey Altman 2007-06-12 16:38:43 +00:00
parent db8553387d
commit 5f7df409a5
4 changed files with 206 additions and 144 deletions

View File

@ -212,18 +212,22 @@ cm_Analyze(cm_conn_t *connp, cm_user_t *userp, cm_req_t *reqp,
/* leave 5 seconds margin for sleep */
timeLeft = HardDeadtimeout - timeUsed;
/* get a pointer to the cell */
if (errorCode) {
if (cellp == NULL && serverp)
cellp = serverp->cellp;
if (cellp == NULL && serversp) {
struct cm_serverRef * refp;
for ( refp=serversp ; cellp == NULL && refp != NULL; refp=refp->next) {
if ( refp->server )
cellp = refp->server->cellp;
}
}
}
if (errorCode == CM_ERROR_TIMEDOUT) {
if (timeLeft > 5 ) {
thrd_Sleep(3000);
if (cellp == NULL && serverp)
cellp = serverp->cellp;
if (cellp == NULL && serversp) {
struct cm_serverRef * refp;
for ( refp=serversp ; cellp == NULL && refp != NULL; refp=refp->next) {
if ( refp->server )
cellp = refp->server->cellp;
}
}
cm_CheckServers(CM_FLAG_CHECKDOWNSERVERS, cellp);
retry = 1;
}
@ -259,80 +263,98 @@ cm_Analyze(cm_conn_t *connp, cm_user_t *userp, cm_req_t *reqp,
/* Volume instances marked offline will be restored by the
* background daemon thread as they become available
*/
#if 0
if (timeLeft > 7) {
if (timeLeft > 7 && fidp) {
cm_volume_t *volp;
cm_vol_state_t *statep;
thrd_Sleep(5000);
if (fidp) { /* Not a VLDB call */
if (!serversp) {
code = cm_GetServerList(fidp, userp, reqp, &serverspp);
if (code == 0) {
serversp = *serverspp;
free_svr_list = 1;
}
}
if (serversp) {
lock_ObtainWrite(&cm_serverLock);
for (tsrp = serversp; tsrp; tsrp=tsrp->next) {
/* REDIRECT */
tsrp->status = srv_not_busy;
}
lock_ReleaseWrite(&cm_serverLock);
if (free_svr_list) {
cm_FreeServerList(&serversp, 0);
*serverspp = serversp;
}
retry = 1;
}
code = cm_GetVolumeByID(cellp, fidp->volume, userp, reqp,
CM_GETVOL_FLAG_NO_LRU_UPDATE,
&volp);
if (code == 0) {
if (fidp->volume == volp->rw.ID)
statep = &volp->rw;
else if (fidp->volume == volp->ro.ID)
statep = &volp->ro;
else if (fidp->volume == volp->bk.ID)
statep = &volp->bk;
cm_ForceUpdateVolume(fidp, userp, reqp);
} else { /* VLDB call */
if (serversp) {
lock_ObtainWrite(&cm_serverLock);
for (tsrp = serversp; tsrp; tsrp=tsrp->next) {
/* REDIRECT */
tsrp->status = srv_not_busy;
}
lock_ReleaseWrite(&cm_serverLock);
if (free_svr_list) {
cm_FreeServerList(&serversp, 0);
*serverspp = serversp;
}
}
}
if (statep->state != vl_offline) {
retry = 1;
} else {
if (cm_CheckOfflineVolume(volp, statep->ID))
retry = 1;
}
cm_PutVolume(volp);
}
}
#endif
}
else if (errorCode == CM_ERROR_ALLBUSY) {
/* Volume instances marked busy will be restored by the
* background daemon thread as they become available.
/* Volumes that are busy cannot be determined to be non-busy
* without actually attempting to access them.
*/
osi_Log0(afsd_logp, "cm_Analyze passed CM_ERROR_ALLBUSY.");
#if 0
if (timeLeft > 7) {
cm_volume_t * volp = NULL;
cm_vol_state_t *statep;
thrd_Sleep(5000);
if (!serversp) {
code = cm_GetServerList(fidp, userp, reqp, &serverspp);
if (fidp) { /* File Server query */
code = cm_GetVolumeByID(cellp, fidp->volume, userp, reqp,
CM_GETVOL_FLAG_NO_LRU_UPDATE,
&volp);
if (code == 0) {
serversp = *serverspp;
free_svr_list = 1;
if (fidp->volume == volp->rw.ID)
statep = &volp->rw;
else if (fidp->volume == volp->ro.ID)
statep = &volp->ro;
else if (fidp->volume == volp->bk.ID)
statep = &volp->bk;
if (statep->state != vl_offline && statep->state != vl_busy) {
retry = 1;
} else {
if (!serversp) {
code = cm_GetServerList(fidp, userp, reqp, &serverspp);
if (code == 0) {
serversp = *serverspp;
free_svr_list = 1;
}
}
lock_ObtainWrite(&cm_serverLock);
for (tsrp = serversp; tsrp; tsrp=tsrp->next) {
if (tsrp->status == srv_busy) {
tsrp->status = srv_not_busy;
}
}
lock_ReleaseWrite(&cm_serverLock);
if (free_svr_list) {
cm_FreeServerList(&serversp, 0);
*serverspp = serversp;
}
cm_UpdateVolumeStatus(volp, statep->ID);
retry = 1;
}
cm_PutVolume(volp);
}
} else { /* VL Server query */
if (serversp) {
lock_ObtainWrite(&cm_serverLock);
for (tsrp = serversp; tsrp; tsrp=tsrp->next) {
if (tsrp->status == srv_busy) {
tsrp->status = srv_not_busy;
}
}
lock_ReleaseWrite(&cm_serverLock);
retry = 1;
}
}
lock_ObtainWrite(&cm_serverLock);
for (tsrp = serversp; tsrp; tsrp=tsrp->next) {
if (tsrp->status == srv_busy) {
/* REDIRECT */
tsrp->status = srv_not_busy;
}
}
lock_ReleaseWrite(&cm_serverLock);
if (free_svr_list) {
cm_FreeServerList(&serversp, 0);
*serverspp = serversp;
}
retry = 1;
}
#endif
}
/* special codes: VBUSY and VRESTARTING */

View File

@ -395,7 +395,7 @@ void cm_Daemon(long parm)
if (now > lastBusyVolCheck + cm_daemonCheckBusyVolInterval) {
lastVolCheck = now;
cm_CheckBusyVolumes();
cm_CheckOfflineVolumes();
now = osi_Time();
}

View File

@ -971,12 +971,15 @@ void cm_RefreshVolumes(void)
}
/* called from the Daemon thread */
void cm_CheckBusyVolumes(void)
/* The return code is 0 if the volume is not online and
* 1 if the volume is online
*/
long
cm_CheckOfflineVolume(cm_volume_t *volp, afs_uint32 volID)
{
cm_volume_t *volp;
cm_conn_t *connp;
register long code;
long code;
AFSFetchVolumeStatus volStat;
char *Name;
char *OfflineMsg;
@ -986,84 +989,119 @@ void cm_CheckBusyVolumes(void)
char volName[32];
char offLineMsg[256];
char motd[256];
long online = 0;
cm_serverRef_t *serversp;
Name = volName;
OfflineMsg = offLineMsg;
MOTD = motd;
lock_ObtainMutex(&volp->mx);
if (volp->rw.ID != 0 && (!volID || volID == volp->rw.ID) &&
(volp->rw.state == vl_busy || volp->rw.state == vl_offline)) {
cm_InitReq(&req);
for (serversp = volp->rw.serversp; serversp; serversp = serversp->next) {
if (serversp->status == srv_busy || serversp->status == srv_offline)
serversp->status = srv_not_busy;
}
do {
code = cm_ConnFromVolume(volp, volp->rw.ID, cm_rootUserp, &req, &connp);
if (code)
continue;
callp = cm_GetRxConn(connp);
code = RXAFS_GetVolumeStatus(callp, volp->rw.ID,
&volStat, &Name, &OfflineMsg, &MOTD);
rx_PutConnection(callp);
} while (cm_Analyze(connp, cm_rootUserp, &req, NULL, NULL, NULL, NULL, code));
code = cm_MapRPCError(code, &req);
if (code == 0 && volStat.Online) {
cm_VolumeStatusNotification(volp, volp->rw.ID, volp->rw.state, vl_online);
volp->rw.state = vl_online;
online = 1;
}
}
if (volp->ro.ID != 0 && (!volID || volID == volp->ro.ID) &&
(volp->ro.state == vl_busy || volp->ro.state == vl_offline)) {
cm_InitReq(&req);
for (serversp = volp->ro.serversp; serversp; serversp = serversp->next) {
if (serversp->status == srv_busy || serversp->status == srv_offline)
serversp->status = srv_not_busy;
}
do {
code = cm_ConnFromVolume(volp, volp->ro.ID, cm_rootUserp, &req, &connp);
if (code)
continue;
callp = cm_GetRxConn(connp);
code = RXAFS_GetVolumeStatus(callp, volp->ro.ID,
&volStat, &Name, &OfflineMsg, &MOTD);
rx_PutConnection(callp);
} while (cm_Analyze(connp, cm_rootUserp, &req, NULL, NULL, NULL, NULL, code));
code = cm_MapRPCError(code, &req);
if (code == 0 && volStat.Online) {
cm_VolumeStatusNotification(volp, volp->ro.ID, volp->ro.state, vl_online);
volp->ro.state = vl_online;
online = 1;
}
}
if (volp->bk.ID != 0 && (!volID || volID == volp->bk.ID) &&
(volp->bk.state == vl_busy || volp->bk.state == vl_offline)) {
cm_InitReq(&req);
for (serversp = volp->bk.serversp; serversp; serversp = serversp->next) {
if (serversp->status == srv_busy || serversp->status == srv_offline)
serversp->status = srv_not_busy;
}
do {
code = cm_ConnFromVolume(volp, volp->bk.ID, cm_rootUserp, &req, &connp);
if (code)
continue;
callp = cm_GetRxConn(connp);
code = RXAFS_GetVolumeStatus(callp, volp->bk.ID,
&volStat, &Name, &OfflineMsg, &MOTD);
rx_PutConnection(callp);
} while (cm_Analyze(connp, cm_rootUserp, &req, NULL, NULL, NULL, NULL, code));
code = cm_MapRPCError(code, &req);
if (code == 0 && volStat.Online) {
cm_VolumeStatusNotification(volp, volp->bk.ID, volp->bk.state, vl_online);
volp->bk.state = vl_online;
online = 1;
}
}
lock_ReleaseMutex(&volp->mx);
return online;
}
/* called from the Daemon thread */
void cm_CheckOfflineVolumes(void)
{
cm_volume_t *volp;
lock_ObtainWrite(&cm_volumeLock);
for (volp = cm_data.allVolumesp; volp; volp=volp->allNextp) {
volp->refCount++;
lock_ReleaseWrite(&cm_volumeLock);
lock_ObtainMutex(&volp->mx);
if (volp->rw.ID != 0 && (volp->rw.state == vl_busy || volp->rw.state == vl_offline)) {
cm_InitReq(&req);
cm_CheckOfflineVolume(volp, 0);
do {
code = cm_ConnFromVolume(volp, volp->rw.ID, cm_rootUserp, &req, &connp);
if (code)
continue;
callp = cm_GetRxConn(connp);
code = RXAFS_GetVolumeStatus(callp, volp->rw.ID,
&volStat, &Name, &OfflineMsg, &MOTD);
rx_PutConnection(callp);
} while (cm_Analyze(connp, cm_rootUserp, &req, NULL, NULL, NULL, NULL, code));
code = cm_MapRPCError(code, &req);
if (code == 0 && volStat.Online) {
cm_VolumeStatusNotification(volp, volp->rw.ID, volp->rw.state, vl_online);
volp->rw.state = vl_online;
}
}
if (volp->ro.ID != 0 && (volp->ro.state == vl_busy || volp->ro.state == vl_offline)) {
cm_InitReq(&req);
do {
code = cm_ConnFromVolume(volp, volp->ro.ID, cm_rootUserp, &req, &connp);
if (code)
continue;
callp = cm_GetRxConn(connp);
code = RXAFS_GetVolumeStatus(callp, volp->ro.ID,
&volStat, &Name, &OfflineMsg, &MOTD);
rx_PutConnection(callp);
} while (cm_Analyze(connp, cm_rootUserp, &req, NULL, NULL, NULL, NULL, code));
code = cm_MapRPCError(code, &req);
if (code == 0 && volStat.Online) {
cm_VolumeStatusNotification(volp, volp->ro.ID, volp->ro.state, vl_online);
volp->ro.state = vl_online;
}
}
if (volp->bk.ID != 0 && (volp->bk.state == vl_busy || volp->bk.state == vl_offline)) {
cm_InitReq(&req);
do {
code = cm_ConnFromVolume(volp, volp->bk.ID, cm_rootUserp, &req, &connp);
if (code)
continue;
callp = cm_GetRxConn(connp);
code = RXAFS_GetVolumeStatus(callp, volp->bk.ID,
&volStat, &Name, &OfflineMsg, &MOTD);
rx_PutConnection(callp);
} while (cm_Analyze(connp, cm_rootUserp, &req, NULL, NULL, NULL, NULL, code));
code = cm_MapRPCError(code, &req);
if (code == 0 && volStat.Online) {
cm_VolumeStatusNotification(volp, volp->bk.ID, volp->bk.state, vl_online);
volp->bk.state = vl_online;
}
}
lock_ReleaseMutex(&volp->mx);
lock_ObtainWrite(&cm_volumeLock);
osi_assert(volp->refCount-- > 0);
}

View File

@ -16,13 +16,13 @@
enum volstatus {vl_online, vl_busy, vl_offline, vl_alldown, vl_unknown};
struct cm_vol_state {
typedef struct cm_vol_state {
afs_uint32 ID; /* by mx */
struct cm_volume *nextp; /* volumeIDHashTable; by cm_volumeLock */
cm_serverRef_t *serversp; /* by mx */
enum volstatus state; /* by mx */
afs_uint32 flags; /* by mx */
};
} cm_vol_state_t;
typedef struct cm_volume {
osi_queue_t q; /* LRU queue; cm_volumeLock */
@ -110,7 +110,9 @@ extern void cm_AdjustVolumeLRU(cm_volume_t *volp);
extern void cm_RemoveVolumeFromLRU(cm_volume_t *volp);
extern void cm_CheckBusyVolumes(void);
extern void cm_CheckOfflineVolumes(void);
extern long cm_CheckOfflineVolume(cm_volume_t *volp, afs_uint32 volID);
extern void cm_UpdateVolumeStatus(cm_volume_t *volp, afs_uint32 volID);