STABLE14-windows-cb-deadlock-20050104

afskfw.c - remove extra parameter to pr_Initialize

afsd_service.c - report SERVICE_STOP_PENDING before we start to
                 do any shutdown work.

afsd_init.c - initialize rx and rpc after starting the cache manager

cm_callback.c - release cm_callbackLock before attempting to re-obtain
                scp->mx lock in cm_EndCallbackGrantingCall()


(cherry picked from commit ca1c67688bf92903c9803976e918266753ef0aae)
This commit is contained in:
Jeffrey Altman 2005-01-31 04:08:57 +00:00 committed by Derrick Brashear
parent 25297a74ee
commit 65f2a099f3
6 changed files with 218 additions and 192 deletions

View File

@ -893,42 +893,6 @@ int afsd_InitCM(char **reasonP)
/* Ensure the AFS Netbios Name is registered to allow loopback access */
configureBackConnectionHostNames();
/* initialize RX, and tell it to listen to port 7001, which is used for
* callback RPC messages.
*/
code = rx_Init(htons(7001));
afsi_log("rx_Init code %x", code);
if (code != 0) {
*reasonP = "afsd: failed to init rx client on port 7001";
return -1;
}
/* Initialize the RPC server for session keys */
RpcInit();
/* create an unauthenticated service #1 for callbacks */
nullServerSecurityClassp = rxnull_NewServerSecurityObject();
serverp = rx_NewService(0, 1, "AFS", &nullServerSecurityClassp, 1,
RXAFSCB_ExecuteRequest);
afsi_log("rx_NewService addr %x", (int)serverp);
if (serverp == NULL) {
*reasonP = "unknown error";
return -1;
}
nullServerSecurityClassp = rxnull_NewServerSecurityObject();
serverp = rx_NewService(0, RX_STATS_SERVICE_ID, "rpcstats",
&nullServerSecurityClassp, 1, RXSTATS_ExecuteRequest);
afsi_log("rx_NewService addr %x", (int)serverp);
if (serverp == NULL) {
*reasonP = "unknown error";
return -1;
}
/* start server threads, *not* donating this one to the pool */
rx_StartServer(0);
afsi_log("rx_StartServer");
/* init user daemon, and other packages */
cm_InitUser();
@ -965,6 +929,39 @@ int afsd_InitCM(char **reasonP)
#endif
#endif
/* initialize RX, and tell it to listen to port 7001, which is used for
* callback RPC messages.
*/
code = rx_Init(htons(7001));
afsi_log("rx_Init code %x", code);
if (code != 0) {
*reasonP = "afsd: failed to init rx client on port 7001";
return -1;
}
/* create an unauthenticated service #1 for callbacks */
nullServerSecurityClassp = rxnull_NewServerSecurityObject();
serverp = rx_NewService(0, 1, "AFS", &nullServerSecurityClassp, 1,
RXAFSCB_ExecuteRequest);
afsi_log("rx_NewService addr %x", (int)serverp);
if (serverp == NULL) {
*reasonP = "unknown error";
return -1;
}
nullServerSecurityClassp = rxnull_NewServerSecurityObject();
serverp = rx_NewService(0, RX_STATS_SERVICE_ID, "rpcstats",
&nullServerSecurityClassp, 1, RXSTATS_ExecuteRequest);
afsi_log("rx_NewService addr %x", (int)serverp);
if (serverp == NULL) {
*reasonP = "unknown error";
return -1;
}
/* start server threads, *not* donating this one to the pool */
rx_StartServer(0);
afsi_log("rx_StartServer");
code = cm_GetRootCellName(rootCellName);
afsi_log("cm_GetRootCellName code %d, cm_freelanceEnabled= %d, rcn= %s",
code, cm_freelanceEnabled, (code ? "<none>" : rootCellName));
@ -991,6 +988,10 @@ int afsd_InitCM(char **reasonP)
if (cm_freelanceEnabled)
cm_InitFreelance();
#endif
/* Initialize the RPC server for session keys */
RpcInit();
return 0;
}

View File

@ -173,6 +173,16 @@ afsd_ServiceControlHandler(DWORD ctrlCode)
switch (ctrlCode) {
case SERVICE_CONTROL_SHUTDOWN:
case SERVICE_CONTROL_STOP:
ServiceStatus.dwCurrentState = SERVICE_STOP_PENDING;
ServiceStatus.dwWin32ExitCode = NO_ERROR;
ServiceStatus.dwCheckPoint = 1;
ServiceStatus.dwWaitHint = 30000;
ServiceStatus.dwControlsAccepted = 0;
SetServiceStatus(StatusHandle, &ServiceStatus);
#ifdef FLUSH_VOLUME
afsd_ServiceFlushVolume((DWORD) lpEventData);
#endif
/* Force trace if requested */
code = RegOpenKeyEx(HKEY_LOCAL_MACHINE,
AFSConfigKeyName,
@ -193,14 +203,9 @@ afsd_ServiceControlHandler(DWORD ctrlCode)
}
doneTrace:
ServiceStatus.dwCurrentState = SERVICE_STOP_PENDING;
ServiceStatus.dwWin32ExitCode = NO_ERROR;
ServiceStatus.dwCheckPoint = 1;
ServiceStatus.dwWaitHint = 10000;
ServiceStatus.dwControlsAccepted = SERVICE_ACCEPT_STOP | SERVICE_ACCEPT_SHUTDOWN;
SetServiceStatus(StatusHandle, &ServiceStatus);
SetEvent(WaitToTerminate);
break;
case SERVICE_CONTROL_INTERROGATE:
ServiceStatus.dwCurrentState = SERVICE_RUNNING;
ServiceStatus.dwWin32ExitCode = NO_ERROR;
@ -234,7 +239,19 @@ afsd_ServiceControlHandlerEx(
switch (ctrlCode)
{
case SERVICE_CONTROL_SHUTDOWN:
case SERVICE_CONTROL_STOP:
ServiceStatus.dwCurrentState = SERVICE_STOP_PENDING;
ServiceStatus.dwWin32ExitCode = NO_ERROR;
ServiceStatus.dwCheckPoint = 1;
ServiceStatus.dwWaitHint = 30000;
ServiceStatus.dwControlsAccepted = 0;
SetServiceStatus(StatusHandle, &ServiceStatus);
#ifdef FLUSH_VOLUME
afsd_ServiceFlushVolume((DWORD) lpEventData);
#endif
/* Force trace if requested */
code = RegOpenKeyEx(HKEY_LOCAL_MACHINE,
AFSConfigKeyName,
@ -255,12 +272,6 @@ afsd_ServiceControlHandlerEx(
}
doneTrace:
ServiceStatus.dwCurrentState = SERVICE_STOP_PENDING;
ServiceStatus.dwWin32ExitCode = NO_ERROR;
ServiceStatus.dwCheckPoint = 1;
ServiceStatus.dwWaitHint = 10000;
ServiceStatus.dwControlsAccepted = 0;
SetServiceStatus(StatusHandle, &ServiceStatus);
SetEvent(WaitToTerminate);
dwRet = NO_ERROR;
break;

View File

@ -2510,7 +2510,7 @@ ViceIDToUsername(char *username,
* level
*/
if (status = pr_Initialize(1L, confname, aserver->cell, 0))
if (status = pr_Initialize(1L, confname, aserver->cell))
return status;
if (status = pr_CreateUser(username, &id))
return status;

View File

@ -319,10 +319,20 @@ long buf_Init(cm_buf_ops_t *opsp)
afsi_log("Error creating cache file \"%s\" error %d",
cm_CachePath, GetLastError());
return CM_ERROR_INVAL;
} else if (GetLastError() == ERROR_ALREADY_EXISTS) {
BY_HANDLE_FILE_INFORMATION fileInfo;
afsi_log("Cache File \"%s\" already exists", cm_CachePath);
if ( GetFileInformationByHandle(hf, &fileInfo) )
afsi_log("Existing File Size: %08X:%08X",
fileInfo.nFileSizeHigh,
fileInfo.nFileSizeLow);
}
} else { /* buf_cacheType == CM_BUF_CACHETYPE_VIRTUAL */
hf = INVALID_HANDLE_VALUE;
}
afsi_log("File Mapping Size: %08X", buf_nbuffers * buf_bufferSize);
CacheHandle = hf;
hm = CreateFileMapping(hf,
NULL,
@ -331,8 +341,7 @@ long buf_Init(cm_buf_ops_t *opsp)
NULL);
if (hm == NULL) {
if (GetLastError() == ERROR_DISK_FULL) {
afsi_log("Error creating cache file \"%s\" mapping: disk full",
cm_CachePath);
afsi_log("Error creating cache file mapping: disk full");
return CM_ERROR_TOOMANYBUFS;
}
return CM_ERROR_INVAL;

View File

@ -775,13 +775,17 @@ void cm_EndCallbackGrantingCall(cm_scache_t *scp, cm_callbackRequest_t *cbrp,
*/
lock_ReleaseMutex(&scp->mx);
cm_CallbackNotifyChange(scp);
lock_ReleaseWrite(&cm_callbackLock);
lock_ObtainMutex(&scp->mx);
lock_ObtainWrite(&cm_callbackLock);
}
if (freeFlag) free(revp);
if (freeFlag)
free(revp);
}
/* if we freed the list, zap the pointer to it */
if (freeFlag) cm_racingRevokesp = NULL;
if (freeFlag)
cm_racingRevokesp = NULL;
lock_ReleaseWrite(&cm_callbackLock);

View File

@ -138,30 +138,30 @@ static long cm_GetServerList(struct cm_fid *fidp, struct cm_user *userp,
*/
int
cm_Analyze(cm_conn_t *connp, cm_user_t *userp, cm_req_t *reqp,
struct cm_fid *fidp,
AFSVolSync *volSyncp,
cm_serverRef_t * serversp,
cm_callbackRequest_t *cbrp, long errorCode)
struct cm_fid *fidp,
AFSVolSync *volSyncp,
cm_serverRef_t * serversp,
cm_callbackRequest_t *cbrp, long errorCode)
{
cm_server_t *serverp = 0;
cm_serverRef_t **serverspp = 0;
cm_serverRef_t *tsrp;
cm_ucell_t *ucellp;
cm_serverRef_t *tsrp;
cm_ucell_t *ucellp;
int retry = 0;
int free_svr_list = 0;
int dead_session;
int dead_session;
long timeUsed, timeLeft;
osi_Log2(afsd_logp, "cm_Analyze connp 0x%x, code %d",
(long) connp, errorCode);
osi_Log2(afsd_logp, "cm_Analyze connp 0x%x, code %d",
(long) connp, errorCode);
/* no locking required, since connp->serverp never changes after
* creation */
dead_session = (userp->cellInfop == NULL);
if (connp)
serverp = connp->serverp;
/* no locking required, since connp->serverp never changes after
* creation */
dead_session = (userp->cellInfop == NULL);
if (connp)
serverp = connp->serverp;
/* Update callback pointer */
/* Update callback pointer */
if (cbrp && serverp && errorCode == 0) {
if (cbrp->serverp) {
if ( cbrp->serverp != serverp ) {
@ -178,39 +178,39 @@ cm_Analyze(cm_conn_t *connp, cm_user_t *userp, cm_req_t *reqp,
lock_ReleaseWrite(&cm_callbackLock);
}
/* If not allowed to retry, don't */
if (reqp->flags & CM_REQ_NORETRY)
goto out;
/* If not allowed to retry, don't */
if (reqp->flags & CM_REQ_NORETRY)
goto out;
/* if timeout - check that it did not exceed the SMB timeout
/* if timeout - check that it did not exceed the SMB timeout
* and retry */
/* timeleft - get it from reqp the same way as cm_ConnByMServers does */
/* timeleft - get it from reqp the same way as cm_ConnByMServers does */
#ifndef DJGPP
timeUsed = (GetCurrentTime() - reqp->startTime) / 1000;
timeUsed = (GetCurrentTime() - reqp->startTime) / 1000;
#else
gettimeofday(&now, NULL);
timeUsed = sub_time(now, reqp->startTime) / 1000;
gettimeofday(&now, NULL);
timeUsed = sub_time(now, reqp->startTime) / 1000;
#endif
/* leave 5 seconds margin for sleep */
timeLeft = RDRtimeout - timeUsed;
/* leave 5 seconds margin for sleep */
timeLeft = RDRtimeout - timeUsed;
if (errorCode == CM_ERROR_TIMEDOUT && timeLeft > 5 ) {
thrd_Sleep(3000);
cm_CheckServers(CM_FLAG_CHECKDOWNSERVERS, NULL);
retry = 1;
}
thrd_Sleep(3000);
cm_CheckServers(CM_FLAG_CHECKDOWNSERVERS, NULL);
retry = 1;
}
/* if all servers are offline, mark them non-busy and start over */
if (errorCode == CM_ERROR_ALLOFFLINE && timeLeft > 7) {
osi_Log0(afsd_logp, "cm_Analyze passed CM_ERROR_ALLOFFLINE.");
thrd_Sleep(5000);
/* cm_ForceUpdateVolume marks all servers as non_busy */
/* No it doesn't and it won't do anything if all of
* the servers are marked as DOWN. So clear the DOWN
* flag and reset the busy state as well.
*/
osi_Log0(afsd_logp, "cm_Analyze passed CM_ERROR_ALLOFFLINE.");
thrd_Sleep(5000);
/* cm_ForceUpdateVolume marks all servers as non_busy */
/* No it doesn't and it won't do anything if all of
* the servers are marked as DOWN. So clear the DOWN
* flag and reset the busy state as well.
*/
if (!serversp) {
cm_GetServerList(fidp, userp, reqp, &serverspp);
serversp = *serverspp;
@ -233,7 +233,7 @@ cm_Analyze(cm_conn_t *connp, cm_user_t *userp, cm_req_t *reqp,
if (fidp != NULL) /* Not a VLDB call */
cm_ForceUpdateVolume(fidp, userp, reqp);
}
}
/* if all servers are busy, mark them non-busy and start over */
if (errorCode == CM_ERROR_ALLBUSY && timeLeft > 7) {
@ -258,127 +258,128 @@ cm_Analyze(cm_conn_t *connp, cm_user_t *userp, cm_req_t *reqp,
/* special codes: VBUSY and VRESTARTING */
if (errorCode == VBUSY || errorCode == VRESTARTING) {
if (!serversp) {
cm_GetServerList(fidp, userp, reqp, &serverspp);
serversp = *serverspp;
free_svr_list = 1;
}
lock_ObtainWrite(&cm_serverLock);
for (tsrp = serversp; tsrp; tsrp=tsrp->next) {
if (tsrp->server == serverp
&& tsrp->status == not_busy) {
tsrp->status = busy;
break;
}
}
lock_ReleaseWrite(&cm_serverLock);
if (free_svr_list) {
cm_FreeServerList(&serversp);
*serverspp = serversp;
}
retry = 1;
if (!serversp) {
cm_GetServerList(fidp, userp, reqp, &serverspp);
serversp = *serverspp;
free_svr_list = 1;
}
lock_ObtainWrite(&cm_serverLock);
for (tsrp = serversp; tsrp; tsrp=tsrp->next) {
if (tsrp->server == serverp
&& tsrp->status == not_busy) {
tsrp->status = busy;
break;
}
}
lock_ReleaseWrite(&cm_serverLock);
if (free_svr_list) {
cm_FreeServerList(&serversp);
*serverspp = serversp;
}
retry = 1;
}
/* special codes: missing volumes */
if (errorCode == VNOVOL || errorCode == VMOVED || errorCode == VOFFLINE
|| errorCode == VSALVAGE || errorCode == VNOSERVICE) {
/* Log server being offline for this volume */
osi_Log4(afsd_logp, "cm_Analyze found server %d.%d.%d.%d marked offline for a volume",
((serverp->addr.sin_addr.s_addr & 0xff)),
((serverp->addr.sin_addr.s_addr & 0xff00)>> 8),
((serverp->addr.sin_addr.s_addr & 0xff0000)>> 16),
((serverp->addr.sin_addr.s_addr & 0xff000000)>> 24));
/* Create Event Log message */
{
HANDLE h;
char *ptbuf[1];
char s[100];
h = RegisterEventSource(NULL, AFS_DAEMON_EVENT_NAME);
sprintf(s, "cm_Analyze: Server %d.%d.%d.%d reported volume %d as missing.",
((serverp->addr.sin_addr.s_addr & 0xff)),
((serverp->addr.sin_addr.s_addr & 0xff00)>> 8),
((serverp->addr.sin_addr.s_addr & 0xff0000)>> 16),
((serverp->addr.sin_addr.s_addr & 0xff000000)>> 24),
fidp->volume);
ptbuf[0] = s;
ReportEvent(h, EVENTLOG_WARNING_TYPE, 0, 1009, NULL,
1, 0, ptbuf, NULL);
DeregisterEventSource(h);
}
|| errorCode == VSALVAGE || errorCode == VNOSERVICE)
{
/* Log server being offline for this volume */
osi_Log4(afsd_logp, "cm_Analyze found server %d.%d.%d.%d marked offline for a volume",
((serverp->addr.sin_addr.s_addr & 0xff)),
((serverp->addr.sin_addr.s_addr & 0xff00)>> 8),
((serverp->addr.sin_addr.s_addr & 0xff0000)>> 16),
((serverp->addr.sin_addr.s_addr & 0xff000000)>> 24));
/* Create Event Log message */
{
HANDLE h;
char *ptbuf[1];
char s[100];
h = RegisterEventSource(NULL, AFS_DAEMON_EVENT_NAME);
sprintf(s, "cm_Analyze: Server %d.%d.%d.%d reported volume %d as missing.",
((serverp->addr.sin_addr.s_addr & 0xff)),
((serverp->addr.sin_addr.s_addr & 0xff00)>> 8),
((serverp->addr.sin_addr.s_addr & 0xff0000)>> 16),
((serverp->addr.sin_addr.s_addr & 0xff000000)>> 24),
fidp->volume);
ptbuf[0] = s;
ReportEvent(h, EVENTLOG_WARNING_TYPE, 0, 1009, NULL,
1, 0, ptbuf, NULL);
DeregisterEventSource(h);
}
/* Mark server offline for this volume */
if (!serversp) {
cm_GetServerList(fidp, userp, reqp, &serverspp);
serversp = *serverspp;
free_svr_list = 1;
}
for (tsrp = serversp; tsrp; tsrp=tsrp->next) {
if (tsrp->server == serverp)
tsrp->status = offline;
}
if (free_svr_list) {
cm_FreeServerList(&serversp);
*serverspp = serversp;
}
if ( timeLeft > 2 )
/* Mark server offline for this volume */
if (!serversp) {
cm_GetServerList(fidp, userp, reqp, &serverspp);
serversp = *serverspp;
free_svr_list = 1;
}
for (tsrp = serversp; tsrp; tsrp=tsrp->next) {
if (tsrp->server == serverp)
tsrp->status = offline;
}
if (free_svr_list) {
cm_FreeServerList(&serversp);
*serverspp = serversp;
}
if ( timeLeft > 2 )
retry = 1;
}
/* RX codes */
if (errorCode == RX_CALL_TIMEOUT) {
/* server took longer than hardDeadTime
* don't mark server as down but don't retry
* this is to prevent the SMB session from timing out
* In addition, we log an event to the event log
*/
/* server took longer than hardDeadTime
* don't mark server as down but don't retry
* this is to prevent the SMB session from timing out
* In addition, we log an event to the event log
*/
#ifndef DJGPP
HANDLE h;
char *ptbuf[1];
char s[100];
h = RegisterEventSource(NULL, AFS_DAEMON_EVENT_NAME);
sprintf(s, "cm_Analyze: HardDeadTime exceeded.");
ptbuf[0] = s;
ReportEvent(h, EVENTLOG_WARNING_TYPE, 0, 1009, NULL,
1, 0, ptbuf, NULL);
DeregisterEventSource(h);
HANDLE h;
char *ptbuf[1];
char s[100];
h = RegisterEventSource(NULL, AFS_DAEMON_EVENT_NAME);
sprintf(s, "cm_Analyze: HardDeadTime exceeded.");
ptbuf[0] = s;
ReportEvent(h, EVENTLOG_WARNING_TYPE, 0, 1009, NULL,
1, 0, ptbuf, NULL);
DeregisterEventSource(h);
#endif /* !DJGPP */
retry = 0;
osi_Log0(afsd_logp, "cm_Analyze: hardDeadTime exceeded");
retry = 0;
osi_Log0(afsd_logp, "cm_Analyze: hardDeadTime exceeded");
}
else if (errorCode >= -64 && errorCode < 0) {
/* mark server as down */
lock_ObtainMutex(&serverp->mx);
serverp->flags |= CM_SERVERFLAG_DOWN;
lock_ReleaseMutex(&serverp->mx);
/* mark server as down */
lock_ObtainMutex(&serverp->mx);
serverp->flags |= CM_SERVERFLAG_DOWN;
lock_ReleaseMutex(&serverp->mx);
if ( timeLeft > 2 )
retry = 1;
retry = 1;
}
if (errorCode == RXKADEXPIRED && !dead_session) {
lock_ObtainMutex(&userp->mx);
ucellp = cm_GetUCell(userp, serverp->cellp);
if (ucellp->ticketp) {
free(ucellp->ticketp);
ucellp->ticketp = NULL;
}
ucellp->flags &= ~CM_UCELLFLAG_RXKAD;
ucellp->gen++;
lock_ReleaseMutex(&userp->mx);
if ( timeLeft > 2 )
retry = 1;
}
if (errorCode == RXKADEXPIRED && !dead_session) {
lock_ObtainMutex(&userp->mx);
ucellp = cm_GetUCell(userp, serverp->cellp);
if (ucellp->ticketp) {
free(ucellp->ticketp);
ucellp->ticketp = NULL;
}
ucellp->flags &= ~CM_UCELLFLAG_RXKAD;
ucellp->gen++;
lock_ReleaseMutex(&userp->mx);
if ( timeLeft > 2 )
retry = 1;
}
if (retry && dead_session)
retry = 0;
if (retry && dead_session)
retry = 0;
out:
/* drop this on the way out */
if (connp)
cm_PutConn(connp);
out:
/* drop this on the way out */
if (connp)
cm_PutConn(connp);
/* retry until we fail to find a connection */
return retry;
/* retry until we fail to find a connection */
return retry;
}
long cm_ConnByMServers(cm_serverRef_t *serversp, cm_user_t *usersp,