From 65f2a099f39c13699d737a4618bac2ab825fee50 Mon Sep 17 00:00:00 2001 From: Jeffrey Altman Date: Mon, 31 Jan 2005 04:08:57 +0000 Subject: [PATCH] STABLE14-windows-cb-deadlock-20050104 afskfw.c - remove extra parameter to pr_Initialize afsd_service.c - move SERVICE_STOP_PENDING to before we start to do any work. afsd_init.c - initialize rx and rpc after starting the cache manager cm_callback.c - release cm_callbackLock before attempting to re-obtain scp->mx lock in cm_EndCallbackGrantingCall() (cherry picked from commit ca1c67688bf92903c9803976e918266753ef0aae) --- src/WINNT/afsd/afsd_init.c | 73 ++++----- src/WINNT/afsd/afsd_service.c | 35 +++-- src/WINNT/afsd/afskfw.c | 2 +- src/WINNT/afsd/cm_buf.c | 13 +- src/WINNT/afsd/cm_callback.c | 8 +- src/WINNT/afsd/cm_conn.c | 279 +++++++++++++++++----------------- 6 files changed, 218 insertions(+), 192 deletions(-) diff --git a/src/WINNT/afsd/afsd_init.c b/src/WINNT/afsd/afsd_init.c index 9c9908cbc6..1bb9d9fff8 100644 --- a/src/WINNT/afsd/afsd_init.c +++ b/src/WINNT/afsd/afsd_init.c @@ -893,42 +893,6 @@ int afsd_InitCM(char **reasonP) /* Ensure the AFS Netbios Name is registered to allow loopback access */ configureBackConnectionHostNames(); - /* initialize RX, and tell it to listen to port 7001, which is used for - * callback RPC messages. - */ - code = rx_Init(htons(7001)); - afsi_log("rx_Init code %x", code); - if (code != 0) { - *reasonP = "afsd: failed to init rx client on port 7001"; - return -1; - } - - /* Initialize the RPC server for session keys */ - RpcInit(); - - /* create an unauthenticated service #1 for callbacks */ - nullServerSecurityClassp = rxnull_NewServerSecurityObject(); - serverp = rx_NewService(0, 1, "AFS", &nullServerSecurityClassp, 1, - RXAFSCB_ExecuteRequest); - afsi_log("rx_NewService addr %x", (int)serverp); - if (serverp == NULL) { - *reasonP = "unknown error"; - return -1; - } - - nullServerSecurityClassp = rxnull_NewServerSecurityObject(); - serverp = rx_NewService(0, RX_STATS_SERVICE_ID, "rpcstats", - &nullServerSecurityClassp, 1, RXSTATS_ExecuteRequest); - afsi_log("rx_NewService addr %x", (int)serverp); - if (serverp == NULL) { - *reasonP = "unknown error"; - return -1; - } - - /* start server threads, *not* donating this one to the pool */ - rx_StartServer(0); - afsi_log("rx_StartServer"); - /* init user daemon, and other packages */ cm_InitUser(); @@ -965,6 +929,39 @@ int afsd_InitCM(char **reasonP) #endif #endif + /* initialize RX, and tell it to listen to port 7001, which is used for + * callback RPC messages. + */ + code = rx_Init(htons(7001)); + afsi_log("rx_Init code %x", code); + if (code != 0) { + *reasonP = "afsd: failed to init rx client on port 7001"; + return -1; + } + + /* create an unauthenticated service #1 for callbacks */ + nullServerSecurityClassp = rxnull_NewServerSecurityObject(); + serverp = rx_NewService(0, 1, "AFS", &nullServerSecurityClassp, 1, + RXAFSCB_ExecuteRequest); + afsi_log("rx_NewService addr %x", (int)serverp); + if (serverp == NULL) { + *reasonP = "unknown error"; + return -1; + } + + nullServerSecurityClassp = rxnull_NewServerSecurityObject(); + serverp = rx_NewService(0, RX_STATS_SERVICE_ID, "rpcstats", + &nullServerSecurityClassp, 1, RXSTATS_ExecuteRequest); + afsi_log("rx_NewService addr %x", (int)serverp); + if (serverp == NULL) { + *reasonP = "unknown error"; + return -1; + } + + /* start server threads, *not* donating this one to the pool */ + rx_StartServer(0); + afsi_log("rx_StartServer"); + code = cm_GetRootCellName(rootCellName); afsi_log("cm_GetRootCellName code %d, cm_freelanceEnabled= %d, rcn= %s", code, cm_freelanceEnabled, (code ? "" : rootCellName)); @@ -991,6 +988,10 @@ int afsd_InitCM(char **reasonP) if (cm_freelanceEnabled) cm_InitFreelance(); #endif + + /* Initialize the RPC server for session keys */ + RpcInit(); + return 0; } diff --git a/src/WINNT/afsd/afsd_service.c b/src/WINNT/afsd/afsd_service.c index d72092eb45..31e488c15d 100644 --- a/src/WINNT/afsd/afsd_service.c +++ b/src/WINNT/afsd/afsd_service.c @@ -173,6 +173,16 @@ afsd_ServiceControlHandler(DWORD ctrlCode) switch (ctrlCode) { case SERVICE_CONTROL_SHUTDOWN: case SERVICE_CONTROL_STOP: + ServiceStatus.dwCurrentState = SERVICE_STOP_PENDING; + ServiceStatus.dwWin32ExitCode = NO_ERROR; + ServiceStatus.dwCheckPoint = 1; + ServiceStatus.dwWaitHint = 30000; + ServiceStatus.dwControlsAccepted = 0; + SetServiceStatus(StatusHandle, &ServiceStatus); + +#ifdef FLUSH_VOLUME + afsd_ServiceFlushVolume((DWORD) lpEventData); +#endif /* Force trace if requested */ code = RegOpenKeyEx(HKEY_LOCAL_MACHINE, AFSConfigKeyName, @@ -193,14 +203,9 @@ afsd_ServiceControlHandler(DWORD ctrlCode) } doneTrace: - ServiceStatus.dwCurrentState = SERVICE_STOP_PENDING; - ServiceStatus.dwWin32ExitCode = NO_ERROR; - ServiceStatus.dwCheckPoint = 1; - ServiceStatus.dwWaitHint = 10000; - ServiceStatus.dwControlsAccepted = SERVICE_ACCEPT_STOP | SERVICE_ACCEPT_SHUTDOWN; - SetServiceStatus(StatusHandle, &ServiceStatus); SetEvent(WaitToTerminate); break; + case SERVICE_CONTROL_INTERROGATE: ServiceStatus.dwCurrentState = SERVICE_RUNNING; ServiceStatus.dwWin32ExitCode = NO_ERROR; @@ -234,7 +239,19 @@ afsd_ServiceControlHandlerEx( switch (ctrlCode) { + case SERVICE_CONTROL_SHUTDOWN: case SERVICE_CONTROL_STOP: + ServiceStatus.dwCurrentState = SERVICE_STOP_PENDING; + ServiceStatus.dwWin32ExitCode = NO_ERROR; + ServiceStatus.dwCheckPoint = 1; + ServiceStatus.dwWaitHint = 30000; + ServiceStatus.dwControlsAccepted = 0; + SetServiceStatus(StatusHandle, &ServiceStatus); + +#ifdef FLUSH_VOLUME + afsd_ServiceFlushVolume((DWORD) lpEventData); +#endif + /* Force trace if requested */ code = RegOpenKeyEx(HKEY_LOCAL_MACHINE, AFSConfigKeyName, @@ -255,12 +272,6 @@ afsd_ServiceControlHandlerEx( } doneTrace: - ServiceStatus.dwCurrentState = SERVICE_STOP_PENDING; - ServiceStatus.dwWin32ExitCode = NO_ERROR; - ServiceStatus.dwCheckPoint = 1; - ServiceStatus.dwWaitHint = 10000; - ServiceStatus.dwControlsAccepted = 0; - SetServiceStatus(StatusHandle, &ServiceStatus); SetEvent(WaitToTerminate); dwRet = NO_ERROR; break; diff --git a/src/WINNT/afsd/afskfw.c b/src/WINNT/afsd/afskfw.c index 5a4a2b70d1..2f3b6408ef 100644 --- a/src/WINNT/afsd/afskfw.c +++ b/src/WINNT/afsd/afskfw.c @@ -2510,7 +2510,7 @@ ViceIDToUsername(char *username, * level */ - if (status = pr_Initialize(1L, confname, aserver->cell, 0)) + if (status = pr_Initialize(1L, confname, aserver->cell)) return status; if (status = pr_CreateUser(username, &id)) return status; diff --git a/src/WINNT/afsd/cm_buf.c b/src/WINNT/afsd/cm_buf.c index ed31f3733f..26ff2422c4 100644 --- a/src/WINNT/afsd/cm_buf.c +++ b/src/WINNT/afsd/cm_buf.c @@ -319,10 +319,20 @@ long buf_Init(cm_buf_ops_t *opsp) afsi_log("Error creating cache file \"%s\" error %d", cm_CachePath, GetLastError()); return CM_ERROR_INVAL; + } else if (GetLastError() == ERROR_ALREADY_EXISTS) { + BY_HANDLE_FILE_INFORMATION fileInfo; + + afsi_log("Cache File \"%s\" already exists", cm_CachePath); + if ( GetFileInformationByHandle(hf, &fileInfo) ) + afsi_log("Existing File Size: %08X:%08X", + fileInfo.nFileSizeHigh, + fileInfo.nFileSizeLow); } } else { /* buf_cacheType == CM_BUF_CACHETYPE_VIRTUAL */ hf = INVALID_HANDLE_VALUE; } + afsi_log("File Mapping Size: %08X", buf_nbuffers * buf_bufferSize); + CacheHandle = hf; hm = CreateFileMapping(hf, NULL, @@ -331,8 +341,7 @@ long buf_Init(cm_buf_ops_t *opsp) NULL); if (hm == NULL) { if (GetLastError() == ERROR_DISK_FULL) { - afsi_log("Error creating cache file \"%s\" mapping: disk full", - cm_CachePath); + afsi_log("Error creating cache file mapping: disk full"); return CM_ERROR_TOOMANYBUFS; } return CM_ERROR_INVAL; diff --git a/src/WINNT/afsd/cm_callback.c b/src/WINNT/afsd/cm_callback.c index 7e7ec28281..917bf364b2 100644 --- a/src/WINNT/afsd/cm_callback.c +++ b/src/WINNT/afsd/cm_callback.c @@ -775,13 +775,17 @@ void cm_EndCallbackGrantingCall(cm_scache_t *scp, cm_callbackRequest_t *cbrp, */ lock_ReleaseMutex(&scp->mx); cm_CallbackNotifyChange(scp); + lock_ReleaseWrite(&cm_callbackLock); lock_ObtainMutex(&scp->mx); + lock_ObtainWrite(&cm_callbackLock); } - if (freeFlag) free(revp); + if (freeFlag) + free(revp); } /* if we freed the list, zap the pointer to it */ - if (freeFlag) cm_racingRevokesp = NULL; + if (freeFlag) + cm_racingRevokesp = NULL; lock_ReleaseWrite(&cm_callbackLock); diff --git a/src/WINNT/afsd/cm_conn.c b/src/WINNT/afsd/cm_conn.c index 4dbfb2cfd8..e2a6da12d2 100644 --- a/src/WINNT/afsd/cm_conn.c +++ b/src/WINNT/afsd/cm_conn.c @@ -138,30 +138,30 @@ static long cm_GetServerList(struct cm_fid *fidp, struct cm_user *userp, */ int cm_Analyze(cm_conn_t *connp, cm_user_t *userp, cm_req_t *reqp, - struct cm_fid *fidp, - AFSVolSync *volSyncp, - cm_serverRef_t * serversp, - cm_callbackRequest_t *cbrp, long errorCode) + struct cm_fid *fidp, + AFSVolSync *volSyncp, + cm_serverRef_t * serversp, + cm_callbackRequest_t *cbrp, long errorCode) { cm_server_t *serverp = 0; cm_serverRef_t **serverspp = 0; - cm_serverRef_t *tsrp; - cm_ucell_t *ucellp; + cm_serverRef_t *tsrp; + cm_ucell_t *ucellp; int retry = 0; int free_svr_list = 0; - int dead_session; + int dead_session; long timeUsed, timeLeft; - osi_Log2(afsd_logp, "cm_Analyze connp 0x%x, code %d", - (long) connp, errorCode); + osi_Log2(afsd_logp, "cm_Analyze connp 0x%x, code %d", + (long) connp, errorCode); - /* no locking required, since connp->serverp never changes after - * creation */ - dead_session = (userp->cellInfop == NULL); - if (connp) - serverp = connp->serverp; + /* no locking required, since connp->serverp never changes after + * creation */ + dead_session = (userp->cellInfop == NULL); + if (connp) + serverp = connp->serverp; - /* Update callback pointer */ + /* Update callback pointer */ if (cbrp && serverp && errorCode == 0) { if (cbrp->serverp) { if ( cbrp->serverp != serverp ) { @@ -178,39 +178,39 @@ cm_Analyze(cm_conn_t *connp, cm_user_t *userp, cm_req_t *reqp, lock_ReleaseWrite(&cm_callbackLock); } - /* If not allowed to retry, don't */ - if (reqp->flags & CM_REQ_NORETRY) - goto out; + /* If not allowed to retry, don't */ + if (reqp->flags & CM_REQ_NORETRY) + goto out; - /* if timeout - check that it did not exceed the SMB timeout + /* if timeout - check that it did not exceed the SMB timeout * and retry */ - /* timeleft - get if from reqp the same way as cmXonnByMServers does */ + /* timeleft - get if from reqp the same way as cmXonnByMServers does */ #ifndef DJGPP - timeUsed = (GetCurrentTime() - reqp->startTime) / 1000; + timeUsed = (GetCurrentTime() - reqp->startTime) / 1000; #else - gettimeofday(&now, NULL); - timeUsed = sub_time(now, reqp->startTime) / 1000; + gettimeofday(&now, NULL); + timeUsed = sub_time(now, reqp->startTime) / 1000; #endif - /* leave 5 seconds margin for sleep */ - timeLeft = RDRtimeout - timeUsed; + /* leave 5 seconds margin for sleep */ + timeLeft = RDRtimeout - timeUsed; if (errorCode == CM_ERROR_TIMEDOUT && timeLeft > 5 ) { - thrd_Sleep(3000); - cm_CheckServers(CM_FLAG_CHECKDOWNSERVERS, NULL); - retry = 1; - } + thrd_Sleep(3000); + cm_CheckServers(CM_FLAG_CHECKDOWNSERVERS, NULL); + retry = 1; + } /* if all servers are offline, mark them non-busy and start over */ if (errorCode == CM_ERROR_ALLOFFLINE && timeLeft > 7) { - osi_Log0(afsd_logp, "cm_Analyze passed CM_ERROR_ALLOFFLINE."); - thrd_Sleep(5000); - /* cm_ForceUpdateVolume marks all servers as non_busy */ - /* No it doesn't and it won't do anything if all of the - * the servers are marked as DOWN. So clear the DOWN - * flag and reset the busy state as well. - */ + osi_Log0(afsd_logp, "cm_Analyze passed CM_ERROR_ALLOFFLINE."); + thrd_Sleep(5000); + /* cm_ForceUpdateVolume marks all servers as non_busy */ + /* No it doesn't and it won't do anything if all of the + * the servers are marked as DOWN. So clear the DOWN + * flag and reset the busy state as well. + */ if (!serversp) { cm_GetServerList(fidp, userp, reqp, &serverspp); serversp = *serverspp; @@ -233,7 +233,7 @@ cm_Analyze(cm_conn_t *connp, cm_user_t *userp, cm_req_t *reqp, if (fidp != NULL) /* Not a VLDB call */ cm_ForceUpdateVolume(fidp, userp, reqp); - } + } /* if all servers are busy, mark them non-busy and start over */ if (errorCode == CM_ERROR_ALLBUSY && timeLeft > 7) { @@ -258,127 +258,128 @@ cm_Analyze(cm_conn_t *connp, cm_user_t *userp, cm_req_t *reqp, /* special codes: VBUSY and VRESTARTING */ if (errorCode == VBUSY || errorCode == VRESTARTING) { - if (!serversp) { - cm_GetServerList(fidp, userp, reqp, &serverspp); - serversp = *serverspp; - free_svr_list = 1; - } - lock_ObtainWrite(&cm_serverLock); - for (tsrp = serversp; tsrp; tsrp=tsrp->next) { - if (tsrp->server == serverp - && tsrp->status == not_busy) { - tsrp->status = busy; - break; - } - } - lock_ReleaseWrite(&cm_serverLock); - if (free_svr_list) { - cm_FreeServerList(&serversp); - *serverspp = serversp; - } - retry = 1; + if (!serversp) { + cm_GetServerList(fidp, userp, reqp, &serverspp); + serversp = *serverspp; + free_svr_list = 1; + } + lock_ObtainWrite(&cm_serverLock); + for (tsrp = serversp; tsrp; tsrp=tsrp->next) { + if (tsrp->server == serverp + && tsrp->status == not_busy) { + tsrp->status = busy; + break; + } + } + lock_ReleaseWrite(&cm_serverLock); + if (free_svr_list) { + cm_FreeServerList(&serversp); + *serverspp = serversp; + } + retry = 1; } /* special codes: missing volumes */ if (errorCode == VNOVOL || errorCode == VMOVED || errorCode == VOFFLINE - || errorCode == VSALVAGE || errorCode == VNOSERVICE) { - /* Log server being offline for this volume */ - osi_Log4(afsd_logp, "cm_Analyze found server %d.%d.%d.%d marked offline for a volume", - ((serverp->addr.sin_addr.s_addr & 0xff)), - ((serverp->addr.sin_addr.s_addr & 0xff00)>> 8), - ((serverp->addr.sin_addr.s_addr & 0xff0000)>> 16), - ((serverp->addr.sin_addr.s_addr & 0xff000000)>> 24)); - /* Create Event Log message */ - { - HANDLE h; - char *ptbuf[1]; - char s[100]; - h = RegisterEventSource(NULL, AFS_DAEMON_EVENT_NAME); - sprintf(s, "cm_Analyze: Server %d.%d.%d.%d reported volume %d as missing.", - ((serverp->addr.sin_addr.s_addr & 0xff)), - ((serverp->addr.sin_addr.s_addr & 0xff00)>> 8), - ((serverp->addr.sin_addr.s_addr & 0xff0000)>> 16), - ((serverp->addr.sin_addr.s_addr & 0xff000000)>> 24), - fidp->volume); - ptbuf[0] = s; - ReportEvent(h, EVENTLOG_WARNING_TYPE, 0, 1009, NULL, - 1, 0, ptbuf, NULL); - DeregisterEventSource(h); - } + || errorCode == VSALVAGE || errorCode == VNOSERVICE) + { + /* Log server being offline for this volume */ + osi_Log4(afsd_logp, "cm_Analyze found server %d.%d.%d.%d marked offline for a volume", + ((serverp->addr.sin_addr.s_addr & 0xff)), + ((serverp->addr.sin_addr.s_addr & 0xff00)>> 8), + ((serverp->addr.sin_addr.s_addr & 0xff0000)>> 16), + ((serverp->addr.sin_addr.s_addr & 0xff000000)>> 24)); + /* Create Event Log message */ + { + HANDLE h; + char *ptbuf[1]; + char s[100]; + h = RegisterEventSource(NULL, AFS_DAEMON_EVENT_NAME); + sprintf(s, "cm_Analyze: Server %d.%d.%d.%d reported volume %d as missing.", + ((serverp->addr.sin_addr.s_addr & 0xff)), + ((serverp->addr.sin_addr.s_addr & 0xff00)>> 8), + ((serverp->addr.sin_addr.s_addr & 0xff0000)>> 16), + ((serverp->addr.sin_addr.s_addr & 0xff000000)>> 24), + fidp->volume); + ptbuf[0] = s; + ReportEvent(h, EVENTLOG_WARNING_TYPE, 0, 1009, NULL, + 1, 0, ptbuf, NULL); + DeregisterEventSource(h); + } - /* Mark server offline for this volume */ - if (!serversp) { - cm_GetServerList(fidp, userp, reqp, &serverspp); - serversp = *serverspp; - free_svr_list = 1; - } - for (tsrp = serversp; tsrp; tsrp=tsrp->next) { - if (tsrp->server == serverp) - tsrp->status = offline; - } - if (free_svr_list) { - cm_FreeServerList(&serversp); - *serverspp = serversp; - } - if ( timeLeft > 2 ) + /* Mark server offline for this volume */ + if (!serversp) { + cm_GetServerList(fidp, userp, reqp, &serverspp); + serversp = *serverspp; + free_svr_list = 1; + } + for (tsrp = serversp; tsrp; tsrp=tsrp->next) { + if (tsrp->server == serverp) + tsrp->status = offline; + } + if (free_svr_list) { + cm_FreeServerList(&serversp); + *serverspp = serversp; + } + if ( timeLeft > 2 ) retry = 1; } /* RX codes */ if (errorCode == RX_CALL_TIMEOUT) { - /* server took longer than hardDeadTime - * don't mark server as down but don't retry - * this is to prevent the SMB session from timing out - * In addition, we log an event to the event log - */ + /* server took longer than hardDeadTime + * don't mark server as down but don't retry + * this is to prevent the SMB session from timing out + * In addition, we log an event to the event log + */ #ifndef DJGPP - HANDLE h; - char *ptbuf[1]; - char s[100]; - h = RegisterEventSource(NULL, AFS_DAEMON_EVENT_NAME); - sprintf(s, "cm_Analyze: HardDeadTime exceeded."); - ptbuf[0] = s; - ReportEvent(h, EVENTLOG_WARNING_TYPE, 0, 1009, NULL, - 1, 0, ptbuf, NULL); - DeregisterEventSource(h); + HANDLE h; + char *ptbuf[1]; + char s[100]; + h = RegisterEventSource(NULL, AFS_DAEMON_EVENT_NAME); + sprintf(s, "cm_Analyze: HardDeadTime exceeded."); + ptbuf[0] = s; + ReportEvent(h, EVENTLOG_WARNING_TYPE, 0, 1009, NULL, + 1, 0, ptbuf, NULL); + DeregisterEventSource(h); #endif /* !DJGPP */ - retry = 0; - osi_Log0(afsd_logp, "cm_Analyze: hardDeadTime exceeded"); + retry = 0; + osi_Log0(afsd_logp, "cm_Analyze: hardDeadTime exceeded"); } else if (errorCode >= -64 && errorCode < 0) { - /* mark server as down */ - lock_ObtainMutex(&serverp->mx); - serverp->flags |= CM_SERVERFLAG_DOWN; - lock_ReleaseMutex(&serverp->mx); + /* mark server as down */ + lock_ObtainMutex(&serverp->mx); + serverp->flags |= CM_SERVERFLAG_DOWN; + lock_ReleaseMutex(&serverp->mx); if ( timeLeft > 2 ) - retry = 1; - } + retry = 1; + } - if (errorCode == RXKADEXPIRED && !dead_session) { - lock_ObtainMutex(&userp->mx); - ucellp = cm_GetUCell(userp, serverp->cellp); - if (ucellp->ticketp) { - free(ucellp->ticketp); - ucellp->ticketp = NULL; - } - ucellp->flags &= ~CM_UCELLFLAG_RXKAD; - ucellp->gen++; - lock_ReleaseMutex(&userp->mx); - if ( timeLeft > 2 ) - retry = 1; - } + if (errorCode == RXKADEXPIRED && !dead_session) { + lock_ObtainMutex(&userp->mx); + ucellp = cm_GetUCell(userp, serverp->cellp); + if (ucellp->ticketp) { + free(ucellp->ticketp); + ucellp->ticketp = NULL; + } + ucellp->flags &= ~CM_UCELLFLAG_RXKAD; + ucellp->gen++; + lock_ReleaseMutex(&userp->mx); + if ( timeLeft > 2 ) + retry = 1; + } - if (retry && dead_session) - retry = 0; - -out: - /* drop this on the way out */ - if (connp) - cm_PutConn(connp); + if (retry && dead_session) + retry = 0; - /* retry until we fail to find a connection */ - return retry; + out: + /* drop this on the way out */ + if (connp) + cm_PutConn(connp); + + /* retry until we fail to find a connection */ + return retry; } long cm_ConnByMServers(cm_serverRef_t *serversp, cm_user_t *usersp,