From 10125417e2edb7e378611e302e44b34fb9a806a4 Mon Sep 17 00:00:00 2001 From: Jeffrey Altman Date: Sat, 3 Jun 2006 19:01:19 +0000 Subject: [PATCH] DEVEL15-windows-scache-recycle-20060603 A deadlock was detected when performing "fs flushall" if the file server reports VNOVNODE. The scp->bufCreateLock is already held by the current thread and there is no mechanism to propagate that knowledge. Therefore, an alternate mechanism for clearing the cache must be developed. A new function cm_RecycleSCache(scp,flags) has been extracted from cm_GetNewSCache(). This function performs the task of recycling a cm_scache_t object. When called from cm_GetNewSCache() with no flags the expectation is that there are no associated buffers that are queued to be read or written. When called from cm_Analyze() with the CM_SCACHE_RECYCLEFLAG_DESTROY_BUFFERS flag, any queued buffers will be de-queued and marked as if the operations were performed so the data can be discarded. This patch also addresses the following issues: * CM_ERROR codes were logged as "unknown" by cm_Analyze. * In response to VNOVNODE, the parent is only discarded if the current cm_scache_t is not a directory. * In the Ioctl FlushFile and FlushVolume functions, there were no checks to protect against flushing the Freelance SCache entries. * In FlushFile, the wrong cm_scache_t object was being released. * In cm_GetNewSCache, do not allow Freelance SCache entries to be recycled. Choose a new entry if cm_RecycleSCache fails. 
(cherry picked from commit f4f09d1c03cf738de6f73fc9d56502babd6743b4) --- src/WINNT/afsd/cm_conn.c | 66 +++++++++- src/WINNT/afsd/cm_ioctl.c | 16 ++- src/WINNT/afsd/cm_scache.c | 233 ++++++++++++++++++++++------------- src/WINNT/afsd/cm_scache.h | 5 + src/WINNT/afsd/cm_utils.c | 2 +- src/WINNT/afsd/cm_vnodeops.c | 7 ++ 6 files changed, 241 insertions(+), 88 deletions(-) diff --git a/src/WINNT/afsd/cm_conn.c b/src/WINNT/afsd/cm_conn.c index d9c5e428f6..e52d0b5f21 100644 --- a/src/WINNT/afsd/cm_conn.c +++ b/src/WINNT/afsd/cm_conn.c @@ -412,11 +412,18 @@ cm_Analyze(cm_conn_t *connp, cm_user_t *userp, cm_req_t *reqp, cm_scache_t * scp; osi_Log4(afsd_logp, "cm_Analyze passed VNOVNODE cell %u vol %u vn %u uniq %u.", fidp->cell, fidp->volume, fidp->vnode, fidp->unique); + scp = cm_FindSCache(fidp); if (scp) { - cm_scache_t *pscp = cm_FindSCacheParent(scp); - cm_CleanFile(scp, userp, reqp); - cm_ReleaseSCache(scp); + cm_scache_t *pscp = NULL; + + if (scp->fileType != CM_SCACHETYPE_DIRECTORY) + pscp = cm_FindSCacheParent(scp); + + lock_ObtainWrite(&cm_scacheLock); + cm_RecycleSCache(scp, CM_SCACHE_RECYCLEFLAG_DESTROY_BUFFERS); + lock_ReleaseWrite(&cm_scacheLock); + if (pscp) { if (pscp->cbExpires > 0 && pscp->cbServerp != NULL) { lock_ObtainMutex(&pscp->mx); @@ -516,6 +523,59 @@ cm_Analyze(cm_conn_t *connp, cm_user_t *userp, cm_req_t *reqp, case UAEINVAL : s = "UAEINVAL"; break; case EACCES : s = "EACCES"; break; case UAEACCES : s = "UAECCES"; break; + case ENOENT : s = "ENOENT"; break; + case UAENOENT : s = "UAENOENT"; break; + case CM_ERROR_NOSUCHCELL : s = "CM_ERROR_NOSUCHCELL"; break; + case CM_ERROR_NOSUCHVOLUME : s = "CM_ERROR_NOSUCHVOLUME"; break; + case CM_ERROR_TIMEDOUT : s = "CM_ERROR_TIMEDOUT"; break; + case CM_ERROR_RETRY : s = "CM_ERROR_RETRY"; break; + case CM_ERROR_NOACCESS : s = "CM_ERROR_NOACCESS"; break; + case CM_ERROR_NOSUCHFILE : s = "CM_ERROR_NOSUCHFILE"; break; + case CM_ERROR_STOPNOW : s = "CM_ERROR_STOPNOW"; break; + case CM_ERROR_TOOBIG : s = 
"CM_ERROR_TOOBIG"; break; + case CM_ERROR_INVAL : s = "CM_ERROR_INVAL"; break; + case CM_ERROR_BADFD : s = "CM_ERROR_BADFD"; break; + case CM_ERROR_BADFDOP : s = "CM_ERROR_BADFDOP"; break; + case CM_ERROR_EXISTS : s = "CM_ERROR_EXISTS"; break; + case CM_ERROR_CROSSDEVLINK : s = "CM_ERROR_CROSSDEVLINK"; break; + case CM_ERROR_BADOP : s = "CM_ERROR_BADOP"; break; + case CM_ERROR_BADPASSWORD : s = "CM_ERROR_BADPASSWORD"; break; + case CM_ERROR_NOTDIR : s = "CM_ERROR_NOTDIR"; break; + case CM_ERROR_ISDIR : s = "CM_ERROR_ISDIR"; break; + case CM_ERROR_READONLY : s = "CM_ERROR_READONLY"; break; + case CM_ERROR_WOULDBLOCK : s = "CM_ERROR_WOULDBLOCK"; break; + case CM_ERROR_QUOTA : s = "CM_ERROR_QUOTA"; break; + case CM_ERROR_SPACE : s = "CM_ERROR_SPACE"; break; + case CM_ERROR_BADSHARENAME : s = "CM_ERROR_BADSHARENAME"; break; + case CM_ERROR_BADTID : s = "CM_ERROR_BADTID"; break; + case CM_ERROR_UNKNOWN : s = "CM_ERROR_UNKNOWN"; break; + case CM_ERROR_NOMORETOKENS : s = "CM_ERROR_NOMORETOKENS"; break; + case CM_ERROR_NOTEMPTY : s = "CM_ERROR_NOTEMPTY"; break; + case CM_ERROR_USESTD : s = "CM_ERROR_USESTD"; break; + case CM_ERROR_REMOTECONN : s = "CM_ERROR_REMOTECONN"; break; + case CM_ERROR_ATSYS : s = "CM_ERROR_ATSYS"; break; + case CM_ERROR_NOSUCHPATH : s = "CM_ERROR_NOSUCHPATH"; break; + case CM_ERROR_CLOCKSKEW : s = "CM_ERROR_CLOCKSKEW"; break; + case CM_ERROR_BADSMB : s = "CM_ERROR_BADSMB"; break; + case CM_ERROR_ALLBUSY : s = "CM_ERROR_ALLBUSY"; break; + case CM_ERROR_NOFILES : s = "CM_ERROR_NOFILES"; break; + case CM_ERROR_PARTIALWRITE : s = "CM_ERROR_PARTIALWRITE"; break; + case CM_ERROR_NOIPC : s = "CM_ERROR_NOIPC"; break; + case CM_ERROR_BADNTFILENAME : s = "CM_ERROR_BADNTFILENAME"; break; + case CM_ERROR_BUFFERTOOSMALL : s = "CM_ERROR_BUFFERTOOSMALL"; break; + case CM_ERROR_RENAME_IDENTICAL : s = "CM_ERROR_RENAME_IDENTICAL"; break; + case CM_ERROR_ALLOFFLINE : s = "CM_ERROR_ALLOFFLINE"; break; + case CM_ERROR_AMBIGUOUS_FILENAME: s = 
"CM_ERROR_AMBIGUOUS_FILENAME"; break; + case CM_ERROR_BADLOGONTYPE : s = "CM_ERROR_BADLOGONTYPE"; break; + case CM_ERROR_GSSCONTINUE : s = "CM_ERROR_GSSCONTINUE"; break; + case CM_ERROR_TIDIPC : s = "CM_ERROR_TIDIPC"; break; + case CM_ERROR_TOO_MANY_SYMLINKS : s = "CM_ERROR_TOO_MANY_SYMLINKS"; break; + case CM_ERROR_PATH_NOT_COVERED : s = "CM_ERROR_PATH_NOT_COVERED"; break; + case CM_ERROR_LOCK_CONFLICT : s = "CM_ERROR_LOCK_CONFLICT"; break; + case CM_ERROR_SHARING_VIOLATION : s = "CM_ERROR_SHARING_VIOLATION"; break; + case CM_ERROR_ALLDOWN : s = "CM_ERROR_ALLDOWN"; break; + case CM_ERROR_TOOFEWBUFS : s = "CM_ERROR_TOOFEWBUFS"; break; + case CM_ERROR_TOOMANYBUFS : s = "CM_ERROR_TOOMANYBUFS"; break; } osi_Log2(afsd_logp, "cm_Analyze: ignoring error code 0x%x (%s)", errorCode, s); diff --git a/src/WINNT/afsd/cm_ioctl.c b/src/WINNT/afsd/cm_ioctl.c index 151350f723..f8ee369e52 100644 --- a/src/WINNT/afsd/cm_ioctl.c +++ b/src/WINNT/afsd/cm_ioctl.c @@ -88,6 +88,13 @@ long cm_FlushFile(cm_scache_t *scp, cm_user_t *userp, cm_req_t *reqp) { long code; +#ifdef AFS_FREELANCE_CLIENT + if ( scp->fid.cell == AFS_FAKE_ROOT_CELL_ID && scp->fid.volume == AFS_FAKE_ROOT_VOL_ID ) { + cm_noteLocalMountPointChange(); + return 0; + } +#endif + lock_ObtainWrite(&scp->bufCreateLock); code = buf_FlushCleanPages(scp, userp, reqp); @@ -110,7 +117,7 @@ long cm_FlushParent(cm_scache_t *scp, cm_user_t *userp, cm_req_t *reqp) /* now flush the file */ code = cm_FlushFile(pscp, userp, reqp); - cm_ReleaseSCache(scp); + cm_ReleaseSCache(pscp); return code; } @@ -122,6 +129,13 @@ long cm_FlushVolume(cm_user_t *userp, cm_req_t *reqp, afs_uint32 cell, afs_uint3 cm_scache_t *scp; int i; +#ifdef AFS_FREELANCE_CLIENT + if ( cell == AFS_FAKE_ROOT_CELL_ID && volume == AFS_FAKE_ROOT_VOL_ID ) { + cm_noteLocalMountPointChange(); + return 0; + } +#endif + lock_ObtainWrite(&cm_scacheLock); for (i=0; inextp) { diff --git a/src/WINNT/afsd/cm_scache.c b/src/WINNT/afsd/cm_scache.c index 54142327b1..4485e177c8 100644 
--- a/src/WINNT/afsd/cm_scache.c +++ b/src/WINNT/afsd/cm_scache.c @@ -52,16 +52,150 @@ void cm_AdjustLRU(cm_scache_t *scp) cm_data.scacheLRULastp = scp; } +/* called with cm_scacheLock write-locked; recycles an existing scp. */ +long cm_RecycleSCache(cm_scache_t *scp, afs_int32 flags) +{ + cm_scache_t **lscpp; + cm_scache_t *tscp; + int i; + +#ifdef AFS_FREELANCE_CLIENT + /* Do not recycle Freelance cache entries */ + if ( cm_freelanceEnabled && + scp->fid.cell==AFS_FAKE_ROOT_CELL_ID && + scp->fid.volume==AFS_FAKE_ROOT_VOL_ID ) + return -1; +#endif /* AFS_FREELANCE_CLIENT */ + + + if (scp->flags & CM_SCACHEFLAG_INHASH) { + /* hash it out first */ + i = CM_SCACHE_HASH(&scp->fid); + for (lscpp = &cm_data.hashTablep[i], tscp = cm_data.hashTablep[i]; + tscp; + lscpp = &tscp->nextp, tscp = tscp->nextp) { + if (tscp == scp) { + *lscpp = scp->nextp; + scp->flags &= ~CM_SCACHEFLAG_INHASH; + break; + } + } + osi_assertx(tscp, "afsd: scache hash screwup"); + } + + if (flags & CM_SCACHE_RECYCLEFLAG_DESTROY_BUFFERS) { + osi_queueData_t *qdp; + cm_buf_t *bufp; + + while(qdp = scp->bufWritesp) { + bufp = osi_GetQData(qdp); + osi_QRemove((osi_queue_t **) &scp->bufWritesp, &qdp->q); + osi_QDFree(qdp); + if (bufp) { + lock_ObtainMutex(&bufp->mx); + bufp->cmFlags &= ~CM_BUF_CMSTORING; + bufp->flags &= ~CM_BUF_DIRTY; + bufp->dataVersion = -1; /* bad */ + bufp->dirtyCounter++; + if (bufp->flags & CM_BUF_WAITING) { + osi_Log2(afsd_logp, "CM RecycleSCache Waking [scp 0x%x] bufp 0x%x", scp, bufp); + osi_Wakeup((long) &bufp); + } + lock_ReleaseMutex(&bufp->mx); + buf_Release(bufp); + } + } + while(qdp = scp->bufReadsp) { + bufp = osi_GetQData(qdp); + osi_QRemove((osi_queue_t **) &scp->bufReadsp, &qdp->q); + osi_QDFree(qdp); + if (bufp) { + lock_ObtainMutex(&bufp->mx); + bufp->cmFlags &= ~CM_BUF_CMFETCHING; + bufp->flags &= ~CM_BUF_DIRTY; + bufp->dataVersion = -1; /* bad */ + bufp->dirtyCounter++; + if (bufp->flags & CM_BUF_WAITING) { + osi_Log2(afsd_logp, "CM RecycleSCache Waking [scp 
0x%x] bufp 0x%x", scp, bufp); + osi_Wakeup((long) &bufp); + } + lock_ReleaseMutex(&bufp->mx); + buf_Release(bufp); + } + } + } else { + /* look for things that shouldn't still be set */ + osi_assert(scp->bufWritesp == NULL); + osi_assert(scp->bufReadsp == NULL); + } + + /* invalidate so next merge works fine; + * also initialize some flags */ + scp->flags &= ~(CM_SCACHEFLAG_STATD + | CM_SCACHEFLAG_RO + | CM_SCACHEFLAG_PURERO + | CM_SCACHEFLAG_OVERQUOTA + | CM_SCACHEFLAG_OUTOFSPACE); + scp->serverModTime = 0; + scp->dataVersion = 0; + scp->bulkStatProgress = hzero; + scp->waitCount = 0; + + scp->fid.vnode = 0; + scp->fid.volume = 0; + scp->fid.unique = 0; + scp->fid.cell = 0; + + /* discard callback */ + if (scp->cbServerp) { + cm_PutServer(scp->cbServerp); + scp->cbServerp = NULL; + } + scp->cbExpires = 0; + + /* remove from dnlc */ + cm_dnlcPurgedp(scp); + cm_dnlcPurgevp(scp); + + /* discard cached status; if non-zero, Close + * tried to store this to server but failed */ + scp->mask = 0; + + /* drop held volume ref */ + if (scp->volp) { + cm_PutVolume(scp->volp); + scp->volp = NULL; + } + + /* discard symlink info */ + scp->mountPointStringp[0] = 0; + memset(&scp->mountRootFid, 0, sizeof(cm_fid_t)); + memset(&scp->dotdotFid, 0, sizeof(cm_fid_t)); + + /* reset locking info */ + scp->fileLocksH = NULL; + scp->fileLocksT = NULL; + scp->serverLock = (-1); + scp->exclusiveLocks = 0; + scp->sharedLocks = 0; + + /* not locked, but there can be no references to this guy + * while we hold the global refcount lock. + */ + cm_FreeAllACLEnts(scp); + + return 0; +} + + /* called with cm_scacheLock write-locked; find a vnode to recycle. * Can allocate a new one if desperate, or if below quota (cm_data.maxSCaches). 
*/ cm_scache_t *cm_GetNewSCache(void) { cm_scache_t *scp; - int i; - cm_scache_t **lscpp; - cm_scache_t *tscp; + start: if (cm_data.currentSCaches >= cm_data.maxSCaches) { for (scp = cm_data.scacheLRULastp; scp; @@ -72,88 +206,21 @@ cm_scache_t *cm_GetNewSCache(void) if (scp) { osi_assert(scp >= cm_data.scacheBaseAddress && scp < (cm_scache_t *)cm_data.hashTablep); - /* we found an entry, so return it */ - if (scp->flags & CM_SCACHEFLAG_INHASH) { - /* hash it out first */ - i = CM_SCACHE_HASH(&scp->fid); - for (lscpp = &cm_data.hashTablep[i], tscp = cm_data.hashTablep[i]; - tscp; - lscpp = &tscp->nextp, tscp = tscp->nextp) { - if (tscp == scp) { - *lscpp = scp->nextp; - scp->flags &= ~CM_SCACHEFLAG_INHASH; - break; - } - } - osi_assertx(tscp, "afsd: scache hash screwup"); - } - /* look for things that shouldn't still be set */ - osi_assert(scp->bufWritesp == NULL); - osi_assert(scp->bufReadsp == NULL); + if (!cm_RecycleSCache(scp, 0)) { + + /* we found an entry, so return it */ + /* now remove from the LRU queue and put it back at the + * head of the LRU queue. 
+ */ + cm_AdjustLRU(scp); - /* invalidate so next merge works fine; - * also initialize some flags */ - scp->flags &= ~(CM_SCACHEFLAG_STATD - | CM_SCACHEFLAG_RO - | CM_SCACHEFLAG_PURERO - | CM_SCACHEFLAG_OVERQUOTA - | CM_SCACHEFLAG_OUTOFSPACE); - scp->serverModTime = 0; - scp->dataVersion = 0; - scp->bulkStatProgress = hzero; - scp->waitCount = 0; - - scp->fid.vnode = 0; - scp->fid.volume = 0; - scp->fid.unique = 0; - scp->fid.cell = 0; - - /* discard callback */ - if (scp->cbServerp) { - cm_PutServer(scp->cbServerp); - scp->cbServerp = NULL; - } - scp->cbExpires = 0; - - /* remove from dnlc */ - cm_dnlcPurgedp(scp); - cm_dnlcPurgevp(scp); - - /* discard cached status; if non-zero, Close - * tried to store this to server but failed */ - scp->mask = 0; - - /* drop held volume ref */ - if (scp->volp) { - cm_PutVolume(scp->volp); - scp->volp = NULL; - } - - /* discard symlink info */ - scp->mountPointStringp[0] = 0; - memset(&scp->mountRootFid, 0, sizeof(cm_fid_t)); - memset(&scp->dotdotFid, 0, sizeof(cm_fid_t)); - - /* reset locking info */ - scp->fileLocksH = NULL; - scp->fileLocksT = NULL; - scp->serverLock = (-1); - scp->exclusiveLocks = 0; - scp->sharedLocks = 0; - - /* not locked, but there can be no references to this guy - * while we hold the global refcount lock. - */ - cm_FreeAllACLEnts(scp); - - /* now remove from the LRU queue and put it back at the - * head of the LRU queue. - */ - cm_AdjustLRU(scp); - - /* and we're done */ - return scp; + /* and we're done */ + return scp; + } else { + /* We don't like this entry, choose another one. 
*/ + goto start; + } } } diff --git a/src/WINNT/afsd/cm_scache.h b/src/WINNT/afsd/cm_scache.h index 389baac19c..b8abbafe56 100644 --- a/src/WINNT/afsd/cm_scache.h +++ b/src/WINNT/afsd/cm_scache.h @@ -279,6 +279,9 @@ typedef struct cm_scache { #define CM_SCACHESYNC_NOWAIT 0x100000/* don't wait for the state, * just fail */ +/* flags for cm_RecycleSCache */ +#define CM_SCACHE_RECYCLEFLAG_DESTROY_BUFFERS 0x1 + /* flags for cm_MergeStatus */ #define CM_MERGEFLAG_FORCE 1 /* check mtime before merging; * used to see if we're merging @@ -347,4 +350,6 @@ extern long cm_ValidateSCache(void); extern long cm_ShutdownSCache(void); +extern long cm_RecycleSCache(cm_scache_t *scp, afs_int32 flags); + #endif /* __CM_SCACHE_H_ENV__ */ diff --git a/src/WINNT/afsd/cm_utils.c b/src/WINNT/afsd/cm_utils.c index afefb57b39..8d730149e9 100644 --- a/src/WINNT/afsd/cm_utils.c +++ b/src/WINNT/afsd/cm_utils.c @@ -246,7 +246,7 @@ long cm_MapRPCError(long error, cm_req_t *reqp) error = CM_ERROR_EXISTS; else if (error == 20) error = CM_ERROR_NOTDIR; - else if (error == 2) + else if (error == 2) /* ENOENT */ error = CM_ERROR_NOSUCHFILE; else if (error == 11 /* EAGAIN, most servers */ || error == 35) /* EAGAIN, Digital UNIX */ diff --git a/src/WINNT/afsd/cm_vnodeops.c b/src/WINNT/afsd/cm_vnodeops.c index 4969b4f9aa..44b79f89ab 100644 --- a/src/WINNT/afsd/cm_vnodeops.c +++ b/src/WINNT/afsd/cm_vnodeops.c @@ -1344,6 +1344,13 @@ long cm_Unlink(cm_scache_t *dscp, char *namep, cm_user_t *userp, cm_req_t *reqp) cm_SyncOpDone(dscp, NULL, sflags); if (code == 0) cm_MergeStatus(dscp, &newDirStatus, &volSync, userp, 0); + else if (code == CM_ERROR_NOSUCHFILE) { + /* windows would not have allowed the request to delete the file + * if it did not believe the file existed. therefore, we must + * have an inconsistent view of the world. + */ + dscp->cbServerp = NULL; + } lock_ReleaseMutex(&dscp->mx); return code;