windows-buf-refcount-leak-fix-20070203

Plug a buffer refcount leak.   This is why the client slows down over
time.  It runs out of buffers it can use.
This commit is contained in:
Jeffrey Altman 2007-02-04 02:01:12 +00:00
parent 84a0ef8e1a
commit 9a9e0976c8
4 changed files with 67 additions and 40 deletions

View File

@ -722,11 +722,12 @@ long buf_GetNewLocked(struct cm_scache *scp, osi_hyper_t *offsetp, cm_buf_t **bu
/* does this fix the problem below? it's a simple solution. */ /* does this fix the problem below? it's a simple solution. */
if (!cm_data.buf_freeListEndp) if (!cm_data.buf_freeListEndp)
{ {
lock_ReleaseWrite(&buf_globalLock); lock_ReleaseWrite(&buf_globalLock);
osi_Log0(afsd_logp, "buf_GetNewLocked: Free Buffer List is empty - sleeping 200ms");
Sleep(200); Sleep(200);
goto retry; goto retry;
} }
/* for debugging, assert free list isn't empty, although we /* for debugging, assert free list isn't empty, although we
* really should try waiting for a running tranasction to finish * really should try waiting for a running tranasction to finish
@ -828,17 +829,12 @@ long buf_GetNewLocked(struct cm_scache *scp, osi_hyper_t *offsetp, cm_buf_t **bu
cm_data.buf_fileHashTablepp[i] = bp; cm_data.buf_fileHashTablepp[i] = bp;
} }
/* prepare to return it. Start by giving it a good /* we should move it from the lru queue. It better still be there,
* refcount */ * since we've held the global (big) lock since we found it there.
bp->refCount = 1;
/* and since it has a non-zero ref count, we should move
* it from the lru queue. It better be still there,
* since we've held the global (big) lock since we found
* it there.
*/ */
osi_assertx(bp->flags & CM_BUF_INLRU, osi_assertx(bp->flags & CM_BUF_INLRU,
"buf_GetNewLocked: LRU screwup"); "buf_GetNewLocked: LRU screwup");
if (cm_data.buf_freeListEndp == bp) { if (cm_data.buf_freeListEndp == bp) {
/* we're the last guy in this queue, so maintain it */ /* we're the last guy in this queue, so maintain it */
cm_data.buf_freeListEndp = (cm_buf_t *) osi_QPrev(&bp->q); cm_data.buf_freeListEndp = (cm_buf_t *) osi_QPrev(&bp->q);
@ -846,13 +842,19 @@ long buf_GetNewLocked(struct cm_scache *scp, osi_hyper_t *offsetp, cm_buf_t **bu
osi_QRemove((osi_queue_t **) &cm_data.buf_freeListp, &bp->q); osi_QRemove((osi_queue_t **) &cm_data.buf_freeListp, &bp->q);
bp->flags &= ~CM_BUF_INLRU; bp->flags &= ~CM_BUF_INLRU;
/* finally, grab the mutex so that people don't use it /* grab the mutex so that people don't use it
* before the caller fills it with data. Again, no one * before the caller fills it with data. Again, no one
* should have been able to get to this dude to lock it. * should have been able to get to this dude to lock it.
*/ */
osi_assertx(lock_TryMutex(&bp->mx), if (!lock_TryMutex(&bp->mx)) {
"buf_GetNewLocked: TryMutex failed"); osi_Log2(afsd_logp, "buf_GetNewLocked bp 0x%p cannot be mutex locked. refCount %d should be 0",
bp, bp->refCount);
osi_panic("buf_GetNewLocked: TryMutex failed",__FILE__,__LINE__);
}
/* prepare to return it. Give it a refcount */
bp->refCount = 1;
lock_ReleaseWrite(&buf_globalLock); lock_ReleaseWrite(&buf_globalLock);
*bufpp = bp; *bufpp = bp;
@ -862,7 +864,8 @@ long buf_GetNewLocked(struct cm_scache *scp, osi_hyper_t *offsetp, cm_buf_t **bu
return 0; return 0;
} /* for all buffers in lru queue */ } /* for all buffers in lru queue */
lock_ReleaseWrite(&buf_globalLock); lock_ReleaseWrite(&buf_globalLock);
Sleep(100); /* give some time for a buffer to be freed */ osi_Log0(afsd_logp, "buf_GetNewLocked: Free Buffer List has no buffers with a zero refcount - sleeping 100ms");
Sleep(100); /* give some time for a buffer to be freed */
} /* while loop over everything */ } /* while loop over everything */
/* not reached */ /* not reached */
} /* the proc */ } /* the proc */
@ -1297,7 +1300,7 @@ long buf_Truncate(cm_scache_t *scp, cm_user_t *userp, cm_req_t *reqp,
buf_HoldLocked(bufp); buf_HoldLocked(bufp);
lock_ReleaseWrite(&buf_globalLock); lock_ReleaseWrite(&buf_globalLock);
for(; bufp; bufp = nbufp) { while (bufp) {
lock_ObtainMutex(&bufp->mx); lock_ObtainMutex(&bufp->mx);
bufEnd.HighPart = 0; bufEnd.HighPart = 0;
@ -1374,6 +1377,7 @@ long buf_Truncate(cm_scache_t *scp, cm_user_t *userp, cm_req_t *reqp,
} }
buf_ReleaseLocked(bufp); buf_ReleaseLocked(bufp);
lock_ReleaseWrite(&buf_globalLock); lock_ReleaseWrite(&buf_globalLock);
bufp = nbufp;
} }
#ifdef TESTING #ifdef TESTING
@ -1568,21 +1572,44 @@ int cm_DumpBufHashTable(FILE *outputFile, char *cookie, int lock)
{ {
for (bp = cm_data.buf_hashTablepp[i]; bp; bp=bp->hashp) for (bp = cm_data.buf_hashTablepp[i]; bp; bp=bp->hashp)
{ {
if (bp->refCount) StringCbPrintfA(output, sizeof(output),
{ "%s bp=0x%08X, hash=%d, fid (cell=%d, volume=%d, "
StringCbPrintfA(output, sizeof(output), "vnode=%d, unique=%d), size=%d flags=0x%x, refCount=%d\r\n",
"%s bp=0x%08X, hash=%d, fid (cell=%d, volume=%d, " cookie, (void *)bp, i, bp->fid.cell, bp->fid.volume,
"vnode=%d, unique=%d), size=%d refCount=%d\r\n", bp->fid.vnode, bp->fid.unique, bp->size, bp->flags, bp->refCount);
cookie, (void *)bp, i, bp->fid.cell, bp->fid.volume, WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
bp->fid.vnode, bp->fid.unique, bp->size, bp->refCount);
WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
}
} }
} }
StringCbPrintfA(output, sizeof(output), "%s - Done dumping buf_HashTable.\r\n", cookie); StringCbPrintfA(output, sizeof(output), "%s - Done dumping buf_HashTable.\r\n", cookie);
WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL); WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
StringCbPrintfA(output, sizeof(output), "%s - dumping buf_freeListEndp\r\n", cookie);
WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
for(bp = cm_data.buf_freeListEndp; bp; bp=(cm_buf_t *) osi_QPrev(&bp->q)) {
StringCbPrintfA(output, sizeof(output),
"%s bp=0x%08X, fid (cell=%d, volume=%d, "
"vnode=%d, unique=%d), size=%d flags=0x%x, refCount=%d\r\n",
cookie, (void *)bp, bp->fid.cell, bp->fid.volume,
bp->fid.vnode, bp->fid.unique, bp->size, bp->flags, bp->refCount);
WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
}
StringCbPrintfA(output, sizeof(output), "%s - Done dumping buf_FreeListEndp.\r\n", cookie);
WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
StringCbPrintfA(output, sizeof(output), "%s - dumping buf_dirtyListEndp\r\n", cookie);
WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
for(bp = cm_data.buf_dirtyListEndp; bp; bp=(cm_buf_t *) osi_QPrev(&bp->q)) {
StringCbPrintfA(output, sizeof(output),
"%s bp=0x%08X, fid (cell=%d, volume=%d, "
"vnode=%d, unique=%d), size=%d flags=0x%x, refCount=%d\r\n",
cookie, (void *)bp, bp->fid.cell, bp->fid.volume,
bp->fid.vnode, bp->fid.unique, bp->size, bp->flags, bp->refCount);
WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
}
StringCbPrintfA(output, sizeof(output), "%s - Done dumping buf_dirtyListEndp.\r\n", cookie);
WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
if (lock) if (lock)
lock_ReleaseRead(&buf_globalLock); lock_ReleaseRead(&buf_globalLock);
return 0; return 0;

View File

@ -644,7 +644,7 @@ void cm_BkgPrefetch(cm_scache_t *scp, afs_uint32 p1, afs_uint32 p2, afs_uint32 p
long length; long length;
osi_hyper_t base; osi_hyper_t base;
long code; long code;
cm_buf_t *bp; cm_buf_t *bp = NULL;
int cpff = 0; /* cleared prefetch flag */ int cpff = 0; /* cleared prefetch flag */
cm_req_t req; cm_req_t req;
@ -664,6 +664,8 @@ void cm_BkgPrefetch(cm_scache_t *scp, afs_uint32 p1, afs_uint32 p2, afs_uint32 p
if (code || (bp->cmFlags & CM_BUF_CMFETCHING)) { if (code || (bp->cmFlags & CM_BUF_CMFETCHING)) {
scp->flags &= ~CM_SCACHEFLAG_PREFETCHING; scp->flags &= ~CM_SCACHEFLAG_PREFETCHING;
lock_ReleaseMutex(&scp->mx); lock_ReleaseMutex(&scp->mx);
if (bp)
buf_Release(bp);
return; return;
} }

View File

@ -1210,7 +1210,9 @@ void cm_SyncOpDone(cm_scache_t *scp, cm_buf_t *bufp, afs_uint32 flags)
/* now update the buffer pointer */ /* now update the buffer pointer */
if (flags & CM_SCACHESYNC_FETCHDATA) { if (flags & CM_SCACHESYNC_FETCHDATA) {
/* ensure that the buffer isn't already in the I/O list */ int release = 0;
/* ensure that the buffer isn't already in the I/O list */
for(qdp = scp->bufReadsp; qdp; qdp = (osi_queueData_t *) osi_QNext(&qdp->q)) { for(qdp = scp->bufReadsp; qdp; qdp = (osi_queueData_t *) osi_QNext(&qdp->q)) {
tbufp = osi_GetQData(qdp); tbufp = osi_GetQData(qdp);
if (tbufp == bufp) if (tbufp == bufp)
@ -1219,11 +1221,9 @@ void cm_SyncOpDone(cm_scache_t *scp, cm_buf_t *bufp, afs_uint32 flags)
if (qdp) { if (qdp) {
osi_QRemove((osi_queue_t **) &scp->bufReadsp, &qdp->q); osi_QRemove((osi_queue_t **) &scp->bufReadsp, &qdp->q);
osi_QDFree(qdp); osi_QDFree(qdp);
release = 1;
} }
if (bufp) { if (bufp) {
int release = 0;
if (bufp->cmFlags & CM_BUF_CMFETCHING)
release = 1;
bufp->cmFlags &= ~(CM_BUF_CMFETCHING | CM_BUF_CMFULLYFETCHED); bufp->cmFlags &= ~(CM_BUF_CMFETCHING | CM_BUF_CMFULLYFETCHED);
if (bufp->flags & CM_BUF_WAITING) { if (bufp->flags & CM_BUF_WAITING) {
osi_Log2(afsd_logp, "CM SyncOpDone Waking [scp 0x%p] bufp 0x%p", scp, bufp); osi_Log2(afsd_logp, "CM SyncOpDone Waking [scp 0x%p] bufp 0x%p", scp, bufp);
@ -1236,6 +1236,7 @@ void cm_SyncOpDone(cm_scache_t *scp, cm_buf_t *bufp, afs_uint32 flags)
/* now update the buffer pointer */ /* now update the buffer pointer */
if (flags & CM_SCACHESYNC_STOREDATA) { if (flags & CM_SCACHESYNC_STOREDATA) {
int release = 0;
/* ensure that the buffer isn't already in the I/O list */ /* ensure that the buffer isn't already in the I/O list */
for(qdp = scp->bufWritesp; qdp; qdp = (osi_queueData_t *) osi_QNext(&qdp->q)) { for(qdp = scp->bufWritesp; qdp; qdp = (osi_queueData_t *) osi_QNext(&qdp->q)) {
tbufp = osi_GetQData(qdp); tbufp = osi_GetQData(qdp);
@ -1245,11 +1246,9 @@ void cm_SyncOpDone(cm_scache_t *scp, cm_buf_t *bufp, afs_uint32 flags)
if (qdp) { if (qdp) {
osi_QRemove((osi_queue_t **) &scp->bufWritesp, &qdp->q); osi_QRemove((osi_queue_t **) &scp->bufWritesp, &qdp->q);
osi_QDFree(qdp); osi_QDFree(qdp);
release = 1;
} }
if (bufp) { if (bufp) {
int release = 0;
if (bufp->cmFlags & CM_BUF_CMSTORING)
release = 1;
bufp->cmFlags &= ~CM_BUF_CMSTORING; bufp->cmFlags &= ~CM_BUF_CMSTORING;
if (bufp->flags & CM_BUF_WAITING) { if (bufp->flags & CM_BUF_WAITING) {
osi_Log2(afsd_logp, "CM SyncOpDone Waking [scp 0x%p] bufp 0x%p", scp, bufp); osi_Log2(afsd_logp, "CM SyncOpDone Waking [scp 0x%p] bufp 0x%p", scp, bufp);
@ -1554,8 +1553,8 @@ int cm_DumpSCache(FILE *outputFile, char *cookie, int lock)
{ {
if (scp->refCount != 0) if (scp->refCount != 0)
{ {
sprintf(output, "%s fid (cell=%d, volume=%d, vnode=%d, unique=%d) refCount=%u\r\n", sprintf(output, "%s scp=0x%p, fid (cell=%d, volume=%d, vnode=%d, unique=%d) refCount=%u\r\n",
cookie, scp->fid.cell, scp->fid.volume, scp->fid.vnode, scp->fid.unique, cookie, scp, scp->fid.cell, scp->fid.volume, scp->fid.vnode, scp->fid.unique,
scp->refCount); scp->refCount);
WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL); WriteFile(outputFile, output, (DWORD)strlen(output), &zilch, NULL);
} }

View File

@ -935,15 +935,15 @@ long cm_ReadMountPoint(cm_scache_t *scp, cm_user_t *userp, cm_req_t *reqp)
lock_ReleaseRead(&scp->bufCreateLock); lock_ReleaseRead(&scp->bufCreateLock);
lock_ObtainMutex(&scp->mx); lock_ObtainMutex(&scp->mx);
if (code) { if (code)
return code; return code;
}
while (1) { while (1) {
code = cm_SyncOp(scp, bufp, userp, reqp, 0, code = cm_SyncOp(scp, bufp, userp, reqp, 0,
CM_SCACHESYNC_READ | CM_SCACHESYNC_NEEDCALLBACK); CM_SCACHESYNC_READ | CM_SCACHESYNC_NEEDCALLBACK);
if (code) { if (code)
goto done; goto done;
}
cm_SyncOpDone(scp, bufp, CM_SCACHESYNC_NEEDCALLBACK | CM_SCACHESYNC_READ); cm_SyncOpDone(scp, bufp, CM_SCACHESYNC_NEEDCALLBACK | CM_SCACHESYNC_READ);
@ -952,9 +952,8 @@ long cm_ReadMountPoint(cm_scache_t *scp, cm_user_t *userp, cm_req_t *reqp)
/* otherwise load buffer */ /* otherwise load buffer */
code = cm_GetBuffer(scp, bufp, NULL, userp, reqp); code = cm_GetBuffer(scp, bufp, NULL, userp, reqp);
if (code) { if (code)
goto done; goto done;
}
} }
/* locked, has callback, has valid data in buffer */ /* locked, has callback, has valid data in buffer */
if ((tlen = scp->length.LowPart) > 1000) if ((tlen = scp->length.LowPart) > 1000)