mirror of
https://git.openafs.org/openafs.git
synced 2025-01-18 06:50:12 +00:00
SOLARIS: Avoid vcache locks when flushing pages for RO vnodes
We have multiple code paths that hold the following locks at the same time: - avc->lock for a vcache - The page lock for a page in 'avc' In order to avoid deadlocks, we need a consistent ordering for obtaining these two locks. The code in afs_putpage() currently obtains avc->lock before the page lock (Obtain*Lock is called before pvn_vplist_dirty). The code in afs_getpages() also obtains avc->lock before the page lock, but it does so in a loop for all requested pages (via pvn_getpages()). On the second iteration of that loop, it obtains avc->lock, and the page from the first iteration of the loop is still locked. Thus, it obtains a page lock before locking avc->lock in some cases. Since we have two code paths that obtain those two locks in a different order, a deadlock can occur. Fixing this properly requires changing at least one of those code paths, so the locks are taken in a consistent order. However, doing so is complex and will be done in a separate future commit. For this commit, we can avoid the deadlock for RO volumes by simply avoiding taking avc->lock in afs_putpages() at all while the pages are locked. Normally, we lock avc->lock because pvn_vplist_dirty() will call afs_putapage() for each dirty page (and afs_putapage() requires avc->lock held). But for RO volumes, we will have no dirty pages (because RO volumes cannot be written to from a client), and so afs_putapage() will never be called. So to avoid this deadlock issue for RO volumes, avoid taking avc->lock across the pvn_vplist_dirty() call in afs_putpage(). We now pass a dummy pageout callback function to pvn_vplist_dirty() instead, which should never be called, and which panics if it ever is. We still need to hold avc->lock a few other times during afs_putpage() for other minor reasons, but none of these hold page locks at the same time, so the deadlock issue is still avoided. [mmeffie: comments, and fix missing write lock, fix lock releases] [adeason: revised commit message] Change-Id: Iec11101147220828f319dae4027e7ab1f08483a6 Reviewed-on: https://gerrit.openafs.org/12247 Tested-by: BuildBot <buildbot@rampaginggeek.com> Reviewed-by: Andrew Deason <adeason@dson.org> Reviewed-by: Benjamin Kaduk <kaduk@mit.edu>
This commit is contained in:
parent
073522b3d4
commit
5e09a694ec
@ -463,6 +463,19 @@ afs_GetOnePage(struct vnode *vp, u_offset_t off, u_int alen, u_int *protp,
|
|||||||
return code;
|
return code;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Dummy pvn_vplist_dirty() handler for non-writable vnodes.
|
||||||
|
*/
|
||||||
|
static int
|
||||||
|
afs_never_putapage(struct vnode *vp, struct page *pages, u_offset_t * offp,
|
||||||
|
size_t * lenp, int flags, afs_ucred_t *credp)
|
||||||
|
{
|
||||||
|
struct vcache *avc = VTOAFS(vp);
|
||||||
|
osi_Assert((avc->f.states & CRO) != 0);
|
||||||
|
osi_Panic("Dirty pages while flushing a read-only volume vnode.");
|
||||||
|
return EIO; /* unreachable */
|
||||||
|
}
|
||||||
|
|
||||||
int
|
int
|
||||||
afs_putpage(struct vnode *vp, offset_t off, u_int len, int flags,
|
afs_putpage(struct vnode *vp, offset_t off, u_int len, int flags,
|
||||||
afs_ucred_t *cred)
|
afs_ucred_t *cred)
|
||||||
@ -474,7 +487,7 @@ afs_putpage(struct vnode *vp, offset_t off, u_int len, int flags,
|
|||||||
afs_offs_t endPos;
|
afs_offs_t endPos;
|
||||||
afs_int32 NPages = 0;
|
afs_int32 NPages = 0;
|
||||||
u_offset_t toff = off;
|
u_offset_t toff = off;
|
||||||
int didWriteLock;
|
int didLock = 0;
|
||||||
|
|
||||||
AFS_STATCNT(afs_putpage);
|
AFS_STATCNT(afs_putpage);
|
||||||
if (vp->v_flag & VNOMAP) /* file doesn't allow mapping */
|
if (vp->v_flag & VNOMAP) /* file doesn't allow mapping */
|
||||||
@ -488,11 +501,11 @@ afs_putpage(struct vnode *vp, offset_t off, u_int len, int flags,
|
|||||||
(afs_int32) vp, ICL_TYPE_OFFSET, ICL_HANDLE_OFFSET(off),
|
(afs_int32) vp, ICL_TYPE_OFFSET, ICL_HANDLE_OFFSET(off),
|
||||||
ICL_TYPE_INT32, (afs_int32) len, ICL_TYPE_LONG, (int)flags);
|
ICL_TYPE_INT32, (afs_int32) len, ICL_TYPE_LONG, (int)flags);
|
||||||
avc = VTOAFS(vp);
|
avc = VTOAFS(vp);
|
||||||
ObtainSharedLock(&avc->lock, 247);
|
|
||||||
didWriteLock = 0;
|
|
||||||
|
|
||||||
/* Get a list of modified (or whatever) pages */
|
/* Get a list of modified (or whatever) pages */
|
||||||
if (len) {
|
if (len) {
|
||||||
|
ObtainSharedLock(&avc->lock, 247);
|
||||||
|
didLock = SHARED_LOCK;
|
||||||
endPos = (afs_offs_t) off + len; /* position we're supposed to write up to */
|
endPos = (afs_offs_t) off + len; /* position we're supposed to write up to */
|
||||||
while ((afs_offs_t) toff < endPos
|
while ((afs_offs_t) toff < endPos
|
||||||
&& (afs_offs_t) toff < avc->f.m.Length) {
|
&& (afs_offs_t) toff < avc->f.m.Length) {
|
||||||
@ -507,9 +520,9 @@ afs_putpage(struct vnode *vp, offset_t off, u_int len, int flags,
|
|||||||
if (!pages || !pvn_getdirty(pages, flags))
|
if (!pages || !pvn_getdirty(pages, flags))
|
||||||
tlen = PAGESIZE;
|
tlen = PAGESIZE;
|
||||||
else {
|
else {
|
||||||
if (!didWriteLock) {
|
if (didLock == SHARED_LOCK) {
|
||||||
AFS_GLOCK();
|
AFS_GLOCK();
|
||||||
didWriteLock = 1;
|
didLock = WRITE_LOCK;
|
||||||
UpgradeSToWLock(&avc->lock, 671);
|
UpgradeSToWLock(&avc->lock, 671);
|
||||||
AFS_GUNLOCK();
|
AFS_GUNLOCK();
|
||||||
}
|
}
|
||||||
@ -524,27 +537,50 @@ afs_putpage(struct vnode *vp, offset_t off, u_int len, int flags,
|
|||||||
AFS_GLOCK();
|
AFS_GLOCK();
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if (!didWriteLock) {
|
/*
|
||||||
UpgradeSToWLock(&avc->lock, 670);
|
* We normally arrive here due to a vm flush.
|
||||||
didWriteLock = 1;
|
*
|
||||||
|
* If this vnode belongs to a writable volume, obtain a vcache lock
|
||||||
|
* then call pvn_vplist_dirty to free, invalidate, or to write out
|
||||||
|
* dirty pages with afs_putapage. The afs_putapage routine requires a
|
||||||
|
* vcache lock, so we obtain it here before any page locks are taken.
|
||||||
|
* This locking order is done to avoid deadlocking due to races with
|
||||||
|
* afs_getpage, which also takes vcache and page locks.
|
||||||
|
*
|
||||||
|
* If this vnode belongs to a non-writable volume, then it will not
|
||||||
|
* contain dirty pages, so we do not need to lock the vcache and since
|
||||||
|
* afs_putapage will not be called. Instead, forgo the vcache lock and
|
||||||
|
* call pvn_vplist_dirty to free, or invalidate pages. Pass a dummy
|
||||||
|
* page out handler to pvn_vplist_dirty which we do not expect to be
|
||||||
|
* called. Panic if the dummy handler is called, since something went
|
||||||
|
* horribly wrong.
|
||||||
|
*/
|
||||||
|
if ((avc->f.states & CRO) == 0) {
|
||||||
|
ObtainWriteLock(&avc->lock, 670);
|
||||||
|
didLock = WRITE_LOCK;
|
||||||
}
|
}
|
||||||
|
|
||||||
AFS_GUNLOCK();
|
AFS_GUNLOCK();
|
||||||
code = pvn_vplist_dirty(vp, toff, afs_putapage, flags, cred);
|
if ((avc->f.states & CRO) == 0)
|
||||||
|
code = pvn_vplist_dirty(vp, toff, afs_putapage, flags, cred);
|
||||||
|
else
|
||||||
|
code = pvn_vplist_dirty(vp, toff, afs_never_putapage, flags, cred);
|
||||||
AFS_GLOCK();
|
AFS_GLOCK();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (code && !avc->vc_error) {
|
if (code && !avc->vc_error) {
|
||||||
if (!didWriteLock) {
|
if (didLock == 0) {
|
||||||
|
ObtainWriteLock(&avc->lock, 668);
|
||||||
|
didLock = WRITE_LOCK;
|
||||||
|
} else if (didLock == SHARED_LOCK) {
|
||||||
UpgradeSToWLock(&avc->lock, 669);
|
UpgradeSToWLock(&avc->lock, 669);
|
||||||
didWriteLock = 1;
|
didLock = WRITE_LOCK;
|
||||||
}
|
}
|
||||||
avc->vc_error = code;
|
avc->vc_error = code;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (didWriteLock)
|
if (didLock == WRITE_LOCK)
|
||||||
ReleaseWriteLock(&avc->lock);
|
ReleaseWriteLock(&avc->lock);
|
||||||
else
|
else if (didLock == SHARED_LOCK)
|
||||||
ReleaseSharedLock(&avc->lock);
|
ReleaseSharedLock(&avc->lock);
|
||||||
afs_Trace2(afs_iclSetp, CM_TRACE_PAGEOUTDONE, ICL_TYPE_LONG, code,
|
afs_Trace2(afs_iclSetp, CM_TRACE_PAGEOUTDONE, ICL_TYPE_LONG, code,
|
||||||
ICL_TYPE_LONG, NPages);
|
ICL_TYPE_LONG, NPages);
|
||||||
|
Loading…
Reference in New Issue
Block a user