DAFS: Atomically re-hash vnode in VGetFreeVnode_r

VGetFreeVnode_r pulls a vnode off of the vnode LRU, and removes the
vnode from the vnode hash table. In DAFS, we may drop the volume glock
immediately afterwards in order to close the ihandle for the old vnode
structure.

While we have the glock dropped, another thread may try to
VLookupVnode for the new vnode we are creating, find that it is not
hashed, and call VGetFreeVnode_r itself. This can result in two
threads having two separate copies of the same vnode, which bypasses
any mutual exclusion ensured by per-vnode locks, since they will lock
their own version of the vnode. This can result in a variety of
different problems where two threads try to write to the same vnode at
the same time. One example is calling CopyOnWrite on the same file in
parallel, which can cause link undercounts, writes to the wrong vnode
tag, and other CoW-related errors.

To prevent all this, make VGetFreeVnode_r atomically remove the old
vnode structure from the relevant hashes, and add it to the new hashes
before dropping the glock. This ensures that any other thread trying
to load the same vnode will see the new vnode in the hash table,
though it will not yet be valid until the vnode is loaded.

Note that this only solves this race for DAFS. For non-DAFS, the vol
glock is held over the ihandle close, so this race does not exist.
The comments around the callers of VGetFreeVnode_r indicate that
similar extant races exist here for non-DAFS, but they are unsolvable
without significant DAFS-like changes to the vnode package.

Reviewed-on: http://gerrit.openafs.org/6385
Tested-by: BuildBot <buildbot@rampaginggeek.com>
Reviewed-by: Derrick Brashear <shadow@dementix.org>
(cherry picked from commit 8e15e16c9e6a5768f31976cc21b48d5bb10617b7)

Change-Id: I915d18c4252b698f39fdf65793311a39381096b4
Reviewed-on: http://gerrit.openafs.org/6495
Tested-by: BuildBot <buildbot@rampaginggeek.com>
Reviewed-by: Derrick Brashear <shadow@dementix.org>
This commit is contained in:
Andrew Deason 2011-11-18 10:25:08 -06:00 committed by Derrick Brashear
parent 70fe8ec109
commit 6479bb29d4
2 changed files with 27 additions and 21 deletions

View File

@ -447,13 +447,19 @@ VInitVnodes(VnodeClass class, int nVnodes)
* allocate an unused vnode from the lru chain.
*
* @param[in] vcp vnode class info object pointer
* @param[in] vp volume pointer
* @param[in] vnodeNumber new vnode number that the vnode will be used for
*
* @pre VOL_LOCK is held
*
* @post vnode object is removed from lru, and vnode hash table.
* vnode is disassociated from volume object.
* @post vnode object is removed from lru
* vnode is disassociated with its old volume, and associated with its
* new volume
* vnode is removed from its old vnode hash table, and for DAFS, it is
* added to its new hash table
* state is set to VN_STATE_INVALID.
* inode handle is released.
* a reservation is held on the vnode object
*
* @note we traverse backwards along the lru circlist. It shouldn't
* be necessary to specify that nUsers == 0 since if it is in the list,
@ -462,10 +468,14 @@ VInitVnodes(VnodeClass class, int nVnodes)
*
* @warning DAFS: VOL_LOCK is dropped while doing inode handle release
*
* @warning for non-DAFS, the vnode is _not_ hashed on the vnode hash table;
* non-DAFS must hash the vnode itself after loading data
*
* @return vnode object pointer
*/
Vnode *
VGetFreeVnode_r(struct VnodeClassInfo * vcp)
VGetFreeVnode_r(struct VnodeClassInfo * vcp, struct Volume *vp,
VnodeId vnodeNumber)
{
Vnode *vnp;
@ -492,6 +502,16 @@ VGetFreeVnode_r(struct VnodeClassInfo * vcp)
DeleteFromVVnList(vnp);
}
/* we must re-hash the vnp _before_ we drop the glock again; otherwise,
* someone else might try to grab the same vnode id, and we'll both alloc
* a vnode object for the same vn id, bypassing vnode locking */
Vn_id(vnp) = vnodeNumber;
VnCreateReservation_r(vnp);
AddToVVnList(vp, vnp);
#ifdef AFS_DEMAND_ATTACH_FS
AddToVnHash(vnp);
#endif
/* drop the file descriptor */
if (vnp->handle) {
#ifdef AFS_DEMAND_ATTACH_FS
@ -726,16 +746,7 @@ VAllocVnode_r(Error * ec, Volume * vp, VnodeType type)
} else {
/* no such vnode in the cache */
vnp = VGetFreeVnode_r(vcp);
/* Initialize the header fields so noone allocates another
* vnode with the same number */
Vn_id(vnp) = vnodeNumber;
VnCreateReservation_r(vnp);
AddToVVnList(vp, vnp);
#ifdef AFS_DEMAND_ATTACH_FS
AddToVnHash(vnp);
#endif
vnp = VGetFreeVnode_r(vcp, vp, vnodeNumber);
/* This will never block (guaranteed by check in VGetFreeVnode_r() */
VnLock(vnp, WRITE_LOCK, VOL_LOCK_HELD, WILL_NOT_DEADLOCK);
@ -1229,17 +1240,11 @@ VGetVnode_r(Error * ec, Volume * vp, VnodeId vnodeNumber, int locktype)
/* Not in cache; tentatively grab most distantly used one from the LRU
* chain */
vcp->reads++;
vnp = VGetFreeVnode_r(vcp);
vnp = VGetFreeVnode_r(vcp, vp, vnodeNumber);
/* Initialize */
vnp->changed_newTime = vnp->changed_oldTime = 0;
vnp->delete = 0;
Vn_id(vnp) = vnodeNumber;
VnCreateReservation_r(vnp);
AddToVVnList(vp, vnp);
#ifdef AFS_DEMAND_ATTACH_FS
AddToVnHash(vnp);
#endif
/*
* XXX for non-DAFS, there is a serious

View File

@ -274,7 +274,8 @@ extern int VVnodeWriteToRead_r(Error * ec, Vnode * vnp);
extern Vnode *VAllocVnode(Error * ec, struct Volume *vp, VnodeType type);
extern Vnode *VAllocVnode_r(Error * ec, struct Volume *vp, VnodeType type);
/*extern VFreeVnode();*/
extern Vnode *VGetFreeVnode_r(struct VnodeClassInfo *vcp);
extern Vnode *VGetFreeVnode_r(struct VnodeClassInfo *vcp, struct Volume *vp,
VnodeId vnodeNumber);
extern Vnode *VLookupVnode(struct Volume * vp, VnodeId vnodeId);
extern void AddToVVnList(struct Volume * vp, Vnode * vnp);