From 030a9849e22f443492342794f436e2c86c98a903 Mon Sep 17 00:00:00 2001 From: Michael Meffie Date: Fri, 7 Jul 2017 11:11:12 -0400 Subject: [PATCH] afs: fix afs_xserver deadlock in afsdb refresh When setting up a new volume, the cache manager calls afs_GetServer() to set up the server object for each fileserver associated with the volume. The afs_GetServer() function locks afs_xserver and then, among other things, calls afs_GetCell() to look up the cell info by cell number. When the cache manager is running in afsdb mode, afs_GetCell() will attempt to refresh the cell info if the time-to-live has been exceeded since the last call to afs_GetCell(). During this refresh the AFSDB calls afs_GetServer() to update the vlserver information. The afsdb handler thread and the thread processing the volume setup become deadlocked since the afs_xserver lock is already held at this point. This bug will manifest when the DNS SRV record TTL is shorter than the time the fileservers take to respond to the GetCapabilities RPC within afs_GetServer() and there are multiple read-only servers for a volume. Avoid the deadlock by using the afs_GetCellStale() variant within afs_GetServer(). This variant returns the memory-resident cell info without the afsdb upcall and the subsequent afs_GetServer() call. Change-Id: Iad57870f84c5e542a5ee20f00ea03b3fc87683a1 Reviewed-on: https://gerrit.openafs.org/12652 Tested-by: BuildBot Reviewed-by: Benjamin Kaduk --- src/afs/afs_server.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/afs/afs_server.c b/src/afs/afs_server.c index afce4e6787..79b4704ec6 100644 --- a/src/afs/afs_server.c +++ b/src/afs/afs_server.c @@ -1675,7 +1675,8 @@ afs_GetServer(afs_uint32 *aserverp, afs_int32 nservers, afs_int32 acell, newts->flags |= SRVR_MULTIHOMED; } if (acell) - newts->cell = afs_GetCell(acell, 0); + /* Use the afs_GetCellStale variant to avoid afs_GetServer recursion. 
*/ + newts->cell = afs_GetCellStale(acell, 0); /* For each IP address we are registering */ for (k = 0; k < nservers; k++) { @@ -1764,7 +1765,8 @@ afs_GetServer(afs_uint32 *aserverp, afs_int32 nservers, afs_int32 acell, afs_servers[iphash] = orphts; if (acell) - orphts->cell = afs_GetCell(acell, 0); + /* Use the afs_GetCellStale variant to avoid afs_GetServer recursion. */ + orphts->cell = afs_GetCellStale(acell, 0); } /* Hang the srvAddr struct off of the server structure. The server