diff --git a/src/vol/fssync-debug.c b/src/vol/fssync-debug.c index 096e6e3c2c..d1ae92039f 100644 --- a/src/vol/fssync-debug.c +++ b/src/vol/fssync-debug.c @@ -606,6 +606,7 @@ vol_state_to_string(VolState state) ENUMCASE(VOL_STATE_VNODE_RELEASE); ENUMCASE(VOL_STATE_VLRU_ADD); ENUMCASE(VOL_STATE_DELETED); + ENUMCASE(VOL_STATE_SALVAGE_REQ); ENUMCASE(VOL_STATE_FREED); default: return "**UNKNOWN**"; diff --git a/src/vol/fssync-server.c b/src/vol/fssync-server.c index acd2e6c664..d5e2dca6a1 100644 --- a/src/vol/fssync-server.c +++ b/src/vol/fssync-server.c @@ -922,14 +922,22 @@ FSYNC_com_VolOff(FSSYNC_VolOp_command * vcom, SYNC_response * res) if (vp->salvage.requested && !vp->salvage.scheduled) { vp->salvage.scheduled = 1; } + + /* If the volume is in VOL_STATE_SALVAGE_REQ, we need to wait + * for the vol to go offline before we can give it away. Also + * make sure we don't come out with vp in an excl state. */ + while (V_attachState(vp) == VOL_STATE_SALVAGE_REQ || + VIsExclusiveState(V_attachState(vp))) { + + VOL_CV_WAIT(&V_attachCV(vp)); + } + case debugUtility: break; case volumeUtility: case volumeServer: - if (V_attachState(vp) == VOL_STATE_SALVAGING || - vp->salvage.requested) { - + if (VIsSalvaging(vp)) { Log("denying offline request for volume %lu; volume is in salvaging state\n", afs_printable_uint32_lu(vp->hashid)); res->hdr.reason = FSYNC_SALVAGE; diff --git a/src/vol/volume.c b/src/vol/volume.c index d93c63c171..0eaa2e2614 100644 --- a/src/vol/volume.c +++ b/src/vol/volume.c @@ -3187,7 +3187,8 @@ attach2(Error * ec, VolId volumeId, char *path, struct DiskPartition64 *partp, if (!VCanScheduleSalvage()) { Log("VAttachVolume: Error attaching volume %s; volume needs salvage; error=%u\n", path, *ec); } - VRequestSalvage_r(ec, vp, SALVSYNC_ERROR, VOL_SALVAGE_INVALIDATE_HEADER); + VRequestSalvage_r(ec, vp, SALVSYNC_ERROR, VOL_SALVAGE_INVALIDATE_HEADER | + VOL_SALVAGE_NO_OFFLINE); vp->nUsers = 0; goto error; @@ -3212,7 +3213,8 @@ attach2(Error * ec, VolId volumeId, char *path, struct DiskPartition64 *partp, if (!VCanScheduleSalvage()) { Log("VAttachVolume: volume salvage flag is ON for %s; volume needs salvage\n", path); } - VRequestSalvage_r(ec, vp, SALVSYNC_NEEDED, VOL_SALVAGE_INVALIDATE_HEADER); + VRequestSalvage_r(ec, vp, SALVSYNC_NEEDED, VOL_SALVAGE_INVALIDATE_HEADER | + VOL_SALVAGE_NO_OFFLINE); vp->nUsers = 0; #else /* AFS_DEMAND_ATTACH_FS */ @@ -3234,7 +3236,8 @@ attach2(Error * ec, VolId volumeId, char *path, struct DiskPartition64 *partp, if (!VCanScheduleSalvage()) { Log("VAttachVolume: volume %s needs to be salvaged; not attached.\n", path); } - VRequestSalvage_r(ec, vp, SALVSYNC_NEEDED, VOL_SALVAGE_INVALIDATE_HEADER); + VRequestSalvage_r(ec, vp, SALVSYNC_NEEDED, VOL_SALVAGE_INVALIDATE_HEADER | + VOL_SALVAGE_NO_OFFLINE); vp->nUsers = 0; #else /* AFS_DEMAND_ATTACH_FS */ @@ -3256,7 +3259,8 @@ attach2(Error * ec, VolId volumeId, char *path, struct DiskPartition64 *partp, #if defined(AFS_DEMAND_ATTACH_FS) /* schedule a salvage so the volume goes away on disk */ - VRequestSalvage_r(ec, vp, SALVSYNC_ERROR, VOL_SALVAGE_INVALIDATE_HEADER); + VRequestSalvage_r(ec, vp, SALVSYNC_ERROR, VOL_SALVAGE_INVALIDATE_HEADER | + VOL_SALVAGE_NO_OFFLINE); VChangeState_r(vp, VOL_STATE_ERROR); vp->nUsers = 0; forcefree = 1; @@ -3274,7 +3278,8 @@ attach2(Error * ec, VolId volumeId, char *path, struct DiskPartition64 *partp, VGetBitmap_r(ec, vp, i); if (*ec) { #ifdef AFS_DEMAND_ATTACH_FS - VRequestSalvage_r(ec, vp, SALVSYNC_ERROR, VOL_SALVAGE_INVALIDATE_HEADER); + VRequestSalvage_r(ec, vp, SALVSYNC_ERROR, VOL_SALVAGE_INVALIDATE_HEADER | + VOL_SALVAGE_NO_OFFLINE); vp->nUsers = 0; #endif /* AFS_DEMAND_ATTACH_FS */ Log("VAttachVolume: error getting bitmap for volume (%s)\n", @@ -3322,7 +3327,8 @@ attach2(Error * ec, VolId volumeId, char *path, struct DiskPartition64 *partp, "%lu; needs salvage\n", (int)*ec, afs_printable_uint32_lu(V_id(vp))); #ifdef AFS_DEMAND_ATTACH_FS - VRequestSalvage_r(ec, vp, SALVSYNC_ERROR, VOL_SALVAGE_INVALIDATE_HEADER); + VRequestSalvage_r(ec, vp, SALVSYNC_ERROR, VOL_SALVAGE_INVALIDATE_HEADER | + VOL_SALVAGE_NO_OFFLINE); vp->nUsers = 0; #else /* !AFS_DEMAND_ATTACH_FS */ *ec = VSALVAGE; @@ -3725,6 +3731,7 @@ GetVolume(Error * ec, Error * client_ec, VolId volumeId, Volume * hint, int nowa * - PREATTACHED * - ATTACHED * - SALVAGING + * - SALVAGE_REQ */ if (vp->salvage.requested) { @@ -3767,8 +3774,7 @@ GetVolume(Error * ec, Error * client_ec, VolId volumeId, Volume * hint, int nowa } } - if ((V_attachState(vp) == VOL_STATE_SALVAGING) || - (*ec == VSALVAGING)) { + if (VIsSalvaging(vp) || (*ec == VSALVAGING)) { if (client_ec) { /* see CheckVnode() in afsfileprocs.c for an explanation * of this error code logic */ @@ -4885,6 +4891,62 @@ VVolOpSetVBusy_r(Volume * vp, FSSYNC_VolOp_info * vopinfo) /* online salvager routines */ /***************************************************/ #if defined(AFS_DEMAND_ATTACH_FS) + +/** + * offline a volume to let it be salvaged. + * + * @param[in] vp Volume to offline + * + * @return whether we offlined the volume successfully + * @retval 0 volume was not offlined + * @retval 1 volume is now offline + * + * @note This is similar to VCheckOffline, but slightly different. We do not + * deal with vp->goingOffline, and we try to avoid touching the volume + * header except just to set needsSalvaged + * + * @pre VOL_LOCK held + * @pre vp->nUsers == 0 + * @pre V_attachState(vp) == VOL_STATE_SALVAGE_REQ + */ +static int +VOfflineForSalvage_r(struct Volume *vp) +{ + Error error; + + VCreateReservation_r(vp); + VWaitExclusiveState_r(vp); + + if (vp->nUsers || V_attachState(vp) == VOL_STATE_SALVAGING) { + /* Someone's using the volume, or someone got to scheduling the salvage + * before us. I don't think either of these should be possible, as we + * should gain no new heavyweight references while we're trying to + * salvage, but just to be sure... */ + VCancelReservation_r(vp); + return 0; + } + + VChangeState_r(vp, VOL_STATE_OFFLINING); + + VLRU_Delete_r(vp); + if (vp->header) { + V_needsSalvaged(vp) = 1; + /* ignore error; updating needsSalvaged is just best effort */ + VUpdateVolume_r(&error, vp, VOL_UPDATE_NOFORCEOFF); + } + VCloseVolumeHandles_r(vp); + + FreeVolumeHeader(vp); + + /* volume has been effectively offlined; we can mark it in the SALVAGING + * state now, which lets FSSYNC give it away */ + VChangeState_r(vp, VOL_STATE_SALVAGING); + + VCancelReservation_r(vp); + + return 1; +} + /** * check whether a salvage needs to be performed on this volume. * @@ -4910,12 +4972,31 @@ VCheckSalvage(Volume * vp) { int ret = 0; #if defined(SALVSYNC_BUILD_CLIENT) || defined(FSSYNC_BUILD_CLIENT) - if (vp->nUsers || vp->nWaiters) + if (vp->nUsers) return ret; + if (!vp->salvage.requested) { + return ret; + } + + /* prevent recursion; some of the code below creates and removes + * lightweight refs, which can call VCheckSalvage */ + if (vp->salvage.scheduling) { + return ret; + } + vp->salvage.scheduling = 1; + + if (V_attachState(vp) == VOL_STATE_SALVAGE_REQ) { + if (!VOfflineForSalvage_r(vp)) { + vp->salvage.scheduling = 0; + return ret; + } + } + if (vp->salvage.requested) { VScheduleSalvage_r(vp); ret = 1; } + vp->salvage.scheduling = 0; #endif /* SALVSYNC_BUILD_CLIENT || FSSYNC_BUILD_CLIENT */ return ret; } @@ -4985,7 +5066,24 @@ VRequestSalvage_r(Error * ec, Volume * vp, int reason, int flags) * fear of a salvage already running for this volume. */ if (vp->stats.salvages < SALVAGE_COUNT_MAX) { - VChangeState_r(vp, VOL_STATE_SALVAGING); + + /* if we don't need to offline the volume, we can go directly + * to SALVAGING. SALVAGING says the volume is offline and is + * either salvaging or ready to be handed to the salvager. + * SALVAGE_REQ says that we want to salvage the volume, but we + * are waiting for it to go offline first. */ + if (flags & VOL_SALVAGE_NO_OFFLINE) { + VChangeState_r(vp, VOL_STATE_SALVAGING); + } else { + VChangeState_r(vp, VOL_STATE_SALVAGE_REQ); + if (vp->nUsers == 0) { + /* normally VOfflineForSalvage_r would be called from + * PutVolume et al when nUsers reaches 0, but if + * it's already 0, just do it ourselves, since PutVolume + * isn't going to get called */ + VOfflineForSalvage_r(vp); + } + } *ec = VSALVAGING; } else { Log("VRequestSalvage: volume %u online salvaged too many times; forced offline.\n", vp->hashid); @@ -5173,6 +5271,13 @@ VScheduleSalvage_r(Volume * vp) return 1; } + if (vp->salvage.scheduled) { + return ret; + } + + VCreateReservation_r(vp); + VWaitExclusiveState_r(vp); + /* * XXX the scheduling process should really be done asynchronously * to avoid fssync deadlocks @@ -5182,9 +5287,6 @@ VScheduleSalvage_r(Volume * vp) * * set the volume to an exclusive state and drop the lock * around the SALVSYNC call - * - * note that we do NOT acquire a reservation here -- doing so - * could result in unbounded recursion */ strlcpy(partName, VPartitionPath(vp->partition), sizeof(partName)); state_save = VChangeState_r(vp, VOL_STATE_SALVSYNC_REQ); @@ -5227,6 +5329,7 @@ VScheduleSalvage_r(Volume * vp) } } } + VCancelReservation_r(vp); return ret; } #endif /* SALVSYNC_BUILD_CLIENT || FSSYNC_BUILD_CLIENT */ diff --git a/src/vol/volume.h b/src/vol/volume.h index 76c3f334f0..bd7b1e7b4b 100644 --- a/src/vol/volume.h +++ b/src/vol/volume.h @@ -172,9 +172,12 @@ typedef enum { VOL_STATE_VNODE_RELEASE = 18, /**< volume is busy releasing vnodes */ VOL_STATE_VLRU_ADD = 19, /**< volume is busy being added to a VLRU queue */ VOL_STATE_DELETED = 20, /**< volume has been deleted by the volserver */ + VOL_STATE_SALVAGE_REQ = 21, /**< volume has been requested to be salvaged, + * but is waiting for other users to go away + * so it can be offlined */ /* please add new states directly above this line */ - VOL_STATE_FREED = 21, /**< debugging aid */ - VOL_STATE_COUNT = 22 /**< total number of valid states */ + VOL_STATE_FREED = 22, /**< debugging aid */ + VOL_STATE_COUNT = 23 /**< total number of valid states */ } VolState; /** @@ -604,7 +607,11 @@ typedef struct VolumeOnlineSalvage { int reason; /**< reason for requesting online salvage */ byte requested; /**< flag specifying that salvage should be scheduled */ byte scheduled; /**< flag specifying whether online salvage scheduled */ - byte reserved[2]; /**< padding */ + byte scheduling; /**< if nonzero, this volume has entered + * VCheckSalvage(), so if we recurse into + * VCheckSalvage() with this set, exit immediately + * to avoid recursing forever */ + byte reserved[1]; /**< padding */ } VolumeOnlineSalvage; /** @@ -978,6 +985,8 @@ extern int VWalkVolumeHeaders(struct DiskPartition64 *dp, const char *partpath, /* VRequestSalvage_r flags */ #define VOL_SALVAGE_INVALIDATE_HEADER 0x1 /* for demand attach fs, invalidate volume header cache */ +#define VOL_SALVAGE_NO_OFFLINE 0x2 /* we do not need to wait to offline the volume; it has + * not been fully attached */ #if defined(NEARINODE_HINT) diff --git a/src/vol/volume_inline.h b/src/vol/volume_inline.h index 1c717aeb85..2d8e4987bc 100644 --- a/src/vol/volume_inline.h +++ b/src/vol/volume_inline.h @@ -239,6 +239,37 @@ VVolLockType(int mode, int writeable) } } +/** + * tells caller whether or not the volume is effectively salvaging. + * + * @param vp volume pointer + * + * @return whether volume is salvaging or not + * @retval 0 no, volume is not salvaging + * @retval 1 yes, volume is salvaging + * + * @note The volume may not actually be getting salvaged at the moment if + * this returns 1, but may have just been requested or scheduled to be + * salvaged. Callers should treat these cases as pretty much the same + * anyway, since we should not touch a volume that is busy salvaging or + * waiting to be salvaged. + */ +static_inline int +VIsSalvaging(struct Volume *vp) +{ + /* these tests are a bit redundant, but to be safe... */ + switch(V_attachState(vp)) { + case VOL_STATE_SALVAGING: + case VOL_STATE_SALVAGE_REQ: + return 1; + default: + if (vp->salvage.requested || vp->salvage.scheduled) { + return 1; + } + } + return 0; +} + /** * tells caller whether or not the current state requires * exclusive access without holding glock. @@ -291,6 +322,7 @@ VIsErrorState(VolState state) switch (state) { case VOL_STATE_ERROR: case VOL_STATE_SALVAGING: + case VOL_STATE_SALVAGE_REQ: return 1; default: return 0;