mirror of
https://git.openafs.org/openafs.git
synced 2025-01-19 15:30:14 +00:00
DAFS: Fix demand-salvages of attached volumes
Currently, when an error is encountered for an attached volume, we call VRequestSalvage_r, which makes the volume go into the VOL_STATE_SALVAGING state. This state implies that the volume is offline, however, which is not necessarily the case if we're calling VRequestSalvage_r from, for example, VAllocVnode_r or VUpdateVolume_r. So now, make a new state called VOL_STATE_SALVAGE_REQ to indicate when a salvage has been requested but the volume is not offline yet (and thus is not yet ready to give to the salvager). If VCheckSalvage finds a volume in this state, it offlines the volume first. The FSSYNC VOL_OFF handler now checks for this state, and if we're giving the volume to the salvager, we wait for the volume to exit that state. VRequestSalvage_r also gains a new flag, VOL_SALVAGE_NO_OFFLINE. This is to ensure that the existing salvaging code paths for unattached volumes does not change (for when VRequesetSalvage_r is called from attach2). If this flag is passed, we do what we used to do, which is just salvage the volume without offlining it. Change-Id: Ie709ac7013ab2b52c87fa408c254651abe5e6af3 Reviewed-on: http://gerrit.openafs.org/2329 Tested-by: BuildBot <buildbot@rampaginggeek.com> Tested-by: Derrick Brashear <shadow@dementia.org> Reviewed-by: Derrick Brashear <shadow@dementia.org>
This commit is contained in:
parent
138ac6577e
commit
0aa7fac246
@ -606,6 +606,7 @@ vol_state_to_string(VolState state)
|
||||
ENUMCASE(VOL_STATE_VNODE_RELEASE);
|
||||
ENUMCASE(VOL_STATE_VLRU_ADD);
|
||||
ENUMCASE(VOL_STATE_DELETED);
|
||||
ENUMCASE(VOL_STATE_SALVAGE_REQ);
|
||||
ENUMCASE(VOL_STATE_FREED);
|
||||
default:
|
||||
return "**UNKNOWN**";
|
||||
|
@ -922,14 +922,22 @@ FSYNC_com_VolOff(FSSYNC_VolOp_command * vcom, SYNC_response * res)
|
||||
if (vp->salvage.requested && !vp->salvage.scheduled) {
|
||||
vp->salvage.scheduled = 1;
|
||||
}
|
||||
|
||||
/* If the volume is in VOL_STATE_SALVAGE_REQ, we need to wait
|
||||
* for the vol to go offline before we can give it away. Also
|
||||
* make sure we don't come out with vp in an excl state. */
|
||||
while (V_attachState(vp) == VOL_STATE_SALVAGE_REQ ||
|
||||
VIsExclusiveState(V_attachState(vp))) {
|
||||
|
||||
VOL_CV_WAIT(&V_attachCV(vp));
|
||||
}
|
||||
|
||||
case debugUtility:
|
||||
break;
|
||||
|
||||
case volumeUtility:
|
||||
case volumeServer:
|
||||
if (V_attachState(vp) == VOL_STATE_SALVAGING ||
|
||||
vp->salvage.requested) {
|
||||
|
||||
if (VIsSalvaging(vp)) {
|
||||
Log("denying offline request for volume %lu; volume is in salvaging state\n",
|
||||
afs_printable_uint32_lu(vp->hashid));
|
||||
res->hdr.reason = FSYNC_SALVAGE;
|
||||
|
129
src/vol/volume.c
129
src/vol/volume.c
@ -3187,7 +3187,8 @@ attach2(Error * ec, VolId volumeId, char *path, struct DiskPartition64 *partp,
|
||||
if (!VCanScheduleSalvage()) {
|
||||
Log("VAttachVolume: Error attaching volume %s; volume needs salvage; error=%u\n", path, *ec);
|
||||
}
|
||||
VRequestSalvage_r(ec, vp, SALVSYNC_ERROR, VOL_SALVAGE_INVALIDATE_HEADER);
|
||||
VRequestSalvage_r(ec, vp, SALVSYNC_ERROR, VOL_SALVAGE_INVALIDATE_HEADER |
|
||||
VOL_SALVAGE_NO_OFFLINE);
|
||||
vp->nUsers = 0;
|
||||
|
||||
goto error;
|
||||
@ -3212,7 +3213,8 @@ attach2(Error * ec, VolId volumeId, char *path, struct DiskPartition64 *partp,
|
||||
if (!VCanScheduleSalvage()) {
|
||||
Log("VAttachVolume: volume salvage flag is ON for %s; volume needs salvage\n", path);
|
||||
}
|
||||
VRequestSalvage_r(ec, vp, SALVSYNC_NEEDED, VOL_SALVAGE_INVALIDATE_HEADER);
|
||||
VRequestSalvage_r(ec, vp, SALVSYNC_NEEDED, VOL_SALVAGE_INVALIDATE_HEADER |
|
||||
VOL_SALVAGE_NO_OFFLINE);
|
||||
vp->nUsers = 0;
|
||||
|
||||
#else /* AFS_DEMAND_ATTACH_FS */
|
||||
@ -3234,7 +3236,8 @@ attach2(Error * ec, VolId volumeId, char *path, struct DiskPartition64 *partp,
|
||||
if (!VCanScheduleSalvage()) {
|
||||
Log("VAttachVolume: volume %s needs to be salvaged; not attached.\n", path);
|
||||
}
|
||||
VRequestSalvage_r(ec, vp, SALVSYNC_NEEDED, VOL_SALVAGE_INVALIDATE_HEADER);
|
||||
VRequestSalvage_r(ec, vp, SALVSYNC_NEEDED, VOL_SALVAGE_INVALIDATE_HEADER |
|
||||
VOL_SALVAGE_NO_OFFLINE);
|
||||
vp->nUsers = 0;
|
||||
|
||||
#else /* AFS_DEMAND_ATTACH_FS */
|
||||
@ -3256,7 +3259,8 @@ attach2(Error * ec, VolId volumeId, char *path, struct DiskPartition64 *partp,
|
||||
|
||||
#if defined(AFS_DEMAND_ATTACH_FS)
|
||||
/* schedule a salvage so the volume goes away on disk */
|
||||
VRequestSalvage_r(ec, vp, SALVSYNC_ERROR, VOL_SALVAGE_INVALIDATE_HEADER);
|
||||
VRequestSalvage_r(ec, vp, SALVSYNC_ERROR, VOL_SALVAGE_INVALIDATE_HEADER |
|
||||
VOL_SALVAGE_NO_OFFLINE);
|
||||
VChangeState_r(vp, VOL_STATE_ERROR);
|
||||
vp->nUsers = 0;
|
||||
forcefree = 1;
|
||||
@ -3274,7 +3278,8 @@ attach2(Error * ec, VolId volumeId, char *path, struct DiskPartition64 *partp,
|
||||
VGetBitmap_r(ec, vp, i);
|
||||
if (*ec) {
|
||||
#ifdef AFS_DEMAND_ATTACH_FS
|
||||
VRequestSalvage_r(ec, vp, SALVSYNC_ERROR, VOL_SALVAGE_INVALIDATE_HEADER);
|
||||
VRequestSalvage_r(ec, vp, SALVSYNC_ERROR, VOL_SALVAGE_INVALIDATE_HEADER |
|
||||
VOL_SALVAGE_NO_OFFLINE);
|
||||
vp->nUsers = 0;
|
||||
#endif /* AFS_DEMAND_ATTACH_FS */
|
||||
Log("VAttachVolume: error getting bitmap for volume (%s)\n",
|
||||
@ -3322,7 +3327,8 @@ attach2(Error * ec, VolId volumeId, char *path, struct DiskPartition64 *partp,
|
||||
"%lu; needs salvage\n", (int)*ec,
|
||||
afs_printable_uint32_lu(V_id(vp)));
|
||||
#ifdef AFS_DEMAND_ATTACH_FS
|
||||
VRequestSalvage_r(ec, vp, SALVSYNC_ERROR, VOL_SALVAGE_INVALIDATE_HEADER);
|
||||
VRequestSalvage_r(ec, vp, SALVSYNC_ERROR, VOL_SALVAGE_INVALIDATE_HEADER |
|
||||
VOL_SALVAGE_NO_OFFLINE);
|
||||
vp->nUsers = 0;
|
||||
#else /* !AFS_DEMAND_ATTACH_FS */
|
||||
*ec = VSALVAGE;
|
||||
@ -3725,6 +3731,7 @@ GetVolume(Error * ec, Error * client_ec, VolId volumeId, Volume * hint, int nowa
|
||||
* - PREATTACHED
|
||||
* - ATTACHED
|
||||
* - SALVAGING
|
||||
* - SALVAGE_REQ
|
||||
*/
|
||||
|
||||
if (vp->salvage.requested) {
|
||||
@ -3767,8 +3774,7 @@ GetVolume(Error * ec, Error * client_ec, VolId volumeId, Volume * hint, int nowa
|
||||
}
|
||||
}
|
||||
|
||||
if ((V_attachState(vp) == VOL_STATE_SALVAGING) ||
|
||||
(*ec == VSALVAGING)) {
|
||||
if (VIsSalvaging(vp) || (*ec == VSALVAGING)) {
|
||||
if (client_ec) {
|
||||
/* see CheckVnode() in afsfileprocs.c for an explanation
|
||||
* of this error code logic */
|
||||
@ -4885,6 +4891,62 @@ VVolOpSetVBusy_r(Volume * vp, FSSYNC_VolOp_info * vopinfo)
|
||||
/* online salvager routines */
|
||||
/***************************************************/
|
||||
#if defined(AFS_DEMAND_ATTACH_FS)
|
||||
|
||||
/**
|
||||
* offline a volume to let it be salvaged.
|
||||
*
|
||||
* @param[in] vp Volume to offline
|
||||
*
|
||||
* @return whether we offlined the volume successfully
|
||||
* @retval 0 volume was not offlined
|
||||
* @retval 1 volume is now offline
|
||||
*
|
||||
* @note This is similar to VCheckOffline, but slightly different. We do not
|
||||
* deal with vp->goingOffline, and we try to avoid touching the volume
|
||||
* header except just to set needsSalvaged
|
||||
*
|
||||
* @pre VOL_LOCK held
|
||||
* @pre vp->nUsers == 0
|
||||
* @pre V_attachState(vp) == VOL_STATE_SALVAGE_REQ
|
||||
*/
|
||||
static int
|
||||
VOfflineForSalvage_r(struct Volume *vp)
|
||||
{
|
||||
Error error;
|
||||
|
||||
VCreateReservation_r(vp);
|
||||
VWaitExclusiveState_r(vp);
|
||||
|
||||
if (vp->nUsers || V_attachState(vp) == VOL_STATE_SALVAGING) {
|
||||
/* Someone's using the volume, or someone got to scheduling the salvage
|
||||
* before us. I don't think either of these should be possible, as we
|
||||
* should gain no new heavyweight references while we're trying to
|
||||
* salvage, but just to be sure... */
|
||||
VCancelReservation_r(vp);
|
||||
return 0;
|
||||
}
|
||||
|
||||
VChangeState_r(vp, VOL_STATE_OFFLINING);
|
||||
|
||||
VLRU_Delete_r(vp);
|
||||
if (vp->header) {
|
||||
V_needsSalvaged(vp) = 1;
|
||||
/* ignore error; updating needsSalvaged is just best effort */
|
||||
VUpdateVolume_r(&error, vp, VOL_UPDATE_NOFORCEOFF);
|
||||
}
|
||||
VCloseVolumeHandles_r(vp);
|
||||
|
||||
FreeVolumeHeader(vp);
|
||||
|
||||
/* volume has been effectively offlined; we can mark it in the SALVAGING
|
||||
* state now, which lets FSSYNC give it away */
|
||||
VChangeState_r(vp, VOL_STATE_SALVAGING);
|
||||
|
||||
VCancelReservation_r(vp);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* check whether a salvage needs to be performed on this volume.
|
||||
*
|
||||
@ -4910,12 +4972,31 @@ VCheckSalvage(Volume * vp)
|
||||
{
|
||||
int ret = 0;
|
||||
#if defined(SALVSYNC_BUILD_CLIENT) || defined(FSSYNC_BUILD_CLIENT)
|
||||
if (vp->nUsers || vp->nWaiters)
|
||||
if (vp->nUsers)
|
||||
return ret;
|
||||
if (!vp->salvage.requested) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* prevent recursion; some of the code below creates and removes
|
||||
* lightweight refs, which can call VCheckSalvage */
|
||||
if (vp->salvage.scheduling) {
|
||||
return ret;
|
||||
}
|
||||
vp->salvage.scheduling = 1;
|
||||
|
||||
if (V_attachState(vp) == VOL_STATE_SALVAGE_REQ) {
|
||||
if (!VOfflineForSalvage_r(vp)) {
|
||||
vp->salvage.scheduling = 0;
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
if (vp->salvage.requested) {
|
||||
VScheduleSalvage_r(vp);
|
||||
ret = 1;
|
||||
}
|
||||
vp->salvage.scheduling = 0;
|
||||
#endif /* SALVSYNC_BUILD_CLIENT || FSSYNC_BUILD_CLIENT */
|
||||
return ret;
|
||||
}
|
||||
@ -4985,7 +5066,24 @@ VRequestSalvage_r(Error * ec, Volume * vp, int reason, int flags)
|
||||
* fear of a salvage already running for this volume. */
|
||||
|
||||
if (vp->stats.salvages < SALVAGE_COUNT_MAX) {
|
||||
VChangeState_r(vp, VOL_STATE_SALVAGING);
|
||||
|
||||
/* if we don't need to offline the volume, we can go directly
|
||||
* to SALVAGING. SALVAGING says the volume is offline and is
|
||||
* either salvaging or ready to be handed to the salvager.
|
||||
* SALVAGE_REQ says that we want to salvage the volume, but we
|
||||
* are waiting for it to go offline first. */
|
||||
if (flags & VOL_SALVAGE_NO_OFFLINE) {
|
||||
VChangeState_r(vp, VOL_STATE_SALVAGING);
|
||||
} else {
|
||||
VChangeState_r(vp, VOL_STATE_SALVAGE_REQ);
|
||||
if (vp->nUsers == 0) {
|
||||
/* normally VOfflineForSalvage_r would be called from
|
||||
* PutVolume et al when nUsers reaches 0, but if
|
||||
* it's already 0, just do it ourselves, since PutVolume
|
||||
* isn't going to get called */
|
||||
VOfflineForSalvage_r(vp);
|
||||
}
|
||||
}
|
||||
*ec = VSALVAGING;
|
||||
} else {
|
||||
Log("VRequestSalvage: volume %u online salvaged too many times; forced offline.\n", vp->hashid);
|
||||
@ -5173,6 +5271,13 @@ VScheduleSalvage_r(Volume * vp)
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (vp->salvage.scheduled) {
|
||||
return ret;
|
||||
}
|
||||
|
||||
VCreateReservation_r(vp);
|
||||
VWaitExclusiveState_r(vp);
|
||||
|
||||
/*
|
||||
* XXX the scheduling process should really be done asynchronously
|
||||
* to avoid fssync deadlocks
|
||||
@ -5182,9 +5287,6 @@ VScheduleSalvage_r(Volume * vp)
|
||||
*
|
||||
* set the volume to an exclusive state and drop the lock
|
||||
* around the SALVSYNC call
|
||||
*
|
||||
* note that we do NOT acquire a reservation here -- doing so
|
||||
* could result in unbounded recursion
|
||||
*/
|
||||
strlcpy(partName, VPartitionPath(vp->partition), sizeof(partName));
|
||||
state_save = VChangeState_r(vp, VOL_STATE_SALVSYNC_REQ);
|
||||
@ -5227,6 +5329,7 @@ VScheduleSalvage_r(Volume * vp)
|
||||
}
|
||||
}
|
||||
}
|
||||
VCancelReservation_r(vp);
|
||||
return ret;
|
||||
}
|
||||
#endif /* SALVSYNC_BUILD_CLIENT || FSSYNC_BUILD_CLIENT */
|
||||
|
@ -172,9 +172,12 @@ typedef enum {
|
||||
VOL_STATE_VNODE_RELEASE = 18, /**< volume is busy releasing vnodes */
|
||||
VOL_STATE_VLRU_ADD = 19, /**< volume is busy being added to a VLRU queue */
|
||||
VOL_STATE_DELETED = 20, /**< volume has been deleted by the volserver */
|
||||
VOL_STATE_SALVAGE_REQ = 21, /**< volume has been requested to be salvaged,
|
||||
* but is waiting for other users to go away
|
||||
* so it can be offlined */
|
||||
/* please add new states directly above this line */
|
||||
VOL_STATE_FREED = 21, /**< debugging aid */
|
||||
VOL_STATE_COUNT = 22 /**< total number of valid states */
|
||||
VOL_STATE_FREED = 22, /**< debugging aid */
|
||||
VOL_STATE_COUNT = 23 /**< total number of valid states */
|
||||
} VolState;
|
||||
|
||||
/**
|
||||
@ -604,7 +607,11 @@ typedef struct VolumeOnlineSalvage {
|
||||
int reason; /**< reason for requesting online salvage */
|
||||
byte requested; /**< flag specifying that salvage should be scheduled */
|
||||
byte scheduled; /**< flag specifying whether online salvage scheduled */
|
||||
byte reserved[2]; /**< padding */
|
||||
byte scheduling; /**< if nonzero, this volume has entered
|
||||
* VCheckSalvage(), so if we recurse into
|
||||
* VCheckSalvage() with this set, exit immediately
|
||||
* to avoid recursing forever */
|
||||
byte reserved[1]; /**< padding */
|
||||
} VolumeOnlineSalvage;
|
||||
|
||||
/**
|
||||
@ -978,6 +985,8 @@ extern int VWalkVolumeHeaders(struct DiskPartition64 *dp, const char *partpath,
|
||||
|
||||
/* VRequestSalvage_r flags */
|
||||
#define VOL_SALVAGE_INVALIDATE_HEADER 0x1 /* for demand attach fs, invalidate volume header cache */
|
||||
#define VOL_SALVAGE_NO_OFFLINE 0x2 /* we do not need to wait to offline the volume; it has
|
||||
* not been fully attached */
|
||||
|
||||
|
||||
#if defined(NEARINODE_HINT)
|
||||
|
@ -239,6 +239,37 @@ VVolLockType(int mode, int writeable)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* tells caller whether or not the volume is effectively salvaging.
|
||||
*
|
||||
* @param vp volume pointer
|
||||
*
|
||||
* @return whether volume is salvaging or not
|
||||
* @retval 0 no, volume is not salvaging
|
||||
* @retval 1 yes, volume is salvaging
|
||||
*
|
||||
* @note The volume may not actually be getting salvaged at the moment if
|
||||
* this returns 1, but may have just been requested or scheduled to be
|
||||
* salvaged. Callers should treat these cases as pretty much the same
|
||||
* anyway, since we should not touch a volume that is busy salvaging or
|
||||
* waiting to be salvaged.
|
||||
*/
|
||||
static_inline int
|
||||
VIsSalvaging(struct Volume *vp)
|
||||
{
|
||||
/* these tests are a bit redundant, but to be safe... */
|
||||
switch(V_attachState(vp)) {
|
||||
case VOL_STATE_SALVAGING:
|
||||
case VOL_STATE_SALVAGE_REQ:
|
||||
return 1;
|
||||
default:
|
||||
if (vp->salvage.requested || vp->salvage.scheduled) {
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* tells caller whether or not the current state requires
|
||||
* exclusive access without holding glock.
|
||||
@ -291,6 +322,7 @@ VIsErrorState(VolState state)
|
||||
switch (state) {
|
||||
case VOL_STATE_ERROR:
|
||||
case VOL_STATE_SALVAGING:
|
||||
case VOL_STATE_SALVAGE_REQ:
|
||||
return 1;
|
||||
default:
|
||||
return 0;
|
||||
|
Loading…
Reference in New Issue
Block a user