mirror of
https://git.openafs.org/openafs.git
synced 2025-01-22 17:00:15 +00:00
DAFS: Do not serialize state for invalid hosts
When we serialize host information for DAFS during shutdown, we have no guarantee that the host is in a valid state when we look at it. This can result in a host being saved to disk when we are waiting for the host to respond to an RPC, and so the information about the host is invalid. For example, we can save a host that has the HWHO_INPROGRESS flag set, and when it is restored later, this can cause odd behavior since the flag is set but no thread is actually waiting for the host to respond. So instead, during state serialization, try to determine if a host may be in an invalid state, and simply skip the host if it may. Change-Id: I755640ea4ce607245ae98cc7455472ef781271e7 Reviewed-on: http://gerrit.openafs.org/5528 Tested-by: BuildBot <buildbot@rampaginggeek.com> Reviewed-by: Derrick Brashear <shadow@dementix.org>
This commit is contained in:
parent
6e7b31ef0a
commit
5c6bd04211
@ -138,13 +138,13 @@ fs_stateSave(void)
|
|||||||
* BUT, this still has one flaw -- what do we do about rx worker threads that
|
* BUT, this still has one flaw -- what do we do about rx worker threads that
|
||||||
* are blocked in the host package making an RPC call to a cm???
|
* are blocked in the host package making an RPC call to a cm???
|
||||||
*
|
*
|
||||||
* perhaps we need a refcounter that keeps track of threads blocked in rpc calls
|
* currently we try to detect if a host struct is in an inconsistent state
|
||||||
* with H_LOCK dropped (and the host struct likely left in an inconsistent state)
|
* when we go to save it to disk, and just skip the hosts that we think may
|
||||||
*
|
* be inconsistent (see h_isBusy_r in host.c). This has the problem of causing
|
||||||
* or better yet, we need to associate a state machine with each host object
|
* more InitCallBackState's when we come back up, but the number of hosts in
|
||||||
* (kind of like demand attach Volume structures).
|
* such a state should be small. In the future, we could try to lock hosts
|
||||||
*
|
* (with some deadline so we don't wait forever) before serializing, but at
|
||||||
* sigh. I suspect we'll need to revisit this issue
|
* least for now it does not seem worth the trouble.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
if (fs_state.options.fs_state_verify_before_save) {
|
if (fs_state.options.fs_state_verify_before_save) {
|
||||||
|
@ -2920,6 +2920,37 @@ static int h_stateVerifyUuidHash(struct fs_dump_state * state, struct host * h);
|
|||||||
static void h_hostToDiskEntry_r(struct host * in, struct hostDiskEntry * out);
|
static void h_hostToDiskEntry_r(struct host * in, struct hostDiskEntry * out);
|
||||||
static void h_diskEntryToHost_r(struct hostDiskEntry * in, struct host * out);
|
static void h_diskEntryToHost_r(struct hostDiskEntry * in, struct host * out);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Is this host busy?
|
||||||
|
*
|
||||||
|
* This is just a hint and should not be trusted; this should probably only be
|
||||||
|
* used by the host state serialization code when trying to detect if a host
|
||||||
|
* can be sanely serialized to disk or not. If this function returns 1, the
|
||||||
|
* host may be in an invalid state and thus should not be saved to disk.
|
||||||
|
*/
|
||||||
|
static int
|
||||||
|
h_isBusy_r(struct host *host)
|
||||||
|
{
|
||||||
|
struct Lock *hostLock = &host->lock;
|
||||||
|
int locked = 0;
|
||||||
|
|
||||||
|
LOCK_LOCK(hostLock);
|
||||||
|
if (hostLock->excl_locked || hostLock->readers_reading) {
|
||||||
|
locked = 1;
|
||||||
|
}
|
||||||
|
LOCK_UNLOCK(hostLock);
|
||||||
|
|
||||||
|
if (locked) {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((host->hostFlags & HWHO_INPROGRESS) || !(host->hostFlags & ALTADDR)) {
|
||||||
|
/* We shouldn't hit this if the host wasn't locked, but just in case... */
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
/* this procedure saves all host state to disk for fast startup */
|
/* this procedure saves all host state to disk for fast startup */
|
||||||
int
|
int
|
||||||
@ -3241,6 +3272,16 @@ h_stateSaveHost(struct host * host, void* rock)
|
|||||||
struct iovec iov[4];
|
struct iovec iov[4];
|
||||||
int iovcnt = 2;
|
int iovcnt = 2;
|
||||||
|
|
||||||
|
if (h_isBusy_r(host)) {
|
||||||
|
char hoststr[16];
|
||||||
|
ViceLog(1, ("Not saving host %s:%d to disk; host appears busy\n",
|
||||||
|
afs_inet_ntoa_r(host->host, hoststr), (int)ntohs(host->port)));
|
||||||
|
/* Make sure we don't try to save callbacks to disk for this host, or
|
||||||
|
* we'll get confused on restore */
|
||||||
|
DeleteAllCallBacks_r(host, 1);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
memset(&hdr, 0, sizeof(hdr));
|
memset(&hdr, 0, sizeof(hdr));
|
||||||
|
|
||||||
if (state->h_hdr->index_max < host->index) {
|
if (state->h_hdr->index_max < host->index) {
|
||||||
|
Loading…
Reference in New Issue
Block a user