viced: Raise fsstate loop detection limits

When verifying the sanity of host and callback state at startup and
shutdown, dafileserver attempts to detect loops in various structures
by imposing arbitrary hard-coded limits (e.g.
FS_STATE_HCB_MAX_LIST_LEN, 100000 callbacks per host).

However, it is possible for AFS clients with heavy RW workloads to
legitimately exceed some of these limits, if the dafileserver itself
is configured with -cb higher than 100000, for instance.

When this occurs, the dafileserver will falsely invalidate the
fsstate.dat file at shutdown:

  "cb_stateVerifyFCBList: error: list length exceeds 100000 (h->index=1);
   assuming there's a loop"

The dafileserver will then refuse to load the invalid fsstate.dat file
at startup, defeating the callback preservation feature just when it
is most sorely needed - when there are hundreds of thousands of
callbacks.

These arbitrary limits are unnecessary, since we already know the
number of structures we're processing for all of these cases (CBs,
FEs, and hosts). So just get rid of the hard-coded limits, and use the
actual limit for the relevant structure instead:

For CBs and FEs, we can never have more than 'cbstuff.nblks' entries
whether we're loading or saving state, so just use that limit for
simplicity.

For hosts, the number of entries is either in the dump file (when
restoring state), or just in our 'hostCount' global (when saving
state). To make sure a corrupted state file doesn't cause the host
limit to be unreasonably high, impose a new, higher, arbitrary limit
on the number of hosts (FS_STATE_H_MAX_LIST_LEN, set to 100M).

[adeason@sinenomine.net: Raise limits for _all_ structures.]

Change-Id: I525916dc6189b887e4625371e81afac98f12a651
Reviewed-on: https://gerrit.openafs.org/14727
Reviewed-by: Michael Meffie <mmeffie@sinenomine.net>
Reviewed-by: Cheyenne Wills <cwills@sinenomine.net>
Tested-by: BuildBot <buildbot@rampaginggeek.com>
Reviewed-by: Mark Vitale <mvitale@sinenomine.net>
Reviewed-by: Andrew Deason <adeason@sinenomine.net>
This commit is contained in:
Mark Vitale 2020-07-17 18:17:52 -04:00 committed by Andrew Deason
parent 900d89380d
commit c5e3b430d8
3 changed files with 59 additions and 25 deletions

View File

@ -1944,7 +1944,9 @@ cb_stateVerifyFEHash(struct fs_dump_state * state)
{
int ret = 0, i;
struct FileEntry * fe;
afs_uint32 fei, chain_len;
afs_uint32 fei, chain_len, max_FEs;
max_FEs = cbstuff.nblks;
for (i = 0; i < FEHASH_SIZE; i++) {
chain_len = 0;
@ -1959,9 +1961,9 @@ cb_stateVerifyFEHash(struct fs_dump_state * state)
if (cb_stateVerifyFE(state, fe)) {
ret = 1;
}
if (chain_len > FS_STATE_FE_MAX_HASH_CHAIN_LEN) {
if (chain_len > max_FEs) {
ViceLog(0, ("cb_stateVerifyFEHash: error: hash chain %d length exceeds %d; assuming there's a loop\n",
i, FS_STATE_FE_MAX_HASH_CHAIN_LEN));
i, max_FEs));
ret = 1;
break;
}
@ -1998,9 +2000,10 @@ static int
cb_stateVerifyFCBList(struct fs_dump_state * state, struct FileEntry * fe)
{
int ret = 0;
afs_uint32 cbi, fei, chain_len = 0;
afs_uint32 cbi, fei, chain_len = 0, max_CBs;
struct CallBack * cb;
max_CBs = cbstuff.nblks;
fei = fetoi(fe);
for (cbi = fe->firstcb, cb = itocb(cbi);
@ -2017,9 +2020,9 @@ cb_stateVerifyFCBList(struct fs_dump_state * state, struct FileEntry * fe)
fei, cb->fhead));
ret = 1;
}
if (chain_len > FS_STATE_FCB_MAX_LIST_LEN) {
if (chain_len > max_CBs) {
ViceLog(0, ("cb_stateVerifyFCBList: error: list length exceeds %d (fei=%d); assuming there's a loop\n",
FS_STATE_FCB_MAX_LIST_LEN, fei));
max_CBs, fei));
ret = 1;
goto done;
}
@ -2040,9 +2043,10 @@ int
cb_stateVerifyHCBList(struct fs_dump_state * state, struct host * host)
{
int ret = 0;
afs_uint32 hi, chain_len, cbi;
afs_uint32 hi, chain_len, cbi, max_CBs;
struct CallBack *cb, *ncb;
max_CBs = cbstuff.nblks;
hi = h_htoi(host);
chain_len = 0;
@ -2078,9 +2082,10 @@ cb_stateVerifyHCBList(struct fs_dump_state * state, struct host * host)
ret = 1;
goto done;
}
if (chain_len > FS_STATE_HCB_MAX_LIST_LEN) {
if (chain_len > max_CBs) {
ViceLog(0, ("cb_stateVerifyHCBList: error: list length exceeds %d (h->index=%d); assuming there's a loop\n",
FS_STATE_HCB_MAX_LIST_LEN, hi));
max_CBs, hi));
ret = 1;
goto done;
}
@ -2095,9 +2100,11 @@ static int
cb_stateVerifyTimeoutQueues(struct fs_dump_state * state)
{
int ret = 0, i;
afs_uint32 cbi, chain_len;
afs_uint32 cbi, chain_len, max_CBs;
struct CallBack *cb, *ncb;
max_CBs = cbstuff.nblks;
for (i = 0; i < CB_NUM_TIMEOUT_QUEUES; i++) {
chain_len = 0;
for (cbi = timeout[i], cb = itocb(cbi);
@ -2138,9 +2145,10 @@ cb_stateVerifyTimeoutQueues(struct fs_dump_state * state)
ret = 1;
break;
}
if (chain_len > FS_STATE_TCB_MAX_LIST_LEN) {
if (chain_len > max_CBs) {
ViceLog(0, ("cb_stateVerifyTimeoutQueues: list length exceeds %d (tindex=%d); assuming there's a loop\n",
FS_STATE_TCB_MAX_LIST_LEN, i));
max_CBs, i));
ret = 1;
break;
}

View File

@ -3284,6 +3284,21 @@ h_stateVerify(struct fs_dump_state * state)
return state->bail;
}
static int
h_stateVerifyMaxHosts(struct fs_dump_state * state, int *a_maxHosts)
{
if (state->mode == FS_STATE_DUMP_MODE) {
*a_maxHosts = hostCount; /* state not fully populated yet */
return 0;
} else if (state->mode == FS_STATE_LOAD_MODE) {
*a_maxHosts = state->h_hdr->records;
return 0;
} else {
ViceLog(0, ("h_stateVerifyMaxHosts: bad state mode %d\n", state->mode));
return 1;
}
}
static int
h_stateVerifyHost(struct host * h, void* rock)
{
@ -3344,7 +3359,13 @@ h_stateVerifyAddrHash(struct fs_dump_state * state, struct host * h,
struct h_AddrHashChain *chain;
int index = h_HashIndex(addr);
char tmp[16];
int chain_len = 0;
int chain_len = 0, maxHosts;
ret = h_stateVerifyMaxHosts(state, &maxHosts);
if (ret != 0) {
ret = 1;
goto done;
}
for (chain = hostAddrHashTable[index]; chain; chain = chain->next) {
host = chain->hostPtr;
@ -3375,9 +3396,9 @@ h_stateVerifyAddrHash(struct fs_dump_state * state, struct host * h,
found = 1;
break;
}
if (chain_len > FS_STATE_H_MAX_ADDR_HASH_CHAIN_LEN) {
if (chain_len > maxHosts) {
ViceLog(0, ("h_stateVerifyAddrHash: error: hash chain length exceeds %d; assuming there's a loop\n",
FS_STATE_H_MAX_ADDR_HASH_CHAIN_LEN));
maxHosts));
ret = 1;
goto done;
}
@ -3416,7 +3437,13 @@ h_stateVerifyUuidHash(struct fs_dump_state * state, struct host * h)
afsUUID * uuidp = &h->z.interface->uuid;
int index = h_UuidHashIndex(uuidp);
struct uuid_fmtbuf tmp;
int chain_len = 0;
int chain_len = 0, maxHosts;
ret = h_stateVerifyMaxHosts(state, &maxHosts);
if (ret != 0) {
ret = 1;
goto done;
}
for (chain = hostUuidHashTable[index]; chain; chain = chain->next) {
host = chain->hostPtr;
@ -3436,9 +3463,9 @@ h_stateVerifyUuidHash(struct fs_dump_state * state, struct host * h)
}
goto done;
}
if (chain_len > FS_STATE_H_MAX_UUID_HASH_CHAIN_LEN) {
if (chain_len > maxHosts) {
ViceLog(0, ("h_stateVerifyUuidHash: error: hash chain length exceeds %d; assuming there's a loop\n",
FS_STATE_H_MAX_UUID_HASH_CHAIN_LEN));
maxHosts));
ret = 1;
goto done;
}
@ -3491,6 +3518,11 @@ h_stateCheckHeader(struct host_state_header * hdr)
ViceLog(0, ("check_host_state_header: unknown version number\n"));
ret = 1;
}
else if (hdr->records > FS_STATE_H_MAX_LIST_LEN) {
ViceLog(0, ("check_host_state_header: saved host state too large "
"(%d > %d)\n", hdr->records, FS_STATE_H_MAX_LIST_LEN));
ret = 1;
}
return ret;
}

View File

@ -216,13 +216,7 @@ struct idx_map_entry_t {
* make them fairly large so we don't get
* false positives
*/
#define FS_STATE_H_MAX_UUID_HASH_CHAIN_LEN 100000 /* max elements in a host uuid-hash chain */
#define FS_STATE_H_MAX_ADDR_HASH_CHAIN_LEN 2000000 /* max elements in a host ipv4-hash chain */
#define FS_STATE_FE_MAX_HASH_CHAIN_LEN 100000 /* max elements in a FE fid-hash chain */
#define FS_STATE_FCB_MAX_LIST_LEN 100000 /* max elements in a per-FE CB list */
#define FS_STATE_HCB_MAX_LIST_LEN 100000 /* max elements in a per-host CB list */
#define FS_STATE_TCB_MAX_LIST_LEN 100000 /* max elements in a per-timeout CB list */
#define FS_STATE_H_MAX_LIST_LEN 100000000 /* max number of hosts (100M) */
/*
* main state serialization state structure