mirror of
https://git.openafs.org/openafs.git
synced 2025-01-18 15:00:12 +00:00
viced: Set HashTable size at startup
Currently, our HashTable for FileEntry structs is a fixed size (FEHASH_SIZE, 512). If we have a large number of callbacks, this can lead to very long hash chains, which can cause the fileserver to consume high amounts of CPU when finding, deleting, or especially adding new callbacks. This is especially bad since callbacks are managed under the global H_LOCK. To improve this, use our configured callback limit (-cb) to build an appropriately-sized HashTable at initialization, instead of using a hard-coded size. We compute this by dividing the -cb value by the desired hash chain length (FE_CHAIN_TARGET, 16), then rounding up to the nearest power of 2 at least as big as the old FEHASH_SIZE, 512. For DAFS, a copy of our HashTable is included in the fsstate.dat file on disk, so changing our hashtable size potentially changes the data in fsstate.dat. However, we currently do not read in the hashtable data from fsstate.dat, since it's not very useful; we only write it out in case other utilities or older fileservers need it. Older fileservers, however, will not read an fsstate.dat with a hashtable that does not have exactly FEHASH_SIZE. So if we have a differently-sized hash table, we're breaking compatibility with those fileservers anyway, so don't write out our hashtable data at all. This commit also changes the format of the callback.dump file, since it did not allow for a variable-length hashtable. We create a new MAGICV3 magic to define the new format, and add some logic to the 'cbd' utility to understand it. We still write out our hashtable to this file since it's for debugging, and the hashtable data may be useful in that context. Move FEHash to callback.c, since it now relies on the callback.c-only FEhashsize, and move FEHASH_SIZE_OLD along with it. [adeason@sinenomine.net: Don't write out non-old-sized hashtable. Move FEHash &co to callback.c. Various other minor edits.] 
Change-Id: I54de91c54c5fcb526f880bc63ba10c1b3eb0aaf0 Reviewed-on: https://gerrit.openafs.org/14731 Reviewed-by: Mark Vitale <mvitale@sinenomine.net> Tested-by: BuildBot <buildbot@rampaginggeek.com> Reviewed-by: Cheyenne Wills <cwills@sinenomine.net> Reviewed-by: Michael Meffie <mmeffie@sinenomine.net> Reviewed-by: Andrew Deason <adeason@sinenomine.net>
This commit is contained in:
parent
f3629f87da
commit
2b9ba03ef7
@ -100,6 +100,7 @@ $(EXEFILE): $(EXEOBJS) $(EXELIBS)
|
||||
CBD = $(DESTDIR)\root.server\usr\afs\bin\cbd.exe
|
||||
|
||||
CBDLIBS =\
|
||||
$(DESTDIR)\lib\opr.lib \
|
||||
$(DESTDIR)\lib\afsroken.lib
|
||||
|
||||
CBDRES = $(OUT)\cbd.res
|
||||
|
@ -196,12 +196,16 @@ static int DumpCallBackState_r(void);
|
||||
/* Hand a callback entry to iFreeCB(), casting it to struct CallBack * and
 * passing the cbstuff.nCBs counter along with it. The argument is
 * parenthesized so the cast applies to the whole expression. */
#define FreeCB(cb) iFreeCB((struct CallBack *)(cb), &cbstuff.nCBs)
|
||||
/* Hand a file entry to iFreeFE(), casting it to struct FileEntry * and
 * passing the cbstuff.nFEs counter along with it. The argument is
 * parenthesized so the cast applies to the whole expression. */
#define FreeFE(fe) iFreeFE((struct FileEntry *)(fe), &cbstuff.nFEs)
|
||||
|
||||
static afs_uint32 *HashTable; /* File entry hash table */
|
||||
static afs_int32 FEhashsize; /* number of buckets in HashTable */
|
||||
|
||||
#define FEHASH_SIZE_OLD 512 /* Historical hard-coded FEhashsize */
|
||||
#define FE_CHAIN_TARGET 16 /* Target chain length for dynamic FEhashsize */
|
||||
/*
 * Map a (volume, unique) pair to a HashTable bucket index by masking their
 * sum with (FEhashsize - 1). The mask assumes FEhashsize is a power of 2.
 */
#define FEHash(volume, unique) (((volume)+(unique))&(FEhashsize - 1))
|
||||
|
||||
/* Other protos - move out sometime */
|
||||
void PrintCB(struct CallBack *cb, afs_uint32 now);
|
||||
|
||||
static afs_uint32 HashTable[FEHASH_SIZE]; /* File entry hash table */
|
||||
|
||||
static struct FileEntry *
|
||||
FindFE(AFSFid * fid)
|
||||
{
|
||||
@ -422,6 +426,8 @@ FDel(struct FileEntry *fe)
|
||||
int
|
||||
InitCallBack(int nblks)
|
||||
{
|
||||
afs_int32 workingHashSize;
|
||||
|
||||
opr_Assert(nblks > 0);
|
||||
|
||||
H_LOCK;
|
||||
@ -444,6 +450,45 @@ InitCallBack(int nblks)
|
||||
cbstuff.nCBs = nblks;
|
||||
while (cbstuff.nCBs)
|
||||
FreeCB(&CB[cbstuff.nCBs]); /* This is correct */
|
||||
|
||||
if (nblks > 64000) {
|
||||
/*
|
||||
* We may have a large number of callbacks to keep track of (more than
|
||||
* 64000, the default with the '-L' fileserver switch). Figure out a
|
||||
* hashtable size so our hash chains are around FE_CHAIN_TARGET long.
|
||||
* Start at the old historical hashtable size (FEHASH_SIZE_OLD), and
|
||||
* count up until we get a reasonable number.
|
||||
*/
|
||||
workingHashSize = FEHASH_SIZE_OLD;
|
||||
while (nblks / workingHashSize > FE_CHAIN_TARGET) {
|
||||
opr_Assert(workingHashSize < MAX_AFS_INT32 / 2);
|
||||
workingHashSize *= 2;
|
||||
}
|
||||
|
||||
} else {
|
||||
/*
|
||||
* It looks like we're using one of the historical default values for
|
||||
* our callback limit (64000 is the amount given by '-L' in the
|
||||
* fileserver; all other defaults are smaller).
|
||||
*
|
||||
* In this case, we're not using a huge number of callbacks, so the
|
||||
* size of the hashtable is not a big concern. Use the old hard-coded
|
||||
* hashtable size of 512 (FEHASH_SIZE_OLD), so any fsstate.dat file we
|
||||
* may save to disk is understandable by old fileservers and other
|
||||
* tools.
|
||||
*/
|
||||
workingHashSize = FEHASH_SIZE_OLD;
|
||||
}
|
||||
|
||||
/* hashtable size must be a power of 2 */
|
||||
opr_Assert(workingHashSize > 0);
|
||||
opr_Assert((workingHashSize & (workingHashSize - 1)) == 0);
|
||||
|
||||
HashTable = calloc(workingHashSize, sizeof(HashTable[0]));
|
||||
if (HashTable == NULL) {
|
||||
ViceLogThenPanic(0, ("Failed malloc in InitCallBack\n"));
|
||||
}
|
||||
FEhashsize = workingHashSize;
|
||||
cbstuff.nblks = nblks;
|
||||
cbstuff.nbreakers = 0;
|
||||
H_UNLOCK;
|
||||
@ -1230,7 +1275,7 @@ BreakVolumeCallBacksLater(VolumeId volume)
|
||||
ViceLog(25, ("Setting later on volume %" AFS_VOLID_FMT "\n",
|
||||
afs_printable_VolumeId_lu(volume)));
|
||||
H_LOCK;
|
||||
for (hash = 0; hash < FEHASH_SIZE; hash++) {
|
||||
for (hash = 0; hash < FEhashsize; hash++) {
|
||||
for (feip = &HashTable[hash]; (fe = itofe(*feip)) != NULL; ) {
|
||||
if (fe->volid == volume) {
|
||||
struct CallBack *cbnext;
|
||||
@ -1282,7 +1327,7 @@ BreakLaterCallBacks(void)
|
||||
/* Pick the first volume we see to clean up */
|
||||
fid.Volume = fid.Vnode = fid.Unique = 0;
|
||||
|
||||
for (hash = 0; hash < FEHASH_SIZE; hash++) {
|
||||
for (hash = 0; hash < FEhashsize; hash++) {
|
||||
for (feip = &HashTable[hash]; (fe = itofe(*feip)) != NULL; ) {
|
||||
if (fe && (fe->status & FE_LATER)
|
||||
&& (fid.Volume == 0 || fid.Volume == fe->volid)) {
|
||||
@ -1693,12 +1738,14 @@ PrintCallBackStats(void)
|
||||
cbstuff.nblks, (int) sizeof(struct CallBack));
|
||||
fprintf(stderr, "%d GSS1, %d GSS2, %d GSS3, %d GSS4, %d GSS5 (internal counters)\n",
|
||||
cbstuff.GSS1, cbstuff.GSS2, cbstuff.GSS3, cbstuff.GSS4, cbstuff.GSS5);
|
||||
|
||||
fprintf(stderr, "%d FEhashsize\n",
|
||||
FEhashsize);
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define MAGIC 0x12345678 /* To check byte ordering of dump when it is read in */
|
||||
#define MAGICV2 0x12345679 /* To check byte ordering & version of dump when it is read in */
|
||||
#define MAGICV3 0x1234567A /* Like MAGICV2, but the dump also records FEhashsize (variable-sized hashtable) */
|
||||
|
||||
|
||||
#ifndef INTERPRET_DUMP
|
||||
@ -1943,7 +1990,7 @@ cb_stateVerifyFEHash(struct fs_dump_state * state)
|
||||
|
||||
max_FEs = cbstuff.nblks;
|
||||
|
||||
for (i = 0; i < FEHASH_SIZE; i++) {
|
||||
for (i = 0; i < FEhashsize; i++) {
|
||||
chain_len = 0;
|
||||
for (fei = HashTable[i], fe = itofe(fei);
|
||||
fe;
|
||||
@ -2245,14 +2292,42 @@ cb_stateSaveFEHash(struct fs_dump_state * state)
|
||||
AssignInt64(state->eof_offset, &state->cb_hdr->fehash_offset);
|
||||
|
||||
state->cb_fehash_hdr->magic = CALLBACK_STATE_FEHASH_MAGIC;
|
||||
state->cb_fehash_hdr->records = FEHASH_SIZE;
|
||||
|
||||
if (FEhashsize != FEHASH_SIZE_OLD) {
|
||||
/*
|
||||
* If our hashtable size is not the historical FEHASH_SIZE_OLD, don't
|
||||
* write out the hashtable at all. The hashtable data on disk is not
|
||||
* very useful; we only write it out because older fileservers or other
|
||||
* utilities may need it for interpreting fsstate.dat. But if our
|
||||
* hashtable size is not FEHASH_SIZE_OLD, then they won't be able to
|
||||
* read it anyway, since the hashtable size has changed. So just don't
|
||||
* write out the data; just write the header that says we have 0
|
||||
* hashtable buckets.
|
||||
*/
|
||||
state->cb_fehash_hdr->records = 0;
|
||||
state->cb_fehash_hdr->len = sizeof(struct callback_state_fehash_header);
|
||||
|
||||
if (fs_stateWriteHeader(state, &state->cb_hdr->fehash_offset,
|
||||
state->cb_fehash_hdr,
|
||||
sizeof(*state->cb_fehash_hdr))) {
|
||||
ret = 1;
|
||||
goto done;
|
||||
}
|
||||
|
||||
} else {
|
||||
/*
|
||||
* Write out our HashTable data. This information is not terribly
|
||||
* useful, but older fileservers and other utilities may need it.
|
||||
* Someday this can probably be removed.
|
||||
*/
|
||||
state->cb_fehash_hdr->records = FEhashsize;
|
||||
state->cb_fehash_hdr->len = sizeof(struct callback_state_fehash_header) +
|
||||
(state->cb_fehash_hdr->records * sizeof(afs_uint32));
|
||||
|
||||
iov[0].iov_base = (char *)state->cb_fehash_hdr;
|
||||
iov[0].iov_len = sizeof(struct callback_state_fehash_header);
|
||||
iov[1].iov_base = (char *)HashTable;
|
||||
iov[1].iov_len = sizeof(HashTable);
|
||||
iov[1].iov_len = sizeof(HashTable[0]) * FEhashsize;
|
||||
|
||||
if (fs_stateSeek(state, &state->cb_hdr->fehash_offset)) {
|
||||
ret = 1;
|
||||
@ -2263,6 +2338,7 @@ cb_stateSaveFEHash(struct fs_dump_state * state)
|
||||
ret = 1;
|
||||
goto done;
|
||||
}
|
||||
}
|
||||
|
||||
fs_stateIncEOF(state, state->cb_fehash_hdr->len);
|
||||
|
||||
@ -2279,7 +2355,7 @@ cb_stateSaveFEs(struct fs_dump_state * state)
|
||||
|
||||
AssignInt64(state->eof_offset, &state->cb_hdr->fe_offset);
|
||||
|
||||
for (hash = 0; hash < FEHASH_SIZE ; hash++) {
|
||||
for (hash = 0; hash < FEhashsize ; hash++) {
|
||||
for (fei = HashTable[hash]; fei; fei = fe->fnext) {
|
||||
fe = itofe(fei);
|
||||
if (cb_stateSaveFE(state, fe)) {
|
||||
@ -2672,7 +2748,7 @@ static int
|
||||
DumpCallBackState_r(void)
|
||||
{
|
||||
int fd, oflag;
|
||||
afs_uint32 magic = MAGICV2, now = (afs_int32) time(NULL), freelisthead;
|
||||
afs_uint32 magic = MAGICV3, now = (afs_int32) time(NULL), freelisthead;
|
||||
|
||||
oflag = O_WRONLY | O_CREAT | O_TRUNC;
|
||||
#ifdef AFS_NT40_ENV
|
||||
@ -2692,6 +2768,7 @@ DumpCallBackState_r(void)
|
||||
DumpBytes(fd, &magic, sizeof(magic));
|
||||
DumpBytes(fd, &now, sizeof(now));
|
||||
DumpBytes(fd, &cbstuff, sizeof(cbstuff));
|
||||
DumpBytes(fd, &FEhashsize, sizeof(FEhashsize));
|
||||
DumpBytes(fd, TimeOuts, sizeof(TimeOuts));
|
||||
DumpBytes(fd, timeout, sizeof(timeout));
|
||||
DumpBytes(fd, &tfirst, sizeof(tfirst));
|
||||
@ -2699,7 +2776,7 @@ DumpCallBackState_r(void)
|
||||
DumpBytes(fd, &freelisthead, sizeof(freelisthead)); /* This is a pointer */
|
||||
freelisthead = fetoi((struct FileEntry *)FEfree);
|
||||
DumpBytes(fd, &freelisthead, sizeof(freelisthead)); /* This is a pointer */
|
||||
DumpBytes(fd, HashTable, sizeof(HashTable));
|
||||
DumpBytes(fd, HashTable, sizeof(HashTable[0]) * FEhashsize);
|
||||
DumpBytes(fd, &CB[1], sizeof(CB[1]) * cbstuff.nblks); /* CB stuff */
|
||||
DumpBytes(fd, &FE[1], sizeof(FE[1]) * cbstuff.nblks); /* FE stuff */
|
||||
close(fd);
|
||||
@ -2758,18 +2835,23 @@ ReadDump(char *file, int timebits)
|
||||
exit(1);
|
||||
}
|
||||
ReadBytes(fd, &magic, sizeof(magic));
|
||||
if (magic == MAGICV2) {
|
||||
if (magic == MAGICV3) {
|
||||
/* V3 contains a new field for FEhashsize */
|
||||
timebits = 32;
|
||||
FEhashsize = 0;
|
||||
} else if (magic == MAGICV2) {
|
||||
timebits = 32;
|
||||
FEhashsize = FEHASH_SIZE_OLD;
|
||||
} else if (magic == MAGIC) {
|
||||
FEhashsize = FEHASH_SIZE_OLD;
|
||||
} else {
|
||||
if (magic != MAGIC) {
|
||||
fprintf(stderr,
|
||||
"Magic number of %s is invalid. You might be trying to\n",
|
||||
file);
|
||||
"Magic number of %s is invalid (0x%x). You might be trying to\n",
|
||||
file, magic);
|
||||
fprintf(stderr,
|
||||
"run this program on a machine type with a different byte ordering.\n");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
if (timebits == 64) {
|
||||
ReadBytes(fd, &now64, sizeof(afs_int64));
|
||||
now = (afs_int32) now64;
|
||||
@ -2777,6 +2859,13 @@ ReadDump(char *file, int timebits)
|
||||
ReadBytes(fd, &now, sizeof(afs_int32));
|
||||
|
||||
ReadBytes(fd, &cbstuff, sizeof(cbstuff));
|
||||
if (FEhashsize == 0) {
|
||||
ReadBytes(fd, &FEhashsize, sizeof(FEhashsize));
|
||||
if (FEhashsize == 0) {
|
||||
fprintf(stderr, "FEhashsize of 0 is not supported.\n");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
ReadBytes(fd, TimeOuts, sizeof(TimeOuts));
|
||||
ReadBytes(fd, timeout, sizeof(timeout));
|
||||
ReadBytes(fd, &tfirst, sizeof(tfirst));
|
||||
@ -2788,7 +2877,9 @@ ReadDump(char *file, int timebits)
|
||||
CBfree = (struct CallBack *)itocb(freelisthead);
|
||||
ReadBytes(fd, &freelisthead, sizeof(freelisthead));
|
||||
FEfree = (struct FileEntry *)itofe(freelisthead);
|
||||
ReadBytes(fd, HashTable, sizeof(HashTable));
|
||||
HashTable = calloc(FEhashsize, sizeof(HashTable[0]));
|
||||
opr_Assert(HashTable != NULL);
|
||||
ReadBytes(fd, HashTable, sizeof(HashTable[0]) * FEhashsize);
|
||||
ReadBytes(fd, &CB[1], sizeof(CB[1]) * cbstuff.nblks); /* CB stuff */
|
||||
ReadBytes(fd, &FE[1], sizeof(FE[1]) * cbstuff.nblks); /* FE stuff */
|
||||
if (close(fd)) {
|
||||
@ -2895,7 +2986,7 @@ main(int argc, char **argv)
|
||||
struct CallBack *cb;
|
||||
struct FileEntry *fe;
|
||||
|
||||
for (hash = 0; hash < FEHASH_SIZE; hash++) {
|
||||
for (hash = 0; hash < FEhashsize; hash++) {
|
||||
for (feip = &HashTable[hash]; (fe = itofe(*feip));) {
|
||||
if (!vol || (fe->volid == vol)) {
|
||||
afs_uint32 fe_i = fetoi(fe);
|
||||
|
@ -82,11 +82,6 @@ struct VCBParams {
|
||||
};
|
||||
|
||||
|
||||
/* callback hash macros */
|
||||
#define FEHASH_SIZE 512 /* Power of 2 */
|
||||
#define FEHASH_MASK (FEHASH_SIZE-1)
|
||||
#define FEHash(volume, unique) (((volume)+(unique))&(FEHASH_MASK))
|
||||
|
||||
#define CB_NUM_TIMEOUT_QUEUES 128
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user