afs: Log pid with disk cache read errors

Log the current pid (and procname) when we complain about an error
when reading from CacheItems in afs_UFSGetDSlot. These errors can
result in confusing situations, so it can be helpful to know at least
what process saw the error.

Our logic for logging this information is getting a bit large, so also
move this to a new function, LogCacheError.

Change-Id: I3427e736458784df0d516f4182684605e930e128
Reviewed-on: https://gerrit.openafs.org/14416
Reviewed-by: Mark Vitale <mvitale@sinenomine.net>
Reviewed-by: Cheyenne Wills <cwills@sinenomine.net>
Tested-by: BuildBot <buildbot@rampaginggeek.com>
Reviewed-by: Benjamin Kaduk <kaduk@mit.edu>
This commit is contained in:
Andrew Deason 2020-10-26 12:19:19 -05:00 committed by Benjamin Kaduk
parent 98c1a8751c
commit 1caeeea43c

View File

@ -3072,6 +3072,34 @@ afs_MemGetDSlot(afs_int32 aslot, dslot_state type)
} /*afs_MemGetDSlot */
static void
LogCacheError(int aslot, int off, int code, int target_size)
{
struct osi_stat tstat;
char *procname;
if (afs_osi_Stat(afs_cacheInodep, &tstat)) {
tstat.size = -1;
}
procname = osi_AllocSmallSpace(AFS_SMALLOCSIZ);
if (procname != NULL) {
osi_procname(procname, AFS_SMALLOCSIZ);
procname[AFS_SMALLOCSIZ-1] = '\0';
}
afs_warn("afs: disk cache read error in CacheItems slot %d "
"off %d/%d code %d/%d pid %d (%s)\n",
aslot, off, (int)tstat.size, code, target_size,
(int)MyPidxx2Pid(MyPidxx),
procname ? procname : "");
if (procname != NULL) {
osi_FreeSmallSpace(procname);
procname = NULL;
}
}
unsigned int last_error = 0, lasterrtime = 0;
/*
@ -3156,15 +3184,8 @@ afs_UFSGetDSlot(afs_int32 aslot, dslot_state type)
/* If we are requesting a non-DSLOT_NEW slot, this is an error.
* non-DSLOT_NEW slots are supposed to already exist, so if we
* failed to read in the slot, something is wrong. */
struct osi_stat tstat;
if (afs_osi_Stat(afs_cacheInodep, &tstat)) {
tstat.size = -1;
}
afs_warn("afs: disk cache read error in CacheItems slot %d "
"off %d/%d code %d/%d\n",
(int)aslot,
off, (int)tstat.size,
(int)code, (int)sizeof(struct fcache));
LogCacheError(aslot, off, code, sizeof(struct fcache));
/* put tdc back on the free dslot list */
QRemove(&tdc->lruq);
tdc->index = NULLIDX;