bozo: mark failed bnodes as stopped/shutdown

Commit 466e8cb15e ('bozo: retry start
after error stops') introduced several problems for 'simple' bnodes that
experience startup errors.  After its error retries have been exhausted,
the bnode continues to appear as if it is starting up.  For instance,
'bos stop' is required before 'bos delete' will work.  Also, if 'bos
stop -wait' is issued for a different bnode, the command will hang due
to BOZO_WaitAll waiting indefinitely for the bnode that has exhausted
its error retries.

Instead, introduce bnode_IsErrorRetrying and modify ez_getstat to call
it.  In this way ez_getstat will only return BSTAT_STARTINGUP if the
error retries have not been exhausted yet.  While here, also modify
ez_procexit and SetNeedsClock to consolidate all equivalent logic in
bnode_IsErrorRetrying.

Change-Id: I29d419d76a889e13049116fa66d1a63d11c16b46
Reviewed-on: https://gerrit.openafs.org/13376
Tested-by: BuildBot <buildbot@rampaginggeek.com>
Reviewed-by: Michael Meffie <mmeffie@sinenomine.net>
Reviewed-by: Cheyenne Wills <cwills@sinenomine.net>
This commit is contained in:
Mark Vitale 2018-10-10 23:17:13 -04:00 committed by Michael Meffie
parent 80c23d958c
commit 7f251877c9
4 changed files with 16 additions and 3 deletions

View File

@ -537,6 +537,18 @@ bnode_Delete(struct bnode *abnode)
return code;
}
/*
 * Report whether a bnode is still performing error retries.
 *
 * Returns 1 only when BNODE_ERRORSTOP is set in abnode->flags and
 * abnode->errorStopDelay is non-zero; returns 0 in every other case.
 */
int
bnode_IsErrorRetrying(struct bnode *abnode)
{
    /* Not flagged as stopped for errors: nothing to retry. */
    if ((abnode->flags & BNODE_ERRORSTOP) == 0) {
        return 0;
    }

    /* Flagged, but no retry delay is set: no error retries. */
    if (abnode->errorStopDelay == 0) {
        return 0;
    }

    return 1;   /* still doing error retries */
}
/* function to tell if there's a timeout coming up */
int
bnode_PendingTimeout(struct bnode *abnode)

View File

@ -158,3 +158,4 @@ extern int bnode_SetStat(struct bnode *abnode, int agoal);
extern int bnode_CreatePidFile(struct bnode *abnode, struct bnode_proc *aproc, char *name);
extern int bnode_DestroyPidFile(struct bnode *abnode, struct bnode_proc *aproc);
extern int bnode_ResetErrorCount(struct bnode *abnode);
extern int bnode_IsErrorRetrying(struct bnode *abnode);

View File

@ -158,7 +158,7 @@ ez_getstat(struct bnode *bn, afs_int32 * astatus)
temp = BSTAT_SHUTTINGDOWN;
else if (abnode->running)
temp = BSTAT_NORMAL;
else if (abnode->b.flags & BNODE_ERRORSTOP)
else if (bnode_IsErrorRetrying(bn))
temp = BSTAT_STARTINGUP;
else
temp = BSTAT_SHUTDOWN;
@ -225,7 +225,7 @@ ez_procexit(struct bnode *bn, struct bnode_proc *aproc)
bnode_SetTimeout((struct bnode *) abnode, 0); /* clear timer */
if (abnode->b.goal)
code = ez_setstat((struct bnode *) abnode, BSTAT_NORMAL);
else if (abnode->b.flags & BNODE_ERRORSTOP && abnode->b.errorStopDelay) {
else if (bnode_IsErrorRetrying(bn)) {
ViceLog(0, ("%s will retry start in %d seconds\n", abnode->b.name,
abnode->b.errorStopDelay));
bnode_SetTimeout(bn, abnode->b.errorStopDelay);

View File

@ -881,7 +881,7 @@ SetNeedsClock(struct fsbnode *ab)
}
} else if ((ab->b.goal == 0) && !ab->fileRunning && !ab->volRunning
&& !ab->salRunning && !ab->scanRunning && !ab->salsrvRunning) {
if (ab->b.flags & BNODE_ERRORSTOP && ab->b.errorStopDelay) {
if (bnode_IsErrorRetrying(&ab->b)) {
ViceLog(0, ("%s will retry start in %d seconds\n", ab->b.name,
ab->b.errorStopDelay));
ab->needsClock = 1; /* halted for errors, retry later */