mirror of
https://git.openafs.org/openafs.git
synced 2025-01-18 15:00:12 +00:00
bozo: mark failed bnodes as stopped/shutdown
Commit 466e8cb15e ('bozo: retry start after error stops') introduced
several problems for 'simple' bnodes that
experience startup errors. After its error retries have been exhausted,
the bnode continues to appear as if it is starting up. For instance,
'bos stop' is required before 'bos delete' will work. Also, if 'bos
stop -wait' is issued for a different bnode, the command will hang due
to BOZO_WaitAll waiting indefinitely for the bnode that has exhausted
its error retries.
To fix this, introduce bnode_IsErrorRetrying and modify ez_getstat to
call it. In this way ez_getstat will only return BSTAT_STARTINGUP if the
error retries have not been exhausted yet. While here, also modify
ez_procexit and SetNeedsClock to consolidate all equivalent logic in
bnode_IsErrorRetrying.
Change-Id: I29d419d76a889e13049116fa66d1a63d11c16b46
Reviewed-on: https://gerrit.openafs.org/13376
Tested-by: BuildBot <buildbot@rampaginggeek.com>
Reviewed-by: Michael Meffie <mmeffie@sinenomine.net>
Reviewed-by: Cheyenne Wills <cwills@sinenomine.net>
This commit is contained in:
parent
80c23d958c
commit
7f251877c9
@ -537,6 +537,18 @@ bnode_Delete(struct bnode *abnode)
|
||||
return code;
|
||||
}
|
||||
|
||||
/* Are we still doing error retries ? */
|
||||
int
|
||||
bnode_IsErrorRetrying(struct bnode *abnode)
|
||||
{
|
||||
if ((abnode->flags & BNODE_ERRORSTOP) != 0
|
||||
&& abnode->errorStopDelay != 0) {
|
||||
return 1; /* still doing error retries */
|
||||
} else {
|
||||
return 0; /* no error retries */
|
||||
}
|
||||
}
|
||||
|
||||
/* function to tell if there's a timeout coming up */
|
||||
int
|
||||
bnode_PendingTimeout(struct bnode *abnode)
|
||||
|
@ -158,3 +158,4 @@ extern int bnode_SetStat(struct bnode *abnode, int agoal);
|
||||
extern int bnode_CreatePidFile(struct bnode *abnode, struct bnode_proc *aproc, char *name);
|
||||
extern int bnode_DestroyPidFile(struct bnode *abnode, struct bnode_proc *aproc);
|
||||
extern int bnode_ResetErrorCount(struct bnode *abnode);
|
||||
extern int bnode_IsErrorRetrying(struct bnode *abnode);
|
||||
|
@ -158,7 +158,7 @@ ez_getstat(struct bnode *bn, afs_int32 * astatus)
|
||||
temp = BSTAT_SHUTTINGDOWN;
|
||||
else if (abnode->running)
|
||||
temp = BSTAT_NORMAL;
|
||||
else if (abnode->b.flags & BNODE_ERRORSTOP)
|
||||
else if (bnode_IsErrorRetrying(bn))
|
||||
temp = BSTAT_STARTINGUP;
|
||||
else
|
||||
temp = BSTAT_SHUTDOWN;
|
||||
@ -225,7 +225,7 @@ ez_procexit(struct bnode *bn, struct bnode_proc *aproc)
|
||||
bnode_SetTimeout((struct bnode *) abnode, 0); /* clear timer */
|
||||
if (abnode->b.goal)
|
||||
code = ez_setstat((struct bnode *) abnode, BSTAT_NORMAL);
|
||||
else if (abnode->b.flags & BNODE_ERRORSTOP && abnode->b.errorStopDelay) {
|
||||
else if (bnode_IsErrorRetrying(bn)) {
|
||||
ViceLog(0, ("%s will retry start in %d seconds\n", abnode->b.name,
|
||||
abnode->b.errorStopDelay));
|
||||
bnode_SetTimeout(bn, abnode->b.errorStopDelay);
|
||||
|
@ -881,7 +881,7 @@ SetNeedsClock(struct fsbnode *ab)
|
||||
}
|
||||
} else if ((ab->b.goal == 0) && !ab->fileRunning && !ab->volRunning
|
||||
&& !ab->salRunning && !ab->scanRunning && !ab->salsrvRunning) {
|
||||
if (ab->b.flags & BNODE_ERRORSTOP && ab->b.errorStopDelay) {
|
||||
if (bnode_IsErrorRetrying(&ab->b)) {
|
||||
ViceLog(0, ("%s will retry start in %d seconds\n", ab->b.name,
|
||||
ab->b.errorStopDelay));
|
||||
ab->needsClock = 1; /* halted for errors, retry later */
|
||||
|
Loading…
Reference in New Issue
Block a user