tests: Check for vlserver startup

Currently, afstest_StartVLServer waits for a fixed amount of time (5
seconds) for the vlserver to startup. This means each test that runs a
vlserver takes at least 5 seconds, which adds up very quickly when new
tests are added for vlserver functionality.

Instead of waiting 5 seconds each time, change afstest_StartVLServer
to check if the vlserver has started up, and to wait a much shorter
time if it hasn't finished starting up yet. Specifically, wait until
the "Starting AFS vlserver" message appears in VLLog, and the
UBIK_RECHAVEDB recovery state bit is set. This allows vlserver-based
tests to run much more quickly.

Change-Id: I74015670a301980e6d89b3149ed9f2a6c1a40523
Reviewed-on: https://gerrit.openafs.org/14804
Tested-by: BuildBot <buildbot@rampaginggeek.com>
Reviewed-by: Cheyenne Wills <cwills@sinenomine.net>
Reviewed-by: Benjamin Kaduk <kaduk@mit.edu>
This commit is contained in:
Andrew Deason 2020-09-16 23:05:07 -05:00 committed by Benjamin Kaduk
parent c3df91f303
commit 6f5f13795a
4 changed files with 145 additions and 12 deletions

View File

@ -10,6 +10,7 @@ LT_objs = config.lo exec.lo files.lo misc.lo network.lo rxkad.lo servers.lo \
ubik.lo
LT_libs = $(LIB_rfc3961) $(LIB_roken)
LT_deps = $(top_builddir)/tests/tap/libafstest_tap.la \
$(top_builddir)/src/ubik/liboafs_ubik.la \
$(top_builddir)/src/util/liboafs_util.la
all: libafstest_common.la

View File

@ -54,6 +54,7 @@ extern char *afstest_mkdtemp(void);
extern void afstest_rmdtemp(char *path);
extern char *afstest_src_path(char *path);
extern char *afstest_obj_path(char *path);
extern int afstest_file_contains(char *path, char *target);
/* rxkad.c */

View File

@ -126,3 +126,42 @@ afstest_obj_path(char *path)
{
return path_from_tdir("C_TAP_BUILD", path);
}
/**
* Check if the given file contains a string. To keep things simple, the string
* to look for must appear on a single line, and must appear in the first 128
* bytes of that line.
*
* @param[in] path Path to the file to check.
* @param[in] target The string to look for in 'path'.
*
* @retval 1 The file contains the given string.
* @retval 0 The file does not contain the given string (or we encountered an
* error).
*/
int
afstest_file_contains(char *path, char *target)
{
FILE *fh;
char line[128];
int found = 0;
fh = fopen(path, "r");
if (fh == NULL) {
diag("%s: Failed to open %s", __func__, path);
goto done;
}
while (fgets(line, sizeof(line), fh) != NULL) {
if (strstr(line, target) != NULL) {
found = 1;
goto done;
}
}
done:
if (fh != NULL) {
fclose(fh);
}
return found;
}

View File

@ -11,11 +11,70 @@
#include <afs/authcon.h>
#include <afs/cellconfig.h>
#define UBIK_INTERNALS
#include <ubik.h>
#include <tests/tap/basic.h>
#include "common.h"
static void
check_startup(pid_t pid, char *log, int *a_started, int *a_stopped)
{
int status;
struct rx_connection *conn;
struct ubik_debug udebug;
afs_int32 isclone;
pid_t exited;
int code;
memset(&udebug, 0, sizeof(udebug));
opr_Assert(pid > 0);
exited = waitpid(pid, &status, WNOHANG);
if (exited < 0) {
opr_Assert(errno == ECHILD);
}
if (exited != 0) {
/* pid is no longer running; vlserver must have died during startup */
*a_stopped = 1;
return;
}
if (!afstest_file_contains(log, "Starting AFS vlserver")) {
/* vlserver hasn't logged the "Starting AFS vlserver" line yet, so it's
* presumably still starting up. */
return;
}
/*
* If we're going to write to the db, we also need to wait until recovery
* has the UBIK_RECHAVEDB state bit (without that, we won't be able to
* start write transactions). If we're just reading from the db, we
* wouldn't need to wait for this, but we don't know whether our caller
* will be writing to the db or not. It shouldn't take long for
* UBIK_RECHAVEDB to get set anyway, so just always check if it's been set
* (via VOTE_XDebug).
*/
conn = rx_NewConnection(afstest_MyHostAddr(), htons(7003), VOTE_SERVICE_ID,
rxnull_NewClientSecurityObject(), 0);
code = VOTE_XDebug(conn, &udebug, &isclone);
rx_DestroyConnection(conn);
if (code != 0) {
diag("VOTE_XDebug returned %d while waiting for vlserver startup",
code);
return;
}
if (udebug.amSyncSite && (udebug.recoveryState & UBIK_RECHAVEDB) != 0) {
/* Okay, it's set! We have finished startup. */
*a_started = 1;
}
}
/* Start up the VLserver, using the configuration in dirname, and putting our
* logs there too.
*/
@ -24,18 +83,30 @@ int
afstest_StartVLServer(char *dirname, pid_t *serverPid)
{
pid_t pid;
int status;
char *logPath;
int started = 0;
int stopped = 0;
int try;
FILE *fh;
int code = 0;
logPath = afstest_asprintf("%s/VLLog", dirname);
/* Create/truncate the log in advance (since we look at it to detect when
* the vlserver has started). */
fh = fopen(logPath, "w");
opr_Assert(fh != NULL);
fclose(fh);
pid = fork();
if (pid == -1) {
exit(1);
/* Argggggghhhhh */
} else if (pid == 0) {
char *binPath, *logPath, *dbPath;
char *binPath, *dbPath;
/* Child */
binPath = afstest_obj_path("src/tvlserver/vlserver");
logPath = afstest_asprintf("%s/VLLog", dirname);
dbPath = afstest_asprintf("%s/vldb", dirname);
execl(binPath, "vlserver",
@ -44,22 +115,43 @@ afstest_StartVLServer(char *dirname, pid_t *serverPid)
exit(1);
}
if (waitpid(pid, &status, WNOHANG) != 0) {
fprintf(stderr, "Error starting vlserver\n");
return -1;
/*
* Wait for the vlserver to startup. Try to check if the vlserver is ready
* by checking the log file and the urecovery_state (check_startup()), but
* if it's taking too long, just return success anyway. If the vlserver
* isn't ready yet, then the caller's tests may fail, but we can't wait
* forever.
*/
diag("waiting for vlserver to startup");
usleep(5000); /* 5ms */
check_startup(pid, logPath, &started, &stopped);
for (try = 0; !started && !stopped; try++) {
if (try > 100 * 5) {
diag("waited too long for vlserver to finish starting up; "
"proceeding anyway");
goto done;
}
usleep(1000 * 10); /* 10ms */
check_startup(pid, logPath, &started, &stopped);
}
diag("Sleeping for a few seconds to let the vlserver startup");
sleep(5);
if (waitpid(pid, &status, WNOHANG) != 0) {
if (stopped) {
fprintf(stderr, "vlserver died during startup\n");
return -1;
code = -1;
goto done;
}
diag("vlserver started after try %d", try);
done:
*serverPid = pid;
return 0;
free(logPath);
return code;
}
int