From 6f5f13795a2176062e7a69d7bcf883c88ea10fe6 Mon Sep 17 00:00:00 2001 From: Andrew Deason Date: Wed, 16 Sep 2020 23:05:07 -0500 Subject: [PATCH] tests: Check for vlserver startup Currently, afstest_StartVLServer waits for a fixed amount of time (5 seconds) for the vlserver to startup. This means each test that runs a vlserver takes at least 5 seconds, which adds up very quickly when new tests are added for vlserver functionality. Instead of waiting 5 seconds each time, change afstest_StartVLServer to check if the vlserver has started up, and to wait a much shorter time if it hasn't finished starting up yet. Specifically, wait until the "Starting AFS vlserver" message appears in VLLog, and the UBIK_RECHAVEDB recovery state bit is set. This allows vlserver-based tests to run much more quickly. Change-Id: I74015670a301980e6d89b3149ed9f2a6c1a40523 Reviewed-on: https://gerrit.openafs.org/14804 Tested-by: BuildBot Reviewed-by: Cheyenne Wills Reviewed-by: Benjamin Kaduk --- tests/common/Makefile.in | 1 + tests/common/common.h | 1 + tests/common/files.c | 39 +++++++++++++ tests/common/servers.c | 116 +++++++++++++++++++++++++++++++++++---- 4 files changed, 145 insertions(+), 12 deletions(-) diff --git a/tests/common/Makefile.in b/tests/common/Makefile.in index f7a39af206..176167ece3 100644 --- a/tests/common/Makefile.in +++ b/tests/common/Makefile.in @@ -10,6 +10,7 @@ LT_objs = config.lo exec.lo files.lo misc.lo network.lo rxkad.lo servers.lo \ ubik.lo LT_libs = $(LIB_rfc3961) $(LIB_roken) LT_deps = $(top_builddir)/tests/tap/libafstest_tap.la \ + $(top_builddir)/src/ubik/liboafs_ubik.la \ $(top_builddir)/src/util/liboafs_util.la all: libafstest_common.la diff --git a/tests/common/common.h b/tests/common/common.h index 797343a28b..213ab879dc 100644 --- a/tests/common/common.h +++ b/tests/common/common.h @@ -54,6 +54,7 @@ extern char *afstest_mkdtemp(void); extern void afstest_rmdtemp(char *path); extern char *afstest_src_path(char *path); extern char *afstest_obj_path(char *path); +extern int afstest_file_contains(char *path, char *target); /* rxkad.c */ diff --git a/tests/common/files.c b/tests/common/files.c index ecb96c6182..dfc7f82fc7 100644 --- a/tests/common/files.c +++ b/tests/common/files.c @@ -126,3 +126,42 @@ afstest_obj_path(char *path) { return path_from_tdir("C_TAP_BUILD", path); } + +/** + * Check if the given file contains a string. To keep things simple, the string + * to look for must appear on a single line, and must appear in the first 128 + * bytes of that line. + * + * @param[in] path Path to the file to check. + * @param[in] target The string to look for in 'path'. + * + * @retval 1 The file contains the given string. + * @retval 0 The file does not contain the given string (or we encountered an + * error). + */ +int +afstest_file_contains(char *path, char *target) +{ + FILE *fh; + char line[128]; + int found = 0; + + fh = fopen(path, "r"); + if (fh == NULL) { + diag("%s: Failed to open %s", __func__, path); + goto done; + } + + while (fgets(line, sizeof(line), fh) != NULL) { + if (strstr(line, target) != NULL) { + found = 1; + goto done; + } + } + + done: + if (fh != NULL) { + fclose(fh); + } + return found; +} diff --git a/tests/common/servers.c b/tests/common/servers.c index ddff2e7b42..de5801a61c 100644 --- a/tests/common/servers.c +++ b/tests/common/servers.c @@ -11,11 +11,70 @@ #include #include +#define UBIK_INTERNALS +#include #include #include "common.h" +static void +check_startup(pid_t pid, char *log, int *a_started, int *a_stopped) +{ + int status; + struct rx_connection *conn; + struct ubik_debug udebug; + afs_int32 isclone; + pid_t exited; + int code; + + memset(&udebug, 0, sizeof(udebug)); + + opr_Assert(pid > 0); + + exited = waitpid(pid, &status, WNOHANG); + if (exited < 0) { + opr_Assert(errno == ECHILD); + } + + if (exited != 0) { + /* pid is no longer running; vlserver must have died during startup */ + *a_stopped = 1; + return; + } + + if (!afstest_file_contains(log, "Starting AFS vlserver")) { + /* vlserver hasn't logged the "Starting AFS vlserver" line yet, so it's + * presumably still starting up. */ + return; + } + + /* + * If we're going to write to the db, we also need to wait until recovery + * has the UBIK_RECHAVEDB state bit (without that, we won't be able to + * start write transactions). If we're just reading from the db, we + * wouldn't need to wait for this, but we don't know whether our caller + * will be writing to the db or not. It shouldn't take long for + * UBIK_RECHAVEDB to get set anyway, so just always check if it's been set + * (via VOTE_XDebug). + */ + + conn = rx_NewConnection(afstest_MyHostAddr(), htons(7003), VOTE_SERVICE_ID, + rxnull_NewClientSecurityObject(), 0); + code = VOTE_XDebug(conn, &udebug, &isclone); + rx_DestroyConnection(conn); + if (code != 0) { + diag("VOTE_XDebug returned %d while waiting for vlserver startup", + code); + return; + } + + if (udebug.amSyncSite && (udebug.recoveryState & UBIK_RECHAVEDB) != 0) { + /* Okay, it's set! We have finished startup. */ + *a_started = 1; + } +} + /* Start up the VLserver, using the configuration in dirname, and putting our * logs there too. */ @@ -24,18 +83,30 @@ int afstest_StartVLServer(char *dirname, pid_t *serverPid) { pid_t pid; - int status; + char *logPath; + int started = 0; + int stopped = 0; + int try; + FILE *fh; + int code = 0; + + logPath = afstest_asprintf("%s/VLLog", dirname); + + /* Create/truncate the log in advance (since we look at it to detect when + * the vlserver has started). */ + fh = fopen(logPath, "w"); + opr_Assert(fh != NULL); + fclose(fh); pid = fork(); if (pid == -1) { exit(1); /* Argggggghhhhh */ } else if (pid == 0) { - char *binPath, *logPath, *dbPath; + char *binPath, *dbPath; /* Child */ binPath = afstest_obj_path("src/tvlserver/vlserver"); - logPath = afstest_asprintf("%s/VLLog", dirname); dbPath = afstest_asprintf("%s/vldb", dirname); execl(binPath, "vlserver", @@ -44,22 +115,43 @@ afstest_StartVLServer(char *dirname, pid_t *serverPid) exit(1); } - if (waitpid(pid, &status, WNOHANG) != 0) { - fprintf(stderr, "Error starting vlserver\n"); - return -1; + /* + * Wait for the vlserver to startup. Try to check if the vlserver is ready + * by checking the log file and the urecovery_state (check_startup()), but + * if it's taking too long, just return success anyway. If the vlserver + * isn't ready yet, then the caller's tests may fail, but we can't wait + * forever. + */ + + diag("waiting for vlserver to startup"); + + usleep(5000); /* 5ms */ + check_startup(pid, logPath, &started, &stopped); + for (try = 0; !started && !stopped; try++) { + if (try > 100 * 5) { + diag("waited too long for vlserver to finish starting up; " + "proceeding anyway"); + goto done; + } + + usleep(1000 * 10); /* 10ms */ + check_startup(pid, logPath, &started, &stopped); } - diag("Sleeping for a few seconds to let the vlserver startup"); - sleep(5); - - if (waitpid(pid, &status, WNOHANG) != 0) { + if (stopped) { fprintf(stderr, "vlserver died during startup\n"); - return -1; + code = -1; + goto done; } + diag("vlserver started after try %d", try); + + done: *serverPid = pid; - return 0; + free(logPath); + + return code; } int