mirror of
https://git.openafs.org/openafs.git
synced 2025-01-31 13:38:01 +00:00
ubik: Introduce new beacon lock
A new lock is introduced to protect beacon related data when compiled with pthreads. A global structure is added containing the lock itself and the global variables that it protects. The lock also protects some values in the ubik_server structures: lastVoteTime lastBeaconSent lastVote up beaconSinceDown Based on some analysis and design work by Jeffrey Hutzelman Change-Id: I13f72d32dce71d0686406efcd07b7ea7528722f1 Reviewed-on: http://gerrit.openafs.org/4155 Reviewed-by: Jeffrey Altman <jaltman@openafs.org> Tested-by: BuildBot <buildbot@rampaginggeek.com> Reviewed-by: Derrick Brashear <shadow@dementia.org>
This commit is contained in:
parent
73a79dba2c
commit
7c4fc5278e
@ -37,8 +37,6 @@ int (*ubik_CRXSecurityProc) (void *rock, struct rx_securityClass **,
|
||||
void *ubik_CRXSecurityRock;
|
||||
|
||||
/*! \name statics used to determine if we're the sync site */
|
||||
static afs_int32 syncSiteUntil = 0; /*!< valid only if amSyncSite */
|
||||
int ubik_amSyncSite = 0; /*!< flag telling if I'm sync site */
|
||||
static int nServers; /*!< total number of servers */
|
||||
static char amIMagic = 0; /*!< is this host the magic host */
|
||||
char amIClone = 0; /*!< is this a clone which doesn't vote */
|
||||
@ -51,6 +49,10 @@ static void * securityRock = NULL;
|
||||
|
||||
afs_int32 ubikSecIndex;
|
||||
struct rx_securityClass *ubikSecClass;
|
||||
|
||||
/* Values protected by the beacon lock */
|
||||
struct beacon_data beacon_globals;
|
||||
|
||||
static int ubeacon_InitServerListCommon(afs_uint32 ame,
|
||||
struct afsconf_cell *info,
|
||||
char clones[],
|
||||
@ -88,7 +90,7 @@ void
|
||||
ubeacon_Debug(struct ubik_debug *aparm)
|
||||
{
|
||||
/* fill in beacon's state fields in the ubik_debug structure */
|
||||
aparm->syncSiteUntil = syncSiteUntil;
|
||||
aparm->syncSiteUntil = beacon_globals.syncSiteUntil;
|
||||
aparm->nServers = nServers;
|
||||
}
|
||||
|
||||
@ -119,14 +121,15 @@ ubeacon_AmSyncSite(void)
|
||||
return 1; /* one guy is always the sync site */
|
||||
}
|
||||
|
||||
if (ubik_amSyncSite == 0 || amIClone)
|
||||
UBIK_BEACON_LOCK;
|
||||
if (beacon_globals.ubik_amSyncSite == 0 || amIClone)
|
||||
rcode = 0; /* if I don't think I'm the sync site, say so */
|
||||
else {
|
||||
now = FT_ApproxTime();
|
||||
if (syncSiteUntil <= now) { /* if my votes have expired, say so */
|
||||
if (ubik_amSyncSite)
|
||||
if (beacon_globals.syncSiteUntil <= now) { /* if my votes have expired, say so */
|
||||
if (beacon_globals.ubik_amSyncSite)
|
||||
ubik_dprint("Ubik: I am no longer the sync site\n");
|
||||
ubik_amSyncSite = 0;
|
||||
beacon_globals.ubik_amSyncSite = 0;
|
||||
rcode = 0;
|
||||
} else {
|
||||
rcode = 1; /* otherwise still have the required votes */
|
||||
@ -134,6 +137,7 @@ ubeacon_AmSyncSite(void)
|
||||
}
|
||||
if (rcode == 0)
|
||||
urecovery_ResetState(); /* force recovery to re-execute */
|
||||
UBIK_BEACON_UNLOCK;
|
||||
ubik_dprint("beacon: amSyncSite is %d\n", rcode);
|
||||
return rcode;
|
||||
}
|
||||
@ -350,8 +354,8 @@ ubeacon_InitServerListCommon(afs_uint32 ame, struct afsconf_cell *info,
|
||||
if (!ubik_servers) /* special case 1 server */
|
||||
ubik_singleServer = 1;
|
||||
if (nServers == 1 && !amIClone) {
|
||||
ubik_amSyncSite = 1; /* let's start as sync site */
|
||||
syncSiteUntil = 0x7fffffff; /* and be it quite a while */
|
||||
beacon_globals.ubik_amSyncSite = 1; /* let's start as sync site */
|
||||
beacon_globals.syncSiteUntil = 0x7fffffff; /* and be it quite a while */
|
||||
}
|
||||
} else {
|
||||
if (nServers == 1) /* special case 1 server */
|
||||
@ -359,10 +363,10 @@ ubeacon_InitServerListCommon(afs_uint32 ame, struct afsconf_cell *info,
|
||||
}
|
||||
|
||||
if (ubik_singleServer) {
|
||||
if (!ubik_amSyncSite)
|
||||
if (!beacon_globals.ubik_amSyncSite)
|
||||
ubik_dprint("Ubik: I am the sync site - 1 server\n");
|
||||
ubik_amSyncSite = 1;
|
||||
syncSiteUntil = 0x7fffffff; /* quite a while */
|
||||
beacon_globals.ubik_amSyncSite = 1;
|
||||
beacon_globals.syncSiteUntil = 0x7fffffff; /* quite a while */
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
@ -420,12 +424,14 @@ ubeacon_Interact(void *dummy)
|
||||
* is a task for the recovery module, not the beacon module), and
|
||||
* prepare to send them an r multi-call containing the beacon message */
|
||||
i = 0; /* collect connections */
|
||||
UBIK_BEACON_LOCK;
|
||||
for (ts = ubik_servers; ts; ts = ts->next) {
|
||||
if (ts->up && ts->addr[0] != ubik_host[0]) {
|
||||
servers[i] = ts;
|
||||
connections[i++] = ts->vote_rxcid;
|
||||
}
|
||||
}
|
||||
UBIK_BEACON_UNLOCK;
|
||||
servers[i] = (struct ubik_server *)0; /* end of list */
|
||||
/* note that we assume in the vote module that we'll always get at least BIGTIME
|
||||
* seconds of vote from anyone who votes for us, which means we can conservatively
|
||||
@ -459,6 +465,7 @@ ubeacon_Interact(void *dummy)
|
||||
&ttid);
|
||||
temp = FT_ApproxTime(); /* now, more or less */
|
||||
ts = servers[multi_i];
|
||||
UBIK_BEACON_LOCK;
|
||||
ts->lastBeaconSent = temp;
|
||||
code = multi_error;
|
||||
/* note that the vote time (the return code) represents the time
|
||||
@ -498,6 +505,7 @@ ubeacon_Interact(void *dummy)
|
||||
ubik_dprint("time out from %s\n",
|
||||
afs_inet_ntoa_r(ts->addr[0], hoststr));
|
||||
}
|
||||
UBIK_BEACON_UNLOCK;
|
||||
}
|
||||
multi_End;
|
||||
}
|
||||
@ -517,19 +525,23 @@ ubeacon_Interact(void *dummy)
|
||||
/* now decide if we have enough votes to become sync site.
|
||||
* Note that we can still get enough votes even if we didn't for ourself. */
|
||||
if (yesVotes > nServers) { /* yesVotes is bumped by 2 or 3 for each site */
|
||||
if (!ubik_amSyncSite)
|
||||
UBIK_BEACON_LOCK;
|
||||
if (!beacon_globals.ubik_amSyncSite)
|
||||
ubik_dprint("Ubik: I am the sync site\n");
|
||||
ubik_amSyncSite = 1;
|
||||
syncSiteUntil = oldestYesVote + SMALLTIME;
|
||||
beacon_globals.ubik_amSyncSite = 1;
|
||||
beacon_globals.syncSiteUntil = oldestYesVote + SMALLTIME;
|
||||
#ifndef AFS_PTHREAD_ENV
|
||||
/* I did not find a corresponding LWP_WaitProcess(&ubik_amSyncSite) --
|
||||
this may be a spurious signal call -- sjenkins */
|
||||
LWP_NoYieldSignal(&ubik_amSyncSite);
|
||||
LWP_NoYieldSignal(&beacon_globals.ubik_amSyncSite);
|
||||
#endif
|
||||
UBIK_BEACON_UNLOCK;
|
||||
} else {
|
||||
if (ubik_amSyncSite)
|
||||
UBIK_BEACON_LOCK;
|
||||
if (beacon_globals.ubik_amSyncSite)
|
||||
ubik_dprint("Ubik: I am no longer the sync site\n");
|
||||
ubik_amSyncSite = 0;
|
||||
beacon_globals.ubik_amSyncSite = 0;
|
||||
UBIK_BEACON_UNLOCK;
|
||||
urecovery_ResetState(); /* tell recovery we're no longer the sync site */
|
||||
}
|
||||
|
||||
@ -750,7 +762,9 @@ updateUbikNetworkAddress(afs_uint32 ubik_host[UBIK_MAX_INTERFACE_ADDR])
|
||||
ubik_print("%s ", afs_inet_ntoa_r(ts->addr[j], hoststr));
|
||||
ubik_print("\n");
|
||||
} else {
|
||||
UBIK_BEACON_LOCK;
|
||||
ts->up = 0; /* mark the remote server as down */
|
||||
UBIK_BEACON_UNLOCK;
|
||||
}
|
||||
}
|
||||
multi_End;
|
||||
|
@ -480,16 +480,20 @@ urecovery_Interact(void *dummy)
|
||||
#endif
|
||||
|
||||
for (ts = ubik_servers, doingRPC = 0; ts; ts = ts->next) {
|
||||
UBIK_BEACON_LOCK;
|
||||
if (!ts->up) {
|
||||
UBIK_BEACON_UNLOCK;
|
||||
doingRPC = 1;
|
||||
code = DoProbe(ts);
|
||||
if (code == 0) {
|
||||
UBIK_BEACON_LOCK;
|
||||
ts->up = 1;
|
||||
urecovery_state &= ~UBIK_RECFOUNDDB;
|
||||
}
|
||||
} else if (!ts->currentDB) {
|
||||
urecovery_state &= ~UBIK_RECFOUNDDB;
|
||||
}
|
||||
UBIK_BEACON_UNLOCK;
|
||||
}
|
||||
|
||||
#ifdef AFS_PTHREAD_ENV
|
||||
@ -516,8 +520,12 @@ urecovery_Interact(void *dummy)
|
||||
bestDBVersion.epoch = 0;
|
||||
bestDBVersion.counter = 0;
|
||||
for (ts = ubik_servers; ts; ts = ts->next) {
|
||||
if (!ts->up)
|
||||
UBIK_BEACON_LOCK;
|
||||
if (!ts->up) {
|
||||
UBIK_BEACON_UNLOCK;
|
||||
continue; /* don't bother with these guys */
|
||||
}
|
||||
UBIK_BEACON_UNLOCK;
|
||||
if (ts->isClone)
|
||||
continue;
|
||||
code = DISK_GetVersion(ts->disk_rxcid, &ts->version);
|
||||
@ -754,12 +762,15 @@ urecovery_Interact(void *dummy)
|
||||
|
||||
for (ts = ubik_servers; ts; ts = ts->next) {
|
||||
inAddr.s_addr = ts->addr[0];
|
||||
UBIK_BEACON_LOCK;
|
||||
if (!ts->up) {
|
||||
UBIK_BEACON_UNLOCK;
|
||||
ubik_dprint("recovery cannot send version to %s\n",
|
||||
afs_inet_ntoa_r(inAddr.s_addr, hoststr));
|
||||
dbok = 0;
|
||||
continue;
|
||||
}
|
||||
UBIK_BEACON_UNLOCK;
|
||||
ubik_dprint("recovery sending version to %s\n",
|
||||
afs_inet_ntoa_r(inAddr.s_addr, hoststr));
|
||||
if (vcmp(ts->version, ubik_dbase->version) != 0) {
|
||||
|
@ -149,8 +149,10 @@ ContactQuorum_iterate(struct ubik_trans *atrans, int aflags, struct ubik_server
|
||||
*conn = NULL;
|
||||
if (code) { /* failure */
|
||||
*rcode = code;
|
||||
UBIK_BEACON_LOCK;
|
||||
(*ts)->up = 0; /* mark as down now; beacons will no longer be sent */
|
||||
(*ts)->beaconSinceDown = 0;
|
||||
UBIK_BEACON_UNLOCK;
|
||||
(*ts)->currentDB = 0;
|
||||
urecovery_LostServer(*ts); /* tell recovery to try to resend dbase later */
|
||||
} else { /* success */
|
||||
@ -165,10 +167,13 @@ ContactQuorum_iterate(struct ubik_trans *atrans, int aflags, struct ubik_server
|
||||
}
|
||||
if (!(*ts))
|
||||
return 1;
|
||||
UBIK_BEACON_LOCK;
|
||||
if (!(*ts)->up || !(*ts)->currentDB) {
|
||||
UBIK_BEACON_UNLOCK;
|
||||
(*ts)->currentDB = 0; /* db is no longer current; we just missed an update */
|
||||
return 0; /* not up-to-date, don't bother. NULL conn will tell caller not to use */
|
||||
}
|
||||
UBIK_BEACON_UNLOCK;
|
||||
*conn = Quorum_StartIO(atrans, *ts);
|
||||
return 0;
|
||||
}
|
||||
@ -400,6 +405,7 @@ ubik_ServerInitCommon(afs_uint32 myHost, short myPort,
|
||||
memset(&tdb->cachedVersion, 0, sizeof(struct ubik_version));
|
||||
#ifdef AFS_PTHREAD_ENV
|
||||
MUTEX_INIT(&tdb->versionLock, "version lock", MUTEX_DEFAULT, 0);
|
||||
MUTEX_INIT(&beacon_globals.beacon_lock, "beacon lock", MUTEX_DEFAULT, 0);
|
||||
#else
|
||||
Lock_Init(&tdb->versionLock);
|
||||
#endif
|
||||
@ -867,7 +873,9 @@ ubik_EndTrans(struct ubik_trans *transPtr)
|
||||
break;
|
||||
}
|
||||
for (ts = ubik_servers; ts; ts = ts->next) {
|
||||
UBIK_BEACON_LOCK;
|
||||
if (!ts->beaconSinceDown && now <= ts->lastBeaconSent + BIGTIME) {
|
||||
UBIK_BEACON_UNLOCK;
|
||||
|
||||
/* this guy could have some damaged data, wait for him */
|
||||
code = 1;
|
||||
@ -887,6 +895,7 @@ ubik_EndTrans(struct ubik_trans *transPtr)
|
||||
|
||||
break;
|
||||
}
|
||||
UBIK_BEACON_UNLOCK;
|
||||
}
|
||||
if (code == 0)
|
||||
break; /* no down ones still pseudo-active */
|
||||
|
@ -349,6 +349,26 @@ extern int ubikPrimaryAddrOnly; /* use only primary address */
|
||||
|
||||
/* this extern gives the sync site's db version, with epoch of 0 if none yet */
|
||||
|
||||
/*!
|
||||
* \brief Global beacon data. All values are protected by beacon_lock
|
||||
* This lock also protects some values in the ubik_server structures:
|
||||
* lastVoteTime
|
||||
* lastBeaconSent
|
||||
* lastVote
|
||||
* up
|
||||
* beaconSinceDown
|
||||
*/
|
||||
struct beacon_data {
|
||||
#ifdef AFS_PTHREAD_ENV
|
||||
pthread_mutex_t beacon_lock;
|
||||
#endif
|
||||
int ubik_amSyncSite; /*!< flag telling if I'm sync site */
|
||||
afs_int32 syncSiteUntil; /*!< valid only if amSyncSite */
|
||||
};
|
||||
|
||||
#define UBIK_BEACON_LOCK MUTEX_ENTER(&beacon_globals.beacon_lock)
|
||||
#define UBIK_BEACON_UNLOCK MUTEX_EXIT(&beacon_globals.beacon_lock)
|
||||
|
||||
/* phys.c */
|
||||
extern int uphys_stat(struct ubik_dbase *adbase, afs_int32 afid,
|
||||
struct ubik_stat *astat);
|
||||
@ -432,6 +452,8 @@ extern int ubeacon_InitServerListByInfo(afs_uint32 ame,
|
||||
char clones[]);
|
||||
extern int ubeacon_InitServerList(afs_uint32 ame, afs_uint32 aservers[]);
|
||||
extern void *ubeacon_Interact(void *);
|
||||
extern struct beacon_data beacon_globals;
|
||||
|
||||
/*\}*/
|
||||
|
||||
/*! \name disk.c */
|
||||
|
@ -411,7 +411,7 @@ SVOTE_Debug(struct rx_call * rxcall, struct ubik_debug * aparm)
|
||||
for (i = 0; i < UBIK_MAX_INTERFACE_ADDR; i++)
|
||||
aparm->interfaceAddr[i] = ntohl(ubik_host[i]);
|
||||
|
||||
aparm->amSyncSite = ubik_amSyncSite;
|
||||
aparm->amSyncSite = beacon_globals.ubik_amSyncSite;
|
||||
ubeacon_Debug(aparm);
|
||||
|
||||
udisk_Debug(aparm);
|
||||
@ -494,7 +494,7 @@ SVOTE_DebugOld(struct rx_call * rxcall,
|
||||
aparm->syncHost = ntohl(syncHost);
|
||||
aparm->syncTime = syncTime;
|
||||
|
||||
aparm->amSyncSite = ubik_amSyncSite;
|
||||
aparm->amSyncSite = beacon_globals.ubik_amSyncSite;
|
||||
ubeacon_Debug((ubik_debug *)aparm);
|
||||
|
||||
udisk_Debug((ubik_debug *)aparm);
|
||||
|
Loading…
x
Reference in New Issue
Block a user