DEVEL15-ubik-avoid-truncating-live-database-during-recovery-20071210

LICENSE IPL10
FIXES 77183

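Currently we can truncate the live database while we are doing recovery.
Address that by receiving the incoming database into a temporary file
(<pathName>.DB0.TMP) and renaming it over the live <pathName>.DB0 only after
the transfer has completed successfully; on failure the temporary file is
unlinked.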


(cherry picked from commit 0f9529171c37c4ef1e76156da111d83d04b38505)
This commit is contained in:
Derrick Brashear 2007-12-10 22:45:55 +00:00
parent a695815869
commit 1a7f3b21b7
3 changed files with 114 additions and 44 deletions

View File

@ -78,15 +78,8 @@ uphys_open(register struct ubik_dbase *adbase, afs_int32 afid)
}
/* not found, open it and try to enter in cache */
strcpy(pbuffer, adbase->pathName);
strcat(pbuffer, ".DB");
if (afid < 0) {
i = -afid;
strcat(pbuffer, "SYS");
} else
i = afid;
sprintf(temp, "%d", i);
strcat(pbuffer, temp);
afs_snprintf(pbuffer, sizeof(pbuffer), "%s.DB%s%d", adbase->pathName,
(afid<0)?"SYS":"", (afid<0)?-afid:afid);
fd = open(pbuffer, O_CREAT | O_RDWR, 0600);
if (fd < 0) {
/* try opening read-only */

View File

@ -17,6 +17,7 @@ RCSID
#ifdef AFS_NT40_ENV
#include <winsock2.h>
#include <time.h>
#include <fcntl.h>
#else
#include <sys/file.h>
#include <netinet/in.h>
@ -425,9 +426,13 @@ urecovery_Interact(void)
struct timeval tv;
int length, tlen, offset, file, nbytes;
struct rx_call *rxcall;
char tbuffer[256];
char tbuffer[1024];
struct ubik_stat ubikstat;
struct in_addr inAddr;
#ifndef OLD_URECOVERY
char pbuffer[1028];
int flen, fd = -1;
#endif
/* otherwise, begin interaction */
urecovery_state = 0;
@ -530,11 +535,7 @@ urecovery_Interact(void)
urecovery_state |= UBIK_RECHAVEDB;
} else {
/* we don't have the best version; we should fetch it. */
#if defined(UBIK_PAUSE)
DBHOLD(ubik_dbase);
#else
ObtainWriteLock(&ubik_dbase->versionLock);
#endif /* UBIK_PAUSE */
urecovery_AbortAll(ubik_dbase);
/* Rx code to do the Bulk fetch */
@ -558,7 +559,8 @@ urecovery_Interact(void)
goto FetchEndCall;
}
/* Truncate the file firest */
#ifdef OLD_URECOVERY
/* Truncate the file first */
code = (*ubik_dbase->truncate) (ubik_dbase, file, 0);
if (code) {
ubik_dprint("truncate io error=%d\n", code);
@ -573,6 +575,20 @@ urecovery_Interact(void)
ubik_dprint("setlabel io error=%d\n", code);
goto FetchEndCall;
}
#else
flen = length;
afs_snprintf(pbuffer, sizeof(pbuffer), "%s.DB0.TMP", ubik_dbase->pathName);
fd = open(pbuffer, O_CREAT | O_RDWR | O_TRUNC, 0600);
if (fd < 0) {
code = errno;
goto FetchEndCall;
}
code = lseek(fd, HDRSIZE, 0);
if (code != HDRSIZE) {
close(fd);
goto FetchEndCall;
}
#endif
while (length > 0) {
tlen = (length > sizeof(tbuffer) ? sizeof(tbuffer) : length);
@ -580,18 +596,29 @@ urecovery_Interact(void)
if (nbytes != tlen) {
ubik_dprint("Rx-read bulk error=%d\n", code = BULK_ERROR);
code = EIO;
close(fd);
goto FetchEndCall;
}
#ifdef OLD_URECOVERY
nbytes =
(*ubik_dbase->write) (ubik_dbase, file, tbuffer, offset,
tlen);
#else
nbytes = write(fd, tbuffer, tlen);
#endif
if (nbytes != tlen) {
code = UIOERROR;
close(fd);
goto FetchEndCall;
}
offset += tlen;
length -= tlen;
}
#ifndef OLD_URECOVERY
code = close(fd);
if (code)
goto FetchEndCall;
#endif
code = EndDISK_GetFile(rxcall, &tversion);
FetchEndCall:
tcode = rx_EndCall(rxcall, code);
@ -602,13 +629,36 @@ urecovery_Interact(void)
urecovery_state |= UBIK_RECHAVEDB;
memcpy(&ubik_dbase->version, &tversion,
sizeof(struct ubik_version));
#ifdef OLD_URECOVERY
(*ubik_dbase->sync) (ubik_dbase, 0); /* get data out first */
#else
afs_snprintf(tbuffer, sizeof(tbuffer), "%s.DB0", ubik_dbase->pathName);
#ifdef AFS_NT40_ENV
afs_snprintf(pbuffer, sizeof(pbuffer), "%s.DB0.OLD", ubik_dbase->pathName);
code = unlink(pbuffer);
if (!code)
code = rename(tbuffer, pbuffer);
afs_snprintf(pbuffer, sizeof(pbuffer), "%s.DB0.TMP", ubik_dbase->pathName);
#endif
if (!code)
code = rename(pbuffer, tbuffer);
if (!code)
#endif
/* after data is good, sync disk with correct label */
code =
(*ubik_dbase->setlabel) (ubik_dbase, 0,
&ubik_dbase->version);
#ifndef OLD_URECOVERY
#ifdef AFS_NT40_ENV
afs_snprintf(pbuffer, sizeof(pbuffer), "%s.DB0.OLD", ubik_dbase->pathName);
unlink(pbuffer);
#endif
#endif
}
if (code) {
#ifndef OLD_URECOVERY
unlink(pbuffer);
#endif
ubik_dbase->version.epoch = 0;
ubik_dbase->version.counter = 0;
ubik_print("Ubik: Synchronize database failed (error = %d)\n",
@ -618,11 +668,7 @@ urecovery_Interact(void)
}
udisk_Invalidate(ubik_dbase, 0); /* data has changed */
LWP_NoYieldSignal(&ubik_dbase->version);
#if defined(UBIK_PAUSE)
DBRELE(ubik_dbase);
#else
ReleaseWriteLock(&ubik_dbase->versionLock);
#endif /* UBIK_PAUSE */
}
#if defined(UBIK_PAUSE)
if (!(urecovery_state & UBIK_RECSYNCSITE))
@ -637,11 +683,7 @@ urecovery_Interact(void)
* database and overwrite this one.
*/
if (ubik_dbase->version.epoch == 1) {
#if defined(UBIK_PAUSE)
DBHOLD(ubik_dbase);
#else
ObtainWriteLock(&ubik_dbase->versionLock);
#endif /* UBIK_PAUSE */
urecovery_AbortAll(ubik_dbase);
ubik_epochTime = 2;
ubik_dbase->version.epoch = ubik_epochTime;
@ -650,11 +692,7 @@ urecovery_Interact(void)
(*ubik_dbase->setlabel) (ubik_dbase, 0, &ubik_dbase->version);
udisk_Invalidate(ubik_dbase, 0); /* data may have changed */
LWP_NoYieldSignal(&ubik_dbase->version);
#if defined(UBIK_PAUSE)
DBRELE(ubik_dbase);
#else
ReleaseWriteLock(&ubik_dbase->versionLock);
#endif /* UBIK_PAUSE */
}
/* Check the other sites and send the database to them if they
@ -664,11 +702,7 @@ urecovery_Interact(void)
/* now propagate out new version to everyone else */
dbok = 1; /* start off assuming they all worked */
#if defined(UBIK_PAUSE)
DBHOLD(ubik_dbase);
#else
ObtainWriteLock(&ubik_dbase->versionLock);
#endif /* UBIK_PAUSE */
/*
* Check if a write transaction is in progress. We can't send the
* db when a write is in progress here because the db would be
@ -684,20 +718,12 @@ urecovery_Interact(void)
tv.tv_sec = 0;
tv.tv_usec = 50000;
while ((ubik_dbase->flags & DBWRITING) && (safety < 500)) {
#if defined(UBIK_PAUSE)
DBRELE(ubik_dbase);
#else
ReleaseWriteLock(&ubik_dbase->versionLock);
#endif /* UBIK_PAUSE */
/* sleep for a little while */
IOMGR_Select(0, 0, 0, 0, &tv);
tv.tv_usec += 10000;
safety++;
#if defined(UBIK_PAUSE)
DBHOLD(ubik_dbase);
#else
ObtainWriteLock(&ubik_dbase->versionLock);
#endif /* UBIK_PAUSE */
}
}
@ -764,11 +790,7 @@ urecovery_Interact(void)
ts->currentDB = 1;
}
}
#if defined(UBIK_PAUSE)
DBRELE(ubik_dbase);
#else
ReleaseWriteLock(&ubik_dbase->versionLock);
#endif /* UBIK_PAUSE */
if (dbok)
urecovery_state |= UBIK_RECSENTDB;
}

View File

@ -16,6 +16,7 @@ RCSID
#include <sys/types.h>
#ifdef AFS_NT40_ENV
#include <winsock2.h>
#include <fcntl.h>
#else
#include <sys/file.h>
#include <netinet/in.h>
@ -24,6 +25,7 @@ RCSID
#include <lock.h>
#include <rx/xdr.h>
#include <rx/rx.h>
#include <errno.h>
#include <afs/afsutil.h>
#define UBIK_INTERNALS
@ -488,13 +490,17 @@ SDISK_SendFile(rxcall, file, length, avers)
{
register afs_int32 code;
register struct ubik_dbase *dbase;
char tbuffer[256];
char tbuffer[1024];
afs_int32 offset;
struct ubik_version tversion;
register int tlen;
struct rx_peer *tpeer;
struct rx_connection *tconn;
afs_uint32 otherHost;
#ifndef OLD_URECOVERY
char pbuffer[1028];
int flen, fd = -1;
#endif
/* send the file back to the requester */
@ -532,10 +538,25 @@ SDISK_SendFile(rxcall, file, length, avers)
afs_inet_ntoa(otherHost));
offset = 0;
#ifdef OLD_URECOVERY
(*dbase->truncate) (dbase, file, 0); /* truncate first */
tversion.epoch = 0; /* start off by labelling in-transit db as invalid */
tversion.counter = 0;
(*dbase->setlabel) (dbase, file, &tversion); /* setlabel does sync */
#else
flen = length;
afs_snprintf(pbuffer, sizeof(pbuffer), "%s.DB0.TMP", ubik_dbase->pathName);
fd = open(pbuffer, O_CREAT | O_RDWR | O_TRUNC, 0600);
if (fd < 0) {
code = errno;
goto failed;
}
code = lseek(fd, HDRSIZE, 0);
if (code != HDRSIZE) {
close(fd);
goto failed;
}
#endif
memcpy(&ubik_dbase->version, &tversion, sizeof(struct ubik_version));
while (length > 0) {
tlen = (length > sizeof(tbuffer) ? sizeof(tbuffer) : length);
@ -544,29 +565,63 @@ SDISK_SendFile(rxcall, file, length, avers)
DBRELE(dbase);
ubik_dprint("Rx-read length error=%d\n", code);
code = BULK_ERROR;
close(fd);
goto failed;
}
#ifdef OLD_URECOVERY
code = (*dbase->write) (dbase, file, tbuffer, offset, tlen);
#else
code = write(fd, tbuffer, tlen);
#endif
if (code != tlen) {
DBRELE(dbase);
ubik_dprint("write failed error=%d\n", code);
code = UIOERROR;
close(fd);
goto failed;
}
offset += tlen;
length -= tlen;
}
#ifndef OLD_URECOVERY
code = close(fd);
if (code)
goto failed;
#endif
/* sync data first, then write label and resync (resync done by setlabel call).
* This way, good label is only on good database. */
#ifdef OLD_URECOVERY
(*ubik_dbase->sync) (dbase, file);
#else
afs_snprintf(tbuffer, sizeof(tbuffer), "%s.DB0", ubik_dbase->pathName);
#ifdef AFS_NT40_ENV
afs_snprintf(pbuffer, sizeof(pbuffer), "%s.DB0.OLD", ubik_dbase->pathName);
code = unlink(pbuffer);
if (!code)
code = rename(tbuffer, pbuffer);
afs_snprintf(pbuffer, sizeof(pbuffer), "%s.DB0.TMP", ubik_dbase->pathName);
#endif
if (!code)
code = rename(pbuffer, tbuffer);
if (!code)
#endif
code = (*ubik_dbase->setlabel) (dbase, file, avers);
#ifndef OLD_URECOVERY
#ifdef AFS_NT40_ENV
afs_snprintf(pbuffer, sizeof(pbuffer), "%s.DB0.OLD", ubik_dbase->pathName);
unlink(pbuffer);
#endif
#endif
memcpy(&ubik_dbase->version, avers, sizeof(struct ubik_version));
udisk_Invalidate(dbase, file); /* new dbase, flush disk buffers */
LWP_NoYieldSignal(&dbase->version);
DBRELE(dbase);
failed:
if (code) {
#ifndef OLD_URECOVERY
unlink(pbuffer);
#endif
ubik_print
("Ubik: Synchronize database with server %s failed (error = %d)\n",
afs_inet_ntoa(otherHost), code);