mirror of
https://git.openafs.org/openafs.git
synced 2025-01-31 13:38:01 +00:00
DEVEL15-ubik-avoid-truncating-live-database-during-recovery-20071210
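LICENSE IPL10 FIXES 77183 Currently we can truncate the live database while we are doing recovery. Address that by receiving the incoming database into a temporary file (.DB0.TMP) and renaming it over the live .DB0 file only after the transfer has completed. (cherry picked from commit 0f9529171c37c4ef1e76156da111d83d04b38505)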
This commit is contained in:
parent
a695815869
commit
1a7f3b21b7
@ -78,15 +78,8 @@ uphys_open(register struct ubik_dbase *adbase, afs_int32 afid)
|
||||
}
|
||||
|
||||
/* not found, open it and try to enter in cache */
|
||||
strcpy(pbuffer, adbase->pathName);
|
||||
strcat(pbuffer, ".DB");
|
||||
if (afid < 0) {
|
||||
i = -afid;
|
||||
strcat(pbuffer, "SYS");
|
||||
} else
|
||||
i = afid;
|
||||
sprintf(temp, "%d", i);
|
||||
strcat(pbuffer, temp);
|
||||
afs_snprintf(pbuffer, sizeof(pbuffer), "%s.DB%s%d", adbase->pathName,
|
||||
(afid<0)?"SYS":"", (afid<0)?-afid:afid);
|
||||
fd = open(pbuffer, O_CREAT | O_RDWR, 0600);
|
||||
if (fd < 0) {
|
||||
/* try opening read-only */
|
||||
|
@ -17,6 +17,7 @@ RCSID
|
||||
#ifdef AFS_NT40_ENV
|
||||
#include <winsock2.h>
|
||||
#include <time.h>
|
||||
#include <fcntl.h>
|
||||
#else
|
||||
#include <sys/file.h>
|
||||
#include <netinet/in.h>
|
||||
@ -425,9 +426,13 @@ urecovery_Interact(void)
|
||||
struct timeval tv;
|
||||
int length, tlen, offset, file, nbytes;
|
||||
struct rx_call *rxcall;
|
||||
char tbuffer[256];
|
||||
char tbuffer[1024];
|
||||
struct ubik_stat ubikstat;
|
||||
struct in_addr inAddr;
|
||||
#ifndef OLD_URECOVERY
|
||||
char pbuffer[1028];
|
||||
int flen, fd = -1;
|
||||
#endif
|
||||
|
||||
/* otherwise, begin interaction */
|
||||
urecovery_state = 0;
|
||||
@ -530,11 +535,7 @@ urecovery_Interact(void)
|
||||
urecovery_state |= UBIK_RECHAVEDB;
|
||||
} else {
|
||||
/* we don't have the best version; we should fetch it. */
|
||||
#if defined(UBIK_PAUSE)
|
||||
DBHOLD(ubik_dbase);
|
||||
#else
|
||||
ObtainWriteLock(&ubik_dbase->versionLock);
|
||||
#endif /* UBIK_PAUSE */
|
||||
urecovery_AbortAll(ubik_dbase);
|
||||
|
||||
/* Rx code to do the Bulk fetch */
|
||||
@ -558,7 +559,8 @@ urecovery_Interact(void)
|
||||
goto FetchEndCall;
|
||||
}
|
||||
|
||||
/* Truncate the file firest */
|
||||
#ifdef OLD_URECOVERY
|
||||
/* Truncate the file first */
|
||||
code = (*ubik_dbase->truncate) (ubik_dbase, file, 0);
|
||||
if (code) {
|
||||
ubik_dprint("truncate io error=%d\n", code);
|
||||
@ -573,6 +575,20 @@ urecovery_Interact(void)
|
||||
ubik_dprint("setlabel io error=%d\n", code);
|
||||
goto FetchEndCall;
|
||||
}
|
||||
#else
|
||||
flen = length;
|
||||
afs_snprintf(pbuffer, sizeof(pbuffer), "%s.DB0.TMP", ubik_dbase->pathName);
|
||||
fd = open(pbuffer, O_CREAT | O_RDWR | O_TRUNC, 0600);
|
||||
if (fd < 0) {
|
||||
code = errno;
|
||||
goto FetchEndCall;
|
||||
}
|
||||
code = lseek(fd, HDRSIZE, 0);
|
||||
if (code != HDRSIZE) {
|
||||
close(fd);
|
||||
goto FetchEndCall;
|
||||
}
|
||||
#endif
|
||||
|
||||
while (length > 0) {
|
||||
tlen = (length > sizeof(tbuffer) ? sizeof(tbuffer) : length);
|
||||
@ -580,18 +596,29 @@ urecovery_Interact(void)
|
||||
if (nbytes != tlen) {
|
||||
ubik_dprint("Rx-read bulk error=%d\n", code = BULK_ERROR);
|
||||
code = EIO;
|
||||
close(fd);
|
||||
goto FetchEndCall;
|
||||
}
|
||||
#ifdef OLD_URECOVERY
|
||||
nbytes =
|
||||
(*ubik_dbase->write) (ubik_dbase, file, tbuffer, offset,
|
||||
tlen);
|
||||
#else
|
||||
nbytes = write(fd, tbuffer, tlen);
|
||||
#endif
|
||||
if (nbytes != tlen) {
|
||||
code = UIOERROR;
|
||||
close(fd);
|
||||
goto FetchEndCall;
|
||||
}
|
||||
offset += tlen;
|
||||
length -= tlen;
|
||||
}
|
||||
#ifndef OLD_URECOVERY
|
||||
code = close(fd);
|
||||
if (code)
|
||||
goto FetchEndCall;
|
||||
#endif
|
||||
code = EndDISK_GetFile(rxcall, &tversion);
|
||||
FetchEndCall:
|
||||
tcode = rx_EndCall(rxcall, code);
|
||||
@ -602,13 +629,36 @@ urecovery_Interact(void)
|
||||
urecovery_state |= UBIK_RECHAVEDB;
|
||||
memcpy(&ubik_dbase->version, &tversion,
|
||||
sizeof(struct ubik_version));
|
||||
#ifdef OLD_URECOVERY
|
||||
(*ubik_dbase->sync) (ubik_dbase, 0); /* get data out first */
|
||||
#else
|
||||
afs_snprintf(tbuffer, sizeof(tbuffer), "%s.DB0", ubik_dbase->pathName);
|
||||
#ifdef AFS_NT40_ENV
|
||||
afs_snprintf(pbuffer, sizeof(pbuffer), "%s.DB0.OLD", ubik_dbase->pathName);
|
||||
code = unlink(pbuffer);
|
||||
if (!code)
|
||||
code = rename(tbuffer, pbuffer);
|
||||
afs_snprintf(pbuffer, sizeof(pbuffer), "%s.DB0.TMP", ubik_dbase->pathName);
|
||||
#endif
|
||||
if (!code)
|
||||
code = rename(pbuffer, tbuffer);
|
||||
if (!code)
|
||||
#endif
|
||||
/* after data is good, sync disk with correct label */
|
||||
code =
|
||||
(*ubik_dbase->setlabel) (ubik_dbase, 0,
|
||||
&ubik_dbase->version);
|
||||
#ifndef OLD_URECOVERY
|
||||
#ifdef AFS_NT40_ENV
|
||||
afs_snprintf(pbuffer, sizeof(pbuffer), "%s.DB0.OLD", ubik_dbase->pathName);
|
||||
unlink(pbuffer);
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
if (code) {
|
||||
#ifndef OLD_URECOVERY
|
||||
unlink(pbuffer);
|
||||
#endif
|
||||
ubik_dbase->version.epoch = 0;
|
||||
ubik_dbase->version.counter = 0;
|
||||
ubik_print("Ubik: Synchronize database failed (error = %d)\n",
|
||||
@ -618,11 +668,7 @@ urecovery_Interact(void)
|
||||
}
|
||||
udisk_Invalidate(ubik_dbase, 0); /* data has changed */
|
||||
LWP_NoYieldSignal(&ubik_dbase->version);
|
||||
#if defined(UBIK_PAUSE)
|
||||
DBRELE(ubik_dbase);
|
||||
#else
|
||||
ReleaseWriteLock(&ubik_dbase->versionLock);
|
||||
#endif /* UBIK_PAUSE */
|
||||
}
|
||||
#if defined(UBIK_PAUSE)
|
||||
if (!(urecovery_state & UBIK_RECSYNCSITE))
|
||||
@ -637,11 +683,7 @@ urecovery_Interact(void)
|
||||
* database and overwrite this one.
|
||||
*/
|
||||
if (ubik_dbase->version.epoch == 1) {
|
||||
#if defined(UBIK_PAUSE)
|
||||
DBHOLD(ubik_dbase);
|
||||
#else
|
||||
ObtainWriteLock(&ubik_dbase->versionLock);
|
||||
#endif /* UBIK_PAUSE */
|
||||
urecovery_AbortAll(ubik_dbase);
|
||||
ubik_epochTime = 2;
|
||||
ubik_dbase->version.epoch = ubik_epochTime;
|
||||
@ -650,11 +692,7 @@ urecovery_Interact(void)
|
||||
(*ubik_dbase->setlabel) (ubik_dbase, 0, &ubik_dbase->version);
|
||||
udisk_Invalidate(ubik_dbase, 0); /* data may have changed */
|
||||
LWP_NoYieldSignal(&ubik_dbase->version);
|
||||
#if defined(UBIK_PAUSE)
|
||||
DBRELE(ubik_dbase);
|
||||
#else
|
||||
ReleaseWriteLock(&ubik_dbase->versionLock);
|
||||
#endif /* UBIK_PAUSE */
|
||||
}
|
||||
|
||||
/* Check the other sites and send the database to them if they
|
||||
@ -664,11 +702,7 @@ urecovery_Interact(void)
|
||||
/* now propagate out new version to everyone else */
|
||||
dbok = 1; /* start off assuming they all worked */
|
||||
|
||||
#if defined(UBIK_PAUSE)
|
||||
DBHOLD(ubik_dbase);
|
||||
#else
|
||||
ObtainWriteLock(&ubik_dbase->versionLock);
|
||||
#endif /* UBIK_PAUSE */
|
||||
/*
|
||||
* Check if a write transaction is in progress. We can't send the
|
||||
* db when a write is in progress here because the db would be
|
||||
@ -684,20 +718,12 @@ urecovery_Interact(void)
|
||||
tv.tv_sec = 0;
|
||||
tv.tv_usec = 50000;
|
||||
while ((ubik_dbase->flags & DBWRITING) && (safety < 500)) {
|
||||
#if defined(UBIK_PAUSE)
|
||||
DBRELE(ubik_dbase);
|
||||
#else
|
||||
ReleaseWriteLock(&ubik_dbase->versionLock);
|
||||
#endif /* UBIK_PAUSE */
|
||||
/* sleep for a little while */
|
||||
IOMGR_Select(0, 0, 0, 0, &tv);
|
||||
tv.tv_usec += 10000;
|
||||
safety++;
|
||||
#if defined(UBIK_PAUSE)
|
||||
DBHOLD(ubik_dbase);
|
||||
#else
|
||||
ObtainWriteLock(&ubik_dbase->versionLock);
|
||||
#endif /* UBIK_PAUSE */
|
||||
}
|
||||
}
|
||||
|
||||
@ -764,11 +790,7 @@ urecovery_Interact(void)
|
||||
ts->currentDB = 1;
|
||||
}
|
||||
}
|
||||
#if defined(UBIK_PAUSE)
|
||||
DBRELE(ubik_dbase);
|
||||
#else
|
||||
ReleaseWriteLock(&ubik_dbase->versionLock);
|
||||
#endif /* UBIK_PAUSE */
|
||||
if (dbok)
|
||||
urecovery_state |= UBIK_RECSENTDB;
|
||||
}
|
||||
|
@ -16,6 +16,7 @@ RCSID
|
||||
#include <sys/types.h>
|
||||
#ifdef AFS_NT40_ENV
|
||||
#include <winsock2.h>
|
||||
#include <fcntl.h>
|
||||
#else
|
||||
#include <sys/file.h>
|
||||
#include <netinet/in.h>
|
||||
@ -24,6 +25,7 @@ RCSID
|
||||
#include <lock.h>
|
||||
#include <rx/xdr.h>
|
||||
#include <rx/rx.h>
|
||||
#include <errno.h>
|
||||
#include <afs/afsutil.h>
|
||||
|
||||
#define UBIK_INTERNALS
|
||||
@ -488,13 +490,17 @@ SDISK_SendFile(rxcall, file, length, avers)
|
||||
{
|
||||
register afs_int32 code;
|
||||
register struct ubik_dbase *dbase;
|
||||
char tbuffer[256];
|
||||
char tbuffer[1024];
|
||||
afs_int32 offset;
|
||||
struct ubik_version tversion;
|
||||
register int tlen;
|
||||
struct rx_peer *tpeer;
|
||||
struct rx_connection *tconn;
|
||||
afs_uint32 otherHost;
|
||||
#ifndef OLD_URECOVERY
|
||||
char pbuffer[1028];
|
||||
int flen, fd = -1;
|
||||
#endif
|
||||
|
||||
/* send the file back to the requester */
|
||||
|
||||
@ -532,10 +538,25 @@ SDISK_SendFile(rxcall, file, length, avers)
|
||||
afs_inet_ntoa(otherHost));
|
||||
|
||||
offset = 0;
|
||||
#ifdef OLD_URECOVERY
|
||||
(*dbase->truncate) (dbase, file, 0); /* truncate first */
|
||||
tversion.epoch = 0; /* start off by labelling in-transit db as invalid */
|
||||
tversion.counter = 0;
|
||||
(*dbase->setlabel) (dbase, file, &tversion); /* setlabel does sync */
|
||||
#else
|
||||
flen = length;
|
||||
afs_snprintf(pbuffer, sizeof(pbuffer), "%s.DB0.TMP", ubik_dbase->pathName);
|
||||
fd = open(pbuffer, O_CREAT | O_RDWR | O_TRUNC, 0600);
|
||||
if (fd < 0) {
|
||||
code = errno;
|
||||
goto failed;
|
||||
}
|
||||
code = lseek(fd, HDRSIZE, 0);
|
||||
if (code != HDRSIZE) {
|
||||
close(fd);
|
||||
goto failed;
|
||||
}
|
||||
#endif
|
||||
memcpy(&ubik_dbase->version, &tversion, sizeof(struct ubik_version));
|
||||
while (length > 0) {
|
||||
tlen = (length > sizeof(tbuffer) ? sizeof(tbuffer) : length);
|
||||
@ -544,29 +565,63 @@ SDISK_SendFile(rxcall, file, length, avers)
|
||||
DBRELE(dbase);
|
||||
ubik_dprint("Rx-read length error=%d\n", code);
|
||||
code = BULK_ERROR;
|
||||
close(fd);
|
||||
goto failed;
|
||||
}
|
||||
#ifdef OLD_URECOVERY
|
||||
code = (*dbase->write) (dbase, file, tbuffer, offset, tlen);
|
||||
#else
|
||||
code = write(fd, tbuffer, tlen);
|
||||
#endif
|
||||
if (code != tlen) {
|
||||
DBRELE(dbase);
|
||||
ubik_dprint("write failed error=%d\n", code);
|
||||
code = UIOERROR;
|
||||
close(fd);
|
||||
goto failed;
|
||||
}
|
||||
offset += tlen;
|
||||
length -= tlen;
|
||||
}
|
||||
#ifndef OLD_URECOVERY
|
||||
code = close(fd);
|
||||
if (code)
|
||||
goto failed;
|
||||
#endif
|
||||
|
||||
/* sync data first, then write label and resync (resync done by setlabel call).
|
||||
* This way, good label is only on good database. */
|
||||
#ifdef OLD_URECOVERY
|
||||
(*ubik_dbase->sync) (dbase, file);
|
||||
#else
|
||||
afs_snprintf(tbuffer, sizeof(tbuffer), "%s.DB0", ubik_dbase->pathName);
|
||||
#ifdef AFS_NT40_ENV
|
||||
afs_snprintf(pbuffer, sizeof(pbuffer), "%s.DB0.OLD", ubik_dbase->pathName);
|
||||
code = unlink(pbuffer);
|
||||
if (!code)
|
||||
code = rename(tbuffer, pbuffer);
|
||||
afs_snprintf(pbuffer, sizeof(pbuffer), "%s.DB0.TMP", ubik_dbase->pathName);
|
||||
#endif
|
||||
if (!code)
|
||||
code = rename(pbuffer, tbuffer);
|
||||
if (!code)
|
||||
#endif
|
||||
code = (*ubik_dbase->setlabel) (dbase, file, avers);
|
||||
#ifndef OLD_URECOVERY
|
||||
#ifdef AFS_NT40_ENV
|
||||
afs_snprintf(pbuffer, sizeof(pbuffer), "%s.DB0.OLD", ubik_dbase->pathName);
|
||||
unlink(pbuffer);
|
||||
#endif
|
||||
#endif
|
||||
memcpy(&ubik_dbase->version, avers, sizeof(struct ubik_version));
|
||||
udisk_Invalidate(dbase, file); /* new dbase, flush disk buffers */
|
||||
LWP_NoYieldSignal(&dbase->version);
|
||||
DBRELE(dbase);
|
||||
failed:
|
||||
if (code) {
|
||||
#ifndef OLD_URECOVERY
|
||||
unlink(pbuffer);
|
||||
#endif
|
||||
ubik_print
|
||||
("Ubik: Synchronize database with server %s failed (error = %d)\n",
|
||||
afs_inet_ntoa(otherHost), code);
|
||||
|
Loading…
x
Reference in New Issue
Block a user