volser: Don't NUL-pad failed pread()s in dumps

Currently, the volserver SAFSVolDump RPC and the 'voldump' utility handle short reads from pread() for vnode payloads by padding the missing data with NUL bytes. That is, if we request 4k of data for our pread() call, and we only get back 1k of data, we'll write 1k of data to the volume dump stream followed by 3k of NUL bytes, and log messages like this: 1 Volser: DumpFile: Error reading inode 1234 for vnode 5678 1 Volser: DumpFile: Null padding file: 3072 bytes at offset 40960 This can happen if we hit EOF on the underlying file sooner than expected, or if the OS just responds with fewer bytes than requested for any reason. The same code path tries to do the same NUL-padding if pread() returns an error (for example, EIO), padding the entire e.g. 4k block with NULs. However, in this case, the "padding" code often doesn't work as intended, because we compare 'n' (set to -1) with 'howMany' (set to 4k in this example), like so: if (n < howMany) Here, 'n' is signed (ssize_t), and 'howMany' is unsigned (size_t), and so compilers will promote 'n' to the unsigned type, causing this conditional to fail when n is -1. As a result, all of the relevant log messages are skipped, and the data in the dumpstream gets corrupted (we skip a block of data, and our 'howFar' offset goes back by 1). So this can result in rare silent data corruption in volume dumps, which can occur during volume releases, moves, etc. To fix all of this, remove this bizarre NUL-padding behavior in the volserver. Instead: - For actual errors from pread(), return an error, like we do for I/O errors in most other code paths. - For short reads, just write out the amount of data we actually read, and keep going. - For premature EOF, treat it like a pread() error, but log a slightly different message. For the 'voldump' utility, the padding behavior can make sense if a user is trying to recover volume data offline in a disaster recovery scenario. So for voldump, add a new switch (-pad-errors) to enable the padding behavior, but change the default behavior to bail out on errors. Change-Id: Ibd6e76c5ea0dea95e3354d9b34536296f81b4f67 Reviewed-on: https://gerrit.openafs.org/14255 Tested-by: BuildBot <buildbot@rampaginggeek.com> Reviewed-by: Cheyenne Wills <cwills@sinenomine.net> Reviewed-by: Benjamin Kaduk <kaduk@mit.edu>
2025-01-18 06:50:12 +00:00 · 2020-06-22 22:54:52 -05:00 · 2020-06-22 22:54:52 -05:00 · 4498bd8179
commit 4498bd8179
parent 37b55b30c6
3 changed files with 80 additions and 84 deletions
--- a/doc/man-pages/pod8/voldump.pod
+++ b/doc/man-pages/pod8/voldump.pod
@ -9,7 +9,7 @@ voldump - Dump an AFS volume without using the Volume Server

 B<voldump> S<<< B<-part> <I<partition>> >>> S<<< B<-volumeid> <I<volume id>> >>>
    S<<< [B<-file> <I<dump file>>] >>> [B<-time> <I<dump from time>>]
-    [B<-verbose>] [B<-help>]
+    [B<-pad-errors>] [B<-verbose>] [B<-help>]

 B<voldump> S<<< B<-p> <I<partition>> >>> S<<< B<-vo> <I<volume id>> >>>
    S<<< [B<-f> <I<dump file>>] >>> [B<-time> <I<dump from time>>]
@ -70,6 +70,18 @@ volume will be dumped to standard output.
 Specifies whether the dump is full or incremental. Omit this argument to create
 a full dump, or provide one of the valid values listed in L<vos_dump(1)>.

+=item B<-pad-errors>
+
+When reading vnode data from disk, if B<voldump> encounters an I/O error or
+unexpected EOF, by default B<voldump> will print an error and exit. If
+B<-pad-errors> is given, instead B<voldump> will pad the unreadable region with
+NUL bytes, and continue with the dump.
+
+This option may be useful when trying to extract data from volumes where the
+underlying disk is failing, or the volume data is corrupted. Data may be
+missing from files in the volume in such cases (replaced by NUL bytes), but at
+least some data may be extracted.
+
 =item B<-verbose>

 Asks for a verbose trace of the dump process.  This trace information will
--- a/src/volser/dumpstuff.c
+++ b/src/volser/dumpstuff.c
@ -704,10 +704,8 @@ static int
 DumpFile(struct iod *iodp, int vnode, FdHandle_t * handleP)
 {
    int code = 0, error = 0;
-    afs_int32 pad = 0;
-    afs_foff_t offset = 0;
    afs_sfsize_t nbytes, howBig;
-    ssize_t n;
+    ssize_t n = 0;
    size_t howMany;
    afs_foff_t howFar = 0;
    byte *p;
@ -776,62 +774,38 @@ DumpFile(struct iod *iodp, int vnode, FdHandle_t * handleP)
 	return VOLSERDUMPERROR;
    }

-    for (nbytes = howBig; (nbytes && !error); nbytes -= howMany) {
+    for (nbytes = howBig; (nbytes && !error); ) {
 	if (nbytes < howMany)
 	    howMany = nbytes;

 	/* Read the data */
 	n = FDH_PREAD(handleP, p, howMany, howFar);
+	if (n < 0) {
+	    Log("1 Volser: DumpFile: Error reading inode %s for vnode %d: %s\n",
+		PrintInode(stmp, handleP->fd_ih->ih_ino), vnode,
+		afs_error_message(errno));
+	    error = VOLSERDUMPERROR;
+
+	} else if (n == 0) {
+	    Log("1 Volser: DumpFile: Premature EOF reading inode %s for vnode %d\n",
+		PrintInode(stmp, handleP->fd_ih->ih_ino), vnode);
+	    error = VOLSERDUMPERROR;
+	}
+	if (error != 0) {
+	    break;
+	}
+
 	howFar += n;
-
-	/* If read any good data and we null padded previously, log the
-	 * amount that we had null padded.
-	 */
-	if ((n > 0) && pad) {
-	    Log("1 Volser: DumpFile: Null padding file %d bytes at offset %lld\n", pad, (long long)offset);
-	    pad = 0;
-	}
-
-	/* If didn't read enough data, null padd the rest of the buffer. This
-	 * can happen if, for instance, the media has some bad spots. We don't
-	 * want to quit the dump, so we start null padding.
-	 */
-	if (n < howMany) {
-	    /* Record the read error */
-	    if (n < 0) {
-		n = 0;
-		Log("1 Volser: DumpFile: Error reading inode %s for vnode %d: %s\n", PrintInode(stmp, handleP->fd_ih->ih_ino), vnode, afs_error_message(errno));
-	    } else if (!pad) {
-		Log("1 Volser: DumpFile: Error reading inode %s for vnode %d\n", PrintInode(stmp, handleP->fd_ih->ih_ino), vnode);
-	    }
-
-	    /* Pad the rest of the buffer with zeros. Remember offset we started
-	     * padding. Keep total tally of padding.
-	     */
-	    memset(p + n, 0, howMany - n);
-	    if (!pad)
-		offset = (howBig - nbytes) + n;
-	    pad += (howMany - n);
-
-	    /* Now seek over the data we could not get. An error here means we
-	     * can't do the next read.
-	     */
-	    howFar = (size_t)((howBig - nbytes) + howMany);
-	}
+	nbytes -= n;

 	/* Now write the data out */
-	if (iod_Write(iodp, (char *)p, howMany) != howMany)
+	if (iod_Write(iodp, (char *)p, n) != n)
 	    error = VOLSERDUMPERROR;
 #ifndef AFS_PTHREAD_ENV
 	IOMGR_Poll();
 #endif
    }

-    if (pad) {			/* Any padding we hadn't reported yet */
-	Log("1 Volser: DumpFile: Null padding file: %d bytes at offset %lld\n",
-	    pad, (long long)offset);
-    }
-
    free(p);
    return error;
 }
--- a/src/volser/vol-dump.c
+++ b/src/volser/vol-dump.c
@ -53,6 +53,7 @@

 int VolumeChanged;		/* needed by physio - leave alone */
 int verbose = 0;
+static int enable_padding; /* Pad errors with NUL bytes */

 /* Forward Declarations */
 static void HandleVolume(struct DiskPartition64 *partP, char *name,
@ -161,6 +162,9 @@ handleit(struct cmd_syndesc *as, void *arock)
 		return code;
 	}
    }
+    if (as->parms[5].items != NULL) { /* -pad-errors */
+	enable_padding = 1;
+    }

    DInit(10);

@ -271,6 +275,8 @@ main(int argc, char **argv)
    cmd_AddParm(ts, "-verbose", CMD_FLAG, CMD_OPTIONAL,
 		"Trace dump progress (very verbose)");
    cmd_AddParm(ts, "-time", CMD_SINGLE, CMD_OPTIONAL, "dump from time");
+    cmd_AddParm(ts, "-pad-errors", CMD_FLAG, CMD_OPTIONAL,
+		"pad i/o errors with NUL bytes");
    code = cmd_Dispatch(argc, argv);
    return code;
 }
@ -527,11 +533,11 @@ DumpByteString(int dumpfd, char tag, byte * bs, int nbytes)
 static int
 DumpFile(int dumpfd, int vnode, FdHandle_t * handleP,  struct VnodeDiskObject *v)
 {
-    int code = 0, failed_seek = 0, failed_write = 0;
+    int code = 0;
    afs_int32 pad = 0;
    afs_foff_t offset = 0;
    afs_sfsize_t nbytes, howBig;
-    ssize_t n;
+    ssize_t n = 0;
    size_t howMany;
    afs_foff_t howFar = 0;
    byte *p;
@ -600,13 +606,11 @@ DumpFile(int dumpfd, int vnode, FdHandle_t * handleP,  struct VnodeDiskObject *v
    }

    /* loop through whole file, while we still have bytes left, and no errors, in chunks of howMany bytes */
-    for (nbytes = size; (nbytes && !failed_write); nbytes -= howMany) {
+    for (nbytes = size; (nbytes && !code); ) {
 	if (nbytes < howMany)
 	    howMany = nbytes;

-	/* Read the data - unless we know we can't */
-	n = (failed_seek ? 0 : FDH_PREAD(handleP, p, howMany, howFar));
-	howFar += n;
+	n = FDH_PREAD(handleP, p, howMany, howFar);

 	/* If read any good data and we null padded previously, log the
 	 * amount that we had null padded.
@ -617,42 +621,48 @@ DumpFile(int dumpfd, int vnode, FdHandle_t * handleP,  struct VnodeDiskObject *v
 	    pad = 0;
 	}

-	/* If didn't read enough data, null padd the rest of the buffer. This
-	 * can happen if, for instance, the media has some bad spots. We don't
-	 * want to quit the dump, so we start null padding.
-	 */
-	if (n < howMany) {
-
-		if (verbose) fprintf(stderr, "  read %u instead of %u bytes.\n", (unsigned)n, (unsigned)howMany);
-
-	    /* Record the read error */
-	    if (n < 0) {
-		n = 0;
-		fprintf(stderr, "Error %d reading inode %s for vnode %d\n",
-			errno, PrintInode(stmp, handleP->fd_ih->ih_ino),
-			vnode);
-	    } else if (!pad) {
-		fprintf(stderr, "Error reading inode %s for vnode %d\n",
+	if (n < 0) {
+	    fprintf(stderr, "Error %d reading inode %s for vnode %d\n",
+		    errno, PrintInode(stmp, handleP->fd_ih->ih_ino),
+		    vnode);
+	    code = VOLSERDUMPERROR;
+	}
+	if (n == 0) {
+	    if (pad == 0) {
+		fprintf(stderr, "Unexpected EOF reading inode %s for vnode %d\n",
 			PrintInode(stmp, handleP->fd_ih->ih_ino), vnode);
 	    }
-
-	    /* Pad the rest of the buffer with zeros. Remember offset we started
-	     * padding. Keep total tally of padding.
-	     */
-	    memset(p + n, 0, howMany - n);
-	    if (!pad)
-		offset = (howBig - nbytes) + n;
-	    pad += (howMany - n);
-
-	    /* Now seek over the data we could not get. An error here means we
-	     * can't do the next read.
-	     */
-	    howFar = ((size - nbytes) + howMany);
+	    code = VOLSERDUMPERROR;
 	}

+	if (code != 0 && enable_padding) {
+	    /*
+	     * If our read failed, NUL-pad the rest of the buffer. This can
+	     * happen if, for instance, the media has some bad spots. We don't
+	     * want to quit the dump, so we start NUL padding.
+	     */
+	    memset(p, 0, howMany);
+
+	    /* Remember the offset where we started padding, and keep a total
+	     * tally of how much padding we've done. */
+	    if (!pad)
+		offset = howFar;
+	    pad += howMany;
+
+	    /* Pretend we read 'howMany' bytes. */
+	    n = howMany;
+	    code = 0;
+	}
+	if (code != 0) {
+	    break;
+	}
+
+	howFar += n;
+	nbytes -= n;
+
 	/* Now write the data out */
-	if (write(dumpfd, (char *)p, howMany) != howMany)
-	    failed_write = VOLSERDUMPERROR;
+	if (write(dumpfd, (char *)p, n) != n)
+	    code = VOLSERDUMPERROR;
    }

    if (pad) {			/* Any padding we hadn't reported yet */
@ -661,7 +671,7 @@ DumpFile(int dumpfd, int vnode, FdHandle_t * handleP,  struct VnodeDiskObject *v
    }

    free(p);
-    return failed_write;
+    return code;
 }