MFC 287442,287537,288944:

Fix corruption of coredumps due to procstat notes changing size during
coredump generation.  The changes in r287442 required some reworking
since the 'fo_fill_kinfo' file op does not exist in stable/10.

287442:
Detect badly behaved coredump note helpers

Coredump notes depend on being able to invoke dump routines twice; once
in a dry-run mode to get the size of the note, and another to actually
emit the note to the corefile.

When a note helper emits a different length section the second time
around than the length it requested the first time, the kernel produces
a corrupt coredump.

NT_PROCSTAT_FILES output length, when packing kinfo structs, is tied to
the length of filenames corresponding to vnodes in the process' fd table
via vn_fullpath.  As vnodes may move around during dump, this is racy.

So:

 - Detect badly behaved notes in putnote() and pad underfilled notes.

 - Add a fail point, debug.fail_point.fill_kinfo_vnode__random_path to
   exercise the NT_PROCSTAT_FILES corruption.  It simply picks random
   lengths to expand or truncate paths to in fo_fill_kinfo_vnode().

 - Add a sysctl, kern.coredump_pack_fileinfo, to allow users to
   disable kinfo packing for PROCSTAT_FILES notes.  This should avoid
   both FILES note corruption and truncation, even if filenames change,
   at the cost of about 1 kiB in padding bloat per open fd.  Document
   the new sysctl in core.5.

 - Fix note_procstat_files to self-limit in the 2nd pass.  Since
   sometimes this will result in a short write, pad up to our advertised
   size.  This addresses note corruption, at the risk of sometimes
   truncating the last several fd info entries.

 - Fix NT_PROCSTAT_FILES consumers libutil and libprocstat to grok the
   zero padding.

287537:
Follow-up to r287442: Move sysctl to compiled-once file

Avoid duplicate sysctl nodes.

288944:
Fix core corruption caused by race in note_procstat_vmmap

This fix is spiritually similar to r287442 and was discovered thanks to
the KASSERT added in that revision.

NT_PROCSTAT_VMMAP output length, when packing kinfo structs, is tied to
the length of filenames corresponding to vnodes in the process' vm map
via vn_fullpath.  As vnodes may move during coredump, this is racy.

We do not remove the race, only prevent it from causing coredump
corruption.

- Add a sysctl, kern.coredump_pack_vmmapinfo, to allow users to disable
  kinfo packing for PROCSTAT_VMMAP notes.  This avoids VMMAP corruption
  and truncation, even if names change, at the cost of up to PATH_MAX
  bytes per mapped object.  The new sysctl is documented in core.5.

- Fix note_procstat_vmmap to self-limit in the second pass.  This
  addresses corruption, at the cost of sometimes producing a truncated
  result.

- Fix PROCSTAT_VMMAP consumers libutil (and libprocstat, via copy-paste)
  to grok the new zero padding.

Approved by:	re (gjb)
This commit is contained in:
John Baldwin 2016-02-10 00:08:51 +00:00
parent e4af2f2182
commit 78e6be6e43
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/stable/10/; revision=295454
10 changed files with 181 additions and 24 deletions

View File

@ -767,6 +767,8 @@ kinfo_getfile_core(struct procstat_core *core, int *cntp)
eb = buf + len;
while (bp < eb) {
kf = (struct kinfo_file *)(uintptr_t)bp;
if (kf->kf_structsize == 0)
break;
bp += kf->kf_structsize;
cnt++;
}
@ -782,6 +784,8 @@ kinfo_getfile_core(struct procstat_core *core, int *cntp)
/* Pass 2: unpack */
while (bp < eb) {
kf = (struct kinfo_file *)(uintptr_t)bp;
if (kf->kf_structsize == 0)
break;
/* Copy/expand into pre-zeroed buffer */
memcpy(kp, kf, kf->kf_structsize);
/* Advance to next packed record */
@ -1863,6 +1867,8 @@ kinfo_getvmmap_core(struct procstat_core *core, int *cntp)
eb = buf + len;
while (bp < eb) {
kv = (struct kinfo_vmentry *)(uintptr_t)bp;
if (kv->kve_structsize == 0)
break;
bp += kv->kve_structsize;
cnt++;
}
@ -1878,6 +1884,8 @@ kinfo_getvmmap_core(struct procstat_core *core, int *cntp)
/* Pass 2: unpack */
while (bp < eb) {
kv = (struct kinfo_vmentry *)(uintptr_t)bp;
if (kv->kve_structsize == 0)
break;
/* Copy/expand into pre-zeroed buffer */
memcpy(kp, kv, kv->kve_structsize);
/* Advance to next packed record */

View File

@ -44,6 +44,8 @@ kinfo_getfile(pid_t pid, int *cntp)
eb = buf + len;
while (bp < eb) {
kf = (struct kinfo_file *)(uintptr_t)bp;
if (kf->kf_structsize == 0)
break;
bp += kf->kf_structsize;
cnt++;
}
@ -59,6 +61,8 @@ kinfo_getfile(pid_t pid, int *cntp)
/* Pass 2: unpack */
while (bp < eb) {
kf = (struct kinfo_file *)(uintptr_t)bp;
if (kf->kf_structsize == 0)
break;
/* Copy/expand into pre-zeroed buffer */
memcpy(kp, kf, kf->kf_structsize);
/* Advance to next packed record */

View File

@ -44,6 +44,8 @@ kinfo_getvmmap(pid_t pid, int *cntp)
eb = buf + len;
while (bp < eb) {
kv = (struct kinfo_vmentry *)(uintptr_t)bp;
if (kv->kve_structsize == 0)
break;
bp += kv->kve_structsize;
cnt++;
}
@ -59,6 +61,8 @@ kinfo_getvmmap(pid_t pid, int *cntp)
/* Pass 2: unpack */
while (bp < eb) {
kv = (struct kinfo_vmentry *)(uintptr_t)bp;
if (kv->kve_structsize == 0)
break;
/* Copy/expand into pre-zeroed buffer */
memcpy(kp, kv, kv->kve_structsize);
/* Advance to next packed record */

View File

@ -32,7 +32,7 @@
.\" @(#)core.5 8.3 (Berkeley) 12/11/93
.\" $FreeBSD$
.\"
.Dd November 22, 2012
.Dd October 5, 2015
.Dt CORE 5
.Os
.Sh NAME
@ -126,6 +126,29 @@ Core files will have the suffix
.Em .gz
appended to them.
.El
.Sh NOTES
Corefiles are written with open file descriptor information as an ELF note.
By default, file paths are packed to only use as much space as needed.
However, file paths can change at any time, including during core dump,
and this can result in truncated file descriptor data.
.Pp
All file descriptor information can be preserved by disabling packing.
This potentially wastes up to PATH_MAX bytes per open fd.
Packing is disabled with
.Dl sysctl kern.coredump_pack_fileinfo=0 .
.Pp
Similarly, corefiles are written with vmmap information as an ELF note, which
contains file paths.
By default, they are packed to only use as much space as
needed.
By the same mechanism as for the open files note, these paths can also
change at any time and result in a truncated note.
.Pp
All vmmap information can be preserved by disabling packing.
Like the file information, this potentially wastes up to PATH_MAX bytes per
mapped object.
Packing is disabled with
.Dl sysctl kern.coredump_pack_vmmapinfo=0 .
.Sh EXAMPLES
In order to store all core images in per-user private areas under
.Pa /var/coredumps ,

View File

@ -1709,7 +1709,8 @@ static void
__elfN(putnote)(struct note_info *ninfo, struct sbuf *sb)
{
Elf_Note note;
ssize_t old_len;
ssize_t old_len, sect_len;
size_t new_len, descsz, i;
if (ninfo->type == -1) {
ninfo->outfunc(ninfo->outarg, sb, &ninfo->outsize);
@ -1728,7 +1729,33 @@ __elfN(putnote)(struct note_info *ninfo, struct sbuf *sb)
return;
sbuf_start_section(sb, &old_len);
ninfo->outfunc(ninfo->outarg, sb, &ninfo->outsize);
sbuf_end_section(sb, old_len, ELF_NOTE_ROUNDSIZE, 0);
sect_len = sbuf_end_section(sb, old_len, ELF_NOTE_ROUNDSIZE, 0);
if (sect_len < 0)
return;
new_len = (size_t)sect_len;
descsz = roundup(note.n_descsz, ELF_NOTE_ROUNDSIZE);
if (new_len < descsz) {
/*
* It is expected that individual note emitters will correctly
* predict their expected output size and fill up to that size
* themselves, padding in a format-specific way if needed.
* However, in case they don't, just do it here with zeros.
*/
for (i = 0; i < descsz - new_len; i++)
sbuf_putc(sb, 0);
} else if (new_len > descsz) {
/*
* We can't always truncate sb -- we may have drained some
* of it already.
*/
KASSERT(new_len == descsz, ("%s: Note type %u changed as we "
"read it (%zu > %zu). Since it is longer than "
"expected, this coredump's notes are corrupt. THIS "
"IS A BUG in the note_procstat routine for type %u.\n",
__func__, (unsigned)note.n_type, new_len, descsz,
(unsigned)note.n_type));
}
}
/*
@ -1909,25 +1936,47 @@ static void
note_procstat_files(void *arg, struct sbuf *sb, size_t *sizep)
{
struct proc *p;
size_t size;
int structsize;
size_t size, sect_sz, i;
ssize_t start_len, sect_len;
int structsize, filedesc_flags;
if (coredump_pack_fileinfo)
filedesc_flags = KERN_FILEDESC_PACK_KINFO;
else
filedesc_flags = 0;
p = (struct proc *)arg;
structsize = sizeof(struct kinfo_file);
if (sb == NULL) {
size = 0;
sb = sbuf_new(NULL, NULL, 128, SBUF_FIXEDLEN);
sbuf_set_drain(sb, sbuf_drain_count, &size);
sbuf_bcat(sb, &structsize, sizeof(structsize));
PROC_LOCK(p);
kern_proc_filedesc_out(p, sb, -1);
kern_proc_filedesc_out(p, sb, -1, filedesc_flags);
sbuf_finish(sb);
sbuf_delete(sb);
*sizep = size;
} else {
structsize = sizeof(struct kinfo_file);
sbuf_start_section(sb, &start_len);
sbuf_bcat(sb, &structsize, sizeof(structsize));
PROC_LOCK(p);
kern_proc_filedesc_out(p, sb, -1);
kern_proc_filedesc_out(p, sb, *sizep - sizeof(structsize),
filedesc_flags);
sect_len = sbuf_end_section(sb, start_len, 0, 0);
if (sect_len < 0)
return;
sect_sz = sect_len;
KASSERT(sect_sz <= *sizep,
("kern_proc_filedesc_out did not respect maxlen; "
"requested %zu, got %zu", *sizep - sizeof(structsize),
sect_sz - sizeof(structsize)));
for (i = 0; i < *sizep - sect_sz && sb->s_error == 0; i++)
sbuf_putc(sb, 0);
}
}
@ -1940,24 +1989,30 @@ note_procstat_vmmap(void *arg, struct sbuf *sb, size_t *sizep)
{
struct proc *p;
size_t size;
int structsize;
int structsize, vmmap_flags;
if (coredump_pack_vmmapinfo)
vmmap_flags = KERN_VMMAP_PACK_KINFO;
else
vmmap_flags = 0;
p = (struct proc *)arg;
structsize = sizeof(struct kinfo_vmentry);
if (sb == NULL) {
size = 0;
sb = sbuf_new(NULL, NULL, 128, SBUF_FIXEDLEN);
sbuf_set_drain(sb, sbuf_drain_count, &size);
sbuf_bcat(sb, &structsize, sizeof(structsize));
PROC_LOCK(p);
kern_proc_vmmap_out(p, sb);
kern_proc_vmmap_out(p, sb, -1, vmmap_flags);
sbuf_finish(sb);
sbuf_delete(sb);
*sizep = size;
} else {
structsize = sizeof(struct kinfo_vmentry);
sbuf_bcat(sb, &structsize, sizeof(structsize));
PROC_LOCK(p);
kern_proc_vmmap_out(p, sb);
kern_proc_vmmap_out(p, sb, *sizep - sizeof(structsize),
vmmap_flags);
}
}

View File

@ -49,6 +49,7 @@ __FBSDID("$FreeBSD$");
#include <sys/capsicum.h>
#include <sys/conf.h>
#include <sys/domain.h>
#include <sys/fail.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filedesc.h>
@ -3299,6 +3300,7 @@ struct export_fd_buf {
struct sbuf *sb;
ssize_t remainder;
struct kinfo_file kif;
int flags;
};
static int
@ -3385,9 +3387,12 @@ export_fd_to_sb(void *data, int type, int fd, int fflags, int refcnt,
kif->kf_type = type;
kif->kf_ref_count = refcnt;
kif->kf_offset = offset;
/* Pack record size down */
kif->kf_structsize = offsetof(struct kinfo_file, kf_path) +
strlen(kif->kf_path) + 1;
if ((efbuf->flags & KERN_FILEDESC_PACK_KINFO) != 0)
/* Pack record size down */
kif->kf_structsize = offsetof(struct kinfo_file, kf_path) +
strlen(kif->kf_path) + 1;
else
kif->kf_structsize = sizeof(*kif);
kif->kf_structsize = roundup(kif->kf_structsize, sizeof(uint64_t));
if (efbuf->remainder != -1) {
if (efbuf->remainder < kif->kf_structsize) {
@ -3413,7 +3418,8 @@ export_fd_to_sb(void *data, int type, int fd, int fflags, int refcnt,
* Takes a locked proc as argument, and returns with the proc unlocked.
*/
int
kern_proc_filedesc_out(struct proc *p, struct sbuf *sb, ssize_t maxlen)
kern_proc_filedesc_out(struct proc *p, struct sbuf *sb, ssize_t maxlen,
int flags)
{
struct file *fp;
struct filedesc *fdp;
@ -3448,6 +3454,7 @@ kern_proc_filedesc_out(struct proc *p, struct sbuf *sb, ssize_t maxlen)
efbuf->fdp = NULL;
efbuf->sb = sb;
efbuf->remainder = maxlen;
efbuf->flags = flags;
if (tracevp != NULL)
export_fd_to_sb(tracevp, KF_TYPE_VNODE, KF_FD_TYPE_TRACE,
FREAD | FWRITE, -1, -1, NULL, efbuf);
@ -3597,7 +3604,8 @@ sysctl_kern_proc_filedesc(SYSCTL_HANDLER_ARGS)
return (error);
}
maxlen = req->oldptr != NULL ? req->oldlen : -1;
error = kern_proc_filedesc_out(p, &sb, maxlen);
error = kern_proc_filedesc_out(p, &sb, maxlen,
KERN_FILEDESC_PACK_KINFO);
error2 = sbuf_finish(&sb);
sbuf_delete(&sb);
return (error != 0 ? error : error2);
@ -3635,6 +3643,24 @@ vntype_to_kinfo(int vtype)
return (KF_VTYPE_UNKNOWN);
}
static inline void
vn_fill_junk(struct kinfo_file *kif)
{
size_t len, olen;
/*
* Simulate vn_fullpath returning changing values for a given
* vp during e.g. coredump.
*/
len = (arc4random() % (sizeof(kif->kf_path) - 2)) + 1;
olen = strlen(kif->kf_path);
if (len < olen)
strcpy(&kif->kf_path[len - 1], "$");
else
for (; olen < len; olen++)
strcpy(&kif->kf_path[olen], "A");
}
static int
fill_vnode_info(struct vnode *vp, struct kinfo_file *kif)
{
@ -3654,6 +3680,10 @@ fill_vnode_info(struct vnode *vp, struct kinfo_file *kif)
if (freepath != NULL)
free(freepath, M_TEMP);
KFAIL_POINT_CODE(DEBUG_FP, fill_kinfo_vnode__random_path,
vn_fill_junk(kif);
);
/*
* Retrieve vnode attributes.
*/

View File

@ -102,6 +102,16 @@ SDT_PROBE_DEFINE1(proc, kernel, , exec__success, "char *");
MALLOC_DEFINE(M_PARGS, "proc-args", "Process arguments");
int coredump_pack_fileinfo = 1;
SYSCTL_INT(_kern, OID_AUTO, coredump_pack_fileinfo, CTLFLAG_RWTUN,
&coredump_pack_fileinfo, 0,
"Enable file path packing in 'procstat -f' coredump notes");
int coredump_pack_vmmapinfo = 1;
SYSCTL_INT(_kern, OID_AUTO, coredump_pack_vmmapinfo, CTLFLAG_RWTUN,
&coredump_pack_vmmapinfo, 0,
"Enable file path packing in 'procstat -v' coredump notes");
static int sysctl_kern_ps_strings(SYSCTL_HANDLER_ARGS);
static int sysctl_kern_usrstack(SYSCTL_HANDLER_ARGS);
static int sysctl_kern_stackprot(SYSCTL_HANDLER_ARGS);

View File

@ -2228,7 +2228,7 @@ next:;
* Must be called with the process locked and will return unlocked.
*/
int
kern_proc_vmmap_out(struct proc *p, struct sbuf *sb)
kern_proc_vmmap_out(struct proc *p, struct sbuf *sb, ssize_t maxlen, int flags)
{
vm_map_entry_t entry, tmp_entry;
struct vattr va;
@ -2252,7 +2252,7 @@ kern_proc_vmmap_out(struct proc *p, struct sbuf *sb)
PRELE(p);
return (ESRCH);
}
kve = malloc(sizeof(*kve), M_TEMP, M_WAITOK);
kve = malloc(sizeof(*kve), M_TEMP, M_WAITOK | M_ZERO);
error = 0;
map = &vm->vm_map;
@ -2387,10 +2387,23 @@ kern_proc_vmmap_out(struct proc *p, struct sbuf *sb)
free(freepath, M_TEMP);
/* Pack record size down */
kve->kve_structsize = offsetof(struct kinfo_vmentry, kve_path) +
strlen(kve->kve_path) + 1;
if ((flags & KERN_VMMAP_PACK_KINFO) != 0)
kve->kve_structsize =
offsetof(struct kinfo_vmentry, kve_path) +
strlen(kve->kve_path) + 1;
else
kve->kve_structsize = sizeof(*kve);
kve->kve_structsize = roundup(kve->kve_structsize,
sizeof(uint64_t));
/* Halt filling and truncate rather than exceeding maxlen */
if (maxlen != -1 && maxlen < kve->kve_structsize) {
error = 0;
vm_map_lock_read(map);
break;
} else if (maxlen != -1)
maxlen -= kve->kve_structsize;
if (sbuf_bcat(sb, kve, kve->kve_structsize) != 0)
error = ENOMEM;
vm_map_lock_read(map);
@ -2422,7 +2435,7 @@ sysctl_kern_proc_vmmap(SYSCTL_HANDLER_ARGS)
sbuf_delete(&sb);
return (error);
}
error = kern_proc_vmmap_out(p, &sb);
error = kern_proc_vmmap_out(p, &sb, -1, KERN_VMMAP_PACK_KINFO);
error2 = sbuf_finish(&sb);
sbuf_delete(&sb);
return (error != 0 ? error : error2);

View File

@ -83,6 +83,9 @@ void exec_unmap_first_page(struct image_params *);
int exec_register(const struct execsw *);
int exec_unregister(const struct execsw *);
extern int coredump_pack_fileinfo;
extern int coredump_pack_vmmapinfo;
/*
* note: name##_mod cannot be const storage because the
* linker_file_sysinit() function modifies _file in the

View File

@ -536,6 +536,11 @@ struct kinfo_sigtramp {
#define KERN_PROC_NOTHREADS 0x1
#define KERN_PROC_MASK32 0x2
/* Flags for kern_proc_filedesc_out. */
#define KERN_FILEDESC_PACK_KINFO 0x00000001U
/* Flags for kern_proc_vmmap_out. */
#define KERN_VMMAP_PACK_KINFO 0x00000001U
struct sbuf;
/*
@ -547,9 +552,11 @@ struct sbuf;
* to be locked on enter. On return the process is unlocked.
*/
int kern_proc_filedesc_out(struct proc *p, struct sbuf *sb, ssize_t maxlen);
int kern_proc_filedesc_out(struct proc *p, struct sbuf *sb, ssize_t maxlen,
int flags);
int kern_proc_out(struct proc *p, struct sbuf *sb, int flags);
int kern_proc_vmmap_out(struct proc *p, struct sbuf *sb);
int kern_proc_vmmap_out(struct proc *p, struct sbuf *sb, ssize_t maxlen,
int flags);
int vntype_to_kinfo(int vtype);
#endif /* !_KERNEL */