From 78e6be6e43440e498ea6fd69e8a477c15f368d1d Mon Sep 17 00:00:00 2001 From: John Baldwin Date: Wed, 10 Feb 2016 00:08:51 +0000 Subject: [PATCH] MFC 287442,287537,288944: Fix corruption of coredumps due to procstat notes changing size during coredump generation. The changes in r287442 required some reworking since the 'fo_fill_kinfo' file op does not exist in stable/10. 287442: Detect badly behaved coredump note helpers Coredump notes depend on being able to invoke dump routines twice; once in a dry-run mode to get the size of the note, and another to actually emit the note to the corefile. When a note helper emits a different length section the second time around than the length it requested the first time, the kernel produces a corrupt coredump. NT_PROCSTAT_FILES output length, when packing kinfo structs, is tied to the length of filenames corresponding to vnodes in the process' fd table via vn_fullpath. As vnodes may move around during dump, this is racy. So: - Detect badly behaved notes in putnote() and pad underfilled notes. - Add a fail point, debug.fail_point.fill_kinfo_vnode__random_path to exercise the NT_PROCSTAT_FILES corruption. It simply picks random lengths to expand or truncate paths to in fo_fill_kinfo_vnode(). - Add a sysctl, kern.coredump_pack_fileinfo, to allow users to disable kinfo packing for PROCSTAT_FILES notes. This should avoid both FILES note corruption and truncation, even if filenames change, at the cost of about 1 kiB in padding bloat per open fd. Document the new sysctl in core.5. - Fix note_procstat_files to self-limit in the 2nd pass. Since sometimes this will result in a short write, pad up to our advertised size. This addresses note corruption, at the risk of sometimes truncating the last several fd info entries. - Fix NT_PROCSTAT_FILES consumers libutil and libprocstat to grok the zero padding. 287537: Follow-up to r287442: Move sysctl to compiled-once file Avoid duplicate sysctl nodes. 288944: Fix core corruption caused by race in note_procstat_vmmap This fix is spiritually similar to r287442 and was discovered thanks to the KASSERT added in that revision. NT_PROCSTAT_VMMAP output length, when packing kinfo structs, is tied to the length of filenames corresponding to vnodes in the process' vm map via vn_fullpath. As vnodes may move during coredump, this is racy. We do not remove the race, only prevent it from causing coredump corruption. - Add a sysctl, kern.coredump_pack_vmmapinfo, to allow users to disable kinfo packing for PROCSTAT_VMMAP notes. This avoids VMMAP corruption and truncation, even if names change, at the cost of up to PATH_MAX bytes per mapped object. The new sysctl is documented in core.5. - Fix note_procstat_vmmap to self-limit in the second pass. This addresses corruption, at the cost of sometimes producing a truncated result. - Fix PROCSTAT_VMMAP consumers libutil (and libprocstat, via copy-paste) to grok the new zero padding. Approved by: re (gjb) --- lib/libprocstat/libprocstat.c | 8 ++++ lib/libutil/kinfo_getfile.c | 4 ++ lib/libutil/kinfo_getvmmap.c | 4 ++ share/man/man5/core.5 | 25 +++++++++++- sys/kern/imgact_elf.c | 77 ++++++++++++++++++++++++++++++----- sys/kern/kern_descrip.c | 40 +++++++++++++++--- sys/kern/kern_exec.c | 10 +++++ sys/kern/kern_proc.c | 23 ++++++++--- sys/sys/exec.h | 3 ++ sys/sys/user.h | 11 ++++- 10 files changed, 181 insertions(+), 24 deletions(-) diff --git a/lib/libprocstat/libprocstat.c b/lib/libprocstat/libprocstat.c index 727419b71d7a..7b0722c2e6b5 100644 --- a/lib/libprocstat/libprocstat.c +++ b/lib/libprocstat/libprocstat.c @@ -767,6 +767,8 @@ kinfo_getfile_core(struct procstat_core *core, int *cntp) eb = buf + len; while (bp < eb) { kf = (struct kinfo_file *)(uintptr_t)bp; + if (kf->kf_structsize == 0) + break; bp += kf->kf_structsize; cnt++; } @@ -782,6 +784,8 @@ kinfo_getfile_core(struct procstat_core *core, int *cntp) /* Pass 2: unpack */ while (bp < eb) { kf = (struct kinfo_file *)(uintptr_t)bp; + if (kf->kf_structsize == 0) + break; /* Copy/expand into pre-zeroed buffer */ memcpy(kp, kf, kf->kf_structsize); /* Advance to next packed record */ @@ -1863,6 +1867,8 @@ kinfo_getvmmap_core(struct procstat_core *core, int *cntp) eb = buf + len; while (bp < eb) { kv = (struct kinfo_vmentry *)(uintptr_t)bp; + if (kv->kve_structsize == 0) + break; bp += kv->kve_structsize; cnt++; } @@ -1878,6 +1884,8 @@ kinfo_getvmmap_core(struct procstat_core *core, int *cntp) /* Pass 2: unpack */ while (bp < eb) { kv = (struct kinfo_vmentry *)(uintptr_t)bp; + if (kv->kve_structsize == 0) + break; /* Copy/expand into pre-zeroed buffer */ memcpy(kp, kv, kv->kve_structsize); /* Advance to next packed record */ diff --git a/lib/libutil/kinfo_getfile.c b/lib/libutil/kinfo_getfile.c index 84b64db20204..8a5477f40654 100644 --- a/lib/libutil/kinfo_getfile.c +++ b/lib/libutil/kinfo_getfile.c @@ -44,6 +44,8 @@ kinfo_getfile(pid_t pid, int *cntp) eb = buf + len; while (bp < eb) { kf = (struct kinfo_file *)(uintptr_t)bp; + if (kf->kf_structsize == 0) + break; bp += kf->kf_structsize; cnt++; } @@ -59,6 +61,8 @@ kinfo_getfile(pid_t pid, int *cntp) /* Pass 2: unpack */ while (bp < eb) { kf = (struct kinfo_file *)(uintptr_t)bp; + if (kf->kf_structsize == 0) + break; /* Copy/expand into pre-zeroed buffer */ memcpy(kp, kf, kf->kf_structsize); /* Advance to next packed record */ diff --git a/lib/libutil/kinfo_getvmmap.c b/lib/libutil/kinfo_getvmmap.c index 129aa036bd90..9d9e4273a110 100644 --- a/lib/libutil/kinfo_getvmmap.c +++ b/lib/libutil/kinfo_getvmmap.c @@ -44,6 +44,8 @@ kinfo_getvmmap(pid_t pid, int *cntp) eb = buf + len; while (bp < eb) { kv = (struct kinfo_vmentry *)(uintptr_t)bp; + if (kv->kve_structsize == 0) + break; bp += kv->kve_structsize; cnt++; } @@ -59,6 +61,8 @@ kinfo_getvmmap(pid_t pid, int *cntp) /* Pass 2: unpack */ while (bp < eb) { kv = (struct kinfo_vmentry *)(uintptr_t)bp; + if (kv->kve_structsize == 0) + break; /* Copy/expand into pre-zeroed buffer */ memcpy(kp, kv, kv->kve_structsize); /* Advance to next packed record */ diff --git a/share/man/man5/core.5 b/share/man/man5/core.5 index e8a165337666..a5fefc0c4358 100644 --- a/share/man/man5/core.5 +++ b/share/man/man5/core.5 @@ -32,7 +32,7 @@ .\" @(#)core.5 8.3 (Berkeley) 12/11/93 .\" $FreeBSD$ .\" -.Dd November 22, 2012 +.Dd October 5, 2015 .Dt CORE 5 .Os .Sh NAME @@ -126,6 +126,29 @@ Core files will have the suffix .Em .gz appended to them. .El +.Sh NOTES +Corefiles are written with open file descriptor information as an ELF note. +By default, file paths are packed to only use as much space as needed. +However, file paths can change at any time, including during core dump, +and this can result in truncated file descriptor data. +.Pp +All file descriptor information can be preserved by disabling packing. +This potentially wastes up to PATH_MAX bytes per open fd. +Packing is disabled with +.Dl sysctl kern.coredump_pack_fileinfo=0 . +.Pp +Similarly, corefiles are written with vmmap information as an ELF note, which +contains file paths. +By default, they are packed to only use as much space as +needed. +By the same mechanism as for the open files note, these paths can also +change at any time and result in a truncated note. +.Pp +All vmmap information can be preserved by disabling packing. +Like the file information, this potentially wastes up to PATH_MAX bytes per +mapped object. +Packing is disabled with +.Dl sysctl kern.coredump_pack_vmmapinfo=0 . .Sh EXAMPLES In order to store all core images in per-user private areas under .Pa /var/coredumps , diff --git a/sys/kern/imgact_elf.c b/sys/kern/imgact_elf.c index 394d61ffaafa..e3c08def846d 100644 --- a/sys/kern/imgact_elf.c +++ b/sys/kern/imgact_elf.c @@ -1709,7 +1709,8 @@ static void __elfN(putnote)(struct note_info *ninfo, struct sbuf *sb) { Elf_Note note; - ssize_t old_len; + ssize_t old_len, sect_len; + size_t new_len, descsz, i; if (ninfo->type == -1) { ninfo->outfunc(ninfo->outarg, sb, &ninfo->outsize); @@ -1728,7 +1729,33 @@ __elfN(putnote)(struct note_info *ninfo, struct sbuf *sb) return; sbuf_start_section(sb, &old_len); ninfo->outfunc(ninfo->outarg, sb, &ninfo->outsize); - sbuf_end_section(sb, old_len, ELF_NOTE_ROUNDSIZE, 0); + sect_len = sbuf_end_section(sb, old_len, ELF_NOTE_ROUNDSIZE, 0); + if (sect_len < 0) + return; + + new_len = (size_t)sect_len; + descsz = roundup(note.n_descsz, ELF_NOTE_ROUNDSIZE); + if (new_len < descsz) { + /* + * It is expected that individual note emitters will correctly + * predict their expected output size and fill up to that size + * themselves, padding in a format-specific way if needed. + * However, in case they don't, just do it here with zeros. + */ + for (i = 0; i < descsz - new_len; i++) + sbuf_putc(sb, 0); + } else if (new_len > descsz) { + /* + * We can't always truncate sb -- we may have drained some + * of it already. + */ + KASSERT(new_len == descsz, ("%s: Note type %u changed as we " + "read it (%zu > %zu). Since it is longer than " + "expected, this coredump's notes are corrupt. THIS " + "IS A BUG in the note_procstat routine for type %u.\n", + __func__, (unsigned)note.n_type, new_len, descsz, + (unsigned)note.n_type)); + } } /* @@ -1909,25 +1936,47 @@ static void note_procstat_files(void *arg, struct sbuf *sb, size_t *sizep) { struct proc *p; - size_t size; - int structsize; + size_t size, sect_sz, i; + ssize_t start_len, sect_len; + int structsize, filedesc_flags; + + if (coredump_pack_fileinfo) + filedesc_flags = KERN_FILEDESC_PACK_KINFO; + else + filedesc_flags = 0; p = (struct proc *)arg; + structsize = sizeof(struct kinfo_file); if (sb == NULL) { size = 0; sb = sbuf_new(NULL, NULL, 128, SBUF_FIXEDLEN); sbuf_set_drain(sb, sbuf_drain_count, &size); sbuf_bcat(sb, &structsize, sizeof(structsize)); PROC_LOCK(p); - kern_proc_filedesc_out(p, sb, -1); + kern_proc_filedesc_out(p, sb, -1, filedesc_flags); sbuf_finish(sb); sbuf_delete(sb); *sizep = size; } else { - structsize = sizeof(struct kinfo_file); + sbuf_start_section(sb, &start_len); + sbuf_bcat(sb, &structsize, sizeof(structsize)); PROC_LOCK(p); - kern_proc_filedesc_out(p, sb, -1); + kern_proc_filedesc_out(p, sb, *sizep - sizeof(structsize), + filedesc_flags); + + sect_len = sbuf_end_section(sb, start_len, 0, 0); + if (sect_len < 0) + return; + sect_sz = sect_len; + + KASSERT(sect_sz <= *sizep, + ("kern_proc_filedesc_out did not respect maxlen; " + "requested %zu, got %zu", *sizep - sizeof(structsize), + sect_sz - sizeof(structsize))); + + for (i = 0; i < *sizep - sect_sz && sb->s_error == 0; i++) + sbuf_putc(sb, 0); } } @@ -1940,24 +1989,30 @@ note_procstat_vmmap(void *arg, struct sbuf *sb, size_t *sizep) { struct proc *p; size_t size; - int structsize; + int structsize, vmmap_flags; + + if (coredump_pack_vmmapinfo) + vmmap_flags = KERN_VMMAP_PACK_KINFO; + else + vmmap_flags = 0; p = (struct proc *)arg; + structsize = sizeof(struct kinfo_vmentry); if (sb == NULL) { size = 0; sb = sbuf_new(NULL, NULL, 128, SBUF_FIXEDLEN); sbuf_set_drain(sb, sbuf_drain_count, &size); sbuf_bcat(sb, &structsize, sizeof(structsize)); PROC_LOCK(p); - kern_proc_vmmap_out(p, sb); + kern_proc_vmmap_out(p, sb, -1, vmmap_flags); sbuf_finish(sb); sbuf_delete(sb); *sizep = size; } else { - structsize = sizeof(struct kinfo_vmentry); sbuf_bcat(sb, &structsize, sizeof(structsize)); PROC_LOCK(p); - kern_proc_vmmap_out(p, sb); + kern_proc_vmmap_out(p, sb, *sizep - sizeof(structsize), + vmmap_flags); } } diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c index 4930e2f6e165..7561368ed3ba 100644 --- a/sys/kern/kern_descrip.c +++ b/sys/kern/kern_descrip.c @@ -49,6 +49,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -3299,6 +3300,7 @@ struct export_fd_buf { struct sbuf *sb; ssize_t remainder; struct kinfo_file kif; + int flags; }; static int @@ -3385,9 +3387,12 @@ export_fd_to_sb(void *data, int type, int fd, int fflags, int refcnt, kif->kf_type = type; kif->kf_ref_count = refcnt; kif->kf_offset = offset; - /* Pack record size down */ - kif->kf_structsize = offsetof(struct kinfo_file, kf_path) + - strlen(kif->kf_path) + 1; + if ((efbuf->flags & KERN_FILEDESC_PACK_KINFO) != 0) + /* Pack record size down */ + kif->kf_structsize = offsetof(struct kinfo_file, kf_path) + + strlen(kif->kf_path) + 1; + else + kif->kf_structsize = sizeof(*kif); kif->kf_structsize = roundup(kif->kf_structsize, sizeof(uint64_t)); if (efbuf->remainder != -1) { if (efbuf->remainder < kif->kf_structsize) { @@ -3413,7 +3418,8 @@ export_fd_to_sb(void *data, int type, int fd, int fflags, int refcnt, * Takes a locked proc as argument, and returns with the proc unlocked. */ int -kern_proc_filedesc_out(struct proc *p, struct sbuf *sb, ssize_t maxlen) +kern_proc_filedesc_out(struct proc *p, struct sbuf *sb, ssize_t maxlen, + int flags) { struct file *fp; struct filedesc *fdp; @@ -3448,6 +3454,7 @@ kern_proc_filedesc_out(struct proc *p, struct sbuf *sb, ssize_t maxlen) efbuf->fdp = NULL; efbuf->sb = sb; efbuf->remainder = maxlen; + efbuf->flags = flags; if (tracevp != NULL) export_fd_to_sb(tracevp, KF_TYPE_VNODE, KF_FD_TYPE_TRACE, FREAD | FWRITE, -1, -1, NULL, efbuf); @@ -3597,7 +3604,8 @@ sysctl_kern_proc_filedesc(SYSCTL_HANDLER_ARGS) return (error); } maxlen = req->oldptr != NULL ? req->oldlen : -1; - error = kern_proc_filedesc_out(p, &sb, maxlen); + error = kern_proc_filedesc_out(p, &sb, maxlen, + KERN_FILEDESC_PACK_KINFO); error2 = sbuf_finish(&sb); sbuf_delete(&sb); return (error != 0 ? error : error2); @@ -3635,6 +3643,24 @@ vntype_to_kinfo(int vtype) return (KF_VTYPE_UNKNOWN); } +static inline void +vn_fill_junk(struct kinfo_file *kif) +{ + size_t len, olen; + + /* + * Simulate vn_fullpath returning changing values for a given + * vp during e.g. coredump. + */ + len = (arc4random() % (sizeof(kif->kf_path) - 2)) + 1; + olen = strlen(kif->kf_path); + if (len < olen) + strcpy(&kif->kf_path[len - 1], "$"); + else + for (; olen < len; olen++) + strcpy(&kif->kf_path[olen], "A"); +} + static int fill_vnode_info(struct vnode *vp, struct kinfo_file *kif) { @@ -3654,6 +3680,10 @@ fill_vnode_info(struct vnode *vp, struct kinfo_file *kif) if (freepath != NULL) free(freepath, M_TEMP); + KFAIL_POINT_CODE(DEBUG_FP, fill_kinfo_vnode__random_path, + vn_fill_junk(kif); + ); + /* * Retrieve vnode attributes. */ diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c index f82ffec50b24..47ea9b07c911 100644 --- a/sys/kern/kern_exec.c +++ b/sys/kern/kern_exec.c @@ -102,6 +102,16 @@ SDT_PROBE_DEFINE1(proc, kernel, , exec__success, "char *"); MALLOC_DEFINE(M_PARGS, "proc-args", "Process arguments"); +int coredump_pack_fileinfo = 1; +SYSCTL_INT(_kern, OID_AUTO, coredump_pack_fileinfo, CTLFLAG_RWTUN, + &coredump_pack_fileinfo, 0, + "Enable file path packing in 'procstat -f' coredump notes"); + +int coredump_pack_vmmapinfo = 1; +SYSCTL_INT(_kern, OID_AUTO, coredump_pack_vmmapinfo, CTLFLAG_RWTUN, + &coredump_pack_vmmapinfo, 0, + "Enable file path packing in 'procstat -v' coredump notes"); + static int sysctl_kern_ps_strings(SYSCTL_HANDLER_ARGS); static int sysctl_kern_usrstack(SYSCTL_HANDLER_ARGS); static int sysctl_kern_stackprot(SYSCTL_HANDLER_ARGS); diff --git a/sys/kern/kern_proc.c b/sys/kern/kern_proc.c index 6b6084098e2a..cfaa354f7cd1 100644 --- a/sys/kern/kern_proc.c +++ b/sys/kern/kern_proc.c @@ -2228,7 +2228,7 @@ next:; * Must be called with the process locked and will return unlocked. */ int -kern_proc_vmmap_out(struct proc *p, struct sbuf *sb) +kern_proc_vmmap_out(struct proc *p, struct sbuf *sb, ssize_t maxlen, int flags) { vm_map_entry_t entry, tmp_entry; struct vattr va; @@ -2252,7 +2252,7 @@ kern_proc_vmmap_out(struct proc *p, struct sbuf *sb) PRELE(p); return (ESRCH); } - kve = malloc(sizeof(*kve), M_TEMP, M_WAITOK); + kve = malloc(sizeof(*kve), M_TEMP, M_WAITOK | M_ZERO); error = 0; map = &vm->vm_map; @@ -2387,10 +2387,23 @@ kern_proc_vmmap_out(struct proc *p, struct sbuf *sb) free(freepath, M_TEMP); /* Pack record size down */ - kve->kve_structsize = offsetof(struct kinfo_vmentry, kve_path) + - strlen(kve->kve_path) + 1; + if ((flags & KERN_VMMAP_PACK_KINFO) != 0) + kve->kve_structsize = + offsetof(struct kinfo_vmentry, kve_path) + + strlen(kve->kve_path) + 1; + else + kve->kve_structsize = sizeof(*kve); kve->kve_structsize = roundup(kve->kve_structsize, sizeof(uint64_t)); + + /* Halt filling and truncate rather than exceeding maxlen */ + if (maxlen != -1 && maxlen < kve->kve_structsize) { + error = 0; + vm_map_lock_read(map); + break; + } else if (maxlen != -1) + maxlen -= kve->kve_structsize; + if (sbuf_bcat(sb, kve, kve->kve_structsize) != 0) error = ENOMEM; vm_map_lock_read(map); @@ -2422,7 +2435,7 @@ sysctl_kern_proc_vmmap(SYSCTL_HANDLER_ARGS) sbuf_delete(&sb); return (error); } - error = kern_proc_vmmap_out(p, &sb); + error = kern_proc_vmmap_out(p, &sb, -1, KERN_VMMAP_PACK_KINFO); error2 = sbuf_finish(&sb); sbuf_delete(&sb); return (error != 0 ? error : error2); diff --git a/sys/sys/exec.h b/sys/sys/exec.h index cccebfd1fb59..0ccba246a857 100644 --- a/sys/sys/exec.h +++ b/sys/sys/exec.h @@ -83,6 +83,9 @@ void exec_unmap_first_page(struct image_params *); int exec_register(const struct execsw *); int exec_unregister(const struct execsw *); +extern int coredump_pack_fileinfo; +extern int coredump_pack_vmmapinfo; + /* * note: name##_mod cannot be const storage because the * linker_file_sysinit() function modifies _file in the diff --git a/sys/sys/user.h b/sys/sys/user.h index 9374daf319d6..a697b66b57a4 100644 --- a/sys/sys/user.h +++ b/sys/sys/user.h @@ -536,6 +536,11 @@ struct kinfo_sigtramp { #define KERN_PROC_NOTHREADS 0x1 #define KERN_PROC_MASK32 0x2 +/* Flags for kern_proc_filedesc_out. */ +#define KERN_FILEDESC_PACK_KINFO 0x00000001U + +/* Flags for kern_proc_vmmap_out. */ +#define KERN_VMMAP_PACK_KINFO 0x00000001U struct sbuf; /* @@ -547,9 +552,11 @@ struct sbuf; * to be locked on enter. On return the process is unlocked. */ -int kern_proc_filedesc_out(struct proc *p, struct sbuf *sb, ssize_t maxlen); +int kern_proc_filedesc_out(struct proc *p, struct sbuf *sb, ssize_t maxlen, + int flags); int kern_proc_out(struct proc *p, struct sbuf *sb, int flags); -int kern_proc_vmmap_out(struct proc *p, struct sbuf *sb); +int kern_proc_vmmap_out(struct proc *p, struct sbuf *sb, ssize_t maxlen, + int flags); int vntype_to_kinfo(int vtype); #endif /* !_KERNEL */