From 3b5b20292b466eed14001b1a089f0f304cae6c2c Mon Sep 17 00:00:00 2001 From: Mark Johnston Date: Wed, 6 Mar 2019 00:01:06 +0000 Subject: [PATCH] Implement minidump support for RISC-V. Submitted by: Mitchell Horne Differential Revision: https://reviews.freebsd.org/D18320 --- sys/riscv/riscv/minidump_machdep.c | 376 ++++++++++++++++++++++++++++- sys/riscv/riscv/pmap.c | 4 - sys/riscv/riscv/uma_machdep.c | 6 - sys/vm/vm_page.c | 8 +- 4 files changed, 380 insertions(+), 14 deletions(-) diff --git a/sys/riscv/riscv/minidump_machdep.c b/sys/riscv/riscv/minidump_machdep.c index 7c92f7551145..42f166b53d60 100644 --- a/sys/riscv/riscv/minidump_machdep.c +++ b/sys/riscv/riscv/minidump_machdep.c @@ -1,5 +1,7 @@ /*- * Copyright (c) 2006 Peter Wemm + * Copyright (c) 2015 The FreeBSD Foundation + * Copyright (c) 2019 Mitchell Horne * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -53,9 +55,381 @@ __FBSDID("$FreeBSD$"); CTASSERT(sizeof(struct kerneldumpheader) == 512); CTASSERT(sizeof(*vm_page_dump) == 8); +uint64_t *vm_page_dump; +int vm_page_dump_size; + +static struct kerneldumpheader kdh; + +/* Handle chunked writes. 
*/
+static size_t fragsz;
+static void *dump_va;
+static size_t counter, progress, dumpsize;
+
+static uint64_t tmpbuffer[PAGE_SIZE / sizeof(uint64_t)];
+
+/* Percentage buckets; "visited" is set once a bucket has been printed. */
+static struct {
+	int min_per;
+	int max_per;
+	int visited;
+} progress_track[10] = {
+	{  0,  10, 0},
+	{ 10,  20, 0},
+	{ 20,  30, 0},
+	{ 30,  40, 0},
+	{ 40,  50, 0},
+	{ 50,  60, 0},
+	{ 60,  70, 0},
+	{ 70,  80, 0},
+	{ 80,  90, 0},
+	{ 90, 100, 0}
+};
+
+/*
+ * Print the completed percentage the first time it falls into a bucket of
+ * progress_track.  "progress" is the number of bytes still outstanding out
+ * of "dumpsize"; only the first bucket that contains the percentage is
+ * considered.
+ */
+static void
+report_progress(size_t progress, size_t dumpsize)
+{
+	int pct, slot;
+
+	pct = 100 - ((progress * 100) / dumpsize);
+	for (slot = 0; slot < nitems(progress_track); slot++) {
+		if (pct >= progress_track[slot].min_per &&
+		    pct <= progress_track[slot].max_per) {
+			/* Report each bucket at most once. */
+			if (!progress_track[slot].visited) {
+				progress_track[slot].visited = 1;
+				printf("..%d%%", pct);
+			}
+			return;
+		}
+	}
+}
+
+/*
+ * Decide whether the page at physical address pa may be written to the dump.
+ * A page that has a vm_page is dumpable unless PG_NODUMP is set; any other
+ * address is dumpable only if it lies inside a dump_avail[] range.
+ */
+static bool
+is_dumpable(vm_paddr_t pa)
+{
+	vm_page_t pg;
+	int idx;
+
+	pg = vm_phys_paddr_to_vm_page(pa);
+	if (pg != NULL)
+		return ((pg->flags & PG_NODUMP) == 0);
+
+	/* dump_avail[] holds (start, end) pairs terminated by a (0, 0) pair. */
+	idx = 0;
+	while (dump_avail[idx] != 0 || dump_avail[idx + 1] != 0) {
+		if (dump_avail[idx] <= pa && pa < dump_avail[idx + 1])
+			return (true);
+		idx += 2;
+	}
+	return (false);
+}
+
+/*
+ * Hand the pending fragment (fragsz bytes starting at dump_va) to
+ * dump_append() and reset fragsz.  Returns 0 when nothing is pending,
+ * otherwise the result of dump_append().
+ */
+static int
+blk_flush(struct dumperinfo *di)
+{
+	int rv;
+
+	rv = 0;
+	if (fragsz != 0) {
+		rv = dump_append(di, dump_va, 0, fragsz);
+		fragsz = 0;
+	}
+	return (rv);
+}
+
+/*
+ * Write a block of data to the dump file.
+ *
+ * Caller can provide data through a pointer or by specifying its
+ * physical address.
+ *
+ * XXX writes using pa should be no larger than PAGE_SIZE.
+ *
+ * sz must be a multiple of PAGE_SIZE, pa must be page aligned, and ptr and
+ * pa may not both be given.  Returns 0 on success, EINVAL when one of those
+ * preconditions is violated, ECANCELED when 0x03 (CTRL-C) is read from the
+ * console, or the error reported by dump_append()/blk_flush().
+ */
+static int
+blk_write(struct dumperinfo *di, char *ptr, vm_paddr_t pa, size_t sz)
+{
+	size_t len;
+	int error, c;
+	u_int maxdumpsz;
+
+	maxdumpsz = min(di->maxiosize, MAXDUMPPGS * PAGE_SIZE);
+	if (maxdumpsz == 0)	/* seatbelt */
+		maxdumpsz = PAGE_SIZE;
+	error = 0;
+	if ((sz % PAGE_SIZE) != 0) {
+		printf("size not page aligned\n");
+		return (EINVAL);
+	}
+	if (ptr != NULL && pa != 0) {
+		printf("cant have both va and pa!\n");
+		return (EINVAL);
+	}
+	if ((((uintptr_t)pa) % PAGE_SIZE) != 0) {
+		printf("address not page aligned %#lx\n", (uintptr_t)pa);
+		return (EINVAL);
+	}
+	if (ptr != NULL) {
+		/*
+		 * If we're doing a virtual dump, flush any
+		 * pre-existing pa pages.
+		 */
+		error = blk_flush(di);
+		if (error != 0)
+			return (error);
+	}
+	while (sz) {
+		len = maxdumpsz - fragsz;
+		if (len > sz)
+			len = sz;
+		counter += len;
+		progress -= len;
+		/* Report progress once per 4MB (1 << 22 bytes) written. */
+		if (counter >> 22) {
+			report_progress(progress, dumpsize);
+			counter &= (1 << 22) - 1;
+		}
+
+		wdog_kern_pat(WD_LASTVAL);
+
+		if (ptr) {
+			error = dump_append(di, ptr, 0, len);
+			if (error != 0)
+				return (error);
+			ptr += len;
+			sz -= len;
+		} else {
+			/* Physical pages are written through the direct map. */
+			dump_va = (void *)PHYS_TO_DMAP(pa);
+			fragsz += len;
+			pa += len;
+			sz -= len;
+			error = blk_flush(di);
+			if (error != 0)
+				return (error);
+		}
+
+		/* Check for user abort */
+		c = cncheckc();
+		if (c == 0x03)
+			return (ECANCELED);
+		if (c != -1)
+			printf(" (CTRL-C to abort) ");
+	}
+
+	return (0);
+}
+
+/*
+ * Write a minidump through di.
+ *
+ * The kernel page tables are walked to mark dumpable pages in vm_page_dump,
+ * the dump size is computed, and then the following are written in order:
+ * the minidump header page, the message buffer, the page bitmap, one page of
+ * l3 entries for every L2_SIZE step of kernel virtual address space, and
+ * finally every page whose bit is set in vm_page_dump.
+ *
+ * Returns 0 on success or a positive error number.  On ENOSPC the whole
+ * procedure is retried, for at most five attempts in total.
+ */
 int
 minidumpsys(struct dumperinfo *di)
 {
+	pd_entry_t *l1, *l2;
+	pt_entry_t *l3;
+	struct minidumphdr mdhdr;
+	uint32_t pmapsize;
+	vm_offset_t va;
+	vm_paddr_t pa;
+	int error;
+	uint64_t bits;
+	int i, bit;
+	int retry_count;
 
-	panic("minidumpsys");
+	retry_count = 0;
+retry:
+	retry_count++;
+	error = 0;
+	pmapsize = 0;
+
+	/* Build set of dumpable pages from kernel pmap */
+	for (va = VM_MIN_KERNEL_ADDRESS; va < kernel_vm_end; va += L2_SIZE) {
+		/* One page of l3 entries is dumped per step, mapped or not. */
+		pmapsize += PAGE_SIZE;
+		if (!pmap_get_tables(pmap_kernel(), va, &l1, &l2, &l3))
+			continue;
+
+		/* We should always be using the l2 table for kvm */
+		if (l2 == NULL)
+			continue;
+
+		/* l2 may be a superpage */
+		if ((*l2 & PTE_RWX) != 0) {
+			pa = (*l2 >> PTE_PPN1_S) << L2_SHIFT;
+			for (i = 0; i < Ln_ENTRIES; i++, pa += PAGE_SIZE) {
+				if (is_dumpable(pa))
+					dump_add_page(pa);
+			}
+		} else {
+			for (i = 0; i < Ln_ENTRIES; i++) {
+				if ((l3[i] & PTE_V) == 0)
+					continue;
+				pa = (l3[i] >> PTE_PPN0_S) * PAGE_SIZE;
+				if (is_dumpable(pa))
+					dump_add_page(pa);
+			}
+		}
+	}
+
+	/* Calculate dump size */
+	dumpsize = pmapsize;
+	dumpsize += round_page(msgbufp->msg_size);
+	dumpsize += round_page(vm_page_dump_size);
+	for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) {
+		bits = vm_page_dump[i];
+		while (bits) {
+			bit = ffsl(bits) - 1;
+			pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) +
+			    bit) * PAGE_SIZE;
+			/* Clear out undumpable pages now if needed */
+			if (is_dumpable(pa))
+				dumpsize += PAGE_SIZE;
+			else
+				dump_drop_page(pa);
+			bits &= ~(1ul << bit);
+		}
+	}
+	/* Account for the minidump header page. */
+	dumpsize += PAGE_SIZE;
+
+	progress = dumpsize;
+
+	/* Initialize mdhdr */
+	bzero(&mdhdr, sizeof(mdhdr));
+	strcpy(mdhdr.magic, MINIDUMP_MAGIC);
+	mdhdr.version = MINIDUMP_VERSION;
+	mdhdr.msgbufsize = msgbufp->msg_size;
+	mdhdr.bitmapsize = vm_page_dump_size;
+	mdhdr.pmapsize = pmapsize;
+	mdhdr.kernbase = KERNBASE;
+	mdhdr.dmapphys = DMAP_MIN_PHYSADDR;
+	mdhdr.dmapbase = DMAP_MIN_ADDRESS;
+	mdhdr.dmapend = DMAP_MAX_ADDRESS;
+
+	dump_init_header(di, &kdh, KERNELDUMPMAGIC, KERNELDUMP_RISCV_VERSION,
+	    dumpsize);
+
+	error = dump_start(di, &kdh);
+	if (error != 0)
+		goto fail;
+
+	/* %llu takes an unsigned argument; cast accordingly. */
+	printf("Dumping %llu out of %ju MB:",
+	    (unsigned long long)dumpsize >> 20,
+	    ptoa((uintmax_t)physmem) / 1048576);
+
+	/* Dump minidump header */
+	bzero(&tmpbuffer, sizeof(tmpbuffer));
+	bcopy(&mdhdr, &tmpbuffer, sizeof(mdhdr));
+	error = blk_write(di, (char *)&tmpbuffer, 0, PAGE_SIZE);
+	if (error)
+		goto fail;
+
+	/* Dump msgbuf up front */
+	error = blk_write(di, (char *)msgbufp->msg_ptr, 0,
+	    round_page(msgbufp->msg_size));
+	if (error)
+		goto fail;
+
+	/* Dump bitmap */
+	error = blk_write(di, (char *)vm_page_dump, 0,
+	    round_page(vm_page_dump_size));
+	if (error)
+		goto fail;
+
+	/* Dump kernel page directory pages */
+	bzero(&tmpbuffer, sizeof(tmpbuffer));
+	for (va = VM_MIN_KERNEL_ADDRESS; va < kernel_vm_end; va += L2_SIZE) {
+		/*
+		 * The sizing pass above counted one page for this step even
+		 * when the lookup yields no l2 entry, so handle a NULL l2
+		 * like a failed lookup and emit the zero page rather than
+		 * dereferencing it below.
+		 */
+		if (!pmap_get_tables(pmap_kernel(), va, &l1, &l2, &l3) ||
+		    l2 == NULL) {
+			/* We always write a page, even if it is zero */
+			error = blk_write(di, (char *)&tmpbuffer, 0, PAGE_SIZE);
+			if (error)
+				goto fail;
+			/* Flush, in case we reuse tmpbuffer in the same block */
+			error = blk_flush(di);
+			if (error)
+				goto fail;
+		} else if ((*l2 & PTE_RWX) != 0) {
+			/* Generate fake l3 entries based on the l2 superpage */
+			for (i = 0; i < Ln_ENTRIES; i++) {
+				tmpbuffer[i] = (*l2 | (i << PTE_PPN0_S));
+			}
+			/* We always write a page, even if it is zero */
+			error = blk_write(di, (char *)&tmpbuffer, 0, PAGE_SIZE);
+			if (error)
+				goto fail;
+			/* Flush, in case we reuse tmpbuffer in the same block */
+			error = blk_flush(di);
+			if (error)
+				goto fail;
+			/* Restore the all-zero page for the unmapped case. */
+			bzero(&tmpbuffer, sizeof(tmpbuffer));
+		} else {
+			/* l2 points at an l3 table page; dump that page. */
+			pa = (*l2 >> PTE_PPN0_S) * PAGE_SIZE;
+
+			/* We always write a page, even if it is zero */
+			error = blk_write(di, NULL, pa, PAGE_SIZE);
+			if (error)
+				goto fail;
+		}
+	}
+
+	/* Dump memory chunks */
+	/* XXX cluster it up and use blk_dump() */
+	for (i = 0; i < vm_page_dump_size / sizeof(*vm_page_dump); i++) {
+		bits = vm_page_dump[i];
+		while (bits) {
+			bit = ffsl(bits) - 1;
+			pa = (((uint64_t)i * sizeof(*vm_page_dump) * NBBY) +
+			    bit) * PAGE_SIZE;
+			error = blk_write(di, 0, pa, PAGE_SIZE);
+			if (error)
+				goto fail;
+			bits &= ~(1ul << bit);
+		}
+	}
+
+	error = blk_flush(di);
+	if (error)
+		goto fail;
+
+	error = dump_finish(di, &kdh);
+	if (error != 0)
+		goto fail;
+
+	printf("\nDump complete\n");
+	return (0);
+
+fail:
+	/* Normalize to a positive error number before reporting. */
+	if (error < 0)
+		error = -error;
+
+	printf("\n");
+	if (error == ENOSPC) {
+		printf("Dump map grown while dumping. ");
+		if (retry_count < 5) {
+			printf("Retrying...\n");
+			goto retry;
+		}
+		printf("Dump failed.\n");
+	}
+	else if (error == ECANCELED)
+		printf("Dump aborted\n");
+	else if (error == E2BIG)
+		printf("Dump failed. Partition too small.\n");
+	else
+		printf("** DUMP FAILED (ERROR %d) **\n", error);
+	return (error);
+}
+
+/*
+ * Add a page to the minidump bitmap.
+ *
+ * pa is converted to a page number; each 64-bit word of vm_page_dump holds
+ * the bits for 64 consecutive pages.
+ */
+void
+dump_add_page(vm_paddr_t pa)
+{
+	int idx, bit;
+
+	pa >>= PAGE_SHIFT;
+	idx = pa >> 6;		/* 2^6 = 64 */
+	bit = pa & 63;
+	atomic_set_long(&vm_page_dump[idx], 1ul << bit);
+}
+
+/*
+ * Remove page from the minidump bitmap.
+ *
+ * Uses the same word/bit indexing as dump_add_page().
+ */
+void
+dump_drop_page(vm_paddr_t pa)
+{
+	int idx, bit;
+
+	pa >>= PAGE_SHIFT;
+	idx = pa >> 6;		/* 2^6 = 64 */
+	bit = pa & 63;
+	atomic_clear_long(&vm_page_dump[idx], 1ul << bit);
 }
diff --git a/sys/riscv/riscv/pmap.c b/sys/riscv/riscv/pmap.c
index 38315490784b..eeeb48bb4972 100644
--- a/sys/riscv/riscv/pmap.c
+++ b/sys/riscv/riscv/pmap.c
@@ -1645,9 +1645,7 @@ free_pv_chunk(struct pv_chunk *pc)
 	PV_STAT(atomic_add_int(&pc_chunk_frees, 1));
 	/* entire chunk is free, return it */
 	m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc));
-#if 0 /* TODO: For minidump */
 	dump_drop_page(m->phys_addr);
-#endif
 	vm_page_unwire(m, PQ_NONE);
 	vm_page_free(m);
 }
@@ -1709,9 +1707,7 @@ retry:
 	}
 	PV_STAT(atomic_add_int(&pc_chunk_count, 1));
 	PV_STAT(atomic_add_int(&pc_chunk_allocs, 1));
-#if 0 /* TODO: This is for minidump */
 	dump_add_page(m->phys_addr);
-#endif
 	pc = (void *)PHYS_TO_DMAP(m->phys_addr);
 	pc->pc_pmap = pmap;
 	pc->pc_map[0] = PC_FREE0 & ~1ul;	/* preallocated bit 0 */
diff --git a/sys/riscv/riscv/uma_machdep.c b/sys/riscv/riscv/uma_machdep.c
index d99a5a94fecd..4ab256ed2179 100644
--- a/sys/riscv/riscv/uma_machdep.c
+++ b/sys/riscv/riscv/uma_machdep.c
@@ -55,11 +55,8 @@ uma_small_alloc(uma_zone_t zone, vm_size_t bytes, int domain, u_int8_t *flags,
 	if (m == NULL)
 		return (NULL);
 	pa = m->phys_addr;
-#if 0
-	/* RISCVTODO: minidump */
 	if ((wait & M_NODUMP) == 0)
 		dump_add_page(pa);
-#endif
 	va
= (void *)PHYS_TO_DMAP(pa); if ((wait & M_ZERO) && (m->flags & PG_ZERO) == 0) bzero(va, PAGE_SIZE); @@ -73,10 +70,7 @@ uma_small_free(void *mem, vm_size_t size, u_int8_t flags) vm_paddr_t pa; pa = DMAP_TO_PHYS((vm_offset_t)mem); -#if 0 - /* RISCVTODO: minidump */ dump_drop_page(pa); -#endif m = PHYS_TO_VM_PAGE(pa); vm_page_unwire_noq(m); vm_page_free(m); diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c index 7c3aeaef7456..a90a6f805b74 100644 --- a/sys/vm/vm_page.c +++ b/sys/vm/vm_page.c @@ -633,7 +633,7 @@ vm_page_startup(vm_offset_t vaddr) #endif #if defined(__aarch64__) || defined(__amd64__) || defined(__arm__) || \ - defined(__i386__) || defined(__mips__) + defined(__i386__) || defined(__mips__) || defined(__riscv) /* * Allocate a bitmap to indicate that a random physical page * needs to be included in a minidump. @@ -658,7 +658,8 @@ vm_page_startup(vm_offset_t vaddr) #else (void)last_pa; #endif -#if defined(__aarch64__) || defined(__amd64__) || defined(__mips__) +#if defined(__aarch64__) || defined(__amd64__) || defined(__mips__) || \ + defined(__riscv) /* * Include the UMA bootstrap pages, witness pages and vm_page_dump * in a crash dump. When pmap_map() uses the direct map, they are @@ -773,7 +774,8 @@ vm_page_startup(vm_offset_t vaddr) high_avail = new_end; new_end = vm_reserv_startup(&vaddr, new_end, high_avail); #endif -#if defined(__aarch64__) || defined(__amd64__) || defined(__mips__) +#if defined(__aarch64__) || defined(__amd64__) || defined(__mips__) || \ + defined(__riscv) /* * Include vm_page_array and vm_reserv_array in a crash dump. */