diff --git a/lib/compiler/test_runner.zig b/lib/compiler/test_runner.zig
index 83d53626c3..399a09c97e 100644
--- a/lib/compiler/test_runner.zig
+++ b/lib/compiler/test_runner.zig
@@ -150,6 +150,7 @@ fn mainServer() !void {
                     try server.serveU64Message(.fuzz_start_addr, entry_addr);
                     defer if (testing.allocator_instance.deinit() == .leak) std.process.exit(1);
                     is_fuzz_test = false;
+                    fuzzer_set_name(test_fn.name.ptr, test_fn.name.len);
                     test_fn.func() catch |err| switch (err) {
                         error.SkipZigTest => return,
                         else => {
@@ -341,8 +342,10 @@ const FuzzerSlice = extern struct {
 
 var is_fuzz_test: bool = undefined;
 
-extern fn fuzzer_start(testOne: *const fn ([*]const u8, usize) callconv(.C) void) void;
+extern fn fuzzer_set_name(name_ptr: [*]const u8, name_len: usize) void;
 extern fn fuzzer_init(cache_dir: FuzzerSlice) void;
+extern fn fuzzer_init_corpus_elem(input_ptr: [*]const u8, input_len: usize) void;
+extern fn fuzzer_start(testOne: *const fn ([*]const u8, usize) callconv(.C) void) void;
 extern fn fuzzer_coverage_id() u64;
 
 pub fn fuzz(
@@ -395,8 +398,11 @@ pub fn fuzz(
     if (builtin.fuzz) {
         const prev_allocator_state = testing.allocator_instance;
         testing.allocator_instance = .{};
+        defer testing.allocator_instance = prev_allocator_state;
+
+        for (options.corpus) |elem| fuzzer_init_corpus_elem(elem.ptr, elem.len);
+
         fuzzer_start(&global.fuzzer_one);
-        testing.allocator_instance = prev_allocator_state;
         return;
     }
diff --git a/lib/fuzzer.zig b/lib/fuzzer.zig
index 3f8a991148..440e1fdd29 100644
--- a/lib/fuzzer.zig
+++ b/lib/fuzzer.zig
@@ -92,14 +92,9 @@ fn handleCmp(pc: usize, arg1: u64, arg2: u64) void {
 const Fuzzer = struct {
     gpa: Allocator,
     rng: std.Random.DefaultPrng,
-    input: std.ArrayListUnmanaged(u8),
     pcs: []const usize,
     pc_counters: []u8,
     n_runs: usize,
-    recent_cases: RunMap,
-    /// Data collected from code coverage instrumentation from one execution of
-    /// the test function.
-    coverage: Coverage,
     /// Tracks which PCs have been seen across all runs that do not crash the fuzzer process.
     /// Stored in a memory-mapped file so that it can be shared with other
     /// processes and viewed while the fuzzer is running.
@@ -108,43 +103,18 @@ const Fuzzer = struct {
     /// Identifies the file name that will be used to store coverage
     /// information, available to other processes.
     coverage_id: u64,
+    unit_test_name: []const u8,
 
-    const RunMap = std.ArrayHashMapUnmanaged(Run, void, Run.HashContext, false);
+    /// The index corresponds to the file name within the f/ subdirectory.
+    /// The string is the input.
+    /// This data is read-only; it caches what is on the filesystem.
+    corpus: std.StringArrayHashMapUnmanaged(void),
+    corpus_directory: std.Build.Cache.Directory,
 
-    const Coverage = struct {
-        pc_table: std.AutoArrayHashMapUnmanaged(usize, void),
-        run_id_hasher: std.hash.Wyhash,
-
-        fn reset(cov: *Coverage) void {
-            cov.pc_table.clearRetainingCapacity();
-            cov.run_id_hasher = std.hash.Wyhash.init(0);
-        }
-    };
-
-    const Run = struct {
-        id: Id,
-        input: []const u8,
-        score: usize,
-
-        const Id = u64;
-
-        const HashContext = struct {
-            pub fn eql(ctx: HashContext, a: Run, b: Run, b_index: usize) bool {
-                _ = b_index;
-                _ = ctx;
-                return a.id == b.id;
-            }
-            pub fn hash(ctx: HashContext, a: Run) u32 {
-                _ = ctx;
-                return @truncate(a.id);
-            }
-        };
-
-        fn deinit(run: *Run, gpa: Allocator) void {
-            gpa.free(run.input);
-            run.* = undefined;
-        }
-    };
+    /// The next input that will be given to the testOne function. When the
+    /// current process crashes, this memory-mapped file is used to recover the
+    /// input.
+    input: MemoryMappedList,
 
     const Slice = extern struct {
         ptr: [*]const u8,
@@ -162,11 +132,6 @@ const Fuzzer = struct {
         }
     };
 
-    const Analysis = struct {
-        score: usize,
-        id: Run.Id,
-    };
-
     fn init(f: *Fuzzer, cache_dir: std.fs.Dir, pc_counters: []u8, pcs: []const usize) !void {
         f.cache_dir = cache_dir;
         f.pc_counters = pc_counters;
@@ -228,156 +193,178 @@ const Fuzzer = struct {
         }
     }
 
-    fn analyzeLastRun(f: *Fuzzer) Analysis {
-        return .{
-            .id = f.coverage.run_id_hasher.final(),
-            .score = f.coverage.pc_table.count(),
-        };
+    fn initNextInput(f: *Fuzzer) void {
+        const gpa = f.gpa;
+
+        while (true) {
+            const i = f.corpus.entries.len;
+            var buf: [30]u8 = undefined;
+            const input_sub_path = std.fmt.bufPrint(&buf, "{d}", .{i}) catch unreachable;
+            const input = f.corpus_directory.handle.readFileAlloc(gpa, input_sub_path, 1 << 31) catch |err| switch (err) {
+                error.FileNotFound => {
+                    // Make this one the next input.
+                    var input_file = f.corpus_directory.handle.createFile(input_sub_path, .{
+                        .exclusive = true,
+                        .truncate = false,
+                        .read = true,
+                    }) catch |e| switch (e) {
+                        error.PathAlreadyExists => continue,
+                        else => fatal("unable to create '{}{d}': {s}", .{ f.corpus_directory, i, @errorName(e) }),
+                    };
+                    defer input_file.close();
+                    const capacity = 4096;
+                    input_file.setEndPos(capacity) catch |e| {
+                        fatal("unable to set len of input file: {s}", .{@errorName(e)});
+                    };
+                    // Initialize the mmap for the current input.
+                    f.input = MemoryMappedList.init(input_file, 0, capacity) catch |e| {
+                        fatal("unable to init memory map for input at '{}{d}': {s}", .{
+                            f.corpus_directory, i, @errorName(e),
+                        });
+                    };
+                    break;
+                },
+                else => fatal("unable to read '{}{d}': {s}", .{ f.corpus_directory, i, @errorName(err) }),
+            };
+            errdefer gpa.free(input);
+            f.corpus.putNoClobber(gpa, input, {}) catch |err| oom(err);
+        }
+    }
+
+    fn addCorpusElem(f: *Fuzzer, input: []const u8) !void {
+        const gpa = f.gpa;
+        try f.corpus.put(gpa, input, {});
     }
 
     fn start(f: *Fuzzer) !void {
         const gpa = f.gpa;
         const rng = fuzzer.rng.random();
 
-        // Prepare initial input.
-        assert(f.recent_cases.entries.len == 0);
-        assert(f.n_runs == 0);
-        try f.recent_cases.ensureUnusedCapacity(gpa, 100);
-        const len = rng.uintLessThanBiased(usize, 80);
-        try f.input.resize(gpa, len);
-        rng.bytes(f.input.items);
-        f.recent_cases.putAssumeCapacity(.{
-            .id = 0,
-            .input = try gpa.dupe(u8, f.input.items),
-            .score = 0,
-        }, {});
+        // Grab the corpus which is namespaced based on `unit_test_name`.
+        {
+            if (f.unit_test_name.len == 0) fatal("test runner never set unit test name", .{});
+            const sub_path = try std.fmt.allocPrint(gpa, "f/{s}", .{f.unit_test_name});
+            f.corpus_directory = .{
+                .handle = f.cache_dir.makeOpenPath(sub_path, .{}) catch |err|
+                    fatal("unable to open corpus directory '{s}': {s}", .{ sub_path, @errorName(err) }),
+                .path = sub_path,
+            };
+            initNextInput(f);
+        }
 
-        const header: *volatile SeenPcsHeader = @ptrCast(f.seen_pcs.items[0..@sizeOf(SeenPcsHeader)]);
+        assert(f.n_runs == 0);
+
+        // If the corpus is empty, synthesize one input.
+        if (f.corpus.entries.len == 0) {
+            var buffer: [200]u8 = undefined;
+            const len = rng.uintLessThanBiased(usize, buffer.len);
+            const slice = buffer[0..len];
+            rng.bytes(slice);
+            f.input.appendSliceAssumeCapacity(slice);
+            runOne(f);
+        }
 
         while (true) {
-            const chosen_index = rng.uintLessThanBiased(usize, f.recent_cases.entries.len);
-            const run = &f.recent_cases.keys()[chosen_index];
-            f.input.clearRetainingCapacity();
-            f.input.appendSliceAssumeCapacity(run.input);
-            try f.mutate();
-
-            @memset(f.pc_counters, 0);
-            __sancov_lowest_stack = std.math.maxInt(usize);
-            f.coverage.reset();
-
-            fuzzer_one(f.input.items.ptr, f.input.items.len);
-
-            f.n_runs += 1;
-            _ = @atomicRmw(usize, &header.n_runs, .Add, 1, .monotonic);
-
-            if (f.n_runs % 10000 == 0) f.dumpStats();
-
-            const analysis = f.analyzeLastRun();
-            const gop = f.recent_cases.getOrPutAssumeCapacity(.{
-                .id = analysis.id,
-                .input = undefined,
-                .score = undefined,
-            });
-            if (gop.found_existing) {
-                //std.log.info("duplicate analysis: score={d} id={d}", .{ analysis.score, analysis.id });
-                if (f.input.items.len < gop.key_ptr.input.len or gop.key_ptr.score == 0) {
-                    gpa.free(gop.key_ptr.input);
-                    gop.key_ptr.input = try gpa.dupe(u8, f.input.items);
-                    gop.key_ptr.score = analysis.score;
-                }
-            } else {
-                std.log.info("unique analysis: score={d} id={d}", .{ analysis.score, analysis.id });
-                gop.key_ptr.* = .{
-                    .id = analysis.id,
-                    .input = try gpa.dupe(u8, f.input.items),
-                    .score = analysis.score,
-                };
-
-                {
-                    // Track code coverage from all runs.
-                    comptime assert(SeenPcsHeader.trailing[0] == .pc_bits_usize);
-                    const header_end_ptr: [*]volatile usize = @ptrCast(f.seen_pcs.items[@sizeOf(SeenPcsHeader)..]);
-                    const remainder = f.pcs.len % @bitSizeOf(usize);
-                    const aligned_len = f.pcs.len - remainder;
-                    const seen_pcs = header_end_ptr[0..aligned_len];
-                    const pc_counters = std.mem.bytesAsSlice([@bitSizeOf(usize)]u8, f.pc_counters[0..aligned_len]);
-                    const V = @Vector(@bitSizeOf(usize), u8);
-                    const zero_v: V = @splat(0);
-
-                    for (header_end_ptr[0..pc_counters.len], pc_counters) |*elem, *array| {
-                        const v: V = array.*;
-                        const mask: usize = @bitCast(v != zero_v);
-                        _ = @atomicRmw(usize, elem, .Or, mask, .monotonic);
-                    }
-                    if (remainder > 0) {
-                        const i = pc_counters.len;
-                        const elem = &seen_pcs[i];
-                        var mask: usize = 0;
-                        for (f.pc_counters[i * @bitSizeOf(usize) ..][0..remainder], 0..) |byte, bit_index| {
-                            mask |= @as(usize, @intFromBool(byte != 0)) << @intCast(bit_index);
-                        }
-                        _ = @atomicRmw(usize, elem, .Or, mask, .monotonic);
-                    }
-                }
-
-                _ = @atomicRmw(usize, &header.unique_runs, .Add, 1, .monotonic);
-            }
-
-            if (f.recent_cases.entries.len >= 100) {
-                const Context = struct {
-                    values: []const Run,
-                    pub fn lessThan(ctx: @This(), a_index: usize, b_index: usize) bool {
-                        return ctx.values[b_index].score < ctx.values[a_index].score;
-                    }
-                };
-                f.recent_cases.sortUnstable(Context{ .values = f.recent_cases.keys() });
-                const cap = 50;
-                // This has to be done before deinitializing the deleted items.
-                const doomed_runs = f.recent_cases.keys()[cap..];
-                f.recent_cases.shrinkRetainingCapacity(cap);
-                for (doomed_runs) |*doomed_run| {
-                    std.log.info("culling score={d} id={d}", .{ doomed_run.score, doomed_run.id });
-                    doomed_run.deinit(gpa);
-                }
-            }
+            const chosen_index = rng.uintLessThanBiased(usize, f.corpus.entries.len);
+            f.mutateAndRunOne(chosen_index, .remove_byte);
+            f.mutateAndRunOne(chosen_index, .modify_byte);
+            f.mutateAndRunOne(chosen_index, .add_byte);
         }
     }
 
     fn visitPc(f: *Fuzzer, pc: usize) void {
-        errdefer |err| oom(err);
-        try f.coverage.pc_table.put(f.gpa, pc, {});
-        f.coverage.run_id_hasher.update(std.mem.asBytes(&pc));
+        _ = f;
+        _ = pc;
     }
 
-    fn dumpStats(f: *Fuzzer) void {
-        for (f.recent_cases.keys()[0..@min(f.recent_cases.entries.len, 5)], 0..) |run, i| {
-            std.log.info("best[{d}] id={x} score={d} input: '{}'", .{
-                i, run.id, run.score, std.zig.fmtEscapes(run.input),
-            });
-        }
-    }
+    const Mutation = enum {
+        remove_byte,
+        modify_byte,
+        add_byte,
+    };
 
-    fn mutate(f: *Fuzzer) !void {
-        const gpa = f.gpa;
+    fn mutateAndRunOne(f: *Fuzzer, corpus_index: usize, mutation: Mutation) void {
         const rng = fuzzer.rng.random();
+        f.input.clearRetainingCapacity();
+        const old_input = f.corpus.keys()[corpus_index];
+        switch (mutation) {
+            .remove_byte => {
+                const omitted_index = rng.uintLessThanBiased(usize, old_input.len);
+                f.input.appendSliceAssumeCapacity(old_input[0..omitted_index]);
+                f.input.appendSliceAssumeCapacity(old_input[omitted_index + 1 ..]);
+            },
+            .modify_byte => {
+                const modified_index = rng.uintLessThanBiased(usize, old_input.len);
+                f.input.appendSliceAssumeCapacity(old_input);
+                f.input.items[modified_index] = rng.int(u8);
+            },
+            .add_byte => {
+                const modified_index = rng.uintLessThanBiased(usize, old_input.len);
+                f.input.appendSliceAssumeCapacity(old_input[0..modified_index]);
+                f.input.appendAssumeCapacity(rng.int(u8));
+                f.input.appendSliceAssumeCapacity(old_input[modified_index..]);
+            },
+        }
+        runOne(f);
+    }
 
-        if (f.input.items.len == 0) {
-            const len = rng.uintLessThanBiased(usize, 80);
-            try f.input.resize(gpa, len);
-            rng.bytes(f.input.items);
-            return;
+    fn runOne(f: *Fuzzer) void {
+        const header: *volatile SeenPcsHeader = @ptrCast(f.seen_pcs.items[0..@sizeOf(SeenPcsHeader)]);
+
+        @memset(f.pc_counters, 0);
+        __sancov_lowest_stack = std.math.maxInt(usize);
+
+        fuzzer_one(@volatileCast(f.input.items.ptr), f.input.items.len);
+
+        f.n_runs += 1;
+        _ = @atomicRmw(usize, &header.n_runs, .Add, 1, .monotonic);
+
+        // Track code coverage from all runs.
+        comptime assert(SeenPcsHeader.trailing[0] == .pc_bits_usize);
+        const header_end_ptr: [*]volatile usize = @ptrCast(f.seen_pcs.items[@sizeOf(SeenPcsHeader)..]);
+        const remainder = f.pcs.len % @bitSizeOf(usize);
+        const aligned_len = f.pcs.len - remainder;
+        const seen_pcs = header_end_ptr[0..aligned_len];
+        const pc_counters = std.mem.bytesAsSlice([@bitSizeOf(usize)]u8, f.pc_counters[0..aligned_len]);
+        const V = @Vector(@bitSizeOf(usize), u8);
+        const zero_v: V = @splat(0);
+        var fresh = false;
+
+        for (header_end_ptr[0..pc_counters.len], pc_counters) |*elem, *array| {
+            const v: V = array.*;
+            const mask: usize = @bitCast(v != zero_v);
+            const prev = @atomicRmw(usize, elem, .Or, mask, .monotonic);
+            fresh = fresh or (prev | mask) != prev;
+        }
+        if (remainder > 0) {
+            const i = pc_counters.len;
+            const elem = &seen_pcs[i];
+            var mask: usize = 0;
+            for (f.pc_counters[i * @bitSizeOf(usize) ..][0..remainder], 0..) |byte, bit_index| {
+                mask |= @as(usize, @intFromBool(byte != 0)) << @intCast(bit_index);
+            }
+            const prev = @atomicRmw(usize, elem, .Or, mask, .monotonic);
+            fresh = fresh or (prev | mask) != prev;
         }
-        const index = rng.uintLessThanBiased(usize, f.input.items.len * 3);
-        if (index < f.input.items.len) {
-            f.input.items[index] = rng.int(u8);
-        } else if (index < f.input.items.len * 2) {
-            _ = f.input.orderedRemove(index - f.input.items.len);
-        } else if (index < f.input.items.len * 3) {
-            try f.input.insert(gpa, index - f.input.items.len * 2, rng.int(u8));
-        } else {
-            unreachable;
-        }
+
+        // TODO: first check if this is a better version of an already existing
+        // input, replacing that input.
+
+        if (!fresh) return;
+
+        const gpa = f.gpa;
+
+        // Input is already committed to the file system; we just need to open a
+        // new file for the next input. Pre-add it to the corpus table so that it
+        // does not get redundantly picked up.
+        const input = gpa.dupe(u8, @volatileCast(f.input.items)) catch |err| oom(err);
+        f.corpus.putNoClobber(gpa, input, {}) catch |err| oom(err);
+        f.input.deinit();
+        initNextInput(f);
+
+        // TODO: also mark input as "hot" so it gets prioritized for checking
+        // mutations above others.
+
+        _ = @atomicRmw(usize, &header.unique_runs, .Add, 1, .monotonic);
     }
 };
@@ -407,15 +394,16 @@ var general_purpose_allocator: std.heap.GeneralPurposeAllocator(.{}) = .{};
 var fuzzer: Fuzzer = .{
     .gpa = general_purpose_allocator.allocator(),
     .rng = std.Random.DefaultPrng.init(0),
-    .input = .{},
+    .input = undefined,
     .pcs = undefined,
     .pc_counters = undefined,
     .n_runs = 0,
-    .recent_cases = .{},
-    .coverage = undefined,
     .cache_dir = undefined,
     .seen_pcs = undefined,
     .coverage_id = undefined,
+    .unit_test_name = &.{},
+    .corpus = .{},
+    .corpus_directory = undefined,
 };
 
 /// Invalid until `fuzzer_init` is called.
@@ -427,9 +415,11 @@ var fuzzer_one: *const fn (input_ptr: [*]const u8, input_len: usize) callconv(.C
 
 export fn fuzzer_start(testOne: @TypeOf(fuzzer_one)) void {
     fuzzer_one = testOne;
-    fuzzer.start() catch |err| switch (err) {
-        error.OutOfMemory => fatal("out of memory", .{}),
-    };
+    fuzzer.start() catch |err| oom(err);
+}
+
+export fn fuzzer_set_name(name_ptr: [*]const u8, name_len: usize) void {
+    fuzzer.unit_test_name = name_ptr[0..name_len];
 }
 
 export fn fuzzer_init(cache_dir_struct: Fuzzer.Slice) void {
@@ -472,6 +462,11 @@ export fn fuzzer_init(cache_dir_struct: Fuzzer.Slice) void {
         fatal("unable to init fuzzer: {s}", .{@errorName(err)});
 }
 
+export fn fuzzer_init_corpus_elem(input_ptr: [*]const u8, input_len: usize) void {
+    fuzzer.addCorpusElem(input_ptr[0..input_len]) catch |err|
+        fatal("failed to add corpus element: {s}", .{@errorName(err)});
+}
+
 /// Like `std.ArrayListUnmanaged(u8)` but backed by memory mapping.
 pub const MemoryMappedList = struct {
     /// Contents of the list.
@@ -499,6 +494,16 @@ pub const MemoryMappedList = struct {
         };
     }
 
+    pub fn deinit(l: *MemoryMappedList) void {
+        std.posix.munmap(@volatileCast(l.items.ptr[0..l.capacity]));
+        l.* = undefined;
+    }
+
+    /// Invalidates all element pointers.
+    pub fn clearRetainingCapacity(l: *MemoryMappedList) void {
+        l.items.len = 0;
+    }
+
     /// Append the slice of items to the list.
     /// Asserts that the list can hold the additional items.
     pub fn appendSliceAssumeCapacity(l: *MemoryMappedList, items: []const u8) void {
@@ -509,6 +514,24 @@ pub const MemoryMappedList = struct {
         @memcpy(l.items[old_len..][0..items.len], items);
     }
 
+    /// Extends the list by 1 element.
+    /// Never invalidates element pointers.
+    /// Asserts that the list can hold one additional item.
+    pub fn appendAssumeCapacity(l: *MemoryMappedList, item: u8) void {
+        const new_item_ptr = l.addOneAssumeCapacity();
+        new_item_ptr.* = item;
+    }
+
+    /// Increase length by 1, returning pointer to the new item.
+    /// The returned pointer becomes invalid when the list is resized.
+    /// Never invalidates element pointers.
+    /// Asserts that the list can hold one additional item.
+    pub fn addOneAssumeCapacity(l: *MemoryMappedList) *volatile u8 {
+        assert(l.items.len < l.capacity);
+        l.items.len += 1;
+        return &l.items[l.items.len - 1];
+    }
+
     /// Append a value to the list `n` times.
     /// Never invalidates element pointers.
     /// The function is inline so that a comptime-known `value` parameter will
@@ -520,4 +543,16 @@ pub const MemoryMappedList = struct {
         @memset(l.items.ptr[l.items.len..new_len], value);
         l.items.len = new_len;
     }
+
+    /// Resize the array, adding `n` new elements, which have `undefined` values.
+    /// The return value is a slice pointing to the newly allocated elements.
+    /// Never invalidates element pointers.
+    /// The returned pointer becomes invalid when the list is resized.
+    /// Asserts that the list can hold the additional items.
+    pub fn addManyAsSliceAssumeCapacity(l: *MemoryMappedList, n: usize) []volatile u8 {
+        assert(l.items.len + n <= l.capacity);
+        const prev_len = l.items.len;
+        l.items.len += n;
+        return l.items[prev_len..][0..n];
+    }
 };
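Note: with this patch, seed inputs flow through the `corpus` field of the options struct that `pub fn fuzz` receives; the test runner forwards each element to `fuzzer_init_corpus_elem` before calling `fuzzer_start`, and every coverage-increasing input is then persisted under the cache's `f/<unit test name>/` directory. For orientation, a consuming unit test might look roughly like the sketch below. It assumes the `std.testing.fuzz` wrapper and its options struct simply forward to the `pub fn fuzz` shown above; the names outside this diff (`std.testing.fuzz`, the `.corpus` option spelling at the test-writer level) are assumptions, not confirmed API.

const std = @import("std");

// Hypothetical property under test: must not reach the "crash" input.
fn testOne(input: []const u8) anyerror!void {
    try std.testing.expect(!std.mem.eql(u8, input, "crash"));
}

test "fuzz with a seed corpus" {
    try std.testing.fuzz(testOne, .{
        // Each element is passed to fuzzer_init_corpus_elem before
        // fuzzer_start runs, seeding the on-disk corpus for this test.
        .corpus = &.{ "cra", "cras" },
    });
}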