From 2b3e6f680c5843877f6252bd3d85a20abe367da6 Mon Sep 17 00:00:00 2001
From: Luuk de Gram
Date: Mon, 8 Jan 2024 16:15:28 +0100
Subject: [PATCH 1/3] wasm-linker: ensure custom sections are parsed

Not all custom sections are represented by a symbol, which means such a
section will not be parsed by the lazy parsing and will therefore be
garbage-collected. This is problematic as it may contain debug
information that should not be garbage-collected. To resolve this, we
manually create local symbols for those sections and also ensure they
do not get garbage-collected.
---
 src/link/Wasm.zig        | 12 +++---------
 src/link/Wasm/Object.zig | 24 +++++++++++++++++++++++-
 2 files changed, 26 insertions(+), 10 deletions(-)

diff --git a/src/link/Wasm.zig b/src/link/Wasm.zig
index 9dbef32648..34b0c62414 100644
--- a/src/link/Wasm.zig
+++ b/src/link/Wasm.zig
@@ -3260,7 +3260,7 @@ pub fn getMatchingSegment(wasm: *Wasm, object_index: u16, symbol_index: u32) !u3
             break :blk index;
         };
     } else if (mem.eql(u8, section_name, ".debug_ranges")) {
-        return wasm.debug_line_index orelse blk: {
+        return wasm.debug_ranges_index orelse blk: {
             wasm.debug_ranges_index = index;
             try wasm.appendDummySegment();
             break :blk index;
@@ -5301,14 +5301,8 @@ fn markReferences(wasm: *Wasm) !void {
             const object = &wasm.objects.items[file];
             const atom_index = try Object.parseSymbolIntoAtom(object, file, sym_loc.index, wasm);
             const atom = wasm.getAtom(atom_index);
-            for (atom.relocs.items) |reloc| {
-                const target_loc: SymbolLoc = .{ .index = reloc.index, .file = atom.file };
-                const target_sym = target_loc.getSymbol(wasm);
-                if (target_sym.isAlive() or !do_garbage_collect) {
-                    sym.mark();
-                    continue; // Skip all other relocations as this debug atom is already marked now
-                }
-            }
+            const atom_sym = atom.symbolLoc().getSymbol(wasm);
+            atom_sym.mark();
         }
     }
 }
diff --git a/src/link/Wasm/Object.zig b/src/link/Wasm/Object.zig
index f0c21b8c89..aaa99292bc 100644
--- a/src/link/Wasm/Object.zig
+++ b/src/link/Wasm/Object.zig
@@ -80,6 +80,9 @@ const RelocatableData = struct {
     offset: u32,
     /// Represents the index of the section it belongs to
     section_index: u32,
+    /// Whether the relocatable section is represented by a symbol or not.
+    /// Can only be `true` for custom sections.
+    represented: bool = false,
 
     const Tag = enum { data, code, custom };
 
@@ -753,6 +756,24 @@ fn Parser(comptime ReaderType: type) type {
                     log.debug("Found legacy indirect function table. Created symbol", .{});
                 }
 
+                // Not all debug sections may be represented by a symbol, for those sections
+                // we manually create a symbol.
+                if (parser.object.relocatable_data.get(.custom)) |custom_sections| {
+                    for (custom_sections) |*data| {
+                        if (!data.represented) {
+                            try symbols.append(.{
+                                .name = data.index,
+                                .flags = @intFromEnum(Symbol.Flag.WASM_SYM_BINDING_LOCAL),
+                                .tag = .section,
+                                .virtual_address = 0,
+                                .index = data.section_index,
+                            });
+                            data.represented = true;
+                            log.debug("Created synthetic custom section symbol for '{s}'", .{parser.object.string_table.get(data.index)});
+                        }
+                    }
+                }
+
                 parser.object.symtable = try symbols.toOwnedSlice();
             },
         }
@@ -791,9 +812,10 @@ fn Parser(comptime ReaderType: type) type {
             .section => {
                 symbol.index = try leb.readULEB128(u32, reader);
                 const section_data = parser.object.relocatable_data.get(.custom).?;
-                for (section_data) |data| {
+                for (section_data) |*data| {
                     if (data.section_index == symbol.index) {
                         symbol.name = data.index;
+                        data.represented = true;
                         break;
                     }
                 }

From 7fe629a8124f27e8db01e090031a5243a452e831 Mon Sep 17 00:00:00 2001
From: Luuk de Gram
Date: Wed, 10 Jan 2024 06:35:50 +0100
Subject: [PATCH 2/3] wasm-linker: delay code atom allocation till write

We delay atom allocation for the code section until we write the actual
atoms. This ensures the offset of each atom also accounts for the 'size'
field, which is leb128-encoded and therefore variable in width. We need
this correct offset for debug info to resolve correctly.

The ordering of the code section is now automatic: we iterate the
function section and look up the corresponding atom for each function.
This also ensures each function corresponds to the right atom and the
two cannot go out of sync.

Lastly, we removed the `next` field as it is no longer required, and
removed the manual offset assignment in synthetic functions. This means
atoms use less memory, synthetic functions are less error-prone, and
they are placed correctly in function order.
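To illustrate why that offset is only known at write time: every code
entry is prefixed by its body size as a ULEB128 value, and the width of
that prefix grows with the body length. A minimal standalone sketch of
the width calculation (illustrative only, not linker code):

    const std = @import("std");

    // Number of bytes `value` occupies when ULEB128-encoded (1 to 5 for a u32).
    fn uleb128Len(value: u32) u32 {
        var len: u32 = 1;
        var rest = value >> 7;
        while (rest != 0) : (rest >>= 7) {
            len += 1;
        }
        return len;
    }

    test "code entry offsets depend on the encoded size field" {
        // A 20-byte body needs a 1-byte size prefix, a 200-byte body a 2-byte
        // one, so a body's offset inside the code section is only known once
        // its size field has actually been written.
        try std.testing.expectEqual(@as(u32, 1), uleb128Len(20));
        try std.testing.expectEqual(@as(u32, 2), uleb128Len(200));
        try std.testing.expectEqual(@as(u32, 5), uleb128Len(std.math.maxInt(u32)));
    }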
---
 src/link/Wasm.zig      | 97 ++++++++++--------------------------------
 src/link/Wasm/Atom.zig |  7 ---
 2 files changed, 22 insertions(+), 82 deletions(-)

diff --git a/src/link/Wasm.zig b/src/link/Wasm.zig
index 34b0c62414..6f20e86bdc 100644
--- a/src/link/Wasm.zig
+++ b/src/link/Wasm.zig
@@ -2054,6 +2054,7 @@ pub fn freeDecl(wasm: *Wasm, decl_index: InternPool.DeclIndex) void {
     const decl = mod.declPtr(decl_index);
     const atom_index = wasm.decls.get(decl_index).?;
     const atom = wasm.getAtomPtr(atom_index);
+    atom.prev = null;
     wasm.symbols_free_list.append(gpa, atom.sym_index) catch {};
     _ = wasm.decls.remove(decl_index);
     wasm.symbols.items[atom.sym_index].tag = .dead;
@@ -2076,16 +2077,6 @@ pub fn freeDecl(wasm: *Wasm, decl_index: InternPool.DeclIndex) void {
     //     dwarf.freeDecl(decl_index);
     // }
 
-    if (atom.next) |next_atom_index| {
-        const next_atom = wasm.getAtomPtr(next_atom_index);
-        next_atom.prev = atom.prev;
-        atom.next = null;
-    }
-    if (atom.prev) |prev_index| {
-        const prev_atom = wasm.getAtomPtr(prev_index);
-        prev_atom.next = atom.next;
-        atom.prev = null;
-    }
 }
 
 /// Appends a new entry to the indirect function table
@@ -2327,8 +2318,6 @@ pub fn appendAtomAtIndex(wasm: *Wasm, index: u32, atom_index: Atom.Index) !void
     const gpa = wasm.base.comp.gpa;
     const atom = wasm.getAtomPtr(atom_index);
     if (wasm.atoms.getPtr(index)) |last_index_ptr| {
-        const last = wasm.getAtomPtr(last_index_ptr.*);
-        last.*.next = atom_index;
         atom.prev = last_index_ptr.*;
         last_index_ptr.* = atom_index;
     } else {
@@ -2375,6 +2364,11 @@ fn allocateAtoms(wasm: *Wasm) !void {
     while (it.next()) |entry| {
         const segment = &wasm.segments.items[entry.key_ptr.*];
         var atom_index = entry.value_ptr.*;
+        if (entry.key_ptr.* == wasm.code_section_index) {
+            // Code section is allocated upon writing as they are required to be ordered
+            // to synchronise with the function section.
+            continue;
+        }
         var offset: u32 = 0;
         while (true) {
             const atom = wasm.getAtomPtr(atom_index);
@@ -2387,28 +2381,17 @@ fn allocateAtoms(wasm: *Wasm) !void {
                 break :sym object.symtable[symbol_loc.index];
             } else wasm.symbols.items[symbol_loc.index];
 
+            // Dead symbols must be unlinked from the linked-list to prevent them
+            // from being emit into the binary.
             if (sym.isDead()) {
-                // Dead symbols must be unlinked from the linked-list to prevent them
-                // from being emit into the binary.
-                if (atom.next) |next_index| {
-                    const next = wasm.getAtomPtr(next_index);
-                    next.prev = atom.prev;
-                } else if (entry.value_ptr.* == atom_index) {
+                if (entry.value_ptr.* == atom_index and atom.prev != null) {
                     // When the atom is dead and is also the first atom retrieved from wasm.atoms(index) we update
                     // the entry to point it to the previous atom to ensure we do not start with a dead symbol that
                     // was removed and therefore do not emit any code at all.
-                    if (atom.prev) |prev| {
-                        entry.value_ptr.* = prev;
-                    }
+                    entry.value_ptr.* = atom.prev.?;
                 }
-                atom_index = atom.prev orelse {
-                    atom.next = null;
-                    break;
-                };
-                const prev = wasm.getAtomPtr(atom_index);
-                prev.next = atom.next;
+                atom_index = atom.prev orelse break;
                 atom.prev = null;
-                atom.next = null;
                 continue;
             }
             offset = @intCast(atom.alignment.forward(offset));
@@ -2546,16 +2529,6 @@ fn setupErrorsLen(wasm: *Wasm) !void {
     // if not, allcoate a new atom.
     const atom_index = if (wasm.symbol_atom.get(loc)) |index| blk: {
         const atom = wasm.getAtomPtr(index);
-        if (atom.next) |next_atom_index| {
-            const next_atom = wasm.getAtomPtr(next_atom_index);
-            next_atom.prev = atom.prev;
-            atom.next = null;
-        }
-        if (atom.prev) |prev_index| {
-            const prev_atom = wasm.getAtomPtr(prev_index);
-            prev_atom.next = atom.next;
-            atom.prev = null;
-        }
         atom.deinit(gpa);
         break :blk index;
     } else new_atom: {
@@ -2658,18 +2631,12 @@ fn createSyntheticFunction(
         .sym_index = loc.index,
         .file = null,
         .alignment = .@"1",
-        .next = null,
        .prev = null,
         .code = function_body.moveToUnmanaged(),
         .original_offset = 0,
     };
     try wasm.appendAtomAtIndex(wasm.code_section_index.?, atom_index);
     try wasm.symbol_atom.putNoClobber(gpa, loc, atom_index);
-
-    // `allocateAtoms` has already been called, set the atom's offset manually.
-    // This is fine to do manually as we insert the atom at the very end.
-    const prev_atom = wasm.getAtom(atom.prev.?);
-    atom.offset = prev_atom.offset + prev_atom.size;
 }
 
 /// Unlike `createSyntheticFunction` this function is to be called by
@@ -2695,7 +2662,6 @@ pub fn createFunction(
         .sym_index = loc.index,
         .file = null,
         .alignment = .@"1",
-        .next = null,
         .prev = null,
         .code = function_body.moveToUnmanaged(),
         .relocs = relocations.moveToUnmanaged(),
@@ -3452,12 +3418,10 @@ fn resetState(wasm: *Wasm) void {
     var atom_it = wasm.decls.valueIterator();
     while (atom_it.next()) |atom_index| {
         const atom = wasm.getAtomPtr(atom_index.*);
-        atom.next = null;
         atom.prev = null;
 
         for (atom.locals.items) |local_atom_index| {
             const local_atom = wasm.getAtomPtr(local_atom_index);
-            local_atom.next = null;
             local_atom.prev = null;
         }
     }
@@ -4085,46 +4049,29 @@ fn writeToFile(
     }
 
     // Code section
-    var code_section_size: u32 = 0;
-    if (wasm.code_section_index) |code_index| {
+    if (wasm.code_section_index != null) {
         const header_offset = try reserveVecSectionHeader(&binary_bytes);
-        var atom_index = wasm.atoms.get(code_index).?;
+        const start_offset = binary_bytes.items.len - 5; // minus 5 so start offset is 5 to include entry count
 
-        // The code section must be sorted in line with the function order.
-        var sorted_atoms = try std.ArrayList(*const Atom).initCapacity(gpa, wasm.functions.count());
-        defer sorted_atoms.deinit();
-
-        while (true) {
+        var func_it = wasm.functions.iterator();
+        while (func_it.next()) |entry| {
+            const sym_loc: SymbolLoc = .{ .index = entry.value_ptr.sym_index, .file = entry.key_ptr.file };
+            const atom_index = wasm.symbol_atom.get(sym_loc).?;
             const atom = wasm.getAtomPtr(atom_index);
+
             if (!is_obj) {
                 atom.resolveRelocs(wasm);
             }
-            sorted_atoms.appendAssumeCapacity(atom); // found more code atoms than functions
-            atom_index = atom.prev orelse break;
-        }
-        assert(wasm.functions.count() == sorted_atoms.items.len);
-
-        const atom_sort_fn = struct {
-            fn sort(ctx: *const Wasm, lhs: *const Atom, rhs: *const Atom) bool {
-                const lhs_sym = lhs.symbolLoc().getSymbol(ctx);
-                const rhs_sym = rhs.symbolLoc().getSymbol(ctx);
-                return lhs_sym.index < rhs_sym.index;
-            }
-        }.sort;
-
-        mem.sort(*const Atom, sorted_atoms.items, wasm, atom_sort_fn);
-
-        for (sorted_atoms.items) |sorted_atom| {
-            try leb.writeULEB128(binary_writer, sorted_atom.size);
-            try binary_writer.writeAll(sorted_atom.code.items);
+            atom.offset = @intCast(binary_bytes.items.len - start_offset);
+            try leb.writeULEB128(binary_writer, atom.size);
+            try binary_writer.writeAll(atom.code.items);
         }
 
-        code_section_size = @as(u32, @intCast(binary_bytes.items.len - header_offset - header_size));
         try writeVecSectionHeader(
             binary_bytes.items,
             header_offset,
             .code,
-            code_section_size,
+            @intCast(binary_bytes.items.len - header_offset - header_size),
             @intCast(wasm.functions.count()),
         );
         code_section_index = section_count;
diff --git a/src/link/Wasm/Atom.zig b/src/link/Wasm/Atom.zig
index b20e8628ba..cdf22378b7 100644
--- a/src/link/Wasm/Atom.zig
+++ b/src/link/Wasm/Atom.zig
@@ -26,18 +26,12 @@ offset: u32,
 /// The original offset within the object file. This value is substracted from
 /// relocation offsets to determine where in the `data` to rewrite the value
 original_offset: u32,
-
 /// Represents the index of the file this atom was generated from.
 /// This is 'null' when the atom was generated by a Decl from Zig code.
 file: ?u16,
-
-/// Next atom in relation to this atom.
-/// When null, this atom is the last atom
-next: ?Atom.Index,
 /// Previous atom in relation to this atom.
 /// is null when this atom is the first in its order
 prev: ?Atom.Index,
-
 /// Contains atoms local to a decl, all managed by this `Atom`.
 /// When the parent atom is being freed, it will also do so for all local atoms.
 locals: std.ArrayListUnmanaged(Atom.Index) = .{},
@@ -49,7 +43,6 @@ pub const Index = u32;
 pub const empty: Atom = .{
     .alignment = .@"1",
     .file = null,
-    .next = null,
     .offset = 0,
     .prev = null,
     .size = 0,

From 3f22bb96f393a81a33772bdeddce5fc660e4f667 Mon Sep 17 00:00:00 2001
From: Luuk de Gram
Date: Wed, 10 Jan 2024 06:55:12 +0100
Subject: [PATCH 3/3] wasm-linker: fix debug info relocation

This corrects the offset calculation into the code section: code atoms
are now allocated during the write phase, so their offsets take the
leb128-encoded 'size' field into account.

We also handle symbols that were garbage-collected by writing the
tombstone values -2 (for .debug_ranges and .debug_loc) and -1 (for the
other debug sections), so that consumers skip those entries.
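As a rough standalone illustration (not the linker code itself) of how
such a negative tombstone ends up in a 4-byte relocation slot once the
value is bit-cast and truncated:

    const std = @import("std");

    test "a negative tombstone fits a 4-byte relocation slot" {
        // -2 is the value written for dead .debug_ranges/.debug_loc atoms.
        const tombstone: i64 = -2;
        const wide: u64 = @bitCast(tombstone);
        var slot: [4]u8 = undefined;
        // @truncate keeps the low 32 bits, so the slot holds 0xFFFF_FFFE.
        std.mem.writeInt(u32, &slot, @as(u32, @truncate(wide)), .little);
        try std.testing.expectEqual(
            @as(u32, 0xFFFF_FFFE),
            std.mem.readInt(u32, &slot, .little),
        );
    }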
---
 src/link/Wasm/Atom.zig | 40 ++++++++++++++++++++++++++++++----------
 1 file changed, 30 insertions(+), 10 deletions(-)

diff --git a/src/link/Wasm/Atom.zig b/src/link/Wasm/Atom.zig
index cdf22378b7..c8d115b872 100644
--- a/src/link/Wasm/Atom.zig
+++ b/src/link/Wasm/Atom.zig
@@ -111,7 +111,7 @@ pub fn resolveRelocs(atom: *Atom, wasm_bin: *const Wasm) void {
             .R_WASM_GLOBAL_INDEX_I32,
             .R_WASM_MEMORY_ADDR_I32,
             .R_WASM_SECTION_OFFSET_I32,
-            => std.mem.writeInt(u32, atom.code.items[reloc.offset - atom.original_offset ..][0..4], @as(u32, @intCast(value)), .little),
+            => std.mem.writeInt(u32, atom.code.items[reloc.offset - atom.original_offset ..][0..4], @as(u32, @truncate(value)), .little),
             .R_WASM_TABLE_INDEX_I64,
             .R_WASM_MEMORY_ADDR_I64,
             => std.mem.writeInt(u64, atom.code.items[reloc.offset - atom.original_offset ..][0..8], value, .little),
@@ -124,7 +124,7 @@ pub fn resolveRelocs(atom: *Atom, wasm_bin: *const Wasm) void {
             .R_WASM_TABLE_NUMBER_LEB,
             .R_WASM_TYPE_INDEX_LEB,
             .R_WASM_MEMORY_ADDR_TLS_SLEB,
-            => leb.writeUnsignedFixed(5, atom.code.items[reloc.offset - atom.original_offset ..][0..5], @as(u32, @intCast(value))),
+            => leb.writeUnsignedFixed(5, atom.code.items[reloc.offset - atom.original_offset ..][0..5], @as(u32, @truncate(value))),
             .R_WASM_MEMORY_ADDR_LEB64,
             .R_WASM_MEMORY_ADDR_SLEB64,
             .R_WASM_TABLE_INDEX_SLEB64,
@@ -140,6 +140,13 @@ pub fn resolveRelocs(atom: *Atom, wasm_bin: *const Wasm) void {
 fn relocationValue(atom: Atom, relocation: types.Relocation, wasm_bin: *const Wasm) u64 {
     const target_loc = (Wasm.SymbolLoc{ .file = atom.file, .index = relocation.index }).finalLoc(wasm_bin);
     const symbol = target_loc.getSymbol(wasm_bin);
+    if (relocation.relocation_type != .R_WASM_TYPE_INDEX_LEB and
+        symbol.tag != .section and
+        symbol.isDead())
+    {
+        const val = atom.thombstone(wasm_bin) orelse relocation.addend;
+        return @bitCast(val);
+    }
     switch (relocation.relocation_type) {
         .R_WASM_FUNCTION_INDEX_LEB => return symbol.index,
         .R_WASM_TABLE_NUMBER_LEB => return symbol.index,
@@ -170,30 +177,43 @@ fn relocationValue(atom: Atom, relocation: types.Relocation, wasm_bin: *const Wa
             if (symbol.isUndefined()) {
                 return 0;
             }
-            const va = @as(i64, @intCast(symbol.virtual_address));
+            const va: i33 = @intCast(symbol.virtual_address);
             return @intCast(va + relocation.addend);
         },
         .R_WASM_EVENT_INDEX_LEB => return symbol.index,
         .R_WASM_SECTION_OFFSET_I32 => {
             const target_atom_index = wasm_bin.symbol_atom.get(target_loc).?;
             const target_atom = wasm_bin.getAtom(target_atom_index);
-            const rel_value: i32 = @intCast(target_atom.offset);
+            const rel_value: i33 = @intCast(target_atom.offset);
             return @intCast(rel_value + relocation.addend);
         },
         .R_WASM_FUNCTION_OFFSET_I32 => {
-            const target_atom_index = wasm_bin.symbol_atom.get(target_loc) orelse {
-                return @as(u32, @bitCast(@as(i32, -1)));
-            };
+            if (symbol.isUndefined()) {
+                const val = atom.thombstone(wasm_bin) orelse relocation.addend;
+                return @bitCast(val);
+            }
+            const target_atom_index = wasm_bin.symbol_atom.get(target_loc).?;
             const target_atom = wasm_bin.getAtom(target_atom_index);
-            const offset: u32 = 11 + Wasm.getULEB128Size(target_atom.size); // Header (11 bytes fixed-size) + body size (leb-encoded)
-            const rel_value: i32 = @intCast(target_atom.offset + offset);
+            const rel_value: i33 = @intCast(target_atom.offset);
             return @intCast(rel_value + relocation.addend);
         },
         .R_WASM_MEMORY_ADDR_TLS_SLEB,
         .R_WASM_MEMORY_ADDR_TLS_SLEB64,
         => {
-            const va: i32 = @intCast(symbol.virtual_address);
+            const va: i33 = @intCast(symbol.virtual_address);
             return @intCast(va + relocation.addend);
         },
     }
 }
+
+// For a given `Atom` returns whether it has a thombstone value or not.
+/// This defines whether we want a specific value when a section is dead.
+fn thombstone(atom: Atom, wasm: *const Wasm) ?i64 {
+    const atom_name = atom.symbolLoc().getName(wasm);
+    if (std.mem.eql(u8, atom_name, ".debug_ranges") or std.mem.eql(u8, atom_name, ".debug_loc")) {
+        return -2;
+    } else if (std.mem.startsWith(u8, atom_name, ".debug_")) {
+        return -1;
+    }
+    return null;
+}
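A standalone restatement of the tombstone rule introduced above, written
as a plain test so it can run outside the linker (the function name is
illustrative, not part of the patch):

    const std = @import("std");

    // Dead .debug_ranges and .debug_loc atoms are tombstoned with -2, any
    // other .debug_* section with -1, and non-debug sections get no tombstone.
    fn tombstoneFor(section_name: []const u8) ?i64 {
        if (std.mem.eql(u8, section_name, ".debug_ranges") or
            std.mem.eql(u8, section_name, ".debug_loc"))
        {
            return -2;
        }
        if (std.mem.startsWith(u8, section_name, ".debug_")) return -1;
        return null;
    }

    test "tombstone selection by section name" {
        try std.testing.expectEqual(@as(?i64, -2), tombstoneFor(".debug_loc"));
        try std.testing.expectEqual(@as(?i64, -1), tombstoneFor(".debug_info"));
        try std.testing.expectEqual(@as(?i64, null), tombstoneFor(".rodata"));
    }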