From 8a3ad3f6204747b5621e14cae0564ee7929a7cd8 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Tue, 18 Apr 2023 14:04:42 +0200 Subject: [PATCH] elf: do not reserve a GOT slot for every Atom --- src/arch/aarch64/CodeGen.zig | 1 + src/arch/arm/CodeGen.zig | 1 + src/arch/riscv64/CodeGen.zig | 1 + src/arch/sparc64/CodeGen.zig | 1 + src/arch/x86_64/CodeGen.zig | 12 ++- src/codegen.zig | 1 + src/link/Elf.zig | 150 ++++++++++++++++++++++++----------- src/link/Elf/Atom.zig | 18 +++-- 8 files changed, 132 insertions(+), 53 deletions(-) diff --git a/src/arch/aarch64/CodeGen.zig b/src/arch/aarch64/CodeGen.zig index 1acc11d7e8..a2db3459dc 100644 --- a/src/arch/aarch64/CodeGen.zig +++ b/src/arch/aarch64/CodeGen.zig @@ -4290,6 +4290,7 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallModifier if (self.bin_file.cast(link.File.Elf)) |elf_file| { const atom_index = try elf_file.getOrCreateAtomForDecl(func.owner_decl); const atom = elf_file.getAtom(atom_index); + _ = try atom.getOrCreateOffsetTableEntry(elf_file); const got_addr = @intCast(u32, atom.getOffsetTableAddress(elf_file)); try self.genSetReg(Type.initTag(.usize), .x30, .{ .memory = got_addr }); } else if (self.bin_file.cast(link.File.MachO)) |macho_file| { diff --git a/src/arch/arm/CodeGen.zig b/src/arch/arm/CodeGen.zig index 661e713b1c..156ad380b8 100644 --- a/src/arch/arm/CodeGen.zig +++ b/src/arch/arm/CodeGen.zig @@ -4270,6 +4270,7 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallModifier if (self.bin_file.cast(link.File.Elf)) |elf_file| { const atom_index = try elf_file.getOrCreateAtomForDecl(func.owner_decl); const atom = elf_file.getAtom(atom_index); + _ = try atom.getOrCreateOffsetTableEntry(elf_file); const got_addr = @intCast(u32, atom.getOffsetTableAddress(elf_file)); try self.genSetReg(Type.initTag(.usize), .lr, .{ .memory = got_addr }); } else if (self.bin_file.cast(link.File.MachO)) |_| { diff --git a/src/arch/riscv64/CodeGen.zig b/src/arch/riscv64/CodeGen.zig index f0ab8b3317..e7dce48dbf 100644 --- a/src/arch/riscv64/CodeGen.zig +++ b/src/arch/riscv64/CodeGen.zig @@ -1734,6 +1734,7 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallModifier const func = func_payload.data; const atom_index = try elf_file.getOrCreateAtomForDecl(func.owner_decl); const atom = elf_file.getAtom(atom_index); + _ = try atom.getOrCreateOffsetTableEntry(elf_file); const got_addr = @intCast(u32, atom.getOffsetTableAddress(elf_file)); try self.genSetReg(Type.initTag(.usize), .ra, .{ .memory = got_addr }); _ = try self.addInst(.{ diff --git a/src/arch/sparc64/CodeGen.zig b/src/arch/sparc64/CodeGen.zig index 2bcc1e1c4e..beb2ce2fd2 100644 --- a/src/arch/sparc64/CodeGen.zig +++ b/src/arch/sparc64/CodeGen.zig @@ -1254,6 +1254,7 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallModifier const got_addr = if (self.bin_file.cast(link.File.Elf)) |elf_file| blk: { const atom_index = try elf_file.getOrCreateAtomForDecl(func.owner_decl); const atom = elf_file.getAtom(atom_index); + _ = try atom.getOrCreateOffsetTableEntry(elf_file); break :blk @intCast(u32, atom.getOffsetTableAddress(elf_file)); } else unreachable; diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 1c72e2296b..e4a32cd5bf 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -5624,7 +5624,9 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallModifier if (self.bin_file.cast(link.File.Elf)) |elf_file| { const atom_index = try elf_file.getOrCreateAtomForDecl(func.owner_decl); - const got_addr = elf_file.getAtom(atom_index).getOffsetTableAddress(elf_file); + const atom = elf_file.getAtom(atom_index); + _ = try atom.getOrCreateOffsetTableEntry(elf_file); + const got_addr = atom.getOffsetTableAddress(elf_file); try self.asmMemory(.call, Memory.sib(.qword, .{ .base = .ds, .disp = @intCast(i32, got_addr), @@ -5853,7 +5855,9 @@ fn airCmpLtErrorsLen(self: *Self, inst: Air.Inst.Index) !void { .{ .kind = .const_data, .ty = Type.anyerror }, 4, // dword alignment ); - const got_addr = elf_file.getAtom(atom_index).getOffsetTableAddress(elf_file); + const atom = elf_file.getAtom(atom_index); + _ = try atom.getOrCreateOffsetTableEntry(elf_file); + const got_addr = atom.getOffsetTableAddress(elf_file); try self.asmRegisterMemory(.mov, addr_reg.to64(), Memory.sib(.qword, .{ .base = .ds, .disp = @intCast(i32, got_addr), @@ -8230,7 +8234,9 @@ fn airErrorName(self: *Self, inst: Air.Inst.Index) !void { .{ .kind = .const_data, .ty = Type.anyerror }, 4, // dword alignment ); - const got_addr = elf_file.getAtom(atom_index).getOffsetTableAddress(elf_file); + const atom = elf_file.getAtom(atom_index); + _ = try atom.getOrCreateOffsetTableEntry(elf_file); + const got_addr = atom.getOffsetTableAddress(elf_file); try self.asmRegisterMemory(.mov, addr_reg.to64(), Memory.sib(.qword, .{ .base = .ds, .disp = @intCast(i32, got_addr), diff --git a/src/codegen.zig b/src/codegen.zig index 6d6238ceda..dbcd76118a 100644 --- a/src/codegen.zig +++ b/src/codegen.zig @@ -1006,6 +1006,7 @@ fn genDeclRef( if (bin_file.cast(link.File.Elf)) |elf_file| { const atom_index = try elf_file.getOrCreateAtomForDecl(decl_index); const atom = elf_file.getAtom(atom_index); + _ = try atom.getOrCreateOffsetTableEntry(elf_file); return GenResult.mcv(.{ .memory = atom.getOffsetTableAddress(elf_file) }); } else if (bin_file.cast(link.File.MachO)) |macho_file| { const atom_index = try macho_file.getOrCreateAtomForDecl(decl_index); diff --git a/src/link/Elf.zig b/src/link/Elf.zig index b25a6f8f8a..e0bea28bd1 100644 --- a/src/link/Elf.zig +++ b/src/link/Elf.zig @@ -63,6 +63,88 @@ const Section = struct { free_list: std.ArrayListUnmanaged(Atom.Index) = .{}, }; +const SectionTable = struct { + entries: std.ArrayListUnmanaged(SymIndex) = .{}, + free_list: std.ArrayListUnmanaged(Index) = .{}, + lookup: std.AutoHashMapUnmanaged(SymIndex, Index) = .{}, + + const SymIndex = u32; + const Index = u32; + + pub fn deinit(st: *ST, allocator: Allocator) void { + st.entries.deinit(allocator); + st.free_list.deinit(allocator); + st.lookup.deinit(allocator); + } + + pub fn allocateEntry(st: *ST, allocator: Allocator, target: SymIndex) !Index { + try st.entries.ensureUnusedCapacity(allocator, 1); + const index = blk: { + if (st.free_list.popOrNull()) |index| { + log.debug(" (reusing entry index {d})", .{index}); + break :blk index; + } else { + log.debug(" (allocating entry at index {d})", .{st.entries.items.len}); + const index = @intCast(u32, st.entries.items.len); + _ = st.entries.addOneAssumeCapacity(); + break :blk index; + } + }; + st.entries.items[index] = target; + try st.lookup.putNoClobber(allocator, target, index); + return index; + } + + pub fn freeEntry(st: *ST, allocator: Allocator, target: SymIndex) void { + const index = st.lookup.get(target) orelse return; + st.free_list.append(allocator, index) catch {}; + st.entries.items[index] = 0; + _ = st.lookup.remove(target); + } + + const FormatContext = struct { + ctx: *Elf, + st: *const ST, + }; + + fn fmt( + ctx: FormatContext, + comptime unused_format_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, + ) @TypeOf(writer).Error!void { + _ = options; + comptime assert(unused_format_string.len == 0); + + const base_addr = ctx.ctx.program_headers.items[ctx.ctx.phdr_got_index.?].p_vaddr; + const target = ctx.ctx.base.options.target; + const ptr_bits = target.cpu.arch.ptrBitWidth(); + const ptr_bytes: u64 = @divExact(ptr_bits, 8); + + try writer.writeAll("SectionTable:\n"); + for (ctx.st.entries.items, 0..) |entry, i| { + try writer.print(" {d}@{x} => local(%{d})\n", .{ i, base_addr + i * ptr_bytes, entry }); + } + } + + fn format(st: ST, comptime unused_format_string: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = st; + _ = unused_format_string; + _ = options; + _ = writer; + @compileError("do not format SectionTable directly; use st.fmtDebug()"); + } + + pub fn fmtDebug(st: ST, ctx: *Elf) std.fmt.Formatter(fmt) { + return .{ .data = .{ + .ctx = ctx, + .st = st, + } }; + } + + const ST = @This(); +}; + const LazySymbolMetadata = struct { text_atom: ?Atom.Index = null, rodata_atom: ?Atom.Index = null, @@ -148,17 +230,13 @@ global_symbols: std.ArrayListUnmanaged(elf.Elf64_Sym) = .{}, local_symbol_free_list: std.ArrayListUnmanaged(u32) = .{}, global_symbol_free_list: std.ArrayListUnmanaged(u32) = .{}, -offset_table_free_list: std.ArrayListUnmanaged(u32) = .{}, -/// Same order as in the file. The value is the absolute vaddr value. -/// If the vaddr of the executable program header changes, the entire -/// offset table needs to be rewritten. -offset_table: std.ArrayListUnmanaged(u64) = .{}, +got_table: SectionTable = .{}, phdr_table_dirty: bool = false, shdr_table_dirty: bool = false, shstrtab_dirty: bool = false, -offset_table_count_dirty: bool = false, +got_table_count_dirty: bool = false, debug_strtab_dirty: bool = false, debug_abbrev_section_dirty: bool = false, @@ -329,8 +407,7 @@ pub fn deinit(self: *Elf) void { self.global_symbols.deinit(gpa); self.global_symbol_free_list.deinit(gpa); self.local_symbol_free_list.deinit(gpa); - self.offset_table_free_list.deinit(gpa); - self.offset_table.deinit(gpa); + self.got_table.deinit(gpa); { var it = self.decls.iterator(); @@ -1289,6 +1366,7 @@ pub fn flushModule(self: *Elf, comp: *Compilation, prog_node: *std.Progress.Node assert(!self.shdr_table_dirty); assert(!self.shstrtab_dirty); assert(!self.debug_strtab_dirty); + assert(!self.got_table_count_dirty); } fn linkWithLLD(self: *Elf, comp: *Compilation, prog_node: *std.Progress.Node) !void { @@ -2168,7 +2246,7 @@ fn freeAtom(self: *Elf, atom_index: Atom.Index) void { _ = self.atom_by_index_table.remove(local_sym_index); self.getAtomPtr(atom_index).local_sym_index = 0; - self.offset_table_free_list.append(self.base.allocator, atom.offset_table_index) catch {}; + self.got_table.freeEntry(gpa, local_sym_index); } fn shrinkAtom(self: *Elf, atom_index: Atom.Index, new_block_size: u64) void { @@ -2191,11 +2269,9 @@ pub fn createAtom(self: *Elf) !Atom.Index { const atom_index = @intCast(Atom.Index, self.atoms.items.len); const atom = try self.atoms.addOne(gpa); const local_sym_index = try self.allocateLocalSymbol(); - const offset_table_index = try self.allocateGotOffset(); try self.atom_by_index_table.putNoClobber(gpa, local_sym_index, atom_index); atom.* = .{ .local_sym_index = local_sym_index, - .offset_table_index = offset_table_index, .prev_index = null, .next_index = null, }; @@ -2352,26 +2428,6 @@ pub fn allocateLocalSymbol(self: *Elf) !u32 { return index; } -pub fn allocateGotOffset(self: *Elf) !u32 { - try self.offset_table.ensureUnusedCapacity(self.base.allocator, 1); - - const index = blk: { - if (self.offset_table_free_list.popOrNull()) |index| { - log.debug(" (reusing GOT offset at index {d})", .{index}); - break :blk index; - } else { - log.debug(" (allocating GOT offset at index {d})", .{self.offset_table.items.len}); - const index = @intCast(u32, self.offset_table.items.len); - _ = self.offset_table.addOneAssumeCapacity(); - self.offset_table_count_dirty = true; - break :blk index; - } - }; - - self.offset_table.items[index] = 0; - return index; -} - fn freeUnnamedConsts(self: *Elf, decl_index: Module.Decl.Index) void { const unnamed_consts = self.unnamed_const_atoms.getPtr(decl_index) orelse return; for (unnamed_consts.items) |atom| { @@ -2465,6 +2521,7 @@ fn updateDeclCode(self: *Elf, decl_index: Module.Decl.Index, code: []const u8, s const decl_metadata = self.decls.get(decl_index).?; const atom_index = decl_metadata.atom; const atom = self.getAtom(atom_index); + const local_sym_index = atom.getSymbolIndex().?; const shdr_index = decl_metadata.shdr; if (atom.getSymbol(self).st_size != 0 and self.base.child_pid == null) { @@ -2485,8 +2542,9 @@ fn updateDeclCode(self: *Elf, decl_index: Module.Decl.Index, code: []const u8, s local_sym.st_value = vaddr; log.debug(" (writing new offset table entry)", .{}); - self.offset_table.items[atom.offset_table_index] = vaddr; - try self.writeOffsetTableEntry(atom.offset_table_index); + const got_entry_index = self.got_table.lookup.get(local_sym_index).?; + self.got_table.entries.items[got_entry_index] = local_sym_index; + try self.writeOffsetTableEntry(got_entry_index); } } else if (code.len < local_sym.st_size) { self.shrinkAtom(atom_index, code.len); @@ -2494,7 +2552,7 @@ fn updateDeclCode(self: *Elf, decl_index: Module.Decl.Index, code: []const u8, s local_sym.st_size = code.len; // TODO this write could be avoided if no fields of the symbol were changed. - try self.writeSymbol(atom.getSymbolIndex().?); + try self.writeSymbol(local_sym_index); } else { const local_sym = atom.getSymbolPtr(self); local_sym.* = .{ @@ -2509,12 +2567,12 @@ fn updateDeclCode(self: *Elf, decl_index: Module.Decl.Index, code: []const u8, s errdefer self.freeAtom(atom_index); log.debug("allocated text block for {s} at 0x{x}", .{ decl_name, vaddr }); - self.offset_table.items[atom.offset_table_index] = vaddr; local_sym.st_value = vaddr; local_sym.st_size = code.len; - try self.writeSymbol(atom.getSymbolIndex().?); - try self.writeOffsetTableEntry(atom.offset_table_index); + try self.writeSymbol(local_sym_index); + const got_entry_index = try atom.getOrCreateOffsetTableEntry(self); + try self.writeOffsetTableEntry(got_entry_index); } const local_sym = atom.getSymbolPtr(self); @@ -2755,12 +2813,12 @@ fn updateLazySymbolAtom( errdefer self.freeAtom(atom_index); log.debug("allocated text block for {s} at 0x{x}", .{ name, vaddr }); - self.offset_table.items[atom.offset_table_index] = vaddr; local_sym.st_value = vaddr; local_sym.st_size = code.len; try self.writeSymbol(local_sym_index); - try self.writeOffsetTableEntry(atom.offset_table_index); + const got_entry_index = try atom.getOrCreateOffsetTableEntry(self); + try self.writeOffsetTableEntry(got_entry_index); const section_offset = vaddr - self.program_headers.items[phdr_index].p_vaddr; const file_offset = self.sections.items(.shdr)[shdr_index].sh_offset + section_offset; @@ -2991,30 +3049,32 @@ fn writeSectHeader(self: *Elf, index: usize) !void { fn writeOffsetTableEntry(self: *Elf, index: usize) !void { const entry_size: u16 = self.archPtrWidthBytes(); - if (self.offset_table_count_dirty) { - const needed_size = self.offset_table.items.len * entry_size; + if (self.got_table_count_dirty) { + const needed_size = self.got_table.entries.items.len * entry_size; try self.growAllocSection(self.got_section_index.?, needed_size); - self.offset_table_count_dirty = false; + self.got_table_count_dirty = false; } const endian = self.base.options.target.cpu.arch.endian(); const shdr = &self.sections.items(.shdr)[self.got_section_index.?]; const off = shdr.sh_offset + @as(u64, entry_size) * index; const phdr = &self.program_headers.items[self.phdr_got_index.?]; const vaddr = phdr.p_vaddr + @as(u64, entry_size) * index; + const got_entry = self.got_table.entries.items[index]; + const got_value = self.getSymbol(got_entry).st_value; switch (entry_size) { 2 => { var buf: [2]u8 = undefined; - mem.writeInt(u16, &buf, @intCast(u16, self.offset_table.items[index]), endian); + mem.writeInt(u16, &buf, @intCast(u16, got_value), endian); try self.base.file.?.pwriteAll(&buf, off); }, 4 => { var buf: [4]u8 = undefined; - mem.writeInt(u32, &buf, @intCast(u32, self.offset_table.items[index]), endian); + mem.writeInt(u32, &buf, @intCast(u32, got_value), endian); try self.base.file.?.pwriteAll(&buf, off); }, 8 => { var buf: [8]u8 = undefined; - mem.writeInt(u64, &buf, self.offset_table.items[index], endian); + mem.writeInt(u64, &buf, got_value, endian); try self.base.file.?.pwriteAll(&buf, off); if (self.base.child_pid) |pid| { diff --git a/src/link/Elf/Atom.zig b/src/link/Elf/Atom.zig index 4ab304ef71..70be5abbca 100644 --- a/src/link/Elf/Atom.zig +++ b/src/link/Elf/Atom.zig @@ -14,9 +14,6 @@ const Elf = @import("../Elf.zig"); /// offset table entry. local_sym_index: u32, -/// This field is undefined for symbols with size = 0. -offset_table_index: u32, - /// Points to the previous and next neighbors, based on the `text_offset`. /// This can be used to find, for example, the capacity of this `TextBlock`. prev_index: ?Index, @@ -48,13 +45,24 @@ pub fn getName(self: Atom, elf_file: *const Elf) []const u8 { return elf_file.getSymbolName(self.getSymbolIndex().?); } +/// If entry already exists, returns index to it. +/// Otherwise, creates a new entry in the Global Offset Table for this Atom. +pub fn getOrCreateOffsetTableEntry(self: Atom, elf_file: *Elf) !u32 { + const sym_index = self.getSymbolIndex().?; + if (elf_file.got_table.lookup.get(sym_index)) |index| return index; + const index = try elf_file.got_table.allocateEntry(elf_file.base.allocator, sym_index); + elf_file.got_table_count_dirty = true; + return index; +} + pub fn getOffsetTableAddress(self: Atom, elf_file: *Elf) u64 { - assert(self.getSymbolIndex() != null); + const sym_index = self.getSymbolIndex().?; + const got_entry_index = elf_file.got_table.lookup.get(sym_index).?; const target = elf_file.base.options.target; const ptr_bits = target.cpu.arch.ptrBitWidth(); const ptr_bytes: u64 = @divExact(ptr_bits, 8); const got = elf_file.program_headers.items[elf_file.phdr_got_index.?]; - return got.p_vaddr + self.offset_table_index * ptr_bytes; + return got.p_vaddr + got_entry_index * ptr_bytes; } /// Returns how much room there is to grow in virtual address space.