teach std.debug to convert addresses to ELF symbols

When reading `std.debug.Dwarf ` fails, `std.debug.SelfInfo` will now try to load
function names from `symtab`.

Written under contract from TigerBeetle. © 2024 TigerBeetle
This commit is contained in:
geemili 2024-11-20 10:07:55 -07:00
parent f4e042a4c3
commit 3a34582cbc
3 changed files with 351 additions and 40 deletions

View File

@ -16,6 +16,7 @@ const native_endian = native_arch.endian();
pub const MemoryAccessor = @import("debug/MemoryAccessor.zig");
pub const FixedBufferReader = @import("debug/FixedBufferReader.zig");
pub const Dwarf = @import("debug/Dwarf.zig");
pub const ElfSymTab = @import("debug/ElfSymTab.zig");
pub const Pdb = @import("debug/Pdb.zig");
pub const SelfInfo = @import("debug/SelfInfo.zig");
pub const Info = @import("debug/Info.zig");
@ -945,7 +946,7 @@ fn printUnwindError(debug_info: *SelfInfo, out_stream: anytype, address: usize,
pub fn printSourceAtAddress(debug_info: *SelfInfo, out_stream: anytype, address: usize, tty_config: io.tty.Config) !void {
const module = debug_info.getModuleForAddress(address) catch |err| switch (err) {
error.MissingDebugInfo, error.InvalidDebugInfo => return printUnknownSource(debug_info, out_stream, address, tty_config),
error.MissingDebugInfo => return printUnknownSource(debug_info, out_stream, address, tty_config),
else => return err,
};

254
lib/std/debug/ElfSymTab.zig Normal file
View File

@ -0,0 +1,254 @@
//! Similar to std.debug.Dwarf, but only using symbol info from an ELF file.
const ElfSymTab = @This();
endian: std.builtin.Endian,
base_address: usize,
mapped_memory: []align(std.mem.page_size) const u8,
sections: SectionArray,
/// Populated by `scanAllSymbols`.
symbol_list: std.ArrayListUnmanaged(Symbol) = .empty,
pub const Symbol = struct {
name: []const u8,
start: u64,
end: u64,
};
pub const OpenError = ScanError;
/// Initialize DWARF info. The caller has the responsibility to initialize most
/// the `Dwarf` fields before calling. `binary_mem` is the raw bytes of the
/// main binary file (not the secondary debug info file).
pub fn open(d: *ElfSymTab, gpa: Allocator) OpenError!void {
try d.scanAllSymbols(gpa);
}
pub const ScanError = error{
InvalidDebugInfo,
MissingDebugInfo,
} || Allocator.Error || std.debug.FixedBufferReader.Error;
fn scanAllSymbols(ei: *ElfSymTab, allocator: Allocator) OpenError!void {
const symtab: Section = ei.sections[@intFromEnum(Section.Id.symtab)].?;
const strtab: Section = ei.sections[@intFromEnum(Section.Id.strtab)].?;
const num_symbols = symtab.data.len / symtab.entry_size;
const symbols = @as([*]const elf.Sym, @ptrCast(@alignCast(symtab.data.ptr)))[0..num_symbols];
for (symbols) |symbol| {
if (symbol.st_name == 0) continue;
if (symbol.st_shndx == elf.SHN_UNDEF) continue;
const symbol_name = getStringFromTable(strtab.data, symbol.st_name) orelse {
// If it doesn't have a symbol name, we can't really use it for debugging purposes
continue;
};
// TODO: Does SHN_ABS make a difference for this use case?
// if (symbol.st_shndx == elf.SHN_ABS) {
// continue;
// }
// TODO: handle relocatable symbols in DYN type binaries
try ei.symbol_list.append(allocator, .{
.name = symbol_name,
.start = symbol.st_value,
.end = symbol.st_value + symbol.st_size,
});
}
}
pub const LoadError = error{
InvalidDebugInfo,
MissingDebugInfo,
InvalidElfMagic,
InvalidElfVersion,
InvalidElfEndian,
/// TODO: implement this and then remove this error code
UnimplementedElfForeignEndian,
/// TODO: implement this and then remove this error code
UnimplementedElfType,
/// The debug info may be valid but this implementation uses memory
/// mapping which limits things to usize. If the target debug info is
/// 64-bit and host is 32-bit, there may be debug info that is not
/// supportable using this method.
Overflow,
PermissionDenied,
LockedMemoryLimitExceeded,
MemoryMappingNotSupported,
} || Allocator.Error || std.fs.File.OpenError || OpenError;
/// Reads symbol info from an already mapped ELF file.
pub fn load(
gpa: Allocator,
mapped_mem: []align(std.mem.page_size) const u8,
expected_crc: ?u32,
) LoadError!ElfSymTab {
if (expected_crc) |crc| if (crc != std.hash.crc.Crc32.hash(mapped_mem)) return error.InvalidDebugInfo;
const hdr: *const elf.Ehdr = @ptrCast(&mapped_mem[0]);
if (!mem.eql(u8, hdr.e_ident[0..4], elf.MAGIC)) return error.InvalidElfMagic;
if (hdr.e_ident[elf.EI_VERSION] != 1) return error.InvalidElfVersion;
const endian: std.builtin.Endian = switch (hdr.e_ident[elf.EI_DATA]) {
elf.ELFDATA2LSB => .little,
elf.ELFDATA2MSB => .big,
else => return error.InvalidElfEndian,
};
if (endian != native_endian) return error.UnimplementedElfForeignEndian;
if (hdr.e_type != .EXEC) return error.UnimplementedElfType;
const shoff = hdr.e_shoff;
const str_section_off = shoff + @as(u64, hdr.e_shentsize) * @as(u64, hdr.e_shstrndx);
const str_shdr: *const elf.Shdr = @ptrCast(@alignCast(&mapped_mem[cast(usize, str_section_off) orelse return error.Overflow]));
const header_strings = mapped_mem[str_shdr.sh_offset..][0..str_shdr.sh_size];
const shdrs = @as(
[*]const elf.Shdr,
@ptrCast(@alignCast(&mapped_mem[shoff])),
)[0..hdr.e_shnum];
var sections: ElfSymTab.SectionArray = ElfSymTab.null_section_array;
for (shdrs) |*shdr| {
if (shdr.sh_type == elf.SHT_NULL or shdr.sh_type == elf.SHT_NOBITS) continue;
const name = mem.sliceTo(header_strings[shdr.sh_name..], 0);
var section_index: ?usize = null;
inline for (@typeInfo(ElfSymTab.Section.Id).@"enum".fields, 0..) |sect, i| {
if (mem.eql(u8, "." ++ sect.name, name)) section_index = i;
}
if (section_index == null) continue;
if (sections[section_index.?] != null) continue;
const section_bytes = try chopSlice(mapped_mem, shdr.sh_offset, shdr.sh_size);
sections[section_index.?] = if ((shdr.sh_flags & elf.SHF_COMPRESSED) > 0) blk: {
var section_stream = std.io.fixedBufferStream(section_bytes);
const section_reader = section_stream.reader();
const chdr = section_reader.readStruct(elf.Chdr) catch continue;
if (chdr.ch_type != .ZLIB) continue;
var zlib_stream = std.compress.zlib.decompressor(section_reader);
const decompressed_section = try gpa.alloc(u8, chdr.ch_size);
errdefer gpa.free(decompressed_section);
const read = zlib_stream.reader().readAll(decompressed_section) catch continue;
assert(read == decompressed_section.len);
break :blk .{
.entry_size = shdr.sh_entsize,
.data = decompressed_section,
.virtual_address = shdr.sh_addr,
.owned = true,
};
} else .{
.entry_size = shdr.sh_entsize,
.data = section_bytes,
.virtual_address = shdr.sh_addr,
.owned = false,
};
}
const missing_debug_info =
sections[@intFromEnum(ElfSymTab.Section.Id.strtab)] == null or
sections[@intFromEnum(ElfSymTab.Section.Id.symtab)] == null;
if (missing_debug_info) {
return error.MissingDebugInfo;
}
var ei: ElfSymTab = .{
.base_address = 0,
.endian = endian,
.sections = sections,
.mapped_memory = mapped_mem,
};
try ElfSymTab.open(&ei, gpa);
return ei;
}
pub fn deinit(self: *ElfSymTab, allocator: std.mem.Allocator) void {
for (self.sections) |section_opt| {
const s = section_opt orelse continue;
allocator.free(s.data);
}
self.symbol_list.deinit(allocator);
}
const num_sections = std.enums.directEnumArrayLen(Section.Id, 0);
pub const SectionArray = [num_sections]?Section;
pub const null_section_array = [_]?Section{null} ** num_sections;
pub const Section = struct {
entry_size: usize,
data: []const u8,
// Module-relative virtual address.
// Only set if the section data was loaded from disk.
virtual_address: ?usize = null,
// If `data` is owned by this Dwarf.
owned: bool,
pub const Id = enum {
strtab,
symtab,
};
// For sections that are not memory mapped by the loader, this is an offset
// from `data.ptr` to where the section would have been mapped. Otherwise,
// `data` is directly backed by the section and the offset is zero.
pub fn virtualOffset(self: Section, base_address: usize) i64 {
return if (self.virtual_address) |va|
@as(i64, @intCast(base_address + va)) -
@as(i64, @intCast(@intFromPtr(self.data.ptr)))
else
0;
}
};
pub fn section(ei: ElfSymTab, elf_section: Section.Id) ?[]const u8 {
return if (ei.sections[@intFromEnum(elf_section)]) |s| s.data else null;
}
pub fn getSymbolAtAddress(self: *@This(), allocator: Allocator, address: usize) !std.debug.Symbol {
_ = allocator;
// Translate the VA into an address into this object
const relocated_address = address - self.base_address;
for (self.symbol_list.items) |symbol| {
if (symbol.start <= relocated_address and relocated_address <= symbol.end) {
return .{
.name = symbol.name,
};
}
}
return .{};
}
fn getStringFromTable(string_table: []const u8, pos: usize) ?[]const u8 {
if (pos == 0) return null;
const section_name_end = std.mem.indexOfScalarPos(u8, string_table, pos, '\x00') orelse return null;
return string_table[pos..section_name_end];
}
pub fn chopSlice(ptr: []const u8, offset: u64, size: u64) error{Overflow}![]const u8 {
const start = cast(usize, offset) orelse return error.Overflow;
const end = start + (cast(usize, size) orelse return error.Overflow);
return ptr[start..end];
}
const builtin = @import("builtin");
const native_endian = builtin.cpu.arch.endian();
const std = @import("../std.zig");
const Allocator = std.mem.Allocator;
const elf = std.elf;
const mem = std.mem;
const assert = std.debug.assert;
const cast = std.math.cast;
const maxInt = std.math.maxInt;
const MemoryAccessor = std.debug.MemoryAccessor;
const FixedBufferReader = std.debug.FixedBufferReader;

View File

@ -17,6 +17,7 @@ const pdb = std.pdb;
const assert = std.debug.assert;
const posix = std.posix;
const elf = std.elf;
const ElfSymTab = std.debug.ElfSymTab;
const Dwarf = std.debug.Dwarf;
const Pdb = std.debug.Pdb;
const File = std.fs.File;
@ -462,8 +463,8 @@ fn lookupModuleDl(self: *SelfInfo, address: usize) !*Module {
return obj_di;
}
const obj_di = try self.allocator.create(Module);
errdefer self.allocator.destroy(obj_di);
const obj_ei = try self.allocator.create(Module);
errdefer self.allocator.destroy(obj_ei);
var sections: Dwarf.SectionArray = Dwarf.null_section_array;
if (ctx.gnu_eh_frame) |eh_frame_hdr| {
@ -477,15 +478,23 @@ fn lookupModuleDl(self: *SelfInfo, address: usize) !*Module {
};
}
obj_di.* = try readElfDebugInfo(self.allocator, if (ctx.name.len > 0) ctx.name else null, ctx.build_id, null, &sections, null);
obj_di.base_address = ctx.base_address;
obj_ei.* = try Elf.readDebugInfo(self.allocator, if (ctx.name.len > 0) ctx.name else null, ctx.build_id, null, &sections, null);
// Missing unwind info isn't treated as a failure, as the unwinder will fall back to FP-based unwinding
obj_di.dwarf.scanAllUnwindInfo(self.allocator, ctx.base_address) catch {};
switch (obj_ei.*) {
.dwarf => |*dwarf_info| {
dwarf_info.base_address = ctx.base_address;
try self.address_map.putNoClobber(ctx.base_address, obj_di);
// Missing unwind info isn't treated as a failure, as the unwinder will fall back to FP-based unwinding
dwarf_info.dwarf.scanAllUnwindInfo(self.allocator, ctx.base_address) catch {};
},
.symtab => |*symtab| {
symtab.base_address = ctx.base_address;
},
}
return obj_di;
try self.address_map.putNoClobber(ctx.base_address, obj_ei);
return obj_ei;
}
fn lookupModuleHaiku(self: *SelfInfo, address: usize) !*Module {
@ -794,7 +803,7 @@ pub const Module = switch (native_os) {
};
}
},
.linux, .netbsd, .freebsd, .dragonfly, .openbsd, .haiku, .solaris, .illumos => Dwarf.ElfModule,
.linux, .netbsd, .freebsd, .dragonfly, .openbsd, .haiku, .solaris, .illumos => Elf,
.wasi, .emscripten => struct {
pub fn deinit(self: *@This(), allocator: Allocator) void {
_ = self;
@ -1036,38 +1045,85 @@ fn readCoffDebugInfo(allocator: Allocator, coff_obj: *coff.Coff) !Module {
}
}
/// Reads debug info from an ELF file, or the current binary if none in specified.
/// If the required sections aren't present but a reference to external debug info is,
/// then this this function will recurse to attempt to load the debug sections from
/// an external file.
pub fn readElfDebugInfo(
allocator: Allocator,
elf_filename: ?[]const u8,
build_id: ?[]const u8,
expected_crc: ?u32,
parent_sections: *Dwarf.SectionArray,
parent_mapped_mem: ?[]align(mem.page_size) const u8,
) !Dwarf.ElfModule {
nosuspend {
const elf_file = (if (elf_filename) |filename| blk: {
break :blk fs.cwd().openFile(filename, .{});
} else fs.openSelfExe(.{})) catch |err| switch (err) {
error.FileNotFound => return error.MissingDebugInfo,
else => return err,
};
pub const Elf = union(enum) {
dwarf: Dwarf.ElfModule,
symtab: ElfSymTab,
const mapped_mem = try mapWholeFile(elf_file);
return Dwarf.ElfModule.load(
allocator,
mapped_mem,
build_id,
expected_crc,
parent_sections,
parent_mapped_mem,
elf_filename,
);
/// Reads debug info from an ELF file, or the current binary if none in specified.
/// If the required sections aren't present but a reference to external debug info is,
/// then this this function will recurse to attempt to load the debug sections from
/// an external file.
pub fn readDebugInfo(
allocator: Allocator,
elf_filename: ?[]const u8,
build_id: ?[]const u8,
expected_crc: ?u32,
parent_sections: *Dwarf.SectionArray,
parent_mapped_mem: ?[]align(mem.page_size) const u8,
) !Elf {
nosuspend {
const elf_file = (if (elf_filename) |filename| blk: {
break :blk fs.cwd().openFile(filename, .{});
} else fs.openSelfExe(.{})) catch |err| switch (err) {
error.FileNotFound => return error.MissingDebugInfo,
else => return err,
};
const mapped_mem = try mapWholeFile(elf_file);
load_dwarf: {
const dwarf_info = Dwarf.ElfModule.load(
allocator,
mapped_mem,
build_id,
expected_crc,
parent_sections,
parent_mapped_mem,
elf_filename,
) catch {
break :load_dwarf;
};
return Elf{ .dwarf = dwarf_info };
}
load_symtab: {
const symtab = ElfSymTab.load(
allocator,
mapped_mem,
expected_crc,
) catch {
break :load_symtab;
};
return Elf{ .symtab = symtab };
}
return error.MissingDebugInfo;
}
}
}
pub fn deinit(this: *@This(), allocator: Allocator) void {
return switch (this.*) {
.dwarf => |*dwarf_info| dwarf_info.deinit(allocator),
.symtab => |*symtab| symtab.deinit(allocator),
};
}
pub fn getDwarfInfoForAddress(this: *@This(), allocator: Allocator, address: usize) !?*const Dwarf {
_ = allocator;
_ = address;
return switch (this.*) {
.dwarf => |dwarf_info| &dwarf_info.dwarf,
.symtab => null,
};
}
pub fn getSymbolAtAddress(this: *@This(), allocator: Allocator, address: usize) !std.debug.Symbol {
return switch (this.*) {
.dwarf => |*dwarf_info| dwarf_info.getSymbolAtAddress(allocator, address),
.symtab => |*symtab| symtab.getSymbolAtAddress(allocator, address),
};
}
};
const MachoSymbol = struct {
strx: u32,