diff options
| -rw-r--r-- | lib/std/debug.zig | 1 | ||||
| -rw-r--r-- | lib/std/debug/Dwarf.zig | 1 | ||||
| -rw-r--r-- | lib/std/debug/Dwarf/ElfModule.zig | 376 | ||||
| -rw-r--r-- | lib/std/debug/ElfFile.zig | 536 | ||||
| -rw-r--r-- | lib/std/debug/Info.zig | 33 | ||||
| -rw-r--r-- | lib/std/debug/SelfInfo.zig | 1 | ||||
| -rw-r--r-- | lib/std/debug/SelfInfo/ElfModule.zig | 100 |
7 files changed, 627 insertions, 421 deletions
diff --git a/lib/std/debug.zig b/lib/std/debug.zig index 2b6028ca82..b7d877bfaf 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -18,6 +18,7 @@ const root = @import("root"); pub const Dwarf = @import("debug/Dwarf.zig"); pub const Pdb = @import("debug/Pdb.zig"); +pub const ElfFile = @import("debug/ElfFile.zig"); pub const SelfInfo = @import("debug/SelfInfo.zig"); pub const Info = @import("debug/Info.zig"); pub const Coverage = @import("debug/Coverage.zig"); diff --git a/lib/std/debug/Dwarf.zig b/lib/std/debug/Dwarf.zig index 8b8dc7d732..ebfb15e6e2 100644 --- a/lib/std/debug/Dwarf.zig +++ b/lib/std/debug/Dwarf.zig @@ -30,7 +30,6 @@ pub const expression = @import("Dwarf/expression.zig"); pub const abi = @import("Dwarf/abi.zig"); pub const call_frame = @import("Dwarf/call_frame.zig"); pub const Unwind = @import("Dwarf/Unwind.zig"); -pub const ElfModule = @import("Dwarf/ElfModule.zig"); /// Useful to temporarily enable while working on this file. const debug_debug_mode = false; diff --git a/lib/std/debug/Dwarf/ElfModule.zig b/lib/std/debug/Dwarf/ElfModule.zig deleted file mode 100644 index 4d425b1718..0000000000 --- a/lib/std/debug/Dwarf/ElfModule.zig +++ /dev/null @@ -1,376 +0,0 @@ -//! A thin wrapper around `Dwarf` which handles loading debug information from an ELF file. Load the -//! info with `load`, then directly access the `dwarf` field before finally `deinit`ing. - -dwarf: Dwarf, - -/// If we encounter a `.eh_frame` section while loading the ELF module, it is stored here and may be -/// used with `Dwarf.Unwind` for call stack unwinding. -eh_frame: ?UnwindSection, -/// If we encounter a `.debug_frame` section while loading the ELF module, it is stored here and may -/// be used with `Dwarf.Unwind` for call stack unwinding. -debug_frame: ?UnwindSection, - -/// The memory-mapped ELF file, which is referenced by `dwarf`. This field is here only so that -/// this memory can be unmapped by `ElfModule.deinit`. 
-mapped_file: []align(std.heap.page_size_min) const u8, -/// Sometimes, debug info is stored separately to the main ELF file. In that case, `mapped_file` -/// is the mapped ELF binary, and `mapped_debug_file` is the mapped debug info file. Both must -/// be unmapped by `ElfModule.deinit`. -mapped_debug_file: ?[]align(std.heap.page_size_min) const u8, - -pub const UnwindSection = struct { - vaddr: u64, - bytes: []const u8, - owned: bool, -}; - -pub fn deinit(em: *ElfModule, gpa: Allocator) void { - em.dwarf.deinit(gpa); - std.posix.munmap(em.mapped_file); - if (em.mapped_debug_file) |m| std.posix.munmap(m); - if (em.eh_frame) |s| if (s.owned) gpa.free(s.bytes); - if (em.debug_frame) |s| if (s.owned) gpa.free(s.bytes); -} - -pub const LoadError = error{ - InvalidDebugInfo, - MissingDebugInfo, - InvalidElfMagic, - InvalidElfVersion, - InvalidElfEndian, - /// TODO: implement this and then remove this error code - UnimplementedDwarfForeignEndian, - /// The debug info may be valid but this implementation uses memory - /// mapping which limits things to usize. If the target debug info is - /// 64-bit and host is 32-bit, there may be debug info that is not - /// supportable using this method. - Overflow, - - PermissionDenied, - LockedMemoryLimitExceeded, - MemoryMappingNotSupported, -} || Allocator.Error || std.fs.File.OpenError || Dwarf.OpenError; - -/// Reads debug info from an ELF file given its path. -/// -/// If the required sections aren't present but a reference to external debug -/// info is, then this this function will recurse to attempt to load the debug -/// sections from an external file. 
-pub fn load( - gpa: Allocator, - elf_file_path: Path, - build_id: ?[]const u8, - expected_crc: ?u32, - parent_sections: ?*Dwarf.SectionArray, - parent_mapped_mem: ?[]align(std.heap.page_size_min) const u8, -) LoadError!ElfModule { - const mapped_mem: []align(std.heap.page_size_min) const u8 = mapped: { - const elf_file = try elf_file_path.root_dir.handle.openFile(elf_file_path.sub_path, .{}); - defer elf_file.close(); - - const file_len = std.math.cast( - usize, - elf_file.getEndPos() catch return Dwarf.bad(), - ) orelse return error.Overflow; - - break :mapped std.posix.mmap( - null, - file_len, - std.posix.PROT.READ, - .{ .TYPE = .SHARED }, - elf_file.handle, - 0, - ) catch |err| switch (err) { - error.MappingAlreadyExists => unreachable, - else => |e| return e, - }; - }; - errdefer std.posix.munmap(mapped_mem); - - if (expected_crc) |crc| if (crc != std.hash.crc.Crc32.hash(mapped_mem)) return error.InvalidDebugInfo; - - const hdr: *const elf.Ehdr = @ptrCast(&mapped_mem[0]); - if (!mem.eql(u8, hdr.e_ident[0..4], elf.MAGIC)) return error.InvalidElfMagic; - if (hdr.e_ident[elf.EI_VERSION] != 1) return error.InvalidElfVersion; - - const endian: std.builtin.Endian = switch (hdr.e_ident[elf.EI_DATA]) { - elf.ELFDATA2LSB => .little, - elf.ELFDATA2MSB => .big, - else => return error.InvalidElfEndian, - }; - if (endian != native_endian) return error.UnimplementedDwarfForeignEndian; - - const shoff = hdr.e_shoff; - const str_section_off = std.math.cast( - usize, - shoff + @as(u64, hdr.e_shentsize) * @as(u64, hdr.e_shstrndx), - ) orelse return error.Overflow; - const str_shdr: *const elf.Shdr = @ptrCast(@alignCast(mapped_mem[str_section_off..])); - const header_strings = mapped_mem[str_shdr.sh_offset..][0..str_shdr.sh_size]; - const shdrs = @as( - [*]const elf.Shdr, - @ptrCast(@alignCast(&mapped_mem[shoff])), - )[0..hdr.e_shnum]; - - var sections: Dwarf.SectionArray = @splat(null); - // Combine section list. 
This takes ownership over any owned sections from the parent scope. - if (parent_sections) |ps| { - for (ps, &sections) |*parent, *section_elem| { - if (parent.*) |*p| { - section_elem.* = p.*; - p.owned = false; - } - } - } - errdefer for (sections) |opt_section| if (opt_section) |s| if (s.owned) gpa.free(s.data); - - var eh_frame_section: ?UnwindSection = null; - errdefer if (eh_frame_section) |s| if (s.owned) gpa.free(s.bytes); - - var debug_frame_section: ?UnwindSection = null; - errdefer if (debug_frame_section) |s| if (s.owned) gpa.free(s.bytes); - - var separate_debug_filename: ?[]const u8 = null; - var separate_debug_crc: ?u32 = null; - - for (shdrs) |*shdr| { - if (shdr.sh_type == elf.SHT_NULL or shdr.sh_type == elf.SHT_NOBITS) continue; - const name = mem.sliceTo(header_strings[shdr.sh_name..], 0); - - if (mem.eql(u8, name, ".gnu_debuglink")) { - if (mapped_mem.len < shdr.sh_offset + shdr.sh_size) return error.InvalidDebugInfo; - const gnu_debuglink = mapped_mem[@intCast(shdr.sh_offset)..][0..@intCast(shdr.sh_size)]; - const debug_filename = mem.sliceTo(@as([*:0]const u8, @ptrCast(gnu_debuglink.ptr)), 0); - const crc_offset = mem.alignForward(usize, debug_filename.len + 1, 4); - const crc_bytes = gnu_debuglink[crc_offset..][0..4]; - separate_debug_crc = mem.readInt(u32, crc_bytes, endian); - separate_debug_filename = debug_filename; - continue; - } - - const section_id: union(enum) { - dwarf: Dwarf.Section.Id, - eh_frame, - debug_frame, - } = s: { - inline for (@typeInfo(Dwarf.Section.Id).@"enum".fields) |s| { - if (mem.eql(u8, "."
++ s.name, name)) { - break :s .{ .dwarf = @enumFromInt(s.value) }; - } - } - if (mem.eql(u8, ".eh_frame", name)) break :s .eh_frame; - if (mem.eql(u8, ".debug_frame", name)) break :s .debug_frame; - continue; - }; - - switch (section_id) { - .dwarf => |i| if (sections[@intFromEnum(i)] != null) continue, - .eh_frame => if (eh_frame_section != null) continue, - .debug_frame => if (debug_frame_section != null) continue, - } - - if (mapped_mem.len < shdr.sh_offset + shdr.sh_size) return error.InvalidDebugInfo; - const raw_section_bytes = mapped_mem[@intCast(shdr.sh_offset)..][0..@intCast(shdr.sh_size)]; - - const section_bytes: []const u8, const section_owned: bool = section: { - if ((shdr.sh_flags & elf.SHF_COMPRESSED) == 0) { - break :section .{ raw_section_bytes, false }; - } - var section_reader: Reader = .fixed(raw_section_bytes); - const chdr = section_reader.takeStruct(elf.Chdr, endian) catch continue; - if (chdr.ch_type != .ZLIB) continue; - - var decompress: std.compress.flate.Decompress = .init(&section_reader, .zlib, &.{}); - var decompressed_section: ArrayList(u8) = .empty; - defer decompressed_section.deinit(gpa); - decompress.reader.appendRemainingUnlimited(gpa, &decompressed_section) catch { - Dwarf.invalidDebugInfoDetected(); - continue; - }; - if (chdr.ch_size != decompressed_section.items.len) { - Dwarf.invalidDebugInfoDetected(); - continue; - } - break :section .{ try decompressed_section.toOwnedSlice(gpa), true }; - }; - switch (section_id) { - .dwarf => |id| sections[@intFromEnum(id)] = .{ - .data = section_bytes, - .owned = section_owned, - }, - .eh_frame => eh_frame_section = .{ - .vaddr = shdr.sh_addr, - .bytes = section_bytes, - .owned = section_owned, - }, - .debug_frame => debug_frame_section = .{ - .vaddr = shdr.sh_addr, - .bytes = section_bytes, - .owned = section_owned, - }, - } - } - - const missing_debug_info = - sections[@intFromEnum(Dwarf.Section.Id.debug_info)] == null or - sections[@intFromEnum(Dwarf.Section.Id.debug_abbrev)] == null
or - sections[@intFromEnum(Dwarf.Section.Id.debug_str)] == null or - sections[@intFromEnum(Dwarf.Section.Id.debug_line)] == null; - - // Attempt to load debug info from an external file - // See: https://sourceware.org/gdb/onlinedocs/gdb/Separate-Debug-Files.html - if (missing_debug_info) { - // Only allow one level of debug info nesting - if (parent_mapped_mem) |_| { - return error.MissingDebugInfo; - } - - // $XDG_CACHE_HOME/debuginfod_client/<buildid>/debuginfo - // This only opportunisticly tries to load from the debuginfod cache, but doesn't try to populate it. - // One can manually run `debuginfod-find debuginfo PATH` to download the symbols - debuginfod: { - const id = build_id orelse break :debuginfod; - switch (builtin.os.tag) { - .wasi, .windows => break :debuginfod, - else => {}, - } - const id_dir_path: []u8 = p: { - if (std.posix.getenv("DEBUGINFOD_CACHE_PATH")) |path| { - break :p try std.fmt.allocPrint(gpa, "{s}/{x}", .{ path, id }); - } - if (std.posix.getenv("XDG_CACHE_HOME")) |cache_path| { - if (cache_path.len > 0) { - break :p try std.fmt.allocPrint(gpa, "{s}/debuginfod_client/{x}", .{ cache_path, id }); - } - } - if (std.posix.getenv("HOME")) |home_path| { - break :p try std.fmt.allocPrint(gpa, "{s}/.cache/debuginfod_client/{x}", .{ home_path, id }); - } - break :debuginfod; - }; - defer gpa.free(id_dir_path); - if (!std.fs.path.isAbsolute(id_dir_path)) break :debuginfod; - - var id_dir = std.fs.openDirAbsolute(id_dir_path, .{}) catch break :debuginfod; - defer id_dir.close(); - - return load(gpa, .{ - .root_dir = .{ .path = id_dir_path, .handle = id_dir }, - .sub_path = "debuginfo", - }, null, separate_debug_crc, &sections, mapped_mem) catch break :debuginfod; - } - - const global_debug_directories = [_][]const u8{ - "/usr/lib/debug", - }; - - // <global debug directory>/.build-id/<2-character id prefix>/<id remainder>.debug - if (build_id) |id| blk: { - if (id.len < 3) break :blk; - - // Either md5 (16 bytes) or sha1 (20 bytes) are used here in
practice - const extension = ".debug"; - var id_prefix_buf: [2]u8 = undefined; - var filename_buf: [38 + extension.len]u8 = undefined; - - _ = std.fmt.bufPrint(&id_prefix_buf, "{x}", .{id[0..1]}) catch unreachable; - const filename = std.fmt.bufPrint(&filename_buf, "{x}" ++ extension, .{id[1..]}) catch break :blk; - - for (global_debug_directories) |global_directory| { - const path: Path = .{ - .root_dir = .cwd(), - .sub_path = try std.fs.path.join(gpa, &.{ - global_directory, ".build-id", &id_prefix_buf, filename, - }), - }; - defer gpa.free(path.sub_path); - - return load(gpa, path, null, separate_debug_crc, &sections, mapped_mem) catch continue; - } - } - - // use the path from .gnu_debuglink, in the same search order as gdb - separate: { - const separate_filename = separate_debug_filename orelse break :separate; - if (mem.eql(u8, std.fs.path.basename(elf_file_path.sub_path), separate_filename)) - return error.MissingDebugInfo; - - exe_dir: { - const exe_dir_path = try std.fs.path.resolve(gpa, &.{ - elf_file_path.root_dir.path orelse ".", - std.fs.path.dirname(elf_file_path.sub_path) orelse ".", - }); - defer gpa.free(exe_dir_path); - var exe_dir = std.fs.openDirAbsolute(exe_dir_path, .{}) catch break :exe_dir; - defer exe_dir.close(); - - // <exe_dir>/<gnu_debuglink> - if (load( - gpa, - .{ - .root_dir = .{ .path = exe_dir_path, .handle = exe_dir }, - .sub_path = separate_filename, - }, - null, - separate_debug_crc, - &sections, - mapped_mem, - )) |em| { - return em; - } else |_| {} - - // <exe_dir>/.debug/<gnu_debuglink> - const path: Path = .{ - .root_dir = .{ .path = exe_dir_path, .handle = exe_dir }, - .sub_path = try std.fs.path.join(gpa, &.{ ".debug", separate_filename }), - }; - defer gpa.free(path.sub_path); - - if (load(gpa, path, null, separate_debug_crc, &sections, mapped_mem)) |em| { - return em; - } else |_| {} - } - - var cwd_buf: [std.fs.max_path_bytes]u8 = undefined; - const cwd_path = std.posix.realpath(".", &cwd_buf) catch break :separate; - - // <global
debug directory>/<absolute folder of current binary>/<gnu_debuglink> - for (global_debug_directories) |global_directory| { - const path: Path = .{ - .root_dir = .cwd(), - .sub_path = try std.fs.path.join(gpa, &.{ global_directory, cwd_path, separate_filename }), - }; - defer gpa.free(path.sub_path); - if (load(gpa, path, null, separate_debug_crc, &sections, mapped_mem)) |em| { - return em; - } else |_| {} - } - } - - return error.MissingDebugInfo; - } - - var dwarf: Dwarf = .{ .sections = sections }; - try dwarf.open(gpa, endian); - return .{ - .dwarf = dwarf, - .eh_frame = eh_frame_section, - .debug_frame = debug_frame_section, - .mapped_file = parent_mapped_mem orelse mapped_mem, - .mapped_debug_file = if (parent_mapped_mem != null) mapped_mem else null, - }; -} - -const std = @import("../../std.zig"); -const Allocator = std.mem.Allocator; -const ArrayList = std.ArrayList; -const Dwarf = std.debug.Dwarf; -const Path = std.Build.Cache.Path; -const Reader = std.Io.Reader; -const mem = std.mem; -const elf = std.elf; - -const builtin = @import("builtin"); -const native_endian = builtin.cpu.arch.endian(); - -const ElfModule = @This(); diff --git a/lib/std/debug/ElfFile.zig b/lib/std/debug/ElfFile.zig new file mode 100644 index 0000000000..b8f1bdf615 --- /dev/null +++ b/lib/std/debug/ElfFile.zig @@ -0,0 +1,536 @@ +//! A helper type for loading an ELF file and collecting its DWARF debug information, unwind +//! information, and symbol table. + +is_64: bool, +endian: Endian, + +/// This is `null` iff any of the required DWARF sections were missing. `ElfFile.load` does *not* +/// call `Dwarf.open`, `Dwarf.scanAllFunctions`, etc; that is the caller's responsibility. +dwarf: ?Dwarf, + +/// If non-`null`, describes the `.eh_frame` section, which can be used with `Dwarf.Unwind`. +eh_frame: ?UnwindSection, +/// If non-`null`, describes the `.debug_frame` section, which can be used with `Dwarf.Unwind`.
+debug_frame: ?UnwindSection, + +/// If non-`null`, this is the contents of the `.strtab` section. +strtab: ?[]const u8, +/// If non-`null`, describes the `.symtab` section. +symtab: ?SymtabSection, + +/// Binary search table lazily populated by `searchSymtab`. +symbol_search_table: ?[]u64, + +/// The memory-mapped ELF file, which is referenced by `dwarf`. This field is here only so that +/// this memory can be unmapped by `ElfFile.deinit`. +mapped_file: []align(std.heap.page_size_min) const u8, +/// Sometimes, debug info is stored separately to the main ELF file. In that case, `mapped_file` +/// is the mapped ELF binary, and `mapped_debug_file` is the mapped debug info file. Both must +/// be unmapped by `ElfFile.deinit`. +mapped_debug_file: ?[]align(std.heap.page_size_min) const u8, + +arena: std.heap.ArenaAllocator.State, + +pub const UnwindSection = struct { + vaddr: u64, + bytes: []const u8, +}; +pub const SymtabSection = struct { + entry_size: u64, + bytes: []const u8, +}; + +pub const DebugInfoSearchPaths = struct { + /// The location of a debuginfod client directory, which acts as a search path for build IDs. If + /// given, we can load from this directory opportunistically, but make no effort to populate it. + /// To avoid allocation when building the search paths, this is given as two components which + /// will be concatenated. + debuginfod_client: ?[2][]const u8, + /// All "global debug directories" on the system. These are used as search paths for both debug + /// links and build IDs. On typical systems this is just "/usr/lib/debug". + global_debug: []const []const u8, + /// The path to the dirname of the ELF file, which acts as a search path for debug links. 
+ exe_dir: ?[]const u8, + + pub const none: DebugInfoSearchPaths = .{ + .debuginfod_client = null, + .global_debug = &.{}, + .exe_dir = null, + }; + + pub fn native(exe_path: []const u8) DebugInfoSearchPaths { + return .{ + .debuginfod_client = p: { + if (std.posix.getenv("DEBUGINFOD_CACHE_PATH")) |p| { + break :p .{ p, "" }; + } + if (std.posix.getenv("XDG_CACHE_HOME")) |cache_path| { + break :p .{ cache_path, "/debuginfod_client" }; + } + if (std.posix.getenv("HOME")) |home_path| { + break :p .{ home_path, "/.cache/debuginfod_client" }; + } + break :p null; + }, + .global_debug = &.{ + "/usr/lib/debug", + }, + .exe_dir = std.fs.path.dirname(exe_path) orelse ".", + }; + } +}; + +pub fn deinit(ef: *ElfFile, gpa: Allocator) void { + if (ef.dwarf) |*dwarf| dwarf.deinit(gpa); + if (ef.symbol_search_table) |t| gpa.free(t); + var arena = ef.arena.promote(gpa); + arena.deinit(); + + std.posix.munmap(ef.mapped_file); + if (ef.mapped_debug_file) |m| std.posix.munmap(m); + + ef.* = undefined; +} + +pub const LoadError = error{ + OutOfMemory, + Overflow, + TruncatedElfFile, + InvalidCompressedSection, + InvalidElfMagic, + InvalidElfVersion, + InvalidElfClass, + InvalidElfEndian, + // The remaining errors all occur when attempting to stat or mmap a file.
+ SystemResources, + MemoryMappingNotSupported, + AccessDenied, + LockedMemoryLimitExceeded, + ProcessFdQuotaExceeded, + SystemFdQuotaExceeded, + Unexpected, +}; + +pub fn load( + gpa: Allocator, + elf_file: std.fs.File, + opt_build_id: ?[]const u8, + di_search_paths: *const DebugInfoSearchPaths, +) LoadError!ElfFile { + var arena_instance: std.heap.ArenaAllocator = .init(gpa); + errdefer arena_instance.deinit(); + const arena = arena_instance.allocator(); + + var result = loadInner(arena, elf_file, null) catch |err| switch (err) { + error.CrcMismatch => unreachable, // we passed crc as null + else => |e| return e, + }; + errdefer std.posix.munmap(result.mapped_mem); + + // `loadInner` did most of the work, but we might need to load an external debug info file + + const di_mapped_mem: ?[]align(std.heap.page_size_min) const u8 = load_di: { + if (result.sections.get(.debug_info) != null and + result.sections.get(.debug_abbrev) != null and + result.sections.get(.debug_str) != null and + result.sections.get(.debug_line) != null) + { + // The info is already loaded from this file alone! + break :load_di null; + } + + // We're missing some debug info---let's try and load it from a separate file. 
+ + build_id: { + const build_id = opt_build_id orelse break :build_id; + if (build_id.len < 3) break :build_id; + + for (di_search_paths.global_debug) |global_debug| { + if (try loadSeparateDebugFile(arena, &result, null, "{s}/.build-id/{x}/{x}.debug", .{ + global_debug, + build_id[0..1], + build_id[1..], + })) |mapped| break :load_di mapped; + } + + if (di_search_paths.debuginfod_client) |components| { + if (try loadSeparateDebugFile(arena, &result, null, "{s}{s}/{x}/debuginfo", .{ + components[0], + components[1], + build_id, + })) |mapped| break :load_di mapped; + } + } + + debug_link: { + const section = result.sections.get(.gnu_debuglink) orelse break :debug_link; + const debug_filename = std.mem.sliceTo(section.bytes, 0); + const crc_offset = std.mem.alignForward(usize, debug_filename.len + 1, 4); + if (section.bytes.len < crc_offset + 4) break :debug_link; + const debug_crc = std.mem.readInt(u32, section.bytes[crc_offset..][0..4], result.endian); + + const exe_dir = di_search_paths.exe_dir orelse break :debug_link; + + if (try loadSeparateDebugFile(arena, &result, debug_crc, "{s}/{s}", .{ + exe_dir, + debug_filename, + })) |mapped| break :load_di mapped; + if (try loadSeparateDebugFile(arena, &result, debug_crc, "{s}/.debug/{s}", .{ + exe_dir, + debug_filename, + })) |mapped| break :load_di mapped; + for (di_search_paths.global_debug) |global_debug| { + // This looks like a bug; it isn't. They really do embed the absolute path to the + // exe's dirname, *under* the global debug path. 
+ if (try loadSeparateDebugFile(arena, &result, debug_crc, "{s}/{s}/{s}", .{ + global_debug, + exe_dir, + debug_filename, + })) |mapped| break :load_di mapped; + } + } + + break :load_di null; + }; + errdefer comptime unreachable; + + return .{ + .is_64 = result.is_64, + .endian = result.endian, + .dwarf = dwarf: { + if (result.sections.get(.debug_info) == null or + result.sections.get(.debug_abbrev) == null or + result.sections.get(.debug_str) == null or + result.sections.get(.debug_line) == null) + { + break :dwarf null; // debug info not present + } + var sections: Dwarf.SectionArray = @splat(null); + inline for (@typeInfo(Dwarf.Section.Id).@"enum".fields) |f| { + if (result.sections.get(@field(Section.Id, f.name))) |s| { + sections[f.value] = .{ .data = s.bytes, .owned = false }; + } + } + break :dwarf .{ .sections = sections }; + }, + .eh_frame = if (result.sections.get(.eh_frame)) |s| .{ + .vaddr = s.header.sh_addr, + .bytes = s.bytes, + } else null, + .debug_frame = if (result.sections.get(.debug_frame)) |s| .{ + .vaddr = s.header.sh_addr, + .bytes = s.bytes, + } else null, + .strtab = if (result.sections.get(.strtab)) |s| s.bytes else null, + .symtab = if (result.sections.get(.symtab)) |s| .{ + .entry_size = s.header.sh_entsize, + .bytes = s.bytes, + } else null, + .symbol_search_table = null, + .mapped_file = result.mapped_mem, + .mapped_debug_file = di_mapped_mem, + .arena = arena_instance.state, + }; +} + +/// Finds the symbol table entry whose address range contains `vaddr` and returns its name. +/// Returns `.unknown` when no entry covers `vaddr`, and `error.BadSymtab` when the table is malformed. +/// The sorted lookup table is built with `gpa` on first use and cached in `ef.symbol_search_table`. +pub fn searchSymtab(ef: *ElfFile, gpa: Allocator, vaddr: u64) error{ + NoSymtab, + NoStrtab, + BadSymtab, + OutOfMemory, +}!std.debug.Symbol { + const symtab = ef.symtab orelse return error.NoSymtab; + const strtab = ef.strtab orelse return error.NoStrtab; + + // `entry_size` is copied from `sh_entsize`, so a malformed file can make it zero; check + // that first because `%` by zero is illegal behavior. + if (symtab.entry_size == 0 or symtab.bytes.len % symtab.entry_size != 0) return error.BadSymtab; + + const swap_endian = ef.endian != @import("builtin").cpu.arch.endian(); + + switch (ef.is_64) { + inline true, false => |is_64| { + const Sym = if (is_64) elf.Elf64_Sym else elf.Elf32_Sym; + if (symtab.entry_size !=
@sizeOf(Sym)) return error.BadSymtab; + const symbols: []align(1) const Sym = @ptrCast(symtab.bytes); + if (ef.symbol_search_table == null) { + ef.symbol_search_table = try buildSymbolSearchTable(gpa, ef.endian, Sym, symbols); + } + const search_table = ef.symbol_search_table.?; + const SearchContext = struct { + swap_endian: bool, + target: u64, + symbols: []align(1) const Sym, + fn predicate(ctx: @This(), sym_index: u64) bool { + // We need to return `true` for the first N items, then `false` for the rest -- + // the index we'll get out is the first `false` one. So, we'll return `true` iff + // the target address is after the *end* of this symbol. This synchronizes with + // the logic in `buildSymbolSearchTable` which sorts by *end* address. + var sym = ctx.symbols[sym_index]; + if (ctx.swap_endian) std.mem.byteSwapAllFields(Sym, &sym); + // Saturating add: a corrupt symbol must not overflow. `buildSymbolSearchTable` + // computes the end address the same way, so the ordering stays consistent. + const sym_end = sym.st_value +| sym.st_size; + return ctx.target >= sym_end; + } + }; + const sym_index_index = std.sort.partitionPoint(u64, search_table, @as(SearchContext, .{ + .swap_endian = swap_endian, + .target = vaddr, + .symbols = symbols, + }), SearchContext.predicate); + if (sym_index_index == search_table.len) return .unknown; + var sym = symbols[search_table[sym_index_index]]; + if (swap_endian) std.mem.byteSwapAllFields(Sym, &sym); + if (vaddr < sym.st_value or vaddr >= sym.st_value +| sym.st_size) return .unknown; + // The name offset comes from the file; reject it if it points past the string table. + if (sym.st_name > strtab.len) return error.BadSymtab; + return .{ + .name = std.mem.sliceTo(strtab[sym.st_name..], 0), + .compile_unit_name = null, + .source_location = null, + }; + }, + } +} + +/// Returns the indices of the entries in `symbols` that have a name and a defined section, +/// sorted by symbol end address. The returned slice is allocated with `gpa` and owned by the caller. +fn buildSymbolSearchTable(gpa: Allocator, endian: Endian, comptime Sym: type, symbols: []align(1) const Sym) error{ + OutOfMemory, + BadSymtab, +}![]u64 { + var result: std.ArrayList(u64) = .empty; + defer result.deinit(gpa); + + const swap_endian = endian != @import("builtin").cpu.arch.endian(); + + for (symbols, 0..)
|sym_orig, sym_index| { + var sym = sym_orig; + if (swap_endian) std.mem.byteSwapAllFields(Sym, &sym); + if (sym.st_name == 0) continue; + if (sym.st_shndx == elf.SHN_UNDEF) continue; + try result.append(gpa, sym_index); + } + + const SortContext = struct { + swap_endian: bool, + symbols: []align(1) const Sym, + fn lessThan(ctx: @This(), lhs_sym_index: u64, rhs_sym_index: u64) bool { + // We sort by *end* address, not start address. This matches up with logic in `searchSymtab`. + var lhs_sym = ctx.symbols[lhs_sym_index]; + var rhs_sym = ctx.symbols[rhs_sym_index]; + if (ctx.swap_endian) { + std.mem.byteSwapAllFields(Sym, &lhs_sym); + std.mem.byteSwapAllFields(Sym, &rhs_sym); + } + // Saturating add, matching `searchSymtab`, so a corrupt symbol cannot overflow. + const lhs_val = lhs_sym.st_value +| lhs_sym.st_size; + const rhs_val = rhs_sym.st_value +| rhs_sym.st_size; + return lhs_val < rhs_val; + } + }; + std.mem.sort(u64, result.items, @as(SortContext, .{ + .swap_endian = swap_endian, + .symbols = symbols, + }), SortContext.lessThan); + + return result.toOwnedSlice(gpa); +} + +/// Only used locally, during `load`. +const Section = struct { + header: elf.Elf64_Shdr, + bytes: []const u8, + const Id = enum { + // DWARF sections: see `Dwarf.Section.Id`. + debug_info, + debug_abbrev, + debug_str, + debug_str_offsets, + debug_line, + debug_line_str, + debug_ranges, + debug_loclists, + debug_rnglists, + debug_addr, + debug_names, + // Then anything else we're interested in.
+ gnu_debuglink, + eh_frame, + debug_frame, + symtab, + strtab, + }; + const Array = std.enums.EnumArray(Section.Id, ?Section); +}; + +/// Tries to load a separate debug file from the path produced by formatting `fmt` with `args`. +/// If `opt_crc` is non-null, the file is rejected unless its CRC32 matches. +/// On success, each DWARF section missing from `main_loaded` is filled in from the debug file, and +/// the debug file's mapping (which those sections reference) is returned. +/// Returns `null` if the file cannot be opened or loaded, if its ELF class or endianness differs +/// from `main_loaded`, or if a required DWARF section would still be missing. +fn loadSeparateDebugFile(arena: Allocator, main_loaded: *LoadInnerResult, opt_crc: ?u32, comptime fmt: []const u8, args: anytype) Allocator.Error!?[]align(std.heap.page_size_min) const u8 { + const path = try std.fmt.allocPrint(arena, fmt, args); + const elf_file = std.fs.cwd().openFile(path, .{}) catch return null; + defer elf_file.close(); + + const result = loadInner(arena, elf_file, opt_crc) catch |err| switch (err) { + error.OutOfMemory => |e| return e, + error.CrcMismatch => return null, + else => return null, + }; + errdefer comptime unreachable; + + const have_debug_sections = inline for (@as([]const []const u8, &.{ + "debug_info", + "debug_abbrev", + "debug_str", + "debug_line", + })) |name| { + const s = @field(Section.Id, name); + // A required section is only missing if neither the main file nor this file provides it. + if (main_loaded.sections.get(s) == null and result.sections.get(s) == null) { + break false; + } + } else true; + + if (result.is_64 != main_loaded.is_64 or + result.endian != main_loaded.endian or + !have_debug_sections) + { + std.posix.munmap(result.mapped_mem); + return null; + } + + inline for (@typeInfo(Dwarf.Section.Id).@"enum".fields) |f| { + const id = @field(Section.Id, f.name); + if (main_loaded.sections.get(id) == null) { + main_loaded.sections.set(id, result.sections.get(id)); + } + } + + return result.mapped_mem; +} + +const LoadInnerResult = struct { + is_64: bool, + endian: Endian, + sections: Section.Array, + mapped_mem: []align(std.heap.page_size_min) const u8, +}; +fn loadInner( + arena: Allocator, + elf_file: std.fs.File, + opt_crc: ?u32, +) (LoadError || error{CrcMismatch})!LoadInnerResult { + const mapped_mem: []align(std.heap.page_size_min) const u8 = mapped: { + const file_len = std.math.cast( + usize, + elf_file.getEndPos() catch |err| switch (err) { + error.PermissionDenied => unreachable, // not asking for PROT_EXEC + else => |e| return e, + }, + ) orelse return
error.Overflow; + + break :mapped std.posix.mmap( + null, + file_len, + std.posix.PROT.READ, + .{ .TYPE = .SHARED }, + elf_file.handle, + 0, + ) catch |err| switch (err) { + error.MappingAlreadyExists => unreachable, // not using FIXED_NOREPLACE + error.PermissionDenied => unreachable, // not asking for PROT_EXEC + else => |e| return e, + }; + }; + // Registered before the CRC check so that a CRC mismatch also unmaps the file. + errdefer std.posix.munmap(mapped_mem); + + if (opt_crc) |crc| { + if (std.hash.crc.Crc32.hash(mapped_mem) != crc) { + return error.CrcMismatch; + } + } + + var fr: std.Io.Reader = .fixed(mapped_mem); + + const header = elf.Header.read(&fr) catch |err| switch (err) { + error.ReadFailed => unreachable, + error.EndOfStream => return error.TruncatedElfFile, + + error.InvalidElfMagic, + error.InvalidElfVersion, + error.InvalidElfClass, + error.InvalidElfEndian, + => |e| return e, + }; + const endian = header.endian; + + const shstrtab_shdr_off = try std.math.add( + u64, + header.shoff, + try std.math.mul(u64, header.shstrndx, header.shentsize), + ); + fr.seek = std.math.cast(usize, shstrtab_shdr_off) orelse return error.Overflow; + const shstrtab: []const u8 = if (header.is_64) shstrtab: { + const shdr = fr.takeStruct(elf.Elf64_Shdr, endian) catch return error.TruncatedElfFile; + // Two comparisons instead of `sh_offset + sh_size` so a corrupt header cannot overflow. + if (shdr.sh_offset > mapped_mem.len or shdr.sh_size > mapped_mem.len - shdr.sh_offset) return error.TruncatedElfFile; + break :shstrtab mapped_mem[@intCast(shdr.sh_offset)..][0..@intCast(shdr.sh_size)]; + } else shstrtab: { + const shdr = fr.takeStruct(elf.Elf32_Shdr, endian) catch return error.TruncatedElfFile; + // Two comparisons instead of `sh_offset + sh_size` so a corrupt header cannot overflow. + if (shdr.sh_offset > mapped_mem.len or shdr.sh_size > mapped_mem.len - shdr.sh_offset) return error.TruncatedElfFile; + break :shstrtab mapped_mem[@intCast(shdr.sh_offset)..][0..@intCast(shdr.sh_size)]; + }; + + var sections: Section.Array = .initFill(null); + + var it = header.iterateSectionHeadersBuffer(mapped_mem); + while (it.next() catch return error.TruncatedElfFile) |shdr| { + if (shdr.sh_type == elf.SHT_NULL or shdr.sh_type == elf.SHT_NOBITS) continue; + if (shdr.sh_name > shstrtab.len) return
error.TruncatedElfFile; + const name = std.mem.sliceTo(shstrtab[@intCast(shdr.sh_name)..], 0); + + const section_id: Section.Id = inline for (@typeInfo(Section.Id).@"enum".fields) |s| { + if (std.mem.eql(u8, "." ++ s.name, name)) { + break @enumFromInt(s.value); + } + } else continue; + + if (sections.get(section_id) != null) continue; + + // Two comparisons instead of `sh_offset + sh_size` so a corrupt header cannot overflow. + if (shdr.sh_offset > mapped_mem.len or shdr.sh_size > mapped_mem.len - shdr.sh_offset) return error.TruncatedElfFile; + const raw_section_bytes = mapped_mem[@intCast(shdr.sh_offset)..][0..@intCast(shdr.sh_size)]; + const section_bytes: []const u8 = bytes: { + if ((shdr.sh_flags & elf.SHF_COMPRESSED) == 0) break :bytes raw_section_bytes; + + var section_reader: std.Io.Reader = .fixed(raw_section_bytes); + const ch_type: elf.COMPRESS, const ch_size: u64 = if (header.is_64) ch: { + const chdr = section_reader.takeStruct(elf.Elf64_Chdr, endian) catch return error.InvalidCompressedSection; + break :ch .{ chdr.ch_type, chdr.ch_size }; + } else ch: { + const chdr = section_reader.takeStruct(elf.Elf32_Chdr, endian) catch return error.InvalidCompressedSection; + break :ch .{ chdr.ch_type, chdr.ch_size }; + }; + if (ch_type != .ZLIB) { + // The compression algorithm is unsupported, but don't make that a hard error; the + // file might still be valid, and we might still be okay without this section. + continue; + } + + // `ch_size` is a 64-bit value read from the file; it may not fit in `usize`. + const buf = try arena.alloc(u8, std.math.cast(usize, ch_size) orelse return error.Overflow); + var fw: std.Io.Writer = .fixed(buf); + var decompress: std.compress.flate.Decompress = .init(&section_reader, .zlib, &.{}); + const n = decompress.reader.streamRemaining(&fw) catch |err| switch (err) { + // If a write failed, then `buf` filled up, so `ch_size` was incorrect + error.WriteFailed => return error.InvalidCompressedSection, + // If a read failed, flate expected the section to have more data + error.ReadFailed => return error.InvalidCompressedSection, + }; + // It's also an error if the data is shorter than expected.
+ if (n != buf.len) return error.InvalidCompressedSection; + break :bytes buf; + }; + sections.set(section_id, .{ .header = shdr, .bytes = section_bytes }); + } + + return .{ + .is_64 = header.is_64, + .endian = endian, + .sections = sections, + .mapped_mem = mapped_mem, + }; +} + +const std = @import("std"); +const Endian = std.builtin.Endian; +const Dwarf = std.debug.Dwarf; +const ElfFile = @This(); +const Allocator = std.mem.Allocator; +const elf = std.elf; diff --git a/lib/std/debug/Info.zig b/lib/std/debug/Info.zig index bc8efc71b4..74119a3ea4 100644 --- a/lib/std/debug/Info.zig +++ b/lib/std/debug/Info.zig @@ -9,7 +9,7 @@ const std = @import("../std.zig"); const Allocator = std.mem.Allocator; const Path = std.Build.Cache.Path; -const Dwarf = std.debug.Dwarf; +const ElfFile = std.debug.ElfFile; const assert = std.debug.assert; const Coverage = std.debug.Coverage; const SourceLocation = std.debug.Coverage.SourceLocation; @@ -17,28 +17,35 @@ const SourceLocation = std.debug.Coverage.SourceLocation; const Info = @This(); /// Sorted by key, ascending. -address_map: std.AutoArrayHashMapUnmanaged(u64, Dwarf.ElfModule), +address_map: std.AutoArrayHashMapUnmanaged(u64, ElfFile), /// Externally managed, outlives this `Info` instance. coverage: *Coverage, -pub const LoadError = Dwarf.ElfModule.LoadError; +pub const LoadError = std.fs.File.OpenError || ElfFile.LoadError || std.debug.Dwarf.ScanError || error{MissingDebugInfo}; pub fn load(gpa: Allocator, path: Path, coverage: *Coverage) LoadError!Info { - var elf_module = try Dwarf.ElfModule.load(gpa, path, null, null, null, null); - // This is correct because `Dwarf.ElfModule` currently only supports native-endian ELF files. 
- const endian = @import("builtin").target.cpu.arch.endian(); - try elf_module.dwarf.populateRanges(gpa, endian); + var file = try path.root_dir.handle.openFile(path.sub_path, .{}); + defer file.close(); + + var elf_file: ElfFile = try .load(gpa, file, null, &.none); + errdefer elf_file.deinit(gpa); + + if (elf_file.dwarf == null) return error.MissingDebugInfo; + try elf_file.dwarf.?.open(gpa, elf_file.endian); + try elf_file.dwarf.?.populateRanges(gpa, elf_file.endian); + var info: Info = .{ .address_map = .{}, .coverage = coverage, }; - try info.address_map.put(gpa, 0, elf_module); + try info.address_map.put(gpa, 0, elf_file); + errdefer comptime unreachable; // elf_file is owned by the map now return info; } pub fn deinit(info: *Info, gpa: Allocator) void { - for (info.address_map.values()) |*elf_module| { - elf_module.dwarf.deinit(gpa); + for (info.address_map.values()) |*elf_file| { + elf_file.dwarf.?.deinit(gpa); } info.address_map.deinit(gpa); info.* = undefined; @@ -58,8 +65,6 @@ pub fn resolveAddresses( ) ResolveAddressesError!void { assert(sorted_pc_addrs.len == output.len); if (info.address_map.entries.len != 1) @panic("TODO"); - const elf_module = &info.address_map.values()[0]; - // This is correct because `Dwarf.ElfModule` currently only supports native-endian ELF files. 
- const endian = @import("builtin").target.cpu.arch.endian(); - return info.coverage.resolveAddressesDwarf(gpa, endian, sorted_pc_addrs, output, &elf_module.dwarf); + const elf_file = &info.address_map.values()[0]; + return info.coverage.resolveAddressesDwarf(gpa, elf_file.endian, sorted_pc_addrs, output, &elf_file.dwarf.?); } diff --git a/lib/std/debug/SelfInfo.zig b/lib/std/debug/SelfInfo.zig index ef222cc7f4..d838a1a6c1 100644 --- a/lib/std/debug/SelfInfo.zig +++ b/lib/std/debug/SelfInfo.zig @@ -78,6 +78,7 @@ pub fn getSymbolAtAddress(self: *SelfInfo, gpa: Allocator, address: usize) Error pub fn getModuleNameForAddress(self: *SelfInfo, gpa: Allocator, address: usize) Error![]const u8 { comptime assert(target_supported); const module: Module = try .lookup(&self.lookup_cache, gpa, address); + if (module.name.len == 0) return error.MissingDebugInfo; return module.name; } diff --git a/lib/std/debug/SelfInfo/ElfModule.zig b/lib/std/debug/SelfInfo/ElfModule.zig index 7a280c0d6e..7871f1012f 100644 --- a/lib/std/debug/SelfInfo/ElfModule.zig +++ b/lib/std/debug/SelfInfo/ElfModule.zig @@ -7,10 +7,12 @@ gnu_eh_frame: ?[]const u8, pub const LookupCache = void; pub const DebugInfo = struct { - loaded_elf: ?Dwarf.ElfModule, + loaded_elf: ?ElfFile, + scanned_dwarf: bool, unwind: [2]?Dwarf.Unwind, pub const init: DebugInfo = .{ .loaded_elf = null, + .scanned_dwarf = false, .unwind = @splat(null), }; pub fn deinit(di: *DebugInfo, gpa: Allocator) void { @@ -92,55 +94,92 @@ pub fn lookup(cache: *LookupCache, gpa: Allocator, address: usize) Error!ElfModu }; return error.MissingDebugInfo; } -fn loadDwarf(module: *const ElfModule, gpa: Allocator, di: *DebugInfo) Error!void { +fn loadElf(module: *const ElfModule, gpa: Allocator, di: *DebugInfo) Error!void { + std.debug.assert(di.loaded_elf == null); + std.debug.assert(!di.scanned_dwarf); + const load_result = if (module.name.len > 0) res: { - break :res Dwarf.ElfModule.load(gpa, .{ - .root_dir = .cwd(), - .sub_path = module.name, - }, 
module.build_id, null, null, null); + var file = std.fs.cwd().openFile(module.name, .{}) catch return error.MissingDebugInfo; + defer file.close(); + break :res ElfFile.load(gpa, file, module.build_id, &.native(module.name)); } else res: { const path = std.fs.selfExePathAlloc(gpa) catch |err| switch (err) { error.OutOfMemory => |e| return e, else => return error.ReadFailed, }; defer gpa.free(path); - break :res Dwarf.ElfModule.load(gpa, .{ - .root_dir = .cwd(), - .sub_path = path, - }, module.build_id, null, null, null); + var file = std.fs.cwd().openFile(path, .{}) catch return error.MissingDebugInfo; + defer file.close(); + break :res ElfFile.load(gpa, file, module.build_id, &.native(path)); }; di.loaded_elf = load_result catch |err| switch (err) { - error.FileNotFound => return error.MissingDebugInfo, - error.OutOfMemory, - error.InvalidDebugInfo, - error.MissingDebugInfo, error.Unexpected, => |e| return e, - error.InvalidElfEndian, + error.Overflow, + error.TruncatedElfFile, + error.InvalidCompressedSection, error.InvalidElfMagic, error.InvalidElfVersion, - error.InvalidUtf8, - error.InvalidWtf8, - error.EndOfStream, - error.Overflow, - error.UnimplementedDwarfForeignEndian, // this should be impossible as we're looking at the debug info for this process + error.InvalidElfClass, + error.InvalidElfEndian, => return error.InvalidDebugInfo, - else => return error.ReadFailed, + error.SystemResources, + error.MemoryMappingNotSupported, + error.AccessDenied, + error.LockedMemoryLimitExceeded, + error.ProcessFdQuotaExceeded, + error.SystemFdQuotaExceeded, + => return error.ReadFailed, }; + + const matches_native = + di.loaded_elf.?.endian == native_endian and + di.loaded_elf.?.is_64 == (@sizeOf(usize) == 8); + + if (!matches_native) { + di.loaded_elf.?.deinit(gpa); + di.loaded_elf = null; + return error.InvalidDebugInfo; + } } pub fn getSymbolAtAddress(module: *const ElfModule, gpa: Allocator, di: *DebugInfo, address: usize) Error!std.debug.Symbol { - if 
(di.loaded_elf == null) try module.loadDwarf(gpa, di); + if (di.loaded_elf == null) try module.loadElf(gpa, di); const vaddr = address - module.load_offset; - return di.loaded_elf.?.dwarf.getSymbol(gpa, native_endian, vaddr) catch |err| switch (err) { - error.InvalidDebugInfo, error.MissingDebugInfo, error.OutOfMemory => |e| return e, - error.ReadFailed, - error.EndOfStream, - error.Overflow, - error.StreamTooLong, - => return error.InvalidDebugInfo, + if (di.loaded_elf.?.dwarf) |*dwarf| { + if (!di.scanned_dwarf) { + dwarf.open(gpa, native_endian) catch |err| switch (err) { + error.InvalidDebugInfo, + error.MissingDebugInfo, + error.OutOfMemory, + => |e| return e, + error.EndOfStream, + error.Overflow, + error.ReadFailed, + error.StreamTooLong, + => return error.InvalidDebugInfo, + }; + di.scanned_dwarf = true; + } + return dwarf.getSymbol(gpa, native_endian, vaddr) catch |err| switch (err) { + error.InvalidDebugInfo, + error.MissingDebugInfo, + error.OutOfMemory, + => |e| return e, + error.ReadFailed, + error.EndOfStream, + error.Overflow, + error.StreamTooLong, + => return error.InvalidDebugInfo, + }; + } + // When there's no DWARF available, fall back to searching the symtab. + return di.loaded_elf.?.searchSymtab(gpa, vaddr) catch |err| switch (err) { + error.NoSymtab, error.NoStrtab => return error.MissingDebugInfo, + error.BadSymtab => return error.InvalidDebugInfo, + error.OutOfMemory => |e| return e, }; } fn prepareUnwindLookup(unwind: *Dwarf.Unwind, gpa: Allocator) Error!void { @@ -166,7 +205,7 @@ fn loadUnwindInfo(module: *const ElfModule, gpa: Allocator, di: *DebugInfo) Erro } else unwinds: { // There is no `.eh_frame_hdr` section. There may still be an `.eh_frame` or `.debug_frame` // section, but we'll have to load the binary to get at it. 
- try module.loadDwarf(gpa, di); + try module.loadElf(gpa, di); const opt_debug_frame = &di.loaded_elf.?.debug_frame; const opt_eh_frame = &di.loaded_elf.?.eh_frame; // If both are present, we can't just pick one -- the info could be split between them. @@ -232,6 +271,7 @@ const ElfModule = @This(); const std = @import("../../std.zig"); const Allocator = std.mem.Allocator; const Dwarf = std.debug.Dwarf; +const ElfFile = std.debug.ElfFile; const elf = std.elf; const mem = std.mem; const Error = std.debug.SelfInfo.Error; |
