diff options
| author | Andrew Kelley <andrew@ziglang.org> | 2024-08-02 17:10:41 -0700 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-08-02 17:10:41 -0700 |
| commit | a931bfada5e358ace980b2f8fbc50ce424ced526 (patch) | |
| tree | 5aabd9fb3833765926ee5409c1ce14e04d2d9fd0 /lib/std | |
| parent | 9e2668cd2ecc587390335e1c9f6e1592a7bd6eb6 (diff) | |
| parent | 6d606cc38b4df2b20af9d77367f8ab22bbbea092 (diff) | |
| download | zig-a931bfada5e358ace980b2f8fbc50ce424ced526.tar.gz zig-a931bfada5e358ace980b2f8fbc50ce424ced526.zip | |
Merge pull request #20908 from ziglang/reorg-std.debug-again
std.debug: reorg and clarify API goals
Diffstat (limited to 'lib/std')
| -rw-r--r-- | lib/std/debug.zig | 1669 | ||||
| -rw-r--r-- | lib/std/debug/Dwarf.zig | 922 | ||||
| -rw-r--r-- | lib/std/debug/Dwarf/abi.zig | 167 | ||||
| -rw-r--r-- | lib/std/debug/Dwarf/call_frame.zig | 388 | ||||
| -rw-r--r-- | lib/std/debug/Dwarf/expression.zig | 26 | ||||
| -rw-r--r-- | lib/std/debug/MemoryAccessor.zig | 128 | ||||
| -rw-r--r-- | lib/std/debug/Pdb.zig | 591 | ||||
| -rw-r--r-- | lib/std/debug/SelfInfo.zig | 2438 | ||||
| -rw-r--r-- | lib/std/pdb.zig | 607 |
9 files changed, 3510 insertions, 3426 deletions
diff --git a/lib/std/debug.zig b/lib/std/debug.zig index 22a7e551ec..4d3437f665 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -1,16 +1,11 @@ -const std = @import("std.zig"); const builtin = @import("builtin"); +const std = @import("std.zig"); const math = std.math; const mem = std.mem; const io = std.io; const posix = std.posix; const fs = std.fs; const testing = std.testing; -const elf = std.elf; -const DW = std.dwarf; -const macho = std.macho; -const coff = std.coff; -const pdb = std.pdb; const root = @import("root"); const File = std.fs.File; const windows = std.os.windows; @@ -18,8 +13,24 @@ const native_arch = builtin.cpu.arch; const native_os = builtin.os.tag; const native_endian = native_arch.endian(); +pub const MemoryAccessor = @import("debug/MemoryAccessor.zig"); pub const Dwarf = @import("debug/Dwarf.zig"); +pub const Pdb = @import("debug/Pdb.zig"); +pub const SelfInfo = @import("debug/SelfInfo.zig"); + +/// Unresolved source locations can be represented with a single `usize` that +/// corresponds to a virtual memory address of the program counter. Combined +/// with debug information, those values can be converted into a resolved +/// source location, including file, line, and column. +pub const SourceLocation = struct { + line: u64, + column: u64, + file_name: []const u8, +}; +/// Deprecated because it returns the optimization mode of the standard +/// library, when the caller probably wants to use the optimization mode of +/// their own module. pub const runtime_safety = switch (builtin.mode) { .Debug, .ReleaseSafe => true, .ReleaseFast, .ReleaseSmall => false, @@ -46,39 +57,6 @@ pub const sys_can_stack_trace = switch (builtin.cpu.arch) { else => true, }; -pub const LineInfo = struct { - line: u64, - column: u64, - file_name: []const u8, - - pub fn deinit(self: LineInfo, allocator: mem.Allocator) void { - allocator.free(self.file_name); - } -}; - -pub const SymbolInfo = struct { - symbol_name: []const u8 = "???", - compile_unit_name: []const u8 = "???", - line_info: ?LineInfo = null, - - pub fn deinit(self: SymbolInfo, allocator: mem.Allocator) void { - if (self.line_info) |li| { - li.deinit(allocator); - } - } -}; -const PdbOrDwarf = union(enum) { - pdb: pdb.Pdb, - dwarf: Dwarf, - - fn deinit(self: *PdbOrDwarf, allocator: mem.Allocator) void { - switch (self.*) { - .pdb => |*inner| inner.deinit(), - .dwarf => |*inner| inner.deinit(allocator), - } - } -}; - /// Allows the caller to freely write to stderr until `unlockStdErr` is called. /// /// During the lock, any `std.Progress` information is cleared from the terminal. @@ -104,13 +82,13 @@ pub fn getStderrMutex() *std.Thread.Mutex { } /// TODO multithreaded awareness -var self_debug_info: ?Info = null; +var self_debug_info: ?SelfInfo = null; -pub fn getSelfDebugInfo() !*Info { +pub fn getSelfDebugInfo() !*SelfInfo { if (self_debug_info) |*info| { return info; } else { - self_debug_info = try openSelfDebugInfo(getDebugInfoAllocator()); + self_debug_info = try SelfInfo.open(getDebugInfoAllocator()); return &self_debug_info.?; } } @@ -266,7 +244,7 @@ pub inline fn getContext(context: *ThreadContext) bool { /// Tries to print the stack trace starting from the supplied base pointer to stderr, /// unbuffered, and ignores any error returned. /// TODO multithreaded awareness -pub fn dumpStackTraceFromBase(context: *const ThreadContext) void { +pub fn dumpStackTraceFromBase(context: *ThreadContext) void { nosuspend { if (comptime builtin.target.isWasm()) { if (native_os == .wasi) { @@ -348,7 +326,7 @@ pub fn captureStackTrace(first_address: ?usize, stack_trace: *std.builtin.StackT stack_trace.index = slice.len; } else { // TODO: This should use the DWARF unwinder if .eh_frame_hdr is available (so that full debug info parsing isn't required). - // A new path for loading Info needs to be created which will only attempt to parse in-memory sections, because + // A new path for loading SelfInfo needs to be created which will only attempt to parse in-memory sections, because // stopping to load other debug info (ie. source line info) from disk here is not required for unwinding. var it = StackIterator.init(first_address, null); defer it.deinit(); @@ -526,7 +504,7 @@ pub fn writeStackTrace( stack_trace: std.builtin.StackTrace, out_stream: anytype, allocator: mem.Allocator, - debug_info: *Info, + debug_info: *SelfInfo, tty_config: io.tty.Config, ) !void { _ = allocator; @@ -563,12 +541,12 @@ pub const StackIterator = struct { fp: usize, ma: MemoryAccessor = MemoryAccessor.init, - // When Info and a register context is available, this iterator can unwind + // When SelfInfo and a register context is available, this iterator can unwind // stacks with frames that don't use a frame pointer (ie. -fomit-frame-pointer), // using DWARF and MachO unwind info. unwind_state: if (have_ucontext) ?struct { - debug_info: *Info, - dwarf_context: Dwarf.UnwindContext, + debug_info: *SelfInfo, + dwarf_context: SelfInfo.UnwindContext, last_error: ?UnwindError = null, failed: bool = false, } else void = if (have_ucontext) null else {}, @@ -592,20 +570,22 @@ pub const StackIterator = struct { }; } - pub fn initWithContext(first_address: ?usize, debug_info: *Info, context: *const posix.ucontext_t) !StackIterator { + pub fn initWithContext(first_address: ?usize, debug_info: *SelfInfo, context: *posix.ucontext_t) !StackIterator { // The implementation of DWARF unwinding on aarch64-macos is not complete. However, Apple mandates that // the frame pointer register is always used, so on this platform we can safely use the FP-based unwinder. - if (comptime builtin.target.isDarwin() and native_arch == .aarch64) { + if (builtin.target.isDarwin() and native_arch == .aarch64) return init(first_address, context.mcontext.ss.fp); - } else { + + if (SelfInfo.supports_unwinding) { var iterator = init(first_address, null); iterator.unwind_state = .{ .debug_info = debug_info, - .dwarf_context = try Dwarf.UnwindContext.init(debug_info.allocator, context), + .dwarf_context = try SelfInfo.UnwindContext.init(debug_info.allocator, context), }; - return iterator; } + + return init(first_address, null); } pub fn deinit(it: *StackIterator) void { @@ -667,116 +647,6 @@ pub const StackIterator = struct { return address; } - fn isValidMemory(address: usize) bool { - // We are unable to determine validity of memory for freestanding targets - if (native_os == .freestanding or native_os == .uefi) return true; - - const aligned_address = address & ~@as(usize, @intCast((mem.page_size - 1))); - if (aligned_address == 0) return false; - const aligned_memory = @as([*]align(mem.page_size) u8, @ptrFromInt(aligned_address))[0..mem.page_size]; - - if (native_os == .windows) { - var memory_info: windows.MEMORY_BASIC_INFORMATION = undefined; - - // The only error this function can throw is ERROR_INVALID_PARAMETER. - // supply an address that invalid i'll be thrown. - const rc = windows.VirtualQuery(aligned_memory, &memory_info, aligned_memory.len) catch { - return false; - }; - - // Result code has to be bigger than zero (number of bytes written) - if (rc == 0) { - return false; - } - - // Free pages cannot be read, they are unmapped - if (memory_info.State == windows.MEM_FREE) { - return false; - } - - return true; - } else if (have_msync) { - posix.msync(aligned_memory, posix.MSF.ASYNC) catch |err| { - switch (err) { - error.UnmappedMemory => return false, - else => unreachable, - } - }; - - return true; - } else { - // We are unable to determine validity of memory on this target. - return true; - } - } - - pub const MemoryAccessor = struct { - var cached_pid: posix.pid_t = -1; - - mem: switch (native_os) { - .linux => File, - else => void, - }, - - pub const init: MemoryAccessor = .{ - .mem = switch (native_os) { - .linux => .{ .handle = -1 }, - else => {}, - }, - }; - - fn read(ma: *MemoryAccessor, address: usize, buf: []u8) bool { - switch (native_os) { - .linux => while (true) switch (ma.mem.handle) { - -2 => break, - -1 => { - const linux = std.os.linux; - const pid = switch (@atomicLoad(posix.pid_t, &cached_pid, .monotonic)) { - -1 => pid: { - const pid = linux.getpid(); - @atomicStore(posix.pid_t, &cached_pid, pid, .monotonic); - break :pid pid; - }, - else => |pid| pid, - }; - const bytes_read = linux.process_vm_readv( - pid, - &.{.{ .base = buf.ptr, .len = buf.len }}, - &.{.{ .base = @ptrFromInt(address), .len = buf.len }}, - 0, - ); - switch (linux.E.init(bytes_read)) { - .SUCCESS => return bytes_read == buf.len, - .FAULT => return false, - .INVAL, .PERM, .SRCH => unreachable, // own pid is always valid - .NOMEM => {}, - .NOSYS => {}, // QEMU is known not to implement this syscall. - else => unreachable, // unexpected - } - var path_buf: [ - std.fmt.count("/proc/{d}/mem", .{math.minInt(posix.pid_t)}) - ]u8 = undefined; - const path = std.fmt.bufPrint(&path_buf, "/proc/{d}/mem", .{pid}) catch - unreachable; - ma.mem = std.fs.openFileAbsolute(path, .{}) catch { - ma.mem.handle = -2; - break; - }; - }, - else => return (ma.mem.pread(buf, address) catch return false) == buf.len, - }, - else => {}, - } - if (!isValidMemory(address)) return false; - @memcpy(buf, @as([*]const u8, @ptrFromInt(address))); - return true; - } - pub fn load(ma: *MemoryAccessor, comptime Type: type, address: usize) ?Type { - var result: Type = undefined; - return if (ma.read(address, std.mem.asBytes(&result))) result else null; - } - }; - fn next_unwind(it: *StackIterator) !usize { const unwind_state = &it.unwind_state.?; const module = try unwind_state.debug_info.getModuleForAddress(unwind_state.dwarf_context.pc); @@ -785,7 +655,13 @@ pub const StackIterator = struct { // __unwind_info is a requirement for unwinding on Darwin. It may fall back to DWARF, but unwinding // via DWARF before attempting to use the compact unwind info will produce incorrect results. if (module.unwind_info) |unwind_info| { - if (Dwarf.unwindFrameMachO(&unwind_state.dwarf_context, &it.ma, unwind_info, module.eh_frame, module.base_address)) |return_address| { + if (SelfInfo.unwindFrameMachO( + &unwind_state.dwarf_context, + &it.ma, + unwind_info, + module.eh_frame, + module.base_address, + )) |return_address| { return return_address; } else |err| { if (err != error.RequiresDWARFUnwind) return err; @@ -796,7 +672,7 @@ pub const StackIterator = struct { } if (try module.getDwarfInfoForAddress(unwind_state.debug_info.allocator, unwind_state.dwarf_context.pc)) |di| { - return di.unwindFrame(&unwind_state.dwarf_context, &it.ma, null); + return SelfInfo.unwindFrameDwarf(di, &unwind_state.dwarf_context, &it.ma, null); } else return error.MissingDebugInfo; } @@ -845,22 +721,19 @@ pub const StackIterator = struct { } }; -const have_msync = switch (native_os) { - .wasi, .emscripten, .windows => false, - else => true, -}; - pub fn writeCurrentStackTrace( out_stream: anytype, - debug_info: *Info, + debug_info: *SelfInfo, tty_config: io.tty.Config, start_addr: ?usize, ) !void { - var context: ThreadContext = undefined; - const has_context = getContext(&context); if (native_os == .windows) { + var context: ThreadContext = undefined; + assert(getContext(&context)); return writeStackTraceWindows(out_stream, debug_info, tty_config, &context, start_addr); } + var context: ThreadContext = undefined; + const has_context = getContext(&context); var it = (if (has_context) blk: { break :blk StackIterator.initWithContext(start_addr, debug_info, &context) catch null; @@ -938,7 +811,7 @@ pub noinline fn walkStackWindows(addresses: []usize, existing_context: ?*const w pub fn writeStackTraceWindows( out_stream: anytype, - debug_info: *Info, + debug_info: *SelfInfo, tty_config: io.tty.Config, context: *const windows.CONTEXT, start_addr: ?usize, @@ -957,52 +830,7 @@ pub fn writeStackTraceWindows( } } -fn machoSearchSymbols(symbols: []const MachoSymbol, address: usize) ?*const MachoSymbol { - var min: usize = 0; - var max: usize = symbols.len - 1; - while (min < max) { - const mid = min + (max - min) / 2; - const curr = &symbols[mid]; - const next = &symbols[mid + 1]; - if (address >= next.address()) { - min = mid + 1; - } else if (address < curr.address()) { - max = mid; - } else { - return curr; - } - } - - const max_sym = &symbols[symbols.len - 1]; - if (address >= max_sym.address()) - return max_sym; - - return null; -} - -test machoSearchSymbols { - const symbols = [_]MachoSymbol{ - .{ .addr = 100, .strx = undefined, .size = undefined, .ofile = undefined }, - .{ .addr = 200, .strx = undefined, .size = undefined, .ofile = undefined }, - .{ .addr = 300, .strx = undefined, .size = undefined, .ofile = undefined }, - }; - - try testing.expectEqual(null, machoSearchSymbols(&symbols, 0)); - try testing.expectEqual(null, machoSearchSymbols(&symbols, 99)); - try testing.expectEqual(&symbols[0], machoSearchSymbols(&symbols, 100).?); - try testing.expectEqual(&symbols[0], machoSearchSymbols(&symbols, 150).?); - try testing.expectEqual(&symbols[0], machoSearchSymbols(&symbols, 199).?); - - try testing.expectEqual(&symbols[1], machoSearchSymbols(&symbols, 200).?); - try testing.expectEqual(&symbols[1], machoSearchSymbols(&symbols, 250).?); - try testing.expectEqual(&symbols[1], machoSearchSymbols(&symbols, 299).?); - - try testing.expectEqual(&symbols[2], machoSearchSymbols(&symbols, 300).?); - try testing.expectEqual(&symbols[2], machoSearchSymbols(&symbols, 301).?); - try testing.expectEqual(&symbols[2], machoSearchSymbols(&symbols, 5000).?); -} - -fn printUnknownSource(debug_info: *Info, out_stream: anytype, address: usize, tty_config: io.tty.Config) !void { +fn printUnknownSource(debug_info: *SelfInfo, out_stream: anytype, address: usize, tty_config: io.tty.Config) !void { const module_name = debug_info.getModuleNameForAddress(address); return printLineInfo( out_stream, @@ -1015,14 +843,14 @@ fn printUnknownSource(debug_info: *Info, out_stream: anytype, address: usize, tt ); } -fn printLastUnwindError(it: *StackIterator, debug_info: *Info, out_stream: anytype, tty_config: io.tty.Config) void { +fn printLastUnwindError(it: *StackIterator, debug_info: *SelfInfo, out_stream: anytype, tty_config: io.tty.Config) void { if (!have_ucontext) return; if (it.getLastError()) |unwind_error| { printUnwindError(debug_info, out_stream, unwind_error.address, unwind_error.err, tty_config) catch {}; } } -fn printUnwindError(debug_info: *Info, out_stream: anytype, address: usize, err: UnwindError, tty_config: io.tty.Config) !void { +fn printUnwindError(debug_info: *SelfInfo, out_stream: anytype, address: usize, err: UnwindError, tty_config: io.tty.Config) !void { const module_name = debug_info.getModuleNameForAddress(address) orelse "???"; try tty_config.setColor(out_stream, .dim); if (err == error.MissingDebugInfo) { @@ -1033,7 +861,7 @@ fn printUnwindError(debug_info: *Info, out_stream: anytype, address: usize, err: try tty_config.setColor(out_stream, .reset); } -pub fn printSourceAtAddress(debug_info: *Info, out_stream: anytype, address: usize, tty_config: io.tty.Config) !void { +pub fn printSourceAtAddress(debug_info: *SelfInfo, out_stream: anytype, address: usize, tty_config: io.tty.Config) !void { const module = debug_info.getModuleForAddress(address) catch |err| switch (err) { error.MissingDebugInfo, error.InvalidDebugInfo => return printUnknownSource(debug_info, out_stream, address, tty_config), else => return err, @@ -1058,7 +886,7 @@ pub fn printSourceAtAddress(debug_info: *Info, out_stream: anytype, address: usi fn printLineInfo( out_stream: anytype, - line_info: ?LineInfo, + line_info: ?SourceLocation, address: usize, symbol_name: []const u8, compile_unit_name: []const u8, @@ -1104,428 +932,7 @@ fn printLineInfo( } } -pub const OpenSelfDebugInfoError = error{ - MissingDebugInfo, - UnsupportedOperatingSystem, -} || @typeInfo(@typeInfo(@TypeOf(Info.init)).Fn.return_type.?).ErrorUnion.error_set; - -pub fn openSelfDebugInfo(allocator: mem.Allocator) OpenSelfDebugInfoError!Info { - nosuspend { - if (builtin.strip_debug_info) - return error.MissingDebugInfo; - if (@hasDecl(root, "os") and @hasDecl(root.os, "debug") and @hasDecl(root.os.debug, "openSelfDebugInfo")) { - return root.os.debug.openSelfDebugInfo(allocator); - } - switch (native_os) { - .linux, - .freebsd, - .netbsd, - .dragonfly, - .openbsd, - .macos, - .solaris, - .illumos, - .windows, - => return try Info.init(allocator), - else => return error.UnsupportedOperatingSystem, - } - } -} - -fn readCoffDebugInfo(allocator: mem.Allocator, coff_obj: *coff.Coff) !ModuleDebugInfo { - nosuspend { - var di = ModuleDebugInfo{ - .base_address = undefined, - .coff_image_base = coff_obj.getImageBase(), - .coff_section_headers = undefined, - }; - - if (coff_obj.getSectionByName(".debug_info")) |_| { - // This coff file has embedded DWARF debug info - var sections: Dwarf.SectionArray = Dwarf.null_section_array; - errdefer for (sections) |section| if (section) |s| if (s.owned) allocator.free(s.data); - - inline for (@typeInfo(Dwarf.Section.Id).Enum.fields, 0..) |section, i| { - sections[i] = if (coff_obj.getSectionByName("." ++ section.name)) |section_header| blk: { - break :blk .{ - .data = try coff_obj.getSectionDataAlloc(section_header, allocator), - .virtual_address = section_header.virtual_address, - .owned = true, - }; - } else null; - } - - var dwarf = Dwarf{ - .endian = native_endian, - .sections = sections, - .is_macho = false, - }; - - try Dwarf.open(&dwarf, allocator); - di.dwarf = dwarf; - } - - const raw_path = try coff_obj.getPdbPath() orelse return di; - const path = blk: { - if (fs.path.isAbsolute(raw_path)) { - break :blk raw_path; - } else { - const self_dir = try fs.selfExeDirPathAlloc(allocator); - defer allocator.free(self_dir); - break :blk try fs.path.join(allocator, &.{ self_dir, raw_path }); - } - }; - defer if (path.ptr != raw_path.ptr) allocator.free(path); - - di.pdb = pdb.Pdb.init(allocator, path) catch |err| switch (err) { - error.FileNotFound, error.IsDir => { - if (di.dwarf == null) return error.MissingDebugInfo; - return di; - }, - else => return err, - }; - try di.pdb.?.parseInfoStream(); - try di.pdb.?.parseDbiStream(); - - if (!mem.eql(u8, &coff_obj.guid, &di.pdb.?.guid) or coff_obj.age != di.pdb.?.age) - return error.InvalidDebugInfo; - - // Only used by the pdb path - di.coff_section_headers = try coff_obj.getSectionHeadersAlloc(allocator); - errdefer allocator.free(di.coff_section_headers); - - return di; - } -} - -fn chopSlice(ptr: []const u8, offset: u64, size: u64) error{Overflow}![]const u8 { - const start = math.cast(usize, offset) orelse return error.Overflow; - const end = start + (math.cast(usize, size) orelse return error.Overflow); - return ptr[start..end]; -} - -/// Reads debug info from an ELF file, or the current binary if none in specified. -/// If the required sections aren't present but a reference to external debug info is, -/// then this this function will recurse to attempt to load the debug sections from -/// an external file. -pub fn readElfDebugInfo( - allocator: mem.Allocator, - elf_filename: ?[]const u8, - build_id: ?[]const u8, - expected_crc: ?u32, - parent_sections: *Dwarf.SectionArray, - parent_mapped_mem: ?[]align(mem.page_size) const u8, -) !ModuleDebugInfo { - nosuspend { - const elf_file = (if (elf_filename) |filename| blk: { - break :blk fs.cwd().openFile(filename, .{}); - } else fs.openSelfExe(.{})) catch |err| switch (err) { - error.FileNotFound => return error.MissingDebugInfo, - else => return err, - }; - - const mapped_mem = try mapWholeFile(elf_file); - if (expected_crc) |crc| if (crc != std.hash.crc.Crc32.hash(mapped_mem)) return error.InvalidDebugInfo; - - const hdr: *const elf.Ehdr = @ptrCast(&mapped_mem[0]); - if (!mem.eql(u8, hdr.e_ident[0..4], elf.MAGIC)) return error.InvalidElfMagic; - if (hdr.e_ident[elf.EI_VERSION] != 1) return error.InvalidElfVersion; - - const endian: std.builtin.Endian = switch (hdr.e_ident[elf.EI_DATA]) { - elf.ELFDATA2LSB => .little, - elf.ELFDATA2MSB => .big, - else => return error.InvalidElfEndian, - }; - assert(endian == native_endian); // this is our own debug info - - const shoff = hdr.e_shoff; - const str_section_off = shoff + @as(u64, hdr.e_shentsize) * @as(u64, hdr.e_shstrndx); - const str_shdr: *const elf.Shdr = @ptrCast(@alignCast(&mapped_mem[math.cast(usize, str_section_off) orelse return error.Overflow])); - const header_strings = mapped_mem[str_shdr.sh_offset..][0..str_shdr.sh_size]; - const shdrs = @as( - [*]const elf.Shdr, - @ptrCast(@alignCast(&mapped_mem[shoff])), - )[0..hdr.e_shnum]; - - var sections: Dwarf.SectionArray = Dwarf.null_section_array; - - // Combine section list. This takes ownership over any owned sections from the parent scope. - for (parent_sections, §ions) |*parent, *section| { - if (parent.*) |*p| { - section.* = p.*; - p.owned = false; - } - } - errdefer for (sections) |section| if (section) |s| if (s.owned) allocator.free(s.data); - - var separate_debug_filename: ?[]const u8 = null; - var separate_debug_crc: ?u32 = null; - - for (shdrs) |*shdr| { - if (shdr.sh_type == elf.SHT_NULL or shdr.sh_type == elf.SHT_NOBITS) continue; - const name = mem.sliceTo(header_strings[shdr.sh_name..], 0); - - if (mem.eql(u8, name, ".gnu_debuglink")) { - const gnu_debuglink = try chopSlice(mapped_mem, shdr.sh_offset, shdr.sh_size); - const debug_filename = mem.sliceTo(@as([*:0]const u8, @ptrCast(gnu_debuglink.ptr)), 0); - const crc_offset = mem.alignForward(usize, @intFromPtr(&debug_filename[debug_filename.len]) + 1, 4) - @intFromPtr(gnu_debuglink.ptr); - const crc_bytes = gnu_debuglink[crc_offset..][0..4]; - separate_debug_crc = mem.readInt(u32, crc_bytes, native_endian); - separate_debug_filename = debug_filename; - continue; - } - - var section_index: ?usize = null; - inline for (@typeInfo(Dwarf.Section.Id).Enum.fields, 0..) |section, i| { - if (mem.eql(u8, "." ++ section.name, name)) section_index = i; - } - if (section_index == null) continue; - if (sections[section_index.?] != null) continue; - - const section_bytes = try chopSlice(mapped_mem, shdr.sh_offset, shdr.sh_size); - sections[section_index.?] = if ((shdr.sh_flags & elf.SHF_COMPRESSED) > 0) blk: { - var section_stream = io.fixedBufferStream(section_bytes); - var section_reader = section_stream.reader(); - const chdr = section_reader.readStruct(elf.Chdr) catch continue; - if (chdr.ch_type != .ZLIB) continue; - - var zlib_stream = std.compress.zlib.decompressor(section_stream.reader()); - - const decompressed_section = try allocator.alloc(u8, chdr.ch_size); - errdefer allocator.free(decompressed_section); - - const read = zlib_stream.reader().readAll(decompressed_section) catch continue; - assert(read == decompressed_section.len); - - break :blk .{ - .data = decompressed_section, - .virtual_address = shdr.sh_addr, - .owned = true, - }; - } else .{ - .data = section_bytes, - .virtual_address = shdr.sh_addr, - .owned = false, - }; - } - - const missing_debug_info = - sections[@intFromEnum(Dwarf.Section.Id.debug_info)] == null or - sections[@intFromEnum(Dwarf.Section.Id.debug_abbrev)] == null or - sections[@intFromEnum(Dwarf.Section.Id.debug_str)] == null or - sections[@intFromEnum(Dwarf.Section.Id.debug_line)] == null; - - // Attempt to load debug info from an external file - // See: https://sourceware.org/gdb/onlinedocs/gdb/Separate-Debug-Files.html - if (missing_debug_info) { - - // Only allow one level of debug info nesting - if (parent_mapped_mem) |_| { - return error.MissingDebugInfo; - } - - const global_debug_directories = [_][]const u8{ - "/usr/lib/debug", - }; - - // <global debug directory>/.build-id/<2-character id prefix>/<id remainder>.debug - if (build_id) |id| blk: { - if (id.len < 3) break :blk; - - // Either md5 (16 bytes) or sha1 (20 bytes) are used here in practice - const extension = ".debug"; - var id_prefix_buf: [2]u8 = undefined; - var filename_buf: [38 + extension.len]u8 = undefined; - - _ = std.fmt.bufPrint(&id_prefix_buf, "{s}", .{std.fmt.fmtSliceHexLower(id[0..1])}) catch unreachable; - const filename = std.fmt.bufPrint( - &filename_buf, - "{s}" ++ extension, - .{std.fmt.fmtSliceHexLower(id[1..])}, - ) catch break :blk; - - for (global_debug_directories) |global_directory| { - const path = try fs.path.join(allocator, &.{ global_directory, ".build-id", &id_prefix_buf, filename }); - defer allocator.free(path); - - return readElfDebugInfo(allocator, path, null, separate_debug_crc, §ions, mapped_mem) catch continue; - } - } - - // use the path from .gnu_debuglink, in the same search order as gdb - if (separate_debug_filename) |separate_filename| blk: { - if (elf_filename != null and mem.eql(u8, elf_filename.?, separate_filename)) return error.MissingDebugInfo; - - // <cwd>/<gnu_debuglink> - if (readElfDebugInfo(allocator, separate_filename, null, separate_debug_crc, §ions, mapped_mem)) |debug_info| return debug_info else |_| {} - - // <cwd>/.debug/<gnu_debuglink> - { - const path = try fs.path.join(allocator, &.{ ".debug", separate_filename }); - defer allocator.free(path); - - if (readElfDebugInfo(allocator, path, null, separate_debug_crc, §ions, mapped_mem)) |debug_info| return debug_info else |_| {} - } - - var cwd_buf: [fs.max_path_bytes]u8 = undefined; - const cwd_path = posix.realpath(".", &cwd_buf) catch break :blk; - - // <global debug directory>/<absolute folder of current binary>/<gnu_debuglink> - for (global_debug_directories) |global_directory| { - const path = try fs.path.join(allocator, &.{ global_directory, cwd_path, separate_filename }); - defer allocator.free(path); - if (readElfDebugInfo(allocator, path, null, separate_debug_crc, §ions, mapped_mem)) |debug_info| return debug_info else |_| {} - } - } - - return error.MissingDebugInfo; - } - - var di = Dwarf{ - .endian = endian, - .sections = sections, - .is_macho = false, - }; - - try Dwarf.open(&di, allocator); - - return ModuleDebugInfo{ - .base_address = undefined, - .dwarf = di, - .mapped_memory = parent_mapped_mem orelse mapped_mem, - .external_mapped_memory = if (parent_mapped_mem != null) mapped_mem else null, - }; - } -} - -/// This takes ownership of macho_file: users of this function should not close -/// it themselves, even on error. -/// TODO it's weird to take ownership even on error, rework this code. -fn readMachODebugInfo(allocator: mem.Allocator, macho_file: File) !ModuleDebugInfo { - const mapped_mem = try mapWholeFile(macho_file); - - const hdr: *const macho.mach_header_64 = @ptrCast(@alignCast(mapped_mem.ptr)); - if (hdr.magic != macho.MH_MAGIC_64) - return error.InvalidDebugInfo; - - var it = macho.LoadCommandIterator{ - .ncmds = hdr.ncmds, - .buffer = mapped_mem[@sizeOf(macho.mach_header_64)..][0..hdr.sizeofcmds], - }; - const symtab = while (it.next()) |cmd| switch (cmd.cmd()) { - .SYMTAB => break cmd.cast(macho.symtab_command).?, - else => {}, - } else return error.MissingDebugInfo; - - const syms = @as( - [*]const macho.nlist_64, - @ptrCast(@alignCast(&mapped_mem[symtab.symoff])), - )[0..symtab.nsyms]; - const strings = mapped_mem[symtab.stroff..][0 .. symtab.strsize - 1 :0]; - - const symbols_buf = try allocator.alloc(MachoSymbol, syms.len); - - var ofile: u32 = undefined; - var last_sym: MachoSymbol = undefined; - var symbol_index: usize = 0; - var state: enum { - init, - oso_open, - oso_close, - bnsym, - fun_strx, - fun_size, - ensym, - } = .init; - - for (syms) |*sym| { - if (!sym.stab()) continue; - - // TODO handle globals N_GSYM, and statics N_STSYM - switch (sym.n_type) { - macho.N_OSO => { - switch (state) { - .init, .oso_close => { - state = .oso_open; - ofile = sym.n_strx; - }, - else => return error.InvalidDebugInfo, - } - }, - macho.N_BNSYM => { - switch (state) { - .oso_open, .ensym => { - state = .bnsym; - last_sym = .{ - .strx = 0, - .addr = sym.n_value, - .size = 0, - .ofile = ofile, - }; - }, - else => return error.InvalidDebugInfo, - } - }, - macho.N_FUN => { - switch (state) { - .bnsym => { - state = .fun_strx; - last_sym.strx = sym.n_strx; - }, - .fun_strx => { - state = .fun_size; - last_sym.size = @as(u32, @intCast(sym.n_value)); - }, - else => return error.InvalidDebugInfo, - } - }, - macho.N_ENSYM => { - switch (state) { - .fun_size => { - state = .ensym; - symbols_buf[symbol_index] = last_sym; - symbol_index += 1; - }, - else => return error.InvalidDebugInfo, - } - }, - macho.N_SO => { - switch (state) { - .init, .oso_close => {}, - .oso_open, .ensym => { - state = .oso_close; - }, - else => return error.InvalidDebugInfo, - } - }, - else => {}, - } - } - - switch (state) { - .init => return error.MissingDebugInfo, - .oso_close => {}, - else => return error.InvalidDebugInfo, - } - - const symbols = try allocator.realloc(symbols_buf, symbol_index); - - // Even though lld emits symbols in ascending order, this debug code - // should work for programs linked in any valid way. - // This sort is so that we can binary search later. - mem.sort(MachoSymbol, symbols, {}, MachoSymbol.addressLessThan); - - return ModuleDebugInfo{ - .base_address = undefined, - .vmaddr_slide = undefined, - .mapped_memory = mapped_mem, - .ofiles = ModuleDebugInfo.OFileTable.init(allocator), - .symbols = symbols, - .strings = strings, - }; -} - -fn printLineFromFileAnyOs(out_stream: anytype, line_info: LineInfo) !void { +fn printLineFromFileAnyOs(out_stream: anytype, line_info: SourceLocation) !void { // Need this to always block even in async I/O mode, because this could potentially // be called from e.g. the event loop code crashing. var f = try fs.cwd().openFile(line_info.file_name, .{}); @@ -1591,7 +998,7 @@ test printLineFromFileAnyOs { var test_dir = std.testing.tmpDir(.{}); defer test_dir.cleanup(); - // Relies on testing.tmpDir internals which is not ideal, but LineInfo requires paths. + // Relies on testing.tmpDir internals which is not ideal, but SourceLocation requires paths. const test_dir_path = try join(allocator, &.{ ".zig-cache", "tmp", test_dir.sub_path[0..] }); defer allocator.free(test_dir_path); @@ -1702,871 +1109,6 @@ test printLineFromFileAnyOs { } } -const MachoSymbol = struct { - strx: u32, - addr: u64, - size: u32, - ofile: u32, - - /// Returns the address from the macho file - fn address(self: MachoSymbol) u64 { - return self.addr; - } - - fn addressLessThan(context: void, lhs: MachoSymbol, rhs: MachoSymbol) bool { - _ = context; - return lhs.addr < rhs.addr; - } -}; - -/// Takes ownership of file, even on error. -/// TODO it's weird to take ownership even on error, rework this code. -fn mapWholeFile(file: File) ![]align(mem.page_size) const u8 { - nosuspend { - defer file.close(); - - const file_len = math.cast(usize, try file.getEndPos()) orelse math.maxInt(usize); - const mapped_mem = try posix.mmap( - null, - file_len, - posix.PROT.READ, - .{ .TYPE = .SHARED }, - file.handle, - 0, - ); - errdefer posix.munmap(mapped_mem); - - return mapped_mem; - } -} - -pub const WindowsModuleInfo = struct { - base_address: usize, - size: u32, - name: []const u8, - handle: windows.HMODULE, - - // Set when the image file needed to be mapped from disk - mapped_file: ?struct { - file: File, - section_handle: windows.HANDLE, - section_view: []const u8, - - pub fn deinit(self: @This()) void { - const process_handle = windows.GetCurrentProcess(); - assert(windows.ntdll.NtUnmapViewOfSection(process_handle, @constCast(@ptrCast(self.section_view.ptr))) == .SUCCESS); - windows.CloseHandle(self.section_handle); - self.file.close(); - } - } = null, -}; - -pub const Info = struct { - allocator: mem.Allocator, - address_map: std.AutoHashMap(usize, *ModuleDebugInfo), - modules: if (native_os == .windows) std.ArrayListUnmanaged(WindowsModuleInfo) else void, - - pub fn init(allocator: mem.Allocator) !Info { - var debug_info = Info{ - .allocator = allocator, - .address_map = std.AutoHashMap(usize, *ModuleDebugInfo).init(allocator), - .modules = if (native_os == .windows) .{} else {}, - }; - - if (native_os == .windows) { - errdefer debug_info.modules.deinit(allocator); - - const handle = windows.kernel32.CreateToolhelp32Snapshot(windows.TH32CS_SNAPMODULE | windows.TH32CS_SNAPMODULE32, 0); - if (handle == windows.INVALID_HANDLE_VALUE) { - switch (windows.GetLastError()) { - else => |err| return windows.unexpectedError(err), - } - } - defer windows.CloseHandle(handle); - - var module_entry: windows.MODULEENTRY32 = undefined; - module_entry.dwSize = @sizeOf(windows.MODULEENTRY32); - if (windows.kernel32.Module32First(handle, &module_entry) == 0) { - return error.MissingDebugInfo; - } - - var module_valid = true; - while (module_valid) { - const module_info = try debug_info.modules.addOne(allocator); - const name = allocator.dupe(u8, mem.sliceTo(&module_entry.szModule, 0)) catch &.{}; - errdefer allocator.free(name); - - module_info.* = .{ - .base_address = @intFromPtr(module_entry.modBaseAddr), - .size = module_entry.modBaseSize, - .name = name, - .handle = module_entry.hModule, - }; - - module_valid = windows.kernel32.Module32Next(handle, &module_entry) == 1; - } - } - - return debug_info; - } - - pub fn deinit(self: *Info) void { - var it = self.address_map.iterator(); - while (it.next()) |entry| { - const mdi = entry.value_ptr.*; - mdi.deinit(self.allocator); - self.allocator.destroy(mdi); - } - self.address_map.deinit(); - if (native_os == .windows) { - for (self.modules.items) |module| { - self.allocator.free(module.name); - if (module.mapped_file) |mapped_file| mapped_file.deinit(); - } - self.modules.deinit(self.allocator); - } - } - - pub fn getModuleForAddress(self: *Info, address: usize) !*ModuleDebugInfo { - if (comptime builtin.target.isDarwin()) { - return self.lookupModuleDyld(address); - } else if (native_os == .windows) { - return self.lookupModuleWin32(address); - } else if (native_os == .haiku) { - return self.lookupModuleHaiku(address); - } else if (comptime builtin.target.isWasm()) { - return self.lookupModuleWasm(address); - } else { - return self.lookupModuleDl(address); - } - } - - // Returns the module name for a given address. - // This can be called when getModuleForAddress fails, so implementations should provide - // a path that doesn't rely on any side-effects of a prior successful module lookup. - pub fn getModuleNameForAddress(self: *Info, address: usize) ?[]const u8 { - if (comptime builtin.target.isDarwin()) { - return self.lookupModuleNameDyld(address); - } else if (native_os == .windows) { - return self.lookupModuleNameWin32(address); - } else if (native_os == .haiku) { - return null; - } else if (comptime builtin.target.isWasm()) { - return null; - } else { - return self.lookupModuleNameDl(address); - } - } - - fn lookupModuleDyld(self: *Info, address: usize) !*ModuleDebugInfo { - const image_count = std.c._dyld_image_count(); - - var i: u32 = 0; - while (i < image_count) : (i += 1) { - const header = std.c._dyld_get_image_header(i) orelse continue; - const base_address = @intFromPtr(header); - if (address < base_address) continue; - const vmaddr_slide = std.c._dyld_get_image_vmaddr_slide(i); - - var it = macho.LoadCommandIterator{ - .ncmds = header.ncmds, - .buffer = @alignCast(@as( - [*]u8, - @ptrFromInt(@intFromPtr(header) + @sizeOf(macho.mach_header_64)), - )[0..header.sizeofcmds]), - }; - - var unwind_info: ?[]const u8 = null; - var eh_frame: ?[]const u8 = null; - while (it.next()) |cmd| switch (cmd.cmd()) { - .SEGMENT_64 => { - const segment_cmd = cmd.cast(macho.segment_command_64).?; - if (!mem.eql(u8, "__TEXT", segment_cmd.segName())) continue; - - const seg_start = segment_cmd.vmaddr + vmaddr_slide; - const seg_end = seg_start + segment_cmd.vmsize; - if (address >= seg_start and address < seg_end) { - if (self.address_map.get(base_address)) |obj_di| { - return obj_di; - } - - for (cmd.getSections()) |sect| { - if (mem.eql(u8, "__unwind_info", sect.sectName())) { - unwind_info = @as([*]const u8, @ptrFromInt(sect.addr + vmaddr_slide))[0..sect.size]; - } else if (mem.eql(u8, "__eh_frame", sect.sectName())) { - eh_frame = @as([*]const u8, @ptrFromInt(sect.addr + vmaddr_slide))[0..sect.size]; - } - } - - const obj_di = try self.allocator.create(ModuleDebugInfo); - errdefer self.allocator.destroy(obj_di); - - const macho_path = mem.sliceTo(std.c._dyld_get_image_name(i), 0); - const macho_file = fs.cwd().openFile(macho_path, .{}) catch |err| switch (err) { - error.FileNotFound => return error.MissingDebugInfo, - else => return err, - }; - obj_di.* = try readMachODebugInfo(self.allocator, macho_file); - obj_di.base_address = base_address; - obj_di.vmaddr_slide = vmaddr_slide; - obj_di.unwind_info = unwind_info; - obj_di.eh_frame = eh_frame; - - try self.address_map.putNoClobber(base_address, obj_di); - - return obj_di; - } - }, - else => {}, - }; - } - - return error.MissingDebugInfo; - } - - fn lookupModuleNameDyld(self: *Info, address: usize) ?[]const u8 { - _ = self; - const image_count = std.c._dyld_image_count(); - - var i: u32 = 0; - while (i < image_count) : (i += 1) { - const header = std.c._dyld_get_image_header(i) orelse continue; - const base_address = @intFromPtr(header); - if (address < base_address) continue; - const vmaddr_slide = std.c._dyld_get_image_vmaddr_slide(i); - - var it = macho.LoadCommandIterator{ - .ncmds = header.ncmds, - .buffer = @alignCast(@as( - [*]u8, - @ptrFromInt(@intFromPtr(header) + @sizeOf(macho.mach_header_64)), - )[0..header.sizeofcmds]), - }; - - while (it.next()) |cmd| switch (cmd.cmd()) { - .SEGMENT_64 => { - const segment_cmd = cmd.cast(macho.segment_command_64).?; - if (!mem.eql(u8, "__TEXT", segment_cmd.segName())) continue; - - const original_address = address - vmaddr_slide; - const seg_start = segment_cmd.vmaddr; - const seg_end = seg_start + segment_cmd.vmsize; - if (original_address >= seg_start and original_address < seg_end) { - return fs.path.basename(mem.sliceTo(std.c._dyld_get_image_name(i), 0)); - } - }, - else => {}, - }; - } - - return null; - } - - fn lookupModuleWin32(self: *Info, address: usize) !*ModuleDebugInfo { - for (self.modules.items) |*module| { - if (address >= module.base_address and address < module.base_address + module.size) { - if (self.address_map.get(module.base_address)) |obj_di| { - return obj_di; - } - - const obj_di = try self.allocator.create(ModuleDebugInfo); - errdefer self.allocator.destroy(obj_di); - - const mapped_module = @as([*]const u8, @ptrFromInt(module.base_address))[0..module.size]; - var coff_obj = try coff.Coff.init(mapped_module, true); - - // The string table is not mapped into memory by the loader, so if a section name is in the - // string table then we have to map the full image file from disk. This can happen when - // a binary is produced with -gdwarf, since the section names are longer than 8 bytes. - if (coff_obj.strtabRequired()) { - var name_buffer: [windows.PATH_MAX_WIDE + 4:0]u16 = undefined; - // openFileAbsoluteW requires the prefix to be present - @memcpy(name_buffer[0..4], &[_]u16{ '\\', '?', '?', '\\' }); - - const process_handle = windows.GetCurrentProcess(); - const len = windows.kernel32.GetModuleFileNameExW( - process_handle, - module.handle, - @ptrCast(&name_buffer[4]), - windows.PATH_MAX_WIDE, - ); - - if (len == 0) return error.MissingDebugInfo; - const coff_file = fs.openFileAbsoluteW(name_buffer[0 .. len + 4 :0], .{}) catch |err| switch (err) { - error.FileNotFound => return error.MissingDebugInfo, - else => return err, - }; - errdefer coff_file.close(); - - var section_handle: windows.HANDLE = undefined; - const create_section_rc = windows.ntdll.NtCreateSection( - §ion_handle, - windows.STANDARD_RIGHTS_REQUIRED | windows.SECTION_QUERY | windows.SECTION_MAP_READ, - null, - null, - windows.PAGE_READONLY, - // The documentation states that if no AllocationAttribute is specified, then SEC_COMMIT is the default. - // In practice, this isn't the case and specifying 0 will result in INVALID_PARAMETER_6. - windows.SEC_COMMIT, - coff_file.handle, - ); - if (create_section_rc != .SUCCESS) return error.MissingDebugInfo; - errdefer windows.CloseHandle(section_handle); - - var coff_len: usize = 0; - var base_ptr: usize = 0; - const map_section_rc = windows.ntdll.NtMapViewOfSection( - section_handle, - process_handle, - @ptrCast(&base_ptr), - null, - 0, - null, - &coff_len, - .ViewUnmap, - 0, - windows.PAGE_READONLY, - ); - if (map_section_rc != .SUCCESS) return error.MissingDebugInfo; - errdefer assert(windows.ntdll.NtUnmapViewOfSection(process_handle, @ptrFromInt(base_ptr)) == .SUCCESS); - - const section_view = @as([*]const u8, @ptrFromInt(base_ptr))[0..coff_len]; - coff_obj = try coff.Coff.init(section_view, false); - - module.mapped_file = .{ - .file = coff_file, - .section_handle = section_handle, - .section_view = section_view, - }; - } - errdefer if (module.mapped_file) |mapped_file| mapped_file.deinit(); - - obj_di.* = try readCoffDebugInfo(self.allocator, &coff_obj); - obj_di.base_address = module.base_address; - - try self.address_map.putNoClobber(module.base_address, obj_di); - return obj_di; - } - } - - return error.MissingDebugInfo; - } - - fn lookupModuleNameWin32(self: *Info, address: usize) ?[]const u8 { - for (self.modules.items) |module| { - if (address >= module.base_address and address < module.base_address + module.size) { - return module.name; - } - } - return null; - } - - fn lookupModuleNameDl(self: *Info, address: usize) ?[]const u8 { - _ = self; - - var ctx: struct { - // Input - address: usize, - // Output - name: []const u8 = "", - } = .{ .address = address }; - const CtxTy = @TypeOf(ctx); - - if (posix.dl_iterate_phdr(&ctx, error{Found}, struct { - fn callback(info: *posix.dl_phdr_info, size: usize, context: *CtxTy) !void { - _ = size; - if (context.address < info.addr) return; - const phdrs = info.phdr[0..info.phnum]; - for (phdrs) |*phdr| { - if (phdr.p_type != elf.PT_LOAD) continue; - - const seg_start = info.addr +% phdr.p_vaddr; - const seg_end = seg_start + phdr.p_memsz; - if (context.address >= seg_start and context.address < seg_end) { - context.name = mem.sliceTo(info.name, 0) orelse ""; - break; - } - } else return; - - return error.Found; - } - }.callback)) { - return null; - } else |err| switch (err) { - error.Found => return fs.path.basename(ctx.name), - } - - return null; - } - - fn lookupModuleDl(self: *Info, address: usize) !*ModuleDebugInfo { - var ctx: struct { - // Input - address: usize, - // Output - base_address: usize = undefined, - name: []const u8 = undefined, - build_id: ?[]const u8 = null, - gnu_eh_frame: ?[]const u8 = null, - } = .{ .address = address }; - const CtxTy = @TypeOf(ctx); - - if (posix.dl_iterate_phdr(&ctx, error{Found}, struct { - fn callback(info: *posix.dl_phdr_info, size: usize, context: *CtxTy) !void { - _ = size; - // The base address is too high - if (context.address < info.addr) - return; - - const phdrs = info.phdr[0..info.phnum]; - for (phdrs) |*phdr| { - if (phdr.p_type != elf.PT_LOAD) continue; - - // Overflowing addition is used to handle the case of VSDOs having a p_vaddr = 0xffffffffff700000 - const seg_start = info.addr +% phdr.p_vaddr; - const seg_end = seg_start + phdr.p_memsz; - if (context.address >= seg_start and context.address < seg_end) { - // Android libc uses NULL instead of an empty string to mark the - // main program - context.name = mem.sliceTo(info.name, 0) orelse ""; - context.base_address = info.addr; - break; - } - } else return; - - for (info.phdr[0..info.phnum]) |phdr| { - switch (phdr.p_type) { - elf.PT_NOTE => { - // Look for .note.gnu.build-id - const note_bytes = @as([*]const u8, @ptrFromInt(info.addr + phdr.p_vaddr))[0..phdr.p_memsz]; - const name_size = mem.readInt(u32, note_bytes[0..4], native_endian); - if (name_size != 4) continue; - const desc_size = mem.readInt(u32, note_bytes[4..8], native_endian); - const note_type = mem.readInt(u32, note_bytes[8..12], native_endian); - if (note_type != elf.NT_GNU_BUILD_ID) continue; - if (!mem.eql(u8, "GNU\x00", note_bytes[12..16])) continue; - context.build_id = note_bytes[16..][0..desc_size]; - }, - elf.PT_GNU_EH_FRAME => { - context.gnu_eh_frame = @as([*]const u8, @ptrFromInt(info.addr + phdr.p_vaddr))[0..phdr.p_memsz]; - }, - else => {}, - } - } - - // Stop the iteration - return error.Found; - } - }.callback)) { - return error.MissingDebugInfo; - } else |err| switch (err) { - error.Found => {}, - } - - if (self.address_map.get(ctx.base_address)) |obj_di| { - return obj_di; - } - - const obj_di = try self.allocator.create(ModuleDebugInfo); - errdefer self.allocator.destroy(obj_di); - - var sections: Dwarf.SectionArray = Dwarf.null_section_array; - if (ctx.gnu_eh_frame) |eh_frame_hdr| { - // This is a special case - pointer offsets inside .eh_frame_hdr - // are encoded relative to its base address, so we must use the - // version that is already memory mapped, and not the one that - // will be mapped separately from the ELF file. - sections[@intFromEnum(Dwarf.Section.Id.eh_frame_hdr)] = .{ - .data = eh_frame_hdr, - .owned = false, - }; - } - - obj_di.* = try readElfDebugInfo(self.allocator, if (ctx.name.len > 0) ctx.name else null, ctx.build_id, null, §ions, null); - obj_di.base_address = ctx.base_address; - - // Missing unwind info isn't treated as a failure, as the unwinder will fall back to FP-based unwinding - obj_di.dwarf.scanAllUnwindInfo(self.allocator, ctx.base_address) catch {}; - - try self.address_map.putNoClobber(ctx.base_address, obj_di); - - return obj_di; - } - - fn lookupModuleHaiku(self: *Info, address: usize) !*ModuleDebugInfo { - _ = self; - _ = address; - @panic("TODO implement lookup module for Haiku"); - } - - fn lookupModuleWasm(self: *Info, address: usize) !*ModuleDebugInfo { - _ = self; - _ = address; - @panic("TODO implement lookup module for Wasm"); - } -}; - -pub const ModuleDebugInfo = switch (native_os) { - .macos, .ios, .watchos, .tvos, .visionos => struct { - base_address: usize, - vmaddr_slide: usize, - mapped_memory: []align(mem.page_size) const u8, - symbols: []const MachoSymbol, - strings: [:0]const u8, - ofiles: OFileTable, - - // Backed by the in-memory sections mapped by the loader - unwind_info: ?[]const u8 = null, - eh_frame: ?[]const u8 = null, - - const OFileTable = std.StringHashMap(OFileInfo); - const OFileInfo = struct { - di: Dwarf, - addr_table: std.StringHashMap(u64), - }; - - pub fn deinit(self: *@This(), allocator: mem.Allocator) void { - var it = self.ofiles.iterator(); - while (it.next()) |entry| { - const ofile = entry.value_ptr; - ofile.di.deinit(allocator); - ofile.addr_table.deinit(); - } - self.ofiles.deinit(); - allocator.free(self.symbols); - posix.munmap(self.mapped_memory); - } - - fn loadOFile(self: *@This(), allocator: mem.Allocator, o_file_path: []const u8) !*OFileInfo { - const o_file = try fs.cwd().openFile(o_file_path, .{}); - const mapped_mem = try mapWholeFile(o_file); - - const hdr: *const macho.mach_header_64 = @ptrCast(@alignCast(mapped_mem.ptr)); - if (hdr.magic != std.macho.MH_MAGIC_64) - return error.InvalidDebugInfo; - - var segcmd: ?macho.LoadCommandIterator.LoadCommand = null; - var symtabcmd: ?macho.symtab_command = null; - var it = macho.LoadCommandIterator{ - .ncmds = hdr.ncmds, - .buffer = mapped_mem[@sizeOf(macho.mach_header_64)..][0..hdr.sizeofcmds], - }; - while (it.next()) |cmd| switch (cmd.cmd()) { - .SEGMENT_64 => segcmd = cmd, - .SYMTAB => symtabcmd = cmd.cast(macho.symtab_command).?, - else => {}, - }; - - if (segcmd == null or symtabcmd == null) return error.MissingDebugInfo; - - // Parse symbols - const strtab = @as( - [*]const u8, - @ptrCast(&mapped_mem[symtabcmd.?.stroff]), - )[0 .. symtabcmd.?.strsize - 1 :0]; - const symtab = @as( - [*]const macho.nlist_64, - @ptrCast(@alignCast(&mapped_mem[symtabcmd.?.symoff])), - )[0..symtabcmd.?.nsyms]; - - // TODO handle tentative (common) symbols - var addr_table = std.StringHashMap(u64).init(allocator); - try addr_table.ensureTotalCapacity(@as(u32, @intCast(symtab.len))); - for (symtab) |sym| { - if (sym.n_strx == 0) continue; - if (sym.undf() or sym.tentative() or sym.abs()) continue; - const sym_name = mem.sliceTo(strtab[sym.n_strx..], 0); - // TODO is it possible to have a symbol collision? - addr_table.putAssumeCapacityNoClobber(sym_name, sym.n_value); - } - - var sections: Dwarf.SectionArray = Dwarf.null_section_array; - if (self.eh_frame) |eh_frame| sections[@intFromEnum(Dwarf.Section.Id.eh_frame)] = .{ - .data = eh_frame, - .owned = false, - }; - - for (segcmd.?.getSections()) |sect| { - if (!std.mem.eql(u8, "__DWARF", sect.segName())) continue; - - var section_index: ?usize = null; - inline for (@typeInfo(Dwarf.Section.Id).Enum.fields, 0..) |section, i| { - if (mem.eql(u8, "__" ++ section.name, sect.sectName())) section_index = i; - } - if (section_index == null) continue; - - const section_bytes = try chopSlice(mapped_mem, sect.offset, sect.size); - sections[section_index.?] = .{ - .data = section_bytes, - .virtual_address = sect.addr, - .owned = false, - }; - } - - const missing_debug_info = - sections[@intFromEnum(Dwarf.Section.Id.debug_info)] == null or - sections[@intFromEnum(Dwarf.Section.Id.debug_abbrev)] == null or - sections[@intFromEnum(Dwarf.Section.Id.debug_str)] == null or - sections[@intFromEnum(Dwarf.Section.Id.debug_line)] == null; - if (missing_debug_info) return error.MissingDebugInfo; - - var di = Dwarf{ - .endian = .little, - .sections = sections, - .is_macho = true, - }; - - try Dwarf.open(&di, allocator); - const info = OFileInfo{ - .di = di, - .addr_table = addr_table, - }; - - // Add the debug info to the cache - const result = try self.ofiles.getOrPut(o_file_path); - assert(!result.found_existing); - result.value_ptr.* = info; - - return result.value_ptr; - } - - pub fn getSymbolAtAddress(self: *@This(), allocator: mem.Allocator, address: usize) !SymbolInfo { - nosuspend { - const result = try self.getOFileInfoForAddress(allocator, address); - if (result.symbol == null) return .{}; - - // Take the symbol name from the N_FUN STAB entry, we're going to - // use it if we fail to find the DWARF infos - const stab_symbol = mem.sliceTo(self.strings[result.symbol.?.strx..], 0); - if (result.o_file_info == null) return .{ .symbol_name = stab_symbol }; - - // Translate again the address, this time into an address inside the - // .o file - const relocated_address_o = result.o_file_info.?.addr_table.get(stab_symbol) orelse return .{ - .symbol_name = "???", - }; - - const addr_off = result.relocated_address - result.symbol.?.addr; - const o_file_di = &result.o_file_info.?.di; - if (o_file_di.findCompileUnit(relocated_address_o)) |compile_unit| { - return SymbolInfo{ - .symbol_name = o_file_di.getSymbolName(relocated_address_o) orelse "???", - .compile_unit_name = compile_unit.die.getAttrString( - o_file_di, - DW.AT.name, - o_file_di.section(.debug_str), - compile_unit.*, - ) catch |err| switch (err) { - error.MissingDebugInfo, error.InvalidDebugInfo => "???", - }, - .line_info = o_file_di.getLineNumberInfo( - allocator, - compile_unit.*, - relocated_address_o + addr_off, - ) catch |err| switch (err) { - error.MissingDebugInfo, error.InvalidDebugInfo => null, - else => return err, - }, - }; - } else |err| switch (err) { - error.MissingDebugInfo, error.InvalidDebugInfo => { - return SymbolInfo{ .symbol_name = stab_symbol }; - }, - else => return err, - } - } - } - - pub fn getOFileInfoForAddress(self: *@This(), allocator: mem.Allocator, address: usize) !struct { - relocated_address: usize, - symbol: ?*const MachoSymbol = null, - o_file_info: ?*OFileInfo = null, - } { - nosuspend { - // Translate the VA into an address into this object - const relocated_address = address - self.vmaddr_slide; - - // Find the .o file where this symbol is defined - const symbol = machoSearchSymbols(self.symbols, relocated_address) orelse return .{ - .relocated_address = relocated_address, - }; - - // Check if its debug infos are already in the cache - const o_file_path = mem.sliceTo(self.strings[symbol.ofile..], 0); - const o_file_info = self.ofiles.getPtr(o_file_path) orelse - (self.loadOFile(allocator, o_file_path) catch |err| switch (err) { - error.FileNotFound, - error.MissingDebugInfo, - error.InvalidDebugInfo, - => return .{ - .relocated_address = relocated_address, - .symbol = symbol, - }, - else => return err, - }); - - return .{ - .relocated_address = relocated_address, - .symbol = symbol, - .o_file_info = o_file_info, - }; - } - } - - pub fn getDwarfInfoForAddress(self: *@This(), allocator: mem.Allocator, address: usize) !?*const Dwarf { - return if ((try self.getOFileInfoForAddress(allocator, address)).o_file_info) |o_file_info| &o_file_info.di else null; - } - }, - .uefi, .windows => struct { - base_address: usize, - pdb: ?pdb.Pdb = null, - dwarf: ?Dwarf = null, - coff_image_base: u64, - - /// Only used if pdb is non-null - coff_section_headers: []coff.SectionHeader, - - pub fn deinit(self: *@This(), allocator: mem.Allocator) void { - if (self.dwarf) |*dwarf| { - dwarf.deinit(allocator); - } - - if (self.pdb) |*p| { - p.deinit(); - allocator.free(self.coff_section_headers); - } - } - - fn getSymbolFromPdb(self: *@This(), relocated_address: usize) !?SymbolInfo { - var coff_section: *align(1) const coff.SectionHeader = undefined; - const mod_index = for (self.pdb.?.sect_contribs) |sect_contrib| { - if (sect_contrib.Section > self.coff_section_headers.len) continue; - // Remember that SectionContribEntry.Section is 1-based. - coff_section = &self.coff_section_headers[sect_contrib.Section - 1]; - - const vaddr_start = coff_section.virtual_address + sect_contrib.Offset; - const vaddr_end = vaddr_start + sect_contrib.Size; - if (relocated_address >= vaddr_start and relocated_address < vaddr_end) { - break sect_contrib.ModuleIndex; - } - } else { - // we have no information to add to the address - return null; - }; - - const module = (try self.pdb.?.getModule(mod_index)) orelse - return error.InvalidDebugInfo; - const obj_basename = fs.path.basename(module.obj_file_name); - - const symbol_name = self.pdb.?.getSymbolName( - module, - relocated_address - coff_section.virtual_address, - ) orelse "???"; - const opt_line_info = try self.pdb.?.getLineNumberInfo( - module, - relocated_address - coff_section.virtual_address, - ); - - return SymbolInfo{ - .symbol_name = symbol_name, - .compile_unit_name = obj_basename, - .line_info = opt_line_info, - }; - } - - pub fn getSymbolAtAddress(self: *@This(), allocator: mem.Allocator, address: usize) !SymbolInfo { - // Translate the VA into an address into this object - const relocated_address = address - self.base_address; - - if (self.pdb != null) { - if (try self.getSymbolFromPdb(relocated_address)) |symbol| return symbol; - } - - if (self.dwarf) |*dwarf| { - const dwarf_address = relocated_address + self.coff_image_base; - return getSymbolFromDwarf(allocator, dwarf_address, dwarf); - } - - return SymbolInfo{}; - } - - pub fn getDwarfInfoForAddress(self: *@This(), allocator: mem.Allocator, address: usize) !?*const Dwarf { - _ = allocator; - _ = address; - - return switch (self.debug_data) { - .dwarf => |*dwarf| dwarf, - else => null, - }; - } - }, - .linux, .netbsd, .freebsd, .dragonfly, .openbsd, .haiku, .solaris, .illumos => struct { - base_address: usize, - dwarf: Dwarf, - mapped_memory: []align(mem.page_size) const u8, - external_mapped_memory: ?[]align(mem.page_size) const u8, - - pub fn deinit(self: *@This(), allocator: mem.Allocator) void { - self.dwarf.deinit(allocator); - posix.munmap(self.mapped_memory); - if (self.external_mapped_memory) |m| posix.munmap(m); - } - - pub fn getSymbolAtAddress(self: *@This(), allocator: mem.Allocator, address: usize) !SymbolInfo { - // Translate the VA into an address into this object - const relocated_address = address - self.base_address; - return getSymbolFromDwarf(allocator, relocated_address, &self.dwarf); - } - - pub fn getDwarfInfoForAddress(self: *@This(), allocator: mem.Allocator, address: usize) !?*const Dwarf { - _ = allocator; - _ = address; - return &self.dwarf; - } - }, - .wasi, .emscripten => struct { - pub fn deinit(self: *@This(), allocator: mem.Allocator) void { - _ = self; - _ = allocator; - } - - pub fn getSymbolAtAddress(self: *@This(), allocator: mem.Allocator, address: usize) !SymbolInfo { - _ = self; - _ = allocator; - _ = address; - return SymbolInfo{}; - } - - pub fn getDwarfInfoForAddress(self: *@This(), allocator: mem.Allocator, address: usize) !?*const Dwarf { - _ = self; - _ = allocator; - _ = address; - return null; - } - }, - else => Dwarf, -}; - -fn getSymbolFromDwarf(allocator: mem.Allocator, address: u64, di: *Dwarf) !SymbolInfo { - if (nosuspend di.findCompileUnit(address)) |compile_unit| { - return SymbolInfo{ - .symbol_name = nosuspend di.getSymbolName(address) orelse "???", - .compile_unit_name = compile_unit.die.getAttrString(di, DW.AT.name, di.section(.debug_str), compile_unit.*) catch |err| switch (err) { - error.MissingDebugInfo, error.InvalidDebugInfo => "???", - }, - .line_info = nosuspend di.getLineNumberInfo(allocator, compile_unit.*, address) catch |err| switch (err) { - error.MissingDebugInfo, error.InvalidDebugInfo => null, - else => return err, - }, - }; - } else |err| switch (err) { - error.MissingDebugInfo, error.InvalidDebugInfo => { - return SymbolInfo{}; - }, - else => return err, - } -} - /// TODO multithreaded awareness var debug_info_allocator: ?mem.Allocator = null; var debug_info_arena_allocator: std.heap.ArenaAllocator = undefined; @@ -2687,7 +1229,7 @@ fn handleSegfaultPosix(sig: i32, info: *const posix.siginfo_t, ctx_ptr: ?*anyopa posix.abort(); } -fn dumpSegfaultInfoPosix(sig: i32, code: i32, addr: usize, ctx_ptr: ?*const anyopaque) void { +fn dumpSegfaultInfoPosix(sig: i32, code: i32, addr: usize, ctx_ptr: ?*anyopaque) void { const stderr = io.getStdErr().writer(); _ = switch (sig) { posix.SIG.SEGV => if (native_arch == .x86_64 and native_os == .linux and code == 128) // SI_KERNEL @@ -2713,7 +1255,7 @@ fn dumpSegfaultInfoPosix(sig: i32, code: i32, addr: usize, ctx_ptr: ?*const anyo .arm, .aarch64, => { - const ctx: *const posix.ucontext_t = @ptrCast(@alignCast(ctx_ptr)); + const ctx: *posix.ucontext_t = @ptrCast(@alignCast(ctx_ptr)); dumpStackTraceFromBase(ctx); }, else => {}, @@ -2802,7 +1344,7 @@ test "manage resources correctly" { } const writer = std.io.null_writer; - var di = try openSelfDebugInfo(testing.allocator); + var di = try SelfInfo.open(testing.allocator); defer di.deinit(); try printSourceAtAddress(&di, writer, showMyTrace(), io.tty.detectConfig(std.io.getStdErr())); } @@ -2939,6 +1481,99 @@ pub const SafetyLock = struct { } }; +/// Deprecated. Don't use this, just read from your memory directly. +/// +/// This only exists because someone was too lazy to rework logic that used to +/// operate on an open file to operate on a memory buffer instead. +pub const DeprecatedFixedBufferReader = struct { + buf: []const u8, + pos: usize = 0, + endian: std.builtin.Endian, + + pub const Error = error{ EndOfBuffer, Overflow, InvalidBuffer }; + + pub fn seekTo(fbr: *DeprecatedFixedBufferReader, pos: u64) Error!void { + if (pos > fbr.buf.len) return error.EndOfBuffer; + fbr.pos = @intCast(pos); + } + + pub fn seekForward(fbr: *DeprecatedFixedBufferReader, amount: u64) Error!void { + if (fbr.buf.len - fbr.pos < amount) return error.EndOfBuffer; + fbr.pos += @intCast(amount); + } + + pub inline fn readByte(fbr: *DeprecatedFixedBufferReader) Error!u8 { + if (fbr.pos >= fbr.buf.len) return error.EndOfBuffer; + defer fbr.pos += 1; + return fbr.buf[fbr.pos]; + } + + pub fn readByteSigned(fbr: *DeprecatedFixedBufferReader) Error!i8 { + return @bitCast(try fbr.readByte()); + } + + pub fn readInt(fbr: *DeprecatedFixedBufferReader, comptime T: type) Error!T { + const size = @divExact(@typeInfo(T).Int.bits, 8); + if (fbr.buf.len - fbr.pos < size) return error.EndOfBuffer; + defer fbr.pos += size; + return std.mem.readInt(T, fbr.buf[fbr.pos..][0..size], fbr.endian); + } + + pub fn readIntChecked( + fbr: *DeprecatedFixedBufferReader, + comptime T: type, + ma: *MemoryAccessor, + ) Error!T { + if (ma.load(T, @intFromPtr(fbr.buf[fbr.pos..].ptr)) == null) + return error.InvalidBuffer; + + return fbr.readInt(T); + } + + pub fn readUleb128(fbr: *DeprecatedFixedBufferReader, comptime T: type) Error!T { + return std.leb.readUleb128(T, fbr); + } + + pub fn readIleb128(fbr: *DeprecatedFixedBufferReader, comptime T: type) Error!T { + return std.leb.readIleb128(T, fbr); + } + + pub fn readAddress(fbr: *DeprecatedFixedBufferReader, format: std.dwarf.Format) Error!u64 { + return switch (format) { + .@"32" => try fbr.readInt(u32), + .@"64" => try fbr.readInt(u64), + }; + } + + pub fn readAddressChecked( + fbr: *DeprecatedFixedBufferReader, + format: std.dwarf.Format, + ma: *MemoryAccessor, + ) Error!u64 { + return switch (format) { + .@"32" => try fbr.readIntChecked(u32, ma), + .@"64" => try fbr.readIntChecked(u64, ma), + }; + } + + pub fn readBytes(fbr: *DeprecatedFixedBufferReader, len: usize) Error![]const u8 { + if (fbr.buf.len - fbr.pos < len) return error.EndOfBuffer; + defer fbr.pos += len; + return fbr.buf[fbr.pos..][0..len]; + } + + pub fn readBytesTo(fbr: *DeprecatedFixedBufferReader, comptime sentinel: u8) Error![:sentinel]const u8 { + const end = @call(.always_inline, std.mem.indexOfScalarPos, .{ + u8, + fbr.buf, + fbr.pos, + sentinel, + }) orelse return error.EndOfBuffer; + defer fbr.pos = end + 1; + return fbr.buf[fbr.pos..end :sentinel]; + } +}; + /// Detect whether the program is being executed in the Valgrind virtual machine. /// /// When Valgrind integrations are disabled, this returns comptime-known false. @@ -2950,5 +1585,9 @@ pub inline fn inValgrind() bool { } test { + _ = &Dwarf; + _ = &MemoryAccessor; + _ = &Pdb; + _ = &SelfInfo; _ = &dumpHex; } diff --git a/lib/std/debug/Dwarf.zig b/lib/std/debug/Dwarf.zig index f17fd737a1..991c731549 100644 --- a/lib/std/debug/Dwarf.zig +++ b/lib/std/debug/Dwarf.zig @@ -1,23 +1,32 @@ //! Implements parsing, decoding, and caching of DWARF information. //! +//! This API does not assume the current executable is itself the thing being +//! debugged, however, it does assume the debug info has the same CPU +//! architecture and OS as the current executable. It is planned to remove this +//! limitation. +//! //! For unopinionated types and bits, see `std.dwarf`. const builtin = @import("builtin"); +const native_endian = builtin.cpu.arch.endian(); + const std = @import("../std.zig"); -const AT = DW.AT; const Allocator = std.mem.Allocator; const DW = std.dwarf; +const AT = DW.AT; const EH = DW.EH; const FORM = DW.FORM; const Format = DW.Format; const RLE = DW.RLE; -const StackIterator = std.debug.StackIterator; const UT = DW.UT; const assert = std.debug.assert; const cast = std.math.cast; const maxInt = std.math.maxInt; -const native_endian = builtin.cpu.arch.endian(); const readInt = std.mem.readInt; +const MemoryAccessor = std.debug.MemoryAccessor; + +/// Did I mention this is deprecated? +const DeprecatedFixedBufferReader = std.debug.DeprecatedFixedBufferReader; const Dwarf = @This(); @@ -153,7 +162,7 @@ pub const FormValue = union(enum) { .string => |s| return s, .strp => |off| return di.getString(off), .line_strp => |off| return di.getLineString(off), - else => return badDwarf(), + else => return bad(), } } @@ -162,8 +171,8 @@ pub const FormValue = union(enum) { inline .udata, .sdata, .sec_offset, - => |c| cast(U, c) orelse badDwarf(), - else => badDwarf(), + => |c| cast(U, c) orelse bad(), + else => bad(), }; } }; @@ -237,25 +246,25 @@ pub const Die = struct { .string => |value| return value, .strp => |offset| return di.getString(offset), .strx => |index| { - const debug_str_offsets = di.section(.debug_str_offsets) orelse return badDwarf(); - if (compile_unit.str_offsets_base == 0) return badDwarf(); + const debug_str_offsets = di.section(.debug_str_offsets) orelse return bad(); + if (compile_unit.str_offsets_base == 0) return bad(); switch (compile_unit.format) { .@"32" => { const byte_offset = compile_unit.str_offsets_base + 4 * index; - if (byte_offset + 4 > debug_str_offsets.len) return badDwarf(); + if (byte_offset + 4 > debug_str_offsets.len) return bad(); const offset = readInt(u32, debug_str_offsets[byte_offset..][0..4], di.endian); return getStringGeneric(opt_str, offset); }, .@"64" => { const byte_offset = compile_unit.str_offsets_base + 8 * index; - if (byte_offset + 8 > debug_str_offsets.len) return badDwarf(); + if (byte_offset + 8 > debug_str_offsets.len) return bad(); const offset = readInt(u64, debug_str_offsets[byte_offset..][0..8], di.endian); return getStringGeneric(opt_str, offset); }, } }, .line_strp => |offset| return di.getLineString(offset), - else => return badDwarf(), + else => return bad(), } } }; @@ -279,7 +288,7 @@ pub const ExceptionFrameHeader = struct { EH.PE.sdata8, => 16, // This is a binary search table, so all entries must be the same length - else => return badDwarf(), + else => return bad(), }; } @@ -287,7 +296,7 @@ pub const ExceptionFrameHeader = struct { self: ExceptionFrameHeader, comptime T: type, ptr: usize, - ma: *StackIterator.MemoryAccessor, + ma: *MemoryAccessor, eh_frame_len: ?usize, ) bool { if (eh_frame_len) |len| { @@ -304,7 +313,7 @@ pub const ExceptionFrameHeader = struct { /// If `eh_frame_len` is provided, then these checks can be skipped. pub fn findEntry( self: ExceptionFrameHeader, - ma: *StackIterator.MemoryAccessor, + ma: *MemoryAccessor, eh_frame_len: ?usize, eh_frame_hdr_ptr: usize, pc: usize, @@ -316,7 +325,7 @@ pub const ExceptionFrameHeader = struct { var left: usize = 0; var len: usize = self.fde_count; - var fbr: FixedBufferReader = .{ .buf = self.entries, .endian = native_endian }; + var fbr: DeprecatedFixedBufferReader = .{ .buf = self.entries, .endian = native_endian }; while (len > 1) { const mid = left + len / 2; @@ -326,7 +335,7 @@ pub const ExceptionFrameHeader = struct { .pc_rel_base = @intFromPtr(&self.entries[fbr.pos]), .follow_indirect = true, .data_rel_base = eh_frame_hdr_ptr, - }) orelse return badDwarf(); + }) orelse return bad(); if (pc < pc_begin) { len /= 2; @@ -337,7 +346,7 @@ pub const ExceptionFrameHeader = struct { } } - if (len == 0) return badDwarf(); + if (len == 0) return bad(); fbr.pos = left * entry_size; // Read past the pc_begin field of the entry @@ -345,36 +354,36 @@ pub const ExceptionFrameHeader = struct { .pc_rel_base = @intFromPtr(&self.entries[fbr.pos]), .follow_indirect = true, .data_rel_base = eh_frame_hdr_ptr, - }) orelse return badDwarf(); + }) orelse return bad(); const fde_ptr = cast(usize, try readEhPointer(&fbr, self.table_enc, @sizeOf(usize), .{ .pc_rel_base = @intFromPtr(&self.entries[fbr.pos]), .follow_indirect = true, .data_rel_base = eh_frame_hdr_ptr, - }) orelse return badDwarf()) orelse return badDwarf(); + }) orelse return bad()) orelse return bad(); - if (fde_ptr < self.eh_frame_ptr) return badDwarf(); + if (fde_ptr < self.eh_frame_ptr) return bad(); // Even if eh_frame_len is not specified, all ranges accssed are checked via MemoryAccessor const eh_frame = @as([*]const u8, @ptrFromInt(self.eh_frame_ptr))[0 .. eh_frame_len orelse maxInt(u32)]; const fde_offset = fde_ptr - self.eh_frame_ptr; - var eh_frame_fbr: FixedBufferReader = .{ + var eh_frame_fbr: DeprecatedFixedBufferReader = .{ .buf = eh_frame, .pos = fde_offset, .endian = native_endian, }; const fde_entry_header = try EntryHeader.read(&eh_frame_fbr, if (eh_frame_len == null) ma else null, .eh_frame); - if (!self.isValidPtr(u8, @intFromPtr(&fde_entry_header.entry_bytes[fde_entry_header.entry_bytes.len - 1]), ma, eh_frame_len)) return badDwarf(); - if (fde_entry_header.type != .fde) return badDwarf(); + if (!self.isValidPtr(u8, @intFromPtr(&fde_entry_header.entry_bytes[fde_entry_header.entry_bytes.len - 1]), ma, eh_frame_len)) return bad(); + if (fde_entry_header.type != .fde) return bad(); // CIEs always come before FDEs (the offset is a subtraction), so we can assume this memory is readable const cie_offset = fde_entry_header.type.fde; try eh_frame_fbr.seekTo(cie_offset); const cie_entry_header = try EntryHeader.read(&eh_frame_fbr, if (eh_frame_len == null) ma else null, .eh_frame); - if (!self.isValidPtr(u8, @intFromPtr(&cie_entry_header.entry_bytes[cie_entry_header.entry_bytes.len - 1]), ma, eh_frame_len)) return badDwarf(); - if (cie_entry_header.type != .cie) return badDwarf(); + if (!self.isValidPtr(u8, @intFromPtr(&cie_entry_header.entry_bytes[cie_entry_header.entry_bytes.len - 1]), ma, eh_frame_len)) return bad(); + if (cie_entry_header.type != .cie) return bad(); cie.* = try CommonInformationEntry.parse( cie_entry_header.entry_bytes, @@ -417,17 +426,17 @@ pub const EntryHeader = struct { } /// Reads a header for either an FDE or a CIE, then advances the fbr to the position after the trailing structure. - /// `fbr` must be a FixedBufferReader backed by either the .eh_frame or .debug_frame sections. + /// `fbr` must be a DeprecatedFixedBufferReader backed by either the .eh_frame or .debug_frame sections. pub fn read( - fbr: *FixedBufferReader, - opt_ma: ?*StackIterator.MemoryAccessor, + fbr: *DeprecatedFixedBufferReader, + opt_ma: ?*MemoryAccessor, dwarf_section: Section.Id, ) !EntryHeader { assert(dwarf_section == .eh_frame or dwarf_section == .debug_frame); const length_offset = fbr.pos; const unit_header = try readUnitHeader(fbr, opt_ma); - const unit_length = cast(usize, unit_header.unit_length) orelse return badDwarf(); + const unit_length = cast(usize, unit_header.unit_length) orelse return bad(); if (unit_length == 0) return .{ .length_offset = length_offset, .format = unit_header.format, @@ -532,7 +541,7 @@ pub const CommonInformationEntry = struct { ) !CommonInformationEntry { if (addr_size_bytes > 8) return error.UnsupportedAddrSize; - var fbr: FixedBufferReader = .{ .buf = cie_bytes, .endian = endian }; + var fbr: DeprecatedFixedBufferReader = .{ .buf = cie_bytes, .endian = endian }; const version = try fbr.readByte(); switch (dwarf_section) { @@ -550,15 +559,15 @@ pub const CommonInformationEntry = struct { while (aug_byte != 0) : (aug_byte = try fbr.readByte()) { switch (aug_byte) { 'z' => { - if (aug_str_len != 0) return badDwarf(); + if (aug_str_len != 0) return bad(); has_aug_data = true; }, 'e' => { - if (has_aug_data or aug_str_len != 0) return badDwarf(); - if (try fbr.readByte() != 'h') return badDwarf(); + if (has_aug_data or aug_str_len != 0) return bad(); + if (try fbr.readByte() != 'h') return bad(); has_eh_data = true; }, - else => if (has_eh_data) return badDwarf(), + else => if (has_eh_data) return bad(), } aug_str_len += 1; @@ -604,7 +613,7 @@ pub const CommonInformationEntry = struct { fde_pointer_enc = try fbr.readByte(); }, 'S', 'B', 'G' => {}, - else => return badDwarf(), + else => return bad(), } } @@ -666,17 +675,17 @@ pub const FrameDescriptionEntry = struct { ) !FrameDescriptionEntry { if (addr_size_bytes > 8) return error.InvalidAddrSize; - var fbr: FixedBufferReader = .{ .buf = fde_bytes, .endian = endian }; + var fbr: DeprecatedFixedBufferReader = .{ .buf = fde_bytes, .endian = endian }; const pc_begin = try readEhPointer(&fbr, cie.fde_pointer_enc, addr_size_bytes, .{ .pc_rel_base = try pcRelBase(@intFromPtr(&fde_bytes[fbr.pos]), pc_rel_offset), .follow_indirect = is_runtime, - }) orelse return badDwarf(); + }) orelse return bad(); const pc_range = try readEhPointer(&fbr, cie.fde_pointer_enc, addr_size_bytes, .{ .pc_rel_base = 0, .follow_indirect = false, - }) orelse return badDwarf(); + }) orelse return bad(); var aug_data: []const u8 = &[_]u8{}; const lsda_pointer = if (cie.aug_str.len > 0) blk: { @@ -708,54 +717,6 @@ pub const FrameDescriptionEntry = struct { } }; -pub const UnwindContext = struct { - allocator: Allocator, - cfa: ?usize, - pc: usize, - thread_context: *std.debug.ThreadContext, - reg_context: abi.RegisterContext, - vm: call_frame.VirtualMachine, - stack_machine: expression.StackMachine(.{ .call_frame_context = true }), - - pub fn init( - allocator: Allocator, - thread_context: *const std.debug.ThreadContext, - ) !UnwindContext { - const pc = abi.stripInstructionPtrAuthCode( - (try abi.regValueNative( - usize, - thread_context, - abi.ipRegNum(), - null, - )).*, - ); - - const context_copy = try allocator.create(std.debug.ThreadContext); - std.debug.copyContext(thread_context, context_copy); - - return .{ - .allocator = allocator, - .cfa = null, - .pc = pc, - .thread_context = context_copy, - .reg_context = undefined, - .vm = .{}, - .stack_machine = .{}, - }; - } - - pub fn deinit(self: *UnwindContext) void { - self.vm.deinit(self.allocator); - self.stack_machine.deinit(self.allocator); - self.allocator.destroy(self.thread_context); - self.* = undefined; - } - - pub fn getFp(self: *const UnwindContext) !usize { - return (try abi.regValueNative(usize, self.thread_context, abi.fpRegNum(self.reg_context), self.reg_context)).*; - } -}; - const num_sections = std.enums.directEnumArrayLen(Section.Id, 0); pub const SectionArray = [num_sections]?Section; pub const null_section_array = [_]?Section{null} ** num_sections; @@ -817,7 +778,7 @@ pub fn getSymbolName(di: *Dwarf, address: u64) ?[]const u8 { } fn scanAllFunctions(di: *Dwarf, allocator: Allocator) !void { - var fbr: FixedBufferReader = .{ .buf = di.section(.debug_info).?, .endian = di.endian }; + var fbr: DeprecatedFixedBufferReader = .{ .buf = di.section(.debug_info).?, .endian = di.endian }; var this_unit_offset: u64 = 0; while (this_unit_offset < fbr.buf.len) { @@ -828,20 +789,20 @@ fn scanAllFunctions(di: *Dwarf, allocator: Allocator) !void { const next_offset = unit_header.header_length + unit_header.unit_length; const version = try fbr.readInt(u16); - if (version < 2 or version > 5) return badDwarf(); + if (version < 2 or version > 5) return bad(); var address_size: u8 = undefined; var debug_abbrev_offset: u64 = undefined; if (version >= 5) { const unit_type = try fbr.readInt(u8); - if (unit_type != DW.UT.compile) return badDwarf(); + if (unit_type != DW.UT.compile) return bad(); address_size = try fbr.readByte(); debug_abbrev_offset = try fbr.readAddress(unit_header.format); } else { debug_abbrev_offset = try fbr.readAddress(unit_header.format); address_size = try fbr.readByte(); } - if (address_size != @sizeOf(usize)) return badDwarf(); + if (address_size != @sizeOf(usize)) return bad(); const abbrev_table = try di.getAbbrevTable(allocator, debug_abbrev_offset); @@ -915,28 +876,28 @@ fn scanAllFunctions(di: *Dwarf, allocator: Allocator) !void { // Follow the DIE it points to and repeat const ref_offset = try this_die_obj.getAttrRef(AT.abstract_origin); - if (ref_offset > next_offset) return badDwarf(); + if (ref_offset > next_offset) return bad(); try fbr.seekTo(this_unit_offset + ref_offset); this_die_obj = (try parseDie( &fbr, attrs_bufs[2], abbrev_table, unit_header.format, - )) orelse return badDwarf(); + )) orelse return bad(); } else if (this_die_obj.getAttr(AT.specification)) |_| { const after_die_offset = fbr.pos; defer fbr.pos = after_die_offset; // Follow the DIE it points to and repeat const ref_offset = try this_die_obj.getAttrRef(AT.specification); - if (ref_offset > next_offset) return badDwarf(); + if (ref_offset > next_offset) return bad(); try fbr.seekTo(this_unit_offset + ref_offset); this_die_obj = (try parseDie( &fbr, attrs_bufs[2], abbrev_table, unit_header.format, - )) orelse return badDwarf(); + )) orelse return bad(); } else { break :x null; } @@ -950,7 +911,7 @@ fn scanAllFunctions(di: *Dwarf, allocator: Allocator) !void { const pc_end = switch (high_pc_value.*) { .addr => |value| value, .udata => |offset| low_pc + offset, - else => return badDwarf(), + else => return bad(), }; try di.func_list.append(allocator, .{ @@ -1004,7 +965,7 @@ fn scanAllFunctions(di: *Dwarf, allocator: Allocator) !void { } fn scanAllCompileUnits(di: *Dwarf, allocator: Allocator) !void { - var fbr: FixedBufferReader = .{ .buf = di.section(.debug_info).?, .endian = di.endian }; + var fbr: DeprecatedFixedBufferReader = .{ .buf = di.section(.debug_info).?, .endian = di.endian }; var this_unit_offset: u64 = 0; var attrs_buf = std.ArrayList(Die.Attr).init(allocator); @@ -1018,20 +979,20 @@ fn scanAllCompileUnits(di: *Dwarf, allocator: Allocator) !void { const next_offset = unit_header.header_length + unit_header.unit_length; const version = try fbr.readInt(u16); - if (version < 2 or version > 5) return badDwarf(); + if (version < 2 or version > 5) return bad(); var address_size: u8 = undefined; var debug_abbrev_offset: u64 = undefined; if (version >= 5) { const unit_type = try fbr.readInt(u8); - if (unit_type != UT.compile) return badDwarf(); + if (unit_type != UT.compile) return bad(); address_size = try fbr.readByte(); debug_abbrev_offset = try fbr.readAddress(unit_header.format); } else { debug_abbrev_offset = try fbr.readAddress(unit_header.format); address_size = try fbr.readByte(); } - if (address_size != @sizeOf(usize)) return badDwarf(); + if (address_size != @sizeOf(usize)) return bad(); const abbrev_table = try di.getAbbrevTable(allocator, debug_abbrev_offset); @@ -1046,9 +1007,9 @@ fn scanAllCompileUnits(di: *Dwarf, allocator: Allocator) !void { attrs_buf.items, abbrev_table, unit_header.format, - )) orelse return badDwarf(); + )) orelse return bad(); - if (compile_unit_die.tag_id != DW.TAG.compile_unit) return badDwarf(); + if (compile_unit_die.tag_id != DW.TAG.compile_unit) return bad(); compile_unit_die.attrs = try allocator.dupe(Die.Attr, compile_unit_die.attrs); @@ -1070,7 +1031,7 @@ fn scanAllCompileUnits(di: *Dwarf, allocator: Allocator) !void { const pc_end = switch (high_pc_value.*) { .addr => |value| value, .udata => |offset| low_pc + offset, - else => return badDwarf(), + else => return bad(), }; break :x PcRange{ .start = low_pc, @@ -1096,7 +1057,7 @@ const DebugRangeIterator = struct { section_type: Section.Id, di: *const Dwarf, compile_unit: *const CompileUnit, - fbr: FixedBufferReader, + fbr: DeprecatedFixedBufferReader, pub fn init(ranges_value: *const FormValue, di: *const Dwarf, compile_unit: *const CompileUnit) !@This() { const section_type = if (compile_unit.version >= 5) Section.Id.debug_rnglists else Section.Id.debug_ranges; @@ -1108,19 +1069,19 @@ const DebugRangeIterator = struct { switch (compile_unit.format) { .@"32" => { const offset_loc = @as(usize, @intCast(compile_unit.rnglists_base + 4 * idx)); - if (offset_loc + 4 > debug_ranges.len) return badDwarf(); + if (offset_loc + 4 > debug_ranges.len) return bad(); const offset = readInt(u32, debug_ranges[offset_loc..][0..4], di.endian); break :off compile_unit.rnglists_base + offset; }, .@"64" => { const offset_loc = @as(usize, @intCast(compile_unit.rnglists_base + 8 * idx)); - if (offset_loc + 8 > debug_ranges.len) return badDwarf(); + if (offset_loc + 8 > debug_ranges.len) return bad(); const offset = readInt(u64, debug_ranges[offset_loc..][0..8], di.endian); break :off compile_unit.rnglists_base + offset; }, } }, - else => return badDwarf(), + else => return bad(), }; // All the addresses in the list are relative to the value @@ -1139,7 +1100,7 @@ const DebugRangeIterator = struct { .compile_unit = compile_unit, .fbr = .{ .buf = debug_ranges, - .pos = cast(usize, ranges_offset) orelse return badDwarf(), + .pos = cast(usize, ranges_offset) orelse return bad(), .endian = di.endian, }, }; @@ -1214,7 +1175,7 @@ const DebugRangeIterator = struct { .end_addr = end_addr, }; }, - else => return badDwarf(), + else => return bad(), } }, .debug_ranges => { @@ -1251,7 +1212,7 @@ pub fn findCompileUnit(di: *const Dwarf, target_address: u64) !*const CompileUni } } - return missingDwarf(); + return missing(); } /// Gets an already existing AbbrevTable given the abbrev_offset, or if not found, @@ -1270,9 +1231,9 @@ fn getAbbrevTable(di: *Dwarf, allocator: Allocator, abbrev_offset: u64) !*const } fn parseAbbrevTable(di: *Dwarf, allocator: Allocator, offset: u64) !Abbrev.Table { - var fbr: FixedBufferReader = .{ + var fbr: DeprecatedFixedBufferReader = .{ .buf = di.section(.debug_abbrev).?, - .pos = cast(usize, offset) orelse return badDwarf(), + .pos = cast(usize, offset) orelse return bad(), .endian = di.endian, }; @@ -1322,14 +1283,14 @@ fn parseAbbrevTable(di: *Dwarf, allocator: Allocator, offset: u64) !Abbrev.Table } fn parseDie( - fbr: *FixedBufferReader, + fbr: *DeprecatedFixedBufferReader, attrs_buf: []Die.Attr, abbrev_table: *const Abbrev.Table, format: Format, ) !?Die { const abbrev_code = try fbr.readUleb128(u64); if (abbrev_code == 0) return null; - const table_entry = abbrev_table.get(abbrev_code) orelse return badDwarf(); + const table_entry = abbrev_table.get(abbrev_code) orelse return bad(); const attrs = attrs_buf[0..table_entry.attrs.len]; for (attrs, table_entry.attrs) |*result_attr, attr| result_attr.* = Die.Attr{ @@ -1353,19 +1314,19 @@ pub fn getLineNumberInfo( allocator: Allocator, compile_unit: CompileUnit, target_address: u64, -) !std.debug.LineInfo { +) !std.debug.SourceLocation { const compile_unit_cwd = try compile_unit.die.getAttrString(di, AT.comp_dir, di.section(.debug_line_str), compile_unit); const line_info_offset = try compile_unit.die.getAttrSecOffset(AT.stmt_list); - var fbr: FixedBufferReader = .{ .buf = di.section(.debug_line).?, .endian = di.endian }; + var fbr: DeprecatedFixedBufferReader = .{ .buf = di.section(.debug_line).?, .endian = di.endian }; try fbr.seekTo(line_info_offset); const unit_header = try readUnitHeader(&fbr, null); - if (unit_header.unit_length == 0) return missingDwarf(); + if (unit_header.unit_length == 0) return missing(); const next_offset = unit_header.header_length + unit_header.unit_length; const version = try fbr.readInt(u16); - if (version < 2) return badDwarf(); + if (version < 2) return bad(); var addr_size: u8 = switch (unit_header.format) { .@"32" => 4, @@ -1381,7 +1342,7 @@ pub fn getLineNumberInfo( const prog_start_offset = fbr.pos + prologue_length; const minimum_instruction_length = try fbr.readByte(); - if (minimum_instruction_length == 0) return badDwarf(); + if (minimum_instruction_length == 0) return bad(); if (version >= 4) { // maximum_operations_per_instruction @@ -1392,7 +1353,7 @@ pub fn getLineNumberInfo( const line_base = try fbr.readByteSigned(); const line_range = try fbr.readByte(); - if (line_range == 0) return badDwarf(); + if (line_range == 0) return bad(); const opcode_base = try fbr.readByte(); @@ -1433,7 +1394,7 @@ pub fn getLineNumberInfo( { var dir_ent_fmt_buf: [10]FileEntFmt = undefined; const directory_entry_format_count = try fbr.readByte(); - if (directory_entry_format_count > dir_ent_fmt_buf.len) return badDwarf(); + if (directory_entry_format_count > dir_ent_fmt_buf.len) return bad(); for (dir_ent_fmt_buf[0..directory_entry_format_count]) |*ent_fmt| { ent_fmt.* = .{ .content_type_code = try fbr.readUleb128(u8), @@ -1461,7 +1422,7 @@ pub fn getLineNumberInfo( DW.LNCT.size => e.size = try form_value.getUInt(u64), DW.LNCT.MD5 => e.md5 = switch (form_value) { .data16 => |data16| data16.*, - else => return badDwarf(), + else => return bad(), }, else => continue, } @@ -1473,7 +1434,7 @@ pub fn getLineNumberInfo( var file_ent_fmt_buf: [10]FileEntFmt = undefined; const file_name_entry_format_count = try fbr.readByte(); - if (file_name_entry_format_count > file_ent_fmt_buf.len) return badDwarf(); + if (file_name_entry_format_count > file_ent_fmt_buf.len) return bad(); for (file_ent_fmt_buf[0..file_name_entry_format_count]) |*ent_fmt| { ent_fmt.* = .{ .content_type_code = try fbr.readUleb128(u8), @@ -1501,7 +1462,7 @@ pub fn getLineNumberInfo( DW.LNCT.size => e.size = try form_value.getUInt(u64), DW.LNCT.MD5 => e.md5 = switch (form_value) { .data16 => |data16| data16.*, - else => return badDwarf(), + else => return bad(), }, else => continue, } @@ -1527,7 +1488,7 @@ pub fn getLineNumberInfo( if (opcode == DW.LNS.extended_op) { const op_size = try fbr.readUleb128(u64); - if (op_size < 1) return badDwarf(); + if (op_size < 1) return bad(); const sub_op = try fbr.readByte(); switch (sub_op) { DW.LNE.end_sequence => { @@ -1600,14 +1561,14 @@ pub fn getLineNumberInfo( }, DW.LNS.set_prologue_end => {}, else => { - if (opcode - 1 >= standard_opcode_lengths.len) return badDwarf(); + if (opcode - 1 >= standard_opcode_lengths.len) return bad(); try fbr.seekForward(standard_opcode_lengths[opcode - 1]); }, } } } - return missingDwarf(); + return missing(); } fn getString(di: Dwarf, offset: u64) ![:0]const u8 { @@ -1619,28 +1580,28 @@ fn getLineString(di: Dwarf, offset: u64) ![:0]const u8 { } fn readDebugAddr(di: Dwarf, compile_unit: CompileUnit, index: u64) !u64 { - const debug_addr = di.section(.debug_addr) orelse return badDwarf(); + const debug_addr = di.section(.debug_addr) orelse return bad(); // addr_base points to the first item after the header, however we // need to read the header to know the size of each item. Empirically, // it may disagree with is_64 on the compile unit. // The header is 8 or 12 bytes depending on is_64. - if (compile_unit.addr_base < 8) return badDwarf(); + if (compile_unit.addr_base < 8) return bad(); const version = readInt(u16, debug_addr[compile_unit.addr_base - 4 ..][0..2], di.endian); - if (version != 5) return badDwarf(); + if (version != 5) return bad(); const addr_size = debug_addr[compile_unit.addr_base - 2]; const seg_size = debug_addr[compile_unit.addr_base - 1]; const byte_offset = @as(usize, @intCast(compile_unit.addr_base + (addr_size + seg_size) * index)); - if (byte_offset + addr_size > debug_addr.len) return badDwarf(); + if (byte_offset + addr_size > debug_addr.len) return bad(); return switch (addr_size) { 1 => debug_addr[byte_offset], 2 => readInt(u16, debug_addr[byte_offset..][0..2], di.endian), 4 => readInt(u32, debug_addr[byte_offset..][0..4], di.endian), 8 => readInt(u64, debug_addr[byte_offset..][0..8], di.endian), - else => badDwarf(), + else => bad(), }; } @@ -1650,7 +1611,7 @@ fn readDebugAddr(di: Dwarf, compile_unit: CompileUnit, index: u64) !u64 { /// of FDEs is built for binary searching during unwinding. pub fn scanAllUnwindInfo(di: *Dwarf, allocator: Allocator, base_address: usize) !void { if (di.section(.eh_frame_hdr)) |eh_frame_hdr| blk: { - var fbr: FixedBufferReader = .{ .buf = eh_frame_hdr, .endian = native_endian }; + var fbr: DeprecatedFixedBufferReader = .{ .buf = eh_frame_hdr, .endian = native_endian }; const version = try fbr.readByte(); if (version != 1) break :blk; @@ -1665,16 +1626,16 @@ pub fn scanAllUnwindInfo(di: *Dwarf, allocator: Allocator, base_address: usize) const eh_frame_ptr = cast(usize, try readEhPointer(&fbr, eh_frame_ptr_enc, @sizeOf(usize), .{ .pc_rel_base = @intFromPtr(&eh_frame_hdr[fbr.pos]), .follow_indirect = true, - }) orelse return badDwarf()) orelse return badDwarf(); + }) orelse return bad()) orelse return bad(); const fde_count = cast(usize, try readEhPointer(&fbr, fde_count_enc, @sizeOf(usize), .{ .pc_rel_base = @intFromPtr(&eh_frame_hdr[fbr.pos]), .follow_indirect = true, - }) orelse return badDwarf()) orelse return badDwarf(); + }) orelse return bad()) orelse return bad(); const entry_size = try ExceptionFrameHeader.entrySize(table_enc); const entries_len = fde_count * entry_size; - if (entries_len > eh_frame_hdr.len - fbr.pos) return badDwarf(); + if (entries_len > eh_frame_hdr.len - fbr.pos) return bad(); di.eh_frame_hdr = .{ .eh_frame_ptr = eh_frame_ptr, @@ -1690,7 +1651,7 @@ pub fn scanAllUnwindInfo(di: *Dwarf, allocator: Allocator, base_address: usize) const frame_sections = [2]Section.Id{ .eh_frame, .debug_frame }; for (frame_sections) |frame_section| { if (di.section(frame_section)) |section_data| { - var fbr: FixedBufferReader = .{ .buf = section_data, .endian = di.endian }; + var fbr: DeprecatedFixedBufferReader = .{ .buf = section_data, .endian = di.endian }; while (fbr.pos < fbr.buf.len) { const entry_header = try EntryHeader.read(&fbr, null, frame_section); switch (entry_header.type) { @@ -1708,7 +1669,7 @@ pub fn scanAllUnwindInfo(di: *Dwarf, allocator: Allocator, base_address: usize) try di.cie_map.put(allocator, entry_header.length_offset, cie); }, .fde => |cie_offset| { - const cie = di.cie_map.get(cie_offset) orelse return badDwarf(); + const cie = di.cie_map.get(cie_offset) orelse return bad(); const fde = try FrameDescriptionEntry.parse( entry_header.entry_bytes, di.sectionVirtualOffset(frame_section, base_address).?, @@ -1733,205 +1694,8 @@ pub fn scanAllUnwindInfo(di: *Dwarf, allocator: Allocator, base_address: usize) } } -/// Unwind a stack frame using DWARF unwinding info, updating the register context. -/// -/// If `.eh_frame_hdr` is available, it will be used to binary search for the FDE. -/// Otherwise, a linear scan of `.eh_frame` and `.debug_frame` is done to find the FDE. -/// -/// `explicit_fde_offset` is for cases where the FDE offset is known, such as when __unwind_info -/// defers unwinding to DWARF. This is an offset into the `.eh_frame` section. -pub fn unwindFrame(di: *const Dwarf, context: *UnwindContext, ma: *StackIterator.MemoryAccessor, explicit_fde_offset: ?usize) !usize { - if (!comptime abi.supportsUnwinding(builtin.target)) return error.UnsupportedCpuArchitecture; - if (context.pc == 0) return 0; - - // Find the FDE and CIE - var cie: CommonInformationEntry = undefined; - var fde: FrameDescriptionEntry = undefined; - - if (explicit_fde_offset) |fde_offset| { - const dwarf_section: Section.Id = .eh_frame; - const frame_section = di.section(dwarf_section) orelse return error.MissingFDE; - if (fde_offset >= frame_section.len) return error.MissingFDE; - - var fbr: FixedBufferReader = .{ - .buf = frame_section, - .pos = fde_offset, - .endian = di.endian, - }; - - const fde_entry_header = try EntryHeader.read(&fbr, null, dwarf_section); - if (fde_entry_header.type != .fde) return error.MissingFDE; - - const cie_offset = fde_entry_header.type.fde; - try fbr.seekTo(cie_offset); - - fbr.endian = native_endian; - const cie_entry_header = try EntryHeader.read(&fbr, null, dwarf_section); - if (cie_entry_header.type != .cie) return badDwarf(); - - cie = try CommonInformationEntry.parse( - cie_entry_header.entry_bytes, - 0, - true, - cie_entry_header.format, - dwarf_section, - cie_entry_header.length_offset, - @sizeOf(usize), - native_endian, - ); - - fde = try FrameDescriptionEntry.parse( - fde_entry_header.entry_bytes, - 0, - true, - cie, - @sizeOf(usize), - native_endian, - ); - } else if (di.eh_frame_hdr) |header| { - const eh_frame_len = if (di.section(.eh_frame)) |eh_frame| eh_frame.len else null; - try header.findEntry( - ma, - eh_frame_len, - @intFromPtr(di.section(.eh_frame_hdr).?.ptr), - context.pc, - &cie, - &fde, - ); - } else { - const index = std.sort.binarySearch(FrameDescriptionEntry, context.pc, di.fde_list.items, {}, struct { - pub fn compareFn(_: void, pc: usize, mid_item: FrameDescriptionEntry) std.math.Order { - if (pc < mid_item.pc_begin) return .lt; - - const range_end = mid_item.pc_begin + mid_item.pc_range; - if (pc < range_end) return .eq; - - return .gt; - } - }.compareFn); - - fde = if (index) |i| di.fde_list.items[i] else return error.MissingFDE; - cie = di.cie_map.get(fde.cie_length_offset) orelse return error.MissingCIE; - } - - var expression_context: expression.Context = .{ - .format = cie.format, - .memory_accessor = ma, - .compile_unit = di.findCompileUnit(fde.pc_begin) catch null, - .thread_context = context.thread_context, - .reg_context = context.reg_context, - .cfa = context.cfa, - }; - - context.vm.reset(); - context.reg_context.eh_frame = cie.version != 4; - context.reg_context.is_macho = di.is_macho; - - const row = try context.vm.runToNative(context.allocator, context.pc, cie, fde); - context.cfa = switch (row.cfa.rule) { - .val_offset => |offset| blk: { - const register = row.cfa.register orelse return error.InvalidCFARule; - const value = readInt(usize, (try abi.regBytes(context.thread_context, register, context.reg_context))[0..@sizeOf(usize)], native_endian); - break :blk try call_frame.applyOffset(value, offset); - }, - .expression => |expr| blk: { - context.stack_machine.reset(); - const value = try context.stack_machine.run( - expr, - context.allocator, - expression_context, - context.cfa, - ); - - if (value) |v| { - if (v != .generic) return error.InvalidExpressionValue; - break :blk v.generic; - } else return error.NoExpressionValue; - }, - else => return error.InvalidCFARule, - }; - - if (ma.load(usize, context.cfa.?) == null) return error.InvalidCFA; - expression_context.cfa = context.cfa; - - // Buffering the modifications is done because copying the thread context is not portable, - // some implementations (ie. darwin) use internal pointers to the mcontext. - var arena = std.heap.ArenaAllocator.init(context.allocator); - defer arena.deinit(); - const update_allocator = arena.allocator(); - - const RegisterUpdate = struct { - // Backed by thread_context - dest: []u8, - // Backed by arena - src: []const u8, - prev: ?*@This(), - }; - - var update_tail: ?*RegisterUpdate = null; - var has_return_address = true; - for (context.vm.rowColumns(row)) |column| { - if (column.register) |register| { - if (register == cie.return_address_register) { - has_return_address = column.rule != .undefined; - } - - const dest = try abi.regBytes(context.thread_context, register, context.reg_context); - const src = try update_allocator.alloc(u8, dest.len); - - const prev = update_tail; - update_tail = try update_allocator.create(RegisterUpdate); - update_tail.?.* = .{ - .dest = dest, - .src = src, - .prev = prev, - }; - - try column.resolveValue( - context, - expression_context, - ma, - src, - ); - } - } - - // On all implemented architectures, the CFA is defined as being the previous frame's SP - (try abi.regValueNative(usize, context.thread_context, abi.spRegNum(context.reg_context), context.reg_context)).* = context.cfa.?; - - while (update_tail) |tail| { - @memcpy(tail.dest, tail.src); - update_tail = tail.prev; - } - - if (has_return_address) { - context.pc = abi.stripInstructionPtrAuthCode(readInt(usize, (try abi.regBytes( - context.thread_context, - cie.return_address_register, - context.reg_context, - ))[0..@sizeOf(usize)], native_endian)); - } else { - context.pc = 0; - } - - (try abi.regValueNative(usize, context.thread_context, abi.ipRegNum(), context.reg_context)).* = context.pc; - - // The call instruction will have pushed the address of the instruction that follows the call as the return address. - // This next instruction may be past the end of the function if the caller was `noreturn` (ie. the last instruction in - // the function was the call). If we were to look up an FDE entry using the return address directly, it could end up - // either not finding an FDE at all, or using the next FDE in the program, producing incorrect results. To prevent this, - // we subtract one so that the next lookup is guaranteed to land inside the - // - // The exception to this rule is signal frames, where we return execution would be returned to the instruction - // that triggered the handler. - const return_address = context.pc; - if (context.pc > 0 and !cie.isSignalFrame()) context.pc -= 1; - - return return_address; -} - fn parseFormValue( - fbr: *FixedBufferReader, + fbr: *DeprecatedFixedBufferReader, form_id: u64, format: Format, implicit_const: ?i64, @@ -1990,12 +1754,12 @@ fn parseFormValue( FORM.strx => .{ .strx = try fbr.readUleb128(usize) }, FORM.line_strp => .{ .line_strp = try fbr.readAddress(format) }, FORM.indirect => parseFormValue(fbr, try fbr.readUleb128(u64), format, implicit_const), - FORM.implicit_const => .{ .sdata = implicit_const orelse return badDwarf() }, + FORM.implicit_const => .{ .sdata = implicit_const orelse return bad() }, FORM.loclistx => .{ .loclistx = try fbr.readUleb128(u64) }, FORM.rnglistx => .{ .rnglistx = try fbr.readUleb128(u64) }, else => { //debug.print("unrecognized form id: {x}\n", .{form_id}); - return badDwarf(); + return bad(); }, }; } @@ -2084,27 +1848,27 @@ const LineNumberProgram = struct { self: *LineNumberProgram, allocator: Allocator, file_entries: []const FileEntry, - ) !?std.debug.LineInfo { + ) !?std.debug.SourceLocation { if (self.prev_valid and self.target_address >= self.prev_address and self.target_address < self.address) { const file_index = if (self.version >= 5) self.prev_file else i: { - if (self.prev_file == 0) return missingDwarf(); + if (self.prev_file == 0) return missing(); break :i self.prev_file - 1; }; - if (file_index >= file_entries.len) return badDwarf(); + if (file_index >= file_entries.len) return bad(); const file_entry = &file_entries[file_index]; - if (file_entry.dir_index >= self.include_dirs.len) return badDwarf(); + if (file_entry.dir_index >= self.include_dirs.len) return bad(); const dir_name = self.include_dirs[file_entry.dir_index].path; const file_name = try std.fs.path.join(allocator, &[_][]const u8{ dir_name, file_entry.path, }); - return std.debug.LineInfo{ + return std.debug.SourceLocation{ .line = if (self.prev_line >= 0) @as(u64, @intCast(self.prev_line)) else 0, .column = self.prev_column, .file_name = file_name, @@ -2128,14 +1892,14 @@ const UnitHeader = struct { header_length: u4, unit_length: u64, }; -fn readUnitHeader(fbr: *FixedBufferReader, opt_ma: ?*StackIterator.MemoryAccessor) !UnitHeader { +fn readUnitHeader(fbr: *DeprecatedFixedBufferReader, opt_ma: ?*MemoryAccessor) !UnitHeader { return switch (try if (opt_ma) |ma| fbr.readIntChecked(u32, ma) else fbr.readInt(u32)) { 0...0xfffffff0 - 1 => |unit_length| .{ .format = .@"32", .header_length = 4, .unit_length = unit_length, }, - 0xfffffff0...0xffffffff - 1 => badDwarf(), + 0xfffffff0...0xffffffff - 1 => bad(), 0xffffffff => .{ .format = .@"64", .header_length = 12, @@ -2145,7 +1909,7 @@ fn readUnitHeader(fbr: *FixedBufferReader, opt_ma: ?*StackIterator.MemoryAccesso } /// Returns the DWARF register number for an x86_64 register number found in compact unwind info -fn compactUnwindToDwarfRegNumber(unwind_reg_number: u3) !u8 { +pub fn compactUnwindToDwarfRegNumber(unwind_reg_number: u3) !u8 { return switch (unwind_reg_number) { 1 => 3, // RBX 2 => 12, // R12 @@ -2159,473 +1923,25 @@ fn compactUnwindToDwarfRegNumber(unwind_reg_number: u3) !u8 { /// This function is to make it handy to comment out the return and make it /// into a crash when working on this file. -fn badDwarf() error{InvalidDebugInfo} { - //if (true) @panic("badDwarf"); // can be handy to uncomment when working on this file +pub fn bad() error{InvalidDebugInfo} { + //if (true) @panic("bad dwarf"); // can be handy to uncomment when working on this file return error.InvalidDebugInfo; } -fn missingDwarf() error{MissingDebugInfo} { - //if (true) @panic("missingDwarf"); // can be handy to uncomment when working on this file +fn missing() error{MissingDebugInfo} { + //if (true) @panic("missing dwarf"); // can be handy to uncomment when working on this file return error.MissingDebugInfo; } fn getStringGeneric(opt_str: ?[]const u8, offset: u64) ![:0]const u8 { - const str = opt_str orelse return badDwarf(); - if (offset > str.len) return badDwarf(); - const casted_offset = cast(usize, offset) orelse return badDwarf(); + const str = opt_str orelse return bad(); + if (offset > str.len) return bad(); + const casted_offset = cast(usize, offset) orelse return bad(); // Valid strings always have a terminating zero byte - const last = std.mem.indexOfScalarPos(u8, str, casted_offset, 0) orelse return badDwarf(); + const last = std.mem.indexOfScalarPos(u8, str, casted_offset, 0) orelse return bad(); return str[casted_offset..last :0]; } -// Reading debug info needs to be fast, even when compiled in debug mode, -// so avoid using a `std.io.FixedBufferStream` which is too slow. -pub const FixedBufferReader = struct { - buf: []const u8, - pos: usize = 0, - endian: std.builtin.Endian, - - pub const Error = error{ EndOfBuffer, Overflow, InvalidBuffer }; - - fn seekTo(fbr: *FixedBufferReader, pos: u64) Error!void { - if (pos > fbr.buf.len) return error.EndOfBuffer; - fbr.pos = @intCast(pos); - } - - fn seekForward(fbr: *FixedBufferReader, amount: u64) Error!void { - if (fbr.buf.len - fbr.pos < amount) return error.EndOfBuffer; - fbr.pos += @intCast(amount); - } - - pub inline fn readByte(fbr: *FixedBufferReader) Error!u8 { - if (fbr.pos >= fbr.buf.len) return error.EndOfBuffer; - defer fbr.pos += 1; - return fbr.buf[fbr.pos]; - } - - fn readByteSigned(fbr: *FixedBufferReader) Error!i8 { - return @bitCast(try fbr.readByte()); - } - - fn readInt(fbr: *FixedBufferReader, comptime T: type) Error!T { - const size = @divExact(@typeInfo(T).Int.bits, 8); - if (fbr.buf.len - fbr.pos < size) return error.EndOfBuffer; - defer fbr.pos += size; - return std.mem.readInt(T, fbr.buf[fbr.pos..][0..size], fbr.endian); - } - - fn readIntChecked( - fbr: *FixedBufferReader, - comptime T: type, - ma: *std.debug.StackIterator.MemoryAccessor, - ) Error!T { - if (ma.load(T, @intFromPtr(fbr.buf[fbr.pos..].ptr)) == null) - return error.InvalidBuffer; - - return fbr.readInt(T); - } - - fn readUleb128(fbr: *FixedBufferReader, comptime T: type) Error!T { - return std.leb.readUleb128(T, fbr); - } - - fn readIleb128(fbr: *FixedBufferReader, comptime T: type) Error!T { - return std.leb.readIleb128(T, fbr); - } - - fn readAddress(fbr: *FixedBufferReader, format: Format) Error!u64 { - return switch (format) { - .@"32" => try fbr.readInt(u32), - .@"64" => try fbr.readInt(u64), - }; - } - - fn readAddressChecked( - fbr: *FixedBufferReader, - format: Format, - ma: *std.debug.StackIterator.MemoryAccessor, - ) Error!u64 { - return switch (format) { - .@"32" => try fbr.readIntChecked(u32, ma), - .@"64" => try fbr.readIntChecked(u64, ma), - }; - } - - fn readBytes(fbr: *FixedBufferReader, len: usize) Error![]const u8 { - if (fbr.buf.len - fbr.pos < len) return error.EndOfBuffer; - defer fbr.pos += len; - return fbr.buf[fbr.pos..][0..len]; - } - - fn readBytesTo(fbr: *FixedBufferReader, comptime sentinel: u8) Error![:sentinel]const u8 { - const end = @call(.always_inline, std.mem.indexOfScalarPos, .{ - u8, - fbr.buf, - fbr.pos, - sentinel, - }) orelse return error.EndOfBuffer; - defer fbr.pos = end + 1; - return fbr.buf[fbr.pos..end :sentinel]; - } -}; - -/// Unwind a frame using MachO compact unwind info (from __unwind_info). -/// If the compact encoding can't encode a way to unwind a frame, it will -/// defer unwinding to DWARF, in which case `.eh_frame` will be used if available. -pub fn unwindFrameMachO( - context: *UnwindContext, - ma: *StackIterator.MemoryAccessor, - unwind_info: []const u8, - eh_frame: ?[]const u8, - module_base_address: usize, -) !usize { - const macho = std.macho; - - const header = std.mem.bytesAsValue( - macho.unwind_info_section_header, - unwind_info[0..@sizeOf(macho.unwind_info_section_header)], - ); - const indices = std.mem.bytesAsSlice( - macho.unwind_info_section_header_index_entry, - unwind_info[header.indexSectionOffset..][0 .. header.indexCount * @sizeOf(macho.unwind_info_section_header_index_entry)], - ); - if (indices.len == 0) return error.MissingUnwindInfo; - - const mapped_pc = context.pc - module_base_address; - const second_level_index = blk: { - var left: usize = 0; - var len: usize = indices.len; - - while (len > 1) { - const mid = left + len / 2; - const offset = indices[mid].functionOffset; - if (mapped_pc < offset) { - len /= 2; - } else { - left = mid; - if (mapped_pc == offset) break; - len -= len / 2; - } - } - - // Last index is a sentinel containing the highest address as its functionOffset - if (indices[left].secondLevelPagesSectionOffset == 0) return error.MissingUnwindInfo; - break :blk &indices[left]; - }; - - const common_encodings = std.mem.bytesAsSlice( - macho.compact_unwind_encoding_t, - unwind_info[header.commonEncodingsArraySectionOffset..][0 .. header.commonEncodingsArrayCount * @sizeOf(macho.compact_unwind_encoding_t)], - ); - - const start_offset = second_level_index.secondLevelPagesSectionOffset; - const kind = std.mem.bytesAsValue( - macho.UNWIND_SECOND_LEVEL, - unwind_info[start_offset..][0..@sizeOf(macho.UNWIND_SECOND_LEVEL)], - ); - - const entry: struct { - function_offset: usize, - raw_encoding: u32, - } = switch (kind.*) { - .REGULAR => blk: { - const page_header = std.mem.bytesAsValue( - macho.unwind_info_regular_second_level_page_header, - unwind_info[start_offset..][0..@sizeOf(macho.unwind_info_regular_second_level_page_header)], - ); - - const entries = std.mem.bytesAsSlice( - macho.unwind_info_regular_second_level_entry, - unwind_info[start_offset + page_header.entryPageOffset ..][0 .. page_header.entryCount * @sizeOf(macho.unwind_info_regular_second_level_entry)], - ); - if (entries.len == 0) return error.InvalidUnwindInfo; - - var left: usize = 0; - var len: usize = entries.len; - while (len > 1) { - const mid = left + len / 2; - const offset = entries[mid].functionOffset; - if (mapped_pc < offset) { - len /= 2; - } else { - left = mid; - if (mapped_pc == offset) break; - len -= len / 2; - } - } - - break :blk .{ - .function_offset = entries[left].functionOffset, - .raw_encoding = entries[left].encoding, - }; - }, - .COMPRESSED => blk: { - const page_header = std.mem.bytesAsValue( - macho.unwind_info_compressed_second_level_page_header, - unwind_info[start_offset..][0..@sizeOf(macho.unwind_info_compressed_second_level_page_header)], - ); - - const entries = std.mem.bytesAsSlice( - macho.UnwindInfoCompressedEntry, - unwind_info[start_offset + page_header.entryPageOffset ..][0 .. page_header.entryCount * @sizeOf(macho.UnwindInfoCompressedEntry)], - ); - if (entries.len == 0) return error.InvalidUnwindInfo; - - var left: usize = 0; - var len: usize = entries.len; - while (len > 1) { - const mid = left + len / 2; - const offset = second_level_index.functionOffset + entries[mid].funcOffset; - if (mapped_pc < offset) { - len /= 2; - } else { - left = mid; - if (mapped_pc == offset) break; - len -= len / 2; - } - } - - const entry = entries[left]; - const function_offset = second_level_index.functionOffset + entry.funcOffset; - if (entry.encodingIndex < header.commonEncodingsArrayCount) { - if (entry.encodingIndex >= common_encodings.len) return error.InvalidUnwindInfo; - break :blk .{ - .function_offset = function_offset, - .raw_encoding = common_encodings[entry.encodingIndex], - }; - } else { - const local_index = try std.math.sub( - u8, - entry.encodingIndex, - cast(u8, header.commonEncodingsArrayCount) orelse return error.InvalidUnwindInfo, - ); - const local_encodings = std.mem.bytesAsSlice( - macho.compact_unwind_encoding_t, - unwind_info[start_offset + page_header.encodingsPageOffset ..][0 .. page_header.encodingsCount * @sizeOf(macho.compact_unwind_encoding_t)], - ); - if (local_index >= local_encodings.len) return error.InvalidUnwindInfo; - break :blk .{ - .function_offset = function_offset, - .raw_encoding = local_encodings[local_index], - }; - } - }, - else => return error.InvalidUnwindInfo, - }; - - if (entry.raw_encoding == 0) return error.NoUnwindInfo; - const reg_context = abi.RegisterContext{ - .eh_frame = false, - .is_macho = true, - }; - - const encoding: macho.CompactUnwindEncoding = @bitCast(entry.raw_encoding); - const new_ip = switch (builtin.cpu.arch) { - .x86_64 => switch (encoding.mode.x86_64) { - .OLD => return error.UnimplementedUnwindEncoding, - .RBP_FRAME => blk: { - const regs: [5]u3 = .{ - encoding.value.x86_64.frame.reg0, - encoding.value.x86_64.frame.reg1, - encoding.value.x86_64.frame.reg2, - encoding.value.x86_64.frame.reg3, - encoding.value.x86_64.frame.reg4, - }; - - const frame_offset = encoding.value.x86_64.frame.frame_offset * @sizeOf(usize); - var max_reg: usize = 0; - inline for (regs, 0..) |reg, i| { - if (reg > 0) max_reg = i; - } - - const fp = (try abi.regValueNative(usize, context.thread_context, abi.fpRegNum(reg_context), reg_context)).*; - const new_sp = fp + 2 * @sizeOf(usize); - - // Verify the stack range we're about to read register values from - if (ma.load(usize, new_sp) == null or ma.load(usize, fp - frame_offset + max_reg * @sizeOf(usize)) == null) return error.InvalidUnwindInfo; - - const ip_ptr = fp + @sizeOf(usize); - const new_ip = @as(*const usize, @ptrFromInt(ip_ptr)).*; - const new_fp = @as(*const usize, @ptrFromInt(fp)).*; - - (try abi.regValueNative(usize, context.thread_context, abi.fpRegNum(reg_context), reg_context)).* = new_fp; - (try abi.regValueNative(usize, context.thread_context, abi.spRegNum(reg_context), reg_context)).* = new_sp; - (try abi.regValueNative(usize, context.thread_context, abi.ipRegNum(), reg_context)).* = new_ip; - - for (regs, 0..) |reg, i| { - if (reg == 0) continue; - const addr = fp - frame_offset + i * @sizeOf(usize); - const reg_number = try compactUnwindToDwarfRegNumber(reg); - (try abi.regValueNative(usize, context.thread_context, reg_number, reg_context)).* = @as(*const usize, @ptrFromInt(addr)).*; - } - - break :blk new_ip; - }, - .STACK_IMMD, - .STACK_IND, - => blk: { - const sp = (try abi.regValueNative(usize, context.thread_context, abi.spRegNum(reg_context), reg_context)).*; - const stack_size = if (encoding.mode.x86_64 == .STACK_IMMD) - @as(usize, encoding.value.x86_64.frameless.stack.direct.stack_size) * @sizeOf(usize) - else stack_size: { - // In .STACK_IND, the stack size is inferred from the subq instruction at the beginning of the function. - const sub_offset_addr = - module_base_address + - entry.function_offset + - encoding.value.x86_64.frameless.stack.indirect.sub_offset; - if (ma.load(usize, sub_offset_addr) == null) return error.InvalidUnwindInfo; - - // `sub_offset_addr` points to the offset of the literal within the instruction - const sub_operand = @as(*align(1) const u32, @ptrFromInt(sub_offset_addr)).*; - break :stack_size sub_operand + @sizeOf(usize) * @as(usize, encoding.value.x86_64.frameless.stack.indirect.stack_adjust); - }; - - // Decode the Lehmer-coded sequence of registers. - // For a description of the encoding see lib/libc/include/any-macos.13-any/mach-o/compact_unwind_encoding.h - - // Decode the variable-based permutation number into its digits. Each digit represents - // an index into the list of register numbers that weren't yet used in the sequence at - // the time the digit was added. - const reg_count = encoding.value.x86_64.frameless.stack_reg_count; - const ip_ptr = if (reg_count > 0) reg_blk: { - var digits: [6]u3 = undefined; - var accumulator: usize = encoding.value.x86_64.frameless.stack_reg_permutation; - var base: usize = 2; - for (0..reg_count) |i| { - const div = accumulator / base; - digits[digits.len - 1 - i] = @intCast(accumulator - base * div); - accumulator = div; - base += 1; - } - - const reg_numbers = [_]u3{ 1, 2, 3, 4, 5, 6 }; - var registers: [reg_numbers.len]u3 = undefined; - var used_indices = [_]bool{false} ** reg_numbers.len; - for (digits[digits.len - reg_count ..], 0..) |target_unused_index, i| { - var unused_count: u8 = 0; - const unused_index = for (used_indices, 0..) |used, index| { - if (!used) { - if (target_unused_index == unused_count) break index; - unused_count += 1; - } - } else unreachable; - - registers[i] = reg_numbers[unused_index]; - used_indices[unused_index] = true; - } - - var reg_addr = sp + stack_size - @sizeOf(usize) * @as(usize, reg_count + 1); - if (ma.load(usize, reg_addr) == null) return error.InvalidUnwindInfo; - for (0..reg_count) |i| { - const reg_number = try compactUnwindToDwarfRegNumber(registers[i]); - (try abi.regValueNative(usize, context.thread_context, reg_number, reg_context)).* = @as(*const usize, @ptrFromInt(reg_addr)).*; - reg_addr += @sizeOf(usize); - } - - break :reg_blk reg_addr; - } else sp + stack_size - @sizeOf(usize); - - const new_ip = @as(*const usize, @ptrFromInt(ip_ptr)).*; - const new_sp = ip_ptr + @sizeOf(usize); - if (ma.load(usize, new_sp) == null) return error.InvalidUnwindInfo; - - (try abi.regValueNative(usize, context.thread_context, abi.spRegNum(reg_context), reg_context)).* = new_sp; - (try abi.regValueNative(usize, context.thread_context, abi.ipRegNum(), reg_context)).* = new_ip; - - break :blk new_ip; - }, - .DWARF => { - return unwindFrameMachODwarf(context, ma, eh_frame orelse return error.MissingEhFrame, @intCast(encoding.value.x86_64.dwarf)); - }, - }, - .aarch64 => switch (encoding.mode.arm64) { - .OLD => return error.UnimplementedUnwindEncoding, - .FRAMELESS => blk: { - const sp = (try abi.regValueNative(usize, context.thread_context, abi.spRegNum(reg_context), reg_context)).*; - const new_sp = sp + encoding.value.arm64.frameless.stack_size * 16; - const new_ip = (try abi.regValueNative(usize, context.thread_context, 30, reg_context)).*; - if (ma.load(usize, new_sp) == null) return error.InvalidUnwindInfo; - (try abi.regValueNative(usize, context.thread_context, abi.spRegNum(reg_context), reg_context)).* = new_sp; - break :blk new_ip; - }, - .DWARF => { - return unwindFrameMachODwarf(context, ma, eh_frame orelse return error.MissingEhFrame, @intCast(encoding.value.arm64.dwarf)); - }, - .FRAME => blk: { - const fp = (try abi.regValueNative(usize, context.thread_context, abi.fpRegNum(reg_context), reg_context)).*; - const new_sp = fp + 16; - const ip_ptr = fp + @sizeOf(usize); - - const num_restored_pairs: usize = - @popCount(@as(u5, @bitCast(encoding.value.arm64.frame.x_reg_pairs))) + - @popCount(@as(u4, @bitCast(encoding.value.arm64.frame.d_reg_pairs))); - const min_reg_addr = fp - num_restored_pairs * 2 * @sizeOf(usize); - - if (ma.load(usize, new_sp) == null or ma.load(usize, min_reg_addr) == null) return error.InvalidUnwindInfo; - - var reg_addr = fp - @sizeOf(usize); - inline for (@typeInfo(@TypeOf(encoding.value.arm64.frame.x_reg_pairs)).Struct.fields, 0..) |field, i| { - if (@field(encoding.value.arm64.frame.x_reg_pairs, field.name) != 0) { - (try abi.regValueNative(usize, context.thread_context, 19 + i, reg_context)).* = @as(*const usize, @ptrFromInt(reg_addr)).*; - reg_addr += @sizeOf(usize); - (try abi.regValueNative(usize, context.thread_context, 20 + i, reg_context)).* = @as(*const usize, @ptrFromInt(reg_addr)).*; - reg_addr += @sizeOf(usize); - } - } - - inline for (@typeInfo(@TypeOf(encoding.value.arm64.frame.d_reg_pairs)).Struct.fields, 0..) |field, i| { - if (@field(encoding.value.arm64.frame.d_reg_pairs, field.name) != 0) { - // Only the lower half of the 128-bit V registers are restored during unwinding - @memcpy( - try abi.regBytes(context.thread_context, 64 + 8 + i, context.reg_context), - std.mem.asBytes(@as(*const usize, @ptrFromInt(reg_addr))), - ); - reg_addr += @sizeOf(usize); - @memcpy( - try abi.regBytes(context.thread_context, 64 + 9 + i, context.reg_context), - std.mem.asBytes(@as(*const usize, @ptrFromInt(reg_addr))), - ); - reg_addr += @sizeOf(usize); - } - } - - const new_ip = @as(*const usize, @ptrFromInt(ip_ptr)).*; - const new_fp = @as(*const usize, @ptrFromInt(fp)).*; - - (try abi.regValueNative(usize, context.thread_context, abi.fpRegNum(reg_context), reg_context)).* = new_fp; - (try abi.regValueNative(usize, context.thread_context, abi.ipRegNum(), reg_context)).* = new_ip; - - break :blk new_ip; - }, - }, - else => return error.UnimplementedArch, - }; - - context.pc = abi.stripInstructionPtrAuthCode(new_ip); - if (context.pc > 0) context.pc -= 1; - return new_ip; -} - -fn unwindFrameMachODwarf( - context: *UnwindContext, - ma: *std.debug.StackIterator.MemoryAccessor, - eh_frame: []const u8, - fde_offset: usize, -) !usize { - var di = Dwarf{ - .endian = native_endian, - .is_macho = true, - }; - defer di.deinit(context.allocator); - - di.sections[@intFromEnum(Section.Id.eh_frame)] = .{ - .data = eh_frame, - .owned = false, - }; - - return di.unwindFrame(context, ma, fde_offset); -} - const EhPointerContext = struct { // The address of the pointer field itself pc_rel_base: u64, @@ -2641,7 +1957,7 @@ const EhPointerContext = struct { text_rel_base: ?u64 = null, function_rel_base: ?u64 = null, }; -fn readEhPointer(fbr: *FixedBufferReader, enc: u8, addr_size_bytes: u8, ctx: EhPointerContext) !?u64 { +fn readEhPointer(fbr: *DeprecatedFixedBufferReader, enc: u8, addr_size_bytes: u8, ctx: EhPointerContext) !?u64 { if (enc == EH.PE.omit) return null; const value: union(enum) { @@ -2664,7 +1980,7 @@ fn readEhPointer(fbr: *FixedBufferReader, enc: u8, addr_size_bytes: u8, ctx: EhP EH.PE.sdata2 => .{ .signed = try fbr.readInt(i16) }, EH.PE.sdata4 => .{ .signed = try fbr.readInt(i32) }, EH.PE.sdata8 => .{ .signed = try fbr.readInt(i64) }, - else => return badDwarf(), + else => return bad(), }; const base = switch (enc & EH.PE.rel_mask) { diff --git a/lib/std/debug/Dwarf/abi.zig b/lib/std/debug/Dwarf/abi.zig index 1a47625ae7..b2c4cb9536 100644 --- a/lib/std/debug/Dwarf/abi.zig +++ b/lib/std/debug/Dwarf/abi.zig @@ -1,44 +1,50 @@ const builtin = @import("builtin"); + const std = @import("../../std.zig"); const mem = std.mem; -const native_os = builtin.os.tag; const posix = std.posix; +const Arch = std.Target.Cpu.Arch; +/// Tells whether unwinding for this target is supported by the Dwarf standard. +/// +/// See also `std.debug.SelfInfo.supportsUnwinding` which tells whether the Zig +/// standard library has a working implementation of unwinding for this target. pub fn supportsUnwinding(target: std.Target) bool { return switch (target.cpu.arch) { - .x86 => switch (target.os.tag) { - .linux, .netbsd, .solaris, .illumos => true, - else => false, - }, - .x86_64 => switch (target.os.tag) { - .linux, .netbsd, .freebsd, .openbsd, .macos, .ios, .solaris, .illumos => true, - else => false, - }, - .arm => switch (target.os.tag) { - .linux => true, - else => false, - }, - .aarch64 => switch (target.os.tag) { - .linux, .netbsd, .freebsd, .macos, .ios => true, - else => false, - }, - else => false, + .amdgcn, + .nvptx, + .nvptx64, + .spirv, + .spirv32, + .spirv64, + .spu_2, + => false, + + // Enabling this causes relocation errors such as: + // error: invalid relocation type R_RISCV_SUB32 at offset 0x20 + .riscv64, .riscv32 => false, + + // Conservative guess. Feel free to update this logic with any targets + // that are known to not support Dwarf unwinding. + else => true, }; } -pub fn ipRegNum() u8 { - return switch (builtin.cpu.arch) { +/// Returns `null` for CPU architectures without an instruction pointer register. +pub fn ipRegNum(arch: Arch) ?u8 { + return switch (arch) { .x86 => 8, .x86_64 => 16, .arm => 15, .aarch64 => 32, - else => unreachable, + else => null, }; } -pub fn fpRegNum(reg_context: RegisterContext) u8 { - return switch (builtin.cpu.arch) { - // GCC on OS X historically did the opposite of ELF for these registers (only in .eh_frame), and that is now the convention for MachO +pub fn fpRegNum(arch: Arch, reg_context: RegisterContext) u8 { + return switch (arch) { + // GCC on OS X historically did the opposite of ELF for these registers + // (only in .eh_frame), and that is now the convention for MachO .x86 => if (reg_context.eh_frame and reg_context.is_macho) 4 else 5, .x86_64 => 6, .arm => 11, @@ -47,8 +53,8 @@ pub fn fpRegNum(reg_context: RegisterContext) u8 { }; } -pub fn spRegNum(reg_context: RegisterContext) u8 { - return switch (builtin.cpu.arch) { +pub fn spRegNum(arch: Arch, reg_context: RegisterContext) u8 { + return switch (arch) { .x86 => if (reg_context.eh_frame and reg_context.is_macho) 5 else 4, .x86_64 => 7, .arm => 13, @@ -57,33 +63,12 @@ pub fn spRegNum(reg_context: RegisterContext) u8 { }; } -/// Some platforms use pointer authentication - the upper bits of instruction pointers contain a signature. -/// This function clears these signature bits to make the pointer usable. -pub inline fn stripInstructionPtrAuthCode(ptr: usize) usize { - if (builtin.cpu.arch == .aarch64) { - // `hint 0x07` maps to `xpaclri` (or `nop` if the hardware doesn't support it) - // The save / restore is because `xpaclri` operates on x30 (LR) - return asm ( - \\mov x16, x30 - \\mov x30, x15 - \\hint 0x07 - \\mov x15, x30 - \\mov x30, x16 - : [ret] "={x15}" (-> usize), - : [ptr] "{x15}" (ptr), - : "x16" - ); - } - - return ptr; -} - pub const RegisterContext = struct { eh_frame: bool, is_macho: bool, }; -pub const AbiError = error{ +pub const RegBytesError = error{ InvalidRegister, UnimplementedArch, UnimplementedOs, @@ -91,55 +76,21 @@ pub const AbiError = error{ ThreadContextNotSupported, }; -fn RegValueReturnType(comptime ContextPtrType: type, comptime T: type) type { - const reg_bytes_type = comptime RegBytesReturnType(ContextPtrType); - const info = @typeInfo(reg_bytes_type).Pointer; - return @Type(.{ - .Pointer = .{ - .size = .One, - .is_const = info.is_const, - .is_volatile = info.is_volatile, - .is_allowzero = info.is_allowzero, - .alignment = info.alignment, - .address_space = info.address_space, - .child = T, - .sentinel = null, - }, - }); -} - -/// Returns a pointer to a register stored in a ThreadContext, preserving the pointer attributes of the context. -pub fn regValueNative( - comptime T: type, - thread_context_ptr: anytype, - reg_number: u8, - reg_context: ?RegisterContext, -) !RegValueReturnType(@TypeOf(thread_context_ptr), T) { - const reg_bytes = try regBytes(thread_context_ptr, reg_number, reg_context); - if (@sizeOf(T) != reg_bytes.len) return error.IncompatibleRegisterSize; - return mem.bytesAsValue(T, reg_bytes[0..@sizeOf(T)]); -} - -fn RegBytesReturnType(comptime ContextPtrType: type) type { - const info = @typeInfo(ContextPtrType); - if (info != .Pointer or info.Pointer.child != std.debug.ThreadContext) { - @compileError("Expected a pointer to std.debug.ThreadContext, got " ++ @typeName(@TypeOf(ContextPtrType))); - } - - return if (info.Pointer.is_const) return []const u8 else []u8; -} - /// Returns a slice containing the backing storage for `reg_number`. /// +/// This function assumes the Dwarf information corresponds not necessarily to +/// the current executable, but at least with a matching CPU architecture and +/// OS. It is planned to lift this limitation with a future enhancement. +/// /// `reg_context` describes in what context the register number is used, as it can have different /// meanings depending on the DWARF container. It is only required when getting the stack or /// frame pointer register on some architectures. pub fn regBytes( - thread_context_ptr: anytype, + thread_context_ptr: *std.debug.ThreadContext, reg_number: u8, reg_context: ?RegisterContext, -) AbiError!RegBytesReturnType(@TypeOf(thread_context_ptr)) { - if (native_os == .windows) { +) RegBytesError![]u8 { + if (builtin.os.tag == .windows) { return switch (builtin.cpu.arch) { .x86 => switch (reg_number) { 0 => mem.asBytes(&thread_context_ptr.Eax), @@ -194,7 +145,7 @@ pub fn regBytes( const ucontext_ptr = thread_context_ptr; return switch (builtin.cpu.arch) { - .x86 => switch (native_os) { + .x86 => switch (builtin.os.tag) { .linux, .netbsd, .solaris, .illumos => switch (reg_number) { 0 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.EAX]), 1 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.ECX]), @@ -229,7 +180,7 @@ pub fn regBytes( }, else => error.UnimplementedOs, }, - .x86_64 => switch (native_os) { + .x86_64 => switch (builtin.os.tag) { .linux, .solaris, .illumos => switch (reg_number) { 0 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.RAX]), 1 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.RDX]), @@ -248,7 +199,7 @@ pub fn regBytes( 14 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.R14]), 15 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.R15]), 16 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.RIP]), - 17...32 => |i| if (native_os.isSolarish()) + 17...32 => |i| if (builtin.os.tag.isSolarish()) mem.asBytes(&ucontext_ptr.mcontext.fpregs.chip_state.xmm[i - 17]) else mem.asBytes(&ucontext_ptr.mcontext.fpregs.xmm[i - 17]), @@ -318,7 +269,7 @@ pub fn regBytes( }, else => error.UnimplementedOs, }, - .arm => switch (native_os) { + .arm => switch (builtin.os.tag) { .linux => switch (reg_number) { 0 => mem.asBytes(&ucontext_ptr.mcontext.arm_r0), 1 => mem.asBytes(&ucontext_ptr.mcontext.arm_r1), @@ -341,7 +292,7 @@ pub fn regBytes( }, else => error.UnimplementedOs, }, - .aarch64 => switch (native_os) { + .aarch64 => switch (builtin.os.tag) { .macos, .ios => switch (reg_number) { 0...28 => mem.asBytes(&ucontext_ptr.mcontext.ss.regs[reg_number]), 29 => mem.asBytes(&ucontext_ptr.mcontext.ss.fp), @@ -389,22 +340,14 @@ pub fn regBytes( }; } -/// Returns the ABI-defined default value this register has in the unwinding table -/// before running any of the CIE instructions. The DWARF spec defines these as having -/// the .undefined rule by default, but allows ABI authors to override that. -pub fn getRegDefaultValue(reg_number: u8, context: *std.debug.Dwarf.UnwindContext, out: []u8) !void { - switch (builtin.cpu.arch) { - .aarch64 => { - // Callee-saved registers are initialized as if they had the .same_value rule - if (reg_number >= 19 and reg_number <= 28) { - const src = try regBytes(context.thread_context, reg_number, context.reg_context); - if (src.len != out.len) return error.RegisterSizeMismatch; - @memcpy(out, src); - return; - } - }, - else => {}, - } - - @memset(out, undefined); +/// Returns a pointer to a register stored in a ThreadContext, preserving the +/// pointer attributes of the context. +pub fn regValueNative( + thread_context_ptr: *std.debug.ThreadContext, + reg_number: u8, + reg_context: ?RegisterContext, +) !*align(1) usize { + const reg_bytes = try regBytes(thread_context_ptr, reg_number, reg_context); + if (@sizeOf(usize) != reg_bytes.len) return error.IncompatibleRegisterSize; + return mem.bytesAsValue(usize, reg_bytes[0..@sizeOf(usize)]); } diff --git a/lib/std/debug/Dwarf/call_frame.zig b/lib/std/debug/Dwarf/call_frame.zig index 73e00d3099..3e3d2585db 100644 --- a/lib/std/debug/Dwarf/call_frame.zig +++ b/lib/std/debug/Dwarf/call_frame.zig @@ -297,391 +297,3 @@ pub const Instruction = union(Opcode) { } } }; - -/// Since register rules are applied (usually) during a panic, -/// checked addition / subtraction is used so that we can return -/// an error and fall back to FP-based unwinding. -pub fn applyOffset(base: usize, offset: i64) !usize { - return if (offset >= 0) - try std.math.add(usize, base, @as(usize, @intCast(offset))) - else - try std.math.sub(usize, base, @as(usize, @intCast(-offset))); -} - -/// This is a virtual machine that runs DWARF call frame instructions. -pub const VirtualMachine = struct { - /// See section 6.4.1 of the DWARF5 specification for details on each - const RegisterRule = union(enum) { - // The spec says that the default rule for each column is the undefined rule. - // However, it also allows ABI / compiler authors to specify alternate defaults, so - // there is a distinction made here. - default: void, - - undefined: void, - same_value: void, - - // offset(N) - offset: i64, - - // val_offset(N) - val_offset: i64, - - // register(R) - register: u8, - - // expression(E) - expression: []const u8, - - // val_expression(E) - val_expression: []const u8, - - // Augmenter-defined rule - architectural: void, - }; - - /// Each row contains unwinding rules for a set of registers. - pub const Row = struct { - /// Offset from `FrameDescriptionEntry.pc_begin` - offset: u64 = 0, - - /// Special-case column that defines the CFA (Canonical Frame Address) rule. - /// The register field of this column defines the register that CFA is derived from. - cfa: Column = .{}, - - /// The register fields in these columns define the register the rule applies to. - columns: ColumnRange = .{}, - - /// Indicates that the next write to any column in this row needs to copy - /// the backing column storage first, as it may be referenced by previous rows. - copy_on_write: bool = false, - }; - - pub const Column = struct { - register: ?u8 = null, - rule: RegisterRule = .{ .default = {} }, - - /// Resolves the register rule and places the result into `out` (see dwarf.abi.regBytes) - pub fn resolveValue( - self: Column, - context: *std.debug.Dwarf.UnwindContext, - expression_context: std.debug.Dwarf.expression.Context, - ma: *debug.StackIterator.MemoryAccessor, - out: []u8, - ) !void { - switch (self.rule) { - .default => { - const register = self.register orelse return error.InvalidRegister; - try abi.getRegDefaultValue(register, context, out); - }, - .undefined => { - @memset(out, undefined); - }, - .same_value => { - // TODO: This copy could be eliminated if callers always copy the state then call this function to update it - const register = self.register orelse return error.InvalidRegister; - const src = try abi.regBytes(context.thread_context, register, context.reg_context); - if (src.len != out.len) return error.RegisterSizeMismatch; - @memcpy(out, src); - }, - .offset => |offset| { - if (context.cfa) |cfa| { - const addr = try applyOffset(cfa, offset); - if (ma.load(usize, addr) == null) return error.InvalidAddress; - const ptr: *const usize = @ptrFromInt(addr); - mem.writeInt(usize, out[0..@sizeOf(usize)], ptr.*, native_endian); - } else return error.InvalidCFA; - }, - .val_offset => |offset| { - if (context.cfa) |cfa| { - mem.writeInt(usize, out[0..@sizeOf(usize)], try applyOffset(cfa, offset), native_endian); - } else return error.InvalidCFA; - }, - .register => |register| { - const src = try abi.regBytes(context.thread_context, register, context.reg_context); - if (src.len != out.len) return error.RegisterSizeMismatch; - @memcpy(out, try abi.regBytes(context.thread_context, register, context.reg_context)); - }, - .expression => |expression| { - context.stack_machine.reset(); - const value = try context.stack_machine.run(expression, context.allocator, expression_context, context.cfa.?); - const addr = if (value) |v| blk: { - if (v != .generic) return error.InvalidExpressionValue; - break :blk v.generic; - } else return error.NoExpressionValue; - - if (ma.load(usize, addr) == null) return error.InvalidExpressionAddress; - const ptr: *usize = @ptrFromInt(addr); - mem.writeInt(usize, out[0..@sizeOf(usize)], ptr.*, native_endian); - }, - .val_expression => |expression| { - context.stack_machine.reset(); - const value = try context.stack_machine.run(expression, context.allocator, expression_context, context.cfa.?); - if (value) |v| { - if (v != .generic) return error.InvalidExpressionValue; - mem.writeInt(usize, out[0..@sizeOf(usize)], v.generic, native_endian); - } else return error.NoExpressionValue; - }, - .architectural => return error.UnimplementedRegisterRule, - } - } - }; - - const ColumnRange = struct { - /// Index into `columns` of the first column in this row. - start: usize = undefined, - len: u8 = 0, - }; - - columns: std.ArrayListUnmanaged(Column) = .{}, - stack: std.ArrayListUnmanaged(ColumnRange) = .{}, - current_row: Row = .{}, - - /// The result of executing the CIE's initial_instructions - cie_row: ?Row = null, - - pub fn deinit(self: *VirtualMachine, allocator: std.mem.Allocator) void { - self.stack.deinit(allocator); - self.columns.deinit(allocator); - self.* = undefined; - } - - pub fn reset(self: *VirtualMachine) void { - self.stack.clearRetainingCapacity(); - self.columns.clearRetainingCapacity(); - self.current_row = .{}; - self.cie_row = null; - } - - /// Return a slice backed by the row's non-CFA columns - pub fn rowColumns(self: VirtualMachine, row: Row) []Column { - if (row.columns.len == 0) return &.{}; - return self.columns.items[row.columns.start..][0..row.columns.len]; - } - - /// Either retrieves or adds a column for `register` (non-CFA) in the current row. - fn getOrAddColumn(self: *VirtualMachine, allocator: std.mem.Allocator, register: u8) !*Column { - for (self.rowColumns(self.current_row)) |*c| { - if (c.register == register) return c; - } - - if (self.current_row.columns.len == 0) { - self.current_row.columns.start = self.columns.items.len; - } - self.current_row.columns.len += 1; - - const column = try self.columns.addOne(allocator); - column.* = .{ - .register = register, - }; - - return column; - } - - /// Runs the CIE instructions, then the FDE instructions. Execution halts - /// once the row that corresponds to `pc` is known, and the row is returned. - pub fn runTo( - self: *VirtualMachine, - allocator: std.mem.Allocator, - pc: u64, - cie: std.debug.Dwarf.CommonInformationEntry, - fde: std.debug.Dwarf.FrameDescriptionEntry, - addr_size_bytes: u8, - endian: std.builtin.Endian, - ) !Row { - assert(self.cie_row == null); - if (pc < fde.pc_begin or pc >= fde.pc_begin + fde.pc_range) return error.AddressOutOfRange; - - var prev_row: Row = self.current_row; - - var cie_stream = std.io.fixedBufferStream(cie.initial_instructions); - var fde_stream = std.io.fixedBufferStream(fde.instructions); - var streams = [_]*std.io.FixedBufferStream([]const u8){ - &cie_stream, - &fde_stream, - }; - - for (&streams, 0..) |stream, i| { - while (stream.pos < stream.buffer.len) { - const instruction = try std.debug.Dwarf.call_frame.Instruction.read(stream, addr_size_bytes, endian); - prev_row = try self.step(allocator, cie, i == 0, instruction); - if (pc < fde.pc_begin + self.current_row.offset) return prev_row; - } - } - - return self.current_row; - } - - pub fn runToNative( - self: *VirtualMachine, - allocator: std.mem.Allocator, - pc: u64, - cie: std.debug.Dwarf.CommonInformationEntry, - fde: std.debug.Dwarf.FrameDescriptionEntry, - ) !Row { - return self.runTo(allocator, pc, cie, fde, @sizeOf(usize), builtin.target.cpu.arch.endian()); - } - - fn resolveCopyOnWrite(self: *VirtualMachine, allocator: std.mem.Allocator) !void { - if (!self.current_row.copy_on_write) return; - - const new_start = self.columns.items.len; - if (self.current_row.columns.len > 0) { - try self.columns.ensureUnusedCapacity(allocator, self.current_row.columns.len); - self.columns.appendSliceAssumeCapacity(self.rowColumns(self.current_row)); - self.current_row.columns.start = new_start; - } - } - - /// Executes a single instruction. - /// If this instruction is from the CIE, `is_initial` should be set. - /// Returns the value of `current_row` before executing this instruction. - pub fn step( - self: *VirtualMachine, - allocator: std.mem.Allocator, - cie: std.debug.Dwarf.CommonInformationEntry, - is_initial: bool, - instruction: Instruction, - ) !Row { - // CIE instructions must be run before FDE instructions - assert(!is_initial or self.cie_row == null); - if (!is_initial and self.cie_row == null) { - self.cie_row = self.current_row; - self.current_row.copy_on_write = true; - } - - const prev_row = self.current_row; - switch (instruction) { - .set_loc => |i| { - if (i.address <= self.current_row.offset) return error.InvalidOperation; - // TODO: Check cie.segment_selector_size != 0 for DWARFV4 - self.current_row.offset = i.address; - }, - inline .advance_loc, - .advance_loc1, - .advance_loc2, - .advance_loc4, - => |i| { - self.current_row.offset += i.delta * cie.code_alignment_factor; - self.current_row.copy_on_write = true; - }, - inline .offset, - .offset_extended, - .offset_extended_sf, - => |i| { - try self.resolveCopyOnWrite(allocator); - const column = try self.getOrAddColumn(allocator, i.register); - column.rule = .{ .offset = @as(i64, @intCast(i.offset)) * cie.data_alignment_factor }; - }, - inline .restore, - .restore_extended, - => |i| { - try self.resolveCopyOnWrite(allocator); - if (self.cie_row) |cie_row| { - const column = try self.getOrAddColumn(allocator, i.register); - column.rule = for (self.rowColumns(cie_row)) |cie_column| { - if (cie_column.register == i.register) break cie_column.rule; - } else .{ .default = {} }; - } else return error.InvalidOperation; - }, - .nop => {}, - .undefined => |i| { - try self.resolveCopyOnWrite(allocator); - const column = try self.getOrAddColumn(allocator, i.register); - column.rule = .{ .undefined = {} }; - }, - .same_value => |i| { - try self.resolveCopyOnWrite(allocator); - const column = try self.getOrAddColumn(allocator, i.register); - column.rule = .{ .same_value = {} }; - }, - .register => |i| { - try self.resolveCopyOnWrite(allocator); - const column = try self.getOrAddColumn(allocator, i.register); - column.rule = .{ .register = i.target_register }; - }, - .remember_state => { - try self.stack.append(allocator, self.current_row.columns); - self.current_row.copy_on_write = true; - }, - .restore_state => { - const restored_columns = self.stack.popOrNull() orelse return error.InvalidOperation; - self.columns.shrinkRetainingCapacity(self.columns.items.len - self.current_row.columns.len); - try self.columns.ensureUnusedCapacity(allocator, restored_columns.len); - - self.current_row.columns.start = self.columns.items.len; - self.current_row.columns.len = restored_columns.len; - self.columns.appendSliceAssumeCapacity(self.columns.items[restored_columns.start..][0..restored_columns.len]); - }, - .def_cfa => |i| { - try self.resolveCopyOnWrite(allocator); - self.current_row.cfa = .{ - .register = i.register, - .rule = .{ .val_offset = @intCast(i.offset) }, - }; - }, - .def_cfa_sf => |i| { - try self.resolveCopyOnWrite(allocator); - self.current_row.cfa = .{ - .register = i.register, - .rule = .{ .val_offset = i.offset * cie.data_alignment_factor }, - }; - }, - .def_cfa_register => |i| { - try self.resolveCopyOnWrite(allocator); - if (self.current_row.cfa.register == null or self.current_row.cfa.rule != .val_offset) return error.InvalidOperation; - self.current_row.cfa.register = i.register; - }, - .def_cfa_offset => |i| { - try self.resolveCopyOnWrite(allocator); - if (self.current_row.cfa.register == null or self.current_row.cfa.rule != .val_offset) return error.InvalidOperation; - self.current_row.cfa.rule = .{ - .val_offset = @intCast(i.offset), - }; - }, - .def_cfa_offset_sf => |i| { - try self.resolveCopyOnWrite(allocator); - if (self.current_row.cfa.register == null or self.current_row.cfa.rule != .val_offset) return error.InvalidOperation; - self.current_row.cfa.rule = .{ - .val_offset = i.offset * cie.data_alignment_factor, - }; - }, - .def_cfa_expression => |i| { - try self.resolveCopyOnWrite(allocator); - self.current_row.cfa.register = undefined; - self.current_row.cfa.rule = .{ - .expression = i.block, - }; - }, - .expression => |i| { - try self.resolveCopyOnWrite(allocator); - const column = try self.getOrAddColumn(allocator, i.register); - column.rule = .{ - .expression = i.block, - }; - }, - .val_offset => |i| { - try self.resolveCopyOnWrite(allocator); - const column = try self.getOrAddColumn(allocator, i.register); - column.rule = .{ - .val_offset = @as(i64, @intCast(i.offset)) * cie.data_alignment_factor, - }; - }, - .val_offset_sf => |i| { - try self.resolveCopyOnWrite(allocator); - const column = try self.getOrAddColumn(allocator, i.register); - column.rule = .{ - .val_offset = i.offset * cie.data_alignment_factor, - }; - }, - .val_expression => |i| { - try self.resolveCopyOnWrite(allocator); - const column = try self.getOrAddColumn(allocator, i.register); - column.rule = .{ - .val_expression = i.block, - }; - }, - } - - return prev_row; - } -}; diff --git a/lib/std/debug/Dwarf/expression.zig b/lib/std/debug/Dwarf/expression.zig index 6243ea9717..5fab56de6e 100644 --- a/lib/std/debug/Dwarf/expression.zig +++ b/lib/std/debug/Dwarf/expression.zig @@ -1,11 +1,13 @@ -const std = @import("std"); const builtin = @import("builtin"); +const native_arch = builtin.cpu.arch; +const native_endian = native_arch.endian(); + +const std = @import("std"); const leb = std.leb; const OP = std.dwarf.OP; const abi = std.debug.Dwarf.abi; const mem = std.mem; const assert = std.debug.assert; -const native_endian = builtin.cpu.arch.endian(); /// Expressions can be evaluated in different contexts, each requiring its own set of inputs. /// Callers should specify all the fields relevant to their context. If a field is required @@ -14,7 +16,7 @@ pub const Context = struct { /// The dwarf format of the section this expression is in format: std.dwarf.Format = .@"32", /// If specified, any addresses will pass through before being accessed - memory_accessor: ?*std.debug.StackIterator.MemoryAccessor = null, + memory_accessor: ?*std.debug.MemoryAccessor = null, /// The compilation unit this expression relates to, if any compile_unit: ?*const std.debug.Dwarf.CompileUnit = null, /// When evaluating a user-presented expression, this is the address of the object being evaluated @@ -34,7 +36,7 @@ pub const Options = struct { /// The address size of the target architecture addr_size: u8 = @sizeOf(usize), /// Endianness of the target architecture - endian: std.builtin.Endian = builtin.target.cpu.arch.endian(), + endian: std.builtin.Endian = native_endian, /// Restrict the stack machine to a subset of opcodes used in call frame instructions call_frame_context: bool = false, }; @@ -60,7 +62,7 @@ pub const Error = error{ InvalidTypeLength, TruncatedIntegralType, -} || abi.AbiError || error{ EndOfStream, Overflow, OutOfMemory, DivisionByZero }; +} || abi.RegBytesError || error{ EndOfStream, Overflow, OutOfMemory, DivisionByZero }; /// A stack machine that can decode and run DWARF expressions. /// Expressions can be decoded for non-native address size and endianness, @@ -304,7 +306,7 @@ pub fn StackMachine(comptime options: Options) type { allocator: std.mem.Allocator, context: Context, ) Error!bool { - if (@sizeOf(usize) != @sizeOf(addr_type) or options.endian != comptime builtin.target.cpu.arch.endian()) + if (@sizeOf(usize) != @sizeOf(addr_type) or options.endian != native_endian) @compileError("Execution of non-native address sizes / endianness is not supported"); const opcode = try stream.reader().readByte(); @@ -1186,13 +1188,13 @@ test "DWARF expressions" { // TODO: Test fbreg (once implemented): mock a DIE and point compile_unit.frame_base at it mem.writeInt(usize, reg_bytes[0..@sizeOf(usize)], 0xee, native_endian); - (try abi.regValueNative(usize, &thread_context, abi.fpRegNum(reg_context), reg_context)).* = 1; - (try abi.regValueNative(usize, &thread_context, abi.spRegNum(reg_context), reg_context)).* = 2; - (try abi.regValueNative(usize, &thread_context, abi.ipRegNum(), reg_context)).* = 3; + (try abi.regValueNative(&thread_context, abi.fpRegNum(native_arch, reg_context), reg_context)).* = 1; + (try abi.regValueNative(&thread_context, abi.spRegNum(native_arch, reg_context), reg_context)).* = 2; + (try abi.regValueNative(&thread_context, abi.ipRegNum(native_arch).?, reg_context)).* = 3; - try b.writeBreg(writer, abi.fpRegNum(reg_context), @as(usize, 100)); - try b.writeBreg(writer, abi.spRegNum(reg_context), @as(usize, 200)); - try b.writeBregx(writer, abi.ipRegNum(), @as(usize, 300)); + try b.writeBreg(writer, abi.fpRegNum(native_arch, reg_context), @as(usize, 100)); + try b.writeBreg(writer, abi.spRegNum(native_arch, reg_context), @as(usize, 200)); + try b.writeBregx(writer, abi.ipRegNum(native_arch).?, @as(usize, 300)); try b.writeRegvalType(writer, @as(u8, 0), @as(usize, 400)); _ = try stack_machine.run(program.items, allocator, context, 0); diff --git a/lib/std/debug/MemoryAccessor.zig b/lib/std/debug/MemoryAccessor.zig new file mode 100644 index 0000000000..bfdda609f6 --- /dev/null +++ b/lib/std/debug/MemoryAccessor.zig @@ -0,0 +1,128 @@ +//! Reads memory from any address of the current location using OS-specific +//! syscalls, bypassing memory page protection. Useful for stack unwinding. + +const builtin = @import("builtin"); +const native_os = builtin.os.tag; + +const std = @import("../std.zig"); +const posix = std.posix; +const File = std.fs.File; +const page_size = std.mem.page_size; + +const MemoryAccessor = @This(); + +var cached_pid: posix.pid_t = -1; + +mem: switch (native_os) { + .linux => File, + else => void, +}, + +pub const init: MemoryAccessor = .{ + .mem = switch (native_os) { + .linux => .{ .handle = -1 }, + else => {}, + }, +}; + +fn read(ma: *MemoryAccessor, address: usize, buf: []u8) bool { + switch (native_os) { + .linux => while (true) switch (ma.mem.handle) { + -2 => break, + -1 => { + const linux = std.os.linux; + const pid = switch (@atomicLoad(posix.pid_t, &cached_pid, .monotonic)) { + -1 => pid: { + const pid = linux.getpid(); + @atomicStore(posix.pid_t, &cached_pid, pid, .monotonic); + break :pid pid; + }, + else => |pid| pid, + }; + const bytes_read = linux.process_vm_readv( + pid, + &.{.{ .base = buf.ptr, .len = buf.len }}, + &.{.{ .base = @ptrFromInt(address), .len = buf.len }}, + 0, + ); + switch (linux.E.init(bytes_read)) { + .SUCCESS => return bytes_read == buf.len, + .FAULT => return false, + .INVAL, .PERM, .SRCH => unreachable, // own pid is always valid + .NOMEM => {}, + .NOSYS => {}, // QEMU is known not to implement this syscall. + else => unreachable, // unexpected + } + var path_buf: [ + std.fmt.count("/proc/{d}/mem", .{std.math.minInt(posix.pid_t)}) + ]u8 = undefined; + const path = std.fmt.bufPrint(&path_buf, "/proc/{d}/mem", .{pid}) catch + unreachable; + ma.mem = std.fs.openFileAbsolute(path, .{}) catch { + ma.mem.handle = -2; + break; + }; + }, + else => return (ma.mem.pread(buf, address) catch return false) == buf.len, + }, + else => {}, + } + if (!isValidMemory(address)) return false; + @memcpy(buf, @as([*]const u8, @ptrFromInt(address))); + return true; +} + +pub fn load(ma: *MemoryAccessor, comptime Type: type, address: usize) ?Type { + var result: Type = undefined; + return if (ma.read(address, std.mem.asBytes(&result))) result else null; +} + +pub fn isValidMemory(address: usize) bool { + // We are unable to determine validity of memory for freestanding targets + if (native_os == .freestanding or native_os == .uefi) return true; + + const aligned_address = address & ~@as(usize, @intCast((page_size - 1))); + if (aligned_address == 0) return false; + const aligned_memory = @as([*]align(page_size) u8, @ptrFromInt(aligned_address))[0..page_size]; + + if (native_os == .windows) { + const windows = std.os.windows; + + var memory_info: windows.MEMORY_BASIC_INFORMATION = undefined; + + // The only error this function can throw is ERROR_INVALID_PARAMETER. + // supply an address that invalid i'll be thrown. + const rc = windows.VirtualQuery(aligned_memory, &memory_info, aligned_memory.len) catch { + return false; + }; + + // Result code has to be bigger than zero (number of bytes written) + if (rc == 0) { + return false; + } + + // Free pages cannot be read, they are unmapped + if (memory_info.State == windows.MEM_FREE) { + return false; + } + + return true; + } else if (have_msync) { + posix.msync(aligned_memory, posix.MSF.ASYNC) catch |err| { + switch (err) { + error.UnmappedMemory => return false, + else => unreachable, + } + }; + + return true; + } else { + // We are unable to determine validity of memory on this target. + return true; + } +} + +const have_msync = switch (native_os) { + .wasi, .emscripten, .windows => false, + else => true, +}; diff --git a/lib/std/debug/Pdb.zig b/lib/std/debug/Pdb.zig new file mode 100644 index 0000000000..bdcc108c1d --- /dev/null +++ b/lib/std/debug/Pdb.zig @@ -0,0 +1,591 @@ +const std = @import("../std.zig"); +const File = std.fs.File; +const Allocator = std.mem.Allocator; +const pdb = std.pdb; + +const Pdb = @This(); + +in_file: File, +msf: Msf, +allocator: Allocator, +string_table: ?*MsfStream, +dbi: ?*MsfStream, +modules: []Module, +sect_contribs: []pdb.SectionContribEntry, +guid: [16]u8, +age: u32, + +pub const Module = struct { + mod_info: pdb.ModInfo, + module_name: []u8, + obj_file_name: []u8, + // The fields below are filled on demand. + populated: bool, + symbols: []u8, + subsect_info: []u8, + checksum_offset: ?usize, + + pub fn deinit(self: *Module, allocator: Allocator) void { + allocator.free(self.module_name); + allocator.free(self.obj_file_name); + if (self.populated) { + allocator.free(self.symbols); + allocator.free(self.subsect_info); + } + } +}; + +pub fn init(allocator: Allocator, path: []const u8) !Pdb { + const file = try std.fs.cwd().openFile(path, .{}); + errdefer file.close(); + + return .{ + .in_file = file, + .allocator = allocator, + .string_table = null, + .dbi = null, + .msf = try Msf.init(allocator, file), + .modules = &[_]Module{}, + .sect_contribs = &[_]pdb.SectionContribEntry{}, + .guid = undefined, + .age = undefined, + }; +} + +pub fn deinit(self: *Pdb) void { + self.in_file.close(); + self.msf.deinit(self.allocator); + for (self.modules) |*module| { + module.deinit(self.allocator); + } + self.allocator.free(self.modules); + self.allocator.free(self.sect_contribs); +} + +pub fn parseDbiStream(self: *Pdb) !void { + var stream = self.getStream(pdb.StreamType.Dbi) orelse + return error.InvalidDebugInfo; + const reader = stream.reader(); + + const header = try reader.readStruct(std.pdb.DbiStreamHeader); + if (header.VersionHeader != 19990903) // V70, only value observed by LLVM team + return error.UnknownPDBVersion; + // if (header.Age != age) + // return error.UnmatchingPDB; + + const mod_info_size = header.ModInfoSize; + const section_contrib_size = header.SectionContributionSize; + + var modules = std.ArrayList(Module).init(self.allocator); + errdefer modules.deinit(); + + // Module Info Substream + var mod_info_offset: usize = 0; + while (mod_info_offset != mod_info_size) { + const mod_info = try reader.readStruct(pdb.ModInfo); + var this_record_len: usize = @sizeOf(pdb.ModInfo); + + const module_name = try reader.readUntilDelimiterAlloc(self.allocator, 0, 1024); + errdefer self.allocator.free(module_name); + this_record_len += module_name.len + 1; + + const obj_file_name = try reader.readUntilDelimiterAlloc(self.allocator, 0, 1024); + errdefer self.allocator.free(obj_file_name); + this_record_len += obj_file_name.len + 1; + + if (this_record_len % 4 != 0) { + const round_to_next_4 = (this_record_len | 0x3) + 1; + const march_forward_bytes = round_to_next_4 - this_record_len; + try stream.seekBy(@as(isize, @intCast(march_forward_bytes))); + this_record_len += march_forward_bytes; + } + + try modules.append(Module{ + .mod_info = mod_info, + .module_name = module_name, + .obj_file_name = obj_file_name, + + .populated = false, + .symbols = undefined, + .subsect_info = undefined, + .checksum_offset = null, + }); + + mod_info_offset += this_record_len; + if (mod_info_offset > mod_info_size) + return error.InvalidDebugInfo; + } + + // Section Contribution Substream + var sect_contribs = std.ArrayList(pdb.SectionContribEntry).init(self.allocator); + errdefer sect_contribs.deinit(); + + var sect_cont_offset: usize = 0; + if (section_contrib_size != 0) { + const version = reader.readEnum(std.pdb.SectionContrSubstreamVersion, .little) catch |err| switch (err) { + error.InvalidValue => return error.InvalidDebugInfo, + else => |e| return e, + }; + _ = version; + sect_cont_offset += @sizeOf(u32); + } + while (sect_cont_offset != section_contrib_size) { + const entry = try sect_contribs.addOne(); + entry.* = try reader.readStruct(pdb.SectionContribEntry); + sect_cont_offset += @sizeOf(pdb.SectionContribEntry); + + if (sect_cont_offset > section_contrib_size) + return error.InvalidDebugInfo; + } + + self.modules = try modules.toOwnedSlice(); + self.sect_contribs = try sect_contribs.toOwnedSlice(); +} + +pub fn parseInfoStream(self: *Pdb) !void { + var stream = self.getStream(pdb.StreamType.Pdb) orelse + return error.InvalidDebugInfo; + const reader = stream.reader(); + + // Parse the InfoStreamHeader. + const version = try reader.readInt(u32, .little); + const signature = try reader.readInt(u32, .little); + _ = signature; + const age = try reader.readInt(u32, .little); + const guid = try reader.readBytesNoEof(16); + + if (version != 20000404) // VC70, only value observed by LLVM team + return error.UnknownPDBVersion; + + self.guid = guid; + self.age = age; + + // Find the string table. + const string_table_index = str_tab_index: { + const name_bytes_len = try reader.readInt(u32, .little); + const name_bytes = try self.allocator.alloc(u8, name_bytes_len); + defer self.allocator.free(name_bytes); + try reader.readNoEof(name_bytes); + + const HashTableHeader = extern struct { + Size: u32, + Capacity: u32, + + fn maxLoad(cap: u32) u32 { + return cap * 2 / 3 + 1; + } + }; + const hash_tbl_hdr = try reader.readStruct(HashTableHeader); + if (hash_tbl_hdr.Capacity == 0) + return error.InvalidDebugInfo; + + if (hash_tbl_hdr.Size > HashTableHeader.maxLoad(hash_tbl_hdr.Capacity)) + return error.InvalidDebugInfo; + + const present = try readSparseBitVector(&reader, self.allocator); + defer self.allocator.free(present); + if (present.len != hash_tbl_hdr.Size) + return error.InvalidDebugInfo; + const deleted = try readSparseBitVector(&reader, self.allocator); + defer self.allocator.free(deleted); + + for (present) |_| { + const name_offset = try reader.readInt(u32, .little); + const name_index = try reader.readInt(u32, .little); + if (name_offset > name_bytes.len) + return error.InvalidDebugInfo; + const name = std.mem.sliceTo(name_bytes[name_offset..], 0); + if (std.mem.eql(u8, name, "/names")) { + break :str_tab_index name_index; + } + } + return error.MissingDebugInfo; + }; + + self.string_table = self.getStreamById(string_table_index) orelse + return error.MissingDebugInfo; +} + +pub fn getSymbolName(self: *Pdb, module: *Module, address: u64) ?[]const u8 { + _ = self; + std.debug.assert(module.populated); + + var symbol_i: usize = 0; + while (symbol_i != module.symbols.len) { + const prefix = @as(*align(1) pdb.RecordPrefix, @ptrCast(&module.symbols[symbol_i])); + if (prefix.RecordLen < 2) + return null; + switch (prefix.RecordKind) { + .S_LPROC32, .S_GPROC32 => { + const proc_sym = @as(*align(1) pdb.ProcSym, @ptrCast(&module.symbols[symbol_i + @sizeOf(pdb.RecordPrefix)])); + if (address >= proc_sym.CodeOffset and address < proc_sym.CodeOffset + proc_sym.CodeSize) { + return std.mem.sliceTo(@as([*:0]u8, @ptrCast(&proc_sym.Name[0])), 0); + } + }, + else => {}, + } + symbol_i += prefix.RecordLen + @sizeOf(u16); + } + + return null; +} + +pub fn getLineNumberInfo(self: *Pdb, module: *Module, address: u64) !std.debug.SourceLocation { + std.debug.assert(module.populated); + const subsect_info = module.subsect_info; + + var sect_offset: usize = 0; + var skip_len: usize = undefined; + const checksum_offset = module.checksum_offset orelse return error.MissingDebugInfo; + while (sect_offset != subsect_info.len) : (sect_offset += skip_len) { + const subsect_hdr = @as(*align(1) pdb.DebugSubsectionHeader, @ptrCast(&subsect_info[sect_offset])); + skip_len = subsect_hdr.Length; + sect_offset += @sizeOf(pdb.DebugSubsectionHeader); + + switch (subsect_hdr.Kind) { + .Lines => { + var line_index = sect_offset; + + const line_hdr = @as(*align(1) pdb.LineFragmentHeader, @ptrCast(&subsect_info[line_index])); + if (line_hdr.RelocSegment == 0) + return error.MissingDebugInfo; + line_index += @sizeOf(pdb.LineFragmentHeader); + const frag_vaddr_start = line_hdr.RelocOffset; + const frag_vaddr_end = frag_vaddr_start + line_hdr.CodeSize; + + if (address >= frag_vaddr_start and address < frag_vaddr_end) { + // There is an unknown number of LineBlockFragmentHeaders (and their accompanying line and column records) + // from now on. We will iterate through them, and eventually find a SourceLocation that we're interested in, + // breaking out to :subsections. If not, we will make sure to not read anything outside of this subsection. + const subsection_end_index = sect_offset + subsect_hdr.Length; + + while (line_index < subsection_end_index) { + const block_hdr = @as(*align(1) pdb.LineBlockFragmentHeader, @ptrCast(&subsect_info[line_index])); + line_index += @sizeOf(pdb.LineBlockFragmentHeader); + const start_line_index = line_index; + + const has_column = line_hdr.Flags.LF_HaveColumns; + + // All line entries are stored inside their line block by ascending start address. + // Heuristic: we want to find the last line entry + // that has a vaddr_start <= address. + // This is done with a simple linear search. + var line_i: u32 = 0; + while (line_i < block_hdr.NumLines) : (line_i += 1) { + const line_num_entry = @as(*align(1) pdb.LineNumberEntry, @ptrCast(&subsect_info[line_index])); + line_index += @sizeOf(pdb.LineNumberEntry); + + const vaddr_start = frag_vaddr_start + line_num_entry.Offset; + if (address < vaddr_start) { + break; + } + } + + // line_i == 0 would mean that no matching pdb.LineNumberEntry was found. + if (line_i > 0) { + const subsect_index = checksum_offset + block_hdr.NameIndex; + const chksum_hdr = @as(*align(1) pdb.FileChecksumEntryHeader, @ptrCast(&module.subsect_info[subsect_index])); + const strtab_offset = @sizeOf(pdb.StringTableHeader) + chksum_hdr.FileNameOffset; + try self.string_table.?.seekTo(strtab_offset); + const source_file_name = try self.string_table.?.reader().readUntilDelimiterAlloc(self.allocator, 0, 1024); + + const line_entry_idx = line_i - 1; + + const column = if (has_column) blk: { + const start_col_index = start_line_index + @sizeOf(pdb.LineNumberEntry) * block_hdr.NumLines; + const col_index = start_col_index + @sizeOf(pdb.ColumnNumberEntry) * line_entry_idx; + const col_num_entry = @as(*align(1) pdb.ColumnNumberEntry, @ptrCast(&subsect_info[col_index])); + break :blk col_num_entry.StartColumn; + } else 0; + + const found_line_index = start_line_index + line_entry_idx * @sizeOf(pdb.LineNumberEntry); + const line_num_entry: *align(1) pdb.LineNumberEntry = @ptrCast(&subsect_info[found_line_index]); + const flags: *align(1) pdb.LineNumberEntry.Flags = @ptrCast(&line_num_entry.Flags); + + return .{ + .file_name = source_file_name, + .line = flags.Start, + .column = column, + }; + } + } + + // Checking that we are not reading garbage after the (possibly) multiple block fragments. + if (line_index != subsection_end_index) { + return error.InvalidDebugInfo; + } + } + }, + else => {}, + } + + if (sect_offset > subsect_info.len) + return error.InvalidDebugInfo; + } + + return error.MissingDebugInfo; +} + +pub fn getModule(self: *Pdb, index: usize) !?*Module { + if (index >= self.modules.len) + return null; + + const mod = &self.modules[index]; + if (mod.populated) + return mod; + + // At most one can be non-zero. + if (mod.mod_info.C11ByteSize != 0 and mod.mod_info.C13ByteSize != 0) + return error.InvalidDebugInfo; + if (mod.mod_info.C13ByteSize == 0) + return error.InvalidDebugInfo; + + const stream = self.getStreamById(mod.mod_info.ModuleSymStream) orelse + return error.MissingDebugInfo; + const reader = stream.reader(); + + const signature = try reader.readInt(u32, .little); + if (signature != 4) + return error.InvalidDebugInfo; + + mod.symbols = try self.allocator.alloc(u8, mod.mod_info.SymByteSize - 4); + errdefer self.allocator.free(mod.symbols); + try reader.readNoEof(mod.symbols); + + mod.subsect_info = try self.allocator.alloc(u8, mod.mod_info.C13ByteSize); + errdefer self.allocator.free(mod.subsect_info); + try reader.readNoEof(mod.subsect_info); + + var sect_offset: usize = 0; + var skip_len: usize = undefined; + while (sect_offset != mod.subsect_info.len) : (sect_offset += skip_len) { + const subsect_hdr = @as(*align(1) pdb.DebugSubsectionHeader, @ptrCast(&mod.subsect_info[sect_offset])); + skip_len = subsect_hdr.Length; + sect_offset += @sizeOf(pdb.DebugSubsectionHeader); + + switch (subsect_hdr.Kind) { + .FileChecksums => { + mod.checksum_offset = sect_offset; + break; + }, + else => {}, + } + + if (sect_offset > mod.subsect_info.len) + return error.InvalidDebugInfo; + } + + mod.populated = true; + return mod; +} + +pub fn getStreamById(self: *Pdb, id: u32) ?*MsfStream { + if (id >= self.msf.streams.len) + return null; + return &self.msf.streams[id]; +} + +pub fn getStream(self: *Pdb, stream: pdb.StreamType) ?*MsfStream { + const id = @intFromEnum(stream); + return self.getStreamById(id); +} + +/// https://llvm.org/docs/PDB/MsfFile.html +const Msf = struct { + directory: MsfStream, + streams: []MsfStream, + + fn init(allocator: Allocator, file: File) !Msf { + const in = file.reader(); + + const superblock = try in.readStruct(pdb.SuperBlock); + + // Sanity checks + if (!std.mem.eql(u8, &superblock.FileMagic, pdb.SuperBlock.file_magic)) + return error.InvalidDebugInfo; + if (superblock.FreeBlockMapBlock != 1 and superblock.FreeBlockMapBlock != 2) + return error.InvalidDebugInfo; + const file_len = try file.getEndPos(); + if (superblock.NumBlocks * superblock.BlockSize != file_len) + return error.InvalidDebugInfo; + switch (superblock.BlockSize) { + // llvm only supports 4096 but we can handle any of these values + 512, 1024, 2048, 4096 => {}, + else => return error.InvalidDebugInfo, + } + + const dir_block_count = blockCountFromSize(superblock.NumDirectoryBytes, superblock.BlockSize); + if (dir_block_count > superblock.BlockSize / @sizeOf(u32)) + return error.UnhandledBigDirectoryStream; // cf. BlockMapAddr comment. + + try file.seekTo(superblock.BlockSize * superblock.BlockMapAddr); + const dir_blocks = try allocator.alloc(u32, dir_block_count); + for (dir_blocks) |*b| { + b.* = try in.readInt(u32, .little); + } + var directory = MsfStream.init( + superblock.BlockSize, + file, + dir_blocks, + ); + + const begin = directory.pos; + const stream_count = try directory.reader().readInt(u32, .little); + const stream_sizes = try allocator.alloc(u32, stream_count); + defer allocator.free(stream_sizes); + + // Microsoft's implementation uses @as(u32, -1) for inexistent streams. + // These streams are not used, but still participate in the file + // and must be taken into account when resolving stream indices. + const Nil = 0xFFFFFFFF; + for (stream_sizes) |*s| { + const size = try directory.reader().readInt(u32, .little); + s.* = if (size == Nil) 0 else blockCountFromSize(size, superblock.BlockSize); + } + + const streams = try allocator.alloc(MsfStream, stream_count); + for (streams, 0..) |*stream, i| { + const size = stream_sizes[i]; + if (size == 0) { + stream.* = MsfStream{ + .blocks = &[_]u32{}, + }; + } else { + var blocks = try allocator.alloc(u32, size); + var j: u32 = 0; + while (j < size) : (j += 1) { + const block_id = try directory.reader().readInt(u32, .little); + const n = (block_id % superblock.BlockSize); + // 0 is for pdb.SuperBlock, 1 and 2 for FPMs. + if (block_id == 0 or n == 1 or n == 2 or block_id * superblock.BlockSize > file_len) + return error.InvalidBlockIndex; + blocks[j] = block_id; + } + + stream.* = MsfStream.init( + superblock.BlockSize, + file, + blocks, + ); + } + } + + const end = directory.pos; + if (end - begin != superblock.NumDirectoryBytes) + return error.InvalidStreamDirectory; + + return Msf{ + .directory = directory, + .streams = streams, + }; + } + + fn deinit(self: *Msf, allocator: Allocator) void { + allocator.free(self.directory.blocks); + for (self.streams) |*stream| { + allocator.free(stream.blocks); + } + allocator.free(self.streams); + } +}; + +const MsfStream = struct { + in_file: File = undefined, + pos: u64 = undefined, + blocks: []u32 = undefined, + block_size: u32 = undefined, + + pub const Error = @typeInfo(@typeInfo(@TypeOf(read)).Fn.return_type.?).ErrorUnion.error_set; + + fn init(block_size: u32, file: File, blocks: []u32) MsfStream { + const stream = MsfStream{ + .in_file = file, + .pos = 0, + .blocks = blocks, + .block_size = block_size, + }; + + return stream; + } + + fn read(self: *MsfStream, buffer: []u8) !usize { + var block_id = @as(usize, @intCast(self.pos / self.block_size)); + if (block_id >= self.blocks.len) return 0; // End of Stream + var block = self.blocks[block_id]; + var offset = self.pos % self.block_size; + + try self.in_file.seekTo(block * self.block_size + offset); + const in = self.in_file.reader(); + + var size: usize = 0; + var rem_buffer = buffer; + while (size < buffer.len) { + const size_to_read = @min(self.block_size - offset, rem_buffer.len); + size += try in.read(rem_buffer[0..size_to_read]); + rem_buffer = buffer[size..]; + offset += size_to_read; + + // If we're at the end of a block, go to the next one. + if (offset == self.block_size) { + offset = 0; + block_id += 1; + if (block_id >= self.blocks.len) break; // End of Stream + block = self.blocks[block_id]; + try self.in_file.seekTo(block * self.block_size); + } + } + + self.pos += buffer.len; + return buffer.len; + } + + pub fn seekBy(self: *MsfStream, len: i64) !void { + self.pos = @as(u64, @intCast(@as(i64, @intCast(self.pos)) + len)); + if (self.pos >= self.blocks.len * self.block_size) + return error.EOF; + } + + pub fn seekTo(self: *MsfStream, len: u64) !void { + self.pos = len; + if (self.pos >= self.blocks.len * self.block_size) + return error.EOF; + } + + fn getSize(self: *const MsfStream) u64 { + return self.blocks.len * self.block_size; + } + + fn getFilePos(self: MsfStream) u64 { + const block_id = self.pos / self.block_size; + const block = self.blocks[block_id]; + const offset = self.pos % self.block_size; + + return block * self.block_size + offset; + } + + pub fn reader(self: *MsfStream) std.io.Reader(*MsfStream, Error, read) { + return .{ .context = self }; + } +}; + +fn readSparseBitVector(stream: anytype, allocator: Allocator) ![]u32 { + const num_words = try stream.readInt(u32, .little); + var list = std.ArrayList(u32).init(allocator); + errdefer list.deinit(); + var word_i: u32 = 0; + while (word_i != num_words) : (word_i += 1) { + const word = try stream.readInt(u32, .little); + var bit_i: u5 = 0; + while (true) : (bit_i += 1) { + if (word & (@as(u32, 1) << bit_i) != 0) { + try list.append(word_i * 32 + bit_i); + } + if (bit_i == std.math.maxInt(u5)) break; + } + } + return try list.toOwnedSlice(); +} + +fn blockCountFromSize(size: u32, block_size: u32) u32 { + return (size + block_size - 1) / block_size; +} diff --git a/lib/std/debug/SelfInfo.zig b/lib/std/debug/SelfInfo.zig new file mode 100644 index 0000000000..c27f466fb3 --- /dev/null +++ b/lib/std/debug/SelfInfo.zig @@ -0,0 +1,2438 @@ +//! Cross-platform abstraction for this binary's own debug information, with a +//! goal of minimal code bloat and compilation speed penalty. + +const builtin = @import("builtin"); +const native_os = builtin.os.tag; +const native_endian = native_arch.endian(); +const native_arch = builtin.cpu.arch; + +const std = @import("../std.zig"); +const mem = std.mem; +const Allocator = std.mem.Allocator; +const windows = std.os.windows; +const macho = std.macho; +const fs = std.fs; +const coff = std.coff; +const pdb = std.pdb; +const assert = std.debug.assert; +const posix = std.posix; +const elf = std.elf; +const Dwarf = std.debug.Dwarf; +const Pdb = std.debug.Pdb; +const File = std.fs.File; +const math = std.math; +const testing = std.testing; +const StackIterator = std.debug.StackIterator; +const regBytes = Dwarf.abi.regBytes; +const regValueNative = Dwarf.abi.regValueNative; + +const SelfInfo = @This(); + +const root = @import("root"); + +allocator: Allocator, +address_map: std.AutoHashMap(usize, *Module), +modules: if (native_os == .windows) std.ArrayListUnmanaged(WindowsModule) else void, + +pub const OpenError = error{ + MissingDebugInfo, + UnsupportedOperatingSystem, +} || @typeInfo(@typeInfo(@TypeOf(SelfInfo.init)).Fn.return_type.?).ErrorUnion.error_set; + +pub fn open(allocator: Allocator) OpenError!SelfInfo { + nosuspend { + if (builtin.strip_debug_info) + return error.MissingDebugInfo; + switch (native_os) { + .linux, + .freebsd, + .netbsd, + .dragonfly, + .openbsd, + .macos, + .solaris, + .illumos, + .windows, + => return try SelfInfo.init(allocator), + else => return error.UnsupportedOperatingSystem, + } + } +} + +pub fn init(allocator: Allocator) !SelfInfo { + var debug_info: SelfInfo = .{ + .allocator = allocator, + .address_map = std.AutoHashMap(usize, *Module).init(allocator), + .modules = if (native_os == .windows) .{} else {}, + }; + + if (native_os == .windows) { + errdefer debug_info.modules.deinit(allocator); + + const handle = windows.kernel32.CreateToolhelp32Snapshot(windows.TH32CS_SNAPMODULE | windows.TH32CS_SNAPMODULE32, 0); + if (handle == windows.INVALID_HANDLE_VALUE) { + switch (windows.GetLastError()) { + else => |err| return windows.unexpectedError(err), + } + } + defer windows.CloseHandle(handle); + + var module_entry: windows.MODULEENTRY32 = undefined; + module_entry.dwSize = @sizeOf(windows.MODULEENTRY32); + if (windows.kernel32.Module32First(handle, &module_entry) == 0) { + return error.MissingDebugInfo; + } + + var module_valid = true; + while (module_valid) { + const module_info = try debug_info.modules.addOne(allocator); + const name = allocator.dupe(u8, mem.sliceTo(&module_entry.szModule, 0)) catch &.{}; + errdefer allocator.free(name); + + module_info.* = .{ + .base_address = @intFromPtr(module_entry.modBaseAddr), + .size = module_entry.modBaseSize, + .name = name, + .handle = module_entry.hModule, + }; + + module_valid = windows.kernel32.Module32Next(handle, &module_entry) == 1; + } + } + + return debug_info; +} + +pub fn deinit(self: *SelfInfo) void { + var it = self.address_map.iterator(); + while (it.next()) |entry| { + const mdi = entry.value_ptr.*; + mdi.deinit(self.allocator); + self.allocator.destroy(mdi); + } + self.address_map.deinit(); + if (native_os == .windows) { + for (self.modules.items) |module| { + self.allocator.free(module.name); + if (module.mapped_file) |mapped_file| mapped_file.deinit(); + } + self.modules.deinit(self.allocator); + } +} + +pub fn getModuleForAddress(self: *SelfInfo, address: usize) !*Module { + if (comptime builtin.target.isDarwin()) { + return self.lookupModuleDyld(address); + } else if (native_os == .windows) { + return self.lookupModuleWin32(address); + } else if (native_os == .haiku) { + return self.lookupModuleHaiku(address); + } else if (comptime builtin.target.isWasm()) { + return self.lookupModuleWasm(address); + } else { + return self.lookupModuleDl(address); + } +} + +// Returns the module name for a given address. +// This can be called when getModuleForAddress fails, so implementations should provide +// a path that doesn't rely on any side-effects of a prior successful module lookup. +pub fn getModuleNameForAddress(self: *SelfInfo, address: usize) ?[]const u8 { + if (comptime builtin.target.isDarwin()) { + return self.lookupModuleNameDyld(address); + } else if (native_os == .windows) { + return self.lookupModuleNameWin32(address); + } else if (native_os == .haiku) { + return null; + } else if (comptime builtin.target.isWasm()) { + return null; + } else { + return self.lookupModuleNameDl(address); + } +} + +fn lookupModuleDyld(self: *SelfInfo, address: usize) !*Module { + const image_count = std.c._dyld_image_count(); + + var i: u32 = 0; + while (i < image_count) : (i += 1) { + const header = std.c._dyld_get_image_header(i) orelse continue; + const base_address = @intFromPtr(header); + if (address < base_address) continue; + const vmaddr_slide = std.c._dyld_get_image_vmaddr_slide(i); + + var it = macho.LoadCommandIterator{ + .ncmds = header.ncmds, + .buffer = @alignCast(@as( + [*]u8, + @ptrFromInt(@intFromPtr(header) + @sizeOf(macho.mach_header_64)), + )[0..header.sizeofcmds]), + }; + + var unwind_info: ?[]const u8 = null; + var eh_frame: ?[]const u8 = null; + while (it.next()) |cmd| switch (cmd.cmd()) { + .SEGMENT_64 => { + const segment_cmd = cmd.cast(macho.segment_command_64).?; + if (!mem.eql(u8, "__TEXT", segment_cmd.segName())) continue; + + const seg_start = segment_cmd.vmaddr + vmaddr_slide; + const seg_end = seg_start + segment_cmd.vmsize; + if (address >= seg_start and address < seg_end) { + if (self.address_map.get(base_address)) |obj_di| { + return obj_di; + } + + for (cmd.getSections()) |sect| { + if (mem.eql(u8, "__unwind_info", sect.sectName())) { + unwind_info = @as([*]const u8, @ptrFromInt(sect.addr + vmaddr_slide))[0..sect.size]; + } else if (mem.eql(u8, "__eh_frame", sect.sectName())) { + eh_frame = @as([*]const u8, @ptrFromInt(sect.addr + vmaddr_slide))[0..sect.size]; + } + } + + const obj_di = try self.allocator.create(Module); + errdefer self.allocator.destroy(obj_di); + + const macho_path = mem.sliceTo(std.c._dyld_get_image_name(i), 0); + const macho_file = fs.cwd().openFile(macho_path, .{}) catch |err| switch (err) { + error.FileNotFound => return error.MissingDebugInfo, + else => return err, + }; + obj_di.* = try readMachODebugInfo(self.allocator, macho_file); + obj_di.base_address = base_address; + obj_di.vmaddr_slide = vmaddr_slide; + obj_di.unwind_info = unwind_info; + obj_di.eh_frame = eh_frame; + + try self.address_map.putNoClobber(base_address, obj_di); + + return obj_di; + } + }, + else => {}, + }; + } + + return error.MissingDebugInfo; +} + +fn lookupModuleNameDyld(self: *SelfInfo, address: usize) ?[]const u8 { + _ = self; + const image_count = std.c._dyld_image_count(); + + var i: u32 = 0; + while (i < image_count) : (i += 1) { + const header = std.c._dyld_get_image_header(i) orelse continue; + const base_address = @intFromPtr(header); + if (address < base_address) continue; + const vmaddr_slide = std.c._dyld_get_image_vmaddr_slide(i); + + var it = macho.LoadCommandIterator{ + .ncmds = header.ncmds, + .buffer = @alignCast(@as( + [*]u8, + @ptrFromInt(@intFromPtr(header) + @sizeOf(macho.mach_header_64)), + )[0..header.sizeofcmds]), + }; + + while (it.next()) |cmd| switch (cmd.cmd()) { + .SEGMENT_64 => { + const segment_cmd = cmd.cast(macho.segment_command_64).?; + if (!mem.eql(u8, "__TEXT", segment_cmd.segName())) continue; + + const original_address = address - vmaddr_slide; + const seg_start = segment_cmd.vmaddr; + const seg_end = seg_start + segment_cmd.vmsize; + if (original_address >= seg_start and original_address < seg_end) { + return fs.path.basename(mem.sliceTo(std.c._dyld_get_image_name(i), 0)); + } + }, + else => {}, + }; + } + + return null; +} + +fn lookupModuleWin32(self: *SelfInfo, address: usize) !*Module { + for (self.modules.items) |*module| { + if (address >= module.base_address and address < module.base_address + module.size) { + if (self.address_map.get(module.base_address)) |obj_di| { + return obj_di; + } + + const obj_di = try self.allocator.create(Module); + errdefer self.allocator.destroy(obj_di); + + const mapped_module = @as([*]const u8, @ptrFromInt(module.base_address))[0..module.size]; + var coff_obj = try coff.Coff.init(mapped_module, true); + + // The string table is not mapped into memory by the loader, so if a section name is in the + // string table then we have to map the full image file from disk. This can happen when + // a binary is produced with -gdwarf, since the section names are longer than 8 bytes. + if (coff_obj.strtabRequired()) { + var name_buffer: [windows.PATH_MAX_WIDE + 4:0]u16 = undefined; + // openFileAbsoluteW requires the prefix to be present + @memcpy(name_buffer[0..4], &[_]u16{ '\\', '?', '?', '\\' }); + + const process_handle = windows.GetCurrentProcess(); + const len = windows.kernel32.GetModuleFileNameExW( + process_handle, + module.handle, + @ptrCast(&name_buffer[4]), + windows.PATH_MAX_WIDE, + ); + + if (len == 0) return error.MissingDebugInfo; + const coff_file = fs.openFileAbsoluteW(name_buffer[0 .. len + 4 :0], .{}) catch |err| switch (err) { + error.FileNotFound => return error.MissingDebugInfo, + else => return err, + }; + errdefer coff_file.close(); + + var section_handle: windows.HANDLE = undefined; + const create_section_rc = windows.ntdll.NtCreateSection( + §ion_handle, + windows.STANDARD_RIGHTS_REQUIRED | windows.SECTION_QUERY | windows.SECTION_MAP_READ, + null, + null, + windows.PAGE_READONLY, + // The documentation states that if no AllocationAttribute is specified, then SEC_COMMIT is the default. + // In practice, this isn't the case and specifying 0 will result in INVALID_PARAMETER_6. + windows.SEC_COMMIT, + coff_file.handle, + ); + if (create_section_rc != .SUCCESS) return error.MissingDebugInfo; + errdefer windows.CloseHandle(section_handle); + + var coff_len: usize = 0; + var base_ptr: usize = 0; + const map_section_rc = windows.ntdll.NtMapViewOfSection( + section_handle, + process_handle, + @ptrCast(&base_ptr), + null, + 0, + null, + &coff_len, + .ViewUnmap, + 0, + windows.PAGE_READONLY, + ); + if (map_section_rc != .SUCCESS) return error.MissingDebugInfo; + errdefer assert(windows.ntdll.NtUnmapViewOfSection(process_handle, @ptrFromInt(base_ptr)) == .SUCCESS); + + const section_view = @as([*]const u8, @ptrFromInt(base_ptr))[0..coff_len]; + coff_obj = try coff.Coff.init(section_view, false); + + module.mapped_file = .{ + .file = coff_file, + .section_handle = section_handle, + .section_view = section_view, + }; + } + errdefer if (module.mapped_file) |mapped_file| mapped_file.deinit(); + + obj_di.* = try readCoffDebugInfo(self.allocator, &coff_obj); + obj_di.base_address = module.base_address; + + try self.address_map.putNoClobber(module.base_address, obj_di); + return obj_di; + } + } + + return error.MissingDebugInfo; +} + +fn lookupModuleNameWin32(self: *SelfInfo, address: usize) ?[]const u8 { + for (self.modules.items) |module| { + if (address >= module.base_address and address < module.base_address + module.size) { + return module.name; + } + } + return null; +} + +fn lookupModuleNameDl(self: *SelfInfo, address: usize) ?[]const u8 { + _ = self; + + var ctx: struct { + // Input + address: usize, + // Output + name: []const u8 = "", + } = .{ .address = address }; + const CtxTy = @TypeOf(ctx); + + if (posix.dl_iterate_phdr(&ctx, error{Found}, struct { + fn callback(info: *posix.dl_phdr_info, size: usize, context: *CtxTy) !void { + _ = size; + if (context.address < info.addr) return; + const phdrs = info.phdr[0..info.phnum]; + for (phdrs) |*phdr| { + if (phdr.p_type != elf.PT_LOAD) continue; + + const seg_start = info.addr +% phdr.p_vaddr; + const seg_end = seg_start + phdr.p_memsz; + if (context.address >= seg_start and context.address < seg_end) { + context.name = mem.sliceTo(info.name, 0) orelse ""; + break; + } + } else return; + + return error.Found; + } + }.callback)) { + return null; + } else |err| switch (err) { + error.Found => return fs.path.basename(ctx.name), + } + + return null; +} + +fn lookupModuleDl(self: *SelfInfo, address: usize) !*Module { + var ctx: struct { + // Input + address: usize, + // Output + base_address: usize = undefined, + name: []const u8 = undefined, + build_id: ?[]const u8 = null, + gnu_eh_frame: ?[]const u8 = null, + } = .{ .address = address }; + const CtxTy = @TypeOf(ctx); + + if (posix.dl_iterate_phdr(&ctx, error{Found}, struct { + fn callback(info: *posix.dl_phdr_info, size: usize, context: *CtxTy) !void { + _ = size; + // The base address is too high + if (context.address < info.addr) + return; + + const phdrs = info.phdr[0..info.phnum]; + for (phdrs) |*phdr| { + if (phdr.p_type != elf.PT_LOAD) continue; + + // Overflowing addition is used to handle the case of VSDOs having a p_vaddr = 0xffffffffff700000 + const seg_start = info.addr +% phdr.p_vaddr; + const seg_end = seg_start + phdr.p_memsz; + if (context.address >= seg_start and context.address < seg_end) { + // Android libc uses NULL instead of an empty string to mark the + // main program + context.name = mem.sliceTo(info.name, 0) orelse ""; + context.base_address = info.addr; + break; + } + } else return; + + for (info.phdr[0..info.phnum]) |phdr| { + switch (phdr.p_type) { + elf.PT_NOTE => { + // Look for .note.gnu.build-id + const note_bytes = @as([*]const u8, @ptrFromInt(info.addr + phdr.p_vaddr))[0..phdr.p_memsz]; + const name_size = mem.readInt(u32, note_bytes[0..4], native_endian); + if (name_size != 4) continue; + const desc_size = mem.readInt(u32, note_bytes[4..8], native_endian); + const note_type = mem.readInt(u32, note_bytes[8..12], native_endian); + if (note_type != elf.NT_GNU_BUILD_ID) continue; + if (!mem.eql(u8, "GNU\x00", note_bytes[12..16])) continue; + context.build_id = note_bytes[16..][0..desc_size]; + }, + elf.PT_GNU_EH_FRAME => { + context.gnu_eh_frame = @as([*]const u8, @ptrFromInt(info.addr + phdr.p_vaddr))[0..phdr.p_memsz]; + }, + else => {}, + } + } + + // Stop the iteration + return error.Found; + } + }.callback)) { + return error.MissingDebugInfo; + } else |err| switch (err) { + error.Found => {}, + } + + if (self.address_map.get(ctx.base_address)) |obj_di| { + return obj_di; + } + + const obj_di = try self.allocator.create(Module); + errdefer self.allocator.destroy(obj_di); + + var sections: Dwarf.SectionArray = Dwarf.null_section_array; + if (ctx.gnu_eh_frame) |eh_frame_hdr| { + // This is a special case - pointer offsets inside .eh_frame_hdr + // are encoded relative to its base address, so we must use the + // version that is already memory mapped, and not the one that + // will be mapped separately from the ELF file. + sections[@intFromEnum(Dwarf.Section.Id.eh_frame_hdr)] = .{ + .data = eh_frame_hdr, + .owned = false, + }; + } + + obj_di.* = try readElfDebugInfo(self.allocator, if (ctx.name.len > 0) ctx.name else null, ctx.build_id, null, §ions, null); + obj_di.base_address = ctx.base_address; + + // Missing unwind info isn't treated as a failure, as the unwinder will fall back to FP-based unwinding + obj_di.dwarf.scanAllUnwindInfo(self.allocator, ctx.base_address) catch {}; + + try self.address_map.putNoClobber(ctx.base_address, obj_di); + + return obj_di; +} + +fn lookupModuleHaiku(self: *SelfInfo, address: usize) !*Module { + _ = self; + _ = address; + @panic("TODO implement lookup module for Haiku"); +} + +fn lookupModuleWasm(self: *SelfInfo, address: usize) !*Module { + _ = self; + _ = address; + @panic("TODO implement lookup module for Wasm"); +} + +pub const Module = switch (native_os) { + .macos, .ios, .watchos, .tvos, .visionos => struct { + base_address: usize, + vmaddr_slide: usize, + mapped_memory: []align(mem.page_size) const u8, + symbols: []const MachoSymbol, + strings: [:0]const u8, + ofiles: OFileTable, + + // Backed by the in-memory sections mapped by the loader + unwind_info: ?[]const u8 = null, + eh_frame: ?[]const u8 = null, + + const OFileTable = std.StringHashMap(OFileInfo); + const OFileInfo = struct { + di: Dwarf, + addr_table: std.StringHashMap(u64), + }; + + pub fn deinit(self: *@This(), allocator: Allocator) void { + var it = self.ofiles.iterator(); + while (it.next()) |entry| { + const ofile = entry.value_ptr; + ofile.di.deinit(allocator); + ofile.addr_table.deinit(); + } + self.ofiles.deinit(); + allocator.free(self.symbols); + posix.munmap(self.mapped_memory); + } + + fn loadOFile(self: *@This(), allocator: Allocator, o_file_path: []const u8) !*OFileInfo { + const o_file = try fs.cwd().openFile(o_file_path, .{}); + const mapped_mem = try mapWholeFile(o_file); + + const hdr: *const macho.mach_header_64 = @ptrCast(@alignCast(mapped_mem.ptr)); + if (hdr.magic != std.macho.MH_MAGIC_64) + return error.InvalidDebugInfo; + + var segcmd: ?macho.LoadCommandIterator.LoadCommand = null; + var symtabcmd: ?macho.symtab_command = null; + var it = macho.LoadCommandIterator{ + .ncmds = hdr.ncmds, + .buffer = mapped_mem[@sizeOf(macho.mach_header_64)..][0..hdr.sizeofcmds], + }; + while (it.next()) |cmd| switch (cmd.cmd()) { + .SEGMENT_64 => segcmd = cmd, + .SYMTAB => symtabcmd = cmd.cast(macho.symtab_command).?, + else => {}, + }; + + if (segcmd == null or symtabcmd == null) return error.MissingDebugInfo; + + // Parse symbols + const strtab = @as( + [*]const u8, + @ptrCast(&mapped_mem[symtabcmd.?.stroff]), + )[0 .. symtabcmd.?.strsize - 1 :0]; + const symtab = @as( + [*]const macho.nlist_64, + @ptrCast(@alignCast(&mapped_mem[symtabcmd.?.symoff])), + )[0..symtabcmd.?.nsyms]; + + // TODO handle tentative (common) symbols + var addr_table = std.StringHashMap(u64).init(allocator); + try addr_table.ensureTotalCapacity(@as(u32, @intCast(symtab.len))); + for (symtab) |sym| { + if (sym.n_strx == 0) continue; + if (sym.undf() or sym.tentative() or sym.abs()) continue; + const sym_name = mem.sliceTo(strtab[sym.n_strx..], 0); + // TODO is it possible to have a symbol collision? + addr_table.putAssumeCapacityNoClobber(sym_name, sym.n_value); + } + + var sections: Dwarf.SectionArray = Dwarf.null_section_array; + if (self.eh_frame) |eh_frame| sections[@intFromEnum(Dwarf.Section.Id.eh_frame)] = .{ + .data = eh_frame, + .owned = false, + }; + + for (segcmd.?.getSections()) |sect| { + if (!std.mem.eql(u8, "__DWARF", sect.segName())) continue; + + var section_index: ?usize = null; + inline for (@typeInfo(Dwarf.Section.Id).Enum.fields, 0..) |section, i| { + if (mem.eql(u8, "__" ++ section.name, sect.sectName())) section_index = i; + } + if (section_index == null) continue; + + const section_bytes = try chopSlice(mapped_mem, sect.offset, sect.size); + sections[section_index.?] = .{ + .data = section_bytes, + .virtual_address = sect.addr, + .owned = false, + }; + } + + const missing_debug_info = + sections[@intFromEnum(Dwarf.Section.Id.debug_info)] == null or + sections[@intFromEnum(Dwarf.Section.Id.debug_abbrev)] == null or + sections[@intFromEnum(Dwarf.Section.Id.debug_str)] == null or + sections[@intFromEnum(Dwarf.Section.Id.debug_line)] == null; + if (missing_debug_info) return error.MissingDebugInfo; + + var di = Dwarf{ + .endian = .little, + .sections = sections, + .is_macho = true, + }; + + try Dwarf.open(&di, allocator); + const info = OFileInfo{ + .di = di, + .addr_table = addr_table, + }; + + // Add the debug info to the cache + const result = try self.ofiles.getOrPut(o_file_path); + assert(!result.found_existing); + result.value_ptr.* = info; + + return result.value_ptr; + } + + pub fn getSymbolAtAddress(self: *@This(), allocator: Allocator, address: usize) !SymbolInfo { + nosuspend { + const result = try self.getOFileInfoForAddress(allocator, address); + if (result.symbol == null) return .{}; + + // Take the symbol name from the N_FUN STAB entry, we're going to + // use it if we fail to find the DWARF infos + const stab_symbol = mem.sliceTo(self.strings[result.symbol.?.strx..], 0); + if (result.o_file_info == null) return .{ .symbol_name = stab_symbol }; + + // Translate again the address, this time into an address inside the + // .o file + const relocated_address_o = result.o_file_info.?.addr_table.get(stab_symbol) orelse return .{ + .symbol_name = "???", + }; + + const addr_off = result.relocated_address - result.symbol.?.addr; + const o_file_di = &result.o_file_info.?.di; + if (o_file_di.findCompileUnit(relocated_address_o)) |compile_unit| { + return SymbolInfo{ + .symbol_name = o_file_di.getSymbolName(relocated_address_o) orelse "???", + .compile_unit_name = compile_unit.die.getAttrString( + o_file_di, + std.dwarf.AT.name, + o_file_di.section(.debug_str), + compile_unit.*, + ) catch |err| switch (err) { + error.MissingDebugInfo, error.InvalidDebugInfo => "???", + }, + .line_info = o_file_di.getLineNumberInfo( + allocator, + compile_unit.*, + relocated_address_o + addr_off, + ) catch |err| switch (err) { + error.MissingDebugInfo, error.InvalidDebugInfo => null, + else => return err, + }, + }; + } else |err| switch (err) { + error.MissingDebugInfo, error.InvalidDebugInfo => { + return SymbolInfo{ .symbol_name = stab_symbol }; + }, + else => return err, + } + } + } + + pub fn getOFileInfoForAddress(self: *@This(), allocator: Allocator, address: usize) !struct { + relocated_address: usize, + symbol: ?*const MachoSymbol = null, + o_file_info: ?*OFileInfo = null, + } { + nosuspend { + // Translate the VA into an address into this object + const relocated_address = address - self.vmaddr_slide; + + // Find the .o file where this symbol is defined + const symbol = machoSearchSymbols(self.symbols, relocated_address) orelse return .{ + .relocated_address = relocated_address, + }; + + // Check if its debug infos are already in the cache + const o_file_path = mem.sliceTo(self.strings[symbol.ofile..], 0); + const o_file_info = self.ofiles.getPtr(o_file_path) orelse + (self.loadOFile(allocator, o_file_path) catch |err| switch (err) { + error.FileNotFound, + error.MissingDebugInfo, + error.InvalidDebugInfo, + => return .{ + .relocated_address = relocated_address, + .symbol = symbol, + }, + else => return err, + }); + + return .{ + .relocated_address = relocated_address, + .symbol = symbol, + .o_file_info = o_file_info, + }; + } + } + + pub fn getDwarfInfoForAddress(self: *@This(), allocator: Allocator, address: usize) !?*const Dwarf { + return if ((try self.getOFileInfoForAddress(allocator, address)).o_file_info) |o_file_info| &o_file_info.di else null; + } + }, + .uefi, .windows => struct { + base_address: usize, + pdb: ?Pdb = null, + dwarf: ?Dwarf = null, + coff_image_base: u64, + + /// Only used if pdb is non-null + coff_section_headers: []coff.SectionHeader, + + pub fn deinit(self: *@This(), allocator: Allocator) void { + if (self.dwarf) |*dwarf| { + dwarf.deinit(allocator); + } + + if (self.pdb) |*p| { + p.deinit(); + allocator.free(self.coff_section_headers); + } + } + + fn getSymbolFromPdb(self: *@This(), relocated_address: usize) !?SymbolInfo { + var coff_section: *align(1) const coff.SectionHeader = undefined; + const mod_index = for (self.pdb.?.sect_contribs) |sect_contrib| { + if (sect_contrib.Section > self.coff_section_headers.len) continue; + // Remember that SectionContribEntry.Section is 1-based. + coff_section = &self.coff_section_headers[sect_contrib.Section - 1]; + + const vaddr_start = coff_section.virtual_address + sect_contrib.Offset; + const vaddr_end = vaddr_start + sect_contrib.Size; + if (relocated_address >= vaddr_start and relocated_address < vaddr_end) { + break sect_contrib.ModuleIndex; + } + } else { + // we have no information to add to the address + return null; + }; + + const module = (try self.pdb.?.getModule(mod_index)) orelse + return error.InvalidDebugInfo; + const obj_basename = fs.path.basename(module.obj_file_name); + + const symbol_name = self.pdb.?.getSymbolName( + module, + relocated_address - coff_section.virtual_address, + ) orelse "???"; + const opt_line_info = try self.pdb.?.getLineNumberInfo( + module, + relocated_address - coff_section.virtual_address, + ); + + return SymbolInfo{ + .symbol_name = symbol_name, + .compile_unit_name = obj_basename, + .line_info = opt_line_info, + }; + } + + pub fn getSymbolAtAddress(self: *@This(), allocator: Allocator, address: usize) !SymbolInfo { + // Translate the VA into an address into this object + const relocated_address = address - self.base_address; + + if (self.pdb != null) { + if (try self.getSymbolFromPdb(relocated_address)) |symbol| return symbol; + } + + if (self.dwarf) |*dwarf| { + const dwarf_address = relocated_address + self.coff_image_base; + return getSymbolFromDwarf(allocator, dwarf_address, dwarf); + } + + return SymbolInfo{}; + } + + pub fn getDwarfInfoForAddress(self: *@This(), allocator: Allocator, address: usize) !?*const Dwarf { + _ = allocator; + _ = address; + + return switch (self.debug_data) { + .dwarf => |*dwarf| dwarf, + else => null, + }; + } + }, + .linux, .netbsd, .freebsd, .dragonfly, .openbsd, .haiku, .solaris, .illumos => struct { + base_address: usize, + dwarf: Dwarf, + mapped_memory: []align(mem.page_size) const u8, + external_mapped_memory: ?[]align(mem.page_size) const u8, + + pub fn deinit(self: *@This(), allocator: Allocator) void { + self.dwarf.deinit(allocator); + posix.munmap(self.mapped_memory); + if (self.external_mapped_memory) |m| posix.munmap(m); + } + + pub fn getSymbolAtAddress(self: *@This(), allocator: Allocator, address: usize) !SymbolInfo { + // Translate the VA into an address into this object + const relocated_address = address - self.base_address; + return getSymbolFromDwarf(allocator, relocated_address, &self.dwarf); + } + + pub fn getDwarfInfoForAddress(self: *@This(), allocator: Allocator, address: usize) !?*const Dwarf { + _ = allocator; + _ = address; + return &self.dwarf; + } + }, + .wasi, .emscripten => struct { + pub fn deinit(self: *@This(), allocator: Allocator) void { + _ = self; + _ = allocator; + } + + pub fn getSymbolAtAddress(self: *@This(), allocator: Allocator, address: usize) !SymbolInfo { + _ = self; + _ = allocator; + _ = address; + return SymbolInfo{}; + } + + pub fn getDwarfInfoForAddress(self: *@This(), allocator: Allocator, address: usize) !?*const Dwarf { + _ = self; + _ = allocator; + _ = address; + return null; + } + }, + else => Dwarf, +}; + +/// How is this different than `Module` when the host is Windows? +/// Why are both stored in the `SelfInfo` struct? +/// Boy, it sure would be nice if someone added documentation comments for this +/// struct explaining it. +pub const WindowsModule = struct { + base_address: usize, + size: u32, + name: []const u8, + handle: windows.HMODULE, + + // Set when the image file needed to be mapped from disk + mapped_file: ?struct { + file: File, + section_handle: windows.HANDLE, + section_view: []const u8, + + pub fn deinit(self: @This()) void { + const process_handle = windows.GetCurrentProcess(); + assert(windows.ntdll.NtUnmapViewOfSection(process_handle, @constCast(@ptrCast(self.section_view.ptr))) == .SUCCESS); + windows.CloseHandle(self.section_handle); + self.file.close(); + } + } = null, +}; + +/// This takes ownership of macho_file: users of this function should not close +/// it themselves, even on error. +/// TODO it's weird to take ownership even on error, rework this code. +fn readMachODebugInfo(allocator: Allocator, macho_file: File) !Module { + const mapped_mem = try mapWholeFile(macho_file); + + const hdr: *const macho.mach_header_64 = @ptrCast(@alignCast(mapped_mem.ptr)); + if (hdr.magic != macho.MH_MAGIC_64) + return error.InvalidDebugInfo; + + var it = macho.LoadCommandIterator{ + .ncmds = hdr.ncmds, + .buffer = mapped_mem[@sizeOf(macho.mach_header_64)..][0..hdr.sizeofcmds], + }; + const symtab = while (it.next()) |cmd| switch (cmd.cmd()) { + .SYMTAB => break cmd.cast(macho.symtab_command).?, + else => {}, + } else return error.MissingDebugInfo; + + const syms = @as( + [*]const macho.nlist_64, + @ptrCast(@alignCast(&mapped_mem[symtab.symoff])), + )[0..symtab.nsyms]; + const strings = mapped_mem[symtab.stroff..][0 .. symtab.strsize - 1 :0]; + + const symbols_buf = try allocator.alloc(MachoSymbol, syms.len); + + var ofile: u32 = undefined; + var last_sym: MachoSymbol = undefined; + var symbol_index: usize = 0; + var state: enum { + init, + oso_open, + oso_close, + bnsym, + fun_strx, + fun_size, + ensym, + } = .init; + + for (syms) |*sym| { + if (!sym.stab()) continue; + + // TODO handle globals N_GSYM, and statics N_STSYM + switch (sym.n_type) { + macho.N_OSO => { + switch (state) { + .init, .oso_close => { + state = .oso_open; + ofile = sym.n_strx; + }, + else => return error.InvalidDebugInfo, + } + }, + macho.N_BNSYM => { + switch (state) { + .oso_open, .ensym => { + state = .bnsym; + last_sym = .{ + .strx = 0, + .addr = sym.n_value, + .size = 0, + .ofile = ofile, + }; + }, + else => return error.InvalidDebugInfo, + } + }, + macho.N_FUN => { + switch (state) { + .bnsym => { + state = .fun_strx; + last_sym.strx = sym.n_strx; + }, + .fun_strx => { + state = .fun_size; + last_sym.size = @as(u32, @intCast(sym.n_value)); + }, + else => return error.InvalidDebugInfo, + } + }, + macho.N_ENSYM => { + switch (state) { + .fun_size => { + state = .ensym; + symbols_buf[symbol_index] = last_sym; + symbol_index += 1; + }, + else => return error.InvalidDebugInfo, + } + }, + macho.N_SO => { + switch (state) { + .init, .oso_close => {}, + .oso_open, .ensym => { + state = .oso_close; + }, + else => return error.InvalidDebugInfo, + } + }, + else => {}, + } + } + + switch (state) { + .init => return error.MissingDebugInfo, + .oso_close => {}, + else => return error.InvalidDebugInfo, + } + + const symbols = try allocator.realloc(symbols_buf, symbol_index); + + // Even though lld emits symbols in ascending order, this debug code + // should work for programs linked in any valid way. + // This sort is so that we can binary search later. + mem.sort(MachoSymbol, symbols, {}, MachoSymbol.addressLessThan); + + return .{ + .base_address = undefined, + .vmaddr_slide = undefined, + .mapped_memory = mapped_mem, + .ofiles = Module.OFileTable.init(allocator), + .symbols = symbols, + .strings = strings, + }; +} + +fn readCoffDebugInfo(allocator: Allocator, coff_obj: *coff.Coff) !Module { + nosuspend { + var di: Module = .{ + .base_address = undefined, + .coff_image_base = coff_obj.getImageBase(), + .coff_section_headers = undefined, + }; + + if (coff_obj.getSectionByName(".debug_info")) |_| { + // This coff file has embedded DWARF debug info + var sections: Dwarf.SectionArray = Dwarf.null_section_array; + errdefer for (sections) |section| if (section) |s| if (s.owned) allocator.free(s.data); + + inline for (@typeInfo(Dwarf.Section.Id).Enum.fields, 0..) |section, i| { + sections[i] = if (coff_obj.getSectionByName("." ++ section.name)) |section_header| blk: { + break :blk .{ + .data = try coff_obj.getSectionDataAlloc(section_header, allocator), + .virtual_address = section_header.virtual_address, + .owned = true, + }; + } else null; + } + + var dwarf = Dwarf{ + .endian = native_endian, + .sections = sections, + .is_macho = false, + }; + + try Dwarf.open(&dwarf, allocator); + di.dwarf = dwarf; + } + + const raw_path = try coff_obj.getPdbPath() orelse return di; + const path = blk: { + if (fs.path.isAbsolute(raw_path)) { + break :blk raw_path; + } else { + const self_dir = try fs.selfExeDirPathAlloc(allocator); + defer allocator.free(self_dir); + break :blk try fs.path.join(allocator, &.{ self_dir, raw_path }); + } + }; + defer if (path.ptr != raw_path.ptr) allocator.free(path); + + di.pdb = Pdb.init(allocator, path) catch |err| switch (err) { + error.FileNotFound, error.IsDir => { + if (di.dwarf == null) return error.MissingDebugInfo; + return di; + }, + else => return err, + }; + try di.pdb.?.parseInfoStream(); + try di.pdb.?.parseDbiStream(); + + if (!mem.eql(u8, &coff_obj.guid, &di.pdb.?.guid) or coff_obj.age != di.pdb.?.age) + return error.InvalidDebugInfo; + + // Only used by the pdb path + di.coff_section_headers = try coff_obj.getSectionHeadersAlloc(allocator); + errdefer allocator.free(di.coff_section_headers); + + return di; + } +} + +/// Reads debug info from an ELF file, or the current binary if none in specified. +/// If the required sections aren't present but a reference to external debug info is, +/// then this this function will recurse to attempt to load the debug sections from +/// an external file. +pub fn readElfDebugInfo( + allocator: Allocator, + elf_filename: ?[]const u8, + build_id: ?[]const u8, + expected_crc: ?u32, + parent_sections: *Dwarf.SectionArray, + parent_mapped_mem: ?[]align(mem.page_size) const u8, +) !Module { + nosuspend { + const elf_file = (if (elf_filename) |filename| blk: { + break :blk fs.cwd().openFile(filename, .{}); + } else fs.openSelfExe(.{})) catch |err| switch (err) { + error.FileNotFound => return error.MissingDebugInfo, + else => return err, + }; + + const mapped_mem = try mapWholeFile(elf_file); + if (expected_crc) |crc| if (crc != std.hash.crc.Crc32.hash(mapped_mem)) return error.InvalidDebugInfo; + + const hdr: *const elf.Ehdr = @ptrCast(&mapped_mem[0]); + if (!mem.eql(u8, hdr.e_ident[0..4], elf.MAGIC)) return error.InvalidElfMagic; + if (hdr.e_ident[elf.EI_VERSION] != 1) return error.InvalidElfVersion; + + const endian: std.builtin.Endian = switch (hdr.e_ident[elf.EI_DATA]) { + elf.ELFDATA2LSB => .little, + elf.ELFDATA2MSB => .big, + else => return error.InvalidElfEndian, + }; + assert(endian == native_endian); // this is our own debug info + + const shoff = hdr.e_shoff; + const str_section_off = shoff + @as(u64, hdr.e_shentsize) * @as(u64, hdr.e_shstrndx); + const str_shdr: *const elf.Shdr = @ptrCast(@alignCast(&mapped_mem[math.cast(usize, str_section_off) orelse return error.Overflow])); + const header_strings = mapped_mem[str_shdr.sh_offset..][0..str_shdr.sh_size]; + const shdrs = @as( + [*]const elf.Shdr, + @ptrCast(@alignCast(&mapped_mem[shoff])), + )[0..hdr.e_shnum]; + + var sections: Dwarf.SectionArray = Dwarf.null_section_array; + + // Combine section list. This takes ownership over any owned sections from the parent scope. + for (parent_sections, §ions) |*parent, *section| { + if (parent.*) |*p| { + section.* = p.*; + p.owned = false; + } + } + errdefer for (sections) |section| if (section) |s| if (s.owned) allocator.free(s.data); + + var separate_debug_filename: ?[]const u8 = null; + var separate_debug_crc: ?u32 = null; + + for (shdrs) |*shdr| { + if (shdr.sh_type == elf.SHT_NULL or shdr.sh_type == elf.SHT_NOBITS) continue; + const name = mem.sliceTo(header_strings[shdr.sh_name..], 0); + + if (mem.eql(u8, name, ".gnu_debuglink")) { + const gnu_debuglink = try chopSlice(mapped_mem, shdr.sh_offset, shdr.sh_size); + const debug_filename = mem.sliceTo(@as([*:0]const u8, @ptrCast(gnu_debuglink.ptr)), 0); + const crc_offset = mem.alignForward(usize, @intFromPtr(&debug_filename[debug_filename.len]) + 1, 4) - @intFromPtr(gnu_debuglink.ptr); + const crc_bytes = gnu_debuglink[crc_offset..][0..4]; + separate_debug_crc = mem.readInt(u32, crc_bytes, native_endian); + separate_debug_filename = debug_filename; + continue; + } + + var section_index: ?usize = null; + inline for (@typeInfo(Dwarf.Section.Id).Enum.fields, 0..) |section, i| { + if (mem.eql(u8, "." ++ section.name, name)) section_index = i; + } + if (section_index == null) continue; + if (sections[section_index.?] != null) continue; + + const section_bytes = try chopSlice(mapped_mem, shdr.sh_offset, shdr.sh_size); + sections[section_index.?] = if ((shdr.sh_flags & elf.SHF_COMPRESSED) > 0) blk: { + var section_stream = std.io.fixedBufferStream(section_bytes); + var section_reader = section_stream.reader(); + const chdr = section_reader.readStruct(elf.Chdr) catch continue; + if (chdr.ch_type != .ZLIB) continue; + + var zlib_stream = std.compress.zlib.decompressor(section_stream.reader()); + + const decompressed_section = try allocator.alloc(u8, chdr.ch_size); + errdefer allocator.free(decompressed_section); + + const read = zlib_stream.reader().readAll(decompressed_section) catch continue; + assert(read == decompressed_section.len); + + break :blk .{ + .data = decompressed_section, + .virtual_address = shdr.sh_addr, + .owned = true, + }; + } else .{ + .data = section_bytes, + .virtual_address = shdr.sh_addr, + .owned = false, + }; + } + + const missing_debug_info = + sections[@intFromEnum(Dwarf.Section.Id.debug_info)] == null or + sections[@intFromEnum(Dwarf.Section.Id.debug_abbrev)] == null or + sections[@intFromEnum(Dwarf.Section.Id.debug_str)] == null or + sections[@intFromEnum(Dwarf.Section.Id.debug_line)] == null; + + // Attempt to load debug info from an external file + // See: https://sourceware.org/gdb/onlinedocs/gdb/Separate-Debug-Files.html + if (missing_debug_info) { + + // Only allow one level of debug info nesting + if (parent_mapped_mem) |_| { + return error.MissingDebugInfo; + } + + const global_debug_directories = [_][]const u8{ + "/usr/lib/debug", + }; + + // <global debug directory>/.build-id/<2-character id prefix>/<id remainder>.debug + if (build_id) |id| blk: { + if (id.len < 3) break :blk; + + // Either md5 (16 bytes) or sha1 (20 bytes) are used here in practice + const extension = ".debug"; + var id_prefix_buf: [2]u8 = undefined; + var filename_buf: [38 + extension.len]u8 = undefined; + + _ = std.fmt.bufPrint(&id_prefix_buf, "{s}", .{std.fmt.fmtSliceHexLower(id[0..1])}) catch unreachable; + const filename = std.fmt.bufPrint( + &filename_buf, + "{s}" ++ extension, + .{std.fmt.fmtSliceHexLower(id[1..])}, + ) catch break :blk; + + for (global_debug_directories) |global_directory| { + const path = try fs.path.join(allocator, &.{ global_directory, ".build-id", &id_prefix_buf, filename }); + defer allocator.free(path); + + return readElfDebugInfo(allocator, path, null, separate_debug_crc, §ions, mapped_mem) catch continue; + } + } + + // use the path from .gnu_debuglink, in the same search order as gdb + if (separate_debug_filename) |separate_filename| blk: { + if (elf_filename != null and mem.eql(u8, elf_filename.?, separate_filename)) return error.MissingDebugInfo; + + // <cwd>/<gnu_debuglink> + if (readElfDebugInfo(allocator, separate_filename, null, separate_debug_crc, §ions, mapped_mem)) |debug_info| return debug_info else |_| {} + + // <cwd>/.debug/<gnu_debuglink> + { + const path = try fs.path.join(allocator, &.{ ".debug", separate_filename }); + defer allocator.free(path); + + if (readElfDebugInfo(allocator, path, null, separate_debug_crc, §ions, mapped_mem)) |debug_info| return debug_info else |_| {} + } + + var cwd_buf: [fs.max_path_bytes]u8 = undefined; + const cwd_path = posix.realpath(".", &cwd_buf) catch break :blk; + + // <global debug directory>/<absolute folder of current binary>/<gnu_debuglink> + for (global_debug_directories) |global_directory| { + const path = try fs.path.join(allocator, &.{ global_directory, cwd_path, separate_filename }); + defer allocator.free(path); + if (readElfDebugInfo(allocator, path, null, separate_debug_crc, §ions, mapped_mem)) |debug_info| return debug_info else |_| {} + } + } + + return error.MissingDebugInfo; + } + + var di = Dwarf{ + .endian = endian, + .sections = sections, + .is_macho = false, + }; + + try Dwarf.open(&di, allocator); + + return .{ + .base_address = undefined, + .dwarf = di, + .mapped_memory = parent_mapped_mem orelse mapped_mem, + .external_mapped_memory = if (parent_mapped_mem != null) mapped_mem else null, + }; + } +} + +const MachoSymbol = struct { + strx: u32, + addr: u64, + size: u32, + ofile: u32, + + /// Returns the address from the macho file + fn address(self: MachoSymbol) u64 { + return self.addr; + } + + fn addressLessThan(context: void, lhs: MachoSymbol, rhs: MachoSymbol) bool { + _ = context; + return lhs.addr < rhs.addr; + } +}; + +/// Takes ownership of file, even on error. +/// TODO it's weird to take ownership even on error, rework this code. +fn mapWholeFile(file: File) ![]align(mem.page_size) const u8 { + nosuspend { + defer file.close(); + + const file_len = math.cast(usize, try file.getEndPos()) orelse math.maxInt(usize); + const mapped_mem = try posix.mmap( + null, + file_len, + posix.PROT.READ, + .{ .TYPE = .SHARED }, + file.handle, + 0, + ); + errdefer posix.munmap(mapped_mem); + + return mapped_mem; + } +} + +fn chopSlice(ptr: []const u8, offset: u64, size: u64) error{Overflow}![]const u8 { + const start = math.cast(usize, offset) orelse return error.Overflow; + const end = start + (math.cast(usize, size) orelse return error.Overflow); + return ptr[start..end]; +} + +pub const SymbolInfo = struct { + symbol_name: []const u8 = "???", + compile_unit_name: []const u8 = "???", + line_info: ?std.debug.SourceLocation = null, + + pub fn deinit(self: SymbolInfo, allocator: Allocator) void { + if (self.line_info) |li| allocator.free(li.file_name); + } +}; + +fn machoSearchSymbols(symbols: []const MachoSymbol, address: usize) ?*const MachoSymbol { + var min: usize = 0; + var max: usize = symbols.len - 1; + while (min < max) { + const mid = min + (max - min) / 2; + const curr = &symbols[mid]; + const next = &symbols[mid + 1]; + if (address >= next.address()) { + min = mid + 1; + } else if (address < curr.address()) { + max = mid; + } else { + return curr; + } + } + + const max_sym = &symbols[symbols.len - 1]; + if (address >= max_sym.address()) + return max_sym; + + return null; +} + +test machoSearchSymbols { + const symbols = [_]MachoSymbol{ + .{ .addr = 100, .strx = undefined, .size = undefined, .ofile = undefined }, + .{ .addr = 200, .strx = undefined, .size = undefined, .ofile = undefined }, + .{ .addr = 300, .strx = undefined, .size = undefined, .ofile = undefined }, + }; + + try testing.expectEqual(null, machoSearchSymbols(&symbols, 0)); + try testing.expectEqual(null, machoSearchSymbols(&symbols, 99)); + try testing.expectEqual(&symbols[0], machoSearchSymbols(&symbols, 100).?); + try testing.expectEqual(&symbols[0], machoSearchSymbols(&symbols, 150).?); + try testing.expectEqual(&symbols[0], machoSearchSymbols(&symbols, 199).?); + + try testing.expectEqual(&symbols[1], machoSearchSymbols(&symbols, 200).?); + try testing.expectEqual(&symbols[1], machoSearchSymbols(&symbols, 250).?); + try testing.expectEqual(&symbols[1], machoSearchSymbols(&symbols, 299).?); + + try testing.expectEqual(&symbols[2], machoSearchSymbols(&symbols, 300).?); + try testing.expectEqual(&symbols[2], machoSearchSymbols(&symbols, 301).?); + try testing.expectEqual(&symbols[2], machoSearchSymbols(&symbols, 5000).?); +} + +fn getSymbolFromDwarf(allocator: Allocator, address: u64, di: *Dwarf) !SymbolInfo { + if (nosuspend di.findCompileUnit(address)) |compile_unit| { + return SymbolInfo{ + .symbol_name = nosuspend di.getSymbolName(address) orelse "???", + .compile_unit_name = compile_unit.die.getAttrString(di, std.dwarf.AT.name, di.section(.debug_str), compile_unit.*) catch |err| switch (err) { + error.MissingDebugInfo, error.InvalidDebugInfo => "???", + }, + .line_info = nosuspend di.getLineNumberInfo(allocator, compile_unit.*, address) catch |err| switch (err) { + error.MissingDebugInfo, error.InvalidDebugInfo => null, + else => return err, + }, + }; + } else |err| switch (err) { + error.MissingDebugInfo, error.InvalidDebugInfo => { + return SymbolInfo{}; + }, + else => return err, + } +} + +/// Unwind a frame using MachO compact unwind info (from __unwind_info). +/// If the compact encoding can't encode a way to unwind a frame, it will +/// defer unwinding to DWARF, in which case `.eh_frame` will be used if available. +pub fn unwindFrameMachO( + context: *UnwindContext, + ma: *std.debug.MemoryAccessor, + unwind_info: []const u8, + eh_frame: ?[]const u8, + module_base_address: usize, +) !usize { + const header = std.mem.bytesAsValue( + macho.unwind_info_section_header, + unwind_info[0..@sizeOf(macho.unwind_info_section_header)], + ); + const indices = std.mem.bytesAsSlice( + macho.unwind_info_section_header_index_entry, + unwind_info[header.indexSectionOffset..][0 .. header.indexCount * @sizeOf(macho.unwind_info_section_header_index_entry)], + ); + if (indices.len == 0) return error.MissingUnwindInfo; + + const mapped_pc = context.pc - module_base_address; + const second_level_index = blk: { + var left: usize = 0; + var len: usize = indices.len; + + while (len > 1) { + const mid = left + len / 2; + const offset = indices[mid].functionOffset; + if (mapped_pc < offset) { + len /= 2; + } else { + left = mid; + if (mapped_pc == offset) break; + len -= len / 2; + } + } + + // Last index is a sentinel containing the highest address as its functionOffset + if (indices[left].secondLevelPagesSectionOffset == 0) return error.MissingUnwindInfo; + break :blk &indices[left]; + }; + + const common_encodings = std.mem.bytesAsSlice( + macho.compact_unwind_encoding_t, + unwind_info[header.commonEncodingsArraySectionOffset..][0 .. header.commonEncodingsArrayCount * @sizeOf(macho.compact_unwind_encoding_t)], + ); + + const start_offset = second_level_index.secondLevelPagesSectionOffset; + const kind = std.mem.bytesAsValue( + macho.UNWIND_SECOND_LEVEL, + unwind_info[start_offset..][0..@sizeOf(macho.UNWIND_SECOND_LEVEL)], + ); + + const entry: struct { + function_offset: usize, + raw_encoding: u32, + } = switch (kind.*) { + .REGULAR => blk: { + const page_header = std.mem.bytesAsValue( + macho.unwind_info_regular_second_level_page_header, + unwind_info[start_offset..][0..@sizeOf(macho.unwind_info_regular_second_level_page_header)], + ); + + const entries = std.mem.bytesAsSlice( + macho.unwind_info_regular_second_level_entry, + unwind_info[start_offset + page_header.entryPageOffset ..][0 .. page_header.entryCount * @sizeOf(macho.unwind_info_regular_second_level_entry)], + ); + if (entries.len == 0) return error.InvalidUnwindInfo; + + var left: usize = 0; + var len: usize = entries.len; + while (len > 1) { + const mid = left + len / 2; + const offset = entries[mid].functionOffset; + if (mapped_pc < offset) { + len /= 2; + } else { + left = mid; + if (mapped_pc == offset) break; + len -= len / 2; + } + } + + break :blk .{ + .function_offset = entries[left].functionOffset, + .raw_encoding = entries[left].encoding, + }; + }, + .COMPRESSED => blk: { + const page_header = std.mem.bytesAsValue( + macho.unwind_info_compressed_second_level_page_header, + unwind_info[start_offset..][0..@sizeOf(macho.unwind_info_compressed_second_level_page_header)], + ); + + const entries = std.mem.bytesAsSlice( + macho.UnwindInfoCompressedEntry, + unwind_info[start_offset + page_header.entryPageOffset ..][0 .. page_header.entryCount * @sizeOf(macho.UnwindInfoCompressedEntry)], + ); + if (entries.len == 0) return error.InvalidUnwindInfo; + + var left: usize = 0; + var len: usize = entries.len; + while (len > 1) { + const mid = left + len / 2; + const offset = second_level_index.functionOffset + entries[mid].funcOffset; + if (mapped_pc < offset) { + len /= 2; + } else { + left = mid; + if (mapped_pc == offset) break; + len -= len / 2; + } + } + + const entry = entries[left]; + const function_offset = second_level_index.functionOffset + entry.funcOffset; + if (entry.encodingIndex < header.commonEncodingsArrayCount) { + if (entry.encodingIndex >= common_encodings.len) return error.InvalidUnwindInfo; + break :blk .{ + .function_offset = function_offset, + .raw_encoding = common_encodings[entry.encodingIndex], + }; + } else { + const local_index = try math.sub( + u8, + entry.encodingIndex, + math.cast(u8, header.commonEncodingsArrayCount) orelse return error.InvalidUnwindInfo, + ); + const local_encodings = std.mem.bytesAsSlice( + macho.compact_unwind_encoding_t, + unwind_info[start_offset + page_header.encodingsPageOffset ..][0 .. page_header.encodingsCount * @sizeOf(macho.compact_unwind_encoding_t)], + ); + if (local_index >= local_encodings.len) return error.InvalidUnwindInfo; + break :blk .{ + .function_offset = function_offset, + .raw_encoding = local_encodings[local_index], + }; + } + }, + else => return error.InvalidUnwindInfo, + }; + + if (entry.raw_encoding == 0) return error.NoUnwindInfo; + const reg_context = Dwarf.abi.RegisterContext{ + .eh_frame = false, + .is_macho = true, + }; + + const encoding: macho.CompactUnwindEncoding = @bitCast(entry.raw_encoding); + const new_ip = switch (builtin.cpu.arch) { + .x86_64 => switch (encoding.mode.x86_64) { + .OLD => return error.UnimplementedUnwindEncoding, + .RBP_FRAME => blk: { + const regs: [5]u3 = .{ + encoding.value.x86_64.frame.reg0, + encoding.value.x86_64.frame.reg1, + encoding.value.x86_64.frame.reg2, + encoding.value.x86_64.frame.reg3, + encoding.value.x86_64.frame.reg4, + }; + + const frame_offset = encoding.value.x86_64.frame.frame_offset * @sizeOf(usize); + var max_reg: usize = 0; + inline for (regs, 0..) |reg, i| { + if (reg > 0) max_reg = i; + } + + const fp = (try regValueNative(context.thread_context, fpRegNum(reg_context), reg_context)).*; + const new_sp = fp + 2 * @sizeOf(usize); + + // Verify the stack range we're about to read register values from + if (ma.load(usize, new_sp) == null or ma.load(usize, fp - frame_offset + max_reg * @sizeOf(usize)) == null) return error.InvalidUnwindInfo; + + const ip_ptr = fp + @sizeOf(usize); + const new_ip = @as(*const usize, @ptrFromInt(ip_ptr)).*; + const new_fp = @as(*const usize, @ptrFromInt(fp)).*; + + (try regValueNative(context.thread_context, fpRegNum(reg_context), reg_context)).* = new_fp; + (try regValueNative(context.thread_context, spRegNum(reg_context), reg_context)).* = new_sp; + (try regValueNative(context.thread_context, ip_reg_num, reg_context)).* = new_ip; + + for (regs, 0..) |reg, i| { + if (reg == 0) continue; + const addr = fp - frame_offset + i * @sizeOf(usize); + const reg_number = try Dwarf.compactUnwindToDwarfRegNumber(reg); + (try regValueNative(context.thread_context, reg_number, reg_context)).* = @as(*const usize, @ptrFromInt(addr)).*; + } + + break :blk new_ip; + }, + .STACK_IMMD, + .STACK_IND, + => blk: { + const sp = (try regValueNative(context.thread_context, spRegNum(reg_context), reg_context)).*; + const stack_size = if (encoding.mode.x86_64 == .STACK_IMMD) + @as(usize, encoding.value.x86_64.frameless.stack.direct.stack_size) * @sizeOf(usize) + else stack_size: { + // In .STACK_IND, the stack size is inferred from the subq instruction at the beginning of the function. + const sub_offset_addr = + module_base_address + + entry.function_offset + + encoding.value.x86_64.frameless.stack.indirect.sub_offset; + if (ma.load(usize, sub_offset_addr) == null) return error.InvalidUnwindInfo; + + // `sub_offset_addr` points to the offset of the literal within the instruction + const sub_operand = @as(*align(1) const u32, @ptrFromInt(sub_offset_addr)).*; + break :stack_size sub_operand + @sizeOf(usize) * @as(usize, encoding.value.x86_64.frameless.stack.indirect.stack_adjust); + }; + + // Decode the Lehmer-coded sequence of registers. + // For a description of the encoding see lib/libc/include/any-macos.13-any/mach-o/compact_unwind_encoding.h + + // Decode the variable-based permutation number into its digits. Each digit represents + // an index into the list of register numbers that weren't yet used in the sequence at + // the time the digit was added. + const reg_count = encoding.value.x86_64.frameless.stack_reg_count; + const ip_ptr = if (reg_count > 0) reg_blk: { + var digits: [6]u3 = undefined; + var accumulator: usize = encoding.value.x86_64.frameless.stack_reg_permutation; + var base: usize = 2; + for (0..reg_count) |i| { + const div = accumulator / base; + digits[digits.len - 1 - i] = @intCast(accumulator - base * div); + accumulator = div; + base += 1; + } + + const reg_numbers = [_]u3{ 1, 2, 3, 4, 5, 6 }; + var registers: [reg_numbers.len]u3 = undefined; + var used_indices = [_]bool{false} ** reg_numbers.len; + for (digits[digits.len - reg_count ..], 0..) |target_unused_index, i| { + var unused_count: u8 = 0; + const unused_index = for (used_indices, 0..) |used, index| { + if (!used) { + if (target_unused_index == unused_count) break index; + unused_count += 1; + } + } else unreachable; + + registers[i] = reg_numbers[unused_index]; + used_indices[unused_index] = true; + } + + var reg_addr = sp + stack_size - @sizeOf(usize) * @as(usize, reg_count + 1); + if (ma.load(usize, reg_addr) == null) return error.InvalidUnwindInfo; + for (0..reg_count) |i| { + const reg_number = try Dwarf.compactUnwindToDwarfRegNumber(registers[i]); + (try regValueNative(context.thread_context, reg_number, reg_context)).* = @as(*const usize, @ptrFromInt(reg_addr)).*; + reg_addr += @sizeOf(usize); + } + + break :reg_blk reg_addr; + } else sp + stack_size - @sizeOf(usize); + + const new_ip = @as(*const usize, @ptrFromInt(ip_ptr)).*; + const new_sp = ip_ptr + @sizeOf(usize); + if (ma.load(usize, new_sp) == null) return error.InvalidUnwindInfo; + + (try regValueNative(context.thread_context, spRegNum(reg_context), reg_context)).* = new_sp; + (try regValueNative(context.thread_context, ip_reg_num, reg_context)).* = new_ip; + + break :blk new_ip; + }, + .DWARF => { + return unwindFrameMachODwarf(context, ma, eh_frame orelse return error.MissingEhFrame, @intCast(encoding.value.x86_64.dwarf)); + }, + }, + .aarch64 => switch (encoding.mode.arm64) { + .OLD => return error.UnimplementedUnwindEncoding, + .FRAMELESS => blk: { + const sp = (try regValueNative(context.thread_context, spRegNum(reg_context), reg_context)).*; + const new_sp = sp + encoding.value.arm64.frameless.stack_size * 16; + const new_ip = (try regValueNative(context.thread_context, 30, reg_context)).*; + if (ma.load(usize, new_sp) == null) return error.InvalidUnwindInfo; + (try regValueNative(context.thread_context, spRegNum(reg_context), reg_context)).* = new_sp; + break :blk new_ip; + }, + .DWARF => { + return unwindFrameMachODwarf(context, ma, eh_frame orelse return error.MissingEhFrame, @intCast(encoding.value.arm64.dwarf)); + }, + .FRAME => blk: { + const fp = (try regValueNative(context.thread_context, fpRegNum(reg_context), reg_context)).*; + const new_sp = fp + 16; + const ip_ptr = fp + @sizeOf(usize); + + const num_restored_pairs: usize = + @popCount(@as(u5, @bitCast(encoding.value.arm64.frame.x_reg_pairs))) + + @popCount(@as(u4, @bitCast(encoding.value.arm64.frame.d_reg_pairs))); + const min_reg_addr = fp - num_restored_pairs * 2 * @sizeOf(usize); + + if (ma.load(usize, new_sp) == null or ma.load(usize, min_reg_addr) == null) return error.InvalidUnwindInfo; + + var reg_addr = fp - @sizeOf(usize); + inline for (@typeInfo(@TypeOf(encoding.value.arm64.frame.x_reg_pairs)).Struct.fields, 0..) |field, i| { + if (@field(encoding.value.arm64.frame.x_reg_pairs, field.name) != 0) { + (try regValueNative(context.thread_context, 19 + i, reg_context)).* = @as(*const usize, @ptrFromInt(reg_addr)).*; + reg_addr += @sizeOf(usize); + (try regValueNative(context.thread_context, 20 + i, reg_context)).* = @as(*const usize, @ptrFromInt(reg_addr)).*; + reg_addr += @sizeOf(usize); + } + } + + inline for (@typeInfo(@TypeOf(encoding.value.arm64.frame.d_reg_pairs)).Struct.fields, 0..) |field, i| { + if (@field(encoding.value.arm64.frame.d_reg_pairs, field.name) != 0) { + // Only the lower half of the 128-bit V registers are restored during unwinding + @memcpy( + try regBytes(context.thread_context, 64 + 8 + i, context.reg_context), + std.mem.asBytes(@as(*const usize, @ptrFromInt(reg_addr))), + ); + reg_addr += @sizeOf(usize); + @memcpy( + try regBytes(context.thread_context, 64 + 9 + i, context.reg_context), + std.mem.asBytes(@as(*const usize, @ptrFromInt(reg_addr))), + ); + reg_addr += @sizeOf(usize); + } + } + + const new_ip = @as(*const usize, @ptrFromInt(ip_ptr)).*; + const new_fp = @as(*const usize, @ptrFromInt(fp)).*; + + (try regValueNative(context.thread_context, fpRegNum(reg_context), reg_context)).* = new_fp; + (try regValueNative(context.thread_context, ip_reg_num, reg_context)).* = new_ip; + + break :blk new_ip; + }, + }, + else => return error.UnimplementedArch, + }; + + context.pc = stripInstructionPtrAuthCode(new_ip); + if (context.pc > 0) context.pc -= 1; + return new_ip; +} + +pub const UnwindContext = struct { + allocator: Allocator, + cfa: ?usize, + pc: usize, + thread_context: *std.debug.ThreadContext, + reg_context: Dwarf.abi.RegisterContext, + vm: VirtualMachine, + stack_machine: Dwarf.expression.StackMachine(.{ .call_frame_context = true }), + + pub fn init( + allocator: Allocator, + thread_context: *std.debug.ThreadContext, + ) !UnwindContext { + comptime assert(supports_unwinding); + + const pc = stripInstructionPtrAuthCode( + (try regValueNative(thread_context, ip_reg_num, null)).*, + ); + + const context_copy = try allocator.create(std.debug.ThreadContext); + std.debug.copyContext(thread_context, context_copy); + + return .{ + .allocator = allocator, + .cfa = null, + .pc = pc, + .thread_context = context_copy, + .reg_context = undefined, + .vm = .{}, + .stack_machine = .{}, + }; + } + + pub fn deinit(self: *UnwindContext) void { + self.vm.deinit(self.allocator); + self.stack_machine.deinit(self.allocator); + self.allocator.destroy(self.thread_context); + self.* = undefined; + } + + pub fn getFp(self: *const UnwindContext) !usize { + return (try regValueNative(self.thread_context, fpRegNum(self.reg_context), self.reg_context)).*; + } +}; + +/// Some platforms use pointer authentication - the upper bits of instruction pointers contain a signature. +/// This function clears these signature bits to make the pointer usable. +pub inline fn stripInstructionPtrAuthCode(ptr: usize) usize { + if (native_arch == .aarch64) { + // `hint 0x07` maps to `xpaclri` (or `nop` if the hardware doesn't support it) + // The save / restore is because `xpaclri` operates on x30 (LR) + return asm ( + \\mov x16, x30 + \\mov x30, x15 + \\hint 0x07 + \\mov x15, x30 + \\mov x30, x16 + : [ret] "={x15}" (-> usize), + : [ptr] "{x15}" (ptr), + : "x16" + ); + } + + return ptr; +} + +/// Unwind a stack frame using DWARF unwinding info, updating the register context. +/// +/// If `.eh_frame_hdr` is available, it will be used to binary search for the FDE. +/// Otherwise, a linear scan of `.eh_frame` and `.debug_frame` is done to find the FDE. +/// +/// `explicit_fde_offset` is for cases where the FDE offset is known, such as when __unwind_info +/// defers unwinding to DWARF. This is an offset into the `.eh_frame` section. +pub fn unwindFrameDwarf( + di: *const Dwarf, + context: *UnwindContext, + ma: *std.debug.MemoryAccessor, + explicit_fde_offset: ?usize, +) !usize { + if (!supports_unwinding) return error.UnsupportedCpuArchitecture; + if (context.pc == 0) return 0; + + // Find the FDE and CIE + var cie: Dwarf.CommonInformationEntry = undefined; + var fde: Dwarf.FrameDescriptionEntry = undefined; + + if (explicit_fde_offset) |fde_offset| { + const dwarf_section: Dwarf.Section.Id = .eh_frame; + const frame_section = di.section(dwarf_section) orelse return error.MissingFDE; + if (fde_offset >= frame_section.len) return error.MissingFDE; + + var fbr: std.debug.DeprecatedFixedBufferReader = .{ + .buf = frame_section, + .pos = fde_offset, + .endian = di.endian, + }; + + const fde_entry_header = try Dwarf.EntryHeader.read(&fbr, null, dwarf_section); + if (fde_entry_header.type != .fde) return error.MissingFDE; + + const cie_offset = fde_entry_header.type.fde; + try fbr.seekTo(cie_offset); + + fbr.endian = native_endian; + const cie_entry_header = try Dwarf.EntryHeader.read(&fbr, null, dwarf_section); + if (cie_entry_header.type != .cie) return Dwarf.bad(); + + cie = try Dwarf.CommonInformationEntry.parse( + cie_entry_header.entry_bytes, + 0, + true, + cie_entry_header.format, + dwarf_section, + cie_entry_header.length_offset, + @sizeOf(usize), + native_endian, + ); + + fde = try Dwarf.FrameDescriptionEntry.parse( + fde_entry_header.entry_bytes, + 0, + true, + cie, + @sizeOf(usize), + native_endian, + ); + } else if (di.eh_frame_hdr) |header| { + const eh_frame_len = if (di.section(.eh_frame)) |eh_frame| eh_frame.len else null; + try header.findEntry( + ma, + eh_frame_len, + @intFromPtr(di.section(.eh_frame_hdr).?.ptr), + context.pc, + &cie, + &fde, + ); + } else { + const index = std.sort.binarySearch(Dwarf.FrameDescriptionEntry, context.pc, di.fde_list.items, {}, struct { + pub fn compareFn(_: void, pc: usize, mid_item: Dwarf.FrameDescriptionEntry) std.math.Order { + if (pc < mid_item.pc_begin) return .lt; + + const range_end = mid_item.pc_begin + mid_item.pc_range; + if (pc < range_end) return .eq; + + return .gt; + } + }.compareFn); + + fde = if (index) |i| di.fde_list.items[i] else return error.MissingFDE; + cie = di.cie_map.get(fde.cie_length_offset) orelse return error.MissingCIE; + } + + var expression_context: Dwarf.expression.Context = .{ + .format = cie.format, + .memory_accessor = ma, + .compile_unit = di.findCompileUnit(fde.pc_begin) catch null, + .thread_context = context.thread_context, + .reg_context = context.reg_context, + .cfa = context.cfa, + }; + + context.vm.reset(); + context.reg_context.eh_frame = cie.version != 4; + context.reg_context.is_macho = di.is_macho; + + const row = try context.vm.runToNative(context.allocator, context.pc, cie, fde); + context.cfa = switch (row.cfa.rule) { + .val_offset => |offset| blk: { + const register = row.cfa.register orelse return error.InvalidCFARule; + const value = mem.readInt(usize, (try regBytes(context.thread_context, register, context.reg_context))[0..@sizeOf(usize)], native_endian); + break :blk try applyOffset(value, offset); + }, + .expression => |expr| blk: { + context.stack_machine.reset(); + const value = try context.stack_machine.run( + expr, + context.allocator, + expression_context, + context.cfa, + ); + + if (value) |v| { + if (v != .generic) return error.InvalidExpressionValue; + break :blk v.generic; + } else return error.NoExpressionValue; + }, + else => return error.InvalidCFARule, + }; + + if (ma.load(usize, context.cfa.?) == null) return error.InvalidCFA; + expression_context.cfa = context.cfa; + + // Buffering the modifications is done because copying the thread context is not portable, + // some implementations (ie. darwin) use internal pointers to the mcontext. + var arena = std.heap.ArenaAllocator.init(context.allocator); + defer arena.deinit(); + const update_allocator = arena.allocator(); + + const RegisterUpdate = struct { + // Backed by thread_context + dest: []u8, + // Backed by arena + src: []const u8, + prev: ?*@This(), + }; + + var update_tail: ?*RegisterUpdate = null; + var has_return_address = true; + for (context.vm.rowColumns(row)) |column| { + if (column.register) |register| { + if (register == cie.return_address_register) { + has_return_address = column.rule != .undefined; + } + + const dest = try regBytes(context.thread_context, register, context.reg_context); + const src = try update_allocator.alloc(u8, dest.len); + + const prev = update_tail; + update_tail = try update_allocator.create(RegisterUpdate); + update_tail.?.* = .{ + .dest = dest, + .src = src, + .prev = prev, + }; + + try column.resolveValue( + context, + expression_context, + ma, + src, + ); + } + } + + // On all implemented architectures, the CFA is defined as being the previous frame's SP + (try regValueNative(context.thread_context, spRegNum(context.reg_context), context.reg_context)).* = context.cfa.?; + + while (update_tail) |tail| { + @memcpy(tail.dest, tail.src); + update_tail = tail.prev; + } + + if (has_return_address) { + context.pc = stripInstructionPtrAuthCode(mem.readInt(usize, (try regBytes( + context.thread_context, + cie.return_address_register, + context.reg_context, + ))[0..@sizeOf(usize)], native_endian)); + } else { + context.pc = 0; + } + + (try regValueNative(context.thread_context, ip_reg_num, context.reg_context)).* = context.pc; + + // The call instruction will have pushed the address of the instruction that follows the call as the return address. + // This next instruction may be past the end of the function if the caller was `noreturn` (ie. the last instruction in + // the function was the call). If we were to look up an FDE entry using the return address directly, it could end up + // either not finding an FDE at all, or using the next FDE in the program, producing incorrect results. To prevent this, + // we subtract one so that the next lookup is guaranteed to land inside the + // + // The exception to this rule is signal frames, where we return execution would be returned to the instruction + // that triggered the handler. + const return_address = context.pc; + if (context.pc > 0 and !cie.isSignalFrame()) context.pc -= 1; + + return return_address; +} + +fn fpRegNum(reg_context: Dwarf.abi.RegisterContext) u8 { + return Dwarf.abi.fpRegNum(native_arch, reg_context); +} + +fn spRegNum(reg_context: Dwarf.abi.RegisterContext) u8 { + return Dwarf.abi.spRegNum(native_arch, reg_context); +} + +const ip_reg_num = Dwarf.abi.ipRegNum(native_arch).?; + +/// Tells whether unwinding for the host is implemented. +pub const supports_unwinding = supportsUnwinding(builtin.target); + +comptime { + if (supports_unwinding) assert(Dwarf.abi.supportsUnwinding(builtin.target)); +} + +/// Tells whether unwinding for this target is *implemented* here in the Zig +/// standard library. +/// +/// See also `Dwarf.abi.supportsUnwinding` which tells whether Dwarf supports +/// unwinding on that target *in theory*. +pub fn supportsUnwinding(target: std.Target) bool { + return switch (target.cpu.arch) { + .x86 => switch (target.os.tag) { + .linux, .netbsd, .solaris, .illumos => true, + else => false, + }, + .x86_64 => switch (target.os.tag) { + .linux, .netbsd, .freebsd, .openbsd, .macos, .ios, .solaris, .illumos => true, + else => false, + }, + .arm => switch (target.os.tag) { + .linux => true, + else => false, + }, + .aarch64 => switch (target.os.tag) { + .linux, .netbsd, .freebsd, .macos, .ios => true, + else => false, + }, + // Unwinding is possible on other targets but this implementation does + // not support them...yet! + else => false, + }; +} + +fn unwindFrameMachODwarf( + context: *UnwindContext, + ma: *std.debug.MemoryAccessor, + eh_frame: []const u8, + fde_offset: usize, +) !usize { + var di: Dwarf = .{ + .endian = native_endian, + .is_macho = true, + }; + defer di.deinit(context.allocator); + + di.sections[@intFromEnum(Dwarf.Section.Id.eh_frame)] = .{ + .data = eh_frame, + .owned = false, + }; + + return unwindFrameDwarf(&di, context, ma, fde_offset); +} + +/// This is a virtual machine that runs DWARF call frame instructions. +pub const VirtualMachine = struct { + /// See section 6.4.1 of the DWARF5 specification for details on each + const RegisterRule = union(enum) { + // The spec says that the default rule for each column is the undefined rule. + // However, it also allows ABI / compiler authors to specify alternate defaults, so + // there is a distinction made here. + default: void, + undefined: void, + same_value: void, + // offset(N) + offset: i64, + // val_offset(N) + val_offset: i64, + // register(R) + register: u8, + // expression(E) + expression: []const u8, + // val_expression(E) + val_expression: []const u8, + // Augmenter-defined rule + architectural: void, + }; + + /// Each row contains unwinding rules for a set of registers. + pub const Row = struct { + /// Offset from `FrameDescriptionEntry.pc_begin` + offset: u64 = 0, + /// Special-case column that defines the CFA (Canonical Frame Address) rule. + /// The register field of this column defines the register that CFA is derived from. + cfa: Column = .{}, + /// The register fields in these columns define the register the rule applies to. + columns: ColumnRange = .{}, + /// Indicates that the next write to any column in this row needs to copy + /// the backing column storage first, as it may be referenced by previous rows. + copy_on_write: bool = false, + }; + + pub const Column = struct { + register: ?u8 = null, + rule: RegisterRule = .{ .default = {} }, + + /// Resolves the register rule and places the result into `out` (see regBytes) + pub fn resolveValue( + self: Column, + context: *SelfInfo.UnwindContext, + expression_context: std.debug.Dwarf.expression.Context, + ma: *std.debug.MemoryAccessor, + out: []u8, + ) !void { + switch (self.rule) { + .default => { + const register = self.register orelse return error.InvalidRegister; + try getRegDefaultValue(register, context, out); + }, + .undefined => { + @memset(out, undefined); + }, + .same_value => { + // TODO: This copy could be eliminated if callers always copy the state then call this function to update it + const register = self.register orelse return error.InvalidRegister; + const src = try regBytes(context.thread_context, register, context.reg_context); + if (src.len != out.len) return error.RegisterSizeMismatch; + @memcpy(out, src); + }, + .offset => |offset| { + if (context.cfa) |cfa| { + const addr = try applyOffset(cfa, offset); + if (ma.load(usize, addr) == null) return error.InvalidAddress; + const ptr: *const usize = @ptrFromInt(addr); + mem.writeInt(usize, out[0..@sizeOf(usize)], ptr.*, native_endian); + } else return error.InvalidCFA; + }, + .val_offset => |offset| { + if (context.cfa) |cfa| { + mem.writeInt(usize, out[0..@sizeOf(usize)], try applyOffset(cfa, offset), native_endian); + } else return error.InvalidCFA; + }, + .register => |register| { + const src = try regBytes(context.thread_context, register, context.reg_context); + if (src.len != out.len) return error.RegisterSizeMismatch; + @memcpy(out, try regBytes(context.thread_context, register, context.reg_context)); + }, + .expression => |expression| { + context.stack_machine.reset(); + const value = try context.stack_machine.run(expression, context.allocator, expression_context, context.cfa.?); + const addr = if (value) |v| blk: { + if (v != .generic) return error.InvalidExpressionValue; + break :blk v.generic; + } else return error.NoExpressionValue; + + if (ma.load(usize, addr) == null) return error.InvalidExpressionAddress; + const ptr: *usize = @ptrFromInt(addr); + mem.writeInt(usize, out[0..@sizeOf(usize)], ptr.*, native_endian); + }, + .val_expression => |expression| { + context.stack_machine.reset(); + const value = try context.stack_machine.run(expression, context.allocator, expression_context, context.cfa.?); + if (value) |v| { + if (v != .generic) return error.InvalidExpressionValue; + mem.writeInt(usize, out[0..@sizeOf(usize)], v.generic, native_endian); + } else return error.NoExpressionValue; + }, + .architectural => return error.UnimplementedRegisterRule, + } + } + }; + + const ColumnRange = struct { + /// Index into `columns` of the first column in this row. + start: usize = undefined, + len: u8 = 0, + }; + + columns: std.ArrayListUnmanaged(Column) = .{}, + stack: std.ArrayListUnmanaged(ColumnRange) = .{}, + current_row: Row = .{}, + + /// The result of executing the CIE's initial_instructions + cie_row: ?Row = null, + + pub fn deinit(self: *VirtualMachine, allocator: std.mem.Allocator) void { + self.stack.deinit(allocator); + self.columns.deinit(allocator); + self.* = undefined; + } + + pub fn reset(self: *VirtualMachine) void { + self.stack.clearRetainingCapacity(); + self.columns.clearRetainingCapacity(); + self.current_row = .{}; + self.cie_row = null; + } + + /// Return a slice backed by the row's non-CFA columns + pub fn rowColumns(self: VirtualMachine, row: Row) []Column { + if (row.columns.len == 0) return &.{}; + return self.columns.items[row.columns.start..][0..row.columns.len]; + } + + /// Either retrieves or adds a column for `register` (non-CFA) in the current row. + fn getOrAddColumn(self: *VirtualMachine, allocator: std.mem.Allocator, register: u8) !*Column { + for (self.rowColumns(self.current_row)) |*c| { + if (c.register == register) return c; + } + + if (self.current_row.columns.len == 0) { + self.current_row.columns.start = self.columns.items.len; + } + self.current_row.columns.len += 1; + + const column = try self.columns.addOne(allocator); + column.* = .{ + .register = register, + }; + + return column; + } + + /// Runs the CIE instructions, then the FDE instructions. Execution halts + /// once the row that corresponds to `pc` is known, and the row is returned. + pub fn runTo( + self: *VirtualMachine, + allocator: std.mem.Allocator, + pc: u64, + cie: std.debug.Dwarf.CommonInformationEntry, + fde: std.debug.Dwarf.FrameDescriptionEntry, + addr_size_bytes: u8, + endian: std.builtin.Endian, + ) !Row { + assert(self.cie_row == null); + if (pc < fde.pc_begin or pc >= fde.pc_begin + fde.pc_range) return error.AddressOutOfRange; + + var prev_row: Row = self.current_row; + + var cie_stream = std.io.fixedBufferStream(cie.initial_instructions); + var fde_stream = std.io.fixedBufferStream(fde.instructions); + var streams = [_]*std.io.FixedBufferStream([]const u8){ + &cie_stream, + &fde_stream, + }; + + for (&streams, 0..) |stream, i| { + while (stream.pos < stream.buffer.len) { + const instruction = try std.debug.Dwarf.call_frame.Instruction.read(stream, addr_size_bytes, endian); + prev_row = try self.step(allocator, cie, i == 0, instruction); + if (pc < fde.pc_begin + self.current_row.offset) return prev_row; + } + } + + return self.current_row; + } + + pub fn runToNative( + self: *VirtualMachine, + allocator: std.mem.Allocator, + pc: u64, + cie: std.debug.Dwarf.CommonInformationEntry, + fde: std.debug.Dwarf.FrameDescriptionEntry, + ) !Row { + return self.runTo(allocator, pc, cie, fde, @sizeOf(usize), native_endian); + } + + fn resolveCopyOnWrite(self: *VirtualMachine, allocator: std.mem.Allocator) !void { + if (!self.current_row.copy_on_write) return; + + const new_start = self.columns.items.len; + if (self.current_row.columns.len > 0) { + try self.columns.ensureUnusedCapacity(allocator, self.current_row.columns.len); + self.columns.appendSliceAssumeCapacity(self.rowColumns(self.current_row)); + self.current_row.columns.start = new_start; + } + } + + /// Executes a single instruction. + /// If this instruction is from the CIE, `is_initial` should be set. + /// Returns the value of `current_row` before executing this instruction. + pub fn step( + self: *VirtualMachine, + allocator: std.mem.Allocator, + cie: std.debug.Dwarf.CommonInformationEntry, + is_initial: bool, + instruction: Dwarf.call_frame.Instruction, + ) !Row { + // CIE instructions must be run before FDE instructions + assert(!is_initial or self.cie_row == null); + if (!is_initial and self.cie_row == null) { + self.cie_row = self.current_row; + self.current_row.copy_on_write = true; + } + + const prev_row = self.current_row; + switch (instruction) { + .set_loc => |i| { + if (i.address <= self.current_row.offset) return error.InvalidOperation; + // TODO: Check cie.segment_selector_size != 0 for DWARFV4 + self.current_row.offset = i.address; + }, + inline .advance_loc, + .advance_loc1, + .advance_loc2, + .advance_loc4, + => |i| { + self.current_row.offset += i.delta * cie.code_alignment_factor; + self.current_row.copy_on_write = true; + }, + inline .offset, + .offset_extended, + .offset_extended_sf, + => |i| { + try self.resolveCopyOnWrite(allocator); + const column = try self.getOrAddColumn(allocator, i.register); + column.rule = .{ .offset = @as(i64, @intCast(i.offset)) * cie.data_alignment_factor }; + }, + inline .restore, + .restore_extended, + => |i| { + try self.resolveCopyOnWrite(allocator); + if (self.cie_row) |cie_row| { + const column = try self.getOrAddColumn(allocator, i.register); + column.rule = for (self.rowColumns(cie_row)) |cie_column| { + if (cie_column.register == i.register) break cie_column.rule; + } else .{ .default = {} }; + } else return error.InvalidOperation; + }, + .nop => {}, + .undefined => |i| { + try self.resolveCopyOnWrite(allocator); + const column = try self.getOrAddColumn(allocator, i.register); + column.rule = .{ .undefined = {} }; + }, + .same_value => |i| { + try self.resolveCopyOnWrite(allocator); + const column = try self.getOrAddColumn(allocator, i.register); + column.rule = .{ .same_value = {} }; + }, + .register => |i| { + try self.resolveCopyOnWrite(allocator); + const column = try self.getOrAddColumn(allocator, i.register); + column.rule = .{ .register = i.target_register }; + }, + .remember_state => { + try self.stack.append(allocator, self.current_row.columns); + self.current_row.copy_on_write = true; + }, + .restore_state => { + const restored_columns = self.stack.popOrNull() orelse return error.InvalidOperation; + self.columns.shrinkRetainingCapacity(self.columns.items.len - self.current_row.columns.len); + try self.columns.ensureUnusedCapacity(allocator, restored_columns.len); + + self.current_row.columns.start = self.columns.items.len; + self.current_row.columns.len = restored_columns.len; + self.columns.appendSliceAssumeCapacity(self.columns.items[restored_columns.start..][0..restored_columns.len]); + }, + .def_cfa => |i| { + try self.resolveCopyOnWrite(allocator); + self.current_row.cfa = .{ + .register = i.register, + .rule = .{ .val_offset = @intCast(i.offset) }, + }; + }, + .def_cfa_sf => |i| { + try self.resolveCopyOnWrite(allocator); + self.current_row.cfa = .{ + .register = i.register, + .rule = .{ .val_offset = i.offset * cie.data_alignment_factor }, + }; + }, + .def_cfa_register => |i| { + try self.resolveCopyOnWrite(allocator); + if (self.current_row.cfa.register == null or self.current_row.cfa.rule != .val_offset) return error.InvalidOperation; + self.current_row.cfa.register = i.register; + }, + .def_cfa_offset => |i| { + try self.resolveCopyOnWrite(allocator); + if (self.current_row.cfa.register == null or self.current_row.cfa.rule != .val_offset) return error.InvalidOperation; + self.current_row.cfa.rule = .{ + .val_offset = @intCast(i.offset), + }; + }, + .def_cfa_offset_sf => |i| { + try self.resolveCopyOnWrite(allocator); + if (self.current_row.cfa.register == null or self.current_row.cfa.rule != .val_offset) return error.InvalidOperation; + self.current_row.cfa.rule = .{ + .val_offset = i.offset * cie.data_alignment_factor, + }; + }, + .def_cfa_expression => |i| { + try self.resolveCopyOnWrite(allocator); + self.current_row.cfa.register = undefined; + self.current_row.cfa.rule = .{ + .expression = i.block, + }; + }, + .expression => |i| { + try self.resolveCopyOnWrite(allocator); + const column = try self.getOrAddColumn(allocator, i.register); + column.rule = .{ + .expression = i.block, + }; + }, + .val_offset => |i| { + try self.resolveCopyOnWrite(allocator); + const column = try self.getOrAddColumn(allocator, i.register); + column.rule = .{ + .val_offset = @as(i64, @intCast(i.offset)) * cie.data_alignment_factor, + }; + }, + .val_offset_sf => |i| { + try self.resolveCopyOnWrite(allocator); + const column = try self.getOrAddColumn(allocator, i.register); + column.rule = .{ + .val_offset = i.offset * cie.data_alignment_factor, + }; + }, + .val_expression => |i| { + try self.resolveCopyOnWrite(allocator); + const column = try self.getOrAddColumn(allocator, i.register); + column.rule = .{ + .val_expression = i.block, + }; + }, + } + + return prev_row; + } +}; + +/// Returns the ABI-defined default value this register has in the unwinding table +/// before running any of the CIE instructions. The DWARF spec defines these as having +/// the .undefined rule by default, but allows ABI authors to override that. +fn getRegDefaultValue(reg_number: u8, context: *UnwindContext, out: []u8) !void { + switch (builtin.cpu.arch) { + .aarch64 => { + // Callee-saved registers are initialized as if they had the .same_value rule + if (reg_number >= 19 and reg_number <= 28) { + const src = try regBytes(context.thread_context, reg_number, context.reg_context); + if (src.len != out.len) return error.RegisterSizeMismatch; + @memcpy(out, src); + return; + } + }, + else => {}, + } + + @memset(out, undefined); +} + +/// Since register rules are applied (usually) during a panic, +/// checked addition / subtraction is used so that we can return +/// an error and fall back to FP-based unwinding. +fn applyOffset(base: usize, offset: i64) !usize { + return if (offset >= 0) + try std.math.add(usize, base, @as(usize, @intCast(offset))) + else + try std.math.sub(usize, base, @as(usize, @intCast(-offset))); +} diff --git a/lib/std/pdb.zig b/lib/std/pdb.zig index ece1cc63dc..31ad02e945 100644 --- a/lib/std/pdb.zig +++ b/lib/std/pdb.zig @@ -1,3 +1,12 @@ +//! Program Data Base debugging information format. +//! +//! This namespace contains unopinionated types and data definitions only. For +//! an implementation of parsing and caching PDB information, see +//! `std.debug.Pdb`. +//! +//! Most of this is based on information gathered from LLVM source code, +//! documentation and/or contributors. + const std = @import("std.zig"); const io = std.io; const math = std.math; @@ -9,10 +18,7 @@ const debug = std.debug; const ArrayList = std.ArrayList; -// Note: most of this is based on information gathered from LLVM source code, -// documentation and/or contributors. - -// https://llvm.org/docs/PDB/DbiStream.html#stream-header +/// https://llvm.org/docs/PDB/DbiStream.html#stream-header pub const DbiStreamHeader = extern struct { VersionSignature: i32, VersionHeader: u32, @@ -415,10 +421,8 @@ pub const ColumnNumberEntry = extern struct { pub const FileChecksumEntryHeader = extern struct { /// Byte offset of filename in global string table. FileNameOffset: u32, - /// Number of bytes of checksum. ChecksumSize: u8, - /// FileChecksumKind ChecksumKind: u8, }; @@ -451,525 +455,15 @@ pub const DebugSubsectionHeader = extern struct { Length: u32, }; -pub const PDBStringTableHeader = extern struct { +pub const StringTableHeader = extern struct { /// PDBStringTableSignature Signature: u32, - /// 1 or 2 HashVersion: u32, - /// Number of bytes of names buffer. ByteSize: u32, }; -fn readSparseBitVector(stream: anytype, allocator: mem.Allocator) ![]u32 { - const num_words = try stream.readInt(u32, .little); - var list = ArrayList(u32).init(allocator); - errdefer list.deinit(); - var word_i: u32 = 0; - while (word_i != num_words) : (word_i += 1) { - const word = try stream.readInt(u32, .little); - var bit_i: u5 = 0; - while (true) : (bit_i += 1) { - if (word & (@as(u32, 1) << bit_i) != 0) { - try list.append(word_i * 32 + bit_i); - } - if (bit_i == std.math.maxInt(u5)) break; - } - } - return try list.toOwnedSlice(); -} - -pub const Pdb = struct { - in_file: File, - msf: Msf, - allocator: mem.Allocator, - string_table: ?*MsfStream, - dbi: ?*MsfStream, - modules: []Module, - sect_contribs: []SectionContribEntry, - guid: [16]u8, - age: u32, - - pub const Module = struct { - mod_info: ModInfo, - module_name: []u8, - obj_file_name: []u8, - // The fields below are filled on demand. - populated: bool, - symbols: []u8, - subsect_info: []u8, - checksum_offset: ?usize, - - pub fn deinit(self: *Module, allocator: mem.Allocator) void { - allocator.free(self.module_name); - allocator.free(self.obj_file_name); - if (self.populated) { - allocator.free(self.symbols); - allocator.free(self.subsect_info); - } - } - }; - - pub fn init(allocator: mem.Allocator, path: []const u8) !Pdb { - const file = try fs.cwd().openFile(path, .{}); - errdefer file.close(); - - return Pdb{ - .in_file = file, - .allocator = allocator, - .string_table = null, - .dbi = null, - .msf = try Msf.init(allocator, file), - .modules = &[_]Module{}, - .sect_contribs = &[_]SectionContribEntry{}, - .guid = undefined, - .age = undefined, - }; - } - - pub fn deinit(self: *Pdb) void { - self.in_file.close(); - self.msf.deinit(self.allocator); - for (self.modules) |*module| { - module.deinit(self.allocator); - } - self.allocator.free(self.modules); - self.allocator.free(self.sect_contribs); - } - - pub fn parseDbiStream(self: *Pdb) !void { - var stream = self.getStream(StreamType.Dbi) orelse - return error.InvalidDebugInfo; - const reader = stream.reader(); - - const header = try reader.readStruct(DbiStreamHeader); - if (header.VersionHeader != 19990903) // V70, only value observed by LLVM team - return error.UnknownPDBVersion; - // if (header.Age != age) - // return error.UnmatchingPDB; - - const mod_info_size = header.ModInfoSize; - const section_contrib_size = header.SectionContributionSize; - - var modules = ArrayList(Module).init(self.allocator); - errdefer modules.deinit(); - - // Module Info Substream - var mod_info_offset: usize = 0; - while (mod_info_offset != mod_info_size) { - const mod_info = try reader.readStruct(ModInfo); - var this_record_len: usize = @sizeOf(ModInfo); - - const module_name = try reader.readUntilDelimiterAlloc(self.allocator, 0, 1024); - errdefer self.allocator.free(module_name); - this_record_len += module_name.len + 1; - - const obj_file_name = try reader.readUntilDelimiterAlloc(self.allocator, 0, 1024); - errdefer self.allocator.free(obj_file_name); - this_record_len += obj_file_name.len + 1; - - if (this_record_len % 4 != 0) { - const round_to_next_4 = (this_record_len | 0x3) + 1; - const march_forward_bytes = round_to_next_4 - this_record_len; - try stream.seekBy(@as(isize, @intCast(march_forward_bytes))); - this_record_len += march_forward_bytes; - } - - try modules.append(Module{ - .mod_info = mod_info, - .module_name = module_name, - .obj_file_name = obj_file_name, - - .populated = false, - .symbols = undefined, - .subsect_info = undefined, - .checksum_offset = null, - }); - - mod_info_offset += this_record_len; - if (mod_info_offset > mod_info_size) - return error.InvalidDebugInfo; - } - - // Section Contribution Substream - var sect_contribs = ArrayList(SectionContribEntry).init(self.allocator); - errdefer sect_contribs.deinit(); - - var sect_cont_offset: usize = 0; - if (section_contrib_size != 0) { - const version = reader.readEnum(SectionContrSubstreamVersion, .little) catch |err| switch (err) { - error.InvalidValue => return error.InvalidDebugInfo, - else => |e| return e, - }; - _ = version; - sect_cont_offset += @sizeOf(u32); - } - while (sect_cont_offset != section_contrib_size) { - const entry = try sect_contribs.addOne(); - entry.* = try reader.readStruct(SectionContribEntry); - sect_cont_offset += @sizeOf(SectionContribEntry); - - if (sect_cont_offset > section_contrib_size) - return error.InvalidDebugInfo; - } - - self.modules = try modules.toOwnedSlice(); - self.sect_contribs = try sect_contribs.toOwnedSlice(); - } - - pub fn parseInfoStream(self: *Pdb) !void { - var stream = self.getStream(StreamType.Pdb) orelse - return error.InvalidDebugInfo; - const reader = stream.reader(); - - // Parse the InfoStreamHeader. - const version = try reader.readInt(u32, .little); - const signature = try reader.readInt(u32, .little); - _ = signature; - const age = try reader.readInt(u32, .little); - const guid = try reader.readBytesNoEof(16); - - if (version != 20000404) // VC70, only value observed by LLVM team - return error.UnknownPDBVersion; - - self.guid = guid; - self.age = age; - - // Find the string table. - const string_table_index = str_tab_index: { - const name_bytes_len = try reader.readInt(u32, .little); - const name_bytes = try self.allocator.alloc(u8, name_bytes_len); - defer self.allocator.free(name_bytes); - try reader.readNoEof(name_bytes); - - const HashTableHeader = extern struct { - Size: u32, - Capacity: u32, - - fn maxLoad(cap: u32) u32 { - return cap * 2 / 3 + 1; - } - }; - const hash_tbl_hdr = try reader.readStruct(HashTableHeader); - if (hash_tbl_hdr.Capacity == 0) - return error.InvalidDebugInfo; - - if (hash_tbl_hdr.Size > HashTableHeader.maxLoad(hash_tbl_hdr.Capacity)) - return error.InvalidDebugInfo; - - const present = try readSparseBitVector(&reader, self.allocator); - defer self.allocator.free(present); - if (present.len != hash_tbl_hdr.Size) - return error.InvalidDebugInfo; - const deleted = try readSparseBitVector(&reader, self.allocator); - defer self.allocator.free(deleted); - - for (present) |_| { - const name_offset = try reader.readInt(u32, .little); - const name_index = try reader.readInt(u32, .little); - if (name_offset > name_bytes.len) - return error.InvalidDebugInfo; - const name = mem.sliceTo(name_bytes[name_offset..], 0); - if (mem.eql(u8, name, "/names")) { - break :str_tab_index name_index; - } - } - return error.MissingDebugInfo; - }; - - self.string_table = self.getStreamById(string_table_index) orelse - return error.MissingDebugInfo; - } - - pub fn getSymbolName(self: *Pdb, module: *Module, address: u64) ?[]const u8 { - _ = self; - std.debug.assert(module.populated); - - var symbol_i: usize = 0; - while (symbol_i != module.symbols.len) { - const prefix = @as(*align(1) RecordPrefix, @ptrCast(&module.symbols[symbol_i])); - if (prefix.RecordLen < 2) - return null; - switch (prefix.RecordKind) { - .S_LPROC32, .S_GPROC32 => { - const proc_sym = @as(*align(1) ProcSym, @ptrCast(&module.symbols[symbol_i + @sizeOf(RecordPrefix)])); - if (address >= proc_sym.CodeOffset and address < proc_sym.CodeOffset + proc_sym.CodeSize) { - return mem.sliceTo(@as([*:0]u8, @ptrCast(&proc_sym.Name[0])), 0); - } - }, - else => {}, - } - symbol_i += prefix.RecordLen + @sizeOf(u16); - } - - return null; - } - - pub fn getLineNumberInfo(self: *Pdb, module: *Module, address: u64) !debug.LineInfo { - std.debug.assert(module.populated); - const subsect_info = module.subsect_info; - - var sect_offset: usize = 0; - var skip_len: usize = undefined; - const checksum_offset = module.checksum_offset orelse return error.MissingDebugInfo; - while (sect_offset != subsect_info.len) : (sect_offset += skip_len) { - const subsect_hdr = @as(*align(1) DebugSubsectionHeader, @ptrCast(&subsect_info[sect_offset])); - skip_len = subsect_hdr.Length; - sect_offset += @sizeOf(DebugSubsectionHeader); - - switch (subsect_hdr.Kind) { - .Lines => { - var line_index = sect_offset; - - const line_hdr = @as(*align(1) LineFragmentHeader, @ptrCast(&subsect_info[line_index])); - if (line_hdr.RelocSegment == 0) - return error.MissingDebugInfo; - line_index += @sizeOf(LineFragmentHeader); - const frag_vaddr_start = line_hdr.RelocOffset; - const frag_vaddr_end = frag_vaddr_start + line_hdr.CodeSize; - - if (address >= frag_vaddr_start and address < frag_vaddr_end) { - // There is an unknown number of LineBlockFragmentHeaders (and their accompanying line and column records) - // from now on. We will iterate through them, and eventually find a LineInfo that we're interested in, - // breaking out to :subsections. If not, we will make sure to not read anything outside of this subsection. - const subsection_end_index = sect_offset + subsect_hdr.Length; - - while (line_index < subsection_end_index) { - const block_hdr = @as(*align(1) LineBlockFragmentHeader, @ptrCast(&subsect_info[line_index])); - line_index += @sizeOf(LineBlockFragmentHeader); - const start_line_index = line_index; - - const has_column = line_hdr.Flags.LF_HaveColumns; - - // All line entries are stored inside their line block by ascending start address. - // Heuristic: we want to find the last line entry - // that has a vaddr_start <= address. - // This is done with a simple linear search. - var line_i: u32 = 0; - while (line_i < block_hdr.NumLines) : (line_i += 1) { - const line_num_entry = @as(*align(1) LineNumberEntry, @ptrCast(&subsect_info[line_index])); - line_index += @sizeOf(LineNumberEntry); - - const vaddr_start = frag_vaddr_start + line_num_entry.Offset; - if (address < vaddr_start) { - break; - } - } - - // line_i == 0 would mean that no matching LineNumberEntry was found. - if (line_i > 0) { - const subsect_index = checksum_offset + block_hdr.NameIndex; - const chksum_hdr = @as(*align(1) FileChecksumEntryHeader, @ptrCast(&module.subsect_info[subsect_index])); - const strtab_offset = @sizeOf(PDBStringTableHeader) + chksum_hdr.FileNameOffset; - try self.string_table.?.seekTo(strtab_offset); - const source_file_name = try self.string_table.?.reader().readUntilDelimiterAlloc(self.allocator, 0, 1024); - - const line_entry_idx = line_i - 1; - - const column = if (has_column) blk: { - const start_col_index = start_line_index + @sizeOf(LineNumberEntry) * block_hdr.NumLines; - const col_index = start_col_index + @sizeOf(ColumnNumberEntry) * line_entry_idx; - const col_num_entry = @as(*align(1) ColumnNumberEntry, @ptrCast(&subsect_info[col_index])); - break :blk col_num_entry.StartColumn; - } else 0; - - const found_line_index = start_line_index + line_entry_idx * @sizeOf(LineNumberEntry); - const line_num_entry: *align(1) LineNumberEntry = @ptrCast(&subsect_info[found_line_index]); - const flags: *align(1) LineNumberEntry.Flags = @ptrCast(&line_num_entry.Flags); - - return debug.LineInfo{ - .file_name = source_file_name, - .line = flags.Start, - .column = column, - }; - } - } - - // Checking that we are not reading garbage after the (possibly) multiple block fragments. - if (line_index != subsection_end_index) { - return error.InvalidDebugInfo; - } - } - }, - else => {}, - } - - if (sect_offset > subsect_info.len) - return error.InvalidDebugInfo; - } - - return error.MissingDebugInfo; - } - - pub fn getModule(self: *Pdb, index: usize) !?*Module { - if (index >= self.modules.len) - return null; - - const mod = &self.modules[index]; - if (mod.populated) - return mod; - - // At most one can be non-zero. - if (mod.mod_info.C11ByteSize != 0 and mod.mod_info.C13ByteSize != 0) - return error.InvalidDebugInfo; - if (mod.mod_info.C13ByteSize == 0) - return error.InvalidDebugInfo; - - const stream = self.getStreamById(mod.mod_info.ModuleSymStream) orelse - return error.MissingDebugInfo; - const reader = stream.reader(); - - const signature = try reader.readInt(u32, .little); - if (signature != 4) - return error.InvalidDebugInfo; - - mod.symbols = try self.allocator.alloc(u8, mod.mod_info.SymByteSize - 4); - errdefer self.allocator.free(mod.symbols); - try reader.readNoEof(mod.symbols); - - mod.subsect_info = try self.allocator.alloc(u8, mod.mod_info.C13ByteSize); - errdefer self.allocator.free(mod.subsect_info); - try reader.readNoEof(mod.subsect_info); - - var sect_offset: usize = 0; - var skip_len: usize = undefined; - while (sect_offset != mod.subsect_info.len) : (sect_offset += skip_len) { - const subsect_hdr = @as(*align(1) DebugSubsectionHeader, @ptrCast(&mod.subsect_info[sect_offset])); - skip_len = subsect_hdr.Length; - sect_offset += @sizeOf(DebugSubsectionHeader); - - switch (subsect_hdr.Kind) { - .FileChecksums => { - mod.checksum_offset = sect_offset; - break; - }, - else => {}, - } - - if (sect_offset > mod.subsect_info.len) - return error.InvalidDebugInfo; - } - - mod.populated = true; - return mod; - } - - pub fn getStreamById(self: *Pdb, id: u32) ?*MsfStream { - if (id >= self.msf.streams.len) - return null; - return &self.msf.streams[id]; - } - - pub fn getStream(self: *Pdb, stream: StreamType) ?*MsfStream { - const id = @intFromEnum(stream); - return self.getStreamById(id); - } -}; - -// see https://llvm.org/docs/PDB/MsfFile.html -const Msf = struct { - directory: MsfStream, - streams: []MsfStream, - - fn init(allocator: mem.Allocator, file: File) !Msf { - const in = file.reader(); - - const superblock = try in.readStruct(SuperBlock); - - // Sanity checks - if (!mem.eql(u8, &superblock.FileMagic, SuperBlock.file_magic)) - return error.InvalidDebugInfo; - if (superblock.FreeBlockMapBlock != 1 and superblock.FreeBlockMapBlock != 2) - return error.InvalidDebugInfo; - const file_len = try file.getEndPos(); - if (superblock.NumBlocks * superblock.BlockSize != file_len) - return error.InvalidDebugInfo; - switch (superblock.BlockSize) { - // llvm only supports 4096 but we can handle any of these values - 512, 1024, 2048, 4096 => {}, - else => return error.InvalidDebugInfo, - } - - const dir_block_count = blockCountFromSize(superblock.NumDirectoryBytes, superblock.BlockSize); - if (dir_block_count > superblock.BlockSize / @sizeOf(u32)) - return error.UnhandledBigDirectoryStream; // cf. BlockMapAddr comment. - - try file.seekTo(superblock.BlockSize * superblock.BlockMapAddr); - const dir_blocks = try allocator.alloc(u32, dir_block_count); - for (dir_blocks) |*b| { - b.* = try in.readInt(u32, .little); - } - var directory = MsfStream.init( - superblock.BlockSize, - file, - dir_blocks, - ); - - const begin = directory.pos; - const stream_count = try directory.reader().readInt(u32, .little); - const stream_sizes = try allocator.alloc(u32, stream_count); - defer allocator.free(stream_sizes); - - // Microsoft's implementation uses @as(u32, -1) for inexistent streams. - // These streams are not used, but still participate in the file - // and must be taken into account when resolving stream indices. - const Nil = 0xFFFFFFFF; - for (stream_sizes) |*s| { - const size = try directory.reader().readInt(u32, .little); - s.* = if (size == Nil) 0 else blockCountFromSize(size, superblock.BlockSize); - } - - const streams = try allocator.alloc(MsfStream, stream_count); - for (streams, 0..) |*stream, i| { - const size = stream_sizes[i]; - if (size == 0) { - stream.* = MsfStream{ - .blocks = &[_]u32{}, - }; - } else { - var blocks = try allocator.alloc(u32, size); - var j: u32 = 0; - while (j < size) : (j += 1) { - const block_id = try directory.reader().readInt(u32, .little); - const n = (block_id % superblock.BlockSize); - // 0 is for SuperBlock, 1 and 2 for FPMs. - if (block_id == 0 or n == 1 or n == 2 or block_id * superblock.BlockSize > file_len) - return error.InvalidBlockIndex; - blocks[j] = block_id; - } - - stream.* = MsfStream.init( - superblock.BlockSize, - file, - blocks, - ); - } - } - - const end = directory.pos; - if (end - begin != superblock.NumDirectoryBytes) - return error.InvalidStreamDirectory; - - return Msf{ - .directory = directory, - .streams = streams, - }; - } - - fn deinit(self: *Msf, allocator: mem.Allocator) void { - allocator.free(self.directory.blocks); - for (self.streams) |*stream| { - allocator.free(stream.blocks); - } - allocator.free(self.streams); - } -}; - -fn blockCountFromSize(size: u32, block_size: u32) u32 { - return (size + block_size - 1) / block_size; -} - // https://llvm.org/docs/PDB/MsfFile.html#the-superblock pub const SuperBlock = extern struct { /// The LLVM docs list a space between C / C++ but empirically this is not the case. @@ -1016,82 +510,3 @@ pub const SuperBlock = extern struct { // implement it so we're kind of safe making this assumption for now. BlockMapAddr: u32, }; - -const MsfStream = struct { - in_file: File = undefined, - pos: u64 = undefined, - blocks: []u32 = undefined, - block_size: u32 = undefined, - - pub const Error = @typeInfo(@typeInfo(@TypeOf(read)).Fn.return_type.?).ErrorUnion.error_set; - - fn init(block_size: u32, file: File, blocks: []u32) MsfStream { - const stream = MsfStream{ - .in_file = file, - .pos = 0, - .blocks = blocks, - .block_size = block_size, - }; - - return stream; - } - - fn read(self: *MsfStream, buffer: []u8) !usize { - var block_id = @as(usize, @intCast(self.pos / self.block_size)); - if (block_id >= self.blocks.len) return 0; // End of Stream - var block = self.blocks[block_id]; - var offset = self.pos % self.block_size; - - try self.in_file.seekTo(block * self.block_size + offset); - const in = self.in_file.reader(); - - var size: usize = 0; - var rem_buffer = buffer; - while (size < buffer.len) { - const size_to_read = @min(self.block_size - offset, rem_buffer.len); - size += try in.read(rem_buffer[0..size_to_read]); - rem_buffer = buffer[size..]; - offset += size_to_read; - - // If we're at the end of a block, go to the next one. - if (offset == self.block_size) { - offset = 0; - block_id += 1; - if (block_id >= self.blocks.len) break; // End of Stream - block = self.blocks[block_id]; - try self.in_file.seekTo(block * self.block_size); - } - } - - self.pos += buffer.len; - return buffer.len; - } - - pub fn seekBy(self: *MsfStream, len: i64) !void { - self.pos = @as(u64, @intCast(@as(i64, @intCast(self.pos)) + len)); - if (self.pos >= self.blocks.len * self.block_size) - return error.EOF; - } - - pub fn seekTo(self: *MsfStream, len: u64) !void { - self.pos = len; - if (self.pos >= self.blocks.len * self.block_size) - return error.EOF; - } - - fn getSize(self: *const MsfStream) u64 { - return self.blocks.len * self.block_size; - } - - fn getFilePos(self: MsfStream) u64 { - const block_id = self.pos / self.block_size; - const block = self.blocks[block_id]; - const offset = self.pos % self.block_size; - - return block * self.block_size + offset; - } - - pub fn reader(self: *MsfStream) std.io.Reader(*MsfStream, Error, read) { - return .{ .context = self }; - } -}; |
