Diffstat (limited to 'lib/std/debug/SelfInfo.zig')
| -rw-r--r-- | lib/std/debug/SelfInfo.zig | 2438 |
1 file changed, 2438 insertions, 0 deletions
diff --git a/lib/std/debug/SelfInfo.zig b/lib/std/debug/SelfInfo.zig new file mode 100644 index 0000000000..c27f466fb3 --- /dev/null +++ b/lib/std/debug/SelfInfo.zig @@ -0,0 +1,2438 @@ +//! Cross-platform abstraction for this binary's own debug information, with a +//! goal of minimal code bloat and compilation speed penalty. + +const builtin = @import("builtin"); +const native_os = builtin.os.tag; +const native_endian = native_arch.endian(); +const native_arch = builtin.cpu.arch; + +const std = @import("../std.zig"); +const mem = std.mem; +const Allocator = std.mem.Allocator; +const windows = std.os.windows; +const macho = std.macho; +const fs = std.fs; +const coff = std.coff; +const pdb = std.pdb; +const assert = std.debug.assert; +const posix = std.posix; +const elf = std.elf; +const Dwarf = std.debug.Dwarf; +const Pdb = std.debug.Pdb; +const File = std.fs.File; +const math = std.math; +const testing = std.testing; +const StackIterator = std.debug.StackIterator; +const regBytes = Dwarf.abi.regBytes; +const regValueNative = Dwarf.abi.regValueNative; + +const SelfInfo = @This(); + +const root = @import("root"); + +allocator: Allocator, +address_map: std.AutoHashMap(usize, *Module), +modules: if (native_os == .windows) std.ArrayListUnmanaged(WindowsModule) else void, + +pub const OpenError = error{ + MissingDebugInfo, + UnsupportedOperatingSystem, +} || @typeInfo(@typeInfo(@TypeOf(SelfInfo.init)).Fn.return_type.?).ErrorUnion.error_set; + +pub fn open(allocator: Allocator) OpenError!SelfInfo { + nosuspend { + if (builtin.strip_debug_info) + return error.MissingDebugInfo; + switch (native_os) { + .linux, + .freebsd, + .netbsd, + .dragonfly, + .openbsd, + .macos, + .solaris, + .illumos, + .windows, + => return try SelfInfo.init(allocator), + else => return error.UnsupportedOperatingSystem, + } + } +} + +pub fn init(allocator: Allocator) !SelfInfo { + var debug_info: SelfInfo = .{ + .allocator = allocator, + .address_map = std.AutoHashMap(usize, *Module).init(allocator), + .modules = if (native_os == .windows) .{} else {}, + }; + + if (native_os == .windows) { + errdefer debug_info.modules.deinit(allocator); + + const handle = windows.kernel32.CreateToolhelp32Snapshot(windows.TH32CS_SNAPMODULE | windows.TH32CS_SNAPMODULE32, 0); + if (handle == windows.INVALID_HANDLE_VALUE) { + switch (windows.GetLastError()) { + else => |err| return windows.unexpectedError(err), + } + } + defer windows.CloseHandle(handle); + + var module_entry: windows.MODULEENTRY32 = undefined; + module_entry.dwSize = @sizeOf(windows.MODULEENTRY32); + if (windows.kernel32.Module32First(handle, &module_entry) == 0) { + return error.MissingDebugInfo; + } + + var module_valid = true; + while (module_valid) { + const module_info = try debug_info.modules.addOne(allocator); + const name = allocator.dupe(u8, mem.sliceTo(&module_entry.szModule, 0)) catch &.{}; + errdefer allocator.free(name); + + module_info.* = .{ + .base_address = @intFromPtr(module_entry.modBaseAddr), + .size = module_entry.modBaseSize, + .name = name, + .handle = module_entry.hModule, + }; + + module_valid = windows.kernel32.Module32Next(handle, &module_entry) == 1; + } + } + + return debug_info; +} + +pub fn deinit(self: *SelfInfo) void { + var it = self.address_map.iterator(); + while (it.next()) |entry| { + const mdi = entry.value_ptr.*; + mdi.deinit(self.allocator); + self.allocator.destroy(mdi); + } + self.address_map.deinit(); + if (native_os == .windows) { + for (self.modules.items) |module| { + self.allocator.free(module.name); + if (module.mapped_file) 
|mapped_file| mapped_file.deinit(); + } + self.modules.deinit(self.allocator); + } +} + +pub fn getModuleForAddress(self: *SelfInfo, address: usize) !*Module { + if (comptime builtin.target.isDarwin()) { + return self.lookupModuleDyld(address); + } else if (native_os == .windows) { + return self.lookupModuleWin32(address); + } else if (native_os == .haiku) { + return self.lookupModuleHaiku(address); + } else if (comptime builtin.target.isWasm()) { + return self.lookupModuleWasm(address); + } else { + return self.lookupModuleDl(address); + } +} + +// Returns the module name for a given address. +// This can be called when getModuleForAddress fails, so implementations should provide +// a path that doesn't rely on any side-effects of a prior successful module lookup. +pub fn getModuleNameForAddress(self: *SelfInfo, address: usize) ?[]const u8 { + if (comptime builtin.target.isDarwin()) { + return self.lookupModuleNameDyld(address); + } else if (native_os == .windows) { + return self.lookupModuleNameWin32(address); + } else if (native_os == .haiku) { + return null; + } else if (comptime builtin.target.isWasm()) { + return null; + } else { + return self.lookupModuleNameDl(address); + } +} + +fn lookupModuleDyld(self: *SelfInfo, address: usize) !*Module { + const image_count = std.c._dyld_image_count(); + + var i: u32 = 0; + while (i < image_count) : (i += 1) { + const header = std.c._dyld_get_image_header(i) orelse continue; + const base_address = @intFromPtr(header); + if (address < base_address) continue; + const vmaddr_slide = std.c._dyld_get_image_vmaddr_slide(i); + + var it = macho.LoadCommandIterator{ + .ncmds = header.ncmds, + .buffer = @alignCast(@as( + [*]u8, + @ptrFromInt(@intFromPtr(header) + @sizeOf(macho.mach_header_64)), + )[0..header.sizeofcmds]), + }; + + var unwind_info: ?[]const u8 = null; + var eh_frame: ?[]const u8 = null; + while (it.next()) |cmd| switch (cmd.cmd()) { + .SEGMENT_64 => { + const segment_cmd = cmd.cast(macho.segment_command_64).?; + if (!mem.eql(u8, "__TEXT", segment_cmd.segName())) continue; + + const seg_start = segment_cmd.vmaddr + vmaddr_slide; + const seg_end = seg_start + segment_cmd.vmsize; + if (address >= seg_start and address < seg_end) { + if (self.address_map.get(base_address)) |obj_di| { + return obj_di; + } + + for (cmd.getSections()) |sect| { + if (mem.eql(u8, "__unwind_info", sect.sectName())) { + unwind_info = @as([*]const u8, @ptrFromInt(sect.addr + vmaddr_slide))[0..sect.size]; + } else if (mem.eql(u8, "__eh_frame", sect.sectName())) { + eh_frame = @as([*]const u8, @ptrFromInt(sect.addr + vmaddr_slide))[0..sect.size]; + } + } + + const obj_di = try self.allocator.create(Module); + errdefer self.allocator.destroy(obj_di); + + const macho_path = mem.sliceTo(std.c._dyld_get_image_name(i), 0); + const macho_file = fs.cwd().openFile(macho_path, .{}) catch |err| switch (err) { + error.FileNotFound => return error.MissingDebugInfo, + else => return err, + }; + obj_di.* = try readMachODebugInfo(self.allocator, macho_file); + obj_di.base_address = base_address; + obj_di.vmaddr_slide = vmaddr_slide; + obj_di.unwind_info = unwind_info; + obj_di.eh_frame = eh_frame; + + try self.address_map.putNoClobber(base_address, obj_di); + + return obj_di; + } + }, + else => {}, + }; + } + + return error.MissingDebugInfo; +} + +fn lookupModuleNameDyld(self: *SelfInfo, address: usize) ?[]const u8 { + _ = self; + const image_count = std.c._dyld_image_count(); + + var i: u32 = 0; + while (i < image_count) : (i += 1) { + const header = std.c._dyld_get_image_header(i) 
orelse continue; + const base_address = @intFromPtr(header); + if (address < base_address) continue; + const vmaddr_slide = std.c._dyld_get_image_vmaddr_slide(i); + + var it = macho.LoadCommandIterator{ + .ncmds = header.ncmds, + .buffer = @alignCast(@as( + [*]u8, + @ptrFromInt(@intFromPtr(header) + @sizeOf(macho.mach_header_64)), + )[0..header.sizeofcmds]), + }; + + while (it.next()) |cmd| switch (cmd.cmd()) { + .SEGMENT_64 => { + const segment_cmd = cmd.cast(macho.segment_command_64).?; + if (!mem.eql(u8, "__TEXT", segment_cmd.segName())) continue; + + const original_address = address - vmaddr_slide; + const seg_start = segment_cmd.vmaddr; + const seg_end = seg_start + segment_cmd.vmsize; + if (original_address >= seg_start and original_address < seg_end) { + return fs.path.basename(mem.sliceTo(std.c._dyld_get_image_name(i), 0)); + } + }, + else => {}, + }; + } + + return null; +} + +fn lookupModuleWin32(self: *SelfInfo, address: usize) !*Module { + for (self.modules.items) |*module| { + if (address >= module.base_address and address < module.base_address + module.size) { + if (self.address_map.get(module.base_address)) |obj_di| { + return obj_di; + } + + const obj_di = try self.allocator.create(Module); + errdefer self.allocator.destroy(obj_di); + + const mapped_module = @as([*]const u8, @ptrFromInt(module.base_address))[0..module.size]; + var coff_obj = try coff.Coff.init(mapped_module, true); + + // The string table is not mapped into memory by the loader, so if a section name is in the + // string table then we have to map the full image file from disk. This can happen when + // a binary is produced with -gdwarf, since the section names are longer than 8 bytes. + if (coff_obj.strtabRequired()) { + var name_buffer: [windows.PATH_MAX_WIDE + 4:0]u16 = undefined; + // openFileAbsoluteW requires the prefix to be present + @memcpy(name_buffer[0..4], &[_]u16{ '\\', '?', '?', '\\' }); + + const process_handle = windows.GetCurrentProcess(); + const len = windows.kernel32.GetModuleFileNameExW( + process_handle, + module.handle, + @ptrCast(&name_buffer[4]), + windows.PATH_MAX_WIDE, + ); + + if (len == 0) return error.MissingDebugInfo; + const coff_file = fs.openFileAbsoluteW(name_buffer[0 .. len + 4 :0], .{}) catch |err| switch (err) { + error.FileNotFound => return error.MissingDebugInfo, + else => return err, + }; + errdefer coff_file.close(); + + var section_handle: windows.HANDLE = undefined; + const create_section_rc = windows.ntdll.NtCreateSection( + &section_handle, + windows.STANDARD_RIGHTS_REQUIRED | windows.SECTION_QUERY | windows.SECTION_MAP_READ, + null, + null, + windows.PAGE_READONLY, + // The documentation states that if no AllocationAttribute is specified, then SEC_COMMIT is the default. + // In practice, this isn't the case and specifying 0 will result in INVALID_PARAMETER_6.
+ windows.SEC_COMMIT, + coff_file.handle, + ); + if (create_section_rc != .SUCCESS) return error.MissingDebugInfo; + errdefer windows.CloseHandle(section_handle); + + var coff_len: usize = 0; + var base_ptr: usize = 0; + const map_section_rc = windows.ntdll.NtMapViewOfSection( + section_handle, + process_handle, + @ptrCast(&base_ptr), + null, + 0, + null, + &coff_len, + .ViewUnmap, + 0, + windows.PAGE_READONLY, + ); + if (map_section_rc != .SUCCESS) return error.MissingDebugInfo; + errdefer assert(windows.ntdll.NtUnmapViewOfSection(process_handle, @ptrFromInt(base_ptr)) == .SUCCESS); + + const section_view = @as([*]const u8, @ptrFromInt(base_ptr))[0..coff_len]; + coff_obj = try coff.Coff.init(section_view, false); + + module.mapped_file = .{ + .file = coff_file, + .section_handle = section_handle, + .section_view = section_view, + }; + } + errdefer if (module.mapped_file) |mapped_file| mapped_file.deinit(); + + obj_di.* = try readCoffDebugInfo(self.allocator, &coff_obj); + obj_di.base_address = module.base_address; + + try self.address_map.putNoClobber(module.base_address, obj_di); + return obj_di; + } + } + + return error.MissingDebugInfo; +} + +fn lookupModuleNameWin32(self: *SelfInfo, address: usize) ?[]const u8 { + for (self.modules.items) |module| { + if (address >= module.base_address and address < module.base_address + module.size) { + return module.name; + } + } + return null; +} + +fn lookupModuleNameDl(self: *SelfInfo, address: usize) ?[]const u8 { + _ = self; + + var ctx: struct { + // Input + address: usize, + // Output + name: []const u8 = "", + } = .{ .address = address }; + const CtxTy = @TypeOf(ctx); + + if (posix.dl_iterate_phdr(&ctx, error{Found}, struct { + fn callback(info: *posix.dl_phdr_info, size: usize, context: *CtxTy) !void { + _ = size; + if (context.address < info.addr) return; + const phdrs = info.phdr[0..info.phnum]; + for (phdrs) |*phdr| { + if (phdr.p_type != elf.PT_LOAD) continue; + + const seg_start = info.addr +% phdr.p_vaddr; + const seg_end = seg_start + phdr.p_memsz; + if (context.address >= seg_start and context.address < seg_end) { + context.name = mem.sliceTo(info.name, 0) orelse ""; + break; + } + } else return; + + return error.Found; + } + }.callback)) { + return null; + } else |err| switch (err) { + error.Found => return fs.path.basename(ctx.name), + } + + return null; +} + +fn lookupModuleDl(self: *SelfInfo, address: usize) !*Module { + var ctx: struct { + // Input + address: usize, + // Output + base_address: usize = undefined, + name: []const u8 = undefined, + build_id: ?[]const u8 = null, + gnu_eh_frame: ?[]const u8 = null, + } = .{ .address = address }; + const CtxTy = @TypeOf(ctx); + + if (posix.dl_iterate_phdr(&ctx, error{Found}, struct { + fn callback(info: *posix.dl_phdr_info, size: usize, context: *CtxTy) !void { + _ = size; + // The base address is too high + if (context.address < info.addr) + return; + + const phdrs = info.phdr[0..info.phnum]; + for (phdrs) |*phdr| { + if (phdr.p_type != elf.PT_LOAD) continue; + + // Overflowing addition is used to handle the case of vDSOs having a p_vaddr = 0xffffffffff700000 + const seg_start = info.addr +% phdr.p_vaddr; + const seg_end = seg_start + phdr.p_memsz; + if (context.address >= seg_start and context.address < seg_end) { + // Android libc uses NULL instead of an empty string to mark the + // main program + context.name = mem.sliceTo(info.name, 0) orelse ""; + context.base_address = info.addr; + break; + } + } else return; + + for (info.phdr[0..info.phnum]) |phdr| { + switch (phdr.p_type)
{ + elf.PT_NOTE => { + // Look for .note.gnu.build-id + const note_bytes = @as([*]const u8, @ptrFromInt(info.addr + phdr.p_vaddr))[0..phdr.p_memsz]; + const name_size = mem.readInt(u32, note_bytes[0..4], native_endian); + if (name_size != 4) continue; + const desc_size = mem.readInt(u32, note_bytes[4..8], native_endian); + const note_type = mem.readInt(u32, note_bytes[8..12], native_endian); + if (note_type != elf.NT_GNU_BUILD_ID) continue; + if (!mem.eql(u8, "GNU\x00", note_bytes[12..16])) continue; + context.build_id = note_bytes[16..][0..desc_size]; + }, + elf.PT_GNU_EH_FRAME => { + context.gnu_eh_frame = @as([*]const u8, @ptrFromInt(info.addr + phdr.p_vaddr))[0..phdr.p_memsz]; + }, + else => {}, + } + } + + // Stop the iteration + return error.Found; + } + }.callback)) { + return error.MissingDebugInfo; + } else |err| switch (err) { + error.Found => {}, + } + + if (self.address_map.get(ctx.base_address)) |obj_di| { + return obj_di; + } + + const obj_di = try self.allocator.create(Module); + errdefer self.allocator.destroy(obj_di); + + var sections: Dwarf.SectionArray = Dwarf.null_section_array; + if (ctx.gnu_eh_frame) |eh_frame_hdr| { + // This is a special case - pointer offsets inside .eh_frame_hdr + // are encoded relative to its base address, so we must use the + // version that is already memory mapped, and not the one that + // will be mapped separately from the ELF file. + sections[@intFromEnum(Dwarf.Section.Id.eh_frame_hdr)] = .{ + .data = eh_frame_hdr, + .owned = false, + }; + } + + obj_di.* = try readElfDebugInfo(self.allocator, if (ctx.name.len > 0) ctx.name else null, ctx.build_id, null, &sections, null); + obj_di.base_address = ctx.base_address; + + // Missing unwind info isn't treated as a failure, as the unwinder will fall back to FP-based unwinding + obj_di.dwarf.scanAllUnwindInfo(self.allocator, ctx.base_address) catch {}; + + try self.address_map.putNoClobber(ctx.base_address, obj_di); + + return obj_di; +} + +fn lookupModuleHaiku(self: *SelfInfo, address: usize) !*Module { + _ = self; + _ = address; + @panic("TODO implement lookup module for Haiku"); +} + +fn lookupModuleWasm(self: *SelfInfo, address: usize) !*Module { + _ = self; + _ = address; + @panic("TODO implement lookup module for Wasm"); +} + +pub const Module = switch (native_os) { + .macos, .ios, .watchos, .tvos, .visionos => struct { + base_address: usize, + vmaddr_slide: usize, + mapped_memory: []align(mem.page_size) const u8, + symbols: []const MachoSymbol, + strings: [:0]const u8, + ofiles: OFileTable, + + // Backed by the in-memory sections mapped by the loader + unwind_info: ?[]const u8 = null, + eh_frame: ?[]const u8 = null, + + const OFileTable = std.StringHashMap(OFileInfo); + const OFileInfo = struct { + di: Dwarf, + addr_table: std.StringHashMap(u64), + }; + + pub fn deinit(self: *@This(), allocator: Allocator) void { + var it = self.ofiles.iterator(); + while (it.next()) |entry| { + const ofile = entry.value_ptr; + ofile.di.deinit(allocator); + ofile.addr_table.deinit(); + } + self.ofiles.deinit(); + allocator.free(self.symbols); + posix.munmap(self.mapped_memory); + } + + fn loadOFile(self: *@This(), allocator: Allocator, o_file_path: []const u8) !*OFileInfo { + const o_file = try fs.cwd().openFile(o_file_path, .{}); + const mapped_mem = try mapWholeFile(o_file); + + const hdr: *const macho.mach_header_64 = @ptrCast(@alignCast(mapped_mem.ptr)); + if (hdr.magic != std.macho.MH_MAGIC_64) + return error.InvalidDebugInfo; + + var segcmd: ?macho.LoadCommandIterator.LoadCommand = null; + var symtabcmd:
?macho.symtab_command = null; + var it = macho.LoadCommandIterator{ + .ncmds = hdr.ncmds, + .buffer = mapped_mem[@sizeOf(macho.mach_header_64)..][0..hdr.sizeofcmds], + }; + while (it.next()) |cmd| switch (cmd.cmd()) { + .SEGMENT_64 => segcmd = cmd, + .SYMTAB => symtabcmd = cmd.cast(macho.symtab_command).?, + else => {}, + }; + + if (segcmd == null or symtabcmd == null) return error.MissingDebugInfo; + + // Parse symbols + const strtab = @as( + [*]const u8, + @ptrCast(&mapped_mem[symtabcmd.?.stroff]), + )[0 .. symtabcmd.?.strsize - 1 :0]; + const symtab = @as( + [*]const macho.nlist_64, + @ptrCast(@alignCast(&mapped_mem[symtabcmd.?.symoff])), + )[0..symtabcmd.?.nsyms]; + + // TODO handle tentative (common) symbols + var addr_table = std.StringHashMap(u64).init(allocator); + try addr_table.ensureTotalCapacity(@as(u32, @intCast(symtab.len))); + for (symtab) |sym| { + if (sym.n_strx == 0) continue; + if (sym.undf() or sym.tentative() or sym.abs()) continue; + const sym_name = mem.sliceTo(strtab[sym.n_strx..], 0); + // TODO is it possible to have a symbol collision? + addr_table.putAssumeCapacityNoClobber(sym_name, sym.n_value); + } + + var sections: Dwarf.SectionArray = Dwarf.null_section_array; + if (self.eh_frame) |eh_frame| sections[@intFromEnum(Dwarf.Section.Id.eh_frame)] = .{ + .data = eh_frame, + .owned = false, + }; + + for (segcmd.?.getSections()) |sect| { + if (!std.mem.eql(u8, "__DWARF", sect.segName())) continue; + + var section_index: ?usize = null; + inline for (@typeInfo(Dwarf.Section.Id).Enum.fields, 0..) |section, i| { + if (mem.eql(u8, "__" ++ section.name, sect.sectName())) section_index = i; + } + if (section_index == null) continue; + + const section_bytes = try chopSlice(mapped_mem, sect.offset, sect.size); + sections[section_index.?] 
= .{ + .data = section_bytes, + .virtual_address = sect.addr, + .owned = false, + }; + } + + const missing_debug_info = + sections[@intFromEnum(Dwarf.Section.Id.debug_info)] == null or + sections[@intFromEnum(Dwarf.Section.Id.debug_abbrev)] == null or + sections[@intFromEnum(Dwarf.Section.Id.debug_str)] == null or + sections[@intFromEnum(Dwarf.Section.Id.debug_line)] == null; + if (missing_debug_info) return error.MissingDebugInfo; + + var di = Dwarf{ + .endian = .little, + .sections = sections, + .is_macho = true, + }; + + try Dwarf.open(&di, allocator); + const info = OFileInfo{ + .di = di, + .addr_table = addr_table, + }; + + // Add the debug info to the cache + const result = try self.ofiles.getOrPut(o_file_path); + assert(!result.found_existing); + result.value_ptr.* = info; + + return result.value_ptr; + } + + pub fn getSymbolAtAddress(self: *@This(), allocator: Allocator, address: usize) !SymbolInfo { + nosuspend { + const result = try self.getOFileInfoForAddress(allocator, address); + if (result.symbol == null) return .{}; + + // Take the symbol name from the N_FUN STAB entry; we're going to + // use it if we fail to find the DWARF info + const stab_symbol = mem.sliceTo(self.strings[result.symbol.?.strx..], 0); + if (result.o_file_info == null) return .{ .symbol_name = stab_symbol }; + + // Translate the address again, this time into an address inside the + // .o file + const relocated_address_o = result.o_file_info.?.addr_table.get(stab_symbol) orelse return .{ + .symbol_name = "???", + }; + + const addr_off = result.relocated_address - result.symbol.?.addr; + const o_file_di = &result.o_file_info.?.di; + if (o_file_di.findCompileUnit(relocated_address_o)) |compile_unit| { + return SymbolInfo{ + .symbol_name = o_file_di.getSymbolName(relocated_address_o) orelse "???", + .compile_unit_name = compile_unit.die.getAttrString( + o_file_di, + std.dwarf.AT.name, + o_file_di.section(.debug_str), + compile_unit.*, + ) catch |err| switch (err) { + error.MissingDebugInfo, error.InvalidDebugInfo => "???", + }, + .line_info = o_file_di.getLineNumberInfo( + allocator, + compile_unit.*, + relocated_address_o + addr_off, + ) catch |err| switch (err) { + error.MissingDebugInfo, error.InvalidDebugInfo => null, + else => return err, + }, + }; + } else |err| switch (err) { + error.MissingDebugInfo, error.InvalidDebugInfo => { + return SymbolInfo{ .symbol_name = stab_symbol }; + }, + else => return err, + } + } + } + + pub fn getOFileInfoForAddress(self: *@This(), allocator: Allocator, address: usize) !struct { + relocated_address: usize, + symbol: ?*const MachoSymbol = null, + o_file_info: ?*OFileInfo = null, + } { + nosuspend { + // Translate the VA into an address inside this object + const relocated_address = address - self.vmaddr_slide; + + // Find the .o file where this symbol is defined + const symbol = machoSearchSymbols(self.symbols, relocated_address) orelse return .{ + .relocated_address = relocated_address, + }; + + // Check if its debug info is already in the cache + const o_file_path = mem.sliceTo(self.strings[symbol.ofile..], 0); + const o_file_info = self.ofiles.getPtr(o_file_path) orelse + (self.loadOFile(allocator, o_file_path) catch |err| switch (err) { + error.FileNotFound, + error.MissingDebugInfo, + error.InvalidDebugInfo, + => return .{ + .relocated_address = relocated_address, + .symbol = symbol, + }, + else => return err, + }); + + return .{ + .relocated_address = relocated_address, + .symbol = symbol, + .o_file_info = o_file_info, + }; + } + } + + pub fn
getDwarfInfoForAddress(self: *@This(), allocator: Allocator, address: usize) !?*const Dwarf { + return if ((try self.getOFileInfoForAddress(allocator, address)).o_file_info) |o_file_info| &o_file_info.di else null; + } + }, + .uefi, .windows => struct { + base_address: usize, + pdb: ?Pdb = null, + dwarf: ?Dwarf = null, + coff_image_base: u64, + + /// Only used if pdb is non-null + coff_section_headers: []coff.SectionHeader, + + pub fn deinit(self: *@This(), allocator: Allocator) void { + if (self.dwarf) |*dwarf| { + dwarf.deinit(allocator); + } + + if (self.pdb) |*p| { + p.deinit(); + allocator.free(self.coff_section_headers); + } + } + + fn getSymbolFromPdb(self: *@This(), relocated_address: usize) !?SymbolInfo { + var coff_section: *align(1) const coff.SectionHeader = undefined; + const mod_index = for (self.pdb.?.sect_contribs) |sect_contrib| { + if (sect_contrib.Section > self.coff_section_headers.len) continue; + // Remember that SectionContribEntry.Section is 1-based. + coff_section = &self.coff_section_headers[sect_contrib.Section - 1]; + + const vaddr_start = coff_section.virtual_address + sect_contrib.Offset; + const vaddr_end = vaddr_start + sect_contrib.Size; + if (relocated_address >= vaddr_start and relocated_address < vaddr_end) { + break sect_contrib.ModuleIndex; + } + } else { + // we have no information to add to the address + return null; + }; + + const module = (try self.pdb.?.getModule(mod_index)) orelse + return error.InvalidDebugInfo; + const obj_basename = fs.path.basename(module.obj_file_name); + + const symbol_name = self.pdb.?.getSymbolName( + module, + relocated_address - coff_section.virtual_address, + ) orelse "???"; + const opt_line_info = try self.pdb.?.getLineNumberInfo( + module, + relocated_address - coff_section.virtual_address, + ); + + return SymbolInfo{ + .symbol_name = symbol_name, + .compile_unit_name = obj_basename, + .line_info = opt_line_info, + }; + } + + pub fn getSymbolAtAddress(self: *@This(), allocator: Allocator, address: usize) !SymbolInfo { + // Translate the VA into an address inside this object + const relocated_address = address - self.base_address; + + if (self.pdb != null) { + if (try self.getSymbolFromPdb(relocated_address)) |symbol| return symbol; + } + + if (self.dwarf) |*dwarf| { + const dwarf_address = relocated_address + self.coff_image_base; + return getSymbolFromDwarf(allocator, dwarf_address, dwarf); + } + + return SymbolInfo{}; + } + + pub fn getDwarfInfoForAddress(self: *@This(), allocator: Allocator, address: usize) !?*const Dwarf { + _ = allocator; + _ = address; + + return if (self.dwarf) |*dwarf| dwarf else null; + } + }, + .linux, .netbsd, .freebsd, .dragonfly, .openbsd, .haiku, .solaris, .illumos => struct { + base_address: usize, + dwarf: Dwarf, + mapped_memory: []align(mem.page_size) const u8, + external_mapped_memory: ?[]align(mem.page_size) const u8, + + pub fn deinit(self: *@This(), allocator: Allocator) void { + self.dwarf.deinit(allocator); + posix.munmap(self.mapped_memory); + if (self.external_mapped_memory) |m| posix.munmap(m); + } + + pub fn getSymbolAtAddress(self: *@This(), allocator: Allocator, address: usize) !SymbolInfo { + // Translate the VA into an address inside this object + const relocated_address = address - self.base_address; + return getSymbolFromDwarf(allocator, relocated_address, &self.dwarf); + } + + pub fn getDwarfInfoForAddress(self: *@This(), allocator: Allocator, address: usize) !?*const Dwarf { + _ = allocator; + _ = address; + return &self.dwarf; + } + },
+ .wasi, .emscripten => struct { + pub fn deinit(self: *@This(), allocator: Allocator) void { + _ = self; + _ = allocator; + } + + pub fn getSymbolAtAddress(self: *@This(), allocator: Allocator, address: usize) !SymbolInfo { + _ = self; + _ = allocator; + _ = address; + return SymbolInfo{}; + } + + pub fn getDwarfInfoForAddress(self: *@This(), allocator: Allocator, address: usize) !?*const Dwarf { + _ = self; + _ = allocator; + _ = address; + return null; + } + }, + else => Dwarf, +}; + +/// How is this different than `Module` when the host is Windows? +/// Why are both stored in the `SelfInfo` struct? +/// Boy, it sure would be nice if someone added documentation comments for this +/// struct explaining it. +pub const WindowsModule = struct { + base_address: usize, + size: u32, + name: []const u8, + handle: windows.HMODULE, + + // Set when the image file needed to be mapped from disk + mapped_file: ?struct { + file: File, + section_handle: windows.HANDLE, + section_view: []const u8, + + pub fn deinit(self: @This()) void { + const process_handle = windows.GetCurrentProcess(); + assert(windows.ntdll.NtUnmapViewOfSection(process_handle, @constCast(@ptrCast(self.section_view.ptr))) == .SUCCESS); + windows.CloseHandle(self.section_handle); + self.file.close(); + } + } = null, +}; + +/// This takes ownership of macho_file: users of this function should not close +/// it themselves, even on error. +/// TODO it's weird to take ownership even on error, rework this code. +fn readMachODebugInfo(allocator: Allocator, macho_file: File) !Module { + const mapped_mem = try mapWholeFile(macho_file); + + const hdr: *const macho.mach_header_64 = @ptrCast(@alignCast(mapped_mem.ptr)); + if (hdr.magic != macho.MH_MAGIC_64) + return error.InvalidDebugInfo; + + var it = macho.LoadCommandIterator{ + .ncmds = hdr.ncmds, + .buffer = mapped_mem[@sizeOf(macho.mach_header_64)..][0..hdr.sizeofcmds], + }; + const symtab = while (it.next()) |cmd| switch (cmd.cmd()) { + .SYMTAB => break cmd.cast(macho.symtab_command).?, + else => {}, + } else return error.MissingDebugInfo; + + const syms = @as( + [*]const macho.nlist_64, + @ptrCast(@alignCast(&mapped_mem[symtab.symoff])), + )[0..symtab.nsyms]; + const strings = mapped_mem[symtab.stroff..][0 .. 
symtab.strsize - 1 :0]; + + const symbols_buf = try allocator.alloc(MachoSymbol, syms.len); + + var ofile: u32 = undefined; + var last_sym: MachoSymbol = undefined; + var symbol_index: usize = 0; + var state: enum { + init, + oso_open, + oso_close, + bnsym, + fun_strx, + fun_size, + ensym, + } = .init; + + for (syms) |*sym| { + if (!sym.stab()) continue; + + // TODO handle globals N_GSYM, and statics N_STSYM + switch (sym.n_type) { + macho.N_OSO => { + switch (state) { + .init, .oso_close => { + state = .oso_open; + ofile = sym.n_strx; + }, + else => return error.InvalidDebugInfo, + } + }, + macho.N_BNSYM => { + switch (state) { + .oso_open, .ensym => { + state = .bnsym; + last_sym = .{ + .strx = 0, + .addr = sym.n_value, + .size = 0, + .ofile = ofile, + }; + }, + else => return error.InvalidDebugInfo, + } + }, + macho.N_FUN => { + switch (state) { + .bnsym => { + state = .fun_strx; + last_sym.strx = sym.n_strx; + }, + .fun_strx => { + state = .fun_size; + last_sym.size = @as(u32, @intCast(sym.n_value)); + }, + else => return error.InvalidDebugInfo, + } + }, + macho.N_ENSYM => { + switch (state) { + .fun_size => { + state = .ensym; + symbols_buf[symbol_index] = last_sym; + symbol_index += 1; + }, + else => return error.InvalidDebugInfo, + } + }, + macho.N_SO => { + switch (state) { + .init, .oso_close => {}, + .oso_open, .ensym => { + state = .oso_close; + }, + else => return error.InvalidDebugInfo, + } + }, + else => {}, + } + } + + switch (state) { + .init => return error.MissingDebugInfo, + .oso_close => {}, + else => return error.InvalidDebugInfo, + } + + const symbols = try allocator.realloc(symbols_buf, symbol_index); + + // Even though lld emits symbols in ascending order, this debug code + // should work for programs linked in any valid way. + // This sort is so that we can binary search later. + mem.sort(MachoSymbol, symbols, {}, MachoSymbol.addressLessThan); + + return .{ + .base_address = undefined, + .vmaddr_slide = undefined, + .mapped_memory = mapped_mem, + .ofiles = Module.OFileTable.init(allocator), + .symbols = symbols, + .strings = strings, + }; +} + +fn readCoffDebugInfo(allocator: Allocator, coff_obj: *coff.Coff) !Module { + nosuspend { + var di: Module = .{ + .base_address = undefined, + .coff_image_base = coff_obj.getImageBase(), + .coff_section_headers = undefined, + }; + + if (coff_obj.getSectionByName(".debug_info")) |_| { + // This coff file has embedded DWARF debug info + var sections: Dwarf.SectionArray = Dwarf.null_section_array; + errdefer for (sections) |section| if (section) |s| if (s.owned) allocator.free(s.data); + + inline for (@typeInfo(Dwarf.Section.Id).Enum.fields, 0..) |section, i| { + sections[i] = if (coff_obj.getSectionByName("." 
++ section.name)) |section_header| blk: { + break :blk .{ + .data = try coff_obj.getSectionDataAlloc(section_header, allocator), + .virtual_address = section_header.virtual_address, + .owned = true, + }; + } else null; + } + + var dwarf = Dwarf{ + .endian = native_endian, + .sections = sections, + .is_macho = false, + }; + + try Dwarf.open(&dwarf, allocator); + di.dwarf = dwarf; + } + + const raw_path = try coff_obj.getPdbPath() orelse return di; + const path = blk: { + if (fs.path.isAbsolute(raw_path)) { + break :blk raw_path; + } else { + const self_dir = try fs.selfExeDirPathAlloc(allocator); + defer allocator.free(self_dir); + break :blk try fs.path.join(allocator, &.{ self_dir, raw_path }); + } + }; + defer if (path.ptr != raw_path.ptr) allocator.free(path); + + di.pdb = Pdb.init(allocator, path) catch |err| switch (err) { + error.FileNotFound, error.IsDir => { + if (di.dwarf == null) return error.MissingDebugInfo; + return di; + }, + else => return err, + }; + try di.pdb.?.parseInfoStream(); + try di.pdb.?.parseDbiStream(); + + if (!mem.eql(u8, &coff_obj.guid, &di.pdb.?.guid) or coff_obj.age != di.pdb.?.age) + return error.InvalidDebugInfo; + + // Only used by the pdb path + di.coff_section_headers = try coff_obj.getSectionHeadersAlloc(allocator); + errdefer allocator.free(di.coff_section_headers); + + return di; + } +} + +/// Reads debug info from an ELF file, or the current binary if none is specified. +/// If the required sections aren't present but a reference to external debug info is, +/// then this function will recurse to attempt to load the debug sections from +/// an external file. +pub fn readElfDebugInfo( + allocator: Allocator, + elf_filename: ?[]const u8, + build_id: ?[]const u8, + expected_crc: ?u32, + parent_sections: *Dwarf.SectionArray, + parent_mapped_mem: ?[]align(mem.page_size) const u8, +) !Module { + nosuspend { + const elf_file = (if (elf_filename) |filename| blk: { + break :blk fs.cwd().openFile(filename, .{}); + } else fs.openSelfExe(.{})) catch |err| switch (err) { + error.FileNotFound => return error.MissingDebugInfo, + else => return err, + }; + + const mapped_mem = try mapWholeFile(elf_file); + if (expected_crc) |crc| if (crc != std.hash.crc.Crc32.hash(mapped_mem)) return error.InvalidDebugInfo; + + const hdr: *const elf.Ehdr = @ptrCast(&mapped_mem[0]); + if (!mem.eql(u8, hdr.e_ident[0..4], elf.MAGIC)) return error.InvalidElfMagic; + if (hdr.e_ident[elf.EI_VERSION] != 1) return error.InvalidElfVersion; + + const endian: std.builtin.Endian = switch (hdr.e_ident[elf.EI_DATA]) { + elf.ELFDATA2LSB => .little, + elf.ELFDATA2MSB => .big, + else => return error.InvalidElfEndian, + }; + assert(endian == native_endian); // this is our own debug info + + const shoff = hdr.e_shoff; + const str_section_off = shoff + @as(u64, hdr.e_shentsize) * @as(u64, hdr.e_shstrndx); + const str_shdr: *const elf.Shdr = @ptrCast(@alignCast(&mapped_mem[math.cast(usize, str_section_off) orelse return error.Overflow])); + const header_strings = mapped_mem[str_shdr.sh_offset..][0..str_shdr.sh_size]; + const shdrs = @as( + [*]const elf.Shdr, + @ptrCast(@alignCast(&mapped_mem[shoff])), + )[0..hdr.e_shnum]; + + var sections: Dwarf.SectionArray = Dwarf.null_section_array; + + // Combine section list. This takes ownership over any owned sections from the parent scope.
+ for (parent_sections, &sections) |*parent, *section| { + if (parent.*) |*p| { + section.* = p.*; + p.owned = false; + } + } + errdefer for (sections) |section| if (section) |s| if (s.owned) allocator.free(s.data); + + var separate_debug_filename: ?[]const u8 = null; + var separate_debug_crc: ?u32 = null; + + for (shdrs) |*shdr| { + if (shdr.sh_type == elf.SHT_NULL or shdr.sh_type == elf.SHT_NOBITS) continue; + const name = mem.sliceTo(header_strings[shdr.sh_name..], 0); + + if (mem.eql(u8, name, ".gnu_debuglink")) { + const gnu_debuglink = try chopSlice(mapped_mem, shdr.sh_offset, shdr.sh_size); + const debug_filename = mem.sliceTo(@as([*:0]const u8, @ptrCast(gnu_debuglink.ptr)), 0); + const crc_offset = mem.alignForward(usize, @intFromPtr(&debug_filename[debug_filename.len]) + 1, 4) - @intFromPtr(gnu_debuglink.ptr); + const crc_bytes = gnu_debuglink[crc_offset..][0..4]; + separate_debug_crc = mem.readInt(u32, crc_bytes, native_endian); + separate_debug_filename = debug_filename; + continue; + } + + var section_index: ?usize = null; + inline for (@typeInfo(Dwarf.Section.Id).Enum.fields, 0..) |section, i| { + if (mem.eql(u8, "." ++ section.name, name)) section_index = i; + } + if (section_index == null) continue; + if (sections[section_index.?] != null) continue; + + const section_bytes = try chopSlice(mapped_mem, shdr.sh_offset, shdr.sh_size); + sections[section_index.?] = if ((shdr.sh_flags & elf.SHF_COMPRESSED) > 0) blk: { + var section_stream = std.io.fixedBufferStream(section_bytes); + var section_reader = section_stream.reader(); + const chdr = section_reader.readStruct(elf.Chdr) catch continue; + if (chdr.ch_type != .ZLIB) continue; + + var zlib_stream = std.compress.zlib.decompressor(section_stream.reader()); + + const decompressed_section = try allocator.alloc(u8, chdr.ch_size); + errdefer allocator.free(decompressed_section); + + const read = zlib_stream.reader().readAll(decompressed_section) catch continue; + assert(read == decompressed_section.len); + + break :blk .{ + .data = decompressed_section, + .virtual_address = shdr.sh_addr, + .owned = true, + }; + } else .{ + .data = section_bytes, + .virtual_address = shdr.sh_addr, + .owned = false, + }; + } + + const missing_debug_info = + sections[@intFromEnum(Dwarf.Section.Id.debug_info)] == null or + sections[@intFromEnum(Dwarf.Section.Id.debug_abbrev)] == null or + sections[@intFromEnum(Dwarf.Section.Id.debug_str)] == null or + sections[@intFromEnum(Dwarf.Section.Id.debug_line)] == null; + + // Attempt to load debug info from an external file + // See: https://sourceware.org/gdb/onlinedocs/gdb/Separate-Debug-Files.html + if (missing_debug_info) { + + // Only allow one level of debug info nesting + if (parent_mapped_mem) |_| { + return error.MissingDebugInfo; + } + + const global_debug_directories = [_][]const u8{ + "/usr/lib/debug", + }; + + // <global debug directory>/.build-id/<2-character id prefix>/<id remainder>.debug + if (build_id) |id| blk: { + if (id.len < 3) break :blk; + + // Either md5 (16 bytes) or sha1 (20 bytes) are used here in practice + const extension = ".debug"; + var id_prefix_buf: [2]u8 = undefined; + var filename_buf: [38 + extension.len]u8 = undefined; + + _ = std.fmt.bufPrint(&id_prefix_buf, "{s}", .{std.fmt.fmtSliceHexLower(id[0..1])}) catch unreachable; + const filename = std.fmt.bufPrint( + &filename_buf, + "{s}" ++ extension, + .{std.fmt.fmtSliceHexLower(id[1..])}, + ) catch break :blk; + + for (global_debug_directories) |global_directory| { + const path = try fs.path.join(allocator, &.{
global_directory, ".build-id", &id_prefix_buf, filename }); + defer allocator.free(path); + + return readElfDebugInfo(allocator, path, null, separate_debug_crc, §ions, mapped_mem) catch continue; + } + } + + // use the path from .gnu_debuglink, in the same search order as gdb + if (separate_debug_filename) |separate_filename| blk: { + if (elf_filename != null and mem.eql(u8, elf_filename.?, separate_filename)) return error.MissingDebugInfo; + + // <cwd>/<gnu_debuglink> + if (readElfDebugInfo(allocator, separate_filename, null, separate_debug_crc, §ions, mapped_mem)) |debug_info| return debug_info else |_| {} + + // <cwd>/.debug/<gnu_debuglink> + { + const path = try fs.path.join(allocator, &.{ ".debug", separate_filename }); + defer allocator.free(path); + + if (readElfDebugInfo(allocator, path, null, separate_debug_crc, §ions, mapped_mem)) |debug_info| return debug_info else |_| {} + } + + var cwd_buf: [fs.max_path_bytes]u8 = undefined; + const cwd_path = posix.realpath(".", &cwd_buf) catch break :blk; + + // <global debug directory>/<absolute folder of current binary>/<gnu_debuglink> + for (global_debug_directories) |global_directory| { + const path = try fs.path.join(allocator, &.{ global_directory, cwd_path, separate_filename }); + defer allocator.free(path); + if (readElfDebugInfo(allocator, path, null, separate_debug_crc, §ions, mapped_mem)) |debug_info| return debug_info else |_| {} + } + } + + return error.MissingDebugInfo; + } + + var di = Dwarf{ + .endian = endian, + .sections = sections, + .is_macho = false, + }; + + try Dwarf.open(&di, allocator); + + return .{ + .base_address = undefined, + .dwarf = di, + .mapped_memory = parent_mapped_mem orelse mapped_mem, + .external_mapped_memory = if (parent_mapped_mem != null) mapped_mem else null, + }; + } +} + +const MachoSymbol = struct { + strx: u32, + addr: u64, + size: u32, + ofile: u32, + + /// Returns the address from the macho file + fn address(self: MachoSymbol) u64 { + return self.addr; + } + + fn addressLessThan(context: void, lhs: MachoSymbol, rhs: MachoSymbol) bool { + _ = context; + return lhs.addr < rhs.addr; + } +}; + +/// Takes ownership of file, even on error. +/// TODO it's weird to take ownership even on error, rework this code. 
+fn mapWholeFile(file: File) ![]align(mem.page_size) const u8 { + nosuspend { + defer file.close(); + + const file_len = math.cast(usize, try file.getEndPos()) orelse math.maxInt(usize); + const mapped_mem = try posix.mmap( + null, + file_len, + posix.PROT.READ, + .{ .TYPE = .SHARED }, + file.handle, + 0, + ); + errdefer posix.munmap(mapped_mem); + + return mapped_mem; + } +} + +fn chopSlice(ptr: []const u8, offset: u64, size: u64) error{Overflow}![]const u8 { + const start = math.cast(usize, offset) orelse return error.Overflow; + const end = start + (math.cast(usize, size) orelse return error.Overflow); + return ptr[start..end]; +} + +pub const SymbolInfo = struct { + symbol_name: []const u8 = "???", + compile_unit_name: []const u8 = "???", + line_info: ?std.debug.SourceLocation = null, + + pub fn deinit(self: SymbolInfo, allocator: Allocator) void { + if (self.line_info) |li| allocator.free(li.file_name); + } +}; + +fn machoSearchSymbols(symbols: []const MachoSymbol, address: usize) ?*const MachoSymbol { + var min: usize = 0; + var max: usize = symbols.len - 1; + while (min < max) { + const mid = min + (max - min) / 2; + const curr = &symbols[mid]; + const next = &symbols[mid + 1]; + if (address >= next.address()) { + min = mid + 1; + } else if (address < curr.address()) { + max = mid; + } else { + return curr; + } + } + + const max_sym = &symbols[symbols.len - 1]; + if (address >= max_sym.address()) + return max_sym; + + return null; +} + +test machoSearchSymbols { + const symbols = [_]MachoSymbol{ + .{ .addr = 100, .strx = undefined, .size = undefined, .ofile = undefined }, + .{ .addr = 200, .strx = undefined, .size = undefined, .ofile = undefined }, + .{ .addr = 300, .strx = undefined, .size = undefined, .ofile = undefined }, + }; + + try testing.expectEqual(null, machoSearchSymbols(&symbols, 0)); + try testing.expectEqual(null, machoSearchSymbols(&symbols, 99)); + try testing.expectEqual(&symbols[0], machoSearchSymbols(&symbols, 100).?); + try testing.expectEqual(&symbols[0], machoSearchSymbols(&symbols, 150).?); + try testing.expectEqual(&symbols[0], machoSearchSymbols(&symbols, 199).?); + + try testing.expectEqual(&symbols[1], machoSearchSymbols(&symbols, 200).?); + try testing.expectEqual(&symbols[1], machoSearchSymbols(&symbols, 250).?); + try testing.expectEqual(&symbols[1], machoSearchSymbols(&symbols, 299).?); + + try testing.expectEqual(&symbols[2], machoSearchSymbols(&symbols, 300).?); + try testing.expectEqual(&symbols[2], machoSearchSymbols(&symbols, 301).?); + try testing.expectEqual(&symbols[2], machoSearchSymbols(&symbols, 5000).?); +} + +fn getSymbolFromDwarf(allocator: Allocator, address: u64, di: *Dwarf) !SymbolInfo { + if (nosuspend di.findCompileUnit(address)) |compile_unit| { + return SymbolInfo{ + .symbol_name = nosuspend di.getSymbolName(address) orelse "???", + .compile_unit_name = compile_unit.die.getAttrString(di, std.dwarf.AT.name, di.section(.debug_str), compile_unit.*) catch |err| switch (err) { + error.MissingDebugInfo, error.InvalidDebugInfo => "???", + }, + .line_info = nosuspend di.getLineNumberInfo(allocator, compile_unit.*, address) catch |err| switch (err) { + error.MissingDebugInfo, error.InvalidDebugInfo => null, + else => return err, + }, + }; + } else |err| switch (err) { + error.MissingDebugInfo, error.InvalidDebugInfo => { + return SymbolInfo{}; + }, + else => return err, + } +} + +/// Unwind a frame using MachO compact unwind info (from __unwind_info). 
+/// If the compact encoding can't encode a way to unwind a frame, it will +/// defer unwinding to DWARF, in which case `.eh_frame` will be used if available. +pub fn unwindFrameMachO( + context: *UnwindContext, + ma: *std.debug.MemoryAccessor, + unwind_info: []const u8, + eh_frame: ?[]const u8, + module_base_address: usize, +) !usize { + const header = std.mem.bytesAsValue( + macho.unwind_info_section_header, + unwind_info[0..@sizeOf(macho.unwind_info_section_header)], + ); + const indices = std.mem.bytesAsSlice( + macho.unwind_info_section_header_index_entry, + unwind_info[header.indexSectionOffset..][0 .. header.indexCount * @sizeOf(macho.unwind_info_section_header_index_entry)], + ); + if (indices.len == 0) return error.MissingUnwindInfo; + + const mapped_pc = context.pc - module_base_address; + const second_level_index = blk: { + var left: usize = 0; + var len: usize = indices.len; + + while (len > 1) { + const mid = left + len / 2; + const offset = indices[mid].functionOffset; + if (mapped_pc < offset) { + len /= 2; + } else { + left = mid; + if (mapped_pc == offset) break; + len -= len / 2; + } + } + + // Last index is a sentinel containing the highest address as its functionOffset + if (indices[left].secondLevelPagesSectionOffset == 0) return error.MissingUnwindInfo; + break :blk &indices[left]; + }; + + const common_encodings = std.mem.bytesAsSlice( + macho.compact_unwind_encoding_t, + unwind_info[header.commonEncodingsArraySectionOffset..][0 .. header.commonEncodingsArrayCount * @sizeOf(macho.compact_unwind_encoding_t)], + ); + + const start_offset = second_level_index.secondLevelPagesSectionOffset; + const kind = std.mem.bytesAsValue( + macho.UNWIND_SECOND_LEVEL, + unwind_info[start_offset..][0..@sizeOf(macho.UNWIND_SECOND_LEVEL)], + ); + + const entry: struct { + function_offset: usize, + raw_encoding: u32, + } = switch (kind.*) { + .REGULAR => blk: { + const page_header = std.mem.bytesAsValue( + macho.unwind_info_regular_second_level_page_header, + unwind_info[start_offset..][0..@sizeOf(macho.unwind_info_regular_second_level_page_header)], + ); + + const entries = std.mem.bytesAsSlice( + macho.unwind_info_regular_second_level_entry, + unwind_info[start_offset + page_header.entryPageOffset ..][0 .. page_header.entryCount * @sizeOf(macho.unwind_info_regular_second_level_entry)], + ); + if (entries.len == 0) return error.InvalidUnwindInfo; + + var left: usize = 0; + var len: usize = entries.len; + while (len > 1) { + const mid = left + len / 2; + const offset = entries[mid].functionOffset; + if (mapped_pc < offset) { + len /= 2; + } else { + left = mid; + if (mapped_pc == offset) break; + len -= len / 2; + } + } + + break :blk .{ + .function_offset = entries[left].functionOffset, + .raw_encoding = entries[left].encoding, + }; + }, + .COMPRESSED => blk: { + const page_header = std.mem.bytesAsValue( + macho.unwind_info_compressed_second_level_page_header, + unwind_info[start_offset..][0..@sizeOf(macho.unwind_info_compressed_second_level_page_header)], + ); + + const entries = std.mem.bytesAsSlice( + macho.UnwindInfoCompressedEntry, + unwind_info[start_offset + page_header.entryPageOffset ..][0 .. 
page_header.entryCount * @sizeOf(macho.UnwindInfoCompressedEntry)], + ); + if (entries.len == 0) return error.InvalidUnwindInfo; + + var left: usize = 0; + var len: usize = entries.len; + while (len > 1) { + const mid = left + len / 2; + const offset = second_level_index.functionOffset + entries[mid].funcOffset; + if (mapped_pc < offset) { + len /= 2; + } else { + left = mid; + if (mapped_pc == offset) break; + len -= len / 2; + } + } + + const entry = entries[left]; + const function_offset = second_level_index.functionOffset + entry.funcOffset; + if (entry.encodingIndex < header.commonEncodingsArrayCount) { + if (entry.encodingIndex >= common_encodings.len) return error.InvalidUnwindInfo; + break :blk .{ + .function_offset = function_offset, + .raw_encoding = common_encodings[entry.encodingIndex], + }; + } else { + const local_index = try math.sub( + u8, + entry.encodingIndex, + math.cast(u8, header.commonEncodingsArrayCount) orelse return error.InvalidUnwindInfo, + ); + const local_encodings = std.mem.bytesAsSlice( + macho.compact_unwind_encoding_t, + unwind_info[start_offset + page_header.encodingsPageOffset ..][0 .. page_header.encodingsCount * @sizeOf(macho.compact_unwind_encoding_t)], + ); + if (local_index >= local_encodings.len) return error.InvalidUnwindInfo; + break :blk .{ + .function_offset = function_offset, + .raw_encoding = local_encodings[local_index], + }; + } + }, + else => return error.InvalidUnwindInfo, + }; + + if (entry.raw_encoding == 0) return error.NoUnwindInfo; + const reg_context = Dwarf.abi.RegisterContext{ + .eh_frame = false, + .is_macho = true, + }; + + const encoding: macho.CompactUnwindEncoding = @bitCast(entry.raw_encoding); + const new_ip = switch (builtin.cpu.arch) { + .x86_64 => switch (encoding.mode.x86_64) { + .OLD => return error.UnimplementedUnwindEncoding, + .RBP_FRAME => blk: { + const regs: [5]u3 = .{ + encoding.value.x86_64.frame.reg0, + encoding.value.x86_64.frame.reg1, + encoding.value.x86_64.frame.reg2, + encoding.value.x86_64.frame.reg3, + encoding.value.x86_64.frame.reg4, + }; + + const frame_offset = encoding.value.x86_64.frame.frame_offset * @sizeOf(usize); + var max_reg: usize = 0; + inline for (regs, 0..) |reg, i| { + if (reg > 0) max_reg = i; + } + + const fp = (try regValueNative(context.thread_context, fpRegNum(reg_context), reg_context)).*; + const new_sp = fp + 2 * @sizeOf(usize); + + // Verify the stack range we're about to read register values from + if (ma.load(usize, new_sp) == null or ma.load(usize, fp - frame_offset + max_reg * @sizeOf(usize)) == null) return error.InvalidUnwindInfo; + + const ip_ptr = fp + @sizeOf(usize); + const new_ip = @as(*const usize, @ptrFromInt(ip_ptr)).*; + const new_fp = @as(*const usize, @ptrFromInt(fp)).*; + + (try regValueNative(context.thread_context, fpRegNum(reg_context), reg_context)).* = new_fp; + (try regValueNative(context.thread_context, spRegNum(reg_context), reg_context)).* = new_sp; + (try regValueNative(context.thread_context, ip_reg_num, reg_context)).* = new_ip; + + for (regs, 0..) 
|reg, i| { + if (reg == 0) continue; + const addr = fp - frame_offset + i * @sizeOf(usize); + const reg_number = try Dwarf.compactUnwindToDwarfRegNumber(reg); + (try regValueNative(context.thread_context, reg_number, reg_context)).* = @as(*const usize, @ptrFromInt(addr)).*; + } + + break :blk new_ip; + }, + .STACK_IMMD, + .STACK_IND, + => blk: { + const sp = (try regValueNative(context.thread_context, spRegNum(reg_context), reg_context)).*; + const stack_size = if (encoding.mode.x86_64 == .STACK_IMMD) + @as(usize, encoding.value.x86_64.frameless.stack.direct.stack_size) * @sizeOf(usize) + else stack_size: { + // In .STACK_IND, the stack size is inferred from the subq instruction at the beginning of the function. + const sub_offset_addr = + module_base_address + + entry.function_offset + + encoding.value.x86_64.frameless.stack.indirect.sub_offset; + if (ma.load(usize, sub_offset_addr) == null) return error.InvalidUnwindInfo; + + // `sub_offset_addr` points to the offset of the literal within the instruction + const sub_operand = @as(*align(1) const u32, @ptrFromInt(sub_offset_addr)).*; + break :stack_size sub_operand + @sizeOf(usize) * @as(usize, encoding.value.x86_64.frameless.stack.indirect.stack_adjust); + }; + + // Decode the Lehmer-coded sequence of registers. + // For a description of the encoding see lib/libc/include/any-macos.13-any/mach-o/compact_unwind_encoding.h + + // Decode the variable-base permutation number into its digits. Each digit represents + // an index into the list of register numbers that weren't yet used in the sequence at + // the time the digit was added. + const reg_count = encoding.value.x86_64.frameless.stack_reg_count; + const ip_ptr = if (reg_count > 0) reg_blk: { + var digits: [6]u3 = undefined; + var accumulator: usize = encoding.value.x86_64.frameless.stack_reg_permutation; + var base: usize = 2; + for (0..reg_count) |i| { + const div = accumulator / base; + digits[digits.len - 1 - i] = @intCast(accumulator - base * div); + accumulator = div; + base += 1; + } + + const reg_numbers = [_]u3{ 1, 2, 3, 4, 5, 6 }; + var registers: [reg_numbers.len]u3 = undefined; + var used_indices = [_]bool{false} ** reg_numbers.len; + for (digits[digits.len - reg_count ..], 0..) |target_unused_index, i| { + var unused_count: u8 = 0; + const unused_index = for (used_indices, 0..)
|used, index| { + if (!used) { + if (target_unused_index == unused_count) break index; + unused_count += 1; + } + } else unreachable; + + registers[i] = reg_numbers[unused_index]; + used_indices[unused_index] = true; + } + + var reg_addr = sp + stack_size - @sizeOf(usize) * @as(usize, reg_count + 1); + if (ma.load(usize, reg_addr) == null) return error.InvalidUnwindInfo; + for (0..reg_count) |i| { + const reg_number = try Dwarf.compactUnwindToDwarfRegNumber(registers[i]); + (try regValueNative(context.thread_context, reg_number, reg_context)).* = @as(*const usize, @ptrFromInt(reg_addr)).*; + reg_addr += @sizeOf(usize); + } + + break :reg_blk reg_addr; + } else sp + stack_size - @sizeOf(usize); + + const new_ip = @as(*const usize, @ptrFromInt(ip_ptr)).*; + const new_sp = ip_ptr + @sizeOf(usize); + if (ma.load(usize, new_sp) == null) return error.InvalidUnwindInfo; + + (try regValueNative(context.thread_context, spRegNum(reg_context), reg_context)).* = new_sp; + (try regValueNative(context.thread_context, ip_reg_num, reg_context)).* = new_ip; + + break :blk new_ip; + }, + .DWARF => { + return unwindFrameMachODwarf(context, ma, eh_frame orelse return error.MissingEhFrame, @intCast(encoding.value.x86_64.dwarf)); + }, + }, + .aarch64 => switch (encoding.mode.arm64) { + .OLD => return error.UnimplementedUnwindEncoding, + .FRAMELESS => blk: { + const sp = (try regValueNative(context.thread_context, spRegNum(reg_context), reg_context)).*; + const new_sp = sp + encoding.value.arm64.frameless.stack_size * 16; + const new_ip = (try regValueNative(context.thread_context, 30, reg_context)).*; + if (ma.load(usize, new_sp) == null) return error.InvalidUnwindInfo; + (try regValueNative(context.thread_context, spRegNum(reg_context), reg_context)).* = new_sp; + break :blk new_ip; + }, + .DWARF => { + return unwindFrameMachODwarf(context, ma, eh_frame orelse return error.MissingEhFrame, @intCast(encoding.value.arm64.dwarf)); + }, + .FRAME => blk: { + const fp = (try regValueNative(context.thread_context, fpRegNum(reg_context), reg_context)).*; + const new_sp = fp + 16; + const ip_ptr = fp + @sizeOf(usize); + + const num_restored_pairs: usize = + @popCount(@as(u5, @bitCast(encoding.value.arm64.frame.x_reg_pairs))) + + @popCount(@as(u4, @bitCast(encoding.value.arm64.frame.d_reg_pairs))); + const min_reg_addr = fp - num_restored_pairs * 2 * @sizeOf(usize); + + if (ma.load(usize, new_sp) == null or ma.load(usize, min_reg_addr) == null) return error.InvalidUnwindInfo; + + var reg_addr = fp - @sizeOf(usize); + inline for (@typeInfo(@TypeOf(encoding.value.arm64.frame.x_reg_pairs)).Struct.fields, 0..) |field, i| { + if (@field(encoding.value.arm64.frame.x_reg_pairs, field.name) != 0) { + (try regValueNative(context.thread_context, 19 + i, reg_context)).* = @as(*const usize, @ptrFromInt(reg_addr)).*; + reg_addr += @sizeOf(usize); + (try regValueNative(context.thread_context, 20 + i, reg_context)).* = @as(*const usize, @ptrFromInt(reg_addr)).*; + reg_addr += @sizeOf(usize); + } + } + + inline for (@typeInfo(@TypeOf(encoding.value.arm64.frame.d_reg_pairs)).Struct.fields, 0..) 
|field, i| { + if (@field(encoding.value.arm64.frame.d_reg_pairs, field.name) != 0) { + // Only the lower half of the 128-bit V registers are restored during unwinding + @memcpy( + try regBytes(context.thread_context, 64 + 8 + i, context.reg_context), + std.mem.asBytes(@as(*const usize, @ptrFromInt(reg_addr))), + ); + reg_addr += @sizeOf(usize); + @memcpy( + try regBytes(context.thread_context, 64 + 9 + i, context.reg_context), + std.mem.asBytes(@as(*const usize, @ptrFromInt(reg_addr))), + ); + reg_addr += @sizeOf(usize); + } + } + + const new_ip = @as(*const usize, @ptrFromInt(ip_ptr)).*; + const new_fp = @as(*const usize, @ptrFromInt(fp)).*; + + (try regValueNative(context.thread_context, fpRegNum(reg_context), reg_context)).* = new_fp; + (try regValueNative(context.thread_context, ip_reg_num, reg_context)).* = new_ip; + + break :blk new_ip; + }, + }, + else => return error.UnimplementedArch, + }; + + context.pc = stripInstructionPtrAuthCode(new_ip); + if (context.pc > 0) context.pc -= 1; + return new_ip; +} + +pub const UnwindContext = struct { + allocator: Allocator, + cfa: ?usize, + pc: usize, + thread_context: *std.debug.ThreadContext, + reg_context: Dwarf.abi.RegisterContext, + vm: VirtualMachine, + stack_machine: Dwarf.expression.StackMachine(.{ .call_frame_context = true }), + + pub fn init( + allocator: Allocator, + thread_context: *std.debug.ThreadContext, + ) !UnwindContext { + comptime assert(supports_unwinding); + + const pc = stripInstructionPtrAuthCode( + (try regValueNative(thread_context, ip_reg_num, null)).*, + ); + + const context_copy = try allocator.create(std.debug.ThreadContext); + std.debug.copyContext(thread_context, context_copy); + + return .{ + .allocator = allocator, + .cfa = null, + .pc = pc, + .thread_context = context_copy, + .reg_context = undefined, + .vm = .{}, + .stack_machine = .{}, + }; + } + + pub fn deinit(self: *UnwindContext) void { + self.vm.deinit(self.allocator); + self.stack_machine.deinit(self.allocator); + self.allocator.destroy(self.thread_context); + self.* = undefined; + } + + pub fn getFp(self: *const UnwindContext) !usize { + return (try regValueNative(self.thread_context, fpRegNum(self.reg_context), self.reg_context)).*; + } +}; + +/// Some platforms use pointer authentication - the upper bits of instruction pointers contain a signature. +/// This function clears these signature bits to make the pointer usable. +pub inline fn stripInstructionPtrAuthCode(ptr: usize) usize { + if (native_arch == .aarch64) { + // `hint 0x07` maps to `xpaclri` (or `nop` if the hardware doesn't support it) + // The save / restore is because `xpaclri` operates on x30 (LR) + return asm ( + \\mov x16, x30 + \\mov x30, x15 + \\hint 0x07 + \\mov x15, x30 + \\mov x30, x16 + : [ret] "={x15}" (-> usize), + : [ptr] "{x15}" (ptr), + : "x16" + ); + } + + return ptr; +} + +/// Unwind a stack frame using DWARF unwinding info, updating the register context. +/// +/// If `.eh_frame_hdr` is available, it will be used to binary search for the FDE. +/// Otherwise, a linear scan of `.eh_frame` and `.debug_frame` is done to find the FDE. +/// +/// `explicit_fde_offset` is for cases where the FDE offset is known, such as when __unwind_info +/// defers unwinding to DWARF. This is an offset into the `.eh_frame` section. 
+/// Unwind a stack frame using DWARF unwinding info, updating the register context.
+///
+/// If `.eh_frame_hdr` is available, it will be used to binary search for the FDE.
+/// Otherwise, a linear scan of `.eh_frame` and `.debug_frame` is done to find the FDE.
+///
+/// `explicit_fde_offset` is for cases where the FDE offset is known, such as when __unwind_info
+/// defers unwinding to DWARF. This is an offset into the `.eh_frame` section.
+pub fn unwindFrameDwarf(
+    di: *const Dwarf,
+    context: *UnwindContext,
+    ma: *std.debug.MemoryAccessor,
+    explicit_fde_offset: ?usize,
+) !usize {
+    if (!supports_unwinding) return error.UnsupportedCpuArchitecture;
+    if (context.pc == 0) return 0;
+
+    // Find the FDE and CIE
+    var cie: Dwarf.CommonInformationEntry = undefined;
+    var fde: Dwarf.FrameDescriptionEntry = undefined;
+
+    if (explicit_fde_offset) |fde_offset| {
+        const dwarf_section: Dwarf.Section.Id = .eh_frame;
+        const frame_section = di.section(dwarf_section) orelse return error.MissingFDE;
+        if (fde_offset >= frame_section.len) return error.MissingFDE;
+
+        var fbr: std.debug.DeprecatedFixedBufferReader = .{
+            .buf = frame_section,
+            .pos = fde_offset,
+            .endian = di.endian,
+        };
+
+        const fde_entry_header = try Dwarf.EntryHeader.read(&fbr, null, dwarf_section);
+        if (fde_entry_header.type != .fde) return error.MissingFDE;
+
+        const cie_offset = fde_entry_header.type.fde;
+        try fbr.seekTo(cie_offset);
+
+        fbr.endian = native_endian;
+        const cie_entry_header = try Dwarf.EntryHeader.read(&fbr, null, dwarf_section);
+        if (cie_entry_header.type != .cie) return Dwarf.bad();
+
+        cie = try Dwarf.CommonInformationEntry.parse(
+            cie_entry_header.entry_bytes,
+            0,
+            true,
+            cie_entry_header.format,
+            dwarf_section,
+            cie_entry_header.length_offset,
+            @sizeOf(usize),
+            native_endian,
+        );
+
+        fde = try Dwarf.FrameDescriptionEntry.parse(
+            fde_entry_header.entry_bytes,
+            0,
+            true,
+            cie,
+            @sizeOf(usize),
+            native_endian,
+        );
+    } else if (di.eh_frame_hdr) |header| {
+        const eh_frame_len = if (di.section(.eh_frame)) |eh_frame| eh_frame.len else null;
+        try header.findEntry(
+            ma,
+            eh_frame_len,
+            @intFromPtr(di.section(.eh_frame_hdr).?.ptr),
+            context.pc,
+            &cie,
+            &fde,
+        );
+    } else {
+        const index = std.sort.binarySearch(Dwarf.FrameDescriptionEntry, context.pc, di.fde_list.items, {}, struct {
+            pub fn compareFn(_: void, pc: usize, mid_item: Dwarf.FrameDescriptionEntry) std.math.Order {
+                if (pc < mid_item.pc_begin) return .lt;
+
+                const range_end = mid_item.pc_begin + mid_item.pc_range;
+                if (pc < range_end) return .eq;
+
+                return .gt;
+            }
+        }.compareFn);
+
+        fde = if (index) |i| di.fde_list.items[i] else return error.MissingFDE;
+        cie = di.cie_map.get(fde.cie_length_offset) orelse return error.MissingCIE;
+    }
+
+    var expression_context: Dwarf.expression.Context = .{
+        .format = cie.format,
+        .memory_accessor = ma,
+        .compile_unit = di.findCompileUnit(fde.pc_begin) catch null,
+        .thread_context = context.thread_context,
+        .reg_context = context.reg_context,
+        .cfa = context.cfa,
+    };
+
+    context.vm.reset();
+    context.reg_context.eh_frame = cie.version != 4;
+    context.reg_context.is_macho = di.is_macho;
+
+    const row = try context.vm.runToNative(context.allocator, context.pc, cie, fde);
+    context.cfa = switch (row.cfa.rule) {
+        .val_offset => |offset| blk: {
+            const register = row.cfa.register orelse return error.InvalidCFARule;
+            const value = mem.readInt(usize, (try regBytes(context.thread_context, register, context.reg_context))[0..@sizeOf(usize)], native_endian);
+            break :blk try applyOffset(value, offset);
+        },
+        .expression => |expr| blk: {
+            context.stack_machine.reset();
+            const value = try context.stack_machine.run(
+                expr,
+                context.allocator,
+                expression_context,
+                context.cfa,
+            );
+
+            if (value) |v| {
+                if (v != .generic) return error.InvalidExpressionValue;
+                break :blk v.generic;
+            } else return error.NoExpressionValue;
+        },
+        else => return error.InvalidCFARule,
+    };
+
+    if (ma.load(usize, context.cfa.?) == null) return error.InvalidCFA;
+    expression_context.cfa = context.cfa;
+
+    // Buffering the modifications is done because copying the thread context is not portable;
+    // some implementations (ie. darwin) use internal pointers to the mcontext.
+    var arena = std.heap.ArenaAllocator.init(context.allocator);
+    defer arena.deinit();
+    const update_allocator = arena.allocator();
+
+    const RegisterUpdate = struct {
+        // Backed by thread_context
+        dest: []u8,
+        // Backed by arena
+        src: []const u8,
+        prev: ?*@This(),
+    };
+
+    var update_tail: ?*RegisterUpdate = null;
+    var has_return_address = true;
+    for (context.vm.rowColumns(row)) |column| {
+        if (column.register) |register| {
+            if (register == cie.return_address_register) {
+                has_return_address = column.rule != .undefined;
+            }
+
+            const dest = try regBytes(context.thread_context, register, context.reg_context);
+            const src = try update_allocator.alloc(u8, dest.len);
+
+            const prev = update_tail;
+            update_tail = try update_allocator.create(RegisterUpdate);
+            update_tail.?.* = .{
+                .dest = dest,
+                .src = src,
+                .prev = prev,
+            };
+
+            try column.resolveValue(
+                context,
+                expression_context,
+                ma,
+                src,
+            );
+        }
+    }
+
+    // On all implemented architectures, the CFA is defined as being the previous frame's SP
+    (try regValueNative(context.thread_context, spRegNum(context.reg_context), context.reg_context)).* = context.cfa.?;
+
+    while (update_tail) |tail| {
+        @memcpy(tail.dest, tail.src);
+        update_tail = tail.prev;
+    }
+
+    if (has_return_address) {
+        context.pc = stripInstructionPtrAuthCode(mem.readInt(usize, (try regBytes(
+            context.thread_context,
+            cie.return_address_register,
+            context.reg_context,
+        ))[0..@sizeOf(usize)], native_endian));
+    } else {
+        context.pc = 0;
+    }
+
+    (try regValueNative(context.thread_context, ip_reg_num, context.reg_context)).* = context.pc;
+
+    // The call instruction will have pushed the address of the instruction that follows the call as the return address.
+    // This next instruction may be past the end of the function if the caller was `noreturn` (ie. the last instruction in
+    // the function was the call). If we were to look up an FDE entry using the return address directly, it could end up
+    // either not finding an FDE at all, or using the next FDE in the program, producing incorrect results. To prevent this,
+    // we subtract one so that the next lookup is guaranteed to land inside the function that made the call.
+    //
+    // The exception to this rule is signal frames, where execution is returned to the instruction
+    // that triggered the handler.
+    const return_address = context.pc;
+    if (context.pc > 0 and !cie.isSignalFrame()) context.pc -= 1;
+
+    return return_address;
+}
+
+fn fpRegNum(reg_context: Dwarf.abi.RegisterContext) u8 {
+    return Dwarf.abi.fpRegNum(native_arch, reg_context);
+}
+
+fn spRegNum(reg_context: Dwarf.abi.RegisterContext) u8 {
+    return Dwarf.abi.spRegNum(native_arch, reg_context);
+}
+
+const ip_reg_num = Dwarf.abi.ipRegNum(native_arch).?;
+
+/// Tells whether unwinding for the host is implemented.
+pub const supports_unwinding = supportsUnwinding(builtin.target);
+
+comptime {
+    if (supports_unwinding) assert(Dwarf.abi.supportsUnwinding(builtin.target));
+}
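The `comptime` block above pins down an invariant: any target for which unwinding is *implemented* here must also be one where DWARF unwinding is *possible* per `Dwarf.abi.supportsUnwinding`. A standalone sketch of the same pattern, with hypothetical predicates standing in for the two real functions, shows how the check costs nothing at runtime and fails the build if the invariant is ever violated:

const std = @import("std");
const builtin = @import("builtin");

// Illustrative stand-ins only; the real predicates are SelfInfo.supportsUnwinding
// (implemented here) and Dwarf.abi.supportsUnwinding (possible in theory).
fn implementedHere(target: std.Target) bool {
    return switch (target.cpu.arch) {
        .x86_64 => target.os.tag == .linux,
        else => false,
    };
}

fn possibleInTheory(target: std.Target) bool {
    return switch (target.cpu.arch) {
        .x86, .x86_64, .arm, .aarch64 => true,
        else => false,
    };
}

comptime {
    // Evaluated entirely at compile time: "implemented" must imply "possible".
    if (implementedHere(builtin.target)) std.debug.assert(possibleInTheory(builtin.target));
}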
+
+/// Tells whether unwinding for this target is *implemented* here in the Zig
+/// standard library.
+///
+/// See also `Dwarf.abi.supportsUnwinding` which tells whether Dwarf supports
+/// unwinding on that target *in theory*.
+pub fn supportsUnwinding(target: std.Target) bool {
+    return switch (target.cpu.arch) {
+        .x86 => switch (target.os.tag) {
+            .linux, .netbsd, .solaris, .illumos => true,
+            else => false,
+        },
+        .x86_64 => switch (target.os.tag) {
+            .linux, .netbsd, .freebsd, .openbsd, .macos, .ios, .solaris, .illumos => true,
+            else => false,
+        },
+        .arm => switch (target.os.tag) {
+            .linux => true,
+            else => false,
+        },
+        .aarch64 => switch (target.os.tag) {
+            .linux, .netbsd, .freebsd, .macos, .ios => true,
+            else => false,
+        },
+        // Unwinding is possible on other targets but this implementation does
+        // not support them...yet!
+        else => false,
+    };
+}
+
+fn unwindFrameMachODwarf(
+    context: *UnwindContext,
+    ma: *std.debug.MemoryAccessor,
+    eh_frame: []const u8,
+    fde_offset: usize,
+) !usize {
+    var di: Dwarf = .{
+        .endian = native_endian,
+        .is_macho = true,
+    };
+    defer di.deinit(context.allocator);
+
+    di.sections[@intFromEnum(Dwarf.Section.Id.eh_frame)] = .{
+        .data = eh_frame,
+        .owned = false,
+    };
+
+    return unwindFrameDwarf(&di, context, ma, fde_offset);
+}
+
+/// This is a virtual machine that runs DWARF call frame instructions.
+pub const VirtualMachine = struct {
+    /// See section 6.4.1 of the DWARF5 specification for details on each register rule.
+    const RegisterRule = union(enum) {
+        // The spec says that the default rule for each column is the undefined rule.
+        // However, it also allows ABI / compiler authors to specify alternate defaults, so
+        // there is a distinction made here.
+        default: void,
+        undefined: void,
+        same_value: void,
+        // offset(N)
+        offset: i64,
+        // val_offset(N)
+        val_offset: i64,
+        // register(R)
+        register: u8,
+        // expression(E)
+        expression: []const u8,
+        // val_expression(E)
+        val_expression: []const u8,
+        // Augmenter-defined rule
+        architectural: void,
+    };
+
+    /// Each row contains unwinding rules for a set of registers.
+    pub const Row = struct {
+        /// Offset from `FrameDescriptionEntry.pc_begin`
+        offset: u64 = 0,
+        /// Special-case column that defines the CFA (Canonical Frame Address) rule.
+        /// The register field of this column defines the register that CFA is derived from.
+        cfa: Column = .{},
+        /// The register fields in these columns define the register the rule applies to.
+        columns: ColumnRange = .{},
+        /// Indicates that the next write to any column in this row needs to copy
+        /// the backing column storage first, as it may be referenced by previous rows.
+        copy_on_write: bool = false,
+    };
+
+    pub const Column = struct {
+        register: ?u8 = null,
+        rule: RegisterRule = .{ .default = {} },
+
+        /// Resolves the register rule and places the result into `out` (see regBytes)
+        pub fn resolveValue(
+            self: Column,
+            context: *SelfInfo.UnwindContext,
+            expression_context: std.debug.Dwarf.expression.Context,
+            ma: *std.debug.MemoryAccessor,
+            out: []u8,
+        ) !void {
+            switch (self.rule) {
+                .default => {
+                    const register = self.register orelse return error.InvalidRegister;
+                    try getRegDefaultValue(register, context, out);
+                },
+                .undefined => {
+                    @memset(out, undefined);
+                },
+                .same_value => {
+                    // TODO: This copy could be eliminated if callers always copy the state then call this function to update it
+                    const register = self.register orelse return error.InvalidRegister;
+                    const src = try regBytes(context.thread_context, register, context.reg_context);
+                    if (src.len != out.len) return error.RegisterSizeMismatch;
+                    @memcpy(out, src);
+                },
+                .offset => |offset| {
+                    if (context.cfa) |cfa| {
+                        const addr = try applyOffset(cfa, offset);
+                        if (ma.load(usize, addr) == null) return error.InvalidAddress;
+                        const ptr: *const usize = @ptrFromInt(addr);
+                        mem.writeInt(usize, out[0..@sizeOf(usize)], ptr.*, native_endian);
+                    } else return error.InvalidCFA;
+                },
+                .val_offset => |offset| {
+                    if (context.cfa) |cfa| {
+                        mem.writeInt(usize, out[0..@sizeOf(usize)], try applyOffset(cfa, offset), native_endian);
+                    } else return error.InvalidCFA;
+                },
+                .register => |register| {
+                    const src = try regBytes(context.thread_context, register, context.reg_context);
+                    if (src.len != out.len) return error.RegisterSizeMismatch;
+                    @memcpy(out, src);
+                },
+                .expression => |expression| {
+                    context.stack_machine.reset();
+                    const value = try context.stack_machine.run(expression, context.allocator, expression_context, context.cfa.?);
+                    const addr = if (value) |v| blk: {
+                        if (v != .generic) return error.InvalidExpressionValue;
+                        break :blk v.generic;
+                    } else return error.NoExpressionValue;
+
+                    if (ma.load(usize, addr) == null) return error.InvalidExpressionAddress;
+                    const ptr: *usize = @ptrFromInt(addr);
+                    mem.writeInt(usize, out[0..@sizeOf(usize)], ptr.*, native_endian);
+                },
+                .val_expression => |expression| {
+                    context.stack_machine.reset();
+                    const value = try context.stack_machine.run(expression, context.allocator, expression_context, context.cfa.?);
+                    if (value) |v| {
+                        if (v != .generic) return error.InvalidExpressionValue;
+                        mem.writeInt(usize, out[0..@sizeOf(usize)], v.generic, native_endian);
+                    } else return error.NoExpressionValue;
+                },
+                .architectural => return error.UnimplementedRegisterRule,
+            }
+        }
+    };
+
+    const ColumnRange = struct {
+        /// Index into `columns` of the first column in this row.
+        start: usize = undefined,
+        len: u8 = 0,
+    };
+
+    columns: std.ArrayListUnmanaged(Column) = .{},
+    stack: std.ArrayListUnmanaged(ColumnRange) = .{},
+    current_row: Row = .{},
+
+    /// The result of executing the CIE's initial_instructions
+    cie_row: ?Row = null,
+
+    pub fn deinit(self: *VirtualMachine, allocator: std.mem.Allocator) void {
+        self.stack.deinit(allocator);
+        self.columns.deinit(allocator);
+        self.* = undefined;
+    }
+
+    pub fn reset(self: *VirtualMachine) void {
+        self.stack.clearRetainingCapacity();
+        self.columns.clearRetainingCapacity();
+        self.current_row = .{};
+        self.cie_row = null;
+    }
+
+    /// Return a slice backed by the row's non-CFA columns
+    pub fn rowColumns(self: VirtualMachine, row: Row) []Column {
+        if (row.columns.len == 0) return &.{};
+        return self.columns.items[row.columns.start..][0..row.columns.len];
+    }
+
+    /// Either retrieves or adds a column for `register` (non-CFA) in the current row.
+    fn getOrAddColumn(self: *VirtualMachine, allocator: std.mem.Allocator, register: u8) !*Column {
+        for (self.rowColumns(self.current_row)) |*c| {
+            if (c.register == register) return c;
+        }
+
+        if (self.current_row.columns.len == 0) {
+            self.current_row.columns.start = self.columns.items.len;
+        }
+        self.current_row.columns.len += 1;
+
+        const column = try self.columns.addOne(allocator);
+        column.* = .{
+            .register = register,
+        };
+
+        return column;
+    }
+
+    /// Runs the CIE instructions, then the FDE instructions. Execution halts
+    /// once the row that corresponds to `pc` is known, and the row is returned.
+    pub fn runTo(
+        self: *VirtualMachine,
+        allocator: std.mem.Allocator,
+        pc: u64,
+        cie: std.debug.Dwarf.CommonInformationEntry,
+        fde: std.debug.Dwarf.FrameDescriptionEntry,
+        addr_size_bytes: u8,
+        endian: std.builtin.Endian,
+    ) !Row {
+        assert(self.cie_row == null);
+        if (pc < fde.pc_begin or pc >= fde.pc_begin + fde.pc_range) return error.AddressOutOfRange;
+
+        var prev_row: Row = self.current_row;
+
+        var cie_stream = std.io.fixedBufferStream(cie.initial_instructions);
+        var fde_stream = std.io.fixedBufferStream(fde.instructions);
+        var streams = [_]*std.io.FixedBufferStream([]const u8){
+            &cie_stream,
+            &fde_stream,
+        };
+
+        for (&streams, 0..) |stream, i| {
+            while (stream.pos < stream.buffer.len) {
+                const instruction = try std.debug.Dwarf.call_frame.Instruction.read(stream, addr_size_bytes, endian);
+                prev_row = try self.step(allocator, cie, i == 0, instruction);
+                if (pc < fde.pc_begin + self.current_row.offset) return prev_row;
+            }
+        }
+
+        return self.current_row;
+    }
+
+    pub fn runToNative(
+        self: *VirtualMachine,
+        allocator: std.mem.Allocator,
+        pc: u64,
+        cie: std.debug.Dwarf.CommonInformationEntry,
+        fde: std.debug.Dwarf.FrameDescriptionEntry,
+    ) !Row {
+        return self.runTo(allocator, pc, cie, fde, @sizeOf(usize), native_endian);
+    }
+
+    fn resolveCopyOnWrite(self: *VirtualMachine, allocator: std.mem.Allocator) !void {
+        if (!self.current_row.copy_on_write) return;
+
+        const new_start = self.columns.items.len;
+        if (self.current_row.columns.len > 0) {
+            try self.columns.ensureUnusedCapacity(allocator, self.current_row.columns.len);
+            self.columns.appendSliceAssumeCapacity(self.rowColumns(self.current_row));
+            self.current_row.columns.start = new_start;
+        }
+    }
+
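Before reading `step` below, it may help to see the row-evolution loop of `runTo` above reduced to its skeleton: rule instructions mutate the current row in place, each `advance_loc` starts a new PC range, and the walk stops as soon as the target pc falls before the current row's offset. `MiniRow` and `MiniInstr` are invented for this sketch and are not part of the real `VirtualMachine` API:

const std = @import("std");

// A drastically simplified model: one CFA offset instead of full rule columns.
const MiniRow = struct { offset: u64 = 0, cfa_offset: i64 = 0 };

const MiniInstr = union(enum) {
    advance_loc: u64, // delta, assumed already scaled by code_alignment_factor
    def_cfa_offset: i64,
};

fn runMini(instrs: []const MiniInstr, pc_offset: u64) MiniRow {
    var row: MiniRow = .{};
    var prev = row;
    for (instrs) |instr| {
        prev = row;
        switch (instr) {
            .advance_loc => |delta| row.offset += delta,
            .def_cfa_offset => |off| row.cfa_offset = off,
        }
        // Like runTo: return the previous row once the target pc precedes the
        // current row's starting offset.
        if (pc_offset < row.offset) return prev;
    }
    return row;
}

test "row selection mirrors runTo" {
    const program = [_]MiniInstr{
        .{ .def_cfa_offset = 8 },
        .{ .advance_loc = 4 },
        .{ .def_cfa_offset = 16 },
        .{ .advance_loc = 4 },
    };
    // At pc offset 2 we are still in the first range: CFA offset 8.
    try std.testing.expectEqual(@as(i64, 8), runMini(&program, 2).cfa_offset);
    // At pc offset 6 the second def_cfa_offset has taken effect.
    try std.testing.expectEqual(@as(i64, 16), runMini(&program, 6).cfa_offset);
}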
+    /// Executes a single instruction.
+    /// If this instruction is from the CIE, `is_initial` should be set.
+    /// Returns the value of `current_row` before executing this instruction.
+    pub fn step(
+        self: *VirtualMachine,
+        allocator: std.mem.Allocator,
+        cie: std.debug.Dwarf.CommonInformationEntry,
+        is_initial: bool,
+        instruction: Dwarf.call_frame.Instruction,
+    ) !Row {
+        // CIE instructions must be run before FDE instructions
+        assert(!is_initial or self.cie_row == null);
+        if (!is_initial and self.cie_row == null) {
+            self.cie_row = self.current_row;
+            self.current_row.copy_on_write = true;
+        }
+
+        const prev_row = self.current_row;
+        switch (instruction) {
+            .set_loc => |i| {
+                if (i.address <= self.current_row.offset) return error.InvalidOperation;
+                // TODO: Check cie.segment_selector_size != 0 for DWARFV4
+                self.current_row.offset = i.address;
+            },
+            inline .advance_loc,
+            .advance_loc1,
+            .advance_loc2,
+            .advance_loc4,
+            => |i| {
+                self.current_row.offset += i.delta * cie.code_alignment_factor;
+                self.current_row.copy_on_write = true;
+            },
+            inline .offset,
+            .offset_extended,
+            .offset_extended_sf,
+            => |i| {
+                try self.resolveCopyOnWrite(allocator);
+                const column = try self.getOrAddColumn(allocator, i.register);
+                column.rule = .{ .offset = @as(i64, @intCast(i.offset)) * cie.data_alignment_factor };
+            },
+            inline .restore,
+            .restore_extended,
+            => |i| {
+                try self.resolveCopyOnWrite(allocator);
+                if (self.cie_row) |cie_row| {
+                    const column = try self.getOrAddColumn(allocator, i.register);
+                    column.rule = for (self.rowColumns(cie_row)) |cie_column| {
+                        if (cie_column.register == i.register) break cie_column.rule;
+                    } else .{ .default = {} };
+                } else return error.InvalidOperation;
+            },
+            .nop => {},
+            .undefined => |i| {
+                try self.resolveCopyOnWrite(allocator);
+                const column = try self.getOrAddColumn(allocator, i.register);
+                column.rule = .{ .undefined = {} };
+            },
+            .same_value => |i| {
+                try self.resolveCopyOnWrite(allocator);
+                const column = try self.getOrAddColumn(allocator, i.register);
+                column.rule = .{ .same_value = {} };
+            },
+            .register => |i| {
+                try self.resolveCopyOnWrite(allocator);
+                const column = try self.getOrAddColumn(allocator, i.register);
+                column.rule = .{ .register = i.target_register };
+            },
+            .remember_state => {
+                try self.stack.append(allocator, self.current_row.columns);
+                self.current_row.copy_on_write = true;
+            },
+            .restore_state => {
+                const restored_columns = self.stack.popOrNull() orelse return error.InvalidOperation;
+                self.columns.shrinkRetainingCapacity(self.columns.items.len - self.current_row.columns.len);
+                try self.columns.ensureUnusedCapacity(allocator, restored_columns.len);
+
+                self.current_row.columns.start = self.columns.items.len;
+                self.current_row.columns.len = restored_columns.len;
+                self.columns.appendSliceAssumeCapacity(self.columns.items[restored_columns.start..][0..restored_columns.len]);
+            },
+            .def_cfa => |i| {
+                try self.resolveCopyOnWrite(allocator);
+                self.current_row.cfa = .{
+                    .register = i.register,
+                    .rule = .{ .val_offset = @intCast(i.offset) },
+                };
+            },
+            .def_cfa_sf => |i| {
+                try self.resolveCopyOnWrite(allocator);
+                self.current_row.cfa = .{
+                    .register = i.register,
+                    .rule = .{ .val_offset = i.offset * cie.data_alignment_factor },
+                };
+            },
+            .def_cfa_register => |i| {
+                try self.resolveCopyOnWrite(allocator);
+                if (self.current_row.cfa.register == null or self.current_row.cfa.rule != .val_offset) return error.InvalidOperation;
+                self.current_row.cfa.register = i.register;
+            },
+            .def_cfa_offset => |i| {
+                try self.resolveCopyOnWrite(allocator);
+                if (self.current_row.cfa.register == null or self.current_row.cfa.rule != .val_offset) return error.InvalidOperation;
+                self.current_row.cfa.rule = .{
+                    .val_offset = @intCast(i.offset),
+                };
+            },
+            .def_cfa_offset_sf => |i| {
+                try self.resolveCopyOnWrite(allocator);
+                if (self.current_row.cfa.register == null or self.current_row.cfa.rule != .val_offset) return error.InvalidOperation;
+                self.current_row.cfa.rule = .{
+                    .val_offset = i.offset * cie.data_alignment_factor,
+                };
+            },
+            .def_cfa_expression => |i| {
+                try self.resolveCopyOnWrite(allocator);
+                self.current_row.cfa.register = undefined;
+                self.current_row.cfa.rule = .{
+                    .expression = i.block,
+                };
+            },
+            .expression => |i| {
+                try self.resolveCopyOnWrite(allocator);
+                const column = try self.getOrAddColumn(allocator, i.register);
+                column.rule = .{
+                    .expression = i.block,
+                };
+            },
+            .val_offset => |i| {
+                try self.resolveCopyOnWrite(allocator);
+                const column = try self.getOrAddColumn(allocator, i.register);
+                column.rule = .{
+                    .val_offset = @as(i64, @intCast(i.offset)) * cie.data_alignment_factor,
+                };
+            },
+            .val_offset_sf => |i| {
+                try self.resolveCopyOnWrite(allocator);
+                const column = try self.getOrAddColumn(allocator, i.register);
+                column.rule = .{
+                    .val_offset = i.offset * cie.data_alignment_factor,
+                };
+            },
+            .val_expression => |i| {
+                try self.resolveCopyOnWrite(allocator);
+                const column = try self.getOrAddColumn(allocator, i.register);
+                column.rule = .{
+                    .val_expression = i.block,
+                };
+            },
+        }
+
+        return prev_row;
+    }
+};
+
+/// Returns the ABI-defined default value this register has in the unwinding table
+/// before running any of the CIE instructions. The DWARF spec defines these as having
+/// the .undefined rule by default, but allows ABI authors to override that.
+fn getRegDefaultValue(reg_number: u8, context: *UnwindContext, out: []u8) !void {
+    switch (builtin.cpu.arch) {
+        .aarch64 => {
+            // Callee-saved registers are initialized as if they had the .same_value rule
+            if (reg_number >= 19 and reg_number <= 28) {
+                const src = try regBytes(context.thread_context, reg_number, context.reg_context);
+                if (src.len != out.len) return error.RegisterSizeMismatch;
+                @memcpy(out, src);
+                return;
+            }
+        },
+        else => {},
+    }
+
+    @memset(out, undefined);
+}
+
+/// Since register rules are applied (usually) during a panic,
+/// checked addition / subtraction is used so that we can return
+/// an error and fall back to FP-based unwinding.
+fn applyOffset(base: usize, offset: i64) !usize {
+    return if (offset >= 0)
+        try std.math.add(usize, base, @as(usize, @intCast(offset)))
+    else
+        try std.math.sub(usize, base, @as(usize, @intCast(-offset)));
+}
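As a closing illustration, the checked arithmetic in `applyOffset` above can be exercised in isolation. This standalone test duplicates the helper (since the real one is private to this file) to show that an offset which would wrap the address space surfaces as `error.Overflow`, which a caller can catch in order to fall back to FP-based unwinding:

const std = @import("std");

// Same logic as applyOffset above: signed offsets on an unsigned base must not
// wrap silently, because a wrapped address would be dereferenced while unwinding.
fn checkedOffset(base: usize, offset: i64) !usize {
    return if (offset >= 0)
        try std.math.add(usize, base, @as(usize, @intCast(offset)))
    else
        try std.math.sub(usize, base, @as(usize, @intCast(-offset)));
}

test "offsets that would wrap become errors instead" {
    try std.testing.expectEqual(@as(usize, 0x1008), try checkedOffset(0x1000, 8));
    try std.testing.expectEqual(@as(usize, 0x0ff8), try checkedOffset(0x1000, -8));
    // Underflow is reported rather than wrapping around the address space.
    try std.testing.expectError(error.Overflow, checkedOffset(4, -8));
}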
