diff options
| author | mlugg <mlugg@mlugg.co.uk> | 2025-09-30 11:06:21 +0100 |
|---|---|---|
| committer | mlugg <mlugg@mlugg.co.uk> | 2025-09-30 14:18:26 +0100 |
| commit | 1120546f72405ac263dce7414eb71ca4e6c96fc8 (patch) | |
| tree | 4a6f90029d8feff983889a133326fbe2a4e3465d /lib/std/debug/SelfInfo/DarwinModule.zig | |
| parent | 12ceb896faebf25195d8b360e4972dd2bf23ede1 (diff) | |
| download | zig-1120546f72405ac263dce7414eb71ca4e6c96fc8.tar.gz zig-1120546f72405ac263dce7414eb71ca4e6c96fc8.zip | |
std.debug.SelfInfo: remove shared logic
There were only a few dozen lines of common logic, and they frankly
introduced more complexity than they eliminated. Instead, let's accept
that the implementations of `SelfInfo` are all pretty different and want
to track different state. This probably fixes some synchronization and
memory bugs by simplifying a bunch of stuff. It also improves the DWARF
unwind cache, making it around twice as fast in a debug build with the
self-hosted x86_64 backend, because we no longer have to redundantly go
through the hashmap lookup logic to find the module. Unwinding on
Windows will also see a slight performance boost from this change,
because `RtlVirtualUnwind` does not need to know the module whatsoever,
so the old `SelfInfo` implementation was doing redundant work. Lastly,
this makes it even easier to implement `SelfInfo` on freestanding
targets; there is no longer a need to emulate a real module system,
since the user controls the whole implementation!
There are various other small refactors here in the `SelfInfo`
implementations as well as in the DWARF unwinding logic. This change
turned out to make a lot of stuff simpler!
Diffstat (limited to 'lib/std/debug/SelfInfo/DarwinModule.zig')
| -rw-r--r-- | lib/std/debug/SelfInfo/DarwinModule.zig | 954 |
1 files changed, 0 insertions, 954 deletions
diff --git a/lib/std/debug/SelfInfo/DarwinModule.zig b/lib/std/debug/SelfInfo/DarwinModule.zig deleted file mode 100644 index 71e43a9a74..0000000000 --- a/lib/std/debug/SelfInfo/DarwinModule.zig +++ /dev/null @@ -1,954 +0,0 @@ -/// The runtime address where __TEXT is loaded. -text_base: usize, -name: []const u8, - -pub fn key(m: *const DarwinModule) usize { - return m.text_base; -} - -/// No cache needed, because `_dyld_get_image_header` etc are already fast. -pub const LookupCache = void; -pub fn lookup(cache: *LookupCache, gpa: Allocator, address: usize) Error!DarwinModule { - _ = cache; - _ = gpa; - var info: std.c.dl_info = undefined; - switch (std.c.dladdr(@ptrFromInt(address), &info)) { - 0 => return error.MissingDebugInfo, - else => return .{ - .name = std.mem.span(info.fname), - .text_base = @intFromPtr(info.fbase), - }, - } -} -fn loadUnwindInfo(module: *const DarwinModule, gpa: Allocator, out: *DebugInfo) !void { - const header: *std.macho.mach_header = @ptrFromInt(module.text_base); - - var it: macho.LoadCommandIterator = .{ - .ncmds = header.ncmds, - .buffer = @as([*]u8, @ptrCast(header))[@sizeOf(macho.mach_header_64)..][0..header.sizeofcmds], - }; - const sections, const text_vmaddr = while (it.next()) |load_cmd| { - if (load_cmd.cmd() != .SEGMENT_64) continue; - const segment_cmd = load_cmd.cast(macho.segment_command_64).?; - if (!mem.eql(u8, segment_cmd.segName(), "__TEXT")) continue; - break .{ load_cmd.getSections(), segment_cmd.vmaddr }; - } else unreachable; - - const vmaddr_slide = module.text_base - text_vmaddr; - - var opt_unwind_info: ?[]const u8 = null; - var opt_eh_frame: ?[]const u8 = null; - for (sections) |sect| { - if (mem.eql(u8, sect.sectName(), "__unwind_info")) { - const sect_ptr: [*]u8 = @ptrFromInt(@as(usize, @intCast(vmaddr_slide + sect.addr))); - opt_unwind_info = sect_ptr[0..@intCast(sect.size)]; - } else if (mem.eql(u8, sect.sectName(), "__eh_frame")) { - const sect_ptr: [*]u8 = @ptrFromInt(@as(usize, @intCast(vmaddr_slide + sect.addr))); - opt_eh_frame = sect_ptr[0..@intCast(sect.size)]; - } - } - const eh_frame = opt_eh_frame orelse { - out.unwind = .{ - .vmaddr_slide = vmaddr_slide, - .unwind_info = opt_unwind_info, - .dwarf = null, - .dwarf_cache = undefined, - }; - return; - }; - var dwarf: Dwarf.Unwind = .initSection(.eh_frame, @intFromPtr(eh_frame.ptr) - vmaddr_slide, eh_frame); - errdefer dwarf.deinit(gpa); - // We don't need lookups, so this call is just for scanning CIEs. - dwarf.prepare(gpa, @sizeOf(usize), native_endian, false, true) catch |err| switch (err) { - error.ReadFailed => unreachable, // it's all fixed buffers - error.InvalidDebugInfo, - error.MissingDebugInfo, - error.OutOfMemory, - => |e| return e, - error.EndOfStream, - error.Overflow, - error.StreamTooLong, - error.InvalidOperand, - error.InvalidOpcode, - error.InvalidOperation, - => return error.InvalidDebugInfo, - error.UnsupportedAddrSize, - error.UnsupportedDwarfVersion, - error.UnimplementedUserOpcode, - => return error.UnsupportedDebugInfo, - }; - - const dwarf_cache = try gpa.create(UnwindContext.Cache); - errdefer gpa.destroy(dwarf_cache); - dwarf_cache.init(); - - out.unwind = .{ - .vmaddr_slide = vmaddr_slide, - .unwind_info = opt_unwind_info, - .dwarf = dwarf, - .dwarf_cache = dwarf_cache, - }; -} -fn loadMachO(module: *const DarwinModule, gpa: Allocator) !DebugInfo.LoadedMachO { - const all_mapped_memory = try mapDebugInfoFile(module.name); - errdefer posix.munmap(all_mapped_memory); - - // In most cases, the file we just mapped is a Mach-O binary. However, it could be a "universal - // binary": a simple file format which contains Mach-O binaries for multiple targets. For - // instance, `/usr/lib/dyld` is currently distributed as a universal binary containing images - // for both ARM64 Macs and x86_64 Macs. - if (all_mapped_memory.len < 4) return error.InvalidDebugInfo; - const magic = @as(*const u32, @ptrCast(all_mapped_memory.ptr)).*; - // The contents of a Mach-O file, which may or may not be the whole of `all_mapped_memory`. - const mapped_macho = switch (magic) { - macho.MH_MAGIC_64 => all_mapped_memory, - - macho.FAT_CIGAM => mapped_macho: { - // This is the universal binary format (aka a "fat binary"). Annoyingly, the whole thing - // is big-endian, so we'll be swapping some bytes. - if (all_mapped_memory.len < @sizeOf(macho.fat_header)) return error.InvalidDebugInfo; - const hdr: *const macho.fat_header = @ptrCast(all_mapped_memory.ptr); - const archs_ptr: [*]const macho.fat_arch = @ptrCast(all_mapped_memory.ptr + @sizeOf(macho.fat_header)); - const archs: []const macho.fat_arch = archs_ptr[0..@byteSwap(hdr.nfat_arch)]; - const native_cpu_type = switch (builtin.cpu.arch) { - .x86_64 => macho.CPU_TYPE_X86_64, - .aarch64 => macho.CPU_TYPE_ARM64, - else => comptime unreachable, - }; - for (archs) |*arch| { - if (@byteSwap(arch.cputype) != native_cpu_type) continue; - const offset = @byteSwap(arch.offset); - const size = @byteSwap(arch.size); - break :mapped_macho all_mapped_memory[offset..][0..size]; - } - // Our native architecture was not present in the fat binary. - return error.MissingDebugInfo; - }, - - // Even on modern 64-bit targets, this format doesn't seem to be too extensively used. It - // will be fairly easy to add support here if necessary; it's very similar to above. - macho.FAT_CIGAM_64 => return error.UnsupportedDebugInfo, - - else => return error.InvalidDebugInfo, - }; - - const hdr: *const macho.mach_header_64 = @ptrCast(@alignCast(mapped_macho.ptr)); - if (hdr.magic != macho.MH_MAGIC_64) - return error.InvalidDebugInfo; - - const symtab: macho.symtab_command, const text_vmaddr: u64 = lc_iter: { - var it: macho.LoadCommandIterator = .{ - .ncmds = hdr.ncmds, - .buffer = mapped_macho[@sizeOf(macho.mach_header_64)..][0..hdr.sizeofcmds], - }; - var symtab: ?macho.symtab_command = null; - var text_vmaddr: ?u64 = null; - while (it.next()) |cmd| switch (cmd.cmd()) { - .SYMTAB => symtab = cmd.cast(macho.symtab_command) orelse return error.InvalidDebugInfo, - .SEGMENT_64 => if (cmd.cast(macho.segment_command_64)) |seg_cmd| { - if (!mem.eql(u8, seg_cmd.segName(), "__TEXT")) continue; - text_vmaddr = seg_cmd.vmaddr; - }, - else => {}, - }; - break :lc_iter .{ - symtab orelse return error.MissingDebugInfo, - text_vmaddr orelse return error.MissingDebugInfo, - }; - }; - - const syms_ptr: [*]align(1) const macho.nlist_64 = @ptrCast(mapped_macho[symtab.symoff..]); - const syms = syms_ptr[0..symtab.nsyms]; - const strings = mapped_macho[symtab.stroff..][0 .. symtab.strsize - 1]; - - var symbols: std.ArrayList(MachoSymbol) = try .initCapacity(gpa, syms.len); - defer symbols.deinit(gpa); - - // This map is temporary; it is used only to detect duplicates here. This is - // necessary because we prefer to use STAB ("symbolic debugging table") symbols, - // but they might not be present, so we track normal symbols too. - // Indices match 1-1 with those of `symbols`. - var symbol_names: std.StringArrayHashMapUnmanaged(void) = .empty; - defer symbol_names.deinit(gpa); - try symbol_names.ensureUnusedCapacity(gpa, syms.len); - - var ofile: u32 = undefined; - var last_sym: MachoSymbol = undefined; - var state: enum { - init, - oso_open, - oso_close, - bnsym, - fun_strx, - fun_size, - ensym, - } = .init; - - for (syms) |*sym| { - if (sym.n_type.bits.is_stab == 0) { - if (sym.n_strx == 0) continue; - switch (sym.n_type.bits.type) { - .undf, .pbud, .indr, .abs, _ => continue, - .sect => { - const name = std.mem.sliceTo(strings[sym.n_strx..], 0); - const gop = symbol_names.getOrPutAssumeCapacity(name); - if (!gop.found_existing) { - assert(gop.index == symbols.items.len); - symbols.appendAssumeCapacity(.{ - .strx = sym.n_strx, - .addr = sym.n_value, - .ofile = MachoSymbol.unknown_ofile, - }); - } - }, - } - continue; - } - - // TODO handle globals N_GSYM, and statics N_STSYM - switch (sym.n_type.stab) { - .oso => switch (state) { - .init, .oso_close => { - state = .oso_open; - ofile = sym.n_strx; - }, - else => return error.InvalidDebugInfo, - }, - .bnsym => switch (state) { - .oso_open, .ensym => { - state = .bnsym; - last_sym = .{ - .strx = 0, - .addr = sym.n_value, - .ofile = ofile, - }; - }, - else => return error.InvalidDebugInfo, - }, - .fun => switch (state) { - .bnsym => { - state = .fun_strx; - last_sym.strx = sym.n_strx; - }, - .fun_strx => { - state = .fun_size; - }, - else => return error.InvalidDebugInfo, - }, - .ensym => switch (state) { - .fun_size => { - state = .ensym; - if (last_sym.strx != 0) { - const name = std.mem.sliceTo(strings[last_sym.strx..], 0); - const gop = symbol_names.getOrPutAssumeCapacity(name); - if (!gop.found_existing) { - assert(gop.index == symbols.items.len); - symbols.appendAssumeCapacity(last_sym); - } else { - symbols.items[gop.index] = last_sym; - } - } - }, - else => return error.InvalidDebugInfo, - }, - .so => switch (state) { - .init, .oso_close => {}, - .oso_open, .ensym => { - state = .oso_close; - }, - else => return error.InvalidDebugInfo, - }, - else => {}, - } - } - - switch (state) { - .init => { - // Missing STAB symtab entries is still okay, unless there were also no normal symbols. - if (symbols.items.len == 0) return error.MissingDebugInfo; - }, - .oso_close => {}, - else => return error.InvalidDebugInfo, // corrupted STAB entries in symtab - } - - const symbols_slice = try symbols.toOwnedSlice(gpa); - errdefer gpa.free(symbols_slice); - - // Even though lld emits symbols in ascending order, this debug code - // should work for programs linked in any valid way. - // This sort is so that we can binary search later. - mem.sort(MachoSymbol, symbols_slice, {}, MachoSymbol.addressLessThan); - - return .{ - .mapped_memory = all_mapped_memory, - .symbols = symbols_slice, - .strings = strings, - .ofiles = .empty, - .vaddr_offset = module.text_base - text_vmaddr, - }; -} -pub fn getSymbolAtAddress(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, address: usize) Error!std.debug.Symbol { - // We need the lock for a few things: - // * loading the Mach-O module - // * loading the referenced object file - // * scanning the DWARF of that object file - // * building the line number table of that object file - // That's enough that it doesn't really seem worth scoping the lock more tightly than the whole function.. - di.mutex.lock(); - defer di.mutex.unlock(); - - if (di.loaded_macho == null) di.loaded_macho = module.loadMachO(gpa) catch |err| switch (err) { - error.InvalidDebugInfo, error.MissingDebugInfo, error.OutOfMemory, error.Unexpected => |e| return e, - else => return error.ReadFailed, - }; - const loaded_macho = &di.loaded_macho.?; - - const vaddr = address - loaded_macho.vaddr_offset; - const symbol = MachoSymbol.find(loaded_macho.symbols, vaddr) orelse return .unknown; - - // offset of `address` from start of `symbol` - const address_symbol_offset = vaddr - symbol.addr; - - // Take the symbol name from the N_FUN STAB entry, we're going to - // use it if we fail to find the DWARF infos - const stab_symbol = mem.sliceTo(loaded_macho.strings[symbol.strx..], 0); - - // If any information is missing, we can at least return this from now on. - const sym_only_result: std.debug.Symbol = .{ - .name = stab_symbol, - .compile_unit_name = null, - .source_location = null, - }; - - if (symbol.ofile == MachoSymbol.unknown_ofile) { - // We don't have STAB info, so can't track down the object file; all we can do is the symbol name. - return sym_only_result; - } - - const o_file: *DebugInfo.OFile = of: { - const gop = try loaded_macho.ofiles.getOrPut(gpa, symbol.ofile); - if (!gop.found_existing) { - const o_file_path = mem.sliceTo(loaded_macho.strings[symbol.ofile..], 0); - gop.value_ptr.* = DebugInfo.loadOFile(gpa, o_file_path) catch { - _ = loaded_macho.ofiles.pop().?; - return sym_only_result; - }; - } - break :of gop.value_ptr; - }; - - const symbol_index = o_file.symbols_by_name.getKeyAdapted( - @as([]const u8, stab_symbol), - @as(DebugInfo.OFile.SymbolAdapter, .{ .strtab = o_file.strtab, .symtab = o_file.symtab }), - ) orelse return sym_only_result; - const symbol_ofile_vaddr = o_file.symtab[symbol_index].n_value; - - const compile_unit = o_file.dwarf.findCompileUnit(native_endian, symbol_ofile_vaddr) catch return sym_only_result; - - return .{ - .name = o_file.dwarf.getSymbolName(symbol_ofile_vaddr + address_symbol_offset) orelse stab_symbol, - .compile_unit_name = compile_unit.die.getAttrString( - &o_file.dwarf, - native_endian, - std.dwarf.AT.name, - o_file.dwarf.section(.debug_str), - compile_unit, - ) catch |err| switch (err) { - error.MissingDebugInfo, error.InvalidDebugInfo => null, - }, - .source_location = o_file.dwarf.getLineNumberInfo( - gpa, - native_endian, - compile_unit, - symbol_ofile_vaddr + address_symbol_offset, - ) catch null, - }; -} -pub const supports_unwinding: bool = true; -pub const UnwindContext = std.debug.SelfInfo.DwarfUnwindContext; -/// Unwind a frame using MachO compact unwind info (from __unwind_info). -/// If the compact encoding can't encode a way to unwind a frame, it will -/// defer unwinding to DWARF, in which case `.eh_frame` will be used if available. -pub fn unwindFrame(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, context: *UnwindContext) Error!usize { - return unwindFrameInner(module, gpa, di, context) catch |err| switch (err) { - error.InvalidDebugInfo, - error.MissingDebugInfo, - error.UnsupportedDebugInfo, - error.ReadFailed, - error.OutOfMemory, - error.Unexpected, - => |e| return e, - error.UnsupportedRegister, - => return error.UnsupportedDebugInfo, - error.InvalidRegister, - error.IncompatibleRegisterSize, - => return error.InvalidDebugInfo, - }; -} -fn unwindFrameInner(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, context: *UnwindContext) !usize { - const unwind: *DebugInfo.Unwind = u: { - di.mutex.lock(); - defer di.mutex.unlock(); - if (di.unwind == null) try module.loadUnwindInfo(gpa, di); - break :u &di.unwind.?; - }; - - const unwind_info = unwind.unwind_info orelse return error.MissingDebugInfo; - if (unwind_info.len < @sizeOf(macho.unwind_info_section_header)) return error.InvalidDebugInfo; - const header: *align(1) const macho.unwind_info_section_header = @ptrCast(unwind_info); - - const index_byte_count = header.indexCount * @sizeOf(macho.unwind_info_section_header_index_entry); - if (unwind_info.len < header.indexSectionOffset + index_byte_count) return error.InvalidDebugInfo; - const indices: []align(1) const macho.unwind_info_section_header_index_entry = @ptrCast(unwind_info[header.indexSectionOffset..][0..index_byte_count]); - if (indices.len == 0) return error.MissingDebugInfo; - - // offset of the PC into the `__TEXT` segment - const pc_text_offset = context.pc - module.text_base; - - const start_offset: u32, const first_level_offset: u32 = index: { - var left: usize = 0; - var len: usize = indices.len; - while (len > 1) { - const mid = left + len / 2; - if (pc_text_offset < indices[mid].functionOffset) { - len /= 2; - } else { - left = mid; - len -= len / 2; - } - } - break :index .{ indices[left].secondLevelPagesSectionOffset, indices[left].functionOffset }; - }; - // An offset of 0 is a sentinel indicating a range does not have unwind info. - if (start_offset == 0) return error.MissingDebugInfo; - - const common_encodings_byte_count = header.commonEncodingsArrayCount * @sizeOf(macho.compact_unwind_encoding_t); - if (unwind_info.len < header.commonEncodingsArraySectionOffset + common_encodings_byte_count) return error.InvalidDebugInfo; - const common_encodings: []align(1) const macho.compact_unwind_encoding_t = @ptrCast( - unwind_info[header.commonEncodingsArraySectionOffset..][0..common_encodings_byte_count], - ); - - if (unwind_info.len < start_offset + @sizeOf(macho.UNWIND_SECOND_LEVEL)) return error.InvalidDebugInfo; - const kind: *align(1) const macho.UNWIND_SECOND_LEVEL = @ptrCast(unwind_info[start_offset..]); - - const entry: struct { - function_offset: usize, - raw_encoding: u32, - } = switch (kind.*) { - .REGULAR => entry: { - if (unwind_info.len < start_offset + @sizeOf(macho.unwind_info_regular_second_level_page_header)) return error.InvalidDebugInfo; - const page_header: *align(1) const macho.unwind_info_regular_second_level_page_header = @ptrCast(unwind_info[start_offset..]); - - const entries_byte_count = page_header.entryCount * @sizeOf(macho.unwind_info_regular_second_level_entry); - if (unwind_info.len < start_offset + entries_byte_count) return error.InvalidDebugInfo; - const entries: []align(1) const macho.unwind_info_regular_second_level_entry = @ptrCast( - unwind_info[start_offset + page_header.entryPageOffset ..][0..entries_byte_count], - ); - if (entries.len == 0) return error.InvalidDebugInfo; - - var left: usize = 0; - var len: usize = entries.len; - while (len > 1) { - const mid = left + len / 2; - if (pc_text_offset < entries[mid].functionOffset) { - len /= 2; - } else { - left = mid; - len -= len / 2; - } - } - break :entry .{ - .function_offset = entries[left].functionOffset, - .raw_encoding = entries[left].encoding, - }; - }, - .COMPRESSED => entry: { - if (unwind_info.len < start_offset + @sizeOf(macho.unwind_info_compressed_second_level_page_header)) return error.InvalidDebugInfo; - const page_header: *align(1) const macho.unwind_info_compressed_second_level_page_header = @ptrCast(unwind_info[start_offset..]); - - const entries_byte_count = page_header.entryCount * @sizeOf(macho.UnwindInfoCompressedEntry); - if (unwind_info.len < start_offset + entries_byte_count) return error.InvalidDebugInfo; - const entries: []align(1) const macho.UnwindInfoCompressedEntry = @ptrCast( - unwind_info[start_offset + page_header.entryPageOffset ..][0..entries_byte_count], - ); - if (entries.len == 0) return error.InvalidDebugInfo; - - var left: usize = 0; - var len: usize = entries.len; - while (len > 1) { - const mid = left + len / 2; - if (pc_text_offset < first_level_offset + entries[mid].funcOffset) { - len /= 2; - } else { - left = mid; - len -= len / 2; - } - } - const entry = entries[left]; - - const function_offset = first_level_offset + entry.funcOffset; - if (entry.encodingIndex < common_encodings.len) { - break :entry .{ - .function_offset = function_offset, - .raw_encoding = common_encodings[entry.encodingIndex], - }; - } - - const local_index = entry.encodingIndex - common_encodings.len; - const local_encodings_byte_count = page_header.encodingsCount * @sizeOf(macho.compact_unwind_encoding_t); - if (unwind_info.len < start_offset + page_header.encodingsPageOffset + local_encodings_byte_count) return error.InvalidDebugInfo; - const local_encodings: []align(1) const macho.compact_unwind_encoding_t = @ptrCast( - unwind_info[start_offset + page_header.encodingsPageOffset ..][0..local_encodings_byte_count], - ); - if (local_index >= local_encodings.len) return error.InvalidDebugInfo; - break :entry .{ - .function_offset = function_offset, - .raw_encoding = local_encodings[local_index], - }; - }, - else => return error.InvalidDebugInfo, - }; - - if (entry.raw_encoding == 0) return error.MissingDebugInfo; - - const encoding: macho.CompactUnwindEncoding = @bitCast(entry.raw_encoding); - const new_ip = switch (builtin.cpu.arch) { - .x86_64 => switch (encoding.mode.x86_64) { - .OLD => return error.UnsupportedDebugInfo, - .RBP_FRAME => ip: { - const frame = encoding.value.x86_64.frame; - - const fp = (try dwarfRegNative(&context.cpu_context, fp_reg_num)).*; - const new_sp = fp + 2 * @sizeOf(usize); - - const ip_ptr = fp + @sizeOf(usize); - const new_ip = @as(*const usize, @ptrFromInt(ip_ptr)).*; - const new_fp = @as(*const usize, @ptrFromInt(fp)).*; - - (try dwarfRegNative(&context.cpu_context, fp_reg_num)).* = new_fp; - (try dwarfRegNative(&context.cpu_context, sp_reg_num)).* = new_sp; - (try dwarfRegNative(&context.cpu_context, ip_reg_num)).* = new_ip; - - const regs: [5]u3 = .{ - frame.reg0, - frame.reg1, - frame.reg2, - frame.reg3, - frame.reg4, - }; - for (regs, 0..) |reg, i| { - if (reg == 0) continue; - const addr = fp - frame.frame_offset * @sizeOf(usize) + i * @sizeOf(usize); - const reg_number = try Dwarf.compactUnwindToDwarfRegNumber(reg); - (try dwarfRegNative(&context.cpu_context, reg_number)).* = @as(*const usize, @ptrFromInt(addr)).*; - } - - break :ip new_ip; - }, - .STACK_IMMD, - .STACK_IND, - => ip: { - const frameless = encoding.value.x86_64.frameless; - - const sp = (try dwarfRegNative(&context.cpu_context, sp_reg_num)).*; - const stack_size: usize = stack_size: { - if (encoding.mode.x86_64 == .STACK_IMMD) { - break :stack_size @as(usize, frameless.stack.direct.stack_size) * @sizeOf(usize); - } - // In .STACK_IND, the stack size is inferred from the subq instruction at the beginning of the function. - const sub_offset_addr = - module.text_base + - entry.function_offset + - frameless.stack.indirect.sub_offset; - // `sub_offset_addr` points to the offset of the literal within the instruction - const sub_operand = @as(*align(1) const u32, @ptrFromInt(sub_offset_addr)).*; - break :stack_size sub_operand + @sizeOf(usize) * @as(usize, frameless.stack.indirect.stack_adjust); - }; - - // Decode the Lehmer-coded sequence of registers. - // For a description of the encoding see lib/libc/include/any-macos.13-any/mach-o/compact_unwind_encoding.h - - // Decode the variable-based permutation number into its digits. Each digit represents - // an index into the list of register numbers that weren't yet used in the sequence at - // the time the digit was added. - const reg_count = frameless.stack_reg_count; - const ip_ptr = ip_ptr: { - var digits: [6]u3 = undefined; - var accumulator: usize = frameless.stack_reg_permutation; - var base: usize = 2; - for (0..reg_count) |i| { - const div = accumulator / base; - digits[digits.len - 1 - i] = @intCast(accumulator - base * div); - accumulator = div; - base += 1; - } - - var registers: [6]u3 = undefined; - var used_indices: [6]bool = @splat(false); - for (digits[digits.len - reg_count ..], 0..) |target_unused_index, i| { - var unused_count: u8 = 0; - const unused_index = for (used_indices, 0..) |used, index| { - if (!used) { - if (target_unused_index == unused_count) break index; - unused_count += 1; - } - } else unreachable; - registers[i] = @intCast(unused_index + 1); - used_indices[unused_index] = true; - } - - var reg_addr = sp + stack_size - @sizeOf(usize) * @as(usize, reg_count + 1); - for (0..reg_count) |i| { - const reg_number = try Dwarf.compactUnwindToDwarfRegNumber(registers[i]); - (try dwarfRegNative(&context.cpu_context, reg_number)).* = @as(*const usize, @ptrFromInt(reg_addr)).*; - reg_addr += @sizeOf(usize); - } - - break :ip_ptr reg_addr; - }; - - const new_ip = @as(*const usize, @ptrFromInt(ip_ptr)).*; - const new_sp = ip_ptr + @sizeOf(usize); - - (try dwarfRegNative(&context.cpu_context, sp_reg_num)).* = new_sp; - (try dwarfRegNative(&context.cpu_context, ip_reg_num)).* = new_ip; - - break :ip new_ip; - }, - .DWARF => { - const dwarf = &(unwind.dwarf orelse return error.MissingDebugInfo); - return context.unwindFrame(unwind.dwarf_cache, gpa, dwarf, unwind.vmaddr_slide, encoding.value.x86_64.dwarf); - }, - }, - .aarch64, .aarch64_be => switch (encoding.mode.arm64) { - .OLD => return error.UnsupportedDebugInfo, - .FRAMELESS => ip: { - const sp = (try dwarfRegNative(&context.cpu_context, sp_reg_num)).*; - const new_sp = sp + encoding.value.arm64.frameless.stack_size * 16; - const new_ip = (try dwarfRegNative(&context.cpu_context, 30)).*; - (try dwarfRegNative(&context.cpu_context, sp_reg_num)).* = new_sp; - break :ip new_ip; - }, - .DWARF => { - const dwarf = &(unwind.dwarf orelse return error.MissingDebugInfo); - return context.unwindFrame(unwind.dwarf_cache, gpa, dwarf, unwind.vmaddr_slide, encoding.value.arm64.dwarf); - }, - .FRAME => ip: { - const frame = encoding.value.arm64.frame; - - const fp = (try dwarfRegNative(&context.cpu_context, fp_reg_num)).*; - const ip_ptr = fp + @sizeOf(usize); - - var reg_addr = fp - @sizeOf(usize); - inline for (@typeInfo(@TypeOf(frame.x_reg_pairs)).@"struct".fields, 0..) |field, i| { - if (@field(frame.x_reg_pairs, field.name) != 0) { - (try dwarfRegNative(&context.cpu_context, 19 + i)).* = @as(*const usize, @ptrFromInt(reg_addr)).*; - reg_addr += @sizeOf(usize); - (try dwarfRegNative(&context.cpu_context, 20 + i)).* = @as(*const usize, @ptrFromInt(reg_addr)).*; - reg_addr += @sizeOf(usize); - } - } - - inline for (@typeInfo(@TypeOf(frame.d_reg_pairs)).@"struct".fields, 0..) |field, i| { - if (@field(frame.d_reg_pairs, field.name) != 0) { - // Only the lower half of the 128-bit V registers are restored during unwinding - { - const dest: *align(1) usize = @ptrCast(try context.cpu_context.dwarfRegisterBytes(64 + 8 + i)); - dest.* = @as(*const usize, @ptrFromInt(reg_addr)).*; - } - reg_addr += @sizeOf(usize); - { - const dest: *align(1) usize = @ptrCast(try context.cpu_context.dwarfRegisterBytes(64 + 9 + i)); - dest.* = @as(*const usize, @ptrFromInt(reg_addr)).*; - } - reg_addr += @sizeOf(usize); - } - } - - const new_ip = @as(*const usize, @ptrFromInt(ip_ptr)).*; - const new_fp = @as(*const usize, @ptrFromInt(fp)).*; - - (try dwarfRegNative(&context.cpu_context, fp_reg_num)).* = new_fp; - (try dwarfRegNative(&context.cpu_context, ip_reg_num)).* = new_ip; - - break :ip new_ip; - }, - }, - else => comptime unreachable, // unimplemented - }; - - const ret_addr = std.debug.stripInstructionPtrAuthCode(new_ip); - - // Like `DwarfUnwindContext.unwindFrame`, adjust our next lookup pc in case the `call` was this - // function's last instruction making `ret_addr` one byte past its end. - context.pc = ret_addr -| 1; - - return ret_addr; -} -pub const DebugInfo = struct { - /// Held while checking and/or populating `unwind` or `loaded_macho`. - /// Once a field is populated and the pointer `&di.loaded_macho.?` or `&di.unwind.?` has been - /// gotten, the lock is released; i.e. it is not held while *using* the loaded info. - mutex: std.Thread.Mutex, - - unwind: ?Unwind, - loaded_macho: ?LoadedMachO, - - pub const init: DebugInfo = .{ - .mutex = .{}, - - .unwind = null, - .loaded_macho = null, - }; - - pub fn deinit(di: *DebugInfo, gpa: Allocator) void { - if (di.loaded_macho) |*loaded_macho| { - for (loaded_macho.ofiles.values()) |*ofile| { - ofile.dwarf.deinit(gpa); - ofile.symbols_by_name.deinit(gpa); - posix.munmap(ofile.mapped_memory); - } - loaded_macho.ofiles.deinit(gpa); - gpa.free(loaded_macho.symbols); - posix.munmap(loaded_macho.mapped_memory); - } - } - - const Unwind = struct { - /// The slide applied to the `__unwind_info` and `__eh_frame` sections. - /// So, `unwind_info.ptr` is this many bytes higher than the section's vmaddr. - vmaddr_slide: u64, - /// Backed by the in-memory section mapped by the loader. - unwind_info: ?[]const u8, - /// Backed by the in-memory `__eh_frame` section mapped by the loader. - dwarf: ?Dwarf.Unwind, - /// This is `undefined` if `dwarf == null`. - dwarf_cache: *UnwindContext.Cache, - }; - - const LoadedMachO = struct { - mapped_memory: []align(std.heap.page_size_min) const u8, - symbols: []const MachoSymbol, - strings: []const u8, - /// Key is index into `strings` of the file path. - ofiles: std.AutoArrayHashMapUnmanaged(u32, OFile), - /// This is not necessarily the same as the vmaddr_slide that dyld would report. This is - /// because the segments in the file on disk might differ from the ones in memory. Normally - /// we wouldn't necessarily expect that to work, but /usr/lib/dyld is incredibly annoying: - /// it exists on disk (necessarily, because the kernel needs to load it!), but is also in - /// the dyld cache (dyld actually restart itself from cache after loading it), and the two - /// versions have (very) different segment base addresses. It's sort of like a large slide - /// has been applied to all addresses in memory. For an optimal experience, we consider the - /// on-disk vmaddr instead of the in-memory one. - vaddr_offset: usize, - }; - - const OFile = struct { - mapped_memory: []align(std.heap.page_size_min) const u8, - dwarf: Dwarf, - strtab: []const u8, - symtab: []align(1) const macho.nlist_64, - /// All named symbols in `symtab`. Stored `u32` key is the index into `symtab`. Accessed - /// through `SymbolAdapter`, so that the symbol name is used as the logical key. - symbols_by_name: std.ArrayHashMapUnmanaged(u32, void, void, true), - - const SymbolAdapter = struct { - strtab: []const u8, - symtab: []align(1) const macho.nlist_64, - pub fn hash(ctx: SymbolAdapter, sym_name: []const u8) u32 { - _ = ctx; - return @truncate(std.hash.Wyhash.hash(0, sym_name)); - } - pub fn eql(ctx: SymbolAdapter, a_sym_name: []const u8, b_sym_index: u32, b_index: usize) bool { - _ = b_index; - const b_sym = ctx.symtab[b_sym_index]; - const b_sym_name = std.mem.sliceTo(ctx.strtab[b_sym.n_strx..], 0); - return mem.eql(u8, a_sym_name, b_sym_name); - } - }; - }; - - fn loadOFile(gpa: Allocator, o_file_path: []const u8) !OFile { - const mapped_mem = try mapDebugInfoFile(o_file_path); - errdefer posix.munmap(mapped_mem); - - if (mapped_mem.len < @sizeOf(macho.mach_header_64)) return error.InvalidDebugInfo; - const hdr: *const macho.mach_header_64 = @ptrCast(@alignCast(mapped_mem.ptr)); - if (hdr.magic != std.macho.MH_MAGIC_64) return error.InvalidDebugInfo; - - const seg_cmd: macho.LoadCommandIterator.LoadCommand, const symtab_cmd: macho.symtab_command = cmds: { - var seg_cmd: ?macho.LoadCommandIterator.LoadCommand = null; - var symtab_cmd: ?macho.symtab_command = null; - var it: macho.LoadCommandIterator = .{ - .ncmds = hdr.ncmds, - .buffer = mapped_mem[@sizeOf(macho.mach_header_64)..][0..hdr.sizeofcmds], - }; - while (it.next()) |cmd| switch (cmd.cmd()) { - .SEGMENT_64 => seg_cmd = cmd, - .SYMTAB => symtab_cmd = cmd.cast(macho.symtab_command) orelse return error.InvalidDebugInfo, - else => {}, - }; - break :cmds .{ - seg_cmd orelse return error.MissingDebugInfo, - symtab_cmd orelse return error.MissingDebugInfo, - }; - }; - - if (mapped_mem.len < symtab_cmd.stroff + symtab_cmd.strsize) return error.InvalidDebugInfo; - if (mapped_mem[symtab_cmd.stroff + symtab_cmd.strsize - 1] != 0) return error.InvalidDebugInfo; - const strtab = mapped_mem[symtab_cmd.stroff..][0 .. symtab_cmd.strsize - 1]; - - const n_sym_bytes = symtab_cmd.nsyms * @sizeOf(macho.nlist_64); - if (mapped_mem.len < symtab_cmd.symoff + n_sym_bytes) return error.InvalidDebugInfo; - const symtab: []align(1) const macho.nlist_64 = @ptrCast(mapped_mem[symtab_cmd.symoff..][0..n_sym_bytes]); - - // TODO handle tentative (common) symbols - var symbols_by_name: std.ArrayHashMapUnmanaged(u32, void, void, true) = .empty; - defer symbols_by_name.deinit(gpa); - try symbols_by_name.ensureUnusedCapacity(gpa, @intCast(symtab.len)); - for (symtab, 0..) |sym, sym_index| { - if (sym.n_strx == 0) continue; - switch (sym.n_type.bits.type) { - .undf => continue, // includes tentative symbols - .abs => continue, - else => {}, - } - const sym_name = mem.sliceTo(strtab[sym.n_strx..], 0); - const gop = symbols_by_name.getOrPutAssumeCapacityAdapted( - @as([]const u8, sym_name), - @as(DebugInfo.OFile.SymbolAdapter, .{ .strtab = strtab, .symtab = symtab }), - ); - if (gop.found_existing) return error.InvalidDebugInfo; - gop.key_ptr.* = @intCast(sym_index); - } - - var sections: Dwarf.SectionArray = @splat(null); - for (seg_cmd.getSections()) |sect| { - if (!std.mem.eql(u8, "__DWARF", sect.segName())) continue; - - const section_index: usize = inline for (@typeInfo(Dwarf.Section.Id).@"enum".fields, 0..) |section, i| { - if (mem.eql(u8, "__" ++ section.name, sect.sectName())) break i; - } else continue; - - if (mapped_mem.len < sect.offset + sect.size) return error.InvalidDebugInfo; - const section_bytes = mapped_mem[sect.offset..][0..sect.size]; - sections[section_index] = .{ - .data = section_bytes, - .owned = false, - }; - } - - const missing_debug_info = - sections[@intFromEnum(Dwarf.Section.Id.debug_info)] == null or - sections[@intFromEnum(Dwarf.Section.Id.debug_abbrev)] == null or - sections[@intFromEnum(Dwarf.Section.Id.debug_str)] == null or - sections[@intFromEnum(Dwarf.Section.Id.debug_line)] == null; - if (missing_debug_info) return error.MissingDebugInfo; - - var dwarf: Dwarf = .{ .sections = sections }; - errdefer dwarf.deinit(gpa); - try dwarf.open(gpa, native_endian); - - return .{ - .mapped_memory = mapped_mem, - .dwarf = dwarf, - .strtab = strtab, - .symtab = symtab, - .symbols_by_name = symbols_by_name.move(), - }; - } -}; - -const MachoSymbol = struct { - strx: u32, - addr: u64, - /// Value may be `unknown_ofile`. - ofile: u32, - const unknown_ofile = std.math.maxInt(u32); - fn addressLessThan(context: void, lhs: MachoSymbol, rhs: MachoSymbol) bool { - _ = context; - return lhs.addr < rhs.addr; - } - /// Assumes that `symbols` is sorted in order of ascending `addr`. - fn find(symbols: []const MachoSymbol, address: usize) ?*const MachoSymbol { - if (symbols.len == 0) return null; // no potential match - if (address < symbols[0].addr) return null; // address is before the lowest-address symbol - var left: usize = 0; - var len: usize = symbols.len; - while (len > 1) { - const mid = left + len / 2; - if (address < symbols[mid].addr) { - len /= 2; - } else { - left = mid; - len -= len / 2; - } - } - return &symbols[left]; - } - - test find { - const symbols: []const MachoSymbol = &.{ - .{ .addr = 100, .strx = undefined, .ofile = undefined }, - .{ .addr = 200, .strx = undefined, .ofile = undefined }, - .{ .addr = 300, .strx = undefined, .ofile = undefined }, - }; - - try testing.expectEqual(null, find(symbols, 0)); - try testing.expectEqual(null, find(symbols, 99)); - try testing.expectEqual(&symbols[0], find(symbols, 100).?); - try testing.expectEqual(&symbols[0], find(symbols, 150).?); - try testing.expectEqual(&symbols[0], find(symbols, 199).?); - - try testing.expectEqual(&symbols[1], find(symbols, 200).?); - try testing.expectEqual(&symbols[1], find(symbols, 250).?); - try testing.expectEqual(&symbols[1], find(symbols, 299).?); - - try testing.expectEqual(&symbols[2], find(symbols, 300).?); - try testing.expectEqual(&symbols[2], find(symbols, 301).?); - try testing.expectEqual(&symbols[2], find(symbols, 5000).?); - } -}; -test { - _ = MachoSymbol; -} - -const ip_reg_num = Dwarf.ipRegNum(builtin.target.cpu.arch).?; -const fp_reg_num = Dwarf.fpRegNum(builtin.target.cpu.arch); -const sp_reg_num = Dwarf.spRegNum(builtin.target.cpu.arch); - -/// Uses `mmap` to map the file at `path` into memory. -fn mapDebugInfoFile(path: []const u8) ![]align(std.heap.page_size_min) const u8 { - const file = std.fs.cwd().openFile(path, .{}) catch |err| switch (err) { - error.FileNotFound => return error.MissingDebugInfo, - else => return error.ReadFailed, - }; - defer file.close(); - - const file_len = std.math.cast(usize, try file.getEndPos()) orelse return error.InvalidDebugInfo; - - return posix.mmap( - null, - file_len, - posix.PROT.READ, - .{ .TYPE = .SHARED }, - file.handle, - 0, - ); -} - -const DarwinModule = @This(); - -const std = @import("../../std.zig"); -const Allocator = std.mem.Allocator; -const Dwarf = std.debug.Dwarf; -const assert = std.debug.assert; -const macho = std.macho; -const mem = std.mem; -const posix = std.posix; -const testing = std.testing; -const Error = std.debug.SelfInfo.Error; -const dwarfRegNative = std.debug.SelfInfo.DwarfUnwindContext.regNative; - -const builtin = @import("builtin"); -const native_endian = builtin.target.cpu.arch.endian(); |
