diff options
Diffstat (limited to 'lib/std/debug/SelfInfo/MachO.zig')
| -rw-r--r-- | lib/std/debug/SelfInfo/MachO.zig | 458 |
1 files changed, 61 insertions, 397 deletions
diff --git a/lib/std/debug/SelfInfo/MachO.zig b/lib/std/debug/SelfInfo/MachO.zig index f7eb4465c5..94d50bbf77 100644 --- a/lib/std/debug/SelfInfo/MachO.zig +++ b/lib/std/debug/SelfInfo/MachO.zig @@ -1,12 +1,10 @@ mutex: std.Thread.Mutex, /// Accessed through `Module.Adapter`. modules: std.ArrayHashMapUnmanaged(Module, void, Module.Context, false), -ofiles: std.StringArrayHashMapUnmanaged(?OFile), pub const init: SelfInfo = .{ .mutex = .{}, .modules = .empty, - .ofiles = .empty, }; pub fn deinit(si: *SelfInfo, gpa: Allocator) void { for (si.modules.keys()) |*module| { @@ -14,20 +12,12 @@ pub fn deinit(si: *SelfInfo, gpa: Allocator) void { const u = &(module.unwind orelse break :unwind catch break :unwind); if (u.dwarf) |*dwarf| dwarf.deinit(gpa); } - loaded: { - const l = &(module.loaded_macho orelse break :loaded catch break :loaded); - gpa.free(l.symbols); - posix.munmap(l.mapped_memory); + file: { + const f = &(module.file orelse break :file catch break :file); + f.deinit(gpa); } } - for (si.ofiles.values()) |*opt_ofile| { - const ofile = &(opt_ofile.* orelse continue); - ofile.dwarf.deinit(gpa); - ofile.symbols_by_name.deinit(gpa); - posix.munmap(ofile.mapped_memory); - } si.modules.deinit(gpa); - si.ofiles.deinit(gpa); } pub fn getSymbol(si: *SelfInfo, gpa: Allocator, io: Io, address: usize) Error!std.debug.Symbol { @@ -35,67 +25,55 @@ pub fn getSymbol(si: *SelfInfo, gpa: Allocator, io: Io, address: usize) Error!st const module = try si.findModule(gpa, address); defer si.mutex.unlock(); - const loaded_macho = try module.getLoadedMachO(gpa); - - const vaddr = address - loaded_macho.vaddr_offset; - const symbol = MachoSymbol.find(loaded_macho.symbols, vaddr) orelse return .unknown; + const file = try module.getFile(gpa); - // offset of `address` from start of `symbol` - const address_symbol_offset = vaddr - symbol.addr; + // This is not necessarily the same as the vmaddr_slide that dyld would report. This is + // because the segments in the file on disk might differ from the ones in memory. Normally + // we wouldn't necessarily expect that to work, but /usr/lib/dyld is incredibly annoying: + // it exists on disk (necessarily, because the kernel needs to load it!), but is also in + // the dyld cache (dyld actually restart itself from cache after loading it), and the two + // versions have (very) different segment base addresses. It's sort of like a large slide + // has been applied to all addresses in memory. For an optimal experience, we consider the + // on-disk vmaddr instead of the in-memory one. + const vaddr_offset = module.text_base - file.text_vmaddr; - // Take the symbol name from the N_FUN STAB entry, we're going to - // use it if we fail to find the DWARF infos - const stab_symbol = mem.sliceTo(loaded_macho.strings[symbol.strx..], 0); + const vaddr = address - vaddr_offset; - // If any information is missing, we can at least return this from now on. - const sym_only_result: std.debug.Symbol = .{ - .name = stab_symbol, - .compile_unit_name = null, - .source_location = null, + const ofile_dwarf, const ofile_vaddr = file.getDwarfForAddress(gpa, vaddr) catch { + // Return at least the symbol name if available. + return .{ + .name = try file.lookupSymbolName(vaddr), + .compile_unit_name = null, + .source_location = null, + }; }; - if (symbol.ofile == MachoSymbol.unknown_ofile) { - // We don't have STAB info, so can't track down the object file; all we can do is the symbol name. - return sym_only_result; - } - - const o_file: *OFile = of: { - const path = mem.sliceTo(loaded_macho.strings[symbol.ofile..], 0); - const gop = try si.ofiles.getOrPut(gpa, path); - if (!gop.found_existing) { - gop.value_ptr.* = loadOFile(gpa, path) catch null; - } - if (gop.value_ptr.*) |*o_file| { - break :of o_file; - } else { - return sym_only_result; - } + const compile_unit = ofile_dwarf.findCompileUnit(native_endian, ofile_vaddr) catch { + // Return at least the symbol name if available. + return .{ + .name = try file.lookupSymbolName(vaddr), + .compile_unit_name = null, + .source_location = null, + }; }; - const symbol_index = o_file.symbols_by_name.getKeyAdapted( - @as([]const u8, stab_symbol), - @as(OFile.SymbolAdapter, .{ .strtab = o_file.strtab, .symtab = o_file.symtab }), - ) orelse return sym_only_result; - const symbol_ofile_vaddr = o_file.symtab[symbol_index].n_value; - - const compile_unit = o_file.dwarf.findCompileUnit(native_endian, symbol_ofile_vaddr) catch return sym_only_result; - return .{ - .name = o_file.dwarf.getSymbolName(symbol_ofile_vaddr + address_symbol_offset) orelse stab_symbol, + .name = ofile_dwarf.getSymbolName(ofile_vaddr) orelse + try file.lookupSymbolName(vaddr), .compile_unit_name = compile_unit.die.getAttrString( - &o_file.dwarf, + ofile_dwarf, native_endian, std.dwarf.AT.name, - o_file.dwarf.section(.debug_str), + ofile_dwarf.section(.debug_str), compile_unit, ) catch |err| switch (err) { error.MissingDebugInfo, error.InvalidDebugInfo => null, }, - .source_location = o_file.dwarf.getLineNumberInfo( + .source_location = ofile_dwarf.getLineNumberInfo( gpa, native_endian, compile_unit, - symbol_ofile_vaddr + address_symbol_offset, + ofile_vaddr, ) catch null, }; } @@ -104,6 +82,20 @@ pub fn getModuleName(si: *SelfInfo, gpa: Allocator, address: usize) Error![]cons defer si.mutex.unlock(); return module.name; } +pub fn getModuleSlide(si: *SelfInfo, gpa: Allocator, address: usize) Error!usize { + const module = try si.findModule(gpa, address); + defer si.mutex.unlock(); + const header: *std.macho.mach_header_64 = @ptrFromInt(module.text_base); + const raw_macho: [*]u8 = @ptrCast(header); + var it = macho.LoadCommandIterator.init(header, raw_macho[@sizeOf(macho.mach_header_64)..][0..header.sizeofcmds]) catch unreachable; + const text_vmaddr = while (it.next() catch unreachable) |load_cmd| { + if (load_cmd.hdr.cmd != .SEGMENT_64) continue; + const segment_cmd = load_cmd.cast(macho.segment_command_64).?; + if (!mem.eql(u8, segment_cmd.segName(), "__TEXT")) continue; + break segment_cmd.vmaddr; + } else unreachable; + return module.text_base - text_vmaddr; +} pub const can_unwind: bool = true; pub const UnwindContext = std.debug.Dwarf.SelfUnwinder; @@ -447,7 +439,7 @@ fn findModule(si: *SelfInfo, gpa: Allocator, address: usize) Error!*Module { .text_base = @intFromPtr(info.fbase), .name = std.mem.span(info.fname), .unwind = null, - .loaded_macho = null, + .file = null, }; } return gop.key_ptr; @@ -457,7 +449,7 @@ const Module = struct { text_base: usize, name: []const u8, unwind: ?(Error!Unwind), - loaded_macho: ?(Error!LoadedMachO), + file: ?(Error!MachOFile), const Adapter = struct { pub fn hash(_: Adapter, text_base: usize) u32 { @@ -488,34 +480,17 @@ const Module = struct { dwarf: ?Dwarf.Unwind, }; - const LoadedMachO = struct { - mapped_memory: []align(std.heap.page_size_min) const u8, - symbols: []const MachoSymbol, - strings: []const u8, - /// This is not necessarily the same as the vmaddr_slide that dyld would report. This is - /// because the segments in the file on disk might differ from the ones in memory. Normally - /// we wouldn't necessarily expect that to work, but /usr/lib/dyld is incredibly annoying: - /// it exists on disk (necessarily, because the kernel needs to load it!), but is also in - /// the dyld cache (dyld actually restart itself from cache after loading it), and the two - /// versions have (very) different segment base addresses. It's sort of like a large slide - /// has been applied to all addresses in memory. For an optimal experience, we consider the - /// on-disk vmaddr instead of the in-memory one. - vaddr_offset: usize, - }; - fn getUnwindInfo(module: *Module, gpa: Allocator) Error!*Unwind { if (module.unwind == null) module.unwind = loadUnwindInfo(module, gpa); return if (module.unwind.?) |*unwind| unwind else |err| err; } fn loadUnwindInfo(module: *const Module, gpa: Allocator) Error!Unwind { - const header: *std.macho.mach_header = @ptrFromInt(module.text_base); + const header: *std.macho.mach_header_64 = @ptrFromInt(module.text_base); - var it: macho.LoadCommandIterator = .{ - .ncmds = header.ncmds, - .buffer = @as([*]u8, @ptrCast(header))[@sizeOf(macho.mach_header_64)..][0..header.sizeofcmds], - }; - const sections, const text_vmaddr = while (it.next()) |load_cmd| { - if (load_cmd.cmd() != .SEGMENT_64) continue; + const raw_macho: [*]u8 = @ptrCast(header); + var it = macho.LoadCommandIterator.init(header, raw_macho[@sizeOf(macho.mach_header_64)..][0..header.sizeofcmds]) catch unreachable; + const sections, const text_vmaddr = while (it.next() catch unreachable) |load_cmd| { + if (load_cmd.hdr.cmd != .SEGMENT_64) continue; const segment_cmd = load_cmd.cast(macho.segment_command_64).?; if (!mem.eql(u8, segment_cmd.segName(), "__TEXT")) continue; break .{ load_cmd.getSections(), segment_cmd.vmaddr }; @@ -568,237 +543,15 @@ const Module = struct { }; } - fn getLoadedMachO(module: *Module, gpa: Allocator) Error!*LoadedMachO { - if (module.loaded_macho == null) module.loaded_macho = loadMachO(module, gpa) catch |err| switch (err) { - error.InvalidDebugInfo, error.MissingDebugInfo, error.OutOfMemory, error.Unexpected => |e| e, - else => error.ReadFailed, - }; - return if (module.loaded_macho.?) |*lm| lm else |err| err; - } - fn loadMachO(module: *const Module, gpa: Allocator) Error!LoadedMachO { - const all_mapped_memory = try mapDebugInfoFile(module.name); - errdefer posix.munmap(all_mapped_memory); - - // In most cases, the file we just mapped is a Mach-O binary. However, it could be a "universal - // binary": a simple file format which contains Mach-O binaries for multiple targets. For - // instance, `/usr/lib/dyld` is currently distributed as a universal binary containing images - // for both ARM64 macOS and x86_64 macOS. - if (all_mapped_memory.len < 4) return error.InvalidDebugInfo; - const magic = @as(*const u32, @ptrCast(all_mapped_memory.ptr)).*; - // The contents of a Mach-O file, which may or may not be the whole of `all_mapped_memory`. - const mapped_macho = switch (magic) { - macho.MH_MAGIC_64 => all_mapped_memory, - - macho.FAT_CIGAM => mapped_macho: { - // This is the universal binary format (aka a "fat binary"). Annoyingly, the whole thing - // is big-endian, so we'll be swapping some bytes. - if (all_mapped_memory.len < @sizeOf(macho.fat_header)) return error.InvalidDebugInfo; - const hdr: *const macho.fat_header = @ptrCast(all_mapped_memory.ptr); - const archs_ptr: [*]const macho.fat_arch = @ptrCast(all_mapped_memory.ptr + @sizeOf(macho.fat_header)); - const archs: []const macho.fat_arch = archs_ptr[0..@byteSwap(hdr.nfat_arch)]; - const native_cpu_type = switch (builtin.cpu.arch) { - .x86_64 => macho.CPU_TYPE_X86_64, - .aarch64 => macho.CPU_TYPE_ARM64, - else => comptime unreachable, - }; - for (archs) |*arch| { - if (@byteSwap(arch.cputype) != native_cpu_type) continue; - const offset = @byteSwap(arch.offset); - const size = @byteSwap(arch.size); - break :mapped_macho all_mapped_memory[offset..][0..size]; - } - // Our native architecture was not present in the fat binary. - return error.MissingDebugInfo; - }, - - // Even on modern 64-bit targets, this format doesn't seem to be too extensively used. It - // will be fairly easy to add support here if necessary; it's very similar to above. - macho.FAT_CIGAM_64 => return error.UnsupportedDebugInfo, - - else => return error.InvalidDebugInfo, - }; - - const hdr: *const macho.mach_header_64 = @ptrCast(@alignCast(mapped_macho.ptr)); - if (hdr.magic != macho.MH_MAGIC_64) - return error.InvalidDebugInfo; - - const symtab: macho.symtab_command, const text_vmaddr: u64 = lc_iter: { - var it: macho.LoadCommandIterator = .{ - .ncmds = hdr.ncmds, - .buffer = mapped_macho[@sizeOf(macho.mach_header_64)..][0..hdr.sizeofcmds], - }; - var symtab: ?macho.symtab_command = null; - var text_vmaddr: ?u64 = null; - while (it.next()) |cmd| switch (cmd.cmd()) { - .SYMTAB => symtab = cmd.cast(macho.symtab_command) orelse return error.InvalidDebugInfo, - .SEGMENT_64 => if (cmd.cast(macho.segment_command_64)) |seg_cmd| { - if (!mem.eql(u8, seg_cmd.segName(), "__TEXT")) continue; - text_vmaddr = seg_cmd.vmaddr; - }, - else => {}, - }; - break :lc_iter .{ - symtab orelse return error.MissingDebugInfo, - text_vmaddr orelse return error.MissingDebugInfo, - }; - }; - - const syms_ptr: [*]align(1) const macho.nlist_64 = @ptrCast(mapped_macho[symtab.symoff..]); - const syms = syms_ptr[0..symtab.nsyms]; - const strings = mapped_macho[symtab.stroff..][0 .. symtab.strsize - 1]; - - var symbols: std.ArrayList(MachoSymbol) = try .initCapacity(gpa, syms.len); - defer symbols.deinit(gpa); - - // This map is temporary; it is used only to detect duplicates here. This is - // necessary because we prefer to use STAB ("symbolic debugging table") symbols, - // but they might not be present, so we track normal symbols too. - // Indices match 1-1 with those of `symbols`. - var symbol_names: std.StringArrayHashMapUnmanaged(void) = .empty; - defer symbol_names.deinit(gpa); - try symbol_names.ensureUnusedCapacity(gpa, syms.len); - - var ofile: u32 = undefined; - var last_sym: MachoSymbol = undefined; - var state: enum { - init, - oso_open, - oso_close, - bnsym, - fun_strx, - fun_size, - ensym, - } = .init; - - for (syms) |*sym| { - if (sym.n_type.bits.is_stab == 0) { - if (sym.n_strx == 0) continue; - switch (sym.n_type.bits.type) { - .undf, .pbud, .indr, .abs, _ => continue, - .sect => { - const name = std.mem.sliceTo(strings[sym.n_strx..], 0); - const gop = symbol_names.getOrPutAssumeCapacity(name); - if (!gop.found_existing) { - assert(gop.index == symbols.items.len); - symbols.appendAssumeCapacity(.{ - .strx = sym.n_strx, - .addr = sym.n_value, - .ofile = MachoSymbol.unknown_ofile, - }); - } - }, - } - continue; - } - - // TODO handle globals N_GSYM, and statics N_STSYM - switch (sym.n_type.stab) { - .oso => switch (state) { - .init, .oso_close => { - state = .oso_open; - ofile = sym.n_strx; - }, - else => return error.InvalidDebugInfo, - }, - .bnsym => switch (state) { - .oso_open, .ensym => { - state = .bnsym; - last_sym = .{ - .strx = 0, - .addr = sym.n_value, - .ofile = ofile, - }; - }, - else => return error.InvalidDebugInfo, - }, - .fun => switch (state) { - .bnsym => { - state = .fun_strx; - last_sym.strx = sym.n_strx; - }, - .fun_strx => { - state = .fun_size; - }, - else => return error.InvalidDebugInfo, - }, - .ensym => switch (state) { - .fun_size => { - state = .ensym; - if (last_sym.strx != 0) { - const name = std.mem.sliceTo(strings[last_sym.strx..], 0); - const gop = symbol_names.getOrPutAssumeCapacity(name); - if (!gop.found_existing) { - assert(gop.index == symbols.items.len); - symbols.appendAssumeCapacity(last_sym); - } else { - symbols.items[gop.index] = last_sym; - } - } - }, - else => return error.InvalidDebugInfo, - }, - .so => switch (state) { - .init, .oso_close => {}, - .oso_open, .ensym => { - state = .oso_close; - }, - else => return error.InvalidDebugInfo, - }, - else => {}, - } - } - - switch (state) { - .init => { - // Missing STAB symtab entries is still okay, unless there were also no normal symbols. - if (symbols.items.len == 0) return error.MissingDebugInfo; - }, - .oso_close => {}, - else => return error.InvalidDebugInfo, // corrupted STAB entries in symtab - } - - const symbols_slice = try symbols.toOwnedSlice(gpa); - errdefer gpa.free(symbols_slice); - - // Even though lld emits symbols in ascending order, this debug code - // should work for programs linked in any valid way. - // This sort is so that we can binary search later. - mem.sort(MachoSymbol, symbols_slice, {}, MachoSymbol.addressLessThan); - - return .{ - .mapped_memory = all_mapped_memory, - .symbols = symbols_slice, - .strings = strings, - .vaddr_offset = module.text_base - text_vmaddr, + fn getFile(module: *Module, gpa: Allocator) Error!*MachOFile { + if (module.file == null) module.file = MachOFile.load(gpa, module.name, builtin.cpu.arch) catch |err| switch (err) { + error.InvalidMachO, error.InvalidDwarf => error.InvalidDebugInfo, + error.MissingDebugInfo, error.OutOfMemory, error.UnsupportedDebugInfo, error.ReadFailed => |e| e, }; + return if (module.file.?) |*f| f else |err| err; } }; -const OFile = struct { - mapped_memory: []align(std.heap.page_size_min) const u8, - dwarf: Dwarf, - strtab: []const u8, - symtab: []align(1) const macho.nlist_64, - /// All named symbols in `symtab`. Stored `u32` key is the index into `symtab`. Accessed - /// through `SymbolAdapter`, so that the symbol name is used as the logical key. - symbols_by_name: std.ArrayHashMapUnmanaged(u32, void, void, true), - - const SymbolAdapter = struct { - strtab: []const u8, - symtab: []align(1) const macho.nlist_64, - pub fn hash(ctx: SymbolAdapter, sym_name: []const u8) u32 { - _ = ctx; - return @truncate(std.hash.Wyhash.hash(0, sym_name)); - } - pub fn eql(ctx: SymbolAdapter, a_sym_name: []const u8, b_sym_index: u32, b_index: usize) bool { - _ = b_index; - const b_sym = ctx.symtab[b_sym_index]; - const b_sym_name = std.mem.sliceTo(ctx.strtab[b_sym.n_strx..], 0); - return mem.eql(u8, a_sym_name, b_sym_name); - } - }; -}; - const MachoSymbol = struct { strx: u32, addr: u64, @@ -880,101 +633,12 @@ fn mapDebugInfoFile(path: []const u8) ![]align(std.heap.page_size_min) const u8 }; } -fn loadOFile(gpa: Allocator, o_file_path: []const u8) !OFile { - const mapped_mem = try mapDebugInfoFile(o_file_path); - errdefer posix.munmap(mapped_mem); - - if (mapped_mem.len < @sizeOf(macho.mach_header_64)) return error.InvalidDebugInfo; - const hdr: *const macho.mach_header_64 = @ptrCast(@alignCast(mapped_mem.ptr)); - if (hdr.magic != std.macho.MH_MAGIC_64) return error.InvalidDebugInfo; - - const seg_cmd: macho.LoadCommandIterator.LoadCommand, const symtab_cmd: macho.symtab_command = cmds: { - var seg_cmd: ?macho.LoadCommandIterator.LoadCommand = null; - var symtab_cmd: ?macho.symtab_command = null; - var it: macho.LoadCommandIterator = .{ - .ncmds = hdr.ncmds, - .buffer = mapped_mem[@sizeOf(macho.mach_header_64)..][0..hdr.sizeofcmds], - }; - while (it.next()) |cmd| switch (cmd.cmd()) { - .SEGMENT_64 => seg_cmd = cmd, - .SYMTAB => symtab_cmd = cmd.cast(macho.symtab_command) orelse return error.InvalidDebugInfo, - else => {}, - }; - break :cmds .{ - seg_cmd orelse return error.MissingDebugInfo, - symtab_cmd orelse return error.MissingDebugInfo, - }; - }; - - if (mapped_mem.len < symtab_cmd.stroff + symtab_cmd.strsize) return error.InvalidDebugInfo; - if (mapped_mem[symtab_cmd.stroff + symtab_cmd.strsize - 1] != 0) return error.InvalidDebugInfo; - const strtab = mapped_mem[symtab_cmd.stroff..][0 .. symtab_cmd.strsize - 1]; - - const n_sym_bytes = symtab_cmd.nsyms * @sizeOf(macho.nlist_64); - if (mapped_mem.len < symtab_cmd.symoff + n_sym_bytes) return error.InvalidDebugInfo; - const symtab: []align(1) const macho.nlist_64 = @ptrCast(mapped_mem[symtab_cmd.symoff..][0..n_sym_bytes]); - - // TODO handle tentative (common) symbols - var symbols_by_name: std.ArrayHashMapUnmanaged(u32, void, void, true) = .empty; - defer symbols_by_name.deinit(gpa); - try symbols_by_name.ensureUnusedCapacity(gpa, @intCast(symtab.len)); - for (symtab, 0..) |sym, sym_index| { - if (sym.n_strx == 0) continue; - switch (sym.n_type.bits.type) { - .undf => continue, // includes tentative symbols - .abs => continue, - else => {}, - } - const sym_name = mem.sliceTo(strtab[sym.n_strx..], 0); - const gop = symbols_by_name.getOrPutAssumeCapacityAdapted( - @as([]const u8, sym_name), - @as(OFile.SymbolAdapter, .{ .strtab = strtab, .symtab = symtab }), - ); - if (gop.found_existing) return error.InvalidDebugInfo; - gop.key_ptr.* = @intCast(sym_index); - } - - var sections: Dwarf.SectionArray = @splat(null); - for (seg_cmd.getSections()) |sect| { - if (!std.mem.eql(u8, "__DWARF", sect.segName())) continue; - - const section_index: usize = inline for (@typeInfo(Dwarf.Section.Id).@"enum".fields, 0..) |section, i| { - if (mem.eql(u8, "__" ++ section.name, sect.sectName())) break i; - } else continue; - - if (mapped_mem.len < sect.offset + sect.size) return error.InvalidDebugInfo; - const section_bytes = mapped_mem[sect.offset..][0..sect.size]; - sections[section_index] = .{ - .data = section_bytes, - .owned = false, - }; - } - - const missing_debug_info = - sections[@intFromEnum(Dwarf.Section.Id.debug_info)] == null or - sections[@intFromEnum(Dwarf.Section.Id.debug_abbrev)] == null or - sections[@intFromEnum(Dwarf.Section.Id.debug_str)] == null or - sections[@intFromEnum(Dwarf.Section.Id.debug_line)] == null; - if (missing_debug_info) return error.MissingDebugInfo; - - var dwarf: Dwarf = .{ .sections = sections }; - errdefer dwarf.deinit(gpa); - try dwarf.open(gpa, native_endian); - - return .{ - .mapped_memory = mapped_mem, - .dwarf = dwarf, - .strtab = strtab, - .symtab = symtab, - .symbols_by_name = symbols_by_name.move(), - }; -} - const std = @import("std"); const Io = std.Io; const Allocator = std.mem.Allocator; const Dwarf = std.debug.Dwarf; const Error = std.debug.SelfInfoError; +const MachOFile = std.debug.MachOFile; const assert = std.debug.assert; const posix = std.posix; const macho = std.macho; |
