std.debug.SelfInfo: remove shared logic

There were only a few dozen lines of common logic, and they frankly introduced more complexity than they eliminated. Instead, let's accept that the implementations of `SelfInfo` are all pretty different and want to track different state. This probably fixes some synchronization and memory bugs by simplifying a bunch of stuff. It also improves the DWARF unwind cache, making it around twice as fast in a debug build with the self-hosted x86_64 backend, because we no longer have to redundantly go through the hashmap lookup logic to find the module. Unwinding on Windows will also see a slight performance boost from this change, because `RtlVirtualUnwind` does not need to know the module whatsoever, so the old `SelfInfo` implementation was doing redundant work. Lastly, this makes it even easier to implement `SelfInfo` on freestanding targets; there is no longer a need to emulate a real module system, since the user controls the whole implementation! There are various other small refactors here in the `SelfInfo` implementations as well as in the DWARF unwinding logic. This change turned out to make a lot of stuff simpler!
author: mlugg <mlugg@mlugg.co.uk> 2025-09-30 11:06:21 +0100
committer: mlugg <mlugg@mlugg.co.uk> 2025-09-30 14:18:26 +0100
commit: 1120546f72405ac263dce7414eb71ca4e6c96fc8 (patch)
tree: 4a6f90029d8feff983889a133326fbe2a4e3465d /lib/std/debug/SelfInfo/DarwinModule.zig
parent: 12ceb896faebf25195d8b360e4972dd2bf23ede1 (diff)
download: zig-1120546f72405ac263dce7414eb71ca4e6c96fc8.tar.gz
zig-1120546f72405ac263dce7414eb71ca4e6c96fc8.zip
1 file changed, 0 insertions, 954 deletions
diff --git a/lib/std/debug/SelfInfo/DarwinModule.zig b/lib/std/debug/SelfInfo/DarwinModule.zig
deleted file mode 100644
index 71e43a9a74..0000000000
--- a/lib/std/debug/SelfInfo/DarwinModule.zig
+++ /dev/null
@@ -1,954 +0,0 @@
-/// The runtime address where __TEXT is loaded.
-text_base: usize,
-name: []const u8,
-
-pub fn key(m: *const DarwinModule) usize {
-    return m.text_base;
-}
-
-/// No cache needed, because `_dyld_get_image_header` etc are already fast.
-pub const LookupCache = void;
-pub fn lookup(cache: *LookupCache, gpa: Allocator, address: usize) Error!DarwinModule {
-    _ = cache;
-    _ = gpa;
-    var info: std.c.dl_info = undefined;
-    switch (std.c.dladdr(@ptrFromInt(address), &info)) {
-        0 => return error.MissingDebugInfo,
-        else => return .{
-            .name = std.mem.span(info.fname),
-            .text_base = @intFromPtr(info.fbase),
-        },
-    }
-}
-fn loadUnwindInfo(module: *const DarwinModule, gpa: Allocator, out: *DebugInfo) !void {
-    const header: *std.macho.mach_header = @ptrFromInt(module.text_base);
-
-    var it: macho.LoadCommandIterator = .{
-        .ncmds = header.ncmds,
-        .buffer = @as([*]u8, @ptrCast(header))[@sizeOf(macho.mach_header_64)..][0..header.sizeofcmds],
-    };
-    const sections, const text_vmaddr = while (it.next()) |load_cmd| {
-        if (load_cmd.cmd() != .SEGMENT_64) continue;
-        const segment_cmd = load_cmd.cast(macho.segment_command_64).?;
-        if (!mem.eql(u8, segment_cmd.segName(), "__TEXT")) continue;
-        break .{ load_cmd.getSections(), segment_cmd.vmaddr };
-    } else unreachable;
-
-    const vmaddr_slide = module.text_base - text_vmaddr;
-
-    var opt_unwind_info: ?[]const u8 = null;
-    var opt_eh_frame: ?[]const u8 = null;
-    for (sections) |sect| {
-        if (mem.eql(u8, sect.sectName(), "__unwind_info")) {
-            const sect_ptr: [*]u8 = @ptrFromInt(@as(usize, @intCast(vmaddr_slide + sect.addr)));
-            opt_unwind_info = sect_ptr[0..@intCast(sect.size)];
-        } else if (mem.eql(u8, sect.sectName(), "__eh_frame")) {
-            const sect_ptr: [*]u8 = @ptrFromInt(@as(usize, @intCast(vmaddr_slide + sect.addr)));
-            opt_eh_frame = sect_ptr[0..@intCast(sect.size)];
-        }
-    }
-    const eh_frame = opt_eh_frame orelse {
-        out.unwind = .{
-            .vmaddr_slide = vmaddr_slide,
-            .unwind_info = opt_unwind_info,
-            .dwarf = null,
-            .dwarf_cache = undefined,
-        };
-        return;
-    };
-    var dwarf: Dwarf.Unwind = .initSection(.eh_frame, @intFromPtr(eh_frame.ptr) - vmaddr_slide, eh_frame);
-    errdefer dwarf.deinit(gpa);
-    // We don't need lookups, so this call is just for scanning CIEs.
-    dwarf.prepare(gpa, @sizeOf(usize), native_endian, false, true) catch |err| switch (err) {
-        error.ReadFailed => unreachable, // it's all fixed buffers
-        error.InvalidDebugInfo,
-        error.MissingDebugInfo,
-        error.OutOfMemory,
-        => |e| return e,
-        error.EndOfStream,
-        error.Overflow,
-        error.StreamTooLong,
-        error.InvalidOperand,
-        error.InvalidOpcode,
-        error.InvalidOperation,
-        => return error.InvalidDebugInfo,
-        error.UnsupportedAddrSize,
-        error.UnsupportedDwarfVersion,
-        error.UnimplementedUserOpcode,
-        => return error.UnsupportedDebugInfo,
-    };
-
-    const dwarf_cache = try gpa.create(UnwindContext.Cache);
-    errdefer gpa.destroy(dwarf_cache);
-    dwarf_cache.init();
-
-    out.unwind = .{
-        .vmaddr_slide = vmaddr_slide,
-        .unwind_info = opt_unwind_info,
-        .dwarf = dwarf,
-        .dwarf_cache = dwarf_cache,
-    };
-}
-fn loadMachO(module: *const DarwinModule, gpa: Allocator) !DebugInfo.LoadedMachO {
-    const all_mapped_memory = try mapDebugInfoFile(module.name);
-    errdefer posix.munmap(all_mapped_memory);
-
-    // In most cases, the file we just mapped is a Mach-O binary. However, it could be a "universal
-    // binary": a simple file format which contains Mach-O binaries for multiple targets. For
-    // instance, `/usr/lib/dyld` is currently distributed as a universal binary containing images
-    // for both ARM64 Macs and x86_64 Macs.
-    if (all_mapped_memory.len < 4) return error.InvalidDebugInfo;
-    const magic = @as(*const u32, @ptrCast(all_mapped_memory.ptr)).*;
-    // The contents of a Mach-O file, which may or may not be the whole of `all_mapped_memory`.
-    const mapped_macho = switch (magic) {
-        macho.MH_MAGIC_64 => all_mapped_memory,
-
-        macho.FAT_CIGAM => mapped_macho: {
-            // This is the universal binary format (aka a "fat binary"). Annoyingly, the whole thing
-            // is big-endian, so we'll be swapping some bytes.
-            if (all_mapped_memory.len < @sizeOf(macho.fat_header)) return error.InvalidDebugInfo;
-            const hdr: *const macho.fat_header = @ptrCast(all_mapped_memory.ptr);
-            const archs_ptr: [*]const macho.fat_arch = @ptrCast(all_mapped_memory.ptr + @sizeOf(macho.fat_header));
-            const archs: []const macho.fat_arch = archs_ptr[0..@byteSwap(hdr.nfat_arch)];
-            const native_cpu_type = switch (builtin.cpu.arch) {
-                .x86_64 => macho.CPU_TYPE_X86_64,
-                .aarch64 => macho.CPU_TYPE_ARM64,
-                else => comptime unreachable,
-            };
-            for (archs) |*arch| {
-                if (@byteSwap(arch.cputype) != native_cpu_type) continue;
-                const offset = @byteSwap(arch.offset);
-                const size = @byteSwap(arch.size);
-                break :mapped_macho all_mapped_memory[offset..][0..size];
-            }
-            // Our native architecture was not present in the fat binary.
-            return error.MissingDebugInfo;
-        },
-
-        // Even on modern 64-bit targets, this format doesn't seem to be too extensively used. It
-        // will be fairly easy to add support here if necessary; it's very similar to above.
-        macho.FAT_CIGAM_64 => return error.UnsupportedDebugInfo,
-
-        else => return error.InvalidDebugInfo,
-    };
-
-    const hdr: *const macho.mach_header_64 = @ptrCast(@alignCast(mapped_macho.ptr));
-    if (hdr.magic != macho.MH_MAGIC_64)
-        return error.InvalidDebugInfo;
-
-    const symtab: macho.symtab_command, const text_vmaddr: u64 = lc_iter: {
-        var it: macho.LoadCommandIterator = .{
-            .ncmds = hdr.ncmds,
-            .buffer = mapped_macho[@sizeOf(macho.mach_header_64)..][0..hdr.sizeofcmds],
-        };
-        var symtab: ?macho.symtab_command = null;
-        var text_vmaddr: ?u64 = null;
-        while (it.next()) |cmd| switch (cmd.cmd()) {
-            .SYMTAB => symtab = cmd.cast(macho.symtab_command) orelse return error.InvalidDebugInfo,
-            .SEGMENT_64 => if (cmd.cast(macho.segment_command_64)) |seg_cmd| {
-                if (!mem.eql(u8, seg_cmd.segName(), "__TEXT")) continue;
-                text_vmaddr = seg_cmd.vmaddr;
-            },
-            else => {},
-        };
-        break :lc_iter .{
-            symtab orelse return error.MissingDebugInfo,
-            text_vmaddr orelse return error.MissingDebugInfo,
-        };
-    };
-
-    const syms_ptr: [*]align(1) const macho.nlist_64 = @ptrCast(mapped_macho[symtab.symoff..]);
-    const syms = syms_ptr[0..symtab.nsyms];
-    const strings = mapped_macho[symtab.stroff..][0 .. symtab.strsize - 1];
-
-    var symbols: std.ArrayList(MachoSymbol) = try .initCapacity(gpa, syms.len);
-    defer symbols.deinit(gpa);
-
-    // This map is temporary; it is used only to detect duplicates here. This is
-    // necessary because we prefer to use STAB ("symbolic debugging table") symbols,
-    // but they might not be present, so we track normal symbols too.
-    // Indices match 1-1 with those of `symbols`.
-    var symbol_names: std.StringArrayHashMapUnmanaged(void) = .empty;
-    defer symbol_names.deinit(gpa);
-    try symbol_names.ensureUnusedCapacity(gpa, syms.len);
-
-    var ofile: u32 = undefined;
-    var last_sym: MachoSymbol = undefined;
-    var state: enum {
-        init,
-        oso_open,
-        oso_close,
-        bnsym,
-        fun_strx,
-        fun_size,
-        ensym,
-    } = .init;
-
-    for (syms) |*sym| {
-        if (sym.n_type.bits.is_stab == 0) {
-            if (sym.n_strx == 0) continue;
-            switch (sym.n_type.bits.type) {
-                .undf, .pbud, .indr, .abs, _ => continue,
-                .sect => {
-                    const name = std.mem.sliceTo(strings[sym.n_strx..], 0);
-                    const gop = symbol_names.getOrPutAssumeCapacity(name);
-                    if (!gop.found_existing) {
-                        assert(gop.index == symbols.items.len);
-                        symbols.appendAssumeCapacity(.{
-                            .strx = sym.n_strx,
-                            .addr = sym.n_value,
-                            .ofile = MachoSymbol.unknown_ofile,
-                        });
-                    }
-                },
-            }
-            continue;
-        }
-
-        // TODO handle globals N_GSYM, and statics N_STSYM
-        switch (sym.n_type.stab) {
-            .oso => switch (state) {
-                .init, .oso_close => {
-                    state = .oso_open;
-                    ofile = sym.n_strx;
-                },
-                else => return error.InvalidDebugInfo,
-            },
-            .bnsym => switch (state) {
-                .oso_open, .ensym => {
-                    state = .bnsym;
-                    last_sym = .{
-                        .strx = 0,
-                        .addr = sym.n_value,
-                        .ofile = ofile,
-                    };
-                },
-                else => return error.InvalidDebugInfo,
-            },
-            .fun => switch (state) {
-                .bnsym => {
-                    state = .fun_strx;
-                    last_sym.strx = sym.n_strx;
-                },
-                .fun_strx => {
-                    state = .fun_size;
-                },
-                else => return error.InvalidDebugInfo,
-            },
-            .ensym => switch (state) {
-                .fun_size => {
-                    state = .ensym;
-                    if (last_sym.strx != 0) {
-                        const name = std.mem.sliceTo(strings[last_sym.strx..], 0);
-                        const gop = symbol_names.getOrPutAssumeCapacity(name);
-                        if (!gop.found_existing) {
-                            assert(gop.index == symbols.items.len);
-                            symbols.appendAssumeCapacity(last_sym);
-                        } else {
-                            symbols.items[gop.index] = last_sym;
-                        }
-                    }
-                },
-                else => return error.InvalidDebugInfo,
-            },
-            .so => switch (state) {
-                .init, .oso_close => {},
-                .oso_open, .ensym => {
-                    state = .oso_close;
-                },
-                else => return error.InvalidDebugInfo,
-            },
-            else => {},
-        }
-    }
-
-    switch (state) {
-        .init => {
-            // Missing STAB symtab entries is still okay, unless there were also no normal symbols.
-            if (symbols.items.len == 0) return error.MissingDebugInfo;
-        },
-        .oso_close => {},
-        else => return error.InvalidDebugInfo, // corrupted STAB entries in symtab
-    }
-
-    const symbols_slice = try symbols.toOwnedSlice(gpa);
-    errdefer gpa.free(symbols_slice);
-
-    // Even though lld emits symbols in ascending order, this debug code
-    // should work for programs linked in any valid way.
-    // This sort is so that we can binary search later.
-    mem.sort(MachoSymbol, symbols_slice, {}, MachoSymbol.addressLessThan);
-
-    return .{
-        .mapped_memory = all_mapped_memory,
-        .symbols = symbols_slice,
-        .strings = strings,
-        .ofiles = .empty,
-        .vaddr_offset = module.text_base - text_vmaddr,
-    };
-}
-pub fn getSymbolAtAddress(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, address: usize) Error!std.debug.Symbol {
-    // We need the lock for a few things:
-    // * loading the Mach-O module
-    // * loading the referenced object file
-    // * scanning the DWARF of that object file
-    // * building the line number table of that object file
-    // That's enough that it doesn't really seem worth scoping the lock more tightly than the whole function..
-    di.mutex.lock();
-    defer di.mutex.unlock();
-
-    if (di.loaded_macho == null) di.loaded_macho = module.loadMachO(gpa) catch |err| switch (err) {
-        error.InvalidDebugInfo, error.MissingDebugInfo, error.OutOfMemory, error.Unexpected => |e| return e,
-        else => return error.ReadFailed,
-    };
-    const loaded_macho = &di.loaded_macho.?;
-
-    const vaddr = address - loaded_macho.vaddr_offset;
-    const symbol = MachoSymbol.find(loaded_macho.symbols, vaddr) orelse return .unknown;
-
-    // offset of `address` from start of `symbol`
-    const address_symbol_offset = vaddr - symbol.addr;
-
-    // Take the symbol name from the N_FUN STAB entry, we're going to
-    // use it if we fail to find the DWARF infos
-    const stab_symbol = mem.sliceTo(loaded_macho.strings[symbol.strx..], 0);
-
-    // If any information is missing, we can at least return this from now on.
-    const sym_only_result: std.debug.Symbol = .{
-        .name = stab_symbol,
-        .compile_unit_name = null,
-        .source_location = null,
-    };
-
-    if (symbol.ofile == MachoSymbol.unknown_ofile) {
-        // We don't have STAB info, so can't track down the object file; all we can do is the symbol name.
-        return sym_only_result;
-    }
-
-    const o_file: *DebugInfo.OFile = of: {
-        const gop = try loaded_macho.ofiles.getOrPut(gpa, symbol.ofile);
-        if (!gop.found_existing) {
-            const o_file_path = mem.sliceTo(loaded_macho.strings[symbol.ofile..], 0);
-            gop.value_ptr.* = DebugInfo.loadOFile(gpa, o_file_path) catch {
-                _ = loaded_macho.ofiles.pop().?;
-                return sym_only_result;
-            };
-        }
-        break :of gop.value_ptr;
-    };
-
-    const symbol_index = o_file.symbols_by_name.getKeyAdapted(
-        @as([]const u8, stab_symbol),
-        @as(DebugInfo.OFile.SymbolAdapter, .{ .strtab = o_file.strtab, .symtab = o_file.symtab }),
-    ) orelse return sym_only_result;
-    const symbol_ofile_vaddr = o_file.symtab[symbol_index].n_value;
-
-    const compile_unit = o_file.dwarf.findCompileUnit(native_endian, symbol_ofile_vaddr) catch return sym_only_result;
-
-    return .{
-        .name = o_file.dwarf.getSymbolName(symbol_ofile_vaddr + address_symbol_offset) orelse stab_symbol,
-        .compile_unit_name = compile_unit.die.getAttrString(
-            &o_file.dwarf,
-            native_endian,
-            std.dwarf.AT.name,
-            o_file.dwarf.section(.debug_str),
-            compile_unit,
-        ) catch |err| switch (err) {
-            error.MissingDebugInfo, error.InvalidDebugInfo => null,
-        },
-        .source_location = o_file.dwarf.getLineNumberInfo(
-            gpa,
-            native_endian,
-            compile_unit,
-            symbol_ofile_vaddr + address_symbol_offset,
-        ) catch null,
-    };
-}
-pub const supports_unwinding: bool = true;
-pub const UnwindContext = std.debug.SelfInfo.DwarfUnwindContext;
-/// Unwind a frame using MachO compact unwind info (from __unwind_info).
-/// If the compact encoding can't encode a way to unwind a frame, it will
-/// defer unwinding to DWARF, in which case `.eh_frame` will be used if available.
-pub fn unwindFrame(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, context: *UnwindContext) Error!usize {
-    return unwindFrameInner(module, gpa, di, context) catch |err| switch (err) {
-        error.InvalidDebugInfo,
-        error.MissingDebugInfo,
-        error.UnsupportedDebugInfo,
-        error.ReadFailed,
-        error.OutOfMemory,
-        error.Unexpected,
-        => |e| return e,
-        error.UnsupportedRegister,
-        => return error.UnsupportedDebugInfo,
-        error.InvalidRegister,
-        error.IncompatibleRegisterSize,
-        => return error.InvalidDebugInfo,
-    };
-}
-fn unwindFrameInner(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, context: *UnwindContext) !usize {
-    const unwind: *DebugInfo.Unwind = u: {
-        di.mutex.lock();
-        defer di.mutex.unlock();
-        if (di.unwind == null) try module.loadUnwindInfo(gpa, di);
-        break :u &di.unwind.?;
-    };
-
-    const unwind_info = unwind.unwind_info orelse return error.MissingDebugInfo;
-    if (unwind_info.len < @sizeOf(macho.unwind_info_section_header)) return error.InvalidDebugInfo;
-    const header: *align(1) const macho.unwind_info_section_header = @ptrCast(unwind_info);
-
-    const index_byte_count = header.indexCount * @sizeOf(macho.unwind_info_section_header_index_entry);
-    if (unwind_info.len < header.indexSectionOffset + index_byte_count) return error.InvalidDebugInfo;
-    const indices: []align(1) const macho.unwind_info_section_header_index_entry = @ptrCast(unwind_info[header.indexSectionOffset..][0..index_byte_count]);
-    if (indices.len == 0) return error.MissingDebugInfo;
-
-    // offset of the PC into the `__TEXT` segment
-    const pc_text_offset = context.pc - module.text_base;
-
-    const start_offset: u32, const first_level_offset: u32 = index: {
-        var left: usize = 0;
-        var len: usize = indices.len;
-        while (len > 1) {
-            const mid = left + len / 2;
-            if (pc_text_offset < indices[mid].functionOffset) {
-                len /= 2;
-            } else {
-                left = mid;
-                len -= len / 2;
-            }
-        }
-        break :index .{ indices[left].secondLevelPagesSectionOffset, indices[left].functionOffset };
-    };
-    // An offset of 0 is a sentinel indicating a range does not have unwind info.
-    if (start_offset == 0) return error.MissingDebugInfo;
-
-    const common_encodings_byte_count = header.commonEncodingsArrayCount * @sizeOf(macho.compact_unwind_encoding_t);
-    if (unwind_info.len < header.commonEncodingsArraySectionOffset + common_encodings_byte_count) return error.InvalidDebugInfo;
-    const common_encodings: []align(1) const macho.compact_unwind_encoding_t = @ptrCast(
-        unwind_info[header.commonEncodingsArraySectionOffset..][0..common_encodings_byte_count],
-    );
-
-    if (unwind_info.len < start_offset + @sizeOf(macho.UNWIND_SECOND_LEVEL)) return error.InvalidDebugInfo;
-    const kind: *align(1) const macho.UNWIND_SECOND_LEVEL = @ptrCast(unwind_info[start_offset..]);
-
-    const entry: struct {
-        function_offset: usize,
-        raw_encoding: u32,
-    } = switch (kind.*) {
-        .REGULAR => entry: {
-            if (unwind_info.len < start_offset + @sizeOf(macho.unwind_info_regular_second_level_page_header)) return error.InvalidDebugInfo;
-            const page_header: *align(1) const macho.unwind_info_regular_second_level_page_header = @ptrCast(unwind_info[start_offset..]);
-
-            const entries_byte_count = page_header.entryCount * @sizeOf(macho.unwind_info_regular_second_level_entry);
-            if (unwind_info.len < start_offset + entries_byte_count) return error.InvalidDebugInfo;
-            const entries: []align(1) const macho.unwind_info_regular_second_level_entry = @ptrCast(
-                unwind_info[start_offset + page_header.entryPageOffset ..][0..entries_byte_count],
-            );
-            if (entries.len == 0) return error.InvalidDebugInfo;
-
-            var left: usize = 0;
-            var len: usize = entries.len;
-            while (len > 1) {
-                const mid = left + len / 2;
-                if (pc_text_offset < entries[mid].functionOffset) {
-                    len /= 2;
-                } else {
-                    left = mid;
-                    len -= len / 2;
-                }
-            }
-            break :entry .{
-                .function_offset = entries[left].functionOffset,
-                .raw_encoding = entries[left].encoding,
-            };
-        },
-        .COMPRESSED => entry: {
-            if (unwind_info.len < start_offset + @sizeOf(macho.unwind_info_compressed_second_level_page_header)) return error.InvalidDebugInfo;
-            const page_header: *align(1) const macho.unwind_info_compressed_second_level_page_header = @ptrCast(unwind_info[start_offset..]);
-
-            const entries_byte_count = page_header.entryCount * @sizeOf(macho.UnwindInfoCompressedEntry);
-            if (unwind_info.len < start_offset + entries_byte_count) return error.InvalidDebugInfo;
-            const entries: []align(1) const macho.UnwindInfoCompressedEntry = @ptrCast(
-                unwind_info[start_offset + page_header.entryPageOffset ..][0..entries_byte_count],
-            );
-            if (entries.len == 0) return error.InvalidDebugInfo;
-
-            var left: usize = 0;
-            var len: usize = entries.len;
-            while (len > 1) {
-                const mid = left + len / 2;
-                if (pc_text_offset < first_level_offset + entries[mid].funcOffset) {
-                    len /= 2;
-                } else {
-                    left = mid;
-                    len -= len / 2;
-                }
-            }
-            const entry = entries[left];
-
-            const function_offset = first_level_offset + entry.funcOffset;
-            if (entry.encodingIndex < common_encodings.len) {
-                break :entry .{
-                    .function_offset = function_offset,
-                    .raw_encoding = common_encodings[entry.encodingIndex],
-                };
-            }
-
-            const local_index = entry.encodingIndex - common_encodings.len;
-            const local_encodings_byte_count = page_header.encodingsCount * @sizeOf(macho.compact_unwind_encoding_t);
-            if (unwind_info.len < start_offset + page_header.encodingsPageOffset + local_encodings_byte_count) return error.InvalidDebugInfo;
-            const local_encodings: []align(1) const macho.compact_unwind_encoding_t = @ptrCast(
-                unwind_info[start_offset + page_header.encodingsPageOffset ..][0..local_encodings_byte_count],
-            );
-            if (local_index >= local_encodings.len) return error.InvalidDebugInfo;
-            break :entry .{
-                .function_offset = function_offset,
-                .raw_encoding = local_encodings[local_index],
-            };
-        },
-        else => return error.InvalidDebugInfo,
-    };
-
-    if (entry.raw_encoding == 0) return error.MissingDebugInfo;
-
-    const encoding: macho.CompactUnwindEncoding = @bitCast(entry.raw_encoding);
-    const new_ip = switch (builtin.cpu.arch) {
-        .x86_64 => switch (encoding.mode.x86_64) {
-            .OLD => return error.UnsupportedDebugInfo,
-            .RBP_FRAME => ip: {
-                const frame = encoding.value.x86_64.frame;
-
-                const fp = (try dwarfRegNative(&context.cpu_context, fp_reg_num)).*;
-                const new_sp = fp + 2 * @sizeOf(usize);
-
-                const ip_ptr = fp + @sizeOf(usize);
-                const new_ip = @as(*const usize, @ptrFromInt(ip_ptr)).*;
-                const new_fp = @as(*const usize, @ptrFromInt(fp)).*;
-
-                (try dwarfRegNative(&context.cpu_context, fp_reg_num)).* = new_fp;
-                (try dwarfRegNative(&context.cpu_context, sp_reg_num)).* = new_sp;
-                (try dwarfRegNative(&context.cpu_context, ip_reg_num)).* = new_ip;
-
-                const regs: [5]u3 = .{
-                    frame.reg0,
-                    frame.reg1,
-                    frame.reg2,
-                    frame.reg3,
-                    frame.reg4,
-                };
-                for (regs, 0..) |reg, i| {
-                    if (reg == 0) continue;
-                    const addr = fp - frame.frame_offset * @sizeOf(usize) + i * @sizeOf(usize);
-                    const reg_number = try Dwarf.compactUnwindToDwarfRegNumber(reg);
-                    (try dwarfRegNative(&context.cpu_context, reg_number)).* = @as(*const usize, @ptrFromInt(addr)).*;
-                }
-
-                break :ip new_ip;
-            },
-            .STACK_IMMD,
-            .STACK_IND,
-            => ip: {
-                const frameless = encoding.value.x86_64.frameless;
-
-                const sp = (try dwarfRegNative(&context.cpu_context, sp_reg_num)).*;
-                const stack_size: usize = stack_size: {
-                    if (encoding.mode.x86_64 == .STACK_IMMD) {
-                        break :stack_size @as(usize, frameless.stack.direct.stack_size) * @sizeOf(usize);
-                    }
-                    // In .STACK_IND, the stack size is inferred from the subq instruction at the beginning of the function.
-                    const sub_offset_addr =
-                        module.text_base +
-                        entry.function_offset +
-                        frameless.stack.indirect.sub_offset;
-                    // `sub_offset_addr` points to the offset of the literal within the instruction
-                    const sub_operand = @as(*align(1) const u32, @ptrFromInt(sub_offset_addr)).*;
-                    break :stack_size sub_operand + @sizeOf(usize) * @as(usize, frameless.stack.indirect.stack_adjust);
-                };
-
-                // Decode the Lehmer-coded sequence of registers.
-                // For a description of the encoding see lib/libc/include/any-macos.13-any/mach-o/compact_unwind_encoding.h
-
-                // Decode the variable-based permutation number into its digits. Each digit represents
-                // an index into the list of register numbers that weren't yet used in the sequence at
-                // the time the digit was added.
-                const reg_count = frameless.stack_reg_count;
-                const ip_ptr = ip_ptr: {
-                    var digits: [6]u3 = undefined;
-                    var accumulator: usize = frameless.stack_reg_permutation;
-                    var base: usize = 2;
-                    for (0..reg_count) |i| {
-                        const div = accumulator / base;
-                        digits[digits.len - 1 - i] = @intCast(accumulator - base * div);
-                        accumulator = div;
-                        base += 1;
-                    }
-
-                    var registers: [6]u3 = undefined;
-                    var used_indices: [6]bool = @splat(false);
-                    for (digits[digits.len - reg_count ..], 0..) |target_unused_index, i| {
-                        var unused_count: u8 = 0;
-                        const unused_index = for (used_indices, 0..) |used, index| {
-                            if (!used) {
-                                if (target_unused_index == unused_count) break index;
-                                unused_count += 1;
-                            }
-                        } else unreachable;
-                        registers[i] = @intCast(unused_index + 1);
-                        used_indices[unused_index] = true;
-                    }
-
-                    var reg_addr = sp + stack_size - @sizeOf(usize) * @as(usize, reg_count + 1);
-                    for (0..reg_count) |i| {
-                        const reg_number = try Dwarf.compactUnwindToDwarfRegNumber(registers[i]);
-                        (try dwarfRegNative(&context.cpu_context, reg_number)).* = @as(*const usize, @ptrFromInt(reg_addr)).*;
-                        reg_addr += @sizeOf(usize);
-                    }
-
-                    break :ip_ptr reg_addr;
-                };
-
-                const new_ip = @as(*const usize, @ptrFromInt(ip_ptr)).*;
-                const new_sp = ip_ptr + @sizeOf(usize);
-
-                (try dwarfRegNative(&context.cpu_context, sp_reg_num)).* = new_sp;
-                (try dwarfRegNative(&context.cpu_context, ip_reg_num)).* = new_ip;
-
-                break :ip new_ip;
-            },
-            .DWARF => {
-                const dwarf = &(unwind.dwarf orelse return error.MissingDebugInfo);
-                return context.unwindFrame(unwind.dwarf_cache, gpa, dwarf, unwind.vmaddr_slide, encoding.value.x86_64.dwarf);
-            },
-        },
-        .aarch64, .aarch64_be => switch (encoding.mode.arm64) {
-            .OLD => return error.UnsupportedDebugInfo,
-            .FRAMELESS => ip: {
-                const sp = (try dwarfRegNative(&context.cpu_context, sp_reg_num)).*;
-                const new_sp = sp + encoding.value.arm64.frameless.stack_size * 16;
-                const new_ip = (try dwarfRegNative(&context.cpu_context, 30)).*;
-                (try dwarfRegNative(&context.cpu_context, sp_reg_num)).* = new_sp;
-                break :ip new_ip;
-            },
-            .DWARF => {
-                const dwarf = &(unwind.dwarf orelse return error.MissingDebugInfo);
-                return context.unwindFrame(unwind.dwarf_cache, gpa, dwarf, unwind.vmaddr_slide, encoding.value.arm64.dwarf);
-            },
-            .FRAME => ip: {
-                const frame = encoding.value.arm64.frame;
-
-                const fp = (try dwarfRegNative(&context.cpu_context, fp_reg_num)).*;
-                const ip_ptr = fp + @sizeOf(usize);
-
-                var reg_addr = fp - @sizeOf(usize);
-                inline for (@typeInfo(@TypeOf(frame.x_reg_pairs)).@"struct".fields, 0..) |field, i| {
-                    if (@field(frame.x_reg_pairs, field.name) != 0) {
-                        (try dwarfRegNative(&context.cpu_context, 19 + i)).* = @as(*const usize, @ptrFromInt(reg_addr)).*;
-                        reg_addr += @sizeOf(usize);
-                        (try dwarfRegNative(&context.cpu_context, 20 + i)).* = @as(*const usize, @ptrFromInt(reg_addr)).*;
-                        reg_addr += @sizeOf(usize);
-                    }
-                }
-
-                inline for (@typeInfo(@TypeOf(frame.d_reg_pairs)).@"struct".fields, 0..) |field, i| {
-                    if (@field(frame.d_reg_pairs, field.name) != 0) {
-                        // Only the lower half of the 128-bit V registers are restored during unwinding
-                        {
-                            const dest: *align(1) usize = @ptrCast(try context.cpu_context.dwarfRegisterBytes(64 + 8 + i));
-                            dest.* = @as(*const usize, @ptrFromInt(reg_addr)).*;
-                        }
-                        reg_addr += @sizeOf(usize);
-                        {
-                            const dest: *align(1) usize = @ptrCast(try context.cpu_context.dwarfRegisterBytes(64 + 9 + i));
-                            dest.* = @as(*const usize, @ptrFromInt(reg_addr)).*;
-                        }
-                        reg_addr += @sizeOf(usize);
-                    }
-                }
-
-                const new_ip = @as(*const usize, @ptrFromInt(ip_ptr)).*;
-                const new_fp = @as(*const usize, @ptrFromInt(fp)).*;
-
-                (try dwarfRegNative(&context.cpu_context, fp_reg_num)).* = new_fp;
-                (try dwarfRegNative(&context.cpu_context, ip_reg_num)).* = new_ip;
-
-                break :ip new_ip;
-            },
-        },
-        else => comptime unreachable, // unimplemented
-    };
-
-    const ret_addr = std.debug.stripInstructionPtrAuthCode(new_ip);
-
-    // Like `DwarfUnwindContext.unwindFrame`, adjust our next lookup pc in case the `call` was this
-    // function's last instruction making `ret_addr` one byte past its end.
-    context.pc = ret_addr -| 1;
-
-    return ret_addr;
-}
-/// Per-module debug information state, guarded by `mutex`.
-pub const DebugInfo = struct {
-    /// Held while checking and/or populating `unwind` or `loaded_macho`.
-    /// Once a field is populated and the pointer `&di.loaded_macho.?` or `&di.unwind.?` has been
-    /// gotten, the lock is released; i.e. it is not held while *using* the loaded info.
-    mutex: std.Thread.Mutex,
-
-    /// `null` until populated (see `mutex`).
-    unwind: ?Unwind,
-    /// `null` until populated (see `mutex`).
-    loaded_macho: ?LoadedMachO,
-
-    /// Initial state: unlocked mutex, nothing loaded yet.
-    pub const init: DebugInfo = .{
-        .mutex = .{},
-
-        .unwind = null,
-        .loaded_macho = null,
-    };
-
-    /// Releases everything owned through `loaded_macho`, if it was populated: for each object
-    /// file its DWARF state, its symbol-name index and its file mapping; then the object file
-    /// table, the `symbols` slice and the main file mapping.
-    ///
-    /// `unwind` is not touched here.
-    /// NOTE(review): confirm that whatever `unwind.?.dwarf_cache` points at is released elsewhere.
-    pub fn deinit(di: *DebugInfo, gpa: Allocator) void {
-        if (di.loaded_macho) |*loaded_macho| {
-            for (loaded_macho.ofiles.values()) |*ofile| {
-                ofile.dwarf.deinit(gpa);
-                ofile.symbols_by_name.deinit(gpa);
-                posix.munmap(ofile.mapped_memory);
-            }
-            loaded_macho.ofiles.deinit(gpa);
-            gpa.free(loaded_macho.symbols);
-            posix.munmap(loaded_macho.mapped_memory);
-        }
-    }
-
-    const Unwind = struct {
-        /// The slide applied to the `__unwind_info` and `__eh_frame` sections.
-        /// So, `unwind_info.ptr` is this many bytes higher than the section's vmaddr.
-        vmaddr_slide: u64,
-        /// Backed by the in-memory section mapped by the loader.
-        unwind_info: ?[]const u8,
-        /// Backed by the in-memory `__eh_frame` section mapped by the loader.
-        dwarf: ?Dwarf.Unwind,
-        /// This is `undefined` if `dwarf == null`.
-        dwarf_cache: *UnwindContext.Cache,
-    };
-
-    const LoadedMachO = struct {
-        /// File mapping; released with `posix.munmap` in `DebugInfo.deinit`.
-        mapped_memory: []align(std.heap.page_size_min) const u8,
-        /// Allocated with the `gpa` passed to `DebugInfo.deinit`, which frees it.
-        symbols: []const MachoSymbol,
-        strings: []const u8,
-        /// Key is index into `strings` of the file path.
-        ofiles: std.AutoArrayHashMapUnmanaged(u32, OFile),
-        /// This is not necessarily the same as the vmaddr_slide that dyld would report. This is
-        /// because the segments in the file on disk might differ from the ones in memory. Normally
-        /// we wouldn't necessarily expect that to work, but /usr/lib/dyld is incredibly annoying:
-        /// it exists on disk (necessarily, because the kernel needs to load it!), but is also in
-        /// the dyld cache (dyld actually restarts itself from cache after loading it), and the two
-        /// versions have (very) different segment base addresses. It's sort of like a large slide
-        /// has been applied to all addresses in memory. For an optimal experience, we consider the
-        /// on-disk vmaddr instead of the in-memory one.
-        vaddr_offset: usize,
-    };
-
-    const OFile = struct {
-        /// Mapping of the whole object file; `strtab` and `symtab` are views into it.
-        mapped_memory: []align(std.heap.page_size_min) const u8,
-        dwarf: Dwarf,
-        /// The string table without its final NUL terminator.
-        strtab: []const u8,
-        symtab: []align(1) const macho.nlist_64,
-        /// All named symbols in `symtab`. Stored `u32` key is the index into `symtab`. Accessed
-        /// through `SymbolAdapter`, so that the symbol name is used as the logical key.
-        symbols_by_name: std.ArrayHashMapUnmanaged(u32, void, void, true),
-
-        /// Hash map adapter that lets `symbols_by_name` be queried by symbol name while storing
-        /// only `symtab` indices.
-        const SymbolAdapter = struct {
-            strtab: []const u8,
-            symtab: []align(1) const macho.nlist_64,
-            /// Hashes the name itself (Wyhash, seed 0, truncated to 32 bits).
-            pub fn hash(ctx: SymbolAdapter, sym_name: []const u8) u32 {
-                _ = ctx;
-                return @truncate(std.hash.Wyhash.hash(0, sym_name));
-            }
-            /// Compares `a_sym_name` with the NUL-terminated name of `symtab[b_sym_index]`.
-            pub fn eql(ctx: SymbolAdapter, a_sym_name: []const u8, b_sym_index: u32, b_index: usize) bool {
-                _ = b_index;
-                const b_sym = ctx.symtab[b_sym_index];
-                const b_sym_name = mem.sliceTo(ctx.strtab[b_sym.n_strx..], 0);
-                return mem.eql(u8, a_sym_name, b_sym_name);
-            }
-        };
-    };
-
-    /// Maps the object file at `o_file_path` and prepares it for symbol and DWARF lookups.
-    ///
-    /// On success the returned `OFile` owns the file mapping, the opened DWARF state and the
-    /// symbol-name index; on failure everything acquired so far is released again.
-    ///
-    /// Returns `error.InvalidDebugInfo` if the file is not a well-formed 64-bit Mach-O object
-    /// (bad magic, offsets or sizes outside the file, string table not NUL-terminated, symbol
-    /// name offset outside the string table, duplicate symbol names), and
-    /// `error.MissingDebugInfo` if it lacks a segment or symbol table load command or one of the
-    /// required `__DWARF` sections. All offsets and sizes read from the file are validated with
-    /// overflow-checked arithmetic before they are used to slice the mapping.
-    fn loadOFile(gpa: Allocator, o_file_path: []const u8) !OFile {
-        const mapped_mem = try mapDebugInfoFile(o_file_path);
-        errdefer posix.munmap(mapped_mem);
-
-        if (mapped_mem.len < @sizeOf(macho.mach_header_64)) return error.InvalidDebugInfo;
-        const hdr: *const macho.mach_header_64 = @ptrCast(@alignCast(mapped_mem.ptr));
-        if (hdr.magic != macho.MH_MAGIC_64) return error.InvalidDebugInfo;
-
-        // The load commands must lie entirely within the mapping.
-        const bytes_after_header = mapped_mem[@sizeOf(macho.mach_header_64)..];
-        if (bytes_after_header.len < hdr.sizeofcmds) return error.InvalidDebugInfo;
-
-        const seg_cmd: macho.LoadCommandIterator.LoadCommand, const symtab_cmd: macho.symtab_command = cmds: {
-            var seg_cmd: ?macho.LoadCommandIterator.LoadCommand = null;
-            var symtab_cmd: ?macho.symtab_command = null;
-            var it: macho.LoadCommandIterator = .{
-                .ncmds = hdr.ncmds,
-                .buffer = bytes_after_header[0..hdr.sizeofcmds],
-            };
-            // If a command kind appears more than once, the last occurrence wins.
-            while (it.next()) |cmd| switch (cmd.cmd()) {
-                .SEGMENT_64 => seg_cmd = cmd,
-                .SYMTAB => symtab_cmd = cmd.cast(macho.symtab_command) orelse return error.InvalidDebugInfo,
-                else => {},
-            };
-            break :cmds .{
-                seg_cmd orelse return error.MissingDebugInfo,
-                symtab_cmd orelse return error.MissingDebugInfo,
-            };
-        };
-
-        // The string table must be non-empty, in bounds, and end with a NUL byte. The sum is
-        // computed with checked arithmetic because both operands come straight from the file.
-        if (symtab_cmd.strsize == 0) return error.InvalidDebugInfo;
-        const strtab_end = std.math.add(usize, symtab_cmd.stroff, symtab_cmd.strsize) catch
-            return error.InvalidDebugInfo;
-        if (mapped_mem.len < strtab_end) return error.InvalidDebugInfo;
-        if (mapped_mem[strtab_end - 1] != 0) return error.InvalidDebugInfo;
-        const strtab = mapped_mem[symtab_cmd.stroff .. strtab_end - 1];
-
-        const n_sym_bytes = std.math.mul(usize, symtab_cmd.nsyms, @sizeOf(macho.nlist_64)) catch
-            return error.InvalidDebugInfo;
-        const symtab_end = std.math.add(usize, symtab_cmd.symoff, n_sym_bytes) catch
-            return error.InvalidDebugInfo;
-        if (mapped_mem.len < symtab_end) return error.InvalidDebugInfo;
-        const symtab: []align(1) const macho.nlist_64 = @ptrCast(mapped_mem[symtab_cmd.symoff..][0..n_sym_bytes]);
-
-        // TODO handle tentative (common) symbols
-        var symbols_by_name: std.ArrayHashMapUnmanaged(u32, void, void, true) = .empty;
-        // After the successful `move()` below this frees an empty map, so it only has an effect
-        // on the error paths.
-        defer symbols_by_name.deinit(gpa);
-        try symbols_by_name.ensureUnusedCapacity(gpa, @intCast(symtab.len));
-        const adapter: OFile.SymbolAdapter = .{ .strtab = strtab, .symtab = symtab };
-        for (symtab, 0..) |sym, sym_index| {
-            if (sym.n_strx == 0) continue;
-            switch (sym.n_type.bits.type) {
-                .undf => continue, // includes tentative symbols
-                .abs => continue,
-                else => {},
-            }
-            // Reject name offsets that point past the string table instead of slicing out of bounds.
-            if (sym.n_strx > strtab.len) return error.InvalidDebugInfo;
-            const sym_name = mem.sliceTo(strtab[sym.n_strx..], 0);
-            const gop = symbols_by_name.getOrPutAssumeCapacityAdapted(
-                @as([]const u8, sym_name),
-                adapter,
-            );
-            if (gop.found_existing) return error.InvalidDebugInfo;
-            gop.key_ptr.* = @intCast(sym_index);
-        }
-
-        var sections: Dwarf.SectionArray = @splat(null);
-        for (seg_cmd.getSections()) |sect| {
-            if (!mem.eql(u8, "__DWARF", sect.segName())) continue;
-
-            // Match `__<name>` against every known DWARF section id; skip unknown sections.
-            const section_index: usize = inline for (@typeInfo(Dwarf.Section.Id).@"enum".fields, 0..) |section, i| {
-                if (mem.eql(u8, "__" ++ section.name, sect.sectName())) break i;
-            } else continue;
-
-            const sect_end = std.math.add(u64, sect.offset, sect.size) catch return error.InvalidDebugInfo;
-            if (mapped_mem.len < sect_end) return error.InvalidDebugInfo;
-            const section_bytes = mapped_mem[sect.offset..][0..sect.size];
-            sections[section_index] = .{
-                .data = section_bytes,
-                .owned = false,
-            };
-        }
-
-        const missing_debug_info =
-            sections[@intFromEnum(Dwarf.Section.Id.debug_info)] == null or
-            sections[@intFromEnum(Dwarf.Section.Id.debug_abbrev)] == null or
-            sections[@intFromEnum(Dwarf.Section.Id.debug_str)] == null or
-            sections[@intFromEnum(Dwarf.Section.Id.debug_line)] == null;
-        if (missing_debug_info) return error.MissingDebugInfo;
-
-        var dwarf: Dwarf = .{ .sections = sections };
-        errdefer dwarf.deinit(gpa);
-        try dwarf.open(gpa, native_endian);
-
-        return .{
-            .mapped_memory = mapped_mem,
-            .dwarf = dwarf,
-            .strtab = strtab,
-            .symtab = symtab,
-            .symbols_by_name = symbols_by_name.move(),
-        };
-    }
-};
-
-/// One entry of an address-sorted symbol list.
-const MachoSymbol = struct {
-    strx: u32,
-    addr: u64,
-    /// Value may be `unknown_ofile`.
-    ofile: u32,
-    const unknown_ofile = std.math.maxInt(u32);
-
-    /// Ordering predicate on `addr`, suitable for sorting symbols in ascending address order.
-    fn addressLessThan(_: void, lhs: MachoSymbol, rhs: MachoSymbol) bool {
-        return lhs.addr < rhs.addr;
-    }
-
-    /// Returns the last symbol whose `addr` is less than or equal to `address`, or `null` if
-    /// there is none. Assumes that `symbols` is sorted in order of ascending `addr`.
-    fn find(symbols: []const MachoSymbol, address: usize) ?*const MachoSymbol {
-        // Nothing to search, or `address` lies below the lowest-address symbol.
-        if (symbols.len == 0 or address < symbols[0].addr) return null;
-
-        // Invariant: the answer lies within `symbols[base..][0..remaining]`.
-        var base: usize = 0;
-        var remaining: usize = symbols.len;
-        while (remaining > 1) {
-            const half = remaining / 2;
-            const probe = base + half;
-            if (symbols[probe].addr <= address) {
-                base = probe;
-                remaining -= half;
-            } else {
-                remaining = half;
-            }
-        }
-        return &symbols[base];
-    }
-
-    test find {
-        const symbols: []const MachoSymbol = &.{
-            .{ .addr = 100, .strx = undefined, .ofile = undefined },
-            .{ .addr = 200, .strx = undefined, .ofile = undefined },
-            .{ .addr = 300, .strx = undefined, .ofile = undefined },
-        };
-
-        const Case = struct { address: usize, expected_index: ?usize };
-        const cases = [_]Case{
-            .{ .address = 0, .expected_index = null },
-            .{ .address = 99, .expected_index = null },
-            .{ .address = 100, .expected_index = 0 },
-            .{ .address = 150, .expected_index = 0 },
-            .{ .address = 199, .expected_index = 0 },
-            .{ .address = 200, .expected_index = 1 },
-            .{ .address = 250, .expected_index = 1 },
-            .{ .address = 299, .expected_index = 1 },
-            .{ .address = 300, .expected_index = 2 },
-            .{ .address = 301, .expected_index = 2 },
-            .{ .address = 5000, .expected_index = 2 },
-        };
-
-        for (cases) |case| {
-            const found = find(symbols, case.address);
-            if (case.expected_index) |index| {
-                try testing.expectEqual(&symbols[index], found.?);
-            } else {
-                try testing.expectEqual(null, found);
-            }
-        }
-    }
-};
-// Reference `MachoSymbol` from a top-level test so that the `test` declared inside that struct
-// is picked up together with this file's tests.
-test {
-    _ = MachoSymbol;
-}
-
-/// Instruction pointer register number for the target architecture, as reported by
-/// `Dwarf.ipRegNum`. The `.?` asserts that a value exists for this architecture.
-const ip_reg_num = Dwarf.ipRegNum(builtin.target.cpu.arch).?;
-/// Frame pointer register number for the target architecture, as reported by `Dwarf.fpRegNum`.
-const fp_reg_num = Dwarf.fpRegNum(builtin.target.cpu.arch);
-/// Stack pointer register number for the target architecture, as reported by `Dwarf.spRegNum`.
-const sp_reg_num = Dwarf.spRegNum(builtin.target.cpu.arch);
-
-/// Uses `mmap` to map the file at `path` into memory.
-///
-/// `path` is opened relative to the current working directory. The mapping is read-only and
-/// shared; the caller releases it with `posix.munmap`. The file handle is closed before
-/// returning.
-///
-/// Returns `error.MissingDebugInfo` if the file does not exist, `error.ReadFailed` if it cannot
-/// be opened for any other reason, and `error.InvalidDebugInfo` if the file is empty or its
-/// size does not fit in `usize`. Errors from querying the file size or from `mmap` itself are
-/// passed through unchanged.
-fn mapDebugInfoFile(path: []const u8) ![]align(std.heap.page_size_min) const u8 {
-    const file = std.fs.cwd().openFile(path, .{}) catch |err| switch (err) {
-        error.FileNotFound => return error.MissingDebugInfo,
-        else => return error.ReadFailed,
-    };
-    defer file.close();
-
-    const file_len = std.math.cast(usize, try file.getEndPos()) orelse return error.InvalidDebugInfo;
-
-    // A zero-length mapping is invalid for `mmap` (POSIX specifies `EINVAL`), and an empty file
-    // cannot contain debug info anyway, so reject it here instead of attempting the call.
-    if (file_len == 0) return error.InvalidDebugInfo;
-
-    return posix.mmap(
-        null,
-        file_len,
-        posix.PROT.READ,
-        .{ .TYPE = .SHARED },
-        file.handle,
-        0,
-    );
-}
-
-const DarwinModule = @This();
-
-const std = @import("../../std.zig");
-const Allocator = std.mem.Allocator;
-const Dwarf = std.debug.Dwarf;
-const assert = std.debug.assert;
-const macho = std.macho;
-const mem = std.mem;
-const posix = std.posix;
-const testing = std.testing;
-const Error = std.debug.SelfInfo.Error;
-const dwarfRegNative = std.debug.SelfInfo.DwarfUnwindContext.regNative;
-
-const builtin = @import("builtin");
-const native_endian = builtin.target.cpu.arch.endian();
author	mlugg <mlugg@mlugg.co.uk>	2025-09-30 11:06:21 +0100
committer	mlugg <mlugg@mlugg.co.uk>	2025-09-30 14:18:26 +0100
commit	1120546f72405ac263dce7414eb71ca4e6c96fc8 (patch)
tree	4a6f90029d8feff983889a133326fbe2a4e3465d /lib/std/debug/SelfInfo/DarwinModule.zig
parent	12ceb896faebf25195d8b360e4972dd2bf23ede1 (diff)
download	zig-1120546f72405ac263dce7414eb71ca4e6c96fc8.tar.gz zig-1120546f72405ac263dce7414eb71ca4e6c96fc8.zip