From 0caca625ebad92495a758e3121c91ba1f32774dd Mon Sep 17 00:00:00 2001
From: Matthew Lugg <mlugg@mlugg.co.uk>
Date: Wed, 19 Nov 2025 12:01:49 +0000
Subject: std.debug: split up Mach-O debug info handling

Like ELF, we now have `std.debug.MachOFile` for the host-independent
parts, and `std.debug.SelfInfo.MachO` for logic requiring the file to
correspond to the running program.
---
 lib/std/debug/SelfInfo/MachO.zig | 444 +++++----------------------------------
 1 file changed, 47 insertions(+), 397 deletions(-)

(limited to 'lib/std/debug/SelfInfo/MachO.zig')

diff --git a/lib/std/debug/SelfInfo/MachO.zig b/lib/std/debug/SelfInfo/MachO.zig
index f7eb4465c5..83adb6dcd4 100644
--- a/lib/std/debug/SelfInfo/MachO.zig
+++ b/lib/std/debug/SelfInfo/MachO.zig
@@ -1,12 +1,10 @@
 mutex: std.Thread.Mutex,
 /// Accessed through `Module.Adapter`.
 modules: std.ArrayHashMapUnmanaged(Module, void, Module.Context, false),
-ofiles: std.StringArrayHashMapUnmanaged(?OFile),
 
 pub const init: SelfInfo = .{
     .mutex = .{},
     .modules = .empty,
-    .ofiles = .empty,
 };
 pub fn deinit(si: *SelfInfo, gpa: Allocator) void {
     for (si.modules.keys()) |*module| {
@@ -14,20 +12,12 @@ pub fn deinit(si: *SelfInfo, gpa: Allocator) void {
             const u = &(module.unwind orelse break :unwind catch break :unwind);
             if (u.dwarf) |*dwarf| dwarf.deinit(gpa);
         }
-        loaded: {
-            const l = &(module.loaded_macho orelse break :loaded catch break :loaded);
-            gpa.free(l.symbols);
-            posix.munmap(l.mapped_memory);
+        file: {
+            const f = &(module.file orelse break :file catch break :file);
+            f.deinit(gpa);
         }
     }
-    for (si.ofiles.values()) |*opt_ofile| {
-        const ofile = &(opt_ofile.* orelse continue);
-        ofile.dwarf.deinit(gpa);
-        ofile.symbols_by_name.deinit(gpa);
-        posix.munmap(ofile.mapped_memory);
-    }
     si.modules.deinit(gpa);
-    si.ofiles.deinit(gpa);
 }
 
 pub fn getSymbol(si: *SelfInfo, gpa: Allocator, io: Io, address: usize) Error!std.debug.Symbol {
@@ -35,67 +25,55 @@ pub fn getSymbol(si: *SelfInfo, gpa: Allocator, io: Io, address: usize) Error!st
     const module = try si.findModule(gpa, address);
     defer si.mutex.unlock();
 
-    const loaded_macho = try module.getLoadedMachO(gpa);
-
-    const vaddr = address - loaded_macho.vaddr_offset;
-    const symbol = MachoSymbol.find(loaded_macho.symbols, vaddr) orelse return .unknown;
+    const file = try module.getFile(gpa);
 
-    // offset of `address` from start of `symbol`
-    const address_symbol_offset = vaddr - symbol.addr;
+    // This is not necessarily the same as the vmaddr_slide that dyld would report. This is
+    // because the segments in the file on disk might differ from the ones in memory. Normally
+    // we wouldn't necessarily expect that to work, but /usr/lib/dyld is incredibly annoying:
+    // it exists on disk (necessarily, because the kernel needs to load it!), but is also in
+    // the dyld cache (dyld actually restarts itself from cache after loading it), and the two
+    // versions have (very) different segment base addresses. It's sort of like a large slide
+    // has been applied to all addresses in memory. For an optimal experience, we consider the
+    // on-disk vmaddr instead of the in-memory one.
+    const vaddr_offset = module.text_base - file.text_vmaddr;
 
-    // Take the symbol name from the N_FUN STAB entry, we're going to
-    // use it if we fail to find the DWARF infos
-    const stab_symbol = mem.sliceTo(loaded_macho.strings[symbol.strx..], 0);
+    const vaddr = address - vaddr_offset;
 
-    // If any information is missing, we can at least return this from now on.
-    const sym_only_result: std.debug.Symbol = .{
-        .name = stab_symbol,
-        .compile_unit_name = null,
-        .source_location = null,
+    const ofile_dwarf, const ofile_vaddr = file.getDwarfForAddress(gpa, vaddr) catch {
+        // Return at least the symbol name if available.
+        return .{
+            .name = try file.lookupSymbolName(vaddr),
+            .compile_unit_name = null,
+            .source_location = null,
+        };
     };
 
-    if (symbol.ofile == MachoSymbol.unknown_ofile) {
-        // We don't have STAB info, so can't track down the object file; all we can do is the symbol name.
-        return sym_only_result;
-    }
-
-    const o_file: *OFile = of: {
-        const path = mem.sliceTo(loaded_macho.strings[symbol.ofile..], 0);
-        const gop = try si.ofiles.getOrPut(gpa, path);
-        if (!gop.found_existing) {
-            gop.value_ptr.* = loadOFile(gpa, path) catch null;
-        }
-        if (gop.value_ptr.*) |*o_file| {
-            break :of o_file;
-        } else {
-            return sym_only_result;
-        }
+    const compile_unit = ofile_dwarf.findCompileUnit(native_endian, ofile_vaddr) catch {
+        // Return at least the symbol name if available.
+        return .{
+            .name = try file.lookupSymbolName(vaddr),
+            .compile_unit_name = null,
+            .source_location = null,
+        };
     };
 
-    const symbol_index = o_file.symbols_by_name.getKeyAdapted(
-        @as([]const u8, stab_symbol),
-        @as(OFile.SymbolAdapter, .{ .strtab = o_file.strtab, .symtab = o_file.symtab }),
-    ) orelse return sym_only_result;
-    const symbol_ofile_vaddr = o_file.symtab[symbol_index].n_value;
-
-    const compile_unit = o_file.dwarf.findCompileUnit(native_endian, symbol_ofile_vaddr) catch return sym_only_result;
-
     return .{
-        .name = o_file.dwarf.getSymbolName(symbol_ofile_vaddr + address_symbol_offset) orelse stab_symbol,
+        .name = ofile_dwarf.getSymbolName(ofile_vaddr) orelse
+            try file.lookupSymbolName(vaddr),
         .compile_unit_name = compile_unit.die.getAttrString(
-            &o_file.dwarf,
+            ofile_dwarf,
             native_endian,
             std.dwarf.AT.name,
-            o_file.dwarf.section(.debug_str),
+            ofile_dwarf.section(.debug_str),
             compile_unit,
         ) catch |err| switch (err) {
             error.MissingDebugInfo, error.InvalidDebugInfo => null,
         },
-        .source_location = o_file.dwarf.getLineNumberInfo(
+        .source_location = ofile_dwarf.getLineNumberInfo(
             gpa,
             native_endian,
             compile_unit,
-            symbol_ofile_vaddr + address_symbol_offset,
+            ofile_vaddr,
         ) catch null,
     };
 }
@@ -447,7 +425,7 @@ fn findModule(si: *SelfInfo, gpa: Allocator, address: usize) Error!*Module {
             .text_base = @intFromPtr(info.fbase),
             .name = std.mem.span(info.fname),
             .unwind = null,
-            .loaded_macho = null,
+            .file = null,
         };
     }
     return gop.key_ptr;
@@ -457,7 +435,7 @@ const Module = struct {
     text_base: usize,
     name: []const u8,
     unwind: ?(Error!Unwind),
-    loaded_macho: ?(Error!LoadedMachO),
+    file: ?(Error!MachOFile),
 
     const Adapter = struct {
         pub fn hash(_: Adapter, text_base: usize) u32 {
@@ -488,34 +466,17 @@ const Module = struct {
         dwarf: ?Dwarf.Unwind,
     };
 
-    const LoadedMachO = struct {
-        mapped_memory: []align(std.heap.page_size_min) const u8,
-        symbols: []const MachoSymbol,
-        strings: []const u8,
-        /// This is not necessarily the same as the vmaddr_slide that dyld would report. This is
-        /// because the segments in the file on disk might differ from the ones in memory. Normally
-        /// we wouldn't necessarily expect that to work, but /usr/lib/dyld is incredibly annoying:
-        /// it exists on disk (necessarily, because the kernel needs to load it!), but is also in
-        /// the dyld cache (dyld actually restart itself from cache after loading it), and the two
-        /// versions have (very) different segment base addresses. It's sort of like a large slide
-        /// has been applied to all addresses in memory. For an optimal experience, we consider the
-        /// on-disk vmaddr instead of the in-memory one.
-        vaddr_offset: usize,
-    };
-
     fn getUnwindInfo(module: *Module, gpa: Allocator) Error!*Unwind {
         if (module.unwind == null) module.unwind = loadUnwindInfo(module, gpa);
         return if (module.unwind.?) |*unwind| unwind else |err| err;
     }
     fn loadUnwindInfo(module: *const Module, gpa: Allocator) Error!Unwind {
-        const header: *std.macho.mach_header = @ptrFromInt(module.text_base);
+        const header: *std.macho.mach_header_64 = @ptrFromInt(module.text_base);
 
-        var it: macho.LoadCommandIterator = .{
-            .ncmds = header.ncmds,
-            .buffer = @as([*]u8, @ptrCast(header))[@sizeOf(macho.mach_header_64)..][0..header.sizeofcmds],
-        };
-        const sections, const text_vmaddr = while (it.next()) |load_cmd| {
-            if (load_cmd.cmd() != .SEGMENT_64) continue;
+        const raw_macho: [*]u8 = @ptrCast(header);
+        var it = macho.LoadCommandIterator.init(header, raw_macho[@sizeOf(macho.mach_header_64)..][0..header.sizeofcmds]) catch unreachable;
+        const sections, const text_vmaddr = while (it.next() catch unreachable) |load_cmd| {
+            if (load_cmd.hdr.cmd != .SEGMENT_64) continue;
             const segment_cmd = load_cmd.cast(macho.segment_command_64).?;
             if (!mem.eql(u8, segment_cmd.segName(), "__TEXT")) continue;
             break .{ load_cmd.getSections(), segment_cmd.vmaddr };
@@ -568,237 +529,15 @@ const Module = struct {
         };
     }
 
-    fn getLoadedMachO(module: *Module, gpa: Allocator) Error!*LoadedMachO {
-        if (module.loaded_macho == null) module.loaded_macho = loadMachO(module, gpa) catch |err| switch (err) {
-            error.InvalidDebugInfo, error.MissingDebugInfo, error.OutOfMemory, error.Unexpected => |e| e,
-            else => error.ReadFailed,
-        };
-        return if (module.loaded_macho.?) |*lm| lm else |err| err;
-    }
-    fn loadMachO(module: *const Module, gpa: Allocator) Error!LoadedMachO {
-        const all_mapped_memory = try mapDebugInfoFile(module.name);
-        errdefer posix.munmap(all_mapped_memory);
-
-        // In most cases, the file we just mapped is a Mach-O binary. However, it could be a "universal
-        // binary": a simple file format which contains Mach-O binaries for multiple targets. For
-        // instance, `/usr/lib/dyld` is currently distributed as a universal binary containing images
-        // for both ARM64 macOS and x86_64 macOS.
-        if (all_mapped_memory.len < 4) return error.InvalidDebugInfo;
-        const magic = @as(*const u32, @ptrCast(all_mapped_memory.ptr)).*;
-        // The contents of a Mach-O file, which may or may not be the whole of `all_mapped_memory`.
-        const mapped_macho = switch (magic) {
-            macho.MH_MAGIC_64 => all_mapped_memory,
-
-            macho.FAT_CIGAM => mapped_macho: {
-                // This is the universal binary format (aka a "fat binary"). Annoyingly, the whole thing
-                // is big-endian, so we'll be swapping some bytes.
-                if (all_mapped_memory.len < @sizeOf(macho.fat_header)) return error.InvalidDebugInfo;
-                const hdr: *const macho.fat_header = @ptrCast(all_mapped_memory.ptr);
-                const archs_ptr: [*]const macho.fat_arch = @ptrCast(all_mapped_memory.ptr + @sizeOf(macho.fat_header));
-                const archs: []const macho.fat_arch = archs_ptr[0..@byteSwap(hdr.nfat_arch)];
-                const native_cpu_type = switch (builtin.cpu.arch) {
-                    .x86_64 => macho.CPU_TYPE_X86_64,
-                    .aarch64 => macho.CPU_TYPE_ARM64,
-                    else => comptime unreachable,
-                };
-                for (archs) |*arch| {
-                    if (@byteSwap(arch.cputype) != native_cpu_type) continue;
-                    const offset = @byteSwap(arch.offset);
-                    const size = @byteSwap(arch.size);
-                    break :mapped_macho all_mapped_memory[offset..][0..size];
-                }
-                // Our native architecture was not present in the fat binary.
-                return error.MissingDebugInfo;
-            },
-
-            // Even on modern 64-bit targets, this format doesn't seem to be too extensively used. It
-            // will be fairly easy to add support here if necessary; it's very similar to above.
-            macho.FAT_CIGAM_64 => return error.UnsupportedDebugInfo,
-
-            else => return error.InvalidDebugInfo,
-        };
-
-        const hdr: *const macho.mach_header_64 = @ptrCast(@alignCast(mapped_macho.ptr));
-        if (hdr.magic != macho.MH_MAGIC_64)
-            return error.InvalidDebugInfo;
-
-        const symtab: macho.symtab_command, const text_vmaddr: u64 = lc_iter: {
-            var it: macho.LoadCommandIterator = .{
-                .ncmds = hdr.ncmds,
-                .buffer = mapped_macho[@sizeOf(macho.mach_header_64)..][0..hdr.sizeofcmds],
-            };
-            var symtab: ?macho.symtab_command = null;
-            var text_vmaddr: ?u64 = null;
-            while (it.next()) |cmd| switch (cmd.cmd()) {
-                .SYMTAB => symtab = cmd.cast(macho.symtab_command) orelse return error.InvalidDebugInfo,
-                .SEGMENT_64 => if (cmd.cast(macho.segment_command_64)) |seg_cmd| {
-                    if (!mem.eql(u8, seg_cmd.segName(), "__TEXT")) continue;
-                    text_vmaddr = seg_cmd.vmaddr;
-                },
-                else => {},
-            };
-            break :lc_iter .{
-                symtab orelse return error.MissingDebugInfo,
-                text_vmaddr orelse return error.MissingDebugInfo,
-            };
-        };
-
-        const syms_ptr: [*]align(1) const macho.nlist_64 = @ptrCast(mapped_macho[symtab.symoff..]);
-        const syms = syms_ptr[0..symtab.nsyms];
-        const strings = mapped_macho[symtab.stroff..][0 .. symtab.strsize - 1];
-
-        var symbols: std.ArrayList(MachoSymbol) = try .initCapacity(gpa, syms.len);
-        defer symbols.deinit(gpa);
-
-        // This map is temporary; it is used only to detect duplicates here. This is
-        // necessary because we prefer to use STAB ("symbolic debugging table") symbols,
-        // but they might not be present, so we track normal symbols too.
-        // Indices match 1-1 with those of `symbols`.
-        var symbol_names: std.StringArrayHashMapUnmanaged(void) = .empty;
-        defer symbol_names.deinit(gpa);
-        try symbol_names.ensureUnusedCapacity(gpa, syms.len);
-
-        var ofile: u32 = undefined;
-        var last_sym: MachoSymbol = undefined;
-        var state: enum {
-            init,
-            oso_open,
-            oso_close,
-            bnsym,
-            fun_strx,
-            fun_size,
-            ensym,
-        } = .init;
-
-        for (syms) |*sym| {
-            if (sym.n_type.bits.is_stab == 0) {
-                if (sym.n_strx == 0) continue;
-                switch (sym.n_type.bits.type) {
-                    .undf, .pbud, .indr, .abs, _ => continue,
-                    .sect => {
-                        const name = std.mem.sliceTo(strings[sym.n_strx..], 0);
-                        const gop = symbol_names.getOrPutAssumeCapacity(name);
-                        if (!gop.found_existing) {
-                            assert(gop.index == symbols.items.len);
-                            symbols.appendAssumeCapacity(.{
-                                .strx = sym.n_strx,
-                                .addr = sym.n_value,
-                                .ofile = MachoSymbol.unknown_ofile,
-                            });
-                        }
-                    },
-                }
-                continue;
-            }
-
-            // TODO handle globals N_GSYM, and statics N_STSYM
-            switch (sym.n_type.stab) {
-                .oso => switch (state) {
-                    .init, .oso_close => {
-                        state = .oso_open;
-                        ofile = sym.n_strx;
-                    },
-                    else => return error.InvalidDebugInfo,
-                },
-                .bnsym => switch (state) {
-                    .oso_open, .ensym => {
-                        state = .bnsym;
-                        last_sym = .{
-                            .strx = 0,
-                            .addr = sym.n_value,
-                            .ofile = ofile,
-                        };
-                    },
-                    else => return error.InvalidDebugInfo,
-                },
-                .fun => switch (state) {
-                    .bnsym => {
-                        state = .fun_strx;
-                        last_sym.strx = sym.n_strx;
-                    },
-                    .fun_strx => {
-                        state = .fun_size;
-                    },
-                    else => return error.InvalidDebugInfo,
-                },
-                .ensym => switch (state) {
-                    .fun_size => {
-                        state = .ensym;
-                        if (last_sym.strx != 0) {
-                            const name = std.mem.sliceTo(strings[last_sym.strx..], 0);
-                            const gop = symbol_names.getOrPutAssumeCapacity(name);
-                            if (!gop.found_existing) {
-                                assert(gop.index == symbols.items.len);
-                                symbols.appendAssumeCapacity(last_sym);
-                            } else {
-                                symbols.items[gop.index] = last_sym;
-                            }
-                        }
-                    },
-                    else => return error.InvalidDebugInfo,
-                },
-                .so => switch (state) {
-                    .init, .oso_close => {},
-                    .oso_open, .ensym => {
-                        state = .oso_close;
-                    },
-                    else => return error.InvalidDebugInfo,
-                },
-                else => {},
-            }
-        }
-
-        switch (state) {
-            .init => {
-                // Missing STAB symtab entries is still okay, unless there were also no normal symbols.
-                if (symbols.items.len == 0) return error.MissingDebugInfo;
-            },
-            .oso_close => {},
-            else => return error.InvalidDebugInfo, // corrupted STAB entries in symtab
-        }
-
-        const symbols_slice = try symbols.toOwnedSlice(gpa);
-        errdefer gpa.free(symbols_slice);
-
-        // Even though lld emits symbols in ascending order, this debug code
-        // should work for programs linked in any valid way.
-        // This sort is so that we can binary search later.
-        mem.sort(MachoSymbol, symbols_slice, {}, MachoSymbol.addressLessThan);
-
-        return .{
-            .mapped_memory = all_mapped_memory,
-            .symbols = symbols_slice,
-            .strings = strings,
-            .vaddr_offset = module.text_base - text_vmaddr,
+    fn getFile(module: *Module, gpa: Allocator) Error!*MachOFile {
+        if (module.file == null) module.file = MachOFile.load(gpa, module.name, builtin.cpu.arch) catch |err| switch (err) {
+            error.InvalidMachO, error.InvalidDwarf => error.InvalidDebugInfo,
+            error.MissingDebugInfo, error.OutOfMemory, error.UnsupportedDebugInfo, error.ReadFailed => |e| e,
         };
+        return if (module.file.?) |*f| f else |err| err;
     }
 };
 
-const OFile = struct {
-    mapped_memory: []align(std.heap.page_size_min) const u8,
-    dwarf: Dwarf,
-    strtab: []const u8,
-    symtab: []align(1) const macho.nlist_64,
-    /// All named symbols in `symtab`. Stored `u32` key is the index into `symtab`. Accessed
-    /// through `SymbolAdapter`, so that the symbol name is used as the logical key.
-    symbols_by_name: std.ArrayHashMapUnmanaged(u32, void, void, true),
-
-    const SymbolAdapter = struct {
-        strtab: []const u8,
-        symtab: []align(1) const macho.nlist_64,
-        pub fn hash(ctx: SymbolAdapter, sym_name: []const u8) u32 {
-            _ = ctx;
-            return @truncate(std.hash.Wyhash.hash(0, sym_name));
-        }
-        pub fn eql(ctx: SymbolAdapter, a_sym_name: []const u8, b_sym_index: u32, b_index: usize) bool {
-            _ = b_index;
-            const b_sym = ctx.symtab[b_sym_index];
-            const b_sym_name = std.mem.sliceTo(ctx.strtab[b_sym.n_strx..], 0);
-            return mem.eql(u8, a_sym_name, b_sym_name);
-        }
-    };
-};
-
 const MachoSymbol = struct {
     strx: u32,
     addr: u64,
@@ -880,101 +619,12 @@ fn mapDebugInfoFile(path: []const u8) ![]align(std.heap.page_size_min) const u8
     };
 }
 
-fn loadOFile(gpa: Allocator, o_file_path: []const u8) !OFile {
-    const mapped_mem = try mapDebugInfoFile(o_file_path);
-    errdefer posix.munmap(mapped_mem);
-
-    if (mapped_mem.len < @sizeOf(macho.mach_header_64)) return error.InvalidDebugInfo;
-    const hdr: *const macho.mach_header_64 = @ptrCast(@alignCast(mapped_mem.ptr));
-    if (hdr.magic != std.macho.MH_MAGIC_64) return error.InvalidDebugInfo;
-
-    const seg_cmd: macho.LoadCommandIterator.LoadCommand, const symtab_cmd: macho.symtab_command = cmds: {
-        var seg_cmd: ?macho.LoadCommandIterator.LoadCommand = null;
-        var symtab_cmd: ?macho.symtab_command = null;
-        var it: macho.LoadCommandIterator = .{
-            .ncmds = hdr.ncmds,
-            .buffer = mapped_mem[@sizeOf(macho.mach_header_64)..][0..hdr.sizeofcmds],
-        };
-        while (it.next()) |cmd| switch (cmd.cmd()) {
-            .SEGMENT_64 => seg_cmd = cmd,
-            .SYMTAB => symtab_cmd = cmd.cast(macho.symtab_command) orelse return error.InvalidDebugInfo,
-            else => {},
-        };
-        break :cmds .{
-            seg_cmd orelse return error.MissingDebugInfo,
-            symtab_cmd orelse return error.MissingDebugInfo,
-        };
-    };
-
-    if (mapped_mem.len < symtab_cmd.stroff + symtab_cmd.strsize) return error.InvalidDebugInfo;
-    if (mapped_mem[symtab_cmd.stroff + symtab_cmd.strsize - 1] != 0) return error.InvalidDebugInfo;
-    const strtab = mapped_mem[symtab_cmd.stroff..][0 .. symtab_cmd.strsize - 1];
-
-    const n_sym_bytes = symtab_cmd.nsyms * @sizeOf(macho.nlist_64);
-    if (mapped_mem.len < symtab_cmd.symoff + n_sym_bytes) return error.InvalidDebugInfo;
-    const symtab: []align(1) const macho.nlist_64 = @ptrCast(mapped_mem[symtab_cmd.symoff..][0..n_sym_bytes]);
-
-    // TODO handle tentative (common) symbols
-    var symbols_by_name: std.ArrayHashMapUnmanaged(u32, void, void, true) = .empty;
-    defer symbols_by_name.deinit(gpa);
-    try symbols_by_name.ensureUnusedCapacity(gpa, @intCast(symtab.len));
-    for (symtab, 0..) |sym, sym_index| {
-        if (sym.n_strx == 0) continue;
-        switch (sym.n_type.bits.type) {
-            .undf => continue, // includes tentative symbols
-            .abs => continue,
-            else => {},
-        }
-        const sym_name = mem.sliceTo(strtab[sym.n_strx..], 0);
-        const gop = symbols_by_name.getOrPutAssumeCapacityAdapted(
-            @as([]const u8, sym_name),
-            @as(OFile.SymbolAdapter, .{ .strtab = strtab, .symtab = symtab }),
-        );
-        if (gop.found_existing) return error.InvalidDebugInfo;
-        gop.key_ptr.* = @intCast(sym_index);
-    }
-
-    var sections: Dwarf.SectionArray = @splat(null);
-    for (seg_cmd.getSections()) |sect| {
-        if (!std.mem.eql(u8, "__DWARF", sect.segName())) continue;
-
-        const section_index: usize = inline for (@typeInfo(Dwarf.Section.Id).@"enum".fields, 0..) |section, i| {
-            if (mem.eql(u8, "__" ++ section.name, sect.sectName())) break i;
-        } else continue;
-
-        if (mapped_mem.len < sect.offset + sect.size) return error.InvalidDebugInfo;
-        const section_bytes = mapped_mem[sect.offset..][0..sect.size];
-        sections[section_index] = .{
-            .data = section_bytes,
-            .owned = false,
-        };
-    }
-
-    const missing_debug_info =
-        sections[@intFromEnum(Dwarf.Section.Id.debug_info)] == null or
-        sections[@intFromEnum(Dwarf.Section.Id.debug_abbrev)] == null or
-        sections[@intFromEnum(Dwarf.Section.Id.debug_str)] == null or
-        sections[@intFromEnum(Dwarf.Section.Id.debug_line)] == null;
-    if (missing_debug_info) return error.MissingDebugInfo;
-
-    var dwarf: Dwarf = .{ .sections = sections };
-    errdefer dwarf.deinit(gpa);
-    try dwarf.open(gpa, native_endian);
-
-    return .{
-        .mapped_memory = mapped_mem,
-        .dwarf = dwarf,
-        .strtab = strtab,
-        .symtab = symtab,
-        .symbols_by_name = symbols_by_name.move(),
-    };
-}
-
 const std = @import("std");
 const Io = std.Io;
 const Allocator = std.mem.Allocator;
 const Dwarf = std.debug.Dwarf;
 const Error = std.debug.SelfInfoError;
+const MachOFile = std.debug.MachOFile;
 const assert = std.debug.assert;
 const posix = std.posix;
 const macho = std.macho;
-- 
cgit v1.2.3


From 010dcd6a9b64d5bd13579a4b0c4c70a5aee5c967 Mon Sep 17 00:00:00 2001
From: Matthew Lugg <mlugg@mlugg.co.uk>
Date: Wed, 19 Nov 2025 12:55:17 +0000
Subject: fuzzer: account for runtime address slide

This is relevant to PIEs, which are notably enabled by default on macOS.
The build system needs to only see virtual addresses, that is, those
which do not have the slide applied; but the fuzzer itself naturally
sees relocated addresses (i.e. with the slide applied). We just need to
subtract the slide when we communicate addresses to the build system.
---
 lib/compiler/test_runner.zig       |  2 +-
 lib/fuzzer.zig                     | 34 ++++++++++++++++++++++++++++------
 lib/std/Build/abi.zig              |  1 +
 lib/std/debug.zig                  |  2 +-
 lib/std/debug/SelfInfo/Elf.zig     |  5 +++++
 lib/std/debug/SelfInfo/MachO.zig   | 14 ++++++++++++++
 lib/std/debug/SelfInfo/Windows.zig |  6 ++++++
 7 files changed, 56 insertions(+), 8 deletions(-)

(limited to 'lib/std/debug/SelfInfo/MachO.zig')

diff --git a/lib/compiler/test_runner.zig b/lib/compiler/test_runner.zig
index 0d6f451947..054fe1eb27 100644
--- a/lib/compiler/test_runner.zig
+++ b/lib/compiler/test_runner.zig
@@ -184,7 +184,7 @@ fn mainServer() !void {
                 const test_fn = builtin.test_functions[index];
                 const entry_addr = @intFromPtr(test_fn.func);
 
-                try server.serveU64Message(.fuzz_start_addr, entry_addr);
+                try server.serveU64Message(.fuzz_start_addr, fuzz_abi.fuzzer_unslide_address(entry_addr));
                 defer if (testing.allocator_instance.deinit() == .leak) std.process.exit(1);
                 is_fuzz_test = false;
                 fuzz_test_index = index;
diff --git a/lib/fuzzer.zig b/lib/fuzzer.zig
index 6b7a846e4c..5c452340f6 100644
--- a/lib/fuzzer.zig
+++ b/lib/fuzzer.zig
@@ -116,13 +116,18 @@ const Executable = struct {
                 "failed to init memory map for coverage file '{s}': {t}",
                 .{ &coverage_file_name, e },
             );
-            map.appendSliceAssumeCapacity(mem.asBytes(&abi.SeenPcsHeader{
+            map.appendSliceAssumeCapacity(@ptrCast(&abi.SeenPcsHeader{
                 .n_runs = 0,
                 .unique_runs = 0,
                 .pcs_len = pcs.len,
             }));
             map.appendNTimesAssumeCapacity(0, pc_bitset_usizes * @sizeOf(usize));
-            map.appendSliceAssumeCapacity(mem.sliceAsBytes(pcs));
+            // Relocations have been applied to `pcs` so it contains runtime addresses (with slide
+            // applied). We need to translate these to the virtual addresses as on disk.
+            for (pcs) |pc| {
+                const pc_vaddr = fuzzer_unslide_address(pc);
+                map.appendSliceAssumeCapacity(@ptrCast(&pc_vaddr));
+            }
             return map;
         } else {
             const size = coverage_file.getEndPos() catch |e| panic(
@@ -215,7 +220,16 @@ const Executable = struct {
             .{ self.pc_counters.len, pcs.len },
         );
 
-        self.pc_digest = std.hash.Wyhash.hash(0, mem.sliceAsBytes(pcs));
+        self.pc_digest = digest: {
+            // Relocations have been applied to `pcs` so it contains runtime addresses (with slide
+            // applied). We need to translate these to the virtual addresses as on disk.
+            var h: std.hash.Wyhash = .init(0);
+            for (pcs) |pc| {
+                const pc_vaddr = fuzzer_unslide_address(pc);
+                h.update(@ptrCast(&pc_vaddr));
+            }
+            break :digest h.final();
+        };
         self.shared_seen_pcs = getCoverageFile(cache_dir, pcs, self.pc_digest);
 
         return self;
@@ -622,6 +636,14 @@ export fn fuzzer_main(limit_kind: abi.LimitKind, amount: u64) void {
     }
 }
 
+export fn fuzzer_unslide_address(addr: usize) usize {
+    const si = std.debug.getSelfDebugInfo() catch @compileError("unsupported");
+    const slide = si.getModuleSlide(std.debug.getDebugInfoAllocator(), addr) catch |err| {
+        std.debug.panic("failed to find virtual address slide: {t}", .{err});
+    };
+    return addr - slide;
+}
+
 /// Helps determine run uniqueness in the face of recursion.
 /// Currently not used by the fuzzer.
 export threadlocal var __sancov_lowest_stack: usize = 0;
@@ -1185,13 +1207,13 @@ const Mutation = enum {
                         const j = rng.uintAtMostBiased(usize, corpus[splice_i].len - len);
                         out.appendSliceAssumeCapacity(corpus[splice_i][j..][0..len]);
                     },
-                    .@"const" => out.appendSliceAssumeCapacity(mem.asBytes(
+                    .@"const" => out.appendSliceAssumeCapacity(@ptrCast(
                         &data_ctx[rng.uintLessThanBiased(usize, data_ctx.len)],
                     )),
-                    .small => out.appendSliceAssumeCapacity(mem.asBytes(
+                    .small => out.appendSliceAssumeCapacity(@ptrCast(
                         &mem.nativeTo(data_ctx[0], rng.int(SmallValue), data_ctx[1]),
                     )),
-                    .few => out.appendSliceAssumeCapacity(mem.asBytes(
+                    .few => out.appendSliceAssumeCapacity(@ptrCast(
                         &fewValue(rng, data_ctx[0], data_ctx[1]),
                     )),
                 }
diff --git a/lib/std/Build/abi.zig b/lib/std/Build/abi.zig
index eb8f6cb1be..b7c1e7379d 100644
--- a/lib/std/Build/abi.zig
+++ b/lib/std/Build/abi.zig
@@ -145,6 +145,7 @@ pub const fuzz = struct {
     pub extern fn fuzzer_init_test(test_one: TestOne, unit_test_name: Slice) void;
     pub extern fn fuzzer_new_input(bytes: Slice) void;
     pub extern fn fuzzer_main(limit_kind: LimitKind, amount: u64) void;
+    pub extern fn fuzzer_unslide_address(addr: usize) usize;
 
     pub const Slice = extern struct {
         ptr: [*]const u8,
diff --git a/lib/std/debug.zig b/lib/std/debug.zig
index 182ea94766..29c0731f4e 100644
--- a/lib/std/debug.zig
+++ b/lib/std/debug.zig
@@ -1367,7 +1367,7 @@ test printLineFromFile {
 
 /// The returned allocator should be thread-safe if the compilation is multi-threaded, because
 /// multiple threads could capture and/or print stack traces simultaneously.
-fn getDebugInfoAllocator() Allocator {
+pub fn getDebugInfoAllocator() Allocator {
     // Allow overriding the debug info allocator by exposing `root.debug.getDebugInfoAllocator`.
     if (@hasDecl(root, "debug") and @hasDecl(root.debug, "getDebugInfoAllocator")) {
         return root.debug.getDebugInfoAllocator();
diff --git a/lib/std/debug/SelfInfo/Elf.zig b/lib/std/debug/SelfInfo/Elf.zig
index 5036d40197..59c0b42451 100644
--- a/lib/std/debug/SelfInfo/Elf.zig
+++ b/lib/std/debug/SelfInfo/Elf.zig
@@ -80,6 +80,11 @@ pub fn getModuleName(si: *SelfInfo, gpa: Allocator, address: usize) Error![]cons
     if (module.name.len == 0) return error.MissingDebugInfo;
     return module.name;
 }
+pub fn getModuleSlide(si: *SelfInfo, gpa: Allocator, address: usize) Error!usize {
+    const module = try si.findModule(gpa, address, .shared);
+    defer si.rwlock.unlockShared();
+    return module.load_offset;
+}
 
 pub const can_unwind: bool = s: {
     // The DWARF code can't deal with ILP32 ABIs yet: https://github.com/ziglang/zig/issues/25447
diff --git a/lib/std/debug/SelfInfo/MachO.zig b/lib/std/debug/SelfInfo/MachO.zig
index 83adb6dcd4..94d50bbf77 100644
--- a/lib/std/debug/SelfInfo/MachO.zig
+++ b/lib/std/debug/SelfInfo/MachO.zig
@@ -82,6 +82,20 @@ pub fn getModuleName(si: *SelfInfo, gpa: Allocator, address: usize) Error![]cons
     defer si.mutex.unlock();
     return module.name;
 }
+pub fn getModuleSlide(si: *SelfInfo, gpa: Allocator, address: usize) Error!usize {
+    const module = try si.findModule(gpa, address);
+    defer si.mutex.unlock();
+    const header: *std.macho.mach_header_64 = @ptrFromInt(module.text_base);
+    const raw_macho: [*]u8 = @ptrCast(header);
+    var it = macho.LoadCommandIterator.init(header, raw_macho[@sizeOf(macho.mach_header_64)..][0..header.sizeofcmds]) catch unreachable;
+    const text_vmaddr = while (it.next() catch unreachable) |load_cmd| {
+        if (load_cmd.hdr.cmd != .SEGMENT_64) continue;
+        const segment_cmd = load_cmd.cast(macho.segment_command_64).?;
+        if (!mem.eql(u8, segment_cmd.segName(), "__TEXT")) continue;
+        break segment_cmd.vmaddr;
+    } else unreachable;
+    return module.text_base - text_vmaddr;
+}
 
 pub const can_unwind: bool = true;
 pub const UnwindContext = std.debug.Dwarf.SelfUnwinder;
diff --git a/lib/std/debug/SelfInfo/Windows.zig b/lib/std/debug/SelfInfo/Windows.zig
index 70009217db..306287a9e7 100644
--- a/lib/std/debug/SelfInfo/Windows.zig
+++ b/lib/std/debug/SelfInfo/Windows.zig
@@ -33,6 +33,12 @@ pub fn getModuleName(si: *SelfInfo, gpa: Allocator, address: usize) Error![]cons
     const module = try si.findModule(gpa, address);
     return module.name;
 }
+pub fn getModuleSlide(si: *SelfInfo, gpa: Allocator, address: usize) Error!usize {
+    si.mutex.lock();
+    defer si.mutex.unlock();
+    const module = try si.findModule(gpa, address);
+    return module.base_address;
+}
 
 pub const can_unwind: bool = switch (builtin.cpu.arch) {
     else => true,
-- 
cgit v1.2.3