diff options
| author | mlugg <mlugg@mlugg.co.uk> | 2025-09-01 16:50:39 +0100 |
|---|---|---|
| committer | mlugg <mlugg@mlugg.co.uk> | 2025-09-30 13:44:49 +0100 |
| commit | b750e7cf9e2a1225b20ef7fdf53df9ef97cf8065 (patch) | |
| tree | d4760dd9e1279db621fbf4318264f951117a2172 /lib/std/debug | |
| parent | b706949736fe67e104a14ac1dcaac8b7eb1cc33f (diff) | |
| download | zig-b750e7cf9e2a1225b20ef7fdf53df9ef97cf8065.tar.gz zig-b750e7cf9e2a1225b20ef7fdf53df9ef97cf8065.zip | |
change one million things
Diffstat (limited to 'lib/std/debug')
| -rw-r--r-- | lib/std/debug/Dwarf.zig | 83 | ||||
| -rw-r--r-- | lib/std/debug/Dwarf/Unwind.zig | 1002 | ||||
| -rw-r--r-- | lib/std/debug/Dwarf/Unwind/VirtualMachine.zig | 298 | ||||
| -rw-r--r-- | lib/std/debug/Dwarf/call_frame.zig | 36 | ||||
| -rw-r--r-- | lib/std/debug/SelfInfo.zig | 2845 |
5 files changed, 1931 insertions, 2333 deletions
diff --git a/lib/std/debug/Dwarf.zig b/lib/std/debug/Dwarf.zig index e0d74172da..8d1087b6ca 100644 --- a/lib/std/debug/Dwarf.zig +++ b/lib/std/debug/Dwarf.zig @@ -78,17 +78,6 @@ pub const Section = struct { debug_addr, debug_names, }; - - // For sections that are not memory mapped by the loader, this is an offset - // from `data.ptr` to where the section would have been mapped. Otherwise, - // `data` is directly backed by the section and the offset is zero. - pub fn virtualOffset(self: Section, base_address: usize) i64 { - return if (self.virtual_address) |va| - @as(i64, @intCast(base_address + va)) - - @as(i64, @intCast(@intFromPtr(self.data.ptr))) - else - 0; - } }; pub const Abbrev = struct { @@ -342,10 +331,6 @@ pub fn section(di: Dwarf, dwarf_section: Section.Id) ?[]const u8 { return if (di.sections[@intFromEnum(dwarf_section)]) |s| s.data else null; } -pub fn sectionVirtualOffset(di: Dwarf, dwarf_section: Section.Id, base_address: usize) ?i64 { - return if (di.sections[@intFromEnum(dwarf_section)]) |s| s.virtualOffset(base_address) else null; -} - pub fn deinit(di: *Dwarf, gpa: Allocator) void { for (di.sections) |opt_section| { if (opt_section) |s| if (s.owned) gpa.free(s.data); @@ -364,8 +349,6 @@ pub fn deinit(di: *Dwarf, gpa: Allocator) void { } di.compile_unit_list.deinit(gpa); di.func_list.deinit(gpa); - di.cie_map.deinit(gpa); - di.fde_list.deinit(gpa); di.ranges.deinit(gpa); di.* = undefined; } @@ -983,8 +966,8 @@ fn runLineNumberProgram(d: *Dwarf, gpa: Allocator, endian: Endian, compile_unit: }, 0, }; - _ = addr_size; - _ = seg_size; + if (seg_size != 0) return bad(); // unsupported + _ = addr_size; // TODO: ignoring this is incorrect, we should use it to decide address lengths const prologue_length = try readAddress(&fr, unit_header.format, endian); const prog_start_offset = fr.seek + prologue_length; @@ -1472,44 +1455,27 @@ pub const ElfModule = struct { mapped_memory: ?[]align(std.heap.page_size_min) const u8, external_mapped_memory: 
?[]align(std.heap.page_size_min) const u8, - pub const Lookup = struct { - base_address: usize, - name: []const u8, - build_id: ?[]const u8, - gnu_eh_frame: ?[]const u8, + pub const init: ElfModule = .{ + .unwind = .{ + .debug_frame = null, + .eh_frame = null, + }, + .dwarf = .{}, + .mapped_memory = null, + .external_mapped_memory = null, }; - pub fn init(lookup: *const Lookup) ElfModule { - var em: ElfModule = .{ - .unwind = .{ - .sections = @splat(null), - }, - .dwarf = .{}, - .mapped_memory = null, - .external_mapped_memory = null, - }; - if (lookup.gnu_eh_frame) |eh_frame_hdr| { - // This is a special case - pointer offsets inside .eh_frame_hdr - // are encoded relative to its base address, so we must use the - // version that is already memory mapped, and not the one that - // will be mapped separately from the ELF file. - em.unwind.sections[@intFromEnum(Dwarf.Unwind.Section.Id.eh_frame_hdr)] = .{ - .data = eh_frame_hdr, - }; - } - return em; - } - pub fn deinit(self: *@This(), allocator: Allocator) void { self.dwarf.deinit(allocator); std.posix.munmap(self.mapped_memory); if (self.external_mapped_memory) |m| std.posix.munmap(m); } - pub fn getSymbolAtAddress(self: *@This(), allocator: Allocator, endian: Endian, base_address: usize, address: usize) !std.debug.Symbol { - // Translate the VA into an address into this object - const relocated_address = address - base_address; - return self.dwarf.getSymbol(allocator, endian, relocated_address); + pub fn getSymbolAtAddress(self: *@This(), allocator: Allocator, endian: Endian, load_offset: usize, address: usize) !std.debug.Symbol { + // Translate the runtime address into a virtual address into the module + // MLUGG TODO: this clearly tells us that the logic should live near SelfInfo... 
+ const vaddr = address - load_offset; return self.dwarf.getSymbol(allocator, endian, vaddr); } pub fn getDwarfUnwindForAddress(self: *@This(), allocator: Allocator, address: usize) !?*Dwarf.Unwind { @@ -1548,7 +1514,7 @@ pub const ElfModule = struct { mapped_mem: []align(std.heap.page_size_min) const u8, build_id: ?[]const u8, expected_crc: ?u32, - parent_sections: *Dwarf.SectionArray, + parent_sections: ?*Dwarf.SectionArray, parent_mapped_mem: ?[]align(std.heap.page_size_min) const u8, elf_filename: ?[]const u8, ) LoadError!void { @@ -1577,10 +1543,12 @@ pub const ElfModule = struct { var sections: Dwarf.SectionArray = @splat(null); // Combine section list. This takes ownership over any owned sections from the parent scope. - for (parent_sections, &sections) |*parent, *section_elem| { - if (parent.*) |*p| { - section_elem.* = p.*; - p.owned = false; + if (parent_sections) |ps| { + for (ps, &sections) |*parent, *section_elem| { + if (parent.*) |*p| { + section_elem.* = p.*; + p.owned = false; + } } } errdefer for (sections) |opt_section| if (opt_section) |s| if (s.owned) gpa.free(s.data); @@ -1647,7 +1615,6 @@ pub const ElfModule = struct { // Attempt to load debug info from an external file // See: https://sourceware.org/gdb/onlinedocs/gdb/Separate-Debug-Files.html if (missing_debug_info) { - // Only allow one level of debug info nesting if (parent_mapped_mem) |_| { return error.MissingDebugInfo; @@ -1775,6 +1742,7 @@ pub const ElfModule = struct { em.mapped_memory = parent_mapped_mem orelse mapped_mem; em.external_mapped_memory = if (parent_mapped_mem != null) mapped_mem else null; + em.dwarf.sections = sections; try em.dwarf.open(gpa, endian); } @@ -1844,7 +1812,8 @@ pub fn chopSlice(ptr: []const u8, offset: u64, size: u64) error{Overflow}![]cons return ptr[start..end]; } -pub fn readAddress(r: *Reader, format: std.dwarf.Format, endian: Endian) !u64 { +fn readAddress(r: *Reader, format: std.dwarf.Format, endian: Endian) !u64 { + // MLUGG TODO FIX BEFORE MERGE: this 
function is slightly bogus. addresses have a byte width which is independent of the `dwarf.Format`! return switch (format) { .@"32" => try r.takeInt(u32, endian), .@"64" => try r.takeInt(u64, endian), @@ -1852,6 +1821,8 @@ pub fn readAddress(r: *Reader, format: std.dwarf.Format, endian: Endian) !u64 { } fn nativeFormat() std.dwarf.Format { + // MLUGG TODO FIX BEFORE MERGE: this is nonsensical. this is neither what `dwarf.Format` is for, nor does it make sense to check the NATIVE FUCKING FORMAT + // when parsing ARBITRARY DWARF. return switch (@sizeOf(usize)) { 4 => .@"32", 8 => .@"64", diff --git a/lib/std/debug/Dwarf/Unwind.zig b/lib/std/debug/Dwarf/Unwind.zig index 1da318a048..a51c417e7c 100644 --- a/lib/std/debug/Dwarf/Unwind.zig +++ b/lib/std/debug/Dwarf/Unwind.zig @@ -1,632 +1,622 @@ -sections: SectionArray = @splat(null), +pub const VirtualMachine = @import("Unwind/VirtualMachine.zig"); -/// Starts out non-`null` if the `.eh_frame_hdr` section is present. May become `null` later if we -/// find that `.eh_frame_hdr` is incomplete. -eh_frame_hdr: ?ExceptionFrameHeader = null, -/// These lookup tables are only used if `eh_frame_hdr` is null -cie_map: std.AutoArrayHashMapUnmanaged(u64, CommonInformationEntry) = .empty, -/// Sorted by start_pc -fde_list: std.ArrayList(FrameDescriptionEntry) = .empty, - -pub const Section = struct { +/// The contents of the `.debug_frame` section as specified by DWARF. This might be a more reliable +/// stack unwind mechanism in some cases, or it may be present when `.eh_frame` is not, but fetching +/// the data requires loading the binary, so it is not a viable approach for fast stack trace +/// capturing within a process. +debug_frame: ?struct { data: []const u8, - - pub const Id = enum { - debug_frame, - eh_frame, - eh_frame_hdr, - }; + /// Offsets into `data` of FDEs, sorted by ascending `pc_begin`. 
+ sorted_fdes: []SortedFdeEntry, +}, + +/// Data associated with the `.eh_frame` and `.eh_frame_hdr` sections as defined by LSB Core. The +/// format of `.eh_frame` is an extension of that of DWARF's `.debug_frame` -- in fact it is almost +/// identical, though subtly different in a few places. +eh_frame: ?struct { + header: EhFrameHeader, + /// Though this is a slice, it may be longer than the `.eh_frame` section. When unwinding + /// through the runtime-loaded `.eh_frame_hdr` data, we are not told the size of the `.eh_frame` + /// section, so construct a slice referring to all of the rest of memory. The end of the section + /// must be detected through `EntryHeader.terminator`. + eh_frame_data: []const u8, + /// Offsets into `eh_frame_data` of FDEs, sorted by ascending `pc_begin`. + /// Populated only if `header` does not already contain a lookup table. + sorted_fdes: ?[]SortedFdeEntry, +}, + +const SortedFdeEntry = struct { + /// This FDE's value of `pc_begin`. + pc_begin: u64, + /// Offset into the section of the corresponding FDE, including the entry header. 
+ fde_offset: u64, }; -const num_sections = std.enums.directEnumArrayLen(Section.Id, 0); -pub const SectionArray = [num_sections]?Section; - -pub fn section(unwind: Unwind, dwarf_section: Section.Id) ?[]const u8 { - return if (unwind.sections[@intFromEnum(dwarf_section)]) |s| s.data else null; -} +const Section = enum { debug_frame, eh_frame }; /// This represents the decoded .eh_frame_hdr header -pub const ExceptionFrameHeader = struct { - eh_frame_ptr: usize, - table_enc: u8, - fde_count: usize, - entries: []const u8, - - pub fn entrySize(table_enc: u8) !u8 { - return switch (table_enc & EH.PE.type_mask) { - EH.PE.udata2, - EH.PE.sdata2, - => 4, - EH.PE.udata4, - EH.PE.sdata4, - => 8, - EH.PE.udata8, - EH.PE.sdata8, - => 16, - // This is a binary search table, so all entries must be the same length - else => return bad(), +pub const EhFrameHeader = struct { + vaddr: u64, + eh_frame_vaddr: u64, + search_table: ?struct { + /// The byte offset of the search table into the `.eh_frame_hdr` section. 
+ offset: u8, + encoding: EH.PE, + fde_count: usize, + entries: []const u8, + }, + + pub fn entrySize(table_enc: EH.PE, addr_size_bytes: u8) !u8 { + return switch (table_enc.type) { + .absptr => 2 * addr_size_bytes, + .udata2, .sdata2 => 4, + .udata4, .sdata4 => 8, + .udata8, .sdata8 => 16, + .uleb128, .sleb128 => return bad(), // this is a binary search table; all entries must be the same size + _ => return bad(), }; } - pub fn findEntry( - self: ExceptionFrameHeader, - eh_frame_len: usize, - eh_frame_hdr_ptr: usize, - pc: usize, - cie: *CommonInformationEntry, - fde: *FrameDescriptionEntry, + pub fn parse( + eh_frame_hdr_vaddr: u64, + eh_frame_hdr_bytes: []const u8, + addr_size_bytes: u8, endian: Endian, - ) !void { - const entry_size = try entrySize(self.table_enc); + ) !EhFrameHeader { + var r: Reader = .fixed(eh_frame_hdr_bytes); - var left: usize = 0; - var len: usize = self.fde_count; - var fbr: Reader = .fixed(self.entries); + const version = try r.takeByte(); + if (version != 1) return bad(); - while (len > 1) { - const mid = left + len / 2; + const eh_frame_ptr_enc: EH.PE = @bitCast(try r.takeByte()); + const fde_count_enc: EH.PE = @bitCast(try r.takeByte()); + const table_enc: EH.PE = @bitCast(try r.takeByte()); - fbr.seek = mid * entry_size; - const pc_begin = try readEhPointer(&fbr, self.table_enc, @sizeOf(usize), .{ - .pc_rel_base = @intFromPtr(&self.entries[fbr.seek]), - .follow_indirect = true, - .data_rel_base = eh_frame_hdr_ptr, - }, endian) orelse return bad(); + const eh_frame_ptr = try readEhPointer(&r, eh_frame_ptr_enc, addr_size_bytes, .{ + .pc_rel_base = eh_frame_hdr_vaddr + r.seek, + }, endian); + return .{ + .vaddr = eh_frame_hdr_vaddr, + .eh_frame_vaddr = eh_frame_ptr, + .search_table = table: { + if (fde_count_enc == EH.PE.omit) break :table null; + if (table_enc == EH.PE.omit) break :table null; + const fde_count = try readEhPointer(&r, fde_count_enc, addr_size_bytes, .{ + .pc_rel_base = eh_frame_hdr_vaddr + r.seek, + }, endian); + 
const entry_size = try entrySize(table_enc, addr_size_bytes); + const bytes_offset = r.seek; + const bytes_len = cast(usize, fde_count * entry_size) orelse return error.EndOfStream; + const bytes = try r.take(bytes_len); + break :table .{ + .encoding = table_enc, + .fde_count = @intCast(fde_count), + .entries = bytes, + .offset = @intCast(bytes_offset), + }; + }, + }; + } + + /// Asserts that `eh_frame_hdr.search_table != null`. + fn findEntry( + eh_frame_hdr: *const EhFrameHeader, + pc: u64, + addr_size_bytes: u8, + endian: Endian, + ) !?u64 { + const table = &eh_frame_hdr.search_table.?; + const table_vaddr = eh_frame_hdr.vaddr + table.offset; + const entry_size = try EhFrameHeader.entrySize(table.encoding, addr_size_bytes); + var left: usize = 0; + var len: usize = table.fde_count; + while (len > 1) { + const mid = left + len / 2; + var entry_reader: Reader = .fixed(table.entries[mid * entry_size ..][0..entry_size]); + const pc_begin = try readEhPointer(&entry_reader, table.encoding, addr_size_bytes, .{ + .pc_rel_base = table_vaddr + left * entry_size, + .data_rel_base = eh_frame_hdr.vaddr, + }, endian); if (pc < pc_begin) { len /= 2; } else { left = mid; - if (pc == pc_begin) break; len -= len / 2; } } - - if (len == 0) return missing(); - fbr.seek = left * entry_size; - - // Read past the pc_begin field of the entry - _ = try readEhPointer(&fbr, self.table_enc, @sizeOf(usize), .{ - .pc_rel_base = @intFromPtr(&self.entries[fbr.seek]), - .follow_indirect = true, - .data_rel_base = eh_frame_hdr_ptr, - }, endian) orelse return bad(); - - const fde_ptr = cast(usize, try readEhPointer(&fbr, self.table_enc, @sizeOf(usize), .{ - .pc_rel_base = @intFromPtr(&self.entries[fbr.seek]), - .follow_indirect = true, - .data_rel_base = eh_frame_hdr_ptr, - }, endian) orelse return bad()) orelse return bad(); - - if (fde_ptr < self.eh_frame_ptr) return bad(); - - const eh_frame = @as([*]const u8, @ptrFromInt(self.eh_frame_ptr))[0..eh_frame_len]; - - const fde_offset = fde_ptr - 
self.eh_frame_ptr; - var eh_frame_fbr: Reader = .fixed(eh_frame); - eh_frame_fbr.seek = fde_offset; - - const fde_entry_header = try EntryHeader.read(&eh_frame_fbr, .eh_frame, endian); - if (fde_entry_header.type != .fde) return bad(); - - // CIEs always come before FDEs (the offset is a subtraction), so we can assume this memory is readable - const cie_offset = fde_entry_header.type.fde; - eh_frame_fbr.seek = @intCast(cie_offset); - const cie_entry_header = try EntryHeader.read(&eh_frame_fbr, .eh_frame, endian); - if (cie_entry_header.type != .cie) return bad(); - - cie.* = try CommonInformationEntry.parse( - cie_entry_header.entry_bytes, - 0, - true, - cie_entry_header.format, - .eh_frame, - cie_entry_header.length_offset, - @sizeOf(usize), - endian, - ); - - fde.* = try FrameDescriptionEntry.parse( - fde_entry_header.entry_bytes, - 0, - true, - cie.*, - @sizeOf(usize), - endian, - ); - - if (pc < fde.pc_begin or pc >= fde.pc_begin + fde.pc_range) return missing(); + if (len == 0) return null; + var entry_reader: Reader = .fixed(table.entries[left * entry_size ..][0..entry_size]); + // Skip past `pc_begin`; we're now interested in the fde offset + _ = try readEhPointerAbs(&entry_reader, table.encoding.type, addr_size_bytes, endian); + const fde_ptr = try readEhPointer(&entry_reader, table.encoding, addr_size_bytes, .{ + .pc_rel_base = table_vaddr + left * entry_size, + .data_rel_base = eh_frame_hdr.vaddr, + }, endian); + return std.math.sub(u64, fde_ptr, eh_frame_hdr.eh_frame_vaddr) catch bad(); // offset into .eh_frame } }; -pub const EntryHeader = struct { - /// Offset of the length field in the backing buffer - length_offset: usize, - format: Format, - type: union(enum) { - cie, - /// Value is the offset of the corresponding CIE - fde: u64, - terminator, +pub const EntryHeader = union(enum) { + cie: struct { + format: Format, + /// Remaining bytes in the CIE. These are parseable by `CommonInformationEntry.parse`. 
+ bytes_len: u64, + }, + fde: struct { + format: Format, + /// Offset into the section of the corresponding CIE, *including* its entry header. + cie_offset: u64, + /// Remaining bytes in the FDE. These are parseable by `FrameDescriptionEntry.parse`. + bytes_len: u64, }, - /// The entry's contents, not including the ID field - entry_bytes: []const u8, + /// The `.eh_frame` format includes terminators which indicate that the last CIE/FDE has been + /// reached. However, `.debug_frame` does not include such a terminator, so the caller must + /// keep track of how many section bytes remain when parsing all entries in `.debug_frame`. + terminator, - /// The length of the entry including the ID field, but not the length field itself - pub fn entryLength(self: EntryHeader) usize { - return self.entry_bytes.len + @as(u8, if (self.format == .@"64") 8 else 4); - } + pub fn read(r: *Reader, header_section_offset: u64, section: Section, endian: Endian) !EntryHeader { + const unit_header = try Dwarf.readUnitHeader(r, endian); + if (unit_header.unit_length == 0) return .terminator; - /// Reads a header for either an FDE or a CIE, then advances the fbr to the - /// position after the trailing structure. - /// - /// `fbr` must be backed by either the .eh_frame or .debug_frame sections. - /// - /// TODO that's a bad API, don't do that. this function should neither require - /// a fixed reader nor depend on seeking. 
- pub fn read(fbr: *Reader, dwarf_section: Section.Id, endian: Endian) !EntryHeader { - assert(dwarf_section == .eh_frame or dwarf_section == .debug_frame); - - const length_offset = fbr.seek; - const unit_header = try Dwarf.readUnitHeader(fbr, endian); - const unit_length = cast(usize, unit_header.unit_length) orelse return bad(); - if (unit_length == 0) return .{ - .length_offset = length_offset, - .format = unit_header.format, - .type = .terminator, - .entry_bytes = &.{}, - }; - const start_offset = fbr.seek; - const end_offset = start_offset + unit_length; - defer fbr.seek = end_offset; - - const id = try Dwarf.readAddress(fbr, unit_header.format, endian); - const entry_bytes = fbr.buffer[fbr.seek..end_offset]; - const cie_id: u64 = switch (dwarf_section) { - .eh_frame => CommonInformationEntry.eh_id, + // TODO MLUGG: seriously, just... check the formats of everything in BOTH LSB Core and DWARF. this is a fucking *mess*. maybe add spec references. + + // Next is a value which will disambiguate CIEs and FDEs. Annoyingly, LSB Core makes this + // value always 4-byte, whereas DWARF makes it depend on the `dwarf.Format`. 
+ const cie_ptr_or_id_size: u8 = switch (section) { + .eh_frame => 4, .debug_frame => switch (unit_header.format) { - .@"32" => CommonInformationEntry.dwarf32_id, - .@"64" => CommonInformationEntry.dwarf64_id, + .@"32" => 4, + .@"64" => 8, }, + }; + const cie_ptr_or_id = switch (cie_ptr_or_id_size) { + 4 => try r.takeInt(u32, endian), + 8 => try r.takeInt(u64, endian), else => unreachable, }; + const remaining_bytes = unit_header.unit_length - cie_ptr_or_id_size; - return .{ - .length_offset = length_offset, - .format = unit_header.format, - .type = if (id == cie_id) .cie else .{ .fde = switch (dwarf_section) { - .eh_frame => try std.math.sub(u64, start_offset, id), - .debug_frame => id, - else => unreachable, - } }, - .entry_bytes = entry_bytes, + // If this entry is a CIE, then `cie_ptr_or_id` will have this value, which is different + // between the DWARF `.debug_frame` section and the LSB Core `.eh_frame` section. + const cie_id: u64 = switch (section) { + .eh_frame => 0, + .debug_frame => switch (unit_header.format) { + .@"32" => maxInt(u32), + .@"64" => maxInt(u64), + }, }; + if (cie_ptr_or_id == cie_id) { + return .{ .cie = .{ + .format = unit_header.format, + .bytes_len = remaining_bytes, + } }; + } + + // This is an FDE -- `cie_ptr_or_id` points to the associated CIE. Unfortunately, the format + // of that pointer again differs between `.debug_frame` and `.eh_frame`. 
+ const cie_offset = switch (section) { + .eh_frame => try std.math.sub(u64, header_section_offset + unit_header.header_length, cie_ptr_or_id), + .debug_frame => cie_ptr_or_id, + }; + return .{ .fde = .{ + .format = unit_header.format, + .cie_offset = cie_offset, + .bytes_len = remaining_bytes, + } }; } }; pub const CommonInformationEntry = struct { - // Used in .eh_frame - pub const eh_id = 0; - - // Used in .debug_frame (DWARF32) - pub const dwarf32_id = maxInt(u32); - - // Used in .debug_frame (DWARF64) - pub const dwarf64_id = maxInt(u64); - - // Offset of the length field of this entry in the eh_frame section. - // This is the key that FDEs use to reference CIEs. - length_offset: u64, version: u8, - address_size: u8, - format: Format, - // Only present in version 4 - segment_selector_size: ?u8, + /// In version 4, CIEs can specify the address size used in the CIE and associated FDEs. + /// This value must be used *only* to parse associated FDEs in `FrameDescriptionEntry.parse`. + addr_size_bytes: u8, + + /// Always 0 for versions which do not specify this (currently all versions other than 4). 
+ segment_selector_size: u8, code_alignment_factor: u32, data_alignment_factor: i32, return_address_register: u8, - aug_str: []const u8, - aug_data: []const u8, - lsda_pointer_enc: u8, - personality_enc: ?u8, - personality_routine_pointer: ?u64, - fde_pointer_enc: u8, - initial_instructions: []const u8, + fde_pointer_enc: EH.PE, + is_signal_frame: bool, - pub fn isSignalFrame(self: CommonInformationEntry) bool { - for (self.aug_str) |c| if (c == 'S') return true; - return false; - } + augmentation_kind: AugmentationKind, - pub fn addressesSignedWithBKey(self: CommonInformationEntry) bool { - for (self.aug_str) |c| if (c == 'B') return true; - return false; - } + initial_instructions: []const u8, - pub fn mteTaggedFrame(self: CommonInformationEntry) bool { - for (self.aug_str) |c| if (c == 'G') return true; - return false; - } + pub const AugmentationKind = enum { none, gcc_eh, lsb_z }; /// This function expects to read the CIE starting with the version field. - /// The returned struct references memory backed by cie_bytes. - /// - /// See the FrameDescriptionEntry.parse documentation for the description - /// of `pc_rel_offset` and `is_runtime`. + /// The returned struct references memory backed by `cie_bytes`. /// /// `length_offset` specifies the offset of this CIE's length field in the /// .eh_frame / .debug_frame section. pub fn parse( cie_bytes: []const u8, - pc_rel_offset: i64, - is_runtime: bool, - format: Format, - dwarf_section: Section.Id, - length_offset: u64, - addr_size_bytes: u8, - endian: Endian, + section: Section, + default_addr_size_bytes: u8, ) !CommonInformationEntry { - if (addr_size_bytes > 8) return error.UnsupportedAddrSize; + // We only read the data through this reader. 
+ var r: Reader = .fixed(cie_bytes); - var fbr: Reader = .fixed(cie_bytes); - - const version = try fbr.takeByte(); - switch (dwarf_section) { + const version = try r.takeByte(); + switch (section) { .eh_frame => if (version != 1 and version != 3) return error.UnsupportedDwarfVersion, .debug_frame => if (version != 4) return error.UnsupportedDwarfVersion, - else => return error.UnsupportedDwarfSection, } - var has_eh_data = false; - var has_aug_data = false; - - var aug_str_len: usize = 0; - const aug_str_start = fbr.seek; - var aug_byte = try fbr.takeByte(); - while (aug_byte != 0) : (aug_byte = try fbr.takeByte()) { - switch (aug_byte) { - 'z' => { - if (aug_str_len != 0) return bad(); - has_aug_data = true; - }, - 'e' => { - if (has_aug_data or aug_str_len != 0) return bad(); - if (try fbr.takeByte() != 'h') return bad(); - has_eh_data = true; - }, - else => if (has_eh_data) return bad(), - } - - aug_str_len += 1; - } + const aug_str = try r.takeSentinel(0); + const aug_kind: AugmentationKind = aug: { + if (aug_str.len == 0) break :aug .none; + if (aug_str[0] == 'z') break :aug .lsb_z; + if (std.mem.eql(u8, aug_str, "eh")) break :aug .gcc_eh; + // We can't finish parsing the CIE if we don't know what its augmentation means. 
+ return bad(); + }; - if (has_eh_data) { - // legacy data created by older versions of gcc - unsupported here - for (0..addr_size_bytes) |_| _ = try fbr.takeByte(); + switch (aug_kind) { + .none => {}, // no extra data + .lsb_z => {}, // no extra data yet, but there is a bit later + .gcc_eh => try r.discardAll(default_addr_size_bytes), // unsupported data } - const address_size = if (version == 4) try fbr.takeByte() else addr_size_bytes; - const segment_selector_size = if (version == 4) try fbr.takeByte() else null; - - const code_alignment_factor = try fbr.takeLeb128(u32); - const data_alignment_factor = try fbr.takeLeb128(i32); - const return_address_register = if (version == 1) try fbr.takeByte() else try fbr.takeLeb128(u8); - - var lsda_pointer_enc: u8 = EH.PE.omit; - var personality_enc: ?u8 = null; - var personality_routine_pointer: ?u64 = null; - var fde_pointer_enc: u8 = EH.PE.absptr; - - var aug_data: []const u8 = &[_]u8{}; - const aug_str = if (has_aug_data) blk: { - const aug_data_len = try fbr.takeLeb128(usize); - const aug_data_start = fbr.seek; - aug_data = cie_bytes[aug_data_start..][0..aug_data_len]; - - const aug_str = cie_bytes[aug_str_start..][0..aug_str_len]; - for (aug_str[1..]) |byte| { - switch (byte) { - 'L' => { - lsda_pointer_enc = try fbr.takeByte(); - }, - 'P' => { - personality_enc = try fbr.takeByte(); - personality_routine_pointer = try readEhPointer(&fbr, personality_enc.?, addr_size_bytes, .{ - .pc_rel_base = try pcRelBase(@intFromPtr(&cie_bytes[fbr.seek]), pc_rel_offset), - .follow_indirect = is_runtime, - }, endian); - }, - 'R' => { - fde_pointer_enc = try fbr.takeByte(); - }, - 'S', 'B', 'G' => {}, - else => return bad(), - } - } - - // aug_data_len can include padding so the CIE ends on an address boundary - fbr.seek = aug_data_start + aug_data_len; - break :blk aug_str; - } else &[_]u8{}; + const addr_size_bytes = if (version == 4) try r.takeByte() else default_addr_size_bytes; + const segment_selector_size: u8 = if (version 
== 4) try r.takeByte() else 0; + const code_alignment_factor = try r.takeLeb128(u32); + const data_alignment_factor = try r.takeLeb128(i32); + const return_address_register = if (version == 1) try r.takeByte() else try r.takeLeb128(u8); + + // This is where LSB's augmentation might add some data. + const fde_pointer_enc: EH.PE, const is_signal_frame: bool = aug: { + const default_fde_pointer_enc: EH.PE = .{ .type = .absptr, .rel = .abs }; + if (aug_kind != .lsb_z) break :aug .{ default_fde_pointer_enc, false }; + const aug_data_len = try r.takeLeb128(u32); + var aug_data: Reader = .fixed(try r.take(aug_data_len)); + var fde_pointer_enc: EH.PE = default_fde_pointer_enc; + var is_signal_frame = false; + for (aug_str[1..]) |byte| switch (byte) { + 'L' => _ = try aug_data.takeByte(), // we ignore the LSDA pointer + 'P' => { + const enc: EH.PE = @bitCast(try aug_data.takeByte()); + const endian: Endian = .little; // irrelevant because we're discarding the value anyway + _ = try readEhPointerAbs(&r, enc.type, addr_size_bytes, endian); // we ignore the personality routine; endianness is irrelevant since we're discarding + }, + 'R' => fde_pointer_enc = @bitCast(try aug_data.takeByte()), + 'S' => is_signal_frame = true, + 'B', 'G' => {}, + else => return bad(), + }; + break :aug .{ fde_pointer_enc, is_signal_frame }; + }; - const initial_instructions = cie_bytes[fbr.seek..]; return .{ - .length_offset = length_offset, .version = version, - .address_size = address_size, - .format = format, + .addr_size_bytes = addr_size_bytes, .segment_selector_size = segment_selector_size, .code_alignment_factor = code_alignment_factor, .data_alignment_factor = data_alignment_factor, .return_address_register = return_address_register, - .aug_str = aug_str, - .aug_data = aug_data, - .lsda_pointer_enc = lsda_pointer_enc, - .personality_enc = personality_enc, - .personality_routine_pointer = personality_routine_pointer, .fde_pointer_enc = fde_pointer_enc, - .initial_instructions = 
initial_instructions, + .is_signal_frame = is_signal_frame, + .augmentation_kind = aug_kind, + .initial_instructions = r.buffered(), }; } }; pub const FrameDescriptionEntry = struct { - // Offset into eh_frame where the CIE for this FDE is stored - cie_length_offset: u64, - pc_begin: u64, pc_range: u64, - lsda_pointer: ?u64, - aug_data: []const u8, instructions: []const u8, /// This function expects to read the FDE starting at the PC Begin field. /// The returned struct references memory backed by `fde_bytes`. - /// - /// `pc_rel_offset` specifies an offset to be applied to pc_rel_base values - /// used when decoding pointers. This should be set to zero if fde_bytes is - /// backed by the memory of a .eh_frame / .debug_frame section in the running executable. - /// Otherwise, it should be the relative offset to translate addresses from - /// where the section is currently stored in memory, to where it *would* be - /// stored at runtime: section base addr - backing data base ptr. - /// - /// Similarly, `is_runtime` specifies this function is being called on a runtime - /// section, and so indirect pointers can be followed. pub fn parse( + /// The virtual address of the FDE we're parsing, *excluding* its entry header (i.e. the + /// address is after the header). If `fde_bytes` is backed by the memory of a loaded + /// module's `.eh_frame` section, this will equal `fde_bytes.ptr`. 
+ fde_vaddr: u64, fde_bytes: []const u8, - pc_rel_offset: i64, - is_runtime: bool, cie: CommonInformationEntry, - addr_size_bytes: u8, endian: Endian, ) !FrameDescriptionEntry { - if (addr_size_bytes > 8) return error.InvalidAddrSize; - - var fbr: Reader = .fixed(fde_bytes); - - const pc_begin = try readEhPointer(&fbr, cie.fde_pointer_enc, addr_size_bytes, .{ - .pc_rel_base = try pcRelBase(@intFromPtr(&fde_bytes[fbr.seek]), pc_rel_offset), - .follow_indirect = is_runtime, - }, endian) orelse return bad(); - - const pc_range = try readEhPointer(&fbr, cie.fde_pointer_enc, addr_size_bytes, .{ - .pc_rel_base = 0, - .follow_indirect = false, - }, endian) orelse return bad(); - - var aug_data: []const u8 = &[_]u8{}; - const lsda_pointer = if (cie.aug_str.len > 0) blk: { - const aug_data_len = try fbr.takeLeb128(usize); - const aug_data_start = fbr.seek; - aug_data = fde_bytes[aug_data_start..][0..aug_data_len]; - - const lsda_pointer = if (cie.lsda_pointer_enc != EH.PE.omit) - try readEhPointer(&fbr, cie.lsda_pointer_enc, addr_size_bytes, .{ - .pc_rel_base = try pcRelBase(@intFromPtr(&fde_bytes[fbr.seek]), pc_rel_offset), - .follow_indirect = is_runtime, - }, endian) - else - null; - - fbr.seek = aug_data_start + aug_data_len; - break :blk lsda_pointer; - } else null; - - const instructions = fde_bytes[fbr.seek..]; + if (cie.segment_selector_size != 0) return error.UnsupportedAddrSize; + + var r: Reader = .fixed(fde_bytes); + + const pc_begin = try readEhPointer(&r, cie.fde_pointer_enc, cie.addr_size_bytes, .{ + .pc_rel_base = fde_vaddr, + }, endian); + + // I swear I'm not kidding when I say that PC Range is encoded with `cie.fde_pointer_enc`, but ignoring `rel`. 
+ const pc_range = switch (try readEhPointerAbs(&r, cie.fde_pointer_enc.type, cie.addr_size_bytes, endian)) { + .unsigned => |x| x, + .signed => |x| cast(u64, x) orelse return bad(), + }; + + switch (cie.augmentation_kind) { + .none, .gcc_eh => {}, + .lsb_z => { + // There is augmentation data, but it's irrelevant to us -- it + // only contains the LSDA pointer, which we don't care about. + const aug_data_len = try r.takeLeb128(u64); + _ = try r.discardAll(aug_data_len); + }, + } + return .{ - .cie_length_offset = cie.length_offset, .pc_begin = pc_begin, .pc_range = pc_range, - .lsda_pointer = lsda_pointer, - .aug_data = aug_data, - .instructions = instructions, + .instructions = r.buffered(), }; } }; -/// If `.eh_frame_hdr` is present, then only the header needs to be parsed. Otherwise, `.eh_frame` -/// and `.debug_frame` are scanned and a sorted list of FDEs is built for binary searching during -/// unwinding. Even if `.eh_frame_hdr` is used, we may find during unwinding that it's incomplete, -/// in which case we build the sorted list of FDEs at that point. -/// -/// See also `scanCieFdeInfo`. 
-pub fn scanAllUnwindInfo(di: *Dwarf, allocator: Allocator, base_address: usize) !void { - const endian = di.endian; - - if (di.section(.eh_frame_hdr)) |eh_frame_hdr| blk: { - var fbr: Reader = .fixed(eh_frame_hdr); - - const version = try fbr.takeByte(); - if (version != 1) break :blk; - - const eh_frame_ptr_enc = try fbr.takeByte(); - if (eh_frame_ptr_enc == EH.PE.omit) break :blk; - const fde_count_enc = try fbr.takeByte(); - if (fde_count_enc == EH.PE.omit) break :blk; - const table_enc = try fbr.takeByte(); - if (table_enc == EH.PE.omit) break :blk; - - const eh_frame_ptr = cast(usize, try readEhPointer(&fbr, eh_frame_ptr_enc, @sizeOf(usize), .{ - .pc_rel_base = @intFromPtr(&eh_frame_hdr[fbr.seek]), - .follow_indirect = true, - }, endian) orelse return bad()) orelse return bad(); - - const fde_count = cast(usize, try readEhPointer(&fbr, fde_count_enc, @sizeOf(usize), .{ - .pc_rel_base = @intFromPtr(&eh_frame_hdr[fbr.seek]), - .follow_indirect = true, - }, endian) orelse return bad()) orelse return bad(); - - const entry_size = try ExceptionFrameHeader.entrySize(table_enc); - const entries_len = fde_count * entry_size; - if (entries_len > eh_frame_hdr.len - fbr.seek) return bad(); - - di.eh_frame_hdr = .{ - .eh_frame_ptr = eh_frame_ptr, - .table_enc = table_enc, - .fde_count = fde_count, - .entries = eh_frame_hdr[fbr.seek..][0..entries_len], - }; +pub fn scanDebugFrame( + unwind: *Unwind, + gpa: Allocator, + section_vaddr: u64, + section_bytes: []const u8, + addr_size_bytes: u8, + endian: Endian, +) void { + assert(unwind.debug_frame == null); + + var fbr: Reader = .fixed(section_bytes); + var fde_list: std.ArrayList(SortedFdeEntry) = .empty; + defer fde_list.deinit(gpa); + while (fbr.seek < fbr.buffer.len) { + const entry_offset = fbr.seek; + switch (try EntryHeader.read(&fbr, fbr.seek, .debug_frame, endian)) { + // Ignore CIEs; we only need them to parse the FDEs! 
+ .cie => |info| { + try fbr.discardAll(info.bytes_len); + continue; + }, + .fde => |info| { + const cie: CommonInformationEntry = cie: { + var cie_reader: Reader = .fixed(section_bytes[info.cie_offset..]); + const cie_info = switch (try EntryHeader.read(&cie_reader, info.cie_offset, .debug_frame, endian)) { + .cie => |cie_info| cie_info, + .fde, .terminator => return bad(), // This is meant to be a CIE + }; + break :cie try .parse(try cie_reader.take(cie_info.bytes_len), .debug_frame, addr_size_bytes); + }; + const fde: FrameDescriptionEntry = try .parse( + section_vaddr + fbr.seek, + try fbr.take(info.bytes_len), + cie, + endian, + ); + try fde_list.append(.{ + .pc_begin = fde.pc_begin, + .fde_offset = entry_offset, // *not* `fde_offset`, because we need to include the entry header + }); + }, + .terminator => return bad(), // DWARF `.debug_frame` isn't meant to have terminators + } + } + const fde_slice = try fde_list.toOwnedSlice(gpa); + errdefer comptime unreachable; + std.mem.sortUnstable(SortedFdeEntry, fde_slice, {}, struct { + fn lessThan(ctx: void, a: SortedFdeEntry, b: SortedFdeEntry) bool { + ctx; + return a.pc_begin < b.pc_begin; + } + }.lessThan); + unwind.debug_frame = .{ .data = section_bytes, .sorted_fdes = fde_slice }; +} + +pub fn scanEhFrame( + unwind: *Unwind, + gpa: Allocator, + header: EhFrameHeader, + section_bytes_ptr: [*]const u8, + /// This is separate from `section_bytes_ptr` because it is unknown when `.eh_frame` is accessed + /// through the pointer in the `.eh_frame_hdr` section. If this is non-`null`, we avoid reading + /// past this number of bytes, but if `null`, we must assume that the `.eh_frame` data has a + /// valid terminator. + section_bytes_len: ?usize, + addr_size_bytes: u8, + endian: Endian, +) !void { + assert(unwind.eh_frame == null); + + const section_bytes: []const u8 = bytes: { + // If the length is unknown, let the slice span from `section_bytes_ptr` to the end of memory. 
+ const len = section_bytes_len orelse (std.math.maxInt(usize) - @intFromPtr(section_bytes_ptr)); + break :bytes section_bytes_ptr[0..len]; + }; - // No need to scan .eh_frame, we have a binary search table already + if (header.search_table != null) { + // No need to populate `sorted_fdes`, the header contains a search table. + unwind.eh_frame = .{ + .header = header, + .eh_frame_data = section_bytes, + .sorted_fdes = null, + }; return; } - try di.scanCieFdeInfo(allocator, base_address); + // We aren't told the length of this section. Luckily, we don't need it, because there will be + // an `EntryHeader.terminator` after the last CIE/FDE. Just make a `Reader` which will give us + // alllll of the bytes! + var fbr: Reader = .fixed(section_bytes); + + var fde_list: std.ArrayList(SortedFdeEntry) = .empty; + defer fde_list.deinit(gpa); + + while (true) { + const entry_offset = fbr.seek; + switch (try EntryHeader.read(&fbr, fbr.seek, .eh_frame, endian)) { + // Ignore CIEs; we only need them to parse the FDEs! 
+ .cie => |info| { + try fbr.discardAll(info.bytes_len); + continue; + }, + .fde => |info| { + const cie: CommonInformationEntry = cie: { + var cie_reader: Reader = .fixed(section_bytes[info.cie_offset..]); + const cie_info = switch (try EntryHeader.read(&cie_reader, info.cie_offset, .eh_frame, endian)) { + .cie => |cie_info| cie_info, + .fde, .terminator => return bad(), // This is meant to be a CIE + }; + break :cie try .parse(try cie_reader.take(cie_info.bytes_len), .eh_frame, addr_size_bytes); + }; + const fde: FrameDescriptionEntry = try .parse( + header.eh_frame_vaddr + fbr.seek, + try fbr.take(info.bytes_len), + cie, + endian, + ); + try fde_list.append(gpa, .{ + .pc_begin = fde.pc_begin, + .fde_offset = entry_offset, // *not* `fde_offset`, because we need to include the entry header + }); + }, + // Unlike `.debug_frame`, the `.eh_frame` section does have a terminator CIE -- this is + // necessary because `header` doesn't include the length of the `.eh_frame` section + .terminator => break, + } + } + const fde_slice = try fde_list.toOwnedSlice(gpa); + errdefer comptime unreachable; + std.mem.sortUnstable(SortedFdeEntry, fde_slice, {}, struct { + fn lessThan(ctx: void, a: SortedFdeEntry, b: SortedFdeEntry) bool { + ctx; + return a.pc_begin < b.pc_begin; + } + }.lessThan); + unwind.eh_frame = .{ + .header = header, + .eh_frame_data = section_bytes, + .sorted_fdes = fde_slice, + }; } -/// Scan `.eh_frame` and `.debug_frame` and build a sorted list of FDEs for binary searching during -/// unwinding. 
-pub fn scanCieFdeInfo(unwind: *Unwind, allocator: Allocator, endian: Endian, base_address: usize) !void { - const frame_sections = [2]Section.Id{ .eh_frame, .debug_frame }; - for (frame_sections) |frame_section| { - if (unwind.section(frame_section)) |section_data| { - var fbr: Reader = .fixed(section_data); - while (fbr.seek < fbr.buffer.len) { - const entry_header = try EntryHeader.read(&fbr, frame_section, endian); - switch (entry_header.type) { - .cie => { - const cie = try CommonInformationEntry.parse( - entry_header.entry_bytes, - unwind.sectionVirtualOffset(frame_section, base_address).?, - true, - entry_header.format, - frame_section, - entry_header.length_offset, - @sizeOf(usize), - endian, - ); - try unwind.cie_map.put(allocator, entry_header.length_offset, cie); - }, - .fde => |cie_offset| { - const cie = unwind.cie_map.get(cie_offset) orelse return bad(); - const fde = try FrameDescriptionEntry.parse( - entry_header.entry_bytes, - unwind.sectionVirtualOffset(frame_section, base_address).?, - true, - cie, - @sizeOf(usize), - endian, - ); - try unwind.fde_list.append(allocator, fde); - }, - .terminator => break, - } - } - - std.mem.sortUnstable(FrameDescriptionEntry, unwind.fde_list.items, {}, struct { - fn lessThan(ctx: void, a: FrameDescriptionEntry, b: FrameDescriptionEntry) bool { - _ = ctx; - return a.pc_begin < b.pc_begin; - } - }.lessThan); +/// The return value may be a false positive. After loading the FDE with `loadFde`, the caller must +/// validate that `pc` is indeed in its range -- if it is not, then no FDE matches `pc`. +pub fn findFdeOffset(unwind: *const Unwind, pc: u64, addr_size_bytes: u8, endian: Endian) !?u64 { + // We'll break from this block only if we have a manually-constructed search table. 
+ const sorted_fdes: []const SortedFdeEntry = fdes: { + if (unwind.debug_frame) |df| break :fdes df.sorted_fdes; + if (unwind.eh_frame) |eh_frame| { + if (eh_frame.sorted_fdes) |fdes| break :fdes fdes; + // Use the search table from the `.eh_frame_hdr` section rather than one of our own + return eh_frame.header.findEntry(pc, addr_size_bytes, endian); } - } + // We have no available unwind info + return null; + }; + const first_bad_idx = std.sort.partitionPoint(SortedFdeEntry, sorted_fdes, pc, struct { + fn canIncludePc(target_pc: u64, entry: SortedFdeEntry) bool { + return target_pc >= entry.pc_begin; // i.e. does 'entry_pc..<last pc>' include 'target_pc' + } + }.canIncludePc); + // `first_bad_idx` is the index of the first FDE whose `pc_begin` is too high to include `pc`. + // So if any FDE matches, it'll be the one at `first_bad_idx - 1` (maybe false positive). + if (first_bad_idx == 0) return null; + return sorted_fdes[first_bad_idx - 1].fde_offset; +} + +pub fn loadFde(unwind: *const Unwind, fde_offset: u64, addr_size_bytes: u8, endian: Endian) !struct { Format, CommonInformationEntry, FrameDescriptionEntry } { + const section_bytes: []const u8, const section_vaddr: u64, const section: Section = s: { + if (unwind.debug_frame) |df| break :s .{ df.data, if (true) @panic("MLUGG TODO"), .debug_frame }; + if (unwind.eh_frame) |ef| break :s .{ ef.eh_frame_data, ef.header.eh_frame_vaddr, .eh_frame }; + unreachable; // how did you get `fde_offset`?! 
+ }; + + var fde_reader: Reader = .fixed(section_bytes[fde_offset..]); + const fde_info = switch (try EntryHeader.read(&fde_reader, fde_offset, section, endian)) { + .fde => |info| info, + .cie, .terminator => return bad(), // This is meant to be an FDE + }; + + const cie_offset = fde_info.cie_offset; + var cie_reader: Reader = .fixed(section_bytes[cie_offset..]); + const cie_info = switch (try EntryHeader.read(&cie_reader, cie_offset, section, endian)) { + .cie => |info| info, + .fde, .terminator => return bad(), // This is meant to be a CIE + }; + + const cie: CommonInformationEntry = try .parse( + try cie_reader.take(cie_info.bytes_len), + section, + addr_size_bytes, + ); + const fde: FrameDescriptionEntry = try .parse( + section_vaddr + fde_offset + fde_reader.seek, + try fde_reader.take(fde_info.bytes_len), + cie, + endian, + ); + + return .{ cie_info.format, cie, fde }; } const EhPointerContext = struct { // The address of the pointer field itself pc_rel_base: u64, - // Whether or not to follow indirect pointers. This should only be - // used when decoding pointers at runtime using the current process's - // debug info - follow_indirect: bool, - // These relative addressing modes are only used in specific cases, and // might not be available / required in all parsing contexts data_rel_base: ?u64 = null, text_rel_base: ?u64 = null, function_rel_base: ?u64 = null, }; - -fn readEhPointer(fbr: *Reader, enc: u8, addr_size_bytes: u8, ctx: EhPointerContext, endian: Endian) !?u64 { - if (enc == EH.PE.omit) return null; - - const value: union(enum) { - signed: i64, - unsigned: u64, - } = switch (enc & EH.PE.type_mask) { - EH.PE.absptr => .{ +/// Returns `error.InvalidDebugInfo` if the encoding is `EH.PE.omit`. 
+fn readEhPointerAbs(r: *Reader, enc_ty: EH.PE.Type, addr_size_bytes: u8, endian: Endian) !union(enum) { + signed: i64, + unsigned: u64, +} { + return switch (enc_ty) { + .absptr => .{ .unsigned = switch (addr_size_bytes) { - 2 => try fbr.takeInt(u16, endian), - 4 => try fbr.takeInt(u32, endian), - 8 => try fbr.takeInt(u64, endian), - else => return error.InvalidAddrSize, + 2 => try r.takeInt(u16, endian), + 4 => try r.takeInt(u32, endian), + 8 => try r.takeInt(u64, endian), + else => return error.UnsupportedAddrSize, }, }, - EH.PE.uleb128 => .{ .unsigned = try fbr.takeLeb128(u64) }, - EH.PE.udata2 => .{ .unsigned = try fbr.takeInt(u16, endian) }, - EH.PE.udata4 => .{ .unsigned = try fbr.takeInt(u32, endian) }, - EH.PE.udata8 => .{ .unsigned = try fbr.takeInt(u64, endian) }, - EH.PE.sleb128 => .{ .signed = try fbr.takeLeb128(i64) }, - EH.PE.sdata2 => .{ .signed = try fbr.takeInt(i16, endian) }, - EH.PE.sdata4 => .{ .signed = try fbr.takeInt(i32, endian) }, - EH.PE.sdata8 => .{ .signed = try fbr.takeInt(i64, endian) }, + .uleb128 => .{ .unsigned = try r.takeLeb128(u64) }, + .udata2 => .{ .unsigned = try r.takeInt(u16, endian) }, + .udata4 => .{ .unsigned = try r.takeInt(u32, endian) }, + .udata8 => .{ .unsigned = try r.takeInt(u64, endian) }, + .sleb128 => .{ .signed = try r.takeLeb128(i64) }, + .sdata2 => .{ .signed = try r.takeInt(i16, endian) }, + .sdata4 => .{ .signed = try r.takeInt(i32, endian) }, + .sdata8 => .{ .signed = try r.takeInt(i64, endian) }, else => return bad(), }; - - const base = switch (enc & EH.PE.rel_mask) { - EH.PE.pcrel => ctx.pc_rel_base, - EH.PE.textrel => ctx.text_rel_base orelse return error.PointerBaseNotSpecified, - EH.PE.datarel => ctx.data_rel_base orelse return error.PointerBaseNotSpecified, - EH.PE.funcrel => ctx.function_rel_base orelse return error.PointerBaseNotSpecified, - else => null, +} +/// Returns `error.InvalidDebugInfo` if the encoding is `EH.PE.omit`. 
+fn readEhPointer(fbr: *Reader, enc: EH.PE, addr_size_bytes: u8, ctx: EhPointerContext, endian: Endian) !u64 { + const offset = try readEhPointerAbs(fbr, enc.type, addr_size_bytes, endian); + const base = switch (enc.rel) { + .abs, .aligned => 0, + .pcrel => ctx.pc_rel_base, + .textrel => ctx.text_rel_base orelse return bad(), + .datarel => ctx.data_rel_base orelse return bad(), + .funcrel => ctx.function_rel_base orelse return bad(), + .indirect => return bad(), // GCC extension; not supported + _ => return bad(), }; - - const ptr: u64 = if (base) |b| switch (value) { - .signed => |s| @intCast(try std.math.add(i64, s, @as(i64, @intCast(b)))), + return switch (offset) { + .signed => |s| @intCast(try std.math.add(i64, s, @as(i64, @intCast(base)))), // absptr can actually contain signed values in some cases (aarch64 MachO) - .unsigned => |u| u +% b, - } else switch (value) { - .signed => |s| @as(u64, @intCast(s)), - .unsigned => |u| u, + .unsigned => |u| u +% base, }; - - if ((enc & EH.PE.indirect) > 0 and ctx.follow_indirect) { - if (@sizeOf(usize) != addr_size_bytes) { - // See the documentation for `follow_indirect` - return error.NonNativeIndirection; - } - - const native_ptr = cast(usize, ptr) orelse return error.PointerOverflow; - return switch (addr_size_bytes) { - 2, 4, 8 => return @as(*const usize, @ptrFromInt(native_ptr)).*, - else => return error.UnsupportedAddrSize, - }; - } else { - return ptr; - } } -fn pcRelBase(field_ptr: usize, pc_rel_offset: i64) !usize { - if (pc_rel_offset < 0) { - return std.math.sub(usize, field_ptr, @as(usize, @intCast(-pc_rel_offset))); - } else { - return std.math.add(usize, field_ptr, @as(usize, @intCast(pc_rel_offset))); - } +/// Like `Reader.fixed`, but when the length of the data is unknown and we just want to allow +/// reading indefinitely. 
+fn maxSlice(ptr: [*]const u8) []const u8 { + const len = std.math.maxInt(usize) - @intFromPtr(ptr); + return ptr[0..len]; } const Allocator = std.mem.Allocator; diff --git a/lib/std/debug/Dwarf/Unwind/VirtualMachine.zig b/lib/std/debug/Dwarf/Unwind/VirtualMachine.zig new file mode 100644 index 0000000000..66100f5eda --- /dev/null +++ b/lib/std/debug/Dwarf/Unwind/VirtualMachine.zig @@ -0,0 +1,298 @@ +//! Virtual machine that evaluates DWARF call frame instructions + +/// See section 6.4.1 of the DWARF5 specification for details on each +pub const RegisterRule = union(enum) { + /// The spec says that the default rule for each column is the undefined rule. + /// However, it also allows ABI / compiler authors to specify alternate defaults, so + /// there is a distinction made here. + default: void, + undefined: void, + same_value: void, + /// offset(N) + offset: i64, + /// val_offset(N) + val_offset: i64, + /// register(R) + register: u8, + /// expression(E) + expression: []const u8, + /// val_expression(E) + val_expression: []const u8, + /// Augmenter-defined rule + architectural: void, +}; + +/// Each row contains unwinding rules for a set of registers. +pub const Row = struct { + /// Offset from `FrameDescriptionEntry.pc_begin` + offset: u64 = 0, + /// Special-case column that defines the CFA (Canonical Frame Address) rule. + /// The register field of this column defines the register that CFA is derived from. + cfa: Column = .{}, + /// The register fields in these columns define the register the rule applies to. + columns: ColumnRange = .{}, + /// Indicates that the next write to any column in this row needs to copy + /// the backing column storage first, as it may be referenced by previous rows. + copy_on_write: bool = false, +}; + +pub const Column = struct { + register: ?u8 = null, + rule: RegisterRule = .{ .default = {} }, +}; + +const ColumnRange = struct { + /// Index into `columns` of the first column in this row. 
+ start: usize = undefined, + len: u8 = 0, +}; + +columns: std.ArrayList(Column) = .empty, +stack: std.ArrayList(ColumnRange) = .empty, +current_row: Row = .{}, + +/// The result of executing the CIE's initial_instructions +cie_row: ?Row = null, + +pub fn deinit(self: *VirtualMachine, gpa: Allocator) void { + self.stack.deinit(gpa); + self.columns.deinit(gpa); + self.* = undefined; +} + +pub fn reset(self: *VirtualMachine) void { + self.stack.clearRetainingCapacity(); + self.columns.clearRetainingCapacity(); + self.current_row = .{}; + self.cie_row = null; +} + +/// Return a slice backed by the row's non-CFA columns +pub fn rowColumns(self: VirtualMachine, row: Row) []Column { + if (row.columns.len == 0) return &.{}; + return self.columns.items[row.columns.start..][0..row.columns.len]; +} + +/// Either retrieves or adds a column for `register` (non-CFA) in the current row. +fn getOrAddColumn(self: *VirtualMachine, gpa: Allocator, register: u8) !*Column { + for (self.rowColumns(self.current_row)) |*c| { + if (c.register == register) return c; + } + + if (self.current_row.columns.len == 0) { + self.current_row.columns.start = self.columns.items.len; + } + self.current_row.columns.len += 1; + + const column = try self.columns.addOne(gpa); + column.* = .{ + .register = register, + }; + + return column; +} + +/// Runs the CIE instructions, then the FDE instructions. Execution halts +/// once the row that corresponds to `pc` is known, and the row is returned. 
+pub fn runTo( + self: *VirtualMachine, + gpa: Allocator, + pc: u64, + cie: Dwarf.Unwind.CommonInformationEntry, + fde: Dwarf.Unwind.FrameDescriptionEntry, + addr_size_bytes: u8, + endian: std.builtin.Endian, +) !Row { + assert(self.cie_row == null); + assert(pc >= fde.pc_begin); + assert(pc < fde.pc_begin + fde.pc_range); + + var prev_row: Row = self.current_row; + + const instruction_slices: [2][]const u8 = .{ + cie.initial_instructions, + fde.instructions, + }; + for (instruction_slices, [2]bool{ true, false }) |slice, is_cie_stream| { + var stream: std.Io.Reader = .fixed(slice); + while (stream.seek < slice.len) { + const instruction: Dwarf.call_frame.Instruction = try .read(&stream, addr_size_bytes, endian); + prev_row = try self.step(gpa, cie, is_cie_stream, instruction); + if (pc < fde.pc_begin + self.current_row.offset) return prev_row; + } + } + + return self.current_row; +} + +fn resolveCopyOnWrite(self: *VirtualMachine, gpa: Allocator) !void { + if (!self.current_row.copy_on_write) return; + + const new_start = self.columns.items.len; + if (self.current_row.columns.len > 0) { + try self.columns.ensureUnusedCapacity(gpa, self.current_row.columns.len); + self.columns.appendSliceAssumeCapacity(self.rowColumns(self.current_row)); + self.current_row.columns.start = new_start; + } +} + +/// Executes a single instruction. +/// If this instruction is from the CIE, `is_initial` should be set. +/// Returns the value of `current_row` before executing this instruction. 
+pub fn step( + self: *VirtualMachine, + gpa: Allocator, + cie: Dwarf.Unwind.CommonInformationEntry, + is_initial: bool, + instruction: Dwarf.call_frame.Instruction, +) !Row { + // CIE instructions must be run before FDE instructions + assert(!is_initial or self.cie_row == null); + if (!is_initial and self.cie_row == null) { + self.cie_row = self.current_row; + self.current_row.copy_on_write = true; + } + + const prev_row = self.current_row; + switch (instruction) { + .set_loc => |i| { + if (i.address <= self.current_row.offset) return error.InvalidOperation; + if (cie.segment_selector_size != 0) return error.InvalidOperation; // unsupported + // TODO: Check cie.segment_selector_size != 0 for DWARFV4 + self.current_row.offset = i.address; + }, + inline .advance_loc, + .advance_loc1, + .advance_loc2, + .advance_loc4, + => |i| { + self.current_row.offset += i.delta * cie.code_alignment_factor; + self.current_row.copy_on_write = true; + }, + inline .offset, + .offset_extended, + .offset_extended_sf, + => |i| { + try self.resolveCopyOnWrite(gpa); + const column = try self.getOrAddColumn(gpa, i.register); + column.rule = .{ .offset = @as(i64, @intCast(i.offset)) * cie.data_alignment_factor }; + }, + inline .restore, + .restore_extended, + => |i| { + try self.resolveCopyOnWrite(gpa); + if (self.cie_row) |cie_row| { + const column = try self.getOrAddColumn(gpa, i.register); + column.rule = for (self.rowColumns(cie_row)) |cie_column| { + if (cie_column.register == i.register) break cie_column.rule; + } else .{ .default = {} }; + } else return error.InvalidOperation; + }, + .nop => {}, + .undefined => |i| { + try self.resolveCopyOnWrite(gpa); + const column = try self.getOrAddColumn(gpa, i.register); + column.rule = .{ .undefined = {} }; + }, + .same_value => |i| { + try self.resolveCopyOnWrite(gpa); + const column = try self.getOrAddColumn(gpa, i.register); + column.rule = .{ .same_value = {} }; + }, + .register => |i| { + try self.resolveCopyOnWrite(gpa); + const column = 
try self.getOrAddColumn(gpa, i.register); + column.rule = .{ .register = i.target_register }; + }, + .remember_state => { + try self.stack.append(gpa, self.current_row.columns); + self.current_row.copy_on_write = true; + }, + .restore_state => { + const restored_columns = self.stack.pop() orelse return error.InvalidOperation; + self.columns.shrinkRetainingCapacity(self.columns.items.len - self.current_row.columns.len); + try self.columns.ensureUnusedCapacity(gpa, restored_columns.len); + + self.current_row.columns.start = self.columns.items.len; + self.current_row.columns.len = restored_columns.len; + self.columns.appendSliceAssumeCapacity(self.columns.items[restored_columns.start..][0..restored_columns.len]); + }, + .def_cfa => |i| { + try self.resolveCopyOnWrite(gpa); + self.current_row.cfa = .{ + .register = i.register, + .rule = .{ .val_offset = @intCast(i.offset) }, + }; + }, + .def_cfa_sf => |i| { + try self.resolveCopyOnWrite(gpa); + self.current_row.cfa = .{ + .register = i.register, + .rule = .{ .val_offset = i.offset * cie.data_alignment_factor }, + }; + }, + .def_cfa_register => |i| { + try self.resolveCopyOnWrite(gpa); + if (self.current_row.cfa.register == null or self.current_row.cfa.rule != .val_offset) return error.InvalidOperation; + self.current_row.cfa.register = i.register; + }, + .def_cfa_offset => |i| { + try self.resolveCopyOnWrite(gpa); + if (self.current_row.cfa.register == null or self.current_row.cfa.rule != .val_offset) return error.InvalidOperation; + self.current_row.cfa.rule = .{ + .val_offset = @intCast(i.offset), + }; + }, + .def_cfa_offset_sf => |i| { + try self.resolveCopyOnWrite(gpa); + if (self.current_row.cfa.register == null or self.current_row.cfa.rule != .val_offset) return error.InvalidOperation; + self.current_row.cfa.rule = .{ + .val_offset = i.offset * cie.data_alignment_factor, + }; + }, + .def_cfa_expression => |i| { + try self.resolveCopyOnWrite(gpa); + self.current_row.cfa.register = undefined; + 
self.current_row.cfa.rule = .{ + .expression = i.block, + }; + }, + .expression => |i| { + try self.resolveCopyOnWrite(gpa); + const column = try self.getOrAddColumn(gpa, i.register); + column.rule = .{ + .expression = i.block, + }; + }, + .val_offset => |i| { + try self.resolveCopyOnWrite(gpa); + const column = try self.getOrAddColumn(gpa, i.register); + column.rule = .{ + .val_offset = @as(i64, @intCast(i.offset)) * cie.data_alignment_factor, + }; + }, + .val_offset_sf => |i| { + try self.resolveCopyOnWrite(gpa); + const column = try self.getOrAddColumn(gpa, i.register); + column.rule = .{ + .val_offset = i.offset * cie.data_alignment_factor, + }; + }, + .val_expression => |i| { + try self.resolveCopyOnWrite(gpa); + const column = try self.getOrAddColumn(gpa, i.register); + column.rule = .{ + .val_expression = i.block, + }; + }, + } + + return prev_row; +} + +const std = @import("../../../std.zig"); +const assert = std.debug.assert; +const Allocator = std.mem.Allocator; +const Dwarf = std.debug.Dwarf; + +const VirtualMachine = @This(); diff --git a/lib/std/debug/Dwarf/call_frame.zig b/lib/std/debug/Dwarf/call_frame.zig index f78ed4378b..8f1758f4eb 100644 --- a/lib/std/debug/Dwarf/call_frame.zig +++ b/lib/std/debug/Dwarf/call_frame.zig @@ -1,12 +1,5 @@ -const builtin = @import("builtin"); const std = @import("../../std.zig"); -const mem = std.mem; -const debug = std.debug; -const leb = std.leb; -const DW = std.dwarf; -const abi = std.debug.Dwarf.abi; -const assert = std.debug.assert; -const native_endian = builtin.cpu.arch.endian(); +const Reader = std.Io.Reader; /// TODO merge with std.dwarf.CFA const Opcode = enum(u8) { @@ -51,9 +44,13 @@ const Opcode = enum(u8) { pub const hi_user = 0x3f; }; -fn readBlock(reader: *std.Io.Reader) ![]const u8 { +/// The returned slice points into `reader.buffer`. 
+fn readBlock(reader: *Reader) ![]const u8 { const block_len = try reader.takeLeb128(usize); - return reader.take(block_len); + return reader.take(block_len) catch |err| switch (err) { + error.EndOfStream => return error.InvalidOperand, + error.ReadFailed => |e| return e, + }; } pub const Instruction = union(Opcode) { @@ -140,8 +137,9 @@ pub const Instruction = union(Opcode) { block: []const u8, }, + /// `reader` must be a `Reader.fixed` so that regions of its buffer are never invalidated. pub fn read( - reader: *std.Io.Reader, + reader: *Reader, addr_size_bytes: u8, endian: std.builtin.Endian, ) !Instruction { @@ -173,16 +171,14 @@ pub const Instruction = union(Opcode) { .restore, => unreachable, .nop => .{ .nop = {} }, - .set_loc => .{ - .set_loc = .{ - .address = switch (addr_size_bytes) { - 2 => try reader.takeInt(u16, endian), - 4 => try reader.takeInt(u32, endian), - 8 => try reader.takeInt(u64, endian), - else => return error.InvalidAddrSize, - }, + .set_loc => .{ .set_loc = .{ + .address = switch (addr_size_bytes) { + 2 => try reader.takeInt(u16, endian), + 4 => try reader.takeInt(u32, endian), + 8 => try reader.takeInt(u64, endian), + else => return error.UnsupportedAddrSize, }, - }, + } }, .advance_loc1 => .{ .advance_loc1 = .{ .delta = try reader.takeByte() }, }, diff --git a/lib/std/debug/SelfInfo.zig b/lib/std/debug/SelfInfo.zig index 4a29eb0fa7..daa5cf12d2 100644 --- a/lib/std/debug/SelfInfo.zig +++ b/lib/std/debug/SelfInfo.zig @@ -13,7 +13,6 @@ const windows = std.os.windows; const macho = std.macho; const fs = std.fs; const coff = std.coff; -const pdb = std.pdb; const assert = std.debug.assert; const posix = std.posix; const elf = std.elf; @@ -22,86 +21,37 @@ const Pdb = std.debug.Pdb; const File = std.fs.File; const math = std.math; const testing = std.testing; -const StackIterator = std.debug.StackIterator; const regBytes = Dwarf.abi.regBytes; const regValueNative = Dwarf.abi.regValueNative; const SelfInfo = @This(); -const root = @import("root"); 
- -allocator: Allocator, -address_map: std.AutoHashMapUnmanaged(usize, Module), -modules: if (native_os == .windows) std.ArrayListUnmanaged(WindowsModule) else void, - -pub const OpenError = error{ - MissingDebugInfo, - UnsupportedOperatingSystem, -} || @typeInfo(@typeInfo(@TypeOf(SelfInfo.init)).@"fn".return_type.?).error_union.error_set; - -pub fn open(allocator: Allocator) OpenError!SelfInfo { - if (builtin.strip_debug_info) - return error.MissingDebugInfo; - switch (native_os) { - .linux, - .freebsd, - .netbsd, - .dragonfly, - .openbsd, - .macos, - .solaris, - .illumos, - .windows, - => return try SelfInfo.init(allocator), - else => return error.UnsupportedOperatingSystem, - } -} - -pub fn init(allocator: Allocator) !SelfInfo { - var debug_info: SelfInfo = .{ - .allocator = allocator, - .address_map = .empty, - .modules = if (native_os == .windows) .{} else {}, - }; - - if (native_os == .windows) { - errdefer debug_info.modules.deinit(allocator); - - const handle = windows.kernel32.CreateToolhelp32Snapshot(windows.TH32CS_SNAPMODULE | windows.TH32CS_SNAPMODULE32, 0); - if (handle == windows.INVALID_HANDLE_VALUE) { - switch (windows.GetLastError()) { - else => |err| return windows.unexpectedError(err), - } - } - defer windows.CloseHandle(handle); - - var module_entry: windows.MODULEENTRY32 = undefined; - module_entry.dwSize = @sizeOf(windows.MODULEENTRY32); - if (windows.kernel32.Module32First(handle, &module_entry) == 0) { - return error.MissingDebugInfo; - } - - var module_valid = true; - while (module_valid) { - const module_info = try debug_info.modules.addOne(allocator); - const name = allocator.dupe(u8, mem.sliceTo(&module_entry.szModule, 0)) catch &.{}; - errdefer allocator.free(name); - - module_info.* = .{ - .base_address = @intFromPtr(module_entry.modBaseAddr), - .size = module_entry.modBaseSize, - .name = name, - .handle = module_entry.hModule, - }; - - module_valid = windows.kernel32.Module32Next(handle, &module_entry) == 1; - } - } +/// MLUGG TODO: 
what if this field had a less stupid name... +address_map: std.AutoHashMapUnmanaged(usize, Module.DebugInfo), + +module_cache: if (native_os == .windows) std.ArrayListUnmanaged(windows.MODULEENTRY32) else void, + +pub const target_supported: bool = switch (native_os) { + .linux, + .freebsd, + .netbsd, + .dragonfly, + .openbsd, + .macos, + .solaris, + .illumos, + .windows, + => true, + else => false, +}; - return debug_info; -} +pub const init: SelfInfo = .{ + .address_map = .empty, + .module_cache = if (native_os == .windows) .empty, +}; pub fn deinit(self: *SelfInfo) void { + // MLUGG TODO: that's amusing, this function is straight-up unused. i... wonder if it even should be used anywhere? perhaps not... so perhaps it should not even exist...???? var it = self.address_map.iterator(); while (it.next()) |entry| { const mdi = entry.value_ptr.*; @@ -118,49 +68,91 @@ pub fn deinit(self: *SelfInfo) void { } } -fn lookupModuleForAddress(self: *SelfInfo, address: usize) !Module.Lookup { +fn lookupModuleForAddress(self: *SelfInfo, gpa: Allocator, address: usize) !Module { if (builtin.target.os.tag.isDarwin()) { return self.lookupModuleDyld(address); } else if (native_os == .windows) { - return self.lookupModuleWin32(address); + return self.lookupModuleWin32(gpa, address); } else if (native_os == .haiku) { - return self.lookupModuleHaiku(address); + @panic("TODO implement lookup module for Haiku"); } else if (builtin.target.cpu.arch.isWasm()) { - return self.lookupModuleWasm(address); + @panic("TODO implement lookup module for Wasm"); } else { return self.lookupModuleDl(address); } } -fn loadModuleDebugInfo(self: *SelfInfo, lookup: *const Module.Lookup, module: *Module) !void { +fn loadModuleDebugInfo(gpa: Allocator, module: *const Module, di: *Module.DebugInfo) !void { + // MLUGG TODO: this should totally just go into the `Module` impl or something, right? 
lol + if (builtin.target.os.tag.isDarwin()) { + try loadMachODebugInfo(gpa, module, di); + } else if (native_os == .windows) { + // MLUGG TODO: deal with 'already loaded' properly + try readCoffDebugInfo(gpa, module, di); + } else if (native_os == .haiku) { + unreachable; + } else if (builtin.target.cpu.arch.isWasm()) { + unreachable; + } else { + if (di.mapped_memory != null) return; // already loaded + const filename: ?[]const u8 = if (module.name.len > 0) module.name else null; + const mapped_mem = mapFileOrSelfExe(filename) catch |err| switch (err) { + error.FileNotFound => return error.MissingDebugInfo, + error.FileTooBig => return error.InvalidDebugInfo, + else => |e| return e, + }; + errdefer posix.munmap(mapped_mem); + try di.load(gpa, mapped_mem, module.build_id, null, null, null, filename); + assert(di.mapped_memory != null); + } +} + +fn loadModuleUnwindInfo(gpa: Allocator, module: *const Module, di: *Module.DebugInfo) !void { if (builtin.target.os.tag.isDarwin()) { - @compileError("TODO"); + // MLUGG TODO HACKHACK + try loadMachODebugInfo(gpa, module, di); } else if (native_os == .windows) { - @compileError("TODO"); + comptime unreachable; // not supported } else if (native_os == .haiku) { - @compileError("TODO"); + comptime unreachable; // not supported } else if (builtin.target.cpu.arch.isWasm()) { - @compileError("TODO"); + comptime unreachable; // not supported } else { - if (module.mapped_memory == null) { - var sections: Dwarf.SectionArray = @splat(null); - try readElfDebugInfo(module, self.allocator, if (lookup.name.len > 0) lookup.name else null, lookup.build_id, §ions); - assert(module.mapped_memory != null); + eh_frame: { + if (di.unwind.eh_frame != null) break :eh_frame; // already loaded + const eh_frame_hdr_bytes = module.gnu_eh_frame orelse break :eh_frame; + const eh_frame_hdr: Dwarf.Unwind.EhFrameHeader = try .parse( + @intFromPtr(eh_frame_hdr_bytes.ptr) - module.load_offset, + eh_frame_hdr_bytes, + @sizeOf(usize), + native_endian, + ); 
+ const eh_frame_addr = module.load_offset + @as(usize, @intCast(eh_frame_hdr.eh_frame_vaddr)); + try di.unwind.scanEhFrame( + gpa, + eh_frame_hdr, + @ptrFromInt(eh_frame_addr), + null, + @sizeOf(usize), + native_endian, + ); } } } -pub fn unwindFrame(self: *SelfInfo, context: *UnwindContext) !usize { - const lookup = try self.lookupModuleForAddress(context.pc); - const gop = try self.address_map.getOrPut(self.allocator, lookup.base_address); - if (!gop.found_existing) gop.value_ptr.* = .init(&lookup); +pub fn unwindFrame(self: *SelfInfo, gpa: Allocator, context: *UnwindContext) !usize { + comptime assert(target_supported); + const module = try self.lookupModuleForAddress(gpa, context.pc); + const gop = try self.address_map.getOrPut(gpa, module.load_offset); + if (!gop.found_existing) gop.value_ptr.* = .init; + try loadModuleUnwindInfo(gpa, &module, gop.value_ptr); if (native_os.isDarwin()) { // __unwind_info is a requirement for unwinding on Darwin. It may fall back to DWARF, but unwinding // via DWARF before attempting to use the compact unwind info will produce incorrect results. 
if (gop.value_ptr.unwind_info) |unwind_info| { if (unwindFrameMachO( - self.allocator, - lookup.base_address, + module.text_base, + module.load_offset, context, unwind_info, gop.value_ptr.eh_frame, @@ -169,292 +161,42 @@ pub fn unwindFrame(self: *SelfInfo, context: *UnwindContext) !usize { } else |err| { if (err != error.RequiresDWARFUnwind) return err; } - } else return error.MissingUnwindInfo; + } + return error.MissingUnwindInfo; + } + if (try gop.value_ptr.getDwarfUnwindForAddress(gpa, context.pc)) |unwind| { + return unwindFrameDwarf(unwind, module.load_offset, context, null); } - if (try gop.value_ptr.getDwarfUnwindForAddress(self.allocator, context.pc)) |unwind| { - return unwindFrameDwarf(self.allocator, unwind, lookup.base_address, context, null); - } else return error.MissingDebugInfo; + return error.MissingDebugInfo; } -pub fn getSymbolAtAddress(self: *SelfInfo, address: usize) !std.debug.Symbol { - const lookup = try self.lookupModuleForAddress(address); - const gop = try self.address_map.getOrPut(self.allocator, lookup.base_address); - if (!gop.found_existing) gop.value_ptr.* = .init(&lookup); - try self.loadModuleDebugInfo(&lookup, gop.value_ptr); - return gop.value_ptr.getSymbolAtAddress(self.allocator, native_endian, lookup.base_address, address); +pub fn getSymbolAtAddress(self: *SelfInfo, gpa: Allocator, address: usize) !std.debug.Symbol { + comptime assert(target_supported); + const module = try self.lookupModuleForAddress(gpa, address); + const gop = try self.address_map.getOrPut(gpa, module.key()); + if (!gop.found_existing) gop.value_ptr.* = .init; + try loadModuleDebugInfo(gpa, &module, gop.value_ptr); + return module.getSymbolAtAddress(gpa, gop.value_ptr, address); } /// Returns the module name for a given address. /// This can be called when getModuleForAddress fails, so implementations should provide /// a path that doesn't rely on any side-effects of a prior successful module lookup. 
-pub fn getModuleNameForAddress(self: *SelfInfo, address: usize) ?[]const u8 { - return if (self.lookupModuleForAddress(address)) |lookup| lookup.name else |err| switch (err) { - error.MissingDebugInfo => null, - }; -} - -fn lookupModuleDyld(self: *SelfInfo, address: usize) !*Module { - const image_count = std.c._dyld_image_count(); - - var i: u32 = 0; - while (i < image_count) : (i += 1) { - const header = std.c._dyld_get_image_header(i) orelse continue; - const base_address = @intFromPtr(header); - if (address < base_address) continue; - const vmaddr_slide = std.c._dyld_get_image_vmaddr_slide(i); - - var it = macho.LoadCommandIterator{ - .ncmds = header.ncmds, - .buffer = @alignCast(@as( - [*]u8, - @ptrFromInt(@intFromPtr(header) + @sizeOf(macho.mach_header_64)), - )[0..header.sizeofcmds]), - }; - - var unwind_info: ?[]const u8 = null; - var eh_frame: ?[]const u8 = null; - while (it.next()) |cmd| switch (cmd.cmd()) { - .SEGMENT_64 => { - const segment_cmd = cmd.cast(macho.segment_command_64).?; - if (!mem.eql(u8, "__TEXT", segment_cmd.segName())) continue; - - const seg_start = segment_cmd.vmaddr + vmaddr_slide; - const seg_end = seg_start + segment_cmd.vmsize; - if (address >= seg_start and address < seg_end) { - if (self.address_map.get(base_address)) |obj_di| { - return obj_di; - } - - for (cmd.getSections()) |sect| { - const sect_addr: usize = @intCast(sect.addr); - const sect_size: usize = @intCast(sect.size); - if (mem.eql(u8, "__unwind_info", sect.sectName())) { - unwind_info = @as([*]const u8, @ptrFromInt(sect_addr + vmaddr_slide))[0..sect_size]; - } else if (mem.eql(u8, "__eh_frame", sect.sectName())) { - eh_frame = @as([*]const u8, @ptrFromInt(sect_addr + vmaddr_slide))[0..sect_size]; - } - } - - const obj_di = try self.allocator.create(Module); - errdefer self.allocator.destroy(obj_di); - - const macho_path = mem.sliceTo(std.c._dyld_get_image_name(i), 0); - const macho_file = fs.cwd().openFile(macho_path, .{}) catch |err| switch (err) { - 
error.FileNotFound => return error.MissingDebugInfo, - else => return err, - }; - obj_di.* = try readMachODebugInfo(self.allocator, macho_file); - obj_di.base_address = base_address; - obj_di.vmaddr_slide = vmaddr_slide; - obj_di.unwind_info = unwind_info; - obj_di.eh_frame = eh_frame; - - try self.address_map.putNoClobber(base_address, obj_di); - - return obj_di; - } - }, - else => {}, - }; - } - - return error.MissingDebugInfo; -} - -fn lookupModuleNameDyld(self: *SelfInfo, address: usize) ?[]const u8 { - _ = self; - const image_count = std.c._dyld_image_count(); - - var i: u32 = 0; - while (i < image_count) : (i += 1) { - const header = std.c._dyld_get_image_header(i) orelse continue; - const base_address = @intFromPtr(header); - if (address < base_address) continue; - const vmaddr_slide = std.c._dyld_get_image_vmaddr_slide(i); - - var it = macho.LoadCommandIterator{ - .ncmds = header.ncmds, - .buffer = @alignCast(@as( - [*]u8, - @ptrFromInt(@intFromPtr(header) + @sizeOf(macho.mach_header_64)), - )[0..header.sizeofcmds]), - }; - - while (it.next()) |cmd| switch (cmd.cmd()) { - .SEGMENT_64 => { - const segment_cmd = cmd.cast(macho.segment_command_64).?; - if (!mem.eql(u8, "__TEXT", segment_cmd.segName())) continue; - - const original_address = address - vmaddr_slide; - const seg_start = segment_cmd.vmaddr; - const seg_end = seg_start + segment_cmd.vmsize; - if (original_address >= seg_start and original_address < seg_end) { - return fs.path.basename(mem.sliceTo(std.c._dyld_get_image_name(i), 0)); - } - }, - else => {}, - }; - } - - return null; -} - -fn lookupModuleWin32(self: *SelfInfo, address: usize) !*Module { - for (self.modules.items) |*module| { - if (address >= module.base_address and address < module.base_address + module.size) { - if (self.address_map.get(module.base_address)) |obj_di| { - return obj_di; - } - - const obj_di = try self.allocator.create(Module); - errdefer self.allocator.destroy(obj_di); - - const mapped_module = @as([*]const u8, 
@ptrFromInt(module.base_address))[0..module.size]; - var coff_obj = try coff.Coff.init(mapped_module, true); - - // The string table is not mapped into memory by the loader, so if a section name is in the - // string table then we have to map the full image file from disk. This can happen when - // a binary is produced with -gdwarf, since the section names are longer than 8 bytes. - if (coff_obj.strtabRequired()) { - var name_buffer: [windows.PATH_MAX_WIDE + 4:0]u16 = undefined; - // openFileAbsoluteW requires the prefix to be present - @memcpy(name_buffer[0..4], &[_]u16{ '\\', '?', '?', '\\' }); - - const process_handle = windows.GetCurrentProcess(); - const len = windows.kernel32.GetModuleFileNameExW( - process_handle, - module.handle, - @ptrCast(&name_buffer[4]), - windows.PATH_MAX_WIDE, - ); - - if (len == 0) return error.MissingDebugInfo; - const coff_file = fs.openFileAbsoluteW(name_buffer[0 .. len + 4 :0], .{}) catch |err| switch (err) { - error.FileNotFound => return error.MissingDebugInfo, - else => return err, - }; - errdefer coff_file.close(); - - var section_handle: windows.HANDLE = undefined; - const create_section_rc = windows.ntdll.NtCreateSection( - &section_handle, - windows.STANDARD_RIGHTS_REQUIRED | windows.SECTION_QUERY | windows.SECTION_MAP_READ, - null, - null, - windows.PAGE_READONLY, - // The documentation states that if no AllocationAttribute is specified, then SEC_COMMIT is the default. - // In practice, this isn't the case and specifying 0 will result in INVALID_PARAMETER_6. 
- windows.SEC_COMMIT, - coff_file.handle, - ); - if (create_section_rc != .SUCCESS) return error.MissingDebugInfo; - errdefer windows.CloseHandle(section_handle); - - var coff_len: usize = 0; - var base_ptr: usize = 0; - const map_section_rc = windows.ntdll.NtMapViewOfSection( - section_handle, - process_handle, - @ptrCast(&base_ptr), - null, - 0, - null, - &coff_len, - .ViewUnmap, - 0, - windows.PAGE_READONLY, - ); - if (map_section_rc != .SUCCESS) return error.MissingDebugInfo; - errdefer assert(windows.ntdll.NtUnmapViewOfSection(process_handle, @ptrFromInt(base_ptr)) == .SUCCESS); - - const section_view = @as([*]const u8, @ptrFromInt(base_ptr))[0..coff_len]; - coff_obj = try coff.Coff.init(section_view, false); - - module.mapped_file = .{ - .file = coff_file, - .section_handle = section_handle, - .section_view = section_view, - }; - } - errdefer if (module.mapped_file) |mapped_file| mapped_file.deinit(); - - obj_di.* = try readCoffDebugInfo(self.allocator, &coff_obj); - obj_di.base_address = module.base_address; - - try self.address_map.putNoClobber(module.base_address, obj_di); - return obj_di; - } - } - - return error.MissingDebugInfo; -} - -fn lookupModuleNameWin32(self: *SelfInfo, address: usize) ?[]const u8 { - for (self.modules.items) |module| { - if (address >= module.base_address and address < module.base_address + module.size) { - return module.name; - } - } - return null; -} - -fn lookupModuleNameDl(self: *SelfInfo, address: usize) ?[]const u8 { - _ = self; - - var ctx: struct { - // Input - address: usize, - // Output - name: []const u8 = "", - } = .{ .address = address }; - const CtxTy = @TypeOf(ctx); - - if (posix.dl_iterate_phdr(&ctx, error{Found}, struct { - fn callback(info: *posix.dl_phdr_info, size: usize, context: *CtxTy) !void { - _ = size; - if (context.address < info.addr) return; - const phdrs = info.phdr[0..info.phnum]; - for (phdrs) |*phdr| { - if (phdr.p_type != elf.PT_LOAD) continue; - - const seg_start = info.addr +% phdr.p_vaddr; - 
const seg_end = seg_start + phdr.p_memsz; - if (context.address >= seg_start and context.address < seg_end) { - context.name = mem.sliceTo(info.name, 0) orelse ""; - break; - } - } else return; - - return error.Found; - } - }.callback)) { - return null; - } else |err| switch (err) { - error.Found => return fs.path.basename(ctx.name), - } - - return null; +pub fn getModuleNameForAddress(self: *SelfInfo, gpa: Allocator, address: usize) error{ Unexpected, OutOfMemory, MissingDebugInfo }![]const u8 { + comptime assert(target_supported); + const module = try self.lookupModuleForAddress(gpa, address); + return module.name; } -fn lookupModuleDl(self: *SelfInfo, address: usize) !Module.Lookup { - var ctx: struct { - // Input +fn lookupModuleDl(self: *SelfInfo, address: usize) !Module { + _ = self; // MLUGG + const DlIterContext = struct { + /// input address: usize, - // Output - lookup: Module.Lookup, - } = .{ - .address = address, - .lookup = .{ - .base_address = undefined, - .name = undefined, - .build_id = null, - .gnu_eh_frame = null, - }, - }; - const CtxTy = @TypeOf(ctx); + /// output + module: Module, - posix.dl_iterate_phdr(&ctx, error{Found}, struct { - fn callback(info: *posix.dl_phdr_info, size: usize, context: *CtxTy) !void { + fn callback(info: *posix.dl_phdr_info, size: usize, context: *@This()) !void { _ = size; // The base address is too high if (context.address < info.addr) @@ -468,10 +210,13 @@ fn lookupModuleDl(self: *SelfInfo, address: usize) !Module.Lookup { const seg_start = info.addr +% phdr.p_vaddr; const seg_end = seg_start + phdr.p_memsz; if (context.address >= seg_start and context.address < seg_end) { - // Android libc uses NULL instead of an empty string to mark the - // main program - context.lookup.name = mem.sliceTo(info.name, 0) orelse ""; - context.lookup.base_address = info.addr; + context.module = .{ + .load_offset = info.addr, + // Android libc uses NULL instead of "" to mark the main program + .name = mem.sliceTo(info.name, 0) orelse 
"", + .build_id = null, + .gnu_eh_frame = null, + }; break; } } else return; @@ -480,17 +225,20 @@ fn lookupModuleDl(self: *SelfInfo, address: usize) !Module.Lookup { switch (phdr.p_type) { elf.PT_NOTE => { // Look for .note.gnu.build-id - const note_bytes = @as([*]const u8, @ptrFromInt(info.addr + phdr.p_vaddr))[0..phdr.p_memsz]; - const name_size = mem.readInt(u32, note_bytes[0..4], native_endian); - if (name_size != 4) continue; - const desc_size = mem.readInt(u32, note_bytes[4..8], native_endian); - const note_type = mem.readInt(u32, note_bytes[8..12], native_endian); + const segment_ptr: [*]const u8 = @ptrFromInt(info.addr + phdr.p_vaddr); + var r: std.Io.Reader = .fixed(segment_ptr[0..phdr.p_memsz]); + const name_size = r.takeInt(u32, native_endian) catch continue; + const desc_size = r.takeInt(u32, native_endian) catch continue; + const note_type = r.takeInt(u32, native_endian) catch continue; + const name = r.take(name_size) catch continue; if (note_type != elf.NT_GNU_BUILD_ID) continue; - if (!mem.eql(u8, "GNU\x00", note_bytes[12..16])) continue; - context.lookup.build_id = note_bytes[16..][0..desc_size]; + if (!mem.eql(u8, name, "GNU\x00")) continue; + const desc = r.take(desc_size) catch continue; + context.module.build_id = desc; }, elf.PT_GNU_EH_FRAME => { - context.lookup.gnu_eh_frame = @as([*]const u8, @ptrFromInt(info.addr + phdr.p_vaddr))[0..phdr.p_memsz]; + const segment_ptr: [*]const u8 = @ptrFromInt(info.addr + phdr.p_vaddr); + context.module.gnu_eh_frame = segment_ptr[0..phdr.p_memsz]; }, else => {}, } @@ -499,425 +247,558 @@ fn lookupModuleDl(self: *SelfInfo, address: usize) !Module.Lookup { // Stop the iteration return error.Found; } - }.callback) catch |err| switch (err) { - error.Found => return ctx.lookup, }; - if (true) return error.MissingDebugInfo; - - if (self.address_map.get(ctx.lookup.base_address)) |obj_di| { - return obj_di; - } + var ctx: DlIterContext = .{ + .address = address, + .module = undefined, + }; + 
posix.dl_iterate_phdr(&ctx, error{Found}, DlIterContext.callback) catch |err| switch (err) { + error.Found => return ctx.module, + }; + return error.MissingDebugInfo; +} - var sections: Dwarf.SectionArray = @splat(null); - if (ctx.lookup.gnu_eh_frame) |eh_frame_hdr| { - // This is a special case - pointer offsets inside .eh_frame_hdr - // are encoded relative to its base address, so we must use the - // version that is already memory mapped, and not the one that - // will be mapped separately from the ELF file. - sections[@intFromEnum(Dwarf.Unwind.Section.Id.eh_frame_hdr)] = .{ - .data = eh_frame_hdr, - .owned = false, +fn lookupModuleDyld(self: *SelfInfo, address: usize) !Module { + _ = self; // MLUGG + const image_count = std.c._dyld_image_count(); + for (0..image_count) |image_idx| { + const header = std.c._dyld_get_image_header(@intCast(image_idx)) orelse continue; + const text_base = @intFromPtr(header); + if (address < text_base) continue; + const load_offset = std.c._dyld_get_image_vmaddr_slide(@intCast(image_idx)); + + // Find the __TEXT segment + var it: macho.LoadCommandIterator = .{ + .ncmds = header.ncmds, + .buffer = @as([*]u8, @ptrCast(header))[@sizeOf(macho.mach_header_64)..][0..header.sizeofcmds], + }; + const text_segment_cmd, const text_sections = while (it.next()) |load_cmd| { + if (load_cmd.cmd() != .SEGMENT_64) continue; + const segment_cmd = load_cmd.cast(macho.segment_command_64).?; + if (!mem.eql(u8, segment_cmd.segName(), "__TEXT")) continue; + break .{ segment_cmd, load_cmd.getSections() }; + } else continue; + + const seg_start = load_offset + text_segment_cmd.vmaddr; + assert(seg_start == text_base); + const seg_end = seg_start + text_segment_cmd.vmsize; + if (address < seg_start or address >= seg_end) continue; + + // We've found the matching __TEXT segment. This is the image we need, but we must look + // for unwind info in it before returning. 
+ + var result: Module = .{ + .text_base = text_base, + .load_offset = load_offset, + .name = mem.span(std.c._dyld_get_image_name(@intCast(image_idx))), + .unwind_info = null, + .eh_frame = null, }; + for (text_sections) |sect| { + if (mem.eql(u8, sect.sectName(), "__unwind_info")) { + const sect_ptr: [*]u8 = @ptrFromInt(@as(usize, @intCast(load_offset + sect.addr))); + result.unwind_info = sect_ptr[0..@intCast(sect.size)]; + } else if (mem.eql(u8, sect.sectName(), "__eh_frame")) { + const sect_ptr: [*]u8 = @ptrFromInt(@as(usize, @intCast(load_offset + sect.addr))); + result.eh_frame = sect_ptr[0..@intCast(sect.size)]; + } + } + return result; } - - const obj_di = try self.allocator.create(Module); - errdefer self.allocator.destroy(obj_di); - obj_di.* = try readElfDebugInfo(self.allocator, if (ctx.lookup.name.len > 0) ctx.lookup.name else null, ctx.lookup.build_id, &sections); - obj_di.base_address = ctx.lookup.base_address; - - // Missing unwind info isn't treated as a failure, as the unwinder will fall back to FP-based unwinding - obj_di.dwarf.scanAllUnwindInfo(self.allocator, ctx.lookup.base_address) catch {}; - - try self.address_map.putNoClobber(self.allocator, ctx.lookup.base_address, obj_di); - - return obj_di; + return error.MissingDebugInfo; } -fn lookupModuleHaiku(self: *SelfInfo, address: usize) !*Module { - _ = self; - _ = address; - @panic("TODO implement lookup module for Haiku"); -} +fn lookupModuleWin32(self: *SelfInfo, gpa: Allocator, address: usize) !Module { + if (self.lookupModuleWin32Cache(address)) |m| return m; -fn lookupModuleWasm(self: *SelfInfo, address: usize) !*Module { - _ = self; - _ = address; - @panic("TODO implement lookup module for Wasm"); -} + { + // Check a new module hasn't been loaded + self.module_cache.clearRetainingCapacity(); -pub const Module = switch (native_os) { - .macos, .ios, .watchos, .tvos, .visionos => struct { - base_address: usize, - vmaddr_slide: usize, - mapped_memory: []align(std.heap.page_size_min) const u8, - 
symbols: []const MachoSymbol, - strings: [:0]const u8, - ofiles: OFileTable, - - // Backed by the in-memory sections mapped by the loader - unwind_info: ?[]const u8 = null, - eh_frame: ?[]const u8 = null, - - const OFileTable = std.StringHashMap(OFileInfo); - const OFileInfo = struct { - di: Dwarf, - addr_table: std.StringHashMap(u64), - }; + const handle = windows.kernel32.CreateToolhelp32Snapshot(windows.TH32CS_SNAPMODULE | windows.TH32CS_SNAPMODULE32, 0); + if (handle == windows.INVALID_HANDLE_VALUE) { + return windows.unexpectedError(windows.GetLastError()); + } + defer windows.CloseHandle(handle); - pub fn deinit(self: *@This(), allocator: Allocator) void { - var it = self.ofiles.iterator(); - while (it.next()) |entry| { - const ofile = entry.value_ptr; - ofile.di.deinit(allocator); - ofile.addr_table.deinit(); + var entry: windows.MODULEENTRY32 = undefined; + entry.dwSize = @sizeOf(windows.MODULEENTRY32); + if (windows.kernel32.Module32First(handle, &entry) != 0) { + try self.module_cache.append(gpa, entry); + while (windows.kernel32.Module32Next(handle, &entry) != 0) { + try self.module_cache.append(gpa, entry); } - self.ofiles.deinit(); - allocator.free(self.symbols); - posix.munmap(self.mapped_memory); } + } - fn loadOFile(self: *@This(), allocator: Allocator, o_file_path: []const u8) !*OFileInfo { - const o_file = try fs.cwd().openFile(o_file_path, .{}); - const mapped_mem = try mapWholeFile(o_file); - - const hdr: *const macho.mach_header_64 = @ptrCast(@alignCast(mapped_mem.ptr)); - if (hdr.magic != std.macho.MH_MAGIC_64) - return error.InvalidDebugInfo; - - var segcmd: ?macho.LoadCommandIterator.LoadCommand = null; - var symtabcmd: ?macho.symtab_command = null; - var it = macho.LoadCommandIterator{ - .ncmds = hdr.ncmds, - .buffer = mapped_mem[@sizeOf(macho.mach_header_64)..][0..hdr.sizeofcmds], - }; - while (it.next()) |cmd| switch (cmd.cmd()) { - .SEGMENT_64 => segcmd = cmd, - .SYMTAB => symtabcmd = cmd.cast(macho.symtab_command).?, - else => {}, + if 
(self.lookupModuleWin32Cache(address)) |m| return m; + return error.MissingDebugInfo; +} +fn lookupModuleWin32Cache(self: *SelfInfo, address: usize) ?Module { + for (self.module_cache.items) |*entry| { + const base_address = @intFromPtr(entry.modBaseAddr); + if (address >= base_address and address < base_address + entry.modBaseSize) { + return .{ + .base_address = base_address, + .size = entry.modBaseSize, + .name = std.mem.sliceTo(&entry.szModule, 0), + .handle = entry.hModule, }; + } + } + return null; +} - if (segcmd == null or symtabcmd == null) return error.MissingDebugInfo; - - // Parse symbols - const strtab = @as( - [*]const u8, - @ptrCast(&mapped_mem[symtabcmd.?.stroff]), - )[0 .. symtabcmd.?.strsize - 1 :0]; - const symtab = @as( - [*]const macho.nlist_64, - @ptrCast(@alignCast(&mapped_mem[symtabcmd.?.symoff])), - )[0..symtabcmd.?.nsyms]; - - // TODO handle tentative (common) symbols - var addr_table = std.StringHashMap(u64).init(allocator); - try addr_table.ensureTotalCapacity(@as(u32, @intCast(symtab.len))); - for (symtab) |sym| { - if (sym.n_strx == 0) continue; - if (sym.undf() or sym.tentative() or sym.abs()) continue; - const sym_name = mem.sliceTo(strtab[sym.n_strx..], 0); - // TODO is it possible to have a symbol collision? - addr_table.putAssumeCapacityNoClobber(sym_name, sym.n_value); - } +fn readCoffDebugInfo(gpa: Allocator, module: *const Module, di: *Module.DebugInfo) !void { + const mapped_ptr: [*]const u8 = @ptrFromInt(module.base_address); + const mapped = mapped_ptr[0..module.size]; + var coff_obj = coff.Coff.init(mapped, true) catch return error.InvalidDebugInfo; + // The string table is not mapped into memory by the loader, so if a section name is in the + // string table then we have to map the full image file from disk. This can happen when + // a binary is produced with -gdwarf, since the section names are longer than 8 bytes. 
+ if (coff_obj.strtabRequired()) { + var name_buffer: [windows.PATH_MAX_WIDE + 4:0]u16 = undefined; + name_buffer[0..4].* = .{ '\\', '?', '?', '\\' }; // openFileAbsoluteW requires the prefix to be present + const process_handle = windows.GetCurrentProcess(); + const len = windows.kernel32.GetModuleFileNameExW( + process_handle, + module.handle, + name_buffer[4..], + windows.PATH_MAX_WIDE, + ); + if (len == 0) return error.MissingDebugInfo; + const coff_file = fs.openFileAbsoluteW(name_buffer[0 .. len + 4 :0], .{}) catch |err| switch (err) { + error.FileNotFound => return error.MissingDebugInfo, + else => |e| return e, + }; + errdefer coff_file.close(); + var section_handle: windows.HANDLE = undefined; + const create_section_rc = windows.ntdll.NtCreateSection( + &section_handle, + windows.STANDARD_RIGHTS_REQUIRED | windows.SECTION_QUERY | windows.SECTION_MAP_READ, + null, + null, + windows.PAGE_READONLY, + // The documentation states that if no AllocationAttribute is specified, then SEC_COMMIT is the default. + // In practice, this isn't the case and specifying 0 will result in INVALID_PARAMETER_6. 
+ windows.SEC_COMMIT, + coff_file.handle, + ); + if (create_section_rc != .SUCCESS) return error.MissingDebugInfo; + errdefer windows.CloseHandle(section_handle); + var coff_len: usize = 0; + var section_view_ptr: [*]const u8 = undefined; + const map_section_rc = windows.ntdll.NtMapViewOfSection( + section_handle, + process_handle, + @ptrCast(&section_view_ptr), + null, + 0, + null, + &coff_len, + .ViewUnmap, + 0, + windows.PAGE_READONLY, + ); + if (map_section_rc != .SUCCESS) return error.MissingDebugInfo; + errdefer assert(windows.ntdll.NtUnmapViewOfSection(process_handle, @constCast(section_view_ptr)) == .SUCCESS); + const section_view = section_view_ptr[0..coff_len]; + coff_obj = coff.Coff.init(section_view, false) catch return error.InvalidDebugInfo; + di.mapped_file = .{ + .file = coff_file, + .section_handle = section_handle, + .section_view = section_view, + }; + } + di.coff_image_base = coff_obj.getImageBase(); - var sections: Dwarf.SectionArray = Dwarf.null_section_array; - if (self.eh_frame) |eh_frame| sections[@intFromEnum(Dwarf.Section.Id.eh_frame)] = .{ - .data = eh_frame, - .owned = false, - }; + if (coff_obj.getSectionByName(".debug_info")) |_| { + di.dwarf = .{}; - for (segcmd.?.getSections()) |sect| { - if (!std.mem.eql(u8, "__DWARF", sect.segName())) continue; + inline for (@typeInfo(Dwarf.Section.Id).@"enum".fields, 0..) |section, i| { + di.dwarf.?.sections[i] = if (coff_obj.getSectionByName("." ++ section.name)) |section_header| blk: { + break :blk .{ + .data = try coff_obj.getSectionDataAlloc(section_header, gpa), + .virtual_address = section_header.virtual_address, + .owned = true, + }; + } else null; + } - var section_index: ?usize = null; - inline for (@typeInfo(Dwarf.Section.Id).@"enum".fields, 0..) 
|section, i| { - if (mem.eql(u8, "__" ++ section.name, sect.sectName())) section_index = i; - } - if (section_index == null) continue; + try di.dwarf.?.open(gpa, native_endian); + } - const section_bytes = try Dwarf.chopSlice(mapped_mem, sect.offset, sect.size); - sections[section_index.?] = .{ - .data = section_bytes, - .virtual_address = @intCast(sect.addr), - .owned = false, - }; + if (try coff_obj.getPdbPath()) |raw_path| pdb: { + const path = blk: { + if (fs.path.isAbsolute(raw_path)) { + break :blk raw_path; + } else { + const self_dir = try fs.selfExeDirPathAlloc(gpa); + defer gpa.free(self_dir); + break :blk try fs.path.join(gpa, &.{ self_dir, raw_path }); } + }; + defer if (path.ptr != raw_path.ptr) gpa.free(path); - const missing_debug_info = - sections[@intFromEnum(Dwarf.Section.Id.debug_info)] == null or - sections[@intFromEnum(Dwarf.Section.Id.debug_abbrev)] == null or - sections[@intFromEnum(Dwarf.Section.Id.debug_str)] == null or - sections[@intFromEnum(Dwarf.Section.Id.debug_line)] == null; - if (missing_debug_info) return error.MissingDebugInfo; - - var di: Dwarf = .{ - .endian = .little, - .sections = sections, - .is_macho = true, - }; + di.pdb = Pdb.init(gpa, path) catch |err| switch (err) { + error.FileNotFound, error.IsDir => break :pdb, + else => return err, + }; + try di.pdb.?.parseInfoStream(); + try di.pdb.?.parseDbiStream(); - try Dwarf.open(&di, allocator); - const info = OFileInfo{ - .di = di, - .addr_table = addr_table, - }; + if (!mem.eql(u8, &coff_obj.guid, &di.pdb.?.guid) or coff_obj.age != di.pdb.?.age) + return error.InvalidDebugInfo; - // Add the debug info to the cache - const result = try self.ofiles.getOrPut(o_file_path); - assert(!result.found_existing); - result.value_ptr.* = info; + di.coff_section_headers = try coff_obj.getSectionHeadersAlloc(gpa); + } +} - return result.value_ptr; +const Module = switch (native_os) { + else => "MLUGG TODO", // Dwarf, // TODO MLUGG: it's this on master but that's definitely broken atm... 
+ .macos, .ios, .watchos, .tvos, .visionos => struct { + /// The runtime address where __TEXT is loaded. + text_base: usize, + load_offset: usize, + name: []const u8, + unwind_info: ?[]const u8, + eh_frame: ?[]const u8, + fn key(m: *const Module) usize { + return m.text_base; } + fn getSymbolAtAddress(module: *const Module, gpa: Allocator, di: *DebugInfo, address: usize) !std.debug.Symbol { + const vaddr = address - module.load_offset; + const symbol = MachoSymbol.find(di.symbols, vaddr) orelse return .{}; // MLUGG TODO null? - pub fn getSymbolAtAddress(self: *@This(), allocator: Allocator, address: usize) !std.debug.Symbol { - const result = try self.getOFileInfoForAddress(allocator, address); - if (result.symbol == null) return .{}; + // offset of `address` from start of `symbol` + const address_symbol_offset = vaddr - symbol.addr; // Take the symbol name from the N_FUN STAB entry, we're going to // use it if we fail to find the DWARF infos - const stab_symbol = mem.sliceTo(self.strings[result.symbol.?.strx..], 0); - if (result.o_file_info == null) return .{ .name = stab_symbol }; - - // Translate again the address, this time into an address inside the - // .o file - const relocated_address_o = result.o_file_info.?.addr_table.get(stab_symbol) orelse return .{ - .name = "???", + const stab_symbol = mem.sliceTo(di.strings[symbol.strx..], 0); + const o_file_path = mem.sliceTo(di.strings[symbol.ofile..], 0); + + const o_file: *DebugInfo.OFile = of: { + const gop = try di.ofiles.getOrPut(gpa, o_file_path); + if (!gop.found_existing) { + gop.value_ptr.* = DebugInfo.loadOFile(gpa, o_file_path) catch |err| { + defer _ = di.ofiles.pop().?; + switch (err) { + error.FileNotFound, + error.MissingDebugInfo, + error.InvalidDebugInfo, + => return .{ .name = stab_symbol }, + else => |e| return e, + } + }; + } + break :of gop.value_ptr; }; - const addr_off = result.relocated_address - result.symbol.?.addr; - const o_file_di = &result.o_file_info.?.di; - if 
(o_file_di.findCompileUnit(relocated_address_o)) |compile_unit| { - return .{ - .name = o_file_di.getSymbolName(relocated_address_o) orelse "???", - .compile_unit_name = compile_unit.die.getAttrString( - o_file_di, - std.dwarf.AT.name, - o_file_di.section(.debug_str), - compile_unit.*, - ) catch |err| switch (err) { - error.MissingDebugInfo, error.InvalidDebugInfo => "???", - }, - .source_location = o_file_di.getLineNumberInfo( - allocator, - compile_unit, - relocated_address_o + addr_off, - ) catch |err| switch (err) { - error.MissingDebugInfo, error.InvalidDebugInfo => null, - else => return err, - }, - }; - } else |err| switch (err) { - error.MissingDebugInfo, error.InvalidDebugInfo => { - return .{ .name = stab_symbol }; - }, - else => return err, - } - } + const symbol_ofile_vaddr = o_file.addr_table.get(stab_symbol) orelse return .{ .name = stab_symbol }; - pub fn getOFileInfoForAddress(self: *@This(), allocator: Allocator, address: usize) !struct { - relocated_address: usize, - symbol: ?*const MachoSymbol = null, - o_file_info: ?*OFileInfo = null, - } { - // Translate the VA into an address into this object - const relocated_address = address - self.vmaddr_slide; - - // Find the .o file where this symbol is defined - const symbol = machoSearchSymbols(self.symbols, relocated_address) orelse return .{ - .relocated_address = relocated_address, + const compile_unit = o_file.dwarf.findCompileUnit(native_endian, symbol_ofile_vaddr) catch |err| switch (err) { + error.MissingDebugInfo, error.InvalidDebugInfo => return .{ .name = stab_symbol }, + else => |e| return e, }; - // Check if its debug infos are already in the cache - const o_file_path = mem.sliceTo(self.strings[symbol.ofile..], 0); - const o_file_info = self.ofiles.getPtr(o_file_path) orelse - (self.loadOFile(allocator, o_file_path) catch |err| switch (err) { - error.FileNotFound, - error.MissingDebugInfo, - error.InvalidDebugInfo, - => return .{ - .relocated_address = relocated_address, - .symbol = symbol, 
- }, - else => return err, - }); - return .{ - .relocated_address = relocated_address, - .symbol = symbol, - .o_file_info = o_file_info, + .name = o_file.dwarf.getSymbolName(symbol_ofile_vaddr) orelse stab_symbol, + .compile_unit_name = compile_unit.die.getAttrString( + &o_file.dwarf, + native_endian, + std.dwarf.AT.name, + o_file.dwarf.section(.debug_str), + compile_unit, + ) catch |err| switch (err) { + error.MissingDebugInfo, error.InvalidDebugInfo => "???", + }, + .source_location = o_file.dwarf.getLineNumberInfo( + gpa, + native_endian, + compile_unit, + symbol_ofile_vaddr + address_symbol_offset, + ) catch |err| switch (err) { + error.MissingDebugInfo, error.InvalidDebugInfo => null, + else => return err, + }, }; } + const DebugInfo = struct { + // MLUGG TODO: these are duplicated state. i actually reckon they should be removed from Module, and loadMachODebugInfo should be the one discovering them! + mapped_memory: []align(std.heap.page_size_min) const u8, + symbols: []const MachoSymbol, + strings: [:0]const u8, + // MLUGG TODO: this could use an adapter to just index straight into `strings`! + ofiles: std.StringArrayHashMapUnmanaged(OFile), + + // Backed by the in-memory sections mapped by the loader + unwind_info: ?[]const u8, + eh_frame: ?[]const u8, + + // MLUGG TODO HACKHACK: this is awful + const init: DebugInfo = undefined; + + const OFile = struct { + dwarf: Dwarf, + // MLUGG TODO: this could use an adapter to just index straight into the strtab! 
+ addr_table: std.StringArrayHashMapUnmanaged(u64), + }; - pub fn getDwarfInfoForAddress(self: *@This(), allocator: Allocator, address: usize) !?*Dwarf { - return if ((try self.getOFileInfoForAddress(allocator, address)).o_file_info) |o_file_info| &o_file_info.di else null; - } - }, - .uefi, .windows => struct { - base_address: usize, - pdb: ?Pdb, - dwarf: ?Dwarf, - coff_image_base: u64, + fn deinit(di: *DebugInfo, gpa: Allocator) void { + for (di.ofiles.values()) |*ofile| { + ofile.dwarf.deinit(gpa); + ofile.addr_table.deinit(gpa); + } + di.ofiles.deinit(); + gpa.free(di.symbols); + posix.munmap(di.mapped_memory); + } - /// Only used if pdb is non-null - coff_section_headers: []coff.SectionHeader, + fn loadOFile(gpa: Allocator, o_file_path: []const u8) !OFile { + const mapped_mem = try mapFileOrSelfExe(o_file_path); + errdefer posix.munmap(mapped_mem); - pub fn deinit(self: *@This(), gpa: Allocator) void { - if (self.dwarf) |*dwarf| { - dwarf.deinit(gpa); - } + if (mapped_mem.len < @sizeOf(macho.mach_header_64)) return error.InvalidDebugInfo; + const hdr: *const macho.mach_header_64 = @ptrCast(@alignCast(mapped_mem.ptr)); + if (hdr.magic != std.macho.MH_MAGIC_64) return error.InvalidDebugInfo; - if (self.pdb) |*p| { - gpa.free(p.file_reader.interface.buffer); - gpa.destroy(p.file_reader); - p.deinit(); - gpa.free(self.coff_section_headers); - } + const seg_cmd: macho.LoadCommandIterator.LoadCommand, const symtab_cmd: macho.symtab_command = cmds: { + var seg_cmd: ?macho.LoadCommandIterator.LoadCommand = null; + var symtab_cmd: ?macho.symtab_command = null; + var it: macho.LoadCommandIterator = .{ + .ncmds = hdr.ncmds, + .buffer = mapped_mem[@sizeOf(macho.mach_header_64)..][0..hdr.sizeofcmds], + }; + while (it.next()) |cmd| switch (cmd.cmd()) { + .SEGMENT_64 => seg_cmd = cmd, + .SYMTAB => symtab_cmd = cmd.cast(macho.symtab_command) orelse return error.InvalidDebugInfo, + else => {}, + }; + break :cmds .{ + seg_cmd orelse return error.MissingDebugInfo, + symtab_cmd 
orelse return error.MissingDebugInfo, + }; + }; - self.* = undefined; - } + if (mapped_mem.len < symtab_cmd.stroff + symtab_cmd.strsize) return error.InvalidDebugInfo; + if (mapped_mem[symtab_cmd.stroff + symtab_cmd.strsize - 1] != 0) return error.InvalidDebugInfo; + const strtab = mapped_mem[symtab_cmd.stroff..][0 .. symtab_cmd.strsize - 1]; + + const n_sym_bytes = symtab_cmd.nsyms * @sizeOf(macho.nlist_64); + if (mapped_mem.len < symtab_cmd.symoff + n_sym_bytes) return error.InvalidDebugInfo; + const symtab: []align(1) const macho.nlist_64 = @ptrCast(mapped_mem[symtab_cmd.symoff..][0..n_sym_bytes]); + + // TODO handle tentative (common) symbols + // MLUGG TODO: does initCapacity actually make sense? + var addr_table: std.StringArrayHashMapUnmanaged(u64) = .empty; + defer addr_table.deinit(gpa); + try addr_table.ensureUnusedCapacity(gpa, @intCast(symtab.len)); + for (symtab) |sym| { + if (sym.n_strx == 0) continue; + switch (sym.n_type.bits.type) { + .undf => continue, // includes tentative symbols + .abs => continue, + else => {}, + } + const sym_name = mem.sliceTo(strtab[sym.n_strx..], 0); + const gop = addr_table.getOrPutAssumeCapacity(sym_name); + if (gop.found_existing) return error.InvalidDebugInfo; + gop.value_ptr.* = sym.n_value; + } - fn getSymbolFromPdb(self: *@This(), relocated_address: usize) !?std.debug.Symbol { - var coff_section: *align(1) const coff.SectionHeader = undefined; - const mod_index = for (self.pdb.?.sect_contribs) |sect_contrib| { - if (sect_contrib.section > self.coff_section_headers.len) continue; - // Remember that SectionContribEntry.Section is 1-based. 
- coff_section = &self.coff_section_headers[sect_contrib.section - 1]; - - const vaddr_start = coff_section.virtual_address + sect_contrib.offset; - const vaddr_end = vaddr_start + sect_contrib.size; - if (relocated_address >= vaddr_start and relocated_address < vaddr_end) { - break sect_contrib.module_index; + var sections: Dwarf.SectionArray = @splat(null); + for (seg_cmd.getSections()) |sect| { + if (!std.mem.eql(u8, "__DWARF", sect.segName())) continue; + + const section_index: usize = inline for (@typeInfo(Dwarf.Section.Id).@"enum".fields, 0..) |section, i| { + if (mem.eql(u8, "__" ++ section.name, sect.sectName())) break i; + } else continue; + + const section_bytes = try Dwarf.chopSlice(mapped_mem, sect.offset, sect.size); + sections[section_index] = .{ + .data = section_bytes, + .virtual_address = @intCast(sect.addr), + .owned = false, + }; } - } else { - // we have no information to add to the address - return null; - }; - const module = (try self.pdb.?.getModule(mod_index)) orelse - return error.InvalidDebugInfo; - const obj_basename = fs.path.basename(module.obj_file_name); - - const symbol_name = self.pdb.?.getSymbolName( - module, - relocated_address - coff_section.virtual_address, - ) orelse "???"; - const opt_line_info = try self.pdb.?.getLineNumberInfo( - module, - relocated_address - coff_section.virtual_address, - ); + const missing_debug_info = + sections[@intFromEnum(Dwarf.Section.Id.debug_info)] == null or + sections[@intFromEnum(Dwarf.Section.Id.debug_abbrev)] == null or + sections[@intFromEnum(Dwarf.Section.Id.debug_str)] == null or + sections[@intFromEnum(Dwarf.Section.Id.debug_line)] == null; + if (missing_debug_info) return error.MissingDebugInfo; - return .{ - .name = symbol_name, - .compile_unit_name = obj_basename, - .source_location = opt_line_info, - }; + var dwarf: Dwarf = .{ .sections = sections }; + errdefer dwarf.deinit(gpa); + try dwarf.open(gpa, native_endian); + + return .{ + .dwarf = dwarf, + .addr_table = addr_table.move(), + 
}; + } + }; + }, + .wasi, .emscripten => struct { + const DebugInfo = struct { + const init: DebugInfo = .{}; + fn getSymbolAtAddress(di: *DebugInfo, gpa: Allocator, base_address: usize, address: usize) !std.debug.Symbol { + _ = di; + _ = gpa; + _ = base_address; + _ = address; + unreachable; + } + }; + }, + .linux, .netbsd, .freebsd, .dragonfly, .openbsd, .haiku, .solaris, .illumos => struct { + load_offset: usize, + name: []const u8, + build_id: ?[]const u8, + gnu_eh_frame: ?[]const u8, + fn key(m: Module) usize { + return m.load_offset; // MLUGG TODO: is this technically valid? idk + } + const DebugInfo = Dwarf.ElfModule; + fn getSymbolAtAddress(mod: *const Module, gpa: Allocator, di: *DebugInfo, address: usize) !std.debug.Symbol { + return di.getSymbolAtAddress(gpa, native_endian, mod.load_offset, address); } + }, + .uefi, .windows => struct { + base_address: usize, + size: usize, + name: []const u8, + handle: windows.HMODULE, + fn key(m: Module) usize { + return m.base_address; + } + const DebugInfo = struct { + coff_image_base: u64, + mapped_file: ?struct { + file: File, + section_handle: windows.HANDLE, + section_view: []const u8, + fn deinit(mapped: @This()) void { + const process_handle = windows.GetCurrentProcess(); + assert(windows.ntdll.NtUnmapViewOfSection(process_handle, @constCast(mapped.section_view.ptr)) == .SUCCESS); + windows.CloseHandle(mapped.section_handle); + mapped.file.close(); + } + }, - pub fn getSymbolAtAddress(self: *@This(), allocator: Allocator, address: usize) !std.debug.Symbol { - // Translate the VA into an address into this object - const relocated_address = address - self.base_address; + dwarf: ?Dwarf, - if (self.pdb != null) { - if (try self.getSymbolFromPdb(relocated_address)) |symbol| return symbol; - } + pdb: ?Pdb, + /// Populated iff `pdb != null`; otherwise `&.{}`. 
+ coff_section_headers: []coff.SectionHeader, + + const init: DebugInfo = .{ + .coff_image_base = undefined, + .mapped_file = null, + .dwarf = null, + .pdb = null, + .coff_section_headers = &.{}, + }; - if (self.dwarf) |*dwarf| { - const dwarf_address = relocated_address + self.coff_image_base; - return dwarf.getSymbol(allocator, dwarf_address); + fn deinit(di: *DebugInfo, gpa: Allocator) void { + if (di.dwarf) |*dwarf| dwarf.deinit(gpa); + if (di.pdb) |*pdb| pdb.deinit(); + gpa.free(di.coff_section_headers); + if (di.mapped_file) |mapped| mapped.deinit(); } - return .{}; - } + fn getSymbolFromPdb(di: *DebugInfo, relocated_address: usize) !?std.debug.Symbol { + var coff_section: *align(1) const coff.SectionHeader = undefined; + const mod_index = for (di.pdb.?.sect_contribs) |sect_contrib| { + if (sect_contrib.section > di.coff_section_headers.len) continue; + // Remember that SectionContribEntry.Section is 1-based. + coff_section = &di.coff_section_headers[sect_contrib.section - 1]; + + const vaddr_start = coff_section.virtual_address + sect_contrib.offset; + const vaddr_end = vaddr_start + sect_contrib.size; + if (relocated_address >= vaddr_start and relocated_address < vaddr_end) { + break sect_contrib.module_index; + } + } else { + // we have no information to add to the address + return null; + }; - pub fn getDwarfInfoForAddress(self: *@This(), allocator: Allocator, address: usize) !?*Dwarf { - _ = allocator; - _ = address; + const module = (try di.pdb.?.getModule(mod_index)) orelse + return error.InvalidDebugInfo; + const obj_basename = fs.path.basename(module.obj_file_name); + + const symbol_name = di.pdb.?.getSymbolName( + module, + relocated_address - coff_section.virtual_address, + ) orelse "???"; + const opt_line_info = try di.pdb.?.getLineNumberInfo( + module, + relocated_address - coff_section.virtual_address, + ); - return switch (self.debug_data) { - .dwarf => |*dwarf| dwarf, - else => null, - }; - } - }, - .linux, .netbsd, .freebsd, .dragonfly, 
.openbsd, .haiku, .solaris, .illumos => Dwarf.ElfModule, - .wasi, .emscripten => struct { - pub fn deinit(self: *@This(), allocator: Allocator) void { - _ = self; - _ = allocator; - } + return .{ + .name = symbol_name, + .compile_unit_name = obj_basename, + .source_location = opt_line_info, + }; + } + }; - pub fn getSymbolAtAddress(self: *@This(), allocator: Allocator, address: usize) !std.debug.Symbol { - _ = self; - _ = allocator; - _ = address; - return .{}; - } + fn getSymbolAtAddress(mod: *const Module, gpa: Allocator, di: *DebugInfo, address: usize) !std.debug.Symbol { + // Translate the runtime address into a virtual address into the module + const vaddr = address - mod.base_address; - pub fn getDwarfInfoForAddress(self: *@This(), allocator: Allocator, address: usize) !?*Dwarf { - _ = self; - _ = allocator; - _ = address; - return null; - } - }, - else => Dwarf, -}; + if (di.pdb != null) { + if (try di.getSymbolFromPdb(vaddr)) |symbol| return symbol; + } -/// How is this different than `Module` when the host is Windows? -/// Why are both stored in the `SelfInfo` struct? -/// Boy, it sure would be nice if someone added documentation comments for this -/// struct explaining it. 
-pub const WindowsModule = struct { - base_address: usize, - size: u32, - name: []const u8, - handle: windows.HMODULE, - - // Set when the image file needed to be mapped from disk - mapped_file: ?struct { - file: File, - section_handle: windows.HANDLE, - section_view: []const u8, - - pub fn deinit(self: @This()) void { - const process_handle = windows.GetCurrentProcess(); - assert(windows.ntdll.NtUnmapViewOfSection(process_handle, @ptrCast(@constCast(self.section_view.ptr))) == .SUCCESS); - windows.CloseHandle(self.section_handle); - self.file.close(); + if (di.dwarf) |*dwarf| { + const dwarf_address = vaddr + di.coff_image_base; + return dwarf.getSymbol(gpa, native_endian, dwarf_address); + } + + return error.MissingDebugInfo; } - } = null, + }, }; -/// This takes ownership of macho_file: users of this function should not close -/// it themselves, even on error. -/// TODO it's weird to take ownership even on error, rework this code. -fn readMachODebugInfo(allocator: Allocator, macho_file: File) !Module { - const mapped_mem = try mapWholeFile(macho_file); +fn loadMachODebugInfo(gpa: Allocator, module: *const Module, di: *Module.DebugInfo) !void { + const mapped_mem = mapFileOrSelfExe(module.name) catch |err| switch (err) { + error.FileNotFound => return error.MissingDebugInfo, + error.FileTooBig => return error.InvalidDebugInfo, + else => |e| return e, + }; + errdefer posix.munmap(mapped_mem); const hdr: *const macho.mach_header_64 = @ptrCast(@alignCast(mapped_mem.ptr)); if (hdr.magic != macho.MH_MAGIC_64) return error.InvalidDebugInfo; - var it = macho.LoadCommandIterator{ - .ncmds = hdr.ncmds, - .buffer = mapped_mem[@sizeOf(macho.mach_header_64)..][0..hdr.sizeofcmds], + const symtab: macho.symtab_command = symtab: { + var it: macho.LoadCommandIterator = .{ + .ncmds = hdr.ncmds, + .buffer = mapped_mem[@sizeOf(macho.mach_header_64)..][0..hdr.sizeofcmds], + }; + while (it.next()) |cmd| switch (cmd.cmd()) { + .SYMTAB => break :symtab cmd.cast(macho.symtab_command) 
orelse return error.InvalidDebugInfo, + else => {}, + }; + return error.MissingDebugInfo; }; - const symtab = while (it.next()) |cmd| switch (cmd.cmd()) { - .SYMTAB => break cmd.cast(macho.symtab_command).?, - else => {}, - } else return error.MissingDebugInfo; - - const syms = @as( - [*]const macho.nlist_64, - @ptrCast(@alignCast(&mapped_mem[symtab.symoff])), - )[0..symtab.nsyms]; + + const syms_ptr: [*]align(1) const macho.nlist_64 = @ptrCast(mapped_mem[symtab.symoff..]); + const syms = syms_ptr[0..symtab.nsyms]; const strings = mapped_mem[symtab.stroff..][0 .. symtab.strsize - 1 :0]; - const symbols_buf = try allocator.alloc(MachoSymbol, syms.len); + // MLUGG TODO: does it really make sense to initCapacity here? how many of syms are omitted? + var symbols: std.ArrayList(MachoSymbol) = try .initCapacity(gpa, syms.len); + defer symbols.deinit(gpa); var ofile: u32 = undefined; var last_sym: MachoSymbol = undefined; - var symbol_index: usize = 0; var state: enum { init, oso_open, @@ -929,64 +810,53 @@ fn readMachODebugInfo(allocator: Allocator, macho_file: File) !Module { } = .init; for (syms) |*sym| { - if (!sym.stab()) continue; + if (sym.n_type.bits.is_stab == 0) continue; // TODO handle globals N_GSYM, and statics N_STSYM - switch (sym.n_type) { - macho.N_OSO => { - switch (state) { - .init, .oso_close => { - state = .oso_open; - ofile = sym.n_strx; - }, - else => return error.InvalidDebugInfo, - } + switch (sym.n_type.stab) { + .oso => switch (state) { + .init, .oso_close => { + state = .oso_open; + ofile = sym.n_strx; + }, + else => return error.InvalidDebugInfo, }, - macho.N_BNSYM => { - switch (state) { - .oso_open, .ensym => { - state = .bnsym; - last_sym = .{ - .strx = 0, - .addr = sym.n_value, - .size = 0, - .ofile = ofile, - }; - }, - else => return error.InvalidDebugInfo, - } + .bnsym => switch (state) { + .oso_open, .ensym => { + state = .bnsym; + last_sym = .{ + .strx = 0, + .addr = sym.n_value, + .size = 0, + .ofile = ofile, + }; + }, + else => 
return error.InvalidDebugInfo, }, - macho.N_FUN => { - switch (state) { - .bnsym => { - state = .fun_strx; - last_sym.strx = sym.n_strx; - }, - .fun_strx => { - state = .fun_size; - last_sym.size = @as(u32, @intCast(sym.n_value)); - }, - else => return error.InvalidDebugInfo, - } + .fun => switch (state) { + .bnsym => { + state = .fun_strx; + last_sym.strx = sym.n_strx; + }, + .fun_strx => { + state = .fun_size; + last_sym.size = @intCast(sym.n_value); + }, + else => return error.InvalidDebugInfo, }, - macho.N_ENSYM => { - switch (state) { - .fun_size => { - state = .ensym; - symbols_buf[symbol_index] = last_sym; - symbol_index += 1; - }, - else => return error.InvalidDebugInfo, - } + .ensym => switch (state) { + .fun_size => { + state = .ensym; + symbols.appendAssumeCapacity(last_sym); + }, + else => return error.InvalidDebugInfo, }, - macho.N_SO => { - switch (state) { - .init, .oso_close => {}, - .oso_open, .ensym => { - state = .oso_close; - }, - else => return error.InvalidDebugInfo, - } + .so => switch (state) { + .init, .oso_close => {}, + .oso_open, .ensym => { + state = .oso_close; + }, + else => return error.InvalidDebugInfo, }, else => {}, } @@ -998,542 +868,98 @@ fn readMachODebugInfo(allocator: Allocator, macho_file: File) !Module { else => return error.InvalidDebugInfo, } - const symbols = try allocator.realloc(symbols_buf, symbol_index); + const symbols_slice = try symbols.toOwnedSlice(gpa); + errdefer gpa.free(symbols_slice); // Even though lld emits symbols in ascending order, this debug code // should work for programs linked in any valid way. // This sort is so that we can binary search later. 
- mem.sort(MachoSymbol, symbols, {}, MachoSymbol.addressLessThan); + mem.sort(MachoSymbol, symbols_slice, {}, MachoSymbol.addressLessThan); - return .{ - .base_address = undefined, - .vmaddr_slide = undefined, + di.* = .{ + .unwind_info = module.unwind_info, + .eh_frame = module.eh_frame, .mapped_memory = mapped_mem, - .ofiles = Module.OFileTable.init(allocator), - .symbols = symbols, + .symbols = symbols_slice, .strings = strings, + .ofiles = .empty, }; } -fn readCoffDebugInfo(allocator: Allocator, coff_obj: *coff.Coff) !Module { - var di: Module = .{ - .base_address = undefined, - .coff_image_base = coff_obj.getImageBase(), - .coff_section_headers = undefined, - }; - - if (coff_obj.getSectionByName(".debug_info")) |_| { - // This coff file has embedded DWARF debug info - var sections: Dwarf.SectionArray = Dwarf.null_section_array; - errdefer for (sections) |section| if (section) |s| if (s.owned) allocator.free(s.data); - - inline for (@typeInfo(Dwarf.Section.Id).@"enum".fields, 0..) |section, i| { - sections[i] = if (coff_obj.getSectionByName("." 
++ section.name)) |section_header| blk: { - break :blk .{ - .data = try coff_obj.getSectionDataAlloc(section_header, allocator), - .virtual_address = section_header.virtual_address, - .owned = true, - }; - } else null; - } - - var dwarf: Dwarf = .{ - .endian = native_endian, - .sections = sections, - .is_macho = false, - }; - - try Dwarf.open(&dwarf, allocator); - di.dwarf = dwarf; - } - - const raw_path = try coff_obj.getPdbPath() orelse return di; - const path = blk: { - if (fs.path.isAbsolute(raw_path)) { - break :blk raw_path; - } else { - const self_dir = try fs.selfExeDirPathAlloc(allocator); - defer allocator.free(self_dir); - break :blk try fs.path.join(allocator, &.{ self_dir, raw_path }); - } - }; - defer if (path.ptr != raw_path.ptr) allocator.free(path); - - di.pdb = Pdb.init(allocator, path) catch |err| switch (err) { - error.FileNotFound, error.IsDir => { - if (di.dwarf == null) return error.MissingDebugInfo; - return di; - }, - else => return err, - }; - try di.pdb.?.parseInfoStream(); - try di.pdb.?.parseDbiStream(); - - if (!mem.eql(u8, &coff_obj.guid, &di.pdb.?.guid) or coff_obj.age != di.pdb.?.age) - return error.InvalidDebugInfo; - - // Only used by the pdb path - di.coff_section_headers = try coff_obj.getSectionHeadersAlloc(allocator); - errdefer allocator.free(di.coff_section_headers); - - return di; -} - -/// Reads debug info from an ELF file, or the current binary if none in specified. -/// If the required sections aren't present but a reference to external debug info is, -/// then this this function will recurse to attempt to load the debug sections from -/// an external file. 
-pub fn readElfDebugInfo( - em: *Dwarf.ElfModule, - allocator: Allocator, - elf_filename: ?[]const u8, - build_id: ?[]const u8, - parent_sections: *Dwarf.SectionArray, -) !void { - const elf_file = (if (elf_filename) |filename| blk: { - break :blk fs.cwd().openFile(filename, .{}); - } else fs.openSelfExe(.{})) catch |err| switch (err) { - error.FileNotFound => return error.MissingDebugInfo, - else => return err, - }; - - const mapped_mem = try mapWholeFile(elf_file); - return em.load( - allocator, - mapped_mem, - build_id, - null, - parent_sections, - null, - elf_filename, - ); -} - const MachoSymbol = struct { strx: u32, addr: u64, size: u32, ofile: u32, - - /// Returns the address from the macho file - fn address(self: MachoSymbol) u64 { - return self.addr; - } - fn addressLessThan(context: void, lhs: MachoSymbol, rhs: MachoSymbol) bool { _ = context; return lhs.addr < rhs.addr; } -}; - -/// Takes ownership of file, even on error. -/// TODO it's weird to take ownership even on error, rework this code. 
-fn mapWholeFile(file: File) ![]align(std.heap.page_size_min) const u8 { - defer file.close(); - - const file_len = math.cast(usize, try file.getEndPos()) orelse math.maxInt(usize); - const mapped_mem = try posix.mmap( - null, - file_len, - posix.PROT.READ, - .{ .TYPE = .SHARED }, - file.handle, - 0, - ); - errdefer posix.munmap(mapped_mem); - - return mapped_mem; -} - -fn machoSearchSymbols(symbols: []const MachoSymbol, address: usize) ?*const MachoSymbol { - var min: usize = 0; - var max: usize = symbols.len - 1; - while (min < max) { - const mid = min + (max - min) / 2; - const curr = &symbols[mid]; - const next = &symbols[mid + 1]; - if (address >= next.address()) { - min = mid + 1; - } else if (address < curr.address()) { - max = mid; - } else { - return curr; - } - } - - const max_sym = &symbols[symbols.len - 1]; - if (address >= max_sym.address()) - return max_sym; - - return null; -} - -test machoSearchSymbols { - const symbols = [_]MachoSymbol{ - .{ .addr = 100, .strx = undefined, .size = undefined, .ofile = undefined }, - .{ .addr = 200, .strx = undefined, .size = undefined, .ofile = undefined }, - .{ .addr = 300, .strx = undefined, .size = undefined, .ofile = undefined }, - }; - - try testing.expectEqual(null, machoSearchSymbols(&symbols, 0)); - try testing.expectEqual(null, machoSearchSymbols(&symbols, 99)); - try testing.expectEqual(&symbols[0], machoSearchSymbols(&symbols, 100).?); - try testing.expectEqual(&symbols[0], machoSearchSymbols(&symbols, 150).?); - try testing.expectEqual(&symbols[0], machoSearchSymbols(&symbols, 199).?); - - try testing.expectEqual(&symbols[1], machoSearchSymbols(&symbols, 200).?); - try testing.expectEqual(&symbols[1], machoSearchSymbols(&symbols, 250).?); - try testing.expectEqual(&symbols[1], machoSearchSymbols(&symbols, 299).?); - - try testing.expectEqual(&symbols[2], machoSearchSymbols(&symbols, 300).?); - try testing.expectEqual(&symbols[2], machoSearchSymbols(&symbols, 301).?); - try 
testing.expectEqual(&symbols[2], machoSearchSymbols(&symbols, 5000).?); -} - -/// Unwind a frame using MachO compact unwind info (from __unwind_info). -/// If the compact encoding can't encode a way to unwind a frame, it will -/// defer unwinding to DWARF, in which case `.eh_frame` will be used if available. -fn unwindFrameMachO( - allocator: Allocator, - base_address: usize, - context: *UnwindContext, - unwind_info: []const u8, - eh_frame: ?[]const u8, -) !usize { - const header = std.mem.bytesAsValue( - macho.unwind_info_section_header, - unwind_info[0..@sizeOf(macho.unwind_info_section_header)], - ); - const indices = std.mem.bytesAsSlice( - macho.unwind_info_section_header_index_entry, - unwind_info[header.indexSectionOffset..][0 .. header.indexCount * @sizeOf(macho.unwind_info_section_header_index_entry)], - ); - if (indices.len == 0) return error.MissingUnwindInfo; - - const mapped_pc = context.pc - base_address; - const second_level_index = blk: { + /// Assumes that `symbols` is sorted in order of ascending `addr`. + fn find(symbols: []const MachoSymbol, address: usize) ?*const MachoSymbol { + if (symbols.len == 0) return null; // no potential match + if (address < symbols[0].addr) return null; // address is before the lowest-address symbol var left: usize = 0; - var len: usize = indices.len; - + var len: usize = symbols.len; while (len > 1) { const mid = left + len / 2; - const offset = indices[mid].functionOffset; - if (mapped_pc < offset) { + if (address < symbols[mid].addr) { len /= 2; } else { left = mid; - if (mapped_pc == offset) break; len -= len / 2; } } + return &symbols[left]; + } - // Last index is a sentinel containing the highest address as its functionOffset - if (indices[left].secondLevelPagesSectionOffset == 0) return error.MissingUnwindInfo; - break :blk &indices[left]; - }; - - const common_encodings = std.mem.bytesAsSlice( - macho.compact_unwind_encoding_t, - unwind_info[header.commonEncodingsArraySectionOffset..][0 .. 
header.commonEncodingsArrayCount * @sizeOf(macho.compact_unwind_encoding_t)], - ); - - const start_offset = second_level_index.secondLevelPagesSectionOffset; - const kind = std.mem.bytesAsValue( - macho.UNWIND_SECOND_LEVEL, - unwind_info[start_offset..][0..@sizeOf(macho.UNWIND_SECOND_LEVEL)], - ); - - const entry: struct { - function_offset: usize, - raw_encoding: u32, - } = switch (kind.*) { - .REGULAR => blk: { - const page_header = std.mem.bytesAsValue( - macho.unwind_info_regular_second_level_page_header, - unwind_info[start_offset..][0..@sizeOf(macho.unwind_info_regular_second_level_page_header)], - ); - - const entries = std.mem.bytesAsSlice( - macho.unwind_info_regular_second_level_entry, - unwind_info[start_offset + page_header.entryPageOffset ..][0 .. page_header.entryCount * @sizeOf(macho.unwind_info_regular_second_level_entry)], - ); - if (entries.len == 0) return error.InvalidUnwindInfo; - - var left: usize = 0; - var len: usize = entries.len; - while (len > 1) { - const mid = left + len / 2; - const offset = entries[mid].functionOffset; - if (mapped_pc < offset) { - len /= 2; - } else { - left = mid; - if (mapped_pc == offset) break; - len -= len / 2; - } - } - - break :blk .{ - .function_offset = entries[left].functionOffset, - .raw_encoding = entries[left].encoding, - }; - }, - .COMPRESSED => blk: { - const page_header = std.mem.bytesAsValue( - macho.unwind_info_compressed_second_level_page_header, - unwind_info[start_offset..][0..@sizeOf(macho.unwind_info_compressed_second_level_page_header)], - ); - - const entries = std.mem.bytesAsSlice( - macho.UnwindInfoCompressedEntry, - unwind_info[start_offset + page_header.entryPageOffset ..][0 .. 
page_header.entryCount * @sizeOf(macho.UnwindInfoCompressedEntry)], - ); - if (entries.len == 0) return error.InvalidUnwindInfo; - - var left: usize = 0; - var len: usize = entries.len; - while (len > 1) { - const mid = left + len / 2; - const offset = second_level_index.functionOffset + entries[mid].funcOffset; - if (mapped_pc < offset) { - len /= 2; - } else { - left = mid; - if (mapped_pc == offset) break; - len -= len / 2; - } - } - - const entry = entries[left]; - const function_offset = second_level_index.functionOffset + entry.funcOffset; - if (entry.encodingIndex < header.commonEncodingsArrayCount) { - if (entry.encodingIndex >= common_encodings.len) return error.InvalidUnwindInfo; - break :blk .{ - .function_offset = function_offset, - .raw_encoding = common_encodings[entry.encodingIndex], - }; - } else { - const local_index = try math.sub( - u8, - entry.encodingIndex, - math.cast(u8, header.commonEncodingsArrayCount) orelse return error.InvalidUnwindInfo, - ); - const local_encodings = std.mem.bytesAsSlice( - macho.compact_unwind_encoding_t, - unwind_info[start_offset + page_header.encodingsPageOffset ..][0 .. 
page_header.encodingsCount * @sizeOf(macho.compact_unwind_encoding_t)], - ); - if (local_index >= local_encodings.len) return error.InvalidUnwindInfo; - break :blk .{ - .function_offset = function_offset, - .raw_encoding = local_encodings[local_index], - }; - } - }, - else => return error.InvalidUnwindInfo, - }; - - if (entry.raw_encoding == 0) return error.NoUnwindInfo; - const reg_context = Dwarf.abi.RegisterContext{ - .eh_frame = false, - .is_macho = true, - }; - - const encoding: macho.CompactUnwindEncoding = @bitCast(entry.raw_encoding); - const new_ip = switch (builtin.cpu.arch) { - .x86_64 => switch (encoding.mode.x86_64) { - .OLD => return error.UnimplementedUnwindEncoding, - .RBP_FRAME => blk: { - const regs: [5]u3 = .{ - encoding.value.x86_64.frame.reg0, - encoding.value.x86_64.frame.reg1, - encoding.value.x86_64.frame.reg2, - encoding.value.x86_64.frame.reg3, - encoding.value.x86_64.frame.reg4, - }; - - const frame_offset = encoding.value.x86_64.frame.frame_offset * @sizeOf(usize); - var max_reg: usize = 0; - inline for (regs, 0..) |reg, i| { - if (reg > 0) max_reg = i; - } - - const fp = (try regValueNative(context.thread_context, fpRegNum(reg_context), reg_context)).*; - const new_sp = fp + 2 * @sizeOf(usize); - - const ip_ptr = fp + @sizeOf(usize); - const new_ip = @as(*const usize, @ptrFromInt(ip_ptr)).*; - const new_fp = @as(*const usize, @ptrFromInt(fp)).*; - - (try regValueNative(context.thread_context, fpRegNum(reg_context), reg_context)).* = new_fp; - (try regValueNative(context.thread_context, spRegNum(reg_context), reg_context)).* = new_sp; - (try regValueNative(context.thread_context, ip_reg_num, reg_context)).* = new_ip; - - for (regs, 0..) 
|reg, i| { - if (reg == 0) continue; - const addr = fp - frame_offset + i * @sizeOf(usize); - const reg_number = try Dwarf.compactUnwindToDwarfRegNumber(reg); - (try regValueNative(context.thread_context, reg_number, reg_context)).* = @as(*const usize, @ptrFromInt(addr)).*; - } - - break :blk new_ip; - }, - .STACK_IMMD, - .STACK_IND, - => blk: { - const sp = (try regValueNative(context.thread_context, spRegNum(reg_context), reg_context)).*; - const stack_size = if (encoding.mode.x86_64 == .STACK_IMMD) - @as(usize, encoding.value.x86_64.frameless.stack.direct.stack_size) * @sizeOf(usize) - else stack_size: { - // In .STACK_IND, the stack size is inferred from the subq instruction at the beginning of the function. - const sub_offset_addr = - base_address + - entry.function_offset + - encoding.value.x86_64.frameless.stack.indirect.sub_offset; - - // `sub_offset_addr` points to the offset of the literal within the instruction - const sub_operand = @as(*align(1) const u32, @ptrFromInt(sub_offset_addr)).*; - break :stack_size sub_operand + @sizeOf(usize) * @as(usize, encoding.value.x86_64.frameless.stack.indirect.stack_adjust); - }; - - // Decode the Lehmer-coded sequence of registers. - // For a description of the encoding see lib/libc/include/any-macos.13-any/mach-o/compact_unwind_encoding.h - - // Decode the variable-based permutation number into its digits. Each digit represents - // an index into the list of register numbers that weren't yet used in the sequence at - // the time the digit was added. 
- const reg_count = encoding.value.x86_64.frameless.stack_reg_count; - const ip_ptr = if (reg_count > 0) reg_blk: { - var digits: [6]u3 = undefined; - var accumulator: usize = encoding.value.x86_64.frameless.stack_reg_permutation; - var base: usize = 2; - for (0..reg_count) |i| { - const div = accumulator / base; - digits[digits.len - 1 - i] = @intCast(accumulator - base * div); - accumulator = div; - base += 1; - } - - const reg_numbers = [_]u3{ 1, 2, 3, 4, 5, 6 }; - var registers: [reg_numbers.len]u3 = undefined; - var used_indices = [_]bool{false} ** reg_numbers.len; - for (digits[digits.len - reg_count ..], 0..) |target_unused_index, i| { - var unused_count: u8 = 0; - const unused_index = for (used_indices, 0..) |used, index| { - if (!used) { - if (target_unused_index == unused_count) break index; - unused_count += 1; - } - } else unreachable; - - registers[i] = reg_numbers[unused_index]; - used_indices[unused_index] = true; - } - - var reg_addr = sp + stack_size - @sizeOf(usize) * @as(usize, reg_count + 1); - for (0..reg_count) |i| { - const reg_number = try Dwarf.compactUnwindToDwarfRegNumber(registers[i]); - (try regValueNative(context.thread_context, reg_number, reg_context)).* = @as(*const usize, @ptrFromInt(reg_addr)).*; - reg_addr += @sizeOf(usize); - } - - break :reg_blk reg_addr; - } else sp + stack_size - @sizeOf(usize); - - const new_ip = @as(*const usize, @ptrFromInt(ip_ptr)).*; - const new_sp = ip_ptr + @sizeOf(usize); - - (try regValueNative(context.thread_context, spRegNum(reg_context), reg_context)).* = new_sp; - (try regValueNative(context.thread_context, ip_reg_num, reg_context)).* = new_ip; - - break :blk new_ip; - }, - .DWARF => { - return unwindFrameMachODwarf(allocator, base_address, context, eh_frame orelse return error.MissingEhFrame, @intCast(encoding.value.x86_64.dwarf)); - }, - }, - .aarch64, .aarch64_be => switch (encoding.mode.arm64) { - .OLD => return error.UnimplementedUnwindEncoding, - .FRAMELESS => blk: { - const sp = (try 
regValueNative(context.thread_context, spRegNum(reg_context), reg_context)).*; - const new_sp = sp + encoding.value.arm64.frameless.stack_size * 16; - const new_ip = (try regValueNative(context.thread_context, 30, reg_context)).*; - (try regValueNative(context.thread_context, spRegNum(reg_context), reg_context)).* = new_sp; - break :blk new_ip; - }, - .DWARF => { - return unwindFrameMachODwarf(allocator, base_address, context, eh_frame orelse return error.MissingEhFrame, @intCast(encoding.value.arm64.dwarf)); - }, - .FRAME => blk: { - const fp = (try regValueNative(context.thread_context, fpRegNum(reg_context), reg_context)).*; - const ip_ptr = fp + @sizeOf(usize); - - var reg_addr = fp - @sizeOf(usize); - inline for (@typeInfo(@TypeOf(encoding.value.arm64.frame.x_reg_pairs)).@"struct".fields, 0..) |field, i| { - if (@field(encoding.value.arm64.frame.x_reg_pairs, field.name) != 0) { - (try regValueNative(context.thread_context, 19 + i, reg_context)).* = @as(*const usize, @ptrFromInt(reg_addr)).*; - reg_addr += @sizeOf(usize); - (try regValueNative(context.thread_context, 20 + i, reg_context)).* = @as(*const usize, @ptrFromInt(reg_addr)).*; - reg_addr += @sizeOf(usize); - } - } - - inline for (@typeInfo(@TypeOf(encoding.value.arm64.frame.d_reg_pairs)).@"struct".fields, 0..) 
|field, i| { - if (@field(encoding.value.arm64.frame.d_reg_pairs, field.name) != 0) { - // Only the lower half of the 128-bit V registers are restored during unwinding - @memcpy( - try regBytes(context.thread_context, 64 + 8 + i, context.reg_context), - std.mem.asBytes(@as(*const usize, @ptrFromInt(reg_addr))), - ); - reg_addr += @sizeOf(usize); - @memcpy( - try regBytes(context.thread_context, 64 + 9 + i, context.reg_context), - std.mem.asBytes(@as(*const usize, @ptrFromInt(reg_addr))), - ); - reg_addr += @sizeOf(usize); - } - } - - const new_ip = @as(*const usize, @ptrFromInt(ip_ptr)).*; - const new_fp = @as(*const usize, @ptrFromInt(fp)).*; + test find { + const symbols: []const MachoSymbol = &.{ + .{ .addr = 100, .strx = undefined, .size = undefined, .ofile = undefined }, + .{ .addr = 200, .strx = undefined, .size = undefined, .ofile = undefined }, + .{ .addr = 300, .strx = undefined, .size = undefined, .ofile = undefined }, + }; - (try regValueNative(context.thread_context, fpRegNum(reg_context), reg_context)).* = new_fp; - (try regValueNative(context.thread_context, ip_reg_num, reg_context)).* = new_ip; + try testing.expectEqual(null, find(symbols, 0)); + try testing.expectEqual(null, find(symbols, 99)); + try testing.expectEqual(&symbols[0], find(symbols, 100).?); + try testing.expectEqual(&symbols[0], find(symbols, 150).?); + try testing.expectEqual(&symbols[0], find(symbols, 199).?); - break :blk new_ip; - }, - }, - else => return error.UnimplementedArch, - }; + try testing.expectEqual(&symbols[1], find(symbols, 200).?); + try testing.expectEqual(&symbols[1], find(symbols, 250).?); + try testing.expectEqual(&symbols[1], find(symbols, 299).?); - context.pc = stripInstructionPtrAuthCode(new_ip); - if (context.pc > 0) context.pc -= 1; - return new_ip; + try testing.expectEqual(&symbols[2], find(symbols, 300).?); + try testing.expectEqual(&symbols[2], find(symbols, 301).?); + try testing.expectEqual(&symbols[2], find(symbols, 5000).?); + } +}; +test { + _ = 
MachoSymbol; } pub const UnwindContext = struct { - allocator: Allocator, + gpa: Allocator, cfa: ?usize, pc: usize, thread_context: *std.debug.ThreadContext, reg_context: Dwarf.abi.RegisterContext, - vm: VirtualMachine, + vm: Dwarf.Unwind.VirtualMachine, stack_machine: Dwarf.expression.StackMachine(.{ .call_frame_context = true }), - pub fn init( - allocator: Allocator, - thread_context: *std.debug.ThreadContext, - ) !UnwindContext { + pub fn init(gpa: Allocator, thread_context: *std.debug.ThreadContext) !UnwindContext { comptime assert(supports_unwinding); const pc = stripInstructionPtrAuthCode( (try regValueNative(thread_context, ip_reg_num, null)).*, ); - const context_copy = try allocator.create(std.debug.ThreadContext); + const context_copy = try gpa.create(std.debug.ThreadContext); std.debug.copyContext(thread_context, context_copy); return .{ - .allocator = allocator, + .gpa = gpa, .cfa = null, .pc = pc, .thread_context = context_copy, @@ -1544,15 +970,86 @@ pub const UnwindContext = struct { } pub fn deinit(self: *UnwindContext) void { - self.vm.deinit(self.allocator); - self.stack_machine.deinit(self.allocator); - self.allocator.destroy(self.thread_context); + self.vm.deinit(self.gpa); + self.stack_machine.deinit(self.gpa); + self.gpa.destroy(self.thread_context); self.* = undefined; } pub fn getFp(self: *const UnwindContext) !usize { return (try regValueNative(self.thread_context, fpRegNum(self.reg_context), self.reg_context)).*; } + + /// Resolves the register rule and places the result into `out` (see regBytes) + pub fn resolveRegisterRule( + context: *UnwindContext, + col: Dwarf.Unwind.VirtualMachine.Column, + expression_context: std.debug.Dwarf.expression.Context, + out: []u8, + ) !void { + switch (col.rule) { + .default => { + const register = col.register orelse return error.InvalidRegister; + // The default type is usually undefined, but can be overriden by ABI authors. + // See the doc comment on `Dwarf.Unwind.VirtualMachine.RegisterRule.default`. 
+ if (builtin.cpu.arch.isAARCH64() and register >= 19 and register <= 28) { + // Callee-saved registers (x19 through x28) are initialized as if they had the .same_value rule + const src = try regBytes(context.thread_context, register, context.reg_context); + if (src.len != out.len) return error.RegisterSizeMismatch; + @memcpy(out, src); + return; + } + @memset(out, undefined); + }, + .undefined => { + @memset(out, undefined); + }, + .same_value => { + // TODO: This copy could be eliminated if callers always copy the state then call this function to update it + const register = col.register orelse return error.InvalidRegister; + const src = try regBytes(context.thread_context, register, context.reg_context); + if (src.len != out.len) return error.RegisterSizeMismatch; + @memcpy(out, src); + }, + .offset => |offset| { + if (context.cfa) |cfa| { + const addr = try applyOffset(cfa, offset); + const ptr: *const usize = @ptrFromInt(addr); + mem.writeInt(usize, out[0..@sizeOf(usize)], ptr.*, native_endian); + } else return error.InvalidCFA; + }, + .val_offset => |offset| { + if (context.cfa) |cfa| { + mem.writeInt(usize, out[0..@sizeOf(usize)], try applyOffset(cfa, offset), native_endian); + } else return error.InvalidCFA; + }, + .register => |register| { + const src = try regBytes(context.thread_context, register, context.reg_context); + if (src.len != out.len) return error.RegisterSizeMismatch; + @memcpy(out, try regBytes(context.thread_context, register, context.reg_context)); + }, + .expression => |expression| { + context.stack_machine.reset(); + const value = try context.stack_machine.run(expression, context.gpa, expression_context, context.cfa.?); + const addr = if (value) |v| blk: { + if (v != .generic) return error.InvalidExpressionValue; + break :blk v.generic; + } else return error.NoExpressionValue; + + const ptr: *usize = @ptrFromInt(addr); + mem.writeInt(usize, out[0..@sizeOf(usize)], ptr.*, native_endian); + }, + .val_expression => |expression| { +
context.stack_machine.reset(); + const value = try context.stack_machine.run(expression, context.gpa, expression_context, context.cfa.?); + if (value) |v| { + if (v != .generic) return error.InvalidExpressionValue; + mem.writeInt(usize, out[0..@sizeOf(usize)], v.generic, native_endian); + } else return error.NoExpressionValue; + }, + .architectural => return error.UnimplementedRegisterRule, + } + } }; /// Some platforms use pointer authentication - the upper bits of instruction pointers contain a signature. @@ -1584,113 +1081,30 @@ pub inline fn stripInstructionPtrAuthCode(ptr: usize) usize { /// `explicit_fde_offset` is for cases where the FDE offset is known, such as when __unwind_info /// defers unwinding to DWARF. This is an offset into the `.eh_frame` section. fn unwindFrameDwarf( - allocator: Allocator, - unwind: *Dwarf.Unwind, - base_address: usize, + unwind: *const Dwarf.Unwind, + load_offset: usize, context: *UnwindContext, explicit_fde_offset: ?usize, ) !usize { if (!supports_unwinding) return error.UnsupportedCpuArchitecture; if (context.pc == 0) return 0; - // Find the FDE and CIE - const cie, const fde = if (explicit_fde_offset) |fde_offset| blk: { - const frame_section = unwind.section(.eh_frame) orelse return error.MissingFDE; - if (fde_offset >= frame_section.len) return error.MissingFDE; - - var fbr: std.Io.Reader = .fixed(frame_section); - fbr.seek = fde_offset; - - const fde_entry_header = try Dwarf.Unwind.EntryHeader.read(&fbr, .eh_frame, native_endian); - if (fde_entry_header.type != .fde) return error.MissingFDE; - - const cie_offset = fde_entry_header.type.fde; - fbr.seek = @intCast(cie_offset); - - const cie_entry_header = try Dwarf.Unwind.EntryHeader.read(&fbr, .eh_frame, native_endian); - if (cie_entry_header.type != .cie) return Dwarf.bad(); - - const cie = try Dwarf.Unwind.CommonInformationEntry.parse( - cie_entry_header.entry_bytes, - 0, - true, - cie_entry_header.format, - .eh_frame, - cie_entry_header.length_offset, - @sizeOf(usize), 
- native_endian, - ); - const fde = try Dwarf.Unwind.FrameDescriptionEntry.parse( - fde_entry_header.entry_bytes, - 0, - true, - cie, - @sizeOf(usize), - native_endian, - ); - - break :blk .{ cie, fde }; - } else blk: { - // `.eh_frame_hdr` may be incomplete. We'll try it first, but if the lookup fails, we fall - // back to loading `.eh_frame`/`.debug_frame` and using those from that point on. - - if (unwind.eh_frame_hdr) |header| hdr: { - const eh_frame_len = if (unwind.section(.eh_frame)) |eh_frame| eh_frame.len else { - try unwind.scanCieFdeInfo(allocator, native_endian, base_address); - unwind.eh_frame_hdr = null; - break :hdr; - }; - - var cie: Dwarf.Unwind.CommonInformationEntry = undefined; - var fde: Dwarf.Unwind.FrameDescriptionEntry = undefined; - - header.findEntry( - eh_frame_len, - @intFromPtr(unwind.section(.eh_frame_hdr).?.ptr), - context.pc, - &cie, - &fde, - native_endian, - ) catch |err| switch (err) { - error.MissingDebugInfo => { - // `.eh_frame_hdr` appears to be incomplete, so go ahead and populate `cie_map` - // and `fde_list`, and fall back to the binary search logic below. - try unwind.scanCieFdeInfo(allocator, native_endian, base_address); - - // Since `.eh_frame_hdr` is incomplete, we're very likely to get more lookup - // failures using it, and we've just built a complete, sorted list of FDEs - // anyway, so just stop using `.eh_frame_hdr` altogether. 
- unwind.eh_frame_hdr = null; - - break :hdr; - }, - else => return err, - }; - - break :blk .{ cie, fde }; - } - - const index = std.sort.binarySearch(Dwarf.Unwind.FrameDescriptionEntry, unwind.fde_list.items, context.pc, struct { - pub fn compareFn(pc: usize, item: Dwarf.Unwind.FrameDescriptionEntry) std.math.Order { - if (pc < item.pc_begin) return .lt; - - const range_end = item.pc_begin + item.pc_range; - if (pc < range_end) return .eq; + const pc_vaddr = context.pc - load_offset; - return .gt; - } - }.compareFn); - - const fde = if (index) |i| unwind.fde_list.items[i] else return error.MissingFDE; - const cie = unwind.cie_map.get(fde.cie_length_offset) orelse return error.MissingCIE; + const fde_offset = explicit_fde_offset orelse try unwind.findFdeOffset( + pc_vaddr, + @sizeOf(usize), + native_endian, + ) orelse return error.MissingDebugInfo; + const format, const cie, const fde = try unwind.loadFde(fde_offset, @sizeOf(usize), native_endian); - break :blk .{ cie, fde }; - }; + // Check if this FDE *actually* includes the address. + if (pc_vaddr < fde.pc_begin or pc_vaddr >= fde.pc_begin + fde.pc_range) return error.MissingDebugInfo; // Do not set `compile_unit` because the spec states that CFIs // may not reference other debug sections anyway. 
var expression_context: Dwarf.expression.Context = .{ - .format = cie.format, + .format = format, .thread_context = context.thread_context, .reg_context = context.reg_context, .cfa = context.cfa, @@ -1700,7 +1114,7 @@ fn unwindFrameDwarf( context.reg_context.eh_frame = cie.version != 4; context.reg_context.is_macho = native_os.isDarwin(); - const row = try context.vm.runToNative(context.allocator, context.pc, cie, fde); + const row = try context.vm.runTo(context.gpa, context.pc - load_offset, cie, fde, @sizeOf(usize), native_endian); context.cfa = switch (row.cfa.rule) { .val_offset => |offset| blk: { const register = row.cfa.register orelse return error.InvalidCFARule; @@ -1711,7 +1125,7 @@ fn unwindFrameDwarf( context.stack_machine.reset(); const value = try context.stack_machine.run( expr, - context.allocator, + context.gpa, expression_context, context.cfa, ); @@ -1728,9 +1142,9 @@ fn unwindFrameDwarf( // Buffering the modifications is done because copying the thread context is not portable, // some implementations (ie. darwin) use internal pointers to the mcontext. 
- var arena = std.heap.ArenaAllocator.init(context.allocator); + var arena: std.heap.ArenaAllocator = .init(context.gpa); defer arena.deinit(); - const update_allocator = arena.allocator(); + const update_arena = arena.allocator(); const RegisterUpdate = struct { // Backed by thread_context @@ -1749,17 +1163,16 @@ fn unwindFrameDwarf( } const dest = try regBytes(context.thread_context, register, context.reg_context); - const src = try update_allocator.alloc(u8, dest.len); + const src = try update_arena.alloc(u8, dest.len); + try context.resolveRegisterRule(column, expression_context, src); - const prev = update_tail; - update_tail = try update_allocator.create(RegisterUpdate); - update_tail.?.* = .{ + const new_update = try update_arena.create(RegisterUpdate); + new_update.* = .{ .dest = dest, .src = src, - .prev = prev, + .prev = update_tail, }; - - try column.resolveValue(context, expression_context, src); + update_tail = new_update; } } @@ -1792,7 +1205,7 @@ fn unwindFrameDwarf( // The exception to this rule is signal frames, where we return execution would be returned to the instruction // that triggered the handler. const return_address = context.pc; - if (context.pc > 0 and !cie.isSignalFrame()) context.pc -= 1; + if (context.pc > 0 and !cie.is_signal_frame) context.pc -= 1; return return_address; } @@ -1843,415 +1256,345 @@ pub fn supportsUnwinding(target: *const std.Target) bool { }; } -fn unwindFrameMachODwarf( - allocator: Allocator, - base_address: usize, - context: *UnwindContext, - eh_frame: []const u8, - fde_offset: usize, -) !usize { - var di: Dwarf = .{ - .endian = native_endian, - .is_macho = true, - }; - defer di.deinit(context.allocator); +/// Since register rules are applied (usually) during a panic, +/// checked addition / subtraction is used so that we can return +/// an error and fall back to FP-based unwinding. 
+fn applyOffset(base: usize, offset: i64) !usize { + return if (offset >= 0) + try std.math.add(usize, base, @as(usize, @intCast(offset))) + else + try std.math.sub(usize, base, @as(usize, @intCast(-offset))); +} - di.sections[@intFromEnum(Dwarf.Section.Id.eh_frame)] = .{ - .data = eh_frame, - .owned = false, - }; +/// Uses `mmap` to map the file at `opt_path` (or, if `null`, the self executable image) into memory. +fn mapFileOrSelfExe(opt_path: ?[]const u8) ![]align(std.heap.page_size_min) const u8 { + const file = if (opt_path) |path| + try fs.cwd().openFile(path, .{}) + else + try fs.openSelfExe(.{}); + defer file.close(); + + const file_len = math.cast(usize, try file.getEndPos()) orelse return error.FileTooBig; - return unwindFrameDwarf(allocator, &di, base_address, context, fde_offset); + return posix.mmap( + null, + file_len, + posix.PROT.READ, + .{ .TYPE = .SHARED }, + file.handle, + 0, + ); } -/// This is a virtual machine that runs DWARF call frame instructions. -pub const VirtualMachine = struct { - /// See section 6.4.1 of the DWARF5 specification for details on each - const RegisterRule = union(enum) { - // The spec says that the default rule for each column is the undefined rule. - // However, it also allows ABI / compiler authors to specify alternate defaults, so - // there is a distinction made here. - default: void, - undefined: void, - same_value: void, - // offset(N) - offset: i64, - // val_offset(N) - val_offset: i64, - // register(R) - register: u8, - // expression(E) - expression: []const u8, - // val_expression(E) - val_expression: []const u8, - // Augmenter-defined rule - architectural: void, - }; +/// Unwind a frame using MachO compact unwind info (from __unwind_info). +/// If the compact encoding can't encode a way to unwind a frame, it will +/// defer unwinding to DWARF, in which case `.eh_frame` will be used if available. 
+fn unwindFrameMachO( + text_base: usize, + load_offset: usize, + context: *UnwindContext, + unwind_info: []const u8, + eh_frame: ?[]const u8, +) !usize { + if (unwind_info.len < @sizeOf(macho.unwind_info_section_header)) return error.InvalidUnwindInfo; + const header: *align(1) const macho.unwind_info_section_header = @ptrCast(unwind_info); - /// Each row contains unwinding rules for a set of registers. - pub const Row = struct { - /// Offset from `FrameDescriptionEntry.pc_begin` - offset: u64 = 0, - /// Special-case column that defines the CFA (Canonical Frame Address) rule. - /// The register field of this column defines the register that CFA is derived from. - cfa: Column = .{}, - /// The register fields in these columns define the register the rule applies to. - columns: ColumnRange = .{}, - /// Indicates that the next write to any column in this row needs to copy - /// the backing column storage first, as it may be referenced by previous rows. - copy_on_write: bool = false, - }; + const index_byte_count = header.indexCount * @sizeOf(macho.unwind_info_section_header_index_entry); + if (unwind_info.len < header.indexSectionOffset + index_byte_count) return error.InvalidUnwindInfo; + const indices: []align(1) const macho.unwind_info_section_header_index_entry = @ptrCast(unwind_info[header.indexSectionOffset..][0..index_byte_count]); + if (indices.len == 0) return error.MissingUnwindInfo; - pub const Column = struct { - register: ?u8 = null, - rule: RegisterRule = .{ .default = {} }, - - /// Resolves the register rule and places the result into `out` (see regBytes) - pub fn resolveValue( - self: Column, - context: *SelfInfo.UnwindContext, - expression_context: std.debug.Dwarf.expression.Context, - out: []u8, - ) !void { - switch (self.rule) { - .default => { - const register = self.register orelse return error.InvalidRegister; - try getRegDefaultValue(register, context, out); - }, - .undefined => { - @memset(out, undefined); - }, - .same_value => { - // TODO: 
This copy could be eliminated if callers always copy the state then call this function to update it - const register = self.register orelse return error.InvalidRegister; - const src = try regBytes(context.thread_context, register, context.reg_context); - if (src.len != out.len) return error.RegisterSizeMismatch; - @memcpy(out, src); - }, - .offset => |offset| { - if (context.cfa) |cfa| { - const addr = try applyOffset(cfa, offset); - const ptr: *const usize = @ptrFromInt(addr); - mem.writeInt(usize, out[0..@sizeOf(usize)], ptr.*, native_endian); - } else return error.InvalidCFA; - }, - .val_offset => |offset| { - if (context.cfa) |cfa| { - mem.writeInt(usize, out[0..@sizeOf(usize)], try applyOffset(cfa, offset), native_endian); - } else return error.InvalidCFA; - }, - .register => |register| { - const src = try regBytes(context.thread_context, register, context.reg_context); - if (src.len != out.len) return error.RegisterSizeMismatch; - @memcpy(out, try regBytes(context.thread_context, register, context.reg_context)); - }, - .expression => |expression| { - context.stack_machine.reset(); - const value = try context.stack_machine.run(expression, context.allocator, expression_context, context.cfa.?); - const addr = if (value) |v| blk: { - if (v != .generic) return error.InvalidExpressionValue; - break :blk v.generic; - } else return error.NoExpressionValue; - - const ptr: *usize = @ptrFromInt(addr); - mem.writeInt(usize, out[0..@sizeOf(usize)], ptr.*, native_endian); - }, - .val_expression => |expression| { - context.stack_machine.reset(); - const value = try context.stack_machine.run(expression, context.allocator, expression_context, context.cfa.?); - if (value) |v| { - if (v != .generic) return error.InvalidExpressionValue; - mem.writeInt(usize, out[0..@sizeOf(usize)], v.generic, native_endian); - } else return error.NoExpressionValue; - }, - .architectural => return error.UnimplementedRegisterRule, + // MLUGG TODO HACKHACK -- Unwind needs a slight refactor to make 
this work well + const opt_dwarf_unwind: ?Dwarf.Unwind = if (eh_frame) |eh_frame_data| .{ + .debug_frame = null, + .eh_frame = .{ + .header = .{ + .vaddr = undefined, + .eh_frame_vaddr = @intFromPtr(eh_frame_data.ptr) - load_offset, + .search_table = null, + }, + .eh_frame_data = eh_frame_data, + .sorted_fdes = null, + }, + } else null; + + // offset of the PC into the `__TEXT` segment + const pc_text_offset = context.pc - text_base; + + const start_offset: u32, const first_level_offset: u32 = index: { + var left: usize = 0; + var len: usize = indices.len; + while (len > 1) { + const mid = left + len / 2; + if (pc_text_offset < indices[mid].functionOffset) { + len /= 2; + } else { + left = mid; + len -= len / 2; } } + break :index .{ indices[left].secondLevelPagesSectionOffset, indices[left].functionOffset }; }; + // An offset of 0 is a sentinel indicating a range does not have unwind info. + if (start_offset == 0) return error.MissingUnwindInfo; - const ColumnRange = struct { - /// Index into `columns` of the first column in this row. 
- start: usize = undefined, - len: u8 = 0, - }; + const common_encodings_byte_count = header.commonEncodingsArrayCount * @sizeOf(macho.compact_unwind_encoding_t); + if (unwind_info.len < header.commonEncodingsArraySectionOffset + common_encodings_byte_count) return error.InvalidUnwindInfo; + const common_encodings: []align(1) const macho.compact_unwind_encoding_t = @ptrCast( + unwind_info[header.commonEncodingsArraySectionOffset..][0..common_encodings_byte_count], + ); - columns: std.ArrayListUnmanaged(Column) = .empty, - stack: std.ArrayListUnmanaged(ColumnRange) = .empty, - current_row: Row = .{}, + if (unwind_info.len < start_offset + @sizeOf(macho.UNWIND_SECOND_LEVEL)) return error.InvalidUnwindInfo; + const kind: *align(1) const macho.UNWIND_SECOND_LEVEL = @ptrCast(unwind_info[start_offset..]); - /// The result of executing the CIE's initial_instructions - cie_row: ?Row = null, + const entry: struct { + function_offset: usize, + raw_encoding: u32, + } = switch (kind.*) { + .REGULAR => entry: { + if (unwind_info.len < start_offset + @sizeOf(macho.unwind_info_regular_second_level_page_header)) return error.InvalidUnwindInfo; + const page_header: *align(1) const macho.unwind_info_regular_second_level_page_header = @ptrCast(unwind_info[start_offset..]); + + const entries_byte_count = page_header.entryCount * @sizeOf(macho.unwind_info_regular_second_level_entry); + if (unwind_info.len < start_offset + entries_byte_count) return error.InvalidUnwindInfo; + const entries: []align(1) const macho.unwind_info_regular_second_level_entry = @ptrCast( + unwind_info[start_offset + page_header.entryPageOffset ..][0..entries_byte_count], + ); + if (entries.len == 0) return error.InvalidUnwindInfo; - pub fn deinit(self: *VirtualMachine, allocator: std.mem.Allocator) void { - self.stack.deinit(allocator); - self.columns.deinit(allocator); - self.* = undefined; - } + var left: usize = 0; + var len: usize = entries.len; + while (len > 1) { + const mid = left + len / 2; + if 
(pc_text_offset < entries[mid].functionOffset) { + len /= 2; + } else { + left = mid; + len -= len / 2; + } + } + break :entry .{ + .function_offset = entries[left].functionOffset, + .raw_encoding = entries[left].encoding, + }; + }, + .COMPRESSED => entry: { + if (unwind_info.len < start_offset + @sizeOf(macho.unwind_info_compressed_second_level_page_header)) return error.InvalidUnwindInfo; + const page_header: *align(1) const macho.unwind_info_compressed_second_level_page_header = @ptrCast(unwind_info[start_offset..]); + + const entries_byte_count = page_header.entryCount * @sizeOf(macho.UnwindInfoCompressedEntry); + if (unwind_info.len < start_offset + entries_byte_count) return error.InvalidUnwindInfo; + const entries: []align(1) const macho.UnwindInfoCompressedEntry = @ptrCast( + unwind_info[start_offset + page_header.entryPageOffset ..][0..entries_byte_count], + ); + if (entries.len == 0) return error.InvalidUnwindInfo; - pub fn reset(self: *VirtualMachine) void { - self.stack.clearRetainingCapacity(); - self.columns.clearRetainingCapacity(); - self.current_row = .{}; - self.cie_row = null; - } + var left: usize = 0; + var len: usize = entries.len; + while (len > 1) { + const mid = left + len / 2; + if (pc_text_offset < first_level_offset + entries[mid].funcOffset) { + len /= 2; + } else { + left = mid; + len -= len / 2; + } + } + const entry = entries[left]; - /// Return a slice backed by the row's non-CFA columns - pub fn rowColumns(self: VirtualMachine, row: Row) []Column { - if (row.columns.len == 0) return &.{}; - return self.columns.items[row.columns.start..][0..row.columns.len]; - } + const function_offset = first_level_offset + entry.funcOffset; + if (entry.encodingIndex < common_encodings.len) { + break :entry .{ + .function_offset = function_offset, + .raw_encoding = common_encodings[entry.encodingIndex], + }; + } - /// Either retrieves or adds a column for `register` (non-CFA) in the current row. 
- fn getOrAddColumn(self: *VirtualMachine, allocator: std.mem.Allocator, register: u8) !*Column { - for (self.rowColumns(self.current_row)) |*c| { - if (c.register == register) return c; - } + const local_index = entry.encodingIndex - common_encodings.len; + const local_encodings_byte_count = page_header.encodingsCount * @sizeOf(macho.compact_unwind_encoding_t); + if (unwind_info.len < start_offset + page_header.encodingsPageOffset + local_encodings_byte_count) return error.InvalidUnwindInfo; + const local_encodings: []align(1) const macho.compact_unwind_encoding_t = @ptrCast( + unwind_info[start_offset + page_header.encodingsPageOffset ..][0..local_encodings_byte_count], + ); + if (local_index >= local_encodings.len) return error.InvalidUnwindInfo; + break :entry .{ + .function_offset = function_offset, + .raw_encoding = local_encodings[local_index], + }; + }, + else => return error.InvalidUnwindInfo, + }; - if (self.current_row.columns.len == 0) { - self.current_row.columns.start = self.columns.items.len; - } - self.current_row.columns.len += 1; + if (entry.raw_encoding == 0) return error.NoUnwindInfo; + const reg_context: Dwarf.abi.RegisterContext = .{ .eh_frame = false, .is_macho = true }; - const column = try self.columns.addOne(allocator); - column.* = .{ - .register = register, - }; + const encoding: macho.CompactUnwindEncoding = @bitCast(entry.raw_encoding); + const new_ip = switch (builtin.cpu.arch) { + .x86_64 => switch (encoding.mode.x86_64) { + .OLD => return error.UnimplementedUnwindEncoding, + .RBP_FRAME => ip: { + const frame = encoding.value.x86_64.frame; - return column; - } + const fp = (try regValueNative(context.thread_context, fpRegNum(reg_context), reg_context)).*; + const new_sp = fp + 2 * @sizeOf(usize); - /// Runs the CIE instructions, then the FDE instructions. Execution halts - /// once the row that corresponds to `pc` is known, and the row is returned. 
- pub fn runTo( - self: *VirtualMachine, - allocator: std.mem.Allocator, - pc: u64, - cie: std.debug.Dwarf.Unwind.CommonInformationEntry, - fde: std.debug.Dwarf.Unwind.FrameDescriptionEntry, - addr_size_bytes: u8, - endian: std.builtin.Endian, - ) !Row { - assert(self.cie_row == null); - if (pc < fde.pc_begin or pc >= fde.pc_begin + fde.pc_range) return error.AddressOutOfRange; - - var prev_row: Row = self.current_row; - - var cie_stream: std.Io.Reader = .fixed(cie.initial_instructions); - var fde_stream: std.Io.Reader = .fixed(fde.instructions); - const streams = [_]*std.Io.Reader{ &cie_stream, &fde_stream }; - - for (&streams, 0..) |stream, i| { - while (stream.seek < stream.buffer.len) { - const instruction = try std.debug.Dwarf.call_frame.Instruction.read(stream, addr_size_bytes, endian); - prev_row = try self.step(allocator, cie, i == 0, instruction); - if (pc < fde.pc_begin + self.current_row.offset) return prev_row; - } - } + const ip_ptr = fp + @sizeOf(usize); + const new_ip = @as(*const usize, @ptrFromInt(ip_ptr)).*; + const new_fp = @as(*const usize, @ptrFromInt(fp)).*; - return self.current_row; - } + (try regValueNative(context.thread_context, fpRegNum(reg_context), reg_context)).* = new_fp; + (try regValueNative(context.thread_context, spRegNum(reg_context), reg_context)).* = new_sp; + (try regValueNative(context.thread_context, ip_reg_num, reg_context)).* = new_ip; - pub fn runToNative( - self: *VirtualMachine, - allocator: std.mem.Allocator, - pc: u64, - cie: std.debug.Dwarf.Unwind.CommonInformationEntry, - fde: std.debug.Dwarf.Unwind.FrameDescriptionEntry, - ) !Row { - return self.runTo(allocator, pc, cie, fde, @sizeOf(usize), native_endian); - } + const regs: [5]u3 = .{ + frame.reg0, + frame.reg1, + frame.reg2, + frame.reg3, + frame.reg4, + }; + for (regs, 0..) 
|reg, i| { + if (reg == 0) continue; + const addr = fp - frame.frame_offset * @sizeOf(usize) + i * @sizeOf(usize); + const reg_number = try Dwarf.compactUnwindToDwarfRegNumber(reg); + (try regValueNative(context.thread_context, reg_number, reg_context)).* = @as(*const usize, @ptrFromInt(addr)).*; + } - fn resolveCopyOnWrite(self: *VirtualMachine, allocator: std.mem.Allocator) !void { - if (!self.current_row.copy_on_write) return; + break :ip new_ip; + }, + .STACK_IMMD, + .STACK_IND, + => ip: { + const frameless = encoding.value.x86_64.frameless; - const new_start = self.columns.items.len; - if (self.current_row.columns.len > 0) { - try self.columns.ensureUnusedCapacity(allocator, self.current_row.columns.len); - self.columns.appendSliceAssumeCapacity(self.rowColumns(self.current_row)); - self.current_row.columns.start = new_start; - } - } + const sp = (try regValueNative(context.thread_context, spRegNum(reg_context), reg_context)).*; + const stack_size: usize = stack_size: { + if (encoding.mode.x86_64 == .STACK_IMMD) { + break :stack_size @as(usize, frameless.stack.direct.stack_size) * @sizeOf(usize); + } + // In .STACK_IND, the stack size is inferred from the subq instruction at the beginning of the function. + const sub_offset_addr = + text_base + + entry.function_offset + + frameless.stack.indirect.sub_offset; + // `sub_offset_addr` points to the offset of the literal within the instruction + const sub_operand = @as(*align(1) const u32, @ptrFromInt(sub_offset_addr)).*; + break :stack_size sub_operand + @sizeOf(usize) * @as(usize, frameless.stack.indirect.stack_adjust); + }; - /// Executes a single instruction. - /// If this instruction is from the CIE, `is_initial` should be set. - /// Returns the value of `current_row` before executing this instruction. 
- pub fn step( - self: *VirtualMachine, - allocator: std.mem.Allocator, - cie: std.debug.Dwarf.Unwind.CommonInformationEntry, - is_initial: bool, - instruction: Dwarf.call_frame.Instruction, - ) !Row { - // CIE instructions must be run before FDE instructions - assert(!is_initial or self.cie_row == null); - if (!is_initial and self.cie_row == null) { - self.cie_row = self.current_row; - self.current_row.copy_on_write = true; - } + // Decode the Lehmer-coded sequence of registers. + // For a description of the encoding see lib/libc/include/any-macos.13-any/mach-o/compact_unwind_encoding.h - const prev_row = self.current_row; - switch (instruction) { - .set_loc => |i| { - if (i.address <= self.current_row.offset) return error.InvalidOperation; - // TODO: Check cie.segment_selector_size != 0 for DWARFV4 - self.current_row.offset = i.address; - }, - inline .advance_loc, - .advance_loc1, - .advance_loc2, - .advance_loc4, - => |i| { - self.current_row.offset += i.delta * cie.code_alignment_factor; - self.current_row.copy_on_write = true; - }, - inline .offset, - .offset_extended, - .offset_extended_sf, - => |i| { - try self.resolveCopyOnWrite(allocator); - const column = try self.getOrAddColumn(allocator, i.register); - column.rule = .{ .offset = @as(i64, @intCast(i.offset)) * cie.data_alignment_factor }; - }, - inline .restore, - .restore_extended, - => |i| { - try self.resolveCopyOnWrite(allocator); - if (self.cie_row) |cie_row| { - const column = try self.getOrAddColumn(allocator, i.register); - column.rule = for (self.rowColumns(cie_row)) |cie_column| { - if (cie_column.register == i.register) break cie_column.rule; - } else .{ .default = {} }; - } else return error.InvalidOperation; - }, - .nop => {}, - .undefined => |i| { - try self.resolveCopyOnWrite(allocator); - const column = try self.getOrAddColumn(allocator, i.register); - column.rule = .{ .undefined = {} }; - }, - .same_value => |i| { - try self.resolveCopyOnWrite(allocator); - const column = try 
self.getOrAddColumn(allocator, i.register); - column.rule = .{ .same_value = {} }; - }, - .register => |i| { - try self.resolveCopyOnWrite(allocator); - const column = try self.getOrAddColumn(allocator, i.register); - column.rule = .{ .register = i.target_register }; - }, - .remember_state => { - try self.stack.append(allocator, self.current_row.columns); - self.current_row.copy_on_write = true; - }, - .restore_state => { - const restored_columns = self.stack.pop() orelse return error.InvalidOperation; - self.columns.shrinkRetainingCapacity(self.columns.items.len - self.current_row.columns.len); - try self.columns.ensureUnusedCapacity(allocator, restored_columns.len); - - self.current_row.columns.start = self.columns.items.len; - self.current_row.columns.len = restored_columns.len; - self.columns.appendSliceAssumeCapacity(self.columns.items[restored_columns.start..][0..restored_columns.len]); - }, - .def_cfa => |i| { - try self.resolveCopyOnWrite(allocator); - self.current_row.cfa = .{ - .register = i.register, - .rule = .{ .val_offset = @intCast(i.offset) }, - }; - }, - .def_cfa_sf => |i| { - try self.resolveCopyOnWrite(allocator); - self.current_row.cfa = .{ - .register = i.register, - .rule = .{ .val_offset = i.offset * cie.data_alignment_factor }, - }; - }, - .def_cfa_register => |i| { - try self.resolveCopyOnWrite(allocator); - if (self.current_row.cfa.register == null or self.current_row.cfa.rule != .val_offset) return error.InvalidOperation; - self.current_row.cfa.register = i.register; - }, - .def_cfa_offset => |i| { - try self.resolveCopyOnWrite(allocator); - if (self.current_row.cfa.register == null or self.current_row.cfa.rule != .val_offset) return error.InvalidOperation; - self.current_row.cfa.rule = .{ - .val_offset = @intCast(i.offset), - }; - }, - .def_cfa_offset_sf => |i| { - try self.resolveCopyOnWrite(allocator); - if (self.current_row.cfa.register == null or self.current_row.cfa.rule != .val_offset) return error.InvalidOperation; - 
self.current_row.cfa.rule = .{ - .val_offset = i.offset * cie.data_alignment_factor, - }; - }, - .def_cfa_expression => |i| { - try self.resolveCopyOnWrite(allocator); - self.current_row.cfa.register = undefined; - self.current_row.cfa.rule = .{ - .expression = i.block, - }; - }, - .expression => |i| { - try self.resolveCopyOnWrite(allocator); - const column = try self.getOrAddColumn(allocator, i.register); - column.rule = .{ - .expression = i.block, + // Decode the variable-based permutation number into its digits. Each digit represents + // an index into the list of register numbers that weren't yet used in the sequence at + // the time the digit was added. + const reg_count = frameless.stack_reg_count; + const ip_ptr = ip_ptr: { + var digits: [6]u3 = undefined; + var accumulator: usize = frameless.stack_reg_permutation; + var base: usize = 2; + for (0..reg_count) |i| { + const div = accumulator / base; + digits[digits.len - 1 - i] = @intCast(accumulator - base * div); + accumulator = div; + base += 1; + } + + var registers: [6]u3 = undefined; + var used_indices: [6]bool = @splat(false); + for (digits[digits.len - reg_count ..], 0..) |target_unused_index, i| { + var unused_count: u8 = 0; + const unused_index = for (used_indices, 0..) 
|used, index| { + if (!used) { + if (target_unused_index == unused_count) break index; + unused_count += 1; + } + } else unreachable; + registers[i] = @intCast(unused_index + 1); + used_indices[unused_index] = true; + } + + var reg_addr = sp + stack_size - @sizeOf(usize) * @as(usize, reg_count + 1); + for (0..reg_count) |i| { + const reg_number = try Dwarf.compactUnwindToDwarfRegNumber(registers[i]); + (try regValueNative(context.thread_context, reg_number, reg_context)).* = @as(*const usize, @ptrFromInt(reg_addr)).*; + reg_addr += @sizeOf(usize); + } + + break :ip_ptr reg_addr; }; + + const new_ip = @as(*const usize, @ptrFromInt(ip_ptr)).*; + const new_sp = ip_ptr + @sizeOf(usize); + + (try regValueNative(context.thread_context, spRegNum(reg_context), reg_context)).* = new_sp; + (try regValueNative(context.thread_context, ip_reg_num, reg_context)).* = new_ip; + + break :ip new_ip; }, - .val_offset => |i| { - try self.resolveCopyOnWrite(allocator); - const column = try self.getOrAddColumn(allocator, i.register); - column.rule = .{ - .val_offset = @as(i64, @intCast(i.offset)) * cie.data_alignment_factor, - }; + .DWARF => { + const dwarf_unwind = &(opt_dwarf_unwind orelse return error.MissingEhFrame); + return unwindFrameDwarf(dwarf_unwind, load_offset, context, @intCast(encoding.value.x86_64.dwarf)); }, - .val_offset_sf => |i| { - try self.resolveCopyOnWrite(allocator); - const column = try self.getOrAddColumn(allocator, i.register); - column.rule = .{ - .val_offset = i.offset * cie.data_alignment_factor, - }; + }, + .aarch64, .aarch64_be => switch (encoding.mode.arm64) { + .OLD => return error.UnimplementedUnwindEncoding, + .FRAMELESS => ip: { + const sp = (try regValueNative(context.thread_context, spRegNum(reg_context), reg_context)).*; + const new_sp = sp + encoding.value.arm64.frameless.stack_size * 16; + const new_ip = (try regValueNative(context.thread_context, 30, reg_context)).*; + (try regValueNative(context.thread_context, spRegNum(reg_context), 
reg_context)).* = new_sp; + break :ip new_ip; }, - .val_expression => |i| { - try self.resolveCopyOnWrite(allocator); - const column = try self.getOrAddColumn(allocator, i.register); - column.rule = .{ - .val_expression = i.block, - }; + .DWARF => { + const dwarf_unwind = &(opt_dwarf_unwind orelse return error.MissingEhFrame); + return unwindFrameDwarf(dwarf_unwind, load_offset, context, @intCast(encoding.value.arm64.dwarf)); }, - } + .FRAME => ip: { + const frame = encoding.value.arm64.frame; - return prev_row; - } -}; + const fp = (try regValueNative(context.thread_context, fpRegNum(reg_context), reg_context)).*; + const ip_ptr = fp + @sizeOf(usize); -/// Returns the ABI-defined default value this register has in the unwinding table -/// before running any of the CIE instructions. The DWARF spec defines these as having -/// the .undefined rule by default, but allows ABI authors to override that. -fn getRegDefaultValue(reg_number: u8, context: *UnwindContext, out: []u8) !void { - switch (builtin.cpu.arch) { - .aarch64, .aarch64_be => { - // Callee-saved registers are initialized as if they had the .same_value rule - if (reg_number >= 19 and reg_number <= 28) { - const src = try regBytes(context.thread_context, reg_number, context.reg_context); - if (src.len != out.len) return error.RegisterSizeMismatch; - @memcpy(out, src); - return; - } - }, - else => {}, - } + var reg_addr = fp - @sizeOf(usize); + inline for (@typeInfo(@TypeOf(frame.x_reg_pairs)).@"struct".fields, 0..) |field, i| { + if (@field(frame.x_reg_pairs, field.name) != 0) { + (try regValueNative(context.thread_context, 19 + i, reg_context)).* = @as(*const usize, @ptrFromInt(reg_addr)).*; + reg_addr += @sizeOf(usize); + (try regValueNative(context.thread_context, 20 + i, reg_context)).* = @as(*const usize, @ptrFromInt(reg_addr)).*; + reg_addr += @sizeOf(usize); + } + } - @memset(out, undefined); -} + inline for (@typeInfo(@TypeOf(frame.d_reg_pairs)).@"struct".fields, 0..) 
|field, i| { + if (@field(frame.d_reg_pairs, field.name) != 0) { + // Only the lower half of the 128-bit V registers are restored during unwinding + { + const dest: *align(1) usize = @ptrCast(try regBytes(context.thread_context, 64 + 8 + i, context.reg_context)); + dest.* = @as(*const usize, @ptrFromInt(reg_addr)).*; + } + reg_addr += @sizeOf(usize); + { + const dest: *align(1) usize = @ptrCast(try regBytes(context.thread_context, 64 + 9 + i, context.reg_context)); + dest.* = @as(*const usize, @ptrFromInt(reg_addr)).*; + } + reg_addr += @sizeOf(usize); + } + } -/// Since register rules are applied (usually) during a panic, -/// checked addition / subtraction is used so that we can return -/// an error and fall back to FP-based unwinding. -fn applyOffset(base: usize, offset: i64) !usize { - return if (offset >= 0) - try std.math.add(usize, base, @as(usize, @intCast(offset))) - else - try std.math.sub(usize, base, @as(usize, @intCast(-offset))); + const new_ip = @as(*const usize, @ptrFromInt(ip_ptr)).*; + const new_fp = @as(*const usize, @ptrFromInt(fp)).*; + + (try regValueNative(context.thread_context, fpRegNum(reg_context), reg_context)).* = new_fp; + (try regValueNative(context.thread_context, ip_reg_num, reg_context)).* = new_ip; + + break :ip new_ip; + }, + }, + else => comptime unreachable, // unimplemented + }; + + context.pc = stripInstructionPtrAuthCode(new_ip); + if (context.pc > 0) context.pc -= 1; + return new_ip; } |
