Diffstat (limited to 'lib/std/debug')
-rw-r--r--  lib/std/debug/Coverage.zig                        3
-rw-r--r--  lib/std/debug/Dwarf.zig                        1528
-rw-r--r--  lib/std/debug/Dwarf/SelfUnwinder.zig            334
-rw-r--r--  lib/std/debug/Dwarf/Unwind.zig                  702
-rw-r--r--  lib/std/debug/Dwarf/Unwind/VirtualMachine.zig   459
-rw-r--r--  lib/std/debug/Dwarf/abi.zig                     351
-rw-r--r--  lib/std/debug/Dwarf/call_frame.zig              292
-rw-r--r--  lib/std/debug/Dwarf/expression.zig              161
-rw-r--r--  lib/std/debug/ElfFile.zig                       536
-rw-r--r--  lib/std/debug/Info.zig                           30
-rw-r--r--  lib/std/debug/Pdb.zig                            37
-rw-r--r--  lib/std/debug/SelfInfo.zig                     2238
-rw-r--r--  lib/std/debug/SelfInfo/Darwin.zig               993
-rw-r--r--  lib/std/debug/SelfInfo/Elf.zig                  427
-rw-r--r--  lib/std/debug/SelfInfo/Windows.zig              559
-rw-r--r--  lib/std/debug/cpu_context.zig                  1028
16 files changed, 5447 insertions(+), 4231 deletions(-)
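
The hunks below thread an explicit `endian: std.builtin.Endian` parameter through the DWARF APIs (previously stored as a field on `Dwarf`) and split frame-unwinding support out of `Dwarf.zig` into `Dwarf/Unwind.zig` and `Dwarf/SelfUnwinder.zig`. As a rough sketch of the resulting call shape — assuming the caller has already filled `dwarf.sections` with section data parsed from the binary (that loading step is elided here, and this usage is illustrative rather than taken from the commit) — consuming code might look like:

const std = @import("std");
const Dwarf = std.debug.Dwarf;

fn printLineInfo(gpa: std.mem.Allocator, dwarf: *Dwarf, endian: std.builtin.Endian, pc: u64) !void {
    // `open` scans functions and compile units; endianness is now passed per
    // call rather than read from a `Dwarf` field.
    try dwarf.open(gpa, endian);
    const cu = try dwarf.findCompileUnit(endian, pc);
    const src = try dwarf.getLineNumberInfo(gpa, endian, cu, pc);
    std.debug.print("{s}:{d}:{d}\n", .{ src.file_name, src.line, src.column });
}

Passing the endianness at each call site is what allows a single `Dwarf` value to describe foreign-endian binaries, in line with the updated doc comment that the API "makes no assumptions about the relationship between the host and the target being debugged."
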
diff --git a/lib/std/debug/Coverage.zig b/lib/std/debug/Coverage.zig index 58e600dc63..f1621c0e12 100644 --- a/lib/std/debug/Coverage.zig +++ b/lib/std/debug/Coverage.zig @@ -145,6 +145,7 @@ pub const ResolveAddressesDwarfError = Dwarf.ScanError; pub fn resolveAddressesDwarf( cov: *Coverage, gpa: Allocator, + endian: std.builtin.Endian, /// Asserts the addresses are in ascending order. sorted_pc_addrs: []const u64, /// Asserts its length equals length of `sorted_pc_addrs`. @@ -184,7 +185,7 @@ pub fn resolveAddressesDwarf( if (cu.src_loc_cache == null) { cov.mutex.unlock(); defer cov.mutex.lock(); - d.populateSrcLocCache(gpa, cu) catch |err| switch (err) { + d.populateSrcLocCache(gpa, endian, cu) catch |err| switch (err) { error.MissingDebugInfo, error.InvalidDebugInfo => { out.* = SourceLocation.invalid; continue :next_pc; diff --git a/lib/std/debug/Dwarf.zig b/lib/std/debug/Dwarf.zig index 5f448075a8..7af76d02a1 100644 --- a/lib/std/debug/Dwarf.zig +++ b/lib/std/debug/Dwarf.zig @@ -1,22 +1,18 @@ //! Implements parsing, decoding, and caching of DWARF information. //! -//! This API does not assume the current executable is itself the thing being -//! debugged, however, it does assume the debug info has the same CPU -//! architecture and OS as the current executable. It is planned to remove this -//! limitation. +//! This API makes no assumptions about the relationship between the host and +//! the target being debugged. In other words, any DWARF information can be used +//! from any host via this API. Note, however, that the limits of 32-bit +//! addressing can cause very large 64-bit binaries to be impossible to open on +//! 32-bit hosts. //! //! For unopinionated types and bits, see `std.dwarf`. -const builtin = @import("builtin"); -const native_endian = builtin.cpu.arch.endian(); - const std = @import("../std.zig"); const Allocator = std.mem.Allocator; -const elf = std.elf; const mem = std.mem; const DW = std.dwarf; const AT = DW.AT; -const EH = DW.EH; const FORM = DW.FORM; const Format = DW.Format; const RLE = DW.RLE; @@ -24,7 +20,6 @@ const UT = DW.UT; const assert = std.debug.assert; const cast = std.math.cast; const maxInt = std.math.maxInt; -const Path = std.Build.Cache.Path; const ArrayList = std.ArrayList; const Endian = std.builtin.Endian; const Reader = std.Io.Reader; @@ -32,15 +27,13 @@ const Reader = std.Io.Reader; const Dwarf = @This(); pub const expression = @import("Dwarf/expression.zig"); -pub const abi = @import("Dwarf/abi.zig"); -pub const call_frame = @import("Dwarf/call_frame.zig"); +pub const Unwind = @import("Dwarf/Unwind.zig"); +pub const SelfUnwinder = @import("Dwarf/SelfUnwinder.zig"); /// Useful to temporarily enable while working on this file. const debug_debug_mode = false; -endian: Endian, -sections: SectionArray = null_section_array, -is_macho: bool, +sections: SectionArray = @splat(null), /// Filled later by the initializer abbrev_table_list: ArrayList(Abbrev.Table) = .empty, @@ -49,14 +42,6 @@ compile_unit_list: ArrayList(CompileUnit) = .empty, /// Filled later by the initializer func_list: ArrayList(Func) = .empty, -/// Starts out non-`null` if the `.eh_frame_hdr` section is present. May become `null` later if we -/// find that `.eh_frame_hdr` is incomplete. -eh_frame_hdr: ?ExceptionFrameHeader = null, -/// These lookup tables are only used if `eh_frame_hdr` is null -cie_map: std.AutoArrayHashMapUnmanaged(u64, CommonInformationEntry) = .empty, -/// Sorted by start_pc -fde_list: ArrayList(FrameDescriptionEntry) = .empty, - /// Populated by `populateRanges`. 
ranges: ArrayList(Range) = .empty, @@ -69,10 +54,7 @@ pub const Range = struct { pub const Section = struct { data: []const u8, - // Module-relative virtual address. - // Only set if the section data was loaded from disk. - virtual_address: ?usize = null, - // If `data` is owned by this Dwarf. + /// If `data` is owned by this Dwarf. owned: bool, pub const Id = enum { @@ -87,21 +69,7 @@ pub const Section = struct { debug_rnglists, debug_addr, debug_names, - debug_frame, - eh_frame, - eh_frame_hdr, }; - - // For sections that are not memory mapped by the loader, this is an offset - // from `data.ptr` to where the section would have been mapped. Otherwise, - // `data` is directly backed by the section and the offset is zero. - pub fn virtualOffset(self: Section, base_address: usize) i64 { - return if (self.virtual_address) |va| - @as(i64, @intCast(base_address + va)) - - @as(i64, @intCast(@intFromPtr(self.data.ptr))) - else - 0; - } }; pub const Abbrev = struct { @@ -110,8 +78,8 @@ pub const Abbrev = struct { has_children: bool, attrs: []Attr, - fn deinit(abbrev: *Abbrev, allocator: Allocator) void { - allocator.free(abbrev.attrs); + fn deinit(abbrev: *Abbrev, gpa: Allocator) void { + gpa.free(abbrev.attrs); abbrev.* = undefined; } @@ -127,11 +95,11 @@ pub const Abbrev = struct { offset: u64, abbrevs: []Abbrev, - fn deinit(table: *Table, allocator: Allocator) void { + fn deinit(table: *Table, gpa: Allocator) void { for (table.abbrevs) |*abbrev| { - abbrev.deinit(allocator); + abbrev.deinit(gpa); } - allocator.free(table.abbrevs); + gpa.free(table.abbrevs); table.* = undefined; } @@ -146,6 +114,7 @@ pub const Abbrev = struct { pub const CompileUnit = struct { version: u16, format: Format, + addr_size_bytes: u8, die: Die, pc_range: ?PcRange, @@ -196,7 +165,7 @@ pub const CompileUnit = struct { pub const FormValue = union(enum) { addr: u64, - addrx: usize, + addrx: u64, block: []const u8, udata: u64, data16: *const [16]u8, @@ -208,7 +177,7 @@ pub const FormValue = union(enum) { ref_addr: u64, string: [:0]const u8, strp: u64, - strx: usize, + strx: u64, line_strp: u64, loclistx: u64, rnglistx: u64, @@ -243,8 +212,8 @@ pub const Die = struct { value: FormValue, }; - fn deinit(self: *Die, allocator: Allocator) void { - allocator.free(self.attrs); + fn deinit(self: *Die, gpa: Allocator) void { + gpa.free(self.attrs); self.* = undefined; } @@ -258,13 +227,14 @@ pub const Die = struct { fn getAttrAddr( self: *const Die, di: *const Dwarf, + endian: Endian, id: u64, - compile_unit: CompileUnit, + compile_unit: *const CompileUnit, ) error{ InvalidDebugInfo, MissingDebugInfo }!u64 { const form_value = self.getAttr(id) orelse return error.MissingDebugInfo; return switch (form_value.*) { .addr => |value| value, - .addrx => |index| di.readDebugAddr(compile_unit, index), + .addrx => |index| di.readDebugAddr(endian, compile_unit, index), else => bad(), }; } @@ -294,9 +264,10 @@ pub const Die = struct { pub fn getAttrString( self: *const Die, di: *Dwarf, + endian: Endian, id: u64, opt_str: ?[]const u8, - compile_unit: CompileUnit, + compile_unit: *const CompileUnit, ) error{ InvalidDebugInfo, MissingDebugInfo }![]const u8 { const form_value = self.getAttr(id) orelse return error.MissingDebugInfo; switch (form_value.*) { @@ -309,13 +280,13 @@ pub const Die = struct { .@"32" => { const byte_offset = compile_unit.str_offsets_base + 4 * index; if (byte_offset + 4 > debug_str_offsets.len) return bad(); - const offset = mem.readInt(u32, debug_str_offsets[byte_offset..][0..4], di.endian); + const offset = 
mem.readInt(u32, debug_str_offsets[@intCast(byte_offset)..][0..4], endian); return getStringGeneric(opt_str, offset); }, .@"64" => { const byte_offset = compile_unit.str_offsets_base + 8 * index; if (byte_offset + 8 > debug_str_offsets.len) return bad(); - const offset = mem.readInt(u64, debug_str_offsets[byte_offset..][0..8], di.endian); + const offset = mem.readInt(u64, debug_str_offsets[@intCast(byte_offset)..][0..8], endian); return getStringGeneric(opt_str, offset); }, } @@ -326,440 +297,17 @@ pub const Die = struct { } }; -/// This represents the decoded .eh_frame_hdr header -pub const ExceptionFrameHeader = struct { - eh_frame_ptr: usize, - table_enc: u8, - fde_count: usize, - entries: []const u8, - - pub fn entrySize(table_enc: u8) !u8 { - return switch (table_enc & EH.PE.type_mask) { - EH.PE.udata2, - EH.PE.sdata2, - => 4, - EH.PE.udata4, - EH.PE.sdata4, - => 8, - EH.PE.udata8, - EH.PE.sdata8, - => 16, - // This is a binary search table, so all entries must be the same length - else => return bad(), - }; - } - - pub fn findEntry( - self: ExceptionFrameHeader, - eh_frame_len: usize, - eh_frame_hdr_ptr: usize, - pc: usize, - cie: *CommonInformationEntry, - fde: *FrameDescriptionEntry, - endian: Endian, - ) !void { - const entry_size = try entrySize(self.table_enc); - - var left: usize = 0; - var len: usize = self.fde_count; - var fbr: Reader = .fixed(self.entries); - - while (len > 1) { - const mid = left + len / 2; - - fbr.seek = mid * entry_size; - const pc_begin = try readEhPointer(&fbr, self.table_enc, @sizeOf(usize), .{ - .pc_rel_base = @intFromPtr(&self.entries[fbr.seek]), - .follow_indirect = true, - .data_rel_base = eh_frame_hdr_ptr, - }, endian) orelse return bad(); - - if (pc < pc_begin) { - len /= 2; - } else { - left = mid; - if (pc == pc_begin) break; - len -= len / 2; - } - } - - if (len == 0) return missing(); - fbr.seek = left * entry_size; - - // Read past the pc_begin field of the entry - _ = try readEhPointer(&fbr, self.table_enc, @sizeOf(usize), .{ - .pc_rel_base = @intFromPtr(&self.entries[fbr.seek]), - .follow_indirect = true, - .data_rel_base = eh_frame_hdr_ptr, - }, endian) orelse return bad(); - - const fde_ptr = cast(usize, try readEhPointer(&fbr, self.table_enc, @sizeOf(usize), .{ - .pc_rel_base = @intFromPtr(&self.entries[fbr.seek]), - .follow_indirect = true, - .data_rel_base = eh_frame_hdr_ptr, - }, endian) orelse return bad()) orelse return bad(); - - if (fde_ptr < self.eh_frame_ptr) return bad(); - - const eh_frame = @as([*]const u8, @ptrFromInt(self.eh_frame_ptr))[0..eh_frame_len]; - - const fde_offset = fde_ptr - self.eh_frame_ptr; - var eh_frame_fbr: Reader = .fixed(eh_frame); - eh_frame_fbr.seek = fde_offset; - - const fde_entry_header = try EntryHeader.read(&eh_frame_fbr, .eh_frame, endian); - if (fde_entry_header.type != .fde) return bad(); - - // CIEs always come before FDEs (the offset is a subtraction), so we can assume this memory is readable - const cie_offset = fde_entry_header.type.fde; - eh_frame_fbr.seek = @intCast(cie_offset); - const cie_entry_header = try EntryHeader.read(&eh_frame_fbr, .eh_frame, endian); - if (cie_entry_header.type != .cie) return bad(); - - cie.* = try CommonInformationEntry.parse( - cie_entry_header.entry_bytes, - 0, - true, - cie_entry_header.format, - .eh_frame, - cie_entry_header.length_offset, - @sizeOf(usize), - endian, - ); - - fde.* = try FrameDescriptionEntry.parse( - fde_entry_header.entry_bytes, - 0, - true, - cie.*, - @sizeOf(usize), - endian, - ); - - if (pc < fde.pc_begin or pc >= fde.pc_begin + 
fde.pc_range) return missing(); - } -}; - -pub const EntryHeader = struct { - /// Offset of the length field in the backing buffer - length_offset: usize, - format: Format, - type: union(enum) { - cie, - /// Value is the offset of the corresponding CIE - fde: u64, - terminator, - }, - /// The entry's contents, not including the ID field - entry_bytes: []const u8, - - /// The length of the entry including the ID field, but not the length field itself - pub fn entryLength(self: EntryHeader) usize { - return self.entry_bytes.len + @as(u8, if (self.format == .@"64") 8 else 4); - } - - /// Reads a header for either an FDE or a CIE, then advances the fbr to the - /// position after the trailing structure. - /// - /// `fbr` must be backed by either the .eh_frame or .debug_frame sections. - /// - /// TODO that's a bad API, don't do that. this function should neither require - /// a fixed reader nor depend on seeking. - pub fn read(fbr: *Reader, dwarf_section: Section.Id, endian: Endian) !EntryHeader { - assert(dwarf_section == .eh_frame or dwarf_section == .debug_frame); - - const length_offset = fbr.seek; - const unit_header = try readUnitHeader(fbr, endian); - const unit_length = cast(usize, unit_header.unit_length) orelse return bad(); - if (unit_length == 0) return .{ - .length_offset = length_offset, - .format = unit_header.format, - .type = .terminator, - .entry_bytes = &.{}, - }; - const start_offset = fbr.seek; - const end_offset = start_offset + unit_length; - defer fbr.seek = end_offset; - - const id = try readAddress(fbr, unit_header.format, endian); - const entry_bytes = fbr.buffer[fbr.seek..end_offset]; - const cie_id: u64 = switch (dwarf_section) { - .eh_frame => CommonInformationEntry.eh_id, - .debug_frame => switch (unit_header.format) { - .@"32" => CommonInformationEntry.dwarf32_id, - .@"64" => CommonInformationEntry.dwarf64_id, - }, - else => unreachable, - }; - - return .{ - .length_offset = length_offset, - .format = unit_header.format, - .type = if (id == cie_id) .cie else .{ .fde = switch (dwarf_section) { - .eh_frame => try std.math.sub(u64, start_offset, id), - .debug_frame => id, - else => unreachable, - } }, - .entry_bytes = entry_bytes, - }; - } -}; - -pub const CommonInformationEntry = struct { - // Used in .eh_frame - pub const eh_id = 0; - - // Used in .debug_frame (DWARF32) - pub const dwarf32_id = maxInt(u32); - - // Used in .debug_frame (DWARF64) - pub const dwarf64_id = maxInt(u64); - - // Offset of the length field of this entry in the eh_frame section. - // This is the key that FDEs use to reference CIEs. - length_offset: u64, - version: u8, - address_size: u8, - format: Format, - - // Only present in version 4 - segment_selector_size: ?u8, - - code_alignment_factor: u32, - data_alignment_factor: i32, - return_address_register: u8, - - aug_str: []const u8, - aug_data: []const u8, - lsda_pointer_enc: u8, - personality_enc: ?u8, - personality_routine_pointer: ?u64, - fde_pointer_enc: u8, - initial_instructions: []const u8, - - pub fn isSignalFrame(self: CommonInformationEntry) bool { - for (self.aug_str) |c| if (c == 'S') return true; - return false; - } - - pub fn addressesSignedWithBKey(self: CommonInformationEntry) bool { - for (self.aug_str) |c| if (c == 'B') return true; - return false; - } - - pub fn mteTaggedFrame(self: CommonInformationEntry) bool { - for (self.aug_str) |c| if (c == 'G') return true; - return false; - } - - /// This function expects to read the CIE starting with the version field. 
- /// The returned struct references memory backed by cie_bytes. - /// - /// See the FrameDescriptionEntry.parse documentation for the description - /// of `pc_rel_offset` and `is_runtime`. - /// - /// `length_offset` specifies the offset of this CIE's length field in the - /// .eh_frame / .debug_frame section. - pub fn parse( - cie_bytes: []const u8, - pc_rel_offset: i64, - is_runtime: bool, - format: Format, - dwarf_section: Section.Id, - length_offset: u64, - addr_size_bytes: u8, - endian: Endian, - ) !CommonInformationEntry { - if (addr_size_bytes > 8) return error.UnsupportedAddrSize; - - var fbr: Reader = .fixed(cie_bytes); - - const version = try fbr.takeByte(); - switch (dwarf_section) { - .eh_frame => if (version != 1 and version != 3) return error.UnsupportedDwarfVersion, - .debug_frame => if (version != 4) return error.UnsupportedDwarfVersion, - else => return error.UnsupportedDwarfSection, - } - - var has_eh_data = false; - var has_aug_data = false; - - var aug_str_len: usize = 0; - const aug_str_start = fbr.seek; - var aug_byte = try fbr.takeByte(); - while (aug_byte != 0) : (aug_byte = try fbr.takeByte()) { - switch (aug_byte) { - 'z' => { - if (aug_str_len != 0) return bad(); - has_aug_data = true; - }, - 'e' => { - if (has_aug_data or aug_str_len != 0) return bad(); - if (try fbr.takeByte() != 'h') return bad(); - has_eh_data = true; - }, - else => if (has_eh_data) return bad(), - } - - aug_str_len += 1; - } - - if (has_eh_data) { - // legacy data created by older versions of gcc - unsupported here - for (0..addr_size_bytes) |_| _ = try fbr.takeByte(); - } - - const address_size = if (version == 4) try fbr.takeByte() else addr_size_bytes; - const segment_selector_size = if (version == 4) try fbr.takeByte() else null; - - const code_alignment_factor = try fbr.takeLeb128(u32); - const data_alignment_factor = try fbr.takeLeb128(i32); - const return_address_register = if (version == 1) try fbr.takeByte() else try fbr.takeLeb128(u8); - - var lsda_pointer_enc: u8 = EH.PE.omit; - var personality_enc: ?u8 = null; - var personality_routine_pointer: ?u64 = null; - var fde_pointer_enc: u8 = EH.PE.absptr; - - var aug_data: []const u8 = &[_]u8{}; - const aug_str = if (has_aug_data) blk: { - const aug_data_len = try fbr.takeLeb128(usize); - const aug_data_start = fbr.seek; - aug_data = cie_bytes[aug_data_start..][0..aug_data_len]; - - const aug_str = cie_bytes[aug_str_start..][0..aug_str_len]; - for (aug_str[1..]) |byte| { - switch (byte) { - 'L' => { - lsda_pointer_enc = try fbr.takeByte(); - }, - 'P' => { - personality_enc = try fbr.takeByte(); - personality_routine_pointer = try readEhPointer(&fbr, personality_enc.?, addr_size_bytes, .{ - .pc_rel_base = try pcRelBase(@intFromPtr(&cie_bytes[fbr.seek]), pc_rel_offset), - .follow_indirect = is_runtime, - }, endian); - }, - 'R' => { - fde_pointer_enc = try fbr.takeByte(); - }, - 'S', 'B', 'G' => {}, - else => return bad(), - } - } - - // aug_data_len can include padding so the CIE ends on an address boundary - fbr.seek = aug_data_start + aug_data_len; - break :blk aug_str; - } else &[_]u8{}; - - const initial_instructions = cie_bytes[fbr.seek..]; - return .{ - .length_offset = length_offset, - .version = version, - .address_size = address_size, - .format = format, - .segment_selector_size = segment_selector_size, - .code_alignment_factor = code_alignment_factor, - .data_alignment_factor = data_alignment_factor, - .return_address_register = return_address_register, - .aug_str = aug_str, - .aug_data = aug_data, - .lsda_pointer_enc = 
lsda_pointer_enc, - .personality_enc = personality_enc, - .personality_routine_pointer = personality_routine_pointer, - .fde_pointer_enc = fde_pointer_enc, - .initial_instructions = initial_instructions, - }; - } -}; - -pub const FrameDescriptionEntry = struct { - // Offset into eh_frame where the CIE for this FDE is stored - cie_length_offset: u64, - - pc_begin: u64, - pc_range: u64, - lsda_pointer: ?u64, - aug_data: []const u8, - instructions: []const u8, - - /// This function expects to read the FDE starting at the PC Begin field. - /// The returned struct references memory backed by `fde_bytes`. - /// - /// `pc_rel_offset` specifies an offset to be applied to pc_rel_base values - /// used when decoding pointers. This should be set to zero if fde_bytes is - /// backed by the memory of a .eh_frame / .debug_frame section in the running executable. - /// Otherwise, it should be the relative offset to translate addresses from - /// where the section is currently stored in memory, to where it *would* be - /// stored at runtime: section base addr - backing data base ptr. - /// - /// Similarly, `is_runtime` specifies this function is being called on a runtime - /// section, and so indirect pointers can be followed. - pub fn parse( - fde_bytes: []const u8, - pc_rel_offset: i64, - is_runtime: bool, - cie: CommonInformationEntry, - addr_size_bytes: u8, - endian: Endian, - ) !FrameDescriptionEntry { - if (addr_size_bytes > 8) return error.InvalidAddrSize; - - var fbr: Reader = .fixed(fde_bytes); - - const pc_begin = try readEhPointer(&fbr, cie.fde_pointer_enc, addr_size_bytes, .{ - .pc_rel_base = try pcRelBase(@intFromPtr(&fde_bytes[fbr.seek]), pc_rel_offset), - .follow_indirect = is_runtime, - }, endian) orelse return bad(); - - const pc_range = try readEhPointer(&fbr, cie.fde_pointer_enc, addr_size_bytes, .{ - .pc_rel_base = 0, - .follow_indirect = false, - }, endian) orelse return bad(); - - var aug_data: []const u8 = &[_]u8{}; - const lsda_pointer = if (cie.aug_str.len > 0) blk: { - const aug_data_len = try fbr.takeLeb128(usize); - const aug_data_start = fbr.seek; - aug_data = fde_bytes[aug_data_start..][0..aug_data_len]; - - const lsda_pointer = if (cie.lsda_pointer_enc != EH.PE.omit) - try readEhPointer(&fbr, cie.lsda_pointer_enc, addr_size_bytes, .{ - .pc_rel_base = try pcRelBase(@intFromPtr(&fde_bytes[fbr.seek]), pc_rel_offset), - .follow_indirect = is_runtime, - }, endian) - else - null; - - fbr.seek = aug_data_start + aug_data_len; - break :blk lsda_pointer; - } else null; - - const instructions = fde_bytes[fbr.seek..]; - return .{ - .cie_length_offset = cie.length_offset, - .pc_begin = pc_begin, - .pc_range = pc_range, - .lsda_pointer = lsda_pointer, - .aug_data = aug_data, - .instructions = instructions, - }; - } -}; - const num_sections = std.enums.directEnumArrayLen(Section.Id, 0); pub const SectionArray = [num_sections]?Section; -pub const null_section_array = [_]?Section{null} ** num_sections; pub const OpenError = ScanError; /// Initialize DWARF info. The caller has the responsibility to initialize most /// the `Dwarf` fields before calling. `binary_mem` is the raw bytes of the /// main binary file (not the secondary debug info file). 
-pub fn open(d: *Dwarf, gpa: Allocator) OpenError!void { - try d.scanAllFunctions(gpa); - try d.scanAllCompileUnits(gpa); +pub fn open(d: *Dwarf, gpa: Allocator, endian: Endian) OpenError!void { + try d.scanAllFunctions(gpa, endian); + try d.scanAllCompileUnits(gpa, endian); } const PcRange = struct { @@ -776,10 +324,6 @@ pub fn section(di: Dwarf, dwarf_section: Section.Id) ?[]const u8 { return if (di.sections[@intFromEnum(dwarf_section)]) |s| s.data else null; } -pub fn sectionVirtualOffset(di: Dwarf, dwarf_section: Section.Id, base_address: usize) ?i64 { - return if (di.sections[@intFromEnum(dwarf_section)]) |s| s.virtualOffset(base_address) else null; -} - pub fn deinit(di: *Dwarf, gpa: Allocator) void { for (di.sections) |opt_section| { if (opt_section) |s| if (s.owned) gpa.free(s.data); @@ -798,14 +342,18 @@ pub fn deinit(di: *Dwarf, gpa: Allocator) void { } di.compile_unit_list.deinit(gpa); di.func_list.deinit(gpa); - di.cie_map.deinit(gpa); - di.fde_list.deinit(gpa); di.ranges.deinit(gpa); di.* = undefined; } -pub fn getSymbolName(di: *Dwarf, address: u64) ?[]const u8 { - for (di.func_list.items) |*func| { +pub fn getSymbolName(di: *const Dwarf, address: u64) ?[]const u8 { + // Iterate the function list backwards so that we see child DIEs before their parents. This is + // important because `DW_TAG_inlined_subroutine` DIEs will have a range which is a sub-range of + // their caller, and we want to return the callee's name, not the caller's. + var i: usize = di.func_list.items.len; + while (i > 0) { + i -= 1; + const func = &di.func_list.items[i]; if (func.pc_range) |range| { if (address >= range.start and address < range.end) { return func.name; @@ -825,35 +373,33 @@ pub const ScanError = error{ StreamTooLong, } || Allocator.Error; -fn scanAllFunctions(di: *Dwarf, allocator: Allocator) ScanError!void { - const endian = di.endian; - var fbr: Reader = .fixed(di.section(.debug_info).?); +fn scanAllFunctions(di: *Dwarf, gpa: Allocator, endian: Endian) ScanError!void { + var fr: Reader = .fixed(di.section(.debug_info).?); var this_unit_offset: u64 = 0; - while (this_unit_offset < fbr.buffer.len) { - fbr.seek = @intCast(this_unit_offset); + while (this_unit_offset < fr.buffer.len) { + fr.seek = @intCast(this_unit_offset); - const unit_header = try readUnitHeader(&fbr, endian); + const unit_header = try readUnitHeader(&fr, endian); if (unit_header.unit_length == 0) return; const next_offset = unit_header.header_length + unit_header.unit_length; - const version = try fbr.takeInt(u16, endian); + const version = try fr.takeInt(u16, endian); if (version < 2 or version > 5) return bad(); var address_size: u8 = undefined; var debug_abbrev_offset: u64 = undefined; if (version >= 5) { - const unit_type = try fbr.takeByte(); + const unit_type = try fr.takeByte(); if (unit_type != DW.UT.compile) return bad(); - address_size = try fbr.takeByte(); - debug_abbrev_offset = try readAddress(&fbr, unit_header.format, endian); + address_size = try fr.takeByte(); + debug_abbrev_offset = try readFormatSizedInt(&fr, unit_header.format, endian); } else { - debug_abbrev_offset = try readAddress(&fbr, unit_header.format, endian); - address_size = try fbr.takeByte(); + debug_abbrev_offset = try readFormatSizedInt(&fr, unit_header.format, endian); + address_size = try fr.takeByte(); } - if (address_size != @sizeOf(usize)) return bad(); - const abbrev_table = try di.getAbbrevTable(allocator, debug_abbrev_offset); + const abbrev_table = try di.getAbbrevTable(gpa, debug_abbrev_offset); var max_attrs: usize = 0; var 
zig_padding_abbrev_code: u7 = 0; @@ -868,8 +414,8 @@ fn scanAllFunctions(di: *Dwarf, allocator: Allocator) ScanError!void { } } } - const attrs_buf = try allocator.alloc(Die.Attr, max_attrs * 3); - defer allocator.free(attrs_buf); + const attrs_buf = try gpa.alloc(Die.Attr, max_attrs * 3); + defer gpa.free(attrs_buf); var attrs_bufs: [3][]Die.Attr = undefined; for (&attrs_bufs, 0..) |*buf, index| buf.* = attrs_buf[index * max_attrs ..][0..max_attrs]; @@ -878,6 +424,7 @@ fn scanAllFunctions(di: *Dwarf, allocator: Allocator) ScanError!void { var compile_unit: CompileUnit = .{ .version = version, .format = unit_header.format, + .addr_size_bytes = address_size, .die = undefined, .pc_range = null, @@ -890,16 +437,17 @@ fn scanAllFunctions(di: *Dwarf, allocator: Allocator) ScanError!void { }; while (true) { - fbr.seek = std.mem.indexOfNonePos(u8, fbr.buffer, fbr.seek, &.{ + fr.seek = std.mem.indexOfNonePos(u8, fr.buffer, fr.seek, &.{ zig_padding_abbrev_code, 0, - }) orelse fbr.buffer.len; - if (fbr.seek >= next_unit_pos) break; + }) orelse fr.buffer.len; + if (fr.seek >= next_unit_pos) break; var die_obj = (try parseDie( - &fbr, + &fr, attrs_bufs[0], abbrev_table, unit_header.format, endian, + address_size, )) orelse continue; switch (die_obj.tag_id) { @@ -920,34 +468,36 @@ fn scanAllFunctions(di: *Dwarf, allocator: Allocator) ScanError!void { // Prevent endless loops for (0..3) |_| { if (this_die_obj.getAttr(AT.name)) |_| { - break :x try this_die_obj.getAttrString(di, AT.name, di.section(.debug_str), compile_unit); + break :x try this_die_obj.getAttrString(di, endian, AT.name, di.section(.debug_str), &compile_unit); } else if (this_die_obj.getAttr(AT.abstract_origin)) |_| { - const after_die_offset = fbr.seek; - defer fbr.seek = after_die_offset; + const after_die_offset = fr.seek; + defer fr.seek = after_die_offset; // Follow the DIE it points to and repeat const ref_offset = try this_die_obj.getAttrRef(AT.abstract_origin, this_unit_offset, next_offset); - fbr.seek = @intCast(ref_offset); + fr.seek = @intCast(ref_offset); this_die_obj = (try parseDie( - &fbr, + &fr, attrs_bufs[2], abbrev_table, // wrong abbrev table for different cu unit_header.format, endian, + address_size, )) orelse return bad(); } else if (this_die_obj.getAttr(AT.specification)) |_| { - const after_die_offset = fbr.seek; - defer fbr.seek = after_die_offset; + const after_die_offset = fr.seek; + defer fr.seek = after_die_offset; // Follow the DIE it points to and repeat const ref_offset = try this_die_obj.getAttrRef(AT.specification, this_unit_offset, next_offset); - fbr.seek = @intCast(ref_offset); + fr.seek = @intCast(ref_offset); this_die_obj = (try parseDie( - &fbr, + &fr, attrs_bufs[2], abbrev_table, // wrong abbrev table for different cu unit_header.format, endian, + address_size, )) orelse return bad(); } else { break :x null; @@ -957,7 +507,7 @@ fn scanAllFunctions(di: *Dwarf, allocator: Allocator) ScanError!void { break :x null; }; - var range_added = if (die_obj.getAttrAddr(di, AT.low_pc, compile_unit)) |low_pc| blk: { + var range_added = if (die_obj.getAttrAddr(di, endian, AT.low_pc, &compile_unit)) |low_pc| blk: { if (die_obj.getAttr(AT.high_pc)) |high_pc_value| { const pc_end = switch (high_pc_value.*) { .addr => |value| value, @@ -965,7 +515,7 @@ fn scanAllFunctions(di: *Dwarf, allocator: Allocator) ScanError!void { else => return bad(), }; - try di.func_list.append(allocator, .{ + try di.func_list.append(gpa, .{ .name = fn_name, .pc_range = .{ .start = low_pc, @@ -983,14 +533,14 @@ fn scanAllFunctions(di: 
*Dwarf, allocator: Allocator) ScanError!void { }; if (die_obj.getAttr(AT.ranges)) |ranges_value| blk: { - var iter = DebugRangeIterator.init(ranges_value, di, &compile_unit) catch |err| { + var iter = DebugRangeIterator.init(ranges_value, di, endian, &compile_unit) catch |err| { if (err != error.MissingDebugInfo) return err; break :blk; }; while (try iter.next()) |range| { range_added = true; - try di.func_list.append(allocator, .{ + try di.func_list.append(gpa, .{ .name = fn_name, .pc_range = .{ .start = range.start, @@ -1001,7 +551,7 @@ fn scanAllFunctions(di: *Dwarf, allocator: Allocator) ScanError!void { } if (fn_name != null and !range_added) { - try di.func_list.append(allocator, .{ + try di.func_list.append(gpa, .{ .name = fn_name, .pc_range = null, }); @@ -1015,38 +565,36 @@ fn scanAllFunctions(di: *Dwarf, allocator: Allocator) ScanError!void { } } -fn scanAllCompileUnits(di: *Dwarf, allocator: Allocator) ScanError!void { - const endian = di.endian; - var fbr: Reader = .fixed(di.section(.debug_info).?); +fn scanAllCompileUnits(di: *Dwarf, gpa: Allocator, endian: Endian) ScanError!void { + var fr: Reader = .fixed(di.section(.debug_info).?); var this_unit_offset: u64 = 0; - var attrs_buf = std.array_list.Managed(Die.Attr).init(allocator); + var attrs_buf = std.array_list.Managed(Die.Attr).init(gpa); defer attrs_buf.deinit(); - while (this_unit_offset < fbr.buffer.len) { - fbr.seek = @intCast(this_unit_offset); + while (this_unit_offset < fr.buffer.len) { + fr.seek = @intCast(this_unit_offset); - const unit_header = try readUnitHeader(&fbr, endian); + const unit_header = try readUnitHeader(&fr, endian); if (unit_header.unit_length == 0) return; const next_offset = unit_header.header_length + unit_header.unit_length; - const version = try fbr.takeInt(u16, endian); + const version = try fr.takeInt(u16, endian); if (version < 2 or version > 5) return bad(); var address_size: u8 = undefined; var debug_abbrev_offset: u64 = undefined; if (version >= 5) { - const unit_type = try fbr.takeByte(); + const unit_type = try fr.takeByte(); if (unit_type != UT.compile) return bad(); - address_size = try fbr.takeByte(); - debug_abbrev_offset = try readAddress(&fbr, unit_header.format, endian); + address_size = try fr.takeByte(); + debug_abbrev_offset = try readFormatSizedInt(&fr, unit_header.format, endian); } else { - debug_abbrev_offset = try readAddress(&fbr, unit_header.format, endian); - address_size = try fbr.takeByte(); + debug_abbrev_offset = try readFormatSizedInt(&fr, unit_header.format, endian); + address_size = try fr.takeByte(); } - if (address_size != @sizeOf(usize)) return bad(); - const abbrev_table = try di.getAbbrevTable(allocator, debug_abbrev_offset); + const abbrev_table = try di.getAbbrevTable(gpa, debug_abbrev_offset); var max_attrs: usize = 0; for (abbrev_table.abbrevs) |abbrev| { @@ -1055,20 +603,22 @@ fn scanAllCompileUnits(di: *Dwarf, allocator: Allocator) ScanError!void { try attrs_buf.resize(max_attrs); var compile_unit_die = (try parseDie( - &fbr, + &fr, attrs_buf.items, abbrev_table, unit_header.format, endian, + address_size, )) orelse return bad(); if (compile_unit_die.tag_id != DW.TAG.compile_unit) return bad(); - compile_unit_die.attrs = try allocator.dupe(Die.Attr, compile_unit_die.attrs); + compile_unit_die.attrs = try gpa.dupe(Die.Attr, compile_unit_die.attrs); var compile_unit: CompileUnit = .{ .version = version, .format = unit_header.format, + .addr_size_bytes = address_size, .pc_range = null, .die = compile_unit_die, .str_offsets_base = if 
(compile_unit_die.getAttr(AT.str_offsets_base)) |fv| try fv.getUInt(usize) else 0, @@ -1080,7 +630,7 @@ fn scanAllCompileUnits(di: *Dwarf, allocator: Allocator) ScanError!void { }; compile_unit.pc_range = x: { - if (compile_unit_die.getAttrAddr(di, AT.low_pc, compile_unit)) |low_pc| { + if (compile_unit_die.getAttrAddr(di, endian, AT.low_pc, &compile_unit)) |low_pc| { if (compile_unit_die.getAttr(AT.high_pc)) |high_pc_value| { const pc_end = switch (high_pc_value.*) { .addr => |value| value, @@ -1100,13 +650,13 @@ fn scanAllCompileUnits(di: *Dwarf, allocator: Allocator) ScanError!void { } }; - try di.compile_unit_list.append(allocator, compile_unit); + try di.compile_unit_list.append(gpa, compile_unit); this_unit_offset += next_offset; } } -pub fn populateRanges(d: *Dwarf, gpa: Allocator) ScanError!void { +pub fn populateRanges(d: *Dwarf, gpa: Allocator, endian: Endian) ScanError!void { assert(d.ranges.items.len == 0); for (d.compile_unit_list.items, 0..) |*cu, cu_index| { @@ -1119,7 +669,7 @@ pub fn populateRanges(d: *Dwarf, gpa: Allocator) ScanError!void { continue; } const ranges_value = cu.die.getAttr(AT.ranges) orelse continue; - var iter = DebugRangeIterator.init(ranges_value, d, cu) catch continue; + var iter = DebugRangeIterator.init(ranges_value, d, endian, cu) catch continue; while (try iter.next()) |range| { // Not sure why LLVM thinks it's OK to emit these... if (range.start == range.end) continue; @@ -1144,10 +694,11 @@ const DebugRangeIterator = struct { base_address: u64, section_type: Section.Id, di: *const Dwarf, + endian: Endian, compile_unit: *const CompileUnit, - fbr: Reader, + fr: Reader, - pub fn init(ranges_value: *const FormValue, di: *const Dwarf, compile_unit: *const CompileUnit) !@This() { + pub fn init(ranges_value: *const FormValue, di: *const Dwarf, endian: Endian, compile_unit: *const CompileUnit) !@This() { const section_type = if (compile_unit.version >= 5) Section.Id.debug_rnglists else Section.Id.debug_ranges; const debug_ranges = di.section(section_type) orelse return error.MissingDebugInfo; @@ -1156,15 +707,15 @@ const DebugRangeIterator = struct { .rnglistx => |idx| off: { switch (compile_unit.format) { .@"32" => { - const offset_loc = @as(usize, @intCast(compile_unit.rnglists_base + 4 * idx)); + const offset_loc = compile_unit.rnglists_base + 4 * idx; if (offset_loc + 4 > debug_ranges.len) return bad(); - const offset = mem.readInt(u32, debug_ranges[offset_loc..][0..4], di.endian); + const offset = mem.readInt(u32, debug_ranges[@intCast(offset_loc)..][0..4], endian); break :off compile_unit.rnglists_base + offset; }, .@"64" => { - const offset_loc = @as(usize, @intCast(compile_unit.rnglists_base + 8 * idx)); + const offset_loc = compile_unit.rnglists_base + 8 * idx; if (offset_loc + 8 > debug_ranges.len) return bad(); - const offset = mem.readInt(u64, debug_ranges[offset_loc..][0..8], di.endian); + const offset = mem.readInt(u64, debug_ranges[@intCast(offset_loc)..][0..8], endian); break :off compile_unit.rnglists_base + offset; }, } @@ -1176,42 +727,44 @@ const DebugRangeIterator = struct { // specified by DW_AT.low_pc or to some other value encoded // in the list itself. // If no starting value is specified use zero. 
- const base_address = compile_unit.die.getAttrAddr(di, AT.low_pc, compile_unit.*) catch |err| switch (err) { + const base_address = compile_unit.die.getAttrAddr(di, endian, AT.low_pc, compile_unit) catch |err| switch (err) { error.MissingDebugInfo => 0, else => return err, }; - var fbr: Reader = .fixed(debug_ranges); - fbr.seek = cast(usize, ranges_offset) orelse return bad(); + var fr: Reader = .fixed(debug_ranges); + fr.seek = cast(usize, ranges_offset) orelse return bad(); return .{ .base_address = base_address, .section_type = section_type, .di = di, + .endian = endian, .compile_unit = compile_unit, - .fbr = fbr, + .fr = fr, }; } // Returns the next range in the list, or null if the end was reached. pub fn next(self: *@This()) !?PcRange { - const endian = self.di.endian; + const endian = self.endian; + const addr_size_bytes = self.compile_unit.addr_size_bytes; switch (self.section_type) { .debug_rnglists => { - const kind = try self.fbr.takeByte(); + const kind = try self.fr.takeByte(); switch (kind) { RLE.end_of_list => return null, RLE.base_addressx => { - const index = try self.fbr.takeLeb128(usize); - self.base_address = try self.di.readDebugAddr(self.compile_unit.*, index); + const index = try self.fr.takeLeb128(u64); + self.base_address = try self.di.readDebugAddr(endian, self.compile_unit, index); return try self.next(); }, RLE.startx_endx => { - const start_index = try self.fbr.takeLeb128(usize); - const start_addr = try self.di.readDebugAddr(self.compile_unit.*, start_index); + const start_index = try self.fr.takeLeb128(u64); + const start_addr = try self.di.readDebugAddr(endian, self.compile_unit, start_index); - const end_index = try self.fbr.takeLeb128(usize); - const end_addr = try self.di.readDebugAddr(self.compile_unit.*, end_index); + const end_index = try self.fr.takeLeb128(u64); + const end_addr = try self.di.readDebugAddr(endian, self.compile_unit, end_index); return .{ .start = start_addr, @@ -1219,10 +772,10 @@ const DebugRangeIterator = struct { }; }, RLE.startx_length => { - const start_index = try self.fbr.takeLeb128(usize); - const start_addr = try self.di.readDebugAddr(self.compile_unit.*, start_index); + const start_index = try self.fr.takeLeb128(u64); + const start_addr = try self.di.readDebugAddr(endian, self.compile_unit, start_index); - const len = try self.fbr.takeLeb128(usize); + const len = try self.fr.takeLeb128(u64); const end_addr = start_addr + len; return .{ @@ -1231,8 +784,8 @@ const DebugRangeIterator = struct { }; }, RLE.offset_pair => { - const start_addr = try self.fbr.takeLeb128(usize); - const end_addr = try self.fbr.takeLeb128(usize); + const start_addr = try self.fr.takeLeb128(u64); + const end_addr = try self.fr.takeLeb128(u64); // This is the only kind that uses the base address return .{ @@ -1241,12 +794,12 @@ const DebugRangeIterator = struct { }; }, RLE.base_address => { - self.base_address = try self.fbr.takeInt(usize, endian); + self.base_address = try readAddress(&self.fr, endian, addr_size_bytes); return try self.next(); }, RLE.start_end => { - const start_addr = try self.fbr.takeInt(usize, endian); - const end_addr = try self.fbr.takeInt(usize, endian); + const start_addr = try readAddress(&self.fr, endian, addr_size_bytes); + const end_addr = try readAddress(&self.fr, endian, addr_size_bytes); return .{ .start = start_addr, @@ -1254,8 +807,8 @@ const DebugRangeIterator = struct { }; }, RLE.start_length => { - const start_addr = try self.fbr.takeInt(usize, endian); - const len = try self.fbr.takeLeb128(usize); + const 
start_addr = try readAddress(&self.fr, endian, addr_size_bytes); + const len = try self.fr.takeLeb128(u64); const end_addr = start_addr + len; return .{ @@ -1267,12 +820,13 @@ const DebugRangeIterator = struct { } }, .debug_ranges => { - const start_addr = try self.fbr.takeInt(usize, endian); - const end_addr = try self.fbr.takeInt(usize, endian); + const start_addr = try readAddress(&self.fr, endian, addr_size_bytes); + const end_addr = try readAddress(&self.fr, endian, addr_size_bytes); if (start_addr == 0 and end_addr == 0) return null; - // This entry selects a new value for the base address - if (start_addr == maxInt(usize)) { + // The entry with start_addr = max_representable_address selects a new value for the base address + const max_representable_address = ~@as(u64, 0) >> @intCast(64 - addr_size_bytes); + if (start_addr == max_representable_address) { self.base_address = end_addr; return try self.next(); } @@ -1288,14 +842,14 @@ const DebugRangeIterator = struct { }; /// TODO: change this to binary searching the sorted compile unit list -pub fn findCompileUnit(di: *const Dwarf, target_address: u64) !*CompileUnit { +pub fn findCompileUnit(di: *const Dwarf, endian: Endian, target_address: u64) !*CompileUnit { for (di.compile_unit_list.items) |*compile_unit| { if (compile_unit.pc_range) |range| { if (target_address >= range.start and target_address < range.end) return compile_unit; } const ranges_value = compile_unit.die.getAttr(AT.ranges) orelse continue; - var iter = DebugRangeIterator.init(ranges_value, di, compile_unit) catch continue; + var iter = DebugRangeIterator.init(ranges_value, di, endian, compile_unit) catch continue; while (try iter.next()) |range| { if (target_address >= range.start and target_address < range.end) return compile_unit; } @@ -1306,49 +860,49 @@ pub fn findCompileUnit(di: *const Dwarf, target_address: u64) !*CompileUnit { /// Gets an already existing AbbrevTable given the abbrev_offset, or if not found, /// seeks in the stream and parses it. 
-fn getAbbrevTable(di: *Dwarf, allocator: Allocator, abbrev_offset: u64) !*const Abbrev.Table { +fn getAbbrevTable(di: *Dwarf, gpa: Allocator, abbrev_offset: u64) !*const Abbrev.Table { for (di.abbrev_table_list.items) |*table| { if (table.offset == abbrev_offset) { return table; } } try di.abbrev_table_list.append( - allocator, - try di.parseAbbrevTable(allocator, abbrev_offset), + gpa, + try di.parseAbbrevTable(gpa, abbrev_offset), ); return &di.abbrev_table_list.items[di.abbrev_table_list.items.len - 1]; } -fn parseAbbrevTable(di: *Dwarf, allocator: Allocator, offset: u64) !Abbrev.Table { - var fbr: Reader = .fixed(di.section(.debug_abbrev).?); - fbr.seek = cast(usize, offset) orelse return bad(); +fn parseAbbrevTable(di: *Dwarf, gpa: Allocator, offset: u64) !Abbrev.Table { + var fr: Reader = .fixed(di.section(.debug_abbrev).?); + fr.seek = cast(usize, offset) orelse return bad(); - var abbrevs = std.array_list.Managed(Abbrev).init(allocator); + var abbrevs = std.array_list.Managed(Abbrev).init(gpa); defer { for (abbrevs.items) |*abbrev| { - abbrev.deinit(allocator); + abbrev.deinit(gpa); } abbrevs.deinit(); } - var attrs = std.array_list.Managed(Abbrev.Attr).init(allocator); + var attrs = std.array_list.Managed(Abbrev.Attr).init(gpa); defer attrs.deinit(); while (true) { - const code = try fbr.takeLeb128(u64); + const code = try fr.takeLeb128(u64); if (code == 0) break; - const tag_id = try fbr.takeLeb128(u64); - const has_children = (try fbr.takeByte()) == DW.CHILDREN.yes; + const tag_id = try fr.takeLeb128(u64); + const has_children = (try fr.takeByte()) == DW.CHILDREN.yes; while (true) { - const attr_id = try fbr.takeLeb128(u64); - const form_id = try fbr.takeLeb128(u64); + const attr_id = try fr.takeLeb128(u64); + const form_id = try fr.takeLeb128(u64); if (attr_id == 0 and form_id == 0) break; try attrs.append(.{ .id = attr_id, .form_id = form_id, .payload = switch (form_id) { - FORM.implicit_const => try fbr.takeLeb128(i64), + FORM.implicit_const => try fr.takeLeb128(i64), else => undefined, }, }); @@ -1369,20 +923,21 @@ fn parseAbbrevTable(di: *Dwarf, allocator: Allocator, offset: u64) !Abbrev.Table } fn parseDie( - fbr: *Reader, + fr: *Reader, attrs_buf: []Die.Attr, abbrev_table: *const Abbrev.Table, format: Format, endian: Endian, + addr_size_bytes: u8, ) ScanError!?Die { - const abbrev_code = try fbr.takeLeb128(u64); + const abbrev_code = try fr.takeLeb128(u64); if (abbrev_code == 0) return null; const table_entry = abbrev_table.get(abbrev_code) orelse return bad(); const attrs = attrs_buf[0..table_entry.attrs.len]; for (attrs, table_entry.attrs) |*result_attr, attr| result_attr.* = .{ .id = attr.id, - .value = try parseFormValue(fbr, attr.form_id, format, endian, attr.payload), + .value = try parseFormValue(fr, attr.form_id, format, endian, addr_size_bytes, attr.payload), }; return .{ .tag_id = table_entry.tag_id, @@ -1392,55 +947,50 @@ fn parseDie( } /// Ensures that addresses in the returned LineTable are monotonically increasing. 
-fn runLineNumberProgram(d: *Dwarf, gpa: Allocator, compile_unit: *CompileUnit) !CompileUnit.SrcLocCache { - const endian = d.endian; - const compile_unit_cwd = try compile_unit.die.getAttrString(d, AT.comp_dir, d.section(.debug_line_str), compile_unit.*); +fn runLineNumberProgram(d: *Dwarf, gpa: Allocator, endian: Endian, compile_unit: *const CompileUnit) !CompileUnit.SrcLocCache { + const compile_unit_cwd = try compile_unit.die.getAttrString(d, endian, AT.comp_dir, d.section(.debug_line_str), compile_unit); const line_info_offset = try compile_unit.die.getAttrSecOffset(AT.stmt_list); - var fbr: Reader = .fixed(d.section(.debug_line).?); - fbr.seek = @intCast(line_info_offset); + var fr: Reader = .fixed(d.section(.debug_line).?); + fr.seek = @intCast(line_info_offset); - const unit_header = try readUnitHeader(&fbr, endian); + const unit_header = try readUnitHeader(&fr, endian); if (unit_header.unit_length == 0) return missing(); const next_offset = unit_header.header_length + unit_header.unit_length; - const version = try fbr.takeInt(u16, endian); + const version = try fr.takeInt(u16, endian); if (version < 2) return bad(); - const addr_size: u8, const seg_size: u8 = if (version >= 5) .{ - try fbr.takeByte(), - try fbr.takeByte(), + const addr_size_bytes: u8, const seg_size: u8 = if (version >= 5) .{ + try fr.takeByte(), + try fr.takeByte(), } else .{ - switch (unit_header.format) { - .@"32" => 4, - .@"64" => 8, - }, + compile_unit.addr_size_bytes, 0, }; - _ = addr_size; - _ = seg_size; + if (seg_size != 0) return bad(); // unsupported - const prologue_length = try readAddress(&fbr, unit_header.format, endian); - const prog_start_offset = fbr.seek + prologue_length; + const prologue_length = try readFormatSizedInt(&fr, unit_header.format, endian); + const prog_start_offset = fr.seek + prologue_length; - const minimum_instruction_length = try fbr.takeByte(); + const minimum_instruction_length = try fr.takeByte(); if (minimum_instruction_length == 0) return bad(); if (version >= 4) { - const maximum_operations_per_instruction = try fbr.takeByte(); + const maximum_operations_per_instruction = try fr.takeByte(); _ = maximum_operations_per_instruction; } - const default_is_stmt = (try fbr.takeByte()) != 0; - const line_base = try fbr.takeByteSigned(); + const default_is_stmt = (try fr.takeByte()) != 0; + const line_base = try fr.takeByteSigned(); - const line_range = try fbr.takeByte(); + const line_range = try fr.takeByte(); if (line_range == 0) return bad(); - const opcode_base = try fbr.takeByte(); + const opcode_base = try fr.takeByte(); - const standard_opcode_lengths = try fbr.take(opcode_base - 1); + const standard_opcode_lengths = try fr.take(opcode_base - 1); var directories: ArrayList(FileEntry) = .empty; defer directories.deinit(gpa); @@ -1451,17 +1001,17 @@ fn runLineNumberProgram(d: *Dwarf, gpa: Allocator, compile_unit: *CompileUnit) ! 
try directories.append(gpa, .{ .path = compile_unit_cwd }); while (true) { - const dir = try fbr.takeSentinel(0); + const dir = try fr.takeSentinel(0); if (dir.len == 0) break; try directories.append(gpa, .{ .path = dir }); } while (true) { - const file_name = try fbr.takeSentinel(0); + const file_name = try fr.takeSentinel(0); if (file_name.len == 0) break; - const dir_index = try fbr.takeLeb128(u32); - const mtime = try fbr.takeLeb128(u64); - const size = try fbr.takeLeb128(u64); + const dir_index = try fr.takeLeb128(u32); + const mtime = try fr.takeLeb128(u64); + const size = try fr.takeLeb128(u64); try file_entries.append(gpa, .{ .path = file_name, .dir_index = dir_index, @@ -1476,21 +1026,21 @@ fn runLineNumberProgram(d: *Dwarf, gpa: Allocator, compile_unit: *CompileUnit) ! }; { var dir_ent_fmt_buf: [10]FileEntFmt = undefined; - const directory_entry_format_count = try fbr.takeByte(); + const directory_entry_format_count = try fr.takeByte(); if (directory_entry_format_count > dir_ent_fmt_buf.len) return bad(); for (dir_ent_fmt_buf[0..directory_entry_format_count]) |*ent_fmt| { ent_fmt.* = .{ - .content_type_code = try fbr.takeLeb128(u8), - .form_code = try fbr.takeLeb128(u16), + .content_type_code = try fr.takeLeb128(u8), + .form_code = try fr.takeLeb128(u16), }; } - const directories_count = try fbr.takeLeb128(usize); + const directories_count = try fr.takeLeb128(usize); for (try directories.addManyAsSlice(gpa, directories_count)) |*e| { e.* = .{ .path = &.{} }; for (dir_ent_fmt_buf[0..directory_entry_format_count]) |ent_fmt| { - const form_value = try parseFormValue(&fbr, ent_fmt.form_code, unit_header.format, endian, null); + const form_value = try parseFormValue(&fr, ent_fmt.form_code, unit_header.format, endian, addr_size_bytes, null); switch (ent_fmt.content_type_code) { DW.LNCT.path => e.path = try form_value.getString(d.*), DW.LNCT.directory_index => e.dir_index = try form_value.getUInt(u32), @@ -1507,22 +1057,22 @@ fn runLineNumberProgram(d: *Dwarf, gpa: Allocator, compile_unit: *CompileUnit) ! } var file_ent_fmt_buf: [10]FileEntFmt = undefined; - const file_name_entry_format_count = try fbr.takeByte(); + const file_name_entry_format_count = try fr.takeByte(); if (file_name_entry_format_count > file_ent_fmt_buf.len) return bad(); for (file_ent_fmt_buf[0..file_name_entry_format_count]) |*ent_fmt| { ent_fmt.* = .{ - .content_type_code = try fbr.takeLeb128(u16), - .form_code = try fbr.takeLeb128(u16), + .content_type_code = try fr.takeLeb128(u16), + .form_code = try fr.takeLeb128(u16), }; } - const file_names_count = try fbr.takeLeb128(usize); + const file_names_count = try fr.takeLeb128(usize); try file_entries.ensureUnusedCapacity(gpa, file_names_count); for (try file_entries.addManyAsSlice(gpa, file_names_count)) |*e| { e.* = .{ .path = &.{} }; for (file_ent_fmt_buf[0..file_name_entry_format_count]) |ent_fmt| { - const form_value = try parseFormValue(&fbr, ent_fmt.form_code, unit_header.format, endian, null); + const form_value = try parseFormValue(&fr, ent_fmt.form_code, unit_header.format, endian, addr_size_bytes, null); switch (ent_fmt.content_type_code) { DW.LNCT.path => e.path = try form_value.getString(d.*), DW.LNCT.directory_index => e.dir_index = try form_value.getUInt(u32), @@ -1542,17 +1092,17 @@ fn runLineNumberProgram(d: *Dwarf, gpa: Allocator, compile_unit: *CompileUnit) ! 
var line_table: CompileUnit.SrcLocCache.LineTable = .{}; errdefer line_table.deinit(gpa); - fbr.seek = @intCast(prog_start_offset); + fr.seek = @intCast(prog_start_offset); const next_unit_pos = line_info_offset + next_offset; - while (fbr.seek < next_unit_pos) { - const opcode = try fbr.takeByte(); + while (fr.seek < next_unit_pos) { + const opcode = try fr.takeByte(); if (opcode == DW.LNS.extended_op) { - const op_size = try fbr.takeLeb128(u64); + const op_size = try fr.takeLeb128(u64); if (op_size < 1) return bad(); - const sub_op = try fbr.takeByte(); + const sub_op = try fr.takeByte(); switch (sub_op) { DW.LNE.end_sequence => { // The row being added here is an "end" address, meaning @@ -1571,14 +1121,13 @@ fn runLineNumberProgram(d: *Dwarf, gpa: Allocator, compile_unit: *CompileUnit) ! prog.reset(); }, DW.LNE.set_address => { - const addr = try fbr.takeInt(usize, endian); - prog.address = addr; + prog.address = try readAddress(&fr, endian, addr_size_bytes); }, DW.LNE.define_file => { - const path = try fbr.takeSentinel(0); - const dir_index = try fbr.takeLeb128(u32); - const mtime = try fbr.takeLeb128(u64); - const size = try fbr.takeLeb128(u64); + const path = try fr.takeSentinel(0); + const dir_index = try fr.takeLeb128(u32); + const mtime = try fr.takeLeb128(u64); + const size = try fr.takeLeb128(u64); try file_entries.append(gpa, .{ .path = path, .dir_index = dir_index, @@ -1586,7 +1135,7 @@ fn runLineNumberProgram(d: *Dwarf, gpa: Allocator, compile_unit: *CompileUnit) ! .size = size, }); }, - else => try fbr.discardAll64(op_size - 1), + else => try fr.discardAll64(op_size - 1), } } else if (opcode >= opcode_base) { // special opcodes @@ -1604,19 +1153,19 @@ fn runLineNumberProgram(d: *Dwarf, gpa: Allocator, compile_unit: *CompileUnit) ! prog.basic_block = false; }, DW.LNS.advance_pc => { - const arg = try fbr.takeLeb128(usize); + const arg = try fr.takeLeb128(u64); prog.address += arg * minimum_instruction_length; }, DW.LNS.advance_line => { - const arg = try fbr.takeLeb128(i64); + const arg = try fr.takeLeb128(i64); prog.line += arg; }, DW.LNS.set_file => { - const arg = try fbr.takeLeb128(usize); + const arg = try fr.takeLeb128(usize); prog.file = arg; }, DW.LNS.set_column => { - const arg = try fbr.takeLeb128(u64); + const arg = try fr.takeLeb128(u64); prog.column = arg; }, DW.LNS.negate_stmt => { @@ -1630,13 +1179,13 @@ fn runLineNumberProgram(d: *Dwarf, gpa: Allocator, compile_unit: *CompileUnit) ! prog.address += inc_addr; }, DW.LNS.fixed_advance_pc => { - const arg = try fbr.takeInt(u16, endian); + const arg = try fr.takeInt(u16, endian); prog.address += arg; }, DW.LNS.set_prologue_end => {}, else => { if (opcode - 1 >= standard_opcode_lengths.len) return bad(); - try fbr.discardAll(standard_opcode_lengths[opcode - 1]); + try fr.discardAll(standard_opcode_lengths[opcode - 1]); }, } } @@ -1661,18 +1210,19 @@ fn runLineNumberProgram(d: *Dwarf, gpa: Allocator, compile_unit: *CompileUnit) ! 
}; } -pub fn populateSrcLocCache(d: *Dwarf, gpa: Allocator, cu: *CompileUnit) ScanError!void { +pub fn populateSrcLocCache(d: *Dwarf, gpa: Allocator, endian: Endian, cu: *CompileUnit) ScanError!void { if (cu.src_loc_cache != null) return; - cu.src_loc_cache = try runLineNumberProgram(d, gpa, cu); + cu.src_loc_cache = try d.runLineNumberProgram(gpa, endian, cu); } pub fn getLineNumberInfo( d: *Dwarf, gpa: Allocator, + endian: Endian, compile_unit: *CompileUnit, target_address: u64, ) !std.debug.SourceLocation { - try populateSrcLocCache(d, gpa, compile_unit); + try d.populateSrcLocCache(gpa, endian, compile_unit); const slc = &compile_unit.src_loc_cache.?; const entry = try slc.findSource(target_address); const file_index = entry.file - @intFromBool(slc.version < 5); @@ -1696,7 +1246,7 @@ fn getLineString(di: Dwarf, offset: u64) ![:0]const u8 { return getStringGeneric(di.section(.debug_line_str), offset); } -fn readDebugAddr(di: Dwarf, compile_unit: CompileUnit, index: u64) !u64 { +fn readDebugAddr(di: Dwarf, endian: Endian, compile_unit: *const CompileUnit, index: u64) !u64 { const debug_addr = di.section(.debug_addr) orelse return bad(); // addr_base points to the first item after the header, however we @@ -1705,139 +1255,40 @@ fn readDebugAddr(di: Dwarf, compile_unit: CompileUnit, index: u64) !u64 { // The header is 8 or 12 bytes depending on is_64. if (compile_unit.addr_base < 8) return bad(); - const version = mem.readInt(u16, debug_addr[compile_unit.addr_base - 4 ..][0..2], di.endian); + const version = mem.readInt(u16, debug_addr[compile_unit.addr_base - 4 ..][0..2], endian); if (version != 5) return bad(); const addr_size = debug_addr[compile_unit.addr_base - 2]; const seg_size = debug_addr[compile_unit.addr_base - 1]; - const byte_offset = @as(usize, @intCast(compile_unit.addr_base + (addr_size + seg_size) * index)); + const byte_offset = compile_unit.addr_base + (addr_size + seg_size) * index; if (byte_offset + addr_size > debug_addr.len) return bad(); return switch (addr_size) { - 1 => debug_addr[byte_offset], - 2 => mem.readInt(u16, debug_addr[byte_offset..][0..2], di.endian), - 4 => mem.readInt(u32, debug_addr[byte_offset..][0..4], di.endian), - 8 => mem.readInt(u64, debug_addr[byte_offset..][0..8], di.endian), + 1 => debug_addr[@intCast(byte_offset)], + 2 => mem.readInt(u16, debug_addr[@intCast(byte_offset)..][0..2], endian), + 4 => mem.readInt(u32, debug_addr[@intCast(byte_offset)..][0..4], endian), + 8 => mem.readInt(u64, debug_addr[@intCast(byte_offset)..][0..8], endian), else => bad(), }; } -/// If `.eh_frame_hdr` is present, then only the header needs to be parsed. Otherwise, `.eh_frame` -/// and `.debug_frame` are scanned and a sorted list of FDEs is built for binary searching during -/// unwinding. Even if `.eh_frame_hdr` is used, we may find during unwinding that it's incomplete, -/// in which case we build the sorted list of FDEs at that point. -/// -/// See also `scanCieFdeInfo`. 
-pub fn scanAllUnwindInfo(di: *Dwarf, allocator: Allocator, base_address: usize) !void { - const endian = di.endian; - - if (di.section(.eh_frame_hdr)) |eh_frame_hdr| blk: { - var fbr: Reader = .fixed(eh_frame_hdr); - - const version = try fbr.takeByte(); - if (version != 1) break :blk; - - const eh_frame_ptr_enc = try fbr.takeByte(); - if (eh_frame_ptr_enc == EH.PE.omit) break :blk; - const fde_count_enc = try fbr.takeByte(); - if (fde_count_enc == EH.PE.omit) break :blk; - const table_enc = try fbr.takeByte(); - if (table_enc == EH.PE.omit) break :blk; - - const eh_frame_ptr = cast(usize, try readEhPointer(&fbr, eh_frame_ptr_enc, @sizeOf(usize), .{ - .pc_rel_base = @intFromPtr(&eh_frame_hdr[fbr.seek]), - .follow_indirect = true, - }, endian) orelse return bad()) orelse return bad(); - - const fde_count = cast(usize, try readEhPointer(&fbr, fde_count_enc, @sizeOf(usize), .{ - .pc_rel_base = @intFromPtr(&eh_frame_hdr[fbr.seek]), - .follow_indirect = true, - }, endian) orelse return bad()) orelse return bad(); - - const entry_size = try ExceptionFrameHeader.entrySize(table_enc); - const entries_len = fde_count * entry_size; - if (entries_len > eh_frame_hdr.len - fbr.seek) return bad(); - - di.eh_frame_hdr = .{ - .eh_frame_ptr = eh_frame_ptr, - .table_enc = table_enc, - .fde_count = fde_count, - .entries = eh_frame_hdr[fbr.seek..][0..entries_len], - }; - - // No need to scan .eh_frame, we have a binary search table already - return; - } - - try di.scanCieFdeInfo(allocator, base_address); -} - -/// Scan `.eh_frame` and `.debug_frame` and build a sorted list of FDEs for binary searching during -/// unwinding. -pub fn scanCieFdeInfo(di: *Dwarf, allocator: Allocator, base_address: usize) !void { - const endian = di.endian; - const frame_sections = [2]Section.Id{ .eh_frame, .debug_frame }; - for (frame_sections) |frame_section| { - if (di.section(frame_section)) |section_data| { - var fbr: Reader = .fixed(section_data); - while (fbr.seek < fbr.buffer.len) { - const entry_header = try EntryHeader.read(&fbr, frame_section, endian); - switch (entry_header.type) { - .cie => { - const cie = try CommonInformationEntry.parse( - entry_header.entry_bytes, - di.sectionVirtualOffset(frame_section, base_address).?, - true, - entry_header.format, - frame_section, - entry_header.length_offset, - @sizeOf(usize), - di.endian, - ); - try di.cie_map.put(allocator, entry_header.length_offset, cie); - }, - .fde => |cie_offset| { - const cie = di.cie_map.get(cie_offset) orelse return bad(); - const fde = try FrameDescriptionEntry.parse( - entry_header.entry_bytes, - di.sectionVirtualOffset(frame_section, base_address).?, - true, - cie, - @sizeOf(usize), - di.endian, - ); - try di.fde_list.append(allocator, fde); - }, - .terminator => break, - } - } - - std.mem.sortUnstable(FrameDescriptionEntry, di.fde_list.items, {}, struct { - fn lessThan(ctx: void, a: FrameDescriptionEntry, b: FrameDescriptionEntry) bool { - _ = ctx; - return a.pc_begin < b.pc_begin; - } - }.lessThan); - } - } -} - fn parseFormValue( r: *Reader, form_id: u64, format: Format, endian: Endian, + addr_size_bytes: u8, implicit_const: ?i64, ) ScanError!FormValue { return switch (form_id) { // DWARF5.pdf page 213: the size of this value is encoded in the // compilation unit header as address size. 
- FORM.addr => .{ .addr = try readAddress(r, nativeFormat(), endian) }, + FORM.addr => .{ .addr = try readAddress(r, endian, addr_size_bytes) }, FORM.addrx1 => .{ .addrx = try r.takeByte() }, FORM.addrx2 => .{ .addrx = try r.takeInt(u16, endian) }, FORM.addrx3 => .{ .addrx = try r.takeInt(u24, endian) }, FORM.addrx4 => .{ .addrx = try r.takeInt(u32, endian) }, - FORM.addrx => .{ .addrx = try r.takeLeb128(usize) }, + FORM.addrx => .{ .addrx = try r.takeLeb128(u64) }, FORM.block1 => .{ .block = try r.take(try r.takeByte()) }, FORM.block2 => .{ .block = try r.take(try r.takeInt(u16, endian)) }, @@ -1854,7 +1305,7 @@ fn parseFormValue( FORM.exprloc => .{ .exprloc = try r.take(try r.takeLeb128(usize)) }, FORM.flag => .{ .flag = (try r.takeByte()) != 0 }, FORM.flag_present => .{ .flag = true }, - FORM.sec_offset => .{ .sec_offset = try readAddress(r, format, endian) }, + FORM.sec_offset => .{ .sec_offset = try readFormatSizedInt(r, format, endian) }, FORM.ref1 => .{ .ref = try r.takeByte() }, FORM.ref2 => .{ .ref = try r.takeInt(u16, endian) }, @@ -1862,18 +1313,18 @@ fn parseFormValue( FORM.ref8 => .{ .ref = try r.takeInt(u64, endian) }, FORM.ref_udata => .{ .ref = try r.takeLeb128(u64) }, - FORM.ref_addr => .{ .ref_addr = try readAddress(r, format, endian) }, + FORM.ref_addr => .{ .ref_addr = try readFormatSizedInt(r, format, endian) }, FORM.ref_sig8 => .{ .ref = try r.takeInt(u64, endian) }, FORM.string => .{ .string = try r.takeSentinel(0) }, - FORM.strp => .{ .strp = try readAddress(r, format, endian) }, + FORM.strp => .{ .strp = try readFormatSizedInt(r, format, endian) }, FORM.strx1 => .{ .strx = try r.takeByte() }, FORM.strx2 => .{ .strx = try r.takeInt(u16, endian) }, FORM.strx3 => .{ .strx = try r.takeInt(u24, endian) }, FORM.strx4 => .{ .strx = try r.takeInt(u32, endian) }, FORM.strx => .{ .strx = try r.takeLeb128(usize) }, - FORM.line_strp => .{ .line_strp = try readAddress(r, format, endian) }, - FORM.indirect => parseFormValue(r, try r.takeLeb128(u64), format, endian, implicit_const), + FORM.line_strp => .{ .line_strp = try readFormatSizedInt(r, format, endian) }, + FORM.indirect => parseFormValue(r, try r.takeLeb128(u64), format, endian, addr_size_bytes, implicit_const), FORM.implicit_const => .{ .sdata = implicit_const orelse return bad() }, FORM.loclistx => .{ .loclistx = try r.takeLeb128(u64) }, FORM.rnglistx => .{ .rnglistx = try r.takeLeb128(u64) }, @@ -1946,7 +1397,7 @@ const UnitHeader = struct { unit_length: u64, }; -fn readUnitHeader(r: *Reader, endian: Endian) ScanError!UnitHeader { +pub fn readUnitHeader(r: *Reader, endian: Endian) ScanError!UnitHeader { return switch (try r.takeInt(u32, endian)) { 0...0xfffffff0 - 1 => |unit_length| .{ .format = .@"32", @@ -1963,7 +1414,7 @@ fn readUnitHeader(r: *Reader, endian: Endian) ScanError!UnitHeader { } /// Returns the DWARF register number for an x86_64 register number found in compact unwind info -pub fn compactUnwindToDwarfRegNumber(unwind_reg_number: u3) !u8 { +pub fn compactUnwindToDwarfRegNumber(unwind_reg_number: u3) !u16 { return switch (unwind_reg_number) { 1 => 3, // RBX 2 => 12, // R12 @@ -1971,7 +1422,61 @@ pub fn compactUnwindToDwarfRegNumber(unwind_reg_number: u3) !u8 { 4 => 14, // R14 5 => 15, // R15 6 => 6, // RBP - else => error.InvalidUnwindRegisterNumber, + else => error.InvalidRegister, + }; +} + +/// Returns `null` for CPU architectures without an instruction pointer register. 
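+/// For example (illustrative only): `ipRegNum(.x86_64)` returns 16, the x86-64
+/// psABI's return-address (RA) column, which debuggers map to RIP, while a
+/// target such as SPIR-V has no architectural instruction pointer and yields
+/// `null`.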
+pub fn ipRegNum(arch: std.Target.Cpu.Arch) ?u16 { + return switch (arch) { + .x86 => 8, + .x86_64 => 16, + .arm, .armeb, .thumb, .thumbeb => 15, + .aarch64, .aarch64_be => 32, + else => null, + }; +} + +pub fn fpRegNum(arch: std.Target.Cpu.Arch) u16 { + return switch (arch) { + .x86 => 5, + .x86_64 => 6, + .arm, .armeb, .thumb, .thumbeb => 11, + .aarch64, .aarch64_be => 29, + else => unreachable, + }; +} + +pub fn spRegNum(arch: std.Target.Cpu.Arch) u16 { + return switch (arch) { + .x86 => 4, + .x86_64 => 7, + .arm, .armeb, .thumb, .thumbeb => 13, + .aarch64, .aarch64_be => 31, + else => unreachable, + }; +} + +/// Tells whether unwinding for this target is supported by the Dwarf standard. +/// +/// See also `std.debug.SelfInfo.can_unwind` which tells whether the Zig standard +/// library has a working implementation of unwinding for the current target. +pub fn supportsUnwinding(target: *const std.Target) bool { + return switch (target.cpu.arch) { + .amdgcn, + .nvptx, + .nvptx64, + .spirv32, + .spirv64, + => false, + + // Enabling this causes relocation errors such as: + // error: invalid relocation type R_RISCV_SUB32 at offset 0x20 + .riscv64, .riscv64be, .riscv32, .riscv32be => false, + + // Conservative guess. Feel free to update this logic with any targets + // that are known to not support Dwarf unwinding. + else => true, }; } @@ -1982,11 +1487,11 @@ pub fn bad() error{InvalidDebugInfo} { return error.InvalidDebugInfo; } -fn invalidDebugInfoDetected() void { +pub fn invalidDebugInfoDetected() void { if (debug_debug_mode) @panic("bad dwarf"); } -fn missing() error{MissingDebugInfo} { +pub fn missing() error{MissingDebugInfo} { if (debug_debug_mode) @panic("missing dwarf"); return error.MissingDebugInfo; } @@ -2000,460 +1505,41 @@ fn getStringGeneric(opt_str: ?[]const u8, offset: u64) ![:0]const u8 { return str[casted_offset..last :0]; } -const EhPointerContext = struct { - // The address of the pointer field itself - pc_rel_base: u64, - - // Whether or not to follow indirect pointers. 
This should only be - // used when decoding pointers at runtime using the current process's - // debug info - follow_indirect: bool, - - // These relative addressing modes are only used in specific cases, and - // might not be available / required in all parsing contexts - data_rel_base: ?u64 = null, - text_rel_base: ?u64 = null, - function_rel_base: ?u64 = null, -}; - -fn readEhPointer(fbr: *Reader, enc: u8, addr_size_bytes: u8, ctx: EhPointerContext, endian: Endian) !?u64 { - if (enc == EH.PE.omit) return null; - - const value: union(enum) { - signed: i64, - unsigned: u64, - } = switch (enc & EH.PE.type_mask) { - EH.PE.absptr => .{ - .unsigned = switch (addr_size_bytes) { - 2 => try fbr.takeInt(u16, endian), - 4 => try fbr.takeInt(u32, endian), - 8 => try fbr.takeInt(u64, endian), - else => return error.InvalidAddrSize, - }, - }, - EH.PE.uleb128 => .{ .unsigned = try fbr.takeLeb128(u64) }, - EH.PE.udata2 => .{ .unsigned = try fbr.takeInt(u16, endian) }, - EH.PE.udata4 => .{ .unsigned = try fbr.takeInt(u32, endian) }, - EH.PE.udata8 => .{ .unsigned = try fbr.takeInt(u64, endian) }, - EH.PE.sleb128 => .{ .signed = try fbr.takeLeb128(i64) }, - EH.PE.sdata2 => .{ .signed = try fbr.takeInt(i16, endian) }, - EH.PE.sdata4 => .{ .signed = try fbr.takeInt(i32, endian) }, - EH.PE.sdata8 => .{ .signed = try fbr.takeInt(i64, endian) }, - else => return bad(), - }; - - const base = switch (enc & EH.PE.rel_mask) { - EH.PE.pcrel => ctx.pc_rel_base, - EH.PE.textrel => ctx.text_rel_base orelse return error.PointerBaseNotSpecified, - EH.PE.datarel => ctx.data_rel_base orelse return error.PointerBaseNotSpecified, - EH.PE.funcrel => ctx.function_rel_base orelse return error.PointerBaseNotSpecified, - else => null, - }; - - const ptr: u64 = if (base) |b| switch (value) { - .signed => |s| @intCast(try std.math.add(i64, s, @as(i64, @intCast(b)))), - // absptr can actually contain signed values in some cases (aarch64 MachO) - .unsigned => |u| u +% b, - } else switch (value) { - .signed => |s| @as(u64, @intCast(s)), - .unsigned => |u| u, +pub fn getSymbol(di: *Dwarf, gpa: Allocator, endian: Endian, address: u64) !std.debug.Symbol { + const compile_unit = di.findCompileUnit(endian, address) catch |err| switch (err) { + error.MissingDebugInfo, error.InvalidDebugInfo => return .unknown, + else => return err, }; - - if ((enc & EH.PE.indirect) > 0 and ctx.follow_indirect) { - if (@sizeOf(usize) != addr_size_bytes) { - // See the documentation for `follow_indirect` - return error.NonNativeIndirection; - } - - const native_ptr = cast(usize, ptr) orelse return error.PointerOverflow; - return switch (addr_size_bytes) { - 2, 4, 8 => return @as(*const usize, @ptrFromInt(native_ptr)).*, - else => return error.UnsupportedAddrSize, - }; - } else { - return ptr; - } -} - -fn pcRelBase(field_ptr: usize, pc_rel_offset: i64) !usize { - if (pc_rel_offset < 0) { - return std.math.sub(usize, field_ptr, @as(usize, @intCast(-pc_rel_offset))); - } else { - return std.math.add(usize, field_ptr, @as(usize, @intCast(pc_rel_offset))); - } -} - -pub const ElfModule = struct { - base_address: usize, - dwarf: Dwarf, - mapped_memory: []align(std.heap.page_size_min) const u8, - external_mapped_memory: ?[]align(std.heap.page_size_min) const u8, - - pub fn deinit(self: *@This(), allocator: Allocator) void { - self.dwarf.deinit(allocator); - std.posix.munmap(self.mapped_memory); - if (self.external_mapped_memory) |m| std.posix.munmap(m); - } - - pub fn getSymbolAtAddress(self: *@This(), allocator: Allocator, address: usize) !std.debug.Symbol { - 
// Translate the VA into an address within this object
-        const relocated_address = address - self.base_address;
-        return self.dwarf.getSymbol(allocator, relocated_address);
-    }
-
-    pub fn getDwarfInfoForAddress(self: *@This(), allocator: Allocator, address: usize) !?*Dwarf {
-        _ = allocator;
-        _ = address;
-        return &self.dwarf;
-    }
-
-    pub const LoadError = error{
-        InvalidDebugInfo,
-        MissingDebugInfo,
-        InvalidElfMagic,
-        InvalidElfVersion,
-        InvalidElfEndian,
-        /// TODO: implement this and then remove this error code
-        UnimplementedDwarfForeignEndian,
-        /// The debug info may be valid but this implementation uses memory
-        /// mapping which limits things to usize. If the target debug info is
-        /// 64-bit and host is 32-bit, there may be debug info that is not
-        /// supportable using this method.
-        Overflow,
-
-        PermissionDenied,
-        LockedMemoryLimitExceeded,
-        MemoryMappingNotSupported,
-    } || Allocator.Error || std.fs.File.OpenError || OpenError;
-
-    /// Reads debug info from an already mapped ELF file.
-    ///
-    /// If the required sections aren't present but a reference to external debug
-    /// info is, then this function will recurse to attempt to load the debug
-    /// sections from an external file.
-    pub fn load(
-        gpa: Allocator,
-        mapped_mem: []align(std.heap.page_size_min) const u8,
-        build_id: ?[]const u8,
-        expected_crc: ?u32,
-        parent_sections: *Dwarf.SectionArray,
-        parent_mapped_mem: ?[]align(std.heap.page_size_min) const u8,
-        elf_filename: ?[]const u8,
-    ) LoadError!Dwarf.ElfModule {
-        if (expected_crc) |crc| if (crc != std.hash.crc.Crc32.hash(mapped_mem)) return error.InvalidDebugInfo;
-
-        const hdr: *const elf.Ehdr = @ptrCast(&mapped_mem[0]);
-        if (!mem.eql(u8, hdr.e_ident[0..4], elf.MAGIC)) return error.InvalidElfMagic;
-        if (hdr.e_ident[elf.EI_VERSION] != 1) return error.InvalidElfVersion;
-
-        const endian: Endian = switch (hdr.e_ident[elf.EI_DATA]) {
-            elf.ELFDATA2LSB => .little,
-            elf.ELFDATA2MSB => .big,
-            else => return error.InvalidElfEndian,
-        };
-        if (endian != native_endian) return error.UnimplementedDwarfForeignEndian;
-
-        const shoff = hdr.e_shoff;
-        const str_section_off = shoff + @as(u64, hdr.e_shentsize) * @as(u64, hdr.e_shstrndx);
-        const str_shdr: *const elf.Shdr = @ptrCast(@alignCast(&mapped_mem[cast(usize, str_section_off) orelse return error.Overflow]));
-        const header_strings = mapped_mem[str_shdr.sh_offset..][0..str_shdr.sh_size];
-        const shdrs = @as(
-            [*]const elf.Shdr,
-            @ptrCast(@alignCast(&mapped_mem[shoff])),
-        )[0..hdr.e_shnum];
-
-        var sections: Dwarf.SectionArray = Dwarf.null_section_array;
-
-        // Combine section list. This takes ownership over any owned sections from the parent scope.
-        for (parent_sections, &sections) |*parent, *section_elem| {
-            if (parent.*) |*p| {
-                section_elem.* = p.*;
-                p.owned = false;
-            }
-        }
-        errdefer for (sections) |opt_section| if (opt_section) |s| if (s.owned) gpa.free(s.data);
-
-        var separate_debug_filename: ?[]const u8 = null;
-        var separate_debug_crc: ?u32 = null;
-
-        for (shdrs) |*shdr| {
-            if (shdr.sh_type == elf.SHT_NULL or shdr.sh_type == elf.SHT_NOBITS) continue;
-            const name = mem.sliceTo(header_strings[shdr.sh_name..], 0);
-
-            if (mem.eql(u8, name, ".gnu_debuglink")) {
-                const gnu_debuglink = try chopSlice(mapped_mem, shdr.sh_offset, shdr.sh_size);
-                const debug_filename = mem.sliceTo(@as([*:0]const u8, @ptrCast(gnu_debuglink.ptr)), 0);
-                const crc_offset = mem.alignForward(usize, debug_filename.len + 1, 4);
-                const crc_bytes = gnu_debuglink[crc_offset..][0..4];
-                separate_debug_crc = mem.readInt(u32, crc_bytes, endian);
-                separate_debug_filename = debug_filename;
-                continue;
-            }
-
-            var section_index: ?usize = null;
-            inline for (@typeInfo(Dwarf.Section.Id).@"enum".fields, 0..) |sect, i| {
-                if (mem.eql(u8, "." ++ sect.name, name)) section_index = i;
-            }
-            if (section_index == null) continue;
-            if (sections[section_index.?] != null) continue;
-
-            const section_bytes = try chopSlice(mapped_mem, shdr.sh_offset, shdr.sh_size);
-            sections[section_index.?] = if ((shdr.sh_flags & elf.SHF_COMPRESSED) > 0) blk: {
-                var section_reader: Reader = .fixed(section_bytes);
-                const chdr = section_reader.takeStruct(elf.Chdr, endian) catch continue;
-                if (chdr.ch_type != .ZLIB) continue;
-
-                var decompress: std.compress.flate.Decompress = .init(&section_reader, .zlib, &.{});
-                var decompressed_section: ArrayList(u8) = .empty;
-                defer decompressed_section.deinit(gpa);
-                decompress.reader.appendRemainingUnlimited(gpa, &decompressed_section) catch {
-                    invalidDebugInfoDetected();
-                    continue;
-                };
-                if (chdr.ch_size != decompressed_section.items.len) {
-                    invalidDebugInfoDetected();
-                    continue;
-                }
-                break :blk .{
-                    .data = try decompressed_section.toOwnedSlice(gpa),
-                    .virtual_address = shdr.sh_addr,
-                    .owned = true,
-                };
-            } else .{
-                .data = section_bytes,
-                .virtual_address = shdr.sh_addr,
-                .owned = false,
-            };
-        }
-
-        const missing_debug_info =
-            sections[@intFromEnum(Dwarf.Section.Id.debug_info)] == null or
-            sections[@intFromEnum(Dwarf.Section.Id.debug_abbrev)] == null or
-            sections[@intFromEnum(Dwarf.Section.Id.debug_str)] == null or
-            sections[@intFromEnum(Dwarf.Section.Id.debug_line)] == null;
-
-        // Attempt to load debug info from an external file
-        // See: https://sourceware.org/gdb/onlinedocs/gdb/Separate-Debug-Files.html
-        if (missing_debug_info) {
-
-            // Only allow one level of debug info nesting
-            if (parent_mapped_mem) |_| {
-                return error.MissingDebugInfo;
-            }
-
-            // $XDG_CACHE_HOME/debuginfod_client/<buildid>/debuginfo
-            // This only opportunistically tries to load from the debuginfod cache, but doesn't try to populate it.
-            // One can manually run `debuginfod-find debuginfo PATH` to download the symbols
-            if (build_id) |id| blk: {
-                var debuginfod_dir: std.fs.Dir = switch (builtin.os.tag) {
-                    .wasi, .windows => break :blk,
-                    else => dir: {
-                        if (std.posix.getenv("DEBUGINFOD_CACHE_PATH")) |path| {
-                            break :dir std.fs.openDirAbsolute(path, .{}) catch break :blk;
-                        }
-                        if (std.posix.getenv("XDG_CACHE_HOME")) |cache_path| {
-                            if (cache_path.len > 0) {
-                                const path = std.fs.path.join(gpa, &[_][]const u8{ cache_path, "debuginfod_client" }) catch break :blk;
-                                defer gpa.free(path);
-                                break :dir std.fs.openDirAbsolute(path, .{}) catch break :blk;
-                            }
-                        }
-                        if (std.posix.getenv("HOME")) |home_path| {
-                            const path = std.fs.path.join(gpa, &[_][]const u8{ home_path, ".cache", "debuginfod_client" }) catch break :blk;
-                            defer gpa.free(path);
-                            break :dir std.fs.openDirAbsolute(path, .{}) catch break :blk;
-                        }
-                        break :blk;
-                    },
-                };
-                defer debuginfod_dir.close();
-
-                const filename = std.fmt.allocPrint(gpa, "{x}/debuginfo", .{id}) catch break :blk;
-                defer gpa.free(filename);
-
-                const path: Path = .{
-                    .root_dir = .{ .path = null, .handle = debuginfod_dir },
-                    .sub_path = filename,
-                };
-
-                return loadPath(gpa, path, null, separate_debug_crc, &sections, mapped_mem) catch break :blk;
-            }
-
-            const global_debug_directories = [_][]const u8{
-                "/usr/lib/debug",
-            };
-
-            // <global debug directory>/.build-id/<2-character id prefix>/<id remainder>.debug
-            if (build_id) |id| blk: {
-                if (id.len < 3) break :blk;
-
-                // Either md5 (16 bytes) or sha1 (20 bytes) is used here in practice
-                const extension = ".debug";
-                var id_prefix_buf: [2]u8 = undefined;
-                var filename_buf: [38 + extension.len]u8 = undefined;
-
-                _ = std.fmt.bufPrint(&id_prefix_buf, "{x}", .{id[0..1]}) catch unreachable;
-                const filename = std.fmt.bufPrint(&filename_buf, "{x}" ++ extension, .{id[1..]}) catch break :blk;
-
-                for (global_debug_directories) |global_directory| {
-                    const path: Path = .{
-                        .root_dir = std.Build.Cache.Directory.cwd(),
-                        .sub_path = try std.fs.path.join(gpa, &.{
-                            global_directory, ".build-id", &id_prefix_buf, filename,
-                        }),
-                    };
-                    defer gpa.free(path.sub_path);
-
-                    return loadPath(gpa, path, null, separate_debug_crc, &sections, mapped_mem) catch continue;
-                }
-            }
-
-            // use the path from .gnu_debuglink, in the same search order as gdb
-            if (separate_debug_filename) |separate_filename| blk: {
-                if (elf_filename != null and mem.eql(u8, elf_filename.?, separate_filename))
-                    return error.MissingDebugInfo;
-
-                exe_dir: {
-                    var exe_dir_buf: [std.fs.max_path_bytes]u8 = undefined;
-                    const exe_dir_path = std.fs.selfExeDirPath(&exe_dir_buf) catch break :exe_dir;
-                    var exe_dir = std.fs.openDirAbsolute(exe_dir_path, .{}) catch break :exe_dir;
-                    defer exe_dir.close();
-
-                    // <exe_dir>/<gnu_debuglink>
-                    if (loadPath(
-                        gpa,
-                        .{
-                            .root_dir = .{ .path = null, .handle = exe_dir },
-                            .sub_path = separate_filename,
-                        },
-                        null,
-                        separate_debug_crc,
-                        &sections,
-                        mapped_mem,
-                    )) |debug_info| {
-                        return debug_info;
-                    } else |_| {}
-
-                    // <exe_dir>/.debug/<gnu_debuglink>
-                    const path: Path = .{
-                        .root_dir = .{ .path = null, .handle = exe_dir },
-                        .sub_path = try std.fs.path.join(gpa, &.{ ".debug", separate_filename }),
-                    };
-                    defer gpa.free(path.sub_path);
-
-                    if (loadPath(gpa, path, null, separate_debug_crc, &sections, mapped_mem)) |debug_info| return debug_info else |_| {}
-                }
-
-                var cwd_buf: [std.fs.max_path_bytes]u8 = undefined;
-                const cwd_path = std.posix.realpath(".", &cwd_buf) catch break :blk;
-
-                // <global debug directory>/<absolute folder of current binary>/<gnu_debuglink>
-                for (global_debug_directories) |global_directory| {
-                    const path: Path = .{
-                        .root_dir = std.Build.Cache.Directory.cwd(),
-                        .sub_path = try std.fs.path.join(gpa, &.{ global_directory, cwd_path, separate_filename }),
-                    };
-                    defer gpa.free(path.sub_path);
-                    if (loadPath(gpa, path, null, separate_debug_crc, &sections, mapped_mem)) |debug_info| return debug_info else |_| {}
-                }
-            }
-
-            return error.MissingDebugInfo;
-        }
-
-        var di: Dwarf = .{
-            .endian = endian,
-            .sections = sections,
-            .is_macho = false,
-        };
-
-        try Dwarf.open(&di, gpa);
-
-        return .{
-            .base_address = 0,
-            .dwarf = di,
-            .mapped_memory = parent_mapped_mem orelse mapped_mem,
-            .external_mapped_memory = if (parent_mapped_mem != null) mapped_mem else null,
-        };
-    }
-
-    pub fn loadPath(
-        gpa: Allocator,
-        elf_file_path: Path,
-        build_id: ?[]const u8,
-        expected_crc: ?u32,
-        parent_sections: *Dwarf.SectionArray,
-        parent_mapped_mem: ?[]align(std.heap.page_size_min) const u8,
-    ) LoadError!Dwarf.ElfModule {
-        const elf_file = elf_file_path.root_dir.handle.openFile(elf_file_path.sub_path, .{}) catch |err| switch (err) {
-            error.FileNotFound => return missing(),
+    return .{
+        .name = di.getSymbolName(address),
+        .compile_unit_name = compile_unit.die.getAttrString(di, endian, std.dwarf.AT.name, di.section(.debug_str), compile_unit) catch |err| switch (err) {
+            error.MissingDebugInfo, error.InvalidDebugInfo => null,
+        },
+        .source_location = di.getLineNumberInfo(gpa, endian, compile_unit, address) catch |err| switch (err) {
+            error.MissingDebugInfo, error.InvalidDebugInfo => null,
             else => return err,
-        };
-        defer elf_file.close();
-
-        const end_pos = elf_file.getEndPos() catch return bad();
-        const file_len = cast(usize, end_pos) orelse return error.Overflow;
-
-        const mapped_mem = std.posix.mmap(
-            null,
-            file_len,
-            std.posix.PROT.READ,
-            .{ .TYPE = .SHARED },
-            elf_file.handle,
-            0,
-        ) catch |err| switch (err) {
-            error.MappingAlreadyExists => unreachable,
-            else => |e| return e,
-        };
-        errdefer std.posix.munmap(mapped_mem);
-
-        return load(
-            gpa,
-            mapped_mem,
-            build_id,
-            expected_crc,
-            parent_sections,
-            parent_mapped_mem,
-            elf_file_path.sub_path,
-        );
-    }
-};
-
-pub fn getSymbol(di: *Dwarf, allocator: Allocator, address: u64) !std.debug.Symbol {
-    if (di.findCompileUnit(address)) |compile_unit| {
-        return .{
-            .name = di.getSymbolName(address) orelse "???",
-            .compile_unit_name = compile_unit.die.getAttrString(di, std.dwarf.AT.name, di.section(.debug_str), compile_unit.*) catch |err| switch (err) {
-                error.MissingDebugInfo, error.InvalidDebugInfo => "???",
-            },
-            .source_location = di.getLineNumberInfo(allocator, compile_unit, address) catch |err| switch (err) {
-                error.MissingDebugInfo, error.InvalidDebugInfo => null,
-                else => return err,
-            },
-        };
-    } else |err| switch (err) {
-        error.MissingDebugInfo, error.InvalidDebugInfo => return .{},
-        else => return err,
-    }
-}
-
-pub fn chopSlice(ptr: []const u8, offset: u64, size: u64) error{Overflow}![]const u8 {
-    const start = cast(usize, offset) orelse return error.Overflow;
-    const end = start + (cast(usize, size) orelse return error.Overflow);
-    return ptr[start..end];
+        },
+    };
 }
 
-fn readAddress(r: *Reader, format: std.dwarf.Format, endian: Endian) !u64 {
+/// DWARF5 7.4: "In the 32-bit DWARF format, all values that represent lengths of DWARF sections and
+/// offsets relative to the beginning of DWARF sections are represented using four bytes.
In the +/// 64-bit DWARF format, all values that represent lengths of DWARF sections and offsets relative to +/// the beginning of DWARF sections are represented using eight bytes". +/// +/// This function is for reading such values. +fn readFormatSizedInt(r: *Reader, format: std.dwarf.Format, endian: Endian) !u64 { return switch (format) { .@"32" => try r.takeInt(u32, endian), .@"64" => try r.takeInt(u64, endian), }; } -fn nativeFormat() std.dwarf.Format { - return switch (@sizeOf(usize)) { - 4 => .@"32", - 8 => .@"64", - else => @compileError("unsupported @sizeOf(usize)"), +fn readAddress(r: *Reader, endian: Endian, addr_size_bytes: u8) !u64 { + return switch (addr_size_bytes) { + 2 => try r.takeInt(u16, endian), + 4 => try r.takeInt(u32, endian), + 8 => try r.takeInt(u64, endian), + else => return bad(), }; } diff --git a/lib/std/debug/Dwarf/SelfUnwinder.zig b/lib/std/debug/Dwarf/SelfUnwinder.zig new file mode 100644 index 0000000000..8ee08180dd --- /dev/null +++ b/lib/std/debug/Dwarf/SelfUnwinder.zig @@ -0,0 +1,334 @@ +//! Implements stack unwinding based on `Dwarf.Unwind`. The caller is responsible for providing the +//! initialized `Dwarf.Unwind` from the `.debug_frame` (or equivalent) section; this type handles +//! computing and applying the CFI register rules to evolve a `std.debug.cpu_context.Native` through +//! stack frames, hence performing the virtual unwind. +//! +//! Notably, this type is a valid implementation of `std.debug.SelfInfo.UnwindContext`. + +/// The state of the CPU in the current stack frame. +cpu_state: std.debug.cpu_context.Native, +/// The value of the Program Counter in this frame. This is almost the same as the value of the IP +/// register in `cpu_state`, but may be off by one because the IP is typically a *return* address. +pc: usize, + +cfi_vm: Dwarf.Unwind.VirtualMachine, +expr_vm: Dwarf.expression.StackMachine(.{ .call_frame_context = true }), + +pub const CacheEntry = struct { + const max_regs = 32; + + pc: usize, + cie: *const Dwarf.Unwind.CommonInformationEntry, + cfa_rule: Dwarf.Unwind.VirtualMachine.CfaRule, + num_rules: u8, + rules_regs: [max_regs]u16, + rules: [max_regs]Dwarf.Unwind.VirtualMachine.RegisterRule, + + pub fn find(entries: []const CacheEntry, pc: usize) ?*const CacheEntry { + assert(pc != 0); + const idx = std.hash.int(pc) % entries.len; + const entry = &entries[idx]; + return if (entry.pc == pc) entry else null; + } + + pub fn populate(entry: *const CacheEntry, entries: []CacheEntry) void { + const idx = std.hash.int(entry.pc) % entries.len; + entries[idx] = entry.*; + } + + pub const empty: CacheEntry = .{ + .pc = 0, + .cie = undefined, + .cfa_rule = undefined, + .num_rules = undefined, + .rules_regs = undefined, + .rules = undefined, + }; +}; + +pub fn init(cpu_context: *const std.debug.cpu_context.Native) SelfUnwinder { + // `@constCast` is safe because we aren't going to store to the resulting pointer. 
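+    // Seed the unwinder with the context's current instruction pointer.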
+ const raw_pc_ptr = regNative(@constCast(cpu_context), ip_reg_num) catch |err| switch (err) { + error.InvalidRegister => unreachable, // `ip_reg_num` is definitely valid + error.UnsupportedRegister => unreachable, // the implementation needs to support ip + error.IncompatibleRegisterSize => unreachable, // ip is definitely `usize`-sized + }; + const pc = stripInstructionPtrAuthCode(raw_pc_ptr.*); + return .{ + .cpu_state = cpu_context.*, + .pc = pc, + .cfi_vm = .{}, + .expr_vm = .{}, + }; +} + +pub fn deinit(unwinder: *SelfUnwinder, gpa: Allocator) void { + unwinder.cfi_vm.deinit(gpa); + unwinder.expr_vm.deinit(gpa); + unwinder.* = undefined; +} + +pub fn getFp(unwinder: *const SelfUnwinder) usize { + // `@constCast` is safe because we aren't going to store to the resulting pointer. + const ptr = regNative(@constCast(&unwinder.cpu_state), fp_reg_num) catch |err| switch (err) { + error.InvalidRegister => unreachable, // `fp_reg_num` is definitely valid + error.UnsupportedRegister => unreachable, // the implementation needs to support fp + error.IncompatibleRegisterSize => unreachable, // fp is a pointer so is `usize`-sized + }; + return ptr.*; +} + +/// Compute the rule set for the address `unwinder.pc` from the information in `unwind`. The caller +/// may store the returned rule set in a simple fixed-size cache keyed on the `pc` field to avoid +/// frequently recomputing register rules when unwinding many times. +/// +/// To actually apply the computed rules, see `next`. +pub fn computeRules( + unwinder: *SelfUnwinder, + gpa: Allocator, + unwind: *const Dwarf.Unwind, + load_offset: usize, + explicit_fde_offset: ?usize, +) !CacheEntry { + assert(unwinder.pc != 0); + + const pc_vaddr = unwinder.pc - load_offset; + + const fde_offset = explicit_fde_offset orelse try unwind.lookupPc( + pc_vaddr, + @sizeOf(usize), + native_endian, + ) orelse return error.MissingDebugInfo; + const cie, const fde = try unwind.getFde(fde_offset, native_endian); + + // `lookupPc` can return false positives, so check if the FDE *actually* includes the pc + if (pc_vaddr < fde.pc_begin or pc_vaddr >= fde.pc_begin + fde.pc_range) { + return error.MissingDebugInfo; + } + + unwinder.cfi_vm.reset(); + const row = try unwinder.cfi_vm.runTo(gpa, pc_vaddr, cie, &fde, @sizeOf(usize), native_endian); + const cols = unwinder.cfi_vm.rowColumns(&row); + + if (cols.len > CacheEntry.max_regs) return error.UnsupportedDebugInfo; + + var entry: CacheEntry = .{ + .pc = unwinder.pc, + .cie = cie, + .cfa_rule = row.cfa, + .num_rules = @intCast(cols.len), + .rules_regs = undefined, + .rules = undefined, + }; + for (cols, 0..) |col, i| { + entry.rules_regs[i] = col.register; + entry.rules[i] = col.rule; + } + return entry; +} + +/// Applies the register rules given in `cache_entry` to the current state of `unwinder`. The caller +/// is responsible for ensuring that `cache_entry` contains the correct rule set for `unwinder.pc`. +/// +/// `unwinder.cpu_state` and `unwinder.pc` are updated to refer to the next frame, and this frame's +/// return address is returned as a `usize`. 
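+///
+/// A sketch of a typical unwind loop built on `computeRules` and `next` (error
+/// handling elided; `unwind` is assumed to be a prepared `Dwarf.Unwind` for this
+/// module and `load_offset` its load bias):
+///
+///     var unwinder: SelfUnwinder = .init(&cpu_context);
+///     defer unwinder.deinit(gpa);
+///     while (unwinder.pc != 0) {
+///         const rules = try unwinder.computeRules(gpa, &unwind, load_offset, null);
+///         const ret_addr = try unwinder.next(gpa, &rules);
+///         if (ret_addr == 0) break; // outermost frame reached
+///     }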
+pub fn next(unwinder: *SelfUnwinder, gpa: Allocator, cache_entry: *const CacheEntry) std.debug.SelfInfoError!usize {
+    return unwinder.nextInner(gpa, cache_entry) catch |err| switch (err) {
+        error.OutOfMemory,
+        error.InvalidDebugInfo,
+        => |e| return e,
+
+        error.UnsupportedRegister,
+        error.UnimplementedExpressionCall,
+        error.UnimplementedOpcode,
+        error.UnimplementedUserOpcode,
+        error.UnimplementedTypedComparison,
+        error.UnimplementedTypeConversion,
+        error.UnknownExpressionOpcode,
+        => return error.UnsupportedDebugInfo,
+
+        error.ReadFailed,
+        error.EndOfStream,
+        error.Overflow,
+        error.IncompatibleRegisterSize,
+        error.InvalidRegister,
+        error.IncompleteExpressionContext,
+        error.InvalidCFAOpcode,
+        error.InvalidExpression,
+        error.InvalidFrameBase,
+        error.InvalidIntegralTypeSize,
+        error.InvalidSubExpression,
+        error.InvalidTypeLength,
+        error.TruncatedIntegralType,
+        error.DivisionByZero,
+        => return error.InvalidDebugInfo,
+    };
+}
+
+fn nextInner(unwinder: *SelfUnwinder, gpa: Allocator, cache_entry: *const CacheEntry) !usize {
+    const format = cache_entry.cie.format;
+    const return_address_register = cache_entry.cie.return_address_register;
+
+    const cfa = switch (cache_entry.cfa_rule) {
+        .none => return error.InvalidDebugInfo,
+        .reg_off => |ro| cfa: {
+            const ptr = try regNative(&unwinder.cpu_state, ro.register);
+            break :cfa try applyOffset(ptr.*, ro.offset);
+        },
+        .expression => |expr| cfa: {
+            // On all implemented architectures, the CFA is defined to be the previous frame's SP
+            const prev_cfa_val = (try regNative(&unwinder.cpu_state, sp_reg_num)).*;
+            unwinder.expr_vm.reset();
+            const value = try unwinder.expr_vm.run(expr, gpa, .{
+                .format = format,
+                .cpu_context = &unwinder.cpu_state,
+            }, prev_cfa_val) orelse return error.InvalidDebugInfo;
+            switch (value) {
+                .generic => |g| break :cfa g,
+                else => return error.InvalidDebugInfo,
+            }
+        },
+    };
+
+    // If unspecified, we'll use the default rule for the return address register, which is
+    // typically equivalent to `.undefined` (meaning there is no return address), but may be
+    // overridden by ABIs.
+    var has_return_address: bool = builtin.cpu.arch.isAARCH64() and
+        return_address_register >= 19 and
+        return_address_register <= 28;
+
+    // Create a copy of the CPU state, to which we will apply the new rules.
+    var new_cpu_state = unwinder.cpu_state;
+
+    // On all implemented architectures, the CFA is defined to be the previous frame's SP
+    (try regNative(&new_cpu_state, sp_reg_num)).* = cfa;
+
+    const rules_len = cache_entry.num_rules;
+    for (cache_entry.rules_regs[0..rules_len], cache_entry.rules[0..rules_len]) |register, rule| {
+        const new_val: union(enum) {
+            same,
+            undefined,
+            val: usize,
+            bytes: []const u8,
+        } = switch (rule) {
+            .default => val: {
+                // The default rule is typically equivalent to `.undefined`, but ABIs may override it.
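+                // For example, the AArch64 ABI treats the callee-saved registers
+                // x19-x28 (DWARF numbers 19-28) as `.same_value` by default.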
+ if (builtin.cpu.arch.isAARCH64() and register >= 19 and register <= 28) { + break :val .same; + } + break :val .undefined; + }, + .undefined => .undefined, + .same_value => .same, + .offset => |offset| val: { + const ptr: *const usize = @ptrFromInt(try applyOffset(cfa, offset)); + break :val .{ .val = ptr.* }; + }, + .val_offset => |offset| .{ .val = try applyOffset(cfa, offset) }, + .register => |r| .{ .bytes = try unwinder.cpu_state.dwarfRegisterBytes(r) }, + .expression => |expr| val: { + unwinder.expr_vm.reset(); + const value = try unwinder.expr_vm.run(expr, gpa, .{ + .format = format, + .cpu_context = &unwinder.cpu_state, + }, cfa) orelse return error.InvalidDebugInfo; + const ptr: *const usize = switch (value) { + .generic => |addr| @ptrFromInt(addr), + else => return error.InvalidDebugInfo, + }; + break :val .{ .val = ptr.* }; + }, + .val_expression => |expr| val: { + unwinder.expr_vm.reset(); + const value = try unwinder.expr_vm.run(expr, gpa, .{ + .format = format, + .cpu_context = &unwinder.cpu_state, + }, cfa) orelse return error.InvalidDebugInfo; + switch (value) { + .generic => |val| break :val .{ .val = val }, + else => return error.InvalidDebugInfo, + } + }, + }; + switch (new_val) { + .same => {}, + .undefined => { + const dest = try new_cpu_state.dwarfRegisterBytes(@intCast(register)); + @memset(dest, undefined); + }, + .val => |val| { + const dest = try new_cpu_state.dwarfRegisterBytes(@intCast(register)); + if (dest.len != @sizeOf(usize)) return error.InvalidDebugInfo; + const dest_ptr: *align(1) usize = @ptrCast(dest); + dest_ptr.* = val; + }, + .bytes => |src| { + const dest = try new_cpu_state.dwarfRegisterBytes(@intCast(register)); + if (dest.len != src.len) return error.InvalidDebugInfo; + @memcpy(dest, src); + }, + } + if (register == return_address_register) { + has_return_address = new_val != .undefined; + } + } + + const return_address: usize = if (has_return_address) pc: { + const raw_ptr = try regNative(&new_cpu_state, return_address_register); + break :pc stripInstructionPtrAuthCode(raw_ptr.*); + } else 0; + + (try regNative(&new_cpu_state, ip_reg_num)).* = return_address; + + // The new CPU state is complete; flush changes. + unwinder.cpu_state = new_cpu_state; + + // The caller will subtract 1 from the return address to get an address corresponding to the + // function call. However, if this is a signal frame, that's actually incorrect, because the + // "return address" we have is the instruction which triggered the signal (if the signal + // handler returned, the instruction would be re-run). Compensate for this by incrementing + // the address in that case. + const adjusted_ret_addr = if (cache_entry.cie.is_signal_frame) return_address +| 1 else return_address; + + // We also want to do that same subtraction here to get the PC for the next frame's FDE. + // This is because if the callee was noreturn, then the function call might be the caller's + // last instruction, so `return_address` might actually point outside of it! 
+ unwinder.pc = adjusted_ret_addr -| 1; + + return adjusted_ret_addr; +} + +pub fn regNative(ctx: *std.debug.cpu_context.Native, num: u16) error{ + InvalidRegister, + UnsupportedRegister, + IncompatibleRegisterSize, +}!*align(1) usize { + const bytes = try ctx.dwarfRegisterBytes(num); + if (bytes.len != @sizeOf(usize)) return error.IncompatibleRegisterSize; + return @ptrCast(bytes); +} + +/// Since register rules are applied (usually) during a panic, +/// checked addition / subtraction is used so that we can return +/// an error and fall back to FP-based unwinding. +fn applyOffset(base: usize, offset: i64) !usize { + return if (offset >= 0) + try std.math.add(usize, base, @as(usize, @intCast(offset))) + else + try std.math.sub(usize, base, @as(usize, @intCast(-offset))); +} + +const ip_reg_num = Dwarf.ipRegNum(builtin.target.cpu.arch).?; +const fp_reg_num = Dwarf.fpRegNum(builtin.target.cpu.arch); +const sp_reg_num = Dwarf.spRegNum(builtin.target.cpu.arch); + +const std = @import("std"); +const Allocator = std.mem.Allocator; +const Dwarf = std.debug.Dwarf; +const assert = std.debug.assert; +const stripInstructionPtrAuthCode = std.debug.stripInstructionPtrAuthCode; + +const builtin = @import("builtin"); +const native_endian = builtin.target.cpu.arch.endian(); + +const SelfUnwinder = @This(); diff --git a/lib/std/debug/Dwarf/Unwind.zig b/lib/std/debug/Dwarf/Unwind.zig new file mode 100644 index 0000000000..d351c0421e --- /dev/null +++ b/lib/std/debug/Dwarf/Unwind.zig @@ -0,0 +1,702 @@ +//! Contains state relevant to stack unwinding through the DWARF `.debug_frame` section, or the +//! `.eh_frame` section which is an extension of the former specified by Linux Standard Base Core. +//! Like `Dwarf`, no assumptions are made about the host's relationship to the target of the unwind +//! information -- unwind data for any target can be read by any host. +//! +//! `Unwind` specifically deals with loading the data from CIEs and FDEs in the section, and with +//! performing fast lookups of a program counter's corresponding FDE. The CFI instructions in the +//! CIEs and FDEs can be interpreted by `VirtualMachine`. +//! +//! The typical usage of `Unwind` is as follows: +//! +//! * Initialize with `initEhFrameHdr` or `initSection`, depending on the available data +//! * Call `prepare` to scan CIEs and, if necessary, construct a search table +//! * Call `lookupPc` to find the section offset of the FDE corresponding to a PC +//! * Call `getFde` to load the corresponding FDE and CIE +//! * Check that the PC does indeed fall in that range (`lookupPc` may return a false positive) +//! * Interpret the embedded CFI instructions using `VirtualMachine` +//! +//! In some cases, such as when using the "compact unwind" data in Mach-O binaries, the FDE offsets +//! may already be known. In that case, no call to `lookupPc` is necessary, which means the call to +//! `prepare` can be optimized to only scan CIEs. + +pub const VirtualMachine = @import("Unwind/VirtualMachine.zig"); + +frame_section: struct { + id: Section, + /// The virtual address of the start of the section. "Virtual address" refers to the address in + /// the binary (e.g. `sh_addr` in an ELF file); the equivalent runtime address may be relocated + /// in position-independent binaries. + vaddr: u64, + /// The full contents of the section. May have imprecise bounds depending on `section`. This + /// memory is externally managed. + /// + /// For `.debug_frame`, the slice length is exactly equal to the section length. 
This is needed
+    /// to know the number of CIEs and FDEs.
+    ///
+    /// For `.eh_frame`, the slice length may exceed the section length, i.e. the slice may refer to
+    /// more bytes than are in the section. This imprecision exists because `.eh_frame_hdr` only
+    /// includes the address of the loaded `.eh_frame` data, not its length. It is not a problem
+    /// because unlike `.debug_frame`, the end of the CIE/FDE list is signaled through a sentinel
+    /// value. If this slice does have precise bounds, they will still be checked, preventing
+    /// crashes when reading potentially-invalid `.eh_frame` data from files.
+    bytes: []const u8,
+},
+
+/// A structure allowing fast lookups of the FDE corresponding to a particular PC. We use a binary
+/// search table for the lookup; essentially, a list of all FDEs ordered by PC range. `null` means
+/// the lookup data is not yet populated, so `prepare` must be called before `lookupPc`.
+lookup: ?union(enum) {
+    /// The `.eh_frame_hdr` section contains a pre-computed search table which we can use.
+    eh_frame_hdr: struct {
+        /// Virtual address of the `.eh_frame_hdr` section.
+        vaddr: u64,
+        table: EhFrameHeader.SearchTable,
+    },
+    /// There is no pre-computed search table, so we have built one ourselves.
+    /// Allocated into `gpa` and freed by `deinit`.
+    sorted_fdes: []SortedFdeEntry,
+},
+
+/// Initially empty; populated by `prepare`.
+cie_list: std.MultiArrayList(struct {
+    offset: u64,
+    cie: CommonInformationEntry,
+}),
+
+const SortedFdeEntry = struct {
+    /// This FDE's value of `pc_begin`.
+    pc_begin: u64,
+    /// Offset into the section of the corresponding FDE, including the entry header.
+    fde_offset: u64,
+};
+
+pub const Section = enum { debug_frame, eh_frame };
+
+/// Initialize with unwind information from a header loaded from an `.eh_frame_hdr` section, and a
+/// pointer to the contents of the `.eh_frame` section.
+///
+/// `.eh_frame_hdr` may embed a binary search table of FDEs. If it does, we will use that table for
+/// PC lookups rather than spending time constructing our own search table.
+pub fn initEhFrameHdr(header: EhFrameHeader, section_vaddr: u64, section_bytes_ptr: [*]const u8) Unwind {
+    return .{
+        .frame_section = .{
+            .id = .eh_frame,
+            .bytes = maxSlice(section_bytes_ptr),
+            .vaddr = header.eh_frame_vaddr,
+        },
+        .lookup = if (header.search_table) |table| .{ .eh_frame_hdr = .{
+            .vaddr = section_vaddr,
+            .table = table,
+        } } else null,
+        .cie_list = .empty,
+    };
+}
+
+/// Initialize with unwind information from the contents of a `.debug_frame` or `.eh_frame` section.
+///
+/// If the `.eh_frame_hdr` section is available, consider instead using `initEhFrameHdr`, which
+/// allows the implementation to use a search table embedded in that section if it is available.
+pub fn initSection(section: Section, section_vaddr: u64, section_bytes: []const u8) Unwind {
+    return .{
+        .frame_section = .{
+            .id = section,
+            .bytes = section_bytes,
+            .vaddr = section_vaddr,
+        },
+        .lookup = null,
+        .cie_list = .empty,
+    };
+}
+
+pub fn deinit(unwind: *Unwind, gpa: Allocator) void {
+    if (unwind.lookup) |lookup| switch (lookup) {
+        .eh_frame_hdr => {},
+        .sorted_fdes => |fdes| gpa.free(fdes),
+    };
+    for (unwind.cie_list.items(.cie)) |*cie| {
+        if (cie.last_row) |*lr| {
+            gpa.free(lr.cols);
+        }
+    }
+    unwind.cie_list.deinit(gpa);
+}
+
+/// Decoded version of the `.eh_frame_hdr` section.
+pub const EhFrameHeader = struct {
+    /// The virtual address (i.e. as given in the binary, before relocations) of the `.eh_frame`
+    /// section. This value is important when using `.eh_frame_hdr` to find debug information for
+    /// the current binary, because it allows locating where the `.eh_frame` section is loaded in
+    /// memory (by adding it to the ELF module's base address).
+    eh_frame_vaddr: u64,
+    search_table: ?SearchTable,
+
+    pub const SearchTable = struct {
+        /// The byte offset of the search table into the `.eh_frame_hdr` section.
+        offset: u8,
+        encoding: EH.PE,
+        fde_count: usize,
+        /// The actual table entries are viewed as a plain byte slice because `encoding` causes the
+        /// size of entries in the table to vary.
+        entries: []const u8,
+
+        /// Returns the vaddr of the FDE for `pc`, or `null` if no matching FDE was found.
+        fn findEntry(
+            table: *const SearchTable,
+            eh_frame_hdr_vaddr: u64,
+            pc: u64,
+            addr_size_bytes: u8,
+            endian: Endian,
+        ) !?u64 {
+            const table_vaddr = eh_frame_hdr_vaddr + table.offset;
+            const entry_size = try entrySize(table.encoding, addr_size_bytes);
+            var left: usize = 0;
+            var len: usize = table.fde_count;
+            while (len > 1) {
+                const mid = left + len / 2;
+                var entry_reader: Reader = .fixed(table.entries[mid * entry_size ..][0..entry_size]);
+                const pc_begin = try readEhPointer(&entry_reader, table.encoding, addr_size_bytes, .{
+                    .pc_rel_base = table_vaddr + mid * entry_size,
+                    .data_rel_base = eh_frame_hdr_vaddr,
+                }, endian);
+                if (pc < pc_begin) {
+                    len /= 2;
+                } else {
+                    left = mid;
+                    len -= len / 2;
+                }
+            }
+            if (len == 0) return null;
+            var entry_reader: Reader = .fixed(table.entries[left * entry_size ..][0..entry_size]);
+            // Skip past `pc_begin`; we're now interested in the FDE offset
+            _ = try readEhPointerAbs(&entry_reader, table.encoding.type, addr_size_bytes, endian);
+            const fde_ptr = try readEhPointer(&entry_reader, table.encoding, addr_size_bytes, .{
+                .pc_rel_base = table_vaddr + left * entry_size,
+                .data_rel_base = eh_frame_hdr_vaddr,
+            }, endian);
+            return fde_ptr;
+        }
+
+        fn entrySize(table_enc: EH.PE, addr_size_bytes: u8) !u8 {
+            return switch (table_enc.type) {
+                .absptr => 2 * addr_size_bytes,
+                .udata2, .sdata2 => 4,
+                .udata4, .sdata4 => 8,
+                .udata8, .sdata8 => 16,
+                .uleb128, .sleb128 => return bad(), // this is a binary search table; all entries must be the same size
+                _ => return bad(),
+            };
+        }
+    };
+
+    pub fn parse(
+        eh_frame_hdr_vaddr: u64,
+        eh_frame_hdr_bytes: []const u8,
+        addr_size_bytes: u8,
+        endian: Endian,
+    ) !EhFrameHeader {
+        var r: Reader = .fixed(eh_frame_hdr_bytes);
+
+        const version = try r.takeByte();
+        if (version != 1) return bad();
+
+        const eh_frame_ptr_enc: EH.PE = @bitCast(try r.takeByte());
+        const fde_count_enc: EH.PE = @bitCast(try r.takeByte());
+        const table_enc: EH.PE = @bitCast(try r.takeByte());
+
+        const eh_frame_ptr = try readEhPointer(&r, eh_frame_ptr_enc, addr_size_bytes, .{
+            .pc_rel_base = eh_frame_hdr_vaddr + r.seek,
+        }, endian);
+
+        const table: ?SearchTable = table: {
+            if (fde_count_enc == EH.PE.omit) break :table null;
+            if (table_enc == EH.PE.omit) break :table null;
+            const fde_count = try readEhPointer(&r, fde_count_enc, addr_size_bytes, .{
+                .pc_rel_base = eh_frame_hdr_vaddr + r.seek,
+            }, endian);
+            const entry_size = try SearchTable.entrySize(table_enc, addr_size_bytes);
+            const bytes_offset = r.seek;
+            const bytes_len = cast(usize, fde_count * entry_size) orelse return error.EndOfStream;
+            const bytes = try r.take(bytes_len);
+            break :table .{
+                .encoding = table_enc,
+                .fde_count = @intCast(fde_count),
+                .entries = bytes,
+                .offset = @intCast(bytes_offset),
+            };
+        };
+
+        return .{
+            .eh_frame_vaddr =
eh_frame_ptr, + .search_table = table, + }; + } +}; + +/// The shared header of an FDE/CIE, containing a length in bytes (DWARF's "initial length field") +/// and a value which differentiates CIEs from FDEs and maps FDEs to their corresponding CIEs. The +/// `.eh_frame` format also includes a third variation, here called `.terminator`, which acts as a +/// sentinel for the whole section. +/// +/// `CommonInformationEntry.parse` and `FrameDescriptionEntry.parse` expect the `EntryHeader` to +/// have been parsed first: they accept data stored in the `EntryHeader`, and only read the bytes +/// following this header. +const EntryHeader = union(enum) { + cie: struct { + format: Format, + /// Remaining bytes in the CIE. These are parseable by `CommonInformationEntry.parse`. + bytes_len: u64, + }, + fde: struct { + /// Offset into the section of the corresponding CIE, *including* its entry header. + cie_offset: u64, + /// Remaining bytes in the FDE. These are parseable by `FrameDescriptionEntry.parse`. + bytes_len: u64, + }, + /// The `.eh_frame` format includes terminators which indicate that the last CIE/FDE has been + /// reached. However, `.debug_frame` does not include such a terminator, so the caller must + /// keep track of how many section bytes remain when parsing all entries in `.debug_frame`. + terminator, + + fn read(r: *Reader, header_section_offset: u64, section: Section, endian: Endian) !EntryHeader { + const unit_header = try Dwarf.readUnitHeader(r, endian); + if (unit_header.unit_length == 0) return .terminator; + + // Next is a value which will disambiguate CIEs and FDEs. Annoyingly, LSB Core makes this + // value always 4-byte, whereas DWARF makes it depend on the `dwarf.Format`. + const cie_ptr_or_id_size: u8 = switch (section) { + .eh_frame => 4, + .debug_frame => switch (unit_header.format) { + .@"32" => 4, + .@"64" => 8, + }, + }; + const cie_ptr_or_id = switch (cie_ptr_or_id_size) { + 4 => try r.takeInt(u32, endian), + 8 => try r.takeInt(u64, endian), + else => unreachable, + }; + const remaining_bytes = unit_header.unit_length - cie_ptr_or_id_size; + + // If this entry is a CIE, then `cie_ptr_or_id` will have this value, which is different + // between the DWARF `.debug_frame` section and the LSB Core `.eh_frame` section. + const cie_id: u64 = switch (section) { + .eh_frame => 0, + .debug_frame => switch (unit_header.format) { + .@"32" => maxInt(u32), + .@"64" => maxInt(u64), + }, + }; + if (cie_ptr_or_id == cie_id) { + return .{ .cie = .{ + .format = unit_header.format, + .bytes_len = remaining_bytes, + } }; + } + + // This is an FDE -- `cie_ptr_or_id` points to the associated CIE. Unfortunately, the format + // of that pointer again differs between `.debug_frame` and `.eh_frame`. + const cie_offset = switch (section) { + .eh_frame => try std.math.sub(u64, header_section_offset + unit_header.header_length, cie_ptr_or_id), + .debug_frame => cie_ptr_or_id, + }; + return .{ .fde = .{ + .cie_offset = cie_offset, + .bytes_len = remaining_bytes, + } }; + } +}; + +pub const CommonInformationEntry = struct { + version: u8, + format: Format, + + /// In version 4, CIEs can specify the address size used in the CIE and associated FDEs. + /// This value must be used *only* to parse associated FDEs in `FrameDescriptionEntry.parse`. + addr_size_bytes: u8, + + /// Always 0 for versions which do not specify this (currently all versions other than 4). 
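+    /// In practice this is 0 on modern flat-memory targets, where segment
+    /// selectors are unused.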
+    segment_selector_size: u8,
+
+    code_alignment_factor: u32,
+    data_alignment_factor: i32,
+    return_address_register: u8,
+
+    fde_pointer_enc: EH.PE,
+    is_signal_frame: bool,
+
+    augmentation_kind: AugmentationKind,
+
+    initial_instructions: []const u8,
+
+    last_row: ?struct {
+        offset: u64,
+        cfa: VirtualMachine.CfaRule,
+        cols: []VirtualMachine.Column,
+    },
+
+    pub const AugmentationKind = enum { none, gcc_eh, lsb_z };
+
+    /// This function expects to read the CIE starting with the version field.
+    /// The returned struct references memory backed by `cie_bytes`.
+    fn parse(
+        format: Format,
+        cie_bytes: []const u8,
+        section: Section,
+        default_addr_size_bytes: u8,
+    ) !CommonInformationEntry {
+        // We only read the data through this reader.
+        var r: Reader = .fixed(cie_bytes);
+
+        const version = try r.takeByte();
+        switch (section) {
+            .eh_frame => if (version != 1 and version != 3) return error.UnsupportedDwarfVersion,
+            .debug_frame => if (version != 4) return error.UnsupportedDwarfVersion,
+        }
+
+        const aug_str = try r.takeSentinel(0);
+        const aug_kind: AugmentationKind = aug: {
+            if (aug_str.len == 0) break :aug .none;
+            if (aug_str[0] == 'z') break :aug .lsb_z;
+            if (std.mem.eql(u8, aug_str, "eh")) break :aug .gcc_eh;
+            // We can't finish parsing the CIE if we don't know what its augmentation means.
+            return bad();
+        };
+
+        switch (aug_kind) {
+            .none => {}, // no extra data
+            .lsb_z => {}, // no extra data yet, but there is a bit later
+            .gcc_eh => try r.discardAll(default_addr_size_bytes), // unsupported data
+        }
+
+        const addr_size_bytes = if (version == 4) try r.takeByte() else default_addr_size_bytes;
+        const segment_selector_size: u8 = if (version == 4) try r.takeByte() else 0;
+        const code_alignment_factor = try r.takeLeb128(u32);
+        const data_alignment_factor = try r.takeLeb128(i32);
+        const return_address_register = if (version == 1) try r.takeByte() else try r.takeLeb128(u8);
+
+        // This is where LSB's augmentation might add some data.
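+        // In the "z" form, each character of the augmentation string after the
+        // 'z' describes one item in the augmentation data block, in order (e.g.
+        // for "zR" the block holds only the FDE pointer encoding byte).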
+        const fde_pointer_enc: EH.PE, const is_signal_frame: bool = aug: {
+            const default_fde_pointer_enc: EH.PE = .{ .type = .absptr, .rel = .abs };
+            if (aug_kind != .lsb_z) break :aug .{ default_fde_pointer_enc, false };
+            const aug_data_len = try r.takeLeb128(u32);
+            var aug_data: Reader = .fixed(try r.take(aug_data_len));
+            var fde_pointer_enc: EH.PE = default_fde_pointer_enc;
+            var is_signal_frame = false;
+            for (aug_str[1..]) |byte| switch (byte) {
+                'L' => _ = try aug_data.takeByte(), // we ignore the LSDA pointer
+                'P' => {
+                    const enc: EH.PE = @bitCast(try aug_data.takeByte());
+                    const endian: Endian = .little; // irrelevant because we're discarding the value anyway
+                    _ = try readEhPointerAbs(&aug_data, enc.type, addr_size_bytes, endian); // we ignore the personality routine
+                },
+                'R' => fde_pointer_enc = @bitCast(try aug_data.takeByte()),
+                'S' => is_signal_frame = true,
+                'B', 'G' => {},
+                else => return bad(),
+            };
+            break :aug .{ fde_pointer_enc, is_signal_frame };
+        };
+
+        return .{
+            .format = format,
+            .version = version,
+            .addr_size_bytes = addr_size_bytes,
+            .segment_selector_size = segment_selector_size,
+            .code_alignment_factor = code_alignment_factor,
+            .data_alignment_factor = data_alignment_factor,
+            .return_address_register = return_address_register,
+            .fde_pointer_enc = fde_pointer_enc,
+            .is_signal_frame = is_signal_frame,
+            .augmentation_kind = aug_kind,
+            .initial_instructions = r.buffered(),
+            .last_row = null,
+        };
+    }
+};
+
+pub const FrameDescriptionEntry = struct {
+    pc_begin: u64,
+    pc_range: u64,
+    instructions: []const u8,
+
+    /// This function expects to read the FDE starting at the PC Begin field.
+    /// The returned struct references memory backed by `fde_bytes`.
+    fn parse(
+        /// The virtual address of the FDE we're parsing, *excluding* its entry header (i.e. the
+        /// address is after the header). If `fde_bytes` is backed by the memory of a loaded
+        /// module's `.eh_frame` section, this will equal `fde_bytes.ptr`.
+        fde_vaddr: u64,
+        fde_bytes: []const u8,
+        cie: *const CommonInformationEntry,
+        endian: Endian,
+    ) !FrameDescriptionEntry {
+        if (cie.segment_selector_size != 0) return error.UnsupportedAddrSize;
+
+        var r: Reader = .fixed(fde_bytes);
+
+        const pc_begin = try readEhPointer(&r, cie.fde_pointer_enc, cie.addr_size_bytes, .{
+            .pc_rel_base = fde_vaddr,
+        }, endian);
+
+        // I swear I'm not kidding when I say that PC Range is encoded with `cie.fde_pointer_enc`, but ignoring `rel`.
+        const pc_range = switch (try readEhPointerAbs(&r, cie.fde_pointer_enc.type, cie.addr_size_bytes, endian)) {
+            .unsigned => |x| x,
+            .signed => |x| cast(u64, x) orelse return bad(),
+        };
+
+        switch (cie.augmentation_kind) {
+            .none, .gcc_eh => {},
+            .lsb_z => {
+                // There is augmentation data, but it's irrelevant to us -- it
+                // only contains the LSDA pointer, which we don't care about.
+                const aug_data_len = try r.takeLeb128(usize);
+                try r.discardAll(aug_data_len);
+            },
+        }
+
+        return .{
+            .pc_begin = pc_begin,
+            .pc_range = pc_range,
+            .instructions = r.buffered(),
+        };
+    }
+};
+
+/// Builds the CIE list and FDE lookup table if they are not already built. It is required to call
+/// this function at least once before calling `lookupPc` or `getFde`. If only `getFde` is needed,
+/// then `need_lookup` can be set to `false` to make this function more efficient.
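+///
+/// A sketch of typical use against a loaded `.eh_frame` section (identifiers
+/// are illustrative):
+///
+///     var unwind: Unwind = .initSection(.eh_frame, section_vaddr, section_bytes);
+///     defer unwind.deinit(gpa);
+///     try unwind.prepare(gpa, @sizeOf(usize), endian, true, false);
+///     if (try unwind.lookupPc(pc_vaddr, @sizeOf(usize), endian)) |fde_offset| {
+///         const cie, const fde = try unwind.getFde(fde_offset, endian);
+///         // ...check the PC is in the FDE's range, then interpret its CFI
+///         // instructions with `VirtualMachine`...
+///     }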
+pub fn prepare(
+    unwind: *Unwind,
+    gpa: Allocator,
+    addr_size_bytes: u8,
+    endian: Endian,
+    need_lookup: bool,
+    /// The `__eh_frame` section in Mach-O binaries deviates from the standard `.eh_frame` section
+    /// in one way which this function needs to be aware of.
+    is_macho: bool,
+) !void {
+    if (unwind.cie_list.len > 0 and (!need_lookup or unwind.lookup != null)) return;
+    unwind.cie_list.clearRetainingCapacity();
+
+    if (is_macho) assert(unwind.lookup == null or unwind.lookup.? != .eh_frame_hdr);
+
+    const section = unwind.frame_section;
+
+    var r: Reader = .fixed(section.bytes);
+    var fde_list: std.ArrayList(SortedFdeEntry) = .empty;
+    defer fde_list.deinit(gpa);
+
+    const saw_terminator = while (r.seek < r.buffer.len) {
+        const entry_offset = r.seek;
+        switch (try EntryHeader.read(&r, entry_offset, section.id, endian)) {
+            .cie => |cie_info| {
+                // We will pre-populate a list of CIEs for efficiency: this avoids work re-parsing
+                // them every time we look up an FDE. It also lets us cache the result of evaluating
+                // the CIE's initial CFI instructions, which is useful because in the vast majority
+                // of cases those instructions will be needed to reach the PC we are unwinding to.
+                const bytes_len = cast(usize, cie_info.bytes_len) orelse return error.EndOfStream;
+                const idx = unwind.cie_list.len;
+                try unwind.cie_list.append(gpa, .{
+                    .offset = entry_offset,
+                    .cie = try .parse(cie_info.format, try r.take(bytes_len), section.id, addr_size_bytes),
+                });
+                errdefer _ = unwind.cie_list.pop().?;
+                try VirtualMachine.populateCieLastRow(gpa, &unwind.cie_list.items(.cie)[idx], addr_size_bytes, endian);
+                continue;
+            },
+            .fde => |fde_info| {
+                const bytes_len = cast(usize, fde_info.bytes_len) orelse return error.EndOfStream;
+                if (!need_lookup) {
+                    try r.discardAll(bytes_len);
+                    continue;
+                }
+                const cie = unwind.findCie(fde_info.cie_offset) orelse return error.InvalidDebugInfo;
+                const fde: FrameDescriptionEntry = try .parse(section.vaddr + r.seek, try r.take(bytes_len), cie, endian);
+                try fde_list.append(gpa, .{
+                    .pc_begin = fde.pc_begin,
+                    .fde_offset = entry_offset,
+                });
+            },
+            .terminator => break true,
+        }
+    } else false;
+    const expect_terminator = switch (section.id) {
+        .eh_frame => !is_macho, // `.eh_frame` indicates the end of the CIE/FDE list with a sentinel entry, though macOS omits this
+        .debug_frame => false, // `.debug_frame` uses the section bounds and does not specify a sentinel entry
+    };
+    if (saw_terminator != expect_terminator) return bad();
+
+    if (need_lookup) {
+        std.mem.sortUnstable(SortedFdeEntry, fde_list.items, {}, struct {
+            fn lessThan(ctx: void, a: SortedFdeEntry, b: SortedFdeEntry) bool {
+                _ = ctx;
+                return a.pc_begin < b.pc_begin;
+            }
+        }.lessThan);
+
+        // This temporary is necessary to avoid an RLS footgun where `lookup` ends up non-null `undefined` on OOM.
+        const final_fdes = try fde_list.toOwnedSlice(gpa);
+        unwind.lookup = .{ .sorted_fdes = final_fdes };
+    }
+}
+
+fn findCie(unwind: *const Unwind, offset: u64) ?*const CommonInformationEntry {
+    const offsets = unwind.cie_list.items(.offset);
+    if (offsets.len == 0) return null;
+    var start: usize = 0;
+    var len: usize = offsets.len;
+    while (len > 1) {
+        const mid = len / 2;
+        if (offset < offsets[start + mid]) {
+            len = mid;
+        } else {
+            start += mid;
+            len -= mid;
+        }
+    }
+    if (offsets[start] != offset) return null;
+    return &unwind.cie_list.items(.cie)[start];
+}
+
+/// Given a program counter value, returns the offset of the corresponding FDE, or `null` if no
+/// matching FDE was found. The returned offset can be passed to `getFde` to load the data
+/// associated with the FDE.
+///
+/// Before calling this function, `prepare` must return successfully at least once, to ensure that
+/// `unwind.lookup` is populated.
+///
+/// The return value may be a false positive. After loading the FDE with `getFde`, the caller must
+/// validate that `pc` is indeed in its range -- if it is not, then no FDE matches `pc`.
+pub fn lookupPc(unwind: *const Unwind, pc: u64, addr_size_bytes: u8, endian: Endian) !?u64 {
+    const sorted_fdes: []const SortedFdeEntry = switch (unwind.lookup.?) {
+        .eh_frame_hdr => |eh_frame_hdr| {
+            const fde_vaddr = try eh_frame_hdr.table.findEntry(
+                eh_frame_hdr.vaddr,
+                pc,
+                addr_size_bytes,
+                endian,
+            ) orelse return null;
+            return std.math.sub(u64, fde_vaddr, unwind.frame_section.vaddr) catch bad(); // convert vaddr to offset
+        },
+        .sorted_fdes => |sorted_fdes| sorted_fdes,
+    };
+    if (sorted_fdes.len == 0) return null;
+    var start: usize = 0;
+    var len: usize = sorted_fdes.len;
+    while (len > 1) {
+        const half = len / 2;
+        if (pc < sorted_fdes[start + half].pc_begin) {
+            len = half;
+        } else {
+            start += half;
+            len -= half;
+        }
+    }
+    // If any FDE matches, it'll be the one at `start` (maybe false positive).
+    return sorted_fdes[start].fde_offset;
+}
+
+/// Get the FDE at a given offset, as well as its associated CIE. This offset typically comes from
+/// `lookupPc`. The CFI instructions within can be evaluated with `VirtualMachine`.
+pub fn getFde(unwind: *const Unwind, fde_offset: u64, endian: Endian) !struct { *const CommonInformationEntry, FrameDescriptionEntry } {
+    const section = unwind.frame_section;
+
+    if (fde_offset > section.bytes.len) return error.EndOfStream;
+    var fde_reader: Reader = .fixed(section.bytes[@intCast(fde_offset)..]);
+    const fde_info = switch (try EntryHeader.read(&fde_reader, fde_offset, section.id, endian)) {
+        .fde => |info| info,
+        .cie, .terminator => return bad(), // This is meant to be an FDE
+    };
+
+    const cie = unwind.findCie(fde_info.cie_offset) orelse return error.InvalidDebugInfo;
+    const fde: FrameDescriptionEntry = try .parse(
+        section.vaddr + fde_offset + fde_reader.seek,
+        try fde_reader.take(cast(usize, fde_info.bytes_len) orelse return error.EndOfStream),
+        cie,
+        endian,
+    );
+
+    return .{ cie, fde };
+}
+
+const EhPointerContext = struct {
+    /// The address of the pointer field itself.
+    pc_rel_base: u64,
+    /// These relative addressing modes are only used in specific cases, and
+    /// might not be available / required in all parsing contexts.
+    data_rel_base: ?u64 = null,
+    text_rel_base: ?u64 = null,
+    function_rel_base: ?u64 = null,
+};
+/// Returns `error.InvalidDebugInfo` if the encoding is `EH.PE.omit`.
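+///
+/// The `EH.PE` encoding byte packs a value type into its low nibble (absptr,
+/// uleb128, udata2/4/8, sleb128, sdata2/4/8) and a relative-addressing base
+/// (plus an indirect flag) into its high nibble; this helper decodes only the
+/// value-type half, leaving base application to `readEhPointer`.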
+/// Returns `error.InvalidDebugInfo` if the encoding is `EH.PE.omit`.
+fn readEhPointerAbs(r: *Reader, enc_ty: EH.PE.Type, addr_size_bytes: u8, endian: Endian) !union(enum) {
+ signed: i64,
+ unsigned: u64,
+} {
+ return switch (enc_ty) {
+ .absptr => .{
+ .unsigned = switch (addr_size_bytes) {
+ 2 => try r.takeInt(u16, endian),
+ 4 => try r.takeInt(u32, endian),
+ 8 => try r.takeInt(u64, endian),
+ else => return error.UnsupportedAddrSize,
+ },
+ },
+ .uleb128 => .{ .unsigned = try r.takeLeb128(u64) },
+ .udata2 => .{ .unsigned = try r.takeInt(u16, endian) },
+ .udata4 => .{ .unsigned = try r.takeInt(u32, endian) },
+ .udata8 => .{ .unsigned = try r.takeInt(u64, endian) },
+ .sleb128 => .{ .signed = try r.takeLeb128(i64) },
+ .sdata2 => .{ .signed = try r.takeInt(i16, endian) },
+ .sdata4 => .{ .signed = try r.takeInt(i32, endian) },
+ .sdata8 => .{ .signed = try r.takeInt(i64, endian) },
+ else => return bad(),
+ };
+}
+/// Returns `error.InvalidDebugInfo` if the encoding is `EH.PE.omit`.
+fn readEhPointer(r: *Reader, enc: EH.PE, addr_size_bytes: u8, ctx: EhPointerContext, endian: Endian) !u64 {
+ const offset = try readEhPointerAbs(r, enc.type, addr_size_bytes, endian);
+ if (enc.indirect) return bad(); // GCC extension; not supported
+ const base: u64 = switch (enc.rel) {
+ .abs, .aligned => 0,
+ .pcrel => ctx.pc_rel_base,
+ .textrel => ctx.text_rel_base orelse return bad(),
+ .datarel => ctx.data_rel_base orelse return bad(),
+ .funcrel => ctx.function_rel_base orelse return bad(),
+ _ => return bad(),
+ };
+ return switch (offset) {
+ .signed => |s| if (s >= 0)
+ try std.math.add(u64, base, @intCast(s))
+ else
+ try std.math.sub(u64, base, @intCast(-s)),
+ // absptr can actually contain signed values in some cases (aarch64 MachO)
+ .unsigned => |u| u +% base,
+ };
+}
+
+/// Builds the longest slice possible starting at `ptr`, for use with `Reader.fixed` when the
+/// length of the data is unknown and we just want to allow reading indefinitely.
+fn maxSlice(ptr: [*]const u8) []const u8 {
+ const len = std.math.maxInt(usize) - @intFromPtr(ptr);
+ return ptr[0..len];
+}
+
+const Allocator = std.mem.Allocator;
+const assert = std.debug.assert;
+const bad = Dwarf.bad;
+const cast = std.math.cast;
+const DW = std.dwarf;
+const Dwarf = std.debug.Dwarf;
+const EH = DW.EH;
+const Endian = std.builtin.Endian;
+const Format = DW.Format;
+const maxInt = std.math.maxInt;
+const missing = Dwarf.missing;
+const Reader = std.Io.Reader;
+const std = @import("std");
+const Unwind = @This();
diff --git a/lib/std/debug/Dwarf/Unwind/VirtualMachine.zig b/lib/std/debug/Dwarf/Unwind/VirtualMachine.zig
new file mode 100644
index 0000000000..319841ea7f
--- /dev/null
+++ b/lib/std/debug/Dwarf/Unwind/VirtualMachine.zig
@@ -0,0 +1,459 @@
+//! Virtual machine that evaluates DWARF call frame instructions
+
+/// See section 6.4.1 of the DWARF5 specification for details on each rule.
+pub const RegisterRule = union(enum) {
+ /// The spec says that the default rule for each column is the undefined rule.
+ /// However, it also allows ABI / compiler authors to specify alternate defaults, so
+ /// there is a distinction made here.
+ default,
+ undefined,
+ same_value,
+ /// offset(N)
+ offset: i64,
+ /// val_offset(N)
+ val_offset: i64,
+ /// register(R)
+ register: u8,
+ /// expression(E)
+ expression: []const u8,
+ /// val_expression(E)
+ val_expression: []const u8,
+};
+
+pub const CfaRule = union(enum) {
+ none,
+ reg_off: struct {
+ register: u8,
+ offset: i64,
+ },
+ expression: []const u8,
+};
+
+/// Each row contains unwinding rules for a set of registers.
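+/// For example, after a typical x86_64 prologue ("push %rbp; mov %rsp, %rbp"), the
+/// active row might encode: CFA = rbp + 16, return address at CFA - 8, and the
+/// caller's rbp at CFA - 16 (illustrative values, not derived from this code).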
+pub const Row = struct { + /// Offset from `FrameDescriptionEntry.pc_begin` + offset: u64 = 0, + cfa: CfaRule = .none, + /// The register fields in these columns define the register the rule applies to. + columns: ColumnRange = .{ .start = undefined, .len = 0 }, +}; + +pub const Column = struct { + register: u8, + rule: RegisterRule, +}; + +const ColumnRange = struct { + start: usize, + len: u8, +}; + +columns: std.ArrayList(Column) = .empty, +stack: std.ArrayList(struct { + cfa: CfaRule, + columns: ColumnRange, +}) = .empty, +current_row: Row = .{}, + +/// The result of executing the CIE's initial_instructions +cie_row: ?Row = null, + +pub fn deinit(self: *VirtualMachine, gpa: Allocator) void { + self.stack.deinit(gpa); + self.columns.deinit(gpa); + self.* = undefined; +} + +pub fn reset(self: *VirtualMachine) void { + self.stack.clearRetainingCapacity(); + self.columns.clearRetainingCapacity(); + self.current_row = .{}; + self.cie_row = null; +} + +/// Return a slice backed by the row's non-CFA columns +pub fn rowColumns(self: *const VirtualMachine, row: *const Row) []Column { + if (row.columns.len == 0) return &.{}; + return self.columns.items[row.columns.start..][0..row.columns.len]; +} + +/// Either retrieves or adds a column for `register` (non-CFA) in the current row. +fn getOrAddColumn(self: *VirtualMachine, gpa: Allocator, register: u8) !*Column { + for (self.rowColumns(&self.current_row)) |*c| { + if (c.register == register) return c; + } + + if (self.current_row.columns.len == 0) { + self.current_row.columns.start = self.columns.items.len; + } else { + assert(self.current_row.columns.start + self.current_row.columns.len == self.columns.items.len); + } + self.current_row.columns.len += 1; + + const column = try self.columns.addOne(gpa); + column.* = .{ + .register = register, + .rule = .default, + }; + + return column; +} + +pub fn populateCieLastRow( + gpa: Allocator, + cie: *Unwind.CommonInformationEntry, + addr_size_bytes: u8, + endian: std.builtin.Endian, +) !void { + assert(cie.last_row == null); + + var vm: VirtualMachine = .{}; + defer vm.deinit(gpa); + + try vm.evalInstructions( + gpa, + cie, + std.math.maxInt(u64), + cie.initial_instructions, + addr_size_bytes, + endian, + ); + + cie.last_row = .{ + .offset = vm.current_row.offset, + .cfa = vm.current_row.cfa, + .cols = try gpa.dupe(Column, vm.rowColumns(&vm.current_row)), + }; +} + +/// Runs the CIE instructions, then the FDE instructions. Execution halts +/// once the row that corresponds to `pc` is known, and the row is returned. +pub fn runTo( + vm: *VirtualMachine, + gpa: Allocator, + pc: u64, + cie: *const Unwind.CommonInformationEntry, + fde: *const Unwind.FrameDescriptionEntry, + addr_size_bytes: u8, + endian: std.builtin.Endian, +) !Row { + assert(vm.cie_row == null); + + const target_offset = pc - fde.pc_begin; + assert(target_offset < fde.pc_range); + + const instruction_bytes: []const u8 = insts: { + if (target_offset < cie.last_row.?.offset) { + break :insts cie.initial_instructions; + } + // This is the more common case: start from the CIE's last row. 
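+ // The CIE's final row was cached by `populateCieLastRow` during `Unwind.prepare`,
+ // so seed the VM with that state and evaluate only the FDE's own instructions.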
+ assert(vm.columns.items.len == 0);
+ vm.current_row = .{
+ .offset = cie.last_row.?.offset,
+ .cfa = cie.last_row.?.cfa,
+ .columns = .{
+ .start = 0,
+ .len = @intCast(cie.last_row.?.cols.len),
+ },
+ };
+ try vm.columns.appendSlice(gpa, cie.last_row.?.cols);
+ vm.cie_row = vm.current_row;
+ break :insts fde.instructions;
+ };
+
+ try vm.evalInstructions(
+ gpa,
+ cie,
+ target_offset,
+ instruction_bytes,
+ addr_size_bytes,
+ endian,
+ );
+ return vm.current_row;
+}
+
+/// Evaluates instructions from `instruction_bytes` until `target_addr` is reached or all
+/// instructions have been evaluated.
+fn evalInstructions(
+ vm: *VirtualMachine,
+ gpa: Allocator,
+ cie: *const Unwind.CommonInformationEntry,
+ target_addr: u64,
+ instruction_bytes: []const u8,
+ addr_size_bytes: u8,
+ endian: std.builtin.Endian,
+) !void {
+ var fr: std.Io.Reader = .fixed(instruction_bytes);
+ while (fr.seek < fr.buffer.len) {
+ switch (try Instruction.read(&fr, addr_size_bytes, endian)) {
+ .nop => {
+ // If there was one nop, there's a good chance we've reached the padding and so
+ // everything left is a nop, which is represented by a 0 byte.
+ if (std.mem.allEqual(u8, fr.buffered(), 0)) return;
+ },
+
+ .remember_state => {
+ try vm.stack.append(gpa, .{
+ .cfa = vm.current_row.cfa,
+ .columns = vm.current_row.columns,
+ });
+ const cols_len = vm.current_row.columns.len;
+ const copy_start = vm.columns.items.len;
+ assert(vm.current_row.columns.start == copy_start - cols_len);
+ try vm.columns.ensureUnusedCapacity(gpa, cols_len); // to prevent aliasing issues
+ vm.columns.appendSliceAssumeCapacity(vm.columns.items[copy_start - cols_len ..]);
+ vm.current_row.columns.start = copy_start;
+ },
+ .restore_state => {
+ const restored = vm.stack.pop() orelse return error.InvalidOperation;
+ vm.columns.shrinkRetainingCapacity(restored.columns.start + restored.columns.len);
+
+ vm.current_row.cfa = restored.cfa;
+ vm.current_row.columns = restored.columns;
+ },
+
+ .advance_loc => |delta| {
+ const new_addr = vm.current_row.offset + delta * cie.code_alignment_factor;
+ if (new_addr > target_addr) return;
+ vm.current_row.offset = new_addr;
+ },
+ .set_loc => |new_addr| {
+ if (new_addr <= vm.current_row.offset) return error.InvalidOperation;
+ if (cie.segment_selector_size != 0) return error.InvalidOperation; // unsupported
+ if (new_addr > target_addr) return;
+ vm.current_row.offset = new_addr;
+ },
+
+ .register => |reg| {
+ const column = try vm.getOrAddColumn(gpa, reg.index);
+ column.rule = switch (reg.rule) {
+ .restore => rule: {
+ const cie_row = &(vm.cie_row orelse return error.InvalidOperation);
+ for (vm.rowColumns(cie_row)) |cie_col| {
+ if (cie_col.register == reg.index) break :rule cie_col.rule;
+ }
+ break :rule .default;
+ },
+ .undefined => .undefined,
+ .same_value => .same_value,
+ .offset_uf => |off| .{ .offset = @as(i64, @intCast(off)) * cie.data_alignment_factor },
+ .offset_sf => |off| .{ .offset = off * cie.data_alignment_factor },
+ .val_offset_uf => |off| .{ .val_offset = @as(i64, @intCast(off)) * cie.data_alignment_factor },
+ .val_offset_sf => |off| .{ .val_offset = off * cie.data_alignment_factor },
+ .register => |callee_reg| .{ .register = callee_reg },
+ .expr => |len| .{ .expression = try takeExprBlock(&fr, len) },
+ .val_expr => |len| .{ .val_expression = try takeExprBlock(&fr, len) },
+ };
+ },
+ .def_cfa => |cfa| vm.current_row.cfa = .{ .reg_off = .{
+ .register = cfa.register,
+ .offset = @intCast(cfa.offset),
+ }
}, + .def_cfa_sf => |cfa| vm.current_row.cfa = .{ .reg_off = .{ + .register = cfa.register, + .offset = cfa.offset_sf * cie.data_alignment_factor, + } }, + .def_cfa_reg => |register| switch (vm.current_row.cfa) { + .none, .expression => return error.InvalidOperation, + .reg_off => |*ro| ro.register = register, + }, + .def_cfa_offset => |offset| switch (vm.current_row.cfa) { + .none, .expression => return error.InvalidOperation, + .reg_off => |*ro| ro.offset = @intCast(offset), + }, + .def_cfa_offset_sf => |offset_sf| switch (vm.current_row.cfa) { + .none, .expression => return error.InvalidOperation, + .reg_off => |*ro| ro.offset = offset_sf * cie.data_alignment_factor, + }, + .def_cfa_expr => |len| { + vm.current_row.cfa = .{ .expression = try takeExprBlock(&fr, len) }; + }, + } + } +} + +fn takeExprBlock(r: *std.Io.Reader, len: usize) error{ ReadFailed, InvalidOperand }![]const u8 { + return r.take(len) catch |err| switch (err) { + error.ReadFailed => |e| return e, + error.EndOfStream => return error.InvalidOperand, + }; +} + +const OpcodeByte = packed struct(u8) { + low: packed union { + operand: u6, + extended: enum(u6) { + nop = 0, + set_loc = 1, + advance_loc1 = 2, + advance_loc2 = 3, + advance_loc4 = 4, + offset_extended = 5, + restore_extended = 6, + undefined = 7, + same_value = 8, + register = 9, + remember_state = 10, + restore_state = 11, + def_cfa = 12, + def_cfa_register = 13, + def_cfa_offset = 14, + def_cfa_expression = 15, + expression = 16, + offset_extended_sf = 17, + def_cfa_sf = 18, + def_cfa_offset_sf = 19, + val_offset = 20, + val_offset_sf = 21, + val_expression = 22, + _, + }, + }, + opcode: enum(u2) { + extended = 0, + advance_loc = 1, + offset = 2, + restore = 3, + }, +}; + +pub const Instruction = union(enum) { + nop, + remember_state, + restore_state, + advance_loc: u32, + set_loc: u64, + + register: struct { + index: u8, + rule: union(enum) { + restore, // restore from cie + undefined, + same_value, + offset_uf: u64, + offset_sf: i64, + val_offset_uf: u64, + val_offset_sf: i64, + register: u8, + /// Value is the number of bytes in the DWARF expression, which the caller must read. + expr: usize, + /// Value is the number of bytes in the DWARF expression, which the caller must read. + val_expr: usize, + }, + }, + + def_cfa: struct { + register: u8, + offset: u64, + }, + def_cfa_sf: struct { + register: u8, + offset_sf: i64, + }, + def_cfa_reg: u8, + def_cfa_offset: u64, + def_cfa_offset_sf: i64, + /// Value is the number of bytes in the DWARF expression, which the caller must read. 
+ def_cfa_expr: usize, + + pub fn read( + reader: *std.Io.Reader, + addr_size_bytes: u8, + endian: std.builtin.Endian, + ) !Instruction { + const inst: OpcodeByte = @bitCast(try reader.takeByte()); + return switch (inst.opcode) { + .advance_loc => .{ .advance_loc = inst.low.operand }, + .offset => .{ .register = .{ + .index = inst.low.operand, + .rule = .{ .offset_uf = try reader.takeLeb128(u64) }, + } }, + .restore => .{ .register = .{ + .index = inst.low.operand, + .rule = .restore, + } }, + .extended => switch (inst.low.extended) { + .nop => .nop, + .remember_state => .remember_state, + .restore_state => .restore_state, + .advance_loc1 => .{ .advance_loc = try reader.takeByte() }, + .advance_loc2 => .{ .advance_loc = try reader.takeInt(u16, endian) }, + .advance_loc4 => .{ .advance_loc = try reader.takeInt(u32, endian) }, + .set_loc => .{ .set_loc = switch (addr_size_bytes) { + 2 => try reader.takeInt(u16, endian), + 4 => try reader.takeInt(u32, endian), + 8 => try reader.takeInt(u64, endian), + else => return error.UnsupportedAddrSize, + } }, + + .offset_extended => .{ .register = .{ + .index = try reader.takeLeb128(u8), + .rule = .{ .offset_uf = try reader.takeLeb128(u64) }, + } }, + .offset_extended_sf => .{ .register = .{ + .index = try reader.takeLeb128(u8), + .rule = .{ .offset_sf = try reader.takeLeb128(i64) }, + } }, + .restore_extended => .{ .register = .{ + .index = try reader.takeLeb128(u8), + .rule = .restore, + } }, + .undefined => .{ .register = .{ + .index = try reader.takeLeb128(u8), + .rule = .undefined, + } }, + .same_value => .{ .register = .{ + .index = try reader.takeLeb128(u8), + .rule = .same_value, + } }, + .register => .{ .register = .{ + .index = try reader.takeLeb128(u8), + .rule = .{ .register = try reader.takeLeb128(u8) }, + } }, + .val_offset => .{ .register = .{ + .index = try reader.takeLeb128(u8), + .rule = .{ .val_offset_uf = try reader.takeLeb128(u64) }, + } }, + .val_offset_sf => .{ .register = .{ + .index = try reader.takeLeb128(u8), + .rule = .{ .val_offset_sf = try reader.takeLeb128(i64) }, + } }, + .expression => .{ .register = .{ + .index = try reader.takeLeb128(u8), + .rule = .{ .expr = try reader.takeLeb128(usize) }, + } }, + .val_expression => .{ .register = .{ + .index = try reader.takeLeb128(u8), + .rule = .{ .val_expr = try reader.takeLeb128(usize) }, + } }, + + .def_cfa => .{ .def_cfa = .{ + .register = try reader.takeLeb128(u8), + .offset = try reader.takeLeb128(u64), + } }, + .def_cfa_sf => .{ .def_cfa_sf = .{ + .register = try reader.takeLeb128(u8), + .offset_sf = try reader.takeLeb128(i64), + } }, + .def_cfa_register => .{ .def_cfa_reg = try reader.takeLeb128(u8) }, + .def_cfa_offset => .{ .def_cfa_offset = try reader.takeLeb128(u64) }, + .def_cfa_offset_sf => .{ .def_cfa_offset_sf = try reader.takeLeb128(i64) }, + .def_cfa_expression => .{ .def_cfa_expr = try reader.takeLeb128(usize) }, + + _ => switch (@intFromEnum(inst.low.extended)) { + 0x1C...0x3F => return error.UnimplementedUserOpcode, + else => return error.InvalidOpcode, + }, + }, + }; + } +}; + +const std = @import("../../../std.zig"); +const assert = std.debug.assert; +const Allocator = std.mem.Allocator; +const Unwind = std.debug.Dwarf.Unwind; + +const VirtualMachine = @This(); diff --git a/lib/std/debug/Dwarf/abi.zig b/lib/std/debug/Dwarf/abi.zig deleted file mode 100644 index c5e509c7b0..0000000000 --- a/lib/std/debug/Dwarf/abi.zig +++ /dev/null @@ -1,351 +0,0 @@ -const builtin = @import("builtin"); - -const std = @import("../../std.zig"); -const mem = std.mem; -const 
posix = std.posix; -const Arch = std.Target.Cpu.Arch; - -/// Tells whether unwinding for this target is supported by the Dwarf standard. -/// -/// See also `std.debug.SelfInfo.supportsUnwinding` which tells whether the Zig -/// standard library has a working implementation of unwinding for this target. -pub fn supportsUnwinding(target: *const std.Target) bool { - return switch (target.cpu.arch) { - .amdgcn, - .nvptx, - .nvptx64, - .spirv32, - .spirv64, - => false, - - // Enabling this causes relocation errors such as: - // error: invalid relocation type R_RISCV_SUB32 at offset 0x20 - .riscv64, .riscv64be, .riscv32, .riscv32be => false, - - // Conservative guess. Feel free to update this logic with any targets - // that are known to not support Dwarf unwinding. - else => true, - }; -} - -/// Returns `null` for CPU architectures without an instruction pointer register. -pub fn ipRegNum(arch: Arch) ?u8 { - return switch (arch) { - .x86 => 8, - .x86_64 => 16, - .arm, .armeb, .thumb, .thumbeb => 15, - .aarch64, .aarch64_be => 32, - else => null, - }; -} - -pub fn fpRegNum(arch: Arch, reg_context: RegisterContext) u8 { - return switch (arch) { - // GCC on OS X historically did the opposite of ELF for these registers - // (only in .eh_frame), and that is now the convention for MachO - .x86 => if (reg_context.eh_frame and reg_context.is_macho) 4 else 5, - .x86_64 => 6, - .arm, .armeb, .thumb, .thumbeb => 11, - .aarch64, .aarch64_be => 29, - else => unreachable, - }; -} - -pub fn spRegNum(arch: Arch, reg_context: RegisterContext) u8 { - return switch (arch) { - .x86 => if (reg_context.eh_frame and reg_context.is_macho) 5 else 4, - .x86_64 => 7, - .arm, .armeb, .thumb, .thumbeb => 13, - .aarch64, .aarch64_be => 31, - else => unreachable, - }; -} - -pub const RegisterContext = struct { - eh_frame: bool, - is_macho: bool, -}; - -pub const RegBytesError = error{ - InvalidRegister, - UnimplementedArch, - UnimplementedOs, - RegisterContextRequired, - ThreadContextNotSupported, -}; - -/// Returns a slice containing the backing storage for `reg_number`. -/// -/// This function assumes the Dwarf information corresponds not necessarily to -/// the current executable, but at least with a matching CPU architecture and -/// OS. It is planned to lift this limitation with a future enhancement. -/// -/// `reg_context` describes in what context the register number is used, as it can have different -/// meanings depending on the DWARF container. It is only required when getting the stack or -/// frame pointer register on some architectures. 
-pub fn regBytes( - thread_context_ptr: *std.debug.ThreadContext, - reg_number: u8, - reg_context: ?RegisterContext, -) RegBytesError![]u8 { - if (builtin.os.tag == .windows) { - return switch (builtin.cpu.arch) { - .x86 => switch (reg_number) { - 0 => mem.asBytes(&thread_context_ptr.Eax), - 1 => mem.asBytes(&thread_context_ptr.Ecx), - 2 => mem.asBytes(&thread_context_ptr.Edx), - 3 => mem.asBytes(&thread_context_ptr.Ebx), - 4 => mem.asBytes(&thread_context_ptr.Esp), - 5 => mem.asBytes(&thread_context_ptr.Ebp), - 6 => mem.asBytes(&thread_context_ptr.Esi), - 7 => mem.asBytes(&thread_context_ptr.Edi), - 8 => mem.asBytes(&thread_context_ptr.Eip), - 9 => mem.asBytes(&thread_context_ptr.EFlags), - 10 => mem.asBytes(&thread_context_ptr.SegCs), - 11 => mem.asBytes(&thread_context_ptr.SegSs), - 12 => mem.asBytes(&thread_context_ptr.SegDs), - 13 => mem.asBytes(&thread_context_ptr.SegEs), - 14 => mem.asBytes(&thread_context_ptr.SegFs), - 15 => mem.asBytes(&thread_context_ptr.SegGs), - else => error.InvalidRegister, - }, - .x86_64 => switch (reg_number) { - 0 => mem.asBytes(&thread_context_ptr.Rax), - 1 => mem.asBytes(&thread_context_ptr.Rdx), - 2 => mem.asBytes(&thread_context_ptr.Rcx), - 3 => mem.asBytes(&thread_context_ptr.Rbx), - 4 => mem.asBytes(&thread_context_ptr.Rsi), - 5 => mem.asBytes(&thread_context_ptr.Rdi), - 6 => mem.asBytes(&thread_context_ptr.Rbp), - 7 => mem.asBytes(&thread_context_ptr.Rsp), - 8 => mem.asBytes(&thread_context_ptr.R8), - 9 => mem.asBytes(&thread_context_ptr.R9), - 10 => mem.asBytes(&thread_context_ptr.R10), - 11 => mem.asBytes(&thread_context_ptr.R11), - 12 => mem.asBytes(&thread_context_ptr.R12), - 13 => mem.asBytes(&thread_context_ptr.R13), - 14 => mem.asBytes(&thread_context_ptr.R14), - 15 => mem.asBytes(&thread_context_ptr.R15), - 16 => mem.asBytes(&thread_context_ptr.Rip), - else => error.InvalidRegister, - }, - .aarch64, .aarch64_be => switch (reg_number) { - 0...30 => mem.asBytes(&thread_context_ptr.DUMMYUNIONNAME.X[reg_number]), - 31 => mem.asBytes(&thread_context_ptr.Sp), - 32 => mem.asBytes(&thread_context_ptr.Pc), - else => error.InvalidRegister, - }, - else => error.UnimplementedArch, - }; - } - - if (!std.debug.have_ucontext) return error.ThreadContextNotSupported; - - const ucontext_ptr = thread_context_ptr; - return switch (builtin.cpu.arch) { - .x86 => switch (builtin.os.tag) { - .linux, .netbsd, .solaris, .illumos => switch (reg_number) { - 0 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.EAX]), - 1 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.ECX]), - 2 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.EDX]), - 3 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.EBX]), - 4...5 => if (reg_context) |r| bytes: { - if (reg_number == 4) { - break :bytes if (r.eh_frame and r.is_macho) - mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.EBP]) - else - mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.ESP]); - } else { - break :bytes if (r.eh_frame and r.is_macho) - mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.ESP]) - else - mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.EBP]); - } - } else error.RegisterContextRequired, - 6 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.ESI]), - 7 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.EDI]), - 8 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.EIP]), - 9 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.EFL]), - 10 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.CS]), - 11 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.SS]), - 12 => 
mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.DS]), - 13 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.ES]), - 14 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.FS]), - 15 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.GS]), - 16...23 => error.InvalidRegister, // TODO: Support loading ST0-ST7 from mcontext.fpregs - 32...39 => error.InvalidRegister, // TODO: Support loading XMM0-XMM7 from mcontext.fpregs - else => error.InvalidRegister, - }, - else => error.UnimplementedOs, - }, - .x86_64 => switch (builtin.os.tag) { - .linux, .solaris, .illumos => switch (reg_number) { - 0 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.RAX]), - 1 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.RDX]), - 2 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.RCX]), - 3 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.RBX]), - 4 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.RSI]), - 5 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.RDI]), - 6 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.RBP]), - 7 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.RSP]), - 8 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.R8]), - 9 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.R9]), - 10 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.R10]), - 11 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.R11]), - 12 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.R12]), - 13 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.R13]), - 14 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.R14]), - 15 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.R15]), - 16 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.RIP]), - 17...32 => |i| if (builtin.os.tag.isSolarish()) - mem.asBytes(&ucontext_ptr.mcontext.fpregs.chip_state.xmm[i - 17]) - else - mem.asBytes(&ucontext_ptr.mcontext.fpregs.xmm[i - 17]), - else => error.InvalidRegister, - }, - .freebsd => switch (reg_number) { - 0 => mem.asBytes(&ucontext_ptr.mcontext.rax), - 1 => mem.asBytes(&ucontext_ptr.mcontext.rdx), - 2 => mem.asBytes(&ucontext_ptr.mcontext.rcx), - 3 => mem.asBytes(&ucontext_ptr.mcontext.rbx), - 4 => mem.asBytes(&ucontext_ptr.mcontext.rsi), - 5 => mem.asBytes(&ucontext_ptr.mcontext.rdi), - 6 => mem.asBytes(&ucontext_ptr.mcontext.rbp), - 7 => mem.asBytes(&ucontext_ptr.mcontext.rsp), - 8 => mem.asBytes(&ucontext_ptr.mcontext.r8), - 9 => mem.asBytes(&ucontext_ptr.mcontext.r9), - 10 => mem.asBytes(&ucontext_ptr.mcontext.r10), - 11 => mem.asBytes(&ucontext_ptr.mcontext.r11), - 12 => mem.asBytes(&ucontext_ptr.mcontext.r12), - 13 => mem.asBytes(&ucontext_ptr.mcontext.r13), - 14 => mem.asBytes(&ucontext_ptr.mcontext.r14), - 15 => mem.asBytes(&ucontext_ptr.mcontext.r15), - 16 => mem.asBytes(&ucontext_ptr.mcontext.rip), - // TODO: Extract xmm state from mcontext.fpstate? 
- else => error.InvalidRegister, - }, - .openbsd => switch (reg_number) { - 0 => mem.asBytes(&ucontext_ptr.sc_rax), - 1 => mem.asBytes(&ucontext_ptr.sc_rdx), - 2 => mem.asBytes(&ucontext_ptr.sc_rcx), - 3 => mem.asBytes(&ucontext_ptr.sc_rbx), - 4 => mem.asBytes(&ucontext_ptr.sc_rsi), - 5 => mem.asBytes(&ucontext_ptr.sc_rdi), - 6 => mem.asBytes(&ucontext_ptr.sc_rbp), - 7 => mem.asBytes(&ucontext_ptr.sc_rsp), - 8 => mem.asBytes(&ucontext_ptr.sc_r8), - 9 => mem.asBytes(&ucontext_ptr.sc_r9), - 10 => mem.asBytes(&ucontext_ptr.sc_r10), - 11 => mem.asBytes(&ucontext_ptr.sc_r11), - 12 => mem.asBytes(&ucontext_ptr.sc_r12), - 13 => mem.asBytes(&ucontext_ptr.sc_r13), - 14 => mem.asBytes(&ucontext_ptr.sc_r14), - 15 => mem.asBytes(&ucontext_ptr.sc_r15), - 16 => mem.asBytes(&ucontext_ptr.sc_rip), - // TODO: Extract xmm state from sc_fpstate? - else => error.InvalidRegister, - }, - .macos, .ios => switch (reg_number) { - 0 => mem.asBytes(&ucontext_ptr.mcontext.ss.rax), - 1 => mem.asBytes(&ucontext_ptr.mcontext.ss.rdx), - 2 => mem.asBytes(&ucontext_ptr.mcontext.ss.rcx), - 3 => mem.asBytes(&ucontext_ptr.mcontext.ss.rbx), - 4 => mem.asBytes(&ucontext_ptr.mcontext.ss.rsi), - 5 => mem.asBytes(&ucontext_ptr.mcontext.ss.rdi), - 6 => mem.asBytes(&ucontext_ptr.mcontext.ss.rbp), - 7 => mem.asBytes(&ucontext_ptr.mcontext.ss.rsp), - 8 => mem.asBytes(&ucontext_ptr.mcontext.ss.r8), - 9 => mem.asBytes(&ucontext_ptr.mcontext.ss.r9), - 10 => mem.asBytes(&ucontext_ptr.mcontext.ss.r10), - 11 => mem.asBytes(&ucontext_ptr.mcontext.ss.r11), - 12 => mem.asBytes(&ucontext_ptr.mcontext.ss.r12), - 13 => mem.asBytes(&ucontext_ptr.mcontext.ss.r13), - 14 => mem.asBytes(&ucontext_ptr.mcontext.ss.r14), - 15 => mem.asBytes(&ucontext_ptr.mcontext.ss.r15), - 16 => mem.asBytes(&ucontext_ptr.mcontext.ss.rip), - else => error.InvalidRegister, - }, - else => error.UnimplementedOs, - }, - .arm, .armeb, .thumb, .thumbeb => switch (builtin.os.tag) { - .linux => switch (reg_number) { - 0 => mem.asBytes(&ucontext_ptr.mcontext.arm_r0), - 1 => mem.asBytes(&ucontext_ptr.mcontext.arm_r1), - 2 => mem.asBytes(&ucontext_ptr.mcontext.arm_r2), - 3 => mem.asBytes(&ucontext_ptr.mcontext.arm_r3), - 4 => mem.asBytes(&ucontext_ptr.mcontext.arm_r4), - 5 => mem.asBytes(&ucontext_ptr.mcontext.arm_r5), - 6 => mem.asBytes(&ucontext_ptr.mcontext.arm_r6), - 7 => mem.asBytes(&ucontext_ptr.mcontext.arm_r7), - 8 => mem.asBytes(&ucontext_ptr.mcontext.arm_r8), - 9 => mem.asBytes(&ucontext_ptr.mcontext.arm_r9), - 10 => mem.asBytes(&ucontext_ptr.mcontext.arm_r10), - 11 => mem.asBytes(&ucontext_ptr.mcontext.arm_fp), - 12 => mem.asBytes(&ucontext_ptr.mcontext.arm_ip), - 13 => mem.asBytes(&ucontext_ptr.mcontext.arm_sp), - 14 => mem.asBytes(&ucontext_ptr.mcontext.arm_lr), - 15 => mem.asBytes(&ucontext_ptr.mcontext.arm_pc), - // CPSR is not allocated a register number (See: https://github.com/ARM-software/abi-aa/blob/main/aadwarf32/aadwarf32.rst, Section 4.1) - else => error.InvalidRegister, - }, - else => error.UnimplementedOs, - }, - .aarch64, .aarch64_be => switch (builtin.os.tag) { - .macos, .ios, .watchos => switch (reg_number) { - 0...28 => mem.asBytes(&ucontext_ptr.mcontext.ss.regs[reg_number]), - 29 => mem.asBytes(&ucontext_ptr.mcontext.ss.fp), - 30 => mem.asBytes(&ucontext_ptr.mcontext.ss.lr), - 31 => mem.asBytes(&ucontext_ptr.mcontext.ss.sp), - 32 => mem.asBytes(&ucontext_ptr.mcontext.ss.pc), - - // TODO: Find storage for this state - //34 => mem.asBytes(&ucontext_ptr.ra_sign_state), - - // V0-V31 - 64...95 => mem.asBytes(&ucontext_ptr.mcontext.ns.q[reg_number - 64]), - 
else => error.InvalidRegister, - }, - .netbsd => switch (reg_number) { - 0...34 => mem.asBytes(&ucontext_ptr.mcontext.gregs[reg_number]), - else => error.InvalidRegister, - }, - .freebsd => switch (reg_number) { - 0...29 => mem.asBytes(&ucontext_ptr.mcontext.gpregs.x[reg_number]), - 30 => mem.asBytes(&ucontext_ptr.mcontext.gpregs.lr), - 31 => mem.asBytes(&ucontext_ptr.mcontext.gpregs.sp), - - // TODO: This seems wrong, but it was in the previous debug.zig code for mapping PC, check this - 32 => mem.asBytes(&ucontext_ptr.mcontext.gpregs.elr), - - else => error.InvalidRegister, - }, - .openbsd => switch (reg_number) { - 0...30 => mem.asBytes(&ucontext_ptr.sc_x[reg_number]), - 31 => mem.asBytes(&ucontext_ptr.sc_sp), - 32 => mem.asBytes(&ucontext_ptr.sc_lr), - 33 => mem.asBytes(&ucontext_ptr.sc_elr), - 34 => mem.asBytes(&ucontext_ptr.sc_spsr), - else => error.InvalidRegister, - }, - else => switch (reg_number) { - 0...30 => mem.asBytes(&ucontext_ptr.mcontext.regs[reg_number]), - 31 => mem.asBytes(&ucontext_ptr.mcontext.sp), - 32 => mem.asBytes(&ucontext_ptr.mcontext.pc), - else => error.InvalidRegister, - }, - }, - else => error.UnimplementedArch, - }; -} - -/// Returns a pointer to a register stored in a ThreadContext, preserving the -/// pointer attributes of the context. -pub fn regValueNative( - thread_context_ptr: *std.debug.ThreadContext, - reg_number: u8, - reg_context: ?RegisterContext, -) !*align(1) usize { - const reg_bytes = try regBytes(thread_context_ptr, reg_number, reg_context); - if (@sizeOf(usize) != reg_bytes.len) return error.IncompatibleRegisterSize; - return mem.bytesAsValue(usize, reg_bytes[0..@sizeOf(usize)]); -} diff --git a/lib/std/debug/Dwarf/call_frame.zig b/lib/std/debug/Dwarf/call_frame.zig deleted file mode 100644 index f78ed4378b..0000000000 --- a/lib/std/debug/Dwarf/call_frame.zig +++ /dev/null @@ -1,292 +0,0 @@ -const builtin = @import("builtin"); -const std = @import("../../std.zig"); -const mem = std.mem; -const debug = std.debug; -const leb = std.leb; -const DW = std.dwarf; -const abi = std.debug.Dwarf.abi; -const assert = std.debug.assert; -const native_endian = builtin.cpu.arch.endian(); - -/// TODO merge with std.dwarf.CFA -const Opcode = enum(u8) { - advance_loc = 0x1 << 6, - offset = 0x2 << 6, - restore = 0x3 << 6, - - nop = 0x00, - set_loc = 0x01, - advance_loc1 = 0x02, - advance_loc2 = 0x03, - advance_loc4 = 0x04, - offset_extended = 0x05, - restore_extended = 0x06, - undefined = 0x07, - same_value = 0x08, - register = 0x09, - remember_state = 0x0a, - restore_state = 0x0b, - def_cfa = 0x0c, - def_cfa_register = 0x0d, - def_cfa_offset = 0x0e, - def_cfa_expression = 0x0f, - expression = 0x10, - offset_extended_sf = 0x11, - def_cfa_sf = 0x12, - def_cfa_offset_sf = 0x13, - val_offset = 0x14, - val_offset_sf = 0x15, - val_expression = 0x16, - - // These opcodes encode an operand in the lower 6 bits of the opcode itself - pub const lo_inline = @intFromEnum(Opcode.advance_loc); - pub const hi_inline = @intFromEnum(Opcode.restore) | 0b111111; - - // These opcodes are trailed by zero or more operands - pub const lo_reserved = @intFromEnum(Opcode.nop); - pub const hi_reserved = @intFromEnum(Opcode.val_expression); - - // Vendor-specific opcodes - pub const lo_user = 0x1c; - pub const hi_user = 0x3f; -}; - -fn readBlock(reader: *std.Io.Reader) ![]const u8 { - const block_len = try reader.takeLeb128(usize); - return reader.take(block_len); -} - -pub const Instruction = union(Opcode) { - advance_loc: struct { - delta: u8, - }, - offset: struct { - register: u8, - 
offset: u64, - }, - restore: struct { - register: u8, - }, - nop: void, - set_loc: struct { - address: u64, - }, - advance_loc1: struct { - delta: u8, - }, - advance_loc2: struct { - delta: u16, - }, - advance_loc4: struct { - delta: u32, - }, - offset_extended: struct { - register: u8, - offset: u64, - }, - restore_extended: struct { - register: u8, - }, - undefined: struct { - register: u8, - }, - same_value: struct { - register: u8, - }, - register: struct { - register: u8, - target_register: u8, - }, - remember_state: void, - restore_state: void, - def_cfa: struct { - register: u8, - offset: u64, - }, - def_cfa_register: struct { - register: u8, - }, - def_cfa_offset: struct { - offset: u64, - }, - def_cfa_expression: struct { - block: []const u8, - }, - expression: struct { - register: u8, - block: []const u8, - }, - offset_extended_sf: struct { - register: u8, - offset: i64, - }, - def_cfa_sf: struct { - register: u8, - offset: i64, - }, - def_cfa_offset_sf: struct { - offset: i64, - }, - val_offset: struct { - register: u8, - offset: u64, - }, - val_offset_sf: struct { - register: u8, - offset: i64, - }, - val_expression: struct { - register: u8, - block: []const u8, - }, - - pub fn read( - reader: *std.Io.Reader, - addr_size_bytes: u8, - endian: std.builtin.Endian, - ) !Instruction { - switch (try reader.takeByte()) { - Opcode.lo_inline...Opcode.hi_inline => |opcode| { - const e: Opcode = @enumFromInt(opcode & 0b11000000); - const value: u6 = @intCast(opcode & 0b111111); - return switch (e) { - .advance_loc => .{ - .advance_loc = .{ .delta = value }, - }, - .offset => .{ - .offset = .{ - .register = value, - .offset = try reader.takeLeb128(u64), - }, - }, - .restore => .{ - .restore = .{ .register = value }, - }, - else => unreachable, - }; - }, - Opcode.lo_reserved...Opcode.hi_reserved => |opcode| { - const e: Opcode = @enumFromInt(opcode); - return switch (e) { - .advance_loc, - .offset, - .restore, - => unreachable, - .nop => .{ .nop = {} }, - .set_loc => .{ - .set_loc = .{ - .address = switch (addr_size_bytes) { - 2 => try reader.takeInt(u16, endian), - 4 => try reader.takeInt(u32, endian), - 8 => try reader.takeInt(u64, endian), - else => return error.InvalidAddrSize, - }, - }, - }, - .advance_loc1 => .{ - .advance_loc1 = .{ .delta = try reader.takeByte() }, - }, - .advance_loc2 => .{ - .advance_loc2 = .{ .delta = try reader.takeInt(u16, endian) }, - }, - .advance_loc4 => .{ - .advance_loc4 = .{ .delta = try reader.takeInt(u32, endian) }, - }, - .offset_extended => .{ - .offset_extended = .{ - .register = try reader.takeLeb128(u8), - .offset = try reader.takeLeb128(u64), - }, - }, - .restore_extended => .{ - .restore_extended = .{ - .register = try reader.takeLeb128(u8), - }, - }, - .undefined => .{ - .undefined = .{ - .register = try reader.takeLeb128(u8), - }, - }, - .same_value => .{ - .same_value = .{ - .register = try reader.takeLeb128(u8), - }, - }, - .register => .{ - .register = .{ - .register = try reader.takeLeb128(u8), - .target_register = try reader.takeLeb128(u8), - }, - }, - .remember_state => .{ .remember_state = {} }, - .restore_state => .{ .restore_state = {} }, - .def_cfa => .{ - .def_cfa = .{ - .register = try reader.takeLeb128(u8), - .offset = try reader.takeLeb128(u64), - }, - }, - .def_cfa_register => .{ - .def_cfa_register = .{ - .register = try reader.takeLeb128(u8), - }, - }, - .def_cfa_offset => .{ - .def_cfa_offset = .{ - .offset = try reader.takeLeb128(u64), - }, - }, - .def_cfa_expression => .{ - .def_cfa_expression = .{ - .block = try 
readBlock(reader), - }, - }, - .expression => .{ - .expression = .{ - .register = try reader.takeLeb128(u8), - .block = try readBlock(reader), - }, - }, - .offset_extended_sf => .{ - .offset_extended_sf = .{ - .register = try reader.takeLeb128(u8), - .offset = try reader.takeLeb128(i64), - }, - }, - .def_cfa_sf => .{ - .def_cfa_sf = .{ - .register = try reader.takeLeb128(u8), - .offset = try reader.takeLeb128(i64), - }, - }, - .def_cfa_offset_sf => .{ - .def_cfa_offset_sf = .{ - .offset = try reader.takeLeb128(i64), - }, - }, - .val_offset => .{ - .val_offset = .{ - .register = try reader.takeLeb128(u8), - .offset = try reader.takeLeb128(u64), - }, - }, - .val_offset_sf => .{ - .val_offset_sf = .{ - .register = try reader.takeLeb128(u8), - .offset = try reader.takeLeb128(i64), - }, - }, - .val_expression => .{ - .val_expression = .{ - .register = try reader.takeLeb128(u8), - .block = try readBlock(reader), - }, - }, - }; - }, - Opcode.lo_user...Opcode.hi_user => return error.UnimplementedUserOpcode, - else => return error.InvalidOpcode, - } - } -}; diff --git a/lib/std/debug/Dwarf/expression.zig b/lib/std/debug/Dwarf/expression.zig index 68b49587c2..4460bd2bc2 100644 --- a/lib/std/debug/Dwarf/expression.zig +++ b/lib/std/debug/Dwarf/expression.zig @@ -5,12 +5,17 @@ const native_endian = native_arch.endian(); const std = @import("std"); const leb = std.leb; const OP = std.dwarf.OP; -const abi = std.debug.Dwarf.abi; const mem = std.mem; const assert = std.debug.assert; const testing = std.testing; const Writer = std.Io.Writer; +const regNative = std.debug.Dwarf.SelfUnwinder.regNative; + +const ip_reg_num = std.debug.Dwarf.ipRegNum(native_arch).?; +const fp_reg_num = std.debug.Dwarf.fpRegNum(native_arch); +const sp_reg_num = std.debug.Dwarf.spRegNum(native_arch); + /// Expressions can be evaluated in different contexts, each requiring its own set of inputs. /// Callers should specify all the fields relevant to their context. If a field is required /// by the expression and it isn't in the context, error.IncompleteExpressionContext is returned. @@ -23,9 +28,7 @@ pub const Context = struct { object_address: ?*const anyopaque = null, /// .debug_addr section debug_addr: ?[]const u8 = null, - /// Thread context - thread_context: ?*std.debug.ThreadContext = null, - reg_context: ?abi.RegisterContext = null, + cpu_context: ?*std.debug.cpu_context.Native = null, /// Call frame address, if in a CFI context cfa: ?usize = null, /// This expression is a sub-expression from an OP.entry_value instruction @@ -62,7 +65,9 @@ pub const Error = error{ InvalidTypeLength, TruncatedIntegralType, -} || abi.RegBytesError || error{ EndOfStream, Overflow, OutOfMemory, DivisionByZero, ReadFailed }; + + IncompatibleRegisterSize, +} || std.debug.cpu_context.DwarfRegisterError || error{ EndOfStream, Overflow, OutOfMemory, DivisionByZero, ReadFailed }; /// A stack machine that can decode and run DWARF expressions. 
/// Expressions can be decoded for non-native address size and endianness, @@ -369,29 +374,20 @@ pub fn StackMachine(comptime options: Options) type { OP.breg0...OP.breg31, OP.bregx, => { - if (context.thread_context == null) return error.IncompleteExpressionContext; - - const base_register = operand.?.base_register; - var value: i64 = @intCast(mem.readInt(usize, (try abi.regBytes( - context.thread_context.?, - base_register.base_register, - context.reg_context, - ))[0..@sizeOf(usize)], native_endian)); - value += base_register.offset; - try self.stack.append(allocator, .{ .generic = @intCast(value) }); + const cpu_context = context.cpu_context orelse return error.IncompleteExpressionContext; + + const br = operand.?.base_register; + const value: i64 = @intCast((try regNative(cpu_context, br.base_register)).*); + try self.stack.append(allocator, .{ .generic = @intCast(value + br.offset) }); }, OP.regval_type => { - const register_type = operand.?.register_type; - const value = mem.readInt(usize, (try abi.regBytes( - context.thread_context.?, - register_type.register, - context.reg_context, - ))[0..@sizeOf(usize)], native_endian); + const cpu_context = context.cpu_context orelse return error.IncompleteExpressionContext; + const rt = operand.?.register_type; try self.stack.append(allocator, .{ .regval_type = .{ - .type_offset = register_type.type_offset, + .type_offset = rt.type_offset, .type_size = @sizeOf(addr_type), - .value = value, + .value = (try regNative(cpu_context, rt.register)).*, }, }); }, @@ -734,14 +730,14 @@ pub fn StackMachine(comptime options: Options) type { // TODO: The spec states that this sub-expression needs to observe the state (ie. registers) // as it was upon entering the current subprogram. If this isn't being called at the - // end of a frame unwind operation, an additional ThreadContext with this state will be needed. + // end of a frame unwind operation, an additional cpu_context.Native with this state will be needed. 
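+ // (The common case is a sub-expression that is just a register location, answered
+ // directly from `cpu_context`; anything else is evaluated by the nested stack
+ // machine in the `else` branch below.)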
if (isOpcodeRegisterLocation(block[0])) { - if (context.thread_context == null) return error.IncompleteExpressionContext; + const cpu_context = context.cpu_context orelse return error.IncompleteExpressionContext; var block_stream: std.Io.Reader = .fixed(block); const register = (try readOperand(&block_stream, block[0], context)).?.register; - const value = mem.readInt(usize, (try abi.regBytes(context.thread_context.?, register, context.reg_context))[0..@sizeOf(usize)], native_endian); + const value = (try regNative(cpu_context, register)).*; try self.stack.append(allocator, .{ .generic = value }); } else { var stack_machine: Self = .{}; @@ -1149,55 +1145,39 @@ test "basics" { } // Register values - if (@sizeOf(std.debug.ThreadContext) != 0) { + if (std.debug.cpu_context.Native != noreturn) { stack_machine.reset(); program.clearRetainingCapacity(); - const reg_context = abi.RegisterContext{ - .eh_frame = true, - .is_macho = builtin.os.tag == .macos, - }; - var thread_context: std.debug.ThreadContext = undefined; - std.debug.relocateContext(&thread_context); + var cpu_context: std.debug.cpu_context.Native = undefined; const context = Context{ - .thread_context = &thread_context, - .reg_context = reg_context, + .cpu_context = &cpu_context, }; - // Only test register operations on arch / os that have them implemented - if (abi.regBytes(&thread_context, 0, reg_context)) |reg_bytes| { - - // TODO: Test fbreg (once implemented): mock a DIE and point compile_unit.frame_base at it - - mem.writeInt(usize, reg_bytes[0..@sizeOf(usize)], 0xee, native_endian); - (try abi.regValueNative(&thread_context, abi.fpRegNum(native_arch, reg_context), reg_context)).* = 1; - (try abi.regValueNative(&thread_context, abi.spRegNum(native_arch, reg_context), reg_context)).* = 2; - (try abi.regValueNative(&thread_context, abi.ipRegNum(native_arch).?, reg_context)).* = 3; - - try b.writeBreg(writer, abi.fpRegNum(native_arch, reg_context), @as(usize, 100)); - try b.writeBreg(writer, abi.spRegNum(native_arch, reg_context), @as(usize, 200)); - try b.writeBregx(writer, abi.ipRegNum(native_arch).?, @as(usize, 300)); - try b.writeRegvalType(writer, @as(u8, 0), @as(usize, 400)); - - _ = try stack_machine.run(program.written(), allocator, context, 0); - - const regval_type = stack_machine.stack.pop().?.regval_type; - try testing.expectEqual(@as(usize, 400), regval_type.type_offset); - try testing.expectEqual(@as(u8, @sizeOf(usize)), regval_type.type_size); - try testing.expectEqual(@as(usize, 0xee), regval_type.value); - - try testing.expectEqual(@as(usize, 303), stack_machine.stack.pop().?.generic); - try testing.expectEqual(@as(usize, 202), stack_machine.stack.pop().?.generic); - try testing.expectEqual(@as(usize, 101), stack_machine.stack.pop().?.generic); - } else |err| { - switch (err) { - error.UnimplementedArch, - error.UnimplementedOs, - error.ThreadContextNotSupported, - => {}, - else => return err, - } - } + const reg_bytes = try cpu_context.dwarfRegisterBytes(0); + + // TODO: Test fbreg (once implemented): mock a DIE and point compile_unit.frame_base at it + + mem.writeInt(usize, reg_bytes[0..@sizeOf(usize)], 0xee, native_endian); + (try regNative(&cpu_context, fp_reg_num)).* = 1; + (try regNative(&cpu_context, sp_reg_num)).* = 2; + (try regNative(&cpu_context, ip_reg_num)).* = 3; + + try b.writeBreg(writer, fp_reg_num, @as(usize, 100)); + try b.writeBreg(writer, sp_reg_num, @as(usize, 200)); + try b.writeBregx(writer, ip_reg_num, @as(usize, 300)); + try b.writeRegvalType(writer, @as(u8, 0), @as(usize, 400)); + + _ = 
try stack_machine.run(program.written(), allocator, context, 0); + + const regval_type = stack_machine.stack.pop().?.regval_type; + try testing.expectEqual(@as(usize, 400), regval_type.type_offset); + try testing.expectEqual(@as(u8, @sizeOf(usize)), regval_type.type_size); + try testing.expectEqual(@as(usize, 0xee), regval_type.value); + + try testing.expectEqual(@as(usize, 303), stack_machine.stack.pop().?.generic); + try testing.expectEqual(@as(usize, 202), stack_machine.stack.pop().?.generic); + try testing.expectEqual(@as(usize, 101), stack_machine.stack.pop().?.generic); } // Stack operations @@ -1585,38 +1565,21 @@ test "basics" { } // Register location description - const reg_context = abi.RegisterContext{ - .eh_frame = true, - .is_macho = builtin.os.tag == .macos, - }; - var thread_context: std.debug.ThreadContext = undefined; - std.debug.relocateContext(&thread_context); - context = Context{ - .thread_context = &thread_context, - .reg_context = reg_context, - }; + var cpu_context: std.debug.cpu_context.Native = undefined; + context = .{ .cpu_context = &cpu_context }; - if (abi.regBytes(&thread_context, 0, reg_context)) |reg_bytes| { - mem.writeInt(usize, reg_bytes[0..@sizeOf(usize)], 0xee, native_endian); + const reg_bytes = try cpu_context.dwarfRegisterBytes(0); + mem.writeInt(usize, reg_bytes[0..@sizeOf(usize)], 0xee, native_endian); - var sub_program: std.Io.Writer.Allocating = .init(allocator); - defer sub_program.deinit(); - const sub_writer = &sub_program.writer; - try b.writeReg(sub_writer, 0); + var sub_program: std.Io.Writer.Allocating = .init(allocator); + defer sub_program.deinit(); + const sub_writer = &sub_program.writer; + try b.writeReg(sub_writer, 0); - stack_machine.reset(); - program.clearRetainingCapacity(); - try b.writeEntryValue(writer, sub_program.written()); - _ = try stack_machine.run(program.written(), allocator, context, null); - try testing.expectEqual(@as(usize, 0xee), stack_machine.stack.pop().?.generic); - } else |err| { - switch (err) { - error.UnimplementedArch, - error.UnimplementedOs, - error.ThreadContextNotSupported, - => {}, - else => return err, - } - } + stack_machine.reset(); + program.clearRetainingCapacity(); + try b.writeEntryValue(writer, sub_program.written()); + _ = try stack_machine.run(program.written(), allocator, context, null); + try testing.expectEqual(@as(usize, 0xee), stack_machine.stack.pop().?.generic); } } diff --git a/lib/std/debug/ElfFile.zig b/lib/std/debug/ElfFile.zig new file mode 100644 index 0000000000..5be5ee55c5 --- /dev/null +++ b/lib/std/debug/ElfFile.zig @@ -0,0 +1,536 @@ +//! A helper type for loading an ELF file and collecting its DWARF debug information, unwind +//! information, and symbol table. + +is_64: bool, +endian: Endian, + +/// This is `null` iff any of the required DWARF sections were missing. `ElfFile.load` does *not* +/// call `Dwarf.open`, `Dwarf.scanAllFunctions`, etc; that is the caller's responsibility. +dwarf: ?Dwarf, + +/// If non-`null`, describes the `.eh_frame` section, which can be used with `Dwarf.Unwind`. +eh_frame: ?UnwindSection, +/// If non-`null`, describes the `.debug_frame` section, which can be used with `Dwarf.Unwind`. +debug_frame: ?UnwindSection, + +/// If non-`null`, this is the contents of the `.strtab` section. +strtab: ?[]const u8, +/// If non-`null`, describes the `.symtab` section. +symtab: ?SymtabSection, + +/// Binary search table lazily populated by `searchSymtab`. +symbol_search_table: ?[]usize, + +/// The memory-mapped ELF file, which is referenced by `dwarf`. 
This field is here only so that
+/// this memory can be unmapped by `ElfFile.deinit`.
+mapped_file: []align(std.heap.page_size_min) const u8,
+/// Sometimes, debug info is stored separately from the main ELF file. In that case, `mapped_file`
+/// is the mapped ELF binary, and `mapped_debug_file` is the mapped debug info file. Both must
+/// be unmapped by `ElfFile.deinit`.
+mapped_debug_file: ?[]align(std.heap.page_size_min) const u8,
+
+arena: std.heap.ArenaAllocator.State,
+
+pub const UnwindSection = struct {
+ vaddr: u64,
+ bytes: []const u8,
+};
+pub const SymtabSection = struct {
+ entry_size: u64,
+ bytes: []const u8,
+};
+
+pub const DebugInfoSearchPaths = struct {
+ /// The location of a debuginfod client directory, which acts as a search path for build IDs. If
+ /// given, we can load from this directory opportunistically, but make no effort to populate it.
+ /// To avoid allocation when building the search paths, this is given as two components which
+ /// will be concatenated.
+ debuginfod_client: ?[2][]const u8,
+ /// All "global debug directories" on the system. These are used as search paths for both debug
+ /// links and build IDs. On typical systems this is just "/usr/lib/debug".
+ global_debug: []const []const u8,
+ /// The path to the dirname of the ELF file, which acts as a search path for debug links.
+ exe_dir: ?[]const u8,
+
+ pub const none: DebugInfoSearchPaths = .{
+ .debuginfod_client = null,
+ .global_debug = &.{},
+ .exe_dir = null,
+ };
+
+ pub fn native(exe_path: []const u8) DebugInfoSearchPaths {
+ return .{
+ .debuginfod_client = p: {
+ if (std.posix.getenv("DEBUGINFOD_CACHE_PATH")) |p| {
+ break :p .{ p, "" };
+ }
+ if (std.posix.getenv("XDG_CACHE_HOME")) |cache_path| {
+ break :p .{ cache_path, "/debuginfod_client" };
+ }
+ if (std.posix.getenv("HOME")) |home_path| {
+ break :p .{ home_path, "/.cache/debuginfod_client" };
+ }
+ break :p null;
+ },
+ .global_debug = &.{
+ "/usr/lib/debug",
+ },
+ .exe_dir = std.fs.path.dirname(exe_path) orelse ".",
+ };
+ }
+};
+
+pub fn deinit(ef: *ElfFile, gpa: Allocator) void {
+ if (ef.dwarf) |*dwarf| dwarf.deinit(gpa);
+ if (ef.symbol_search_table) |t| gpa.free(t);
+ var arena = ef.arena.promote(gpa);
+ arena.deinit();
+
+ std.posix.munmap(ef.mapped_file);
+ if (ef.mapped_debug_file) |m| std.posix.munmap(m);
+
+ ef.* = undefined;
+}
+
+pub const LoadError = error{
+ OutOfMemory,
+ Overflow,
+ TruncatedElfFile,
+ InvalidCompressedSection,
+ InvalidElfMagic,
+ InvalidElfVersion,
+ InvalidElfClass,
+ InvalidElfEndian,
+ // The remaining errors all occur when attempting to stat or mmap a file.
+ SystemResources,
+ MemoryMappingNotSupported,
+ AccessDenied,
+ LockedMemoryLimitExceeded,
+ ProcessFdQuotaExceeded,
+ SystemFdQuotaExceeded,
+ Unexpected,
+};
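+
+// A sketch of typical use (illustrative only; the path is hypothetical, `native`
+// is POSIX-style, and the caller owns both the file handle and the returned
+// `ElfFile`):
+fn exampleLoad(gpa: Allocator, path: []const u8) !ElfFile {
+ const file = try std.fs.cwd().openFile(path, .{});
+ defer file.close(); // the mappings made by `load` outlive the handle
+ const search_paths: DebugInfoSearchPaths = .native(path);
+ return load(gpa, file, null, &search_paths);
+}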
+
+pub fn load(
+ gpa: Allocator,
+ elf_file: std.fs.File,
+ opt_build_id: ?[]const u8,
+ di_search_paths: *const DebugInfoSearchPaths,
+) LoadError!ElfFile {
+ var arena_instance: std.heap.ArenaAllocator = .init(gpa);
+ errdefer arena_instance.deinit();
+ const arena = arena_instance.allocator();
+
+ var result = loadInner(arena, elf_file, null) catch |err| switch (err) {
+ error.CrcMismatch => unreachable, // we passed crc as null
+ else => |e| return e,
+ };
+ errdefer std.posix.munmap(result.mapped_mem);
+
+ // `loadInner` did most of the work, but we might need to load an external debug info file
+
+ const di_mapped_mem: ?[]align(std.heap.page_size_min) const u8 = load_di: {
+ if (result.sections.get(.debug_info) != null and
+ result.sections.get(.debug_abbrev) != null and
+ result.sections.get(.debug_str) != null and
+ result.sections.get(.debug_line) != null)
+ {
+ // The info is already loaded from this file alone!
+ break :load_di null;
+ }
+
+ // We're missing some debug info; let's try to load it from a separate file.
+
+ build_id: {
+ const build_id = opt_build_id orelse break :build_id;
+ if (build_id.len < 3) break :build_id;
+
+ for (di_search_paths.global_debug) |global_debug| {
+ if (try loadSeparateDebugFile(arena, &result, null, "{s}/.build-id/{x}/{x}.debug", .{
+ global_debug,
+ build_id[0..1],
+ build_id[1..],
+ })) |mapped| break :load_di mapped;
+ }
+
+ if (di_search_paths.debuginfod_client) |components| {
+ if (try loadSeparateDebugFile(arena, &result, null, "{s}{s}/{x}/debuginfo", .{
+ components[0],
+ components[1],
+ build_id,
+ })) |mapped| break :load_di mapped;
+ }
+ }
+
+ debug_link: {
+ const section = result.sections.get(.gnu_debuglink) orelse break :debug_link;
+ const debug_filename = std.mem.sliceTo(section.bytes, 0);
+ const crc_offset = std.mem.alignForward(usize, debug_filename.len + 1, 4);
+ if (section.bytes.len < crc_offset + 4) break :debug_link;
+ const debug_crc = std.mem.readInt(u32, section.bytes[crc_offset..][0..4], result.endian);
+
+ const exe_dir = di_search_paths.exe_dir orelse break :debug_link;
+
+ if (try loadSeparateDebugFile(arena, &result, debug_crc, "{s}/{s}", .{
+ exe_dir,
+ debug_filename,
+ })) |mapped| break :load_di mapped;
+ if (try loadSeparateDebugFile(arena, &result, debug_crc, "{s}/.debug/{s}", .{
+ exe_dir,
+ debug_filename,
+ })) |mapped| break :load_di mapped;
+ for (di_search_paths.global_debug) |global_debug| {
+ // This looks like a bug; it isn't. They really do embed the absolute path to the
+ // exe's dirname, *under* the global debug path.
+ if (try loadSeparateDebugFile(arena, &result, debug_crc, "{s}/{s}/{s}", .{ + global_debug, + exe_dir, + debug_filename, + })) |mapped| break :load_di mapped; + } + } + + break :load_di null; + }; + errdefer comptime unreachable; + + return .{ + .is_64 = result.is_64, + .endian = result.endian, + .dwarf = dwarf: { + if (result.sections.get(.debug_info) == null or + result.sections.get(.debug_abbrev) == null or + result.sections.get(.debug_str) == null or + result.sections.get(.debug_line) == null) + { + break :dwarf null; // debug info not present + } + var sections: Dwarf.SectionArray = @splat(null); + inline for (@typeInfo(Dwarf.Section.Id).@"enum".fields) |f| { + if (result.sections.get(@field(Section.Id, f.name))) |s| { + sections[f.value] = .{ .data = s.bytes, .owned = false }; + } + } + break :dwarf .{ .sections = sections }; + }, + .eh_frame = if (result.sections.get(.eh_frame)) |s| .{ + .vaddr = s.header.sh_addr, + .bytes = s.bytes, + } else null, + .debug_frame = if (result.sections.get(.debug_frame)) |s| .{ + .vaddr = s.header.sh_addr, + .bytes = s.bytes, + } else null, + .strtab = if (result.sections.get(.strtab)) |s| s.bytes else null, + .symtab = if (result.sections.get(.symtab)) |s| .{ + .entry_size = s.header.sh_entsize, + .bytes = s.bytes, + } else null, + .symbol_search_table = null, + .mapped_file = result.mapped_mem, + .mapped_debug_file = di_mapped_mem, + .arena = arena_instance.state, + }; +} + +pub fn searchSymtab(ef: *ElfFile, gpa: Allocator, vaddr: u64) error{ + NoSymtab, + NoStrtab, + BadSymtab, + OutOfMemory, +}!std.debug.Symbol { + const symtab = ef.symtab orelse return error.NoSymtab; + const strtab = ef.strtab orelse return error.NoStrtab; + + if (symtab.bytes.len % symtab.entry_size != 0) return error.BadSymtab; + + const swap_endian = ef.endian != @import("builtin").cpu.arch.endian(); + + switch (ef.is_64) { + inline true, false => |is_64| { + const Sym = if (is_64) elf.Elf64_Sym else elf.Elf32_Sym; + if (symtab.entry_size != @sizeOf(Sym)) return error.BadSymtab; + const symbols: []align(1) const Sym = @ptrCast(symtab.bytes); + if (ef.symbol_search_table == null) { + ef.symbol_search_table = try buildSymbolSearchTable(gpa, ef.endian, Sym, symbols); + } + const search_table = ef.symbol_search_table.?; + const SearchContext = struct { + swap_endian: bool, + target: u64, + symbols: []align(1) const Sym, + fn predicate(ctx: @This(), sym_index: usize) bool { + // We need to return `true` for the first N items, then `false` for the rest -- + // the index we'll get out is the first `false` one. So, we'll return `true` iff + // the target address is after the *end* of this symbol. This synchronizes with + // the logic in `buildSymbolSearchTable` which sorts by *end* address. 
+ var sym = ctx.symbols[sym_index]; + if (ctx.swap_endian) std.mem.byteSwapAllFields(Sym, &sym); + const sym_end = sym.st_value + sym.st_size; + return ctx.target >= sym_end; + } + }; + const sym_index_index = std.sort.partitionPoint(usize, search_table, @as(SearchContext, .{ + .swap_endian = swap_endian, + .target = vaddr, + .symbols = symbols, + }), SearchContext.predicate); + if (sym_index_index == search_table.len) return .unknown; + var sym = symbols[search_table[sym_index_index]]; + if (swap_endian) std.mem.byteSwapAllFields(Sym, &sym); + if (vaddr < sym.st_value or vaddr >= sym.st_value + sym.st_size) return .unknown; + return .{ + .name = std.mem.sliceTo(strtab[sym.st_name..], 0), + .compile_unit_name = null, + .source_location = null, + }; + }, + } +} + +fn buildSymbolSearchTable(gpa: Allocator, endian: Endian, comptime Sym: type, symbols: []align(1) const Sym) error{ + OutOfMemory, + BadSymtab, +}![]usize { + var result: std.ArrayList(usize) = .empty; + defer result.deinit(gpa); + + const swap_endian = endian != @import("builtin").cpu.arch.endian(); + + for (symbols, 0..) |sym_orig, sym_index| { + var sym = sym_orig; + if (swap_endian) std.mem.byteSwapAllFields(Sym, &sym); + if (sym.st_name == 0) continue; + if (sym.st_shndx == elf.SHN_UNDEF) continue; + try result.append(gpa, sym_index); + } + + const SortContext = struct { + swap_endian: bool, + symbols: []align(1) const Sym, + fn lessThan(ctx: @This(), lhs_sym_index: usize, rhs_sym_index: usize) bool { + // We sort by *end* address, not start address. This matches up with logic in `searchSymtab`. + var lhs_sym = ctx.symbols[lhs_sym_index]; + var rhs_sym = ctx.symbols[rhs_sym_index]; + if (ctx.swap_endian) { + std.mem.byteSwapAllFields(Sym, &lhs_sym); + std.mem.byteSwapAllFields(Sym, &rhs_sym); + } + const lhs_val = lhs_sym.st_value + lhs_sym.st_size; + const rhs_val = rhs_sym.st_value + rhs_sym.st_size; + return lhs_val < rhs_val; + } + }; + std.mem.sort(usize, result.items, @as(SortContext, .{ + .swap_endian = swap_endian, + .symbols = symbols, + }), SortContext.lessThan); + + return result.toOwnedSlice(gpa); +} + +/// Only used locally, during `load`. +const Section = struct { + header: elf.Elf64_Shdr, + bytes: []const u8, + const Id = enum { + // DWARF sections: see `Dwarf.Section.Id`. + debug_info, + debug_abbrev, + debug_str, + debug_str_offsets, + debug_line, + debug_line_str, + debug_ranges, + debug_loclists, + debug_rnglists, + debug_addr, + debug_names, + // Then anything else we're interested in. 
+        gnu_debuglink,
+        eh_frame,
+        debug_frame,
+        symtab,
+        strtab,
+    };
+    const Array = std.enums.EnumArray(Section.Id, ?Section);
+};
+
+fn loadSeparateDebugFile(arena: Allocator, main_loaded: *LoadInnerResult, opt_crc: ?u32, comptime fmt: []const u8, args: anytype) Allocator.Error!?[]align(std.heap.page_size_min) const u8 {
+    const path = try std.fmt.allocPrint(arena, fmt, args);
+    const elf_file = std.fs.cwd().openFile(path, .{}) catch return null;
+    defer elf_file.close();
+
+    const result = loadInner(arena, elf_file, opt_crc) catch |err| switch (err) {
+        error.OutOfMemory => |e| return e,
+        error.CrcMismatch => return null,
+        else => return null,
+    };
+    errdefer comptime unreachable;
+
+    // Between them, the main file and the separate debug file must provide all of the
+    // essential debug sections; otherwise, the separate file is of no use to us.
+    const have_debug_sections = inline for (@as([]const []const u8, &.{
+        "debug_info",
+        "debug_abbrev",
+        "debug_str",
+        "debug_line",
+    })) |name| {
+        const s = @field(Section.Id, name);
+        if (main_loaded.sections.get(s) == null and result.sections.get(s) == null) {
+            break false;
+        }
+    } else true;
+
+    if (result.is_64 != main_loaded.is_64 or
+        result.endian != main_loaded.endian or
+        !have_debug_sections)
+    {
+        std.posix.munmap(result.mapped_mem);
+        return null;
+    }
+
+    inline for (@typeInfo(Dwarf.Section.Id).@"enum".fields) |f| {
+        const id = @field(Section.Id, f.name);
+        if (main_loaded.sections.get(id) == null) {
+            main_loaded.sections.set(id, result.sections.get(id));
+        }
+    }
+
+    return result.mapped_mem;
+}
+
+const LoadInnerResult = struct {
+    is_64: bool,
+    endian: Endian,
+    sections: Section.Array,
+    mapped_mem: []align(std.heap.page_size_min) const u8,
+};
+fn loadInner(
+    arena: Allocator,
+    elf_file: std.fs.File,
+    opt_crc: ?u32,
+) (LoadError || error{CrcMismatch})!LoadInnerResult {
+    const mapped_mem: []align(std.heap.page_size_min) const u8 = mapped: {
+        const file_len = std.math.cast(
+            usize,
+            elf_file.getEndPos() catch |err| switch (err) {
+                error.PermissionDenied => unreachable, // not asking for PROT_EXEC
+                else => |e| return e,
+            },
+        ) orelse return error.Overflow;
+
+        break :mapped std.posix.mmap(
+            null,
+            file_len,
+            std.posix.PROT.READ,
+            .{ .TYPE = .SHARED },
+            elf_file.handle,
+            0,
+        ) catch |err| switch (err) {
+            error.MappingAlreadyExists => unreachable, // not using FIXED_NOREPLACE
+            error.PermissionDenied => unreachable, // not asking for PROT_EXEC
+            else => |e| return e,
+        };
+    };
+
+    if (opt_crc) |crc| {
+        if (std.hash.crc.Crc32.hash(mapped_mem) != crc) {
+            return error.CrcMismatch;
+        }
+    }
+    errdefer std.posix.munmap(mapped_mem);
+
+    var fr: std.Io.Reader = .fixed(mapped_mem);
+
+    const header = elf.Header.read(&fr) catch |err| switch (err) {
+        error.ReadFailed => unreachable,
+        error.EndOfStream => return error.TruncatedElfFile,
+
+        error.InvalidElfMagic,
+        error.InvalidElfVersion,
+        error.InvalidElfClass,
+        error.InvalidElfEndian,
+        => |e| return e,
+    };
+    const endian = header.endian;
+
+    const shstrtab_shdr_off = try std.math.add(
+        u64,
+        header.shoff,
+        try std.math.mul(u64, header.shstrndx, header.shentsize),
+    );
+    fr.seek = std.math.cast(usize, shstrtab_shdr_off) orelse return error.Overflow;
+    const shstrtab: []const u8 = if (header.is_64) shstrtab: {
+        const shdr = fr.takeStruct(elf.Elf64_Shdr, endian) catch return error.TruncatedElfFile;
+        if (shdr.sh_offset + shdr.sh_size > mapped_mem.len) return error.TruncatedElfFile;
+        break :shstrtab mapped_mem[@intCast(shdr.sh_offset)..][0..@intCast(shdr.sh_size)];
+    } else shstrtab: {
+        const shdr = fr.takeStruct(elf.Elf32_Shdr, endian) catch return error.TruncatedElfFile;
+        if (shdr.sh_offset + shdr.sh_size > mapped_mem.len) return error.TruncatedElfFile;
+        break :shstrtab mapped_mem[@intCast(shdr.sh_offset)..][0..@intCast(shdr.sh_size)];
+    };
+
+    var sections: Section.Array = .initFill(null);
+
+    var it = header.iterateSectionHeadersBuffer(mapped_mem);
+    while (it.next() catch return error.TruncatedElfFile) |shdr| {
+        if (shdr.sh_type == elf.SHT_NULL or shdr.sh_type == elf.SHT_NOBITS) continue;
+        if (shdr.sh_name > shstrtab.len) return error.TruncatedElfFile;
+        const name = std.mem.sliceTo(shstrtab[@intCast(shdr.sh_name)..], 0);
+
+        const section_id: Section.Id = inline for (@typeInfo(Section.Id).@"enum".fields) |s| {
+            if (std.mem.eql(u8, "." ++ s.name, name)) {
+                break @enumFromInt(s.value);
+            }
+        } else continue;
+
+        if (sections.get(section_id) != null) continue;
+
+        if (shdr.sh_offset + shdr.sh_size > mapped_mem.len) return error.TruncatedElfFile;
+        const raw_section_bytes = mapped_mem[@intCast(shdr.sh_offset)..][0..@intCast(shdr.sh_size)];
+        const section_bytes: []const u8 = bytes: {
+            if ((shdr.sh_flags & elf.SHF_COMPRESSED) == 0) break :bytes raw_section_bytes;
+
+            var section_reader: std.Io.Reader = .fixed(raw_section_bytes);
+            const ch_type: elf.COMPRESS, const ch_size: u64 = if (header.is_64) ch: {
+                const chdr = section_reader.takeStruct(elf.Elf64_Chdr, endian) catch return error.InvalidCompressedSection;
+                break :ch .{ chdr.ch_type, chdr.ch_size };
+            } else ch: {
+                const chdr = section_reader.takeStruct(elf.Elf32_Chdr, endian) catch return error.InvalidCompressedSection;
+                break :ch .{ chdr.ch_type, chdr.ch_size };
+            };
+            if (ch_type != .ZLIB) {
+                // The compression algorithm is unsupported, but don't make that a hard error; the
+                // file might still be valid, and we might still be okay without this section.
+                continue;
+            }
+
+            const buf = try arena.alloc(u8, std.math.cast(usize, ch_size) orelse return error.Overflow);
+            var fw: std.Io.Writer = .fixed(buf);
+            var decompress: std.compress.flate.Decompress = .init(&section_reader, .zlib, &.{});
+            const n = decompress.reader.streamRemaining(&fw) catch |err| switch (err) {
+                // If a write failed, then `buf` filled up, so `ch_size` was incorrect
+                error.WriteFailed => return error.InvalidCompressedSection,
+                // If a read failed, flate expected the section to have more data
+                error.ReadFailed => return error.InvalidCompressedSection,
+            };
+            // It's also an error if the data is shorter than expected.
+            if (n != buf.len) return error.InvalidCompressedSection;
+            break :bytes buf;
+        };
+        sections.set(section_id, .{ .header = shdr, .bytes = section_bytes });
+    }
+
+    return .{
+        .is_64 = header.is_64,
+        .endian = endian,
+        .sections = sections,
+        .mapped_mem = mapped_mem,
+    };
+}
+
+const std = @import("std");
+const Endian = std.builtin.Endian;
+const Dwarf = std.debug.Dwarf;
+const ElfFile = @This();
+const Allocator = std.mem.Allocator;
+const elf = std.elf;
diff --git a/lib/std/debug/Info.zig b/lib/std/debug/Info.zig
index c809547f73..74119a3ea4 100644
--- a/lib/std/debug/Info.zig
+++ b/lib/std/debug/Info.zig
@@ -9,7 +9,7 @@ const std = @import("../std.zig");
 const Allocator = std.mem.Allocator;
 const Path = std.Build.Cache.Path;
-const Dwarf = std.debug.Dwarf;
+const ElfFile = std.debug.ElfFile;
 const assert = std.debug.assert;
 const Coverage = std.debug.Coverage;
 const SourceLocation = std.debug.Coverage.SourceLocation;
@@ -17,27 +17,35 @@ const SourceLocation = std.debug.Coverage.SourceLocation;
 const Info = @This();
 
 /// Sorted by key, ascending.
-address_map: std.AutoArrayHashMapUnmanaged(u64, Dwarf.ElfModule),
+address_map: std.AutoArrayHashMapUnmanaged(u64, ElfFile),
 /// Externally managed, outlives this `Info` instance.
 coverage: *Coverage,
 
-pub const LoadError = Dwarf.ElfModule.LoadError;
+pub const LoadError = std.fs.File.OpenError || ElfFile.LoadError || std.debug.Dwarf.ScanError || error{MissingDebugInfo};
 
 pub fn load(gpa: Allocator, path: Path, coverage: *Coverage) LoadError!Info {
-    var sections: Dwarf.SectionArray = Dwarf.null_section_array;
-    var elf_module = try Dwarf.ElfModule.loadPath(gpa, path, null, null, &sections, null);
-    try elf_module.dwarf.populateRanges(gpa);
+    var file = try path.root_dir.handle.openFile(path.sub_path, .{});
+    defer file.close();
+
+    var elf_file: ElfFile = try .load(gpa, file, null, &.none);
+    errdefer elf_file.deinit(gpa);
+
+    if (elf_file.dwarf == null) return error.MissingDebugInfo;
+    try elf_file.dwarf.?.open(gpa, elf_file.endian);
+    try elf_file.dwarf.?.populateRanges(gpa, elf_file.endian);
+
     var info: Info = .{
         .address_map = .{},
         .coverage = coverage,
     };
-    try info.address_map.put(gpa, elf_module.base_address, elf_module);
+    try info.address_map.put(gpa, 0, elf_file);
+    errdefer comptime unreachable; // elf_file is owned by the map now
     return info;
 }
 
 pub fn deinit(info: *Info, gpa: Allocator) void {
-    for (info.address_map.values()) |*elf_module| {
-        elf_module.dwarf.deinit(gpa);
+    for (info.address_map.values()) |*elf_file| {
+        elf_file.dwarf.?.deinit(gpa);
     }
     info.address_map.deinit(gpa);
     info.* = undefined;
@@ -57,6 +65,6 @@ pub fn resolveAddresses(
 ) ResolveAddressesError!void {
     assert(sorted_pc_addrs.len == output.len);
     if (info.address_map.entries.len != 1) @panic("TODO");
-    const elf_module = &info.address_map.values()[0];
-    return info.coverage.resolveAddressesDwarf(gpa, sorted_pc_addrs, output, &elf_module.dwarf);
+    const elf_file = &info.address_map.values()[0];
+    return info.coverage.resolveAddressesDwarf(gpa, elf_file.endian, sorted_pc_addrs, output, &elf_file.dwarf.?);
 }
diff --git a/lib/std/debug/Pdb.zig b/lib/std/debug/Pdb.zig
index 008aad6ab6..c10b361f72 100644
--- a/lib/std/debug/Pdb.zig
+++ b/lib/std/debug/Pdb.zig
@@ -171,6 +171,7 @@ pub fn parseInfoStream(self: *Pdb) !void {
     const string_table_index = str_tab_index: {
         const name_bytes_len = try reader.takeInt(u32, .little);
         const name_bytes = try reader.readAlloc(gpa, name_bytes_len);
+        defer gpa.free(name_bytes);
 
         const HashTableHeader = extern struct {
             size: u32,
@@ -412,8 +413,7 @@ const Msf = struct {
             return error.InvalidDebugInfo;
         if (superblock.free_block_map_block != 1 and superblock.free_block_map_block != 2)
             return error.InvalidDebugInfo;
-        const file_len = try file_reader.getSize();
-        if (superblock.num_blocks * superblock.block_size != file_len)
+        if (superblock.num_blocks * superblock.block_size != try file_reader.getSize())
             return error.InvalidDebugInfo;
         switch (superblock.block_size) {
             // llvm only supports 4096 but we can handle any of these values
@@ -427,6 +427,7 @@ const Msf = struct {
         try file_reader.seekTo(superblock.block_size * superblock.block_map_addr);
 
         const dir_blocks = try gpa.alloc(u32, dir_block_count);
+        errdefer gpa.free(dir_blocks);
         for (dir_blocks) |*b| {
             b.* = try file_reader.interface.takeInt(u32, .little);
         }
@@ -450,25 +451,25 @@ const Msf = struct {
         const streams = try gpa.alloc(MsfStream, stream_count);
         errdefer gpa.free(streams);
 
-        for (streams, 0..) 
|*stream, i| { - const size = stream_sizes[i]; + for (streams, stream_sizes) |*stream, size| { if (size == 0) { stream.* = .empty; - } else { - const blocks = try gpa.alloc(u32, size); - errdefer gpa.free(blocks); - for (blocks) |*block| { - const block_id = try directory.interface.takeInt(u32, .little); - const n = (block_id % superblock.block_size); - // 0 is for pdb.SuperBlock, 1 and 2 for FPMs. - if (block_id == 0 or n == 1 or n == 2 or block_id * superblock.block_size > file_len) - return error.InvalidBlockIndex; - block.* = block_id; - } - const buffer = try gpa.alloc(u8, 64); - errdefer gpa.free(buffer); - stream.* = .init(superblock.block_size, file_reader, blocks, buffer); + continue; + } + const blocks = try gpa.alloc(u32, size); + errdefer gpa.free(blocks); + for (blocks) |*block| { + const block_id = try directory.interface.takeInt(u32, .little); + // Index 0 is reserved for the superblock. + // In theory, every page which is `n * block_size + 1` or `n * block_size + 2` + // is also reserved, for one of the FPMs. However, LLVM has been observed to map + // these into actual streams, so allow it for compatibility. + if (block_id == 0 or block_id >= superblock.num_blocks) return error.InvalidBlockIndex; + block.* = block_id; } + const buffer = try gpa.alloc(u8, 64); + errdefer gpa.free(buffer); + stream.* = .init(superblock.block_size, file_reader, blocks, buffer); } const end = directory.logicalPos(); diff --git a/lib/std/debug/SelfInfo.zig b/lib/std/debug/SelfInfo.zig deleted file mode 100644 index f77d14b913..0000000000 --- a/lib/std/debug/SelfInfo.zig +++ /dev/null @@ -1,2238 +0,0 @@ -//! Cross-platform abstraction for this binary's own debug information, with a -//! goal of minimal code bloat and compilation speed penalty. - -const builtin = @import("builtin"); -const native_os = builtin.os.tag; -const native_endian = native_arch.endian(); -const native_arch = builtin.cpu.arch; - -const std = @import("../std.zig"); -const mem = std.mem; -const Allocator = std.mem.Allocator; -const windows = std.os.windows; -const macho = std.macho; -const fs = std.fs; -const coff = std.coff; -const pdb = std.pdb; -const assert = std.debug.assert; -const posix = std.posix; -const elf = std.elf; -const Dwarf = std.debug.Dwarf; -const Pdb = std.debug.Pdb; -const File = std.fs.File; -const math = std.math; -const testing = std.testing; -const StackIterator = std.debug.StackIterator; -const regBytes = Dwarf.abi.regBytes; -const regValueNative = Dwarf.abi.regValueNative; - -const SelfInfo = @This(); - -const root = @import("root"); - -allocator: Allocator, -address_map: std.AutoHashMap(usize, *Module), -modules: if (native_os == .windows) std.ArrayListUnmanaged(WindowsModule) else void, - -pub const OpenError = error{ - MissingDebugInfo, - UnsupportedOperatingSystem, -} || @typeInfo(@typeInfo(@TypeOf(SelfInfo.init)).@"fn".return_type.?).error_union.error_set; - -pub fn open(allocator: Allocator) OpenError!SelfInfo { - nosuspend { - if (builtin.strip_debug_info) - return error.MissingDebugInfo; - switch (native_os) { - .linux, - .freebsd, - .netbsd, - .dragonfly, - .openbsd, - .macos, - .solaris, - .illumos, - .windows, - => return try SelfInfo.init(allocator), - else => return error.UnsupportedOperatingSystem, - } - } -} - -pub fn init(allocator: Allocator) !SelfInfo { - var debug_info: SelfInfo = .{ - .allocator = allocator, - .address_map = std.AutoHashMap(usize, *Module).init(allocator), - .modules = if (native_os == .windows) .{} else {}, - }; - - if (native_os == .windows) { - errdefer 
debug_info.modules.deinit(allocator); - - const handle = windows.kernel32.CreateToolhelp32Snapshot(windows.TH32CS_SNAPMODULE | windows.TH32CS_SNAPMODULE32, 0); - if (handle == windows.INVALID_HANDLE_VALUE) { - switch (windows.GetLastError()) { - else => |err| return windows.unexpectedError(err), - } - } - defer windows.CloseHandle(handle); - - var module_entry: windows.MODULEENTRY32 = undefined; - module_entry.dwSize = @sizeOf(windows.MODULEENTRY32); - if (windows.kernel32.Module32First(handle, &module_entry) == 0) { - return error.MissingDebugInfo; - } - - var module_valid = true; - while (module_valid) { - const module_info = try debug_info.modules.addOne(allocator); - const name = allocator.dupe(u8, mem.sliceTo(&module_entry.szModule, 0)) catch &.{}; - errdefer allocator.free(name); - - module_info.* = .{ - .base_address = @intFromPtr(module_entry.modBaseAddr), - .size = module_entry.modBaseSize, - .name = name, - .handle = module_entry.hModule, - }; - - module_valid = windows.kernel32.Module32Next(handle, &module_entry) == 1; - } - } - - return debug_info; -} - -pub fn deinit(self: *SelfInfo) void { - var it = self.address_map.iterator(); - while (it.next()) |entry| { - const mdi = entry.value_ptr.*; - mdi.deinit(self.allocator); - self.allocator.destroy(mdi); - } - self.address_map.deinit(); - if (native_os == .windows) { - for (self.modules.items) |module| { - self.allocator.free(module.name); - if (module.mapped_file) |mapped_file| mapped_file.deinit(); - } - self.modules.deinit(self.allocator); - } -} - -pub fn getModuleForAddress(self: *SelfInfo, address: usize) !*Module { - if (builtin.target.os.tag.isDarwin()) { - return self.lookupModuleDyld(address); - } else if (native_os == .windows) { - return self.lookupModuleWin32(address); - } else if (native_os == .haiku) { - return self.lookupModuleHaiku(address); - } else if (builtin.target.cpu.arch.isWasm()) { - return self.lookupModuleWasm(address); - } else { - return self.lookupModuleDl(address); - } -} - -// Returns the module name for a given address. -// This can be called when getModuleForAddress fails, so implementations should provide -// a path that doesn't rely on any side-effects of a prior successful module lookup. 
-pub fn getModuleNameForAddress(self: *SelfInfo, address: usize) ?[]const u8 { - if (builtin.target.os.tag.isDarwin()) { - return self.lookupModuleNameDyld(address); - } else if (native_os == .windows) { - return self.lookupModuleNameWin32(address); - } else if (native_os == .haiku) { - return null; - } else if (builtin.target.cpu.arch.isWasm()) { - return null; - } else { - return self.lookupModuleNameDl(address); - } -} - -fn lookupModuleDyld(self: *SelfInfo, address: usize) !*Module { - const image_count = std.c._dyld_image_count(); - - var i: u32 = 0; - while (i < image_count) : (i += 1) { - const header = std.c._dyld_get_image_header(i) orelse continue; - const base_address = @intFromPtr(header); - if (address < base_address) continue; - const vmaddr_slide = std.c._dyld_get_image_vmaddr_slide(i); - - var it = macho.LoadCommandIterator{ - .ncmds = header.ncmds, - .buffer = @alignCast(@as( - [*]u8, - @ptrFromInt(@intFromPtr(header) + @sizeOf(macho.mach_header_64)), - )[0..header.sizeofcmds]), - }; - - var unwind_info: ?[]const u8 = null; - var eh_frame: ?[]const u8 = null; - while (it.next()) |cmd| switch (cmd.cmd()) { - .SEGMENT_64 => { - const segment_cmd = cmd.cast(macho.segment_command_64).?; - if (!mem.eql(u8, "__TEXT", segment_cmd.segName())) continue; - - const seg_start = segment_cmd.vmaddr + vmaddr_slide; - const seg_end = seg_start + segment_cmd.vmsize; - if (address >= seg_start and address < seg_end) { - if (self.address_map.get(base_address)) |obj_di| { - return obj_di; - } - - for (cmd.getSections()) |sect| { - const sect_addr: usize = @intCast(sect.addr); - const sect_size: usize = @intCast(sect.size); - if (mem.eql(u8, "__unwind_info", sect.sectName())) { - unwind_info = @as([*]const u8, @ptrFromInt(sect_addr + vmaddr_slide))[0..sect_size]; - } else if (mem.eql(u8, "__eh_frame", sect.sectName())) { - eh_frame = @as([*]const u8, @ptrFromInt(sect_addr + vmaddr_slide))[0..sect_size]; - } - } - - const obj_di = try self.allocator.create(Module); - errdefer self.allocator.destroy(obj_di); - - const macho_path = mem.sliceTo(std.c._dyld_get_image_name(i), 0); - const macho_file = fs.cwd().openFile(macho_path, .{}) catch |err| switch (err) { - error.FileNotFound => return error.MissingDebugInfo, - else => return err, - }; - obj_di.* = try readMachODebugInfo(self.allocator, macho_file); - obj_di.base_address = base_address; - obj_di.vmaddr_slide = vmaddr_slide; - obj_di.unwind_info = unwind_info; - obj_di.eh_frame = eh_frame; - - try self.address_map.putNoClobber(base_address, obj_di); - - return obj_di; - } - }, - else => {}, - }; - } - - return error.MissingDebugInfo; -} - -fn lookupModuleNameDyld(self: *SelfInfo, address: usize) ?[]const u8 { - _ = self; - const image_count = std.c._dyld_image_count(); - - var i: u32 = 0; - while (i < image_count) : (i += 1) { - const header = std.c._dyld_get_image_header(i) orelse continue; - const base_address = @intFromPtr(header); - if (address < base_address) continue; - const vmaddr_slide = std.c._dyld_get_image_vmaddr_slide(i); - - var it = macho.LoadCommandIterator{ - .ncmds = header.ncmds, - .buffer = @alignCast(@as( - [*]u8, - @ptrFromInt(@intFromPtr(header) + @sizeOf(macho.mach_header_64)), - )[0..header.sizeofcmds]), - }; - - while (it.next()) |cmd| switch (cmd.cmd()) { - .SEGMENT_64 => { - const segment_cmd = cmd.cast(macho.segment_command_64).?; - if (!mem.eql(u8, "__TEXT", segment_cmd.segName())) continue; - - const original_address = address - vmaddr_slide; - const seg_start = segment_cmd.vmaddr; - const seg_end = seg_start + 
segment_cmd.vmsize;
-                if (original_address >= seg_start and original_address < seg_end) {
-                    return fs.path.basename(mem.sliceTo(std.c._dyld_get_image_name(i), 0));
-                }
-            },
-            else => {},
-        };
-    }
-
-    return null;
-}
-
-fn lookupModuleWin32(self: *SelfInfo, address: usize) !*Module {
-    for (self.modules.items) |*module| {
-        if (address >= module.base_address and address < module.base_address + module.size) {
-            if (self.address_map.get(module.base_address)) |obj_di| {
-                return obj_di;
-            }
-
-            const obj_di = try self.allocator.create(Module);
-            errdefer self.allocator.destroy(obj_di);
-
-            const mapped_module = @as([*]const u8, @ptrFromInt(module.base_address))[0..module.size];
-            var coff_obj = try coff.Coff.init(mapped_module, true);
-
-            // The string table is not mapped into memory by the loader, so if a section name is in the
-            // string table then we have to map the full image file from disk. This can happen when
-            // a binary is produced with -gdwarf, since the section names are longer than 8 bytes.
-            if (coff_obj.strtabRequired()) {
-                var name_buffer: [windows.PATH_MAX_WIDE + 4:0]u16 = undefined;
-                // openFileAbsoluteW requires the prefix to be present
-                @memcpy(name_buffer[0..4], &[_]u16{ '\\', '?', '?', '\\' });
-
-                const process_handle = windows.GetCurrentProcess();
-                const len = windows.kernel32.GetModuleFileNameExW(
-                    process_handle,
-                    module.handle,
-                    @ptrCast(&name_buffer[4]),
-                    windows.PATH_MAX_WIDE,
-                );
-
-                if (len == 0) return error.MissingDebugInfo;
-                const coff_file = fs.openFileAbsoluteW(name_buffer[0 .. len + 4 :0], .{}) catch |err| switch (err) {
-                    error.FileNotFound => return error.MissingDebugInfo,
-                    else => return err,
-                };
-                errdefer coff_file.close();
-
-                var section_handle: windows.HANDLE = undefined;
-                const create_section_rc = windows.ntdll.NtCreateSection(
-                    &section_handle,
-                    windows.STANDARD_RIGHTS_REQUIRED | windows.SECTION_QUERY | windows.SECTION_MAP_READ,
-                    null,
-                    null,
-                    windows.PAGE_READONLY,
-                    // The documentation states that if no AllocationAttribute is specified, then SEC_COMMIT is the default.
-                    // In practice, this isn't the case and specifying 0 will result in INVALID_PARAMETER_6.
-                    windows.SEC_COMMIT,
-                    coff_file.handle,
-                );
-                if (create_section_rc != .SUCCESS) return error.MissingDebugInfo;
-                errdefer windows.CloseHandle(section_handle);
-
-                var coff_len: usize = 0;
-                var base_ptr: usize = 0;
-                const map_section_rc = windows.ntdll.NtMapViewOfSection(
-                    section_handle,
-                    process_handle,
-                    @ptrCast(&base_ptr),
-                    null,
-                    0,
-                    null,
-                    &coff_len,
-                    .ViewUnmap,
-                    0,
-                    windows.PAGE_READONLY,
-                );
-                if (map_section_rc != .SUCCESS) return error.MissingDebugInfo;
-                errdefer assert(windows.ntdll.NtUnmapViewOfSection(process_handle, @ptrFromInt(base_ptr)) == .SUCCESS);
-
-                const section_view = @as([*]const u8, @ptrFromInt(base_ptr))[0..coff_len];
-                coff_obj = try coff.Coff.init(section_view, false);
-
-                module.mapped_file = .{
-                    .file = coff_file,
-                    .section_handle = section_handle,
-                    .section_view = section_view,
-                };
-            }
-            errdefer if (module.mapped_file) |mapped_file| mapped_file.deinit();
-
-            obj_di.* = try readCoffDebugInfo(self.allocator, &coff_obj);
-            obj_di.base_address = module.base_address;
-
-            try self.address_map.putNoClobber(module.base_address, obj_di);
-            return obj_di;
-        }
-    }
-
-    return error.MissingDebugInfo;
-}
-
-fn lookupModuleNameWin32(self: *SelfInfo, address: usize) ?[]const u8 {
-    for (self.modules.items) |module| {
-        if (address >= module.base_address and address < module.base_address + module.size) {
-            return module.name;
-        }
-    }
-    return null;
-}
-
-fn lookupModuleNameDl(self: *SelfInfo, address: usize) ?[]const u8 {
-    _ = self;
-
-    var ctx: struct {
-        // Input
-        address: usize,
-        // Output
-        name: []const u8 = "",
-    } = .{ .address = address };
-    const CtxTy = @TypeOf(ctx);
-
-    if (posix.dl_iterate_phdr(&ctx, error{Found}, struct {
-        fn callback(info: *posix.dl_phdr_info, size: usize, context: *CtxTy) !void {
-            _ = size;
-            if (context.address < info.addr) return;
-            const phdrs = info.phdr[0..info.phnum];
-            for (phdrs) |*phdr| {
-                if (phdr.p_type != elf.PT_LOAD) continue;
-
-                const seg_start = info.addr +% phdr.p_vaddr;
-                const seg_end = seg_start + phdr.p_memsz;
-                if (context.address >= seg_start and context.address < seg_end) {
-                    context.name = mem.sliceTo(info.name, 0) orelse "";
-                    break;
-                }
-            } else return;
-
-            return error.Found;
-        }
-    }.callback)) {
-        return null;
-    } else |err| switch (err) {
-        error.Found => return fs.path.basename(ctx.name),
-    }
-
-    return null;
-}
-
-fn lookupModuleDl(self: *SelfInfo, address: usize) !*Module {
-    var ctx: struct {
-        // Input
-        address: usize,
-        // Output
-        base_address: usize = undefined,
-        name: []const u8 = undefined,
-        build_id: ?[]const u8 = null,
-        gnu_eh_frame: ?[]const u8 = null,
-    } = .{ .address = address };
-    const CtxTy = @TypeOf(ctx);
-
-    if (posix.dl_iterate_phdr(&ctx, error{Found}, struct {
-        fn callback(info: *posix.dl_phdr_info, size: usize, context: *CtxTy) !void {
-            _ = size;
-            // The base address is too high
-            if (context.address < info.addr)
-                return;
-
-            const phdrs = info.phdr[0..info.phnum];
-            for (phdrs) |*phdr| {
-                if (phdr.p_type != elf.PT_LOAD) continue;
-
-                // Overflowing addition is used to handle the case of vDSOs having a p_vaddr = 0xffffffffff700000
-                const seg_start = info.addr +% phdr.p_vaddr;
-                const seg_end = seg_start + phdr.p_memsz;
-                if (context.address >= seg_start and context.address < seg_end) {
-                    // Android libc uses NULL instead of an empty string to mark the
-                    // main program
-                    context.name = mem.sliceTo(info.name, 0) orelse "";
-                    context.base_address = info.addr;
-                    break;
-                }
-            } else return;
-
-            for (info.phdr[0..info.phnum]) |phdr| {
-                switch (phdr.p_type) {
-                    elf.PT_NOTE => {
-                        // Look for .note.gnu.build-id
-                        const note_bytes = @as([*]const u8, @ptrFromInt(info.addr + phdr.p_vaddr))[0..phdr.p_memsz];
-                        const name_size = mem.readInt(u32, note_bytes[0..4], native_endian);
-                        if (name_size != 4) continue;
-                        const desc_size = mem.readInt(u32, note_bytes[4..8], native_endian);
-                        const note_type = mem.readInt(u32, note_bytes[8..12], native_endian);
-                        if (note_type != elf.NT_GNU_BUILD_ID) continue;
-                        if (!mem.eql(u8, "GNU\x00", note_bytes[12..16])) continue;
-                        context.build_id = note_bytes[16..][0..desc_size];
-                    },
-                    elf.PT_GNU_EH_FRAME => {
-                        context.gnu_eh_frame = @as([*]const u8, @ptrFromInt(info.addr + phdr.p_vaddr))[0..phdr.p_memsz];
-                    },
-                    else => {},
-                }
-            }
-
-            // Stop the iteration
-            return error.Found;
-        }
-    }.callback)) {
-        return error.MissingDebugInfo;
-    } else |err| switch (err) {
-        error.Found => {},
-    }
-
-    if (self.address_map.get(ctx.base_address)) |obj_di| {
-        return obj_di;
-    }
-
-    const obj_di = try self.allocator.create(Module);
-    errdefer self.allocator.destroy(obj_di);
-
-    var sections: Dwarf.SectionArray = Dwarf.null_section_array;
-    if (ctx.gnu_eh_frame) |eh_frame_hdr| {
-        // This is a special case - pointer offsets inside .eh_frame_hdr
-        // are encoded relative to its base address, so we must use the
-        // version that is already memory mapped, and not the one that
-        // will be mapped separately from the ELF file.
-        sections[@intFromEnum(Dwarf.Section.Id.eh_frame_hdr)] = .{
-            .data = eh_frame_hdr,
-            .owned = false,
-        };
-    }
-
-    obj_di.* = try readElfDebugInfo(self.allocator, if (ctx.name.len > 0) ctx.name else null, ctx.build_id, null, &sections, null);
-    obj_di.base_address = ctx.base_address;
-
-    // Missing unwind info isn't treated as a failure, as the unwinder will fall back to FP-based unwinding
-    obj_di.dwarf.scanAllUnwindInfo(self.allocator, ctx.base_address) catch {};
-
-    try self.address_map.putNoClobber(ctx.base_address, obj_di);
-
-    return obj_di;
-}
-
-fn lookupModuleHaiku(self: *SelfInfo, address: usize) !*Module {
-    _ = self;
-    _ = address;
-    @panic("TODO implement lookup module for Haiku");
-}
-
-fn lookupModuleWasm(self: *SelfInfo, address: usize) !*Module {
-    _ = self;
-    _ = address;
-    @panic("TODO implement lookup module for Wasm");
-}
-
-pub const Module = switch (native_os) {
-    .macos, .ios, .watchos, .tvos, .visionos => struct {
-        base_address: usize,
-        vmaddr_slide: usize,
-        mapped_memory: []align(std.heap.page_size_min) const u8,
-        symbols: []const MachoSymbol,
-        strings: [:0]const u8,
-        ofiles: OFileTable,
-
-        // Backed by the in-memory sections mapped by the loader
-        unwind_info: ?[]const u8 = null,
-        eh_frame: ?[]const u8 = null,
-
-        const OFileTable = std.StringHashMap(OFileInfo);
-        const OFileInfo = struct {
-            di: Dwarf,
-            addr_table: std.StringHashMap(u64),
-        };
-
-        pub fn deinit(self: *@This(), allocator: Allocator) void {
-            var it = self.ofiles.iterator();
-            while (it.next()) |entry| {
-                const ofile = entry.value_ptr;
-                ofile.di.deinit(allocator);
-                ofile.addr_table.deinit();
-            }
-            self.ofiles.deinit();
-            allocator.free(self.symbols);
-            posix.munmap(self.mapped_memory);
-        }
-
-        fn loadOFile(self: *@This(), allocator: Allocator, o_file_path: []const u8) !*OFileInfo {
-            const o_file = try fs.cwd().openFile(o_file_path, .{});
-            const mapped_mem = try mapWholeFile(o_file);
-
-            const hdr: *const macho.mach_header_64 = @ptrCast(@alignCast(mapped_mem.ptr));
-            if (hdr.magic != std.macho.MH_MAGIC_64)
-                return error.InvalidDebugInfo;
-
-            var segcmd: ?macho.LoadCommandIterator.LoadCommand = null;
-            var 
symtabcmd: ?macho.symtab_command = null; - var it = macho.LoadCommandIterator{ - .ncmds = hdr.ncmds, - .buffer = mapped_mem[@sizeOf(macho.mach_header_64)..][0..hdr.sizeofcmds], - }; - while (it.next()) |cmd| switch (cmd.cmd()) { - .SEGMENT_64 => segcmd = cmd, - .SYMTAB => symtabcmd = cmd.cast(macho.symtab_command).?, - else => {}, - }; - - if (segcmd == null or symtabcmd == null) return error.MissingDebugInfo; - - // Parse symbols - const strtab = @as( - [*]const u8, - @ptrCast(&mapped_mem[symtabcmd.?.stroff]), - )[0 .. symtabcmd.?.strsize - 1 :0]; - const symtab = @as( - [*]const macho.nlist_64, - @ptrCast(@alignCast(&mapped_mem[symtabcmd.?.symoff])), - )[0..symtabcmd.?.nsyms]; - - // TODO handle tentative (common) symbols - var addr_table = std.StringHashMap(u64).init(allocator); - try addr_table.ensureTotalCapacity(@as(u32, @intCast(symtab.len))); - for (symtab) |sym| { - if (sym.n_strx == 0) continue; - if (sym.undf() or sym.tentative() or sym.abs()) continue; - const sym_name = mem.sliceTo(strtab[sym.n_strx..], 0); - // TODO is it possible to have a symbol collision? - addr_table.putAssumeCapacityNoClobber(sym_name, sym.n_value); - } - - var sections: Dwarf.SectionArray = Dwarf.null_section_array; - if (self.eh_frame) |eh_frame| sections[@intFromEnum(Dwarf.Section.Id.eh_frame)] = .{ - .data = eh_frame, - .owned = false, - }; - - for (segcmd.?.getSections()) |sect| { - if (!std.mem.eql(u8, "__DWARF", sect.segName())) continue; - - var section_index: ?usize = null; - inline for (@typeInfo(Dwarf.Section.Id).@"enum".fields, 0..) |section, i| { - if (mem.eql(u8, "__" ++ section.name, sect.sectName())) section_index = i; - } - if (section_index == null) continue; - - const section_bytes = try Dwarf.chopSlice(mapped_mem, sect.offset, sect.size); - sections[section_index.?] 
= .{ - .data = section_bytes, - .virtual_address = @intCast(sect.addr), - .owned = false, - }; - } - - const missing_debug_info = - sections[@intFromEnum(Dwarf.Section.Id.debug_info)] == null or - sections[@intFromEnum(Dwarf.Section.Id.debug_abbrev)] == null or - sections[@intFromEnum(Dwarf.Section.Id.debug_str)] == null or - sections[@intFromEnum(Dwarf.Section.Id.debug_line)] == null; - if (missing_debug_info) return error.MissingDebugInfo; - - var di: Dwarf = .{ - .endian = .little, - .sections = sections, - .is_macho = true, - }; - - try Dwarf.open(&di, allocator); - const info = OFileInfo{ - .di = di, - .addr_table = addr_table, - }; - - // Add the debug info to the cache - const result = try self.ofiles.getOrPut(o_file_path); - assert(!result.found_existing); - result.value_ptr.* = info; - - return result.value_ptr; - } - - pub fn getSymbolAtAddress(self: *@This(), allocator: Allocator, address: usize) !std.debug.Symbol { - nosuspend { - const result = try self.getOFileInfoForAddress(allocator, address); - if (result.symbol == null) return .{}; - - // Take the symbol name from the N_FUN STAB entry, we're going to - // use it if we fail to find the DWARF infos - const stab_symbol = mem.sliceTo(self.strings[result.symbol.?.strx..], 0); - if (result.o_file_info == null) return .{ .name = stab_symbol }; - - // Translate again the address, this time into an address inside the - // .o file - const relocated_address_o = result.o_file_info.?.addr_table.get(stab_symbol) orelse return .{ - .name = "???", - }; - - const addr_off = result.relocated_address - result.symbol.?.addr; - const o_file_di = &result.o_file_info.?.di; - if (o_file_di.findCompileUnit(relocated_address_o)) |compile_unit| { - return .{ - .name = o_file_di.getSymbolName(relocated_address_o) orelse "???", - .compile_unit_name = compile_unit.die.getAttrString( - o_file_di, - std.dwarf.AT.name, - o_file_di.section(.debug_str), - compile_unit.*, - ) catch |err| switch (err) { - error.MissingDebugInfo, error.InvalidDebugInfo => "???", - }, - .source_location = o_file_di.getLineNumberInfo( - allocator, - compile_unit, - relocated_address_o + addr_off, - ) catch |err| switch (err) { - error.MissingDebugInfo, error.InvalidDebugInfo => null, - else => return err, - }, - }; - } else |err| switch (err) { - error.MissingDebugInfo, error.InvalidDebugInfo => { - return .{ .name = stab_symbol }; - }, - else => return err, - } - } - } - - pub fn getOFileInfoForAddress(self: *@This(), allocator: Allocator, address: usize) !struct { - relocated_address: usize, - symbol: ?*const MachoSymbol = null, - o_file_info: ?*OFileInfo = null, - } { - nosuspend { - // Translate the VA into an address into this object - const relocated_address = address - self.vmaddr_slide; - - // Find the .o file where this symbol is defined - const symbol = machoSearchSymbols(self.symbols, relocated_address) orelse return .{ - .relocated_address = relocated_address, - }; - - // Check if its debug infos are already in the cache - const o_file_path = mem.sliceTo(self.strings[symbol.ofile..], 0); - const o_file_info = self.ofiles.getPtr(o_file_path) orelse - (self.loadOFile(allocator, o_file_path) catch |err| switch (err) { - error.FileNotFound, - error.MissingDebugInfo, - error.InvalidDebugInfo, - => return .{ - .relocated_address = relocated_address, - .symbol = symbol, - }, - else => return err, - }); - - return .{ - .relocated_address = relocated_address, - .symbol = symbol, - .o_file_info = o_file_info, - }; - } - } - - pub fn getDwarfInfoForAddress(self: *@This(), 
allocator: Allocator, address: usize) !?*Dwarf { - return if ((try self.getOFileInfoForAddress(allocator, address)).o_file_info) |o_file_info| &o_file_info.di else null; - } - }, - .uefi, .windows => struct { - base_address: usize, - pdb: ?Pdb, - dwarf: ?Dwarf, - coff_image_base: u64, - - /// Only used if pdb is non-null - coff_section_headers: []coff.SectionHeader, - - pub fn deinit(self: *@This(), gpa: Allocator) void { - if (self.dwarf) |*dwarf| { - dwarf.deinit(gpa); - } - - if (self.pdb) |*p| { - gpa.free(p.file_reader.interface.buffer); - gpa.destroy(p.file_reader); - p.deinit(); - gpa.free(self.coff_section_headers); - } - - self.* = undefined; - } - - fn getSymbolFromPdb(self: *@This(), relocated_address: usize) !?std.debug.Symbol { - var coff_section: *align(1) const coff.SectionHeader = undefined; - const mod_index = for (self.pdb.?.sect_contribs) |sect_contrib| { - if (sect_contrib.section > self.coff_section_headers.len) continue; - // Remember that SectionContribEntry.Section is 1-based. - coff_section = &self.coff_section_headers[sect_contrib.section - 1]; - - const vaddr_start = coff_section.virtual_address + sect_contrib.offset; - const vaddr_end = vaddr_start + sect_contrib.size; - if (relocated_address >= vaddr_start and relocated_address < vaddr_end) { - break sect_contrib.module_index; - } - } else { - // we have no information to add to the address - return null; - }; - - const module = (try self.pdb.?.getModule(mod_index)) orelse - return error.InvalidDebugInfo; - const obj_basename = fs.path.basename(module.obj_file_name); - - const symbol_name = self.pdb.?.getSymbolName( - module, - relocated_address - coff_section.virtual_address, - ) orelse "???"; - const opt_line_info = try self.pdb.?.getLineNumberInfo( - module, - relocated_address - coff_section.virtual_address, - ); - - return .{ - .name = symbol_name, - .compile_unit_name = obj_basename, - .source_location = opt_line_info, - }; - } - - pub fn getSymbolAtAddress(self: *@This(), allocator: Allocator, address: usize) !std.debug.Symbol { - // Translate the VA into an address into this object - const relocated_address = address - self.base_address; - - if (self.pdb != null) { - if (try self.getSymbolFromPdb(relocated_address)) |symbol| return symbol; - } - - if (self.dwarf) |*dwarf| { - const dwarf_address = relocated_address + self.coff_image_base; - return dwarf.getSymbol(allocator, dwarf_address); - } - - return .{}; - } - - pub fn getDwarfInfoForAddress(self: *@This(), allocator: Allocator, address: usize) !?*Dwarf { - _ = allocator; - _ = address; - - return switch (self.debug_data) { - .dwarf => |*dwarf| dwarf, - else => null, - }; - } - }, - .linux, .netbsd, .freebsd, .dragonfly, .openbsd, .haiku, .solaris, .illumos => Dwarf.ElfModule, - .wasi, .emscripten => struct { - pub fn deinit(self: *@This(), allocator: Allocator) void { - _ = self; - _ = allocator; - } - - pub fn getSymbolAtAddress(self: *@This(), allocator: Allocator, address: usize) !std.debug.Symbol { - _ = self; - _ = allocator; - _ = address; - return .{}; - } - - pub fn getDwarfInfoForAddress(self: *@This(), allocator: Allocator, address: usize) !?*Dwarf { - _ = self; - _ = allocator; - _ = address; - return null; - } - }, - else => Dwarf, -}; - -/// How is this different than `Module` when the host is Windows? -/// Why are both stored in the `SelfInfo` struct? -/// Boy, it sure would be nice if someone added documentation comments for this -/// struct explaining it. 
-pub const WindowsModule = struct { - base_address: usize, - size: u32, - name: []const u8, - handle: windows.HMODULE, - - // Set when the image file needed to be mapped from disk - mapped_file: ?struct { - file: File, - section_handle: windows.HANDLE, - section_view: []const u8, - - pub fn deinit(self: @This()) void { - const process_handle = windows.GetCurrentProcess(); - assert(windows.ntdll.NtUnmapViewOfSection(process_handle, @ptrCast(@constCast(self.section_view.ptr))) == .SUCCESS); - windows.CloseHandle(self.section_handle); - self.file.close(); - } - } = null, -}; - -/// This takes ownership of macho_file: users of this function should not close -/// it themselves, even on error. -/// TODO it's weird to take ownership even on error, rework this code. -fn readMachODebugInfo(allocator: Allocator, macho_file: File) !Module { - const mapped_mem = try mapWholeFile(macho_file); - - const hdr: *const macho.mach_header_64 = @ptrCast(@alignCast(mapped_mem.ptr)); - if (hdr.magic != macho.MH_MAGIC_64) - return error.InvalidDebugInfo; - - var it = macho.LoadCommandIterator{ - .ncmds = hdr.ncmds, - .buffer = mapped_mem[@sizeOf(macho.mach_header_64)..][0..hdr.sizeofcmds], - }; - const symtab = while (it.next()) |cmd| switch (cmd.cmd()) { - .SYMTAB => break cmd.cast(macho.symtab_command).?, - else => {}, - } else return error.MissingDebugInfo; - - const syms = @as( - [*]const macho.nlist_64, - @ptrCast(@alignCast(&mapped_mem[symtab.symoff])), - )[0..symtab.nsyms]; - const strings = mapped_mem[symtab.stroff..][0 .. symtab.strsize - 1 :0]; - - const symbols_buf = try allocator.alloc(MachoSymbol, syms.len); - - var ofile: u32 = undefined; - var last_sym: MachoSymbol = undefined; - var symbol_index: usize = 0; - var state: enum { - init, - oso_open, - oso_close, - bnsym, - fun_strx, - fun_size, - ensym, - } = .init; - - for (syms) |*sym| { - if (!sym.stab()) continue; - - // TODO handle globals N_GSYM, and statics N_STSYM - switch (sym.n_type) { - macho.N_OSO => { - switch (state) { - .init, .oso_close => { - state = .oso_open; - ofile = sym.n_strx; - }, - else => return error.InvalidDebugInfo, - } - }, - macho.N_BNSYM => { - switch (state) { - .oso_open, .ensym => { - state = .bnsym; - last_sym = .{ - .strx = 0, - .addr = sym.n_value, - .size = 0, - .ofile = ofile, - }; - }, - else => return error.InvalidDebugInfo, - } - }, - macho.N_FUN => { - switch (state) { - .bnsym => { - state = .fun_strx; - last_sym.strx = sym.n_strx; - }, - .fun_strx => { - state = .fun_size; - last_sym.size = @as(u32, @intCast(sym.n_value)); - }, - else => return error.InvalidDebugInfo, - } - }, - macho.N_ENSYM => { - switch (state) { - .fun_size => { - state = .ensym; - symbols_buf[symbol_index] = last_sym; - symbol_index += 1; - }, - else => return error.InvalidDebugInfo, - } - }, - macho.N_SO => { - switch (state) { - .init, .oso_close => {}, - .oso_open, .ensym => { - state = .oso_close; - }, - else => return error.InvalidDebugInfo, - } - }, - else => {}, - } - } - - switch (state) { - .init => return error.MissingDebugInfo, - .oso_close => {}, - else => return error.InvalidDebugInfo, - } - - const symbols = try allocator.realloc(symbols_buf, symbol_index); - - // Even though lld emits symbols in ascending order, this debug code - // should work for programs linked in any valid way. - // This sort is so that we can binary search later. 
-    mem.sort(MachoSymbol, symbols, {}, MachoSymbol.addressLessThan);
-
-    return .{
-        .base_address = undefined,
-        .vmaddr_slide = undefined,
-        .mapped_memory = mapped_mem,
-        .ofiles = Module.OFileTable.init(allocator),
-        .symbols = symbols,
-        .strings = strings,
-    };
-}
-
-fn readCoffDebugInfo(gpa: Allocator, coff_obj: *coff.Coff) !Module {
-    nosuspend {
-        var di: Module = .{
-            .base_address = undefined,
-            .coff_image_base = coff_obj.getImageBase(),
-            .coff_section_headers = undefined,
-            .pdb = null,
-            .dwarf = null,
-        };
-
-        if (coff_obj.getSectionByName(".debug_info")) |_| {
-            // This coff file has embedded DWARF debug info
-            var sections: Dwarf.SectionArray = Dwarf.null_section_array;
-            errdefer for (sections) |section| if (section) |s| if (s.owned) gpa.free(s.data);
-
-            inline for (@typeInfo(Dwarf.Section.Id).@"enum".fields, 0..) |section, i| {
-                sections[i] = if (coff_obj.getSectionByName("." ++ section.name)) |section_header| blk: {
-                    break :blk .{
-                        .data = try coff_obj.getSectionDataAlloc(section_header, gpa),
-                        .virtual_address = section_header.virtual_address,
-                        .owned = true,
-                    };
-                } else null;
-            }
-
-            var dwarf: Dwarf = .{
-                .endian = native_endian,
-                .sections = sections,
-                .is_macho = false,
-            };
-
-            try Dwarf.open(&dwarf, gpa);
-            di.dwarf = dwarf;
-        }
-
-        const raw_path = try coff_obj.getPdbPath() orelse return di;
-        const path = blk: {
-            if (fs.path.isAbsolute(raw_path)) {
-                break :blk raw_path;
-            } else {
-                const self_dir = try fs.selfExeDirPathAlloc(gpa);
-                defer gpa.free(self_dir);
-                break :blk try fs.path.join(gpa, &.{ self_dir, raw_path });
-            }
-        };
-        defer if (path.ptr != raw_path.ptr) gpa.free(path);
-
-        const pdb_file = std.fs.cwd().openFile(path, .{}) catch |err| switch (err) {
-            error.FileNotFound, error.IsDir => {
-                if (di.dwarf == null) return error.MissingDebugInfo;
-                return di;
-            },
-            else => |e| return e,
-        };
-        errdefer pdb_file.close();
-
-        const pdb_file_reader_buffer = try gpa.alloc(u8, 4096);
-        errdefer gpa.free(pdb_file_reader_buffer);
-
-        const pdb_file_reader = try gpa.create(File.Reader);
-        errdefer gpa.destroy(pdb_file_reader);
-
-        pdb_file_reader.* = pdb_file.reader(pdb_file_reader_buffer);
-
-        di.pdb = try Pdb.init(gpa, pdb_file_reader);
-        try di.pdb.?.parseInfoStream();
-        try di.pdb.?.parseDbiStream();
-
-        if (!mem.eql(u8, &coff_obj.guid, &di.pdb.?.guid) or coff_obj.age != di.pdb.?.age)
-            return error.InvalidDebugInfo;
-
-        // Only used by the pdb path
-        di.coff_section_headers = try coff_obj.getSectionHeadersAlloc(gpa);
-        errdefer gpa.free(di.coff_section_headers);
-
-        return di;
-    }
-}
-
-/// Reads debug info from an ELF file, or the current binary if none is specified.
-/// If the required sections aren't present but a reference to external debug info is,
-/// then this function will recurse to attempt to load the debug sections from
-/// an external file.
-pub fn readElfDebugInfo( - allocator: Allocator, - elf_filename: ?[]const u8, - build_id: ?[]const u8, - expected_crc: ?u32, - parent_sections: *Dwarf.SectionArray, - parent_mapped_mem: ?[]align(std.heap.page_size_min) const u8, -) !Dwarf.ElfModule { - nosuspend { - const elf_file = (if (elf_filename) |filename| blk: { - break :blk fs.cwd().openFile(filename, .{}); - } else fs.openSelfExe(.{})) catch |err| switch (err) { - error.FileNotFound => return error.MissingDebugInfo, - else => return err, - }; - - const mapped_mem = try mapWholeFile(elf_file); - return Dwarf.ElfModule.load( - allocator, - mapped_mem, - build_id, - expected_crc, - parent_sections, - parent_mapped_mem, - elf_filename, - ); - } -} - -const MachoSymbol = struct { - strx: u32, - addr: u64, - size: u32, - ofile: u32, - - /// Returns the address from the macho file - fn address(self: MachoSymbol) u64 { - return self.addr; - } - - fn addressLessThan(context: void, lhs: MachoSymbol, rhs: MachoSymbol) bool { - _ = context; - return lhs.addr < rhs.addr; - } -}; - -/// Takes ownership of file, even on error. -/// TODO it's weird to take ownership even on error, rework this code. -fn mapWholeFile(file: File) ![]align(std.heap.page_size_min) const u8 { - nosuspend { - defer file.close(); - - const file_len = math.cast(usize, try file.getEndPos()) orelse math.maxInt(usize); - const mapped_mem = try posix.mmap( - null, - file_len, - posix.PROT.READ, - .{ .TYPE = .SHARED }, - file.handle, - 0, - ); - errdefer posix.munmap(mapped_mem); - - return mapped_mem; - } -} - -fn machoSearchSymbols(symbols: []const MachoSymbol, address: usize) ?*const MachoSymbol { - var min: usize = 0; - var max: usize = symbols.len - 1; - while (min < max) { - const mid = min + (max - min) / 2; - const curr = &symbols[mid]; - const next = &symbols[mid + 1]; - if (address >= next.address()) { - min = mid + 1; - } else if (address < curr.address()) { - max = mid; - } else { - return curr; - } - } - - const max_sym = &symbols[symbols.len - 1]; - if (address >= max_sym.address()) - return max_sym; - - return null; -} - -test machoSearchSymbols { - const symbols = [_]MachoSymbol{ - .{ .addr = 100, .strx = undefined, .size = undefined, .ofile = undefined }, - .{ .addr = 200, .strx = undefined, .size = undefined, .ofile = undefined }, - .{ .addr = 300, .strx = undefined, .size = undefined, .ofile = undefined }, - }; - - try testing.expectEqual(null, machoSearchSymbols(&symbols, 0)); - try testing.expectEqual(null, machoSearchSymbols(&symbols, 99)); - try testing.expectEqual(&symbols[0], machoSearchSymbols(&symbols, 100).?); - try testing.expectEqual(&symbols[0], machoSearchSymbols(&symbols, 150).?); - try testing.expectEqual(&symbols[0], machoSearchSymbols(&symbols, 199).?); - - try testing.expectEqual(&symbols[1], machoSearchSymbols(&symbols, 200).?); - try testing.expectEqual(&symbols[1], machoSearchSymbols(&symbols, 250).?); - try testing.expectEqual(&symbols[1], machoSearchSymbols(&symbols, 299).?); - - try testing.expectEqual(&symbols[2], machoSearchSymbols(&symbols, 300).?); - try testing.expectEqual(&symbols[2], machoSearchSymbols(&symbols, 301).?); - try testing.expectEqual(&symbols[2], machoSearchSymbols(&symbols, 5000).?); -} - -/// Unwind a frame using MachO compact unwind info (from __unwind_info). -/// If the compact encoding can't encode a way to unwind a frame, it will -/// defer unwinding to DWARF, in which case `.eh_frame` will be used if available. 
-pub fn unwindFrameMachO( - allocator: Allocator, - base_address: usize, - context: *UnwindContext, - unwind_info: []const u8, - eh_frame: ?[]const u8, -) !usize { - const header = std.mem.bytesAsValue( - macho.unwind_info_section_header, - unwind_info[0..@sizeOf(macho.unwind_info_section_header)], - ); - const indices = std.mem.bytesAsSlice( - macho.unwind_info_section_header_index_entry, - unwind_info[header.indexSectionOffset..][0 .. header.indexCount * @sizeOf(macho.unwind_info_section_header_index_entry)], - ); - if (indices.len == 0) return error.MissingUnwindInfo; - - const mapped_pc = context.pc - base_address; - const second_level_index = blk: { - var left: usize = 0; - var len: usize = indices.len; - - while (len > 1) { - const mid = left + len / 2; - const offset = indices[mid].functionOffset; - if (mapped_pc < offset) { - len /= 2; - } else { - left = mid; - if (mapped_pc == offset) break; - len -= len / 2; - } - } - - // Last index is a sentinel containing the highest address as its functionOffset - if (indices[left].secondLevelPagesSectionOffset == 0) return error.MissingUnwindInfo; - break :blk &indices[left]; - }; - - const common_encodings = std.mem.bytesAsSlice( - macho.compact_unwind_encoding_t, - unwind_info[header.commonEncodingsArraySectionOffset..][0 .. header.commonEncodingsArrayCount * @sizeOf(macho.compact_unwind_encoding_t)], - ); - - const start_offset = second_level_index.secondLevelPagesSectionOffset; - const kind = std.mem.bytesAsValue( - macho.UNWIND_SECOND_LEVEL, - unwind_info[start_offset..][0..@sizeOf(macho.UNWIND_SECOND_LEVEL)], - ); - - const entry: struct { - function_offset: usize, - raw_encoding: u32, - } = switch (kind.*) { - .REGULAR => blk: { - const page_header = std.mem.bytesAsValue( - macho.unwind_info_regular_second_level_page_header, - unwind_info[start_offset..][0..@sizeOf(macho.unwind_info_regular_second_level_page_header)], - ); - - const entries = std.mem.bytesAsSlice( - macho.unwind_info_regular_second_level_entry, - unwind_info[start_offset + page_header.entryPageOffset ..][0 .. page_header.entryCount * @sizeOf(macho.unwind_info_regular_second_level_entry)], - ); - if (entries.len == 0) return error.InvalidUnwindInfo; - - var left: usize = 0; - var len: usize = entries.len; - while (len > 1) { - const mid = left + len / 2; - const offset = entries[mid].functionOffset; - if (mapped_pc < offset) { - len /= 2; - } else { - left = mid; - if (mapped_pc == offset) break; - len -= len / 2; - } - } - - break :blk .{ - .function_offset = entries[left].functionOffset, - .raw_encoding = entries[left].encoding, - }; - }, - .COMPRESSED => blk: { - const page_header = std.mem.bytesAsValue( - macho.unwind_info_compressed_second_level_page_header, - unwind_info[start_offset..][0..@sizeOf(macho.unwind_info_compressed_second_level_page_header)], - ); - - const entries = std.mem.bytesAsSlice( - macho.UnwindInfoCompressedEntry, - unwind_info[start_offset + page_header.entryPageOffset ..][0 .. 
page_header.entryCount * @sizeOf(macho.UnwindInfoCompressedEntry)], - ); - if (entries.len == 0) return error.InvalidUnwindInfo; - - var left: usize = 0; - var len: usize = entries.len; - while (len > 1) { - const mid = left + len / 2; - const offset = second_level_index.functionOffset + entries[mid].funcOffset; - if (mapped_pc < offset) { - len /= 2; - } else { - left = mid; - if (mapped_pc == offset) break; - len -= len / 2; - } - } - - const entry = entries[left]; - const function_offset = second_level_index.functionOffset + entry.funcOffset; - if (entry.encodingIndex < header.commonEncodingsArrayCount) { - if (entry.encodingIndex >= common_encodings.len) return error.InvalidUnwindInfo; - break :blk .{ - .function_offset = function_offset, - .raw_encoding = common_encodings[entry.encodingIndex], - }; - } else { - const local_index = try math.sub( - u8, - entry.encodingIndex, - math.cast(u8, header.commonEncodingsArrayCount) orelse return error.InvalidUnwindInfo, - ); - const local_encodings = std.mem.bytesAsSlice( - macho.compact_unwind_encoding_t, - unwind_info[start_offset + page_header.encodingsPageOffset ..][0 .. page_header.encodingsCount * @sizeOf(macho.compact_unwind_encoding_t)], - ); - if (local_index >= local_encodings.len) return error.InvalidUnwindInfo; - break :blk .{ - .function_offset = function_offset, - .raw_encoding = local_encodings[local_index], - }; - } - }, - else => return error.InvalidUnwindInfo, - }; - - if (entry.raw_encoding == 0) return error.NoUnwindInfo; - const reg_context = Dwarf.abi.RegisterContext{ - .eh_frame = false, - .is_macho = true, - }; - - const encoding: macho.CompactUnwindEncoding = @bitCast(entry.raw_encoding); - const new_ip = switch (builtin.cpu.arch) { - .x86_64 => switch (encoding.mode.x86_64) { - .OLD => return error.UnimplementedUnwindEncoding, - .RBP_FRAME => blk: { - const regs: [5]u3 = .{ - encoding.value.x86_64.frame.reg0, - encoding.value.x86_64.frame.reg1, - encoding.value.x86_64.frame.reg2, - encoding.value.x86_64.frame.reg3, - encoding.value.x86_64.frame.reg4, - }; - - const frame_offset = encoding.value.x86_64.frame.frame_offset * @sizeOf(usize); - var max_reg: usize = 0; - inline for (regs, 0..) |reg, i| { - if (reg > 0) max_reg = i; - } - - const fp = (try regValueNative(context.thread_context, fpRegNum(reg_context), reg_context)).*; - const new_sp = fp + 2 * @sizeOf(usize); - - const ip_ptr = fp + @sizeOf(usize); - const new_ip = @as(*const usize, @ptrFromInt(ip_ptr)).*; - const new_fp = @as(*const usize, @ptrFromInt(fp)).*; - - (try regValueNative(context.thread_context, fpRegNum(reg_context), reg_context)).* = new_fp; - (try regValueNative(context.thread_context, spRegNum(reg_context), reg_context)).* = new_sp; - (try regValueNative(context.thread_context, ip_reg_num, reg_context)).* = new_ip; - - for (regs, 0..) |reg, i| { - if (reg == 0) continue; - const addr = fp - frame_offset + i * @sizeOf(usize); - const reg_number = try Dwarf.compactUnwindToDwarfRegNumber(reg); - (try regValueNative(context.thread_context, reg_number, reg_context)).* = @as(*const usize, @ptrFromInt(addr)).*; - } - - break :blk new_ip; - }, - .STACK_IMMD, - .STACK_IND, - => blk: { - const sp = (try regValueNative(context.thread_context, spRegNum(reg_context), reg_context)).*; - const stack_size = if (encoding.mode.x86_64 == .STACK_IMMD) - @as(usize, encoding.value.x86_64.frameless.stack.direct.stack_size) * @sizeOf(usize) - else stack_size: { - // In .STACK_IND, the stack size is inferred from the subq instruction at the beginning of the function. 
- const sub_offset_addr = - base_address + - entry.function_offset + - encoding.value.x86_64.frameless.stack.indirect.sub_offset; - - // `sub_offset_addr` points to the offset of the literal within the instruction - const sub_operand = @as(*align(1) const u32, @ptrFromInt(sub_offset_addr)).*; - break :stack_size sub_operand + @sizeOf(usize) * @as(usize, encoding.value.x86_64.frameless.stack.indirect.stack_adjust); - }; - - // Decode the Lehmer-coded sequence of registers. - // For a description of the encoding see lib/libc/include/any-macos.13-any/mach-o/compact_unwind_encoding.h - - // Decode the variable-based permutation number into its digits. Each digit represents - // an index into the list of register numbers that weren't yet used in the sequence at - // the time the digit was added. - const reg_count = encoding.value.x86_64.frameless.stack_reg_count; - const ip_ptr = if (reg_count > 0) reg_blk: { - var digits: [6]u3 = undefined; - var accumulator: usize = encoding.value.x86_64.frameless.stack_reg_permutation; - var base: usize = 2; - for (0..reg_count) |i| { - const div = accumulator / base; - digits[digits.len - 1 - i] = @intCast(accumulator - base * div); - accumulator = div; - base += 1; - } - - const reg_numbers = [_]u3{ 1, 2, 3, 4, 5, 6 }; - var registers: [reg_numbers.len]u3 = undefined; - var used_indices = [_]bool{false} ** reg_numbers.len; - for (digits[digits.len - reg_count ..], 0..) |target_unused_index, i| { - var unused_count: u8 = 0; - const unused_index = for (used_indices, 0..) |used, index| { - if (!used) { - if (target_unused_index == unused_count) break index; - unused_count += 1; - } - } else unreachable; - - registers[i] = reg_numbers[unused_index]; - used_indices[unused_index] = true; - } - - var reg_addr = sp + stack_size - @sizeOf(usize) * @as(usize, reg_count + 1); - for (0..reg_count) |i| { - const reg_number = try Dwarf.compactUnwindToDwarfRegNumber(registers[i]); - (try regValueNative(context.thread_context, reg_number, reg_context)).* = @as(*const usize, @ptrFromInt(reg_addr)).*; - reg_addr += @sizeOf(usize); - } - - break :reg_blk reg_addr; - } else sp + stack_size - @sizeOf(usize); - - const new_ip = @as(*const usize, @ptrFromInt(ip_ptr)).*; - const new_sp = ip_ptr + @sizeOf(usize); - - (try regValueNative(context.thread_context, spRegNum(reg_context), reg_context)).* = new_sp; - (try regValueNative(context.thread_context, ip_reg_num, reg_context)).* = new_ip; - - break :blk new_ip; - }, - .DWARF => { - return unwindFrameMachODwarf(allocator, base_address, context, eh_frame orelse return error.MissingEhFrame, @intCast(encoding.value.x86_64.dwarf)); - }, - }, - .aarch64, .aarch64_be => switch (encoding.mode.arm64) { - .OLD => return error.UnimplementedUnwindEncoding, - .FRAMELESS => blk: { - const sp = (try regValueNative(context.thread_context, spRegNum(reg_context), reg_context)).*; - const new_sp = sp + encoding.value.arm64.frameless.stack_size * 16; - const new_ip = (try regValueNative(context.thread_context, 30, reg_context)).*; - (try regValueNative(context.thread_context, spRegNum(reg_context), reg_context)).* = new_sp; - break :blk new_ip; - }, - .DWARF => { - return unwindFrameMachODwarf(allocator, base_address, context, eh_frame orelse return error.MissingEhFrame, @intCast(encoding.value.arm64.dwarf)); - }, - .FRAME => blk: { - const fp = (try regValueNative(context.thread_context, fpRegNum(reg_context), reg_context)).*; - const ip_ptr = fp + @sizeOf(usize); - - var reg_addr = fp - @sizeOf(usize); - inline for 
(@typeInfo(@TypeOf(encoding.value.arm64.frame.x_reg_pairs)).@"struct".fields, 0..) |field, i| { - if (@field(encoding.value.arm64.frame.x_reg_pairs, field.name) != 0) { - (try regValueNative(context.thread_context, 19 + i, reg_context)).* = @as(*const usize, @ptrFromInt(reg_addr)).*; - reg_addr += @sizeOf(usize); - (try regValueNative(context.thread_context, 20 + i, reg_context)).* = @as(*const usize, @ptrFromInt(reg_addr)).*; - reg_addr += @sizeOf(usize); - } - } - - inline for (@typeInfo(@TypeOf(encoding.value.arm64.frame.d_reg_pairs)).@"struct".fields, 0..) |field, i| { - if (@field(encoding.value.arm64.frame.d_reg_pairs, field.name) != 0) { - // Only the lower half of the 128-bit V registers are restored during unwinding - @memcpy( - try regBytes(context.thread_context, 64 + 8 + i, context.reg_context), - std.mem.asBytes(@as(*const usize, @ptrFromInt(reg_addr))), - ); - reg_addr += @sizeOf(usize); - @memcpy( - try regBytes(context.thread_context, 64 + 9 + i, context.reg_context), - std.mem.asBytes(@as(*const usize, @ptrFromInt(reg_addr))), - ); - reg_addr += @sizeOf(usize); - } - } - - const new_ip = @as(*const usize, @ptrFromInt(ip_ptr)).*; - const new_fp = @as(*const usize, @ptrFromInt(fp)).*; - - (try regValueNative(context.thread_context, fpRegNum(reg_context), reg_context)).* = new_fp; - (try regValueNative(context.thread_context, ip_reg_num, reg_context)).* = new_ip; - - break :blk new_ip; - }, - }, - else => return error.UnimplementedArch, - }; - - context.pc = stripInstructionPtrAuthCode(new_ip); - if (context.pc > 0) context.pc -= 1; - return new_ip; -} - -pub const UnwindContext = struct { - allocator: Allocator, - cfa: ?usize, - pc: usize, - thread_context: *std.debug.ThreadContext, - reg_context: Dwarf.abi.RegisterContext, - vm: VirtualMachine, - stack_machine: Dwarf.expression.StackMachine(.{ .call_frame_context = true }), - - pub fn init( - allocator: Allocator, - thread_context: *std.debug.ThreadContext, - ) !UnwindContext { - comptime assert(supports_unwinding); - - const pc = stripInstructionPtrAuthCode( - (try regValueNative(thread_context, ip_reg_num, null)).*, - ); - - const context_copy = try allocator.create(std.debug.ThreadContext); - std.debug.copyContext(thread_context, context_copy); - - return .{ - .allocator = allocator, - .cfa = null, - .pc = pc, - .thread_context = context_copy, - .reg_context = undefined, - .vm = .{}, - .stack_machine = .{}, - }; - } - - pub fn deinit(self: *UnwindContext) void { - self.vm.deinit(self.allocator); - self.stack_machine.deinit(self.allocator); - self.allocator.destroy(self.thread_context); - self.* = undefined; - } - - pub fn getFp(self: *const UnwindContext) !usize { - return (try regValueNative(self.thread_context, fpRegNum(self.reg_context), self.reg_context)).*; - } -}; - -/// Some platforms use pointer authentication - the upper bits of instruction pointers contain a signature. -/// This function clears these signature bits to make the pointer usable. -pub inline fn stripInstructionPtrAuthCode(ptr: usize) usize { - if (native_arch.isAARCH64()) { - // `hint 0x07` maps to `xpaclri` (or `nop` if the hardware doesn't support it) - // The save / restore is because `xpaclri` operates on x30 (LR) - return asm ( - \\mov x16, x30 - \\mov x30, x15 - \\hint 0x07 - \\mov x15, x30 - \\mov x30, x16 - : [ret] "={x15}" (-> usize), - : [ptr] "{x15}" (ptr), - : .{ .x16 = true }); - } - - return ptr; -} - -/// Unwind a stack frame using DWARF unwinding info, updating the register context. 
-/// -/// If `.eh_frame_hdr` is available and complete, it will be used to binary search for the FDE. -/// Otherwise, a linear scan of `.eh_frame` and `.debug_frame` is done to find the FDE. The latter -/// may require lazily loading the data in those sections. -/// -/// `explicit_fde_offset` is for cases where the FDE offset is known, such as when __unwind_info -/// defers unwinding to DWARF. This is an offset into the `.eh_frame` section. -pub fn unwindFrameDwarf( - allocator: Allocator, - di: *Dwarf, - base_address: usize, - context: *UnwindContext, - explicit_fde_offset: ?usize, -) !usize { - if (!supports_unwinding) return error.UnsupportedCpuArchitecture; - if (context.pc == 0) return 0; - - const endian = di.endian; - - // Find the FDE and CIE - const cie, const fde = if (explicit_fde_offset) |fde_offset| blk: { - const dwarf_section: Dwarf.Section.Id = .eh_frame; - const frame_section = di.section(dwarf_section) orelse return error.MissingFDE; - if (fde_offset >= frame_section.len) return error.MissingFDE; - - var fbr: std.Io.Reader = .fixed(frame_section); - fbr.seek = fde_offset; - - const fde_entry_header = try Dwarf.EntryHeader.read(&fbr, dwarf_section, endian); - if (fde_entry_header.type != .fde) return error.MissingFDE; - - const cie_offset = fde_entry_header.type.fde; - fbr.seek = @intCast(cie_offset); - - const cie_entry_header = try Dwarf.EntryHeader.read(&fbr, dwarf_section, endian); - if (cie_entry_header.type != .cie) return Dwarf.bad(); - - const cie = try Dwarf.CommonInformationEntry.parse( - cie_entry_header.entry_bytes, - 0, - true, - cie_entry_header.format, - dwarf_section, - cie_entry_header.length_offset, - @sizeOf(usize), - native_endian, - ); - const fde = try Dwarf.FrameDescriptionEntry.parse( - fde_entry_header.entry_bytes, - 0, - true, - cie, - @sizeOf(usize), - native_endian, - ); - - break :blk .{ cie, fde }; - } else blk: { - // `.eh_frame_hdr` may be incomplete. We'll try it first, but if the lookup fails, we fall - // back to loading `.eh_frame`/`.debug_frame` and using those from that point on. - - if (di.eh_frame_hdr) |header| hdr: { - const eh_frame_len = if (di.section(.eh_frame)) |eh_frame| eh_frame.len else { - try di.scanCieFdeInfo(allocator, base_address); - di.eh_frame_hdr = null; - break :hdr; - }; - - var cie: Dwarf.CommonInformationEntry = undefined; - var fde: Dwarf.FrameDescriptionEntry = undefined; - - header.findEntry( - eh_frame_len, - @intFromPtr(di.section(.eh_frame_hdr).?.ptr), - context.pc, - &cie, - &fde, - endian, - ) catch |err| switch (err) { - error.MissingDebugInfo => { - // `.eh_frame_hdr` appears to be incomplete, so go ahead and populate `cie_map` - // and `fde_list`, and fall back to the binary search logic below. - try di.scanCieFdeInfo(allocator, base_address); - - // Since `.eh_frame_hdr` is incomplete, we're very likely to get more lookup - // failures using it, and we've just built a complete, sorted list of FDEs - // anyway, so just stop using `.eh_frame_hdr` altogether. 
- di.eh_frame_hdr = null;
-
- break :hdr;
- },
- else => return err,
- };
-
- break :blk .{ cie, fde };
- }
-
- const index = std.sort.binarySearch(Dwarf.FrameDescriptionEntry, di.fde_list.items, context.pc, struct {
- pub fn compareFn(pc: usize, item: Dwarf.FrameDescriptionEntry) std.math.Order {
- if (pc < item.pc_begin) return .lt;
-
- const range_end = item.pc_begin + item.pc_range;
- if (pc < range_end) return .eq;
-
- return .gt;
- }
- }.compareFn);
-
- const fde = if (index) |i| di.fde_list.items[i] else return error.MissingFDE;
- const cie = di.cie_map.get(fde.cie_length_offset) orelse return error.MissingCIE;
-
- break :blk .{ cie, fde };
- };
-
- var expression_context: Dwarf.expression.Context = .{
- .format = cie.format,
- .compile_unit = di.findCompileUnit(fde.pc_begin) catch null,
- .thread_context = context.thread_context,
- .reg_context = context.reg_context,
- .cfa = context.cfa,
- };
-
- context.vm.reset();
- context.reg_context.eh_frame = cie.version != 4;
- context.reg_context.is_macho = di.is_macho;
-
- const row = try context.vm.runToNative(context.allocator, context.pc, cie, fde);
- context.cfa = switch (row.cfa.rule) {
- .val_offset => |offset| blk: {
- const register = row.cfa.register orelse return error.InvalidCFARule;
- const value = mem.readInt(usize, (try regBytes(context.thread_context, register, context.reg_context))[0..@sizeOf(usize)], native_endian);
- break :blk try applyOffset(value, offset);
- },
- .expression => |expr| blk: {
- context.stack_machine.reset();
- const value = try context.stack_machine.run(
- expr,
- context.allocator,
- expression_context,
- context.cfa,
- );
-
- if (value) |v| {
- if (v != .generic) return error.InvalidExpressionValue;
- break :blk v.generic;
- } else return error.NoExpressionValue;
- },
- else => return error.InvalidCFARule,
- };
-
- expression_context.cfa = context.cfa;
-
- // Buffering the modifications is done because copying the thread context is not portable;
- // some implementations (i.e. Darwin) use internal pointers to the mcontext.
- var arena = std.heap.ArenaAllocator.init(context.allocator);
- defer arena.deinit();
- const update_allocator = arena.allocator();
-
- const RegisterUpdate = struct {
- // Backed by thread_context
- dest: []u8,
- // Backed by arena
- src: []const u8,
- prev: ?*@This(),
- };
-
- var update_tail: ?*RegisterUpdate = null;
- var has_return_address = true;
- for (context.vm.rowColumns(row)) |column| {
- if (column.register) |register| {
- if (register == cie.return_address_register) {
- has_return_address = column.rule != .undefined;
- }
-
- const dest = try regBytes(context.thread_context, register, context.reg_context);
- const src = try update_allocator.alloc(u8, dest.len);
-
- const prev = update_tail;
- update_tail = try update_allocator.create(RegisterUpdate);
- update_tail.?.* = .{
- .dest = dest,
- .src = src,
- .prev = prev,
- };
-
- try column.resolveValue(context, expression_context, src);
- }
- }
-
- // On all implemented architectures, the CFA is defined as being the previous frame's SP
- (try regValueNative(context.thread_context, spRegNum(context.reg_context), context.reg_context)).* = context.cfa.?;
-
- while (update_tail) |tail| {
- @memcpy(tail.dest, tail.src);
- update_tail = tail.prev;
- }
-
- if (has_return_address) {
- context.pc = stripInstructionPtrAuthCode(mem.readInt(usize, (try regBytes(
- context.thread_context,
- cie.return_address_register,
- context.reg_context,
- ))[0..@sizeOf(usize)], native_endian));
- } else {
- context.pc = 0;
- }
-
- (try regValueNative(context.thread_context, ip_reg_num, context.reg_context)).* = context.pc;
-
- // The call instruction will have pushed the address of the instruction that follows the call as the return address.
- // This next instruction may be past the end of the function if the caller was `noreturn` (i.e. the last instruction in
- // the function was the call). If we were to look up an FDE entry using the return address directly, it could end up
- // either not finding an FDE at all, or using the next FDE in the program, producing incorrect results. To prevent this,
- // we subtract one so that the next lookup is guaranteed to land inside the function that made the call.
- //
- // The exception to this rule is signal frames, where execution would be returned to the instruction
- // that triggered the handler.
- const return_address = context.pc;
- if (context.pc > 0 and !cie.isSignalFrame()) context.pc -= 1;
-
- return return_address;
-}
-
-fn fpRegNum(reg_context: Dwarf.abi.RegisterContext) u8 {
- return Dwarf.abi.fpRegNum(native_arch, reg_context);
-}
-
-fn spRegNum(reg_context: Dwarf.abi.RegisterContext) u8 {
- return Dwarf.abi.spRegNum(native_arch, reg_context);
-}
-
-const ip_reg_num = Dwarf.abi.ipRegNum(native_arch).?;
-
-/// Tells whether unwinding for the host is implemented.
-pub const supports_unwinding = supportsUnwinding(&builtin.target);
-
-comptime {
- if (supports_unwinding) assert(Dwarf.abi.supportsUnwinding(&builtin.target));
-}
-
-/// Tells whether unwinding for this target is *implemented* here in the Zig
-/// standard library.
-///
-/// See also `Dwarf.abi.supportsUnwinding` which tells whether Dwarf supports
-/// unwinding on that target *in theory*.
-pub fn supportsUnwinding(target: *const std.Target) bool { - return switch (target.cpu.arch) { - .x86 => switch (target.os.tag) { - .linux, .netbsd, .solaris, .illumos => true, - else => false, - }, - .x86_64 => switch (target.os.tag) { - .linux, .netbsd, .freebsd, .openbsd, .macos, .ios, .solaris, .illumos => true, - else => false, - }, - .arm, .armeb, .thumb, .thumbeb => switch (target.os.tag) { - .linux => true, - else => false, - }, - .aarch64, .aarch64_be => switch (target.os.tag) { - .linux, .netbsd, .freebsd, .macos, .ios => true, - else => false, - }, - // Unwinding is possible on other targets but this implementation does - // not support them...yet! - else => false, - }; -} - -fn unwindFrameMachODwarf( - allocator: Allocator, - base_address: usize, - context: *UnwindContext, - eh_frame: []const u8, - fde_offset: usize, -) !usize { - var di: Dwarf = .{ - .endian = native_endian, - .is_macho = true, - }; - defer di.deinit(context.allocator); - - di.sections[@intFromEnum(Dwarf.Section.Id.eh_frame)] = .{ - .data = eh_frame, - .owned = false, - }; - - return unwindFrameDwarf(allocator, &di, base_address, context, fde_offset); -} - -/// This is a virtual machine that runs DWARF call frame instructions. -pub const VirtualMachine = struct { - /// See section 6.4.1 of the DWARF5 specification for details on each - const RegisterRule = union(enum) { - // The spec says that the default rule for each column is the undefined rule. - // However, it also allows ABI / compiler authors to specify alternate defaults, so - // there is a distinction made here. - default: void, - undefined: void, - same_value: void, - // offset(N) - offset: i64, - // val_offset(N) - val_offset: i64, - // register(R) - register: u8, - // expression(E) - expression: []const u8, - // val_expression(E) - val_expression: []const u8, - // Augmenter-defined rule - architectural: void, - }; - - /// Each row contains unwinding rules for a set of registers. - pub const Row = struct { - /// Offset from `FrameDescriptionEntry.pc_begin` - offset: u64 = 0, - /// Special-case column that defines the CFA (Canonical Frame Address) rule. - /// The register field of this column defines the register that CFA is derived from. - cfa: Column = .{}, - /// The register fields in these columns define the register the rule applies to. - columns: ColumnRange = .{}, - /// Indicates that the next write to any column in this row needs to copy - /// the backing column storage first, as it may be referenced by previous rows. 
- copy_on_write: bool = false, - }; - - pub const Column = struct { - register: ?u8 = null, - rule: RegisterRule = .{ .default = {} }, - - /// Resolves the register rule and places the result into `out` (see regBytes) - pub fn resolveValue( - self: Column, - context: *SelfInfo.UnwindContext, - expression_context: std.debug.Dwarf.expression.Context, - out: []u8, - ) !void { - switch (self.rule) { - .default => { - const register = self.register orelse return error.InvalidRegister; - try getRegDefaultValue(register, context, out); - }, - .undefined => { - @memset(out, undefined); - }, - .same_value => { - // TODO: This copy could be eliminated if callers always copy the state then call this function to update it - const register = self.register orelse return error.InvalidRegister; - const src = try regBytes(context.thread_context, register, context.reg_context); - if (src.len != out.len) return error.RegisterSizeMismatch; - @memcpy(out, src); - }, - .offset => |offset| { - if (context.cfa) |cfa| { - const addr = try applyOffset(cfa, offset); - const ptr: *const usize = @ptrFromInt(addr); - mem.writeInt(usize, out[0..@sizeOf(usize)], ptr.*, native_endian); - } else return error.InvalidCFA; - }, - .val_offset => |offset| { - if (context.cfa) |cfa| { - mem.writeInt(usize, out[0..@sizeOf(usize)], try applyOffset(cfa, offset), native_endian); - } else return error.InvalidCFA; - }, - .register => |register| { - const src = try regBytes(context.thread_context, register, context.reg_context); - if (src.len != out.len) return error.RegisterSizeMismatch; - @memcpy(out, try regBytes(context.thread_context, register, context.reg_context)); - }, - .expression => |expression| { - context.stack_machine.reset(); - const value = try context.stack_machine.run(expression, context.allocator, expression_context, context.cfa.?); - const addr = if (value) |v| blk: { - if (v != .generic) return error.InvalidExpressionValue; - break :blk v.generic; - } else return error.NoExpressionValue; - - const ptr: *usize = @ptrFromInt(addr); - mem.writeInt(usize, out[0..@sizeOf(usize)], ptr.*, native_endian); - }, - .val_expression => |expression| { - context.stack_machine.reset(); - const value = try context.stack_machine.run(expression, context.allocator, expression_context, context.cfa.?); - if (value) |v| { - if (v != .generic) return error.InvalidExpressionValue; - mem.writeInt(usize, out[0..@sizeOf(usize)], v.generic, native_endian); - } else return error.NoExpressionValue; - }, - .architectural => return error.UnimplementedRegisterRule, - } - } - }; - - const ColumnRange = struct { - /// Index into `columns` of the first column in this row. 
- start: usize = undefined, - len: u8 = 0, - }; - - columns: std.ArrayListUnmanaged(Column) = .empty, - stack: std.ArrayListUnmanaged(ColumnRange) = .empty, - current_row: Row = .{}, - - /// The result of executing the CIE's initial_instructions - cie_row: ?Row = null, - - pub fn deinit(self: *VirtualMachine, allocator: std.mem.Allocator) void { - self.stack.deinit(allocator); - self.columns.deinit(allocator); - self.* = undefined; - } - - pub fn reset(self: *VirtualMachine) void { - self.stack.clearRetainingCapacity(); - self.columns.clearRetainingCapacity(); - self.current_row = .{}; - self.cie_row = null; - } - - /// Return a slice backed by the row's non-CFA columns - pub fn rowColumns(self: VirtualMachine, row: Row) []Column { - if (row.columns.len == 0) return &.{}; - return self.columns.items[row.columns.start..][0..row.columns.len]; - } - - /// Either retrieves or adds a column for `register` (non-CFA) in the current row. - fn getOrAddColumn(self: *VirtualMachine, allocator: std.mem.Allocator, register: u8) !*Column { - for (self.rowColumns(self.current_row)) |*c| { - if (c.register == register) return c; - } - - if (self.current_row.columns.len == 0) { - self.current_row.columns.start = self.columns.items.len; - } - self.current_row.columns.len += 1; - - const column = try self.columns.addOne(allocator); - column.* = .{ - .register = register, - }; - - return column; - } - - /// Runs the CIE instructions, then the FDE instructions. Execution halts - /// once the row that corresponds to `pc` is known, and the row is returned. - pub fn runTo( - self: *VirtualMachine, - allocator: std.mem.Allocator, - pc: u64, - cie: std.debug.Dwarf.CommonInformationEntry, - fde: std.debug.Dwarf.FrameDescriptionEntry, - addr_size_bytes: u8, - endian: std.builtin.Endian, - ) !Row { - assert(self.cie_row == null); - if (pc < fde.pc_begin or pc >= fde.pc_begin + fde.pc_range) return error.AddressOutOfRange; - - var prev_row: Row = self.current_row; - - var cie_stream: std.Io.Reader = .fixed(cie.initial_instructions); - var fde_stream: std.Io.Reader = .fixed(fde.instructions); - const streams = [_]*std.Io.Reader{ &cie_stream, &fde_stream }; - - for (&streams, 0..) |stream, i| { - while (stream.seek < stream.buffer.len) { - const instruction = try std.debug.Dwarf.call_frame.Instruction.read(stream, addr_size_bytes, endian); - prev_row = try self.step(allocator, cie, i == 0, instruction); - if (pc < fde.pc_begin + self.current_row.offset) return prev_row; - } - } - - return self.current_row; - } - - pub fn runToNative( - self: *VirtualMachine, - allocator: std.mem.Allocator, - pc: u64, - cie: std.debug.Dwarf.CommonInformationEntry, - fde: std.debug.Dwarf.FrameDescriptionEntry, - ) !Row { - return self.runTo(allocator, pc, cie, fde, @sizeOf(usize), native_endian); - } - - fn resolveCopyOnWrite(self: *VirtualMachine, allocator: std.mem.Allocator) !void { - if (!self.current_row.copy_on_write) return; - - const new_start = self.columns.items.len; - if (self.current_row.columns.len > 0) { - try self.columns.ensureUnusedCapacity(allocator, self.current_row.columns.len); - self.columns.appendSliceAssumeCapacity(self.rowColumns(self.current_row)); - self.current_row.columns.start = new_start; - } - } - - /// Executes a single instruction. - /// If this instruction is from the CIE, `is_initial` should be set. - /// Returns the value of `current_row` before executing this instruction. 
- pub fn step( - self: *VirtualMachine, - allocator: std.mem.Allocator, - cie: std.debug.Dwarf.CommonInformationEntry, - is_initial: bool, - instruction: Dwarf.call_frame.Instruction, - ) !Row { - // CIE instructions must be run before FDE instructions - assert(!is_initial or self.cie_row == null); - if (!is_initial and self.cie_row == null) { - self.cie_row = self.current_row; - self.current_row.copy_on_write = true; - } - - const prev_row = self.current_row; - switch (instruction) { - .set_loc => |i| { - if (i.address <= self.current_row.offset) return error.InvalidOperation; - // TODO: Check cie.segment_selector_size != 0 for DWARFV4 - self.current_row.offset = i.address; - }, - inline .advance_loc, - .advance_loc1, - .advance_loc2, - .advance_loc4, - => |i| { - self.current_row.offset += i.delta * cie.code_alignment_factor; - self.current_row.copy_on_write = true; - }, - inline .offset, - .offset_extended, - .offset_extended_sf, - => |i| { - try self.resolveCopyOnWrite(allocator); - const column = try self.getOrAddColumn(allocator, i.register); - column.rule = .{ .offset = @as(i64, @intCast(i.offset)) * cie.data_alignment_factor }; - }, - inline .restore, - .restore_extended, - => |i| { - try self.resolveCopyOnWrite(allocator); - if (self.cie_row) |cie_row| { - const column = try self.getOrAddColumn(allocator, i.register); - column.rule = for (self.rowColumns(cie_row)) |cie_column| { - if (cie_column.register == i.register) break cie_column.rule; - } else .{ .default = {} }; - } else return error.InvalidOperation; - }, - .nop => {}, - .undefined => |i| { - try self.resolveCopyOnWrite(allocator); - const column = try self.getOrAddColumn(allocator, i.register); - column.rule = .{ .undefined = {} }; - }, - .same_value => |i| { - try self.resolveCopyOnWrite(allocator); - const column = try self.getOrAddColumn(allocator, i.register); - column.rule = .{ .same_value = {} }; - }, - .register => |i| { - try self.resolveCopyOnWrite(allocator); - const column = try self.getOrAddColumn(allocator, i.register); - column.rule = .{ .register = i.target_register }; - }, - .remember_state => { - try self.stack.append(allocator, self.current_row.columns); - self.current_row.copy_on_write = true; - }, - .restore_state => { - const restored_columns = self.stack.pop() orelse return error.InvalidOperation; - self.columns.shrinkRetainingCapacity(self.columns.items.len - self.current_row.columns.len); - try self.columns.ensureUnusedCapacity(allocator, restored_columns.len); - - self.current_row.columns.start = self.columns.items.len; - self.current_row.columns.len = restored_columns.len; - self.columns.appendSliceAssumeCapacity(self.columns.items[restored_columns.start..][0..restored_columns.len]); - }, - .def_cfa => |i| { - try self.resolveCopyOnWrite(allocator); - self.current_row.cfa = .{ - .register = i.register, - .rule = .{ .val_offset = @intCast(i.offset) }, - }; - }, - .def_cfa_sf => |i| { - try self.resolveCopyOnWrite(allocator); - self.current_row.cfa = .{ - .register = i.register, - .rule = .{ .val_offset = i.offset * cie.data_alignment_factor }, - }; - }, - .def_cfa_register => |i| { - try self.resolveCopyOnWrite(allocator); - if (self.current_row.cfa.register == null or self.current_row.cfa.rule != .val_offset) return error.InvalidOperation; - self.current_row.cfa.register = i.register; - }, - .def_cfa_offset => |i| { - try self.resolveCopyOnWrite(allocator); - if (self.current_row.cfa.register == null or self.current_row.cfa.rule != .val_offset) return error.InvalidOperation; - 
self.current_row.cfa.rule = .{ - .val_offset = @intCast(i.offset), - }; - }, - .def_cfa_offset_sf => |i| { - try self.resolveCopyOnWrite(allocator); - if (self.current_row.cfa.register == null or self.current_row.cfa.rule != .val_offset) return error.InvalidOperation; - self.current_row.cfa.rule = .{ - .val_offset = i.offset * cie.data_alignment_factor, - }; - }, - .def_cfa_expression => |i| { - try self.resolveCopyOnWrite(allocator); - self.current_row.cfa.register = undefined; - self.current_row.cfa.rule = .{ - .expression = i.block, - }; - }, - .expression => |i| { - try self.resolveCopyOnWrite(allocator); - const column = try self.getOrAddColumn(allocator, i.register); - column.rule = .{ - .expression = i.block, - }; - }, - .val_offset => |i| { - try self.resolveCopyOnWrite(allocator); - const column = try self.getOrAddColumn(allocator, i.register); - column.rule = .{ - .val_offset = @as(i64, @intCast(i.offset)) * cie.data_alignment_factor, - }; - }, - .val_offset_sf => |i| { - try self.resolveCopyOnWrite(allocator); - const column = try self.getOrAddColumn(allocator, i.register); - column.rule = .{ - .val_offset = i.offset * cie.data_alignment_factor, - }; - }, - .val_expression => |i| { - try self.resolveCopyOnWrite(allocator); - const column = try self.getOrAddColumn(allocator, i.register); - column.rule = .{ - .val_expression = i.block, - }; - }, - } - - return prev_row; - } -}; - -/// Returns the ABI-defined default value this register has in the unwinding table -/// before running any of the CIE instructions. The DWARF spec defines these as having -/// the .undefined rule by default, but allows ABI authors to override that. -fn getRegDefaultValue(reg_number: u8, context: *UnwindContext, out: []u8) !void { - switch (builtin.cpu.arch) { - .aarch64, .aarch64_be => { - // Callee-saved registers are initialized as if they had the .same_value rule - if (reg_number >= 19 and reg_number <= 28) { - const src = try regBytes(context.thread_context, reg_number, context.reg_context); - if (src.len != out.len) return error.RegisterSizeMismatch; - @memcpy(out, src); - return; - } - }, - else => {}, - } - - @memset(out, undefined); -} - -/// Since register rules are applied (usually) during a panic, -/// checked addition / subtraction is used so that we can return -/// an error and fall back to FP-based unwinding. -fn applyOffset(base: usize, offset: i64) !usize { - return if (offset >= 0) - try std.math.add(usize, base, @as(usize, @intCast(offset))) - else - try std.math.sub(usize, base, @as(usize, @intCast(-offset))); -} diff --git a/lib/std/debug/SelfInfo/Darwin.zig b/lib/std/debug/SelfInfo/Darwin.zig new file mode 100644 index 0000000000..a43f279f39 --- /dev/null +++ b/lib/std/debug/SelfInfo/Darwin.zig @@ -0,0 +1,993 @@ +mutex: std.Thread.Mutex, +/// Accessed through `Module.Adapter`. 
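+/// Keyed by each module's `text_base`, the runtime address of its `__TEXT` segment
+/// (see `findModule` and `Module.Adapter`).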
+modules: std.ArrayHashMapUnmanaged(Module, void, Module.Context, false),
+ofiles: std.StringArrayHashMapUnmanaged(?OFile),
+
+pub const init: SelfInfo = .{
+ .mutex = .{},
+ .modules = .empty,
+ .ofiles = .empty,
+};
+pub fn deinit(si: *SelfInfo, gpa: Allocator) void {
+ for (si.modules.keys()) |*module| {
+ unwind: {
+ const u = &(module.unwind orelse break :unwind catch break :unwind);
+ if (u.dwarf) |*dwarf| dwarf.deinit(gpa);
+ }
+ loaded: {
+ const l = &(module.loaded_macho orelse break :loaded catch break :loaded);
+ gpa.free(l.symbols);
+ posix.munmap(l.mapped_memory);
+ }
+ }
+ for (si.ofiles.values()) |*opt_ofile| {
+ const ofile = &(opt_ofile.* orelse continue);
+ ofile.dwarf.deinit(gpa);
+ ofile.symbols_by_name.deinit(gpa);
+ posix.munmap(ofile.mapped_memory);
+ }
+ si.modules.deinit(gpa);
+ si.ofiles.deinit(gpa);
+}
+
+pub fn getSymbol(si: *SelfInfo, gpa: Allocator, address: usize) Error!std.debug.Symbol {
+ const module = try si.findModule(gpa, address);
+ defer si.mutex.unlock();
+
+ const loaded_macho = try module.getLoadedMachO(gpa);
+
+ const vaddr = address - loaded_macho.vaddr_offset;
+ const symbol = MachoSymbol.find(loaded_macho.symbols, vaddr) orelse return .unknown;
+
+ // offset of `address` from start of `symbol`
+ const address_symbol_offset = vaddr - symbol.addr;
+
+ // Take the symbol name from the N_FUN STAB entry; we're going to
+ // use it if we fail to find the DWARF info.
+ const stab_symbol = mem.sliceTo(loaded_macho.strings[symbol.strx..], 0);
+
+ // If any information is missing, we can at least return this from now on.
+ const sym_only_result: std.debug.Symbol = .{
+ .name = stab_symbol,
+ .compile_unit_name = null,
+ .source_location = null,
+ };
+
+ if (symbol.ofile == MachoSymbol.unknown_ofile) {
+ // We don't have STAB info, so can't track down the object file; all we can do is return the symbol name.
+ return sym_only_result;
+ }
+
+ const o_file: *OFile = of: {
+ const path = mem.sliceTo(loaded_macho.strings[symbol.ofile..], 0);
+ const gop = try si.ofiles.getOrPut(gpa, path);
+ if (!gop.found_existing) {
+ gop.value_ptr.* = loadOFile(gpa, path) catch null;
+ }
+ if (gop.value_ptr.*) |*o_file| {
+ break :of o_file;
+ } else {
+ return sym_only_result;
+ }
+ };
+
+ const symbol_index = o_file.symbols_by_name.getKeyAdapted(
+ @as([]const u8, stab_symbol),
+ @as(OFile.SymbolAdapter, .{ .strtab = o_file.strtab, .symtab = o_file.symtab }),
+ ) orelse return sym_only_result;
+ const symbol_ofile_vaddr = o_file.symtab[symbol_index].n_value;
+
+ const compile_unit = o_file.dwarf.findCompileUnit(native_endian, symbol_ofile_vaddr) catch return sym_only_result;
+
+ return .{
+ .name = o_file.dwarf.getSymbolName(symbol_ofile_vaddr + address_symbol_offset) orelse stab_symbol,
+ .compile_unit_name = compile_unit.die.getAttrString(
+ &o_file.dwarf,
+ native_endian,
+ std.dwarf.AT.name,
+ o_file.dwarf.section(.debug_str),
+ compile_unit,
+ ) catch |err| switch (err) {
+ error.MissingDebugInfo, error.InvalidDebugInfo => null,
+ },
+ .source_location = o_file.dwarf.getLineNumberInfo(
+ gpa,
+ native_endian,
+ compile_unit,
+ symbol_ofile_vaddr + address_symbol_offset,
+ ) catch null,
+ };
+}
+pub fn getModuleName(si: *SelfInfo, gpa: Allocator, address: usize) Error![]const u8 {
+ const module = try si.findModule(gpa, address);
+ defer si.mutex.unlock();
+ return module.name;
+}
+
+pub const can_unwind: bool = true;
+pub const UnwindContext = std.debug.Dwarf.SelfUnwinder;
+/// Unwind a frame using MachO compact unwind info (from `__unwind_info`).
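+/// The encoding for the frame's PC is found with a two-level lookup: a binary
+/// search of the first-level index, then a search of the second-level page
+/// (`REGULAR` or `COMPRESSED`) that it selects.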
+/// If the compact encoding can't encode a way to unwind a frame, it will +/// defer unwinding to DWARF, in which case `__eh_frame` will be used if available. +pub fn unwindFrame(si: *SelfInfo, gpa: Allocator, context: *UnwindContext) Error!usize { + return unwindFrameInner(si, gpa, context) catch |err| switch (err) { + error.InvalidDebugInfo, + error.MissingDebugInfo, + error.UnsupportedDebugInfo, + error.ReadFailed, + error.OutOfMemory, + error.Unexpected, + => |e| return e, + error.UnsupportedRegister, + error.UnsupportedAddrSize, + error.UnimplementedUserOpcode, + => return error.UnsupportedDebugInfo, + error.Overflow, + error.EndOfStream, + error.StreamTooLong, + error.InvalidOpcode, + error.InvalidOperation, + error.InvalidOperand, + error.InvalidRegister, + error.IncompatibleRegisterSize, + => return error.InvalidDebugInfo, + }; +} +fn unwindFrameInner(si: *SelfInfo, gpa: Allocator, context: *UnwindContext) !usize { + const module = try si.findModule(gpa, context.pc); + defer si.mutex.unlock(); + + const unwind: *Module.Unwind = try module.getUnwindInfo(gpa); + + const ip_reg_num = comptime Dwarf.ipRegNum(builtin.target.cpu.arch).?; + const fp_reg_num = comptime Dwarf.fpRegNum(builtin.target.cpu.arch); + const sp_reg_num = comptime Dwarf.spRegNum(builtin.target.cpu.arch); + + const unwind_info = unwind.unwind_info orelse return error.MissingDebugInfo; + if (unwind_info.len < @sizeOf(macho.unwind_info_section_header)) return error.InvalidDebugInfo; + const header: *align(1) const macho.unwind_info_section_header = @ptrCast(unwind_info); + + const index_byte_count = header.indexCount * @sizeOf(macho.unwind_info_section_header_index_entry); + if (unwind_info.len < header.indexSectionOffset + index_byte_count) return error.InvalidDebugInfo; + const indices: []align(1) const macho.unwind_info_section_header_index_entry = @ptrCast(unwind_info[header.indexSectionOffset..][0..index_byte_count]); + if (indices.len == 0) return error.MissingDebugInfo; + + // offset of the PC into the `__TEXT` segment + const pc_text_offset = context.pc - module.text_base; + + const start_offset: u32, const first_level_offset: u32 = index: { + var left: usize = 0; + var len: usize = indices.len; + while (len > 1) { + const mid = left + len / 2; + if (pc_text_offset < indices[mid].functionOffset) { + len /= 2; + } else { + left = mid; + len -= len / 2; + } + } + break :index .{ indices[left].secondLevelPagesSectionOffset, indices[left].functionOffset }; + }; + // An offset of 0 is a sentinel indicating a range does not have unwind info. 
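+ // (The binary search above selects the last first-level entry whose
+ // functionOffset is <= pc_text_offset, so `first_level_offset` is the base
+ // that second-level `funcOffset` values are relative to.)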
+ if (start_offset == 0) return error.MissingDebugInfo; + + const common_encodings_byte_count = header.commonEncodingsArrayCount * @sizeOf(macho.compact_unwind_encoding_t); + if (unwind_info.len < header.commonEncodingsArraySectionOffset + common_encodings_byte_count) return error.InvalidDebugInfo; + const common_encodings: []align(1) const macho.compact_unwind_encoding_t = @ptrCast( + unwind_info[header.commonEncodingsArraySectionOffset..][0..common_encodings_byte_count], + ); + + if (unwind_info.len < start_offset + @sizeOf(macho.UNWIND_SECOND_LEVEL)) return error.InvalidDebugInfo; + const kind: *align(1) const macho.UNWIND_SECOND_LEVEL = @ptrCast(unwind_info[start_offset..]); + + const entry: struct { + function_offset: usize, + raw_encoding: u32, + } = switch (kind.*) { + .REGULAR => entry: { + if (unwind_info.len < start_offset + @sizeOf(macho.unwind_info_regular_second_level_page_header)) return error.InvalidDebugInfo; + const page_header: *align(1) const macho.unwind_info_regular_second_level_page_header = @ptrCast(unwind_info[start_offset..]); + + const entries_byte_count = page_header.entryCount * @sizeOf(macho.unwind_info_regular_second_level_entry); + if (unwind_info.len < start_offset + entries_byte_count) return error.InvalidDebugInfo; + const entries: []align(1) const macho.unwind_info_regular_second_level_entry = @ptrCast( + unwind_info[start_offset + page_header.entryPageOffset ..][0..entries_byte_count], + ); + if (entries.len == 0) return error.InvalidDebugInfo; + + var left: usize = 0; + var len: usize = entries.len; + while (len > 1) { + const mid = left + len / 2; + if (pc_text_offset < entries[mid].functionOffset) { + len /= 2; + } else { + left = mid; + len -= len / 2; + } + } + break :entry .{ + .function_offset = entries[left].functionOffset, + .raw_encoding = entries[left].encoding, + }; + }, + .COMPRESSED => entry: { + if (unwind_info.len < start_offset + @sizeOf(macho.unwind_info_compressed_second_level_page_header)) return error.InvalidDebugInfo; + const page_header: *align(1) const macho.unwind_info_compressed_second_level_page_header = @ptrCast(unwind_info[start_offset..]); + + const entries_byte_count = page_header.entryCount * @sizeOf(macho.UnwindInfoCompressedEntry); + if (unwind_info.len < start_offset + entries_byte_count) return error.InvalidDebugInfo; + const entries: []align(1) const macho.UnwindInfoCompressedEntry = @ptrCast( + unwind_info[start_offset + page_header.entryPageOffset ..][0..entries_byte_count], + ); + if (entries.len == 0) return error.InvalidDebugInfo; + + var left: usize = 0; + var len: usize = entries.len; + while (len > 1) { + const mid = left + len / 2; + if (pc_text_offset < first_level_offset + entries[mid].funcOffset) { + len /= 2; + } else { + left = mid; + len -= len / 2; + } + } + const entry = entries[left]; + + const function_offset = first_level_offset + entry.funcOffset; + if (entry.encodingIndex < common_encodings.len) { + break :entry .{ + .function_offset = function_offset, + .raw_encoding = common_encodings[entry.encodingIndex], + }; + } + + const local_index = entry.encodingIndex - common_encodings.len; + const local_encodings_byte_count = page_header.encodingsCount * @sizeOf(macho.compact_unwind_encoding_t); + if (unwind_info.len < start_offset + page_header.encodingsPageOffset + local_encodings_byte_count) return error.InvalidDebugInfo; + const local_encodings: []align(1) const macho.compact_unwind_encoding_t = @ptrCast( + unwind_info[start_offset + page_header.encodingsPageOffset 
..][0..local_encodings_byte_count], + ); + if (local_index >= local_encodings.len) return error.InvalidDebugInfo; + break :entry .{ + .function_offset = function_offset, + .raw_encoding = local_encodings[local_index], + }; + }, + else => return error.InvalidDebugInfo, + }; + + if (entry.raw_encoding == 0) return error.MissingDebugInfo; + + const encoding: macho.CompactUnwindEncoding = @bitCast(entry.raw_encoding); + const new_ip = switch (builtin.cpu.arch) { + .x86_64 => switch (encoding.mode.x86_64) { + .OLD => return error.UnsupportedDebugInfo, + .RBP_FRAME => ip: { + const frame = encoding.value.x86_64.frame; + + const fp = (try dwarfRegNative(&context.cpu_state, fp_reg_num)).*; + const new_sp = fp + 2 * @sizeOf(usize); + + const ip_ptr = fp + @sizeOf(usize); + const new_ip = @as(*const usize, @ptrFromInt(ip_ptr)).*; + const new_fp = @as(*const usize, @ptrFromInt(fp)).*; + + (try dwarfRegNative(&context.cpu_state, fp_reg_num)).* = new_fp; + (try dwarfRegNative(&context.cpu_state, sp_reg_num)).* = new_sp; + (try dwarfRegNative(&context.cpu_state, ip_reg_num)).* = new_ip; + + const regs: [5]u3 = .{ + frame.reg0, + frame.reg1, + frame.reg2, + frame.reg3, + frame.reg4, + }; + for (regs, 0..) |reg, i| { + if (reg == 0) continue; + const addr = fp - frame.frame_offset * @sizeOf(usize) + i * @sizeOf(usize); + const reg_number = try Dwarf.compactUnwindToDwarfRegNumber(reg); + (try dwarfRegNative(&context.cpu_state, reg_number)).* = @as(*const usize, @ptrFromInt(addr)).*; + } + + break :ip new_ip; + }, + .STACK_IMMD, + .STACK_IND, + => ip: { + const frameless = encoding.value.x86_64.frameless; + + const sp = (try dwarfRegNative(&context.cpu_state, sp_reg_num)).*; + const stack_size: usize = stack_size: { + if (encoding.mode.x86_64 == .STACK_IMMD) { + break :stack_size @as(usize, frameless.stack.direct.stack_size) * @sizeOf(usize); + } + // In .STACK_IND, the stack size is inferred from the subq instruction at the beginning of the function. + const sub_offset_addr = + module.text_base + + entry.function_offset + + frameless.stack.indirect.sub_offset; + // `sub_offset_addr` points to the offset of the literal within the instruction + const sub_operand = @as(*align(1) const u32, @ptrFromInt(sub_offset_addr)).*; + break :stack_size sub_operand + @sizeOf(usize) * @as(usize, frameless.stack.indirect.stack_adjust); + }; + + // Decode the Lehmer-coded sequence of registers. + // For a description of the encoding see lib/libc/include/any-macos.13-any/mach-o/compact_unwind_encoding.h + + // Decode the variable-based permutation number into its digits. Each digit represents + // an index into the list of register numbers that weren't yet used in the sequence at + // the time the digit was added. + const reg_count = frameless.stack_reg_count; + const ip_ptr = ip_ptr: { + var digits: [6]u3 = undefined; + var accumulator: usize = frameless.stack_reg_permutation; + var base: usize = 2; + for (0..reg_count) |i| { + const div = accumulator / base; + digits[digits.len - 1 - i] = @intCast(accumulator - base * div); + accumulator = div; + base += 1; + } + + var registers: [6]u3 = undefined; + var used_indices: [6]bool = @splat(false); + for (digits[digits.len - reg_count ..], 0..) |target_unused_index, i| { + var unused_count: u8 = 0; + const unused_index = for (used_indices, 0..) 
|used, index| { + if (!used) { + if (target_unused_index == unused_count) break index; + unused_count += 1; + } + } else unreachable; + registers[i] = @intCast(unused_index + 1); + used_indices[unused_index] = true; + } + + var reg_addr = sp + stack_size - @sizeOf(usize) * @as(usize, reg_count + 1); + for (0..reg_count) |i| { + const reg_number = try Dwarf.compactUnwindToDwarfRegNumber(registers[i]); + (try dwarfRegNative(&context.cpu_state, reg_number)).* = @as(*const usize, @ptrFromInt(reg_addr)).*; + reg_addr += @sizeOf(usize); + } + + break :ip_ptr reg_addr; + }; + + const new_ip = @as(*const usize, @ptrFromInt(ip_ptr)).*; + const new_sp = ip_ptr + @sizeOf(usize); + + (try dwarfRegNative(&context.cpu_state, sp_reg_num)).* = new_sp; + (try dwarfRegNative(&context.cpu_state, ip_reg_num)).* = new_ip; + + break :ip new_ip; + }, + .DWARF => { + const dwarf = &(unwind.dwarf orelse return error.MissingDebugInfo); + const rules = try context.computeRules(gpa, dwarf, unwind.vmaddr_slide, encoding.value.x86_64.dwarf); + return context.next(gpa, &rules); + }, + }, + .aarch64, .aarch64_be => switch (encoding.mode.arm64) { + .OLD => return error.UnsupportedDebugInfo, + .FRAMELESS => ip: { + const sp = (try dwarfRegNative(&context.cpu_state, sp_reg_num)).*; + const new_sp = sp + encoding.value.arm64.frameless.stack_size * 16; + const new_ip = (try dwarfRegNative(&context.cpu_state, 30)).*; + (try dwarfRegNative(&context.cpu_state, sp_reg_num)).* = new_sp; + break :ip new_ip; + }, + .DWARF => { + const dwarf = &(unwind.dwarf orelse return error.MissingDebugInfo); + const rules = try context.computeRules(gpa, dwarf, unwind.vmaddr_slide, encoding.value.arm64.dwarf); + return context.next(gpa, &rules); + }, + .FRAME => ip: { + const frame = encoding.value.arm64.frame; + + const fp = (try dwarfRegNative(&context.cpu_state, fp_reg_num)).*; + const ip_ptr = fp + @sizeOf(usize); + + var reg_addr = fp - @sizeOf(usize); + inline for (@typeInfo(@TypeOf(frame.x_reg_pairs)).@"struct".fields, 0..) |field, i| { + if (@field(frame.x_reg_pairs, field.name) != 0) { + (try dwarfRegNative(&context.cpu_state, 19 + i)).* = @as(*const usize, @ptrFromInt(reg_addr)).*; + reg_addr += @sizeOf(usize); + (try dwarfRegNative(&context.cpu_state, 20 + i)).* = @as(*const usize, @ptrFromInt(reg_addr)).*; + reg_addr += @sizeOf(usize); + } + } + + inline for (@typeInfo(@TypeOf(frame.d_reg_pairs)).@"struct".fields, 0..) |field, i| { + if (@field(frame.d_reg_pairs, field.name) != 0) { + // Only the lower half of the 128-bit V registers are restored during unwinding + { + const dest: *align(1) usize = @ptrCast(try context.cpu_state.dwarfRegisterBytes(64 + 8 + i)); + dest.* = @as(*const usize, @ptrFromInt(reg_addr)).*; + } + reg_addr += @sizeOf(usize); + { + const dest: *align(1) usize = @ptrCast(try context.cpu_state.dwarfRegisterBytes(64 + 9 + i)); + dest.* = @as(*const usize, @ptrFromInt(reg_addr)).*; + } + reg_addr += @sizeOf(usize); + } + } + + const new_ip = @as(*const usize, @ptrFromInt(ip_ptr)).*; + const new_fp = @as(*const usize, @ptrFromInt(fp)).*; + + (try dwarfRegNative(&context.cpu_state, fp_reg_num)).* = new_fp; + (try dwarfRegNative(&context.cpu_state, ip_reg_num)).* = new_ip; + + break :ip new_ip; + }, + }, + else => comptime unreachable, // unimplemented + }; + + const ret_addr = std.debug.stripInstructionPtrAuthCode(new_ip); + + // Like `Dwarf.SelfUnwinder.next`, adjust our next lookup pc in case the `call` was this + // function's last instruction making `ret_addr` one byte past its end. 
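+ // For example (hypothetical addresses): if a function spans [0x1000, 0x1040)
+ // and its final instruction is the call, `ret_addr` is 0x1040, one byte past
+ // the function's end; `0x1040 -| 1` lands the next lookup back inside it.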
+ context.pc = ret_addr -| 1;
+
+ return ret_addr;
+}
+
+/// Acquires the mutex on success.
+fn findModule(si: *SelfInfo, gpa: Allocator, address: usize) Error!*Module {
+ var info: std.c.dl_info = undefined;
+ if (std.c.dladdr(@ptrFromInt(address), &info) == 0) {
+ return error.MissingDebugInfo;
+ }
+ si.mutex.lock();
+ errdefer si.mutex.unlock();
+ const gop = try si.modules.getOrPutAdapted(gpa, @intFromPtr(info.fbase), Module.Adapter{});
+ errdefer comptime unreachable;
+ if (!gop.found_existing) {
+ gop.key_ptr.* = .{
+ .text_base = @intFromPtr(info.fbase),
+ .name = std.mem.span(info.fname),
+ .unwind = null,
+ .loaded_macho = null,
+ };
+ }
+ return gop.key_ptr;
+}
+
+const Module = struct {
+ text_base: usize,
+ name: []const u8,
+ unwind: ?(Error!Unwind),
+ loaded_macho: ?(Error!LoadedMachO),
+
+ const Adapter = struct {
+ pub fn hash(_: Adapter, text_base: usize) u32 {
+ return @truncate(std.hash.int(text_base));
+ }
+ pub fn eql(_: Adapter, a_text_base: usize, b_module: Module, b_index: usize) bool {
+ _ = b_index;
+ return a_text_base == b_module.text_base;
+ }
+ };
+ const Context = struct {
+ pub fn hash(_: Context, module: Module) u32 {
+ return @truncate(std.hash.int(module.text_base));
+ }
+ pub fn eql(_: Context, a_module: Module, b_module: Module, b_index: usize) bool {
+ _ = b_index;
+ return a_module.text_base == b_module.text_base;
+ }
+ };
+
+ const Unwind = struct {
+ /// The slide applied to the `__unwind_info` and `__eh_frame` sections.
+ /// So, `unwind_info.ptr` is this many bytes higher than the section's vmaddr.
+ vmaddr_slide: u64,
+ /// Backed by the in-memory section mapped by the loader.
+ unwind_info: ?[]const u8,
+ /// Backed by the in-memory `__eh_frame` section mapped by the loader.
+ dwarf: ?Dwarf.Unwind,
+ };
+
+ const LoadedMachO = struct {
+ mapped_memory: []align(std.heap.page_size_min) const u8,
+ symbols: []const MachoSymbol,
+ strings: []const u8,
+ /// This is not necessarily the same as the vmaddr_slide that dyld would report. This is
+ /// because the segments in the file on disk might differ from the ones in memory. Normally
+ /// we wouldn't necessarily expect that to work, but /usr/lib/dyld is incredibly annoying:
+ /// it exists on disk (necessarily, because the kernel needs to load it!), but is also in
+ /// the dyld cache (dyld actually restarts itself from the cache after loading it), and the two
+ /// versions have (very) different segment base addresses. It's sort of like a large slide
+ /// has been applied to all addresses in memory. For an optimal experience, we consider the
+ /// on-disk vmaddr instead of the in-memory one.
+ vaddr_offset: usize,
+ };
+
+ fn getUnwindInfo(module: *Module, gpa: Allocator) Error!*Unwind {
+ if (module.unwind == null) module.unwind = loadUnwindInfo(module, gpa);
+ return if (module.unwind.?)
|*unwind| unwind else |err| err; + } + fn loadUnwindInfo(module: *const Module, gpa: Allocator) Error!Unwind { + const header: *std.macho.mach_header = @ptrFromInt(module.text_base); + + var it: macho.LoadCommandIterator = .{ + .ncmds = header.ncmds, + .buffer = @as([*]u8, @ptrCast(header))[@sizeOf(macho.mach_header_64)..][0..header.sizeofcmds], + }; + const sections, const text_vmaddr = while (it.next()) |load_cmd| { + if (load_cmd.cmd() != .SEGMENT_64) continue; + const segment_cmd = load_cmd.cast(macho.segment_command_64).?; + if (!mem.eql(u8, segment_cmd.segName(), "__TEXT")) continue; + break .{ load_cmd.getSections(), segment_cmd.vmaddr }; + } else unreachable; + + const vmaddr_slide = module.text_base - text_vmaddr; + + var opt_unwind_info: ?[]const u8 = null; + var opt_eh_frame: ?[]const u8 = null; + for (sections) |sect| { + if (mem.eql(u8, sect.sectName(), "__unwind_info")) { + const sect_ptr: [*]u8 = @ptrFromInt(@as(usize, @intCast(vmaddr_slide + sect.addr))); + opt_unwind_info = sect_ptr[0..@intCast(sect.size)]; + } else if (mem.eql(u8, sect.sectName(), "__eh_frame")) { + const sect_ptr: [*]u8 = @ptrFromInt(@as(usize, @intCast(vmaddr_slide + sect.addr))); + opt_eh_frame = sect_ptr[0..@intCast(sect.size)]; + } + } + const eh_frame = opt_eh_frame orelse return .{ + .vmaddr_slide = vmaddr_slide, + .unwind_info = opt_unwind_info, + .dwarf = null, + }; + var dwarf: Dwarf.Unwind = .initSection(.eh_frame, @intFromPtr(eh_frame.ptr) - vmaddr_slide, eh_frame); + errdefer dwarf.deinit(gpa); + // We don't need lookups, so this call is just for scanning CIEs. + dwarf.prepare(gpa, @sizeOf(usize), native_endian, false, true) catch |err| switch (err) { + error.ReadFailed => unreachable, // it's all fixed buffers + error.InvalidDebugInfo, + error.MissingDebugInfo, + error.OutOfMemory, + => |e| return e, + error.EndOfStream, + error.Overflow, + error.StreamTooLong, + error.InvalidOperand, + error.InvalidOpcode, + error.InvalidOperation, + => return error.InvalidDebugInfo, + error.UnsupportedAddrSize, + error.UnsupportedDwarfVersion, + error.UnimplementedUserOpcode, + => return error.UnsupportedDebugInfo, + }; + + return .{ + .vmaddr_slide = vmaddr_slide, + .unwind_info = opt_unwind_info, + .dwarf = dwarf, + }; + } + + fn getLoadedMachO(module: *Module, gpa: Allocator) Error!*LoadedMachO { + if (module.loaded_macho == null) module.loaded_macho = loadMachO(module, gpa) catch |err| switch (err) { + error.InvalidDebugInfo, error.MissingDebugInfo, error.OutOfMemory, error.Unexpected => |e| e, + else => error.ReadFailed, + }; + return if (module.loaded_macho.?) |*lm| lm else |err| err; + } + fn loadMachO(module: *const Module, gpa: Allocator) Error!LoadedMachO { + const all_mapped_memory = try mapDebugInfoFile(module.name); + errdefer posix.munmap(all_mapped_memory); + + // In most cases, the file we just mapped is a Mach-O binary. However, it could be a "universal + // binary": a simple file format which contains Mach-O binaries for multiple targets. For + // instance, `/usr/lib/dyld` is currently distributed as a universal binary containing images + // for both ARM64 macOS and x86_64 macOS. + if (all_mapped_memory.len < 4) return error.InvalidDebugInfo; + const magic = @as(*const u32, @ptrCast(all_mapped_memory.ptr)).*; + // The contents of a Mach-O file, which may or may not be the whole of `all_mapped_memory`. + const mapped_macho = switch (magic) { + macho.MH_MAGIC_64 => all_mapped_memory, + + macho.FAT_CIGAM => mapped_macho: { + // This is the universal binary format (aka a "fat binary"). 
Annoyingly, the whole thing + // is big-endian, so we'll be swapping some bytes. + if (all_mapped_memory.len < @sizeOf(macho.fat_header)) return error.InvalidDebugInfo; + const hdr: *const macho.fat_header = @ptrCast(all_mapped_memory.ptr); + const archs_ptr: [*]const macho.fat_arch = @ptrCast(all_mapped_memory.ptr + @sizeOf(macho.fat_header)); + const archs: []const macho.fat_arch = archs_ptr[0..@byteSwap(hdr.nfat_arch)]; + const native_cpu_type = switch (builtin.cpu.arch) { + .x86_64 => macho.CPU_TYPE_X86_64, + .aarch64 => macho.CPU_TYPE_ARM64, + else => comptime unreachable, + }; + for (archs) |*arch| { + if (@byteSwap(arch.cputype) != native_cpu_type) continue; + const offset = @byteSwap(arch.offset); + const size = @byteSwap(arch.size); + break :mapped_macho all_mapped_memory[offset..][0..size]; + } + // Our native architecture was not present in the fat binary. + return error.MissingDebugInfo; + }, + + // Even on modern 64-bit targets, this format doesn't seem to be too extensively used. It + // will be fairly easy to add support here if necessary; it's very similar to above. + macho.FAT_CIGAM_64 => return error.UnsupportedDebugInfo, + + else => return error.InvalidDebugInfo, + }; + + const hdr: *const macho.mach_header_64 = @ptrCast(@alignCast(mapped_macho.ptr)); + if (hdr.magic != macho.MH_MAGIC_64) + return error.InvalidDebugInfo; + + const symtab: macho.symtab_command, const text_vmaddr: u64 = lc_iter: { + var it: macho.LoadCommandIterator = .{ + .ncmds = hdr.ncmds, + .buffer = mapped_macho[@sizeOf(macho.mach_header_64)..][0..hdr.sizeofcmds], + }; + var symtab: ?macho.symtab_command = null; + var text_vmaddr: ?u64 = null; + while (it.next()) |cmd| switch (cmd.cmd()) { + .SYMTAB => symtab = cmd.cast(macho.symtab_command) orelse return error.InvalidDebugInfo, + .SEGMENT_64 => if (cmd.cast(macho.segment_command_64)) |seg_cmd| { + if (!mem.eql(u8, seg_cmd.segName(), "__TEXT")) continue; + text_vmaddr = seg_cmd.vmaddr; + }, + else => {}, + }; + break :lc_iter .{ + symtab orelse return error.MissingDebugInfo, + text_vmaddr orelse return error.MissingDebugInfo, + }; + }; + + const syms_ptr: [*]align(1) const macho.nlist_64 = @ptrCast(mapped_macho[symtab.symoff..]); + const syms = syms_ptr[0..symtab.nsyms]; + const strings = mapped_macho[symtab.stroff..][0 .. symtab.strsize - 1]; + + var symbols: std.ArrayList(MachoSymbol) = try .initCapacity(gpa, syms.len); + defer symbols.deinit(gpa); + + // This map is temporary; it is used only to detect duplicates here. This is + // necessary because we prefer to use STAB ("symbolic debugging table") symbols, + // but they might not be present, so we track normal symbols too. + // Indices match 1-1 with those of `symbols`. 
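+ // For example, a function usually appears both as a regular `sect` symbol and
+ // as an N_FUN STAB entry; the STAB entry additionally names its object file,
+ // so when both are seen, the STAB entry overwrites the `sect` one below.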
+ var symbol_names: std.StringArrayHashMapUnmanaged(void) = .empty; + defer symbol_names.deinit(gpa); + try symbol_names.ensureUnusedCapacity(gpa, syms.len); + + var ofile: u32 = undefined; + var last_sym: MachoSymbol = undefined; + var state: enum { + init, + oso_open, + oso_close, + bnsym, + fun_strx, + fun_size, + ensym, + } = .init; + + for (syms) |*sym| { + if (sym.n_type.bits.is_stab == 0) { + if (sym.n_strx == 0) continue; + switch (sym.n_type.bits.type) { + .undf, .pbud, .indr, .abs, _ => continue, + .sect => { + const name = std.mem.sliceTo(strings[sym.n_strx..], 0); + const gop = symbol_names.getOrPutAssumeCapacity(name); + if (!gop.found_existing) { + assert(gop.index == symbols.items.len); + symbols.appendAssumeCapacity(.{ + .strx = sym.n_strx, + .addr = sym.n_value, + .ofile = MachoSymbol.unknown_ofile, + }); + } + }, + } + continue; + } + + // TODO handle globals N_GSYM, and statics N_STSYM + switch (sym.n_type.stab) { + .oso => switch (state) { + .init, .oso_close => { + state = .oso_open; + ofile = sym.n_strx; + }, + else => return error.InvalidDebugInfo, + }, + .bnsym => switch (state) { + .oso_open, .ensym => { + state = .bnsym; + last_sym = .{ + .strx = 0, + .addr = sym.n_value, + .ofile = ofile, + }; + }, + else => return error.InvalidDebugInfo, + }, + .fun => switch (state) { + .bnsym => { + state = .fun_strx; + last_sym.strx = sym.n_strx; + }, + .fun_strx => { + state = .fun_size; + }, + else => return error.InvalidDebugInfo, + }, + .ensym => switch (state) { + .fun_size => { + state = .ensym; + if (last_sym.strx != 0) { + const name = std.mem.sliceTo(strings[last_sym.strx..], 0); + const gop = symbol_names.getOrPutAssumeCapacity(name); + if (!gop.found_existing) { + assert(gop.index == symbols.items.len); + symbols.appendAssumeCapacity(last_sym); + } else { + symbols.items[gop.index] = last_sym; + } + } + }, + else => return error.InvalidDebugInfo, + }, + .so => switch (state) { + .init, .oso_close => {}, + .oso_open, .ensym => { + state = .oso_close; + }, + else => return error.InvalidDebugInfo, + }, + else => {}, + } + } + + switch (state) { + .init => { + // Missing STAB symtab entries is still okay, unless there were also no normal symbols. + if (symbols.items.len == 0) return error.MissingDebugInfo; + }, + .oso_close => {}, + else => return error.InvalidDebugInfo, // corrupted STAB entries in symtab + } + + const symbols_slice = try symbols.toOwnedSlice(gpa); + errdefer gpa.free(symbols_slice); + + // Even though lld emits symbols in ascending order, this debug code + // should work for programs linked in any valid way. + // This sort is so that we can binary search later. + mem.sort(MachoSymbol, symbols_slice, {}, MachoSymbol.addressLessThan); + + return .{ + .mapped_memory = all_mapped_memory, + .symbols = symbols_slice, + .strings = strings, + .vaddr_offset = module.text_base - text_vmaddr, + }; + } +}; + +const OFile = struct { + mapped_memory: []align(std.heap.page_size_min) const u8, + dwarf: Dwarf, + strtab: []const u8, + symtab: []align(1) const macho.nlist_64, + /// All named symbols in `symtab`. Stored `u32` key is the index into `symtab`. Accessed + /// through `SymbolAdapter`, so that the symbol name is used as the logical key. 
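+ /// For example, `getSymbol` resolves a symbol index by name with:
+ ///
+ ///     o_file.symbols_by_name.getKeyAdapted(
+ ///         @as([]const u8, stab_symbol),
+ ///         @as(OFile.SymbolAdapter, .{ .strtab = o_file.strtab, .symtab = o_file.symtab }),
+ ///     )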
+ symbols_by_name: std.ArrayHashMapUnmanaged(u32, void, void, true), + + const SymbolAdapter = struct { + strtab: []const u8, + symtab: []align(1) const macho.nlist_64, + pub fn hash(ctx: SymbolAdapter, sym_name: []const u8) u32 { + _ = ctx; + return @truncate(std.hash.Wyhash.hash(0, sym_name)); + } + pub fn eql(ctx: SymbolAdapter, a_sym_name: []const u8, b_sym_index: u32, b_index: usize) bool { + _ = b_index; + const b_sym = ctx.symtab[b_sym_index]; + const b_sym_name = std.mem.sliceTo(ctx.strtab[b_sym.n_strx..], 0); + return mem.eql(u8, a_sym_name, b_sym_name); + } + }; +}; + +const MachoSymbol = struct { + strx: u32, + addr: u64, + /// Value may be `unknown_ofile`. + ofile: u32, + const unknown_ofile = std.math.maxInt(u32); + fn addressLessThan(context: void, lhs: MachoSymbol, rhs: MachoSymbol) bool { + _ = context; + return lhs.addr < rhs.addr; + } + /// Assumes that `symbols` is sorted in order of ascending `addr`. + fn find(symbols: []const MachoSymbol, address: usize) ?*const MachoSymbol { + if (symbols.len == 0) return null; // no potential match + if (address < symbols[0].addr) return null; // address is before the lowest-address symbol + var left: usize = 0; + var len: usize = symbols.len; + while (len > 1) { + const mid = left + len / 2; + if (address < symbols[mid].addr) { + len /= 2; + } else { + left = mid; + len -= len / 2; + } + } + return &symbols[left]; + } + + test find { + const symbols: []const MachoSymbol = &.{ + .{ .addr = 100, .strx = undefined, .ofile = undefined }, + .{ .addr = 200, .strx = undefined, .ofile = undefined }, + .{ .addr = 300, .strx = undefined, .ofile = undefined }, + }; + + try testing.expectEqual(null, find(symbols, 0)); + try testing.expectEqual(null, find(symbols, 99)); + try testing.expectEqual(&symbols[0], find(symbols, 100).?); + try testing.expectEqual(&symbols[0], find(symbols, 150).?); + try testing.expectEqual(&symbols[0], find(symbols, 199).?); + + try testing.expectEqual(&symbols[1], find(symbols, 200).?); + try testing.expectEqual(&symbols[1], find(symbols, 250).?); + try testing.expectEqual(&symbols[1], find(symbols, 299).?); + + try testing.expectEqual(&symbols[2], find(symbols, 300).?); + try testing.expectEqual(&symbols[2], find(symbols, 301).?); + try testing.expectEqual(&symbols[2], find(symbols, 5000).?); + } +}; +test { + _ = MachoSymbol; +} + +/// Uses `mmap` to map the file at `path` into memory. 
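+/// The caller owns the mapping and releases it with `posix.munmap`, as in
+/// `loadOFile`:
+///
+///     const mapped_mem = try mapDebugInfoFile(o_file_path);
+///     errdefer posix.munmap(mapped_mem);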
+fn mapDebugInfoFile(path: []const u8) ![]align(std.heap.page_size_min) const u8 { + const file = std.fs.cwd().openFile(path, .{}) catch |err| switch (err) { + error.FileNotFound => return error.MissingDebugInfo, + else => return error.ReadFailed, + }; + defer file.close(); + + const file_end_pos = file.getEndPos() catch |err| switch (err) { + error.Unexpected => |e| return e, + else => return error.ReadFailed, + }; + const file_len = std.math.cast(usize, file_end_pos) orelse return error.InvalidDebugInfo; + + return posix.mmap( + null, + file_len, + posix.PROT.READ, + .{ .TYPE = .SHARED }, + file.handle, + 0, + ) catch |err| switch (err) { + error.Unexpected => |e| return e, + else => return error.ReadFailed, + }; +} + +fn loadOFile(gpa: Allocator, o_file_path: []const u8) !OFile { + const mapped_mem = try mapDebugInfoFile(o_file_path); + errdefer posix.munmap(mapped_mem); + + if (mapped_mem.len < @sizeOf(macho.mach_header_64)) return error.InvalidDebugInfo; + const hdr: *const macho.mach_header_64 = @ptrCast(@alignCast(mapped_mem.ptr)); + if (hdr.magic != std.macho.MH_MAGIC_64) return error.InvalidDebugInfo; + + const seg_cmd: macho.LoadCommandIterator.LoadCommand, const symtab_cmd: macho.symtab_command = cmds: { + var seg_cmd: ?macho.LoadCommandIterator.LoadCommand = null; + var symtab_cmd: ?macho.symtab_command = null; + var it: macho.LoadCommandIterator = .{ + .ncmds = hdr.ncmds, + .buffer = mapped_mem[@sizeOf(macho.mach_header_64)..][0..hdr.sizeofcmds], + }; + while (it.next()) |cmd| switch (cmd.cmd()) { + .SEGMENT_64 => seg_cmd = cmd, + .SYMTAB => symtab_cmd = cmd.cast(macho.symtab_command) orelse return error.InvalidDebugInfo, + else => {}, + }; + break :cmds .{ + seg_cmd orelse return error.MissingDebugInfo, + symtab_cmd orelse return error.MissingDebugInfo, + }; + }; + + if (mapped_mem.len < symtab_cmd.stroff + symtab_cmd.strsize) return error.InvalidDebugInfo; + if (mapped_mem[symtab_cmd.stroff + symtab_cmd.strsize - 1] != 0) return error.InvalidDebugInfo; + const strtab = mapped_mem[symtab_cmd.stroff..][0 .. symtab_cmd.strsize - 1]; + + const n_sym_bytes = symtab_cmd.nsyms * @sizeOf(macho.nlist_64); + if (mapped_mem.len < symtab_cmd.symoff + n_sym_bytes) return error.InvalidDebugInfo; + const symtab: []align(1) const macho.nlist_64 = @ptrCast(mapped_mem[symtab_cmd.symoff..][0..n_sym_bytes]); + + // TODO handle tentative (common) symbols + var symbols_by_name: std.ArrayHashMapUnmanaged(u32, void, void, true) = .empty; + defer symbols_by_name.deinit(gpa); + try symbols_by_name.ensureUnusedCapacity(gpa, @intCast(symtab.len)); + for (symtab, 0..) |sym, sym_index| { + if (sym.n_strx == 0) continue; + switch (sym.n_type.bits.type) { + .undf => continue, // includes tentative symbols + .abs => continue, + else => {}, + } + const sym_name = mem.sliceTo(strtab[sym.n_strx..], 0); + const gop = symbols_by_name.getOrPutAssumeCapacityAdapted( + @as([]const u8, sym_name), + @as(OFile.SymbolAdapter, .{ .strtab = strtab, .symtab = symtab }), + ); + if (gop.found_existing) return error.InvalidDebugInfo; + gop.key_ptr.* = @intCast(sym_index); + } + + var sections: Dwarf.SectionArray = @splat(null); + for (seg_cmd.getSections()) |sect| { + if (!std.mem.eql(u8, "__DWARF", sect.segName())) continue; + + const section_index: usize = inline for (@typeInfo(Dwarf.Section.Id).@"enum".fields, 0..) 
|section, i| { + if (mem.eql(u8, "__" ++ section.name, sect.sectName())) break i; + } else continue; + + if (mapped_mem.len < sect.offset + sect.size) return error.InvalidDebugInfo; + const section_bytes = mapped_mem[sect.offset..][0..sect.size]; + sections[section_index] = .{ + .data = section_bytes, + .owned = false, + }; + } + + const missing_debug_info = + sections[@intFromEnum(Dwarf.Section.Id.debug_info)] == null or + sections[@intFromEnum(Dwarf.Section.Id.debug_abbrev)] == null or + sections[@intFromEnum(Dwarf.Section.Id.debug_str)] == null or + sections[@intFromEnum(Dwarf.Section.Id.debug_line)] == null; + if (missing_debug_info) return error.MissingDebugInfo; + + var dwarf: Dwarf = .{ .sections = sections }; + errdefer dwarf.deinit(gpa); + try dwarf.open(gpa, native_endian); + + return .{ + .mapped_memory = mapped_mem, + .dwarf = dwarf, + .strtab = strtab, + .symtab = symtab, + .symbols_by_name = symbols_by_name.move(), + }; +} + +const std = @import("std"); +const Allocator = std.mem.Allocator; +const Dwarf = std.debug.Dwarf; +const Error = std.debug.SelfInfoError; +const assert = std.debug.assert; +const posix = std.posix; +const macho = std.macho; +const mem = std.mem; +const testing = std.testing; +const dwarfRegNative = std.debug.Dwarf.SelfUnwinder.regNative; + +const builtin = @import("builtin"); +const native_endian = builtin.target.cpu.arch.endian(); + +const SelfInfo = @This(); diff --git a/lib/std/debug/SelfInfo/Elf.zig b/lib/std/debug/SelfInfo/Elf.zig new file mode 100644 index 0000000000..4f9389f2d5 --- /dev/null +++ b/lib/std/debug/SelfInfo/Elf.zig @@ -0,0 +1,427 @@ +rwlock: std.Thread.RwLock, + +modules: std.ArrayList(Module), +ranges: std.ArrayList(Module.Range), + +unwind_cache: if (can_unwind) ?[]Dwarf.SelfUnwinder.CacheEntry else ?noreturn, + +pub const init: SelfInfo = .{ + .rwlock = .{}, + .modules = .empty, + .ranges = .empty, + .unwind_cache = null, +}; +pub fn deinit(si: *SelfInfo, gpa: Allocator) void { + for (si.modules.items) |*mod| { + unwind: { + const u = &(mod.unwind orelse break :unwind catch break :unwind); + for (u.buf[0..u.len]) |*unwind| unwind.deinit(gpa); + } + loaded: { + const l = &(mod.loaded_elf orelse break :loaded catch break :loaded); + l.file.deinit(gpa); + } + } + + si.modules.deinit(gpa); + si.ranges.deinit(gpa); + if (si.unwind_cache) |cache| gpa.free(cache); +} + +pub fn getSymbol(si: *SelfInfo, gpa: Allocator, address: usize) Error!std.debug.Symbol { + const module = try si.findModule(gpa, address, .exclusive); + defer si.rwlock.unlock(); + + const vaddr = address - module.load_offset; + + const loaded_elf = try module.getLoadedElf(gpa); + if (loaded_elf.file.dwarf) |*dwarf| { + if (!loaded_elf.scanned_dwarf) { + dwarf.open(gpa, native_endian) catch |err| switch (err) { + error.InvalidDebugInfo, + error.MissingDebugInfo, + error.OutOfMemory, + => |e| return e, + error.EndOfStream, + error.Overflow, + error.ReadFailed, + error.StreamTooLong, + => return error.InvalidDebugInfo, + }; + loaded_elf.scanned_dwarf = true; + } + if (dwarf.getSymbol(gpa, native_endian, vaddr)) |sym| { + return sym; + } else |err| switch (err) { + error.MissingDebugInfo => {}, + + error.InvalidDebugInfo, + error.OutOfMemory, + => |e| return e, + + error.ReadFailed, + error.EndOfStream, + error.Overflow, + error.StreamTooLong, + => return error.InvalidDebugInfo, + } + } + // When DWARF is unavailable, fall back to searching the symtab. 
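+    // (`searchSymtab` reports `NoSymtab`/`NoStrtab` for e.g. fully stripped binaries,
+    // which we surface as `MissingDebugInfo` below rather than as a hard failure.)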
+ return loaded_elf.file.searchSymtab(gpa, vaddr) catch |err| switch (err) { + error.NoSymtab, error.NoStrtab => return error.MissingDebugInfo, + error.BadSymtab => return error.InvalidDebugInfo, + error.OutOfMemory => |e| return e, + }; +} +pub fn getModuleName(si: *SelfInfo, gpa: Allocator, address: usize) Error![]const u8 { + const module = try si.findModule(gpa, address, .shared); + defer si.rwlock.unlockShared(); + if (module.name.len == 0) return error.MissingDebugInfo; + return module.name; +} + +pub const can_unwind: bool = s: { + // Notably, we are yet to support unwinding on ARM. There, unwinding is not done through + // `.eh_frame`, but instead with the `.ARM.exidx` section, which has a different format. + const archs: []const std.Target.Cpu.Arch = switch (builtin.target.os.tag) { + .linux => &.{ .x86, .x86_64, .aarch64, .aarch64_be }, + .netbsd => &.{ .x86, .x86_64, .aarch64, .aarch64_be }, + .freebsd => &.{ .x86_64, .aarch64, .aarch64_be }, + .openbsd => &.{.x86_64}, + .solaris => &.{ .x86, .x86_64 }, + .illumos => &.{ .x86, .x86_64 }, + else => unreachable, + }; + for (archs) |a| { + if (builtin.target.cpu.arch == a) break :s true; + } + break :s false; +}; +comptime { + if (can_unwind) { + std.debug.assert(Dwarf.supportsUnwinding(&builtin.target)); + } +} +pub const UnwindContext = Dwarf.SelfUnwinder; +pub fn unwindFrame(si: *SelfInfo, gpa: Allocator, context: *UnwindContext) Error!usize { + comptime assert(can_unwind); + + { + si.rwlock.lockShared(); + defer si.rwlock.unlockShared(); + if (si.unwind_cache) |cache| { + if (Dwarf.SelfUnwinder.CacheEntry.find(cache, context.pc)) |entry| { + return context.next(gpa, entry); + } + } + } + + const module = try si.findModule(gpa, context.pc, .exclusive); + defer si.rwlock.unlock(); + + if (si.unwind_cache == null) { + si.unwind_cache = try gpa.alloc(Dwarf.SelfUnwinder.CacheEntry, 2048); + @memset(si.unwind_cache.?, .empty); + } + + const unwind_sections = try module.getUnwindSections(gpa); + for (unwind_sections) |*unwind| { + if (context.computeRules(gpa, unwind, module.load_offset, null)) |entry| { + entry.populate(si.unwind_cache.?); + return context.next(gpa, &entry); + } else |err| switch (err) { + error.MissingDebugInfo => continue, + + error.InvalidDebugInfo, + error.UnsupportedDebugInfo, + error.OutOfMemory, + => |e| return e, + + error.EndOfStream, + error.StreamTooLong, + error.ReadFailed, + error.Overflow, + error.InvalidOpcode, + error.InvalidOperation, + error.InvalidOperand, + => return error.InvalidDebugInfo, + + error.UnimplementedUserOpcode, + error.UnsupportedAddrSize, + => return error.UnsupportedDebugInfo, + } + } + return error.MissingDebugInfo; +} + +const Module = struct { + load_offset: usize, + name: []const u8, + build_id: ?[]const u8, + gnu_eh_frame: ?[]const u8, + + /// `null` means unwind information has not yet been loaded. + unwind: ?(Error!UnwindSections), + + /// `null` means the ELF file has not yet been loaded. + loaded_elf: ?(Error!LoadedElf), + + const LoadedElf = struct { + file: std.debug.ElfFile, + scanned_dwarf: bool, + }; + + const UnwindSections = struct { + buf: [2]Dwarf.Unwind, + len: usize, + }; + + const Range = struct { + start: usize, + len: usize, + /// Index into `modules` + module_index: usize, + }; + + /// Assumes we already hold an exclusive lock. + fn getUnwindSections(mod: *Module, gpa: Allocator) Error![]Dwarf.Unwind { + if (mod.unwind == null) mod.unwind = loadUnwindSections(mod, gpa); + const us = &(mod.unwind.? 
catch |err| return err); + return us.buf[0..us.len]; + } + fn loadUnwindSections(mod: *Module, gpa: Allocator) Error!UnwindSections { + var us: UnwindSections = .{ + .buf = undefined, + .len = 0, + }; + if (mod.gnu_eh_frame) |section_bytes| { + const section_vaddr: u64 = @intFromPtr(section_bytes.ptr) - mod.load_offset; + const header = Dwarf.Unwind.EhFrameHeader.parse(section_vaddr, section_bytes, @sizeOf(usize), native_endian) catch |err| switch (err) { + error.ReadFailed => unreachable, // it's all fixed buffers + error.InvalidDebugInfo => |e| return e, + error.EndOfStream, error.Overflow => return error.InvalidDebugInfo, + error.UnsupportedAddrSize => return error.UnsupportedDebugInfo, + }; + us.buf[us.len] = .initEhFrameHdr(header, section_vaddr, @ptrFromInt(@as(usize, @intCast(mod.load_offset + header.eh_frame_vaddr)))); + us.len += 1; + } else { + // There is no `.eh_frame_hdr` section. There may still be an `.eh_frame` or `.debug_frame` + // section, but we'll have to load the binary to get at it. + const loaded = try mod.getLoadedElf(gpa); + // If both are present, we can't just pick one -- the info could be split between them. + // `.debug_frame` is likely to be the more complete section, so we'll prioritize that one. + if (loaded.file.debug_frame) |*debug_frame| { + us.buf[us.len] = .initSection(.debug_frame, debug_frame.vaddr, debug_frame.bytes); + us.len += 1; + } + if (loaded.file.eh_frame) |*eh_frame| { + us.buf[us.len] = .initSection(.eh_frame, eh_frame.vaddr, eh_frame.bytes); + us.len += 1; + } + } + errdefer for (us.buf[0..us.len]) |*u| u.deinit(gpa); + for (us.buf[0..us.len]) |*u| u.prepare(gpa, @sizeOf(usize), native_endian, true, false) catch |err| switch (err) { + error.ReadFailed => unreachable, // it's all fixed buffers + error.InvalidDebugInfo, + error.MissingDebugInfo, + error.OutOfMemory, + => |e| return e, + error.EndOfStream, + error.Overflow, + error.StreamTooLong, + error.InvalidOperand, + error.InvalidOpcode, + error.InvalidOperation, + => return error.InvalidDebugInfo, + error.UnsupportedAddrSize, + error.UnsupportedDwarfVersion, + error.UnimplementedUserOpcode, + => return error.UnsupportedDebugInfo, + }; + return us; + } + + /// Assumes we already hold an exclusive lock. + fn getLoadedElf(mod: *Module, gpa: Allocator) Error!*LoadedElf { + if (mod.loaded_elf == null) mod.loaded_elf = loadElf(mod, gpa); + return if (mod.loaded_elf.?) 
|*elf| elf else |err| err; + } + fn loadElf(mod: *Module, gpa: Allocator) Error!LoadedElf { + const load_result = if (mod.name.len > 0) res: { + var file = std.fs.cwd().openFile(mod.name, .{}) catch return error.MissingDebugInfo; + defer file.close(); + break :res std.debug.ElfFile.load(gpa, file, mod.build_id, &.native(mod.name)); + } else res: { + const path = std.fs.selfExePathAlloc(gpa) catch |err| switch (err) { + error.OutOfMemory => |e| return e, + else => return error.ReadFailed, + }; + defer gpa.free(path); + var file = std.fs.cwd().openFile(path, .{}) catch return error.MissingDebugInfo; + defer file.close(); + break :res std.debug.ElfFile.load(gpa, file, mod.build_id, &.native(path)); + }; + + var elf_file = load_result catch |err| switch (err) { + error.OutOfMemory, + error.Unexpected, + => |e| return e, + + error.Overflow, + error.TruncatedElfFile, + error.InvalidCompressedSection, + error.InvalidElfMagic, + error.InvalidElfVersion, + error.InvalidElfClass, + error.InvalidElfEndian, + => return error.InvalidDebugInfo, + + error.SystemResources, + error.MemoryMappingNotSupported, + error.AccessDenied, + error.LockedMemoryLimitExceeded, + error.ProcessFdQuotaExceeded, + error.SystemFdQuotaExceeded, + => return error.ReadFailed, + }; + errdefer elf_file.deinit(gpa); + + if (elf_file.endian != native_endian) return error.InvalidDebugInfo; + if (elf_file.is_64 != (@sizeOf(usize) == 8)) return error.InvalidDebugInfo; + + return .{ + .file = elf_file, + .scanned_dwarf = false, + }; + } +}; + +fn findModule(si: *SelfInfo, gpa: Allocator, address: usize, lock: enum { shared, exclusive }) Error!*Module { + // With the requested lock, scan the module ranges looking for `address`. + switch (lock) { + .shared => si.rwlock.lockShared(), + .exclusive => si.rwlock.lock(), + } + for (si.ranges.items) |*range| { + if (address >= range.start and address < range.start + range.len) { + return &si.modules.items[range.module_index]; + } + } + // The address wasn't in a known range. We will rebuild the module/range lists, since it's possible + // a new module was loaded. Upgrade to an exclusive lock if necessary. + switch (lock) { + .shared => { + si.rwlock.unlockShared(); + si.rwlock.lock(); + }, + .exclusive => {}, + } + // Rebuild module list with the exclusive lock. + { + errdefer si.rwlock.unlock(); + for (si.modules.items) |*mod| { + unwind: { + const u = &(mod.unwind orelse break :unwind catch break :unwind); + for (u.buf[0..u.len]) |*unwind| unwind.deinit(gpa); + } + loaded: { + const l = &(mod.loaded_elf orelse break :loaded catch break :loaded); + l.file.deinit(gpa); + } + } + si.modules.clearRetainingCapacity(); + si.ranges.clearRetainingCapacity(); + var ctx: DlIterContext = .{ .si = si, .gpa = gpa }; + try std.posix.dl_iterate_phdr(&ctx, error{OutOfMemory}, DlIterContext.callback); + } + // Downgrade the lock back to shared if necessary. + switch (lock) { + .shared => { + si.rwlock.unlock(); + si.rwlock.lockShared(); + }, + .exclusive => {}, + } + // Scan the newly rebuilt module ranges. + for (si.ranges.items) |*range| { + if (address >= range.start and address < range.start + range.len) { + return &si.modules.items[range.module_index]; + } + } + // Still nothing; unlock and error. 
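+    // (An address may legitimately belong to no module at all, e.g. JIT-compiled code.)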
+    switch (lock) {
+        .shared => si.rwlock.unlockShared(),
+        .exclusive => si.rwlock.unlock(),
+    }
+    return error.MissingDebugInfo;
+}
+const DlIterContext = struct {
+    si: *SelfInfo,
+    gpa: Allocator,
+
+    fn callback(info: *std.posix.dl_phdr_info, size: usize, context: *@This()) !void {
+        _ = size;
+
+        var build_id: ?[]const u8 = null;
+        var gnu_eh_frame: ?[]const u8 = null;
+
+        // Populate `build_id` and `gnu_eh_frame`
+        for (info.phdr[0..info.phnum]) |phdr| {
+            switch (phdr.p_type) {
+                std.elf.PT_NOTE => {
+                    // Look for .note.gnu.build-id
+                    const segment_ptr: [*]const u8 = @ptrFromInt(info.addr + phdr.p_vaddr);
+                    var r: std.Io.Reader = .fixed(segment_ptr[0..phdr.p_memsz]);
+                    const name_size = r.takeInt(u32, native_endian) catch continue;
+                    const desc_size = r.takeInt(u32, native_endian) catch continue;
+                    const note_type = r.takeInt(u32, native_endian) catch continue;
+                    const name = r.take(name_size) catch continue;
+                    if (note_type != std.elf.NT_GNU_BUILD_ID) continue;
+                    if (!std.mem.eql(u8, name, "GNU\x00")) continue;
+                    const desc = r.take(desc_size) catch continue;
+                    build_id = desc;
+                },
+                std.elf.PT_GNU_EH_FRAME => {
+                    const segment_ptr: [*]const u8 = @ptrFromInt(info.addr + phdr.p_vaddr);
+                    gnu_eh_frame = segment_ptr[0..phdr.p_memsz];
+                },
+                else => {},
+            }
+        }
+
+        const gpa = context.gpa;
+        const si = context.si;
+
+        const module_index = si.modules.items.len;
+        try si.modules.append(gpa, .{
+            .load_offset = info.addr,
+            // Android libc uses NULL instead of "" to mark the main program
+            .name = std.mem.sliceTo(info.name, 0) orelse "",
+            .build_id = build_id,
+            .gnu_eh_frame = gnu_eh_frame,
+            .unwind = null,
+            .loaded_elf = null,
+        });
+
+        for (info.phdr[0..info.phnum]) |phdr| {
+            if (phdr.p_type != std.elf.PT_LOAD) continue;
+            try context.si.ranges.append(gpa, .{
+                // Overflowing addition handles vDSOs having p_vaddr = 0xffffffffff700000
+                .start = info.addr +% phdr.p_vaddr,
+                .len = phdr.p_memsz,
+                .module_index = module_index,
+            });
+        }
+    }
+};
+
+const std = @import("std");
+const Allocator = std.mem.Allocator;
+const Dwarf = std.debug.Dwarf;
+const Error = std.debug.SelfInfoError;
+const assert = std.debug.assert;
+
+const builtin = @import("builtin");
+const native_endian = builtin.target.cpu.arch.endian();
+
+const SelfInfo = @This();
diff --git a/lib/std/debug/SelfInfo/Windows.zig b/lib/std/debug/SelfInfo/Windows.zig
new file mode 100644
index 0000000000..ffa99a27f2
--- /dev/null
+++ b/lib/std/debug/SelfInfo/Windows.zig
@@ -0,0 +1,559 @@
+mutex: std.Thread.Mutex,
+modules: std.ArrayListUnmanaged(Module),
+module_name_arena: std.heap.ArenaAllocator.State,
+
+pub const init: SelfInfo = .{
+    .mutex = .{},
+    .modules = .empty,
+    .module_name_arena = .{},
+};
+pub fn deinit(si: *SelfInfo, gpa: Allocator) void {
+    for (si.modules.items) |*module| {
+        di: {
+            const di = &(module.di orelse break :di catch break :di);
+            di.deinit(gpa);
+        }
+    }
+    si.modules.deinit(gpa);
+
+    var module_name_arena = si.module_name_arena.promote(gpa);
+    module_name_arena.deinit();
+}
+
+pub fn getSymbol(si: *SelfInfo, gpa: Allocator, address: usize) Error!std.debug.Symbol {
+    si.mutex.lock();
+    defer si.mutex.unlock();
+    const module = try si.findModule(gpa, address);
+    const di = try module.getDebugInfo(gpa);
+    return di.getSymbol(gpa, address - module.base_address);
+}
+pub fn getModuleName(si: *SelfInfo, gpa: Allocator, address: usize) Error![]const u8 {
+    si.mutex.lock();
+    defer si.mutex.unlock();
+    const module = try si.findModule(gpa, address);
+    return module.name;
+}
+
+pub const can_unwind: bool = switch (builtin.cpu.arch) {
+    else => true,
+    // On x86, `RtlVirtualUnwind` does not exist. We could in theory use `RtlCaptureStackBackTrace`
+    // instead, but on x86, it turns out that function is just... doing FP unwinding with ebp! It's
+    // hard to find implementation details to confirm that, but the most authoritative source I have
+    // is an entry in the LLVM mailing list from 2020/08/16 which contains this quote:
+    //
+    // > x86 doesn't have what most architectures would consider an "unwinder" in the sense of
+    // > restoring registers; there is simply a linked list of frames that participate in SEH and
+    // > that desire to be called for a dynamic unwind operation, so RtlCaptureStackBackTrace
+    // > assumes that EBP-based frames are in use and walks an EBP-based frame chain on x86 - not
+    // > all x86 code is written with EBP-based frames so while even though we generally build the
+    // > OS that way, you might always run the risk of encountering external code that uses EBP as a
+    // > general purpose register for which such an unwind attempt for a stack trace would fail.
+    //
+    // Regardless, it's easy to effectively confirm this hypothesis just by compiling some code with
+    // `-fomit-frame-pointer -OReleaseFast` and observing that `RtlCaptureStackBackTrace` returns an
+    // empty trace when it's called in such an application. Note that without `-OReleaseFast` or
+    // similar, LLVM seems reluctant to ever clobber ebp, so you'll get a trace returned which just
+    // contains all of the kernel32/ntdll frames but none of your own. Don't be deceived---this is
+    // just coincidental!
+    //
+    // Anyway, the point is, the only stack walking primitive on x86-windows is FP unwinding. We
+    // *could* ask Microsoft to do that for us with `RtlCaptureStackBackTrace`... but better to just
+    // use our existing FP unwinder in `std.debug`!
+ .x86 => false, +}; +pub const UnwindContext = struct { + pc: usize, + cur: windows.CONTEXT, + history_table: windows.UNWIND_HISTORY_TABLE, + pub fn init(ctx: *const std.debug.cpu_context.Native) UnwindContext { + return .{ + .pc = @returnAddress(), + .cur = switch (builtin.cpu.arch) { + .x86_64 => std.mem.zeroInit(windows.CONTEXT, .{ + .Rax = ctx.gprs.get(.rax), + .Rcx = ctx.gprs.get(.rcx), + .Rdx = ctx.gprs.get(.rdx), + .Rbx = ctx.gprs.get(.rbx), + .Rsp = ctx.gprs.get(.rsp), + .Rbp = ctx.gprs.get(.rbp), + .Rsi = ctx.gprs.get(.rsi), + .Rdi = ctx.gprs.get(.rdi), + .R8 = ctx.gprs.get(.r8), + .R9 = ctx.gprs.get(.r9), + .R10 = ctx.gprs.get(.r10), + .R11 = ctx.gprs.get(.r11), + .R12 = ctx.gprs.get(.r12), + .R13 = ctx.gprs.get(.r13), + .R14 = ctx.gprs.get(.r14), + .R15 = ctx.gprs.get(.r15), + .Rip = ctx.gprs.get(.rip), + }), + .aarch64, .aarch64_be => .{ + .ContextFlags = 0, + .Cpsr = 0, + .DUMMYUNIONNAME = .{ .X = ctx.x }, + .Sp = ctx.sp, + .Pc = ctx.pc, + .V = @splat(.{ .B = @splat(0) }), + .Fpcr = 0, + .Fpsr = 0, + .Bcr = @splat(0), + .Bvr = @splat(0), + .Wcr = @splat(0), + .Wvr = @splat(0), + }, + .thumb => .{ + .ContextFlags = 0, + .R0 = ctx.r[0], + .R1 = ctx.r[1], + .R2 = ctx.r[2], + .R3 = ctx.r[3], + .R4 = ctx.r[4], + .R5 = ctx.r[5], + .R6 = ctx.r[6], + .R7 = ctx.r[7], + .R8 = ctx.r[8], + .R9 = ctx.r[9], + .R10 = ctx.r[10], + .R11 = ctx.r[11], + .R12 = ctx.r[12], + .Sp = ctx.r[13], + .Lr = ctx.r[14], + .Pc = ctx.r[15], + .Cpsr = 0, + .Fpcsr = 0, + .Padding = 0, + .DUMMYUNIONNAME = .{ .S = @splat(0) }, + .Bvr = @splat(0), + .Bcr = @splat(0), + .Wvr = @splat(0), + .Wcr = @splat(0), + .Padding2 = @splat(0), + }, + else => comptime unreachable, + }, + .history_table = std.mem.zeroes(windows.UNWIND_HISTORY_TABLE), + }; + } + pub fn deinit(ctx: *UnwindContext, gpa: Allocator) void { + _ = ctx; + _ = gpa; + } + pub fn getFp(ctx: *UnwindContext) usize { + return ctx.cur.getRegs().bp; + } +}; +pub fn unwindFrame(si: *SelfInfo, gpa: Allocator, context: *UnwindContext) Error!usize { + _ = si; + _ = gpa; + + const current_regs = context.cur.getRegs(); + var image_base: windows.DWORD64 = undefined; + if (windows.ntdll.RtlLookupFunctionEntry(current_regs.ip, &image_base, &context.history_table)) |runtime_function| { + var handler_data: ?*anyopaque = null; + var establisher_frame: u64 = undefined; + _ = windows.ntdll.RtlVirtualUnwind( + windows.UNW_FLAG_NHANDLER, + image_base, + current_regs.ip, + runtime_function, + &context.cur, + &handler_data, + &establisher_frame, + null, + ); + } else { + // leaf function + context.cur.setIp(@as(*const usize, @ptrFromInt(current_regs.sp)).*); + context.cur.setSp(current_regs.sp + @sizeOf(usize)); + } + + const next_regs = context.cur.getRegs(); + const tib = &windows.teb().NtTib; + if (next_regs.sp < @intFromPtr(tib.StackLimit) or next_regs.sp > @intFromPtr(tib.StackBase)) { + context.pc = 0; + return 0; + } + // Like `DwarfUnwindContext.unwindFrame`, adjust our next lookup pc in case the `call` was this + // function's last instruction making `next_regs.ip` one byte past its end. 
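+    // (The return address is the instruction *after* the `call`; if that `call` is the last
+    // instruction of the function, the unadjusted address would resolve to the RUNTIME_FUNCTION
+    // entry of whatever happens to follow it in memory.)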
+ context.pc = next_regs.ip -| 1; + return next_regs.ip; +} + +const Module = struct { + base_address: usize, + size: u32, + name: []const u8, + handle: windows.HMODULE, + + di: ?(Error!DebugInfo), + + const DebugInfo = struct { + arena: std.heap.ArenaAllocator.State, + coff_image_base: u64, + mapped_file: ?MappedFile, + dwarf: ?Dwarf, + pdb: ?Pdb, + coff_section_headers: []coff.SectionHeader, + + const MappedFile = struct { + file: fs.File, + section_handle: windows.HANDLE, + section_view: []const u8, + fn deinit(mf: *const MappedFile) void { + const process_handle = windows.GetCurrentProcess(); + assert(windows.ntdll.NtUnmapViewOfSection(process_handle, @constCast(mf.section_view.ptr)) == .SUCCESS); + windows.CloseHandle(mf.section_handle); + mf.file.close(); + } + }; + + fn deinit(di: *DebugInfo, gpa: Allocator) void { + if (di.dwarf) |*dwarf| dwarf.deinit(gpa); + if (di.pdb) |*pdb| { + pdb.file_reader.file.close(); + pdb.deinit(); + } + if (di.mapped_file) |*mf| mf.deinit(); + + var arena = di.arena.promote(gpa); + arena.deinit(); + } + + fn getSymbol(di: *DebugInfo, gpa: Allocator, vaddr: usize) Error!std.debug.Symbol { + pdb: { + const pdb = &(di.pdb orelse break :pdb); + var coff_section: *align(1) const coff.SectionHeader = undefined; + const mod_index = for (pdb.sect_contribs) |sect_contrib| { + if (sect_contrib.section > di.coff_section_headers.len) continue; + // Remember that SectionContribEntry.Section is 1-based. + coff_section = &di.coff_section_headers[sect_contrib.section - 1]; + + const vaddr_start = coff_section.virtual_address + sect_contrib.offset; + const vaddr_end = vaddr_start + sect_contrib.size; + if (vaddr >= vaddr_start and vaddr < vaddr_end) { + break sect_contrib.module_index; + } + } else { + // we have no information to add to the address + break :pdb; + }; + const module = pdb.getModule(mod_index) catch |err| switch (err) { + error.InvalidDebugInfo, + error.MissingDebugInfo, + error.OutOfMemory, + => |e| return e, + + error.ReadFailed, + error.EndOfStream, + => return error.InvalidDebugInfo, + } orelse { + return error.InvalidDebugInfo; // bad module index + }; + return .{ + .name = pdb.getSymbolName(module, vaddr - coff_section.virtual_address), + .compile_unit_name = fs.path.basename(module.obj_file_name), + .source_location = pdb.getLineNumberInfo(module, vaddr - coff_section.virtual_address) catch null, + }; + } + dwarf: { + const dwarf = &(di.dwarf orelse break :dwarf); + const dwarf_address = vaddr + di.coff_image_base; + return dwarf.getSymbol(gpa, native_endian, dwarf_address) catch |err| switch (err) { + error.MissingDebugInfo => break :dwarf, + + error.InvalidDebugInfo, + error.OutOfMemory, + => |e| return e, + + error.ReadFailed, + error.EndOfStream, + error.Overflow, + error.StreamTooLong, + => return error.InvalidDebugInfo, + }; + } + return error.MissingDebugInfo; + } + }; + + fn getDebugInfo(module: *Module, gpa: Allocator) Error!*DebugInfo { + if (module.di == null) module.di = loadDebugInfo(module, gpa); + return if (module.di.?) 
|*di| di else |err| err;
+    }
+    fn loadDebugInfo(module: *const Module, gpa: Allocator) Error!DebugInfo {
+        const mapped_ptr: [*]const u8 = @ptrFromInt(module.base_address);
+        const mapped = mapped_ptr[0..module.size];
+        var coff_obj = coff.Coff.init(mapped, true) catch return error.InvalidDebugInfo;
+
+        var arena_instance: std.heap.ArenaAllocator = .init(gpa);
+        errdefer arena_instance.deinit();
+        const arena = arena_instance.allocator();
+
+        // The string table is not mapped into memory by the loader, so if a section name is in the
+        // string table then we have to map the full image file from disk. This can happen when
+        // a binary is produced with -gdwarf, since the section names are longer than 8 bytes.
+        const mapped_file: ?DebugInfo.MappedFile = mapped: {
+            if (!coff_obj.strtabRequired()) break :mapped null;
+            var name_buffer: [windows.PATH_MAX_WIDE + 4:0]u16 = undefined;
+            name_buffer[0..4].* = .{ '\\', '?', '?', '\\' }; // openFileAbsoluteW requires the prefix to be present
+            const process_handle = windows.GetCurrentProcess();
+            const len = windows.kernel32.GetModuleFileNameExW(
+                process_handle,
+                module.handle,
+                name_buffer[4..],
+                windows.PATH_MAX_WIDE,
+            );
+            if (len == 0) return error.MissingDebugInfo;
+            const coff_file = fs.openFileAbsoluteW(name_buffer[0 .. len + 4 :0], .{}) catch |err| switch (err) {
+                error.Unexpected => |e| return e,
+                error.FileNotFound => return error.MissingDebugInfo,
+
+                error.FileTooBig,
+                error.IsDir,
+                error.NotDir,
+                error.SymLinkLoop,
+                error.NameTooLong,
+                error.InvalidUtf8,
+                error.InvalidWtf8,
+                error.BadPathName,
+                => return error.InvalidDebugInfo,
+
+                error.SystemResources,
+                error.WouldBlock,
+                error.AccessDenied,
+                error.ProcessNotFound,
+                error.PermissionDenied,
+                error.NoSpaceLeft,
+                error.DeviceBusy,
+                error.NoDevice,
+                error.SharingViolation,
+                error.PathAlreadyExists,
+                error.PipeBusy,
+                error.NetworkNotFound,
+                error.AntivirusInterference,
+                error.ProcessFdQuotaExceeded,
+                error.SystemFdQuotaExceeded,
+                error.FileLocksNotSupported,
+                error.FileBusy,
+                => return error.ReadFailed,
+            };
+            errdefer coff_file.close();
+            var section_handle: windows.HANDLE = undefined;
+            const create_section_rc = windows.ntdll.NtCreateSection(
+                &section_handle,
+                windows.STANDARD_RIGHTS_REQUIRED | windows.SECTION_QUERY | windows.SECTION_MAP_READ,
+                null,
+                null,
+                windows.PAGE_READONLY,
+                // The documentation states that if no AllocationAttribute is specified, then SEC_COMMIT is the default.
+                // In practice, this isn't the case and specifying 0 will result in INVALID_PARAMETER_6.
+                windows.SEC_COMMIT,
+                coff_file.handle,
+            );
+            if (create_section_rc != .SUCCESS) return error.MissingDebugInfo;
+            errdefer windows.CloseHandle(section_handle);
+            var coff_len: usize = 0;
+            var section_view_ptr: ?[*]const u8 = null;
+            const map_section_rc = windows.ntdll.NtMapViewOfSection(
+                section_handle,
+                process_handle,
+                @ptrCast(&section_view_ptr),
+                null,
+                0,
+                null,
+                &coff_len,
+                .ViewUnmap,
+                0,
+                windows.PAGE_READONLY,
+            );
+            if (map_section_rc != .SUCCESS) return error.MissingDebugInfo;
+            errdefer assert(windows.ntdll.NtUnmapViewOfSection(process_handle, @constCast(section_view_ptr.?)) == .SUCCESS);
+            const section_view = section_view_ptr.?[0..coff_len];
+            coff_obj = coff.Coff.init(section_view, false) catch return error.InvalidDebugInfo;
+            break :mapped .{
+                .file = coff_file,
+                .section_handle = section_handle,
+                .section_view = section_view,
+            };
+        };
+        errdefer if (mapped_file) |*mf| mf.deinit();
+
+        const coff_image_base = coff_obj.getImageBase();
+
+        var opt_dwarf: ?Dwarf = dwarf: {
+            if (coff_obj.getSectionByName(".debug_info") == null) break :dwarf null;
+
+            var sections: Dwarf.SectionArray = undefined;
+            inline for (@typeInfo(Dwarf.Section.Id).@"enum".fields, 0..) |section, i| {
+                sections[i] = if (coff_obj.getSectionByName("." ++ section.name)) |section_header| .{
+                    .data = try coff_obj.getSectionDataAlloc(section_header, arena),
+                    .owned = false,
+                } else null;
+            }
+            break :dwarf .{ .sections = sections };
+        };
+        errdefer if (opt_dwarf) |*dwarf| dwarf.deinit(gpa);
+
+        if (opt_dwarf) |*dwarf| {
+            dwarf.open(gpa, native_endian) catch |err| switch (err) {
+                error.Overflow,
+                error.EndOfStream,
+                error.StreamTooLong,
+                error.ReadFailed,
+                => return error.InvalidDebugInfo,
+
+                error.InvalidDebugInfo,
+                error.MissingDebugInfo,
+                error.OutOfMemory,
+                => |e| return e,
+            };
+        }
+
+        var opt_pdb: ?Pdb = pdb: {
+            const path = coff_obj.getPdbPath() catch {
+                return error.InvalidDebugInfo;
+            } orelse {
+                break :pdb null;
+            };
+            const pdb_file_open_result = if (fs.path.isAbsolute(path)) res: {
+                break :res std.fs.cwd().openFile(path, .{});
+            } else res: {
+                const self_dir = fs.selfExeDirPathAlloc(gpa) catch |err| switch (err) {
+                    error.OutOfMemory, error.Unexpected => |e| return e,
+                    else => return error.ReadFailed,
+                };
+                defer gpa.free(self_dir);
+                const abs_path = try fs.path.join(gpa, &.{ self_dir, path });
+                defer gpa.free(abs_path);
+                break :res std.fs.cwd().openFile(abs_path, .{});
+            };
+            const pdb_file = pdb_file_open_result catch |err| switch (err) {
+                error.FileNotFound, error.IsDir => break :pdb null,
+                else => return error.ReadFailed,
+            };
+            errdefer pdb_file.close();
+
+            const pdb_reader = try arena.create(std.fs.File.Reader);
+            pdb_reader.* = pdb_file.reader(try arena.alloc(u8, 4096));
+
+            var pdb = Pdb.init(gpa, pdb_reader) catch |err| switch (err) {
+                error.OutOfMemory, error.ReadFailed, error.Unexpected => |e| return e,
+                else => return error.InvalidDebugInfo,
+            };
+            errdefer pdb.deinit();
+            pdb.parseInfoStream() catch |err| switch (err) {
+                error.UnknownPDBVersion => return error.UnsupportedDebugInfo,
+                error.EndOfStream => return error.InvalidDebugInfo,
+
+                error.InvalidDebugInfo,
+                error.MissingDebugInfo,
+                error.OutOfMemory,
+                error.ReadFailed,
+                => |e| return e,
+            };
+            pdb.parseDbiStream() catch |err| switch (err) {
+                error.UnknownPDBVersion => return error.UnsupportedDebugInfo,
+
+                error.EndOfStream,
+                error.EOF,
+                error.StreamTooLong,
+                error.WriteFailed,
+                => return error.InvalidDebugInfo,
+
+                error.InvalidDebugInfo,
+                error.OutOfMemory,
+                error.ReadFailed,
+                => |e| return e,
+            };
+
+            if (!std.mem.eql(u8, &coff_obj.guid, &pdb.guid) or coff_obj.age != pdb.age)
+                return error.InvalidDebugInfo;
+
+            break :pdb pdb;
+        };
+        errdefer if (opt_pdb) |*pdb| {
+            pdb.file_reader.file.close();
+            pdb.deinit();
+        };
+
+        const coff_section_headers: []coff.SectionHeader = if (opt_pdb != null) csh: {
+            break :csh try coff_obj.getSectionHeadersAlloc(arena);
+        } else &.{};
+
+        return .{
+            .arena = arena_instance.state,
+            .coff_image_base = coff_image_base,
+            .mapped_file = mapped_file,
+            .dwarf = opt_dwarf,
+            .pdb = opt_pdb,
+            .coff_section_headers = coff_section_headers,
+        };
+    }
+};
+
+/// Assumes we already hold `si.mutex`.
+fn findModule(si: *SelfInfo, gpa: Allocator, address: usize) error{ MissingDebugInfo, OutOfMemory, Unexpected }!*Module {
+    for (si.modules.items) |*mod| {
+        if (address >= mod.base_address and address < mod.base_address + mod.size) {
+            return mod;
+        }
+    }
+
+    // A new module might have been loaded; rebuild the list.
+    {
+        for (si.modules.items) |*mod| {
+            const di = &(mod.di orelse continue catch continue);
+            di.deinit(gpa);
+        }
+        si.modules.clearRetainingCapacity();
+
+        var module_name_arena = si.module_name_arena.promote(gpa);
+        defer si.module_name_arena = module_name_arena.state;
+        _ = module_name_arena.reset(.retain_capacity);
+
+        const handle = windows.kernel32.CreateToolhelp32Snapshot(windows.TH32CS_SNAPMODULE | windows.TH32CS_SNAPMODULE32, 0);
+        if (handle == windows.INVALID_HANDLE_VALUE) {
+            return windows.unexpectedError(windows.GetLastError());
+        }
+        defer windows.CloseHandle(handle);
+        var entry: windows.MODULEENTRY32 = undefined;
+        entry.dwSize = @sizeOf(windows.MODULEENTRY32);
+        var result = windows.kernel32.Module32First(handle, &entry);
+        while (result != 0) : (result = windows.kernel32.Module32Next(handle, &entry)) {
+            try si.modules.append(gpa, .{
+                .base_address = @intFromPtr(entry.modBaseAddr),
+                .size = entry.modBaseSize,
+                .name = try module_name_arena.allocator().dupe(
+                    u8,
+                    std.mem.sliceTo(&entry.szModule, 0),
+                ),
+                .handle = entry.hModule,
+                .di = null,
+            });
+        }
+    }
+
+    for (si.modules.items) |*mod| {
+        if (address >= mod.base_address and address < mod.base_address + mod.size) {
+            return mod;
+        }
+    }
+
+    return error.MissingDebugInfo;
+}
+
+const std = @import("std");
+const Allocator = std.mem.Allocator;
+const Dwarf = std.debug.Dwarf;
+const Pdb = std.debug.Pdb;
+const Error = std.debug.SelfInfoError;
+const assert = std.debug.assert;
+const coff = std.coff;
+const fs = std.fs;
+const windows = std.os.windows;
+
+const builtin = @import("builtin");
+const native_endian = builtin.target.cpu.arch.endian();
+
+const SelfInfo = @This();
diff --git a/lib/std/debug/cpu_context.zig b/lib/std/debug/cpu_context.zig
new file mode 100644
index 0000000000..1089e74aa6
--- /dev/null
+++ b/lib/std/debug/cpu_context.zig
@@ -0,0 +1,1028 @@
+/// Register state for the native architecture, used by `std.debug` for stack unwinding.
+/// `noreturn` if there is no implementation for the native architecture.
+/// This can be overridden by exposing a declaration `root.debug.CpuContext`.
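+/// For example, a root module could (hypothetically) supply its own implementation:
+///
+///     pub const debug = struct {
+///         pub const CpuContext = @import("my_cpu_context.zig").CpuContext;
+///     };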
+pub const Native = if (@hasDecl(root, "debug") and @hasDecl(root.debug, "CpuContext")) + root.debug.CpuContext +else switch (native_arch) { + .x86 => X86, + .x86_64 => X86_64, + .arm, .armeb, .thumb, .thumbeb => Arm, + .aarch64, .aarch64_be => Aarch64, + else => noreturn, +}; + +pub const DwarfRegisterError = error{ + InvalidRegister, + UnsupportedRegister, +}; + +pub fn fromPosixSignalContext(ctx_ptr: ?*const anyopaque) ?Native { + if (signal_ucontext_t == void) return null; + const uc: *const signal_ucontext_t = @ptrCast(@alignCast(ctx_ptr)); + return switch (native_arch) { + .x86 => switch (native_os) { + .linux, .netbsd, .solaris, .illumos => .{ .gprs = .init(.{ + .eax = uc.mcontext.gregs[std.posix.REG.EAX], + .ecx = uc.mcontext.gregs[std.posix.REG.ECX], + .edx = uc.mcontext.gregs[std.posix.REG.EDX], + .ebx = uc.mcontext.gregs[std.posix.REG.EBX], + .esp = uc.mcontext.gregs[std.posix.REG.ESP], + .ebp = uc.mcontext.gregs[std.posix.REG.EBP], + .esi = uc.mcontext.gregs[std.posix.REG.ESI], + .edi = uc.mcontext.gregs[std.posix.REG.EDI], + .eip = uc.mcontext.gregs[std.posix.REG.EIP], + }) }, + else => null, + }, + .x86_64 => switch (native_os) { + .linux, .solaris, .illumos => .{ .gprs = .init(.{ + .rax = uc.mcontext.gregs[std.posix.REG.RAX], + .rdx = uc.mcontext.gregs[std.posix.REG.RDX], + .rcx = uc.mcontext.gregs[std.posix.REG.RCX], + .rbx = uc.mcontext.gregs[std.posix.REG.RBX], + .rsi = uc.mcontext.gregs[std.posix.REG.RSI], + .rdi = uc.mcontext.gregs[std.posix.REG.RDI], + .rbp = uc.mcontext.gregs[std.posix.REG.RBP], + .rsp = uc.mcontext.gregs[std.posix.REG.RSP], + .r8 = uc.mcontext.gregs[std.posix.REG.R8], + .r9 = uc.mcontext.gregs[std.posix.REG.R9], + .r10 = uc.mcontext.gregs[std.posix.REG.R10], + .r11 = uc.mcontext.gregs[std.posix.REG.R11], + .r12 = uc.mcontext.gregs[std.posix.REG.R12], + .r13 = uc.mcontext.gregs[std.posix.REG.R13], + .r14 = uc.mcontext.gregs[std.posix.REG.R14], + .r15 = uc.mcontext.gregs[std.posix.REG.R15], + .rip = uc.mcontext.gregs[std.posix.REG.RIP], + }) }, + .freebsd => .{ .gprs = .init(.{ + .rax = uc.mcontext.rax, + .rdx = uc.mcontext.rdx, + .rcx = uc.mcontext.rcx, + .rbx = uc.mcontext.rbx, + .rsi = uc.mcontext.rsi, + .rdi = uc.mcontext.rdi, + .rbp = uc.mcontext.rbp, + .rsp = uc.mcontext.rsp, + .r8 = uc.mcontext.r8, + .r9 = uc.mcontext.r9, + .r10 = uc.mcontext.r10, + .r11 = uc.mcontext.r11, + .r12 = uc.mcontext.r12, + .r13 = uc.mcontext.r13, + .r14 = uc.mcontext.r14, + .r15 = uc.mcontext.r15, + .rip = uc.mcontext.rip, + }) }, + .openbsd => .{ .gprs = .init(.{ + .rax = @bitCast(uc.sc_rax), + .rdx = @bitCast(uc.sc_rdx), + .rcx = @bitCast(uc.sc_rcx), + .rbx = @bitCast(uc.sc_rbx), + .rsi = @bitCast(uc.sc_rsi), + .rdi = @bitCast(uc.sc_rdi), + .rbp = @bitCast(uc.sc_rbp), + .rsp = @bitCast(uc.sc_rsp), + .r8 = @bitCast(uc.sc_r8), + .r9 = @bitCast(uc.sc_r9), + .r10 = @bitCast(uc.sc_r10), + .r11 = @bitCast(uc.sc_r11), + .r12 = @bitCast(uc.sc_r12), + .r13 = @bitCast(uc.sc_r13), + .r14 = @bitCast(uc.sc_r14), + .r15 = @bitCast(uc.sc_r15), + .rip = @bitCast(uc.sc_rip), + }) }, + .macos, .ios => .{ .gprs = .init(.{ + .rax = uc.mcontext.ss.rax, + .rdx = uc.mcontext.ss.rdx, + .rcx = uc.mcontext.ss.rcx, + .rbx = uc.mcontext.ss.rbx, + .rsi = uc.mcontext.ss.rsi, + .rdi = uc.mcontext.ss.rdi, + .rbp = uc.mcontext.ss.rbp, + .rsp = uc.mcontext.ss.rsp, + .r8 = uc.mcontext.ss.r8, + .r9 = uc.mcontext.ss.r9, + .r10 = uc.mcontext.ss.r10, + .r11 = uc.mcontext.ss.r11, + .r12 = uc.mcontext.ss.r12, + .r13 = uc.mcontext.ss.r13, + .r14 = uc.mcontext.ss.r14, + .r15 = uc.mcontext.ss.r15, + .rip = 
uc.mcontext.ss.rip,
+        }) },
+        else => null,
+    },
+    .arm, .armeb, .thumb, .thumbeb => switch (builtin.os.tag) {
+        .linux => .{
+            .r = .{
+                uc.mcontext.arm_r0,
+                uc.mcontext.arm_r1,
+                uc.mcontext.arm_r2,
+                uc.mcontext.arm_r3,
+                uc.mcontext.arm_r4,
+                uc.mcontext.arm_r5,
+                uc.mcontext.arm_r6,
+                uc.mcontext.arm_r7,
+                uc.mcontext.arm_r8,
+                uc.mcontext.arm_r9,
+                uc.mcontext.arm_r10,
+                uc.mcontext.arm_fp, // r11 = fp
+                uc.mcontext.arm_ip, // r12 = ip
+                uc.mcontext.arm_sp, // r13 = sp
+                uc.mcontext.arm_lr, // r14 = lr
+                uc.mcontext.arm_pc, // r15 = pc
+            },
+        },
+        else => null,
+    },
+    .aarch64, .aarch64_be => switch (builtin.os.tag) {
+        .macos, .ios, .tvos, .watchos, .visionos => .{
+            .x = uc.mcontext.ss.regs ++ @as([2]u64, .{
+                uc.mcontext.ss.fp, // x29 = fp
+                uc.mcontext.ss.lr, // x30 = lr
+            }),
+            .sp = uc.mcontext.ss.sp,
+            .pc = uc.mcontext.ss.pc,
+        },
+        .netbsd => .{
+            .x = uc.mcontext.gregs[0..31].*,
+            .sp = uc.mcontext.gregs[31],
+            .pc = uc.mcontext.gregs[32],
+        },
+        .freebsd => .{
+            .x = uc.mcontext.gpregs.x ++ @as([1]u64, .{
+                uc.mcontext.gpregs.lr, // x30 = lr
+            }),
+            .sp = uc.mcontext.gpregs.sp,
+            // On aarch64, the register ELR_EL1 defines the address to return to after handling
+            // a CPU exception (ELR is "Exception Link Register"). FreeBSD's ucontext_t uses
+            // this as the field name, but it's the same thing as the context's PC.
+            .pc = uc.mcontext.gpregs.elr,
+        },
+        .openbsd => .{
+            .x = uc.sc_x ++ .{uc.sc_lr},
+            .sp = uc.sc_sp,
+            // Not a bug; see freebsd above for explanation.
+            .pc = uc.sc_elr,
+        },
+        .linux => .{
+            .x = uc.mcontext.regs,
+            .sp = uc.mcontext.sp,
+            .pc = uc.mcontext.pc,
+        },
+        else => null,
+    },
+    else => null,
+};
+}
+
+pub fn fromWindowsContext(ctx: *const std.os.windows.CONTEXT) Native {
+    return switch (native_arch) {
+        .x86 => .{ .gprs = .init(.{
+            .eax = ctx.Eax,
+            .ecx = ctx.Ecx,
+            .edx = ctx.Edx,
+            .ebx = ctx.Ebx,
+            .esp = ctx.Esp,
+            .ebp = ctx.Ebp,
+            .esi = ctx.Esi,
+            .edi = ctx.Edi,
+            .eip = ctx.Eip,
+        }) },
+        .x86_64 => .{ .gprs = .init(.{
+            .rax = ctx.Rax,
+            .rdx = ctx.Rdx,
+            .rcx = ctx.Rcx,
+            .rbx = ctx.Rbx,
+            .rsi = ctx.Rsi,
+            .rdi = ctx.Rdi,
+            .rbp = ctx.Rbp,
+            .rsp = ctx.Rsp,
+            .r8 = ctx.R8,
+            .r9 = ctx.R9,
+            .r10 = ctx.R10,
+            .r11 = ctx.R11,
+            .r12 = ctx.R12,
+            .r13 = ctx.R13,
+            .r14 = ctx.R14,
+            .r15 = ctx.R15,
+            .rip = ctx.Rip,
+        }) },
+        .aarch64, .aarch64_be => .{
+            .x = ctx.DUMMYUNIONNAME.X[0..31].*,
+            .sp = ctx.Sp,
+            .pc = ctx.Pc,
+        },
+        .thumb => .{ .r = .{
+            ctx.R0,  ctx.R1,  ctx.R2,  ctx.R3,
+            ctx.R4,  ctx.R5,  ctx.R6,  ctx.R7,
+            ctx.R8,  ctx.R9,  ctx.R10, ctx.R11,
+            ctx.R12, ctx.Sp,  ctx.Lr,  ctx.Pc,
+        } },
+        else => comptime unreachable,
+    };
+}
+
+pub const X86 = struct {
+    /// The first 8 registers here intentionally match the order of registers in the x86 instruction
+    /// encoding. This order is inherited by the PUSHA instruction and the DWARF register mappings,
+    /// among other things.
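+    /// (`eip` is appended as register number 8, again matching the DWARF numbering that
+    /// `dwarfRegisterBytes` below indexes into directly.)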
+ pub const Gpr = enum { + // zig fmt: off + eax, ecx, edx, ebx, + esp, ebp, esi, edi, + eip, + // zig fmt: on + }; + gprs: std.enums.EnumArray(Gpr, u32), + + pub inline fn current() X86 { + var ctx: X86 = undefined; + asm volatile ( + \\movl %%eax, 0x00(%%edi) + \\movl %%ecx, 0x04(%%edi) + \\movl %%edx, 0x08(%%edi) + \\movl %%ebx, 0x0c(%%edi) + \\movl %%esp, 0x10(%%edi) + \\movl %%ebp, 0x14(%%edi) + \\movl %%esi, 0x18(%%edi) + \\movl %%edi, 0x1c(%%edi) + \\call 1f + \\1: + \\popl 0x20(%%edi) + : + : [gprs] "{edi}" (&ctx.gprs.values), + : .{ .memory = true }); + return ctx; + } + + pub fn dwarfRegisterBytes(ctx: *X86, register_num: u16) DwarfRegisterError![]u8 { + // System V Application Binary Interface Intel386 Architecture Processor Supplement Version 1.1 + // § 2.4.2 "DWARF Register Number Mapping" + switch (register_num) { + // The order of `Gpr` intentionally matches DWARF's mappings. + // + // x86-macos sometimes uses different mappings (ebp and esp are reversed when the unwind + // information is from `__eh_frame`). This deviation is not considered here, because + // x86-macos is a deprecated target which is not supported by the Zig Standard Library. + 0...8 => return @ptrCast(&ctx.gprs.values[register_num]), + + 9 => return error.UnsupportedRegister, // rflags + 11...18 => return error.UnsupportedRegister, // st0 - st7 + 21...28 => return error.UnsupportedRegister, // xmm0 - xmm7 + 29...36 => return error.UnsupportedRegister, // mm0 - mm7 + 39 => return error.UnsupportedRegister, // mxcsr + 40...45 => return error.UnsupportedRegister, // es, cs, ss, ds, fs, gs + 48 => return error.UnsupportedRegister, // tr + 49 => return error.UnsupportedRegister, // ldtr + 93...94 => return error.UnsupportedRegister, // fs.base, gs.base + + else => return error.InvalidRegister, + } + } +}; + +pub const X86_64 = struct { + /// The order here intentionally matches the order of the DWARF register mappings. It's unclear + /// where those mappings actually originated from---the ordering of the first 4 registers seems + /// quite unusual---but it is currently convenient for us to match DWARF. + pub const Gpr = enum { + // zig fmt: off + rax, rdx, rcx, rbx, + rsi, rdi, rbp, rsp, + r8, r9, r10, r11, + r12, r13, r14, r15, + rip, + // zig fmt: on + }; + gprs: std.enums.EnumArray(Gpr, u64), + + pub inline fn current() X86_64 { + var ctx: X86_64 = undefined; + asm volatile ( + \\movq %%rax, 0x00(%%rdi) + \\movq %%rdx, 0x08(%%rdi) + \\movq %%rcx, 0x10(%%rdi) + \\movq %%rbx, 0x18(%%rdi) + \\movq %%rsi, 0x20(%%rdi) + \\movq %%rdi, 0x28(%%rdi) + \\movq %%rbp, 0x30(%%rdi) + \\movq %%rsp, 0x38(%%rdi) + \\movq %%r8, 0x40(%%rdi) + \\movq %%r9, 0x48(%%rdi) + \\movq %%r10, 0x50(%%rdi) + \\movq %%r11, 0x58(%%rdi) + \\movq %%r12, 0x60(%%rdi) + \\movq %%r13, 0x68(%%rdi) + \\movq %%r14, 0x70(%%rdi) + \\movq %%r15, 0x78(%%rdi) + \\leaq (%%rip), %%rax + \\movq %%rax, 0x80(%%rdi) + \\movq 0x00(%%rdi), %%rax + : + : [gprs] "{rdi}" (&ctx.gprs.values), + : .{ .memory = true }); + return ctx; + } + + pub fn dwarfRegisterBytes(ctx: *X86_64, register_num: u16) DwarfRegisterError![]u8 { + // System V Application Binary Interface AMD64 Architecture Processor Supplement + // § 3.6.2 "DWARF Register Number Mapping" + switch (register_num) { + // The order of `Gpr` intentionally matches DWARF's mappings. 
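+            // (Concretely: rax=0, rdx=1, rcx=2, rbx=3, rsi=4, rdi=5, rbp=6, rsp=7,
+            // r8...r15 = 8...15, and rip=16, the psABI's "return address" column.)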
+ 0...16 => return @ptrCast(&ctx.gprs.values[register_num]), + + 17...32 => return error.UnsupportedRegister, // xmm0 - xmm15 + 33...40 => return error.UnsupportedRegister, // st0 - st7 + 41...48 => return error.UnsupportedRegister, // mm0 - mm7 + 49 => return error.UnsupportedRegister, // rflags + 50...55 => return error.UnsupportedRegister, // es, cs, ss, ds, fs, gs + 58...59 => return error.UnsupportedRegister, // fs.base, gs.base + 62 => return error.UnsupportedRegister, // tr + 63 => return error.UnsupportedRegister, // ldtr + 64 => return error.UnsupportedRegister, // mxcsr + 65 => return error.UnsupportedRegister, // fcw + 66 => return error.UnsupportedRegister, // fsw + + else => return error.InvalidRegister, + } + } +}; + +pub const Arm = struct { + /// The numbered general-purpose registers R0 - R15. + r: [16]u32, + + pub inline fn current() Arm { + var ctx: Arm = undefined; + asm volatile ( + \\// For compatibility with Thumb, we can't write r13 (sp) or r15 (pc) with stm. + \\stm r0, {r0-r12} + \\str r13, [r0, #0x34] + \\str r14, [r0, #0x38] + \\str r15, [r0, #0x3c] + : + : [r] "{r0}" (&ctx.r), + : .{ .memory = true }); + return ctx; + } + + pub fn dwarfRegisterBytes(ctx: *Arm, register_num: u16) DwarfRegisterError![]u8 { + // DWARF for the Arm(r) Architecture § 4.1 "DWARF register names" + switch (register_num) { + // The order of `Gpr` intentionally matches DWARF's mappings. + 0...15 => return @ptrCast(&ctx.r[register_num]), + + 64...95 => return error.UnsupportedRegister, // S0 - S31 + 96...103 => return error.UnsupportedRegister, // F0 - F7 + 104...111 => return error.UnsupportedRegister, // wCGR0 - wCGR7, or ACC0 - ACC7 + 112...127 => return error.UnsupportedRegister, // wR0 - wR15 + 128 => return error.UnsupportedRegister, // SPSR + 129 => return error.UnsupportedRegister, // SPSR_FIQ + 130 => return error.UnsupportedRegister, // SPSR_IRQ + 131 => return error.UnsupportedRegister, // SPSR_ABT + 132 => return error.UnsupportedRegister, // SPSR_UND + 133 => return error.UnsupportedRegister, // SPSR_SVC + 143 => return error.UnsupportedRegister, // RA_AUTH_CODE + 144...150 => return error.UnsupportedRegister, // R8_USR - R14_USR + 151...157 => return error.UnsupportedRegister, // R8_FIQ - R14_FIQ + 158...159 => return error.UnsupportedRegister, // R13_IRQ - R14_IRQ + 160...161 => return error.UnsupportedRegister, // R13_ABT - R14_ABT + 162...163 => return error.UnsupportedRegister, // R13_UND - R14_UND + 164...165 => return error.UnsupportedRegister, // R13_SVC - R14_SVC + 192...199 => return error.UnsupportedRegister, // wC0 - wC7 + 256...287 => return error.UnsupportedRegister, // D0 - D31 + 320 => return error.UnsupportedRegister, // TPIDRURO + 321 => return error.UnsupportedRegister, // TPIDRURW + 322 => return error.UnsupportedRegister, // TPIDPR + 323 => return error.UnsupportedRegister, // HTPIDPR + 8192...16383 => return error.UnsupportedRegister, // Unspecified vendor co-processor register + + else => return error.InvalidRegister, + } + } +}; + +/// This is an `extern struct` so that inline assembly in `current` can use field offsets. +pub const Aarch64 = extern struct { + /// The numbered general-purpose registers X0 - X30. 
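+    /// (By convention, x29 serves as the frame pointer and x30 as the link register.)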
+    x: [31]u64,
+    sp: u64,
+    pc: u64,
+
+    pub inline fn current() Aarch64 {
+        var ctx: Aarch64 = undefined;
+        asm volatile (
+            \\stp x0, x1, [x0, #0x000]
+            \\stp x2, x3, [x0, #0x010]
+            \\stp x4, x5, [x0, #0x020]
+            \\stp x6, x7, [x0, #0x030]
+            \\stp x8, x9, [x0, #0x040]
+            \\stp x10, x11, [x0, #0x050]
+            \\stp x12, x13, [x0, #0x060]
+            \\stp x14, x15, [x0, #0x070]
+            \\stp x16, x17, [x0, #0x080]
+            \\stp x18, x19, [x0, #0x090]
+            \\stp x20, x21, [x0, #0x0a0]
+            \\stp x22, x23, [x0, #0x0b0]
+            \\stp x24, x25, [x0, #0x0c0]
+            \\stp x26, x27, [x0, #0x0d0]
+            \\stp x28, x29, [x0, #0x0e0]
+            \\str x30, [x0, #0x0f0]
+            \\mov x1, sp
+            \\str x1, [x0, #0x0f8]
+            \\adr x1, .
+            \\str x1, [x0, #0x100]
+            \\ldr x1, [x0, #0x008]
+            :
+            : [gprs] "{x0}" (&ctx),
+            : .{ .memory = true });
+        return ctx;
+    }
+
+    pub fn dwarfRegisterBytes(ctx: *Aarch64, register_num: u16) DwarfRegisterError![]u8 {
+        // DWARF for the Arm(r) 64-bit Architecture (AArch64) § 4.1 "DWARF register names"
+        switch (register_num) {
+            // The order of `Gpr` intentionally matches DWARF's mappings.
+            0...30 => return @ptrCast(&ctx.x[register_num]),
+            31 => return @ptrCast(&ctx.sp),
+            32 => return @ptrCast(&ctx.pc),
+
+            33 => return error.UnsupportedRegister, // ELR_mode
+            34 => return error.UnsupportedRegister, // RA_SIGN_STATE
+            35 => return error.UnsupportedRegister, // TPIDRRO_EL0
+            36 => return error.UnsupportedRegister, // TPIDR_EL0
+            37 => return error.UnsupportedRegister, // TPIDR_EL1
+            38 => return error.UnsupportedRegister, // TPIDR_EL2
+            39 => return error.UnsupportedRegister, // TPIDR_EL3
+            46 => return error.UnsupportedRegister, // VG
+            47 => return error.UnsupportedRegister, // FFR
+            48...63 => return error.UnsupportedRegister, // P0 - P15
+            64...95 => return error.UnsupportedRegister, // V0 - V31
+            96...127 => return error.UnsupportedRegister, // Z0 - Z31
+
+            else => return error.InvalidRegister,
+        }
+    }
+};
+
+const signal_ucontext_t = switch (native_os) {
+    .linux => std.os.linux.ucontext_t,
+    .emscripten => std.os.emscripten.ucontext_t,
+    .freebsd => std.os.freebsd.ucontext_t,
+    .macos, .ios, .tvos, .watchos, .visionos => extern struct {
+        onstack: c_int,
+        sigmask: std.c.sigset_t,
+        stack: std.c.stack_t,
+        link: ?*signal_ucontext_t,
+        mcsize: u64,
+        mcontext: *mcontext_t,
+        const mcontext_t = switch (native_arch) {
+            .aarch64 => extern struct {
+                es: extern struct {
+                    far: u64, // Virtual Fault Address
+                    esr: u32, // Exception syndrome
+                    exception: u32, // Number of arm exception taken
+                },
+                ss: extern struct {
+                    /// General purpose registers
+                    regs: [29]u64,
+                    /// Frame pointer x29
+                    fp: u64,
+                    /// Link register x30
+                    lr: u64,
+                    /// Stack pointer x31
+                    sp: u64,
+                    /// Program counter
+                    pc: u64,
+                    /// Current program status register
+                    cpsr: u32,
+                    __pad: u32,
+                },
+                ns: extern struct {
+                    q: [32]u128,
+                    fpsr: u32,
+                    fpcr: u32,
+                },
+            },
+            .x86_64 => extern struct {
+                es: extern struct {
+                    trapno: u16,
+                    cpu: u16,
+                    err: u32,
+                    faultvaddr: u64,
+                },
+                ss: extern struct {
+                    rax: u64,
+                    rbx: u64,
+                    rcx: u64,
+                    rdx: u64,
+                    rdi: u64,
+                    rsi: u64,
+                    rbp: u64,
+                    rsp: u64,
+                    r8: u64,
+                    r9: u64,
+                    r10: u64,
+                    r11: u64,
+                    r12: u64,
+                    r13: u64,
+                    r14: u64,
+                    r15: u64,
+                    rip: u64,
+                    rflags: u64,
+                    cs: u64,
+                    fs: u64,
+                    gs: u64,
+                },
+                fs: extern struct {
+                    reserved: [2]c_int,
+                    fcw: u16,
+                    fsw: u16,
+                    ftw: u8,
+                    rsrv1: u8,
+                    fop: u16,
+                    ip: u32,
+                    cs: u16,
+                    rsrv2: u16,
+                    dp: u32,
+                    ds: u16,
+                    rsrv3: u16,
+                    mxcsr: u32,
+                    mxcsrmask: u32,
+                    stmm: [8]stmm_reg,
+                    xmm: [16]xmm_reg,
+                    rsrv4: [96]u8,
+                    reserved1: c_int,
+
+                    const stmm_reg = [16]u8;
+                    const
xmm_reg = [16]u8; + }, + }, + else => void, + }; + }, + .solaris, .illumos => extern struct { + flags: u64, + link: ?*signal_ucontext_t, + sigmask: std.c.sigset_t, + stack: std.c.stack_t, + mcontext: mcontext_t, + brand_data: [3]?*anyopaque, + filler: [2]i64, + const mcontext_t = extern struct { + gregs: [28]u64, + fpregs: std.c.fpregset_t, + }; + }, + .openbsd => switch (builtin.cpu.arch) { + .x86_64 => extern struct { + sc_rdi: c_long, + sc_rsi: c_long, + sc_rdx: c_long, + sc_rcx: c_long, + sc_r8: c_long, + sc_r9: c_long, + sc_r10: c_long, + sc_r11: c_long, + sc_r12: c_long, + sc_r13: c_long, + sc_r14: c_long, + sc_r15: c_long, + sc_rbp: c_long, + sc_rbx: c_long, + sc_rax: c_long, + sc_gs: c_long, + sc_fs: c_long, + sc_es: c_long, + sc_ds: c_long, + sc_trapno: c_long, + sc_err: c_long, + sc_rip: c_long, + sc_cs: c_long, + sc_rflags: c_long, + sc_rsp: c_long, + sc_ss: c_long, + + sc_fpstate: *anyopaque, // struct fxsave64 * + __sc_unused: c_int, + sc_mask: c_int, + sc_cookie: c_long, + }, + .aarch64 => extern struct { + __sc_unused: c_int, + sc_mask: c_int, + sc_sp: c_ulong, + sc_lr: c_ulong, + sc_elr: c_ulong, + sc_spsr: c_ulong, + sc_x: [30]c_ulong, + sc_cookie: c_long, + }, + else => void, + }, + .netbsd => extern struct { + flags: u32, + link: ?*signal_ucontext_t, + sigmask: std.c.sigset_t, + stack: std.c.stack_t, + mcontext: mcontext_t, + __pad: [ + switch (builtin.cpu.arch) { + .x86 => 4, + .mips, .mipsel, .mips64, .mips64el => 14, + .arm, .armeb, .thumb, .thumbeb => 1, + .sparc, .sparc64 => if (@sizeOf(usize) == 4) 43 else 8, + else => 0, + } + ]u32, + const mcontext_t = switch (builtin.cpu.arch) { + .aarch64, .aarch64_be => extern struct { + gregs: [35]u64, + fregs: [528]u8 align(16), + spare: [8]u64, + }, + .x86 => extern struct { + gregs: [19]u32, + fpregs: [161]u32, + mc_tlsbase: u32, + }, + .x86_64 => extern struct { + gregs: [26]u64, + mc_tlsbase: u64, + fpregs: [512]u8 align(8), + }, + else => void, + }; + }, + .dragonfly => extern struct { + sigmask: std.c.sigset_t, + mcontext: mcontext_t, + link: ?*signal_ucontext_t, + stack: std.c.stack_t, + cofunc: ?*fn (?*signal_ucontext_t, ?*anyopaque) void, + arg: ?*void, + _spare: [4]c_int, + const mcontext_t = extern struct { + const register_t = isize; + onstack: register_t, // XXX - sigcontext compat. + rdi: register_t, + rsi: register_t, + rdx: register_t, + rcx: register_t, + r8: register_t, + r9: register_t, + rax: register_t, + rbx: register_t, + rbp: register_t, + r10: register_t, + r11: register_t, + r12: register_t, + r13: register_t, + r14: register_t, + r15: register_t, + xflags: register_t, + trapno: register_t, + addr: register_t, + flags: register_t, + err: register_t, + rip: register_t, + cs: register_t, + rflags: register_t, + rsp: register_t, // machine state + ss: register_t, + + len: c_uint, // sizeof(mcontext_t) + fpformat: c_uint, + ownedfp: c_uint, + reserved: c_uint, + unused: [8]c_uint, + + // NOTE! 64-byte aligned as of here. Also must match savefpu structure. 
+ fpregs: [256]c_int align(64), + }; + }, + .serenity => extern struct { + link: ?*signal_ucontext_t, + sigmask: std.c.sigset_t, + stack: std.c.stack_t, + mcontext: mcontext_t, + const mcontext_t = switch (builtin.cpu.arch) { + // https://github.com/SerenityOS/serenity/blob/200e91cd7f1ec5453799a2720d4dc114a59cc289/Kernel/Arch/aarch64/mcontext.h#L15-L19 + .aarch64 => extern struct { + x: [31]u64, + sp: u64, + pc: u64, + }, + // https://github.com/SerenityOS/serenity/blob/66f8d0f031ef25c409dbb4fecaa454800fecae0f/Kernel/Arch/riscv64/mcontext.h#L15-L18 + .riscv64 => extern struct { + x: [31]u64, + pc: u64, + }, + // https://github.com/SerenityOS/serenity/blob/7b9ea3efdec9f86a1042893e8107d0b23aad8727/Kernel/Arch/x86_64/mcontext.h#L15-L40 + .x86_64 => extern struct { + rax: u64, + rcx: u64, + rdx: u64, + rbx: u64, + rsp: u64, + rbp: u64, + rsi: u64, + rdi: u64, + rip: u64, + r8: u64, + r9: u64, + r10: u64, + r11: u64, + r12: u64, + r13: u64, + r14: u64, + r15: u64, + rflags: u64, + cs: u32, + ss: u32, + ds: u32, + es: u32, + fs: u32, + gs: u32, + }, + else => void, + }; + }, + .haiku => extern struct { + link: ?*signal_ucontext_t, + sigmask: std.c.sigset_t, + stack: std.c.stack_t, + mcontext: mcontext_t, + const mcontext_t = switch (builtin.cpu.arch) { + .arm, .thumb => extern struct { + r0: u32, + r1: u32, + r2: u32, + r3: u32, + r4: u32, + r5: u32, + r6: u32, + r7: u32, + r8: u32, + r9: u32, + r10: u32, + r11: u32, + r12: u32, + r13: u32, + r14: u32, + r15: u32, + cpsr: u32, + }, + .aarch64 => extern struct { + x: [10]u64, + lr: u64, + sp: u64, + elr: u64, + spsr: u64, + fp_q: [32]u128, + fpsr: u32, + fpcr: u32, + }, + .m68k => extern struct { + pc: u32, + d0: u32, + d1: u32, + d2: u32, + d3: u32, + d4: u32, + d5: u32, + d6: u32, + d7: u32, + a0: u32, + a1: u32, + a2: u32, + a3: u32, + a4: u32, + a5: u32, + a6: u32, + a7: u32, + ccr: u8, + f0: f64, + f1: f64, + f2: f64, + f3: f64, + f4: f64, + f5: f64, + f6: f64, + f7: f64, + f8: f64, + f9: f64, + f10: f64, + f11: f64, + f12: f64, + f13: f64, + }, + .mipsel => extern struct { + r0: u32, + }, + .powerpc => extern struct { + pc: u32, + r0: u32, + r1: u32, + r2: u32, + r3: u32, + r4: u32, + r5: u32, + r6: u32, + r7: u32, + r8: u32, + r9: u32, + r10: u32, + r11: u32, + r12: u32, + f0: f64, + f1: f64, + f2: f64, + f3: f64, + f4: f64, + f5: f64, + f6: f64, + f7: f64, + f8: f64, + f9: f64, + f10: f64, + f11: f64, + f12: f64, + f13: f64, + reserved: u32, + fpscr: u32, + ctr: u32, + xer: u32, + cr: u32, + msr: u32, + lr: u32, + }, + .riscv64 => extern struct { + x: [31]u64, + pc: u64, + f: [32]f64, + fcsr: u64, + }, + .sparc64 => extern struct { + g1: u64, + g2: u64, + g3: u64, + g4: u64, + g5: u64, + g6: u64, + g7: u64, + o0: u64, + o1: u64, + o2: u64, + o3: u64, + o4: u64, + o5: u64, + sp: u64, + o7: u64, + l0: u64, + l1: u64, + l2: u64, + l3: u64, + l4: u64, + l5: u64, + l6: u64, + l7: u64, + i0: u64, + i1: u64, + i2: u64, + i3: u64, + i4: u64, + i5: u64, + fp: u64, + i7: u64, + }, + .x86 => extern struct { + pub const old_extended_regs = extern struct { + control: u16, + reserved1: u16, + status: u16, + reserved2: u16, + tag: u16, + reserved3: u16, + eip: u32, + cs: u16, + opcode: u16, + datap: u32, + ds: u16, + reserved4: u16, + fp_mmx: [8][10]u8, + }; + + pub const fp_register = extern struct { value: [10]u8, reserved: [6]u8 }; + + pub const xmm_register = extern struct { value: [16]u8 }; + + pub const new_extended_regs = extern struct { + control: u16, + status: u16, + tag: u16, + opcode: u16, + eip: u32, + cs: u16, + reserved1: u16, + datap: 
u32,
+                    ds: u16,
+                    reserved2: u16,
+                    mxcsr: u32,
+                    reserved3: u32,
+                    fp_mmx: [8]fp_register,
+                    xmmx: [8]xmm_register,
+                    reserved4: [224]u8,
+                };
+
+                pub const extended_regs = extern struct {
+                    state: extern union {
+                        old_format: old_extended_regs,
+                        new_format: new_extended_regs,
+                    },
+                    format: u32,
+                };
+
+                eip: u32,
+                eflags: u32,
+                eax: u32,
+                ecx: u32,
+                edx: u32,
+                esp: u32,
+                ebp: u32,
+                reserved: u32,
+                xregs: extended_regs,
+                edi: u32,
+                esi: u32,
+                ebx: u32,
+            },
+            .x86_64 => extern struct {
+                pub const fp_register = extern struct {
+                    value: [10]u8,
+                    reserved: [6]u8,
+                };
+
+                pub const xmm_register = extern struct {
+                    value: [16]u8,
+                };
+
+                pub const fpu_state = extern struct {
+                    control: u16,
+                    status: u16,
+                    tag: u16,
+                    opcode: u16,
+                    rip: u64,
+                    rdp: u64,
+                    mxcsr: u32,
+                    mscsr_mask: u32,
+
+                    fp_mmx: [8]fp_register,
+                    xmm: [16]xmm_register,
+                    reserved: [96]u8,
+                };
+
+                pub const xstate_hdr = extern struct {
+                    bv: u64,
+                    xcomp_bv: u64,
+                    reserved: [48]u8,
+                };
+
+                pub const savefpu = extern struct {
+                    fxsave: fpu_state,
+                    xstate: xstate_hdr,
+                    ymm: [16]xmm_register,
+                };
+
+                rax: u64,
+                rbx: u64,
+                rcx: u64,
+                rdx: u64,
+                rdi: u64,
+                rsi: u64,
+                rbp: u64,
+                r8: u64,
+                r9: u64,
+                r10: u64,
+                r11: u64,
+                r12: u64,
+                r13: u64,
+                r14: u64,
+                r15: u64,
+                rsp: u64,
+                rip: u64,
+                rflags: u64,
+                fpu: savefpu,
+            },
+            else => void,
+        };
+    },
+    else => void,
+};
+
+const std = @import("../std.zig");
+const root = @import("root");
+const builtin = @import("builtin");
+const native_arch = @import("builtin").target.cpu.arch;
+const native_os = @import("builtin").target.os.tag;
