diff options
| author | mlugg <mlugg@mlugg.co.uk> | 2025-09-01 16:50:39 +0100 |
|---|---|---|
| committer | mlugg <mlugg@mlugg.co.uk> | 2025-09-30 13:44:49 +0100 |
| commit | b750e7cf9e2a1225b20ef7fdf53df9ef97cf8065 (patch) | |
| tree | d4760dd9e1279db621fbf4318264f951117a2172 /lib/std/debug/Dwarf/Unwind.zig | |
| parent | b706949736fe67e104a14ac1dcaac8b7eb1cc33f (diff) | |
| download | zig-b750e7cf9e2a1225b20ef7fdf53df9ef97cf8065.tar.gz zig-b750e7cf9e2a1225b20ef7fdf53df9ef97cf8065.zip | |
change one million things
Diffstat (limited to 'lib/std/debug/Dwarf/Unwind.zig')
| -rw-r--r-- | lib/std/debug/Dwarf/Unwind.zig | 1002 |
1 files changed, 496 insertions, 506 deletions
diff --git a/lib/std/debug/Dwarf/Unwind.zig b/lib/std/debug/Dwarf/Unwind.zig index 1da318a048..a51c417e7c 100644 --- a/lib/std/debug/Dwarf/Unwind.zig +++ b/lib/std/debug/Dwarf/Unwind.zig @@ -1,632 +1,622 @@ -sections: SectionArray = @splat(null), +pub const VirtualMachine = @import("Unwind/VirtualMachine.zig"); -/// Starts out non-`null` if the `.eh_frame_hdr` section is present. May become `null` later if we -/// find that `.eh_frame_hdr` is incomplete. -eh_frame_hdr: ?ExceptionFrameHeader = null, -/// These lookup tables are only used if `eh_frame_hdr` is null -cie_map: std.AutoArrayHashMapUnmanaged(u64, CommonInformationEntry) = .empty, -/// Sorted by start_pc -fde_list: std.ArrayList(FrameDescriptionEntry) = .empty, - -pub const Section = struct { +/// The contents of the `.debug_frame` section as specified by DWARF. This might be a more reliable +/// stack unwind mechanism in some cases, or it may be present when `.eh_frame` is not, but fetching +/// the data requires loading the binary, so it is not a viable approach for fast stack trace +/// capturing within a process. +debug_frame: ?struct { data: []const u8, - - pub const Id = enum { - debug_frame, - eh_frame, - eh_frame_hdr, - }; + /// Offsets into `data` of FDEs, sorted by ascending `pc_begin`. + sorted_fdes: []SortedFdeEntry, +}, + +/// Data associated with the `.eh_frame` and `.eh_frame_hdr` sections as defined by LSB Core. The +/// format of `.eh_frame` is an extension of that of DWARF's `.debug_frame` -- in fact it is almost +/// identical, though subtly different in a few places. +eh_frame: ?struct { + header: EhFrameHeader, + /// Though this is a slice, it may be longer than the `.eh_frame` section. When unwinding + /// through the runtime-loaded `.eh_frame_hdr` data, we are not told the size of the `.eh_frame` + /// section, so construct a slice referring to all of the rest of memory. The end of the section + /// must be detected through `EntryHeader.terminator`. + eh_frame_data: []const u8, + /// Offsets into `eh_frame_data` of FDEs, sorted by ascending `pc_begin`. + /// Populated only if `header` does not already contain a lookup table. + sorted_fdes: ?[]SortedFdeEntry, +}, + +const SortedFdeEntry = struct { + /// This FDE's value of `pc_begin`. + pc_begin: u64, + /// Offset into the section of the corresponding FDE, including the entry header. + fde_offset: u64, }; -const num_sections = std.enums.directEnumArrayLen(Section.Id, 0); -pub const SectionArray = [num_sections]?Section; - -pub fn section(unwind: Unwind, dwarf_section: Section.Id) ?[]const u8 { - return if (unwind.sections[@intFromEnum(dwarf_section)]) |s| s.data else null; -} +const Section = enum { debug_frame, eh_frame }; /// This represents the decoded .eh_frame_hdr header -pub const ExceptionFrameHeader = struct { - eh_frame_ptr: usize, - table_enc: u8, - fde_count: usize, - entries: []const u8, - - pub fn entrySize(table_enc: u8) !u8 { - return switch (table_enc & EH.PE.type_mask) { - EH.PE.udata2, - EH.PE.sdata2, - => 4, - EH.PE.udata4, - EH.PE.sdata4, - => 8, - EH.PE.udata8, - EH.PE.sdata8, - => 16, - // This is a binary search table, so all entries must be the same length - else => return bad(), +pub const EhFrameHeader = struct { + vaddr: u64, + eh_frame_vaddr: u64, + search_table: ?struct { + /// The byte offset of the search table into the `.eh_frame_hdr` section. + offset: u8, + encoding: EH.PE, + fde_count: usize, + entries: []const u8, + }, + + pub fn entrySize(table_enc: EH.PE, addr_size_bytes: u8) !u8 { + return switch (table_enc.type) { + .absptr => 2 * addr_size_bytes, + .udata2, .sdata2 => 4, + .udata4, .sdata4 => 8, + .udata8, .sdata8 => 16, + .uleb128, .sleb128 => return bad(), // this is a binary search table; all entries must be the same size + _ => return bad(), }; } - pub fn findEntry( - self: ExceptionFrameHeader, - eh_frame_len: usize, - eh_frame_hdr_ptr: usize, - pc: usize, - cie: *CommonInformationEntry, - fde: *FrameDescriptionEntry, + pub fn parse( + eh_frame_hdr_vaddr: u64, + eh_frame_hdr_bytes: []const u8, + addr_size_bytes: u8, endian: Endian, - ) !void { - const entry_size = try entrySize(self.table_enc); + ) !EhFrameHeader { + var r: Reader = .fixed(eh_frame_hdr_bytes); - var left: usize = 0; - var len: usize = self.fde_count; - var fbr: Reader = .fixed(self.entries); + const version = try r.takeByte(); + if (version != 1) return bad(); - while (len > 1) { - const mid = left + len / 2; + const eh_frame_ptr_enc: EH.PE = @bitCast(try r.takeByte()); + const fde_count_enc: EH.PE = @bitCast(try r.takeByte()); + const table_enc: EH.PE = @bitCast(try r.takeByte()); - fbr.seek = mid * entry_size; - const pc_begin = try readEhPointer(&fbr, self.table_enc, @sizeOf(usize), .{ - .pc_rel_base = @intFromPtr(&self.entries[fbr.seek]), - .follow_indirect = true, - .data_rel_base = eh_frame_hdr_ptr, - }, endian) orelse return bad(); + const eh_frame_ptr = try readEhPointer(&r, eh_frame_ptr_enc, addr_size_bytes, .{ + .pc_rel_base = eh_frame_hdr_vaddr + r.seek, + }, endian); + return .{ + .vaddr = eh_frame_hdr_vaddr, + .eh_frame_vaddr = eh_frame_ptr, + .search_table = table: { + if (fde_count_enc == EH.PE.omit) break :table null; + if (table_enc == EH.PE.omit) break :table null; + const fde_count = try readEhPointer(&r, fde_count_enc, addr_size_bytes, .{ + .pc_rel_base = eh_frame_hdr_vaddr + r.seek, + }, endian); + const entry_size = try entrySize(table_enc, addr_size_bytes); + const bytes_offset = r.seek; + const bytes_len = cast(usize, fde_count * entry_size) orelse return error.EndOfStream; + const bytes = try r.take(bytes_len); + break :table .{ + .encoding = table_enc, + .fde_count = @intCast(fde_count), + .entries = bytes, + .offset = @intCast(bytes_offset), + }; + }, + }; + } + + /// Asserts that `eh_frame_hdr.search_table != null`. + fn findEntry( + eh_frame_hdr: *const EhFrameHeader, + pc: u64, + addr_size_bytes: u8, + endian: Endian, + ) !?u64 { + const table = &eh_frame_hdr.search_table.?; + const table_vaddr = eh_frame_hdr.vaddr + table.offset; + const entry_size = try EhFrameHeader.entrySize(table.encoding, addr_size_bytes); + var left: usize = 0; + var len: usize = table.fde_count; + while (len > 1) { + const mid = left + len / 2; + var entry_reader: Reader = .fixed(table.entries[mid * entry_size ..][0..entry_size]); + const pc_begin = try readEhPointer(&entry_reader, table.encoding, addr_size_bytes, .{ + .pc_rel_base = table_vaddr + left * entry_size, + .data_rel_base = eh_frame_hdr.vaddr, + }, endian); if (pc < pc_begin) { len /= 2; } else { left = mid; - if (pc == pc_begin) break; len -= len / 2; } } - - if (len == 0) return missing(); - fbr.seek = left * entry_size; - - // Read past the pc_begin field of the entry - _ = try readEhPointer(&fbr, self.table_enc, @sizeOf(usize), .{ - .pc_rel_base = @intFromPtr(&self.entries[fbr.seek]), - .follow_indirect = true, - .data_rel_base = eh_frame_hdr_ptr, - }, endian) orelse return bad(); - - const fde_ptr = cast(usize, try readEhPointer(&fbr, self.table_enc, @sizeOf(usize), .{ - .pc_rel_base = @intFromPtr(&self.entries[fbr.seek]), - .follow_indirect = true, - .data_rel_base = eh_frame_hdr_ptr, - }, endian) orelse return bad()) orelse return bad(); - - if (fde_ptr < self.eh_frame_ptr) return bad(); - - const eh_frame = @as([*]const u8, @ptrFromInt(self.eh_frame_ptr))[0..eh_frame_len]; - - const fde_offset = fde_ptr - self.eh_frame_ptr; - var eh_frame_fbr: Reader = .fixed(eh_frame); - eh_frame_fbr.seek = fde_offset; - - const fde_entry_header = try EntryHeader.read(&eh_frame_fbr, .eh_frame, endian); - if (fde_entry_header.type != .fde) return bad(); - - // CIEs always come before FDEs (the offset is a subtraction), so we can assume this memory is readable - const cie_offset = fde_entry_header.type.fde; - eh_frame_fbr.seek = @intCast(cie_offset); - const cie_entry_header = try EntryHeader.read(&eh_frame_fbr, .eh_frame, endian); - if (cie_entry_header.type != .cie) return bad(); - - cie.* = try CommonInformationEntry.parse( - cie_entry_header.entry_bytes, - 0, - true, - cie_entry_header.format, - .eh_frame, - cie_entry_header.length_offset, - @sizeOf(usize), - endian, - ); - - fde.* = try FrameDescriptionEntry.parse( - fde_entry_header.entry_bytes, - 0, - true, - cie.*, - @sizeOf(usize), - endian, - ); - - if (pc < fde.pc_begin or pc >= fde.pc_begin + fde.pc_range) return missing(); + if (len == 0) return null; + var entry_reader: Reader = .fixed(table.entries[left * entry_size ..][0..entry_size]); + // Skip past `pc_begin`; we're now interested in the fde offset + _ = try readEhPointerAbs(&entry_reader, table.encoding.type, addr_size_bytes, endian); + const fde_ptr = try readEhPointer(&entry_reader, table.encoding, addr_size_bytes, .{ + .pc_rel_base = table_vaddr + left * entry_size, + .data_rel_base = eh_frame_hdr.vaddr, + }, endian); + return std.math.sub(u64, fde_ptr, eh_frame_hdr.eh_frame_vaddr) catch bad(); // offset into .eh_frame } }; -pub const EntryHeader = struct { - /// Offset of the length field in the backing buffer - length_offset: usize, - format: Format, - type: union(enum) { - cie, - /// Value is the offset of the corresponding CIE - fde: u64, - terminator, +pub const EntryHeader = union(enum) { + cie: struct { + format: Format, + /// Remaining bytes in the CIE. These are parseable by `CommonInformationEntry.parse`. + bytes_len: u64, + }, + fde: struct { + format: Format, + /// Offset into the section of the corresponding CIE, *including* its entry header. + cie_offset: u64, + /// Remaining bytes in the FDE. These are parseable by `FrameDescriptionEntry.parse`. + bytes_len: u64, }, - /// The entry's contents, not including the ID field - entry_bytes: []const u8, + /// The `.eh_frame` format includes terminators which indicate that the last CIE/FDE has been + /// reached. However, `.debug_frame` does not include such a terminator, so the caller must + /// keep track of how many section bytes remain when parsing all entries in `.debug_frame`. + terminator, - /// The length of the entry including the ID field, but not the length field itself - pub fn entryLength(self: EntryHeader) usize { - return self.entry_bytes.len + @as(u8, if (self.format == .@"64") 8 else 4); - } + pub fn read(r: *Reader, header_section_offset: u64, section: Section, endian: Endian) !EntryHeader { + const unit_header = try Dwarf.readUnitHeader(r, endian); + if (unit_header.unit_length == 0) return .terminator; - /// Reads a header for either an FDE or a CIE, then advances the fbr to the - /// position after the trailing structure. - /// - /// `fbr` must be backed by either the .eh_frame or .debug_frame sections. - /// - /// TODO that's a bad API, don't do that. this function should neither require - /// a fixed reader nor depend on seeking. - pub fn read(fbr: *Reader, dwarf_section: Section.Id, endian: Endian) !EntryHeader { - assert(dwarf_section == .eh_frame or dwarf_section == .debug_frame); - - const length_offset = fbr.seek; - const unit_header = try Dwarf.readUnitHeader(fbr, endian); - const unit_length = cast(usize, unit_header.unit_length) orelse return bad(); - if (unit_length == 0) return .{ - .length_offset = length_offset, - .format = unit_header.format, - .type = .terminator, - .entry_bytes = &.{}, - }; - const start_offset = fbr.seek; - const end_offset = start_offset + unit_length; - defer fbr.seek = end_offset; - - const id = try Dwarf.readAddress(fbr, unit_header.format, endian); - const entry_bytes = fbr.buffer[fbr.seek..end_offset]; - const cie_id: u64 = switch (dwarf_section) { - .eh_frame => CommonInformationEntry.eh_id, + // TODO MLUGG: seriously, just... check the formats of everything in BOTH LSB Core and DWARF. this is a fucking *mess*. maybe add spec references. + + // Next is a value which will disambiguate CIEs and FDEs. Annoyingly, LSB Core makes this + // value always 4-byte, whereas DWARF makes it depend on the `dwarf.Format`. + const cie_ptr_or_id_size: u8 = switch (section) { + .eh_frame => 4, .debug_frame => switch (unit_header.format) { - .@"32" => CommonInformationEntry.dwarf32_id, - .@"64" => CommonInformationEntry.dwarf64_id, + .@"32" => 4, + .@"64" => 8, }, + }; + const cie_ptr_or_id = switch (cie_ptr_or_id_size) { + 4 => try r.takeInt(u32, endian), + 8 => try r.takeInt(u64, endian), else => unreachable, }; + const remaining_bytes = unit_header.unit_length - cie_ptr_or_id_size; - return .{ - .length_offset = length_offset, - .format = unit_header.format, - .type = if (id == cie_id) .cie else .{ .fde = switch (dwarf_section) { - .eh_frame => try std.math.sub(u64, start_offset, id), - .debug_frame => id, - else => unreachable, - } }, - .entry_bytes = entry_bytes, + // If this entry is a CIE, then `cie_ptr_or_id` will have this value, which is different + // between the DWARF `.debug_frame` section and the LSB Core `.eh_frame` section. + const cie_id: u64 = switch (section) { + .eh_frame => 0, + .debug_frame => switch (unit_header.format) { + .@"32" => maxInt(u32), + .@"64" => maxInt(u64), + }, }; + if (cie_ptr_or_id == cie_id) { + return .{ .cie = .{ + .format = unit_header.format, + .bytes_len = remaining_bytes, + } }; + } + + // This is an FDE -- `cie_ptr_or_id` points to the associated CIE. Unfortunately, the format + // of that pointer again differs between `.debug_frame` and `.eh_frame`. + const cie_offset = switch (section) { + .eh_frame => try std.math.sub(u64, header_section_offset + unit_header.header_length, cie_ptr_or_id), + .debug_frame => cie_ptr_or_id, + }; + return .{ .fde = .{ + .format = unit_header.format, + .cie_offset = cie_offset, + .bytes_len = remaining_bytes, + } }; } }; pub const CommonInformationEntry = struct { - // Used in .eh_frame - pub const eh_id = 0; - - // Used in .debug_frame (DWARF32) - pub const dwarf32_id = maxInt(u32); - - // Used in .debug_frame (DWARF64) - pub const dwarf64_id = maxInt(u64); - - // Offset of the length field of this entry in the eh_frame section. - // This is the key that FDEs use to reference CIEs. - length_offset: u64, version: u8, - address_size: u8, - format: Format, - // Only present in version 4 - segment_selector_size: ?u8, + /// In version 4, CIEs can specify the address size used in the CIE and associated FDEs. + /// This value must be used *only* to parse associated FDEs in `FrameDescriptionEntry.parse`. + addr_size_bytes: u8, + + /// Always 0 for versions which do not specify this (currently all versions other than 4). + segment_selector_size: u8, code_alignment_factor: u32, data_alignment_factor: i32, return_address_register: u8, - aug_str: []const u8, - aug_data: []const u8, - lsda_pointer_enc: u8, - personality_enc: ?u8, - personality_routine_pointer: ?u64, - fde_pointer_enc: u8, - initial_instructions: []const u8, + fde_pointer_enc: EH.PE, + is_signal_frame: bool, - pub fn isSignalFrame(self: CommonInformationEntry) bool { - for (self.aug_str) |c| if (c == 'S') return true; - return false; - } + augmentation_kind: AugmentationKind, - pub fn addressesSignedWithBKey(self: CommonInformationEntry) bool { - for (self.aug_str) |c| if (c == 'B') return true; - return false; - } + initial_instructions: []const u8, - pub fn mteTaggedFrame(self: CommonInformationEntry) bool { - for (self.aug_str) |c| if (c == 'G') return true; - return false; - } + pub const AugmentationKind = enum { none, gcc_eh, lsb_z }; /// This function expects to read the CIE starting with the version field. - /// The returned struct references memory backed by cie_bytes. - /// - /// See the FrameDescriptionEntry.parse documentation for the description - /// of `pc_rel_offset` and `is_runtime`. + /// The returned struct references memory backed by `cie_bytes`. /// /// `length_offset` specifies the offset of this CIE's length field in the /// .eh_frame / .debug_frame section. pub fn parse( cie_bytes: []const u8, - pc_rel_offset: i64, - is_runtime: bool, - format: Format, - dwarf_section: Section.Id, - length_offset: u64, - addr_size_bytes: u8, - endian: Endian, + section: Section, + default_addr_size_bytes: u8, ) !CommonInformationEntry { - if (addr_size_bytes > 8) return error.UnsupportedAddrSize; + // We only read the data through this reader. + var r: Reader = .fixed(cie_bytes); - var fbr: Reader = .fixed(cie_bytes); - - const version = try fbr.takeByte(); - switch (dwarf_section) { + const version = try r.takeByte(); + switch (section) { .eh_frame => if (version != 1 and version != 3) return error.UnsupportedDwarfVersion, .debug_frame => if (version != 4) return error.UnsupportedDwarfVersion, - else => return error.UnsupportedDwarfSection, } - var has_eh_data = false; - var has_aug_data = false; - - var aug_str_len: usize = 0; - const aug_str_start = fbr.seek; - var aug_byte = try fbr.takeByte(); - while (aug_byte != 0) : (aug_byte = try fbr.takeByte()) { - switch (aug_byte) { - 'z' => { - if (aug_str_len != 0) return bad(); - has_aug_data = true; - }, - 'e' => { - if (has_aug_data or aug_str_len != 0) return bad(); - if (try fbr.takeByte() != 'h') return bad(); - has_eh_data = true; - }, - else => if (has_eh_data) return bad(), - } - - aug_str_len += 1; - } + const aug_str = try r.takeSentinel(0); + const aug_kind: AugmentationKind = aug: { + if (aug_str.len == 0) break :aug .none; + if (aug_str[0] == 'z') break :aug .lsb_z; + if (std.mem.eql(u8, aug_str, "eh")) break :aug .gcc_eh; + // We can't finish parsing the CIE if we don't know what its augmentation means. + return bad(); + }; - if (has_eh_data) { - // legacy data created by older versions of gcc - unsupported here - for (0..addr_size_bytes) |_| _ = try fbr.takeByte(); + switch (aug_kind) { + .none => {}, // no extra data + .lsb_z => {}, // no extra data yet, but there is a bit later + .gcc_eh => try r.discardAll(default_addr_size_bytes), // unsupported data } - const address_size = if (version == 4) try fbr.takeByte() else addr_size_bytes; - const segment_selector_size = if (version == 4) try fbr.takeByte() else null; - - const code_alignment_factor = try fbr.takeLeb128(u32); - const data_alignment_factor = try fbr.takeLeb128(i32); - const return_address_register = if (version == 1) try fbr.takeByte() else try fbr.takeLeb128(u8); - - var lsda_pointer_enc: u8 = EH.PE.omit; - var personality_enc: ?u8 = null; - var personality_routine_pointer: ?u64 = null; - var fde_pointer_enc: u8 = EH.PE.absptr; - - var aug_data: []const u8 = &[_]u8{}; - const aug_str = if (has_aug_data) blk: { - const aug_data_len = try fbr.takeLeb128(usize); - const aug_data_start = fbr.seek; - aug_data = cie_bytes[aug_data_start..][0..aug_data_len]; - - const aug_str = cie_bytes[aug_str_start..][0..aug_str_len]; - for (aug_str[1..]) |byte| { - switch (byte) { - 'L' => { - lsda_pointer_enc = try fbr.takeByte(); - }, - 'P' => { - personality_enc = try fbr.takeByte(); - personality_routine_pointer = try readEhPointer(&fbr, personality_enc.?, addr_size_bytes, .{ - .pc_rel_base = try pcRelBase(@intFromPtr(&cie_bytes[fbr.seek]), pc_rel_offset), - .follow_indirect = is_runtime, - }, endian); - }, - 'R' => { - fde_pointer_enc = try fbr.takeByte(); - }, - 'S', 'B', 'G' => {}, - else => return bad(), - } - } - - // aug_data_len can include padding so the CIE ends on an address boundary - fbr.seek = aug_data_start + aug_data_len; - break :blk aug_str; - } else &[_]u8{}; + const addr_size_bytes = if (version == 4) try r.takeByte() else default_addr_size_bytes; + const segment_selector_size: u8 = if (version == 4) try r.takeByte() else 0; + const code_alignment_factor = try r.takeLeb128(u32); + const data_alignment_factor = try r.takeLeb128(i32); + const return_address_register = if (version == 1) try r.takeByte() else try r.takeLeb128(u8); + + // This is where LSB's augmentation might add some data. + const fde_pointer_enc: EH.PE, const is_signal_frame: bool = aug: { + const default_fde_pointer_enc: EH.PE = .{ .type = .absptr, .rel = .abs }; + if (aug_kind != .lsb_z) break :aug .{ default_fde_pointer_enc, false }; + const aug_data_len = try r.takeLeb128(u32); + var aug_data: Reader = .fixed(try r.take(aug_data_len)); + var fde_pointer_enc: EH.PE = default_fde_pointer_enc; + var is_signal_frame = false; + for (aug_str[1..]) |byte| switch (byte) { + 'L' => _ = try aug_data.takeByte(), // we ignore the LSDA pointer + 'P' => { + const enc: EH.PE = @bitCast(try aug_data.takeByte()); + const endian: Endian = .little; // irrelevant because we're discarding the value anyway + _ = try readEhPointerAbs(&r, enc.type, addr_size_bytes, endian); // we ignore the personality routine; endianness is irrelevant since we're discarding + }, + 'R' => fde_pointer_enc = @bitCast(try aug_data.takeByte()), + 'S' => is_signal_frame = true, + 'B', 'G' => {}, + else => return bad(), + }; + break :aug .{ fde_pointer_enc, is_signal_frame }; + }; - const initial_instructions = cie_bytes[fbr.seek..]; return .{ - .length_offset = length_offset, .version = version, - .address_size = address_size, - .format = format, + .addr_size_bytes = addr_size_bytes, .segment_selector_size = segment_selector_size, .code_alignment_factor = code_alignment_factor, .data_alignment_factor = data_alignment_factor, .return_address_register = return_address_register, - .aug_str = aug_str, - .aug_data = aug_data, - .lsda_pointer_enc = lsda_pointer_enc, - .personality_enc = personality_enc, - .personality_routine_pointer = personality_routine_pointer, .fde_pointer_enc = fde_pointer_enc, - .initial_instructions = initial_instructions, + .is_signal_frame = is_signal_frame, + .augmentation_kind = aug_kind, + .initial_instructions = r.buffered(), }; } }; pub const FrameDescriptionEntry = struct { - // Offset into eh_frame where the CIE for this FDE is stored - cie_length_offset: u64, - pc_begin: u64, pc_range: u64, - lsda_pointer: ?u64, - aug_data: []const u8, instructions: []const u8, /// This function expects to read the FDE starting at the PC Begin field. /// The returned struct references memory backed by `fde_bytes`. - /// - /// `pc_rel_offset` specifies an offset to be applied to pc_rel_base values - /// used when decoding pointers. This should be set to zero if fde_bytes is - /// backed by the memory of a .eh_frame / .debug_frame section in the running executable. - /// Otherwise, it should be the relative offset to translate addresses from - /// where the section is currently stored in memory, to where it *would* be - /// stored at runtime: section base addr - backing data base ptr. - /// - /// Similarly, `is_runtime` specifies this function is being called on a runtime - /// section, and so indirect pointers can be followed. pub fn parse( + /// The virtual address of the FDE we're parsing, *excluding* its entry header (i.e. the + /// address is after the header). If `fde_bytes` is backed by the memory of a loaded + /// module's `.eh_frame` section, this will equal `fde_bytes.ptr`. + fde_vaddr: u64, fde_bytes: []const u8, - pc_rel_offset: i64, - is_runtime: bool, cie: CommonInformationEntry, - addr_size_bytes: u8, endian: Endian, ) !FrameDescriptionEntry { - if (addr_size_bytes > 8) return error.InvalidAddrSize; - - var fbr: Reader = .fixed(fde_bytes); - - const pc_begin = try readEhPointer(&fbr, cie.fde_pointer_enc, addr_size_bytes, .{ - .pc_rel_base = try pcRelBase(@intFromPtr(&fde_bytes[fbr.seek]), pc_rel_offset), - .follow_indirect = is_runtime, - }, endian) orelse return bad(); - - const pc_range = try readEhPointer(&fbr, cie.fde_pointer_enc, addr_size_bytes, .{ - .pc_rel_base = 0, - .follow_indirect = false, - }, endian) orelse return bad(); - - var aug_data: []const u8 = &[_]u8{}; - const lsda_pointer = if (cie.aug_str.len > 0) blk: { - const aug_data_len = try fbr.takeLeb128(usize); - const aug_data_start = fbr.seek; - aug_data = fde_bytes[aug_data_start..][0..aug_data_len]; - - const lsda_pointer = if (cie.lsda_pointer_enc != EH.PE.omit) - try readEhPointer(&fbr, cie.lsda_pointer_enc, addr_size_bytes, .{ - .pc_rel_base = try pcRelBase(@intFromPtr(&fde_bytes[fbr.seek]), pc_rel_offset), - .follow_indirect = is_runtime, - }, endian) - else - null; - - fbr.seek = aug_data_start + aug_data_len; - break :blk lsda_pointer; - } else null; - - const instructions = fde_bytes[fbr.seek..]; + if (cie.segment_selector_size != 0) return error.UnsupportedAddrSize; + + var r: Reader = .fixed(fde_bytes); + + const pc_begin = try readEhPointer(&r, cie.fde_pointer_enc, cie.addr_size_bytes, .{ + .pc_rel_base = fde_vaddr, + }, endian); + + // I swear I'm not kidding when I say that PC Range is encoded with `cie.fde_pointer_enc`, but ignoring `rel`. + const pc_range = switch (try readEhPointerAbs(&r, cie.fde_pointer_enc.type, cie.addr_size_bytes, endian)) { + .unsigned => |x| x, + .signed => |x| cast(u64, x) orelse return bad(), + }; + + switch (cie.augmentation_kind) { + .none, .gcc_eh => {}, + .lsb_z => { + // There is augmentation data, but it's irrelevant to us -- it + // only contains the LSDA pointer, which we don't care about. + const aug_data_len = try r.takeLeb128(u64); + _ = try r.discardAll(aug_data_len); + }, + } + return .{ - .cie_length_offset = cie.length_offset, .pc_begin = pc_begin, .pc_range = pc_range, - .lsda_pointer = lsda_pointer, - .aug_data = aug_data, - .instructions = instructions, + .instructions = r.buffered(), }; } }; -/// If `.eh_frame_hdr` is present, then only the header needs to be parsed. Otherwise, `.eh_frame` -/// and `.debug_frame` are scanned and a sorted list of FDEs is built for binary searching during -/// unwinding. Even if `.eh_frame_hdr` is used, we may find during unwinding that it's incomplete, -/// in which case we build the sorted list of FDEs at that point. -/// -/// See also `scanCieFdeInfo`. -pub fn scanAllUnwindInfo(di: *Dwarf, allocator: Allocator, base_address: usize) !void { - const endian = di.endian; - - if (di.section(.eh_frame_hdr)) |eh_frame_hdr| blk: { - var fbr: Reader = .fixed(eh_frame_hdr); - - const version = try fbr.takeByte(); - if (version != 1) break :blk; - - const eh_frame_ptr_enc = try fbr.takeByte(); - if (eh_frame_ptr_enc == EH.PE.omit) break :blk; - const fde_count_enc = try fbr.takeByte(); - if (fde_count_enc == EH.PE.omit) break :blk; - const table_enc = try fbr.takeByte(); - if (table_enc == EH.PE.omit) break :blk; - - const eh_frame_ptr = cast(usize, try readEhPointer(&fbr, eh_frame_ptr_enc, @sizeOf(usize), .{ - .pc_rel_base = @intFromPtr(&eh_frame_hdr[fbr.seek]), - .follow_indirect = true, - }, endian) orelse return bad()) orelse return bad(); - - const fde_count = cast(usize, try readEhPointer(&fbr, fde_count_enc, @sizeOf(usize), .{ - .pc_rel_base = @intFromPtr(&eh_frame_hdr[fbr.seek]), - .follow_indirect = true, - }, endian) orelse return bad()) orelse return bad(); - - const entry_size = try ExceptionFrameHeader.entrySize(table_enc); - const entries_len = fde_count * entry_size; - if (entries_len > eh_frame_hdr.len - fbr.seek) return bad(); - - di.eh_frame_hdr = .{ - .eh_frame_ptr = eh_frame_ptr, - .table_enc = table_enc, - .fde_count = fde_count, - .entries = eh_frame_hdr[fbr.seek..][0..entries_len], - }; +pub fn scanDebugFrame( + unwind: *Unwind, + gpa: Allocator, + section_vaddr: u64, + section_bytes: []const u8, + addr_size_bytes: u8, + endian: Endian, +) void { + assert(unwind.debug_frame == null); + + var fbr: Reader = .fixed(section_bytes); + var fde_list: std.ArrayList(SortedFdeEntry) = .empty; + defer fde_list.deinit(gpa); + while (fbr.seek < fbr.buffer.len) { + const entry_offset = fbr.seek; + switch (try EntryHeader.read(&fbr, fbr.seek, .debug_frame, endian)) { + // Ignore CIEs; we only need them to parse the FDEs! + .cie => |info| { + try fbr.discardAll(info.bytes_len); + continue; + }, + .fde => |info| { + const cie: CommonInformationEntry = cie: { + var cie_reader: Reader = .fixed(section_bytes[info.cie_offset..]); + const cie_info = switch (try EntryHeader.read(&cie_reader, info.cie_offset, .debug_frame, endian)) { + .cie => |cie_info| cie_info, + .fde, .terminator => return bad(), // This is meant to be a CIE + }; + break :cie try .parse(try cie_reader.take(cie_info.bytes_len), .debug_frame, addr_size_bytes); + }; + const fde: FrameDescriptionEntry = try .parse( + section_vaddr + fbr.seek, + try fbr.take(info.bytes_len), + cie, + endian, + ); + try fde_list.append(.{ + .pc_begin = fde.pc_begin, + .fde_offset = entry_offset, // *not* `fde_offset`, because we need to include the entry header + }); + }, + .terminator => return bad(), // DWARF `.debug_frame` isn't meant to have terminators + } + } + const fde_slice = try fde_list.toOwnedSlice(gpa); + errdefer comptime unreachable; + std.mem.sortUnstable(SortedFdeEntry, fde_slice, {}, struct { + fn lessThan(ctx: void, a: SortedFdeEntry, b: SortedFdeEntry) bool { + ctx; + return a.pc_begin < b.pc_begin; + } + }.lessThan); + unwind.debug_frame = .{ .data = section_bytes, .sorted_fdes = fde_slice }; +} + +pub fn scanEhFrame( + unwind: *Unwind, + gpa: Allocator, + header: EhFrameHeader, + section_bytes_ptr: [*]const u8, + /// This is separate from `section_bytes_ptr` because it is unknown when `.eh_frame` is accessed + /// through the pointer in the `.eh_frame_hdr` section. If this is non-`null`, we avoid reading + /// past this number of bytes, but if `null`, we must assume that the `.eh_frame` data has a + /// valid terminator. + section_bytes_len: ?usize, + addr_size_bytes: u8, + endian: Endian, +) !void { + assert(unwind.eh_frame == null); + + const section_bytes: []const u8 = bytes: { + // If the length is unknown, let the slice span from `section_bytes_ptr` to the end of memory. + const len = section_bytes_len orelse (std.math.maxInt(usize) - @intFromPtr(section_bytes_ptr)); + break :bytes section_bytes_ptr[0..len]; + }; - // No need to scan .eh_frame, we have a binary search table already + if (header.search_table != null) { + // No need to populate `sorted_fdes`, the header contains a search table. + unwind.eh_frame = .{ + .header = header, + .eh_frame_data = section_bytes, + .sorted_fdes = null, + }; return; } - try di.scanCieFdeInfo(allocator, base_address); + // We aren't told the length of this section. Luckily, we don't need it, because there will be + // an `EntryHeader.terminator` after the last CIE/FDE. Just make a `Reader` which will give us + // alllll of the bytes! + var fbr: Reader = .fixed(section_bytes); + + var fde_list: std.ArrayList(SortedFdeEntry) = .empty; + defer fde_list.deinit(gpa); + + while (true) { + const entry_offset = fbr.seek; + switch (try EntryHeader.read(&fbr, fbr.seek, .eh_frame, endian)) { + // Ignore CIEs; we only need them to parse the FDEs! + .cie => |info| { + try fbr.discardAll(info.bytes_len); + continue; + }, + .fde => |info| { + const cie: CommonInformationEntry = cie: { + var cie_reader: Reader = .fixed(section_bytes[info.cie_offset..]); + const cie_info = switch (try EntryHeader.read(&cie_reader, info.cie_offset, .eh_frame, endian)) { + .cie => |cie_info| cie_info, + .fde, .terminator => return bad(), // This is meant to be a CIE + }; + break :cie try .parse(try cie_reader.take(cie_info.bytes_len), .eh_frame, addr_size_bytes); + }; + const fde: FrameDescriptionEntry = try .parse( + header.eh_frame_vaddr + fbr.seek, + try fbr.take(info.bytes_len), + cie, + endian, + ); + try fde_list.append(gpa, .{ + .pc_begin = fde.pc_begin, + .fde_offset = entry_offset, // *not* `fde_offset`, because we need to include the entry header + }); + }, + // Unlike `.debug_frame`, the `.eh_frame` section does have a terminator CIE -- this is + // necessary because `header` doesn't include the length of the `.eh_frame` section + .terminator => break, + } + } + const fde_slice = try fde_list.toOwnedSlice(gpa); + errdefer comptime unreachable; + std.mem.sortUnstable(SortedFdeEntry, fde_slice, {}, struct { + fn lessThan(ctx: void, a: SortedFdeEntry, b: SortedFdeEntry) bool { + ctx; + return a.pc_begin < b.pc_begin; + } + }.lessThan); + unwind.eh_frame = .{ + .header = header, + .eh_frame_data = section_bytes, + .sorted_fdes = fde_slice, + }; } -/// Scan `.eh_frame` and `.debug_frame` and build a sorted list of FDEs for binary searching during -/// unwinding. -pub fn scanCieFdeInfo(unwind: *Unwind, allocator: Allocator, endian: Endian, base_address: usize) !void { - const frame_sections = [2]Section.Id{ .eh_frame, .debug_frame }; - for (frame_sections) |frame_section| { - if (unwind.section(frame_section)) |section_data| { - var fbr: Reader = .fixed(section_data); - while (fbr.seek < fbr.buffer.len) { - const entry_header = try EntryHeader.read(&fbr, frame_section, endian); - switch (entry_header.type) { - .cie => { - const cie = try CommonInformationEntry.parse( - entry_header.entry_bytes, - unwind.sectionVirtualOffset(frame_section, base_address).?, - true, - entry_header.format, - frame_section, - entry_header.length_offset, - @sizeOf(usize), - endian, - ); - try unwind.cie_map.put(allocator, entry_header.length_offset, cie); - }, - .fde => |cie_offset| { - const cie = unwind.cie_map.get(cie_offset) orelse return bad(); - const fde = try FrameDescriptionEntry.parse( - entry_header.entry_bytes, - unwind.sectionVirtualOffset(frame_section, base_address).?, - true, - cie, - @sizeOf(usize), - endian, - ); - try unwind.fde_list.append(allocator, fde); - }, - .terminator => break, - } - } - - std.mem.sortUnstable(FrameDescriptionEntry, unwind.fde_list.items, {}, struct { - fn lessThan(ctx: void, a: FrameDescriptionEntry, b: FrameDescriptionEntry) bool { - _ = ctx; - return a.pc_begin < b.pc_begin; - } - }.lessThan); +/// The return value may be a false positive. After loading the FDE with `loadFde`, the caller must +/// validate that `pc` is indeed in its range -- if it is not, then no FDE matches `pc`. +pub fn findFdeOffset(unwind: *const Unwind, pc: u64, addr_size_bytes: u8, endian: Endian) !?u64 { + // We'll break from this block only if we have a manually-constructed search table. + const sorted_fdes: []const SortedFdeEntry = fdes: { + if (unwind.debug_frame) |df| break :fdes df.sorted_fdes; + if (unwind.eh_frame) |eh_frame| { + if (eh_frame.sorted_fdes) |fdes| break :fdes fdes; + // Use the search table from the `.eh_frame_hdr` section rather than one of our own + return eh_frame.header.findEntry(pc, addr_size_bytes, endian); } - } + // We have no available unwind info + return null; + }; + const first_bad_idx = std.sort.partitionPoint(SortedFdeEntry, sorted_fdes, pc, struct { + fn canIncludePc(target_pc: u64, entry: SortedFdeEntry) bool { + return target_pc >= entry.pc_begin; // i.e. does 'entry_pc..<last pc>' include 'target_pc' + } + }.canIncludePc); + // `first_bad_idx` is the index of the first FDE whose `pc_begin` is too high to include `pc`. + // So if any FDE matches, it'll be the one at `first_bad_idx - 1` (maybe false positive). + if (first_bad_idx == 0) return null; + return sorted_fdes[first_bad_idx - 1].fde_offset; +} + +pub fn loadFde(unwind: *const Unwind, fde_offset: u64, addr_size_bytes: u8, endian: Endian) !struct { Format, CommonInformationEntry, FrameDescriptionEntry } { + const section_bytes: []const u8, const section_vaddr: u64, const section: Section = s: { + if (unwind.debug_frame) |df| break :s .{ df.data, if (true) @panic("MLUGG TODO"), .debug_frame }; + if (unwind.eh_frame) |ef| break :s .{ ef.eh_frame_data, ef.header.eh_frame_vaddr, .eh_frame }; + unreachable; // how did you get `fde_offset`?! + }; + + var fde_reader: Reader = .fixed(section_bytes[fde_offset..]); + const fde_info = switch (try EntryHeader.read(&fde_reader, fde_offset, section, endian)) { + .fde => |info| info, + .cie, .terminator => return bad(), // This is meant to be an FDE + }; + + const cie_offset = fde_info.cie_offset; + var cie_reader: Reader = .fixed(section_bytes[cie_offset..]); + const cie_info = switch (try EntryHeader.read(&cie_reader, cie_offset, section, endian)) { + .cie => |info| info, + .fde, .terminator => return bad(), // This is meant to be a CIE + }; + + const cie: CommonInformationEntry = try .parse( + try cie_reader.take(cie_info.bytes_len), + section, + addr_size_bytes, + ); + const fde: FrameDescriptionEntry = try .parse( + section_vaddr + fde_offset + fde_reader.seek, + try fde_reader.take(fde_info.bytes_len), + cie, + endian, + ); + + return .{ cie_info.format, cie, fde }; } const EhPointerContext = struct { // The address of the pointer field itself pc_rel_base: u64, - // Whether or not to follow indirect pointers. This should only be - // used when decoding pointers at runtime using the current process's - // debug info - follow_indirect: bool, - // These relative addressing modes are only used in specific cases, and // might not be available / required in all parsing contexts data_rel_base: ?u64 = null, text_rel_base: ?u64 = null, function_rel_base: ?u64 = null, }; - -fn readEhPointer(fbr: *Reader, enc: u8, addr_size_bytes: u8, ctx: EhPointerContext, endian: Endian) !?u64 { - if (enc == EH.PE.omit) return null; - - const value: union(enum) { - signed: i64, - unsigned: u64, - } = switch (enc & EH.PE.type_mask) { - EH.PE.absptr => .{ +/// Returns `error.InvalidDebugInfo` if the encoding is `EH.PE.omit`. +fn readEhPointerAbs(r: *Reader, enc_ty: EH.PE.Type, addr_size_bytes: u8, endian: Endian) !union(enum) { + signed: i64, + unsigned: u64, +} { + return switch (enc_ty) { + .absptr => .{ .unsigned = switch (addr_size_bytes) { - 2 => try fbr.takeInt(u16, endian), - 4 => try fbr.takeInt(u32, endian), - 8 => try fbr.takeInt(u64, endian), - else => return error.InvalidAddrSize, + 2 => try r.takeInt(u16, endian), + 4 => try r.takeInt(u32, endian), + 8 => try r.takeInt(u64, endian), + else => return error.UnsupportedAddrSize, }, }, - EH.PE.uleb128 => .{ .unsigned = try fbr.takeLeb128(u64) }, - EH.PE.udata2 => .{ .unsigned = try fbr.takeInt(u16, endian) }, - EH.PE.udata4 => .{ .unsigned = try fbr.takeInt(u32, endian) }, - EH.PE.udata8 => .{ .unsigned = try fbr.takeInt(u64, endian) }, - EH.PE.sleb128 => .{ .signed = try fbr.takeLeb128(i64) }, - EH.PE.sdata2 => .{ .signed = try fbr.takeInt(i16, endian) }, - EH.PE.sdata4 => .{ .signed = try fbr.takeInt(i32, endian) }, - EH.PE.sdata8 => .{ .signed = try fbr.takeInt(i64, endian) }, + .uleb128 => .{ .unsigned = try r.takeLeb128(u64) }, + .udata2 => .{ .unsigned = try r.takeInt(u16, endian) }, + .udata4 => .{ .unsigned = try r.takeInt(u32, endian) }, + .udata8 => .{ .unsigned = try r.takeInt(u64, endian) }, + .sleb128 => .{ .signed = try r.takeLeb128(i64) }, + .sdata2 => .{ .signed = try r.takeInt(i16, endian) }, + .sdata4 => .{ .signed = try r.takeInt(i32, endian) }, + .sdata8 => .{ .signed = try r.takeInt(i64, endian) }, else => return bad(), }; - - const base = switch (enc & EH.PE.rel_mask) { - EH.PE.pcrel => ctx.pc_rel_base, - EH.PE.textrel => ctx.text_rel_base orelse return error.PointerBaseNotSpecified, - EH.PE.datarel => ctx.data_rel_base orelse return error.PointerBaseNotSpecified, - EH.PE.funcrel => ctx.function_rel_base orelse return error.PointerBaseNotSpecified, - else => null, +} +/// Returns `error.InvalidDebugInfo` if the encoding is `EH.PE.omit`. +fn readEhPointer(fbr: *Reader, enc: EH.PE, addr_size_bytes: u8, ctx: EhPointerContext, endian: Endian) !u64 { + const offset = try readEhPointerAbs(fbr, enc.type, addr_size_bytes, endian); + const base = switch (enc.rel) { + .abs, .aligned => 0, + .pcrel => ctx.pc_rel_base, + .textrel => ctx.text_rel_base orelse return bad(), + .datarel => ctx.data_rel_base orelse return bad(), + .funcrel => ctx.function_rel_base orelse return bad(), + .indirect => return bad(), // GCC extension; not supported + _ => return bad(), }; - - const ptr: u64 = if (base) |b| switch (value) { - .signed => |s| @intCast(try std.math.add(i64, s, @as(i64, @intCast(b)))), + return switch (offset) { + .signed => |s| @intCast(try std.math.add(i64, s, @as(i64, @intCast(base)))), // absptr can actually contain signed values in some cases (aarch64 MachO) - .unsigned => |u| u +% b, - } else switch (value) { - .signed => |s| @as(u64, @intCast(s)), - .unsigned => |u| u, + .unsigned => |u| u +% base, }; - - if ((enc & EH.PE.indirect) > 0 and ctx.follow_indirect) { - if (@sizeOf(usize) != addr_size_bytes) { - // See the documentation for `follow_indirect` - return error.NonNativeIndirection; - } - - const native_ptr = cast(usize, ptr) orelse return error.PointerOverflow; - return switch (addr_size_bytes) { - 2, 4, 8 => return @as(*const usize, @ptrFromInt(native_ptr)).*, - else => return error.UnsupportedAddrSize, - }; - } else { - return ptr; - } } -fn pcRelBase(field_ptr: usize, pc_rel_offset: i64) !usize { - if (pc_rel_offset < 0) { - return std.math.sub(usize, field_ptr, @as(usize, @intCast(-pc_rel_offset))); - } else { - return std.math.add(usize, field_ptr, @as(usize, @intCast(pc_rel_offset))); - } +/// Like `Reader.fixed`, but when the length of the data is unknown and we just want to allow +/// reading indefinitely. +fn maxSlice(ptr: [*]const u8) []const u8 { + const len = std.math.maxInt(usize) - @intFromPtr(ptr); + return ptr[0..len]; } const Allocator = std.mem.Allocator; |
