| author | mlugg <mlugg@mlugg.co.uk> | 2025-09-26 10:52:09 +0100 |
|---|---|---|
| committer | mlugg <mlugg@mlugg.co.uk> | 2025-09-30 13:44:56 +0100 |
| commit | 156cd8f678ebdcccc48382d093a3ef7e45c85a45 (patch) | |
| tree | ca3f4c37bda9cf1d039ac25ba37b2c45ab5a345f | lib/std/debug |
| parent | 3f84b6c80ed3306f040dd98b8ccba561a052167a (diff) | |
| download | zig-156cd8f678ebdcccc48382d093a3ef7e45c85a45.tar.gz | zig-156cd8f678ebdcccc48382d093a3ef7e45c85a45.zip |
std.debug: significantly speed up capturing stack traces
By my estimation, these changes speed up DWARF unwinding when using the
self-hosted x86_64 backend by around 7x. There are two very significant
enhancements: we no longer iterate over frames that don't fit in the stack
trace buffer, and we cache register rules (in a fixed buffer) to avoid
re-parsing and re-evaluating CFI instructions in most cases. Alongside
these are a number of smaller enhancements, such as pre-caching the result
of evaluating each CIE's initial instructions, avoiding re-parsing of CIEs,
and significantly simplifying the `Dwarf.Unwind.VirtualMachine` logic.
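
The register-rule cache mentioned above is direct-mapped: each PC hashes to one of a fixed number of slots, and a newer entry simply overwrites whatever occupied its slot. Below is a minimal sketch of that scheme, not the commit's code: the `Cache`/`Slot` names, the 2048-slot sizing, and `std.hash.int` mirror the patch, while `Rule` and the `lookup`/`insert` helpers are simplified stand-ins for illustration.

```zig
const std = @import("std");

/// Stand-in for `VirtualMachine.RegisterRule`; only the shape matters here.
const Rule = union(enum) { undefined, offset: i64 };

const Cache = struct {
    const num_slots = 2048;
    const Slot = struct {
        pc: usize, // 0 marks an empty slot
        rules: [32]Rule,
        num_rules: u8,
    };
    buf: [num_slots]Slot,

    /// Returns the cached slot for `pc`, or null on a miss (an empty slot,
    /// or a different PC that hashes to the same bucket).
    fn lookup(cache: *const Cache, pc: usize) ?*const Slot {
        const slot = &cache.buf[std.hash.int(pc) % num_slots];
        return if (slot.pc == pc) slot else null;
    }

    /// Unconditionally evicts whatever previously occupied the bucket.
    fn insert(cache: *Cache, slot: Slot) void {
        cache.buf[std.hash.int(slot.pc) % num_slots] = slot;
    }
};
```

On a collision or miss the CFI program is simply re-run and the slot overwritten, so the cache stays bounded and needs no eviction bookkeeping.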
Diffstat (limited to 'lib/std/debug')
| -rw-r--r-- | lib/std/debug/Dwarf.zig | 1 |
| -rw-r--r-- | lib/std/debug/Dwarf/Unwind.zig | 145 |
| -rw-r--r-- | lib/std/debug/Dwarf/Unwind/VirtualMachine.zig | 554 |
| -rw-r--r-- | lib/std/debug/Dwarf/call_frame.zig | 288 |
| -rw-r--r-- | lib/std/debug/SelfInfo.zig | 341 |
| -rw-r--r-- | lib/std/debug/SelfInfo/DarwinModule.zig | 87 |
| -rw-r--r-- | lib/std/debug/SelfInfo/ElfModule.zig | 189 |
7 files changed, 802 insertions, 803 deletions
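
The first hunk below updates the usage flow documented in `lib/std/debug/Dwarf/Unwind.zig`: `prepare`, then `lookupPc`, then `getFde`. As orientation before the diff, here is a sketch of what a caller of the new API might look like; the section arguments, function name, and the error used on a miss are illustrative, not code from this commit.

```zig
const std = @import("std");
const builtin = @import("builtin");
const Unwind = std.debug.Dwarf.Unwind;

const native_endian = builtin.cpu.arch.endian();

/// `section_vaddr`/`section_bytes` describe a mapped `.eh_frame` section.
fn lookupFdeForPc(
    gpa: std.mem.Allocator,
    section_vaddr: u64,
    section_bytes: []const u8,
    pc: u64,
) !void {
    var unwind: Unwind = .initSection(.eh_frame, section_vaddr, section_bytes);
    defer unwind.deinit(gpa);

    // Scans CIEs; `need_lookup = true` also builds the sorted FDE table.
    try unwind.prepare(gpa, @sizeOf(usize), native_endian, true);

    // May be a false positive, so the FDE's PC range is re-checked below.
    const fde_offset = try unwind.lookupPc(pc, @sizeOf(usize), native_endian) orelse
        return error.MissingDebugInfo;
    const cie, const fde = try unwind.getFde(fde_offset, native_endian);
    if (pc < fde.pc_begin or pc >= fde.pc_begin + fde.pc_range)
        return error.MissingDebugInfo;
    _ = cie; // next: evaluate CFI via `VirtualMachine.runTo(gpa, pc, cie, &fde, ...)`
}
```

When the FDE offset is already known (for example via Mach-O compact unwind data), `lookupPc` is unnecessary and `prepare` can be called with `need_lookup = false` so that it only scans CIEs.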
diff --git a/lib/std/debug/Dwarf.zig b/lib/std/debug/Dwarf.zig index 3934777ea5..cfba366162 100644 --- a/lib/std/debug/Dwarf.zig +++ b/lib/std/debug/Dwarf.zig @@ -27,7 +27,6 @@ const Reader = std.Io.Reader; const Dwarf = @This(); pub const expression = @import("Dwarf/expression.zig"); -pub const call_frame = @import("Dwarf/call_frame.zig"); pub const Unwind = @import("Dwarf/Unwind.zig"); /// Useful to temporarily enable while working on this file. diff --git a/lib/std/debug/Dwarf/Unwind.zig b/lib/std/debug/Dwarf/Unwind.zig index 2eaa89c404..e251a9175d 100644 --- a/lib/std/debug/Dwarf/Unwind.zig +++ b/lib/std/debug/Dwarf/Unwind.zig @@ -10,7 +10,7 @@ //! The typical usage of `Unwind` is as follows: //! //! * Initialize with `initEhFrameHdr` or `initSection`, depending on the available data -//! * Call `prepareLookup` to construct a search table if necessary +//! * Call `prepare` to scan CIEs and, if necessary, construct a search table //! * Call `lookupPc` to find the section offset of the FDE corresponding to a PC //! * Call `getFde` to load the corresponding FDE and CIE //! * Check that the PC does indeed fall in that range (`lookupPc` may return a false positive) @@ -18,7 +18,7 @@ //! //! In some cases, such as when using the "compact unwind" data in Mach-O binaries, the FDE offsets //! may already be known. In that case, no call to `lookupPc` is necessary, which means the call to -//! `prepareLookup` can also be omitted. +//! `prepare` can be optimized to only scan CIEs. pub const VirtualMachine = @import("Unwind/VirtualMachine.zig"); @@ -45,7 +45,7 @@ frame_section: struct { /// A structure allowing fast lookups of the FDE corresponding to a particular PC. We use a binary /// search table for the lookup; essentially, a list of all FDEs ordered by PC range. `null` means -/// the lookup data is not yet populated, so `prepareLookup` must be called before `lookupPc`. +/// the lookup data is not yet populated, so `prepare` must be called before `lookupPc`. lookup: ?union(enum) { /// The `.eh_frame_hdr` section contains a pre-computed search table which we can use. eh_frame_hdr: struct { @@ -58,6 +58,12 @@ lookup: ?union(enum) { sorted_fdes: []SortedFdeEntry, }, +/// Initially empty; populated by `prepare`. +cie_list: std.MultiArrayList(struct { + offset: u64, + cie: CommonInformationEntry, +}), + const SortedFdeEntry = struct { /// This FDE's value of `pc_begin`. pc_begin: u64, @@ -83,6 +89,7 @@ pub fn initEhFrameHdr(header: EhFrameHeader, section_vaddr: u64, section_bytes_p .vaddr = section_vaddr, .table = table, } } else null, + .cie_list = .empty, }; } @@ -98,16 +105,21 @@ pub fn initSection(section: Section, section_vaddr: u64, section_bytes: []const .vaddr = section_vaddr, }, .lookup = null, + .cie_list = .empty, }; } -/// Technically, it is only necessary to call this if `prepareLookup` has previously been called, -/// since no other function here allocates resources. pub fn deinit(unwind: *Unwind, gpa: Allocator) void { if (unwind.lookup) |lookup| switch (lookup) { .eh_frame_hdr => {}, .sorted_fdes => |fdes| gpa.free(fdes), }; + for (unwind.cie_list.items(.cie)) |*cie| { + if (cie.last_row) |*lr| { + gpa.free(lr.cols); + } + } + unwind.cie_list.deinit(gpa); } /// Decoded version of the `.eh_frame_hdr` section. @@ -236,7 +248,6 @@ const EntryHeader = union(enum) { bytes_len: u64, }, fde: struct { - format: Format, /// Offset into the section of the corresponding CIE, *including* its entry header. cie_offset: u64, /// Remaining bytes in the FDE. 
These are parseable by `FrameDescriptionEntry.parse`. @@ -290,7 +301,6 @@ const EntryHeader = union(enum) { .debug_frame => cie_ptr_or_id, }; return .{ .fde = .{ - .format = unit_header.format, .cie_offset = cie_offset, .bytes_len = remaining_bytes, } }; @@ -299,6 +309,7 @@ const EntryHeader = union(enum) { pub const CommonInformationEntry = struct { version: u8, + format: Format, /// In version 4, CIEs can specify the address size used in the CIE and associated FDEs. /// This value must be used *only* to parse associated FDEs in `FrameDescriptionEntry.parse`. @@ -318,6 +329,12 @@ pub const CommonInformationEntry = struct { initial_instructions: []const u8, + last_row: ?struct { + offset: u64, + cfa: VirtualMachine.CfaRule, + cols: []VirtualMachine.Column, + }, + pub const AugmentationKind = enum { none, gcc_eh, lsb_z }; /// This function expects to read the CIE starting with the version field. @@ -326,6 +343,7 @@ pub const CommonInformationEntry = struct { /// `length_offset` specifies the offset of this CIE's length field in the /// .eh_frame / .debug_frame section. fn parse( + format: Format, cie_bytes: []const u8, section: Section, default_addr_size_bytes: u8, @@ -384,6 +402,7 @@ pub const CommonInformationEntry = struct { }; return .{ + .format = format, .version = version, .addr_size_bytes = addr_size_bytes, .segment_selector_size = segment_selector_size, @@ -394,6 +413,7 @@ pub const CommonInformationEntry = struct { .is_signal_frame = is_signal_frame, .augmentation_kind = aug_kind, .initial_instructions = r.buffered(), + .last_row = null, }; } }; @@ -411,7 +431,7 @@ pub const FrameDescriptionEntry = struct { /// module's `.eh_frame` section, this will equal `fde_bytes.ptr`. fde_vaddr: u64, fde_bytes: []const u8, - cie: CommonInformationEntry, + cie: *const CommonInformationEntry, endian: Endian, ) !FrameDescriptionEntry { if (cie.segment_selector_size != 0) return error.UnsupportedAddrSize; @@ -446,11 +466,18 @@ pub const FrameDescriptionEntry = struct { } }; -/// Builds the PC FDE lookup table if it is not already built. It is required to call this function -/// at least once before calling `lookupPc`. Once this function is called, memory has been allocated -/// and so `deinit` (matching this `gpa`) is required to free it. -pub fn prepareLookup(unwind: *Unwind, gpa: Allocator, addr_size_bytes: u8, endian: Endian) !void { - if (unwind.lookup != null) return; +/// Builds the CIE list and FDE lookup table if they are not already built. It is required to call +/// this function at least once before calling `lookupPc` or `getFde`. If only `getFde` is needed, +/// then `need_lookup` can be set to `false` to make this function more efficient. +pub fn prepare( + unwind: *Unwind, + gpa: Allocator, + addr_size_bytes: u8, + endian: Endian, + need_lookup: bool, +) !void { + if (unwind.cie_list.len > 0 and (!need_lookup or unwind.lookup != null)) return; + unwind.cie_list.clearRetainingCapacity(); const section = unwind.frame_section; @@ -462,21 +489,28 @@ pub fn prepareLookup(unwind: *Unwind, gpa: Allocator, addr_size_bytes: u8, endia const entry_offset = r.seek; switch (try EntryHeader.read(&r, entry_offset, section.id, endian)) { .cie => |cie_info| { - // Ignore CIEs for now; we'll parse them when we read a corresponding FDE - try r.discardAll(cast(usize, cie_info.bytes_len) orelse return error.EndOfStream); + // We will pre-populate a list of CIEs for efficiency: this avoids work re-parsing + // them every time we look up an FDE. 
It also lets us cache the result of evaluating + // the CIE's initial CFI instructions, which is useful because in the vast majority + // of cases those instructions will be needed to reach the PC we are unwinding to. + const bytes_len = cast(usize, cie_info.bytes_len) orelse return error.EndOfStream; + const idx = unwind.cie_list.len; + try unwind.cie_list.append(gpa, .{ + .offset = entry_offset, + .cie = try .parse(cie_info.format, try r.take(bytes_len), section.id, addr_size_bytes), + }); + errdefer _ = unwind.cie_list.pop().?; + try VirtualMachine.populateCieLastRow(gpa, &unwind.cie_list.items(.cie)[idx], addr_size_bytes, endian); continue; }, .fde => |fde_info| { - if (fde_info.cie_offset > section.bytes.len) return error.EndOfStream; - var cie_r: Reader = .fixed(section.bytes[@intCast(fde_info.cie_offset)..]); - const cie_info = switch (try EntryHeader.read(&cie_r, fde_info.cie_offset, section.id, endian)) { - .cie => |cie_info| cie_info, - .fde, .terminator => return bad(), // this is meant to be a CIE - }; - const cie_bytes_len = cast(usize, cie_info.bytes_len) orelse return error.EndOfStream; - const fde_bytes_len = cast(usize, fde_info.bytes_len) orelse return error.EndOfStream; - const cie: CommonInformationEntry = try .parse(try cie_r.take(cie_bytes_len), section.id, addr_size_bytes); - const fde: FrameDescriptionEntry = try .parse(section.vaddr + r.seek, try r.take(fde_bytes_len), cie, endian); + const bytes_len = cast(usize, fde_info.bytes_len) orelse return error.EndOfStream; + if (!need_lookup) { + try r.discardAll(bytes_len); + continue; + } + const cie = unwind.findCie(fde_info.cie_offset) orelse return error.InvalidDebugInfo; + const fde: FrameDescriptionEntry = try .parse(section.vaddr + r.seek, try r.take(bytes_len), cie, endian); try fde_list.append(gpa, .{ .pc_begin = fde.pc_begin, .fde_offset = entry_offset, @@ -502,12 +536,30 @@ pub fn prepareLookup(unwind: *Unwind, gpa: Allocator, addr_size_bytes: u8, endia unwind.lookup = .{ .sorted_fdes = final_fdes }; } +fn findCie(unwind: *const Unwind, offset: u64) ?*const CommonInformationEntry { + const offsets = unwind.cie_list.items(.offset); + if (offsets.len == 0) return null; + var start: usize = 0; + var len: usize = offsets.len; + while (len > 1) { + const mid = len / 2; + if (offset < offsets[start + mid]) { + len = mid; + } else { + start += mid; + len -= mid; + } + } + if (offsets[start] != offset) return null; + return &unwind.cie_list.items(.cie)[start]; +} + /// Given a program counter value, returns the offset of the corresponding FDE, or `null` if no /// matching FDE was found. The returned offset can be passed to `getFde` to load the data /// associated with the FDE. /// -/// Before calling this function, `prepareLookup` must return successfully at least once, to ensure -/// that `unwind.lookup` is populated. +/// Before calling this function, `prepare` must return successfully at least once, to ensure that +/// `unwind.lookup` is populated. /// /// The return value may be a false positive. After loading the FDE with `loadFde`, the caller must /// validate that `pc` is indeed in its range -- if it is not, then no FDE matches `pc`. @@ -524,20 +576,25 @@ pub fn lookupPc(unwind: *const Unwind, pc: u64, addr_size_bytes: u8, endian: End }, .sorted_fdes => |sorted_fdes| sorted_fdes, }; - const first_bad_idx = std.sort.partitionPoint(SortedFdeEntry, sorted_fdes, pc, struct { - fn canIncludePc(target_pc: u64, entry: SortedFdeEntry) bool { - return target_pc >= entry.pc_begin; // i.e. 
does 'entry_pc..<last pc>' include 'target_pc' + if (sorted_fdes.len == 0) return null; + var start: usize = 0; + var len: usize = sorted_fdes.len; + while (len > 1) { + const half = len / 2; + if (pc < sorted_fdes[start + half].pc_begin) { + len = half; + } else { + start += half; + len -= half; } - }.canIncludePc); - // `first_bad_idx` is the index of the first FDE whose `pc_begin` is too high to include `pc`. - // So if any FDE matches, it'll be the one at `first_bad_idx - 1` (maybe false positive). - if (first_bad_idx == 0) return null; - return sorted_fdes[first_bad_idx - 1].fde_offset; + } + // If any FDE matches, it'll be the one at `start` (maybe false positive). + return sorted_fdes[start].fde_offset; } /// Get the FDE at a given offset, as well as its associated CIE. This offset typically comes from /// `lookupPc`. The CFI instructions within can be evaluated with `VirtualMachine`. -pub fn getFde(unwind: *const Unwind, fde_offset: u64, addr_size_bytes: u8, endian: Endian) !struct { Format, CommonInformationEntry, FrameDescriptionEntry } { +pub fn getFde(unwind: *const Unwind, fde_offset: u64, endian: Endian) !struct { *const CommonInformationEntry, FrameDescriptionEntry } { const section = unwind.frame_section; if (fde_offset > section.bytes.len) return error.EndOfStream; @@ -547,19 +604,7 @@ pub fn getFde(unwind: *const Unwind, fde_offset: u64, addr_size_bytes: u8, endia .cie, .terminator => return bad(), // This is meant to be an FDE }; - const cie_offset = fde_info.cie_offset; - if (cie_offset > section.bytes.len) return error.EndOfStream; - var cie_reader: Reader = .fixed(section.bytes[@intCast(cie_offset)..]); - const cie_info = switch (try EntryHeader.read(&cie_reader, cie_offset, section.id, endian)) { - .cie => |info| info, - .fde, .terminator => return bad(), // This is meant to be a CIE - }; - - const cie: CommonInformationEntry = try .parse( - try cie_reader.take(cast(usize, cie_info.bytes_len) orelse return error.EndOfStream), - section.id, - addr_size_bytes, - ); + const cie = unwind.findCie(fde_info.cie_offset) orelse return error.InvalidDebugInfo; const fde: FrameDescriptionEntry = try .parse( section.vaddr + fde_offset + fde_reader.seek, try fde_reader.take(cast(usize, fde_info.bytes_len) orelse return error.EndOfStream), @@ -567,7 +612,7 @@ pub fn getFde(unwind: *const Unwind, fde_offset: u64, addr_size_bytes: u8, endia endian, ); - return .{ cie_info.format, cie, fde }; + return .{ cie, fde }; } const EhPointerContext = struct { diff --git a/lib/std/debug/Dwarf/Unwind/VirtualMachine.zig b/lib/std/debug/Dwarf/Unwind/VirtualMachine.zig index 997af95cbd..319841ea7f 100644 --- a/lib/std/debug/Dwarf/Unwind/VirtualMachine.zig +++ b/lib/std/debug/Dwarf/Unwind/VirtualMachine.zig @@ -5,9 +5,9 @@ pub const RegisterRule = union(enum) { /// The spec says that the default rule for each column is the undefined rule. /// However, it also allows ABI / compiler authors to specify alternate defaults, so /// there is a distinction made here. - default: void, - undefined: void, - same_value: void, + default, + undefined, + same_value, /// offset(N) offset: i64, /// val_offset(N) @@ -18,38 +18,39 @@ pub const RegisterRule = union(enum) { expression: []const u8, /// val_expression(E) val_expression: []const u8, - /// Augmenter-defined rule - architectural: void, +}; + +pub const CfaRule = union(enum) { + none, + reg_off: struct { + register: u8, + offset: i64, + }, + expression: []const u8, }; /// Each row contains unwinding rules for a set of registers. 
pub const Row = struct { /// Offset from `FrameDescriptionEntry.pc_begin` offset: u64 = 0, - /// Special-case column that defines the CFA (Canonical Frame Address) rule. - /// The register field of this column defines the register that CFA is derived from. - cfa: Column = .{}, + cfa: CfaRule = .none, /// The register fields in these columns define the register the rule applies to. - columns: ColumnRange = .{}, - /// Indicates that the next write to any column in this row needs to copy - /// the backing column storage first, as it may be referenced by previous rows. - copy_on_write: bool = false, + columns: ColumnRange = .{ .start = undefined, .len = 0 }, }; pub const Column = struct { - register: ?u8 = null, - rule: RegisterRule = .{ .default = {} }, + register: u8, + rule: RegisterRule, }; const ColumnRange = struct { - /// Index into `columns` of the first column in this row. - start: usize = undefined, - len: u8 = 0, + start: usize, + len: u8, }; columns: std.ArrayList(Column) = .empty, stack: std.ArrayList(struct { - cfa: Column, + cfa: CfaRule, columns: ColumnRange, }) = .empty, current_row: Row = .{}, @@ -71,235 +72,388 @@ pub fn reset(self: *VirtualMachine) void { } /// Return a slice backed by the row's non-CFA columns -pub fn rowColumns(self: VirtualMachine, row: Row) []Column { +pub fn rowColumns(self: *const VirtualMachine, row: *const Row) []Column { if (row.columns.len == 0) return &.{}; return self.columns.items[row.columns.start..][0..row.columns.len]; } /// Either retrieves or adds a column for `register` (non-CFA) in the current row. fn getOrAddColumn(self: *VirtualMachine, gpa: Allocator, register: u8) !*Column { - for (self.rowColumns(self.current_row)) |*c| { + for (self.rowColumns(&self.current_row)) |*c| { if (c.register == register) return c; } if (self.current_row.columns.len == 0) { self.current_row.columns.start = self.columns.items.len; + } else { + assert(self.current_row.columns.start + self.current_row.columns.len == self.columns.items.len); } self.current_row.columns.len += 1; const column = try self.columns.addOne(gpa); column.* = .{ .register = register, + .rule = .default, }; return column; } +pub fn populateCieLastRow( + gpa: Allocator, + cie: *Unwind.CommonInformationEntry, + addr_size_bytes: u8, + endian: std.builtin.Endian, +) !void { + assert(cie.last_row == null); + + var vm: VirtualMachine = .{}; + defer vm.deinit(gpa); + + try vm.evalInstructions( + gpa, + cie, + std.math.maxInt(u64), + cie.initial_instructions, + addr_size_bytes, + endian, + ); + + cie.last_row = .{ + .offset = vm.current_row.offset, + .cfa = vm.current_row.cfa, + .cols = try gpa.dupe(Column, vm.rowColumns(&vm.current_row)), + }; +} + /// Runs the CIE instructions, then the FDE instructions. Execution halts /// once the row that corresponds to `pc` is known, and the row is returned. 
pub fn runTo( - self: *VirtualMachine, + vm: *VirtualMachine, gpa: Allocator, pc: u64, - cie: Dwarf.Unwind.CommonInformationEntry, - fde: Dwarf.Unwind.FrameDescriptionEntry, + cie: *const Unwind.CommonInformationEntry, + fde: *const Unwind.FrameDescriptionEntry, addr_size_bytes: u8, endian: std.builtin.Endian, ) !Row { - assert(self.cie_row == null); - assert(pc >= fde.pc_begin); - assert(pc < fde.pc_begin + fde.pc_range); + assert(vm.cie_row == null); - var prev_row: Row = self.current_row; + const target_offset = pc - fde.pc_begin; + assert(target_offset < fde.pc_range); - const instruction_slices: [2][]const u8 = .{ - cie.initial_instructions, - fde.instructions, - }; - for (instruction_slices, [2]bool{ true, false }) |slice, is_cie_stream| { - var stream: std.Io.Reader = .fixed(slice); - while (stream.seek < slice.len) { - const instruction: Dwarf.call_frame.Instruction = try .read(&stream, addr_size_bytes, endian); - prev_row = try self.step(gpa, cie, is_cie_stream, instruction); - if (pc < fde.pc_begin + self.current_row.offset) return prev_row; + const instruction_bytes: []const u8 = insts: { + if (target_offset < cie.last_row.?.offset) { + break :insts cie.initial_instructions; } - } + // This is the more common case: start from the CIE's last row. + assert(vm.columns.items.len == 0); + vm.current_row = .{ + .offset = cie.last_row.?.offset, + .cfa = cie.last_row.?.cfa, + .columns = .{ + .start = 0, + .len = @intCast(cie.last_row.?.cols.len), + }, + }; + try vm.columns.appendSlice(gpa, cie.last_row.?.cols); + vm.cie_row = vm.current_row; + break :insts fde.instructions; + }; - return self.current_row; + try vm.evalInstructions( + gpa, + cie, + target_offset, + instruction_bytes, + addr_size_bytes, + endian, + ); + return vm.current_row; } -fn resolveCopyOnWrite(self: *VirtualMachine, gpa: Allocator) !void { - if (!self.current_row.copy_on_write) return; +/// Evaluates instructions from `instruction_bytes` until `target_addr` is reached or all +/// instructions have been evaluated. +fn evalInstructions( + vm: *VirtualMachine, + gpa: Allocator, + cie: *const Unwind.CommonInformationEntry, + target_addr: u64, + instruction_bytes: []const u8, + addr_size_bytes: u8, + endian: std.builtin.Endian, +) !void { + var fr: std.Io.Reader = .fixed(instruction_bytes); + while (fr.seek < fr.buffer.len) { + switch (try Instruction.read(&fr, addr_size_bytes, endian)) { + .nop => { + // If there was one nop, there's a good chance we've reached the padding and so + // everything left is a nop, which is represented by a 0 byte. 
+ if (std.mem.allEqual(u8, fr.buffered(), 0)) return; + }, + + .remember_state => { + try vm.stack.append(gpa, .{ + .cfa = vm.current_row.cfa, + .columns = vm.current_row.columns, + }); + const cols_len = vm.current_row.columns.len; + const copy_start = vm.columns.items.len; + assert(vm.current_row.columns.start == copy_start - cols_len); + try vm.columns.ensureUnusedCapacity(gpa, cols_len); // to prevent aliasing issues + vm.columns.appendSliceAssumeCapacity(vm.columns.items[copy_start - cols_len ..]); + vm.current_row.columns.start = copy_start; + }, + .restore_state => { + const restored = vm.stack.pop() orelse return error.InvalidOperation; + vm.columns.shrinkRetainingCapacity(restored.columns.start + restored.columns.len); + + vm.current_row.cfa = restored.cfa; + vm.current_row.columns = restored.columns; + }, - const new_start = self.columns.items.len; - if (self.current_row.columns.len > 0) { - try self.columns.ensureUnusedCapacity(gpa, self.current_row.columns.len); - self.columns.appendSliceAssumeCapacity(self.rowColumns(self.current_row)); - self.current_row.columns.start = new_start; + .advance_loc => |delta| { + const new_addr = vm.current_row.offset + delta * cie.code_alignment_factor; + if (new_addr > target_addr) return; + vm.current_row.offset = new_addr; + }, + .set_loc => |new_addr| { + if (new_addr <= vm.current_row.offset) return error.InvalidOperation; + if (cie.segment_selector_size != 0) return error.InvalidOperation; // unsupported + // TODO: Check cie.segment_selector_size != 0 for DWARFV4 + + if (new_addr > target_addr) return; + vm.current_row.offset = new_addr; + }, + + .register => |reg| { + const column = try vm.getOrAddColumn(gpa, reg.index); + column.rule = switch (reg.rule) { + .restore => rule: { + const cie_row = &(vm.cie_row orelse return error.InvalidOperation); + for (vm.rowColumns(cie_row)) |cie_col| { + if (cie_col.register == reg.index) break :rule cie_col.rule; + } + break :rule .default; + }, + .undefined => .undefined, + .same_value => .same_value, + .offset_uf => |off| .{ .offset = @as(i64, @intCast(off)) * cie.data_alignment_factor }, + .offset_sf => |off| .{ .offset = off * cie.data_alignment_factor }, + .val_offset_uf => |off| .{ .val_offset = @as(i64, @intCast(off)) * cie.data_alignment_factor }, + .val_offset_sf => |off| .{ .val_offset = off * cie.data_alignment_factor }, + .register => |callee_reg| .{ .register = callee_reg }, + .expr => |len| .{ .expression = try takeExprBlock(&fr, len) }, + .val_expr => |len| .{ .val_expression = try takeExprBlock(&fr, len) }, + }; + }, + .def_cfa => |cfa| vm.current_row.cfa = .{ .reg_off = .{ + .register = cfa.register, + .offset = @intCast(cfa.offset), + } }, + .def_cfa_sf => |cfa| vm.current_row.cfa = .{ .reg_off = .{ + .register = cfa.register, + .offset = cfa.offset_sf * cie.data_alignment_factor, + } }, + .def_cfa_reg => |register| switch (vm.current_row.cfa) { + .none, .expression => return error.InvalidOperation, + .reg_off => |*ro| ro.register = register, + }, + .def_cfa_offset => |offset| switch (vm.current_row.cfa) { + .none, .expression => return error.InvalidOperation, + .reg_off => |*ro| ro.offset = @intCast(offset), + }, + .def_cfa_offset_sf => |offset_sf| switch (vm.current_row.cfa) { + .none, .expression => return error.InvalidOperation, + .reg_off => |*ro| ro.offset = offset_sf * cie.data_alignment_factor, + }, + .def_cfa_expr => |len| { + vm.current_row.cfa = .{ .expression = try takeExprBlock(&fr, len) }; + }, + } } } -/// Executes a single instruction. 
-/// If this instruction is from the CIE, `is_initial` should be set. -/// Returns the value of `current_row` before executing this instruction. -pub fn step( - self: *VirtualMachine, - gpa: Allocator, - cie: Dwarf.Unwind.CommonInformationEntry, - is_initial: bool, - instruction: Dwarf.call_frame.Instruction, -) !Row { - // CIE instructions must be run before FDE instructions - assert(!is_initial or self.cie_row == null); - if (!is_initial and self.cie_row == null) { - self.cie_row = self.current_row; - self.current_row.copy_on_write = true; - } +fn takeExprBlock(r: *std.Io.Reader, len: usize) error{ ReadFailed, InvalidOperand }![]const u8 { + return r.take(len) catch |err| switch (err) { + error.ReadFailed => |e| return e, + error.EndOfStream => return error.InvalidOperand, + }; +} - const prev_row = self.current_row; - switch (instruction) { - .set_loc => |i| { - if (i.address <= self.current_row.offset) return error.InvalidOperation; - if (cie.segment_selector_size != 0) return error.InvalidOperation; // unsupported - // TODO: Check cie.segment_selector_size != 0 for DWARFV4 - self.current_row.offset = i.address; - }, - inline .advance_loc, - .advance_loc1, - .advance_loc2, - .advance_loc4, - => |i| { - self.current_row.offset += i.delta * cie.code_alignment_factor; - self.current_row.copy_on_write = true; - }, - inline .offset, - .offset_extended, - .offset_extended_sf, - => |i| { - try self.resolveCopyOnWrite(gpa); - const column = try self.getOrAddColumn(gpa, i.register); - column.rule = .{ .offset = @as(i64, @intCast(i.offset)) * cie.data_alignment_factor }; - }, - inline .restore, - .restore_extended, - => |i| { - try self.resolveCopyOnWrite(gpa); - if (self.cie_row) |cie_row| { - const column = try self.getOrAddColumn(gpa, i.register); - column.rule = for (self.rowColumns(cie_row)) |cie_column| { - if (cie_column.register == i.register) break cie_column.rule; - } else .{ .default = {} }; - } else return error.InvalidOperation; - }, - .nop => {}, - .undefined => |i| { - try self.resolveCopyOnWrite(gpa); - const column = try self.getOrAddColumn(gpa, i.register); - column.rule = .{ .undefined = {} }; - }, - .same_value => |i| { - try self.resolveCopyOnWrite(gpa); - const column = try self.getOrAddColumn(gpa, i.register); - column.rule = .{ .same_value = {} }; - }, - .register => |i| { - try self.resolveCopyOnWrite(gpa); - const column = try self.getOrAddColumn(gpa, i.register); - column.rule = .{ .register = i.target_register }; - }, - .remember_state => { - try self.stack.append(gpa, .{ - .cfa = self.current_row.cfa, - .columns = self.current_row.columns, - }); - self.current_row.copy_on_write = true; - }, - .restore_state => { - const restored = self.stack.pop() orelse return error.InvalidOperation; - self.columns.shrinkRetainingCapacity(self.columns.items.len - self.current_row.columns.len); - try self.columns.ensureUnusedCapacity(gpa, restored.columns.len); - - self.current_row.cfa = restored.cfa; - self.current_row.columns.start = self.columns.items.len; - self.current_row.columns.len = restored.columns.len; - self.columns.appendSliceAssumeCapacity(self.columns.items[restored.columns.start..][0..restored.columns.len]); - }, - .def_cfa => |i| { - try self.resolveCopyOnWrite(gpa); - self.current_row.cfa = .{ - .register = i.register, - .rule = .{ .val_offset = @intCast(i.offset) }, - }; - }, - .def_cfa_sf => |i| { - try self.resolveCopyOnWrite(gpa); - self.current_row.cfa = .{ - .register = i.register, - .rule = .{ .val_offset = i.offset * cie.data_alignment_factor }, - }; - }, - 
.def_cfa_register => |i| { - try self.resolveCopyOnWrite(gpa); - if (self.current_row.cfa.register == null or self.current_row.cfa.rule != .val_offset) return error.InvalidOperation; - self.current_row.cfa.register = i.register; - }, - .def_cfa_offset => |i| { - try self.resolveCopyOnWrite(gpa); - if (self.current_row.cfa.register == null or self.current_row.cfa.rule != .val_offset) return error.InvalidOperation; - self.current_row.cfa.rule = .{ - .val_offset = @intCast(i.offset), - }; - }, - .def_cfa_offset_sf => |i| { - try self.resolveCopyOnWrite(gpa); - if (self.current_row.cfa.register == null or self.current_row.cfa.rule != .val_offset) return error.InvalidOperation; - self.current_row.cfa.rule = .{ - .val_offset = i.offset * cie.data_alignment_factor, - }; - }, - .def_cfa_expression => |i| { - try self.resolveCopyOnWrite(gpa); - self.current_row.cfa.register = undefined; - self.current_row.cfa.rule = .{ - .expression = i.block, - }; +const OpcodeByte = packed struct(u8) { + low: packed union { + operand: u6, + extended: enum(u6) { + nop = 0, + set_loc = 1, + advance_loc1 = 2, + advance_loc2 = 3, + advance_loc4 = 4, + offset_extended = 5, + restore_extended = 6, + undefined = 7, + same_value = 8, + register = 9, + remember_state = 10, + restore_state = 11, + def_cfa = 12, + def_cfa_register = 13, + def_cfa_offset = 14, + def_cfa_expression = 15, + expression = 16, + offset_extended_sf = 17, + def_cfa_sf = 18, + def_cfa_offset_sf = 19, + val_offset = 20, + val_offset_sf = 21, + val_expression = 22, + _, }, - .expression => |i| { - try self.resolveCopyOnWrite(gpa); - const column = try self.getOrAddColumn(gpa, i.register); - column.rule = .{ - .expression = i.block, - }; - }, - .val_offset => |i| { - try self.resolveCopyOnWrite(gpa); - const column = try self.getOrAddColumn(gpa, i.register); - column.rule = .{ - .val_offset = @as(i64, @intCast(i.offset)) * cie.data_alignment_factor, - }; - }, - .val_offset_sf => |i| { - try self.resolveCopyOnWrite(gpa); - const column = try self.getOrAddColumn(gpa, i.register); - column.rule = .{ - .val_offset = i.offset * cie.data_alignment_factor, - }; - }, - .val_expression => |i| { - try self.resolveCopyOnWrite(gpa); - const column = try self.getOrAddColumn(gpa, i.register); - column.rule = .{ - .val_expression = i.block, - }; + }, + opcode: enum(u2) { + extended = 0, + advance_loc = 1, + offset = 2, + restore = 3, + }, +}; + +pub const Instruction = union(enum) { + nop, + remember_state, + restore_state, + advance_loc: u32, + set_loc: u64, + + register: struct { + index: u8, + rule: union(enum) { + restore, // restore from cie + undefined, + same_value, + offset_uf: u64, + offset_sf: i64, + val_offset_uf: u64, + val_offset_sf: i64, + register: u8, + /// Value is the number of bytes in the DWARF expression, which the caller must read. + expr: usize, + /// Value is the number of bytes in the DWARF expression, which the caller must read. + val_expr: usize, }, - } + }, - return prev_row; -} + def_cfa: struct { + register: u8, + offset: u64, + }, + def_cfa_sf: struct { + register: u8, + offset_sf: i64, + }, + def_cfa_reg: u8, + def_cfa_offset: u64, + def_cfa_offset_sf: i64, + /// Value is the number of bytes in the DWARF expression, which the caller must read. 
+ def_cfa_expr: usize, + + pub fn read( + reader: *std.Io.Reader, + addr_size_bytes: u8, + endian: std.builtin.Endian, + ) !Instruction { + const inst: OpcodeByte = @bitCast(try reader.takeByte()); + return switch (inst.opcode) { + .advance_loc => .{ .advance_loc = inst.low.operand }, + .offset => .{ .register = .{ + .index = inst.low.operand, + .rule = .{ .offset_uf = try reader.takeLeb128(u64) }, + } }, + .restore => .{ .register = .{ + .index = inst.low.operand, + .rule = .restore, + } }, + .extended => switch (inst.low.extended) { + .nop => .nop, + .remember_state => .remember_state, + .restore_state => .restore_state, + .advance_loc1 => .{ .advance_loc = try reader.takeByte() }, + .advance_loc2 => .{ .advance_loc = try reader.takeInt(u16, endian) }, + .advance_loc4 => .{ .advance_loc = try reader.takeInt(u32, endian) }, + .set_loc => .{ .set_loc = switch (addr_size_bytes) { + 2 => try reader.takeInt(u16, endian), + 4 => try reader.takeInt(u32, endian), + 8 => try reader.takeInt(u64, endian), + else => return error.UnsupportedAddrSize, + } }, + + .offset_extended => .{ .register = .{ + .index = try reader.takeLeb128(u8), + .rule = .{ .offset_uf = try reader.takeLeb128(u64) }, + } }, + .offset_extended_sf => .{ .register = .{ + .index = try reader.takeLeb128(u8), + .rule = .{ .offset_sf = try reader.takeLeb128(i64) }, + } }, + .restore_extended => .{ .register = .{ + .index = try reader.takeLeb128(u8), + .rule = .restore, + } }, + .undefined => .{ .register = .{ + .index = try reader.takeLeb128(u8), + .rule = .undefined, + } }, + .same_value => .{ .register = .{ + .index = try reader.takeLeb128(u8), + .rule = .same_value, + } }, + .register => .{ .register = .{ + .index = try reader.takeLeb128(u8), + .rule = .{ .register = try reader.takeLeb128(u8) }, + } }, + .val_offset => .{ .register = .{ + .index = try reader.takeLeb128(u8), + .rule = .{ .val_offset_uf = try reader.takeLeb128(u64) }, + } }, + .val_offset_sf => .{ .register = .{ + .index = try reader.takeLeb128(u8), + .rule = .{ .val_offset_sf = try reader.takeLeb128(i64) }, + } }, + .expression => .{ .register = .{ + .index = try reader.takeLeb128(u8), + .rule = .{ .expr = try reader.takeLeb128(usize) }, + } }, + .val_expression => .{ .register = .{ + .index = try reader.takeLeb128(u8), + .rule = .{ .val_expr = try reader.takeLeb128(usize) }, + } }, + + .def_cfa => .{ .def_cfa = .{ + .register = try reader.takeLeb128(u8), + .offset = try reader.takeLeb128(u64), + } }, + .def_cfa_sf => .{ .def_cfa_sf = .{ + .register = try reader.takeLeb128(u8), + .offset_sf = try reader.takeLeb128(i64), + } }, + .def_cfa_register => .{ .def_cfa_reg = try reader.takeLeb128(u8) }, + .def_cfa_offset => .{ .def_cfa_offset = try reader.takeLeb128(u64) }, + .def_cfa_offset_sf => .{ .def_cfa_offset_sf = try reader.takeLeb128(i64) }, + .def_cfa_expression => .{ .def_cfa_expr = try reader.takeLeb128(usize) }, + + _ => switch (@intFromEnum(inst.low.extended)) { + 0x1C...0x3F => return error.UnimplementedUserOpcode, + else => return error.InvalidOpcode, + }, + }, + }; + } +}; const std = @import("../../../std.zig"); const assert = std.debug.assert; const Allocator = std.mem.Allocator; -const Dwarf = std.debug.Dwarf; +const Unwind = std.debug.Dwarf.Unwind; const VirtualMachine = @This(); diff --git a/lib/std/debug/Dwarf/call_frame.zig b/lib/std/debug/Dwarf/call_frame.zig deleted file mode 100644 index 8f1758f4eb..0000000000 --- a/lib/std/debug/Dwarf/call_frame.zig +++ /dev/null @@ -1,288 +0,0 @@ -const std = @import("../../std.zig"); -const Reader = 
std.Io.Reader; - -/// TODO merge with std.dwarf.CFA -const Opcode = enum(u8) { - advance_loc = 0x1 << 6, - offset = 0x2 << 6, - restore = 0x3 << 6, - - nop = 0x00, - set_loc = 0x01, - advance_loc1 = 0x02, - advance_loc2 = 0x03, - advance_loc4 = 0x04, - offset_extended = 0x05, - restore_extended = 0x06, - undefined = 0x07, - same_value = 0x08, - register = 0x09, - remember_state = 0x0a, - restore_state = 0x0b, - def_cfa = 0x0c, - def_cfa_register = 0x0d, - def_cfa_offset = 0x0e, - def_cfa_expression = 0x0f, - expression = 0x10, - offset_extended_sf = 0x11, - def_cfa_sf = 0x12, - def_cfa_offset_sf = 0x13, - val_offset = 0x14, - val_offset_sf = 0x15, - val_expression = 0x16, - - // These opcodes encode an operand in the lower 6 bits of the opcode itself - pub const lo_inline = @intFromEnum(Opcode.advance_loc); - pub const hi_inline = @intFromEnum(Opcode.restore) | 0b111111; - - // These opcodes are trailed by zero or more operands - pub const lo_reserved = @intFromEnum(Opcode.nop); - pub const hi_reserved = @intFromEnum(Opcode.val_expression); - - // Vendor-specific opcodes - pub const lo_user = 0x1c; - pub const hi_user = 0x3f; -}; - -/// The returned slice points into `reader.buffer`. -fn readBlock(reader: *Reader) ![]const u8 { - const block_len = try reader.takeLeb128(usize); - return reader.take(block_len) catch |err| switch (err) { - error.EndOfStream => return error.InvalidOperand, - error.ReadFailed => |e| return e, - }; -} - -pub const Instruction = union(Opcode) { - advance_loc: struct { - delta: u8, - }, - offset: struct { - register: u8, - offset: u64, - }, - restore: struct { - register: u8, - }, - nop: void, - set_loc: struct { - address: u64, - }, - advance_loc1: struct { - delta: u8, - }, - advance_loc2: struct { - delta: u16, - }, - advance_loc4: struct { - delta: u32, - }, - offset_extended: struct { - register: u8, - offset: u64, - }, - restore_extended: struct { - register: u8, - }, - undefined: struct { - register: u8, - }, - same_value: struct { - register: u8, - }, - register: struct { - register: u8, - target_register: u8, - }, - remember_state: void, - restore_state: void, - def_cfa: struct { - register: u8, - offset: u64, - }, - def_cfa_register: struct { - register: u8, - }, - def_cfa_offset: struct { - offset: u64, - }, - def_cfa_expression: struct { - block: []const u8, - }, - expression: struct { - register: u8, - block: []const u8, - }, - offset_extended_sf: struct { - register: u8, - offset: i64, - }, - def_cfa_sf: struct { - register: u8, - offset: i64, - }, - def_cfa_offset_sf: struct { - offset: i64, - }, - val_offset: struct { - register: u8, - offset: u64, - }, - val_offset_sf: struct { - register: u8, - offset: i64, - }, - val_expression: struct { - register: u8, - block: []const u8, - }, - - /// `reader` must be a `Reader.fixed` so that regions of its buffer are never invalidated. 
- pub fn read( - reader: *Reader, - addr_size_bytes: u8, - endian: std.builtin.Endian, - ) !Instruction { - switch (try reader.takeByte()) { - Opcode.lo_inline...Opcode.hi_inline => |opcode| { - const e: Opcode = @enumFromInt(opcode & 0b11000000); - const value: u6 = @intCast(opcode & 0b111111); - return switch (e) { - .advance_loc => .{ - .advance_loc = .{ .delta = value }, - }, - .offset => .{ - .offset = .{ - .register = value, - .offset = try reader.takeLeb128(u64), - }, - }, - .restore => .{ - .restore = .{ .register = value }, - }, - else => unreachable, - }; - }, - Opcode.lo_reserved...Opcode.hi_reserved => |opcode| { - const e: Opcode = @enumFromInt(opcode); - return switch (e) { - .advance_loc, - .offset, - .restore, - => unreachable, - .nop => .{ .nop = {} }, - .set_loc => .{ .set_loc = .{ - .address = switch (addr_size_bytes) { - 2 => try reader.takeInt(u16, endian), - 4 => try reader.takeInt(u32, endian), - 8 => try reader.takeInt(u64, endian), - else => return error.UnsupportedAddrSize, - }, - } }, - .advance_loc1 => .{ - .advance_loc1 = .{ .delta = try reader.takeByte() }, - }, - .advance_loc2 => .{ - .advance_loc2 = .{ .delta = try reader.takeInt(u16, endian) }, - }, - .advance_loc4 => .{ - .advance_loc4 = .{ .delta = try reader.takeInt(u32, endian) }, - }, - .offset_extended => .{ - .offset_extended = .{ - .register = try reader.takeLeb128(u8), - .offset = try reader.takeLeb128(u64), - }, - }, - .restore_extended => .{ - .restore_extended = .{ - .register = try reader.takeLeb128(u8), - }, - }, - .undefined => .{ - .undefined = .{ - .register = try reader.takeLeb128(u8), - }, - }, - .same_value => .{ - .same_value = .{ - .register = try reader.takeLeb128(u8), - }, - }, - .register => .{ - .register = .{ - .register = try reader.takeLeb128(u8), - .target_register = try reader.takeLeb128(u8), - }, - }, - .remember_state => .{ .remember_state = {} }, - .restore_state => .{ .restore_state = {} }, - .def_cfa => .{ - .def_cfa = .{ - .register = try reader.takeLeb128(u8), - .offset = try reader.takeLeb128(u64), - }, - }, - .def_cfa_register => .{ - .def_cfa_register = .{ - .register = try reader.takeLeb128(u8), - }, - }, - .def_cfa_offset => .{ - .def_cfa_offset = .{ - .offset = try reader.takeLeb128(u64), - }, - }, - .def_cfa_expression => .{ - .def_cfa_expression = .{ - .block = try readBlock(reader), - }, - }, - .expression => .{ - .expression = .{ - .register = try reader.takeLeb128(u8), - .block = try readBlock(reader), - }, - }, - .offset_extended_sf => .{ - .offset_extended_sf = .{ - .register = try reader.takeLeb128(u8), - .offset = try reader.takeLeb128(i64), - }, - }, - .def_cfa_sf => .{ - .def_cfa_sf = .{ - .register = try reader.takeLeb128(u8), - .offset = try reader.takeLeb128(i64), - }, - }, - .def_cfa_offset_sf => .{ - .def_cfa_offset_sf = .{ - .offset = try reader.takeLeb128(i64), - }, - }, - .val_offset => .{ - .val_offset = .{ - .register = try reader.takeLeb128(u8), - .offset = try reader.takeLeb128(u64), - }, - }, - .val_offset_sf => .{ - .val_offset_sf = .{ - .register = try reader.takeLeb128(u8), - .offset = try reader.takeLeb128(i64), - }, - }, - .val_expression => .{ - .val_expression = .{ - .register = try reader.takeLeb128(u8), - .block = try readBlock(reader), - }, - }, - }; - }, - Opcode.lo_user...Opcode.hi_user => return error.UnimplementedUserOpcode, - else => return error.InvalidOpcode, - } - } -}; diff --git a/lib/std/debug/SelfInfo.zig b/lib/std/debug/SelfInfo.zig index 2da5834ba6..bb05ce5216 100644 --- a/lib/std/debug/SelfInfo.zig +++ 
b/lib/std/debug/SelfInfo.zig @@ -207,6 +207,36 @@ pub const DwarfUnwindContext = struct { vm: Dwarf.Unwind.VirtualMachine, stack_machine: Dwarf.expression.StackMachine(.{ .call_frame_context = true }), + pub const Cache = struct { + /// TODO: to allow `DwarfUnwindContext` to work on freestanding, we currently just don't use + /// this mutex there. That's a bad solution, but a better one depends on the standard + /// library's general support for "bring your own OS" being improved. + mutex: switch (builtin.os.tag) { + else => std.Thread.Mutex, + .freestanding, .other => struct { + fn lock(_: @This()) void {} + fn unlock(_: @This()) void {} + }, + }, + buf: [num_slots]Slot, + const num_slots = 2048; + const Slot = struct { + const max_regs = 32; + pc: usize, + cie: *const Dwarf.Unwind.CommonInformationEntry, + cfa_rule: Dwarf.Unwind.VirtualMachine.CfaRule, + rules_regs: [max_regs]u16, + rules: [max_regs]Dwarf.Unwind.VirtualMachine.RegisterRule, + num_rules: u8, + }; + /// This is a function rather than a declaration to avoid lowering a very large struct value + /// into the binary when most of it is `undefined`. + pub fn init(c: *Cache) void { + c.mutex = .{}; + for (&c.buf) |*slot| slot.pc = 0; + } + }; + pub fn init(cpu_context: *const CpuContext) DwarfUnwindContext { comptime assert(supports_unwinding); @@ -243,126 +273,30 @@ pub const DwarfUnwindContext = struct { return ptr.*; } - /// The default rule is typically equivalent to `.undefined`, but ABIs may define it differently. - fn defaultRuleBehavior(register: u8) enum { undefined, same_value } { - if (builtin.cpu.arch.isAARCH64() and register >= 19 and register <= 28) { - // The default rule for callee-saved registers on AArch64 acts like the `.same_value` rule - return .same_value; - } - return .undefined; - } - - /// Resolves the register rule and places the result into `out` (see regBytes). Returns `true` - /// iff the rule was undefined. This is *not* the same as `col.rule == .undefined`, because the - /// default rule may be undefined. 
- pub fn resolveRegisterRule( - context: *DwarfUnwindContext, - gpa: Allocator, - col: Dwarf.Unwind.VirtualMachine.Column, - expression_context: std.debug.Dwarf.expression.Context, - out: []u8, - ) !bool { - switch (col.rule) { - .default => { - const register = col.register orelse return error.InvalidRegister; - switch (defaultRuleBehavior(register)) { - .undefined => { - @memset(out, undefined); - return true; - }, - .same_value => { - const src = try context.cpu_context.dwarfRegisterBytes(register); - if (src.len != out.len) return error.RegisterSizeMismatch; - @memcpy(out, src); - return false; - }, - } - }, - .undefined => { - @memset(out, undefined); - return true; - }, - .same_value => { - // TODO: This copy could be eliminated if callers always copy the state then call this function to update it - const register = col.register orelse return error.InvalidRegister; - const src = try context.cpu_context.dwarfRegisterBytes(register); - if (src.len != out.len) return error.RegisterSizeMismatch; - @memcpy(out, src); - return false; - }, - .offset => |offset| { - const cfa = context.cfa orelse return error.InvalidCFA; - const addr = try applyOffset(cfa, offset); - const ptr: *const usize = @ptrFromInt(addr); - mem.writeInt(usize, out[0..@sizeOf(usize)], ptr.*, native_endian); - return false; - }, - .val_offset => |offset| { - const cfa = context.cfa orelse return error.InvalidCFA; - mem.writeInt(usize, out[0..@sizeOf(usize)], try applyOffset(cfa, offset), native_endian); - return false; - }, - .register => |register| { - const src = try context.cpu_context.dwarfRegisterBytes(register); - if (src.len != out.len) return error.RegisterSizeMismatch; - @memcpy(out, src); - return false; - }, - .expression => |expression| { - context.stack_machine.reset(); - const value = try context.stack_machine.run( - expression, - gpa, - expression_context, - context.cfa.?, - ) orelse return error.NoExpressionValue; - const addr = switch (value) { - .generic => |addr| addr, - else => return error.InvalidExpressionValue, - }; - const ptr: *usize = @ptrFromInt(addr); - mem.writeInt(usize, out[0..@sizeOf(usize)], ptr.*, native_endian); - return false; - }, - .val_expression => |expression| { - context.stack_machine.reset(); - const value = try context.stack_machine.run( - expression, - gpa, - expression_context, - context.cfa.?, - ) orelse return error.NoExpressionValue; - const val_raw = switch (value) { - .generic => |raw| raw, - else => return error.InvalidExpressionValue, - }; - mem.writeInt(usize, out[0..@sizeOf(usize)], val_raw, native_endian); - return false; - }, - .architectural => return error.UnimplementedRegisterRule, - } - } - /// Unwind a stack frame using DWARF unwinding info, updating the register context. /// /// If `.eh_frame_hdr` is available and complete, it will be used to binary search for the FDE. /// Otherwise, a linear scan of `.eh_frame` and `.debug_frame` is done to find the FDE. The latter /// may require lazily loading the data in those sections. /// - /// `explicit_fde_offset` is for cases where the FDE offset is known, such as when __unwind_info + /// `explicit_fde_offset` is for cases where the FDE offset is known, such as when using macOS' + /// `__unwind_info` section. 
pub fn unwindFrame( context: *DwarfUnwindContext, + cache: *Cache, gpa: Allocator, unwind: *const Dwarf.Unwind, load_offset: usize, explicit_fde_offset: ?usize, ) Error!usize { - return unwindFrameInner(context, gpa, unwind, load_offset, explicit_fde_offset) catch |err| switch (err) { - error.InvalidDebugInfo, error.MissingDebugInfo, error.OutOfMemory => |e| return e, + return unwindFrameInner(context, cache, gpa, unwind, load_offset, explicit_fde_offset) catch |err| switch (err) { + error.InvalidDebugInfo, + error.MissingDebugInfo, + error.UnsupportedDebugInfo, + error.OutOfMemory, + => |e| return e, - error.UnimplementedRegisterRule, error.UnsupportedAddrSize, - error.UnsupportedDwarfVersion, error.UnimplementedUserOpcode, error.UnimplementedExpressionCall, error.UnimplementedOpcode, @@ -394,12 +328,12 @@ pub const DwarfUnwindContext = struct { error.InvalidExpressionValue, error.NoExpressionValue, error.RegisterSizeMismatch, - error.InvalidCFA, => return error.InvalidDebugInfo, }; } fn unwindFrameInner( context: *DwarfUnwindContext, + cache: *Cache, gpa: Allocator, unwind: *const Dwarf.Unwind, load_offset: usize, @@ -411,57 +345,85 @@ pub const DwarfUnwindContext = struct { const pc_vaddr = context.pc - load_offset; - const fde_offset = explicit_fde_offset orelse try unwind.lookupPc( - pc_vaddr, - @sizeOf(usize), - native_endian, - ) orelse return error.MissingDebugInfo; - const format, const cie, const fde = try unwind.getFde(fde_offset, @sizeOf(usize), native_endian); + const cache_slot: Cache.Slot = slot: { + const slot_idx = std.hash.int(pc_vaddr) % Cache.num_slots; - // Check if the FDE *actually* includes the pc (`lookupPc` can return false positives). - if (pc_vaddr < fde.pc_begin or pc_vaddr >= fde.pc_begin + fde.pc_range) { - return error.MissingDebugInfo; - } + { + cache.mutex.lock(); + defer cache.mutex.unlock(); + if (cache.buf[slot_idx].pc == pc_vaddr) break :slot cache.buf[slot_idx]; + } + + const fde_offset = explicit_fde_offset orelse try unwind.lookupPc( + pc_vaddr, + @sizeOf(usize), + native_endian, + ) orelse return error.MissingDebugInfo; + const cie, const fde = try unwind.getFde(fde_offset, native_endian); - // Do not set `compile_unit` because the spec states that CFIs - // may not reference other debug sections anyway. - var expression_context: Dwarf.expression.Context = .{ - .format = format, - .cpu_context = &context.cpu_context, - .cfa = context.cfa, + // Check if the FDE *actually* includes the pc (`lookupPc` can return false positives). 
+ if (pc_vaddr < fde.pc_begin or pc_vaddr >= fde.pc_begin + fde.pc_range) { + return error.MissingDebugInfo; + } + + context.vm.reset(); + + const row = try context.vm.runTo(gpa, pc_vaddr, cie, &fde, @sizeOf(usize), native_endian); + + if (row.columns.len > Cache.Slot.max_regs) return error.UnsupportedDebugInfo; + + var slot: Cache.Slot = .{ + .pc = pc_vaddr, + .cie = cie, + .cfa_rule = row.cfa, + .rules_regs = undefined, + .rules = undefined, + .num_rules = 0, + }; + for (context.vm.rowColumns(&row)) |col| { + const i = slot.num_rules; + slot.rules_regs[i] = col.register; + slot.rules[i] = col.rule; + slot.num_rules += 1; + } + + { + cache.mutex.lock(); + defer cache.mutex.unlock(); + cache.buf[slot_idx] = slot; + } + + break :slot slot; }; - context.vm.reset(); + const format = cache_slot.cie.format; + const return_address_register = cache_slot.cie.return_address_register; - const row = try context.vm.runTo(gpa, pc_vaddr, cie, fde, @sizeOf(usize), native_endian); - context.cfa = switch (row.cfa.rule) { - .val_offset => |offset| blk: { - const register = row.cfa.register orelse return error.InvalidCFARule; - const value = (try regNative(&context.cpu_context, register)).*; - break :blk try applyOffset(value, offset); + context.cfa = switch (cache_slot.cfa_rule) { + .none => return error.InvalidCFARule, + .reg_off => |ro| cfa: { + const ptr = try regNative(&context.cpu_context, ro.register); + break :cfa try applyOffset(ptr.*, ro.offset); }, - .expression => |expr| blk: { + .expression => |expr| cfa: { context.stack_machine.reset(); - const value = try context.stack_machine.run( - expr, - gpa, - expression_context, - context.cfa, - ); - - if (value) |v| { - if (v != .generic) return error.InvalidExpressionValue; - break :blk v.generic; - } else return error.NoExpressionValue; + const value = try context.stack_machine.run(expr, gpa, .{ + .format = format, + .cpu_context = &context.cpu_context, + }, context.cfa) orelse return error.NoExpressionValue; + switch (value) { + .generic => |g| break :cfa g, + else => return error.InvalidExpressionValue, + } }, - else => return error.InvalidCFARule, }; - expression_context.cfa = context.cfa; - - // If the rule for the return address register is 'undefined', that indicates there is no - // return address, i.e. this is the end of the stack. - var explicit_has_return_address: ?bool = null; + // If unspecified, we'll use the default rule for the return address register, which is + // typically equivalent to `.undefined` (meaning there is no return address), but may be + // overriden by ABIs. + var has_return_address: bool = builtin.cpu.arch.isAARCH64() and + return_address_register >= 19 and + return_address_register <= 28; // Create a copy of the CPU context, to which we will apply the new rules. 
var new_cpu_context = context.cpu_context; @@ -469,25 +431,78 @@ pub const DwarfUnwindContext = struct { // On all implemented architectures, the CFA is defined as being the previous frame's SP (try regNative(&new_cpu_context, sp_reg_num)).* = context.cfa.?; - for (context.vm.rowColumns(row)) |column| { - if (column.register) |register| { - const dest = try new_cpu_context.dwarfRegisterBytes(register); - const rule_undef = try context.resolveRegisterRule(gpa, column, expression_context, dest); - if (register == cie.return_address_register) { - explicit_has_return_address = !rule_undef; - } + const rules_len = cache_slot.num_rules; + for (cache_slot.rules_regs[0..rules_len], cache_slot.rules[0..rules_len]) |register, rule| { + const new_val: union(enum) { + same, + undefined, + val: usize, + bytes: []const u8, + } = switch (rule) { + .default => val: { + // The default rule is typically equivalent to `.undefined`, but ABIs may override it. + if (builtin.cpu.arch.isAARCH64() and register >= 19 and register <= 28) { + break :val .same; + } + break :val .undefined; + }, + .undefined => .undefined, + .same_value => .same, + .offset => |offset| val: { + const ptr: *const usize = @ptrFromInt(try applyOffset(context.cfa.?, offset)); + break :val .{ .val = ptr.* }; + }, + .val_offset => |offset| .{ .val = try applyOffset(context.cfa.?, offset) }, + .register => |r| .{ .bytes = try context.cpu_context.dwarfRegisterBytes(r) }, + .expression => |expr| val: { + context.stack_machine.reset(); + const value = try context.stack_machine.run(expr, gpa, .{ + .format = format, + .cpu_context = &context.cpu_context, + }, context.cfa.?) orelse return error.NoExpressionValue; + const ptr: *const usize = switch (value) { + .generic => |addr| @ptrFromInt(addr), + else => return error.InvalidExpressionValue, + }; + break :val .{ .val = ptr.* }; + }, + .val_expression => |expr| val: { + context.stack_machine.reset(); + const value = try context.stack_machine.run(expr, gpa, .{ + .format = format, + .cpu_context = &context.cpu_context, + }, context.cfa.?) orelse return error.NoExpressionValue; + switch (value) { + .generic => |val| break :val .{ .val = val }, + else => return error.InvalidExpressionValue, + } + }, + }; + switch (new_val) { + .same => {}, + .undefined => { + const dest = try new_cpu_context.dwarfRegisterBytes(@intCast(register)); + @memset(dest, undefined); + }, + .val => |val| { + const dest = try new_cpu_context.dwarfRegisterBytes(@intCast(register)); + if (dest.len != @sizeOf(usize)) return error.RegisterSizeMismatch; + const dest_ptr: *align(1) usize = @ptrCast(dest); + dest_ptr.* = val; + }, + .bytes => |src| { + const dest = try new_cpu_context.dwarfRegisterBytes(@intCast(register)); + if (dest.len != src.len) return error.RegisterSizeMismatch; + @memcpy(dest, src); + }, + } + if (register == return_address_register) { + has_return_address = new_val != .undefined; } } - // If the return address register did not have an explicitly specified rules then it uses - // the default rule, which is usually equivalent to '.undefined', i.e. end-of-stack. 
- const has_return_address = explicit_has_return_address orelse switch (defaultRuleBehavior(cie.return_address_register)) { - .undefined => false, - .same_value => return error.InvalidDebugInfo, // this doesn't make sense, we would get stuck in an infinite loop - }; - const return_address: usize = if (has_return_address) pc: { - const raw_ptr = try regNative(&new_cpu_context, cie.return_address_register); + const raw_ptr = try regNative(&new_cpu_context, return_address_register); break :pc stripInstructionPtrAuthCode(raw_ptr.*); } else 0; @@ -501,7 +516,7 @@ pub const DwarfUnwindContext = struct { // "return address" we have is the instruction which triggered the signal (if the signal // handler returned, the instruction would be re-run). Compensate for this by incrementing // the address in that case. - const adjusted_ret_addr = if (cie.is_signal_frame) return_address +| 1 else return_address; + const adjusted_ret_addr = if (cache_slot.cie.is_signal_frame) return_address +| 1 else return_address; // We also want to do that same subtraction here to get the PC for the next frame's FDE. // This is because if the callee was noreturn, then the function call might be the caller's diff --git a/lib/std/debug/SelfInfo/DarwinModule.zig b/lib/std/debug/SelfInfo/DarwinModule.zig index 29178b5068..caf2176f75 100644 --- a/lib/std/debug/SelfInfo/DarwinModule.zig +++ b/lib/std/debug/SelfInfo/DarwinModule.zig @@ -20,7 +20,7 @@ pub fn lookup(cache: *LookupCache, gpa: Allocator, address: usize) Error!DarwinM }, } } -fn loadUnwindInfo(module: *const DarwinModule) DebugInfo.Unwind { +fn loadUnwindInfo(module: *const DarwinModule, gpa: Allocator, out: *DebugInfo) !void { const header: *std.macho.mach_header = @ptrFromInt(module.text_base); var it: macho.LoadCommandIterator = .{ @@ -36,21 +36,57 @@ fn loadUnwindInfo(module: *const DarwinModule) DebugInfo.Unwind { const vmaddr_slide = module.text_base - text_vmaddr; - var unwind_info: ?[]const u8 = null; - var eh_frame: ?[]const u8 = null; + var opt_unwind_info: ?[]const u8 = null; + var opt_eh_frame: ?[]const u8 = null; for (sections) |sect| { if (mem.eql(u8, sect.sectName(), "__unwind_info")) { const sect_ptr: [*]u8 = @ptrFromInt(@as(usize, @intCast(vmaddr_slide + sect.addr))); - unwind_info = sect_ptr[0..@intCast(sect.size)]; + opt_unwind_info = sect_ptr[0..@intCast(sect.size)]; } else if (mem.eql(u8, sect.sectName(), "__eh_frame")) { const sect_ptr: [*]u8 = @ptrFromInt(@as(usize, @intCast(vmaddr_slide + sect.addr))); - eh_frame = sect_ptr[0..@intCast(sect.size)]; + opt_eh_frame = sect_ptr[0..@intCast(sect.size)]; } } - return .{ + const eh_frame = opt_eh_frame orelse { + out.unwind = .{ + .vmaddr_slide = vmaddr_slide, + .unwind_info = opt_unwind_info, + .dwarf = null, + .dwarf_cache = undefined, + }; + return; + }; + var dwarf: Dwarf.Unwind = .initSection(.eh_frame, @intFromPtr(eh_frame.ptr) - vmaddr_slide, eh_frame); + errdefer dwarf.deinit(gpa); + // We don't need lookups, so this call is just for scanning CIEs. 
+    dwarf.prepare(gpa, @sizeOf(usize), native_endian, false) catch |err| switch (err) {
+        error.ReadFailed => unreachable, // it's all fixed buffers
+        error.InvalidDebugInfo,
+        error.MissingDebugInfo,
+        error.OutOfMemory,
+        => |e| return e,
+        error.EndOfStream,
+        error.Overflow,
+        error.StreamTooLong,
+        error.InvalidOperand,
+        error.InvalidOpcode,
+        error.InvalidOperation,
+        => return error.InvalidDebugInfo,
+        error.UnsupportedAddrSize,
+        error.UnsupportedDwarfVersion,
+        error.UnimplementedUserOpcode,
+        => return error.UnsupportedDebugInfo,
+    };
+
+    const dwarf_cache = try gpa.create(UnwindContext.Cache);
+    errdefer gpa.destroy(dwarf_cache);
+    dwarf_cache.init();
+
+    out.unwind = .{
        .vmaddr_slide = vmaddr_slide,
-        .unwind_info = unwind_info,
-        .eh_frame = eh_frame,
+        .unwind_info = opt_unwind_info,
+        .dwarf = dwarf,
+        .dwarf_cache = dwarf_cache,
    };
}

fn loadMachO(module: *const DarwinModule, gpa: Allocator) !DebugInfo.LoadedMachO {
@@ -350,10 +386,10 @@ pub fn unwindFrame(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo,
    };
}

fn unwindFrameInner(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo, context: *UnwindContext) !usize {
-    const unwind: *const DebugInfo.Unwind = u: {
+    const unwind: *DebugInfo.Unwind = u: {
        di.mutex.lock();
        defer di.mutex.unlock();
-        if (di.unwind == null) di.unwind = module.loadUnwindInfo();
+        if (di.unwind == null) try module.loadUnwindInfo(gpa, di);
        break :u &di.unwind.?;
    };

@@ -580,14 +616,8 @@ fn unwindFrameInner(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo,
            break :ip new_ip;
        },
        .DWARF => {
-            const eh_frame = unwind.eh_frame orelse return error.MissingDebugInfo;
-            const eh_frame_vaddr = @intFromPtr(eh_frame.ptr) - unwind.vmaddr_slide;
-            return context.unwindFrame(
-                gpa,
-                &.initSection(.eh_frame, eh_frame_vaddr, eh_frame),
-                unwind.vmaddr_slide,
-                @intCast(encoding.value.x86_64.dwarf),
-            );
+            const dwarf = &(unwind.dwarf orelse return error.MissingDebugInfo);
+            return context.unwindFrame(unwind.dwarf_cache, gpa, dwarf, unwind.vmaddr_slide, encoding.value.x86_64.dwarf);
        },
    },
    .aarch64, .aarch64_be => switch (encoding.mode.arm64) {
@@ -600,14 +630,8 @@ fn unwindFrameInner(module: *const DarwinModule, gpa: Allocator, di: *DebugInfo,
            break :ip new_ip;
        },
        .DWARF => {
-            const eh_frame = unwind.eh_frame orelse return error.MissingDebugInfo;
-            const eh_frame_vaddr = @intFromPtr(eh_frame.ptr) - unwind.vmaddr_slide;
-            return context.unwindFrame(
-                gpa,
-                &.initSection(.eh_frame, eh_frame_vaddr, eh_frame),
-                unwind.vmaddr_slide,
-                @intCast(encoding.value.x86_64.dwarf),
-            );
+            const dwarf = &(unwind.dwarf orelse return error.MissingDebugInfo);
+            return context.unwindFrame(unwind.dwarf_cache, gpa, dwarf, unwind.vmaddr_slide, encoding.value.arm64.dwarf);
        },
        .FRAME => ip: {
            const frame = encoding.value.arm64.frame;
@@ -691,12 +715,15 @@ pub const DebugInfo = struct {
    }

    const Unwind = struct {
-        /// The slide applied to the following sections. So, `unwind_info.ptr` is this many bytes
-        /// higher than the vmaddr of `__unwind_info`, and likewise for `__eh_frame`.
+        /// The slide applied to the `__unwind_info` and `__eh_frame` sections.
+        /// So, `unwind_info.ptr` is this many bytes higher than the section's vmaddr.
        vmaddr_slide: u64,
-        // Backed by the in-memory sections mapped by the loader
+        /// Backed by the in-memory section mapped by the loader.
        unwind_info: ?[]const u8,
-        eh_frame: ?[]const u8,
+        /// Backed by the in-memory `__eh_frame` section mapped by the loader.
+        dwarf: ?Dwarf.Unwind,
+        /// This is `undefined` if `dwarf == null`.
+        dwarf_cache: *UnwindContext.Cache,
    };

    const LoadedMachO = struct {
diff --git a/lib/std/debug/SelfInfo/ElfModule.zig b/lib/std/debug/SelfInfo/ElfModule.zig
index 32d767a44f..eead810a86 100644
--- a/lib/std/debug/SelfInfo/ElfModule.zig
+++ b/lib/std/debug/SelfInfo/ElfModule.zig
@@ -3,8 +3,22 @@ name: []const u8,
build_id: ?[]const u8,
gnu_eh_frame: ?[]const u8,

-/// No cache needed, because `dl_iterate_phdr` is already fast.
-pub const LookupCache = void;
+pub const LookupCache = struct {
+    rwlock: std.Thread.RwLock,
+    ranges: std.ArrayList(Range),
+    const Range = struct {
+        start: usize,
+        len: usize,
+        mod: ElfModule,
+    };
+    pub const init: LookupCache = .{
+        .rwlock = .{},
+        .ranges = .empty,
+    };
+    pub fn deinit(lc: *LookupCache, gpa: Allocator) void {
+        lc.ranges.deinit(gpa);
+    }
+};

pub const DebugInfo = struct {
    /// Held while checking and/or populating `loaded_elf`/`scanned_dwarf`/`unwind`.
@@ -14,18 +28,24 @@ pub const DebugInfo = struct {
    loaded_elf: ?ElfFile,
    scanned_dwarf: bool,
-    unwind: [2]?Dwarf.Unwind,
+    unwind: if (supports_unwinding) [2]?Dwarf.Unwind else void,
+    unwind_cache: if (supports_unwinding) *UnwindContext.Cache else void,
+
    pub const init: DebugInfo = .{
        .mutex = .{},
        .loaded_elf = null,
        .scanned_dwarf = false,
-        .unwind = @splat(null),
+        .unwind = if (supports_unwinding) @splat(null),
+        .unwind_cache = undefined,
    };
    pub fn deinit(di: *DebugInfo, gpa: Allocator) void {
        if (di.loaded_elf) |*loaded_elf| loaded_elf.deinit(gpa);
-        for (&di.unwind) |*opt_unwind| {
-            const unwind = &(opt_unwind.* orelse continue);
-            unwind.deinit(gpa);
+        if (supports_unwinding) {
+            if (di.unwind[0] != null) gpa.destroy(di.unwind_cache);
+            for (&di.unwind) |*opt_unwind| {
+                const unwind = &(opt_unwind.* orelse continue);
+                unwind.deinit(gpa);
+            }
        }
    }
};
@@ -34,75 +54,84 @@ pub fn key(m: ElfModule) usize {
    return m.load_offset;
}

pub fn lookup(cache: *LookupCache, gpa: Allocator, address: usize) Error!ElfModule {
-    _ = cache;
-    _ = gpa;
-    const DlIterContext = struct {
-        /// input
-        address: usize,
-        /// output
-        module: ElfModule,
+    if (lookupInCache(cache, address)) |m| return m;

-        fn callback(info: *std.posix.dl_phdr_info, size: usize, context: *@This()) !void {
-            _ = size;
-            // The base address is too high
-            if (context.address < info.addr)
-                return;
+    {
+        // Re-scan in case a new module has been loaded
+        cache.rwlock.lock();
+        defer cache.rwlock.unlock();
+        const DlIterContext = struct {
+            ranges: *std.ArrayList(LookupCache.Range),
+            gpa: Allocator,

-            const phdrs = info.phdr[0..info.phnum];
-            for (phdrs) |*phdr| {
-                if (phdr.p_type != elf.PT_LOAD) continue;
+            fn callback(info: *std.posix.dl_phdr_info, size: usize, context: *@This()) !void {
+                _ = size;

-                // Overflowing addition is used to handle the case of VSDOs having a p_vaddr = 0xffffffffff700000
-                const seg_start = info.addr +% phdr.p_vaddr;
-                const seg_end = seg_start + phdr.p_memsz;
-                if (context.address >= seg_start and context.address < seg_end) {
-                    context.module = .{
-                        .load_offset = info.addr,
-                        // Android libc uses NULL instead of "" to mark the main program
-                        .name = mem.sliceTo(info.name, 0) orelse "",
-                        .build_id = null,
-                        .gnu_eh_frame = null,
-                    };
-                    break;
+                var mod: ElfModule = .{
+                    .load_offset = info.addr,
+                    // Android libc uses NULL instead of "" to mark the main program
+                    .name = mem.sliceTo(info.name, 0) orelse "",
+                    .build_id = null,
+                    .gnu_eh_frame = null,
+                };
+
+                // Populate `build_id` and `gnu_eh_frame`
+                for (info.phdr[0..info.phnum]) |phdr| {
+                    switch (phdr.p_type) {
+                        elf.PT_NOTE => {
+                            // Look for .note.gnu.build-id
+                            const segment_ptr: [*]const u8 = @ptrFromInt(info.addr + phdr.p_vaddr);
+                            var r: std.Io.Reader = .fixed(segment_ptr[0..phdr.p_memsz]);
+                            const name_size = r.takeInt(u32, native_endian) catch continue;
+                            const desc_size = r.takeInt(u32, native_endian) catch continue;
+                            const note_type = r.takeInt(u32, native_endian) catch continue;
+                            const name = r.take(name_size) catch continue;
+                            if (note_type != elf.NT_GNU_BUILD_ID) continue;
+                            if (!mem.eql(u8, name, "GNU\x00")) continue;
+                            const desc = r.take(desc_size) catch continue;
+                            mod.build_id = desc;
+                        },
+                        elf.PT_GNU_EH_FRAME => {
+                            const segment_ptr: [*]const u8 = @ptrFromInt(info.addr + phdr.p_vaddr);
+                            mod.gnu_eh_frame = segment_ptr[0..phdr.p_memsz];
+                        },
+                        else => {},
+                    }
+                }
-            } else return;

-            for (info.phdr[0..info.phnum]) |phdr| {
-                switch (phdr.p_type) {
-                    elf.PT_NOTE => {
-                        // Look for .note.gnu.build-id
-                        const segment_ptr: [*]const u8 = @ptrFromInt(info.addr + phdr.p_vaddr);
-                        var r: std.Io.Reader = .fixed(segment_ptr[0..phdr.p_memsz]);
-                        const name_size = r.takeInt(u32, native_endian) catch continue;
-                        const desc_size = r.takeInt(u32, native_endian) catch continue;
-                        const note_type = r.takeInt(u32, native_endian) catch continue;
-                        const name = r.take(name_size) catch continue;
-                        if (note_type != elf.NT_GNU_BUILD_ID) continue;
-                        if (!mem.eql(u8, name, "GNU\x00")) continue;
-                        const desc = r.take(desc_size) catch continue;
-                        context.module.build_id = desc;
-                    },
-                    elf.PT_GNU_EH_FRAME => {
-                        const segment_ptr: [*]const u8 = @ptrFromInt(info.addr + phdr.p_vaddr);
-                        context.module.gnu_eh_frame = segment_ptr[0..phdr.p_memsz];
-                    },
-                    else => {},
+                // Now that `mod` is populated, create the ranges
+                for (info.phdr[0..info.phnum]) |phdr| {
+                    if (phdr.p_type != elf.PT_LOAD) continue;
+                    try context.ranges.append(context.gpa, .{
+                        // Overflowing addition handles vDSOs having p_vaddr = 0xffffffffff700000
+                        .start = info.addr +% phdr.p_vaddr,
+                        .len = phdr.p_memsz,
+                        .mod = mod,
+                    });
+                }
+            }
+        };
+        cache.ranges.clearRetainingCapacity();
+        var ctx: DlIterContext = .{
+            .ranges = &cache.ranges,
+            .gpa = gpa,
+        };
+        try std.posix.dl_iterate_phdr(&ctx, error{OutOfMemory}, DlIterContext.callback);
+    }

-            // Stop the iteration
-            return error.Found;
-        }
-    };
-    var ctx: DlIterContext = .{
-        .address = address,
-        .module = undefined,
-    };
-    std.posix.dl_iterate_phdr(&ctx, error{Found}, DlIterContext.callback) catch |err| switch (err) {
-        error.Found => return ctx.module,
-    };
+    if (lookupInCache(cache, address)) |m| return m;
    return error.MissingDebugInfo;
}
+fn lookupInCache(cache: *LookupCache, address: usize) ?ElfModule {
+    cache.rwlock.lockShared();
+    defer cache.rwlock.unlockShared();
+    for (cache.ranges.items) |*range| {
+        if (address >= range.start and address < range.start + range.len) {
+            return range.mod;
+        }
+    }
+    return null;
+}

fn loadElf(module: *const ElfModule, gpa: Allocator, di: *DebugInfo) Error!void {
    std.debug.assert(di.loaded_elf == null);
    std.debug.assert(!di.scanned_dwarf);
@@ -199,11 +228,23 @@ pub fn getSymbolAtAddress(module: *const ElfModule, gpa: Allocator, di: *DebugIn
    };
}

fn prepareUnwindLookup(unwind: *Dwarf.Unwind, gpa: Allocator) Error!void {
-    unwind.prepareLookup(gpa, @sizeOf(usize), native_endian) catch |err| switch (err) {
+    unwind.prepare(gpa, @sizeOf(usize), native_endian, true) catch |err| switch (err) {
        error.ReadFailed => unreachable, // it's all fixed buffers
-        error.InvalidDebugInfo, error.MissingDebugInfo, error.OutOfMemory => |e| return e,
-        error.EndOfStream, error.Overflow, error.StreamTooLong => return error.InvalidDebugInfo,
-        error.UnsupportedAddrSize, error.UnsupportedDwarfVersion => return error.UnsupportedDebugInfo,
+        error.InvalidDebugInfo,
+        error.MissingDebugInfo,
+        error.OutOfMemory,
+        => |e| return e,
+        error.EndOfStream,
+        error.Overflow,
+        error.StreamTooLong,
+        error.InvalidOperand,
+        error.InvalidOpcode,
+        error.InvalidOperation,
+        => return error.InvalidDebugInfo,
+        error.UnsupportedAddrSize,
+        error.UnsupportedDwarfVersion,
+        error.UnimplementedUserOpcode,
+        => return error.UnsupportedDebugInfo,
    };
}

@@ -240,12 +281,18 @@ fn loadUnwindInfo(module: *const ElfModule, gpa: Allocator, di: *DebugInfo) Erro
    };
    errdefer for (unwinds) |*u| u.deinit(gpa);
    for (unwinds) |*u| try prepareUnwindLookup(u, gpa);
+
+    const unwind_cache = try gpa.create(UnwindContext.Cache);
+    errdefer gpa.destroy(unwind_cache);
+    unwind_cache.init();
+
    switch (unwinds.len) {
        0 => unreachable,
        1 => di.unwind = .{ unwinds[0], null },
        2 => di.unwind = .{ unwinds[0], unwinds[1] },
        else => unreachable,
    }
+    di.unwind_cache = unwind_cache;
}

pub fn unwindFrame(module: *const ElfModule, gpa: Allocator, di: *DebugInfo, context: *UnwindContext) Error!usize {
    const unwinds: *const [2]?Dwarf.Unwind = u: {
@@ -257,7 +304,7 @@ pub fn unwindFrame(module: *const ElfModule, gpa: Allocator, di: *DebugInfo, con
    };
    for (unwinds) |*opt_unwind| {
        const unwind = &(opt_unwind.* orelse break);
-        return context.unwindFrame(gpa, unwind, module.load_offset, null) catch |err| switch (err) {
+        return context.unwindFrame(di.unwind_cache, gpa, unwind, module.load_offset, null) catch |err| switch (err) {
            error.MissingDebugInfo => continue, // try the next one
            else => |e| return e,
        };
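
A note on the first hunk above: the cached-rule loop is a direct application of the DWARF CFI register-rule semantics. The following self-contained sketch is illustrative only (`Rule`, `recover`, and `applyOffset` are invented names for this example, not the `std.debug` API); it shows the key distinction that `.offset` loads the caller's saved value from `CFA + offset`, while `.val_offset` uses that address itself as the value:

const std = @import("std");
const builtin = @import("builtin");
const native_endian = builtin.cpu.arch.endian();

/// Illustrative subset of the DWARF CFI register rules.
const Rule = union(enum) {
    undefined,
    same_value,
    /// The caller's value was saved in memory at `CFA + offset`.
    offset: i64,
    /// The caller's value *is* `CFA + offset`; no load takes place.
    val_offset: i64,
};

fn applyOffset(base: usize, offset: i64) usize {
    return if (offset >= 0)
        base + @as(usize, @intCast(offset))
    else
        base - @as(usize, @intCast(-offset));
}

/// Recover one caller-frame register value. `memory` stands in for the real
/// address space; `null` means the value is unrecoverable (`.undefined`).
fn recover(rule: Rule, cfa: usize, current: usize, memory: []const u8) ?usize {
    return switch (rule) {
        .undefined => null,
        .same_value => current, // this frame never clobbered the register
        .offset => |off| std.mem.readInt(usize, memory[applyOffset(cfa, off)..][0..@sizeOf(usize)], native_endian),
        .val_offset => |off| applyOffset(cfa, off),
    };
}

test recover {
    var memory: [32]u8 = @splat(0);
    std.mem.writeInt(usize, memory[16..][0..@sizeOf(usize)], 0xdead, native_endian);
    // With a CFA of 24, rule `offset(-8)` loads the value saved at address 16.
    try std.testing.expectEqual(@as(?usize, 0xdead), recover(.{ .offset = -8 }, 24, 0, &memory));
    // `val_offset(-8)` yields the address itself, with no load.
    try std.testing.expectEqual(@as(?usize, 16), recover(.{ .val_offset = -8 }, 24, 0, &memory));
    try std.testing.expectEqual(@as(?usize, null), recover(.undefined, 24, 0, &memory));
}

The real loop additionally handles the `.register`, `.expression`, and `.val_expression` rules, plus the `.default` rule with its AArch64 carve-out for the callee-saved registers x19-x28, exactly as the hunk shows.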

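Similarly, the rewritten `ElfModule.lookup` above is an instance of a general pattern: scan a cached range list under a shared lock, and only on a miss take the exclusive lock, rebuild the list from the authoritative source (`dl_iterate_phdr` in the real code), and scan once more. Below is a minimal sketch of that pattern; `RangeCache`, `findOrRefresh`, and the caller-supplied `fresh` list are invented here and stand in for the phdr walk:

const std = @import("std");

const RangeCache = struct {
    rwlock: std.Thread.RwLock = .{},
    ranges: std.ArrayList(Range) = .empty,

    const Range = struct { start: usize, len: usize, id: u32 };

    /// Fast path: a linear scan under the shared (reader) lock.
    fn find(rc: *RangeCache, address: usize) ?u32 {
        rc.rwlock.lockShared();
        defer rc.rwlock.unlockShared();
        for (rc.ranges.items) |r| {
            if (address >= r.start and address < r.start + r.len) return r.id;
        }
        return null;
    }

    /// Slow path: on a miss, rebuild the whole list under the exclusive
    /// (writer) lock, then retry the fast path once.
    fn findOrRefresh(rc: *RangeCache, gpa: std.mem.Allocator, address: usize, fresh: []const Range) !?u32 {
        if (rc.find(address)) |id| return id;
        {
            rc.rwlock.lock();
            defer rc.rwlock.unlock();
            rc.ranges.clearRetainingCapacity();
            try rc.ranges.appendSlice(gpa, fresh);
        }
        return rc.find(address);
    }
};

test RangeCache {
    const gpa = std.testing.allocator;
    var rc: RangeCache = .{};
    defer rc.ranges.deinit(gpa);
    const fresh = [_]RangeCache.Range{.{ .start = 0x1000, .len = 0x100, .id = 7 }};
    try std.testing.expectEqual(@as(?u32, null), rc.find(0x1010)); // cold cache misses
    try std.testing.expectEqual(@as(?u32, 7), try rc.findOrRefresh(gpa, 0x1010, &fresh));
    try std.testing.expectEqual(@as(?u32, 7), rc.find(0x1010)); // now served from the cache
}

Because the rebuild happens entirely under the exclusive lock and replaces the list wholesale (`clearRetainingCapacity` followed by re-population), readers holding the shared lock never observe a partially rebuilt table; two threads racing on a miss simply rebuild twice, which is harmless.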