diff options
| author | mlugg <mlugg@mlugg.co.uk> | 2025-09-26 10:52:09 +0100 |
|---|---|---|
| committer | mlugg <mlugg@mlugg.co.uk> | 2025-09-30 13:44:56 +0100 |
| commit | 156cd8f678ebdcccc48382d093a3ef7e45c85a45 (patch) | |
| tree | ca3f4c37bda9cf1d039ac25ba37b2c45ab5a345f /lib/std/debug/Dwarf/Unwind.zig | |
| parent | 3f84b6c80ed3306f040dd98b8ccba561a052167a (diff) | |
| download | zig-156cd8f678ebdcccc48382d093a3ef7e45c85a45.tar.gz zig-156cd8f678ebdcccc48382d093a3ef7e45c85a45.zip | |
std.debug: significantly speed up capturing stack traces
By my estimation, these changes speed up DWARF unwinding when using the
self-hosted x86_64 backend by around 7x. There are two very significant
enhancements: we no longer iterate frames which don't fit in the stack
trace buffer, and we cache register rules (in a fixed buffer) to avoid
re-parsing and evaluating CFI instructions in most cases. Alongside this
are a bunch of smaller enhancements, such as pre-caching the result of
evaluating the CIE's initial instructions, avoiding re-parsing of CIEs,
and big simplifications to the `Dwarf.Unwind.VirtualMachine` logic.
Diffstat (limited to 'lib/std/debug/Dwarf/Unwind.zig')
| -rw-r--r-- | lib/std/debug/Dwarf/Unwind.zig | 145 |
1 files changed, 95 insertions, 50 deletions
diff --git a/lib/std/debug/Dwarf/Unwind.zig b/lib/std/debug/Dwarf/Unwind.zig index 2eaa89c404..e251a9175d 100644 --- a/lib/std/debug/Dwarf/Unwind.zig +++ b/lib/std/debug/Dwarf/Unwind.zig @@ -10,7 +10,7 @@ //! The typical usage of `Unwind` is as follows: //! //! * Initialize with `initEhFrameHdr` or `initSection`, depending on the available data -//! * Call `prepareLookup` to construct a search table if necessary +//! * Call `prepare` to scan CIEs and, if necessary, construct a search table //! * Call `lookupPc` to find the section offset of the FDE corresponding to a PC //! * Call `getFde` to load the corresponding FDE and CIE //! * Check that the PC does indeed fall in that range (`lookupPc` may return a false positive) @@ -18,7 +18,7 @@ //! //! In some cases, such as when using the "compact unwind" data in Mach-O binaries, the FDE offsets //! may already be known. In that case, no call to `lookupPc` is necessary, which means the call to -//! `prepareLookup` can also be omitted. +//! `prepare` can be optimized to only scan CIEs. pub const VirtualMachine = @import("Unwind/VirtualMachine.zig"); @@ -45,7 +45,7 @@ frame_section: struct { /// A structure allowing fast lookups of the FDE corresponding to a particular PC. We use a binary /// search table for the lookup; essentially, a list of all FDEs ordered by PC range. `null` means -/// the lookup data is not yet populated, so `prepareLookup` must be called before `lookupPc`. +/// the lookup data is not yet populated, so `prepare` must be called before `lookupPc`. lookup: ?union(enum) { /// The `.eh_frame_hdr` section contains a pre-computed search table which we can use. eh_frame_hdr: struct { @@ -58,6 +58,12 @@ lookup: ?union(enum) { sorted_fdes: []SortedFdeEntry, }, +/// Initially empty; populated by `prepare`. +cie_list: std.MultiArrayList(struct { + offset: u64, + cie: CommonInformationEntry, +}), + const SortedFdeEntry = struct { /// This FDE's value of `pc_begin`. pc_begin: u64, @@ -83,6 +89,7 @@ pub fn initEhFrameHdr(header: EhFrameHeader, section_vaddr: u64, section_bytes_p .vaddr = section_vaddr, .table = table, } } else null, + .cie_list = .empty, }; } @@ -98,16 +105,21 @@ pub fn initSection(section: Section, section_vaddr: u64, section_bytes: []const .vaddr = section_vaddr, }, .lookup = null, + .cie_list = .empty, }; } -/// Technically, it is only necessary to call this if `prepareLookup` has previously been called, -/// since no other function here allocates resources. pub fn deinit(unwind: *Unwind, gpa: Allocator) void { if (unwind.lookup) |lookup| switch (lookup) { .eh_frame_hdr => {}, .sorted_fdes => |fdes| gpa.free(fdes), }; + for (unwind.cie_list.items(.cie)) |*cie| { + if (cie.last_row) |*lr| { + gpa.free(lr.cols); + } + } + unwind.cie_list.deinit(gpa); } /// Decoded version of the `.eh_frame_hdr` section. @@ -236,7 +248,6 @@ const EntryHeader = union(enum) { bytes_len: u64, }, fde: struct { - format: Format, /// Offset into the section of the corresponding CIE, *including* its entry header. cie_offset: u64, /// Remaining bytes in the FDE. These are parseable by `FrameDescriptionEntry.parse`. @@ -290,7 +301,6 @@ const EntryHeader = union(enum) { .debug_frame => cie_ptr_or_id, }; return .{ .fde = .{ - .format = unit_header.format, .cie_offset = cie_offset, .bytes_len = remaining_bytes, } }; @@ -299,6 +309,7 @@ const EntryHeader = union(enum) { pub const CommonInformationEntry = struct { version: u8, + format: Format, /// In version 4, CIEs can specify the address size used in the CIE and associated FDEs. /// This value must be used *only* to parse associated FDEs in `FrameDescriptionEntry.parse`. @@ -318,6 +329,12 @@ pub const CommonInformationEntry = struct { initial_instructions: []const u8, + last_row: ?struct { + offset: u64, + cfa: VirtualMachine.CfaRule, + cols: []VirtualMachine.Column, + }, + pub const AugmentationKind = enum { none, gcc_eh, lsb_z }; /// This function expects to read the CIE starting with the version field. @@ -326,6 +343,7 @@ pub const CommonInformationEntry = struct { /// `length_offset` specifies the offset of this CIE's length field in the /// .eh_frame / .debug_frame section. fn parse( + format: Format, cie_bytes: []const u8, section: Section, default_addr_size_bytes: u8, @@ -384,6 +402,7 @@ pub const CommonInformationEntry = struct { }; return .{ + .format = format, .version = version, .addr_size_bytes = addr_size_bytes, .segment_selector_size = segment_selector_size, @@ -394,6 +413,7 @@ pub const CommonInformationEntry = struct { .is_signal_frame = is_signal_frame, .augmentation_kind = aug_kind, .initial_instructions = r.buffered(), + .last_row = null, }; } }; @@ -411,7 +431,7 @@ pub const FrameDescriptionEntry = struct { /// module's `.eh_frame` section, this will equal `fde_bytes.ptr`. fde_vaddr: u64, fde_bytes: []const u8, - cie: CommonInformationEntry, + cie: *const CommonInformationEntry, endian: Endian, ) !FrameDescriptionEntry { if (cie.segment_selector_size != 0) return error.UnsupportedAddrSize; @@ -446,11 +466,18 @@ pub const FrameDescriptionEntry = struct { } }; -/// Builds the PC FDE lookup table if it is not already built. It is required to call this function -/// at least once before calling `lookupPc`. Once this function is called, memory has been allocated -/// and so `deinit` (matching this `gpa`) is required to free it. -pub fn prepareLookup(unwind: *Unwind, gpa: Allocator, addr_size_bytes: u8, endian: Endian) !void { - if (unwind.lookup != null) return; +/// Builds the CIE list and FDE lookup table if they are not already built. It is required to call +/// this function at least once before calling `lookupPc` or `getFde`. If only `getFde` is needed, +/// then `need_lookup` can be set to `false` to make this function more efficient. +pub fn prepare( + unwind: *Unwind, + gpa: Allocator, + addr_size_bytes: u8, + endian: Endian, + need_lookup: bool, +) !void { + if (unwind.cie_list.len > 0 and (!need_lookup or unwind.lookup != null)) return; + unwind.cie_list.clearRetainingCapacity(); const section = unwind.frame_section; @@ -462,21 +489,28 @@ pub fn prepareLookup(unwind: *Unwind, gpa: Allocator, addr_size_bytes: u8, endia const entry_offset = r.seek; switch (try EntryHeader.read(&r, entry_offset, section.id, endian)) { .cie => |cie_info| { - // Ignore CIEs for now; we'll parse them when we read a corresponding FDE - try r.discardAll(cast(usize, cie_info.bytes_len) orelse return error.EndOfStream); + // We will pre-populate a list of CIEs for efficiency: this avoids work re-parsing + // them every time we look up an FDE. It also lets us cache the result of evaluating + // the CIE's initial CFI instructions, which is useful because in the vast majority + // of cases those instructions will be needed to reach the PC we are unwinding to. + const bytes_len = cast(usize, cie_info.bytes_len) orelse return error.EndOfStream; + const idx = unwind.cie_list.len; + try unwind.cie_list.append(gpa, .{ + .offset = entry_offset, + .cie = try .parse(cie_info.format, try r.take(bytes_len), section.id, addr_size_bytes), + }); + errdefer _ = unwind.cie_list.pop().?; + try VirtualMachine.populateCieLastRow(gpa, &unwind.cie_list.items(.cie)[idx], addr_size_bytes, endian); continue; }, .fde => |fde_info| { - if (fde_info.cie_offset > section.bytes.len) return error.EndOfStream; - var cie_r: Reader = .fixed(section.bytes[@intCast(fde_info.cie_offset)..]); - const cie_info = switch (try EntryHeader.read(&cie_r, fde_info.cie_offset, section.id, endian)) { - .cie => |cie_info| cie_info, - .fde, .terminator => return bad(), // this is meant to be a CIE - }; - const cie_bytes_len = cast(usize, cie_info.bytes_len) orelse return error.EndOfStream; - const fde_bytes_len = cast(usize, fde_info.bytes_len) orelse return error.EndOfStream; - const cie: CommonInformationEntry = try .parse(try cie_r.take(cie_bytes_len), section.id, addr_size_bytes); - const fde: FrameDescriptionEntry = try .parse(section.vaddr + r.seek, try r.take(fde_bytes_len), cie, endian); + const bytes_len = cast(usize, fde_info.bytes_len) orelse return error.EndOfStream; + if (!need_lookup) { + try r.discardAll(bytes_len); + continue; + } + const cie = unwind.findCie(fde_info.cie_offset) orelse return error.InvalidDebugInfo; + const fde: FrameDescriptionEntry = try .parse(section.vaddr + r.seek, try r.take(bytes_len), cie, endian); try fde_list.append(gpa, .{ .pc_begin = fde.pc_begin, .fde_offset = entry_offset, @@ -502,12 +536,30 @@ pub fn prepareLookup(unwind: *Unwind, gpa: Allocator, addr_size_bytes: u8, endia unwind.lookup = .{ .sorted_fdes = final_fdes }; } +fn findCie(unwind: *const Unwind, offset: u64) ?*const CommonInformationEntry { + const offsets = unwind.cie_list.items(.offset); + if (offsets.len == 0) return null; + var start: usize = 0; + var len: usize = offsets.len; + while (len > 1) { + const mid = len / 2; + if (offset < offsets[start + mid]) { + len = mid; + } else { + start += mid; + len -= mid; + } + } + if (offsets[start] != offset) return null; + return &unwind.cie_list.items(.cie)[start]; +} + /// Given a program counter value, returns the offset of the corresponding FDE, or `null` if no /// matching FDE was found. The returned offset can be passed to `getFde` to load the data /// associated with the FDE. /// -/// Before calling this function, `prepareLookup` must return successfully at least once, to ensure -/// that `unwind.lookup` is populated. +/// Before calling this function, `prepare` must return successfully at least once, to ensure that +/// `unwind.lookup` is populated. /// /// The return value may be a false positive. After loading the FDE with `loadFde`, the caller must /// validate that `pc` is indeed in its range -- if it is not, then no FDE matches `pc`. @@ -524,20 +576,25 @@ pub fn lookupPc(unwind: *const Unwind, pc: u64, addr_size_bytes: u8, endian: End }, .sorted_fdes => |sorted_fdes| sorted_fdes, }; - const first_bad_idx = std.sort.partitionPoint(SortedFdeEntry, sorted_fdes, pc, struct { - fn canIncludePc(target_pc: u64, entry: SortedFdeEntry) bool { - return target_pc >= entry.pc_begin; // i.e. does 'entry_pc..<last pc>' include 'target_pc' + if (sorted_fdes.len == 0) return null; + var start: usize = 0; + var len: usize = sorted_fdes.len; + while (len > 1) { + const half = len / 2; + if (pc < sorted_fdes[start + half].pc_begin) { + len = half; + } else { + start += half; + len -= half; } - }.canIncludePc); - // `first_bad_idx` is the index of the first FDE whose `pc_begin` is too high to include `pc`. - // So if any FDE matches, it'll be the one at `first_bad_idx - 1` (maybe false positive). - if (first_bad_idx == 0) return null; - return sorted_fdes[first_bad_idx - 1].fde_offset; + } + // If any FDE matches, it'll be the one at `start` (maybe false positive). + return sorted_fdes[start].fde_offset; } /// Get the FDE at a given offset, as well as its associated CIE. This offset typically comes from /// `lookupPc`. The CFI instructions within can be evaluated with `VirtualMachine`. -pub fn getFde(unwind: *const Unwind, fde_offset: u64, addr_size_bytes: u8, endian: Endian) !struct { Format, CommonInformationEntry, FrameDescriptionEntry } { +pub fn getFde(unwind: *const Unwind, fde_offset: u64, endian: Endian) !struct { *const CommonInformationEntry, FrameDescriptionEntry } { const section = unwind.frame_section; if (fde_offset > section.bytes.len) return error.EndOfStream; @@ -547,19 +604,7 @@ pub fn getFde(unwind: *const Unwind, fde_offset: u64, addr_size_bytes: u8, endia .cie, .terminator => return bad(), // This is meant to be an FDE }; - const cie_offset = fde_info.cie_offset; - if (cie_offset > section.bytes.len) return error.EndOfStream; - var cie_reader: Reader = .fixed(section.bytes[@intCast(cie_offset)..]); - const cie_info = switch (try EntryHeader.read(&cie_reader, cie_offset, section.id, endian)) { - .cie => |info| info, - .fde, .terminator => return bad(), // This is meant to be a CIE - }; - - const cie: CommonInformationEntry = try .parse( - try cie_reader.take(cast(usize, cie_info.bytes_len) orelse return error.EndOfStream), - section.id, - addr_size_bytes, - ); + const cie = unwind.findCie(fde_info.cie_offset) orelse return error.InvalidDebugInfo; const fde: FrameDescriptionEntry = try .parse( section.vaddr + fde_offset + fde_reader.seek, try fde_reader.take(cast(usize, fde_info.bytes_len) orelse return error.EndOfStream), @@ -567,7 +612,7 @@ pub fn getFde(unwind: *const Unwind, fde_offset: u64, addr_size_bytes: u8, endia endian, ); - return .{ cie_info.format, cie, fde }; + return .{ cie, fde }; } const EhPointerContext = struct { |
