From 2e12b45d8b43d69e144887df4b04a2d383ff25d4 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Fri, 2 Aug 2024 16:31:49 -0700 Subject: introduce tool for dumping coverage file with debug info resolved. begin efforts of providing `std.debug.Info`, a cross-platform abstraction for loading debug information into an in-memory format that supports queries such as "what is the source location of this virtual memory address?" Unlike `std.debug.SelfInfo`, this API does not assume the debug information in question happens to match the host CPU architecture, OS, or other target properties. --- lib/std/debug.zig | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) (limited to 'lib/std/debug.zig') diff --git a/lib/std/debug.zig b/lib/std/debug.zig index 4d3437f665..907f7711a7 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -17,6 +17,7 @@ pub const MemoryAccessor = @import("debug/MemoryAccessor.zig"); pub const Dwarf = @import("debug/Dwarf.zig"); pub const Pdb = @import("debug/Pdb.zig"); pub const SelfInfo = @import("debug/SelfInfo.zig"); +pub const Info = @import("debug/Info.zig"); /// Unresolved source locations can be represented with a single `usize` that /// corresponds to a virtual memory address of the program counter. Combined @@ -28,6 +29,12 @@ pub const SourceLocation = struct { file_name: []const u8, }; +pub const Symbol = struct { + name: []const u8 = "???", + compile_unit_name: []const u8 = "???", + source_location: ?SourceLocation = null, +}; + /// Deprecated because it returns the optimization mode of the standard /// library, when the caller probably wants to use the optimization mode of /// their own module. 
@@ -871,13 +878,13 @@ pub fn printSourceAtAddress(debug_info: *SelfInfo, out_stream: anytype, address: error.MissingDebugInfo, error.InvalidDebugInfo => return printUnknownSource(debug_info, out_stream, address, tty_config), else => return err, }; - defer symbol_info.deinit(debug_info.allocator); + defer if (symbol_info.source_location) |sl| debug_info.allocator.free(sl.file_name); return printLineInfo( out_stream, - symbol_info.line_info, + symbol_info.source_location, address, - symbol_info.symbol_name, + symbol_info.name, symbol_info.compile_unit_name, tty_config, printLineFromFileAnyOs, @@ -886,7 +893,7 @@ pub fn printSourceAtAddress(debug_info: *SelfInfo, out_stream: anytype, address: fn printLineInfo( out_stream: anytype, - line_info: ?SourceLocation, + source_location: ?SourceLocation, address: usize, symbol_name: []const u8, compile_unit_name: []const u8, @@ -896,8 +903,8 @@ fn printLineInfo( nosuspend { try tty_config.setColor(out_stream, .bold); - if (line_info) |*li| { - try out_stream.print("{s}:{d}:{d}", .{ li.file_name, li.line, li.column }); + if (source_location) |*sl| { + try out_stream.print("{s}:{d}:{d}", .{ sl.file_name, sl.line, sl.column }); } else { try out_stream.writeAll("???:?:?"); } @@ -910,11 +917,11 @@ fn printLineInfo( try out_stream.writeAll("\n"); // Show the matching source code line if possible - if (line_info) |li| { - if (printLineFromFile(out_stream, li)) { - if (li.column > 0) { + if (source_location) |sl| { + if (printLineFromFile(out_stream, sl)) { + if (sl.column > 0) { // The caret already takes one char - const space_needed = @as(usize, @intCast(li.column - 1)); + const space_needed = @as(usize, @intCast(sl.column - 1)); try out_stream.writeByteNTimes(' ', space_needed); try tty_config.setColor(out_stream, .green); @@ -932,10 +939,10 @@ fn printLineInfo( } } -fn printLineFromFileAnyOs(out_stream: anytype, line_info: SourceLocation) !void { +fn printLineFromFileAnyOs(out_stream: anytype, source_location: SourceLocation) 
!void { // Need this to always block even in async I/O mode, because this could potentially // be called from e.g. the event loop code crashing. - var f = try fs.cwd().openFile(line_info.file_name, .{}); + var f = try fs.cwd().openFile(source_location.file_name, .{}); defer f.close(); // TODO fstat and make sure that the file has the correct size @@ -944,7 +951,7 @@ fn printLineFromFileAnyOs(out_stream: anytype, line_info: SourceLocation) !void const line_start = seek: { var current_line_start: usize = 0; var next_line: usize = 1; - while (next_line != line_info.line) { + while (next_line != source_location.line) { const slice = buf[current_line_start..amt_read]; if (mem.indexOfScalar(u8, slice, '\n')) |pos| { next_line += 1; -- cgit v1.2.3 From de47acd732dca8b4d2f2b3559307f488ccac940d Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Fri, 2 Aug 2024 17:45:31 -0700 Subject: code coverage dumping tool basic implementation * std.debug.Dwarf: add `sortCompileUnits` along with a field to track the state for the purpose of assertions and correct API usage. This makes batch lookups faster. - in the future, findCompileUnit should be enhanced to rely on sorted compile units as well. * implement `std.debug.Dwarf.resolveSourceLocations` as well as `std.debug.Info.resolveSourceLocations`. It's still pretty slow, since it calls getLineNumberInfo for each array element, repeating a lot of work unnecessarily. * integrate these APIs with `std.Progress` to understand what is taking so long. The output I'm seeing from this tool shows a lot of missing source locations. In particular, the main area of interest is missing for my tokenizer fuzzing example. 
--- lib/std/debug.zig | 6 ++++ lib/std/debug/Dwarf.zig | 85 +++++++++++++++++++++++++++++++++++++++++++++---- lib/std/debug/Info.zig | 17 ++++++---- tools/dump-cov.zig | 10 ++++-- 4 files changed, 102 insertions(+), 16 deletions(-) (limited to 'lib/std/debug.zig') diff --git a/lib/std/debug.zig b/lib/std/debug.zig index 907f7711a7..6d034146c3 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -27,6 +27,12 @@ pub const SourceLocation = struct { line: u64, column: u64, file_name: []const u8, + + pub const invalid: SourceLocation = .{ + .line = 0, + .column = 0, + .file_name = &.{}, + }; }; pub const Symbol = struct { diff --git a/lib/std/debug/Dwarf.zig b/lib/std/debug/Dwarf.zig index 3c150b3b18..170fa774c0 100644 --- a/lib/std/debug/Dwarf.zig +++ b/lib/std/debug/Dwarf.zig @@ -39,6 +39,7 @@ pub const call_frame = @import("Dwarf/call_frame.zig"); endian: std.builtin.Endian, sections: SectionArray = null_section_array, is_macho: bool, +compile_units_sorted: bool, // Filled later by the initializer abbrev_table_list: std.ArrayListUnmanaged(Abbrev.Table) = .{}, @@ -728,9 +729,9 @@ pub const OpenError = ScanError; /// Initialize DWARF info. The caller has the responsibility to initialize most /// the `Dwarf` fields before calling. `binary_mem` is the raw bytes of the /// main binary file (not the secondary debug info file). -pub fn open(di: *Dwarf, gpa: Allocator) OpenError!void { - try di.scanAllFunctions(gpa); - try di.scanAllCompileUnits(gpa); +pub fn open(d: *Dwarf, gpa: Allocator) OpenError!void { + try d.scanAllFunctions(gpa); + try d.scanAllCompileUnits(gpa); } const PcRange = struct { @@ -1061,6 +1062,39 @@ fn scanAllCompileUnits(di: *Dwarf, allocator: Allocator) ScanError!void { } } +/// Populate missing PC ranges in compilation units, and then sort them by start address. +/// Does not guarantee pc_range to be non-null because there could be missing debug info. 
+pub fn sortCompileUnits(d: *Dwarf) ScanError!void { + assert(!d.compile_units_sorted); + + for (d.compile_unit_list.items) |*cu| { + if (cu.pc_range != null) continue; + const ranges_value = cu.die.getAttr(AT.ranges) orelse continue; + var iter = DebugRangeIterator.init(ranges_value, d, cu) catch continue; + var start: u64 = maxInt(u64); + var end: u64 = 0; + while (try iter.next()) |range| { + start = @min(start, range.start_addr); + end = @max(end, range.end_addr); + } + if (end != 0) cu.pc_range = .{ + .start = start, + .end = end, + }; + } + + std.mem.sortUnstable(CompileUnit, d.compile_unit_list.items, {}, struct { + fn lessThan(ctx: void, a: CompileUnit, b: CompileUnit) bool { + _ = ctx; + const a_range = a.pc_range orelse return false; + const b_range = b.pc_range orelse return true; + return a_range.start < b_range.start; + } + }.lessThan); + + d.compile_units_sorted = true; +} + const DebugRangeIterator = struct { base_address: u64, section_type: Section.Id, @@ -1208,6 +1242,7 @@ const DebugRangeIterator = struct { } }; +/// TODO: change this to binary searching the sorted compile unit list pub fn findCompileUnit(di: *const Dwarf, target_address: u64) !*const CompileUnit { for (di.compile_unit_list.items) |*compile_unit| { if (compile_unit.pc_range) |range| { @@ -2275,6 +2310,7 @@ pub const ElfModule = struct { .endian = endian, .sections = sections, .is_macho = false, + .compile_units_sorted = false, }; try Dwarf.open(&di, gpa); @@ -2326,6 +2362,8 @@ pub const ElfModule = struct { } }; +pub const ResolveSourceLocationsError = Allocator.Error || DeprecatedFixedBufferReader.Error; + /// Given an array of virtual memory addresses, sorted ascending, outputs a /// corresponding array of source locations, by appending to the provided /// array list. @@ -2335,11 +2373,44 @@ pub fn resolveSourceLocations( sorted_pc_addrs: []const u64, /// Asserts its length equals length of `sorted_pc_addrs`. 
output: []std.debug.SourceLocation, -) error{ MissingDebugInfo, InvalidDebugInfo }!void { + parent_prog_node: std.Progress.Node, +) ResolveSourceLocationsError!void { assert(sorted_pc_addrs.len == output.len); - _ = d; - _ = gpa; - @panic("TODO"); + assert(d.compile_units_sorted); + + const prog_node = parent_prog_node.start("Resolve Source Locations", sorted_pc_addrs.len); + defer prog_node.end(); + + var cu_i: usize = 0; + var cu: *const CompileUnit = &d.compile_unit_list.items[0]; + var range = cu.pc_range.?; + next_pc: for (sorted_pc_addrs, output) |pc, *out| { + defer prog_node.completeOne(); + while (pc >= range.end) { + cu_i += 1; + if (cu_i >= d.compile_unit_list.items.len) { + out.* = std.debug.SourceLocation.invalid; + continue :next_pc; + } + cu = &d.compile_unit_list.items[cu_i]; + range = cu.pc_range orelse { + out.* = std.debug.SourceLocation.invalid; + continue :next_pc; + }; + } + if (pc < range.start) { + out.* = std.debug.SourceLocation.invalid; + continue :next_pc; + } + // TODO: instead of calling this function, break the function up into one that parses the + // information once and prepares a context that can be reused for the entire batch. 
+ if (getLineNumberInfo(d, gpa, cu.*, pc)) |src_loc| { + out.* = src_loc; + } else |err| switch (err) { + error.MissingDebugInfo, error.InvalidDebugInfo => out.* = std.debug.SourceLocation.invalid, + else => |e| return e, + } + } } fn getSymbol(di: *Dwarf, allocator: Allocator, address: u64) !std.debug.Symbol { diff --git a/lib/std/debug/Info.zig b/lib/std/debug/Info.zig index 5276ba68ec..3c61c4072f 100644 --- a/lib/std/debug/Info.zig +++ b/lib/std/debug/Info.zig @@ -20,9 +20,14 @@ address_map: std.AutoArrayHashMapUnmanaged(u64, Dwarf.ElfModule), pub const LoadError = Dwarf.ElfModule.LoadError; -pub fn load(gpa: Allocator, path: Path) LoadError!Info { +pub fn load(gpa: Allocator, path: Path, parent_prog_node: std.Progress.Node) LoadError!Info { var sections: Dwarf.SectionArray = Dwarf.null_section_array; - const elf_module = try Dwarf.ElfModule.loadPath(gpa, path, null, null, §ions, null); + var prog_node = parent_prog_node.start("Loading Debug Info", 0); + defer prog_node.end(); + var elf_module = try Dwarf.ElfModule.loadPath(gpa, path, null, null, §ions, null); + prog_node.end(); + prog_node = parent_prog_node.start("Sort Compile Units", 0); + try elf_module.dwarf.sortCompileUnits(); var info: Info = .{ .address_map = .{}, }; @@ -38,10 +43,7 @@ pub fn deinit(info: *Info, gpa: Allocator) void { info.* = undefined; } -pub const ResolveSourceLocationsError = error{ - MissingDebugInfo, - InvalidDebugInfo, -} || Allocator.Error; +pub const ResolveSourceLocationsError = Dwarf.ResolveSourceLocationsError; pub fn resolveSourceLocations( info: *Info, @@ -49,9 +51,10 @@ pub fn resolveSourceLocations( sorted_pc_addrs: []const u64, /// Asserts its length equals length of `sorted_pc_addrs`. 
output: []std.debug.SourceLocation, + parent_prog_node: std.Progress.Node, ) ResolveSourceLocationsError!void { assert(sorted_pc_addrs.len == output.len); if (info.address_map.entries.len != 1) @panic("TODO"); const elf_module = &info.address_map.values()[0]; - return elf_module.dwarf.resolveSourceLocations(gpa, sorted_pc_addrs, output); + return elf_module.dwarf.resolveSourceLocations(gpa, sorted_pc_addrs, output, parent_prog_node); } diff --git a/tools/dump-cov.zig b/tools/dump-cov.zig index aba2911a91..8449dec33e 100644 --- a/tools/dump-cov.zig +++ b/tools/dump-cov.zig @@ -28,7 +28,10 @@ pub fn main() !void { .sub_path = cov_file_name, }; - var debug_info = std.debug.Info.load(gpa, exe_path) catch |err| { + const prog_node = std.Progress.start(.{}); + defer prog_node.end(); + + var debug_info = std.debug.Info.load(gpa, exe_path, prog_node) catch |err| { fatal("failed to load debug info for {}: {s}", .{ exe_path, @errorName(err) }); }; defer debug_info.deinit(gpa); @@ -51,7 +54,10 @@ pub fn main() !void { assert(std.sort.isSorted(usize, pcs, {}, std.sort.asc(usize))); const source_locations = try arena.alloc(std.debug.SourceLocation, pcs.len); - try debug_info.resolveSourceLocations(gpa, pcs, source_locations); + try debug_info.resolveSourceLocations(gpa, pcs, source_locations, prog_node); + defer for (source_locations) |sl| { + gpa.free(sl.file_name); + }; for (pcs, source_locations) |pc, sl| { try stdout.print("{x}: {s}:{d}:{d}\n", .{ -- cgit v1.2.3 From 66954e833051872308641b3a1af12aa865d5d59a Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Fri, 2 Aug 2024 21:22:33 -0700 Subject: std.debug.FixedBufferReader is fine it does not need to be deprecated --- lib/std/debug.zig | 95 +------------------------------------ lib/std/debug/Dwarf.zig | 41 ++++++++-------- lib/std/debug/FixedBufferReader.zig | 91 +++++++++++++++++++++++++++++++++++ lib/std/debug/SelfInfo.zig | 2 +- 4 files changed, 114 insertions(+), 115 deletions(-) create mode 100644 
lib/std/debug/FixedBufferReader.zig (limited to 'lib/std/debug.zig') diff --git a/lib/std/debug.zig b/lib/std/debug.zig index 6d034146c3..80c196e9d8 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -14,6 +14,7 @@ const native_os = builtin.os.tag; const native_endian = native_arch.endian(); pub const MemoryAccessor = @import("debug/MemoryAccessor.zig"); +pub const FixedBufferReader = @import("debug/FixedBufferReader.zig"); pub const Dwarf = @import("debug/Dwarf.zig"); pub const Pdb = @import("debug/Pdb.zig"); pub const SelfInfo = @import("debug/SelfInfo.zig"); @@ -1494,99 +1495,6 @@ pub const SafetyLock = struct { } }; -/// Deprecated. Don't use this, just read from your memory directly. -/// -/// This only exists because someone was too lazy to rework logic that used to -/// operate on an open file to operate on a memory buffer instead. -pub const DeprecatedFixedBufferReader = struct { - buf: []const u8, - pos: usize = 0, - endian: std.builtin.Endian, - - pub const Error = error{ EndOfBuffer, Overflow, InvalidBuffer }; - - pub fn seekTo(fbr: *DeprecatedFixedBufferReader, pos: u64) Error!void { - if (pos > fbr.buf.len) return error.EndOfBuffer; - fbr.pos = @intCast(pos); - } - - pub fn seekForward(fbr: *DeprecatedFixedBufferReader, amount: u64) Error!void { - if (fbr.buf.len - fbr.pos < amount) return error.EndOfBuffer; - fbr.pos += @intCast(amount); - } - - pub inline fn readByte(fbr: *DeprecatedFixedBufferReader) Error!u8 { - if (fbr.pos >= fbr.buf.len) return error.EndOfBuffer; - defer fbr.pos += 1; - return fbr.buf[fbr.pos]; - } - - pub fn readByteSigned(fbr: *DeprecatedFixedBufferReader) Error!i8 { - return @bitCast(try fbr.readByte()); - } - - pub fn readInt(fbr: *DeprecatedFixedBufferReader, comptime T: type) Error!T { - const size = @divExact(@typeInfo(T).Int.bits, 8); - if (fbr.buf.len - fbr.pos < size) return error.EndOfBuffer; - defer fbr.pos += size; - return std.mem.readInt(T, fbr.buf[fbr.pos..][0..size], fbr.endian); - } - - pub fn 
readIntChecked( - fbr: *DeprecatedFixedBufferReader, - comptime T: type, - ma: *MemoryAccessor, - ) Error!T { - if (ma.load(T, @intFromPtr(fbr.buf[fbr.pos..].ptr)) == null) - return error.InvalidBuffer; - - return fbr.readInt(T); - } - - pub fn readUleb128(fbr: *DeprecatedFixedBufferReader, comptime T: type) Error!T { - return std.leb.readUleb128(T, fbr); - } - - pub fn readIleb128(fbr: *DeprecatedFixedBufferReader, comptime T: type) Error!T { - return std.leb.readIleb128(T, fbr); - } - - pub fn readAddress(fbr: *DeprecatedFixedBufferReader, format: std.dwarf.Format) Error!u64 { - return switch (format) { - .@"32" => try fbr.readInt(u32), - .@"64" => try fbr.readInt(u64), - }; - } - - pub fn readAddressChecked( - fbr: *DeprecatedFixedBufferReader, - format: std.dwarf.Format, - ma: *MemoryAccessor, - ) Error!u64 { - return switch (format) { - .@"32" => try fbr.readIntChecked(u32, ma), - .@"64" => try fbr.readIntChecked(u64, ma), - }; - } - - pub fn readBytes(fbr: *DeprecatedFixedBufferReader, len: usize) Error![]const u8 { - if (fbr.buf.len - fbr.pos < len) return error.EndOfBuffer; - defer fbr.pos += len; - return fbr.buf[fbr.pos..][0..len]; - } - - pub fn readBytesTo(fbr: *DeprecatedFixedBufferReader, comptime sentinel: u8) Error![:sentinel]const u8 { - const end = @call(.always_inline, std.mem.indexOfScalarPos, .{ - u8, - fbr.buf, - fbr.pos, - sentinel, - }) orelse return error.EndOfBuffer; - defer fbr.pos = end + 1; - return fbr.buf[fbr.pos..end :sentinel]; - } -}; - /// Detect whether the program is being executed in the Valgrind virtual machine. /// /// When Valgrind integrations are disabled, this returns comptime-known false. 
@@ -1600,6 +1508,7 @@ pub inline fn inValgrind() bool { test { _ = &Dwarf; _ = &MemoryAccessor; + _ = &FixedBufferReader; _ = &Pdb; _ = &SelfInfo; _ = &dumpHex; diff --git a/lib/std/debug/Dwarf.zig b/lib/std/debug/Dwarf.zig index 170fa774c0..446dc58990 100644 --- a/lib/std/debug/Dwarf.zig +++ b/lib/std/debug/Dwarf.zig @@ -27,8 +27,7 @@ const maxInt = std.math.maxInt; const MemoryAccessor = std.debug.MemoryAccessor; const Path = std.Build.Cache.Path; -/// Did I mention this is deprecated? -const DeprecatedFixedBufferReader = std.debug.DeprecatedFixedBufferReader; +const FixedBufferReader = std.debug.FixedBufferReader; const Dwarf = @This(); @@ -328,7 +327,7 @@ pub const ExceptionFrameHeader = struct { var left: usize = 0; var len: usize = self.fde_count; - var fbr: DeprecatedFixedBufferReader = .{ .buf = self.entries, .endian = native_endian }; + var fbr: FixedBufferReader = .{ .buf = self.entries, .endian = native_endian }; while (len > 1) { const mid = left + len / 2; @@ -371,7 +370,7 @@ pub const ExceptionFrameHeader = struct { const eh_frame = @as([*]const u8, @ptrFromInt(self.eh_frame_ptr))[0 .. eh_frame_len orelse maxInt(u32)]; const fde_offset = fde_ptr - self.eh_frame_ptr; - var eh_frame_fbr: DeprecatedFixedBufferReader = .{ + var eh_frame_fbr: FixedBufferReader = .{ .buf = eh_frame, .pos = fde_offset, .endian = native_endian, @@ -429,9 +428,9 @@ pub const EntryHeader = struct { } /// Reads a header for either an FDE or a CIE, then advances the fbr to the position after the trailing structure. - /// `fbr` must be a DeprecatedFixedBufferReader backed by either the .eh_frame or .debug_frame sections. + /// `fbr` must be a FixedBufferReader backed by either the .eh_frame or .debug_frame sections. 
pub fn read( - fbr: *DeprecatedFixedBufferReader, + fbr: *FixedBufferReader, opt_ma: ?*MemoryAccessor, dwarf_section: Section.Id, ) !EntryHeader { @@ -544,7 +543,7 @@ pub const CommonInformationEntry = struct { ) !CommonInformationEntry { if (addr_size_bytes > 8) return error.UnsupportedAddrSize; - var fbr: DeprecatedFixedBufferReader = .{ .buf = cie_bytes, .endian = endian }; + var fbr: FixedBufferReader = .{ .buf = cie_bytes, .endian = endian }; const version = try fbr.readByte(); switch (dwarf_section) { @@ -678,7 +677,7 @@ pub const FrameDescriptionEntry = struct { ) !FrameDescriptionEntry { if (addr_size_bytes > 8) return error.InvalidAddrSize; - var fbr: DeprecatedFixedBufferReader = .{ .buf = fde_bytes, .endian = endian }; + var fbr: FixedBufferReader = .{ .buf = fde_bytes, .endian = endian }; const pc_begin = try readEhPointer(&fbr, cie.fde_pointer_enc, addr_size_bytes, .{ .pc_rel_base = try pcRelBase(@intFromPtr(&fde_bytes[fbr.pos]), pc_rel_offset), @@ -785,10 +784,10 @@ pub fn getSymbolName(di: *Dwarf, address: u64) ?[]const u8 { const ScanError = error{ InvalidDebugInfo, MissingDebugInfo, -} || Allocator.Error || std.debug.DeprecatedFixedBufferReader.Error; +} || Allocator.Error || std.debug.FixedBufferReader.Error; fn scanAllFunctions(di: *Dwarf, allocator: Allocator) ScanError!void { - var fbr: DeprecatedFixedBufferReader = .{ .buf = di.section(.debug_info).?, .endian = di.endian }; + var fbr: FixedBufferReader = .{ .buf = di.section(.debug_info).?, .endian = di.endian }; var this_unit_offset: u64 = 0; while (this_unit_offset < fbr.buf.len) { @@ -975,7 +974,7 @@ fn scanAllFunctions(di: *Dwarf, allocator: Allocator) ScanError!void { } fn scanAllCompileUnits(di: *Dwarf, allocator: Allocator) ScanError!void { - var fbr: DeprecatedFixedBufferReader = .{ .buf = di.section(.debug_info).?, .endian = di.endian }; + var fbr: FixedBufferReader = .{ .buf = di.section(.debug_info).?, .endian = di.endian }; var this_unit_offset: u64 = 0; var attrs_buf = 
std.ArrayList(Die.Attr).init(allocator); @@ -1100,7 +1099,7 @@ const DebugRangeIterator = struct { section_type: Section.Id, di: *const Dwarf, compile_unit: *const CompileUnit, - fbr: DeprecatedFixedBufferReader, + fbr: FixedBufferReader, pub fn init(ranges_value: *const FormValue, di: *const Dwarf, compile_unit: *const CompileUnit) !@This() { const section_type = if (compile_unit.version >= 5) Section.Id.debug_rnglists else Section.Id.debug_ranges; @@ -1275,7 +1274,7 @@ fn getAbbrevTable(di: *Dwarf, allocator: Allocator, abbrev_offset: u64) !*const } fn parseAbbrevTable(di: *Dwarf, allocator: Allocator, offset: u64) !Abbrev.Table { - var fbr: DeprecatedFixedBufferReader = .{ + var fbr: FixedBufferReader = .{ .buf = di.section(.debug_abbrev).?, .pos = cast(usize, offset) orelse return bad(), .endian = di.endian, @@ -1327,7 +1326,7 @@ fn parseAbbrevTable(di: *Dwarf, allocator: Allocator, offset: u64) !Abbrev.Table } fn parseDie( - fbr: *DeprecatedFixedBufferReader, + fbr: *FixedBufferReader, attrs_buf: []Die.Attr, abbrev_table: *const Abbrev.Table, format: Format, @@ -1362,7 +1361,7 @@ pub fn getLineNumberInfo( const compile_unit_cwd = try compile_unit.die.getAttrString(di, AT.comp_dir, di.section(.debug_line_str), compile_unit); const line_info_offset = try compile_unit.die.getAttrSecOffset(AT.stmt_list); - var fbr: DeprecatedFixedBufferReader = .{ .buf = di.section(.debug_line).?, .endian = di.endian }; + var fbr: FixedBufferReader = .{ .buf = di.section(.debug_line).?, .endian = di.endian }; try fbr.seekTo(line_info_offset); const unit_header = try readUnitHeader(&fbr, null); @@ -1655,7 +1654,7 @@ fn readDebugAddr(di: Dwarf, compile_unit: CompileUnit, index: u64) !u64 { /// of FDEs is built for binary searching during unwinding. 
pub fn scanAllUnwindInfo(di: *Dwarf, allocator: Allocator, base_address: usize) !void { if (di.section(.eh_frame_hdr)) |eh_frame_hdr| blk: { - var fbr: DeprecatedFixedBufferReader = .{ .buf = eh_frame_hdr, .endian = native_endian }; + var fbr: FixedBufferReader = .{ .buf = eh_frame_hdr, .endian = native_endian }; const version = try fbr.readByte(); if (version != 1) break :blk; @@ -1695,7 +1694,7 @@ pub fn scanAllUnwindInfo(di: *Dwarf, allocator: Allocator, base_address: usize) const frame_sections = [2]Section.Id{ .eh_frame, .debug_frame }; for (frame_sections) |frame_section| { if (di.section(frame_section)) |section_data| { - var fbr: DeprecatedFixedBufferReader = .{ .buf = section_data, .endian = di.endian }; + var fbr: FixedBufferReader = .{ .buf = section_data, .endian = di.endian }; while (fbr.pos < fbr.buf.len) { const entry_header = try EntryHeader.read(&fbr, null, frame_section); switch (entry_header.type) { @@ -1739,7 +1738,7 @@ pub fn scanAllUnwindInfo(di: *Dwarf, allocator: Allocator, base_address: usize) } fn parseFormValue( - fbr: *DeprecatedFixedBufferReader, + fbr: *FixedBufferReader, form_id: u64, format: Format, implicit_const: ?i64, @@ -1937,7 +1936,7 @@ const UnitHeader = struct { unit_length: u64, }; -fn readUnitHeader(fbr: *DeprecatedFixedBufferReader, opt_ma: ?*MemoryAccessor) ScanError!UnitHeader { +fn readUnitHeader(fbr: *FixedBufferReader, opt_ma: ?*MemoryAccessor) ScanError!UnitHeader { return switch (try if (opt_ma) |ma| fbr.readIntChecked(u32, ma) else fbr.readInt(u32)) { 0...0xfffffff0 - 1 => |unit_length| .{ .format = .@"32", @@ -2002,7 +2001,7 @@ const EhPointerContext = struct { text_rel_base: ?u64 = null, function_rel_base: ?u64 = null, }; -fn readEhPointer(fbr: *DeprecatedFixedBufferReader, enc: u8, addr_size_bytes: u8, ctx: EhPointerContext) !?u64 { +fn readEhPointer(fbr: *FixedBufferReader, enc: u8, addr_size_bytes: u8, ctx: EhPointerContext) !?u64 { if (enc == EH.PE.omit) return null; const value: union(enum) { @@ -2362,7 
+2361,7 @@ pub const ElfModule = struct { } }; -pub const ResolveSourceLocationsError = Allocator.Error || DeprecatedFixedBufferReader.Error; +pub const ResolveSourceLocationsError = Allocator.Error || FixedBufferReader.Error; /// Given an array of virtual memory addresses, sorted ascending, outputs a /// corresponding array of source locations, by appending to the provided diff --git a/lib/std/debug/FixedBufferReader.zig b/lib/std/debug/FixedBufferReader.zig new file mode 100644 index 0000000000..2a90ba569e --- /dev/null +++ b/lib/std/debug/FixedBufferReader.zig @@ -0,0 +1,91 @@ +const std = @import("std.zig"); +const MemoryAccessor = std.debug.MemoryAccessor; + +const FixedBufferReader = @This(); + +buf: []const u8, +pos: usize = 0, +endian: std.builtin.Endian, + +pub const Error = error{ EndOfBuffer, Overflow, InvalidBuffer }; + +pub fn seekTo(fbr: *FixedBufferReader, pos: u64) Error!void { + if (pos > fbr.buf.len) return error.EndOfBuffer; + fbr.pos = @intCast(pos); +} + +pub fn seekForward(fbr: *FixedBufferReader, amount: u64) Error!void { + if (fbr.buf.len - fbr.pos < amount) return error.EndOfBuffer; + fbr.pos += @intCast(amount); +} + +pub inline fn readByte(fbr: *FixedBufferReader) Error!u8 { + if (fbr.pos >= fbr.buf.len) return error.EndOfBuffer; + defer fbr.pos += 1; + return fbr.buf[fbr.pos]; +} + +pub fn readByteSigned(fbr: *FixedBufferReader) Error!i8 { + return @bitCast(try fbr.readByte()); +} + +pub fn readInt(fbr: *FixedBufferReader, comptime T: type) Error!T { + const size = @divExact(@typeInfo(T).Int.bits, 8); + if (fbr.buf.len - fbr.pos < size) return error.EndOfBuffer; + defer fbr.pos += size; + return std.mem.readInt(T, fbr.buf[fbr.pos..][0..size], fbr.endian); +} + +pub fn readIntChecked( + fbr: *FixedBufferReader, + comptime T: type, + ma: *MemoryAccessor, +) Error!T { + if (ma.load(T, @intFromPtr(fbr.buf[fbr.pos..].ptr)) == null) + return error.InvalidBuffer; + + return fbr.readInt(T); +} + +pub fn readUleb128(fbr: *FixedBufferReader, 
comptime T: type) Error!T { + return std.leb.readUleb128(T, fbr); +} + +pub fn readIleb128(fbr: *FixedBufferReader, comptime T: type) Error!T { + return std.leb.readIleb128(T, fbr); +} + +pub fn readAddress(fbr: *FixedBufferReader, format: std.dwarf.Format) Error!u64 { + return switch (format) { + .@"32" => try fbr.readInt(u32), + .@"64" => try fbr.readInt(u64), + }; +} + +pub fn readAddressChecked( + fbr: *FixedBufferReader, + format: std.dwarf.Format, + ma: *MemoryAccessor, +) Error!u64 { + return switch (format) { + .@"32" => try fbr.readIntChecked(u32, ma), + .@"64" => try fbr.readIntChecked(u64, ma), + }; +} + +pub fn readBytes(fbr: *FixedBufferReader, len: usize) Error![]const u8 { + if (fbr.buf.len - fbr.pos < len) return error.EndOfBuffer; + defer fbr.pos += len; + return fbr.buf[fbr.pos..][0..len]; +} + +pub fn readBytesTo(fbr: *FixedBufferReader, comptime sentinel: u8) Error![:sentinel]const u8 { + const end = @call(.always_inline, std.mem.indexOfScalarPos, .{ + u8, + fbr.buf, + fbr.pos, + sentinel, + }) orelse return error.EndOfBuffer; + defer fbr.pos = end + 1; + return fbr.buf[fbr.pos..end :sentinel]; +} diff --git a/lib/std/debug/SelfInfo.zig b/lib/std/debug/SelfInfo.zig index 79cbd19a41..ba0d7bc039 100644 --- a/lib/std/debug/SelfInfo.zig +++ b/lib/std/debug/SelfInfo.zig @@ -1576,7 +1576,7 @@ pub fn unwindFrameDwarf( const frame_section = di.section(dwarf_section) orelse return error.MissingFDE; if (fde_offset >= frame_section.len) return error.MissingFDE; - var fbr: std.debug.DeprecatedFixedBufferReader = .{ + var fbr: std.debug.FixedBufferReader = .{ .buf = frame_section, .pos = fde_offset, .endian = di.endian, -- cgit v1.2.3 From 1792258dc813cde7083fd7860442e6ec92afd4ba Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Fri, 2 Aug 2024 23:31:28 -0700 Subject: std.debug.Dwarf: precompute .debug_line table yields a 60x speedup for resolveSourceLocations in debug builds --- lib/std/debug.zig | 2 +- lib/std/debug/Dwarf.zig | 310 
+++++++++++++++++------------------- lib/std/debug/FixedBufferReader.zig | 4 +- 3 files changed, 150 insertions(+), 166 deletions(-) (limited to 'lib/std/debug.zig') diff --git a/lib/std/debug.zig b/lib/std/debug.zig index 80c196e9d8..7f4f6b7df2 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -762,7 +762,7 @@ pub fn writeCurrentStackTrace( // an overflow. We do not need to signal `StackIterator` as it will correctly detect this // condition on the subsequent iteration and return `null` thus terminating the loop. // same behaviour for x86-windows-msvc - const address = if (return_address == 0) return_address else return_address - 1; + const address = return_address -| 1; try printSourceAtAddress(debug_info, out_stream, address, tty_config); } else printLastUnwindError(&it, debug_info, out_stream, tty_config); } diff --git a/lib/std/debug/Dwarf.zig b/lib/std/debug/Dwarf.zig index 446dc58990..06ffad9441 100644 --- a/lib/std/debug/Dwarf.zig +++ b/lib/std/debug/Dwarf.zig @@ -138,6 +138,29 @@ pub const CompileUnit = struct { rnglists_base: usize, loclists_base: usize, frame_base: ?*const FormValue, + + src_loc_cache: ?SrcLocCache, + + pub const SrcLocCache = struct { + line_table: LineTable, + directories: []const FileEntry, + files: []FileEntry, + version: u16, + + pub const LineTable = std.AutoArrayHashMapUnmanaged(u64, LineEntry); + + pub const LineEntry = struct { + line: u32, + column: u32, + file: u32, + }; + + pub fn findSource(slc: *const SrcLocCache, address: u64) !LineEntry { + const index = std.sort.upperBound(u64, address, slc.line_table.keys(), {}, std.sort.asc(u64)); + if (index == 0) return missing(); + return slc.line_table.values()[index - 1]; + } + }; }; pub const FormValue = union(enum) { @@ -760,6 +783,11 @@ pub fn deinit(di: *Dwarf, gpa: Allocator) void { } di.abbrev_table_list.deinit(gpa); for (di.compile_unit_list.items) |*cu| { + if (cu.src_loc_cache) |*slc| { + slc.line_table.deinit(gpa); + gpa.free(slc.directories); + 
gpa.free(slc.files); + } cu.die.deinit(gpa); } di.compile_unit_list.deinit(gpa); @@ -846,6 +874,7 @@ fn scanAllFunctions(di: *Dwarf, allocator: Allocator) ScanError!void { .rnglists_base = 0, .loclists_base = 0, .frame_base = null, + .src_loc_cache = null, }; while (true) { @@ -1032,6 +1061,7 @@ fn scanAllCompileUnits(di: *Dwarf, allocator: Allocator) ScanError!void { .rnglists_base = if (compile_unit_die.getAttr(AT.rnglists_base)) |fv| try fv.getUInt(usize) else 0, .loclists_base = if (compile_unit_die.getAttr(AT.loclists_base)) |fv| try fv.getUInt(usize) else 0, .frame_base = compile_unit_die.getAttr(AT.frame_base), + .src_loc_cache = null, }; compile_unit.pc_range = x: { @@ -1242,7 +1272,7 @@ const DebugRangeIterator = struct { }; /// TODO: change this to binary searching the sorted compile unit list -pub fn findCompileUnit(di: *const Dwarf, target_address: u64) !*const CompileUnit { +pub fn findCompileUnit(di: *const Dwarf, target_address: u64) !*CompileUnit { for (di.compile_unit_list.items) |*compile_unit| { if (compile_unit.pc_range) |range| { if (target_address >= range.start and target_address < range.end) return compile_unit; @@ -1352,34 +1382,36 @@ fn parseDie( }; } -pub fn getLineNumberInfo( - di: *Dwarf, - allocator: Allocator, - compile_unit: CompileUnit, - target_address: u64, -) !std.debug.SourceLocation { - const compile_unit_cwd = try compile_unit.die.getAttrString(di, AT.comp_dir, di.section(.debug_line_str), compile_unit); +fn runLineNumberProgram(d: *Dwarf, gpa: Allocator, compile_unit: *CompileUnit) !CompileUnit.SrcLocCache { + const compile_unit_cwd = try compile_unit.die.getAttrString(d, AT.comp_dir, d.section(.debug_line_str), compile_unit.*); const line_info_offset = try compile_unit.die.getAttrSecOffset(AT.stmt_list); - var fbr: FixedBufferReader = .{ .buf = di.section(.debug_line).?, .endian = di.endian }; + var fbr: FixedBufferReader = .{ + .buf = d.section(.debug_line).?, + .endian = d.endian, + }; try fbr.seekTo(line_info_offset); 
const unit_header = try readUnitHeader(&fbr, null); if (unit_header.unit_length == 0) return missing(); + const next_offset = unit_header.header_length + unit_header.unit_length; const version = try fbr.readInt(u16); if (version < 2) return bad(); - var addr_size: u8 = switch (unit_header.format) { - .@"32" => 4, - .@"64" => 8, + const addr_size: u8, const seg_size: u8 = if (version >= 5) .{ + try fbr.readByte(), + try fbr.readByte(), + } else .{ + switch (unit_header.format) { + .@"32" => 4, + .@"64" => 8, + }, + 0, }; - var seg_size: u8 = 0; - if (version >= 5) { - addr_size = try fbr.readByte(); - seg_size = try fbr.readByte(); - } + _ = addr_size; + _ = seg_size; const prologue_length = try fbr.readAddress(unit_header.format); const prog_start_offset = fbr.pos + prologue_length; @@ -1388,8 +1420,8 @@ pub fn getLineNumberInfo( if (minimum_instruction_length == 0) return bad(); if (version >= 4) { - // maximum_operations_per_instruction - _ = try fbr.readByte(); + const maximum_operations_per_instruction = try fbr.readByte(); + _ = maximum_operations_per_instruction; } const default_is_stmt = (try fbr.readByte()) != 0; @@ -1402,18 +1434,18 @@ pub fn getLineNumberInfo( const standard_opcode_lengths = try fbr.readBytes(opcode_base - 1); - var include_directories = std.ArrayList(FileEntry).init(allocator); - defer include_directories.deinit(); - var file_entries = std.ArrayList(FileEntry).init(allocator); - defer file_entries.deinit(); + var directories: std.ArrayListUnmanaged(FileEntry) = .{}; + defer directories.deinit(gpa); + var file_entries: std.ArrayListUnmanaged(FileEntry) = .{}; + defer file_entries.deinit(gpa); if (version < 5) { - try include_directories.append(.{ .path = compile_unit_cwd }); + try directories.append(gpa, .{ .path = compile_unit_cwd }); while (true) { const dir = try fbr.readBytesTo(0); if (dir.len == 0) break; - try include_directories.append(.{ .path = dir }); + try directories.append(gpa, .{ .path = dir }); } while (true) { @@ -1422,7 
+1454,7 @@ pub fn getLineNumberInfo( const dir_index = try fbr.readUleb128(u32); const mtime = try fbr.readUleb128(u64); const size = try fbr.readUleb128(u64); - try file_entries.append(.{ + try file_entries.append(gpa, .{ .path = file_name, .dir_index = dir_index, .mtime = mtime, @@ -1446,52 +1478,10 @@ pub fn getLineNumberInfo( } const directories_count = try fbr.readUleb128(usize); - try include_directories.ensureUnusedCapacity(directories_count); - { - var i: usize = 0; - while (i < directories_count) : (i += 1) { - var e: FileEntry = .{ .path = &.{} }; - for (dir_ent_fmt_buf[0..directory_entry_format_count]) |ent_fmt| { - const form_value = try parseFormValue( - &fbr, - ent_fmt.form_code, - unit_header.format, - null, - ); - switch (ent_fmt.content_type_code) { - DW.LNCT.path => e.path = try form_value.getString(di.*), - DW.LNCT.directory_index => e.dir_index = try form_value.getUInt(u32), - DW.LNCT.timestamp => e.mtime = try form_value.getUInt(u64), - DW.LNCT.size => e.size = try form_value.getUInt(u64), - DW.LNCT.MD5 => e.md5 = switch (form_value) { - .data16 => |data16| data16.*, - else => return bad(), - }, - else => continue, - } - } - include_directories.appendAssumeCapacity(e); - } - } - } - var file_ent_fmt_buf: [10]FileEntFmt = undefined; - const file_name_entry_format_count = try fbr.readByte(); - if (file_name_entry_format_count > file_ent_fmt_buf.len) return bad(); - for (file_ent_fmt_buf[0..file_name_entry_format_count]) |*ent_fmt| { - ent_fmt.* = .{ - .content_type_code = try fbr.readUleb128(u8), - .form_code = try fbr.readUleb128(u16), - }; - } - - const file_names_count = try fbr.readUleb128(usize); - try file_entries.ensureUnusedCapacity(file_names_count); - { - var i: usize = 0; - while (i < file_names_count) : (i += 1) { - var e: FileEntry = .{ .path = &.{} }; - for (file_ent_fmt_buf[0..file_name_entry_format_count]) |ent_fmt| { + for (try directories.addManyAsSlice(gpa, directories_count)) |*e| { + e.* = .{ .path = &.{} }; + for 
(dir_ent_fmt_buf[0..directory_entry_format_count]) |ent_fmt| { const form_value = try parseFormValue( &fbr, ent_fmt.form_code, @@ -1499,7 +1489,7 @@ pub fn getLineNumberInfo( null, ); switch (ent_fmt.content_type_code) { - DW.LNCT.path => e.path = try form_value.getString(di.*), + DW.LNCT.path => e.path = try form_value.getString(d.*), DW.LNCT.directory_index => e.dir_index = try form_value.getUInt(u32), DW.LNCT.timestamp => e.mtime = try form_value.getUInt(u64), DW.LNCT.size => e.size = try form_value.getUInt(u64), @@ -1510,17 +1500,49 @@ pub fn getLineNumberInfo( else => continue, } } - file_entries.appendAssumeCapacity(e); + } + } + + var file_ent_fmt_buf: [10]FileEntFmt = undefined; + const file_name_entry_format_count = try fbr.readByte(); + if (file_name_entry_format_count > file_ent_fmt_buf.len) return bad(); + for (file_ent_fmt_buf[0..file_name_entry_format_count]) |*ent_fmt| { + ent_fmt.* = .{ + .content_type_code = try fbr.readUleb128(u8), + .form_code = try fbr.readUleb128(u16), + }; + } + + const file_names_count = try fbr.readUleb128(usize); + try file_entries.ensureUnusedCapacity(gpa, file_names_count); + + for (try file_entries.addManyAsSlice(gpa, file_names_count)) |*e| { + e.* = .{ .path = &.{} }; + for (file_ent_fmt_buf[0..file_name_entry_format_count]) |ent_fmt| { + const form_value = try parseFormValue( + &fbr, + ent_fmt.form_code, + unit_header.format, + null, + ); + switch (ent_fmt.content_type_code) { + DW.LNCT.path => e.path = try form_value.getString(d.*), + DW.LNCT.directory_index => e.dir_index = try form_value.getUInt(u32), + DW.LNCT.timestamp => e.mtime = try form_value.getUInt(u64), + DW.LNCT.size => e.size = try form_value.getUInt(u64), + DW.LNCT.MD5 => e.md5 = switch (form_value) { + .data16 => |data16| data16.*, + else => return bad(), + }, + else => continue, + } } } } - var prog = LineNumberProgram.init( - default_is_stmt, - include_directories.items, - target_address, - version, - ); + var prog = 
LineNumberProgram.init(default_is_stmt, version); + var line_table: CompileUnit.SrcLocCache.LineTable = .{}; + errdefer line_table.deinit(gpa); try fbr.seekTo(prog_start_offset); @@ -1536,7 +1558,7 @@ pub fn getLineNumberInfo( switch (sub_op) { DW.LNE.end_sequence => { prog.end_sequence = true; - if (try prog.checkLineMatch(allocator, file_entries.items)) |info| return info; + try prog.addRow(gpa, &line_table); prog.reset(); }, DW.LNE.set_address => { @@ -1548,7 +1570,7 @@ pub fn getLineNumberInfo( const dir_index = try fbr.readUleb128(u32); const mtime = try fbr.readUleb128(u64); const size = try fbr.readUleb128(u64); - try file_entries.append(.{ + try file_entries.append(gpa, .{ .path = path, .dir_index = dir_index, .mtime = mtime, @@ -1564,12 +1586,12 @@ pub fn getLineNumberInfo( const inc_line = @as(i32, line_base) + @as(i32, adjusted_opcode % line_range); prog.line += inc_line; prog.address += inc_addr; - if (try prog.checkLineMatch(allocator, file_entries.items)) |info| return info; + try prog.addRow(gpa, &line_table); prog.basic_block = false; } else { switch (opcode) { DW.LNS.copy => { - if (try prog.checkLineMatch(allocator, file_entries.items)) |info| return info; + try prog.addRow(gpa, &line_table); prog.basic_block = false; }, DW.LNS.advance_pc => { @@ -1611,7 +1633,35 @@ pub fn getLineNumberInfo( } } - return missing(); + return .{ + .line_table = line_table, + .directories = try directories.toOwnedSlice(gpa), + .files = try file_entries.toOwnedSlice(gpa), + .version = version, + }; +} + +pub fn getLineNumberInfo( + d: *Dwarf, + gpa: Allocator, + compile_unit: *CompileUnit, + target_address: u64, +) !std.debug.SourceLocation { + if (compile_unit.src_loc_cache == null) + compile_unit.src_loc_cache = try runLineNumberProgram(d, gpa, compile_unit); + const slc = &compile_unit.src_loc_cache.?; + const entry = try slc.findSource(target_address); + const file_index = entry.file - @intFromBool(slc.version < 5); + if (file_index >= slc.files.len) return bad(); 
+ const file_entry = &slc.files[file_index]; + if (file_entry.dir_index >= slc.directories.len) return bad(); + const dir_name = slc.directories[file_entry.dir_index].path; + const file_name = try std.fs.path.join(gpa, &.{ dir_name, file_entry.path }); + return .{ + .line = entry.line, + .column = entry.column, + .file_name = file_name, + }; } fn getString(di: Dwarf, offset: u64) ![:0]const u8 { @@ -1826,17 +1876,6 @@ const LineNumberProgram = struct { end_sequence: bool, default_is_stmt: bool, - target_address: u64, - include_dirs: []const FileEntry, - - prev_valid: bool, - prev_address: u64, - prev_file: usize, - prev_line: i64, - prev_column: u64, - prev_is_stmt: bool, - prev_basic_block: bool, - prev_end_sequence: bool, // Reset the state machine following the DWARF specification pub fn reset(self: *LineNumberProgram) void { @@ -1847,24 +1886,10 @@ const LineNumberProgram = struct { self.is_stmt = self.default_is_stmt; self.basic_block = false; self.end_sequence = false; - // Invalidate all the remaining fields - self.prev_valid = false; - self.prev_address = 0; - self.prev_file = undefined; - self.prev_line = undefined; - self.prev_column = undefined; - self.prev_is_stmt = undefined; - self.prev_basic_block = undefined; - self.prev_end_sequence = undefined; } - pub fn init( - is_stmt: bool, - include_dirs: []const FileEntry, - target_address: u64, - version: u16, - ) LineNumberProgram { - return LineNumberProgram{ + pub fn init(is_stmt: bool, version: u16) LineNumberProgram { + return .{ .address = 0, .file = 1, .line = 1, @@ -1873,60 +1898,17 @@ const LineNumberProgram = struct { .is_stmt = is_stmt, .basic_block = false, .end_sequence = false, - .include_dirs = include_dirs, .default_is_stmt = is_stmt, - .target_address = target_address, - .prev_valid = false, - .prev_address = 0, - .prev_file = undefined, - .prev_line = undefined, - .prev_column = undefined, - .prev_is_stmt = undefined, - .prev_basic_block = undefined, - .prev_end_sequence = undefined, }; } 
- pub fn checkLineMatch( - self: *LineNumberProgram, - allocator: Allocator, - file_entries: []const FileEntry, - ) !?std.debug.SourceLocation { - if (self.prev_valid and - self.target_address >= self.prev_address and - self.target_address < self.address) - { - const file_index = if (self.version >= 5) self.prev_file else i: { - if (self.prev_file == 0) return missing(); - break :i self.prev_file - 1; - }; - - if (file_index >= file_entries.len) return bad(); - const file_entry = &file_entries[file_index]; - - if (file_entry.dir_index >= self.include_dirs.len) return bad(); - const dir_name = self.include_dirs[file_entry.dir_index].path; - - const file_name = try std.fs.path.join(allocator, &[_][]const u8{ - dir_name, file_entry.path, - }); - - return std.debug.SourceLocation{ - .line = if (self.prev_line >= 0) @as(u64, @intCast(self.prev_line)) else 0, - .column = self.prev_column, - .file_name = file_name, - }; - } - - self.prev_valid = true; - self.prev_address = self.address; - self.prev_file = self.file; - self.prev_line = self.line; - self.prev_column = self.column; - self.prev_is_stmt = self.is_stmt; - self.prev_basic_block = self.basic_block; - self.prev_end_sequence = self.end_sequence; - return null; + pub fn addRow(prog: *LineNumberProgram, gpa: Allocator, table: *CompileUnit.SrcLocCache.LineTable) !void { + if (prog.line == 0) return; // garbage data + try table.put(gpa, prog.address, .{ + .line = cast(u32, prog.line) orelse maxInt(u32), + .column = cast(u32, prog.column) orelse maxInt(u32), + .file = cast(u32, prog.file) orelse return bad(), + }); } }; @@ -2381,7 +2363,7 @@ pub fn resolveSourceLocations( defer prog_node.end(); var cu_i: usize = 0; - var cu: *const CompileUnit = &d.compile_unit_list.items[0]; + var cu: *CompileUnit = &d.compile_unit_list.items[0]; var range = cu.pc_range.?; next_pc: for (sorted_pc_addrs, output) |pc, *out| { defer prog_node.completeOne(); @@ -2403,7 +2385,7 @@ pub fn resolveSourceLocations( } // TODO: instead of calling 
this function, break the function up into one that parses the // information once and prepares a context that can be reused for the entire batch. - if (getLineNumberInfo(d, gpa, cu.*, pc)) |src_loc| { + if (getLineNumberInfo(d, gpa, cu, pc)) |src_loc| { out.* = src_loc; } else |err| switch (err) { error.MissingDebugInfo, error.InvalidDebugInfo => out.* = std.debug.SourceLocation.invalid, @@ -2419,7 +2401,7 @@ fn getSymbol(di: *Dwarf, allocator: Allocator, address: u64) !std.debug.Symbol { .compile_unit_name = compile_unit.die.getAttrString(di, std.dwarf.AT.name, di.section(.debug_str), compile_unit.*) catch |err| switch (err) { error.MissingDebugInfo, error.InvalidDebugInfo => "???", }, - .source_location = di.getLineNumberInfo(allocator, compile_unit.*, address) catch |err| switch (err) { + .source_location = di.getLineNumberInfo(allocator, compile_unit, address) catch |err| switch (err) { error.MissingDebugInfo, error.InvalidDebugInfo => null, else => return err, }, diff --git a/lib/std/debug/FixedBufferReader.zig b/lib/std/debug/FixedBufferReader.zig index 2a90ba569e..494245a9e9 100644 --- a/lib/std/debug/FixedBufferReader.zig +++ b/lib/std/debug/FixedBufferReader.zig @@ -1,4 +1,6 @@ -const std = @import("std.zig"); +//! Optimized for performance in debug builds. + +const std = @import("../std.zig"); const MemoryAccessor = std.debug.MemoryAccessor; const FixedBufferReader = @This(); -- cgit v1.2.3 From 517cfb0dd1e2b5b8efc8e90ce4e5593a38fa158c Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Sun, 4 Aug 2024 00:16:28 -0700 Subject: fuzzing: progress towards web UI * libfuzzer: close file after mmap * fuzzer/main.js: connect with EventSource and debug dump the messages. currently this prints how many fuzzer runs have been attempted to console.log. * extract some `std.debug.Info` logic into `std.debug.Coverage`. 
Prepares for consolidation across multiple different executables which share source files, and makes it possible to send all the PC/SourceLocation mapping data with 4 memcpy'd arrays. * std.Build.Fuzz: - spawn a thread to watch the message queue and signal event subscribers. - track coverage map data - respond to /events URL with EventSource messages on a timer --- lib/fuzzer.zig | 1 + lib/fuzzer/main.js | 13 ++- lib/std/Build/Fuzz.zig | 209 +++++++++++++++++++++++++++++++++++--- lib/std/Build/Step/Run.zig | 6 +- lib/std/debug.zig | 1 + lib/std/debug/Coverage.zig | 244 +++++++++++++++++++++++++++++++++++++++++++++ lib/std/debug/Info.zig | 153 ++-------------------------- tools/dump-cov.zig | 16 +-- 8 files changed, 478 insertions(+), 165 deletions(-) create mode 100644 lib/std/debug/Coverage.zig (limited to 'lib/std/debug.zig') diff --git a/lib/fuzzer.zig b/lib/fuzzer.zig index ede3663cdc..0d968cd60d 100644 --- a/lib/fuzzer.zig +++ b/lib/fuzzer.zig @@ -218,6 +218,7 @@ const Fuzzer = struct { .read = true, .truncate = false, }); + defer coverage_file.close(); const n_bitset_elems = (flagged_pcs.len + 7) / 8; const bytes_len = @sizeOf(SeenPcsHeader) + flagged_pcs.len * @sizeOf(usize) + n_bitset_elems; const existing_len = coverage_file.getEndPos() catch |err| { diff --git a/lib/fuzzer/main.js b/lib/fuzzer/main.js index 71e6b5fa54..872ac3d4b5 100644 --- a/lib/fuzzer/main.js +++ b/lib/fuzzer/main.js @@ -12,6 +12,9 @@ const text_decoder = new TextDecoder(); const text_encoder = new TextEncoder(); + const eventSource = new EventSource("events"); + eventSource.addEventListener('message', onMessage, false); + WebAssembly.instantiateStreaming(wasm_promise, { js: { log: function(ptr, len) { @@ -38,11 +41,15 @@ }); }); + function onMessage(e) { + console.log("Message", e.data); + } + function render() { - domSectSource.classList.add("hidden"); + domSectSource.classList.add("hidden"); - // TODO this is temporary debugging data - 
renderSource("/home/andy/dev/zig/lib/std/zig/tokenizer.zig"); + // TODO this is temporary debugging data + renderSource("/home/andy/dev/zig/lib/std/zig/tokenizer.zig"); } function renderSource(path) { diff --git a/lib/std/Build/Fuzz.zig b/lib/std/Build/Fuzz.zig index 46d9bfc8fd..0ff82f3677 100644 --- a/lib/std/Build/Fuzz.zig +++ b/lib/std/Build/Fuzz.zig @@ -6,6 +6,7 @@ const assert = std.debug.assert; const fatal = std.process.fatal; const Allocator = std.mem.Allocator; const log = std.log; +const Coverage = std.debug.Coverage; const Fuzz = @This(); const build_runner = @import("root"); @@ -53,17 +54,30 @@ pub fn start( .global_cache_directory = global_cache_directory, .zig_lib_directory = zig_lib_directory, .zig_exe_path = zig_exe_path, - .msg_queue = .{}, - .mutex = .{}, .listen_address = listen_address, .fuzz_run_steps = fuzz_run_steps, + + .msg_queue = .{}, + .mutex = .{}, + .condition = .{}, + + .coverage_files = .{}, + .coverage_mutex = .{}, + .coverage_condition = .{}, }; + // For accepting HTTP connections. const web_server_thread = std.Thread.spawn(.{}, WebServer.run, .{&web_server}) catch |err| { fatal("unable to spawn web server thread: {s}", .{@errorName(err)}); }; defer web_server_thread.join(); + // For polling messages and sending updates to subscribers. + const coverage_thread = std.Thread.spawn(.{}, WebServer.coverageRun, .{&web_server}) catch |err| { + fatal("unable to spawn coverage thread: {s}", .{@errorName(err)}); + }; + defer coverage_thread.join(); + { const fuzz_node = prog_node.start("Fuzzing", fuzz_run_steps.len); defer fuzz_node.end(); @@ -88,14 +102,38 @@ pub const WebServer = struct { global_cache_directory: Build.Cache.Directory, zig_lib_directory: Build.Cache.Directory, zig_exe_path: []const u8, + listen_address: std.net.Address, + fuzz_run_steps: []const *Step.Run, + /// Messages from fuzz workers. Protected by mutex. msg_queue: std.ArrayListUnmanaged(Msg), + /// Protects `msg_queue` only. 
mutex: std.Thread.Mutex, - listen_address: std.net.Address, - fuzz_run_steps: []const *Step.Run, + /// Signaled when there is a message in `msg_queue`. + condition: std.Thread.Condition, + + coverage_files: std.AutoArrayHashMapUnmanaged(u64, CoverageMap), + /// Protects `coverage_files` only. + coverage_mutex: std.Thread.Mutex, + /// Signaled when `coverage_files` changes. + coverage_condition: std.Thread.Condition, + + const CoverageMap = struct { + mapped_memory: []align(std.mem.page_size) const u8, + coverage: Coverage, + + fn deinit(cm: *CoverageMap, gpa: Allocator) void { + std.posix.munmap(cm.mapped_memory); + cm.coverage.deinit(gpa); + cm.* = undefined; + } + }; const Msg = union(enum) { - coverage_id: u64, + coverage: struct { + id: u64, + run: *Step.Run, + }, }; fn run(ws: *WebServer) void { @@ -162,6 +200,10 @@ pub const WebServer = struct { std.mem.eql(u8, request.head.target, "/debug/sources.tar")) { try serveSourcesTar(ws, request); + } else if (std.mem.eql(u8, request.head.target, "/events") or + std.mem.eql(u8, request.head.target, "/debug/events")) + { + try serveEvents(ws, request); } else { try request.respond("not found", .{ .status = .not_found, @@ -384,6 +426,58 @@ pub const WebServer = struct { try file.writeAll(std.mem.asBytes(&header)); } + fn serveEvents(ws: *WebServer, request: *std.http.Server.Request) !void { + var send_buffer: [0x4000]u8 = undefined; + var response = request.respondStreaming(.{ + .send_buffer = &send_buffer, + .respond_options = .{ + .extra_headers = &.{ + .{ .name = "content-type", .value = "text/event-stream" }, + }, + .transfer_encoding = .none, + }, + }); + + ws.coverage_mutex.lock(); + defer ws.coverage_mutex.unlock(); + + if (getStats(ws)) |stats| { + try response.writer().print("data: {d}\n\n", .{stats.n_runs}); + } else { + try response.writeAll("data: loading debug information\n\n"); + } + try response.flush(); + + while (true) { + ws.coverage_condition.timedWait(&ws.coverage_mutex, std.time.ns_per_ms * 500) 
catch {}; + if (getStats(ws)) |stats| { + try response.writer().print("data: {d}\n\n", .{stats.n_runs}); + try response.flush(); + } + } + } + + const Stats = struct { + n_runs: u64, + }; + + fn getStats(ws: *WebServer) ?Stats { + const coverage_maps = ws.coverage_files.values(); + if (coverage_maps.len == 0) return null; + // TODO: make each events URL correspond to one coverage map + const ptr = coverage_maps[0].mapped_memory; + const SeenPcsHeader = extern struct { + n_runs: usize, + deduplicated_runs: usize, + pcs_len: usize, + lowest_stack: usize, + }; + const header: *const SeenPcsHeader = @ptrCast(ptr[0..@sizeOf(SeenPcsHeader)]); + return .{ + .n_runs = @atomicLoad(usize, &header.n_runs, .monotonic), + }; + } + fn serveSourcesTar(ws: *WebServer, request: *std.http.Server.Request) !void { const gpa = ws.gpa; @@ -471,6 +565,95 @@ pub const WebServer = struct { .name = "cache-control", .value = "max-age=0, must-revalidate", }; + + fn coverageRun(ws: *WebServer) void { + ws.mutex.lock(); + defer ws.mutex.unlock(); + + while (true) { + ws.condition.wait(&ws.mutex); + for (ws.msg_queue.items) |msg| switch (msg) { + .coverage => |coverage| prepareTables(ws, coverage.run, coverage.id) catch |err| switch (err) { + error.AlreadyReported => continue, + else => |e| log.err("failed to prepare code coverage tables: {s}", .{@errorName(e)}), + }, + }; + ws.msg_queue.clearRetainingCapacity(); + } + } + + fn prepareTables( + ws: *WebServer, + run_step: *Step.Run, + coverage_id: u64, + ) error{ OutOfMemory, AlreadyReported }!void { + const gpa = ws.gpa; + + ws.coverage_mutex.lock(); + defer ws.coverage_mutex.unlock(); + + const gop = try ws.coverage_files.getOrPut(gpa, coverage_id); + if (gop.found_existing) { + // We are fuzzing the same executable with multiple threads. + // Perhaps the same unit test; perhaps a different one. 
In any + // case, since the coverage file is the same, we only have to + // notice changes to that one file in order to learn coverage for + // this particular executable. + return; + } + errdefer _ = ws.coverage_files.pop(); + + gop.value_ptr.* = .{ + .coverage = std.debug.Coverage.init, + .mapped_memory = undefined, // populated below + }; + errdefer gop.value_ptr.coverage.deinit(gpa); + + const rebuilt_exe_path: Build.Cache.Path = .{ + .root_dir = Build.Cache.Directory.cwd(), + .sub_path = run_step.rebuilt_executable.?, + }; + var debug_info = std.debug.Info.load(gpa, rebuilt_exe_path, &gop.value_ptr.coverage) catch |err| { + log.err("step '{s}': failed to load debug information for '{}': {s}", .{ + run_step.step.name, rebuilt_exe_path, @errorName(err), + }); + return error.AlreadyReported; + }; + defer debug_info.deinit(gpa); + + const coverage_file_path: Build.Cache.Path = .{ + .root_dir = run_step.step.owner.cache_root, + .sub_path = "v/" ++ std.fmt.hex(coverage_id), + }; + var coverage_file = coverage_file_path.root_dir.handle.openFile(coverage_file_path.sub_path, .{}) catch |err| { + log.err("step '{s}': failed to load coverage file '{}': {s}", .{ + run_step.step.name, coverage_file_path, @errorName(err), + }); + return error.AlreadyReported; + }; + defer coverage_file.close(); + + const file_size = coverage_file.getEndPos() catch |err| { + log.err("unable to check len of coverage file '{}': {s}", .{ coverage_file_path, @errorName(err) }); + return error.AlreadyReported; + }; + + const mapped_memory = std.posix.mmap( + null, + file_size, + std.posix.PROT.READ, + .{ .TYPE = .SHARED }, + coverage_file.handle, + 0, + ) catch |err| { + log.err("failed to map coverage file '{}': {s}", .{ coverage_file_path, @errorName(err) }); + return error.AlreadyReported; + }; + + gop.value_ptr.mapped_memory = mapped_memory; + + ws.coverage_condition.broadcast(); + } }; fn rebuildTestsWorkerRun(run: *Step.Run, ttyconf: std.io.tty.Config, parent_prog_node: std.Progress.Node) 
void { @@ -493,16 +676,16 @@ fn rebuildTestsWorkerRun(run: *Step.Run, ttyconf: std.io.tty.Config, parent_prog build_runner.printErrorMessages(gpa, &compile.step, ttyconf, stderr, false) catch {}; } - if (result) |rebuilt_bin_path| { - run.rebuilt_executable = rebuilt_bin_path; - } else |err| switch (err) { - error.MakeFailed => {}, + const rebuilt_bin_path = result catch |err| switch (err) { + error.MakeFailed => return, else => { - std.debug.print("step '{s}': failed to rebuild in fuzz mode: {s}\n", .{ + log.err("step '{s}': failed to rebuild in fuzz mode: {s}", .{ compile.step.name, @errorName(err), }); + return; }, - } + }; + run.rebuilt_executable = rebuilt_bin_path; } fn fuzzWorkerRun( @@ -524,11 +707,13 @@ fn fuzzWorkerRun( std.debug.lockStdErr(); defer std.debug.unlockStdErr(); build_runner.printErrorMessages(gpa, &run.step, ttyconf, stderr, false) catch {}; + return; }, else => { - std.debug.print("step '{s}': failed to rebuild '{s}' in fuzz mode: {s}\n", .{ + log.err("step '{s}': failed to rerun '{s}' in fuzz mode: {s}", .{ run.step.name, test_name, @errorName(err), }); + return; }, }; } diff --git a/lib/std/Build/Step/Run.zig b/lib/std/Build/Step/Run.zig index e494e969f0..b08ecfee78 100644 --- a/lib/std/Build/Step/Run.zig +++ b/lib/std/Build/Step/Run.zig @@ -1521,7 +1521,11 @@ fn evalZigTest( { web_server.mutex.lock(); defer web_server.mutex.unlock(); - try web_server.msg_queue.append(web_server.gpa, .{ .coverage_id = coverage_id }); + try web_server.msg_queue.append(web_server.gpa, .{ .coverage = .{ + .id = coverage_id, + .run = run, + } }); + web_server.condition.signal(); } }, else => {}, // ignore other messages diff --git a/lib/std/debug.zig b/lib/std/debug.zig index 7f4f6b7df2..a3a8a533ee 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -19,6 +19,7 @@ pub const Dwarf = @import("debug/Dwarf.zig"); pub const Pdb = @import("debug/Pdb.zig"); pub const SelfInfo = @import("debug/SelfInfo.zig"); pub const Info = @import("debug/Info.zig"); +pub 
const Coverage = @import("debug/Coverage.zig"); /// Unresolved source locations can be represented with a single `usize` that /// corresponds to a virtual memory address of the program counter. Combined diff --git a/lib/std/debug/Coverage.zig b/lib/std/debug/Coverage.zig new file mode 100644 index 0000000000..d9cc7fdebd --- /dev/null +++ b/lib/std/debug/Coverage.zig @@ -0,0 +1,244 @@ +const std = @import("../std.zig"); +const Allocator = std.mem.Allocator; +const Hash = std.hash.Wyhash; +const Dwarf = std.debug.Dwarf; +const assert = std.debug.assert; + +const Coverage = @This(); + +/// Provides a globally-scoped integer index for directories. +/// +/// As opposed to, for example, a directory index that is compilation-unit +/// scoped inside a single ELF module. +/// +/// String memory references the memory-mapped debug information. +/// +/// Protected by `mutex`. +directories: std.ArrayHashMapUnmanaged(String, void, String.MapContext, false), +/// Provides a globally-scoped integer index for files. +/// +/// String memory references the memory-mapped debug information. +/// +/// Protected by `mutex`. +files: std.ArrayHashMapUnmanaged(File, void, File.MapContext, false), +string_bytes: std.ArrayListUnmanaged(u8), +/// Protects the other fields. 
+mutex: std.Thread.Mutex, + +pub const init: Coverage = .{ + .directories = .{}, + .files = .{}, + .mutex = .{}, + .string_bytes = .{}, +}; + +pub const String = enum(u32) { + _, + + pub const MapContext = struct { + string_bytes: []const u8, + + pub fn eql(self: @This(), a: String, b: String, b_index: usize) bool { + _ = b_index; + const a_slice = span(self.string_bytes[@intFromEnum(a)..]); + const b_slice = span(self.string_bytes[@intFromEnum(b)..]); + return std.mem.eql(u8, a_slice, b_slice); + } + + pub fn hash(self: @This(), a: String) u32 { + return @truncate(Hash.hash(0, span(self.string_bytes[@intFromEnum(a)..]))); + } + }; + + pub const SliceAdapter = struct { + string_bytes: []const u8, + + pub fn eql(self: @This(), a_slice: []const u8, b: String, b_index: usize) bool { + _ = b_index; + const b_slice = span(self.string_bytes[@intFromEnum(b)..]); + return std.mem.eql(u8, a_slice, b_slice); + } + pub fn hash(self: @This(), a: []const u8) u32 { + _ = self; + return @truncate(Hash.hash(0, a)); + } + }; +}; + +pub const SourceLocation = struct { + file: File.Index, + line: u32, + column: u32, + + pub const invalid: SourceLocation = .{ + .file = .invalid, + .line = 0, + .column = 0, + }; +}; + +pub const File = struct { + directory_index: u32, + basename: String, + + pub const Index = enum(u32) { + invalid = std.math.maxInt(u32), + _, + }; + + pub const MapContext = struct { + string_bytes: []const u8, + + pub fn hash(self: MapContext, a: File) u32 { + const a_basename = span(self.string_bytes[@intFromEnum(a.basename)..]); + return @truncate(Hash.hash(a.directory_index, a_basename)); + } + + pub fn eql(self: MapContext, a: File, b: File, b_index: usize) bool { + _ = b_index; + if (a.directory_index != b.directory_index) return false; + const a_basename = span(self.string_bytes[@intFromEnum(a.basename)..]); + const b_basename = span(self.string_bytes[@intFromEnum(b.basename)..]); + return std.mem.eql(u8, a_basename, b_basename); + } + }; + + pub const 
SliceAdapter = struct { + string_bytes: []const u8, + + pub const Entry = struct { + directory_index: u32, + basename: []const u8, + }; + + pub fn hash(self: @This(), a: Entry) u32 { + _ = self; + return @truncate(Hash.hash(a.directory_index, a.basename)); + } + + pub fn eql(self: @This(), a: Entry, b: File, b_index: usize) bool { + _ = b_index; + if (a.directory_index != b.directory_index) return false; + const b_basename = span(self.string_bytes[@intFromEnum(b.basename)..]); + return std.mem.eql(u8, a.basename, b_basename); + } + }; +}; + +pub fn deinit(cov: *Coverage, gpa: Allocator) void { + cov.directories.deinit(gpa); + cov.files.deinit(gpa); + cov.string_bytes.deinit(gpa); + cov.* = undefined; +} + +pub fn fileAt(cov: *Coverage, index: File.Index) *File { + return &cov.files.keys()[@intFromEnum(index)]; +} + +pub fn stringAt(cov: *Coverage, index: String) [:0]const u8 { + return span(cov.string_bytes.items[@intFromEnum(index)..]); +} + +pub const ResolveAddressesDwarfError = Dwarf.ScanError; + +pub fn resolveAddressesDwarf( + cov: *Coverage, + gpa: Allocator, + sorted_pc_addrs: []const u64, + /// Asserts its length equals length of `sorted_pc_addrs`. + output: []SourceLocation, + d: *Dwarf, +) ResolveAddressesDwarfError!void { + assert(sorted_pc_addrs.len == output.len); + assert(d.compile_units_sorted); + + var cu_i: usize = 0; + var line_table_i: usize = 0; + var cu: *Dwarf.CompileUnit = &d.compile_unit_list.items[0]; + var range = cu.pc_range.?; + // Protects directories and files tables from other threads. 
+ cov.mutex.lock(); + defer cov.mutex.unlock(); + next_pc: for (sorted_pc_addrs, output) |pc, *out| { + while (pc >= range.end) { + cu_i += 1; + if (cu_i >= d.compile_unit_list.items.len) { + out.* = SourceLocation.invalid; + continue :next_pc; + } + cu = &d.compile_unit_list.items[cu_i]; + line_table_i = 0; + range = cu.pc_range orelse { + out.* = SourceLocation.invalid; + continue :next_pc; + }; + } + if (pc < range.start) { + out.* = SourceLocation.invalid; + continue :next_pc; + } + if (line_table_i == 0) { + line_table_i = 1; + cov.mutex.unlock(); + defer cov.mutex.lock(); + d.populateSrcLocCache(gpa, cu) catch |err| switch (err) { + error.MissingDebugInfo, error.InvalidDebugInfo => { + out.* = SourceLocation.invalid; + cu_i += 1; + if (cu_i < d.compile_unit_list.items.len) { + cu = &d.compile_unit_list.items[cu_i]; + line_table_i = 0; + if (cu.pc_range) |r| range = r; + } + continue :next_pc; + }, + else => |e| return e, + }; + } + const slc = &cu.src_loc_cache.?; + const table_addrs = slc.line_table.keys(); + while (line_table_i < table_addrs.len and table_addrs[line_table_i] < pc) line_table_i += 1; + + const entry = slc.line_table.values()[line_table_i - 1]; + const corrected_file_index = entry.file - @intFromBool(slc.version < 5); + const file_entry = slc.files[corrected_file_index]; + const dir_path = slc.directories[file_entry.dir_index].path; + try cov.string_bytes.ensureUnusedCapacity(gpa, dir_path.len + file_entry.path.len + 2); + const dir_gop = try cov.directories.getOrPutContextAdapted(gpa, dir_path, String.SliceAdapter{ + .string_bytes = cov.string_bytes.items, + }, String.MapContext{ + .string_bytes = cov.string_bytes.items, + }); + if (!dir_gop.found_existing) + dir_gop.key_ptr.* = addStringAssumeCapacity(cov, dir_path); + const file_gop = try cov.files.getOrPutContextAdapted(gpa, File.SliceAdapter.Entry{ + .directory_index = @intCast(dir_gop.index), + .basename = file_entry.path, + }, File.SliceAdapter{ + .string_bytes = 
cov.string_bytes.items, + }, File.MapContext{ + .string_bytes = cov.string_bytes.items, + }); + if (!file_gop.found_existing) file_gop.key_ptr.* = .{ + .directory_index = @intCast(dir_gop.index), + .basename = addStringAssumeCapacity(cov, file_entry.path), + }; + out.* = .{ + .file = @enumFromInt(file_gop.index), + .line = entry.line, + .column = entry.column, + }; + } +} + +pub fn addStringAssumeCapacity(cov: *Coverage, s: []const u8) String { + const result: String = @enumFromInt(cov.string_bytes.items.len); + cov.string_bytes.appendSliceAssumeCapacity(s); + cov.string_bytes.appendAssumeCapacity(0); + return result; +} + +fn span(s: []const u8) [:0]const u8 { + return std.mem.sliceTo(@as([:0]const u8, @ptrCast(s)), 0); +} diff --git a/lib/std/debug/Info.zig b/lib/std/debug/Info.zig index a52de6549b..ee191d2c12 100644 --- a/lib/std/debug/Info.zig +++ b/lib/std/debug/Info.zig @@ -12,85 +12,31 @@ const Path = std.Build.Cache.Path; const Dwarf = std.debug.Dwarf; const page_size = std.mem.page_size; const assert = std.debug.assert; -const Hash = std.hash.Wyhash; +const Coverage = std.debug.Coverage; +const SourceLocation = std.debug.Coverage.SourceLocation; const Info = @This(); /// Sorted by key, ascending. address_map: std.AutoArrayHashMapUnmanaged(u64, Dwarf.ElfModule), - -/// Provides a globally-scoped integer index for directories. -/// -/// As opposed to, for example, a directory index that is compilation-unit -/// scoped inside a single ELF module. -/// -/// String memory references the memory-mapped debug information. -/// -/// Protected by `mutex`. -directories: std.StringArrayHashMapUnmanaged(void), -/// Provides a globally-scoped integer index for files. -/// -/// String memory references the memory-mapped debug information. -/// -/// Protected by `mutex`. -files: std.ArrayHashMapUnmanaged(File, void, File.MapContext, false), -/// Protects `directories` and `files`. 
-mutex: std.Thread.Mutex, - -pub const SourceLocation = struct { - file: File.Index, - line: u32, - column: u32, - - pub const invalid: SourceLocation = .{ - .file = .invalid, - .line = 0, - .column = 0, - }; -}; - -pub const File = struct { - directory_index: u32, - basename: []const u8, - - pub const Index = enum(u32) { - invalid = std.math.maxInt(u32), - _, - }; - - pub const MapContext = struct { - pub fn hash(ctx: MapContext, a: File) u32 { - _ = ctx; - return @truncate(Hash.hash(a.directory_index, a.basename)); - } - - pub fn eql(ctx: MapContext, a: File, b: File, b_index: usize) bool { - _ = ctx; - _ = b_index; - return a.directory_index == b.directory_index and std.mem.eql(u8, a.basename, b.basename); - } - }; -}; +/// Externally managed, outlives this `Info` instance. +coverage: *Coverage, pub const LoadError = Dwarf.ElfModule.LoadError; -pub fn load(gpa: Allocator, path: Path) LoadError!Info { +pub fn load(gpa: Allocator, path: Path, coverage: *Coverage) LoadError!Info { var sections: Dwarf.SectionArray = Dwarf.null_section_array; var elf_module = try Dwarf.ElfModule.loadPath(gpa, path, null, null, &sections, null); try elf_module.dwarf.sortCompileUnits(); var info: Info = .{ .address_map = .{}, - .directories = .{}, - .files = .{}, - .mutex = .{}, + .coverage = coverage, }; try info.address_map.put(gpa, elf_module.base_address, elf_module); return info; } pub fn deinit(info: *Info, gpa: Allocator) void { - info.directories.deinit(gpa); - info.files.deinit(gpa); for (info.address_map.values()) |*elf_module| { elf_module.dwarf.deinit(gpa); } @@ -98,98 +44,19 @@ pub fn deinit(info: *Info, gpa: Allocator) void { info.* = undefined; } -pub fn fileAt(info: *Info, index: File.Index) *File { - return &info.files.keys()[@intFromEnum(index)]; -} - -pub const ResolveSourceLocationsError = Dwarf.ScanError; +pub const ResolveAddressesError = Coverage.ResolveAddressesDwarfError; /// Given an array of virtual memory addresses, sorted ascending, outputs a /// corresponding
array of source locations. -pub fn resolveSourceLocations( +pub fn resolveAddresses( info: *Info, gpa: Allocator, sorted_pc_addrs: []const u64, /// Asserts its length equals length of `sorted_pc_addrs`. output: []SourceLocation, -) ResolveSourceLocationsError!void { +) ResolveAddressesError!void { assert(sorted_pc_addrs.len == output.len); if (info.address_map.entries.len != 1) @panic("TODO"); const elf_module = &info.address_map.values()[0]; - return resolveSourceLocationsDwarf(info, gpa, sorted_pc_addrs, output, &elf_module.dwarf); -} - -pub fn resolveSourceLocationsDwarf( - info: *Info, - gpa: Allocator, - sorted_pc_addrs: []const u64, - /// Asserts its length equals length of `sorted_pc_addrs`. - output: []SourceLocation, - d: *Dwarf, -) ResolveSourceLocationsError!void { - assert(sorted_pc_addrs.len == output.len); - assert(d.compile_units_sorted); - - var cu_i: usize = 0; - var line_table_i: usize = 0; - var cu: *Dwarf.CompileUnit = &d.compile_unit_list.items[0]; - var range = cu.pc_range.?; - // Protects directories and files tables from other threads. 
- info.mutex.lock(); - defer info.mutex.unlock(); - next_pc: for (sorted_pc_addrs, output) |pc, *out| { - while (pc >= range.end) { - cu_i += 1; - if (cu_i >= d.compile_unit_list.items.len) { - out.* = SourceLocation.invalid; - continue :next_pc; - } - cu = &d.compile_unit_list.items[cu_i]; - line_table_i = 0; - range = cu.pc_range orelse { - out.* = SourceLocation.invalid; - continue :next_pc; - }; - } - if (pc < range.start) { - out.* = SourceLocation.invalid; - continue :next_pc; - } - if (line_table_i == 0) { - line_table_i = 1; - info.mutex.unlock(); - defer info.mutex.lock(); - d.populateSrcLocCache(gpa, cu) catch |err| switch (err) { - error.MissingDebugInfo, error.InvalidDebugInfo => { - out.* = SourceLocation.invalid; - cu_i += 1; - if (cu_i < d.compile_unit_list.items.len) { - cu = &d.compile_unit_list.items[cu_i]; - line_table_i = 0; - if (cu.pc_range) |r| range = r; - } - continue :next_pc; - }, - else => |e| return e, - }; - } - const slc = &cu.src_loc_cache.?; - const table_addrs = slc.line_table.keys(); - while (line_table_i < table_addrs.len and table_addrs[line_table_i] < pc) line_table_i += 1; - - const entry = slc.line_table.values()[line_table_i - 1]; - const corrected_file_index = entry.file - @intFromBool(slc.version < 5); - const file_entry = slc.files[corrected_file_index]; - const dir_path = slc.directories[file_entry.dir_index].path; - const dir_gop = try info.directories.getOrPut(gpa, dir_path); - const file_gop = try info.files.getOrPut(gpa, .{ - .directory_index = @intCast(dir_gop.index), - .basename = file_entry.path, - }); - out.* = .{ - .file = @enumFromInt(file_gop.index), - .line = entry.line, - .column = entry.column, - }; - } + return info.coverage.resolveAddressesDwarf(gpa, sorted_pc_addrs, output, &elf_module.dwarf); } diff --git a/tools/dump-cov.zig b/tools/dump-cov.zig index bd096b9fc0..fb08907cad 100644 --- a/tools/dump-cov.zig +++ b/tools/dump-cov.zig @@ -28,7 +28,10 @@ pub fn main() !void { .sub_path = cov_file_name, }; - 
var debug_info = std.debug.Info.load(gpa, exe_path) catch |err| { + var coverage = std.debug.Coverage.init; + defer coverage.deinit(gpa); + + var debug_info = std.debug.Info.load(gpa, exe_path, &coverage) catch |err| { fatal("failed to load debug info for {}: {s}", .{ exe_path, @errorName(err) }); }; defer debug_info.deinit(gpa); @@ -50,14 +53,15 @@ pub fn main() !void { } assert(std.sort.isSorted(usize, pcs, {}, std.sort.asc(usize))); - const source_locations = try arena.alloc(std.debug.Info.SourceLocation, pcs.len); - try debug_info.resolveSourceLocations(gpa, pcs, source_locations); + const source_locations = try arena.alloc(std.debug.Coverage.SourceLocation, pcs.len); + try debug_info.resolveAddresses(gpa, pcs, source_locations); for (pcs, source_locations) |pc, sl| { - const file = debug_info.fileAt(sl.file); - const dir_name = debug_info.directories.keys()[file.directory_index]; + const file = debug_info.coverage.fileAt(sl.file); + const dir_name = debug_info.coverage.directories.keys()[file.directory_index]; + const dir_name_slice = debug_info.coverage.stringAt(dir_name); try stdout.print("{x}: {s}/{s}:{d}:{d}\n", .{ - pc, dir_name, file.basename, sl.line, sl.column, + pc, dir_name_slice, debug_info.coverage.stringAt(file.basename), sl.line, sl.column, }); } -- cgit v1.2.3