From 2e12b45d8b43d69e144887df4b04a2d383ff25d4 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Fri, 2 Aug 2024 16:31:49 -0700 Subject: introduce tool for dumping coverage file with debug info resolved. begin efforts of providing `std.debug.Info`, a cross-platform abstraction for loading debug information into an in-memory format that supports queries such as "what is the source location of this virtual memory address?" Unlike `std.debug.SelfInfo`, this API does not assume the debug information in question happens to match the host CPU architecture, OS, or other target properties. --- lib/std/debug/Dwarf.zig | 394 ++++++++++++++++++++++++++++++++++++++++++--- lib/std/debug/Info.zig | 57 +++++++ lib/std/debug/SelfInfo.zig | 264 +++--------------------------- 3 files changed, 447 insertions(+), 268 deletions(-) create mode 100644 lib/std/debug/Info.zig (limited to 'lib/std/debug') diff --git a/lib/std/debug/Dwarf.zig b/lib/std/debug/Dwarf.zig index 991c731549..3c150b3b18 100644 --- a/lib/std/debug/Dwarf.zig +++ b/lib/std/debug/Dwarf.zig @@ -12,6 +12,8 @@ const native_endian = builtin.cpu.arch.endian(); const std = @import("../std.zig"); const Allocator = std.mem.Allocator; +const elf = std.elf; +const mem = std.mem; const DW = std.dwarf; const AT = DW.AT; const EH = DW.EH; @@ -22,8 +24,8 @@ const UT = DW.UT; const assert = std.debug.assert; const cast = std.math.cast; const maxInt = std.math.maxInt; -const readInt = std.mem.readInt; const MemoryAccessor = std.debug.MemoryAccessor; +const Path = std.Build.Cache.Path; /// Did I mention this is deprecated? 
const DeprecatedFixedBufferReader = std.debug.DeprecatedFixedBufferReader; @@ -252,13 +254,13 @@ pub const Die = struct { .@"32" => { const byte_offset = compile_unit.str_offsets_base + 4 * index; if (byte_offset + 4 > debug_str_offsets.len) return bad(); - const offset = readInt(u32, debug_str_offsets[byte_offset..][0..4], di.endian); + const offset = mem.readInt(u32, debug_str_offsets[byte_offset..][0..4], di.endian); return getStringGeneric(opt_str, offset); }, .@"64" => { const byte_offset = compile_unit.str_offsets_base + 8 * index; if (byte_offset + 8 > debug_str_offsets.len) return bad(); - const offset = readInt(u64, debug_str_offsets[byte_offset..][0..8], di.endian); + const offset = mem.readInt(u64, debug_str_offsets[byte_offset..][0..8], di.endian); return getStringGeneric(opt_str, offset); }, } @@ -721,12 +723,14 @@ const num_sections = std.enums.directEnumArrayLen(Section.Id, 0); pub const SectionArray = [num_sections]?Section; pub const null_section_array = [_]?Section{null} ** num_sections; +pub const OpenError = ScanError; + /// Initialize DWARF info. The caller has the responsibility to initialize most /// the `Dwarf` fields before calling. `binary_mem` is the raw bytes of the /// main binary file (not the secondary debug info file). 
-pub fn open(di: *Dwarf, allocator: Allocator) !void { - try di.scanAllFunctions(allocator); - try di.scanAllCompileUnits(allocator); +pub fn open(di: *Dwarf, gpa: Allocator) OpenError!void { + try di.scanAllFunctions(gpa); + try di.scanAllCompileUnits(gpa); } const PcRange = struct { @@ -747,21 +751,21 @@ pub fn sectionVirtualOffset(di: Dwarf, dwarf_section: Section.Id, base_address: return if (di.sections[@intFromEnum(dwarf_section)]) |s| s.virtualOffset(base_address) else null; } -pub fn deinit(di: *Dwarf, allocator: Allocator) void { +pub fn deinit(di: *Dwarf, gpa: Allocator) void { for (di.sections) |opt_section| { - if (opt_section) |s| if (s.owned) allocator.free(s.data); + if (opt_section) |s| if (s.owned) gpa.free(s.data); } for (di.abbrev_table_list.items) |*abbrev| { - abbrev.deinit(allocator); + abbrev.deinit(gpa); } - di.abbrev_table_list.deinit(allocator); + di.abbrev_table_list.deinit(gpa); for (di.compile_unit_list.items) |*cu| { - cu.die.deinit(allocator); + cu.die.deinit(gpa); } - di.compile_unit_list.deinit(allocator); - di.func_list.deinit(allocator); - di.cie_map.deinit(allocator); - di.fde_list.deinit(allocator); + di.compile_unit_list.deinit(gpa); + di.func_list.deinit(gpa); + di.cie_map.deinit(gpa); + di.fde_list.deinit(gpa); di.* = undefined; } @@ -777,7 +781,12 @@ pub fn getSymbolName(di: *Dwarf, address: u64) ?[]const u8 { return null; } -fn scanAllFunctions(di: *Dwarf, allocator: Allocator) !void { +const ScanError = error{ + InvalidDebugInfo, + MissingDebugInfo, +} || Allocator.Error || std.debug.DeprecatedFixedBufferReader.Error; + +fn scanAllFunctions(di: *Dwarf, allocator: Allocator) ScanError!void { var fbr: DeprecatedFixedBufferReader = .{ .buf = di.section(.debug_info).?, .endian = di.endian }; var this_unit_offset: u64 = 0; @@ -964,7 +973,7 @@ fn scanAllFunctions(di: *Dwarf, allocator: Allocator) !void { } } -fn scanAllCompileUnits(di: *Dwarf, allocator: Allocator) !void { +fn scanAllCompileUnits(di: *Dwarf, allocator: Allocator) 
ScanError!void { var fbr: DeprecatedFixedBufferReader = .{ .buf = di.section(.debug_info).?, .endian = di.endian }; var this_unit_offset: u64 = 0; @@ -1070,13 +1079,13 @@ const DebugRangeIterator = struct { .@"32" => { const offset_loc = @as(usize, @intCast(compile_unit.rnglists_base + 4 * idx)); if (offset_loc + 4 > debug_ranges.len) return bad(); - const offset = readInt(u32, debug_ranges[offset_loc..][0..4], di.endian); + const offset = mem.readInt(u32, debug_ranges[offset_loc..][0..4], di.endian); break :off compile_unit.rnglists_base + offset; }, .@"64" => { const offset_loc = @as(usize, @intCast(compile_unit.rnglists_base + 8 * idx)); if (offset_loc + 8 > debug_ranges.len) return bad(); - const offset = readInt(u64, debug_ranges[offset_loc..][0..8], di.endian); + const offset = mem.readInt(u64, debug_ranges[offset_loc..][0..8], di.endian); break :off compile_unit.rnglists_base + offset; }, } @@ -1287,7 +1296,7 @@ fn parseDie( attrs_buf: []Die.Attr, abbrev_table: *const Abbrev.Table, format: Format, -) !?Die { +) ScanError!?Die { const abbrev_code = try fbr.readUleb128(u64); if (abbrev_code == 0) return null; const table_entry = abbrev_table.get(abbrev_code) orelse return bad(); @@ -1588,7 +1597,7 @@ fn readDebugAddr(di: Dwarf, compile_unit: CompileUnit, index: u64) !u64 { // The header is 8 or 12 bytes depending on is_64. 
if (compile_unit.addr_base < 8) return bad(); - const version = readInt(u16, debug_addr[compile_unit.addr_base - 4 ..][0..2], di.endian); + const version = mem.readInt(u16, debug_addr[compile_unit.addr_base - 4 ..][0..2], di.endian); if (version != 5) return bad(); const addr_size = debug_addr[compile_unit.addr_base - 2]; @@ -1598,9 +1607,9 @@ fn readDebugAddr(di: Dwarf, compile_unit: CompileUnit, index: u64) !u64 { if (byte_offset + addr_size > debug_addr.len) return bad(); return switch (addr_size) { 1 => debug_addr[byte_offset], - 2 => readInt(u16, debug_addr[byte_offset..][0..2], di.endian), - 4 => readInt(u32, debug_addr[byte_offset..][0..4], di.endian), - 8 => readInt(u64, debug_addr[byte_offset..][0..8], di.endian), + 2 => mem.readInt(u16, debug_addr[byte_offset..][0..2], di.endian), + 4 => mem.readInt(u32, debug_addr[byte_offset..][0..4], di.endian), + 8 => mem.readInt(u64, debug_addr[byte_offset..][0..8], di.endian), else => bad(), }; } @@ -1699,7 +1708,7 @@ fn parseFormValue( form_id: u64, format: Format, implicit_const: ?i64, -) anyerror!FormValue { +) ScanError!FormValue { return switch (form_id) { FORM.addr => .{ .addr = try fbr.readAddress(switch (@bitSizeOf(usize)) { 32 => .@"32", @@ -1892,7 +1901,8 @@ const UnitHeader = struct { header_length: u4, unit_length: u64, }; -fn readUnitHeader(fbr: *DeprecatedFixedBufferReader, opt_ma: ?*MemoryAccessor) !UnitHeader { + +fn readUnitHeader(fbr: *DeprecatedFixedBufferReader, opt_ma: ?*MemoryAccessor) ScanError!UnitHeader { return switch (try if (opt_ma) |ma| fbr.readIntChecked(u32, ma) else fbr.readInt(u32)) { 0...0xfffffff0 - 1 => |unit_length| .{ .format = .@"32", @@ -2023,3 +2033,335 @@ fn pcRelBase(field_ptr: usize, pc_rel_offset: i64) !usize { return std.math.add(usize, field_ptr, @as(usize, @intCast(pc_rel_offset))); } } + +pub const ElfModule = struct { + base_address: usize, + dwarf: Dwarf, + mapped_memory: []align(std.mem.page_size) const u8, + external_mapped_memory: ?[]align(std.mem.page_size) 
const u8, + + pub fn deinit(self: *@This(), allocator: Allocator) void { + self.dwarf.deinit(allocator); + std.posix.munmap(self.mapped_memory); + if (self.external_mapped_memory) |m| std.posix.munmap(m); + } + + pub fn getSymbolAtAddress(self: *@This(), allocator: Allocator, address: usize) !std.debug.Symbol { + // Translate the VA into an address into this object + const relocated_address = address - self.base_address; + return self.dwarf.getSymbol(allocator, relocated_address); + } + + pub fn getDwarfInfoForAddress(self: *@This(), allocator: Allocator, address: usize) !?*const Dwarf { + _ = allocator; + _ = address; + return &self.dwarf; + } + + pub const LoadError = error{ + InvalidDebugInfo, + MissingDebugInfo, + InvalidElfMagic, + InvalidElfVersion, + InvalidElfEndian, + /// TODO: implement this and then remove this error code + UnimplementedDwarfForeignEndian, + /// The debug info may be valid but this implementation uses memory + /// mapping which limits things to usize. If the target debug info is + /// 64-bit and host is 32-bit, there may be debug info that is not + /// supportable using this method. + Overflow, + + PermissionDenied, + LockedMemoryLimitExceeded, + MemoryMappingNotSupported, + } || Allocator.Error || std.fs.File.OpenError || OpenError; + + /// Reads debug info from an already mapped ELF file. + /// + /// If the required sections aren't present but a reference to external debug + /// info is, then this function will recurse to attempt to load the debug + /// sections from an external file. 
+ pub fn load( + gpa: Allocator, + mapped_mem: []align(std.mem.page_size) const u8, + build_id: ?[]const u8, + expected_crc: ?u32, + parent_sections: *Dwarf.SectionArray, + parent_mapped_mem: ?[]align(std.mem.page_size) const u8, + elf_filename: ?[]const u8, + ) LoadError!Dwarf.ElfModule { + if (expected_crc) |crc| if (crc != std.hash.crc.Crc32.hash(mapped_mem)) return error.InvalidDebugInfo; + + const hdr: *const elf.Ehdr = @ptrCast(&mapped_mem[0]); + if (!mem.eql(u8, hdr.e_ident[0..4], elf.MAGIC)) return error.InvalidElfMagic; + if (hdr.e_ident[elf.EI_VERSION] != 1) return error.InvalidElfVersion; + + const endian: std.builtin.Endian = switch (hdr.e_ident[elf.EI_DATA]) { + elf.ELFDATA2LSB => .little, + elf.ELFDATA2MSB => .big, + else => return error.InvalidElfEndian, + }; + if (endian != native_endian) return error.UnimplementedDwarfForeignEndian; + + const shoff = hdr.e_shoff; + const str_section_off = shoff + @as(u64, hdr.e_shentsize) * @as(u64, hdr.e_shstrndx); + const str_shdr: *const elf.Shdr = @ptrCast(@alignCast(&mapped_mem[cast(usize, str_section_off) orelse return error.Overflow])); + const header_strings = mapped_mem[str_shdr.sh_offset..][0..str_shdr.sh_size]; + const shdrs = @as( + [*]const elf.Shdr, + @ptrCast(@alignCast(&mapped_mem[shoff])), + )[0..hdr.e_shnum]; + + var sections: Dwarf.SectionArray = Dwarf.null_section_array; + + // Combine section list. This takes ownership over any owned sections from the parent scope. 
+ for (parent_sections, &sections) |*parent, *section_elem| { + if (parent.*) |*p| { + section_elem.* = p.*; + p.owned = false; + } + } + errdefer for (sections) |opt_section| if (opt_section) |s| if (s.owned) gpa.free(s.data); + + var separate_debug_filename: ?[]const u8 = null; + var separate_debug_crc: ?u32 = null; + + for (shdrs) |*shdr| { + if (shdr.sh_type == elf.SHT_NULL or shdr.sh_type == elf.SHT_NOBITS) continue; + const name = mem.sliceTo(header_strings[shdr.sh_name..], 0); + + if (mem.eql(u8, name, ".gnu_debuglink")) { + const gnu_debuglink = try chopSlice(mapped_mem, shdr.sh_offset, shdr.sh_size); + const debug_filename = mem.sliceTo(@as([*:0]const u8, @ptrCast(gnu_debuglink.ptr)), 0); + const crc_offset = mem.alignForward(usize, @intFromPtr(&debug_filename[debug_filename.len]) + 1, 4) - @intFromPtr(gnu_debuglink.ptr); + const crc_bytes = gnu_debuglink[crc_offset..][0..4]; + separate_debug_crc = mem.readInt(u32, crc_bytes, native_endian); + separate_debug_filename = debug_filename; + continue; + } + + var section_index: ?usize = null; + inline for (@typeInfo(Dwarf.Section.Id).Enum.fields, 0..) |sect, i| { + if (mem.eql(u8, "." ++ sect.name, name)) section_index = i; + } + if (section_index == null) continue; + if (sections[section_index.?] != null) continue; + + const section_bytes = try chopSlice(mapped_mem, shdr.sh_offset, shdr.sh_size); + sections[section_index.?] 
= if ((shdr.sh_flags & elf.SHF_COMPRESSED) > 0) blk: { + var section_stream = std.io.fixedBufferStream(section_bytes); + const section_reader = section_stream.reader(); + const chdr = section_reader.readStruct(elf.Chdr) catch continue; + if (chdr.ch_type != .ZLIB) continue; + + var zlib_stream = std.compress.zlib.decompressor(section_reader); + + const decompressed_section = try gpa.alloc(u8, chdr.ch_size); + errdefer gpa.free(decompressed_section); + + const read = zlib_stream.reader().readAll(decompressed_section) catch continue; + assert(read == decompressed_section.len); + + break :blk .{ + .data = decompressed_section, + .virtual_address = shdr.sh_addr, + .owned = true, + }; + } else .{ + .data = section_bytes, + .virtual_address = shdr.sh_addr, + .owned = false, + }; + } + + const missing_debug_info = + sections[@intFromEnum(Dwarf.Section.Id.debug_info)] == null or + sections[@intFromEnum(Dwarf.Section.Id.debug_abbrev)] == null or + sections[@intFromEnum(Dwarf.Section.Id.debug_str)] == null or + sections[@intFromEnum(Dwarf.Section.Id.debug_line)] == null; + + // Attempt to load debug info from an external file + // See: https://sourceware.org/gdb/onlinedocs/gdb/Separate-Debug-Files.html + if (missing_debug_info) { + + // Only allow one level of debug info nesting + if (parent_mapped_mem) |_| { + return error.MissingDebugInfo; + } + + const global_debug_directories = [_][]const u8{ + "/usr/lib/debug", + }; + + // /.build-id/<2-character id prefix>/.debug + if (build_id) |id| blk: { + if (id.len < 3) break :blk; + + // Either md5 (16 bytes) or sha1 (20 bytes) are used here in practice + const extension = ".debug"; + var id_prefix_buf: [2]u8 = undefined; + var filename_buf: [38 + extension.len]u8 = undefined; + + _ = std.fmt.bufPrint(&id_prefix_buf, "{s}", .{std.fmt.fmtSliceHexLower(id[0..1])}) catch unreachable; + const filename = std.fmt.bufPrint( + &filename_buf, + "{s}" ++ extension, + .{std.fmt.fmtSliceHexLower(id[1..])}, + ) catch break :blk; + + for 
(global_debug_directories) |global_directory| { + const path: Path = .{ + .root_dir = std.Build.Cache.Directory.cwd(), + .sub_path = try std.fs.path.join(gpa, &.{ + global_directory, ".build-id", &id_prefix_buf, filename, + }), + }; + defer gpa.free(path.sub_path); + + return loadPath(gpa, path, null, separate_debug_crc, &sections, mapped_mem) catch continue; + } + } + + // use the path from .gnu_debuglink, in the same search order as gdb + if (separate_debug_filename) |separate_filename| blk: { + if (elf_filename != null and mem.eql(u8, elf_filename.?, separate_filename)) + return error.MissingDebugInfo; + + // <cwd>/<gnu_debuglink> + if (loadPath( + gpa, + .{ + .root_dir = std.Build.Cache.Directory.cwd(), + .sub_path = separate_filename, + }, + null, + separate_debug_crc, + &sections, + mapped_mem, + )) |debug_info| { + return debug_info; + } else |_| {} + + // <cwd>/.debug/<gnu_debuglink> + { + const path: Path = .{ + .root_dir = std.Build.Cache.Directory.cwd(), + .sub_path = try std.fs.path.join(gpa, &.{ ".debug", separate_filename }), + }; + defer gpa.free(path.sub_path); + + if (loadPath(gpa, path, null, separate_debug_crc, &sections, mapped_mem)) |debug_info| return debug_info else |_| {} + } + + var cwd_buf: [std.fs.max_path_bytes]u8 = undefined; + const cwd_path = std.posix.realpath(".", &cwd_buf) catch break :blk; + + // <global debug directory>/<absolute folder of current binary>/<gnu_debuglink> + for (global_debug_directories) |global_directory| { + const path: Path = .{ + .root_dir = std.Build.Cache.Directory.cwd(), + .sub_path = try std.fs.path.join(gpa, &.{ global_directory, cwd_path, separate_filename }), + }; + defer gpa.free(path.sub_path); + if (loadPath(gpa, path, null, separate_debug_crc, &sections, mapped_mem)) |debug_info| return debug_info else |_| {} + } + } + + return error.MissingDebugInfo; + } + + var di: Dwarf = .{ + .endian = endian, + .sections = sections, + .is_macho = false, + }; + + try Dwarf.open(&di, gpa); + + return .{ + .base_address = 0, + .dwarf = di, + .mapped_memory = parent_mapped_mem orelse mapped_mem, + .external_mapped_memory = if 
(parent_mapped_mem != null) mapped_mem else null, + }; + } + + pub fn loadPath( + gpa: Allocator, + elf_file_path: Path, + build_id: ?[]const u8, + expected_crc: ?u32, + parent_sections: *Dwarf.SectionArray, + parent_mapped_mem: ?[]align(std.mem.page_size) const u8, + ) LoadError!Dwarf.ElfModule { + const elf_file = elf_file_path.root_dir.handle.openFile(elf_file_path.sub_path, .{}) catch |err| switch (err) { + error.FileNotFound => return missing(), + else => return err, + }; + defer elf_file.close(); + + const end_pos = elf_file.getEndPos() catch return bad(); + const file_len = cast(usize, end_pos) orelse return error.Overflow; + + const mapped_mem = try std.posix.mmap( + null, + file_len, + std.posix.PROT.READ, + .{ .TYPE = .SHARED }, + elf_file.handle, + 0, + ); + errdefer std.posix.munmap(mapped_mem); + + return load( + gpa, + mapped_mem, + build_id, + expected_crc, + parent_sections, + parent_mapped_mem, + elf_file_path.sub_path, + ); + } +}; + +/// Given an array of virtual memory addresses, sorted ascending, outputs a +/// corresponding array of source locations, by appending to the provided +/// array list. +pub fn resolveSourceLocations( + d: *Dwarf, + gpa: Allocator, + sorted_pc_addrs: []const u64, + /// Asserts its length equals length of `sorted_pc_addrs`. 
+ output: []std.debug.SourceLocation, +) error{ MissingDebugInfo, InvalidDebugInfo }!void { + assert(sorted_pc_addrs.len == output.len); + _ = d; + _ = gpa; + @panic("TODO"); +} + +fn getSymbol(di: *Dwarf, allocator: Allocator, address: u64) !std.debug.Symbol { + if (di.findCompileUnit(address)) |compile_unit| { + return .{ + .name = di.getSymbolName(address) orelse "???", + .compile_unit_name = compile_unit.die.getAttrString(di, std.dwarf.AT.name, di.section(.debug_str), compile_unit.*) catch |err| switch (err) { + error.MissingDebugInfo, error.InvalidDebugInfo => "???", + }, + .source_location = di.getLineNumberInfo(allocator, compile_unit.*, address) catch |err| switch (err) { + error.MissingDebugInfo, error.InvalidDebugInfo => null, + else => return err, + }, + }; + } else |err| switch (err) { + error.MissingDebugInfo, error.InvalidDebugInfo => return .{}, + else => return err, + } +} + +pub fn chopSlice(ptr: []const u8, offset: u64, size: u64) error{Overflow}![]const u8 { + const start = cast(usize, offset) orelse return error.Overflow; + const end = start + (cast(usize, size) orelse return error.Overflow); + return ptr[start..end]; +} diff --git a/lib/std/debug/Info.zig b/lib/std/debug/Info.zig new file mode 100644 index 0000000000..5276ba68ec --- /dev/null +++ b/lib/std/debug/Info.zig @@ -0,0 +1,57 @@ +//! Cross-platform abstraction for loading debug information into an in-memory +//! format that supports queries such as "what is the source location of this +//! virtual memory address?" +//! +//! Unlike `std.debug.SelfInfo`, this API does not assume the debug information +//! in question happens to match the host CPU architecture, OS, or other target +//! properties. + +const std = @import("../std.zig"); +const Allocator = std.mem.Allocator; +const Path = std.Build.Cache.Path; +const Dwarf = std.debug.Dwarf; +const page_size = std.mem.page_size; +const assert = std.debug.assert; + +const Info = @This(); + +/// Sorted by key, ascending. 
+address_map: std.AutoArrayHashMapUnmanaged(u64, Dwarf.ElfModule), + +pub const LoadError = Dwarf.ElfModule.LoadError; + +pub fn load(gpa: Allocator, path: Path) LoadError!Info { + var sections: Dwarf.SectionArray = Dwarf.null_section_array; + const elf_module = try Dwarf.ElfModule.loadPath(gpa, path, null, null, &sections, null); + var info: Info = .{ + .address_map = .{}, + }; + try info.address_map.put(gpa, elf_module.base_address, elf_module); + return info; +} + +pub fn deinit(info: *Info, gpa: Allocator) void { + for (info.address_map.values()) |*elf_module| { + elf_module.dwarf.deinit(gpa); + } + info.address_map.deinit(gpa); + info.* = undefined; +} + +pub const ResolveSourceLocationsError = error{ + MissingDebugInfo, + InvalidDebugInfo, +} || Allocator.Error; + +pub fn resolveSourceLocations( + info: *Info, + gpa: Allocator, + sorted_pc_addrs: []const u64, + /// Asserts its length equals length of `sorted_pc_addrs`. + output: []std.debug.SourceLocation, +) ResolveSourceLocationsError!void { + assert(sorted_pc_addrs.len == output.len); + if (info.address_map.entries.len != 1) @panic("TODO"); + const elf_module = &info.address_map.values()[0]; + return elf_module.dwarf.resolveSourceLocations(gpa, sorted_pc_addrs, output); +} diff --git a/lib/std/debug/SelfInfo.zig b/lib/std/debug/SelfInfo.zig index f9747a088e..79cbd19a41 100644 --- a/lib/std/debug/SelfInfo.zig +++ b/lib/std/debug/SelfInfo.zig @@ -587,7 +587,7 @@ pub const Module = switch (native_os) { } if (section_index == null) continue; - const section_bytes = try chopSlice(mapped_mem, sect.offset, sect.size); + const section_bytes = try Dwarf.chopSlice(mapped_mem, sect.offset, sect.size); sections[section_index.?] 
= .{ .data = section_bytes, .virtual_address = sect.addr, @@ -622,7 +622,7 @@ pub const Module = switch (native_os) { return result.value_ptr; } - pub fn getSymbolAtAddress(self: *@This(), allocator: Allocator, address: usize) !SymbolInfo { + pub fn getSymbolAtAddress(self: *@This(), allocator: Allocator, address: usize) !Dwarf.SymbolInfo { nosuspend { const result = try self.getOFileInfoForAddress(allocator, address); if (result.symbol == null) return .{}; @@ -641,7 +641,7 @@ pub const Module = switch (native_os) { const addr_off = result.relocated_address - result.symbol.?.addr; const o_file_di = &result.o_file_info.?.di; if (o_file_di.findCompileUnit(relocated_address_o)) |compile_unit| { - return SymbolInfo{ + return .{ .symbol_name = o_file_di.getSymbolName(relocated_address_o) orelse "???", .compile_unit_name = compile_unit.die.getAttrString( o_file_di, @@ -662,7 +662,7 @@ pub const Module = switch (native_os) { }; } else |err| switch (err) { error.MissingDebugInfo, error.InvalidDebugInfo => { - return SymbolInfo{ .symbol_name = stab_symbol }; + return .{ .symbol_name = stab_symbol }; }, else => return err, } @@ -729,7 +729,7 @@ pub const Module = switch (native_os) { } } - fn getSymbolFromPdb(self: *@This(), relocated_address: usize) !?SymbolInfo { + fn getSymbolFromPdb(self: *@This(), relocated_address: usize) !?std.debug.Symbol { var coff_section: *align(1) const coff.SectionHeader = undefined; const mod_index = for (self.pdb.?.sect_contribs) |sect_contrib| { if (sect_contrib.Section > self.coff_section_headers.len) continue; @@ -759,14 +759,14 @@ pub const Module = switch (native_os) { relocated_address - coff_section.virtual_address, ); - return SymbolInfo{ + return .{ .symbol_name = symbol_name, .compile_unit_name = obj_basename, .line_info = opt_line_info, }; } - pub fn getSymbolAtAddress(self: *@This(), allocator: Allocator, address: usize) !SymbolInfo { + pub fn getSymbolAtAddress(self: *@This(), allocator: Allocator, address: usize) 
!std.debug.Symbol { // Translate the VA into an address into this object const relocated_address = address - self.base_address; @@ -776,10 +776,10 @@ pub const Module = switch (native_os) { if (self.dwarf) |*dwarf| { const dwarf_address = relocated_address + self.coff_image_base; - return getSymbolFromDwarf(allocator, dwarf_address, dwarf); + return dwarf.getSymbol(allocator, dwarf_address); } - return SymbolInfo{}; + return .{}; } pub fn getDwarfInfoForAddress(self: *@This(), allocator: Allocator, address: usize) !?*const Dwarf { @@ -792,41 +792,18 @@ pub const Module = switch (native_os) { }; } }, - .linux, .netbsd, .freebsd, .dragonfly, .openbsd, .haiku, .solaris, .illumos => struct { - base_address: usize, - dwarf: Dwarf, - mapped_memory: []align(mem.page_size) const u8, - external_mapped_memory: ?[]align(mem.page_size) const u8, - - pub fn deinit(self: *@This(), allocator: Allocator) void { - self.dwarf.deinit(allocator); - posix.munmap(self.mapped_memory); - if (self.external_mapped_memory) |m| posix.munmap(m); - } - - pub fn getSymbolAtAddress(self: *@This(), allocator: Allocator, address: usize) !SymbolInfo { - // Translate the VA into an address into this object - const relocated_address = address - self.base_address; - return getSymbolFromDwarf(allocator, relocated_address, &self.dwarf); - } - - pub fn getDwarfInfoForAddress(self: *@This(), allocator: Allocator, address: usize) !?*const Dwarf { - _ = allocator; - _ = address; - return &self.dwarf; - } - }, + .linux, .netbsd, .freebsd, .dragonfly, .openbsd, .haiku, .solaris, .illumos => Dwarf.ElfModule, .wasi, .emscripten => struct { pub fn deinit(self: *@This(), allocator: Allocator) void { _ = self; _ = allocator; } - pub fn getSymbolAtAddress(self: *@This(), allocator: Allocator, address: usize) !SymbolInfo { + pub fn getSymbolAtAddress(self: *@This(), allocator: Allocator, address: usize) !std.debug.Symbol { _ = self; _ = allocator; _ = address; - return SymbolInfo{}; + return .{}; } pub fn 
getDwarfInfoForAddress(self: *@This(), allocator: Allocator, address: usize) !?*const Dwarf { @@ -1068,7 +1045,7 @@ pub fn readElfDebugInfo( expected_crc: ?u32, parent_sections: *Dwarf.SectionArray, parent_mapped_mem: ?[]align(mem.page_size) const u8, -) !Module { +) !Dwarf.ElfModule { nosuspend { const elf_file = (if (elf_filename) |filename| blk: { break :blk fs.cwd().openFile(filename, .{}); @@ -1078,176 +1055,15 @@ pub fn readElfDebugInfo( }; const mapped_mem = try mapWholeFile(elf_file); - if (expected_crc) |crc| if (crc != std.hash.crc.Crc32.hash(mapped_mem)) return error.InvalidDebugInfo; - - const hdr: *const elf.Ehdr = @ptrCast(&mapped_mem[0]); - if (!mem.eql(u8, hdr.e_ident[0..4], elf.MAGIC)) return error.InvalidElfMagic; - if (hdr.e_ident[elf.EI_VERSION] != 1) return error.InvalidElfVersion; - - const endian: std.builtin.Endian = switch (hdr.e_ident[elf.EI_DATA]) { - elf.ELFDATA2LSB => .little, - elf.ELFDATA2MSB => .big, - else => return error.InvalidElfEndian, - }; - assert(endian == native_endian); // this is our own debug info - - const shoff = hdr.e_shoff; - const str_section_off = shoff + @as(u64, hdr.e_shentsize) * @as(u64, hdr.e_shstrndx); - const str_shdr: *const elf.Shdr = @ptrCast(@alignCast(&mapped_mem[math.cast(usize, str_section_off) orelse return error.Overflow])); - const header_strings = mapped_mem[str_shdr.sh_offset..][0..str_shdr.sh_size]; - const shdrs = @as( - [*]const elf.Shdr, - @ptrCast(@alignCast(&mapped_mem[shoff])), - )[0..hdr.e_shnum]; - - var sections: Dwarf.SectionArray = Dwarf.null_section_array; - - // Combine section list. This takes ownership over any owned sections from the parent scope. 
- for (parent_sections, &sections) |*parent, *section| { - if (parent.*) |*p| { - section.* = p.*; - p.owned = false; - } - } - errdefer for (sections) |section| if (section) |s| if (s.owned) allocator.free(s.data); - - var separate_debug_filename: ?[]const u8 = null; - var separate_debug_crc: ?u32 = null; - - for (shdrs) |*shdr| { - if (shdr.sh_type == elf.SHT_NULL or shdr.sh_type == elf.SHT_NOBITS) continue; - const name = mem.sliceTo(header_strings[shdr.sh_name..], 0); - - if (mem.eql(u8, name, ".gnu_debuglink")) { - const gnu_debuglink = try chopSlice(mapped_mem, shdr.sh_offset, shdr.sh_size); - const debug_filename = mem.sliceTo(@as([*:0]const u8, @ptrCast(gnu_debuglink.ptr)), 0); - const crc_offset = mem.alignForward(usize, @intFromPtr(&debug_filename[debug_filename.len]) + 1, 4) - @intFromPtr(gnu_debuglink.ptr); - const crc_bytes = gnu_debuglink[crc_offset..][0..4]; - separate_debug_crc = mem.readInt(u32, crc_bytes, native_endian); - separate_debug_filename = debug_filename; - continue; - } - - var section_index: ?usize = null; - inline for (@typeInfo(Dwarf.Section.Id).Enum.fields, 0..) |section, i| { - if (mem.eql(u8, "." ++ section.name, name)) section_index = i; - } - if (section_index == null) continue; - if (sections[section_index.?] != null) continue; - - const section_bytes = try chopSlice(mapped_mem, shdr.sh_offset, shdr.sh_size); - sections[section_index.?] 
= if ((shdr.sh_flags & elf.SHF_COMPRESSED) > 0) blk: { - var section_stream = std.io.fixedBufferStream(section_bytes); - var section_reader = section_stream.reader(); - const chdr = section_reader.readStruct(elf.Chdr) catch continue; - if (chdr.ch_type != .ZLIB) continue; - - var zlib_stream = std.compress.zlib.decompressor(section_stream.reader()); - - const decompressed_section = try allocator.alloc(u8, chdr.ch_size); - errdefer allocator.free(decompressed_section); - - const read = zlib_stream.reader().readAll(decompressed_section) catch continue; - assert(read == decompressed_section.len); - - break :blk .{ - .data = decompressed_section, - .virtual_address = shdr.sh_addr, - .owned = true, - }; - } else .{ - .data = section_bytes, - .virtual_address = shdr.sh_addr, - .owned = false, - }; - } - - const missing_debug_info = - sections[@intFromEnum(Dwarf.Section.Id.debug_info)] == null or - sections[@intFromEnum(Dwarf.Section.Id.debug_abbrev)] == null or - sections[@intFromEnum(Dwarf.Section.Id.debug_str)] == null or - sections[@intFromEnum(Dwarf.Section.Id.debug_line)] == null; - - // Attempt to load debug info from an external file - // See: https://sourceware.org/gdb/onlinedocs/gdb/Separate-Debug-Files.html - if (missing_debug_info) { - - // Only allow one level of debug info nesting - if (parent_mapped_mem) |_| { - return error.MissingDebugInfo; - } - - const global_debug_directories = [_][]const u8{ - "/usr/lib/debug", - }; - - // /.build-id/<2-character id prefix>/.debug - if (build_id) |id| blk: { - if (id.len < 3) break :blk; - - // Either md5 (16 bytes) or sha1 (20 bytes) are used here in practice - const extension = ".debug"; - var id_prefix_buf: [2]u8 = undefined; - var filename_buf: [38 + extension.len]u8 = undefined; - - _ = std.fmt.bufPrint(&id_prefix_buf, "{s}", .{std.fmt.fmtSliceHexLower(id[0..1])}) catch unreachable; - const filename = std.fmt.bufPrint( - &filename_buf, - "{s}" ++ extension, - .{std.fmt.fmtSliceHexLower(id[1..])}, - ) catch break 
:blk; - - for (global_debug_directories) |global_directory| { - const path = try fs.path.join(allocator, &.{ global_directory, ".build-id", &id_prefix_buf, filename }); - defer allocator.free(path); - - return readElfDebugInfo(allocator, path, null, separate_debug_crc, &sections, mapped_mem) catch continue; - } - } - - // use the path from .gnu_debuglink, in the same search order as gdb - if (separate_debug_filename) |separate_filename| blk: { - if (elf_filename != null and mem.eql(u8, elf_filename.?, separate_filename)) return error.MissingDebugInfo; - - // <cwd>/<gnu_debuglink> - if (readElfDebugInfo(allocator, separate_filename, null, separate_debug_crc, &sections, mapped_mem)) |debug_info| return debug_info else |_| {} - - // <cwd>/.debug/<gnu_debuglink> - { - const path = try fs.path.join(allocator, &.{ ".debug", separate_filename }); - defer allocator.free(path); - - if (readElfDebugInfo(allocator, path, null, separate_debug_crc, &sections, mapped_mem)) |debug_info| return debug_info else |_| {} - } - - var cwd_buf: [fs.max_path_bytes]u8 = undefined; - const cwd_path = posix.realpath(".", &cwd_buf) catch break :blk; - - // <global debug directory>/<absolute folder of current binary>/<gnu_debuglink> - for (global_debug_directories) |global_directory| { - const path = try fs.path.join(allocator, &.{ global_directory, cwd_path, separate_filename }); - defer allocator.free(path); - if (readElfDebugInfo(allocator, path, null, separate_debug_crc, &sections, mapped_mem)) |debug_info| return debug_info else |_| {} - } - } - - return error.MissingDebugInfo; - } - - var di = Dwarf{ - .endian = endian, - .sections = sections, - .is_macho = false, - }; - - try Dwarf.open(&di, allocator); - - return .{ - .base_address = undefined, - .dwarf = di, - .mapped_memory = parent_mapped_mem orelse mapped_mem, - .external_mapped_memory = if (parent_mapped_mem != null) mapped_mem else null, - }; + return Dwarf.ElfModule.load( + allocator, + mapped_mem, + build_id, + expected_crc, + parent_sections, + parent_mapped_mem, + elf_filename, + ); } } @@ -1289,22 +1105,6 @@ fn mapWholeFile(file: File) 
![]align(mem.page_size) const u8 { } } -fn chopSlice(ptr: []const u8, offset: u64, size: u64) error{Overflow}![]const u8 { - const start = math.cast(usize, offset) orelse return error.Overflow; - const end = start + (math.cast(usize, size) orelse return error.Overflow); - return ptr[start..end]; -} - -pub const SymbolInfo = struct { - symbol_name: []const u8 = "???", - compile_unit_name: []const u8 = "???", - line_info: ?std.debug.SourceLocation = null, - - pub fn deinit(self: SymbolInfo, allocator: Allocator) void { - if (self.line_info) |li| allocator.free(li.file_name); - } -}; - fn machoSearchSymbols(symbols: []const MachoSymbol, address: usize) ?*const MachoSymbol { var min: usize = 0; var max: usize = symbols.len - 1; @@ -1350,26 +1150,6 @@ test machoSearchSymbols { try testing.expectEqual(&symbols[2], machoSearchSymbols(&symbols, 5000).?); } -fn getSymbolFromDwarf(allocator: Allocator, address: u64, di: *Dwarf) !SymbolInfo { - if (nosuspend di.findCompileUnit(address)) |compile_unit| { - return SymbolInfo{ - .symbol_name = nosuspend di.getSymbolName(address) orelse "???", - .compile_unit_name = compile_unit.die.getAttrString(di, std.dwarf.AT.name, di.section(.debug_str), compile_unit.*) catch |err| switch (err) { - error.MissingDebugInfo, error.InvalidDebugInfo => "???", - }, - .line_info = nosuspend di.getLineNumberInfo(allocator, compile_unit.*, address) catch |err| switch (err) { - error.MissingDebugInfo, error.InvalidDebugInfo => null, - else => return err, - }, - }; - } else |err| switch (err) { - error.MissingDebugInfo, error.InvalidDebugInfo => { - return SymbolInfo{}; - }, - else => return err, - } -} - /// Unwind a frame using MachO compact unwind info (from __unwind_info). /// If the compact encoding can't encode a way to unwind a frame, it will /// defer unwinding to DWARF, in which case `.eh_frame` will be used if available. 
-- cgit v1.2.3 From de47acd732dca8b4d2f2b3559307f488ccac940d Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Fri, 2 Aug 2024 17:45:31 -0700 Subject: code coverage dumping tool basic implementation * std.debug.Dwarf: add `sortCompileUnits` along with a field to track the state for the purpose of assertions and correct API usage. This makes batch lookups faster. - in the future, findCompileUnit should be enhanced to rely on sorted compile units as well. * implement `std.debug.Dwarf.resolveSourceLocations` as well as `std.debug.Info.resolveSourceLocations`. It's still pretty slow, since it calls getLineNumberInfo for each array element, repeating a lot of work unnecessarily. * integrate these APIs with `std.Progress` to understand what is taking so long. The output I'm seeing from this tool shows a lot of missing source locations. In particular, the main area of interest is missing for my tokenizer fuzzing example. --- lib/std/debug.zig | 6 ++++ lib/std/debug/Dwarf.zig | 85 +++++++++++++++++++++++++++++++++++++++++++++---- lib/std/debug/Info.zig | 17 ++++++---- tools/dump-cov.zig | 10 ++++-- 4 files changed, 102 insertions(+), 16 deletions(-) (limited to 'lib/std/debug') diff --git a/lib/std/debug.zig b/lib/std/debug.zig index 907f7711a7..6d034146c3 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -27,6 +27,12 @@ pub const SourceLocation = struct { line: u64, column: u64, file_name: []const u8, + + pub const invalid: SourceLocation = .{ + .line = 0, + .column = 0, + .file_name = &.{}, + }; }; pub const Symbol = struct { diff --git a/lib/std/debug/Dwarf.zig b/lib/std/debug/Dwarf.zig index 3c150b3b18..170fa774c0 100644 --- a/lib/std/debug/Dwarf.zig +++ b/lib/std/debug/Dwarf.zig @@ -39,6 +39,7 @@ pub const call_frame = @import("Dwarf/call_frame.zig"); endian: std.builtin.Endian, sections: SectionArray = null_section_array, is_macho: bool, +compile_units_sorted: bool, // Filled later by the initializer abbrev_table_list: std.ArrayListUnmanaged(Abbrev.Table) = 
.{}, @@ -728,9 +729,9 @@ pub const OpenError = ScanError; /// Initialize DWARF info. The caller has the responsibility to initialize most /// the `Dwarf` fields before calling. `binary_mem` is the raw bytes of the /// main binary file (not the secondary debug info file). -pub fn open(di: *Dwarf, gpa: Allocator) OpenError!void { - try di.scanAllFunctions(gpa); - try di.scanAllCompileUnits(gpa); +pub fn open(d: *Dwarf, gpa: Allocator) OpenError!void { + try d.scanAllFunctions(gpa); + try d.scanAllCompileUnits(gpa); } const PcRange = struct { @@ -1061,6 +1062,39 @@ fn scanAllCompileUnits(di: *Dwarf, allocator: Allocator) ScanError!void { } } +/// Populate missing PC ranges in compilation units, and then sort them by start address. +/// Does not guarantee pc_range to be non-null because there could be missing debug info. +pub fn sortCompileUnits(d: *Dwarf) ScanError!void { + assert(!d.compile_units_sorted); + + for (d.compile_unit_list.items) |*cu| { + if (cu.pc_range != null) continue; + const ranges_value = cu.die.getAttr(AT.ranges) orelse continue; + var iter = DebugRangeIterator.init(ranges_value, d, cu) catch continue; + var start: u64 = maxInt(u64); + var end: u64 = 0; + while (try iter.next()) |range| { + start = @min(start, range.start_addr); + end = @max(end, range.end_addr); + } + if (end != 0) cu.pc_range = .{ + .start = start, + .end = end, + }; + } + + std.mem.sortUnstable(CompileUnit, d.compile_unit_list.items, {}, struct { + fn lessThan(ctx: void, a: CompileUnit, b: CompileUnit) bool { + _ = ctx; + const a_range = a.pc_range orelse return false; + const b_range = b.pc_range orelse return true; + return a_range.start < b_range.start; + } + }.lessThan); + + d.compile_units_sorted = true; +} + const DebugRangeIterator = struct { base_address: u64, section_type: Section.Id, @@ -1208,6 +1242,7 @@ const DebugRangeIterator = struct { } }; +/// TODO: change this to binary searching the sorted compile unit list pub fn findCompileUnit(di: *const Dwarf, 
target_address: u64) !*const CompileUnit { for (di.compile_unit_list.items) |*compile_unit| { if (compile_unit.pc_range) |range| { @@ -2275,6 +2310,7 @@ pub const ElfModule = struct { .endian = endian, .sections = sections, .is_macho = false, + .compile_units_sorted = false, }; try Dwarf.open(&di, gpa); @@ -2326,6 +2362,8 @@ pub const ElfModule = struct { } }; +pub const ResolveSourceLocationsError = Allocator.Error || DeprecatedFixedBufferReader.Error; + /// Given an array of virtual memory addresses, sorted ascending, outputs a /// corresponding array of source locations, by appending to the provided /// array list. @@ -2335,11 +2373,44 @@ pub fn resolveSourceLocations( sorted_pc_addrs: []const u64, /// Asserts its length equals length of `sorted_pc_addrs`. output: []std.debug.SourceLocation, -) error{ MissingDebugInfo, InvalidDebugInfo }!void { + parent_prog_node: std.Progress.Node, +) ResolveSourceLocationsError!void { assert(sorted_pc_addrs.len == output.len); - _ = d; - _ = gpa; - @panic("TODO"); + assert(d.compile_units_sorted); + + const prog_node = parent_prog_node.start("Resolve Source Locations", sorted_pc_addrs.len); + defer prog_node.end(); + + var cu_i: usize = 0; + var cu: *const CompileUnit = &d.compile_unit_list.items[0]; + var range = cu.pc_range.?; + next_pc: for (sorted_pc_addrs, output) |pc, *out| { + defer prog_node.completeOne(); + while (pc >= range.end) { + cu_i += 1; + if (cu_i >= d.compile_unit_list.items.len) { + out.* = std.debug.SourceLocation.invalid; + continue :next_pc; + } + cu = &d.compile_unit_list.items[cu_i]; + range = cu.pc_range orelse { + out.* = std.debug.SourceLocation.invalid; + continue :next_pc; + }; + } + if (pc < range.start) { + out.* = std.debug.SourceLocation.invalid; + continue :next_pc; + } + // TODO: instead of calling this function, break the function up into one that parses the + // information once and prepares a context that can be reused for the entire batch. 
+ if (getLineNumberInfo(d, gpa, cu.*, pc)) |src_loc| { + out.* = src_loc; + } else |err| switch (err) { + error.MissingDebugInfo, error.InvalidDebugInfo => out.* = std.debug.SourceLocation.invalid, + else => |e| return e, + } + } } fn getSymbol(di: *Dwarf, allocator: Allocator, address: u64) !std.debug.Symbol { diff --git a/lib/std/debug/Info.zig b/lib/std/debug/Info.zig index 5276ba68ec..3c61c4072f 100644 --- a/lib/std/debug/Info.zig +++ b/lib/std/debug/Info.zig @@ -20,9 +20,14 @@ address_map: std.AutoArrayHashMapUnmanaged(u64, Dwarf.ElfModule), pub const LoadError = Dwarf.ElfModule.LoadError; -pub fn load(gpa: Allocator, path: Path) LoadError!Info { +pub fn load(gpa: Allocator, path: Path, parent_prog_node: std.Progress.Node) LoadError!Info { var sections: Dwarf.SectionArray = Dwarf.null_section_array; - const elf_module = try Dwarf.ElfModule.loadPath(gpa, path, null, null, &sections, null); + var prog_node = parent_prog_node.start("Loading Debug Info", 0); + defer prog_node.end(); + var elf_module = try Dwarf.ElfModule.loadPath(gpa, path, null, null, &sections, null); + prog_node.end(); + prog_node = parent_prog_node.start("Sort Compile Units", 0); + try elf_module.dwarf.sortCompileUnits(); var info: Info = .{ .address_map = .{}, }; @@ -38,10 +43,7 @@ pub fn deinit(info: *Info, gpa: Allocator) void { info.* = undefined; } -pub const ResolveSourceLocationsError = error{ - MissingDebugInfo, - InvalidDebugInfo, -} || Allocator.Error; +pub const ResolveSourceLocationsError = Dwarf.ResolveSourceLocationsError; pub fn resolveSourceLocations( info: *Info, @@ -49,9 +51,10 @@ pub fn resolveSourceLocations( sorted_pc_addrs: []const u64, /// Asserts its length equals length of `sorted_pc_addrs`. 
output: []std.debug.SourceLocation, + parent_prog_node: std.Progress.Node, ) ResolveSourceLocationsError!void { assert(sorted_pc_addrs.len == output.len); if (info.address_map.entries.len != 1) @panic("TODO"); const elf_module = &info.address_map.values()[0]; - return elf_module.dwarf.resolveSourceLocations(gpa, sorted_pc_addrs, output); + return elf_module.dwarf.resolveSourceLocations(gpa, sorted_pc_addrs, output, parent_prog_node); } diff --git a/tools/dump-cov.zig b/tools/dump-cov.zig index aba2911a91..8449dec33e 100644 --- a/tools/dump-cov.zig +++ b/tools/dump-cov.zig @@ -28,7 +28,10 @@ pub fn main() !void { .sub_path = cov_file_name, }; - var debug_info = std.debug.Info.load(gpa, exe_path) catch |err| { + const prog_node = std.Progress.start(.{}); + defer prog_node.end(); + + var debug_info = std.debug.Info.load(gpa, exe_path, prog_node) catch |err| { fatal("failed to load debug info for {}: {s}", .{ exe_path, @errorName(err) }); }; defer debug_info.deinit(gpa); @@ -51,7 +54,10 @@ pub fn main() !void { assert(std.sort.isSorted(usize, pcs, {}, std.sort.asc(usize))); const source_locations = try arena.alloc(std.debug.SourceLocation, pcs.len); - try debug_info.resolveSourceLocations(gpa, pcs, source_locations); + try debug_info.resolveSourceLocations(gpa, pcs, source_locations, prog_node); + defer for (source_locations) |sl| { + gpa.free(sl.file_name); + }; for (pcs, source_locations) |pc, sl| { try stdout.print("{x}: {s}:{d}:{d}\n", .{ -- cgit v1.2.3 From 66954e833051872308641b3a1af12aa865d5d59a Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Fri, 2 Aug 2024 21:22:33 -0700 Subject: std.debug.FixedBufferReader is fine it does not need to be deprecated --- lib/std/debug.zig | 95 +------------------------------------ lib/std/debug/Dwarf.zig | 41 ++++++++-------- lib/std/debug/FixedBufferReader.zig | 91 +++++++++++++++++++++++++++++++++++ lib/std/debug/SelfInfo.zig | 2 +- 4 files changed, 114 insertions(+), 115 deletions(-) create mode 100644 
lib/std/debug/FixedBufferReader.zig (limited to 'lib/std/debug') diff --git a/lib/std/debug.zig b/lib/std/debug.zig index 6d034146c3..80c196e9d8 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -14,6 +14,7 @@ const native_os = builtin.os.tag; const native_endian = native_arch.endian(); pub const MemoryAccessor = @import("debug/MemoryAccessor.zig"); +pub const FixedBufferReader = @import("debug/FixedBufferReader.zig"); pub const Dwarf = @import("debug/Dwarf.zig"); pub const Pdb = @import("debug/Pdb.zig"); pub const SelfInfo = @import("debug/SelfInfo.zig"); @@ -1494,99 +1495,6 @@ pub const SafetyLock = struct { } }; -/// Deprecated. Don't use this, just read from your memory directly. -/// -/// This only exists because someone was too lazy to rework logic that used to -/// operate on an open file to operate on a memory buffer instead. -pub const DeprecatedFixedBufferReader = struct { - buf: []const u8, - pos: usize = 0, - endian: std.builtin.Endian, - - pub const Error = error{ EndOfBuffer, Overflow, InvalidBuffer }; - - pub fn seekTo(fbr: *DeprecatedFixedBufferReader, pos: u64) Error!void { - if (pos > fbr.buf.len) return error.EndOfBuffer; - fbr.pos = @intCast(pos); - } - - pub fn seekForward(fbr: *DeprecatedFixedBufferReader, amount: u64) Error!void { - if (fbr.buf.len - fbr.pos < amount) return error.EndOfBuffer; - fbr.pos += @intCast(amount); - } - - pub inline fn readByte(fbr: *DeprecatedFixedBufferReader) Error!u8 { - if (fbr.pos >= fbr.buf.len) return error.EndOfBuffer; - defer fbr.pos += 1; - return fbr.buf[fbr.pos]; - } - - pub fn readByteSigned(fbr: *DeprecatedFixedBufferReader) Error!i8 { - return @bitCast(try fbr.readByte()); - } - - pub fn readInt(fbr: *DeprecatedFixedBufferReader, comptime T: type) Error!T { - const size = @divExact(@typeInfo(T).Int.bits, 8); - if (fbr.buf.len - fbr.pos < size) return error.EndOfBuffer; - defer fbr.pos += size; - return std.mem.readInt(T, fbr.buf[fbr.pos..][0..size], fbr.endian); - } - - pub fn readIntChecked( 
- fbr: *DeprecatedFixedBufferReader, - comptime T: type, - ma: *MemoryAccessor, - ) Error!T { - if (ma.load(T, @intFromPtr(fbr.buf[fbr.pos..].ptr)) == null) - return error.InvalidBuffer; - - return fbr.readInt(T); - } - - pub fn readUleb128(fbr: *DeprecatedFixedBufferReader, comptime T: type) Error!T { - return std.leb.readUleb128(T, fbr); - } - - pub fn readIleb128(fbr: *DeprecatedFixedBufferReader, comptime T: type) Error!T { - return std.leb.readIleb128(T, fbr); - } - - pub fn readAddress(fbr: *DeprecatedFixedBufferReader, format: std.dwarf.Format) Error!u64 { - return switch (format) { - .@"32" => try fbr.readInt(u32), - .@"64" => try fbr.readInt(u64), - }; - } - - pub fn readAddressChecked( - fbr: *DeprecatedFixedBufferReader, - format: std.dwarf.Format, - ma: *MemoryAccessor, - ) Error!u64 { - return switch (format) { - .@"32" => try fbr.readIntChecked(u32, ma), - .@"64" => try fbr.readIntChecked(u64, ma), - }; - } - - pub fn readBytes(fbr: *DeprecatedFixedBufferReader, len: usize) Error![]const u8 { - if (fbr.buf.len - fbr.pos < len) return error.EndOfBuffer; - defer fbr.pos += len; - return fbr.buf[fbr.pos..][0..len]; - } - - pub fn readBytesTo(fbr: *DeprecatedFixedBufferReader, comptime sentinel: u8) Error![:sentinel]const u8 { - const end = @call(.always_inline, std.mem.indexOfScalarPos, .{ - u8, - fbr.buf, - fbr.pos, - sentinel, - }) orelse return error.EndOfBuffer; - defer fbr.pos = end + 1; - return fbr.buf[fbr.pos..end :sentinel]; - } -}; - /// Detect whether the program is being executed in the Valgrind virtual machine. /// /// When Valgrind integrations are disabled, this returns comptime-known false. 
@@ -1600,6 +1508,7 @@ pub inline fn inValgrind() bool { test { _ = &Dwarf; _ = &MemoryAccessor; + _ = &FixedBufferReader; _ = &Pdb; _ = &SelfInfo; _ = &dumpHex; diff --git a/lib/std/debug/Dwarf.zig b/lib/std/debug/Dwarf.zig index 170fa774c0..446dc58990 100644 --- a/lib/std/debug/Dwarf.zig +++ b/lib/std/debug/Dwarf.zig @@ -27,8 +27,7 @@ const maxInt = std.math.maxInt; const MemoryAccessor = std.debug.MemoryAccessor; const Path = std.Build.Cache.Path; -/// Did I mention this is deprecated? -const DeprecatedFixedBufferReader = std.debug.DeprecatedFixedBufferReader; +const FixedBufferReader = std.debug.FixedBufferReader; const Dwarf = @This(); @@ -328,7 +327,7 @@ pub const ExceptionFrameHeader = struct { var left: usize = 0; var len: usize = self.fde_count; - var fbr: DeprecatedFixedBufferReader = .{ .buf = self.entries, .endian = native_endian }; + var fbr: FixedBufferReader = .{ .buf = self.entries, .endian = native_endian }; while (len > 1) { const mid = left + len / 2; @@ -371,7 +370,7 @@ pub const ExceptionFrameHeader = struct { const eh_frame = @as([*]const u8, @ptrFromInt(self.eh_frame_ptr))[0 .. eh_frame_len orelse maxInt(u32)]; const fde_offset = fde_ptr - self.eh_frame_ptr; - var eh_frame_fbr: DeprecatedFixedBufferReader = .{ + var eh_frame_fbr: FixedBufferReader = .{ .buf = eh_frame, .pos = fde_offset, .endian = native_endian, @@ -429,9 +428,9 @@ pub const EntryHeader = struct { } /// Reads a header for either an FDE or a CIE, then advances the fbr to the position after the trailing structure. - /// `fbr` must be a DeprecatedFixedBufferReader backed by either the .eh_frame or .debug_frame sections. + /// `fbr` must be a FixedBufferReader backed by either the .eh_frame or .debug_frame sections. 
pub fn read( - fbr: *DeprecatedFixedBufferReader, + fbr: *FixedBufferReader, opt_ma: ?*MemoryAccessor, dwarf_section: Section.Id, ) !EntryHeader { @@ -544,7 +543,7 @@ pub const CommonInformationEntry = struct { ) !CommonInformationEntry { if (addr_size_bytes > 8) return error.UnsupportedAddrSize; - var fbr: DeprecatedFixedBufferReader = .{ .buf = cie_bytes, .endian = endian }; + var fbr: FixedBufferReader = .{ .buf = cie_bytes, .endian = endian }; const version = try fbr.readByte(); switch (dwarf_section) { @@ -678,7 +677,7 @@ pub const FrameDescriptionEntry = struct { ) !FrameDescriptionEntry { if (addr_size_bytes > 8) return error.InvalidAddrSize; - var fbr: DeprecatedFixedBufferReader = .{ .buf = fde_bytes, .endian = endian }; + var fbr: FixedBufferReader = .{ .buf = fde_bytes, .endian = endian }; const pc_begin = try readEhPointer(&fbr, cie.fde_pointer_enc, addr_size_bytes, .{ .pc_rel_base = try pcRelBase(@intFromPtr(&fde_bytes[fbr.pos]), pc_rel_offset), @@ -785,10 +784,10 @@ pub fn getSymbolName(di: *Dwarf, address: u64) ?[]const u8 { const ScanError = error{ InvalidDebugInfo, MissingDebugInfo, -} || Allocator.Error || std.debug.DeprecatedFixedBufferReader.Error; +} || Allocator.Error || std.debug.FixedBufferReader.Error; fn scanAllFunctions(di: *Dwarf, allocator: Allocator) ScanError!void { - var fbr: DeprecatedFixedBufferReader = .{ .buf = di.section(.debug_info).?, .endian = di.endian }; + var fbr: FixedBufferReader = .{ .buf = di.section(.debug_info).?, .endian = di.endian }; var this_unit_offset: u64 = 0; while (this_unit_offset < fbr.buf.len) { @@ -975,7 +974,7 @@ fn scanAllFunctions(di: *Dwarf, allocator: Allocator) ScanError!void { } fn scanAllCompileUnits(di: *Dwarf, allocator: Allocator) ScanError!void { - var fbr: DeprecatedFixedBufferReader = .{ .buf = di.section(.debug_info).?, .endian = di.endian }; + var fbr: FixedBufferReader = .{ .buf = di.section(.debug_info).?, .endian = di.endian }; var this_unit_offset: u64 = 0; var attrs_buf = 
std.ArrayList(Die.Attr).init(allocator); @@ -1100,7 +1099,7 @@ const DebugRangeIterator = struct { section_type: Section.Id, di: *const Dwarf, compile_unit: *const CompileUnit, - fbr: DeprecatedFixedBufferReader, + fbr: FixedBufferReader, pub fn init(ranges_value: *const FormValue, di: *const Dwarf, compile_unit: *const CompileUnit) !@This() { const section_type = if (compile_unit.version >= 5) Section.Id.debug_rnglists else Section.Id.debug_ranges; @@ -1275,7 +1274,7 @@ fn getAbbrevTable(di: *Dwarf, allocator: Allocator, abbrev_offset: u64) !*const } fn parseAbbrevTable(di: *Dwarf, allocator: Allocator, offset: u64) !Abbrev.Table { - var fbr: DeprecatedFixedBufferReader = .{ + var fbr: FixedBufferReader = .{ .buf = di.section(.debug_abbrev).?, .pos = cast(usize, offset) orelse return bad(), .endian = di.endian, @@ -1327,7 +1326,7 @@ fn parseAbbrevTable(di: *Dwarf, allocator: Allocator, offset: u64) !Abbrev.Table } fn parseDie( - fbr: *DeprecatedFixedBufferReader, + fbr: *FixedBufferReader, attrs_buf: []Die.Attr, abbrev_table: *const Abbrev.Table, format: Format, @@ -1362,7 +1361,7 @@ pub fn getLineNumberInfo( const compile_unit_cwd = try compile_unit.die.getAttrString(di, AT.comp_dir, di.section(.debug_line_str), compile_unit); const line_info_offset = try compile_unit.die.getAttrSecOffset(AT.stmt_list); - var fbr: DeprecatedFixedBufferReader = .{ .buf = di.section(.debug_line).?, .endian = di.endian }; + var fbr: FixedBufferReader = .{ .buf = di.section(.debug_line).?, .endian = di.endian }; try fbr.seekTo(line_info_offset); const unit_header = try readUnitHeader(&fbr, null); @@ -1655,7 +1654,7 @@ fn readDebugAddr(di: Dwarf, compile_unit: CompileUnit, index: u64) !u64 { /// of FDEs is built for binary searching during unwinding. 
pub fn scanAllUnwindInfo(di: *Dwarf, allocator: Allocator, base_address: usize) !void { if (di.section(.eh_frame_hdr)) |eh_frame_hdr| blk: { - var fbr: DeprecatedFixedBufferReader = .{ .buf = eh_frame_hdr, .endian = native_endian }; + var fbr: FixedBufferReader = .{ .buf = eh_frame_hdr, .endian = native_endian }; const version = try fbr.readByte(); if (version != 1) break :blk; @@ -1695,7 +1694,7 @@ pub fn scanAllUnwindInfo(di: *Dwarf, allocator: Allocator, base_address: usize) const frame_sections = [2]Section.Id{ .eh_frame, .debug_frame }; for (frame_sections) |frame_section| { if (di.section(frame_section)) |section_data| { - var fbr: DeprecatedFixedBufferReader = .{ .buf = section_data, .endian = di.endian }; + var fbr: FixedBufferReader = .{ .buf = section_data, .endian = di.endian }; while (fbr.pos < fbr.buf.len) { const entry_header = try EntryHeader.read(&fbr, null, frame_section); switch (entry_header.type) { @@ -1739,7 +1738,7 @@ pub fn scanAllUnwindInfo(di: *Dwarf, allocator: Allocator, base_address: usize) } fn parseFormValue( - fbr: *DeprecatedFixedBufferReader, + fbr: *FixedBufferReader, form_id: u64, format: Format, implicit_const: ?i64, @@ -1937,7 +1936,7 @@ const UnitHeader = struct { unit_length: u64, }; -fn readUnitHeader(fbr: *DeprecatedFixedBufferReader, opt_ma: ?*MemoryAccessor) ScanError!UnitHeader { +fn readUnitHeader(fbr: *FixedBufferReader, opt_ma: ?*MemoryAccessor) ScanError!UnitHeader { return switch (try if (opt_ma) |ma| fbr.readIntChecked(u32, ma) else fbr.readInt(u32)) { 0...0xfffffff0 - 1 => |unit_length| .{ .format = .@"32", @@ -2002,7 +2001,7 @@ const EhPointerContext = struct { text_rel_base: ?u64 = null, function_rel_base: ?u64 = null, }; -fn readEhPointer(fbr: *DeprecatedFixedBufferReader, enc: u8, addr_size_bytes: u8, ctx: EhPointerContext) !?u64 { +fn readEhPointer(fbr: *FixedBufferReader, enc: u8, addr_size_bytes: u8, ctx: EhPointerContext) !?u64 { if (enc == EH.PE.omit) return null; const value: union(enum) { @@ -2362,7 
+2361,7 @@ pub const ElfModule = struct { } }; -pub const ResolveSourceLocationsError = Allocator.Error || DeprecatedFixedBufferReader.Error; +pub const ResolveSourceLocationsError = Allocator.Error || FixedBufferReader.Error; /// Given an array of virtual memory addresses, sorted ascending, outputs a /// corresponding array of source locations, by appending to the provided diff --git a/lib/std/debug/FixedBufferReader.zig b/lib/std/debug/FixedBufferReader.zig new file mode 100644 index 0000000000..2a90ba569e --- /dev/null +++ b/lib/std/debug/FixedBufferReader.zig @@ -0,0 +1,91 @@ +const std = @import("std.zig"); +const MemoryAccessor = std.debug.MemoryAccessor; + +const FixedBufferReader = @This(); + +buf: []const u8, +pos: usize = 0, +endian: std.builtin.Endian, + +pub const Error = error{ EndOfBuffer, Overflow, InvalidBuffer }; + +pub fn seekTo(fbr: *FixedBufferReader, pos: u64) Error!void { + if (pos > fbr.buf.len) return error.EndOfBuffer; + fbr.pos = @intCast(pos); +} + +pub fn seekForward(fbr: *FixedBufferReader, amount: u64) Error!void { + if (fbr.buf.len - fbr.pos < amount) return error.EndOfBuffer; + fbr.pos += @intCast(amount); +} + +pub inline fn readByte(fbr: *FixedBufferReader) Error!u8 { + if (fbr.pos >= fbr.buf.len) return error.EndOfBuffer; + defer fbr.pos += 1; + return fbr.buf[fbr.pos]; +} + +pub fn readByteSigned(fbr: *FixedBufferReader) Error!i8 { + return @bitCast(try fbr.readByte()); +} + +pub fn readInt(fbr: *FixedBufferReader, comptime T: type) Error!T { + const size = @divExact(@typeInfo(T).Int.bits, 8); + if (fbr.buf.len - fbr.pos < size) return error.EndOfBuffer; + defer fbr.pos += size; + return std.mem.readInt(T, fbr.buf[fbr.pos..][0..size], fbr.endian); +} + +pub fn readIntChecked( + fbr: *FixedBufferReader, + comptime T: type, + ma: *MemoryAccessor, +) Error!T { + if (ma.load(T, @intFromPtr(fbr.buf[fbr.pos..].ptr)) == null) + return error.InvalidBuffer; + + return fbr.readInt(T); +} + +pub fn readUleb128(fbr: *FixedBufferReader, 
comptime T: type) Error!T { + return std.leb.readUleb128(T, fbr); +} + +pub fn readIleb128(fbr: *FixedBufferReader, comptime T: type) Error!T { + return std.leb.readIleb128(T, fbr); +} + +pub fn readAddress(fbr: *FixedBufferReader, format: std.dwarf.Format) Error!u64 { + return switch (format) { + .@"32" => try fbr.readInt(u32), + .@"64" => try fbr.readInt(u64), + }; +} + +pub fn readAddressChecked( + fbr: *FixedBufferReader, + format: std.dwarf.Format, + ma: *MemoryAccessor, +) Error!u64 { + return switch (format) { + .@"32" => try fbr.readIntChecked(u32, ma), + .@"64" => try fbr.readIntChecked(u64, ma), + }; +} + +pub fn readBytes(fbr: *FixedBufferReader, len: usize) Error![]const u8 { + if (fbr.buf.len - fbr.pos < len) return error.EndOfBuffer; + defer fbr.pos += len; + return fbr.buf[fbr.pos..][0..len]; +} + +pub fn readBytesTo(fbr: *FixedBufferReader, comptime sentinel: u8) Error![:sentinel]const u8 { + const end = @call(.always_inline, std.mem.indexOfScalarPos, .{ + u8, + fbr.buf, + fbr.pos, + sentinel, + }) orelse return error.EndOfBuffer; + defer fbr.pos = end + 1; + return fbr.buf[fbr.pos..end :sentinel]; +} diff --git a/lib/std/debug/SelfInfo.zig b/lib/std/debug/SelfInfo.zig index 79cbd19a41..ba0d7bc039 100644 --- a/lib/std/debug/SelfInfo.zig +++ b/lib/std/debug/SelfInfo.zig @@ -1576,7 +1576,7 @@ pub fn unwindFrameDwarf( const frame_section = di.section(dwarf_section) orelse return error.MissingFDE; if (fde_offset >= frame_section.len) return error.MissingFDE; - var fbr: std.debug.DeprecatedFixedBufferReader = .{ + var fbr: std.debug.FixedBufferReader = .{ .buf = frame_section, .pos = fde_offset, .endian = di.endian, -- cgit v1.2.3 From 1792258dc813cde7083fd7860442e6ec92afd4ba Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Fri, 2 Aug 2024 23:31:28 -0700 Subject: std.debug.Dwarf: precompute .debug_line table yields a 60x speedup for resolveSourceLocations in debug builds --- lib/std/debug.zig | 2 +- lib/std/debug/Dwarf.zig | 310 
+++++++++++++++++------------------- lib/std/debug/FixedBufferReader.zig | 4 +- 3 files changed, 150 insertions(+), 166 deletions(-) (limited to 'lib/std/debug') diff --git a/lib/std/debug.zig b/lib/std/debug.zig index 80c196e9d8..7f4f6b7df2 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -762,7 +762,7 @@ pub fn writeCurrentStackTrace( // an overflow. We do not need to signal `StackIterator` as it will correctly detect this // condition on the subsequent iteration and return `null` thus terminating the loop. // same behaviour for x86-windows-msvc - const address = if (return_address == 0) return_address else return_address - 1; + const address = return_address -| 1; try printSourceAtAddress(debug_info, out_stream, address, tty_config); } else printLastUnwindError(&it, debug_info, out_stream, tty_config); } diff --git a/lib/std/debug/Dwarf.zig b/lib/std/debug/Dwarf.zig index 446dc58990..06ffad9441 100644 --- a/lib/std/debug/Dwarf.zig +++ b/lib/std/debug/Dwarf.zig @@ -138,6 +138,29 @@ pub const CompileUnit = struct { rnglists_base: usize, loclists_base: usize, frame_base: ?*const FormValue, + + src_loc_cache: ?SrcLocCache, + + pub const SrcLocCache = struct { + line_table: LineTable, + directories: []const FileEntry, + files: []FileEntry, + version: u16, + + pub const LineTable = std.AutoArrayHashMapUnmanaged(u64, LineEntry); + + pub const LineEntry = struct { + line: u32, + column: u32, + file: u32, + }; + + pub fn findSource(slc: *const SrcLocCache, address: u64) !LineEntry { + const index = std.sort.upperBound(u64, address, slc.line_table.keys(), {}, std.sort.asc(u64)); + if (index == 0) return missing(); + return slc.line_table.values()[index - 1]; + } + }; }; pub const FormValue = union(enum) { @@ -760,6 +783,11 @@ pub fn deinit(di: *Dwarf, gpa: Allocator) void { } di.abbrev_table_list.deinit(gpa); for (di.compile_unit_list.items) |*cu| { + if (cu.src_loc_cache) |*slc| { + slc.line_table.deinit(gpa); + gpa.free(slc.directories); + gpa.free(slc.files); 
+ } cu.die.deinit(gpa); } di.compile_unit_list.deinit(gpa); @@ -846,6 +874,7 @@ fn scanAllFunctions(di: *Dwarf, allocator: Allocator) ScanError!void { .rnglists_base = 0, .loclists_base = 0, .frame_base = null, + .src_loc_cache = null, }; while (true) { @@ -1032,6 +1061,7 @@ fn scanAllCompileUnits(di: *Dwarf, allocator: Allocator) ScanError!void { .rnglists_base = if (compile_unit_die.getAttr(AT.rnglists_base)) |fv| try fv.getUInt(usize) else 0, .loclists_base = if (compile_unit_die.getAttr(AT.loclists_base)) |fv| try fv.getUInt(usize) else 0, .frame_base = compile_unit_die.getAttr(AT.frame_base), + .src_loc_cache = null, }; compile_unit.pc_range = x: { @@ -1242,7 +1272,7 @@ const DebugRangeIterator = struct { }; /// TODO: change this to binary searching the sorted compile unit list -pub fn findCompileUnit(di: *const Dwarf, target_address: u64) !*const CompileUnit { +pub fn findCompileUnit(di: *const Dwarf, target_address: u64) !*CompileUnit { for (di.compile_unit_list.items) |*compile_unit| { if (compile_unit.pc_range) |range| { if (target_address >= range.start and target_address < range.end) return compile_unit; @@ -1352,34 +1382,36 @@ fn parseDie( }; } -pub fn getLineNumberInfo( - di: *Dwarf, - allocator: Allocator, - compile_unit: CompileUnit, - target_address: u64, -) !std.debug.SourceLocation { - const compile_unit_cwd = try compile_unit.die.getAttrString(di, AT.comp_dir, di.section(.debug_line_str), compile_unit); +fn runLineNumberProgram(d: *Dwarf, gpa: Allocator, compile_unit: *CompileUnit) !CompileUnit.SrcLocCache { + const compile_unit_cwd = try compile_unit.die.getAttrString(d, AT.comp_dir, d.section(.debug_line_str), compile_unit.*); const line_info_offset = try compile_unit.die.getAttrSecOffset(AT.stmt_list); - var fbr: FixedBufferReader = .{ .buf = di.section(.debug_line).?, .endian = di.endian }; + var fbr: FixedBufferReader = .{ + .buf = d.section(.debug_line).?, + .endian = d.endian, + }; try fbr.seekTo(line_info_offset); const unit_header = try 
readUnitHeader(&fbr, null); if (unit_header.unit_length == 0) return missing(); + const next_offset = unit_header.header_length + unit_header.unit_length; const version = try fbr.readInt(u16); if (version < 2) return bad(); - var addr_size: u8 = switch (unit_header.format) { - .@"32" => 4, - .@"64" => 8, + const addr_size: u8, const seg_size: u8 = if (version >= 5) .{ + try fbr.readByte(), + try fbr.readByte(), + } else .{ + switch (unit_header.format) { + .@"32" => 4, + .@"64" => 8, + }, + 0, }; - var seg_size: u8 = 0; - if (version >= 5) { - addr_size = try fbr.readByte(); - seg_size = try fbr.readByte(); - } + _ = addr_size; + _ = seg_size; const prologue_length = try fbr.readAddress(unit_header.format); const prog_start_offset = fbr.pos + prologue_length; @@ -1388,8 +1420,8 @@ pub fn getLineNumberInfo( if (minimum_instruction_length == 0) return bad(); if (version >= 4) { - // maximum_operations_per_instruction - _ = try fbr.readByte(); + const maximum_operations_per_instruction = try fbr.readByte(); + _ = maximum_operations_per_instruction; } const default_is_stmt = (try fbr.readByte()) != 0; @@ -1402,18 +1434,18 @@ pub fn getLineNumberInfo( const standard_opcode_lengths = try fbr.readBytes(opcode_base - 1); - var include_directories = std.ArrayList(FileEntry).init(allocator); - defer include_directories.deinit(); - var file_entries = std.ArrayList(FileEntry).init(allocator); - defer file_entries.deinit(); + var directories: std.ArrayListUnmanaged(FileEntry) = .{}; + defer directories.deinit(gpa); + var file_entries: std.ArrayListUnmanaged(FileEntry) = .{}; + defer file_entries.deinit(gpa); if (version < 5) { - try include_directories.append(.{ .path = compile_unit_cwd }); + try directories.append(gpa, .{ .path = compile_unit_cwd }); while (true) { const dir = try fbr.readBytesTo(0); if (dir.len == 0) break; - try include_directories.append(.{ .path = dir }); + try directories.append(gpa, .{ .path = dir }); } while (true) { @@ -1422,7 +1454,7 @@ pub fn 
getLineNumberInfo( const dir_index = try fbr.readUleb128(u32); const mtime = try fbr.readUleb128(u64); const size = try fbr.readUleb128(u64); - try file_entries.append(.{ + try file_entries.append(gpa, .{ .path = file_name, .dir_index = dir_index, .mtime = mtime, @@ -1446,52 +1478,10 @@ pub fn getLineNumberInfo( } const directories_count = try fbr.readUleb128(usize); - try include_directories.ensureUnusedCapacity(directories_count); - { - var i: usize = 0; - while (i < directories_count) : (i += 1) { - var e: FileEntry = .{ .path = &.{} }; - for (dir_ent_fmt_buf[0..directory_entry_format_count]) |ent_fmt| { - const form_value = try parseFormValue( - &fbr, - ent_fmt.form_code, - unit_header.format, - null, - ); - switch (ent_fmt.content_type_code) { - DW.LNCT.path => e.path = try form_value.getString(di.*), - DW.LNCT.directory_index => e.dir_index = try form_value.getUInt(u32), - DW.LNCT.timestamp => e.mtime = try form_value.getUInt(u64), - DW.LNCT.size => e.size = try form_value.getUInt(u64), - DW.LNCT.MD5 => e.md5 = switch (form_value) { - .data16 => |data16| data16.*, - else => return bad(), - }, - else => continue, - } - } - include_directories.appendAssumeCapacity(e); - } - } - } - var file_ent_fmt_buf: [10]FileEntFmt = undefined; - const file_name_entry_format_count = try fbr.readByte(); - if (file_name_entry_format_count > file_ent_fmt_buf.len) return bad(); - for (file_ent_fmt_buf[0..file_name_entry_format_count]) |*ent_fmt| { - ent_fmt.* = .{ - .content_type_code = try fbr.readUleb128(u8), - .form_code = try fbr.readUleb128(u16), - }; - } - - const file_names_count = try fbr.readUleb128(usize); - try file_entries.ensureUnusedCapacity(file_names_count); - { - var i: usize = 0; - while (i < file_names_count) : (i += 1) { - var e: FileEntry = .{ .path = &.{} }; - for (file_ent_fmt_buf[0..file_name_entry_format_count]) |ent_fmt| { + for (try directories.addManyAsSlice(gpa, directories_count)) |*e| { + e.* = .{ .path = &.{} }; + for 
(dir_ent_fmt_buf[0..directory_entry_format_count]) |ent_fmt| { const form_value = try parseFormValue( &fbr, ent_fmt.form_code, @@ -1499,7 +1489,7 @@ pub fn getLineNumberInfo( null, ); switch (ent_fmt.content_type_code) { - DW.LNCT.path => e.path = try form_value.getString(di.*), + DW.LNCT.path => e.path = try form_value.getString(d.*), DW.LNCT.directory_index => e.dir_index = try form_value.getUInt(u32), DW.LNCT.timestamp => e.mtime = try form_value.getUInt(u64), DW.LNCT.size => e.size = try form_value.getUInt(u64), @@ -1510,17 +1500,49 @@ pub fn getLineNumberInfo( else => continue, } } - file_entries.appendAssumeCapacity(e); + } + } + + var file_ent_fmt_buf: [10]FileEntFmt = undefined; + const file_name_entry_format_count = try fbr.readByte(); + if (file_name_entry_format_count > file_ent_fmt_buf.len) return bad(); + for (file_ent_fmt_buf[0..file_name_entry_format_count]) |*ent_fmt| { + ent_fmt.* = .{ + .content_type_code = try fbr.readUleb128(u8), + .form_code = try fbr.readUleb128(u16), + }; + } + + const file_names_count = try fbr.readUleb128(usize); + try file_entries.ensureUnusedCapacity(gpa, file_names_count); + + for (try file_entries.addManyAsSlice(gpa, file_names_count)) |*e| { + e.* = .{ .path = &.{} }; + for (file_ent_fmt_buf[0..file_name_entry_format_count]) |ent_fmt| { + const form_value = try parseFormValue( + &fbr, + ent_fmt.form_code, + unit_header.format, + null, + ); + switch (ent_fmt.content_type_code) { + DW.LNCT.path => e.path = try form_value.getString(d.*), + DW.LNCT.directory_index => e.dir_index = try form_value.getUInt(u32), + DW.LNCT.timestamp => e.mtime = try form_value.getUInt(u64), + DW.LNCT.size => e.size = try form_value.getUInt(u64), + DW.LNCT.MD5 => e.md5 = switch (form_value) { + .data16 => |data16| data16.*, + else => return bad(), + }, + else => continue, + } } } } - var prog = LineNumberProgram.init( - default_is_stmt, - include_directories.items, - target_address, - version, - ); + var prog = 
LineNumberProgram.init(default_is_stmt, version); + var line_table: CompileUnit.SrcLocCache.LineTable = .{}; + errdefer line_table.deinit(gpa); try fbr.seekTo(prog_start_offset); @@ -1536,7 +1558,7 @@ pub fn getLineNumberInfo( switch (sub_op) { DW.LNE.end_sequence => { prog.end_sequence = true; - if (try prog.checkLineMatch(allocator, file_entries.items)) |info| return info; + try prog.addRow(gpa, &line_table); prog.reset(); }, DW.LNE.set_address => { @@ -1548,7 +1570,7 @@ pub fn getLineNumberInfo( const dir_index = try fbr.readUleb128(u32); const mtime = try fbr.readUleb128(u64); const size = try fbr.readUleb128(u64); - try file_entries.append(.{ + try file_entries.append(gpa, .{ .path = path, .dir_index = dir_index, .mtime = mtime, @@ -1564,12 +1586,12 @@ pub fn getLineNumberInfo( const inc_line = @as(i32, line_base) + @as(i32, adjusted_opcode % line_range); prog.line += inc_line; prog.address += inc_addr; - if (try prog.checkLineMatch(allocator, file_entries.items)) |info| return info; + try prog.addRow(gpa, &line_table); prog.basic_block = false; } else { switch (opcode) { DW.LNS.copy => { - if (try prog.checkLineMatch(allocator, file_entries.items)) |info| return info; + try prog.addRow(gpa, &line_table); prog.basic_block = false; }, DW.LNS.advance_pc => { @@ -1611,7 +1633,35 @@ pub fn getLineNumberInfo( } } - return missing(); + return .{ + .line_table = line_table, + .directories = try directories.toOwnedSlice(gpa), + .files = try file_entries.toOwnedSlice(gpa), + .version = version, + }; +} + +pub fn getLineNumberInfo( + d: *Dwarf, + gpa: Allocator, + compile_unit: *CompileUnit, + target_address: u64, +) !std.debug.SourceLocation { + if (compile_unit.src_loc_cache == null) + compile_unit.src_loc_cache = try runLineNumberProgram(d, gpa, compile_unit); + const slc = &compile_unit.src_loc_cache.?; + const entry = try slc.findSource(target_address); + const file_index = entry.file - @intFromBool(slc.version < 5); + if (file_index >= slc.files.len) return bad(); 
+ const file_entry = &slc.files[file_index]; + if (file_entry.dir_index >= slc.directories.len) return bad(); + const dir_name = slc.directories[file_entry.dir_index].path; + const file_name = try std.fs.path.join(gpa, &.{ dir_name, file_entry.path }); + return .{ + .line = entry.line, + .column = entry.column, + .file_name = file_name, + }; } fn getString(di: Dwarf, offset: u64) ![:0]const u8 { @@ -1826,17 +1876,6 @@ const LineNumberProgram = struct { end_sequence: bool, default_is_stmt: bool, - target_address: u64, - include_dirs: []const FileEntry, - - prev_valid: bool, - prev_address: u64, - prev_file: usize, - prev_line: i64, - prev_column: u64, - prev_is_stmt: bool, - prev_basic_block: bool, - prev_end_sequence: bool, // Reset the state machine following the DWARF specification pub fn reset(self: *LineNumberProgram) void { @@ -1847,24 +1886,10 @@ const LineNumberProgram = struct { self.is_stmt = self.default_is_stmt; self.basic_block = false; self.end_sequence = false; - // Invalidate all the remaining fields - self.prev_valid = false; - self.prev_address = 0; - self.prev_file = undefined; - self.prev_line = undefined; - self.prev_column = undefined; - self.prev_is_stmt = undefined; - self.prev_basic_block = undefined; - self.prev_end_sequence = undefined; } - pub fn init( - is_stmt: bool, - include_dirs: []const FileEntry, - target_address: u64, - version: u16, - ) LineNumberProgram { - return LineNumberProgram{ + pub fn init(is_stmt: bool, version: u16) LineNumberProgram { + return .{ .address = 0, .file = 1, .line = 1, @@ -1873,60 +1898,17 @@ const LineNumberProgram = struct { .is_stmt = is_stmt, .basic_block = false, .end_sequence = false, - .include_dirs = include_dirs, .default_is_stmt = is_stmt, - .target_address = target_address, - .prev_valid = false, - .prev_address = 0, - .prev_file = undefined, - .prev_line = undefined, - .prev_column = undefined, - .prev_is_stmt = undefined, - .prev_basic_block = undefined, - .prev_end_sequence = undefined, }; } 
- pub fn checkLineMatch( - self: *LineNumberProgram, - allocator: Allocator, - file_entries: []const FileEntry, - ) !?std.debug.SourceLocation { - if (self.prev_valid and - self.target_address >= self.prev_address and - self.target_address < self.address) - { - const file_index = if (self.version >= 5) self.prev_file else i: { - if (self.prev_file == 0) return missing(); - break :i self.prev_file - 1; - }; - - if (file_index >= file_entries.len) return bad(); - const file_entry = &file_entries[file_index]; - - if (file_entry.dir_index >= self.include_dirs.len) return bad(); - const dir_name = self.include_dirs[file_entry.dir_index].path; - - const file_name = try std.fs.path.join(allocator, &[_][]const u8{ - dir_name, file_entry.path, - }); - - return std.debug.SourceLocation{ - .line = if (self.prev_line >= 0) @as(u64, @intCast(self.prev_line)) else 0, - .column = self.prev_column, - .file_name = file_name, - }; - } - - self.prev_valid = true; - self.prev_address = self.address; - self.prev_file = self.file; - self.prev_line = self.line; - self.prev_column = self.column; - self.prev_is_stmt = self.is_stmt; - self.prev_basic_block = self.basic_block; - self.prev_end_sequence = self.end_sequence; - return null; + pub fn addRow(prog: *LineNumberProgram, gpa: Allocator, table: *CompileUnit.SrcLocCache.LineTable) !void { + if (prog.line == 0) return; // garbage data + try table.put(gpa, prog.address, .{ + .line = cast(u32, prog.line) orelse maxInt(u32), + .column = cast(u32, prog.column) orelse maxInt(u32), + .file = cast(u32, prog.file) orelse return bad(), + }); } }; @@ -2381,7 +2363,7 @@ pub fn resolveSourceLocations( defer prog_node.end(); var cu_i: usize = 0; - var cu: *const CompileUnit = &d.compile_unit_list.items[0]; + var cu: *CompileUnit = &d.compile_unit_list.items[0]; var range = cu.pc_range.?; next_pc: for (sorted_pc_addrs, output) |pc, *out| { defer prog_node.completeOne(); @@ -2403,7 +2385,7 @@ pub fn resolveSourceLocations( } // TODO: instead of calling 
this function, break the function up into one that parses the // information once and prepares a context that can be reused for the entire batch. - if (getLineNumberInfo(d, gpa, cu.*, pc)) |src_loc| { + if (getLineNumberInfo(d, gpa, cu, pc)) |src_loc| { out.* = src_loc; } else |err| switch (err) { error.MissingDebugInfo, error.InvalidDebugInfo => out.* = std.debug.SourceLocation.invalid, @@ -2419,7 +2401,7 @@ fn getSymbol(di: *Dwarf, allocator: Allocator, address: u64) !std.debug.Symbol { .compile_unit_name = compile_unit.die.getAttrString(di, std.dwarf.AT.name, di.section(.debug_str), compile_unit.*) catch |err| switch (err) { error.MissingDebugInfo, error.InvalidDebugInfo => "???", }, - .source_location = di.getLineNumberInfo(allocator, compile_unit.*, address) catch |err| switch (err) { + .source_location = di.getLineNumberInfo(allocator, compile_unit, address) catch |err| switch (err) { error.MissingDebugInfo, error.InvalidDebugInfo => null, else => return err, }, diff --git a/lib/std/debug/FixedBufferReader.zig b/lib/std/debug/FixedBufferReader.zig index 2a90ba569e..494245a9e9 100644 --- a/lib/std/debug/FixedBufferReader.zig +++ b/lib/std/debug/FixedBufferReader.zig @@ -1,4 +1,6 @@ -const std = @import("std.zig"); +//! Optimized for performance in debug builds. 
+ +const std = @import("../std.zig"); const MemoryAccessor = std.debug.MemoryAccessor; const FixedBufferReader = @This(); -- cgit v1.2.3 From c2ab4614b69a2303d640837df357c2336b0cedf2 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Fri, 2 Aug 2024 23:38:34 -0700 Subject: std.Debug.Info: remove std.Progress integration it's too fast to need it now --- lib/std/debug/Dwarf.zig | 5 ----- lib/std/debug/Info.zig | 9 ++------- tools/dump-cov.zig | 7 ++----- 3 files changed, 4 insertions(+), 17 deletions(-) (limited to 'lib/std/debug') diff --git a/lib/std/debug/Dwarf.zig b/lib/std/debug/Dwarf.zig index 06ffad9441..9689ac98b3 100644 --- a/lib/std/debug/Dwarf.zig +++ b/lib/std/debug/Dwarf.zig @@ -2354,19 +2354,14 @@ pub fn resolveSourceLocations( sorted_pc_addrs: []const u64, /// Asserts its length equals length of `sorted_pc_addrs`. output: []std.debug.SourceLocation, - parent_prog_node: std.Progress.Node, ) ResolveSourceLocationsError!void { assert(sorted_pc_addrs.len == output.len); assert(d.compile_units_sorted); - const prog_node = parent_prog_node.start("Resolve Source Locations", sorted_pc_addrs.len); - defer prog_node.end(); - var cu_i: usize = 0; var cu: *CompileUnit = &d.compile_unit_list.items[0]; var range = cu.pc_range.?; next_pc: for (sorted_pc_addrs, output) |pc, *out| { - defer prog_node.completeOne(); while (pc >= range.end) { cu_i += 1; if (cu_i >= d.compile_unit_list.items.len) { diff --git a/lib/std/debug/Info.zig b/lib/std/debug/Info.zig index 3c61c4072f..f31b2f22c4 100644 --- a/lib/std/debug/Info.zig +++ b/lib/std/debug/Info.zig @@ -20,13 +20,9 @@ address_map: std.AutoArrayHashMapUnmanaged(u64, Dwarf.ElfModule), pub const LoadError = Dwarf.ElfModule.LoadError; -pub fn load(gpa: Allocator, path: Path, parent_prog_node: std.Progress.Node) LoadError!Info { +pub fn load(gpa: Allocator, path: Path) LoadError!Info { var sections: Dwarf.SectionArray = Dwarf.null_section_array; - var prog_node = parent_prog_node.start("Loading Debug Info", 0); - defer 
prog_node.end(); var elf_module = try Dwarf.ElfModule.loadPath(gpa, path, null, null, &sections, null); - prog_node.end(); - prog_node = parent_prog_node.start("Sort Compile Units", 0); try elf_module.dwarf.sortCompileUnits(); var info: Info = .{ .address_map = .{}, @@ -51,10 +47,9 @@ pub fn resolveSourceLocations( sorted_pc_addrs: []const u64, /// Asserts its length equals length of `sorted_pc_addrs`. output: []std.debug.SourceLocation, - parent_prog_node: std.Progress.Node, ) ResolveSourceLocationsError!void { assert(sorted_pc_addrs.len == output.len); if (info.address_map.entries.len != 1) @panic("TODO"); const elf_module = &info.address_map.values()[0]; - return elf_module.dwarf.resolveSourceLocations(gpa, sorted_pc_addrs, output, parent_prog_node); + return elf_module.dwarf.resolveSourceLocations(gpa, sorted_pc_addrs, output); } diff --git a/tools/dump-cov.zig b/tools/dump-cov.zig index 8449dec33e..f821dde611 100644 --- a/tools/dump-cov.zig +++ b/tools/dump-cov.zig @@ -28,10 +28,7 @@ pub fn main() !void { .sub_path = cov_file_name, }; - const prog_node = std.Progress.start(.{}); - defer prog_node.end(); - - var debug_info = std.debug.Info.load(gpa, exe_path, prog_node) catch |err| { + var debug_info = std.debug.Info.load(gpa, exe_path) catch |err| { fatal("failed to load debug info for {}: {s}", .{ exe_path, @errorName(err) }); }; defer debug_info.deinit(gpa); @@ -54,7 +51,7 @@ pub fn main() !void { assert(std.sort.isSorted(usize, pcs, {}, std.sort.asc(usize))); const source_locations = try arena.alloc(std.debug.SourceLocation, pcs.len); - try debug_info.resolveSourceLocations(gpa, pcs, source_locations, prog_node); + try debug_info.resolveSourceLocations(gpa, pcs, source_locations); defer for (source_locations) |sl| { gpa.free(sl.file_name); }; -- cgit v1.2.3 From 53aa9d75a9b10c9cd277031e604a631452d34e8c Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Sat, 3 Aug 2024 17:42:08 -0700 Subject: std.debug.Info.resolveSourceLocations: O(N) implementation ---
lib/std/debug/Dwarf.zig | 59 +++---------------- lib/std/debug/Info.zig | 146 +++++++++++++++++++++++++++++++++++++++++++++++- tools/dump-cov.zig | 11 ++-- 3 files changed, 157 insertions(+), 59 deletions(-) (limited to 'lib/std/debug') diff --git a/lib/std/debug/Dwarf.zig b/lib/std/debug/Dwarf.zig index 9689ac98b3..cd37795351 100644 --- a/lib/std/debug/Dwarf.zig +++ b/lib/std/debug/Dwarf.zig @@ -152,6 +152,7 @@ pub const CompileUnit = struct { pub const LineEntry = struct { line: u32, column: u32, + /// Offset by 1 depending on whether Dwarf version is >= 5. file: u32, }; @@ -809,7 +810,7 @@ pub fn getSymbolName(di: *Dwarf, address: u64) ?[]const u8 { return null; } -const ScanError = error{ +pub const ScanError = error{ InvalidDebugInfo, MissingDebugInfo, } || Allocator.Error || std.debug.FixedBufferReader.Error; @@ -1113,7 +1114,7 @@ pub fn sortCompileUnits(d: *Dwarf) ScanError!void { } std.mem.sortUnstable(CompileUnit, d.compile_unit_list.items, {}, struct { - fn lessThan(ctx: void, a: CompileUnit, b: CompileUnit) bool { + pub fn lessThan(ctx: void, a: CompileUnit, b: CompileUnit) bool { _ = ctx; const a_range = a.pc_range orelse return false; const b_range = b.pc_range orelse return true; @@ -1641,14 +1642,18 @@ fn runLineNumberProgram(d: *Dwarf, gpa: Allocator, compile_unit: *CompileUnit) ! 
}; } +pub fn populateSrcLocCache(d: *Dwarf, gpa: Allocator, cu: *CompileUnit) ScanError!void { + if (cu.src_loc_cache != null) return; + cu.src_loc_cache = try runLineNumberProgram(d, gpa, cu); +} + pub fn getLineNumberInfo( d: *Dwarf, gpa: Allocator, compile_unit: *CompileUnit, target_address: u64, ) !std.debug.SourceLocation { - if (compile_unit.src_loc_cache == null) - compile_unit.src_loc_cache = try runLineNumberProgram(d, gpa, compile_unit); + try populateSrcLocCache(d, gpa, compile_unit); const slc = &compile_unit.src_loc_cache.?; const entry = try slc.findSource(target_address); const file_index = entry.file - @intFromBool(slc.version < 5); @@ -2343,52 +2348,6 @@ pub const ElfModule = struct { } }; -pub const ResolveSourceLocationsError = Allocator.Error || FixedBufferReader.Error; - -/// Given an array of virtual memory addresses, sorted ascending, outputs a -/// corresponding array of source locations, by appending to the provided -/// array list. -pub fn resolveSourceLocations( - d: *Dwarf, - gpa: Allocator, - sorted_pc_addrs: []const u64, - /// Asserts its length equals length of `sorted_pc_addrs`. 
- output: []std.debug.SourceLocation, -) ResolveSourceLocationsError!void { - assert(sorted_pc_addrs.len == output.len); - assert(d.compile_units_sorted); - - var cu_i: usize = 0; - var cu: *CompileUnit = &d.compile_unit_list.items[0]; - var range = cu.pc_range.?; - next_pc: for (sorted_pc_addrs, output) |pc, *out| { - while (pc >= range.end) { - cu_i += 1; - if (cu_i >= d.compile_unit_list.items.len) { - out.* = std.debug.SourceLocation.invalid; - continue :next_pc; - } - cu = &d.compile_unit_list.items[cu_i]; - range = cu.pc_range orelse { - out.* = std.debug.SourceLocation.invalid; - continue :next_pc; - }; - } - if (pc < range.start) { - out.* = std.debug.SourceLocation.invalid; - continue :next_pc; - } - // TODO: instead of calling this function, break the function up into one that parses the - // information once and prepares a context that can be reused for the entire batch. - if (getLineNumberInfo(d, gpa, cu, pc)) |src_loc| { - out.* = src_loc; - } else |err| switch (err) { - error.MissingDebugInfo, error.InvalidDebugInfo => out.* = std.debug.SourceLocation.invalid, - else => |e| return e, - } - } -} - fn getSymbol(di: *Dwarf, allocator: Allocator, address: u64) !std.debug.Symbol { if (di.findCompileUnit(address)) |compile_unit| { return .{ diff --git a/lib/std/debug/Info.zig b/lib/std/debug/Info.zig index f31b2f22c4..a52de6549b 100644 --- a/lib/std/debug/Info.zig +++ b/lib/std/debug/Info.zig @@ -12,12 +12,66 @@ const Path = std.Build.Cache.Path; const Dwarf = std.debug.Dwarf; const page_size = std.mem.page_size; const assert = std.debug.assert; +const Hash = std.hash.Wyhash; const Info = @This(); /// Sorted by key, ascending. address_map: std.AutoArrayHashMapUnmanaged(u64, Dwarf.ElfModule), +/// Provides a globally-scoped integer index for directories. +/// +/// As opposed to, for example, a directory index that is compilation-unit +/// scoped inside a single ELF module. +/// +/// String memory references the memory-mapped debug information. 
+/// +/// Protected by `mutex`. +directories: std.StringArrayHashMapUnmanaged(void), +/// Provides a globally-scoped integer index for files. +/// +/// String memory references the memory-mapped debug information. +/// +/// Protected by `mutex`. +files: std.ArrayHashMapUnmanaged(File, void, File.MapContext, false), +/// Protects `directories` and `files`. +mutex: std.Thread.Mutex, + +pub const SourceLocation = struct { + file: File.Index, + line: u32, + column: u32, + + pub const invalid: SourceLocation = .{ + .file = .invalid, + .line = 0, + .column = 0, + }; +}; + +pub const File = struct { + directory_index: u32, + basename: []const u8, + + pub const Index = enum(u32) { + invalid = std.math.maxInt(u32), + _, + }; + + pub const MapContext = struct { + pub fn hash(ctx: MapContext, a: File) u32 { + _ = ctx; + return @truncate(Hash.hash(a.directory_index, a.basename)); + } + + pub fn eql(ctx: MapContext, a: File, b: File, b_index: usize) bool { + _ = ctx; + _ = b_index; + return a.directory_index == b.directory_index and std.mem.eql(u8, a.basename, b.basename); + } + }; +}; + pub const LoadError = Dwarf.ElfModule.LoadError; pub fn load(gpa: Allocator, path: Path) LoadError!Info { @@ -26,12 +80,17 @@ pub fn load(gpa: Allocator, path: Path) LoadError!Info { try elf_module.dwarf.sortCompileUnits(); var info: Info = .{ .address_map = .{}, + .directories = .{}, + .files = .{}, + .mutex = .{}, }; try info.address_map.put(gpa, elf_module.base_address, elf_module); return info; } pub fn deinit(info: *Info, gpa: Allocator) void { + info.directories.deinit(gpa); + info.files.deinit(gpa); for (info.address_map.values()) |*elf_module| { elf_module.dwarf.deinit(gpa); } @@ -39,17 +98,98 @@ pub fn deinit(info: *Info, gpa: Allocator) void { info.* = undefined; } -pub const ResolveSourceLocationsError = Dwarf.ResolveSourceLocationsError; +pub fn fileAt(info: *Info, index: File.Index) *File { + return &info.files.keys()[@intFromEnum(index)]; +} + +pub const 
ResolveSourceLocationsError = Dwarf.ScanError; +/// Given an array of virtual memory addresses, sorted ascending, outputs a +/// corresponding array of source locations. pub fn resolveSourceLocations( info: *Info, gpa: Allocator, sorted_pc_addrs: []const u64, /// Asserts its length equals length of `sorted_pc_addrs`. - output: []std.debug.SourceLocation, + output: []SourceLocation, ) ResolveSourceLocationsError!void { assert(sorted_pc_addrs.len == output.len); if (info.address_map.entries.len != 1) @panic("TODO"); const elf_module = &info.address_map.values()[0]; - return elf_module.dwarf.resolveSourceLocations(gpa, sorted_pc_addrs, output); + return resolveSourceLocationsDwarf(info, gpa, sorted_pc_addrs, output, &elf_module.dwarf); +} + +pub fn resolveSourceLocationsDwarf( + info: *Info, + gpa: Allocator, + sorted_pc_addrs: []const u64, + /// Asserts its length equals length of `sorted_pc_addrs`. + output: []SourceLocation, + d: *Dwarf, +) ResolveSourceLocationsError!void { + assert(sorted_pc_addrs.len == output.len); + assert(d.compile_units_sorted); + + var cu_i: usize = 0; + var line_table_i: usize = 0; + var cu: *Dwarf.CompileUnit = &d.compile_unit_list.items[0]; + var range = cu.pc_range.?; + // Protects directories and files tables from other threads. 
+ info.mutex.lock(); + defer info.mutex.unlock(); + next_pc: for (sorted_pc_addrs, output) |pc, *out| { + while (pc >= range.end) { + cu_i += 1; + if (cu_i >= d.compile_unit_list.items.len) { + out.* = SourceLocation.invalid; + continue :next_pc; + } + cu = &d.compile_unit_list.items[cu_i]; + line_table_i = 0; + range = cu.pc_range orelse { + out.* = SourceLocation.invalid; + continue :next_pc; + }; + } + if (pc < range.start) { + out.* = SourceLocation.invalid; + continue :next_pc; + } + if (line_table_i == 0) { + line_table_i = 1; + info.mutex.unlock(); + defer info.mutex.lock(); + d.populateSrcLocCache(gpa, cu) catch |err| switch (err) { + error.MissingDebugInfo, error.InvalidDebugInfo => { + out.* = SourceLocation.invalid; + cu_i += 1; + if (cu_i < d.compile_unit_list.items.len) { + cu = &d.compile_unit_list.items[cu_i]; + line_table_i = 0; + if (cu.pc_range) |r| range = r; + } + continue :next_pc; + }, + else => |e| return e, + }; + } + const slc = &cu.src_loc_cache.?; + const table_addrs = slc.line_table.keys(); + while (line_table_i < table_addrs.len and table_addrs[line_table_i] < pc) line_table_i += 1; + + const entry = slc.line_table.values()[line_table_i - 1]; + const corrected_file_index = entry.file - @intFromBool(slc.version < 5); + const file_entry = slc.files[corrected_file_index]; + const dir_path = slc.directories[file_entry.dir_index].path; + const dir_gop = try info.directories.getOrPut(gpa, dir_path); + const file_gop = try info.files.getOrPut(gpa, .{ + .directory_index = @intCast(dir_gop.index), + .basename = file_entry.path, + }); + out.* = .{ + .file = @enumFromInt(file_gop.index), + .line = entry.line, + .column = entry.column, + }; + } } diff --git a/tools/dump-cov.zig b/tools/dump-cov.zig index f821dde611..bd096b9fc0 100644 --- a/tools/dump-cov.zig +++ b/tools/dump-cov.zig @@ -50,15 +50,14 @@ pub fn main() !void { } assert(std.sort.isSorted(usize, pcs, {}, std.sort.asc(usize))); - const source_locations = try 
arena.alloc(std.debug.SourceLocation, pcs.len); + const source_locations = try arena.alloc(std.debug.Info.SourceLocation, pcs.len); try debug_info.resolveSourceLocations(gpa, pcs, source_locations); - defer for (source_locations) |sl| { - gpa.free(sl.file_name); - }; for (pcs, source_locations) |pc, sl| { - try stdout.print("{x}: {s}:{d}:{d}\n", .{ - pc, sl.file_name, sl.line, sl.column, + const file = debug_info.fileAt(sl.file); + const dir_name = debug_info.directories.keys()[file.directory_index]; + try stdout.print("{x}: {s}/{s}:{d}:{d}\n", .{ + pc, dir_name, file.basename, sl.line, sl.column, }); } -- cgit v1.2.3 From 517cfb0dd1e2b5b8efc8e90ce4e5593a38fa158c Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Sun, 4 Aug 2024 00:16:28 -0700 Subject: fuzzing: progress towards web UI * libfuzzer: close file after mmap * fuzzer/main.js: connect with EventSource and debug dump the messages. currently this prints how many fuzzer runs have been attempted to console.log. * extract some `std.debug.Info` logic into `std.debug.Coverage`. Prepares for consolidation across multiple different executables which share source files, and makes it possible to send all the PC/SourceLocation mapping data with 4 memcpy'd arrays. * std.Build.Fuzz: - spawn a thread to watch the message queue and signal event subscribers. 
- track coverage map data - respond to /events URL with EventSource messages on a timer --- lib/fuzzer.zig | 1 + lib/fuzzer/main.js | 13 ++- lib/std/Build/Fuzz.zig | 209 +++++++++++++++++++++++++++++++++++--- lib/std/Build/Step/Run.zig | 6 +- lib/std/debug.zig | 1 + lib/std/debug/Coverage.zig | 244 +++++++++++++++++++++++++++++++++++++++++++++ lib/std/debug/Info.zig | 153 ++-------------------------- tools/dump-cov.zig | 16 +-- 8 files changed, 478 insertions(+), 165 deletions(-) create mode 100644 lib/std/debug/Coverage.zig (limited to 'lib/std/debug') diff --git a/lib/fuzzer.zig b/lib/fuzzer.zig index ede3663cdc..0d968cd60d 100644 --- a/lib/fuzzer.zig +++ b/lib/fuzzer.zig @@ -218,6 +218,7 @@ const Fuzzer = struct { .read = true, .truncate = false, }); + defer coverage_file.close(); const n_bitset_elems = (flagged_pcs.len + 7) / 8; const bytes_len = @sizeOf(SeenPcsHeader) + flagged_pcs.len * @sizeOf(usize) + n_bitset_elems; const existing_len = coverage_file.getEndPos() catch |err| { diff --git a/lib/fuzzer/main.js b/lib/fuzzer/main.js index 71e6b5fa54..872ac3d4b5 100644 --- a/lib/fuzzer/main.js +++ b/lib/fuzzer/main.js @@ -12,6 +12,9 @@ const text_decoder = new TextDecoder(); const text_encoder = new TextEncoder(); + const eventSource = new EventSource("events"); + eventSource.addEventListener('message', onMessage, false); + WebAssembly.instantiateStreaming(wasm_promise, { js: { log: function(ptr, len) { @@ -38,11 +41,15 @@ }); }); + function onMessage(e) { + console.log("Message", e.data); + } + function render() { - domSectSource.classList.add("hidden"); + domSectSource.classList.add("hidden"); - // TODO this is temporary debugging data - renderSource("/home/andy/dev/zig/lib/std/zig/tokenizer.zig"); + // TODO this is temporary debugging data + renderSource("/home/andy/dev/zig/lib/std/zig/tokenizer.zig"); } function renderSource(path) { diff --git a/lib/std/Build/Fuzz.zig b/lib/std/Build/Fuzz.zig index 46d9bfc8fd..0ff82f3677 100644 --- a/lib/std/Build/Fuzz.zig 
+++ b/lib/std/Build/Fuzz.zig @@ -6,6 +6,7 @@ const assert = std.debug.assert; const fatal = std.process.fatal; const Allocator = std.mem.Allocator; const log = std.log; +const Coverage = std.debug.Coverage; const Fuzz = @This(); const build_runner = @import("root"); @@ -53,17 +54,30 @@ pub fn start( .global_cache_directory = global_cache_directory, .zig_lib_directory = zig_lib_directory, .zig_exe_path = zig_exe_path, - .msg_queue = .{}, - .mutex = .{}, .listen_address = listen_address, .fuzz_run_steps = fuzz_run_steps, + + .msg_queue = .{}, + .mutex = .{}, + .condition = .{}, + + .coverage_files = .{}, + .coverage_mutex = .{}, + .coverage_condition = .{}, }; + // For accepting HTTP connections. const web_server_thread = std.Thread.spawn(.{}, WebServer.run, .{&web_server}) catch |err| { fatal("unable to spawn web server thread: {s}", .{@errorName(err)}); }; defer web_server_thread.join(); + // For polling messages and sending updates to subscribers. + const coverage_thread = std.Thread.spawn(.{}, WebServer.coverageRun, .{&web_server}) catch |err| { + fatal("unable to spawn coverage thread: {s}", .{@errorName(err)}); + }; + defer coverage_thread.join(); + { const fuzz_node = prog_node.start("Fuzzing", fuzz_run_steps.len); defer fuzz_node.end(); @@ -88,14 +102,38 @@ pub const WebServer = struct { global_cache_directory: Build.Cache.Directory, zig_lib_directory: Build.Cache.Directory, zig_exe_path: []const u8, + listen_address: std.net.Address, + fuzz_run_steps: []const *Step.Run, + /// Messages from fuzz workers. Protected by mutex. msg_queue: std.ArrayListUnmanaged(Msg), + /// Protects `msg_queue` only. mutex: std.Thread.Mutex, - listen_address: std.net.Address, - fuzz_run_steps: []const *Step.Run, + /// Signaled when there is a message in `msg_queue`. + condition: std.Thread.Condition, + + coverage_files: std.AutoArrayHashMapUnmanaged(u64, CoverageMap), + /// Protects `coverage_files` only. 
+ coverage_mutex: std.Thread.Mutex, + /// Signaled when `coverage_files` changes. + coverage_condition: std.Thread.Condition, + + const CoverageMap = struct { + mapped_memory: []align(std.mem.page_size) const u8, + coverage: Coverage, + + fn deinit(cm: *CoverageMap, gpa: Allocator) void { + std.posix.munmap(cm.mapped_memory); + cm.coverage.deinit(gpa); + cm.* = undefined; + } + }; const Msg = union(enum) { - coverage_id: u64, + coverage: struct { + id: u64, + run: *Step.Run, + }, }; fn run(ws: *WebServer) void { @@ -162,6 +200,10 @@ pub const WebServer = struct { std.mem.eql(u8, request.head.target, "/debug/sources.tar")) { try serveSourcesTar(ws, request); + } else if (std.mem.eql(u8, request.head.target, "/events") or + std.mem.eql(u8, request.head.target, "/debug/events")) + { + try serveEvents(ws, request); } else { try request.respond("not found", .{ .status = .not_found, @@ -384,6 +426,58 @@ pub const WebServer = struct { try file.writeAll(std.mem.asBytes(&header)); } + fn serveEvents(ws: *WebServer, request: *std.http.Server.Request) !void { + var send_buffer: [0x4000]u8 = undefined; + var response = request.respondStreaming(.{ + .send_buffer = &send_buffer, + .respond_options = .{ + .extra_headers = &.{ + .{ .name = "content-type", .value = "text/event-stream" }, + }, + .transfer_encoding = .none, + }, + }); + + ws.coverage_mutex.lock(); + defer ws.coverage_mutex.unlock(); + + if (getStats(ws)) |stats| { + try response.writer().print("data: {d}\n\n", .{stats.n_runs}); + } else { + try response.writeAll("data: loading debug information\n\n"); + } + try response.flush(); + + while (true) { + ws.coverage_condition.timedWait(&ws.coverage_mutex, std.time.ns_per_ms * 500) catch {}; + if (getStats(ws)) |stats| { + try response.writer().print("data: {d}\n\n", .{stats.n_runs}); + try response.flush(); + } + } + } + + const Stats = struct { + n_runs: u64, + }; + + fn getStats(ws: *WebServer) ?Stats { + const coverage_maps = ws.coverage_files.values(); + if 
(coverage_maps.len == 0) return null; + // TODO: make each events URL correspond to one coverage map + const ptr = coverage_maps[0].mapped_memory; + const SeenPcsHeader = extern struct { + n_runs: usize, + deduplicated_runs: usize, + pcs_len: usize, + lowest_stack: usize, + }; + const header: *const SeenPcsHeader = @ptrCast(ptr[0..@sizeOf(SeenPcsHeader)]); + return .{ + .n_runs = @atomicLoad(usize, &header.n_runs, .monotonic), + }; + } + fn serveSourcesTar(ws: *WebServer, request: *std.http.Server.Request) !void { const gpa = ws.gpa; @@ -471,6 +565,95 @@ pub const WebServer = struct { .name = "cache-control", .value = "max-age=0, must-revalidate", }; + + fn coverageRun(ws: *WebServer) void { + ws.mutex.lock(); + defer ws.mutex.unlock(); + + while (true) { + ws.condition.wait(&ws.mutex); + for (ws.msg_queue.items) |msg| switch (msg) { + .coverage => |coverage| prepareTables(ws, coverage.run, coverage.id) catch |err| switch (err) { + error.AlreadyReported => continue, + else => |e| log.err("failed to prepare code coverage tables: {s}", .{@errorName(e)}), + }, + }; + ws.msg_queue.clearRetainingCapacity(); + } + } + + fn prepareTables( + ws: *WebServer, + run_step: *Step.Run, + coverage_id: u64, + ) error{ OutOfMemory, AlreadyReported }!void { + const gpa = ws.gpa; + + ws.coverage_mutex.lock(); + defer ws.coverage_mutex.unlock(); + + const gop = try ws.coverage_files.getOrPut(gpa, coverage_id); + if (gop.found_existing) { + // We are fuzzing the same executable with multiple threads. + // Perhaps the same unit test; perhaps a different one. In any + // case, since the coverage file is the same, we only have to + // notice changes to that one file in order to learn coverage for + // this particular executable. 
+ return; + } + errdefer _ = ws.coverage_files.pop(); + + gop.value_ptr.* = .{ + .coverage = std.debug.Coverage.init, + .mapped_memory = undefined, // populated below + }; + errdefer gop.value_ptr.coverage.deinit(gpa); + + const rebuilt_exe_path: Build.Cache.Path = .{ + .root_dir = Build.Cache.Directory.cwd(), + .sub_path = run_step.rebuilt_executable.?, + }; + var debug_info = std.debug.Info.load(gpa, rebuilt_exe_path, &gop.value_ptr.coverage) catch |err| { + log.err("step '{s}': failed to load debug information for '{}': {s}", .{ + run_step.step.name, rebuilt_exe_path, @errorName(err), + }); + return error.AlreadyReported; + }; + defer debug_info.deinit(gpa); + + const coverage_file_path: Build.Cache.Path = .{ + .root_dir = run_step.step.owner.cache_root, + .sub_path = "v/" ++ std.fmt.hex(coverage_id), + }; + var coverage_file = coverage_file_path.root_dir.handle.openFile(coverage_file_path.sub_path, .{}) catch |err| { + log.err("step '{s}': failed to load coverage file '{}': {s}", .{ + run_step.step.name, coverage_file_path, @errorName(err), + }); + return error.AlreadyReported; + }; + defer coverage_file.close(); + + const file_size = coverage_file.getEndPos() catch |err| { + log.err("unable to check len of coverage file '{}': {s}", .{ coverage_file_path, @errorName(err) }); + return error.AlreadyReported; + }; + + const mapped_memory = std.posix.mmap( + null, + file_size, + std.posix.PROT.READ, + .{ .TYPE = .SHARED }, + coverage_file.handle, + 0, + ) catch |err| { + log.err("failed to map coverage file '{}': {s}", .{ coverage_file_path, @errorName(err) }); + return error.AlreadyReported; + }; + + gop.value_ptr.mapped_memory = mapped_memory; + + ws.coverage_condition.broadcast(); + } }; fn rebuildTestsWorkerRun(run: *Step.Run, ttyconf: std.io.tty.Config, parent_prog_node: std.Progress.Node) void { @@ -493,16 +676,16 @@ fn rebuildTestsWorkerRun(run: *Step.Run, ttyconf: std.io.tty.Config, parent_prog build_runner.printErrorMessages(gpa, &compile.step, ttyconf, 
stderr, false) catch {}; } - if (result) |rebuilt_bin_path| { - run.rebuilt_executable = rebuilt_bin_path; - } else |err| switch (err) { - error.MakeFailed => {}, + const rebuilt_bin_path = result catch |err| switch (err) { + error.MakeFailed => return, else => { - std.debug.print("step '{s}': failed to rebuild in fuzz mode: {s}\n", .{ + log.err("step '{s}': failed to rebuild in fuzz mode: {s}", .{ compile.step.name, @errorName(err), }); + return; }, - } + }; + run.rebuilt_executable = rebuilt_bin_path; } fn fuzzWorkerRun( @@ -524,11 +707,13 @@ fn fuzzWorkerRun( std.debug.lockStdErr(); defer std.debug.unlockStdErr(); build_runner.printErrorMessages(gpa, &run.step, ttyconf, stderr, false) catch {}; + return; }, else => { - std.debug.print("step '{s}': failed to rebuild '{s}' in fuzz mode: {s}\n", .{ + log.err("step '{s}': failed to rerun '{s}' in fuzz mode: {s}", .{ run.step.name, test_name, @errorName(err), }); + return; }, }; } diff --git a/lib/std/Build/Step/Run.zig b/lib/std/Build/Step/Run.zig index e494e969f0..b08ecfee78 100644 --- a/lib/std/Build/Step/Run.zig +++ b/lib/std/Build/Step/Run.zig @@ -1521,7 +1521,11 @@ fn evalZigTest( { web_server.mutex.lock(); defer web_server.mutex.unlock(); - try web_server.msg_queue.append(web_server.gpa, .{ .coverage_id = coverage_id }); + try web_server.msg_queue.append(web_server.gpa, .{ .coverage = .{ + .id = coverage_id, + .run = run, + } }); + web_server.condition.signal(); } }, else => {}, // ignore other messages diff --git a/lib/std/debug.zig b/lib/std/debug.zig index 7f4f6b7df2..a3a8a533ee 100644 --- a/lib/std/debug.zig +++ b/lib/std/debug.zig @@ -19,6 +19,7 @@ pub const Dwarf = @import("debug/Dwarf.zig"); pub const Pdb = @import("debug/Pdb.zig"); pub const SelfInfo = @import("debug/SelfInfo.zig"); pub const Info = @import("debug/Info.zig"); +pub const Coverage = @import("debug/Coverage.zig"); /// Unresolved source locations can be represented with a single `usize` that /// corresponds to a virtual memory address of 
the program counter. Combined diff --git a/lib/std/debug/Coverage.zig b/lib/std/debug/Coverage.zig new file mode 100644 index 0000000000..d9cc7fdebd --- /dev/null +++ b/lib/std/debug/Coverage.zig @@ -0,0 +1,244 @@ +const std = @import("../std.zig"); +const Allocator = std.mem.Allocator; +const Hash = std.hash.Wyhash; +const Dwarf = std.debug.Dwarf; +const assert = std.debug.assert; + +const Coverage = @This(); + +/// Provides a globally-scoped integer index for directories. +/// +/// As opposed to, for example, a directory index that is compilation-unit +/// scoped inside a single ELF module. +/// +/// String memory references the memory-mapped debug information. +/// +/// Protected by `mutex`. +directories: std.ArrayHashMapUnmanaged(String, void, String.MapContext, false), +/// Provides a globally-scoped integer index for files. +/// +/// String memory references the memory-mapped debug information. +/// +/// Protected by `mutex`. +files: std.ArrayHashMapUnmanaged(File, void, File.MapContext, false), +string_bytes: std.ArrayListUnmanaged(u8), +/// Protects the other fields. 
+mutex: std.Thread.Mutex, + +pub const init: Coverage = .{ + .directories = .{}, + .files = .{}, + .mutex = .{}, + .string_bytes = .{}, +}; + +pub const String = enum(u32) { + _, + + pub const MapContext = struct { + string_bytes: []const u8, + + pub fn eql(self: @This(), a: String, b: String, b_index: usize) bool { + _ = b_index; + const a_slice = span(self.string_bytes[@intFromEnum(a)..]); + const b_slice = span(self.string_bytes[@intFromEnum(b)..]); + return std.mem.eql(u8, a_slice, b_slice); + } + + pub fn hash(self: @This(), a: String) u32 { + return @truncate(Hash.hash(0, span(self.string_bytes[@intFromEnum(a)..]))); + } + }; + + pub const SliceAdapter = struct { + string_bytes: []const u8, + + pub fn eql(self: @This(), a_slice: []const u8, b: String, b_index: usize) bool { + _ = b_index; + const b_slice = span(self.string_bytes[@intFromEnum(b)..]); + return std.mem.eql(u8, a_slice, b_slice); + } + pub fn hash(self: @This(), a: []const u8) u32 { + _ = self; + return @truncate(Hash.hash(0, a)); + } + }; +}; + +pub const SourceLocation = struct { + file: File.Index, + line: u32, + column: u32, + + pub const invalid: SourceLocation = .{ + .file = .invalid, + .line = 0, + .column = 0, + }; +}; + +pub const File = struct { + directory_index: u32, + basename: String, + + pub const Index = enum(u32) { + invalid = std.math.maxInt(u32), + _, + }; + + pub const MapContext = struct { + string_bytes: []const u8, + + pub fn hash(self: MapContext, a: File) u32 { + const a_basename = span(self.string_bytes[@intFromEnum(a.basename)..]); + return @truncate(Hash.hash(a.directory_index, a_basename)); + } + + pub fn eql(self: MapContext, a: File, b: File, b_index: usize) bool { + _ = b_index; + if (a.directory_index != b.directory_index) return false; + const a_basename = span(self.string_bytes[@intFromEnum(a.basename)..]); + const b_basename = span(self.string_bytes[@intFromEnum(b.basename)..]); + return std.mem.eql(u8, a_basename, b_basename); + } + }; + + pub const 
SliceAdapter = struct { + string_bytes: []const u8, + + pub const Entry = struct { + directory_index: u32, + basename: []const u8, + }; + + pub fn hash(self: @This(), a: Entry) u32 { + _ = self; + return @truncate(Hash.hash(a.directory_index, a.basename)); + } + + pub fn eql(self: @This(), a: Entry, b: File, b_index: usize) bool { + _ = b_index; + if (a.directory_index != b.directory_index) return false; + const b_basename = span(self.string_bytes[@intFromEnum(b.basename)..]); + return std.mem.eql(u8, a.basename, b_basename); + } + }; +}; + +pub fn deinit(cov: *Coverage, gpa: Allocator) void { + cov.directories.deinit(gpa); + cov.files.deinit(gpa); + cov.string_bytes.deinit(gpa); + cov.* = undefined; +} + +pub fn fileAt(cov: *Coverage, index: File.Index) *File { + return &cov.files.keys()[@intFromEnum(index)]; +} + +pub fn stringAt(cov: *Coverage, index: String) [:0]const u8 { + return span(cov.string_bytes.items[@intFromEnum(index)..]); +} + +pub const ResolveAddressesDwarfError = Dwarf.ScanError; + +pub fn resolveAddressesDwarf( + cov: *Coverage, + gpa: Allocator, + sorted_pc_addrs: []const u64, + /// Asserts its length equals length of `sorted_pc_addrs`. + output: []SourceLocation, + d: *Dwarf, +) ResolveAddressesDwarfError!void { + assert(sorted_pc_addrs.len == output.len); + assert(d.compile_units_sorted); + + var cu_i: usize = 0; + var line_table_i: usize = 0; + var cu: *Dwarf.CompileUnit = &d.compile_unit_list.items[0]; + var range = cu.pc_range.?; + // Protects directories and files tables from other threads. 
+ cov.mutex.lock(); + defer cov.mutex.unlock(); + next_pc: for (sorted_pc_addrs, output) |pc, *out| { + while (pc >= range.end) { + cu_i += 1; + if (cu_i >= d.compile_unit_list.items.len) { + out.* = SourceLocation.invalid; + continue :next_pc; + } + cu = &d.compile_unit_list.items[cu_i]; + line_table_i = 0; + range = cu.pc_range orelse { + out.* = SourceLocation.invalid; + continue :next_pc; + }; + } + if (pc < range.start) { + out.* = SourceLocation.invalid; + continue :next_pc; + } + if (line_table_i == 0) { + line_table_i = 1; + cov.mutex.unlock(); + defer cov.mutex.lock(); + d.populateSrcLocCache(gpa, cu) catch |err| switch (err) { + error.MissingDebugInfo, error.InvalidDebugInfo => { + out.* = SourceLocation.invalid; + cu_i += 1; + if (cu_i < d.compile_unit_list.items.len) { + cu = &d.compile_unit_list.items[cu_i]; + line_table_i = 0; + if (cu.pc_range) |r| range = r; + } + continue :next_pc; + }, + else => |e| return e, + }; + } + const slc = &cu.src_loc_cache.?; + const table_addrs = slc.line_table.keys(); + while (line_table_i < table_addrs.len and table_addrs[line_table_i] < pc) line_table_i += 1; + + const entry = slc.line_table.values()[line_table_i - 1]; + const corrected_file_index = entry.file - @intFromBool(slc.version < 5); + const file_entry = slc.files[corrected_file_index]; + const dir_path = slc.directories[file_entry.dir_index].path; + try cov.string_bytes.ensureUnusedCapacity(gpa, dir_path.len + file_entry.path.len + 2); + const dir_gop = try cov.directories.getOrPutContextAdapted(gpa, dir_path, String.SliceAdapter{ + .string_bytes = cov.string_bytes.items, + }, String.MapContext{ + .string_bytes = cov.string_bytes.items, + }); + if (!dir_gop.found_existing) + dir_gop.key_ptr.* = addStringAssumeCapacity(cov, dir_path); + const file_gop = try cov.files.getOrPutContextAdapted(gpa, File.SliceAdapter.Entry{ + .directory_index = @intCast(dir_gop.index), + .basename = file_entry.path, + }, File.SliceAdapter{ + .string_bytes = 
cov.string_bytes.items, + }, File.MapContext{ + .string_bytes = cov.string_bytes.items, + }); + if (!file_gop.found_existing) file_gop.key_ptr.* = .{ + .directory_index = @intCast(dir_gop.index), + .basename = addStringAssumeCapacity(cov, file_entry.path), + }; + out.* = .{ + .file = @enumFromInt(file_gop.index), + .line = entry.line, + .column = entry.column, + }; + } +} + +pub fn addStringAssumeCapacity(cov: *Coverage, s: []const u8) String { + const result: String = @enumFromInt(cov.string_bytes.items.len); + cov.string_bytes.appendSliceAssumeCapacity(s); + cov.string_bytes.appendAssumeCapacity(0); + return result; +} + +fn span(s: []const u8) [:0]const u8 { + return std.mem.sliceTo(@as([:0]const u8, @ptrCast(s)), 0); +} diff --git a/lib/std/debug/Info.zig b/lib/std/debug/Info.zig index a52de6549b..ee191d2c12 100644 --- a/lib/std/debug/Info.zig +++ b/lib/std/debug/Info.zig @@ -12,85 +12,31 @@ const Path = std.Build.Cache.Path; const Dwarf = std.debug.Dwarf; const page_size = std.mem.page_size; const assert = std.debug.assert; -const Hash = std.hash.Wyhash; +const Coverage = std.debug.Coverage; +const SourceLocation = std.debug.Coverage.SourceLocation; const Info = @This(); /// Sorted by key, ascending. address_map: std.AutoArrayHashMapUnmanaged(u64, Dwarf.ElfModule), - -/// Provides a globally-scoped integer index for directories. -/// -/// As opposed to, for example, a directory index that is compilation-unit -/// scoped inside a single ELF module. -/// -/// String memory references the memory-mapped debug information. -/// -/// Protected by `mutex`. -directories: std.StringArrayHashMapUnmanaged(void), -/// Provides a globally-scoped integer index for files. -/// -/// String memory references the memory-mapped debug information. -/// -/// Protected by `mutex`. -files: std.ArrayHashMapUnmanaged(File, void, File.MapContext, false), -/// Protects `directories` and `files`. 
-mutex: std.Thread.Mutex, - -pub const SourceLocation = struct { - file: File.Index, - line: u32, - column: u32, - - pub const invalid: SourceLocation = .{ - .file = .invalid, - .line = 0, - .column = 0, - }; -}; - -pub const File = struct { - directory_index: u32, - basename: []const u8, - - pub const Index = enum(u32) { - invalid = std.math.maxInt(u32), - _, - }; - - pub const MapContext = struct { - pub fn hash(ctx: MapContext, a: File) u32 { - _ = ctx; - return @truncate(Hash.hash(a.directory_index, a.basename)); - } - - pub fn eql(ctx: MapContext, a: File, b: File, b_index: usize) bool { - _ = ctx; - _ = b_index; - return a.directory_index == b.directory_index and std.mem.eql(u8, a.basename, b.basename); - } - }; -}; +/// Externally managed, outlives this `Info` instance. +coverage: *Coverage, pub const LoadError = Dwarf.ElfModule.LoadError; -pub fn load(gpa: Allocator, path: Path) LoadError!Info { +pub fn load(gpa: Allocator, path: Path, coverage: *Coverage) LoadError!Info { var sections: Dwarf.SectionArray = Dwarf.null_section_array; var elf_module = try Dwarf.ElfModule.loadPath(gpa, path, null, null, &sections, null); try elf_module.dwarf.sortCompileUnits(); var info: Info = .{ .address_map = .{}, - .directories = .{}, - .files = .{}, - .mutex = .{}, + .coverage = coverage, }; try info.address_map.put(gpa, elf_module.base_address, elf_module); return info; } pub fn deinit(info: *Info, gpa: Allocator) void { - info.directories.deinit(gpa); - info.files.deinit(gpa); for (info.address_map.values()) |*elf_module| { elf_module.dwarf.deinit(gpa); } @@ -98,98 +44,19 @@ pub fn deinit(info: *Info, gpa: Allocator) void { info.* = undefined; } -pub fn fileAt(info: *Info, index: File.Index) *File { - return &info.files.keys()[@intFromEnum(index)]; -} - -pub const ResolveSourceLocationsError = Dwarf.ScanError; +pub const ResolveAddressesError = Coverage.ResolveAddressesDwarfError; /// Given an array of virtual memory addresses, sorted ascending, outputs a /// corresponding 
array of source locations. -pub fn resolveSourceLocations( +pub fn resolveAddresses( info: *Info, gpa: Allocator, sorted_pc_addrs: []const u64, /// Asserts its length equals length of `sorted_pc_addrs`. output: []SourceLocation, -) ResolveSourceLocationsError!void { +) ResolveAddressesError!void { assert(sorted_pc_addrs.len == output.len); if (info.address_map.entries.len != 1) @panic("TODO"); const elf_module = &info.address_map.values()[0]; - return resolveSourceLocationsDwarf(info, gpa, sorted_pc_addrs, output, &elf_module.dwarf); -} - -pub fn resolveSourceLocationsDwarf( - info: *Info, - gpa: Allocator, - sorted_pc_addrs: []const u64, - /// Asserts its length equals length of `sorted_pc_addrs`. - output: []SourceLocation, - d: *Dwarf, -) ResolveSourceLocationsError!void { - assert(sorted_pc_addrs.len == output.len); - assert(d.compile_units_sorted); - - var cu_i: usize = 0; - var line_table_i: usize = 0; - var cu: *Dwarf.CompileUnit = &d.compile_unit_list.items[0]; - var range = cu.pc_range.?; - // Protects directories and files tables from other threads. 
- info.mutex.lock(); - defer info.mutex.unlock(); - next_pc: for (sorted_pc_addrs, output) |pc, *out| { - while (pc >= range.end) { - cu_i += 1; - if (cu_i >= d.compile_unit_list.items.len) { - out.* = SourceLocation.invalid; - continue :next_pc; - } - cu = &d.compile_unit_list.items[cu_i]; - line_table_i = 0; - range = cu.pc_range orelse { - out.* = SourceLocation.invalid; - continue :next_pc; - }; - } - if (pc < range.start) { - out.* = SourceLocation.invalid; - continue :next_pc; - } - if (line_table_i == 0) { - line_table_i = 1; - info.mutex.unlock(); - defer info.mutex.lock(); - d.populateSrcLocCache(gpa, cu) catch |err| switch (err) { - error.MissingDebugInfo, error.InvalidDebugInfo => { - out.* = SourceLocation.invalid; - cu_i += 1; - if (cu_i < d.compile_unit_list.items.len) { - cu = &d.compile_unit_list.items[cu_i]; - line_table_i = 0; - if (cu.pc_range) |r| range = r; - } - continue :next_pc; - }, - else => |e| return e, - }; - } - const slc = &cu.src_loc_cache.?; - const table_addrs = slc.line_table.keys(); - while (line_table_i < table_addrs.len and table_addrs[line_table_i] < pc) line_table_i += 1; - - const entry = slc.line_table.values()[line_table_i - 1]; - const corrected_file_index = entry.file - @intFromBool(slc.version < 5); - const file_entry = slc.files[corrected_file_index]; - const dir_path = slc.directories[file_entry.dir_index].path; - const dir_gop = try info.directories.getOrPut(gpa, dir_path); - const file_gop = try info.files.getOrPut(gpa, .{ - .directory_index = @intCast(dir_gop.index), - .basename = file_entry.path, - }); - out.* = .{ - .file = @enumFromInt(file_gop.index), - .line = entry.line, - .column = entry.column, - }; - } + return info.coverage.resolveAddressesDwarf(gpa, sorted_pc_addrs, output, &elf_module.dwarf); } diff --git a/tools/dump-cov.zig b/tools/dump-cov.zig index bd096b9fc0..fb08907cad 100644 --- a/tools/dump-cov.zig +++ b/tools/dump-cov.zig @@ -28,7 +28,10 @@ pub fn main() !void { .sub_path = cov_file_name, }; - 
var debug_info = std.debug.Info.load(gpa, exe_path) catch |err| { + var coverage = std.debug.Coverage.init; + defer coverage.deinit(gpa); + + var debug_info = std.debug.Info.load(gpa, exe_path, &coverage) catch |err| { fatal("failed to load debug info for {}: {s}", .{ exe_path, @errorName(err) }); }; defer debug_info.deinit(gpa); @@ -50,14 +53,15 @@ pub fn main() !void { } assert(std.sort.isSorted(usize, pcs, {}, std.sort.asc(usize))); - const source_locations = try arena.alloc(std.debug.Info.SourceLocation, pcs.len); - try debug_info.resolveSourceLocations(gpa, pcs, source_locations); + const source_locations = try arena.alloc(std.debug.Coverage.SourceLocation, pcs.len); + try debug_info.resolveAddresses(gpa, pcs, source_locations); for (pcs, source_locations) |pc, sl| { - const file = debug_info.fileAt(sl.file); - const dir_name = debug_info.directories.keys()[file.directory_index]; + const file = debug_info.coverage.fileAt(sl.file); + const dir_name = debug_info.coverage.directories.keys()[file.directory_index]; + const dir_name_slice = debug_info.coverage.stringAt(dir_name); try stdout.print("{x}: {s}/{s}:{d}:{d}\n", .{ - pc, dir_name, file.basename, sl.line, sl.column, + pc, dir_name_slice, debug_info.coverage.stringAt(file.basename), sl.line, sl.column, }); } -- cgit v1.2.3 From 22925636f7afc0f334f1d44257c007a1d2ccd63f Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Sun, 4 Aug 2024 15:26:18 -0700 Subject: std.debug.Coverage: use extern structs helps the serialization use case --- lib/std/debug/Coverage.zig | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib/std/debug') diff --git a/lib/std/debug/Coverage.zig b/lib/std/debug/Coverage.zig index d9cc7fdebd..f341efaffb 100644 --- a/lib/std/debug/Coverage.zig +++ b/lib/std/debug/Coverage.zig @@ -65,7 +65,7 @@ pub const String = enum(u32) { }; }; -pub const SourceLocation = struct { +pub const SourceLocation = extern struct { file: File.Index, line: u32, column: u32, @@ -77,7 +77,7 @@ pub 
const SourceLocation = struct { }; }; -pub const File = struct { +pub const File = extern struct { directory_index: u32, basename: String, -- cgit v1.2.3 From 8dae629c4f89155b6945ee952ee2aeb5bfa1d271 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Mon, 5 Aug 2024 19:19:10 -0700 Subject: update branch for latest std.sort changes --- lib/std/Build/Fuzz/WebServer.zig | 6 +++++- lib/std/debug/Dwarf.zig | 6 +++++- 2 files changed, 10 insertions(+), 2 deletions(-) (limited to 'lib/std/debug') diff --git a/lib/std/Build/Fuzz/WebServer.zig b/lib/std/Build/Fuzz/WebServer.zig index 3ae37c2109..c0dfddacd5 100644 --- a/lib/std/Build/Fuzz/WebServer.zig +++ b/lib/std/Build/Fuzz/WebServer.zig @@ -649,7 +649,11 @@ fn addEntryPoint(ws: *WebServer, coverage_id: u64, addr: u64) error{ AlreadyRepo const ptr = coverage_map.mapped_memory; const pcs_bytes = ptr[@sizeOf(abi.SeenPcsHeader)..][0 .. coverage_map.source_locations.len * @sizeOf(usize)]; const pcs: []const usize = @alignCast(std.mem.bytesAsSlice(usize, pcs_bytes)); - const index = std.sort.upperBound(usize, addr, pcs, {}, std.sort.asc(usize)); + const index = std.sort.upperBound(usize, pcs, addr, struct { + fn order(context: usize, item: usize) std.math.Order { + return std.math.order(item, context); + } + }.order); if (index >= pcs.len) { log.err("unable to find unit test entry address 0x{x} in source locations (range: 0x{x} to 0x{x})", .{ addr, pcs[0], pcs[pcs.len - 1], diff --git a/lib/std/debug/Dwarf.zig b/lib/std/debug/Dwarf.zig index cd37795351..caf04aca4d 100644 --- a/lib/std/debug/Dwarf.zig +++ b/lib/std/debug/Dwarf.zig @@ -157,7 +157,11 @@ pub const CompileUnit = struct { }; pub fn findSource(slc: *const SrcLocCache, address: u64) !LineEntry { - const index = std.sort.upperBound(u64, address, slc.line_table.keys(), {}, std.sort.asc(u64)); + const index = std.sort.upperBound(u64, slc.line_table.keys(), address, struct { + fn order(context: u64, item: u64) std.math.Order { + return std.math.order(item, context); + } + 
}.order); if (index == 0) return missing(); return slc.line_table.values()[index - 1]; } -- cgit v1.2.3 From 40edd11516081b455df09ce0d19b3ca686655924 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Mon, 5 Aug 2024 22:46:30 -0700 Subject: std.debug: fix compile errors on windows and macos --- lib/std/debug/Dwarf.zig | 2 +- lib/std/debug/SelfInfo.zig | 25 ++++++++++++++----------- 2 files changed, 15 insertions(+), 12 deletions(-) (limited to 'lib/std/debug') diff --git a/lib/std/debug/Dwarf.zig b/lib/std/debug/Dwarf.zig index caf04aca4d..e3d4ab1a8f 100644 --- a/lib/std/debug/Dwarf.zig +++ b/lib/std/debug/Dwarf.zig @@ -2352,7 +2352,7 @@ pub const ElfModule = struct { } }; -fn getSymbol(di: *Dwarf, allocator: Allocator, address: u64) !std.debug.Symbol { +pub fn getSymbol(di: *Dwarf, allocator: Allocator, address: u64) !std.debug.Symbol { if (di.findCompileUnit(address)) |compile_unit| { return .{ .name = di.getSymbolName(address) orelse "???", diff --git a/lib/std/debug/SelfInfo.zig b/lib/std/debug/SelfInfo.zig index ba0d7bc039..2d87243c5d 100644 --- a/lib/std/debug/SelfInfo.zig +++ b/lib/std/debug/SelfInfo.zig @@ -602,10 +602,11 @@ pub const Module = switch (native_os) { sections[@intFromEnum(Dwarf.Section.Id.debug_line)] == null; if (missing_debug_info) return error.MissingDebugInfo; - var di = Dwarf{ + var di: Dwarf = .{ .endian = .little, .sections = sections, .is_macho = true, + .compile_units_sorted = false, }; try Dwarf.open(&di, allocator); @@ -622,7 +623,7 @@ pub const Module = switch (native_os) { return result.value_ptr; } - pub fn getSymbolAtAddress(self: *@This(), allocator: Allocator, address: usize) !Dwarf.SymbolInfo { + pub fn getSymbolAtAddress(self: *@This(), allocator: Allocator, address: usize) !std.debug.Symbol { nosuspend { const result = try self.getOFileInfoForAddress(allocator, address); if (result.symbol == null) return .{}; @@ -630,19 +631,19 @@ pub const Module = switch (native_os) { // Take the symbol name from the N_FUN STAB entry, 
we're going to // use it if we fail to find the DWARF infos const stab_symbol = mem.sliceTo(self.strings[result.symbol.?.strx..], 0); - if (result.o_file_info == null) return .{ .symbol_name = stab_symbol }; + if (result.o_file_info == null) return .{ .name = stab_symbol }; // Translate again the address, this time into an address inside the // .o file const relocated_address_o = result.o_file_info.?.addr_table.get(stab_symbol) orelse return .{ - .symbol_name = "???", + .name = "???", }; const addr_off = result.relocated_address - result.symbol.?.addr; const o_file_di = &result.o_file_info.?.di; if (o_file_di.findCompileUnit(relocated_address_o)) |compile_unit| { return .{ - .symbol_name = o_file_di.getSymbolName(relocated_address_o) orelse "???", + .name = o_file_di.getSymbolName(relocated_address_o) orelse "???", .compile_unit_name = compile_unit.die.getAttrString( o_file_di, std.dwarf.AT.name, @@ -651,9 +652,9 @@ pub const Module = switch (native_os) { ) catch |err| switch (err) { error.MissingDebugInfo, error.InvalidDebugInfo => "???", }, - .line_info = o_file_di.getLineNumberInfo( + .source_location = o_file_di.getLineNumberInfo( allocator, - compile_unit.*, + compile_unit, relocated_address_o + addr_off, ) catch |err| switch (err) { error.MissingDebugInfo, error.InvalidDebugInfo => null, @@ -662,7 +663,7 @@ pub const Module = switch (native_os) { }; } else |err| switch (err) { error.MissingDebugInfo, error.InvalidDebugInfo => { - return .{ .symbol_name = stab_symbol }; + return .{ .name = stab_symbol }; }, else => return err, } @@ -760,9 +761,9 @@ pub const Module = switch (native_os) { ); return .{ - .symbol_name = symbol_name, + .name = symbol_name, .compile_unit_name = obj_basename, - .line_info = opt_line_info, + .source_location = opt_line_info, }; } @@ -991,10 +992,11 @@ fn readCoffDebugInfo(allocator: Allocator, coff_obj: *coff.Coff) !Module { } else null; } - var dwarf = Dwarf{ + var dwarf: Dwarf = .{ .endian = native_endian, .sections = sections, 
.is_macho = false, + .compile_units_sorted = false, }; try Dwarf.open(&dwarf, allocator); @@ -1808,6 +1810,7 @@ fn unwindFrameMachODwarf( var di: Dwarf = .{ .endian = native_endian, .is_macho = true, + .compile_units_sorted = false, }; defer di.deinit(context.allocator); -- cgit v1.2.3