diff options
Diffstat (limited to 'lib/std/pdb.zig')
| -rw-r--r-- | lib/std/pdb.zig | 607 |
1 files changed, 11 insertions, 596 deletions
diff --git a/lib/std/pdb.zig b/lib/std/pdb.zig index ece1cc63dc..31ad02e945 100644 --- a/lib/std/pdb.zig +++ b/lib/std/pdb.zig @@ -1,3 +1,12 @@ +//! Program Data Base debugging information format. +//! +//! This namespace contains unopinionated types and data definitions only. For +//! an implementation of parsing and caching PDB information, see +//! `std.debug.Pdb`. +//! +//! Most of this is based on information gathered from LLVM source code, +//! documentation and/or contributors. + const std = @import("std.zig"); const io = std.io; const math = std.math; @@ -9,10 +18,7 @@ const debug = std.debug; const ArrayList = std.ArrayList; -// Note: most of this is based on information gathered from LLVM source code, -// documentation and/or contributors. - -// https://llvm.org/docs/PDB/DbiStream.html#stream-header +/// https://llvm.org/docs/PDB/DbiStream.html#stream-header pub const DbiStreamHeader = extern struct { VersionSignature: i32, VersionHeader: u32, @@ -415,10 +421,8 @@ pub const ColumnNumberEntry = extern struct { pub const FileChecksumEntryHeader = extern struct { /// Byte offset of filename in global string table. FileNameOffset: u32, - /// Number of bytes of checksum. ChecksumSize: u8, - /// FileChecksumKind ChecksumKind: u8, }; @@ -451,525 +455,15 @@ pub const DebugSubsectionHeader = extern struct { Length: u32, }; -pub const PDBStringTableHeader = extern struct { +pub const StringTableHeader = extern struct { /// PDBStringTableSignature Signature: u32, - /// 1 or 2 HashVersion: u32, - /// Number of bytes of names buffer. ByteSize: u32, }; -fn readSparseBitVector(stream: anytype, allocator: mem.Allocator) ![]u32 { - const num_words = try stream.readInt(u32, .little); - var list = ArrayList(u32).init(allocator); - errdefer list.deinit(); - var word_i: u32 = 0; - while (word_i != num_words) : (word_i += 1) { - const word = try stream.readInt(u32, .little); - var bit_i: u5 = 0; - while (true) : (bit_i += 1) { - if (word & (@as(u32, 1) << bit_i) != 0) { - try list.append(word_i * 32 + bit_i); - } - if (bit_i == std.math.maxInt(u5)) break; - } - } - return try list.toOwnedSlice(); -} - -pub const Pdb = struct { - in_file: File, - msf: Msf, - allocator: mem.Allocator, - string_table: ?*MsfStream, - dbi: ?*MsfStream, - modules: []Module, - sect_contribs: []SectionContribEntry, - guid: [16]u8, - age: u32, - - pub const Module = struct { - mod_info: ModInfo, - module_name: []u8, - obj_file_name: []u8, - // The fields below are filled on demand. - populated: bool, - symbols: []u8, - subsect_info: []u8, - checksum_offset: ?usize, - - pub fn deinit(self: *Module, allocator: mem.Allocator) void { - allocator.free(self.module_name); - allocator.free(self.obj_file_name); - if (self.populated) { - allocator.free(self.symbols); - allocator.free(self.subsect_info); - } - } - }; - - pub fn init(allocator: mem.Allocator, path: []const u8) !Pdb { - const file = try fs.cwd().openFile(path, .{}); - errdefer file.close(); - - return Pdb{ - .in_file = file, - .allocator = allocator, - .string_table = null, - .dbi = null, - .msf = try Msf.init(allocator, file), - .modules = &[_]Module{}, - .sect_contribs = &[_]SectionContribEntry{}, - .guid = undefined, - .age = undefined, - }; - } - - pub fn deinit(self: *Pdb) void { - self.in_file.close(); - self.msf.deinit(self.allocator); - for (self.modules) |*module| { - module.deinit(self.allocator); - } - self.allocator.free(self.modules); - self.allocator.free(self.sect_contribs); - } - - pub fn parseDbiStream(self: *Pdb) !void { - var stream = self.getStream(StreamType.Dbi) orelse - return error.InvalidDebugInfo; - const reader = stream.reader(); - - const header = try reader.readStruct(DbiStreamHeader); - if (header.VersionHeader != 19990903) // V70, only value observed by LLVM team - return error.UnknownPDBVersion; - // if (header.Age != age) - // return error.UnmatchingPDB; - - const mod_info_size = header.ModInfoSize; - const section_contrib_size = header.SectionContributionSize; - - var modules = ArrayList(Module).init(self.allocator); - errdefer modules.deinit(); - - // Module Info Substream - var mod_info_offset: usize = 0; - while (mod_info_offset != mod_info_size) { - const mod_info = try reader.readStruct(ModInfo); - var this_record_len: usize = @sizeOf(ModInfo); - - const module_name = try reader.readUntilDelimiterAlloc(self.allocator, 0, 1024); - errdefer self.allocator.free(module_name); - this_record_len += module_name.len + 1; - - const obj_file_name = try reader.readUntilDelimiterAlloc(self.allocator, 0, 1024); - errdefer self.allocator.free(obj_file_name); - this_record_len += obj_file_name.len + 1; - - if (this_record_len % 4 != 0) { - const round_to_next_4 = (this_record_len | 0x3) + 1; - const march_forward_bytes = round_to_next_4 - this_record_len; - try stream.seekBy(@as(isize, @intCast(march_forward_bytes))); - this_record_len += march_forward_bytes; - } - - try modules.append(Module{ - .mod_info = mod_info, - .module_name = module_name, - .obj_file_name = obj_file_name, - - .populated = false, - .symbols = undefined, - .subsect_info = undefined, - .checksum_offset = null, - }); - - mod_info_offset += this_record_len; - if (mod_info_offset > mod_info_size) - return error.InvalidDebugInfo; - } - - // Section Contribution Substream - var sect_contribs = ArrayList(SectionContribEntry).init(self.allocator); - errdefer sect_contribs.deinit(); - - var sect_cont_offset: usize = 0; - if (section_contrib_size != 0) { - const version = reader.readEnum(SectionContrSubstreamVersion, .little) catch |err| switch (err) { - error.InvalidValue => return error.InvalidDebugInfo, - else => |e| return e, - }; - _ = version; - sect_cont_offset += @sizeOf(u32); - } - while (sect_cont_offset != section_contrib_size) { - const entry = try sect_contribs.addOne(); - entry.* = try reader.readStruct(SectionContribEntry); - sect_cont_offset += @sizeOf(SectionContribEntry); - - if (sect_cont_offset > section_contrib_size) - return error.InvalidDebugInfo; - } - - self.modules = try modules.toOwnedSlice(); - self.sect_contribs = try sect_contribs.toOwnedSlice(); - } - - pub fn parseInfoStream(self: *Pdb) !void { - var stream = self.getStream(StreamType.Pdb) orelse - return error.InvalidDebugInfo; - const reader = stream.reader(); - - // Parse the InfoStreamHeader. - const version = try reader.readInt(u32, .little); - const signature = try reader.readInt(u32, .little); - _ = signature; - const age = try reader.readInt(u32, .little); - const guid = try reader.readBytesNoEof(16); - - if (version != 20000404) // VC70, only value observed by LLVM team - return error.UnknownPDBVersion; - - self.guid = guid; - self.age = age; - - // Find the string table. - const string_table_index = str_tab_index: { - const name_bytes_len = try reader.readInt(u32, .little); - const name_bytes = try self.allocator.alloc(u8, name_bytes_len); - defer self.allocator.free(name_bytes); - try reader.readNoEof(name_bytes); - - const HashTableHeader = extern struct { - Size: u32, - Capacity: u32, - - fn maxLoad(cap: u32) u32 { - return cap * 2 / 3 + 1; - } - }; - const hash_tbl_hdr = try reader.readStruct(HashTableHeader); - if (hash_tbl_hdr.Capacity == 0) - return error.InvalidDebugInfo; - - if (hash_tbl_hdr.Size > HashTableHeader.maxLoad(hash_tbl_hdr.Capacity)) - return error.InvalidDebugInfo; - - const present = try readSparseBitVector(&reader, self.allocator); - defer self.allocator.free(present); - if (present.len != hash_tbl_hdr.Size) - return error.InvalidDebugInfo; - const deleted = try readSparseBitVector(&reader, self.allocator); - defer self.allocator.free(deleted); - - for (present) |_| { - const name_offset = try reader.readInt(u32, .little); - const name_index = try reader.readInt(u32, .little); - if (name_offset > name_bytes.len) - return error.InvalidDebugInfo; - const name = mem.sliceTo(name_bytes[name_offset..], 0); - if (mem.eql(u8, name, "/names")) { - break :str_tab_index name_index; - } - } - return error.MissingDebugInfo; - }; - - self.string_table = self.getStreamById(string_table_index) orelse - return error.MissingDebugInfo; - } - - pub fn getSymbolName(self: *Pdb, module: *Module, address: u64) ?[]const u8 { - _ = self; - std.debug.assert(module.populated); - - var symbol_i: usize = 0; - while (symbol_i != module.symbols.len) { - const prefix = @as(*align(1) RecordPrefix, @ptrCast(&module.symbols[symbol_i])); - if (prefix.RecordLen < 2) - return null; - switch (prefix.RecordKind) { - .S_LPROC32, .S_GPROC32 => { - const proc_sym = @as(*align(1) ProcSym, @ptrCast(&module.symbols[symbol_i + @sizeOf(RecordPrefix)])); - if (address >= proc_sym.CodeOffset and address < proc_sym.CodeOffset + proc_sym.CodeSize) { - return mem.sliceTo(@as([*:0]u8, @ptrCast(&proc_sym.Name[0])), 0); - } - }, - else => {}, - } - symbol_i += prefix.RecordLen + @sizeOf(u16); - } - - return null; - } - - pub fn getLineNumberInfo(self: *Pdb, module: *Module, address: u64) !debug.LineInfo { - std.debug.assert(module.populated); - const subsect_info = module.subsect_info; - - var sect_offset: usize = 0; - var skip_len: usize = undefined; - const checksum_offset = module.checksum_offset orelse return error.MissingDebugInfo; - while (sect_offset != subsect_info.len) : (sect_offset += skip_len) { - const subsect_hdr = @as(*align(1) DebugSubsectionHeader, @ptrCast(&subsect_info[sect_offset])); - skip_len = subsect_hdr.Length; - sect_offset += @sizeOf(DebugSubsectionHeader); - - switch (subsect_hdr.Kind) { - .Lines => { - var line_index = sect_offset; - - const line_hdr = @as(*align(1) LineFragmentHeader, @ptrCast(&subsect_info[line_index])); - if (line_hdr.RelocSegment == 0) - return error.MissingDebugInfo; - line_index += @sizeOf(LineFragmentHeader); - const frag_vaddr_start = line_hdr.RelocOffset; - const frag_vaddr_end = frag_vaddr_start + line_hdr.CodeSize; - - if (address >= frag_vaddr_start and address < frag_vaddr_end) { - // There is an unknown number of LineBlockFragmentHeaders (and their accompanying line and column records) - // from now on. We will iterate through them, and eventually find a LineInfo that we're interested in, - // breaking out to :subsections. If not, we will make sure to not read anything outside of this subsection. - const subsection_end_index = sect_offset + subsect_hdr.Length; - - while (line_index < subsection_end_index) { - const block_hdr = @as(*align(1) LineBlockFragmentHeader, @ptrCast(&subsect_info[line_index])); - line_index += @sizeOf(LineBlockFragmentHeader); - const start_line_index = line_index; - - const has_column = line_hdr.Flags.LF_HaveColumns; - - // All line entries are stored inside their line block by ascending start address. - // Heuristic: we want to find the last line entry - // that has a vaddr_start <= address. - // This is done with a simple linear search. - var line_i: u32 = 0; - while (line_i < block_hdr.NumLines) : (line_i += 1) { - const line_num_entry = @as(*align(1) LineNumberEntry, @ptrCast(&subsect_info[line_index])); - line_index += @sizeOf(LineNumberEntry); - - const vaddr_start = frag_vaddr_start + line_num_entry.Offset; - if (address < vaddr_start) { - break; - } - } - - // line_i == 0 would mean that no matching LineNumberEntry was found. - if (line_i > 0) { - const subsect_index = checksum_offset + block_hdr.NameIndex; - const chksum_hdr = @as(*align(1) FileChecksumEntryHeader, @ptrCast(&module.subsect_info[subsect_index])); - const strtab_offset = @sizeOf(PDBStringTableHeader) + chksum_hdr.FileNameOffset; - try self.string_table.?.seekTo(strtab_offset); - const source_file_name = try self.string_table.?.reader().readUntilDelimiterAlloc(self.allocator, 0, 1024); - - const line_entry_idx = line_i - 1; - - const column = if (has_column) blk: { - const start_col_index = start_line_index + @sizeOf(LineNumberEntry) * block_hdr.NumLines; - const col_index = start_col_index + @sizeOf(ColumnNumberEntry) * line_entry_idx; - const col_num_entry = @as(*align(1) ColumnNumberEntry, @ptrCast(&subsect_info[col_index])); - break :blk col_num_entry.StartColumn; - } else 0; - - const found_line_index = start_line_index + line_entry_idx * @sizeOf(LineNumberEntry); - const line_num_entry: *align(1) LineNumberEntry = @ptrCast(&subsect_info[found_line_index]); - const flags: *align(1) LineNumberEntry.Flags = @ptrCast(&line_num_entry.Flags); - - return debug.LineInfo{ - .file_name = source_file_name, - .line = flags.Start, - .column = column, - }; - } - } - - // Checking that we are not reading garbage after the (possibly) multiple block fragments. - if (line_index != subsection_end_index) { - return error.InvalidDebugInfo; - } - } - }, - else => {}, - } - - if (sect_offset > subsect_info.len) - return error.InvalidDebugInfo; - } - - return error.MissingDebugInfo; - } - - pub fn getModule(self: *Pdb, index: usize) !?*Module { - if (index >= self.modules.len) - return null; - - const mod = &self.modules[index]; - if (mod.populated) - return mod; - - // At most one can be non-zero. - if (mod.mod_info.C11ByteSize != 0 and mod.mod_info.C13ByteSize != 0) - return error.InvalidDebugInfo; - if (mod.mod_info.C13ByteSize == 0) - return error.InvalidDebugInfo; - - const stream = self.getStreamById(mod.mod_info.ModuleSymStream) orelse - return error.MissingDebugInfo; - const reader = stream.reader(); - - const signature = try reader.readInt(u32, .little); - if (signature != 4) - return error.InvalidDebugInfo; - - mod.symbols = try self.allocator.alloc(u8, mod.mod_info.SymByteSize - 4); - errdefer self.allocator.free(mod.symbols); - try reader.readNoEof(mod.symbols); - - mod.subsect_info = try self.allocator.alloc(u8, mod.mod_info.C13ByteSize); - errdefer self.allocator.free(mod.subsect_info); - try reader.readNoEof(mod.subsect_info); - - var sect_offset: usize = 0; - var skip_len: usize = undefined; - while (sect_offset != mod.subsect_info.len) : (sect_offset += skip_len) { - const subsect_hdr = @as(*align(1) DebugSubsectionHeader, @ptrCast(&mod.subsect_info[sect_offset])); - skip_len = subsect_hdr.Length; - sect_offset += @sizeOf(DebugSubsectionHeader); - - switch (subsect_hdr.Kind) { - .FileChecksums => { - mod.checksum_offset = sect_offset; - break; - }, - else => {}, - } - - if (sect_offset > mod.subsect_info.len) - return error.InvalidDebugInfo; - } - - mod.populated = true; - return mod; - } - - pub fn getStreamById(self: *Pdb, id: u32) ?*MsfStream { - if (id >= self.msf.streams.len) - return null; - return &self.msf.streams[id]; - } - - pub fn getStream(self: *Pdb, stream: StreamType) ?*MsfStream { - const id = @intFromEnum(stream); - return self.getStreamById(id); - } -}; - -// see https://llvm.org/docs/PDB/MsfFile.html -const Msf = struct { - directory: MsfStream, - streams: []MsfStream, - - fn init(allocator: mem.Allocator, file: File) !Msf { - const in = file.reader(); - - const superblock = try in.readStruct(SuperBlock); - - // Sanity checks - if (!mem.eql(u8, &superblock.FileMagic, SuperBlock.file_magic)) - return error.InvalidDebugInfo; - if (superblock.FreeBlockMapBlock != 1 and superblock.FreeBlockMapBlock != 2) - return error.InvalidDebugInfo; - const file_len = try file.getEndPos(); - if (superblock.NumBlocks * superblock.BlockSize != file_len) - return error.InvalidDebugInfo; - switch (superblock.BlockSize) { - // llvm only supports 4096 but we can handle any of these values - 512, 1024, 2048, 4096 => {}, - else => return error.InvalidDebugInfo, - } - - const dir_block_count = blockCountFromSize(superblock.NumDirectoryBytes, superblock.BlockSize); - if (dir_block_count > superblock.BlockSize / @sizeOf(u32)) - return error.UnhandledBigDirectoryStream; // cf. BlockMapAddr comment. - - try file.seekTo(superblock.BlockSize * superblock.BlockMapAddr); - const dir_blocks = try allocator.alloc(u32, dir_block_count); - for (dir_blocks) |*b| { - b.* = try in.readInt(u32, .little); - } - var directory = MsfStream.init( - superblock.BlockSize, - file, - dir_blocks, - ); - - const begin = directory.pos; - const stream_count = try directory.reader().readInt(u32, .little); - const stream_sizes = try allocator.alloc(u32, stream_count); - defer allocator.free(stream_sizes); - - // Microsoft's implementation uses @as(u32, -1) for inexistent streams. - // These streams are not used, but still participate in the file - // and must be taken into account when resolving stream indices. - const Nil = 0xFFFFFFFF; - for (stream_sizes) |*s| { - const size = try directory.reader().readInt(u32, .little); - s.* = if (size == Nil) 0 else blockCountFromSize(size, superblock.BlockSize); - } - - const streams = try allocator.alloc(MsfStream, stream_count); - for (streams, 0..) |*stream, i| { - const size = stream_sizes[i]; - if (size == 0) { - stream.* = MsfStream{ - .blocks = &[_]u32{}, - }; - } else { - var blocks = try allocator.alloc(u32, size); - var j: u32 = 0; - while (j < size) : (j += 1) { - const block_id = try directory.reader().readInt(u32, .little); - const n = (block_id % superblock.BlockSize); - // 0 is for SuperBlock, 1 and 2 for FPMs. - if (block_id == 0 or n == 1 or n == 2 or block_id * superblock.BlockSize > file_len) - return error.InvalidBlockIndex; - blocks[j] = block_id; - } - - stream.* = MsfStream.init( - superblock.BlockSize, - file, - blocks, - ); - } - } - - const end = directory.pos; - if (end - begin != superblock.NumDirectoryBytes) - return error.InvalidStreamDirectory; - - return Msf{ - .directory = directory, - .streams = streams, - }; - } - - fn deinit(self: *Msf, allocator: mem.Allocator) void { - allocator.free(self.directory.blocks); - for (self.streams) |*stream| { - allocator.free(stream.blocks); - } - allocator.free(self.streams); - } -}; - -fn blockCountFromSize(size: u32, block_size: u32) u32 { - return (size + block_size - 1) / block_size; -} - // https://llvm.org/docs/PDB/MsfFile.html#the-superblock pub const SuperBlock = extern struct { /// The LLVM docs list a space between C / C++ but empirically this is not the case. @@ -1016,82 +510,3 @@ pub const SuperBlock = extern struct { // implement it so we're kind of safe making this assumption for now. BlockMapAddr: u32, }; - -const MsfStream = struct { - in_file: File = undefined, - pos: u64 = undefined, - blocks: []u32 = undefined, - block_size: u32 = undefined, - - pub const Error = @typeInfo(@typeInfo(@TypeOf(read)).Fn.return_type.?).ErrorUnion.error_set; - - fn init(block_size: u32, file: File, blocks: []u32) MsfStream { - const stream = MsfStream{ - .in_file = file, - .pos = 0, - .blocks = blocks, - .block_size = block_size, - }; - - return stream; - } - - fn read(self: *MsfStream, buffer: []u8) !usize { - var block_id = @as(usize, @intCast(self.pos / self.block_size)); - if (block_id >= self.blocks.len) return 0; // End of Stream - var block = self.blocks[block_id]; - var offset = self.pos % self.block_size; - - try self.in_file.seekTo(block * self.block_size + offset); - const in = self.in_file.reader(); - - var size: usize = 0; - var rem_buffer = buffer; - while (size < buffer.len) { - const size_to_read = @min(self.block_size - offset, rem_buffer.len); - size += try in.read(rem_buffer[0..size_to_read]); - rem_buffer = buffer[size..]; - offset += size_to_read; - - // If we're at the end of a block, go to the next one. - if (offset == self.block_size) { - offset = 0; - block_id += 1; - if (block_id >= self.blocks.len) break; // End of Stream - block = self.blocks[block_id]; - try self.in_file.seekTo(block * self.block_size); - } - } - - self.pos += buffer.len; - return buffer.len; - } - - pub fn seekBy(self: *MsfStream, len: i64) !void { - self.pos = @as(u64, @intCast(@as(i64, @intCast(self.pos)) + len)); - if (self.pos >= self.blocks.len * self.block_size) - return error.EOF; - } - - pub fn seekTo(self: *MsfStream, len: u64) !void { - self.pos = len; - if (self.pos >= self.blocks.len * self.block_size) - return error.EOF; - } - - fn getSize(self: *const MsfStream) u64 { - return self.blocks.len * self.block_size; - } - - fn getFilePos(self: MsfStream) u64 { - const block_id = self.pos / self.block_size; - const block = self.blocks[block_id]; - const offset = self.pos % self.block_size; - - return block * self.block_size + offset; - } - - pub fn reader(self: *MsfStream) std.io.Reader(*MsfStream, Error, read) { - return .{ .context = self }; - } -}; |
