Diffstat (limited to 'lib/std/debug')
| -rw-r--r-- | lib/std/debug/Dwarf.zig            | 2709 |
| -rw-r--r-- | lib/std/debug/Dwarf/abi.zig        |  410 |
| -rw-r--r-- | lib/std/debug/Dwarf/call_frame.zig |  687 |
| -rw-r--r-- | lib/std/debug/Dwarf/expression.zig | 1638 |
4 files changed, 5444 insertions, 0 deletions
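For orientation before the diff body, here is a minimal sketch of how the new `Dwarf` type is meant to be driven, based on the doc comments in this change: the caller fills in the section slices, then calls `open`, which scans functions and compile units. The helper below and the `std.debug.Dwarf` import path are illustrative assumptions, not part of the diff.

    const std = @import("std");
    const Dwarf = std.debug.Dwarf; // assumed export path for lib/std/debug/Dwarf.zig

    // Hypothetical helper: resolve the function name containing `address`.
    fn printSymbolName(
        allocator: std.mem.Allocator,
        debug_info: []const u8,
        debug_abbrev: []const u8,
        address: u64,
    ) !void {
        var di: Dwarf = .{ .endian = .little, .is_macho = false };
        // `open` requires at least .debug_info and .debug_abbrev to be populated.
        di.sections[@intFromEnum(Dwarf.Section.Id.debug_info)] = .{ .data = debug_info, .owned = false };
        di.sections[@intFromEnum(Dwarf.Section.Id.debug_abbrev)] = .{ .data = debug_abbrev, .owned = false };
        try di.open(allocator);
        defer di.deinit(allocator);
        if (di.getSymbolName(address)) |name| std.debug.print("{s}\n", .{name});
    }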
diff --git a/lib/std/debug/Dwarf.zig b/lib/std/debug/Dwarf.zig new file mode 100644 index 0000000000..f17fd737a1 --- /dev/null +++ b/lib/std/debug/Dwarf.zig @@ -0,0 +1,2709 @@ +//! Implements parsing, decoding, and caching of DWARF information. +//! +//! For unopinionated types and bits, see `std.dwarf`. + +const builtin = @import("builtin"); +const std = @import("../std.zig"); +const AT = DW.AT; +const Allocator = std.mem.Allocator; +const DW = std.dwarf; +const EH = DW.EH; +const FORM = DW.FORM; +const Format = DW.Format; +const RLE = DW.RLE; +const StackIterator = std.debug.StackIterator; +const UT = DW.UT; +const assert = std.debug.assert; +const cast = std.math.cast; +const maxInt = std.math.maxInt; +const native_endian = builtin.cpu.arch.endian(); +const readInt = std.mem.readInt; + +const Dwarf = @This(); + +pub const expression = @import("Dwarf/expression.zig"); +pub const abi = @import("Dwarf/abi.zig"); +pub const call_frame = @import("Dwarf/call_frame.zig"); + +endian: std.builtin.Endian, +sections: SectionArray = null_section_array, +is_macho: bool, + +// Filled later by the initializer +abbrev_table_list: std.ArrayListUnmanaged(Abbrev.Table) = .{}, +compile_unit_list: std.ArrayListUnmanaged(CompileUnit) = .{}, +func_list: std.ArrayListUnmanaged(Func) = .{}, + +eh_frame_hdr: ?ExceptionFrameHeader = null, +// These lookup tables are only used if `eh_frame_hdr` is null +cie_map: std.AutoArrayHashMapUnmanaged(u64, CommonInformationEntry) = .{}, +// Sorted by start_pc +fde_list: std.ArrayListUnmanaged(FrameDescriptionEntry) = .{}, + +pub const Section = struct { + data: []const u8, + // Module-relative virtual address. + // Only set if the section data was loaded from disk. + virtual_address: ?usize = null, + // If `data` is owned by this Dwarf. + owned: bool, + + pub const Id = enum { + debug_info, + debug_abbrev, + debug_str, + debug_str_offsets, + debug_line, + debug_line_str, + debug_ranges, + debug_loclists, + debug_rnglists, + debug_addr, + debug_names, + debug_frame, + eh_frame, + eh_frame_hdr, + }; + + // For sections that are not memory mapped by the loader, this is an offset + // from `data.ptr` to where the section would have been mapped. Otherwise, + // `data` is directly backed by the section and the offset is zero. 
+ pub fn virtualOffset(self: Section, base_address: usize) i64 { + return if (self.virtual_address) |va| + @as(i64, @intCast(base_address + va)) - + @as(i64, @intCast(@intFromPtr(self.data.ptr))) + else + 0; + } +}; + +pub const Abbrev = struct { + code: u64, + tag_id: u64, + has_children: bool, + attrs: []Attr, + + fn deinit(abbrev: *Abbrev, allocator: Allocator) void { + allocator.free(abbrev.attrs); + abbrev.* = undefined; + } + + const Attr = struct { + id: u64, + form_id: u64, + /// Only valid if form_id is .implicit_const + payload: i64, + }; + + const Table = struct { + // offset from .debug_abbrev + offset: u64, + abbrevs: []Abbrev, + + fn deinit(table: *Table, allocator: Allocator) void { + for (table.abbrevs) |*abbrev| { + abbrev.deinit(allocator); + } + allocator.free(table.abbrevs); + table.* = undefined; + } + + fn get(table: *const Table, abbrev_code: u64) ?*const Abbrev { + return for (table.abbrevs) |*abbrev| { + if (abbrev.code == abbrev_code) break abbrev; + } else null; + } + }; +}; + +pub const CompileUnit = struct { + version: u16, + format: Format, + die: Die, + pc_range: ?PcRange, + + str_offsets_base: usize, + addr_base: usize, + rnglists_base: usize, + loclists_base: usize, + frame_base: ?*const FormValue, +}; + +pub const FormValue = union(enum) { + addr: u64, + addrx: usize, + block: []const u8, + udata: u64, + data16: *const [16]u8, + sdata: i64, + exprloc: []const u8, + flag: bool, + sec_offset: u64, + ref: u64, + ref_addr: u64, + string: [:0]const u8, + strp: u64, + strx: usize, + line_strp: u64, + loclistx: u64, + rnglistx: u64, + + fn getString(fv: FormValue, di: Dwarf) ![:0]const u8 { + switch (fv) { + .string => |s| return s, + .strp => |off| return di.getString(off), + .line_strp => |off| return di.getLineString(off), + else => return badDwarf(), + } + } + + fn getUInt(fv: FormValue, comptime U: type) !U { + return switch (fv) { + inline .udata, + .sdata, + .sec_offset, + => |c| cast(U, c) orelse badDwarf(), + else => badDwarf(), + }; + } +}; + +pub const Die = struct { + tag_id: u64, + has_children: bool, + attrs: []Attr, + + const Attr = struct { + id: u64, + value: FormValue, + }; + + fn deinit(self: *Die, allocator: Allocator) void { + allocator.free(self.attrs); + self.* = undefined; + } + + fn getAttr(self: *const Die, id: u64) ?*const FormValue { + for (self.attrs) |*attr| { + if (attr.id == id) return &attr.value; + } + return null; + } + + fn getAttrAddr( + self: *const Die, + di: *const Dwarf, + id: u64, + compile_unit: CompileUnit, + ) error{ InvalidDebugInfo, MissingDebugInfo }!u64 { + const form_value = self.getAttr(id) orelse return error.MissingDebugInfo; + return switch (form_value.*) { + .addr => |value| value, + .addrx => |index| di.readDebugAddr(compile_unit, index), + else => error.InvalidDebugInfo, + }; + } + + fn getAttrSecOffset(self: *const Die, id: u64) !u64 { + const form_value = self.getAttr(id) orelse return error.MissingDebugInfo; + return form_value.getUInt(u64); + } + + fn getAttrUnsignedLe(self: *const Die, id: u64) !u64 { + const form_value = self.getAttr(id) orelse return error.MissingDebugInfo; + return switch (form_value.*) { + .Const => |value| value.asUnsignedLe(), + else => error.InvalidDebugInfo, + }; + } + + fn getAttrRef(self: *const Die, id: u64) !u64 { + const form_value = self.getAttr(id) orelse return error.MissingDebugInfo; + return switch (form_value.*) { + .ref => |value| value, + else => error.InvalidDebugInfo, + }; + } + + pub fn getAttrString( + self: *const Die, + di: *Dwarf, + id: u64, + opt_str: 
?[]const u8, + compile_unit: CompileUnit, + ) error{ InvalidDebugInfo, MissingDebugInfo }![]const u8 { + const form_value = self.getAttr(id) orelse return error.MissingDebugInfo; + switch (form_value.*) { + .string => |value| return value, + .strp => |offset| return di.getString(offset), + .strx => |index| { + const debug_str_offsets = di.section(.debug_str_offsets) orelse return badDwarf(); + if (compile_unit.str_offsets_base == 0) return badDwarf(); + switch (compile_unit.format) { + .@"32" => { + const byte_offset = compile_unit.str_offsets_base + 4 * index; + if (byte_offset + 4 > debug_str_offsets.len) return badDwarf(); + const offset = readInt(u32, debug_str_offsets[byte_offset..][0..4], di.endian); + return getStringGeneric(opt_str, offset); + }, + .@"64" => { + const byte_offset = compile_unit.str_offsets_base + 8 * index; + if (byte_offset + 8 > debug_str_offsets.len) return badDwarf(); + const offset = readInt(u64, debug_str_offsets[byte_offset..][0..8], di.endian); + return getStringGeneric(opt_str, offset); + }, + } + }, + .line_strp => |offset| return di.getLineString(offset), + else => return badDwarf(), + } + } +}; + +/// This represents the decoded .eh_frame_hdr header +pub const ExceptionFrameHeader = struct { + eh_frame_ptr: usize, + table_enc: u8, + fde_count: usize, + entries: []const u8, + + pub fn entrySize(table_enc: u8) !u8 { + return switch (table_enc & EH.PE.type_mask) { + EH.PE.udata2, + EH.PE.sdata2, + => 4, + EH.PE.udata4, + EH.PE.sdata4, + => 8, + EH.PE.udata8, + EH.PE.sdata8, + => 16, + // This is a binary search table, so all entries must be the same length + else => return badDwarf(), + }; + } + + fn isValidPtr( + self: ExceptionFrameHeader, + comptime T: type, + ptr: usize, + ma: *StackIterator.MemoryAccessor, + eh_frame_len: ?usize, + ) bool { + if (eh_frame_len) |len| { + return ptr >= self.eh_frame_ptr and ptr <= self.eh_frame_ptr + len - @sizeOf(T); + } else { + return ma.load(T, ptr) != null; + } + } + + /// Find an entry by binary searching the eh_frame_hdr section. + /// + /// Since the length of the eh_frame section (`eh_frame_len`) may not be known by the caller, + /// MemoryAccessor will be used to verify readability of the header entries. + /// If `eh_frame_len` is provided, then these checks can be skipped. 
+ pub fn findEntry( + self: ExceptionFrameHeader, + ma: *StackIterator.MemoryAccessor, + eh_frame_len: ?usize, + eh_frame_hdr_ptr: usize, + pc: usize, + cie: *CommonInformationEntry, + fde: *FrameDescriptionEntry, + ) !void { + const entry_size = try entrySize(self.table_enc); + + var left: usize = 0; + var len: usize = self.fde_count; + + var fbr: FixedBufferReader = .{ .buf = self.entries, .endian = native_endian }; + + while (len > 1) { + const mid = left + len / 2; + + fbr.pos = mid * entry_size; + const pc_begin = try readEhPointer(&fbr, self.table_enc, @sizeOf(usize), .{ + .pc_rel_base = @intFromPtr(&self.entries[fbr.pos]), + .follow_indirect = true, + .data_rel_base = eh_frame_hdr_ptr, + }) orelse return badDwarf(); + + if (pc < pc_begin) { + len /= 2; + } else { + left = mid; + if (pc == pc_begin) break; + len -= len / 2; + } + } + + if (len == 0) return badDwarf(); + fbr.pos = left * entry_size; + + // Read past the pc_begin field of the entry + _ = try readEhPointer(&fbr, self.table_enc, @sizeOf(usize), .{ + .pc_rel_base = @intFromPtr(&self.entries[fbr.pos]), + .follow_indirect = true, + .data_rel_base = eh_frame_hdr_ptr, + }) orelse return badDwarf(); + + const fde_ptr = cast(usize, try readEhPointer(&fbr, self.table_enc, @sizeOf(usize), .{ + .pc_rel_base = @intFromPtr(&self.entries[fbr.pos]), + .follow_indirect = true, + .data_rel_base = eh_frame_hdr_ptr, + }) orelse return badDwarf()) orelse return badDwarf(); + + if (fde_ptr < self.eh_frame_ptr) return badDwarf(); + + // Even if eh_frame_len is not specified, all ranges accessed are checked via MemoryAccessor + const eh_frame = @as([*]const u8, @ptrFromInt(self.eh_frame_ptr))[0 .. eh_frame_len orelse maxInt(u32)]; + + const fde_offset = fde_ptr - self.eh_frame_ptr; + var eh_frame_fbr: FixedBufferReader = .{ + .buf = eh_frame, + .pos = fde_offset, + .endian = native_endian, + }; + + const fde_entry_header = try EntryHeader.read(&eh_frame_fbr, if (eh_frame_len == null) ma else null, .eh_frame); + if (!self.isValidPtr(u8, @intFromPtr(&fde_entry_header.entry_bytes[fde_entry_header.entry_bytes.len - 1]), ma, eh_frame_len)) return badDwarf(); + if (fde_entry_header.type != .fde) return badDwarf(); + + // CIEs always come before FDEs (the offset is a subtraction), so we can assume this memory is readable + const cie_offset = fde_entry_header.type.fde; + try eh_frame_fbr.seekTo(cie_offset); + const cie_entry_header = try EntryHeader.read(&eh_frame_fbr, if (eh_frame_len == null) ma else null, .eh_frame); + if (!self.isValidPtr(u8, @intFromPtr(&cie_entry_header.entry_bytes[cie_entry_header.entry_bytes.len - 1]), ma, eh_frame_len)) return badDwarf(); + if (cie_entry_header.type != .cie) return badDwarf(); + + cie.* = try CommonInformationEntry.parse( + cie_entry_header.entry_bytes, + 0, + true, + cie_entry_header.format, + .eh_frame, + cie_entry_header.length_offset, + @sizeOf(usize), + native_endian, + ); + + fde.* = try FrameDescriptionEntry.parse( + fde_entry_header.entry_bytes, + 0, + true, + cie.*, + @sizeOf(usize), + native_endian, + ); + } +}; + +pub const EntryHeader = struct { + /// Offset of the length field in the backing buffer + length_offset: usize, + format: Format, + type: union(enum) { + cie, + /// Value is the offset of the corresponding CIE + fde: u64, + terminator, + }, + /// The entry's contents, not including the ID field + entry_bytes: []const u8, + + /// The length of the entry including the ID field, but not the length field itself + pub fn entryLength(self: EntryHeader) usize { + return self.entry_bytes.len + 
@as(u8, if (self.format == .@"64") 8 else 4); + } + + /// Reads a header for either an FDE or a CIE, then advances the fbr to the position after the trailing structure. + /// `fbr` must be a FixedBufferReader backed by either the .eh_frame or .debug_frame sections. + pub fn read( + fbr: *FixedBufferReader, + opt_ma: ?*StackIterator.MemoryAccessor, + dwarf_section: Section.Id, + ) !EntryHeader { + assert(dwarf_section == .eh_frame or dwarf_section == .debug_frame); + + const length_offset = fbr.pos; + const unit_header = try readUnitHeader(fbr, opt_ma); + const unit_length = cast(usize, unit_header.unit_length) orelse return badDwarf(); + if (unit_length == 0) return .{ + .length_offset = length_offset, + .format = unit_header.format, + .type = .terminator, + .entry_bytes = &.{}, + }; + const start_offset = fbr.pos; + const end_offset = start_offset + unit_length; + defer fbr.pos = end_offset; + + const id = try if (opt_ma) |ma| + fbr.readAddressChecked(unit_header.format, ma) + else + fbr.readAddress(unit_header.format); + const entry_bytes = fbr.buf[fbr.pos..end_offset]; + const cie_id: u64 = switch (dwarf_section) { + .eh_frame => CommonInformationEntry.eh_id, + .debug_frame => switch (unit_header.format) { + .@"32" => CommonInformationEntry.dwarf32_id, + .@"64" => CommonInformationEntry.dwarf64_id, + }, + else => unreachable, + }; + + return .{ + .length_offset = length_offset, + .format = unit_header.format, + .type = if (id == cie_id) .cie else .{ .fde = switch (dwarf_section) { + .eh_frame => try std.math.sub(u64, start_offset, id), + .debug_frame => id, + else => unreachable, + } }, + .entry_bytes = entry_bytes, + }; + } +}; + +pub const CommonInformationEntry = struct { + // Used in .eh_frame + pub const eh_id = 0; + + // Used in .debug_frame (DWARF32) + pub const dwarf32_id = maxInt(u32); + + // Used in .debug_frame (DWARF64) + pub const dwarf64_id = maxInt(u64); + + // Offset of the length field of this entry in the eh_frame section. + // This is the key that FDEs use to reference CIEs. + length_offset: u64, + version: u8, + address_size: u8, + format: Format, + + // Only present in version 4 + segment_selector_size: ?u8, + + code_alignment_factor: u32, + data_alignment_factor: i32, + return_address_register: u8, + + aug_str: []const u8, + aug_data: []const u8, + lsda_pointer_enc: u8, + personality_enc: ?u8, + personality_routine_pointer: ?u64, + fde_pointer_enc: u8, + initial_instructions: []const u8, + + pub fn isSignalFrame(self: CommonInformationEntry) bool { + for (self.aug_str) |c| if (c == 'S') return true; + return false; + } + + pub fn addressesSignedWithBKey(self: CommonInformationEntry) bool { + for (self.aug_str) |c| if (c == 'B') return true; + return false; + } + + pub fn mteTaggedFrame(self: CommonInformationEntry) bool { + for (self.aug_str) |c| if (c == 'G') return true; + return false; + } + + /// This function expects to read the CIE starting with the version field. + /// The returned struct references memory backed by cie_bytes. + /// + /// See the FrameDescriptionEntry.parse documentation for the description + /// of `pc_rel_offset` and `is_runtime`. + /// + /// `length_offset` specifies the offset of this CIE's length field in the + /// .eh_frame / .debug_frame section. 
+ pub fn parse( + cie_bytes: []const u8, + pc_rel_offset: i64, + is_runtime: bool, + format: Format, + dwarf_section: Section.Id, + length_offset: u64, + addr_size_bytes: u8, + endian: std.builtin.Endian, + ) !CommonInformationEntry { + if (addr_size_bytes > 8) return error.UnsupportedAddrSize; + + var fbr: FixedBufferReader = .{ .buf = cie_bytes, .endian = endian }; + + const version = try fbr.readByte(); + switch (dwarf_section) { + .eh_frame => if (version != 1 and version != 3) return error.UnsupportedDwarfVersion, + .debug_frame => if (version != 4) return error.UnsupportedDwarfVersion, + else => return error.UnsupportedDwarfSection, + } + + var has_eh_data = false; + var has_aug_data = false; + + var aug_str_len: usize = 0; + const aug_str_start = fbr.pos; + var aug_byte = try fbr.readByte(); + while (aug_byte != 0) : (aug_byte = try fbr.readByte()) { + switch (aug_byte) { + 'z' => { + if (aug_str_len != 0) return badDwarf(); + has_aug_data = true; + }, + 'e' => { + if (has_aug_data or aug_str_len != 0) return badDwarf(); + if (try fbr.readByte() != 'h') return badDwarf(); + has_eh_data = true; + }, + else => if (has_eh_data) return badDwarf(), + } + + aug_str_len += 1; + } + + if (has_eh_data) { + // legacy data created by older versions of gcc - unsupported here + for (0..addr_size_bytes) |_| _ = try fbr.readByte(); + } + + const address_size = if (version == 4) try fbr.readByte() else addr_size_bytes; + const segment_selector_size = if (version == 4) try fbr.readByte() else null; + + const code_alignment_factor = try fbr.readUleb128(u32); + const data_alignment_factor = try fbr.readIleb128(i32); + const return_address_register = if (version == 1) try fbr.readByte() else try fbr.readUleb128(u8); + + var lsda_pointer_enc: u8 = EH.PE.omit; + var personality_enc: ?u8 = null; + var personality_routine_pointer: ?u64 = null; + var fde_pointer_enc: u8 = EH.PE.absptr; + + var aug_data: []const u8 = &[_]u8{}; + const aug_str = if (has_aug_data) blk: { + const aug_data_len = try fbr.readUleb128(usize); + const aug_data_start = fbr.pos; + aug_data = cie_bytes[aug_data_start..][0..aug_data_len]; + + const aug_str = cie_bytes[aug_str_start..][0..aug_str_len]; + for (aug_str[1..]) |byte| { + switch (byte) { + 'L' => { + lsda_pointer_enc = try fbr.readByte(); + }, + 'P' => { + personality_enc = try fbr.readByte(); + personality_routine_pointer = try readEhPointer(&fbr, personality_enc.?, addr_size_bytes, .{ + .pc_rel_base = try pcRelBase(@intFromPtr(&cie_bytes[fbr.pos]), pc_rel_offset), + .follow_indirect = is_runtime, + }); + }, + 'R' => { + fde_pointer_enc = try fbr.readByte(); + }, + 'S', 'B', 'G' => {}, + else => return badDwarf(), + } + } + + // aug_data_len can include padding so the CIE ends on an address boundary + fbr.pos = aug_data_start + aug_data_len; + break :blk aug_str; + } else &[_]u8{}; + + const initial_instructions = cie_bytes[fbr.pos..]; + return .{ + .length_offset = length_offset, + .version = version, + .address_size = address_size, + .format = format, + .segment_selector_size = segment_selector_size, + .code_alignment_factor = code_alignment_factor, + .data_alignment_factor = data_alignment_factor, + .return_address_register = return_address_register, + .aug_str = aug_str, + .aug_data = aug_data, + .lsda_pointer_enc = lsda_pointer_enc, + .personality_enc = personality_enc, + .personality_routine_pointer = personality_routine_pointer, + .fde_pointer_enc = fde_pointer_enc, + .initial_instructions = initial_instructions, + }; + } +}; + +pub const FrameDescriptionEntry = 
struct { + // Offset into eh_frame where the CIE for this FDE is stored + cie_length_offset: u64, + + pc_begin: u64, + pc_range: u64, + lsda_pointer: ?u64, + aug_data: []const u8, + instructions: []const u8, + + /// This function expects to read the FDE starting at the PC Begin field. + /// The returned struct references memory backed by `fde_bytes`. + /// + /// `pc_rel_offset` specifies an offset to be applied to pc_rel_base values + /// used when decoding pointers. This should be set to zero if fde_bytes is + /// backed by the memory of a .eh_frame / .debug_frame section in the running executable. + /// Otherwise, it should be the relative offset to translate addresses from + /// where the section is currently stored in memory, to where it *would* be + /// stored at runtime: section base addr - backing data base ptr. + /// + /// Similarly, `is_runtime` specifies this function is being called on a runtime + /// section, and so indirect pointers can be followed. + pub fn parse( + fde_bytes: []const u8, + pc_rel_offset: i64, + is_runtime: bool, + cie: CommonInformationEntry, + addr_size_bytes: u8, + endian: std.builtin.Endian, + ) !FrameDescriptionEntry { + if (addr_size_bytes > 8) return error.InvalidAddrSize; + + var fbr: FixedBufferReader = .{ .buf = fde_bytes, .endian = endian }; + + const pc_begin = try readEhPointer(&fbr, cie.fde_pointer_enc, addr_size_bytes, .{ + .pc_rel_base = try pcRelBase(@intFromPtr(&fde_bytes[fbr.pos]), pc_rel_offset), + .follow_indirect = is_runtime, + }) orelse return badDwarf(); + + const pc_range = try readEhPointer(&fbr, cie.fde_pointer_enc, addr_size_bytes, .{ + .pc_rel_base = 0, + .follow_indirect = false, + }) orelse return badDwarf(); + + var aug_data: []const u8 = &[_]u8{}; + const lsda_pointer = if (cie.aug_str.len > 0) blk: { + const aug_data_len = try fbr.readUleb128(usize); + const aug_data_start = fbr.pos; + aug_data = fde_bytes[aug_data_start..][0..aug_data_len]; + + const lsda_pointer = if (cie.lsda_pointer_enc != EH.PE.omit) + try readEhPointer(&fbr, cie.lsda_pointer_enc, addr_size_bytes, .{ + .pc_rel_base = try pcRelBase(@intFromPtr(&fde_bytes[fbr.pos]), pc_rel_offset), + .follow_indirect = is_runtime, + }) + else + null; + + fbr.pos = aug_data_start + aug_data_len; + break :blk lsda_pointer; + } else null; + + const instructions = fde_bytes[fbr.pos..]; + return .{ + .cie_length_offset = cie.length_offset, + .pc_begin = pc_begin, + .pc_range = pc_range, + .lsda_pointer = lsda_pointer, + .aug_data = aug_data, + .instructions = instructions, + }; + } +}; + +pub const UnwindContext = struct { + allocator: Allocator, + cfa: ?usize, + pc: usize, + thread_context: *std.debug.ThreadContext, + reg_context: abi.RegisterContext, + vm: call_frame.VirtualMachine, + stack_machine: expression.StackMachine(.{ .call_frame_context = true }), + + pub fn init( + allocator: Allocator, + thread_context: *const std.debug.ThreadContext, + ) !UnwindContext { + const pc = abi.stripInstructionPtrAuthCode( + (try abi.regValueNative( + usize, + thread_context, + abi.ipRegNum(), + null, + )).*, + ); + + const context_copy = try allocator.create(std.debug.ThreadContext); + std.debug.copyContext(thread_context, context_copy); + + return .{ + .allocator = allocator, + .cfa = null, + .pc = pc, + .thread_context = context_copy, + .reg_context = undefined, + .vm = .{}, + .stack_machine = .{}, + }; + } + + pub fn deinit(self: *UnwindContext) void { + self.vm.deinit(self.allocator); + self.stack_machine.deinit(self.allocator); + self.allocator.destroy(self.thread_context); + 
self.* = undefined; + } + + pub fn getFp(self: *const UnwindContext) !usize { + return (try abi.regValueNative(usize, self.thread_context, abi.fpRegNum(self.reg_context), self.reg_context)).*; + } +}; + +const num_sections = std.enums.directEnumArrayLen(Section.Id, 0); +pub const SectionArray = [num_sections]?Section; +pub const null_section_array = [_]?Section{null} ** num_sections; + +/// Initialize DWARF info. The caller has the responsibility to initialize most +/// of the `Dwarf` fields before calling. `binary_mem` is the raw bytes of the +/// main binary file (not the secondary debug info file). +pub fn open(di: *Dwarf, allocator: Allocator) !void { + try di.scanAllFunctions(allocator); + try di.scanAllCompileUnits(allocator); +} + +const PcRange = struct { + start: u64, + end: u64, +}; + +const Func = struct { + pc_range: ?PcRange, + name: ?[]const u8, +}; + +pub fn section(di: Dwarf, dwarf_section: Section.Id) ?[]const u8 { + return if (di.sections[@intFromEnum(dwarf_section)]) |s| s.data else null; +} + +pub fn sectionVirtualOffset(di: Dwarf, dwarf_section: Section.Id, base_address: usize) ?i64 { + return if (di.sections[@intFromEnum(dwarf_section)]) |s| s.virtualOffset(base_address) else null; +} + +pub fn deinit(di: *Dwarf, allocator: Allocator) void { + for (di.sections) |opt_section| { + if (opt_section) |s| if (s.owned) allocator.free(s.data); + } + for (di.abbrev_table_list.items) |*abbrev| { + abbrev.deinit(allocator); + } + di.abbrev_table_list.deinit(allocator); + for (di.compile_unit_list.items) |*cu| { + cu.die.deinit(allocator); + } + di.compile_unit_list.deinit(allocator); + di.func_list.deinit(allocator); + di.cie_map.deinit(allocator); + di.fde_list.deinit(allocator); + di.* = undefined; +} + +pub fn getSymbolName(di: *Dwarf, address: u64) ?[]const u8 { + for (di.func_list.items) |*func| { + if (func.pc_range) |range| { + if (address >= range.start and address < range.end) { + return func.name; + } + } + } + + return null; +} + +fn scanAllFunctions(di: *Dwarf, allocator: Allocator) !void { + var fbr: FixedBufferReader = .{ .buf = di.section(.debug_info).?, .endian = di.endian }; + var this_unit_offset: u64 = 0; + + while (this_unit_offset < fbr.buf.len) { + try fbr.seekTo(this_unit_offset); + + const unit_header = try readUnitHeader(&fbr, null); + if (unit_header.unit_length == 0) return; + const next_offset = unit_header.header_length + unit_header.unit_length; + + const version = try fbr.readInt(u16); + if (version < 2 or version > 5) return badDwarf(); + + var address_size: u8 = undefined; + var debug_abbrev_offset: u64 = undefined; + if (version >= 5) { + const unit_type = try fbr.readInt(u8); + if (unit_type != DW.UT.compile) return badDwarf(); + address_size = try fbr.readByte(); + debug_abbrev_offset = try fbr.readAddress(unit_header.format); + } else { + debug_abbrev_offset = try fbr.readAddress(unit_header.format); + address_size = try fbr.readByte(); + } + if (address_size != @sizeOf(usize)) return badDwarf(); + + const abbrev_table = try di.getAbbrevTable(allocator, debug_abbrev_offset); + + var max_attrs: usize = 0; + var zig_padding_abbrev_code: u7 = 0; + for (abbrev_table.abbrevs) |abbrev| { + max_attrs = @max(max_attrs, abbrev.attrs.len); + if (cast(u7, abbrev.code)) |code| { + if (abbrev.tag_id == DW.TAG.ZIG_padding and + !abbrev.has_children and + abbrev.attrs.len == 0) + { + zig_padding_abbrev_code = code; + } + } + } + const attrs_buf = try allocator.alloc(Die.Attr, max_attrs * 3); + defer allocator.free(attrs_buf); + var attrs_bufs: [3][]Die.Attr = 
undefined; + for (&attrs_bufs, 0..) |*buf, index| buf.* = attrs_buf[index * max_attrs ..][0..max_attrs]; + + const next_unit_pos = this_unit_offset + next_offset; + + var compile_unit: CompileUnit = .{ + .version = version, + .format = unit_header.format, + .die = undefined, + .pc_range = null, + + .str_offsets_base = 0, + .addr_base = 0, + .rnglists_base = 0, + .loclists_base = 0, + .frame_base = null, + }; + + while (true) { + fbr.pos = std.mem.indexOfNonePos(u8, fbr.buf, fbr.pos, &.{ + zig_padding_abbrev_code, 0, + }) orelse fbr.buf.len; + if (fbr.pos >= next_unit_pos) break; + var die_obj = (try parseDie( + &fbr, + attrs_bufs[0], + abbrev_table, + unit_header.format, + )) orelse continue; + + switch (die_obj.tag_id) { + DW.TAG.compile_unit => { + compile_unit.die = die_obj; + compile_unit.die.attrs = attrs_bufs[1][0..die_obj.attrs.len]; + @memcpy(compile_unit.die.attrs, die_obj.attrs); + + compile_unit.str_offsets_base = if (die_obj.getAttr(AT.str_offsets_base)) |fv| try fv.getUInt(usize) else 0; + compile_unit.addr_base = if (die_obj.getAttr(AT.addr_base)) |fv| try fv.getUInt(usize) else 0; + compile_unit.rnglists_base = if (die_obj.getAttr(AT.rnglists_base)) |fv| try fv.getUInt(usize) else 0; + compile_unit.loclists_base = if (die_obj.getAttr(AT.loclists_base)) |fv| try fv.getUInt(usize) else 0; + compile_unit.frame_base = die_obj.getAttr(AT.frame_base); + }, + DW.TAG.subprogram, DW.TAG.inlined_subroutine, DW.TAG.subroutine, DW.TAG.entry_point => { + const fn_name = x: { + var this_die_obj = die_obj; + // Prevent endless loops + for (0..3) |_| { + if (this_die_obj.getAttr(AT.name)) |_| { + break :x try this_die_obj.getAttrString(di, AT.name, di.section(.debug_str), compile_unit); + } else if (this_die_obj.getAttr(AT.abstract_origin)) |_| { + const after_die_offset = fbr.pos; + defer fbr.pos = after_die_offset; + + // Follow the DIE it points to and repeat + const ref_offset = try this_die_obj.getAttrRef(AT.abstract_origin); + if (ref_offset > next_offset) return badDwarf(); + try fbr.seekTo(this_unit_offset + ref_offset); + this_die_obj = (try parseDie( + &fbr, + attrs_bufs[2], + abbrev_table, + unit_header.format, + )) orelse return badDwarf(); + } else if (this_die_obj.getAttr(AT.specification)) |_| { + const after_die_offset = fbr.pos; + defer fbr.pos = after_die_offset; + + // Follow the DIE it points to and repeat + const ref_offset = try this_die_obj.getAttrRef(AT.specification); + if (ref_offset > next_offset) return badDwarf(); + try fbr.seekTo(this_unit_offset + ref_offset); + this_die_obj = (try parseDie( + &fbr, + attrs_bufs[2], + abbrev_table, + unit_header.format, + )) orelse return badDwarf(); + } else { + break :x null; + } + } + + break :x null; + }; + + var range_added = if (die_obj.getAttrAddr(di, AT.low_pc, compile_unit)) |low_pc| blk: { + if (die_obj.getAttr(AT.high_pc)) |high_pc_value| { + const pc_end = switch (high_pc_value.*) { + .addr => |value| value, + .udata => |offset| low_pc + offset, + else => return badDwarf(), + }; + + try di.func_list.append(allocator, .{ + .name = fn_name, + .pc_range = .{ + .start = low_pc, + .end = pc_end, + }, + }); + + break :blk true; + } + + break :blk false; + } else |err| blk: { + if (err != error.MissingDebugInfo) return err; + break :blk false; + }; + + if (die_obj.getAttr(AT.ranges)) |ranges_value| blk: { + var iter = DebugRangeIterator.init(ranges_value, di, &compile_unit) catch |err| { + if (err != error.MissingDebugInfo) return err; + break :blk; + }; + + while (try iter.next()) |range| { + range_added = true; + try 
di.func_list.append(allocator, .{ + .name = fn_name, + .pc_range = .{ + .start = range.start_addr, + .end = range.end_addr, + }, + }); + } + } + + if (fn_name != null and !range_added) { + try di.func_list.append(allocator, .{ + .name = fn_name, + .pc_range = null, + }); + } + }, + else => {}, + } + } + + this_unit_offset += next_offset; + } +} + +fn scanAllCompileUnits(di: *Dwarf, allocator: Allocator) !void { + var fbr: FixedBufferReader = .{ .buf = di.section(.debug_info).?, .endian = di.endian }; + var this_unit_offset: u64 = 0; + + var attrs_buf = std.ArrayList(Die.Attr).init(allocator); + defer attrs_buf.deinit(); + + while (this_unit_offset < fbr.buf.len) { + try fbr.seekTo(this_unit_offset); + + const unit_header = try readUnitHeader(&fbr, null); + if (unit_header.unit_length == 0) return; + const next_offset = unit_header.header_length + unit_header.unit_length; + + const version = try fbr.readInt(u16); + if (version < 2 or version > 5) return badDwarf(); + + var address_size: u8 = undefined; + var debug_abbrev_offset: u64 = undefined; + if (version >= 5) { + const unit_type = try fbr.readInt(u8); + if (unit_type != UT.compile) return badDwarf(); + address_size = try fbr.readByte(); + debug_abbrev_offset = try fbr.readAddress(unit_header.format); + } else { + debug_abbrev_offset = try fbr.readAddress(unit_header.format); + address_size = try fbr.readByte(); + } + if (address_size != @sizeOf(usize)) return badDwarf(); + + const abbrev_table = try di.getAbbrevTable(allocator, debug_abbrev_offset); + + var max_attrs: usize = 0; + for (abbrev_table.abbrevs) |abbrev| { + max_attrs = @max(max_attrs, abbrev.attrs.len); + } + try attrs_buf.resize(max_attrs); + + var compile_unit_die = (try parseDie( + &fbr, + attrs_buf.items, + abbrev_table, + unit_header.format, + )) orelse return badDwarf(); + + if (compile_unit_die.tag_id != DW.TAG.compile_unit) return badDwarf(); + + compile_unit_die.attrs = try allocator.dupe(Die.Attr, compile_unit_die.attrs); + + var compile_unit: CompileUnit = .{ + .version = version, + .format = unit_header.format, + .pc_range = null, + .die = compile_unit_die, + .str_offsets_base = if (compile_unit_die.getAttr(AT.str_offsets_base)) |fv| try fv.getUInt(usize) else 0, + .addr_base = if (compile_unit_die.getAttr(AT.addr_base)) |fv| try fv.getUInt(usize) else 0, + .rnglists_base = if (compile_unit_die.getAttr(AT.rnglists_base)) |fv| try fv.getUInt(usize) else 0, + .loclists_base = if (compile_unit_die.getAttr(AT.loclists_base)) |fv| try fv.getUInt(usize) else 0, + .frame_base = compile_unit_die.getAttr(AT.frame_base), + }; + + compile_unit.pc_range = x: { + if (compile_unit_die.getAttrAddr(di, AT.low_pc, compile_unit)) |low_pc| { + if (compile_unit_die.getAttr(AT.high_pc)) |high_pc_value| { + const pc_end = switch (high_pc_value.*) { + .addr => |value| value, + .udata => |offset| low_pc + offset, + else => return badDwarf(), + }; + break :x PcRange{ + .start = low_pc, + .end = pc_end, + }; + } else { + break :x null; + } + } else |err| { + if (err != error.MissingDebugInfo) return err; + break :x null; + } + }; + + try di.compile_unit_list.append(allocator, compile_unit); + + this_unit_offset += next_offset; + } +} + +const DebugRangeIterator = struct { + base_address: u64, + section_type: Section.Id, + di: *const Dwarf, + compile_unit: *const CompileUnit, + fbr: FixedBufferReader, + + pub fn init(ranges_value: *const FormValue, di: *const Dwarf, compile_unit: *const CompileUnit) !@This() { + const section_type = if (compile_unit.version >= 5) 
Section.Id.debug_rnglists else Section.Id.debug_ranges; + const debug_ranges = di.section(section_type) orelse return error.MissingDebugInfo; + + const ranges_offset = switch (ranges_value.*) { + .sec_offset, .udata => |off| off, + .rnglistx => |idx| off: { + switch (compile_unit.format) { + .@"32" => { + const offset_loc = @as(usize, @intCast(compile_unit.rnglists_base + 4 * idx)); + if (offset_loc + 4 > debug_ranges.len) return badDwarf(); + const offset = readInt(u32, debug_ranges[offset_loc..][0..4], di.endian); + break :off compile_unit.rnglists_base + offset; + }, + .@"64" => { + const offset_loc = @as(usize, @intCast(compile_unit.rnglists_base + 8 * idx)); + if (offset_loc + 8 > debug_ranges.len) return badDwarf(); + const offset = readInt(u64, debug_ranges[offset_loc..][0..8], di.endian); + break :off compile_unit.rnglists_base + offset; + }, + } + }, + else => return badDwarf(), + }; + + // All the addresses in the list are relative to the value + // specified by DW_AT.low_pc or to some other value encoded + // in the list itself. + // If no starting value is specified use zero. + const base_address = compile_unit.die.getAttrAddr(di, AT.low_pc, compile_unit.*) catch |err| switch (err) { + error.MissingDebugInfo => 0, + else => return err, + }; + + return .{ + .base_address = base_address, + .section_type = section_type, + .di = di, + .compile_unit = compile_unit, + .fbr = .{ + .buf = debug_ranges, + .pos = cast(usize, ranges_offset) orelse return badDwarf(), + .endian = di.endian, + }, + }; + } + + // Returns the next range in the list, or null if the end was reached. + pub fn next(self: *@This()) !?struct { start_addr: u64, end_addr: u64 } { + switch (self.section_type) { + .debug_rnglists => { + const kind = try self.fbr.readByte(); + switch (kind) { + RLE.end_of_list => return null, + RLE.base_addressx => { + const index = try self.fbr.readUleb128(usize); + self.base_address = try self.di.readDebugAddr(self.compile_unit.*, index); + return try self.next(); + }, + RLE.startx_endx => { + const start_index = try self.fbr.readUleb128(usize); + const start_addr = try self.di.readDebugAddr(self.compile_unit.*, start_index); + + const end_index = try self.fbr.readUleb128(usize); + const end_addr = try self.di.readDebugAddr(self.compile_unit.*, end_index); + + return .{ + .start_addr = start_addr, + .end_addr = end_addr, + }; + }, + RLE.startx_length => { + const start_index = try self.fbr.readUleb128(usize); + const start_addr = try self.di.readDebugAddr(self.compile_unit.*, start_index); + + const len = try self.fbr.readUleb128(usize); + const end_addr = start_addr + len; + + return .{ + .start_addr = start_addr, + .end_addr = end_addr, + }; + }, + RLE.offset_pair => { + const start_addr = try self.fbr.readUleb128(usize); + const end_addr = try self.fbr.readUleb128(usize); + + // This is the only kind that uses the base address + return .{ + .start_addr = self.base_address + start_addr, + .end_addr = self.base_address + end_addr, + }; + }, + RLE.base_address => { + self.base_address = try self.fbr.readInt(usize); + return try self.next(); + }, + RLE.start_end => { + const start_addr = try self.fbr.readInt(usize); + const end_addr = try self.fbr.readInt(usize); + + return .{ + .start_addr = start_addr, + .end_addr = end_addr, + }; + }, + RLE.start_length => { + const start_addr = try self.fbr.readInt(usize); + const len = try self.fbr.readUleb128(usize); + const end_addr = start_addr + len; + + return .{ + .start_addr = start_addr, + .end_addr = end_addr, + }; + }, + else => return 
badDwarf(), + } + }, + .debug_ranges => { + const start_addr = try self.fbr.readInt(usize); + const end_addr = try self.fbr.readInt(usize); + if (start_addr == 0 and end_addr == 0) return null; + + // This entry selects a new value for the base address + if (start_addr == maxInt(usize)) { + self.base_address = end_addr; + return try self.next(); + } + + return .{ + .start_addr = self.base_address + start_addr, + .end_addr = self.base_address + end_addr, + }; + }, + else => unreachable, + } + } +}; + +pub fn findCompileUnit(di: *const Dwarf, target_address: u64) !*const CompileUnit { + for (di.compile_unit_list.items) |*compile_unit| { + if (compile_unit.pc_range) |range| { + if (target_address >= range.start and target_address < range.end) return compile_unit; + } + + const ranges_value = compile_unit.die.getAttr(AT.ranges) orelse continue; + var iter = DebugRangeIterator.init(ranges_value, di, compile_unit) catch continue; + while (try iter.next()) |range| { + if (target_address >= range.start_addr and target_address < range.end_addr) return compile_unit; + } + } + + return missingDwarf(); +} + +/// Gets an already existing AbbrevTable given the abbrev_offset, or if not found, +/// seeks in the stream and parses it. +fn getAbbrevTable(di: *Dwarf, allocator: Allocator, abbrev_offset: u64) !*const Abbrev.Table { + for (di.abbrev_table_list.items) |*table| { + if (table.offset == abbrev_offset) { + return table; + } + } + try di.abbrev_table_list.append( + allocator, + try di.parseAbbrevTable(allocator, abbrev_offset), + ); + return &di.abbrev_table_list.items[di.abbrev_table_list.items.len - 1]; +} + +fn parseAbbrevTable(di: *Dwarf, allocator: Allocator, offset: u64) !Abbrev.Table { + var fbr: FixedBufferReader = .{ + .buf = di.section(.debug_abbrev).?, + .pos = cast(usize, offset) orelse return badDwarf(), + .endian = di.endian, + }; + + var abbrevs = std.ArrayList(Abbrev).init(allocator); + defer { + for (abbrevs.items) |*abbrev| { + abbrev.deinit(allocator); + } + abbrevs.deinit(); + } + + var attrs = std.ArrayList(Abbrev.Attr).init(allocator); + defer attrs.deinit(); + + while (true) { + const code = try fbr.readUleb128(u64); + if (code == 0) break; + const tag_id = try fbr.readUleb128(u64); + const has_children = (try fbr.readByte()) == DW.CHILDREN.yes; + + while (true) { + const attr_id = try fbr.readUleb128(u64); + const form_id = try fbr.readUleb128(u64); + if (attr_id == 0 and form_id == 0) break; + try attrs.append(.{ + .id = attr_id, + .form_id = form_id, + .payload = switch (form_id) { + FORM.implicit_const => try fbr.readIleb128(i64), + else => undefined, + }, + }); + } + + try abbrevs.append(.{ + .code = code, + .tag_id = tag_id, + .has_children = has_children, + .attrs = try attrs.toOwnedSlice(), + }); + } + + return .{ + .offset = offset, + .abbrevs = try abbrevs.toOwnedSlice(), + }; +} + +fn parseDie( + fbr: *FixedBufferReader, + attrs_buf: []Die.Attr, + abbrev_table: *const Abbrev.Table, + format: Format, +) !?Die { + const abbrev_code = try fbr.readUleb128(u64); + if (abbrev_code == 0) return null; + const table_entry = abbrev_table.get(abbrev_code) orelse return badDwarf(); + + const attrs = attrs_buf[0..table_entry.attrs.len]; + for (attrs, table_entry.attrs) |*result_attr, attr| result_attr.* = Die.Attr{ + .id = attr.id, + .value = try parseFormValue( + fbr, + attr.form_id, + format, + attr.payload, + ), + }; + return .{ + .tag_id = table_entry.tag_id, + .has_children = table_entry.has_children, + .attrs = attrs, + }; +} + +pub fn getLineNumberInfo( + di: *Dwarf, + 
allocator: Allocator, + compile_unit: CompileUnit, + target_address: u64, +) !std.debug.LineInfo { + const compile_unit_cwd = try compile_unit.die.getAttrString(di, AT.comp_dir, di.section(.debug_line_str), compile_unit); + const line_info_offset = try compile_unit.die.getAttrSecOffset(AT.stmt_list); + + var fbr: FixedBufferReader = .{ .buf = di.section(.debug_line).?, .endian = di.endian }; + try fbr.seekTo(line_info_offset); + + const unit_header = try readUnitHeader(&fbr, null); + if (unit_header.unit_length == 0) return missingDwarf(); + const next_offset = unit_header.header_length + unit_header.unit_length; + + const version = try fbr.readInt(u16); + if (version < 2) return badDwarf(); + + var addr_size: u8 = switch (unit_header.format) { + .@"32" => 4, + .@"64" => 8, + }; + var seg_size: u8 = 0; + if (version >= 5) { + addr_size = try fbr.readByte(); + seg_size = try fbr.readByte(); + } + + const prologue_length = try fbr.readAddress(unit_header.format); + const prog_start_offset = fbr.pos + prologue_length; + + const minimum_instruction_length = try fbr.readByte(); + if (minimum_instruction_length == 0) return badDwarf(); + + if (version >= 4) { + // maximum_operations_per_instruction + _ = try fbr.readByte(); + } + + const default_is_stmt = (try fbr.readByte()) != 0; + const line_base = try fbr.readByteSigned(); + + const line_range = try fbr.readByte(); + if (line_range == 0) return badDwarf(); + + const opcode_base = try fbr.readByte(); + + const standard_opcode_lengths = try fbr.readBytes(opcode_base - 1); + + var include_directories = std.ArrayList(FileEntry).init(allocator); + defer include_directories.deinit(); + var file_entries = std.ArrayList(FileEntry).init(allocator); + defer file_entries.deinit(); + + if (version < 5) { + try include_directories.append(.{ .path = compile_unit_cwd }); + + while (true) { + const dir = try fbr.readBytesTo(0); + if (dir.len == 0) break; + try include_directories.append(.{ .path = dir }); + } + + while (true) { + const file_name = try fbr.readBytesTo(0); + if (file_name.len == 0) break; + const dir_index = try fbr.readUleb128(u32); + const mtime = try fbr.readUleb128(u64); + const size = try fbr.readUleb128(u64); + try file_entries.append(.{ + .path = file_name, + .dir_index = dir_index, + .mtime = mtime, + .size = size, + }); + } + } else { + const FileEntFmt = struct { + content_type_code: u8, + form_code: u16, + }; + { + var dir_ent_fmt_buf: [10]FileEntFmt = undefined; + const directory_entry_format_count = try fbr.readByte(); + if (directory_entry_format_count > dir_ent_fmt_buf.len) return badDwarf(); + for (dir_ent_fmt_buf[0..directory_entry_format_count]) |*ent_fmt| { + ent_fmt.* = .{ + .content_type_code = try fbr.readUleb128(u8), + .form_code = try fbr.readUleb128(u16), + }; + } + + const directories_count = try fbr.readUleb128(usize); + try include_directories.ensureUnusedCapacity(directories_count); + { + var i: usize = 0; + while (i < directories_count) : (i += 1) { + var e: FileEntry = .{ .path = &.{} }; + for (dir_ent_fmt_buf[0..directory_entry_format_count]) |ent_fmt| { + const form_value = try parseFormValue( + &fbr, + ent_fmt.form_code, + unit_header.format, + null, + ); + switch (ent_fmt.content_type_code) { + DW.LNCT.path => e.path = try form_value.getString(di.*), + DW.LNCT.directory_index => e.dir_index = try form_value.getUInt(u32), + DW.LNCT.timestamp => e.mtime = try form_value.getUInt(u64), + DW.LNCT.size => e.size = try form_value.getUInt(u64), + DW.LNCT.MD5 => e.md5 = switch (form_value) { + .data16 => |data16| 
data16.*, + else => return badDwarf(), + }, + else => continue, + } + } + include_directories.appendAssumeCapacity(e); + } + } + } + + var file_ent_fmt_buf: [10]FileEntFmt = undefined; + const file_name_entry_format_count = try fbr.readByte(); + if (file_name_entry_format_count > file_ent_fmt_buf.len) return badDwarf(); + for (file_ent_fmt_buf[0..file_name_entry_format_count]) |*ent_fmt| { + ent_fmt.* = .{ + .content_type_code = try fbr.readUleb128(u8), + .form_code = try fbr.readUleb128(u16), + }; + } + + const file_names_count = try fbr.readUleb128(usize); + try file_entries.ensureUnusedCapacity(file_names_count); + { + var i: usize = 0; + while (i < file_names_count) : (i += 1) { + var e: FileEntry = .{ .path = &.{} }; + for (file_ent_fmt_buf[0..file_name_entry_format_count]) |ent_fmt| { + const form_value = try parseFormValue( + &fbr, + ent_fmt.form_code, + unit_header.format, + null, + ); + switch (ent_fmt.content_type_code) { + DW.LNCT.path => e.path = try form_value.getString(di.*), + DW.LNCT.directory_index => e.dir_index = try form_value.getUInt(u32), + DW.LNCT.timestamp => e.mtime = try form_value.getUInt(u64), + DW.LNCT.size => e.size = try form_value.getUInt(u64), + DW.LNCT.MD5 => e.md5 = switch (form_value) { + .data16 => |data16| data16.*, + else => return badDwarf(), + }, + else => continue, + } + } + file_entries.appendAssumeCapacity(e); + } + } + } + + var prog = LineNumberProgram.init( + default_is_stmt, + include_directories.items, + target_address, + version, + ); + + try fbr.seekTo(prog_start_offset); + + const next_unit_pos = line_info_offset + next_offset; + + while (fbr.pos < next_unit_pos) { + const opcode = try fbr.readByte(); + + if (opcode == DW.LNS.extended_op) { + const op_size = try fbr.readUleb128(u64); + if (op_size < 1) return badDwarf(); + const sub_op = try fbr.readByte(); + switch (sub_op) { + DW.LNE.end_sequence => { + prog.end_sequence = true; + if (try prog.checkLineMatch(allocator, file_entries.items)) |info| return info; + prog.reset(); + }, + DW.LNE.set_address => { + const addr = try fbr.readInt(usize); + prog.address = addr; + }, + DW.LNE.define_file => { + const path = try fbr.readBytesTo(0); + const dir_index = try fbr.readUleb128(u32); + const mtime = try fbr.readUleb128(u64); + const size = try fbr.readUleb128(u64); + try file_entries.append(.{ + .path = path, + .dir_index = dir_index, + .mtime = mtime, + .size = size, + }); + }, + else => try fbr.seekForward(op_size - 1), + } + } else if (opcode >= opcode_base) { + // special opcodes + const adjusted_opcode = opcode - opcode_base; + const inc_addr = minimum_instruction_length * (adjusted_opcode / line_range); + const inc_line = @as(i32, line_base) + @as(i32, adjusted_opcode % line_range); + prog.line += inc_line; + prog.address += inc_addr; + if (try prog.checkLineMatch(allocator, file_entries.items)) |info| return info; + prog.basic_block = false; + } else { + switch (opcode) { + DW.LNS.copy => { + if (try prog.checkLineMatch(allocator, file_entries.items)) |info| return info; + prog.basic_block = false; + }, + DW.LNS.advance_pc => { + const arg = try fbr.readUleb128(usize); + prog.address += arg * minimum_instruction_length; + }, + DW.LNS.advance_line => { + const arg = try fbr.readIleb128(i64); + prog.line += arg; + }, + DW.LNS.set_file => { + const arg = try fbr.readUleb128(usize); + prog.file = arg; + }, + DW.LNS.set_column => { + const arg = try fbr.readUleb128(u64); + prog.column = arg; + }, + DW.LNS.negate_stmt => { + prog.is_stmt = !prog.is_stmt; + }, + DW.LNS.set_basic_block => { 
+ prog.basic_block = true; + }, + DW.LNS.const_add_pc => { + const inc_addr = minimum_instruction_length * ((255 - opcode_base) / line_range); + prog.address += inc_addr; + }, + DW.LNS.fixed_advance_pc => { + const arg = try fbr.readInt(u16); + prog.address += arg; + }, + DW.LNS.set_prologue_end => {}, + else => { + if (opcode - 1 >= standard_opcode_lengths.len) return badDwarf(); + try fbr.seekForward(standard_opcode_lengths[opcode - 1]); + }, + } + } + } + + return missingDwarf(); +} + +fn getString(di: Dwarf, offset: u64) ![:0]const u8 { + return getStringGeneric(di.section(.debug_str), offset); +} + +fn getLineString(di: Dwarf, offset: u64) ![:0]const u8 { + return getStringGeneric(di.section(.debug_line_str), offset); +} + +fn readDebugAddr(di: Dwarf, compile_unit: CompileUnit, index: u64) !u64 { + const debug_addr = di.section(.debug_addr) orelse return badDwarf(); + + // addr_base points to the first item after the header, however we + // need to read the header to know the size of each item. Empirically, + // it may disagree with is_64 on the compile unit. + // The header is 8 or 12 bytes depending on is_64. + if (compile_unit.addr_base < 8) return badDwarf(); + + const version = readInt(u16, debug_addr[compile_unit.addr_base - 4 ..][0..2], di.endian); + if (version != 5) return badDwarf(); + + const addr_size = debug_addr[compile_unit.addr_base - 2]; + const seg_size = debug_addr[compile_unit.addr_base - 1]; + + const byte_offset = @as(usize, @intCast(compile_unit.addr_base + (addr_size + seg_size) * index)); + if (byte_offset + addr_size > debug_addr.len) return badDwarf(); + return switch (addr_size) { + 1 => debug_addr[byte_offset], + 2 => readInt(u16, debug_addr[byte_offset..][0..2], di.endian), + 4 => readInt(u32, debug_addr[byte_offset..][0..4], di.endian), + 8 => readInt(u64, debug_addr[byte_offset..][0..8], di.endian), + else => badDwarf(), + }; +} + +/// If .eh_frame_hdr is present, then only the header needs to be parsed. +/// +/// Otherwise, .eh_frame and .debug_frame are scanned and a sorted list +/// of FDEs is built for binary searching during unwinding. 
+pub fn scanAllUnwindInfo(di: *Dwarf, allocator: Allocator, base_address: usize) !void { + if (di.section(.eh_frame_hdr)) |eh_frame_hdr| blk: { + var fbr: FixedBufferReader = .{ .buf = eh_frame_hdr, .endian = native_endian }; + + const version = try fbr.readByte(); + if (version != 1) break :blk; + + const eh_frame_ptr_enc = try fbr.readByte(); + if (eh_frame_ptr_enc == EH.PE.omit) break :blk; + const fde_count_enc = try fbr.readByte(); + if (fde_count_enc == EH.PE.omit) break :blk; + const table_enc = try fbr.readByte(); + if (table_enc == EH.PE.omit) break :blk; + + const eh_frame_ptr = cast(usize, try readEhPointer(&fbr, eh_frame_ptr_enc, @sizeOf(usize), .{ + .pc_rel_base = @intFromPtr(&eh_frame_hdr[fbr.pos]), + .follow_indirect = true, + }) orelse return badDwarf()) orelse return badDwarf(); + + const fde_count = cast(usize, try readEhPointer(&fbr, fde_count_enc, @sizeOf(usize), .{ + .pc_rel_base = @intFromPtr(&eh_frame_hdr[fbr.pos]), + .follow_indirect = true, + }) orelse return badDwarf()) orelse return badDwarf(); + + const entry_size = try ExceptionFrameHeader.entrySize(table_enc); + const entries_len = fde_count * entry_size; + if (entries_len > eh_frame_hdr.len - fbr.pos) return badDwarf(); + + di.eh_frame_hdr = .{ + .eh_frame_ptr = eh_frame_ptr, + .table_enc = table_enc, + .fde_count = fde_count, + .entries = eh_frame_hdr[fbr.pos..][0..entries_len], + }; + + // No need to scan .eh_frame, we have a binary search table already + return; + } + + const frame_sections = [2]Section.Id{ .eh_frame, .debug_frame }; + for (frame_sections) |frame_section| { + if (di.section(frame_section)) |section_data| { + var fbr: FixedBufferReader = .{ .buf = section_data, .endian = di.endian }; + while (fbr.pos < fbr.buf.len) { + const entry_header = try EntryHeader.read(&fbr, null, frame_section); + switch (entry_header.type) { + .cie => { + const cie = try CommonInformationEntry.parse( + entry_header.entry_bytes, + di.sectionVirtualOffset(frame_section, base_address).?, + true, + entry_header.format, + frame_section, + entry_header.length_offset, + @sizeOf(usize), + di.endian, + ); + try di.cie_map.put(allocator, entry_header.length_offset, cie); + }, + .fde => |cie_offset| { + const cie = di.cie_map.get(cie_offset) orelse return badDwarf(); + const fde = try FrameDescriptionEntry.parse( + entry_header.entry_bytes, + di.sectionVirtualOffset(frame_section, base_address).?, + true, + cie, + @sizeOf(usize), + di.endian, + ); + try di.fde_list.append(allocator, fde); + }, + .terminator => break, + } + } + + std.mem.sortUnstable(FrameDescriptionEntry, di.fde_list.items, {}, struct { + fn lessThan(ctx: void, a: FrameDescriptionEntry, b: FrameDescriptionEntry) bool { + _ = ctx; + return a.pc_begin < b.pc_begin; + } + }.lessThan); + } + } +} + +/// Unwind a stack frame using DWARF unwinding info, updating the register context. +/// +/// If `.eh_frame_hdr` is available, it will be used to binary search for the FDE. +/// Otherwise, a linear scan of `.eh_frame` and `.debug_frame` is done to find the FDE. +/// +/// `explicit_fde_offset` is for cases where the FDE offset is known, such as when __unwind_info +/// defers unwinding to DWARF. This is an offset into the `.eh_frame` section. 
+pub fn unwindFrame(di: *const Dwarf, context: *UnwindContext, ma: *StackIterator.MemoryAccessor, explicit_fde_offset: ?usize) !usize { + if (!comptime abi.supportsUnwinding(builtin.target)) return error.UnsupportedCpuArchitecture; + if (context.pc == 0) return 0; + + // Find the FDE and CIE + var cie: CommonInformationEntry = undefined; + var fde: FrameDescriptionEntry = undefined; + + if (explicit_fde_offset) |fde_offset| { + const dwarf_section: Section.Id = .eh_frame; + const frame_section = di.section(dwarf_section) orelse return error.MissingFDE; + if (fde_offset >= frame_section.len) return error.MissingFDE; + + var fbr: FixedBufferReader = .{ + .buf = frame_section, + .pos = fde_offset, + .endian = di.endian, + }; + + const fde_entry_header = try EntryHeader.read(&fbr, null, dwarf_section); + if (fde_entry_header.type != .fde) return error.MissingFDE; + + const cie_offset = fde_entry_header.type.fde; + try fbr.seekTo(cie_offset); + + fbr.endian = native_endian; + const cie_entry_header = try EntryHeader.read(&fbr, null, dwarf_section); + if (cie_entry_header.type != .cie) return badDwarf(); + + cie = try CommonInformationEntry.parse( + cie_entry_header.entry_bytes, + 0, + true, + cie_entry_header.format, + dwarf_section, + cie_entry_header.length_offset, + @sizeOf(usize), + native_endian, + ); + + fde = try FrameDescriptionEntry.parse( + fde_entry_header.entry_bytes, + 0, + true, + cie, + @sizeOf(usize), + native_endian, + ); + } else if (di.eh_frame_hdr) |header| { + const eh_frame_len = if (di.section(.eh_frame)) |eh_frame| eh_frame.len else null; + try header.findEntry( + ma, + eh_frame_len, + @intFromPtr(di.section(.eh_frame_hdr).?.ptr), + context.pc, + &cie, + &fde, + ); + } else { + const index = std.sort.binarySearch(FrameDescriptionEntry, context.pc, di.fde_list.items, {}, struct { + pub fn compareFn(_: void, pc: usize, mid_item: FrameDescriptionEntry) std.math.Order { + if (pc < mid_item.pc_begin) return .lt; + + const range_end = mid_item.pc_begin + mid_item.pc_range; + if (pc < range_end) return .eq; + + return .gt; + } + }.compareFn); + + fde = if (index) |i| di.fde_list.items[i] else return error.MissingFDE; + cie = di.cie_map.get(fde.cie_length_offset) orelse return error.MissingCIE; + } + + var expression_context: expression.Context = .{ + .format = cie.format, + .memory_accessor = ma, + .compile_unit = di.findCompileUnit(fde.pc_begin) catch null, + .thread_context = context.thread_context, + .reg_context = context.reg_context, + .cfa = context.cfa, + }; + + context.vm.reset(); + context.reg_context.eh_frame = cie.version != 4; + context.reg_context.is_macho = di.is_macho; + + const row = try context.vm.runToNative(context.allocator, context.pc, cie, fde); + context.cfa = switch (row.cfa.rule) { + .val_offset => |offset| blk: { + const register = row.cfa.register orelse return error.InvalidCFARule; + const value = readInt(usize, (try abi.regBytes(context.thread_context, register, context.reg_context))[0..@sizeOf(usize)], native_endian); + break :blk try call_frame.applyOffset(value, offset); + }, + .expression => |expr| blk: { + context.stack_machine.reset(); + const value = try context.stack_machine.run( + expr, + context.allocator, + expression_context, + context.cfa, + ); + + if (value) |v| { + if (v != .generic) return error.InvalidExpressionValue; + break :blk v.generic; + } else return error.NoExpressionValue; + }, + else => return error.InvalidCFARule, + }; + + if (ma.load(usize, context.cfa.?) 
== null) return error.InvalidCFA; + expression_context.cfa = context.cfa; + + // Buffering the modifications is done because copying the thread context is not portable; + // some implementations (e.g. darwin) use internal pointers to the mcontext. + var arena = std.heap.ArenaAllocator.init(context.allocator); + defer arena.deinit(); + const update_allocator = arena.allocator(); + + const RegisterUpdate = struct { + // Backed by thread_context + dest: []u8, + // Backed by arena + src: []const u8, + prev: ?*@This(), + }; + + var update_tail: ?*RegisterUpdate = null; + var has_return_address = true; + for (context.vm.rowColumns(row)) |column| { + if (column.register) |register| { + if (register == cie.return_address_register) { + has_return_address = column.rule != .undefined; + } + + const dest = try abi.regBytes(context.thread_context, register, context.reg_context); + const src = try update_allocator.alloc(u8, dest.len); + + const prev = update_tail; + update_tail = try update_allocator.create(RegisterUpdate); + update_tail.?.* = .{ + .dest = dest, + .src = src, + .prev = prev, + }; + + try column.resolveValue( + context, + expression_context, + ma, + src, + ); + } + } + + // On all implemented architectures, the CFA is defined as being the previous frame's SP + (try abi.regValueNative(usize, context.thread_context, abi.spRegNum(context.reg_context), context.reg_context)).* = context.cfa.?; + + while (update_tail) |tail| { + @memcpy(tail.dest, tail.src); + update_tail = tail.prev; + } + + if (has_return_address) { + context.pc = abi.stripInstructionPtrAuthCode(readInt(usize, (try abi.regBytes( + context.thread_context, + cie.return_address_register, + context.reg_context, + ))[0..@sizeOf(usize)], native_endian)); + } else { + context.pc = 0; + } + + (try abi.regValueNative(usize, context.thread_context, abi.ipRegNum(), context.reg_context)).* = context.pc; + + // The call instruction will have pushed the address of the instruction that follows the call as the return address. + // This next instruction may be past the end of the function if the caller was `noreturn` (i.e. the last instruction in + // the function was the call). If we were to look up an FDE entry using the return address directly, it could end up + // either not finding an FDE at all, or using the next FDE in the program, producing incorrect results. To prevent this, + // we subtract one so that the next lookup is guaranteed to land inside the current function. + // + // The exception to this rule is signal frames, where execution would be returned to the instruction + // that triggered the handler. 
+ const return_address = context.pc; + if (context.pc > 0 and !cie.isSignalFrame()) context.pc -= 1; + + return return_address; +} + +fn parseFormValue( + fbr: *FixedBufferReader, + form_id: u64, + format: Format, + implicit_const: ?i64, +) anyerror!FormValue { + return switch (form_id) { + FORM.addr => .{ .addr = try fbr.readAddress(switch (@bitSizeOf(usize)) { + 32 => .@"32", + 64 => .@"64", + else => @compileError("unsupported @sizeOf(usize)"), + }) }, + FORM.addrx1 => .{ .addrx = try fbr.readInt(u8) }, + FORM.addrx2 => .{ .addrx = try fbr.readInt(u16) }, + FORM.addrx3 => .{ .addrx = try fbr.readInt(u24) }, + FORM.addrx4 => .{ .addrx = try fbr.readInt(u32) }, + FORM.addrx => .{ .addrx = try fbr.readUleb128(usize) }, + + FORM.block1, + FORM.block2, + FORM.block4, + FORM.block, + => .{ .block = try fbr.readBytes(switch (form_id) { + FORM.block1 => try fbr.readInt(u8), + FORM.block2 => try fbr.readInt(u16), + FORM.block4 => try fbr.readInt(u32), + FORM.block => try fbr.readUleb128(usize), + else => unreachable, + }) }, + + FORM.data1 => .{ .udata = try fbr.readInt(u8) }, + FORM.data2 => .{ .udata = try fbr.readInt(u16) }, + FORM.data4 => .{ .udata = try fbr.readInt(u32) }, + FORM.data8 => .{ .udata = try fbr.readInt(u64) }, + FORM.data16 => .{ .data16 = (try fbr.readBytes(16))[0..16] }, + FORM.udata => .{ .udata = try fbr.readUleb128(u64) }, + FORM.sdata => .{ .sdata = try fbr.readIleb128(i64) }, + FORM.exprloc => .{ .exprloc = try fbr.readBytes(try fbr.readUleb128(usize)) }, + FORM.flag => .{ .flag = (try fbr.readByte()) != 0 }, + FORM.flag_present => .{ .flag = true }, + FORM.sec_offset => .{ .sec_offset = try fbr.readAddress(format) }, + + FORM.ref1 => .{ .ref = try fbr.readInt(u8) }, + FORM.ref2 => .{ .ref = try fbr.readInt(u16) }, + FORM.ref4 => .{ .ref = try fbr.readInt(u32) }, + FORM.ref8 => .{ .ref = try fbr.readInt(u64) }, + FORM.ref_udata => .{ .ref = try fbr.readUleb128(u64) }, + + FORM.ref_addr => .{ .ref_addr = try fbr.readAddress(format) }, + FORM.ref_sig8 => .{ .ref = try fbr.readInt(u64) }, + + FORM.string => .{ .string = try fbr.readBytesTo(0) }, + FORM.strp => .{ .strp = try fbr.readAddress(format) }, + FORM.strx1 => .{ .strx = try fbr.readInt(u8) }, + FORM.strx2 => .{ .strx = try fbr.readInt(u16) }, + FORM.strx3 => .{ .strx = try fbr.readInt(u24) }, + FORM.strx4 => .{ .strx = try fbr.readInt(u32) }, + FORM.strx => .{ .strx = try fbr.readUleb128(usize) }, + FORM.line_strp => .{ .line_strp = try fbr.readAddress(format) }, + FORM.indirect => parseFormValue(fbr, try fbr.readUleb128(u64), format, implicit_const), + FORM.implicit_const => .{ .sdata = implicit_const orelse return badDwarf() }, + FORM.loclistx => .{ .loclistx = try fbr.readUleb128(u64) }, + FORM.rnglistx => .{ .rnglistx = try fbr.readUleb128(u64) }, + else => { + //debug.print("unrecognized form id: {x}\n", .{form_id}); + return badDwarf(); + }, + }; +} + +const FileEntry = struct { + path: []const u8, + dir_index: u32 = 0, + mtime: u64 = 0, + size: u64 = 0, + md5: [16]u8 = [1]u8{0} ** 16, +}; + +const LineNumberProgram = struct { + address: u64, + file: usize, + line: i64, + column: u64, + version: u16, + is_stmt: bool, + basic_block: bool, + end_sequence: bool, + + default_is_stmt: bool, + target_address: u64, + include_dirs: []const FileEntry, + + prev_valid: bool, + prev_address: u64, + prev_file: usize, + prev_line: i64, + prev_column: u64, + prev_is_stmt: bool, + prev_basic_block: bool, + prev_end_sequence: bool, + + // Reset the state machine following the DWARF specification + pub fn reset(self: 
*LineNumberProgram) void { + self.address = 0; + self.file = 1; + self.line = 1; + self.column = 0; + self.is_stmt = self.default_is_stmt; + self.basic_block = false; + self.end_sequence = false; + // Invalidate all the remaining fields + self.prev_valid = false; + self.prev_address = 0; + self.prev_file = undefined; + self.prev_line = undefined; + self.prev_column = undefined; + self.prev_is_stmt = undefined; + self.prev_basic_block = undefined; + self.prev_end_sequence = undefined; + } + + pub fn init( + is_stmt: bool, + include_dirs: []const FileEntry, + target_address: u64, + version: u16, + ) LineNumberProgram { + return LineNumberProgram{ + .address = 0, + .file = 1, + .line = 1, + .column = 0, + .version = version, + .is_stmt = is_stmt, + .basic_block = false, + .end_sequence = false, + .include_dirs = include_dirs, + .default_is_stmt = is_stmt, + .target_address = target_address, + .prev_valid = false, + .prev_address = 0, + .prev_file = undefined, + .prev_line = undefined, + .prev_column = undefined, + .prev_is_stmt = undefined, + .prev_basic_block = undefined, + .prev_end_sequence = undefined, + }; + } + + pub fn checkLineMatch( + self: *LineNumberProgram, + allocator: Allocator, + file_entries: []const FileEntry, + ) !?std.debug.LineInfo { + if (self.prev_valid and + self.target_address >= self.prev_address and + self.target_address < self.address) + { + const file_index = if (self.version >= 5) self.prev_file else i: { + if (self.prev_file == 0) return missingDwarf(); + break :i self.prev_file - 1; + }; + + if (file_index >= file_entries.len) return badDwarf(); + const file_entry = &file_entries[file_index]; + + if (file_entry.dir_index >= self.include_dirs.len) return badDwarf(); + const dir_name = self.include_dirs[file_entry.dir_index].path; + + const file_name = try std.fs.path.join(allocator, &[_][]const u8{ + dir_name, file_entry.path, + }); + + return std.debug.LineInfo{ + .line = if (self.prev_line >= 0) @as(u64, @intCast(self.prev_line)) else 0, + .column = self.prev_column, + .file_name = file_name, + }; + } + + self.prev_valid = true; + self.prev_address = self.address; + self.prev_file = self.file; + self.prev_line = self.line; + self.prev_column = self.column; + self.prev_is_stmt = self.is_stmt; + self.prev_basic_block = self.basic_block; + self.prev_end_sequence = self.end_sequence; + return null; + } +}; + +const UnitHeader = struct { + format: Format, + header_length: u4, + unit_length: u64, +}; +fn readUnitHeader(fbr: *FixedBufferReader, opt_ma: ?*StackIterator.MemoryAccessor) !UnitHeader { + return switch (try if (opt_ma) |ma| fbr.readIntChecked(u32, ma) else fbr.readInt(u32)) { + 0...0xfffffff0 - 1 => |unit_length| .{ + .format = .@"32", + .header_length = 4, + .unit_length = unit_length, + }, + 0xfffffff0...0xffffffff - 1 => badDwarf(), + 0xffffffff => .{ + .format = .@"64", + .header_length = 12, + .unit_length = try if (opt_ma) |ma| fbr.readIntChecked(u64, ma) else fbr.readInt(u64), + }, + }; +} + +/// Returns the DWARF register number for an x86_64 register number found in compact unwind info +fn compactUnwindToDwarfRegNumber(unwind_reg_number: u3) !u8 { + return switch (unwind_reg_number) { + 1 => 3, // RBX + 2 => 12, // R12 + 3 => 13, // R13 + 4 => 14, // R14 + 5 => 15, // R15 + 6 => 6, // RBP + else => error.InvalidUnwindRegisterNumber, + }; +} + +/// This function is to make it handy to comment out the return and make it +/// into a crash when working on this file. 
+fn badDwarf() error{InvalidDebugInfo} { + //if (true) @panic("badDwarf"); // can be handy to uncomment when working on this file + return error.InvalidDebugInfo; +} + +fn missingDwarf() error{MissingDebugInfo} { + //if (true) @panic("missingDwarf"); // can be handy to uncomment when working on this file + return error.MissingDebugInfo; +} + +fn getStringGeneric(opt_str: ?[]const u8, offset: u64) ![:0]const u8 { + const str = opt_str orelse return badDwarf(); + if (offset > str.len) return badDwarf(); + const casted_offset = cast(usize, offset) orelse return badDwarf(); + // Valid strings always have a terminating zero byte + const last = std.mem.indexOfScalarPos(u8, str, casted_offset, 0) orelse return badDwarf(); + return str[casted_offset..last :0]; +} + +// Reading debug info needs to be fast, even when compiled in debug mode, +// so avoid using a `std.io.FixedBufferStream` which is too slow. +pub const FixedBufferReader = struct { + buf: []const u8, + pos: usize = 0, + endian: std.builtin.Endian, + + pub const Error = error{ EndOfBuffer, Overflow, InvalidBuffer }; + + fn seekTo(fbr: *FixedBufferReader, pos: u64) Error!void { + if (pos > fbr.buf.len) return error.EndOfBuffer; + fbr.pos = @intCast(pos); + } + + fn seekForward(fbr: *FixedBufferReader, amount: u64) Error!void { + if (fbr.buf.len - fbr.pos < amount) return error.EndOfBuffer; + fbr.pos += @intCast(amount); + } + + pub inline fn readByte(fbr: *FixedBufferReader) Error!u8 { + if (fbr.pos >= fbr.buf.len) return error.EndOfBuffer; + defer fbr.pos += 1; + return fbr.buf[fbr.pos]; + } + + fn readByteSigned(fbr: *FixedBufferReader) Error!i8 { + return @bitCast(try fbr.readByte()); + } + + fn readInt(fbr: *FixedBufferReader, comptime T: type) Error!T { + const size = @divExact(@typeInfo(T).Int.bits, 8); + if (fbr.buf.len - fbr.pos < size) return error.EndOfBuffer; + defer fbr.pos += size; + return std.mem.readInt(T, fbr.buf[fbr.pos..][0..size], fbr.endian); + } + + fn readIntChecked( + fbr: *FixedBufferReader, + comptime T: type, + ma: *std.debug.StackIterator.MemoryAccessor, + ) Error!T { + if (ma.load(T, @intFromPtr(fbr.buf[fbr.pos..].ptr)) == null) + return error.InvalidBuffer; + + return fbr.readInt(T); + } + + fn readUleb128(fbr: *FixedBufferReader, comptime T: type) Error!T { + return std.leb.readUleb128(T, fbr); + } + + fn readIleb128(fbr: *FixedBufferReader, comptime T: type) Error!T { + return std.leb.readIleb128(T, fbr); + } + + fn readAddress(fbr: *FixedBufferReader, format: Format) Error!u64 { + return switch (format) { + .@"32" => try fbr.readInt(u32), + .@"64" => try fbr.readInt(u64), + }; + } + + fn readAddressChecked( + fbr: *FixedBufferReader, + format: Format, + ma: *std.debug.StackIterator.MemoryAccessor, + ) Error!u64 { + return switch (format) { + .@"32" => try fbr.readIntChecked(u32, ma), + .@"64" => try fbr.readIntChecked(u64, ma), + }; + } + + fn readBytes(fbr: *FixedBufferReader, len: usize) Error![]const u8 { + if (fbr.buf.len - fbr.pos < len) return error.EndOfBuffer; + defer fbr.pos += len; + return fbr.buf[fbr.pos..][0..len]; + } + + fn readBytesTo(fbr: *FixedBufferReader, comptime sentinel: u8) Error![:sentinel]const u8 { + const end = @call(.always_inline, std.mem.indexOfScalarPos, .{ + u8, + fbr.buf, + fbr.pos, + sentinel, + }) orelse return error.EndOfBuffer; + defer fbr.pos = end + 1; + return fbr.buf[fbr.pos..end :sentinel]; + } +}; + +/// Unwind a frame using MachO compact unwind info (from __unwind_info). 
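+/// The lookup is two-level: a binary search over the first-level index locates the second-level
+/// page (regular or compressed), whose entries map `pc` to a compact encoding.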
+/// If the compact encoding can't encode a way to unwind a frame, it will +/// defer unwinding to DWARF, in which case `.eh_frame` will be used if available. +pub fn unwindFrameMachO( + context: *UnwindContext, + ma: *StackIterator.MemoryAccessor, + unwind_info: []const u8, + eh_frame: ?[]const u8, + module_base_address: usize, +) !usize { + const macho = std.macho; + + const header = std.mem.bytesAsValue( + macho.unwind_info_section_header, + unwind_info[0..@sizeOf(macho.unwind_info_section_header)], + ); + const indices = std.mem.bytesAsSlice( + macho.unwind_info_section_header_index_entry, + unwind_info[header.indexSectionOffset..][0 .. header.indexCount * @sizeOf(macho.unwind_info_section_header_index_entry)], + ); + if (indices.len == 0) return error.MissingUnwindInfo; + + const mapped_pc = context.pc - module_base_address; + const second_level_index = blk: { + var left: usize = 0; + var len: usize = indices.len; + + while (len > 1) { + const mid = left + len / 2; + const offset = indices[mid].functionOffset; + if (mapped_pc < offset) { + len /= 2; + } else { + left = mid; + if (mapped_pc == offset) break; + len -= len / 2; + } + } + + // Last index is a sentinel containing the highest address as its functionOffset + if (indices[left].secondLevelPagesSectionOffset == 0) return error.MissingUnwindInfo; + break :blk &indices[left]; + }; + + const common_encodings = std.mem.bytesAsSlice( + macho.compact_unwind_encoding_t, + unwind_info[header.commonEncodingsArraySectionOffset..][0 .. header.commonEncodingsArrayCount * @sizeOf(macho.compact_unwind_encoding_t)], + ); + + const start_offset = second_level_index.secondLevelPagesSectionOffset; + const kind = std.mem.bytesAsValue( + macho.UNWIND_SECOND_LEVEL, + unwind_info[start_offset..][0..@sizeOf(macho.UNWIND_SECOND_LEVEL)], + ); + + const entry: struct { + function_offset: usize, + raw_encoding: u32, + } = switch (kind.*) { + .REGULAR => blk: { + const page_header = std.mem.bytesAsValue( + macho.unwind_info_regular_second_level_page_header, + unwind_info[start_offset..][0..@sizeOf(macho.unwind_info_regular_second_level_page_header)], + ); + + const entries = std.mem.bytesAsSlice( + macho.unwind_info_regular_second_level_entry, + unwind_info[start_offset + page_header.entryPageOffset ..][0 .. page_header.entryCount * @sizeOf(macho.unwind_info_regular_second_level_entry)], + ); + if (entries.len == 0) return error.InvalidUnwindInfo; + + var left: usize = 0; + var len: usize = entries.len; + while (len > 1) { + const mid = left + len / 2; + const offset = entries[mid].functionOffset; + if (mapped_pc < offset) { + len /= 2; + } else { + left = mid; + if (mapped_pc == offset) break; + len -= len / 2; + } + } + + break :blk .{ + .function_offset = entries[left].functionOffset, + .raw_encoding = entries[left].encoding, + }; + }, + .COMPRESSED => blk: { + const page_header = std.mem.bytesAsValue( + macho.unwind_info_compressed_second_level_page_header, + unwind_info[start_offset..][0..@sizeOf(macho.unwind_info_compressed_second_level_page_header)], + ); + + const entries = std.mem.bytesAsSlice( + macho.UnwindInfoCompressedEntry, + unwind_info[start_offset + page_header.entryPageOffset ..][0 .. 
page_header.entryCount * @sizeOf(macho.UnwindInfoCompressedEntry)], + ); + if (entries.len == 0) return error.InvalidUnwindInfo; + + var left: usize = 0; + var len: usize = entries.len; + while (len > 1) { + const mid = left + len / 2; + const offset = second_level_index.functionOffset + entries[mid].funcOffset; + if (mapped_pc < offset) { + len /= 2; + } else { + left = mid; + if (mapped_pc == offset) break; + len -= len / 2; + } + } + + const entry = entries[left]; + const function_offset = second_level_index.functionOffset + entry.funcOffset; + if (entry.encodingIndex < header.commonEncodingsArrayCount) { + if (entry.encodingIndex >= common_encodings.len) return error.InvalidUnwindInfo; + break :blk .{ + .function_offset = function_offset, + .raw_encoding = common_encodings[entry.encodingIndex], + }; + } else { + const local_index = try std.math.sub( + u8, + entry.encodingIndex, + cast(u8, header.commonEncodingsArrayCount) orelse return error.InvalidUnwindInfo, + ); + const local_encodings = std.mem.bytesAsSlice( + macho.compact_unwind_encoding_t, + unwind_info[start_offset + page_header.encodingsPageOffset ..][0 .. page_header.encodingsCount * @sizeOf(macho.compact_unwind_encoding_t)], + ); + if (local_index >= local_encodings.len) return error.InvalidUnwindInfo; + break :blk .{ + .function_offset = function_offset, + .raw_encoding = local_encodings[local_index], + }; + } + }, + else => return error.InvalidUnwindInfo, + }; + + if (entry.raw_encoding == 0) return error.NoUnwindInfo; + const reg_context = abi.RegisterContext{ + .eh_frame = false, + .is_macho = true, + }; + + const encoding: macho.CompactUnwindEncoding = @bitCast(entry.raw_encoding); + const new_ip = switch (builtin.cpu.arch) { + .x86_64 => switch (encoding.mode.x86_64) { + .OLD => return error.UnimplementedUnwindEncoding, + .RBP_FRAME => blk: { + const regs: [5]u3 = .{ + encoding.value.x86_64.frame.reg0, + encoding.value.x86_64.frame.reg1, + encoding.value.x86_64.frame.reg2, + encoding.value.x86_64.frame.reg3, + encoding.value.x86_64.frame.reg4, + }; + + const frame_offset = encoding.value.x86_64.frame.frame_offset * @sizeOf(usize); + var max_reg: usize = 0; + inline for (regs, 0..) |reg, i| { + if (reg > 0) max_reg = i; + } + + const fp = (try abi.regValueNative(usize, context.thread_context, abi.fpRegNum(reg_context), reg_context)).*; + const new_sp = fp + 2 * @sizeOf(usize); + + // Verify the stack range we're about to read register values from + if (ma.load(usize, new_sp) == null or ma.load(usize, fp - frame_offset + max_reg * @sizeOf(usize)) == null) return error.InvalidUnwindInfo; + + const ip_ptr = fp + @sizeOf(usize); + const new_ip = @as(*const usize, @ptrFromInt(ip_ptr)).*; + const new_fp = @as(*const usize, @ptrFromInt(fp)).*; + + (try abi.regValueNative(usize, context.thread_context, abi.fpRegNum(reg_context), reg_context)).* = new_fp; + (try abi.regValueNative(usize, context.thread_context, abi.spRegNum(reg_context), reg_context)).* = new_sp; + (try abi.regValueNative(usize, context.thread_context, abi.ipRegNum(), reg_context)).* = new_ip; + + for (regs, 0..) 
|reg, i| {
+ if (reg == 0) continue;
+ const addr = fp - frame_offset + i * @sizeOf(usize);
+ const reg_number = try compactUnwindToDwarfRegNumber(reg);
+ (try abi.regValueNative(usize, context.thread_context, reg_number, reg_context)).* = @as(*const usize, @ptrFromInt(addr)).*;
+ }
+
+ break :blk new_ip;
+ },
+ .STACK_IMMD,
+ .STACK_IND,
+ => blk: {
+ const sp = (try abi.regValueNative(usize, context.thread_context, abi.spRegNum(reg_context), reg_context)).*;
+ const stack_size = if (encoding.mode.x86_64 == .STACK_IMMD)
+ @as(usize, encoding.value.x86_64.frameless.stack.direct.stack_size) * @sizeOf(usize)
+ else stack_size: {
+ // In .STACK_IND, the stack size is inferred from the subq instruction at the beginning of the function.
+ const sub_offset_addr =
+ module_base_address +
+ entry.function_offset +
+ encoding.value.x86_64.frameless.stack.indirect.sub_offset;
+ if (ma.load(usize, sub_offset_addr) == null) return error.InvalidUnwindInfo;
+
+ // `sub_offset_addr` points directly at the 32-bit immediate operand of that `subq` instruction
+ const sub_operand = @as(*align(1) const u32, @ptrFromInt(sub_offset_addr)).*;
+ break :stack_size sub_operand + @sizeOf(usize) * @as(usize, encoding.value.x86_64.frameless.stack.indirect.stack_adjust);
+ };
+
+ // Decode the Lehmer-coded sequence of registers.
+ // For a description of the encoding see lib/libc/include/any-macos.13-any/mach-o/compact_unwind_encoding.h
+
+ // Decode the variable-base permutation number into its digits. Each digit represents
+ // an index into the list of register numbers that weren't yet used in the sequence at
+ // the time the digit was added.
+ const reg_count = encoding.value.x86_64.frameless.stack_reg_count;
+ const ip_ptr = if (reg_count > 0) reg_blk: {
+ var digits: [6]u3 = undefined;
+ var accumulator: usize = encoding.value.x86_64.frameless.stack_reg_permutation;
+ var base: usize = 2;
+ for (0..reg_count) |i| {
+ const div = accumulator / base;
+ digits[digits.len - 1 - i] = @intCast(accumulator - base * div);
+ accumulator = div;
+ base += 1;
+ }
+
+ const reg_numbers = [_]u3{ 1, 2, 3, 4, 5, 6 };
+ var registers: [reg_numbers.len]u3 = undefined;
+ var used_indices = [_]bool{false} ** reg_numbers.len;
+ for (digits[digits.len - reg_count ..], 0..) |target_unused_index, i| {
+ var unused_count: u8 = 0;
+ const unused_index = for (used_indices, 0..)
|used, index| { + if (!used) { + if (target_unused_index == unused_count) break index; + unused_count += 1; + } + } else unreachable; + + registers[i] = reg_numbers[unused_index]; + used_indices[unused_index] = true; + } + + var reg_addr = sp + stack_size - @sizeOf(usize) * @as(usize, reg_count + 1); + if (ma.load(usize, reg_addr) == null) return error.InvalidUnwindInfo; + for (0..reg_count) |i| { + const reg_number = try compactUnwindToDwarfRegNumber(registers[i]); + (try abi.regValueNative(usize, context.thread_context, reg_number, reg_context)).* = @as(*const usize, @ptrFromInt(reg_addr)).*; + reg_addr += @sizeOf(usize); + } + + break :reg_blk reg_addr; + } else sp + stack_size - @sizeOf(usize); + + const new_ip = @as(*const usize, @ptrFromInt(ip_ptr)).*; + const new_sp = ip_ptr + @sizeOf(usize); + if (ma.load(usize, new_sp) == null) return error.InvalidUnwindInfo; + + (try abi.regValueNative(usize, context.thread_context, abi.spRegNum(reg_context), reg_context)).* = new_sp; + (try abi.regValueNative(usize, context.thread_context, abi.ipRegNum(), reg_context)).* = new_ip; + + break :blk new_ip; + }, + .DWARF => { + return unwindFrameMachODwarf(context, ma, eh_frame orelse return error.MissingEhFrame, @intCast(encoding.value.x86_64.dwarf)); + }, + }, + .aarch64 => switch (encoding.mode.arm64) { + .OLD => return error.UnimplementedUnwindEncoding, + .FRAMELESS => blk: { + const sp = (try abi.regValueNative(usize, context.thread_context, abi.spRegNum(reg_context), reg_context)).*; + const new_sp = sp + encoding.value.arm64.frameless.stack_size * 16; + const new_ip = (try abi.regValueNative(usize, context.thread_context, 30, reg_context)).*; + if (ma.load(usize, new_sp) == null) return error.InvalidUnwindInfo; + (try abi.regValueNative(usize, context.thread_context, abi.spRegNum(reg_context), reg_context)).* = new_sp; + break :blk new_ip; + }, + .DWARF => { + return unwindFrameMachODwarf(context, ma, eh_frame orelse return error.MissingEhFrame, @intCast(encoding.value.arm64.dwarf)); + }, + .FRAME => blk: { + const fp = (try abi.regValueNative(usize, context.thread_context, abi.fpRegNum(reg_context), reg_context)).*; + const new_sp = fp + 16; + const ip_ptr = fp + @sizeOf(usize); + + const num_restored_pairs: usize = + @popCount(@as(u5, @bitCast(encoding.value.arm64.frame.x_reg_pairs))) + + @popCount(@as(u4, @bitCast(encoding.value.arm64.frame.d_reg_pairs))); + const min_reg_addr = fp - num_restored_pairs * 2 * @sizeOf(usize); + + if (ma.load(usize, new_sp) == null or ma.load(usize, min_reg_addr) == null) return error.InvalidUnwindInfo; + + var reg_addr = fp - @sizeOf(usize); + inline for (@typeInfo(@TypeOf(encoding.value.arm64.frame.x_reg_pairs)).Struct.fields, 0..) |field, i| { + if (@field(encoding.value.arm64.frame.x_reg_pairs, field.name) != 0) { + (try abi.regValueNative(usize, context.thread_context, 19 + i, reg_context)).* = @as(*const usize, @ptrFromInt(reg_addr)).*; + reg_addr += @sizeOf(usize); + (try abi.regValueNative(usize, context.thread_context, 20 + i, reg_context)).* = @as(*const usize, @ptrFromInt(reg_addr)).*; + reg_addr += @sizeOf(usize); + } + } + + inline for (@typeInfo(@TypeOf(encoding.value.arm64.frame.d_reg_pairs)).Struct.fields, 0..) 
|field, i| { + if (@field(encoding.value.arm64.frame.d_reg_pairs, field.name) != 0) { + // Only the lower half of the 128-bit V registers are restored during unwinding + @memcpy( + try abi.regBytes(context.thread_context, 64 + 8 + i, context.reg_context), + std.mem.asBytes(@as(*const usize, @ptrFromInt(reg_addr))), + ); + reg_addr += @sizeOf(usize); + @memcpy( + try abi.regBytes(context.thread_context, 64 + 9 + i, context.reg_context), + std.mem.asBytes(@as(*const usize, @ptrFromInt(reg_addr))), + ); + reg_addr += @sizeOf(usize); + } + } + + const new_ip = @as(*const usize, @ptrFromInt(ip_ptr)).*; + const new_fp = @as(*const usize, @ptrFromInt(fp)).*; + + (try abi.regValueNative(usize, context.thread_context, abi.fpRegNum(reg_context), reg_context)).* = new_fp; + (try abi.regValueNative(usize, context.thread_context, abi.ipRegNum(), reg_context)).* = new_ip; + + break :blk new_ip; + }, + }, + else => return error.UnimplementedArch, + }; + + context.pc = abi.stripInstructionPtrAuthCode(new_ip); + if (context.pc > 0) context.pc -= 1; + return new_ip; +} + +fn unwindFrameMachODwarf( + context: *UnwindContext, + ma: *std.debug.StackIterator.MemoryAccessor, + eh_frame: []const u8, + fde_offset: usize, +) !usize { + var di = Dwarf{ + .endian = native_endian, + .is_macho = true, + }; + defer di.deinit(context.allocator); + + di.sections[@intFromEnum(Section.Id.eh_frame)] = .{ + .data = eh_frame, + .owned = false, + }; + + return di.unwindFrame(context, ma, fde_offset); +} + +const EhPointerContext = struct { + // The address of the pointer field itself + pc_rel_base: u64, + + // Whether or not to follow indirect pointers. This should only be + // used when decoding pointers at runtime using the current process's + // debug info + follow_indirect: bool, + + // These relative addressing modes are only used in specific cases, and + // might not be available / required in all parsing contexts + data_rel_base: ?u64 = null, + text_rel_base: ?u64 = null, + function_rel_base: ?u64 = null, +}; +fn readEhPointer(fbr: *FixedBufferReader, enc: u8, addr_size_bytes: u8, ctx: EhPointerContext) !?u64 { + if (enc == EH.PE.omit) return null; + + const value: union(enum) { + signed: i64, + unsigned: u64, + } = switch (enc & EH.PE.type_mask) { + EH.PE.absptr => .{ + .unsigned = switch (addr_size_bytes) { + 2 => try fbr.readInt(u16), + 4 => try fbr.readInt(u32), + 8 => try fbr.readInt(u64), + else => return error.InvalidAddrSize, + }, + }, + EH.PE.uleb128 => .{ .unsigned = try fbr.readUleb128(u64) }, + EH.PE.udata2 => .{ .unsigned = try fbr.readInt(u16) }, + EH.PE.udata4 => .{ .unsigned = try fbr.readInt(u32) }, + EH.PE.udata8 => .{ .unsigned = try fbr.readInt(u64) }, + EH.PE.sleb128 => .{ .signed = try fbr.readIleb128(i64) }, + EH.PE.sdata2 => .{ .signed = try fbr.readInt(i16) }, + EH.PE.sdata4 => .{ .signed = try fbr.readInt(i32) }, + EH.PE.sdata8 => .{ .signed = try fbr.readInt(i64) }, + else => return badDwarf(), + }; + + const base = switch (enc & EH.PE.rel_mask) { + EH.PE.pcrel => ctx.pc_rel_base, + EH.PE.textrel => ctx.text_rel_base orelse return error.PointerBaseNotSpecified, + EH.PE.datarel => ctx.data_rel_base orelse return error.PointerBaseNotSpecified, + EH.PE.funcrel => ctx.function_rel_base orelse return error.PointerBaseNotSpecified, + else => null, + }; + + const ptr: u64 = if (base) |b| switch (value) { + .signed => |s| @intCast(try std.math.add(i64, s, @as(i64, @intCast(b)))), + // absptr can actually contain signed values in some cases (aarch64 MachO) + .unsigned => |u| u +% b, + } else switch 
(value) { + .signed => |s| @as(u64, @intCast(s)), + .unsigned => |u| u, + }; + + if ((enc & EH.PE.indirect) > 0 and ctx.follow_indirect) { + if (@sizeOf(usize) != addr_size_bytes) { + // See the documentation for `follow_indirect` + return error.NonNativeIndirection; + } + + const native_ptr = cast(usize, ptr) orelse return error.PointerOverflow; + return switch (addr_size_bytes) { + 2, 4, 8 => return @as(*const usize, @ptrFromInt(native_ptr)).*, + else => return error.UnsupportedAddrSize, + }; + } else { + return ptr; + } +} + +fn pcRelBase(field_ptr: usize, pc_rel_offset: i64) !usize { + if (pc_rel_offset < 0) { + return std.math.sub(usize, field_ptr, @as(usize, @intCast(-pc_rel_offset))); + } else { + return std.math.add(usize, field_ptr, @as(usize, @intCast(pc_rel_offset))); + } +} diff --git a/lib/std/debug/Dwarf/abi.zig b/lib/std/debug/Dwarf/abi.zig new file mode 100644 index 0000000000..1a47625ae7 --- /dev/null +++ b/lib/std/debug/Dwarf/abi.zig @@ -0,0 +1,410 @@ +const builtin = @import("builtin"); +const std = @import("../../std.zig"); +const mem = std.mem; +const native_os = builtin.os.tag; +const posix = std.posix; + +pub fn supportsUnwinding(target: std.Target) bool { + return switch (target.cpu.arch) { + .x86 => switch (target.os.tag) { + .linux, .netbsd, .solaris, .illumos => true, + else => false, + }, + .x86_64 => switch (target.os.tag) { + .linux, .netbsd, .freebsd, .openbsd, .macos, .ios, .solaris, .illumos => true, + else => false, + }, + .arm => switch (target.os.tag) { + .linux => true, + else => false, + }, + .aarch64 => switch (target.os.tag) { + .linux, .netbsd, .freebsd, .macos, .ios => true, + else => false, + }, + else => false, + }; +} + +pub fn ipRegNum() u8 { + return switch (builtin.cpu.arch) { + .x86 => 8, + .x86_64 => 16, + .arm => 15, + .aarch64 => 32, + else => unreachable, + }; +} + +pub fn fpRegNum(reg_context: RegisterContext) u8 { + return switch (builtin.cpu.arch) { + // GCC on OS X historically did the opposite of ELF for these registers (only in .eh_frame), and that is now the convention for MachO + .x86 => if (reg_context.eh_frame and reg_context.is_macho) 4 else 5, + .x86_64 => 6, + .arm => 11, + .aarch64 => 29, + else => unreachable, + }; +} + +pub fn spRegNum(reg_context: RegisterContext) u8 { + return switch (builtin.cpu.arch) { + .x86 => if (reg_context.eh_frame and reg_context.is_macho) 5 else 4, + .x86_64 => 7, + .arm => 13, + .aarch64 => 31, + else => unreachable, + }; +} + +/// Some platforms use pointer authentication - the upper bits of instruction pointers contain a signature. +/// This function clears these signature bits to make the pointer usable. 
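+/// For example (hypothetical values), a PAC-signed aarch64 return address such as
+/// 0x007d_0100_0041_2610 strips to the canonical address 0x0000_0000_0041_2610.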
+pub inline fn stripInstructionPtrAuthCode(ptr: usize) usize {
+ if (builtin.cpu.arch == .aarch64) {
+ // `hint 0x07` maps to `xpaclri` (or `nop` if the hardware doesn't support it)
+ // The save / restore is because `xpaclri` operates on x30 (LR)
+ return asm (
+ \\mov x16, x30
+ \\mov x30, x15
+ \\hint 0x07
+ \\mov x15, x30
+ \\mov x30, x16
+ : [ret] "={x15}" (-> usize),
+ : [ptr] "{x15}" (ptr),
+ : "x16"
+ );
+ }
+
+ return ptr;
+}
+
+pub const RegisterContext = struct {
+ eh_frame: bool,
+ is_macho: bool,
+};
+
+pub const AbiError = error{
+ InvalidRegister,
+ UnimplementedArch,
+ UnimplementedOs,
+ RegisterContextRequired,
+ ThreadContextNotSupported,
+};
+
+fn RegValueReturnType(comptime ContextPtrType: type, comptime T: type) type {
+ const reg_bytes_type = comptime RegBytesReturnType(ContextPtrType);
+ const info = @typeInfo(reg_bytes_type).Pointer;
+ return @Type(.{
+ .Pointer = .{
+ .size = .One,
+ .is_const = info.is_const,
+ .is_volatile = info.is_volatile,
+ .is_allowzero = info.is_allowzero,
+ .alignment = info.alignment,
+ .address_space = info.address_space,
+ .child = T,
+ .sentinel = null,
+ },
+ });
+}
+
+/// Returns a pointer to a register stored in a ThreadContext, preserving the pointer attributes of the context.
+pub fn regValueNative(
+ comptime T: type,
+ thread_context_ptr: anytype,
+ reg_number: u8,
+ reg_context: ?RegisterContext,
+) !RegValueReturnType(@TypeOf(thread_context_ptr), T) {
+ const reg_bytes = try regBytes(thread_context_ptr, reg_number, reg_context);
+ if (@sizeOf(T) != reg_bytes.len) return error.IncompatibleRegisterSize;
+ return mem.bytesAsValue(T, reg_bytes[0..@sizeOf(T)]);
+}
+
+fn RegBytesReturnType(comptime ContextPtrType: type) type {
+ const info = @typeInfo(ContextPtrType);
+ if (info != .Pointer or info.Pointer.child != std.debug.ThreadContext) {
+ @compileError("Expected a pointer to std.debug.ThreadContext, got " ++ @typeName(ContextPtrType));
+ }
+
+ return if (info.Pointer.is_const) []const u8 else []u8;
+}
+
+/// Returns a slice containing the backing storage for `reg_number`.
+///
+/// `reg_context` describes in what context the register number is used, as it can have different
+/// meanings depending on the DWARF container. It is only required when getting the stack or
+/// frame pointer register on some architectures.
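+///
+/// A minimal usage sketch, assuming `thread_context` is a `std.debug.ThreadContext` captured
+/// elsewhere; on x86, register 4 is ESP in most containers but EBP in a MachO `.eh_frame`,
+/// which is why the context must be supplied:
+///
+///     const sp_bytes = try regBytes(&thread_context, 4, .{ .eh_frame = false, .is_macho = false });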
+pub fn regBytes( + thread_context_ptr: anytype, + reg_number: u8, + reg_context: ?RegisterContext, +) AbiError!RegBytesReturnType(@TypeOf(thread_context_ptr)) { + if (native_os == .windows) { + return switch (builtin.cpu.arch) { + .x86 => switch (reg_number) { + 0 => mem.asBytes(&thread_context_ptr.Eax), + 1 => mem.asBytes(&thread_context_ptr.Ecx), + 2 => mem.asBytes(&thread_context_ptr.Edx), + 3 => mem.asBytes(&thread_context_ptr.Ebx), + 4 => mem.asBytes(&thread_context_ptr.Esp), + 5 => mem.asBytes(&thread_context_ptr.Ebp), + 6 => mem.asBytes(&thread_context_ptr.Esi), + 7 => mem.asBytes(&thread_context_ptr.Edi), + 8 => mem.asBytes(&thread_context_ptr.Eip), + 9 => mem.asBytes(&thread_context_ptr.EFlags), + 10 => mem.asBytes(&thread_context_ptr.SegCs), + 11 => mem.asBytes(&thread_context_ptr.SegSs), + 12 => mem.asBytes(&thread_context_ptr.SegDs), + 13 => mem.asBytes(&thread_context_ptr.SegEs), + 14 => mem.asBytes(&thread_context_ptr.SegFs), + 15 => mem.asBytes(&thread_context_ptr.SegGs), + else => error.InvalidRegister, + }, + .x86_64 => switch (reg_number) { + 0 => mem.asBytes(&thread_context_ptr.Rax), + 1 => mem.asBytes(&thread_context_ptr.Rdx), + 2 => mem.asBytes(&thread_context_ptr.Rcx), + 3 => mem.asBytes(&thread_context_ptr.Rbx), + 4 => mem.asBytes(&thread_context_ptr.Rsi), + 5 => mem.asBytes(&thread_context_ptr.Rdi), + 6 => mem.asBytes(&thread_context_ptr.Rbp), + 7 => mem.asBytes(&thread_context_ptr.Rsp), + 8 => mem.asBytes(&thread_context_ptr.R8), + 9 => mem.asBytes(&thread_context_ptr.R9), + 10 => mem.asBytes(&thread_context_ptr.R10), + 11 => mem.asBytes(&thread_context_ptr.R11), + 12 => mem.asBytes(&thread_context_ptr.R12), + 13 => mem.asBytes(&thread_context_ptr.R13), + 14 => mem.asBytes(&thread_context_ptr.R14), + 15 => mem.asBytes(&thread_context_ptr.R15), + 16 => mem.asBytes(&thread_context_ptr.Rip), + else => error.InvalidRegister, + }, + .aarch64 => switch (reg_number) { + 0...30 => mem.asBytes(&thread_context_ptr.DUMMYUNIONNAME.X[reg_number]), + 31 => mem.asBytes(&thread_context_ptr.Sp), + 32 => mem.asBytes(&thread_context_ptr.Pc), + else => error.InvalidRegister, + }, + else => error.UnimplementedArch, + }; + } + + if (!std.debug.have_ucontext) return error.ThreadContextNotSupported; + + const ucontext_ptr = thread_context_ptr; + return switch (builtin.cpu.arch) { + .x86 => switch (native_os) { + .linux, .netbsd, .solaris, .illumos => switch (reg_number) { + 0 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.EAX]), + 1 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.ECX]), + 2 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.EDX]), + 3 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.EBX]), + 4...5 => if (reg_context) |r| bytes: { + if (reg_number == 4) { + break :bytes if (r.eh_frame and r.is_macho) + mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.EBP]) + else + mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.ESP]); + } else { + break :bytes if (r.eh_frame and r.is_macho) + mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.ESP]) + else + mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.EBP]); + } + } else error.RegisterContextRequired, + 6 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.ESI]), + 7 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.EDI]), + 8 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.EIP]), + 9 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.EFL]), + 10 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.CS]), + 11 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.SS]), + 12 => 
mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.DS]), + 13 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.ES]), + 14 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.FS]), + 15 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.GS]), + 16...23 => error.InvalidRegister, // TODO: Support loading ST0-ST7 from mcontext.fpregs + 32...39 => error.InvalidRegister, // TODO: Support loading XMM0-XMM7 from mcontext.fpregs + else => error.InvalidRegister, + }, + else => error.UnimplementedOs, + }, + .x86_64 => switch (native_os) { + .linux, .solaris, .illumos => switch (reg_number) { + 0 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.RAX]), + 1 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.RDX]), + 2 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.RCX]), + 3 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.RBX]), + 4 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.RSI]), + 5 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.RDI]), + 6 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.RBP]), + 7 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.RSP]), + 8 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.R8]), + 9 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.R9]), + 10 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.R10]), + 11 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.R11]), + 12 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.R12]), + 13 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.R13]), + 14 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.R14]), + 15 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.R15]), + 16 => mem.asBytes(&ucontext_ptr.mcontext.gregs[posix.REG.RIP]), + 17...32 => |i| if (native_os.isSolarish()) + mem.asBytes(&ucontext_ptr.mcontext.fpregs.chip_state.xmm[i - 17]) + else + mem.asBytes(&ucontext_ptr.mcontext.fpregs.xmm[i - 17]), + else => error.InvalidRegister, + }, + .freebsd => switch (reg_number) { + 0 => mem.asBytes(&ucontext_ptr.mcontext.rax), + 1 => mem.asBytes(&ucontext_ptr.mcontext.rdx), + 2 => mem.asBytes(&ucontext_ptr.mcontext.rcx), + 3 => mem.asBytes(&ucontext_ptr.mcontext.rbx), + 4 => mem.asBytes(&ucontext_ptr.mcontext.rsi), + 5 => mem.asBytes(&ucontext_ptr.mcontext.rdi), + 6 => mem.asBytes(&ucontext_ptr.mcontext.rbp), + 7 => mem.asBytes(&ucontext_ptr.mcontext.rsp), + 8 => mem.asBytes(&ucontext_ptr.mcontext.r8), + 9 => mem.asBytes(&ucontext_ptr.mcontext.r9), + 10 => mem.asBytes(&ucontext_ptr.mcontext.r10), + 11 => mem.asBytes(&ucontext_ptr.mcontext.r11), + 12 => mem.asBytes(&ucontext_ptr.mcontext.r12), + 13 => mem.asBytes(&ucontext_ptr.mcontext.r13), + 14 => mem.asBytes(&ucontext_ptr.mcontext.r14), + 15 => mem.asBytes(&ucontext_ptr.mcontext.r15), + 16 => mem.asBytes(&ucontext_ptr.mcontext.rip), + // TODO: Extract xmm state from mcontext.fpstate? 
+ else => error.InvalidRegister, + }, + .openbsd => switch (reg_number) { + 0 => mem.asBytes(&ucontext_ptr.sc_rax), + 1 => mem.asBytes(&ucontext_ptr.sc_rdx), + 2 => mem.asBytes(&ucontext_ptr.sc_rcx), + 3 => mem.asBytes(&ucontext_ptr.sc_rbx), + 4 => mem.asBytes(&ucontext_ptr.sc_rsi), + 5 => mem.asBytes(&ucontext_ptr.sc_rdi), + 6 => mem.asBytes(&ucontext_ptr.sc_rbp), + 7 => mem.asBytes(&ucontext_ptr.sc_rsp), + 8 => mem.asBytes(&ucontext_ptr.sc_r8), + 9 => mem.asBytes(&ucontext_ptr.sc_r9), + 10 => mem.asBytes(&ucontext_ptr.sc_r10), + 11 => mem.asBytes(&ucontext_ptr.sc_r11), + 12 => mem.asBytes(&ucontext_ptr.sc_r12), + 13 => mem.asBytes(&ucontext_ptr.sc_r13), + 14 => mem.asBytes(&ucontext_ptr.sc_r14), + 15 => mem.asBytes(&ucontext_ptr.sc_r15), + 16 => mem.asBytes(&ucontext_ptr.sc_rip), + // TODO: Extract xmm state from sc_fpstate? + else => error.InvalidRegister, + }, + .macos, .ios => switch (reg_number) { + 0 => mem.asBytes(&ucontext_ptr.mcontext.ss.rax), + 1 => mem.asBytes(&ucontext_ptr.mcontext.ss.rdx), + 2 => mem.asBytes(&ucontext_ptr.mcontext.ss.rcx), + 3 => mem.asBytes(&ucontext_ptr.mcontext.ss.rbx), + 4 => mem.asBytes(&ucontext_ptr.mcontext.ss.rsi), + 5 => mem.asBytes(&ucontext_ptr.mcontext.ss.rdi), + 6 => mem.asBytes(&ucontext_ptr.mcontext.ss.rbp), + 7 => mem.asBytes(&ucontext_ptr.mcontext.ss.rsp), + 8 => mem.asBytes(&ucontext_ptr.mcontext.ss.r8), + 9 => mem.asBytes(&ucontext_ptr.mcontext.ss.r9), + 10 => mem.asBytes(&ucontext_ptr.mcontext.ss.r10), + 11 => mem.asBytes(&ucontext_ptr.mcontext.ss.r11), + 12 => mem.asBytes(&ucontext_ptr.mcontext.ss.r12), + 13 => mem.asBytes(&ucontext_ptr.mcontext.ss.r13), + 14 => mem.asBytes(&ucontext_ptr.mcontext.ss.r14), + 15 => mem.asBytes(&ucontext_ptr.mcontext.ss.r15), + 16 => mem.asBytes(&ucontext_ptr.mcontext.ss.rip), + else => error.InvalidRegister, + }, + else => error.UnimplementedOs, + }, + .arm => switch (native_os) { + .linux => switch (reg_number) { + 0 => mem.asBytes(&ucontext_ptr.mcontext.arm_r0), + 1 => mem.asBytes(&ucontext_ptr.mcontext.arm_r1), + 2 => mem.asBytes(&ucontext_ptr.mcontext.arm_r2), + 3 => mem.asBytes(&ucontext_ptr.mcontext.arm_r3), + 4 => mem.asBytes(&ucontext_ptr.mcontext.arm_r4), + 5 => mem.asBytes(&ucontext_ptr.mcontext.arm_r5), + 6 => mem.asBytes(&ucontext_ptr.mcontext.arm_r6), + 7 => mem.asBytes(&ucontext_ptr.mcontext.arm_r7), + 8 => mem.asBytes(&ucontext_ptr.mcontext.arm_r8), + 9 => mem.asBytes(&ucontext_ptr.mcontext.arm_r9), + 10 => mem.asBytes(&ucontext_ptr.mcontext.arm_r10), + 11 => mem.asBytes(&ucontext_ptr.mcontext.arm_fp), + 12 => mem.asBytes(&ucontext_ptr.mcontext.arm_ip), + 13 => mem.asBytes(&ucontext_ptr.mcontext.arm_sp), + 14 => mem.asBytes(&ucontext_ptr.mcontext.arm_lr), + 15 => mem.asBytes(&ucontext_ptr.mcontext.arm_pc), + // CPSR is not allocated a register number (See: https://github.com/ARM-software/abi-aa/blob/main/aadwarf32/aadwarf32.rst, Section 4.1) + else => error.InvalidRegister, + }, + else => error.UnimplementedOs, + }, + .aarch64 => switch (native_os) { + .macos, .ios => switch (reg_number) { + 0...28 => mem.asBytes(&ucontext_ptr.mcontext.ss.regs[reg_number]), + 29 => mem.asBytes(&ucontext_ptr.mcontext.ss.fp), + 30 => mem.asBytes(&ucontext_ptr.mcontext.ss.lr), + 31 => mem.asBytes(&ucontext_ptr.mcontext.ss.sp), + 32 => mem.asBytes(&ucontext_ptr.mcontext.ss.pc), + + // TODO: Find storage for this state + //34 => mem.asBytes(&ucontext_ptr.ra_sign_state), + + // V0-V31 + 64...95 => mem.asBytes(&ucontext_ptr.mcontext.ns.q[reg_number - 64]), + else => error.InvalidRegister, + }, + .netbsd => switch 
(reg_number) { + 0...34 => mem.asBytes(&ucontext_ptr.mcontext.gregs[reg_number]), + else => error.InvalidRegister, + }, + .freebsd => switch (reg_number) { + 0...29 => mem.asBytes(&ucontext_ptr.mcontext.gpregs.x[reg_number]), + 30 => mem.asBytes(&ucontext_ptr.mcontext.gpregs.lr), + 31 => mem.asBytes(&ucontext_ptr.mcontext.gpregs.sp), + + // TODO: This seems wrong, but it was in the previous debug.zig code for mapping PC, check this + 32 => mem.asBytes(&ucontext_ptr.mcontext.gpregs.elr), + + else => error.InvalidRegister, + }, + .openbsd => switch (reg_number) { + 0...30 => mem.asBytes(&ucontext_ptr.sc_x[reg_number]), + 31 => mem.asBytes(&ucontext_ptr.sc_sp), + 32 => mem.asBytes(&ucontext_ptr.sc_lr), + 33 => mem.asBytes(&ucontext_ptr.sc_elr), + 34 => mem.asBytes(&ucontext_ptr.sc_spsr), + else => error.InvalidRegister, + }, + else => switch (reg_number) { + 0...30 => mem.asBytes(&ucontext_ptr.mcontext.regs[reg_number]), + 31 => mem.asBytes(&ucontext_ptr.mcontext.sp), + 32 => mem.asBytes(&ucontext_ptr.mcontext.pc), + else => error.InvalidRegister, + }, + }, + else => error.UnimplementedArch, + }; +} + +/// Returns the ABI-defined default value this register has in the unwinding table +/// before running any of the CIE instructions. The DWARF spec defines these as having +/// the .undefined rule by default, but allows ABI authors to override that. +pub fn getRegDefaultValue(reg_number: u8, context: *std.debug.Dwarf.UnwindContext, out: []u8) !void { + switch (builtin.cpu.arch) { + .aarch64 => { + // Callee-saved registers are initialized as if they had the .same_value rule + if (reg_number >= 19 and reg_number <= 28) { + const src = try regBytes(context.thread_context, reg_number, context.reg_context); + if (src.len != out.len) return error.RegisterSizeMismatch; + @memcpy(out, src); + return; + } + }, + else => {}, + } + + @memset(out, undefined); +} diff --git a/lib/std/debug/Dwarf/call_frame.zig b/lib/std/debug/Dwarf/call_frame.zig new file mode 100644 index 0000000000..73e00d3099 --- /dev/null +++ b/lib/std/debug/Dwarf/call_frame.zig @@ -0,0 +1,687 @@ +const builtin = @import("builtin"); +const std = @import("../../std.zig"); +const mem = std.mem; +const debug = std.debug; +const leb = std.leb; +const DW = std.dwarf; +const abi = std.debug.Dwarf.abi; +const assert = std.debug.assert; +const native_endian = builtin.cpu.arch.endian(); + +/// TODO merge with std.dwarf.CFA +const Opcode = enum(u8) { + advance_loc = 0x1 << 6, + offset = 0x2 << 6, + restore = 0x3 << 6, + + nop = 0x00, + set_loc = 0x01, + advance_loc1 = 0x02, + advance_loc2 = 0x03, + advance_loc4 = 0x04, + offset_extended = 0x05, + restore_extended = 0x06, + undefined = 0x07, + same_value = 0x08, + register = 0x09, + remember_state = 0x0a, + restore_state = 0x0b, + def_cfa = 0x0c, + def_cfa_register = 0x0d, + def_cfa_offset = 0x0e, + def_cfa_expression = 0x0f, + expression = 0x10, + offset_extended_sf = 0x11, + def_cfa_sf = 0x12, + def_cfa_offset_sf = 0x13, + val_offset = 0x14, + val_offset_sf = 0x15, + val_expression = 0x16, + + // These opcodes encode an operand in the lower 6 bits of the opcode itself + pub const lo_inline = @intFromEnum(Opcode.advance_loc); + pub const hi_inline = @intFromEnum(Opcode.restore) | 0b111111; + + // These opcodes are trailed by zero or more operands + pub const lo_reserved = @intFromEnum(Opcode.nop); + pub const hi_reserved = @intFromEnum(Opcode.val_expression); + + // Vendor-specific opcodes + pub const lo_user = 0x1c; + pub const hi_user = 0x3f; +}; + +fn readBlock(stream: 
*std.io.FixedBufferStream([]const u8)) ![]const u8 { + const reader = stream.reader(); + const block_len = try leb.readUleb128(usize, reader); + if (stream.pos + block_len > stream.buffer.len) return error.InvalidOperand; + + const block = stream.buffer[stream.pos..][0..block_len]; + reader.context.pos += block_len; + + return block; +} + +pub const Instruction = union(Opcode) { + advance_loc: struct { + delta: u8, + }, + offset: struct { + register: u8, + offset: u64, + }, + restore: struct { + register: u8, + }, + nop: void, + set_loc: struct { + address: u64, + }, + advance_loc1: struct { + delta: u8, + }, + advance_loc2: struct { + delta: u16, + }, + advance_loc4: struct { + delta: u32, + }, + offset_extended: struct { + register: u8, + offset: u64, + }, + restore_extended: struct { + register: u8, + }, + undefined: struct { + register: u8, + }, + same_value: struct { + register: u8, + }, + register: struct { + register: u8, + target_register: u8, + }, + remember_state: void, + restore_state: void, + def_cfa: struct { + register: u8, + offset: u64, + }, + def_cfa_register: struct { + register: u8, + }, + def_cfa_offset: struct { + offset: u64, + }, + def_cfa_expression: struct { + block: []const u8, + }, + expression: struct { + register: u8, + block: []const u8, + }, + offset_extended_sf: struct { + register: u8, + offset: i64, + }, + def_cfa_sf: struct { + register: u8, + offset: i64, + }, + def_cfa_offset_sf: struct { + offset: i64, + }, + val_offset: struct { + register: u8, + offset: u64, + }, + val_offset_sf: struct { + register: u8, + offset: i64, + }, + val_expression: struct { + register: u8, + block: []const u8, + }, + + pub fn read( + stream: *std.io.FixedBufferStream([]const u8), + addr_size_bytes: u8, + endian: std.builtin.Endian, + ) !Instruction { + const reader = stream.reader(); + switch (try reader.readByte()) { + Opcode.lo_inline...Opcode.hi_inline => |opcode| { + const e: Opcode = @enumFromInt(opcode & 0b11000000); + const value: u6 = @intCast(opcode & 0b111111); + return switch (e) { + .advance_loc => .{ + .advance_loc = .{ .delta = value }, + }, + .offset => .{ + .offset = .{ + .register = value, + .offset = try leb.readUleb128(u64, reader), + }, + }, + .restore => .{ + .restore = .{ .register = value }, + }, + else => unreachable, + }; + }, + Opcode.lo_reserved...Opcode.hi_reserved => |opcode| { + const e: Opcode = @enumFromInt(opcode); + return switch (e) { + .advance_loc, + .offset, + .restore, + => unreachable, + .nop => .{ .nop = {} }, + .set_loc => .{ + .set_loc = .{ + .address = switch (addr_size_bytes) { + 2 => try reader.readInt(u16, endian), + 4 => try reader.readInt(u32, endian), + 8 => try reader.readInt(u64, endian), + else => return error.InvalidAddrSize, + }, + }, + }, + .advance_loc1 => .{ + .advance_loc1 = .{ .delta = try reader.readByte() }, + }, + .advance_loc2 => .{ + .advance_loc2 = .{ .delta = try reader.readInt(u16, endian) }, + }, + .advance_loc4 => .{ + .advance_loc4 = .{ .delta = try reader.readInt(u32, endian) }, + }, + .offset_extended => .{ + .offset_extended = .{ + .register = try leb.readUleb128(u8, reader), + .offset = try leb.readUleb128(u64, reader), + }, + }, + .restore_extended => .{ + .restore_extended = .{ + .register = try leb.readUleb128(u8, reader), + }, + }, + .undefined => .{ + .undefined = .{ + .register = try leb.readUleb128(u8, reader), + }, + }, + .same_value => .{ + .same_value = .{ + .register = try leb.readUleb128(u8, reader), + }, + }, + .register => .{ + .register = .{ + .register = try leb.readUleb128(u8, 
reader), + .target_register = try leb.readUleb128(u8, reader), + }, + }, + .remember_state => .{ .remember_state = {} }, + .restore_state => .{ .restore_state = {} }, + .def_cfa => .{ + .def_cfa = .{ + .register = try leb.readUleb128(u8, reader), + .offset = try leb.readUleb128(u64, reader), + }, + }, + .def_cfa_register => .{ + .def_cfa_register = .{ + .register = try leb.readUleb128(u8, reader), + }, + }, + .def_cfa_offset => .{ + .def_cfa_offset = .{ + .offset = try leb.readUleb128(u64, reader), + }, + }, + .def_cfa_expression => .{ + .def_cfa_expression = .{ + .block = try readBlock(stream), + }, + }, + .expression => .{ + .expression = .{ + .register = try leb.readUleb128(u8, reader), + .block = try readBlock(stream), + }, + }, + .offset_extended_sf => .{ + .offset_extended_sf = .{ + .register = try leb.readUleb128(u8, reader), + .offset = try leb.readIleb128(i64, reader), + }, + }, + .def_cfa_sf => .{ + .def_cfa_sf = .{ + .register = try leb.readUleb128(u8, reader), + .offset = try leb.readIleb128(i64, reader), + }, + }, + .def_cfa_offset_sf => .{ + .def_cfa_offset_sf = .{ + .offset = try leb.readIleb128(i64, reader), + }, + }, + .val_offset => .{ + .val_offset = .{ + .register = try leb.readUleb128(u8, reader), + .offset = try leb.readUleb128(u64, reader), + }, + }, + .val_offset_sf => .{ + .val_offset_sf = .{ + .register = try leb.readUleb128(u8, reader), + .offset = try leb.readIleb128(i64, reader), + }, + }, + .val_expression => .{ + .val_expression = .{ + .register = try leb.readUleb128(u8, reader), + .block = try readBlock(stream), + }, + }, + }; + }, + Opcode.lo_user...Opcode.hi_user => return error.UnimplementedUserOpcode, + else => return error.InvalidOpcode, + } + } +}; + +/// Since register rules are applied (usually) during a panic, +/// checked addition / subtraction is used so that we can return +/// an error and fall back to FP-based unwinding. +pub fn applyOffset(base: usize, offset: i64) !usize { + return if (offset >= 0) + try std.math.add(usize, base, @as(usize, @intCast(offset))) + else + try std.math.sub(usize, base, @as(usize, @intCast(-offset))); +} + +/// This is a virtual machine that runs DWARF call frame instructions. +pub const VirtualMachine = struct { + /// See section 6.4.1 of the DWARF5 specification for details on each + const RegisterRule = union(enum) { + // The spec says that the default rule for each column is the undefined rule. + // However, it also allows ABI / compiler authors to specify alternate defaults, so + // there is a distinction made here. + default: void, + + undefined: void, + same_value: void, + + // offset(N) + offset: i64, + + // val_offset(N) + val_offset: i64, + + // register(R) + register: u8, + + // expression(E) + expression: []const u8, + + // val_expression(E) + val_expression: []const u8, + + // Augmenter-defined rule + architectural: void, + }; + + /// Each row contains unwinding rules for a set of registers. + pub const Row = struct { + /// Offset from `FrameDescriptionEntry.pc_begin` + offset: u64 = 0, + + /// Special-case column that defines the CFA (Canonical Frame Address) rule. + /// The register field of this column defines the register that CFA is derived from. + cfa: Column = .{}, + + /// The register fields in these columns define the register the rule applies to. + columns: ColumnRange = .{}, + + /// Indicates that the next write to any column in this row needs to copy + /// the backing column storage first, as it may be referenced by previous rows. 
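+ /// (For example, `remember_state` pushes the current columns and sets this flag; the next
+ /// rule write then goes through `resolveCopyOnWrite`, which clones the column storage.)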
+ copy_on_write: bool = false,
+ };
+
+ pub const Column = struct {
+ register: ?u8 = null,
+ rule: RegisterRule = .{ .default = {} },
+
+ /// Resolves the register rule and places the result into `out` (see dwarf.abi.regBytes)
+ pub fn resolveValue(
+ self: Column,
+ context: *std.debug.Dwarf.UnwindContext,
+ expression_context: std.debug.Dwarf.expression.Context,
+ ma: *debug.StackIterator.MemoryAccessor,
+ out: []u8,
+ ) !void {
+ switch (self.rule) {
+ .default => {
+ const register = self.register orelse return error.InvalidRegister;
+ try abi.getRegDefaultValue(register, context, out);
+ },
+ .undefined => {
+ @memset(out, undefined);
+ },
+ .same_value => {
+ // TODO: This copy could be eliminated if callers always copy the state then call this function to update it
+ const register = self.register orelse return error.InvalidRegister;
+ const src = try abi.regBytes(context.thread_context, register, context.reg_context);
+ if (src.len != out.len) return error.RegisterSizeMismatch;
+ @memcpy(out, src);
+ },
+ .offset => |offset| {
+ if (context.cfa) |cfa| {
+ const addr = try applyOffset(cfa, offset);
+ if (ma.load(usize, addr) == null) return error.InvalidAddress;
+ const ptr: *const usize = @ptrFromInt(addr);
+ mem.writeInt(usize, out[0..@sizeOf(usize)], ptr.*, native_endian);
+ } else return error.InvalidCFA;
+ },
+ .val_offset => |offset| {
+ if (context.cfa) |cfa| {
+ mem.writeInt(usize, out[0..@sizeOf(usize)], try applyOffset(cfa, offset), native_endian);
+ } else return error.InvalidCFA;
+ },
+ .register => |register| {
+ const src = try abi.regBytes(context.thread_context, register, context.reg_context);
+ if (src.len != out.len) return error.RegisterSizeMismatch;
+ @memcpy(out, src);
+ },
+ .expression => |expression| {
+ context.stack_machine.reset();
+ const value = try context.stack_machine.run(expression, context.allocator, expression_context, context.cfa.?);
+ const addr = if (value) |v| blk: {
+ if (v != .generic) return error.InvalidExpressionValue;
+ break :blk v.generic;
+ } else return error.NoExpressionValue;
+
+ if (ma.load(usize, addr) == null) return error.InvalidExpressionAddress;
+ const ptr: *const usize = @ptrFromInt(addr);
+ mem.writeInt(usize, out[0..@sizeOf(usize)], ptr.*, native_endian);
+ },
+ .val_expression => |expression| {
+ context.stack_machine.reset();
+ const value = try context.stack_machine.run(expression, context.allocator, expression_context, context.cfa.?);
+ if (value) |v| {
+ if (v != .generic) return error.InvalidExpressionValue;
+ mem.writeInt(usize, out[0..@sizeOf(usize)], v.generic, native_endian);
+ } else return error.NoExpressionValue;
+ },
+ .architectural => return error.UnimplementedRegisterRule,
+ }
+ }
+ };
+
+ const ColumnRange = struct {
+ /// Index into `columns` of the first column in this row.
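+ /// Left `undefined` until the first column is added by `getOrAddColumn`; `len == 0` marks an empty range.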
+ start: usize = undefined, + len: u8 = 0, + }; + + columns: std.ArrayListUnmanaged(Column) = .{}, + stack: std.ArrayListUnmanaged(ColumnRange) = .{}, + current_row: Row = .{}, + + /// The result of executing the CIE's initial_instructions + cie_row: ?Row = null, + + pub fn deinit(self: *VirtualMachine, allocator: std.mem.Allocator) void { + self.stack.deinit(allocator); + self.columns.deinit(allocator); + self.* = undefined; + } + + pub fn reset(self: *VirtualMachine) void { + self.stack.clearRetainingCapacity(); + self.columns.clearRetainingCapacity(); + self.current_row = .{}; + self.cie_row = null; + } + + /// Return a slice backed by the row's non-CFA columns + pub fn rowColumns(self: VirtualMachine, row: Row) []Column { + if (row.columns.len == 0) return &.{}; + return self.columns.items[row.columns.start..][0..row.columns.len]; + } + + /// Either retrieves or adds a column for `register` (non-CFA) in the current row. + fn getOrAddColumn(self: *VirtualMachine, allocator: std.mem.Allocator, register: u8) !*Column { + for (self.rowColumns(self.current_row)) |*c| { + if (c.register == register) return c; + } + + if (self.current_row.columns.len == 0) { + self.current_row.columns.start = self.columns.items.len; + } + self.current_row.columns.len += 1; + + const column = try self.columns.addOne(allocator); + column.* = .{ + .register = register, + }; + + return column; + } + + /// Runs the CIE instructions, then the FDE instructions. Execution halts + /// once the row that corresponds to `pc` is known, and the row is returned. + pub fn runTo( + self: *VirtualMachine, + allocator: std.mem.Allocator, + pc: u64, + cie: std.debug.Dwarf.CommonInformationEntry, + fde: std.debug.Dwarf.FrameDescriptionEntry, + addr_size_bytes: u8, + endian: std.builtin.Endian, + ) !Row { + assert(self.cie_row == null); + if (pc < fde.pc_begin or pc >= fde.pc_begin + fde.pc_range) return error.AddressOutOfRange; + + var prev_row: Row = self.current_row; + + var cie_stream = std.io.fixedBufferStream(cie.initial_instructions); + var fde_stream = std.io.fixedBufferStream(fde.instructions); + var streams = [_]*std.io.FixedBufferStream([]const u8){ + &cie_stream, + &fde_stream, + }; + + for (&streams, 0..) |stream, i| { + while (stream.pos < stream.buffer.len) { + const instruction = try std.debug.Dwarf.call_frame.Instruction.read(stream, addr_size_bytes, endian); + prev_row = try self.step(allocator, cie, i == 0, instruction); + if (pc < fde.pc_begin + self.current_row.offset) return prev_row; + } + } + + return self.current_row; + } + + pub fn runToNative( + self: *VirtualMachine, + allocator: std.mem.Allocator, + pc: u64, + cie: std.debug.Dwarf.CommonInformationEntry, + fde: std.debug.Dwarf.FrameDescriptionEntry, + ) !Row { + return self.runTo(allocator, pc, cie, fde, @sizeOf(usize), builtin.target.cpu.arch.endian()); + } + + fn resolveCopyOnWrite(self: *VirtualMachine, allocator: std.mem.Allocator) !void { + if (!self.current_row.copy_on_write) return; + + const new_start = self.columns.items.len; + if (self.current_row.columns.len > 0) { + try self.columns.ensureUnusedCapacity(allocator, self.current_row.columns.len); + self.columns.appendSliceAssumeCapacity(self.rowColumns(self.current_row)); + self.current_row.columns.start = new_start; + } + } + + /// Executes a single instruction. + /// If this instruction is from the CIE, `is_initial` should be set. + /// Returns the value of `current_row` before executing this instruction. 
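+ ///
+ /// Intended call pattern (as `runTo` above does): run every CIE instruction with
+ /// `is_initial = true`, then each FDE instruction with `is_initial = false`, stopping
+ /// once `current_row.offset` advances past the offset of the target pc.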
+ pub fn step( + self: *VirtualMachine, + allocator: std.mem.Allocator, + cie: std.debug.Dwarf.CommonInformationEntry, + is_initial: bool, + instruction: Instruction, + ) !Row { + // CIE instructions must be run before FDE instructions + assert(!is_initial or self.cie_row == null); + if (!is_initial and self.cie_row == null) { + self.cie_row = self.current_row; + self.current_row.copy_on_write = true; + } + + const prev_row = self.current_row; + switch (instruction) { + .set_loc => |i| { + if (i.address <= self.current_row.offset) return error.InvalidOperation; + // TODO: Check cie.segment_selector_size != 0 for DWARFV4 + self.current_row.offset = i.address; + }, + inline .advance_loc, + .advance_loc1, + .advance_loc2, + .advance_loc4, + => |i| { + self.current_row.offset += i.delta * cie.code_alignment_factor; + self.current_row.copy_on_write = true; + }, + inline .offset, + .offset_extended, + .offset_extended_sf, + => |i| { + try self.resolveCopyOnWrite(allocator); + const column = try self.getOrAddColumn(allocator, i.register); + column.rule = .{ .offset = @as(i64, @intCast(i.offset)) * cie.data_alignment_factor }; + }, + inline .restore, + .restore_extended, + => |i| { + try self.resolveCopyOnWrite(allocator); + if (self.cie_row) |cie_row| { + const column = try self.getOrAddColumn(allocator, i.register); + column.rule = for (self.rowColumns(cie_row)) |cie_column| { + if (cie_column.register == i.register) break cie_column.rule; + } else .{ .default = {} }; + } else return error.InvalidOperation; + }, + .nop => {}, + .undefined => |i| { + try self.resolveCopyOnWrite(allocator); + const column = try self.getOrAddColumn(allocator, i.register); + column.rule = .{ .undefined = {} }; + }, + .same_value => |i| { + try self.resolveCopyOnWrite(allocator); + const column = try self.getOrAddColumn(allocator, i.register); + column.rule = .{ .same_value = {} }; + }, + .register => |i| { + try self.resolveCopyOnWrite(allocator); + const column = try self.getOrAddColumn(allocator, i.register); + column.rule = .{ .register = i.target_register }; + }, + .remember_state => { + try self.stack.append(allocator, self.current_row.columns); + self.current_row.copy_on_write = true; + }, + .restore_state => { + const restored_columns = self.stack.popOrNull() orelse return error.InvalidOperation; + self.columns.shrinkRetainingCapacity(self.columns.items.len - self.current_row.columns.len); + try self.columns.ensureUnusedCapacity(allocator, restored_columns.len); + + self.current_row.columns.start = self.columns.items.len; + self.current_row.columns.len = restored_columns.len; + self.columns.appendSliceAssumeCapacity(self.columns.items[restored_columns.start..][0..restored_columns.len]); + }, + .def_cfa => |i| { + try self.resolveCopyOnWrite(allocator); + self.current_row.cfa = .{ + .register = i.register, + .rule = .{ .val_offset = @intCast(i.offset) }, + }; + }, + .def_cfa_sf => |i| { + try self.resolveCopyOnWrite(allocator); + self.current_row.cfa = .{ + .register = i.register, + .rule = .{ .val_offset = i.offset * cie.data_alignment_factor }, + }; + }, + .def_cfa_register => |i| { + try self.resolveCopyOnWrite(allocator); + if (self.current_row.cfa.register == null or self.current_row.cfa.rule != .val_offset) return error.InvalidOperation; + self.current_row.cfa.register = i.register; + }, + .def_cfa_offset => |i| { + try self.resolveCopyOnWrite(allocator); + if (self.current_row.cfa.register == null or self.current_row.cfa.rule != .val_offset) return error.InvalidOperation; + self.current_row.cfa.rule = 
.{ + .val_offset = @intCast(i.offset), + }; + }, + .def_cfa_offset_sf => |i| { + try self.resolveCopyOnWrite(allocator); + if (self.current_row.cfa.register == null or self.current_row.cfa.rule != .val_offset) return error.InvalidOperation; + self.current_row.cfa.rule = .{ + .val_offset = i.offset * cie.data_alignment_factor, + }; + }, + .def_cfa_expression => |i| { + try self.resolveCopyOnWrite(allocator); + self.current_row.cfa.register = undefined; + self.current_row.cfa.rule = .{ + .expression = i.block, + }; + }, + .expression => |i| { + try self.resolveCopyOnWrite(allocator); + const column = try self.getOrAddColumn(allocator, i.register); + column.rule = .{ + .expression = i.block, + }; + }, + .val_offset => |i| { + try self.resolveCopyOnWrite(allocator); + const column = try self.getOrAddColumn(allocator, i.register); + column.rule = .{ + .val_offset = @as(i64, @intCast(i.offset)) * cie.data_alignment_factor, + }; + }, + .val_offset_sf => |i| { + try self.resolveCopyOnWrite(allocator); + const column = try self.getOrAddColumn(allocator, i.register); + column.rule = .{ + .val_offset = i.offset * cie.data_alignment_factor, + }; + }, + .val_expression => |i| { + try self.resolveCopyOnWrite(allocator); + const column = try self.getOrAddColumn(allocator, i.register); + column.rule = .{ + .val_expression = i.block, + }; + }, + } + + return prev_row; + } +}; diff --git a/lib/std/debug/Dwarf/expression.zig b/lib/std/debug/Dwarf/expression.zig new file mode 100644 index 0000000000..6243ea9717 --- /dev/null +++ b/lib/std/debug/Dwarf/expression.zig @@ -0,0 +1,1638 @@ +const std = @import("std"); +const builtin = @import("builtin"); +const leb = std.leb; +const OP = std.dwarf.OP; +const abi = std.debug.Dwarf.abi; +const mem = std.mem; +const assert = std.debug.assert; +const native_endian = builtin.cpu.arch.endian(); + +/// Expressions can be evaluated in different contexts, each requiring its own set of inputs. +/// Callers should specify all the fields relevant to their context. If a field is required +/// by the expression and it isn't in the context, error.IncompleteExpressionContext is returned. 
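+/// For example, a caller resolving a register rule during frame unwinding might
+/// supply (hypothetical values):
+///
+///     const context: Context = .{
+///         .thread_context = &thread_context,
+///         .reg_context = reg_context,
+///         .cfa = cfa,
+///     };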
+pub const Context = struct {
+    /// The DWARF format of the section this expression is in
+    format: std.dwarf.Format = .@"32",
+    /// If specified, any addresses will pass through this accessor for validation before being accessed
+    memory_accessor: ?*std.debug.StackIterator.MemoryAccessor = null,
+    /// The compilation unit this expression relates to, if any
+    compile_unit: ?*const std.debug.Dwarf.CompileUnit = null,
+    /// When evaluating a user-presented expression, this is the address of the object being evaluated
+    object_address: ?*const anyopaque = null,
+    /// .debug_addr section
+    debug_addr: ?[]const u8 = null,
+    /// Thread context
+    thread_context: ?*std.debug.ThreadContext = null,
+    reg_context: ?abi.RegisterContext = null,
+    /// Call frame address, if in a CFI context
+    cfa: ?usize = null,
+    /// This expression is a sub-expression from an OP.entry_value instruction
+    entry_value_context: bool = false,
+};
+
+pub const Options = struct {
+    /// The address size of the target architecture
+    addr_size: u8 = @sizeOf(usize),
+    /// Endianness of the target architecture
+    endian: std.builtin.Endian = builtin.target.cpu.arch.endian(),
+    /// Restrict the stack machine to a subset of opcodes used in call frame instructions
+    call_frame_context: bool = false,
+};
+
+// Explicitly defined to support executing sub-expressions
+pub const Error = error{
+    UnimplementedExpressionCall,
+    UnimplementedOpcode,
+    UnimplementedUserOpcode,
+    UnimplementedTypedComparison,
+    UnimplementedTypeConversion,
+
+    UnknownExpressionOpcode,
+
+    IncompleteExpressionContext,
+
+    InvalidCFAOpcode,
+    InvalidExpression,
+    InvalidFrameBase,
+    InvalidIntegralTypeSize,
+    InvalidRegister,
+    InvalidSubExpression,
+    InvalidTypeLength,
+
+    TruncatedIntegralType,
+} || abi.AbiError || error{ EndOfStream, Overflow, OutOfMemory, DivisionByZero };
+
+/// A stack machine that can decode and run DWARF expressions.
+/// Expressions can be decoded for non-native address size and endianness,
+/// but can only be executed if the current target matches the configuration.
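+///
+/// A minimal sketch of native evaluation, assuming `expr` holds an encoded
+/// expression and `allocator` is any `std.mem.Allocator`:
+///
+///     var machine: StackMachine(.{}) = .{};
+///     defer machine.deinit(allocator);
+///     const result = try machine.run(expr, allocator, .{}, null);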
+pub fn StackMachine(comptime options: Options) type {
+    const addr_type = switch (options.addr_size) {
+        2 => u16,
+        4 => u32,
+        8 => u64,
+        else => @compileError(std.fmt.comptimePrint("Unsupported address size of {d}", .{options.addr_size})),
+    };
+
+    const addr_type_signed = switch (options.addr_size) {
+        2 => i16,
+        4 => i32,
+        8 => i64,
+        else => @compileError(std.fmt.comptimePrint("Unsupported address size of {d}", .{options.addr_size})),
+    };
+
+    return struct {
+        const Self = @This();
+
+        const Operand = union(enum) {
+            generic: addr_type,
+            register: u8,
+            type_size: u8,
+            branch_offset: i16,
+            base_register: struct {
+                base_register: u8,
+                offset: i64,
+            },
+            composite_location: struct {
+                size: u64,
+                offset: i64,
+            },
+            block: []const u8,
+            register_type: struct {
+                register: u8,
+                type_offset: addr_type,
+            },
+            const_type: struct {
+                type_offset: addr_type,
+                value_bytes: []const u8,
+            },
+            deref_type: struct {
+                size: u8,
+                type_offset: addr_type,
+            },
+        };
+
+        const Value = union(enum) {
+            generic: addr_type,
+
+            // Typed value with a maximum size of a register
+            regval_type: struct {
+                // Offset of DW_TAG_base_type DIE
+                type_offset: addr_type,
+                type_size: u8,
+                value: addr_type,
+            },
+
+            // Typed value specified directly in the instruction stream
+            const_type: struct {
+                // Offset of DW_TAG_base_type DIE
+                type_offset: addr_type,
+                // Backed by the instruction stream
+                value_bytes: []const u8,
+            },
+
+            pub fn asIntegral(self: Value) !addr_type {
+                return switch (self) {
+                    .generic => |v| v,
+
+                    // TODO: For these two prongs, look up the type and assert it's integral?
+                    .regval_type => |regval_type| regval_type.value,
+                    .const_type => |const_type| {
+                        const value: u64 = switch (const_type.value_bytes.len) {
+                            1 => mem.readInt(u8, const_type.value_bytes[0..1], native_endian),
+                            2 => mem.readInt(u16, const_type.value_bytes[0..2], native_endian),
+                            4 => mem.readInt(u32, const_type.value_bytes[0..4], native_endian),
+                            8 => mem.readInt(u64, const_type.value_bytes[0..8], native_endian),
+                            else => return error.InvalidIntegralTypeSize,
+                        };
+
+                        return std.math.cast(addr_type, value) orelse error.TruncatedIntegralType;
+                    },
+                };
+            }
+        };
+
+        stack: std.ArrayListUnmanaged(Value) = .{},
+
+        pub fn reset(self: *Self) void {
+            self.stack.clearRetainingCapacity();
+        }
+
+        pub fn deinit(self: *Self, allocator: std.mem.Allocator) void {
+            self.stack.deinit(allocator);
+        }
+
+        fn generic(value: anytype) Operand {
+            const int_info = @typeInfo(@TypeOf(value)).Int;
+            if (@sizeOf(@TypeOf(value)) > options.addr_size) {
+                return .{ .generic = switch (int_info.signedness) {
+                    .signed => @bitCast(@as(addr_type_signed, @truncate(value))),
+                    .unsigned => @truncate(value),
+                } };
+            } else {
+                return .{ .generic = switch (int_info.signedness) {
+                    .signed => @bitCast(@as(addr_type_signed, @intCast(value))),
+                    .unsigned => @intCast(value),
+                } };
+            }
+        }
+
+        pub fn readOperand(stream: *std.io.FixedBufferStream([]const u8), opcode: u8, context: Context) !?Operand {
+            const reader = stream.reader();
+            return switch (opcode) {
+                OP.addr => generic(try reader.readInt(addr_type, options.endian)),
+                OP.call_ref => switch (context.format) {
+                    .@"32" => generic(try reader.readInt(u32, options.endian)),
+                    .@"64" => generic(try reader.readInt(u64, options.endian)),
+                },
+                OP.const1u,
+                OP.pick,
+                => generic(try reader.readByte()),
+                OP.deref_size,
+                OP.xderef_size,
+                => .{ .type_size = try reader.readByte() },
+                OP.const1s => generic(try reader.readByteSigned()),
+                OP.const2u,
+                OP.call2,
+                => generic(try reader.readInt(u16, options.endian)),
+                OP.call4 =>
generic(try reader.readInt(u32, options.endian)), + OP.const2s => generic(try reader.readInt(i16, options.endian)), + OP.bra, + OP.skip, + => .{ .branch_offset = try reader.readInt(i16, options.endian) }, + OP.const4u => generic(try reader.readInt(u32, options.endian)), + OP.const4s => generic(try reader.readInt(i32, options.endian)), + OP.const8u => generic(try reader.readInt(u64, options.endian)), + OP.const8s => generic(try reader.readInt(i64, options.endian)), + OP.constu, + OP.plus_uconst, + OP.addrx, + OP.constx, + OP.convert, + OP.reinterpret, + => generic(try leb.readUleb128(u64, reader)), + OP.consts, + OP.fbreg, + => generic(try leb.readIleb128(i64, reader)), + OP.lit0...OP.lit31 => |n| generic(n - OP.lit0), + OP.reg0...OP.reg31 => |n| .{ .register = n - OP.reg0 }, + OP.breg0...OP.breg31 => |n| .{ .base_register = .{ + .base_register = n - OP.breg0, + .offset = try leb.readIleb128(i64, reader), + } }, + OP.regx => .{ .register = try leb.readUleb128(u8, reader) }, + OP.bregx => blk: { + const base_register = try leb.readUleb128(u8, reader); + const offset = try leb.readIleb128(i64, reader); + break :blk .{ .base_register = .{ + .base_register = base_register, + .offset = offset, + } }; + }, + OP.regval_type => blk: { + const register = try leb.readUleb128(u8, reader); + const type_offset = try leb.readUleb128(addr_type, reader); + break :blk .{ .register_type = .{ + .register = register, + .type_offset = type_offset, + } }; + }, + OP.piece => .{ + .composite_location = .{ + .size = try leb.readUleb128(u8, reader), + .offset = 0, + }, + }, + OP.bit_piece => blk: { + const size = try leb.readUleb128(u8, reader); + const offset = try leb.readIleb128(i64, reader); + break :blk .{ .composite_location = .{ + .size = size, + .offset = offset, + } }; + }, + OP.implicit_value, OP.entry_value => blk: { + const size = try leb.readUleb128(u8, reader); + if (stream.pos + size > stream.buffer.len) return error.InvalidExpression; + const block = stream.buffer[stream.pos..][0..size]; + stream.pos += size; + break :blk .{ + .block = block, + }; + }, + OP.const_type => blk: { + const type_offset = try leb.readUleb128(addr_type, reader); + const size = try reader.readByte(); + if (stream.pos + size > stream.buffer.len) return error.InvalidExpression; + const value_bytes = stream.buffer[stream.pos..][0..size]; + stream.pos += size; + break :blk .{ .const_type = .{ + .type_offset = type_offset, + .value_bytes = value_bytes, + } }; + }, + OP.deref_type, + OP.xderef_type, + => .{ + .deref_type = .{ + .size = try reader.readByte(), + .type_offset = try leb.readUleb128(addr_type, reader), + }, + }, + OP.lo_user...OP.hi_user => return error.UnimplementedUserOpcode, + else => null, + }; + } + + pub fn run( + self: *Self, + expression: []const u8, + allocator: std.mem.Allocator, + context: Context, + initial_value: ?usize, + ) Error!?Value { + if (initial_value) |i| try self.stack.append(allocator, .{ .generic = i }); + var stream = std.io.fixedBufferStream(expression); + while (try self.step(&stream, allocator, context)) {} + if (self.stack.items.len == 0) return null; + return self.stack.items[self.stack.items.len - 1]; + } + + /// Reads an opcode and its operands from `stream`, then executes it + pub fn step( + self: *Self, + stream: *std.io.FixedBufferStream([]const u8), + allocator: std.mem.Allocator, + context: Context, + ) Error!bool { + if (@sizeOf(usize) != @sizeOf(addr_type) or options.endian != comptime builtin.target.cpu.arch.endian()) + @compileError("Execution of non-native address sizes / 
endianness is not supported");
+
+            const opcode = try stream.reader().readByte();
+            if (options.call_frame_context and !isOpcodeValidInCFA(opcode)) return error.InvalidCFAOpcode;
+            const operand = try readOperand(stream, opcode, context);
+            switch (opcode) {
+
+                // 2.5.1.1: Literal Encodings
+                OP.lit0...OP.lit31,
+                OP.addr,
+                OP.const1u,
+                OP.const2u,
+                OP.const4u,
+                OP.const8u,
+                OP.const1s,
+                OP.const2s,
+                OP.const4s,
+                OP.const8s,
+                OP.constu,
+                OP.consts,
+                => try self.stack.append(allocator, .{ .generic = operand.?.generic }),
+
+                OP.const_type => {
+                    const const_type = operand.?.const_type;
+                    try self.stack.append(allocator, .{ .const_type = .{
+                        .type_offset = const_type.type_offset,
+                        .value_bytes = const_type.value_bytes,
+                    } });
+                },
+
+                OP.addrx,
+                OP.constx,
+                => {
+                    if (context.compile_unit == null) return error.IncompleteExpressionContext;
+                    if (context.debug_addr == null) return error.IncompleteExpressionContext;
+                    const debug_addr_index = operand.?.generic;
+                    const offset = context.compile_unit.?.addr_base + debug_addr_index;
+                    if (offset + @sizeOf(usize) > context.debug_addr.?.len) return error.InvalidExpression;
+                    const value = mem.readInt(usize, context.debug_addr.?[offset..][0..@sizeOf(usize)], native_endian);
+                    try self.stack.append(allocator, .{ .generic = value });
+                },
+
+                // 2.5.1.2: Register Values
+                OP.fbreg => {
+                    if (context.compile_unit == null) return error.IncompleteExpressionContext;
+                    if (context.compile_unit.?.frame_base == null) return error.IncompleteExpressionContext;
+
+                    const offset: i64 = @intCast(operand.?.generic);
+                    _ = offset;
+
+                    switch (context.compile_unit.?.frame_base.?.*) {
+                        .exprloc => {
+                            // TODO: Run this expression in a nested stack machine
+                            return error.UnimplementedOpcode;
+                        },
+                        .loclistx => {
+                            // TODO: Read value from .debug_loclists
+                            return error.UnimplementedOpcode;
+                        },
+                        .sec_offset => {
+                            // TODO: Read value from .debug_loclists
+                            return error.UnimplementedOpcode;
+                        },
+                        else => return error.InvalidFrameBase,
+                    }
+                },
+                OP.breg0...OP.breg31,
+                OP.bregx,
+                => {
+                    if (context.thread_context == null) return error.IncompleteExpressionContext;
+
+                    const base_register = operand.?.base_register;
+                    var value: i64 = @intCast(mem.readInt(usize, (try abi.regBytes(
+                        context.thread_context.?,
+                        base_register.base_register,
+                        context.reg_context,
+                    ))[0..@sizeOf(usize)], native_endian));
+                    value += base_register.offset;
+                    try self.stack.append(allocator, .{ .generic = @intCast(value) });
+                },
+                OP.regval_type => {
+                    const register_type = operand.?.register_type;
+                    const value = mem.readInt(usize, (try abi.regBytes(
+                        context.thread_context.?,
+                        register_type.register,
+                        context.reg_context,
+                    ))[0..@sizeOf(usize)], native_endian);
+                    try self.stack.append(allocator, .{
+                        .regval_type = .{
+                            .type_offset = register_type.type_offset,
+                            .type_size = @sizeOf(addr_type),
+                            .value = value,
+                        },
+                    });
+                },
+
+                // 2.5.1.3: Stack Operations
+                OP.dup => {
+                    if (self.stack.items.len == 0) return error.InvalidExpression;
+                    try self.stack.append(allocator, self.stack.items[self.stack.items.len - 1]);
+                },
+                OP.drop => {
+                    if (self.stack.items.len == 0) return error.InvalidExpression;
+                    _ = self.stack.pop();
+                },
+                OP.pick, OP.over => {
+                    const stack_index = if (opcode == OP.over) 1 else operand.?.generic;
+                    if (stack_index >= self.stack.items.len) return error.InvalidExpression;
+                    try self.stack.append(allocator, self.stack.items[self.stack.items.len - 1 - stack_index]);
+                },
+                OP.swap => {
+                    if (self.stack.items.len < 2) return error.InvalidExpression;
+                    mem.swap(Value, &self.stack.items[self.stack.items.len - 1],
&self.stack.items[self.stack.items.len - 2]); + }, + OP.rot => { + if (self.stack.items.len < 3) return error.InvalidExpression; + const first = self.stack.items[self.stack.items.len - 1]; + self.stack.items[self.stack.items.len - 1] = self.stack.items[self.stack.items.len - 2]; + self.stack.items[self.stack.items.len - 2] = self.stack.items[self.stack.items.len - 3]; + self.stack.items[self.stack.items.len - 3] = first; + }, + OP.deref, + OP.xderef, + OP.deref_size, + OP.xderef_size, + OP.deref_type, + OP.xderef_type, + => { + if (self.stack.items.len == 0) return error.InvalidExpression; + const addr = try self.stack.items[self.stack.items.len - 1].asIntegral(); + const addr_space_identifier: ?usize = switch (opcode) { + OP.xderef, + OP.xderef_size, + OP.xderef_type, + => blk: { + _ = self.stack.pop(); + if (self.stack.items.len == 0) return error.InvalidExpression; + break :blk try self.stack.items[self.stack.items.len - 1].asIntegral(); + }, + else => null, + }; + + // Usage of addr_space_identifier in the address calculation is implementation defined. + // This code will need to be updated to handle any architectures that utilize this. + _ = addr_space_identifier; + + const size = switch (opcode) { + OP.deref, + OP.xderef, + => @sizeOf(addr_type), + OP.deref_size, + OP.xderef_size, + => operand.?.type_size, + OP.deref_type, + OP.xderef_type, + => operand.?.deref_type.size, + else => unreachable, + }; + + if (context.memory_accessor) |memory_accessor| { + if (!switch (size) { + 1 => memory_accessor.load(u8, addr) != null, + 2 => memory_accessor.load(u16, addr) != null, + 4 => memory_accessor.load(u32, addr) != null, + 8 => memory_accessor.load(u64, addr) != null, + else => return error.InvalidExpression, + }) return error.InvalidExpression; + } + + const value: addr_type = std.math.cast(addr_type, @as(u64, switch (size) { + 1 => @as(*const u8, @ptrFromInt(addr)).*, + 2 => @as(*const u16, @ptrFromInt(addr)).*, + 4 => @as(*const u32, @ptrFromInt(addr)).*, + 8 => @as(*const u64, @ptrFromInt(addr)).*, + else => return error.InvalidExpression, + })) orelse return error.InvalidExpression; + + switch (opcode) { + OP.deref_type, + OP.xderef_type, + => { + self.stack.items[self.stack.items.len - 1] = .{ + .regval_type = .{ + .type_offset = operand.?.deref_type.type_offset, + .type_size = operand.?.deref_type.size, + .value = value, + }, + }; + }, + else => { + self.stack.items[self.stack.items.len - 1] = .{ .generic = value }; + }, + } + }, + OP.push_object_address => { + // In sub-expressions, `push_object_address` is not meaningful (as per the + // spec), so treat it like a nop + if (!context.entry_value_context) { + if (context.object_address == null) return error.IncompleteExpressionContext; + try self.stack.append(allocator, .{ .generic = @intFromPtr(context.object_address.?) 
}); + } + }, + OP.form_tls_address => { + return error.UnimplementedOpcode; + }, + OP.call_frame_cfa => { + if (context.cfa) |cfa| { + try self.stack.append(allocator, .{ .generic = cfa }); + } else return error.IncompleteExpressionContext; + }, + + // 2.5.1.4: Arithmetic and Logical Operations + OP.abs => { + if (self.stack.items.len == 0) return error.InvalidExpression; + const value: isize = @bitCast(try self.stack.items[self.stack.items.len - 1].asIntegral()); + self.stack.items[self.stack.items.len - 1] = .{ + .generic = @abs(value), + }; + }, + OP.@"and" => { + if (self.stack.items.len < 2) return error.InvalidExpression; + const a = try self.stack.pop().asIntegral(); + self.stack.items[self.stack.items.len - 1] = .{ + .generic = a & try self.stack.items[self.stack.items.len - 1].asIntegral(), + }; + }, + OP.div => { + if (self.stack.items.len < 2) return error.InvalidExpression; + const a: isize = @bitCast(try self.stack.pop().asIntegral()); + const b: isize = @bitCast(try self.stack.items[self.stack.items.len - 1].asIntegral()); + self.stack.items[self.stack.items.len - 1] = .{ + .generic = @bitCast(try std.math.divTrunc(isize, b, a)), + }; + }, + OP.minus => { + if (self.stack.items.len < 2) return error.InvalidExpression; + const b = try self.stack.pop().asIntegral(); + self.stack.items[self.stack.items.len - 1] = .{ + .generic = try std.math.sub(addr_type, try self.stack.items[self.stack.items.len - 1].asIntegral(), b), + }; + }, + OP.mod => { + if (self.stack.items.len < 2) return error.InvalidExpression; + const a: isize = @bitCast(try self.stack.pop().asIntegral()); + const b: isize = @bitCast(try self.stack.items[self.stack.items.len - 1].asIntegral()); + self.stack.items[self.stack.items.len - 1] = .{ + .generic = @bitCast(@mod(b, a)), + }; + }, + OP.mul => { + if (self.stack.items.len < 2) return error.InvalidExpression; + const a: isize = @bitCast(try self.stack.pop().asIntegral()); + const b: isize = @bitCast(try self.stack.items[self.stack.items.len - 1].asIntegral()); + self.stack.items[self.stack.items.len - 1] = .{ + .generic = @bitCast(@mulWithOverflow(a, b)[0]), + }; + }, + OP.neg => { + if (self.stack.items.len == 0) return error.InvalidExpression; + self.stack.items[self.stack.items.len - 1] = .{ + .generic = @bitCast( + try std.math.negate( + @as(isize, @bitCast(try self.stack.items[self.stack.items.len - 1].asIntegral())), + ), + ), + }; + }, + OP.not => { + if (self.stack.items.len == 0) return error.InvalidExpression; + self.stack.items[self.stack.items.len - 1] = .{ + .generic = ~try self.stack.items[self.stack.items.len - 1].asIntegral(), + }; + }, + OP.@"or" => { + if (self.stack.items.len < 2) return error.InvalidExpression; + const a = try self.stack.pop().asIntegral(); + self.stack.items[self.stack.items.len - 1] = .{ + .generic = a | try self.stack.items[self.stack.items.len - 1].asIntegral(), + }; + }, + OP.plus => { + if (self.stack.items.len < 2) return error.InvalidExpression; + const b = try self.stack.pop().asIntegral(); + self.stack.items[self.stack.items.len - 1] = .{ + .generic = try std.math.add(addr_type, try self.stack.items[self.stack.items.len - 1].asIntegral(), b), + }; + }, + OP.plus_uconst => { + if (self.stack.items.len == 0) return error.InvalidExpression; + const constant = operand.?.generic; + self.stack.items[self.stack.items.len - 1] = .{ + .generic = try std.math.add(addr_type, try self.stack.items[self.stack.items.len - 1].asIntegral(), constant), + }; + }, + OP.shl => { + if (self.stack.items.len < 2) return 
error.InvalidExpression; + const a = try self.stack.pop().asIntegral(); + const b = try self.stack.items[self.stack.items.len - 1].asIntegral(); + self.stack.items[self.stack.items.len - 1] = .{ + .generic = std.math.shl(usize, b, a), + }; + }, + OP.shr => { + if (self.stack.items.len < 2) return error.InvalidExpression; + const a = try self.stack.pop().asIntegral(); + const b = try self.stack.items[self.stack.items.len - 1].asIntegral(); + self.stack.items[self.stack.items.len - 1] = .{ + .generic = std.math.shr(usize, b, a), + }; + }, + OP.shra => { + if (self.stack.items.len < 2) return error.InvalidExpression; + const a = try self.stack.pop().asIntegral(); + const b: isize = @bitCast(try self.stack.items[self.stack.items.len - 1].asIntegral()); + self.stack.items[self.stack.items.len - 1] = .{ + .generic = @bitCast(std.math.shr(isize, b, a)), + }; + }, + OP.xor => { + if (self.stack.items.len < 2) return error.InvalidExpression; + const a = try self.stack.pop().asIntegral(); + self.stack.items[self.stack.items.len - 1] = .{ + .generic = a ^ try self.stack.items[self.stack.items.len - 1].asIntegral(), + }; + }, + + // 2.5.1.5: Control Flow Operations + OP.le, + OP.ge, + OP.eq, + OP.lt, + OP.gt, + OP.ne, + => { + if (self.stack.items.len < 2) return error.InvalidExpression; + const a = self.stack.pop(); + const b = self.stack.items[self.stack.items.len - 1]; + + if (a == .generic and b == .generic) { + const a_int: isize = @bitCast(a.asIntegral() catch unreachable); + const b_int: isize = @bitCast(b.asIntegral() catch unreachable); + const result = @intFromBool(switch (opcode) { + OP.le => b_int <= a_int, + OP.ge => b_int >= a_int, + OP.eq => b_int == a_int, + OP.lt => b_int < a_int, + OP.gt => b_int > a_int, + OP.ne => b_int != a_int, + else => unreachable, + }); + + self.stack.items[self.stack.items.len - 1] = .{ .generic = result }; + } else { + // TODO: Load the types referenced by these values, find their comparison operator, and run it + return error.UnimplementedTypedComparison; + } + }, + OP.skip, OP.bra => { + const branch_offset = operand.?.branch_offset; + const condition = if (opcode == OP.bra) blk: { + if (self.stack.items.len == 0) return error.InvalidExpression; + break :blk try self.stack.pop().asIntegral() != 0; + } else true; + + if (condition) { + const new_pos = std.math.cast( + usize, + try std.math.add(isize, @as(isize, @intCast(stream.pos)), branch_offset), + ) orelse return error.InvalidExpression; + + if (new_pos < 0 or new_pos > stream.buffer.len) return error.InvalidExpression; + stream.pos = new_pos; + } + }, + OP.call2, + OP.call4, + OP.call_ref, + => { + const debug_info_offset = operand.?.generic; + _ = debug_info_offset; + + // TODO: Load a DIE entry at debug_info_offset in a .debug_info section (the spec says that it + // can be in a separate exe / shared object from the one containing this expression). + // Transfer control to the DW_AT_location attribute, with the current stack as input. 
+ + return error.UnimplementedExpressionCall; + }, + + // 2.5.1.6: Type Conversions + OP.convert => { + if (self.stack.items.len == 0) return error.InvalidExpression; + const type_offset = operand.?.generic; + + // TODO: Load the DW_TAG_base_type entries in context.compile_unit and verify both types are the same size + const value = self.stack.items[self.stack.items.len - 1]; + if (type_offset == 0) { + self.stack.items[self.stack.items.len - 1] = .{ .generic = try value.asIntegral() }; + } else { + // TODO: Load the DW_TAG_base_type entry in context.compile_unit, find a conversion operator + // from the old type to the new type, run it. + return error.UnimplementedTypeConversion; + } + }, + OP.reinterpret => { + if (self.stack.items.len == 0) return error.InvalidExpression; + const type_offset = operand.?.generic; + + // TODO: Load the DW_TAG_base_type entries in context.compile_unit and verify both types are the same size + const value = self.stack.items[self.stack.items.len - 1]; + if (type_offset == 0) { + self.stack.items[self.stack.items.len - 1] = .{ .generic = try value.asIntegral() }; + } else { + self.stack.items[self.stack.items.len - 1] = switch (value) { + .generic => |v| .{ + .regval_type = .{ + .type_offset = type_offset, + .type_size = @sizeOf(addr_type), + .value = v, + }, + }, + .regval_type => |r| .{ + .regval_type = .{ + .type_offset = type_offset, + .type_size = r.type_size, + .value = r.value, + }, + }, + .const_type => |c| .{ + .const_type = .{ + .type_offset = type_offset, + .value_bytes = c.value_bytes, + }, + }, + }; + } + }, + + // 2.5.1.7: Special Operations + OP.nop => {}, + OP.entry_value => { + const block = operand.?.block; + if (block.len == 0) return error.InvalidSubExpression; + + // TODO: The spec states that this sub-expression needs to observe the state (ie. registers) + // as it was upon entering the current subprogram. If this isn't being called at the + // end of a frame unwind operation, an additional ThreadContext with this state will be needed. 
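+                    // The block is either a single register location description,
+                    // handled directly below, or a full sub-expression that is
+                    // evaluated on a fresh stack machine with `entry_value_context`
+                    // set so that OP.push_object_address is treated as a nop.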
+
+                    if (isOpcodeRegisterLocation(block[0])) {
+                        if (context.thread_context == null) return error.IncompleteExpressionContext;
+
+                        var block_stream = std.io.fixedBufferStream(block);
+                        const register = (try readOperand(&block_stream, block[0], context)).?.register;
+                        const value = mem.readInt(usize, (try abi.regBytes(context.thread_context.?, register, context.reg_context))[0..@sizeOf(usize)], native_endian);
+                        try self.stack.append(allocator, .{ .generic = value });
+                    } else {
+                        var stack_machine: Self = .{};
+                        defer stack_machine.deinit(allocator);
+
+                        var sub_context = context;
+                        sub_context.entry_value_context = true;
+                        const result = try stack_machine.run(block, allocator, sub_context, null);
+                        try self.stack.append(allocator, result orelse return error.InvalidSubExpression);
+                    }
+                },
+
+                // These have already been handled by readOperand
+                OP.lo_user...OP.hi_user => unreachable,
+                else => {
+                    //std.debug.print("Unknown DWARF expression opcode: {x}\n", .{opcode});
+                    return error.UnknownExpressionOpcode;
+                },
+            }
+
+            return stream.pos < stream.buffer.len;
+        }
+    };
+}
+
+pub fn Builder(comptime options: Options) type {
+    const addr_type = switch (options.addr_size) {
+        2 => u16,
+        4 => u32,
+        8 => u64,
+        else => @compileError(std.fmt.comptimePrint("Unsupported address size of {d}", .{options.addr_size})),
+    };
+
+    return struct {
+        /// Zero-operand instructions
+        pub fn writeOpcode(writer: anytype, comptime opcode: u8) !void {
+            if (options.call_frame_context and !comptime isOpcodeValidInCFA(opcode)) return error.InvalidCFAOpcode;
+            switch (opcode) {
+                OP.dup,
+                OP.drop,
+                OP.over,
+                OP.swap,
+                OP.rot,
+                OP.deref,
+                OP.xderef,
+                OP.push_object_address,
+                OP.form_tls_address,
+                OP.call_frame_cfa,
+                OP.abs,
+                OP.@"and",
+                OP.div,
+                OP.minus,
+                OP.mod,
+                OP.mul,
+                OP.neg,
+                OP.not,
+                OP.@"or",
+                OP.plus,
+                OP.shl,
+                OP.shr,
+                OP.shra,
+                OP.xor,
+                OP.le,
+                OP.ge,
+                OP.eq,
+                OP.lt,
+                OP.gt,
+                OP.ne,
+                OP.nop,
+                OP.stack_value,
+                => try writer.writeByte(opcode),
+                else => @compileError("This opcode requires operands, use `write<Opcode>()` instead"),
+            }
+        }
+
+        // 2.5.1.1: Literal Encodings
+        pub fn writeLiteral(writer: anytype, literal: u8) !void {
+            switch (literal) {
+                0...31 => |n| try writer.writeByte(n + OP.lit0),
+                else => return error.InvalidLiteral,
+            }
+        }
+
+        pub fn writeConst(writer: anytype, comptime T: type, value: T) !void {
+            if (@typeInfo(T) != .Int) @compileError("Constants must be integers");
+
+            switch (T) {
+                u8, i8, u16, i16, u32, i32, u64, i64 => {
+                    try writer.writeByte(switch (T) {
+                        u8 => OP.const1u,
+                        i8 => OP.const1s,
+                        u16 => OP.const2u,
+                        i16 => OP.const2s,
+                        u32 => OP.const4u,
+                        i32 => OP.const4s,
+                        u64 => OP.const8u,
+                        i64 => OP.const8s,
+                        else => unreachable,
+                    });
+
+                    try writer.writeInt(T, value, options.endian);
+                },
+                else => switch (@typeInfo(T).Int.signedness) {
+                    .unsigned => {
+                        try writer.writeByte(OP.constu);
+                        try leb.writeUleb128(writer, value);
+                    },
+                    .signed => {
+                        try writer.writeByte(OP.consts);
+                        try leb.writeIleb128(writer, value);
+                    },
+                },
+            }
+        }
+
+        pub fn writeConstx(writer: anytype, debug_addr_offset: anytype) !void {
+            try writer.writeByte(OP.constx);
+            try leb.writeUleb128(writer, debug_addr_offset);
+        }
+
+        pub fn writeConstType(writer: anytype, die_offset: anytype, value_bytes: []const u8) !void {
+            if (options.call_frame_context) return error.InvalidCFAOpcode;
+            if (value_bytes.len > 0xff) return error.InvalidTypeLength;
+            try writer.writeByte(OP.const_type);
+            try leb.writeUleb128(writer, die_offset);
+            try writer.writeByte(@intCast(value_bytes.len));
+            try
writer.writeAll(value_bytes); + } + + pub fn writeAddr(writer: anytype, value: addr_type) !void { + try writer.writeByte(OP.addr); + try writer.writeInt(addr_type, value, options.endian); + } + + pub fn writeAddrx(writer: anytype, debug_addr_offset: anytype) !void { + if (options.call_frame_context) return error.InvalidCFAOpcode; + try writer.writeByte(OP.addrx); + try leb.writeUleb128(writer, debug_addr_offset); + } + + // 2.5.1.2: Register Values + pub fn writeFbreg(writer: anytype, offset: anytype) !void { + try writer.writeByte(OP.fbreg); + try leb.writeIleb128(writer, offset); + } + + pub fn writeBreg(writer: anytype, register: u8, offset: anytype) !void { + if (register > 31) return error.InvalidRegister; + try writer.writeByte(OP.breg0 + register); + try leb.writeIleb128(writer, offset); + } + + pub fn writeBregx(writer: anytype, register: anytype, offset: anytype) !void { + try writer.writeByte(OP.bregx); + try leb.writeUleb128(writer, register); + try leb.writeIleb128(writer, offset); + } + + pub fn writeRegvalType(writer: anytype, register: anytype, offset: anytype) !void { + if (options.call_frame_context) return error.InvalidCFAOpcode; + try writer.writeByte(OP.regval_type); + try leb.writeUleb128(writer, register); + try leb.writeUleb128(writer, offset); + } + + // 2.5.1.3: Stack Operations + pub fn writePick(writer: anytype, index: u8) !void { + try writer.writeByte(OP.pick); + try writer.writeByte(index); + } + + pub fn writeDerefSize(writer: anytype, size: u8) !void { + try writer.writeByte(OP.deref_size); + try writer.writeByte(size); + } + + pub fn writeXDerefSize(writer: anytype, size: u8) !void { + try writer.writeByte(OP.xderef_size); + try writer.writeByte(size); + } + + pub fn writeDerefType(writer: anytype, size: u8, die_offset: anytype) !void { + if (options.call_frame_context) return error.InvalidCFAOpcode; + try writer.writeByte(OP.deref_type); + try writer.writeByte(size); + try leb.writeUleb128(writer, die_offset); + } + + pub fn writeXDerefType(writer: anytype, size: u8, die_offset: anytype) !void { + try writer.writeByte(OP.xderef_type); + try writer.writeByte(size); + try leb.writeUleb128(writer, die_offset); + } + + // 2.5.1.4: Arithmetic and Logical Operations + + pub fn writePlusUconst(writer: anytype, uint_value: anytype) !void { + try writer.writeByte(OP.plus_uconst); + try leb.writeUleb128(writer, uint_value); + } + + // 2.5.1.5: Control Flow Operations + + pub fn writeSkip(writer: anytype, offset: i16) !void { + try writer.writeByte(OP.skip); + try writer.writeInt(i16, offset, options.endian); + } + + pub fn writeBra(writer: anytype, offset: i16) !void { + try writer.writeByte(OP.bra); + try writer.writeInt(i16, offset, options.endian); + } + + pub fn writeCall(writer: anytype, comptime T: type, offset: T) !void { + if (options.call_frame_context) return error.InvalidCFAOpcode; + switch (T) { + u16 => try writer.writeByte(OP.call2), + u32 => try writer.writeByte(OP.call4), + else => @compileError("Call operand must be a 2 or 4 byte offset"), + } + + try writer.writeInt(T, offset, options.endian); + } + + pub fn writeCallRef(writer: anytype, comptime is_64: bool, value: if (is_64) u64 else u32) !void { + if (options.call_frame_context) return error.InvalidCFAOpcode; + try writer.writeByte(OP.call_ref); + try writer.writeInt(if (is_64) u64 else u32, value, options.endian); + } + + pub fn writeConvert(writer: anytype, die_offset: anytype) !void { + if (options.call_frame_context) return error.InvalidCFAOpcode; + try writer.writeByte(OP.convert); + try 
leb.writeUleb128(writer, die_offset); + } + + pub fn writeReinterpret(writer: anytype, die_offset: anytype) !void { + if (options.call_frame_context) return error.InvalidCFAOpcode; + try writer.writeByte(OP.reinterpret); + try leb.writeUleb128(writer, die_offset); + } + + // 2.5.1.7: Special Operations + + pub fn writeEntryValue(writer: anytype, expression: []const u8) !void { + try writer.writeByte(OP.entry_value); + try leb.writeUleb128(writer, expression.len); + try writer.writeAll(expression); + } + + // 2.6: Location Descriptions + pub fn writeReg(writer: anytype, register: u8) !void { + try writer.writeByte(OP.reg0 + register); + } + + pub fn writeRegx(writer: anytype, register: anytype) !void { + try writer.writeByte(OP.regx); + try leb.writeUleb128(writer, register); + } + + pub fn writeImplicitValue(writer: anytype, value_bytes: []const u8) !void { + try writer.writeByte(OP.implicit_value); + try leb.writeUleb128(writer, value_bytes.len); + try writer.writeAll(value_bytes); + } + }; +} + +// Certain opcodes are not allowed in a CFA context, see 6.4.2 +fn isOpcodeValidInCFA(opcode: u8) bool { + return switch (opcode) { + OP.addrx, + OP.call2, + OP.call4, + OP.call_ref, + OP.const_type, + OP.constx, + OP.convert, + OP.deref_type, + OP.regval_type, + OP.reinterpret, + OP.push_object_address, + OP.call_frame_cfa, + => false, + else => true, + }; +} + +fn isOpcodeRegisterLocation(opcode: u8) bool { + return switch (opcode) { + OP.reg0...OP.reg31, OP.regx => true, + else => false, + }; +} + +const testing = std.testing; +test "DWARF expressions" { + const allocator = std.testing.allocator; + + const options = Options{}; + var stack_machine = StackMachine(options){}; + defer stack_machine.deinit(allocator); + + const b = Builder(options); + + var program = std.ArrayList(u8).init(allocator); + defer program.deinit(); + + const writer = program.writer(); + + // Literals + { + const context = Context{}; + for (0..32) |i| { + try b.writeLiteral(writer, @intCast(i)); + } + + _ = try stack_machine.run(program.items, allocator, context, 0); + + for (0..32) |i| { + const expected = 31 - i; + try testing.expectEqual(expected, stack_machine.stack.popOrNull().?.generic); + } + } + + // Constants + { + stack_machine.reset(); + program.clearRetainingCapacity(); + + const input = [_]comptime_int{ + 1, + -1, + @as(usize, @truncate(0x0fff)), + @as(isize, @truncate(-0x0fff)), + @as(usize, @truncate(0x0fffffff)), + @as(isize, @truncate(-0x0fffffff)), + @as(usize, @truncate(0x0fffffffffffffff)), + @as(isize, @truncate(-0x0fffffffffffffff)), + @as(usize, @truncate(0x8000000)), + @as(isize, @truncate(-0x8000000)), + @as(usize, @truncate(0x12345678_12345678)), + @as(usize, @truncate(0xffffffff_ffffffff)), + @as(usize, @truncate(0xeeeeeeee_eeeeeeee)), + }; + + try b.writeConst(writer, u8, input[0]); + try b.writeConst(writer, i8, input[1]); + try b.writeConst(writer, u16, input[2]); + try b.writeConst(writer, i16, input[3]); + try b.writeConst(writer, u32, input[4]); + try b.writeConst(writer, i32, input[5]); + try b.writeConst(writer, u64, input[6]); + try b.writeConst(writer, i64, input[7]); + try b.writeConst(writer, u28, input[8]); + try b.writeConst(writer, i28, input[9]); + try b.writeAddr(writer, input[10]); + + var mock_compile_unit: std.debug.Dwarf.CompileUnit = undefined; + mock_compile_unit.addr_base = 1; + + var mock_debug_addr = std.ArrayList(u8).init(allocator); + defer mock_debug_addr.deinit(); + + try mock_debug_addr.writer().writeInt(u16, 0, native_endian); + try 
mock_debug_addr.writer().writeInt(usize, input[11], native_endian); + try mock_debug_addr.writer().writeInt(usize, input[12], native_endian); + + const context = Context{ + .compile_unit = &mock_compile_unit, + .debug_addr = mock_debug_addr.items, + }; + + try b.writeConstx(writer, @as(usize, 1)); + try b.writeAddrx(writer, @as(usize, 1 + @sizeOf(usize))); + + const die_offset: usize = @truncate(0xaabbccdd); + const type_bytes: []const u8 = &.{ 1, 2, 3, 4 }; + try b.writeConstType(writer, die_offset, type_bytes); + + _ = try stack_machine.run(program.items, allocator, context, 0); + + const const_type = stack_machine.stack.popOrNull().?.const_type; + try testing.expectEqual(die_offset, const_type.type_offset); + try testing.expectEqualSlices(u8, type_bytes, const_type.value_bytes); + + const expected = .{ + .{ usize, input[12], usize }, + .{ usize, input[11], usize }, + .{ usize, input[10], usize }, + .{ isize, input[9], isize }, + .{ usize, input[8], usize }, + .{ isize, input[7], isize }, + .{ usize, input[6], usize }, + .{ isize, input[5], isize }, + .{ usize, input[4], usize }, + .{ isize, input[3], isize }, + .{ usize, input[2], usize }, + .{ isize, input[1], isize }, + .{ usize, input[0], usize }, + }; + + inline for (expected) |e| { + try testing.expectEqual(@as(e[0], e[1]), @as(e[2], @bitCast(stack_machine.stack.popOrNull().?.generic))); + } + } + + // Register values + if (@sizeOf(std.debug.ThreadContext) != 0) { + stack_machine.reset(); + program.clearRetainingCapacity(); + + const reg_context = abi.RegisterContext{ + .eh_frame = true, + .is_macho = builtin.os.tag == .macos, + }; + var thread_context: std.debug.ThreadContext = undefined; + std.debug.relocateContext(&thread_context); + const context = Context{ + .thread_context = &thread_context, + .reg_context = reg_context, + }; + + // Only test register operations on arch / os that have them implemented + if (abi.regBytes(&thread_context, 0, reg_context)) |reg_bytes| { + + // TODO: Test fbreg (once implemented): mock a DIE and point compile_unit.frame_base at it + + mem.writeInt(usize, reg_bytes[0..@sizeOf(usize)], 0xee, native_endian); + (try abi.regValueNative(usize, &thread_context, abi.fpRegNum(reg_context), reg_context)).* = 1; + (try abi.regValueNative(usize, &thread_context, abi.spRegNum(reg_context), reg_context)).* = 2; + (try abi.regValueNative(usize, &thread_context, abi.ipRegNum(), reg_context)).* = 3; + + try b.writeBreg(writer, abi.fpRegNum(reg_context), @as(usize, 100)); + try b.writeBreg(writer, abi.spRegNum(reg_context), @as(usize, 200)); + try b.writeBregx(writer, abi.ipRegNum(), @as(usize, 300)); + try b.writeRegvalType(writer, @as(u8, 0), @as(usize, 400)); + + _ = try stack_machine.run(program.items, allocator, context, 0); + + const regval_type = stack_machine.stack.popOrNull().?.regval_type; + try testing.expectEqual(@as(usize, 400), regval_type.type_offset); + try testing.expectEqual(@as(u8, @sizeOf(usize)), regval_type.type_size); + try testing.expectEqual(@as(usize, 0xee), regval_type.value); + + try testing.expectEqual(@as(usize, 303), stack_machine.stack.popOrNull().?.generic); + try testing.expectEqual(@as(usize, 202), stack_machine.stack.popOrNull().?.generic); + try testing.expectEqual(@as(usize, 101), stack_machine.stack.popOrNull().?.generic); + } else |err| { + switch (err) { + error.UnimplementedArch, + error.UnimplementedOs, + error.ThreadContextNotSupported, + => {}, + else => return err, + } + } + } + + // Stack operations + { + var context = Context{}; + + stack_machine.reset(); + 
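+        // Each case below repeats the same pattern: reset the machine, rebuild the
+        // program with the Builder, run it, then pop the stack to check the result.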
program.clearRetainingCapacity(); + try b.writeConst(writer, u8, 1); + try b.writeOpcode(writer, OP.dup); + _ = try stack_machine.run(program.items, allocator, context, null); + try testing.expectEqual(@as(usize, 1), stack_machine.stack.popOrNull().?.generic); + try testing.expectEqual(@as(usize, 1), stack_machine.stack.popOrNull().?.generic); + + stack_machine.reset(); + program.clearRetainingCapacity(); + try b.writeConst(writer, u8, 1); + try b.writeOpcode(writer, OP.drop); + _ = try stack_machine.run(program.items, allocator, context, null); + try testing.expect(stack_machine.stack.popOrNull() == null); + + stack_machine.reset(); + program.clearRetainingCapacity(); + try b.writeConst(writer, u8, 4); + try b.writeConst(writer, u8, 5); + try b.writeConst(writer, u8, 6); + try b.writePick(writer, 2); + _ = try stack_machine.run(program.items, allocator, context, null); + try testing.expectEqual(@as(usize, 4), stack_machine.stack.popOrNull().?.generic); + + stack_machine.reset(); + program.clearRetainingCapacity(); + try b.writeConst(writer, u8, 4); + try b.writeConst(writer, u8, 5); + try b.writeConst(writer, u8, 6); + try b.writeOpcode(writer, OP.over); + _ = try stack_machine.run(program.items, allocator, context, null); + try testing.expectEqual(@as(usize, 5), stack_machine.stack.popOrNull().?.generic); + + stack_machine.reset(); + program.clearRetainingCapacity(); + try b.writeConst(writer, u8, 5); + try b.writeConst(writer, u8, 6); + try b.writeOpcode(writer, OP.swap); + _ = try stack_machine.run(program.items, allocator, context, null); + try testing.expectEqual(@as(usize, 5), stack_machine.stack.popOrNull().?.generic); + try testing.expectEqual(@as(usize, 6), stack_machine.stack.popOrNull().?.generic); + + stack_machine.reset(); + program.clearRetainingCapacity(); + try b.writeConst(writer, u8, 4); + try b.writeConst(writer, u8, 5); + try b.writeConst(writer, u8, 6); + try b.writeOpcode(writer, OP.rot); + _ = try stack_machine.run(program.items, allocator, context, null); + try testing.expectEqual(@as(usize, 5), stack_machine.stack.popOrNull().?.generic); + try testing.expectEqual(@as(usize, 4), stack_machine.stack.popOrNull().?.generic); + try testing.expectEqual(@as(usize, 6), stack_machine.stack.popOrNull().?.generic); + + const deref_target: usize = @truncate(0xffeeffee_ffeeffee); + + stack_machine.reset(); + program.clearRetainingCapacity(); + try b.writeAddr(writer, @intFromPtr(&deref_target)); + try b.writeOpcode(writer, OP.deref); + _ = try stack_machine.run(program.items, allocator, context, null); + try testing.expectEqual(deref_target, stack_machine.stack.popOrNull().?.generic); + + stack_machine.reset(); + program.clearRetainingCapacity(); + try b.writeLiteral(writer, 0); + try b.writeAddr(writer, @intFromPtr(&deref_target)); + try b.writeOpcode(writer, OP.xderef); + _ = try stack_machine.run(program.items, allocator, context, null); + try testing.expectEqual(deref_target, stack_machine.stack.popOrNull().?.generic); + + stack_machine.reset(); + program.clearRetainingCapacity(); + try b.writeAddr(writer, @intFromPtr(&deref_target)); + try b.writeDerefSize(writer, 1); + _ = try stack_machine.run(program.items, allocator, context, null); + try testing.expectEqual(@as(usize, @as(*const u8, @ptrCast(&deref_target)).*), stack_machine.stack.popOrNull().?.generic); + + stack_machine.reset(); + program.clearRetainingCapacity(); + try b.writeLiteral(writer, 0); + try b.writeAddr(writer, @intFromPtr(&deref_target)); + try b.writeXDerefSize(writer, 1); + _ = try 
stack_machine.run(program.items, allocator, context, null); + try testing.expectEqual(@as(usize, @as(*const u8, @ptrCast(&deref_target)).*), stack_machine.stack.popOrNull().?.generic); + + const type_offset: usize = @truncate(0xaabbaabb_aabbaabb); + + stack_machine.reset(); + program.clearRetainingCapacity(); + try b.writeAddr(writer, @intFromPtr(&deref_target)); + try b.writeDerefType(writer, 1, type_offset); + _ = try stack_machine.run(program.items, allocator, context, null); + const deref_type = stack_machine.stack.popOrNull().?.regval_type; + try testing.expectEqual(type_offset, deref_type.type_offset); + try testing.expectEqual(@as(u8, 1), deref_type.type_size); + try testing.expectEqual(@as(usize, @as(*const u8, @ptrCast(&deref_target)).*), deref_type.value); + + stack_machine.reset(); + program.clearRetainingCapacity(); + try b.writeLiteral(writer, 0); + try b.writeAddr(writer, @intFromPtr(&deref_target)); + try b.writeXDerefType(writer, 1, type_offset); + _ = try stack_machine.run(program.items, allocator, context, null); + const xderef_type = stack_machine.stack.popOrNull().?.regval_type; + try testing.expectEqual(type_offset, xderef_type.type_offset); + try testing.expectEqual(@as(u8, 1), xderef_type.type_size); + try testing.expectEqual(@as(usize, @as(*const u8, @ptrCast(&deref_target)).*), xderef_type.value); + + context.object_address = &deref_target; + + stack_machine.reset(); + program.clearRetainingCapacity(); + try b.writeOpcode(writer, OP.push_object_address); + _ = try stack_machine.run(program.items, allocator, context, null); + try testing.expectEqual(@as(usize, @intFromPtr(context.object_address.?)), stack_machine.stack.popOrNull().?.generic); + + // TODO: Test OP.form_tls_address + + context.cfa = @truncate(0xccddccdd_ccddccdd); + + stack_machine.reset(); + program.clearRetainingCapacity(); + try b.writeOpcode(writer, OP.call_frame_cfa); + _ = try stack_machine.run(program.items, allocator, context, null); + try testing.expectEqual(context.cfa.?, stack_machine.stack.popOrNull().?.generic); + } + + // Arithmetic and Logical Operations + { + const context = Context{}; + + stack_machine.reset(); + program.clearRetainingCapacity(); + try b.writeConst(writer, i16, -4096); + try b.writeOpcode(writer, OP.abs); + _ = try stack_machine.run(program.items, allocator, context, null); + try testing.expectEqual(@as(usize, 4096), stack_machine.stack.popOrNull().?.generic); + + stack_machine.reset(); + program.clearRetainingCapacity(); + try b.writeConst(writer, u16, 0xff0f); + try b.writeConst(writer, u16, 0xf0ff); + try b.writeOpcode(writer, OP.@"and"); + _ = try stack_machine.run(program.items, allocator, context, null); + try testing.expectEqual(@as(usize, 0xf00f), stack_machine.stack.popOrNull().?.generic); + + stack_machine.reset(); + program.clearRetainingCapacity(); + try b.writeConst(writer, i16, -404); + try b.writeConst(writer, i16, 100); + try b.writeOpcode(writer, OP.div); + _ = try stack_machine.run(program.items, allocator, context, null); + try testing.expectEqual(@as(isize, -404 / 100), @as(isize, @bitCast(stack_machine.stack.popOrNull().?.generic))); + + stack_machine.reset(); + program.clearRetainingCapacity(); + try b.writeConst(writer, u16, 200); + try b.writeConst(writer, u16, 50); + try b.writeOpcode(writer, OP.minus); + _ = try stack_machine.run(program.items, allocator, context, null); + try testing.expectEqual(@as(usize, 150), stack_machine.stack.popOrNull().?.generic); + + stack_machine.reset(); + program.clearRetainingCapacity(); + try 
b.writeConst(writer, u16, 123);
+        try b.writeConst(writer, u16, 100);
+        try b.writeOpcode(writer, OP.mod);
+        _ = try stack_machine.run(program.items, allocator, context, null);
+        try testing.expectEqual(@as(usize, 23), stack_machine.stack.popOrNull().?.generic);
+
+        stack_machine.reset();
+        program.clearRetainingCapacity();
+        try b.writeConst(writer, u16, 0xff);
+        try b.writeConst(writer, u16, 0xee);
+        try b.writeOpcode(writer, OP.mul);
+        _ = try stack_machine.run(program.items, allocator, context, null);
+        try testing.expectEqual(@as(usize, 0xed12), stack_machine.stack.popOrNull().?.generic);
+
+        stack_machine.reset();
+        program.clearRetainingCapacity();
+        try b.writeConst(writer, u16, 5);
+        try b.writeOpcode(writer, OP.neg);
+        try b.writeConst(writer, i16, -6);
+        try b.writeOpcode(writer, OP.neg);
+        _ = try stack_machine.run(program.items, allocator, context, null);
+        try testing.expectEqual(@as(usize, 6), stack_machine.stack.popOrNull().?.generic);
+        try testing.expectEqual(@as(isize, -5), @as(isize, @bitCast(stack_machine.stack.popOrNull().?.generic)));
+
+        stack_machine.reset();
+        program.clearRetainingCapacity();
+        try b.writeConst(writer, u16, 0xff0f);
+        try b.writeOpcode(writer, OP.not);
+        _ = try stack_machine.run(program.items, allocator, context, null);
+        try testing.expectEqual(~@as(usize, 0xff0f), stack_machine.stack.popOrNull().?.generic);
+
+        stack_machine.reset();
+        program.clearRetainingCapacity();
+        try b.writeConst(writer, u16, 0xff0f);
+        try b.writeConst(writer, u16, 0xf0ff);
+        try b.writeOpcode(writer, OP.@"or");
+        _ = try stack_machine.run(program.items, allocator, context, null);
+        try testing.expectEqual(@as(usize, 0xffff), stack_machine.stack.popOrNull().?.generic);
+
+        stack_machine.reset();
+        program.clearRetainingCapacity();
+        try b.writeConst(writer, i16, 402);
+        try b.writeConst(writer, i16, 100);
+        try b.writeOpcode(writer, OP.plus);
+        _ = try stack_machine.run(program.items, allocator, context, null);
+        try testing.expectEqual(@as(usize, 502), stack_machine.stack.popOrNull().?.generic);
+
+        stack_machine.reset();
+        program.clearRetainingCapacity();
+        try b.writeConst(writer, u16, 4096);
+        try b.writePlusUconst(writer, @as(usize, 8192));
+        _ = try stack_machine.run(program.items, allocator, context, null);
+        try testing.expectEqual(@as(usize, 4096 + 8192), stack_machine.stack.popOrNull().?.generic);
+
+        stack_machine.reset();
+        program.clearRetainingCapacity();
+        try b.writeConst(writer, u16, 0xfff);
+        try b.writeConst(writer, u16, 1);
+        try b.writeOpcode(writer, OP.shl);
+        _ = try stack_machine.run(program.items, allocator, context, null);
+        try testing.expectEqual(@as(usize, 0xfff << 1), stack_machine.stack.popOrNull().?.generic);
+
+        stack_machine.reset();
+        program.clearRetainingCapacity();
+        try b.writeConst(writer, u16, 0xfff);
+        try b.writeConst(writer, u16, 1);
+        try b.writeOpcode(writer, OP.shr);
+        _ = try stack_machine.run(program.items, allocator, context, null);
+        try testing.expectEqual(@as(usize, 0xfff >> 1), stack_machine.stack.popOrNull().?.generic);
+
+        stack_machine.reset();
+        program.clearRetainingCapacity();
+        try b.writeConst(writer, u16, 0xfff);
+        try b.writeConst(writer, u16, 1);
+        try b.writeOpcode(writer, OP.shra);
+        _ = try stack_machine.run(program.items, allocator, context, null);
+        try testing.expectEqual(@as(usize, @bitCast(@as(isize, 0xfff) >> 1)), stack_machine.stack.popOrNull().?.generic);
+
+        stack_machine.reset();
+        program.clearRetainingCapacity();
+        try b.writeConst(writer, u16, 0xf0ff);
+        try b.writeConst(writer, u16,
0xff0f); + try b.writeOpcode(writer, OP.xor); + _ = try stack_machine.run(program.items, allocator, context, null); + try testing.expectEqual(@as(usize, 0x0ff0), stack_machine.stack.popOrNull().?.generic); + } + + // Control Flow Operations + { + const context = Context{}; + const expected = .{ + .{ OP.le, 1, 1, 0 }, + .{ OP.ge, 1, 0, 1 }, + .{ OP.eq, 1, 0, 0 }, + .{ OP.lt, 0, 1, 0 }, + .{ OP.gt, 0, 0, 1 }, + .{ OP.ne, 0, 1, 1 }, + }; + + inline for (expected) |e| { + stack_machine.reset(); + program.clearRetainingCapacity(); + + try b.writeConst(writer, u16, 0); + try b.writeConst(writer, u16, 0); + try b.writeOpcode(writer, e[0]); + try b.writeConst(writer, u16, 0); + try b.writeConst(writer, u16, 1); + try b.writeOpcode(writer, e[0]); + try b.writeConst(writer, u16, 1); + try b.writeConst(writer, u16, 0); + try b.writeOpcode(writer, e[0]); + _ = try stack_machine.run(program.items, allocator, context, null); + try testing.expectEqual(@as(usize, e[3]), stack_machine.stack.popOrNull().?.generic); + try testing.expectEqual(@as(usize, e[2]), stack_machine.stack.popOrNull().?.generic); + try testing.expectEqual(@as(usize, e[1]), stack_machine.stack.popOrNull().?.generic); + } + + stack_machine.reset(); + program.clearRetainingCapacity(); + try b.writeLiteral(writer, 2); + try b.writeSkip(writer, 1); + try b.writeLiteral(writer, 3); + _ = try stack_machine.run(program.items, allocator, context, null); + try testing.expectEqual(@as(usize, 2), stack_machine.stack.popOrNull().?.generic); + + stack_machine.reset(); + program.clearRetainingCapacity(); + try b.writeLiteral(writer, 2); + try b.writeBra(writer, 1); + try b.writeLiteral(writer, 3); + try b.writeLiteral(writer, 0); + try b.writeBra(writer, 1); + try b.writeLiteral(writer, 4); + try b.writeLiteral(writer, 5); + _ = try stack_machine.run(program.items, allocator, context, null); + try testing.expectEqual(@as(usize, 5), stack_machine.stack.popOrNull().?.generic); + try testing.expectEqual(@as(usize, 4), stack_machine.stack.popOrNull().?.generic); + try testing.expect(stack_machine.stack.popOrNull() == null); + + // TODO: Test call2, call4, call_ref once implemented + + } + + // Type conversions + { + const context = Context{}; + stack_machine.reset(); + program.clearRetainingCapacity(); + + // TODO: Test typed OP.convert once implemented + + const value: usize = @truncate(0xffeeffee_ffeeffee); + var value_bytes: [options.addr_size]u8 = undefined; + mem.writeInt(usize, &value_bytes, value, native_endian); + + // Convert to generic type + stack_machine.reset(); + program.clearRetainingCapacity(); + try b.writeConstType(writer, @as(usize, 0), &value_bytes); + try b.writeConvert(writer, @as(usize, 0)); + _ = try stack_machine.run(program.items, allocator, context, null); + try testing.expectEqual(value, stack_machine.stack.popOrNull().?.generic); + + // Reinterpret to generic type + stack_machine.reset(); + program.clearRetainingCapacity(); + try b.writeConstType(writer, @as(usize, 0), &value_bytes); + try b.writeReinterpret(writer, @as(usize, 0)); + _ = try stack_machine.run(program.items, allocator, context, null); + try testing.expectEqual(value, stack_machine.stack.popOrNull().?.generic); + + // Reinterpret to new type + const die_offset: usize = 0xffee; + + stack_machine.reset(); + program.clearRetainingCapacity(); + try b.writeConstType(writer, @as(usize, 0), &value_bytes); + try b.writeReinterpret(writer, die_offset); + _ = try stack_machine.run(program.items, allocator, context, null); + const const_type = 
stack_machine.stack.popOrNull().?.const_type;
+        try testing.expectEqual(die_offset, const_type.type_offset);
+
+        stack_machine.reset();
+        program.clearRetainingCapacity();
+        try b.writeLiteral(writer, 0);
+        try b.writeReinterpret(writer, die_offset);
+        _ = try stack_machine.run(program.items, allocator, context, null);
+        const regval_type = stack_machine.stack.popOrNull().?.regval_type;
+        try testing.expectEqual(die_offset, regval_type.type_offset);
+    }
+
+    // Special operations
+    {
+        var context = Context{};
+
+        stack_machine.reset();
+        program.clearRetainingCapacity();
+        try b.writeOpcode(writer, OP.nop);
+        _ = try stack_machine.run(program.items, allocator, context, null);
+        try testing.expect(stack_machine.stack.popOrNull() == null);
+
+        // Sub-expression
+        {
+            var sub_program = std.ArrayList(u8).init(allocator);
+            defer sub_program.deinit();
+            const sub_writer = sub_program.writer();
+            try b.writeLiteral(sub_writer, 3);
+
+            stack_machine.reset();
+            program.clearRetainingCapacity();
+            try b.writeEntryValue(writer, sub_program.items);
+            _ = try stack_machine.run(program.items, allocator, context, null);
+            try testing.expectEqual(@as(usize, 3), stack_machine.stack.popOrNull().?.generic);
+        }
+
+        // Register location description
+        const reg_context = abi.RegisterContext{
+            .eh_frame = true,
+            .is_macho = builtin.os.tag == .macos,
+        };
+        var thread_context: std.debug.ThreadContext = undefined;
+        std.debug.relocateContext(&thread_context);
+        context = Context{
+            .thread_context = &thread_context,
+            .reg_context = reg_context,
+        };
+
+        if (abi.regBytes(&thread_context, 0, reg_context)) |reg_bytes| {
+            mem.writeInt(usize, reg_bytes[0..@sizeOf(usize)], 0xee, native_endian);
+
+            var sub_program = std.ArrayList(u8).init(allocator);
+            defer sub_program.deinit();
+            const sub_writer = sub_program.writer();
+            try b.writeReg(sub_writer, 0);
+
+            stack_machine.reset();
+            program.clearRetainingCapacity();
+            try b.writeEntryValue(writer, sub_program.items);
+            _ = try stack_machine.run(program.items, allocator, context, null);
+            try testing.expectEqual(@as(usize, 0xee), stack_machine.stack.popOrNull().?.generic);
+        } else |err| {
+            switch (err) {
+                error.UnimplementedArch,
+                error.UnimplementedOs,
+                error.ThreadContextNotSupported,
+                => {},
+                else => return err,
+            }
+        }
+    }
+}
