From 835a60a34fe2bc3d35e4524caee455a4743a5022 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Fri, 20 Jan 2023 18:26:21 +0100 Subject: zld: parse, synthesise and emit unwind records --- src/link/MachO/Object.zig | 483 ++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 403 insertions(+), 80 deletions(-) (limited to 'src/link/MachO/Object.zig') diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index 401184da51..2196e9ccf0 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -8,6 +8,7 @@ const std = @import("std"); const build_options = @import("build_options"); const assert = std.debug.assert; const dwarf = std.dwarf; +const eh_frame = @import("eh_frame.zig"); const fs = std.fs; const io = std.io; const log = std.log.scoped(.link); @@ -24,6 +25,7 @@ const DwarfInfo = @import("DwarfInfo.zig"); const LoadCommandIterator = macho.LoadCommandIterator; const Zld = @import("zld.zig").Zld; const SymbolWithLoc = @import("zld.zig").SymbolWithLoc; +const UnwindInfo = @import("UnwindInfo.zig"); name: []const u8, mtime: u64, @@ -44,6 +46,8 @@ symtab: []macho.nlist_64 = undefined, /// Can be undefined as set together with in_symtab. source_symtab_lookup: []u32 = undefined, /// Can be undefined as set together with in_symtab. +reverse_symtab_lookup: []u32 = undefined, +/// Can be undefined as set together with in_symtab. source_address_lookup: []i64 = undefined, /// Can be undefined as set together with in_symtab. source_section_index_lookup: []i64 = undefined, @@ -53,22 +57,49 @@ strtab_lookup: []u32 = undefined, atom_by_index_table: []AtomIndex = undefined, /// Can be undefined as set together with in_symtab. globals_lookup: []i64 = undefined, +/// Can be undefined as set together with in_symtab. +relocs_lookup: []RelocEntry = undefined, atoms: std.ArrayListUnmanaged(AtomIndex) = .{}, +exec_atoms: std.ArrayListUnmanaged(AtomIndex) = .{}, + +eh_frame_sect: ?macho.section_64 = null, +eh_frame_relocs_lookup: std.AutoArrayHashMapUnmanaged(u32, Record) = .{}, +eh_frame_records_lookup: std.AutoArrayHashMapUnmanaged(AtomIndex, u32) = .{}, + +unwind_info_sect: ?macho.section_64 = null, +unwind_relocs_lookup: []Record = undefined, +unwind_records_lookup: std.AutoHashMapUnmanaged(AtomIndex, u32) = .{}, + +const RelocEntry = struct { start: u32, len: u32 }; + +const Record = struct { + dead: bool, + reloc: RelocEntry, +}; pub fn deinit(self: *Object, gpa: Allocator) void { self.atoms.deinit(gpa); + self.exec_atoms.deinit(gpa); gpa.free(self.name); gpa.free(self.contents); if (self.in_symtab) |_| { gpa.free(self.source_symtab_lookup); + gpa.free(self.reverse_symtab_lookup); gpa.free(self.source_address_lookup); gpa.free(self.source_section_index_lookup); gpa.free(self.strtab_lookup); gpa.free(self.symtab); gpa.free(self.atom_by_index_table); gpa.free(self.globals_lookup); + gpa.free(self.relocs_lookup); } + self.eh_frame_relocs_lookup.deinit(gpa); + self.eh_frame_records_lookup.deinit(gpa); + if (self.hasUnwindRecords()) { + gpa.free(self.unwind_relocs_lookup); + } + self.unwind_records_lookup.deinit(gpa); } pub fn parse(self: *Object, allocator: Allocator, cpu_arch: std.Target.Cpu.Arch) !void { @@ -105,76 +136,95 @@ pub fn parse(self: *Object, allocator: Allocator, cpu_arch: std.Target.Cpu.Arch) .ncmds = self.header.ncmds, .buffer = self.contents[@sizeOf(macho.mach_header_64)..][0..self.header.sizeofcmds], }; - while (it.next()) |cmd| { - switch (cmd.cmd()) { - .SYMTAB => { - const symtab = cmd.cast(macho.symtab_command).?; - self.in_symtab = @ptrCast( - [*]const macho.nlist_64, - @alignCast(@alignOf(macho.nlist_64), &self.contents[symtab.symoff]), - )[0..symtab.nsyms]; - self.in_strtab = self.contents[symtab.stroff..][0..symtab.strsize]; - - const nsects = self.getSourceSections().len; - - self.symtab = try allocator.alloc(macho.nlist_64, self.in_symtab.?.len + nsects); - self.source_symtab_lookup = try allocator.alloc(u32, self.in_symtab.?.len); - self.strtab_lookup = try allocator.alloc(u32, self.in_symtab.?.len); - self.globals_lookup = try allocator.alloc(i64, self.in_symtab.?.len); - self.atom_by_index_table = try allocator.alloc(AtomIndex, self.in_symtab.?.len + nsects); - // This is wasteful but we need to be able to lookup source symbol address after stripping and - // allocating of sections. - self.source_address_lookup = try allocator.alloc(i64, self.in_symtab.?.len); - self.source_section_index_lookup = try allocator.alloc(i64, nsects); - - for (self.symtab) |*sym| { - sym.* = .{ - .n_value = 0, - .n_sect = 0, - .n_desc = 0, - .n_strx = 0, - .n_type = 0, - }; - } + const nsects = self.getSourceSections().len; + const symtab = while (it.next()) |cmd| switch (cmd.cmd()) { + .SYMTAB => break cmd.cast(macho.symtab_command).?, + else => {}, + } else return; + + self.in_symtab = @ptrCast( + [*]const macho.nlist_64, + @alignCast(@alignOf(macho.nlist_64), &self.contents[symtab.symoff]), + )[0..symtab.nsyms]; + self.in_strtab = self.contents[symtab.stroff..][0..symtab.strsize]; + + self.symtab = try allocator.alloc(macho.nlist_64, self.in_symtab.?.len + nsects); + self.source_symtab_lookup = try allocator.alloc(u32, self.in_symtab.?.len); + self.reverse_symtab_lookup = try allocator.alloc(u32, self.in_symtab.?.len); + self.strtab_lookup = try allocator.alloc(u32, self.in_symtab.?.len); + self.globals_lookup = try allocator.alloc(i64, self.in_symtab.?.len); + self.atom_by_index_table = try allocator.alloc(AtomIndex, self.in_symtab.?.len + nsects); + self.relocs_lookup = try allocator.alloc(RelocEntry, self.in_symtab.?.len + nsects); + // This is wasteful but we need to be able to lookup source symbol address after stripping and + // allocating of sections. + self.source_address_lookup = try allocator.alloc(i64, self.in_symtab.?.len); + self.source_section_index_lookup = try allocator.alloc(i64, nsects); + + for (self.symtab) |*sym| { + sym.* = .{ + .n_value = 0, + .n_sect = 0, + .n_desc = 0, + .n_strx = 0, + .n_type = 0, + }; + } - mem.set(i64, self.globals_lookup, -1); - mem.set(AtomIndex, self.atom_by_index_table, 0); - mem.set(i64, self.source_section_index_lookup, -1); + mem.set(i64, self.globals_lookup, -1); + mem.set(AtomIndex, self.atom_by_index_table, 0); + mem.set(i64, self.source_section_index_lookup, -1); + mem.set(RelocEntry, self.relocs_lookup, .{ + .start = 0, + .len = 0, + }); - // You would expect that the symbol table is at least pre-sorted based on symbol's type: - // local < extern defined < undefined. Unfortunately, this is not guaranteed! For instance, - // the GO compiler does not necessarily respect that therefore we sort immediately by type - // and address within. - var sorted_all_syms = try std.ArrayList(SymbolAtIndex).initCapacity(allocator, self.in_symtab.?.len); - defer sorted_all_syms.deinit(); + // You would expect that the symbol table is at least pre-sorted based on symbol's type: + // local < extern defined < undefined. Unfortunately, this is not guaranteed! For instance, + // the GO compiler does not necessarily respect that therefore we sort immediately by type + // and address within. + var sorted_all_syms = try std.ArrayList(SymbolAtIndex).initCapacity(allocator, self.in_symtab.?.len); + defer sorted_all_syms.deinit(); - for (self.in_symtab.?) |_, index| { - sorted_all_syms.appendAssumeCapacity(.{ .index = @intCast(u32, index) }); - } + for (self.in_symtab.?) |_, index| { + sorted_all_syms.appendAssumeCapacity(.{ .index = @intCast(u32, index) }); + } - // We sort by type: defined < undefined, and - // afterwards by address in each group. Normally, dysymtab should - // be enough to guarantee the sort, but turns out not every compiler - // is kind enough to specify the symbols in the correct order. - sort.sort(SymbolAtIndex, sorted_all_syms.items, self, SymbolAtIndex.lessThan); + // We sort by type: defined < undefined, and + // afterwards by address in each group. Normally, dysymtab should + // be enough to guarantee the sort, but turns out not every compiler + // is kind enough to specify the symbols in the correct order. + sort.sort(SymbolAtIndex, sorted_all_syms.items, self, SymbolAtIndex.lessThan); - for (sorted_all_syms.items) |sym_id, i| { - const sym = sym_id.getSymbol(self); + for (sorted_all_syms.items) |sym_id, i| { + const sym = sym_id.getSymbol(self); - if (sym.sect() and self.source_section_index_lookup[sym.n_sect - 1] == -1) { - self.source_section_index_lookup[sym.n_sect - 1] = @intCast(i64, i); - } + if (sym.sect() and self.source_section_index_lookup[sym.n_sect - 1] == -1) { + self.source_section_index_lookup[sym.n_sect - 1] = @intCast(i64, i); + } - self.symtab[i] = sym; - self.source_symtab_lookup[i] = sym_id.index; - self.source_address_lookup[i] = if (sym.undf()) -1 else @intCast(i64, sym.n_value); + self.symtab[i] = sym; + self.source_symtab_lookup[i] = sym_id.index; + self.reverse_symtab_lookup[sym_id.index] = @intCast(u32, i); + self.source_address_lookup[i] = if (sym.undf()) -1 else @intCast(i64, sym.n_value); - const sym_name_len = mem.sliceTo(@ptrCast([*:0]const u8, self.in_strtab.?.ptr + sym.n_strx), 0).len + 1; - self.strtab_lookup[i] = @intCast(u32, sym_name_len); - } + const sym_name_len = mem.sliceTo(@ptrCast([*:0]const u8, self.in_strtab.?.ptr + sym.n_strx), 0).len + 1; + self.strtab_lookup[i] = @intCast(u32, sym_name_len); + } + + // Parse __TEXT,__eh_frame header if one exists + self.eh_frame_sect = self.getSourceSectionByName("__TEXT", "__eh_frame"); + + // Parse __LD,__compact_unwind header if one exists + self.unwind_info_sect = self.getSourceSectionByName("__LD", "__compact_unwind"); + if (self.hasUnwindRecords()) { + self.unwind_relocs_lookup = try allocator.alloc(Record, self.getUnwindRecords().len); + mem.set(Record, self.unwind_relocs_lookup, .{ + .dead = true, + .reloc = .{ + .start = 0, + .len = 0, }, - else => {}, - } + }); } } @@ -295,14 +345,20 @@ fn sectionLessThanByAddress(ctx: void, lhs: SortedSection, rhs: SortedSection) b return lhs.header.addr < rhs.header.addr; } -/// Splits input sections into Atoms. +pub fn splitIntoAtoms(self: *Object, zld: *Zld, object_id: u32) !void { + log.debug("splitting object({d}, {s}) into atoms", .{ object_id, self.name }); + + try self.splitRegularSections(zld, object_id); + try self.parseEhFrameSection(zld, object_id); + try self.parseUnwindInfo(zld, object_id); +} + +/// Splits input regular sections into Atoms. /// If the Object was compiled with `MH_SUBSECTIONS_VIA_SYMBOLS`, splits section /// into subsections where each subsection then represents an Atom. -pub fn splitIntoAtoms(self: *Object, zld: *Zld, object_id: u31) !void { +pub fn splitRegularSections(self: *Object, zld: *Zld, object_id: u32) !void { const gpa = zld.gpa; - log.debug("splitting object({d}, {s}) into atoms", .{ object_id, self.name }); - const sections = self.getSourceSections(); for (sections) |sect, id| { if (sect.isDebug()) continue; @@ -418,6 +474,9 @@ pub fn splitIntoAtoms(self: *Object, zld: *Zld, object_id: u31) !void { sect.@"align", out_sect_id, ); + if (!sect.isZerofill()) { + try self.cacheRelocs(zld, atom_index); + } zld.addAtomToSection(atom_index); } @@ -431,7 +490,6 @@ pub fn splitIntoAtoms(self: *Object, zld: *Zld, object_id: u31) !void { const nsyms_trailing = atom_loc.len - 1; next_sym_index += atom_loc.len; - // TODO: We want to bubble up the first externally defined symbol here. const atom_size = if (next_sym_index < sect_start_index + sect_loc.len) symtab[next_sym_index].n_value - addr else @@ -461,7 +519,9 @@ pub fn splitIntoAtoms(self: *Object, zld: *Zld, object_id: u31) !void { const alias_index = self.getSectionAliasSymbolIndex(sect_id); self.atom_by_index_table[alias_index] = atom_index; } - + if (!sect.isZerofill()) { + try self.cacheRelocs(zld, atom_index); + } zld.addAtomToSection(atom_index); } } else { @@ -476,6 +536,9 @@ pub fn splitIntoAtoms(self: *Object, zld: *Zld, object_id: u31) !void { sect.@"align", out_sect_id, ); + if (!sect.isZerofill()) { + try self.cacheRelocs(zld, atom_index); + } zld.addAtomToSection(atom_index); } } @@ -484,7 +547,7 @@ pub fn splitIntoAtoms(self: *Object, zld: *Zld, object_id: u31) !void { fn createAtomFromSubsection( self: *Object, zld: *Zld, - object_id: u31, + object_id: u32, sym_index: u32, inner_sym_index: u32, inner_nsyms_trailing: u32, @@ -497,7 +560,7 @@ fn createAtomFromSubsection( const atom = zld.getAtomPtr(atom_index); atom.inner_sym_index = inner_sym_index; atom.inner_nsyms_trailing = inner_nsyms_trailing; - atom.file = object_id; + atom.file = object_id + 1; self.symtab[sym_index].n_sect = out_sect_id + 1; log.debug("creating ATOM(%{d}, '{s}') in sect({d}, '{s},{s}') in object({d})", .{ @@ -519,9 +582,208 @@ fn createAtomFromSubsection( self.atom_by_index_table[sym_loc.sym_index] = atom_index; } + const out_sect = zld.sections.items(.header)[out_sect_id]; + if (out_sect.isCode() and + mem.eql(u8, "__TEXT", out_sect.segName()) and + mem.eql(u8, "__text", out_sect.sectName())) + { + // TODO currently assuming a single section for executable machine code + try self.exec_atoms.append(gpa, atom_index); + } + return atom_index; } +fn filterRelocs( + relocs: []align(1) const macho.relocation_info, + start_addr: u64, + end_addr: u64, +) RelocEntry { + const Predicate = struct { + addr: u64, + + pub fn predicate(self: @This(), rel: macho.relocation_info) bool { + return rel.r_address >= self.addr; + } + }; + const LPredicate = struct { + addr: u64, + + pub fn predicate(self: @This(), rel: macho.relocation_info) bool { + return rel.r_address < self.addr; + } + }; + + const start = @import("zld.zig").bsearch(macho.relocation_info, relocs, Predicate{ .addr = end_addr }); + const len = @import("zld.zig").lsearch(macho.relocation_info, relocs[start..], LPredicate{ .addr = start_addr }); + + return .{ .start = @intCast(u32, start), .len = @intCast(u32, len) }; +} + +fn cacheRelocs(self: *Object, zld: *Zld, atom_index: AtomIndex) !void { + const atom = zld.getAtom(atom_index); + + const source_sect = if (self.getSourceSymbol(atom.sym_index)) |source_sym| blk: { + const source_sect = self.getSourceSection(source_sym.n_sect - 1); + assert(!source_sect.isZerofill()); + break :blk source_sect; + } else blk: { + // If there was no matching symbol present in the source symtab, this means + // we are dealing with either an entire section, or part of it, but also + // starting at the beginning. + const nbase = @intCast(u32, self.in_symtab.?.len); + const sect_id = @intCast(u16, atom.sym_index - nbase); + const source_sect = self.getSourceSection(sect_id); + assert(!source_sect.isZerofill()); + break :blk source_sect; + }; + + const relocs = self.getRelocs(source_sect); + + self.relocs_lookup[atom.sym_index] = if (self.getSourceSymbol(atom.sym_index)) |source_sym| blk: { + const offset = source_sym.n_value - source_sect.addr; + break :blk filterRelocs(relocs, offset, offset + atom.size); + } else filterRelocs(relocs, 0, atom.size); +} + +fn parseEhFrameSection(self: *Object, zld: *Zld, object_id: u32) !void { + const sect = self.eh_frame_sect orelse return; + + log.debug("parsing __TEXT,__eh_frame section", .{}); + + if (zld.getSectionByName("__TEXT", "__eh_frame") == null) { + _ = try zld.initSection("__TEXT", "__eh_frame", .{}); + } + + const gpa = zld.gpa; + const cpu_arch = zld.options.target.cpu.arch; + const relocs = self.getRelocs(sect); + + var it = self.getEhFrameRecordsIterator(); + var record_count: u32 = 0; + while (try it.next()) |_| { + record_count += 1; + } + + try self.eh_frame_relocs_lookup.ensureTotalCapacity(gpa, record_count); + try self.eh_frame_records_lookup.ensureTotalCapacity(gpa, record_count); + + it.reset(); + + while (try it.next()) |record| { + const offset = it.pos - record.getSize(); + const rel_pos = switch (cpu_arch) { + .aarch64 => filterRelocs(relocs, offset, offset + record.getSize()), + .x86_64 => RelocEntry{ .start = 0, .len = 0 }, + else => unreachable, + }; + self.eh_frame_relocs_lookup.putAssumeCapacityNoClobber(offset, .{ + .dead = false, + .reloc = rel_pos, + }); + + if (record.tag == .fde) { + const target = blk: { + switch (cpu_arch) { + .aarch64 => { + assert(rel_pos.len > 0); // TODO convert to an error as the FDE eh frame is malformed + // Find function symbol that this record describes + const rel = relocs[rel_pos.start..][rel_pos.len - 1]; + const target = UnwindInfo.parseRelocTarget( + zld, + object_id, + rel, + it.data[offset..], + @intCast(i32, offset), + ); + break :blk target; + }, + .x86_64 => { + const target_address = record.getTargetSymbolAddress(.{ + .base_addr = sect.addr, + .base_offset = offset, + }); + const target_sym_index = self.getSymbolByAddress(target_address, null); + const target = if (self.getGlobal(target_sym_index)) |global_index| + zld.globals.items[global_index] + else + SymbolWithLoc{ .sym_index = target_sym_index, .file = object_id + 1 }; + break :blk target; + }, + else => unreachable, + } + }; + log.debug("FDE at offset {x} tracks {s}", .{ offset, zld.getSymbolName(target) }); + if (target.getFile() != object_id) { + self.eh_frame_relocs_lookup.getPtr(offset).?.dead = true; + } else { + const atom_index = self.getAtomIndexForSymbol(target.sym_index).?; + self.eh_frame_records_lookup.putAssumeCapacityNoClobber(atom_index, offset); + } + } + } +} + +fn parseUnwindInfo(self: *Object, zld: *Zld, object_id: u32) !void { + const sect = self.unwind_info_sect orelse return; + + log.debug("parsing unwind info in {s}", .{self.name}); + + const gpa = zld.gpa; + const cpu_arch = zld.options.target.cpu.arch; + + if (zld.getSectionByName("__TEXT", "__unwind_info") == null) { + _ = try zld.initSection("__TEXT", "__unwind_info", .{}); + } + + try self.unwind_records_lookup.ensureTotalCapacity(gpa, @intCast(u32, self.exec_atoms.items.len)); + + const unwind_records = self.getUnwindRecords(); + + const needs_eh_frame = for (unwind_records) |record| { + if (UnwindInfo.UnwindEncoding.isDwarf(record.compactUnwindEncoding, cpu_arch)) break true; + } else false; + + if (needs_eh_frame) { + if (self.eh_frame_sect == null) { + log.err("missing __TEXT,__eh_frame section", .{}); + log.err(" in object {s}", .{self.name}); + return error.MissingSection; + } + } + + const relocs = self.getRelocs(sect); + for (unwind_records) |record, record_id| { + const offset = record_id * @sizeOf(macho.compact_unwind_entry); + const rel_pos = filterRelocs( + relocs, + offset, + offset + @sizeOf(macho.compact_unwind_entry), + ); + assert(rel_pos.len > 0); // TODO convert to an error as the unwind info is malformed + self.unwind_relocs_lookup[record_id] = .{ + .dead = false, + .reloc = rel_pos, + }; + + // Find function symbol that this record describes + const rel = relocs[rel_pos.start..][rel_pos.len - 1]; + const target = UnwindInfo.parseRelocTarget( + zld, + object_id, + rel, + mem.asBytes(&record), + @intCast(i32, offset), + ); + if (target.getFile() != object_id) { + self.unwind_relocs_lookup[record_id].dead = true; + } else { + const atom_index = self.getAtomIndexForSymbol(target.sym_index).?; + self.unwind_records_lookup.putAssumeCapacityNoClobber(atom_index, @intCast(u32, record_id)); + } + } +} + pub fn getSourceSymbol(self: Object, index: u32) ?macho.nlist_64 { const symtab = self.in_symtab.?; if (index >= symtab.len) return null; @@ -529,23 +791,28 @@ pub fn getSourceSymbol(self: Object, index: u32) ?macho.nlist_64 { return symtab[mapped_index]; } -/// Expects an arena allocator. -/// Caller owns memory. -pub fn createReverseSymbolLookup(self: Object, arena: Allocator) ![]u32 { - const symtab = self.in_symtab orelse return &[0]u32{}; - const lookup = try arena.alloc(u32, symtab.len); - for (self.source_symtab_lookup) |source_id, id| { - lookup[source_id] = @intCast(u32, id); - } - return lookup; -} - pub fn getSourceSection(self: Object, index: u16) macho.section_64 { const sections = self.getSourceSections(); assert(index < sections.len); return sections[index]; } +pub fn getSourceSectionByName(self: Object, segname: []const u8, sectname: []const u8) ?macho.section_64 { + const sections = self.getSourceSections(); + for (sections) |sect| { + if (mem.eql(u8, segname, sect.segName()) and mem.eql(u8, sectname, sect.sectName())) + return sect; + } else return null; +} + +pub fn getSourceSectionIndexByName(self: Object, segname: []const u8, sectname: []const u8) ?u8 { + const sections = self.getSourceSections(); + for (sections) |sect, i| { + if (mem.eql(u8, segname, sect.segName()) and mem.eql(u8, sectname, sect.sectName())) + return @intCast(u8, i + 1); + } else return null; +} + pub fn getSourceSections(self: Object) []const macho.section_64 { var it = LoadCommandIterator{ .ncmds = self.header.ncmds, @@ -652,8 +919,64 @@ pub fn getSymbolName(self: Object, index: u32) []const u8 { return strtab[start..][0 .. len - 1 :0]; } +pub fn getSymbolByAddress(self: Object, addr: u64, sect_hint: ?u8) u32 { + // Find containing atom + const Predicate = struct { + addr: i64, + + pub fn predicate(pred: @This(), other: i64) bool { + return if (other == -1) true else other > pred.addr; + } + }; + + if (sect_hint) |sect_id| { + if (self.source_section_index_lookup[sect_id] > -1) { + const first_sym_index = @intCast(usize, self.source_section_index_lookup[sect_id]); + const target_sym_index = @import("zld.zig").lsearch(i64, self.source_address_lookup[first_sym_index..], Predicate{ + .addr = @intCast(i64, addr), + }); + if (target_sym_index > 0) { + return @intCast(u32, first_sym_index + target_sym_index - 1); + } + } + return self.getSectionAliasSymbolIndex(sect_id); + } + + const target_sym_index = @import("zld.zig").lsearch(i64, self.source_address_lookup, Predicate{ + .addr = @intCast(i64, addr), + }); + assert(target_sym_index > 0); + return @intCast(u32, target_sym_index - 1); +} + +pub fn getGlobal(self: Object, sym_index: u32) ?u32 { + if (self.globals_lookup[sym_index] == -1) return null; + return @intCast(u32, self.globals_lookup[sym_index]); +} + pub fn getAtomIndexForSymbol(self: Object, sym_index: u32) ?AtomIndex { const atom_index = self.atom_by_index_table[sym_index]; if (atom_index == 0) return null; return atom_index; } + +pub fn hasUnwindRecords(self: Object) bool { + return self.unwind_info_sect != null; +} + +pub fn getUnwindRecords(self: Object) []align(1) const macho.compact_unwind_entry { + const sect = self.unwind_info_sect orelse return &[0]macho.compact_unwind_entry{}; + const data = self.getSectionContents(sect); + const num_entries = @divExact(data.len, @sizeOf(macho.compact_unwind_entry)); + return @ptrCast([*]align(1) const macho.compact_unwind_entry, data)[0..num_entries]; +} + +pub fn hasEhFrameRecords(self: Object) bool { + return self.eh_frame_sect != null; +} + +pub fn getEhFrameRecordsIterator(self: Object) eh_frame.Iterator { + const sect = self.eh_frame_sect orelse return .{ .data = &[0]u8{} }; + const data = self.getSectionContents(sect); + return .{ .data = data }; +} -- cgit v1.2.3 From 3dff040ca58effc5aaf1c7313a7baa28ec2ac6dd Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 21 Jan 2023 13:26:27 +0100 Subject: macho: synthesise unwind records in absence of compact unwind section Unlike Apple ld, we will not do any DWARF CFI parsing and simply output DWARF type unwind records. --- src/link/MachO/Object.zig | 13 +++- src/link/MachO/UnwindInfo.zig | 115 ++++++++++++++++++--------------- src/link/MachO/dead_strip.zig | 144 +++++++++++++++++++++++------------------- 3 files changed, 155 insertions(+), 117 deletions(-) (limited to 'src/link/MachO/Object.zig') diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index 2196e9ccf0..944cb7a677 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -725,7 +725,18 @@ fn parseEhFrameSection(self: *Object, zld: *Zld, object_id: u32) !void { } fn parseUnwindInfo(self: *Object, zld: *Zld, object_id: u32) !void { - const sect = self.unwind_info_sect orelse return; + const sect = self.unwind_info_sect orelse { + // If it so happens that the object had `__eh_frame` section defined but no `__compact_unwind`, + // we will try fully synthesising unwind info records to somewhat match Apple ld's + // approach. However, we will only synthesise DWARF records and nothing more. For this reason, + // we still create the output `__TEXT,__unwind_info` section. + if (self.eh_frame_sect != null) { + if (zld.getSectionByName("__TEXT", "__unwind_info") == null) { + _ = try zld.initSection("__TEXT", "__unwind_info", .{}); + } + } + return; + }; log.debug("parsing unwind info in {s}", .{self.name}); diff --git a/src/link/MachO/UnwindInfo.zig b/src/link/MachO/UnwindInfo.zig index 4f3860a72b..8f765756b3 100644 --- a/src/link/MachO/UnwindInfo.zig +++ b/src/link/MachO/UnwindInfo.zig @@ -253,37 +253,13 @@ pub fn collect(info: *UnwindInfo, zld: *Zld) !void { try records.ensureUnusedCapacity(object.exec_atoms.items.len); try atom_indexes.ensureUnusedCapacity(object.exec_atoms.items.len); - var it = object.getEhFrameRecordsIterator(); - for (object.exec_atoms.items) |atom_index| { var record = if (object.unwind_records_lookup.get(atom_index)) |record_id| blk: { if (object.unwind_relocs_lookup[record_id].dead) continue; var record = unwind_records[record_id]; if (UnwindEncoding.isDwarf(record.compactUnwindEncoding, cpu_arch)) { - const fde_offset = object.eh_frame_records_lookup.get(atom_index).?; - it.seekTo(fde_offset); - const fde = (try it.next()).?; - const cie_ptr = fde.getCiePointer(); - const cie_offset = fde_offset + 4 - cie_ptr; - it.seekTo(cie_offset); - const cie = (try it.next()).?; - - if (cie.getPersonalityPointerReloc( - zld, - @intCast(u32, object_id), - cie_offset, - )) |target| { - const personality_index = info.getPersonalityFunction(target) orelse inner: { - const personality_index = info.personalities_count; - info.personalities[personality_index] = target; - info.personalities_count += 1; - break :inner personality_index; - }; - - record.personalityFunction = personality_index + 1; - UnwindEncoding.setPersonalityIndex(&record.compactUnwindEncoding, personality_index + 1); - } + try info.collectPersonalityFromDwarf(zld, @intCast(u32, object_id), atom_index, &record); } else { if (getPersonalityFunctionReloc( zld, @@ -324,6 +300,21 @@ pub fn collect(info: *UnwindInfo, zld: *Zld) !void { const atom = zld.getAtom(atom_index); const sym = zld.getSymbol(atom.getSymbolWithLoc()); if (sym.n_desc == N_DEAD) continue; + + if (!object.hasUnwindRecords()) { + if (object.eh_frame_records_lookup.get(atom_index)) |fde_offset| { + if (object.eh_frame_relocs_lookup.get(fde_offset).?.dead) continue; + var record = nullRecord(); + try info.collectPersonalityFromDwarf(zld, @intCast(u32, object_id), atom_index, &record); + switch (cpu_arch) { + .aarch64 => UnwindEncoding.setMode(&record.compactUnwindEncoding, macho.UNWIND_ARM64_MODE.DWARF), + .x86_64 => UnwindEncoding.setMode(&record.compactUnwindEncoding, macho.UNWIND_X86_64_MODE.DWARF), + else => unreachable, + } + break :blk record; + } + } + break :blk nullRecord(); }; @@ -499,6 +490,40 @@ pub fn collect(info: *UnwindInfo, zld: *Zld) !void { } } +fn collectPersonalityFromDwarf( + info: *UnwindInfo, + zld: *Zld, + object_id: u32, + atom_index: u32, + record: *macho.compact_unwind_entry, +) !void { + const object = &zld.objects.items[object_id]; + var it = object.getEhFrameRecordsIterator(); + const fde_offset = object.eh_frame_records_lookup.get(atom_index).?; + it.seekTo(fde_offset); + const fde = (try it.next()).?; + const cie_ptr = fde.getCiePointer(); + const cie_offset = fde_offset + 4 - cie_ptr; + it.seekTo(cie_offset); + const cie = (try it.next()).?; + + if (cie.getPersonalityPointerReloc( + zld, + @intCast(u32, object_id), + cie_offset, + )) |target| { + const personality_index = info.getPersonalityFunction(target) orelse inner: { + const personality_index = info.personalities_count; + info.personalities[personality_index] = target; + info.personalities_count += 1; + break :inner personality_index; + }; + + record.personalityFunction = personality_index + 1; + UnwindEncoding.setPersonalityIndex(&record.compactUnwindEncoding, personality_index + 1); + } +} + pub fn calcSectionSize(info: UnwindInfo, zld: *Zld) !void { const sect_id = zld.getSectionByName("__TEXT", "__unwind_info") orelse return; const sect = &zld.sections.items(.header)[sect_id]; @@ -766,40 +791,26 @@ fn getCommonEncoding(info: UnwindInfo, enc: macho.compact_unwind_encoding_t) ?u7 } pub const UnwindEncoding = struct { - pub const UNWIND_X86_64_MODE = enum(u4) { - none = 0, - ebp_frame = 1, - stack_immd = 2, - stack_ind = 3, - dwarf = 4, - }; - - pub const UNWIND_ARM64_MODE = enum(u4) { - none = 0, - frameless = 2, - dwarf = 3, - frame = 4, - }; - - pub const UNWIND_MODE_MASK: u32 = 0x0F000000; - pub const UNWIND_PERSONALITY_INDEX_MASK: u32 = 0x30000000; - pub const UNWIND_HAS_LSDA_MASK: u32 = 0x40000000; - pub fn getMode(enc: macho.compact_unwind_encoding_t) u4 { - const mode = @truncate(u4, (enc & UNWIND_MODE_MASK) >> 24); - return mode; + comptime assert(macho.UNWIND_ARM64_MODE_MASK == macho.UNWIND_X86_64_MODE_MASK); + return @truncate(u4, (enc & macho.UNWIND_ARM64_MODE_MASK) >> 24); } pub fn isDwarf(enc: macho.compact_unwind_encoding_t, cpu_arch: std.Target.Cpu.Arch) bool { - switch (cpu_arch) { - .aarch64 => return @intToEnum(UNWIND_ARM64_MODE, getMode(enc)) == .dwarf, - .x86_64 => return @intToEnum(UNWIND_X86_64_MODE, getMode(enc)) == .dwarf, + const mode = getMode(enc); + return switch (cpu_arch) { + .aarch64 => @intToEnum(macho.UNWIND_ARM64_MODE, mode) == .DWARF, + .x86_64 => @intToEnum(macho.UNWIND_X86_64_MODE, mode) == .DWARF, else => unreachable, - } + }; + } + + pub fn setMode(enc: *macho.compact_unwind_encoding_t, mode: anytype) void { + enc.* |= @intCast(u32, @enumToInt(mode)) << 24; } pub fn hasLsda(enc: macho.compact_unwind_encoding_t) bool { - const has_lsda = @truncate(u1, (enc & UNWIND_HAS_LSDA_MASK) >> 31); + const has_lsda = @truncate(u1, (enc & macho.UNWIND_HAS_LSDA) >> 31); return has_lsda == 1; } @@ -809,7 +820,7 @@ pub const UnwindEncoding = struct { } pub fn getPersonalityIndex(enc: macho.compact_unwind_encoding_t) u2 { - const index = @truncate(u2, (enc & UNWIND_PERSONALITY_INDEX_MASK) >> 28); + const index = @truncate(u2, (enc & macho.UNWIND_PERSONALITY_MASK) >> 28); return index; } diff --git a/src/link/MachO/dead_strip.zig b/src/link/MachO/dead_strip.zig index 789b4925d1..1f8def96f3 100644 --- a/src/link/MachO/dead_strip.zig +++ b/src/link/MachO/dead_strip.zig @@ -238,16 +238,10 @@ fn mark(zld: *Zld, roots: AtomTable, alive: *AtomTable) !void { } } - for (zld.objects.items) |object, object_id| { + for (zld.objects.items) |_, object_id| { // Traverse unwind and eh_frame records noting if the source symbol has been marked, and if so, // marking all references as live. - // TODO I am currently assuming there will always be __unwind_info section emitted which implies - // we will not traverse __eh_frame in isolation. This however is only true for more recent versions - // of macOS so if there is a feature request to handle earlier versions of macOS, the following - // bit code needs updating as well. - if (object.hasUnwindRecords()) { - try markUnwindRecords(zld, @intCast(u32, object_id), alive); - } + try markUnwindRecords(zld, @intCast(u32, object_id), alive); } } @@ -256,9 +250,23 @@ fn markUnwindRecords(zld: *Zld, object_id: u32, alive: *AtomTable) !void { const cpu_arch = zld.options.target.cpu.arch; const unwind_records = object.getUnwindRecords(); - var it = object.getEhFrameRecordsIterator(); for (object.exec_atoms.items) |atom_index| { + if (!object.hasUnwindRecords()) { + if (object.eh_frame_records_lookup.get(atom_index)) |fde_offset| { + const ptr = object.eh_frame_relocs_lookup.getPtr(fde_offset).?; + if (ptr.dead) continue; // already marked + if (!alive.contains(atom_index)) { + // Mark dead and continue. + ptr.dead = true; + } else { + // Mark references live and continue. + try markEhFrameRecord(zld, object_id, atom_index, alive); + } + continue; + } + } + const record_id = object.unwind_records_lookup.get(atom_index) orelse continue; if (object.unwind_relocs_lookup[record_id].dead) continue; // already marked, nothing to do if (!alive.contains(atom_index)) { @@ -272,61 +280,7 @@ fn markUnwindRecords(zld: *Zld, object_id: u32, alive: *AtomTable) !void { const record = unwind_records[record_id]; if (UnwindInfo.UnwindEncoding.isDwarf(record.compactUnwindEncoding, cpu_arch)) { - const fde_offset = object.eh_frame_records_lookup.get(atom_index).?; - it.seekTo(fde_offset); - const fde = (try it.next()).?; - - const cie_ptr = fde.getCiePointer(); - const cie_offset = fde_offset + 4 - cie_ptr; - it.seekTo(cie_offset); - const cie = (try it.next()).?; - - switch (cpu_arch) { - .aarch64 => { - // Mark FDE references which should include any referenced LSDA record - const relocs = eh_frame.getRelocs(zld, object_id, fde_offset); - for (relocs) |rel| { - const target = UnwindInfo.parseRelocTarget( - zld, - object_id, - rel, - fde.data, - @intCast(i32, fde_offset) + 4, - ); - const target_sym = zld.getSymbol(target); - if (!target_sym.undf()) blk: { - const target_object = zld.objects.items[target.getFile().?]; - const target_atom_index = target_object.getAtomIndexForSymbol(target.sym_index) orelse - break :blk; - markLive(zld, target_atom_index, alive); - } - } - }, - .x86_64 => { - const lsda_ptr = try fde.getLsdaPointer(cie, .{ - .base_addr = object.eh_frame_sect.?.addr, - .base_offset = fde_offset, - }); - if (lsda_ptr) |lsda_address| { - // Mark LSDA record as live - const sym_index = object.getSymbolByAddress(lsda_address, null); - const target_atom_index = object.getAtomIndexForSymbol(sym_index).?; - markLive(zld, target_atom_index, alive); - } - }, - else => unreachable, - } - - // Mark CIE references which should include any referenced personalities - // that are defined locally. - if (cie.getPersonalityPointerReloc(zld, object_id, cie_offset)) |target| { - const target_sym = zld.getSymbol(target); - if (!target_sym.undf()) { - const target_object = zld.objects.items[target.getFile().?]; - const target_atom_index = target_object.getAtomIndexForSymbol(target.sym_index).?; - markLive(zld, target_atom_index, alive); - } - } + try markEhFrameRecord(zld, object_id, atom_index, alive); } else { if (UnwindInfo.getPersonalityFunctionReloc(zld, object_id, record_id)) |rel| { const target = UnwindInfo.parseRelocTarget( @@ -360,6 +314,68 @@ fn markUnwindRecords(zld: *Zld, object_id: u32, alive: *AtomTable) !void { } } +fn markEhFrameRecord(zld: *Zld, object_id: u32, atom_index: AtomIndex, alive: *AtomTable) !void { + const cpu_arch = zld.options.target.cpu.arch; + const object = &zld.objects.items[object_id]; + var it = object.getEhFrameRecordsIterator(); + + const fde_offset = object.eh_frame_records_lookup.get(atom_index).?; + it.seekTo(fde_offset); + const fde = (try it.next()).?; + + const cie_ptr = fde.getCiePointer(); + const cie_offset = fde_offset + 4 - cie_ptr; + it.seekTo(cie_offset); + const cie = (try it.next()).?; + + switch (cpu_arch) { + .aarch64 => { + // Mark FDE references which should include any referenced LSDA record + const relocs = eh_frame.getRelocs(zld, object_id, fde_offset); + for (relocs) |rel| { + const target = UnwindInfo.parseRelocTarget( + zld, + object_id, + rel, + fde.data, + @intCast(i32, fde_offset) + 4, + ); + const target_sym = zld.getSymbol(target); + if (!target_sym.undf()) blk: { + const target_object = zld.objects.items[target.getFile().?]; + const target_atom_index = target_object.getAtomIndexForSymbol(target.sym_index) orelse + break :blk; + markLive(zld, target_atom_index, alive); + } + } + }, + .x86_64 => { + const lsda_ptr = try fde.getLsdaPointer(cie, .{ + .base_addr = object.eh_frame_sect.?.addr, + .base_offset = fde_offset, + }); + if (lsda_ptr) |lsda_address| { + // Mark LSDA record as live + const sym_index = object.getSymbolByAddress(lsda_address, null); + const target_atom_index = object.getAtomIndexForSymbol(sym_index).?; + markLive(zld, target_atom_index, alive); + } + }, + else => unreachable, + } + + // Mark CIE references which should include any referenced personalities + // that are defined locally. + if (cie.getPersonalityPointerReloc(zld, object_id, cie_offset)) |target| { + const target_sym = zld.getSymbol(target); + if (!target_sym.undf()) { + const target_object = zld.objects.items[target.getFile().?]; + const target_atom_index = target_object.getAtomIndexForSymbol(target.sym_index).?; + markLive(zld, target_atom_index, alive); + } + } +} + fn prune(zld: *Zld, alive: AtomTable) void { log.debug("pruning dead atoms", .{}); for (zld.objects.items) |*object| { -- cgit v1.2.3 From 983e37340913383647727a7118c2824baf23d2ea Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Sat, 21 Jan 2023 16:53:46 +0100 Subject: macho: fix sorting symbols by seniority --- src/link/MachO/Object.zig | 32 ++++++++++++++++++-------------- test/link/macho/weak_library/build.zig | 2 ++ 2 files changed, 20 insertions(+), 14 deletions(-) (limited to 'src/link/MachO/Object.zig') diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index 944cb7a677..13219084b6 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -242,6 +242,17 @@ const SymbolAtIndex = struct { return mem.sliceTo(@ptrCast([*:0]const u8, ctx.in_strtab.?.ptr + off), 0); } + fn getSymbolSeniority(self: SymbolAtIndex, ctx: Context) u2 { + const sym = self.getSymbol(ctx); + if (!sym.ext()) { + const sym_name = self.getSymbolName(ctx); + if (mem.startsWith(u8, sym_name, "l") or mem.startsWith(u8, sym_name, "L")) return 0; + return 1; + } + if (sym.weakDef() or sym.pext()) return 2; + return 3; + } + /// Performs lexicographic-like check. /// * lhs and rhs defined /// * if lhs == rhs @@ -256,23 +267,15 @@ const SymbolAtIndex = struct { if (lhs.sect() and rhs.sect()) { if (lhs.n_value == rhs.n_value) { if (lhs.n_sect == rhs.n_sect) { - if (lhs.ext() and rhs.ext()) { - if ((lhs.pext() or lhs.weakDef()) and (rhs.pext() or rhs.weakDef())) { - return false; - } else return rhs.pext() or rhs.weakDef(); - } else { - const lhs_name = lhs_index.getSymbolName(ctx); - const lhs_temp = mem.startsWith(u8, lhs_name, "l") or mem.startsWith(u8, lhs_name, "L"); - const rhs_name = rhs_index.getSymbolName(ctx); - const rhs_temp = mem.startsWith(u8, rhs_name, "l") or mem.startsWith(u8, rhs_name, "L"); - if (lhs_temp and rhs_temp) { - return false; - } else return rhs_temp; - } + const lhs_senior = lhs_index.getSymbolSeniority(ctx); + const rhs_senior = rhs_index.getSymbolSeniority(ctx); + if (lhs_senior == rhs_senior) { + return lessThanByNStrx(ctx, lhs_index, rhs_index); + } else return lhs_senior < rhs_senior; } else return lhs.n_sect < rhs.n_sect; } else return lhs.n_value < rhs.n_value; } else if (lhs.undf() and rhs.undf()) { - return false; + return lessThanByNStrx(ctx, lhs_index, rhs_index); } else return rhs.undf(); } @@ -786,6 +789,7 @@ fn parseUnwindInfo(self: *Object, zld: *Zld, object_id: u32) !void { mem.asBytes(&record), @intCast(i32, offset), ); + log.debug("unwind record {d} tracks {s}", .{ record_id, zld.getSymbolName(target) }); if (target.getFile() != object_id) { self.unwind_relocs_lookup[record_id].dead = true; } else { diff --git a/test/link/macho/weak_library/build.zig b/test/link/macho/weak_library/build.zig index 8c41e0dfd1..79f67bd7df 100644 --- a/test/link/macho/weak_library/build.zig +++ b/test/link/macho/weak_library/build.zig @@ -31,6 +31,8 @@ pub fn build(b: *Builder) void { check.checkInSymtab(); check.checkNext("(undefined) weak external _a (from liba)"); + + check.checkInSymtab(); check.checkNext("(undefined) weak external _asStr (from liba)"); const run_cmd = check.runAndCompare(); -- cgit v1.2.3