diff options
| author | Jakub Konka <kubkon@jakubkonka.com> | 2022-10-21 23:20:34 +0200 |
|---|---|---|
| committer | Jakub Konka <kubkon@jakubkonka.com> | 2022-10-22 07:59:24 +0200 |
| commit | 086cee844a082bab82dba9e7422067f644930309 (patch) | |
| tree | 290f75decc67ffa008cfb4e5680c0ff730768778 /src | |
| parent | 0cc4d54781ca3835e2f83c8ce93b485366a52b2a (diff) | |
| download | zig-086cee844a082bab82dba9e7422067f644930309.tar.gz zig-086cee844a082bab82dba9e7422067f644930309.zip | |
macho: refactor dead code stripping conditions
1. If an object file was not compiled with `MH_SUBSECTIONS_VIA_SYMBOLS`
such a hand-written ASM on x86_64, treat the entire object file as
not suitable for dead code stripping aka a GC root.
2. If there are non-extern relocs within a section, treat the entire
section as a root, at least temporarily until we work out the exact
conditions for marking the atoms live.
Diffstat (limited to 'src')
| -rw-r--r-- | src/link/MachO/ZldAtom.zig | 34 | ||||
| -rw-r--r-- | src/link/MachO/dead_strip.zig | 183 | ||||
| -rw-r--r-- | src/link/MachO/thunks.zig | 2 | ||||
| -rw-r--r-- | src/link/MachO/zld.zig | 18 |
4 files changed, 129 insertions, 108 deletions
diff --git a/src/link/MachO/ZldAtom.zig b/src/link/MachO/ZldAtom.zig index 270dd2d4a7..ef8964c958 100644 --- a/src/link/MachO/ZldAtom.zig +++ b/src/link/MachO/ZldAtom.zig @@ -165,7 +165,7 @@ pub fn parseRelocTarget( atom_index: AtomIndex, rel: macho.relocation_info, reverse_lookup: []u32, -) !SymbolWithLoc { +) SymbolWithLoc { const atom = zld.getAtom(atom_index); const object = &zld.objects.items[atom.getFile().?]; @@ -429,15 +429,17 @@ pub fn getRelocTargetAddress(zld: *Zld, rel: macho.relocation_info, target: Symb zld.getSymbolName(target_atom.getSymbolWithLoc()), target_atom.file, }); - // If `target` is contained within the target atom, pull its address value. + const target_sym = zld.getSymbol(target_atom.getSymbolWithLoc()); + assert(target_sym.n_desc != @import("zld.zig").N_DEAD); + + // If `target` is contained within the target atom, pull its address value. const offset = if (target_atom.getFile() != null) blk: { const object = zld.objects.items[target_atom.getFile().?]; break :blk if (object.getSourceSymbol(target.sym_index)) |_| Atom.calcInnerSymbolOffset(zld, target_atom_index, target.sym_index) else 0; // section alias - } else 0; const base_address: u64 = if (is_tlv) base_address: { // For TLV relocations, the value specified as a relocation is the displacement from the @@ -498,20 +500,13 @@ fn resolveRelocsArm64( atom.file, }); - const sym_index = reverse_lookup[rel.r_symbolnum]; - const sym_loc = SymbolWithLoc{ - .sym_index = sym_index, - .file = atom.file, - }; - const sym = zld.getSymbol(sym_loc); - assert(sym.sect()); - subtractor = sym_loc; + subtractor = parseRelocTarget(zld, atom_index, rel, reverse_lookup); continue; }, else => {}, } - const target = try parseRelocTarget(zld, atom_index, rel, reverse_lookup); + const target = parseRelocTarget(zld, atom_index, rel, reverse_lookup); const rel_offset = @intCast(u32, rel.r_address - context.base_offset); log.debug(" RELA({s}) @ {x} => %{d} ('{s}') in object({?})", .{ @@ -784,20 +779,13 @@ fn resolveRelocsX86( atom.file, }); - const sym_index = reverse_lookup[rel.r_symbolnum]; - const sym_loc = SymbolWithLoc{ - .sym_index = sym_index, - .file = atom.file, - }; - const sym = zld.getSymbol(sym_loc); - assert(sym.sect()); - subtractor = sym_loc; + subtractor = parseRelocTarget(zld, atom_index, rel, reverse_lookup); continue; }, else => {}, } - const target = try parseRelocTarget(zld, atom_index, rel, reverse_lookup); + const target = parseRelocTarget(zld, atom_index, rel, reverse_lookup); const rel_offset = @intCast(u32, rel.r_address - context.base_offset); log.debug(" RELA({s}) @ {x} => %{d} in object({?})", .{ @@ -816,10 +804,11 @@ fn resolveRelocsX86( const header = zld.sections.items(.header)[source_sym.n_sect - 1]; break :is_tlv header.@"type"() == macho.S_THREAD_LOCAL_VARIABLES; }; - const target_addr = try getRelocTargetAddress(zld, rel, target, is_tlv); log.debug(" | source_addr = 0x{x}", .{source_addr}); + const target_addr = try getRelocTargetAddress(zld, rel, target, is_tlv); + switch (rel_type) { .X86_64_RELOC_BRANCH => { const addend = mem.readIntLittle(i32, atom_code[rel_offset..][0..4]); @@ -878,6 +867,7 @@ fn resolveRelocsX86( } const adjusted_target_addr = @intCast(u64, @intCast(i64, target_addr) + addend); + log.debug(" | target_addr = 0x{x}", .{adjusted_target_addr}); const disp = try calcPcRelativeDisplacementX86(source_addr, adjusted_target_addr, correction); diff --git a/src/link/MachO/dead_strip.zig b/src/link/MachO/dead_strip.zig index 39ab4417c2..a3218ac40f 100644 --- a/src/link/MachO/dead_strip.zig +++ b/src/link/MachO/dead_strip.zig @@ -42,8 +42,12 @@ fn collectRoots(zld: *Zld, roots: *AtomTable) !void { const object = zld.objects.items[global.getFile().?]; const atom_index = object.getAtomIndexForSymbol(global.sym_index).?; // panic here means fatal error _ = try roots.getOrPut(atom_index); - log.debug("adding root", .{}); - zld.logAtom(atom_index, log); + + log.debug("root(ATOM({d}, %{d}, {d}))", .{ + atom_index, + zld.getAtom(atom_index).sym_index, + zld.getAtom(atom_index).file, + }); }, else => |other| { assert(other == .Lib); @@ -55,8 +59,12 @@ fn collectRoots(zld: *Zld, roots: *AtomTable) !void { const object = zld.objects.items[global.getFile().?]; const atom_index = object.getAtomIndexForSymbol(global.sym_index).?; // panic here means fatal error _ = try roots.getOrPut(atom_index); - log.debug("adding root", .{}); - zld.logAtom(atom_index, log); + + log.debug("root(ATOM({d}, %{d}, {d}))", .{ + atom_index, + zld.getAtom(atom_index).sym_index, + zld.getAtom(atom_index).file, + }); } }, } @@ -67,28 +75,36 @@ fn collectRoots(zld: *Zld, roots: *AtomTable) !void { const object = zld.objects.items[global.getFile().?]; if (object.getAtomIndexForSymbol(global.sym_index)) |atom_index| { _ = try roots.getOrPut(atom_index); - log.debug("adding root", .{}); - zld.logAtom(atom_index, log); + + log.debug("root(ATOM({d}, %{d}, {d}))", .{ + atom_index, + zld.getAtom(atom_index).sym_index, + zld.getAtom(atom_index).file, + }); } break; } } for (zld.objects.items) |object| { - for (object.atoms.items) |atom_index| { - const atom = zld.getAtom(atom_index); + const has_subsections = object.header.flags & macho.MH_SUBSECTIONS_VIA_SYMBOLS != 0; - const sect_id = if (object.getSourceSymbol(atom.sym_index)) |source_sym| - source_sym.n_sect - 1 - else blk: { - const nbase = @intCast(u32, object.in_symtab.?.len); - const sect_id = @intCast(u16, atom.sym_index - nbase); - break :blk sect_id; - }; - const source_sect = object.getSourceSection(sect_id); + for (object.atoms.items) |atom_index| { const is_gc_root = blk: { + // Modelled after ld64 which treats each object file compiled without MH_SUBSECTIONS_VIA_SYMBOLS + // as a root. + if (!has_subsections) break :blk true; + + const atom = zld.getAtom(atom_index); + const sect_id = if (object.getSourceSymbol(atom.sym_index)) |source_sym| + source_sym.n_sect - 1 + else sect_id: { + const nbase = @intCast(u32, object.in_symtab.?.len); + const sect_id = @intCast(u16, atom.sym_index - nbase); + break :sect_id sect_id; + }; + const source_sect = object.getSourceSection(sect_id); if (source_sect.isDontDeadStrip()) break :blk true; - if (mem.eql(u8, "__StaticInit", source_sect.sectName())) break :blk true; switch (source_sect.@"type"()) { macho.S_MOD_INIT_FUNC_POINTERS, macho.S_MOD_TERM_FUNC_POINTERS, @@ -96,10 +112,15 @@ fn collectRoots(zld: *Zld, roots: *AtomTable) !void { else => break :blk false, } }; + if (is_gc_root) { try roots.putNoClobber(atom_index, {}); - log.debug("adding root", .{}); - zld.logAtom(atom_index, log); + + log.debug("root(ATOM({d}, %{d}, {d}))", .{ + atom_index, + zld.getAtom(atom_index).sym_index, + zld.getAtom(atom_index).file, + }); } } } @@ -111,17 +132,17 @@ fn markLive( alive: *AtomTable, reverse_lookups: [][]u32, ) anyerror!void { - log.debug("mark(ATOM({d}))", .{atom_index}); - if (alive.contains(atom_index)) return; - alive.putAssumeCapacityNoClobber(atom_index, {}); + const atom = zld.getAtom(atom_index); + const sym_loc = atom.getSymbolWithLoc(); + + log.debug("mark(ATOM({d}, %{d}, {d}))", .{ atom_index, sym_loc.sym_index, sym_loc.file }); - zld.logAtom(atom_index, log); + alive.putAssumeCapacityNoClobber(atom_index, {}); const cpu_arch = zld.options.target.cpu.arch; - const atom = zld.getAtom(atom_index); const sym = zld.getSymbol(atom.getSymbolWithLoc()); const header = zld.sections.items(.header)[sym.n_sect - 1]; if (header.isZerofill()) return; @@ -130,37 +151,30 @@ fn markLive( const reverse_lookup = reverse_lookups[atom.getFile().?]; for (relocs) |rel| { const target = switch (cpu_arch) { - .aarch64 => blk: { - const rel_type = @intToEnum(macho.reloc_type_arm64, rel.r_type); - switch (rel_type) { - .ARM64_RELOC_ADDEND => continue, - .ARM64_RELOC_SUBTRACTOR => { - const sym_index = reverse_lookup[rel.r_symbolnum]; - break :blk SymbolWithLoc{ - .sym_index = sym_index, - .file = atom.file, - }; - }, - else => break :blk try Atom.parseRelocTarget(zld, atom_index, rel, reverse_lookup), - } - }, - .x86_64 => blk: { - const rel_type = @intToEnum(macho.reloc_type_x86_64, rel.r_type); - switch (rel_type) { - .X86_64_RELOC_SUBTRACTOR => { - const sym_index = reverse_lookup[rel.r_symbolnum]; - break :blk SymbolWithLoc{ - .sym_index = sym_index, - .file = atom.file, - }; - }, - else => break :blk try Atom.parseRelocTarget(zld, atom_index, rel, reverse_lookup), - } + .aarch64 => switch (@intToEnum(macho.reloc_type_arm64, rel.r_type)) { + .ARM64_RELOC_ADDEND => continue, + else => Atom.parseRelocTarget(zld, atom_index, rel, reverse_lookup), }, + .x86_64 => Atom.parseRelocTarget(zld, atom_index, rel, reverse_lookup), else => unreachable, }; - const target_sym = zld.getSymbol(target); + + if (rel.r_extern == 0) { + // We are pessimistic and mark all atoms within the target section as live. + // TODO: this can be improved by marking only the relevant atoms. + const sect_id = target_sym.n_sect; + const object = zld.objects.items[target.getFile().?]; + for (object.atoms.items) |other_atom_index| { + const other_atom = zld.getAtom(other_atom_index); + const other_sym = zld.getSymbol(other_atom.getSymbolWithLoc()); + if (other_sym.n_sect == sect_id) { + try markLive(zld, other_atom_index, alive, reverse_lookups); + } + } + continue; + } + if (target_sym.undf()) continue; if (target.getFile() == null) { const target_sym_name = zld.getSymbolName(target); @@ -172,51 +186,51 @@ fn markLive( const object = zld.objects.items[target.getFile().?]; const target_atom_index = object.getAtomIndexForSymbol(target.sym_index).?; - log.debug(" following ATOM({d})", .{target_atom_index}); + log.debug(" following ATOM({d}, %{d}, {d})", .{ + target_atom_index, + zld.getAtom(target_atom_index).sym_index, + zld.getAtom(target_atom_index).file, + }); try markLive(zld, target_atom_index, alive, reverse_lookups); } } fn refersLive(zld: *Zld, atom_index: AtomIndex, alive: AtomTable, reverse_lookups: [][]u32) !bool { - log.debug("refersLive(ATOM({d}))", .{atom_index}); + const atom = zld.getAtom(atom_index); + const sym_loc = atom.getSymbolWithLoc(); + + log.debug("refersLive(ATOM({d}, %{d}, {d}))", .{ atom_index, sym_loc.sym_index, sym_loc.file }); const cpu_arch = zld.options.target.cpu.arch; - const atom = zld.getAtom(atom_index); - const sym = zld.getSymbol(atom.getSymbolWithLoc()); + const sym = zld.getSymbol(sym_loc); const header = zld.sections.items(.header)[sym.n_sect - 1]; - if (header.isZerofill()) return false; + assert(!header.isZerofill()); const relocs = Atom.getAtomRelocs(zld, atom_index); const reverse_lookup = reverse_lookups[atom.getFile().?]; for (relocs) |rel| { - switch (cpu_arch) { - .aarch64 => { - const rel_type = @intToEnum(macho.reloc_type_arm64, rel.r_type); - switch (rel_type) { - .ARM64_RELOC_ADDEND, .ARM64_RELOC_SUBTRACTOR => continue, - else => {}, - } - }, - .x86_64 => { - const rel_type = @intToEnum(macho.reloc_type_x86_64, rel.r_type); - switch (rel_type) { - .X86_64_RELOC_SUBTRACTOR => continue, - else => {}, - } + const target = switch (cpu_arch) { + .aarch64 => switch (@intToEnum(macho.reloc_type_arm64, rel.r_type)) { + .ARM64_RELOC_ADDEND => continue, + else => Atom.parseRelocTarget(zld, atom_index, rel, reverse_lookup), }, + .x86_64 => Atom.parseRelocTarget(zld, atom_index, rel, reverse_lookup), else => unreachable, - } + }; - const target = try Atom.parseRelocTarget(zld, atom_index, rel, reverse_lookup); const object = zld.objects.items[target.getFile().?]; const target_atom_index = object.getAtomIndexForSymbol(target.sym_index) orelse { log.debug("atom for symbol '{s}' not found; skipping...", .{zld.getSymbolName(target)}); continue; }; if (alive.contains(target_atom_index)) { - log.debug(" refers live ATOM({d})", .{target_atom_index}); + log.debug(" refers live ATOM({d}, %{d}, {d})", .{ + target_atom_index, + zld.getAtom(target_atom_index).sym_index, + zld.getAtom(target_atom_index).file, + }); return true; } } @@ -270,10 +284,16 @@ fn prune(zld: *Zld, alive: AtomTable) !void { continue; } - zld.logAtom(atom_index, log); - const atom = zld.getAtom(atom_index); const sym_loc = atom.getSymbolWithLoc(); + + log.debug("prune(ATOM({d}, %{d}, {d}))", .{ + atom_index, + sym_loc.sym_index, + sym_loc.file, + }); + log.debug(" {s} in {s}", .{ zld.getSymbolName(sym_loc), object.name }); + const sym = zld.getSymbolPtr(sym_loc); const sect_id = sym.n_sect - 1; var section = zld.sections.get(sect_id); @@ -303,16 +323,17 @@ fn prune(zld: *Zld, alive: AtomTable) !void { zld.sections.set(sect_id, section); _ = object.atoms.swapRemove(i); - if (sym.ext()) { - sym.n_desc = N_DEAD; - } + sym.n_desc = N_DEAD; var inner_sym_it = Atom.getInnerSymbolsIterator(zld, atom_index); while (inner_sym_it.next()) |inner| { const inner_sym = zld.getSymbolPtr(inner); - if (inner_sym.ext()) { - inner_sym.n_desc = N_DEAD; - } + inner_sym.n_desc = N_DEAD; + } + + if (Atom.getSectionAlias(zld, atom_index)) |alias| { + const alias_sym = zld.getSymbolPtr(alias); + alias_sym.n_desc = N_DEAD; } } } diff --git a/src/link/MachO/thunks.zig b/src/link/MachO/thunks.zig index 229e7518c5..39fae5aa1e 100644 --- a/src/link/MachO/thunks.zig +++ b/src/link/MachO/thunks.zig @@ -224,7 +224,7 @@ fn scanRelocs( for (relocs) |rel| { if (!relocNeedsThunk(rel)) continue; - const target = Atom.parseRelocTarget(zld, atom_index, rel, reverse_lookup) catch unreachable; + const target = Atom.parseRelocTarget(zld, atom_index, rel, reverse_lookup); if (isReachable(zld, atom_index, rel, base_offset, target, allocated)) continue; log.debug("{x}: source = {s}@{x}, target = {s}@{x} unreachable", .{ diff --git a/src/link/MachO/zld.zig b/src/link/MachO/zld.zig index c7b34723ac..b9adaf7bf0 100644 --- a/src/link/MachO/zld.zig +++ b/src/link/MachO/zld.zig @@ -2313,7 +2313,7 @@ pub const Zld = struct { else => unreachable, } - const global = try Atom.parseRelocTarget(self, atom_index, rel, reverse_lookups[atom.getFile().?]); + const global = Atom.parseRelocTarget(self, atom_index, rel, reverse_lookups[atom.getFile().?]); const bind_sym_name = self.getSymbolName(global); const bind_sym = self.getSymbol(global); if (!bind_sym.undf()) continue; @@ -2750,14 +2750,20 @@ pub const Zld = struct { continue; } - const source_sym = object.getSourceSymbol(atom.sym_index) orelse continue; - const source_addr = math.cast(u32, source_sym.n_value) orelse return error.Overflow; + const source_addr = if (object.getSourceSymbol(atom.sym_index)) |source_sym| + source_sym.n_value + else blk: { + const nbase = @intCast(u32, object.in_symtab.?.len); + const source_sect_id = @intCast(u16, atom.sym_index - nbase); + break :blk object.getSourceSection(source_sect_id).addr; + }; const filtered_dice = filterDataInCode(dice, source_addr, source_addr + atom.size); const base = math.cast(u32, sym.n_value - text_sect_header.addr + text_sect_header.offset) orelse return error.Overflow; for (filtered_dice) |single| { - const offset = single.offset - source_addr + base; + const offset = math.cast(u32, single.offset - source_addr + base) orelse + return error.Overflow; out_dice.appendAssumeCapacity(.{ .offset = offset, .length = single.length, @@ -4305,6 +4311,10 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr const physical_zerofill_size = math.cast(usize, linkedit.fileoff - physical_zerofill_start) orelse return error.Overflow; if (physical_zerofill_size > 0) { + log.debug("zeroing out zerofill area of length {x} at {x}", .{ + physical_zerofill_size, + physical_zerofill_start, + }); var padding = try zld.gpa.alloc(u8, physical_zerofill_size); defer zld.gpa.free(padding); mem.set(u8, padding, 0); |
