diff options
| author | Jakub Konka <kubkon@jakubkonka.com> | 2022-07-04 20:40:10 +0200 |
|---|---|---|
| committer | Jakub Konka <kubkon@jakubkonka.com> | 2022-07-22 16:58:20 +0200 |
| commit | 03feea0fb200f273dd74bf778997e6a6bead86cc (patch) | |
| tree | c439ed641e01ec4d860abb181ee585a2a287494c /src/link | |
| parent | d042b88c112aa919386bc76294225d4f7bd9a7b3 (diff) | |
| download | zig-03feea0fb200f273dd74bf778997e6a6bead86cc.tar.gz zig-03feea0fb200f273dd74bf778997e6a6bead86cc.zip | |
macho: split section into subsections if requested and/or possible
Diffstat (limited to 'src/link')
| -rw-r--r-- | src/link/MachO.zig | 291 | ||||
| -rw-r--r-- | src/link/MachO/Atom.zig | 64 | ||||
| -rw-r--r-- | src/link/MachO/Object.zig | 392 |
3 files changed, 514 insertions, 233 deletions
diff --git a/src/link/MachO.zig b/src/link/MachO.zig index c5ed6cb6ac..38624ae152 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -57,6 +57,8 @@ const SystemLib = struct { weak: bool = false, }; +const N_DESC_GCED: u16 = @bitCast(u16, @as(i16, -1)); + base: File, /// If this is not null, an object file is created by LLVM and linked with LLD afterwards. @@ -256,6 +258,8 @@ unnamed_const_atoms: UnnamedConstTable = .{}, /// TODO consolidate this. decls: std.AutoArrayHashMapUnmanaged(Module.Decl.Index, ?MatchingSection) = .{}, +gc_roots: std.AutoHashMapUnmanaged(*Atom, void) = .{}, + const Entry = struct { target: Atom.Relocation.Target, atom: *Atom, @@ -1165,6 +1169,8 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No const use_llvm = build_options.have_llvm and self.base.options.use_llvm; if (use_llvm or use_stage1) { + self.logAtoms(); + try self.gcAtoms(); try self.pruneAndSortSections(); try self.allocateSegments(); try self.allocateLocals(); @@ -1173,9 +1179,10 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No try self.allocateSpecialSymbols(); try self.allocateGlobals(); - if (build_options.enable_logging) { + if (build_options.enable_logging or true) { self.logSymtab(); self.logSectionOrdinals(); + self.logAtoms(); } if (use_llvm or use_stage1) { @@ -2177,6 +2184,7 @@ pub fn createEmptyAtom(self: *MachO, local_sym_index: u32, size: u64, alignment: try atom.code.resize(self.base.allocator, size_usize); mem.set(u8, atom.code.items, 0); + try self.atom_by_index_table.putNoClobber(self.base.allocator, local_sym_index, atom); try self.managed_atoms.append(self.base.allocator, atom); return atom; } @@ -3298,12 +3306,7 @@ fn resolveDyldStubBinder(self: *MachO) !void { const vaddr = try self.allocateAtom(atom, @sizeOf(u64), 8, match); log.debug("allocated {s} atom at 0x{x}", .{ self.getString(sym.n_strx), vaddr }); atom_sym.n_value = vaddr; - } else { - const seg = &self.load_commands.items[self.data_const_segment_cmd_index.?].segment; - const sect = &seg.sections.items[self.got_section_index.?]; - sect.size += atom.size; - try self.addAtomToSection(atom, match); - } + } else try self.addAtomToSection(atom, match); atom_sym.n_sect = @intCast(u8, self.section_ordinals.getIndex(match).? + 1); } @@ -3564,6 +3567,7 @@ pub fn deinit(self: *MachO) void { self.symbol_resolver.deinit(self.base.allocator); self.unresolved.deinit(self.base.allocator); self.tentatives.deinit(self.base.allocator); + self.gc_roots.deinit(self.base.allocator); for (self.objects.items) |*object| { object.deinit(self.base.allocator); @@ -3916,7 +3920,6 @@ pub fn lowerUnnamedConst(self: *MachO, typed_value: TypedValue, decl_index: Modu const required_alignment = typed_value.ty.abiAlignment(self.base.options.target); const local_sym_index = try self.allocateLocalSymbol(); const atom = try self.createEmptyAtom(local_sym_index, @sizeOf(u64), math.log2(required_alignment)); - try self.atom_by_index_table.putNoClobber(self.base.allocator, local_sym_index, atom); const res = try codegen.generateSymbol(&self.base, decl.srcLoc(), typed_value, &code_buffer, .none, .{ .parent_atom_index = local_sym_index, @@ -5597,7 +5600,7 @@ fn pruneAndSortSectionsInSegment(self: *MachO, maybe_seg_id: *?u16, indices: []* const old_idx = maybe_index.* orelse continue; const sect = sections[old_idx]; if (sect.size == 0) { - log.debug("pruning section {s},{s}", .{ sect.segName(), sect.sectName() }); + log.warn("pruning section {s},{s}", .{ sect.segName(), sect.sectName() }); maybe_index.* = null; seg.inner.cmdsize -= @sizeOf(macho.section_64); seg.inner.nsects -= 1; @@ -5630,7 +5633,7 @@ fn pruneAndSortSectionsInSegment(self: *MachO, maybe_seg_id: *?u16, indices: []* if (seg.inner.nsects == 0 and !mem.eql(u8, "__TEXT", seg.inner.segName())) { // Segment has now become empty, so mark it as such - log.debug("marking segment {s} as dead", .{seg.inner.segName()}); + log.warn("marking segment {s} as dead", .{seg.inner.segName()}); seg.inner.cmd = @intToEnum(macho.LC, 0); maybe_seg_id.* = null; } @@ -5712,6 +5715,189 @@ fn pruneAndSortSections(self: *MachO) !void { self.sections_order_dirty = false; } +fn gcAtoms(self: *MachO) !void { + const dead_strip = self.base.options.gc_sections orelse false; + if (!dead_strip) return; + + // Add all exports as GC roots + for (self.globals.items) |sym| { + if (sym.n_type == 0) continue; + const resolv = self.symbol_resolver.get(sym.n_strx).?; + assert(resolv.where == .global); + const gc_root = self.atom_by_index_table.get(resolv.local_sym_index) orelse { + log.warn("skipping {s}", .{self.getString(sym.n_strx)}); + continue; + }; + _ = try self.gc_roots.getOrPut(self.base.allocator, gc_root); + } + + // if (self.tlv_ptrs_section_index) |sect| { + // var atom = self.atoms.get(.{ + // .seg = self.data_segment_cmd_index.?, + // .sect = sect, + // }).?; + + // while (true) { + // _ = try self.gc_roots.getOrPut(self.base.allocator, atom); + + // if (atom.prev) |prev| { + // atom = prev; + // } else break; + // } + // } + + // Add any atom targeting an import as GC root + var atoms_it = self.atoms.iterator(); + while (atoms_it.next()) |entry| { + var atom = entry.value_ptr.*; + + while (true) { + for (atom.relocs.items) |rel| { + if ((try Atom.getTargetAtom(rel, self)) == null) switch (rel.target) { + .local => {}, + .global => |n_strx| { + const resolv = self.symbol_resolver.get(n_strx).?; + switch (resolv.where) { + .global => {}, + .undef => { + _ = try self.gc_roots.getOrPut(self.base.allocator, atom); + break; + }, + } + }, + }; + } + + if (atom.prev) |prev| { + atom = prev; + } else break; + } + } + + var stack = std.ArrayList(*Atom).init(self.base.allocator); + defer stack.deinit(); + try stack.ensureUnusedCapacity(self.gc_roots.count()); + + var retained = std.AutoHashMap(*Atom, void).init(self.base.allocator); + defer retained.deinit(); + try retained.ensureUnusedCapacity(self.gc_roots.count()); + + log.warn("GC roots:", .{}); + var gc_roots_it = self.gc_roots.keyIterator(); + while (gc_roots_it.next()) |gc_root| { + self.logAtom(gc_root.*); + + stack.appendAssumeCapacity(gc_root.*); + retained.putAssumeCapacityNoClobber(gc_root.*, {}); + } + + log.warn("walking tree...", .{}); + while (stack.popOrNull()) |source_atom| { + for (source_atom.relocs.items) |rel| { + if (try Atom.getTargetAtom(rel, self)) |target_atom| { + const gop = try retained.getOrPut(target_atom); + if (!gop.found_existing) { + log.warn(" RETAINED ATOM(%{d}) -> ATOM(%{d})", .{ + source_atom.local_sym_index, + target_atom.local_sym_index, + }); + try stack.append(target_atom); + } + } + } + } + + atoms_it = self.atoms.iterator(); + while (atoms_it.next()) |entry| { + const match = entry.key_ptr.*; + + if (self.text_segment_cmd_index) |seg| { + if (seg == match.seg) { + if (self.eh_frame_section_index) |sect| { + if (sect == match.sect) continue; + } + } + } + + if (self.data_segment_cmd_index) |seg| { + if (seg == match.seg) { + if (self.rustc_section_index) |sect| { + if (sect == match.sect) continue; + } + } + } + + const seg = &self.load_commands.items[match.seg].segment; + const sect = &seg.sections.items[match.sect]; + var atom = entry.value_ptr.*; + + log.warn("GCing atoms in {s},{s}", .{ sect.segName(), sect.sectName() }); + + while (true) { + const orig_prev = atom.prev; + + if (!retained.contains(atom)) { + // Dead atom; remove. + log.warn(" DEAD ATOM(%{d})", .{atom.local_sym_index}); + + const sym = &self.locals.items[atom.local_sym_index]; + sym.n_desc = N_DESC_GCED; + + if (self.symbol_resolver.getPtr(sym.n_strx)) |resolv| { + if (resolv.local_sym_index == atom.local_sym_index) { + const global = &self.globals.items[resolv.where_index]; + global.n_desc = N_DESC_GCED; + } + } + + for (self.got_entries.items) |got_entry| { + if (got_entry.atom == atom) { + _ = self.got_entries_table.swapRemove(got_entry.target); + break; + } + } + + for (self.stubs.items) |stub, i| { + if (stub == atom) { + _ = self.stubs_table.swapRemove(@intCast(u32, i)); + break; + } + } + + for (atom.contained.items) |sym_off| { + const inner = &self.locals.items[sym_off.local_sym_index]; + inner.n_desc = N_DESC_GCED; + + if (self.symbol_resolver.getPtr(inner.n_strx)) |resolv| { + if (resolv.local_sym_index == atom.local_sym_index) { + const global = &self.globals.items[resolv.where_index]; + global.n_desc = N_DESC_GCED; + } + } + } + + log.warn(" BEFORE size = {x}", .{sect.size}); + sect.size -= atom.size; + log.warn(" AFTER size = {x}", .{sect.size}); + if (atom.prev) |prev| { + prev.next = atom.next; + } + if (atom.next) |next| { + next.prev = atom.prev; + } else { + // TODO I think a null would be better here. + // The section will be GCed in the next step. + entry.value_ptr.* = if (atom.prev) |prev| prev else undefined; + } + } + + if (orig_prev) |prev| { + atom = prev; + } else break; + } + } +} + fn updateSectionOrdinals(self: *MachO) !void { if (!self.sections_order_dirty) return; @@ -5776,8 +5962,11 @@ fn writeDyldInfoData(self: *MachO) !void { } const seg = self.load_commands.items[match.seg].segment; + const sect = seg.sections.items[match.sect]; + log.warn("dyld info for {s},{s}", .{ sect.segName(), sect.sectName() }); while (true) { + log.warn(" ATOM %{d}", .{atom.local_sym_index}); const sym = self.locals.items[atom.local_sym_index]; const base_offset = sym.n_value - seg.inner.vmaddr; @@ -6217,10 +6406,19 @@ fn writeSymbolTable(self: *MachO) !void { for (self.locals.items) |sym| { if (sym.n_strx == 0) continue; + if (sym.n_desc == N_DESC_GCED) continue; if (self.symbol_resolver.get(sym.n_strx)) |_| continue; try locals.append(sym); } + var globals = std.ArrayList(macho.nlist_64).init(self.base.allocator); + defer globals.deinit(); + + for (self.globals.items) |sym| { + if (sym.n_desc == N_DESC_GCED) continue; + try globals.append(sym); + } + // TODO How do we handle null global symbols in incremental context? var undefs = std.ArrayList(macho.nlist_64).init(self.base.allocator); defer undefs.deinit(); @@ -6291,7 +6489,7 @@ fn writeSymbolTable(self: *MachO) !void { } const nlocals = locals.items.len; - const nexports = self.globals.items.len; + const nexports = globals.items.len; const nundefs = undefs.items.len; const locals_off = symtab.symoff; @@ -6302,7 +6500,7 @@ fn writeSymbolTable(self: *MachO) !void { const exports_off = locals_off + locals_size; const exports_size = nexports * @sizeOf(macho.nlist_64); log.debug("writing exported symbols from 0x{x} to 0x{x}", .{ exports_off, exports_size + exports_off }); - try self.base.file.?.pwriteAll(mem.sliceAsBytes(self.globals.items), exports_off); + try self.base.file.?.pwriteAll(mem.sliceAsBytes(globals.items), exports_off); const undefs_off = exports_off + exports_size; const undefs_size = nundefs * @sizeOf(macho.nlist_64); @@ -6898,55 +7096,55 @@ fn snapshotState(self: *MachO) !void { } fn logSymtab(self: MachO) void { - log.debug("locals:", .{}); + log.warn("locals:", .{}); for (self.locals.items) |sym, id| { - log.debug(" {d}: {s}: @{x} in {d}", .{ id, self.getString(sym.n_strx), sym.n_value, sym.n_sect }); + log.warn(" {d}: {s}: @{x} in {d}", .{ id, self.getString(sym.n_strx), sym.n_value, sym.n_sect }); } - log.debug("globals:", .{}); + log.warn("globals:", .{}); for (self.globals.items) |sym, id| { - log.debug(" {d}: {s}: @{x} in {d}", .{ id, self.getString(sym.n_strx), sym.n_value, sym.n_sect }); + log.warn(" {d}: {s}: @{x} in {d}", .{ id, self.getString(sym.n_strx), sym.n_value, sym.n_sect }); } - log.debug("undefs:", .{}); + log.warn("undefs:", .{}); for (self.undefs.items) |sym, id| { - log.debug(" {d}: {s}: in {d}", .{ id, self.getString(sym.n_strx), sym.n_desc }); + log.warn(" {d}: {s}: in {d}", .{ id, self.getString(sym.n_strx), sym.n_desc }); } { - log.debug("resolver:", .{}); + log.warn("resolver:", .{}); var it = self.symbol_resolver.iterator(); while (it.next()) |entry| { - log.debug(" {s} => {}", .{ self.getString(entry.key_ptr.*), entry.value_ptr.* }); + log.warn(" {s} => {}", .{ self.getString(entry.key_ptr.*), entry.value_ptr.* }); } } - log.debug("GOT entries:", .{}); + log.warn("GOT entries:", .{}); for (self.got_entries_table.values()) |value| { const key = self.got_entries.items[value].target; const atom = self.got_entries.items[value].atom; const n_value = self.locals.items[atom.local_sym_index].n_value; switch (key) { - .local => |ndx| log.debug(" {d}: @{x}", .{ ndx, n_value }), - .global => |n_strx| log.debug(" {s}: @{x}", .{ self.getString(n_strx), n_value }), + .local => |ndx| log.warn(" {d}: @{x}", .{ ndx, n_value }), + .global => |n_strx| log.warn(" {s}: @{x}", .{ self.getString(n_strx), n_value }), } } - log.debug("__thread_ptrs entries:", .{}); + log.warn("__thread_ptrs entries:", .{}); for (self.tlv_ptr_entries_table.values()) |value| { const key = self.tlv_ptr_entries.items[value].target; const atom = self.tlv_ptr_entries.items[value].atom; const n_value = self.locals.items[atom.local_sym_index].n_value; assert(key == .global); - log.debug(" {s}: @{x}", .{ self.getString(key.global), n_value }); + log.warn(" {s}: @{x}", .{ self.getString(key.global), n_value }); } - log.debug("stubs:", .{}); + log.warn("stubs:", .{}); for (self.stubs_table.keys()) |key| { const value = self.stubs_table.get(key).?; const atom = self.stubs.items[value]; const sym = self.locals.items[atom.local_sym_index]; - log.debug(" {s}: @{x}", .{ self.getString(key), sym.n_value }); + log.warn(" {s}: @{x}", .{ self.getString(key), sym.n_value }); } } @@ -6964,6 +7162,45 @@ fn logSectionOrdinals(self: MachO) void { } } +fn logAtoms(self: MachO) void { + log.warn("atoms:", .{}); + var it = self.atoms.iterator(); + while (it.next()) |entry| { + const match = entry.key_ptr.*; + var atom = entry.value_ptr.*; + + while (atom.prev) |prev| { + atom = prev; + } + + const seg = self.load_commands.items[match.seg].segment; + const sect = seg.sections.items[match.sect]; + log.warn("{s},{s}", .{ sect.segName(), sect.sectName() }); + + while (true) { + self.logAtom(atom); + + if (atom.next) |next| { + atom = next; + } else break; + } + } +} + +fn logAtom(self: MachO, atom: *const Atom) void { + const sym = self.locals.items[atom.local_sym_index]; + log.warn(" ATOM(%{d}) @ {x}", .{ atom.local_sym_index, sym.n_value }); + + for (atom.contained.items) |sym_off| { + const inner_sym = self.locals.items[sym_off.local_sym_index]; + log.warn(" %{d} ('{s}') @ {x}", .{ + sym_off.local_sym_index, + self.getString(inner_sym.n_strx), + inner_sym.n_value, + }); + } +} + /// Since `os.copy_file_range` cannot be used when copying overlapping ranges within the same file, /// and since `File.copyRangeAll` uses `os.copy_file_range` under-the-hood, we use heap allocated /// buffers on all hosts except Linux (if `copy_file_range` syscall is available). diff --git a/src/link/MachO/Atom.zig b/src/link/MachO/Atom.zig index d7c595dbba..e6adb0cc1c 100644 --- a/src/link/MachO/Atom.zig +++ b/src/link/MachO/Atom.zig @@ -236,6 +236,7 @@ pub fn freeListEligible(self: Atom, macho_file: MachO) bool { const RelocContext = struct { base_addr: u64 = 0, + base_offset: i32 = 0, allocator: Allocator, object: *Object, macho_file: *MachO, @@ -366,7 +367,7 @@ pub fn parseRelocs(self: *Atom, relocs: []const macho.relocation_info, context: ) orelse unreachable; break :target Relocation.Target{ .global = n_strx }; }; - const offset = @intCast(u32, rel.r_address); + const offset = @intCast(u32, rel.r_address - context.base_offset); switch (arch) { .aarch64 => { @@ -487,7 +488,7 @@ fn addPtrBindingOrRebase( .global => |n_strx| { try self.bindings.append(context.allocator, .{ .n_strx = n_strx, - .offset = @intCast(u32, rel.r_address), + .offset = @intCast(u32, rel.r_address - context.base_offset), }); }, .local => { @@ -529,7 +530,10 @@ fn addPtrBindingOrRebase( }; if (should_rebase) { - try self.rebases.append(context.allocator, @intCast(u32, rel.r_address)); + try self.rebases.append( + context.allocator, + @intCast(u32, rel.r_address - context.base_offset), + ); } }, } @@ -650,6 +654,60 @@ fn addStub(target: Relocation.Target, context: RelocContext) !void { context.macho_file.stubs.items[stub_index] = atom; } +pub fn getTargetAtom(rel: Relocation, macho_file: *MachO) !?*Atom { + const is_via_got = got: { + switch (macho_file.base.options.target.cpu.arch) { + .aarch64 => break :got switch (@intToEnum(macho.reloc_type_arm64, rel.@"type")) { + .ARM64_RELOC_GOT_LOAD_PAGE21, + .ARM64_RELOC_GOT_LOAD_PAGEOFF12, + .ARM64_RELOC_POINTER_TO_GOT, + => true, + else => false, + }, + .x86_64 => break :got switch (@intToEnum(macho.reloc_type_x86_64, rel.@"type")) { + .X86_64_RELOC_GOT, .X86_64_RELOC_GOT_LOAD => true, + else => false, + }, + else => unreachable, + } + }; + + if (is_via_got) { + const got_index = macho_file.got_entries_table.get(rel.target) orelse { + log.err("expected GOT entry for symbol", .{}); + switch (rel.target) { + .local => |sym_index| log.err(" local @{d}", .{sym_index}), + .global => |n_strx| log.err(" global @'{s}'", .{macho_file.getString(n_strx)}), + } + log.err(" this is an internal linker error", .{}); + return error.FailedToResolveRelocationTarget; + }; + return macho_file.got_entries.items[got_index].atom; + } + + switch (rel.target) { + .local => |sym_index| { + return macho_file.atom_by_index_table.get(sym_index); + }, + .global => |n_strx| { + const resolv = macho_file.symbol_resolver.get(n_strx).?; + switch (resolv.where) { + .global => return macho_file.atom_by_index_table.get(resolv.local_sym_index), + .undef => { + if (macho_file.stubs_table.get(n_strx)) |stub_index| { + return macho_file.stubs.items[stub_index]; + } else { + if (macho_file.tlv_ptr_entries_table.get(rel.target)) |tlv_ptr_index| { + return macho_file.tlv_ptr_entries.items[tlv_ptr_index].atom; + } + return null; + } + }, + } + }, + } +} + pub fn resolveRelocs(self: *Atom, macho_file: *MachO) !void { const tracy = trace(@src()); defer tracy.end(); diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index f01f366fdd..305ae25791 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -176,6 +176,13 @@ pub fn free(self: *Object, allocator: Allocator, macho_file: *MachO) void { .n_desc = 0, .n_value = 0, }; + _ = macho_file.atom_by_index_table.remove(atom.local_sym_index); + _ = macho_file.gc_roots.remove(atom); + + for (atom.contained.items) |sym_off| { + _ = macho_file.atom_by_index_table.remove(sym_off.local_sym_index); + } + atom.local_sym_index = 0; } if (atom == last_atom) { @@ -346,7 +353,7 @@ const NlistWithIndex = struct { } } - fn filterInSection(symbols: []NlistWithIndex, sect: macho.section_64) []NlistWithIndex { + fn filterByAddress(symbols: []NlistWithIndex, start_addr: u64, end_addr: u64) []NlistWithIndex { const Predicate = struct { addr: u64, @@ -355,13 +362,36 @@ const NlistWithIndex = struct { } }; - const start = MachO.findFirst(NlistWithIndex, symbols, 0, Predicate{ .addr = sect.addr }); - const end = MachO.findFirst(NlistWithIndex, symbols, start, Predicate{ .addr = sect.addr + sect.size }); + const start = MachO.findFirst(NlistWithIndex, symbols, 0, Predicate{ + .addr = start_addr, + }); + const end = MachO.findFirst(NlistWithIndex, symbols, start, Predicate{ + .addr = end_addr, + }); return symbols[start..end]; } }; +fn filterRelocs( + relocs: []const macho.relocation_info, + start_addr: u64, + end_addr: u64, +) []const macho.relocation_info { + const Predicate = struct { + addr: u64, + + pub fn predicate(self: @This(), rel: macho.relocation_info) bool { + return rel.r_address < self.addr; + } + }; + + const start = MachO.findFirst(macho.relocation_info, relocs, 0, Predicate{ .addr = end_addr }); + const end = MachO.findFirst(macho.relocation_info, relocs, start, Predicate{ .addr = start_addr }); + + return relocs[start..end]; +} + fn filterDice( dices: []const macho.data_in_code_entry, start_addr: u64, @@ -422,16 +452,13 @@ pub fn parseIntoAtoms(self: *Object, allocator: Allocator, macho_file: *MachO) ! // We only care about defined symbols, so filter every other out. const sorted_nlists = sorted_all_nlists.items[0..iundefsym]; - const dead_strip = blk: { - const dead_strip = macho_file.base.options.gc_sections orelse break :blk false; - if (dead_strip or macho_file.base.options.optimize_mode != .Debug) - break :blk self.header.flags & macho.MH_SUBSECTIONS_VIA_SYMBOLS != 0; - break :blk false; - }; + const dead_strip = macho_file.base.options.gc_sections orelse false; + const subsections_via_symbols = self.header.flags & macho.MH_SUBSECTIONS_VIA_SYMBOLS != 0 and + (macho_file.base.options.optimize_mode != .Debug or dead_strip); for (seg.sections.items) |sect, id| { const sect_id = @intCast(u8, id); - log.debug("putting section '{s},{s}' as an Atom", .{ sect.segName(), sect.sectName() }); + log.debug("parsing section '{s},{s}' into Atoms", .{ sect.segName(), sect.sectName() }); // Get matching segment/section in the final artifact. const match = (try macho_file.getMatchingSection(sect)) orelse { @@ -455,7 +482,11 @@ pub fn parseIntoAtoms(self: *Object, allocator: Allocator, macho_file: *MachO) ! ); // Symbols within this section only. - const filtered_nlists = NlistWithIndex.filterInSection(sorted_nlists, sect); + const filtered_nlists = NlistWithIndex.filterByAddress( + sorted_nlists, + sect.addr, + sect.addr + sect.size, + ); macho_file.has_dices = macho_file.has_dices or blk: { if (self.text_section_index) |index| { @@ -467,204 +498,123 @@ pub fn parseIntoAtoms(self: *Object, allocator: Allocator, macho_file: *MachO) ! }; macho_file.has_stabs = macho_file.has_stabs or self.debug_info != null; - if (dead_strip) blk: { - if (filtered_nlists.len == 0) break :blk; // nothing to split - + if (subsections_via_symbols and filtered_nlists.len > 0) { // If the first nlist does not match the start of the section, // then we need to encapsulate the memory range [section start, first symbol) // as a temporary symbol and insert the matching Atom. const first_nlist = filtered_nlists[0].nlist; - if (first_nlist.n_value > sect.addr) {} - } - - // If there is no symbol to refer to this atom, we create - // a temp one, unless we already did that when working out the relocations - // of other atoms. - const local_sym_index = self.sections_as_symbols.get(sect_id) orelse blk: { - const local_sym_index = @intCast(u32, macho_file.locals.items.len); - try macho_file.locals.append(allocator, .{ - .n_strx = 0, - .n_type = macho.N_SECT, - .n_sect = @intCast(u8, macho_file.section_ordinals.getIndex(match).? + 1), - .n_desc = 0, - .n_value = sect.addr, - }); - try self.sections_as_symbols.putNoClobber(allocator, sect_id, local_sym_index); - break :blk local_sym_index; - }; - const atom = try self.parseIntoAtom( - allocator, - local_sym_index, - sect.size, - sect.@"align", - code, - relocs, - filtered_nlists, - match, - macho_file, - ); - - if (!self.start_atoms.contains(match)) { - try self.start_atoms.putNoClobber(allocator, match, atom); - } + if (first_nlist.n_value > sect.addr) { + const local_sym_index = self.sections_as_symbols.get(sect_id) orelse blk: { + const local_sym_index = @intCast(u32, macho_file.locals.items.len); + try macho_file.locals.append(allocator, .{ + .n_strx = 0, + .n_type = macho.N_SECT, + .n_sect = @intCast(u8, macho_file.section_ordinals.getIndex(match).? + 1), + .n_desc = 0, + .n_value = sect.addr, + }); + try self.sections_as_symbols.putNoClobber(allocator, sect_id, local_sym_index); + break :blk local_sym_index; + }; + const atom_size = first_nlist.n_value - sect.addr; + const atom_code: ?[]const u8 = if (code) |cc| + cc[0..atom_size] + else + null; + try self.parseIntoAtom( + allocator, + local_sym_index, + atom_size, + sect.@"align", + atom_code, + relocs, + &.{}, + match, + sect, + macho_file, + ); + } - if (self.end_atoms.getPtr(match)) |last| { - last.*.next = atom; - atom.prev = last.*; - last.* = atom; + var next_nlist_count: usize = 0; + while (next_nlist_count < filtered_nlists.len) { + const next_nlist = filtered_nlists[next_nlist_count]; + const addr = next_nlist.nlist.n_value; + const atom_nlists = NlistWithIndex.filterByAddress( + filtered_nlists[next_nlist_count..], + addr, + addr + 1, + ); + next_nlist_count += atom_nlists.len; + + const local_sym_index = @intCast(u32, macho_file.locals.items.len); + try macho_file.locals.append(allocator, .{ + .n_strx = 0, + .n_type = macho.N_SECT, + .n_sect = @intCast(u8, macho_file.section_ordinals.getIndex(match).? + 1), + .n_desc = 0, + .n_value = addr, + }); + + const atom_size = blk: { + const end_addr = if (next_nlist_count < filtered_nlists.len) + filtered_nlists[next_nlist_count].nlist.n_value + else + sect.addr + sect.size; + break :blk end_addr - addr; + }; + const atom_code: ?[]const u8 = if (code) |cc| + cc[addr - sect.addr ..][0..atom_size] + else + null; + const atom_align = if (addr > 0) + math.min(@ctz(u64, addr), sect.@"align") + else + sect.@"align"; + try self.parseIntoAtom( + allocator, + local_sym_index, + atom_size, + atom_align, + atom_code, + relocs, + atom_nlists, + match, + sect, + macho_file, + ); + } } else { - try self.end_atoms.putNoClobber(allocator, match, atom); + // If there is no symbol to refer to this atom, we create + // a temp one, unless we already did that when working out the relocations + // of other atoms. + const local_sym_index = self.sections_as_symbols.get(sect_id) orelse blk: { + const local_sym_index = @intCast(u32, macho_file.locals.items.len); + try macho_file.locals.append(allocator, .{ + .n_strx = 0, + .n_type = macho.N_SECT, + .n_sect = @intCast(u8, macho_file.section_ordinals.getIndex(match).? + 1), + .n_desc = 0, + .n_value = sect.addr, + }); + try self.sections_as_symbols.putNoClobber(allocator, sect_id, local_sym_index); + break :blk local_sym_index; + }; + try self.parseIntoAtom( + allocator, + local_sym_index, + sect.size, + sect.@"align", + code, + relocs, + filtered_nlists, + match, + sect, + macho_file, + ); } - try self.contained_atoms.append(allocator, atom); } } -// const Context = struct { -// allocator: *Allocator, -// object: *Object, -// macho_file: *MachO, -// match: MachO.MatchingSection, -// }; - -// const AtomParser = struct { -// section: macho.section_64, -// code: []u8, -// relocs: []macho.relocation_info, -// nlists: []NlistWithIndex, -// index: u32 = 0, - -// fn peek(self: AtomParser) ?NlistWithIndex { -// return if (self.index + 1 < self.nlists.len) self.nlists[self.index + 1] else null; -// } - -// fn lessThanBySeniority(context: Context, lhs: NlistWithIndex, rhs: NlistWithIndex) bool { -// if (!MachO.symbolIsExt(rhs.nlist)) { -// return MachO.symbolIsTemp(lhs.nlist, context.object.getString(lhs.nlist.n_strx)); -// } else if (MachO.symbolIsPext(rhs.nlist) or MachO.symbolIsWeakDef(rhs.nlist)) { -// return !MachO.symbolIsExt(lhs.nlist); -// } else { -// return false; -// } -// } - -// pub fn next(self: *AtomParser, context: Context) !?*Atom { -// if (self.index == self.nlists.len) return null; - -// const tracy = trace(@src()); -// defer tracy.end(); - -// var aliases = std.ArrayList(NlistWithIndex).init(context.allocator); -// defer aliases.deinit(); - -// const next_nlist: ?NlistWithIndex = blk: while (true) { -// const curr_nlist = self.nlists[self.index]; -// try aliases.append(curr_nlist); - -// if (self.peek()) |next_nlist| { -// if (curr_nlist.nlist.n_value == next_nlist.nlist.n_value) { -// self.index += 1; -// continue; -// } -// break :blk next_nlist; -// } -// break :blk null; -// } else null; - -// for (aliases.items) |*nlist_with_index| { -// nlist_with_index.index = context.object.symbol_mapping.get(nlist_with_index.index) orelse unreachable; -// } - -// if (aliases.items.len > 1) { -// // Bubble-up senior symbol as the main link to the atom. -// sort.sort( -// NlistWithIndex, -// aliases.items, -// context, -// AtomParser.lessThanBySeniority, -// ); -// } - -// const senior_nlist = aliases.pop(); -// const senior_sym = &context.macho_file.locals.items[senior_nlist.index]; -// senior_sym.n_sect = @intCast(u8, context.macho_file.section_ordinals.getIndex(context.match).? + 1); - -// const start_addr = senior_nlist.nlist.n_value - self.section.addr; -// const end_addr = if (next_nlist) |n| n.nlist.n_value - self.section.addr else self.section.size; - -// const code = self.code[start_addr..end_addr]; -// const size = code.len; - -// const max_align = self.section.@"align"; -// const actual_align = if (senior_nlist.nlist.n_value > 0) -// math.min(@ctz(u64, senior_nlist.nlist.n_value), max_align) -// else -// max_align; - -// const stab: ?Atom.Stab = if (context.object.debug_info) |di| blk: { -// // TODO there has to be a better to handle this. -// for (di.inner.func_list.items) |func| { -// if (func.pc_range) |range| { -// if (senior_nlist.nlist.n_value >= range.start and senior_nlist.nlist.n_value < range.end) { -// break :blk Atom.Stab{ -// .function = range.end - range.start, -// }; -// } -// } -// } -// // TODO -// // if (self.macho_file.globals.contains(self.macho_file.getString(senior_sym.strx))) break :blk .global; -// break :blk .static; -// } else null; - -// const atom = try context.macho_file.createEmptyAtom(senior_nlist.index, size, actual_align); -// atom.stab = stab; - -// const is_zerofill = blk: { -// const section_type = commands.sectionType(self.section); -// break :blk section_type == macho.S_ZEROFILL or section_type == macho.S_THREAD_LOCAL_ZEROFILL; -// }; -// if (!is_zerofill) { -// mem.copy(u8, atom.code.items, code); -// } - -// try atom.aliases.ensureTotalCapacity(context.allocator, aliases.items.len); -// for (aliases.items) |alias| { -// atom.aliases.appendAssumeCapacity(alias.index); -// const sym = &context.macho_file.locals.items[alias.index]; -// sym.n_sect = @intCast(u8, context.macho_file.section_ordinals.getIndex(context.match).? + 1); -// } - -// try atom.parseRelocs(self.relocs, .{ -// .base_addr = self.section.addr, -// .base_offset = start_addr, -// .allocator = context.allocator, -// .object = context.object, -// .macho_file = context.macho_file, -// }); - -// if (context.macho_file.has_dices) { -// const dices = filterDice( -// context.object.data_in_code_entries.items, -// senior_nlist.nlist.n_value, -// senior_nlist.nlist.n_value + size, -// ); -// try atom.dices.ensureTotalCapacity(context.allocator, dices.len); - -// for (dices) |dice| { -// atom.dices.appendAssumeCapacity(.{ -// .offset = dice.offset - try math.cast(u32, senior_nlist.nlist.n_value), -// .length = dice.length, -// .kind = dice.kind, -// }); -// } -// } - -// self.index += 1; - -// return atom; -// } -// }; - fn parseIntoAtom( self: *Object, allocator: Allocator, @@ -675,8 +625,9 @@ fn parseIntoAtom( relocs: []const macho.relocation_info, nlists: []const NlistWithIndex, match: MatchingSection, + sect: macho.section_64, macho_file: *MachO, -) !*Atom { +) !void { const sym = macho_file.locals.items[local_sym_index]; const align_pow_2 = try math.powi(u32, 2, alignment); const aligned_size = mem.alignForwardGeneric(u64, size, align_pow_2); @@ -686,8 +637,11 @@ fn parseIntoAtom( mem.copy(u8, atom.code.items, cc); } - try atom.parseRelocs(relocs, .{ - .base_addr = sym.n_value, + const base_offset = sym.n_value - sect.addr; + const filtered_relocs = filterRelocs(relocs, base_offset, base_offset + size); + try atom.parseRelocs(filtered_relocs, .{ + .base_addr = sect.addr, + .base_offset = @intCast(i32, base_offset), .allocator = allocator, .object = self, .macho_file = macho_file, @@ -740,9 +694,41 @@ fn parseIntoAtom( .offset = nlist.n_value - sym.n_value, .stab = stab, }); + + try macho_file.atom_by_index_table.putNoClobber(allocator, sym_index, atom); } - return atom; + const is_gc_root = blk: { + if (sect.isDontDeadStrip()) break :blk true; + if (sect.isDontDeadStripIfReferencesLive()) { + // TODO if isDontDeadStripIfReferencesLive we should analyse the edges + // before making it a GC root + break :blk true; + } + if (mem.eql(u8, "__StaticInit", sect.sectName())) break :blk true; + switch (sect.type_()) { + macho.S_MOD_INIT_FUNC_POINTERS, + macho.S_MOD_TERM_FUNC_POINTERS, + => break :blk true, + else => break :blk false, + } + }; + if (is_gc_root) { + try macho_file.gc_roots.putNoClobber(allocator, atom, {}); + } + + if (!self.start_atoms.contains(match)) { + try self.start_atoms.putNoClobber(allocator, match, atom); + } + + if (self.end_atoms.getPtr(match)) |last| { + last.*.next = atom; + atom.prev = last.*; + last.* = atom; + } else { + try self.end_atoms.putNoClobber(allocator, match, atom); + } + try self.contained_atoms.append(allocator, atom); } fn parseSymtab(self: *Object) void { |
