author     Jakub Konka <kubkon@jakubkonka.com>    2024-07-05 08:16:23 +0200
committer  Jakub Konka <kubkon@jakubkonka.com>    2024-07-18 09:13:07 +0200
commit     e2bfd6fc691a92f9dc36597a8febb03293b0f5ad
tree       b10018453568aff9e88e7b0ffb065ada040abfb9
parent     101299e85625faf29b4afce07ad1e3522ea75421
macho: revamp how we compute dyld relocs
 src/link/MachO.zig                    | 130
 src/link/MachO/Atom.zig               |  37
 src/link/MachO/Object.zig             |   1
 src/link/MachO/ZigObject.zig          |   1
 src/link/MachO/dyld_info/Rebase.zig   | 115
 src/link/MachO/dyld_info/Trie.zig     | 721
 src/link/MachO/dyld_info/bind.zig     | 265
 src/link/MachO/synthetic.zig          | 138
 8 files changed, 628 insertions(+), 780 deletions(-)
diff --git a/src/link/MachO.zig b/src/link/MachO.zig index ed1a78c2ae..b557eb350e 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -82,11 +82,11 @@ stubs_helper: StubsHelperSection = .{}, objc_stubs: ObjcStubsSection = .{}, la_symbol_ptr: LaSymbolPtrSection = .{}, tlv_ptr: TlvPtrSection = .{}, -rebase: RebaseSection = .{}, -bind: BindSection = .{}, -weak_bind: WeakBindSection = .{}, -lazy_bind: LazyBindSection = .{}, -export_trie: ExportTrieSection = .{}, +rebase: Rebase = .{}, +bind: Bind = .{}, +weak_bind: WeakBind = .{}, +lazy_bind: LazyBind = .{}, +export_trie: ExportTrie = .{}, unwind_info: UnwindInfo = .{}, /// Tracked loadable segments during incremental linking. @@ -590,8 +590,6 @@ pub fn flushModule(self: *MachO, arena: Allocator, tid: Zcu.PerThread.Id, prog_n state_log.debug("{}", .{self.dumpState()}); } - try self.initDyldInfoSections(); - // Beyond this point, everything has been allocated a virtual address and we can resolve // the relocations, and commit objects to file. if (self.getZigObject()) |zo| { @@ -2500,87 +2498,6 @@ fn allocateLinkeditSegment(self: *MachO) !void { seg.fileoff = mem.alignForward(u64, fileoff, page_size); } -fn initDyldInfoSections(self: *MachO) !void { - const tracy = trace(@src()); - defer tracy.end(); - - const gpa = self.base.comp.gpa; - - if (self.zig_got_sect_index != null) try self.zig_got.addDyldRelocs(self); - if (self.got_sect_index != null) try self.got.addDyldRelocs(self); - if (self.tlv_ptr_sect_index != null) try self.tlv_ptr.addDyldRelocs(self); - if (self.la_symbol_ptr_sect_index != null) try self.la_symbol_ptr.addDyldRelocs(self); - try self.initExportTrie(); - - var objects = try std.ArrayList(File.Index).initCapacity(gpa, self.objects.items.len + 1); - defer objects.deinit(); - if (self.getZigObject()) |zo| objects.appendAssumeCapacity(zo.index); - objects.appendSliceAssumeCapacity(self.objects.items); - - var nrebases: usize = 0; - var nbinds: usize = 0; - var nweak_binds: usize = 0; - for (objects.items) |index| { - const ctx = switch (self.getFile(index).?) 
{ - .zig_object => |x| x.dynamic_relocs, - .object => |x| x.dynamic_relocs, - else => unreachable, - }; - nrebases += ctx.rebase_relocs; - nbinds += ctx.bind_relocs; - nweak_binds += ctx.weak_bind_relocs; - } - if (self.getInternalObject()) |int| { - nrebases += int.num_rebase_relocs; - } - try self.rebase.entries.ensureUnusedCapacity(gpa, nrebases); - try self.bind.entries.ensureUnusedCapacity(gpa, nbinds); - try self.weak_bind.entries.ensureUnusedCapacity(gpa, nweak_binds); -} - -fn initExportTrie(self: *MachO) !void { - const tracy = trace(@src()); - defer tracy.end(); - - const gpa = self.base.comp.gpa; - try self.export_trie.init(gpa); - - const seg = self.getTextSegment(); - for (self.objects.items) |index| { - for (self.getFile(index).?.getSymbols()) |sym_index| { - const sym = self.getSymbol(sym_index); - if (!sym.flags.@"export") continue; - if (sym.getAtom(self)) |atom| if (!atom.flags.alive) continue; - if (sym.getFile(self).?.getIndex() != index) continue; - var flags: u64 = if (sym.flags.abs) - macho.EXPORT_SYMBOL_FLAGS_KIND_ABSOLUTE - else if (sym.flags.tlv) - macho.EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL - else - macho.EXPORT_SYMBOL_FLAGS_KIND_REGULAR; - if (sym.flags.weak) { - flags |= macho.EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION; - self.weak_defines = true; - self.binds_to_weak = true; - } - try self.export_trie.put(gpa, .{ - .name = sym.getName(self), - .vmaddr_offset = sym.getAddress(.{ .stubs = false }, self) - seg.vmaddr, - .export_flags = flags, - }); - } - } - - if (self.mh_execute_header_index) |index| { - const sym = self.getSymbol(index); - try self.export_trie.put(gpa, .{ - .name = sym.getName(self), - .vmaddr_offset = sym.getAddress(.{}, self) - seg.vmaddr, - .export_flags = macho.EXPORT_SYMBOL_FLAGS_KIND_REGULAR, - }); - } -} - fn writeAtoms(self: *MachO) !void { const tracy = trace(@src()); defer tracy.end(); @@ -2659,13 +2576,13 @@ fn writeUnwindInfo(self: *MachO) !void { fn finalizeDyldInfoSections(self: *MachO) !void { const tracy = trace(@src()); defer tracy.end(); - const gpa = self.base.comp.gpa; - - try self.rebase.finalize(gpa); - try self.bind.finalize(gpa, self); - try self.weak_bind.finalize(gpa, self); - try self.lazy_bind.finalize(gpa, self); - try self.export_trie.finalize(gpa); + try self.rebase.updateSize(self); + try self.bind.updateSize(self); + try self.weak_bind.updateSize(self); + if (self.la_symbol_ptr_sect_index) |_| { + try self.lazy_bind.updateSize(self); + } + try self.export_trie.updateSize(self); } fn writeSyntheticSections(self: *MachO) !void { @@ -2742,25 +2659,14 @@ fn writeDyldInfoSections(self: *MachO, off: u32) !u32 { const gpa = self.base.comp.gpa; const cmd = &self.dyld_info_cmd; var needed_size: u32 = 0; - - cmd.rebase_off = needed_size; - cmd.rebase_size = mem.alignForward(u32, @intCast(self.rebase.size()), @alignOf(u64)); needed_size += cmd.rebase_size; - cmd.bind_off = needed_size; - cmd.bind_size = mem.alignForward(u32, @intCast(self.bind.size()), @alignOf(u64)); needed_size += cmd.bind_size; - cmd.weak_bind_off = needed_size; - cmd.weak_bind_size = mem.alignForward(u32, @intCast(self.weak_bind.size()), @alignOf(u64)); needed_size += cmd.weak_bind_size; - cmd.lazy_bind_off = needed_size; - cmd.lazy_bind_size = mem.alignForward(u32, @intCast(self.lazy_bind.size()), @alignOf(u64)); needed_size += cmd.lazy_bind_size; - cmd.export_off = needed_size; - cmd.export_size = mem.alignForward(u32, @intCast(self.export_trie.size), @alignOf(u64)); needed_size += cmd.export_size; const buffer = try gpa.alloc(u8, needed_size); @@ -2785,7 
+2691,6 @@ fn writeDyldInfoSections(self: *MachO, off: u32) !u32 { cmd.weak_bind_off += off; cmd.lazy_bind_off += off; cmd.export_off += off; - try self.base.file.?.pwriteAll(buffer, off); return off + needed_size; @@ -4831,6 +4736,7 @@ const mem = std.mem; const meta = std.meta; const aarch64 = @import("../arch/aarch64/bits.zig"); +const bind = @import("MachO/dyld_info/bind.zig"); const calcUuid = @import("MachO/uuid.zig").calcUuid; const codegen = @import("../codegen.zig"); const dead_strip = @import("MachO/dead_strip.zig"); @@ -4851,13 +4757,13 @@ const Alignment = Atom.Alignment; const Allocator = mem.Allocator; const Archive = @import("MachO/Archive.zig"); pub const Atom = @import("MachO/Atom.zig"); -const BindSection = synthetic.BindSection; +const Bind = bind.Bind; const Cache = std.Build.Cache; const CodeSignature = @import("MachO/CodeSignature.zig"); const Compilation = @import("../Compilation.zig"); pub const DebugSymbols = @import("MachO/DebugSymbols.zig"); const Dylib = @import("MachO/Dylib.zig"); -const ExportTrieSection = synthetic.ExportTrieSection; +const ExportTrie = @import("MachO/dyld_info/Trie.zig"); const File = @import("MachO/file.zig").File; const GotSection = synthetic.GotSection; const Hash = std.hash.Wyhash; @@ -4865,7 +4771,7 @@ const Indsymtab = synthetic.Indsymtab; const InternalObject = @import("MachO/InternalObject.zig"); const ObjcStubsSection = synthetic.ObjcStubsSection; const Object = @import("MachO/Object.zig"); -const LazyBindSection = synthetic.LazyBindSection; +const LazyBind = bind.LazyBind; const LaSymbolPtrSection = synthetic.LaSymbolPtrSection; const LibStub = tapi.LibStub; const Liveness = @import("../Liveness.zig"); @@ -4875,7 +4781,7 @@ const Zcu = @import("../Zcu.zig"); /// Deprecated. const Module = Zcu; const InternPool = @import("../InternPool.zig"); -const RebaseSection = synthetic.RebaseSection; +const Rebase = @import("MachO/dyld_info/Rebase.zig"); pub const Relocation = @import("MachO/Relocation.zig"); const StringTable = @import("StringTable.zig"); const StubsSection = synthetic.StubsSection; @@ -4885,6 +4791,6 @@ const Thunk = thunks.Thunk; const TlvPtrSection = synthetic.TlvPtrSection; const Value = @import("../Value.zig"); const UnwindInfo = @import("MachO/UnwindInfo.zig"); -const WeakBindSection = synthetic.WeakBindSection; +const WeakBind = bind.WeakBind; const ZigGotSection = synthetic.ZigGotSection; const ZigObject = @import("MachO/ZigObject.zig"); diff --git a/src/link/MachO/Atom.zig b/src/link/MachO/Atom.zig index c0a53da7e4..c7a65843bb 100644 --- a/src/link/MachO/Atom.zig +++ b/src/link/MachO/Atom.zig @@ -460,11 +460,6 @@ pub fn scanRelocs(self: Atom, macho_file: *MachO) !void { defer tracy.end(); assert(self.flags.alive); - const dynrel_ctx = switch (self.getFile(macho_file)) { - .zig_object => |x| &x.dynamic_relocs, - .object => |x| &x.dynamic_relocs, - else => unreachable, - }; const relocs = self.getRelocs(macho_file); for (relocs) |rel| { @@ -537,21 +532,15 @@ pub fn scanRelocs(self: Atom, macho_file: *MachO) !void { continue; } if (symbol.flags.import) { - dynrel_ctx.bind_relocs += 1; if (symbol.flags.weak) { - dynrel_ctx.weak_bind_relocs += 1; macho_file.binds_to_weak = true; } continue; } if (symbol.flags.@"export" and symbol.flags.weak) { - dynrel_ctx.weak_bind_relocs += 1; macho_file.binds_to_weak = true; - } else if (symbol.flags.interposable) { - dynrel_ctx.bind_relocs += 1; } } - dynrel_ctx.rebase_relocs += 1; } }, @@ -651,8 +640,6 @@ fn resolveRelocInner( ) ResolveError!void { const cpu_arch = 
macho_file.getTarget().cpu.arch; const rel_offset = math.cast(usize, rel.offset - self.off) orelse return error.Overflow; - const seg_id = macho_file.sections.items(.segment_id)[self.out_n_sect]; - const seg = macho_file.segments.items[seg_id]; const P = @as(i64, @intCast(self.getAddress(macho_file))) + @as(i64, @intCast(rel_offset)); const A = rel.addend + rel.getRelocAddend(cpu_arch); const S: i64 = @intCast(rel.getTargetAddress(macho_file)); @@ -706,29 +693,8 @@ fn resolveRelocInner( try writer.writeInt(u64, @intCast(S - TLS), .little); return; } - const entry = bind.Entry{ - .target = rel.target, - .offset = @as(u64, @intCast(P)) - seg.vmaddr, - .segment_id = seg_id, - .addend = A, - }; - if (sym.flags.import) { - macho_file.bind.entries.appendAssumeCapacity(entry); - if (sym.flags.weak) { - macho_file.weak_bind.entries.appendAssumeCapacity(entry); - } - return; - } - if (sym.flags.@"export" and sym.flags.weak) { - macho_file.weak_bind.entries.appendAssumeCapacity(entry); - } else if (sym.flags.interposable) { - macho_file.bind.entries.appendAssumeCapacity(entry); - } + if (sym.flags.import) return; } - macho_file.rebase.entries.appendAssumeCapacity(.{ - .offset = @as(u64, @intCast(P)) - seg.vmaddr, - .segment_id = seg_id, - }); try writer.writeInt(u64, @bitCast(S + A - SUB), .little); } else if (rel.meta.length == 2) { try writer.writeInt(u32, @bitCast(@as(i32, @truncate(S + A - SUB))), .little); @@ -1239,7 +1205,6 @@ pub const Alignment = @import("../../InternPool.zig").Alignment; const aarch64 = @import("../aarch64.zig"); const assert = std.debug.assert; -const bind = @import("dyld_info/bind.zig"); const macho = std.macho; const math = std.math; const mem = std.mem; diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index 8ecd88b413..30bdce7d9e 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -30,7 +30,6 @@ data_in_code: std.ArrayListUnmanaged(macho.data_in_code_entry) = .{}, alive: bool = true, hidden: bool = false, -dynamic_relocs: MachO.DynamicRelocs = .{}, output_symtab_ctx: MachO.SymtabCtx = .{}, output_ar_state: Archive.ArState = .{}, diff --git a/src/link/MachO/ZigObject.zig b/src/link/MachO/ZigObject.zig index b59b6a6720..f731f36b7e 100644 --- a/src/link/MachO/ZigObject.zig +++ b/src/link/MachO/ZigObject.zig @@ -48,7 +48,6 @@ relocs: RelocationTable = .{}, dwarf: ?Dwarf = null, -dynamic_relocs: MachO.DynamicRelocs = .{}, output_symtab_ctx: MachO.SymtabCtx = .{}, output_ar_state: Archive.ArState = .{}, diff --git a/src/link/MachO/dyld_info/Rebase.zig b/src/link/MachO/dyld_info/Rebase.zig index f0121cf3dd..8348aa01f8 100644 --- a/src/link/MachO/dyld_info/Rebase.zig +++ b/src/link/MachO/dyld_info/Rebase.zig @@ -1,14 +1,3 @@ -const Rebase = @This(); - -const std = @import("std"); -const assert = std.debug.assert; -const leb = std.leb; -const log = std.log.scoped(.link_dyld_info); -const macho = std.macho; -const testing = std.testing; - -const Allocator = std.mem.Allocator; - entries: std.ArrayListUnmanaged(Entry) = .{}, buffer: std.ArrayListUnmanaged(u8) = .{}, @@ -30,11 +19,94 @@ pub fn deinit(rebase: *Rebase, gpa: Allocator) void { rebase.buffer.deinit(gpa); } -pub fn size(rebase: Rebase) u64 { - return @as(u64, @intCast(rebase.buffer.items.len)); +pub fn updateSize(rebase: *Rebase, macho_file: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + + const gpa = macho_file.base.comp.gpa; + + var objects = try std.ArrayList(File.Index).initCapacity(gpa, macho_file.objects.items.len + 1); + defer objects.deinit(); + 
objects.appendSliceAssumeCapacity(macho_file.objects.items); + if (macho_file.getInternalObject()) |obj| objects.appendAssumeCapacity(obj.index); + + for (objects.items) |index| { + const file = macho_file.getFile(index).?; + for (file.getAtoms()) |atom_index| { + const atom = macho_file.getAtom(atom_index) orelse continue; + if (!atom.flags.alive) continue; + if (atom.getInputSection(macho_file).isZerofill()) continue; + const atom_addr = atom.getAddress(macho_file); + const seg_id = macho_file.sections.items(.segment_id)[atom.out_n_sect]; + const seg = macho_file.segments.items[seg_id]; + for (atom.getRelocs(macho_file)) |rel| { + if (rel.type != .unsigned or rel.meta.length != 3) continue; + if (rel.tag == .@"extern") { + const sym = rel.getTargetSymbol(macho_file); + if (sym.isTlvInit(macho_file)) continue; + if (sym.flags.import) continue; + } + const rel_offset = rel.offset - atom.off; + try rebase.entries.append(gpa, .{ + .offset = atom_addr + rel_offset - seg.vmaddr, + .segment_id = seg_id, + }); + } + } + } + + if (macho_file.got_sect_index) |sid| { + const seg_id = macho_file.sections.items(.segment_id)[sid]; + const seg = macho_file.segments.items[seg_id]; + for (macho_file.got.symbols.items, 0..) |ref, idx| { + const sym = macho_file.getSymbol(ref); + const addr = macho_file.got.getAddress(@intCast(idx), macho_file); + if (!sym.flags.import) { + try rebase.entries.append(gpa, .{ + .offset = addr - seg.vmaddr, + .segment_id = seg_id, + }); + } + } + } + + if (macho_file.la_symbol_ptr_sect_index) |sid| { + const sect = macho_file.sections.items(.header)[sid]; + const seg_id = macho_file.sections.items(.segment_id)[sid]; + const seg = macho_file.segments.items[seg_id]; + for (macho_file.stubs.symbols.items, 0..) |ref, idx| { + const sym = macho_file.getSymbol(ref); + const addr = sect.addr + idx * @sizeOf(u64); + const rebase_entry = Rebase.Entry{ + .offset = addr - seg.vmaddr, + .segment_id = seg_id, + }; + if ((sym.flags.import and !sym.flags.weak) or !sym.flags.import) { + try rebase.entries.append(gpa, rebase_entry); + } + } + } + + if (macho_file.tlv_ptr_sect_index) |sid| { + const seg_id = macho_file.sections.items(.segment_id)[sid]; + const seg = macho_file.segments.items[seg_id]; + for (macho_file.tlv_ptr.symbols.items, 0..) 
|ref, idx| { + const sym = macho_file.getSymbol(ref); + const addr = macho_file.tlv_ptr.getAddress(@intCast(idx), macho_file); + if (!sym.flags.import) { + try rebase.entries.append(gpa, .{ + .offset = addr - seg.vmaddr, + .segment_id = seg_id, + }); + } + } + } + + try rebase.finalize(gpa); + macho_file.dyld_info_cmd.rebase_size = mem.alignForward(u32, @intCast(rebase.buffer.items.len), @alignOf(u64)); } -pub fn finalize(rebase: *Rebase, gpa: Allocator) !void { +fn finalize(rebase: *Rebase, gpa: Allocator) !void { if (rebase.entries.items.len == 0) return; const writer = rebase.buffer.writer(gpa); @@ -198,7 +270,6 @@ fn done(writer: anytype) !void { } pub fn write(rebase: Rebase, writer: anytype) !void { - if (rebase.size() == 0) return; try writer.writeAll(rebase.buffer.items); } @@ -574,3 +645,17 @@ test "rebase - composite" { macho.REBASE_OPCODE_DONE, }, rebase.buffer.items); } + +const std = @import("std"); +const assert = std.debug.assert; +const leb = std.leb; +const log = std.log.scoped(.link_dyld_info); +const macho = std.macho; +const mem = std.mem; +const testing = std.testing; +const trace = @import("../../../tracy.zig").trace; + +const Allocator = mem.Allocator; +const File = @import("../file.zig").File; +const MachO = @import("../../MachO.zig"); +const Rebase = @This(); diff --git a/src/link/MachO/dyld_info/Trie.zig b/src/link/MachO/dyld_info/Trie.zig index a6f717a043..aead1372f0 100644 --- a/src/link/MachO/dyld_info/Trie.zig +++ b/src/link/MachO/dyld_info/Trie.zig @@ -28,463 +28,312 @@ //! After the optional exported symbol information is a byte of how many edges (0-255) that //! this node has leaving it, followed by each edge. Each edge is a zero terminated UTF8 of //! the addition chars in the symbol, followed by a uleb128 offset for the node that edge points to. -const Trie = @This(); - -const std = @import("std"); -const mem = std.mem; -const leb = std.leb; -const log = std.log.scoped(.macho); -const macho = std.macho; -const testing = std.testing; -const assert = std.debug.assert; -const Allocator = mem.Allocator; - -pub const Node = struct { - base: *Trie, - - /// Terminal info associated with this node. - /// If this node is not a terminal node, info is null. - terminal_info: ?struct { - /// Export flags associated with this exported symbol. - export_flags: u64, - /// VM address offset wrt to the section this symbol is defined against. - vmaddr_offset: u64, - } = null, - - /// Offset of this node in the trie output byte stream. - trie_offset: ?u64 = null, - - /// List of all edges originating from this node. - edges: std.ArrayListUnmanaged(Edge) = .{}, - - node_dirty: bool = true, - - /// Edge connecting to nodes in the trie. - pub const Edge = struct { - from: *Node, - to: *Node, - label: []u8, - - fn deinit(self: *Edge, allocator: Allocator) void { - self.to.deinit(allocator); - allocator.destroy(self.to); - allocator.free(self.label); - self.from = undefined; - self.to = undefined; - self.label = undefined; - } - }; - - fn deinit(self: *Node, allocator: Allocator) void { - for (self.edges.items) |*edge| { - edge.deinit(allocator); - } - self.edges.deinit(allocator); - } - - /// Inserts a new node starting from `self`. - fn put(self: *Node, allocator: Allocator, label: []const u8) !*Node { - // Check for match with edges from this node. 
- for (self.edges.items) |*edge| { - const match = mem.indexOfDiff(u8, edge.label, label) orelse return edge.to; - if (match == 0) continue; - if (match == edge.label.len) return edge.to.put(allocator, label[match..]); - - // Found a match, need to splice up nodes. - // From: A -> B - // To: A -> C -> B - const mid = try allocator.create(Node); - mid.* = .{ .base = self.base }; - const to_label = try allocator.dupe(u8, edge.label[match..]); - allocator.free(edge.label); - const to_node = edge.to; - edge.to = mid; - edge.label = try allocator.dupe(u8, label[0..match]); - self.base.node_count += 1; - - try mid.edges.append(allocator, .{ - .from = mid, - .to = to_node, - .label = to_label, - }); - - return if (match == label.len) mid else mid.put(allocator, label[match..]); - } - // Add a new node. - const node = try allocator.create(Node); - node.* = .{ .base = self.base }; - self.base.node_count += 1; +/// The root node of the trie. +root: ?Node.Index = null, +buffer: std.ArrayListUnmanaged(u8) = .{}, +nodes: std.MultiArrayList(Node) = .{}, +edges: std.ArrayListUnmanaged(Edge) = .{}, - try self.edges.append(allocator, .{ - .from = self, - .to = node, - .label = try allocator.dupe(u8, label), - }); +/// Insert a symbol into the trie, updating the prefixes in the process. +/// This operation may change the layout of the trie by splicing edges in +/// certain circumstances. +fn put(self: *Trie, allocator: Allocator, symbol: ExportSymbol) !void { + // const tracy = trace(@src()); + // defer tracy.end(); + + const node_index = try self.putNode(self.root.?, allocator, symbol.name); + const slice = self.nodes.slice(); + slice.items(.is_terminal)[node_index] = true; + slice.items(.vmaddr_offset)[node_index] = symbol.vmaddr_offset; + slice.items(.export_flags)[node_index] = symbol.export_flags; +} - return node; +/// Inserts a new node starting at `node_index`. +fn putNode(self: *Trie, node_index: Node.Index, allocator: Allocator, label: []const u8) !Node.Index { + // Check for match with edges from this node. + for (self.nodes.items(.edges)[node_index].items) |edge_index| { + const edge = &self.edges.items[edge_index]; + const match = mem.indexOfDiff(u8, edge.label, label) orelse return edge.node; + if (match == 0) continue; + if (match == edge.label.len) return self.putNode(edge.node, allocator, label[match..]); + + // Found a match, need to splice up nodes. + // From: A -> B + // To: A -> C -> B + const mid_index = try self.addNode(allocator); + const to_label = edge.label[match..]; + const to_node = edge.node; + edge.node = mid_index; + edge.label = label[0..match]; + + const new_edge_index = try self.addEdge(allocator); + const new_edge = &self.edges.items[new_edge_index]; + new_edge.node = to_node; + new_edge.label = to_label; + try self.nodes.items(.edges)[mid_index].append(allocator, new_edge_index); + + return if (match == label.len) mid_index else self.putNode(mid_index, allocator, label[match..]); } - /// Recursively parses the node from the input byte stream. - fn read(self: *Node, allocator: Allocator, reader: anytype) Trie.ReadError!usize { - self.node_dirty = true; - const trie_offset = try reader.context.getPos(); - self.trie_offset = trie_offset; - - var nread: usize = 0; - - const node_size = try leb.readUleb128(u64, reader); - if (node_size > 0) { - const export_flags = try leb.readUleb128(u64, reader); - // TODO Parse special flags. 
- assert(export_flags & macho.EXPORT_SYMBOL_FLAGS_REEXPORT == 0 and - export_flags & macho.EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER == 0); - - const vmaddr_offset = try leb.readUleb128(u64, reader); - - self.terminal_info = .{ - .export_flags = export_flags, - .vmaddr_offset = vmaddr_offset, - }; - } - - const nedges = try reader.readByte(); - self.base.node_count += nedges; - - nread += (try reader.context.getPos()) - trie_offset; + // Add a new node. + const new_node_index = try self.addNode(allocator); + const new_edge_index = try self.addEdge(allocator); + const new_edge = &self.edges.items[new_edge_index]; + new_edge.node = new_node_index; + new_edge.label = label; + try self.nodes.items(.edges)[node_index].append(allocator, new_edge_index); - var i: usize = 0; - while (i < nedges) : (i += 1) { - const edge_start_pos = try reader.context.getPos(); - - const label = blk: { - var label_buf = std.ArrayList(u8).init(allocator); - while (true) { - const next = try reader.readByte(); - if (next == @as(u8, 0)) - break; - try label_buf.append(next); - } - break :blk try label_buf.toOwnedSlice(); - }; - - const seek_to = try leb.readUleb128(u64, reader); - const return_pos = try reader.context.getPos(); - - nread += return_pos - edge_start_pos; - try reader.context.seekTo(seek_to); - - const node = try allocator.create(Node); - node.* = .{ .base = self.base }; + return new_node_index; +} - nread += try node.read(allocator, reader); - try self.edges.append(allocator, .{ - .from = self, - .to = node, - .label = label, +pub fn updateSize(self: *Trie, macho_file: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + + const gpa = macho_file.base.comp.gpa; + + try self.init(gpa); + // TODO + // try self.nodes.ensureUnusedCapacity(gpa, macho_file.resolver.values.items.len * 2); + // try self.edges.ensureUnusedCapacity(gpa, macho_file.resolver.values.items.len * 2); + + const seg = macho_file.getTextSegment(); + for (macho_file.objects.items) |index| { + for (macho_file.getFile(index).?.getSymbols()) |ref| { + const sym = macho_file.getSymbol(ref); + if (!sym.flags.@"export") continue; + if (sym.getAtom(macho_file)) |atom| if (!atom.flags.alive) continue; + var flags: u64 = if (sym.flags.abs) + macho.EXPORT_SYMBOL_FLAGS_KIND_ABSOLUTE + else if (sym.flags.tlv) + macho.EXPORT_SYMBOL_FLAGS_KIND_THREAD_LOCAL + else + macho.EXPORT_SYMBOL_FLAGS_KIND_REGULAR; + if (sym.flags.weak) { + flags |= macho.EXPORT_SYMBOL_FLAGS_WEAK_DEFINITION; + macho_file.weak_defines = true; + macho_file.binds_to_weak = true; + } + try self.put(gpa, .{ + .name = sym.getName(macho_file), + .vmaddr_offset = sym.getAddress(.{ .stubs = false }, macho_file) - seg.vmaddr, + .export_flags = flags, }); - try reader.context.seekTo(return_pos); - } - - return nread; - } - - /// Writes this node to a byte stream. - /// The children of this node *are* not written to the byte stream - /// recursively. To write all nodes to a byte stream in sequence, - /// iterate over `Trie.ordered_nodes` and call this method on each node. - /// This is one of the requirements of the MachO. - /// Panics if `finalize` was not called before calling this method. - fn write(self: Node, writer: anytype) !void { - assert(!self.node_dirty); - if (self.terminal_info) |info| { - // Terminal node info: encode export flags and vmaddr offset of this symbol. - var info_buf: [@sizeOf(u64) * 2]u8 = undefined; - var info_stream = std.io.fixedBufferStream(&info_buf); - // TODO Implement for special flags. 
- assert(info.export_flags & macho.EXPORT_SYMBOL_FLAGS_REEXPORT == 0 and - info.export_flags & macho.EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER == 0); - try leb.writeUleb128(info_stream.writer(), info.export_flags); - try leb.writeUleb128(info_stream.writer(), info.vmaddr_offset); - - // Encode the size of the terminal node info. - var size_buf: [@sizeOf(u64)]u8 = undefined; - var size_stream = std.io.fixedBufferStream(&size_buf); - try leb.writeUleb128(size_stream.writer(), info_stream.pos); - - // Now, write them to the output stream. - try writer.writeAll(size_buf[0..size_stream.pos]); - try writer.writeAll(info_buf[0..info_stream.pos]); - } else { - // Non-terminal node is delimited by 0 byte. - try writer.writeByte(0); - } - // Write number of edges (max legal number of edges is 256). - try writer.writeByte(@as(u8, @intCast(self.edges.items.len))); - - for (self.edges.items) |edge| { - // Write edge label and offset to next node in trie. - try writer.writeAll(edge.label); - try writer.writeByte(0); - try leb.writeUleb128(writer, edge.to.trie_offset.?); } } - const FinalizeResult = struct { - /// Current size of this node in bytes. - node_size: u64, - - /// True if the trie offset of this node in the output byte stream - /// would need updating; false otherwise. - updated: bool, - }; + try self.finalize(gpa); - /// Updates offset of this node in the output byte stream. - fn finalize(self: *Node, offset_in_trie: u64) !FinalizeResult { - var stream = std.io.countingWriter(std.io.null_writer); - const writer = stream.writer(); - - var node_size: u64 = 0; - if (self.terminal_info) |info| { - try leb.writeUleb128(writer, info.export_flags); - try leb.writeUleb128(writer, info.vmaddr_offset); - try leb.writeUleb128(writer, stream.bytes_written); - } else { - node_size += 1; // 0x0 for non-terminal nodes - } - node_size += 1; // 1 byte for edge count - - for (self.edges.items) |edge| { - const next_node_offset = edge.to.trie_offset orelse 0; - node_size += edge.label.len + 1; - try leb.writeUleb128(writer, next_node_offset); - } - - const trie_offset = self.trie_offset orelse 0; - const updated = offset_in_trie != trie_offset; - self.trie_offset = offset_in_trie; - self.node_dirty = false; - node_size += stream.bytes_written; - - return FinalizeResult{ .node_size = node_size, .updated = updated }; - } -}; - -/// The root node of the trie. -root: ?*Node = null, - -/// If you want to access nodes ordered in DFS fashion, -/// you should call `finalize` first since the nodes -/// in this container are not guaranteed to not be stale -/// if more insertions took place after the last `finalize` -/// call. -ordered_nodes: std.ArrayListUnmanaged(*Node) = .{}, - -/// The size of the trie in bytes. -/// This value may be outdated if there were additional -/// insertions performed after `finalize` was called. -/// Call `finalize` before accessing this value to ensure -/// it is up-to-date. -size: u64 = 0, - -/// Number of nodes currently in the trie. -node_count: usize = 0, - -trie_dirty: bool = true, - -/// Export symbol that is to be placed in the trie. -pub const ExportSymbol = struct { - /// Name of the symbol. - name: []const u8, - - /// Offset of this symbol's virtual memory address from the beginning - /// of the __TEXT segment. - vmaddr_offset: u64, - - /// Export flags of this exported symbol. - export_flags: u64, -}; - -/// Insert a symbol into the trie, updating the prefixes in the process. -/// This operation may change the layout of the trie by splicing edges in -/// certain circumstances. 
-pub fn put(self: *Trie, allocator: Allocator, symbol: ExportSymbol) !void { - const node = try self.root.?.put(allocator, symbol.name); - node.terminal_info = .{ - .vmaddr_offset = symbol.vmaddr_offset, - .export_flags = symbol.export_flags, - }; - self.trie_dirty = true; + macho_file.dyld_info_cmd.export_size = mem.alignForward(u32, @intCast(self.buffer.items.len), @alignOf(u64)); } /// Finalizes this trie for writing to a byte stream. /// This step performs multiple passes through the trie ensuring /// there are no gaps after every `Node` is ULEB128 encoded. /// Call this method before trying to `write` the trie to a byte stream. -pub fn finalize(self: *Trie, allocator: Allocator) !void { - if (!self.trie_dirty) return; +fn finalize(self: *Trie, allocator: Allocator) !void { + const tracy = trace(@src()); + defer tracy.end(); - self.ordered_nodes.shrinkRetainingCapacity(0); - try self.ordered_nodes.ensureTotalCapacity(allocator, self.node_count); + var ordered_nodes = std.ArrayList(Node.Index).init(allocator); + defer ordered_nodes.deinit(); + try ordered_nodes.ensureTotalCapacityPrecise(self.nodes.items(.is_terminal).len); - var fifo = std.fifo.LinearFifo(*Node, .Dynamic).init(allocator); + var fifo = std.fifo.LinearFifo(Node.Index, .Dynamic).init(allocator); defer fifo.deinit(); try fifo.writeItem(self.root.?); - while (fifo.readItem()) |next| { - for (next.edges.items) |*edge| { - try fifo.writeItem(edge.to); + while (fifo.readItem()) |next_index| { + const edges = &self.nodes.items(.edges)[next_index]; + for (edges.items) |edge_index| { + const edge = self.edges.items[edge_index]; + try fifo.writeItem(edge.node); } - self.ordered_nodes.appendAssumeCapacity(next); + ordered_nodes.appendAssumeCapacity(next_index); } var more: bool = true; + var size: u32 = 0; while (more) { - self.size = 0; + size = 0; more = false; - for (self.ordered_nodes.items) |node| { - const res = try node.finalize(self.size); - self.size += res.node_size; + for (ordered_nodes.items) |node_index| { + const res = try self.finalizeNode(node_index, size); + size += res.node_size; if (res.updated) more = true; } } - self.trie_dirty = false; + try self.buffer.ensureTotalCapacityPrecise(allocator, size); + for (ordered_nodes.items) |node_index| { + try self.writeNode(node_index, self.buffer.writer(allocator)); + } } -const ReadError = error{ - OutOfMemory, - EndOfStream, - Overflow, +const FinalizeNodeResult = struct { + /// Current size of this node in bytes. + node_size: u32, + + /// True if the trie offset of this node in the output byte stream + /// would need updating; false otherwise. + updated: bool, }; -/// Parse the trie from a byte stream. -pub fn read(self: *Trie, allocator: Allocator, reader: anytype) ReadError!usize { - return self.root.?.read(allocator, reader); -} +/// Updates offset of this node in the output byte stream. 
+fn finalizeNode(self: *Trie, node_index: Node.Index, offset_in_trie: u32) !FinalizeNodeResult { + var stream = std.io.countingWriter(std.io.null_writer); + const writer = stream.writer(); + const slice = self.nodes.slice(); + + var node_size: u32 = 0; + if (slice.items(.is_terminal)[node_index]) { + const export_flags = slice.items(.export_flags)[node_index]; + const vmaddr_offset = slice.items(.vmaddr_offset)[node_index]; + try leb.writeULEB128(writer, export_flags); + try leb.writeULEB128(writer, vmaddr_offset); + try leb.writeULEB128(writer, stream.bytes_written); + } else { + node_size += 1; // 0x0 for non-terminal nodes + } + node_size += 1; // 1 byte for edge count -/// Write the trie to a byte stream. -/// Panics if the trie was not finalized using `finalize` before calling this method. -pub fn write(self: Trie, writer: anytype) !void { - assert(!self.trie_dirty); - for (self.ordered_nodes.items) |node| { - try node.write(writer); + for (slice.items(.edges)[node_index].items) |edge_index| { + const edge = &self.edges.items[edge_index]; + const next_node_offset = slice.items(.trie_offset)[edge.node]; + node_size += @intCast(edge.label.len + 1); + try leb.writeULEB128(writer, next_node_offset); } + + const trie_offset = slice.items(.trie_offset)[node_index]; + const updated = offset_in_trie != trie_offset; + slice.items(.trie_offset)[node_index] = offset_in_trie; + node_size += @intCast(stream.bytes_written); + + return .{ .node_size = node_size, .updated = updated }; } -pub fn init(self: *Trie, allocator: Allocator) !void { +fn init(self: *Trie, allocator: Allocator) !void { assert(self.root == null); - const root = try allocator.create(Node); - root.* = .{ .base = self }; - self.root = root; - self.node_count += 1; + self.root = try self.addNode(allocator); } pub fn deinit(self: *Trie, allocator: Allocator) void { - if (self.root) |root| { - root.deinit(allocator); - allocator.destroy(root); + for (self.nodes.items(.edges)) |*edges| { + edges.deinit(allocator); } - self.ordered_nodes.deinit(allocator); + self.nodes.deinit(allocator); + self.edges.deinit(allocator); + self.buffer.deinit(allocator); } -test "Trie node count" { - const gpa = testing.allocator; - var trie: Trie = .{}; - defer trie.deinit(gpa); - try trie.init(gpa); +pub fn write(self: Trie, writer: anytype) !void { + if (self.buffer.items.len == 0) return; + try writer.writeAll(self.buffer.items); +} - try testing.expectEqual(@as(usize, 1), trie.node_count); - try testing.expect(trie.root != null); +/// Writes this node to a byte stream. +/// The children of this node *are* not written to the byte stream +/// recursively. To write all nodes to a byte stream in sequence, +/// iterate over `Trie.ordered_nodes` and call this method on each node. +/// This is one of the requirements of the MachO. +/// Panics if `finalize` was not called before calling this method. +fn writeNode(self: *Trie, node_index: Node.Index, writer: anytype) !void { + const slice = self.nodes.slice(); + const edges = slice.items(.edges)[node_index]; + const is_terminal = slice.items(.is_terminal)[node_index]; + const export_flags = slice.items(.export_flags)[node_index]; + const vmaddr_offset = slice.items(.vmaddr_offset)[node_index]; + + if (is_terminal) { + // Terminal node info: encode export flags and vmaddr offset of this symbol. + var info_buf: [@sizeOf(u64) * 2]u8 = undefined; + var info_stream = std.io.fixedBufferStream(&info_buf); + // TODO Implement for special flags. 
+ assert(export_flags & macho.EXPORT_SYMBOL_FLAGS_REEXPORT == 0 and + export_flags & macho.EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER == 0); + try leb.writeULEB128(info_stream.writer(), export_flags); + try leb.writeULEB128(info_stream.writer(), vmaddr_offset); + + // Encode the size of the terminal node info. + var size_buf: [@sizeOf(u64)]u8 = undefined; + var size_stream = std.io.fixedBufferStream(&size_buf); + try leb.writeULEB128(size_stream.writer(), info_stream.pos); + + // Now, write them to the output stream. + try writer.writeAll(size_buf[0..size_stream.pos]); + try writer.writeAll(info_buf[0..info_stream.pos]); + } else { + // Non-terminal node is delimited by 0 byte. + try writer.writeByte(0); + } + // Write number of edges (max legal number of edges is 256). + try writer.writeByte(@as(u8, @intCast(edges.items.len))); + + for (edges.items) |edge_index| { + const edge = self.edges.items[edge_index]; + // Write edge label and offset to next node in trie. + try writer.writeAll(edge.label); + try writer.writeByte(0); + try leb.writeULEB128(writer, slice.items(.trie_offset)[edge.node]); + } +} - try trie.put(gpa, .{ - .name = "_main", - .vmaddr_offset = 0, - .export_flags = 0, - }); - try testing.expectEqual(@as(usize, 2), trie.node_count); +fn addNode(self: *Trie, allocator: Allocator) !Node.Index { + const index: Node.Index = @intCast(try self.nodes.addOne(allocator)); + self.nodes.set(index, .{}); + return index; +} - // Inserting the same node shouldn't update the trie. - try trie.put(gpa, .{ - .name = "_main", - .vmaddr_offset = 0, - .export_flags = 0, - }); - try testing.expectEqual(@as(usize, 2), trie.node_count); +fn addEdge(self: *Trie, allocator: Allocator) !Edge.Index { + const index: Edge.Index = @intCast(self.edges.items.len); + const edge = try self.edges.addOne(allocator); + edge.* = .{}; + return index; +} - try trie.put(gpa, .{ - .name = "__mh_execute_header", - .vmaddr_offset = 0x1000, - .export_flags = 0, - }); - try testing.expectEqual(@as(usize, 4), trie.node_count); +/// Export symbol that is to be placed in the trie. +pub const ExportSymbol = struct { + /// Name of the symbol. + name: []const u8, - // Inserting the same node shouldn't update the trie. - try trie.put(gpa, .{ - .name = "__mh_execute_header", - .vmaddr_offset = 0x1000, - .export_flags = 0, - }); - try testing.expectEqual(@as(usize, 4), trie.node_count); - try trie.put(gpa, .{ - .name = "_main", - .vmaddr_offset = 0, - .export_flags = 0, - }); - try testing.expectEqual(@as(usize, 4), trie.node_count); -} + /// Offset of this symbol's virtual memory address from the beginning + /// of the __TEXT segment. + vmaddr_offset: u64, -test "Trie basic" { - const gpa = testing.allocator; - var trie: Trie = .{}; - defer trie.deinit(gpa); - try trie.init(gpa); + /// Export flags of this exported symbol. 
+ export_flags: u64, +}; - // root --- _st ---> node - try trie.put(gpa, .{ - .name = "_st", - .vmaddr_offset = 0, - .export_flags = 0, - }); - try testing.expect(trie.root.?.edges.items.len == 1); - try testing.expect(mem.eql(u8, trie.root.?.edges.items[0].label, "_st")); - - { - // root --- _st ---> node --- art ---> node - try trie.put(gpa, .{ - .name = "_start", - .vmaddr_offset = 0, - .export_flags = 0, - }); - try testing.expect(trie.root.?.edges.items.len == 1); - - const nextEdge = &trie.root.?.edges.items[0]; - try testing.expect(mem.eql(u8, nextEdge.label, "_st")); - try testing.expect(nextEdge.to.edges.items.len == 1); - try testing.expect(mem.eql(u8, nextEdge.to.edges.items[0].label, "art")); - } - { - // root --- _ ---> node --- st ---> node --- art ---> node - // | - // | --- main ---> node - try trie.put(gpa, .{ - .name = "_main", - .vmaddr_offset = 0, - .export_flags = 0, - }); - try testing.expect(trie.root.?.edges.items.len == 1); - - const nextEdge = &trie.root.?.edges.items[0]; - try testing.expect(mem.eql(u8, nextEdge.label, "_")); - try testing.expect(nextEdge.to.edges.items.len == 2); - try testing.expect(mem.eql(u8, nextEdge.to.edges.items[0].label, "st")); - try testing.expect(mem.eql(u8, nextEdge.to.edges.items[1].label, "main")); - - const nextNextEdge = &nextEdge.to.edges.items[0]; - try testing.expect(mem.eql(u8, nextNextEdge.to.edges.items[0].label, "art")); - } -} +const Node = struct { + is_terminal: bool = false, + + /// Export flags associated with this exported symbol. + export_flags: u64 = 0, + + /// VM address offset wrt to the section this symbol is defined against. + vmaddr_offset: u64 = 0, + + /// Offset of this node in the trie output byte stream. + trie_offset: u32 = 0, + + /// List of all edges originating from this node. + edges: std.ArrayListUnmanaged(Edge.Index) = .{}, + + const Index = u32; +}; + +/// Edge connecting nodes in the trie. +const Edge = struct { + /// Target node in the trie. + node: Node.Index = 0, + + /// Matching prefix. + label: []const u8 = "", + + const Index = u32; +}; fn expectEqualHexStrings(expected: []const u8, given: []const u8) !void { assert(expected.len > 0); @@ -502,7 +351,7 @@ fn expectEqualHexStrings(expected: []const u8, given: []const u8) !void { } test "write Trie to a byte stream" { - var gpa = testing.allocator; + const gpa = testing.allocator; var trie: Trie = .{}; defer trie.deinit(gpa); try trie.init(gpa); @@ -519,7 +368,6 @@ test "write Trie to a byte stream" { }); try trie.finalize(gpa); - try trie.finalize(gpa); // Finalizing mulitple times is a nop subsequently unless we add new nodes. const exp_buffer = [_]u8{ 0x0, 0x1, // node root @@ -531,51 +379,7 @@ test "write Trie to a byte stream" { 0x2, 0x0, 0x0, 0x0, // terminal node 0x3, 0x0, 0x80, 0x20, 0x0, // terminal node }; - - const buffer = try gpa.alloc(u8, trie.size); - defer gpa.free(buffer); - var stream = std.io.fixedBufferStream(buffer); - { - _ = try trie.write(stream.writer()); - try expectEqualHexStrings(&exp_buffer, buffer); - } - { - // Writing finalized trie again should yield the same result. 
- try stream.seekTo(0); - _ = try trie.write(stream.writer()); - try expectEqualHexStrings(&exp_buffer, buffer); - } -} - -test "parse Trie from byte stream" { - const gpa = testing.allocator; - - const in_buffer = [_]u8{ - 0x0, 0x1, // node root - 0x5f, 0x0, 0x5, // edge '_' - 0x0, 0x2, // non-terminal node - 0x5f, 0x6d, 0x68, 0x5f, 0x65, 0x78, 0x65, 0x63, 0x75, 0x74, // edge '_mh_execute_header' - 0x65, 0x5f, 0x68, 0x65, 0x61, 0x64, 0x65, 0x72, 0x0, 0x21, // edge '_mh_execute_header' - 0x6d, 0x61, 0x69, 0x6e, 0x0, 0x25, // edge 'main' - 0x2, 0x0, 0x0, 0x0, // terminal node - 0x3, 0x0, 0x80, 0x20, 0x0, // terminal node - }; - - var in_stream = std.io.fixedBufferStream(&in_buffer); - var trie: Trie = .{}; - defer trie.deinit(gpa); - try trie.init(gpa); - const nread = try trie.read(gpa, in_stream.reader()); - - try testing.expect(nread == in_buffer.len); - - try trie.finalize(gpa); - - const out_buffer = try gpa.alloc(u8, trie.size); - defer gpa.free(out_buffer); - var out_stream = std.io.fixedBufferStream(out_buffer); - _ = try trie.write(out_stream.writer()); - try expectEqualHexStrings(&in_buffer, out_buffer); + try expectEqualHexStrings(&exp_buffer, trie.buffer.items); } test "ordering bug" { @@ -602,11 +406,18 @@ test "ordering bug" { 0x88, 0x80, 0x02, 0x01, 0x73, 0x53, 0x74, 0x72, 0x00, 0x12, 0x03, 0x00, 0xD8, 0x0A, 0x00, }; - - const buffer = try gpa.alloc(u8, trie.size); - defer gpa.free(buffer); - var stream = std.io.fixedBufferStream(buffer); - // Writing finalized trie again should yield the same result. - _ = try trie.write(stream.writer()); - try expectEqualHexStrings(&exp_buffer, buffer); + try expectEqualHexStrings(&exp_buffer, trie.buffer.items); } + +const assert = std.debug.assert; +const leb = std.leb; +const log = std.log.scoped(.macho); +const macho = std.macho; +const mem = std.mem; +const std = @import("std"); +const testing = std.testing; +const trace = @import("../../../tracy.zig").trace; + +const Allocator = mem.Allocator; +const MachO = @import("../../MachO.zig"); +const Trie = @This(); diff --git a/src/link/MachO/dyld_info/bind.zig b/src/link/MachO/dyld_info/bind.zig index 15d3df76e5..28e8b1d7d2 100644 --- a/src/link/MachO/dyld_info/bind.zig +++ b/src/link/MachO/dyld_info/bind.zig @@ -1,14 +1,3 @@ -const std = @import("std"); -const assert = std.debug.assert; -const leb = std.leb; -const log = std.log.scoped(.link_dyld_info); -const macho = std.macho; -const testing = std.testing; - -const Allocator = std.mem.Allocator; -const MachO = @import("../../MachO.zig"); -const Symbol = @import("../Symbol.zig"); - pub const Entry = struct { target: Symbol.Index, offset: u64, @@ -39,11 +28,108 @@ pub const Bind = struct { self.buffer.deinit(gpa); } - pub fn size(self: Self) u64 { - return @intCast(self.buffer.items.len); + pub fn updateSize(self: *Self, macho_file: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + + const gpa = macho_file.base.comp.gpa; + const cpu_arch = macho_file.getTarget().cpu.arch; + + var objects = try std.ArrayList(File.Index).initCapacity(gpa, macho_file.objects.items.len + 1); + defer objects.deinit(); + objects.appendSliceAssumeCapacity(macho_file.objects.items); + if (macho_file.getInternalObject()) |obj| objects.appendAssumeCapacity(obj.index); + + for (objects.items) |index| { + const file = macho_file.getFile(index).?; + for (file.getAtoms()) |atom_index| { + const atom = macho_file.getAtom(atom_index) orelse continue; + if (!atom.flags.alive) continue; + if (atom.getInputSection(macho_file).isZerofill()) continue; + 
const atom_addr = atom.getAddress(macho_file); + const relocs = atom.getRelocs(macho_file); + const seg_id = macho_file.sections.items(.segment_id)[atom.out_n_sect]; + const seg = macho_file.segments.items[seg_id]; + for (relocs) |rel| { + if (rel.type != .unsigned or rel.meta.length != 3 or rel.tag != .@"extern") continue; + const rel_offset = rel.offset - atom.off; + const addend = rel.addend + rel.getRelocAddend(cpu_arch); + const sym = rel.getTargetSymbol(macho_file); + if (sym.isTlvInit(macho_file)) continue; + const entry = Entry{ + .target = rel.target, + .offset = atom_addr + rel_offset - seg.vmaddr, + .segment_id = seg_id, + .addend = addend, + }; + if (sym.flags.import or (!(sym.flags.@"export" and sym.flags.weak) and sym.flags.interposable)) { + try self.entries.append(gpa, entry); + } + } + } + } + + if (macho_file.got_sect_index) |sid| { + const seg_id = macho_file.sections.items(.segment_id)[sid]; + const seg = macho_file.segments.items[seg_id]; + for (macho_file.got.symbols.items, 0..) |ref, idx| { + const sym = macho_file.getSymbol(ref); + const addr = macho_file.got.getAddress(@intCast(idx), macho_file); + const entry = Entry{ + .target = ref, + .offset = addr - seg.vmaddr, + .segment_id = seg_id, + .addend = 0, + }; + if (sym.flags.import or (sym.flags.@"export" and sym.flags.interposable and !sym.flags.weak)) { + try self.entries.append(gpa, entry); + } + } + } + + if (macho_file.la_symbol_ptr_sect_index) |sid| { + const sect = macho_file.sections.items(.header)[sid]; + const seg_id = macho_file.sections.items(.segment_id)[sid]; + const seg = macho_file.segments.items[seg_id]; + for (macho_file.stubs.symbols.items, 0..) |ref, idx| { + const sym = macho_file.getSymbol(ref); + const addr = sect.addr + idx * @sizeOf(u64); + const bind_entry = Entry{ + .target = ref, + .offset = addr - seg.vmaddr, + .segment_id = seg_id, + .addend = 0, + }; + if (sym.flags.import and sym.flags.weak) { + try self.entries.append(gpa, bind_entry); + } + } + } + + if (macho_file.tlv_ptr_sect_index) |sid| { + const seg_id = macho_file.sections.items(.segment_id)[sid]; + const seg = macho_file.segments.items[seg_id]; + + for (macho_file.tlv_ptr.symbols.items, 0..) 
|ref, idx| { + const sym = macho_file.getSymbol(ref); + const addr = macho_file.tlv_ptr.getAddress(@intCast(idx), macho_file); + const entry = Entry{ + .target = ref, + .offset = addr - seg.vmaddr, + .segment_id = seg_id, + .addend = 0, + }; + if (sym.flags.import or (sym.flags.@"export" and sym.flags.interposable and !sym.flags.weak)) { + try self.entries.append(gpa, entry); + } + } + } + + try self.finalize(gpa, macho_file); + macho_file.dyld_info_cmd.bind_size = mem.alignForward(u32, @intCast(self.buffer.items.len), @alignOf(u64)); } - pub fn finalize(self: *Self, gpa: Allocator, ctx: *MachO) !void { + fn finalize(self: *Self, gpa: Allocator, ctx: *MachO) !void { if (self.entries.items.len == 0) return; const writer = self.buffer.writer(gpa); @@ -178,7 +264,6 @@ pub const Bind = struct { } pub fn write(self: Self, writer: anytype) !void { - if (self.size() == 0) return; try writer.writeAll(self.buffer.items); } }; @@ -194,11 +279,109 @@ pub const WeakBind = struct { self.buffer.deinit(gpa); } - pub fn size(self: Self) u64 { - return @intCast(self.buffer.items.len); + pub fn updateSize(self: *Self, macho_file: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + + const gpa = macho_file.base.comp.gpa; + const cpu_arch = macho_file.getTarget().cpu.arch; + + var objects = try std.ArrayList(File.Index).initCapacity(gpa, macho_file.objects.items.len + 1); + defer objects.deinit(); + objects.appendSliceAssumeCapacity(macho_file.objects.items); + if (macho_file.getInternalObject()) |obj| objects.appendAssumeCapacity(obj.index); + + for (objects.items) |index| { + const file = macho_file.getFile(index).?; + for (file.getAtoms()) |atom_index| { + const atom = macho_file.getAtom(atom_index) orelse continue; + if (!atom.flags.alive) continue; + if (atom.getInputSection(macho_file).isZerofill()) continue; + const atom_addr = atom.getAddress(macho_file); + const relocs = atom.getRelocs(macho_file); + const seg_id = macho_file.sections.items(.segment_id)[atom.out_n_sect]; + const seg = macho_file.segments.items[seg_id]; + for (relocs) |rel| { + if (rel.type != .unsigned or rel.meta.length != 3 or rel.tag != .@"extern") continue; + const rel_offset = rel.offset - atom.off; + const addend = rel.addend + rel.getRelocAddend(cpu_arch); + const sym = rel.getTargetSymbol(macho_file); + if (sym.isTlvInit(macho_file)) continue; + const entry = Entry{ + .target = rel.target, + .offset = atom_addr + rel_offset - seg.vmaddr, + .segment_id = seg_id, + .addend = addend, + }; + if (!sym.isLocal() and sym.flags.weak) { + try self.entries.append(gpa, entry); + } + } + } + } + + if (macho_file.got_sect_index) |sid| { + const seg_id = macho_file.sections.items(.segment_id)[sid]; + const seg = macho_file.segments.items[seg_id]; + for (macho_file.got.symbols.items, 0..) |ref, idx| { + const sym = macho_file.getSymbol(ref); + const addr = macho_file.got.getAddress(@intCast(idx), macho_file); + const entry = Entry{ + .target = ref, + .offset = addr - seg.vmaddr, + .segment_id = seg_id, + .addend = 0, + }; + if (sym.flags.weak) { + try self.entries.append(gpa, entry); + } + } + } + + if (macho_file.la_symbol_ptr_sect_index) |sid| { + const sect = macho_file.sections.items(.header)[sid]; + const seg_id = macho_file.sections.items(.segment_id)[sid]; + const seg = macho_file.segments.items[seg_id]; + + for (macho_file.stubs.symbols.items, 0..) 
|ref, idx| { + const sym = macho_file.getSymbol(ref); + const addr = sect.addr + idx * @sizeOf(u64); + const bind_entry = Entry{ + .target = ref, + .offset = addr - seg.vmaddr, + .segment_id = seg_id, + .addend = 0, + }; + if (sym.flags.weak) { + try self.entries.append(gpa, bind_entry); + } + } + } + + if (macho_file.tlv_ptr_sect_index) |sid| { + const seg_id = macho_file.sections.items(.segment_id)[sid]; + const seg = macho_file.segments.items[seg_id]; + + for (macho_file.tlv_ptr.symbols.items, 0..) |ref, idx| { + const sym = macho_file.getSymbol(ref); + const addr = macho_file.tlv_ptr.getAddress(@intCast(idx), macho_file); + const entry = Entry{ + .target = ref, + .offset = addr - seg.vmaddr, + .segment_id = seg_id, + .addend = 0, + }; + if (sym.flags.weak) { + try self.entries.append(gpa, entry); + } + } + } + + try self.finalize(gpa, macho_file); + macho_file.dyld_info_cmd.weak_bind_size = mem.alignForward(u32, @intCast(self.buffer.items.len), @alignOf(u64)); } - pub fn finalize(self: *Self, gpa: Allocator, ctx: *MachO) !void { + fn finalize(self: *Self, gpa: Allocator, ctx: *MachO) !void { if (self.entries.items.len == 0) return; const writer = self.buffer.writer(gpa); @@ -322,7 +505,6 @@ pub const WeakBind = struct { } pub fn write(self: Self, writer: anytype) !void { - if (self.size() == 0) return; try writer.writeAll(self.buffer.items); } }; @@ -340,11 +522,36 @@ pub const LazyBind = struct { self.offsets.deinit(gpa); } - pub fn size(self: Self) u64 { - return @intCast(self.buffer.items.len); + pub fn updateSize(self: *Self, macho_file: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + + const gpa = macho_file.base.comp.gpa; + + const sid = macho_file.la_symbol_ptr_sect_index.?; + const sect = macho_file.sections.items(.header)[sid]; + const seg_id = macho_file.sections.items(.segment_id)[sid]; + const seg = macho_file.segments.items[seg_id]; + + for (macho_file.stubs.symbols.items, 0..) 
|ref, idx| { + const sym = macho_file.getSymbol(ref); + const addr = sect.addr + idx * @sizeOf(u64); + const bind_entry = Entry{ + .target = ref, + .offset = addr - seg.vmaddr, + .segment_id = seg_id, + .addend = 0, + }; + if ((sym.flags.import and !sym.flags.weak) or (sym.flags.interposable and !sym.flags.weak)) { + try self.entries.append(gpa, bind_entry); + } + } + + try self.finalize(gpa, macho_file); + macho_file.dyld_info_cmd.lazy_bind_size = mem.alignForward(u32, @intCast(self.buffer.items.len), @alignOf(u64)); } - pub fn finalize(self: *Self, gpa: Allocator, ctx: *MachO) !void { + fn finalize(self: *Self, gpa: Allocator, ctx: *MachO) !void { try self.offsets.ensureTotalCapacityPrecise(gpa, self.entries.items.len); const writer = self.buffer.writer(gpa); @@ -474,3 +681,17 @@ fn done(writer: anytype) !void { log.debug(">>> done", .{}); try writer.writeByte(macho.BIND_OPCODE_DONE); } + +const assert = std.debug.assert; +const leb = std.leb; +const log = std.log.scoped(.link_dyld_info); +const macho = std.macho; +const mem = std.mem; +const testing = std.testing; +const trace = @import("../../../tracy.zig").trace; +const std = @import("std"); + +const Allocator = mem.Allocator; +const File = @import("../file.zig").File; +const MachO = @import("../../MachO.zig"); +const Symbol = @import("../Symbol.zig"); diff --git a/src/link/MachO/synthetic.zig b/src/link/MachO/synthetic.zig index a7729b9137..4cac061d93 100644 --- a/src/link/MachO/synthetic.zig +++ b/src/link/MachO/synthetic.zig @@ -70,22 +70,6 @@ pub const ZigGotSection = struct { } } - pub fn addDyldRelocs(zig_got: ZigGotSection, macho_file: *MachO) !void { - const tracy = trace(@src()); - defer tracy.end(); - const gpa = macho_file.base.comp.gpa; - const seg_id = macho_file.sections.items(.segment_id)[macho_file.zig_got_sect_index.?]; - const seg = macho_file.segments.items[seg_id]; - - for (0..zig_got.entries.items.len) |idx| { - const addr = zig_got.entryAddress(@intCast(idx), macho_file); - try macho_file.rebase.entries.append(gpa, .{ - .offset = addr - seg.vmaddr, - .segment_id = seg_id, - }); - } - } - const FormatCtx = struct { zig_got: ZigGotSection, macho_file: *MachO, @@ -146,41 +130,6 @@ pub const GotSection = struct { return got.symbols.items.len * @sizeOf(u64); } - pub fn addDyldRelocs(got: GotSection, macho_file: *MachO) !void { - const tracy = trace(@src()); - defer tracy.end(); - const gpa = macho_file.base.comp.gpa; - const seg_id = macho_file.sections.items(.segment_id)[macho_file.got_sect_index.?]; - const seg = macho_file.segments.items[seg_id]; - - for (got.symbols.items, 0..) 
|sym_index, idx| { - const sym = macho_file.getSymbol(sym_index); - const addr = got.getAddress(@intCast(idx), macho_file); - const entry = bind.Entry{ - .target = sym_index, - .offset = addr - seg.vmaddr, - .segment_id = seg_id, - .addend = 0, - }; - if (sym.flags.import) { - try macho_file.bind.entries.append(gpa, entry); - if (sym.flags.weak) { - try macho_file.weak_bind.entries.append(gpa, entry); - } - } else { - try macho_file.rebase.entries.append(gpa, .{ - .offset = addr - seg.vmaddr, - .segment_id = seg_id, - }); - if (sym.flags.weak) { - try macho_file.weak_bind.entries.append(gpa, entry); - } else if (sym.flags.interposable) { - try macho_file.bind.entries.append(gpa, entry); - } - } - } - } - pub fn write(got: GotSection, macho_file: *MachO, writer: anytype) !void { const tracy = trace(@src()); defer tracy.end(); @@ -446,49 +395,6 @@ pub const LaSymbolPtrSection = struct { return macho_file.stubs.symbols.items.len * @sizeOf(u64); } - pub fn addDyldRelocs(laptr: LaSymbolPtrSection, macho_file: *MachO) !void { - const tracy = trace(@src()); - defer tracy.end(); - _ = laptr; - const gpa = macho_file.base.comp.gpa; - - const sect = macho_file.sections.items(.header)[macho_file.la_symbol_ptr_sect_index.?]; - const seg_id = macho_file.sections.items(.segment_id)[macho_file.la_symbol_ptr_sect_index.?]; - const seg = macho_file.segments.items[seg_id]; - - for (macho_file.stubs.symbols.items, 0..) |sym_index, idx| { - const sym = macho_file.getSymbol(sym_index); - const addr = sect.addr + idx * @sizeOf(u64); - const rebase_entry = Rebase.Entry{ - .offset = addr - seg.vmaddr, - .segment_id = seg_id, - }; - const bind_entry = bind.Entry{ - .target = sym_index, - .offset = addr - seg.vmaddr, - .segment_id = seg_id, - .addend = 0, - }; - if (sym.flags.import) { - if (sym.flags.weak) { - try macho_file.bind.entries.append(gpa, bind_entry); - try macho_file.weak_bind.entries.append(gpa, bind_entry); - } else { - try macho_file.lazy_bind.entries.append(gpa, bind_entry); - try macho_file.rebase.entries.append(gpa, rebase_entry); - } - } else { - if (sym.flags.weak) { - try macho_file.rebase.entries.append(gpa, rebase_entry); - try macho_file.weak_bind.entries.append(gpa, bind_entry); - } else if (sym.flags.interposable) { - try macho_file.lazy_bind.entries.append(gpa, bind_entry); - try macho_file.rebase.entries.append(gpa, rebase_entry); - } - } - } - } - pub fn write(laptr: LaSymbolPtrSection, macho_file: *MachO, writer: anytype) !void { const tracy = trace(@src()); defer tracy.end(); @@ -539,41 +445,6 @@ pub const TlvPtrSection = struct { return tlv.symbols.items.len * @sizeOf(u64); } - pub fn addDyldRelocs(tlv: TlvPtrSection, macho_file: *MachO) !void { - const tracy = trace(@src()); - defer tracy.end(); - const gpa = macho_file.base.comp.gpa; - const seg_id = macho_file.sections.items(.segment_id)[macho_file.tlv_ptr_sect_index.?]; - const seg = macho_file.segments.items[seg_id]; - - for (tlv.symbols.items, 0..) 
|sym_index, idx| { - const sym = macho_file.getSymbol(sym_index); - const addr = tlv.getAddress(@intCast(idx), macho_file); - const entry = bind.Entry{ - .target = sym_index, - .offset = addr - seg.vmaddr, - .segment_id = seg_id, - .addend = 0, - }; - if (sym.flags.import) { - try macho_file.bind.entries.append(gpa, entry); - if (sym.flags.weak) { - try macho_file.weak_bind.entries.append(gpa, entry); - } - } else { - try macho_file.rebase.entries.append(gpa, .{ - .offset = addr - seg.vmaddr, - .segment_id = seg_id, - }); - if (sym.flags.weak) { - try macho_file.weak_bind.entries.append(gpa, entry); - } else if (sym.flags.interposable) { - try macho_file.bind.entries.append(gpa, entry); - } - } - } - } - pub fn write(tlv: TlvPtrSection, macho_file: *MachO, writer: anytype) !void { const tracy = trace(@src()); defer tracy.end(); @@ -772,21 +643,12 @@ pub const Indsymtab = struct { } }; -pub const RebaseSection = Rebase; -pub const BindSection = bind.Bind; -pub const WeakBindSection = bind.WeakBind; -pub const LazyBindSection = bind.LazyBind; -pub const ExportTrieSection = Trie; - const aarch64 = @import("../aarch64.zig"); const assert = std.debug.assert; -const bind = @import("dyld_info/bind.zig"); const math = std.math; const std = @import("std"); const trace = @import("../../tracy.zig").trace; const Allocator = std.mem.Allocator; const MachO = @import("../MachO.zig"); -const Rebase = @import("dyld_info/Rebase.zig"); const Symbol = @import("Symbol.zig"); -const Trie = @import("dyld_info/Trie.zig"); |
