| author | Jakub Konka <kubkon@jakubkonka.com> | 2022-07-23 00:01:09 -0700 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2022-07-23 00:01:09 -0700 |
| commit | a8bfddfaeae4f48c044fd134aac1e977e6a161f8 | |
| tree | 4b1b000767ba641f5ca7f7c40aa17e29991e9114 /src | |
| parent | a035d75a1750e59e43bb9122f33d8586ed1ee385 | |
| parent | cf6cfc830db89e0031200d1a16c93eb7801cb911 | |
Merge pull request #12140 from ziglang/macho-gc-sections
macho: add support for `-dead_strip` (GC sections) and simplify symbol resolution
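
For context on the `-dead_strip` half: the new `src/link/MachO/dead_strip.zig` below exposes `gcAtoms`, called from the one-shot link path when `gc_sections` is set (which this diff defaults to on outside Debug builds). Conceptually this is a mark-and-sweep over the linker's atoms: treat must-keep symbols such as the entry point as roots, follow relocation edges to mark everything reachable, then strip what remains; the diff flags stripped symbols with the new `N_DESC_GCED` value. The following is a minimal sketch under those assumptions, not the actual `gcAtoms` implementation; the `Atom` shape, its `edges` field, and the sample symbol names are simplified stand-ins.

```zig
const std = @import("std");

// Simplified stand-in for the linker's Atom: a chunk of code or data plus
// the atoms its relocations point at. The real Atom (src/link/MachO/Atom.zig)
// also carries symbol indices, relocations, alignment, and more.
const Atom = struct {
    name: []const u8,
    edges: []const usize,
    alive: bool = false,
};

// Mark phase: flood-fill liveness from a root across relocation edges.
fn markLive(atoms: []Atom, index: usize) void {
    if (atoms[index].alive) return;
    atoms[index].alive = true;
    for (atoms[index].edges) |edge| {
        markLive(atoms, edge);
    }
}

pub fn main() void {
    var atoms = [_]Atom{
        .{ .name = "_main", .edges = &[_]usize{1} },
        .{ .name = "_used_helper", .edges = &[_]usize{} },
        .{ .name = "_unused_helper", .edges = &[_]usize{} },
    };
    // _main is the only root in this toy example; a real linker also roots
    // exported symbols and anything in no_dead_strip sections.
    markLive(&atoms, 0);
    // Sweep phase: anything left unmarked is garbage-collected. In the diff,
    // such symbols get n_desc set to N_DESC_GCED and their atoms are freed.
    for (atoms) |atom| {
        const verdict = if (atom.alive) "kept" else "stripped";
        std.debug.print("{s}: {s}\n", .{ atom.name, verdict });
    }
}
```

Running the sketch prints `_unused_helper: stripped` while `_main` and `_used_helper` survive, which is the effect `-dead_strip` has on unreferenced code in the output binary.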
Diffstat (limited to 'src')
| file | lines changed |
|---|---|
| src/arch/aarch64/CodeGen.zig | 18 |
| src/arch/aarch64/Emit.zig | 13 |
| src/arch/aarch64/Mir.zig | 8 |
| src/arch/riscv64/CodeGen.zig | 2 |
| src/arch/x86_64/CodeGen.zig | 18 |
| src/arch/x86_64/Emit.zig | 17 |
| src/arch/x86_64/Mir.zig | 17 |
| src/link.zig | 7 |
| src/link/MachO.zig | 4158 |
| src/link/MachO/Atom.zig | 668 |
| src/link/MachO/DebugSymbols.zig | 65 |
| src/link/MachO/Object.zig | 851 |
| src/link/MachO/dead_strip.zig | 292 |
| src/link/strtab.zig | 113 |
| src/main.zig | 11 |
15 files changed, 3395 insertions, 2863 deletions
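
The "simplify symbol resolution" half of the change is visible in the `src/link/MachO.zig` hunks below: the separate `globals`/`undefs` nlist arrays and the string-table-keyed `symbol_resolver` are replaced by a single location handle, `SymbolWithLoc`, plus a name-keyed `globals: std.StringArrayHashMapUnmanaged(SymbolWithLoc)`. A `sym_index` indexes a symbol table, and `file == null` selects the linker's own table (synthetic symbols and symbols produced by the Zig module) rather than an object file's. This is also why `local_sym_index` becomes `sym_index` and the MIR `extern_fn` payload becomes `relocation` throughout the backends. Below is a minimal sketch of the lookup this enables, with `Linker` and `object_symtabs` as hypothetical stand-ins for `MachO` and its per-object symbol tables.

```zig
const std = @import("std");
const macho = std.macho;

// From the diff: one handle identifies any symbol the linker knows about.
pub const SymbolWithLoc = struct {
    /// Index into the respective symbol table.
    sym_index: u32,
    /// null means it's a synthetic global (or a symbol from the Zig module).
    file: ?u32 = null,
};

// Hypothetical stand-in for MachO: the real getSymbol lives on the linker
// struct and consults each parsed Object's symtab when file != null.
const Linker = struct {
    locals: std.ArrayListUnmanaged(macho.nlist_64) = .{},
    object_symtabs: []const []const macho.nlist_64 = &.{},

    fn getSymbol(self: Linker, loc: SymbolWithLoc) macho.nlist_64 {
        if (loc.file) |file| {
            return self.object_symtabs[file][loc.sym_index];
        }
        return self.locals.items[loc.sym_index];
    }
};

test "file == null selects the linker's own symbol table" {
    const gpa = std.testing.allocator;
    var linker = Linker{};
    defer linker.locals.deinit(gpa);
    try linker.locals.append(gpa, .{
        .n_strx = 0,
        .n_type = 0,
        .n_sect = 0,
        .n_desc = 0,
        .n_value = 0x1000,
    });
    const sym = linker.getSymbol(.{ .sym_index = 0, .file = null });
    try std.testing.expectEqual(@as(u64, 0x1000), sym.n_value);
}
```

The `Entry` helpers added in the MachO.zig hunk (`getSymbol`, `getSymbolPtr`, `getAtom`, `getName`) are thin wrappers over exactly this kind of lookup with `file = null`.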
diff --git a/src/arch/aarch64/CodeGen.zig b/src/arch/aarch64/CodeGen.zig index 64d49f2508..ba7c56e2bd 100644 --- a/src/arch/aarch64/CodeGen.zig +++ b/src/arch/aarch64/CodeGen.zig @@ -3174,7 +3174,7 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallOptions. const func = func_payload.data; const fn_owner_decl = mod.declPtr(func.owner_decl); try self.genSetReg(Type.initTag(.u64), .x30, .{ - .got_load = fn_owner_decl.link.macho.local_sym_index, + .got_load = fn_owner_decl.link.macho.sym_index, }); // blr x30 _ = try self.addInst(.{ @@ -3190,14 +3190,14 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallOptions. lib_name, }); } - const n_strx = try macho_file.getGlobalSymbol(mem.sliceTo(decl_name, 0)); + const sym_index = try macho_file.getGlobalSymbol(mem.sliceTo(decl_name, 0)); _ = try self.addInst(.{ .tag = .call_extern, .data = .{ - .extern_fn = .{ - .atom_index = mod.declPtr(self.mod_fn.owner_decl).link.macho.local_sym_index, - .sym_name = n_strx, + .relocation = .{ + .atom_index = mod.declPtr(self.mod_fn.owner_decl).link.macho.sym_index, + .sym_index = sym_index, }, }, }); @@ -4157,7 +4157,7 @@ fn genSetStack(self: *Self, ty: Type, stack_offset: u32, mcv: MCValue) InnerErro .data = .{ .payload = try self.addExtra(Mir.LoadMemoryPie{ .register = @enumToInt(src_reg), - .atom_index = mod.declPtr(self.mod_fn.owner_decl).link.macho.local_sym_index, + .atom_index = mod.declPtr(self.mod_fn.owner_decl).link.macho.sym_index, .sym_index = sym_index, }), }, @@ -4270,7 +4270,7 @@ fn genSetReg(self: *Self, ty: Type, reg: Register, mcv: MCValue) InnerError!void .data = .{ .payload = try self.addExtra(Mir.LoadMemoryPie{ .register = @enumToInt(reg), - .atom_index = mod.declPtr(self.mod_fn.owner_decl).link.macho.local_sym_index, + .atom_index = mod.declPtr(self.mod_fn.owner_decl).link.macho.sym_index, .sym_index = sym_index, }), }, @@ -4578,8 +4578,8 @@ fn lowerDeclRef(self: *Self, tv: TypedValue, decl_index: Module.Decl.Index) Inne } else if (self.bin_file.cast(link.File.MachO)) |_| { // Because MachO is PIE-always-on, we defer memory address resolution until // the linker has enough info to perform relocations. - assert(decl.link.macho.local_sym_index != 0); - return MCValue{ .got_load = decl.link.macho.local_sym_index }; + assert(decl.link.macho.sym_index != 0); + return MCValue{ .got_load = decl.link.macho.sym_index }; } else if (self.bin_file.cast(link.File.Coff)) |coff_file| { const got_addr = coff_file.offset_table_virtual_address + decl.link.coff.offset_table_index * ptr_bytes; return MCValue{ .memory = got_addr }; diff --git a/src/arch/aarch64/Emit.zig b/src/arch/aarch64/Emit.zig index 8ea6ab91e2..47a0c08893 100644 --- a/src/arch/aarch64/Emit.zig +++ b/src/arch/aarch64/Emit.zig @@ -649,7 +649,7 @@ fn mirDebugEpilogueBegin(self: *Emit) !void { fn mirCallExtern(emit: *Emit, inst: Mir.Inst.Index) !void { assert(emit.mir.instructions.items(.tag)[inst] == .call_extern); - const extern_fn = emit.mir.instructions.items(.data)[inst].extern_fn; + const relocation = emit.mir.instructions.items(.data)[inst].relocation; if (emit.bin_file.cast(link.File.MachO)) |macho_file| { const offset = blk: { @@ -659,10 +659,13 @@ fn mirCallExtern(emit: *Emit, inst: Mir.Inst.Index) !void { break :blk offset; }; // Add relocation to the decl. 
- const atom = macho_file.atom_by_index_table.get(extern_fn.atom_index).?; + const atom = macho_file.atom_by_index_table.get(relocation.atom_index).?; try atom.relocs.append(emit.bin_file.allocator, .{ .offset = offset, - .target = .{ .global = extern_fn.sym_name }, + .target = .{ + .sym_index = relocation.sym_index, + .file = null, + }, .addend = 0, .subtractor = null, .pcrel = true, @@ -864,7 +867,7 @@ fn mirLoadMemoryPie(emit: *Emit, inst: Mir.Inst.Index) !void { // Page reloc for adrp instruction. try atom.relocs.append(emit.bin_file.allocator, .{ .offset = offset, - .target = .{ .local = data.sym_index }, + .target = .{ .sym_index = data.sym_index, .file = null }, .addend = 0, .subtractor = null, .pcrel = true, @@ -882,7 +885,7 @@ fn mirLoadMemoryPie(emit: *Emit, inst: Mir.Inst.Index) !void { // Pageoff reloc for adrp instruction. try atom.relocs.append(emit.bin_file.allocator, .{ .offset = offset + 4, - .target = .{ .local = data.sym_index }, + .target = .{ .sym_index = data.sym_index, .file = null }, .addend = 0, .subtractor = null, .pcrel = false, diff --git a/src/arch/aarch64/Mir.zig b/src/arch/aarch64/Mir.zig index 1d66a69c8e..2fef069f7a 100644 --- a/src/arch/aarch64/Mir.zig +++ b/src/arch/aarch64/Mir.zig @@ -225,14 +225,16 @@ pub const Inst = struct { /// /// Used by e.g. b inst: Index, - /// An extern function + /// Relocation for the linker where: + /// * `atom_index` is the index of the source + /// * `sym_index` is the index of the target /// /// Used by e.g. call_extern - extern_fn: struct { + relocation: struct { /// Index of the containing atom. atom_index: u32, /// Index into the linker's string table. - sym_name: u32, + sym_index: u32, }, /// A 16-bit immediate value. /// diff --git a/src/arch/riscv64/CodeGen.zig b/src/arch/riscv64/CodeGen.zig index 1d4108a77e..e52dd4ec08 100644 --- a/src/arch/riscv64/CodeGen.zig +++ b/src/arch/riscv64/CodeGen.zig @@ -2563,7 +2563,7 @@ fn lowerDeclRef(self: *Self, tv: TypedValue, decl_index: Module.Decl.Index) Inne } else if (self.bin_file.cast(link.File.MachO)) |_| { // TODO I'm hacking my way through here by repurposing .memory for storing // index to the GOT target symbol index. - return MCValue{ .memory = decl.link.macho.local_sym_index }; + return MCValue{ .memory = decl.link.macho.sym_index }; } else if (self.bin_file.cast(link.File.Coff)) |coff_file| { const got_addr = coff_file.offset_table_virtual_address + decl.link.coff.offset_table_index * ptr_bytes; return MCValue{ .memory = got_addr }; diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 8616c4ac5c..b35db3e97a 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -2644,8 +2644,8 @@ fn loadMemPtrIntoRegister(self: *Self, reg: Register, ptr_ty: Type, ptr: MCValue .flags = flags, }), .data = .{ - .load_reloc = .{ - .atom_index = fn_owner_decl.link.macho.local_sym_index, + .relocation = .{ + .atom_index = fn_owner_decl.link.macho.sym_index, .sym_index = sym_index, }, }, @@ -3977,7 +3977,7 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallOptions. const func = func_payload.data; const fn_owner_decl = mod.declPtr(func.owner_decl); try self.genSetReg(Type.initTag(.usize), .rax, .{ - .got_load = fn_owner_decl.link.macho.local_sym_index, + .got_load = fn_owner_decl.link.macho.sym_index, }); // callq *%rax _ = try self.addInst(.{ @@ -3997,14 +3997,14 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallOptions. 
lib_name, }); } - const n_strx = try macho_file.getGlobalSymbol(mem.sliceTo(decl_name, 0)); + const sym_index = try macho_file.getGlobalSymbol(mem.sliceTo(decl_name, 0)); _ = try self.addInst(.{ .tag = .call_extern, .ops = undefined, .data = .{ - .extern_fn = .{ - .atom_index = mod.declPtr(self.mod_fn.owner_decl).link.macho.local_sym_index, - .sym_name = n_strx, + .relocation = .{ + .atom_index = mod.declPtr(self.mod_fn.owner_decl).link.macho.sym_index, + .sym_index = sym_index, }, }, }); @@ -6771,8 +6771,8 @@ fn lowerDeclRef(self: *Self, tv: TypedValue, decl_index: Module.Decl.Index) Inne } else if (self.bin_file.cast(link.File.MachO)) |_| { // Because MachO is PIE-always-on, we defer memory address resolution until // the linker has enough info to perform relocations. - assert(decl.link.macho.local_sym_index != 0); - return MCValue{ .got_load = decl.link.macho.local_sym_index }; + assert(decl.link.macho.sym_index != 0); + return MCValue{ .got_load = decl.link.macho.sym_index }; } else if (self.bin_file.cast(link.File.Coff)) |coff_file| { const got_addr = coff_file.offset_table_virtual_address + decl.link.coff.offset_table_index * ptr_bytes; return MCValue{ .memory = got_addr }; diff --git a/src/arch/x86_64/Emit.zig b/src/arch/x86_64/Emit.zig index cc5b54fb55..52d68e81ec 100644 --- a/src/arch/x86_64/Emit.zig +++ b/src/arch/x86_64/Emit.zig @@ -982,7 +982,7 @@ fn mirLeaPie(emit: *Emit, inst: Mir.Inst.Index) InnerError!void { const tag = emit.mir.instructions.items(.tag)[inst]; assert(tag == .lea_pie); const ops = emit.mir.instructions.items(.ops)[inst].decode(); - const load_reloc = emit.mir.instructions.items(.data)[inst].load_reloc; + const relocation = emit.mir.instructions.items(.data)[inst].relocation; // lea reg1, [rip + reloc] // RM @@ -1001,11 +1001,11 @@ fn mirLeaPie(emit: *Emit, inst: Mir.Inst.Index) InnerError!void { 0b01 => @enumToInt(std.macho.reloc_type_x86_64.X86_64_RELOC_SIGNED), else => return emit.fail("TODO unused LEA PIE variants 0b10 and 0b11", .{}), }; - const atom = macho_file.atom_by_index_table.get(load_reloc.atom_index).?; - log.debug("adding reloc of type {} to local @{d}", .{ reloc_type, load_reloc.sym_index }); + const atom = macho_file.atom_by_index_table.get(relocation.atom_index).?; + log.debug("adding reloc of type {} to local @{d}", .{ reloc_type, relocation.sym_index }); try atom.relocs.append(emit.bin_file.allocator, .{ .offset = @intCast(u32, end_offset - 4), - .target = .{ .local = load_reloc.sym_index }, + .target = .{ .sym_index = relocation.sym_index, .file = null }, .addend = 0, .subtractor = null, .pcrel = true, @@ -1116,7 +1116,7 @@ fn mirCmpFloatAvx(emit: *Emit, tag: Tag, inst: Mir.Inst.Index) InnerError!void { fn mirCallExtern(emit: *Emit, inst: Mir.Inst.Index) InnerError!void { const tag = emit.mir.instructions.items(.tag)[inst]; assert(tag == .call_extern); - const extern_fn = emit.mir.instructions.items(.data)[inst].extern_fn; + const relocation = emit.mir.instructions.items(.data)[inst].relocation; const offset = blk: { // callq @@ -1126,10 +1126,13 @@ fn mirCallExtern(emit: *Emit, inst: Mir.Inst.Index) InnerError!void { if (emit.bin_file.cast(link.File.MachO)) |macho_file| { // Add relocation to the decl. 
- const atom = macho_file.atom_by_index_table.get(extern_fn.atom_index).?; + const atom = macho_file.atom_by_index_table.get(relocation.atom_index).?; try atom.relocs.append(emit.bin_file.allocator, .{ .offset = offset, - .target = .{ .global = extern_fn.sym_name }, + .target = .{ + .sym_index = relocation.sym_index, + .file = null, + }, .addend = 0, .subtractor = null, .pcrel = true, diff --git a/src/arch/x86_64/Mir.zig b/src/arch/x86_64/Mir.zig index 74b0ca0d12..f67b48a271 100644 --- a/src/arch/x86_64/Mir.zig +++ b/src/arch/x86_64/Mir.zig @@ -181,7 +181,7 @@ pub const Inst = struct { /// 0b00 reg1, [rip + reloc] // via GOT emits X86_64_RELOC_GOT relocation /// 0b01 reg1, [rip + reloc] // direct load emits X86_64_RELOC_SIGNED relocation /// Notes: - /// * `Data` contains `load_reloc` + /// * `Data` contains `relocation` lea_pie, /// ops flags: form: @@ -368,7 +368,7 @@ pub const Inst = struct { /// Pseudo-instructions /// call extern function /// Notes: - /// * target of the call is stored as `extern_fn` in `Data` union. + /// * target of the call is stored as `relocation` in `Data` union. call_extern, /// end of prologue @@ -439,15 +439,10 @@ pub const Inst = struct { /// A condition code for use with EFLAGS register. cc: bits.Condition, }, - /// An extern function. - extern_fn: struct { - /// Index of the containing atom. - atom_index: u32, - /// Index into the linker's string table. - sym_name: u32, - }, - /// PIE load relocation. - load_reloc: struct { + /// Relocation for the linker where: + /// * `atom_index` is the index of the source + /// * `sym_index` is the index of the target + relocation: struct { /// Index of the containing atom. atom_index: u32, /// Index into the linker's symbol table. diff --git a/src/link.zig b/src/link.zig index aa37589ff5..a69dcc4c6e 100644 --- a/src/link.zig +++ b/src/link.zig @@ -544,12 +544,7 @@ pub const File = struct { switch (base.tag) { .coff => return @fieldParentPtr(Coff, "base", base).allocateDeclIndexes(decl_index), .elf => return @fieldParentPtr(Elf, "base", base).allocateDeclIndexes(decl_index), - .macho => return @fieldParentPtr(MachO, "base", base).allocateDeclIndexes(decl_index) catch |err| switch (err) { - // remap this error code because we are transitioning away from - // `allocateDeclIndexes`. 
- error.Overflow => return error.OutOfMemory, - error.OutOfMemory => return error.OutOfMemory, - }, + .macho => return @fieldParentPtr(MachO, "base", base).allocateDeclIndexes(decl_index), .wasm => return @fieldParentPtr(Wasm, "base", base).allocateDeclIndexes(decl_index), .plan9 => return @fieldParentPtr(Plan9, "base", base).allocateDeclIndexes(decl_index), .c, .spirv, .nvptx => {}, diff --git a/src/link/MachO.zig b/src/link/MachO.zig index d659d994eb..0f2cbfa844 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -4,6 +4,7 @@ const std = @import("std"); const build_options = @import("build_options"); const builtin = @import("builtin"); const assert = std.debug.assert; +const dwarf = std.dwarf; const fmt = std.fmt; const fs = std.fs; const log = std.log.scoped(.link); @@ -15,6 +16,7 @@ const meta = std.meta; const aarch64 = @import("../arch/aarch64/bits.zig"); const bind = @import("MachO/bind.zig"); const codegen = @import("../codegen.zig"); +const dead_strip = @import("MachO/dead_strip.zig"); const link = @import("../link.zig"); const llvm_backend = @import("../codegen/llvm.zig"); const target_util = @import("../target.zig"); @@ -35,8 +37,7 @@ const LibStub = @import("tapi.zig").LibStub; const Liveness = @import("../Liveness.zig"); const LlvmObject = @import("../codegen/llvm.zig").Object; const Module = @import("../Module.zig"); -const StringIndexAdapter = std.hash_map.StringIndexAdapter; -const StringIndexContext = std.hash_map.StringIndexContext; +const StringTable = @import("strtab.zig").StringTable; const Trie = @import("MachO/Trie.zig"); const Type = @import("../type.zig").Type; const TypedValue = @import("../TypedValue.zig"); @@ -52,6 +53,8 @@ pub const SearchStrategy = enum { dylibs_first, }; +pub const N_DESC_GCED: u16 = @bitCast(u16, @as(i16, -1)); + const SystemLib = struct { needed: bool = false, weak: bool = false, @@ -69,10 +72,10 @@ d_sym: ?DebugSymbols = null, /// For x86_64 that's 4KB, whereas for aarch64, that's 16KB. page_size: u16, -/// If true, the linker will preallocate several sections and segments before starting the linking -/// process. This is for example true for stage2 debug builds, however, this is false for stage1 -/// and potentially stage2 release builds in the future. -needs_prealloc: bool = true, +/// Mode of operation: incremental - will preallocate segments/sections and is compatible with +/// watch and HCS modes of operation; one_shot - will link relocatables in a traditional, one-shot +/// fashion (default for LLVM backend). +mode: enum { incremental, one_shot }, /// The absolute address of the entry point. entry_addr: ?u64 = null, @@ -151,53 +154,48 @@ rustc_section_index: ?u16 = null, rustc_section_size: u64 = 0, locals: std.ArrayListUnmanaged(macho.nlist_64) = .{}, -globals: std.ArrayListUnmanaged(macho.nlist_64) = .{}, -undefs: std.ArrayListUnmanaged(macho.nlist_64) = .{}, -symbol_resolver: std.AutoHashMapUnmanaged(u32, SymbolWithLoc) = .{}, -unresolved: std.AutoArrayHashMapUnmanaged(u32, enum { - none, - stub, - got, -}) = .{}, -tentatives: std.AutoArrayHashMapUnmanaged(u32, void) = .{}, +globals: std.StringArrayHashMapUnmanaged(SymbolWithLoc) = .{}, +// FIXME Jakub +// TODO storing index into globals might be dangerous if we delete a global +// while not having everything resolved. Actually, perhaps `unresolved` +// should not be stored at the global scope? Is this possible? +// Otherwise, audit if this can be a problem. 
+// An alternative, which I still need to investigate for perf reasons is to +// store all global names in an adapted with context strtab. +unresolved: std.AutoArrayHashMapUnmanaged(u32, bool) = .{}, locals_free_list: std.ArrayListUnmanaged(u32) = .{}, -globals_free_list: std.ArrayListUnmanaged(u32) = .{}, dyld_stub_binder_index: ?u32 = null, dyld_private_atom: ?*Atom = null, stub_helper_preamble_atom: ?*Atom = null, -mh_execute_header_sym_index: ?u32 = null, -dso_handle_sym_index: ?u32 = null, - -strtab: std.ArrayListUnmanaged(u8) = .{}, -strtab_dir: std.HashMapUnmanaged(u32, void, StringIndexContext, std.hash_map.default_max_load_percentage) = .{}, +strtab: StringTable(.strtab) = .{}, +// TODO I think synthetic tables are a perfect match for some generic refactoring, +// and probably reusable between linker backends too. tlv_ptr_entries: std.ArrayListUnmanaged(Entry) = .{}, tlv_ptr_entries_free_list: std.ArrayListUnmanaged(u32) = .{}, -tlv_ptr_entries_table: std.AutoArrayHashMapUnmanaged(Atom.Relocation.Target, u32) = .{}, +tlv_ptr_entries_table: std.AutoHashMapUnmanaged(SymbolWithLoc, u32) = .{}, got_entries: std.ArrayListUnmanaged(Entry) = .{}, got_entries_free_list: std.ArrayListUnmanaged(u32) = .{}, -got_entries_table: std.AutoArrayHashMapUnmanaged(Atom.Relocation.Target, u32) = .{}, +got_entries_table: std.AutoHashMapUnmanaged(SymbolWithLoc, u32) = .{}, -stubs: std.ArrayListUnmanaged(*Atom) = .{}, +stubs: std.ArrayListUnmanaged(Entry) = .{}, stubs_free_list: std.ArrayListUnmanaged(u32) = .{}, -stubs_table: std.AutoArrayHashMapUnmanaged(u32, u32) = .{}, +stubs_table: std.AutoHashMapUnmanaged(SymbolWithLoc, u32) = .{}, error_flags: File.ErrorFlags = File.ErrorFlags{}, load_commands_dirty: bool = false, sections_order_dirty: bool = false, -has_dices: bool = false, -has_stabs: bool = false, + /// A helper var to indicate if we are at the start of the incremental updates, or /// already somewhere further along the update-and-run chain. /// TODO once we add opening a prelinked output binary from file, this will become /// obsolete as we will carry on where we left off. -cold_start: bool = false, -invalidate_relocs: bool = false, +cold_start: bool = true, section_ordinals: std.AutoArrayHashMapUnmanaged(MatchingSection, void) = .{}, @@ -221,12 +219,10 @@ atom_free_lists: std.AutoHashMapUnmanaged(MatchingSection, std.ArrayListUnmanage /// Pointer to the last allocated atom atoms: std.AutoHashMapUnmanaged(MatchingSection, *Atom) = .{}, -/// List of atoms that are owned directly by the linker. -/// Currently these are only atoms that are the result of linking -/// object files. Atoms which take part in incremental linking are -/// at present owned by Module.Decl. -/// TODO consolidate this. +/// List of atoms that are either synthetic or map directly to the Zig source program. managed_atoms: std.ArrayListUnmanaged(*Atom) = .{}, + +/// Table of atoms indexed by the symbol index. atom_by_index_table: std.AutoHashMapUnmanaged(u32, *Atom) = .{}, /// Table of unnamed constants associated with a parent `Decl`. @@ -257,8 +253,25 @@ unnamed_const_atoms: UnnamedConstTable = .{}, decls: std.AutoArrayHashMapUnmanaged(Module.Decl.Index, ?MatchingSection) = .{}, const Entry = struct { - target: Atom.Relocation.Target, - atom: *Atom, + target: SymbolWithLoc, + // Index into the synthetic symbol table (i.e., file == null). 
+ sym_index: u32, + + pub fn getSymbol(entry: Entry, macho_file: *MachO) macho.nlist_64 { + return macho_file.getSymbol(.{ .sym_index = entry.sym_index, .file = null }); + } + + pub fn getSymbolPtr(entry: Entry, macho_file: *MachO) *macho.nlist_64 { + return macho_file.getSymbolPtr(.{ .sym_index = entry.sym_index, .file = null }); + } + + pub fn getAtom(entry: Entry, macho_file: *MachO) *Atom { + return macho_file.getAtomForSymbol(.{ .sym_index = entry.sym_index, .file = null }).?; + } + + pub fn getName(entry: Entry, macho_file: *MachO) []const u8 { + return macho_file.getSymbolName(.{ .sym_index = entry.sym_index, .file = null }); + } }; const UnnamedConstTable = std.AutoHashMapUnmanaged(Module.Decl.Index, std.ArrayListUnmanaged(*Atom)); @@ -269,15 +282,12 @@ const PendingUpdate = union(enum) { add_got_entry: u32, }; -const SymbolWithLoc = struct { - // Table where the symbol can be found. - where: enum { - global, - undef, - }, - where_index: u32, - local_sym_index: u32 = 0, - file: ?u16 = null, // null means Zig module +pub const SymbolWithLoc = struct { + // Index into the respective symbol table. + sym_index: u32, + + // null means it's a synthetic global. + file: ?u32 = null, }; /// When allocating, the ideal_capacity is calculated by @@ -385,7 +395,7 @@ pub fn openPath(allocator: Allocator, options: link.Options) !*MachO { .n_desc = 0, .n_value = 0, }); - try self.strtab.append(allocator, 0); + try self.strtab.buffer.append(allocator, 0); try self.populateMissingMetadata(); @@ -406,7 +416,6 @@ pub fn createEmpty(gpa: Allocator, options: link.Options) !*MachO { const requires_adhoc_codesig = cpu_arch == .aarch64 and (os_tag == .macos or abi == .simulator); const use_llvm = build_options.have_llvm and options.use_llvm; const use_stage1 = build_options.is_stage1 and options.use_stage1; - const needs_prealloc = !(use_stage1 or use_llvm or options.cache_mode == .whole); const self = try gpa.create(MachO); errdefer gpa.destroy(self); @@ -419,14 +428,22 @@ pub fn createEmpty(gpa: Allocator, options: link.Options) !*MachO { .file = null, }, .page_size = page_size, - .code_signature = if (requires_adhoc_codesig) CodeSignature.init(page_size) else null, - .needs_prealloc = needs_prealloc, + .code_signature = if (requires_adhoc_codesig) + CodeSignature.init(page_size) + else + null, + .mode = if (use_stage1 or use_llvm or options.module == null or options.cache_mode == .whole) + .one_shot + else + .incremental, }; if (use_llvm and !use_stage1) { self.llvm_object = try LlvmObject.create(gpa, options); } + log.debug("selected linker mode '{s}'", .{@tagName(self.mode)}); + return self; } @@ -448,33 +465,209 @@ pub fn flush(self: *MachO, comp: *Compilation, prog_node: *std.Progress.Node) !v return error.TODOImplementWritingStaticLibFiles; } } - return self.flushModule(comp, prog_node); + + switch (self.mode) { + .one_shot => return self.linkOneShot(comp, prog_node), + .incremental => return self.flushModule(comp, prog_node), + } } pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.Node) !void { const tracy = trace(@src()); defer tracy.end(); - const use_stage1 = build_options.is_stage1 and self.base.options.use_stage1; - - if (build_options.have_llvm and !use_stage1) { + if (build_options.have_llvm) { if (self.llvm_object) |llvm_object| { - try llvm_object.flushModule(comp, prog_node); - - llvm_object.destroy(self.base.allocator); - self.llvm_object = null; - - if (self.base.options.output_mode == .Lib and self.base.options.link_mode == .Static) { - return; - } + return 
try llvm_object.flushModule(comp, prog_node); } } + var arena_allocator = std.heap.ArenaAllocator.init(self.base.allocator); + defer arena_allocator.deinit(); + const arena = arena_allocator.allocator(); + var sub_prog_node = prog_node.start("MachO Flush", 0); sub_prog_node.activate(); defer sub_prog_node.end(); - var arena_allocator = std.heap.ArenaAllocator.init(self.base.allocator); + const module = self.base.options.module orelse return error.LinkingWithoutZigSourceUnimplemented; + + if (self.d_sym) |*d_sym| { + try d_sym.dwarf.flushModule(&self.base, module); + } + + var libs = std.StringArrayHashMap(SystemLib).init(arena); + try self.resolveLibSystem(arena, comp, &.{}, &libs); + + const id_symlink_basename = "zld.id"; + + const cache_dir_handle = module.zig_cache_artifact_directory.handle; + var man: Cache.Manifest = undefined; + defer if (!self.base.options.disable_lld_caching) man.deinit(); + + var digest: [Cache.hex_digest_len]u8 = undefined; + man = comp.cache_parent.obtain(); + self.base.releaseLock(); + + man.hash.addListOfBytes(libs.keys()); + + _ = try man.hit(); + digest = man.final(); + + var prev_digest_buf: [digest.len]u8 = undefined; + const prev_digest: []u8 = Cache.readSmallFile( + cache_dir_handle, + id_symlink_basename, + &prev_digest_buf, + ) catch |err| blk: { + log.debug("MachO Zld new_digest={s} error: {s}", .{ + std.fmt.fmtSliceHexLower(&digest), + @errorName(err), + }); + // Handle this as a cache miss. + break :blk prev_digest_buf[0..0]; + }; + const cache_miss: bool = cache_miss: { + if (mem.eql(u8, prev_digest, &digest)) { + log.debug("MachO Zld digest={s} match", .{ + std.fmt.fmtSliceHexLower(&digest), + }); + if (!self.cold_start) { + log.debug(" skipping parsing linker line objects", .{}); + break :cache_miss false; + } else { + log.debug(" TODO parse prelinked binary and continue linking where we left off", .{}); + } + } + log.debug("MachO Zld prev_digest={s} new_digest={s}", .{ + std.fmt.fmtSliceHexLower(prev_digest), + std.fmt.fmtSliceHexLower(&digest), + }); + // We are about to change the output file to be different, so we invalidate the build hash now. + cache_dir_handle.deleteFile(id_symlink_basename) catch |err| switch (err) { + error.FileNotFound => {}, + else => |e| return e, + }; + break :cache_miss true; + }; + + if (cache_miss) { + for (self.dylibs.items) |*dylib| { + dylib.deinit(self.base.allocator); + } + self.dylibs.clearRetainingCapacity(); + self.dylibs_map.clearRetainingCapacity(); + self.referenced_dylibs.clearRetainingCapacity(); + + var dependent_libs = std.fifo.LinearFifo(struct { + id: Dylib.Id, + parent: u16, + }, .Dynamic).init(self.base.allocator); + defer dependent_libs.deinit(); + try self.parseLibs(libs.keys(), libs.values(), self.base.options.sysroot, &dependent_libs); + try self.parseDependentLibs(self.base.options.sysroot, &dependent_libs); + } + + try self.createMhExecuteHeaderSymbol(); + try self.resolveDyldStubBinder(); + try self.createDyldPrivateAtom(); + try self.createStubHelperPreambleAtom(); + try self.resolveSymbolsInDylibs(); + try self.addCodeSignatureLC(); + + if (self.unresolved.count() > 0) { + return error.UndefinedSymbolReference; + } + + try self.allocateSpecialSymbols(); + + if (build_options.enable_logging) { + self.logSymtab(); + self.logSectionOrdinals(); + self.logAtoms(); + } + + try self.writeAtomsIncremental(); + + try self.setEntryPoint(); + try self.updateSectionOrdinals(); + try self.writeLinkeditSegment(); + + if (self.d_sym) |*d_sym| { + // Flush debug symbols bundle. 
+ try d_sym.flushModule(self.base.allocator, self.base.options); + } + + // code signature and entitlements + if (self.base.options.entitlements) |path| { + if (self.code_signature) |*csig| { + try csig.addEntitlements(self.base.allocator, path); + csig.code_directory.ident = self.base.options.emit.?.sub_path; + } else { + var csig = CodeSignature.init(self.page_size); + try csig.addEntitlements(self.base.allocator, path); + csig.code_directory.ident = self.base.options.emit.?.sub_path; + self.code_signature = csig; + } + } + + if (self.code_signature) |*csig| { + csig.clear(self.base.allocator); + csig.code_directory.ident = self.base.options.emit.?.sub_path; + // Preallocate space for the code signature. + // We need to do this at this stage so that we have the load commands with proper values + // written out to the file. + // The most important here is to have the correct vm and filesize of the __LINKEDIT segment + // where the code signature goes into. + try self.writeCodeSignaturePadding(csig); + } + + try self.writeLoadCommands(); + try self.writeHeader(); + + if (self.entry_addr == null and self.base.options.output_mode == .Exe) { + log.debug("flushing. no_entry_point_found = true", .{}); + self.error_flags.no_entry_point_found = true; + } else { + log.debug("flushing. no_entry_point_found = false", .{}); + self.error_flags.no_entry_point_found = false; + } + + assert(!self.load_commands_dirty); + + if (self.code_signature) |*csig| { + try self.writeCodeSignature(csig); // code signing always comes last + } + + if (build_options.enable_link_snapshots) { + if (self.base.options.enable_link_snapshots) + try self.snapshotState(); + } + + if (cache_miss) { + // Update the file with the digest. If it fails we can continue; it only + // means that the next invocation will have an unnecessary cache miss. + Cache.writeSmallFile(cache_dir_handle, id_symlink_basename, &digest) catch |err| { + log.debug("failed to save linking hash digest file: {s}", .{@errorName(err)}); + }; + // Again failure here only means an unnecessary cache miss. + man.writeManifest() catch |err| { + log.debug("failed to write cache manifest when linking: {s}", .{@errorName(err)}); + }; + // We hang on to this lock so that the output file path can be used without + // other processes clobbering it. + self.base.lock = man.toOwnedLock(); + } + + self.cold_start = false; +} + +fn linkOneShot(self: *MachO, comp: *Compilation, prog_node: *std.Progress.Node) !void { + const tracy = trace(@src()); + defer tracy.end(); + + const gpa = self.base.allocator; + var arena_allocator = std.heap.ArenaAllocator.init(gpa); defer arena_allocator.deinit(); const arena = arena_allocator.allocator(); @@ -484,7 +677,7 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No // If there is no Zig code to compile, then we should skip flushing the output file because it // will not be part of the linker line anyway. 
const module_obj_path: ?[]const u8 = if (self.base.options.module) |module| blk: { - if (use_stage1) { + if (self.base.options.use_stage1) { const obj_basename = try std.zig.binNameAlloc(arena, .{ .root_name = self.base.options.root_name, .target = self.base.options.target, @@ -501,48 +694,35 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No } } - const obj_basename = self.base.intermediary_basename orelse break :blk null; + try self.flushModule(comp, prog_node); if (fs.path.dirname(full_out_path)) |dirname| { - break :blk try fs.path.join(arena, &.{ dirname, obj_basename }); + break :blk try fs.path.join(arena, &.{ dirname, self.base.intermediary_basename.? }); } else { - break :blk obj_basename; + break :blk self.base.intermediary_basename.?; } } else null; - if (self.d_sym) |*d_sym| { - if (self.base.options.module) |module| { - try d_sym.dwarf.flushModule(&self.base, module); - } - } + var sub_prog_node = prog_node.start("MachO Flush", 0); + sub_prog_node.activate(); + sub_prog_node.context.refresh(); + defer sub_prog_node.end(); const is_lib = self.base.options.output_mode == .Lib; const is_dyn_lib = self.base.options.link_mode == .Dynamic and is_lib; const is_exe_or_dyn_lib = is_dyn_lib or self.base.options.output_mode == .Exe; const stack_size = self.base.options.stack_size_override orelse 0; - const allow_undef = is_dyn_lib and (self.base.options.allow_shlib_undefined orelse false); + const is_debug_build = self.base.options.optimize_mode == .Debug; + const gc_sections = self.base.options.gc_sections orelse !is_debug_build; const id_symlink_basename = "zld.id"; - const cache_dir_handle = blk: { - if (use_stage1) { - break :blk directory.handle; - } - if (self.base.options.module) |module| { - break :blk module.zig_cache_artifact_directory.handle; - } - break :blk directory.handle; - }; var man: Cache.Manifest = undefined; defer if (!self.base.options.disable_lld_caching) man.deinit(); var digest: [Cache.hex_digest_len]u8 = undefined; - var needs_full_relink = true; - - cache: { - if ((use_stage1 and self.base.options.disable_lld_caching) or self.base.options.cache_mode == .whole) - break :cache; + if (!self.base.options.disable_lld_caching) { man = comp.cache_parent.obtain(); // We are about to obtain this lock, so here we give other processes a chance first. @@ -565,7 +745,9 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No man.hash.addOptional(self.base.options.search_strategy); man.hash.addOptional(self.base.options.headerpad_size); man.hash.add(self.base.options.headerpad_max_install_names); + man.hash.add(gc_sections); man.hash.add(self.base.options.dead_strip_dylibs); + man.hash.add(self.base.options.strip); man.hash.addListOfBytes(self.base.options.lib_dirs); man.hash.addListOfBytes(self.base.options.framework_dirs); link.hashAddSystemLibs(&man.hash, self.base.options.frameworks); @@ -584,7 +766,7 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No var prev_digest_buf: [digest.len]u8 = undefined; const prev_digest: []u8 = Cache.readSmallFile( - cache_dir_handle, + directory.handle, id_symlink_basename, &prev_digest_buf, ) catch |err| blk: { @@ -597,23 +779,11 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No }; if (mem.eql(u8, prev_digest, &digest)) { // Hot diggity dog! The output binary is already there. 
- - const use_llvm = build_options.have_llvm and self.base.options.use_llvm; - if (use_llvm or use_stage1) { - log.debug("MachO Zld digest={s} match - skipping invocation", .{std.fmt.fmtSliceHexLower(&digest)}); - self.base.lock = man.toOwnedLock(); - return; - } else { - log.debug("MachO Zld digest={s} match", .{std.fmt.fmtSliceHexLower(&digest)}); - if (!self.cold_start) { - log.debug(" no need to relink objects", .{}); - needs_full_relink = false; - } else { - log.debug(" TODO parse prelinked binary and continue linking where we left off", .{}); - // TODO until such time however, perform a full relink of objects. - needs_full_relink = true; - } - } + log.debug("MachO Zld digest={s} match - skipping invocation", .{ + std.fmt.fmtSliceHexLower(&digest), + }); + self.base.lock = man.toOwnedLock(); + return; } log.debug("MachO Zld prev_digest={s} new_digest={s}", .{ std.fmt.fmtSliceHexLower(prev_digest), @@ -621,7 +791,7 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No }); // We are about to change the output file to be different, so we invalidate the build hash now. - cache_dir_handle.deleteFile(id_symlink_basename) catch |err| switch (err) { + directory.handle.deleteFile(id_symlink_basename) catch |err| switch (err) { error.FileNotFound => {}, else => |e| return e, }; @@ -652,450 +822,350 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No try fs.cwd().copyFile(the_object_path, fs.cwd(), full_out_path, .{}); } } else { - if (use_stage1) { - const sub_path = self.base.options.emit.?.sub_path; - self.base.file = try cache_dir_handle.createFile(sub_path, .{ - .truncate = true, - .read = true, - .mode = link.determineMode(self.base.options), - }); - // Index 0 is always a null symbol. - try self.locals.append(self.base.allocator, .{ - .n_strx = 0, - .n_type = 0, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }); - try self.strtab.append(self.base.allocator, 0); - try self.populateMissingMetadata(); - } + const sub_path = self.base.options.emit.?.sub_path; + self.base.file = try directory.handle.createFile(sub_path, .{ + .truncate = true, + .read = true, + .mode = link.determineMode(self.base.options), + }); + // Index 0 is always a null symbol. + try self.locals.append(gpa, .{ + .n_strx = 0, + .n_type = 0, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }); + try self.strtab.buffer.append(gpa, 0); + try self.populateMissingMetadata(); var lib_not_found = false; var framework_not_found = false; - if (needs_full_relink) { - for (self.objects.items) |*object| { - object.free(self.base.allocator, self); - object.deinit(self.base.allocator); - } - self.objects.clearRetainingCapacity(); - - for (self.archives.items) |*archive| { - archive.deinit(self.base.allocator); - } - self.archives.clearRetainingCapacity(); + // Positional arguments to the linker such as object files and static archives. 
+ var positionals = std.ArrayList([]const u8).init(arena); + try positionals.ensureUnusedCapacity(self.base.options.objects.len); - for (self.dylibs.items) |*dylib| { - dylib.deinit(self.base.allocator); - } - self.dylibs.clearRetainingCapacity(); - self.dylibs_map.clearRetainingCapacity(); - self.referenced_dylibs.clearRetainingCapacity(); - - { - var to_remove = std.ArrayList(u32).init(self.base.allocator); - defer to_remove.deinit(); - var it = self.symbol_resolver.iterator(); - while (it.next()) |entry| { - const key = entry.key_ptr.*; - const value = entry.value_ptr.*; - if (value.file != null) { - try to_remove.append(key); - } - } + var must_link_archives = std.StringArrayHashMap(void).init(arena); + try must_link_archives.ensureUnusedCapacity(self.base.options.objects.len); - for (to_remove.items) |key| { - if (self.symbol_resolver.fetchRemove(key)) |entry| { - const resolv = entry.value; - switch (resolv.where) { - .global => { - self.globals_free_list.append(self.base.allocator, resolv.where_index) catch {}; - const sym = &self.globals.items[resolv.where_index]; - sym.n_strx = 0; - sym.n_type = 0; - sym.n_value = 0; - }, - .undef => { - const sym = &self.undefs.items[resolv.where_index]; - sym.n_strx = 0; - sym.n_desc = 0; - }, - } - if (self.got_entries_table.get(.{ .global = entry.key })) |i| { - self.got_entries_free_list.append(self.base.allocator, @intCast(u32, i)) catch {}; - self.got_entries.items[i] = .{ .target = .{ .local = 0 }, .atom = undefined }; - _ = self.got_entries_table.swapRemove(.{ .global = entry.key }); - } - if (self.stubs_table.get(entry.key)) |i| { - self.stubs_free_list.append(self.base.allocator, @intCast(u32, i)) catch {}; - self.stubs.items[i] = undefined; - _ = self.stubs_table.swapRemove(entry.key); - } - } - } + for (self.base.options.objects) |obj| { + if (must_link_archives.contains(obj.path)) continue; + if (obj.must_link) { + _ = must_link_archives.getOrPutAssumeCapacity(obj.path); + } else { + _ = positionals.appendAssumeCapacity(obj.path); } - // Invalidate all relocs - // TODO we only need to invalidate the backlinks to the relinked atoms from - // the relocatable object files. - self.invalidate_relocs = true; - - // Positional arguments to the linker such as object files and static archives. 
- var positionals = std.ArrayList([]const u8).init(arena); - try positionals.ensureUnusedCapacity(self.base.options.objects.len); + } - var must_link_archives = std.StringArrayHashMap(void).init(arena); - try must_link_archives.ensureUnusedCapacity(self.base.options.objects.len); + for (comp.c_object_table.keys()) |key| { + try positionals.append(key.status.success.object_path); + } - for (self.base.options.objects) |obj| { - if (must_link_archives.contains(obj.path)) continue; - if (obj.must_link) { - _ = must_link_archives.getOrPutAssumeCapacity(obj.path); - } else { - _ = positionals.appendAssumeCapacity(obj.path); - } - } + if (module_obj_path) |p| { + try positionals.append(p); + } - for (comp.c_object_table.keys()) |key| { - try positionals.append(key.status.success.object_path); - } + if (comp.compiler_rt_lib) |lib| { + try positionals.append(lib.full_object_path); + } - if (module_obj_path) |p| { - try positionals.append(p); - } + // libc++ dep + if (self.base.options.link_libcpp) { + try positionals.append(comp.libcxxabi_static_lib.?.full_object_path); + try positionals.append(comp.libcxx_static_lib.?.full_object_path); + } - if (comp.compiler_rt_lib) |lib| { - try positionals.append(lib.full_object_path); - } + // Shared and static libraries passed via `-l` flag. + var candidate_libs = std.StringArrayHashMap(SystemLib).init(arena); - // libc++ dep - if (self.base.options.link_libcpp) { - try positionals.append(comp.libcxxabi_static_lib.?.full_object_path); - try positionals.append(comp.libcxx_static_lib.?.full_object_path); + const system_lib_names = self.base.options.system_libs.keys(); + for (system_lib_names) |system_lib_name| { + // By this time, we depend on these libs being dynamically linked libraries and not static libraries + // (the check for that needs to be earlier), but they could be full paths to .dylib files, in which + // case we want to avoid prepending "-l". + if (Compilation.classifyFileExt(system_lib_name) == .shared_library) { + try positionals.append(system_lib_name); + continue; } - // Shared and static libraries passed via `-l` flag. - var candidate_libs = std.StringArrayHashMap(SystemLib).init(arena); - - const system_lib_names = self.base.options.system_libs.keys(); - for (system_lib_names) |system_lib_name| { - // By this time, we depend on these libs being dynamically linked libraries and not static libraries - // (the check for that needs to be earlier), but they could be full paths to .dylib files, in which - // case we want to avoid prepending "-l". 
- if (Compilation.classifyFileExt(system_lib_name) == .shared_library) { - try positionals.append(system_lib_name); - continue; - } - - const system_lib_info = self.base.options.system_libs.get(system_lib_name).?; - try candidate_libs.put(system_lib_name, .{ - .needed = system_lib_info.needed, - .weak = system_lib_info.weak, - }); - } + const system_lib_info = self.base.options.system_libs.get(system_lib_name).?; + try candidate_libs.put(system_lib_name, .{ + .needed = system_lib_info.needed, + .weak = system_lib_info.weak, + }); + } - var lib_dirs = std.ArrayList([]const u8).init(arena); - for (self.base.options.lib_dirs) |dir| { - if (try resolveSearchDir(arena, dir, self.base.options.sysroot)) |search_dir| { - try lib_dirs.append(search_dir); - } else { - log.warn("directory not found for '-L{s}'", .{dir}); - } + var lib_dirs = std.ArrayList([]const u8).init(arena); + for (self.base.options.lib_dirs) |dir| { + if (try resolveSearchDir(arena, dir, self.base.options.sysroot)) |search_dir| { + try lib_dirs.append(search_dir); + } else { + log.warn("directory not found for '-L{s}'", .{dir}); } + } - var libs = std.StringArrayHashMap(SystemLib).init(arena); - - // Assume ld64 default -search_paths_first if no strategy specified. - const search_strategy = self.base.options.search_strategy orelse .paths_first; - outer: for (candidate_libs.keys()) |lib_name| { - switch (search_strategy) { - .paths_first => { - // Look in each directory for a dylib (stub first), and then for archive - for (lib_dirs.items) |dir| { - for (&[_][]const u8{ ".tbd", ".dylib", ".a" }) |ext| { - if (try resolveLib(arena, dir, lib_name, ext)) |full_path| { - try libs.put(full_path, candidate_libs.get(lib_name).?); - continue :outer; - } + var libs = std.StringArrayHashMap(SystemLib).init(arena); + + // Assume ld64 default -search_paths_first if no strategy specified. 
+ const search_strategy = self.base.options.search_strategy orelse .paths_first; + outer: for (candidate_libs.keys()) |lib_name| { + switch (search_strategy) { + .paths_first => { + // Look in each directory for a dylib (stub first), and then for archive + for (lib_dirs.items) |dir| { + for (&[_][]const u8{ ".tbd", ".dylib", ".a" }) |ext| { + if (try resolveLib(arena, dir, lib_name, ext)) |full_path| { + try libs.put(full_path, candidate_libs.get(lib_name).?); + continue :outer; } - } else { - log.warn("library not found for '-l{s}'", .{lib_name}); - lib_not_found = true; } - }, - .dylibs_first => { - // First, look for a dylib in each search dir - for (lib_dirs.items) |dir| { - for (&[_][]const u8{ ".tbd", ".dylib" }) |ext| { - if (try resolveLib(arena, dir, lib_name, ext)) |full_path| { - try libs.put(full_path, candidate_libs.get(lib_name).?); - continue :outer; - } - } - } else for (lib_dirs.items) |dir| { - if (try resolveLib(arena, dir, lib_name, ".a")) |full_path| { + } else { + log.warn("library not found for '-l{s}'", .{lib_name}); + lib_not_found = true; + } + }, + .dylibs_first => { + // First, look for a dylib in each search dir + for (lib_dirs.items) |dir| { + for (&[_][]const u8{ ".tbd", ".dylib" }) |ext| { + if (try resolveLib(arena, dir, lib_name, ext)) |full_path| { try libs.put(full_path, candidate_libs.get(lib_name).?); - } else { - log.warn("library not found for '-l{s}'", .{lib_name}); - lib_not_found = true; + continue :outer; } } - }, - } - } - - if (lib_not_found) { - log.warn("Library search paths:", .{}); - for (lib_dirs.items) |dir| { - log.warn(" {s}", .{dir}); - } - } - - // If we were given the sysroot, try to look there first for libSystem.B.{dylib, tbd}. - var libsystem_available = false; - if (self.base.options.sysroot != null) blk: { - // Try stub file first. If we hit it, then we're done as the stub file - // re-exports every single symbol definition. - for (lib_dirs.items) |dir| { - if (try resolveLib(arena, dir, "System", ".tbd")) |full_path| { - try libs.put(full_path, .{ .needed = true }); - libsystem_available = true; - break :blk; - } - } - // If we didn't hit the stub file, try .dylib next. However, libSystem.dylib - // doesn't export libc.dylib which we'll need to resolve subsequently also. 
- for (lib_dirs.items) |dir| { - if (try resolveLib(arena, dir, "System", ".dylib")) |libsystem_path| { - if (try resolveLib(arena, dir, "c", ".dylib")) |libc_path| { - try libs.put(libsystem_path, .{ .needed = true }); - try libs.put(libc_path, .{ .needed = true }); - libsystem_available = true; - break :blk; + } else for (lib_dirs.items) |dir| { + if (try resolveLib(arena, dir, lib_name, ".a")) |full_path| { + try libs.put(full_path, candidate_libs.get(lib_name).?); + } else { + log.warn("library not found for '-l{s}'", .{lib_name}); + lib_not_found = true; } } - } + }, } - if (!libsystem_available) { - const libsystem_name = try std.fmt.allocPrint(arena, "libSystem.{d}.tbd", .{ - self.base.options.target.os.version_range.semver.min.major, - }); - const full_path = try comp.zig_lib_directory.join(arena, &[_][]const u8{ - "libc", "darwin", libsystem_name, - }); - try libs.put(full_path, .{ .needed = true }); + } + + if (lib_not_found) { + log.warn("Library search paths:", .{}); + for (lib_dirs.items) |dir| { + log.warn(" {s}", .{dir}); } + } - // frameworks - var framework_dirs = std.ArrayList([]const u8).init(arena); - for (self.base.options.framework_dirs) |dir| { - if (try resolveSearchDir(arena, dir, self.base.options.sysroot)) |search_dir| { - try framework_dirs.append(search_dir); - } else { - log.warn("directory not found for '-F{s}'", .{dir}); - } + try self.resolveLibSystem(arena, comp, lib_dirs.items, &libs); + + // frameworks + var framework_dirs = std.ArrayList([]const u8).init(arena); + for (self.base.options.framework_dirs) |dir| { + if (try resolveSearchDir(arena, dir, self.base.options.sysroot)) |search_dir| { + try framework_dirs.append(search_dir); + } else { + log.warn("directory not found for '-F{s}'", .{dir}); } + } - outer: for (self.base.options.frameworks.keys()) |f_name| { - for (framework_dirs.items) |dir| { - for (&[_][]const u8{ ".tbd", ".dylib", "" }) |ext| { - if (try resolveFramework(arena, dir, f_name, ext)) |full_path| { - const info = self.base.options.frameworks.get(f_name).?; - try libs.put(full_path, .{ - .needed = info.needed, - .weak = info.weak, - }); - continue :outer; - } + outer: for (self.base.options.frameworks.keys()) |f_name| { + for (framework_dirs.items) |dir| { + for (&[_][]const u8{ ".tbd", ".dylib", "" }) |ext| { + if (try resolveFramework(arena, dir, f_name, ext)) |full_path| { + const info = self.base.options.frameworks.get(f_name).?; + try libs.put(full_path, .{ + .needed = info.needed, + .weak = info.weak, + }); + continue :outer; } - } else { - log.warn("framework not found for '-framework {s}'", .{f_name}); - framework_not_found = true; } + } else { + log.warn("framework not found for '-framework {s}'", .{f_name}); + framework_not_found = true; } + } - if (framework_not_found) { - log.warn("Framework search paths:", .{}); - for (framework_dirs.items) |dir| { - log.warn(" {s}", .{dir}); - } + if (framework_not_found) { + log.warn("Framework search paths:", .{}); + for (framework_dirs.items) |dir| { + log.warn(" {s}", .{dir}); } + } - // rpaths - var rpath_table = std.StringArrayHashMap(void).init(arena); - for (self.base.options.rpath_list) |rpath| { - if (rpath_table.contains(rpath)) continue; - const cmdsize = @intCast(u32, mem.alignForwardGeneric( - u64, - @sizeOf(macho.rpath_command) + rpath.len + 1, - @sizeOf(u64), - )); - var rpath_cmd = macho.emptyGenericCommandWithData(macho.rpath_command{ - .cmdsize = cmdsize, - .path = @sizeOf(macho.rpath_command), - }); - rpath_cmd.data = try self.base.allocator.alloc(u8, cmdsize - 
rpath_cmd.inner.path); - mem.set(u8, rpath_cmd.data, 0); - mem.copy(u8, rpath_cmd.data, rpath); - try self.load_commands.append(self.base.allocator, .{ .rpath = rpath_cmd }); - try rpath_table.putNoClobber(rpath, {}); - self.load_commands_dirty = true; - } + // rpaths + var rpath_table = std.StringArrayHashMap(void).init(arena); + for (self.base.options.rpath_list) |rpath| { + if (rpath_table.contains(rpath)) continue; + const cmdsize = @intCast(u32, mem.alignForwardGeneric( + u64, + @sizeOf(macho.rpath_command) + rpath.len + 1, + @sizeOf(u64), + )); + var rpath_cmd = macho.emptyGenericCommandWithData(macho.rpath_command{ + .cmdsize = cmdsize, + .path = @sizeOf(macho.rpath_command), + }); + rpath_cmd.data = try gpa.alloc(u8, cmdsize - rpath_cmd.inner.path); + mem.set(u8, rpath_cmd.data, 0); + mem.copy(u8, rpath_cmd.data, rpath); + try self.load_commands.append(gpa, .{ .rpath = rpath_cmd }); + try rpath_table.putNoClobber(rpath, {}); + self.load_commands_dirty = true; + } - // code signature and entitlements - if (self.base.options.entitlements) |path| { - if (self.code_signature) |*csig| { - try csig.addEntitlements(self.base.allocator, path); - csig.code_directory.ident = self.base.options.emit.?.sub_path; - } else { - var csig = CodeSignature.init(self.page_size); - try csig.addEntitlements(self.base.allocator, path); - csig.code_directory.ident = self.base.options.emit.?.sub_path; - self.code_signature = csig; - } + // code signature and entitlements + if (self.base.options.entitlements) |path| { + if (self.code_signature) |*csig| { + try csig.addEntitlements(gpa, path); + csig.code_directory.ident = self.base.options.emit.?.sub_path; + } else { + var csig = CodeSignature.init(self.page_size); + try csig.addEntitlements(gpa, path); + csig.code_directory.ident = self.base.options.emit.?.sub_path; + self.code_signature = csig; } + } - if (self.base.options.verbose_link) { - var argv = std.ArrayList([]const u8).init(arena); - - try argv.append("zig"); - try argv.append("ld"); - - if (is_exe_or_dyn_lib) { - try argv.append("-dynamic"); - } + if (self.base.options.verbose_link) { + var argv = std.ArrayList([]const u8).init(arena); - if (is_dyn_lib) { - try argv.append("-dylib"); + try argv.append("zig"); + try argv.append("ld"); - if (self.base.options.install_name) |install_name| { - try argv.append("-install_name"); - try argv.append(install_name); - } - } + if (is_exe_or_dyn_lib) { + try argv.append("-dynamic"); + } - if (self.base.options.sysroot) |syslibroot| { - try argv.append("-syslibroot"); - try argv.append(syslibroot); - } + if (is_dyn_lib) { + try argv.append("-dylib"); - for (rpath_table.keys()) |rpath| { - try argv.append("-rpath"); - try argv.append(rpath); + if (self.base.options.install_name) |install_name| { + try argv.append("-install_name"); + try argv.append(install_name); } + } - if (self.base.options.pagezero_size) |pagezero_size| { - try argv.append("-pagezero_size"); - try argv.append(try std.fmt.allocPrint(arena, "0x{x}", .{pagezero_size})); - } + if (self.base.options.sysroot) |syslibroot| { + try argv.append("-syslibroot"); + try argv.append(syslibroot); + } - if (self.base.options.search_strategy) |strat| switch (strat) { - .paths_first => try argv.append("-search_paths_first"), - .dylibs_first => try argv.append("-search_dylibs_first"), - }; + for (rpath_table.keys()) |rpath| { + try argv.append("-rpath"); + try argv.append(rpath); + } - if (self.base.options.headerpad_size) |headerpad_size| { - try argv.append("-headerpad_size"); - try argv.append(try 
std.fmt.allocPrint(arena, "0x{x}", .{headerpad_size})); - } + if (self.base.options.pagezero_size) |pagezero_size| { + try argv.append("-pagezero_size"); + try argv.append(try std.fmt.allocPrint(arena, "0x{x}", .{pagezero_size})); + } - if (self.base.options.headerpad_max_install_names) { - try argv.append("-headerpad_max_install_names"); - } + if (self.base.options.search_strategy) |strat| switch (strat) { + .paths_first => try argv.append("-search_paths_first"), + .dylibs_first => try argv.append("-search_dylibs_first"), + }; - if (self.base.options.dead_strip_dylibs) { - try argv.append("-dead_strip_dylibs"); - } + if (self.base.options.headerpad_size) |headerpad_size| { + try argv.append("-headerpad_size"); + try argv.append(try std.fmt.allocPrint(arena, "0x{x}", .{headerpad_size})); + } - if (self.base.options.entry) |entry| { - try argv.append("-e"); - try argv.append(entry); - } + if (self.base.options.headerpad_max_install_names) { + try argv.append("-headerpad_max_install_names"); + } - for (self.base.options.objects) |obj| { - try argv.append(obj.path); - } + if (gc_sections) { + try argv.append("-dead_strip"); + } - for (comp.c_object_table.keys()) |key| { - try argv.append(key.status.success.object_path); - } + if (self.base.options.dead_strip_dylibs) { + try argv.append("-dead_strip_dylibs"); + } - if (module_obj_path) |p| { - try argv.append(p); - } + if (self.base.options.entry) |entry| { + try argv.append("-e"); + try argv.append(entry); + } - if (comp.compiler_rt_lib) |lib| { - try argv.append(lib.full_object_path); - } + for (self.base.options.objects) |obj| { + try argv.append(obj.path); + } - if (self.base.options.link_libcpp) { - try argv.append(comp.libcxxabi_static_lib.?.full_object_path); - try argv.append(comp.libcxx_static_lib.?.full_object_path); - } + for (comp.c_object_table.keys()) |key| { + try argv.append(key.status.success.object_path); + } - try argv.append("-o"); - try argv.append(full_out_path); + if (module_obj_path) |p| { + try argv.append(p); + } - try argv.append("-lSystem"); - try argv.append("-lc"); + if (comp.compiler_rt_lib) |lib| { + try argv.append(lib.full_object_path); + } - for (self.base.options.system_libs.keys()) |l_name| { - const info = self.base.options.system_libs.get(l_name).?; - const arg = if (info.needed) - try std.fmt.allocPrint(arena, "-needed-l{s}", .{l_name}) - else if (info.weak) - try std.fmt.allocPrint(arena, "-weak-l{s}", .{l_name}) - else - try std.fmt.allocPrint(arena, "-l{s}", .{l_name}); - try argv.append(arg); - } + if (self.base.options.link_libcpp) { + try argv.append(comp.libcxxabi_static_lib.?.full_object_path); + try argv.append(comp.libcxx_static_lib.?.full_object_path); + } - for (self.base.options.lib_dirs) |lib_dir| { - try argv.append(try std.fmt.allocPrint(arena, "-L{s}", .{lib_dir})); - } + try argv.append("-o"); + try argv.append(full_out_path); + + try argv.append("-lSystem"); + try argv.append("-lc"); + + for (self.base.options.system_libs.keys()) |l_name| { + const info = self.base.options.system_libs.get(l_name).?; + const arg = if (info.needed) + try std.fmt.allocPrint(arena, "-needed-l{s}", .{l_name}) + else if (info.weak) + try std.fmt.allocPrint(arena, "-weak-l{s}", .{l_name}) + else + try std.fmt.allocPrint(arena, "-l{s}", .{l_name}); + try argv.append(arg); + } - for (self.base.options.frameworks.keys()) |framework| { - const info = self.base.options.frameworks.get(framework).?; - const arg = if (info.needed) - try std.fmt.allocPrint(arena, "-needed_framework {s}", .{framework}) - else if 
(info.weak) - try std.fmt.allocPrint(arena, "-weak_framework {s}", .{framework}) - else - try std.fmt.allocPrint(arena, "-framework {s}", .{framework}); - try argv.append(arg); - } + for (self.base.options.lib_dirs) |lib_dir| { + try argv.append(try std.fmt.allocPrint(arena, "-L{s}", .{lib_dir})); + } - for (self.base.options.framework_dirs) |framework_dir| { - try argv.append(try std.fmt.allocPrint(arena, "-F{s}", .{framework_dir})); - } + for (self.base.options.frameworks.keys()) |framework| { + const info = self.base.options.frameworks.get(framework).?; + const arg = if (info.needed) + try std.fmt.allocPrint(arena, "-needed_framework {s}", .{framework}) + else if (info.weak) + try std.fmt.allocPrint(arena, "-weak_framework {s}", .{framework}) + else + try std.fmt.allocPrint(arena, "-framework {s}", .{framework}); + try argv.append(arg); + } - if (allow_undef) { - try argv.append("-undefined"); - try argv.append("dynamic_lookup"); - } + for (self.base.options.framework_dirs) |framework_dir| { + try argv.append(try std.fmt.allocPrint(arena, "-F{s}", .{framework_dir})); + } - for (must_link_archives.keys()) |lib| { - try argv.append(try std.fmt.allocPrint(arena, "-force_load {s}", .{lib})); - } + if (is_dyn_lib and (self.base.options.allow_shlib_undefined orelse false)) { + try argv.append("-undefined"); + try argv.append("dynamic_lookup"); + } - Compilation.dump_argv(argv.items); + for (must_link_archives.keys()) |lib| { + try argv.append(try std.fmt.allocPrint(arena, "-force_load {s}", .{lib})); } - var dependent_libs = std.fifo.LinearFifo(struct { - id: Dylib.Id, - parent: u16, - }, .Dynamic).init(self.base.allocator); - defer dependent_libs.deinit(); - try self.parseInputFiles(positionals.items, self.base.options.sysroot, &dependent_libs); - try self.parseAndForceLoadStaticArchives(must_link_archives.keys()); - try self.parseLibs(libs.keys(), libs.values(), self.base.options.sysroot, &dependent_libs); - try self.parseDependentLibs(self.base.options.sysroot, &dependent_libs); + Compilation.dump_argv(argv.items); } - try self.createMhExecuteHeaderSymbol(); + var dependent_libs = std.fifo.LinearFifo(struct { + id: Dylib.Id, + parent: u16, + }, .Dynamic).init(gpa); + defer dependent_libs.deinit(); + try self.parseInputFiles(positionals.items, self.base.options.sysroot, &dependent_libs); + try self.parseAndForceLoadStaticArchives(must_link_archives.keys()); + try self.parseLibs(libs.keys(), libs.values(), self.base.options.sysroot, &dependent_libs); + try self.parseDependentLibs(self.base.options.sysroot, &dependent_libs); + for (self.objects.items) |*object, object_id| { - if (object.analyzed) continue; - try self.resolveSymbolsInObject(@intCast(u16, object_id)); + try self.resolveSymbolsInObject(object, @intCast(u16, object_id)); } try self.resolveSymbolsInArchives(); @@ -1103,46 +1173,11 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No try self.createDyldPrivateAtom(); try self.createStubHelperPreambleAtom(); try self.resolveSymbolsInDylibs(); + try self.createMhExecuteHeaderSymbol(); try self.createDsoHandleSymbol(); try self.addCodeSignatureLC(); + try self.resolveSymbolsAtLoading(); - { - var next_sym: usize = 0; - while (next_sym < self.unresolved.count()) { - const sym = &self.undefs.items[self.unresolved.keys()[next_sym]]; - const sym_name = self.getString(sym.n_strx); - const resolv = self.symbol_resolver.get(sym.n_strx) orelse unreachable; - - if (sym.discarded()) { - sym.* = .{ - .n_strx = 0, - .n_type = macho.N_UNDF, - .n_sect = 0, - .n_desc = 
0, - .n_value = 0, - }; - _ = self.unresolved.swapRemove(resolv.where_index); - continue; - } else if (allow_undef) { - const n_desc = @bitCast( - u16, - macho.BIND_SPECIAL_DYLIB_FLAT_LOOKUP * @intCast(i16, macho.N_SYMBOL_RESOLVER), - ); - // TODO allow_shlib_undefined is an ELF flag so figure out macOS specific flags too. - sym.n_type = macho.N_EXT; - sym.n_desc = n_desc; - _ = self.unresolved.swapRemove(resolv.where_index); - continue; - } - - log.err("undefined reference to symbol '{s}'", .{sym_name}); - if (resolv.file) |file| { - log.err(" first referenced in '{s}'", .{self.objects.items[file].name}); - } - - next_sym += 1; - } - } if (self.unresolved.count() > 0) { return error.UndefinedSymbolReference; } @@ -1154,46 +1189,42 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No } try self.createTentativeDefAtoms(); - try self.parseObjectsIntoAtoms(); - const use_llvm = build_options.have_llvm and self.base.options.use_llvm; - if (use_llvm or use_stage1) { - try self.pruneAndSortSections(); - try self.allocateSegments(); - try self.allocateLocals(); + for (self.objects.items) |*object, object_id| { + try object.splitIntoAtomsOneShot(self, @intCast(u32, object_id)); } + if (gc_sections) { + try dead_strip.gcAtoms(self); + } + + try self.pruneAndSortSections(); + try self.allocateSegments(); + try self.allocateSymbols(); + try self.allocateSpecialSymbols(); - try self.allocateGlobals(); if (build_options.enable_logging) { self.logSymtab(); self.logSectionOrdinals(); + self.logAtoms(); } - if (use_llvm or use_stage1) { - try self.writeAllAtoms(); - } else { - try self.writeAtoms(); - } + try self.writeAtomsOneShot(); if (self.rustc_section_index) |id| { - const seg = &self.load_commands.items[self.data_segment_cmd_index.?].segment; - const sect = &seg.sections.items[id]; + const sect = self.getSectionPtr(.{ + .seg = self.data_segment_cmd_index.?, + .sect = id, + }); sect.size = self.rustc_section_size; } try self.setEntryPoint(); - try self.updateSectionOrdinals(); try self.writeLinkeditSegment(); - if (self.d_sym) |*d_sym| { - // Flush debug symbols bundle. - try d_sym.flushModule(self.base.allocator, self.base.options); - } - if (self.code_signature) |*csig| { - csig.clear(self.base.allocator); + csig.clear(gpa); csig.code_directory.ident = self.base.options.emit.?.sub_path; // Preallocate space for the code signature. // We need to do this at this stage so that we have the load commands with proper values @@ -1206,32 +1237,17 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No try self.writeLoadCommands(); try self.writeHeader(); - if (self.entry_addr == null and self.base.options.output_mode == .Exe) { - log.debug("flushing. no_entry_point_found = true", .{}); - self.error_flags.no_entry_point_found = true; - } else { - log.debug("flushing. no_entry_point_found = false", .{}); - self.error_flags.no_entry_point_found = false; - } - assert(!self.load_commands_dirty); if (self.code_signature) |*csig| { try self.writeCodeSignature(csig); // code signing always comes last } - - if (build_options.enable_link_snapshots) { - if (self.base.options.enable_link_snapshots) - try self.snapshotState(); - } } - cache: { - if ((use_stage1 and self.base.options.disable_lld_caching) or self.base.options.cache_mode == .whole) - break :cache; + if (!self.base.options.disable_lld_caching) { // Update the file with the digest. If it fails we can continue; it only // means that the next invocation will have an unnecessary cache miss. 
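
The `gc_sections` branch above hands liveness analysis to `dead_strip.gcAtoms`. A minimal sketch of the mark phase this implies, assuming a simplified `Atom` with a hypothetical `relocTargets()` accessor; a real implementation must also root atoms in sections flagged no-dead-strip, exported symbols, and the entry point:

```zig
const std = @import("std");

fn markLive(gpa: std.mem.Allocator, roots: []const *Atom) !std.AutoHashMap(*Atom, void) {
    var alive = std.AutoHashMap(*Atom, void).init(gpa);
    errdefer alive.deinit();
    var worklist = std.ArrayList(*Atom).init(gpa);
    defer worklist.deinit();
    try worklist.appendSlice(roots);
    while (worklist.popOrNull()) |atom| {
        const gop = try alive.getOrPut(atom);
        if (gop.found_existing) continue; // already marked
        // Everything reachable through a relocation stays; the sweep phase
        // then unlinks unmarked atoms from their section lists.
        for (atom.relocTargets()) |target| try worklist.append(target);
    }
    return alive;
}
```

Atoms left unmarked after this flood fill are the ones `-dead_strip` removes.
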
- Cache.writeSmallFile(cache_dir_handle, id_symlink_basename, &digest) catch |err| { + Cache.writeSmallFile(directory.handle, id_symlink_basename, &digest) catch |err| { log.debug("failed to save linking hash digest file: {s}", .{@errorName(err)}); }; // Again failure here only means an unnecessary cache miss. @@ -1242,8 +1258,49 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No // other processes clobbering it. self.base.lock = man.toOwnedLock(); } +} - self.cold_start = false; +fn resolveLibSystem( + self: *MachO, + arena: Allocator, + comp: *Compilation, + search_dirs: []const []const u8, + out_libs: anytype, +) !void { + // If we were given the sysroot, try to look there first for libSystem.B.{dylib, tbd}. + var libsystem_available = false; + if (self.base.options.sysroot != null) blk: { + // Try stub file first. If we hit it, then we're done as the stub file + // re-exports every single symbol definition. + for (search_dirs) |dir| { + if (try resolveLib(arena, dir, "System", ".tbd")) |full_path| { + try out_libs.put(full_path, .{ .needed = true }); + libsystem_available = true; + break :blk; + } + } + // If we didn't hit the stub file, try .dylib next. However, libSystem.dylib + // doesn't export libc.dylib which we'll need to resolve subsequently also. + for (search_dirs) |dir| { + if (try resolveLib(arena, dir, "System", ".dylib")) |libsystem_path| { + if (try resolveLib(arena, dir, "c", ".dylib")) |libc_path| { + try out_libs.put(libsystem_path, .{ .needed = true }); + try out_libs.put(libc_path, .{ .needed = true }); + libsystem_available = true; + break :blk; + } + } + } + } + if (!libsystem_available) { + const libsystem_name = try std.fmt.allocPrint(arena, "libSystem.{d}.tbd", .{ + self.base.options.target.os.version_range.semver.min.major, + }); + const full_path = try comp.zig_lib_directory.join(arena, &[_][]const u8{ + "libc", "darwin", libsystem_name, + }); + try out_libs.put(full_path, .{ .needed = true }); + } } fn resolveSearchDir( @@ -1288,6 +1345,16 @@ fn resolveSearchDir( return null; } +fn resolveSearchDirs(arena: Allocator, dirs: []const []const u8, syslibroot: ?[]const u8, out_dirs: anytype) !void { + for (dirs) |dir| { + if (try resolveSearchDir(arena, dir, syslibroot)) |search_dir| { + try out_dirs.append(search_dir); + } else { + log.warn("directory not found for '-L{s}'", .{dir}); + } + } +} + fn resolveLib( arena: Allocator, search_dir: []const u8, @@ -1337,9 +1404,15 @@ fn parseObject(self: *MachO, path: []const u8) !bool { const name = try self.base.allocator.dupe(u8, path); errdefer self.base.allocator.free(name); + const mtime: u64 = mtime: { + const stat = file.stat() catch break :mtime 0; + break :mtime @intCast(u64, @divFloor(stat.mtime, 1_000_000_000)); + }; + var object = Object{ .name = name, .file = file, + .mtime = mtime, }; object.parse(self.base.allocator, self.base.options.target) catch |err| switch (err) { @@ -1507,7 +1580,7 @@ fn parseInputFiles(self: *MachO, files: []const []const u8, syslibroot: ?[]const .syslibroot = syslibroot, })) continue; - log.warn("unknown filetype for positional input file: '{s}'", .{file_name}); + log.debug("unknown filetype for positional input file: '{s}'", .{file_name}); } } @@ -1522,7 +1595,7 @@ fn parseAndForceLoadStaticArchives(self: *MachO, files: []const []const u8) !voi log.debug("parsing and force loading static archive '{s}'", .{full_path}); if (try self.parseArchive(full_path, true)) continue; - log.warn("unknown filetype: expected static archive: '{s}'", .{file_name}); 
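
The probe order `resolveLibSystem` encodes, spelled out: first `<dir>/libSystem.tbd` (the stub re-exports every symbol definition, so a hit ends the search); then `<dir>/libSystem.dylib` together with `<dir>/libc.dylib` (the dylib alone does not re-export libc, so both must resolve); finally the `.tbd` bundled with Zig, whose name is keyed on the minimum targeted macOS version. A sketch of that last naming step, assuming a minimum semver of `12.0.0`:

```zig
const std = @import("std");

test "bundled libSystem name" {
    var buf: [32]u8 = undefined;
    const name = try std.fmt.bufPrint(&buf, "libSystem.{d}.tbd", .{@as(u64, 12)});
    try std.testing.expectEqualStrings("libSystem.12.tbd", name);
}
```
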
+ log.debug("unknown filetype: expected static archive: '{s}'", .{file_name}); } } @@ -1543,7 +1616,7 @@ fn parseLibs( })) continue; if (try self.parseArchive(lib, false)) continue; - log.warn("unknown filetype for a library: '{s}'", .{lib}); + log.debug("unknown filetype for a library: '{s}'", .{lib}); } } @@ -1587,7 +1660,7 @@ fn parseDependentLibs(self: *MachO, syslibroot: ?[]const u8, dependent_libs: any }); if (did_parse_successfully) break; } else { - log.warn("unable to resolve dependency {s}", .{dep_id.id.name}); + log.debug("unable to resolve dependency {s}", .{dep_id.id.name}); } } } @@ -1595,6 +1668,15 @@ fn parseDependentLibs(self: *MachO, syslibroot: ?[]const u8, dependent_libs: any pub const MatchingSection = struct { seg: u16, sect: u16, + + pub fn eql(this: MatchingSection, other: struct { + seg: ?u16, + sect: ?u16, + }) bool { + const seg = other.seg orelse return false; + const sect = other.sect orelse return false; + return this.seg == seg and this.sect == sect; + } }; pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSection { @@ -2158,33 +2240,31 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio return res; } -pub fn createEmptyAtom(self: *MachO, local_sym_index: u32, size: u64, alignment: u32) !*Atom { +pub fn createEmptyAtom(gpa: Allocator, sym_index: u32, size: u64, alignment: u32) !*Atom { const size_usize = math.cast(usize, size) orelse return error.Overflow; - const atom = try self.base.allocator.create(Atom); - errdefer self.base.allocator.destroy(atom); + const atom = try gpa.create(Atom); + errdefer gpa.destroy(atom); atom.* = Atom.empty; - atom.local_sym_index = local_sym_index; + atom.sym_index = sym_index; atom.size = size; atom.alignment = alignment; - try atom.code.resize(self.base.allocator, size_usize); + try atom.code.resize(gpa, size_usize); mem.set(u8, atom.code.items, 0); - try self.managed_atoms.append(self.base.allocator, atom); return atom; } pub fn writeAtom(self: *MachO, atom: *Atom, match: MatchingSection) !void { - const seg = self.load_commands.items[match.seg].segment; - const sect = seg.sections.items[match.sect]; - const sym = self.locals.items[atom.local_sym_index]; + const sect = self.getSection(match); + const sym = atom.getSymbol(self); const file_offset = sect.offset + sym.n_value - sect.addr; try atom.resolveRelocs(self); - log.debug("writing atom for symbol {s} at file offset 0x{x}", .{ self.getString(sym.n_strx), file_offset }); + log.debug("writing atom for symbol {s} at file offset 0x{x}", .{ atom.getName(self), file_offset }); try self.base.file.?.pwriteAll(atom.code.items, file_offset); } -fn allocateLocals(self: *MachO) !void { +fn allocateSymbols(self: *MachO) !void { var it = self.atoms.iterator(); while (it.next()) |entry| { const match = entry.key_ptr.*; @@ -2194,37 +2274,25 @@ fn allocateLocals(self: *MachO) !void { atom = prev; } - const n_sect = @intCast(u8, self.section_ordinals.getIndex(match).? 
+ 1); - const seg = self.load_commands.items[match.seg].segment; - const sect = seg.sections.items[match.sect]; + const n_sect = self.getSectionOrdinal(match); + const sect = self.getSection(match); var base_vaddr = sect.addr; - log.debug("allocating local symbols in {s},{s}", .{ sect.segName(), sect.sectName() }); + log.debug("allocating local symbols in sect({d}, '{s},{s}')", .{ n_sect, sect.segName(), sect.sectName() }); while (true) { const alignment = try math.powi(u32, 2, atom.alignment); base_vaddr = mem.alignForwardGeneric(u64, base_vaddr, alignment); - const sym = &self.locals.items[atom.local_sym_index]; + const sym = atom.getSymbolPtr(self); sym.n_value = base_vaddr; sym.n_sect = n_sect; - log.debug(" {d}: {s} allocated at 0x{x}", .{ - atom.local_sym_index, - self.getString(sym.n_strx), - base_vaddr, - }); - - // Update each alias (if any) - for (atom.aliases.items) |index| { - const alias_sym = &self.locals.items[index]; - alias_sym.n_value = base_vaddr; - alias_sym.n_sect = n_sect; - } + log.debug(" ATOM(%{d}, '{s}') @{x}", .{ atom.sym_index, atom.getName(self), base_vaddr }); // Update each symbol contained within the atom for (atom.contained.items) |sym_at_off| { - const contained_sym = &self.locals.items[sym_at_off.local_sym_index]; + const contained_sym = self.getSymbolPtr(.{ .sym_index = sym_at_off.sym_index, .file = atom.file }); contained_sym.n_value = base_vaddr + sym_at_off.offset; contained_sym.n_sect = n_sect; } @@ -2242,16 +2310,11 @@ fn shiftLocalsByOffset(self: *MachO, match: MatchingSection, offset: i64) !void var atom = self.atoms.get(match) orelse return; while (true) { - const atom_sym = &self.locals.items[atom.local_sym_index]; + const atom_sym = atom.getSymbolPtr(self); atom_sym.n_value = @intCast(u64, @intCast(i64, atom_sym.n_value) + offset); - for (atom.aliases.items) |index| { - const alias_sym = &self.locals.items[index]; - alias_sym.n_value = @intCast(u64, @intCast(i64, alias_sym.n_value) + offset); - } - for (atom.contained.items) |sym_at_off| { - const contained_sym = &self.locals.items[sym_at_off.local_sym_index]; + const contained_sym = self.getSymbolPtr(.{ .sym_index = sym_at_off.sym_index, .file = atom.file }); contained_sym.n_value = @intCast(u64, @intCast(i64, contained_sym.n_value) + offset); } @@ -2262,53 +2325,33 @@ fn shiftLocalsByOffset(self: *MachO, match: MatchingSection, offset: i64) !void } fn allocateSpecialSymbols(self: *MachO) !void { - for (&[_]?u32{ - self.mh_execute_header_sym_index, - self.dso_handle_sym_index, - }) |maybe_sym_index| { - const sym_index = maybe_sym_index orelse continue; - const sym = &self.locals.items[sym_index]; + for (&[_][]const u8{ + "___dso_handle", + "__mh_execute_header", + }) |name| { + const global = self.globals.get(name) orelse continue; + if (global.file != null) continue; + const sym = self.getSymbolPtr(global); const seg = self.load_commands.items[self.text_segment_cmd_index.?].segment; - sym.n_sect = @intCast(u8, self.section_ordinals.getIndex(.{ + sym.n_sect = self.getSectionOrdinal(.{ .seg = self.text_segment_cmd_index.?, .sect = 0, - }).? 
+ 1); + }); sym.n_value = seg.inner.vmaddr; log.debug("allocating {s} at the start of {s}", .{ - self.getString(sym.n_strx), + name, seg.inner.segName(), }); } } -fn allocateGlobals(self: *MachO) !void { - log.debug("allocating global symbols", .{}); +fn writeAtomsOneShot(self: *MachO) !void { + assert(self.mode == .one_shot); - var sym_it = self.symbol_resolver.valueIterator(); - while (sym_it.next()) |resolv| { - if (resolv.where != .global) continue; - - assert(resolv.local_sym_index != 0); - const local_sym = self.locals.items[resolv.local_sym_index]; - const sym = &self.globals.items[resolv.where_index]; - sym.n_value = local_sym.n_value; - sym.n_sect = local_sym.n_sect; - - log.debug(" {d}: {s} allocated at 0x{x}", .{ - resolv.where_index, - self.getString(sym.n_strx), - local_sym.n_value, - }); - } -} - -fn writeAllAtoms(self: *MachO) !void { var it = self.atoms.iterator(); while (it.next()) |entry| { - const match = entry.key_ptr.*; - const seg = self.load_commands.items[match.seg].segment; - const sect = seg.sections.items[match.sect]; + const sect = self.getSection(entry.key_ptr.*); var atom: *Atom = entry.value_ptr.*; if (sect.flags == macho.S_ZEROFILL or sect.flags == macho.S_THREAD_LOCAL_ZEROFILL) continue; @@ -2324,20 +2367,28 @@ fn writeAllAtoms(self: *MachO) !void { } while (true) { - const atom_sym = self.locals.items[atom.local_sym_index]; + const this_sym = atom.getSymbol(self); const padding_size: usize = if (atom.next) |next| blk: { - const next_sym = self.locals.items[next.local_sym_index]; - const size = next_sym.n_value - (atom_sym.n_value + atom.size); + const next_sym = next.getSymbol(self); + const size = next_sym.n_value - (this_sym.n_value + atom.size); break :blk math.cast(usize, size) orelse return error.Overflow; } else 0; - log.debug(" (adding atom {s} to buffer: {})", .{ self.getString(atom_sym.n_strx), atom_sym }); + log.debug(" (adding ATOM(%{d}, '{s}') from object({d}) to buffer)", .{ + atom.sym_index, + atom.getName(self), + atom.file, + }); + if (padding_size > 0) { + log.debug(" (with padding {x})", .{padding_size}); + } try atom.resolveRelocs(self); buffer.appendSliceAssumeCapacity(atom.code.items); var i: usize = 0; while (i < padding_size) : (i += 1) { + // TODO with NOPs buffer.appendAssumeCapacity(0); } @@ -2381,12 +2432,13 @@ fn writePadding(self: *MachO, match: MatchingSection, size: usize, writer: anyty } } -fn writeAtoms(self: *MachO) !void { +fn writeAtomsIncremental(self: *MachO) !void { + assert(self.mode == .incremental); + var it = self.atoms.iterator(); while (it.next()) |entry| { const match = entry.key_ptr.*; - const seg = self.load_commands.items[match.seg].segment; - const sect = seg.sections.items[match.sect]; + const sect = self.getSection(match); var atom: *Atom = entry.value_ptr.*; // TODO handle zerofill in stage2 @@ -2395,7 +2447,7 @@ fn writeAtoms(self: *MachO) !void { log.debug("writing atoms in {s},{s}", .{ sect.segName(), sect.sectName() }); while (true) { - if (atom.dirty or self.invalidate_relocs) { + if (atom.dirty) { try self.writeAtom(atom, match); atom.dirty = false; } @@ -2407,17 +2459,19 @@ fn writeAtoms(self: *MachO) !void { } } -pub fn createGotAtom(self: *MachO, target: Atom.Relocation.Target) !*Atom { - const local_sym_index = @intCast(u32, self.locals.items.len); - try self.locals.append(self.base.allocator, .{ +pub fn createGotAtom(self: *MachO, target: SymbolWithLoc) !*Atom { + const gpa = self.base.allocator; + const sym_index = @intCast(u32, self.locals.items.len); + try self.locals.append(gpa, .{ .n_strx 
= 0, .n_type = macho.N_SECT, .n_sect = 0, .n_desc = 0, .n_value = 0, }); - const atom = try self.createEmptyAtom(local_sym_index, @sizeOf(u64), 3); - try atom.relocs.append(self.base.allocator, .{ + + const atom = try MachO.createEmptyAtom(gpa, sym_index, @sizeOf(u64), 3); + try atom.relocs.append(gpa, .{ .offset = 0, .target = target, .addend = 0, @@ -2430,35 +2484,60 @@ pub fn createGotAtom(self: *MachO, target: Atom.Relocation.Target) !*Atom { else => unreachable, }, }); - switch (target) { - .local => { - try atom.rebases.append(self.base.allocator, 0); - }, - .global => |n_strx| { - try atom.bindings.append(self.base.allocator, .{ - .n_strx = n_strx, - .offset = 0, - }); - }, + + const target_sym = self.getSymbol(target); + if (target_sym.undf()) { + const global = self.globals.get(self.getSymbolName(target)).?; + try atom.bindings.append(gpa, .{ + .target = global, + .offset = 0, + }); + } else { + try atom.rebases.append(gpa, 0); } + + try self.managed_atoms.append(gpa, atom); + try self.atom_by_index_table.putNoClobber(gpa, sym_index, atom); + + try self.allocateAtomCommon(atom, .{ + .seg = self.data_const_segment_cmd_index.?, + .sect = self.got_section_index.?, + }); + return atom; } -pub fn createTlvPtrAtom(self: *MachO, target: Atom.Relocation.Target) !*Atom { - const local_sym_index = @intCast(u32, self.locals.items.len); - try self.locals.append(self.base.allocator, .{ +pub fn createTlvPtrAtom(self: *MachO, target: SymbolWithLoc) !*Atom { + const gpa = self.base.allocator; + const sym_index = @intCast(u32, self.locals.items.len); + try self.locals.append(gpa, .{ .n_strx = 0, .n_type = macho.N_SECT, .n_sect = 0, .n_desc = 0, .n_value = 0, }); - const atom = try self.createEmptyAtom(local_sym_index, @sizeOf(u64), 3); - assert(target == .global); - try atom.bindings.append(self.base.allocator, .{ - .n_strx = target.global, + + const atom = try MachO.createEmptyAtom(gpa, sym_index, @sizeOf(u64), 3); + const target_sym = self.getSymbol(target); + assert(target_sym.undf()); + + const global = self.globals.get(self.getSymbolName(target)).?; + try atom.bindings.append(gpa, .{ + .target = global, .offset = 0, }); + + try self.managed_atoms.append(gpa, atom); + try self.atom_by_index_table.putNoClobber(gpa, sym_index, atom); + + const match = (try self.getMatchingSection(.{ + .segname = makeStaticString("__DATA"), + .sectname = makeStaticString("__thread_ptrs"), + .flags = macho.S_THREAD_LOCAL_VARIABLE_POINTERS, + })).?; + try self.allocateAtomCommon(atom, match); + return atom; } @@ -2466,34 +2545,32 @@ fn createDyldPrivateAtom(self: *MachO) !void { if (self.dyld_stub_binder_index == null) return; if (self.dyld_private_atom != null) return; - const local_sym_index = @intCast(u32, self.locals.items.len); - const sym = try self.locals.addOne(self.base.allocator); - sym.* = .{ + const gpa = self.base.allocator; + const sym_index = @intCast(u32, self.locals.items.len); + try self.locals.append(gpa, .{ .n_strx = 0, .n_type = macho.N_SECT, .n_sect = 0, .n_desc = 0, .n_value = 0, - }; - const atom = try self.createEmptyAtom(local_sym_index, @sizeOf(u64), 3); + }); + const atom = try MachO.createEmptyAtom(gpa, sym_index, @sizeOf(u64), 3); self.dyld_private_atom = atom; - const match = MatchingSection{ + + try self.allocateAtomCommon(atom, .{ .seg = self.data_segment_cmd_index.?, .sect = self.data_section_index.?, - }; - if (self.needs_prealloc) { - const vaddr = try self.allocateAtom(atom, @sizeOf(u64), 8, match); - log.debug("allocated {s} atom at 0x{x}", .{ self.getString(sym.n_strx), vaddr 
}); - sym.n_value = vaddr; - } else try self.addAtomToSection(atom, match); + }); - sym.n_sect = @intCast(u8, self.section_ordinals.getIndex(match).? + 1); + try self.managed_atoms.append(gpa, atom); + try self.atom_by_index_table.putNoClobber(gpa, sym_index, atom); } fn createStubHelperPreambleAtom(self: *MachO) !void { if (self.dyld_stub_binder_index == null) return; if (self.stub_helper_preamble_atom != null) return; + const gpa = self.base.allocator; const arch = self.base.options.target.cpu.arch; const size: u64 = switch (arch) { .x86_64 => 15, @@ -2505,17 +2582,16 @@ fn createStubHelperPreambleAtom(self: *MachO) !void { .aarch64 => 2, else => unreachable, }; - const local_sym_index = @intCast(u32, self.locals.items.len); - const sym = try self.locals.addOne(self.base.allocator); - sym.* = .{ + const sym_index = @intCast(u32, self.locals.items.len); + try self.locals.append(gpa, .{ .n_strx = 0, .n_type = macho.N_SECT, .n_sect = 0, .n_desc = 0, .n_value = 0, - }; - const atom = try self.createEmptyAtom(local_sym_index, size, alignment); - const dyld_private_sym_index = self.dyld_private_atom.?.local_sym_index; + }); + const atom = try MachO.createEmptyAtom(gpa, sym_index, size, alignment); + const dyld_private_sym_index = self.dyld_private_atom.?.sym_index; switch (arch) { .x86_64 => { try atom.relocs.ensureUnusedCapacity(self.base.allocator, 2); @@ -2525,7 +2601,7 @@ fn createStubHelperPreambleAtom(self: *MachO) !void { atom.code.items[2] = 0x1d; atom.relocs.appendAssumeCapacity(.{ .offset = 3, - .target = .{ .local = dyld_private_sym_index }, + .target = .{ .sym_index = dyld_private_sym_index, .file = null }, .addend = 0, .subtractor = null, .pcrel = true, @@ -2540,7 +2616,7 @@ fn createStubHelperPreambleAtom(self: *MachO) !void { atom.code.items[10] = 0x25; atom.relocs.appendAssumeCapacity(.{ .offset = 11, - .target = .{ .global = self.undefs.items[self.dyld_stub_binder_index.?].n_strx }, + .target = .{ .sym_index = self.dyld_stub_binder_index.?, .file = null }, .addend = 0, .subtractor = null, .pcrel = true, @@ -2554,7 +2630,7 @@ fn createStubHelperPreambleAtom(self: *MachO) !void { mem.writeIntLittle(u32, atom.code.items[0..][0..4], aarch64.Instruction.adrp(.x17, 0).toU32()); atom.relocs.appendAssumeCapacity(.{ .offset = 0, - .target = .{ .local = dyld_private_sym_index }, + .target = .{ .sym_index = dyld_private_sym_index, .file = null }, .addend = 0, .subtractor = null, .pcrel = true, @@ -2565,7 +2641,7 @@ fn createStubHelperPreambleAtom(self: *MachO) !void { mem.writeIntLittle(u32, atom.code.items[4..][0..4], aarch64.Instruction.add(.x17, .x17, 0, false).toU32()); atom.relocs.appendAssumeCapacity(.{ .offset = 4, - .target = .{ .local = dyld_private_sym_index }, + .target = .{ .sym_index = dyld_private_sym_index, .file = null }, .addend = 0, .subtractor = null, .pcrel = false, @@ -2583,7 +2659,7 @@ fn createStubHelperPreambleAtom(self: *MachO) !void { mem.writeIntLittle(u32, atom.code.items[12..][0..4], aarch64.Instruction.adrp(.x16, 0).toU32()); atom.relocs.appendAssumeCapacity(.{ .offset = 12, - .target = .{ .global = self.undefs.items[self.dyld_stub_binder_index.?].n_strx }, + .target = .{ .sym_index = self.dyld_stub_binder_index.?, .file = null }, .addend = 0, .subtractor = null, .pcrel = true, @@ -2598,7 +2674,7 @@ fn createStubHelperPreambleAtom(self: *MachO) !void { ).toU32()); atom.relocs.appendAssumeCapacity(.{ .offset = 16, - .target = .{ .global = self.undefs.items[self.dyld_stub_binder_index.?].n_strx }, + .target = .{ .sym_index = self.dyld_stub_binder_index.?, 
.file = null }, .addend = 0, .subtractor = null, .pcrel = false, @@ -2611,22 +2687,18 @@ fn createStubHelperPreambleAtom(self: *MachO) !void { else => unreachable, } self.stub_helper_preamble_atom = atom; - const match = MatchingSection{ + + try self.allocateAtomCommon(atom, .{ .seg = self.text_segment_cmd_index.?, .sect = self.stub_helper_section_index.?, - }; - - if (self.needs_prealloc) { - const alignment_pow_2 = try math.powi(u32, 2, atom.alignment); - const vaddr = try self.allocateAtom(atom, atom.size, alignment_pow_2, match); - log.debug("allocated {s} atom at 0x{x}", .{ self.getString(sym.n_strx), vaddr }); - sym.n_value = vaddr; - } else try self.addAtomToSection(atom, match); + }); - sym.n_sect = @intCast(u8, self.section_ordinals.getIndex(match).? + 1); + try self.managed_atoms.append(gpa, atom); + try self.atom_by_index_table.putNoClobber(gpa, sym_index, atom); } pub fn createStubHelperAtom(self: *MachO) !*Atom { + const gpa = self.base.allocator; const arch = self.base.options.target.cpu.arch; const stub_size: u4 = switch (arch) { .x86_64 => 10, @@ -2638,16 +2710,16 @@ pub fn createStubHelperAtom(self: *MachO) !*Atom { .aarch64 => 2, else => unreachable, }; - const local_sym_index = @intCast(u32, self.locals.items.len); - try self.locals.append(self.base.allocator, .{ + const sym_index = @intCast(u32, self.locals.items.len); + try self.locals.append(gpa, .{ .n_strx = 0, .n_type = macho.N_SECT, .n_sect = 0, .n_desc = 0, .n_value = 0, }); - const atom = try self.createEmptyAtom(local_sym_index, stub_size, alignment); - try atom.relocs.ensureTotalCapacity(self.base.allocator, 1); + const atom = try MachO.createEmptyAtom(gpa, sym_index, stub_size, alignment); + try atom.relocs.ensureTotalCapacity(gpa, 1); switch (arch) { .x86_64 => { @@ -2658,7 +2730,7 @@ pub fn createStubHelperAtom(self: *MachO) !*Atom { atom.code.items[5] = 0xe9; atom.relocs.appendAssumeCapacity(.{ .offset = 6, - .target = .{ .local = self.stub_helper_preamble_atom.?.local_sym_index }, + .target = .{ .sym_index = self.stub_helper_preamble_atom.?.sym_index, .file = null }, .addend = 0, .subtractor = null, .pcrel = true, @@ -2680,7 +2752,7 @@ pub fn createStubHelperAtom(self: *MachO) !*Atom { mem.writeIntLittle(u32, atom.code.items[4..8], aarch64.Instruction.b(0).toU32()); atom.relocs.appendAssumeCapacity(.{ .offset = 4, - .target = .{ .local = self.stub_helper_preamble_atom.?.local_sym_index }, + .target = .{ .sym_index = self.stub_helper_preamble_atom.?.sym_index, .file = null }, .addend = 0, .subtractor = null, .pcrel = true, @@ -2692,22 +2764,31 @@ pub fn createStubHelperAtom(self: *MachO) !*Atom { else => unreachable, } + try self.managed_atoms.append(gpa, atom); + try self.atom_by_index_table.putNoClobber(gpa, sym_index, atom); + + try self.allocateAtomCommon(atom, .{ + .seg = self.text_segment_cmd_index.?, + .sect = self.stub_helper_section_index.?, + }); + return atom; } -pub fn createLazyPointerAtom(self: *MachO, stub_sym_index: u32, n_strx: u32) !*Atom { - const local_sym_index = @intCast(u32, self.locals.items.len); - try self.locals.append(self.base.allocator, .{ +pub fn createLazyPointerAtom(self: *MachO, stub_sym_index: u32, target: SymbolWithLoc) !*Atom { + const gpa = self.base.allocator; + const sym_index = @intCast(u32, self.locals.items.len); + try self.locals.append(gpa, .{ .n_strx = 0, .n_type = macho.N_SECT, .n_sect = 0, .n_desc = 0, .n_value = 0, }); - const atom = try self.createEmptyAtom(local_sym_index, @sizeOf(u64), 3); - try atom.relocs.append(self.base.allocator, .{ + const atom = try 
MachO.createEmptyAtom(gpa, sym_index, @sizeOf(u64), 3); + try atom.relocs.append(gpa, .{ .offset = 0, - .target = .{ .local = stub_sym_index }, + .target = .{ .sym_index = stub_sym_index, .file = null }, .addend = 0, .subtractor = null, .pcrel = false, @@ -2718,15 +2799,27 @@ pub fn createLazyPointerAtom(self: *MachO, stub_sym_index: u32, n_strx: u32) !*A else => unreachable, }, }); - try atom.rebases.append(self.base.allocator, 0); - try atom.lazy_bindings.append(self.base.allocator, .{ - .n_strx = n_strx, + try atom.rebases.append(gpa, 0); + + const global = self.globals.get(self.getSymbolName(target)).?; + try atom.lazy_bindings.append(gpa, .{ + .target = global, .offset = 0, }); + + try self.managed_atoms.append(gpa, atom); + try self.atom_by_index_table.putNoClobber(gpa, sym_index, atom); + + try self.allocateAtomCommon(atom, .{ + .seg = self.data_segment_cmd_index.?, + .sect = self.la_symbol_ptr_section_index.?, + }); + return atom; } pub fn createStubAtom(self: *MachO, laptr_sym_index: u32) !*Atom { + const gpa = self.base.allocator; const arch = self.base.options.target.cpu.arch; const alignment: u2 = switch (arch) { .x86_64 => 0, @@ -2738,23 +2831,23 @@ pub fn createStubAtom(self: *MachO, laptr_sym_index: u32) !*Atom { .aarch64 => 3 * @sizeOf(u32), else => unreachable, // unhandled architecture type }; - const local_sym_index = @intCast(u32, self.locals.items.len); - try self.locals.append(self.base.allocator, .{ + const sym_index = @intCast(u32, self.locals.items.len); + try self.locals.append(gpa, .{ .n_strx = 0, .n_type = macho.N_SECT, .n_sect = 0, .n_desc = 0, .n_value = 0, }); - const atom = try self.createEmptyAtom(local_sym_index, stub_size, alignment); + const atom = try MachO.createEmptyAtom(gpa, sym_index, stub_size, alignment); switch (arch) { .x86_64 => { // jmp atom.code.items[0] = 0xff; atom.code.items[1] = 0x25; - try atom.relocs.append(self.base.allocator, .{ + try atom.relocs.append(gpa, .{ .offset = 2, - .target = .{ .local = laptr_sym_index }, + .target = .{ .sym_index = laptr_sym_index, .file = null }, .addend = 0, .subtractor = null, .pcrel = true, @@ -2763,12 +2856,12 @@ pub fn createStubAtom(self: *MachO, laptr_sym_index: u32) !*Atom { }); }, .aarch64 => { - try atom.relocs.ensureTotalCapacity(self.base.allocator, 2); + try atom.relocs.ensureTotalCapacity(gpa, 2); // adrp x16, pages mem.writeIntLittle(u32, atom.code.items[0..4], aarch64.Instruction.adrp(.x16, 0).toU32()); atom.relocs.appendAssumeCapacity(.{ .offset = 0, - .target = .{ .local = laptr_sym_index }, + .target = .{ .sym_index = laptr_sym_index, .file = null }, .addend = 0, .subtractor = null, .pcrel = true, @@ -2783,7 +2876,7 @@ pub fn createStubAtom(self: *MachO, laptr_sym_index: u32) !*Atom { ).toU32()); atom.relocs.appendAssumeCapacity(.{ .offset = 4, - .target = .{ .local = laptr_sym_index }, + .target = .{ .sym_index = laptr_sym_index, .file = null }, .addend = 0, .subtractor = null, .pcrel = false, @@ -2795,101 +2888,179 @@ pub fn createStubAtom(self: *MachO, laptr_sym_index: u32) !*Atom { }, else => unreachable, } + + try self.managed_atoms.append(gpa, atom); + try self.atom_by_index_table.putNoClobber(gpa, sym_index, atom); + + try self.allocateAtomCommon(atom, .{ + .seg = self.text_segment_cmd_index.?, + .sect = self.stubs_section_index.?, + }); + return atom; } fn createTentativeDefAtoms(self: *MachO) !void { - if (self.tentatives.count() == 0) return; - // Convert any tentative definition into a regular symbol and allocate - // text blocks for each tentative definition. 
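
A tentative definition is a C-style common symbol (`int x;` at file scope, no initializer). In the nlist encoding this function decodes, such a symbol is undefined-with-a-size: `n_value` carries the size and bits 8–11 of `n_desc` carry the log2 alignment, which is what the `size`/`alignment` reads in this function express. A sketch of the decode (helper name is illustrative, mirroring `GET_COMM_ALIGN` from `<mach-o/nlist.h>`):

```zig
const std = @import("std");
const macho = std.macho;

fn tentativeSizeAndAlign(sym: macho.nlist_64) struct { size: u64, alignment: u16 } {
    return .{
        .size = sym.n_value, // for a common symbol, n_value is a size, not an address
        .alignment = (sym.n_desc >> 8) & 0x0f, // log2 alignment
    };
}
```
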
- while (self.tentatives.popOrNull()) |entry| { + const gpa = self.base.allocator; + + for (self.globals.values()) |global| { + const sym = self.getSymbolPtr(global); + if (!sym.tentative()) continue; + + log.debug("creating tentative definition for ATOM(%{d}, '{s}') in object({d})", .{ + global.sym_index, self.getSymbolName(global), global.file, + }); + + // Convert any tentative definition into a regular symbol and allocate + // text blocks for each tentative definition. const match = MatchingSection{ .seg = self.data_segment_cmd_index.?, .sect = self.bss_section_index.?, }; - _ = try self.section_ordinals.getOrPut(self.base.allocator, match); + _ = try self.section_ordinals.getOrPut(gpa, match); - const global_sym = &self.globals.items[entry.key]; - const size = global_sym.n_value; - const alignment = (global_sym.n_desc >> 8) & 0x0f; + const size = sym.n_value; + const alignment = (sym.n_desc >> 8) & 0x0f; - global_sym.n_value = 0; - global_sym.n_desc = 0; - global_sym.n_sect = @intCast(u8, self.section_ordinals.getIndex(match).? + 1); - - const local_sym_index = @intCast(u32, self.locals.items.len); - const local_sym = try self.locals.addOne(self.base.allocator); - local_sym.* = .{ - .n_strx = global_sym.n_strx, - .n_type = macho.N_SECT, - .n_sect = global_sym.n_sect, + sym.* = .{ + .n_strx = sym.n_strx, + .n_type = macho.N_SECT | macho.N_EXT, + .n_sect = 0, .n_desc = 0, .n_value = 0, }; - const resolv = self.symbol_resolver.getPtr(local_sym.n_strx) orelse unreachable; - resolv.local_sym_index = local_sym_index; + const atom = try MachO.createEmptyAtom(gpa, global.sym_index, size, alignment); + atom.file = global.file; - const atom = try self.createEmptyAtom(local_sym_index, size, alignment); + try self.allocateAtomCommon(atom, match); - if (self.needs_prealloc) { - const alignment_pow_2 = try math.powi(u32, 2, alignment); - const vaddr = try self.allocateAtom(atom, size, alignment_pow_2, match); - local_sym.n_value = vaddr; - global_sym.n_value = vaddr; - } else try self.addAtomToSection(atom, match); + if (global.file) |file| { + const object = &self.objects.items[file]; + try object.managed_atoms.append(gpa, atom); + try object.atom_by_index_table.putNoClobber(gpa, global.sym_index, atom); + } else { + try self.managed_atoms.append(gpa, atom); + try self.atom_by_index_table.putNoClobber(gpa, global.sym_index, atom); + } } } -fn createDsoHandleSymbol(self: *MachO) !void { - if (self.dso_handle_sym_index != null) return; - - const n_strx = self.strtab_dir.getKeyAdapted(@as([]const u8, "___dso_handle"), StringIndexAdapter{ - .bytes = &self.strtab, - }) orelse return; - - const resolv = self.symbol_resolver.getPtr(n_strx) orelse return; - if (resolv.where != .undef) return; +fn createMhExecuteHeaderSymbol(self: *MachO) !void { + if (self.base.options.output_mode != .Exe) return; + if (self.globals.get("__mh_execute_header")) |global| { + const sym = self.getSymbol(global); + if (!sym.undf() and !(sym.pext() or sym.weakDef())) return; + } - const undef = &self.undefs.items[resolv.where_index]; - const local_sym_index = @intCast(u32, self.locals.items.len); - var nlist = macho.nlist_64{ - .n_strx = undef.n_strx, - .n_type = macho.N_SECT, + const gpa = self.base.allocator; + const n_strx = try self.strtab.insert(gpa, "__mh_execute_header"); + const sym_index = @intCast(u32, self.locals.items.len); + try self.locals.append(gpa, .{ + .n_strx = n_strx, + .n_type = macho.N_SECT | macho.N_EXT, .n_sect = 0, - .n_desc = 0, + .n_desc = macho.REFERENCED_DYNAMICALLY, .n_value = 0, - }; - try 
self.locals.append(self.base.allocator, nlist); - const global_sym_index = @intCast(u32, self.globals.items.len); - nlist.n_type |= macho.N_EXT; - nlist.n_desc = macho.N_WEAK_DEF; - try self.globals.append(self.base.allocator, nlist); - self.dso_handle_sym_index = local_sym_index; + }); - assert(self.unresolved.swapRemove(resolv.where_index)); + const name = try gpa.dupe(u8, "__mh_execute_header"); + const gop = try self.globals.getOrPut(gpa, name); + defer if (gop.found_existing) gpa.free(name); + gop.value_ptr.* = .{ + .sym_index = sym_index, + .file = null, + }; +} - undef.* = .{ - .n_strx = 0, - .n_type = macho.N_UNDF, +fn createDsoHandleSymbol(self: *MachO) !void { + const global = self.globals.getPtr("___dso_handle") orelse return; + const sym = self.getSymbolPtr(global.*); + if (!sym.undf()) return; + + const gpa = self.base.allocator; + const n_strx = try self.strtab.insert(gpa, "___dso_handle"); + const sym_index = @intCast(u32, self.locals.items.len); + try self.locals.append(gpa, .{ + .n_strx = n_strx, + .n_type = macho.N_SECT | macho.N_EXT, .n_sect = 0, - .n_desc = 0, + .n_desc = macho.N_WEAK_DEF, .n_value = 0, + }); + global.* = .{ + .sym_index = sym_index, + .file = null, }; - resolv.* = .{ - .where = .global, - .where_index = global_sym_index, - .local_sym_index = local_sym_index, - }; + _ = self.unresolved.swapRemove(@intCast(u32, self.globals.getIndex("___dso_handle").?)); } -fn resolveSymbolsInObject(self: *MachO, object_id: u16) !void { - const object = &self.objects.items[object_id]; +fn resolveGlobalSymbol(self: *MachO, current: SymbolWithLoc) !void { + const gpa = self.base.allocator; + const sym = self.getSymbol(current); + const sym_name = self.getSymbolName(current); + + const name = try gpa.dupe(u8, sym_name); + const global_index = @intCast(u32, self.globals.values().len); + const gop = try self.globals.getOrPut(gpa, name); + defer if (gop.found_existing) gpa.free(name); + + if (!gop.found_existing) { + gop.value_ptr.* = current; + if (sym.undf() and !sym.tentative()) { + try self.unresolved.putNoClobber(gpa, global_index, false); + } + return; + } + + const global = gop.value_ptr.*; + const global_sym = self.getSymbol(global); + + // Cases to consider: sym vs global_sym + // 1. strong(sym) and strong(global_sym) => error + // 2. strong(sym) and weak(global_sym) => sym + // 3. strong(sym) and tentative(global_sym) => sym + // 4. strong(sym) and undf(global_sym) => sym + // 5. weak(sym) and strong(global_sym) => global_sym + // 6. weak(sym) and tentative(global_sym) => sym + // 7. weak(sym) and undf(global_sym) => sym + // 8. tentative(sym) and strong(global_sym) => global_sym + // 9. tentative(sym) and weak(global_sym) => global_sym + // 10. tentative(sym) and tentative(global_sym) => pick larger + // 11. tentative(sym) and undf(global_sym) => sym + // 12. undf(sym) and * => global_sym + // + // Reduces to: + // 1. strong(sym) and strong(global_sym) => error + // 2. * and strong(global_sym) => global_sym + // 3. weak(sym) and weak(global_sym) => global_sym + // 4. tentative(sym) and tentative(global_sym) => pick larger + // 5. undf(sym) and * => global_sym + // 6. 
else => sym + + const sym_is_strong = sym.sect() and !(sym.weakDef() or sym.pext()); + const global_is_strong = global_sym.sect() and !(global_sym.weakDef() or global_sym.pext()); + const sym_is_weak = sym.sect() and (sym.weakDef() or sym.pext()); + const global_is_weak = global_sym.sect() and (global_sym.weakDef() or global_sym.pext()); + + if (sym_is_strong and global_is_strong) return error.MultipleSymbolDefinitions; + if (global_is_strong) return; + if (sym_is_weak and global_is_weak) return; + if (sym.tentative() and global_sym.tentative()) { + if (global_sym.n_value >= sym.n_value) return; + } + if (sym.undf() and !sym.tentative()) return; + + _ = self.unresolved.swapRemove(@intCast(u32, self.globals.getIndex(name).?)); + + gop.value_ptr.* = current; +} +fn resolveSymbolsInObject(self: *MachO, object: *Object, object_id: u16) !void { log.debug("resolving symbols in '{s}'", .{object.name}); - for (object.symtab.items) |sym, id| { - const sym_id = @intCast(u32, id); + for (object.symtab.items) |sym, index| { + const sym_index = @intCast(u32, index); const sym_name = object.getString(sym.n_strx); if (sym.stab()) { @@ -2913,170 +3084,27 @@ fn resolveSymbolsInObject(self: *MachO, object_id: u16) !void { return error.UnhandledSymbolType; } - if (sym.sect()) { - // Defined symbol regardless of scope lands in the locals symbol table. - const local_sym_index = @intCast(u32, self.locals.items.len); - try self.locals.append(self.base.allocator, .{ - .n_strx = if (symbolIsTemp(sym, sym_name)) 0 else try self.makeString(sym_name), - .n_type = macho.N_SECT, - .n_sect = 0, - .n_desc = 0, - .n_value = sym.n_value, - }); - try object.symbol_mapping.putNoClobber(self.base.allocator, sym_id, local_sym_index); - try object.reverse_symbol_mapping.putNoClobber(self.base.allocator, local_sym_index, sym_id); - - // If the symbol's scope is not local aka translation unit, then we need work out - // if we should save the symbol as a global, or potentially flag the error. - if (!sym.ext()) continue; - - const n_strx = try self.makeString(sym_name); - const local = self.locals.items[local_sym_index]; - const resolv = self.symbol_resolver.getPtr(n_strx) orelse { - const global_sym_index = @intCast(u32, self.globals.items.len); - try self.globals.append(self.base.allocator, .{ - .n_strx = n_strx, - .n_type = sym.n_type, - .n_sect = 0, - .n_desc = sym.n_desc, - .n_value = sym.n_value, - }); - try self.symbol_resolver.putNoClobber(self.base.allocator, n_strx, .{ - .where = .global, - .where_index = global_sym_index, - .local_sym_index = local_sym_index, - .file = object_id, - }); - continue; - }; - - switch (resolv.where) { - .global => { - const global = &self.globals.items[resolv.where_index]; - - if (global.tentative()) { - assert(self.tentatives.swapRemove(resolv.where_index)); - } else if (!(sym.weakDef() or sym.pext()) and !(global.weakDef() or global.pext())) { - log.err("symbol '{s}' defined multiple times", .{sym_name}); - if (resolv.file) |file| { - log.err(" first definition in '{s}'", .{self.objects.items[file].name}); - } - log.err(" next definition in '{s}'", .{object.name}); - return error.MultipleSymbolDefinitions; - } else if (sym.weakDef() or sym.pext()) continue; // Current symbol is weak, so skip it. - - // Otherwise, update the resolver and the global symbol. 
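
One way to read the twelve cases reduced above: rank each definition and let the higher rank win, with same-rank ties resolved specially (strong/strong is an error, weak/weak keeps the existing global, tentative/tentative keeps the larger size). A sketch equivalent to the boolean-flag tests in `resolveGlobalSymbol`:

```zig
const std = @import("std");
const macho = std.macho;

const DefRank = enum(u8) { undf = 0, tentative = 1, weak = 2, strong = 3 };

fn defRank(sym: macho.nlist_64) DefRank {
    if (sym.sect() and !(sym.weakDef() or sym.pext())) return .strong;
    if (sym.sect()) return .weak; // defined, but weak or private-extern
    if (sym.tentative()) return .tentative;
    return .undf;
}
```
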
- global.n_type = sym.n_type; - resolv.local_sym_index = local_sym_index; - resolv.file = object_id; - - continue; - }, - .undef => { - const undef = &self.undefs.items[resolv.where_index]; - undef.* = .{ - .n_strx = 0, - .n_type = macho.N_UNDF, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }; - assert(self.unresolved.swapRemove(resolv.where_index)); - }, - } - - const global_sym_index = @intCast(u32, self.globals.items.len); - try self.globals.append(self.base.allocator, .{ - .n_strx = local.n_strx, - .n_type = sym.n_type, - .n_sect = 0, - .n_desc = sym.n_desc, - .n_value = sym.n_value, + if (sym.sect() and !sym.ext()) { + log.debug("symbol '{s}' local to object {s}; skipping...", .{ + sym_name, + object.name, }); - resolv.* = .{ - .where = .global, - .where_index = global_sym_index, - .local_sym_index = local_sym_index, - .file = object_id, - }; - } else if (sym.tentative()) { - // Symbol is a tentative definition. - const n_strx = try self.makeString(sym_name); - const resolv = self.symbol_resolver.getPtr(n_strx) orelse { - const global_sym_index = @intCast(u32, self.globals.items.len); - try self.globals.append(self.base.allocator, .{ - .n_strx = try self.makeString(sym_name), - .n_type = sym.n_type, - .n_sect = 0, - .n_desc = sym.n_desc, - .n_value = sym.n_value, - }); - try self.symbol_resolver.putNoClobber(self.base.allocator, n_strx, .{ - .where = .global, - .where_index = global_sym_index, - .file = object_id, - }); - _ = try self.tentatives.getOrPut(self.base.allocator, global_sym_index); - continue; - }; - - switch (resolv.where) { - .global => { - const global = &self.globals.items[resolv.where_index]; - if (!global.tentative()) continue; - if (global.n_value >= sym.n_value) continue; - - global.n_desc = sym.n_desc; - global.n_value = sym.n_value; - resolv.file = object_id; - }, - .undef => { - const undef = &self.undefs.items[resolv.where_index]; - const global_sym_index = @intCast(u32, self.globals.items.len); - try self.globals.append(self.base.allocator, .{ - .n_strx = undef.n_strx, - .n_type = sym.n_type, - .n_sect = 0, - .n_desc = sym.n_desc, - .n_value = sym.n_value, - }); - _ = try self.tentatives.getOrPut(self.base.allocator, global_sym_index); - assert(self.unresolved.swapRemove(resolv.where_index)); - - resolv.* = .{ - .where = .global, - .where_index = global_sym_index, - .file = object_id, - }; - undef.* = .{ - .n_strx = 0, - .n_type = macho.N_UNDF, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }; - }, - } - } else { - // Symbol is undefined. 
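
All of the bookkeeping being deleted here (separate `locals`/`globals`/`undefs` tables threaded together by `symbol_resolver`) collapses into the `SymbolWithLoc` scheme: a symbol is addressed by `(sym_index, file)`, where a `null` file selects the linker's own `locals` and `file == n` selects the symtab of `objects.items[n]`. A sketch of the lookup this implies, with illustrative field types; `getSymbolPtr` elsewhere in this file is the real accessor:

```zig
const SymbolWithLoc = struct {
    sym_index: u32,
    file: ?u32 = null, // null means "owned by the linker", not an object file
};

fn lookupSymbolPtr(self: *MachO, loc: SymbolWithLoc) *macho.nlist_64 {
    if (loc.file) |file| {
        return &self.objects.items[file].symtab.items[loc.sym_index];
    }
    return &self.locals.items[loc.sym_index];
}
```
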
- const n_strx = try self.makeString(sym_name); - if (self.symbol_resolver.contains(n_strx)) continue; - - const undef_sym_index = @intCast(u32, self.undefs.items.len); - try self.undefs.append(self.base.allocator, .{ - .n_strx = try self.makeString(sym_name), - .n_type = macho.N_UNDF, - .n_sect = 0, - .n_desc = sym.n_desc, - .n_value = 0, - }); - try self.symbol_resolver.putNoClobber(self.base.allocator, n_strx, .{ - .where = .undef, - .where_index = undef_sym_index, - .file = object_id, - }); - try self.unresolved.putNoClobber(self.base.allocator, undef_sym_index, .none); + continue; } + + const sym_loc = SymbolWithLoc{ .sym_index = sym_index, .file = object_id }; + self.resolveGlobalSymbol(sym_loc) catch |err| switch (err) { + error.MultipleSymbolDefinitions => { + const global = self.globals.get(sym_name).?; + log.err("symbol '{s}' defined multiple times", .{sym_name}); + if (global.file) |file| { + log.err(" first definition in '{s}'", .{self.objects.items[file].name}); + } + log.err(" next definition in '{s}'", .{self.objects.items[object_id].name}); + return error.MultipleSymbolDefinitions; + }, + else => |e| return e, + }; } } @@ -3085,8 +3113,8 @@ fn resolveSymbolsInArchives(self: *MachO) !void { var next_sym: usize = 0; loop: while (next_sym < self.unresolved.count()) { - const sym = self.undefs.items[self.unresolved.keys()[next_sym]]; - const sym_name = self.getString(sym.n_strx); + const global = self.globals.values()[self.unresolved.keys()[next_sym]]; + const sym_name = self.getSymbolName(global); for (self.archives.items) |archive| { // Check if the entry exists in a static archive. @@ -3099,7 +3127,7 @@ fn resolveSymbolsInArchives(self: *MachO) !void { const object_id = @intCast(u16, self.objects.items.len); const object = try self.objects.addOne(self.base.allocator); object.* = try archive.parseObject(self.base.allocator, self.base.options.target, offsets.items[0]); - try self.resolveSymbolsInObject(object_id); + try self.resolveSymbolsInObject(object, object_id); continue :loop; } @@ -3113,8 +3141,10 @@ fn resolveSymbolsInDylibs(self: *MachO) !void { var next_sym: usize = 0; loop: while (next_sym < self.unresolved.count()) { - const sym = self.undefs.items[self.unresolved.keys()[next_sym]]; - const sym_name = self.getString(sym.n_strx); + const global_index = self.unresolved.keys()[next_sym]; + const global = self.globals.values()[global_index]; + const sym = self.getSymbolPtr(global); + const sym_name = self.getSymbolName(global); for (self.dylibs.items) |dylib, id| { if (!dylib.symbols.contains(sym_name)) continue; @@ -3126,68 +3156,23 @@ fn resolveSymbolsInDylibs(self: *MachO) !void { } const ordinal = self.referenced_dylibs.getIndex(dylib_id) orelse unreachable; - const resolv = self.symbol_resolver.getPtr(sym.n_strx) orelse unreachable; - const undef = &self.undefs.items[resolv.where_index]; - undef.n_type |= macho.N_EXT; - undef.n_desc = @intCast(u16, ordinal + 1) * macho.N_SYMBOL_RESOLVER; + sym.n_type |= macho.N_EXT; + sym.n_desc = @intCast(u16, ordinal + 1) * macho.N_SYMBOL_RESOLVER; if (dylib.weak) { - undef.n_desc |= macho.N_WEAK_REF; + sym.n_desc |= macho.N_WEAK_REF; } - if (self.unresolved.fetchSwapRemove(resolv.where_index)) |entry| outer_blk: { - switch (entry.value) { - .none => {}, - .got => return error.TODOGotHint, - .stub => { - if (self.stubs_table.contains(sym.n_strx)) break :outer_blk; - const stub_helper_atom = blk: { - const match = MatchingSection{ - .seg = self.text_segment_cmd_index.?, - .sect = self.stub_helper_section_index.?, - }; - const 
atom = try self.createStubHelperAtom(); - const atom_sym = &self.locals.items[atom.local_sym_index]; - const alignment = try math.powi(u32, 2, atom.alignment); - const vaddr = try self.allocateAtom(atom, atom.size, alignment, match); - atom_sym.n_value = vaddr; - atom_sym.n_sect = @intCast(u8, self.section_ordinals.getIndex(match).? + 1); - break :blk atom; - }; - const laptr_atom = blk: { - const match = MatchingSection{ - .seg = self.data_segment_cmd_index.?, - .sect = self.la_symbol_ptr_section_index.?, - }; - const atom = try self.createLazyPointerAtom( - stub_helper_atom.local_sym_index, - sym.n_strx, - ); - const atom_sym = &self.locals.items[atom.local_sym_index]; - const alignment = try math.powi(u32, 2, atom.alignment); - const vaddr = try self.allocateAtom(atom, atom.size, alignment, match); - atom_sym.n_value = vaddr; - atom_sym.n_sect = @intCast(u8, self.section_ordinals.getIndex(match).? + 1); - break :blk atom; - }; - const stub_atom = blk: { - const match = MatchingSection{ - .seg = self.text_segment_cmd_index.?, - .sect = self.stubs_section_index.?, - }; - const atom = try self.createStubAtom(laptr_atom.local_sym_index); - const atom_sym = &self.locals.items[atom.local_sym_index]; - const alignment = try math.powi(u32, 2, atom.alignment); - const vaddr = try self.allocateAtom(atom, atom.size, alignment, match); - atom_sym.n_value = vaddr; - atom_sym.n_sect = @intCast(u8, self.section_ordinals.getIndex(match).? + 1); - break :blk atom; - }; - const stub_index = @intCast(u32, self.stubs.items.len); - try self.stubs.append(self.base.allocator, stub_atom); - try self.stubs_table.putNoClobber(self.base.allocator, sym.n_strx, stub_index); - }, - } + if (self.unresolved.fetchSwapRemove(global_index)) |entry| blk: { + if (!entry.value) break :blk; + if (!sym.undf()) break :blk; + if (self.stubs_table.contains(global)) break :blk; + + const stub_index = try self.allocateStubEntry(global); + const stub_helper_atom = try self.createStubHelperAtom(); + const laptr_atom = try self.createLazyPointerAtom(stub_helper_atom.sym_index, global); + const stub_atom = try self.createStubAtom(laptr_atom.sym_index); + self.stubs.items[stub_index].sym_index = stub_atom.sym_index; } continue :loop; @@ -3197,39 +3182,46 @@ fn resolveSymbolsInDylibs(self: *MachO) !void { } } -fn createMhExecuteHeaderSymbol(self: *MachO) !void { - if (self.base.options.output_mode != .Exe) return; - if (self.mh_execute_header_sym_index != null) return; +fn resolveSymbolsAtLoading(self: *MachO) !void { + const is_lib = self.base.options.output_mode == .Lib; + const is_dyn_lib = self.base.options.link_mode == .Dynamic and is_lib; + const allow_undef = is_dyn_lib and (self.base.options.allow_shlib_undefined orelse false); - const n_strx = try self.makeString("__mh_execute_header"); - const local_sym_index = @intCast(u32, self.locals.items.len); - var nlist = macho.nlist_64{ - .n_strx = n_strx, - .n_type = macho.N_SECT, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }; - try self.locals.append(self.base.allocator, nlist); - self.mh_execute_header_sym_index = local_sym_index; - - if (self.symbol_resolver.getPtr(n_strx)) |resolv| { - const global = &self.globals.items[resolv.where_index]; - if (!(global.weakDef() or !global.pext())) { - log.err("symbol '__mh_execute_header' defined multiple times", .{}); - return error.MultipleSymbolDefinitions; + var next_sym: usize = 0; + while (next_sym < self.unresolved.count()) { + const global_index = self.unresolved.keys()[next_sym]; + const global = 
self.globals.values()[global_index]; + const sym = self.getSymbolPtr(global); + const sym_name = self.getSymbolName(global); + + if (sym.discarded()) { + sym.* = .{ + .n_strx = 0, + .n_type = macho.N_UNDF, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }; + _ = self.unresolved.swapRemove(global_index); + continue; + } else if (allow_undef) { + const n_desc = @bitCast( + u16, + macho.BIND_SPECIAL_DYLIB_FLAT_LOOKUP * @intCast(i16, macho.N_SYMBOL_RESOLVER), + ); + // TODO allow_shlib_undefined is an ELF flag so figure out macOS specific flags too. + sym.n_type = macho.N_EXT; + sym.n_desc = n_desc; + _ = self.unresolved.swapRemove(global_index); + continue; } - resolv.local_sym_index = local_sym_index; - } else { - const global_sym_index = @intCast(u32, self.globals.items.len); - nlist.n_type |= macho.N_EXT; - try self.globals.append(self.base.allocator, nlist); - try self.symbol_resolver.putNoClobber(self.base.allocator, n_strx, .{ - .where = .global, - .where_index = global_sym_index, - .local_sym_index = local_sym_index, - .file = null, - }); + + log.err("undefined reference to symbol '{s}'", .{sym_name}); + if (global.file) |file| { + log.err(" first referenced in '{s}'", .{self.objects.items[file].name}); + } + + next_sym += 1; } } @@ -3237,21 +3229,20 @@ fn resolveDyldStubBinder(self: *MachO) !void { if (self.dyld_stub_binder_index != null) return; if (self.unresolved.count() == 0) return; // no need for a stub binder if we don't have any imports - const n_strx = try self.makeString("dyld_stub_binder"); - const sym_index = @intCast(u32, self.undefs.items.len); - try self.undefs.append(self.base.allocator, .{ + const gpa = self.base.allocator; + const n_strx = try self.strtab.insert(gpa, "dyld_stub_binder"); + const sym_index = @intCast(u32, self.locals.items.len); + try self.locals.append(gpa, .{ .n_strx = n_strx, .n_type = macho.N_UNDF, .n_sect = 0, .n_desc = 0, .n_value = 0, }); - try self.symbol_resolver.putNoClobber(self.base.allocator, n_strx, .{ - .where = .undef, - .where_index = sym_index, - }); - const sym = &self.undefs.items[sym_index]; - const sym_name = self.getString(n_strx); + const sym_name = try gpa.dupe(u8, "dyld_stub_binder"); + const global = SymbolWithLoc{ .sym_index = sym_index, .file = null }; + try self.globals.putNoClobber(gpa, sym_name, global); + const sym = &self.locals.items[sym_index]; for (self.dylibs.items) |dylib, id| { if (!dylib.symbols.contains(sym_name)) continue; @@ -3276,205 +3267,9 @@ fn resolveDyldStubBinder(self: *MachO) !void { } // Add dyld_stub_binder as the final GOT entry. 
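
A worked example of the flat-namespace `n_desc` computed in `resolveSymbolsAtLoading` above, assuming the `<mach-o/nlist.h>` values `N_SYMBOL_RESOLVER == 0x100` and `BIND_SPECIAL_DYLIB_FLAT_LOOKUP == -2` (the dylib ordinal is stored in the high byte of `n_desc`, so multiplying by `0x100` is the shift):

```zig
const std = @import("std");

test "flat-lookup n_desc" {
    // -2 * 0x100 == -512; bit-cast to u16 gives 0xFE00, i.e. the special
    // ordinal 0xFE ("search all loaded images") in the high byte.
    const n_desc = @bitCast(u16, @as(i16, -2) * @as(i16, 0x100));
    try std.testing.expectEqual(@as(u16, 0xFE00), n_desc);
}
```
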
- const target = Atom.Relocation.Target{ .global = n_strx }; - const atom = try self.createGotAtom(target); - const got_index = @intCast(u32, self.got_entries.items.len); - try self.got_entries.append(self.base.allocator, .{ .target = target, .atom = atom }); - try self.got_entries_table.putNoClobber(self.base.allocator, target, got_index); - const match = MatchingSection{ - .seg = self.data_const_segment_cmd_index.?, - .sect = self.got_section_index.?, - }; - const atom_sym = &self.locals.items[atom.local_sym_index]; - - if (self.needs_prealloc) { - const vaddr = try self.allocateAtom(atom, @sizeOf(u64), 8, match); - log.debug("allocated {s} atom at 0x{x}", .{ self.getString(sym.n_strx), vaddr }); - atom_sym.n_value = vaddr; - } else { - const seg = &self.load_commands.items[self.data_const_segment_cmd_index.?].segment; - const sect = &seg.sections.items[self.got_section_index.?]; - sect.size += atom.size; - try self.addAtomToSection(atom, match); - } - - atom_sym.n_sect = @intCast(u8, self.section_ordinals.getIndex(match).? + 1); -} - -fn parseObjectsIntoAtoms(self: *MachO) !void { - // TODO I need to see if I can simplify this logic, or perhaps split it into two functions: - // one for non-prealloc traditional path, and one for incremental prealloc path. - const tracy = trace(@src()); - defer tracy.end(); - - var parsed_atoms = std.AutoArrayHashMap(MatchingSection, *Atom).init(self.base.allocator); - defer parsed_atoms.deinit(); - - var first_atoms = std.AutoArrayHashMap(MatchingSection, *Atom).init(self.base.allocator); - defer first_atoms.deinit(); - - var section_metadata = std.AutoHashMap(MatchingSection, struct { - size: u64, - alignment: u32, - }).init(self.base.allocator); - defer section_metadata.deinit(); - - for (self.objects.items) |*object| { - if (object.analyzed) continue; - - try object.parseIntoAtoms(self.base.allocator, self); - - var it = object.end_atoms.iterator(); - while (it.next()) |entry| { - const match = entry.key_ptr.*; - var atom = entry.value_ptr.*; - - while (atom.prev) |prev| { - atom = prev; - } - - const first_atom = atom; - - const seg = self.load_commands.items[match.seg].segment; - const sect = seg.sections.items[match.sect]; - const metadata = try section_metadata.getOrPut(match); - if (!metadata.found_existing) { - metadata.value_ptr.* = .{ - .size = sect.size, - .alignment = sect.@"align", - }; - } - - log.debug("{s},{s}", .{ sect.segName(), sect.sectName() }); - - while (true) { - const alignment = try math.powi(u32, 2, atom.alignment); - const curr_size = metadata.value_ptr.size; - const curr_size_aligned = mem.alignForwardGeneric(u64, curr_size, alignment); - metadata.value_ptr.size = curr_size_aligned + atom.size; - metadata.value_ptr.alignment = math.max(metadata.value_ptr.alignment, atom.alignment); - - const sym = self.locals.items[atom.local_sym_index]; - log.debug(" {s}: n_value=0x{x}, size=0x{x}, alignment=0x{x}", .{ - self.getString(sym.n_strx), - sym.n_value, - atom.size, - atom.alignment, - }); - - if (atom.next) |next| { - atom = next; - } else break; - } - - if (parsed_atoms.getPtr(match)) |last| { - last.*.next = first_atom; - first_atom.prev = last.*; - last.* = first_atom; - } - _ = try parsed_atoms.put(match, atom); - - if (!first_atoms.contains(match)) { - try first_atoms.putNoClobber(match, first_atom); - } - } - - object.analyzed = true; - } - - var it = section_metadata.iterator(); - while (it.next()) |entry| { - const match = entry.key_ptr.*; - const metadata = entry.value_ptr.*; - const seg = 
&self.load_commands.items[match.seg].segment; - const sect = &seg.sections.items[match.sect]; - log.debug("{s},{s} => size: 0x{x}, alignment: 0x{x}", .{ - sect.segName(), - sect.sectName(), - metadata.size, - metadata.alignment, - }); - - sect.@"align" = math.max(sect.@"align", metadata.alignment); - const needed_size = @intCast(u32, metadata.size); - - if (self.needs_prealloc) { - try self.growSection(match, needed_size); - } - sect.size = needed_size; - } - - for (&[_]?u16{ - self.text_segment_cmd_index, - self.data_const_segment_cmd_index, - self.data_segment_cmd_index, - }) |maybe_seg_id| { - const seg_id = maybe_seg_id orelse continue; - const seg = self.load_commands.items[seg_id].segment; - - for (seg.sections.items) |sect, sect_id| { - const match = MatchingSection{ - .seg = seg_id, - .sect = @intCast(u16, sect_id), - }; - if (!section_metadata.contains(match)) continue; - - var base_vaddr = if (self.atoms.get(match)) |last| blk: { - const last_atom_sym = self.locals.items[last.local_sym_index]; - break :blk last_atom_sym.n_value + last.size; - } else sect.addr; - - if (self.atoms.getPtr(match)) |last| { - const first_atom = first_atoms.get(match).?; - last.*.next = first_atom; - first_atom.prev = last.*; - last.* = first_atom; - } - _ = try self.atoms.put(self.base.allocator, match, parsed_atoms.get(match).?); - - if (!self.needs_prealloc) continue; - - const n_sect = @intCast(u8, self.section_ordinals.getIndex(match).? + 1); - - var atom = first_atoms.get(match).?; - while (true) { - const alignment = try math.powi(u32, 2, atom.alignment); - base_vaddr = mem.alignForwardGeneric(u64, base_vaddr, alignment); - - const sym = &self.locals.items[atom.local_sym_index]; - sym.n_value = base_vaddr; - sym.n_sect = n_sect; - - log.debug(" {s}: start=0x{x}, end=0x{x}, size=0x{x}, alignment=0x{x}", .{ - self.getString(sym.n_strx), - base_vaddr, - base_vaddr + atom.size, - atom.size, - atom.alignment, - }); - - // Update each alias (if any) - for (atom.aliases.items) |index| { - const alias_sym = &self.locals.items[index]; - alias_sym.n_value = base_vaddr; - alias_sym.n_sect = n_sect; - } - - // Update each symbol contained within the atom - for (atom.contained.items) |sym_at_off| { - const contained_sym = &self.locals.items[sym_at_off.local_sym_index]; - contained_sym.n_value = base_vaddr + sym_at_off.offset; - contained_sym.n_sect = n_sect; - } - - base_vaddr += atom.size; - - if (atom.next) |next| { - atom = next; - } else break; - } - } - } + const got_index = try self.allocateGotEntry(global); + const got_atom = try self.createGotAtom(global); + self.got_entries.items[got_index].sym_index = got_atom.sym_index; } fn addLoadDylibLC(self: *MachO, id: u16) !void { @@ -3511,16 +3306,8 @@ fn setEntryPoint(self: *MachO) !void { if (self.base.options.output_mode != .Exe) return; const seg = self.load_commands.items[self.text_segment_cmd_index.?].segment; - const entry_name = self.base.options.entry orelse "_main"; - const n_strx = self.strtab_dir.getKeyAdapted(entry_name, StringIndexAdapter{ - .bytes = &self.strtab, - }) orelse { - log.err("entrypoint '{s}' not found", .{entry_name}); - return error.MissingMainEntrypoint; - }; - const resolv = self.symbol_resolver.get(n_strx) orelse unreachable; - assert(resolv.where == .global); - const sym = self.globals.items[resolv.where_index]; + const global = try self.getEntryPoint(); + const sym = self.getSymbol(global); const ec = &self.load_commands.items[self.main_cmd_index.?].main; ec.entryoff = @intCast(u32, sym.n_value - seg.inner.vmaddr); 
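
`LC_MAIN` stores the entry point as an offset from `__TEXT` rather than as an absolute address, so ASLR slides it for free; and because `__TEXT` begins at file offset 0, the vmaddr delta computed above doubles as the "file (`__TEXT`) offset of `main()`" that the load command documents. A worked example with illustrative addresses:

```zig
const std = @import("std");

test "entryoff is a __TEXT-relative delta" {
    const text_vmaddr: u64 = 0x100000000; // typical __TEXT base on 64-bit macOS
    const main_vmaddr: u64 = 0x100003f80; // hypothetical address of _main
    const entryoff = @intCast(u32, main_vmaddr - text_vmaddr);
    try std.testing.expectEqual(@as(u32, 0x3f80), entryoff);
}
```
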
ec.stacksize = self.base.options.stack_size_override orelse 0; @@ -3529,76 +3316,77 @@ fn setEntryPoint(self: *MachO) !void { } pub fn deinit(self: *MachO) void { + const gpa = self.base.allocator; + if (build_options.have_llvm) { - if (self.llvm_object) |llvm_object| llvm_object.destroy(self.base.allocator); + if (self.llvm_object) |llvm_object| llvm_object.destroy(gpa); } if (self.d_sym) |*d_sym| { - d_sym.deinit(self.base.allocator); - } - - self.section_ordinals.deinit(self.base.allocator); - self.tlv_ptr_entries.deinit(self.base.allocator); - self.tlv_ptr_entries_free_list.deinit(self.base.allocator); - self.tlv_ptr_entries_table.deinit(self.base.allocator); - self.got_entries.deinit(self.base.allocator); - self.got_entries_free_list.deinit(self.base.allocator); - self.got_entries_table.deinit(self.base.allocator); - self.stubs.deinit(self.base.allocator); - self.stubs_free_list.deinit(self.base.allocator); - self.stubs_table.deinit(self.base.allocator); - self.strtab_dir.deinit(self.base.allocator); - self.strtab.deinit(self.base.allocator); - self.undefs.deinit(self.base.allocator); - self.globals.deinit(self.base.allocator); - self.globals_free_list.deinit(self.base.allocator); - self.locals.deinit(self.base.allocator); - self.locals_free_list.deinit(self.base.allocator); - self.symbol_resolver.deinit(self.base.allocator); - self.unresolved.deinit(self.base.allocator); - self.tentatives.deinit(self.base.allocator); + d_sym.deinit(gpa); + } + + self.section_ordinals.deinit(gpa); + self.tlv_ptr_entries.deinit(gpa); + self.tlv_ptr_entries_free_list.deinit(gpa); + self.tlv_ptr_entries_table.deinit(gpa); + self.got_entries.deinit(gpa); + self.got_entries_free_list.deinit(gpa); + self.got_entries_table.deinit(gpa); + self.stubs.deinit(gpa); + self.stubs_free_list.deinit(gpa); + self.stubs_table.deinit(gpa); + self.strtab.deinit(gpa); + self.locals.deinit(gpa); + self.locals_free_list.deinit(gpa); + self.unresolved.deinit(gpa); + + for (self.globals.keys()) |key| { + gpa.free(key); + } + self.globals.deinit(gpa); for (self.objects.items) |*object| { - object.deinit(self.base.allocator); + object.deinit(gpa); } - self.objects.deinit(self.base.allocator); + self.objects.deinit(gpa); for (self.archives.items) |*archive| { - archive.deinit(self.base.allocator); + archive.deinit(gpa); } - self.archives.deinit(self.base.allocator); + self.archives.deinit(gpa); for (self.dylibs.items) |*dylib| { - dylib.deinit(self.base.allocator); + dylib.deinit(gpa); } - self.dylibs.deinit(self.base.allocator); - self.dylibs_map.deinit(self.base.allocator); - self.referenced_dylibs.deinit(self.base.allocator); + self.dylibs.deinit(gpa); + self.dylibs_map.deinit(gpa); + self.referenced_dylibs.deinit(gpa); for (self.load_commands.items) |*lc| { - lc.deinit(self.base.allocator); + lc.deinit(gpa); } - self.load_commands.deinit(self.base.allocator); + self.load_commands.deinit(gpa); for (self.managed_atoms.items) |atom| { - atom.deinit(self.base.allocator); - self.base.allocator.destroy(atom); + atom.deinit(gpa); + gpa.destroy(atom); } - self.managed_atoms.deinit(self.base.allocator); - self.atoms.deinit(self.base.allocator); + self.managed_atoms.deinit(gpa); + self.atoms.deinit(gpa); { var it = self.atom_free_lists.valueIterator(); while (it.next()) |free_list| { - free_list.deinit(self.base.allocator); + free_list.deinit(gpa); } - self.atom_free_lists.deinit(self.base.allocator); + self.atom_free_lists.deinit(gpa); } if (self.base.options.module) |mod| { for (self.decls.keys()) |decl_index| { const decl = 
mod.declPtr(decl_index); - decl.link.macho.deinit(self.base.allocator); + decl.link.macho.deinit(gpa); } - self.decls.deinit(self.base.allocator); + self.decls.deinit(gpa); } else { assert(self.decls.count() == 0); } @@ -3606,15 +3394,15 @@ pub fn deinit(self: *MachO) void { { var it = self.unnamed_const_atoms.valueIterator(); while (it.next()) |atoms| { - atoms.deinit(self.base.allocator); + atoms.deinit(gpa); } - self.unnamed_const_atoms.deinit(self.base.allocator); + self.unnamed_const_atoms.deinit(gpa); } - self.atom_by_index_table.deinit(self.base.allocator); + self.atom_by_index_table.deinit(gpa); if (self.code_signature) |*csig| { - csig.deinit(self.base.allocator); + csig.deinit(gpa); } } @@ -3670,7 +3458,7 @@ fn freeAtom(self: *MachO, atom: *Atom, match: MatchingSection, owns_atom: bool) if (atom.prev) |prev| { prev.next = atom.next; - if (!already_have_free_list_node and prev.freeListEligible(self.*)) { + if (!already_have_free_list_node and prev.freeListEligible(self)) { // The free list is heuristics, it doesn't have to be perfect, so we can ignore // the OOM here. free_list.append(self.base.allocator, prev) catch {}; @@ -3700,14 +3488,14 @@ fn shrinkAtom(self: *MachO, atom: *Atom, new_block_size: u64, match: MatchingSec } fn growAtom(self: *MachO, atom: *Atom, new_atom_size: u64, alignment: u64, match: MatchingSection) !u64 { - const sym = self.locals.items[atom.local_sym_index]; + const sym = atom.getSymbol(self); const align_ok = mem.alignBackwardGeneric(u64, sym.n_value, alignment) == sym.n_value; - const need_realloc = !align_ok or new_atom_size > atom.capacity(self.*); + const need_realloc = !align_ok or new_atom_size > atom.capacity(self); if (!need_realloc) return sym.n_value; return self.allocateAtom(atom, new_atom_size, alignment, match); } -fn allocateLocalSymbol(self: *MachO) !u32 { +fn allocateSymbol(self: *MachO) !u32 { try self.locals.ensureUnusedCapacity(self.base.allocator, 1); const index = blk: { @@ -3733,8 +3521,9 @@ fn allocateLocalSymbol(self: *MachO) !u32 { return index; } -pub fn allocateGotEntry(self: *MachO, target: Atom.Relocation.Target) !u32 { - try self.got_entries.ensureUnusedCapacity(self.base.allocator, 1); +pub fn allocateGotEntry(self: *MachO, target: SymbolWithLoc) !u32 { + const gpa = self.base.allocator; + try self.got_entries.ensureUnusedCapacity(gpa, 1); const index = blk: { if (self.got_entries_free_list.popOrNull()) |index| { @@ -3748,16 +3537,13 @@ pub fn allocateGotEntry(self: *MachO, target: Atom.Relocation.Target) !u32 { } }; - self.got_entries.items[index] = .{ - .target = target, - .atom = undefined, - }; - try self.got_entries_table.putNoClobber(self.base.allocator, target, index); + self.got_entries.items[index] = .{ .target = target, .sym_index = 0 }; + try self.got_entries_table.putNoClobber(gpa, target, index); return index; } -pub fn allocateStubEntry(self: *MachO, n_strx: u32) !u32 { +pub fn allocateStubEntry(self: *MachO, target: SymbolWithLoc) !u32 { try self.stubs.ensureUnusedCapacity(self.base.allocator, 1); const index = blk: { @@ -3772,13 +3558,13 @@ pub fn allocateStubEntry(self: *MachO, n_strx: u32) !u32 { } }; - self.stubs.items[index] = undefined; - try self.stubs_table.putNoClobber(self.base.allocator, n_strx, index); + self.stubs.items[index] = .{ .target = target, .sym_index = 0 }; + try self.stubs_table.putNoClobber(self.base.allocator, target, index); return index; } -pub fn allocateTlvPtrEntry(self: *MachO, target: Atom.Relocation.Target) !u32 { +pub fn allocateTlvPtrEntry(self: *MachO, target: 
SymbolWithLoc) !u32 { try self.tlv_ptr_entries.ensureUnusedCapacity(self.base.allocator, 1); const index = blk: { @@ -3793,7 +3579,7 @@ pub fn allocateTlvPtrEntry(self: *MachO, target: Atom.Relocation.Target) !u32 { } }; - self.tlv_ptr_entries.items[index] = .{ .target = target, .atom = undefined }; + self.tlv_ptr_entries.items[index] = .{ .target = target, .sym_index = 0 }; try self.tlv_ptr_entries_table.putNoClobber(self.base.allocator, target, index); return index; @@ -3802,16 +3588,11 @@ pub fn allocateTlvPtrEntry(self: *MachO, target: Atom.Relocation.Target) !u32 { pub fn allocateDeclIndexes(self: *MachO, decl_index: Module.Decl.Index) !void { if (self.llvm_object) |_| return; const decl = self.base.options.module.?.declPtr(decl_index); - if (decl.link.macho.local_sym_index != 0) return; + if (decl.link.macho.sym_index != 0) return; - decl.link.macho.local_sym_index = try self.allocateLocalSymbol(); - try self.atom_by_index_table.putNoClobber(self.base.allocator, decl.link.macho.local_sym_index, &decl.link.macho); + decl.link.macho.sym_index = try self.allocateSymbol(); + try self.atom_by_index_table.putNoClobber(self.base.allocator, decl.link.macho.sym_index, &decl.link.macho); try self.decls.putNoClobber(self.base.allocator, decl_index, null); - - const got_target = .{ .local = decl.link.macho.local_sym_index }; - const got_index = try self.allocateGotEntry(got_target); - const got_atom = try self.createGotAtom(got_target); - self.got_entries.items[got_index].atom = got_atom; } pub fn updateFunc(self: *MachO, module: *Module, func: *Module.Fn, air: Air, liveness: Liveness) !void { @@ -3862,14 +3643,14 @@ pub fn updateFunc(self: *MachO, module: *Module, func: *Module.Fn, air: Air, liv }, } - const symbol = try self.placeDecl(decl_index, decl.link.macho.code.items.len); + const addr = try self.placeDecl(decl_index, decl.link.macho.code.items.len); if (decl_state) |*ds| { try self.d_sym.?.dwarf.commitDeclState( &self.base, module, decl, - symbol.n_value, + addr, decl.link.macho.size, ds, ); @@ -3885,8 +3666,9 @@ pub fn lowerUnnamedConst(self: *MachO, typed_value: TypedValue, decl_index: Modu var code_buffer = std.ArrayList(u8).init(self.base.allocator); defer code_buffer.deinit(); + const gpa = self.base.allocator; const module = self.base.options.module.?; - const gop = try self.unnamed_const_atoms.getOrPut(self.base.allocator, decl_index); + const gop = try self.unnamed_const_atoms.getOrPut(gpa, decl_index); if (!gop.found_existing) { gop.value_ptr.* = .{}; } @@ -3894,25 +3676,32 @@ pub fn lowerUnnamedConst(self: *MachO, typed_value: TypedValue, decl_index: Modu const decl = module.declPtr(decl_index); const decl_name = try decl.getFullyQualifiedName(module); - defer self.base.allocator.free(decl_name); + defer gpa.free(decl_name); const name_str_index = blk: { const index = unnamed_consts.items.len; - const name = try std.fmt.allocPrint(self.base.allocator, "__unnamed_{s}_{d}", .{ decl_name, index }); - defer self.base.allocator.free(name); - break :blk try self.makeString(name); + const name = try std.fmt.allocPrint(gpa, "__unnamed_{s}_{d}", .{ decl_name, index }); + defer gpa.free(name); + break :blk try self.strtab.insert(gpa, name); }; - const name = self.getString(name_str_index); + const name = self.strtab.get(name_str_index); log.debug("allocating symbol indexes for {s}", .{name}); const required_alignment = typed_value.ty.abiAlignment(self.base.options.target); - const local_sym_index = try self.allocateLocalSymbol(); - const atom = try self.createEmptyAtom(local_sym_index, 
@sizeOf(u64), math.log2(required_alignment)); - try self.atom_by_index_table.putNoClobber(self.base.allocator, local_sym_index, atom); + const sym_index = try self.allocateSymbol(); + const atom = try MachO.createEmptyAtom( + gpa, + sym_index, + @sizeOf(u64), + math.log2(required_alignment), + ); + + try self.managed_atoms.append(gpa, atom); + try self.atom_by_index_table.putNoClobber(gpa, sym_index, atom); const res = try codegen.generateSymbol(&self.base, decl.srcLoc(), typed_value, &code_buffer, .none, .{ - .parent_atom_index = local_sym_index, + .parent_atom_index = sym_index, }); const code = switch (res) { .externally_managed => |x| x, @@ -3926,7 +3715,7 @@ pub fn lowerUnnamedConst(self: *MachO, typed_value: TypedValue, decl_index: Modu }; atom.code.clearRetainingCapacity(); - try atom.code.appendSlice(self.base.allocator, code); + try atom.code.appendSlice(gpa, code); const match = try self.getMatchingSectionAtom( atom, @@ -3942,18 +3731,18 @@ pub fn lowerUnnamedConst(self: *MachO, typed_value: TypedValue, decl_index: Modu errdefer self.freeAtom(atom, match, true); - const symbol = &self.locals.items[atom.local_sym_index]; + const symbol = atom.getSymbolPtr(self); symbol.* = .{ .n_strx = name_str_index, .n_type = macho.N_SECT, - .n_sect = @intCast(u8, self.section_ordinals.getIndex(match).?) + 1, + .n_sect = self.getSectionOrdinal(match), .n_desc = 0, .n_value = addr, }; - try unnamed_consts.append(self.base.allocator, atom); + try unnamed_consts.append(gpa, atom); - return atom.local_sym_index; + return atom.sym_index; } pub fn updateDecl(self: *MachO, module: *Module, decl_index: Module.Decl.Index) !void { @@ -3995,14 +3784,14 @@ pub fn updateDecl(self: *MachO, module: *Module, decl_index: Module.Decl.Index) }, &code_buffer, .{ .dwarf = ds, }, .{ - .parent_atom_index = decl.link.macho.local_sym_index, + .parent_atom_index = decl.link.macho.sym_index, }) else try codegen.generateSymbol(&self.base, decl.srcLoc(), .{ .ty = decl.ty, .val = decl_val, }, &code_buffer, .none, .{ - .parent_atom_index = decl.link.macho.local_sym_index, + .parent_atom_index = decl.link.macho.sym_index, }); const code = blk: { @@ -4025,14 +3814,14 @@ pub fn updateDecl(self: *MachO, module: *Module, decl_index: Module.Decl.Index) }, } }; - const symbol = try self.placeDecl(decl_index, code.len); + const addr = try self.placeDecl(decl_index, code.len); if (decl_state) |*ds| { try self.d_sym.?.dwarf.commitDeclState( &self.base, module, decl, - symbol.n_value, + addr, decl.link.macho.size, ds, ); @@ -4177,8 +3966,7 @@ fn getMatchingSectionAtom( .@"align" = align_log_2, })).?; }; - const seg = self.load_commands.items[match.seg].segment; - const sect = seg.sections.items[match.sect]; + const sect = self.getSection(match); log.debug(" allocating atom '{s}' in '{s},{s}' ({d},{d})", .{ name, sect.segName(), @@ -4189,12 +3977,11 @@ fn getMatchingSectionAtom( return match; } -fn placeDecl(self: *MachO, decl_index: Module.Decl.Index, code_len: usize) !*macho.nlist_64 { +fn placeDecl(self: *MachO, decl_index: Module.Decl.Index, code_len: usize) !u64 { const module = self.base.options.module.?; const decl = module.declPtr(decl_index); const required_alignment = decl.getAlignment(self.base.options.target); - assert(decl.link.macho.local_sym_index != 0); // Caller forgot to call allocateDeclIndexes() - const symbol = &self.locals.items[decl.link.macho.local_sym_index]; + assert(decl.link.macho.sym_index != 0); // Caller forgot to call allocateDeclIndexes() const sym_name = try decl.getFullyQualifiedName(module); defer 
self.base.allocator.free(sym_name); @@ -4212,7 +3999,8 @@ fn placeDecl(self: *MachO, decl_index: Module.Decl.Index, code_len: usize) !*mac const match = decl_ptr.*.?; if (decl.link.macho.size != 0) { - const capacity = decl.link.macho.capacity(self.*); + const symbol = decl.link.macho.getSymbolPtr(self); + const capacity = decl.link.macho.capacity(self); const need_realloc = code_len > capacity or !mem.isAlignedGeneric(u64, symbol.n_value, required_alignment); if (need_realloc) { @@ -4220,18 +4008,24 @@ fn placeDecl(self: *MachO, decl_index: Module.Decl.Index, code_len: usize) !*mac log.debug("growing {s} and moving from 0x{x} to 0x{x}", .{ sym_name, symbol.n_value, vaddr }); log.debug(" (required alignment 0x{x})", .{required_alignment}); symbol.n_value = vaddr; + + const got_atom = self.getGotAtomForSymbol(.{ + .sym_index = decl.link.macho.sym_index, + .file = null, + }).?; + got_atom.dirty = true; } else if (code_len < decl.link.macho.size) { self.shrinkAtom(&decl.link.macho, code_len, match); } decl.link.macho.size = code_len; decl.link.macho.dirty = true; - symbol.n_strx = try self.makeString(sym_name); + symbol.n_strx = try self.strtab.insert(self.base.allocator, sym_name); symbol.n_type = macho.N_SECT; symbol.n_sect = @intCast(u8, self.text_section_index.?) + 1; symbol.n_desc = 0; } else { - const name_str_index = try self.makeString(sym_name); + const name_str_index = try self.strtab.insert(self.base.allocator, sym_name); const addr = try self.allocateAtom(&decl.link.macho, code_len, required_alignment, match); log.debug("allocated atom for {s} at 0x{x}", .{ sym_name, addr }); @@ -4239,28 +4033,22 @@ fn placeDecl(self: *MachO, decl_index: Module.Decl.Index, code_len: usize) !*mac errdefer self.freeAtom(&decl.link.macho, match, false); + const symbol = decl.link.macho.getSymbolPtr(self); symbol.* = .{ .n_strx = name_str_index, .n_type = macho.N_SECT, - .n_sect = @intCast(u8, self.section_ordinals.getIndex(match).?) + 1, + .n_sect = self.getSectionOrdinal(match), .n_desc = 0, .n_value = addr, }; - const got_index = self.got_entries_table.get(.{ .local = decl.link.macho.local_sym_index }).?; - const got_atom = self.got_entries.items[got_index].atom; - const got_sym = &self.locals.items[got_atom.local_sym_index]; - const vaddr = try self.allocateAtom(got_atom, @sizeOf(u64), 8, .{ - .seg = self.data_const_segment_cmd_index.?, - .sect = self.got_section_index.?, - }); - got_sym.n_value = vaddr; - got_sym.n_sect = @intCast(u8, self.section_ordinals.getIndex(.{ - .seg = self.data_const_segment_cmd_index.?, - .sect = self.got_section_index.?, - }).? 
+ 1); + + const got_target = SymbolWithLoc{ .sym_index = decl.link.macho.sym_index, .file = null }; + const got_index = try self.allocateGotEntry(got_target); + const got_atom = try self.createGotAtom(got_target); + self.got_entries.items[got_index].sym_index = got_atom.sym_index; } - return symbol; + return decl.link.macho.getSymbol(self).n_value; } pub fn updateDeclLineNumber(self: *MachO, module: *Module, decl: *const Module.Decl) !void { @@ -4280,19 +4068,23 @@ pub fn updateDeclExports( @panic("Attempted to compile for object format that was disabled by build configuration"); } if (build_options.have_llvm) { - if (self.llvm_object) |llvm_object| return llvm_object.updateDeclExports(module, decl_index, exports); + if (self.llvm_object) |llvm_object| + return llvm_object.updateDeclExports(module, decl_index, exports); } const tracy = trace(@src()); defer tracy.end(); - try self.globals.ensureUnusedCapacity(self.base.allocator, exports.len); + const gpa = self.base.allocator; + const decl = module.declPtr(decl_index); - if (decl.link.macho.local_sym_index == 0) return; - const decl_sym = &self.locals.items[decl.link.macho.local_sym_index]; + if (decl.link.macho.sym_index == 0) return; + const decl_sym = decl.link.macho.getSymbol(self); for (exports) |exp| { - const exp_name = try std.fmt.allocPrint(self.base.allocator, "_{s}", .{exp.options.name}); - defer self.base.allocator.free(exp_name); + const exp_name = try std.fmt.allocPrint(gpa, "_{s}", .{exp.options.name}); + defer gpa.free(exp_name); + + log.debug("adding new export '{s}'", .{exp_name}); if (exp.options.section) |section_name| { if (!mem.eql(u8, section_name, "__text")) { @@ -4300,7 +4092,7 @@ pub fn updateDeclExports( module.gpa, exp, try Module.ErrorMsg.create( - self.base.allocator, + gpa, decl.srcLoc(), "Unimplemented: ExportOptions.section", .{}, @@ -4315,7 +4107,7 @@ pub fn updateDeclExports( module.gpa, exp, try Module.ErrorMsg.create( - self.base.allocator, + gpa, decl.srcLoc(), "Unimplemented: GlobalLinkage.LinkOnce", .{}, @@ -4324,103 +4116,85 @@ pub fn updateDeclExports( continue; } - const is_weak = exp.options.linkage == .Internal or exp.options.linkage == .Weak; - const n_strx = try self.makeString(exp_name); - if (self.symbol_resolver.getPtr(n_strx)) |resolv| { - switch (resolv.where) { - .global => { - if (resolv.local_sym_index == decl.link.macho.local_sym_index) continue; - - const sym = &self.globals.items[resolv.where_index]; - - if (sym.tentative()) { - assert(self.tentatives.swapRemove(resolv.where_index)); - } else if (!is_weak and !(sym.weakDef() or sym.pext())) { - _ = try module.failed_exports.put( - module.gpa, - exp, - try Module.ErrorMsg.create( - self.base.allocator, - decl.srcLoc(), - \\LinkError: symbol '{s}' defined multiple times - \\ first definition in '{s}' - , - .{ exp_name, self.objects.items[resolv.file.?].name }, - ), - ); - continue; - } else if (is_weak) continue; // Current symbol is weak, so skip it. - - // Otherwise, update the resolver and the global symbol. 
- sym.n_type = macho.N_SECT | macho.N_EXT; - resolv.local_sym_index = decl.link.macho.local_sym_index; - resolv.file = null; - exp.link.macho.sym_index = resolv.where_index; - - continue; - }, - .undef => { - assert(self.unresolved.swapRemove(resolv.where_index)); - _ = self.symbol_resolver.remove(n_strx); - }, - } - } - - var n_type: u8 = macho.N_SECT | macho.N_EXT; - var n_desc: u16 = 0; + const sym_index = exp.link.macho.sym_index orelse blk: { + const sym_index = try self.allocateSymbol(); + exp.link.macho.sym_index = sym_index; + break :blk sym_index; + }; + const sym_loc = SymbolWithLoc{ .sym_index = sym_index, .file = null }; + const sym = self.getSymbolPtr(sym_loc); + sym.* = .{ + .n_strx = try self.strtab.insert(gpa, exp_name), + .n_type = macho.N_SECT | macho.N_EXT, + .n_sect = self.getSectionOrdinal(.{ + .seg = self.text_segment_cmd_index.?, + .sect = self.text_section_index.?, // TODO what if we export a variable? + }), + .n_desc = 0, + .n_value = decl_sym.n_value, + }; switch (exp.options.linkage) { .Internal => { // Symbol should be hidden, or in MachO lingo, private extern. // We should also mark the symbol as Weak: n_desc == N_WEAK_DEF. - // TODO work out when to add N_WEAK_REF. - n_type |= macho.N_PEXT; - n_desc |= macho.N_WEAK_DEF; + sym.n_type |= macho.N_PEXT; + sym.n_desc |= macho.N_WEAK_DEF; }, .Strong => {}, .Weak => { // Weak linkage is specified as part of n_desc field. // Symbol's n_type is like for a symbol with strong linkage. - n_desc |= macho.N_WEAK_DEF; + sym.n_desc |= macho.N_WEAK_DEF; }, else => unreachable, } - const global_sym_index = if (exp.link.macho.sym_index) |i| i else blk: { - const i = if (self.globals_free_list.popOrNull()) |i| i else inner: { - _ = self.globals.addOneAssumeCapacity(); - break :inner @intCast(u32, self.globals.items.len - 1); - }; - break :blk i; - }; - const sym = &self.globals.items[global_sym_index]; - sym.* = .{ - .n_strx = try self.makeString(exp_name), - .n_type = n_type, - .n_sect = @intCast(u8, self.text_section_index.?) 
+ 1, - .n_desc = n_desc, - .n_value = decl_sym.n_value, + self.resolveGlobalSymbol(sym_loc) catch |err| switch (err) { + error.MultipleSymbolDefinitions => { + const global = self.globals.get(exp_name).?; + if (sym_loc.sym_index != global.sym_index and global.file != null) { + _ = try module.failed_exports.put(module.gpa, exp, try Module.ErrorMsg.create( + gpa, + decl.srcLoc(), + \\LinkError: symbol '{s}' defined multiple times + \\ first definition in '{s}' + , + .{ exp_name, self.objects.items[global.file.?].name }, + )); + } + }, + else => |e| return e, }; - exp.link.macho.sym_index = global_sym_index; - - try self.symbol_resolver.putNoClobber(self.base.allocator, n_strx, .{ - .where = .global, - .where_index = global_sym_index, - .local_sym_index = decl.link.macho.local_sym_index, - }); } } pub fn deleteExport(self: *MachO, exp: Export) void { if (self.llvm_object) |_| return; const sym_index = exp.sym_index orelse return; - self.globals_free_list.append(self.base.allocator, sym_index) catch {}; - const global = &self.globals.items[sym_index]; - log.debug("deleting export '{s}': {}", .{ self.getString(global.n_strx), global }); - assert(self.symbol_resolver.remove(global.n_strx)); - global.n_type = 0; - global.n_strx = 0; - global.n_value = 0; + + const gpa = self.base.allocator; + + const sym_loc = SymbolWithLoc{ .sym_index = sym_index, .file = null }; + const sym = self.getSymbolPtr(sym_loc); + const sym_name = self.getSymbolName(sym_loc); + log.debug("deleting export '{s}'", .{sym_name}); + assert(sym.sect() and sym.ext()); + sym.* = .{ + .n_strx = 0, + .n_type = 0, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }; + self.locals_free_list.append(gpa, sym_index) catch {}; + + if (self.globals.get(sym_name)) |global| blk: { + if (global.sym_index != sym_index) break :blk; + if (global.file != null) break :blk; + const kv = self.globals.fetchSwapRemove(sym_name); + gpa.free(kv.?.key); + } } fn freeUnnamedConsts(self: *MachO, decl_index: Module.Decl.Index) void { @@ -4430,11 +4204,11 @@ fn freeUnnamedConsts(self: *MachO, decl_index: Module.Decl.Index) void { .seg = self.text_segment_cmd_index.?, .sect = self.text_const_section_index.?, }, true); - self.locals_free_list.append(self.base.allocator, atom.local_sym_index) catch {}; - self.locals.items[atom.local_sym_index].n_type = 0; - _ = self.atom_by_index_table.remove(atom.local_sym_index); - log.debug(" adding local symbol index {d} to free list", .{atom.local_sym_index}); - atom.local_sym_index = 0; + self.locals_free_list.append(self.base.allocator, atom.sym_index) catch {}; + self.locals.items[atom.sym_index].n_type = 0; + _ = self.atom_by_index_table.remove(atom.sym_index); + log.debug(" adding local symbol index {d} to free list", .{atom.sym_index}); + atom.sym_index = 0; } unnamed_consts.clearAndFree(self.base.allocator); } @@ -4452,29 +4226,33 @@ pub fn freeDecl(self: *MachO, decl_index: Module.Decl.Index) void { self.freeUnnamedConsts(decl_index); } // Appending to free lists is allowed to fail because the free lists are heuristics based anyway. 
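The "free lists are heuristics" comment above is load-bearing for the error handling that follows: every `free_list.append(...)` in this file deliberately swallows `error.OutOfMemory`, because a missing free-list entry only costs a future reuse opportunity, never correctness. A sketch of the pattern with hypothetical names (not the linker's actual API):

    const std = @import("std");

    fn noteFreedAtom(
        gpa: std.mem.Allocator,
        free_list: *std.ArrayListUnmanaged(u32),
        sym_index: u32,
    ) void {
        // Best-effort bookkeeping: nothing consults the free list for
        // correctness, so an allocation failure here is simply ignored.
        free_list.append(gpa, sym_index) catch {};
    }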
- if (decl.link.macho.local_sym_index != 0) { - self.locals_free_list.append(self.base.allocator, decl.link.macho.local_sym_index) catch {}; + if (decl.link.macho.sym_index != 0) { + self.locals_free_list.append(self.base.allocator, decl.link.macho.sym_index) catch {}; // Try freeing GOT atom if this decl had one - if (self.got_entries_table.get(.{ .local = decl.link.macho.local_sym_index })) |got_index| { + const got_target = SymbolWithLoc{ .sym_index = decl.link.macho.sym_index, .file = null }; + if (self.got_entries_table.get(got_target)) |got_index| { self.got_entries_free_list.append(self.base.allocator, @intCast(u32, got_index)) catch {}; - self.got_entries.items[got_index] = .{ .target = .{ .local = 0 }, .atom = undefined }; - _ = self.got_entries_table.swapRemove(.{ .local = decl.link.macho.local_sym_index }); + self.got_entries.items[got_index] = .{ + .target = .{ .sym_index = 0, .file = null }, + .sym_index = 0, + }; + _ = self.got_entries_table.remove(got_target); if (self.d_sym) |*d_sym| { - d_sym.swapRemoveRelocs(decl.link.macho.local_sym_index); + d_sym.swapRemoveRelocs(decl.link.macho.sym_index); } log.debug(" adding GOT index {d} to free list (target local@{d})", .{ got_index, - decl.link.macho.local_sym_index, + decl.link.macho.sym_index, }); } - self.locals.items[decl.link.macho.local_sym_index].n_type = 0; - _ = self.atom_by_index_table.remove(decl.link.macho.local_sym_index); - log.debug(" adding local symbol index {d} to free list", .{decl.link.macho.local_sym_index}); - decl.link.macho.local_sym_index = 0; + self.locals.items[decl.link.macho.sym_index].n_type = 0; + _ = self.atom_by_index_table.remove(decl.link.macho.sym_index); + log.debug(" adding local symbol index {d} to free list", .{decl.link.macho.sym_index}); + decl.link.macho.sym_index = 0; } if (self.d_sym) |*d_sym| { d_sym.dwarf.freeDecl(decl); @@ -4486,12 +4264,12 @@ pub fn getDeclVAddr(self: *MachO, decl_index: Module.Decl.Index, reloc_info: Fil const decl = mod.declPtr(decl_index); assert(self.llvm_object == null); - assert(decl.link.macho.local_sym_index != 0); + assert(decl.link.macho.sym_index != 0); const atom = self.atom_by_index_table.get(reloc_info.parent_atom_index).?; try atom.relocs.append(self.base.allocator, .{ .offset = @intCast(u32, reloc_info.offset), - .target = .{ .local = decl.link.macho.local_sym_index }, + .target = .{ .sym_index = decl.link.macho.sym_index, .file = null }, .addend = reloc_info.addend, .subtractor = null, .pcrel = false, @@ -4534,7 +4312,7 @@ fn populateMissingMetadata(self: *MachO) !void { if (self.text_segment_cmd_index == null) { self.text_segment_cmd_index = @intCast(u16, self.load_commands.items.len); - const needed_size = if (self.needs_prealloc) blk: { + const needed_size = if (self.mode == .incremental) blk: { const headerpad_size = @maximum(self.base.options.headerpad_size orelse 0, default_headerpad_size); const program_code_size_hint = self.base.options.program_code_size_hint; const got_size_hint = @sizeOf(u64) * self.base.options.symbol_count_hint; @@ -4565,7 +4343,7 @@ fn populateMissingMetadata(self: *MachO) !void { .aarch64 => 2, else => unreachable, // unhandled architecture type }; - const needed_size = if (self.needs_prealloc) self.base.options.program_code_size_hint else 0; + const needed_size = if (self.mode == .incremental) self.base.options.program_code_size_hint else 0; self.text_section_index = try self.initSection( self.text_segment_cmd_index.?, "__text", @@ -4588,7 +4366,7 @@ fn populateMissingMetadata(self: *MachO) !void { .aarch64 => 3 * 
@sizeOf(u32), else => unreachable, // unhandled architecture type }; - const needed_size = if (self.needs_prealloc) stub_size * self.base.options.symbol_count_hint else 0; + const needed_size = if (self.mode == .incremental) stub_size * self.base.options.symbol_count_hint else 0; self.stubs_section_index = try self.initSection( self.text_segment_cmd_index.?, "__stubs", @@ -4617,7 +4395,7 @@ fn populateMissingMetadata(self: *MachO) !void { .aarch64 => 3 * @sizeOf(u32), else => unreachable, }; - const needed_size = if (self.needs_prealloc) + const needed_size = if (self.mode == .incremental) stub_size * self.base.options.symbol_count_hint + preamble_size else 0; @@ -4637,7 +4415,7 @@ fn populateMissingMetadata(self: *MachO) !void { var vmaddr: u64 = 0; var fileoff: u64 = 0; var needed_size: u64 = 0; - if (self.needs_prealloc) { + if (self.mode == .incremental) { const base = self.getSegmentAllocBase(&.{self.text_segment_cmd_index.?}); vmaddr = base.vmaddr; fileoff = base.fileoff; @@ -4666,7 +4444,7 @@ fn populateMissingMetadata(self: *MachO) !void { } if (self.got_section_index == null) { - const needed_size = if (self.needs_prealloc) + const needed_size = if (self.mode == .incremental) @sizeOf(u64) * self.base.options.symbol_count_hint else 0; @@ -4687,7 +4465,7 @@ fn populateMissingMetadata(self: *MachO) !void { var vmaddr: u64 = 0; var fileoff: u64 = 0; var needed_size: u64 = 0; - if (self.needs_prealloc) { + if (self.mode == .incremental) { const base = self.getSegmentAllocBase(&.{self.data_const_segment_cmd_index.?}); vmaddr = base.vmaddr; fileoff = base.fileoff; @@ -4716,7 +4494,7 @@ fn populateMissingMetadata(self: *MachO) !void { } if (self.la_symbol_ptr_section_index == null) { - const needed_size = if (self.needs_prealloc) + const needed_size = if (self.mode == .incremental) @sizeOf(u64) * self.base.options.symbol_count_hint else 0; @@ -4733,7 +4511,10 @@ fn populateMissingMetadata(self: *MachO) !void { } if (self.data_section_index == null) { - const needed_size = if (self.needs_prealloc) @sizeOf(u64) * self.base.options.symbol_count_hint else 0; + const needed_size = if (self.mode == .incremental) + @sizeOf(u64) * self.base.options.symbol_count_hint + else + 0; const alignment: u16 = 3; // 2^3 = @sizeOf(u64) self.data_section_index = try self.initSection( self.data_segment_cmd_index.?, @@ -4745,7 +4526,10 @@ fn populateMissingMetadata(self: *MachO) !void { } if (self.tlv_section_index == null) { - const needed_size = if (self.needs_prealloc) @sizeOf(u64) * self.base.options.symbol_count_hint else 0; + const needed_size = if (self.mode == .incremental) + @sizeOf(u64) * self.base.options.symbol_count_hint + else + 0; const alignment: u16 = 3; // 2^3 = @sizeOf(u64) self.tlv_section_index = try self.initSection( self.data_segment_cmd_index.?, @@ -4759,7 +4543,10 @@ fn populateMissingMetadata(self: *MachO) !void { } if (self.tlv_data_section_index == null) { - const needed_size = if (self.needs_prealloc) @sizeOf(u64) * self.base.options.symbol_count_hint else 0; + const needed_size = if (self.mode == .incremental) + @sizeOf(u64) * self.base.options.symbol_count_hint + else + 0; const alignment: u16 = 3; // 2^3 = @sizeOf(u64) self.tlv_data_section_index = try self.initSection( self.data_segment_cmd_index.?, @@ -4773,7 +4560,10 @@ fn populateMissingMetadata(self: *MachO) !void { } if (self.tlv_bss_section_index == null) { - const needed_size = if (self.needs_prealloc) @sizeOf(u64) * self.base.options.symbol_count_hint else 0; + const needed_size = if (self.mode == .incremental) + 
@sizeOf(u64) * self.base.options.symbol_count_hint + else + 0; const alignment: u16 = 3; // 2^3 = @sizeOf(u64) self.tlv_bss_section_index = try self.initSection( self.data_segment_cmd_index.?, @@ -4787,7 +4577,10 @@ fn populateMissingMetadata(self: *MachO) !void { } if (self.bss_section_index == null) { - const needed_size = if (self.needs_prealloc) @sizeOf(u64) * self.base.options.symbol_count_hint else 0; + const needed_size = if (self.mode == .incremental) + @sizeOf(u64) * self.base.options.symbol_count_hint + else + 0; const alignment: u16 = 3; // 2^3 = @sizeOf(u64) self.bss_section_index = try self.initSection( self.data_segment_cmd_index.?, @@ -4804,7 +4597,7 @@ fn populateMissingMetadata(self: *MachO) !void { self.linkedit_segment_cmd_index = @intCast(u16, self.load_commands.items.len); var vmaddr: u64 = 0; var fileoff: u64 = 0; - if (self.needs_prealloc) { + if (self.mode == .incremental) { const base = self.getSegmentAllocBase(&.{self.data_segment_cmd_index.?}); vmaddr = base.vmaddr; fileoff = base.fileoff; @@ -5028,8 +4821,6 @@ fn populateMissingMetadata(self: *MachO) !void { }); self.load_commands_dirty = true; } - - self.cold_start = true; } fn calcMinHeaderpad(self: *MachO) u64 { @@ -5130,7 +4921,7 @@ fn allocateSegment(self: *MachO, maybe_index: ?u16, indices: []const ?u16, init_ // Allocate the sections according to their alignment at the beginning of the segment. var start = init_size; - for (seg.sections.items) |*sect, sect_id| { + for (seg.sections.items) |*sect| { const is_zerofill = sect.flags == macho.S_ZEROFILL or sect.flags == macho.S_THREAD_LOCAL_ZEROFILL; const use_llvm = build_options.have_llvm and self.base.options.use_llvm; const use_stage1 = build_options.is_stage1 and self.base.options.use_stage1; @@ -5138,32 +4929,12 @@ fn allocateSegment(self: *MachO, maybe_index: ?u16, indices: []const ?u16, init_ const start_aligned = mem.alignForwardGeneric(u64, start, alignment); // TODO handle zerofill sections in stage2 - sect.offset = if (is_zerofill and (use_stage1 or use_llvm)) 0 else @intCast(u32, seg.inner.fileoff + start_aligned); + sect.offset = if (is_zerofill and (use_stage1 or use_llvm)) + 0 + else + @intCast(u32, seg.inner.fileoff + start_aligned); sect.addr = seg.inner.vmaddr + start_aligned; - // Recalculate section size given the allocated start address - sect.size = if (self.atoms.get(.{ - .seg = index, - .sect = @intCast(u16, sect_id), - })) |last_atom| blk: { - var atom = last_atom; - while (atom.prev) |prev| { - atom = prev; - } - - var base_addr = sect.addr; - - while (true) { - const atom_alignment = try math.powi(u32, 2, atom.alignment); - base_addr = mem.alignForwardGeneric(u64, base_addr, atom_alignment) + atom.size; - if (atom.next) |next| { - atom = next; - } else break; - } - - break :blk base_addr - sect.addr; - } else 0; - start = start_aligned + sect.size; if (!(is_zerofill and (use_stage1 or use_llvm))) { @@ -5194,14 +4965,14 @@ fn initSection( var sect = macho.section_64{ .sectname = makeStaticString(sectname), .segname = seg.inner.segname, - .size = if (self.needs_prealloc) @intCast(u32, size) else 0, + .size = if (self.mode == .incremental) @intCast(u32, size) else 0, .@"align" = alignment, .flags = opts.flags, .reserved1 = opts.reserved1, .reserved2 = opts.reserved2, }; - if (self.needs_prealloc) { + if (self.mode == .incremental) { const alignment_pow_2 = try math.powi(u32, 2, alignment); const padding: ?u32 = if (segment_id == self.text_segment_cmd_index.?) 
@maximum(self.base.options.headerpad_size orelse 0, default_headerpad_size) @@ -5419,12 +5190,30 @@ fn getSectionMaxAlignment(self: *MachO, segment_id: u16, start_sect_id: u16) !u3 return max_alignment; } -fn allocateAtom(self: *MachO, atom: *Atom, new_atom_size: u64, alignment: u64, match: MatchingSection) !u64 { +fn allocateAtomCommon(self: *MachO, atom: *Atom, match: MatchingSection) !void { + const sym = atom.getSymbolPtr(self); + if (self.mode == .incremental) { + const size = atom.size; + const alignment = try math.powi(u32, 2, atom.alignment); + const vaddr = try self.allocateAtom(atom, size, alignment, match); + const sym_name = atom.getName(self); + log.debug("allocated {s} atom at 0x{x}", .{ sym_name, vaddr }); + sym.n_value = vaddr; + } else try self.addAtomToSection(atom, match); + sym.n_sect = self.getSectionOrdinal(match); +} + +fn allocateAtom( + self: *MachO, + atom: *Atom, + new_atom_size: u64, + alignment: u64, + match: MatchingSection, +) !u64 { const tracy = trace(@src()); defer tracy.end(); - const seg = &self.load_commands.items[match.seg].segment; - const sect = &seg.sections.items[match.sect]; + const sect = self.getSectionPtr(match); var free_list = self.atom_free_lists.get(match).?; const needs_padding = match.seg == self.text_segment_cmd_index.? and match.sect == self.text_section_index.?; const new_atom_ideal_capacity = if (needs_padding) padToIdeal(new_atom_size) else new_atom_size; @@ -5445,8 +5234,8 @@ fn allocateAtom(self: *MachO, atom: *Atom, new_atom_size: u64, alignment: u64, m const big_atom = free_list.items[i]; // We now have a pointer to a live atom that has too much capacity. // Is it enough that we could fit this new atom? - const sym = self.locals.items[big_atom.local_sym_index]; - const capacity = big_atom.capacity(self.*); + const sym = big_atom.getSymbol(self); + const capacity = big_atom.capacity(self); const ideal_capacity = if (needs_padding) padToIdeal(capacity) else capacity; const ideal_capacity_end_vaddr = math.add(u64, sym.n_value, ideal_capacity) catch ideal_capacity; const capacity_end_vaddr = sym.n_value + capacity; @@ -5456,7 +5245,7 @@ fn allocateAtom(self: *MachO, atom: *Atom, new_atom_size: u64, alignment: u64, m // Additional bookkeeping here to notice if this free list node // should be deleted because the atom that it points to has grown to take up // more of the extra capacity. 
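The free-list scan guarded by the comment above keeps a candidate only while its surplus capacity is worth tracking, and places a new atom at the tail of a big enough block. A rough sketch of the tail-fit address math, using the same `std.mem` alignment helpers the surrounding code uses (the concrete numbers are illustrative, not from this change):

    const std = @import("std");
    const mem = std.mem;

    test "tail-fit placement inside an oversized free block" {
        const capacity_end: u64 = 0x2000; // end of the big atom's capacity
        const new_size: u64 = 0x180;
        const alignment: u64 = 0x40;
        // Slide the new atom as far toward the end of the block as its
        // alignment allows, so the leftover surplus stays contiguous.
        const vaddr = mem.alignBackwardGeneric(u64, capacity_end - new_size, alignment);
        try std.testing.expect(vaddr + new_size <= capacity_end);
        try std.testing.expectEqual(@as(u64, 0x1e80), vaddr);
    }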
- if (!big_atom.freeListEligible(self.*)) { + if (!big_atom.freeListEligible(self)) { _ = free_list.swapRemove(i); } else { i += 1; @@ -5476,7 +5265,7 @@ fn allocateAtom(self: *MachO, atom: *Atom, new_atom_size: u64, alignment: u64, m } break :blk new_start_vaddr; } else if (self.atoms.get(match)) |last| { - const last_symbol = self.locals.items[last.local_sym_index]; + const last_symbol = last.getSymbol(self); const ideal_capacity = if (needs_padding) padToIdeal(last.size) else last.size; const ideal_capacity_end_vaddr = last_symbol.n_value + ideal_capacity; const new_start_vaddr = mem.alignForwardGeneric(u64, ideal_capacity_end_vaddr, alignment); @@ -5525,7 +5314,7 @@ fn allocateAtom(self: *MachO, atom: *Atom, new_atom_size: u64, alignment: u64, m return vaddr; } -fn addAtomToSection(self: *MachO, atom: *Atom, match: MatchingSection) !void { +pub fn addAtomToSection(self: *MachO, atom: *Atom, match: MatchingSection) !void { if (self.atoms.getPtr(match)) |last| { last.*.next = atom; atom.prev = last.*; @@ -5533,34 +5322,42 @@ fn addAtomToSection(self: *MachO, atom: *Atom, match: MatchingSection) !void { } else { try self.atoms.putNoClobber(self.base.allocator, match, atom); } - const seg = &self.load_commands.items[match.seg].segment; - const sect = &seg.sections.items[match.sect]; - sect.size += atom.size; + const sect = self.getSectionPtr(match); + const atom_alignment = try math.powi(u32, 2, atom.alignment); + const aligned_end_addr = mem.alignForwardGeneric(u64, sect.size, atom_alignment); + const padding = aligned_end_addr - sect.size; + sect.size += padding + atom.size; + sect.@"align" = @maximum(sect.@"align", atom.alignment); } pub fn getGlobalSymbol(self: *MachO, name: []const u8) !u32 { - const sym_name = try std.fmt.allocPrint(self.base.allocator, "_{s}", .{name}); - defer self.base.allocator.free(sym_name); - const n_strx = try self.makeString(sym_name); - - if (!self.symbol_resolver.contains(n_strx)) { - log.debug("adding new extern function '{s}'", .{sym_name}); - const sym_index = @intCast(u32, self.undefs.items.len); - try self.undefs.append(self.base.allocator, .{ - .n_strx = n_strx, - .n_type = macho.N_UNDF, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }); - try self.symbol_resolver.putNoClobber(self.base.allocator, n_strx, .{ - .where = .undef, - .where_index = sym_index, - }); - try self.unresolved.putNoClobber(self.base.allocator, sym_index, .stub); + const gpa = self.base.allocator; + const sym_name = try std.fmt.allocPrint(gpa, "_{s}", .{name}); + const global_index = @intCast(u32, self.globals.values().len); + const gop = try self.globals.getOrPut(gpa, sym_name); + defer if (gop.found_existing) gpa.free(sym_name); + + if (gop.found_existing) { + // TODO audit this: can we ever reference anything from outside the Zig module? 
+ assert(gop.value_ptr.file == null);
+ return gop.value_ptr.sym_index;
}
- return n_strx;
+ const sym_index = @intCast(u32, self.locals.items.len);
+ try self.locals.append(gpa, .{
+ .n_strx = try self.strtab.insert(gpa, sym_name),
+ .n_type = macho.N_UNDF,
+ .n_sect = 0,
+ .n_desc = 0,
+ .n_value = 0,
+ });
+ gop.value_ptr.* = .{
+ .sym_index = sym_index,
+ .file = null,
+ };
+ try self.unresolved.putNoClobber(gpa, global_index, true);
+
+ return sym_index;
}

fn getSegmentAllocBase(self: MachO, indices: []const ?u16) struct { vmaddr: u64, fileoff: u64 } { @@ -5588,7 +5385,7 @@ fn pruneAndSortSectionsInSegment(self: *MachO, maybe_seg_id: *?u16, indices: []*
for (indices) |maybe_index| {
const old_idx = maybe_index.* orelse continue;
- const sect = sections[old_idx];
+ const sect = &sections[old_idx];
if (sect.size == 0) {
log.debug("pruning section {s},{s}", .{ sect.segName(), sect.sectName() });
maybe_index.* = null;
@@ -5596,7 +5393,7 @@ seg.inner.nsects -= 1;
} else {
maybe_index.* = @intCast(u16, seg.sections.items.len);
- seg.sections.appendAssumeCapacity(sect);
+ seg.sections.appendAssumeCapacity(sect.*);
}
try mapping.putNoClobber(old_idx, maybe_index.*);
}
@@ -5711,7 +5508,11 @@ fn updateSectionOrdinals(self: *MachO) !void {
const tracy = trace(@src());
defer tracy.end();
- var ordinal_remap = std.AutoHashMap(u8, u8).init(self.base.allocator);
+ log.debug("updating section ordinals", .{});
+
+ const gpa = self.base.allocator;
+
+ var ordinal_remap = std.AutoHashMap(u8, u8).init(gpa);
defer ordinal_remap.deinit();
var ordinals: std.AutoArrayHashMapUnmanaged(MatchingSection, void) = .{};
@@ -5723,27 +5524,40 @@ }) |maybe_index| {
const index = maybe_index orelse continue;
const seg = self.load_commands.items[index].segment;
- for (seg.sections.items) |_, sect_id| {
+ for (seg.sections.items) |sect, sect_id| {
const match = MatchingSection{
.seg = @intCast(u16, index),
.sect = @intCast(u16, sect_id),
};
- const old_ordinal = @intCast(u8, self.section_ordinals.getIndex(match).? 
+ 1); + const old_ordinal = self.getSectionOrdinal(match); new_ordinal += 1; + log.debug("'{s},{s}': sect({d}, '_,_') => sect({d}, '_,_')", .{ + sect.segName(), + sect.sectName(), + old_ordinal, + new_ordinal, + }); try ordinal_remap.putNoClobber(old_ordinal, new_ordinal); - try ordinals.putNoClobber(self.base.allocator, match, {}); + try ordinals.putNoClobber(gpa, match, {}); } } + // FIXME Jakub + // TODO no need for duping work here; simply walk the atom graph for (self.locals.items) |*sym| { + if (sym.undf()) continue; if (sym.n_sect == 0) continue; sym.n_sect = ordinal_remap.get(sym.n_sect).?; } - for (self.globals.items) |*sym| { - sym.n_sect = ordinal_remap.get(sym.n_sect).?; + for (self.objects.items) |*object| { + for (object.symtab.items) |*sym| { + if (sym.undf()) continue; + if (sym.n_sect == 0) continue; + sym.n_sect = ordinal_remap.get(sym.n_sect).?; + } } - self.section_ordinals.deinit(self.base.allocator); + self.section_ordinals.deinit(gpa); self.section_ordinals = ordinals; } @@ -5751,11 +5565,13 @@ fn writeDyldInfoData(self: *MachO) !void { const tracy = trace(@src()); defer tracy.end(); - var rebase_pointers = std.ArrayList(bind.Pointer).init(self.base.allocator); + const gpa = self.base.allocator; + + var rebase_pointers = std.ArrayList(bind.Pointer).init(gpa); defer rebase_pointers.deinit(); - var bind_pointers = std.ArrayList(bind.Pointer).init(self.base.allocator); + var bind_pointers = std.ArrayList(bind.Pointer).init(gpa); defer bind_pointers.deinit(); - var lazy_bind_pointers = std.ArrayList(bind.Pointer).init(self.base.allocator); + var lazy_bind_pointers = std.ArrayList(bind.Pointer).init(gpa); defer lazy_bind_pointers.deinit(); { @@ -5768,13 +5584,17 @@ fn writeDyldInfoData(self: *MachO) !void { if (match.seg == seg) continue; // __TEXT is non-writable } - const seg = self.load_commands.items[match.seg].segment; + const seg = self.getSegment(match); + const sect = self.getSection(match); + log.debug("dyld info for {s},{s}", .{ sect.segName(), sect.sectName() }); while (true) { - const sym = self.locals.items[atom.local_sym_index]; + log.debug(" ATOM(%{d}, '{s}')", .{ atom.sym_index, atom.getName(self) }); + const sym = atom.getSymbol(self); const base_offset = sym.n_value - seg.inner.vmaddr; for (atom.rebases.items) |offset| { + log.debug(" | rebase at {x}", .{base_offset + offset}); try rebase_pointers.append(.{ .offset = base_offset + offset, .segment_id = match.seg, @@ -5782,57 +5602,55 @@ fn writeDyldInfoData(self: *MachO) !void { } for (atom.bindings.items) |binding| { - const resolv = self.symbol_resolver.get(binding.n_strx).?; - switch (resolv.where) { - .global => { - // Turn into a rebase. 
- try rebase_pointers.append(.{ - .offset = base_offset + binding.offset, - .segment_id = match.seg, - }); - }, - .undef => { - const bind_sym = self.undefs.items[resolv.where_index]; - var flags: u4 = 0; - if (bind_sym.weakRef()) { - flags |= @truncate(u4, macho.BIND_SYMBOL_FLAGS_WEAK_IMPORT); - } - try bind_pointers.append(.{ - .offset = binding.offset + base_offset, - .segment_id = match.seg, - .dylib_ordinal = @divTrunc(@bitCast(i16, bind_sym.n_desc), macho.N_SYMBOL_RESOLVER), - .name = self.getString(bind_sym.n_strx), - .bind_flags = flags, - }); - }, + const bind_sym = self.getSymbol(binding.target); + const bind_sym_name = self.getSymbolName(binding.target); + const dylib_ordinal = @divTrunc( + @bitCast(i16, bind_sym.n_desc), + macho.N_SYMBOL_RESOLVER, + ); + var flags: u4 = 0; + log.debug(" | bind at {x}, import('{s}') in dylib({d})", .{ + binding.offset + base_offset, + bind_sym_name, + dylib_ordinal, + }); + if (bind_sym.weakRef()) { + log.debug(" | marking as weak ref ", .{}); + flags |= @truncate(u4, macho.BIND_SYMBOL_FLAGS_WEAK_IMPORT); } + try bind_pointers.append(.{ + .offset = binding.offset + base_offset, + .segment_id = match.seg, + .dylib_ordinal = dylib_ordinal, + .name = bind_sym_name, + .bind_flags = flags, + }); } for (atom.lazy_bindings.items) |binding| { - const resolv = self.symbol_resolver.get(binding.n_strx).?; - switch (resolv.where) { - .global => { - // Turn into a rebase. - try rebase_pointers.append(.{ - .offset = base_offset + binding.offset, - .segment_id = match.seg, - }); - }, - .undef => { - const bind_sym = self.undefs.items[resolv.where_index]; - var flags: u4 = 0; - if (bind_sym.weakRef()) { - flags |= @truncate(u4, macho.BIND_SYMBOL_FLAGS_WEAK_IMPORT); - } - try lazy_bind_pointers.append(.{ - .offset = binding.offset + base_offset, - .segment_id = match.seg, - .dylib_ordinal = @divTrunc(@bitCast(i16, bind_sym.n_desc), macho.N_SYMBOL_RESOLVER), - .name = self.getString(bind_sym.n_strx), - .bind_flags = flags, - }); - }, + const bind_sym = self.getSymbol(binding.target); + const bind_sym_name = self.getSymbolName(binding.target); + const dylib_ordinal = @divTrunc( + @bitCast(i16, bind_sym.n_desc), + macho.N_SYMBOL_RESOLVER, + ); + var flags: u4 = 0; + log.debug(" | lazy bind at {x} import('{s}') ord({d})", .{ + binding.offset + base_offset, + bind_sym_name, + dylib_ordinal, + }); + if (bind_sym.weakRef()) { + log.debug(" | marking as weak ref ", .{}); + flags |= @truncate(u4, macho.BIND_SYMBOL_FLAGS_WEAK_IMPORT); } + try lazy_bind_pointers.append(.{ + .offset = binding.offset + base_offset, + .segment_id = match.seg, + .dylib_ordinal = dylib_ordinal, + .name = bind_sym_name, + .bind_flags = flags, + }); } if (atom.prev) |prev| { @@ -5843,7 +5661,7 @@ fn writeDyldInfoData(self: *MachO) !void { } var trie: Trie = .{}; - defer trie.deinit(self.base.allocator); + defer trie.deinit(gpa); { // TODO handle macho.EXPORT_SYMBOL_FLAGS_REEXPORT and macho.EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER. 
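Both binding loops above recover the dylib ordinal the same way: for two-level-namespace imports the ordinal lives in the high byte of the symbol's `n_desc`, so dividing the sign-extended `n_desc` by `macho.N_SYMBOL_RESOLVER` (0x100) extracts it, with negative results reserved for the special ordinals. A sketch of just that extraction:

    const std = @import("std");
    const macho = std.macho;

    fn dylibOrdinal(n_desc: u16) i16 {
        // Matches the @divTrunc(@bitCast(i16, n_desc), N_SYMBOL_RESOLVER)
        // expression used by the bind and lazy-bind loops above.
        return @divTrunc(@bitCast(i16, n_desc), macho.N_SYMBOL_RESOLVER);
    }

    test "ordinal is packed into the high byte of n_desc" {
        try std.testing.expectEqual(@as(i16, 2), dylibOrdinal(0x0200));
    }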
@@ -5852,19 +5670,40 @@ fn writeDyldInfoData(self: *MachO) !void { const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].segment; const base_address = text_segment.inner.vmaddr; - for (self.globals.items) |sym| { - if (sym.n_type == 0) continue; - const sym_name = self.getString(sym.n_strx); - log.debug(" (putting '{s}' defined at 0x{x})", .{ sym_name, sym.n_value }); - - try trie.put(self.base.allocator, .{ - .name = sym_name, - .vmaddr_offset = sym.n_value - base_address, - .export_flags = macho.EXPORT_SYMBOL_FLAGS_KIND_REGULAR, - }); + if (self.base.options.output_mode == .Exe) { + for (&[_]SymbolWithLoc{ + try self.getEntryPoint(), + self.globals.get("__mh_execute_header").?, + }) |global| { + const sym = self.getSymbol(global); + const sym_name = self.getSymbolName(global); + log.debug(" (putting '{s}' defined at 0x{x})", .{ sym_name, sym.n_value }); + try trie.put(gpa, .{ + .name = sym_name, + .vmaddr_offset = sym.n_value - base_address, + .export_flags = macho.EXPORT_SYMBOL_FLAGS_KIND_REGULAR, + }); + } + } else { + assert(self.base.options.output_mode == .Lib); + for (self.globals.values()) |global| { + const sym = self.getSymbol(global); + + if (sym.undf()) continue; + if (!sym.ext()) continue; + if (sym.n_desc == N_DESC_GCED) continue; + + const sym_name = self.getSymbolName(global); + log.debug(" (putting '{s}' defined at 0x{x})", .{ sym_name, sym.n_value }); + try trie.put(gpa, .{ + .name = sym_name, + .vmaddr_offset = sym.n_value - base_address, + .export_flags = macho.EXPORT_SYMBOL_FLAGS_KIND_REGULAR, + }); + } } - try trie.finalize(self.base.allocator); + try trie.finalize(gpa); } const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].segment; @@ -5909,8 +5748,8 @@ fn writeDyldInfoData(self: *MachO) !void { seg.inner.filesize = dyld_info.export_off + dyld_info.export_size - seg.inner.fileoff; const needed_size = dyld_info.export_off + dyld_info.export_size - dyld_info.rebase_off; - var buffer = try self.base.allocator.alloc(u8, needed_size); - defer self.base.allocator.free(buffer); + var buffer = try gpa.alloc(u8, needed_size); + defer gpa.free(buffer); mem.set(u8, buffer, 0); var stream = std.io.fixedBufferStream(buffer); @@ -5937,10 +5776,12 @@ fn writeDyldInfoData(self: *MachO) !void { try self.populateLazyBindOffsetsInStubHelper( buffer[dyld_info.lazy_bind_off - base_off ..][0..dyld_info.lazy_bind_size], ); + self.load_commands_dirty = true; } fn populateLazyBindOffsetsInStubHelper(self: *MachO, buffer: []const u8) !void { + const gpa = self.base.allocator; const text_segment_cmd_index = self.text_segment_cmd_index orelse return; const stub_helper_section_index = self.stub_helper_section_index orelse return; const last_atom = self.atoms.get(.{ @@ -5950,7 +5791,7 @@ fn populateLazyBindOffsetsInStubHelper(self: *MachO, buffer: []const u8) !void { if (self.stub_helper_preamble_atom == null) return; if (last_atom == self.stub_helper_preamble_atom.?) 
return; - var table = std.AutoHashMap(i64, *Atom).init(self.base.allocator); + var table = std.AutoHashMap(i64, *Atom).init(gpa); defer table.deinit(); { @@ -5966,7 +5807,7 @@ fn populateLazyBindOffsetsInStubHelper(self: *MachO, buffer: []const u8) !void { while (true) { const laptr_off = blk: { - const sym = self.locals.items[laptr_atom.local_sym_index]; + const sym = laptr_atom.getSymbol(self); break :blk @intCast(i64, sym.n_value - base_addr); }; try table.putNoClobber(laptr_off, stub_atom); @@ -5979,7 +5820,7 @@ fn populateLazyBindOffsetsInStubHelper(self: *MachO, buffer: []const u8) !void { var stream = std.io.fixedBufferStream(buffer); var reader = stream.reader(); - var offsets = std.ArrayList(struct { sym_offset: i64, offset: u32 }).init(self.base.allocator); + var offsets = std.ArrayList(struct { sym_offset: i64, offset: u32 }).init(gpa); try offsets.append(.{ .sym_offset = undefined, .offset = 0 }); defer offsets.deinit(); var valid_block = false; @@ -6022,10 +5863,10 @@ fn populateLazyBindOffsetsInStubHelper(self: *MachO, buffer: []const u8) !void { } } - const sect = blk: { - const seg = self.load_commands.items[text_segment_cmd_index].segment; - break :blk seg.sections.items[stub_helper_section_index]; - }; + const sect = self.getSection(.{ + .seg = text_segment_cmd_index, + .sect = stub_helper_section_index, + }); const stub_offset: u4 = switch (self.base.options.target.cpu.arch) { .x86_64 => 1, .aarch64 => 2 * @sizeOf(u32), @@ -6036,79 +5877,63 @@ fn populateLazyBindOffsetsInStubHelper(self: *MachO, buffer: []const u8) !void { while (offsets.popOrNull()) |bind_offset| { const atom = table.get(bind_offset.sym_offset).?; - const sym = self.locals.items[atom.local_sym_index]; + const sym = atom.getSymbol(self); const file_offset = sect.offset + sym.n_value - sect.addr + stub_offset; mem.writeIntLittle(u32, &buf, bind_offset.offset); log.debug("writing lazy bind offset in stub helper of 0x{x} for symbol {s} at offset 0x{x}", .{ bind_offset.offset, - self.getString(sym.n_strx), + atom.getName(self), file_offset, }); try self.base.file.?.pwriteAll(&buf, file_offset); } } +const asc_u64 = std.sort.asc(u64); + fn writeFunctionStarts(self: *MachO) !void { - var atom = self.atoms.get(.{ - .seg = self.text_segment_cmd_index orelse return, - .sect = self.text_section_index orelse return, - }) orelse return; + const text_seg_index = self.text_segment_cmd_index orelse return; + const text_sect_index = self.text_section_index orelse return; + const text_seg = self.load_commands.items[text_seg_index].segment; const tracy = trace(@src()); defer tracy.end(); - while (atom.prev) |prev| { - atom = prev; - } - - var offsets = std.ArrayList(u32).init(self.base.allocator); - defer offsets.deinit(); - - const text_seg = self.load_commands.items[self.text_segment_cmd_index.?].segment; - var last_off: u32 = 0; - - while (true) { - const atom_sym = self.locals.items[atom.local_sym_index]; - - if (atom_sym.n_strx != 0) blk: { - if (self.symbol_resolver.get(atom_sym.n_strx)) |resolv| { - assert(resolv.where == .global); - if (resolv.local_sym_index != atom.local_sym_index) break :blk; - } - - const offset = @intCast(u32, atom_sym.n_value - text_seg.inner.vmaddr); - const diff = offset - last_off; + const gpa = self.base.allocator; - if (diff == 0) break :blk; + // We need to sort by address first + var addresses = std.ArrayList(u64).init(gpa); + defer addresses.deinit(); + try addresses.ensureTotalCapacityPrecise(self.globals.count()); - try offsets.append(diff); - last_off = offset; - } + for 
(self.globals.values()) |global| { + const sym = self.getSymbol(global); + if (sym.undf()) continue; + if (sym.n_desc == N_DESC_GCED) continue; + const match = self.getMatchingSectionFromOrdinal(sym.n_sect); + if (match.seg != text_seg_index or match.sect != text_sect_index) continue; - for (atom.contained.items) |cont| { - const cont_sym = self.locals.items[cont.local_sym_index]; + addresses.appendAssumeCapacity(sym.n_value); + } - if (cont_sym.n_strx == 0) continue; - if (self.symbol_resolver.get(cont_sym.n_strx)) |resolv| { - assert(resolv.where == .global); - if (resolv.local_sym_index != cont.local_sym_index) continue; - } + std.sort.sort(u64, addresses.items, {}, asc_u64); - const offset = @intCast(u32, cont_sym.n_value - text_seg.inner.vmaddr); - const diff = offset - last_off; + var offsets = std.ArrayList(u32).init(gpa); + defer offsets.deinit(); + try offsets.ensureTotalCapacityPrecise(addresses.items.len); - if (diff == 0) continue; + var last_off: u32 = 0; + for (addresses.items) |addr| { + const offset = @intCast(u32, addr - text_seg.inner.vmaddr); + const diff = offset - last_off; - try offsets.append(diff); - last_off = offset; - } + if (diff == 0) continue; - if (atom.next) |next| { - atom = next; - } else break; + offsets.appendAssumeCapacity(diff); + last_off = offset; } - var buffer = std.ArrayList(u8).init(self.base.allocator); + var buffer = std.ArrayList(u8).init(gpa); defer buffer.deinit(); const max_size = @intCast(usize, offsets.items.len * @sizeOf(u64)); @@ -6136,53 +5961,72 @@ fn writeFunctionStarts(self: *MachO) !void { self.load_commands_dirty = true; } -fn writeDices(self: *MachO) !void { - if (!self.has_dices) return; +fn filterDataInCode( + dices: []const macho.data_in_code_entry, + start_addr: u64, + end_addr: u64, +) []const macho.data_in_code_entry { + const Predicate = struct { + addr: u64, + + pub fn predicate(self: @This(), dice: macho.data_in_code_entry) bool { + return dice.offset >= self.addr; + } + }; + + const start = MachO.findFirst(macho.data_in_code_entry, dices, 0, Predicate{ .addr = start_addr }); + const end = MachO.findFirst(macho.data_in_code_entry, dices, start, Predicate{ .addr = end_addr }); + + return dices[start..end]; +} +fn writeDataInCode(self: *MachO) !void { const tracy = trace(@src()); defer tracy.end(); - var buf = std.ArrayList(u8).init(self.base.allocator); - defer buf.deinit(); + var out_dice = std.ArrayList(macho.data_in_code_entry).init(self.base.allocator); + defer out_dice.deinit(); - var atom: *Atom = self.atoms.get(.{ + const text_sect = self.getSection(.{ .seg = self.text_segment_cmd_index orelse return, .sect = self.text_section_index orelse return, - }) orelse return; + }); - while (atom.prev) |prev| { - atom = prev; - } + for (self.objects.items) |object| { + const dice = object.parseDataInCode() orelse continue; + try out_dice.ensureUnusedCapacity(dice.len); - const text_seg = self.load_commands.items[self.text_segment_cmd_index.?].segment; - const text_sect = text_seg.sections.items[self.text_section_index.?]; + for (object.managed_atoms.items) |atom| { + const sym = atom.getSymbol(self); + if (sym.n_desc == N_DESC_GCED) continue; - while (true) { - if (atom.dices.items.len > 0) { - const sym = self.locals.items[atom.local_sym_index]; - const base_off = math.cast(u32, sym.n_value - text_sect.addr + text_sect.offset) orelse return error.Overflow; - - try buf.ensureUnusedCapacity(atom.dices.items.len * @sizeOf(macho.data_in_code_entry)); - for (atom.dices.items) |dice| { - const rebased_dice = 
macho.data_in_code_entry{ - .offset = base_off + dice.offset, - .length = dice.length, - .kind = dice.kind, - }; - buf.appendSliceAssumeCapacity(mem.asBytes(&rebased_dice)); + const match = self.getMatchingSectionFromOrdinal(sym.n_sect); + if (match.seg != self.text_segment_cmd_index.? and match.sect != self.text_section_index.?) { + continue; } - } - if (atom.next) |next| { - atom = next; - } else break; + const source_sym = object.getSourceSymbol(atom.sym_index) orelse continue; + const source_addr = math.cast(u32, source_sym.n_value) orelse return error.Overflow; + const filtered_dice = filterDataInCode(dice, source_addr, source_addr + atom.size); + const base = math.cast(u32, sym.n_value - text_sect.addr + text_sect.offset) orelse + return error.Overflow; + + for (filtered_dice) |single| { + const offset = single.offset - source_addr + base; + out_dice.appendAssumeCapacity(.{ + .offset = offset, + .length = single.length, + .kind = single.kind, + }); + } + } } const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].segment; const dice_cmd = &self.load_commands.items[self.data_in_code_cmd_index.?].linkedit_data; const dataoff = mem.alignForwardGeneric(u64, seg.inner.fileoff + seg.inner.filesize, @alignOf(u64)); - const datasize = buf.items.len; + const datasize = out_dice.items.len * @sizeOf(macho.data_in_code_entry); dice_cmd.dataoff = @intCast(u32, dataoff); dice_cmd.datasize = @intCast(u32, datasize); seg.inner.filesize = dice_cmd.dataoff + dice_cmd.datasize - seg.inner.fileoff; @@ -6192,118 +6036,93 @@ fn writeDices(self: *MachO) !void { dice_cmd.dataoff + dice_cmd.datasize, }); - try self.base.file.?.pwriteAll(buf.items, dice_cmd.dataoff); + try self.base.file.?.pwriteAll(mem.sliceAsBytes(out_dice.items), dice_cmd.dataoff); self.load_commands_dirty = true; } -fn writeSymbolTable(self: *MachO) !void { +fn writeSymtab(self: *MachO) !void { const tracy = trace(@src()); defer tracy.end(); + const gpa = self.base.allocator; const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].segment; const symtab = &self.load_commands.items[self.symtab_cmd_index.?].symtab; const symoff = mem.alignForwardGeneric(u64, seg.inner.fileoff + seg.inner.filesize, @alignOf(macho.nlist_64)); symtab.symoff = @intCast(u32, symoff); - var locals = std.ArrayList(macho.nlist_64).init(self.base.allocator); + var locals = std.ArrayList(macho.nlist_64).init(gpa); defer locals.deinit(); - for (self.locals.items) |sym| { - if (sym.n_strx == 0) continue; - if (self.symbol_resolver.get(sym.n_strx)) |_| continue; + for (self.locals.items) |sym, sym_id| { + if (sym.n_strx == 0) continue; // no name, skip + if (sym.n_desc == N_DESC_GCED) continue; // GCed, skip + const sym_loc = SymbolWithLoc{ .sym_index = @intCast(u32, sym_id), .file = null }; + if (self.symbolIsTemp(sym_loc)) continue; // local temp symbol, skip + if (self.globals.contains(self.getSymbolName(sym_loc))) continue; // global symbol is either an export or import, skip try locals.append(sym); } - // TODO How do we handle null global symbols in incremental context? 
- var undefs = std.ArrayList(macho.nlist_64).init(self.base.allocator); - defer undefs.deinit(); - var undefs_table = std.AutoHashMap(u32, u32).init(self.base.allocator); - defer undefs_table.deinit(); - try undefs.ensureTotalCapacity(self.undefs.items.len); - try undefs_table.ensureTotalCapacity(@intCast(u32, self.undefs.items.len)); + for (self.objects.items) |object, object_id| { + for (object.symtab.items) |sym, sym_id| { + if (sym.n_strx == 0) continue; // no name, skip + if (sym.n_desc == N_DESC_GCED) continue; // GCed, skip + const sym_loc = SymbolWithLoc{ .sym_index = @intCast(u32, sym_id), .file = @intCast(u32, object_id) }; + if (self.symbolIsTemp(sym_loc)) continue; // local temp symbol, skip + if (self.globals.contains(self.getSymbolName(sym_loc))) continue; // global symbol is either an export or import, skip + var out_sym = sym; + out_sym.n_strx = try self.strtab.insert(gpa, self.getSymbolName(sym_loc)); + try locals.append(out_sym); + } - for (self.undefs.items) |sym, i| { - if (sym.n_strx == 0) continue; - const new_index = @intCast(u32, undefs.items.len); - undefs.appendAssumeCapacity(sym); - undefs_table.putAssumeCapacityNoClobber(@intCast(u32, i), new_index); + if (!self.base.options.strip) { + try self.generateSymbolStabs(object, &locals); + } } - if (self.has_stabs) { - for (self.objects.items) |object| { - if (object.debug_info == null) continue; + var exports = std.ArrayList(macho.nlist_64).init(gpa); + defer exports.deinit(); - // Open scope - try locals.ensureUnusedCapacity(3); - locals.appendAssumeCapacity(.{ - .n_strx = try self.makeString(object.tu_comp_dir.?), - .n_type = macho.N_SO, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }); - locals.appendAssumeCapacity(.{ - .n_strx = try self.makeString(object.tu_name.?), - .n_type = macho.N_SO, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }); - locals.appendAssumeCapacity(.{ - .n_strx = try self.makeString(object.name), - .n_type = macho.N_OSO, - .n_sect = 0, - .n_desc = 1, - .n_value = object.mtime orelse 0, - }); + for (self.globals.values()) |global| { + const sym = self.getSymbol(global); + if (sym.undf()) continue; // import, skip + if (sym.n_desc == N_DESC_GCED) continue; // GCed, skip + var out_sym = sym; + out_sym.n_strx = try self.strtab.insert(gpa, self.getSymbolName(global)); + try exports.append(out_sym); + } - for (object.contained_atoms.items) |atom| { - if (atom.stab) |stab| { - const nlists = try stab.asNlists(atom.local_sym_index, self); - defer self.base.allocator.free(nlists); - try locals.appendSlice(nlists); - } else { - for (atom.contained.items) |sym_at_off| { - const stab = sym_at_off.stab orelse continue; - const nlists = try stab.asNlists(sym_at_off.local_sym_index, self); - defer self.base.allocator.free(nlists); - try locals.appendSlice(nlists); - } - } - } + var imports = std.ArrayList(macho.nlist_64).init(gpa); + defer imports.deinit(); + var imports_table = std.AutoHashMap(SymbolWithLoc, u32).init(gpa); + defer imports_table.deinit(); - // Close scope - try locals.append(.{ - .n_strx = 0, - .n_type = macho.N_SO, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }); - } + for (self.globals.values()) |global| { + const sym = self.getSymbol(global); + if (sym.n_strx == 0) continue; // no name, skip + if (!sym.undf()) continue; // not an import, skip + const new_index = @intCast(u32, imports.items.len); + var out_sym = sym; + out_sym.n_strx = try self.strtab.insert(gpa, self.getSymbolName(global)); + try imports.append(out_sym); + try imports_table.putNoClobber(global, new_index); } 
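+    // Layout is locals first, then exports, then imports; the dysymtab ranges written below rely on this ordering.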
const nlocals = locals.items.len; - const nexports = self.globals.items.len; - const nundefs = undefs.items.len; - - const locals_off = symtab.symoff; - const locals_size = nlocals * @sizeOf(macho.nlist_64); - log.debug("writing local symbols from 0x{x} to 0x{x}", .{ locals_off, locals_size + locals_off }); - try self.base.file.?.pwriteAll(mem.sliceAsBytes(locals.items), locals_off); + const nexports = exports.items.len; + const nimports = imports.items.len; + symtab.nsyms = @intCast(u32, nlocals + nexports + nimports); - const exports_off = locals_off + locals_size; - const exports_size = nexports * @sizeOf(macho.nlist_64); - log.debug("writing exported symbols from 0x{x} to 0x{x}", .{ exports_off, exports_size + exports_off }); - try self.base.file.?.pwriteAll(mem.sliceAsBytes(self.globals.items), exports_off); + var buffer = std.ArrayList(u8).init(gpa); + defer buffer.deinit(); + try buffer.ensureTotalCapacityPrecise(symtab.nsyms * @sizeOf(macho.nlist_64)); + buffer.appendSliceAssumeCapacity(mem.sliceAsBytes(locals.items)); + buffer.appendSliceAssumeCapacity(mem.sliceAsBytes(exports.items)); + buffer.appendSliceAssumeCapacity(mem.sliceAsBytes(imports.items)); - const undefs_off = exports_off + exports_size; - const undefs_size = nundefs * @sizeOf(macho.nlist_64); - log.debug("writing undefined symbols from 0x{x} to 0x{x}", .{ undefs_off, undefs_size + undefs_off }); - try self.base.file.?.pwriteAll(mem.sliceAsBytes(undefs.items), undefs_off); + log.debug("writing symtab from 0x{x} to 0x{x}", .{ symtab.symoff, symtab.symoff + buffer.items.len }); + try self.base.file.?.pwriteAll(buffer.items, symtab.symoff); - symtab.nsyms = @intCast(u32, nlocals + nexports + nundefs); - seg.inner.filesize = symtab.symoff + symtab.nsyms * @sizeOf(macho.nlist_64) - seg.inner.fileoff; + seg.inner.filesize = symtab.symoff + buffer.items.len - seg.inner.fileoff; // Update dynamic symbol table. 
const dysymtab = &self.load_commands.items[self.dysymtab_cmd_index.?].dysymtab; @@ -6311,7 +6130,7 @@ fn writeSymbolTable(self: *MachO) !void { dysymtab.iextdefsym = dysymtab.nlocalsym; dysymtab.nextdefsym = @intCast(u32, nexports); dysymtab.iundefsym = dysymtab.nlocalsym + dysymtab.nextdefsym; - dysymtab.nundefsym = @intCast(u32, nundefs); + dysymtab.nundefsym = @intCast(u32, nimports); const nstubs = @intCast(u32, self.stubs_table.count()); const ngot_entries = @intCast(u32, self.got_entries_table.count()); @@ -6327,55 +6146,62 @@ fn writeSymbolTable(self: *MachO) !void { dysymtab.indirectsymoff + dysymtab.nindirectsyms * @sizeOf(u32), }); - var buf = std.ArrayList(u8).init(self.base.allocator); + var buf = std.ArrayList(u8).init(gpa); defer buf.deinit(); try buf.ensureTotalCapacity(dysymtab.nindirectsyms * @sizeOf(u32)); const writer = buf.writer(); if (self.text_segment_cmd_index) |text_segment_cmd_index| blk: { const stubs_section_index = self.stubs_section_index orelse break :blk; - const text_segment = &self.load_commands.items[text_segment_cmd_index].segment; - const stubs = &text_segment.sections.items[stubs_section_index]; + const stubs = self.getSectionPtr(.{ + .seg = text_segment_cmd_index, + .sect = stubs_section_index, + }); stubs.reserved1 = 0; - for (self.stubs_table.keys()) |key| { - const resolv = self.symbol_resolver.get(key).?; - switch (resolv.where) { - .global => try writer.writeIntLittle(u32, macho.INDIRECT_SYMBOL_LOCAL), - .undef => try writer.writeIntLittle(u32, dysymtab.iundefsym + undefs_table.get(resolv.where_index).?), - } + for (self.stubs.items) |entry| { + if (entry.sym_index == 0) continue; + const atom_sym = entry.getSymbol(self); + if (atom_sym.n_desc == N_DESC_GCED) continue; + const target_sym = self.getSymbol(entry.target); + assert(target_sym.undf()); + try writer.writeIntLittle(u32, dysymtab.iundefsym + imports_table.get(entry.target).?); } } if (self.data_const_segment_cmd_index) |data_const_segment_cmd_index| blk: { const got_section_index = self.got_section_index orelse break :blk; - const data_const_segment = &self.load_commands.items[data_const_segment_cmd_index].segment; - const got = &data_const_segment.sections.items[got_section_index]; + const got = self.getSectionPtr(.{ + .seg = data_const_segment_cmd_index, + .sect = got_section_index, + }); got.reserved1 = nstubs; - for (self.got_entries_table.keys()) |key| { - switch (key) { - .local => try writer.writeIntLittle(u32, macho.INDIRECT_SYMBOL_LOCAL), - .global => |n_strx| { - const resolv = self.symbol_resolver.get(n_strx).?; - switch (resolv.where) { - .global => try writer.writeIntLittle(u32, macho.INDIRECT_SYMBOL_LOCAL), - .undef => try writer.writeIntLittle(u32, dysymtab.iundefsym + undefs_table.get(resolv.where_index).?), - } - }, + for (self.got_entries.items) |entry| { + if (entry.sym_index == 0) continue; + const atom_sym = entry.getSymbol(self); + if (atom_sym.n_desc == N_DESC_GCED) continue; + const target_sym = self.getSymbol(entry.target); + if (target_sym.undf()) { + try writer.writeIntLittle(u32, dysymtab.iundefsym + imports_table.get(entry.target).?); + } else { + try writer.writeIntLittle(u32, macho.INDIRECT_SYMBOL_LOCAL); } } } if (self.data_segment_cmd_index) |data_segment_cmd_index| blk: { const la_symbol_ptr_section_index = self.la_symbol_ptr_section_index orelse break :blk; - const data_segment = &self.load_commands.items[data_segment_cmd_index].segment; - const la_symbol_ptr = &data_segment.sections.items[la_symbol_ptr_section_index]; + const la_symbol_ptr = 
self.getSectionPtr(.{ + .seg = data_segment_cmd_index, + .sect = la_symbol_ptr_section_index, + }); la_symbol_ptr.reserved1 = nstubs + ngot_entries; - for (self.stubs_table.keys()) |key| { - const resolv = self.symbol_resolver.get(key).?; - switch (resolv.where) { - .global => try writer.writeIntLittle(u32, macho.INDIRECT_SYMBOL_LOCAL), - .undef => try writer.writeIntLittle(u32, dysymtab.iundefsym + undefs_table.get(resolv.where_index).?), - } + for (self.stubs.items) |entry| { + if (entry.sym_index == 0) continue; + const atom_sym = entry.getSymbol(self); + if (atom_sym.n_desc == N_DESC_GCED) continue; + const target_sym = self.getSymbol(entry.target); + assert(target_sym.undf()); + try writer.writeIntLittle(u32, dysymtab.iundefsym + imports_table.get(entry.target).?); } } @@ -6385,21 +6211,22 @@ fn writeSymbolTable(self: *MachO) !void { self.load_commands_dirty = true; } -fn writeStringTable(self: *MachO) !void { +fn writeStrtab(self: *MachO) !void { const tracy = trace(@src()); defer tracy.end(); const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].segment; const symtab = &self.load_commands.items[self.symtab_cmd_index.?].symtab; const stroff = mem.alignForwardGeneric(u64, seg.inner.fileoff + seg.inner.filesize, @alignOf(u64)); - const strsize = self.strtab.items.len; + + const strsize = self.strtab.buffer.items.len; symtab.stroff = @intCast(u32, stroff); symtab.strsize = @intCast(u32, strsize); seg.inner.filesize = symtab.stroff + symtab.strsize - seg.inner.fileoff; log.debug("writing string table from 0x{x} to 0x{x}", .{ symtab.stroff, symtab.stroff + symtab.strsize }); - try self.base.file.?.pwriteAll(self.strtab.items, symtab.stroff); + try self.base.file.?.pwriteAll(self.strtab.buffer.items, symtab.stroff); self.load_commands_dirty = true; } @@ -6413,9 +6240,9 @@ fn writeLinkeditSegment(self: *MachO) !void { try self.writeDyldInfoData(); try self.writeFunctionStarts(); - try self.writeDices(); - try self.writeSymbolTable(); - try self.writeStringTable(); + try self.writeDataInCode(); + try self.writeSymtab(); + try self.writeStrtab(); seg.inner.vmsize = mem.alignForwardGeneric(u64, seg.inner.filesize, self.page_size); } @@ -6557,43 +6384,114 @@ pub fn makeStaticString(bytes: []const u8) [16]u8 { return buf; } -pub fn makeString(self: *MachO, string: []const u8) !u32 { - const gop = try self.strtab_dir.getOrPutContextAdapted(self.base.allocator, @as([]const u8, string), StringIndexAdapter{ - .bytes = &self.strtab, - }, StringIndexContext{ - .bytes = &self.strtab, - }); - if (gop.found_existing) { - const off = gop.key_ptr.*; - log.debug("reusing string '{s}' at offset 0x{x}", .{ string, off }); - return off; - } - - try self.strtab.ensureUnusedCapacity(self.base.allocator, string.len + 1); - const new_off = @intCast(u32, self.strtab.items.len); +pub fn getSectionOrdinal(self: *MachO, match: MatchingSection) u8 { + return @intCast(u8, self.section_ordinals.getIndex(match).?) 
+ 1; +} - log.debug("writing new string '{s}' at offset 0x{x}", .{ string, new_off }); +pub fn getMatchingSectionFromOrdinal(self: *MachO, ord: u8) MatchingSection { + const index = ord - 1; + assert(index < self.section_ordinals.count()); + return self.section_ordinals.keys()[index]; +} - self.strtab.appendSliceAssumeCapacity(string); - self.strtab.appendAssumeCapacity(0); +pub fn getSegmentPtr(self: *MachO, match: MatchingSection) *macho.SegmentCommand { + assert(match.seg < self.load_commands.items.len); + return &self.load_commands.items[match.seg].segment; +} - gop.key_ptr.* = new_off; +pub fn getSegment(self: *MachO, match: MatchingSection) macho.SegmentCommand { + return self.getSegmentPtr(match).*; +} - return new_off; +pub fn getSectionPtr(self: *MachO, match: MatchingSection) *macho.section_64 { + const seg = self.getSegmentPtr(match); + assert(match.sect < seg.sections.items.len); + return &seg.sections.items[match.sect]; } -pub fn getString(self: MachO, off: u32) []const u8 { - assert(off < self.strtab.items.len); - return mem.sliceTo(@ptrCast([*:0]const u8, self.strtab.items.ptr + off), 0); +pub fn getSection(self: *MachO, match: MatchingSection) macho.section_64 { + return self.getSectionPtr(match).*; } -pub fn symbolIsTemp(sym: macho.nlist_64, sym_name: []const u8) bool { +pub fn symbolIsTemp(self: *MachO, sym_with_loc: SymbolWithLoc) bool { + const sym = self.getSymbol(sym_with_loc); if (!sym.sect()) return false; if (sym.ext()) return false; + const sym_name = self.getSymbolName(sym_with_loc); return mem.startsWith(u8, sym_name, "l") or mem.startsWith(u8, sym_name, "L"); } -pub fn findFirst(comptime T: type, haystack: []T, start: usize, predicate: anytype) usize { +/// Returns pointer-to-symbol described by `sym_with_loc` descriptor. +pub fn getSymbolPtr(self: *MachO, sym_with_loc: SymbolWithLoc) *macho.nlist_64 { + if (sym_with_loc.file) |file| { + const object = &self.objects.items[file]; + return &object.symtab.items[sym_with_loc.sym_index]; + } else { + return &self.locals.items[sym_with_loc.sym_index]; + } +} + +/// Returns symbol described by `sym_with_loc` descriptor. +pub fn getSymbol(self: *MachO, sym_with_loc: SymbolWithLoc) macho.nlist_64 { + return self.getSymbolPtr(sym_with_loc).*; +} + +/// Returns name of the symbol described by `sym_with_loc` descriptor. +pub fn getSymbolName(self: *MachO, sym_with_loc: SymbolWithLoc) []const u8 { + if (sym_with_loc.file) |file| { + const object = self.objects.items[file]; + const sym = object.symtab.items[sym_with_loc.sym_index]; + return object.getString(sym.n_strx); + } else { + const sym = self.locals.items[sym_with_loc.sym_index]; + return self.strtab.get(sym.n_strx).?; + } +} + +/// Returns atom if there is an atom referenced by the symbol described by `sym_with_loc` descriptor. +/// Returns null on failure. +pub fn getAtomForSymbol(self: *MachO, sym_with_loc: SymbolWithLoc) ?*Atom { + if (sym_with_loc.file) |file| { + const object = self.objects.items[file]; + return object.getAtomForSymbol(sym_with_loc.sym_index); + } else { + return self.atom_by_index_table.get(sym_with_loc.sym_index); + } +} + +/// Returns GOT atom that references `sym_with_loc` if one exists. +/// Returns null otherwise. +pub fn getGotAtomForSymbol(self: *MachO, sym_with_loc: SymbolWithLoc) ?*Atom { + const got_index = self.got_entries_table.get(sym_with_loc) orelse return null; + return self.got_entries.items[got_index].getAtom(self); +} + +/// Returns stubs atom that references `sym_with_loc` if one exists. +/// Returns null otherwise. 
+pub fn getStubsAtomForSymbol(self: *MachO, sym_with_loc: SymbolWithLoc) ?*Atom { + const stubs_index = self.stubs_table.get(sym_with_loc) orelse return null; + return self.stubs.items[stubs_index].getAtom(self); +} + +/// Returns TLV pointer atom that references `sym_with_loc` if one exists. +/// Returns null otherwise. +pub fn getTlvPtrAtomForSymbol(self: *MachO, sym_with_loc: SymbolWithLoc) ?*Atom { + const tlv_ptr_index = self.tlv_ptr_entries_table.get(sym_with_loc) orelse return null; + return self.tlv_ptr_entries.items[tlv_ptr_index].getAtom(self); +} + +/// Returns symbol location corresponding to the set entrypoint. +/// Asserts output mode is executable. +pub fn getEntryPoint(self: MachO) error{MissingMainEntrypoint}!SymbolWithLoc { + const entry_name = self.base.options.entry orelse "_main"; + const global = self.globals.get(entry_name) orelse { + log.err("entrypoint '{s}' not found", .{entry_name}); + return error.MissingMainEntrypoint; + }; + return global; +} + +pub fn findFirst(comptime T: type, haystack: []const T, start: usize, predicate: anytype) usize { if (!@hasDecl(@TypeOf(predicate), "predicate")) @compileError("Predicate is required to define fn predicate(@This(), T) bool"); @@ -6606,6 +6504,225 @@ pub fn findFirst(comptime T: type, haystack: []T, start: usize, predicate: anyty return i; } +const DebugInfo = struct { + inner: dwarf.DwarfInfo, + debug_info: []const u8, + debug_abbrev: []const u8, + debug_str: []const u8, + debug_line: []const u8, + debug_line_str: []const u8, + debug_ranges: []const u8, + + pub fn parse(allocator: Allocator, object: Object) !?DebugInfo { + var debug_info = blk: { + const index = object.dwarf_debug_info_index orelse return null; + break :blk try object.getSectionContents(index); + }; + var debug_abbrev = blk: { + const index = object.dwarf_debug_abbrev_index orelse return null; + break :blk try object.getSectionContents(index); + }; + var debug_str = blk: { + const index = object.dwarf_debug_str_index orelse return null; + break :blk try object.getSectionContents(index); + }; + var debug_line = blk: { + const index = object.dwarf_debug_line_index orelse return null; + break :blk try object.getSectionContents(index); + }; + var debug_line_str = blk: { + if (object.dwarf_debug_line_str_index) |ind| { + break :blk try object.getSectionContents(ind); + } + break :blk &[0]u8{}; + }; + var debug_ranges = blk: { + if (object.dwarf_debug_ranges_index) |ind| { + break :blk try object.getSectionContents(ind); + } + break :blk &[0]u8{}; + }; + + var inner: dwarf.DwarfInfo = .{ + .endian = .Little, + .debug_info = debug_info, + .debug_abbrev = debug_abbrev, + .debug_str = debug_str, + .debug_line = debug_line, + .debug_line_str = debug_line_str, + .debug_ranges = debug_ranges, + }; + try dwarf.openDwarfDebugInfo(&inner, allocator); + + return DebugInfo{ + .inner = inner, + .debug_info = debug_info, + .debug_abbrev = debug_abbrev, + .debug_str = debug_str, + .debug_line = debug_line, + .debug_line_str = debug_line_str, + .debug_ranges = debug_ranges, + }; + } + + pub fn deinit(self: *DebugInfo, allocator: Allocator) void { + self.inner.deinit(allocator); + } +}; + +pub fn generateSymbolStabs( + self: *MachO, + object: Object, + locals: *std.ArrayList(macho.nlist_64), +) !void { + assert(!self.base.options.strip); + + const gpa = self.base.allocator; + + log.debug("parsing debug info in '{s}'", .{object.name}); + + var debug_info = (try DebugInfo.parse(gpa, object)) orelse return; + + // We assume there is only one CU. 
+ const compile_unit = debug_info.inner.findCompileUnit(0x0) catch |err| switch (err) { + error.MissingDebugInfo => { + // TODO audit cases with missing debug info and audit our dwarf.zig module. + log.debug("invalid or missing debug info in {s}; skipping", .{object.name}); + return; + }, + else => |e| return e, + }; + const tu_name = try compile_unit.die.getAttrString(&debug_info.inner, dwarf.AT.name); + const tu_comp_dir = try compile_unit.die.getAttrString(&debug_info.inner, dwarf.AT.comp_dir); + + // Open scope + try locals.ensureUnusedCapacity(3); + locals.appendAssumeCapacity(.{ + .n_strx = try self.strtab.insert(gpa, tu_comp_dir), + .n_type = macho.N_SO, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }); + locals.appendAssumeCapacity(.{ + .n_strx = try self.strtab.insert(gpa, tu_name), + .n_type = macho.N_SO, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }); + locals.appendAssumeCapacity(.{ + .n_strx = try self.strtab.insert(gpa, object.name), + .n_type = macho.N_OSO, + .n_sect = 0, + .n_desc = 1, + .n_value = object.mtime, + }); + + var stabs_buf: [4]macho.nlist_64 = undefined; + + for (object.managed_atoms.items) |atom| { + const stabs = try self.generateSymbolStabsForSymbol( + atom.getSymbolWithLoc(), + debug_info, + &stabs_buf, + ); + try locals.appendSlice(stabs); + + for (atom.contained.items) |sym_at_off| { + const sym_loc = SymbolWithLoc{ + .sym_index = sym_at_off.sym_index, + .file = atom.file, + }; + const contained_stabs = try self.generateSymbolStabsForSymbol( + sym_loc, + debug_info, + &stabs_buf, + ); + try locals.appendSlice(contained_stabs); + } + } + + // Close scope + try locals.append(.{ + .n_strx = 0, + .n_type = macho.N_SO, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }); +} + +fn generateSymbolStabsForSymbol( + self: *MachO, + sym_loc: SymbolWithLoc, + debug_info: DebugInfo, + buf: *[4]macho.nlist_64, +) ![]const macho.nlist_64 { + const gpa = self.base.allocator; + const object = self.objects.items[sym_loc.file.?]; + const sym = self.getSymbol(sym_loc); + const sym_name = self.getSymbolName(sym_loc); + + if (sym.n_strx == 0) return buf[0..0]; + if (sym.n_desc == N_DESC_GCED) return buf[0..0]; + if (self.symbolIsTemp(sym_loc)) return buf[0..0]; + + const source_sym = object.getSourceSymbol(sym_loc.sym_index) orelse return buf[0..0]; + const size: ?u64 = size: { + if (source_sym.tentative()) break :size null; + for (debug_info.inner.func_list.items) |func| { + if (func.pc_range) |range| { + if (source_sym.n_value >= range.start and source_sym.n_value < range.end) { + break :size range.end - range.start; + } + } + } + break :size null; + }; + + if (size) |ss| { + buf[0] = .{ + .n_strx = 0, + .n_type = macho.N_BNSYM, + .n_sect = sym.n_sect, + .n_desc = 0, + .n_value = sym.n_value, + }; + buf[1] = .{ + .n_strx = try self.strtab.insert(gpa, sym_name), + .n_type = macho.N_FUN, + .n_sect = sym.n_sect, + .n_desc = 0, + .n_value = sym.n_value, + }; + buf[2] = .{ + .n_strx = 0, + .n_type = macho.N_FUN, + .n_sect = 0, + .n_desc = 0, + .n_value = ss, + }; + buf[3] = .{ + .n_strx = 0, + .n_type = macho.N_ENSYM, + .n_sect = sym.n_sect, + .n_desc = 0, + .n_value = ss, + }; + return buf; + } else { + buf[0] = .{ + .n_strx = try self.strtab.insert(gpa, sym_name), + .n_type = macho.N_STSYM, + .n_sect = sym.n_sect, + .n_desc = 0, + .n_value = sym.n_value, + }; + return buf[0..1]; + } +} + fn snapshotState(self: *MachO) !void { const emit = self.base.options.emit orelse { log.debug("no emit directory found; skipping snapshot...", .{}); @@ -6655,7 +6772,7 @@ fn 
snapshotState(self: *MachO) !void { const arena = arena_allocator.allocator(); const out_file = try emit.directory.handle.createFile("snapshots.json", .{ - .truncate = self.cold_start, + .truncate = false, .read = true, }); defer out_file.close(); @@ -6675,8 +6792,7 @@ fn snapshotState(self: *MachO) !void { var nodes = std.ArrayList(Snapshot.Node).init(arena); for (self.section_ordinals.keys()) |key| { - const seg = self.load_commands.items[key.seg].segment; - const sect = seg.sections.items[key.sect]; + const sect = self.getSection(key); const sect_name = try std.fmt.allocPrint(arena, "{s},{s}", .{ sect.segName(), sect.sectName() }); try nodes.append(.{ .address = sect.addr, @@ -6684,6 +6800,8 @@ fn snapshotState(self: *MachO) !void { .payload = .{ .name = sect_name }, }); + const is_tlv = sect.type_() == macho.S_THREAD_LOCAL_VARIABLES; + var atom: *Atom = self.atoms.get(key) orelse { try nodes.append(.{ .address = sect.addr + sect.size, @@ -6698,103 +6816,63 @@ fn snapshotState(self: *MachO) !void { } while (true) { - const atom_sym = self.locals.items[atom.local_sym_index]; - const should_skip_atom: bool = blk: { - if (self.mh_execute_header_index) |index| { - if (index == atom.local_sym_index) break :blk true; - } - if (mem.eql(u8, self.getString(atom_sym.n_strx), "___dso_handle")) break :blk true; - break :blk false; - }; - - if (should_skip_atom) { - if (atom.next) |next| { - atom = next; - } else break; - continue; - } - + const atom_sym = atom.getSymbol(self); var node = Snapshot.Node{ .address = atom_sym.n_value, .tag = .atom_start, .payload = .{ - .name = self.getString(atom_sym.n_strx), - .is_global = self.symbol_resolver.contains(atom_sym.n_strx), + .name = atom.getName(self), + .is_global = self.globals.contains(atom.getName(self)), }, }; var aliases = std.ArrayList([]const u8).init(arena); - for (atom.aliases.items) |loc| { - try aliases.append(self.getString(self.locals.items[loc].n_strx)); + for (atom.contained.items) |sym_off| { + if (sym_off.offset == 0) { + try aliases.append(self.getSymbolName(.{ + .sym_index = sym_off.sym_index, + .file = atom.file, + })); + } } node.payload.aliases = aliases.toOwnedSlice(); try nodes.append(node); var relocs = try std.ArrayList(Snapshot.Node).initCapacity(arena, atom.relocs.items.len); for (atom.relocs.items) |rel| { - const arch = self.base.options.target.cpu.arch; const source_addr = blk: { - const sym = self.locals.items[atom.local_sym_index]; - break :blk sym.n_value + rel.offset; + const source_sym = atom.getSymbol(self); + break :blk source_sym.n_value + rel.offset; }; const target_addr = blk: { - const is_via_got = got: { - switch (arch) { - .aarch64 => break :got switch (@intToEnum(macho.reloc_type_arm64, rel.@"type")) { - .ARM64_RELOC_GOT_LOAD_PAGE21, .ARM64_RELOC_GOT_LOAD_PAGEOFF12 => true, - else => false, - }, - .x86_64 => break :got switch (@intToEnum(macho.reloc_type_x86_64, rel.@"type")) { - .X86_64_RELOC_GOT, .X86_64_RELOC_GOT_LOAD => true, - else => false, - }, - else => unreachable, + const target_atom = rel.getTargetAtom(self) orelse { + // If there is no atom for target, we still need to check for special, atom-less + // symbols such as `___dso_handle`. 
+ const target_name = self.getSymbolName(rel.target); + if (self.globals.contains(target_name)) { + const atomless_sym = self.getSymbol(rel.target); + break :blk atomless_sym.n_value; } + break :blk 0; }; - - if (is_via_got) { - const got_index = self.got_entries_table.get(rel.target) orelse break :blk 0; - const got_atom = self.got_entries.items[got_index].atom; - break :blk self.locals.items[got_atom.local_sym_index].n_value; - } - - switch (rel.target) { - .local => |sym_index| { - const sym = self.locals.items[sym_index]; - const is_tlv = is_tlv: { - const source_sym = self.locals.items[atom.local_sym_index]; - const match = self.section_ordinals.keys()[source_sym.n_sect - 1]; - const match_seg = self.load_commands.items[match.seg].segment; - const match_sect = match_seg.sections.items[match.sect]; - break :is_tlv match_sect.type_() == macho.S_THREAD_LOCAL_VARIABLES; - }; - if (is_tlv) { - const match_seg = self.load_commands.items[self.data_segment_cmd_index.?].segment; - const base_address = inner: { - if (self.tlv_data_section_index) |i| { - break :inner match_seg.sections.items[i].addr; - } else if (self.tlv_bss_section_index) |i| { - break :inner match_seg.sections.items[i].addr; - } else unreachable; - }; - break :blk sym.n_value - base_address; - } - break :blk sym.n_value; - }, - .global => |n_strx| { - const resolv = self.symbol_resolver.get(n_strx).?; - switch (resolv.where) { - .global => break :blk self.globals.items[resolv.where_index].n_value, - .undef => { - if (self.stubs_table.get(n_strx)) |stub_index| { - const stub_atom = self.stubs.items[stub_index]; - break :blk self.locals.items[stub_atom.local_sym_index].n_value; - } - break :blk 0; - }, - } - }, - } + const target_sym = if (target_atom.isSymbolContained(rel.target, self)) + self.getSymbol(rel.target) + else + target_atom.getSymbol(self); + const base_address: u64 = if (is_tlv) base_address: { + const sect_id: u16 = sect_id: { + if (self.tlv_data_section_index) |i| { + break :sect_id i; + } else if (self.tlv_bss_section_index) |i| { + break :sect_id i; + } else unreachable; + }; + break :base_address self.getSection(.{ + .seg = self.data_segment_cmd_index.?, + .sect = sect_id, + }).addr; + } else 0; + break :blk target_sym.n_value - base_address; }; relocs.appendAssumeCapacity(.{ @@ -6815,15 +6893,18 @@ fn snapshotState(self: *MachO) !void { var next_i: usize = 0; var last_rel: usize = 0; while (next_i < atom.contained.items.len) : (next_i += 1) { - const loc = atom.contained.items[next_i]; - const cont_sym = self.locals.items[loc.local_sym_index]; - const cont_sym_name = self.getString(cont_sym.n_strx); + const loc = SymbolWithLoc{ + .sym_index = atom.contained.items[next_i].sym_index, + .file = atom.file, + }; + const cont_sym = self.getSymbol(loc); + const cont_sym_name = self.getSymbolName(loc); var contained_node = Snapshot.Node{ .address = cont_sym.n_value, .tag = .atom_start, .payload = .{ .name = cont_sym_name, - .is_global = self.symbol_resolver.contains(cont_sym.n_strx), + .is_global = self.globals.contains(cont_sym_name), }, }; @@ -6831,10 +6912,14 @@ fn snapshotState(self: *MachO) !void { var inner_aliases = std.ArrayList([]const u8).init(arena); while (true) { if (next_i + 1 >= atom.contained.items.len) break; - const next_sym = self.locals.items[atom.contained.items[next_i + 1].local_sym_index]; + const next_sym_loc = SymbolWithLoc{ + .sym_index = atom.contained.items[next_i + 1].sym_index, + .file = atom.file, + }; + const next_sym = self.getSymbol(next_sym_loc); if (next_sym.n_value != 
cont_sym.n_value) break; - const next_sym_name = self.getString(next_sym.n_strx); - if (self.symbol_resolver.contains(next_sym.n_strx)) { + const next_sym_name = self.getSymbolName(next_sym_loc); + if (self.globals.contains(next_sym_name)) { try inner_aliases.append(contained_node.payload.name); contained_node.payload.name = next_sym_name; contained_node.payload.is_global = true; @@ -6843,7 +6928,10 @@ fn snapshotState(self: *MachO) !void { } const cont_size = if (next_i + 1 < atom.contained.items.len) - self.locals.items[atom.contained.items[next_i + 1].local_sym_index].n_value - cont_sym.n_value + self.getSymbol(.{ + .sym_index = atom.contained.items[next_i + 1].sym_index, + .file = atom.file, + }).n_value - cont_sym.n_value else atom_sym.n_value + atom.size - cont_sym.n_value; @@ -6890,69 +6978,181 @@ fn snapshotState(self: *MachO) !void { try writer.writeByte(']'); } -fn logSymtab(self: MachO) void { - log.debug("locals:", .{}); - for (self.locals.items) |sym, id| { - log.debug(" {d}: {s}: @{x} in {d}", .{ id, self.getString(sym.n_strx), sym.n_value, sym.n_sect }); +fn logSymAttributes(sym: macho.nlist_64, buf: *[9]u8) []const u8 { + mem.set(u8, buf[0..4], '_'); + mem.set(u8, buf[4..], ' '); + if (sym.sect()) { + buf[0] = 's'; } - - log.debug("globals:", .{}); - for (self.globals.items) |sym, id| { - log.debug(" {d}: {s}: @{x} in {d}", .{ id, self.getString(sym.n_strx), sym.n_value, sym.n_sect }); + if (sym.ext()) { + if (sym.weakDef() or sym.pext()) { + buf[1] = 'w'; + } else { + buf[1] = 'e'; + } } - - log.debug("undefs:", .{}); - for (self.undefs.items) |sym, id| { - log.debug(" {d}: {s}: in {d}", .{ id, self.getString(sym.n_strx), sym.n_desc }); + if (sym.tentative()) { + buf[2] = 't'; } + if (sym.undf()) { + buf[3] = 'u'; + } + if (sym.n_desc == N_DESC_GCED) { + mem.copy(u8, buf[5..], "DEAD"); + } + return buf[0..]; +} - { - log.debug("resolver:", .{}); - var it = self.symbol_resolver.iterator(); - while (it.next()) |entry| { - log.debug(" {s} => {}", .{ self.getString(entry.key_ptr.*), entry.value_ptr.* }); +fn logSymtab(self: *MachO) void { + var buf: [9]u8 = undefined; + + log.debug("symtab:", .{}); + for (self.objects.items) |object, id| { + log.debug(" object({d}): {s}", .{ id, object.name }); + for (object.symtab.items) |sym, sym_id| { + const where = if (sym.undf() and !sym.tentative()) "ord" else "sect"; + const def_index = if (sym.undf() and !sym.tentative()) + @divTrunc(sym.n_desc, macho.N_SYMBOL_RESOLVER) + else + sym.n_sect; + log.debug(" %{d}: {s} @{x} in {s}({d}), {s}", .{ + sym_id, + object.getString(sym.n_strx), + sym.n_value, + where, + def_index, + logSymAttributes(sym, &buf), + }); } } + log.debug(" object(null)", .{}); + for (self.locals.items) |sym, sym_id| { + const where = if (sym.undf() and !sym.tentative()) "ord" else "sect"; + const def_index = if (sym.undf() and !sym.tentative()) + @divTrunc(sym.n_desc, macho.N_SYMBOL_RESOLVER) + else + sym.n_sect; + log.debug(" %{d}: {s} @{x} in {s}({d}), {s}", .{ + sym_id, + self.strtab.get(sym.n_strx), + sym.n_value, + where, + def_index, + logSymAttributes(sym, &buf), + }); + } + + log.debug("globals table:", .{}); + for (self.globals.keys()) |name, id| { + const value = self.globals.values()[id]; + log.debug(" {s} => %{d} in object({d})", .{ name, value.sym_index, value.file }); + } log.debug("GOT entries:", .{}); - for (self.got_entries_table.values()) |value| { - const key = self.got_entries.items[value].target; - const atom = self.got_entries.items[value].atom; - const n_value = 
self.locals.items[atom.local_sym_index].n_value; - switch (key) { - .local => |ndx| log.debug(" {d}: @{x}", .{ ndx, n_value }), - .global => |n_strx| log.debug(" {s}: @{x}", .{ self.getString(n_strx), n_value }), + for (self.got_entries.items) |entry, i| { + const atom_sym = entry.getSymbol(self); + if (atom_sym.n_desc == N_DESC_GCED) continue; + const target_sym = self.getSymbol(entry.target); + if (target_sym.undf()) { + log.debug(" {d}@{x} => import('{s}')", .{ + i, + atom_sym.n_value, + self.getSymbolName(entry.target), + }); + } else { + log.debug(" {d}@{x} => local(%{d}) in object({d}) {s}", .{ + i, + atom_sym.n_value, + entry.target.sym_index, + entry.target.file, + logSymAttributes(target_sym, &buf), + }); } } log.debug("__thread_ptrs entries:", .{}); - for (self.tlv_ptr_entries_table.values()) |value| { - const key = self.tlv_ptr_entries.items[value].target; - const atom = self.tlv_ptr_entries.items[value].atom; - const n_value = self.locals.items[atom.local_sym_index].n_value; - assert(key == .global); - log.debug(" {s}: @{x}", .{ self.getString(key.global), n_value }); + for (self.tlv_ptr_entries.items) |entry, i| { + const atom_sym = entry.getSymbol(self); + if (atom_sym.n_desc == N_DESC_GCED) continue; + const target_sym = self.getSymbol(entry.target); + assert(target_sym.undf()); + log.debug(" {d}@{x} => import('{s}')", .{ + i, + atom_sym.n_value, + self.getSymbolName(entry.target), + }); } - log.debug("stubs:", .{}); - for (self.stubs_table.keys()) |key| { - const value = self.stubs_table.get(key).?; - const atom = self.stubs.items[value]; - const sym = self.locals.items[atom.local_sym_index]; - log.debug(" {s}: @{x}", .{ self.getString(key), sym.n_value }); + log.debug("stubs entries:", .{}); + for (self.stubs.items) |entry, i| { + const target_sym = self.getSymbol(entry.target); + const atom_sym = entry.getSymbol(self); + assert(target_sym.undf()); + log.debug(" {d}@{x} => import('{s}')", .{ + i, + atom_sym.n_value, + self.getSymbolName(entry.target), + }); } } -fn logSectionOrdinals(self: MachO) void { +fn logSectionOrdinals(self: *MachO) void { for (self.section_ordinals.keys()) |match, i| { - const seg = self.load_commands.items[match.seg].segment; - const sect = seg.sections.items[match.sect]; - log.debug("ord {d}: {d},{d} => {s},{s}", .{ - i + 1, - match.seg, - match.sect, - sect.segName(), - sect.sectName(), + const sect = self.getSection(match); + log.debug("sect({d}, '{s},{s}')", .{ i + 1, sect.segName(), sect.sectName() }); + } +} + +fn logAtoms(self: *MachO) void { + log.debug("atoms:", .{}); + var it = self.atoms.iterator(); + while (it.next()) |entry| { + const match = entry.key_ptr.*; + var atom = entry.value_ptr.*; + + while (atom.prev) |prev| { + atom = prev; + } + + const sect = self.getSection(match); + log.debug("{s},{s}", .{ sect.segName(), sect.sectName() }); + + while (true) { + self.logAtom(atom); + if (atom.next) |next| { + atom = next; + } else break; + } + } +} + +pub fn logAtom(self: *MachO, atom: *const Atom) void { + const sym = atom.getSymbol(self); + const sym_name = atom.getName(self); + log.debug(" ATOM(%{d}, '{s}') @ {x} (sizeof({x}), alignof({x})) in object({d}) in sect({d})", .{ + atom.sym_index, + sym_name, + sym.n_value, + atom.size, + atom.alignment, + atom.file, + sym.n_sect, + }); + + for (atom.contained.items) |sym_off| { + const inner_sym = self.getSymbol(.{ + .sym_index = sym_off.sym_index, + .file = atom.file, + }); + const inner_sym_name = self.getSymbolName(.{ + .sym_index = sym_off.sym_index, + .file = atom.file, + }); + 
log.debug(" (%{d}, '{s}') @ {x} ({x})", .{ + sym_off.sym_index, + inner_sym_name, + inner_sym.n_value, + sym_off.offset, }); } } diff --git a/src/link/MachO/Atom.zig b/src/link/MachO/Atom.zig index ff78b26989..2f60702423 100644 --- a/src/link/MachO/Atom.zig +++ b/src/link/MachO/Atom.zig @@ -16,7 +16,7 @@ const Arch = std.Target.Cpu.Arch; const Dwarf = @import("../Dwarf.zig"); const MachO = @import("../MachO.zig"); const Object = @import("Object.zig"); -const StringIndexAdapter = std.hash_map.StringIndexAdapter; +const SymbolWithLoc = MachO.SymbolWithLoc; /// Each decl always gets a local symbol with the fully qualified name. /// The vaddr and size are found here directly. @@ -24,10 +24,10 @@ const StringIndexAdapter = std.hash_map.StringIndexAdapter; /// the symbol references, and adding that to the file offset of the section. /// If this field is 0, it means the codegen size = 0 and there is no symbol or /// offset table entry. -local_sym_index: u32, +sym_index: u32, -/// List of symbol aliases pointing to the same atom via different nlists -aliases: std.ArrayListUnmanaged(u32) = .{}, +/// null means symbol defined by Zig source. +file: ?u32, /// List of symbols contained within this atom contained: std.ArrayListUnmanaged(SymbolAtOffset) = .{}, @@ -48,26 +48,17 @@ alignment: u32, relocs: std.ArrayListUnmanaged(Relocation) = .{}, /// List of offsets contained within this atom that need rebasing by the dynamic -/// loader in presence of ASLR. +/// loader for example in presence of ASLR. rebases: std.ArrayListUnmanaged(u64) = .{}, /// List of offsets contained within this atom that will be dynamically bound /// by the dynamic loader and contain pointers to resolved (at load time) extern -/// symbols (aka proxies aka imports) +/// symbols (aka proxies aka imports). bindings: std.ArrayListUnmanaged(Binding) = .{}, -/// List of lazy bindings +/// List of lazy bindings (cf bindings above). lazy_bindings: std.ArrayListUnmanaged(Binding) = .{}, -/// List of data-in-code entries. This is currently specific to x86_64 only. -dices: std.ArrayListUnmanaged(macho.data_in_code_entry) = .{}, - -/// Stab entry for this atom. This is currently specific to a binary created -/// by linking object files in a traditional sense - in incremental sense, we -/// bypass stabs altogether to produce dSYM bundle directly with fully relocated -/// DWARF sections. 
-stab: ?Stab = null, - /// Points to the previous and next neighbours next: ?*Atom, prev: ?*Atom, @@ -77,107 +68,62 @@ dbg_info_atom: Dwarf.Atom, dirty: bool = true, pub const Binding = struct { - n_strx: u32, + target: SymbolWithLoc, offset: u64, }; pub const SymbolAtOffset = struct { - local_sym_index: u32, + sym_index: u32, offset: u64, - stab: ?Stab = null, -}; - -pub const Stab = union(enum) { - function: u64, - static, - global, - - pub fn asNlists(stab: Stab, local_sym_index: u32, macho_file: anytype) ![]macho.nlist_64 { - var nlists = std.ArrayList(macho.nlist_64).init(macho_file.base.allocator); - defer nlists.deinit(); - - const sym = macho_file.locals.items[local_sym_index]; - switch (stab) { - .function => |size| { - try nlists.ensureUnusedCapacity(4); - nlists.appendAssumeCapacity(.{ - .n_strx = 0, - .n_type = macho.N_BNSYM, - .n_sect = sym.n_sect, - .n_desc = 0, - .n_value = sym.n_value, - }); - nlists.appendAssumeCapacity(.{ - .n_strx = sym.n_strx, - .n_type = macho.N_FUN, - .n_sect = sym.n_sect, - .n_desc = 0, - .n_value = sym.n_value, - }); - nlists.appendAssumeCapacity(.{ - .n_strx = 0, - .n_type = macho.N_FUN, - .n_sect = 0, - .n_desc = 0, - .n_value = size, - }); - nlists.appendAssumeCapacity(.{ - .n_strx = 0, - .n_type = macho.N_ENSYM, - .n_sect = sym.n_sect, - .n_desc = 0, - .n_value = size, - }); - }, - .global => { - try nlists.append(.{ - .n_strx = sym.n_strx, - .n_type = macho.N_GSYM, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }); - }, - .static => { - try nlists.append(.{ - .n_strx = sym.n_strx, - .n_type = macho.N_STSYM, - .n_sect = sym.n_sect, - .n_desc = 0, - .n_value = sym.n_value, - }); - }, - } - - return nlists.toOwnedSlice(); - } }; pub const Relocation = struct { - pub const Target = union(enum) { - local: u32, - global: u32, - }; - /// Offset within the atom's code buffer. /// Note relocation size can be inferred by relocation's kind. 
offset: u32, - target: Target, + target: MachO.SymbolWithLoc, addend: i64, - subtractor: ?u32, + subtractor: ?MachO.SymbolWithLoc, pcrel: bool, length: u2, @"type": u4, + + pub fn getTargetAtom(self: Relocation, macho_file: *MachO) ?*Atom { + const is_via_got = got: { + switch (macho_file.base.options.target.cpu.arch) { + .aarch64 => break :got switch (@intToEnum(macho.reloc_type_arm64, self.@"type")) { + .ARM64_RELOC_GOT_LOAD_PAGE21, + .ARM64_RELOC_GOT_LOAD_PAGEOFF12, + .ARM64_RELOC_POINTER_TO_GOT, + => true, + else => false, + }, + .x86_64 => break :got switch (@intToEnum(macho.reloc_type_x86_64, self.@"type")) { + .X86_64_RELOC_GOT, .X86_64_RELOC_GOT_LOAD => true, + else => false, + }, + else => unreachable, + } + }; + + if (is_via_got) { + return macho_file.getGotAtomForSymbol(self.target).?; // panic means fatal error + } + if (macho_file.getStubsAtomForSymbol(self.target)) |stubs_atom| return stubs_atom; + if (macho_file.getTlvPtrAtomForSymbol(self.target)) |tlv_ptr_atom| return tlv_ptr_atom; + return macho_file.getAtomForSymbol(self.target); + } }; pub const empty = Atom{ - .local_sym_index = 0, + .sym_index = 0, + .file = null, .size = 0, .alignment = 0, .prev = null, @@ -186,34 +132,66 @@ pub const empty = Atom{ }; pub fn deinit(self: *Atom, allocator: Allocator) void { - self.dices.deinit(allocator); self.lazy_bindings.deinit(allocator); self.bindings.deinit(allocator); self.rebases.deinit(allocator); self.relocs.deinit(allocator); self.contained.deinit(allocator); - self.aliases.deinit(allocator); self.code.deinit(allocator); } pub fn clearRetainingCapacity(self: *Atom) void { - self.dices.clearRetainingCapacity(); self.lazy_bindings.clearRetainingCapacity(); self.bindings.clearRetainingCapacity(); self.rebases.clearRetainingCapacity(); self.relocs.clearRetainingCapacity(); self.contained.clearRetainingCapacity(); - self.aliases.clearRetainingCapacity(); self.code.clearRetainingCapacity(); } +/// Returns symbol referencing this atom. +pub fn getSymbol(self: Atom, macho_file: *MachO) macho.nlist_64 { + return self.getSymbolPtr(macho_file).*; +} + +/// Returns pointer-to-symbol referencing this atom. +pub fn getSymbolPtr(self: Atom, macho_file: *MachO) *macho.nlist_64 { + return macho_file.getSymbolPtr(.{ + .sym_index = self.sym_index, + .file = self.file, + }); +} + +pub fn getSymbolWithLoc(self: Atom) SymbolWithLoc { + return .{ .sym_index = self.sym_index, .file = self.file }; +} + +/// Returns true if the symbol pointed at with `sym_loc` is contained within this atom. +/// WARNING this function assumes all atoms have been allocated in the virtual memory. +/// Calling it without allocating with `MachO.allocateSymbols` (or equivalent) will +/// give bogus results. +pub fn isSymbolContained(self: Atom, sym_loc: SymbolWithLoc, macho_file: *MachO) bool { + const sym = macho_file.getSymbol(sym_loc); + if (!sym.sect()) return false; + const self_sym = self.getSymbol(macho_file); + return sym.n_value >= self_sym.n_value and sym.n_value < self_sym.n_value + self.size; +} + +/// Returns the name of this atom. +pub fn getName(self: Atom, macho_file: *MachO) []const u8 { + return macho_file.getSymbolName(.{ + .sym_index = self.sym_index, + .file = self.file, + }); +} + /// Returns how much room there is to grow in virtual address space. /// File offset relocation happens transparently, so it is not included in /// this calculation. 
-pub fn capacity(self: Atom, macho_file: MachO) u64 { - const self_sym = macho_file.locals.items[self.local_sym_index]; +pub fn capacity(self: Atom, macho_file: *MachO) u64 { + const self_sym = self.getSymbol(macho_file); if (self.next) |next| { - const next_sym = macho_file.locals.items[next.local_sym_index]; + const next_sym = next.getSymbol(macho_file); return next_sym.n_value - self_sym.n_value; } else { // We are the last atom. @@ -222,11 +200,11 @@ pub fn capacity(self: Atom, macho_file: MachO) u64 { } } -pub fn freeListEligible(self: Atom, macho_file: MachO) bool { +pub fn freeListEligible(self: Atom, macho_file: *MachO) bool { // No need to keep a free list node for the last atom. const next = self.next orelse return false; - const self_sym = macho_file.locals.items[self.local_sym_index]; - const next_sym = macho_file.locals.items[next.local_sym_index]; + const self_sym = self.getSymbol(macho_file); + const next_sym = next.getSymbol(macho_file); const cap = next_sym.n_value - self_sym.n_value; const ideal_cap = MachO.padToIdeal(self.size); if (cap <= ideal_cap) return false; @@ -235,19 +213,20 @@ pub fn freeListEligible(self: Atom, macho_file: MachO) bool { } const RelocContext = struct { - base_addr: u64 = 0, - allocator: Allocator, - object: *Object, macho_file: *MachO, + base_addr: u64 = 0, + base_offset: i32 = 0, }; -pub fn parseRelocs(self: *Atom, relocs: []macho.relocation_info, context: RelocContext) !void { +pub fn parseRelocs(self: *Atom, relocs: []const macho.relocation_info, context: RelocContext) !void { const tracy = trace(@src()); defer tracy.end(); + const gpa = context.macho_file.base.allocator; + const arch = context.macho_file.base.options.target.cpu.arch; var addend: i64 = 0; - var subtractor: ?u32 = null; + var subtractor: ?SymbolWithLoc = null; for (relocs) |rel, i| { blk: { @@ -284,20 +263,16 @@ pub fn parseRelocs(self: *Atom, relocs: []macho.relocation_info, context: RelocC } assert(subtractor == null); - const sym = context.object.symtab.items[rel.r_symbolnum]; + const sym_loc = MachO.SymbolWithLoc{ + .sym_index = rel.r_symbolnum, + .file = self.file, + }; + const sym = context.macho_file.getSymbol(sym_loc); if (sym.sect() and !sym.ext()) { - subtractor = context.object.symbol_mapping.get(rel.r_symbolnum).?; + subtractor = sym_loc; } else { - const sym_name = context.object.getString(sym.n_strx); - const n_strx = context.macho_file.strtab_dir.getKeyAdapted( - @as([]const u8, sym_name), - StringIndexAdapter{ - .bytes = &context.macho_file.strtab, - }, - ).?; - const resolv = context.macho_file.symbol_resolver.get(n_strx).?; - assert(resolv.where == .global); - subtractor = resolv.local_sym_index; + const sym_name = context.macho_file.getSymbolName(sym_loc); + subtractor = context.macho_file.globals.get(sym_name).?; } // Verify that *_SUBTRACTOR is followed by *_UNSIGNED. 
if (relocs.len <= i + 1) { @@ -328,45 +303,42 @@ pub fn parseRelocs(self: *Atom, relocs: []macho.relocation_info, context: RelocC continue; } + const object = &context.macho_file.objects.items[self.file.?]; const target = target: { if (rel.r_extern == 0) { const sect_id = @intCast(u16, rel.r_symbolnum - 1); - const local_sym_index = context.object.sections_as_symbols.get(sect_id) orelse blk: { - const seg = context.object.load_commands.items[context.object.segment_cmd_index.?].segment; - const sect = seg.sections.items[sect_id]; + const sym_index = object.sections_as_symbols.get(sect_id) orelse blk: { + const sect = object.getSourceSection(sect_id); const match = (try context.macho_file.getMatchingSection(sect)) orelse unreachable; - const local_sym_index = @intCast(u32, context.macho_file.locals.items.len); - try context.macho_file.locals.append(context.allocator, .{ + const sym_index = @intCast(u32, object.symtab.items.len); + try object.symtab.append(gpa, .{ .n_strx = 0, .n_type = macho.N_SECT, - .n_sect = @intCast(u8, context.macho_file.section_ordinals.getIndex(match).? + 1), + .n_sect = context.macho_file.getSectionOrdinal(match), .n_desc = 0, - .n_value = 0, + .n_value = sect.addr, }); - try context.object.sections_as_symbols.putNoClobber(context.allocator, sect_id, local_sym_index); - break :blk local_sym_index; + try object.sections_as_symbols.putNoClobber(gpa, sect_id, sym_index); + break :blk sym_index; }; - break :target Relocation.Target{ .local = local_sym_index }; + break :target MachO.SymbolWithLoc{ .sym_index = sym_index, .file = self.file }; } - const sym = context.object.symtab.items[rel.r_symbolnum]; - const sym_name = context.object.getString(sym.n_strx); + const sym_loc = MachO.SymbolWithLoc{ + .sym_index = rel.r_symbolnum, + .file = self.file, + }; + const sym = context.macho_file.getSymbol(sym_loc); if (sym.sect() and !sym.ext()) { - const sym_index = context.object.symbol_mapping.get(rel.r_symbolnum) orelse unreachable; - break :target Relocation.Target{ .local = sym_index }; + break :target sym_loc; + } else { + const sym_name = context.macho_file.getSymbolName(sym_loc); + break :target context.macho_file.globals.get(sym_name).?; } - - const n_strx = context.macho_file.strtab_dir.getKeyAdapted( - @as([]const u8, sym_name), - StringIndexAdapter{ - .bytes = &context.macho_file.strtab, - }, - ) orelse unreachable; - break :target Relocation.Target{ .global = n_strx }; }; - const offset = @intCast(u32, rel.r_address); + const offset = @intCast(u32, rel.r_address - context.base_offset); switch (arch) { .aarch64 => { @@ -388,8 +360,7 @@ pub fn parseRelocs(self: *Atom, relocs: []macho.relocation_info, context: RelocC else mem.readIntLittle(i32, self.code.items[offset..][0..4]); if (rel.r_extern == 0) { - const seg = context.object.load_commands.items[context.object.segment_cmd_index.?].segment; - const target_sect_base_addr = seg.sections.items[rel.r_symbolnum - 1].addr; + const target_sect_base_addr = object.getSourceSection(@intCast(u16, rel.r_symbolnum - 1)).addr; addend -= @intCast(i64, target_sect_base_addr); } try self.addPtrBindingOrRebase(rel, target, context); @@ -397,9 +368,7 @@ pub fn parseRelocs(self: *Atom, relocs: []macho.relocation_info, context: RelocC .ARM64_RELOC_TLVP_LOAD_PAGE21, .ARM64_RELOC_TLVP_LOAD_PAGEOFF12, => { - if (target == .global) { - try addTlvPtrEntry(target, context); - } + try addTlvPtrEntry(target, context); }, else => {}, } @@ -423,8 +392,7 @@ pub fn parseRelocs(self: *Atom, relocs: []macho.relocation_info, context: RelocC else 
mem.readIntLittle(i32, self.code.items[offset..][0..4]); if (rel.r_extern == 0) { - const seg = context.object.load_commands.items[context.object.segment_cmd_index.?].segment; - const target_sect_base_addr = seg.sections.items[rel.r_symbolnum - 1].addr; + const target_sect_base_addr = object.getSourceSection(@intCast(u16, rel.r_symbolnum - 1)).addr; addend -= @intCast(i64, target_sect_base_addr); } try self.addPtrBindingOrRebase(rel, target, context); @@ -445,16 +413,15 @@ pub fn parseRelocs(self: *Atom, relocs: []macho.relocation_info, context: RelocC if (rel.r_extern == 0) { // Note for the future self: when r_extern == 0, we should subtract correction from the // addend. - const seg = context.object.load_commands.items[context.object.segment_cmd_index.?].segment; - const target_sect_base_addr = seg.sections.items[rel.r_symbolnum - 1].addr; + const target_sect_base_addr = object.getSourceSection(@intCast(u16, rel.r_symbolnum - 1)).addr; + // We need to add base_offset, i.e., offset of this atom wrt to the source + // section. Otherwise, the addend will over-/under-shoot. addend += @intCast(i64, context.base_addr + offset + 4) - - @intCast(i64, target_sect_base_addr); + @intCast(i64, target_sect_base_addr) + context.base_offset; } }, .X86_64_RELOC_TLV => { - if (target == .global) { - try addTlvPtrEntry(target, context); - } + try addTlvPtrEntry(target, context); }, else => {}, } @@ -462,7 +429,7 @@ pub fn parseRelocs(self: *Atom, relocs: []macho.relocation_info, context: RelocC else => unreachable, } - try self.relocs.append(context.allocator, .{ + try self.relocs.append(gpa, .{ .offset = offset, .target = target, .addend = addend, @@ -480,286 +447,182 @@ pub fn parseRelocs(self: *Atom, relocs: []macho.relocation_info, context: RelocC fn addPtrBindingOrRebase( self: *Atom, rel: macho.relocation_info, - target: Relocation.Target, + target: MachO.SymbolWithLoc, context: RelocContext, ) !void { - switch (target) { - .global => |n_strx| { - try self.bindings.append(context.allocator, .{ - .n_strx = n_strx, - .offset = @intCast(u32, rel.r_address), - }); - }, - .local => { - const source_sym = context.macho_file.locals.items[self.local_sym_index]; - const match = context.macho_file.section_ordinals.keys()[source_sym.n_sect - 1]; - const seg = context.macho_file.load_commands.items[match.seg].segment; - const sect = seg.sections.items[match.sect]; - const sect_type = sect.type_(); - - const should_rebase = rebase: { - if (rel.r_length != 3) break :rebase false; - - // TODO actually, a check similar to what dyld is doing, that is, verifying - // that the segment is writable should be enough here. - const is_right_segment = blk: { - if (context.macho_file.data_segment_cmd_index) |idx| { - if (match.seg == idx) { - break :blk true; - } + const gpa = context.macho_file.base.allocator; + const sym = context.macho_file.getSymbol(target); + if (sym.undf()) { + try self.bindings.append(gpa, .{ + .target = target, + .offset = @intCast(u32, rel.r_address - context.base_offset), + }); + } else { + const source_sym = self.getSymbol(context.macho_file); + const match = context.macho_file.getMatchingSectionFromOrdinal(source_sym.n_sect); + const sect = context.macho_file.getSection(match); + const sect_type = sect.type_(); + + const should_rebase = rebase: { + if (rel.r_length != 3) break :rebase false; + + // TODO actually, a check similar to what dyld is doing, that is, verifying + // that the segment is writable should be enough here. 
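+            // For now we approximate this by only considering the __DATA and __DATA_CONST segments.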
+ const is_right_segment = blk: { + if (context.macho_file.data_segment_cmd_index) |idx| { + if (match.seg == idx) { + break :blk true; } - if (context.macho_file.data_const_segment_cmd_index) |idx| { - if (match.seg == idx) { - break :blk true; - } + } + if (context.macho_file.data_const_segment_cmd_index) |idx| { + if (match.seg == idx) { + break :blk true; } - break :blk false; - }; - - if (!is_right_segment) break :rebase false; - if (sect_type != macho.S_LITERAL_POINTERS and - sect_type != macho.S_REGULAR and - sect_type != macho.S_MOD_INIT_FUNC_POINTERS and - sect_type != macho.S_MOD_TERM_FUNC_POINTERS) - { - break :rebase false; } - - break :rebase true; + break :blk false; }; - if (should_rebase) { - try self.rebases.append(context.allocator, @intCast(u32, rel.r_address)); + if (!is_right_segment) break :rebase false; + if (sect_type != macho.S_LITERAL_POINTERS and + sect_type != macho.S_REGULAR and + sect_type != macho.S_MOD_INIT_FUNC_POINTERS and + sect_type != macho.S_MOD_TERM_FUNC_POINTERS) + { + break :rebase false; } - }, + + break :rebase true; + }; + + if (should_rebase) { + try self.rebases.append(gpa, @intCast(u32, rel.r_address - context.base_offset)); + } } } -fn addTlvPtrEntry(target: Relocation.Target, context: RelocContext) !void { +fn addTlvPtrEntry(target: MachO.SymbolWithLoc, context: RelocContext) !void { + const target_sym = context.macho_file.getSymbol(target); + if (!target_sym.undf()) return; if (context.macho_file.tlv_ptr_entries_table.contains(target)) return; const index = try context.macho_file.allocateTlvPtrEntry(target); const atom = try context.macho_file.createTlvPtrAtom(target); - context.macho_file.tlv_ptr_entries.items[index].atom = atom; - - const match = (try context.macho_file.getMatchingSection(.{ - .segname = MachO.makeStaticString("__DATA"), - .sectname = MachO.makeStaticString("__thread_ptrs"), - .flags = macho.S_THREAD_LOCAL_VARIABLE_POINTERS, - })).?; - if (!context.object.start_atoms.contains(match)) { - try context.object.start_atoms.putNoClobber(context.allocator, match, atom); - } - if (context.object.end_atoms.getPtr(match)) |last| { - last.*.next = atom; - atom.prev = last.*; - last.* = atom; - } else { - try context.object.end_atoms.putNoClobber(context.allocator, match, atom); - } + context.macho_file.tlv_ptr_entries.items[index].sym_index = atom.sym_index; } -fn addGotEntry(target: Relocation.Target, context: RelocContext) !void { +fn addGotEntry(target: MachO.SymbolWithLoc, context: RelocContext) !void { if (context.macho_file.got_entries_table.contains(target)) return; const index = try context.macho_file.allocateGotEntry(target); const atom = try context.macho_file.createGotAtom(target); - context.macho_file.got_entries.items[index].atom = atom; - - const match = MachO.MatchingSection{ - .seg = context.macho_file.data_const_segment_cmd_index.?, - .sect = context.macho_file.got_section_index.?, - }; - if (!context.object.start_atoms.contains(match)) { - try context.object.start_atoms.putNoClobber(context.allocator, match, atom); - } - if (context.object.end_atoms.getPtr(match)) |last| { - last.*.next = atom; - atom.prev = last.*; - last.* = atom; - } else { - try context.object.end_atoms.putNoClobber(context.allocator, match, atom); - } + context.macho_file.got_entries.items[index].sym_index = atom.sym_index; } -fn addStub(target: Relocation.Target, context: RelocContext) !void { - if (target != .global) return; - if (context.macho_file.stubs_table.contains(target.global)) return; - // If the symbol has been resolved as defined 
globally elsewhere (in a different translation unit), - // then skip creating stub entry. - // TODO Is this the correct for the incremental? - if (context.macho_file.symbol_resolver.get(target.global).?.where == .global) return; - - const stub_index = try context.macho_file.allocateStubEntry(target.global); - - // TODO clean this up! - const stub_helper_atom = atom: { - const atom = try context.macho_file.createStubHelperAtom(); - const match = MachO.MatchingSection{ - .seg = context.macho_file.text_segment_cmd_index.?, - .sect = context.macho_file.stub_helper_section_index.?, - }; - if (!context.object.start_atoms.contains(match)) { - try context.object.start_atoms.putNoClobber(context.allocator, match, atom); - } - if (context.object.end_atoms.getPtr(match)) |last| { - last.*.next = atom; - atom.prev = last.*; - last.* = atom; - } else { - try context.object.end_atoms.putNoClobber(context.allocator, match, atom); - } - break :atom atom; - }; - const laptr_atom = atom: { - const atom = try context.macho_file.createLazyPointerAtom( - stub_helper_atom.local_sym_index, - target.global, - ); - const match = MachO.MatchingSection{ - .seg = context.macho_file.data_segment_cmd_index.?, - .sect = context.macho_file.la_symbol_ptr_section_index.?, - }; - if (!context.object.start_atoms.contains(match)) { - try context.object.start_atoms.putNoClobber(context.allocator, match, atom); - } - if (context.object.end_atoms.getPtr(match)) |last| { - last.*.next = atom; - atom.prev = last.*; - last.* = atom; - } else { - try context.object.end_atoms.putNoClobber(context.allocator, match, atom); - } - break :atom atom; - }; - const atom = try context.macho_file.createStubAtom(laptr_atom.local_sym_index); - const match = MachO.MatchingSection{ - .seg = context.macho_file.text_segment_cmd_index.?, - .sect = context.macho_file.stubs_section_index.?, - }; - if (!context.object.start_atoms.contains(match)) { - try context.object.start_atoms.putNoClobber(context.allocator, match, atom); - } - if (context.object.end_atoms.getPtr(match)) |last| { - last.*.next = atom; - atom.prev = last.*; - last.* = atom; - } else { - try context.object.end_atoms.putNoClobber(context.allocator, match, atom); - } - context.macho_file.stubs.items[stub_index] = atom; +fn addStub(target: MachO.SymbolWithLoc, context: RelocContext) !void { + const target_sym = context.macho_file.getSymbol(target); + if (!target_sym.undf()) return; + if (context.macho_file.stubs_table.contains(target)) return; + + const stub_index = try context.macho_file.allocateStubEntry(target); + const stub_helper_atom = try context.macho_file.createStubHelperAtom(); + const laptr_atom = try context.macho_file.createLazyPointerAtom(stub_helper_atom.sym_index, target); + const stub_atom = try context.macho_file.createStubAtom(laptr_atom.sym_index); + + context.macho_file.stubs.items[stub_index].sym_index = stub_atom.sym_index; } pub fn resolveRelocs(self: *Atom, macho_file: *MachO) !void { const tracy = trace(@src()); defer tracy.end(); + log.debug("ATOM(%{d}, '{s}')", .{ self.sym_index, self.getName(macho_file) }); + for (self.relocs.items) |rel| { - log.debug("relocating {}", .{rel}); const arch = macho_file.base.options.target.cpu.arch; + switch (arch) { + .aarch64 => { + log.debug(" RELA({s}) @ {x} => %{d} in object({d})", .{ + @tagName(@intToEnum(macho.reloc_type_arm64, rel.@"type")), + rel.offset, + rel.target.sym_index, + rel.target.file, + }); + }, + .x86_64 => { + log.debug(" RELA({s}) @ {x} => %{d} in object({d})", .{ + 
@tagName(@intToEnum(macho.reloc_type_x86_64, rel.@"type")), + rel.offset, + rel.target.sym_index, + rel.target.file, + }); + }, + else => unreachable, + } + const source_addr = blk: { - const sym = macho_file.locals.items[self.local_sym_index]; - break :blk sym.n_value + rel.offset; + const source_sym = self.getSymbol(macho_file); + break :blk source_sym.n_value + rel.offset; + }; + const is_tlv = is_tlv: { + const source_sym = self.getSymbol(macho_file); + const match = macho_file.getMatchingSectionFromOrdinal(source_sym.n_sect); + const sect = macho_file.getSection(match); + break :is_tlv sect.type_() == macho.S_THREAD_LOCAL_VARIABLES; }; - var is_via_thread_ptrs: bool = false; const target_addr = blk: { - const is_via_got = got: { - switch (arch) { - .aarch64 => break :got switch (@intToEnum(macho.reloc_type_arm64, rel.@"type")) { - .ARM64_RELOC_GOT_LOAD_PAGE21, - .ARM64_RELOC_GOT_LOAD_PAGEOFF12, - .ARM64_RELOC_POINTER_TO_GOT, - => true, - else => false, - }, - .x86_64 => break :got switch (@intToEnum(macho.reloc_type_x86_64, rel.@"type")) { - .X86_64_RELOC_GOT, .X86_64_RELOC_GOT_LOAD => true, - else => false, - }, - else => unreachable, - } + const target_atom = rel.getTargetAtom(macho_file) orelse { + // If there is no atom for target, we still need to check for special, atom-less + // symbols such as `___dso_handle`. + const target_name = macho_file.getSymbolName(rel.target); + assert(macho_file.globals.contains(target_name)); + const atomless_sym = macho_file.getSymbol(rel.target); + log.debug(" | atomless target '{s}'", .{target_name}); + break :blk atomless_sym.n_value; }; - - if (is_via_got) { - const got_index = macho_file.got_entries_table.get(rel.target) orelse { - log.err("expected GOT entry for symbol", .{}); - switch (rel.target) { - .local => |sym_index| log.err(" local @{d}", .{sym_index}), - .global => |n_strx| log.err(" global @'{s}'", .{macho_file.getString(n_strx)}), + log.debug(" | target ATOM(%{d}, '{s}') in object({d})", .{ + target_atom.sym_index, + target_atom.getName(macho_file), + target_atom.file, + }); + // If `rel.target` is contained within the target atom, pull its address value. 
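Throughout these hunks a symbol is no longer a bare index into one flat `locals` array; it is addressed as a `(sym_index, file)` pair. A cut-down sketch of the scheme, with the `SymbolWithLoc` field names matching the patch but the surrounding structs reduced to the minimum needed to show the dispatch:

```zig
const std = @import("std");
const macho = std.macho;

pub const SymbolWithLoc = struct {
    sym_index: u32, // index into the owning symbol table
    file: ?u32 = null, // object file index; null means the linker's own symtab
};

const Object = struct { symtab: std.ArrayListUnmanaged(macho.nlist_64) = .{} };

const Linker = struct {
    locals: std.ArrayListUnmanaged(macho.nlist_64) = .{},
    objects: std.ArrayListUnmanaged(Object) = .{},

    fn getSymbol(self: Linker, loc: SymbolWithLoc) macho.nlist_64 {
        if (loc.file) |file| return self.objects.items[file].symtab.items[loc.sym_index];
        return self.locals.items[loc.sym_index];
    }
};
```

Globals then live in a name-keyed map of such locations, which is why an external target can be resolved with a plain `macho_file.globals.get(sym_name)` as in the hunk above.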
+ const target_sym = if (target_atom.isSymbolContained(rel.target, macho_file)) + macho_file.getSymbol(rel.target) + else + target_atom.getSymbol(macho_file); + assert(target_sym.n_desc != MachO.N_DESC_GCED); + const base_address: u64 = if (is_tlv) base_address: { + // For TLV relocations, the value specified as a relocation is the displacement from the + // TLV initializer (either value in __thread_data or zero-init in __thread_bss) to the first + // defined TLV template init section in the following order: + // * wrt __thread_data if defined, then + // * wrt __thread_bss + const sect_id: u16 = sect_id: { + if (macho_file.tlv_data_section_index) |i| { + break :sect_id i; + } else if (macho_file.tlv_bss_section_index) |i| { + break :sect_id i; + } else { + log.err("threadlocal variables present but no initializer sections found", .{}); + log.err(" __thread_data not found", .{}); + log.err(" __thread_bss not found", .{}); + return error.FailedToResolveRelocationTarget; } - log.err(" this is an internal linker error", .{}); - return error.FailedToResolveRelocationTarget; }; - const atom = macho_file.got_entries.items[got_index].atom; - break :blk macho_file.locals.items[atom.local_sym_index].n_value; - } - - switch (rel.target) { - .local => |sym_index| { - const sym = macho_file.locals.items[sym_index]; - const is_tlv = is_tlv: { - const source_sym = macho_file.locals.items[self.local_sym_index]; - const match = macho_file.section_ordinals.keys()[source_sym.n_sect - 1]; - const seg = macho_file.load_commands.items[match.seg].segment; - const sect = seg.sections.items[match.sect]; - break :is_tlv sect.type_() == macho.S_THREAD_LOCAL_VARIABLES; - }; - if (is_tlv) { - // For TLV relocations, the value specified as a relocation is the displacement from the - // TLV initializer (either value in __thread_data or zero-init in __thread_bss) to the first - // defined TLV template init section in the following order: - // * wrt to __thread_data if defined, then - // * wrt to __thread_bss - const seg = macho_file.load_commands.items[macho_file.data_segment_cmd_index.?].segment; - const base_address = inner: { - if (macho_file.tlv_data_section_index) |i| { - break :inner seg.sections.items[i].addr; - } else if (macho_file.tlv_bss_section_index) |i| { - break :inner seg.sections.items[i].addr; - } else { - log.err("threadlocal variables present but no initializer sections found", .{}); - log.err(" __thread_data not found", .{}); - log.err(" __thread_bss not found", .{}); - return error.FailedToResolveRelocationTarget; - } - }; - break :blk sym.n_value - base_address; - } - break :blk sym.n_value; - }, - .global => |n_strx| { - // TODO Still trying to figure out how to possibly use stubs for local symbol indirection with - // branching instructions. If it is not possible, then the best course of action is to - // resurrect the former approach of defering creating synthethic atoms in __got and __la_symbol_ptr - // sections until we resolve the relocations.
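For the aarch64 `PAGE21`/`PAGEOFF12` pairs resolved in the hunks that follow, the arithmetic splits an address into a 4 KiB page delta plus a 12-bit page offset. A small self-checking example using the same casts as the code below; the addresses are invented:

```zig
const std = @import("std");

test "ARM64 page21/pageoff12 arithmetic, illustrative addresses" {
    const source_addr: u64 = 0x1_0000_4000;
    const target_addr: u64 = 0x1_0000_8123;

    const source_page = @intCast(i32, source_addr >> 12);
    const target_page = @intCast(i32, target_addr >> 12);
    const pages = @bitCast(u21, @intCast(i21, target_page - source_page));
    try std.testing.expectEqual(@as(u21, 4), pages); // patched into the ADRP

    const narrowed = @truncate(u12, target_addr);
    try std.testing.expectEqual(@as(u12, 0x123), narrowed); // patched into the ADD/LDR
}
```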
- const resolv = macho_file.symbol_resolver.get(n_strx).?; - switch (resolv.where) { - .global => break :blk macho_file.globals.items[resolv.where_index].n_value, - .undef => { - if (macho_file.stubs_table.get(n_strx)) |stub_index| { - const atom = macho_file.stubs.items[stub_index]; - break :blk macho_file.locals.items[atom.local_sym_index].n_value; - } else { - if (macho_file.tlv_ptr_entries_table.get(rel.target)) |tlv_ptr_index| { - is_via_thread_ptrs = true; - const atom = macho_file.tlv_ptr_entries.items[tlv_ptr_index].atom; - break :blk macho_file.locals.items[atom.local_sym_index].n_value; - } - break :blk 0; - } - }, - } - }, - } + break :base_address macho_file.getSection(.{ + .seg = macho_file.data_segment_cmd_index.?, + .sect = sect_id, + }).addr; + } else 0; + break :blk target_sym.n_value - base_address; }; - log.debug(" | source_addr = 0x{x}", .{source_addr}); - log.debug(" | target_addr = 0x{x}", .{target_addr}); + log.debug(" | source_addr = 0x{x}", .{source_addr}); switch (arch) { .aarch64 => { switch (@intToEnum(macho.reloc_type_arm64, rel.@"type")) { .ARM64_RELOC_BRANCH26 => { + log.debug(" | target_addr = 0x{x}", .{target_addr}); const displacement = math.cast( i28, @intCast(i64, target_addr) - @intCast(i64, source_addr), @@ -788,6 +651,7 @@ pub fn resolveRelocs(self: *Atom, macho_file: *MachO) !void { .ARM64_RELOC_TLVP_LOAD_PAGE21, => { const actual_target_addr = @intCast(i64, target_addr) + rel.addend; + log.debug(" | target_addr = 0x{x}", .{actual_target_addr}); const source_page = @intCast(i32, source_addr >> 12); const target_page = @intCast(i32, actual_target_addr >> 12); const pages = @bitCast(u21, @intCast(i21, target_page - source_page)); @@ -805,6 +669,7 @@ pub fn resolveRelocs(self: *Atom, macho_file: *MachO) !void { .ARM64_RELOC_PAGEOFF12 => { const code = self.code.items[rel.offset..][0..4]; const actual_target_addr = @intCast(i64, target_addr) + rel.addend; + log.debug(" | target_addr = 0x{x}", .{actual_target_addr}); const narrowed = @truncate(u12, @intCast(u64, actual_target_addr)); if (isArithmeticOp(self.code.items[rel.offset..][0..4])) { var inst = aarch64.Instruction{ @@ -842,6 +707,7 @@ pub fn resolveRelocs(self: *Atom, macho_file: *MachO) !void { .ARM64_RELOC_GOT_LOAD_PAGEOFF12 => { const code = self.code.items[rel.offset..][0..4]; const actual_target_addr = @intCast(i64, target_addr) + rel.addend; + log.debug(" | target_addr = 0x{x}", .{actual_target_addr}); const narrowed = @truncate(u12, @intCast(u64, actual_target_addr)); var inst: aarch64.Instruction = .{ .load_store_register = mem.bytesToValue(meta.TagPayload( @@ -856,6 +722,7 @@ pub fn resolveRelocs(self: *Atom, macho_file: *MachO) !void { .ARM64_RELOC_TLVP_LOAD_PAGEOFF12 => { const code = self.code.items[rel.offset..][0..4]; const actual_target_addr = @intCast(i64, target_addr) + rel.addend; + log.debug(" | target_addr = 0x{x}", .{actual_target_addr}); const RegInfo = struct { rd: u5, @@ -886,7 +753,7 @@ pub fn resolveRelocs(self: *Atom, macho_file: *MachO) !void { } }; const narrowed = @truncate(u12, @intCast(u64, actual_target_addr)); - var inst = if (is_via_thread_ptrs) blk: { + var inst = if (macho_file.tlv_ptr_entries_table.contains(rel.target)) blk: { const offset = try math.divExact(u12, narrowed, 8); break :blk aarch64.Instruction{ .load_store_register = .{ @@ -913,18 +780,20 @@ pub fn resolveRelocs(self: *Atom, macho_file: *MachO) !void { mem.writeIntLittle(u32, code, inst.toU32()); }, .ARM64_RELOC_POINTER_TO_GOT => { + log.debug(" | target_addr = 0x{x}", .{target_addr}); const 
result = math.cast(i32, @intCast(i64, target_addr) - @intCast(i64, source_addr)) orelse return error.Overflow; mem.writeIntLittle(u32, self.code.items[rel.offset..][0..4], @bitCast(u32, result)); }, .ARM64_RELOC_UNSIGNED => { const result = blk: { if (rel.subtractor) |subtractor| { - const sym = macho_file.locals.items[subtractor]; + const sym = macho_file.getSymbol(subtractor); break :blk @intCast(i64, target_addr) - @intCast(i64, sym.n_value) + rel.addend; } else { break :blk @intCast(i64, target_addr) + rel.addend; } }; + log.debug(" | target_addr = 0x{x}", .{result}); if (rel.length == 3) { mem.writeIntLittle(u64, self.code.items[rel.offset..][0..8], @bitCast(u64, result)); @@ -943,6 +812,7 @@ pub fn resolveRelocs(self: *Atom, macho_file: *MachO) !void { .x86_64 => { switch (@intToEnum(macho.reloc_type_x86_64, rel.@"type")) { .X86_64_RELOC_BRANCH => { + log.debug(" | target_addr = 0x{x}", .{target_addr}); const displacement = math.cast( i32, @intCast(i64, target_addr) - @intCast(i64, source_addr) - 4 + rel.addend, @@ -950,6 +820,7 @@ pub fn resolveRelocs(self: *Atom, macho_file: *MachO) !void { mem.writeIntLittle(u32, self.code.items[rel.offset..][0..4], @bitCast(u32, displacement)); }, .X86_64_RELOC_GOT, .X86_64_RELOC_GOT_LOAD => { + log.debug(" | target_addr = 0x{x}", .{target_addr}); const displacement = math.cast( i32, @intCast(i64, target_addr) - @intCast(i64, source_addr) - 4 + rel.addend, @@ -957,7 +828,8 @@ pub fn resolveRelocs(self: *Atom, macho_file: *MachO) !void { mem.writeIntLittle(u32, self.code.items[rel.offset..][0..4], @bitCast(u32, displacement)); }, .X86_64_RELOC_TLV => { - if (!is_via_thread_ptrs) { + log.debug(" | target_addr = 0x{x}", .{target_addr}); + if (!macho_file.tlv_ptr_entries_table.contains(rel.target)) { // We need to rewrite the opcode from movq to leaq. 
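The `rel.offset - 2` below lands on the instruction's opcode byte: a rip-relative `movq` is encoded as `48 8b 3d <disp32>` with the relocation pointing at the displacement, so rewriting `0x8b` to `0x8d` turns the load into a `leaq`, and a locally-defined TLV is then addressed directly rather than through a pointer. A byte-level illustration (the encoding is standard x86-64; the buffer is made up):

```zig
const std = @import("std");

test "relaxing movq into leaq for a locally-defined TLV" {
    // movq 0x0(%rip), %rdi  =>  48 8b 3d 00 00 00 00
    var code = [_]u8{ 0x48, 0x8b, 0x3d, 0x00, 0x00, 0x00, 0x00 };
    const rel_offset: usize = 3; // the reloc points at the 4-byte displacement
    code[rel_offset - 2] = 0x8d; // now: leaq 0x0(%rip), %rdi
    try std.testing.expectEqual(@as(u8, 0x8d), code[1]);
}
```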
self.code.items[rel.offset - 2] = 0x8d; } @@ -980,6 +852,7 @@ pub fn resolveRelocs(self: *Atom, macho_file: *MachO) !void { else => unreachable, }; const actual_target_addr = @intCast(i64, target_addr) + rel.addend; + log.debug(" | target_addr = 0x{x}", .{actual_target_addr}); const displacement = math.cast( i32, actual_target_addr - @intCast(i64, source_addr + correction + 4), @@ -989,12 +862,13 @@ pub fn resolveRelocs(self: *Atom, macho_file: *MachO) !void { .X86_64_RELOC_UNSIGNED => { const result = blk: { if (rel.subtractor) |subtractor| { - const sym = macho_file.locals.items[subtractor]; + const sym = macho_file.getSymbol(subtractor); break :blk @intCast(i64, target_addr) - @intCast(i64, sym.n_value) + rel.addend; } else { break :blk @intCast(i64, target_addr) + rel.addend; } }; + log.debug(" | target_addr = 0x{x}", .{result}); if (rel.length == 3) { mem.writeIntLittle(u64, self.code.items[rel.offset..][0..8], @bitCast(u64, result)); diff --git a/src/link/MachO/DebugSymbols.zig b/src/link/MachO/DebugSymbols.zig index 5e5aca26c1..4da106eca1 100644 --- a/src/link/MachO/DebugSymbols.zig +++ b/src/link/MachO/DebugSymbols.zig @@ -5,7 +5,7 @@ const build_options = @import("build_options"); const assert = std.debug.assert; const fs = std.fs; const link = @import("../../link.zig"); -const log = std.log.scoped(.link); +const log = std.log.scoped(.dsym); const macho = std.macho; const makeStaticString = MachO.makeStaticString; const math = std.math; @@ -17,6 +17,7 @@ const Allocator = mem.Allocator; const Dwarf = @import("../Dwarf.zig"); const MachO = @import("../MachO.zig"); const Module = @import("../../Module.zig"); +const StringTable = @import("../strtab.zig").StringTable; const TextBlock = MachO.TextBlock; const Type = @import("../../type.zig").Type; @@ -59,6 +60,8 @@ debug_aranges_section_dirty: bool = false, debug_info_header_dirty: bool = false, debug_line_header_dirty: bool = false, +strtab: StringTable(.strtab) = .{}, + relocs: std.ArrayListUnmanaged(Reloc) = .{}, pub const Reloc = struct { @@ -93,6 +96,7 @@ pub fn populateMissingMetadata(self: *DebugSymbols, allocator: Allocator) !void .strsize = 0, }, }); + try self.strtab.buffer.append(allocator, 0); self.load_commands_dirty = true; } @@ -269,22 +273,36 @@ pub fn flushModule(self: *DebugSymbols, allocator: Allocator, options: link.Opti for (self.relocs.items) |*reloc| { const sym = switch (reloc.@"type") { - .direct_load => self.base.locals.items[reloc.target], + .direct_load => self.base.getSymbol(.{ .sym_index = reloc.target, .file = null }), .got_load => blk: { - const got_index = self.base.got_entries_table.get(.{ .local = reloc.target }).?; + const got_index = self.base.got_entries_table.get(.{ + .sym_index = reloc.target, + .file = null, + }).?; const got_entry = self.base.got_entries.items[got_index]; - break :blk self.base.locals.items[got_entry.atom.local_sym_index]; + break :blk got_entry.getSymbol(self.base); }, }; if (sym.n_value == reloc.prev_vaddr) continue; + const sym_name = switch (reloc.@"type") { + .direct_load => self.base.getSymbolName(.{ .sym_index = reloc.target, .file = null }), + .got_load => blk: { + const got_index = self.base.got_entries_table.get(.{ + .sym_index = reloc.target, + .file = null, + }).?; + const got_entry = self.base.got_entries.items[got_index]; + break :blk got_entry.getName(self.base); + }, + }; const seg = &self.load_commands.items[self.dwarf_segment_cmd_index.?].segment; const sect = &seg.sections.items[self.debug_info_section_index.?]; const file_offset = sect.offset + reloc.offset; 
log.debug("resolving relocation: {d}@{x} ('{s}') at offset {x}", .{ reloc.target, sym.n_value, - self.base.getString(sym.n_strx), + sym_name, file_offset, }); try self.file.pwriteAll(mem.asBytes(&sym.n_value), file_offset); @@ -367,6 +385,7 @@ pub fn deinit(self: *DebugSymbols, allocator: Allocator) void { } self.load_commands.deinit(allocator); self.dwarf.deinit(); + self.strtab.deinit(allocator); self.relocs.deinit(allocator); } @@ -582,21 +601,39 @@ fn writeSymbolTable(self: *DebugSymbols) !void { const tracy = trace(@src()); defer tracy.end(); + const gpa = self.base.base.allocator; const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].segment; const symtab = &self.load_commands.items[self.symtab_cmd_index.?].symtab; symtab.symoff = @intCast(u32, seg.inner.fileoff); - var locals = std.ArrayList(macho.nlist_64).init(self.base.base.allocator); + var locals = std.ArrayList(macho.nlist_64).init(gpa); defer locals.deinit(); - for (self.base.locals.items) |sym| { - if (sym.n_strx == 0) continue; - if (self.base.symbol_resolver.get(sym.n_strx)) |_| continue; - try locals.append(sym); + for (self.base.locals.items) |sym, sym_id| { + if (sym.n_strx == 0) continue; // no name, skip + if (sym.n_desc == MachO.N_DESC_GCED) continue; // GCed, skip + const sym_loc = MachO.SymbolWithLoc{ .sym_index = @intCast(u32, sym_id), .file = null }; + if (self.base.symbolIsTemp(sym_loc)) continue; // local temp symbol, skip + if (self.base.globals.contains(self.base.getSymbolName(sym_loc))) continue; // global symbol is either an export or import, skip + var out_sym = sym; + out_sym.n_strx = try self.strtab.insert(gpa, self.base.getSymbolName(sym_loc)); + try locals.append(out_sym); + } + + var exports = std.ArrayList(macho.nlist_64).init(gpa); + defer exports.deinit(); + + for (self.base.globals.values()) |global| { + const sym = self.base.getSymbol(global); + if (sym.undf()) continue; // import, skip + if (sym.n_desc == MachO.N_DESC_GCED) continue; // GCed, skip + var out_sym = sym; + out_sym.n_strx = try self.strtab.insert(gpa, self.base.getSymbolName(global)); + try exports.append(out_sym); } const nlocals = locals.items.len; - const nexports = self.base.globals.items.len; + const nexports = exports.items.len; const locals_off = symtab.symoff; const locals_size = nlocals * @sizeOf(macho.nlist_64); const exports_off = locals_off + locals_size; @@ -641,7 +678,7 @@ fn writeSymbolTable(self: *DebugSymbols) !void { try self.file.pwriteAll(mem.sliceAsBytes(locals.items), locals_off); log.debug("writing exported symbols from 0x{x} to 0x{x}", .{ exports_off, exports_size + exports_off }); - try self.file.pwriteAll(mem.sliceAsBytes(self.base.globals.items), exports_off); + try self.file.pwriteAll(mem.sliceAsBytes(exports.items), exports_off); self.load_commands_dirty = true; } @@ -655,7 +692,7 @@ fn writeStringTable(self: *DebugSymbols) !void { const symtab_size = @intCast(u32, symtab.nsyms * @sizeOf(macho.nlist_64)); symtab.stroff = symtab.symoff + symtab_size; - const needed_size = mem.alignForwardGeneric(u64, self.base.strtab.items.len, @alignOf(u64)); + const needed_size = mem.alignForwardGeneric(u64, self.strtab.buffer.items.len, @alignOf(u64)); symtab.strsize = @intCast(u32, needed_size); if (symtab_size + needed_size > seg.inner.filesize) { @@ -692,7 +729,7 @@ fn writeStringTable(self: *DebugSymbols) !void { log.debug("writing string table from 0x{x} to 0x{x}", .{ symtab.stroff, symtab.stroff + symtab.strsize }); - try self.file.pwriteAll(self.base.strtab.items, symtab.stroff); + try 
self.file.pwriteAll(self.strtab.buffer.items, symtab.stroff); self.load_commands_dirty = true; } diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index 03291cefab..5e10c0c0a3 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -3,7 +3,6 @@ const Object = @This(); const std = @import("std"); const build_options = @import("build_options"); const assert = std.debug.assert; -const dwarf = std.dwarf; const fs = std.fs; const io = std.io; const log = std.log.scoped(.link); @@ -16,13 +15,21 @@ const trace = @import("../../tracy.zig").trace; const Allocator = mem.Allocator; const Atom = @import("Atom.zig"); const MachO = @import("../MachO.zig"); +const MatchingSection = MachO.MatchingSection; +const SymbolWithLoc = MachO.SymbolWithLoc; file: fs.File, name: []const u8, +mtime: u64, + +/// Data contents of the file. Includes sections, and data of load commands. +/// Excludes the backing memory for the header and load commands. +/// Initialized in `parse`. +contents: []const u8 = undefined, file_offset: ?u32 = null, -header: ?macho.mach_header_64 = null, +header: macho.mach_header_64 = undefined, load_commands: std.ArrayListUnmanaged(macho.LoadCommand) = .{}, @@ -42,212 +49,58 @@ dwarf_debug_line_str_index: ?u16 = null, dwarf_debug_ranges_index: ?u16 = null, symtab: std.ArrayListUnmanaged(macho.nlist_64) = .{}, -strtab: std.ArrayListUnmanaged(u8) = .{}, -data_in_code_entries: std.ArrayListUnmanaged(macho.data_in_code_entry) = .{}, - -// Debug info -debug_info: ?DebugInfo = null, -tu_name: ?[]const u8 = null, -tu_comp_dir: ?[]const u8 = null, -mtime: ?u64 = null, - -contained_atoms: std.ArrayListUnmanaged(*Atom) = .{}, -start_atoms: std.AutoHashMapUnmanaged(MachO.MatchingSection, *Atom) = .{}, -end_atoms: std.AutoHashMapUnmanaged(MachO.MatchingSection, *Atom) = .{}, -sections_as_symbols: std.AutoHashMapUnmanaged(u16, u32) = .{}, +strtab: []const u8 = &.{}, +data_in_code_entries: []const macho.data_in_code_entry = &.{}, -// TODO symbol mapping and its inverse can probably be simple arrays -// instead of hash maps. 
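The `self.strtab` flushed above is the dsym writer's own deduplicating `StringTable` from the new `src/link/strtab.zig`, rather than a byte array shared with the linker. A minimal sketch of what such an interning table provides; this is not the real implementation, which keys its hash map into the buffer itself through an adapter instead of holding caller slices:

```zig
const std = @import("std");

const StringTable = struct {
    // The patch seeds `buffer` with a single NUL in populateMissingMetadata,
    // so offset 0 keeps meaning "no name" (n_strx == 0).
    buffer: std.ArrayListUnmanaged(u8) = .{},
    table: std.StringHashMapUnmanaged(u32) = .{},

    fn insert(self: *StringTable, gpa: std.mem.Allocator, string: []const u8) !u32 {
        if (self.table.get(string)) |off| return off; // dedup hit
        const off = @intCast(u32, self.buffer.items.len);
        try self.buffer.appendSlice(gpa, string);
        try self.buffer.append(gpa, 0);
        // Simplification: assumes `string` outlives the table.
        try self.table.put(gpa, string, off);
        return off;
    }
};
```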
-symbol_mapping: std.AutoHashMapUnmanaged(u32, u32) = .{}, -reverse_symbol_mapping: std.AutoHashMapUnmanaged(u32, u32) = .{}, - -analyzed: bool = false, - -const DebugInfo = struct { - inner: dwarf.DwarfInfo, - debug_info: []u8, - debug_abbrev: []u8, - debug_str: []u8, - debug_line: []u8, - debug_line_str: []u8, - debug_ranges: []u8, - - pub fn parseFromObject(allocator: Allocator, object: *const Object) !?DebugInfo { - var debug_info = blk: { - const index = object.dwarf_debug_info_index orelse return null; - break :blk try object.readSection(allocator, index); - }; - var debug_abbrev = blk: { - const index = object.dwarf_debug_abbrev_index orelse return null; - break :blk try object.readSection(allocator, index); - }; - var debug_str = blk: { - const index = object.dwarf_debug_str_index orelse return null; - break :blk try object.readSection(allocator, index); - }; - var debug_line = blk: { - const index = object.dwarf_debug_line_index orelse return null; - break :blk try object.readSection(allocator, index); - }; - var debug_line_str = blk: { - if (object.dwarf_debug_line_str_index) |ind| { - break :blk try object.readSection(allocator, ind); - } - break :blk try allocator.alloc(u8, 0); - }; - var debug_ranges = blk: { - if (object.dwarf_debug_ranges_index) |ind| { - break :blk try object.readSection(allocator, ind); - } - break :blk try allocator.alloc(u8, 0); - }; +sections_as_symbols: std.AutoHashMapUnmanaged(u16, u32) = .{}, - var inner: dwarf.DwarfInfo = .{ - .endian = .Little, - .debug_info = debug_info, - .debug_abbrev = debug_abbrev, - .debug_str = debug_str, - .debug_line = debug_line, - .debug_line_str = debug_line_str, - .debug_ranges = debug_ranges, - }; - try dwarf.openDwarfDebugInfo(&inner, allocator); - - return DebugInfo{ - .inner = inner, - .debug_info = debug_info, - .debug_abbrev = debug_abbrev, - .debug_str = debug_str, - .debug_line = debug_line, - .debug_line_str = debug_line_str, - .debug_ranges = debug_ranges, - }; - } +/// List of atoms that map to the symbols parsed from this object file. +managed_atoms: std.ArrayListUnmanaged(*Atom) = .{}, - pub fn deinit(self: *DebugInfo, allocator: Allocator) void { - allocator.free(self.debug_info); - allocator.free(self.debug_abbrev); - allocator.free(self.debug_str); - allocator.free(self.debug_line); - allocator.free(self.debug_line_str); - allocator.free(self.debug_ranges); - self.inner.deinit(allocator); - } -}; +/// Table of atoms belonging to this object file indexed by the symbol index. 
+atom_by_index_table: std.AutoHashMapUnmanaged(u32, *Atom) = .{}, -pub fn deinit(self: *Object, allocator: Allocator) void { +pub fn deinit(self: *Object, gpa: Allocator) void { for (self.load_commands.items) |*lc| { - lc.deinit(allocator); + lc.deinit(gpa); } - self.load_commands.deinit(allocator); - self.data_in_code_entries.deinit(allocator); - self.symtab.deinit(allocator); - self.strtab.deinit(allocator); - self.sections_as_symbols.deinit(allocator); - self.symbol_mapping.deinit(allocator); - self.reverse_symbol_mapping.deinit(allocator); - allocator.free(self.name); - - self.contained_atoms.deinit(allocator); - self.start_atoms.deinit(allocator); - self.end_atoms.deinit(allocator); - - if (self.debug_info) |*db| { - db.deinit(allocator); + self.load_commands.deinit(gpa); + gpa.free(self.contents); + self.sections_as_symbols.deinit(gpa); + self.atom_by_index_table.deinit(gpa); + + for (self.managed_atoms.items) |atom| { + atom.deinit(gpa); + gpa.destroy(atom); } + self.managed_atoms.deinit(gpa); - if (self.tu_name) |n| { - allocator.free(n); - } - - if (self.tu_comp_dir) |n| { - allocator.free(n); - } -} - -pub fn free(self: *Object, allocator: Allocator, macho_file: *MachO) void { - log.debug("freeObject {*}", .{self}); - - var it = self.end_atoms.iterator(); - while (it.next()) |entry| { - const match = entry.key_ptr.*; - const first_atom = self.start_atoms.get(match).?; - const last_atom = entry.value_ptr.*; - var atom = first_atom; - - while (true) { - if (atom.local_sym_index != 0) { - macho_file.locals_free_list.append(allocator, atom.local_sym_index) catch {}; - const local = &macho_file.locals.items[atom.local_sym_index]; - local.* = .{ - .n_strx = 0, - .n_type = 0, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }; - atom.local_sym_index = 0; - } - if (atom == last_atom) { - break; - } - if (atom.next) |next| { - atom = next; - } else break; - } - } - - self.freeAtoms(macho_file); + gpa.free(self.name); } -fn freeAtoms(self: *Object, macho_file: *MachO) void { - var it = self.end_atoms.iterator(); - while (it.next()) |entry| { - const match = entry.key_ptr.*; - var first_atom: *Atom = self.start_atoms.get(match).?; - var last_atom: *Atom = entry.value_ptr.*; - - if (macho_file.atoms.getPtr(match)) |atom_ptr| { - if (atom_ptr.* == last_atom) { - if (first_atom.prev) |prev| { - // TODO shrink the section size here - atom_ptr.* = prev; - } else { - _ = macho_file.atoms.fetchRemove(match); - } - } - } - - if (first_atom.prev) |prev| { - prev.next = last_atom.next; - } else { - first_atom.prev = null; - } +pub fn parse(self: *Object, allocator: Allocator, target: std.Target) !void { + const file_stat = try self.file.stat(); + const file_size = math.cast(usize, file_stat.size) orelse return error.Overflow; + self.contents = try self.file.readToEndAlloc(allocator, file_size); - if (last_atom.next) |next| { - next.prev = last_atom.prev; - } else { - last_atom.next = null; - } - } -} + var stream = std.io.fixedBufferStream(self.contents); + const reader = stream.reader(); -pub fn parse(self: *Object, allocator: Allocator, target: std.Target) !void { - const reader = self.file.reader(); - if (self.file_offset) |offset| { - try reader.context.seekTo(offset); + const file_offset = self.file_offset orelse 0; + if (file_offset > 0) { + try reader.context.seekTo(file_offset); } - const header = try reader.readStruct(macho.mach_header_64); - if (header.filetype != macho.MH_OBJECT) { + self.header = try reader.readStruct(macho.mach_header_64); + if (self.header.filetype != macho.MH_OBJECT) { 
log.debug("invalid filetype: expected 0x{x}, found 0x{x}", .{ macho.MH_OBJECT, - header.filetype, + self.header.filetype, }); return error.NotObject; } - const this_arch: std.Target.Cpu.Arch = switch (header.cputype) { + const this_arch: std.Target.Cpu.Arch = switch (self.header.cputype) { macho.CPU_TYPE_ARM64 => .aarch64, macho.CPU_TYPE_X86_64 => .x86_64, else => |value| { @@ -260,22 +113,10 @@ pub fn parse(self: *Object, allocator: Allocator, target: std.Target) !void { return error.MismatchedCpuArchitecture; } - self.header = header; - - try self.readLoadCommands(allocator, reader); - try self.parseSymtab(allocator); - try self.parseDataInCode(allocator); - try self.parseDebugInfo(allocator); -} - -pub fn readLoadCommands(self: *Object, allocator: Allocator, reader: anytype) !void { - const header = self.header orelse unreachable; // Unreachable here signifies a fatal unexplored condition. - const offset = self.file_offset orelse 0; - - try self.load_commands.ensureUnusedCapacity(allocator, header.ncmds); + try self.load_commands.ensureUnusedCapacity(allocator, self.header.ncmds); var i: u16 = 0; - while (i < header.ncmds) : (i += 1) { + while (i < self.header.ncmds) : (i += 1) { var cmd = try macho.LoadCommand.read(allocator, reader); switch (cmd.cmd()) { .SEGMENT_64 => { @@ -305,18 +146,18 @@ pub fn readLoadCommands(self: *Object, allocator: Allocator, reader: anytype) !v } } - sect.offset += offset; + sect.offset += file_offset; if (sect.reloff > 0) { - sect.reloff += offset; + sect.reloff += file_offset; } } - seg.inner.fileoff += offset; + seg.inner.fileoff += file_offset; }, .SYMTAB => { self.symtab_cmd_index = i; - cmd.symtab.symoff += offset; - cmd.symtab.stroff += offset; + cmd.symtab.symoff += file_offset; + cmd.symtab.stroff += file_offset; }, .DYSYMTAB => { self.dysymtab_cmd_index = i; @@ -326,7 +167,7 @@ pub fn readLoadCommands(self: *Object, allocator: Allocator, reader: anytype) !v }, .DATA_IN_CODE => { self.data_in_code_cmd_index = i; - cmd.linkedit_data.dataoff += offset; + cmd.linkedit_data.dataoff += file_offset; }, else => { log.debug("Unknown load command detected: 0x{x}.", .{cmd.cmd()}); @@ -334,21 +175,37 @@ pub fn readLoadCommands(self: *Object, allocator: Allocator, reader: anytype) !v } self.load_commands.appendAssumeCapacity(cmd); } + + try self.parseSymtab(allocator); } -const NlistWithIndex = struct { - nlist: macho.nlist_64, +const Context = struct { + symtab: []const macho.nlist_64, + strtab: []const u8, +}; + +const SymbolAtIndex = struct { index: u32, - fn lessThan(_: void, lhs: NlistWithIndex, rhs: NlistWithIndex) bool { - // We sort by type: defined < undefined, and - // afterwards by address in each group. Normally, dysymtab should - // be enough to guarantee the sort, but turns out not every compiler - // is kind enough to specify the symbols in the correct order. - if (lhs.nlist.sect()) { - if (rhs.nlist.sect()) { + fn getSymbol(self: SymbolAtIndex, ctx: Context) macho.nlist_64 { + return ctx.symtab[self.index]; + } + + fn getSymbolName(self: SymbolAtIndex, ctx: Context) []const u8 { + const sym = self.getSymbol(ctx); + assert(sym.n_strx < ctx.strtab.len); + return mem.sliceTo(@ptrCast([*:0]const u8, ctx.strtab.ptr + sym.n_strx), 0); + } + + /// Returns whether lhs is less than rhs by allocated address in object file. + /// Undefined symbols are pushed to the back (always evaluate to true). 
+ fn lessThan(ctx: Context, lhs_index: SymbolAtIndex, rhs_index: SymbolAtIndex) bool { + const lhs = lhs_index.getSymbol(ctx); + const rhs = rhs_index.getSymbol(ctx); + if (lhs.sect()) { + if (rhs.sect()) { // Same group, sort by address. - return lhs.nlist.n_value < rhs.nlist.n_value; + return lhs.n_value < rhs.n_value; } else { return true; } @@ -357,60 +214,108 @@ const NlistWithIndex = struct { } } - fn filterInSection(symbols: []NlistWithIndex, sect: macho.section_64) []NlistWithIndex { - const Predicate = struct { - addr: u64, - - pub fn predicate(self: @This(), symbol: NlistWithIndex) bool { - return symbol.nlist.n_value >= self.addr; - } - }; - - const start = MachO.findFirst(NlistWithIndex, symbols, 0, Predicate{ .addr = sect.addr }); - const end = MachO.findFirst(NlistWithIndex, symbols, start, Predicate{ .addr = sect.addr + sect.size }); + /// Returns whether lhs is less senior than rhs. The rules are: + /// 1. ext + /// 2. weak + /// 3. local + /// 4. temp (local starting with `l` prefix). + fn lessThanBySeniority(ctx: Context, lhs_index: SymbolAtIndex, rhs_index: SymbolAtIndex) bool { + const lhs = lhs_index.getSymbol(ctx); + const rhs = rhs_index.getSymbol(ctx); + if (!rhs.ext()) { + const lhs_name = lhs_index.getSymbolName(ctx); + return mem.startsWith(u8, lhs_name, "l") or mem.startsWith(u8, lhs_name, "L"); + } else if (rhs.pext() or rhs.weakDef()) { + return !lhs.ext(); + } else { + return false; + } + } - return symbols[start..end]; + /// Like lessThanBySeniority but negated. + fn greaterThanBySeniority(ctx: Context, lhs_index: SymbolAtIndex, rhs_index: SymbolAtIndex) bool { + return !lessThanBySeniority(ctx, lhs_index, rhs_index); } }; -fn filterDice(dices: []macho.data_in_code_entry, start_addr: u64, end_addr: u64) []macho.data_in_code_entry { +fn filterSymbolsByAddress( + indexes: []SymbolAtIndex, + start_addr: u64, + end_addr: u64, + ctx: Context, +) []SymbolAtIndex { + const Predicate = struct { + addr: u64, + ctx: Context, + + pub fn predicate(pred: @This(), index: SymbolAtIndex) bool { + return index.getSymbol(pred.ctx).n_value >= pred.addr; + } + }; + + const start = MachO.findFirst(SymbolAtIndex, indexes, 0, Predicate{ + .addr = start_addr, + .ctx = ctx, + }); + const end = MachO.findFirst(SymbolAtIndex, indexes, start, Predicate{ + .addr = end_addr, + .ctx = ctx, + }); + + return indexes[start..end]; +} + +fn filterRelocs( + relocs: []const macho.relocation_info, + start_addr: u64, + end_addr: u64, +) []const macho.relocation_info { const Predicate = struct { addr: u64, - pub fn predicate(self: @This(), dice: macho.data_in_code_entry) bool { - return dice.offset >= self.addr; + pub fn predicate(self: @This(), rel: macho.relocation_info) bool { + return rel.r_address < self.addr; } }; - const start = MachO.findFirst(macho.data_in_code_entry, dices, 0, Predicate{ .addr = start_addr }); - const end = MachO.findFirst(macho.data_in_code_entry, dices, start, Predicate{ .addr = end_addr }); + const start = MachO.findFirst(macho.relocation_info, relocs, 0, Predicate{ .addr = end_addr }); + const end = MachO.findFirst(macho.relocation_info, relocs, start, Predicate{ .addr = start_addr }); - return dices[start..end]; + return relocs[start..end]; } -pub fn parseIntoAtoms(self: *Object, allocator: Allocator, macho_file: *MachO) !void { +/// Splits object into atoms assuming one-shot linking mode. 
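One subtlety in `filterRelocs` above, worth noting before the splitting function below: section relocations arrive sorted by descending `r_address`, which is why its predicates look inverted; the run covering `[start_addr, end_addr)` begins at the first entry with `r_address < end_addr`. A toy version of the same slicing, where `findFirst` is a simplified linear stand-in for the helper in MachO.zig:

```zig
const std = @import("std");

fn findFirst(comptime T: type, haystack: []const T, start: usize, predicate: anytype) usize {
    var i: usize = start;
    while (i < haystack.len) : (i += 1) {
        if (predicate.predicate(haystack[i])) break;
    }
    return i;
}

test "slicing a descending run of relocation addresses" {
    const Predicate = struct {
        addr: u64,
        pub fn predicate(self: @This(), r_address: u64) bool {
            return r_address < self.addr;
        }
    };
    const addrs = [_]u64{ 0x30, 0x20, 0x10, 0x0 };
    // Relocations for an atom covering [0x10, 0x28) sit at indices 1 and 2.
    const start = findFirst(u64, &addrs, 0, Predicate{ .addr = 0x28 });
    const end = findFirst(u64, &addrs, start, Predicate{ .addr = 0x10 });
    try std.testing.expectEqual(@as(usize, 1), start);
    try std.testing.expectEqual(@as(usize, 3), end);
}
```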
+pub fn splitIntoAtomsOneShot(self: *Object, macho_file: *MachO, object_id: u32) !void { + assert(macho_file.mode == .one_shot); + const tracy = trace(@src()); defer tracy.end(); + const gpa = macho_file.base.allocator; const seg = self.load_commands.items[self.segment_cmd_index.?].segment; - log.debug("analysing {s}", .{self.name}); + log.debug("splitting object({d}, {s}) into atoms: one-shot mode", .{ object_id, self.name }); // You would expect that the symbol table is at least pre-sorted based on symbol's type: // local < extern defined < undefined. Unfortunately, this is not guaranteed! For instance, // the GO compiler does not necessarily respect that therefore we sort immediately by type // and address within. - var sorted_all_nlists = try std.ArrayList(NlistWithIndex).initCapacity(allocator, self.symtab.items.len); - defer sorted_all_nlists.deinit(); + const context = Context{ + .symtab = self.getSourceSymtab(), + .strtab = self.strtab, + }; + var sorted_all_syms = try std.ArrayList(SymbolAtIndex).initCapacity(gpa, context.symtab.len); + defer sorted_all_syms.deinit(); - for (self.symtab.items) |nlist, index| { - sorted_all_nlists.appendAssumeCapacity(.{ - .nlist = nlist, - .index = @intCast(u32, index), - }); + for (context.symtab) |_, index| { + sorted_all_syms.appendAssumeCapacity(.{ .index = @intCast(u32, index) }); } - sort.sort(NlistWithIndex, sorted_all_nlists.items, {}, NlistWithIndex.lessThan); + // We sort by type: defined < undefined, and + // afterwards by address in each group. Normally, dysymtab should + // be enough to guarantee the sort, but turns out not every compiler + // is kind enough to specify the symbols in the correct order. + sort.sort(SymbolAtIndex, sorted_all_syms.items, context, SymbolAtIndex.lessThan); // Well, shit, sometimes compilers skip the dysymtab load command altogether, meaning we // have to infer the start of undef section in the symtab ourselves. @@ -418,226 +323,328 @@ pub fn parseIntoAtoms(self: *Object, allocator: Allocator, macho_file: *MachO) ! const dysymtab = self.load_commands.items[cmd_index].dysymtab; break :blk dysymtab.iundefsym; } else blk: { - var iundefsym: usize = sorted_all_nlists.items.len; + var iundefsym: usize = sorted_all_syms.items.len; while (iundefsym > 0) : (iundefsym -= 1) { - const nlist = sorted_all_nlists.items[iundefsym - 1]; - if (nlist.nlist.sect()) break; + const sym = sorted_all_syms.items[iundefsym - 1].getSymbol(context); + if (sym.sect()) break; } break :blk iundefsym; }; // We only care about defined symbols, so filter every other out. - const sorted_nlists = sorted_all_nlists.items[0..iundefsym]; + const sorted_syms = sorted_all_syms.items[0..iundefsym]; + const subsections_via_symbols = self.header.flags & macho.MH_SUBSECTIONS_VIA_SYMBOLS != 0; for (seg.sections.items) |sect, id| { const sect_id = @intCast(u8, id); - log.debug("putting section '{s},{s}' as an Atom", .{ sect.segName(), sect.sectName() }); + log.debug("splitting section '{s},{s}' into atoms", .{ sect.segName(), sect.sectName() }); // Get matching segment/section in the final artifact. 
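`getMatchingSection`, called next, is the routing step: it picks the output segment/section slot for an input section based on its type, name and flags. The real table in MachO.zig covers far more cases; the sketch below is a hypothetical few-case flavor of it, with mappings that follow common ld64 conventions rather than being quoted from the patch:

```zig
const std = @import("std");
const macho = std.macho;

const OutputSection = struct { segname: []const u8, sectname: []const u8 };

// The real function also returns null for input sections that are dropped
// from the output (e.g. DWARF debug sections); that case is elided here.
fn route(sect: macho.section_64) OutputSection {
    return switch (sect.type_()) {
        macho.S_CSTRING_LITERALS => .{ .segname = "__TEXT", .sectname = "__cstring" },
        macho.S_MOD_INIT_FUNC_POINTERS => .{ .segname = "__DATA_CONST", .sectname = "__mod_init_func" },
        macho.S_ZEROFILL => .{ .segname = "__DATA", .sectname = "__bss" },
        else => .{ .segname = "__TEXT", .sectname = "__text" },
    };
}
```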
const match = (try macho_file.getMatchingSection(sect)) orelse { - log.debug("unhandled section", .{}); + log.debug(" unhandled section", .{}); continue; }; - // Read section's code - var code = try allocator.alloc(u8, @intCast(usize, sect.size)); - defer allocator.free(code); - _ = try self.file.preadAll(code, sect.offset); - - // Read section's list of relocations - var raw_relocs = try allocator.alloc(u8, sect.nreloc * @sizeOf(macho.relocation_info)); - defer allocator.free(raw_relocs); - _ = try self.file.preadAll(raw_relocs, sect.reloff); - const relocs = mem.bytesAsSlice(macho.relocation_info, raw_relocs); - - // Symbols within this section only. - const filtered_nlists = NlistWithIndex.filterInSection(sorted_nlists, sect); - - macho_file.has_dices = macho_file.has_dices or blk: { - if (self.text_section_index) |index| { - if (index != id) break :blk false; - if (self.data_in_code_entries.items.len == 0) break :blk false; - break :blk true; - } - break :blk false; - }; - macho_file.has_stabs = macho_file.has_stabs or self.debug_info != null; - - // Since there is no symbol to refer to this atom, we create - // a temp one, unless we already did that when working out the relocations - // of other atoms. - const atom_local_sym_index = self.sections_as_symbols.get(sect_id) orelse blk: { - const atom_local_sym_index = @intCast(u32, macho_file.locals.items.len); - try macho_file.locals.append(allocator, .{ - .n_strx = 0, - .n_type = macho.N_SECT, - .n_sect = @intCast(u8, macho_file.section_ordinals.getIndex(match).? + 1), - .n_desc = 0, - .n_value = 0, - }); - try self.sections_as_symbols.putNoClobber(allocator, sect_id, atom_local_sym_index); - break :blk atom_local_sym_index; - }; - const alignment = try math.powi(u32, 2, sect.@"align"); - const aligned_size = mem.alignForwardGeneric(u64, sect.size, alignment); - const atom = try macho_file.createEmptyAtom(atom_local_sym_index, aligned_size, sect.@"align"); + log.debug(" output sect({d}, '{s},{s}')", .{ + macho_file.getSectionOrdinal(match), + macho_file.getSection(match).segName(), + macho_file.getSection(match).sectName(), + }); + const arch = macho_file.base.options.target.cpu.arch; const is_zerofill = blk: { const section_type = sect.type_(); break :blk section_type == macho.S_ZEROFILL or section_type == macho.S_THREAD_LOCAL_ZEROFILL; }; - if (!is_zerofill) { - mem.copy(u8, atom.code.items, code); - } - // TODO stage2 bug: @alignCast shouldn't be needed - try atom.parseRelocs(@alignCast(@alignOf(macho.relocation_info), relocs), .{ - .base_addr = sect.addr, - .allocator = allocator, - .object = self, - .macho_file = macho_file, - }); + // Read section's code + const code: ?[]const u8 = if (!is_zerofill) try self.getSectionContents(sect_id) else null; - if (macho_file.has_dices) { - const dices = filterDice(self.data_in_code_entries.items, sect.addr, sect.addr + sect.size); - try atom.dices.ensureTotalCapacity(allocator, dices.len); + // Read section's list of relocations + const raw_relocs = self.contents[sect.reloff..][0 .. sect.nreloc * @sizeOf(macho.relocation_info)]; + const relocs = mem.bytesAsSlice( + macho.relocation_info, + @alignCast(@alignOf(macho.relocation_info), raw_relocs), + ); - for (dices) |dice| { - atom.dices.appendAssumeCapacity(.{ - .offset = dice.offset - (math.cast(u32, sect.addr) orelse return error.Overflow), - .length = dice.length, - .kind = dice.kind, - }); + // Symbols within this section only. 
+ const filtered_syms = filterSymbolsByAddress( + sorted_syms, + sect.addr, + sect.addr + sect.size, + context, + ); + + if (subsections_via_symbols and filtered_syms.len > 0) { + // If the first nlist does not match the start of the section, + // then we need to encapsulate the memory range [section start, first symbol) + // as a temporary symbol and insert the matching Atom. + const first_sym = filtered_syms[0].getSymbol(context); + if (first_sym.n_value > sect.addr) { + const sym_index = self.sections_as_symbols.get(sect_id) orelse blk: { + const sym_index = @intCast(u32, self.symtab.items.len); + try self.symtab.append(gpa, .{ + .n_strx = 0, + .n_type = macho.N_SECT, + .n_sect = macho_file.getSectionOrdinal(match), + .n_desc = 0, + .n_value = sect.addr, + }); + try self.sections_as_symbols.putNoClobber(gpa, sect_id, sym_index); + break :blk sym_index; + }; + const atom_size = first_sym.n_value - sect.addr; + const atom_code: ?[]const u8 = if (code) |cc| blk: { + const size = math.cast(usize, atom_size) orelse return error.Overflow; + break :blk cc[0..size]; + } else null; + const atom = try self.createAtomFromSubsection( + macho_file, + object_id, + sym_index, + atom_size, + sect.@"align", + atom_code, + relocs, + &.{}, + match, + sect, + ); + try macho_file.addAtomToSection(atom, match); } - } - // Since this is atom gets a helper local temporary symbol that didn't exist - // in the object file which encompasses the entire section, we need traverse - // the filtered symbols and note which symbol is contained within so that - // we can properly allocate addresses down the line. - // While we're at it, we need to update segment,section mapping of each symbol too. - try atom.contained.ensureTotalCapacity(allocator, filtered_nlists.len); - - for (filtered_nlists) |nlist_with_index| { - const nlist = nlist_with_index.nlist; - const local_sym_index = self.symbol_mapping.get(nlist_with_index.index) orelse unreachable; - const local = &macho_file.locals.items[local_sym_index]; - local.n_sect = @intCast(u8, macho_file.section_ordinals.getIndex(match).? + 1); - - const stab: ?Atom.Stab = if (self.debug_info) |di| blk: { - // TODO there has to be a better to handle this. - for (di.inner.func_list.items) |func| { - if (func.pc_range) |range| { - if (nlist.n_value >= range.start and nlist.n_value < range.end) { - break :blk Atom.Stab{ - .function = range.end - range.start, - }; - } - } + var next_sym_count: usize = 0; + while (next_sym_count < filtered_syms.len) { + const next_sym = filtered_syms[next_sym_count].getSymbol(context); + const addr = next_sym.n_value; + const atom_syms = filterSymbolsByAddress( + filtered_syms[next_sym_count..], + addr, + addr + 1, + context, + ); + next_sym_count += atom_syms.len; + + // We want to bubble up the first externally defined symbol here. 
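The "bubbling up" is done by sorting each same-address group so the most senior symbol ends up owning the atom, per the ordering documented on `lessThanBySeniority` above: extern beats weak/private extern, which beats a named local, which beats an assembler temp. A simplified rank function capturing the same order (the `Sym` shape is invented for the example):

```zig
const std = @import("std");
const mem = std.mem;

const Sym = struct { name: []const u8, ext: bool, weak: bool };

fn rank(sym: Sym) u8 {
    if (sym.ext and !sym.weak) return 3; // strong external definition
    if (sym.ext) return 2; // weak or private extern
    if (mem.startsWith(u8, sym.name, "l") or mem.startsWith(u8, sym.name, "L")) return 0; // temp
    return 1; // plain named local
}

test "the external symbol wins ownership of the atom" {
    const temp = Sym{ .name = "ltmp0", .ext = false, .weak = false };
    const exported = Sym{ .name = "_foo", .ext = true, .weak = false };
    try std.testing.expect(rank(exported) > rank(temp));
}
```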
+ assert(atom_syms.len > 0); + var sorted_atom_syms = std.ArrayList(SymbolAtIndex).init(gpa); + defer sorted_atom_syms.deinit(); + try sorted_atom_syms.appendSlice(atom_syms); + sort.sort( + SymbolAtIndex, + sorted_atom_syms.items, + context, + SymbolAtIndex.greaterThanBySeniority, + ); + + const atom_size = blk: { + const end_addr = if (next_sym_count < filtered_syms.len) + filtered_syms[next_sym_count].getSymbol(context).n_value + else + sect.addr + sect.size; + break :blk end_addr - addr; + }; + const atom_code: ?[]const u8 = if (code) |cc| blk: { + const start = math.cast(usize, addr - sect.addr) orelse return error.Overflow; + const size = math.cast(usize, atom_size) orelse return error.Overflow; + break :blk cc[start..][0..size]; + } else null; + const atom_align = if (addr > 0) + math.min(@ctz(u64, addr), sect.@"align") + else + sect.@"align"; + const atom = try self.createAtomFromSubsection( + macho_file, + object_id, + sorted_atom_syms.items[0].index, + atom_size, + atom_align, + atom_code, + relocs, + sorted_atom_syms.items[1..], + match, + sect, + ); + + if (arch == .x86_64 and addr == sect.addr) { + // In x86_64 relocs, it can so happen that the compiler refers to the same + // atom by both the actual assigned symbol and the start of the section. In this + // case, we need to link the two together so add an alias. + const alias = self.sections_as_symbols.get(sect_id) orelse blk: { + const alias = @intCast(u32, self.symtab.items.len); + try self.symtab.append(gpa, .{ + .n_strx = 0, + .n_type = macho.N_SECT, + .n_sect = macho_file.getSectionOrdinal(match), + .n_desc = 0, + .n_value = addr, + }); + try self.sections_as_symbols.putNoClobber(gpa, sect_id, alias); + break :blk alias; + }; + try atom.contained.append(gpa, .{ + .sym_index = alias, + .offset = 0, + }); + try self.atom_by_index_table.put(gpa, alias, atom); } - // TODO - // if (zld.globals.contains(zld.getString(sym.strx))) break :blk .global; - break :blk .static; - } else null; - - atom.contained.appendAssumeCapacity(.{ - .local_sym_index = local_sym_index, - .offset = nlist.n_value - sect.addr, - .stab = stab, - }); - } - if (!self.start_atoms.contains(match)) { - try self.start_atoms.putNoClobber(allocator, match, atom); - } - - if (self.end_atoms.getPtr(match)) |last| { - last.*.next = atom; - atom.prev = last.*; - last.* = atom; + try macho_file.addAtomToSection(atom, match); + } } else { - try self.end_atoms.putNoClobber(allocator, match, atom); + // If there is no symbol to refer to this atom, we create + // a temp one, unless we already did that when working out the relocations + // of other atoms. 
+ const sym_index = self.sections_as_symbols.get(sect_id) orelse blk: { + const sym_index = @intCast(u32, self.symtab.items.len); + try self.symtab.append(gpa, .{ + .n_strx = 0, + .n_type = macho.N_SECT, + .n_sect = macho_file.getSectionOrdinal(match), + .n_desc = 0, + .n_value = sect.addr, + }); + try self.sections_as_symbols.putNoClobber(gpa, sect_id, sym_index); + break :blk sym_index; + }; + const atom = try self.createAtomFromSubsection( + macho_file, + object_id, + sym_index, + sect.size, + sect.@"align", + code, + relocs, + filtered_syms, + match, + sect, + ); + try macho_file.addAtomToSection(atom, match); } - try self.contained_atoms.append(allocator, atom); } } -fn parseSymtab(self: *Object, allocator: Allocator) !void { - const index = self.symtab_cmd_index orelse return; - const symtab_cmd = self.load_commands.items[index].symtab; - - var symtab = try allocator.alloc(u8, @sizeOf(macho.nlist_64) * symtab_cmd.nsyms); - defer allocator.free(symtab); - _ = try self.file.preadAll(symtab, symtab_cmd.symoff); - const slice = @alignCast(@alignOf(macho.nlist_64), mem.bytesAsSlice(macho.nlist_64, symtab)); - try self.symtab.appendSlice(allocator, slice); - - var strtab = try allocator.alloc(u8, symtab_cmd.strsize); - defer allocator.free(strtab); - _ = try self.file.preadAll(strtab, symtab_cmd.stroff); - try self.strtab.appendSlice(allocator, strtab); -} - -pub fn parseDebugInfo(self: *Object, allocator: Allocator) !void { - log.debug("parsing debug info in '{s}'", .{self.name}); +fn createAtomFromSubsection( + self: *Object, + macho_file: *MachO, + object_id: u32, + sym_index: u32, + size: u64, + alignment: u32, + code: ?[]const u8, + relocs: []const macho.relocation_info, + indexes: []const SymbolAtIndex, + match: MatchingSection, + sect: macho.section_64, +) !*Atom { + const gpa = macho_file.base.allocator; + const sym = self.symtab.items[sym_index]; + const atom = try MachO.createEmptyAtom(gpa, sym_index, size, alignment); + atom.file = object_id; + self.symtab.items[sym_index].n_sect = macho_file.getSectionOrdinal(match); + + log.debug("creating ATOM(%{d}, '{s}') in sect({d}, '{s},{s}') in object({d})", .{ + sym_index, + self.getString(sym.n_strx), + macho_file.getSectionOrdinal(match), + macho_file.getSection(match).segName(), + macho_file.getSection(match).sectName(), + object_id, + }); + + try self.atom_by_index_table.putNoClobber(gpa, sym_index, atom); + try self.managed_atoms.append(gpa, atom); + + if (code) |cc| { + assert(size == cc.len); + mem.copy(u8, atom.code.items, cc); + } - var debug_info = blk: { - var di = try DebugInfo.parseFromObject(allocator, self); - break :blk di orelse return; - }; + const base_offset = sym.n_value - sect.addr; + const filtered_relocs = filterRelocs(relocs, base_offset, base_offset + size); + try atom.parseRelocs(filtered_relocs, .{ + .macho_file = macho_file, + .base_addr = sect.addr, + .base_offset = @intCast(i32, base_offset), + }); + + // Since this is atom gets a helper local temporary symbol that didn't exist + // in the object file which encompasses the entire section, we need traverse + // the filtered symbols and note which symbol is contained within so that + // we can properly allocate addresses down the line. + // While we're at it, we need to update segment,section mapping of each symbol too. 
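Concretely, each contained symbol is recorded as an index plus its delta from the atom's owning symbol, so that once the atom is assigned an address every inner symbol can be placed at `atom_address + offset`. The record appended per entry just below looks roughly like this (the struct name is a guess; the two fields are the ones set in the loop):

```zig
const SymbolAtOffset = struct {
    sym_index: u32, // inner symbol, indexed into the object's symtab
    offset: u64, // inner_sym.n_value - owning_sym.n_value at parse time
};
```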
+ try atom.contained.ensureTotalCapacity(gpa, indexes.len); + for (indexes) |inner_sym_index| { + const inner_sym = &self.symtab.items[inner_sym_index.index]; + inner_sym.n_sect = macho_file.getSectionOrdinal(match); + atom.contained.appendAssumeCapacity(.{ + .sym_index = inner_sym_index.index, + .offset = inner_sym.n_value - sym.n_value, + }); - // We assume there is only one CU. - const compile_unit = debug_info.inner.findCompileUnit(0x0) catch |err| switch (err) { - error.MissingDebugInfo => { - // TODO audit cases with missing debug info and audit our dwarf.zig module. - log.debug("invalid or missing debug info in {s}; skipping", .{self.name}); - return; - }, - else => |e| return e, - }; - const name = try compile_unit.die.getAttrString(&debug_info.inner, dwarf.AT.name); - const comp_dir = try compile_unit.die.getAttrString(&debug_info.inner, dwarf.AT.comp_dir); + try self.atom_by_index_table.putNoClobber(gpa, inner_sym_index.index, atom); + } - self.debug_info = debug_info; - self.tu_name = try allocator.dupe(u8, name); - self.tu_comp_dir = try allocator.dupe(u8, comp_dir); + return atom; +} - if (self.mtime == null) { - self.mtime = mtime: { - const stat = self.file.stat() catch break :mtime 0; - break :mtime @intCast(u64, @divFloor(stat.mtime, 1_000_000_000)); - }; - } +fn parseSymtab(self: *Object, allocator: Allocator) !void { + const index = self.symtab_cmd_index orelse return; + const symtab = self.load_commands.items[index].symtab; + try self.symtab.appendSlice(allocator, self.getSourceSymtab()); + self.strtab = self.contents[symtab.stroff..][0..symtab.strsize]; } -pub fn parseDataInCode(self: *Object, allocator: Allocator) !void { - const index = self.data_in_code_cmd_index orelse return; - const data_in_code = self.load_commands.items[index].linkedit_data; +pub fn getSourceSymtab(self: Object) []const macho.nlist_64 { + const index = self.symtab_cmd_index orelse return &[0]macho.nlist_64{}; + const symtab = self.load_commands.items[index].symtab; + const symtab_size = @sizeOf(macho.nlist_64) * symtab.nsyms; + const raw_symtab = self.contents[symtab.symoff..][0..symtab_size]; + return mem.bytesAsSlice( + macho.nlist_64, + @alignCast(@alignOf(macho.nlist_64), raw_symtab), + ); +} - var buffer = try allocator.alloc(u8, data_in_code.datasize); - defer allocator.free(buffer); +pub fn getSourceSymbol(self: Object, index: u32) ?macho.nlist_64 { + const symtab = self.getSourceSymtab(); + if (index >= symtab.len) return null; + return symtab[index]; +} - _ = try self.file.preadAll(buffer, data_in_code.dataoff); +pub fn getSourceSection(self: Object, index: u16) macho.section_64 { + const seg = self.load_commands.items[self.segment_cmd_index.?].segment; + assert(index < seg.sections.items.len); + return seg.sections.items[index]; +} - var stream = io.fixedBufferStream(buffer); - var reader = stream.reader(); - while (true) { - const dice = reader.readStruct(macho.data_in_code_entry) catch |err| switch (err) { - error.EndOfStream => break, - }; - try self.data_in_code_entries.append(allocator, dice); - } +pub fn parseDataInCode(self: Object) ?[]const macho.data_in_code_entry { + const index = self.data_in_code_cmd_index orelse return null; + const data_in_code = self.load_commands.items[index].linkedit_data; + const raw_dice = self.contents[data_in_code.dataoff..][0..data_in_code.datasize]; + return mem.bytesAsSlice( + macho.data_in_code_entry, + @alignCast(@alignOf(macho.data_in_code_entry), raw_dice), + ); } -fn readSection(self: Object, allocator: Allocator, index: u16) ![]u8 { - 
const seg = self.load_commands.items[self.segment_cmd_index.?].segment;
- const sect = seg.sections.items[index];
- var buffer = try allocator.alloc(u8, @intCast(usize, sect.size));
- _ = try self.file.preadAll(buffer, sect.offset);
- return buffer;
+pub fn getSectionContents(self: Object, index: u16) error{Overflow}![]const u8 {
+ const sect = self.getSourceSection(index);
+ const size = math.cast(usize, sect.size) orelse return error.Overflow;
+ log.debug("getting {s},{s} data at 0x{x} - 0x{x}", .{
+ sect.segName(),
+ sect.sectName(),
+ sect.offset,
+ sect.offset + sect.size,
+ });
+ return self.contents[sect.offset..][0..size];
}

pub fn getString(self: Object, off: u32) []const u8 {
- assert(off < self.strtab.items.len);
- return mem.sliceTo(@ptrCast([*:0]const u8, self.strtab.items.ptr + off), 0);
+ assert(off < self.strtab.len);
+ return mem.sliceTo(@ptrCast([*:0]const u8, self.strtab.ptr + off), 0);
+}
+
+pub fn getAtomForSymbol(self: Object, sym_index: u32) ?*Atom {
+ return self.atom_by_index_table.get(sym_index);
}
diff --git a/src/link/MachO/dead_strip.zig b/src/link/MachO/dead_strip.zig
new file mode 100644
index 0000000000..909a0450d6
--- /dev/null
+++ b/src/link/MachO/dead_strip.zig
@@ -0,0 +1,292 @@
+const std = @import("std");
+const assert = std.debug.assert;
+const log = std.log.scoped(.dead_strip);
+const macho = std.macho;
+const math = std.math;
+const mem = std.mem;
+
+const Allocator = mem.Allocator;
+const Atom = @import("Atom.zig");
+const MachO = @import("../MachO.zig");
+const MatchingSection = MachO.MatchingSection;
+
+pub fn gcAtoms(macho_file: *MachO) !void {
+ const gpa = macho_file.base.allocator;
+ var arena_allocator = std.heap.ArenaAllocator.init(gpa);
+ defer arena_allocator.deinit();
+ const arena = arena_allocator.allocator();
+
+ var roots = std.AutoHashMap(*Atom, void).init(arena);
+ try collectRoots(&roots, macho_file);
+
+ var alive = std.AutoHashMap(*Atom, void).init(arena);
+ try mark(roots, &alive, macho_file);
+
+ try prune(arena, alive, macho_file);
+}
+
+fn removeAtomFromSection(atom: *Atom, match: MatchingSection, macho_file: *MachO) void {
+ const sect = macho_file.getSectionPtr(match);
+
+ // If we want to enable GC for the incremental codepath, we need to take into
+ // account any padding that might have been left here.
+ sect.size -= atom.size;
+
+ if (atom.prev) |prev| {
+ prev.next = atom.next;
+ }
+ if (atom.next) |next| {
+ next.prev = atom.prev;
+ } else {
+ const last = macho_file.atoms.getPtr(match).?;
+ if (atom.prev) |prev| {
+ last.* = prev;
+ } else {
+ // The section will be GCed in the next step.
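+ // (Editor's note: this branch is only reached when both atom.prev and
+ // atom.next are null, i.e. the atom being unlinked was the sole atom left
+ // in this section's doubly-linked list, so there is no valid atom for the
+ // tail pointer to fall back to.)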
+ last.* = undefined; + sect.size = 0; + } + } +} + +fn collectRoots(roots: *std.AutoHashMap(*Atom, void), macho_file: *MachO) !void { + const output_mode = macho_file.base.options.output_mode; + + switch (output_mode) { + .Exe => { + // Add entrypoint as GC root + const global = try macho_file.getEntryPoint(); + const atom = macho_file.getAtomForSymbol(global).?; // panic here means fatal error + _ = try roots.getOrPut(atom); + }, + else => |other| { + assert(other == .Lib); + // Add exports as GC roots + for (macho_file.globals.values()) |global| { + const sym = macho_file.getSymbol(global); + if (!sym.sect()) continue; + const atom = macho_file.getAtomForSymbol(global) orelse { + log.debug("skipping {s}", .{macho_file.getSymbolName(global)}); + continue; + }; + _ = try roots.getOrPut(atom); + log.debug("adding root", .{}); + macho_file.logAtom(atom); + } + }, + } + + // TODO just a temp until we learn how to parse unwind records + if (macho_file.globals.get("___gxx_personality_v0")) |global| { + if (macho_file.getAtomForSymbol(global)) |atom| { + _ = try roots.getOrPut(atom); + log.debug("adding root", .{}); + macho_file.logAtom(atom); + } + } + + for (macho_file.objects.items) |object| { + for (object.managed_atoms.items) |atom| { + const source_sym = object.getSourceSymbol(atom.sym_index) orelse continue; + if (source_sym.tentative()) continue; + const source_sect = object.getSourceSection(source_sym.n_sect - 1); + const is_gc_root = blk: { + if (source_sect.isDontDeadStrip()) break :blk true; + if (mem.eql(u8, "__StaticInit", source_sect.sectName())) break :blk true; + switch (source_sect.type_()) { + macho.S_MOD_INIT_FUNC_POINTERS, + macho.S_MOD_TERM_FUNC_POINTERS, + => break :blk true, + else => break :blk false, + } + }; + if (is_gc_root) { + try roots.putNoClobber(atom, {}); + log.debug("adding root", .{}); + macho_file.logAtom(atom); + } + } + } +} + +fn markLive(atom: *Atom, alive: *std.AutoHashMap(*Atom, void), macho_file: *MachO) anyerror!void { + const gop = try alive.getOrPut(atom); + if (gop.found_existing) return; + + log.debug("marking live", .{}); + macho_file.logAtom(atom); + + for (atom.relocs.items) |rel| { + const target_atom = rel.getTargetAtom(macho_file) orelse continue; + try markLive(target_atom, alive, macho_file); + } +} + +fn refersLive(atom: *Atom, alive: std.AutoHashMap(*Atom, void), macho_file: *MachO) bool { + for (atom.relocs.items) |rel| { + const target_atom = rel.getTargetAtom(macho_file) orelse continue; + if (alive.contains(target_atom)) return true; + } + return false; +} + +fn refersDead(atom: *Atom, macho_file: *MachO) bool { + for (atom.relocs.items) |rel| { + const target_atom = rel.getTargetAtom(macho_file) orelse continue; + const target_sym = target_atom.getSymbol(macho_file); + if (target_sym.n_desc == MachO.N_DESC_GCED) return true; + } + return false; +} + +fn mark( + roots: std.AutoHashMap(*Atom, void), + alive: *std.AutoHashMap(*Atom, void), + macho_file: *MachO, +) !void { + try alive.ensureUnusedCapacity(roots.count()); + + var it = roots.keyIterator(); + while (it.next()) |root| { + try markLive(root.*, alive, macho_file); + } + + var loop: bool = true; + while (loop) { + loop = false; + + for (macho_file.objects.items) |object| { + for (object.managed_atoms.items) |atom| { + if (alive.contains(atom)) continue; + const source_sym = object.getSourceSymbol(atom.sym_index) orelse continue; + if (source_sym.tentative()) continue; + const source_sect = object.getSourceSection(source_sym.n_sect - 1); + if 
(source_sect.isDontDeadStripIfReferencesLive() and refersLive(atom, alive.*, macho_file)) {
+ try markLive(atom, alive, macho_file);
+ loop = true;
+ }
+ }
+ }
+ }
+}
+
+fn prune(arena: Allocator, alive: std.AutoHashMap(*Atom, void), macho_file: *MachO) !void {
+ // Any section that ends up here will be updated, that is,
+ // its size and alignment will be recalculated.
+ var gc_sections = std.AutoHashMap(MatchingSection, void).init(arena);
+ var loop: bool = true;
+ while (loop) {
+ loop = false;
+
+ for (macho_file.objects.items) |object| {
+ for (object.getSourceSymtab()) |_, source_index| {
+ const atom = object.getAtomForSymbol(@intCast(u32, source_index)) orelse continue;
+ if (alive.contains(atom)) continue;
+
+ const global = atom.getSymbolWithLoc();
+ const sym = atom.getSymbolPtr(macho_file);
+ const match = macho_file.getMatchingSectionFromOrdinal(sym.n_sect);
+
+ if (sym.n_desc == MachO.N_DESC_GCED) continue;
+ if (!sym.ext() and !refersDead(atom, macho_file)) continue;
+
+ macho_file.logAtom(atom);
+ sym.n_desc = MachO.N_DESC_GCED;
+ removeAtomFromSection(atom, match, macho_file);
+ _ = try gc_sections.put(match, {});
+
+ for (atom.contained.items) |sym_off| {
+ const inner = macho_file.getSymbolPtr(.{
+ .sym_index = sym_off.sym_index,
+ .file = atom.file,
+ });
+ inner.n_desc = MachO.N_DESC_GCED;
+ }
+
+ if (macho_file.got_entries_table.contains(global)) {
+ const got_atom = macho_file.getGotAtomForSymbol(global).?;
+ const got_sym = got_atom.getSymbolPtr(macho_file);
+ got_sym.n_desc = MachO.N_DESC_GCED;
+ }
+
+ if (macho_file.stubs_table.contains(global)) {
+ const stubs_atom = macho_file.getStubsAtomForSymbol(global).?;
+ const stubs_sym = stubs_atom.getSymbolPtr(macho_file);
+ stubs_sym.n_desc = MachO.N_DESC_GCED;
+ }
+
+ if (macho_file.tlv_ptr_entries_table.contains(global)) {
+ const tlv_ptr_atom = macho_file.getTlvPtrAtomForSymbol(global).?;
+ const tlv_ptr_sym = tlv_ptr_atom.getSymbolPtr(macho_file);
+ tlv_ptr_sym.n_desc = MachO.N_DESC_GCED;
+ }
+
+ loop = true;
+ }
+ }
+ }
+
+ for (macho_file.got_entries.items) |entry| {
+ const sym = entry.getSymbol(macho_file);
+ if (sym.n_desc != MachO.N_DESC_GCED) continue;
+
+ // TODO tombstone
+ const atom = entry.getAtom(macho_file);
+ const match = macho_file.getMatchingSectionFromOrdinal(sym.n_sect);
+ removeAtomFromSection(atom, match, macho_file);
+ _ = try gc_sections.put(match, {});
+ _ = macho_file.got_entries_table.remove(entry.target);
+ }
+
+ for (macho_file.stubs.items) |entry| {
+ const sym = entry.getSymbol(macho_file);
+ if (sym.n_desc != MachO.N_DESC_GCED) continue;
+
+ // TODO tombstone
+ const atom = entry.getAtom(macho_file);
+ const match = macho_file.getMatchingSectionFromOrdinal(sym.n_sect);
+ removeAtomFromSection(atom, match, macho_file);
+ _ = try gc_sections.put(match, {});
+ _ = macho_file.stubs_table.remove(entry.target);
+ }
+
+ for (macho_file.tlv_ptr_entries.items) |entry| {
+ const sym = entry.getSymbol(macho_file);
+ if (sym.n_desc != MachO.N_DESC_GCED) continue;
+
+ // TODO tombstone
+ const atom = entry.getAtom(macho_file);
+ const match = macho_file.getMatchingSectionFromOrdinal(sym.n_sect);
+ removeAtomFromSection(atom, match, macho_file);
+ _ = try gc_sections.put(match, {});
+ _ = macho_file.tlv_ptr_entries_table.remove(entry.target);
+ }
+
+ var gc_sections_it = gc_sections.iterator();
+ while (gc_sections_it.next()) |entry| {
+ const match = entry.key_ptr.*;
+ const sect = macho_file.getSectionPtr(match);
+ if (sect.size == 0) continue; // Pruning happens automatically in the next step.
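+ // (Editor's note: the loop below rewinds to the first atom of the section's
+ // list and replays layout: each atom is padded to its alignment, the padding
+ // plus the atom's size is added to sect.size, and sect.@"align" becomes the
+ // maximum alignment seen. Hypothetical example: two surviving atoms of size 4
+ // with alignment 3 (i.e. 2^3 = 8 bytes) yield
+ // sect.size = 4 + 4 (padding) + 4 = 12 and sect.@"align" = 3.)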
+ + sect.@"align" = 0; + sect.size = 0; + + var atom = macho_file.atoms.get(match).?; + + while (atom.prev) |prev| { + atom = prev; + } + + while (true) { + const atom_alignment = try math.powi(u32, 2, atom.alignment); + const aligned_end_addr = mem.alignForwardGeneric(u64, sect.size, atom_alignment); + const padding = aligned_end_addr - sect.size; + sect.size += padding + atom.size; + sect.@"align" = @maximum(sect.@"align", atom.alignment); + + if (atom.next) |next| { + atom = next; + } else break; + } + } +} diff --git a/src/link/strtab.zig b/src/link/strtab.zig new file mode 100644 index 0000000000..ae9b00027e --- /dev/null +++ b/src/link/strtab.zig @@ -0,0 +1,113 @@ +const std = @import("std"); +const mem = std.mem; + +const Allocator = mem.Allocator; +const StringIndexAdapter = std.hash_map.StringIndexAdapter; +const StringIndexContext = std.hash_map.StringIndexContext; + +pub fn StringTable(comptime log_scope: @Type(.EnumLiteral)) type { + return struct { + const Self = @This(); + + const log = std.log.scoped(log_scope); + + buffer: std.ArrayListUnmanaged(u8) = .{}, + table: std.HashMapUnmanaged(u32, bool, StringIndexContext, std.hash_map.default_max_load_percentage) = .{}, + + pub fn deinit(self: *Self, gpa: Allocator) void { + self.buffer.deinit(gpa); + self.table.deinit(gpa); + } + + pub fn toOwnedSlice(self: *Self, gpa: Allocator) []const u8 { + const result = self.buffer.toOwnedSlice(gpa); + self.table.clearRetainingCapacity(); + return result; + } + + pub const PrunedResult = struct { + buffer: []const u8, + idx_map: std.AutoHashMap(u32, u32), + }; + + pub fn toPrunedResult(self: *Self, gpa: Allocator) !PrunedResult { + var buffer = std.ArrayList(u8).init(gpa); + defer buffer.deinit(); + try buffer.ensureTotalCapacity(self.buffer.items.len); + buffer.appendAssumeCapacity(0); + + var idx_map = std.AutoHashMap(u32, u32).init(gpa); + errdefer idx_map.deinit(); + try idx_map.ensureTotalCapacity(self.table.count()); + + var it = self.table.iterator(); + while (it.next()) |entry| { + const off = entry.key_ptr.*; + const save = entry.value_ptr.*; + if (!save) continue; + const new_off = @intCast(u32, buffer.items.len); + buffer.appendSliceAssumeCapacity(self.getAssumeExists(off)); + idx_map.putAssumeCapacityNoClobber(off, new_off); + } + + self.buffer.clearRetainingCapacity(); + self.table.clearRetainingCapacity(); + + return PrunedResult{ + .buffer = buffer.toOwnedSlice(), + .idx_map = idx_map, + }; + } + + pub fn insert(self: *Self, gpa: Allocator, string: []const u8) !u32 { + const gop = try self.table.getOrPutContextAdapted(gpa, @as([]const u8, string), StringIndexAdapter{ + .bytes = &self.buffer, + }, StringIndexContext{ + .bytes = &self.buffer, + }); + if (gop.found_existing) { + const off = gop.key_ptr.*; + gop.value_ptr.* = true; + log.debug("reusing string '{s}' at offset 0x{x}", .{ string, off }); + return off; + } + + try self.buffer.ensureUnusedCapacity(gpa, string.len + 1); + const new_off = @intCast(u32, self.buffer.items.len); + + log.debug("writing new string '{s}' at offset 0x{x}", .{ string, new_off }); + + self.buffer.appendSliceAssumeCapacity(string); + self.buffer.appendAssumeCapacity(0); + + gop.key_ptr.* = new_off; + gop.value_ptr.* = true; + + return new_off; + } + + pub fn delete(self: *Self, string: []const u8) void { + const value_ptr = self.table.getPtrAdapted(@as([]const u8, string), StringIndexAdapter{ + .bytes = &self.buffer, + }) orelse return; + value_ptr.* = false; + log.debug("marked '{s}' for deletion", .{string}); + } + + pub fn getOffset(self: 
*Self, string: []const u8) ?u32 { + return self.table.getKeyAdapted(string, StringIndexAdapter{ + .bytes = &self.buffer, + }); + } + + pub fn get(self: Self, off: u32) ?[]const u8 { + log.debug("getting string at 0x{x}", .{off}); + if (off >= self.buffer.items.len) return null; + return mem.sliceTo(@ptrCast([*:0]const u8, self.buffer.items.ptr + off), 0); + } + + pub fn getAssumeExists(self: Self, off: u32) []const u8 { + return self.get(off) orelse unreachable; + } + }; +} diff --git a/src/main.zig b/src/main.zig index 823cbf8757..27682003f2 100644 --- a/src/main.zig +++ b/src/main.zig @@ -446,6 +446,8 @@ const usage_build_generic = \\ --compress-debug-sections=[e] Debug section compression settings \\ none No compression \\ zlib Compression with deflate/inflate + \\ --gc-sections Force removal of functions and data that are unreachable by the entry point or exported symbols + \\ --no-gc-sections Don't force removal of unreachable functions and data \\ --subsystem [subsystem] (Windows) /SUBSYSTEM:<subsystem> to the linker \\ --stack [size] Override default stack size \\ --image-base [addr] Set base address for executable image @@ -463,6 +465,7 @@ const usage_build_generic = \\ -search_dylibs_first (Darwin) search `libx.dylib` in each dir in library search paths, then `libx.a` \\ -headerpad [value] (Darwin) set minimum space for future expansion of the load commands in hexadecimal notation \\ -headerpad_max_install_names (Darwin) set enough space as if all paths were MAXPATHLEN + \\ -dead_strip (Darwin) remove functions and data that are unreachable by the entry point or exported symbols \\ -dead_strip_dylibs (Darwin) remove dylibs that are unreachable by the entry point or exported symbols \\ --import-memory (WebAssembly) import memory from the environment \\ --import-table (WebAssembly) import function table from the host environment @@ -969,6 +972,8 @@ fn buildOutputType( }; } else if (mem.eql(u8, arg, "-headerpad_max_install_names")) { headerpad_max_install_names = true; + } else if (mem.eql(u8, arg, "-dead_strip")) { + linker_gc_sections = true; } else if (mem.eql(u8, arg, "-dead_strip_dylibs")) { dead_strip_dylibs = true; } else if (mem.eql(u8, arg, "-T") or mem.eql(u8, arg, "--script")) { @@ -1311,6 +1316,10 @@ fn buildOutputType( try linker_export_symbol_names.append(arg["--export=".len..]); } else if (mem.eql(u8, arg, "-Bsymbolic")) { linker_bind_global_refs_locally = true; + } else if (mem.eql(u8, arg, "--gc-sections")) { + linker_gc_sections = true; + } else if (mem.eql(u8, arg, "--no-gc-sections")) { + linker_gc_sections = false; } else if (mem.eql(u8, arg, "--debug-compile-errors")) { debug_compile_errors = true; } else if (mem.eql(u8, arg, "--verbose-link")) { @@ -1764,6 +1773,8 @@ fn buildOutputType( }; } else if (mem.eql(u8, arg, "-headerpad_max_install_names")) { headerpad_max_install_names = true; + } else if (mem.eql(u8, arg, "-dead_strip")) { + linker_gc_sections = true; } else if (mem.eql(u8, arg, "-dead_strip_dylibs")) { dead_strip_dylibs = true; } else if (mem.eql(u8, arg, "--gc-sections")) { |
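Editor's note: the following is a minimal usage sketch, not part of the commit, for the new generic string table in src/link/strtab.zig above. It assumes the std APIs as of this commit; the import path, log scope name, and test name are illustrative.

const std = @import("std");
const StringTable = @import("link/strtab.zig").StringTable;

test "StringTable deduplicates and round-trips (editor's sketch)" {
    const gpa = std.testing.allocator;
    var strtab = StringTable(.strtab_test){};
    defer strtab.deinit(gpa);

    // The first insertion appends "_main\x00" at offset 0 of the buffer.
    const off = try strtab.insert(gpa, "_main");
    // Re-inserting the same bytes is found via StringIndexAdapter and
    // returns the existing offset instead of appending again.
    const off2 = try strtab.insert(gpa, "_main");
    try std.testing.expectEqual(off, off2);
    try std.testing.expectEqualStrings("_main", strtab.getAssumeExists(off));

    // delete() only flags the entry; the bytes are actually reclaimed when
    // toPrunedResult() rebuilds the buffer and returns an old-to-new offset map.
    strtab.delete("_main");
}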

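Editor's note: the flag spellings come straight from the usage text and argument parsing in the diff above; either spelling enables the new GC pass. Hypothetical invocations (file names and target illustrative):

zig build-exe main.zig -target aarch64-macos -dead_strip
zig build-exe main.zig --gc-sections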