| author | Andrew Kelley <andrew@ziglang.org> | 2022-10-22 10:14:46 -0700 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2022-10-22 10:14:46 -0700 |
| commit | e67c756b9114debdaa566188f677f60e507dfac8 (patch) | |
| tree | b28f0638e0115df6adbb1b402687211d5c37c4da /src | |
| parent | 776f7de9673087597e1a549d3567c18c43c800ab (diff) | |
| parent | 593b75b109f49e7afbe2fe6fa1d519dc5799a3fb (diff) | |
Merge pull request #13260 from ziglang/zld-sync
macho: faster and more memory efficient linker
Diffstat (limited to 'src')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | src/link/MachO.zig | 1103 |
| -rw-r--r-- | src/link/MachO/Archive.zig | 15 |
| -rw-r--r-- | src/link/MachO/Atom.zig | 784 |
| -rw-r--r-- | src/link/MachO/DebugSymbols.zig | 2 |
| -rw-r--r-- | src/link/MachO/DwarfInfo.zig | 467 |
| -rw-r--r-- | src/link/MachO/Object.zig | 775 |
| -rw-r--r-- | src/link/MachO/Relocation.zig | 1 |
| -rw-r--r-- | src/link/MachO/Trie.zig | 62 |
| -rw-r--r-- | src/link/MachO/ZldAtom.zig | 1057 |
| -rw-r--r-- | src/link/MachO/dead_strip.zig | 474 |
| -rw-r--r-- | src/link/MachO/thunks.zig | 364 |
| -rw-r--r-- | src/link/MachO/zld.zig | 5340 |
12 files changed, 6704 insertions, 3740 deletions
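The heart of this diff is the deletion of the dual `.incremental`/`.one_shot` code paths from `src/link/MachO.zig`; per the diffstat, the one-shot (traditional) linker now lives in the new `zld.zig`, `ZldAtom.zig`, `dead_strip.zig`, and `thunks.zig`. A representative API change is `resolveLibSystem`, which turns from a `*MachO` method into a free function taking the sysroot and target explicitly, presumably so the same routine is callable from the standalone one-shot driver. The sketch below condenses the call-site change in `flushModule` exactly as it appears in the diff; `arena`, `comp`, and `libs` are the surrounding locals from that function.

```zig
// Before: reads sysroot/target out of self, so it needs a full *MachO.
try self.resolveLibSystem(arena, comp, &.{}, &libs);

// After: dependencies are explicit arguments.
try resolveLibSystem(
    arena, // Allocator scoped to this flush
    comp, // *Compilation, used to locate the bundled libSystem stubs
    self.base.options.sysroot, // ?[]const u8
    self.base.options.target, // std.Target
    &.{}, // extra search dirs (none here)
    &libs, // out_libs: std.StringArrayHashMap(link.SystemLib)
);
```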
diff --git a/src/link/MachO.zig b/src/link/MachO.zig index a563d17d4a..30bbc2b6c0 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -55,8 +55,6 @@ pub const SearchStrategy = enum { dylibs_first, }; -pub const N_DESC_GCED: u16 = @bitCast(u16, @as(i16, -1)); - const Section = struct { header: macho.section_64, segment_index: u8, @@ -105,8 +103,6 @@ uuid: macho.uuid_command = .{ .uuid = undefined, }, -objects: std.ArrayListUnmanaged(Object) = .{}, -archives: std.ArrayListUnmanaged(Archive) = .{}, dylibs: std.ArrayListUnmanaged(Dylib) = .{}, dylibs_map: std.StringHashMapUnmanaged(u16) = .{}, referenced_dylibs: std.AutoArrayHashMapUnmanaged(u16, void) = .{}, @@ -143,12 +139,6 @@ stub_helper_preamble_atom: ?*Atom = null, strtab: StringTable(.strtab) = .{}, -// TODO I think synthetic tables are a perfect match for some generic refactoring, -// and probably reusable between linker backends too. -tlv_ptr_entries: std.ArrayListUnmanaged(Entry) = .{}, -tlv_ptr_entries_free_list: std.ArrayListUnmanaged(u32) = .{}, -tlv_ptr_entries_table: std.AutoHashMapUnmanaged(SymbolWithLoc, u32) = .{}, - got_entries: std.ArrayListUnmanaged(Entry) = .{}, got_entries_free_list: std.ArrayListUnmanaged(u32) = .{}, got_entries_table: std.AutoHashMapUnmanaged(SymbolWithLoc, u32) = .{}, @@ -276,7 +266,7 @@ pub const SymbolWithLoc = struct { const ideal_factor = 3; /// Default path to dyld -const default_dyld_path: [*:0]const u8 = "/usr/lib/dyld"; +pub const default_dyld_path: [*:0]const u8 = "/usr/lib/dyld"; /// In order for a slice of bytes to be considered eligible to keep metadata pointing at /// it as a possible place to put new symbols, it must have enough room for this many bytes @@ -286,12 +276,12 @@ pub const min_text_capacity = padToIdeal(minimum_text_block_size); /// Default virtual memory offset corresponds to the size of __PAGEZERO segment and /// start of __TEXT segment. -const default_pagezero_vmsize: u64 = 0x100000000; +pub const default_pagezero_vmsize: u64 = 0x100000000; /// We commit 0x1000 = 4096 bytes of space to the header and /// the table of load commands. This should be plenty for any /// potential future extensions. -const default_headerpad_size: u32 = 0x1000; +pub const default_headerpad_size: u32 = 0x1000; pub const Export = struct { sym_index: ?u32 = null, @@ -465,7 +455,14 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No } var libs = std.StringArrayHashMap(link.SystemLib).init(arena); - try self.resolveLibSystem(arena, comp, &.{}, &libs); + try resolveLibSystem( + arena, + comp, + self.base.options.sysroot, + self.base.options.target, + &.{}, + &libs, + ); const id_symlink_basename = "link.id"; @@ -660,15 +657,16 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No } pub fn resolveLibSystem( - self: *MachO, arena: Allocator, comp: *Compilation, + syslibroot: ?[]const u8, + target: std.Target, search_dirs: []const []const u8, out_libs: anytype, ) !void { // If we were given the sysroot, try to look there first for libSystem.B.{dylib, tbd}. var libsystem_available = false; - if (self.base.options.sysroot != null) blk: { + if (syslibroot != null) blk: { // Try stub file first. If we hit it, then we're done as the stub file // re-exports every single symbol definition. 
for (search_dirs) |dir| { @@ -693,7 +691,7 @@ pub fn resolveLibSystem( } if (!libsystem_available) { const libsystem_name = try std.fmt.allocPrint(arena, "libSystem.{d}.tbd", .{ - self.base.options.target.os.version_range.semver.min.major, + target.os.version_range.semver.min.major, }); const full_path = try comp.zig_lib_directory.join(arena, &[_][]const u8{ "libc", "darwin", libsystem_name, @@ -783,94 +781,6 @@ pub fn resolveFramework( return full_path; } -fn parseObject(self: *MachO, path: []const u8) !bool { - const gpa = self.base.allocator; - const file = fs.cwd().openFile(path, .{}) catch |err| switch (err) { - error.FileNotFound => return false, - else => |e| return e, - }; - defer file.close(); - - const name = try gpa.dupe(u8, path); - errdefer gpa.free(name); - const cpu_arch = self.base.options.target.cpu.arch; - const mtime: u64 = mtime: { - const stat = file.stat() catch break :mtime 0; - break :mtime @intCast(u64, @divFloor(stat.mtime, 1_000_000_000)); - }; - const file_stat = try file.stat(); - const file_size = math.cast(usize, file_stat.size) orelse return error.Overflow; - const contents = try file.readToEndAllocOptions(gpa, file_size, file_size, @alignOf(u64), null); - - var object = Object{ - .name = name, - .mtime = mtime, - .contents = contents, - }; - - object.parse(gpa, cpu_arch) catch |err| switch (err) { - error.EndOfStream, error.NotObject => { - object.deinit(gpa); - return false; - }, - else => |e| return e, - }; - - try self.objects.append(gpa, object); - - return true; -} - -fn parseArchive(self: *MachO, path: []const u8, force_load: bool) !bool { - const gpa = self.base.allocator; - const file = fs.cwd().openFile(path, .{}) catch |err| switch (err) { - error.FileNotFound => return false, - else => |e| return e, - }; - errdefer file.close(); - - const name = try gpa.dupe(u8, path); - errdefer gpa.free(name); - const cpu_arch = self.base.options.target.cpu.arch; - const reader = file.reader(); - const fat_offset = try fat.getLibraryOffset(reader, cpu_arch); - try reader.context.seekTo(fat_offset); - - var archive = Archive{ - .name = name, - .fat_offset = fat_offset, - .file = file, - }; - - archive.parse(gpa, reader) catch |err| switch (err) { - error.EndOfStream, error.NotArchive => { - archive.deinit(gpa); - return false; - }, - else => |e| return e, - }; - - if (force_load) { - defer archive.deinit(gpa); - // Get all offsets from the ToC - var offsets = std.AutoArrayHashMap(u32, void).init(gpa); - defer offsets.deinit(); - for (archive.toc.values()) |offs| { - for (offs.items) |off| { - _ = try offsets.getOrPut(off); - } - } - for (offsets.keys()) |off| { - const object = try archive.parseObject(gpa, cpu_arch, off); - try self.objects.append(gpa, object); - } - } else { - try self.archives.append(gpa, archive); - } - - return true; -} - const ParseDylibError = error{ OutOfMemory, EmptyStubFile, @@ -1019,7 +929,6 @@ pub fn parseLibs( .needed = lib_info.needed, .weak = lib_info.weak, })) continue; - if (try self.parseArchive(lib, false)) continue; log.debug("unknown filetype for a library: '{s}'", .{lib}); } @@ -1070,29 +979,7 @@ pub fn parseDependentLibs(self: *MachO, syslibroot: ?[]const u8, dependent_libs: } } -pub fn createEmptyAtom(gpa: Allocator, sym_index: u32, size: u64, alignment: u32) !*Atom { - const size_usize = math.cast(usize, size) orelse return error.Overflow; - const atom = try gpa.create(Atom); - errdefer gpa.destroy(atom); - atom.* = Atom.empty; - atom.sym_index = sym_index; - atom.size = size; - atom.alignment = alignment; - - try 
atom.code.resize(gpa, size_usize); - mem.set(u8, atom.code.items, 0); - - return atom; -} - pub fn writeAtom(self: *MachO, atom: *Atom, code: []const u8) !void { - // TODO: temporary sanity check - assert(atom.code.items.len == 0); - assert(atom.relocs.items.len == 0); - assert(atom.rebases.items.len == 0); - assert(atom.bindings.items.len == 0); - assert(atom.lazy_bindings.items.len == 0); - const sym = atom.getSymbol(self); const section = self.sections.get(sym.n_sect - 1); const file_offset = section.header.offset + sym.n_value - section.header.addr; @@ -1137,10 +1024,7 @@ pub fn allocateSpecialSymbols(self: *MachO) !void { const global = self.getGlobal(name) orelse continue; if (global.file != null) continue; const sym = self.getSymbolPtr(global); - const seg = switch (self.mode) { - .incremental => self.getSegment(self.text_section_index.?), - .one_shot => self.segments.items[self.text_segment_cmd_index.?], - }; + const seg = self.getSegment(self.text_section_index.?); sym.n_sect = 1; sym.n_value = seg.vmaddr; @@ -1155,16 +1039,13 @@ pub fn createGotAtom(self: *MachO, target: SymbolWithLoc) !*Atom { const gpa = self.base.allocator; const sym_index = try self.allocateSymbol(); - const atom = switch (self.mode) { - .incremental => blk: { - const atom = try gpa.create(Atom); - atom.* = Atom.empty; - atom.sym_index = sym_index; - atom.size = @sizeOf(u64); - atom.alignment = @alignOf(u64); - break :blk atom; - }, - .one_shot => try MachO.createEmptyAtom(gpa, sym_index, @sizeOf(u64), 3), + const atom = blk: { + const atom = try gpa.create(Atom); + atom.* = Atom.empty; + atom.sym_index = sym_index; + atom.size = @sizeOf(u64); + atom.alignment = @alignOf(u64); + break :blk atom; }; errdefer gpa.destroy(atom); @@ -1174,61 +1055,31 @@ pub fn createGotAtom(self: *MachO, target: SymbolWithLoc) !*Atom { const sym = atom.getSymbolPtr(self); sym.n_type = macho.N_SECT; sym.n_sect = self.got_section_index.? 
+ 1; + sym.n_value = try self.allocateAtom(atom, atom.size, @alignOf(u64)); - if (self.mode == .incremental) { - sym.n_value = try self.allocateAtom(atom, atom.size, @alignOf(u64)); + log.debug("allocated GOT atom at 0x{x}", .{sym.n_value}); - log.debug("allocated GOT atom at 0x{x}", .{sym.n_value}); + try atom.addRelocation(self, .{ + .@"type" = switch (self.base.options.target.cpu.arch) { + .aarch64 => @enumToInt(macho.reloc_type_arm64.ARM64_RELOC_UNSIGNED), + .x86_64 => @enumToInt(macho.reloc_type_x86_64.X86_64_RELOC_UNSIGNED), + else => unreachable, + }, + .target = target, + .offset = 0, + .addend = 0, + .pcrel = false, + .length = 3, + }); - try atom.addRelocation(self, .{ - .@"type" = switch (self.base.options.target.cpu.arch) { - .aarch64 => @enumToInt(macho.reloc_type_arm64.ARM64_RELOC_UNSIGNED), - .x86_64 => @enumToInt(macho.reloc_type_x86_64.X86_64_RELOC_UNSIGNED), - else => unreachable, - }, - .target = target, + const target_sym = self.getSymbol(target); + if (target_sym.undf()) { + try atom.addBinding(self, .{ + .target = self.getGlobal(self.getSymbolName(target)).?, .offset = 0, - .addend = 0, - .pcrel = false, - .length = 3, }); - - const target_sym = self.getSymbol(target); - if (target_sym.undf()) { - try atom.addBinding(self, .{ - .target = self.getGlobal(self.getSymbolName(target)).?, - .offset = 0, - }); - } else { - try atom.addRebase(self, 0); - } } else { - try atom.relocs.append(gpa, .{ - .offset = 0, - .target = target, - .addend = 0, - .subtractor = null, - .pcrel = false, - .length = 3, - .@"type" = switch (self.base.options.target.cpu.arch) { - .aarch64 => @enumToInt(macho.reloc_type_arm64.ARM64_RELOC_UNSIGNED), - .x86_64 => @enumToInt(macho.reloc_type_x86_64.X86_64_RELOC_UNSIGNED), - else => unreachable, - }, - }); - - const target_sym = self.getSymbol(target); - if (target_sym.undf()) { - const global = self.getGlobal(self.getSymbolName(target)).?; - try atom.bindings.append(gpa, .{ - .target = global, - .offset = 0, - }); - } else { - try atom.rebases.append(gpa, 0); - } - - try self.addAtomToSection(atom); + try atom.addRebase(self, 0); } return atom; @@ -1241,16 +1092,13 @@ pub fn createDyldPrivateAtom(self: *MachO) !void { const gpa = self.base.allocator; const sym_index = try self.allocateSymbol(); - const atom = switch (self.mode) { - .incremental => blk: { - const atom = try gpa.create(Atom); - atom.* = Atom.empty; - atom.sym_index = sym_index; - atom.size = @sizeOf(u64); - atom.alignment = @alignOf(u64); - break :blk atom; - }, - .one_shot => try MachO.createEmptyAtom(gpa, sym_index, @sizeOf(u64), 3), + const atom = blk: { + const atom = try gpa.create(Atom); + atom.* = Atom.empty; + atom.sym_index = sym_index; + atom.size = @sizeOf(u64); + atom.alignment = @alignOf(u64); + break :blk atom; }; errdefer gpa.destroy(atom); @@ -1262,13 +1110,9 @@ pub fn createDyldPrivateAtom(self: *MachO) !void { try self.managed_atoms.append(gpa, atom); try self.atom_by_index_table.putNoClobber(gpa, sym_index, atom); - if (self.mode == .incremental) { - sym.n_value = try self.allocateAtom(atom, atom.size, @alignOf(u64)); - log.debug("allocated dyld_private atom at 0x{x}", .{sym.n_value}); - try self.writePtrWidthAtom(atom); - } else { - try self.addAtomToSection(atom); - } + sym.n_value = try self.allocateAtom(atom, atom.size, @alignOf(u64)); + log.debug("allocated dyld_private atom at 0x{x}", .{sym.n_value}); + try self.writePtrWidthAtom(atom); } pub fn createStubHelperPreambleAtom(self: *MachO) !void { @@ -1282,26 +1126,18 @@ pub fn createStubHelperPreambleAtom(self: 
*MachO) !void { .aarch64 => 6 * @sizeOf(u32), else => unreachable, }; - const alignment: u32 = switch (arch) { - .x86_64 => 0, - .aarch64 => 2, - else => unreachable, - }; const sym_index = try self.allocateSymbol(); - const atom = switch (self.mode) { - .incremental => blk: { - const atom = try gpa.create(Atom); - atom.* = Atom.empty; - atom.sym_index = sym_index; - atom.size = size; - atom.alignment = switch (arch) { - .x86_64 => 1, - .aarch64 => @alignOf(u32), - else => unreachable, - }; - break :blk atom; - }, - .one_shot => try MachO.createEmptyAtom(gpa, sym_index, size, alignment), + const atom = blk: { + const atom = try gpa.create(Atom); + atom.* = Atom.empty; + atom.sym_index = sym_index; + atom.size = size; + atom.alignment = switch (arch) { + .x86_64 => 1, + .aarch64 => @alignOf(u32), + else => unreachable, + }; + break :blk atom; }; errdefer gpa.destroy(atom); @@ -1328,43 +1164,21 @@ pub fn createStubHelperPreambleAtom(self: *MachO) !void { code[9] = 0xff; code[10] = 0x25; - if (self.mode == .incremental) { - try atom.addRelocations(self, 2, .{ .{ - .@"type" = @enumToInt(macho.reloc_type_x86_64.X86_64_RELOC_SIGNED), - .target = .{ .sym_index = dyld_private_sym_index, .file = null }, - .offset = 3, - .addend = 0, - .pcrel = true, - .length = 2, - }, .{ - .@"type" = @enumToInt(macho.reloc_type_x86_64.X86_64_RELOC_GOT), - .target = .{ .sym_index = self.dyld_stub_binder_index.?, .file = null }, - .offset = 11, - .addend = 0, - .pcrel = true, - .length = 2, - } }); - } else { - try atom.relocs.ensureUnusedCapacity(self.base.allocator, 2); - atom.relocs.appendAssumeCapacity(.{ - .offset = 3, - .target = .{ .sym_index = dyld_private_sym_index, .file = null }, - .addend = 0, - .subtractor = null, - .pcrel = true, - .length = 2, - .@"type" = @enumToInt(macho.reloc_type_x86_64.X86_64_RELOC_SIGNED), - }); - atom.relocs.appendAssumeCapacity(.{ - .offset = 11, - .target = .{ .sym_index = self.dyld_stub_binder_index.?, .file = null }, - .addend = 0, - .subtractor = null, - .pcrel = true, - .length = 2, - .@"type" = @enumToInt(macho.reloc_type_x86_64.X86_64_RELOC_GOT), - }); - } + try atom.addRelocations(self, 2, .{ .{ + .@"type" = @enumToInt(macho.reloc_type_x86_64.X86_64_RELOC_SIGNED), + .target = .{ .sym_index = dyld_private_sym_index, .file = null }, + .offset = 3, + .addend = 0, + .pcrel = true, + .length = 2, + }, .{ + .@"type" = @enumToInt(macho.reloc_type_x86_64.X86_64_RELOC_GOT), + .target = .{ .sym_index = self.dyld_stub_binder_index.?, .file = null }, + .offset = 11, + .addend = 0, + .pcrel = true, + .length = 2, + } }); }, .aarch64 => { @@ -1390,75 +1204,35 @@ pub fn createStubHelperPreambleAtom(self: *MachO) !void { // br x16 mem.writeIntLittle(u32, code[20..][0..4], aarch64.Instruction.br(.x16).toU32()); - if (self.mode == .incremental) { - try atom.addRelocations(self, 4, .{ .{ - .@"type" = @enumToInt(macho.reloc_type_arm64.ARM64_RELOC_PAGE21), - .target = .{ .sym_index = dyld_private_sym_index, .file = null }, - .offset = 0, - .addend = 0, - .pcrel = true, - .length = 2, - }, .{ - .@"type" = @enumToInt(macho.reloc_type_arm64.ARM64_RELOC_PAGEOFF12), - .target = .{ .sym_index = dyld_private_sym_index, .file = null }, - .offset = 4, - .addend = 0, - .pcrel = false, - .length = 2, - }, .{ - .@"type" = @enumToInt(macho.reloc_type_arm64.ARM64_RELOC_GOT_LOAD_PAGE21), - .target = .{ .sym_index = self.dyld_stub_binder_index.?, .file = null }, - .offset = 12, - .addend = 0, - .pcrel = true, - .length = 2, - }, .{ - .@"type" = 
@enumToInt(macho.reloc_type_arm64.ARM64_RELOC_GOT_LOAD_PAGEOFF12), - .target = .{ .sym_index = self.dyld_stub_binder_index.?, .file = null }, - .offset = 16, - .addend = 0, - .pcrel = false, - .length = 2, - } }); - } else { - try atom.relocs.ensureUnusedCapacity(gpa, 4); - atom.relocs.appendAssumeCapacity(.{ - .offset = 0, - .target = .{ .sym_index = dyld_private_sym_index, .file = null }, - .addend = 0, - .subtractor = null, - .pcrel = true, - .length = 2, - .@"type" = @enumToInt(macho.reloc_type_arm64.ARM64_RELOC_PAGE21), - }); - atom.relocs.appendAssumeCapacity(.{ - .offset = 4, - .target = .{ .sym_index = dyld_private_sym_index, .file = null }, - .addend = 0, - .subtractor = null, - .pcrel = false, - .length = 2, - .@"type" = @enumToInt(macho.reloc_type_arm64.ARM64_RELOC_PAGEOFF12), - }); - atom.relocs.appendAssumeCapacity(.{ - .offset = 12, - .target = .{ .sym_index = self.dyld_stub_binder_index.?, .file = null }, - .addend = 0, - .subtractor = null, - .pcrel = true, - .length = 2, - .@"type" = @enumToInt(macho.reloc_type_arm64.ARM64_RELOC_GOT_LOAD_PAGE21), - }); - atom.relocs.appendAssumeCapacity(.{ - .offset = 16, - .target = .{ .sym_index = self.dyld_stub_binder_index.?, .file = null }, - .addend = 0, - .subtractor = null, - .pcrel = false, - .length = 2, - .@"type" = @enumToInt(macho.reloc_type_arm64.ARM64_RELOC_GOT_LOAD_PAGEOFF12), - }); - } + try atom.addRelocations(self, 4, .{ .{ + .@"type" = @enumToInt(macho.reloc_type_arm64.ARM64_RELOC_PAGE21), + .target = .{ .sym_index = dyld_private_sym_index, .file = null }, + .offset = 0, + .addend = 0, + .pcrel = true, + .length = 2, + }, .{ + .@"type" = @enumToInt(macho.reloc_type_arm64.ARM64_RELOC_PAGEOFF12), + .target = .{ .sym_index = dyld_private_sym_index, .file = null }, + .offset = 4, + .addend = 0, + .pcrel = false, + .length = 2, + }, .{ + .@"type" = @enumToInt(macho.reloc_type_arm64.ARM64_RELOC_GOT_LOAD_PAGE21), + .target = .{ .sym_index = self.dyld_stub_binder_index.?, .file = null }, + .offset = 12, + .addend = 0, + .pcrel = true, + .length = 2, + }, .{ + .@"type" = @enumToInt(macho.reloc_type_arm64.ARM64_RELOC_GOT_LOAD_PAGEOFF12), + .target = .{ .sym_index = self.dyld_stub_binder_index.?, .file = null }, + .offset = 16, + .addend = 0, + .pcrel = false, + .length = 2, + } }); }, else => unreachable, @@ -1468,14 +1242,9 @@ pub fn createStubHelperPreambleAtom(self: *MachO) !void { try self.managed_atoms.append(gpa, atom); try self.atom_by_index_table.putNoClobber(gpa, sym_index, atom); - if (self.mode == .incremental) { - sym.n_value = try self.allocateAtom(atom, size, atom.alignment); - log.debug("allocated stub preamble atom at 0x{x}", .{sym.n_value}); - try self.writeAtom(atom, code); - } else { - mem.copy(u8, atom.code.items, code); - try self.addAtomToSection(atom); - } + sym.n_value = try self.allocateAtom(atom, size, atom.alignment); + log.debug("allocated stub preamble atom at 0x{x}", .{sym.n_value}); + try self.writeAtom(atom, code); } pub fn createStubHelperAtom(self: *MachO) !*Atom { @@ -1486,26 +1255,18 @@ pub fn createStubHelperAtom(self: *MachO) !*Atom { .aarch64 => 3 * @sizeOf(u32), else => unreachable, }; - const alignment: u2 = switch (arch) { - .x86_64 => 0, - .aarch64 => 2, - else => unreachable, - }; const sym_index = try self.allocateSymbol(); - const atom = switch (self.mode) { - .incremental => blk: { - const atom = try gpa.create(Atom); - atom.* = Atom.empty; - atom.sym_index = sym_index; - atom.size = size; - atom.alignment = switch (arch) { - .x86_64 => 1, - .aarch64 => @alignOf(u32), - else => 
unreachable, - }; - break :blk atom; - }, - .one_shot => try MachO.createEmptyAtom(gpa, sym_index, size, alignment), + const atom = blk: { + const atom = try gpa.create(Atom); + atom.* = Atom.empty; + atom.sym_index = sym_index; + atom.size = size; + atom.alignment = switch (arch) { + .x86_64 => 1, + .aarch64 => @alignOf(u32), + else => unreachable, + }; + break :blk atom; }; errdefer gpa.destroy(atom); @@ -1525,27 +1286,14 @@ pub fn createStubHelperAtom(self: *MachO) !*Atom { // jmpq code[5] = 0xe9; - if (self.mode == .incremental) { - try atom.addRelocation(self, .{ - .@"type" = @enumToInt(macho.reloc_type_x86_64.X86_64_RELOC_BRANCH), - .target = .{ .sym_index = self.stub_helper_preamble_atom.?.sym_index, .file = null }, - .offset = 6, - .addend = 0, - .pcrel = true, - .length = 2, - }); - } else { - try atom.relocs.ensureTotalCapacity(gpa, 1); - atom.relocs.appendAssumeCapacity(.{ - .offset = 6, - .target = .{ .sym_index = self.stub_helper_preamble_atom.?.sym_index, .file = null }, - .addend = 0, - .subtractor = null, - .pcrel = true, - .length = 2, - .@"type" = @enumToInt(macho.reloc_type_x86_64.X86_64_RELOC_BRANCH), - }); - } + try atom.addRelocation(self, .{ + .@"type" = @enumToInt(macho.reloc_type_x86_64.X86_64_RELOC_BRANCH), + .target = .{ .sym_index = self.stub_helper_preamble_atom.?.sym_index, .file = null }, + .offset = 6, + .addend = 0, + .pcrel = true, + .length = 2, + }); }, .aarch64 => { const literal = blk: { @@ -1561,27 +1309,14 @@ pub fn createStubHelperAtom(self: *MachO) !*Atom { mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.b(0).toU32()); // Next 4 bytes 8..12 are just a placeholder populated in `populateLazyBindOffsetsInStubHelper`. - if (self.mode == .incremental) { - try atom.addRelocation(self, .{ - .@"type" = @enumToInt(macho.reloc_type_arm64.ARM64_RELOC_BRANCH26), - .target = .{ .sym_index = self.stub_helper_preamble_atom.?.sym_index, .file = null }, - .offset = 4, - .addend = 0, - .pcrel = true, - .length = 2, - }); - } else { - try atom.relocs.ensureTotalCapacity(gpa, 1); - atom.relocs.appendAssumeCapacity(.{ - .offset = 4, - .target = .{ .sym_index = self.stub_helper_preamble_atom.?.sym_index, .file = null }, - .addend = 0, - .subtractor = null, - .pcrel = true, - .length = 2, - .@"type" = @enumToInt(macho.reloc_type_arm64.ARM64_RELOC_BRANCH26), - }); - } + try atom.addRelocation(self, .{ + .@"type" = @enumToInt(macho.reloc_type_arm64.ARM64_RELOC_BRANCH26), + .target = .{ .sym_index = self.stub_helper_preamble_atom.?.sym_index, .file = null }, + .offset = 4, + .addend = 0, + .pcrel = true, + .length = 2, + }); }, else => unreachable, } @@ -1589,14 +1324,9 @@ pub fn createStubHelperAtom(self: *MachO) !*Atom { try self.managed_atoms.append(gpa, atom); try self.atom_by_index_table.putNoClobber(gpa, sym_index, atom); - if (self.mode == .incremental) { - sym.n_value = try self.allocateAtom(atom, size, atom.alignment); - log.debug("allocated stub helper atom at 0x{x}", .{sym.n_value}); - try self.writeAtom(atom, code); - } else { - mem.copy(u8, atom.code.items, code); - try self.addAtomToSection(atom); - } + sym.n_value = try self.allocateAtom(atom, size, atom.alignment); + log.debug("allocated stub helper atom at 0x{x}", .{sym.n_value}); + try self.writeAtom(atom, code); return atom; } @@ -1604,16 +1334,13 @@ pub fn createStubHelperAtom(self: *MachO) !*Atom { pub fn createLazyPointerAtom(self: *MachO, stub_sym_index: u32, target: SymbolWithLoc) !*Atom { const gpa = self.base.allocator; const sym_index = try self.allocateSymbol(); - const atom = switch 
(self.mode) { - .incremental => blk: { - const atom = try gpa.create(Atom); - atom.* = Atom.empty; - atom.sym_index = sym_index; - atom.size = @sizeOf(u64); - atom.alignment = @alignOf(u64); - break :blk atom; - }, - .one_shot => try MachO.createEmptyAtom(gpa, sym_index, @sizeOf(u64), 3), + const atom = blk: { + const atom = try gpa.create(Atom); + atom.* = Atom.empty; + atom.sym_index = sym_index; + atom.size = @sizeOf(u64); + atom.alignment = @alignOf(u64); + break :blk atom; }; errdefer gpa.destroy(atom); @@ -1621,56 +1348,30 @@ pub fn createLazyPointerAtom(self: *MachO, stub_sym_index: u32, target: SymbolWi sym.n_type = macho.N_SECT; sym.n_sect = self.la_symbol_ptr_section_index.? + 1; - if (self.mode == .incremental) { - try atom.addRelocation(self, .{ - .@"type" = switch (self.base.options.target.cpu.arch) { - .aarch64 => @enumToInt(macho.reloc_type_arm64.ARM64_RELOC_UNSIGNED), - .x86_64 => @enumToInt(macho.reloc_type_x86_64.X86_64_RELOC_UNSIGNED), - else => unreachable, - }, - .target = .{ .sym_index = stub_sym_index, .file = null }, - .offset = 0, - .addend = 0, - .pcrel = false, - .length = 3, - }); - try atom.addRebase(self, 0); - try atom.addLazyBinding(self, .{ - .target = self.getGlobal(self.getSymbolName(target)).?, - .offset = 0, - }); - } else { - try atom.relocs.append(gpa, .{ - .offset = 0, - .target = .{ .sym_index = stub_sym_index, .file = null }, - .addend = 0, - .subtractor = null, - .pcrel = false, - .length = 3, - .@"type" = switch (self.base.options.target.cpu.arch) { - .aarch64 => @enumToInt(macho.reloc_type_arm64.ARM64_RELOC_UNSIGNED), - .x86_64 => @enumToInt(macho.reloc_type_x86_64.X86_64_RELOC_UNSIGNED), - else => unreachable, - }, - }); - try atom.rebases.append(gpa, 0); - const global = self.getGlobal(self.getSymbolName(target)).?; - try atom.lazy_bindings.append(gpa, .{ - .target = global, - .offset = 0, - }); - } + try atom.addRelocation(self, .{ + .@"type" = switch (self.base.options.target.cpu.arch) { + .aarch64 => @enumToInt(macho.reloc_type_arm64.ARM64_RELOC_UNSIGNED), + .x86_64 => @enumToInt(macho.reloc_type_x86_64.X86_64_RELOC_UNSIGNED), + else => unreachable, + }, + .target = .{ .sym_index = stub_sym_index, .file = null }, + .offset = 0, + .addend = 0, + .pcrel = false, + .length = 3, + }); + try atom.addRebase(self, 0); + try atom.addLazyBinding(self, .{ + .target = self.getGlobal(self.getSymbolName(target)).?, + .offset = 0, + }); try self.managed_atoms.append(gpa, atom); try self.atom_by_index_table.putNoClobber(gpa, sym_index, atom); - if (self.mode == .incremental) { - sym.n_value = try self.allocateAtom(atom, atom.size, @alignOf(u64)); - log.debug("allocated lazy pointer atom at 0x{x}", .{sym.n_value}); - try self.writePtrWidthAtom(atom); - } else { - try self.addAtomToSection(atom); - } + sym.n_value = try self.allocateAtom(atom, atom.size, @alignOf(u64)); + log.debug("allocated lazy pointer atom at 0x{x}", .{sym.n_value}); + try self.writePtrWidthAtom(atom); return atom; } @@ -1678,32 +1379,24 @@ pub fn createLazyPointerAtom(self: *MachO, stub_sym_index: u32, target: SymbolWi pub fn createStubAtom(self: *MachO, laptr_sym_index: u32) !*Atom { const gpa = self.base.allocator; const arch = self.base.options.target.cpu.arch; - const alignment: u2 = switch (arch) { - .x86_64 => 0, - .aarch64 => 2, - else => unreachable, // unhandled architecture type - }; const size: u4 = switch (arch) { .x86_64 => 6, .aarch64 => 3 * @sizeOf(u32), else => unreachable, // unhandled architecture type }; const sym_index = try self.allocateSymbol(); - const atom = 
switch (self.mode) { - .incremental => blk: { - const atom = try gpa.create(Atom); - atom.* = Atom.empty; - atom.sym_index = sym_index; - atom.size = size; - atom.alignment = switch (arch) { - .x86_64 => 1, - .aarch64 => @alignOf(u32), - else => unreachable, // unhandled architecture type + const atom = blk: { + const atom = try gpa.create(Atom); + atom.* = Atom.empty; + atom.sym_index = sym_index; + atom.size = size; + atom.alignment = switch (arch) { + .x86_64 => 1, + .aarch64 => @alignOf(u32), + else => unreachable, // unhandled architecture type - }; - break :blk atom; - }, - .one_shot => try MachO.createEmptyAtom(gpa, sym_index, size, alignment), + }; + break :blk atom; }; errdefer gpa.destroy(atom); @@ -1721,26 +1414,14 @@ pub fn createStubAtom(self: *MachO, laptr_sym_index: u32) !*Atom { code[0] = 0xff; code[1] = 0x25; - if (self.mode == .incremental) { - try atom.addRelocation(self, .{ - .@"type" = @enumToInt(macho.reloc_type_x86_64.X86_64_RELOC_BRANCH), - .target = .{ .sym_index = laptr_sym_index, .file = null }, - .offset = 2, - .addend = 0, - .pcrel = true, - .length = 2, - }); - } else { - try atom.relocs.append(gpa, .{ - .offset = 2, - .target = .{ .sym_index = laptr_sym_index, .file = null }, - .addend = 0, - .subtractor = null, - .pcrel = true, - .length = 2, - .@"type" = @enumToInt(macho.reloc_type_x86_64.X86_64_RELOC_BRANCH), - }); - } + try atom.addRelocation(self, .{ + .@"type" = @enumToInt(macho.reloc_type_x86_64.X86_64_RELOC_BRANCH), + .target = .{ .sym_index = laptr_sym_index, .file = null }, + .offset = 2, + .addend = 0, + .pcrel = true, + .length = 2, + }); }, .aarch64 => { // adrp x16, pages @@ -1754,46 +1435,24 @@ pub fn createStubAtom(self: *MachO, laptr_sym_index: u32) !*Atom { // br x16 mem.writeIntLittle(u32, code[8..12], aarch64.Instruction.br(.x16).toU32()); - if (self.mode == .incremental) { - try atom.addRelocations(self, 2, .{ - .{ - .@"type" = @enumToInt(macho.reloc_type_arm64.ARM64_RELOC_PAGE21), - .target = .{ .sym_index = laptr_sym_index, .file = null }, - .offset = 0, - .addend = 0, - .pcrel = true, - .length = 2, - }, - .{ - .@"type" = @enumToInt(macho.reloc_type_arm64.ARM64_RELOC_PAGEOFF12), - .target = .{ .sym_index = laptr_sym_index, .file = null }, - .offset = 4, - .addend = 0, - .pcrel = false, - .length = 2, - }, - }); - } else { - try atom.relocs.ensureTotalCapacity(gpa, 2); - atom.relocs.appendAssumeCapacity(.{ - .offset = 0, + try atom.addRelocations(self, 2, .{ + .{ + .@"type" = @enumToInt(macho.reloc_type_arm64.ARM64_RELOC_PAGE21), .target = .{ .sym_index = laptr_sym_index, .file = null }, + .offset = 0, .addend = 0, - .subtractor = null, .pcrel = true, .length = 2, - .@"type" = @enumToInt(macho.reloc_type_arm64.ARM64_RELOC_PAGE21), - }); - atom.relocs.appendAssumeCapacity(.{ - .offset = 4, + }, + .{ + .@"type" = @enumToInt(macho.reloc_type_arm64.ARM64_RELOC_PAGEOFF12), .target = .{ .sym_index = laptr_sym_index, .file = null }, + .offset = 4, .addend = 0, - .subtractor = null, .pcrel = false, .length = 2, - .@"type" = @enumToInt(macho.reloc_type_arm64.ARM64_RELOC_PAGEOFF12), - }); - } + }, + }); }, else => unreachable, } @@ -1801,96 +1460,13 @@ pub fn createStubAtom(self: *MachO, laptr_sym_index: u32) !*Atom { try self.managed_atoms.append(gpa, atom); try self.atom_by_index_table.putNoClobber(gpa, sym_index, atom); - if (self.mode == .incremental) { - sym.n_value = try self.allocateAtom(atom, size, atom.alignment); - log.debug("allocated stub atom at 0x{x}", .{sym.n_value}); - try self.writeAtom(atom, code); - } else { - mem.copy(u8, 
atom.code.items, code); - try self.addAtomToSection(atom); - } - - return atom; -} - -pub fn createTlvPtrAtom(self: *MachO, target: SymbolWithLoc) !*Atom { - assert(self.mode == .one_shot); - - const gpa = self.base.allocator; - const sym_index = try self.allocateSymbol(); - const atom = try MachO.createEmptyAtom(gpa, sym_index, @sizeOf(u64), 3); - - const target_sym = self.getSymbol(target); - assert(target_sym.undf()); - - const global = self.getGlobal(self.getSymbolName(target)).?; - try atom.bindings.append(gpa, .{ - .target = global, - .offset = 0, - }); - - try self.managed_atoms.append(gpa, atom); - try self.atom_by_index_table.putNoClobber(gpa, sym_index, atom); - - const sym = atom.getSymbolPtr(self); - sym.n_type = macho.N_SECT; - const sect_id = (try self.getOutputSection(.{ - .segname = makeStaticString("__DATA"), - .sectname = makeStaticString("__thread_ptrs"), - .flags = macho.S_THREAD_LOCAL_VARIABLE_POINTERS, - })).?; - sym.n_sect = sect_id + 1; - - try self.addAtomToSection(atom); + sym.n_value = try self.allocateAtom(atom, size, atom.alignment); + log.debug("allocated stub atom at 0x{x}", .{sym.n_value}); + try self.writeAtom(atom, code); return atom; } -pub fn createTentativeDefAtoms(self: *MachO) !void { - assert(self.mode == .one_shot); - const gpa = self.base.allocator; - - for (self.globals.items) |global| { - const sym = self.getSymbolPtr(global); - if (!sym.tentative()) continue; - - log.debug("creating tentative definition for ATOM(%{d}, '{s}') in object({?d})", .{ - global.sym_index, self.getSymbolName(global), global.file, - }); - - // Convert any tentative definition into a regular symbol and allocate - // text blocks for each tentative definition. - const size = sym.n_value; - const alignment = (sym.n_desc >> 8) & 0x0f; - const sect_id = (try self.getOutputSection(.{ - .segname = makeStaticString("__DATA"), - .sectname = makeStaticString("__bss"), - .flags = macho.S_ZEROFILL, - })).?; - sym.* = .{ - .n_strx = sym.n_strx, - .n_type = macho.N_SECT | macho.N_EXT, - .n_sect = sect_id + 1, - .n_desc = 0, - .n_value = 0, - }; - - const atom = try MachO.createEmptyAtom(gpa, global.sym_index, size, alignment); - atom.file = global.file; - - try self.addAtomToSection(atom); - - if (global.file) |file| { - const object = &self.objects.items[file]; - try object.managed_atoms.append(gpa, atom); - try object.atom_by_index_table.putNoClobber(gpa, global.sym_index, atom); - } else { - try self.managed_atoms.append(gpa, atom); - try self.atom_by_index_table.putNoClobber(gpa, global.sym_index, atom); - } - } -} - pub fn createMhExecuteHeaderSymbol(self: *MachO) !void { if (self.base.options.output_mode != .Exe) return; if (self.getGlobal("__mh_execute_header")) |global| { @@ -1989,90 +1565,6 @@ fn resolveGlobalSymbol(self: *MachO, current: SymbolWithLoc) !void { gop.value_ptr.* = current; } -pub fn resolveSymbolsInObject(self: *MachO, object_id: u16) !void { - const object = &self.objects.items[object_id]; - log.debug("resolving symbols in '{s}'", .{object.name}); - - for (object.symtab.items) |sym, index| { - const sym_index = @intCast(u32, index); - const sym_name = object.getString(sym.n_strx); - - if (sym.stab()) { - log.err("unhandled symbol type: stab", .{}); - log.err(" symbol '{s}'", .{sym_name}); - log.err(" first definition in '{s}'", .{object.name}); - return error.UnhandledSymbolType; - } - - if (sym.indr()) { - log.err("unhandled symbol type: indirect", .{}); - log.err(" symbol '{s}'", .{sym_name}); - log.err(" first definition in '{s}'", .{object.name}); - return 
error.UnhandledSymbolType; - } - - if (sym.abs()) { - log.err("unhandled symbol type: absolute", .{}); - log.err(" symbol '{s}'", .{sym_name}); - log.err(" first definition in '{s}'", .{object.name}); - return error.UnhandledSymbolType; - } - - if (sym.sect() and !sym.ext()) { - log.debug("symbol '{s}' local to object {s}; skipping...", .{ - sym_name, - object.name, - }); - continue; - } - - const sym_loc = SymbolWithLoc{ .sym_index = sym_index, .file = object_id }; - self.resolveGlobalSymbol(sym_loc) catch |err| switch (err) { - error.MultipleSymbolDefinitions => { - const global = self.getGlobal(sym_name).?; - log.err("symbol '{s}' defined multiple times", .{sym_name}); - if (global.file) |file| { - log.err(" first definition in '{s}'", .{self.objects.items[file].name}); - } - log.err(" next definition in '{s}'", .{self.objects.items[object_id].name}); - return error.MultipleSymbolDefinitions; - }, - else => |e| return e, - }; - } -} - -pub fn resolveSymbolsInArchives(self: *MachO) !void { - if (self.archives.items.len == 0) return; - - const gpa = self.base.allocator; - const cpu_arch = self.base.options.target.cpu.arch; - var next_sym: usize = 0; - loop: while (next_sym < self.unresolved.count()) { - const global_index = self.unresolved.keys()[next_sym]; - const global = self.globals.items[global_index]; - const sym_name = self.getSymbolName(global); - - for (self.archives.items) |archive| { - // Check if the entry exists in a static archive. - const offsets = archive.toc.get(sym_name) orelse { - // No hit. - continue; - }; - assert(offsets.items.len > 0); - - const object_id = @intCast(u16, self.objects.items.len); - const object = try archive.parseObject(gpa, cpu_arch, offsets.items[0]); - try self.objects.append(gpa, object); - try self.resolveSymbolsInObject(object_id); - - continue :loop; - } - - next_sym += 1; - } -} - pub fn resolveSymbolsInDylibs(self: *MachO) !void { if (self.dylibs.items.len == 0) return; @@ -2209,9 +1701,7 @@ pub fn resolveDyldStubBinder(self: *MachO) !void { const got_atom = try self.createGotAtom(global); self.got_entries.items[got_index].sym_index = got_atom.sym_index; - if (self.mode == .incremental) { - try self.writePtrWidthAtom(got_atom); - } + try self.writePtrWidthAtom(got_atom); } pub fn writeDylinkerLC(ncmds: *u32, lc_writer: anytype) !void { @@ -2236,10 +1726,7 @@ pub fn writeDylinkerLC(ncmds: *u32, lc_writer: anytype) !void { pub fn writeMainLC(self: *MachO, ncmds: *u32, lc_writer: anytype) !void { if (self.base.options.output_mode != .Exe) return; - const seg_id = switch (self.mode) { - .incremental => self.header_segment_cmd_index.?, - .one_shot => self.text_segment_cmd_index.?, - }; + const seg_id = self.header_segment_cmd_index.?; const seg = self.segments.items[seg_id]; const global = try self.getEntryPoint(); const sym = self.getSymbol(global); @@ -2417,9 +1904,6 @@ pub fn deinit(self: *MachO) void { d_sym.deinit(gpa); } - self.tlv_ptr_entries.deinit(gpa); - self.tlv_ptr_entries_free_list.deinit(gpa); - self.tlv_ptr_entries_table.deinit(gpa); self.got_entries.deinit(gpa); self.got_entries_free_list.deinit(gpa); self.got_entries_table.deinit(gpa); @@ -2442,16 +1926,6 @@ pub fn deinit(self: *MachO) void { self.resolver.deinit(gpa); } - for (self.objects.items) |*object| { - object.deinit(gpa); - } - self.objects.deinit(gpa); - - for (self.archives.items) |*archive| { - archive.deinit(gpa); - } - self.archives.deinit(gpa); - for (self.dylibs.items) |*dylib| { dylib.deinit(gpa); } @@ -2467,16 +1941,11 @@ pub fn deinit(self: *MachO) void { 
self.sections.deinit(gpa); for (self.managed_atoms.items) |atom| { - atom.deinit(gpa); gpa.destroy(atom); } self.managed_atoms.deinit(gpa); - if (self.base.options.module) |mod| { - for (self.decls.keys()) |decl_index| { - const decl = mod.declPtr(decl_index); - decl.link.macho.deinit(gpa); - } + if (self.base.options.module) |_| { self.decls.deinit(gpa); } else { assert(self.decls.count() == 0); @@ -2525,11 +1994,9 @@ pub fn deinit(self: *MachO) void { } } -fn freeAtom(self: *MachO, atom: *Atom, owns_atom: bool) void { +fn freeAtom(self: *MachO, atom: *Atom) void { log.debug("freeAtom {*}", .{atom}); - if (!owns_atom) { - atom.deinit(self.base.allocator); - } + // Remove any relocs and base relocs associated with this Atom self.freeRelocationsForAtom(atom); @@ -2694,27 +2161,6 @@ pub fn allocateStubEntry(self: *MachO, target: SymbolWithLoc) !u32 { return index; } -pub fn allocateTlvPtrEntry(self: *MachO, target: SymbolWithLoc) !u32 { - try self.tlv_ptr_entries.ensureUnusedCapacity(self.base.allocator, 1); - - const index = blk: { - if (self.tlv_ptr_entries_free_list.popOrNull()) |index| { - log.debug(" (reusing TLV ptr entry index {d})", .{index}); - break :blk index; - } else { - log.debug(" (allocating TLV ptr entry at index {d})", .{self.tlv_ptr_entries.items.len}); - const index = @intCast(u32, self.tlv_ptr_entries.items.len); - _ = self.tlv_ptr_entries.addOneAssumeCapacity(); - break :blk index; - } - }; - - self.tlv_ptr_entries.items[index] = .{ .target = target, .sym_index = 0 }; - try self.tlv_ptr_entries_table.putNoClobber(self.base.allocator, target, index); - - return index; -} - pub fn allocateDeclIndexes(self: *MachO, decl_index: Module.Decl.Index) !void { if (self.llvm_object) |_| return; const decl = self.base.options.module.?.declPtr(decl_index); @@ -2845,7 +2291,7 @@ pub fn lowerUnnamedConst(self: *MachO, typed_value: TypedValue, decl_index: Modu symbol.n_type = macho.N_SECT; symbol.n_sect = sect_id + 1; symbol.n_value = try self.allocateAtom(atom, code.len, required_alignment); - errdefer self.freeAtom(atom, true); + errdefer self.freeAtom(atom); try unnamed_consts.append(gpa, atom); @@ -3137,7 +2583,7 @@ fn updateDeclCode(self: *MachO, decl_index: Module.Decl.Index, code: []const u8) sym.n_desc = 0; const vaddr = try self.allocateAtom(atom, code_len, required_alignment); - errdefer self.freeAtom(atom, false); + errdefer self.freeAtom(atom); log.debug("allocated atom for {s} at 0x{x}", .{ sym_name, vaddr }); log.debug(" (required alignment 0x{x})", .{required_alignment}); @@ -3256,15 +2702,15 @@ pub fn updateDeclExports( self.resolveGlobalSymbol(sym_loc) catch |err| switch (err) { error.MultipleSymbolDefinitions => { + // TODO: this needs rethinking const global = self.getGlobal(exp_name).?; if (sym_loc.sym_index != global.sym_index and global.file != null) { _ = try module.failed_exports.put(module.gpa, exp, try Module.ErrorMsg.create( gpa, decl.srcLoc(), \\LinkError: symbol '{s}' defined multiple times - \\ first definition in '{s}' , - .{ exp_name, self.objects.items[global.file.?].name }, + .{exp_name}, )); } }, @@ -3314,7 +2760,7 @@ fn freeUnnamedConsts(self: *MachO, decl_index: Module.Decl.Index) void { const gpa = self.base.allocator; const unnamed_consts = self.unnamed_const_atoms.getPtr(decl_index) orelse return; for (unnamed_consts.items) |atom| { - self.freeAtom(atom, true); + self.freeAtom(atom); self.locals_free_list.append(gpa, atom.sym_index) catch {}; self.locals.items[atom.sym_index].n_type = 0; _ = self.atom_by_index_table.remove(atom.sym_index); @@ 
-3335,7 +2781,7 @@ pub fn freeDecl(self: *MachO, decl_index: Module.Decl.Index) void { const kv = self.decls.fetchSwapRemove(decl_index); if (kv.?.value) |_| { - self.freeAtom(&decl.link.macho, false); + self.freeAtom(&decl.link.macho); self.freeUnnamedConsts(decl_index); } @@ -3969,22 +3415,6 @@ fn insertSection(self: *MachO, segment_index: u8, header: macho.section_64) !u8 return insertion_index; } -pub fn addAtomToSection(self: *MachO, atom: *Atom) !void { - const sect_id = atom.getSymbol(self).n_sect - 1; - var section = self.sections.get(sect_id); - if (section.header.size > 0) { - section.last_atom.?.next = atom; - atom.prev = section.last_atom.?; - } - section.last_atom = atom; - const atom_alignment = try math.powi(u32, 2, atom.alignment); - const aligned_end_addr = mem.alignForwardGeneric(u64, section.header.size, atom_alignment); - const padding = aligned_end_addr - section.header.size; - section.header.size += padding + atom.size; - section.header.@"align" = @max(section.header.@"align", atom.alignment); - self.sections.set(sect_id, section); -} - pub fn getGlobalSymbol(self: *MachO, name: []const u8) !u32 { const gpa = self.base.allocator; @@ -4174,7 +3604,6 @@ fn collectExportData(self: *MachO, trie: *Trie) !void { if (sym.undf()) continue; if (!sym.ext()) continue; - if (sym.n_desc == N_DESC_GCED) continue; const sym_name = self.getSymbolName(global); log.debug(" (putting '{s}' defined at 0x{x})", .{ sym_name, sym.n_value }); @@ -4422,33 +3851,18 @@ fn writeSymtab(self: *MachO, lc: *macho.symtab_command) !SymtabCtx { for (self.locals.items) |sym, sym_id| { if (sym.n_strx == 0) continue; // no name, skip - if (sym.n_desc == N_DESC_GCED) continue; // GCed, skip const sym_loc = SymbolWithLoc{ .sym_index = @intCast(u32, sym_id), .file = null }; if (self.symbolIsTemp(sym_loc)) continue; // local temp symbol, skip if (self.getGlobal(self.getSymbolName(sym_loc)) != null) continue; // global symbol is either an export or import, skip try locals.append(sym); } - for (self.objects.items) |object, object_id| { - for (object.symtab.items) |sym, sym_id| { - if (sym.n_strx == 0) continue; // no name, skip - if (sym.n_desc == N_DESC_GCED) continue; // GCed, skip - const sym_loc = SymbolWithLoc{ .sym_index = @intCast(u32, sym_id), .file = @intCast(u32, object_id) }; - if (self.symbolIsTemp(sym_loc)) continue; // local temp symbol, skip - if (self.getGlobal(self.getSymbolName(sym_loc)) != null) continue; // global symbol is either an export or import, skip - var out_sym = sym; - out_sym.n_strx = try self.strtab.insert(gpa, self.getSymbolName(sym_loc)); - try locals.append(out_sym); - } - } - var exports = std.ArrayList(macho.nlist_64).init(gpa); defer exports.deinit(); for (self.globals.items) |global| { const sym = self.getSymbol(global); if (sym.undf()) continue; // import, skip - if (sym.n_desc == N_DESC_GCED) continue; // GCed, skip var out_sym = sym; out_sym.n_strx = try self.strtab.insert(gpa, self.getSymbolName(global)); try exports.append(out_sym); @@ -4551,8 +3965,6 @@ fn writeDysymtab(self: *MachO, ctx: SymtabCtx, lc: *macho.dysymtab_command) !voi stubs.reserved1 = 0; for (self.stubs.items) |entry| { if (entry.sym_index == 0) continue; - const atom_sym = entry.getSymbol(self); - if (atom_sym.n_desc == N_DESC_GCED) continue; const target_sym = self.getSymbol(entry.target); assert(target_sym.undf()); try writer.writeIntLittle(u32, iundefsym + ctx.imports_table.get(entry.target).?); @@ -4564,8 +3976,6 @@ fn writeDysymtab(self: *MachO, ctx: SymtabCtx, lc: *macho.dysymtab_command) !voi 
got.reserved1 = nstubs; for (self.got_entries.items) |entry| { if (entry.sym_index == 0) continue; - const atom_sym = entry.getSymbol(self); - if (atom_sym.n_desc == N_DESC_GCED) continue; const target_sym = self.getSymbol(entry.target); if (target_sym.undf()) { try writer.writeIntLittle(u32, iundefsym + ctx.imports_table.get(entry.target).?); @@ -4580,8 +3990,6 @@ fn writeDysymtab(self: *MachO, ctx: SymtabCtx, lc: *macho.dysymtab_command) !voi la_symbol_ptr.reserved1 = nstubs + ngot_entries; for (self.stubs.items) |entry| { if (entry.sym_index == 0) continue; - const atom_sym = entry.getSymbol(self); - if (atom_sym.n_desc == N_DESC_GCED) continue; const target_sym = self.getSymbol(entry.target); assert(target_sym.undf()); try writer.writeIntLittle(u32, iundefsym + ctx.imports_table.get(entry.target).?); @@ -4807,12 +4215,8 @@ pub fn symbolIsTemp(self: *MachO, sym_with_loc: SymbolWithLoc) bool { /// Returns pointer-to-symbol described by `sym_with_loc` descriptor. pub fn getSymbolPtr(self: *MachO, sym_with_loc: SymbolWithLoc) *macho.nlist_64 { - if (sym_with_loc.file) |file| { - const object = &self.objects.items[file]; - return &object.symtab.items[sym_with_loc.sym_index]; - } else { - return &self.locals.items[sym_with_loc.sym_index]; - } + assert(sym_with_loc.file == null); + return &self.locals.items[sym_with_loc.sym_index]; } /// Returns symbol described by `sym_with_loc` descriptor. @@ -4822,14 +4226,9 @@ pub fn getSymbol(self: *MachO, sym_with_loc: SymbolWithLoc) macho.nlist_64 { /// Returns name of the symbol described by `sym_with_loc` descriptor. pub fn getSymbolName(self: *MachO, sym_with_loc: SymbolWithLoc) []const u8 { - if (sym_with_loc.file) |file| { - const object = self.objects.items[file]; - const sym = object.symtab.items[sym_with_loc.sym_index]; - return object.getString(sym.n_strx); - } else { - const sym = self.locals.items[sym_with_loc.sym_index]; - return self.strtab.get(sym.n_strx).?; - } + assert(sym_with_loc.file == null); + const sym = self.locals.items[sym_with_loc.sym_index]; + return self.strtab.get(sym.n_strx).?; } /// Returns pointer to the global entry for `name` if one exists. @@ -4878,12 +4277,8 @@ pub fn getOrPutGlobalPtr(self: *MachO, name: []const u8) !GetOrPutGlobalPtrResul /// Returns atom if there is an atom referenced by the symbol described by `sym_with_loc` descriptor. /// Returns null on failure. pub fn getAtomForSymbol(self: *MachO, sym_with_loc: SymbolWithLoc) ?*Atom { - if (sym_with_loc.file) |file| { - const object = self.objects.items[file]; - return object.getAtomForSymbol(sym_with_loc.sym_index); - } else { - return self.atom_by_index_table.get(sym_with_loc.sym_index); - } + assert(sym_with_loc.file == null); + return self.atom_by_index_table.get(sym_with_loc.sym_index); } /// Returns GOT atom that references `sym_with_loc` if one exists. @@ -4900,13 +4295,6 @@ pub fn getStubsAtomForSymbol(self: *MachO, sym_with_loc: SymbolWithLoc) ?*Atom { return self.stubs.items[stubs_index].getAtom(self); } -/// Returns TLV pointer atom that references `sym_with_loc` if one exists. -/// Returns null otherwise. -pub fn getTlvPtrAtomForSymbol(self: *MachO, sym_with_loc: SymbolWithLoc) ?*Atom { - const tlv_ptr_index = self.tlv_ptr_entries_table.get(sym_with_loc) orelse return null; - return self.tlv_ptr_entries.items[tlv_ptr_index].getAtom(self); -} - /// Returns symbol location corresponding to the set entrypoint. /// Asserts output mode is executable. 
pub fn getEntryPoint(self: MachO) error{MissingMainEntrypoint}!SymbolWithLoc { @@ -5199,7 +4587,7 @@ pub fn logSections(self: *MachO) void { } } -fn logSymAttributes(sym: macho.nlist_64, buf: *[9]u8) []const u8 { +fn logSymAttributes(sym: macho.nlist_64, buf: *[4]u8) []const u8 { mem.set(u8, buf[0..4], '_'); mem.set(u8, buf[4..], ' '); if (sym.sect()) { @@ -5218,35 +4606,13 @@ fn logSymAttributes(sym: macho.nlist_64, buf: *[9]u8) []const u8 { if (sym.undf()) { buf[3] = 'u'; } - if (sym.n_desc == N_DESC_GCED) { - mem.copy(u8, buf[5..], "DEAD"); - } return buf[0..]; } pub fn logSymtab(self: *MachO) void { - var buf: [9]u8 = undefined; + var buf: [4]u8 = undefined; log.debug("symtab:", .{}); - for (self.objects.items) |object, id| { - log.debug(" object({d}): {s}", .{ id, object.name }); - for (object.symtab.items) |sym, sym_id| { - const where = if (sym.undf() and !sym.tentative()) "ord" else "sect"; - const def_index = if (sym.undf() and !sym.tentative()) - @divTrunc(sym.n_desc, macho.N_SYMBOL_RESOLVER) - else - sym.n_sect + 1; - log.debug(" %{d}: {s} @{x} in {s}({d}), {s}", .{ - sym_id, - object.getString(sym.n_strx), - sym.n_value, - where, - def_index, - logSymAttributes(sym, &buf), - }); - } - } - log.debug(" object(null)", .{}); for (self.locals.items) |sym, sym_id| { const where = if (sym.undf() and !sym.tentative()) "ord" else "sect"; const def_index = if (sym.undf() and !sym.tentative()) @@ -5272,7 +4638,6 @@ pub fn logSymtab(self: *MachO) void { log.debug("GOT entries:", .{}); for (self.got_entries.items) |entry, i| { const atom_sym = entry.getSymbol(self); - if (atom_sym.n_desc == N_DESC_GCED) continue; const target_sym = self.getSymbol(entry.target); if (target_sym.undf()) { log.debug(" {d}@{x} => import('{s}')", .{ @@ -5291,19 +4656,6 @@ pub fn logSymtab(self: *MachO) void { } } - log.debug("__thread_ptrs entries:", .{}); - for (self.tlv_ptr_entries.items) |entry, i| { - const atom_sym = entry.getSymbol(self); - if (atom_sym.n_desc == N_DESC_GCED) continue; - const target_sym = self.getSymbol(entry.target); - assert(target_sym.undf()); - log.debug(" {d}@{x} => import('{s}')", .{ - i, - atom_sym.n_value, - self.getSymbolName(entry.target), - }); - } - log.debug("stubs entries:", .{}); for (self.stubs.items) |entry, i| { const target_sym = self.getSymbol(entry.target); @@ -5352,21 +4704,4 @@ pub fn logAtom(self: *MachO, atom: *const Atom) void { atom.file, sym.n_sect, }); - - for (atom.contained.items) |sym_off| { - const inner_sym = self.getSymbol(.{ - .sym_index = sym_off.sym_index, - .file = atom.file, - }); - const inner_sym_name = self.getSymbolName(.{ - .sym_index = sym_off.sym_index, - .file = atom.file, - }); - log.debug(" (%{d}, '{s}') @ {x} ({x})", .{ - sym_off.sym_index, - inner_sym_name, - inner_sym.n_value, - sym_off.offset, - }); - } } diff --git a/src/link/MachO/Archive.zig b/src/link/MachO/Archive.zig index 59a956534e..d222394ad5 100644 --- a/src/link/MachO/Archive.zig +++ b/src/link/MachO/Archive.zig @@ -165,6 +165,7 @@ fn parseTableOfContents(self: *Archive, allocator: Allocator, reader: anytype) ! while (true) { const n_strx = symtab_reader.readIntLittle(u32) catch |err| switch (err) { error.EndOfStream => break, + else => |e| return e, }; const object_offset = try symtab_reader.readIntLittle(u32); @@ -183,7 +184,7 @@ fn parseTableOfContents(self: *Archive, allocator: Allocator, reader: anytype) ! 
pub fn parseObject( self: Archive, - allocator: Allocator, + gpa: Allocator, cpu_arch: std.Target.Cpu.Arch, offset: u32, ) !Object { @@ -198,15 +199,15 @@ pub fn parseObject( } const name_or_length = try object_header.nameOrLength(); - const object_name = try parseName(allocator, name_or_length, reader); - defer allocator.free(object_name); + const object_name = try parseName(gpa, name_or_length, reader); + defer gpa.free(object_name); log.debug("extracting object '{s}' from archive '{s}'", .{ object_name, self.name }); const name = name: { var buffer: [std.fs.MAX_PATH_BYTES]u8 = undefined; const path = try std.os.realpath(self.name, &buffer); - break :name try std.fmt.allocPrint(allocator, "{s}({s})", .{ path, object_name }); + break :name try std.fmt.allocPrint(gpa, "{s}({s})", .{ path, object_name }); }; const object_name_len = switch (name_or_length) { @@ -214,7 +215,7 @@ pub fn parseObject( .Length => |len| len, }; const object_size = (try object_header.size()) - object_name_len; - const contents = try allocator.allocWithOptions(u8, object_size, @alignOf(u64), null); + const contents = try gpa.allocWithOptions(u8, object_size, @alignOf(u64), null); const amt = try reader.readAll(contents); if (amt != object_size) { return error.InputOutput; @@ -222,11 +223,11 @@ pub fn parseObject( var object = Object{ .name = name, - .mtime = try self.header.date(), + .mtime = object_header.date() catch 0, .contents = contents, }; - try object.parse(allocator, cpu_arch); + try object.parse(gpa, cpu_arch); return object; } diff --git a/src/link/MachO/Atom.zig b/src/link/MachO/Atom.zig index b5bc82e769..47ef974cb1 100644 --- a/src/link/MachO/Atom.zig +++ b/src/link/MachO/Atom.zig @@ -15,8 +15,7 @@ const Allocator = mem.Allocator; const Arch = std.Target.Cpu.Arch; const Dwarf = @import("../Dwarf.zig"); const MachO = @import("../MachO.zig"); -const Object = @import("Object.zig"); -const RelocationIncr = @import("Relocation.zig"); // temporary name until we clean up object-file relocation scanning +const Relocation = @import("Relocation.zig"); const SymbolWithLoc = MachO.SymbolWithLoc; /// Each decl always gets a local symbol with the fully qualified name. @@ -30,12 +29,6 @@ sym_index: u32, /// null means symbol defined by Zig source. file: ?u32, -/// List of symbols contained within this atom -contained: std.ArrayListUnmanaged(SymbolAtOffset) = .{}, - -/// Code (may be non-relocated) this atom represents -code: std.ArrayListUnmanaged(u8) = .{}, - /// Size and alignment of this atom /// Unlike in Elf, we need to store the size of this symbol as part of /// the atom since macho.nlist_64 lacks this information. @@ -45,21 +38,6 @@ size: u64, /// For instance, alignment of 0 should be read as 2^0 = 1 byte aligned. alignment: u32, -/// List of relocations belonging to this atom. -relocs: std.ArrayListUnmanaged(Relocation) = .{}, - -/// List of offsets contained within this atom that need rebasing by the dynamic -/// loader for example in presence of ASLR. -rebases: std.ArrayListUnmanaged(u64) = .{}, - -/// List of offsets contained within this atom that will be dynamically bound -/// by the dynamic loader and contain pointers to resolved (at load time) extern -/// symbols (aka proxies aka imports). -bindings: std.ArrayListUnmanaged(Binding) = .{}, - -/// List of lazy bindings (cf bindings above). 
-lazy_bindings: std.ArrayListUnmanaged(Binding) = .{}, - /// Points to the previous and next neighbours next: ?*Atom, prev: ?*Atom, @@ -76,50 +54,6 @@ pub const SymbolAtOffset = struct { offset: u64, }; -pub const Relocation = struct { - /// Offset within the atom's code buffer. - /// Note relocation size can be inferred by relocation's kind. - offset: u32, - - target: MachO.SymbolWithLoc, - - addend: i64, - - subtractor: ?MachO.SymbolWithLoc, - - pcrel: bool, - - length: u2, - - @"type": u4, - - pub fn getTargetAtom(self: Relocation, macho_file: *MachO) ?*Atom { - const is_via_got = got: { - switch (macho_file.base.options.target.cpu.arch) { - .aarch64 => break :got switch (@intToEnum(macho.reloc_type_arm64, self.@"type")) { - .ARM64_RELOC_GOT_LOAD_PAGE21, - .ARM64_RELOC_GOT_LOAD_PAGEOFF12, - .ARM64_RELOC_POINTER_TO_GOT, - => true, - else => false, - }, - .x86_64 => break :got switch (@intToEnum(macho.reloc_type_x86_64, self.@"type")) { - .X86_64_RELOC_GOT, .X86_64_RELOC_GOT_LOAD => true, - else => false, - }, - else => unreachable, - } - }; - - if (is_via_got) { - return macho_file.getGotAtomForSymbol(self.target).?; // panic means fatal error - } - if (macho_file.getStubsAtomForSymbol(self.target)) |stubs_atom| return stubs_atom; - if (macho_file.getTlvPtrAtomForSymbol(self.target)) |tlv_ptr_atom| return tlv_ptr_atom; - return macho_file.getAtomForSymbol(self.target); - } -}; - pub const empty = Atom{ .sym_index = 0, .file = null, @@ -130,24 +64,6 @@ pub const empty = Atom{ .dbg_info_atom = undefined, }; -pub fn deinit(self: *Atom, allocator: Allocator) void { - self.lazy_bindings.deinit(allocator); - self.bindings.deinit(allocator); - self.rebases.deinit(allocator); - self.relocs.deinit(allocator); - self.contained.deinit(allocator); - self.code.deinit(allocator); -} - -pub fn clearRetainingCapacity(self: *Atom) void { - self.lazy_bindings.clearRetainingCapacity(); - self.bindings.clearRetainingCapacity(); - self.rebases.clearRetainingCapacity(); - self.relocs.clearRetainingCapacity(); - self.contained.clearRetainingCapacity(); - self.code.clearRetainingCapacity(); -} - /// Returns symbol referencing this atom. pub fn getSymbol(self: Atom, macho_file: *MachO) macho.nlist_64 { return self.getSymbolPtr(macho_file).*; @@ -165,17 +81,6 @@ pub fn getSymbolWithLoc(self: Atom) SymbolWithLoc { return .{ .sym_index = self.sym_index, .file = self.file }; } -/// Returns true if the symbol pointed at with `sym_loc` is contained within this atom. -/// WARNING this function assumes all atoms have been allocated in the virtual memory. -/// Calling it without allocating with `MachO.allocateSymbols` (or equivalent) will -/// give bogus results. -pub fn isSymbolContained(self: Atom, sym_loc: SymbolWithLoc, macho_file: *MachO) bool { - const sym = macho_file.getSymbol(sym_loc); - if (!sym.sect()) return false; - const self_sym = self.getSymbol(macho_file); - return sym.n_value >= self_sym.n_value and sym.n_value < self_sym.n_value + self.size; -} - /// Returns the name of this atom. 
pub fn getName(self: Atom, macho_file: *MachO) []const u8 { return macho_file.getSymbolName(.{ @@ -211,690 +116,7 @@ pub fn freeListEligible(self: Atom, macho_file: *MachO) bool { return surplus >= MachO.min_text_capacity; } -const RelocContext = struct { - macho_file: *MachO, - base_addr: u64 = 0, - base_offset: i32 = 0, -}; - -pub fn parseRelocs(self: *Atom, relocs: []align(1) const macho.relocation_info, context: RelocContext) !void { - const tracy = trace(@src()); - defer tracy.end(); - - const gpa = context.macho_file.base.allocator; - - const arch = context.macho_file.base.options.target.cpu.arch; - var addend: i64 = 0; - var subtractor: ?SymbolWithLoc = null; - - for (relocs) |rel, i| { - blk: { - switch (arch) { - .aarch64 => switch (@intToEnum(macho.reloc_type_arm64, rel.r_type)) { - .ARM64_RELOC_ADDEND => { - assert(addend == 0); - addend = rel.r_symbolnum; - // Verify that it's followed by ARM64_RELOC_PAGE21 or ARM64_RELOC_PAGEOFF12. - if (relocs.len <= i + 1) { - log.err("no relocation after ARM64_RELOC_ADDEND", .{}); - return error.UnexpectedRelocationType; - } - const next = @intToEnum(macho.reloc_type_arm64, relocs[i + 1].r_type); - switch (next) { - .ARM64_RELOC_PAGE21, .ARM64_RELOC_PAGEOFF12 => {}, - else => { - log.err("unexpected relocation type after ARM64_RELOC_ADDEND", .{}); - log.err(" expected ARM64_RELOC_PAGE21 or ARM64_RELOC_PAGEOFF12", .{}); - log.err(" found {s}", .{@tagName(next)}); - return error.UnexpectedRelocationType; - }, - } - continue; - }, - .ARM64_RELOC_SUBTRACTOR => {}, - else => break :blk, - }, - .x86_64 => switch (@intToEnum(macho.reloc_type_x86_64, rel.r_type)) { - .X86_64_RELOC_SUBTRACTOR => {}, - else => break :blk, - }, - else => unreachable, - } - - assert(subtractor == null); - const sym_loc = MachO.SymbolWithLoc{ - .sym_index = rel.r_symbolnum, - .file = self.file, - }; - const sym = context.macho_file.getSymbol(sym_loc); - if (sym.sect() and !sym.ext()) { - subtractor = sym_loc; - } else { - const sym_name = context.macho_file.getSymbolName(sym_loc); - subtractor = context.macho_file.getGlobal(sym_name).?; - } - // Verify that *_SUBTRACTOR is followed by *_UNSIGNED. 
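        // Background sketch (illustrative, not part of this commit): a *_SUBTRACTOR
        // relocation pairs with the *_UNSIGNED that follows it to encode a symbol
        // difference. For example, `.quad _end - _start` emits SUBTRACTOR(_start)
        // followed by UNSIGNED(_end), and the value eventually written back is,
        // assuming both addresses have been resolved:
        //
        //     const value = @intCast(i64, end_addr) - @intCast(i64, start_addr) + addend;
        //
        // This is why the code below insists that the pairing is intact before
        // recording the subtractor.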
- if (relocs.len <= i + 1) { - log.err("no relocation after *_RELOC_SUBTRACTOR", .{}); - return error.UnexpectedRelocationType; - } - switch (arch) { - .aarch64 => switch (@intToEnum(macho.reloc_type_arm64, relocs[i + 1].r_type)) { - .ARM64_RELOC_UNSIGNED => {}, - else => { - log.err("unexpected relocation type after ARM64_RELOC_ADDEND", .{}); - log.err(" expected ARM64_RELOC_UNSIGNED", .{}); - log.err(" found {s}", .{ - @tagName(@intToEnum(macho.reloc_type_arm64, relocs[i + 1].r_type)), - }); - return error.UnexpectedRelocationType; - }, - }, - .x86_64 => switch (@intToEnum(macho.reloc_type_x86_64, relocs[i + 1].r_type)) { - .X86_64_RELOC_UNSIGNED => {}, - else => { - log.err("unexpected relocation type after X86_64_RELOC_ADDEND", .{}); - log.err(" expected X86_64_RELOC_UNSIGNED", .{}); - log.err(" found {s}", .{ - @tagName(@intToEnum(macho.reloc_type_x86_64, relocs[i + 1].r_type)), - }); - return error.UnexpectedRelocationType; - }, - }, - else => unreachable, - } - continue; - } - - const object = &context.macho_file.objects.items[self.file.?]; - const target = target: { - if (rel.r_extern == 0) { - const sect_id = @intCast(u16, rel.r_symbolnum - 1); - const sym_index = object.sections_as_symbols.get(sect_id) orelse blk: { - const sect = object.getSourceSection(sect_id); - const out_sect_id = (try context.macho_file.getOutputSection(sect)) orelse - unreachable; - const sym_index = @intCast(u32, object.symtab.items.len); - try object.symtab.append(gpa, .{ - .n_strx = 0, - .n_type = macho.N_SECT, - .n_sect = out_sect_id + 1, - .n_desc = 0, - .n_value = sect.addr, - }); - try object.sections_as_symbols.putNoClobber(gpa, sect_id, sym_index); - break :blk sym_index; - }; - break :target MachO.SymbolWithLoc{ .sym_index = sym_index, .file = self.file }; - } - - const sym_loc = MachO.SymbolWithLoc{ - .sym_index = rel.r_symbolnum, - .file = self.file, - }; - const sym = context.macho_file.getSymbol(sym_loc); - - if (sym.sect() and !sym.ext()) { - break :target sym_loc; - } else { - const sym_name = context.macho_file.getSymbolName(sym_loc); - break :target context.macho_file.getGlobal(sym_name).?; - } - }; - const offset = @intCast(u32, rel.r_address - context.base_offset); - - switch (arch) { - .aarch64 => { - switch (@intToEnum(macho.reloc_type_arm64, rel.r_type)) { - .ARM64_RELOC_BRANCH26 => { - // TODO rewrite relocation - try addStub(target, context); - }, - .ARM64_RELOC_GOT_LOAD_PAGE21, - .ARM64_RELOC_GOT_LOAD_PAGEOFF12, - .ARM64_RELOC_POINTER_TO_GOT, - => { - // TODO rewrite relocation - try addGotEntry(target, context); - }, - .ARM64_RELOC_UNSIGNED => { - addend = if (rel.r_length == 3) - mem.readIntLittle(i64, self.code.items[offset..][0..8]) - else - mem.readIntLittle(i32, self.code.items[offset..][0..4]); - if (rel.r_extern == 0) { - const target_sect_base_addr = object.getSourceSection(@intCast(u16, rel.r_symbolnum - 1)).addr; - addend -= @intCast(i64, target_sect_base_addr); - } - try self.addPtrBindingOrRebase(rel, target, context); - }, - .ARM64_RELOC_TLVP_LOAD_PAGE21, - .ARM64_RELOC_TLVP_LOAD_PAGEOFF12, - => { - try addTlvPtrEntry(target, context); - }, - else => {}, - } - }, - .x86_64 => { - const rel_type = @intToEnum(macho.reloc_type_x86_64, rel.r_type); - switch (rel_type) { - .X86_64_RELOC_BRANCH => { - // TODO rewrite relocation - try addStub(target, context); - addend = mem.readIntLittle(i32, self.code.items[offset..][0..4]); - }, - .X86_64_RELOC_GOT, .X86_64_RELOC_GOT_LOAD => { - // TODO rewrite relocation - try addGotEntry(target, context); - addend = 
mem.readIntLittle(i32, self.code.items[offset..][0..4]); - }, - .X86_64_RELOC_UNSIGNED => { - addend = if (rel.r_length == 3) - mem.readIntLittle(i64, self.code.items[offset..][0..8]) - else - mem.readIntLittle(i32, self.code.items[offset..][0..4]); - if (rel.r_extern == 0) { - const target_sect_base_addr = object.getSourceSection(@intCast(u16, rel.r_symbolnum - 1)).addr; - addend -= @intCast(i64, target_sect_base_addr); - } - try self.addPtrBindingOrRebase(rel, target, context); - }, - .X86_64_RELOC_SIGNED, - .X86_64_RELOC_SIGNED_1, - .X86_64_RELOC_SIGNED_2, - .X86_64_RELOC_SIGNED_4, - => { - const correction: u3 = switch (rel_type) { - .X86_64_RELOC_SIGNED => 0, - .X86_64_RELOC_SIGNED_1 => 1, - .X86_64_RELOC_SIGNED_2 => 2, - .X86_64_RELOC_SIGNED_4 => 4, - else => unreachable, - }; - addend = mem.readIntLittle(i32, self.code.items[offset..][0..4]) + correction; - if (rel.r_extern == 0) { - // Note for the future self: when r_extern == 0, we should subtract correction from the - // addend. - const target_sect_base_addr = object.getSourceSection(@intCast(u16, rel.r_symbolnum - 1)).addr; - // We need to add base_offset, i.e., offset of this atom wrt to the source - // section. Otherwise, the addend will over-/under-shoot. - addend += @intCast(i64, context.base_addr + offset + 4) - - @intCast(i64, target_sect_base_addr) + context.base_offset; - } - }, - .X86_64_RELOC_TLV => { - try addTlvPtrEntry(target, context); - }, - else => {}, - } - }, - else => unreachable, - } - - try self.relocs.append(gpa, .{ - .offset = offset, - .target = target, - .addend = addend, - .subtractor = subtractor, - .pcrel = rel.r_pcrel == 1, - .length = rel.r_length, - .@"type" = rel.r_type, - }); - - addend = 0; - subtractor = null; - } -} - -fn addPtrBindingOrRebase( - self: *Atom, - rel: macho.relocation_info, - target: MachO.SymbolWithLoc, - context: RelocContext, -) !void { - const gpa = context.macho_file.base.allocator; - const sym = context.macho_file.getSymbol(target); - if (sym.undf()) { - try self.bindings.append(gpa, .{ - .target = target, - .offset = @intCast(u32, rel.r_address - context.base_offset), - }); - } else { - const source_sym = self.getSymbol(context.macho_file); - const section = context.macho_file.sections.get(source_sym.n_sect - 1); - const header = section.header; - const segment_index = section.segment_index; - const sect_type = header.@"type"(); - - const should_rebase = rebase: { - if (rel.r_length != 3) break :rebase false; - - // TODO actually, a check similar to what dyld is doing, that is, verifying - // that the segment is writable should be enough here. 
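            // A sketch of the dyld-style check the TODO above suggests (illustrative;
            // `maxprot` is the VM protection field of macho.segment_command_64, and the
            // write bit is assumed to be exposed as macho.PROT.WRITE):
            //
            //     const seg = context.macho_file.segments.items[segment_index];
            //     const is_writable = seg.maxprot & macho.PROT.WRITE != 0;
            //
            // Only pointers the loader can actually slide at load time need a rebase
            // opcode, and that requires the owning segment to be mapped writable.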
- const is_right_segment = blk: { - if (context.macho_file.data_segment_cmd_index) |idx| { - if (segment_index == idx) { - break :blk true; - } - } - if (context.macho_file.data_const_segment_cmd_index) |idx| { - if (segment_index == idx) { - break :blk true; - } - } - break :blk false; - }; - - if (!is_right_segment) break :rebase false; - if (sect_type != macho.S_LITERAL_POINTERS and - sect_type != macho.S_REGULAR and - sect_type != macho.S_MOD_INIT_FUNC_POINTERS and - sect_type != macho.S_MOD_TERM_FUNC_POINTERS) - { - break :rebase false; - } - - break :rebase true; - }; - - if (should_rebase) { - try self.rebases.append(gpa, @intCast(u32, rel.r_address - context.base_offset)); - } - } -} - -fn addTlvPtrEntry(target: MachO.SymbolWithLoc, context: RelocContext) !void { - const target_sym = context.macho_file.getSymbol(target); - if (!target_sym.undf()) return; - if (context.macho_file.tlv_ptr_entries_table.contains(target)) return; - - const index = try context.macho_file.allocateTlvPtrEntry(target); - const atom = try context.macho_file.createTlvPtrAtom(target); - context.macho_file.tlv_ptr_entries.items[index].sym_index = atom.sym_index; -} - -fn addGotEntry(target: MachO.SymbolWithLoc, context: RelocContext) !void { - if (context.macho_file.got_entries_table.contains(target)) return; - - const index = try context.macho_file.allocateGotEntry(target); - const atom = try context.macho_file.createGotAtom(target); - context.macho_file.got_entries.items[index].sym_index = atom.sym_index; -} - -fn addStub(target: MachO.SymbolWithLoc, context: RelocContext) !void { - const target_sym = context.macho_file.getSymbol(target); - if (!target_sym.undf()) return; - if (context.macho_file.stubs_table.contains(target)) return; - - const stub_index = try context.macho_file.allocateStubEntry(target); - - const stub_helper_atom = try context.macho_file.createStubHelperAtom(); - const laptr_atom = try context.macho_file.createLazyPointerAtom(stub_helper_atom.sym_index, target); - const stub_atom = try context.macho_file.createStubAtom(laptr_atom.sym_index); - - context.macho_file.stubs.items[stub_index].sym_index = stub_atom.sym_index; -} - -pub fn resolveRelocs(self: *Atom, macho_file: *MachO) !void { - const tracy = trace(@src()); - defer tracy.end(); - - log.debug("ATOM(%{d}, '{s}')", .{ self.sym_index, self.getName(macho_file) }); - - for (self.relocs.items) |rel| { - const arch = macho_file.base.options.target.cpu.arch; - switch (arch) { - .aarch64 => { - log.debug(" RELA({s}) @ {x} => %{d} in object({?d})", .{ - @tagName(@intToEnum(macho.reloc_type_arm64, rel.@"type")), - rel.offset, - rel.target.sym_index, - rel.target.file, - }); - }, - .x86_64 => { - log.debug(" RELA({s}) @ {x} => %{d} in object({?d})", .{ - @tagName(@intToEnum(macho.reloc_type_x86_64, rel.@"type")), - rel.offset, - rel.target.sym_index, - rel.target.file, - }); - }, - else => unreachable, - } - - const source_addr = blk: { - const source_sym = self.getSymbol(macho_file); - break :blk source_sym.n_value + rel.offset; - }; - const is_tlv = is_tlv: { - const source_sym = self.getSymbol(macho_file); - const header = macho_file.sections.items(.header)[source_sym.n_sect - 1]; - break :is_tlv header.@"type"() == macho.S_THREAD_LOCAL_VARIABLES; - }; - const target_addr = blk: { - const target_atom = rel.getTargetAtom(macho_file) orelse { - // If there is no atom for target, we still need to check for special, atom-less - // symbols such as `___dso_handle`. 
- const target_name = macho_file.getSymbolName(rel.target); - assert(macho_file.getGlobal(target_name) != null); - const atomless_sym = macho_file.getSymbol(rel.target); - log.debug(" | atomless target '{s}'", .{target_name}); - break :blk atomless_sym.n_value; - }; - log.debug(" | target ATOM(%{d}, '{s}') in object({?d})", .{ - target_atom.sym_index, - target_atom.getName(macho_file), - target_atom.file, - }); - // If `rel.target` is contained within the target atom, pull its address value. - const target_sym = if (target_atom.isSymbolContained(rel.target, macho_file)) - macho_file.getSymbol(rel.target) - else - target_atom.getSymbol(macho_file); - assert(target_sym.n_desc != MachO.N_DESC_GCED); - const base_address: u64 = if (is_tlv) base_address: { - // For TLV relocations, the value specified as a relocation is the displacement from the - // TLV initializer (either value in __thread_data or zero-init in __thread_bss) to the first - // defined TLV template init section in the following order: - // * wrt to __thread_data if defined, then - // * wrt to __thread_bss - const sect_id: u16 = sect_id: { - if (macho_file.getSectionByName("__DATA", "__thread_data")) |i| { - break :sect_id i; - } else if (macho_file.getSectionByName("__DATA", "__thread_bss")) |i| { - break :sect_id i; - } else { - log.err("threadlocal variables present but no initializer sections found", .{}); - log.err(" __thread_data not found", .{}); - log.err(" __thread_bss not found", .{}); - return error.FailedToResolveRelocationTarget; - } - }; - break :base_address macho_file.sections.items(.header)[sect_id].addr; - } else 0; - break :blk target_sym.n_value - base_address; - }; - - log.debug(" | source_addr = 0x{x}", .{source_addr}); - - switch (arch) { - .aarch64 => { - switch (@intToEnum(macho.reloc_type_arm64, rel.@"type")) { - .ARM64_RELOC_BRANCH26 => { - log.debug(" | target_addr = 0x{x}", .{target_addr}); - const displacement = math.cast( - i28, - @intCast(i64, target_addr) - @intCast(i64, source_addr), - ) orelse { - log.err("jump too big to encode as i28 displacement value", .{}); - log.err(" (target - source) = displacement => 0x{x} - 0x{x} = 0x{x}", .{ - target_addr, - source_addr, - @intCast(i64, target_addr) - @intCast(i64, source_addr), - }); - log.err(" TODO implement branch islands to extend jump distance for arm64", .{}); - return error.TODOImplementBranchIslands; - }; - const code = self.code.items[rel.offset..][0..4]; - var inst = aarch64.Instruction{ - .unconditional_branch_immediate = mem.bytesToValue(meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.unconditional_branch_immediate, - ), code), - }; - inst.unconditional_branch_immediate.imm26 = @truncate(u26, @bitCast(u28, displacement >> 2)); - mem.writeIntLittle(u32, code, inst.toU32()); - }, - .ARM64_RELOC_PAGE21, - .ARM64_RELOC_GOT_LOAD_PAGE21, - .ARM64_RELOC_TLVP_LOAD_PAGE21, - => { - const actual_target_addr = @intCast(i64, target_addr) + rel.addend; - log.debug(" | target_addr = 0x{x}", .{actual_target_addr}); - const source_page = @intCast(i32, source_addr >> 12); - const target_page = @intCast(i32, actual_target_addr >> 12); - const pages = @bitCast(u21, @intCast(i21, target_page - source_page)); - const code = self.code.items[rel.offset..][0..4]; - var inst = aarch64.Instruction{ - .pc_relative_address = mem.bytesToValue(meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.pc_relative_address, - ), code), - }; - inst.pc_relative_address.immhi = @truncate(u19, pages >> 2); - inst.pc_relative_address.immlo = @truncate(u2, 
pages); - mem.writeIntLittle(u32, code, inst.toU32()); - }, - .ARM64_RELOC_PAGEOFF12 => { - const code = self.code.items[rel.offset..][0..4]; - const actual_target_addr = @intCast(i64, target_addr) + rel.addend; - log.debug(" | target_addr = 0x{x}", .{actual_target_addr}); - const narrowed = @truncate(u12, @intCast(u64, actual_target_addr)); - if (isArithmeticOp(self.code.items[rel.offset..][0..4])) { - var inst = aarch64.Instruction{ - .add_subtract_immediate = mem.bytesToValue(meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.add_subtract_immediate, - ), code), - }; - inst.add_subtract_immediate.imm12 = narrowed; - mem.writeIntLittle(u32, code, inst.toU32()); - } else { - var inst = aarch64.Instruction{ - .load_store_register = mem.bytesToValue(meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.load_store_register, - ), code), - }; - const offset: u12 = blk: { - if (inst.load_store_register.size == 0) { - if (inst.load_store_register.v == 1) { - // 128-bit SIMD is scaled by 16. - break :blk try math.divExact(u12, narrowed, 16); - } - // Otherwise, 8-bit SIMD or ldrb. - break :blk narrowed; - } else { - const denom: u4 = try math.powi(u4, 2, inst.load_store_register.size); - break :blk try math.divExact(u12, narrowed, denom); - } - }; - inst.load_store_register.offset = offset; - mem.writeIntLittle(u32, code, inst.toU32()); - } - }, - .ARM64_RELOC_GOT_LOAD_PAGEOFF12 => { - const code = self.code.items[rel.offset..][0..4]; - const actual_target_addr = @intCast(i64, target_addr) + rel.addend; - log.debug(" | target_addr = 0x{x}", .{actual_target_addr}); - const narrowed = @truncate(u12, @intCast(u64, actual_target_addr)); - var inst: aarch64.Instruction = .{ - .load_store_register = mem.bytesToValue(meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.load_store_register, - ), code), - }; - const offset = try math.divExact(u12, narrowed, 8); - inst.load_store_register.offset = offset; - mem.writeIntLittle(u32, code, inst.toU32()); - }, - .ARM64_RELOC_TLVP_LOAD_PAGEOFF12 => { - const code = self.code.items[rel.offset..][0..4]; - const actual_target_addr = @intCast(i64, target_addr) + rel.addend; - log.debug(" | target_addr = 0x{x}", .{actual_target_addr}); - - const RegInfo = struct { - rd: u5, - rn: u5, - size: u2, - }; - const reg_info: RegInfo = blk: { - if (isArithmeticOp(code)) { - const inst = mem.bytesToValue(meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.add_subtract_immediate, - ), code); - break :blk .{ - .rd = inst.rd, - .rn = inst.rn, - .size = inst.sf, - }; - } else { - const inst = mem.bytesToValue(meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.load_store_register, - ), code); - break :blk .{ - .rd = inst.rt, - .rn = inst.rn, - .size = inst.size, - }; - } - }; - const narrowed = @truncate(u12, @intCast(u64, actual_target_addr)); - var inst = if (macho_file.tlv_ptr_entries_table.contains(rel.target)) blk: { - const offset = try math.divExact(u12, narrowed, 8); - break :blk aarch64.Instruction{ - .load_store_register = .{ - .rt = reg_info.rd, - .rn = reg_info.rn, - .offset = offset, - .opc = 0b01, - .op1 = 0b01, - .v = 0, - .size = reg_info.size, - }, - }; - } else aarch64.Instruction{ - .add_subtract_immediate = .{ - .rd = reg_info.rd, - .rn = reg_info.rn, - .imm12 = narrowed, - .sh = 0, - .s = 0, - .op = 0, - .sf = @truncate(u1, reg_info.size), - }, - }; - mem.writeIntLittle(u32, code, inst.toU32()); - }, - .ARM64_RELOC_POINTER_TO_GOT => { - log.debug(" | target_addr = 0x{x}", .{target_addr}); - const result = 
math.cast(i32, @intCast(i64, target_addr) - @intCast(i64, source_addr)) orelse return error.Overflow; - mem.writeIntLittle(u32, self.code.items[rel.offset..][0..4], @bitCast(u32, result)); - }, - .ARM64_RELOC_UNSIGNED => { - const result = blk: { - if (rel.subtractor) |subtractor| { - const sym = macho_file.getSymbol(subtractor); - break :blk @intCast(i64, target_addr) - @intCast(i64, sym.n_value) + rel.addend; - } else { - break :blk @intCast(i64, target_addr) + rel.addend; - } - }; - log.debug(" | target_addr = 0x{x}", .{result}); - - if (rel.length == 3) { - mem.writeIntLittle(u64, self.code.items[rel.offset..][0..8], @bitCast(u64, result)); - } else { - mem.writeIntLittle( - u32, - self.code.items[rel.offset..][0..4], - @truncate(u32, @bitCast(u64, result)), - ); - } - }, - .ARM64_RELOC_SUBTRACTOR => unreachable, - .ARM64_RELOC_ADDEND => unreachable, - } - }, - .x86_64 => { - switch (@intToEnum(macho.reloc_type_x86_64, rel.@"type")) { - .X86_64_RELOC_BRANCH => { - log.debug(" | target_addr = 0x{x}", .{target_addr}); - const displacement = math.cast( - i32, - @intCast(i64, target_addr) - @intCast(i64, source_addr) - 4 + rel.addend, - ) orelse return error.Overflow; - mem.writeIntLittle(u32, self.code.items[rel.offset..][0..4], @bitCast(u32, displacement)); - }, - .X86_64_RELOC_GOT, .X86_64_RELOC_GOT_LOAD => { - log.debug(" | target_addr = 0x{x}", .{target_addr}); - const displacement = math.cast( - i32, - @intCast(i64, target_addr) - @intCast(i64, source_addr) - 4 + rel.addend, - ) orelse return error.Overflow; - mem.writeIntLittle(u32, self.code.items[rel.offset..][0..4], @bitCast(u32, displacement)); - }, - .X86_64_RELOC_TLV => { - log.debug(" | target_addr = 0x{x}", .{target_addr}); - if (!macho_file.tlv_ptr_entries_table.contains(rel.target)) { - // We need to rewrite the opcode from movq to leaq. 
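                            // Byte-level sketch of the rewrite below (illustrative): the
                            // compiler references a thread-local variable through
                            //     movq  _var@TLVP(%rip), %rdi   ; 48 8b 3d <disp32>
                            // and this relocation points at the 4-byte displacement, so
                            // the opcode byte lives at rel.offset - 2. When there is no
                            // __thread_ptrs entry the target is local, the pointer load
                            // degenerates into an address computation, and 0x8b (mov) is
                            // patched into 0x8d (lea):
                            //     leaq  _var@TLVP(%rip), %rdi   ; 48 8d 3d <disp32>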
- self.code.items[rel.offset - 2] = 0x8d; - } - const displacement = math.cast( - i32, - @intCast(i64, target_addr) - @intCast(i64, source_addr) - 4 + rel.addend, - ) orelse return error.Overflow; - mem.writeIntLittle(u32, self.code.items[rel.offset..][0..4], @bitCast(u32, displacement)); - }, - .X86_64_RELOC_SIGNED, - .X86_64_RELOC_SIGNED_1, - .X86_64_RELOC_SIGNED_2, - .X86_64_RELOC_SIGNED_4, - => { - const correction: u3 = switch (@intToEnum(macho.reloc_type_x86_64, rel.@"type")) { - .X86_64_RELOC_SIGNED => 0, - .X86_64_RELOC_SIGNED_1 => 1, - .X86_64_RELOC_SIGNED_2 => 2, - .X86_64_RELOC_SIGNED_4 => 4, - else => unreachable, - }; - const actual_target_addr = @intCast(i64, target_addr) + rel.addend; - log.debug(" | target_addr = 0x{x}", .{actual_target_addr}); - const displacement = math.cast( - i32, - actual_target_addr - @intCast(i64, source_addr + correction + 4), - ) orelse return error.Overflow; - mem.writeIntLittle(u32, self.code.items[rel.offset..][0..4], @bitCast(u32, displacement)); - }, - .X86_64_RELOC_UNSIGNED => { - const result = blk: { - if (rel.subtractor) |subtractor| { - const sym = macho_file.getSymbol(subtractor); - break :blk @intCast(i64, target_addr) - @intCast(i64, sym.n_value) + rel.addend; - } else { - break :blk @intCast(i64, target_addr) + rel.addend; - } - }; - log.debug(" | target_addr = 0x{x}", .{result}); - - if (rel.length == 3) { - mem.writeIntLittle(u64, self.code.items[rel.offset..][0..8], @bitCast(u64, result)); - } else { - mem.writeIntLittle( - u32, - self.code.items[rel.offset..][0..4], - @truncate(u32, @bitCast(u64, result)), - ); - } - }, - .X86_64_RELOC_SUBTRACTOR => unreachable, - } - }, - else => unreachable, - } - } -} - -inline fn isArithmeticOp(inst: *const [4]u8) bool { - const group_decode = @truncate(u5, inst[3]); - return ((group_decode >> 2) == 4); -} - -pub fn addRelocation(self: *Atom, macho_file: *MachO, reloc: RelocationIncr) !void { +pub fn addRelocation(self: *Atom, macho_file: *MachO, reloc: Relocation) !void { return self.addRelocations(macho_file, 1, .{reloc}); } @@ -902,7 +124,7 @@ pub fn addRelocations( self: *Atom, macho_file: *MachO, comptime count: comptime_int, - relocs: [count]RelocationIncr, + relocs: [count]Relocation, ) !void { const gpa = macho_file.base.allocator; const target = macho_file.base.options.target; diff --git a/src/link/MachO/DebugSymbols.zig b/src/link/MachO/DebugSymbols.zig index c81602543e..f0c3ee4f3c 100644 --- a/src/link/MachO/DebugSymbols.zig +++ b/src/link/MachO/DebugSymbols.zig @@ -477,7 +477,6 @@ fn writeSymtab(self: *DebugSymbols, lc: *macho.symtab_command) !void { for (self.base.locals.items) |sym, sym_id| { if (sym.n_strx == 0) continue; // no name, skip - if (sym.n_desc == MachO.N_DESC_GCED) continue; // GCed, skip const sym_loc = MachO.SymbolWithLoc{ .sym_index = @intCast(u32, sym_id), .file = null }; if (self.base.symbolIsTemp(sym_loc)) continue; // local temp symbol, skip if (self.base.getGlobal(self.base.getSymbolName(sym_loc)) != null) continue; // global symbol is either an export or import, skip @@ -492,7 +491,6 @@ fn writeSymtab(self: *DebugSymbols, lc: *macho.symtab_command) !void { for (self.base.globals.items) |global| { const sym = self.base.getSymbol(global); if (sym.undf()) continue; // import, skip - if (sym.n_desc == MachO.N_DESC_GCED) continue; // GCed, skip var out_sym = sym; out_sym.n_strx = try self.strtab.insert(gpa, self.base.getSymbolName(global)); try exports.append(out_sym); diff --git a/src/link/MachO/DwarfInfo.zig b/src/link/MachO/DwarfInfo.zig new file mode 100644 
index 0000000000..0685b3fc99 --- /dev/null +++ b/src/link/MachO/DwarfInfo.zig @@ -0,0 +1,467 @@ +const DwarfInfo = @This(); + +const std = @import("std"); +const assert = std.debug.assert; +const dwarf = std.dwarf; +const leb = std.leb; +const log = std.log.scoped(.macho); +const math = std.math; +const mem = std.mem; + +const Allocator = mem.Allocator; +pub const AbbrevLookupTable = std.AutoHashMap(u64, struct { pos: usize, len: usize }); +pub const SubprogramLookupByName = std.StringHashMap(struct { addr: u64, size: u64 }); + +debug_info: []const u8, +debug_abbrev: []const u8, +debug_str: []const u8, + +pub fn getCompileUnitIterator(self: DwarfInfo) CompileUnitIterator { + return .{ .ctx = self }; +} + +const CompileUnitIterator = struct { + ctx: DwarfInfo, + pos: usize = 0, + + pub fn next(self: *CompileUnitIterator) !?CompileUnit { + if (self.pos >= self.ctx.debug_info.len) return null; + + var stream = std.io.fixedBufferStream(self.ctx.debug_info); + var creader = std.io.countingReader(stream.reader()); + const reader = creader.reader(); + + const cuh = try CompileUnit.Header.read(reader); + const total_length = cuh.length + @as(u64, if (cuh.is_64bit) @sizeOf(u64) else @sizeOf(u32)); + const offset = math.cast(usize, creader.bytes_read) orelse return error.Overflow; + + const cu = CompileUnit{ + .cuh = cuh, + .debug_info_off = offset, + }; + + self.pos += (math.cast(usize, total_length) orelse return error.Overflow); + + return cu; + } +}; + +pub fn genSubprogramLookupByName( + self: DwarfInfo, + compile_unit: CompileUnit, + abbrev_lookup: AbbrevLookupTable, + lookup: *SubprogramLookupByName, +) !void { + var abbrev_it = compile_unit.getAbbrevEntryIterator(self); + while (try abbrev_it.next(abbrev_lookup)) |entry| switch (entry.tag) { + dwarf.TAG.subprogram => { + var attr_it = entry.getAttributeIterator(self, compile_unit.cuh); + + var name: ?[]const u8 = null; + var low_pc: ?u64 = null; + var high_pc: ?u64 = null; + + while (try attr_it.next()) |attr| switch (attr.name) { + dwarf.AT.name => if (attr.getString(self, compile_unit.cuh)) |str| { + log.warn("subprogram: {s}", .{str}); + name = str; + }, + dwarf.AT.low_pc => { + if (attr.getAddr(self, compile_unit.cuh)) |addr| { + low_pc = addr; + } + if (try attr.getConstant(self)) |constant| { + low_pc = @intCast(u64, constant); + } + }, + dwarf.AT.high_pc => { + if (attr.getAddr(self, compile_unit.cuh)) |addr| { + high_pc = addr; + } + if (try attr.getConstant(self)) |constant| { + high_pc = @intCast(u64, constant); + } + }, + else => {}, + }; + + if (name == null or low_pc == null or high_pc == null) continue; + + try lookup.putNoClobber(name.?, .{ .addr = low_pc.?, .size = high_pc.? 
}); + }, + else => {}, + }; +} + +pub fn genAbbrevLookupByKind(self: DwarfInfo, off: usize, lookup: *AbbrevLookupTable) !void { + const data = self.debug_abbrev[off..]; + var stream = std.io.fixedBufferStream(data); + var creader = std.io.countingReader(stream.reader()); + const reader = creader.reader(); + + while (true) { + const kind = try leb.readULEB128(u64, reader); + + if (kind == 0) break; + + const pos = math.cast(usize, creader.bytes_read) orelse return error.Overflow; + _ = try leb.readULEB128(u64, reader); // TAG + _ = try reader.readByte(); // CHILDREN + + while (true) { + const name = try leb.readULEB128(u64, reader); + const form = try leb.readULEB128(u64, reader); + + if (name == 0 and form == 0) break; + } + + const next_pos = math.cast(usize, creader.bytes_read) orelse return error.Overflow; + + try lookup.putNoClobber(kind, .{ + .pos = pos, + .len = next_pos - pos - 2, + }); + } +} + +pub const CompileUnit = struct { + cuh: Header, + debug_info_off: usize, + + pub const Header = struct { + is_64bit: bool, + length: u64, + version: u16, + debug_abbrev_offset: u64, + address_size: u8, + + fn read(reader: anytype) !Header { + var length: u64 = try reader.readIntLittle(u32); + + const is_64bit = length == 0xffffffff; + if (is_64bit) { + length = try reader.readIntLittle(u64); + } + + const version = try reader.readIntLittle(u16); + const debug_abbrev_offset = if (is_64bit) + try reader.readIntLittle(u64) + else + try reader.readIntLittle(u32); + const address_size = try reader.readIntLittle(u8); + + return Header{ + .is_64bit = is_64bit, + .length = length, + .version = version, + .debug_abbrev_offset = debug_abbrev_offset, + .address_size = address_size, + }; + } + }; + + inline fn getDebugInfo(self: CompileUnit, ctx: DwarfInfo) []const u8 { + return ctx.debug_info[self.debug_info_off..][0..self.cuh.length]; + } + + pub fn getAbbrevEntryIterator(self: CompileUnit, ctx: DwarfInfo) AbbrevEntryIterator { + return .{ .cu = self, .ctx = ctx }; + } +}; + +const AbbrevEntryIterator = struct { + cu: CompileUnit, + ctx: DwarfInfo, + pos: usize = 0, + + pub fn next(self: *AbbrevEntryIterator, lookup: AbbrevLookupTable) !?AbbrevEntry { + if (self.pos + self.cu.debug_info_off >= self.ctx.debug_info.len) return null; + + const debug_info = self.ctx.debug_info[self.pos + self.cu.debug_info_off ..]; + var stream = std.io.fixedBufferStream(debug_info); + var creader = std.io.countingReader(stream.reader()); + const reader = creader.reader(); + + const kind = try leb.readULEB128(u64, reader); + self.pos += (math.cast(usize, creader.bytes_read) orelse return error.Overflow); + + if (kind == 0) { + return AbbrevEntry.@"null"(); + } + + const abbrev_pos = lookup.get(kind) orelse return error.MalformedDwarf; + const len = try findAbbrevEntrySize( + self.ctx, + abbrev_pos.pos, + abbrev_pos.len, + self.pos + self.cu.debug_info_off, + self.cu.cuh, + ); + const entry = try getAbbrevEntry( + self.ctx, + abbrev_pos.pos, + abbrev_pos.len, + self.pos + self.cu.debug_info_off, + len, + ); + + self.pos += len; + + return entry; + } +}; + +pub const AbbrevEntry = struct { + tag: u64, + children: u8, + debug_abbrev_off: usize, + debug_abbrev_len: usize, + debug_info_off: usize, + debug_info_len: usize, + + fn @"null"() AbbrevEntry { + return .{ + .tag = 0, + .children = dwarf.CHILDREN.no, + .debug_abbrev_off = 0, + .debug_abbrev_len = 0, + .debug_info_off = 0, + .debug_info_len = 0, + }; + } + + pub fn hasChildren(self: AbbrevEntry) bool { + return self.children == dwarf.CHILDREN.yes; + } + + inline fn 
getDebugInfo(self: AbbrevEntry, ctx: DwarfInfo) []const u8 { + return ctx.debug_info[self.debug_info_off..][0..self.debug_info_len]; + } + + inline fn getDebugAbbrev(self: AbbrevEntry, ctx: DwarfInfo) []const u8 { + return ctx.debug_abbrev[self.debug_abbrev_off..][0..self.debug_abbrev_len]; + } + + pub fn getAttributeIterator(self: AbbrevEntry, ctx: DwarfInfo, cuh: CompileUnit.Header) AttributeIterator { + return .{ .entry = self, .ctx = ctx, .cuh = cuh }; + } +}; + +pub const Attribute = struct { + name: u64, + form: u64, + debug_info_off: usize, + debug_info_len: usize, + + inline fn getDebugInfo(self: Attribute, ctx: DwarfInfo) []const u8 { + return ctx.debug_info[self.debug_info_off..][0..self.debug_info_len]; + } + + pub fn getString(self: Attribute, ctx: DwarfInfo, cuh: CompileUnit.Header) ?[]const u8 { + if (self.form != dwarf.FORM.strp) return null; + const debug_info = self.getDebugInfo(ctx); + const off = if (cuh.is_64bit) + mem.readIntLittle(u64, debug_info[0..8]) + else + mem.readIntLittle(u32, debug_info[0..4]); + return ctx.getString(off); + } + + pub fn getConstant(self: Attribute, ctx: DwarfInfo) !?i128 { + const debug_info = self.getDebugInfo(ctx); + var stream = std.io.fixedBufferStream(debug_info); + const reader = stream.reader(); + + return switch (self.form) { + dwarf.FORM.data1 => debug_info[0], + dwarf.FORM.data2 => mem.readIntLittle(u16, debug_info[0..2]), + dwarf.FORM.data4 => mem.readIntLittle(u32, debug_info[0..4]), + dwarf.FORM.data8 => mem.readIntLittle(u64, debug_info[0..8]), + dwarf.FORM.udata => try leb.readULEB128(u64, reader), + dwarf.FORM.sdata => try leb.readILEB128(i64, reader), + else => null, + }; + } + + pub fn getReference(self: Attribute, ctx: DwarfInfo) !?u64 { + const debug_info = self.getDebugInfo(ctx); + var stream = std.io.fixedBufferStream(debug_info); + const reader = stream.reader(); + + return switch (self.form) { + dwarf.FORM.ref1 => debug_info[0], + dwarf.FORM.ref2 => mem.readIntLittle(u16, debug_info[0..2]), + dwarf.FORM.ref4 => mem.readIntLittle(u32, debug_info[0..4]), + dwarf.FORM.ref8 => mem.readIntLittle(u64, debug_info[0..8]), + dwarf.FORM.ref_udata => try leb.readULEB128(u64, reader), + else => null, + }; + } + + pub fn getAddr(self: Attribute, ctx: DwarfInfo, cuh: CompileUnit.Header) ?u64 { + if (self.form != dwarf.FORM.addr) return null; + const debug_info = self.getDebugInfo(ctx); + return switch (cuh.address_size) { + 1 => debug_info[0], + 2 => mem.readIntLittle(u16, debug_info[0..2]), + 4 => mem.readIntLittle(u32, debug_info[0..4]), + 8 => mem.readIntLittle(u64, debug_info[0..8]), + else => unreachable, + }; + } +}; + +const AttributeIterator = struct { + entry: AbbrevEntry, + ctx: DwarfInfo, + cuh: CompileUnit.Header, + debug_abbrev_pos: usize = 0, + debug_info_pos: usize = 0, + + pub fn next(self: *AttributeIterator) !?Attribute { + const debug_abbrev = self.entry.getDebugAbbrev(self.ctx); + if (self.debug_abbrev_pos >= debug_abbrev.len) return null; + + var stream = std.io.fixedBufferStream(debug_abbrev[self.debug_abbrev_pos..]); + var creader = std.io.countingReader(stream.reader()); + const reader = creader.reader(); + + const name = try leb.readULEB128(u64, reader); + const form = try leb.readULEB128(u64, reader); + + self.debug_abbrev_pos += (math.cast(usize, creader.bytes_read) orelse return error.Overflow); + + const len = try findFormSize( + self.ctx, + form, + self.debug_info_pos + self.entry.debug_info_off, + self.cuh, + ); + const attr = Attribute{ + .name = name, + .form = form, + .debug_info_off = 
self.debug_info_pos + self.entry.debug_info_off, + .debug_info_len = len, + }; + + self.debug_info_pos += len; + + return attr; + } +}; + +fn getAbbrevEntry(self: DwarfInfo, da_off: usize, da_len: usize, di_off: usize, di_len: usize) !AbbrevEntry { + const debug_abbrev = self.debug_abbrev[da_off..][0..da_len]; + var stream = std.io.fixedBufferStream(debug_abbrev); + var creader = std.io.countingReader(stream.reader()); + const reader = creader.reader(); + + const tag = try leb.readULEB128(u64, reader); + const children = switch (tag) { + std.dwarf.TAG.const_type, + std.dwarf.TAG.packed_type, + std.dwarf.TAG.pointer_type, + std.dwarf.TAG.reference_type, + std.dwarf.TAG.restrict_type, + std.dwarf.TAG.rvalue_reference_type, + std.dwarf.TAG.shared_type, + std.dwarf.TAG.volatile_type, + => if (creader.bytes_read == da_len) std.dwarf.CHILDREN.no else try reader.readByte(), + else => try reader.readByte(), + }; + + const pos = math.cast(usize, creader.bytes_read) orelse return error.Overflow; + + return AbbrevEntry{ + .tag = tag, + .children = children, + .debug_abbrev_off = pos + da_off, + .debug_abbrev_len = da_len - pos, + .debug_info_off = di_off, + .debug_info_len = di_len, + }; +} + +fn findFormSize(self: DwarfInfo, form: u64, di_off: usize, cuh: CompileUnit.Header) !usize { + const debug_info = self.debug_info[di_off..]; + var stream = std.io.fixedBufferStream(debug_info); + var creader = std.io.countingReader(stream.reader()); + const reader = creader.reader(); + + switch (form) { + dwarf.FORM.strp => return if (cuh.is_64bit) @sizeOf(u64) else @sizeOf(u32), + dwarf.FORM.sec_offset => return if (cuh.is_64bit) @sizeOf(u64) else @sizeOf(u32), + dwarf.FORM.addr => return cuh.address_size, + dwarf.FORM.exprloc => { + const expr_len = try leb.readULEB128(u64, reader); + var i: u64 = 0; + while (i < expr_len) : (i += 1) { + _ = try reader.readByte(); + } + return math.cast(usize, creader.bytes_read) orelse error.Overflow; + }, + dwarf.FORM.flag_present => return 0, + + dwarf.FORM.data1 => return @sizeOf(u8), + dwarf.FORM.data2 => return @sizeOf(u16), + dwarf.FORM.data4 => return @sizeOf(u32), + dwarf.FORM.data8 => return @sizeOf(u64), + dwarf.FORM.udata => { + _ = try leb.readULEB128(u64, reader); + return math.cast(usize, creader.bytes_read) orelse error.Overflow; + }, + dwarf.FORM.sdata => { + _ = try leb.readILEB128(i64, reader); + return math.cast(usize, creader.bytes_read) orelse error.Overflow; + }, + + dwarf.FORM.ref1 => return @sizeOf(u8), + dwarf.FORM.ref2 => return @sizeOf(u16), + dwarf.FORM.ref4 => return @sizeOf(u32), + dwarf.FORM.ref8 => return @sizeOf(u64), + dwarf.FORM.ref_udata => { + _ = try leb.readULEB128(u64, reader); + return math.cast(usize, creader.bytes_read) orelse error.Overflow; + }, + + else => return error.ToDo, + } +} + +fn findAbbrevEntrySize(self: DwarfInfo, da_off: usize, da_len: usize, di_off: usize, cuh: CompileUnit.Header) !usize { + const debug_abbrev = self.debug_abbrev[da_off..][0..da_len]; + var stream = std.io.fixedBufferStream(debug_abbrev); + var creader = std.io.countingReader(stream.reader()); + const reader = creader.reader(); + + const tag = try leb.readULEB128(u64, reader); + switch (tag) { + std.dwarf.TAG.const_type, + std.dwarf.TAG.packed_type, + std.dwarf.TAG.pointer_type, + std.dwarf.TAG.reference_type, + std.dwarf.TAG.restrict_type, + std.dwarf.TAG.rvalue_reference_type, + std.dwarf.TAG.shared_type, + std.dwarf.TAG.volatile_type, + => if (creader.bytes_read != da_len) { + _ = try reader.readByte(); + }, + else => _ = try reader.readByte(), + } 
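    // The loop below sizes a DIE by replaying its abbrev declaration: every
    // (attribute, form) pair read from .debug_abbrev dictates how many bytes the
    // corresponding value occupies in .debug_info. Both streams lean heavily on
    // ULEB128; a small illustration of that encoding (assumed bytes, not taken
    // from any particular object file):
    //
    //     0x7f            -> 127
    //     0xff 0x01       -> 255     (7 data bits per byte, LSB first; high bit = continue)
    //     0xe5 0x8e 0x26  -> 624485
    //
    // std.leb.readULEB128 performs this decoding.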
+
+    var len: usize = 0;
+    while (creader.bytes_read < debug_abbrev.len) {
+        _ = try leb.readULEB128(u64, reader);
+        const form = try leb.readULEB128(u64, reader);
+        const form_len = try self.findFormSize(form, di_off + len, cuh);
+        len += form_len;
+    }
+
+    return len;
+}
+
+fn getString(self: DwarfInfo, off: u64) []const u8 {
+    assert(off < self.debug_str.len);
+    return mem.sliceTo(@ptrCast([*:0]const u8, self.debug_str.ptr + @intCast(usize, off)), 0);
+}
diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig
index 6d24bccad8..32f33eeff0 100644
--- a/src/link/MachO/Object.zig
+++ b/src/link/MachO/Object.zig
@@ -1,3 +1,7 @@
+//! Represents an input relocatable Object file.
+//! Each Object is fully loaded into memory for easier
+//! access to the different data within.
+
 const Object = @This();

 const std = @import("std");
@@ -14,10 +18,12 @@ const sort = std.sort;
 const trace = @import("../../tracy.zig").trace;

 const Allocator = mem.Allocator;
-const Atom = @import("Atom.zig");
+const Atom = @import("ZldAtom.zig");
+const AtomIndex = @import("zld.zig").AtomIndex;
+const DwarfInfo = @import("DwarfInfo.zig");
 const LoadCommandIterator = macho.LoadCommandIterator;
-const MachO = @import("../MachO.zig");
-const SymbolWithLoc = MachO.SymbolWithLoc;
+const Zld = @import("zld.zig").Zld;
+const SymbolWithLoc = @import("zld.zig").SymbolWithLoc;

 name: []const u8,
 mtime: u64,
@@ -30,31 +36,33 @@ header: macho.mach_header_64 = undefined,
 in_symtab: ?[]align(1) const macho.nlist_64 = null,
 in_strtab: ?[]const u8 = null,

-symtab: std.ArrayListUnmanaged(macho.nlist_64) = .{},
-sections: std.ArrayListUnmanaged(macho.section_64) = .{},
-
-sections_as_symbols: std.AutoHashMapUnmanaged(u16, u32) = .{},
-
-/// List of atoms that map to the symbols parsed from this object file.
-managed_atoms: std.ArrayListUnmanaged(*Atom) = .{},
-
-/// Table of atoms belonging to this object file indexed by the symbol index.
-atom_by_index_table: std.AutoHashMapUnmanaged(u32, *Atom) = .{},
+/// Output symtab is sorted so that we can easily reference symbols following each
+/// other in address space.
+/// The length of the symtab is at least that of the input symtab; however, there
+/// can be trailing section symbols.
+symtab: []macho.nlist_64 = undefined,
+/// Can be undefined as it is set together with in_symtab.
+source_symtab_lookup: []u32 = undefined,
+/// Can be undefined as it is set together with in_symtab.
+strtab_lookup: []u32 = undefined,
+/// Can be undefined as it is set together with in_symtab.
+atom_by_index_table: []AtomIndex = undefined,
+/// Can be undefined as it is set together with in_symtab.
+globals_lookup: []i64 = undefined,
+
+atoms: std.ArrayListUnmanaged(AtomIndex) = .{},

 pub fn deinit(self: *Object, gpa: Allocator) void {
-    self.symtab.deinit(gpa);
-    self.sections.deinit(gpa);
-    self.sections_as_symbols.deinit(gpa);
-    self.atom_by_index_table.deinit(gpa);
-
-    for (self.managed_atoms.items) |atom| {
-        atom.deinit(gpa);
-        gpa.destroy(atom);
-    }
-    self.managed_atoms.deinit(gpa);
-
+    self.atoms.deinit(gpa);
     gpa.free(self.name);
     gpa.free(self.contents);
+    if (self.in_symtab) |_| {
+        gpa.free(self.source_symtab_lookup);
+        gpa.free(self.strtab_lookup);
+        gpa.free(self.symtab);
+        gpa.free(self.atom_by_index_table);
+        gpa.free(self.globals_lookup);
+    }
 }

 pub fn parse(self: *Object, allocator: Allocator, cpu_arch: std.Target.Cpu.Arch) !void {
@@ -93,230 +101,245 @@ pub fn parse(self: *Object, allocator: Allocator, cpu_arch: std.Target.Cpu.Arch)
     };

     while (it.next()) |cmd| {
         switch (cmd.cmd()) {
-            .SEGMENT_64 => {
-                const segment = cmd.cast(macho.segment_command_64).?;
-                try self.sections.ensureUnusedCapacity(allocator, segment.nsects);
-                for (cmd.getSections()) |sect| {
-                    self.sections.appendAssumeCapacity(sect);
-                }
-            },
             .SYMTAB => {
                 const symtab = cmd.cast(macho.symtab_command).?;
-                // Sadly, SYMTAB may be at an unaligned offset within the object file.
                 self.in_symtab = @ptrCast(
-                    [*]align(1) const macho.nlist_64,
-                    self.contents.ptr + symtab.symoff,
+                    [*]const macho.nlist_64,
+                    @alignCast(@alignOf(macho.nlist_64), &self.contents[symtab.symoff]),
                 )[0..symtab.nsyms];
                 self.in_strtab = self.contents[symtab.stroff..][0..symtab.strsize];
-                try self.symtab.appendUnalignedSlice(allocator, self.in_symtab.?);
+
+                const nsects = self.getSourceSections().len;
+
+                self.symtab = try allocator.alloc(macho.nlist_64, self.in_symtab.?.len + nsects);
+                self.source_symtab_lookup = try allocator.alloc(u32, self.in_symtab.?.len);
+                self.strtab_lookup = try allocator.alloc(u32, self.in_symtab.?.len);
+                self.globals_lookup = try allocator.alloc(i64, self.in_symtab.?.len);
+                self.atom_by_index_table = try allocator.alloc(AtomIndex, self.in_symtab.?.len + nsects);
+
+                for (self.symtab) |*sym| {
+                    sym.* = .{
+                        .n_value = 0,
+                        .n_sect = 0,
+                        .n_desc = 0,
+                        .n_strx = 0,
+                        .n_type = 0,
+                    };
+                }
+
+                mem.set(i64, self.globals_lookup, -1);
+                mem.set(AtomIndex, self.atom_by_index_table, 0);
+
+                // You would expect the symbol table to be at least pre-sorted by symbol type:
+                // local < extern defined < undefined. Unfortunately, this is not guaranteed! For
+                // instance, the Go compiler does not necessarily respect it, so we sort immediately
+                // by type, and by address within each group.
+                var sorted_all_syms = try std.ArrayList(SymbolAtIndex).initCapacity(allocator, self.in_symtab.?.len);
+                defer sorted_all_syms.deinit();
+
+                for (self.in_symtab.?) |_, index| {
+                    sorted_all_syms.appendAssumeCapacity(.{ .index = @intCast(u32, index) });
+                }
+
+                // We sort by type: defined < undefined, and
+                // afterwards by address within each group. Normally, dysymtab should
+                // be enough to guarantee the sort, but it turns out not every compiler
+                // is kind enough to specify the symbols in the correct order.
+ sort.sort(SymbolAtIndex, sorted_all_syms.items, self, SymbolAtIndex.lessThan); + + for (sorted_all_syms.items) |sym_id, i| { + const sym = sym_id.getSymbol(self); + + self.symtab[i] = sym; + self.source_symtab_lookup[i] = sym_id.index; + + const sym_name_len = mem.sliceTo(@ptrCast([*:0]const u8, self.in_strtab.?.ptr + sym.n_strx), 0).len + 1; + self.strtab_lookup[i] = @intCast(u32, sym_name_len); + } }, else => {}, } } } -const Context = struct { - object: *const Object, -}; - const SymbolAtIndex = struct { index: u32, + const Context = *const Object; + fn getSymbol(self: SymbolAtIndex, ctx: Context) macho.nlist_64 { - return ctx.object.getSourceSymbol(self.index).?; + return ctx.in_symtab.?[self.index]; } fn getSymbolName(self: SymbolAtIndex, ctx: Context) []const u8 { - const sym = self.getSymbol(ctx); - return ctx.object.getString(sym.n_strx); + const off = self.getSymbol(ctx).n_strx; + return mem.sliceTo(@ptrCast([*:0]const u8, ctx.in_strtab.?.ptr + off), 0); } - /// Returns whether lhs is less than rhs by allocated address in object file. - /// Undefined symbols are pushed to the back (always evaluate to true). + /// Performs lexicographic-like check. + /// * lhs and rhs defined + /// * if lhs == rhs + /// * if lhs.n_sect == rhs.n_sect + /// * ext < weak < local < temp + /// * lhs.n_sect < rhs.n_sect + /// * lhs < rhs + /// * !rhs is undefined fn lessThan(ctx: Context, lhs_index: SymbolAtIndex, rhs_index: SymbolAtIndex) bool { const lhs = lhs_index.getSymbol(ctx); const rhs = rhs_index.getSymbol(ctx); - if (lhs.sect()) { - if (rhs.sect()) { - // Same group, sort by address. - return lhs.n_value < rhs.n_value; - } else { - return true; - } - } else { - return false; - } - } - - /// Returns whether lhs is less senior than rhs. The rules are: - /// 1. ext - /// 2. weak - /// 3. local - /// 4. temp (local starting with `l` prefix). - fn lessThanBySeniority(ctx: Context, lhs_index: SymbolAtIndex, rhs_index: SymbolAtIndex) bool { - const lhs = lhs_index.getSymbol(ctx); - const rhs = rhs_index.getSymbol(ctx); - if (!rhs.ext()) { - const lhs_name = lhs_index.getSymbolName(ctx); - return mem.startsWith(u8, lhs_name, "l") or mem.startsWith(u8, lhs_name, "L"); - } else if (rhs.pext() or rhs.weakDef()) { - return !lhs.ext(); - } else { + if (lhs.sect() and rhs.sect()) { + if (lhs.n_value == rhs.n_value) { + if (lhs.n_sect == rhs.n_sect) { + if (lhs.ext() and rhs.ext()) { + if ((lhs.pext() or lhs.weakDef()) and (rhs.pext() or rhs.weakDef())) { + return false; + } else return rhs.pext() or rhs.weakDef(); + } else { + const lhs_name = lhs_index.getSymbolName(ctx); + const lhs_temp = mem.startsWith(u8, lhs_name, "l") or mem.startsWith(u8, lhs_name, "L"); + const rhs_name = rhs_index.getSymbolName(ctx); + const rhs_temp = mem.startsWith(u8, rhs_name, "l") or mem.startsWith(u8, rhs_name, "L"); + if (lhs_temp and rhs_temp) { + return false; + } else return rhs_temp; + } + } else return lhs.n_sect < rhs.n_sect; + } else return lhs.n_value < rhs.n_value; + } else if (lhs.undf() and rhs.undf()) { return false; - } + } else return rhs.undf(); } - /// Like lessThanBySeniority but negated. 
- fn greaterThanBySeniority(ctx: Context, lhs_index: SymbolAtIndex, rhs_index: SymbolAtIndex) bool { - return !lessThanBySeniority(ctx, lhs_index, rhs_index); + fn lessThanByNStrx(ctx: Context, lhs: SymbolAtIndex, rhs: SymbolAtIndex) bool { + return lhs.getSymbol(ctx).n_strx < rhs.getSymbol(ctx).n_strx; } }; -fn filterSymbolsByAddress( - indexes: []SymbolAtIndex, - start_addr: u64, - end_addr: u64, - ctx: Context, -) []SymbolAtIndex { - const Predicate = struct { - addr: u64, - ctx: Context, +fn filterSymbolsBySection(symbols: []macho.nlist_64, n_sect: u8) struct { + index: u32, + len: u32, +} { + const FirstMatch = struct { + n_sect: u8, - pub fn predicate(pred: @This(), index: SymbolAtIndex) bool { - return index.getSymbol(pred.ctx).n_value >= pred.addr; + pub fn predicate(pred: @This(), symbol: macho.nlist_64) bool { + return symbol.n_sect == pred.n_sect; } }; + const FirstNonMatch = struct { + n_sect: u8, - const start = MachO.findFirst(SymbolAtIndex, indexes, 0, Predicate{ - .addr = start_addr, - .ctx = ctx, + pub fn predicate(pred: @This(), symbol: macho.nlist_64) bool { + return symbol.n_sect != pred.n_sect; + } + }; + + const index = @import("zld.zig").lsearch(macho.nlist_64, symbols, FirstMatch{ + .n_sect = n_sect, }); - const end = MachO.findFirst(SymbolAtIndex, indexes, start, Predicate{ - .addr = end_addr, - .ctx = ctx, + const len = @import("zld.zig").lsearch(macho.nlist_64, symbols[index..], FirstNonMatch{ + .n_sect = n_sect, }); - return indexes[start..end]; + return .{ .index = @intCast(u32, index), .len = @intCast(u32, len) }; } -fn filterRelocs( - relocs: []align(1) const macho.relocation_info, - start_addr: u64, - end_addr: u64, -) []align(1) const macho.relocation_info { +fn filterSymbolsByAddress(symbols: []macho.nlist_64, n_sect: u8, start_addr: u64, end_addr: u64) struct { + index: u32, + len: u32, +} { const Predicate = struct { addr: u64, + n_sect: u8, - pub fn predicate(self: @This(), rel: macho.relocation_info) bool { - return rel.r_address < self.addr; + pub fn predicate(pred: @This(), symbol: macho.nlist_64) bool { + return symbol.n_value >= pred.addr; } }; - const start = MachO.findFirst(macho.relocation_info, relocs, 0, Predicate{ .addr = end_addr }); - const end = MachO.findFirst(macho.relocation_info, relocs, start, Predicate{ .addr = start_addr }); + const index = @import("zld.zig").lsearch(macho.nlist_64, symbols, Predicate{ + .addr = start_addr, + .n_sect = n_sect, + }); + const len = @import("zld.zig").lsearch(macho.nlist_64, symbols[index..], Predicate{ + .addr = end_addr, + .n_sect = n_sect, + }); - return relocs[start..end]; + return .{ .index = @intCast(u32, index), .len = @intCast(u32, len) }; } -pub fn scanInputSections(self: Object, macho_file: *MachO) !void { - for (self.sections.items) |sect| { - const sect_id = (try macho_file.getOutputSection(sect)) orelse { - log.debug(" unhandled section", .{}); - continue; - }; - const output = macho_file.sections.items(.header)[sect_id]; - log.debug("mapping '{s},{s}' into output sect({d}, '{s},{s}')", .{ - sect.segName(), - sect.sectName(), - sect_id + 1, - output.segName(), - output.sectName(), - }); +const SortedSection = struct { + header: macho.section_64, + id: u8, +}; + +fn sectionLessThanByAddress(ctx: void, lhs: SortedSection, rhs: SortedSection) bool { + _ = ctx; + if (lhs.header.addr == rhs.header.addr) { + return lhs.id < rhs.id; } + return lhs.header.addr < rhs.header.addr; } -/// Splits object into atoms assuming one-shot linking mode. 
-pub fn splitIntoAtoms(self: *Object, macho_file: *MachO, object_id: u32) !void { - assert(macho_file.mode == .one_shot); +/// Splits input sections into Atoms. +/// If the Object was compiled with `MH_SUBSECTIONS_VIA_SYMBOLS`, splits section +/// into subsections where each subsection then represents an Atom. +pub fn splitIntoAtoms(self: *Object, zld: *Zld, object_id: u31) !void { + const gpa = zld.gpa; - const tracy = trace(@src()); - defer tracy.end(); + log.debug("splitting object({d}, {s}) into atoms", .{ object_id, self.name }); - const gpa = macho_file.base.allocator; + const sections = self.getSourceSections(); + for (sections) |sect, id| { + if (sect.isDebug()) continue; + const out_sect_id = (try zld.getOutputSection(sect)) orelse { + log.debug(" unhandled section '{s},{s}'", .{ sect.segName(), sect.sectName() }); + continue; + }; + if (sect.size == 0) continue; - log.debug("splitting object({d}, {s}) into atoms: one-shot mode", .{ object_id, self.name }); + const sect_id = @intCast(u8, id); + const sym = self.getSectionAliasSymbolPtr(sect_id); + sym.* = .{ + .n_strx = 0, + .n_type = macho.N_SECT, + .n_sect = out_sect_id + 1, + .n_desc = 0, + .n_value = sect.addr, + }; + } - const in_symtab = self.in_symtab orelse { - for (self.sections.items) |sect, id| { + if (self.in_symtab == null) { + for (sections) |sect, id| { if (sect.isDebug()) continue; - const out_sect_id = (try macho_file.getOutputSection(sect)) orelse { - log.debug(" unhandled section", .{}); - continue; - }; + const out_sect_id = (try zld.getOutputSection(sect)) orelse continue; if (sect.size == 0) continue; const sect_id = @intCast(u8, id); - const sym_index = self.sections_as_symbols.get(sect_id) orelse blk: { - const sym_index = @intCast(u32, self.symtab.items.len); - try self.symtab.append(gpa, .{ - .n_strx = 0, - .n_type = macho.N_SECT, - .n_sect = out_sect_id + 1, - .n_desc = 0, - .n_value = sect.addr, - }); - try self.sections_as_symbols.putNoClobber(gpa, sect_id, sym_index); - break :blk sym_index; - }; - const code: ?[]const u8 = if (!sect.isZerofill()) try self.getSectionContents(sect) else null; - const relocs = @ptrCast( - [*]align(1) const macho.relocation_info, - self.contents.ptr + sect.reloff, - )[0..sect.nreloc]; - const atom = try self.createAtomFromSubsection( - macho_file, + const sym_index = self.getSectionAliasSymbolIndex(sect_id); + const atom_index = try self.createAtomFromSubsection( + zld, object_id, sym_index, + 0, + 0, sect.size, sect.@"align", - code, - relocs, - &.{}, out_sect_id, - sect, ); - try macho_file.addAtomToSection(atom); + zld.addAtomToSection(atom_index); } return; - }; - - // You would expect that the symbol table is at least pre-sorted based on symbol's type: - // local < extern defined < undefined. Unfortunately, this is not guaranteed! For instance, - // the GO compiler does not necessarily respect that therefore we sort immediately by type - // and address within. - const context = Context{ - .object = self, - }; - var sorted_all_syms = try std.ArrayList(SymbolAtIndex).initCapacity(gpa, in_symtab.len); - defer sorted_all_syms.deinit(); - - for (in_symtab) |_, index| { - sorted_all_syms.appendAssumeCapacity(.{ .index = @intCast(u32, index) }); } - // We sort by type: defined < undefined, and - // afterwards by address in each group. Normally, dysymtab should - // be enough to guarantee the sort, but turns out not every compiler - // is kind enough to specify the symbols in the correct order. 
- sort.sort(SymbolAtIndex, sorted_all_syms.items, context, SymbolAtIndex.lessThan); - // Well, shit, sometimes compilers skip the dysymtab load command altogether, meaning we // have to infer the start of undef section in the symtab ourselves. const iundefsym = blk: { const dysymtab = self.parseDysymtab() orelse { - var iundefsym: usize = sorted_all_syms.items.len; + var iundefsym: usize = self.in_symtab.?.len; while (iundefsym > 0) : (iundefsym -= 1) { - const sym = sorted_all_syms.items[iundefsym - 1].getSymbol(context); + const sym = self.symtab[iundefsym - 1]; if (sym.sect()) break; } break :blk iundefsym; @@ -325,271 +348,210 @@ pub fn splitIntoAtoms(self: *Object, macho_file: *MachO, object_id: u32) !void { }; // We only care about defined symbols, so filter every other out. - const sorted_syms = sorted_all_syms.items[0..iundefsym]; + const symtab = try gpa.dupe(macho.nlist_64, self.symtab[0..iundefsym]); + defer gpa.free(symtab); + const subsections_via_symbols = self.header.flags & macho.MH_SUBSECTIONS_VIA_SYMBOLS != 0; - for (self.sections.items) |sect, id| { + // Sort section headers by address. + var sorted_sections = try gpa.alloc(SortedSection, sections.len); + defer gpa.free(sorted_sections); + + for (sections) |sect, id| { + sorted_sections[id] = .{ .header = sect, .id = @intCast(u8, id) }; + } + + std.sort.sort(SortedSection, sorted_sections, {}, sectionLessThanByAddress); + + var sect_sym_index: u32 = 0; + for (sorted_sections) |section| { + const sect = section.header; if (sect.isDebug()) continue; - const sect_id = @intCast(u8, id); + const sect_id = section.id; log.debug("splitting section '{s},{s}' into atoms", .{ sect.segName(), sect.sectName() }); - // Get matching segment/section in the final artifact. - const out_sect_id = (try macho_file.getOutputSection(sect)) orelse { - log.debug(" unhandled section", .{}); - continue; - }; + // Get output segment/section in the final artifact. + const out_sect_id = (try zld.getOutputSection(sect)) orelse continue; log.debug(" output sect({d}, '{s},{s}')", .{ out_sect_id + 1, - macho_file.sections.items(.header)[out_sect_id].segName(), - macho_file.sections.items(.header)[out_sect_id].sectName(), + zld.sections.items(.header)[out_sect_id].segName(), + zld.sections.items(.header)[out_sect_id].sectName(), }); - const cpu_arch = macho_file.base.options.target.cpu.arch; - - // Read section's code - const code: ?[]const u8 = if (!sect.isZerofill()) try self.getSectionContents(sect) else null; + const cpu_arch = zld.options.target.cpu.arch; + const sect_loc = filterSymbolsBySection(symtab[sect_sym_index..], sect_id + 1); + const sect_start_index = sect_sym_index + sect_loc.index; - // Read section's list of relocations - const relocs = @ptrCast( - [*]align(1) const macho.relocation_info, - self.contents.ptr + sect.reloff, - )[0..sect.nreloc]; + sect_sym_index += sect_loc.len; - // Symbols within this section only. - const filtered_syms = filterSymbolsByAddress( - sorted_syms, - sect.addr, - sect.addr + sect.size, - context, - ); - - if (subsections_via_symbols and filtered_syms.len > 0) { + if (sect.size == 0) continue; + if (subsections_via_symbols and sect_loc.len > 0) { // If the first nlist does not match the start of the section, // then we need to encapsulate the memory range [section start, first symbol) // as a temporary symbol and insert the matching Atom. 
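            // Worked illustration of the split (assumed addresses): given a section at
            // 0x100 with size 0x40 and defined symbols at 0x110 and 0x120,
            // MH_SUBSECTIONS_VIA_SYMBOLS splitting yields three atoms:
            //     [0x100, 0x110)  anonymous atom keyed by the section-alias symbol
            //     [0x110, 0x120)  atom owned by the symbol at 0x110
            //     [0x120, 0x140)  atom owned by the symbol at 0x120
            // The first, anonymous atom is exactly the case handled here.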
- const first_sym = filtered_syms[0].getSymbol(context); + const first_sym = symtab[sect_start_index]; if (first_sym.n_value > sect.addr) { - const sym_index = self.sections_as_symbols.get(sect_id) orelse blk: { - const sym_index = @intCast(u32, self.symtab.items.len); - try self.symtab.append(gpa, .{ - .n_strx = 0, - .n_type = macho.N_SECT, - .n_sect = out_sect_id + 1, - .n_desc = 0, - .n_value = sect.addr, - }); - try self.sections_as_symbols.putNoClobber(gpa, sect_id, sym_index); - break :blk sym_index; - }; + const sym_index = self.getSectionAliasSymbolIndex(sect_id); const atom_size = first_sym.n_value - sect.addr; - const atom_code: ?[]const u8 = if (code) |cc| blk: { - const size = math.cast(usize, atom_size) orelse return error.Overflow; - break :blk cc[0..size]; - } else null; - const atom = try self.createAtomFromSubsection( - macho_file, + const atom_index = try self.createAtomFromSubsection( + zld, object_id, sym_index, + 0, + 0, atom_size, sect.@"align", - atom_code, - relocs, - &.{}, out_sect_id, - sect, ); - try macho_file.addAtomToSection(atom); + zld.addAtomToSection(atom_index); } - var next_sym_count: usize = 0; - while (next_sym_count < filtered_syms.len) { - const next_sym = filtered_syms[next_sym_count].getSymbol(context); + var next_sym_index = sect_start_index; + while (next_sym_index < sect_start_index + sect_loc.len) { + const next_sym = symtab[next_sym_index]; const addr = next_sym.n_value; - const atom_syms = filterSymbolsByAddress( - filtered_syms[next_sym_count..], + const atom_loc = filterSymbolsByAddress( + symtab[next_sym_index..], + sect_id + 1, addr, addr + 1, - context, - ); - next_sym_count += atom_syms.len; - - // We want to bubble up the first externally defined symbol here. - assert(atom_syms.len > 0); - var sorted_atom_syms = std.ArrayList(SymbolAtIndex).init(gpa); - defer sorted_atom_syms.deinit(); - try sorted_atom_syms.appendSlice(atom_syms); - sort.sort( - SymbolAtIndex, - sorted_atom_syms.items, - context, - SymbolAtIndex.greaterThanBySeniority, ); + assert(atom_loc.len > 0); + const atom_sym_index = atom_loc.index + next_sym_index; + const nsyms_trailing = atom_loc.len - 1; + next_sym_index += atom_loc.len; + + // TODO: We want to bubble up the first externally defined symbol here. 
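            // Sizing invariant used just below (continuing the assumed addresses from
            // the illustration above): because the symtab slice is sorted by address,
            // an atom extends either up to the next symbol (0x120 - 0x110) or, for the
            // last symbol in a section, up to the section's end (0x140 - 0x120).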
+ const atom_size = if (next_sym_index < sect_start_index + sect_loc.len) + symtab[next_sym_index].n_value - addr + else + sect.addr + sect.size - addr; - const atom_size = blk: { - const end_addr = if (next_sym_count < filtered_syms.len) - filtered_syms[next_sym_count].getSymbol(context).n_value - else - sect.addr + sect.size; - break :blk end_addr - addr; - }; - const atom_code: ?[]const u8 = if (code) |cc| blk: { - const start = math.cast(usize, addr - sect.addr) orelse return error.Overflow; - const size = math.cast(usize, atom_size) orelse return error.Overflow; - break :blk cc[start..][0..size]; - } else null; const atom_align = if (addr > 0) math.min(@ctz(addr), sect.@"align") else sect.@"align"; - const atom = try self.createAtomFromSubsection( - macho_file, + + const atom_index = try self.createAtomFromSubsection( + zld, object_id, - sorted_atom_syms.items[0].index, + atom_sym_index, + atom_sym_index + 1, + nsyms_trailing, atom_size, atom_align, - atom_code, - relocs, - sorted_atom_syms.items[1..], out_sect_id, - sect, ); + // TODO rework this at the relocation level if (cpu_arch == .x86_64 and addr == sect.addr) { // In x86_64 relocs, it can so happen that the compiler refers to the same // atom by both the actual assigned symbol and the start of the section. In this // case, we need to link the two together so add an alias. - const alias = self.sections_as_symbols.get(sect_id) orelse blk: { - const alias = @intCast(u32, self.symtab.items.len); - try self.symtab.append(gpa, .{ - .n_strx = 0, - .n_type = macho.N_SECT, - .n_sect = out_sect_id + 1, - .n_desc = 0, - .n_value = addr, - }); - try self.sections_as_symbols.putNoClobber(gpa, sect_id, alias); - break :blk alias; - }; - try atom.contained.append(gpa, .{ - .sym_index = alias, - .offset = 0, - }); - try self.atom_by_index_table.put(gpa, alias, atom); + const alias_index = self.getSectionAliasSymbolIndex(sect_id); + self.atom_by_index_table[alias_index] = atom_index; } - try macho_file.addAtomToSection(atom); + zld.addAtomToSection(atom_index); } } else { - // If there is no symbol to refer to this atom, we create - // a temp one, unless we already did that when working out the relocations - // of other atoms. 
- const sym_index = self.sections_as_symbols.get(sect_id) orelse blk: { - const sym_index = @intCast(u32, self.symtab.items.len); - try self.symtab.append(gpa, .{ - .n_strx = 0, - .n_type = macho.N_SECT, - .n_sect = out_sect_id + 1, - .n_desc = 0, - .n_value = sect.addr, - }); - try self.sections_as_symbols.putNoClobber(gpa, sect_id, sym_index); - break :blk sym_index; - }; - const atom = try self.createAtomFromSubsection( - macho_file, + const alias_index = self.getSectionAliasSymbolIndex(sect_id); + const atom_index = try self.createAtomFromSubsection( + zld, object_id, - sym_index, + alias_index, + sect_start_index, + sect_loc.len, sect.size, sect.@"align", - code, - relocs, - filtered_syms, out_sect_id, - sect, ); - try macho_file.addAtomToSection(atom); + zld.addAtomToSection(atom_index); } } } fn createAtomFromSubsection( self: *Object, - macho_file: *MachO, - object_id: u32, + zld: *Zld, + object_id: u31, sym_index: u32, + inner_sym_index: u32, + inner_nsyms_trailing: u32, size: u64, alignment: u32, - code: ?[]const u8, - relocs: []align(1) const macho.relocation_info, - indexes: []const SymbolAtIndex, out_sect_id: u8, - sect: macho.section_64, -) !*Atom { - const gpa = macho_file.base.allocator; - const sym = self.symtab.items[sym_index]; - const atom = try MachO.createEmptyAtom(gpa, sym_index, size, alignment); +) !AtomIndex { + const gpa = zld.gpa; + const atom_index = try zld.createEmptyAtom(sym_index, size, alignment); + const atom = zld.getAtomPtr(atom_index); + atom.inner_sym_index = inner_sym_index; + atom.inner_nsyms_trailing = inner_nsyms_trailing; atom.file = object_id; - self.symtab.items[sym_index].n_sect = out_sect_id + 1; + self.symtab[sym_index].n_sect = out_sect_id + 1; log.debug("creating ATOM(%{d}, '{s}') in sect({d}, '{s},{s}') in object({d})", .{ sym_index, - self.getString(sym.n_strx), + self.getSymbolName(sym_index), out_sect_id + 1, - macho_file.sections.items(.header)[out_sect_id].segName(), - macho_file.sections.items(.header)[out_sect_id].sectName(), + zld.sections.items(.header)[out_sect_id].segName(), + zld.sections.items(.header)[out_sect_id].sectName(), object_id, }); - try self.atom_by_index_table.putNoClobber(gpa, sym_index, atom); - try self.managed_atoms.append(gpa, atom); + try self.atoms.append(gpa, atom_index); + self.atom_by_index_table[sym_index] = atom_index; - if (code) |cc| { - assert(size == cc.len); - mem.copy(u8, atom.code.items, cc); + var it = Atom.getInnerSymbolsIterator(zld, atom_index); + while (it.next()) |sym_loc| { + const inner = zld.getSymbolPtr(sym_loc); + inner.n_sect = out_sect_id + 1; + self.atom_by_index_table[sym_loc.sym_index] = atom_index; } - const base_offset = sym.n_value - sect.addr; - const filtered_relocs = filterRelocs(relocs, base_offset, base_offset + size); - try atom.parseRelocs(filtered_relocs, .{ - .macho_file = macho_file, - .base_addr = sect.addr, - .base_offset = @intCast(i32, base_offset), - }); - - // Since this is atom gets a helper local temporary symbol that didn't exist - // in the object file which encompasses the entire section, we need traverse - // the filtered symbols and note which symbol is contained within so that - // we can properly allocate addresses down the line. - // While we're at it, we need to update segment,section mapping of each symbol too. 
- try atom.contained.ensureTotalCapacity(gpa, indexes.len); - for (indexes) |inner_sym_index| { - const inner_sym = &self.symtab.items[inner_sym_index.index]; - inner_sym.n_sect = out_sect_id + 1; - atom.contained.appendAssumeCapacity(.{ - .sym_index = inner_sym_index.index, - .offset = inner_sym.n_value - sym.n_value, - }); - - try self.atom_by_index_table.putNoClobber(gpa, inner_sym_index.index, atom); - } - - return atom; + return atom_index; } pub fn getSourceSymbol(self: Object, index: u32) ?macho.nlist_64 { const symtab = self.in_symtab.?; if (index >= symtab.len) return null; - return symtab[index]; + const mapped_index = self.source_symtab_lookup[index]; + return symtab[mapped_index]; +} + +/// Expects an arena allocator. +/// Caller owns memory. +pub fn createReverseSymbolLookup(self: Object, arena: Allocator) ![]u32 { + const symtab = self.in_symtab orelse return &[0]u32{}; + const lookup = try arena.alloc(u32, symtab.len); + for (self.source_symtab_lookup) |source_id, id| { + lookup[source_id] = @intCast(u32, id); + } + return lookup; } pub fn getSourceSection(self: Object, index: u16) macho.section_64 { - assert(index < self.sections.items.len); - return self.sections.items[index]; + const sections = self.getSourceSections(); + assert(index < sections.len); + return sections[index]; +} + +pub fn getSourceSections(self: Object) []const macho.section_64 { + var it = LoadCommandIterator{ + .ncmds = self.header.ncmds, + .buffer = self.contents[@sizeOf(macho.mach_header_64)..][0..self.header.sizeofcmds], + }; + while (it.next()) |cmd| switch (cmd.cmd()) { + .SEGMENT_64 => { + return cmd.getSections(); + }, + else => {}, + } else unreachable; } -pub fn parseDataInCode(self: Object) ?[]align(1) const macho.data_in_code_entry { +pub fn parseDataInCode(self: Object) ?[]const macho.data_in_code_entry { var it = LoadCommandIterator{ .ncmds = self.header.ncmds, .buffer = self.contents[@sizeOf(macho.mach_header_64)..][0..self.header.sizeofcmds], @@ -600,8 +562,8 @@ pub fn parseDataInCode(self: Object) ?[]align(1) const macho.data_in_code_entry const dice = cmd.cast(macho.linkedit_data_command).?; const ndice = @divExact(dice.datasize, @sizeOf(macho.data_in_code_entry)); return @ptrCast( - [*]align(1) const macho.data_in_code_entry, - self.contents.ptr + dice.dataoff, + [*]const macho.data_in_code_entry, + @alignCast(@alignOf(macho.data_in_code_entry), &self.contents[dice.dataoff]), )[0..ndice]; }, else => {}, @@ -624,73 +586,66 @@ fn parseDysymtab(self: Object) ?macho.dysymtab_command { } else return null; } -pub fn parseDwarfInfo(self: Object) error{Overflow}!dwarf.DwarfInfo { - var di = dwarf.DwarfInfo{ - .endian = .Little, +pub fn parseDwarfInfo(self: Object) DwarfInfo { + var di = DwarfInfo{ .debug_info = &[0]u8{}, .debug_abbrev = &[0]u8{}, .debug_str = &[0]u8{}, - .debug_str_offsets = &[0]u8{}, - .debug_line = &[0]u8{}, - .debug_line_str = &[0]u8{}, - .debug_ranges = &[0]u8{}, - .debug_loclists = &[0]u8{}, - .debug_rnglists = &[0]u8{}, - .debug_addr = &[0]u8{}, - .debug_names = &[0]u8{}, - .debug_frame = &[0]u8{}, }; - for (self.sections.items) |sect| { - const segname = sect.segName(); + for (self.getSourceSections()) |sect| { + if (!sect.isDebug()) continue; const sectname = sect.sectName(); - if (mem.eql(u8, segname, "__DWARF")) { - if (mem.eql(u8, sectname, "__debug_info")) { - di.debug_info = try self.getSectionContents(sect); - } else if (mem.eql(u8, sectname, "__debug_abbrev")) { - di.debug_abbrev = try self.getSectionContents(sect); - } else if (mem.eql(u8, sectname, 
"__debug_str")) { - di.debug_str = try self.getSectionContents(sect); - } else if (mem.eql(u8, sectname, "__debug_str_offsets")) { - di.debug_str_offsets = try self.getSectionContents(sect); - } else if (mem.eql(u8, sectname, "__debug_line")) { - di.debug_line = try self.getSectionContents(sect); - } else if (mem.eql(u8, sectname, "__debug_line_str")) { - di.debug_line_str = try self.getSectionContents(sect); - } else if (mem.eql(u8, sectname, "__debug_ranges")) { - di.debug_ranges = try self.getSectionContents(sect); - } else if (mem.eql(u8, sectname, "__debug_loclists")) { - di.debug_loclists = try self.getSectionContents(sect); - } else if (mem.eql(u8, sectname, "__debug_rnglists")) { - di.debug_rnglists = try self.getSectionContents(sect); - } else if (mem.eql(u8, sectname, "__debug_addr")) { - di.debug_addr = try self.getSectionContents(sect); - } else if (mem.eql(u8, sectname, "__debug_names")) { - di.debug_names = try self.getSectionContents(sect); - } else if (mem.eql(u8, sectname, "__debug_frame")) { - di.debug_frame = try self.getSectionContents(sect); - } + if (mem.eql(u8, sectname, "__debug_info")) { + di.debug_info = self.getSectionContents(sect); + } else if (mem.eql(u8, sectname, "__debug_abbrev")) { + di.debug_abbrev = self.getSectionContents(sect); + } else if (mem.eql(u8, sectname, "__debug_str")) { + di.debug_str = self.getSectionContents(sect); } } return di; } -pub fn getSectionContents(self: Object, sect: macho.section_64) error{Overflow}![]const u8 { - const size = math.cast(usize, sect.size) orelse return error.Overflow; - log.debug("getting {s},{s} data at 0x{x} - 0x{x}", .{ - sect.segName(), - sect.sectName(), - sect.offset, - sect.offset + sect.size, - }); +pub fn getSectionContents(self: Object, sect: macho.section_64) []const u8 { + const size = @intCast(usize, sect.size); return self.contents[sect.offset..][0..size]; } -pub fn getString(self: Object, off: u32) []const u8 { +pub fn getSectionAliasSymbolIndex(self: Object, sect_id: u8) u32 { + const start = @intCast(u32, self.in_symtab.?.len); + return start + sect_id; +} + +pub fn getSectionAliasSymbol(self: *Object, sect_id: u8) macho.nlist_64 { + return self.symtab[self.getSectionAliasSymbolIndex(sect_id)]; +} + +pub fn getSectionAliasSymbolPtr(self: *Object, sect_id: u8) *macho.nlist_64 { + return &self.symtab[self.getSectionAliasSymbolIndex(sect_id)]; +} + +pub fn getRelocs(self: Object, sect: macho.section_64) []align(1) const macho.relocation_info { + if (sect.nreloc == 0) return &[0]macho.relocation_info{}; + return @ptrCast([*]align(1) const macho.relocation_info, self.contents.ptr + sect.reloff)[0..sect.nreloc]; +} + +pub fn getSymbolName(self: Object, index: u32) []const u8 { const strtab = self.in_strtab.?; - assert(off < strtab.len); - return mem.sliceTo(@ptrCast([*:0]const u8, strtab.ptr + off), 0); + const sym = self.symtab[index]; + + if (self.getSourceSymbol(index) == null) { + assert(sym.n_strx == 0); + return ""; + } + + const start = sym.n_strx; + const len = self.strtab_lookup[index]; + + return strtab[start..][0 .. 
len - 1 :0]; } -pub fn getAtomForSymbol(self: Object, sym_index: u32) ?*Atom { - return self.atom_by_index_table.get(sym_index); +pub fn getAtomIndexForSymbol(self: Object, sym_index: u32) ?AtomIndex { + const atom_index = self.atom_by_index_table[sym_index]; + if (atom_index == 0) return null; + return atom_index; } diff --git a/src/link/MachO/Relocation.zig b/src/link/MachO/Relocation.zig index 1acbb30a24..338bcb66cb 100644 --- a/src/link/MachO/Relocation.zig +++ b/src/link/MachO/Relocation.zig @@ -47,7 +47,6 @@ pub fn getTargetAtom(self: Relocation, macho_file: *MachO) ?*Atom { else => unreachable, } if (macho_file.getStubsAtomForSymbol(self.target)) |stubs_atom| return stubs_atom; - if (macho_file.getTlvPtrAtomForSymbol(self.target)) |tlv_ptr_atom| return tlv_ptr_atom; return macho_file.getAtomForSymbol(self.target); } diff --git a/src/link/MachO/Trie.zig b/src/link/MachO/Trie.zig index d85aabe63d..8660781fe9 100644 --- a/src/link/MachO/Trie.zig +++ b/src/link/MachO/Trie.zig @@ -108,7 +108,7 @@ pub const Node = struct { .label = to_label, }); - return if (match == label.len) to_node else mid.put(allocator, label[match..]); + return if (match == label.len) mid else mid.put(allocator, label[match..]); } // Add a new node. @@ -489,6 +489,21 @@ test "Trie basic" { } } +fn expectEqualHexStrings(expected: []const u8, given: []const u8) !void { + assert(expected.len > 0); + if (mem.eql(u8, expected, given)) return; + const expected_fmt = try std.fmt.allocPrint(testing.allocator, "{x}", .{std.fmt.fmtSliceHexLower(expected)}); + defer testing.allocator.free(expected_fmt); + const given_fmt = try std.fmt.allocPrint(testing.allocator, "{x}", .{std.fmt.fmtSliceHexLower(given)}); + defer testing.allocator.free(given_fmt); + const idx = mem.indexOfDiff(u8, expected_fmt, given_fmt).?; + var padding = try testing.allocator.alloc(u8, idx + 5); + defer testing.allocator.free(padding); + mem.set(u8, padding, ' '); + std.debug.print("\nEXP: {s}\nGIV: {s}\n{s}^ -- first differing byte\n", .{ expected_fmt, given_fmt, padding }); + return error.TestFailed; +} + test "write Trie to a byte stream" { var gpa = testing.allocator; var trie: Trie = .{}; @@ -523,16 +538,14 @@ test "write Trie to a byte stream" { defer gpa.free(buffer); var stream = std.io.fixedBufferStream(buffer); { - const nwritten = try trie.write(stream.writer()); - try testing.expect(nwritten == trie.size); - try testing.expect(mem.eql(u8, buffer, &exp_buffer)); + _ = try trie.write(stream.writer()); + try expectEqualHexStrings(&exp_buffer, buffer); } { // Writing finalized trie again should yield the same result. 
        try stream.seekTo(0);
-        const nwritten = try trie.write(stream.writer());
-        try testing.expect(nwritten == trie.size);
-        try testing.expect(mem.eql(u8, buffer, &exp_buffer));
+        _ = try trie.write(stream.writer());
+        try expectEqualHexStrings(&exp_buffer, buffer);
     }
 }

@@ -562,8 +575,37 @@ test "parse Trie from byte stream" {
     var out_buffer = try gpa.alloc(u8, trie.size);
     defer gpa.free(out_buffer);
     var out_stream = std.io.fixedBufferStream(out_buffer);
-    const nwritten = try trie.write(out_stream.writer());
+    _ = try trie.write(out_stream.writer());
+    try expectEqualHexStrings(&in_buffer, out_buffer);
+}
-
-    try testing.expect(nwritten == trie.size);
-    try testing.expect(mem.eql(u8, &in_buffer, out_buffer));
+test "ordering bug" {
+    var gpa = testing.allocator;
+    var trie: Trie = .{};
+    defer trie.deinit(gpa);
+
+    try trie.put(gpa, .{
+        .name = "_asStr",
+        .vmaddr_offset = 0x558,
+        .export_flags = 0,
+    });
+    try trie.put(gpa, .{
+        .name = "_a",
+        .vmaddr_offset = 0x8008,
+        .export_flags = 0,
+    });
+    try trie.finalize(gpa);
+
+    const exp_buffer = [_]u8{
+        0x00, 0x01, 0x5F, 0x61, 0x00, 0x06, 0x04, 0x00,
+        0x88, 0x80, 0x02, 0x01, 0x73, 0x53, 0x74, 0x72,
+        0x00, 0x12, 0x03, 0x00, 0xD8, 0x0A, 0x00,
+    };
+
+    var buffer = try gpa.alloc(u8, trie.size);
+    defer gpa.free(buffer);
+    var stream = std.io.fixedBufferStream(buffer);
+    // Writing finalized trie again should yield the same result.
+    _ = try trie.write(stream.writer());
+    try expectEqualHexStrings(&exp_buffer, buffer);
 }
diff --git a/src/link/MachO/ZldAtom.zig b/src/link/MachO/ZldAtom.zig
new file mode 100644
index 0000000000..2bfe730ad7
--- /dev/null
+++ b/src/link/MachO/ZldAtom.zig
@@ -0,0 +1,1057 @@
+//! An atom is the smallest unit that will get an allocated virtual
+//! memory address in the final linked image.
+//! For example, we parse each input section within an input relocatable
+//! object file into a set of atoms which are then laid out contiguously
+//! as they were defined in the input file.
+
+const Atom = @This();
+
+const std = @import("std");
+const build_options = @import("build_options");
+const aarch64 = @import("../../arch/aarch64/bits.zig");
+const assert = std.debug.assert;
+const log = std.log.scoped(.atom);
+const macho = std.macho;
+const math = std.math;
+const mem = std.mem;
+const meta = std.meta;
+
+const Allocator = mem.Allocator;
+const Arch = std.Target.Cpu.Arch;
+const AtomIndex = @import("zld.zig").AtomIndex;
+const Object = @import("Object.zig");
+const SymbolWithLoc = @import("zld.zig").SymbolWithLoc;
+const Zld = @import("zld.zig").Zld;
+
+/// Each Atom always gets a symbol with the fully qualified name.
+/// The symbol can reside in any object file context structure in the `symtab` array
+/// (see `Object`), or, if the symbol is a synthetic symbol such as a GOT cell or
+/// a stub trampoline, it can be found in the linker's `locals` arraylist.
+sym_index: u32,
+
+/// -1 means an Atom is a synthetic Atom such as a GOT cell defined by the linker.
+/// Otherwise, it is the index into the appropriate object file.
+/// Prefer using the `getFile()` helper to get the file index out rather than using
+/// the field directly.
+file: i32,
+
+/// If this Atom is not a synthetic Atom, i.e., it references a subsection in an
+/// Object file, `inner_sym_index` and `inner_nsyms_trailing` tell where and whether
+/// this Atom contains any additional symbol references that fall within this Atom's
+/// address range. These could for example be an alias symbol which can be used
+/// internally by the relocation records, or, if the Object file couldn't be split
+/// into subsections, this Atom may encompass an entire input section.
+inner_sym_index: u32,
+inner_nsyms_trailing: u32,
+
+/// Size of this atom.
+size: u64,
+
+/// Alignment of this atom as a power of 2.
+/// For instance, an alignment of 0 should be read as 2^0 = 1 byte aligned.
+alignment: u32,
+
+/// Cached index and length into the relocation records array that correspond to
+/// this Atom and need to be resolved before the Atom can be committed into the
+/// final linked image.
+/// Do not use these fields directly. Instead, use the `getAtomRelocs()` helper.
+cached_relocs_start: i32,
+cached_relocs_len: u32,
+
+/// Points to the previous and next neighbours.
+next_index: ?AtomIndex,
+prev_index: ?AtomIndex,
+
+pub const empty = Atom{
+    .sym_index = 0,
+    .inner_sym_index = 0,
+    .inner_nsyms_trailing = 0,
+    .file = -1,
+    .size = 0,
+    .alignment = 0,
+    .cached_relocs_start = -1,
+    .cached_relocs_len = 0,
+    .prev_index = null,
+    .next_index = null,
+};
+
+/// Returns `null` if the Atom is a synthetic Atom.
+/// Otherwise, returns an index into an array of Objects.
+pub inline fn getFile(self: Atom) ?u31 {
+    if (self.file == -1) return null;
+    return @intCast(u31, self.file);
+}
+
+pub inline fn getSymbolWithLoc(self: Atom) SymbolWithLoc {
+    return .{
+        .sym_index = self.sym_index,
+        .file = self.file,
+    };
+}
+
+const InnerSymIterator = struct {
+    sym_index: u32,
+    count: u32,
+    file: i32,
+
+    pub fn next(it: *@This()) ?SymbolWithLoc {
+        if (it.count == 0) return null;
+        const res = SymbolWithLoc{ .sym_index = it.sym_index, .file = it.file };
+        it.sym_index += 1;
+        it.count -= 1;
+        return res;
+    }
+};
+
+/// Returns an iterator over potentially contained symbols.
+/// Panics when called on a synthetic Atom.
+pub fn getInnerSymbolsIterator(zld: *Zld, atom_index: AtomIndex) InnerSymIterator {
+    const atom = zld.getAtom(atom_index);
+    assert(atom.getFile() != null);
+    return .{
+        .sym_index = atom.inner_sym_index,
+        .count = atom.inner_nsyms_trailing,
+        .file = atom.file,
+    };
+}
+
+/// Returns a section alias symbol if one is defined.
+/// An alias symbol is used to represent the start of an input section
+/// if there were no symbols defined within that range.
+/// Alias symbols are only used on x86_64.
+pub fn getSectionAlias(zld: *Zld, atom_index: AtomIndex) ?SymbolWithLoc {
+    const atom = zld.getAtom(atom_index);
+    assert(atom.getFile() != null);
+
+    const object = zld.objects.items[atom.getFile().?];
+    const nbase = @intCast(u32, object.in_symtab.?.len);
+    const ntotal = @intCast(u32, object.symtab.len);
+    var sym_index: u32 = nbase;
+    while (sym_index < ntotal) : (sym_index += 1) {
+        if (object.getAtomIndexForSymbol(sym_index)) |other_atom_index| {
+            if (other_atom_index == atom_index) return SymbolWithLoc{
+                .sym_index = sym_index,
+                .file = atom.file,
+            };
+        }
+    }
+    return null;
+}
+
+/// Given the index of a symbol contained within this Atom, calculates its
+/// offset from the start of this Atom.
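+/// For example, if the Atom's own symbol has n_value 0x100 and the contained
+/// symbol sits at 0x108, the returned offset is 0x8.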
+pub fn calcInnerSymbolOffset(zld: *Zld, atom_index: AtomIndex, sym_index: u32) u64 { + const atom = zld.getAtom(atom_index); + assert(atom.getFile() != null); + + if (atom.sym_index == sym_index) return 0; + + const object = zld.objects.items[atom.getFile().?]; + const source_sym = object.getSourceSymbol(sym_index).?; + const base_addr = if (object.getSourceSymbol(atom.sym_index)) |sym| + sym.n_value + else blk: { + const nbase = @intCast(u32, object.in_symtab.?.len); + const sect_id = @intCast(u16, atom.sym_index - nbase); + const source_sect = object.getSourceSection(sect_id); + break :blk source_sect.addr; + }; + return source_sym.n_value - base_addr; +} + +pub fn scanAtomRelocs( + zld: *Zld, + atom_index: AtomIndex, + relocs: []align(1) const macho.relocation_info, + reverse_lookup: []u32, +) !void { + const arch = zld.options.target.cpu.arch; + const atom = zld.getAtom(atom_index); + assert(atom.getFile() != null); // synthetic atoms do not have relocs + + return switch (arch) { + .aarch64 => scanAtomRelocsArm64(zld, atom_index, relocs, reverse_lookup), + .x86_64 => scanAtomRelocsX86(zld, atom_index, relocs, reverse_lookup), + else => unreachable, + }; +} + +const RelocContext = struct { + base_addr: u64 = 0, + base_offset: i32 = 0, +}; + +pub fn parseRelocTarget( + zld: *Zld, + atom_index: AtomIndex, + rel: macho.relocation_info, + reverse_lookup: []u32, +) SymbolWithLoc { + const atom = zld.getAtom(atom_index); + const object = &zld.objects.items[atom.getFile().?]; + + if (rel.r_extern == 0) { + const sect_id = @intCast(u8, rel.r_symbolnum - 1); + const sym_index = object.getSectionAliasSymbolIndex(sect_id); + return SymbolWithLoc{ .sym_index = sym_index, .file = atom.file }; + } + + const sym_index = reverse_lookup[rel.r_symbolnum]; + const sym_loc = SymbolWithLoc{ + .sym_index = sym_index, + .file = atom.file, + }; + const sym = zld.getSymbol(sym_loc); + + if (sym.sect() and !sym.ext()) { + return sym_loc; + } else if (object.globals_lookup[sym_index] > -1) { + const global_index = @intCast(u32, object.globals_lookup[sym_index]); + return zld.globals.items[global_index]; + } else return sym_loc; +} + +pub fn getRelocTargetAtomIndex(zld: *Zld, rel: macho.relocation_info, target: SymbolWithLoc) ?AtomIndex { + const is_via_got = got: { + switch (zld.options.target.cpu.arch) { + .aarch64 => break :got switch (@intToEnum(macho.reloc_type_arm64, rel.r_type)) { + .ARM64_RELOC_GOT_LOAD_PAGE21, + .ARM64_RELOC_GOT_LOAD_PAGEOFF12, + .ARM64_RELOC_POINTER_TO_GOT, + => true, + else => false, + }, + .x86_64 => break :got switch (@intToEnum(macho.reloc_type_x86_64, rel.r_type)) { + .X86_64_RELOC_GOT, .X86_64_RELOC_GOT_LOAD => true, + else => false, + }, + else => unreachable, + } + }; + + if (is_via_got) { + return zld.getGotAtomIndexForSymbol(target).?; // panic means fatal error + } + if (zld.getStubsAtomIndexForSymbol(target)) |stubs_atom| return stubs_atom; + if (zld.getTlvPtrAtomIndexForSymbol(target)) |tlv_ptr_atom| return tlv_ptr_atom; + + if (target.getFile() == null) { + const target_sym_name = zld.getSymbolName(target); + if (mem.eql(u8, "__mh_execute_header", target_sym_name)) return null; + if (mem.eql(u8, "___dso_handle", target_sym_name)) return null; + + unreachable; // referenced symbol not found + } + + const object = zld.objects.items[target.getFile().?]; + return object.getAtomIndexForSymbol(target.sym_index); +} + +fn scanAtomRelocsArm64( + zld: *Zld, + atom_index: AtomIndex, + relocs: []align(1) const macho.relocation_info, + reverse_lookup: []u32, +) !void { + for (relocs) 
|rel| { + const rel_type = @intToEnum(macho.reloc_type_arm64, rel.r_type); + + switch (rel_type) { + .ARM64_RELOC_ADDEND, .ARM64_RELOC_SUBTRACTOR => continue, + else => {}, + } + + if (rel.r_extern == 0) continue; + + const atom = zld.getAtom(atom_index); + const object = &zld.objects.items[atom.getFile().?]; + const sym_index = reverse_lookup[rel.r_symbolnum]; + const sym_loc = SymbolWithLoc{ + .sym_index = sym_index, + .file = atom.file, + }; + const sym = zld.getSymbol(sym_loc); + + if (sym.sect() and !sym.ext()) continue; + + const target = if (object.globals_lookup[sym_index] > -1) blk: { + const global_index = @intCast(u32, object.globals_lookup[sym_index]); + break :blk zld.globals.items[global_index]; + } else sym_loc; + + switch (rel_type) { + .ARM64_RELOC_BRANCH26 => { + // TODO rewrite relocation + try addStub(zld, target); + }, + .ARM64_RELOC_GOT_LOAD_PAGE21, + .ARM64_RELOC_GOT_LOAD_PAGEOFF12, + .ARM64_RELOC_POINTER_TO_GOT, + => { + // TODO rewrite relocation + try addGotEntry(zld, target); + }, + .ARM64_RELOC_TLVP_LOAD_PAGE21, + .ARM64_RELOC_TLVP_LOAD_PAGEOFF12, + => { + try addTlvPtrEntry(zld, target); + }, + else => {}, + } + } +} + +fn scanAtomRelocsX86( + zld: *Zld, + atom_index: AtomIndex, + relocs: []align(1) const macho.relocation_info, + reverse_lookup: []u32, +) !void { + for (relocs) |rel| { + const rel_type = @intToEnum(macho.reloc_type_x86_64, rel.r_type); + + switch (rel_type) { + .X86_64_RELOC_SUBTRACTOR => continue, + else => {}, + } + + if (rel.r_extern == 0) continue; + + const atom = zld.getAtom(atom_index); + const object = &zld.objects.items[atom.getFile().?]; + const sym_index = reverse_lookup[rel.r_symbolnum]; + const sym_loc = SymbolWithLoc{ + .sym_index = sym_index, + .file = atom.file, + }; + const sym = zld.getSymbol(sym_loc); + + if (sym.sect() and !sym.ext()) continue; + + const target = if (object.globals_lookup[sym_index] > -1) blk: { + const global_index = @intCast(u32, object.globals_lookup[sym_index]); + break :blk zld.globals.items[global_index]; + } else sym_loc; + + switch (rel_type) { + .X86_64_RELOC_BRANCH => { + // TODO rewrite relocation + try addStub(zld, target); + }, + .X86_64_RELOC_GOT, .X86_64_RELOC_GOT_LOAD => { + // TODO rewrite relocation + try addGotEntry(zld, target); + }, + .X86_64_RELOC_TLV => { + try addTlvPtrEntry(zld, target); + }, + else => {}, + } + } +} + +fn addTlvPtrEntry(zld: *Zld, target: SymbolWithLoc) !void { + const target_sym = zld.getSymbol(target); + if (!target_sym.undf()) return; + if (zld.tlv_ptr_table.contains(target)) return; + + const gpa = zld.gpa; + const atom_index = try zld.createTlvPtrAtom(); + const tlv_ptr_index = @intCast(u32, zld.tlv_ptr_entries.items.len); + try zld.tlv_ptr_entries.append(gpa, .{ + .target = target, + .atom_index = atom_index, + }); + try zld.tlv_ptr_table.putNoClobber(gpa, target, tlv_ptr_index); +} + +fn addGotEntry(zld: *Zld, target: SymbolWithLoc) !void { + if (zld.got_table.contains(target)) return; + const gpa = zld.gpa; + const atom_index = try zld.createGotAtom(); + const got_index = @intCast(u32, zld.got_entries.items.len); + try zld.got_entries.append(gpa, .{ + .target = target, + .atom_index = atom_index, + }); + try zld.got_table.putNoClobber(gpa, target, got_index); +} + +fn addStub(zld: *Zld, target: SymbolWithLoc) !void { + const target_sym = zld.getSymbol(target); + if (!target_sym.undf()) return; + if (zld.stubs_table.contains(target)) return; + + const gpa = zld.gpa; + _ = try zld.createStubHelperAtom(); + _ = try zld.createLazyPointerAtom(); + const 
atom_index = try zld.createStubAtom(); + const stubs_index = @intCast(u32, zld.stubs.items.len); + try zld.stubs.append(gpa, .{ + .target = target, + .atom_index = atom_index, + }); + try zld.stubs_table.putNoClobber(gpa, target, stubs_index); +} + +pub fn resolveRelocs( + zld: *Zld, + atom_index: AtomIndex, + atom_code: []u8, + atom_relocs: []align(1) const macho.relocation_info, + reverse_lookup: []u32, +) !void { + const arch = zld.options.target.cpu.arch; + const atom = zld.getAtom(atom_index); + assert(atom.getFile() != null); // synthetic atoms do not have relocs + + const object = zld.objects.items[atom.getFile().?]; + const ctx: RelocContext = blk: { + if (object.getSourceSymbol(atom.sym_index)) |source_sym| { + const source_sect = object.getSourceSection(source_sym.n_sect - 1); + break :blk .{ + .base_addr = source_sect.addr, + .base_offset = @intCast(i32, source_sym.n_value - source_sect.addr), + }; + } + const nbase = @intCast(u32, object.in_symtab.?.len); + const sect_id = @intCast(u16, atom.sym_index - nbase); + const source_sect = object.getSourceSection(sect_id); + break :blk .{ + .base_addr = source_sect.addr, + .base_offset = 0, + }; + }; + + log.debug("resolving relocations in ATOM(%{d}, '{s}')", .{ + atom.sym_index, + zld.getSymbolName(atom.getSymbolWithLoc()), + }); + + return switch (arch) { + .aarch64 => resolveRelocsArm64(zld, atom_index, atom_code, atom_relocs, reverse_lookup, ctx), + .x86_64 => resolveRelocsX86(zld, atom_index, atom_code, atom_relocs, reverse_lookup, ctx), + else => unreachable, + }; +} + +pub fn getRelocTargetAddress(zld: *Zld, rel: macho.relocation_info, target: SymbolWithLoc, is_tlv: bool) !u64 { + const target_atom_index = getRelocTargetAtomIndex(zld, rel, target) orelse { + // If there is no atom for target, we still need to check for special, atom-less + // symbols such as `___dso_handle`. + const target_name = zld.getSymbolName(target); + const atomless_sym = zld.getSymbol(target); + log.debug(" | atomless target '{s}'", .{target_name}); + return atomless_sym.n_value; + }; + const target_atom = zld.getAtom(target_atom_index); + log.debug(" | target ATOM(%{d}, '{s}') in object({?})", .{ + target_atom.sym_index, + zld.getSymbolName(target_atom.getSymbolWithLoc()), + target_atom.file, + }); + + const target_sym = zld.getSymbol(target_atom.getSymbolWithLoc()); + assert(target_sym.n_desc != @import("zld.zig").N_DEAD); + + // If `target` is contained within the target atom, pull its address value. 
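+    // E.g. a relocation against a local label inside a function atom should
+    // resolve to the atom's address plus the label's inner offset, not to the
+    // atom's start.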
+ const offset = if (target_atom.getFile() != null) blk: { + const object = zld.objects.items[target_atom.getFile().?]; + break :blk if (object.getSourceSymbol(target.sym_index)) |_| + Atom.calcInnerSymbolOffset(zld, target_atom_index, target.sym_index) + else + 0; // section alias + } else 0; + const base_address: u64 = if (is_tlv) base_address: { + // For TLV relocations, the value specified as a relocation is the displacement from the + // TLV initializer (either value in __thread_data or zero-init in __thread_bss) to the first + // defined TLV template init section in the following order: + // * wrt to __thread_data if defined, then + // * wrt to __thread_bss + const sect_id: u16 = sect_id: { + if (zld.getSectionByName("__DATA", "__thread_data")) |i| { + break :sect_id i; + } else if (zld.getSectionByName("__DATA", "__thread_bss")) |i| { + break :sect_id i; + } else { + log.err("threadlocal variables present but no initializer sections found", .{}); + log.err(" __thread_data not found", .{}); + log.err(" __thread_bss not found", .{}); + return error.FailedToResolveRelocationTarget; + } + }; + break :base_address zld.sections.items(.header)[sect_id].addr; + } else 0; + return target_sym.n_value + offset - base_address; +} + +fn resolveRelocsArm64( + zld: *Zld, + atom_index: AtomIndex, + atom_code: []u8, + atom_relocs: []align(1) const macho.relocation_info, + reverse_lookup: []u32, + context: RelocContext, +) !void { + const atom = zld.getAtom(atom_index); + const object = zld.objects.items[atom.getFile().?]; + + var addend: ?i64 = null; + var subtractor: ?SymbolWithLoc = null; + + for (atom_relocs) |rel| { + const rel_type = @intToEnum(macho.reloc_type_arm64, rel.r_type); + + switch (rel_type) { + .ARM64_RELOC_ADDEND => { + assert(addend == null); + + log.debug(" RELA({s}) @ {x} => {x}", .{ @tagName(rel_type), rel.r_address, rel.r_symbolnum }); + + addend = rel.r_symbolnum; + continue; + }, + .ARM64_RELOC_SUBTRACTOR => { + assert(subtractor == null); + + log.debug(" RELA({s}) @ {x} => %{d} in object({d})", .{ + @tagName(rel_type), + rel.r_address, + rel.r_symbolnum, + atom.file, + }); + + subtractor = parseRelocTarget(zld, atom_index, rel, reverse_lookup); + continue; + }, + else => {}, + } + + const target = parseRelocTarget(zld, atom_index, rel, reverse_lookup); + const rel_offset = @intCast(u32, rel.r_address - context.base_offset); + + log.debug(" RELA({s}) @ {x} => %{d} ('{s}') in object({?})", .{ + @tagName(rel_type), + rel.r_address, + target.sym_index, + zld.getSymbolName(target), + target.file, + }); + + const source_addr = blk: { + const source_sym = zld.getSymbol(atom.getSymbolWithLoc()); + break :blk source_sym.n_value + rel_offset; + }; + const is_tlv = is_tlv: { + const source_sym = zld.getSymbol(atom.getSymbolWithLoc()); + const header = zld.sections.items(.header)[source_sym.n_sect - 1]; + break :is_tlv header.@"type"() == macho.S_THREAD_LOCAL_VARIABLES; + }; + const target_addr = try getRelocTargetAddress(zld, rel, target, is_tlv); + + log.debug(" | source_addr = 0x{x}", .{source_addr}); + + switch (rel_type) { + .ARM64_RELOC_BRANCH26 => { + const actual_target = if (zld.getStubsAtomIndexForSymbol(target)) |stub_atom_index| inner: { + const stub_atom = zld.getAtom(stub_atom_index); + break :inner stub_atom.getSymbolWithLoc(); + } else target; + log.debug(" source {s} (object({?})), target {s} (object({?}))", .{ + zld.getSymbolName(atom.getSymbolWithLoc()), + atom.file, + zld.getSymbolName(target), + zld.getAtom(getRelocTargetAtomIndex(zld, rel, target).?).file, + }); + 
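+                // B/BL encode a signed 26-bit word offset, giving a +/-128MiB range;
+                // when the direct displacement does not fit in an i28, we branch via
+                // a range-extending thunk instead (see thunks.zig).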
+ const displacement = if (calcPcRelativeDisplacementArm64( + source_addr, + zld.getSymbol(actual_target).n_value, + )) |disp| blk: { + log.debug(" | target_addr = 0x{x}", .{zld.getSymbol(actual_target).n_value}); + break :blk disp; + } else |_| blk: { + const thunk_index = zld.thunk_table.get(atom_index).?; + const thunk = zld.thunks.items[thunk_index]; + const thunk_sym = zld.getSymbol(thunk.getTrampolineForSymbol( + zld, + actual_target, + ).?); + log.debug(" | target_addr = 0x{x}", .{thunk_sym.n_value}); + break :blk try calcPcRelativeDisplacementArm64(source_addr, thunk_sym.n_value); + }; + + const code = atom_code[rel_offset..][0..4]; + var inst = aarch64.Instruction{ + .unconditional_branch_immediate = mem.bytesToValue(meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.unconditional_branch_immediate, + ), code), + }; + inst.unconditional_branch_immediate.imm26 = @truncate(u26, @bitCast(u28, displacement >> 2)); + mem.writeIntLittle(u32, code, inst.toU32()); + }, + + .ARM64_RELOC_PAGE21, + .ARM64_RELOC_GOT_LOAD_PAGE21, + .ARM64_RELOC_TLVP_LOAD_PAGE21, + => { + const adjusted_target_addr = @intCast(u64, @intCast(i64, target_addr) + (addend orelse 0)); + + log.debug(" | target_addr = 0x{x}", .{adjusted_target_addr}); + + const pages = @bitCast(u21, calcNumberOfPages(source_addr, adjusted_target_addr)); + const code = atom_code[rel_offset..][0..4]; + var inst = aarch64.Instruction{ + .pc_relative_address = mem.bytesToValue(meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.pc_relative_address, + ), code), + }; + inst.pc_relative_address.immhi = @truncate(u19, pages >> 2); + inst.pc_relative_address.immlo = @truncate(u2, pages); + mem.writeIntLittle(u32, code, inst.toU32()); + addend = null; + }, + + .ARM64_RELOC_PAGEOFF12 => { + const adjusted_target_addr = @intCast(u64, @intCast(i64, target_addr) + (addend orelse 0)); + + log.debug(" | target_addr = 0x{x}", .{adjusted_target_addr}); + + const code = atom_code[rel_offset..][0..4]; + if (isArithmeticOp(code)) { + const off = try calcPageOffset(adjusted_target_addr, .arithmetic); + var inst = aarch64.Instruction{ + .add_subtract_immediate = mem.bytesToValue(meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.add_subtract_immediate, + ), code), + }; + inst.add_subtract_immediate.imm12 = off; + mem.writeIntLittle(u32, code, inst.toU32()); + } else { + var inst = aarch64.Instruction{ + .load_store_register = mem.bytesToValue(meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.load_store_register, + ), code), + }; + const off = try calcPageOffset(adjusted_target_addr, switch (inst.load_store_register.size) { + 0 => if (inst.load_store_register.v == 1) + PageOffsetInstKind.load_store_128 + else + PageOffsetInstKind.load_store_8, + 1 => .load_store_16, + 2 => .load_store_32, + 3 => .load_store_64, + }); + inst.load_store_register.offset = off; + mem.writeIntLittle(u32, code, inst.toU32()); + } + addend = null; + }, + + .ARM64_RELOC_GOT_LOAD_PAGEOFF12 => { + const code = atom_code[rel_offset..][0..4]; + const adjusted_target_addr = @intCast(u64, @intCast(i64, target_addr) + (addend orelse 0)); + + log.debug(" | target_addr = 0x{x}", .{adjusted_target_addr}); + + const off = try calcPageOffset(adjusted_target_addr, .load_store_64); + var inst: aarch64.Instruction = .{ + .load_store_register = mem.bytesToValue(meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.load_store_register, + ), code), + }; + inst.load_store_register.offset = off; + mem.writeIntLittle(u32, code, inst.toU32()); + addend = 
null; + }, + + .ARM64_RELOC_TLVP_LOAD_PAGEOFF12 => { + const code = atom_code[rel_offset..][0..4]; + const adjusted_target_addr = @intCast(u64, @intCast(i64, target_addr) + (addend orelse 0)); + + log.debug(" | target_addr = 0x{x}", .{adjusted_target_addr}); + + const RegInfo = struct { + rd: u5, + rn: u5, + size: u2, + }; + const reg_info: RegInfo = blk: { + if (isArithmeticOp(code)) { + const inst = mem.bytesToValue(meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.add_subtract_immediate, + ), code); + break :blk .{ + .rd = inst.rd, + .rn = inst.rn, + .size = inst.sf, + }; + } else { + const inst = mem.bytesToValue(meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.load_store_register, + ), code); + break :blk .{ + .rd = inst.rt, + .rn = inst.rn, + .size = inst.size, + }; + } + }; + + var inst = if (zld.tlv_ptr_table.contains(target)) aarch64.Instruction{ + .load_store_register = .{ + .rt = reg_info.rd, + .rn = reg_info.rn, + .offset = try calcPageOffset(adjusted_target_addr, .load_store_64), + .opc = 0b01, + .op1 = 0b01, + .v = 0, + .size = reg_info.size, + }, + } else aarch64.Instruction{ + .add_subtract_immediate = .{ + .rd = reg_info.rd, + .rn = reg_info.rn, + .imm12 = try calcPageOffset(adjusted_target_addr, .arithmetic), + .sh = 0, + .s = 0, + .op = 0, + .sf = @truncate(u1, reg_info.size), + }, + }; + mem.writeIntLittle(u32, code, inst.toU32()); + addend = null; + }, + + .ARM64_RELOC_POINTER_TO_GOT => { + log.debug(" | target_addr = 0x{x}", .{target_addr}); + const result = math.cast(i32, @intCast(i64, target_addr) - @intCast(i64, source_addr)) orelse + return error.Overflow; + mem.writeIntLittle(u32, atom_code[rel_offset..][0..4], @bitCast(u32, result)); + }, + + .ARM64_RELOC_UNSIGNED => { + var ptr_addend = if (rel.r_length == 3) + mem.readIntLittle(i64, atom_code[rel_offset..][0..8]) + else + mem.readIntLittle(i32, atom_code[rel_offset..][0..4]); + + if (rel.r_extern == 0) { + const target_sect_base_addr = object.getSourceSection(@intCast(u16, rel.r_symbolnum - 1)).addr; + ptr_addend -= @intCast(i64, target_sect_base_addr); + } + + const result = blk: { + if (subtractor) |sub| { + const sym = zld.getSymbol(sub); + break :blk @intCast(i64, target_addr) - @intCast(i64, sym.n_value) + ptr_addend; + } else { + break :blk @intCast(i64, target_addr) + ptr_addend; + } + }; + log.debug(" | target_addr = 0x{x}", .{result}); + + if (rel.r_length == 3) { + mem.writeIntLittle(u64, atom_code[rel_offset..][0..8], @bitCast(u64, result)); + } else { + mem.writeIntLittle(u32, atom_code[rel_offset..][0..4], @truncate(u32, @bitCast(u64, result))); + } + + subtractor = null; + }, + + .ARM64_RELOC_ADDEND => unreachable, + .ARM64_RELOC_SUBTRACTOR => unreachable, + } + } +} + +fn resolveRelocsX86( + zld: *Zld, + atom_index: AtomIndex, + atom_code: []u8, + atom_relocs: []align(1) const macho.relocation_info, + reverse_lookup: []u32, + context: RelocContext, +) !void { + const atom = zld.getAtom(atom_index); + const object = zld.objects.items[atom.getFile().?]; + + var subtractor: ?SymbolWithLoc = null; + + for (atom_relocs) |rel| { + const rel_type = @intToEnum(macho.reloc_type_x86_64, rel.r_type); + + switch (rel_type) { + .X86_64_RELOC_SUBTRACTOR => { + assert(subtractor == null); + + log.debug(" RELA({s}) @ {x} => %{d} in object({d})", .{ + @tagName(rel_type), + rel.r_address, + rel.r_symbolnum, + atom.file, + }); + + subtractor = parseRelocTarget(zld, atom_index, rel, reverse_lookup); + continue; + }, + else => {}, + } + + const target = parseRelocTarget(zld, atom_index, rel, 
reverse_lookup); + const rel_offset = @intCast(u32, rel.r_address - context.base_offset); + + log.debug(" RELA({s}) @ {x} => %{d} in object({?})", .{ + @tagName(rel_type), + rel.r_address, + target.sym_index, + target.file, + }); + + const source_addr = blk: { + const source_sym = zld.getSymbol(atom.getSymbolWithLoc()); + break :blk source_sym.n_value + rel_offset; + }; + const is_tlv = is_tlv: { + const source_sym = zld.getSymbol(atom.getSymbolWithLoc()); + const header = zld.sections.items(.header)[source_sym.n_sect - 1]; + break :is_tlv header.@"type"() == macho.S_THREAD_LOCAL_VARIABLES; + }; + + log.debug(" | source_addr = 0x{x}", .{source_addr}); + + const target_addr = try getRelocTargetAddress(zld, rel, target, is_tlv); + + switch (rel_type) { + .X86_64_RELOC_BRANCH => { + const addend = mem.readIntLittle(i32, atom_code[rel_offset..][0..4]); + const adjusted_target_addr = @intCast(u64, @intCast(i64, target_addr) + addend); + log.debug(" | target_addr = 0x{x}", .{adjusted_target_addr}); + const disp = try calcPcRelativeDisplacementX86(source_addr, adjusted_target_addr, 0); + mem.writeIntLittle(i32, atom_code[rel_offset..][0..4], disp); + }, + + .X86_64_RELOC_GOT, + .X86_64_RELOC_GOT_LOAD, + => { + const addend = mem.readIntLittle(i32, atom_code[rel_offset..][0..4]); + const adjusted_target_addr = @intCast(u64, @intCast(i64, target_addr) + addend); + log.debug(" | target_addr = 0x{x}", .{adjusted_target_addr}); + const disp = try calcPcRelativeDisplacementX86(source_addr, adjusted_target_addr, 0); + mem.writeIntLittle(i32, atom_code[rel_offset..][0..4], disp); + }, + + .X86_64_RELOC_TLV => { + const addend = mem.readIntLittle(i32, atom_code[rel_offset..][0..4]); + const adjusted_target_addr = @intCast(u64, @intCast(i64, target_addr) + addend); + log.debug(" | target_addr = 0x{x}", .{adjusted_target_addr}); + const disp = try calcPcRelativeDisplacementX86(source_addr, adjusted_target_addr, 0); + + if (zld.tlv_ptr_table.get(target) == null) { + // We need to rewrite the opcode from movq to leaq. + atom_code[rel_offset - 2] = 0x8d; + } + + mem.writeIntLittle(i32, atom_code[rel_offset..][0..4], disp); + }, + + .X86_64_RELOC_SIGNED, + .X86_64_RELOC_SIGNED_1, + .X86_64_RELOC_SIGNED_2, + .X86_64_RELOC_SIGNED_4, + => { + const correction: u3 = switch (rel_type) { + .X86_64_RELOC_SIGNED => 0, + .X86_64_RELOC_SIGNED_1 => 1, + .X86_64_RELOC_SIGNED_2 => 2, + .X86_64_RELOC_SIGNED_4 => 4, + else => unreachable, + }; + var addend = mem.readIntLittle(i32, atom_code[rel_offset..][0..4]) + correction; + + if (rel.r_extern == 0) { + // Note for the future self: when r_extern == 0, we should subtract correction from the + // addend. + const target_sect_base_addr = object.getSourceSection(@intCast(u16, rel.r_symbolnum - 1)).addr; + // We need to add base_offset, i.e., offset of this atom wrt to the source + // section. Otherwise, the addend will over-/under-shoot. 
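+                    // Worked example: with base_addr 0x1000, base_offset 0x20,
+                    // rel_offset 0x4 and a target section at 0x2000, this adds
+                    // (0x1000 + 0x4 + 4) - 0x2000 + 0x20 = -0xFD8 to the addend.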
+                    addend += @intCast(i32, @intCast(i64, context.base_addr + rel_offset + 4) -
+                        @intCast(i64, target_sect_base_addr) + context.base_offset);
+                }
+
+                const adjusted_target_addr = @intCast(u64, @intCast(i64, target_addr) + addend);
+
+                log.debug("    | target_addr = 0x{x}", .{adjusted_target_addr});
+
+                const disp = try calcPcRelativeDisplacementX86(source_addr, adjusted_target_addr, correction);
+                mem.writeIntLittle(i32, atom_code[rel_offset..][0..4], disp);
+            },
+
+            .X86_64_RELOC_UNSIGNED => {
+                var addend = if (rel.r_length == 3)
+                    mem.readIntLittle(i64, atom_code[rel_offset..][0..8])
+                else
+                    mem.readIntLittle(i32, atom_code[rel_offset..][0..4]);
+
+                if (rel.r_extern == 0) {
+                    const target_sect_base_addr = object.getSourceSection(@intCast(u16, rel.r_symbolnum - 1)).addr;
+                    addend -= @intCast(i64, target_sect_base_addr);
+                }
+
+                const result = blk: {
+                    if (subtractor) |sub| {
+                        const sym = zld.getSymbol(sub);
+                        break :blk @intCast(i64, target_addr) - @intCast(i64, sym.n_value) + addend;
+                    } else {
+                        break :blk @intCast(i64, target_addr) + addend;
+                    }
+                };
+                log.debug("    | target_addr = 0x{x}", .{result});
+
+                if (rel.r_length == 3) {
+                    mem.writeIntLittle(u64, atom_code[rel_offset..][0..8], @bitCast(u64, result));
+                } else {
+                    mem.writeIntLittle(u32, atom_code[rel_offset..][0..4], @truncate(u32, @bitCast(u64, result)));
+                }
+
+                subtractor = null;
+            },
+
+            .X86_64_RELOC_SUBTRACTOR => unreachable,
+        }
+    }
+}
+
+inline fn isArithmeticOp(inst: *const [4]u8) bool {
+    const group_decode = @truncate(u5, inst[3]);
+    return ((group_decode >> 2) == 4);
+}
+
+pub fn getAtomCode(zld: *Zld, atom_index: AtomIndex) []const u8 {
+    const atom = zld.getAtom(atom_index);
+    assert(atom.getFile() != null); // Synthetic atom shouldn't need to inquire for code.
+    const object = zld.objects.items[atom.getFile().?];
+    const source_sym = object.getSourceSymbol(atom.sym_index) orelse {
+        // If there was no matching symbol present in the source symtab, this means
+        // we are dealing with either an entire section, or part of it, but also
+        // starting at the beginning.
+        const nbase = @intCast(u32, object.in_symtab.?.len);
+        const sect_id = @intCast(u16, atom.sym_index - nbase);
+        const source_sect = object.getSourceSection(sect_id);
+        assert(!source_sect.isZerofill());
+        const code = object.getSectionContents(source_sect);
+        const code_len = @intCast(usize, atom.size);
+        return code[0..code_len];
+    };
+    const source_sect = object.getSourceSection(source_sym.n_sect - 1);
+    assert(!source_sect.isZerofill());
+    const code = object.getSectionContents(source_sect);
+    const offset = @intCast(usize, source_sym.n_value - source_sect.addr);
+    const code_len = @intCast(usize, atom.size);
+    return code[offset..][0..code_len];
+}
+
+pub fn getAtomRelocs(zld: *Zld, atom_index: AtomIndex) []align(1) const macho.relocation_info {
+    const atom = zld.getAtomPtr(atom_index);
+    assert(atom.getFile() != null); // Synthetic atom shouldn't need to inquire for relocs.
+    const object = zld.objects.items[atom.getFile().?];
+
+    const source_sect = if (object.getSourceSymbol(atom.sym_index)) |source_sym| blk: {
+        const source_sect = object.getSourceSection(source_sym.n_sect - 1);
+        assert(!source_sect.isZerofill());
+        break :blk source_sect;
+    } else blk: {
+        // If there was no matching symbol present in the source symtab, this means
+        // we are dealing with either an entire section, or part of it, but also
+        // starting at the beginning.
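+        // Section alias symbols are laid out past the object's real symtab, so
+        // subtracting the symtab length recovers the source section id (cf.
+        // getSectionAliasSymbolIndex in Object.zig).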
+ const nbase = @intCast(u32, object.in_symtab.?.len); + const sect_id = @intCast(u16, atom.sym_index - nbase); + const source_sect = object.getSourceSection(sect_id); + assert(!source_sect.isZerofill()); + break :blk source_sect; + }; + + const relocs = object.getRelocs(source_sect); + + if (atom.cached_relocs_start == -1) { + const indexes = if (object.getSourceSymbol(atom.sym_index)) |source_sym| blk: { + const offset = source_sym.n_value - source_sect.addr; + break :blk filterRelocs(relocs, offset, offset + atom.size); + } else filterRelocs(relocs, 0, atom.size); + atom.cached_relocs_start = indexes.start; + atom.cached_relocs_len = indexes.len; + } + + return relocs[@intCast(u32, atom.cached_relocs_start)..][0..atom.cached_relocs_len]; +} + +fn filterRelocs( + relocs: []align(1) const macho.relocation_info, + start_addr: u64, + end_addr: u64, +) struct { start: i32, len: u32 } { + const Predicate = struct { + addr: u64, + + pub fn predicate(self: @This(), rel: macho.relocation_info) bool { + return rel.r_address >= self.addr; + } + }; + const LPredicate = struct { + addr: u64, + + pub fn predicate(self: @This(), rel: macho.relocation_info) bool { + return rel.r_address < self.addr; + } + }; + + const start = @import("zld.zig").bsearch(macho.relocation_info, relocs, Predicate{ .addr = end_addr }); + const len = @import("zld.zig").lsearch(macho.relocation_info, relocs[start..], LPredicate{ .addr = start_addr }); + + return .{ .start = @intCast(i32, start), .len = @intCast(u32, len) }; +} + +pub fn calcPcRelativeDisplacementX86(source_addr: u64, target_addr: u64, correction: u3) error{Overflow}!i32 { + const disp = @intCast(i64, target_addr) - @intCast(i64, source_addr + 4 + correction); + return math.cast(i32, disp) orelse error.Overflow; +} + +pub fn calcPcRelativeDisplacementArm64(source_addr: u64, target_addr: u64) error{Overflow}!i28 { + const disp = @intCast(i64, target_addr) - @intCast(i64, source_addr); + return math.cast(i28, disp) orelse error.Overflow; +} + +pub fn calcNumberOfPages(source_addr: u64, target_addr: u64) i21 { + const source_page = @intCast(i32, source_addr >> 12); + const target_page = @intCast(i32, target_addr >> 12); + const pages = @intCast(i21, target_page - source_page); + return pages; +} + +const PageOffsetInstKind = enum { + arithmetic, + load_store_8, + load_store_16, + load_store_32, + load_store_64, + load_store_128, +}; + +pub fn calcPageOffset(target_addr: u64, kind: PageOffsetInstKind) !u12 { + const narrowed = @truncate(u12, target_addr); + return switch (kind) { + .arithmetic, .load_store_8 => narrowed, + .load_store_16 => try math.divExact(u12, narrowed, 2), + .load_store_32 => try math.divExact(u12, narrowed, 4), + .load_store_64 => try math.divExact(u12, narrowed, 8), + .load_store_128 => try math.divExact(u12, narrowed, 16), + }; +} diff --git a/src/link/MachO/dead_strip.zig b/src/link/MachO/dead_strip.zig index fb2832da21..6090162ce8 100644 --- a/src/link/MachO/dead_strip.zig +++ b/src/link/MachO/dead_strip.zig @@ -1,3 +1,5 @@ +//! An algorithm for dead stripping of unreferenced Atoms. 
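+//! Roughly a mark-and-sweep over atoms: collect the GC roots (the entrypoint or
+//! the exported globals, plus atoms in no-dead-strip sections), transitively mark
+//! everything reachable through relocations, then prune whatever was never marked
+//! (cf. N_DEAD).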
+ const std = @import("std"); const assert = std.debug.assert; const log = std.log.scoped(.dead_strip); @@ -6,93 +8,105 @@ const math = std.math; const mem = std.mem; const Allocator = mem.Allocator; -const Atom = @import("Atom.zig"); -const MachO = @import("../MachO.zig"); - -pub fn gcAtoms(macho_file: *MachO) !void { - const gpa = macho_file.base.allocator; - var arena_allocator = std.heap.ArenaAllocator.init(gpa); - defer arena_allocator.deinit(); - const arena = arena_allocator.allocator(); +const AtomIndex = @import("zld.zig").AtomIndex; +const Atom = @import("ZldAtom.zig"); +const SymbolWithLoc = @import("zld.zig").SymbolWithLoc; +const Zld = @import("zld.zig").Zld; - var roots = std.AutoHashMap(*Atom, void).init(arena); - try collectRoots(&roots, macho_file); +const N_DEAD = @import("zld.zig").N_DEAD; - var alive = std.AutoHashMap(*Atom, void).init(arena); - try mark(roots, &alive, macho_file); +const AtomTable = std.AutoHashMap(AtomIndex, void); - try prune(arena, alive, macho_file); -} +pub fn gcAtoms(zld: *Zld, reverse_lookups: [][]u32) !void { + const gpa = zld.gpa; -fn removeAtomFromSection(atom: *Atom, match: u8, macho_file: *MachO) void { - var section = macho_file.sections.get(match); + var arena = std.heap.ArenaAllocator.init(gpa); + defer arena.deinit(); - // If we want to enable GC for incremental codepath, we need to take into - // account any padding that might have been left here. - section.header.size -= atom.size; + var roots = AtomTable.init(arena.allocator()); + try roots.ensureUnusedCapacity(@intCast(u32, zld.globals.items.len)); - if (atom.prev) |prev| { - prev.next = atom.next; - } - if (atom.next) |next| { - next.prev = atom.prev; - } else { - if (atom.prev) |prev| { - section.last_atom = prev; - } else { - // The section will be GCed in the next step. 
- section.last_atom = null; - section.header.size = 0; - } - } + var alive = AtomTable.init(arena.allocator()); + try alive.ensureTotalCapacity(@intCast(u32, zld.atoms.items.len)); - macho_file.sections.set(match, section); + try collectRoots(zld, &roots); + try mark(zld, roots, &alive, reverse_lookups); + try prune(zld, alive); } -fn collectRoots(roots: *std.AutoHashMap(*Atom, void), macho_file: *MachO) !void { - const output_mode = macho_file.base.options.output_mode; +fn collectRoots(zld: *Zld, roots: *AtomTable) !void { + log.debug("collecting roots", .{}); - switch (output_mode) { + switch (zld.options.output_mode) { .Exe => { // Add entrypoint as GC root - const global = try macho_file.getEntryPoint(); - const atom = macho_file.getAtomForSymbol(global).?; // panic here means fatal error - _ = try roots.getOrPut(atom); + const global: SymbolWithLoc = zld.getEntryPoint(); + const object = zld.objects.items[global.getFile().?]; + const atom_index = object.getAtomIndexForSymbol(global.sym_index).?; // panic here means fatal error + _ = try roots.getOrPut(atom_index); + + log.debug("root(ATOM({d}, %{d}, {d}))", .{ + atom_index, + zld.getAtom(atom_index).sym_index, + zld.getAtom(atom_index).file, + }); }, else => |other| { assert(other == .Lib); // Add exports as GC roots - for (macho_file.globals.items) |global| { - const sym = macho_file.getSymbol(global); - if (!sym.sect()) continue; - const atom = macho_file.getAtomForSymbol(global) orelse { - log.debug("skipping {s}", .{macho_file.getSymbolName(global)}); - continue; - }; - _ = try roots.getOrPut(atom); - log.debug("adding root", .{}); - macho_file.logAtom(atom); + for (zld.globals.items) |global| { + const sym = zld.getSymbol(global); + if (sym.undf()) continue; + + const object = zld.objects.items[global.getFile().?]; + const atom_index = object.getAtomIndexForSymbol(global.sym_index).?; // panic here means fatal error + _ = try roots.getOrPut(atom_index); + + log.debug("root(ATOM({d}, %{d}, {d}))", .{ + atom_index, + zld.getAtom(atom_index).sym_index, + zld.getAtom(atom_index).file, + }); } }, } // TODO just a temp until we learn how to parse unwind records - if (macho_file.getGlobal("___gxx_personality_v0")) |global| { - if (macho_file.getAtomForSymbol(global)) |atom| { - _ = try roots.getOrPut(atom); - log.debug("adding root", .{}); - macho_file.logAtom(atom); + for (zld.globals.items) |global| { + if (mem.eql(u8, "___gxx_personality_v0", zld.getSymbolName(global))) { + const object = zld.objects.items[global.getFile().?]; + if (object.getAtomIndexForSymbol(global.sym_index)) |atom_index| { + _ = try roots.getOrPut(atom_index); + + log.debug("root(ATOM({d}, %{d}, {d}))", .{ + atom_index, + zld.getAtom(atom_index).sym_index, + zld.getAtom(atom_index).file, + }); + } + break; } } - for (macho_file.objects.items) |object| { - for (object.managed_atoms.items) |atom| { - const source_sym = object.getSourceSymbol(atom.sym_index) orelse continue; - if (source_sym.tentative()) continue; - const source_sect = object.getSourceSection(source_sym.n_sect - 1); + for (zld.objects.items) |object| { + const has_subsections = object.header.flags & macho.MH_SUBSECTIONS_VIA_SYMBOLS != 0; + + for (object.atoms.items) |atom_index| { const is_gc_root = blk: { + // Modelled after ld64 which treats each object file compiled without MH_SUBSECTIONS_VIA_SYMBOLS + // as a root. 
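+                // Without the flag, symbol boundaries within a section are not
+                // reliable atom boundaries, so every atom from such an object is
+                // conservatively kept alive.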
+ if (!has_subsections) break :blk true; + + const atom = zld.getAtom(atom_index); + const sect_id = if (object.getSourceSymbol(atom.sym_index)) |source_sym| + source_sym.n_sect - 1 + else sect_id: { + const nbase = @intCast(u32, object.in_symtab.?.len); + const sect_id = @intCast(u16, atom.sym_index - nbase); + break :sect_id sect_id; + }; + const source_sect = object.getSourceSection(sect_id); if (source_sect.isDontDeadStrip()) break :blk true; - if (mem.eql(u8, "__StaticInit", source_sect.sectName())) break :blk true; switch (source_sect.@"type"()) { macho.S_MOD_INIT_FUNC_POINTERS, macho.S_MOD_TERM_FUNC_POINTERS, @@ -100,197 +114,229 @@ fn collectRoots(roots: *std.AutoHashMap(*Atom, void), macho_file: *MachO) !void else => break :blk false, } }; + if (is_gc_root) { - try roots.putNoClobber(atom, {}); - log.debug("adding root", .{}); - macho_file.logAtom(atom); + try roots.putNoClobber(atom_index, {}); + + log.debug("root(ATOM({d}, %{d}, {d}))", .{ + atom_index, + zld.getAtom(atom_index).sym_index, + zld.getAtom(atom_index).file, + }); } } } } -fn markLive(atom: *Atom, alive: *std.AutoHashMap(*Atom, void), macho_file: *MachO) anyerror!void { - const gop = try alive.getOrPut(atom); - if (gop.found_existing) return; +fn markLive( + zld: *Zld, + atom_index: AtomIndex, + alive: *AtomTable, + reverse_lookups: [][]u32, +) anyerror!void { + if (alive.contains(atom_index)) return; + + const atom = zld.getAtom(atom_index); + const sym_loc = atom.getSymbolWithLoc(); + + log.debug("mark(ATOM({d}, %{d}, {d}))", .{ atom_index, sym_loc.sym_index, sym_loc.file }); + + alive.putAssumeCapacityNoClobber(atom_index, {}); + + const cpu_arch = zld.options.target.cpu.arch; + + const sym = zld.getSymbol(atom.getSymbolWithLoc()); + const header = zld.sections.items(.header)[sym.n_sect - 1]; + if (header.isZerofill()) return; + + const relocs = Atom.getAtomRelocs(zld, atom_index); + const reverse_lookup = reverse_lookups[atom.getFile().?]; + for (relocs) |rel| { + const target = switch (cpu_arch) { + .aarch64 => switch (@intToEnum(macho.reloc_type_arm64, rel.r_type)) { + .ARM64_RELOC_ADDEND => continue, + else => Atom.parseRelocTarget(zld, atom_index, rel, reverse_lookup), + }, + .x86_64 => Atom.parseRelocTarget(zld, atom_index, rel, reverse_lookup), + else => unreachable, + }; + const target_sym = zld.getSymbol(target); + + if (rel.r_extern == 0) { + // We are pessimistic and mark all atoms within the target section as live. + // TODO: this can be improved by marking only the relevant atoms. 
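+            // A non-extern relocation (r_extern == 0) names a section ordinal rather
+            // than a symbol, so we cannot tell which atom in that section is the
+            // actual target.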
+ const sect_id = target_sym.n_sect; + const object = zld.objects.items[target.getFile().?]; + for (object.atoms.items) |other_atom_index| { + const other_atom = zld.getAtom(other_atom_index); + const other_sym = zld.getSymbol(other_atom.getSymbolWithLoc()); + if (other_sym.n_sect == sect_id) { + try markLive(zld, other_atom_index, alive, reverse_lookups); + } + } + continue; + } - log.debug("marking live", .{}); - macho_file.logAtom(atom); + if (target_sym.undf()) continue; + if (target.getFile() == null) { + const target_sym_name = zld.getSymbolName(target); + if (mem.eql(u8, "__mh_execute_header", target_sym_name)) continue; + if (mem.eql(u8, "___dso_handle", target_sym_name)) continue; - for (atom.relocs.items) |rel| { - const target_atom = rel.getTargetAtom(macho_file) orelse continue; - try markLive(target_atom, alive, macho_file); - } -} + unreachable; // referenced symbol not found + } + + const object = zld.objects.items[target.getFile().?]; + const target_atom_index = object.getAtomIndexForSymbol(target.sym_index).?; + log.debug(" following ATOM({d}, %{d}, {d})", .{ + target_atom_index, + zld.getAtom(target_atom_index).sym_index, + zld.getAtom(target_atom_index).file, + }); -fn refersLive(atom: *Atom, alive: std.AutoHashMap(*Atom, void), macho_file: *MachO) bool { - for (atom.relocs.items) |rel| { - const target_atom = rel.getTargetAtom(macho_file) orelse continue; - if (alive.contains(target_atom)) return true; + try markLive(zld, target_atom_index, alive, reverse_lookups); } - return false; } -fn refersDead(atom: *Atom, macho_file: *MachO) bool { - for (atom.relocs.items) |rel| { - const target_atom = rel.getTargetAtom(macho_file) orelse continue; - const target_sym = target_atom.getSymbol(macho_file); - if (target_sym.n_desc == MachO.N_DESC_GCED) return true; +fn refersLive(zld: *Zld, atom_index: AtomIndex, alive: AtomTable, reverse_lookups: [][]u32) !bool { + const atom = zld.getAtom(atom_index); + const sym_loc = atom.getSymbolWithLoc(); + + log.debug("refersLive(ATOM({d}, %{d}, {d}))", .{ atom_index, sym_loc.sym_index, sym_loc.file }); + + const cpu_arch = zld.options.target.cpu.arch; + + const sym = zld.getSymbol(sym_loc); + const header = zld.sections.items(.header)[sym.n_sect - 1]; + assert(!header.isZerofill()); + + const relocs = Atom.getAtomRelocs(zld, atom_index); + const reverse_lookup = reverse_lookups[atom.getFile().?]; + for (relocs) |rel| { + const target = switch (cpu_arch) { + .aarch64 => switch (@intToEnum(macho.reloc_type_arm64, rel.r_type)) { + .ARM64_RELOC_ADDEND => continue, + else => Atom.parseRelocTarget(zld, atom_index, rel, reverse_lookup), + }, + .x86_64 => Atom.parseRelocTarget(zld, atom_index, rel, reverse_lookup), + else => unreachable, + }; + + const object = zld.objects.items[target.getFile().?]; + const target_atom_index = object.getAtomIndexForSymbol(target.sym_index) orelse { + log.debug("atom for symbol '{s}' not found; skipping...", .{zld.getSymbolName(target)}); + continue; + }; + if (alive.contains(target_atom_index)) { + log.debug(" refers live ATOM({d}, %{d}, {d})", .{ + target_atom_index, + zld.getAtom(target_atom_index).sym_index, + zld.getAtom(target_atom_index).file, + }); + return true; + } } + return false; } -fn mark( - roots: std.AutoHashMap(*Atom, void), - alive: *std.AutoHashMap(*Atom, void), - macho_file: *MachO, -) !void { - try alive.ensureUnusedCapacity(roots.count()); - +fn mark(zld: *Zld, roots: AtomTable, alive: *AtomTable, reverse_lookups: [][]u32) !void { var it = roots.keyIterator(); while (it.next()) |root| { - try 
markLive(root.*, alive, macho_file); + try markLive(zld, root.*, alive, reverse_lookups); } var loop: bool = true; while (loop) { loop = false; - for (macho_file.objects.items) |object| { - for (object.managed_atoms.items) |atom| { - if (alive.contains(atom)) continue; - const source_sym = object.getSourceSymbol(atom.sym_index) orelse continue; - if (source_sym.tentative()) continue; - const source_sect = object.getSourceSection(source_sym.n_sect - 1); - if (source_sect.isDontDeadStripIfReferencesLive() and refersLive(atom, alive.*, macho_file)) { - try markLive(atom, alive, macho_file); - loop = true; + for (zld.objects.items) |object| { + for (object.atoms.items) |atom_index| { + if (alive.contains(atom_index)) continue; + + const atom = zld.getAtom(atom_index); + const sect_id = if (object.getSourceSymbol(atom.sym_index)) |source_sym| + source_sym.n_sect - 1 + else blk: { + const nbase = @intCast(u32, object.in_symtab.?.len); + const sect_id = @intCast(u16, atom.sym_index - nbase); + break :blk sect_id; + }; + const source_sect = object.getSourceSection(sect_id); + + if (source_sect.isDontDeadStripIfReferencesLive()) { + if (try refersLive(zld, atom_index, alive.*, reverse_lookups)) { + try markLive(zld, atom_index, alive, reverse_lookups); + loop = true; + } } } } } } -fn prune(arena: Allocator, alive: std.AutoHashMap(*Atom, void), macho_file: *MachO) !void { - // Any section that ends up here will be updated, that is, - // its size and alignment recalculated. - var gc_sections = std.AutoHashMap(u8, void).init(arena); - var loop: bool = true; - while (loop) { - loop = false; - - for (macho_file.objects.items) |object| { - const in_symtab = object.in_symtab orelse continue; - - for (in_symtab) |_, source_index| { - const atom = object.getAtomForSymbol(@intCast(u32, source_index)) orelse continue; - if (alive.contains(atom)) continue; - - const global = atom.getSymbolWithLoc(); - const sym = atom.getSymbolPtr(macho_file); - const match = sym.n_sect - 1; - - if (sym.n_desc == MachO.N_DESC_GCED) continue; - if (!sym.ext() and !refersDead(atom, macho_file)) continue; - - macho_file.logAtom(atom); - sym.n_desc = MachO.N_DESC_GCED; - removeAtomFromSection(atom, match, macho_file); - _ = try gc_sections.put(match, {}); - - for (atom.contained.items) |sym_off| { - const inner = macho_file.getSymbolPtr(.{ - .sym_index = sym_off.sym_index, - .file = atom.file, - }); - inner.n_desc = MachO.N_DESC_GCED; - } - - if (macho_file.got_entries_table.contains(global)) { - const got_atom = macho_file.getGotAtomForSymbol(global).?; - const got_sym = got_atom.getSymbolPtr(macho_file); - got_sym.n_desc = MachO.N_DESC_GCED; - } +fn prune(zld: *Zld, alive: AtomTable) !void { + log.debug("pruning dead atoms", .{}); + for (zld.objects.items) |*object| { + var i: usize = 0; + while (i < object.atoms.items.len) { + const atom_index = object.atoms.items[i]; + if (alive.contains(atom_index)) { + i += 1; + continue; + } - if (macho_file.stubs_table.contains(global)) { - const stubs_atom = macho_file.getStubsAtomForSymbol(global).?; - const stubs_sym = stubs_atom.getSymbolPtr(macho_file); - stubs_sym.n_desc = MachO.N_DESC_GCED; + const atom = zld.getAtom(atom_index); + const sym_loc = atom.getSymbolWithLoc(); + + log.debug("prune(ATOM({d}, %{d}, {d}))", .{ + atom_index, + sym_loc.sym_index, + sym_loc.file, + }); + log.debug(" {s} in {s}", .{ zld.getSymbolName(sym_loc), object.name }); + + const sym = zld.getSymbolPtr(sym_loc); + const sect_id = sym.n_sect - 1; + var section = zld.sections.get(sect_id); + 
section.header.size -= atom.size; + + if (atom.prev_index) |prev_index| { + const prev = zld.getAtomPtr(prev_index); + prev.next_index = atom.next_index; + } else { + if (atom.next_index) |next_index| { + section.first_atom_index = next_index; } - - if (macho_file.tlv_ptr_entries_table.contains(global)) { - const tlv_ptr_atom = macho_file.getTlvPtrAtomForSymbol(global).?; - const tlv_ptr_sym = tlv_ptr_atom.getSymbolPtr(macho_file); - tlv_ptr_sym.n_desc = MachO.N_DESC_GCED; + } + if (atom.next_index) |next_index| { + const next = zld.getAtomPtr(next_index); + next.prev_index = atom.prev_index; + } else { + if (atom.prev_index) |prev_index| { + section.last_atom_index = prev_index; + } else { + assert(section.header.size == 0); + section.first_atom_index = undefined; + section.last_atom_index = undefined; } - - loop = true; } - } - } - for (macho_file.got_entries.items) |entry| { - const sym = entry.getSymbol(macho_file); - if (sym.n_desc != MachO.N_DESC_GCED) continue; + zld.sections.set(sect_id, section); + _ = object.atoms.swapRemove(i); - // TODO tombstone - const atom = entry.getAtom(macho_file).?; - const match = sym.n_sect - 1; - removeAtomFromSection(atom, match, macho_file); - _ = try gc_sections.put(match, {}); - _ = macho_file.got_entries_table.remove(entry.target); - } + sym.n_desc = N_DEAD; - for (macho_file.stubs.items) |entry| { - const sym = entry.getSymbol(macho_file); - if (sym.n_desc != MachO.N_DESC_GCED) continue; - - // TODO tombstone - const atom = entry.getAtom(macho_file).?; - const match = sym.n_sect - 1; - removeAtomFromSection(atom, match, macho_file); - _ = try gc_sections.put(match, {}); - _ = macho_file.stubs_table.remove(entry.target); - } - - for (macho_file.tlv_ptr_entries.items) |entry| { - const sym = entry.getSymbol(macho_file); - if (sym.n_desc != MachO.N_DESC_GCED) continue; - - // TODO tombstone - const atom = entry.getAtom(macho_file).?; - const match = sym.n_sect - 1; - removeAtomFromSection(atom, match, macho_file); - _ = try gc_sections.put(match, {}); - _ = macho_file.tlv_ptr_entries_table.remove(entry.target); - } - - var gc_sections_it = gc_sections.iterator(); - while (gc_sections_it.next()) |entry| { - const match = entry.key_ptr.*; - var section = macho_file.sections.get(match); - if (section.header.size == 0) continue; // Pruning happens automatically in next step. - - section.header.@"align" = 0; - section.header.size = 0; - - var atom = section.last_atom.?; - - while (atom.prev) |prev| { - atom = prev; - } - - while (true) { - const atom_alignment = try math.powi(u32, 2, atom.alignment); - const aligned_end_addr = mem.alignForwardGeneric(u64, section.header.size, atom_alignment); - const padding = aligned_end_addr - section.header.size; - section.header.size += padding + atom.size; - section.header.@"align" = @max(section.header.@"align", atom.alignment); + var inner_sym_it = Atom.getInnerSymbolsIterator(zld, atom_index); + while (inner_sym_it.next()) |inner| { + const inner_sym = zld.getSymbolPtr(inner); + inner_sym.n_desc = N_DEAD; + } - if (atom.next) |next| { - atom = next; - } else break; + if (Atom.getSectionAlias(zld, atom_index)) |alias| { + const alias_sym = zld.getSymbolPtr(alias); + alias_sym.n_desc = N_DEAD; + } } - - macho_file.sections.set(match, section); } } diff --git a/src/link/MachO/thunks.zig b/src/link/MachO/thunks.zig new file mode 100644 index 0000000000..db85884c31 --- /dev/null +++ b/src/link/MachO/thunks.zig @@ -0,0 +1,364 @@ +//! An algorithm for allocating output machine code section (aka `__TEXT,__text`), +//! 
and inserting range-extending thunks. As such, this algorithm is only run
+//! for targets that require range extenders, such as arm64.
+//!
+//! The algorithm works pessimistically and assumes that any reference to an Atom in
+//! another output section is out of range.
+
+const std = @import("std");
+const assert = std.debug.assert;
+const log = std.log.scoped(.thunks);
+const macho = std.macho;
+const math = std.math;
+const mem = std.mem;
+
+const aarch64 = @import("../../arch/aarch64/bits.zig");
+
+const Allocator = mem.Allocator;
+const Atom = @import("ZldAtom.zig");
+const AtomIndex = @import("zld.zig").AtomIndex;
+const SymbolWithLoc = @import("zld.zig").SymbolWithLoc;
+const Zld = @import("zld.zig").Zld;
+
+pub const ThunkIndex = u32;
+
+/// An unconditional branch instruction carries a 26-bit immediate; since targets
+/// are 4-byte aligned, its effective reach is a signed 28-bit byte offset.
+const jump_bits = @bitSizeOf(i28);
+
+const max_distance = (1 << (jump_bits - 1));
+
+/// A branch will need an extender if the distance to its target is larger than
+/// `2^(jump_bits - 1) - margin` (i.e. ±128MiB less the margin), where margin is
+/// some arbitrary number. mold uses a 5MiB margin, while ld64 uses a 4MiB margin.
+/// We will follow mold and assume the margin to be 5MiB.
+const max_allowed_distance = max_distance - 0x500_000;
+
+pub const Thunk = struct {
+    start_index: AtomIndex,
+    len: u32,
+
+    lookup: std.AutoArrayHashMapUnmanaged(SymbolWithLoc, AtomIndex) = .{},
+
+    pub fn deinit(self: *Thunk, gpa: Allocator) void {
+        self.lookup.deinit(gpa);
+    }
+
+    pub fn getStartAtomIndex(self: Thunk) AtomIndex {
+        assert(self.len != 0);
+        return self.start_index;
+    }
+
+    pub fn getEndAtomIndex(self: Thunk) AtomIndex {
+        assert(self.len != 0);
+        return self.start_index + self.len - 1;
+    }
+
+    pub fn getSize(self: Thunk) u64 {
+        return 12 * self.len;
+    }
+
+    pub fn getAlignment() u32 {
+        return @alignOf(u32);
+    }
+
+    pub fn getTrampolineForSymbol(self: Thunk, zld: *Zld, target: SymbolWithLoc) ?SymbolWithLoc {
+        const atom_index = self.lookup.get(target) orelse return null;
+        const atom = zld.getAtom(atom_index);
+        return atom.getSymbolWithLoc();
+    }
+};
+
+pub fn createThunks(zld: *Zld, sect_id: u8, reverse_lookups: [][]u32) !void {
+    const header = &zld.sections.items(.header)[sect_id];
+    if (header.size == 0) return;
+
+    const gpa = zld.gpa;
+    const first_atom_index = zld.sections.items(.first_atom_index)[sect_id];
+
+    header.size = 0;
+    header.@"align" = 0;
+
+    var atom_count: u32 = 0;
+
+    {
+        var atom_index = first_atom_index;
+        while (true) {
+            const atom = zld.getAtom(atom_index);
+            const sym = zld.getSymbolPtr(atom.getSymbolWithLoc());
+            sym.n_value = 0;
+            atom_count += 1;
+
+            if (atom.next_index) |next_index| {
+                atom_index = next_index;
+            } else break;
+        }
+    }
+
+    var allocated = std.AutoHashMap(AtomIndex, void).init(gpa);
+    defer allocated.deinit();
+    try allocated.ensureTotalCapacity(atom_count);
+
+    var group_start = first_atom_index;
+    var group_end = first_atom_index;
+    var offset: u64 = 0;
+
+    while (true) {
+        const group_start_atom = zld.getAtom(group_start);
+        log.debug("GROUP START at {d}", .{group_start});
+
+        while (true) {
+            const atom = zld.getAtom(group_end);
+            offset = mem.alignForwardGeneric(u64, offset, try math.powi(u32, 2, atom.alignment));
+
+            const sym = zld.getSymbolPtr(atom.getSymbolWithLoc());
+            sym.n_value = offset;
+            offset += atom.size;
+
+            zld.logAtom(group_end, log);
+
+            header.@"align" = @max(header.@"align", atom.alignment);
+
+            allocated.putAssumeCapacityNoClobber(group_end, {});
+
+            const group_start_sym = zld.getSymbol(group_start_atom.getSymbolWithLoc());
+            if (offset - 
group_start_sym.n_value >= max_allowed_distance) break; + + if (atom.next_index) |next_index| { + group_end = next_index; + } else break; + } + log.debug("GROUP END at {d}", .{group_end}); + + // Insert thunk at group_end + const thunk_index = @intCast(u32, zld.thunks.items.len); + try zld.thunks.append(gpa, .{ .start_index = undefined, .len = 0 }); + + // Scan relocs in the group and create trampolines for any unreachable callsite. + var atom_index = group_start; + while (true) { + const atom = zld.getAtom(atom_index); + try scanRelocs( + zld, + atom_index, + reverse_lookups[atom.getFile().?], + allocated, + thunk_index, + group_end, + ); + + if (atom_index == group_end) break; + + if (atom.next_index) |next_index| { + atom_index = next_index; + } else break; + } + + offset = mem.alignForwardGeneric(u64, offset, Thunk.getAlignment()); + allocateThunk(zld, thunk_index, offset, header); + offset += zld.thunks.items[thunk_index].getSize(); + + const thunk = zld.thunks.items[thunk_index]; + if (thunk.len == 0) { + const group_end_atom = zld.getAtom(group_end); + if (group_end_atom.next_index) |next_index| { + group_start = next_index; + group_end = next_index; + } else break; + } else { + const thunk_end_atom_index = thunk.getEndAtomIndex(); + const thunk_end_atom = zld.getAtom(thunk_end_atom_index); + if (thunk_end_atom.next_index) |next_index| { + group_start = next_index; + group_end = next_index; + } else break; + } + } + + header.size = @intCast(u32, offset); +} + +fn allocateThunk( + zld: *Zld, + thunk_index: ThunkIndex, + base_offset: u64, + header: *macho.section_64, +) void { + const thunk = zld.thunks.items[thunk_index]; + if (thunk.len == 0) return; + + const first_atom_index = thunk.getStartAtomIndex(); + const end_atom_index = thunk.getEndAtomIndex(); + + var atom_index = first_atom_index; + var offset = base_offset; + while (true) { + const atom = zld.getAtom(atom_index); + offset = mem.alignForwardGeneric(u64, offset, Thunk.getAlignment()); + + const sym = zld.getSymbolPtr(atom.getSymbolWithLoc()); + sym.n_value = offset; + offset += atom.size; + + zld.logAtom(atom_index, log); + + header.@"align" = @max(header.@"align", atom.alignment); + + if (end_atom_index == atom_index) break; + + if (atom.next_index) |next_index| { + atom_index = next_index; + } else break; + } +} + +fn scanRelocs( + zld: *Zld, + atom_index: AtomIndex, + reverse_lookup: []u32, + allocated: std.AutoHashMap(AtomIndex, void), + thunk_index: ThunkIndex, + group_end: AtomIndex, +) !void { + const atom = zld.getAtom(atom_index); + const object = zld.objects.items[atom.getFile().?]; + + const base_offset = if (object.getSourceSymbol(atom.sym_index)) |source_sym| blk: { + const source_sect = object.getSourceSection(source_sym.n_sect - 1); + break :blk @intCast(i32, source_sym.n_value - source_sect.addr); + } else 0; + + const relocs = Atom.getAtomRelocs(zld, atom_index); + for (relocs) |rel| { + if (!relocNeedsThunk(rel)) continue; + + const target = Atom.parseRelocTarget(zld, atom_index, rel, reverse_lookup); + if (isReachable(zld, atom_index, rel, base_offset, target, allocated)) continue; + + log.debug("{x}: source = {s}@{x}, target = {s}@{x} unreachable", .{ + rel.r_address - base_offset, + zld.getSymbolName(atom.getSymbolWithLoc()), + zld.getSymbol(atom.getSymbolWithLoc()).n_value, + zld.getSymbolName(target), + zld.getSymbol(target).n_value, + }); + + const gpa = zld.gpa; + const target_sym = zld.getSymbol(target); + + const actual_target: SymbolWithLoc = if (target_sym.undf()) blk: { + const 
stub_atom_index = zld.getStubsAtomIndexForSymbol(target).?; + break :blk .{ .sym_index = zld.getAtom(stub_atom_index).sym_index }; + } else target; + + const thunk = &zld.thunks.items[thunk_index]; + const gop = try thunk.lookup.getOrPut(gpa, actual_target); + if (!gop.found_existing) { + const thunk_atom_index = try createThunkAtom(zld); + gop.value_ptr.* = thunk_atom_index; + + const thunk_atom = zld.getAtomPtr(thunk_atom_index); + const end_atom_index = if (thunk.len == 0) group_end else thunk.getEndAtomIndex(); + const end_atom = zld.getAtomPtr(end_atom_index); + + if (end_atom.next_index) |first_after_index| { + const first_after_atom = zld.getAtomPtr(first_after_index); + first_after_atom.prev_index = thunk_atom_index; + thunk_atom.next_index = first_after_index; + } + + end_atom.next_index = thunk_atom_index; + thunk_atom.prev_index = end_atom_index; + + if (thunk.len == 0) { + thunk.start_index = thunk_atom_index; + } + + thunk.len += 1; + } + + try zld.thunk_table.put(gpa, atom_index, thunk_index); + } +} + +inline fn relocNeedsThunk(rel: macho.relocation_info) bool { + const rel_type = @intToEnum(macho.reloc_type_arm64, rel.r_type); + return rel_type == .ARM64_RELOC_BRANCH26; +} + +fn isReachable( + zld: *Zld, + atom_index: AtomIndex, + rel: macho.relocation_info, + base_offset: i32, + target: SymbolWithLoc, + allocated: std.AutoHashMap(AtomIndex, void), +) bool { + if (zld.getStubsAtomIndexForSymbol(target)) |_| return false; + + const source_atom = zld.getAtom(atom_index); + const source_sym = zld.getSymbol(source_atom.getSymbolWithLoc()); + + const target_object = zld.objects.items[target.getFile().?]; + const target_atom_index = target_object.getAtomIndexForSymbol(target.sym_index).?; + const target_atom = zld.getAtom(target_atom_index); + const target_sym = zld.getSymbol(target_atom.getSymbolWithLoc()); + + if (source_sym.n_sect != target_sym.n_sect) return false; + + if (!allocated.contains(target_atom_index)) return false; + + const source_addr = source_sym.n_value + @intCast(u32, rel.r_address - base_offset); + const target_addr = Atom.getRelocTargetAddress(zld, rel, target, false) catch unreachable; + _ = Atom.calcPcRelativeDisplacementArm64(source_addr, target_addr) catch + return false; + + return true; +} + +fn createThunkAtom(zld: *Zld) !AtomIndex { + const sym_index = try zld.allocateSymbol(); + const atom_index = try zld.createEmptyAtom(sym_index, @sizeOf(u32) * 3, 2); + const sym = zld.getSymbolPtr(.{ .sym_index = sym_index }); + sym.n_type = macho.N_SECT; + + const sect_id = zld.getSectionByName("__TEXT", "__text") orelse unreachable; + sym.n_sect = sect_id + 1; + + return atom_index; +} + +fn getThunkIndex(zld: *Zld, atom_index: AtomIndex) ?ThunkIndex { + const atom = zld.getAtom(atom_index); + const sym = zld.getSymbol(atom.getSymbolWithLoc()); + for (zld.thunks.items) |thunk, i| { + if (thunk.len == 0) continue; + + const thunk_atom_index = thunk.getStartAtomIndex(); + const thunk_atom = zld.getAtom(thunk_atom_index); + const thunk_sym = zld.getSymbol(thunk_atom.getSymbolWithLoc()); + const start_addr = thunk_sym.n_value; + const end_addr = start_addr + thunk.getSize(); + + if (start_addr <= sym.n_value and sym.n_value < end_addr) { + return @intCast(u32, i); + } + } + return null; +} + +pub fn writeThunkCode(zld: *Zld, atom_index: AtomIndex, writer: anytype) !void { + const atom = zld.getAtom(atom_index); + const sym = zld.getSymbol(atom.getSymbolWithLoc()); + const source_addr = sym.n_value; + const thunk = zld.thunks.items[getThunkIndex(zld, atom_index).?]; 
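+    // Each trampoline is the fixed three-instruction sequence emitted below
+    // (12 bytes, in agreement with Thunk.getSize):
+    //   adrp x16, target@page
+    //   add  x16, x16, target@pageoff
+    //   br   x16
+    // x16 is one of the intra-procedure-call scratch registers (IP0) that
+    // AAPCS64 permits a linker-inserted veneer to clobber.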
+ const target_addr = for (thunk.lookup.keys()) |target| { + const target_atom_index = thunk.lookup.get(target).?; + if (atom_index == target_atom_index) break zld.getSymbol(target).n_value; + } else unreachable; + + const pages = Atom.calcNumberOfPages(source_addr, target_addr); + try writer.writeIntLittle(u32, aarch64.Instruction.adrp(.x16, pages).toU32()); + const off = try Atom.calcPageOffset(target_addr, .arithmetic); + try writer.writeIntLittle(u32, aarch64.Instruction.add(.x16, .x16, off, false).toU32()); + try writer.writeIntLittle(u32, aarch64.Instruction.br(.x16).toU32()); +} diff --git a/src/link/MachO/zld.zig b/src/link/MachO/zld.zig index 04bf6176f6..b9adaf7bf0 100644 --- a/src/link/MachO/zld.zig +++ b/src/link/MachO/zld.zig @@ -10,43 +10,3751 @@ const mem = std.mem; const aarch64 = @import("../../arch/aarch64/bits.zig"); const bind = @import("bind.zig"); +const dead_strip = @import("dead_strip.zig"); +const fat = @import("fat.zig"); const link = @import("../../link.zig"); +const thunks = @import("thunks.zig"); const trace = @import("../../tracy.zig").trace; -const Atom = MachO.Atom; +const Allocator = mem.Allocator; +const Archive = @import("Archive.zig"); +const Atom = @import("ZldAtom.zig"); const Cache = @import("../../Cache.zig"); const CodeSignature = @import("CodeSignature.zig"); const Compilation = @import("../../Compilation.zig"); +const DwarfInfo = @import("DwarfInfo.zig"); const Dylib = @import("Dylib.zig"); const MachO = @import("../MachO.zig"); +const LibStub = @import("../tapi.zig").LibStub; const Object = @import("Object.zig"); -const SymbolWithLoc = MachO.SymbolWithLoc; +const StringTable = @import("../strtab.zig").StringTable; const Trie = @import("Trie.zig"); -const dead_strip = @import("dead_strip.zig"); +pub const Zld = struct { + gpa: Allocator, + file: fs.File, + page_size: u16, + options: link.Options, + + objects: std.ArrayListUnmanaged(Object) = .{}, + archives: std.ArrayListUnmanaged(Archive) = .{}, + dylibs: std.ArrayListUnmanaged(Dylib) = .{}, + dylibs_map: std.StringHashMapUnmanaged(u16) = .{}, + referenced_dylibs: std.AutoArrayHashMapUnmanaged(u16, void) = .{}, + + segments: std.ArrayListUnmanaged(macho.segment_command_64) = .{}, + sections: std.MultiArrayList(Section) = .{}, + + locals: std.ArrayListUnmanaged(macho.nlist_64) = .{}, + globals: std.ArrayListUnmanaged(SymbolWithLoc) = .{}, + + entry_index: ?u32 = null, + mh_execute_header_index: ?u32 = null, + dso_handle_index: ?u32 = null, + dyld_stub_binder_index: ?u32 = null, + dyld_private_sym_index: ?u32 = null, + stub_helper_preamble_sym_index: ?u32 = null, + + strtab: StringTable(.strtab) = .{}, + + tlv_ptr_entries: std.ArrayListUnmanaged(IndirectPointer) = .{}, + tlv_ptr_table: std.AutoHashMapUnmanaged(SymbolWithLoc, u32) = .{}, + + got_entries: std.ArrayListUnmanaged(IndirectPointer) = .{}, + got_table: std.AutoHashMapUnmanaged(SymbolWithLoc, u32) = .{}, + + stubs: std.ArrayListUnmanaged(IndirectPointer) = .{}, + stubs_table: std.AutoHashMapUnmanaged(SymbolWithLoc, u32) = .{}, + + thunk_table: std.AutoHashMapUnmanaged(AtomIndex, thunks.ThunkIndex) = .{}, + thunks: std.ArrayListUnmanaged(thunks.Thunk) = .{}, + + atoms: std.ArrayListUnmanaged(Atom) = .{}, + + fn parseObject(self: *Zld, path: []const u8) !bool { + const gpa = self.gpa; + const file = fs.cwd().openFile(path, .{}) catch |err| switch (err) { + error.FileNotFound => return false, + else => |e| return e, + }; + defer file.close(); + + const name = try gpa.dupe(u8, path); + errdefer gpa.free(name); + const cpu_arch = 
self.options.target.cpu.arch; + const mtime: u64 = mtime: { + const stat = file.stat() catch break :mtime 0; + break :mtime @intCast(u64, @divFloor(stat.mtime, 1_000_000_000)); + }; + const file_stat = try file.stat(); + const file_size = math.cast(usize, file_stat.size) orelse return error.Overflow; + const contents = try file.readToEndAllocOptions(gpa, file_size, file_size, @alignOf(u64), null); + + var object = Object{ + .name = name, + .mtime = mtime, + .contents = contents, + }; + + object.parse(gpa, cpu_arch) catch |err| switch (err) { + error.EndOfStream, error.NotObject => { + object.deinit(gpa); + return false; + }, + else => |e| return e, + }; + + try self.objects.append(gpa, object); + + return true; + } + + fn parseArchive(self: *Zld, path: []const u8, force_load: bool) !bool { + const gpa = self.gpa; + const file = fs.cwd().openFile(path, .{}) catch |err| switch (err) { + error.FileNotFound => return false, + else => |e| return e, + }; + errdefer file.close(); + + const name = try gpa.dupe(u8, path); + errdefer gpa.free(name); + const cpu_arch = self.options.target.cpu.arch; + const reader = file.reader(); + const fat_offset = try fat.getLibraryOffset(reader, cpu_arch); + try reader.context.seekTo(fat_offset); + + var archive = Archive{ + .name = name, + .fat_offset = fat_offset, + .file = file, + }; + + archive.parse(gpa, reader) catch |err| switch (err) { + error.EndOfStream, error.NotArchive => { + archive.deinit(gpa); + return false; + }, + else => |e| return e, + }; + + if (force_load) { + defer archive.deinit(gpa); + // Get all offsets from the ToC + var offsets = std.AutoArrayHashMap(u32, void).init(gpa); + defer offsets.deinit(); + for (archive.toc.values()) |offs| { + for (offs.items) |off| { + _ = try offsets.getOrPut(off); + } + } + for (offsets.keys()) |off| { + const object = try archive.parseObject(gpa, cpu_arch, off); + try self.objects.append(gpa, object); + } + } else { + try self.archives.append(gpa, archive); + } + + return true; + } + + const ParseDylibError = error{ + OutOfMemory, + EmptyStubFile, + MismatchedCpuArchitecture, + UnsupportedCpuArchitecture, + EndOfStream, + } || fs.File.OpenError || std.os.PReadError || Dylib.Id.ParseError; + + const DylibCreateOpts = struct { + syslibroot: ?[]const u8, + id: ?Dylib.Id = null, + dependent: bool = false, + needed: bool = false, + weak: bool = false, + }; + + fn parseDylib( + self: *Zld, + path: []const u8, + dependent_libs: anytype, + opts: DylibCreateOpts, + ) ParseDylibError!bool { + const gpa = self.gpa; + const file = fs.cwd().openFile(path, .{}) catch |err| switch (err) { + error.FileNotFound => return false, + else => |e| return e, + }; + defer file.close(); + + const cpu_arch = self.options.target.cpu.arch; + const file_stat = try file.stat(); + var file_size = math.cast(usize, file_stat.size) orelse return error.Overflow; + + const reader = file.reader(); + const fat_offset = math.cast(usize, try fat.getLibraryOffset(reader, cpu_arch)) orelse + return error.Overflow; + try file.seekTo(fat_offset); + file_size -= fat_offset; + + const contents = try file.readToEndAllocOptions(gpa, file_size, file_size, @alignOf(u64), null); + defer gpa.free(contents); + + const dylib_id = @intCast(u16, self.dylibs.items.len); + var dylib = Dylib{ .weak = opts.weak }; + + dylib.parseFromBinary( + gpa, + cpu_arch, + dylib_id, + dependent_libs, + path, + contents, + ) catch |err| switch (err) { + error.EndOfStream, error.NotDylib => { + try file.seekTo(0); + + var lib_stub = LibStub.loadFromFile(gpa, file) catch { + 
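+                // Not a binary dylib and not a parsable TAPI stub either:
+                // clean up and report no match so the caller can try
+                // treating the file as another input type.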
dylib.deinit(gpa); + return false; + }; + defer lib_stub.deinit(); + + try dylib.parseFromStub( + gpa, + self.options.target, + lib_stub, + dylib_id, + dependent_libs, + path, + ); + }, + else => |e| return e, + }; + + if (opts.id) |id| { + if (dylib.id.?.current_version < id.compatibility_version) { + log.warn("found dylib is incompatible with the required minimum version", .{}); + log.warn(" dylib: {s}", .{id.name}); + log.warn(" required minimum version: {}", .{id.compatibility_version}); + log.warn(" dylib version: {}", .{dylib.id.?.current_version}); + + // TODO maybe this should be an error and facilitate auto-cleanup? + dylib.deinit(gpa); + return false; + } + } + + try self.dylibs.append(gpa, dylib); + try self.dylibs_map.putNoClobber(gpa, dylib.id.?.name, dylib_id); + + const should_link_dylib_even_if_unreachable = blk: { + if (self.options.dead_strip_dylibs and !opts.needed) break :blk false; + break :blk !(opts.dependent or self.referenced_dylibs.contains(dylib_id)); + }; + + if (should_link_dylib_even_if_unreachable) { + try self.referenced_dylibs.putNoClobber(gpa, dylib_id, {}); + } + + return true; + } + + fn parseInputFiles( + self: *Zld, + files: []const []const u8, + syslibroot: ?[]const u8, + dependent_libs: anytype, + ) !void { + for (files) |file_name| { + const full_path = full_path: { + var buffer: [fs.MAX_PATH_BYTES]u8 = undefined; + break :full_path try fs.realpath(file_name, &buffer); + }; + log.debug("parsing input file path '{s}'", .{full_path}); + + if (try self.parseObject(full_path)) continue; + if (try self.parseArchive(full_path, false)) continue; + if (try self.parseDylib(full_path, dependent_libs, .{ + .syslibroot = syslibroot, + })) continue; + + log.debug("unknown filetype for positional input file: '{s}'", .{file_name}); + } + } + + fn parseAndForceLoadStaticArchives(self: *Zld, files: []const []const u8) !void { + for (files) |file_name| { + const full_path = full_path: { + var buffer: [fs.MAX_PATH_BYTES]u8 = undefined; + break :full_path try fs.realpath(file_name, &buffer); + }; + log.debug("parsing and force loading static archive '{s}'", .{full_path}); + + if (try self.parseArchive(full_path, true)) continue; + log.debug("unknown filetype: expected static archive: '{s}'", .{file_name}); + } + } + + fn parseLibs( + self: *Zld, + lib_names: []const []const u8, + lib_infos: []const link.SystemLib, + syslibroot: ?[]const u8, + dependent_libs: anytype, + ) !void { + for (lib_names) |lib, i| { + const lib_info = lib_infos[i]; + log.debug("parsing lib path '{s}'", .{lib}); + if (try self.parseDylib(lib, dependent_libs, .{ + .syslibroot = syslibroot, + .needed = lib_info.needed, + .weak = lib_info.weak, + })) continue; + if (try self.parseArchive(lib, false)) continue; + + log.debug("unknown filetype for a library: '{s}'", .{lib}); + } + } + + fn parseDependentLibs(self: *Zld, syslibroot: ?[]const u8, dependent_libs: anytype) !void { + // At this point, we can now parse dependents of dylibs preserving the inclusion order of: + // 1) anything on the linker line is parsed first + // 2) afterwards, we parse dependents of the included dylibs + // TODO this should not be performed if the user specifies `-flat_namespace` flag. + // See ld64 manpages. 
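+        // For example (hypothetical path): a dependent id of
+        // /usr/lib/libfoo.dylib is probed as <syslibroot>/usr/lib/libfoo.dylib
+        // first and, failing that, as <syslibroot>/usr/lib/libfoo.tbd,
+        // mirroring the extension swap performed below.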
+ var arena_alloc = std.heap.ArenaAllocator.init(self.gpa); + const arena = arena_alloc.allocator(); + defer arena_alloc.deinit(); + + while (dependent_libs.readItem()) |*dep_id| { + defer dep_id.id.deinit(self.gpa); + + if (self.dylibs_map.contains(dep_id.id.name)) continue; + + const weak = self.dylibs.items[dep_id.parent].weak; + const has_ext = blk: { + const basename = fs.path.basename(dep_id.id.name); + break :blk mem.lastIndexOfScalar(u8, basename, '.') != null; + }; + const extension = if (has_ext) fs.path.extension(dep_id.id.name) else ""; + const without_ext = if (has_ext) blk: { + const index = mem.lastIndexOfScalar(u8, dep_id.id.name, '.') orelse unreachable; + break :blk dep_id.id.name[0..index]; + } else dep_id.id.name; + + for (&[_][]const u8{ extension, ".tbd" }) |ext| { + const with_ext = try std.fmt.allocPrint(arena, "{s}{s}", .{ without_ext, ext }); + const full_path = if (syslibroot) |root| try fs.path.join(arena, &.{ root, with_ext }) else with_ext; + + log.debug("trying dependency at fully resolved path {s}", .{full_path}); + + const did_parse_successfully = try self.parseDylib(full_path, dependent_libs, .{ + .id = dep_id.id, + .syslibroot = syslibroot, + .dependent = true, + .weak = weak, + }); + if (did_parse_successfully) break; + } else { + log.debug("unable to resolve dependency {s}", .{dep_id.id.name}); + } + } + } + + pub fn getOutputSection(self: *Zld, sect: macho.section_64) !?u8 { + const segname = sect.segName(); + const sectname = sect.sectName(); + const res: ?u8 = blk: { + if (mem.eql(u8, "__LLVM", segname)) { + log.debug("TODO LLVM section: type 0x{x}, name '{s},{s}'", .{ + sect.flags, segname, sectname, + }); + break :blk null; + } + + if (sect.isCode()) { + break :blk self.getSectionByName("__TEXT", "__text") orelse try self.initSection( + "__TEXT", + "__text", + .{ + .flags = macho.S_REGULAR | + macho.S_ATTR_PURE_INSTRUCTIONS | + macho.S_ATTR_SOME_INSTRUCTIONS, + }, + ); + } + + if (sect.isDebug()) { + // TODO debug attributes + if (mem.eql(u8, "__LD", segname) and mem.eql(u8, "__compact_unwind", sectname)) { + log.debug("TODO compact unwind section: type 0x{x}, name '{s},{s}'", .{ + sect.flags, segname, sectname, + }); + } + break :blk null; + } + + switch (sect.@"type"()) { + macho.S_4BYTE_LITERALS, + macho.S_8BYTE_LITERALS, + macho.S_16BYTE_LITERALS, + => { + break :blk self.getSectionByName("__TEXT", "__const") orelse try self.initSection( + "__TEXT", + "__const", + .{}, + ); + }, + macho.S_CSTRING_LITERALS => { + if (mem.startsWith(u8, sectname, "__objc")) { + break :blk self.getSectionByName(segname, sectname) orelse try self.initSection( + segname, + sectname, + .{}, + ); + } + break :blk self.getSectionByName("__TEXT", "__cstring") orelse try self.initSection( + "__TEXT", + "__cstring", + .{ .flags = macho.S_CSTRING_LITERALS }, + ); + }, + macho.S_MOD_INIT_FUNC_POINTERS, + macho.S_MOD_TERM_FUNC_POINTERS, + => { + break :blk self.getSectionByName("__DATA_CONST", sectname) orelse try self.initSection( + "__DATA_CONST", + sectname, + .{ .flags = sect.flags }, + ); + }, + macho.S_LITERAL_POINTERS, + macho.S_ZEROFILL, + macho.S_THREAD_LOCAL_VARIABLES, + macho.S_THREAD_LOCAL_VARIABLE_POINTERS, + macho.S_THREAD_LOCAL_REGULAR, + macho.S_THREAD_LOCAL_ZEROFILL, + => { + break :blk self.getSectionByName(segname, sectname) orelse try self.initSection( + segname, + sectname, + .{ .flags = sect.flags }, + ); + }, + macho.S_COALESCED => { + // TODO unwind info + if (mem.eql(u8, "__TEXT", segname) and mem.eql(u8, "__eh_frame", sectname)) { + log.debug("TODO 
eh frame section: type 0x{x}, name '{s},{s}'", .{ + sect.flags, segname, sectname, + }); + break :blk null; + } + break :blk self.getSectionByName(segname, sectname) orelse try self.initSection( + segname, + sectname, + .{}, + ); + }, + macho.S_REGULAR => { + if (mem.eql(u8, segname, "__TEXT")) { + if (mem.eql(u8, sectname, "__rodata") or + mem.eql(u8, sectname, "__typelink") or + mem.eql(u8, sectname, "__itablink") or + mem.eql(u8, sectname, "__gosymtab") or + mem.eql(u8, sectname, "__gopclntab")) + { + break :blk self.getSectionByName("__DATA_CONST", "__const") orelse try self.initSection( + "__DATA_CONST", + "__const", + .{}, + ); + } + } + if (mem.eql(u8, segname, "__DATA")) { + if (mem.eql(u8, sectname, "__const") or + mem.eql(u8, sectname, "__cfstring") or + mem.eql(u8, sectname, "__objc_classlist") or + mem.eql(u8, sectname, "__objc_imageinfo")) + { + break :blk self.getSectionByName("__DATA_CONST", sectname) orelse + try self.initSection( + "__DATA_CONST", + sectname, + .{}, + ); + } else if (mem.eql(u8, sectname, "__data")) { + break :blk self.getSectionByName("__DATA", "__data") orelse + try self.initSection( + "__DATA", + "__data", + .{}, + ); + } + } + break :blk self.getSectionByName(segname, sectname) orelse try self.initSection( + segname, + sectname, + .{}, + ); + }, + else => break :blk null, + } + }; + return res; + } + + pub fn addAtomToSection(self: *Zld, atom_index: AtomIndex) void { + const atom = self.getAtomPtr(atom_index); + const sym = self.getSymbol(atom.getSymbolWithLoc()); + var section = self.sections.get(sym.n_sect - 1); + if (section.header.size > 0) { + const last_atom = self.getAtomPtr(section.last_atom_index); + last_atom.next_index = atom_index; + atom.prev_index = section.last_atom_index; + } else { + section.first_atom_index = atom_index; + } + section.last_atom_index = atom_index; + section.header.size += atom.size; + self.sections.set(sym.n_sect - 1, section); + } + + pub fn createEmptyAtom(self: *Zld, sym_index: u32, size: u64, alignment: u32) !AtomIndex { + const gpa = self.gpa; + const index = @intCast(AtomIndex, self.atoms.items.len); + const atom = try self.atoms.addOne(gpa); + atom.* = Atom.empty; + atom.sym_index = sym_index; + atom.size = size; + atom.alignment = alignment; + + log.debug("creating ATOM(%{d}) at index {d}", .{ sym_index, index }); + + return index; + } + + pub fn createGotAtom(self: *Zld) !AtomIndex { + const sym_index = try self.allocateSymbol(); + const atom_index = try self.createEmptyAtom(sym_index, @sizeOf(u64), 3); + const sym = self.getSymbolPtr(.{ .sym_index = sym_index }); + sym.n_type = macho.N_SECT; + + const sect_id = self.getSectionByName("__DATA_CONST", "__got") orelse + try self.initSection("__DATA_CONST", "__got", .{ + .flags = macho.S_NON_LAZY_SYMBOL_POINTERS, + }); + sym.n_sect = sect_id + 1; + + self.addAtomToSection(atom_index); + + return atom_index; + } + + fn writeGotPointer(self: *Zld, got_index: u32, writer: anytype) !void { + const target_addr = blk: { + const entry = self.got_entries.items[got_index]; + const sym = entry.getTargetSymbol(self); + break :blk sym.n_value; + }; + try writer.writeIntLittle(u64, target_addr); + } + + pub fn createTlvPtrAtom(self: *Zld) !AtomIndex { + const sym_index = try self.allocateSymbol(); + const atom_index = try self.createEmptyAtom(sym_index, @sizeOf(u64), 3); + const sym = self.getSymbolPtr(.{ .sym_index = sym_index }); + sym.n_type = macho.N_SECT; + + const sect_id = (try self.getOutputSection(.{ + .segname = makeStaticString("__DATA"), + .sectname = 
makeStaticString("__thread_ptrs"), + .flags = macho.S_THREAD_LOCAL_VARIABLE_POINTERS, + })).?; + sym.n_sect = sect_id + 1; + + self.addAtomToSection(atom_index); + + return atom_index; + } + + fn createDyldStubBinderGotAtom(self: *Zld) !void { + const sym_index = self.dyld_stub_binder_index orelse return; + const gpa = self.gpa; + const target = SymbolWithLoc{ .sym_index = sym_index }; + const atom_index = try self.createGotAtom(); + const got_index = @intCast(u32, self.got_entries.items.len); + try self.got_entries.append(gpa, .{ + .target = target, + .atom_index = atom_index, + }); + try self.got_table.putNoClobber(gpa, target, got_index); + } + + fn createDyldPrivateAtom(self: *Zld) !void { + if (self.dyld_stub_binder_index == null) return; + + const sym_index = try self.allocateSymbol(); + const atom_index = try self.createEmptyAtom(sym_index, @sizeOf(u64), 3); + const sym = self.getSymbolPtr(.{ .sym_index = sym_index }); + sym.n_type = macho.N_SECT; + + const sect_id = self.getSectionByName("__DATA", "__data") orelse try self.initSection("__DATA", "__data", .{}); + sym.n_sect = sect_id + 1; + + self.dyld_private_sym_index = sym_index; + + self.addAtomToSection(atom_index); + } + + fn createStubHelperPreambleAtom(self: *Zld) !void { + if (self.dyld_stub_binder_index == null) return; + + const cpu_arch = self.options.target.cpu.arch; + const size: u64 = switch (cpu_arch) { + .x86_64 => 15, + .aarch64 => 6 * @sizeOf(u32), + else => unreachable, + }; + const alignment: u32 = switch (cpu_arch) { + .x86_64 => 0, + .aarch64 => 2, + else => unreachable, + }; + const sym_index = try self.allocateSymbol(); + const atom_index = try self.createEmptyAtom(sym_index, size, alignment); + const sym = self.getSymbolPtr(.{ .sym_index = sym_index }); + sym.n_type = macho.N_SECT; + + const sect_id = self.getSectionByName("__TEXT", "__stub_helper") orelse + try self.initSection("__TEXT", "__stub_helper", .{ + .flags = macho.S_REGULAR | + macho.S_ATTR_PURE_INSTRUCTIONS | + macho.S_ATTR_SOME_INSTRUCTIONS, + }); + sym.n_sect = sect_id + 1; + + self.stub_helper_preamble_sym_index = sym_index; + + self.addAtomToSection(atom_index); + } + + fn writeStubHelperPreambleCode(self: *Zld, writer: anytype) !void { + const cpu_arch = self.options.target.cpu.arch; + const source_addr = blk: { + const sym = self.getSymbol(.{ .sym_index = self.stub_helper_preamble_sym_index.? }); + break :blk sym.n_value; + }; + const dyld_private_addr = blk: { + const sym = self.getSymbol(.{ .sym_index = self.dyld_private_sym_index.? }); + break :blk sym.n_value; + }; + const dyld_stub_binder_got_addr = blk: { + const index = self.got_table.get(.{ .sym_index = self.dyld_stub_binder_index.? 
}).?;
+            const entry = self.got_entries.items[index];
+            break :blk entry.getAtomSymbol(self).n_value;
+        };
+        switch (cpu_arch) {
+            .x86_64 => {
+                try writer.writeAll(&.{ 0x4c, 0x8d, 0x1d });
+                {
+                    const disp = try Atom.calcPcRelativeDisplacementX86(source_addr + 3, dyld_private_addr, 0);
+                    try writer.writeIntLittle(i32, disp);
+                }
+                try writer.writeAll(&.{ 0x41, 0x53, 0xff, 0x25 });
+                {
+                    const disp = try Atom.calcPcRelativeDisplacementX86(source_addr + 11, dyld_stub_binder_got_addr, 0);
+                    try writer.writeIntLittle(i32, disp);
+                }
+            },
+            .aarch64 => {
+                {
+                    const pages = Atom.calcNumberOfPages(source_addr, dyld_private_addr);
+                    try writer.writeIntLittle(u32, aarch64.Instruction.adrp(.x17, pages).toU32());
+                }
+                {
+                    const off = try Atom.calcPageOffset(dyld_private_addr, .arithmetic);
+                    try writer.writeIntLittle(u32, aarch64.Instruction.add(.x17, .x17, off, false).toU32());
+                }
+                try writer.writeIntLittle(u32, aarch64.Instruction.stp(
+                    .x16,
+                    .x17,
+                    aarch64.Register.sp,
+                    aarch64.Instruction.LoadStorePairOffset.pre_index(-16),
+                ).toU32());
+                {
+                    const pages = Atom.calcNumberOfPages(source_addr + 12, dyld_stub_binder_got_addr);
+                    try writer.writeIntLittle(u32, aarch64.Instruction.adrp(.x16, pages).toU32());
+                }
+                {
+                    const off = try Atom.calcPageOffset(dyld_stub_binder_got_addr, .load_store_64);
+                    try writer.writeIntLittle(u32, aarch64.Instruction.ldr(
+                        .x16,
+                        .x16,
+                        aarch64.Instruction.LoadStoreOffset.imm(off),
+                    ).toU32());
+                }
+                try writer.writeIntLittle(u32, aarch64.Instruction.br(.x16).toU32());
+            },
+            else => unreachable,
+        }
+    }
+
+    pub fn createStubHelperAtom(self: *Zld) !AtomIndex {
+        const cpu_arch = self.options.target.cpu.arch;
+        const stub_size: u4 = switch (cpu_arch) {
+            .x86_64 => 10,
+            .aarch64 => 3 * @sizeOf(u32),
+            else => unreachable,
+        };
+        const alignment: u2 = switch (cpu_arch) {
+            .x86_64 => 0,
+            .aarch64 => 2,
+            else => unreachable,
+        };
+
+        const sym_index = try self.allocateSymbol();
+        const atom_index = try self.createEmptyAtom(sym_index, stub_size, alignment);
+        const sym = self.getSymbolPtr(.{ .sym_index = sym_index });
+        sym.n_type = macho.N_SECT;
+
+        const sect_id = self.getSectionByName("__TEXT", "__stub_helper").?;
+        sym.n_sect = sect_id + 1;
+
+        self.addAtomToSection(atom_index);
+
+        return atom_index;
+    }
+
+    fn writeStubHelperCode(self: *Zld, atom_index: AtomIndex, writer: anytype) !void {
+        const cpu_arch = self.options.target.cpu.arch;
+        const source_addr = blk: {
+            const atom = self.getAtom(atom_index);
+            const sym = self.getSymbol(atom.getSymbolWithLoc());
+            break :blk sym.n_value;
+        };
+        const target_addr = blk: {
+            const sym = self.getSymbol(.{ .sym_index = self.stub_helper_preamble_sym_index.? 
}); + break :blk sym.n_value; + }; + switch (cpu_arch) { + .x86_64 => { + try writer.writeAll(&.{ 0x68, 0x0, 0x0, 0x0, 0x0, 0xe9 }); + { + const disp = try Atom.calcPcRelativeDisplacementX86(source_addr + 6, target_addr, 0); + try writer.writeIntLittle(i32, disp); + } + }, + .aarch64 => { + const stub_size: u4 = 3 * @sizeOf(u32); + const literal = blk: { + const div_res = try math.divExact(u64, stub_size - @sizeOf(u32), 4); + break :blk math.cast(u18, div_res) orelse return error.Overflow; + }; + try writer.writeIntLittle(u32, aarch64.Instruction.ldrLiteral( + .w16, + literal, + ).toU32()); + { + const disp = try Atom.calcPcRelativeDisplacementArm64(source_addr + 4, target_addr); + try writer.writeIntLittle(u32, aarch64.Instruction.b(disp).toU32()); + } + try writer.writeAll(&.{ 0x0, 0x0, 0x0, 0x0 }); + }, + else => unreachable, + } + } + + pub fn createLazyPointerAtom(self: *Zld) !AtomIndex { + const sym_index = try self.allocateSymbol(); + const atom_index = try self.createEmptyAtom(sym_index, @sizeOf(u64), 3); + const sym = self.getSymbolPtr(.{ .sym_index = sym_index }); + sym.n_type = macho.N_SECT; + + const sect_id = self.getSectionByName("__DATA", "__la_symbol_ptr") orelse + try self.initSection("__DATA", "__la_symbol_ptr", .{ + .flags = macho.S_LAZY_SYMBOL_POINTERS, + }); + sym.n_sect = sect_id + 1; + + self.addAtomToSection(atom_index); + + return atom_index; + } + + fn writeLazyPointer(self: *Zld, stub_helper_index: u32, writer: anytype) !void { + const target_addr = blk: { + const sect_id = self.getSectionByName("__TEXT", "__stub_helper").?; + var atom_index = self.sections.items(.first_atom_index)[sect_id]; + var count: u32 = 0; + while (count < stub_helper_index + 1) : (count += 1) { + const atom = self.getAtom(atom_index); + if (atom.next_index) |next_index| { + atom_index = next_index; + } + } + const atom = self.getAtom(atom_index); + const sym = self.getSymbol(atom.getSymbolWithLoc()); + break :blk sym.n_value; + }; + try writer.writeIntLittle(u64, target_addr); + } + + pub fn createStubAtom(self: *Zld) !AtomIndex { + const cpu_arch = self.options.target.cpu.arch; + const alignment: u2 = switch (cpu_arch) { + .x86_64 => 0, + .aarch64 => 2, + else => unreachable, // unhandled architecture type + }; + const stub_size: u4 = switch (cpu_arch) { + .x86_64 => 6, + .aarch64 => 3 * @sizeOf(u32), + else => unreachable, // unhandled architecture type + }; + const sym_index = try self.allocateSymbol(); + const atom_index = try self.createEmptyAtom(sym_index, stub_size, alignment); + const sym = self.getSymbolPtr(.{ .sym_index = sym_index }); + sym.n_type = macho.N_SECT; + + const sect_id = self.getSectionByName("__TEXT", "__stubs") orelse + try self.initSection("__TEXT", "__stubs", .{ + .flags = macho.S_SYMBOL_STUBS | + macho.S_ATTR_PURE_INSTRUCTIONS | + macho.S_ATTR_SOME_INSTRUCTIONS, + .reserved2 = stub_size, + }); + sym.n_sect = sect_id + 1; + + self.addAtomToSection(atom_index); + + return atom_index; + } + + fn writeStubCode(self: *Zld, atom_index: AtomIndex, stub_index: u32, writer: anytype) !void { + const cpu_arch = self.options.target.cpu.arch; + const source_addr = blk: { + const atom = self.getAtom(atom_index); + const sym = self.getSymbol(atom.getSymbolWithLoc()); + break :blk sym.n_value; + }; + const target_addr = blk: { + // TODO: cache this at stub atom creation; they always go in pairs anyhow + const la_sect_id = self.getSectionByName("__DATA", "__la_symbol_ptr").?; + var la_atom_index = self.sections.items(.first_atom_index)[la_sect_id]; + var count: u32 = 0; + while 
(count < stub_index) : (count += 1) { + const la_atom = self.getAtom(la_atom_index); + la_atom_index = la_atom.next_index.?; + } + const atom = self.getAtom(la_atom_index); + const sym = self.getSymbol(atom.getSymbolWithLoc()); + break :blk sym.n_value; + }; + switch (cpu_arch) { + .x86_64 => { + try writer.writeAll(&.{ 0xff, 0x25 }); + { + const disp = try Atom.calcPcRelativeDisplacementX86(source_addr + 2, target_addr, 0); + try writer.writeIntLittle(i32, disp); + } + }, + .aarch64 => { + { + const pages = Atom.calcNumberOfPages(source_addr, target_addr); + try writer.writeIntLittle(u32, aarch64.Instruction.adrp(.x16, pages).toU32()); + } + { + const off = try Atom.calcPageOffset(target_addr, .load_store_64); + try writer.writeIntLittle(u32, aarch64.Instruction.ldr( + .x16, + .x16, + aarch64.Instruction.LoadStoreOffset.imm(off), + ).toU32()); + } + try writer.writeIntLittle(u32, aarch64.Instruction.br(.x16).toU32()); + }, + else => unreachable, + } + } + + fn createTentativeDefAtoms(self: *Zld) !void { + const gpa = self.gpa; + + for (self.globals.items) |global| { + const sym = self.getSymbolPtr(global); + if (!sym.tentative()) continue; + if (sym.n_desc == N_DEAD) continue; + + log.debug("creating tentative definition for ATOM(%{d}, '{s}') in object({?})", .{ + global.sym_index, self.getSymbolName(global), global.file, + }); + + // Convert any tentative definition into a regular symbol and allocate + // text blocks for each tentative definition. + const size = sym.n_value; + const alignment = (sym.n_desc >> 8) & 0x0f; + const n_sect = (try self.getOutputSection(.{ + .segname = makeStaticString("__DATA"), + .sectname = makeStaticString("__bss"), + .flags = macho.S_ZEROFILL, + })).? + 1; + + sym.* = .{ + .n_strx = sym.n_strx, + .n_type = macho.N_SECT | macho.N_EXT, + .n_sect = n_sect, + .n_desc = 0, + .n_value = 0, + }; + + const atom_index = try self.createEmptyAtom(global.sym_index, size, alignment); + const atom = self.getAtomPtr(atom_index); + atom.file = global.file; + + self.addAtomToSection(atom_index); + + assert(global.getFile() != null); + const object = &self.objects.items[global.getFile().?]; + try object.atoms.append(gpa, atom_index); + object.atom_by_index_table[global.sym_index] = atom_index; + } + } + + fn resolveSymbolsInObject(self: *Zld, object_id: u16, resolver: *SymbolResolver) !void { + const object = &self.objects.items[object_id]; + const in_symtab = object.in_symtab orelse return; + + log.debug("resolving symbols in '{s}'", .{object.name}); + + var sym_index: u32 = 0; + while (sym_index < in_symtab.len) : (sym_index += 1) { + const sym = &object.symtab[sym_index]; + const sym_name = object.getSymbolName(sym_index); + + if (sym.stab()) { + log.err("unhandled symbol type: stab", .{}); + log.err(" symbol '{s}'", .{sym_name}); + log.err(" first definition in '{s}'", .{object.name}); + return error.UnhandledSymbolType; + } + + if (sym.indr()) { + log.err("unhandled symbol type: indirect", .{}); + log.err(" symbol '{s}'", .{sym_name}); + log.err(" first definition in '{s}'", .{object.name}); + return error.UnhandledSymbolType; + } + + if (sym.abs()) { + log.err("unhandled symbol type: absolute", .{}); + log.err(" symbol '{s}'", .{sym_name}); + log.err(" first definition in '{s}'", .{object.name}); + return error.UnhandledSymbolType; + } + + if (sym.sect() and !sym.ext()) { + log.debug("symbol '{s}' local to object {s}; skipping...", .{ + sym_name, + object.name, + }); + continue; + } + + const sym_loc = SymbolWithLoc{ .sym_index = sym_index, .file = object_id }; + + 
const global_index = resolver.table.get(sym_name) orelse { + const gpa = self.gpa; + const name = try resolver.arena.dupe(u8, sym_name); + const global_index = @intCast(u32, self.globals.items.len); + try self.globals.append(gpa, sym_loc); + try resolver.table.putNoClobber(name, global_index); + if (sym.undf() and !sym.tentative()) { + try resolver.unresolved.putNoClobber(global_index, {}); + } + continue; + }; + const global = &self.globals.items[global_index]; + const global_sym = self.getSymbol(global.*); + + // Cases to consider: sym vs global_sym + // 1. strong(sym) and strong(global_sym) => error + // 2. strong(sym) and weak(global_sym) => sym + // 3. strong(sym) and tentative(global_sym) => sym + // 4. strong(sym) and undf(global_sym) => sym + // 5. weak(sym) and strong(global_sym) => global_sym + // 6. weak(sym) and tentative(global_sym) => sym + // 7. weak(sym) and undf(global_sym) => sym + // 8. tentative(sym) and strong(global_sym) => global_sym + // 9. tentative(sym) and weak(global_sym) => global_sym + // 10. tentative(sym) and tentative(global_sym) => pick larger + // 11. tentative(sym) and undf(global_sym) => sym + // 12. undf(sym) and * => global_sym + // + // Reduces to: + // 1. strong(sym) and strong(global_sym) => error + // 2. * and strong(global_sym) => global_sym + // 3. weak(sym) and weak(global_sym) => global_sym + // 4. tentative(sym) and tentative(global_sym) => pick larger + // 5. undf(sym) and * => global_sym + // 6. else => sym + + const sym_is_strong = sym.sect() and !(sym.weakDef() or sym.pext()); + const global_is_strong = global_sym.sect() and !(global_sym.weakDef() or global_sym.pext()); + const sym_is_weak = sym.sect() and (sym.weakDef() or sym.pext()); + const global_is_weak = global_sym.sect() and (global_sym.weakDef() or global_sym.pext()); + + if (sym_is_strong and global_is_strong) { + log.err("symbol '{s}' defined multiple times", .{sym_name}); + if (global.getFile()) |file| { + log.err(" first definition in '{s}'", .{self.objects.items[file].name}); + } + log.err(" next definition in '{s}'", .{self.objects.items[object_id].name}); + return error.MultipleSymbolDefinitions; + } + + const update_global = blk: { + if (global_is_strong) break :blk false; + if (sym_is_weak and global_is_weak) break :blk false; + if (sym.tentative() and global_sym.tentative()) { + if (global_sym.n_value >= sym.n_value) break :blk false; + } + if (sym.undf() and !sym.tentative()) break :blk false; + break :blk true; + }; + + if (update_global) { + const global_object = &self.objects.items[global.getFile().?]; + global_object.globals_lookup[global.sym_index] = global_index; + _ = resolver.unresolved.swapRemove(resolver.table.get(sym_name).?); + global.* = sym_loc; + } else { + object.globals_lookup[sym_index] = global_index; + } + } + } + + fn resolveSymbolsInArchives(self: *Zld, resolver: *SymbolResolver) !void { + if (self.archives.items.len == 0) return; + + const gpa = self.gpa; + const cpu_arch = self.options.target.cpu.arch; + var next_sym: usize = 0; + loop: while (next_sym < resolver.unresolved.count()) { + const global = self.globals.items[resolver.unresolved.keys()[next_sym]]; + const sym_name = self.getSymbolName(global); + + for (self.archives.items) |archive| { + // Check if the entry exists in a static archive. + const offsets = archive.toc.get(sym_name) orelse { + // No hit. 
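+                    // This archive's table of contents does not define the
+                    // symbol; keep it unresolved for now and try the
+                    // remaining archives.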
+ continue; + }; + assert(offsets.items.len > 0); + + const object_id = @intCast(u16, self.objects.items.len); + const object = try archive.parseObject(gpa, cpu_arch, offsets.items[0]); + try self.objects.append(gpa, object); + try self.resolveSymbolsInObject(object_id, resolver); + + continue :loop; + } + + next_sym += 1; + } + } + + fn resolveSymbolsInDylibs(self: *Zld, resolver: *SymbolResolver) !void { + if (self.dylibs.items.len == 0) return; + + var next_sym: usize = 0; + loop: while (next_sym < resolver.unresolved.count()) { + const global_index = resolver.unresolved.keys()[next_sym]; + const global = self.globals.items[global_index]; + const sym = self.getSymbolPtr(global); + const sym_name = self.getSymbolName(global); + + for (self.dylibs.items) |dylib, id| { + if (!dylib.symbols.contains(sym_name)) continue; + + const dylib_id = @intCast(u16, id); + if (!self.referenced_dylibs.contains(dylib_id)) { + try self.referenced_dylibs.putNoClobber(self.gpa, dylib_id, {}); + } + + const ordinal = self.referenced_dylibs.getIndex(dylib_id) orelse unreachable; + sym.n_type |= macho.N_EXT; + sym.n_desc = @intCast(u16, ordinal + 1) * macho.N_SYMBOL_RESOLVER; + + if (dylib.weak) { + sym.n_desc |= macho.N_WEAK_REF; + } + + assert(resolver.unresolved.swapRemove(global_index)); + continue :loop; + } + + next_sym += 1; + } + } + + fn resolveSymbolsAtLoading(self: *Zld, resolver: *SymbolResolver) !void { + const is_lib = self.options.output_mode == .Lib; + const is_dyn_lib = self.options.link_mode == .Dynamic and is_lib; + const allow_undef = is_dyn_lib and (self.options.allow_shlib_undefined orelse false); + + var next_sym: usize = 0; + while (next_sym < resolver.unresolved.count()) { + const global_index = resolver.unresolved.keys()[next_sym]; + const global = self.globals.items[global_index]; + const sym = self.getSymbolPtr(global); + const sym_name = self.getSymbolName(global); + + if (sym.discarded()) { + sym.* = .{ + .n_strx = 0, + .n_type = macho.N_UNDF, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }; + _ = resolver.unresolved.swapRemove(global_index); + continue; + } else if (allow_undef) { + const n_desc = @bitCast( + u16, + macho.BIND_SPECIAL_DYLIB_FLAT_LOOKUP * @intCast(i16, macho.N_SYMBOL_RESOLVER), + ); + sym.n_type = macho.N_EXT; + sym.n_desc = n_desc; + _ = resolver.unresolved.swapRemove(global_index); + continue; + } + + log.err("undefined reference to symbol '{s}'", .{sym_name}); + if (global.getFile()) |file| { + log.err(" first referenced in '{s}'", .{self.objects.items[file].name}); + } + + next_sym += 1; + } + } + + fn createMhExecuteHeaderSymbol(self: *Zld, resolver: *SymbolResolver) !void { + if (self.options.output_mode != .Exe) return; + if (resolver.table.get("__mh_execute_header")) |global_index| { + const global = self.globals.items[global_index]; + const sym = self.getSymbol(global); + self.mh_execute_header_index = global_index; + if (!sym.undf() and !(sym.pext() or sym.weakDef())) return; + } + + const gpa = self.gpa; + const sym_index = try self.allocateSymbol(); + const sym_loc = SymbolWithLoc{ .sym_index = sym_index }; + const sym = self.getSymbolPtr(sym_loc); + sym.n_strx = try self.strtab.insert(gpa, "__mh_execute_header"); + sym.n_type = macho.N_SECT | macho.N_EXT; + sym.n_desc = macho.REFERENCED_DYNAMICALLY; + + if (resolver.table.get("__mh_execute_header")) |global_index| { + const global = &self.globals.items[global_index]; + const global_object = &self.objects.items[global.getFile().?]; + global_object.globals_lookup[global.sym_index] = global_index; + 
global.* = sym_loc; + self.mh_execute_header_index = global_index; + } else { + const global_index = @intCast(u32, self.globals.items.len); + try self.globals.append(gpa, sym_loc); + self.mh_execute_header_index = global_index; + } + } + + fn createDsoHandleSymbol(self: *Zld, resolver: *SymbolResolver) !void { + const global_index = resolver.table.get("___dso_handle") orelse return; + const global = &self.globals.items[global_index]; + self.dso_handle_index = global_index; + if (!self.getSymbol(global.*).undf()) return; + + const gpa = self.gpa; + const sym_index = try self.allocateSymbol(); + const sym_loc = SymbolWithLoc{ .sym_index = sym_index }; + const sym = self.getSymbolPtr(sym_loc); + sym.n_strx = try self.strtab.insert(gpa, "___dso_handle"); + sym.n_type = macho.N_SECT | macho.N_EXT; + sym.n_desc = macho.N_WEAK_DEF; + + const global_object = &self.objects.items[global.getFile().?]; + global_object.globals_lookup[global.sym_index] = global_index; + _ = resolver.unresolved.swapRemove(resolver.table.get("___dso_handle").?); + global.* = sym_loc; + } + + fn resolveDyldStubBinder(self: *Zld, resolver: *SymbolResolver) !void { + if (self.dyld_stub_binder_index != null) return; + if (resolver.unresolved.count() == 0) return; // no need for a stub binder if we don't have any imports + + const gpa = self.gpa; + const sym_name = "dyld_stub_binder"; + const sym_index = try self.allocateSymbol(); + const sym_loc = SymbolWithLoc{ .sym_index = sym_index }; + const sym = self.getSymbolPtr(sym_loc); + sym.n_strx = try self.strtab.insert(gpa, sym_name); + sym.n_type = macho.N_UNDF; + + const global = SymbolWithLoc{ .sym_index = sym_index }; + try self.globals.append(gpa, global); + + for (self.dylibs.items) |dylib, id| { + if (!dylib.symbols.contains(sym_name)) continue; + + const dylib_id = @intCast(u16, id); + if (!self.referenced_dylibs.contains(dylib_id)) { + try self.referenced_dylibs.putNoClobber(gpa, dylib_id, {}); + } + + const ordinal = self.referenced_dylibs.getIndex(dylib_id) orelse unreachable; + sym.n_type |= macho.N_EXT; + sym.n_desc = @intCast(u16, ordinal + 1) * macho.N_SYMBOL_RESOLVER; + self.dyld_stub_binder_index = sym_index; + + break; + } + + if (self.dyld_stub_binder_index == null) { + log.err("undefined reference to symbol '{s}'", .{sym_name}); + return error.UndefinedSymbolReference; + } + } + + fn writeDylinkerLC(ncmds: *u32, lc_writer: anytype) !void { + const name_len = mem.sliceTo(MachO.default_dyld_path, 0).len; + const cmdsize = @intCast(u32, mem.alignForwardGeneric( + u64, + @sizeOf(macho.dylinker_command) + name_len, + @sizeOf(u64), + )); + try lc_writer.writeStruct(macho.dylinker_command{ + .cmd = .LOAD_DYLINKER, + .cmdsize = cmdsize, + .name = @sizeOf(macho.dylinker_command), + }); + try lc_writer.writeAll(mem.sliceTo(MachO.default_dyld_path, 0)); + const padding = cmdsize - @sizeOf(macho.dylinker_command) - name_len; + if (padding > 0) { + try lc_writer.writeByteNTimes(0, padding); + } + ncmds.* += 1; + } + + fn writeMainLC(self: *Zld, ncmds: *u32, lc_writer: anytype) !void { + if (self.options.output_mode != .Exe) return; + const seg_id = self.getSegmentByName("__TEXT").?; + const seg = self.segments.items[seg_id]; + const global = self.getEntryPoint(); + const sym = self.getSymbol(global); + try lc_writer.writeStruct(macho.entry_point_command{ + .cmd = .MAIN, + .cmdsize = @sizeOf(macho.entry_point_command), + .entryoff = @intCast(u32, sym.n_value - seg.vmaddr), + .stacksize = self.options.stack_size_override orelse 0, + }); + ncmds.* += 1; + } + + const 
WriteDylibLCCtx = struct { + cmd: macho.LC, + name: []const u8, + timestamp: u32 = 2, + current_version: u32 = 0x10000, + compatibility_version: u32 = 0x10000, + }; + + fn writeDylibLC(ctx: WriteDylibLCCtx, ncmds: *u32, lc_writer: anytype) !void { + const name_len = ctx.name.len + 1; + const cmdsize = @intCast(u32, mem.alignForwardGeneric( + u64, + @sizeOf(macho.dylib_command) + name_len, + @sizeOf(u64), + )); + try lc_writer.writeStruct(macho.dylib_command{ + .cmd = ctx.cmd, + .cmdsize = cmdsize, + .dylib = .{ + .name = @sizeOf(macho.dylib_command), + .timestamp = ctx.timestamp, + .current_version = ctx.current_version, + .compatibility_version = ctx.compatibility_version, + }, + }); + try lc_writer.writeAll(ctx.name); + try lc_writer.writeByte(0); + const padding = cmdsize - @sizeOf(macho.dylib_command) - name_len; + if (padding > 0) { + try lc_writer.writeByteNTimes(0, padding); + } + ncmds.* += 1; + } + + fn writeDylibIdLC(self: *Zld, ncmds: *u32, lc_writer: anytype) !void { + if (self.options.output_mode != .Lib) return; + const install_name = self.options.install_name orelse self.options.emit.?.sub_path; + const curr = self.options.version orelse std.builtin.Version{ + .major = 1, + .minor = 0, + .patch = 0, + }; + const compat = self.options.compatibility_version orelse std.builtin.Version{ + .major = 1, + .minor = 0, + .patch = 0, + }; + try writeDylibLC(.{ + .cmd = .ID_DYLIB, + .name = install_name, + .current_version = curr.major << 16 | curr.minor << 8 | curr.patch, + .compatibility_version = compat.major << 16 | compat.minor << 8 | compat.patch, + }, ncmds, lc_writer); + } + + const RpathIterator = struct { + buffer: []const []const u8, + table: std.StringHashMap(void), + count: usize = 0, + + fn init(gpa: Allocator, rpaths: []const []const u8) RpathIterator { + return .{ .buffer = rpaths, .table = std.StringHashMap(void).init(gpa) }; + } + + fn deinit(it: *RpathIterator) void { + it.table.deinit(); + } + + fn next(it: *RpathIterator) !?[]const u8 { + while (true) { + if (it.count >= it.buffer.len) return null; + const rpath = it.buffer[it.count]; + it.count += 1; + const gop = try it.table.getOrPut(rpath); + if (gop.found_existing) continue; + return rpath; + } + } + }; + + fn writeRpathLCs(self: *Zld, ncmds: *u32, lc_writer: anytype) !void { + const gpa = self.gpa; + + var it = RpathIterator.init(gpa, self.options.rpath_list); + defer it.deinit(); + + while (try it.next()) |rpath| { + const rpath_len = rpath.len + 1; + const cmdsize = @intCast(u32, mem.alignForwardGeneric( + u64, + @sizeOf(macho.rpath_command) + rpath_len, + @sizeOf(u64), + )); + try lc_writer.writeStruct(macho.rpath_command{ + .cmdsize = cmdsize, + .path = @sizeOf(macho.rpath_command), + }); + try lc_writer.writeAll(rpath); + try lc_writer.writeByte(0); + const padding = cmdsize - @sizeOf(macho.rpath_command) - rpath_len; + if (padding > 0) { + try lc_writer.writeByteNTimes(0, padding); + } + ncmds.* += 1; + } + } + + fn writeBuildVersionLC(self: *Zld, ncmds: *u32, lc_writer: anytype) !void { + const cmdsize = @sizeOf(macho.build_version_command) + @sizeOf(macho.build_tool_version); + const platform_version = blk: { + const ver = self.options.target.os.version_range.semver.min; + const platform_version = ver.major << 16 | ver.minor << 8; + break :blk platform_version; + }; + const sdk_version = if (self.options.native_darwin_sdk) |sdk| blk: { + const ver = sdk.version; + const sdk_version = ver.major << 16 | ver.minor << 8; + break :blk sdk_version; + } else platform_version; + const is_simulator_abi = 
self.options.target.abi == .simulator; + try lc_writer.writeStruct(macho.build_version_command{ + .cmdsize = cmdsize, + .platform = switch (self.options.target.os.tag) { + .macos => .MACOS, + .ios => if (is_simulator_abi) macho.PLATFORM.IOSSIMULATOR else macho.PLATFORM.IOS, + .watchos => if (is_simulator_abi) macho.PLATFORM.WATCHOSSIMULATOR else macho.PLATFORM.WATCHOS, + .tvos => if (is_simulator_abi) macho.PLATFORM.TVOSSIMULATOR else macho.PLATFORM.TVOS, + else => unreachable, + }, + .minos = platform_version, + .sdk = sdk_version, + .ntools = 1, + }); + try lc_writer.writeAll(mem.asBytes(&macho.build_tool_version{ + .tool = .LD, + .version = 0x0, + })); + ncmds.* += 1; + } + + fn writeLoadDylibLCs(self: *Zld, ncmds: *u32, lc_writer: anytype) !void { + for (self.referenced_dylibs.keys()) |id| { + const dylib = self.dylibs.items[id]; + const dylib_id = dylib.id orelse unreachable; + try writeDylibLC(.{ + .cmd = if (dylib.weak) .LOAD_WEAK_DYLIB else .LOAD_DYLIB, + .name = dylib_id.name, + .timestamp = dylib_id.timestamp, + .current_version = dylib_id.current_version, + .compatibility_version = dylib_id.compatibility_version, + }, ncmds, lc_writer); + } + } + + pub fn deinit(self: *Zld) void { + const gpa = self.gpa; + + self.tlv_ptr_entries.deinit(gpa); + self.tlv_ptr_table.deinit(gpa); + self.got_entries.deinit(gpa); + self.got_table.deinit(gpa); + self.stubs.deinit(gpa); + self.stubs_table.deinit(gpa); + self.thunk_table.deinit(gpa); + + for (self.thunks.items) |*thunk| { + thunk.deinit(gpa); + } + self.thunks.deinit(gpa); + + self.strtab.deinit(gpa); + self.locals.deinit(gpa); + self.globals.deinit(gpa); + + for (self.objects.items) |*object| { + object.deinit(gpa); + } + self.objects.deinit(gpa); + for (self.archives.items) |*archive| { + archive.deinit(gpa); + } + self.archives.deinit(gpa); + for (self.dylibs.items) |*dylib| { + dylib.deinit(gpa); + } + self.dylibs.deinit(gpa); + self.dylibs_map.deinit(gpa); + self.referenced_dylibs.deinit(gpa); + + self.segments.deinit(gpa); + self.sections.deinit(gpa); + self.atoms.deinit(gpa); + } + + fn createSegments(self: *Zld) !void { + const pagezero_vmsize = self.options.pagezero_size orelse MachO.default_pagezero_vmsize; + const aligned_pagezero_vmsize = mem.alignBackwardGeneric(u64, pagezero_vmsize, self.page_size); + if (self.options.output_mode != .Lib and aligned_pagezero_vmsize > 0) { + if (aligned_pagezero_vmsize != pagezero_vmsize) { + log.warn("requested __PAGEZERO size (0x{x}) is not page aligned", .{pagezero_vmsize}); + log.warn(" rounding down to 0x{x}", .{aligned_pagezero_vmsize}); + } + try self.segments.append(self.gpa, .{ + .cmdsize = @sizeOf(macho.segment_command_64), + .segname = makeStaticString("__PAGEZERO"), + .vmsize = aligned_pagezero_vmsize, + }); + } + + // __TEXT segment is non-optional + { + const protection = getSegmentMemoryProtection("__TEXT"); + try self.segments.append(self.gpa, .{ + .cmdsize = @sizeOf(macho.segment_command_64), + .segname = makeStaticString("__TEXT"), + .maxprot = protection, + .initprot = protection, + }); + } + + for (self.sections.items(.header)) |header, sect_id| { + if (header.size == 0) continue; // empty section + + const segname = header.segName(); + const segment_id = self.getSegmentByName(segname) orelse blk: { + log.debug("creating segment '{s}'", .{segname}); + const segment_id = @intCast(u8, self.segments.items.len); + const protection = getSegmentMemoryProtection(segname); + try self.segments.append(self.gpa, .{ + .cmdsize = @sizeOf(macho.segment_command_64), + .segname = 
makeStaticString(segname), + .maxprot = protection, + .initprot = protection, + }); + break :blk segment_id; + }; + const segment = &self.segments.items[segment_id]; + segment.cmdsize += @sizeOf(macho.section_64); + segment.nsects += 1; + self.sections.items(.segment_index)[sect_id] = segment_id; + } + + // __LINKEDIT always comes last + { + const protection = getSegmentMemoryProtection("__LINKEDIT"); + try self.segments.append(self.gpa, .{ + .cmdsize = @sizeOf(macho.segment_command_64), + .segname = makeStaticString("__LINKEDIT"), + .maxprot = protection, + .initprot = protection, + }); + } + } + + inline fn calcInstallNameLen(cmd_size: u64, name: []const u8, assume_max_path_len: bool) u64 { + const name_len = if (assume_max_path_len) std.os.PATH_MAX else std.mem.len(name) + 1; + return mem.alignForwardGeneric(u64, cmd_size + name_len, @alignOf(u64)); + } + + fn calcLCsSize(self: *Zld, assume_max_path_len: bool) !u32 { + const gpa = self.gpa; + + var sizeofcmds: u64 = 0; + for (self.segments.items) |seg| { + sizeofcmds += seg.nsects * @sizeOf(macho.section_64) + @sizeOf(macho.segment_command_64); + } + + // LC_DYLD_INFO_ONLY + sizeofcmds += @sizeOf(macho.dyld_info_command); + // LC_FUNCTION_STARTS + if (self.getSectionByName("__TEXT", "__text")) |_| { + sizeofcmds += @sizeOf(macho.linkedit_data_command); + } + // LC_DATA_IN_CODE + sizeofcmds += @sizeOf(macho.linkedit_data_command); + // LC_SYMTAB + sizeofcmds += @sizeOf(macho.symtab_command); + // LC_DYSYMTAB + sizeofcmds += @sizeOf(macho.dysymtab_command); + // LC_LOAD_DYLINKER + sizeofcmds += calcInstallNameLen( + @sizeOf(macho.dylinker_command), + mem.sliceTo(MachO.default_dyld_path, 0), + false, + ); + // LC_MAIN + if (self.options.output_mode == .Exe) { + sizeofcmds += @sizeOf(macho.entry_point_command); + } + // LC_ID_DYLIB + if (self.options.output_mode == .Lib) { + sizeofcmds += blk: { + const install_name = self.options.install_name orelse self.options.emit.?.sub_path; + break :blk calcInstallNameLen( + @sizeOf(macho.dylib_command), + install_name, + assume_max_path_len, + ); + }; + } + // LC_RPATH + { + var it = RpathIterator.init(gpa, self.options.rpath_list); + defer it.deinit(); + while (try it.next()) |rpath| { + sizeofcmds += calcInstallNameLen( + @sizeOf(macho.rpath_command), + rpath, + assume_max_path_len, + ); + } + } + // LC_SOURCE_VERSION + sizeofcmds += @sizeOf(macho.source_version_command); + // LC_BUILD_VERSION + sizeofcmds += @sizeOf(macho.build_version_command) + @sizeOf(macho.build_tool_version); + // LC_UUID + sizeofcmds += @sizeOf(macho.uuid_command); + // LC_LOAD_DYLIB + for (self.referenced_dylibs.keys()) |id| { + const dylib = self.dylibs.items[id]; + const dylib_id = dylib.id orelse unreachable; + sizeofcmds += calcInstallNameLen( + @sizeOf(macho.dylib_command), + dylib_id.name, + assume_max_path_len, + ); + } + // LC_CODE_SIGNATURE + { + const target = self.options.target; + const requires_codesig = blk: { + if (self.options.entitlements) |_| break :blk true; + if (target.cpu.arch == .aarch64 and (target.os.tag == .macos or target.abi == .simulator)) + break :blk true; + break :blk false; + }; + if (requires_codesig) { + sizeofcmds += @sizeOf(macho.linkedit_data_command); + } + } + + return @intCast(u32, sizeofcmds); + } + + fn calcMinHeaderPad(self: *Zld) !u64 { + var padding: u32 = (try self.calcLCsSize(false)) + (self.options.headerpad_size orelse 0); + log.debug("minimum requested headerpad size 0x{x}", .{padding + @sizeOf(macho.mach_header_64)}); + + if (self.options.headerpad_max_install_names) { + 
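+ // Mirrors ld64's -headerpad_max_install_names: size every load command + // that carries a file path as if the path were PATH_MAX bytes long, so + // that install names can later be rewritten in place (e.g. with + // install_name_tool) without relinking.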
var min_headerpad_size: u32 = try self.calcLCsSize(true); + log.debug("headerpad_max_install_names minimum headerpad size 0x{x}", .{ + min_headerpad_size + @sizeOf(macho.mach_header_64), + }); + padding = @max(padding, min_headerpad_size); + } + + const offset = @sizeOf(macho.mach_header_64) + padding; + log.debug("actual headerpad size 0x{x}", .{offset}); + + return offset; + } + + pub fn allocateSymbol(self: *Zld) !u32 { + try self.locals.ensureUnusedCapacity(self.gpa, 1); + log.debug(" (allocating symbol index {d})", .{self.locals.items.len}); + const index = @intCast(u32, self.locals.items.len); + _ = self.locals.addOneAssumeCapacity(); + self.locals.items[index] = .{ + .n_strx = 0, + .n_type = 0, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }; + return index; + } + + fn allocateSpecialSymbols(self: *Zld) !void { + for (&[_]?u32{ + self.dso_handle_index, + self.mh_execute_header_index, + }) |maybe_index| { + const global_index = maybe_index orelse continue; + const global = self.globals.items[global_index]; + if (global.getFile() != null) continue; + const name = self.getSymbolName(global); + const sym = self.getSymbolPtr(global); + const segment_index = self.getSegmentByName("__TEXT").?; + const seg = self.segments.items[segment_index]; + sym.n_sect = 1; + sym.n_value = seg.vmaddr; + log.debug("allocating {s} at the start of {s}", .{ + name, + seg.segName(), + }); + } + } + + fn writeAtoms(self: *Zld, reverse_lookups: [][]u32) !void { + const gpa = self.gpa; + const slice = self.sections.slice(); + + for (slice.items(.first_atom_index)) |first_atom_index, sect_id| { + const header = slice.items(.header)[sect_id]; + var atom_index = first_atom_index; + + if (header.isZerofill()) continue; + + var buffer = std.ArrayList(u8).init(gpa); + defer buffer.deinit(); + try buffer.ensureTotalCapacity(math.cast(usize, header.size) orelse return error.Overflow); + + log.debug("writing atoms in {s},{s}", .{ header.segName(), header.sectName() }); + + var count: u32 = 0; + while (true) : (count += 1) { + const atom = self.getAtom(atom_index); + const this_sym = self.getSymbol(atom.getSymbolWithLoc()); + const padding_size: usize = if (atom.next_index) |next_index| blk: { + const next_sym = self.getSymbol(self.getAtom(next_index).getSymbolWithLoc()); + const size = next_sym.n_value - (this_sym.n_value + atom.size); + break :blk math.cast(usize, size) orelse return error.Overflow; + } else 0; + + log.debug(" (adding ATOM(%{d}, '{s}') from object({?}) to buffer)", .{ + atom.sym_index, + self.getSymbolName(atom.getSymbolWithLoc()), + atom.file, + }); + if (padding_size > 0) { + log.debug(" (with padding {x})", .{padding_size}); + } + + const offset = buffer.items.len; + + // TODO: move writing synthetic sections into a separate function + if (atom.getFile() == null) outer: { + if (self.dyld_private_sym_index) |sym_index| { + if (atom.sym_index == sym_index) { + buffer.appendSliceAssumeCapacity(&[_]u8{0} ** @sizeOf(u64)); + break :outer; + } + } + switch (header.@"type"()) { + macho.S_NON_LAZY_SYMBOL_POINTERS => { + try self.writeGotPointer(count, buffer.writer()); + }, + macho.S_LAZY_SYMBOL_POINTERS => { + try self.writeLazyPointer(count, buffer.writer()); + }, + macho.S_THREAD_LOCAL_VARIABLE_POINTERS => { + buffer.appendSliceAssumeCapacity(&[_]u8{0} ** @sizeOf(u64)); + }, + else => { + if (self.stub_helper_preamble_sym_index) |sym_index| { + if (sym_index == atom.sym_index) { + try self.writeStubHelperPreambleCode(buffer.writer()); + break :outer; + } + } + if (header.@"type"() == 
macho.S_SYMBOL_STUBS) { + try self.writeStubCode(atom_index, count, buffer.writer()); + } else if (mem.eql(u8, header.sectName(), "__stub_helper")) { + try self.writeStubHelperCode(atom_index, buffer.writer()); + } else if (header.isCode()) { + // A thunk + try thunks.writeThunkCode(self, atom_index, buffer.writer()); + } else unreachable; + }, + } + } else { + const code = Atom.getAtomCode(self, atom_index); + const relocs = Atom.getAtomRelocs(self, atom_index); + const size = math.cast(usize, atom.size) orelse return error.Overflow; + buffer.appendSliceAssumeCapacity(code); + try Atom.resolveRelocs( + self, + atom_index, + buffer.items[offset..][0..size], + relocs, + reverse_lookups[atom.getFile().?], + ); + } + + var i: usize = 0; + while (i < padding_size) : (i += 1) { + // TODO with NOPs + buffer.appendAssumeCapacity(0); + } + + if (atom.next_index) |next_index| { + atom_index = next_index; + } else { + assert(buffer.items.len == header.size); + log.debug(" (writing at file offset 0x{x})", .{header.offset}); + try self.file.pwriteAll(buffer.items, header.offset); + break; + } + } + } + } + + fn pruneAndSortSections(self: *Zld) !void { + const gpa = self.gpa; + + const SortSection = struct { + pub fn lessThan(_: void, lhs: Section, rhs: Section) bool { + return getSectionPrecedence(lhs.header) < getSectionPrecedence(rhs.header); + } + }; + + const slice = self.sections.slice(); + var sections = std.ArrayList(Section).init(gpa); + defer sections.deinit(); + try sections.ensureTotalCapacity(slice.len); + + { + var i: u8 = 0; + while (i < slice.len) : (i += 1) { + const section = self.sections.get(i); + if (section.header.size == 0) { + log.debug("pruning section {s},{s}", .{ + section.header.segName(), + section.header.sectName(), + }); + continue; + } + sections.appendAssumeCapacity(section); + } + } + + std.sort.sort(Section, sections.items, {}, SortSection.lessThan); + + self.sections.shrinkRetainingCapacity(0); + for (sections.items) |out| { + self.sections.appendAssumeCapacity(out); + } + } + + fn calcSectionSizes(self: *Zld, reverse_lookups: [][]u32) !void { + const slice = self.sections.slice(); + for (slice.items(.header)) |*header, sect_id| { + if (header.size == 0) continue; + if (self.requiresThunks()) { + if (header.isCode() and !(header.@"type"() == macho.S_SYMBOL_STUBS) and !mem.eql(u8, header.sectName(), "__stub_helper")) continue; + } + + var atom_index = slice.items(.first_atom_index)[sect_id]; + header.size = 0; + header.@"align" = 0; + + while (true) { + const atom = self.getAtom(atom_index); + const atom_alignment = try math.powi(u32, 2, atom.alignment); + const atom_offset = mem.alignForwardGeneric(u64, header.size, atom_alignment); + const padding = atom_offset - header.size; + + const sym = self.getSymbolPtr(atom.getSymbolWithLoc()); + sym.n_value = atom_offset; + + header.size += padding + atom.size; + header.@"align" = @max(header.@"align", atom.alignment); + + if (atom.next_index) |next_index| { + atom_index = next_index; + } else break; + } + } + + if (self.requiresThunks()) { + for (slice.items(.header)) |header, sect_id| { + if (!header.isCode()) continue; + if (header.@"type"() == macho.S_SYMBOL_STUBS) continue; + if (mem.eql(u8, header.sectName(), "__stub_helper")) continue; + + // Create jump/branch range extenders if needed. 
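+ // On arm64, an unconditional branch (B/BL) only reaches +/-128 MiB, so + // code sections that might exceed that range get trampoline atoms + // inserted between callers and far-away callees. Sizing these sections + // was deferred above precisely because inserting thunks changes atom + // offsets.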
+ try thunks.createThunks(self, @intCast(u8, sect_id), reverse_lookups); + } + } + } + + fn allocateSegments(self: *Zld) !void { + for (self.segments.items) |*segment, segment_index| { + const is_text_segment = mem.eql(u8, segment.segName(), "__TEXT"); + const base_size = if (is_text_segment) try self.calcMinHeaderPad() else 0; + try self.allocateSegment(@intCast(u8, segment_index), base_size); + } + } + + fn getSegmentAllocBase(self: Zld, segment_index: u8) struct { vmaddr: u64, fileoff: u64 } { + if (segment_index > 0) { + const prev_segment = self.segments.items[segment_index - 1]; + return .{ + .vmaddr = prev_segment.vmaddr + prev_segment.vmsize, + .fileoff = prev_segment.fileoff + prev_segment.filesize, + }; + } + return .{ .vmaddr = 0, .fileoff = 0 }; + } + + fn allocateSegment(self: *Zld, segment_index: u8, init_size: u64) !void { + const segment = &self.segments.items[segment_index]; + + if (mem.eql(u8, segment.segName(), "__PAGEZERO")) return; // allocated upon creation + + const base = self.getSegmentAllocBase(segment_index); + segment.vmaddr = base.vmaddr; + segment.fileoff = base.fileoff; + segment.filesize = init_size; + segment.vmsize = init_size; + + // Allocate the sections according to their alignment at the beginning of the segment. + const indexes = self.getSectionIndexes(segment_index); + var start = init_size; + + const slice = self.sections.slice(); + for (slice.items(.header)[indexes.start..indexes.end]) |*header, sect_id| { + var atom_index = slice.items(.first_atom_index)[indexes.start + sect_id]; + + const alignment = try math.powi(u32, 2, header.@"align"); + const start_aligned = mem.alignForwardGeneric(u64, start, alignment); + const n_sect = @intCast(u8, indexes.start + sect_id + 1); + + header.offset = if (header.isZerofill()) + 0 + else + @intCast(u32, segment.fileoff + start_aligned); + header.addr = segment.vmaddr + start_aligned; + + log.debug("allocating local symbols in sect({d}, '{s},{s}')", .{ + n_sect, + header.segName(), + header.sectName(), + }); + + while (true) { + const atom = self.getAtom(atom_index); + const sym = self.getSymbolPtr(atom.getSymbolWithLoc()); + sym.n_value += header.addr; + sym.n_sect = n_sect; + + log.debug(" ATOM(%{d}, '{s}') @{x}", .{ + atom.sym_index, + self.getSymbolName(atom.getSymbolWithLoc()), + sym.n_value, + }); + + if (atom.getFile() != null) { + // Update each symbol contained within the atom + var it = Atom.getInnerSymbolsIterator(self, atom_index); + while (it.next()) |sym_loc| { + const inner_sym = self.getSymbolPtr(sym_loc); + inner_sym.n_value = sym.n_value + Atom.calcInnerSymbolOffset( + self, + atom_index, + sym_loc.sym_index, + ); + inner_sym.n_sect = n_sect; + } + + // If there is a section alias, update it now too + if (Atom.getSectionAlias(self, atom_index)) |sym_loc| { + const alias = self.getSymbolPtr(sym_loc); + alias.n_value = sym.n_value; + alias.n_sect = n_sect; + } + } + + if (atom.next_index) |next_index| { + atom_index = next_index; + } else break; + } + + start = start_aligned + header.size; + + if (!header.isZerofill()) { + segment.filesize = start; + } + segment.vmsize = start; + } + + segment.filesize = mem.alignForwardGeneric(u64, segment.filesize, self.page_size); + segment.vmsize = mem.alignForwardGeneric(u64, segment.vmsize, self.page_size); + } + + const InitSectionOpts = struct { + flags: u32 = macho.S_REGULAR, + reserved1: u32 = 0, + reserved2: u32 = 0, + }; + + fn initSection( + self: *Zld, + segname: []const u8, + sectname: []const u8, + opts: InitSectionOpts, + ) !u8 { + const gpa = 
self.gpa; + log.debug("creating section '{s},{s}'", .{ segname, sectname }); + const index = @intCast(u8, self.sections.slice().len); + try self.sections.append(gpa, .{ + .segment_index = undefined, + .header = .{ + .sectname = makeStaticString(sectname), + .segname = makeStaticString(segname), + .flags = opts.flags, + .reserved1 = opts.reserved1, + .reserved2 = opts.reserved2, + }, + .first_atom_index = undefined, + .last_atom_index = undefined, + }); + return index; + } + + inline fn getSegmentPrecedence(segname: []const u8) u4 { + if (mem.eql(u8, segname, "__PAGEZERO")) return 0x0; + if (mem.eql(u8, segname, "__TEXT")) return 0x1; + if (mem.eql(u8, segname, "__DATA_CONST")) return 0x2; + if (mem.eql(u8, segname, "__DATA")) return 0x3; + if (mem.eql(u8, segname, "__LINKEDIT")) return 0x5; + return 0x4; + } + + inline fn getSegmentMemoryProtection(segname: []const u8) macho.vm_prot_t { + if (mem.eql(u8, segname, "__PAGEZERO")) return macho.PROT.NONE; + if (mem.eql(u8, segname, "__TEXT")) return macho.PROT.READ | macho.PROT.EXEC; + if (mem.eql(u8, segname, "__LINKEDIT")) return macho.PROT.READ; + return macho.PROT.READ | macho.PROT.WRITE; + } + + inline fn getSectionPrecedence(header: macho.section_64) u8 { + const segment_precedence: u4 = getSegmentPrecedence(header.segName()); + const section_precedence: u4 = blk: { + if (header.isCode()) { + if (mem.eql(u8, "__text", header.sectName())) break :blk 0x0; + if (header.@"type"() == macho.S_SYMBOL_STUBS) break :blk 0x1; + break :blk 0x2; + } + switch (header.@"type"()) { + macho.S_NON_LAZY_SYMBOL_POINTERS, + macho.S_LAZY_SYMBOL_POINTERS, + => break :blk 0x0, + macho.S_MOD_INIT_FUNC_POINTERS => break :blk 0x1, + macho.S_MOD_TERM_FUNC_POINTERS => break :blk 0x2, + macho.S_ZEROFILL => break :blk 0xf, + macho.S_THREAD_LOCAL_REGULAR => break :blk 0xd, + macho.S_THREAD_LOCAL_ZEROFILL => break :blk 0xe, + else => if (mem.eql(u8, "__eh_frame", header.sectName())) + break :blk 0xf + else + break :blk 0x3, + } + }; + return (@intCast(u8, segment_precedence) << 4) + section_precedence; + } + + fn writeSegmentHeaders(self: *Zld, ncmds: *u32, writer: anytype) !void { + for (self.segments.items) |seg, i| { + const indexes = self.getSectionIndexes(@intCast(u8, i)); + var out_seg = seg; + out_seg.cmdsize = @sizeOf(macho.segment_command_64); + out_seg.nsects = 0; + + // Update section headers count; any section with size of 0 is excluded + // since it doesn't have any data in the final binary file. 
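+ // The same size-0 filter is applied again below when the section + // headers are emitted; the two passes must agree or the cmdsize/nsects + // written here would not match the headers that follow the segment + // command.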
+ for (self.sections.items(.header)[indexes.start..indexes.end]) |header| { + if (header.size == 0) continue; + out_seg.cmdsize += @sizeOf(macho.section_64); + out_seg.nsects += 1; + } + + if (out_seg.nsects == 0 and + (mem.eql(u8, out_seg.segName(), "__DATA_CONST") or + mem.eql(u8, out_seg.segName(), "__DATA"))) continue; + + try writer.writeStruct(out_seg); + for (self.sections.items(.header)[indexes.start..indexes.end]) |header| { + if (header.size == 0) continue; + try writer.writeStruct(header); + } + + ncmds.* += 1; + } + } + + fn writeLinkeditSegmentData(self: *Zld, ncmds: *u32, lc_writer: anytype, reverse_lookups: [][]u32) !void { + try self.writeDyldInfoData(ncmds, lc_writer, reverse_lookups); + try self.writeFunctionStarts(ncmds, lc_writer); + try self.writeDataInCode(ncmds, lc_writer); + try self.writeSymtabs(ncmds, lc_writer); + + const seg = self.getLinkeditSegmentPtr(); + seg.vmsize = mem.alignForwardGeneric(u64, seg.filesize, self.page_size); + } + + fn collectRebaseDataFromContainer( + self: *Zld, + sect_id: u8, + pointers: *std.ArrayList(bind.Pointer), + container: anytype, + ) !void { + const slice = self.sections.slice(); + const segment_index = slice.items(.segment_index)[sect_id]; + const seg = self.getSegment(sect_id); + + try pointers.ensureUnusedCapacity(container.items.len); + + for (container.items) |entry| { + const target_sym = entry.getTargetSymbol(self); + if (target_sym.undf()) continue; + + const atom_sym = entry.getAtomSymbol(self); + const base_offset = atom_sym.n_value - seg.vmaddr; + + log.debug(" | rebase at {x}", .{base_offset}); + + pointers.appendAssumeCapacity(.{ + .offset = base_offset, + .segment_id = segment_index, + }); + } + } + + fn collectRebaseData(self: *Zld, pointers: *std.ArrayList(bind.Pointer)) !void { + log.debug("collecting rebase data", .{}); + + // First, unpack GOT entries + if (self.getSectionByName("__DATA_CONST", "__got")) |sect_id| { + try self.collectRebaseDataFromContainer(sect_id, pointers, self.got_entries); + } + + const slice = self.sections.slice(); + + // Next, unpack lazy pointers + // TODO: save la_ptr in a container so that we can re-use the helper + if (self.getSectionByName("__DATA", "__la_symbol_ptr")) |sect_id| { + const segment_index = slice.items(.segment_index)[sect_id]; + const seg = self.getSegment(sect_id); + var atom_index = slice.items(.first_atom_index)[sect_id]; + + try pointers.ensureUnusedCapacity(self.stubs.items.len); + + while (true) { + const atom = self.getAtom(atom_index); + const sym = self.getSymbol(atom.getSymbolWithLoc()); + const base_offset = sym.n_value - seg.vmaddr; + + log.debug(" | rebase at {x}", .{base_offset}); + + pointers.appendAssumeCapacity(.{ + .offset = base_offset, + .segment_id = segment_index, + }); + + if (atom.next_index) |next_index| { + atom_index = next_index; + } else break; + } + } + + // Finally, unpack the rest.
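+ // A rebase entry marks a location that holds an absolute address which + // dyld must slide if the image loads at a non-preferred base. Only + // writable segments are scanned, and only 8-byte UNSIGNED relocations + // (r_length == 3) store such addresses; see the filters below.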
+ for (slice.items(.header)) |header, sect_id| { + switch (header.@"type"()) { + macho.S_LITERAL_POINTERS, + macho.S_REGULAR, + macho.S_MOD_INIT_FUNC_POINTERS, + macho.S_MOD_TERM_FUNC_POINTERS, + => {}, + else => continue, + } + + const segment_index = slice.items(.segment_index)[sect_id]; + const segment = self.getSegment(@intCast(u8, sect_id)); + if (segment.maxprot & macho.PROT.WRITE == 0) continue; + + log.debug("{s},{s}", .{ header.segName(), header.sectName() }); + + const cpu_arch = self.options.target.cpu.arch; + var atom_index = slice.items(.first_atom_index)[sect_id]; + + while (true) { + const atom = self.getAtom(atom_index); + const sym = self.getSymbol(atom.getSymbolWithLoc()); + + const should_rebase = blk: { + if (self.dyld_private_sym_index) |sym_index| { + if (atom.getFile() == null and atom.sym_index == sym_index) break :blk false; + } + break :blk !sym.undf(); + }; + + if (should_rebase) { + log.debug(" ATOM({d}, %{d}, '{s}')", .{ atom_index, atom.sym_index, self.getSymbolName(atom.getSymbolWithLoc()) }); + + const object = self.objects.items[atom.getFile().?]; + const base_rel_offset: i32 = blk: { + const source_sym = object.getSourceSymbol(atom.sym_index) orelse break :blk 0; + const source_sect = object.getSourceSection(source_sym.n_sect - 1); + break :blk @intCast(i32, source_sym.n_value - source_sect.addr); + }; + const relocs = Atom.getAtomRelocs(self, atom_index); + + for (relocs) |rel| { + switch (cpu_arch) { + .aarch64 => { + const rel_type = @intToEnum(macho.reloc_type_arm64, rel.r_type); + if (rel_type != .ARM64_RELOC_UNSIGNED) continue; + if (rel.r_length != 3) continue; + }, + .x86_64 => { + const rel_type = @intToEnum(macho.reloc_type_x86_64, rel.r_type); + if (rel_type != .X86_64_RELOC_UNSIGNED) continue; + if (rel.r_length != 3) continue; + }, + else => unreachable, + } + + const base_offset = @intCast(i32, sym.n_value - segment.vmaddr); + const rel_offset = rel.r_address - base_rel_offset; + const offset = @intCast(u64, base_offset + rel_offset); + log.debug(" | rebase at {x}", .{offset}); + + try pointers.append(.{ + .offset = offset, + .segment_id = segment_index, + }); + } + } + + if (atom.next_index) |next_index| { + atom_index = next_index; + } else break; + } + } + } + + fn collectBindDataFromContainer( + self: *Zld, + sect_id: u8, + pointers: *std.ArrayList(bind.Pointer), + container: anytype, + ) !void { + const slice = self.sections.slice(); + const segment_index = slice.items(.segment_index)[sect_id]; + const seg = self.getSegment(sect_id); + + try pointers.ensureUnusedCapacity(container.items.len); + + for (container.items) |entry| { + const bind_sym_name = entry.getTargetSymbolName(self); + const bind_sym = entry.getTargetSymbol(self); + if (bind_sym.sect()) continue; + + const sym = entry.getAtomSymbol(self); + const base_offset = sym.n_value - seg.vmaddr; + + const dylib_ordinal = @divTrunc(@bitCast(i16, bind_sym.n_desc), macho.N_SYMBOL_RESOLVER); + var flags: u4 = 0; + log.debug(" | bind at {x}, import('{s}') in dylib({d})", .{ + base_offset, + bind_sym_name, + dylib_ordinal, + }); + if (bind_sym.weakRef()) { + log.debug(" | marking as weak ref ", .{}); + flags |= @truncate(u4, macho.BIND_SYMBOL_FLAGS_WEAK_IMPORT); + } + pointers.appendAssumeCapacity(.{ + .offset = base_offset, + .segment_id = segment_index, + .dylib_ordinal = dylib_ordinal, + .name = bind_sym_name, + .bind_flags = flags, + }); + } + } + + fn collectBindData(self: *Zld, pointers: *std.ArrayList(bind.Pointer), reverse_lookups: [][]u32) !void { + log.debug("collecting bind 
data", .{}); + + // First, unpack GOT section + if (self.getSectionByName("__DATA_CONST", "__got")) |sect_id| { + try self.collectBindDataFromContainer(sect_id, pointers, self.got_entries); + } + + // Next, unpack TLV pointers section + if (self.getSectionByName("__DATA", "__thread_ptrs")) |sect_id| { + try self.collectBindDataFromContainer(sect_id, pointers, self.tlv_ptr_entries); + } + + // Finally, unpack the rest. + const slice = self.sections.slice(); + for (slice.items(.header)) |header, sect_id| { + switch (header.@"type"()) { + macho.S_LITERAL_POINTERS, + macho.S_REGULAR, + macho.S_MOD_INIT_FUNC_POINTERS, + macho.S_MOD_TERM_FUNC_POINTERS, + => {}, + else => continue, + } + + const segment_index = slice.items(.segment_index)[sect_id]; + const segment = self.getSegment(@intCast(u8, sect_id)); + if (segment.maxprot & macho.PROT.WRITE == 0) continue; + + const cpu_arch = self.options.target.cpu.arch; + var atom_index = slice.items(.first_atom_index)[sect_id]; + + log.debug("{s},{s}", .{ header.segName(), header.sectName() }); + + while (true) { + const atom = self.getAtom(atom_index); + const sym = self.getSymbol(atom.getSymbolWithLoc()); + + log.debug(" ATOM({d}, %{d}, '{s}')", .{ atom_index, atom.sym_index, self.getSymbolName(atom.getSymbolWithLoc()) }); + + const should_bind = blk: { + if (self.dyld_private_sym_index) |sym_index| { + if (atom.getFile() == null and atom.sym_index == sym_index) break :blk false; + } + break :blk true; + }; + + if (should_bind) { + const object = self.objects.items[atom.getFile().?]; + const base_rel_offset: i32 = blk: { + const source_sym = object.getSourceSymbol(atom.sym_index) orelse break :blk 0; + const source_sect = object.getSourceSection(source_sym.n_sect - 1); + break :blk @intCast(i32, source_sym.n_value - source_sect.addr); + }; + const relocs = Atom.getAtomRelocs(self, atom_index); + + for (relocs) |rel| { + switch (cpu_arch) { + .aarch64 => { + const rel_type = @intToEnum(macho.reloc_type_arm64, rel.r_type); + if (rel_type != .ARM64_RELOC_UNSIGNED) continue; + if (rel.r_length != 3) continue; + }, + .x86_64 => { + const rel_type = @intToEnum(macho.reloc_type_x86_64, rel.r_type); + if (rel_type != .X86_64_RELOC_UNSIGNED) continue; + if (rel.r_length != 3) continue; + }, + else => unreachable, + } + + const global = Atom.parseRelocTarget(self, atom_index, rel, reverse_lookups[atom.getFile().?]); + const bind_sym_name = self.getSymbolName(global); + const bind_sym = self.getSymbol(global); + if (!bind_sym.undf()) continue; + + const base_offset = @intCast(i32, sym.n_value - segment.vmaddr); + const rel_offset = rel.r_address - base_rel_offset; + const offset = @intCast(u64, base_offset + rel_offset); + + const dylib_ordinal = @divTrunc(@bitCast(i16, bind_sym.n_desc), macho.N_SYMBOL_RESOLVER); + var flags: u4 = 0; + log.debug(" | bind at {x}, import('{s}') in dylib({d})", .{ + base_offset, + bind_sym_name, + dylib_ordinal, + }); + if (bind_sym.weakRef()) { + log.debug(" | marking as weak ref ", .{}); + flags |= @truncate(u4, macho.BIND_SYMBOL_FLAGS_WEAK_IMPORT); + } + try pointers.append(.{ + .offset = offset, + .segment_id = segment_index, + .dylib_ordinal = dylib_ordinal, + .name = bind_sym_name, + .bind_flags = flags, + }); + } + } + if (atom.next_index) |next_index| { + atom_index = next_index; + } else break; + } + } + } + + fn collectLazyBindData(self: *Zld, pointers: *std.ArrayList(bind.Pointer)) !void { + const sect_id = self.getSectionByName("__DATA", "__la_symbol_ptr") orelse return; + + log.debug("collecting lazy bind data", .{}); + 
+ const slice = self.sections.slice(); + const segment_index = slice.items(.segment_index)[sect_id]; + const seg = self.getSegment(sect_id); + var atom_index = slice.items(.first_atom_index)[sect_id]; + + // TODO: we actually don't need to store lazy pointer atoms as they are synthetically generated by the linker + try pointers.ensureUnusedCapacity(self.stubs.items.len); + + var count: u32 = 0; + while (true) : (count += 1) { + const atom = self.getAtom(atom_index); + + log.debug(" ATOM(%{d}, '{s}')", .{ atom.sym_index, self.getSymbolName(atom.getSymbolWithLoc()) }); + + const sym = self.getSymbol(atom.getSymbolWithLoc()); + const base_offset = sym.n_value - seg.vmaddr; + + const stub_entry = self.stubs.items[count]; + const bind_sym = stub_entry.getTargetSymbol(self); + const bind_sym_name = stub_entry.getTargetSymbolName(self); + const dylib_ordinal = @divTrunc(@bitCast(i16, bind_sym.n_desc), macho.N_SYMBOL_RESOLVER); + var flags: u4 = 0; + log.debug(" | lazy bind at {x}, import('{s}') in dylib({d})", .{ + base_offset, + bind_sym_name, + dylib_ordinal, + }); + if (bind_sym.weakRef()) { + log.debug(" | marking as weak ref ", .{}); + flags |= @truncate(u4, macho.BIND_SYMBOL_FLAGS_WEAK_IMPORT); + } + pointers.appendAssumeCapacity(.{ + .offset = base_offset, + .segment_id = segment_index, + .dylib_ordinal = dylib_ordinal, + .name = bind_sym_name, + .bind_flags = flags, + }); + + if (atom.next_index) |next_index| { + atom_index = next_index; + } else break; + } + } + + fn collectExportData(self: *Zld, trie: *Trie) !void { + const gpa = self.gpa; + + // TODO handle macho.EXPORT_SYMBOL_FLAGS_REEXPORT and macho.EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER. + log.debug("collecting export data", .{}); + + const segment_index = self.getSegmentByName("__TEXT").?; + const exec_segment = self.segments.items[segment_index]; + const base_address = exec_segment.vmaddr; + + if (self.options.output_mode == .Exe) { + for (&[_]SymbolWithLoc{ + self.getEntryPoint(), + self.globals.items[self.mh_execute_header_index.?], + }) |global| { + const sym = self.getSymbol(global); + const sym_name = self.getSymbolName(global); + log.debug(" (putting '{s}' defined at 0x{x})", .{ sym_name, sym.n_value }); + try trie.put(gpa, .{ + .name = sym_name, + .vmaddr_offset = sym.n_value - base_address, + .export_flags = macho.EXPORT_SYMBOL_FLAGS_KIND_REGULAR, + }); + } + } else { + assert(self.options.output_mode == .Lib); + for (self.globals.items) |global| { + const sym = self.getSymbol(global); + if (sym.undf()) continue; + if (sym.n_desc == N_DEAD) continue; + + const sym_name = self.getSymbolName(global); + log.debug(" (putting '{s}' defined at 0x{x})", .{ sym_name, sym.n_value }); + try trie.put(gpa, .{ + .name = sym_name, + .vmaddr_offset = sym.n_value - base_address, + .export_flags = macho.EXPORT_SYMBOL_FLAGS_KIND_REGULAR, + }); + } + } + + try trie.finalize(gpa); + } + + fn writeDyldInfoData(self: *Zld, ncmds: *u32, lc_writer: anytype, reverse_lookups: [][]u32) !void { + const gpa = self.gpa; + + var rebase_pointers = std.ArrayList(bind.Pointer).init(gpa); + defer rebase_pointers.deinit(); + try self.collectRebaseData(&rebase_pointers); + + var bind_pointers = std.ArrayList(bind.Pointer).init(gpa); + defer bind_pointers.deinit(); + try self.collectBindData(&bind_pointers, reverse_lookups); + + var lazy_bind_pointers = std.ArrayList(bind.Pointer).init(gpa); + defer lazy_bind_pointers.deinit(); + try self.collectLazyBindData(&lazy_bind_pointers); + + var trie = Trie{}; + defer trie.deinit(gpa); + try 
self.collectExportData(&trie); + + const link_seg = self.getLinkeditSegmentPtr(); + const rebase_off = mem.alignForwardGeneric(u64, link_seg.fileoff, @alignOf(u64)); + assert(rebase_off == link_seg.fileoff); + const rebase_size = try bind.rebaseInfoSize(rebase_pointers.items); + log.debug("writing rebase info from 0x{x} to 0x{x}", .{ rebase_off, rebase_off + rebase_size }); + + const bind_off = mem.alignForwardGeneric(u64, rebase_off + rebase_size, @alignOf(u64)); + const bind_size = try bind.bindInfoSize(bind_pointers.items); + log.debug("writing bind info from 0x{x} to 0x{x}", .{ bind_off, bind_off + bind_size }); + + const lazy_bind_off = mem.alignForwardGeneric(u64, bind_off + bind_size, @alignOf(u64)); + const lazy_bind_size = try bind.lazyBindInfoSize(lazy_bind_pointers.items); + log.debug("writing lazy bind info from 0x{x} to 0x{x}", .{ lazy_bind_off, lazy_bind_off + lazy_bind_size }); + + const export_off = mem.alignForwardGeneric(u64, lazy_bind_off + lazy_bind_size, @alignOf(u64)); + const export_size = trie.size; + log.debug("writing export trie from 0x{x} to 0x{x}", .{ export_off, export_off + export_size }); + + const needed_size = math.cast(usize, export_off + export_size - rebase_off) orelse return error.Overflow; + link_seg.filesize = needed_size; + + var buffer = try gpa.alloc(u8, needed_size); + defer gpa.free(buffer); + mem.set(u8, buffer, 0); + + var stream = std.io.fixedBufferStream(buffer); + const writer = stream.writer(); + + try bind.writeRebaseInfo(rebase_pointers.items, writer); + try stream.seekTo(bind_off - rebase_off); + + try bind.writeBindInfo(bind_pointers.items, writer); + try stream.seekTo(lazy_bind_off - rebase_off); + + try bind.writeLazyBindInfo(lazy_bind_pointers.items, writer); + try stream.seekTo(export_off - rebase_off); + + _ = try trie.write(writer); + + log.debug("writing dyld info from 0x{x} to 0x{x}", .{ + rebase_off, + rebase_off + needed_size, + }); + + try self.file.pwriteAll(buffer, rebase_off); + + const offset = math.cast(usize, lazy_bind_off - rebase_off) orelse return error.Overflow; + const size = math.cast(usize, lazy_bind_size) orelse return error.Overflow; + try self.populateLazyBindOffsetsInStubHelper(buffer[offset..][0..size]); + + try lc_writer.writeStruct(macho.dyld_info_command{ + .cmd = .DYLD_INFO_ONLY, + .cmdsize = @sizeOf(macho.dyld_info_command), + .rebase_off = @intCast(u32, rebase_off), + .rebase_size = @intCast(u32, rebase_size), + .bind_off = @intCast(u32, bind_off), + .bind_size = @intCast(u32, bind_size), + .weak_bind_off = 0, + .weak_bind_size = 0, + .lazy_bind_off = @intCast(u32, lazy_bind_off), + .lazy_bind_size = @intCast(u32, lazy_bind_size), + .export_off = @intCast(u32, export_off), + .export_size = @intCast(u32, export_size), + }); + ncmds.* += 1; + } + + fn populateLazyBindOffsetsInStubHelper(self: *Zld, buffer: []const u8) !void { + const gpa = self.gpa; + + const stub_helper_section_index = self.getSectionByName("__TEXT", "__stub_helper") orelse return; + const la_symbol_ptr_section_index = self.getSectionByName("__DATA", "__la_symbol_ptr") orelse return; + + if (self.stub_helper_preamble_sym_index == null) return; + + const section = self.sections.get(stub_helper_section_index); + const last_atom_index = section.last_atom_index; + + var table = std.AutoHashMap(i64, AtomIndex).init(gpa); + defer table.deinit(); + + { + var stub_atom_index = last_atom_index; + var laptr_atom_index = self.sections.items(.last_atom_index)[la_symbol_ptr_section_index]; + + const base_addr = blk: { + const segment_index = 
self.getSegmentByName("__DATA").?; + const seg = self.segments.items[segment_index]; + break :blk seg.vmaddr; + }; + + while (true) { + const stub_atom = self.getAtom(stub_atom_index); + const laptr_atom = self.getAtom(laptr_atom_index); + const laptr_off = blk: { + const sym = self.getSymbolPtr(laptr_atom.getSymbolWithLoc()); + break :blk @intCast(i64, sym.n_value - base_addr); + }; + + try table.putNoClobber(laptr_off, stub_atom_index); + + if (laptr_atom.prev_index) |prev_index| { + laptr_atom_index = prev_index; + stub_atom_index = stub_atom.prev_index.?; + } else break; + } + } + + var stream = std.io.fixedBufferStream(buffer); + var reader = stream.reader(); + var offsets = std.ArrayList(struct { sym_offset: i64, offset: u32 }).init(gpa); + try offsets.append(.{ .sym_offset = undefined, .offset = 0 }); + defer offsets.deinit(); + var valid_block = false; + + while (true) { + const inst = reader.readByte() catch |err| switch (err) { + error.EndOfStream => break, + }; + const opcode: u8 = inst & macho.BIND_OPCODE_MASK; + + switch (opcode) { + macho.BIND_OPCODE_DO_BIND => { + valid_block = true; + }, + macho.BIND_OPCODE_DONE => { + if (valid_block) { + const offset = try stream.getPos(); + try offsets.append(.{ .sym_offset = undefined, .offset = @intCast(u32, offset) }); + } + valid_block = false; + }, + macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM => { + var next = try reader.readByte(); + while (next != @as(u8, 0)) { + next = try reader.readByte(); + } + }, + macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB => { + var inserted = offsets.pop(); + inserted.sym_offset = try std.leb.readILEB128(i64, reader); + try offsets.append(inserted); + }, + macho.BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB => { + _ = try std.leb.readULEB128(u64, reader); + }, + macho.BIND_OPCODE_SET_ADDEND_SLEB => { + _ = try std.leb.readILEB128(i64, reader); + }, + else => {}, + } + } + + const header = self.sections.items(.header)[stub_helper_section_index]; + const stub_offset: u4 = switch (self.options.target.cpu.arch) { + .x86_64 => 1, + .aarch64 => 2 * @sizeOf(u32), + else => unreachable, + }; + var buf: [@sizeOf(u32)]u8 = undefined; + _ = offsets.pop(); + + while (offsets.popOrNull()) |bind_offset| { + const atom_index = table.get(bind_offset.sym_offset).?; + const atom = self.getAtom(atom_index); + const sym = self.getSymbol(atom.getSymbolWithLoc()); + + const file_offset = header.offset + sym.n_value - header.addr + stub_offset; + mem.writeIntLittle(u32, &buf, bind_offset.offset); + + log.debug("writing lazy bind offset in stub helper of 0x{x} for symbol {s} at offset 0x{x}", .{ + bind_offset.offset, + self.getSymbolName(atom.getSymbolWithLoc()), + file_offset, + }); + + try self.file.pwriteAll(&buf, file_offset); + } + } + + const asc_u64 = std.sort.asc(u64); + + fn writeFunctionStarts(self: *Zld, ncmds: *u32, lc_writer: anytype) !void { + const text_seg_index = self.getSegmentByName("__TEXT") orelse return; + const text_sect_index = self.getSectionByName("__TEXT", "__text") orelse return; + const text_seg = self.segments.items[text_seg_index]; + + const gpa = self.gpa; + + // We need to sort by address first + var addresses = std.ArrayList(u64).init(gpa); + defer addresses.deinit(); + try addresses.ensureTotalCapacityPrecise(self.globals.items.len); + + for (self.globals.items) |global| { + const sym = self.getSymbol(global); + if (sym.undf()) continue; + if (sym.n_desc == N_DEAD) continue; + + const sect_id = sym.n_sect - 1; + if (sect_id != text_sect_index) continue; + + 
addresses.appendAssumeCapacity(sym.n_value); + } + + std.sort.sort(u64, addresses.items, {}, asc_u64); + + var offsets = std.ArrayList(u32).init(gpa); + defer offsets.deinit(); + try offsets.ensureTotalCapacityPrecise(addresses.items.len); + + var last_off: u32 = 0; + for (addresses.items) |addr| { + const offset = @intCast(u32, addr - text_seg.vmaddr); + const diff = offset - last_off; + + if (diff == 0) continue; + + offsets.appendAssumeCapacity(diff); + last_off = offset; + } + + var buffer = std.ArrayList(u8).init(gpa); + defer buffer.deinit(); + + const max_size = @intCast(usize, offsets.items.len * @sizeOf(u64)); + try buffer.ensureTotalCapacity(max_size); + + for (offsets.items) |offset| { + try std.leb.writeULEB128(buffer.writer(), offset); + } + + const link_seg = self.getLinkeditSegmentPtr(); + const offset = mem.alignForwardGeneric(u64, link_seg.fileoff + link_seg.filesize, @alignOf(u64)); + const needed_size = buffer.items.len; + link_seg.filesize = offset + needed_size - link_seg.fileoff; + + log.debug("writing function starts info from 0x{x} to 0x{x}", .{ offset, offset + needed_size }); + + try self.file.pwriteAll(buffer.items, offset); + + try lc_writer.writeStruct(macho.linkedit_data_command{ + .cmd = .FUNCTION_STARTS, + .cmdsize = @sizeOf(macho.linkedit_data_command), + .dataoff = @intCast(u32, offset), + .datasize = @intCast(u32, needed_size), + }); + ncmds.* += 1; + } + + fn filterDataInCode( + dices: []const macho.data_in_code_entry, + start_addr: u64, + end_addr: u64, + ) []const macho.data_in_code_entry { + const Predicate = struct { + addr: u64, + + pub fn predicate(self: @This(), dice: macho.data_in_code_entry) bool { + return dice.offset >= self.addr; + } + }; + + const start = lsearch(macho.data_in_code_entry, dices, Predicate{ .addr = start_addr }); + const end = lsearch(macho.data_in_code_entry, dices[start..], Predicate{ .addr = end_addr }) + start; + + return dices[start..end]; + } + + fn writeDataInCode(self: *Zld, ncmds: *u32, lc_writer: anytype) !void { + var out_dice = std.ArrayList(macho.data_in_code_entry).init(self.gpa); + defer out_dice.deinit(); + + const text_sect_id = self.getSectionByName("__TEXT", "__text") orelse return; + const text_sect_header = self.sections.items(.header)[text_sect_id]; + + for (self.objects.items) |object| { + const dice = object.parseDataInCode() orelse continue; + try out_dice.ensureUnusedCapacity(dice.len); + + for (object.atoms.items) |atom_index| { + const atom = self.getAtom(atom_index); + const sym = self.getSymbol(atom.getSymbolWithLoc()); + const sect_id = sym.n_sect - 1; + if (sect_id != text_sect_id) { + continue; + } + + const source_addr = if (object.getSourceSymbol(atom.sym_index)) |source_sym| + source_sym.n_value + else blk: { + const nbase = @intCast(u32, object.in_symtab.?.len); + const source_sect_id = @intCast(u16, atom.sym_index - nbase); + break :blk object.getSourceSection(source_sect_id).addr; + }; + const filtered_dice = filterDataInCode(dice, source_addr, source_addr + atom.size); + const base = math.cast(u32, sym.n_value - text_sect_header.addr + text_sect_header.offset) orelse + return error.Overflow; + + for (filtered_dice) |single| { + const offset = math.cast(u32, single.offset - source_addr + base) orelse + return error.Overflow; + out_dice.appendAssumeCapacity(.{ + .offset = offset, + .length = single.length, + .kind = single.kind, + }); + } + } + } + + const seg = self.getLinkeditSegmentPtr(); + const offset = mem.alignForwardGeneric(u64, seg.fileoff + seg.filesize, @alignOf(u64)); + const 
needed_size = out_dice.items.len * @sizeOf(macho.data_in_code_entry); + seg.filesize = offset + needed_size - seg.fileoff; + + log.debug("writing data-in-code from 0x{x} to 0x{x}", .{ offset, offset + needed_size }); + + try self.file.pwriteAll(mem.sliceAsBytes(out_dice.items), offset); + try lc_writer.writeStruct(macho.linkedit_data_command{ + .cmd = .DATA_IN_CODE, + .cmdsize = @sizeOf(macho.linkedit_data_command), + .dataoff = @intCast(u32, offset), + .datasize = @intCast(u32, needed_size), + }); + ncmds.* += 1; + } + + fn writeSymtabs(self: *Zld, ncmds: *u32, lc_writer: anytype) !void { + var symtab_cmd = macho.symtab_command{ + .cmdsize = @sizeOf(macho.symtab_command), + .symoff = 0, + .nsyms = 0, + .stroff = 0, + .strsize = 0, + }; + var dysymtab_cmd = macho.dysymtab_command{ + .cmdsize = @sizeOf(macho.dysymtab_command), + .ilocalsym = 0, + .nlocalsym = 0, + .iextdefsym = 0, + .nextdefsym = 0, + .iundefsym = 0, + .nundefsym = 0, + .tocoff = 0, + .ntoc = 0, + .modtaboff = 0, + .nmodtab = 0, + .extrefsymoff = 0, + .nextrefsyms = 0, + .indirectsymoff = 0, + .nindirectsyms = 0, + .extreloff = 0, + .nextrel = 0, + .locreloff = 0, + .nlocrel = 0, + }; + var ctx = try self.writeSymtab(&symtab_cmd); + defer ctx.imports_table.deinit(); + try self.writeDysymtab(ctx, &dysymtab_cmd); + try self.writeStrtab(&symtab_cmd); + try lc_writer.writeStruct(symtab_cmd); + try lc_writer.writeStruct(dysymtab_cmd); + ncmds.* += 2; + } + + fn writeSymtab(self: *Zld, lc: *macho.symtab_command) !SymtabCtx { + const gpa = self.gpa; + + var locals = std.ArrayList(macho.nlist_64).init(gpa); + defer locals.deinit(); + + for (self.objects.items) |object| { + for (object.atoms.items) |atom_index| { + const atom = self.getAtom(atom_index); + const sym_loc = atom.getSymbolWithLoc(); + const sym = self.getSymbol(sym_loc); + if (sym.n_strx == 0) continue; // no name, skip + if (sym.ext()) continue; // an export lands in its own symtab section, skip + if (self.symbolIsTemp(sym_loc)) continue; // local temp symbol, skip + + var out_sym = sym; + out_sym.n_strx = try self.strtab.insert(gpa, self.getSymbolName(sym_loc)); + try locals.append(out_sym); + } + } + + if (!self.options.strip) { + for (self.objects.items) |object| { + try self.generateSymbolStabs(object, &locals); + } + } + + var exports = std.ArrayList(macho.nlist_64).init(gpa); + defer exports.deinit(); + + for (self.globals.items) |global| { + const sym = self.getSymbol(global); + if (sym.undf()) continue; // import, skip + if (sym.n_desc == N_DEAD) continue; + + var out_sym = sym; + out_sym.n_strx = try self.strtab.insert(gpa, self.getSymbolName(global)); + try exports.append(out_sym); + } + + var imports = std.ArrayList(macho.nlist_64).init(gpa); + defer imports.deinit(); + + var imports_table = std.AutoHashMap(SymbolWithLoc, u32).init(gpa); + + for (self.globals.items) |global| { + const sym = self.getSymbol(global); + if (!sym.undf()) continue; // not an import, skip + if (sym.n_desc == N_DEAD) continue; + + const new_index = @intCast(u32, imports.items.len); + var out_sym = sym; + out_sym.n_strx = try self.strtab.insert(gpa, self.getSymbolName(global)); + try imports.append(out_sym); + try imports_table.putNoClobber(global, new_index); + } + + const nlocals = @intCast(u32, locals.items.len); + const nexports = @intCast(u32, exports.items.len); + const nimports = @intCast(u32, imports.items.len); + const nsyms = nlocals + nexports + nimports; + + const seg = self.getLinkeditSegmentPtr(); + const offset = mem.alignForwardGeneric( + u64, + seg.fileoff + 
seg.filesize, + @alignOf(macho.nlist_64), + ); + const needed_size = nsyms * @sizeOf(macho.nlist_64); + seg.filesize = offset + needed_size - seg.fileoff; + + var buffer = std.ArrayList(u8).init(gpa); + defer buffer.deinit(); + try buffer.ensureTotalCapacityPrecise(needed_size); + buffer.appendSliceAssumeCapacity(mem.sliceAsBytes(locals.items)); + buffer.appendSliceAssumeCapacity(mem.sliceAsBytes(exports.items)); + buffer.appendSliceAssumeCapacity(mem.sliceAsBytes(imports.items)); + + log.debug("writing symtab from 0x{x} to 0x{x}", .{ offset, offset + needed_size }); + try self.file.pwriteAll(buffer.items, offset); + + lc.symoff = @intCast(u32, offset); + lc.nsyms = nsyms; + + return SymtabCtx{ + .nlocalsym = nlocals, + .nextdefsym = nexports, + .nundefsym = nimports, + .imports_table = imports_table, + }; + } + + fn writeStrtab(self: *Zld, lc: *macho.symtab_command) !void { + const seg = self.getLinkeditSegmentPtr(); + const offset = mem.alignForwardGeneric(u64, seg.fileoff + seg.filesize, @alignOf(u64)); + const needed_size = self.strtab.buffer.items.len; + seg.filesize = offset + needed_size - seg.fileoff; + + log.debug("writing string table from 0x{x} to 0x{x}", .{ offset, offset + needed_size }); + + try self.file.pwriteAll(self.strtab.buffer.items, offset); + + lc.stroff = @intCast(u32, offset); + lc.strsize = @intCast(u32, needed_size); + } + + const SymtabCtx = struct { + nlocalsym: u32, + nextdefsym: u32, + nundefsym: u32, + imports_table: std.AutoHashMap(SymbolWithLoc, u32), + }; + + fn writeDysymtab(self: *Zld, ctx: SymtabCtx, lc: *macho.dysymtab_command) !void { + const gpa = self.gpa; + const nstubs = @intCast(u32, self.stubs.items.len); + const ngot_entries = @intCast(u32, self.got_entries.items.len); + const nindirectsyms = nstubs * 2 + ngot_entries; + const iextdefsym = ctx.nlocalsym; + const iundefsym = iextdefsym + ctx.nextdefsym; + + const seg = self.getLinkeditSegmentPtr(); + const offset = mem.alignForwardGeneric(u64, seg.fileoff + seg.filesize, @alignOf(u64)); + const needed_size = nindirectsyms * @sizeOf(u32); + seg.filesize = offset + needed_size - seg.fileoff; + + log.debug("writing indirect symbol table from 0x{x} to 0x{x}", .{ offset, offset + needed_size }); + + var buf = std.ArrayList(u8).init(gpa); + defer buf.deinit(); + try buf.ensureTotalCapacity(needed_size); + const writer = buf.writer(); + + if (self.getSectionByName("__TEXT", "__stubs")) |sect_id| { + const stubs = &self.sections.items(.header)[sect_id]; + stubs.reserved1 = 0; + for (self.stubs.items) |entry| { + const target_sym = entry.getTargetSymbol(self); + assert(target_sym.undf()); + try writer.writeIntLittle(u32, iundefsym + ctx.imports_table.get(entry.target).?); + } + } + + if (self.getSectionByName("__DATA_CONST", "__got")) |sect_id| { + const got = &self.sections.items(.header)[sect_id]; + got.reserved1 = nstubs; + for (self.got_entries.items) |entry| { + const target_sym = entry.getTargetSymbol(self); + if (target_sym.undf()) { + try writer.writeIntLittle(u32, iundefsym + ctx.imports_table.get(entry.target).?); + } else { + try writer.writeIntLittle(u32, macho.INDIRECT_SYMBOL_LOCAL); + } + } + } + + if (self.getSectionByName("__DATA", "__la_symbol_ptr")) |sect_id| { + const la_symbol_ptr = &self.sections.items(.header)[sect_id]; + la_symbol_ptr.reserved1 = nstubs + ngot_entries; + for (self.stubs.items) |entry| { + const target_sym = entry.getTargetSymbol(self); + assert(target_sym.undf()); + try writer.writeIntLittle(u32, iundefsym + ctx.imports_table.get(entry.target).?); + } + } + + 
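+ // The indirect symbol table layout mirrors the reserved1 fields set + // above: stub targets occupy [0, nstubs), GOT entries occupy + // [nstubs, nstubs + ngot_entries), and the lazy pointers repeat the + // stub targets at [nstubs + ngot_entries, nindirectsyms).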
assert(buf.items.len == needed_size); + try self.file.pwriteAll(buf.items, offset); + + lc.nlocalsym = ctx.nlocalsym; + lc.iextdefsym = iextdefsym; + lc.nextdefsym = ctx.nextdefsym; + lc.iundefsym = iundefsym; + lc.nundefsym = ctx.nundefsym; + lc.indirectsymoff = @intCast(u32, offset); + lc.nindirectsyms = nindirectsyms; + } + + fn writeCodeSignaturePadding( + self: *Zld, + code_sig: *CodeSignature, + ncmds: *u32, + lc_writer: anytype, + ) !u32 { + const seg = self.getLinkeditSegmentPtr(); + // Code signature data has to be 16-bytes aligned for Apple tools to recognize the file + // https://github.com/opensource-apple/cctools/blob/fdb4825f303fd5c0751be524babd32958181b3ed/libstuff/checkout.c#L271 + const offset = mem.alignForwardGeneric(u64, seg.fileoff + seg.filesize, 16); + const needed_size = code_sig.estimateSize(offset); + seg.filesize = offset + needed_size - seg.fileoff; + seg.vmsize = mem.alignForwardGeneric(u64, seg.filesize, self.page_size); + log.debug("writing code signature padding from 0x{x} to 0x{x}", .{ offset, offset + needed_size }); + // Pad out the space. We need to do this to calculate valid hashes for everything in the file + // except for code signature data. + try self.file.pwriteAll(&[_]u8{0}, offset + needed_size - 1); + + try lc_writer.writeStruct(macho.linkedit_data_command{ + .cmd = .CODE_SIGNATURE, + .cmdsize = @sizeOf(macho.linkedit_data_command), + .dataoff = @intCast(u32, offset), + .datasize = @intCast(u32, needed_size), + }); + ncmds.* += 1; + + return @intCast(u32, offset); + } + + fn writeCodeSignature(self: *Zld, code_sig: *CodeSignature, offset: u32) !void { + const seg_id = self.getSegmentByName("__TEXT").?; + const seg = self.segments.items[seg_id]; + + var buffer = std.ArrayList(u8).init(self.gpa); + defer buffer.deinit(); + try buffer.ensureTotalCapacityPrecise(code_sig.size()); + try code_sig.writeAdhocSignature(self.gpa, .{ + .file = self.file, + .exec_seg_base = seg.fileoff, + .exec_seg_limit = seg.filesize, + .file_size = offset, + .output_mode = self.options.output_mode, + }, buffer.writer()); + assert(buffer.items.len == code_sig.size()); + + log.debug("writing code signature from 0x{x} to 0x{x}", .{ + offset, + offset + buffer.items.len, + }); + + try self.file.pwriteAll(buffer.items, offset); + } + + /// Writes Mach-O file header. + fn writeHeader(self: *Zld, ncmds: u32, sizeofcmds: u32) !void { + var header: macho.mach_header_64 = .{}; + header.flags = macho.MH_NOUNDEFS | macho.MH_DYLDLINK | macho.MH_PIE | macho.MH_TWOLEVEL; + + switch (self.options.target.cpu.arch) { + .aarch64 => { + header.cputype = macho.CPU_TYPE_ARM64; + header.cpusubtype = macho.CPU_SUBTYPE_ARM_ALL; + }, + .x86_64 => { + header.cputype = macho.CPU_TYPE_X86_64; + header.cpusubtype = macho.CPU_SUBTYPE_X86_64_ALL; + }, + else => return error.UnsupportedCpuArchitecture, + } + + switch (self.options.output_mode) { + .Exe => { + header.filetype = macho.MH_EXECUTE; + }, + .Lib => { + // By this point, it can only be a dylib. 
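+                // (.Obj output is copied verbatim earlier in linkWithZld, and a
+                // static .Lib is expected to go down the archive path instead,
+                // so neither can reach this writer.)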
+                header.filetype = macho.MH_DYLIB;
+                header.flags |= macho.MH_NO_REEXPORTED_DYLIBS;
+            },
+            else => unreachable,
+        }
+
+        if (self.getSectionByName("__DATA", "__thread_vars")) |sect_id| {
+            if (self.sections.items(.header)[sect_id].size > 0) {
+                header.flags |= macho.MH_HAS_TLV_DESCRIPTORS;
+            }
+        }
+
+        header.ncmds = ncmds;
+        header.sizeofcmds = sizeofcmds;
+
+        log.debug("writing Mach-O header {}", .{header});
+
+        try self.file.pwriteAll(mem.asBytes(&header), 0);
+    }
+
+    pub fn makeStaticString(bytes: []const u8) [16]u8 {
+        var buf = [_]u8{0} ** 16;
+        assert(bytes.len <= buf.len);
+        mem.copy(u8, &buf, bytes);
+        return buf;
+    }
+
+    pub inline fn getAtomPtr(self: *Zld, atom_index: AtomIndex) *Atom {
+        assert(atom_index < self.atoms.items.len);
+        return &self.atoms.items[atom_index];
+    }
+
+    pub inline fn getAtom(self: Zld, atom_index: AtomIndex) Atom {
+        assert(atom_index < self.atoms.items.len);
+        return self.atoms.items[atom_index];
+    }
+
+    fn getSegmentByName(self: Zld, segname: []const u8) ?u8 {
+        for (self.segments.items) |seg, i| {
+            if (mem.eql(u8, segname, seg.segName())) return @intCast(u8, i);
+        } else return null;
+    }
+
+    pub inline fn getSegment(self: Zld, sect_id: u8) macho.segment_command_64 {
+        const index = self.sections.items(.segment_index)[sect_id];
+        return self.segments.items[index];
+    }
+
+    pub inline fn getSegmentPtr(self: *Zld, sect_id: u8) *macho.segment_command_64 {
+        const index = self.sections.items(.segment_index)[sect_id];
+        return &self.segments.items[index];
+    }
+
+    pub inline fn getLinkeditSegmentPtr(self: *Zld) *macho.segment_command_64 {
+        assert(self.segments.items.len > 0);
+        const seg = &self.segments.items[self.segments.items.len - 1];
+        assert(mem.eql(u8, seg.segName(), "__LINKEDIT"));
+        return seg;
+    }
+
+    pub fn getSectionByName(self: Zld, segname: []const u8, sectname: []const u8) ?u8 {
+        // TODO investigate caching with a hashmap
+        for (self.sections.items(.header)) |header, i| {
+            if (mem.eql(u8, header.segName(), segname) and mem.eql(u8, header.sectName(), sectname))
+                return @intCast(u8, i);
+        } else return null;
+    }
+
+    pub fn getSectionIndexes(self: Zld, segment_index: u8) struct { start: u8, end: u8 } {
+        var start: u8 = 0;
+        const nsects = for (self.segments.items) |seg, i| {
+            if (i == segment_index) break @intCast(u8, seg.nsects);
+            start += @intCast(u8, seg.nsects);
+        } else 0;
+        return .{ .start = start, .end = start + nsects };
+    }
+
+    pub fn symbolIsTemp(self: *Zld, sym_with_loc: SymbolWithLoc) bool {
+        const sym = self.getSymbol(sym_with_loc);
+        if (!sym.sect()) return false;
+        if (sym.ext()) return false;
+        const sym_name = self.getSymbolName(sym_with_loc);
+        return mem.startsWith(u8, sym_name, "l") or mem.startsWith(u8, sym_name, "L");
+    }
+
+    /// Returns pointer-to-symbol described by `sym_with_loc` descriptor.
+    pub fn getSymbolPtr(self: *Zld, sym_with_loc: SymbolWithLoc) *macho.nlist_64 {
+        if (sym_with_loc.getFile()) |file| {
+            const object = &self.objects.items[file];
+            return &object.symtab[sym_with_loc.sym_index];
+        } else {
+            return &self.locals.items[sym_with_loc.sym_index];
+        }
+    }
+
+    /// Returns symbol described by `sym_with_loc` descriptor.
+    pub fn getSymbol(self: *Zld, sym_with_loc: SymbolWithLoc) macho.nlist_64 {
+        return self.getSymbolPtr(sym_with_loc).*;
+    }
+
+    /// Returns name of the symbol described by `sym_with_loc` descriptor.
+ pub fn getSymbolName(self: *Zld, sym_with_loc: SymbolWithLoc) []const u8 { + if (sym_with_loc.getFile()) |file| { + const object = self.objects.items[file]; + return object.getSymbolName(sym_with_loc.sym_index); + } else { + const sym = self.locals.items[sym_with_loc.sym_index]; + return self.strtab.get(sym.n_strx).?; + } + } + + /// Returns GOT atom that references `sym_with_loc` if one exists. + /// Returns null otherwise. + pub fn getGotAtomIndexForSymbol(self: *Zld, sym_with_loc: SymbolWithLoc) ?AtomIndex { + const index = self.got_table.get(sym_with_loc) orelse return null; + const entry = self.got_entries.items[index]; + return entry.atom_index; + } + + /// Returns stubs atom that references `sym_with_loc` if one exists. + /// Returns null otherwise. + pub fn getStubsAtomIndexForSymbol(self: *Zld, sym_with_loc: SymbolWithLoc) ?AtomIndex { + const index = self.stubs_table.get(sym_with_loc) orelse return null; + const entry = self.stubs.items[index]; + return entry.atom_index; + } + + /// Returns TLV pointer atom that references `sym_with_loc` if one exists. + /// Returns null otherwise. + pub fn getTlvPtrAtomIndexForSymbol(self: *Zld, sym_with_loc: SymbolWithLoc) ?AtomIndex { + const index = self.tlv_ptr_table.get(sym_with_loc) orelse return null; + const entry = self.tlv_ptr_entries.items[index]; + return entry.atom_index; + } + + /// Returns symbol location corresponding to the set entrypoint. + /// Asserts output mode is executable. + pub fn getEntryPoint(self: Zld) SymbolWithLoc { + assert(self.options.output_mode == .Exe); + const global_index = self.entry_index.?; + return self.globals.items[global_index]; + } + + inline fn requiresThunks(self: Zld) bool { + return self.options.target.cpu.arch == .aarch64; + } + + pub fn generateSymbolStabs(self: *Zld, object: Object, locals: *std.ArrayList(macho.nlist_64)) !void { + log.debug("generating stabs for '{s}'", .{object.name}); + + const gpa = self.gpa; + var debug_info = object.parseDwarfInfo(); + + var lookup = DwarfInfo.AbbrevLookupTable.init(gpa); + defer lookup.deinit(); + try lookup.ensureUnusedCapacity(std.math.maxInt(u8)); + + // We assume there is only one CU. 
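+        // (The compile unit iterator below is advanced only until the first
+        // unit; if an object somehow contained several CUs, stabs would be
+        // generated for the first one only.)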
+ var cu_it = debug_info.getCompileUnitIterator(); + const compile_unit = while (try cu_it.next()) |cu| { + const offset = math.cast(usize, cu.cuh.debug_abbrev_offset) orelse return error.Overflow; + try debug_info.genAbbrevLookupByKind(offset, &lookup); + break cu; + } else { + log.debug("no compile unit found in debug info in {s}; skipping", .{object.name}); + return; + }; + + var abbrev_it = compile_unit.getAbbrevEntryIterator(debug_info); + const cu_entry: DwarfInfo.AbbrevEntry = while (try abbrev_it.next(lookup)) |entry| switch (entry.tag) { + dwarf.TAG.compile_unit => break entry, + else => continue, + } else { + log.debug("missing DWARF_TAG_compile_unit tag in {s}; skipping", .{object.name}); + return; + }; + + var maybe_tu_name: ?[]const u8 = null; + var maybe_tu_comp_dir: ?[]const u8 = null; + var attr_it = cu_entry.getAttributeIterator(debug_info, compile_unit.cuh); + + while (try attr_it.next()) |attr| switch (attr.name) { + dwarf.AT.comp_dir => maybe_tu_comp_dir = attr.getString(debug_info, compile_unit.cuh) orelse continue, + dwarf.AT.name => maybe_tu_name = attr.getString(debug_info, compile_unit.cuh) orelse continue, + else => continue, + }; + + if (maybe_tu_name == null or maybe_tu_comp_dir == null) { + log.debug("missing DWARF_AT_comp_dir and DWARF_AT_name attributes {s}; skipping", .{object.name}); + return; + } + + const tu_name = maybe_tu_name.?; + const tu_comp_dir = maybe_tu_comp_dir.?; + + // Open scope + try locals.ensureUnusedCapacity(3); + locals.appendAssumeCapacity(.{ + .n_strx = try self.strtab.insert(gpa, tu_comp_dir), + .n_type = macho.N_SO, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }); + locals.appendAssumeCapacity(.{ + .n_strx = try self.strtab.insert(gpa, tu_name), + .n_type = macho.N_SO, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }); + locals.appendAssumeCapacity(.{ + .n_strx = try self.strtab.insert(gpa, object.name), + .n_type = macho.N_OSO, + .n_sect = 0, + .n_desc = 1, + .n_value = object.mtime, + }); + + var stabs_buf: [4]macho.nlist_64 = undefined; + + var name_lookup: ?DwarfInfo.SubprogramLookupByName = if (object.header.flags & macho.MH_SUBSECTIONS_VIA_SYMBOLS == 0) blk: { + var name_lookup = DwarfInfo.SubprogramLookupByName.init(gpa); + errdefer name_lookup.deinit(); + try name_lookup.ensureUnusedCapacity(@intCast(u32, object.atoms.items.len)); + try debug_info.genSubprogramLookupByName(compile_unit, lookup, &name_lookup); + break :blk name_lookup; + } else null; + defer if (name_lookup) |*nl| nl.deinit(); + + for (object.atoms.items) |atom_index| { + const atom = self.getAtom(atom_index); + const stabs = try self.generateSymbolStabsForSymbol( + atom_index, + atom.getSymbolWithLoc(), + name_lookup, + &stabs_buf, + ); + try locals.appendSlice(stabs); + + var it = Atom.getInnerSymbolsIterator(self, atom_index); + while (it.next()) |sym_loc| { + const contained_stabs = try self.generateSymbolStabsForSymbol( + atom_index, + sym_loc, + name_lookup, + &stabs_buf, + ); + try locals.appendSlice(contained_stabs); + } + } + + // Close scope + try locals.append(.{ + .n_strx = 0, + .n_type = macho.N_SO, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }); + } + + fn generateSymbolStabsForSymbol( + self: *Zld, + atom_index: AtomIndex, + sym_loc: SymbolWithLoc, + lookup: ?DwarfInfo.SubprogramLookupByName, + buf: *[4]macho.nlist_64, + ) ![]const macho.nlist_64 { + const gpa = self.gpa; + const object = self.objects.items[sym_loc.getFile().?]; + const sym = self.getSymbol(sym_loc); + const sym_name = self.getSymbolName(sym_loc); + const header = 
self.sections.items(.header)[sym.n_sect - 1]; + + if (sym.n_strx == 0) return buf[0..0]; + if (self.symbolIsTemp(sym_loc)) return buf[0..0]; + + if (!header.isCode()) { + // Since we are not dealing with machine code, it's either a global or a static depending + // on the linkage scope. + if (sym.sect() and sym.ext()) { + // Global gets an N_GSYM stab type. + buf[0] = .{ + .n_strx = try self.strtab.insert(gpa, sym_name), + .n_type = macho.N_GSYM, + .n_sect = sym.n_sect, + .n_desc = 0, + .n_value = 0, + }; + } else { + // Local static gets an N_STSYM stab type. + buf[0] = .{ + .n_strx = try self.strtab.insert(gpa, sym_name), + .n_type = macho.N_STSYM, + .n_sect = sym.n_sect, + .n_desc = 0, + .n_value = sym.n_value, + }; + } + return buf[0..1]; + } + + const size: u64 = size: { + if (object.header.flags & macho.MH_SUBSECTIONS_VIA_SYMBOLS != 0) { + break :size self.getAtom(atom_index).size; + } + + // Since we don't have subsections to work with, we need to infer the size of each function + // the slow way by scanning the debug info for matching symbol names and extracting + // the symbol's DWARF_AT_low_pc and DWARF_AT_high_pc values. + const source_sym = object.getSourceSymbol(sym_loc.sym_index) orelse return buf[0..0]; + const subprogram = lookup.?.get(sym_name[1..]) orelse return buf[0..0]; + + if (subprogram.addr <= source_sym.n_value and source_sym.n_value < subprogram.addr + subprogram.size) { + break :size subprogram.size; + } else { + log.debug("no stab found for {s}", .{sym_name}); + return buf[0..0]; + } + }; + + buf[0] = .{ + .n_strx = 0, + .n_type = macho.N_BNSYM, + .n_sect = sym.n_sect, + .n_desc = 0, + .n_value = sym.n_value, + }; + buf[1] = .{ + .n_strx = try self.strtab.insert(gpa, sym_name), + .n_type = macho.N_FUN, + .n_sect = sym.n_sect, + .n_desc = 0, + .n_value = sym.n_value, + }; + buf[2] = .{ + .n_strx = 0, + .n_type = macho.N_FUN, + .n_sect = 0, + .n_desc = 0, + .n_value = size, + }; + buf[3] = .{ + .n_strx = 0, + .n_type = macho.N_ENSYM, + .n_sect = sym.n_sect, + .n_desc = 0, + .n_value = size, + }; + + return buf; + } + + fn logSegments(self: *Zld) void { + log.debug("segments:", .{}); + for (self.segments.items) |segment, i| { + log.debug(" segment({d}): {s} @{x} ({x}), sizeof({x})", .{ + i, + segment.segName(), + segment.fileoff, + segment.vmaddr, + segment.vmsize, + }); + } + } + + fn logSections(self: *Zld) void { + log.debug("sections:", .{}); + for (self.sections.items(.header)) |header, i| { + log.debug(" sect({d}): {s},{s} @{x} ({x}), sizeof({x})", .{ + i + 1, + header.segName(), + header.sectName(), + header.offset, + header.addr, + header.size, + }); + } + } + + fn logSymAttributes(sym: macho.nlist_64, buf: []u8) []const u8 { + if (sym.sect()) { + buf[0] = 's'; + } + if (sym.ext()) { + if (sym.weakDef() or sym.pext()) { + buf[1] = 'w'; + } else { + buf[1] = 'e'; + } + } + if (sym.tentative()) { + buf[2] = 't'; + } + if (sym.undf()) { + buf[3] = 'u'; + } + return buf[0..]; + } + + fn logSymtab(self: *Zld) void { + var buf: [4]u8 = undefined; + + const scoped_log = std.log.scoped(.symtab); + + scoped_log.debug("locals:", .{}); + for (self.objects.items) |object, id| { + scoped_log.debug(" object({d}): {s}", .{ id, object.name }); + if (object.in_symtab == null) continue; + for (object.symtab) |sym, sym_id| { + mem.set(u8, &buf, '_'); + scoped_log.debug(" %{d}: {s} @{x} in sect({d}), {s}", .{ + sym_id, + object.getSymbolName(@intCast(u32, sym_id)), + sym.n_value, + sym.n_sect, + logSymAttributes(sym, &buf), + }); + } + } + scoped_log.debug(" object(-1)", .{}); 
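+        // object(-1) groups the linker's own synthetic locals, i.e. symbols
+        // with SymbolWithLoc.file == -1 rather than ones read from an input.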
+ for (self.locals.items) |sym, sym_id| { + if (sym.undf()) continue; + scoped_log.debug(" %{d}: {s} @{x} in sect({d}), {s}", .{ + sym_id, + self.strtab.get(sym.n_strx).?, + sym.n_value, + sym.n_sect, + logSymAttributes(sym, &buf), + }); + } + + scoped_log.debug("exports:", .{}); + for (self.globals.items) |global, i| { + const sym = self.getSymbol(global); + if (sym.undf()) continue; + if (sym.n_desc == N_DEAD) continue; + scoped_log.debug(" %{d}: {s} @{x} in sect({d}), {s} (def in object({?}))", .{ + i, + self.getSymbolName(global), + sym.n_value, + sym.n_sect, + logSymAttributes(sym, &buf), + global.file, + }); + } + + scoped_log.debug("imports:", .{}); + for (self.globals.items) |global, i| { + const sym = self.getSymbol(global); + if (!sym.undf()) continue; + if (sym.n_desc == N_DEAD) continue; + const ord = @divTrunc(sym.n_desc, macho.N_SYMBOL_RESOLVER); + scoped_log.debug(" %{d}: {s} @{x} in ord({d}), {s}", .{ + i, + self.getSymbolName(global), + sym.n_value, + ord, + logSymAttributes(sym, &buf), + }); + } + + scoped_log.debug("GOT entries:", .{}); + for (self.got_entries.items) |entry, i| { + const atom_sym = entry.getAtomSymbol(self); + const target_sym = entry.getTargetSymbol(self); + const target_sym_name = entry.getTargetSymbolName(self); + if (target_sym.undf()) { + scoped_log.debug(" {d}@{x} => import('{s}')", .{ + i, + atom_sym.n_value, + target_sym_name, + }); + } else { + scoped_log.debug(" {d}@{x} => local(%{d}) in object({?}) {s}", .{ + i, + atom_sym.n_value, + entry.target.sym_index, + entry.target.file, + logSymAttributes(target_sym, buf[0..4]), + }); + } + } + + scoped_log.debug("__thread_ptrs entries:", .{}); + for (self.tlv_ptr_entries.items) |entry, i| { + const atom_sym = entry.getAtomSymbol(self); + const target_sym = entry.getTargetSymbol(self); + const target_sym_name = entry.getTargetSymbolName(self); + assert(target_sym.undf()); + scoped_log.debug(" {d}@{x} => import('{s}')", .{ + i, + atom_sym.n_value, + target_sym_name, + }); + } + + scoped_log.debug("stubs entries:", .{}); + for (self.stubs.items) |entry, i| { + const atom_sym = entry.getAtomSymbol(self); + const target_sym = entry.getTargetSymbol(self); + const target_sym_name = entry.getTargetSymbolName(self); + assert(target_sym.undf()); + scoped_log.debug(" {d}@{x} => import('{s}')", .{ + i, + atom_sym.n_value, + target_sym_name, + }); + } + + scoped_log.debug("thunks:", .{}); + for (self.thunks.items) |thunk, i| { + scoped_log.debug(" thunk({d})", .{i}); + for (thunk.lookup.keys()) |target, j| { + const target_sym = self.getSymbol(target); + const atom = self.getAtom(thunk.lookup.get(target).?); + const atom_sym = self.getSymbol(atom.getSymbolWithLoc()); + scoped_log.debug(" {d}@{x} => thunk('{s}'@{x})", .{ + j, + atom_sym.n_value, + self.getSymbolName(target), + target_sym.n_value, + }); + } + } + } + + fn logAtoms(self: *Zld) void { + log.debug("atoms:", .{}); + const slice = self.sections.slice(); + for (slice.items(.first_atom_index)) |first_atom_index, sect_id| { + var atom_index = first_atom_index; + const header = slice.items(.header)[sect_id]; + + log.debug("{s},{s}", .{ header.segName(), header.sectName() }); + + while (true) { + const atom = self.getAtom(atom_index); + self.logAtom(atom_index, log); + + if (atom.next_index) |next_index| { + atom_index = next_index; + } else break; + } + } + } + + pub fn logAtom(self: *Zld, atom_index: AtomIndex, logger: anytype) void { + if (!build_options.enable_logging) return; + + const atom = self.getAtom(atom_index); + const sym = 
self.getSymbol(atom.getSymbolWithLoc()); + const sym_name = self.getSymbolName(atom.getSymbolWithLoc()); + logger.debug(" ATOM({d}, %{d}, '{s}') @ {x} (sizeof({x}), alignof({x})) in object({?}) in sect({d})", .{ + atom_index, + atom.sym_index, + sym_name, + sym.n_value, + atom.size, + atom.alignment, + atom.file, + sym.n_sect, + }); + + if (atom.getFile() != null) { + var it = Atom.getInnerSymbolsIterator(self, atom_index); + while (it.next()) |sym_loc| { + const inner = self.getSymbol(sym_loc); + const inner_name = self.getSymbolName(sym_loc); + const offset = Atom.calcInnerSymbolOffset(self, atom_index, sym_loc.sym_index); + + logger.debug(" (%{d}, '{s}') @ {x} ({x})", .{ + sym_loc.sym_index, + inner_name, + inner.n_value, + offset, + }); + } + + if (Atom.getSectionAlias(self, atom_index)) |sym_loc| { + const alias = self.getSymbol(sym_loc); + const alias_name = self.getSymbolName(sym_loc); + + logger.debug(" (%{d}, '{s}') @ {x} ({x})", .{ + sym_loc.sym_index, + alias_name, + alias.n_value, + 0, + }); + } + } + } +}; + +pub const N_DEAD: u16 = @bitCast(u16, @as(i16, -1)); + +const Section = struct { + header: macho.section_64, + segment_index: u8, + first_atom_index: AtomIndex, + last_atom_index: AtomIndex, +}; + +pub const AtomIndex = u32; + +const IndirectPointer = struct { + target: SymbolWithLoc, + atom_index: AtomIndex, + + pub fn getTargetSymbol(self: @This(), zld: *Zld) macho.nlist_64 { + return zld.getSymbol(self.target); + } + + pub fn getTargetSymbolName(self: @This(), zld: *Zld) []const u8 { + return zld.getSymbolName(self.target); + } + + pub fn getAtomSymbol(self: @This(), zld: *Zld) macho.nlist_64 { + const atom = zld.getAtom(self.atom_index); + return zld.getSymbol(atom.getSymbolWithLoc()); + } +}; + +pub const SymbolWithLoc = struct { + // Index into the respective symbol table. + sym_index: u32, + + // -1 means it's a synthetic global. + file: i32 = -1, + + pub inline fn getFile(self: SymbolWithLoc) ?u31 { + if (self.file == -1) return null; + return @intCast(u31, self.file); + } + + pub inline fn eql(self: SymbolWithLoc, other: SymbolWithLoc) bool { + return self.file == other.file and self.sym_index == other.sym_index; + } +}; + +const SymbolResolver = struct { + arena: Allocator, + table: std.StringHashMap(u32), + unresolved: std.AutoArrayHashMap(u32, void), +}; pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progress.Node) !void { const tracy = trace(@src()); defer tracy.end(); const gpa = macho_file.base.allocator; + const options = macho_file.base.options; + const target = options.target; + var arena_allocator = std.heap.ArenaAllocator.init(gpa); defer arena_allocator.deinit(); const arena = arena_allocator.allocator(); - const directory = macho_file.base.options.emit.?.directory; // Just an alias to make it shorter to type. - const full_out_path = try directory.join(arena, &[_][]const u8{macho_file.base.options.emit.?.sub_path}); + const directory = options.emit.?.directory; // Just an alias to make it shorter to type. + const full_out_path = try directory.join(arena, &[_][]const u8{options.emit.?.sub_path}); // If there is no Zig code to compile, then we should skip flushing the output file because it // will not be part of the linker line anyway. 
- const module_obj_path: ?[]const u8 = if (macho_file.base.options.module) |module| blk: { - if (macho_file.base.options.use_stage1) { + const module_obj_path: ?[]const u8 = if (options.module) |module| blk: { + if (options.use_stage1) { const obj_basename = try std.zig.binNameAlloc(arena, .{ - .root_name = macho_file.base.options.root_name, - .target = macho_file.base.options.target, + .root_name = options.root_name, + .target = target, .output_mode = .Obj, }); - switch (macho_file.base.options.cache_mode) { + switch (options.cache_mode) { .incremental => break :blk try module.zig_cache_artifact_directory.join( arena, &[_][]const u8{obj_basename}, @@ -71,24 +3779,24 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr sub_prog_node.context.refresh(); defer sub_prog_node.end(); - const cpu_arch = macho_file.base.options.target.cpu.arch; - const os_tag = macho_file.base.options.target.os.tag; - const abi = macho_file.base.options.target.abi; - const is_lib = macho_file.base.options.output_mode == .Lib; - const is_dyn_lib = macho_file.base.options.link_mode == .Dynamic and is_lib; - const is_exe_or_dyn_lib = is_dyn_lib or macho_file.base.options.output_mode == .Exe; - const stack_size = macho_file.base.options.stack_size_override orelse 0; - const is_debug_build = macho_file.base.options.optimize_mode == .Debug; - const gc_sections = macho_file.base.options.gc_sections orelse !is_debug_build; + const cpu_arch = target.cpu.arch; + const os_tag = target.os.tag; + const abi = target.abi; + const is_lib = options.output_mode == .Lib; + const is_dyn_lib = options.link_mode == .Dynamic and is_lib; + const is_exe_or_dyn_lib = is_dyn_lib or options.output_mode == .Exe; + const stack_size = options.stack_size_override orelse 0; + const is_debug_build = options.optimize_mode == .Debug; + const gc_sections = options.gc_sections orelse !is_debug_build; const id_symlink_basename = "zld.id"; var man: Cache.Manifest = undefined; - defer if (!macho_file.base.options.disable_lld_caching) man.deinit(); + defer if (!options.disable_lld_caching) man.deinit(); var digest: [Cache.hex_digest_len]u8 = undefined; - if (!macho_file.base.options.disable_lld_caching) { + if (!options.disable_lld_caching) { man = comp.cache_parent.obtain(); // We are about to obtain this lock, so here we give other processes a chance first. @@ -96,7 +3804,7 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr comptime assert(Compilation.link_hash_implementation_version == 7); - for (macho_file.base.options.objects) |obj| { + for (options.objects) |obj| { _ = try man.addFile(obj.path, null); man.hash.add(obj.must_link); } @@ -107,24 +3815,24 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr // We can skip hashing libc and libc++ components that we are in charge of building from Zig // installation sources because they are always a product of the compiler version + target information. 
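+    // In other words: anything that can change the produced binary must be
+    // folded into the digest below, or a stale artifact could be reused.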
man.hash.add(stack_size); - man.hash.addOptional(macho_file.base.options.pagezero_size); - man.hash.addOptional(macho_file.base.options.search_strategy); - man.hash.addOptional(macho_file.base.options.headerpad_size); - man.hash.add(macho_file.base.options.headerpad_max_install_names); + man.hash.addOptional(options.pagezero_size); + man.hash.addOptional(options.search_strategy); + man.hash.addOptional(options.headerpad_size); + man.hash.add(options.headerpad_max_install_names); man.hash.add(gc_sections); - man.hash.add(macho_file.base.options.dead_strip_dylibs); - man.hash.add(macho_file.base.options.strip); - man.hash.addListOfBytes(macho_file.base.options.lib_dirs); - man.hash.addListOfBytes(macho_file.base.options.framework_dirs); - link.hashAddSystemLibs(&man.hash, macho_file.base.options.frameworks); - man.hash.addListOfBytes(macho_file.base.options.rpath_list); + man.hash.add(options.dead_strip_dylibs); + man.hash.add(options.strip); + man.hash.addListOfBytes(options.lib_dirs); + man.hash.addListOfBytes(options.framework_dirs); + link.hashAddSystemLibs(&man.hash, options.frameworks); + man.hash.addListOfBytes(options.rpath_list); if (is_dyn_lib) { - man.hash.addOptionalBytes(macho_file.base.options.install_name); - man.hash.addOptional(macho_file.base.options.version); + man.hash.addOptionalBytes(options.install_name); + man.hash.addOptional(options.version); } - link.hashAddSystemLibs(&man.hash, macho_file.base.options.system_libs); - man.hash.addOptionalBytes(macho_file.base.options.sysroot); - try man.addOptionalFile(macho_file.base.options.entitlements); + link.hashAddSystemLibs(&man.hash, options.system_libs); + man.hash.addOptionalBytes(options.sysroot); + try man.addOptionalFile(options.entitlements); // We don't actually care whether it's a cache hit or miss; we just need the digest and the lock. _ = try man.hit(); @@ -163,13 +3871,13 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr }; } - if (macho_file.base.options.output_mode == .Obj) { + if (options.output_mode == .Obj) { // LLD's MachO driver does not support the equivalent of `-r` so we do a simple file copy // here. TODO: think carefully about how we can avoid this redundant operation when doing // build-obj. See also the corresponding TODO in linkAsArchive. const the_object_path = blk: { - if (macho_file.base.options.objects.len != 0) { - break :blk macho_file.base.options.objects[0].path; + if (options.objects.len != 0) { + break :blk options.objects[0].path; } if (comp.c_object_table.count() != 0) @@ -188,36 +3896,37 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr try fs.cwd().copyFile(the_object_path, fs.cwd(), full_out_path, .{}); } } else { - const sub_path = macho_file.base.options.emit.?.sub_path; + const page_size = macho_file.page_size; + const sub_path = options.emit.?.sub_path; if (macho_file.base.file == null) { macho_file.base.file = try directory.handle.createFile(sub_path, .{ .truncate = true, .read = true, - .mode = link.determineMode(macho_file.base.options), + .mode = link.determineMode(options), }); } - // Index 0 is always a null symbol. 
- try macho_file.locals.append(gpa, .{ - .n_strx = 0, - .n_type = 0, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }); - try macho_file.strtab.buffer.append(gpa, 0); - try initSections(macho_file); + var zld = Zld{ + .gpa = gpa, + .file = macho_file.base.file.?, + .page_size = macho_file.page_size, + .options = options, + }; + defer zld.deinit(); + + try zld.atoms.append(gpa, Atom.empty); // AtomIndex at 0 is reserved as null atom + try zld.strtab.buffer.append(gpa, 0); var lib_not_found = false; var framework_not_found = false; // Positional arguments to the linker such as object files and static archives. var positionals = std.ArrayList([]const u8).init(arena); - try positionals.ensureUnusedCapacity(macho_file.base.options.objects.len); + try positionals.ensureUnusedCapacity(options.objects.len); var must_link_archives = std.StringArrayHashMap(void).init(arena); - try must_link_archives.ensureUnusedCapacity(macho_file.base.options.objects.len); + try must_link_archives.ensureUnusedCapacity(options.objects.len); - for (macho_file.base.options.objects) |obj| { + for (options.objects) |obj| { if (must_link_archives.contains(obj.path)) continue; if (obj.must_link) { _ = must_link_archives.getOrPutAssumeCapacity(obj.path); @@ -239,7 +3948,7 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr } // libc++ dep - if (macho_file.base.options.link_libcpp) { + if (options.link_libcpp) { try positionals.append(comp.libcxxabi_static_lib.?.full_object_path); try positionals.append(comp.libcxx_static_lib.?.full_object_path); } @@ -247,7 +3956,7 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr // Shared and static libraries passed via `-l` flag. var candidate_libs = std.StringArrayHashMap(link.SystemLib).init(arena); - const system_lib_names = macho_file.base.options.system_libs.keys(); + const system_lib_names = options.system_libs.keys(); for (system_lib_names) |system_lib_name| { // By this time, we depend on these libs being dynamically linked libraries and not static libraries // (the check for that needs to be earlier), but they could be full paths to .dylib files, in which @@ -257,7 +3966,7 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr continue; } - const system_lib_info = macho_file.base.options.system_libs.get(system_lib_name).?; + const system_lib_info = options.system_libs.get(system_lib_name).?; try candidate_libs.put(system_lib_name, .{ .needed = system_lib_info.needed, .weak = system_lib_info.weak, @@ -265,8 +3974,8 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr } var lib_dirs = std.ArrayList([]const u8).init(arena); - for (macho_file.base.options.lib_dirs) |dir| { - if (try MachO.resolveSearchDir(arena, dir, macho_file.base.options.sysroot)) |search_dir| { + for (options.lib_dirs) |dir| { + if (try MachO.resolveSearchDir(arena, dir, options.sysroot)) |search_dir| { try lib_dirs.append(search_dir); } else { log.warn("directory not found for '-L{s}'", .{dir}); @@ -276,7 +3985,7 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr var libs = std.StringArrayHashMap(link.SystemLib).init(arena); // Assume ld64 default -search_paths_first if no strategy specified. 
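+    // (With -search_paths_first, each lib dir is tried in turn for a dylib/tbd
+    // and then an archive before moving to the next dir; with
+    // -search_dylibs_first, every dir is scanned for a dylib/tbd before any
+    // archive is considered.)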
- const search_strategy = macho_file.base.options.search_strategy orelse .paths_first; + const search_strategy = options.search_strategy orelse .paths_first; outer: for (candidate_libs.keys()) |lib_name| { switch (search_strategy) { .paths_first => { @@ -321,23 +4030,23 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr } } - try macho_file.resolveLibSystem(arena, comp, lib_dirs.items, &libs); + try MachO.resolveLibSystem(arena, comp, options.sysroot, target, lib_dirs.items, &libs); // frameworks var framework_dirs = std.ArrayList([]const u8).init(arena); - for (macho_file.base.options.framework_dirs) |dir| { - if (try MachO.resolveSearchDir(arena, dir, macho_file.base.options.sysroot)) |search_dir| { + for (options.framework_dirs) |dir| { + if (try MachO.resolveSearchDir(arena, dir, options.sysroot)) |search_dir| { try framework_dirs.append(search_dir); } else { log.warn("directory not found for '-F{s}'", .{dir}); } } - outer: for (macho_file.base.options.frameworks.keys()) |f_name| { + outer: for (options.frameworks.keys()) |f_name| { for (framework_dirs.items) |dir| { for (&[_][]const u8{ ".tbd", ".dylib", "" }) |ext| { if (try MachO.resolveFramework(arena, dir, f_name, ext)) |full_path| { - const info = macho_file.base.options.frameworks.get(f_name).?; + const info = options.frameworks.get(f_name).?; try libs.put(full_path, .{ .needed = info.needed, .weak = info.weak, @@ -358,7 +4067,7 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr } } - if (macho_file.base.options.verbose_link) { + if (options.verbose_link) { var argv = std.ArrayList([]const u8).init(arena); try argv.append("zig"); @@ -371,38 +4080,38 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr if (is_dyn_lib) { try argv.append("-dylib"); - if (macho_file.base.options.install_name) |install_name| { + if (options.install_name) |install_name| { try argv.append("-install_name"); try argv.append(install_name); } } - if (macho_file.base.options.sysroot) |syslibroot| { + if (options.sysroot) |syslibroot| { try argv.append("-syslibroot"); try argv.append(syslibroot); } - for (macho_file.base.options.rpath_list) |rpath| { + for (options.rpath_list) |rpath| { try argv.append("-rpath"); try argv.append(rpath); } - if (macho_file.base.options.pagezero_size) |pagezero_size| { + if (options.pagezero_size) |pagezero_size| { try argv.append("-pagezero_size"); try argv.append(try std.fmt.allocPrint(arena, "0x{x}", .{pagezero_size})); } - if (macho_file.base.options.search_strategy) |strat| switch (strat) { + if (options.search_strategy) |strat| switch (strat) { .paths_first => try argv.append("-search_paths_first"), .dylibs_first => try argv.append("-search_dylibs_first"), }; - if (macho_file.base.options.headerpad_size) |headerpad_size| { + if (options.headerpad_size) |headerpad_size| { try argv.append("-headerpad_size"); try argv.append(try std.fmt.allocPrint(arena, "0x{x}", .{headerpad_size})); } - if (macho_file.base.options.headerpad_max_install_names) { + if (options.headerpad_max_install_names) { try argv.append("-headerpad_max_install_names"); } @@ -410,16 +4119,16 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr try argv.append("-dead_strip"); } - if (macho_file.base.options.dead_strip_dylibs) { + if (options.dead_strip_dylibs) { try argv.append("-dead_strip_dylibs"); } - if (macho_file.base.options.entry) |entry| { + if (options.entry) |entry| { try argv.append("-e"); try argv.append(entry); } - 
for (macho_file.base.options.objects) |obj| { + for (options.objects) |obj| { try argv.append(obj.path); } @@ -435,7 +4144,7 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr try argv.append(lib.full_object_path); } - if (macho_file.base.options.link_libcpp) { + if (options.link_libcpp) { try argv.append(comp.libcxxabi_static_lib.?.full_object_path); try argv.append(comp.libcxx_static_lib.?.full_object_path); } @@ -446,8 +4155,8 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr try argv.append("-lSystem"); try argv.append("-lc"); - for (macho_file.base.options.system_libs.keys()) |l_name| { - const info = macho_file.base.options.system_libs.get(l_name).?; + for (options.system_libs.keys()) |l_name| { + const info = options.system_libs.get(l_name).?; const arg = if (info.needed) try std.fmt.allocPrint(arena, "-needed-l{s}", .{l_name}) else if (info.weak) @@ -457,12 +4166,12 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr try argv.append(arg); } - for (macho_file.base.options.lib_dirs) |lib_dir| { + for (options.lib_dirs) |lib_dir| { try argv.append(try std.fmt.allocPrint(arena, "-L{s}", .{lib_dir})); } - for (macho_file.base.options.frameworks.keys()) |framework| { - const info = macho_file.base.options.frameworks.get(framework).?; + for (options.frameworks.keys()) |framework| { + const info = options.frameworks.get(framework).?; const arg = if (info.needed) try std.fmt.allocPrint(arena, "-needed_framework {s}", .{framework}) else if (info.weak) @@ -472,11 +4181,11 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr try argv.append(arg); } - for (macho_file.base.options.framework_dirs) |framework_dir| { + for (options.framework_dirs) |framework_dir| { try argv.append(try std.fmt.allocPrint(arena, "-F{s}", .{framework_dir})); } - if (is_dyn_lib and (macho_file.base.options.allow_shlib_undefined orelse false)) { + if (is_dyn_lib and (options.allow_shlib_undefined orelse false)) { try argv.append("-undefined"); try argv.append("dynamic_lookup"); } @@ -493,23 +4202,29 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr parent: u16, }, .Dynamic).init(arena); - try macho_file.parseInputFiles(positionals.items, macho_file.base.options.sysroot, &dependent_libs); - try macho_file.parseAndForceLoadStaticArchives(must_link_archives.keys()); - try macho_file.parseLibs(libs.keys(), libs.values(), macho_file.base.options.sysroot, &dependent_libs); - try macho_file.parseDependentLibs(macho_file.base.options.sysroot, &dependent_libs); + try zld.parseInputFiles(positionals.items, options.sysroot, &dependent_libs); + try zld.parseAndForceLoadStaticArchives(must_link_archives.keys()); + try zld.parseLibs(libs.keys(), libs.values(), options.sysroot, &dependent_libs); + try zld.parseDependentLibs(options.sysroot, &dependent_libs); + + var resolver = SymbolResolver{ + .arena = arena, + .table = std.StringHashMap(u32).init(arena), + .unresolved = std.AutoArrayHashMap(u32, void).init(arena), + }; - for (macho_file.objects.items) |_, object_id| { - try macho_file.resolveSymbolsInObject(@intCast(u16, object_id)); + for (zld.objects.items) |_, object_id| { + try zld.resolveSymbolsInObject(@intCast(u16, object_id), &resolver); } - try macho_file.resolveSymbolsInArchives(); - try macho_file.resolveDyldStubBinder(); - try macho_file.resolveSymbolsInDylibs(); - try macho_file.createMhExecuteHeaderSymbol(); - try macho_file.createDsoHandleSymbol(); - try 
macho_file.resolveSymbolsAtLoading(); + try zld.resolveSymbolsInArchives(&resolver); + try zld.resolveDyldStubBinder(&resolver); + try zld.resolveSymbolsInDylibs(&resolver); + try zld.createMhExecuteHeaderSymbol(&resolver); + try zld.createDsoHandleSymbol(&resolver); + try zld.resolveSymbolsAtLoading(&resolver); - if (macho_file.unresolved.count() > 0) { + if (resolver.unresolved.count() > 0) { return error.UndefinedSymbolReference; } if (lib_not_found) { @@ -519,67 +4234,98 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr return error.FrameworkNotFound; } - for (macho_file.objects.items) |*object| { - try object.scanInputSections(macho_file); + if (options.output_mode == .Exe) { + const entry_name = options.entry orelse "_main"; + const global_index = resolver.table.get(entry_name) orelse { + log.err("entrypoint '{s}' not found", .{entry_name}); + return error.MissingMainEntrypoint; + }; + zld.entry_index = global_index; } - try macho_file.createDyldPrivateAtom(); - try macho_file.createTentativeDefAtoms(); - try macho_file.createStubHelperPreambleAtom(); + for (zld.objects.items) |*object, object_id| { + try object.splitIntoAtoms(&zld, @intCast(u31, object_id)); + } - for (macho_file.objects.items) |*object, object_id| { - try object.splitIntoAtoms(macho_file, @intCast(u32, object_id)); + var reverse_lookups: [][]u32 = try arena.alloc([]u32, zld.objects.items.len); + for (zld.objects.items) |object, i| { + reverse_lookups[i] = try object.createReverseSymbolLookup(arena); } if (gc_sections) { - try dead_strip.gcAtoms(macho_file); + try dead_strip.gcAtoms(&zld, reverse_lookups); } - try allocateSegments(macho_file); - try allocateSymbols(macho_file); + try zld.createDyldPrivateAtom(); + try zld.createTentativeDefAtoms(); + try zld.createStubHelperPreambleAtom(); - try macho_file.allocateSpecialSymbols(); + for (zld.objects.items) |object| { + for (object.atoms.items) |atom_index| { + const atom = zld.getAtom(atom_index); + const sym = zld.getSymbol(atom.getSymbolWithLoc()); + const header = zld.sections.items(.header)[sym.n_sect - 1]; + if (header.isZerofill()) continue; - if (build_options.enable_logging or true) { - macho_file.logSymtab(); - macho_file.logSections(); - macho_file.logAtoms(); + const relocs = Atom.getAtomRelocs(&zld, atom_index); + try Atom.scanAtomRelocs(&zld, atom_index, relocs, reverse_lookups[atom.getFile().?]); + } } - try writeAtoms(macho_file); + try zld.createDyldStubBinderGotAtom(); + + try zld.calcSectionSizes(reverse_lookups); + try zld.pruneAndSortSections(); + try zld.createSegments(); + try zld.allocateSegments(); + + try zld.allocateSpecialSymbols(); + + if (build_options.enable_logging) { + zld.logSymtab(); + zld.logSegments(); + zld.logSections(); + zld.logAtoms(); + } + + try zld.writeAtoms(reverse_lookups); var lc_buffer = std.ArrayList(u8).init(arena); const lc_writer = lc_buffer.writer(); var ncmds: u32 = 0; - try writeLinkeditSegmentData(macho_file, &ncmds, lc_writer); + try zld.writeLinkeditSegmentData(&ncmds, lc_writer, reverse_lookups); // If the last section of __DATA segment is zerofill section, we need to ensure // that the free space between the end of the last non-zerofill section of __DATA // segment and the beginning of __LINKEDIT segment is zerofilled as the loader will // copy-paste this space into memory for quicker zerofill operation. 
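+    // (Presumably this lets the loader map the whole file-backed span in one
+    // operation instead of splitting the mapping around an unwritten gap.)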
- if (macho_file.data_segment_cmd_index) |data_seg_id| blk: { + if (zld.getSegmentByName("__DATA")) |data_seg_id| blk: { var physical_zerofill_start: u64 = 0; - const section_indexes = macho_file.getSectionIndexes(data_seg_id); - for (macho_file.sections.items(.header)[section_indexes.start..section_indexes.end]) |header| { + const section_indexes = zld.getSectionIndexes(data_seg_id); + for (zld.sections.items(.header)[section_indexes.start..section_indexes.end]) |header| { if (header.isZerofill() and header.size > 0) break; physical_zerofill_start = header.offset + header.size; } else break :blk; - const linkedit = macho_file.segments.items[macho_file.linkedit_segment_cmd_index.?]; + const linkedit = zld.getLinkeditSegmentPtr(); const physical_zerofill_size = math.cast(usize, linkedit.fileoff - physical_zerofill_start) orelse return error.Overflow; if (physical_zerofill_size > 0) { - var padding = try macho_file.base.allocator.alloc(u8, physical_zerofill_size); - defer macho_file.base.allocator.free(padding); + log.debug("zeroing out zerofill area of length {x} at {x}", .{ + physical_zerofill_size, + physical_zerofill_start, + }); + var padding = try zld.gpa.alloc(u8, physical_zerofill_size); + defer zld.gpa.free(padding); mem.set(u8, padding, 0); - try macho_file.base.file.?.pwriteAll(padding, physical_zerofill_start); + try zld.file.pwriteAll(padding, physical_zerofill_start); } } - try MachO.writeDylinkerLC(&ncmds, lc_writer); - try macho_file.writeMainLC(&ncmds, lc_writer); - try macho_file.writeDylibIdLC(&ncmds, lc_writer); - try macho_file.writeRpathLCs(&ncmds, lc_writer); + try Zld.writeDylinkerLC(&ncmds, lc_writer); + try zld.writeMainLC(&ncmds, lc_writer); + try zld.writeDylibIdLC(&ncmds, lc_writer); + try zld.writeRpathLCs(&ncmds, lc_writer); { try lc_writer.writeStruct(macho.source_version_command{ @@ -589,7 +4335,7 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr ncmds += 1; } - try macho_file.writeBuildVersionLC(&ncmds, lc_writer); + try zld.writeBuildVersionLC(&ncmds, lc_writer); { var uuid_lc = macho.uuid_command{ @@ -601,10 +4347,10 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr ncmds += 1; } - try macho_file.writeLoadDylibLCs(&ncmds, lc_writer); + try zld.writeLoadDylibLCs(&ncmds, lc_writer); const requires_codesig = blk: { - if (macho_file.base.options.entitlements) |_| break :blk true; + if (options.entitlements) |_| break :blk true; if (cpu_arch == .aarch64 and (os_tag == .macos or abi == .simulator)) break :blk true; break :blk false; }; @@ -615,29 +4361,29 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr // written out to the file. // The most important here is to have the correct vm and filesize of the __LINKEDIT segment // where the code signature goes into. 
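+        // Note: ordering matters here. Padding is laid out first so the final
+        // file size and __LINKEDIT bounds are fixed before anything is hashed;
+        // the actual signature is filled in last, in writeCodeSignature.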
- var codesig = CodeSignature.init(macho_file.page_size); - codesig.code_directory.ident = macho_file.base.options.emit.?.sub_path; - if (macho_file.base.options.entitlements) |path| { - try codesig.addEntitlements(arena, path); + var codesig = CodeSignature.init(page_size); + codesig.code_directory.ident = options.emit.?.sub_path; + if (options.entitlements) |path| { + try codesig.addEntitlements(gpa, path); } - codesig_offset = try writeCodeSignaturePadding(macho_file, &codesig, &ncmds, lc_writer); + codesig_offset = try zld.writeCodeSignaturePadding(&codesig, &ncmds, lc_writer); break :blk codesig; } else null; + defer if (codesig) |*csig| csig.deinit(gpa); var headers_buf = std.ArrayList(u8).init(arena); - try writeSegmentHeaders(macho_file, &ncmds, headers_buf.writer()); - - try macho_file.base.file.?.pwriteAll(headers_buf.items, @sizeOf(macho.mach_header_64)); - try macho_file.base.file.?.pwriteAll(lc_buffer.items, @sizeOf(macho.mach_header_64) + headers_buf.items.len); + try zld.writeSegmentHeaders(&ncmds, headers_buf.writer()); - try writeHeader(macho_file, ncmds, @intCast(u32, lc_buffer.items.len + headers_buf.items.len)); + try zld.file.pwriteAll(headers_buf.items, @sizeOf(macho.mach_header_64)); + try zld.file.pwriteAll(lc_buffer.items, @sizeOf(macho.mach_header_64) + headers_buf.items.len); + try zld.writeHeader(ncmds, @intCast(u32, lc_buffer.items.len + headers_buf.items.len)); if (codesig) |*csig| { - try writeCodeSignature(macho_file, csig, codesig_offset.?); // code signing always comes last + try zld.writeCodeSignature(csig, codesig_offset.?); // code signing always comes last } } - if (!macho_file.base.options.disable_lld_caching) { + if (!options.disable_lld_caching) { // Update the file with the digest. If it fails we can continue; it only // means that the next invocation will have an unnecessary cache miss. 
Cache.writeSmallFile(directory.handle, id_symlink_basename, &digest) catch |err| { @@ -653,1301 +4399,33 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr } } -fn initSections(macho_file: *MachO) !void { - const gpa = macho_file.base.allocator; - const cpu_arch = macho_file.base.options.target.cpu.arch; - const pagezero_vmsize = macho_file.calcPagezeroSize(); - - if (macho_file.pagezero_segment_cmd_index == null) { - if (pagezero_vmsize > 0) { - macho_file.pagezero_segment_cmd_index = @intCast(u8, macho_file.segments.items.len); - try macho_file.segments.append(gpa, .{ - .segname = MachO.makeStaticString("__PAGEZERO"), - .vmsize = pagezero_vmsize, - .cmdsize = @sizeOf(macho.segment_command_64), - }); - } - } - - if (macho_file.text_segment_cmd_index == null) { - macho_file.text_segment_cmd_index = @intCast(u8, macho_file.segments.items.len); - try macho_file.segments.append(gpa, .{ - .segname = MachO.makeStaticString("__TEXT"), - .vmaddr = pagezero_vmsize, - .vmsize = 0, - .filesize = 0, - .maxprot = macho.PROT.READ | macho.PROT.EXEC, - .initprot = macho.PROT.READ | macho.PROT.EXEC, - .cmdsize = @sizeOf(macho.segment_command_64), - }); - } - - if (macho_file.text_section_index == null) { - macho_file.text_section_index = try macho_file.initSection("__TEXT", "__text", .{ - .flags = macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, - }); - } - - if (macho_file.stubs_section_index == null) { - const stub_size: u4 = switch (cpu_arch) { - .x86_64 => 6, - .aarch64 => 3 * @sizeOf(u32), - else => unreachable, // unhandled architecture type - }; - macho_file.stubs_section_index = try macho_file.initSection("__TEXT", "__stubs", .{ - .flags = macho.S_SYMBOL_STUBS | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, - .reserved2 = stub_size, - }); - } - - if (macho_file.stub_helper_section_index == null) { - macho_file.stub_helper_section_index = try macho_file.initSection("__TEXT", "__stub_helper", .{ - .flags = macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, - }); - } - - if (macho_file.data_const_segment_cmd_index == null) { - macho_file.data_const_segment_cmd_index = @intCast(u8, macho_file.segments.items.len); - try macho_file.segments.append(gpa, .{ - .segname = MachO.makeStaticString("__DATA_CONST"), - .vmaddr = 0, - .vmsize = 0, - .fileoff = 0, - .filesize = 0, - .maxprot = macho.PROT.READ | macho.PROT.WRITE, - .initprot = macho.PROT.READ | macho.PROT.WRITE, - .cmdsize = @sizeOf(macho.segment_command_64), - }); - } - - if (macho_file.got_section_index == null) { - macho_file.got_section_index = try macho_file.initSection("__DATA_CONST", "__got", .{ - .flags = macho.S_NON_LAZY_SYMBOL_POINTERS, - }); - } - - if (macho_file.data_segment_cmd_index == null) { - macho_file.data_segment_cmd_index = @intCast(u8, macho_file.segments.items.len); - try macho_file.segments.append(gpa, .{ - .segname = MachO.makeStaticString("__DATA"), - .vmaddr = 0, - .vmsize = 0, - .fileoff = 0, - .filesize = 0, - .maxprot = macho.PROT.READ | macho.PROT.WRITE, - .initprot = macho.PROT.READ | macho.PROT.WRITE, - .cmdsize = @sizeOf(macho.segment_command_64), - }); - } - - if (macho_file.la_symbol_ptr_section_index == null) { - macho_file.la_symbol_ptr_section_index = try macho_file.initSection("__DATA", "__la_symbol_ptr", .{ - .flags = macho.S_LAZY_SYMBOL_POINTERS, - }); - } - - if (macho_file.data_section_index == null) { - macho_file.data_section_index = try macho_file.initSection("__DATA", "__data", .{}); - 
} - - if (macho_file.linkedit_segment_cmd_index == null) { - macho_file.linkedit_segment_cmd_index = @intCast(u8, macho_file.segments.items.len); - try macho_file.segments.append(gpa, .{ - .segname = MachO.makeStaticString("__LINKEDIT"), - .vmaddr = 0, - .fileoff = 0, - .maxprot = macho.PROT.READ, - .initprot = macho.PROT.READ, - .cmdsize = @sizeOf(macho.segment_command_64), - }); - } -} - -fn writeAtoms(macho_file: *MachO) !void { - assert(macho_file.mode == .one_shot); - - const gpa = macho_file.base.allocator; - const slice = macho_file.sections.slice(); - - for (slice.items(.last_atom)) |last_atom, sect_id| { - const header = slice.items(.header)[sect_id]; - if (header.size == 0) continue; - var atom = last_atom.?; - - if (header.isZerofill()) continue; - - var buffer = std.ArrayList(u8).init(gpa); - defer buffer.deinit(); - try buffer.ensureTotalCapacity(math.cast(usize, header.size) orelse return error.Overflow); - - log.debug("writing atoms in {s},{s}", .{ header.segName(), header.sectName() }); - - while (atom.prev) |prev| { - atom = prev; - } - - while (true) { - const this_sym = atom.getSymbol(macho_file); - const padding_size: usize = if (atom.next) |next| blk: { - const next_sym = next.getSymbol(macho_file); - const size = next_sym.n_value - (this_sym.n_value + atom.size); - break :blk math.cast(usize, size) orelse return error.Overflow; - } else 0; - - log.debug(" (adding ATOM(%{d}, '{s}') from object({?d}) to buffer)", .{ - atom.sym_index, - atom.getName(macho_file), - atom.file, - }); - if (padding_size > 0) { - log.debug(" (with padding {x})", .{padding_size}); - } - - try atom.resolveRelocs(macho_file); - buffer.appendSliceAssumeCapacity(atom.code.items); - - var i: usize = 0; - while (i < padding_size) : (i += 1) { - // TODO with NOPs - buffer.appendAssumeCapacity(0); - } - - if (atom.next) |next| { - atom = next; - } else { - assert(buffer.items.len == header.size); - log.debug(" (writing at file offset 0x{x})", .{header.offset}); - try macho_file.base.file.?.pwriteAll(buffer.items, header.offset); - break; - } - } - } -} - -fn allocateSegments(macho_file: *MachO) !void { - try allocateSegment(macho_file, macho_file.text_segment_cmd_index, &.{ - macho_file.pagezero_segment_cmd_index, - }, try macho_file.calcMinHeaderPad()); - - if (macho_file.text_segment_cmd_index) |index| blk: { - const indexes = macho_file.getSectionIndexes(index); - if (indexes.start == indexes.end) break :blk; - const seg = macho_file.segments.items[index]; - - // Shift all sections to the back to minimize jump size between __TEXT and __DATA segments. 
- var min_alignment: u32 = 0; - for (macho_file.sections.items(.header)[indexes.start..indexes.end]) |header| { - const alignment = try math.powi(u32, 2, header.@"align"); - min_alignment = math.max(min_alignment, alignment); - } - - assert(min_alignment > 0); - const last_header = macho_file.sections.items(.header)[indexes.end - 1]; - const shift: u32 = shift: { - const diff = seg.filesize - last_header.offset - last_header.size; - const factor = @divTrunc(diff, min_alignment); - break :shift @intCast(u32, factor * min_alignment); - }; - - if (shift > 0) { - for (macho_file.sections.items(.header)[indexes.start..indexes.end]) |*header| { - header.offset += shift; - header.addr += shift; - } - } - } - - try allocateSegment(macho_file, macho_file.data_const_segment_cmd_index, &.{ - macho_file.text_segment_cmd_index, - macho_file.pagezero_segment_cmd_index, - }, 0); - - try allocateSegment(macho_file, macho_file.data_segment_cmd_index, &.{ - macho_file.data_const_segment_cmd_index, - macho_file.text_segment_cmd_index, - macho_file.pagezero_segment_cmd_index, - }, 0); - - try allocateSegment(macho_file, macho_file.linkedit_segment_cmd_index, &.{ - macho_file.data_segment_cmd_index, - macho_file.data_const_segment_cmd_index, - macho_file.text_segment_cmd_index, - macho_file.pagezero_segment_cmd_index, - }, 0); -} - -fn getSegmentAllocBase(macho_file: *MachO, indices: []const ?u8) struct { vmaddr: u64, fileoff: u64 } { - for (indices) |maybe_prev_id| { - const prev_id = maybe_prev_id orelse continue; - const prev = macho_file.segments.items[prev_id]; - return .{ - .vmaddr = prev.vmaddr + prev.vmsize, - .fileoff = prev.fileoff + prev.filesize, - }; - } - return .{ .vmaddr = 0, .fileoff = 0 }; -} - -fn allocateSegment(macho_file: *MachO, maybe_index: ?u8, indices: []const ?u8, init_size: u64) !void { - const index = maybe_index orelse return; - const seg = &macho_file.segments.items[index]; - - const base = getSegmentAllocBase(macho_file, indices); - seg.vmaddr = base.vmaddr; - seg.fileoff = base.fileoff; - seg.filesize = init_size; - seg.vmsize = init_size; - - // Allocate the sections according to their alignment at the beginning of the segment. 
- const indexes = macho_file.getSectionIndexes(index); - var start = init_size; - const slice = macho_file.sections.slice(); - for (slice.items(.header)[indexes.start..indexes.end]) |*header| { - const alignment = try math.powi(u32, 2, header.@"align"); - const start_aligned = mem.alignForwardGeneric(u64, start, alignment); - - header.offset = if (header.isZerofill()) - 0 - else - @intCast(u32, seg.fileoff + start_aligned); - header.addr = seg.vmaddr + start_aligned; - - start = start_aligned + header.size; - - if (!header.isZerofill()) { - seg.filesize = start; - } - seg.vmsize = start; - } - - seg.filesize = mem.alignForwardGeneric(u64, seg.filesize, macho_file.page_size); - seg.vmsize = mem.alignForwardGeneric(u64, seg.vmsize, macho_file.page_size); -} - -fn allocateSymbols(macho_file: *MachO) !void { - const slice = macho_file.sections.slice(); - for (slice.items(.last_atom)) |last_atom, sect_id| { - const header = slice.items(.header)[sect_id]; - var atom = last_atom orelse continue; - - while (atom.prev) |prev| { - atom = prev; - } - - const n_sect = @intCast(u8, sect_id + 1); - var base_vaddr = header.addr; - - log.debug("allocating local symbols in sect({d}, '{s},{s}')", .{ - n_sect, - header.segName(), - header.sectName(), - }); - - while (true) { - const alignment = try math.powi(u32, 2, atom.alignment); - base_vaddr = mem.alignForwardGeneric(u64, base_vaddr, alignment); - - const sym = atom.getSymbolPtr(macho_file); - sym.n_value = base_vaddr; - sym.n_sect = n_sect; - - log.debug(" ATOM(%{d}, '{s}') @{x}", .{ atom.sym_index, atom.getName(macho_file), base_vaddr }); - - // Update each symbol contained within the atom - for (atom.contained.items) |sym_at_off| { - const contained_sym = macho_file.getSymbolPtr(.{ - .sym_index = sym_at_off.sym_index, - .file = atom.file, - }); - contained_sym.n_value = base_vaddr + sym_at_off.offset; - contained_sym.n_sect = n_sect; - } - - base_vaddr += atom.size; - - if (atom.next) |next| { - atom = next; - } else break; - } - } -} - -fn writeLinkeditSegmentData(macho_file: *MachO, ncmds: *u32, lc_writer: anytype) !void { - const seg = &macho_file.segments.items[macho_file.linkedit_segment_cmd_index.?]; - seg.filesize = 0; - seg.vmsize = 0; - - try writeDyldInfoData(macho_file, ncmds, lc_writer); - try writeFunctionStarts(macho_file, ncmds, lc_writer); - try writeDataInCode(macho_file, ncmds, lc_writer); - try writeSymtabs(macho_file, ncmds, lc_writer); - - seg.vmsize = mem.alignForwardGeneric(u64, seg.filesize, macho_file.page_size); -} - -fn writeDyldInfoData(macho_file: *MachO, ncmds: *u32, lc_writer: anytype) !void { - const tracy = trace(@src()); - defer tracy.end(); - - const gpa = macho_file.base.allocator; - - var rebase_pointers = std.ArrayList(bind.Pointer).init(gpa); - defer rebase_pointers.deinit(); - var bind_pointers = std.ArrayList(bind.Pointer).init(gpa); - defer bind_pointers.deinit(); - var lazy_bind_pointers = std.ArrayList(bind.Pointer).init(gpa); - defer lazy_bind_pointers.deinit(); - - const slice = macho_file.sections.slice(); - for (slice.items(.last_atom)) |last_atom, sect_id| { - var atom = last_atom orelse continue; - const segment_index = slice.items(.segment_index)[sect_id]; - const header = slice.items(.header)[sect_id]; - - if (mem.eql(u8, header.segName(), "__TEXT")) continue; // __TEXT is non-writable - - log.debug("dyld info for {s},{s}", .{ header.segName(), header.sectName() }); - - const seg = macho_file.segments.items[segment_index]; - - while (true) { - log.debug(" ATOM(%{d}, '{s}')", .{ atom.sym_index, 
atom.getName(macho_file) }); - const sym = atom.getSymbol(macho_file); - const base_offset = sym.n_value - seg.vmaddr; - - for (atom.rebases.items) |offset| { - log.debug(" | rebase at {x}", .{base_offset + offset}); - try rebase_pointers.append(.{ - .offset = base_offset + offset, - .segment_id = segment_index, - }); - } - - for (atom.bindings.items) |binding| { - const bind_sym = macho_file.getSymbol(binding.target); - const bind_sym_name = macho_file.getSymbolName(binding.target); - const dylib_ordinal = @divTrunc( - @bitCast(i16, bind_sym.n_desc), - macho.N_SYMBOL_RESOLVER, - ); - var flags: u4 = 0; - log.debug(" | bind at {x}, import('{s}') in dylib({d})", .{ - binding.offset + base_offset, - bind_sym_name, - dylib_ordinal, - }); - if (bind_sym.weakRef()) { - log.debug(" | marking as weak ref ", .{}); - flags |= @truncate(u4, macho.BIND_SYMBOL_FLAGS_WEAK_IMPORT); - } - try bind_pointers.append(.{ - .offset = binding.offset + base_offset, - .segment_id = segment_index, - .dylib_ordinal = dylib_ordinal, - .name = bind_sym_name, - .bind_flags = flags, - }); - } - - for (atom.lazy_bindings.items) |binding| { - const bind_sym = macho_file.getSymbol(binding.target); - const bind_sym_name = macho_file.getSymbolName(binding.target); - const dylib_ordinal = @divTrunc( - @bitCast(i16, bind_sym.n_desc), - macho.N_SYMBOL_RESOLVER, - ); - var flags: u4 = 0; - log.debug(" | lazy bind at {x} import('{s}') ord({d})", .{ - binding.offset + base_offset, - bind_sym_name, - dylib_ordinal, - }); - if (bind_sym.weakRef()) { - log.debug(" | marking as weak ref ", .{}); - flags |= @truncate(u4, macho.BIND_SYMBOL_FLAGS_WEAK_IMPORT); - } - try lazy_bind_pointers.append(.{ - .offset = binding.offset + base_offset, - .segment_id = segment_index, - .dylib_ordinal = dylib_ordinal, - .name = bind_sym_name, - .bind_flags = flags, - }); - } - - if (atom.prev) |prev| { - atom = prev; - } else break; - } - } - - var trie: Trie = .{}; - defer trie.deinit(gpa); - - { - // TODO handle macho.EXPORT_SYMBOL_FLAGS_REEXPORT and macho.EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER. 
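The `dylib_ordinal` computation in the hunk above leans on the Mach-O convention that, for an undefined symbol, the owning dylib's ordinal is stored in the upper byte of `n_desc` (`macho.N_SYMBOL_RESOLVER` is `0x100`). A small round-trip sketch, with an arbitrary ordinal:

```zig
const std = @import("std");
const macho = std.macho;

test "dylib ordinal stored in the upper byte of n_desc" {
    // Packing: what the resolver does when it assigns an import to a
    // dylib (ordinal 3 is an arbitrary example value).
    const ordinal: i16 = 3;
    const sym = macho.nlist_64{
        .n_strx = 0,
        .n_type = macho.N_UNDF | macho.N_EXT,
        .n_sect = 0,
        .n_desc = @bitCast(u16, ordinal * macho.N_SYMBOL_RESOLVER),
        .n_value = 0,
    };

    // Unpacking: the same expression the hunk above uses when it
    // emits bind opcodes.
    const dylib_ordinal = @divTrunc(@bitCast(i16, sym.n_desc), macho.N_SYMBOL_RESOLVER);
    try std.testing.expectEqual(@as(i16, 3), dylib_ordinal);
}
```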
- log.debug("generating export trie", .{}); - - const text_segment = macho_file.segments.items[macho_file.text_segment_cmd_index.?]; - const base_address = text_segment.vmaddr; - - if (macho_file.base.options.output_mode == .Exe) { - for (&[_]SymbolWithLoc{ - try macho_file.getEntryPoint(), - macho_file.getGlobal("__mh_execute_header").?, - }) |global| { - const sym = macho_file.getSymbol(global); - const sym_name = macho_file.getSymbolName(global); - log.debug(" (putting '{s}' defined at 0x{x})", .{ sym_name, sym.n_value }); - try trie.put(gpa, .{ - .name = sym_name, - .vmaddr_offset = sym.n_value - base_address, - .export_flags = macho.EXPORT_SYMBOL_FLAGS_KIND_REGULAR, - }); - } +/// Binary search +pub fn bsearch(comptime T: type, haystack: []align(1) const T, predicate: anytype) usize { + if (!@hasDecl(@TypeOf(predicate), "predicate")) + @compileError("Predicate is required to define fn predicate(@This(), T) bool"); + + var min: usize = 0; + var max: usize = haystack.len; + while (min < max) { + const index = (min + max) / 2; + const curr = haystack[index]; + if (predicate.predicate(curr)) { + min = index + 1; } else { - assert(macho_file.base.options.output_mode == .Lib); - for (macho_file.globals.items) |global| { - const sym = macho_file.getSymbol(global); - - if (sym.undf()) continue; - if (!sym.ext()) continue; - if (sym.n_desc == MachO.N_DESC_GCED) continue; - - const sym_name = macho_file.getSymbolName(global); - log.debug(" (putting '{s}' defined at 0x{x})", .{ sym_name, sym.n_value }); - try trie.put(gpa, .{ - .name = sym_name, - .vmaddr_offset = sym.n_value - base_address, - .export_flags = macho.EXPORT_SYMBOL_FLAGS_KIND_REGULAR, - }); - } - } - - try trie.finalize(gpa); - } - - const link_seg = &macho_file.segments.items[macho_file.linkedit_segment_cmd_index.?]; - const rebase_off = mem.alignForwardGeneric(u64, link_seg.fileoff, @alignOf(u64)); - assert(rebase_off == link_seg.fileoff); - const rebase_size = try bind.rebaseInfoSize(rebase_pointers.items); - log.debug("writing rebase info from 0x{x} to 0x{x}", .{ rebase_off, rebase_off + rebase_size }); - - const bind_off = mem.alignForwardGeneric(u64, rebase_off + rebase_size, @alignOf(u64)); - const bind_size = try bind.bindInfoSize(bind_pointers.items); - log.debug("writing bind info from 0x{x} to 0x{x}", .{ bind_off, bind_off + bind_size }); - - const lazy_bind_off = mem.alignForwardGeneric(u64, bind_off + bind_size, @alignOf(u64)); - const lazy_bind_size = try bind.lazyBindInfoSize(lazy_bind_pointers.items); - log.debug("writing lazy bind info from 0x{x} to 0x{x}", .{ lazy_bind_off, lazy_bind_off + lazy_bind_size }); - - const export_off = mem.alignForwardGeneric(u64, lazy_bind_off + lazy_bind_size, @alignOf(u64)); - const export_size = trie.size; - log.debug("writing export trie from 0x{x} to 0x{x}", .{ export_off, export_off + export_size }); - - const needed_size = export_off + export_size - rebase_off; - link_seg.filesize = needed_size; - - var buffer = try gpa.alloc(u8, math.cast(usize, needed_size) orelse return error.Overflow); - defer gpa.free(buffer); - mem.set(u8, buffer, 0); - - var stream = std.io.fixedBufferStream(buffer); - const writer = stream.writer(); - - try bind.writeRebaseInfo(rebase_pointers.items, writer); - try stream.seekTo(bind_off - rebase_off); - - try bind.writeBindInfo(bind_pointers.items, writer); - try stream.seekTo(lazy_bind_off - rebase_off); - - try bind.writeLazyBindInfo(lazy_bind_pointers.items, writer); - try stream.seekTo(export_off - rebase_off); - - _ = try trie.write(writer); - - 
log.debug("writing dyld info from 0x{x} to 0x{x}", .{ - rebase_off, - rebase_off + needed_size, - }); - - try macho_file.base.file.?.pwriteAll(buffer, rebase_off); - const start = math.cast(usize, lazy_bind_off - rebase_off) orelse return error.Overflow; - const end = start + (math.cast(usize, lazy_bind_size) orelse return error.Overflow); - try populateLazyBindOffsetsInStubHelper(macho_file, buffer[start..end]); - - try lc_writer.writeStruct(macho.dyld_info_command{ - .cmd = .DYLD_INFO_ONLY, - .cmdsize = @sizeOf(macho.dyld_info_command), - .rebase_off = @intCast(u32, rebase_off), - .rebase_size = @intCast(u32, rebase_size), - .bind_off = @intCast(u32, bind_off), - .bind_size = @intCast(u32, bind_size), - .weak_bind_off = 0, - .weak_bind_size = 0, - .lazy_bind_off = @intCast(u32, lazy_bind_off), - .lazy_bind_size = @intCast(u32, lazy_bind_size), - .export_off = @intCast(u32, export_off), - .export_size = @intCast(u32, export_size), - }); - ncmds.* += 1; -} - -fn populateLazyBindOffsetsInStubHelper(macho_file: *MachO, buffer: []const u8) !void { - const gpa = macho_file.base.allocator; - - const stub_helper_section_index = macho_file.stub_helper_section_index orelse return; - if (macho_file.stub_helper_preamble_atom == null) return; - - const section = macho_file.sections.get(stub_helper_section_index); - const last_atom = section.last_atom orelse return; - if (last_atom == macho_file.stub_helper_preamble_atom.?) return; // TODO is this a redundant check? - - var table = std.AutoHashMap(i64, *Atom).init(gpa); - defer table.deinit(); - - { - var stub_atom = last_atom; - var laptr_atom = macho_file.sections.items(.last_atom)[macho_file.la_symbol_ptr_section_index.?].?; - const base_addr = blk: { - const seg = macho_file.segments.items[macho_file.data_segment_cmd_index.?]; - break :blk seg.vmaddr; - }; - - while (true) { - const laptr_off = blk: { - const sym = laptr_atom.getSymbol(macho_file); - break :blk @intCast(i64, sym.n_value - base_addr); - }; - try table.putNoClobber(laptr_off, stub_atom); - if (laptr_atom.prev) |prev| { - laptr_atom = prev; - stub_atom = stub_atom.prev.?; - } else break; - } - } - - var stream = std.io.fixedBufferStream(buffer); - var reader = stream.reader(); - var offsets = std.ArrayList(struct { sym_offset: i64, offset: u32 }).init(gpa); - try offsets.append(.{ .sym_offset = undefined, .offset = 0 }); - defer offsets.deinit(); - var valid_block = false; - - while (true) { - const inst = reader.readByte() catch |err| switch (err) { - error.EndOfStream => break, - }; - const opcode: u8 = inst & macho.BIND_OPCODE_MASK; - - switch (opcode) { - macho.BIND_OPCODE_DO_BIND => { - valid_block = true; - }, - macho.BIND_OPCODE_DONE => { - if (valid_block) { - const offset = try stream.getPos(); - try offsets.append(.{ .sym_offset = undefined, .offset = @intCast(u32, offset) }); - } - valid_block = false; - }, - macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM => { - var next = try reader.readByte(); - while (next != @as(u8, 0)) { - next = try reader.readByte(); - } - }, - macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB => { - var inserted = offsets.pop(); - inserted.sym_offset = try std.leb.readILEB128(i64, reader); - try offsets.append(inserted); - }, - macho.BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB => { - _ = try std.leb.readULEB128(u64, reader); - }, - macho.BIND_OPCODE_SET_ADDEND_SLEB => { - _ = try std.leb.readILEB128(i64, reader); - }, - else => {}, - } - } - - const header = macho_file.sections.items(.header)[stub_helper_section_index]; - const stub_offset: u4 = switch 
(macho_file.base.options.target.cpu.arch) { - .x86_64 => 1, - .aarch64 => 2 * @sizeOf(u32), - else => unreachable, - }; - var buf: [@sizeOf(u32)]u8 = undefined; - _ = offsets.pop(); - - while (offsets.popOrNull()) |bind_offset| { - const atom = table.get(bind_offset.sym_offset).?; - const sym = atom.getSymbol(macho_file); - const file_offset = header.offset + sym.n_value - header.addr + stub_offset; - mem.writeIntLittle(u32, &buf, bind_offset.offset); - log.debug("writing lazy bind offset in stub helper of 0x{x} for symbol {s} at offset 0x{x}", .{ - bind_offset.offset, - atom.getName(macho_file), - file_offset, - }); - try macho_file.base.file.?.pwriteAll(&buf, file_offset); - } -} - -const asc_u64 = std.sort.asc(u64); - -fn writeFunctionStarts(macho_file: *MachO, ncmds: *u32, lc_writer: anytype) !void { - const tracy = trace(@src()); - defer tracy.end(); - - const text_seg_index = macho_file.text_segment_cmd_index orelse return; - const text_sect_index = macho_file.text_section_index orelse return; - const text_seg = macho_file.segments.items[text_seg_index]; - - const gpa = macho_file.base.allocator; - - // We need to sort by address first - var addresses = std.ArrayList(u64).init(gpa); - defer addresses.deinit(); - try addresses.ensureTotalCapacityPrecise(macho_file.globals.items.len); - - for (macho_file.globals.items) |global| { - const sym = macho_file.getSymbol(global); - if (sym.undf()) continue; - if (sym.n_desc == MachO.N_DESC_GCED) continue; - const sect_id = sym.n_sect - 1; - if (sect_id != text_sect_index) continue; - - addresses.appendAssumeCapacity(sym.n_value); - } - - std.sort.sort(u64, addresses.items, {}, asc_u64); - - var offsets = std.ArrayList(u32).init(gpa); - defer offsets.deinit(); - try offsets.ensureTotalCapacityPrecise(addresses.items.len); - - var last_off: u32 = 0; - for (addresses.items) |addr| { - const offset = @intCast(u32, addr - text_seg.vmaddr); - const diff = offset - last_off; - - if (diff == 0) continue; - - offsets.appendAssumeCapacity(diff); - last_off = offset; - } - - var buffer = std.ArrayList(u8).init(gpa); - defer buffer.deinit(); - - const max_size = @intCast(usize, offsets.items.len * @sizeOf(u64)); - try buffer.ensureTotalCapacity(max_size); - - for (offsets.items) |offset| { - try std.leb.writeULEB128(buffer.writer(), offset); - } - - const link_seg = &macho_file.segments.items[macho_file.linkedit_segment_cmd_index.?]; - const offset = mem.alignForwardGeneric(u64, link_seg.fileoff + link_seg.filesize, @alignOf(u64)); - const needed_size = buffer.items.len; - link_seg.filesize = offset + needed_size - link_seg.fileoff; - - log.debug("writing function starts info from 0x{x} to 0x{x}", .{ offset, offset + needed_size }); - - try macho_file.base.file.?.pwriteAll(buffer.items, offset); - - try lc_writer.writeStruct(macho.linkedit_data_command{ - .cmd = .FUNCTION_STARTS, - .cmdsize = @sizeOf(macho.linkedit_data_command), - .dataoff = @intCast(u32, offset), - .datasize = @intCast(u32, needed_size), - }); - ncmds.* += 1; -} - -fn filterDataInCode( - dices: []align(1) const macho.data_in_code_entry, - start_addr: u64, - end_addr: u64, -) []align(1) const macho.data_in_code_entry { - const Predicate = struct { - addr: u64, - - pub fn predicate(macho_file: @This(), dice: macho.data_in_code_entry) bool { - return dice.offset >= macho_file.addr; - } - }; - - const start = MachO.findFirst(macho.data_in_code_entry, dices, 0, Predicate{ .addr = start_addr }); - const end = MachO.findFirst(macho.data_in_code_entry, dices, start, Predicate{ .addr = end_addr 
}); - - return dices[start..end]; -} - -fn writeDataInCode(macho_file: *MachO, ncmds: *u32, lc_writer: anytype) !void { - const tracy = trace(@src()); - defer tracy.end(); - - var out_dice = std.ArrayList(macho.data_in_code_entry).init(macho_file.base.allocator); - defer out_dice.deinit(); - - const text_sect_id = macho_file.text_section_index orelse return; - const text_sect_header = macho_file.sections.items(.header)[text_sect_id]; - - for (macho_file.objects.items) |object| { - const dice = object.parseDataInCode() orelse continue; - try out_dice.ensureUnusedCapacity(dice.len); - - for (object.managed_atoms.items) |atom| { - const sym = atom.getSymbol(macho_file); - if (sym.n_desc == MachO.N_DESC_GCED) continue; - - const sect_id = sym.n_sect - 1; - if (sect_id != macho_file.text_section_index.?) { - continue; - } - - const source_sym = object.getSourceSymbol(atom.sym_index) orelse continue; - const source_addr = math.cast(u32, source_sym.n_value) orelse return error.Overflow; - const filtered_dice = filterDataInCode(dice, source_addr, source_addr + atom.size); - const base = math.cast(u32, sym.n_value - text_sect_header.addr + text_sect_header.offset) orelse - return error.Overflow; - - for (filtered_dice) |single| { - const offset = single.offset - source_addr + base; - out_dice.appendAssumeCapacity(.{ - .offset = offset, - .length = single.length, - .kind = single.kind, - }); - } + max = index; } } - - const seg = &macho_file.segments.items[macho_file.linkedit_segment_cmd_index.?]; - const offset = mem.alignForwardGeneric(u64, seg.fileoff + seg.filesize, @alignOf(u64)); - const needed_size = out_dice.items.len * @sizeOf(macho.data_in_code_entry); - seg.filesize = offset + needed_size - seg.fileoff; - - log.debug("writing data-in-code from 0x{x} to 0x{x}", .{ offset, offset + needed_size }); - - try macho_file.base.file.?.pwriteAll(mem.sliceAsBytes(out_dice.items), offset); - try lc_writer.writeStruct(macho.linkedit_data_command{ - .cmd = .DATA_IN_CODE, - .cmdsize = @sizeOf(macho.linkedit_data_command), - .dataoff = @intCast(u32, offset), - .datasize = @intCast(u32, needed_size), - }); - ncmds.* += 1; -} - -fn writeSymtabs(macho_file: *MachO, ncmds: *u32, lc_writer: anytype) !void { - var symtab_cmd = macho.symtab_command{ - .cmdsize = @sizeOf(macho.symtab_command), - .symoff = 0, - .nsyms = 0, - .stroff = 0, - .strsize = 0, - }; - var dysymtab_cmd = macho.dysymtab_command{ - .cmdsize = @sizeOf(macho.dysymtab_command), - .ilocalsym = 0, - .nlocalsym = 0, - .iextdefsym = 0, - .nextdefsym = 0, - .iundefsym = 0, - .nundefsym = 0, - .tocoff = 0, - .ntoc = 0, - .modtaboff = 0, - .nmodtab = 0, - .extrefsymoff = 0, - .nextrefsyms = 0, - .indirectsymoff = 0, - .nindirectsyms = 0, - .extreloff = 0, - .nextrel = 0, - .locreloff = 0, - .nlocrel = 0, - }; - var ctx = try writeSymtab(macho_file, &symtab_cmd); - defer ctx.imports_table.deinit(); - try writeDysymtab(macho_file, ctx, &dysymtab_cmd); - try writeStrtab(macho_file, &symtab_cmd); - try lc_writer.writeStruct(symtab_cmd); - try lc_writer.writeStruct(dysymtab_cmd); - ncmds.* += 2; + return min; } -fn writeSymtab(macho_file: *MachO, lc: *macho.symtab_command) !SymtabCtx { - const gpa = macho_file.base.allocator; - - var locals = std.ArrayList(macho.nlist_64).init(gpa); - defer locals.deinit(); - - for (macho_file.locals.items) |sym, sym_id| { - if (sym.n_strx == 0) continue; // no name, skip - if (sym.n_desc == MachO.N_DESC_GCED) continue; // GCed, skip - const sym_loc = SymbolWithLoc{ .sym_index = @intCast(u32, sym_id), .file = null }; - if 
(macho_file.symbolIsTemp(sym_loc)) continue; // local temp symbol, skip - if (macho_file.getGlobal(macho_file.getSymbolName(sym_loc)) != null) continue; // global symbol is either an export or import, skip - try locals.append(sym); - } - - for (macho_file.objects.items) |object, object_id| { - for (object.symtab.items) |sym, sym_id| { - if (sym.n_strx == 0) continue; // no name, skip - if (sym.n_desc == MachO.N_DESC_GCED) continue; // GCed, skip - const sym_loc = SymbolWithLoc{ .sym_index = @intCast(u32, sym_id), .file = @intCast(u32, object_id) }; - if (macho_file.symbolIsTemp(sym_loc)) continue; // local temp symbol, skip - if (macho_file.getGlobal(macho_file.getSymbolName(sym_loc)) != null) continue; // global symbol is either an export or import, skip - var out_sym = sym; - out_sym.n_strx = try macho_file.strtab.insert(gpa, macho_file.getSymbolName(sym_loc)); - try locals.append(out_sym); - } - - if (!macho_file.base.options.strip) { - try generateSymbolStabs(macho_file, object, &locals); - } - } - - var exports = std.ArrayList(macho.nlist_64).init(gpa); - defer exports.deinit(); - - for (macho_file.globals.items) |global| { - const sym = macho_file.getSymbol(global); - if (sym.undf()) continue; // import, skip - if (sym.n_desc == MachO.N_DESC_GCED) continue; // GCed, skip - var out_sym = sym; - out_sym.n_strx = try macho_file.strtab.insert(gpa, macho_file.getSymbolName(global)); - try exports.append(out_sym); - } - - var imports = std.ArrayList(macho.nlist_64).init(gpa); - defer imports.deinit(); +/// Linear search +pub fn lsearch(comptime T: type, haystack: []align(1) const T, predicate: anytype) usize { + if (!@hasDecl(@TypeOf(predicate), "predicate")) + @compileError("Predicate is required to define fn predicate(@This(), T) bool"); - var imports_table = std.AutoHashMap(SymbolWithLoc, u32).init(gpa); - - for (macho_file.globals.items) |global| { - const sym = macho_file.getSymbol(global); - if (sym.n_strx == 0) continue; // no name, skip - if (!sym.undf()) continue; // not an import, skip - const new_index = @intCast(u32, imports.items.len); - var out_sym = sym; - out_sym.n_strx = try macho_file.strtab.insert(gpa, macho_file.getSymbolName(global)); - try imports.append(out_sym); - try imports_table.putNoClobber(global, new_index); + var i: usize = 0; + while (i < haystack.len) : (i += 1) { + if (predicate.predicate(haystack[i])) break; } - - const nlocals = @intCast(u32, locals.items.len); - const nexports = @intCast(u32, exports.items.len); - const nimports = @intCast(u32, imports.items.len); - const nsyms = nlocals + nexports + nimports; - - const seg = &macho_file.segments.items[macho_file.linkedit_segment_cmd_index.?]; - const offset = mem.alignForwardGeneric( - u64, - seg.fileoff + seg.filesize, - @alignOf(macho.nlist_64), - ); - const needed_size = nsyms * @sizeOf(macho.nlist_64); - seg.filesize = offset + needed_size - seg.fileoff; - - var buffer = std.ArrayList(u8).init(gpa); - defer buffer.deinit(); - try buffer.ensureTotalCapacityPrecise(needed_size); - buffer.appendSliceAssumeCapacity(mem.sliceAsBytes(locals.items)); - buffer.appendSliceAssumeCapacity(mem.sliceAsBytes(exports.items)); - buffer.appendSliceAssumeCapacity(mem.sliceAsBytes(imports.items)); - - log.debug("writing symtab from 0x{x} to 0x{x}", .{ offset, offset + needed_size }); - try macho_file.base.file.?.pwriteAll(buffer.items, offset); - - lc.symoff = @intCast(u32, offset); - lc.nsyms = nsyms; - - return SymtabCtx{ - .nlocalsym = nlocals, - .nextdefsym = nexports, - .nundefsym = nimports, - .imports_table = 
imports_table, - }; -} - -fn writeStrtab(macho_file: *MachO, lc: *macho.symtab_command) !void { - const seg = &macho_file.segments.items[macho_file.linkedit_segment_cmd_index.?]; - const offset = mem.alignForwardGeneric(u64, seg.fileoff + seg.filesize, @alignOf(u64)); - const needed_size = macho_file.strtab.buffer.items.len; - seg.filesize = offset + needed_size - seg.fileoff; - - log.debug("writing string table from 0x{x} to 0x{x}", .{ offset, offset + needed_size }); - - try macho_file.base.file.?.pwriteAll(macho_file.strtab.buffer.items, offset); - - lc.stroff = @intCast(u32, offset); - lc.strsize = @intCast(u32, needed_size); -} - -pub fn generateSymbolStabs( - macho_file: *MachO, - object: Object, - locals: *std.ArrayList(macho.nlist_64), -) !void { - assert(!macho_file.base.options.strip); - - log.debug("parsing debug info in '{s}'", .{object.name}); - - const gpa = macho_file.base.allocator; - var debug_info = try object.parseDwarfInfo(); - defer debug_info.deinit(gpa); - try dwarf.openDwarfDebugInfo(&debug_info, gpa); - - // We assume there is only one CU. - const compile_unit = debug_info.findCompileUnit(0x0) catch |err| switch (err) { - error.MissingDebugInfo => { - // TODO audit cases with missing debug info and audit our dwarf.zig module. - log.debug("invalid or missing debug info in {s}; skipping", .{object.name}); - return; - }, - else => |e| return e, - }; - - const tu_name = try compile_unit.die.getAttrString(&debug_info, dwarf.AT.name, debug_info.debug_str, compile_unit.*); - const tu_comp_dir = try compile_unit.die.getAttrString(&debug_info, dwarf.AT.comp_dir, debug_info.debug_str, compile_unit.*); - - // Open scope - try locals.ensureUnusedCapacity(3); - locals.appendAssumeCapacity(.{ - .n_strx = try macho_file.strtab.insert(gpa, tu_comp_dir), - .n_type = macho.N_SO, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }); - locals.appendAssumeCapacity(.{ - .n_strx = try macho_file.strtab.insert(gpa, tu_name), - .n_type = macho.N_SO, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }); - locals.appendAssumeCapacity(.{ - .n_strx = try macho_file.strtab.insert(gpa, object.name), - .n_type = macho.N_OSO, - .n_sect = 0, - .n_desc = 1, - .n_value = object.mtime, - }); - - var stabs_buf: [4]macho.nlist_64 = undefined; - - for (object.managed_atoms.items) |atom| { - const stabs = try generateSymbolStabsForSymbol( - macho_file, - atom.getSymbolWithLoc(), - debug_info, - &stabs_buf, - ); - try locals.appendSlice(stabs); - - for (atom.contained.items) |sym_at_off| { - const sym_loc = SymbolWithLoc{ - .sym_index = sym_at_off.sym_index, - .file = atom.file, - }; - const contained_stabs = try generateSymbolStabsForSymbol( - macho_file, - sym_loc, - debug_info, - &stabs_buf, - ); - try locals.appendSlice(contained_stabs); - } - } - - // Close scope - try locals.append(.{ - .n_strx = 0, - .n_type = macho.N_SO, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }); -} - -fn generateSymbolStabsForSymbol( - macho_file: *MachO, - sym_loc: SymbolWithLoc, - debug_info: dwarf.DwarfInfo, - buf: *[4]macho.nlist_64, -) ![]const macho.nlist_64 { - const gpa = macho_file.base.allocator; - const object = macho_file.objects.items[sym_loc.file.?]; - const sym = macho_file.getSymbol(sym_loc); - const sym_name = macho_file.getSymbolName(sym_loc); - - if (sym.n_strx == 0) return buf[0..0]; - if (sym.n_desc == MachO.N_DESC_GCED) return buf[0..0]; - if (macho_file.symbolIsTemp(sym_loc)) return buf[0..0]; - - const source_sym = object.getSourceSymbol(sym_loc.sym_index) orelse return buf[0..0]; - const size: ?u64 = 
size: { - if (source_sym.tentative()) break :size null; - for (debug_info.func_list.items) |func| { - if (func.pc_range) |range| { - if (source_sym.n_value >= range.start and source_sym.n_value < range.end) { - break :size range.end - range.start; - } - } - } - break :size null; - }; - - if (size) |ss| { - buf[0] = .{ - .n_strx = 0, - .n_type = macho.N_BNSYM, - .n_sect = sym.n_sect, - .n_desc = 0, - .n_value = sym.n_value, - }; - buf[1] = .{ - .n_strx = try macho_file.strtab.insert(gpa, sym_name), - .n_type = macho.N_FUN, - .n_sect = sym.n_sect, - .n_desc = 0, - .n_value = sym.n_value, - }; - buf[2] = .{ - .n_strx = 0, - .n_type = macho.N_FUN, - .n_sect = 0, - .n_desc = 0, - .n_value = ss, - }; - buf[3] = .{ - .n_strx = 0, - .n_type = macho.N_ENSYM, - .n_sect = sym.n_sect, - .n_desc = 0, - .n_value = ss, - }; - return buf; - } else { - buf[0] = .{ - .n_strx = try macho_file.strtab.insert(gpa, sym_name), - .n_type = macho.N_STSYM, - .n_sect = sym.n_sect, - .n_desc = 0, - .n_value = sym.n_value, - }; - return buf[0..1]; - } -} - -const SymtabCtx = struct { - nlocalsym: u32, - nextdefsym: u32, - nundefsym: u32, - imports_table: std.AutoHashMap(SymbolWithLoc, u32), -}; - -fn writeDysymtab(macho_file: *MachO, ctx: SymtabCtx, lc: *macho.dysymtab_command) !void { - const gpa = macho_file.base.allocator; - const nstubs = @intCast(u32, macho_file.stubs_table.count()); - const ngot_entries = @intCast(u32, macho_file.got_entries_table.count()); - const nindirectsyms = nstubs * 2 + ngot_entries; - const iextdefsym = ctx.nlocalsym; - const iundefsym = iextdefsym + ctx.nextdefsym; - - const seg = &macho_file.segments.items[macho_file.linkedit_segment_cmd_index.?]; - const offset = mem.alignForwardGeneric(u64, seg.fileoff + seg.filesize, @alignOf(u64)); - const needed_size = nindirectsyms * @sizeOf(u32); - seg.filesize = offset + needed_size - seg.fileoff; - - log.debug("writing indirect symbol table from 0x{x} to 0x{x}", .{ offset, offset + needed_size }); - - var buf = std.ArrayList(u8).init(gpa); - defer buf.deinit(); - try buf.ensureTotalCapacity(needed_size); - const writer = buf.writer(); - - if (macho_file.stubs_section_index) |sect_id| { - const stubs = &macho_file.sections.items(.header)[sect_id]; - stubs.reserved1 = 0; - for (macho_file.stubs.items) |entry| { - if (entry.sym_index == 0) continue; - const atom_sym = entry.getSymbol(macho_file); - if (atom_sym.n_desc == MachO.N_DESC_GCED) continue; - const target_sym = macho_file.getSymbol(entry.target); - assert(target_sym.undf()); - try writer.writeIntLittle(u32, iundefsym + ctx.imports_table.get(entry.target).?); - } - } - - if (macho_file.got_section_index) |sect_id| { - const got = &macho_file.sections.items(.header)[sect_id]; - got.reserved1 = nstubs; - for (macho_file.got_entries.items) |entry| { - if (entry.sym_index == 0) continue; - const atom_sym = entry.getSymbol(macho_file); - if (atom_sym.n_desc == MachO.N_DESC_GCED) continue; - const target_sym = macho_file.getSymbol(entry.target); - if (target_sym.undf()) { - try writer.writeIntLittle(u32, iundefsym + ctx.imports_table.get(entry.target).?); - } else { - try writer.writeIntLittle(u32, macho.INDIRECT_SYMBOL_LOCAL); - } - } - } - - if (macho_file.la_symbol_ptr_section_index) |sect_id| { - const la_symbol_ptr = &macho_file.sections.items(.header)[sect_id]; - la_symbol_ptr.reserved1 = nstubs + ngot_entries; - for (macho_file.stubs.items) |entry| { - if (entry.sym_index == 0) continue; - const atom_sym = entry.getSymbol(macho_file); - if (atom_sym.n_desc == MachO.N_DESC_GCED) continue; 
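A note on the `reserved1` bookkeeping in the surrounding hunk: each synthetic section records the index where its entries begin in the shared indirect symbol table, and `__la_symbol_ptr` carries exactly one pointer per stub, which is why `nindirectsyms` is `nstubs * 2 + ngot_entries`. The index arithmetic, with hypothetical counts:

```zig
const std = @import("std");

test "indirect symbol table layout" {
    // Hypothetical entry counts; __la_symbol_ptr always mirrors __stubs.
    const nstubs: u32 = 4;
    const ngot_entries: u32 = 7;

    const stubs_base: u32 = 0; // stubs.reserved1
    const got_base = stubs_base + nstubs; // got.reserved1
    const laptr_base = got_base + ngot_entries; // la_symbol_ptr.reserved1
    const nindirectsyms = laptr_base + nstubs;

    try std.testing.expectEqual(@as(u32, 4), got_base);
    try std.testing.expectEqual(@as(u32, 11), laptr_base);
    try std.testing.expectEqual(nstubs * 2 + ngot_entries, nindirectsyms);
}
```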
- const target_sym = macho_file.getSymbol(entry.target); - assert(target_sym.undf()); - try writer.writeIntLittle(u32, iundefsym + ctx.imports_table.get(entry.target).?); - } - } - - assert(buf.items.len == needed_size); - try macho_file.base.file.?.pwriteAll(buf.items, offset); - - lc.nlocalsym = ctx.nlocalsym; - lc.iextdefsym = iextdefsym; - lc.nextdefsym = ctx.nextdefsym; - lc.iundefsym = iundefsym; - lc.nundefsym = ctx.nundefsym; - lc.indirectsymoff = @intCast(u32, offset); - lc.nindirectsyms = nindirectsyms; -} - -fn writeCodeSignaturePadding( - macho_file: *MachO, - code_sig: *CodeSignature, - ncmds: *u32, - lc_writer: anytype, -) !u32 { - const seg = &macho_file.segments.items[macho_file.linkedit_segment_cmd_index.?]; - // Code signature data has to be 16-bytes aligned for Apple tools to recognize the file - // https://github.com/opensource-apple/cctools/blob/fdb4825f303fd5c0751be524babd32958181b3ed/libstuff/checkout.c#L271 - const offset = mem.alignForwardGeneric(u64, seg.fileoff + seg.filesize, 16); - const needed_size = code_sig.estimateSize(offset); - seg.filesize = offset + needed_size - seg.fileoff; - seg.vmsize = mem.alignForwardGeneric(u64, seg.filesize, macho_file.page_size); - log.debug("writing code signature padding from 0x{x} to 0x{x}", .{ offset, offset + needed_size }); - // Pad out the space. We need to do this to calculate valid hashes for everything in the file - // except for code signature data. - try macho_file.base.file.?.pwriteAll(&[_]u8{0}, offset + needed_size - 1); - - try lc_writer.writeStruct(macho.linkedit_data_command{ - .cmd = .CODE_SIGNATURE, - .cmdsize = @sizeOf(macho.linkedit_data_command), - .dataoff = @intCast(u32, offset), - .datasize = @intCast(u32, needed_size), - }); - ncmds.* += 1; - - return @intCast(u32, offset); -} - -fn writeCodeSignature(macho_file: *MachO, code_sig: *CodeSignature, offset: u32) !void { - const seg = macho_file.segments.items[macho_file.text_segment_cmd_index.?]; - - var buffer = std.ArrayList(u8).init(macho_file.base.allocator); - defer buffer.deinit(); - try buffer.ensureTotalCapacityPrecise(code_sig.size()); - try code_sig.writeAdhocSignature(macho_file.base.allocator, .{ - .file = macho_file.base.file.?, - .exec_seg_base = seg.fileoff, - .exec_seg_limit = seg.filesize, - .file_size = offset, - .output_mode = macho_file.base.options.output_mode, - }, buffer.writer()); - assert(buffer.items.len == code_sig.size()); - - log.debug("writing code signature from 0x{x} to 0x{x}", .{ - offset, - offset + buffer.items.len, - }); - - try macho_file.base.file.?.pwriteAll(buffer.items, offset); -} - -fn writeSegmentHeaders(macho_file: *MachO, ncmds: *u32, writer: anytype) !void { - for (macho_file.segments.items) |seg, i| { - const indexes = macho_file.getSectionIndexes(@intCast(u8, i)); - var out_seg = seg; - out_seg.cmdsize = @sizeOf(macho.segment_command_64); - out_seg.nsects = 0; - - // Update section headers count; any section with size of 0 is excluded - // since it doesn't have any data in the final binary file. 
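One easy-to-miss detail in `writeCodeSignaturePadding` above: instead of writing `needed_size` zero bytes, it grows the file by writing a single zero at the final offset, and the resulting hole reads back as zeroes, which is all the ad-hoc signature hashing needs. A sketch of the same trick against a scratch file (the offsets are made up):

```zig
const std = @import("std");

test "extending a file by writing its last byte" {
    var tmp = std.testing.tmpDir(.{});
    defer tmp.cleanup();

    const file = try tmp.dir.createFile("padded", .{ .read = true });
    defer file.close();

    const sig_offset: u64 = 32; // stand-in for the aligned code signature offset
    const needed_size: u64 = 16; // stand-in for code_sig.estimateSize(...)

    // Same pattern as the hunk above: one byte at the very end grows the
    // file to `sig_offset + needed_size`; the gap reads back as zeroes.
    try file.pwriteAll(&[_]u8{0}, sig_offset + needed_size - 1);

    const stat = try file.stat();
    try std.testing.expectEqual(sig_offset + needed_size, stat.size);
}
```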
- for (macho_file.sections.items(.header)[indexes.start..indexes.end]) |header| { - if (header.size == 0) continue; - out_seg.cmdsize += @sizeOf(macho.section_64); - out_seg.nsects += 1; - } - - if (out_seg.nsects == 0 and - (mem.eql(u8, out_seg.segName(), "__DATA_CONST") or - mem.eql(u8, out_seg.segName(), "__DATA"))) continue; - - try writer.writeStruct(out_seg); - for (macho_file.sections.items(.header)[indexes.start..indexes.end]) |header| { - if (header.size == 0) continue; - try writer.writeStruct(header); - } - - ncmds.* += 1; - } -} - -/// Writes Mach-O file header. -fn writeHeader(macho_file: *MachO, ncmds: u32, sizeofcmds: u32) !void { - var header: macho.mach_header_64 = .{}; - header.flags = macho.MH_NOUNDEFS | macho.MH_DYLDLINK | macho.MH_PIE | macho.MH_TWOLEVEL; - - switch (macho_file.base.options.target.cpu.arch) { - .aarch64 => { - header.cputype = macho.CPU_TYPE_ARM64; - header.cpusubtype = macho.CPU_SUBTYPE_ARM_ALL; - }, - .x86_64 => { - header.cputype = macho.CPU_TYPE_X86_64; - header.cpusubtype = macho.CPU_SUBTYPE_X86_64_ALL; - }, - else => return error.UnsupportedCpuArchitecture, - } - - switch (macho_file.base.options.output_mode) { - .Exe => { - header.filetype = macho.MH_EXECUTE; - }, - .Lib => { - // By this point, it can only be a dylib. - header.filetype = macho.MH_DYLIB; - header.flags |= macho.MH_NO_REEXPORTED_DYLIBS; - }, - else => unreachable, - } - - if (macho_file.getSectionByName("__DATA", "__thread_vars")) |sect_id| { - if (macho_file.sections.items(.header)[sect_id].size > 0) { - header.flags |= macho.MH_HAS_TLV_DESCRIPTORS; - } - } - - header.ncmds = ncmds; - header.sizeofcmds = sizeofcmds; - - log.debug("writing Mach-O header {}", .{header}); - - try macho_file.base.file.?.pwriteAll(mem.asBytes(&header), 0); + return i; } |
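The two helpers this diff adds to zld.zig, `bsearch` and `lsearch`, share the duck-typed predicate convention already used by `filterDataInCode`, but their return conventions differ: `bsearch` expects the slice to be partitioned with every predicate-satisfying element first and returns the index of the first element that fails the predicate, while `lsearch` returns the index of the first element that satisfies it. A usage sketch (the predicate type and values are illustrative; `bsearch` is copied from the addition above so the test stands alone):

```zig
const std = @import("std");

// Copied verbatim from the zld.zig addition above so the test is
// self-contained.
pub fn bsearch(comptime T: type, haystack: []align(1) const T, predicate: anytype) usize {
    if (!@hasDecl(@TypeOf(predicate), "predicate"))
        @compileError("Predicate is required to define fn predicate(@This(), T) bool");
    var min: usize = 0;
    var max: usize = haystack.len;
    while (min < max) {
        const index = (min + max) / 2;
        const curr = haystack[index];
        if (predicate.predicate(curr)) {
            min = index + 1;
        } else {
            max = index;
        }
    }
    return min;
}

// Partition predicate: "true" tells bsearch to keep looking right, so
// the returned index is the first element at or past `addr`.
const AtOrPast = struct {
    addr: u64,

    pub fn predicate(self: @This(), value: u64) bool {
        return value < self.addr;
    }
};

test "bsearch finds the first element at or past an address" {
    const addrs = [_]u64{ 1, 5, 9, 13, 21 };
    try std.testing.expectEqual(@as(usize, 2), bsearch(u64, &addrs, AtOrPast{ .addr = 9 }));
    try std.testing.expectEqual(@as(usize, 0), bsearch(u64, &addrs, AtOrPast{ .addr = 0 }));
    try std.testing.expectEqual(addrs.len, bsearch(u64, &addrs, AtOrPast{ .addr = 100 }));
}
```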

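Finally, a note on the `LC_FUNCTION_STARTS` payload built by `writeFunctionStarts` earlier in this diff: it is not a table of addresses but a chain of ULEB128-encoded deltas from the start of the `__TEXT` segment, with zero-length gaps dropped. A round-trip sketch using `std.leb`, with made-up addresses:

```zig
const std = @import("std");

test "function starts are ULEB128-encoded address deltas" {
    const text_vmaddr: u64 = 0x100000000;
    const addresses = [_]u64{ 0x100001000, 0x100001040, 0x1000010c0 };

    var buffer = std.ArrayList(u8).init(std.testing.allocator);
    defer buffer.deinit();

    // Encode: each entry is the distance from the previous function
    // start (the first is relative to the segment base).
    var last_off: u32 = 0;
    for (addresses) |a| {
        const offset = @intCast(u32, a - text_vmaddr);
        const diff = offset - last_off;
        if (diff == 0) continue;
        try std.leb.writeULEB128(buffer.writer(), diff);
        last_off = offset;
    }

    // Decode: dyld (or a tool such as `otool -l`) walks the deltas
    // back into absolute addresses.
    var stream = std.io.fixedBufferStream(buffer.items);
    var addr = text_vmaddr;
    for (addresses) |expected| {
        addr += try std.leb.readULEB128(u32, stream.reader());
        try std.testing.expectEqual(expected, addr);
    }
}
```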