diff options
| author | Jakub Konka <kubkon@jakubkonka.com> | 2022-07-31 18:19:17 +0200 |
|---|---|---|
| committer | Jakub Konka <kubkon@jakubkonka.com> | 2022-08-03 21:19:41 +0200 |
| commit | f26d5ee7ea97c8fd6e5b2655f845be7e4293930e (patch) | |
| tree | fab17016b079fcd7aaef84672feb469136dcc646 /src/link | |
| parent | 4c750016eb9b1c0831cbb0398a4d6ee9dbdc932e (diff) | |
| download | zig-f26d5ee7ea97c8fd6e5b2655f845be7e4293930e.tar.gz zig-f26d5ee7ea97c8fd6e5b2655f845be7e4293930e.zip | |
macho: sync with zld
gitrev a2c32e972f8c5adfcda8ed2d99379ae868f59c24
https://github.com/kubkon/zld/commit/a2c32e972f8c5adfcda8ed2d99379ae868f59c24
Diffstat (limited to 'src/link')
| -rw-r--r-- | src/link/Dwarf.zig | 44 | ||||
| -rw-r--r-- | src/link/MachO.zig | 3473 | ||||
| -rw-r--r-- | src/link/MachO/Archive.zig | 58 | ||||
| -rw-r--r-- | src/link/MachO/Atom.zig | 35 | ||||
| -rw-r--r-- | src/link/MachO/CodeSignature.zig | 12 | ||||
| -rw-r--r-- | src/link/MachO/DebugSymbols.zig | 510 | ||||
| -rw-r--r-- | src/link/MachO/Dylib.zig | 159 | ||||
| -rw-r--r-- | src/link/MachO/Object.zig | 306 | ||||
| -rw-r--r-- | src/link/MachO/dead_strip.zig | 48 | ||||
| -rw-r--r-- | src/link/MachO/fat.zig | 4 |
10 files changed, 2027 insertions, 2622 deletions
diff --git a/src/link/Dwarf.zig b/src/link/Dwarf.zig index 03ba53801b..627f946e36 100644 --- a/src/link/Dwarf.zig +++ b/src/link/Dwarf.zig @@ -853,8 +853,7 @@ pub fn commitDeclState( .macho => { const macho_file = file.cast(File.MachO).?; const d_sym = &macho_file.d_sym.?; - const dwarf_segment = &d_sym.load_commands.items[d_sym.dwarf_segment_cmd_index.?].segment; - const debug_line_sect = &dwarf_segment.sections.items[d_sym.debug_line_section_index.?]; + const debug_line_sect = &d_sym.sections.items[d_sym.debug_line_section_index.?]; const file_pos = debug_line_sect.offset + src_fn.off; try pwriteDbgLineNops(d_sym.file, file_pos, 0, &[0]u8{}, src_fn.len); }, @@ -933,8 +932,8 @@ pub fn commitDeclState( .macho => { const macho_file = file.cast(File.MachO).?; const d_sym = &macho_file.d_sym.?; - const dwarf_segment = &d_sym.load_commands.items[d_sym.dwarf_segment_cmd_index.?].segment; - const debug_line_sect = &dwarf_segment.sections.items[d_sym.debug_line_section_index.?]; + const dwarf_segment = d_sym.segments.items[d_sym.dwarf_segment_cmd_index.?]; + const debug_line_sect = &d_sym.sections.items[d_sym.debug_line_section_index.?]; if (needed_size != debug_line_sect.size) { if (needed_size > d_sym.allocatedSize(debug_line_sect.offset)) { const new_offset = d_sym.findFreeSpace(needed_size, 1); @@ -955,10 +954,9 @@ pub fn commitDeclState( ); debug_line_sect.offset = @intCast(u32, new_offset); - debug_line_sect.addr = dwarf_segment.inner.vmaddr + new_offset - dwarf_segment.inner.fileoff; + debug_line_sect.addr = dwarf_segment.vmaddr + new_offset - dwarf_segment.fileoff; } debug_line_sect.size = needed_size; - d_sym.load_commands_dirty = true; // TODO look into making only the one section dirty d_sym.debug_line_header_dirty = true; } const file_pos = debug_line_sect.offset + src_fn.off; @@ -1137,8 +1135,7 @@ fn updateDeclDebugInfoAllocation(self: *Dwarf, file: *File, atom: *Atom, len: u3 .macho => { const macho_file = file.cast(File.MachO).?; const d_sym = 
&macho_file.d_sym.?; - const dwarf_segment = &d_sym.load_commands.items[d_sym.dwarf_segment_cmd_index.?].segment; - const debug_info_sect = &dwarf_segment.sections.items[d_sym.debug_info_section_index.?]; + const debug_info_sect = &d_sym.sections.items[d_sym.debug_info_section_index.?]; const file_pos = debug_info_sect.offset + atom.off; try pwriteDbgInfoNops(d_sym.file, file_pos, 0, &[0]u8{}, atom.len, false); }, @@ -1235,8 +1232,8 @@ fn writeDeclDebugInfo(self: *Dwarf, file: *File, atom: *Atom, dbg_info_buf: []co .macho => { const macho_file = file.cast(File.MachO).?; const d_sym = &macho_file.d_sym.?; - const dwarf_segment = &d_sym.load_commands.items[d_sym.dwarf_segment_cmd_index.?].segment; - const debug_info_sect = &dwarf_segment.sections.items[d_sym.debug_info_section_index.?]; + const dwarf_segment = d_sym.segments.items[d_sym.dwarf_segment_cmd_index.?]; + const debug_info_sect = &d_sym.sections.items[d_sym.debug_info_section_index.?]; if (needed_size != debug_info_sect.size) { if (needed_size > d_sym.allocatedSize(debug_info_sect.offset)) { const new_offset = d_sym.findFreeSpace(needed_size, 1); @@ -1257,10 +1254,9 @@ fn writeDeclDebugInfo(self: *Dwarf, file: *File, atom: *Atom, dbg_info_buf: []co ); debug_info_sect.offset = @intCast(u32, new_offset); - debug_info_sect.addr = dwarf_segment.inner.vmaddr + new_offset - dwarf_segment.inner.fileoff; + debug_info_sect.addr = dwarf_segment.vmaddr + new_offset - dwarf_segment.fileoff; } debug_info_sect.size = needed_size; - d_sym.load_commands_dirty = true; // TODO look into making only the one section dirty d_sym.debug_line_header_dirty = true; } const file_pos = debug_info_sect.offset + atom.off; @@ -1330,8 +1326,7 @@ pub fn updateDeclLineNumber(self: *Dwarf, file: *File, decl: *const Module.Decl) .macho => { const macho_file = file.cast(File.MachO).?; const d_sym = macho_file.d_sym.?; - const dwarf_seg = d_sym.load_commands.items[d_sym.dwarf_segment_cmd_index.?].segment; - const sect = 
dwarf_seg.sections.items[d_sym.debug_line_section_index.?]; + const sect = d_sym.sections.items[d_sym.debug_line_section_index.?]; const file_pos = sect.offset + decl.fn_link.macho.off + self.getRelocDbgLineOff(); try d_sym.file.pwriteAll(&data, file_pos); }, @@ -1557,14 +1552,14 @@ pub fn writeDbgAbbrev(self: *Dwarf, file: *File) !void { .macho => { const macho_file = file.cast(File.MachO).?; const d_sym = &macho_file.d_sym.?; - const dwarf_segment = &d_sym.load_commands.items[d_sym.dwarf_segment_cmd_index.?].segment; - const debug_abbrev_sect = &dwarf_segment.sections.items[d_sym.debug_abbrev_section_index.?]; + const dwarf_segment = d_sym.segments.items[d_sym.dwarf_segment_cmd_index.?]; + const debug_abbrev_sect = &d_sym.sections.items[d_sym.debug_abbrev_section_index.?]; const allocated_size = d_sym.allocatedSize(debug_abbrev_sect.offset); if (needed_size > allocated_size) { debug_abbrev_sect.size = 0; // free the space const offset = d_sym.findFreeSpace(needed_size, 1); debug_abbrev_sect.offset = @intCast(u32, offset); - debug_abbrev_sect.addr = dwarf_segment.inner.vmaddr + offset - dwarf_segment.inner.fileoff; + debug_abbrev_sect.addr = dwarf_segment.vmaddr + offset - dwarf_segment.fileoff; } debug_abbrev_sect.size = needed_size; log.debug("__debug_abbrev start=0x{x} end=0x{x}", .{ @@ -1681,8 +1676,7 @@ pub fn writeDbgInfoHeader(self: *Dwarf, file: *File, module: *Module, low_pc: u6 .macho => { const macho_file = file.cast(File.MachO).?; const d_sym = &macho_file.d_sym.?; - const dwarf_seg = d_sym.load_commands.items[d_sym.dwarf_segment_cmd_index.?].segment; - const debug_info_sect = dwarf_seg.sections.items[d_sym.debug_info_section_index.?]; + const debug_info_sect = d_sym.sections.items[d_sym.debug_info_section_index.?]; const file_pos = debug_info_sect.offset; try pwriteDbgInfoNops(d_sym.file, file_pos, 0, di_buf.items, jmp_amt, false); }, @@ -1998,13 +1992,13 @@ pub fn writeDbgAranges(self: *Dwarf, file: *File, addr: u64, size: u64) !void { .macho => { 
const macho_file = file.cast(File.MachO).?; const d_sym = &macho_file.d_sym.?; - const dwarf_seg = &d_sym.load_commands.items[d_sym.dwarf_segment_cmd_index.?].segment; - const debug_aranges_sect = &dwarf_seg.sections.items[d_sym.debug_aranges_section_index.?]; + const dwarf_seg = d_sym.segments.items[d_sym.dwarf_segment_cmd_index.?]; + const debug_aranges_sect = &d_sym.sections.items[d_sym.debug_aranges_section_index.?]; const allocated_size = d_sym.allocatedSize(debug_aranges_sect.offset); if (needed_size > allocated_size) { debug_aranges_sect.size = 0; // free the space const new_offset = d_sym.findFreeSpace(needed_size, 16); - debug_aranges_sect.addr = dwarf_seg.inner.vmaddr + new_offset - dwarf_seg.inner.fileoff; + debug_aranges_sect.addr = dwarf_seg.vmaddr + new_offset - dwarf_seg.fileoff; debug_aranges_sect.offset = @intCast(u32, new_offset); } debug_aranges_sect.size = needed_size; @@ -2134,8 +2128,7 @@ pub fn writeDbgLineHeader(self: *Dwarf, file: *File, module: *Module) !void { .macho => { const macho_file = file.cast(File.MachO).?; const d_sym = &macho_file.d_sym.?; - const dwarf_seg = d_sym.load_commands.items[d_sym.dwarf_segment_cmd_index.?].segment; - const debug_line_sect = dwarf_seg.sections.items[d_sym.debug_line_section_index.?]; + const debug_line_sect = d_sym.sections.items[d_sym.debug_line_section_index.?]; const file_pos = debug_line_sect.offset; try pwriteDbgLineNops(d_sym.file, file_pos, 0, di_buf.items, jmp_amt); }, @@ -2264,8 +2257,7 @@ pub fn flushModule(self: *Dwarf, file: *File, module: *Module) !void { .macho => { const macho_file = file.cast(File.MachO).?; const d_sym = &macho_file.d_sym.?; - const dwarf_segment = &d_sym.load_commands.items[d_sym.dwarf_segment_cmd_index.?].segment; - const debug_info_sect = &dwarf_segment.sections.items[d_sym.debug_info_section_index.?]; + const debug_info_sect = &d_sym.sections.items[d_sym.debug_info_section_index.?]; break :blk debug_info_sect.offset; }, // for wasm, the offset is always 0 as we 
write to memory first diff --git a/src/link/MachO.zig b/src/link/MachO.zig index a247b3e6c5..b912130957 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -17,6 +17,7 @@ const aarch64 = @import("../arch/aarch64/bits.zig"); const bind = @import("MachO/bind.zig"); const codegen = @import("../codegen.zig"); const dead_strip = @import("MachO/dead_strip.zig"); +const fat = @import("MachO/fat.zig"); const link = @import("../link.zig"); const llvm_backend = @import("../codegen/llvm.zig"); const target_util = @import("../target.zig"); @@ -60,6 +61,29 @@ const SystemLib = struct { weak: bool = false, }; +const Section = struct { + header: macho.section_64, + segment_index: u8, + last_atom: ?*Atom = null, // TODO temporary hack; we really should shrink section to 0 + + /// A list of atoms that have surplus capacity. This list can have false + /// positives, as functions grow and shrink over time, only sometimes being added + /// or removed from the freelist. + /// + /// An atom has surplus capacity when its overcapacity value is greater than + /// padToIdeal(minimum_atom_size). That is, when it has so + /// much extra capacity, that we could fit a small new symbol in it, itself with + /// ideal_capacity or more. + /// + /// Ideal capacity is defined by size + (size / ideal_factor). + /// + /// Overcapacity is measured by actual_capacity - ideal_capacity. Note that + /// overcapacity can be negative. A simple way to have negative overcapacity is to + /// allocate a fresh atom, which will have ideal capacity, and then grow it + /// by 1 byte. It will then have -1 overcapacity. + free_list: std.ArrayListUnmanaged(*Atom) = .{}, +}; + base: File, /// If this is not null, an object file is created by LLVM and linked with LLD afterwards. @@ -77,80 +101,67 @@ page_size: u16, /// fashion (default for LLVM backend). mode: enum { incremental, one_shot }, -/// The absolute address of the entry point. 
-entry_addr: ?u64 = null, - -/// Code signature (if any) -code_signature: ?CodeSignature = null, +uuid: macho.uuid_command = .{ + .cmdsize = @sizeOf(macho.uuid_command), + .uuid = undefined, +}, objects: std.ArrayListUnmanaged(Object) = .{}, archives: std.ArrayListUnmanaged(Archive) = .{}, - dylibs: std.ArrayListUnmanaged(Dylib) = .{}, dylibs_map: std.StringHashMapUnmanaged(u16) = .{}, referenced_dylibs: std.AutoArrayHashMapUnmanaged(u16, void) = .{}, -load_commands: std.ArrayListUnmanaged(macho.LoadCommand) = .{}, - -pagezero_segment_cmd_index: ?u16 = null, -text_segment_cmd_index: ?u16 = null, -data_const_segment_cmd_index: ?u16 = null, -data_segment_cmd_index: ?u16 = null, -linkedit_segment_cmd_index: ?u16 = null, -dyld_info_cmd_index: ?u16 = null, -symtab_cmd_index: ?u16 = null, -dysymtab_cmd_index: ?u16 = null, -dylinker_cmd_index: ?u16 = null, -data_in_code_cmd_index: ?u16 = null, -function_starts_cmd_index: ?u16 = null, -main_cmd_index: ?u16 = null, -dylib_id_cmd_index: ?u16 = null, -source_version_cmd_index: ?u16 = null, -build_version_cmd_index: ?u16 = null, -uuid_cmd_index: ?u16 = null, -code_signature_cmd_index: ?u16 = null, +segments: std.ArrayListUnmanaged(macho.segment_command_64) = .{}, +sections: std.MultiArrayList(Section) = .{}, + +pagezero_segment_cmd_index: ?u8 = null, +text_segment_cmd_index: ?u8 = null, +data_const_segment_cmd_index: ?u8 = null, +data_segment_cmd_index: ?u8 = null, +linkedit_segment_cmd_index: ?u8 = null, // __TEXT segment sections -text_section_index: ?u16 = null, -stubs_section_index: ?u16 = null, -stub_helper_section_index: ?u16 = null, -text_const_section_index: ?u16 = null, -cstring_section_index: ?u16 = null, -ustring_section_index: ?u16 = null, -gcc_except_tab_section_index: ?u16 = null, -unwind_info_section_index: ?u16 = null, -eh_frame_section_index: ?u16 = null, - -objc_methlist_section_index: ?u16 = null, -objc_methname_section_index: ?u16 = null, -objc_methtype_section_index: ?u16 = null, 
-objc_classname_section_index: ?u16 = null, +text_section_index: ?u8 = null, +stubs_section_index: ?u8 = null, +stub_helper_section_index: ?u8 = null, +text_const_section_index: ?u8 = null, +cstring_section_index: ?u8 = null, +ustring_section_index: ?u8 = null, +gcc_except_tab_section_index: ?u8 = null, +unwind_info_section_index: ?u8 = null, +eh_frame_section_index: ?u8 = null, + +objc_methlist_section_index: ?u8 = null, +objc_methname_section_index: ?u8 = null, +objc_methtype_section_index: ?u8 = null, +objc_classname_section_index: ?u8 = null, // __DATA_CONST segment sections -got_section_index: ?u16 = null, -mod_init_func_section_index: ?u16 = null, -mod_term_func_section_index: ?u16 = null, -data_const_section_index: ?u16 = null, +got_section_index: ?u8 = null, +mod_init_func_section_index: ?u8 = null, +mod_term_func_section_index: ?u8 = null, +data_const_section_index: ?u8 = null, -objc_cfstring_section_index: ?u16 = null, -objc_classlist_section_index: ?u16 = null, -objc_imageinfo_section_index: ?u16 = null, +objc_cfstring_section_index: ?u8 = null, +objc_classlist_section_index: ?u8 = null, +objc_imageinfo_section_index: ?u8 = null, // __DATA segment sections -tlv_section_index: ?u16 = null, -tlv_data_section_index: ?u16 = null, -tlv_bss_section_index: ?u16 = null, -tlv_ptrs_section_index: ?u16 = null, -la_symbol_ptr_section_index: ?u16 = null, -data_section_index: ?u16 = null, -bss_section_index: ?u16 = null, - -objc_const_section_index: ?u16 = null, -objc_selrefs_section_index: ?u16 = null, -objc_classrefs_section_index: ?u16 = null, -objc_data_section_index: ?u16 = null, - -rustc_section_index: ?u16 = null, +tlv_section_index: ?u8 = null, +tlv_data_section_index: ?u8 = null, +tlv_bss_section_index: ?u8 = null, +tlv_ptrs_section_index: ?u8 = null, +la_symbol_ptr_section_index: ?u8 = null, +data_section_index: ?u8 = null, +bss_section_index: ?u8 = null, + +objc_const_section_index: ?u8 = null, +objc_selrefs_section_index: ?u8 = null, 
+objc_classrefs_section_index: ?u8 = null, +objc_data_section_index: ?u8 = null, + +rustc_section_index: ?u8 = null, rustc_section_size: u64 = 0, locals: std.ArrayListUnmanaged(macho.nlist_64) = .{}, @@ -188,37 +199,12 @@ stubs_table: std.AutoHashMapUnmanaged(SymbolWithLoc, u32) = .{}, error_flags: File.ErrorFlags = File.ErrorFlags{}, -load_commands_dirty: bool = false, -sections_order_dirty: bool = false, - /// A helper var to indicate if we are at the start of the incremental updates, or /// already somewhere further along the update-and-run chain. /// TODO once we add opening a prelinked output binary from file, this will become /// obsolete as we will carry on where we left off. cold_start: bool = true, -section_ordinals: std.AutoArrayHashMapUnmanaged(MatchingSection, void) = .{}, - -/// A list of atoms that have surplus capacity. This list can have false -/// positives, as functions grow and shrink over time, only sometimes being added -/// or removed from the freelist. -/// -/// An atom has surplus capacity when its overcapacity value is greater than -/// padToIdeal(minimum_atom_size). That is, when it has so -/// much extra capacity, that we could fit a small new symbol in it, itself with -/// ideal_capacity or more. -/// -/// Ideal capacity is defined by size + (size / ideal_factor). -/// -/// Overcapacity is measured by actual_capacity - ideal_capacity. Note that -/// overcapacity can be negative. A simple way to have negative overcapacity is to -/// allocate a fresh atom, which will have ideal capacity, and then grow it -/// by 1 byte. It will then have -1 overcapacity. -atom_free_lists: std.AutoHashMapUnmanaged(MatchingSection, std.ArrayListUnmanaged(*Atom)) = .{}, - -/// Pointer to the last allocated atom -atoms: std.AutoHashMapUnmanaged(MatchingSection, *Atom) = .{}, - /// List of atoms that are either synthetic or map directly to the Zig source program. 
managed_atoms: std.ArrayListUnmanaged(*Atom) = .{}, @@ -250,7 +236,7 @@ unnamed_const_atoms: UnnamedConstTable = .{}, /// We store them here so that we can properly dispose of any allocated /// memory within the atom in the incremental linker. /// TODO consolidate this. -decls: std.AutoArrayHashMapUnmanaged(Module.Decl.Index, ?MatchingSection) = .{}, +decls: std.AutoArrayHashMapUnmanaged(Module.Decl.Index, ?u8) = .{}, const Entry = struct { target: SymbolWithLoc, @@ -408,12 +394,7 @@ pub fn openPath(allocator: Allocator, options: link.Options) !*MachO { pub fn createEmpty(gpa: Allocator, options: link.Options) !*MachO { const cpu_arch = options.target.cpu.arch; - const os_tag = options.target.os.tag; - const abi = options.target.abi; const page_size: u16 = if (cpu_arch == .aarch64) 0x4000 else 0x1000; - // Adhoc code signature is required when targeting aarch64-macos either directly or indirectly via the simulator - // ABI such as aarch64-ios-simulator, etc. - const requires_adhoc_codesig = cpu_arch == .aarch64 and (os_tag == .macos or abi == .simulator); const use_llvm = build_options.have_llvm and options.use_llvm; const use_stage1 = build_options.is_stage1 and options.use_stage1; @@ -428,10 +409,6 @@ pub fn createEmpty(gpa: Allocator, options: link.Options) !*MachO { .file = null, }, .page_size = page_size, - .code_signature = if (requires_adhoc_codesig) - CodeSignature.init(page_size) - else - null, .mode = if (use_stage1 or use_llvm or options.module == null or options.cache_mode == .whole) .one_shot else @@ -562,8 +539,8 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No var dependent_libs = std.fifo.LinearFifo(struct { id: Dylib.Id, parent: u16, - }, .Dynamic).init(self.base.allocator); - defer dependent_libs.deinit(); + }, .Dynamic).init(arena); + try self.parseLibs(libs.keys(), libs.values(), self.base.options.sysroot, &dependent_libs); try self.parseDependentLibs(self.base.options.sysroot, &dependent_libs); } @@ -573,7 
+550,6 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No try self.createDyldPrivateAtom(); try self.createStubHelperPreambleAtom(); try self.resolveSymbolsInDylibs(); - try self.addCodeSignatureLC(); if (self.unresolved.count() > 0) { return error.UndefinedSymbolReference; @@ -583,67 +559,91 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No if (build_options.enable_logging) { self.logSymtab(); - self.logSectionOrdinals(); self.logAtoms(); } try self.writeAtomsIncremental(); - try self.setEntryPoint(); - try self.updateSectionOrdinals(); - try self.writeLinkeditSegment(); + var lc_buffer = std.ArrayList(u8).init(arena); + const lc_writer = lc_buffer.writer(); + var ncmds: u32 = 0; - if (self.d_sym) |*d_sym| { - // Flush debug symbols bundle. - try d_sym.flushModule(self.base.allocator, self.base.options); + try self.writeLinkeditSegmentData(&ncmds, lc_writer); + try writeDylinkerLC(&ncmds, lc_writer); + + self.writeMainLC(&ncmds, lc_writer) catch |err| switch (err) { + error.MissingMainEntrypoint => { + self.error_flags.no_entry_point_found = true; + }, + else => |e| return e, + }; + + try self.writeDylibIdLC(&ncmds, lc_writer); + try self.writeRpathLCs(&ncmds, lc_writer); + + { + try lc_writer.writeStruct(macho.source_version_command{ + .cmdsize = @sizeOf(macho.source_version_command), + .version = 0x0, + }); + ncmds += 1; } - // code signature and entitlements - if (self.base.options.entitlements) |path| { - if (self.code_signature) |*csig| { - try csig.addEntitlements(self.base.allocator, path); - csig.code_directory.ident = self.base.options.emit.?.sub_path; - } else { - var csig = CodeSignature.init(self.page_size); - try csig.addEntitlements(self.base.allocator, path); - csig.code_directory.ident = self.base.options.emit.?.sub_path; - self.code_signature = csig; - } + try self.writeBuildVersionLC(&ncmds, lc_writer); + + { + std.crypto.random.bytes(&self.uuid.uuid); + try 
lc_writer.writeStruct(self.uuid); + ncmds += 1; } - if (self.code_signature) |*csig| { - csig.clear(self.base.allocator); - csig.code_directory.ident = self.base.options.emit.?.sub_path; + try self.writeLoadDylibLCs(&ncmds, lc_writer); + + const target = self.base.options.target; + const requires_codesig = blk: { + if (self.base.options.entitlements) |_| break :blk true; + if (target.cpu.arch == .aarch64 and (target.os.tag == .macos or target.abi == .simulator)) + break :blk true; + break :blk false; + }; + var codesig_offset: ?u32 = null; + var codesig: ?CodeSignature = if (requires_codesig) blk: { // Preallocate space for the code signature. // We need to do this at this stage so that we have the load commands with proper values // written out to the file. // The most important here is to have the correct vm and filesize of the __LINKEDIT segment // where the code signature goes into. - try self.writeCodeSignaturePadding(csig); - } + var codesig = CodeSignature.init(self.page_size); + codesig.code_directory.ident = self.base.options.emit.?.sub_path; + if (self.base.options.entitlements) |path| { + try codesig.addEntitlements(arena, path); + } + codesig_offset = try self.writeCodeSignaturePadding(&codesig, &ncmds, lc_writer); + break :blk codesig; + } else null; - try self.writeLoadCommands(); - try self.writeHeader(); + var headers_buf = std.ArrayList(u8).init(arena); + try self.writeSegmentHeaders(0, self.segments.items.len, &ncmds, headers_buf.writer()); - if (self.entry_addr == null and self.base.options.output_mode == .Exe) { - log.debug("flushing. no_entry_point_found = true", .{}); - self.error_flags.no_entry_point_found = true; - } else { - log.debug("flushing. 
no_entry_point_found = false", .{}); - self.error_flags.no_entry_point_found = false; - } + try self.base.file.?.pwriteAll(headers_buf.items, @sizeOf(macho.mach_header_64)); + try self.base.file.?.pwriteAll(lc_buffer.items, @sizeOf(macho.mach_header_64) + headers_buf.items.len); - assert(!self.load_commands_dirty); + try self.writeHeader(ncmds, @intCast(u32, lc_buffer.items.len + headers_buf.items.len)); - if (self.code_signature) |*csig| { - try self.writeCodeSignature(csig); // code signing always comes last + if (codesig) |*csig| { + try self.writeCodeSignature(csig, codesig_offset.?); // code signing always comes last } - if (build_options.enable_link_snapshots) { - if (self.base.options.enable_link_snapshots) - try self.snapshotState(); + if (self.d_sym) |*d_sym| { + // Flush debug symbols bundle. + try d_sym.flushModule(self.base.allocator, self.base.options); } + // if (build_options.enable_link_snapshots) { + // if (self.base.options.enable_link_snapshots) + // try self.snapshotState(); + // } + if (cache_miss) { // Update the file with the digest. If it fails we can continue; it only // means that the next invocation will have an unnecessary cache miss. 
@@ -708,6 +708,9 @@ fn linkOneShot(self: *MachO, comp: *Compilation, prog_node: *std.Progress.Node) sub_prog_node.context.refresh(); defer sub_prog_node.end(); + const cpu_arch = self.base.options.target.cpu.arch; + const os_tag = self.base.options.target.os.tag; + const abi = self.base.options.target.abi; const is_lib = self.base.options.output_mode == .Lib; const is_dyn_lib = self.base.options.link_mode == .Dynamic and is_lib; const is_exe_or_dyn_lib = is_dyn_lib or self.base.options.output_mode == .Exe; @@ -990,40 +993,6 @@ fn linkOneShot(self: *MachO, comp: *Compilation, prog_node: *std.Progress.Node) } } - // rpaths - var rpath_table = std.StringArrayHashMap(void).init(arena); - for (self.base.options.rpath_list) |rpath| { - if (rpath_table.contains(rpath)) continue; - const cmdsize = @intCast(u32, mem.alignForwardGeneric( - u64, - @sizeOf(macho.rpath_command) + rpath.len + 1, - @sizeOf(u64), - )); - var rpath_cmd = macho.emptyGenericCommandWithData(macho.rpath_command{ - .cmdsize = cmdsize, - .path = @sizeOf(macho.rpath_command), - }); - rpath_cmd.data = try gpa.alloc(u8, cmdsize - rpath_cmd.inner.path); - mem.set(u8, rpath_cmd.data, 0); - mem.copy(u8, rpath_cmd.data, rpath); - try self.load_commands.append(gpa, .{ .rpath = rpath_cmd }); - try rpath_table.putNoClobber(rpath, {}); - self.load_commands_dirty = true; - } - - // code signature and entitlements - if (self.base.options.entitlements) |path| { - if (self.code_signature) |*csig| { - try csig.addEntitlements(gpa, path); - csig.code_directory.ident = self.base.options.emit.?.sub_path; - } else { - var csig = CodeSignature.init(self.page_size); - try csig.addEntitlements(gpa, path); - csig.code_directory.ident = self.base.options.emit.?.sub_path; - self.code_signature = csig; - } - } - if (self.base.options.verbose_link) { var argv = std.ArrayList([]const u8).init(arena); @@ -1048,7 +1017,7 @@ fn linkOneShot(self: *MachO, comp: *Compilation, prog_node: *std.Progress.Node) try argv.append(syslibroot); } - 
for (rpath_table.keys()) |rpath| { + for (self.base.options.rpath_list) |rpath| { try argv.append("-rpath"); try argv.append(rpath); } @@ -1157,15 +1126,15 @@ fn linkOneShot(self: *MachO, comp: *Compilation, prog_node: *std.Progress.Node) var dependent_libs = std.fifo.LinearFifo(struct { id: Dylib.Id, parent: u16, - }, .Dynamic).init(gpa); - defer dependent_libs.deinit(); + }, .Dynamic).init(arena); + try self.parseInputFiles(positionals.items, self.base.options.sysroot, &dependent_libs); try self.parseAndForceLoadStaticArchives(must_link_archives.keys()); try self.parseLibs(libs.keys(), libs.values(), self.base.options.sysroot, &dependent_libs); try self.parseDependentLibs(self.base.options.sysroot, &dependent_libs); - for (self.objects.items) |*object, object_id| { - try self.resolveSymbolsInObject(object, @intCast(u16, object_id)); + for (self.objects.items) |_, object_id| { + try self.resolveSymbolsInObject(@intCast(u16, object_id)); } try self.resolveSymbolsInArchives(); @@ -1175,7 +1144,6 @@ fn linkOneShot(self: *MachO, comp: *Compilation, prog_node: *std.Progress.Node) try self.resolveSymbolsInDylibs(); try self.createMhExecuteHeaderSymbol(); try self.createDsoHandleSymbol(); - try self.addCodeSignatureLC(); try self.resolveSymbolsAtLoading(); if (self.unresolved.count() > 0) { @@ -1206,41 +1174,79 @@ fn linkOneShot(self: *MachO, comp: *Compilation, prog_node: *std.Progress.Node) if (build_options.enable_logging) { self.logSymtab(); - self.logSectionOrdinals(); self.logAtoms(); } try self.writeAtomsOneShot(); if (self.rustc_section_index) |id| { - const sect = self.getSectionPtr(.{ - .seg = self.data_segment_cmd_index.?, - .sect = id, + const header = &self.sections.items(.header)[id]; + header.size = self.rustc_section_size; + } + + var lc_buffer = std.ArrayList(u8).init(arena); + const lc_writer = lc_buffer.writer(); + var ncmds: u32 = 0; + + try self.writeLinkeditSegmentData(&ncmds, lc_writer); + try writeDylinkerLC(&ncmds, lc_writer); + try 
self.writeMainLC(&ncmds, lc_writer); + try self.writeDylibIdLC(&ncmds, lc_writer); + try self.writeRpathLCs(&ncmds, lc_writer); + + { + try lc_writer.writeStruct(macho.source_version_command{ + .cmdsize = @sizeOf(macho.source_version_command), + .version = 0x0, }); - sect.size = self.rustc_section_size; + ncmds += 1; } - try self.setEntryPoint(); - try self.writeLinkeditSegment(); + try self.writeBuildVersionLC(&ncmds, lc_writer); + + { + var uuid_lc = macho.uuid_command{ + .cmdsize = @sizeOf(macho.uuid_command), + .uuid = undefined, + }; + std.crypto.random.bytes(&uuid_lc.uuid); + try lc_writer.writeStruct(uuid_lc); + ncmds += 1; + } - if (self.code_signature) |*csig| { - csig.clear(gpa); - csig.code_directory.ident = self.base.options.emit.?.sub_path; + try self.writeLoadDylibLCs(&ncmds, lc_writer); + + const requires_codesig = blk: { + if (self.base.options.entitlements) |_| break :blk true; + if (cpu_arch == .aarch64 and (os_tag == .macos or abi == .simulator)) break :blk true; + break :blk false; + }; + var codesig_offset: ?u32 = null; + var codesig: ?CodeSignature = if (requires_codesig) blk: { // Preallocate space for the code signature. // We need to do this at this stage so that we have the load commands with proper values // written out to the file. // The most important here is to have the correct vm and filesize of the __LINKEDIT segment // where the code signature goes into. 
- try self.writeCodeSignaturePadding(csig); - } + var codesig = CodeSignature.init(self.page_size); + codesig.code_directory.ident = self.base.options.emit.?.sub_path; + if (self.base.options.entitlements) |path| { + try codesig.addEntitlements(arena, path); + } + codesig_offset = try self.writeCodeSignaturePadding(&codesig, &ncmds, lc_writer); + break :blk codesig; + } else null; - try self.writeLoadCommands(); - try self.writeHeader(); + var headers_buf = std.ArrayList(u8).init(arena); + try self.writeSegmentHeaders(0, self.segments.items.len, &ncmds, headers_buf.writer()); - assert(!self.load_commands_dirty); + try self.base.file.?.pwriteAll(headers_buf.items, @sizeOf(macho.mach_header_64)); + try self.base.file.?.pwriteAll(lc_buffer.items, @sizeOf(macho.mach_header_64) + headers_buf.items.len); - if (self.code_signature) |*csig| { - try self.writeCodeSignature(csig); // code signing always comes last + try self.writeHeader(ncmds, @intCast(u32, lc_buffer.items.len + headers_buf.items.len)); + + if (codesig) |*csig| { + try self.writeCodeSignature(csig, codesig_offset.?); // code signing always comes last } } @@ -1395,66 +1401,77 @@ fn resolveFramework( } fn parseObject(self: *MachO, path: []const u8) !bool { + const gpa = self.base.allocator; const file = fs.cwd().openFile(path, .{}) catch |err| switch (err) { error.FileNotFound => return false, else => |e| return e, }; - errdefer file.close(); - - const name = try self.base.allocator.dupe(u8, path); - errdefer self.base.allocator.free(name); + defer file.close(); + const name = try gpa.dupe(u8, path); + errdefer gpa.free(name); + const cpu_arch = self.base.options.target.cpu.arch; const mtime: u64 = mtime: { const stat = file.stat() catch break :mtime 0; break :mtime @intCast(u64, @divFloor(stat.mtime, 1_000_000_000)); }; + const file_stat = try file.stat(); + const file_size = math.cast(usize, file_stat.size) orelse return error.Overflow; + const contents = try file.readToEndAllocOptions(gpa, file_size, 
file_size, @alignOf(u64), null); var object = Object{ .name = name, - .file = file, .mtime = mtime, + .contents = contents, }; - object.parse(self.base.allocator, self.base.options.target.cpu.arch) catch |err| switch (err) { + object.parse(gpa, cpu_arch) catch |err| switch (err) { error.EndOfStream, error.NotObject => { - object.deinit(self.base.allocator); + object.deinit(gpa); return false; }, else => |e| return e, }; - try self.objects.append(self.base.allocator, object); + try self.objects.append(gpa, object); return true; } fn parseArchive(self: *MachO, path: []const u8, force_load: bool) !bool { + const gpa = self.base.allocator; const file = fs.cwd().openFile(path, .{}) catch |err| switch (err) { error.FileNotFound => return false, else => |e| return e, }; errdefer file.close(); - const name = try self.base.allocator.dupe(u8, path); - errdefer self.base.allocator.free(name); + const name = try gpa.dupe(u8, path); + errdefer gpa.free(name); + const cpu_arch = self.base.options.target.cpu.arch; + const reader = file.reader(); + const fat_offset = try fat.getLibraryOffset(reader, cpu_arch); + try reader.context.seekTo(fat_offset); var archive = Archive{ .name = name, + .fat_offset = fat_offset, .file = file, }; - archive.parse(self.base.allocator, self.base.options.target.cpu.arch) catch |err| switch (err) { + archive.parse(gpa, reader) catch |err| switch (err) { error.EndOfStream, error.NotArchive => { - archive.deinit(self.base.allocator); + archive.deinit(gpa); return false; }, else => |e| return e, }; if (force_load) { - defer archive.deinit(self.base.allocator); + defer archive.deinit(gpa); + defer file.close(); // Get all offsets from the ToC - var offsets = std.AutoArrayHashMap(u32, void).init(self.base.allocator); + var offsets = std.AutoArrayHashMap(u32, void).init(gpa); defer offsets.deinit(); for (archive.toc.values()) |offs| { for (offs.items) |off| { @@ -1462,15 +1479,11 @@ fn parseArchive(self: *MachO, path: []const u8, force_load: bool) !bool { } 
} for (offsets.keys()) |off| { - const object = try self.objects.addOne(self.base.allocator); - object.* = try archive.parseObject( - self.base.allocator, - self.base.options.target.cpu.arch, - off, - ); + const object = try archive.parseObject(gpa, cpu_arch, off); + try self.objects.append(gpa, object); } } else { - try self.archives.append(self.base.allocator, archive); + try self.archives.append(gpa, archive); } return true; @@ -1481,6 +1494,7 @@ const ParseDylibError = error{ EmptyStubFile, MismatchedCpuArchitecture, UnsupportedCpuArchitecture, + EndOfStream, } || fs.File.OpenError || std.os.PReadError || Dylib.Id.ParseError; const DylibCreateOpts = struct { @@ -1497,43 +1511,52 @@ pub fn parseDylib( dependent_libs: anytype, opts: DylibCreateOpts, ) ParseDylibError!bool { + const gpa = self.base.allocator; const file = fs.cwd().openFile(path, .{}) catch |err| switch (err) { error.FileNotFound => return false, else => |e| return e, }; - errdefer file.close(); + defer file.close(); + + const cpu_arch = self.base.options.target.cpu.arch; + const file_stat = try file.stat(); + var file_size = math.cast(usize, file_stat.size) orelse return error.Overflow; - const name = try self.base.allocator.dupe(u8, path); - errdefer self.base.allocator.free(name); + const reader = file.reader(); + const fat_offset = try fat.getLibraryOffset(reader, cpu_arch); + try file.seekTo(fat_offset); + file_size -= fat_offset; + + const contents = try file.readToEndAllocOptions(gpa, file_size, file_size, @alignOf(u64), null); + defer gpa.free(contents); const dylib_id = @intCast(u16, self.dylibs.items.len); - var dylib = Dylib{ - .name = name, - .file = file, - .weak = opts.weak, - }; + var dylib = Dylib{ .weak = opts.weak }; - dylib.parse( - self.base.allocator, - self.base.options.target.cpu.arch, + dylib.parseFromBinary( + gpa, + cpu_arch, dylib_id, dependent_libs, + path, + contents, ) catch |err| switch (err) { error.EndOfStream, error.NotDylib => { try file.seekTo(0); - var lib_stub 
= LibStub.loadFromFile(self.base.allocator, file) catch { - dylib.deinit(self.base.allocator); + var lib_stub = LibStub.loadFromFile(gpa, file) catch { + dylib.deinit(gpa); return false; }; defer lib_stub.deinit(); try dylib.parseFromStub( - self.base.allocator, + gpa, self.base.options.target, lib_stub, dylib_id, dependent_libs, + path, ); }, else => |e| return e, @@ -1547,13 +1570,13 @@ pub fn parseDylib( log.warn(" dylib version: {}", .{dylib.id.?.current_version}); // TODO maybe this should be an error and facilitate auto-cleanup? - dylib.deinit(self.base.allocator); + dylib.deinit(gpa); return false; } } - try self.dylibs.append(self.base.allocator, dylib); - try self.dylibs_map.putNoClobber(self.base.allocator, dylib.id.?.name, dylib_id); + try self.dylibs.append(gpa, dylib); + try self.dylibs_map.putNoClobber(gpa, dylib.id.?.name, dylib_id); const should_link_dylib_even_if_unreachable = blk: { if (self.base.options.dead_strip_dylibs and !opts.needed) break :blk false; @@ -1561,8 +1584,7 @@ pub fn parseDylib( }; if (should_link_dylib_even_if_unreachable) { - try self.addLoadDylibLC(dylib_id); - try self.referenced_dylibs.putNoClobber(self.base.allocator, dylib_id, {}); + try self.referenced_dylibs.putNoClobber(gpa, dylib_id, {}); } return true; @@ -1572,10 +1594,8 @@ fn parseInputFiles(self: *MachO, files: []const []const u8, syslibroot: ?[]const for (files) |file_name| { const full_path = full_path: { var buffer: [fs.MAX_PATH_BYTES]u8 = undefined; - const path = try fs.realpath(file_name, &buffer); - break :full_path try self.base.allocator.dupe(u8, path); + break :full_path try fs.realpath(file_name, &buffer); }; - defer self.base.allocator.free(full_path); log.debug("parsing input file path '{s}'", .{full_path}); if (try self.parseObject(full_path)) continue; @@ -1592,10 +1612,8 @@ fn parseAndForceLoadStaticArchives(self: *MachO, files: []const []const u8) !voi for (files) |file_name| { const full_path = full_path: { var buffer: [fs.MAX_PATH_BYTES]u8 = 
undefined; - const path = try fs.realpath(file_name, &buffer); - break :full_path try self.base.allocator.dupe(u8, path); + break :full_path try fs.realpath(file_name, &buffer); }; - defer self.base.allocator.free(full_path); log.debug("parsing and force loading static archive '{s}'", .{full_path}); if (try self.parseArchive(full_path, true)) continue; @@ -1669,24 +1687,10 @@ fn parseDependentLibs(self: *MachO, syslibroot: ?[]const u8, dependent_libs: any } } -pub const MatchingSection = struct { - seg: u16, - sect: u16, - - pub fn eql(this: MatchingSection, other: struct { - seg: ?u16, - sect: ?u16, - }) bool { - const seg = other.seg orelse return false; - const sect = other.sect orelse return false; - return this.seg == seg and this.sect == sect; - } -}; - -pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSection { +pub fn getOutputSection(self: *MachO, sect: macho.section_64) !?u8 { const segname = sect.segName(); const sectname = sect.sectName(); - const res: ?MatchingSection = blk: { + const res: ?u8 = blk: { switch (sect.type_()) { macho.S_4BYTE_LITERALS, macho.S_8BYTE_LITERALS, macho.S_16BYTE_LITERALS => { if (self.text_const_section_index == null) { @@ -1698,11 +1702,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio .{}, ); } - - break :blk .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.text_const_section_index.?, - }; + break :blk self.text_const_section_index.?; }, macho.S_CSTRING_LITERALS => { if (mem.eql(u8, sectname, "__objc_methname")) { @@ -1717,11 +1717,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio .{}, ); } - - break :blk .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.objc_methname_section_index.?, - }; + break :blk self.objc_methname_section_index.?; } else if (mem.eql(u8, sectname, "__objc_methtype")) { if (self.objc_methtype_section_index == null) { self.objc_methtype_section_index = try self.initSection( @@ -1732,11 
+1728,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio .{}, ); } - - break :blk .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.objc_methtype_section_index.?, - }; + break :blk self.objc_methtype_section_index.?; } else if (mem.eql(u8, sectname, "__objc_classname")) { if (self.objc_classname_section_index == null) { self.objc_classname_section_index = try self.initSection( @@ -1747,11 +1739,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio .{}, ); } - - break :blk .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.objc_classname_section_index.?, - }; + break :blk self.objc_classname_section_index.?; } if (self.cstring_section_index == null) { @@ -1765,11 +1753,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio }, ); } - - break :blk .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.cstring_section_index.?, - }; + break :blk self.cstring_section_index.?; }, macho.S_LITERAL_POINTERS => { if (mem.eql(u8, segname, "__DATA") and mem.eql(u8, sectname, "__objc_selrefs")) { @@ -1784,11 +1768,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio }, ); } - - break :blk .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.objc_selrefs_section_index.?, - }; + break :blk self.objc_selrefs_section_index.?; } else { // TODO investigate break :blk null; @@ -1806,11 +1786,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio }, ); } - - break :blk .{ - .seg = self.data_const_segment_cmd_index.?, - .sect = self.mod_init_func_section_index.?, - }; + break :blk self.mod_init_func_section_index.?; }, macho.S_MOD_TERM_FUNC_POINTERS => { if (self.mod_term_func_section_index == null) { @@ -1824,11 +1800,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio }, ); } - - break :blk .{ - .seg = self.data_const_segment_cmd_index.?, - .sect = 
self.mod_term_func_section_index.?, - }; + break :blk self.mod_term_func_section_index.?; }, macho.S_ZEROFILL => { if (self.bss_section_index == null) { @@ -1842,11 +1814,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio }, ); } - - break :blk .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.bss_section_index.?, - }; + break :blk self.bss_section_index.?; }, macho.S_THREAD_LOCAL_VARIABLES => { if (self.tlv_section_index == null) { @@ -1860,11 +1828,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio }, ); } - - break :blk .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.tlv_section_index.?, - }; + break :blk self.tlv_section_index.?; }, macho.S_THREAD_LOCAL_VARIABLE_POINTERS => { if (self.tlv_ptrs_section_index == null) { @@ -1878,11 +1842,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio }, ); } - - break :blk .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.tlv_ptrs_section_index.?, - }; + break :blk self.tlv_ptrs_section_index.?; }, macho.S_THREAD_LOCAL_REGULAR => { if (self.tlv_data_section_index == null) { @@ -1896,11 +1856,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio }, ); } - - break :blk .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.tlv_data_section_index.?, - }; + break :blk self.tlv_data_section_index.?; }, macho.S_THREAD_LOCAL_ZEROFILL => { if (self.tlv_bss_section_index == null) { @@ -1914,11 +1870,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio }, ); } - - break :blk .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.tlv_bss_section_index.?, - }; + break :blk self.tlv_bss_section_index.?; }, macho.S_COALESCED => { if (mem.eql(u8, "__TEXT", segname) and mem.eql(u8, "__eh_frame", sectname)) { @@ -1933,11 +1885,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio .{}, ); } - - break :blk .{ - 
.seg = self.text_segment_cmd_index.?, - .sect = self.eh_frame_section_index.?, - }; + break :blk self.eh_frame_section_index.?; } // TODO audit this: is this the right mapping? @@ -1951,10 +1899,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio ); } - break :blk .{ - .seg = self.data_const_segment_cmd_index.?, - .sect = self.data_const_section_index.?, - }; + break :blk self.data_const_section_index.?; }, macho.S_REGULAR => { if (sect.isCode()) { @@ -1971,11 +1916,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio }, ); } - - break :blk .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.text_section_index.?, - }; + break :blk self.text_section_index.?; } if (sect.isDebug()) { // TODO debug attributes @@ -1998,11 +1939,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio .{}, ); } - - break :blk .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.ustring_section_index.?, - }; + break :blk self.ustring_section_index.?; } else if (mem.eql(u8, sectname, "__gcc_except_tab")) { if (self.gcc_except_tab_section_index == null) { self.gcc_except_tab_section_index = try self.initSection( @@ -2013,11 +1950,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio .{}, ); } - - break :blk .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.gcc_except_tab_section_index.?, - }; + break :blk self.gcc_except_tab_section_index.?; } else if (mem.eql(u8, sectname, "__objc_methlist")) { if (self.objc_methlist_section_index == null) { self.objc_methlist_section_index = try self.initSection( @@ -2028,11 +1961,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio .{}, ); } - - break :blk .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.objc_methlist_section_index.?, - }; + break :blk self.objc_methlist_section_index.?; } else if (mem.eql(u8, sectname, "__rodata") or mem.eql(u8, sectname, 
"__typelink") or mem.eql(u8, sectname, "__itablink") or @@ -2048,11 +1977,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio .{}, ); } - - break :blk .{ - .seg = self.data_const_segment_cmd_index.?, - .sect = self.data_const_section_index.?, - }; + break :blk self.data_const_section_index.?; } else { if (self.text_const_section_index == null) { self.text_const_section_index = try self.initSection( @@ -2063,11 +1988,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio .{}, ); } - - break :blk .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.text_const_section_index.?, - }; + break :blk self.text_const_section_index.?; } } @@ -2081,11 +2002,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio .{}, ); } - - break :blk .{ - .seg = self.data_const_segment_cmd_index.?, - .sect = self.data_const_section_index.?, - }; + break :blk self.data_const_section_index.?; } if (mem.eql(u8, segname, "__DATA")) { @@ -2099,11 +2016,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio .{}, ); } - - break :blk .{ - .seg = self.data_const_segment_cmd_index.?, - .sect = self.data_const_section_index.?, - }; + break :blk self.data_const_section_index.?; } else if (mem.eql(u8, sectname, "__cfstring")) { if (self.objc_cfstring_section_index == null) { self.objc_cfstring_section_index = try self.initSection( @@ -2114,11 +2027,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio .{}, ); } - - break :blk .{ - .seg = self.data_const_segment_cmd_index.?, - .sect = self.objc_cfstring_section_index.?, - }; + break :blk self.objc_cfstring_section_index.?; } else if (mem.eql(u8, sectname, "__objc_classlist")) { if (self.objc_classlist_section_index == null) { self.objc_classlist_section_index = try self.initSection( @@ -2129,11 +2038,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio .{}, ); } 
- - break :blk .{ - .seg = self.data_const_segment_cmd_index.?, - .sect = self.objc_classlist_section_index.?, - }; + break :blk self.objc_classlist_section_index.?; } else if (mem.eql(u8, sectname, "__objc_imageinfo")) { if (self.objc_imageinfo_section_index == null) { self.objc_imageinfo_section_index = try self.initSection( @@ -2144,11 +2049,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio .{}, ); } - - break :blk .{ - .seg = self.data_const_segment_cmd_index.?, - .sect = self.objc_imageinfo_section_index.?, - }; + break :blk self.objc_imageinfo_section_index.?; } else if (mem.eql(u8, sectname, "__objc_const")) { if (self.objc_const_section_index == null) { self.objc_const_section_index = try self.initSection( @@ -2159,11 +2060,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio .{}, ); } - - break :blk .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.objc_const_section_index.?, - }; + break :blk self.objc_const_section_index.?; } else if (mem.eql(u8, sectname, "__objc_classrefs")) { if (self.objc_classrefs_section_index == null) { self.objc_classrefs_section_index = try self.initSection( @@ -2174,11 +2071,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio .{}, ); } - - break :blk .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.objc_classrefs_section_index.?, - }; + break :blk self.objc_classrefs_section_index.?; } else if (mem.eql(u8, sectname, "__objc_data")) { if (self.objc_data_section_index == null) { self.objc_data_section_index = try self.initSection( @@ -2189,11 +2082,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio .{}, ); } - - break :blk .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.objc_data_section_index.?, - }; + break :blk self.objc_data_section_index.?; } else if (mem.eql(u8, sectname, ".rustc")) { if (self.rustc_section_index == null) { self.rustc_section_index = try 
self.initSection( @@ -2207,11 +2096,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio // decompress the metadata. self.rustc_section_size = sect.size; } - - break :blk .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.rustc_section_index.?, - }; + break :blk self.rustc_section_index.?; } else { if (self.data_section_index == null) { self.data_section_index = try self.initSection( @@ -2222,11 +2107,7 @@ pub fn getMatchingSection(self: *MachO, sect: macho.section_64) !?MatchingSectio .{}, ); } - - break :blk .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.data_section_index.?, - }; + break :blk self.data_section_index.?; } } @@ -2259,30 +2140,33 @@ pub fn createEmptyAtom(gpa: Allocator, sym_index: u32, size: u64, alignment: u32 return atom; } -pub fn writeAtom(self: *MachO, atom: *Atom, match: MatchingSection) !void { - const sect = self.getSection(match); +pub fn writeAtom(self: *MachO, atom: *Atom, sect_id: u8) !void { + const section = self.sections.get(sect_id); const sym = atom.getSymbol(self); - const file_offset = sect.offset + sym.n_value - sect.addr; + const file_offset = section.header.offset + sym.n_value - section.header.addr; try atom.resolveRelocs(self); log.debug("writing atom for symbol {s} at file offset 0x{x}", .{ atom.getName(self), file_offset }); try self.base.file.?.pwriteAll(atom.code.items, file_offset); } fn allocateSymbols(self: *MachO) !void { - var it = self.atoms.iterator(); - while (it.next()) |entry| { - const match = entry.key_ptr.*; - var atom = entry.value_ptr.*; + const slice = self.sections.slice(); + for (slice.items(.last_atom)) |last_atom, sect_id| { + const header = slice.items(.header)[sect_id]; + var atom = last_atom orelse continue; while (atom.prev) |prev| { atom = prev; } - const n_sect = self.getSectionOrdinal(match); - const sect = self.getSection(match); - var base_vaddr = sect.addr; + const n_sect = @intCast(u8, sect_id + 1); + var base_vaddr = header.addr; - 
log.debug("allocating local symbols in sect({d}, '{s},{s}')", .{ n_sect, sect.segName(), sect.sectName() }); + log.debug("allocating local symbols in sect({d}, '{s},{s}')", .{ + n_sect, + header.segName(), + header.sectName(), + }); while (true) { const alignment = try math.powi(u32, 2, atom.alignment); @@ -2296,7 +2180,10 @@ fn allocateSymbols(self: *MachO) !void { // Update each symbol contained within the atom for (atom.contained.items) |sym_at_off| { - const contained_sym = self.getSymbolPtr(.{ .sym_index = sym_at_off.sym_index, .file = atom.file }); + const contained_sym = self.getSymbolPtr(.{ + .sym_index = sym_at_off.sym_index, + .file = atom.file, + }); contained_sym.n_value = base_vaddr + sym_at_off.offset; contained_sym.n_sect = n_sect; } @@ -2310,15 +2197,18 @@ fn allocateSymbols(self: *MachO) !void { } } -fn shiftLocalsByOffset(self: *MachO, match: MatchingSection, offset: i64) !void { - var atom = self.atoms.get(match) orelse return; +fn shiftLocalsByOffset(self: *MachO, sect_id: u8, offset: i64) !void { + var atom = self.sections.items(.last_atom)[sect_id] orelse return; while (true) { const atom_sym = atom.getSymbolPtr(self); atom_sym.n_value = @intCast(u64, @intCast(i64, atom_sym.n_value) + offset); for (atom.contained.items) |sym_at_off| { - const contained_sym = self.getSymbolPtr(.{ .sym_index = sym_at_off.sym_index, .file = atom.file }); + const contained_sym = self.getSymbolPtr(.{ + .sym_index = sym_at_off.sym_index, + .file = atom.file, + }); contained_sym.n_value = @intCast(u64, @intCast(i64, contained_sym.n_value) + offset); } @@ -2336,16 +2226,13 @@ fn allocateSpecialSymbols(self: *MachO) !void { const global = self.globals.get(name) orelse continue; if (global.file != null) continue; const sym = self.getSymbolPtr(global); - const seg = self.load_commands.items[self.text_segment_cmd_index.?].segment; - sym.n_sect = self.getSectionOrdinal(.{ - .seg = self.text_segment_cmd_index.?, - .sect = 0, - }); - sym.n_value = seg.inner.vmaddr; + const 
seg = self.segments.items[self.text_segment_cmd_index.?]; + sym.n_sect = 1; + sym.n_value = seg.vmaddr; log.debug("allocating {s} at the start of {s}", .{ name, - seg.inner.segName(), + seg.segName(), }); } } @@ -2353,18 +2240,20 @@ fn allocateSpecialSymbols(self: *MachO) !void { fn writeAtomsOneShot(self: *MachO) !void { assert(self.mode == .one_shot); - var it = self.atoms.iterator(); - while (it.next()) |entry| { - const sect = self.getSection(entry.key_ptr.*); - var atom: *Atom = entry.value_ptr.*; + const gpa = self.base.allocator; + const slice = self.sections.slice(); + + for (slice.items(.last_atom)) |last_atom, sect_id| { + const header = slice.items(.header)[sect_id]; + var atom = last_atom.?; - if (sect.flags == macho.S_ZEROFILL or sect.flags == macho.S_THREAD_LOCAL_ZEROFILL) continue; + if (header.flags == macho.S_ZEROFILL or header.flags == macho.S_THREAD_LOCAL_ZEROFILL) continue; - var buffer = std.ArrayList(u8).init(self.base.allocator); + var buffer = std.ArrayList(u8).init(gpa); defer buffer.deinit(); - try buffer.ensureTotalCapacity(math.cast(usize, sect.size) orelse return error.Overflow); + try buffer.ensureTotalCapacity(math.cast(usize, header.size) orelse return error.Overflow); - log.debug("writing atoms in {s},{s}", .{ sect.segName(), sect.sectName() }); + log.debug("writing atoms in {s},{s}", .{ header.segName(), header.sectName() }); while (atom.prev) |prev| { atom = prev; @@ -2399,18 +2288,18 @@ fn writeAtomsOneShot(self: *MachO) !void { if (atom.next) |next| { atom = next; } else { - assert(buffer.items.len == sect.size); - log.debug(" (writing at file offset 0x{x})", .{sect.offset}); - try self.base.file.?.pwriteAll(buffer.items, sect.offset); + assert(buffer.items.len == header.size); + log.debug(" (writing at file offset 0x{x})", .{header.offset}); + try self.base.file.?.pwriteAll(buffer.items, header.offset); break; } } } } -fn writePadding(self: *MachO, match: MatchingSection, size: usize, writer: anytype) !void { - const is_code = 
match.seg == self.text_segment_cmd_index.? and match.sect == self.text_section_index.?; - const min_alignment: u3 = if (!is_code) +fn writePadding(self: *MachO, sect_id: u8, size: usize, writer: anytype) !void { + const header = self.sections.items(.header)[sect_id]; + const min_alignment: u3 = if (!header.isCode()) 1 else switch (self.base.options.target.cpu.arch) { .aarch64 => @sizeOf(u32), @@ -2421,7 +2310,7 @@ fn writePadding(self: *MachO, match: MatchingSection, size: usize, writer: anyty const len = @divExact(size, min_alignment); var i: usize = 0; while (i < len) : (i += 1) { - if (!is_code) { + if (!header.isCode()) { try writer.writeByte(0); } else switch (self.base.options.target.cpu.arch) { .aarch64 => { @@ -2439,20 +2328,20 @@ fn writePadding(self: *MachO, match: MatchingSection, size: usize, writer: anyty fn writeAtomsIncremental(self: *MachO) !void { assert(self.mode == .incremental); - var it = self.atoms.iterator(); - while (it.next()) |entry| { - const match = entry.key_ptr.*; - const sect = self.getSection(match); - var atom: *Atom = entry.value_ptr.*; + const slice = self.sections.slice(); + for (slice.items(.last_atom)) |last, i| { + var atom: *Atom = last orelse continue; + const sect_i = @intCast(u8, i); + const header = slice.items(.header)[sect_i]; // TODO handle zerofill in stage2 // if (sect.flags == macho.S_ZEROFILL or sect.flags == macho.S_THREAD_LOCAL_ZEROFILL) continue; - log.debug("writing atoms in {s},{s}", .{ sect.segName(), sect.sectName() }); + log.debug("writing atoms in {s},{s}", .{ header.segName(), header.sectName() }); while (true) { if (atom.dirty) { - try self.writeAtom(atom, match); + try self.writeAtom(atom, sect_i); atom.dirty = false; } @@ -2503,10 +2392,7 @@ pub fn createGotAtom(self: *MachO, target: SymbolWithLoc) !*Atom { try self.managed_atoms.append(gpa, atom); try self.atom_by_index_table.putNoClobber(gpa, sym_index, atom); - try self.allocateAtomCommon(atom, .{ - .seg = self.data_const_segment_cmd_index.?, - 
.sect = self.got_section_index.?, - }); + try self.allocateAtomCommon(atom, self.got_section_index.?); return atom; } @@ -2535,7 +2421,7 @@ pub fn createTlvPtrAtom(self: *MachO, target: SymbolWithLoc) !*Atom { try self.managed_atoms.append(gpa, atom); try self.atom_by_index_table.putNoClobber(gpa, sym_index, atom); - const match = (try self.getMatchingSection(.{ + const match = (try self.getOutputSection(.{ .segname = makeStaticString("__DATA"), .sectname = makeStaticString("__thread_ptrs"), .flags = macho.S_THREAD_LOCAL_VARIABLE_POINTERS, @@ -2561,10 +2447,7 @@ fn createDyldPrivateAtom(self: *MachO) !void { const atom = try MachO.createEmptyAtom(gpa, sym_index, @sizeOf(u64), 3); self.dyld_private_atom = atom; - try self.allocateAtomCommon(atom, .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.data_section_index.?, - }); + try self.allocateAtomCommon(atom, self.data_section_index.?); try self.managed_atoms.append(gpa, atom); try self.atom_by_index_table.putNoClobber(gpa, sym_index, atom); @@ -2692,10 +2575,7 @@ fn createStubHelperPreambleAtom(self: *MachO) !void { } self.stub_helper_preamble_atom = atom; - try self.allocateAtomCommon(atom, .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.stub_helper_section_index.?, - }); + try self.allocateAtomCommon(atom, self.stub_helper_section_index.?); try self.managed_atoms.append(gpa, atom); try self.atom_by_index_table.putNoClobber(gpa, sym_index, atom); @@ -2771,10 +2651,7 @@ pub fn createStubHelperAtom(self: *MachO) !*Atom { try self.managed_atoms.append(gpa, atom); try self.atom_by_index_table.putNoClobber(gpa, sym_index, atom); - try self.allocateAtomCommon(atom, .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.stub_helper_section_index.?, - }); + try self.allocateAtomCommon(atom, self.stub_helper_section_index.?); return atom; } @@ -2814,10 +2691,7 @@ pub fn createLazyPointerAtom(self: *MachO, stub_sym_index: u32, target: SymbolWi try self.managed_atoms.append(gpa, atom); try 
self.atom_by_index_table.putNoClobber(gpa, sym_index, atom); - try self.allocateAtomCommon(atom, .{ - .seg = self.data_segment_cmd_index.?, - .sect = self.la_symbol_ptr_section_index.?, - }); + try self.allocateAtomCommon(atom, self.la_symbol_ptr_section_index.?); return atom; } @@ -2896,10 +2770,7 @@ pub fn createStubAtom(self: *MachO, laptr_sym_index: u32) !*Atom { try self.managed_atoms.append(gpa, atom); try self.atom_by_index_table.putNoClobber(gpa, sym_index, atom); - try self.allocateAtomCommon(atom, .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.stubs_section_index.?, - }); + try self.allocateAtomCommon(atom, self.stubs_section_index.?); return atom; } @@ -2917,12 +2788,6 @@ fn createTentativeDefAtoms(self: *MachO) !void { // Convert any tentative definition into a regular symbol and allocate // text blocks for each tentative definition. - const match = MatchingSection{ - .seg = self.data_segment_cmd_index.?, - .sect = self.bss_section_index.?, - }; - _ = try self.section_ordinals.getOrPut(gpa, match); - const size = sym.n_value; const alignment = (sym.n_desc >> 8) & 0x0f; @@ -2937,7 +2802,7 @@ fn createTentativeDefAtoms(self: *MachO) !void { const atom = try MachO.createEmptyAtom(gpa, global.sym_index, size, alignment); atom.file = global.file; - try self.allocateAtomCommon(atom, match); + try self.allocateAtomCommon(atom, self.bss_section_index.?); if (global.file) |file| { const object = &self.objects.items[file]; @@ -3060,7 +2925,8 @@ fn resolveGlobalSymbol(self: *MachO, current: SymbolWithLoc) !void { gop.value_ptr.* = current; } -fn resolveSymbolsInObject(self: *MachO, object: *Object, object_id: u16) !void { +fn resolveSymbolsInObject(self: *MachO, object_id: u16) !void { + const object = &self.objects.items[object_id]; log.debug("resolving symbols in '{s}'", .{object.name}); for (object.symtab.items) |sym, index| { @@ -3115,6 +2981,8 @@ fn resolveSymbolsInObject(self: *MachO, object: *Object, object_id: u16) !void { fn 
resolveSymbolsInArchives(self: *MachO) !void { if (self.archives.items.len == 0) return; + const gpa = self.base.allocator; + const cpu_arch = self.base.options.target.cpu.arch; var next_sym: usize = 0; loop: while (next_sym < self.unresolved.count()) { const global = self.globals.values()[self.unresolved.keys()[next_sym]]; @@ -3129,13 +2997,9 @@ fn resolveSymbolsInArchives(self: *MachO) !void { assert(offsets.items.len > 0); const object_id = @intCast(u16, self.objects.items.len); - const object = try self.objects.addOne(self.base.allocator); - object.* = try archive.parseObject( - self.base.allocator, - self.base.options.target.cpu.arch, - offsets.items[0], - ); - try self.resolveSymbolsInObject(object, object_id); + const object = try archive.parseObject(gpa, cpu_arch, offsets.items[0]); + try self.objects.append(gpa, object); + try self.resolveSymbolsInObject(object_id); continue :loop; } @@ -3159,7 +3023,6 @@ fn resolveSymbolsInDylibs(self: *MachO) !void { const dylib_id = @intCast(u16, id); if (!self.referenced_dylibs.contains(dylib_id)) { - try self.addLoadDylibLC(dylib_id); try self.referenced_dylibs.putNoClobber(self.base.allocator, dylib_id, {}); } @@ -3257,7 +3120,6 @@ fn resolveDyldStubBinder(self: *MachO) !void { const dylib_id = @intCast(u16, id); if (!self.referenced_dylibs.contains(dylib_id)) { - try self.addLoadDylibLC(dylib_id); try self.referenced_dylibs.putNoClobber(self.base.allocator, dylib_id, {}); } @@ -3280,47 +3142,192 @@ fn resolveDyldStubBinder(self: *MachO) !void { self.got_entries.items[got_index].sym_index = got_atom.sym_index; } -fn addLoadDylibLC(self: *MachO, id: u16) !void { - const dylib = self.dylibs.items[id]; - const dylib_id = dylib.id orelse unreachable; - var dylib_cmd = try macho.createLoadDylibCommand( - self.base.allocator, - if (dylib.weak) .LOAD_WEAK_DYLIB else .LOAD_DYLIB, - dylib_id.name, - dylib_id.timestamp, - dylib_id.current_version, - dylib_id.compatibility_version, - ); - errdefer 
dylib_cmd.deinit(self.base.allocator); - try self.load_commands.append(self.base.allocator, .{ .dylib = dylib_cmd }); - self.load_commands_dirty = true; +fn writeDylinkerLC(ncmds: *u32, lc_writer: anytype) !void { + const name_len = mem.sliceTo(default_dyld_path, 0).len; + const cmdsize = @intCast(u32, mem.alignForwardGeneric( + u64, + @sizeOf(macho.dylinker_command) + name_len, + @sizeOf(u64), + )); + try lc_writer.writeStruct(macho.dylinker_command{ + .cmd = .LOAD_DYLINKER, + .cmdsize = cmdsize, + .name = @sizeOf(macho.dylinker_command), + }); + try lc_writer.writeAll(mem.sliceTo(default_dyld_path, 0)); + const padding = cmdsize - @sizeOf(macho.dylinker_command) - name_len; + if (padding > 0) { + try lc_writer.writeByteNTimes(0, padding); + } + ncmds.* += 1; +} + +fn writeMainLC(self: *MachO, ncmds: *u32, lc_writer: anytype) !void { + if (self.base.options.output_mode != .Exe) return; + const seg = self.segments.items[self.text_segment_cmd_index.?]; + const global = try self.getEntryPoint(); + const sym = self.getSymbol(global); + try lc_writer.writeStruct(macho.entry_point_command{ + .cmd = .MAIN, + .cmdsize = @sizeOf(macho.entry_point_command), + .entryoff = @intCast(u32, sym.n_value - seg.vmaddr), + .stacksize = self.base.options.stack_size_override orelse 0, + }); + ncmds.* += 1; } -fn addCodeSignatureLC(self: *MachO) !void { - if (self.code_signature_cmd_index != null or self.code_signature == null) return; - self.code_signature_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.base.allocator, .{ - .linkedit_data = .{ - .cmd = .CODE_SIGNATURE, - .cmdsize = @sizeOf(macho.linkedit_data_command), - .dataoff = 0, - .datasize = 0, +const WriteDylibLCCtx = struct { + cmd: macho.LC, + name: []const u8, + timestamp: u32 = 2, + current_version: u32 = 0x10000, + compatibility_version: u32 = 0x10000, +}; + +fn writeDylibLC(ctx: WriteDylibLCCtx, ncmds: *u32, lc_writer: anytype) !void { + const name_len = ctx.name.len + 1; + 
const cmdsize = @intCast(u32, mem.alignForwardGeneric( + u64, + @sizeOf(macho.dylib_command) + name_len, + @sizeOf(u64), + )); + try lc_writer.writeStruct(macho.dylib_command{ + .cmd = ctx.cmd, + .cmdsize = cmdsize, + .dylib = .{ + .name = @sizeOf(macho.dylib_command), + .timestamp = ctx.timestamp, + .current_version = ctx.current_version, + .compatibility_version = ctx.compatibility_version, }, }); - self.load_commands_dirty = true; + try lc_writer.writeAll(ctx.name); + try lc_writer.writeByte(0); + const padding = cmdsize - @sizeOf(macho.dylib_command) - name_len; + if (padding > 0) { + try lc_writer.writeByteNTimes(0, padding); + } + ncmds.* += 1; } -fn setEntryPoint(self: *MachO) !void { - if (self.base.options.output_mode != .Exe) return; +fn writeDylibIdLC(self: *MachO, ncmds: *u32, lc_writer: anytype) !void { + if (self.base.options.output_mode != .Lib) return; + const install_name = self.base.options.install_name orelse self.base.options.emit.?.sub_path; + const curr = self.base.options.version orelse std.builtin.Version{ + .major = 1, + .minor = 0, + .patch = 0, + }; + const compat = self.base.options.compatibility_version orelse std.builtin.Version{ + .major = 1, + .minor = 0, + .patch = 0, + }; + try writeDylibLC(.{ + .cmd = .ID_DYLIB, + .name = install_name, + .current_version = curr.major << 16 | curr.minor << 8 | curr.patch, + .compatibility_version = compat.major << 16 | compat.minor << 8 | compat.patch, + }, ncmds, lc_writer); +} - const seg = self.load_commands.items[self.text_segment_cmd_index.?].segment; - const global = try self.getEntryPoint(); - const sym = self.getSymbol(global); - const ec = &self.load_commands.items[self.main_cmd_index.?].main; - ec.entryoff = @intCast(u32, sym.n_value - seg.inner.vmaddr); - ec.stacksize = self.base.options.stack_size_override orelse 0; - self.entry_addr = sym.n_value; - self.load_commands_dirty = true; +const RpathIterator = struct { + buffer: []const []const u8, + table: std.StringHashMap(void), + count: 
usize = 0, + + fn init(gpa: Allocator, rpaths: []const []const u8) RpathIterator { + return .{ .buffer = rpaths, .table = std.StringHashMap(void).init(gpa) }; + } + + fn deinit(it: *RpathIterator) void { + it.table.deinit(); + } + + fn next(it: *RpathIterator) !?[]const u8 { + while (true) { + if (it.count >= it.buffer.len) return null; + const rpath = it.buffer[it.count]; + it.count += 1; + const gop = try it.table.getOrPut(rpath); + if (gop.found_existing) continue; + return rpath; + } + } +}; + +fn writeRpathLCs(self: *MachO, ncmds: *u32, lc_writer: anytype) !void { + const gpa = self.base.allocator; + + var it = RpathIterator.init(gpa, self.base.options.rpath_list); + defer it.deinit(); + + while (try it.next()) |rpath| { + const rpath_len = rpath.len + 1; + const cmdsize = @intCast(u32, mem.alignForwardGeneric( + u64, + @sizeOf(macho.rpath_command) + rpath_len, + @sizeOf(u64), + )); + try lc_writer.writeStruct(macho.rpath_command{ + .cmdsize = cmdsize, + .path = @sizeOf(macho.rpath_command), + }); + try lc_writer.writeAll(rpath); + try lc_writer.writeByte(0); + const padding = cmdsize - @sizeOf(macho.rpath_command) - rpath_len; + if (padding > 0) { + try lc_writer.writeByteNTimes(0, padding); + } + ncmds.* += 1; + } +} + +fn writeBuildVersionLC(self: *MachO, ncmds: *u32, lc_writer: anytype) !void { + const cmdsize = @sizeOf(macho.build_version_command) + @sizeOf(macho.build_tool_version); + const platform_version = blk: { + const ver = self.base.options.target.os.version_range.semver.min; + const platform_version = ver.major << 16 | ver.minor << 8; + break :blk platform_version; + }; + const sdk_version = if (self.base.options.native_darwin_sdk) |sdk| blk: { + const ver = sdk.version; + const sdk_version = ver.major << 16 | ver.minor << 8; + break :blk sdk_version; + } else platform_version; + const is_simulator_abi = self.base.options.target.abi == .simulator; + try lc_writer.writeStruct(macho.build_version_command{ + .cmdsize = cmdsize, + .platform = switch 
(self.base.options.target.os.tag) { + .macos => .MACOS, + .ios => if (is_simulator_abi) macho.PLATFORM.IOSSIMULATOR else macho.PLATFORM.IOS, + .watchos => if (is_simulator_abi) macho.PLATFORM.WATCHOSSIMULATOR else macho.PLATFORM.WATCHOS, + .tvos => if (is_simulator_abi) macho.PLATFORM.TVOSSIMULATOR else macho.PLATFORM.TVOS, + else => unreachable, + }, + .minos = platform_version, + .sdk = sdk_version, + .ntools = 1, + }); + try lc_writer.writeAll(mem.asBytes(&macho.build_tool_version{ + .tool = .LD, + .version = 0x0, + })); + ncmds.* += 1; +} + +fn writeLoadDylibLCs(self: *MachO, ncmds: *u32, lc_writer: anytype) !void { + for (self.referenced_dylibs.keys()) |id| { + const dylib = self.dylibs.items[id]; + const dylib_id = dylib.id orelse unreachable; + try writeDylibLC(.{ + .cmd = if (dylib.weak) .LOAD_WEAK_DYLIB else .LOAD_DYLIB, + .name = dylib_id.name, + .timestamp = dylib_id.timestamp, + .current_version = dylib_id.current_version, + .compatibility_version = dylib_id.compatibility_version, + }, ncmds, lc_writer); + } } pub fn deinit(self: *MachO) void { @@ -3334,7 +3341,6 @@ pub fn deinit(self: *MachO) void { d_sym.deinit(gpa); } - self.section_ordinals.deinit(gpa); self.tlv_ptr_entries.deinit(gpa); self.tlv_ptr_entries_free_list.deinit(gpa); self.tlv_ptr_entries_table.deinit(gpa); @@ -3371,24 +3377,19 @@ pub fn deinit(self: *MachO) void { self.dylibs_map.deinit(gpa); self.referenced_dylibs.deinit(gpa); - for (self.load_commands.items) |*lc| { - lc.deinit(gpa); + self.segments.deinit(gpa); + + for (self.sections.items(.free_list)) |*list| { + list.deinit(gpa); } - self.load_commands.deinit(gpa); + self.sections.deinit(gpa); for (self.managed_atoms.items) |atom| { atom.deinit(gpa); gpa.destroy(atom); } self.managed_atoms.deinit(gpa); - self.atoms.deinit(gpa); - { - var it = self.atom_free_lists.valueIterator(); - while (it.next()) |free_list| { - free_list.deinit(gpa); - } - self.atom_free_lists.deinit(gpa); - } + if (self.base.options.module) |mod| { for 
(self.decls.keys()) |decl_index| { const decl = mod.declPtr(decl_index); @@ -3408,34 +3409,24 @@ pub fn deinit(self: *MachO) void { } self.atom_by_index_table.deinit(gpa); - - if (self.code_signature) |*csig| { - csig.deinit(gpa); - } } pub fn closeFiles(self: MachO) void { - for (self.objects.items) |object| { - object.file.close(); - } for (self.archives.items) |archive| { archive.file.close(); } - for (self.dylibs.items) |dylib| { - dylib.file.close(); - } if (self.d_sym) |ds| { ds.file.close(); } } -fn freeAtom(self: *MachO, atom: *Atom, match: MatchingSection, owns_atom: bool) void { +fn freeAtom(self: *MachO, atom: *Atom, sect_id: u8, owns_atom: bool) void { log.debug("freeAtom {*}", .{atom}); if (!owns_atom) { atom.deinit(self.base.allocator); } - const free_list = self.atom_free_lists.getPtr(match).?; + const free_list = &self.sections.items(.free_list)[sect_id]; var already_have_free_list_node = false; { var i: usize = 0; @@ -3452,13 +3443,14 @@ fn freeAtom(self: *MachO, atom: *Atom, match: MatchingSection, owns_atom: bool) } } - if (self.atoms.getPtr(match)) |last_atom| { - if (last_atom.* == atom) { + const maybe_last_atom = &self.sections.items(.last_atom)[sect_id]; + if (maybe_last_atom.*) |last_atom| { + if (last_atom == atom) { if (atom.prev) |prev| { // TODO shrink the section size here - last_atom.* = prev; + maybe_last_atom.* = prev; } else { - _ = self.atoms.fetchRemove(match); + maybe_last_atom.* = null; } } } @@ -3486,21 +3478,21 @@ fn freeAtom(self: *MachO, atom: *Atom, match: MatchingSection, owns_atom: bool) } } -fn shrinkAtom(self: *MachO, atom: *Atom, new_block_size: u64, match: MatchingSection) void { +fn shrinkAtom(self: *MachO, atom: *Atom, new_block_size: u64, sect_id: u8) void { _ = self; _ = atom; _ = new_block_size; - _ = match; + _ = sect_id; // TODO check the new capacity, and if it crosses the size threshold into a big enough // capacity, insert a free list node for it. 
} -fn growAtom(self: *MachO, atom: *Atom, new_atom_size: u64, alignment: u64, match: MatchingSection) !u64 { +fn growAtom(self: *MachO, atom: *Atom, new_atom_size: u64, alignment: u64, sect_id: u8) !u64 { const sym = atom.getSymbol(self); const align_ok = mem.alignBackwardGeneric(u64, sym.n_value, alignment) == sym.n_value; const need_realloc = !align_ok or new_atom_size > atom.capacity(self); if (!need_realloc) return sym.n_value; - return self.allocateAtom(atom, new_atom_size, alignment, match); + return self.allocateAtom(atom, new_atom_size, alignment, sect_id); } fn allocateSymbol(self: *MachO) !u32 { @@ -3671,10 +3663,11 @@ pub fn updateFunc(self: *MachO, module: *Module, func: *Module.Fn, air: Air, liv } pub fn lowerUnnamedConst(self: *MachO, typed_value: TypedValue, decl_index: Module.Decl.Index) !u32 { - var code_buffer = std.ArrayList(u8).init(self.base.allocator); + const gpa = self.base.allocator; + + var code_buffer = std.ArrayList(u8).init(gpa); defer code_buffer.deinit(); - const gpa = self.base.allocator; const module = self.base.options.module.?; const gop = try self.unnamed_const_atoms.getOrPut(gpa, decl_index); if (!gop.found_existing) { @@ -3725,25 +3718,25 @@ pub fn lowerUnnamedConst(self: *MachO, typed_value: TypedValue, decl_index: Modu atom.code.clearRetainingCapacity(); try atom.code.appendSlice(gpa, code); - const match = try self.getMatchingSectionAtom( + const sect_id = try self.getOutputSectionAtom( atom, decl_name, typed_value.ty, typed_value.val, required_alignment, ); - const addr = try self.allocateAtom(atom, code.len, required_alignment, match); + const addr = try self.allocateAtom(atom, code.len, required_alignment, sect_id); log.debug("allocated atom for {?s} at 0x{x}", .{ name, addr }); log.debug(" (required alignment 0x{x})", .{required_alignment}); - errdefer self.freeAtom(atom, match, true); + errdefer self.freeAtom(atom, sect_id, true); const symbol = atom.getSymbolPtr(self); symbol.* = .{ .n_strx = name_str_index, .n_type = 
macho.N_SECT, - .n_sect = self.getSectionOrdinal(match), + .n_sect = sect_id + 1, .n_desc = 0, .n_value = addr, }; @@ -3894,44 +3887,35 @@ fn needsPointerRebase(ty: Type, val: Value, mod: *Module) bool { } } -fn getMatchingSectionAtom( +fn getOutputSectionAtom( self: *MachO, atom: *Atom, name: []const u8, ty: Type, val: Value, alignment: u32, -) !MatchingSection { +) !u8 { const code = atom.code.items; const mod = self.base.options.module.?; const align_log_2 = math.log2(alignment); const zig_ty = ty.zigTypeTag(); const mode = self.base.options.optimize_mode; - const match: MatchingSection = blk: { + const sect_id: u8 = blk: { // TODO finish and audit this function if (val.isUndefDeep()) { if (mode == .ReleaseFast or mode == .ReleaseSmall) { - break :blk MatchingSection{ - .seg = self.data_segment_cmd_index.?, - .sect = self.bss_section_index.?, - }; + break :blk self.bss_section_index.?; } else { - break :blk MatchingSection{ - .seg = self.data_segment_cmd_index.?, - .sect = self.data_section_index.?, - }; + break :blk self.data_section_index.?; } } if (val.castTag(.variable)) |_| { - break :blk MatchingSection{ - .seg = self.data_segment_cmd_index.?, - .sect = self.data_section_index.?, - }; + break :blk self.data_section_index.?; } if (needsPointerRebase(ty, val, mod)) { - break :blk (try self.getMatchingSection(.{ + break :blk (try self.getOutputSection(.{ .segname = makeStaticString("__DATA_CONST"), .sectname = makeStaticString("__const"), .size = code.len, @@ -3941,10 +3925,7 @@ fn getMatchingSectionAtom( switch (zig_ty) { .Fn => { - break :blk MatchingSection{ - .seg = self.text_segment_cmd_index.?, - .sect = self.text_section_index.?, - }; + break :blk self.text_section_index.?; }, .Array => { if (val.tag() == .bytes) { @@ -3953,7 +3934,7 @@ fn getMatchingSectionAtom( .const_slice_u8_sentinel_0, .manyptr_const_u8_sentinel_0, => { - break :blk (try self.getMatchingSection(.{ + break :blk (try self.getOutputSection(.{ .segname = makeStaticString("__TEXT"), 
.sectname = makeStaticString("__cstring"), .flags = macho.S_CSTRING_LITERALS, @@ -3967,22 +3948,21 @@ fn getMatchingSectionAtom( }, else => {}, } - break :blk (try self.getMatchingSection(.{ + break :blk (try self.getOutputSection(.{ .segname = makeStaticString("__TEXT"), .sectname = makeStaticString("__const"), .size = code.len, .@"align" = align_log_2, })).?; }; - const sect = self.getSection(match); - log.debug(" allocating atom '{s}' in '{s},{s}' ({d},{d})", .{ + const header = self.sections.items(.header)[sect_id]; + log.debug(" allocating atom '{s}' in '{s},{s}', ord({d})", .{ name, - sect.segName(), - sect.sectName(), - match.seg, - match.sect, + header.segName(), + header.sectName(), + sect_id, }); - return match; + return sect_id; } fn placeDecl(self: *MachO, decl_index: Module.Decl.Index, code_len: usize) !u64 { @@ -3996,7 +3976,7 @@ fn placeDecl(self: *MachO, decl_index: Module.Decl.Index, code_len: usize) !u64 const decl_ptr = self.decls.getPtr(decl_index).?; if (decl_ptr.* == null) { - decl_ptr.* = try self.getMatchingSectionAtom( + decl_ptr.* = try self.getOutputSectionAtom( &decl.link.macho, sym_name, decl.ty, @@ -4045,7 +4025,7 @@ fn placeDecl(self: *MachO, decl_index: Module.Decl.Index, code_len: usize) !u64 symbol.* = .{ .n_strx = name_str_index, .n_type = macho.N_SECT, - .n_sect = self.getSectionOrdinal(match), + .n_sect = match + 1, .n_desc = 0, .n_value = addr, }; @@ -4134,10 +4114,7 @@ pub fn updateDeclExports( sym.* = .{ .n_strx = try self.strtab.insert(gpa, exp_name), .n_type = macho.N_SECT | macho.N_EXT, - .n_sect = self.getSectionOrdinal(.{ - .seg = self.text_segment_cmd_index.?, - .sect = self.text_section_index.?, // TODO what if we export a variable? - }), + .n_sect = self.text_section_index.? + 1, // TODO what if we export a variable? 
.n_desc = 0, .n_value = decl_sym.n_value, }; @@ -4208,10 +4185,7 @@ pub fn deleteExport(self: *MachO, exp: Export) void { fn freeUnnamedConsts(self: *MachO, decl_index: Module.Decl.Index) void { const unnamed_consts = self.unnamed_const_atoms.getPtr(decl_index) orelse return; for (unnamed_consts.items) |atom| { - self.freeAtom(atom, .{ - .seg = self.text_segment_cmd_index.?, - .sect = self.text_const_section_index.?, - }, true); + self.freeAtom(atom, self.text_const_section_index.?, true); self.locals_free_list.append(self.base.allocator, atom.sym_index) catch {}; self.locals.items[atom.sym_index].n_type = 0; _ = self.atom_by_index_table.remove(atom.sym_index); @@ -4294,6 +4268,7 @@ pub fn getDeclVAddr(self: *MachO, decl_index: Module.Decl.Index, reloc_info: Fil } fn populateMissingMetadata(self: *MachO) !void { + const gpa = self.base.allocator; const cpu_arch = self.base.options.target.cpu.arch; const pagezero_vmsize = self.base.options.pagezero_size orelse default_pagezero_vmsize; const aligned_pagezero_vmsize = mem.alignBackwardGeneric(u64, pagezero_vmsize, self.page_size); @@ -4305,21 +4280,16 @@ fn populateMissingMetadata(self: *MachO) !void { log.warn("requested __PAGEZERO size (0x{x}) is not page aligned", .{pagezero_vmsize}); log.warn(" rounding down to 0x{x}", .{aligned_pagezero_vmsize}); } - self.pagezero_segment_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.base.allocator, .{ - .segment = .{ - .inner = .{ - .segname = makeStaticString("__PAGEZERO"), - .vmsize = aligned_pagezero_vmsize, - .cmdsize = @sizeOf(macho.segment_command_64), - }, - }, + self.pagezero_segment_cmd_index = @intCast(u8, self.segments.items.len); + try self.segments.append(gpa, .{ + .segname = makeStaticString("__PAGEZERO"), + .vmsize = aligned_pagezero_vmsize, + .cmdsize = @sizeOf(macho.segment_command_64), }); - self.load_commands_dirty = true; } if (self.text_segment_cmd_index == null) { - self.text_segment_cmd_index = @intCast(u16, 
self.load_commands.items.len); + self.text_segment_cmd_index = @intCast(u8, self.segments.items.len); const needed_size = if (self.mode == .incremental) blk: { const headerpad_size = @maximum(self.base.options.headerpad_size orelse 0, default_headerpad_size); const program_code_size_hint = self.base.options.program_code_size_hint; @@ -4329,20 +4299,15 @@ fn populateMissingMetadata(self: *MachO) !void { log.debug("found __TEXT segment free space 0x{x} to 0x{x}", .{ 0, needed_size }); break :blk needed_size; } else 0; - try self.load_commands.append(self.base.allocator, .{ - .segment = .{ - .inner = .{ - .segname = makeStaticString("__TEXT"), - .vmaddr = aligned_pagezero_vmsize, - .vmsize = needed_size, - .filesize = needed_size, - .maxprot = macho.PROT.READ | macho.PROT.EXEC, - .initprot = macho.PROT.READ | macho.PROT.EXEC, - .cmdsize = @sizeOf(macho.segment_command_64), - }, - }, + try self.segments.append(gpa, .{ + .segname = makeStaticString("__TEXT"), + .vmaddr = aligned_pagezero_vmsize, + .vmsize = needed_size, + .filesize = needed_size, + .maxprot = macho.PROT.READ | macho.PROT.EXEC, + .initprot = macho.PROT.READ | macho.PROT.EXEC, + .cmdsize = @sizeOf(macho.segment_command_64), }); - self.load_commands_dirty = true; } if (self.text_section_index == null) { @@ -4419,7 +4384,7 @@ fn populateMissingMetadata(self: *MachO) !void { } if (self.data_const_segment_cmd_index == null) { - self.data_const_segment_cmd_index = @intCast(u16, self.load_commands.items.len); + self.data_const_segment_cmd_index = @intCast(u8, self.segments.items.len); var vmaddr: u64 = 0; var fileoff: u64 = 0; var needed_size: u64 = 0; @@ -4434,21 +4399,16 @@ fn populateMissingMetadata(self: *MachO) !void { fileoff + needed_size, }); } - try self.load_commands.append(self.base.allocator, .{ - .segment = .{ - .inner = .{ - .segname = makeStaticString("__DATA_CONST"), - .vmaddr = vmaddr, - .vmsize = needed_size, - .fileoff = fileoff, - .filesize = needed_size, - .maxprot = macho.PROT.READ | 
macho.PROT.WRITE, - .initprot = macho.PROT.READ | macho.PROT.WRITE, - .cmdsize = @sizeOf(macho.segment_command_64), - }, - }, + try self.segments.append(gpa, .{ + .segname = makeStaticString("__DATA_CONST"), + .vmaddr = vmaddr, + .vmsize = needed_size, + .fileoff = fileoff, + .filesize = needed_size, + .maxprot = macho.PROT.READ | macho.PROT.WRITE, + .initprot = macho.PROT.READ | macho.PROT.WRITE, + .cmdsize = @sizeOf(macho.segment_command_64), }); - self.load_commands_dirty = true; } if (self.got_section_index == null) { @@ -4469,7 +4429,7 @@ fn populateMissingMetadata(self: *MachO) !void { } if (self.data_segment_cmd_index == null) { - self.data_segment_cmd_index = @intCast(u16, self.load_commands.items.len); + self.data_segment_cmd_index = @intCast(u8, self.segments.items.len); var vmaddr: u64 = 0; var fileoff: u64 = 0; var needed_size: u64 = 0; @@ -4484,21 +4444,16 @@ fn populateMissingMetadata(self: *MachO) !void { fileoff + needed_size, }); } - try self.load_commands.append(self.base.allocator, .{ - .segment = .{ - .inner = .{ - .segname = makeStaticString("__DATA"), - .vmaddr = vmaddr, - .vmsize = needed_size, - .fileoff = fileoff, - .filesize = needed_size, - .maxprot = macho.PROT.READ | macho.PROT.WRITE, - .initprot = macho.PROT.READ | macho.PROT.WRITE, - .cmdsize = @sizeOf(macho.segment_command_64), - }, - }, + try self.segments.append(gpa, .{ + .segname = makeStaticString("__DATA"), + .vmaddr = vmaddr, + .vmsize = needed_size, + .fileoff = fileoff, + .filesize = needed_size, + .maxprot = macho.PROT.READ | macho.PROT.WRITE, + .initprot = macho.PROT.READ | macho.PROT.WRITE, + .cmdsize = @sizeOf(macho.segment_command_64), }); - self.load_commands_dirty = true; } if (self.la_symbol_ptr_section_index == null) { @@ -4602,7 +4557,7 @@ fn populateMissingMetadata(self: *MachO) !void { } if (self.linkedit_segment_cmd_index == null) { - self.linkedit_segment_cmd_index = @intCast(u16, self.load_commands.items.len); + self.linkedit_segment_cmd_index = @intCast(u8, 
self.segments.items.len); var vmaddr: u64 = 0; var fileoff: u64 = 0; if (self.mode == .incremental) { @@ -4611,249 +4566,113 @@ fn populateMissingMetadata(self: *MachO) !void { fileoff = base.fileoff; log.debug("found __LINKEDIT segment free space at 0x{x}", .{fileoff}); } - try self.load_commands.append(self.base.allocator, .{ - .segment = .{ - .inner = .{ - .segname = makeStaticString("__LINKEDIT"), - .vmaddr = vmaddr, - .fileoff = fileoff, - .maxprot = macho.PROT.READ, - .initprot = macho.PROT.READ, - .cmdsize = @sizeOf(macho.segment_command_64), - }, - }, - }); - self.load_commands_dirty = true; - } - - if (self.dyld_info_cmd_index == null) { - self.dyld_info_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.base.allocator, .{ - .dyld_info_only = .{ - .cmd = .DYLD_INFO_ONLY, - .cmdsize = @sizeOf(macho.dyld_info_command), - .rebase_off = 0, - .rebase_size = 0, - .bind_off = 0, - .bind_size = 0, - .weak_bind_off = 0, - .weak_bind_size = 0, - .lazy_bind_off = 0, - .lazy_bind_size = 0, - .export_off = 0, - .export_size = 0, - }, + try self.segments.append(gpa, .{ + .segname = makeStaticString("__LINKEDIT"), + .vmaddr = vmaddr, + .fileoff = fileoff, + .maxprot = macho.PROT.READ, + .initprot = macho.PROT.READ, + .cmdsize = @sizeOf(macho.segment_command_64), }); - self.load_commands_dirty = true; - } - - if (self.symtab_cmd_index == null) { - self.symtab_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.base.allocator, .{ - .symtab = .{ - .cmdsize = @sizeOf(macho.symtab_command), - .symoff = 0, - .nsyms = 0, - .stroff = 0, - .strsize = 0, - }, - }); - self.load_commands_dirty = true; - } - - if (self.dysymtab_cmd_index == null) { - self.dysymtab_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.base.allocator, .{ - .dysymtab = .{ - .cmdsize = @sizeOf(macho.dysymtab_command), - .ilocalsym = 0, - .nlocalsym = 0, - .iextdefsym = 0, - 
.nextdefsym = 0, - .iundefsym = 0, - .nundefsym = 0, - .tocoff = 0, - .ntoc = 0, - .modtaboff = 0, - .nmodtab = 0, - .extrefsymoff = 0, - .nextrefsyms = 0, - .indirectsymoff = 0, - .nindirectsyms = 0, - .extreloff = 0, - .nextrel = 0, - .locreloff = 0, - .nlocrel = 0, - }, - }); - self.load_commands_dirty = true; } +} - if (self.dylinker_cmd_index == null) { - self.dylinker_cmd_index = @intCast(u16, self.load_commands.items.len); - const cmdsize = @intCast(u32, mem.alignForwardGeneric( - u64, - @sizeOf(macho.dylinker_command) + mem.sliceTo(default_dyld_path, 0).len, - @sizeOf(u64), - )); - var dylinker_cmd = macho.emptyGenericCommandWithData(macho.dylinker_command{ - .cmd = .LOAD_DYLINKER, - .cmdsize = cmdsize, - .name = @sizeOf(macho.dylinker_command), - }); - dylinker_cmd.data = try self.base.allocator.alloc(u8, cmdsize - dylinker_cmd.inner.name); - mem.set(u8, dylinker_cmd.data, 0); - mem.copy(u8, dylinker_cmd.data, mem.sliceTo(default_dyld_path, 0)); - try self.load_commands.append(self.base.allocator, .{ .dylinker = dylinker_cmd }); - self.load_commands_dirty = true; - } - - if (self.main_cmd_index == null and self.base.options.output_mode == .Exe) { - self.main_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.base.allocator, .{ - .main = .{ - .cmdsize = @sizeOf(macho.entry_point_command), - .entryoff = 0x0, - .stacksize = 0, - }, - }); - self.load_commands_dirty = true; - } +inline fn calcInstallNameLen(cmd_size: u64, name: []const u8, assume_max_path_len: bool) u64 { + const name_len = if (assume_max_path_len) std.os.PATH_MAX else std.mem.len(name) + 1; + return mem.alignForwardGeneric(u64, cmd_size + name_len, @alignOf(u64)); +} - if (self.dylib_id_cmd_index == null and self.base.options.output_mode == .Lib) { - self.dylib_id_cmd_index = @intCast(u16, self.load_commands.items.len); - const install_name = self.base.options.install_name orelse self.base.options.emit.?.sub_path; - const current_version = 
self.base.options.version orelse - std.builtin.Version{ .major = 1, .minor = 0, .patch = 0 }; - const compat_version = self.base.options.compatibility_version orelse - std.builtin.Version{ .major = 1, .minor = 0, .patch = 0 }; - var dylib_cmd = try macho.createLoadDylibCommand( - self.base.allocator, - .ID_DYLIB, - install_name, - 2, - current_version.major << 16 | current_version.minor << 8 | current_version.patch, - compat_version.major << 16 | compat_version.minor << 8 | compat_version.patch, - ); - errdefer dylib_cmd.deinit(self.base.allocator); - try self.load_commands.append(self.base.allocator, .{ .dylib = dylib_cmd }); - self.load_commands_dirty = true; +fn calcLCsSize(self: *MachO, assume_max_path_len: bool) !u32 { + const gpa = self.base.allocator; + var sizeofcmds: u64 = 0; + for (self.segments.items) |seg| { + sizeofcmds += seg.nsects * @sizeOf(macho.section_64) + @sizeOf(macho.segment_command_64); + } + + // LC_DYLD_INFO_ONLY + sizeofcmds += @sizeOf(macho.dyld_info_command); + // LC_FUNCTION_STARTS + if (self.text_section_index != null) { + sizeofcmds += @sizeOf(macho.linkedit_data_command); + } + // LC_DATA_IN_CODE + sizeofcmds += @sizeOf(macho.linkedit_data_command); + // LC_SYMTAB + sizeofcmds += @sizeOf(macho.symtab_command); + // LC_DYSYMTAB + sizeofcmds += @sizeOf(macho.dysymtab_command); + // LC_LOAD_DYLINKER + sizeofcmds += calcInstallNameLen( + @sizeOf(macho.dylinker_command), + mem.sliceTo(default_dyld_path, 0), + false, + ); + // LC_MAIN + if (self.base.options.output_mode == .Exe) { + sizeofcmds += @sizeOf(macho.entry_point_command); + } + // LC_ID_DYLIB + if (self.base.options.output_mode == .Lib) { + sizeofcmds += blk: { + const install_name = self.base.options.install_name orelse self.base.options.emit.?.sub_path; + break :blk calcInstallNameLen( + @sizeOf(macho.dylib_command), + install_name, + assume_max_path_len, + ); + }; } - - if (self.source_version_cmd_index == null) { - self.source_version_cmd_index = @intCast(u16, 
self.load_commands.items.len); - try self.load_commands.append(self.base.allocator, .{ - .source_version = .{ - .cmdsize = @sizeOf(macho.source_version_command), - .version = 0x0, - }, - }); - self.load_commands_dirty = true; + // LC_RPATH + { + var it = RpathIterator.init(gpa, self.base.options.rpath_list); + defer it.deinit(); + while (try it.next()) |rpath| { + sizeofcmds += calcInstallNameLen( + @sizeOf(macho.rpath_command), + rpath, + assume_max_path_len, + ); + } } - - if (self.build_version_cmd_index == null) { - self.build_version_cmd_index = @intCast(u16, self.load_commands.items.len); - const cmdsize = @intCast(u32, mem.alignForwardGeneric( - u64, - @sizeOf(macho.build_version_command) + @sizeOf(macho.build_tool_version), - @sizeOf(u64), - )); - const platform_version = blk: { - const ver = self.base.options.target.os.version_range.semver.min; - const platform_version = ver.major << 16 | ver.minor << 8; - break :blk platform_version; - }; - const sdk_version = if (self.base.options.native_darwin_sdk) |sdk| blk: { - const ver = sdk.version; - const sdk_version = ver.major << 16 | ver.minor << 8; - break :blk sdk_version; - } else platform_version; - const is_simulator_abi = self.base.options.target.abi == .simulator; - var cmd = macho.emptyGenericCommandWithData(macho.build_version_command{ - .cmdsize = cmdsize, - .platform = switch (self.base.options.target.os.tag) { - .macos => .MACOS, - .ios => if (is_simulator_abi) macho.PLATFORM.IOSSIMULATOR else macho.PLATFORM.IOS, - .watchos => if (is_simulator_abi) macho.PLATFORM.WATCHOSSIMULATOR else macho.PLATFORM.WATCHOS, - .tvos => if (is_simulator_abi) macho.PLATFORM.TVOSSIMULATOR else macho.PLATFORM.TVOS, - else => unreachable, - }, - .minos = platform_version, - .sdk = sdk_version, - .ntools = 1, - }); - const ld_ver = macho.build_tool_version{ - .tool = .LD, - .version = 0x0, - }; - cmd.data = try self.base.allocator.alloc(u8, cmdsize - @sizeOf(macho.build_version_command)); - mem.set(u8, cmd.data, 0); - 
mem.copy(u8, cmd.data, mem.asBytes(&ld_ver)); - try self.load_commands.append(self.base.allocator, .{ .build_version = cmd }); - self.load_commands_dirty = true; - } - - if (self.uuid_cmd_index == null) { - self.uuid_cmd_index = @intCast(u16, self.load_commands.items.len); - var uuid_cmd: macho.uuid_command = .{ - .cmdsize = @sizeOf(macho.uuid_command), - .uuid = undefined, + // LC_SOURCE_VERSION + sizeofcmds += @sizeOf(macho.source_version_command); + // LC_BUILD_VERSION + sizeofcmds += @sizeOf(macho.build_version_command) + @sizeOf(macho.build_tool_version); + // LC_UUID + sizeofcmds += @sizeOf(macho.uuid_command); + // LC_LOAD_DYLIB + for (self.referenced_dylibs.keys()) |id| { + const dylib = self.dylibs.items[id]; + const dylib_id = dylib.id orelse unreachable; + sizeofcmds += calcInstallNameLen( + @sizeOf(macho.dylib_command), + dylib_id.name, + assume_max_path_len, + ); + } + // LC_CODE_SIGNATURE + { + const target = self.base.options.target; + const requires_codesig = blk: { + if (self.base.options.entitlements) |_| break :blk true; + if (target.cpu.arch == .aarch64 and (target.os.tag == .macos or target.abi == .simulator)) + break :blk true; + break :blk false; }; - std.crypto.random.bytes(&uuid_cmd.uuid); - try self.load_commands.append(self.base.allocator, .{ .uuid = uuid_cmd }); - self.load_commands_dirty = true; - } - - if (self.function_starts_cmd_index == null) { - self.function_starts_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.base.allocator, .{ - .linkedit_data = .{ - .cmd = .FUNCTION_STARTS, - .cmdsize = @sizeOf(macho.linkedit_data_command), - .dataoff = 0, - .datasize = 0, - }, - }); - self.load_commands_dirty = true; + if (requires_codesig) { + sizeofcmds += @sizeOf(macho.linkedit_data_command); + } } - if (self.data_in_code_cmd_index == null) { - self.data_in_code_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.base.allocator, .{ - .linkedit_data = 
.{ - .cmd = .DATA_IN_CODE, - .cmdsize = @sizeOf(macho.linkedit_data_command), - .dataoff = 0, - .datasize = 0, - }, - }); - self.load_commands_dirty = true; - } + return @intCast(u32, sizeofcmds); } -fn calcMinHeaderpad(self: *MachO) u64 { - var sizeofcmds: u32 = 0; - for (self.load_commands.items) |lc| { - if (lc.cmd() == .NONE) continue; - sizeofcmds += lc.cmdsize(); - } - - var padding: u32 = sizeofcmds + (self.base.options.headerpad_size orelse 0); +fn calcMinHeaderPad(self: *MachO) !u64 { + var padding: u32 = (try self.calcLCsSize(false)) + (self.base.options.headerpad_size orelse 0); log.debug("minimum requested headerpad size 0x{x}", .{padding + @sizeOf(macho.mach_header_64)}); if (self.base.options.headerpad_max_install_names) { - var min_headerpad_size: u32 = 0; - for (self.load_commands.items) |lc| switch (lc.cmd()) { - .ID_DYLIB, - .LOAD_WEAK_DYLIB, - .LOAD_DYLIB, - .REEXPORT_DYLIB, - => { - min_headerpad_size += @sizeOf(macho.dylib_command) + std.os.PATH_MAX + 1; - }, - - else => {}, - }; + var min_headerpad_size: u32 = try self.calcLCsSize(true); log.debug("headerpad_max_install_names minimum headerpad size 0x{x}", .{ min_headerpad_size + @sizeOf(macho.mach_header_64), }); @@ -4868,32 +4687,31 @@ fn calcMinHeaderpad(self: *MachO) u64 { fn allocateSegments(self: *MachO) !void { try self.allocateSegment(self.text_segment_cmd_index, &.{ self.pagezero_segment_cmd_index, - }, self.calcMinHeaderpad()); + }, try self.calcMinHeaderPad()); if (self.text_segment_cmd_index) |index| blk: { - const seg = &self.load_commands.items[index].segment; - if (seg.sections.items.len == 0) break :blk; + const seg = &self.segments.items[index]; + if (seg.nsects == 0) break :blk; // Shift all sections to the back to minimize jump size between __TEXT and __DATA segments. 
var min_alignment: u32 = 0; - for (seg.sections.items) |sect| { - const alignment = try math.powi(u32, 2, sect.@"align"); + for (self.sections.items(.header)[0..seg.nsects]) |header| { + const alignment = try math.powi(u32, 2, header.@"align"); min_alignment = math.max(min_alignment, alignment); } assert(min_alignment > 0); - const last_sect_idx = seg.sections.items.len - 1; - const last_sect = seg.sections.items[last_sect_idx]; + const last_header = self.sections.items(.header)[seg.nsects - 1]; const shift: u32 = shift: { - const diff = seg.inner.filesize - last_sect.offset - last_sect.size; + const diff = seg.filesize - last_header.offset - last_header.size; const factor = @divTrunc(diff, min_alignment); break :shift @intCast(u32, factor * min_alignment); }; if (shift > 0) { - for (seg.sections.items) |*sect| { - sect.offset += shift; - sect.addr += shift; + for (self.sections.items(.header)[0..seg.nsects]) |*header| { + header.offset += shift; + header.addr += shift; } } } @@ -4917,42 +4735,42 @@ fn allocateSegments(self: *MachO) !void { }, 0); } -fn allocateSegment(self: *MachO, maybe_index: ?u16, indices: []const ?u16, init_size: u64) !void { +fn allocateSegment(self: *MachO, maybe_index: ?u8, indices: []const ?u8, init_size: u64) !void { const index = maybe_index orelse return; - const seg = &self.load_commands.items[index].segment; + const seg = &self.segments.items[index]; const base = self.getSegmentAllocBase(indices); - seg.inner.vmaddr = base.vmaddr; - seg.inner.fileoff = base.fileoff; - seg.inner.filesize = init_size; - seg.inner.vmsize = init_size; + seg.vmaddr = base.vmaddr; + seg.fileoff = base.fileoff; + seg.filesize = init_size; + seg.vmsize = init_size; // Allocate the sections according to their alignment at the beginning of the segment. 
var start = init_size; - for (seg.sections.items) |*sect| { - const is_zerofill = sect.flags == macho.S_ZEROFILL or sect.flags == macho.S_THREAD_LOCAL_ZEROFILL; - const use_llvm = build_options.have_llvm and self.base.options.use_llvm; - const use_stage1 = build_options.is_stage1 and self.base.options.use_stage1; - const alignment = try math.powi(u32, 2, sect.@"align"); + const slice = self.sections.slice(); + for (slice.items(.header)) |*header, sect_id| { + const segment_index = slice.items(.segment_index)[sect_id]; + if (segment_index != index) continue; + const is_zerofill = header.flags == macho.S_ZEROFILL or header.flags == macho.S_THREAD_LOCAL_ZEROFILL; + const alignment = try math.powi(u32, 2, header.@"align"); const start_aligned = mem.alignForwardGeneric(u64, start, alignment); - // TODO handle zerofill sections in stage2 - sect.offset = if (is_zerofill and (use_stage1 or use_llvm)) + header.offset = if (is_zerofill) 0 else - @intCast(u32, seg.inner.fileoff + start_aligned); - sect.addr = seg.inner.vmaddr + start_aligned; + @intCast(u32, seg.fileoff + start_aligned); + header.addr = seg.vmaddr + start_aligned; - start = start_aligned + sect.size; + start = start_aligned + header.size; - if (!(is_zerofill and (use_stage1 or use_llvm))) { - seg.inner.filesize = start; + if (!is_zerofill) { + seg.filesize = start; } - seg.inner.vmsize = start; + seg.vmsize = start; } - seg.inner.filesize = mem.alignForwardGeneric(u64, seg.inner.filesize, self.page_size); - seg.inner.vmsize = mem.alignForwardGeneric(u64, seg.inner.vmsize, self.page_size); + seg.filesize = mem.alignForwardGeneric(u64, seg.filesize, self.page_size); + seg.vmsize = mem.alignForwardGeneric(u64, seg.vmsize, self.page_size); } const InitSectionOpts = struct { @@ -4963,16 +4781,16 @@ const InitSectionOpts = struct { fn initSection( self: *MachO, - segment_id: u16, + segment_id: u8, sectname: []const u8, size: u64, alignment: u32, opts: InitSectionOpts, -) !u16 { - const seg = 
&self.load_commands.items[segment_id].segment; - var sect = macho.section_64{ +) !u8 { + const seg = &self.segments.items[segment_id]; + var header = macho.section_64{ .sectname = makeStaticString(sectname), - .segname = seg.inner.segname, + .segname = seg.segname, .size = if (self.mode == .incremental) @intCast(u32, size) else 0, .@"align" = alignment, .flags = opts.flags, @@ -4982,165 +4800,157 @@ fn initSection( if (self.mode == .incremental) { const alignment_pow_2 = try math.powi(u32, 2, alignment); - const padding: ?u32 = if (segment_id == self.text_segment_cmd_index.?) - @maximum(self.base.options.headerpad_size orelse 0, default_headerpad_size) + const padding: ?u64 = if (segment_id == self.text_segment_cmd_index.?) + try self.calcMinHeaderPad() else null; const off = self.findFreeSpace(segment_id, alignment_pow_2, padding); log.debug("allocating {s},{s} section from 0x{x} to 0x{x}", .{ - sect.segName(), - sect.sectName(), + header.segName(), + header.sectName(), off, off + size, }); - sect.addr = seg.inner.vmaddr + off - seg.inner.fileoff; - - const is_zerofill = opts.flags == macho.S_ZEROFILL or opts.flags == macho.S_THREAD_LOCAL_ZEROFILL; - const use_llvm = build_options.have_llvm and self.base.options.use_llvm; - const use_stage1 = build_options.is_stage1 and self.base.options.use_stage1; + header.addr = seg.vmaddr + off - seg.fileoff; // TODO handle zerofill in stage2 - if (!(is_zerofill and (use_stage1 or use_llvm))) { - sect.offset = @intCast(u32, off); - } - } + // const is_zerofill = opts.flags == macho.S_ZEROFILL or opts.flags == macho.S_THREAD_LOCAL_ZEROFILL; + header.offset = @intCast(u32, off); - const index = @intCast(u16, seg.sections.items.len); - try seg.sections.append(self.base.allocator, sect); - seg.inner.cmdsize += @sizeOf(macho.section_64); - seg.inner.nsects += 1; - - const match = MatchingSection{ - .seg = segment_id, - .sect = index, - }; - _ = try self.section_ordinals.getOrPut(self.base.allocator, match); - try 
self.atom_free_lists.putNoClobber(self.base.allocator, match, .{}); + try self.updateSectionOrdinals(); + } - self.load_commands_dirty = true; - self.sections_order_dirty = true; + const index = @intCast(u8, self.sections.slice().len); + try self.sections.append(self.base.allocator, .{ + .segment_index = segment_id, + .header = header, + }); + seg.cmdsize += @sizeOf(macho.section_64); + seg.nsects += 1; return index; } -fn findFreeSpace(self: MachO, segment_id: u16, alignment: u64, start: ?u32) u64 { - const seg = self.load_commands.items[segment_id].segment; - if (seg.sections.items.len == 0) { - return if (start) |v| v else seg.inner.fileoff; +fn findFreeSpace(self: MachO, segment_id: u8, alignment: u64, start: ?u64) u64 { + const seg = self.segments.items[segment_id]; + const indexes = self.getSectionIndexes(segment_id); + if (indexes.end - indexes.start == 0) { + return if (start) |v| v else seg.fileoff; } - const last_sect = seg.sections.items[seg.sections.items.len - 1]; + const last_sect = self.sections.items(.header)[indexes.end - 1]; const final_off = last_sect.offset + padToIdeal(last_sect.size); return mem.alignForwardGeneric(u64, final_off, alignment); } -fn growSegment(self: *MachO, seg_id: u16, new_size: u64) !void { - const seg = &self.load_commands.items[seg_id].segment; - const new_seg_size = mem.alignForwardGeneric(u64, new_size, self.page_size); - assert(new_seg_size > seg.inner.filesize); - const offset_amt = new_seg_size - seg.inner.filesize; +fn growSegment(self: *MachO, segment_index: u8, new_size: u64) !void { + const segment = &self.segments.items[segment_index]; + const new_segment_size = mem.alignForwardGeneric(u64, new_size, self.page_size); + assert(new_segment_size > segment.filesize); + const offset_amt = new_segment_size - segment.filesize; log.debug("growing segment {s} from 0x{x} to 0x{x}", .{ - seg.inner.segname, - seg.inner.filesize, - new_seg_size, + segment.segname, + segment.filesize, + new_segment_size, }); - 
seg.inner.filesize = new_seg_size; - seg.inner.vmsize = new_seg_size; + segment.filesize = new_segment_size; + segment.vmsize = new_segment_size; log.debug(" (new segment file offsets from 0x{x} to 0x{x} (in memory 0x{x} to 0x{x}))", .{ - seg.inner.fileoff, - seg.inner.fileoff + seg.inner.filesize, - seg.inner.vmaddr, - seg.inner.vmaddr + seg.inner.vmsize, + segment.fileoff, + segment.fileoff + segment.filesize, + segment.vmaddr, + segment.vmaddr + segment.vmsize, }); - var next: usize = seg_id + 1; + var next: u8 = segment_index + 1; while (next < self.linkedit_segment_cmd_index.? + 1) : (next += 1) { - const next_seg = &self.load_commands.items[next].segment; + const next_segment = &self.segments.items[next]; try MachO.copyRangeAllOverlappingAlloc( self.base.allocator, self.base.file.?, - next_seg.inner.fileoff, - next_seg.inner.fileoff + offset_amt, - math.cast(usize, next_seg.inner.filesize) orelse return error.Overflow, + next_segment.fileoff, + next_segment.fileoff + offset_amt, + math.cast(usize, next_segment.filesize) orelse return error.Overflow, ); - next_seg.inner.fileoff += offset_amt; - next_seg.inner.vmaddr += offset_amt; + next_segment.fileoff += offset_amt; + next_segment.vmaddr += offset_amt; log.debug(" (new {s} segment file offsets from 0x{x} to 0x{x} (in memory 0x{x} to 0x{x}))", .{ - next_seg.inner.segname, - next_seg.inner.fileoff, - next_seg.inner.fileoff + next_seg.inner.filesize, - next_seg.inner.vmaddr, - next_seg.inner.vmaddr + next_seg.inner.vmsize, + next_segment.segname, + next_segment.fileoff, + next_segment.fileoff + next_segment.filesize, + next_segment.vmaddr, + next_segment.vmaddr + next_segment.vmsize, }); - for (next_seg.sections.items) |*moved_sect, moved_sect_id| { - moved_sect.offset += @intCast(u32, offset_amt); - moved_sect.addr += offset_amt; + const indexes = self.getSectionIndexes(next); + for (self.sections.items(.header)[indexes.start..indexes.end]) |*header, i| { + header.offset += @intCast(u32, offset_amt); + 
header.addr += offset_amt; log.debug(" (new {s},{s} file offsets from 0x{x} to 0x{x} (in memory 0x{x} to 0x{x}))", .{ - moved_sect.segName(), - moved_sect.sectName(), - moved_sect.offset, - moved_sect.offset + moved_sect.size, - moved_sect.addr, - moved_sect.addr + moved_sect.size, + header.segName(), + header.sectName(), + header.offset, + header.offset + header.size, + header.addr, + header.addr + header.size, }); - try self.shiftLocalsByOffset(.{ - .seg = @intCast(u16, next), - .sect = @intCast(u16, moved_sect_id), - }, @intCast(i64, offset_amt)); + try self.shiftLocalsByOffset(@intCast(u8, i + indexes.start), @intCast(i64, offset_amt)); } } } -fn growSection(self: *MachO, match: MatchingSection, new_size: u32) !void { +fn growSection(self: *MachO, sect_id: u8, new_size: u32) !void { const tracy = trace(@src()); defer tracy.end(); - const seg = &self.load_commands.items[match.seg].segment; - const sect = &seg.sections.items[match.sect]; + const section = self.sections.get(sect_id); + const segment_index = section.segment_index; + const header = section.header; + const segment = self.segments.items[segment_index]; - const alignment = try math.powi(u32, 2, sect.@"align"); - const max_size = self.allocatedSize(match.seg, sect.offset); + const alignment = try math.powi(u32, 2, header.@"align"); + const max_size = self.allocatedSize(segment_index, header.offset); const ideal_size = padToIdeal(new_size); const needed_size = mem.alignForwardGeneric(u32, ideal_size, alignment); if (needed_size > max_size) blk: { log.debug(" (need to grow! 
needed 0x{x}, max 0x{x})", .{ needed_size, max_size }); - if (match.sect == seg.sections.items.len - 1) { + const indexes = self.getSectionIndexes(segment_index); + if (sect_id == indexes.end - 1) { // Last section, just grow segments - try self.growSegment(match.seg, seg.inner.filesize + needed_size - max_size); + try self.growSegment(segment_index, segment.filesize + needed_size - max_size); break :blk; } // Need to move all sections below in file and address spaces. const offset_amt = offset: { - const max_alignment = try self.getSectionMaxAlignment(match.seg, match.sect + 1); + const max_alignment = try self.getSectionMaxAlignment(sect_id + 1, indexes.end); break :offset mem.alignForwardGeneric(u64, needed_size - max_size, max_alignment); }; // Before we commit to this, check if the segment needs to grow too. // We assume that each section header is growing linearly with the increasing // file offset / virtual memory address space. - const last_sect = seg.sections.items[seg.sections.items.len - 1]; - const last_sect_off = last_sect.offset + last_sect.size; - const seg_off = seg.inner.fileoff + seg.inner.filesize; + const last_sect_header = self.sections.items(.header)[indexes.end - 1]; + const last_sect_off = last_sect_header.offset + last_sect_header.size; + const seg_off = segment.fileoff + segment.filesize; if (last_sect_off + offset_amt > seg_off) { // Need to grow segment first. const spill_size = (last_sect_off + offset_amt) - seg_off; - try self.growSegment(match.seg, seg.inner.filesize + spill_size); + try self.growSegment(segment_index, segment.filesize + spill_size); } // We have enough space to expand within the segment, so move all sections by // the required amount and update their header offsets. 
- const next_sect = seg.sections.items[match.sect + 1]; + const next_sect = self.sections.items(.header)[sect_id + 1]; const total_size = last_sect_off - next_sect.offset; try MachO.copyRangeAllOverlappingAlloc( @@ -5151,9 +4961,7 @@ fn growSection(self: *MachO, match: MatchingSection, new_size: u32) !void { math.cast(usize, total_size) orelse return error.Overflow, ); - var next = match.sect + 1; - while (next < seg.sections.items.len) : (next += 1) { - const moved_sect = &seg.sections.items[next]; + for (self.sections.items(.header)[sect_id + 1 .. indexes.end]) |*moved_sect, i| { moved_sect.offset += @intCast(u32, offset_amt); moved_sect.addr += offset_amt; @@ -5166,49 +4974,45 @@ fn growSection(self: *MachO, match: MatchingSection, new_size: u32) !void { moved_sect.addr + moved_sect.size, }); - try self.shiftLocalsByOffset(.{ - .seg = match.seg, - .sect = next, - }, @intCast(i64, offset_amt)); + try self.shiftLocalsByOffset(@intCast(u8, sect_id + 1 + i), @intCast(i64, offset_amt)); } } } -fn allocatedSize(self: MachO, segment_id: u16, start: u64) u64 { - const seg = self.load_commands.items[segment_id].segment; - assert(start >= seg.inner.fileoff); - var min_pos: u64 = seg.inner.fileoff + seg.inner.filesize; +fn allocatedSize(self: MachO, segment_id: u8, start: u64) u64 { + const segment = self.segments.items[segment_id]; + const indexes = self.getSectionIndexes(segment_id); + assert(start >= segment.fileoff); + var min_pos: u64 = segment.fileoff + segment.filesize; if (start > min_pos) return 0; - for (seg.sections.items) |section| { - if (section.offset <= start) continue; - if (section.offset < min_pos) min_pos = section.offset; + for (self.sections.items(.header)[indexes.start..indexes.end]) |header| { + if (header.offset <= start) continue; + if (header.offset < min_pos) min_pos = header.offset; } return min_pos - start; } -fn getSectionMaxAlignment(self: *MachO, segment_id: u16, start_sect_id: u16) !u32 { - const seg = 
self.load_commands.items[segment_id].segment; +fn getSectionMaxAlignment(self: *MachO, start: u8, end: u8) !u32 { var max_alignment: u32 = 1; - var next = start_sect_id; - while (next < seg.sections.items.len) : (next += 1) { - const sect = seg.sections.items[next]; - const alignment = try math.powi(u32, 2, sect.@"align"); + const slice = self.sections.slice(); + for (slice.items(.header)[start..end]) |header| { + const alignment = try math.powi(u32, 2, header.@"align"); max_alignment = math.max(max_alignment, alignment); } return max_alignment; } -fn allocateAtomCommon(self: *MachO, atom: *Atom, match: MatchingSection) !void { +fn allocateAtomCommon(self: *MachO, atom: *Atom, sect_id: u8) !void { const sym = atom.getSymbolPtr(self); if (self.mode == .incremental) { const size = atom.size; const alignment = try math.powi(u32, 2, atom.alignment); - const vaddr = try self.allocateAtom(atom, size, alignment, match); + const vaddr = try self.allocateAtom(atom, size, alignment, sect_id); const sym_name = atom.getName(self); log.debug("allocated {s} atom at 0x{x}", .{ sym_name, vaddr }); sym.n_value = vaddr; - } else try self.addAtomToSection(atom, match); - sym.n_sect = self.getSectionOrdinal(match); + } else try self.addAtomToSection(atom, sect_id); + sym.n_sect = sect_id + 1; } fn allocateAtom( @@ -5216,15 +5020,15 @@ fn allocateAtom( atom: *Atom, new_atom_size: u64, alignment: u64, - match: MatchingSection, + sect_id: u8, ) !u64 { const tracy = trace(@src()); defer tracy.end(); - const sect = self.getSectionPtr(match); - var free_list = self.atom_free_lists.get(match).?; - const needs_padding = match.seg == self.text_segment_cmd_index.? 
and match.sect == self.text_section_index.?; - const new_atom_ideal_capacity = if (needs_padding) padToIdeal(new_atom_size) else new_atom_size; + const header = &self.sections.items(.header)[sect_id]; + const free_list = &self.sections.items(.free_list)[sect_id]; + const maybe_last_atom = &self.sections.items(.last_atom)[sect_id]; + const new_atom_ideal_capacity = if (header.isCode()) padToIdeal(new_atom_size) else new_atom_size; // We use these to indicate our intention to update metadata, placing the new atom, // and possibly removing a free list node. @@ -5244,7 +5048,7 @@ fn allocateAtom( // Is it enough that we could fit this new atom? const sym = big_atom.getSymbol(self); const capacity = big_atom.capacity(self); - const ideal_capacity = if (needs_padding) padToIdeal(capacity) else capacity; + const ideal_capacity = if (header.isCode()) padToIdeal(capacity) else capacity; const ideal_capacity_end_vaddr = math.add(u64, sym.n_value, ideal_capacity) catch ideal_capacity; const capacity_end_vaddr = sym.n_value + capacity; const new_start_vaddr_unaligned = capacity_end_vaddr - new_atom_ideal_capacity; @@ -5272,30 +5076,28 @@ fn allocateAtom( free_list_removal = i; } break :blk new_start_vaddr; - } else if (self.atoms.get(match)) |last| { + } else if (maybe_last_atom.*) |last| { const last_symbol = last.getSymbol(self); - const ideal_capacity = if (needs_padding) padToIdeal(last.size) else last.size; + const ideal_capacity = if (header.isCode()) padToIdeal(last.size) else last.size; const ideal_capacity_end_vaddr = last_symbol.n_value + ideal_capacity; const new_start_vaddr = mem.alignForwardGeneric(u64, ideal_capacity_end_vaddr, alignment); atom_placement = last; break :blk new_start_vaddr; } else { - break :blk mem.alignForwardGeneric(u64, sect.addr, alignment); + break :blk mem.alignForwardGeneric(u64, header.addr, alignment); } }; const expand_section = atom_placement == null or atom_placement.?.next == null; if (expand_section) { - const needed_size = 
@intCast(u32, (vaddr + new_atom_size) - sect.addr); - try self.growSection(match, needed_size); - _ = try self.atoms.put(self.base.allocator, match, atom); - sect.size = needed_size; - self.load_commands_dirty = true; + const needed_size = @intCast(u32, (vaddr + new_atom_size) - header.addr); + try self.growSection(sect_id, needed_size); + maybe_last_atom.* = atom; + header.size = needed_size; } const align_pow = @intCast(u32, math.log2(alignment)); - if (sect.@"align" < align_pow) { - sect.@"align" = align_pow; - self.load_commands_dirty = true; + if (header.@"align" < align_pow) { + header.@"align" = align_pow; } atom.size = new_atom_size; atom.alignment = align_pow; @@ -5322,20 +5124,19 @@ fn allocateAtom( return vaddr; } -pub fn addAtomToSection(self: *MachO, atom: *Atom, match: MatchingSection) !void { - if (self.atoms.getPtr(match)) |last| { - last.*.next = atom; - atom.prev = last.*; - last.* = atom; - } else { - try self.atoms.putNoClobber(self.base.allocator, match, atom); +pub fn addAtomToSection(self: *MachO, atom: *Atom, sect_id: u8) !void { + var section = self.sections.get(sect_id); + if (section.header.size > 0) { + section.last_atom.?.next = atom; + atom.prev = section.last_atom.?; } - const sect = self.getSectionPtr(match); + section.last_atom = atom; const atom_alignment = try math.powi(u32, 2, atom.alignment); - const aligned_end_addr = mem.alignForwardGeneric(u64, sect.size, atom_alignment); - const padding = aligned_end_addr - sect.size; - sect.size += padding + atom.size; - sect.@"align" = @maximum(sect.@"align", atom.alignment); + const aligned_end_addr = mem.alignForwardGeneric(u64, section.header.size, atom_alignment); + const padding = aligned_end_addr - section.header.size; + section.header.size += padding + atom.size; + section.header.@"align" = @maximum(section.header.@"align", atom.alignment); + self.sections.set(sect_id, section); } pub fn getGlobalSymbol(self: *MachO, name: []const u8) !u32 { @@ -5368,74 +5169,27 @@ pub fn 
getGlobalSymbol(self: *MachO, name: []const u8) !u32 { return sym_index; } -fn getSegmentAllocBase(self: MachO, indices: []const ?u16) struct { vmaddr: u64, fileoff: u64 } { +fn getSegmentAllocBase(self: MachO, indices: []const ?u8) struct { vmaddr: u64, fileoff: u64 } { for (indices) |maybe_prev_id| { const prev_id = maybe_prev_id orelse continue; - const prev = self.load_commands.items[prev_id].segment; + const prev = self.segments.items[prev_id]; return .{ - .vmaddr = prev.inner.vmaddr + prev.inner.vmsize, - .fileoff = prev.inner.fileoff + prev.inner.filesize, + .vmaddr = prev.vmaddr + prev.vmsize, + .fileoff = prev.fileoff + prev.filesize, }; } return .{ .vmaddr = 0, .fileoff = 0 }; } -fn pruneAndSortSectionsInSegment(self: *MachO, maybe_seg_id: *?u16, indices: []*?u16) !void { - const seg_id = maybe_seg_id.* orelse return; - - var mapping = std.AutoArrayHashMap(u16, ?u16).init(self.base.allocator); - defer mapping.deinit(); - - const seg = &self.load_commands.items[seg_id].segment; - var sections = seg.sections.toOwnedSlice(self.base.allocator); - defer self.base.allocator.free(sections); - try seg.sections.ensureTotalCapacity(self.base.allocator, sections.len); - - for (indices) |maybe_index| { - const old_idx = maybe_index.* orelse continue; - const sect = &sections[old_idx]; - if (sect.size == 0) { - log.debug("pruning section {s},{s}", .{ sect.segName(), sect.sectName() }); - maybe_index.* = null; - seg.inner.cmdsize -= @sizeOf(macho.section_64); - seg.inner.nsects -= 1; - } else { - maybe_index.* = @intCast(u16, seg.sections.items.len); - seg.sections.appendAssumeCapacity(sect.*); - } - try mapping.putNoClobber(old_idx, maybe_index.*); - } - - var atoms = std.ArrayList(struct { match: MatchingSection, atom: *Atom }).init(self.base.allocator); - defer atoms.deinit(); - try atoms.ensureTotalCapacity(mapping.count()); - - for (mapping.keys()) |old_sect| { - const new_sect = mapping.get(old_sect).?
orelse { - _ = self.atoms.remove(.{ .seg = seg_id, .sect = old_sect }); - continue; - }; - const kv = self.atoms.fetchRemove(.{ .seg = seg_id, .sect = old_sect }).?; - atoms.appendAssumeCapacity(.{ - .match = .{ .seg = seg_id, .sect = new_sect }, - .atom = kv.value, - }); - } - - while (atoms.popOrNull()) |next| { - try self.atoms.putNoClobber(self.base.allocator, next.match, next.atom); - } +fn pruneAndSortSections(self: *MachO) !void { + const gpa = self.base.allocator; - if (seg.inner.nsects == 0 and !mem.eql(u8, "__TEXT", seg.inner.segName())) { - // Segment has now become empty, so mark it as such - log.debug("marking segment {s} as dead", .{seg.inner.segName()}); - seg.inner.cmd = @intToEnum(macho.LC, 0); - maybe_seg_id.* = null; - } -} + var sections = self.sections.toOwnedSlice(); + defer sections.deinit(gpa); + try self.sections.ensureTotalCapacity(gpa, sections.len); -fn pruneAndSortSections(self: *MachO) !void { - try self.pruneAndSortSectionsInSegment(&self.text_segment_cmd_index, &.{ + for (&[_]*?u8{ + // __TEXT &self.text_section_index, &self.stubs_section_index, &self.stub_helper_section_index, @@ -5448,9 +5202,7 @@ fn pruneAndSortSections(self: *MachO) !void { &self.objc_methtype_section_index, &self.objc_classname_section_index, &self.eh_frame_section_index, - }); - - try self.pruneAndSortSectionsInSegment(&self.data_const_segment_cmd_index, &.{ + // __DATA_CONST &self.got_section_index, &self.mod_init_func_section_index, &self.mod_term_func_section_index, @@ -5458,9 +5210,7 @@ fn pruneAndSortSections(self: *MachO) !void { &self.objc_cfstring_section_index, &self.objc_classlist_section_index, &self.objc_imageinfo_section_index, - }); - - try self.pruneAndSortSectionsInSegment(&self.data_segment_cmd_index, &.{ + // __DATA &self.rustc_section_index, &self.la_symbol_ptr_section_index, &self.objc_const_section_index, @@ -5473,103 +5223,129 @@ fn pruneAndSortSections(self: *MachO) !void { &self.tlv_data_section_index, &self.tlv_bss_section_index, 
&self.bss_section_index, - }); - - // Create new section ordinals. - self.section_ordinals.clearRetainingCapacity(); - if (self.text_segment_cmd_index) |seg_id| { - const seg = self.load_commands.items[seg_id].segment; - for (seg.sections.items) |_, sect_id| { - const res = self.section_ordinals.getOrPutAssumeCapacity(.{ - .seg = seg_id, - .sect = @intCast(u16, sect_id), - }); - assert(!res.found_existing); - } - } - if (self.data_const_segment_cmd_index) |seg_id| { - const seg = self.load_commands.items[seg_id].segment; - for (seg.sections.items) |_, sect_id| { - const res = self.section_ordinals.getOrPutAssumeCapacity(.{ - .seg = seg_id, - .sect = @intCast(u16, sect_id), + }) |maybe_index| { + const old_idx = maybe_index.* orelse continue; + const segment_index = sections.items(.segment_index)[old_idx]; + const header = sections.items(.header)[old_idx]; + const last_atom = sections.items(.last_atom)[old_idx]; + if (header.size == 0) { + log.debug("pruning section {s},{s}", .{ header.segName(), header.sectName() }); + maybe_index.* = null; + const seg = &self.segments.items[segment_index]; + seg.cmdsize -= @sizeOf(macho.section_64); + seg.nsects -= 1; + } else { + maybe_index.* = @intCast(u8, self.sections.slice().len); + self.sections.appendAssumeCapacity(.{ + .segment_index = segment_index, + .header = header, + .last_atom = last_atom, }); - assert(!res.found_existing); } } - if (self.data_segment_cmd_index) |seg_id| { - const seg = self.load_commands.items[seg_id].segment; - for (seg.sections.items) |_, sect_id| { - const res = self.section_ordinals.getOrPutAssumeCapacity(.{ - .seg = seg_id, - .sect = @intCast(u16, sect_id), - }); - assert(!res.found_existing); + + for (self.segments.items) |*seg| { + const segname = seg.segName(); + if (seg.nsects == 0 and + !mem.eql(u8, "__TEXT", segname) and + !mem.eql(u8, "__PAGEZERO", segname) and + !mem.eql(u8, "__LINKEDIT", segname)) + { + // Segment has now become empty, so mark it as such + log.debug("marking segment 
{s} as dead", .{seg.segName()}); + seg.cmd = @intToEnum(macho.LC, 0); } } - self.sections_order_dirty = false; } fn updateSectionOrdinals(self: *MachO) !void { - if (!self.sections_order_dirty) return; - + _ = self; const tracy = trace(@src()); defer tracy.end(); - log.debug("updating section ordinals", .{}); - - const gpa = self.base.allocator; + @panic("updating section ordinals"); + + // const gpa = self.base.allocator; + + // var ordinal_remap = std.AutoHashMap(u8, u8).init(gpa); + // defer ordinal_remap.deinit(); + // var ordinals: std.AutoArrayHashMapUnmanaged(MatchingSection, void) = .{}; + + // var new_ordinal: u8 = 0; + // for (&[_]?u16{ + // self.text_segment_cmd_index, + // self.data_const_segment_cmd_index, + // self.data_segment_cmd_index, + // }) |maybe_index| { + // const index = maybe_index orelse continue; + // const seg = self.load_commands.items[index].segment; + // for (seg.sections.items) |sect, sect_id| { + // const match = MatchingSection{ + // .seg = @intCast(u16, index), + // .sect = @intCast(u16, sect_id), + // }; + // const old_ordinal = self.getSectionOrdinal(match); + // new_ordinal += 1; + // log.debug("'{s},{s}': sect({d}, '_,_') => sect({d}, '_,_')", .{ + // sect.segName(), + // sect.sectName(), + // old_ordinal, + // new_ordinal, + // }); + // try ordinal_remap.putNoClobber(old_ordinal, new_ordinal); + // try ordinals.putNoClobber(gpa, match, {}); + // } + // } + + // // FIXME Jakub + // // TODO no need for duping work here; simply walk the atom graph + // for (self.locals.items) |*sym| { + // if (sym.undf()) continue; + // if (sym.n_sect == 0) continue; + // sym.n_sect = ordinal_remap.get(sym.n_sect).?; + // } + // for (self.objects.items) |*object| { + // for (object.symtab.items) |*sym| { + // if (sym.undf()) continue; + // if (sym.n_sect == 0) continue; + // sym.n_sect = ordinal_remap.get(sym.n_sect).?; + // } + // } + + // self.section_ordinals.deinit(gpa); + // self.section_ordinals = ordinals; +} - var ordinal_remap = 
std.AutoHashMap(u8, u8).init(gpa); - defer ordinal_remap.deinit(); - var ordinals: std.AutoArrayHashMapUnmanaged(MatchingSection, void) = .{}; +pub fn writeSegmentHeaders(self: *MachO, start: usize, end: usize, ncmds: *u32, writer: anytype) !void { + var count: usize = 0; + for (self.segments.items[start..end]) |seg| { + if (seg.cmd == .NONE) continue; + try writer.writeStruct(seg); - var new_ordinal: u8 = 0; - for (&[_]?u16{ - self.text_segment_cmd_index, - self.data_const_segment_cmd_index, - self.data_segment_cmd_index, - }) |maybe_index| { - const index = maybe_index orelse continue; - const seg = self.load_commands.items[index].segment; - for (seg.sections.items) |sect, sect_id| { - const match = MatchingSection{ - .seg = @intCast(u16, index), - .sect = @intCast(u16, sect_id), - }; - const old_ordinal = self.getSectionOrdinal(match); - new_ordinal += 1; - log.debug("'{s},{s}': sect({d}, '_,_') => sect({d}, '_,_')", .{ - sect.segName(), - sect.sectName(), - old_ordinal, - new_ordinal, - }); - try ordinal_remap.putNoClobber(old_ordinal, new_ordinal); - try ordinals.putNoClobber(gpa, match, {}); + // TODO + for (self.sections.items(.header)[count..][0..seg.nsects]) |header| { + try writer.writeStruct(header); } - } - // FIXME Jakub - // TODO no need for duping work here; simply walk the atom graph - for (self.locals.items) |*sym| { - if (sym.undf()) continue; - if (sym.n_sect == 0) continue; - sym.n_sect = ordinal_remap.get(sym.n_sect).?; - } - for (self.objects.items) |*object| { - for (object.symtab.items) |*sym| { - if (sym.undf()) continue; - if (sym.n_sect == 0) continue; - sym.n_sect = ordinal_remap.get(sym.n_sect).?; - } + count += seg.nsects; + ncmds.* += 1; } +} + +fn writeLinkeditSegmentData(self: *MachO, ncmds: *u32, lc_writer: anytype) !void { + const seg = &self.segments.items[self.linkedit_segment_cmd_index.?]; + seg.filesize = 0; + seg.vmsize = 0; + + try self.writeDyldInfoData(ncmds, lc_writer); + try self.writeFunctionStarts(ncmds, lc_writer); + 
try self.writeDataInCode(ncmds, lc_writer); + try self.writeSymtabs(ncmds, lc_writer); - self.section_ordinals.deinit(gpa); - self.section_ordinals = ordinals; + seg.vmsize = mem.alignForwardGeneric(u64, seg.filesize, self.page_size); } -fn writeDyldInfoData(self: *MachO) !void { +fn writeDyldInfoData(self: *MachO, ncmds: *u32, lc_writer: anytype) !void { const tracy = trace(@src()); defer tracy.end(); @@ -5582,89 +5358,86 @@ fn writeDyldInfoData(self: *MachO) !void { var lazy_bind_pointers = std.ArrayList(bind.Pointer).init(gpa); defer lazy_bind_pointers.deinit(); - { - var it = self.atoms.iterator(); - while (it.next()) |entry| { - const match = entry.key_ptr.*; - var atom: *Atom = entry.value_ptr.*; + const slice = self.sections.slice(); + for (slice.items(.last_atom)) |last_atom, sect_id| { + var atom = last_atom orelse continue; + const segment_index = slice.items(.segment_index)[sect_id]; + const header = slice.items(.header)[sect_id]; - if (self.text_segment_cmd_index) |seg| { - if (match.seg == seg) continue; // __TEXT is non-writable - } + if (mem.eql(u8, header.segName(), "__TEXT")) continue; // __TEXT is non-writable - const seg = self.getSegment(match); - const sect = self.getSection(match); - log.debug("dyld info for {s},{s}", .{ sect.segName(), sect.sectName() }); + log.debug("dyld info for {s},{s}", .{ header.segName(), header.sectName() }); - while (true) { - log.debug(" ATOM(%{d}, '{s}')", .{ atom.sym_index, atom.getName(self) }); - const sym = atom.getSymbol(self); - const base_offset = sym.n_value - seg.inner.vmaddr; + const seg = self.segments.items[segment_index]; - for (atom.rebases.items) |offset| { - log.debug(" | rebase at {x}", .{base_offset + offset}); - try rebase_pointers.append(.{ - .offset = base_offset + offset, - .segment_id = match.seg, - }); - } + while (true) { + log.debug(" ATOM(%{d}, '{s}')", .{ atom.sym_index, atom.getName(self) }); + const sym = atom.getSymbol(self); + const base_offset = sym.n_value - seg.vmaddr; - for 
(atom.bindings.items) |binding| { - const bind_sym = self.getSymbol(binding.target); - const bind_sym_name = self.getSymbolName(binding.target); - const dylib_ordinal = @divTrunc( - @bitCast(i16, bind_sym.n_desc), - macho.N_SYMBOL_RESOLVER, - ); - var flags: u4 = 0; - log.debug(" | bind at {x}, import('{s}') in dylib({d})", .{ - binding.offset + base_offset, - bind_sym_name, - dylib_ordinal, - }); - if (bind_sym.weakRef()) { - log.debug(" | marking as weak ref ", .{}); - flags |= @truncate(u4, macho.BIND_SYMBOL_FLAGS_WEAK_IMPORT); - } - try bind_pointers.append(.{ - .offset = binding.offset + base_offset, - .segment_id = match.seg, - .dylib_ordinal = dylib_ordinal, - .name = bind_sym_name, - .bind_flags = flags, - }); - } + for (atom.rebases.items) |offset| { + log.debug(" | rebase at {x}", .{base_offset + offset}); + try rebase_pointers.append(.{ + .offset = base_offset + offset, + .segment_id = segment_index, + }); + } - for (atom.lazy_bindings.items) |binding| { - const bind_sym = self.getSymbol(binding.target); - const bind_sym_name = self.getSymbolName(binding.target); - const dylib_ordinal = @divTrunc( - @bitCast(i16, bind_sym.n_desc), - macho.N_SYMBOL_RESOLVER, - ); - var flags: u4 = 0; - log.debug(" | lazy bind at {x} import('{s}') ord({d})", .{ - binding.offset + base_offset, - bind_sym_name, - dylib_ordinal, - }); - if (bind_sym.weakRef()) { - log.debug(" | marking as weak ref ", .{}); - flags |= @truncate(u4, macho.BIND_SYMBOL_FLAGS_WEAK_IMPORT); - } - try lazy_bind_pointers.append(.{ - .offset = binding.offset + base_offset, - .segment_id = match.seg, - .dylib_ordinal = dylib_ordinal, - .name = bind_sym_name, - .bind_flags = flags, - }); + for (atom.bindings.items) |binding| { + const bind_sym = self.getSymbol(binding.target); + const bind_sym_name = self.getSymbolName(binding.target); + const dylib_ordinal = @divTrunc( + @bitCast(i16, bind_sym.n_desc), + macho.N_SYMBOL_RESOLVER, + ); + var flags: u4 = 0; + log.debug(" | bind at {x}, import('{s}') in 
dylib({d})", .{ + binding.offset + base_offset, + bind_sym_name, + dylib_ordinal, + }); + if (bind_sym.weakRef()) { + log.debug(" | marking as weak ref ", .{}); + flags |= @truncate(u4, macho.BIND_SYMBOL_FLAGS_WEAK_IMPORT); } + try bind_pointers.append(.{ + .offset = binding.offset + base_offset, + .segment_id = segment_index, + .dylib_ordinal = dylib_ordinal, + .name = bind_sym_name, + .bind_flags = flags, + }); + } - if (atom.prev) |prev| { - atom = prev; - } else break; + for (atom.lazy_bindings.items) |binding| { + const bind_sym = self.getSymbol(binding.target); + const bind_sym_name = self.getSymbolName(binding.target); + const dylib_ordinal = @divTrunc( + @bitCast(i16, bind_sym.n_desc), + macho.N_SYMBOL_RESOLVER, + ); + var flags: u4 = 0; + log.debug(" | lazy bind at {x} import('{s}') ord({d})", .{ + binding.offset + base_offset, + bind_sym_name, + dylib_ordinal, + }); + if (bind_sym.weakRef()) { + log.debug(" | marking as weak ref ", .{}); + flags |= @truncate(u4, macho.BIND_SYMBOL_FLAGS_WEAK_IMPORT); + } + try lazy_bind_pointers.append(.{ + .offset = binding.offset + base_offset, + .segment_id = segment_index, + .dylib_ordinal = dylib_ordinal, + .name = bind_sym_name, + .bind_flags = flags, + }); } + + if (atom.prev) |prev| { + atom = prev; + } else break; } } @@ -5675,8 +5448,8 @@ fn writeDyldInfoData(self: *MachO) !void { // TODO handle macho.EXPORT_SYMBOL_FLAGS_REEXPORT and macho.EXPORT_SYMBOL_FLAGS_STUB_AND_RESOLVER. 
log.debug("generating export trie", .{}); - const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].segment; - const base_address = text_segment.inner.vmaddr; + const text_segment = self.segments.items[self.text_segment_cmd_index.?]; + const base_address = text_segment.vmaddr; if (self.base.options.output_mode == .Exe) { for (&[_]SymbolWithLoc{ @@ -5714,48 +5487,27 @@ fn writeDyldInfoData(self: *MachO) !void { try trie.finalize(gpa); } - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].segment; - const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].dyld_info_only; - - const rebase_off = mem.alignForwardGeneric(u64, seg.inner.fileoff, @alignOf(u64)); + const link_seg = &self.segments.items[self.linkedit_segment_cmd_index.?]; + const rebase_off = mem.alignForwardGeneric(u64, link_seg.fileoff, @alignOf(u64)); + assert(rebase_off == link_seg.fileoff); const rebase_size = try bind.rebaseInfoSize(rebase_pointers.items); - dyld_info.rebase_off = @intCast(u32, rebase_off); - dyld_info.rebase_size = @intCast(u32, rebase_size); - log.debug("writing rebase info from 0x{x} to 0x{x}", .{ - dyld_info.rebase_off, - dyld_info.rebase_off + dyld_info.rebase_size, - }); + log.debug("writing rebase info from 0x{x} to 0x{x}", .{ rebase_off, rebase_off + rebase_size }); - const bind_off = mem.alignForwardGeneric(u64, dyld_info.rebase_off + dyld_info.rebase_size, @alignOf(u64)); + const bind_off = mem.alignForwardGeneric(u64, rebase_off + rebase_size, @alignOf(u64)); const bind_size = try bind.bindInfoSize(bind_pointers.items); - dyld_info.bind_off = @intCast(u32, bind_off); - dyld_info.bind_size = @intCast(u32, bind_size); - log.debug("writing bind info from 0x{x} to 0x{x}", .{ - dyld_info.bind_off, - dyld_info.bind_off + dyld_info.bind_size, - }); + log.debug("writing bind info from 0x{x} to 0x{x}", .{ bind_off, bind_off + bind_size }); - const lazy_bind_off = mem.alignForwardGeneric(u64, dyld_info.bind_off + 
dyld_info.bind_size, @alignOf(u64)); + const lazy_bind_off = mem.alignForwardGeneric(u64, bind_off + bind_size, @alignOf(u64)); const lazy_bind_size = try bind.lazyBindInfoSize(lazy_bind_pointers.items); - dyld_info.lazy_bind_off = @intCast(u32, lazy_bind_off); - dyld_info.lazy_bind_size = @intCast(u32, lazy_bind_size); - log.debug("writing lazy bind info from 0x{x} to 0x{x}", .{ - dyld_info.lazy_bind_off, - dyld_info.lazy_bind_off + dyld_info.lazy_bind_size, - }); + log.debug("writing lazy bind info from 0x{x} to 0x{x}", .{ lazy_bind_off, lazy_bind_off + lazy_bind_size }); - const export_off = mem.alignForwardGeneric(u64, dyld_info.lazy_bind_off + dyld_info.lazy_bind_size, @alignOf(u64)); + const export_off = mem.alignForwardGeneric(u64, lazy_bind_off + lazy_bind_size, @alignOf(u64)); const export_size = trie.size; - dyld_info.export_off = @intCast(u32, export_off); - dyld_info.export_size = @intCast(u32, export_size); - log.debug("writing export trie from 0x{x} to 0x{x}", .{ - dyld_info.export_off, - dyld_info.export_off + dyld_info.export_size, - }); + log.debug("writing export trie from 0x{x} to 0x{x}", .{ export_off, export_off + export_size }); - seg.inner.filesize = dyld_info.export_off + dyld_info.export_size - seg.inner.fileoff; + const needed_size = export_off + export_size - rebase_off; + link_seg.filesize = needed_size; - const needed_size = dyld_info.export_off + dyld_info.export_size - dyld_info.rebase_off; var buffer = try gpa.alloc(u8, needed_size); defer gpa.free(buffer); mem.set(u8, buffer, 0); @@ -5763,54 +5515,61 @@ fn writeDyldInfoData(self: *MachO) !void { var stream = std.io.fixedBufferStream(buffer); const writer = stream.writer(); - const base_off = dyld_info.rebase_off; try bind.writeRebaseInfo(rebase_pointers.items, writer); - try stream.seekTo(dyld_info.bind_off - base_off); + try stream.seekTo(bind_off - rebase_off); try bind.writeBindInfo(bind_pointers.items, writer); - try stream.seekTo(dyld_info.lazy_bind_off - base_off); + try 
stream.seekTo(lazy_bind_off - rebase_off); try bind.writeLazyBindInfo(lazy_bind_pointers.items, writer); - try stream.seekTo(dyld_info.export_off - base_off); + try stream.seekTo(export_off - rebase_off); _ = try trie.write(writer); log.debug("writing dyld info from 0x{x} to 0x{x}", .{ - dyld_info.rebase_off, - dyld_info.rebase_off + needed_size, + rebase_off, + rebase_off + needed_size, }); - try self.base.file.?.pwriteAll(buffer, dyld_info.rebase_off); - try self.populateLazyBindOffsetsInStubHelper( - buffer[dyld_info.lazy_bind_off - base_off ..][0..dyld_info.lazy_bind_size], - ); - - self.load_commands_dirty = true; + try self.base.file.?.pwriteAll(buffer, rebase_off); + try self.populateLazyBindOffsetsInStubHelper(buffer[lazy_bind_off - rebase_off ..][0..lazy_bind_size]); + + try lc_writer.writeStruct(macho.dyld_info_command{ + .cmd = .DYLD_INFO_ONLY, + .cmdsize = @sizeOf(macho.dyld_info_command), + .rebase_off = @intCast(u32, rebase_off), + .rebase_size = @intCast(u32, rebase_size), + .bind_off = @intCast(u32, bind_off), + .bind_size = @intCast(u32, bind_size), + .weak_bind_off = 0, + .weak_bind_size = 0, + .lazy_bind_off = @intCast(u32, lazy_bind_off), + .lazy_bind_size = @intCast(u32, lazy_bind_size), + .export_off = @intCast(u32, export_off), + .export_size = @intCast(u32, export_size), + }); + ncmds.* += 1; } fn populateLazyBindOffsetsInStubHelper(self: *MachO, buffer: []const u8) !void { const gpa = self.base.allocator; - const text_segment_cmd_index = self.text_segment_cmd_index orelse return; + const stub_helper_section_index = self.stub_helper_section_index orelse return; - const last_atom = self.atoms.get(.{ - .seg = text_segment_cmd_index, - .sect = stub_helper_section_index, - }) orelse return; if (self.stub_helper_preamble_atom == null) return; - if (last_atom == self.stub_helper_preamble_atom.?) 
return; + + const section = self.sections.get(stub_helper_section_index); + const last_atom = section.last_atom orelse return; + if (last_atom == self.stub_helper_preamble_atom.?) return; // TODO is this a redundant check? var table = std.AutoHashMap(i64, *Atom).init(gpa); defer table.deinit(); { var stub_atom = last_atom; - var laptr_atom = self.atoms.get(.{ - .seg = self.data_segment_cmd_index.?, - .sect = self.la_symbol_ptr_section_index.?, - }).?; + var laptr_atom = self.sections.items(.last_atom)[self.la_symbol_ptr_section_index.?].?; const base_addr = blk: { - const seg = self.load_commands.items[self.data_segment_cmd_index.?].segment; - break :blk seg.inner.vmaddr; + const seg = self.segments.items[self.data_segment_cmd_index.?]; + break :blk seg.vmaddr; }; while (true) { @@ -5871,10 +5630,7 @@ fn populateLazyBindOffsetsInStubHelper(self: *MachO, buffer: []const u8) !void { } } - const sect = self.getSection(.{ - .seg = text_segment_cmd_index, - .sect = stub_helper_section_index, - }); + const header = self.sections.items(.header)[stub_helper_section_index]; const stub_offset: u4 = switch (self.base.options.target.cpu.arch) { .x86_64 => 1, .aarch64 => 2 * @sizeOf(u32), @@ -5886,7 +5642,7 @@ fn populateLazyBindOffsetsInStubHelper(self: *MachO, buffer: []const u8) !void { while (offsets.popOrNull()) |bind_offset| { const atom = table.get(bind_offset.sym_offset).?; const sym = atom.getSymbol(self); - const file_offset = sect.offset + sym.n_value - sect.addr + stub_offset; + const file_offset = header.offset + sym.n_value - header.addr + stub_offset; mem.writeIntLittle(u32, &buf, bind_offset.offset); log.debug("writing lazy bind offset in stub helper of 0x{x} for symbol {s} at offset 0x{x}", .{ bind_offset.offset, @@ -5899,14 +5655,14 @@ fn populateLazyBindOffsetsInStubHelper(self: *MachO, buffer: []const u8) !void { const asc_u64 = std.sort.asc(u64); -fn writeFunctionStarts(self: *MachO) !void { - const text_seg_index = self.text_segment_cmd_index orelse 
return; - const text_sect_index = self.text_section_index orelse return; - const text_seg = self.load_commands.items[text_seg_index].segment; - +fn writeFunctionStarts(self: *MachO, ncmds: *u32, lc_writer: anytype) !void { const tracy = trace(@src()); defer tracy.end(); + const text_seg_index = self.text_segment_cmd_index orelse return; + const text_sect_index = self.text_section_index orelse return; + const text_seg = self.segments.items[text_seg_index]; + const gpa = self.base.allocator; // We need to sort by address first @@ -5918,8 +5674,8 @@ fn writeFunctionStarts(self: *MachO) !void { const sym = self.getSymbol(global); if (sym.undf()) continue; if (sym.n_desc == N_DESC_GCED) continue; - const match = self.getMatchingSectionFromOrdinal(sym.n_sect); - if (match.seg != text_seg_index or match.sect != text_sect_index) continue; + const sect_id = sym.n_sect - 1; + if (sect_id != text_sect_index) continue; addresses.appendAssumeCapacity(sym.n_value); } @@ -5932,7 +5688,7 @@ fn writeFunctionStarts(self: *MachO) !void { var last_off: u32 = 0; for (addresses.items) |addr| { - const offset = @intCast(u32, addr - text_seg.inner.vmaddr); + const offset = @intCast(u32, addr - text_seg.vmaddr); const diff = offset - last_off; if (diff == 0) continue; @@ -5951,22 +5707,22 @@ fn writeFunctionStarts(self: *MachO) !void { try std.leb.writeULEB128(buffer.writer(), offset); } - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].segment; - const fn_cmd = &self.load_commands.items[self.function_starts_cmd_index.?].linkedit_data; + const link_seg = &self.segments.items[self.linkedit_segment_cmd_index.?]; + const offset = mem.alignForwardGeneric(u64, link_seg.fileoff + link_seg.filesize, @alignOf(u64)); + const needed_size = buffer.items.len; + link_seg.filesize = offset + needed_size - link_seg.fileoff; - const dataoff = mem.alignForwardGeneric(u64, seg.inner.fileoff + seg.inner.filesize, @alignOf(u64)); - const datasize = buffer.items.len; - fn_cmd.dataoff = 
@intCast(u32, dataoff); - fn_cmd.datasize = @intCast(u32, datasize); - seg.inner.filesize = fn_cmd.dataoff + fn_cmd.datasize - seg.inner.fileoff; + log.debug("writing function starts info from 0x{x} to 0x{x}", .{ offset, offset + needed_size }); - log.debug("writing function starts info from 0x{x} to 0x{x}", .{ - fn_cmd.dataoff, - fn_cmd.dataoff + fn_cmd.datasize, - }); + try self.base.file.?.pwriteAll(buffer.items, offset); - try self.base.file.?.pwriteAll(buffer.items, fn_cmd.dataoff); - self.load_commands_dirty = true; + try lc_writer.writeStruct(macho.linkedit_data_command{ + .cmd = .FUNCTION_STARTS, + .cmdsize = @sizeOf(macho.linkedit_data_command), + .dataoff = @intCast(u32, offset), + .datasize = @intCast(u32, needed_size), + }); + ncmds.* += 1; } fn filterDataInCode( @@ -5988,17 +5744,15 @@ fn filterDataInCode( return dices[start..end]; } -fn writeDataInCode(self: *MachO) !void { +fn writeDataInCode(self: *MachO, ncmds: *u32, lc_writer: anytype) !void { const tracy = trace(@src()); defer tracy.end(); var out_dice = std.ArrayList(macho.data_in_code_entry).init(self.base.allocator); defer out_dice.deinit(); - const text_sect = self.getSection(.{ - .seg = self.text_segment_cmd_index orelse return, - .sect = self.text_section_index orelse return, - }); + const text_sect_id = self.text_section_index orelse return; + const text_sect_header = self.sections.items(.header)[text_sect_id]; for (self.objects.items) |object| { const dice = object.parseDataInCode() orelse continue; @@ -6008,15 +5762,15 @@ fn writeDataInCode(self: *MachO) !void { const sym = atom.getSymbol(self); if (sym.n_desc == N_DESC_GCED) continue; - const match = self.getMatchingSectionFromOrdinal(sym.n_sect); - if (match.seg != self.text_segment_cmd_index.? and match.sect != self.text_section_index.?) { + const sect_id = sym.n_sect - 1; + if (sect_id != self.text_section_index.?) 
{ continue; } const source_sym = object.getSourceSymbol(atom.sym_index) orelse continue; const source_addr = math.cast(u32, source_sym.n_value) orelse return error.Overflow; const filtered_dice = filterDataInCode(dice, source_addr, source_addr + atom.size); - const base = math.cast(u32, sym.n_value - text_sect.addr + text_sect.offset) orelse + const base = math.cast(u32, sym.n_value - text_sect_header.addr + text_sect_header.offset) orelse return error.Overflow; for (filtered_dice) |single| { @@ -6030,33 +5784,63 @@ fn writeDataInCode(self: *MachO) !void { } } - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].segment; - const dice_cmd = &self.load_commands.items[self.data_in_code_cmd_index.?].linkedit_data; + const seg = &self.segments.items[self.linkedit_segment_cmd_index.?]; + const offset = mem.alignForwardGeneric(u64, seg.fileoff + seg.filesize, @alignOf(u64)); + const needed_size = out_dice.items.len * @sizeOf(macho.data_in_code_entry); + seg.filesize = offset + needed_size - seg.fileoff; - const dataoff = mem.alignForwardGeneric(u64, seg.inner.fileoff + seg.inner.filesize, @alignOf(u64)); - const datasize = out_dice.items.len * @sizeOf(macho.data_in_code_entry); - dice_cmd.dataoff = @intCast(u32, dataoff); - dice_cmd.datasize = @intCast(u32, datasize); - seg.inner.filesize = dice_cmd.dataoff + dice_cmd.datasize - seg.inner.fileoff; + log.debug("writing data-in-code from 0x{x} to 0x{x}", .{ offset, offset + needed_size }); - log.debug("writing data-in-code from 0x{x} to 0x{x}", .{ - dice_cmd.dataoff, - dice_cmd.dataoff + dice_cmd.datasize, + try self.base.file.?.pwriteAll(mem.sliceAsBytes(out_dice.items), offset); + try lc_writer.writeStruct(macho.linkedit_data_command{ + .cmd = .DATA_IN_CODE, + .cmdsize = @sizeOf(macho.linkedit_data_command), + .dataoff = @intCast(u32, offset), + .datasize = @intCast(u32, needed_size), }); - - try self.base.file.?.pwriteAll(mem.sliceAsBytes(out_dice.items), dice_cmd.dataoff); - 
self.load_commands_dirty = true; + ncmds.* += 1; } -fn writeSymtab(self: *MachO) !void { - const tracy = trace(@src()); - defer tracy.end(); +fn writeSymtabs(self: *MachO, ncmds: *u32, lc_writer: anytype) !void { + var symtab_cmd = macho.symtab_command{ + .cmdsize = @sizeOf(macho.symtab_command), + .symoff = 0, + .nsyms = 0, + .stroff = 0, + .strsize = 0, + }; + var dysymtab_cmd = macho.dysymtab_command{ + .cmdsize = @sizeOf(macho.dysymtab_command), + .ilocalsym = 0, + .nlocalsym = 0, + .iextdefsym = 0, + .nextdefsym = 0, + .iundefsym = 0, + .nundefsym = 0, + .tocoff = 0, + .ntoc = 0, + .modtaboff = 0, + .nmodtab = 0, + .extrefsymoff = 0, + .nextrefsyms = 0, + .indirectsymoff = 0, + .nindirectsyms = 0, + .extreloff = 0, + .nextrel = 0, + .locreloff = 0, + .nlocrel = 0, + }; + var ctx = try self.writeSymtab(&symtab_cmd); + defer ctx.imports_table.deinit(); + try self.writeDysymtab(ctx, &dysymtab_cmd); + try self.writeStrtab(&symtab_cmd); + try lc_writer.writeStruct(symtab_cmd); + try lc_writer.writeStruct(dysymtab_cmd); + ncmds.* += 2; +} +fn writeSymtab(self: *MachO, lc: *macho.symtab_command) !SymtabCtx { const gpa = self.base.allocator; - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].segment; - const symtab = &self.load_commands.items[self.symtab_cmd_index.?].symtab; - const symoff = mem.alignForwardGeneric(u64, seg.inner.fileoff + seg.inner.filesize, @alignOf(macho.nlist_64)); - symtab.symoff = @intCast(u32, symoff); var locals = std.ArrayList(macho.nlist_64).init(gpa); defer locals.deinit(); @@ -6101,8 +5885,8 @@ fn writeSymtab(self: *MachO) !void { var imports = std.ArrayList(macho.nlist_64).init(gpa); defer imports.deinit(); + var imports_table = std.AutoHashMap(SymbolWithLoc, u32).init(gpa); - defer imports_table.deinit(); for (self.globals.values()) |global| { const sym = self.getSymbol(global); @@ -6115,56 +5899,84 @@ fn writeSymtab(self: *MachO) !void { try imports_table.putNoClobber(global, new_index); } - const nlocals = 
locals.items.len; - const nexports = exports.items.len; - const nimports = imports.items.len; - symtab.nsyms = @intCast(u32, nlocals + nexports + nimports); + const nlocals = @intCast(u32, locals.items.len); + const nexports = @intCast(u32, exports.items.len); + const nimports = @intCast(u32, imports.items.len); + const nsyms = nlocals + nexports + nimports; + + const seg = &self.segments.items[self.linkedit_segment_cmd_index.?]; + const offset = mem.alignForwardGeneric( + u64, + seg.fileoff + seg.filesize, + @alignOf(macho.nlist_64), + ); + const needed_size = nsyms * @sizeOf(macho.nlist_64); + seg.filesize = offset + needed_size - seg.fileoff; var buffer = std.ArrayList(u8).init(gpa); defer buffer.deinit(); - try buffer.ensureTotalCapacityPrecise(symtab.nsyms * @sizeOf(macho.nlist_64)); + try buffer.ensureTotalCapacityPrecise(needed_size); buffer.appendSliceAssumeCapacity(mem.sliceAsBytes(locals.items)); buffer.appendSliceAssumeCapacity(mem.sliceAsBytes(exports.items)); buffer.appendSliceAssumeCapacity(mem.sliceAsBytes(imports.items)); - log.debug("writing symtab from 0x{x} to 0x{x}", .{ symtab.symoff, symtab.symoff + buffer.items.len }); - try self.base.file.?.pwriteAll(buffer.items, symtab.symoff); + log.debug("writing symtab from 0x{x} to 0x{x}", .{ offset, offset + needed_size }); + try self.base.file.?.pwriteAll(buffer.items, offset); + + lc.symoff = @intCast(u32, offset); + lc.nsyms = nsyms; - seg.inner.filesize = symtab.symoff + buffer.items.len - seg.inner.fileoff; + return SymtabCtx{ + .nlocalsym = nlocals, + .nextdefsym = nexports, + .nundefsym = nimports, + .imports_table = imports_table, + }; +} - // Update dynamic symbol table. 
- const dysymtab = &self.load_commands.items[self.dysymtab_cmd_index.?].dysymtab; - dysymtab.nlocalsym = @intCast(u32, nlocals); - dysymtab.iextdefsym = dysymtab.nlocalsym; - dysymtab.nextdefsym = @intCast(u32, nexports); - dysymtab.iundefsym = dysymtab.nlocalsym + dysymtab.nextdefsym; - dysymtab.nundefsym = @intCast(u32, nimports); +fn writeStrtab(self: *MachO, lc: *macho.symtab_command) !void { + const seg = &self.segments.items[self.linkedit_segment_cmd_index.?]; + const offset = mem.alignForwardGeneric(u64, seg.fileoff + seg.filesize, @alignOf(u64)); + const needed_size = self.strtab.buffer.items.len; + seg.filesize = offset + needed_size - seg.fileoff; + + log.debug("writing string table from 0x{x} to 0x{x}", .{ offset, offset + needed_size }); + + try self.base.file.?.pwriteAll(self.strtab.buffer.items, offset); + + lc.stroff = @intCast(u32, offset); + lc.strsize = @intCast(u32, needed_size); +} +const SymtabCtx = struct { + nlocalsym: u32, + nextdefsym: u32, + nundefsym: u32, + imports_table: std.AutoHashMap(SymbolWithLoc, u32), +}; + +fn writeDysymtab(self: *MachO, ctx: SymtabCtx, lc: *macho.dysymtab_command) !void { + const gpa = self.base.allocator; const nstubs = @intCast(u32, self.stubs_table.count()); const ngot_entries = @intCast(u32, self.got_entries_table.count()); + const nindirectsyms = nstubs * 2 + ngot_entries; + const iextdefsym = ctx.nlocalsym; + const iundefsym = iextdefsym + ctx.nextdefsym; - const indirectsymoff = mem.alignForwardGeneric(u64, seg.inner.fileoff + seg.inner.filesize, @alignOf(u64)); - dysymtab.indirectsymoff = @intCast(u32, indirectsymoff); - dysymtab.nindirectsyms = nstubs * 2 + ngot_entries; - - seg.inner.filesize = dysymtab.indirectsymoff + dysymtab.nindirectsyms * @sizeOf(u32) - seg.inner.fileoff; + const seg = &self.segments.items[self.linkedit_segment_cmd_index.?]; + const offset = mem.alignForwardGeneric(u64, seg.fileoff + seg.filesize, @alignOf(u64)); + const needed_size = nindirectsyms * @sizeOf(u32); + seg.filesize 
= offset + needed_size - seg.fileoff; - log.debug("writing indirect symbol table from 0x{x} to 0x{x}", .{ - dysymtab.indirectsymoff, - dysymtab.indirectsymoff + dysymtab.nindirectsyms * @sizeOf(u32), - }); + log.debug("writing indirect symbol table from 0x{x} to 0x{x}", .{ offset, offset + needed_size }); var buf = std.ArrayList(u8).init(gpa); defer buf.deinit(); - try buf.ensureTotalCapacity(dysymtab.nindirectsyms * @sizeOf(u32)); + try buf.ensureTotalCapacity(needed_size); const writer = buf.writer(); - if (self.text_segment_cmd_index) |text_segment_cmd_index| blk: { - const stubs_section_index = self.stubs_section_index orelse break :blk; - const stubs = self.getSectionPtr(.{ - .seg = text_segment_cmd_index, - .sect = stubs_section_index, - }); + if (self.stubs_section_index) |sect_id| { + const stubs = &self.sections.items(.header)[sect_id]; stubs.reserved1 = 0; for (self.stubs.items) |entry| { if (entry.sym_index == 0) continue; @@ -6172,16 +5984,12 @@ fn writeSymtab(self: *MachO) !void { if (atom_sym.n_desc == N_DESC_GCED) continue; const target_sym = self.getSymbol(entry.target); assert(target_sym.undf()); - try writer.writeIntLittle(u32, dysymtab.iundefsym + imports_table.get(entry.target).?); + try writer.writeIntLittle(u32, iundefsym + ctx.imports_table.get(entry.target).?); } } - if (self.data_const_segment_cmd_index) |data_const_segment_cmd_index| blk: { - const got_section_index = self.got_section_index orelse break :blk; - const got = self.getSectionPtr(.{ - .seg = data_const_segment_cmd_index, - .sect = got_section_index, - }); + if (self.got_section_index) |sect_id| { + const got = &self.sections.items(.header)[sect_id]; got.reserved1 = nstubs; for (self.got_entries.items) |entry| { if (entry.sym_index == 0) continue; @@ -6189,19 +5997,15 @@ fn writeSymtab(self: *MachO) !void { if (atom_sym.n_desc == N_DESC_GCED) continue; const target_sym = self.getSymbol(entry.target); if (target_sym.undf()) { - try writer.writeIntLittle(u32, dysymtab.iundefsym + 
imports_table.get(entry.target).?); + try writer.writeIntLittle(u32, iundefsym + ctx.imports_table.get(entry.target).?); } else { try writer.writeIntLittle(u32, macho.INDIRECT_SYMBOL_LOCAL); } } } - if (self.data_segment_cmd_index) |data_segment_cmd_index| blk: { - const la_symbol_ptr_section_index = self.la_symbol_ptr_section_index orelse break :blk; - const la_symbol_ptr = self.getSectionPtr(.{ - .seg = data_segment_cmd_index, - .sect = la_symbol_ptr_section_index, - }); + if (self.la_symbol_ptr_section_index) |sect_id| { + const la_symbol_ptr = &self.sections.items(.header)[sect_id]; la_symbol_ptr.reserved1 = nstubs + ngot_entries; for (self.stubs.items) |entry| { if (entry.sym_index == 0) continue; @@ -6209,131 +6013,76 @@ fn writeSymtab(self: *MachO) !void { if (atom_sym.n_desc == N_DESC_GCED) continue; const target_sym = self.getSymbol(entry.target); assert(target_sym.undf()); - try writer.writeIntLittle(u32, dysymtab.iundefsym + imports_table.get(entry.target).?); + try writer.writeIntLittle(u32, iundefsym + ctx.imports_table.get(entry.target).?); } } - assert(buf.items.len == dysymtab.nindirectsyms * @sizeOf(u32)); + assert(buf.items.len == needed_size); + try self.base.file.?.pwriteAll(buf.items, offset); - try self.base.file.?.pwriteAll(buf.items, dysymtab.indirectsymoff); - self.load_commands_dirty = true; + lc.nlocalsym = ctx.nlocalsym; + lc.iextdefsym = iextdefsym; + lc.nextdefsym = ctx.nextdefsym; + lc.iundefsym = iundefsym; + lc.nundefsym = ctx.nundefsym; + lc.indirectsymoff = @intCast(u32, offset); + lc.nindirectsyms = nindirectsyms; } -fn writeStrtab(self: *MachO) !void { - const tracy = trace(@src()); - defer tracy.end(); - - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].segment; - const symtab = &self.load_commands.items[self.symtab_cmd_index.?].symtab; - const stroff = mem.alignForwardGeneric(u64, seg.inner.fileoff + seg.inner.filesize, @alignOf(u64)); - - const strsize = self.strtab.buffer.items.len; - symtab.stroff = 
@intCast(u32, stroff); - symtab.strsize = @intCast(u32, strsize); - seg.inner.filesize = symtab.stroff + symtab.strsize - seg.inner.fileoff; - - log.debug("writing string table from 0x{x} to 0x{x}", .{ symtab.stroff, symtab.stroff + symtab.strsize }); - - try self.base.file.?.pwriteAll(self.strtab.buffer.items, symtab.stroff); - - self.load_commands_dirty = true; -} - -fn writeLinkeditSegment(self: *MachO) !void { - const tracy = trace(@src()); - defer tracy.end(); - - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].segment; - seg.inner.filesize = 0; - - try self.writeDyldInfoData(); - try self.writeFunctionStarts(); - try self.writeDataInCode(); - try self.writeSymtab(); - try self.writeStrtab(); - - seg.inner.vmsize = mem.alignForwardGeneric(u64, seg.inner.filesize, self.page_size); -} - -fn writeCodeSignaturePadding(self: *MachO, code_sig: *CodeSignature) !void { - const tracy = trace(@src()); - defer tracy.end(); - - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].segment; - const cs_cmd = &self.load_commands.items[self.code_signature_cmd_index.?].linkedit_data; +fn writeCodeSignaturePadding( + self: *MachO, + code_sig: *CodeSignature, + ncmds: *u32, + lc_writer: anytype, +) !u32 { + const seg = &self.segments.items[self.linkedit_segment_cmd_index.?]; // Code signature data has to be 16-bytes aligned for Apple tools to recognize the file // https://github.com/opensource-apple/cctools/blob/fdb4825f303fd5c0751be524babd32958181b3ed/libstuff/checkout.c#L271 - const dataoff = mem.alignForwardGeneric(u64, seg.inner.fileoff + seg.inner.filesize, 16); - const datasize = code_sig.estimateSize(dataoff); - cs_cmd.dataoff = @intCast(u32, dataoff); - cs_cmd.datasize = @intCast(u32, code_sig.estimateSize(dataoff)); - - // Advance size of __LINKEDIT segment - seg.inner.filesize = cs_cmd.dataoff + cs_cmd.datasize - seg.inner.fileoff; - seg.inner.vmsize = mem.alignForwardGeneric(u64, seg.inner.filesize, self.page_size); - 
log.debug("writing code signature padding from 0x{x} to 0x{x}", .{ dataoff, dataoff + datasize }); + const offset = mem.alignForwardGeneric(u64, seg.fileoff + seg.filesize, 16); + const needed_size = code_sig.estimateSize(offset); + seg.filesize = offset + needed_size - seg.fileoff; + seg.vmsize = mem.alignForwardGeneric(u64, seg.filesize, self.page_size); + log.debug("writing code signature padding from 0x{x} to 0x{x}", .{ offset, offset + needed_size }); // Pad out the space. We need to do this to calculate valid hashes for everything in the file // except for code signature data. - try self.base.file.?.pwriteAll(&[_]u8{0}, dataoff + datasize - 1); - self.load_commands_dirty = true; -} + try self.base.file.?.pwriteAll(&[_]u8{0}, offset + needed_size - 1); -fn writeCodeSignature(self: *MachO, code_sig: *CodeSignature) !void { - const tracy = trace(@src()); - defer tracy.end(); + try lc_writer.writeStruct(macho.linkedit_data_command{ + .cmd = .CODE_SIGNATURE, + .cmdsize = @sizeOf(macho.linkedit_data_command), + .dataoff = @intCast(u32, offset), + .datasize = @intCast(u32, needed_size), + }); + ncmds.* += 1; + + return @intCast(u32, offset); +} - const code_sig_cmd = self.load_commands.items[self.code_signature_cmd_index.?].linkedit_data; - const seg = self.load_commands.items[self.text_segment_cmd_index.?].segment; +fn writeCodeSignature(self: *MachO, code_sig: *CodeSignature, offset: u32) !void { + const seg = self.segments.items[self.text_segment_cmd_index.?]; var buffer = std.ArrayList(u8).init(self.base.allocator); defer buffer.deinit(); try buffer.ensureTotalCapacityPrecise(code_sig.size()); try code_sig.writeAdhocSignature(self.base.allocator, .{ .file = self.base.file.?, - .exec_seg_base = seg.inner.fileoff, - .exec_seg_limit = seg.inner.filesize, - .code_sig_cmd = code_sig_cmd, + .exec_seg_base = seg.fileoff, + .exec_seg_limit = seg.filesize, + .file_size = offset, .output_mode = self.base.options.output_mode, }, buffer.writer()); assert(buffer.items.len == 
code_sig.size()); log.debug("writing code signature from 0x{x} to 0x{x}", .{ - code_sig_cmd.dataoff, - code_sig_cmd.dataoff + buffer.items.len, + offset, + offset + buffer.items.len, }); - try self.base.file.?.pwriteAll(buffer.items, code_sig_cmd.dataoff); -} - -/// Writes all load commands and section headers. -fn writeLoadCommands(self: *MachO) !void { - if (!self.load_commands_dirty) return; - - var sizeofcmds: u32 = 0; - for (self.load_commands.items) |lc| { - if (lc.cmd() == .NONE) continue; - sizeofcmds += lc.cmdsize(); - } - - var buffer = try self.base.allocator.alloc(u8, sizeofcmds); - defer self.base.allocator.free(buffer); - var fib = std.io.fixedBufferStream(buffer); - const writer = fib.writer(); - for (self.load_commands.items) |lc| { - if (lc.cmd() == .NONE) continue; - try lc.write(writer); - } - - const off = @sizeOf(macho.mach_header_64); - - log.debug("writing load commands from 0x{x} to 0x{x}", .{ off, off + sizeofcmds }); - - try self.base.file.?.pwriteAll(buffer, off); - self.load_commands_dirty = false; + try self.base.file.?.pwriteAll(buffer.items, offset); } /// Writes Mach-O file header. -fn writeHeader(self: *MachO) !void { +fn writeHeader(self: *MachO, ncmds: u32, sizeofcmds: u32) !void { var header: macho.mach_header_64 = .{}; header.flags = macho.MH_NOUNDEFS | macho.MH_DYLDLINK | macho.MH_PIE | macho.MH_TWOLEVEL; @@ -6365,14 +6114,8 @@ fn writeHeader(self: *MachO) !void { header.flags |= macho.MH_HAS_TLV_DESCRIPTORS; } - header.ncmds = 0; - header.sizeofcmds = 0; - - for (self.load_commands.items) |cmd| { - if (cmd.cmd() == .NONE) continue; - header.sizeofcmds += cmd.cmdsize(); - header.ncmds += 1; - } + header.ncmds = ncmds; + header.sizeofcmds = sizeofcmds; log.debug("writing Mach-O header {}", .{header}); @@ -6392,33 +6135,13 @@ pub fn makeStaticString(bytes: []const u8) [16]u8 { return buf; } -pub fn getSectionOrdinal(self: *MachO, match: MatchingSection) u8 { - return @intCast(u8, self.section_ordinals.getIndex(match).?) 
+ 1; -} - -pub fn getMatchingSectionFromOrdinal(self: *MachO, ord: u8) MatchingSection { - const index = ord - 1; - assert(index < self.section_ordinals.count()); - return self.section_ordinals.keys()[index]; -} - -pub fn getSegmentPtr(self: *MachO, match: MatchingSection) *macho.SegmentCommand { - assert(match.seg < self.load_commands.items.len); - return &self.load_commands.items[match.seg].segment; -} - -pub fn getSegment(self: *MachO, match: MatchingSection) macho.SegmentCommand { - return self.getSegmentPtr(match).*; -} - -pub fn getSectionPtr(self: *MachO, match: MatchingSection) *macho.section_64 { - const seg = self.getSegmentPtr(match); - assert(match.sect < seg.sections.items.len); - return &seg.sections.items[match.sect]; -} - -pub fn getSection(self: *MachO, match: MatchingSection) macho.section_64 { - return self.getSectionPtr(match).*; +fn getSectionIndexes(self: MachO, segment_index: u8) struct { start: u8, end: u8 } { + var start: u8 = 0; + const nsects = for (self.segments.items) |seg, i| { + if (i == segment_index) break @intCast(u8, seg.nsects); + start += @intCast(u8, seg.nsects); + } else 0; + return .{ .start = start, .end = start + nsects }; } pub fn symbolIsTemp(self: *MachO, sym_with_loc: SymbolWithLoc) bool { @@ -6512,72 +6235,6 @@ pub fn findFirst(comptime T: type, haystack: []const T, start: usize, predicate: return i; } -const DebugInfo = struct { - inner: dwarf.DwarfInfo, - debug_info: []const u8, - debug_abbrev: []const u8, - debug_str: []const u8, - debug_line: []const u8, - debug_line_str: []const u8, - debug_ranges: []const u8, - - pub fn parse(allocator: Allocator, object: Object) !?DebugInfo { - var debug_info = blk: { - const index = object.dwarf_debug_info_index orelse return null; - break :blk try object.getSectionContents(index); - }; - var debug_abbrev = blk: { - const index = object.dwarf_debug_abbrev_index orelse return null; - break :blk try object.getSectionContents(index); - }; - var debug_str = blk: { - const index = 
object.dwarf_debug_str_index orelse return null; - break :blk try object.getSectionContents(index); - }; - var debug_line = blk: { - const index = object.dwarf_debug_line_index orelse return null; - break :blk try object.getSectionContents(index); - }; - var debug_line_str = blk: { - if (object.dwarf_debug_line_str_index) |ind| { - break :blk try object.getSectionContents(ind); - } - break :blk &[0]u8{}; - }; - var debug_ranges = blk: { - if (object.dwarf_debug_ranges_index) |ind| { - break :blk try object.getSectionContents(ind); - } - break :blk &[0]u8{}; - }; - - var inner: dwarf.DwarfInfo = .{ - .endian = .Little, - .debug_info = debug_info, - .debug_abbrev = debug_abbrev, - .debug_str = debug_str, - .debug_line = debug_line, - .debug_line_str = debug_line_str, - .debug_ranges = debug_ranges, - }; - try dwarf.openDwarfDebugInfo(&inner, allocator); - - return DebugInfo{ - .inner = inner, - .debug_info = debug_info, - .debug_abbrev = debug_abbrev, - .debug_str = debug_str, - .debug_line = debug_line, - .debug_line_str = debug_line_str, - .debug_ranges = debug_ranges, - }; - } - - pub fn deinit(self: *DebugInfo, allocator: Allocator) void { - self.inner.deinit(allocator); - } -}; - pub fn generateSymbolStabs( self: *MachO, object: Object, @@ -6585,14 +6242,15 @@ pub fn generateSymbolStabs( ) !void { assert(!self.base.options.strip); - const gpa = self.base.allocator; - log.debug("parsing debug info in '{s}'", .{object.name}); - var debug_info = (try DebugInfo.parse(gpa, object)) orelse return; + const gpa = self.base.allocator; + var debug_info = try object.parseDwarfInfo(); + defer debug_info.deinit(gpa); + try dwarf.openDwarfDebugInfo(&debug_info, gpa); // We assume there is only one CU. - const compile_unit = debug_info.inner.findCompileUnit(0x0) catch |err| switch (err) { + const compile_unit = debug_info.findCompileUnit(0x0) catch |err| switch (err) { error.MissingDebugInfo => { // TODO audit cases with missing debug info and audit our dwarf.zig module. 
log.debug("invalid or missing debug info in {s}; skipping", .{object.name}); @@ -6600,8 +6258,8 @@ pub fn generateSymbolStabs( }, else => |e| return e, }; - const tu_name = try compile_unit.die.getAttrString(&debug_info.inner, dwarf.AT.name); - const tu_comp_dir = try compile_unit.die.getAttrString(&debug_info.inner, dwarf.AT.comp_dir); + const tu_name = try compile_unit.die.getAttrString(&debug_info, dwarf.AT.name); + const tu_comp_dir = try compile_unit.die.getAttrString(&debug_info, dwarf.AT.comp_dir); // Open scope try locals.ensureUnusedCapacity(3); @@ -6664,7 +6322,7 @@ pub fn generateSymbolStabs( fn generateSymbolStabsForSymbol( self: *MachO, sym_loc: SymbolWithLoc, - debug_info: DebugInfo, + debug_info: dwarf.DwarfInfo, buf: *[4]macho.nlist_64, ) ![]const macho.nlist_64 { const gpa = self.base.allocator; @@ -6679,7 +6337,7 @@ fn generateSymbolStabsForSymbol( const source_sym = object.getSourceSymbol(sym_loc.sym_index) orelse return buf[0..0]; const size: ?u64 = size: { if (source_sym.tentative()) break :size null; - for (debug_info.inner.func_list.items) |func| { + for (debug_info.func_list.items) |func| { if (func.pc_range) |range| { if (source_sym.n_value >= range.start and source_sym.n_value < range.end) { break :size range.end - range.start; @@ -6731,260 +6389,260 @@ fn generateSymbolStabsForSymbol( } } -fn snapshotState(self: *MachO) !void { - const emit = self.base.options.emit orelse { - log.debug("no emit directory found; skipping snapshot...", .{}); - return; - }; - - const Snapshot = struct { - const Node = struct { - const Tag = enum { - section_start, - section_end, - atom_start, - atom_end, - relocation, - - pub fn jsonStringify( - tag: Tag, - options: std.json.StringifyOptions, - out_stream: anytype, - ) !void { - _ = options; - switch (tag) { - .section_start => try out_stream.writeAll("\"section_start\""), - .section_end => try out_stream.writeAll("\"section_end\""), - .atom_start => try out_stream.writeAll("\"atom_start\""), - .atom_end => 
try out_stream.writeAll("\"atom_end\""), - .relocation => try out_stream.writeAll("\"relocation\""), - } - } - }; - const Payload = struct { - name: []const u8 = "", - aliases: [][]const u8 = &[0][]const u8{}, - is_global: bool = false, - target: u64 = 0, - }; - address: u64, - tag: Tag, - payload: Payload, - }; - timestamp: i128, - nodes: []Node, - }; - - var arena_allocator = std.heap.ArenaAllocator.init(self.base.allocator); - defer arena_allocator.deinit(); - const arena = arena_allocator.allocator(); - - const out_file = try emit.directory.handle.createFile("snapshots.json", .{ - .truncate = false, - .read = true, - }); - defer out_file.close(); - - if (out_file.seekFromEnd(-1)) { - try out_file.writer().writeByte(','); - } else |err| switch (err) { - error.Unseekable => try out_file.writer().writeByte('['), - else => |e| return e, - } - const writer = out_file.writer(); - - var snapshot = Snapshot{ - .timestamp = std.time.nanoTimestamp(), - .nodes = undefined, - }; - var nodes = std.ArrayList(Snapshot.Node).init(arena); - - for (self.section_ordinals.keys()) |key| { - const sect = self.getSection(key); - const sect_name = try std.fmt.allocPrint(arena, "{s},{s}", .{ sect.segName(), sect.sectName() }); - try nodes.append(.{ - .address = sect.addr, - .tag = .section_start, - .payload = .{ .name = sect_name }, - }); - - const is_tlv = sect.type_() == macho.S_THREAD_LOCAL_VARIABLES; - - var atom: *Atom = self.atoms.get(key) orelse { - try nodes.append(.{ - .address = sect.addr + sect.size, - .tag = .section_end, - .payload = .{}, - }); - continue; - }; - - while (atom.prev) |prev| { - atom = prev; - } - - while (true) { - const atom_sym = atom.getSymbol(self); - var node = Snapshot.Node{ - .address = atom_sym.n_value, - .tag = .atom_start, - .payload = .{ - .name = atom.getName(self), - .is_global = self.globals.contains(atom.getName(self)), - }, - }; - - var aliases = std.ArrayList([]const u8).init(arena); - for (atom.contained.items) |sym_off| { - if 
(sym_off.offset == 0) { - try aliases.append(self.getSymbolName(.{ - .sym_index = sym_off.sym_index, - .file = atom.file, - })); - } - } - node.payload.aliases = aliases.toOwnedSlice(); - try nodes.append(node); - - var relocs = try std.ArrayList(Snapshot.Node).initCapacity(arena, atom.relocs.items.len); - for (atom.relocs.items) |rel| { - const source_addr = blk: { - const source_sym = atom.getSymbol(self); - break :blk source_sym.n_value + rel.offset; - }; - const target_addr = blk: { - const target_atom = rel.getTargetAtom(self) orelse { - // If there is no atom for target, we still need to check for special, atom-less - // symbols such as `___dso_handle`. - const target_name = self.getSymbolName(rel.target); - if (self.globals.contains(target_name)) { - const atomless_sym = self.getSymbol(rel.target); - break :blk atomless_sym.n_value; - } - break :blk 0; - }; - const target_sym = if (target_atom.isSymbolContained(rel.target, self)) - self.getSymbol(rel.target) - else - target_atom.getSymbol(self); - const base_address: u64 = if (is_tlv) base_address: { - const sect_id: u16 = sect_id: { - if (self.tlv_data_section_index) |i| { - break :sect_id i; - } else if (self.tlv_bss_section_index) |i| { - break :sect_id i; - } else unreachable; - }; - break :base_address self.getSection(.{ - .seg = self.data_segment_cmd_index.?, - .sect = sect_id, - }).addr; - } else 0; - break :blk target_sym.n_value - base_address; - }; - - relocs.appendAssumeCapacity(.{ - .address = source_addr, - .tag = .relocation, - .payload = .{ .target = target_addr }, - }); - } - - if (atom.contained.items.len == 0) { - try nodes.appendSlice(relocs.items); - } else { - // Need to reverse iteration order of relocs since by default for relocatable sources - // they come in reverse. For linking, this doesn't matter in any way, however, for - // arranging the memoryline for displaying it does. 
- std.mem.reverse(Snapshot.Node, relocs.items); - - var next_i: usize = 0; - var last_rel: usize = 0; - while (next_i < atom.contained.items.len) : (next_i += 1) { - const loc = SymbolWithLoc{ - .sym_index = atom.contained.items[next_i].sym_index, - .file = atom.file, - }; - const cont_sym = self.getSymbol(loc); - const cont_sym_name = self.getSymbolName(loc); - var contained_node = Snapshot.Node{ - .address = cont_sym.n_value, - .tag = .atom_start, - .payload = .{ - .name = cont_sym_name, - .is_global = self.globals.contains(cont_sym_name), - }, - }; - - // Accumulate aliases - var inner_aliases = std.ArrayList([]const u8).init(arena); - while (true) { - if (next_i + 1 >= atom.contained.items.len) break; - const next_sym_loc = SymbolWithLoc{ - .sym_index = atom.contained.items[next_i + 1].sym_index, - .file = atom.file, - }; - const next_sym = self.getSymbol(next_sym_loc); - if (next_sym.n_value != cont_sym.n_value) break; - const next_sym_name = self.getSymbolName(next_sym_loc); - if (self.globals.contains(next_sym_name)) { - try inner_aliases.append(contained_node.payload.name); - contained_node.payload.name = next_sym_name; - contained_node.payload.is_global = true; - } else try inner_aliases.append(next_sym_name); - next_i += 1; - } - - const cont_size = if (next_i + 1 < atom.contained.items.len) - self.getSymbol(.{ - .sym_index = atom.contained.items[next_i + 1].sym_index, - .file = atom.file, - }).n_value - cont_sym.n_value - else - atom_sym.n_value + atom.size - cont_sym.n_value; - - contained_node.payload.aliases = inner_aliases.toOwnedSlice(); - try nodes.append(contained_node); - - for (relocs.items[last_rel..]) |rel| { - if (rel.address >= cont_sym.n_value + cont_size) { - break; - } - try nodes.append(rel); - last_rel += 1; - } - - try nodes.append(.{ - .address = cont_sym.n_value + cont_size, - .tag = .atom_end, - .payload = .{}, - }); - } - } - - try nodes.append(.{ - .address = atom_sym.n_value + atom.size, - .tag = .atom_end, - .payload = .{}, - 
}); - - if (atom.next) |next| { - atom = next; - } else break; - } - - try nodes.append(.{ - .address = sect.addr + sect.size, - .tag = .section_end, - .payload = .{}, - }); - } - - snapshot.nodes = nodes.toOwnedSlice(); - - try std.json.stringify(snapshot, .{}, writer); - try writer.writeByte(']'); -} +// fn snapshotState(self: *MachO) !void { +// const emit = self.base.options.emit orelse { +// log.debug("no emit directory found; skipping snapshot...", .{}); +// return; +// }; + +// const Snapshot = struct { +// const Node = struct { +// const Tag = enum { +// section_start, +// section_end, +// atom_start, +// atom_end, +// relocation, + +// pub fn jsonStringify( +// tag: Tag, +// options: std.json.StringifyOptions, +// out_stream: anytype, +// ) !void { +// _ = options; +// switch (tag) { +// .section_start => try out_stream.writeAll("\"section_start\""), +// .section_end => try out_stream.writeAll("\"section_end\""), +// .atom_start => try out_stream.writeAll("\"atom_start\""), +// .atom_end => try out_stream.writeAll("\"atom_end\""), +// .relocation => try out_stream.writeAll("\"relocation\""), +// } +// } +// }; +// const Payload = struct { +// name: []const u8 = "", +// aliases: [][]const u8 = &[0][]const u8{}, +// is_global: bool = false, +// target: u64 = 0, +// }; +// address: u64, +// tag: Tag, +// payload: Payload, +// }; +// timestamp: i128, +// nodes: []Node, +// }; + +// var arena_allocator = std.heap.ArenaAllocator.init(self.base.allocator); +// defer arena_allocator.deinit(); +// const arena = arena_allocator.allocator(); + +// const out_file = try emit.directory.handle.createFile("snapshots.json", .{ +// .truncate = false, +// .read = true, +// }); +// defer out_file.close(); + +// if (out_file.seekFromEnd(-1)) { +// try out_file.writer().writeByte(','); +// } else |err| switch (err) { +// error.Unseekable => try out_file.writer().writeByte('['), +// else => |e| return e, +// } +// const writer = out_file.writer(); + +// var snapshot = Snapshot{ 
+// .timestamp = std.time.nanoTimestamp(), +// .nodes = undefined, +// }; +// var nodes = std.ArrayList(Snapshot.Node).init(arena); + +// for (self.section_ordinals.keys()) |key| { +// const sect = self.getSection(key); +// const sect_name = try std.fmt.allocPrint(arena, "{s},{s}", .{ sect.segName(), sect.sectName() }); +// try nodes.append(.{ +// .address = sect.addr, +// .tag = .section_start, +// .payload = .{ .name = sect_name }, +// }); + +// const is_tlv = sect.type_() == macho.S_THREAD_LOCAL_VARIABLES; + +// var atom: *Atom = self.atoms.get(key) orelse { +// try nodes.append(.{ +// .address = sect.addr + sect.size, +// .tag = .section_end, +// .payload = .{}, +// }); +// continue; +// }; + +// while (atom.prev) |prev| { +// atom = prev; +// } + +// while (true) { +// const atom_sym = atom.getSymbol(self); +// var node = Snapshot.Node{ +// .address = atom_sym.n_value, +// .tag = .atom_start, +// .payload = .{ +// .name = atom.getName(self), +// .is_global = self.globals.contains(atom.getName(self)), +// }, +// }; + +// var aliases = std.ArrayList([]const u8).init(arena); +// for (atom.contained.items) |sym_off| { +// if (sym_off.offset == 0) { +// try aliases.append(self.getSymbolName(.{ +// .sym_index = sym_off.sym_index, +// .file = atom.file, +// })); +// } +// } +// node.payload.aliases = aliases.toOwnedSlice(); +// try nodes.append(node); + +// var relocs = try std.ArrayList(Snapshot.Node).initCapacity(arena, atom.relocs.items.len); +// for (atom.relocs.items) |rel| { +// const source_addr = blk: { +// const source_sym = atom.getSymbol(self); +// break :blk source_sym.n_value + rel.offset; +// }; +// const target_addr = blk: { +// const target_atom = rel.getTargetAtom(self) orelse { +// // If there is no atom for target, we still need to check for special, atom-less +// // symbols such as `___dso_handle`. 
+// const target_name = self.getSymbolName(rel.target); +// if (self.globals.contains(target_name)) { +// const atomless_sym = self.getSymbol(rel.target); +// break :blk atomless_sym.n_value; +// } +// break :blk 0; +// }; +// const target_sym = if (target_atom.isSymbolContained(rel.target, self)) +// self.getSymbol(rel.target) +// else +// target_atom.getSymbol(self); +// const base_address: u64 = if (is_tlv) base_address: { +// const sect_id: u16 = sect_id: { +// if (self.tlv_data_section_index) |i| { +// break :sect_id i; +// } else if (self.tlv_bss_section_index) |i| { +// break :sect_id i; +// } else unreachable; +// }; +// break :base_address self.getSection(.{ +// .seg = self.data_segment_cmd_index.?, +// .sect = sect_id, +// }).addr; +// } else 0; +// break :blk target_sym.n_value - base_address; +// }; + +// relocs.appendAssumeCapacity(.{ +// .address = source_addr, +// .tag = .relocation, +// .payload = .{ .target = target_addr }, +// }); +// } + +// if (atom.contained.items.len == 0) { +// try nodes.appendSlice(relocs.items); +// } else { +// // Need to reverse iteration order of relocs since by default for relocatable sources +// // they come in reverse. For linking, this doesn't matter in any way, however, for +// // arranging the memoryline for displaying it does. 
+// std.mem.reverse(Snapshot.Node, relocs.items); + +// var next_i: usize = 0; +// var last_rel: usize = 0; +// while (next_i < atom.contained.items.len) : (next_i += 1) { +// const loc = SymbolWithLoc{ +// .sym_index = atom.contained.items[next_i].sym_index, +// .file = atom.file, +// }; +// const cont_sym = self.getSymbol(loc); +// const cont_sym_name = self.getSymbolName(loc); +// var contained_node = Snapshot.Node{ +// .address = cont_sym.n_value, +// .tag = .atom_start, +// .payload = .{ +// .name = cont_sym_name, +// .is_global = self.globals.contains(cont_sym_name), +// }, +// }; + +// // Accumulate aliases +// var inner_aliases = std.ArrayList([]const u8).init(arena); +// while (true) { +// if (next_i + 1 >= atom.contained.items.len) break; +// const next_sym_loc = SymbolWithLoc{ +// .sym_index = atom.contained.items[next_i + 1].sym_index, +// .file = atom.file, +// }; +// const next_sym = self.getSymbol(next_sym_loc); +// if (next_sym.n_value != cont_sym.n_value) break; +// const next_sym_name = self.getSymbolName(next_sym_loc); +// if (self.globals.contains(next_sym_name)) { +// try inner_aliases.append(contained_node.payload.name); +// contained_node.payload.name = next_sym_name; +// contained_node.payload.is_global = true; +// } else try inner_aliases.append(next_sym_name); +// next_i += 1; +// } + +// const cont_size = if (next_i + 1 < atom.contained.items.len) +// self.getSymbol(.{ +// .sym_index = atom.contained.items[next_i + 1].sym_index, +// .file = atom.file, +// }).n_value - cont_sym.n_value +// else +// atom_sym.n_value + atom.size - cont_sym.n_value; + +// contained_node.payload.aliases = inner_aliases.toOwnedSlice(); +// try nodes.append(contained_node); + +// for (relocs.items[last_rel..]) |rel| { +// if (rel.address >= cont_sym.n_value + cont_size) { +// break; +// } +// try nodes.append(rel); +// last_rel += 1; +// } + +// try nodes.append(.{ +// .address = cont_sym.n_value + cont_size, +// .tag = .atom_end, +// .payload = .{}, +// }); +// 
} +// } + +// try nodes.append(.{ +// .address = atom_sym.n_value + atom.size, +// .tag = .atom_end, +// .payload = .{}, +// }); + +// if (atom.next) |next| { +// atom = next; +// } else break; +// } + +// try nodes.append(.{ +// .address = sect.addr + sect.size, +// .tag = .section_end, +// .payload = .{}, +// }); +// } + +// snapshot.nodes = nodes.toOwnedSlice(); + +// try std.json.stringify(snapshot, .{}, writer); +// try writer.writeByte(']'); +// } fn logSymAttributes(sym: macho.nlist_64, buf: *[9]u8) []const u8 { mem.set(u8, buf[0..4], '_'); @@ -7104,26 +6762,19 @@ fn logSymtab(self: *MachO) void { } } -fn logSectionOrdinals(self: *MachO) void { - for (self.section_ordinals.keys()) |match, i| { - const sect = self.getSection(match); - log.debug("sect({d}, '{s},{s}')", .{ i + 1, sect.segName(), sect.sectName() }); - } -} - fn logAtoms(self: *MachO) void { log.debug("atoms:", .{}); - var it = self.atoms.iterator(); - while (it.next()) |entry| { - const match = entry.key_ptr.*; - var atom = entry.value_ptr.*; + + const slice = self.sections.slice(); + for (slice.items(.last_atom)) |last, i| { + var atom = last orelse continue; + const header = slice.items(.header)[i]; while (atom.prev) |prev| { atom = prev; } - const sect = self.getSection(match); - log.debug("{s},{s}", .{ sect.segName(), sect.sectName() }); + log.debug("{s},{s}", .{ header.segName(), header.sectName() }); while (true) { self.logAtom(atom); diff --git a/src/link/MachO/Archive.zig b/src/link/MachO/Archive.zig index ee43e5b2a2..054f75fff3 100644 --- a/src/link/MachO/Archive.zig +++ b/src/link/MachO/Archive.zig @@ -6,19 +6,14 @@ const fs = std.fs; const log = std.log.scoped(.link); const macho = std.macho; const mem = std.mem; -const fat = @import("fat.zig"); const Allocator = mem.Allocator; const Object = @import("Object.zig"); file: fs.File, +fat_offset: u64, name: []const u8, - -header: ?ar_hdr = null, - -// The actual contents we care about linking with will be embedded at -// an offset within 
a file if we are linking against a fat lib -library_offset: u64 = 0, +header: ar_hdr = undefined, /// Parsed table of contents. /// Each symbol name points to a list of all definition @@ -103,11 +98,7 @@ pub fn deinit(self: *Archive, allocator: Allocator) void { allocator.free(self.name); } -pub fn parse(self: *Archive, allocator: Allocator, cpu_arch: std.Target.Cpu.Arch) !void { - const reader = self.file.reader(); - self.library_offset = try fat.getLibraryOffset(reader, cpu_arch); - try self.file.seekTo(self.library_offset); - +pub fn parse(self: *Archive, allocator: Allocator, reader: anytype) !void { const magic = try reader.readBytesNoEof(SARMAG); if (!mem.eql(u8, &magic, ARMAG)) { log.debug("invalid magic: expected '{s}', found '{s}'", .{ ARMAG, magic }); @@ -115,21 +106,23 @@ pub fn parse(self: *Archive, allocator: Allocator, cpu_arch: std.Target.Cpu.Arch } self.header = try reader.readStruct(ar_hdr); - if (!mem.eql(u8, &self.header.?.ar_fmag, ARFMAG)) { - log.debug("invalid header delimiter: expected '{s}', found '{s}'", .{ ARFMAG, self.header.?.ar_fmag }); + if (!mem.eql(u8, &self.header.ar_fmag, ARFMAG)) { + log.debug("invalid header delimiter: expected '{s}', found '{s}'", .{ + ARFMAG, + self.header.ar_fmag, + }); return error.NotArchive; } - var embedded_name = try parseName(allocator, self.header.?, reader); + const name_or_length = try self.header.nameOrLength(); + var embedded_name = try parseName(allocator, name_or_length, reader); log.debug("parsing archive '{s}' at '{s}'", .{ embedded_name, self.name }); defer allocator.free(embedded_name); try self.parseTableOfContents(allocator, reader); - try reader.context.seekTo(0); } -fn parseName(allocator: Allocator, header: ar_hdr, reader: anytype) ![]u8 { - const name_or_length = try header.nameOrLength(); +fn parseName(allocator: Allocator, name_or_length: ar_hdr.NameOrLength, reader: anytype) ![]u8 { var name: []u8 = undefined; switch (name_or_length) { .Name => |n| { @@ -187,9 +180,14 @@ fn 
parseTableOfContents(self: *Archive, allocator: Allocator, reader: anytype) ! } } -pub fn parseObject(self: Archive, allocator: Allocator, cpu_arch: std.Target.Cpu.Arch, offset: u32) !Object { +pub fn parseObject( + self: Archive, + allocator: Allocator, + cpu_arch: std.Target.Cpu.Arch, + offset: u32, +) !Object { const reader = self.file.reader(); - try reader.context.seekTo(offset + self.library_offset); + try reader.context.seekTo(self.fat_offset + offset); const object_header = try reader.readStruct(ar_hdr); @@ -198,7 +196,8 @@ pub fn parseObject(self: Archive, allocator: Allocator, cpu_arch: std.Target.Cpu return error.MalformedArchive; } - const object_name = try parseName(allocator, object_header, reader); + const name_or_length = try object_header.nameOrLength(); + const object_name = try parseName(allocator, name_or_length, reader); defer allocator.free(object_name); log.debug("extracting object '{s}' from archive '{s}'", .{ object_name, self.name }); @@ -209,15 +208,24 @@ pub fn parseObject(self: Archive, allocator: Allocator, cpu_arch: std.Target.Cpu break :name try std.fmt.allocPrint(allocator, "{s}({s})", .{ path, object_name }); }; + const object_name_len = switch (name_or_length) { + .Name => 0, + .Length => |len| len, + }; + const object_size = (try object_header.size()) - object_name_len; + const contents = try allocator.allocWithOptions(u8, object_size, @alignOf(u64), null); + const amt = try reader.readAll(contents); + if (amt != object_size) { + return error.InputOutput; + } + var object = Object{ - .file = try fs.cwd().openFile(self.name, .{}), .name = name, - .file_offset = @intCast(u32, try reader.context.getPos()), - .mtime = try self.header.?.date(), + .mtime = try self.header.date(), + .contents = contents, }; try object.parse(allocator, cpu_arch); - try reader.context.seekTo(0); return object; } diff --git a/src/link/MachO/Atom.zig b/src/link/MachO/Atom.zig index ba00764127..90c86e24ed 100644 --- a/src/link/MachO/Atom.zig +++ 
b/src/link/MachO/Atom.zig @@ -246,7 +246,7 @@ pub fn parseRelocs(self: *Atom, relocs: []const macho.relocation_info, context: else => { log.err("unexpected relocation type after ARM64_RELOC_ADDEND", .{}); log.err(" expected ARM64_RELOC_PAGE21 or ARM64_RELOC_PAGEOFF12", .{}); - log.err(" found {}", .{next}); + log.err(" found {s}", .{@tagName(next)}); return error.UnexpectedRelocationType; }, } @@ -285,7 +285,9 @@ pub fn parseRelocs(self: *Atom, relocs: []const macho.relocation_info, context: else => { log.err("unexpected relocation type after ARM64_RELOC_ADDEND", .{}); log.err(" expected ARM64_RELOC_UNSIGNED", .{}); - log.err(" found {}", .{@intToEnum(macho.reloc_type_arm64, relocs[i + 1].r_type)}); + log.err(" found {s}", .{ + @tagName(@intToEnum(macho.reloc_type_arm64, relocs[i + 1].r_type)), + }); return error.UnexpectedRelocationType; }, }, @@ -294,7 +296,9 @@ pub fn parseRelocs(self: *Atom, relocs: []const macho.relocation_info, context: else => { log.err("unexpected relocation type after X86_64_RELOC_ADDEND", .{}); log.err(" expected X86_64_RELOC_UNSIGNED", .{}); - log.err(" found {}", .{@intToEnum(macho.reloc_type_x86_64, relocs[i + 1].r_type)}); + log.err(" found {s}", .{ + @tagName(@intToEnum(macho.reloc_type_x86_64, relocs[i + 1].r_type)), + }); return error.UnexpectedRelocationType; }, }, @@ -309,13 +313,13 @@ pub fn parseRelocs(self: *Atom, relocs: []const macho.relocation_info, context: const sect_id = @intCast(u16, rel.r_symbolnum - 1); const sym_index = object.sections_as_symbols.get(sect_id) orelse blk: { const sect = object.getSourceSection(sect_id); - const match = (try context.macho_file.getMatchingSection(sect)) orelse + const match = (try context.macho_file.getOutputSection(sect)) orelse unreachable; const sym_index = @intCast(u32, object.symtab.items.len); try object.symtab.append(gpa, .{ .n_strx = 0, .n_type = macho.N_SECT, - .n_sect = context.macho_file.getSectionOrdinal(match), + .n_sect = match + 1, .n_desc = 0, .n_value = sect.addr, }); 
@@ -459,9 +463,10 @@ fn addPtrBindingOrRebase( }); } else { const source_sym = self.getSymbol(context.macho_file); - const match = context.macho_file.getMatchingSectionFromOrdinal(source_sym.n_sect); - const sect = context.macho_file.getSection(match); - const sect_type = sect.type_(); + const section = context.macho_file.sections.get(source_sym.n_sect - 1); + const header = section.header; + const segment_index = section.segment_index; + const sect_type = header.type_(); const should_rebase = rebase: { if (rel.r_length != 3) break :rebase false; @@ -470,12 +475,12 @@ fn addPtrBindingOrRebase( // that the segment is writable should be enough here. const is_right_segment = blk: { if (context.macho_file.data_segment_cmd_index) |idx| { - if (match.seg == idx) { + if (segment_index == idx) { break :blk true; } } if (context.macho_file.data_const_segment_cmd_index) |idx| { - if (match.seg == idx) { + if (segment_index == idx) { break :blk true; } } @@ -565,9 +570,8 @@ pub fn resolveRelocs(self: *Atom, macho_file: *MachO) !void { }; const is_tlv = is_tlv: { const source_sym = self.getSymbol(macho_file); - const match = macho_file.getMatchingSectionFromOrdinal(source_sym.n_sect); - const sect = macho_file.getSection(match); - break :is_tlv sect.type_() == macho.S_THREAD_LOCAL_VARIABLES; + const header = macho_file.sections.items(.header)[source_sym.n_sect - 1]; + break :is_tlv header.type_() == macho.S_THREAD_LOCAL_VARIABLES; }; const target_addr = blk: { const target_atom = rel.getTargetAtom(macho_file) orelse { @@ -608,10 +612,7 @@ pub fn resolveRelocs(self: *Atom, macho_file: *MachO) !void { return error.FailedToResolveRelocationTarget; } }; - break :base_address macho_file.getSection(.{ - .seg = macho_file.data_segment_cmd_index.?, - .sect = sect_id, - }).addr; + break :base_address macho_file.sections.items(.header)[sect_id].addr; } else 0; break :blk target_sym.n_value - base_address; }; diff --git a/src/link/MachO/CodeSignature.zig 
b/src/link/MachO/CodeSignature.zig index fbfd487ce2..530a13dc51 100644 --- a/src/link/MachO/CodeSignature.zig +++ b/src/link/MachO/CodeSignature.zig @@ -252,7 +252,7 @@ pub const WriteOpts = struct { file: fs.File, exec_seg_base: u64, exec_seg_limit: u64, - code_sig_cmd: macho.linkedit_data_command, + file_size: u32, output_mode: std.builtin.OutputMode, }; @@ -274,10 +274,9 @@ pub fn writeAdhocSignature( self.code_directory.inner.execSegBase = opts.exec_seg_base; self.code_directory.inner.execSegLimit = opts.exec_seg_limit; self.code_directory.inner.execSegFlags = if (opts.output_mode == .Exe) macho.CS_EXECSEG_MAIN_BINARY else 0; - const file_size = opts.code_sig_cmd.dataoff; - self.code_directory.inner.codeLimit = file_size; + self.code_directory.inner.codeLimit = opts.file_size; - const total_pages = mem.alignForward(file_size, self.page_size) / self.page_size; + const total_pages = mem.alignForward(opts.file_size, self.page_size) / self.page_size; var buffer = try allocator.alloc(u8, self.page_size); defer allocator.free(buffer); @@ -289,7 +288,10 @@ pub fn writeAdhocSignature( var i: usize = 0; while (i < total_pages) : (i += 1) { const fstart = i * self.page_size; - const fsize = if (fstart + self.page_size > file_size) file_size - fstart else self.page_size; + const fsize = if (fstart + self.page_size > opts.file_size) + opts.file_size - fstart + else + self.page_size; const len = try opts.file.preadAll(buffer, fstart); assert(fsize <= len); diff --git a/src/link/MachO/DebugSymbols.zig b/src/link/MachO/DebugSymbols.zig index 4da106eca1..f191d43f98 100644 --- a/src/link/MachO/DebugSymbols.zig +++ b/src/link/MachO/DebugSymbols.zig @@ -25,35 +25,18 @@ base: *MachO, dwarf: Dwarf, file: fs.File, -/// Table of all load commands -load_commands: std.ArrayListUnmanaged(macho.LoadCommand) = .{}, -/// __PAGEZERO segment -pagezero_segment_cmd_index: ?u16 = null, -/// __TEXT segment -text_segment_cmd_index: ?u16 = null, -/// __DATA_CONST segment 
-data_const_segment_cmd_index: ?u16 = null, -/// __DATA segment -data_segment_cmd_index: ?u16 = null, -/// __LINKEDIT segment -linkedit_segment_cmd_index: ?u16 = null, -/// __DWARF segment -dwarf_segment_cmd_index: ?u16 = null, -/// Symbol table -symtab_cmd_index: ?u16 = null, -/// UUID load command -uuid_cmd_index: ?u16 = null, - -/// Index into __TEXT,__text section. -text_section_index: ?u16 = null, - -debug_info_section_index: ?u16 = null, -debug_abbrev_section_index: ?u16 = null, -debug_str_section_index: ?u16 = null, -debug_aranges_section_index: ?u16 = null, -debug_line_section_index: ?u16 = null, - -load_commands_dirty: bool = false, +segments: std.ArrayListUnmanaged(macho.segment_command_64) = .{}, +sections: std.ArrayListUnmanaged(macho.section_64) = .{}, + +linkedit_segment_cmd_index: ?u8 = null, +dwarf_segment_cmd_index: ?u8 = null, + +debug_info_section_index: ?u8 = null, +debug_abbrev_section_index: ?u8 = null, +debug_str_section_index: ?u8 = null, +debug_aranges_section_index: ?u8 = null, +debug_line_section_index: ?u8 = null, + debug_string_table_dirty: bool = false, debug_abbrev_section_dirty: bool = false, debug_aranges_section_dirty: bool = false, @@ -78,98 +61,40 @@ pub const Reloc = struct { /// You must call this function *after* `MachO.populateMissingMetadata()` /// has been called to get a viable debug symbols output. 
pub fn populateMissingMetadata(self: *DebugSymbols, allocator: Allocator) !void { - if (self.uuid_cmd_index == null) { - const base_cmd = self.base.load_commands.items[self.base.uuid_cmd_index.?]; - self.uuid_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(allocator, base_cmd); - self.load_commands_dirty = true; - } - - if (self.symtab_cmd_index == null) { - self.symtab_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.base.base.allocator, .{ - .symtab = .{ - .cmdsize = @sizeOf(macho.symtab_command), - .symoff = 0, - .nsyms = 0, - .stroff = 0, - .strsize = 0, - }, - }); - try self.strtab.buffer.append(allocator, 0); - self.load_commands_dirty = true; - } - - if (self.pagezero_segment_cmd_index == null) { - self.pagezero_segment_cmd_index = @intCast(u16, self.load_commands.items.len); - const base_cmd = self.base.load_commands.items[self.base.pagezero_segment_cmd_index.?].segment; - const cmd = try self.copySegmentCommand(allocator, base_cmd); - try self.load_commands.append(allocator, .{ .segment = cmd }); - self.load_commands_dirty = true; - } - - if (self.text_segment_cmd_index == null) { - self.text_segment_cmd_index = @intCast(u16, self.load_commands.items.len); - const base_cmd = self.base.load_commands.items[self.base.text_segment_cmd_index.?].segment; - const cmd = try self.copySegmentCommand(allocator, base_cmd); - try self.load_commands.append(allocator, .{ .segment = cmd }); - self.load_commands_dirty = true; - } - - if (self.data_const_segment_cmd_index == null) outer: { - if (self.base.data_const_segment_cmd_index == null) break :outer; // __DATA_CONST is optional - self.data_const_segment_cmd_index = @intCast(u16, self.load_commands.items.len); - const base_cmd = self.base.load_commands.items[self.base.data_const_segment_cmd_index.?].segment; - const cmd = try self.copySegmentCommand(allocator, base_cmd); - try self.load_commands.append(allocator, .{ .segment = cmd }); 
- self.load_commands_dirty = true; - } - - if (self.data_segment_cmd_index == null) outer: { - if (self.base.data_segment_cmd_index == null) break :outer; // __DATA is optional - self.data_segment_cmd_index = @intCast(u16, self.load_commands.items.len); - const base_cmd = self.base.load_commands.items[self.base.data_segment_cmd_index.?].segment; - const cmd = try self.copySegmentCommand(allocator, base_cmd); - try self.load_commands.append(allocator, .{ .segment = cmd }); - self.load_commands_dirty = true; - } - if (self.linkedit_segment_cmd_index == null) { - self.linkedit_segment_cmd_index = @intCast(u16, self.load_commands.items.len); - const base_cmd = self.base.load_commands.items[self.base.linkedit_segment_cmd_index.?].segment; - var cmd = try self.copySegmentCommand(allocator, base_cmd); + self.linkedit_segment_cmd_index = @intCast(u8, self.segments.items.len); // TODO this needs reworking - cmd.inner.vmsize = self.base.page_size; - cmd.inner.fileoff = self.base.page_size; - cmd.inner.filesize = self.base.page_size; - try self.load_commands.append(allocator, .{ .segment = cmd }); - self.load_commands_dirty = true; + try self.segments.append(allocator, .{ + .segname = makeStaticString("__LINKEDIT"), + .vmaddr = self.base.page_size, + .vmsize = self.base.page_size, + .fileoff = self.base.page_size, + .filesize = self.base.page_size, + .maxprot = macho.PROT.READ, + .initprot = macho.PROT.READ, + .cmdsize = @sizeOf(macho.segment_command_64), + }); } if (self.dwarf_segment_cmd_index == null) { - self.dwarf_segment_cmd_index = @intCast(u16, self.load_commands.items.len); + self.dwarf_segment_cmd_index = @intCast(u8, self.segments.items.len); - const linkedit = self.load_commands.items[self.linkedit_segment_cmd_index.?].segment; + const linkedit = self.segments.items[self.base.linkedit_segment_cmd_index.?]; const ideal_size: u16 = 200 + 128 + 160 + 250; const needed_size = mem.alignForwardGeneric(u64, padToIdeal(ideal_size), self.base.page_size); - const fileoff = 
linkedit.inner.fileoff + linkedit.inner.filesize; - const vmaddr = linkedit.inner.vmaddr + linkedit.inner.vmsize; + const fileoff = linkedit.fileoff + linkedit.filesize; + const vmaddr = linkedit.vmaddr + linkedit.vmsize; log.debug("found __DWARF segment free space 0x{x} to 0x{x}", .{ fileoff, fileoff + needed_size }); - try self.load_commands.append(allocator, .{ - .segment = .{ - .inner = .{ - .segname = makeStaticString("__DWARF"), - .vmaddr = vmaddr, - .vmsize = needed_size, - .fileoff = fileoff, - .filesize = needed_size, - .cmdsize = @sizeOf(macho.segment_command_64), - }, - }, + try self.segments.append(allocator, .{ + .segname = makeStaticString("__DWARF"), + .vmaddr = vmaddr, + .vmsize = needed_size, + .fileoff = fileoff, + .filesize = needed_size, + .cmdsize = @sizeOf(macho.segment_command_64), }); - self.load_commands_dirty = true; } if (self.debug_str_section_index == null) { @@ -203,18 +128,18 @@ pub fn populateMissingMetadata(self: *DebugSymbols, allocator: Allocator) !void } } -fn allocateSection(self: *DebugSymbols, sectname: []const u8, size: u64, alignment: u16) !u16 { - const seg = &self.load_commands.items[self.dwarf_segment_cmd_index.?].segment; +fn allocateSection(self: *DebugSymbols, sectname: []const u8, size: u64, alignment: u16) !u8 { + const segment = &self.segments.items[self.dwarf_segment_cmd_index.?]; var sect = macho.section_64{ .sectname = makeStaticString(sectname), - .segname = seg.inner.segname, + .segname = segment.segname, .size = @intCast(u32, size), .@"align" = alignment, }; const alignment_pow_2 = try math.powi(u32, 2, alignment); const off = self.findFreeSpace(size, alignment_pow_2); - assert(off + size <= seg.inner.fileoff + seg.inner.filesize); // TODO expand + assert(off + size <= segment.fileoff + segment.filesize); // TODO expand log.debug("found {s},{s} section free space 0x{x} to 0x{x}", .{ sect.segName(), @@ -223,31 +148,20 @@ fn allocateSection(self: *DebugSymbols, sectname: []const u8, size: u64, alignme off + 
size, }); - sect.addr = seg.inner.vmaddr + off - seg.inner.fileoff; + sect.addr = segment.vmaddr + off - segment.fileoff; sect.offset = @intCast(u32, off); - const index = @intCast(u16, seg.sections.items.len); - try seg.sections.append(self.base.base.allocator, sect); - seg.inner.cmdsize += @sizeOf(macho.section_64); - seg.inner.nsects += 1; - - // TODO - // const match = MatchingSection{ - // .seg = segment_id, - // .sect = index, - // }; - // _ = try self.section_ordinals.getOrPut(self.base.allocator, match); - // try self.block_free_lists.putNoClobber(self.base.allocator, match, .{}); - - self.load_commands_dirty = true; + const index = @intCast(u8, self.sections.items.len); + try self.sections.append(self.base.base.allocator, sect); + segment.cmdsize += @sizeOf(macho.section_64); + segment.nsects += 1; return index; } fn detectAllocCollision(self: *DebugSymbols, start: u64, size: u64) ?u64 { - const seg = self.load_commands.items[self.dwarf_segment_cmd_index.?].segment; const end = start + padToIdeal(size); - for (seg.sections.items) |section| { + for (self.sections.items) |section| { const increased_size = padToIdeal(section.size); const test_end = section.offset + increased_size; if (end > section.offset and start < test_end) { @@ -258,8 +172,8 @@ fn detectAllocCollision(self: *DebugSymbols, start: u64, size: u64) ?u64 { } pub fn findFreeSpace(self: *DebugSymbols, object_size: u64, min_alignment: u64) u64 { - const seg = self.load_commands.items[self.dwarf_segment_cmd_index.?].segment; - var offset: u64 = seg.inner.fileoff; + const segment = self.segments.items[self.dwarf_segment_cmd_index.?]; + var offset: u64 = segment.fileoff; while (self.detectAllocCollision(offset, object_size)) |item_end| { offset = mem.alignForwardGeneric(u64, item_end, min_alignment); } @@ -296,8 +210,7 @@ pub fn flushModule(self: *DebugSymbols, allocator: Allocator, options: link.Opti break :blk got_entry.getName(self.base); }, }; - const seg = 
&self.load_commands.items[self.dwarf_segment_cmd_index.?].segment; - const sect = &seg.sections.items[self.debug_info_section_index.?]; + const sect = &self.sections.items[self.debug_info_section_index.?]; const file_offset = sect.offset + reloc.offset; log.debug("resolving relocation: {d}@{x} ('{s}') at offset {x}", .{ reloc.target, @@ -311,15 +224,13 @@ pub fn flushModule(self: *DebugSymbols, allocator: Allocator, options: link.Opti if (self.debug_abbrev_section_dirty) { try self.dwarf.writeDbgAbbrev(&self.base.base); - self.load_commands_dirty = true; self.debug_abbrev_section_dirty = false; } if (self.debug_info_header_dirty) { // Currently only one compilation unit is supported, so the address range is simply // identical to the main program header virtual address and memory size. - const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].segment; - const text_section = text_segment.sections.items[self.text_section_index.?]; + const text_section = self.base.sections.items(.header)[self.base.text_section_index.?]; const low_pc = text_section.addr; const high_pc = text_section.addr + text_section.size; try self.dwarf.writeDbgInfoHeader(&self.base.base, module, low_pc, high_pc); @@ -329,10 +240,8 @@ pub fn flushModule(self: *DebugSymbols, allocator: Allocator, options: link.Opti if (self.debug_aranges_section_dirty) { // Currently only one compilation unit is supported, so the address range is simply // identical to the main program header virtual address and memory size. 
- const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].segment; - const text_section = text_segment.sections.items[self.text_section_index.?]; + const text_section = self.base.sections.items(.header)[self.base.text_section_index.?]; try self.dwarf.writeDbgAranges(&self.base.base, text_section.addr, text_section.size); - self.load_commands_dirty = true; self.debug_aranges_section_dirty = false; } @@ -342,8 +251,8 @@ pub fn flushModule(self: *DebugSymbols, allocator: Allocator, options: link.Opti } { - const dwarf_segment = &self.load_commands.items[self.dwarf_segment_cmd_index.?].segment; - const debug_strtab_sect = &dwarf_segment.sections.items[self.debug_str_section_index.?]; + const dwarf_segment = &self.segments.items[self.dwarf_segment_cmd_index.?]; + const debug_strtab_sect = &self.sections.items[self.debug_str_section_index.?]; if (self.debug_string_table_dirty or self.dwarf.strtab.items.len != debug_strtab_sect.size) { const allocated_size = self.allocatedSize(debug_strtab_sect.offset); const needed_size = self.dwarf.strtab.items.len; @@ -351,7 +260,7 @@ pub fn flushModule(self: *DebugSymbols, allocator: Allocator, options: link.Opti if (needed_size > allocated_size) { debug_strtab_sect.size = 0; // free the space const new_offset = self.findFreeSpace(needed_size, 1); - debug_strtab_sect.addr = dwarf_segment.inner.vmaddr + new_offset - dwarf_segment.inner.fileoff; + debug_strtab_sect.addr = dwarf_segment.vmaddr + new_offset - dwarf_segment.fileoff; debug_strtab_sect.offset = @intCast(u32, new_offset); } debug_strtab_sect.size = @intCast(u32, needed_size); @@ -362,28 +271,53 @@ pub fn flushModule(self: *DebugSymbols, allocator: Allocator, options: link.Opti }); try self.file.pwriteAll(self.dwarf.strtab.items, debug_strtab_sect.offset); - self.load_commands_dirty = true; self.debug_string_table_dirty = false; } } + var lc_buffer = std.ArrayList(u8).init(allocator); + defer lc_buffer.deinit(); + const lc_writer = lc_buffer.writer(); + 
var ncmds: u32 = 0; + + try self.writeLinkeditSegmentData(&ncmds, lc_writer); self.updateDwarfSegment(); - try self.writeLinkeditSegment(); - try self.updateVirtualMemoryMapping(); - try self.writeLoadCommands(allocator); - try self.writeHeader(); - assert(!self.load_commands_dirty); + { + try lc_writer.writeStruct(self.base.uuid); + ncmds += 1; + } + + var headers_buf = std.ArrayList(u8).init(allocator); + defer headers_buf.deinit(); + try self.base.writeSegmentHeaders( + 0, + self.base.linkedit_segment_cmd_index.?, + &ncmds, + headers_buf.writer(), + ); + + for (self.segments.items) |seg| { + try headers_buf.writer().writeStruct(seg); + ncmds += 2; + } + for (self.sections.items) |header| { + try headers_buf.writer().writeStruct(header); + } + + try self.file.pwriteAll(headers_buf.items, @sizeOf(macho.mach_header_64)); + try self.file.pwriteAll(lc_buffer.items, @sizeOf(macho.mach_header_64) + headers_buf.items.len); + + try self.writeHeader(ncmds, @intCast(u32, lc_buffer.items.len + headers_buf.items.len)); + assert(!self.debug_abbrev_section_dirty); assert(!self.debug_aranges_section_dirty); assert(!self.debug_string_table_dirty); } pub fn deinit(self: *DebugSymbols, allocator: Allocator) void { - for (self.load_commands.items) |*lc| { - lc.deinit(allocator); - } - self.load_commands.deinit(allocator); + self.segments.deinit(allocator); + self.sections.deinit(allocator); self.dwarf.deinit(); self.strtab.deinit(allocator); self.relocs.deinit(allocator); @@ -402,59 +336,19 @@ pub fn swapRemoveRelocs(self: *DebugSymbols, target: u32) void { } } -fn copySegmentCommand( - self: *DebugSymbols, - allocator: Allocator, - base_cmd: macho.SegmentCommand, -) !macho.SegmentCommand { - var cmd = macho.SegmentCommand{ - .inner = .{ - .segname = undefined, - .cmdsize = base_cmd.inner.cmdsize, - .vmaddr = base_cmd.inner.vmaddr, - .vmsize = base_cmd.inner.vmsize, - .maxprot = base_cmd.inner.maxprot, - .initprot = base_cmd.inner.initprot, - .nsects = base_cmd.inner.nsects, - 
.flags = base_cmd.inner.flags, - }, - }; - mem.copy(u8, &cmd.inner.segname, &base_cmd.inner.segname); - - try cmd.sections.ensureTotalCapacity(allocator, cmd.inner.nsects); - for (base_cmd.sections.items) |base_sect, i| { - var sect = macho.section_64{ - .sectname = undefined, - .segname = undefined, - .addr = base_sect.addr, - .size = base_sect.size, - .offset = 0, - .@"align" = base_sect.@"align", - .reloff = 0, - .nreloc = 0, - .flags = base_sect.flags, - .reserved1 = base_sect.reserved1, - .reserved2 = base_sect.reserved2, - .reserved3 = base_sect.reserved3, - }; - mem.copy(u8, &sect.sectname, &base_sect.sectname); - mem.copy(u8, &sect.segname, &base_sect.segname); - - if (self.base.text_section_index.? == i) { - self.text_section_index = @intCast(u16, i); - } +fn updateDwarfSegment(self: *DebugSymbols) void { + const linkedit = self.segments.items[self.linkedit_segment_cmd_index.?]; + const dwarf_segment = &self.segments.items[self.dwarf_segment_cmd_index.?]; - cmd.sections.appendAssumeCapacity(sect); + const new_start_aligned = linkedit.vmaddr + linkedit.vmsize; + const old_start_aligned = dwarf_segment.vmaddr; + const diff = new_start_aligned - old_start_aligned; + if (diff > 0) { + dwarf_segment.vmaddr = new_start_aligned; } - return cmd; -} - -fn updateDwarfSegment(self: *DebugSymbols) void { - const dwarf_segment = &self.load_commands.items[self.dwarf_segment_cmd_index.?].segment; - var max_offset: u64 = 0; - for (dwarf_segment.sections.items) |sect| { + for (self.sections.items) |*sect| { log.debug(" {s},{s} - 0x{x}-0x{x} - 0x{x}-0x{x}", .{ sect.segName(), sect.sectName(), @@ -466,44 +360,19 @@ fn updateDwarfSegment(self: *DebugSymbols) void { if (sect.offset + sect.size > max_offset) { max_offset = sect.offset + sect.size; } + sect.addr += diff; } - const file_size = max_offset - dwarf_segment.inner.fileoff; + const file_size = max_offset - dwarf_segment.fileoff; log.debug("__DWARF size 0x{x}", .{file_size}); - if (file_size != dwarf_segment.inner.filesize) { - 
dwarf_segment.inner.filesize = file_size; - if (dwarf_segment.inner.vmsize < dwarf_segment.inner.filesize) { - dwarf_segment.inner.vmsize = mem.alignForwardGeneric(u64, dwarf_segment.inner.filesize, self.base.page_size); - } - self.load_commands_dirty = true; - } -} - -/// Writes all load commands and section headers. -fn writeLoadCommands(self: *DebugSymbols, allocator: Allocator) !void { - if (!self.load_commands_dirty) return; - - var sizeofcmds: u32 = 0; - for (self.load_commands.items) |lc| { - sizeofcmds += lc.cmdsize(); + if (file_size != dwarf_segment.filesize) { + dwarf_segment.filesize = file_size; + dwarf_segment.vmsize = mem.alignForwardGeneric(u64, dwarf_segment.filesize, self.base.page_size); } - - var buffer = try allocator.alloc(u8, sizeofcmds); - defer allocator.free(buffer); - var fib = std.io.fixedBufferStream(buffer); - const writer = fib.writer(); - for (self.load_commands.items) |lc| { - try lc.write(writer); - } - - const off = @sizeOf(macho.mach_header_64); - log.debug("writing {} load commands from 0x{x} to 0x{x}", .{ self.load_commands.items.len, off, off + sizeofcmds }); - try self.file.pwriteAll(buffer, off); - self.load_commands_dirty = false; } -fn writeHeader(self: *DebugSymbols) !void { +fn writeHeader(self: *DebugSymbols, ncmds: u32, sizeofcmds: u32) !void { var header: macho.mach_header_64 = .{}; header.filetype = macho.MH_DSYM; @@ -519,12 +388,8 @@ fn writeHeader(self: *DebugSymbols) !void { else => return error.UnsupportedCpuArchitecture, } - header.ncmds = @intCast(u32, self.load_commands.items.len); - header.sizeofcmds = 0; - - for (self.load_commands.items) |cmd| { - header.sizeofcmds += cmd.cmdsize(); - } + header.ncmds = ncmds; + header.sizeofcmds = sizeofcmds; log.debug("writing Mach-O header {}", .{header}); @@ -532,79 +397,46 @@ fn writeHeader(self: *DebugSymbols) !void { } pub fn allocatedSize(self: *DebugSymbols, start: u64) u64 { - const seg = self.load_commands.items[self.dwarf_segment_cmd_index.?].segment; - 
assert(start >= seg.inner.fileoff); + const seg = self.segments.items[self.dwarf_segment_cmd_index.?]; + assert(start >= seg.fileoff); var min_pos: u64 = std.math.maxInt(u64); - for (seg.sections.items) |section| { + for (self.sections.items) |section| { if (section.offset <= start) continue; if (section.offset < min_pos) min_pos = section.offset; } return min_pos - start; } -fn updateVirtualMemoryMapping(self: *DebugSymbols) !void { - const macho_file = self.base; - const allocator = macho_file.base.allocator; - - const IndexTuple = std.meta.Tuple(&[_]type{ *?u16, *?u16 }); - const indices = &[_]IndexTuple{ - .{ &macho_file.text_segment_cmd_index, &self.text_segment_cmd_index }, - .{ &macho_file.data_const_segment_cmd_index, &self.data_const_segment_cmd_index }, - .{ &macho_file.data_segment_cmd_index, &self.data_segment_cmd_index }, - }; - - for (indices) |tuple| { - const orig_cmd = macho_file.load_commands.items[tuple[0].*.?].segment; - const cmd = try self.copySegmentCommand(allocator, orig_cmd); - const comp_cmd = &self.load_commands.items[tuple[1].*.?]; - comp_cmd.deinit(allocator); - self.load_commands.items[tuple[1].*.?] = .{ .segment = cmd }; - } - - // TODO should we set the linkedit vmsize to that of the binary? - const orig_cmd = macho_file.load_commands.items[macho_file.linkedit_segment_cmd_index.?].segment; - const orig_vmaddr = orig_cmd.inner.vmaddr; - const linkedit_cmd = &self.load_commands.items[self.linkedit_segment_cmd_index.?].segment; - linkedit_cmd.inner.vmaddr = orig_vmaddr; - - // Update VM address for the DWARF segment and sections including re-running relocations. 
- // TODO re-run relocations - const dwarf_cmd = &self.load_commands.items[self.dwarf_segment_cmd_index.?].segment; - const new_start_aligned = orig_vmaddr + linkedit_cmd.inner.vmsize; - const old_start_aligned = dwarf_cmd.inner.vmaddr; - const diff = new_start_aligned - old_start_aligned; - if (diff > 0) { - dwarf_cmd.inner.vmaddr = new_start_aligned; - - for (dwarf_cmd.sections.items) |*sect| { - sect.addr += (new_start_aligned - old_start_aligned); - } - } - - self.load_commands_dirty = true; -} - -fn writeLinkeditSegment(self: *DebugSymbols) !void { +fn writeLinkeditSegmentData(self: *DebugSymbols, ncmds: *u32, lc_writer: anytype) !void { const tracy = trace(@src()); defer tracy.end(); - try self.writeSymbolTable(); - try self.writeStringTable(); + const source_vmaddr = self.base.segments.items[self.base.linkedit_segment_cmd_index.?].vmaddr; + const seg = &self.segments.items[self.linkedit_segment_cmd_index.?]; + seg.vmaddr = source_vmaddr; - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].segment; - const aligned_size = mem.alignForwardGeneric(u64, seg.inner.filesize, self.base.page_size); - seg.inner.filesize = aligned_size; - seg.inner.vmsize = aligned_size; + var symtab_cmd = macho.symtab_command{ + .cmdsize = @sizeOf(macho.symtab_command), + .symoff = 0, + .nsyms = 0, + .stroff = 0, + .strsize = 0, + }; + try self.writeSymtab(&symtab_cmd); + try self.writeStrtab(&symtab_cmd); + try lc_writer.writeStruct(symtab_cmd); + ncmds.* += 1; + + const aligned_size = mem.alignForwardGeneric(u64, seg.filesize, self.base.page_size); + seg.filesize = aligned_size; + seg.vmsize = aligned_size; } -fn writeSymbolTable(self: *DebugSymbols) !void { +fn writeSymtab(self: *DebugSymbols, lc: *macho.symtab_command) !void { const tracy = trace(@src()); defer tracy.end(); const gpa = self.base.base.allocator; - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].segment; - const symtab = 
&self.load_commands.items[self.symtab_cmd_index.?].symtab; - symtab.symoff = @intCast(u32, seg.inner.fileoff); var locals = std.ArrayList(macho.nlist_64).init(gpa); defer locals.deinit(); @@ -634,34 +466,36 @@ fn writeSymbolTable(self: *DebugSymbols) !void { const nlocals = locals.items.len; const nexports = exports.items.len; - const locals_off = symtab.symoff; - const locals_size = nlocals * @sizeOf(macho.nlist_64); - const exports_off = locals_off + locals_size; - const exports_size = nexports * @sizeOf(macho.nlist_64); + const nsyms = nlocals + nexports; - symtab.nsyms = @intCast(u32, nlocals + nexports); - const needed_size = (nlocals + nexports) * @sizeOf(macho.nlist_64); + const seg = &self.segments.items[self.linkedit_segment_cmd_index.?]; + const offset = mem.alignForwardGeneric( + u64, + seg.fileoff + seg.filesize, + @alignOf(macho.nlist_64), + ); + const needed_size = nsyms * @sizeOf(macho.nlist_64); - if (needed_size > seg.inner.filesize) { + if (needed_size > seg.filesize) { const aligned_size = mem.alignForwardGeneric(u64, needed_size, self.base.page_size); - const diff = @intCast(u32, aligned_size - seg.inner.filesize); - const dwarf_seg = &self.load_commands.items[self.dwarf_segment_cmd_index.?].segment; - seg.inner.filesize = aligned_size; + const diff = @intCast(u32, aligned_size - seg.filesize); + const dwarf_seg = &self.segments.items[self.dwarf_segment_cmd_index.?]; + seg.filesize = aligned_size; try MachO.copyRangeAllOverlappingAlloc( self.base.base.allocator, self.file, - dwarf_seg.inner.fileoff, - dwarf_seg.inner.fileoff + diff, - math.cast(usize, dwarf_seg.inner.filesize) orelse return error.Overflow, + dwarf_seg.fileoff, + dwarf_seg.fileoff + diff, + math.cast(usize, dwarf_seg.filesize) orelse return error.Overflow, ); - const old_seg_fileoff = dwarf_seg.inner.fileoff; - dwarf_seg.inner.fileoff += diff; + const old_seg_fileoff = dwarf_seg.fileoff; + dwarf_seg.fileoff += diff; - log.debug(" (moving __DWARF segment from 0x{x} to 0x{x})", .{ 
old_seg_fileoff, dwarf_seg.inner.fileoff }); + log.debug(" (moving __DWARF segment from 0x{x} to 0x{x})", .{ old_seg_fileoff, dwarf_seg.fileoff }); - for (dwarf_seg.sections.items) |*sect| { + for (self.sections.items) |*sect| { const old_offset = sect.offset; sect.offset += diff; @@ -674,47 +508,53 @@ fn writeSymbolTable(self: *DebugSymbols) !void { } } + lc.symoff = @intCast(u32, offset); + lc.nsyms = @intCast(u32, nsyms); + + const locals_off = lc.symoff; + const locals_size = nlocals * @sizeOf(macho.nlist_64); + const exports_off = locals_off + locals_size; + const exports_size = nexports * @sizeOf(macho.nlist_64); + log.debug("writing local symbols from 0x{x} to 0x{x}", .{ locals_off, locals_size + locals_off }); try self.file.pwriteAll(mem.sliceAsBytes(locals.items), locals_off); log.debug("writing exported symbols from 0x{x} to 0x{x}", .{ exports_off, exports_size + exports_off }); try self.file.pwriteAll(mem.sliceAsBytes(exports.items), exports_off); - - self.load_commands_dirty = true; } -fn writeStringTable(self: *DebugSymbols) !void { +fn writeStrtab(self: *DebugSymbols, lc: *macho.symtab_command) !void { const tracy = trace(@src()); defer tracy.end(); - const seg = &self.load_commands.items[self.linkedit_segment_cmd_index.?].segment; - const symtab = &self.load_commands.items[self.symtab_cmd_index.?].symtab; - const symtab_size = @intCast(u32, symtab.nsyms * @sizeOf(macho.nlist_64)); - symtab.stroff = symtab.symoff + symtab_size; + const seg = &self.segments.items[self.linkedit_segment_cmd_index.?]; + const symtab_size = @intCast(u32, lc.nsyms * @sizeOf(macho.nlist_64)); + const offset = mem.alignForwardGeneric(u64, lc.symoff + symtab_size, @alignOf(u64)); + lc.stroff = @intCast(u32, offset); const needed_size = mem.alignForwardGeneric(u64, self.strtab.buffer.items.len, @alignOf(u64)); - symtab.strsize = @intCast(u32, needed_size); + lc.strsize = @intCast(u32, needed_size); - if (symtab_size + needed_size > seg.inner.filesize) { - const aligned_size = 
mem.alignForwardGeneric(u64, symtab_size + needed_size, self.base.page_size); - const diff = @intCast(u32, aligned_size - seg.inner.filesize); - const dwarf_seg = &self.load_commands.items[self.dwarf_segment_cmd_index.?].segment; - seg.inner.filesize = aligned_size; + if (offset + needed_size > seg.filesize) { + const aligned_size = mem.alignForwardGeneric(u64, offset + needed_size, self.base.page_size); + const diff = @intCast(u32, aligned_size - seg.filesize); + const dwarf_seg = &self.segments.items[self.dwarf_segment_cmd_index.?]; + seg.filesize = aligned_size; try MachO.copyRangeAllOverlappingAlloc( self.base.base.allocator, self.file, - dwarf_seg.inner.fileoff, - dwarf_seg.inner.fileoff + diff, - math.cast(usize, dwarf_seg.inner.filesize) orelse return error.Overflow, + dwarf_seg.fileoff, + dwarf_seg.fileoff + diff, + math.cast(usize, dwarf_seg.filesize) orelse return error.Overflow, ); - const old_seg_fileoff = dwarf_seg.inner.fileoff; - dwarf_seg.inner.fileoff += diff; + const old_seg_fileoff = dwarf_seg.fileoff; + dwarf_seg.fileoff += diff; - log.debug(" (moving __DWARF segment from 0x{x} to 0x{x})", .{ old_seg_fileoff, dwarf_seg.inner.fileoff }); + log.debug(" (moving __DWARF segment from 0x{x} to 0x{x})", .{ old_seg_fileoff, dwarf_seg.fileoff }); - for (dwarf_seg.sections.items) |*sect| { + for (self.sections.items) |*sect| { const old_offset = sect.offset; sect.offset += diff; @@ -727,9 +567,7 @@ fn writeStringTable(self: *DebugSymbols) !void { } } - log.debug("writing string table from 0x{x} to 0x{x}", .{ symtab.stroff, symtab.stroff + symtab.strsize }); - - try self.file.pwriteAll(self.strtab.buffer.items, symtab.stroff); + log.debug("writing string table from 0x{x} to 0x{x}", .{ lc.stroff, lc.stroff + lc.strsize }); - self.load_commands_dirty = true; + try self.file.pwriteAll(self.strtab.buffer.items, lc.stroff); } diff --git a/src/link/MachO/Dylib.zig b/src/link/MachO/Dylib.zig index ffc0b2cca6..0f16eada61 100644 --- a/src/link/MachO/Dylib.zig +++ 
b/src/link/MachO/Dylib.zig @@ -13,23 +13,9 @@ const fat = @import("fat.zig"); const Allocator = mem.Allocator; const CrossTarget = std.zig.CrossTarget; const LibStub = @import("../tapi.zig").LibStub; +const LoadCommandIterator = macho.LoadCommandIterator; const MachO = @import("../MachO.zig"); -file: fs.File, -name: []const u8, - -header: ?macho.mach_header_64 = null, - -// The actual dylib contents we care about linking with will be embedded at -// an offset within a file if we are linking against a fat lib -library_offset: u64 = 0, - -load_commands: std.ArrayListUnmanaged(macho.LoadCommand) = .{}, - -symtab_cmd_index: ?u16 = null, -dysymtab_cmd_index: ?u16 = null, -id_cmd_index: ?u16 = null, - id: ?Id = null, weak: bool = false, @@ -53,16 +39,12 @@ pub const Id = struct { }; } - pub fn fromLoadCommand(allocator: Allocator, lc: macho.GenericCommandWithData(macho.dylib_command)) !Id { - const dylib = lc.inner.dylib; - const dylib_name = @ptrCast([*:0]const u8, lc.data[dylib.name - @sizeOf(macho.dylib_command) ..]); - const name = try allocator.dupe(u8, mem.sliceTo(dylib_name, 0)); - + pub fn fromLoadCommand(allocator: Allocator, lc: macho.dylib_command, name: []const u8) !Id { return Id{ - .name = name, - .timestamp = dylib.timestamp, - .current_version = dylib.current_version, - .compatibility_version = dylib.compatibility_version, + .name = try allocator.dupe(u8, name), + .timestamp = lc.dylib.timestamp, + .current_version = lc.dylib.current_version, + .compatibility_version = lc.dylib.compatibility_version, }; } @@ -126,125 +108,89 @@ pub const Id = struct { }; pub fn deinit(self: *Dylib, allocator: Allocator) void { - for (self.load_commands.items) |*lc| { - lc.deinit(allocator); - } - self.load_commands.deinit(allocator); - for (self.symbols.keys()) |key| { allocator.free(key); } self.symbols.deinit(allocator); - - allocator.free(self.name); - if (self.id) |*id| { id.deinit(allocator); } } -pub fn parse( +pub fn parseFromBinary( self: *Dylib, allocator: 
Allocator, cpu_arch: std.Target.Cpu.Arch, dylib_id: u16, dependent_libs: anytype, + name: []const u8, + data: []align(@alignOf(u64)) const u8, ) !void { - log.debug("parsing shared library '{s}'", .{self.name}); - - self.library_offset = try fat.getLibraryOffset(self.file.reader(), cpu_arch); + var stream = std.io.fixedBufferStream(data); + const reader = stream.reader(); - try self.file.seekTo(self.library_offset); + log.debug("parsing shared library '{s}'", .{name}); - var reader = self.file.reader(); - self.header = try reader.readStruct(macho.mach_header_64); + const header = try reader.readStruct(macho.mach_header_64); - if (self.header.?.filetype != macho.MH_DYLIB) { - log.debug("invalid filetype: expected 0x{x}, found 0x{x}", .{ macho.MH_DYLIB, self.header.?.filetype }); + if (header.filetype != macho.MH_DYLIB) { + log.debug("invalid filetype: expected 0x{x}, found 0x{x}", .{ macho.MH_DYLIB, header.filetype }); return error.NotDylib; } - const this_arch: std.Target.Cpu.Arch = try fat.decodeArch(self.header.?.cputype, true); + const this_arch: std.Target.Cpu.Arch = try fat.decodeArch(header.cputype, true); if (this_arch != cpu_arch) { - log.err("mismatched cpu architecture: expected {}, found {}", .{ cpu_arch, this_arch }); + log.err("mismatched cpu architecture: expected {s}, found {s}", .{ + @tagName(cpu_arch), + @tagName(this_arch), + }); return error.MismatchedCpuArchitecture; } - try self.readLoadCommands(allocator, reader, dylib_id, dependent_libs); - try self.parseId(allocator); - try self.parseSymbols(allocator); -} - -fn readLoadCommands( - self: *Dylib, - allocator: Allocator, - reader: anytype, - dylib_id: u16, - dependent_libs: anytype, -) !void { - const should_lookup_reexports = self.header.?.flags & macho.MH_NO_REEXPORTED_DYLIBS == 0; - - try self.load_commands.ensureUnusedCapacity(allocator, self.header.?.ncmds); - - var i: u16 = 0; - while (i < self.header.?.ncmds) : (i += 1) { - var cmd = try macho.LoadCommand.read(allocator, reader); + 
const should_lookup_reexports = header.flags & macho.MH_NO_REEXPORTED_DYLIBS == 0; + var it = LoadCommandIterator{ + .ncmds = header.ncmds, + .buffer = data[@sizeOf(macho.mach_header_64)..][0..header.sizeofcmds], + }; + while (it.next()) |cmd| { switch (cmd.cmd()) { .SYMTAB => { - self.symtab_cmd_index = i; - }, - .DYSYMTAB => { - self.dysymtab_cmd_index = i; + const symtab_cmd = cmd.cast(macho.symtab_command).?; + const symtab = @ptrCast( + [*]const macho.nlist_64, + @alignCast(@alignOf(macho.nlist_64), &data[symtab_cmd.symoff]), + )[0..symtab_cmd.nsyms]; + const strtab = data[symtab_cmd.stroff..][0..symtab_cmd.strsize]; + + for (symtab) |sym| { + const add_to_symtab = sym.ext() and (sym.sect() or sym.indr()); + if (!add_to_symtab) continue; + + const sym_name = mem.sliceTo(@ptrCast([*:0]const u8, strtab.ptr + sym.n_strx), 0); + try self.symbols.putNoClobber(allocator, try allocator.dupe(u8, sym_name), {}); + } }, .ID_DYLIB => { - self.id_cmd_index = i; + self.id = try Id.fromLoadCommand( + allocator, + cmd.cast(macho.dylib_command).?, + cmd.getDylibPathName(), + ); }, .REEXPORT_DYLIB => { if (should_lookup_reexports) { // Parse install_name to dependent dylib. 
- var id = try Id.fromLoadCommand(allocator, cmd.dylib); + var id = try Id.fromLoadCommand( + allocator, + cmd.cast(macho.dylib_command).?, + cmd.getDylibPathName(), + ); try dependent_libs.writeItem(.{ .id = id, .parent = dylib_id }); } }, - else => { - log.debug("Unknown load command detected: 0x{x}.", .{@enumToInt(cmd.cmd())}); - }, + else => {}, } - self.load_commands.appendAssumeCapacity(cmd); - } -} - -fn parseId(self: *Dylib, allocator: Allocator) !void { - const index = self.id_cmd_index orelse { - log.debug("no LC_ID_DYLIB load command found; using hard-coded defaults...", .{}); - self.id = try Id.default(allocator, self.name); - return; - }; - self.id = try Id.fromLoadCommand(allocator, self.load_commands.items[index].dylib); -} - -fn parseSymbols(self: *Dylib, allocator: Allocator) !void { - const index = self.symtab_cmd_index orelse return; - const symtab_cmd = self.load_commands.items[index].symtab; - - const symtab = try allocator.alloc(u8, @sizeOf(macho.nlist_64) * symtab_cmd.nsyms); - defer allocator.free(symtab); - _ = try self.file.preadAll(symtab, symtab_cmd.symoff + self.library_offset); - const slice = @alignCast(@alignOf(macho.nlist_64), mem.bytesAsSlice(macho.nlist_64, symtab)); - - const strtab = try allocator.alloc(u8, symtab_cmd.strsize); - defer allocator.free(strtab); - _ = try self.file.preadAll(strtab, symtab_cmd.stroff + self.library_offset); - - for (slice) |sym| { - const add_to_symtab = sym.ext() and (sym.sect() or sym.indr()); - - if (!add_to_symtab) continue; - - const sym_name = mem.sliceTo(@ptrCast([*:0]const u8, strtab.ptr + sym.n_strx), 0); - const name = try allocator.dupe(u8, sym_name); - try self.symbols.putNoClobber(allocator, name, {}); } } @@ -356,10 +302,11 @@ pub fn parseFromStub( lib_stub: LibStub, dylib_id: u16, dependent_libs: anytype, + name: []const u8, ) !void { if (lib_stub.inner.len == 0) return error.EmptyStubFile; - log.debug("parsing shared library from stub '{s}'", .{self.name}); + log.debug("parsing 
shared library from stub '{s}'", .{name}); const umbrella_lib = lib_stub.inner[0]; diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index 0d929627cd..2e2f3dad84 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -3,6 +3,7 @@ const Object = @This(); const std = @import("std"); const build_options = @import("build_options"); const assert = std.debug.assert; +const dwarf = std.dwarf; const fs = std.fs; const io = std.io; const log = std.log.scoped(.link); @@ -14,43 +15,20 @@ const trace = @import("../../tracy.zig").trace; const Allocator = mem.Allocator; const Atom = @import("Atom.zig"); +const LoadCommandIterator = macho.LoadCommandIterator; const MachO = @import("../MachO.zig"); -const MatchingSection = MachO.MatchingSection; const SymbolWithLoc = MachO.SymbolWithLoc; -file: fs.File, name: []const u8, mtime: u64, - -/// Data contents of the file. Includes sections, and data of load commands. -/// Excludes the backing memory for the header and load commands. -/// Initialized in `parse`. 
-contents: []const u8 = undefined, - -file_offset: ?u32 = null, +contents: []align(@alignOf(u64)) const u8, header: macho.mach_header_64 = undefined, - -load_commands: std.ArrayListUnmanaged(macho.LoadCommand) = .{}, - -segment_cmd_index: ?u16 = null, -text_section_index: ?u16 = null, -symtab_cmd_index: ?u16 = null, -dysymtab_cmd_index: ?u16 = null, -build_version_cmd_index: ?u16 = null, -data_in_code_cmd_index: ?u16 = null, - -// __DWARF segment sections -dwarf_debug_info_index: ?u16 = null, -dwarf_debug_abbrev_index: ?u16 = null, -dwarf_debug_str_index: ?u16 = null, -dwarf_debug_line_index: ?u16 = null, -dwarf_debug_line_str_index: ?u16 = null, -dwarf_debug_ranges_index: ?u16 = null, +in_symtab: []const macho.nlist_64 = undefined, +in_strtab: []const u8 = undefined, symtab: std.ArrayListUnmanaged(macho.nlist_64) = .{}, -strtab: []const u8 = &.{}, -data_in_code_entries: []const macho.data_in_code_entry = &.{}, +sections: std.ArrayListUnmanaged(macho.section_64) = .{}, sections_as_symbols: std.AutoHashMapUnmanaged(u16, u32) = .{}, @@ -61,12 +39,8 @@ managed_atoms: std.ArrayListUnmanaged(*Atom) = .{}, atom_by_index_table: std.AutoHashMapUnmanaged(u32, *Atom) = .{}, pub fn deinit(self: *Object, gpa: Allocator) void { - for (self.load_commands.items) |*lc| { - lc.deinit(gpa); - } - self.load_commands.deinit(gpa); - gpa.free(self.contents); self.symtab.deinit(gpa); + self.sections.deinit(gpa); self.sections_as_symbols.deinit(gpa); self.atom_by_index_table.deinit(gpa); @@ -77,22 +51,15 @@ pub fn deinit(self: *Object, gpa: Allocator) void { self.managed_atoms.deinit(gpa); gpa.free(self.name); + gpa.free(self.contents); } pub fn parse(self: *Object, allocator: Allocator, cpu_arch: std.Target.Cpu.Arch) !void { - const file_stat = try self.file.stat(); - const file_size = math.cast(usize, file_stat.size) orelse return error.Overflow; - self.contents = try self.file.readToEndAlloc(allocator, file_size); - var stream = std.io.fixedBufferStream(self.contents); const reader = 
stream.reader(); - const file_offset = self.file_offset orelse 0; - if (file_offset > 0) { - try reader.context.seekTo(file_offset); - } - self.header = try reader.readStruct(macho.mach_header_64); + if (self.header.filetype != macho.MH_OBJECT) { log.debug("invalid filetype: expected 0x{x}, found 0x{x}", .{ macho.MH_OBJECT, @@ -110,92 +77,54 @@ pub fn parse(self: *Object, allocator: Allocator, cpu_arch: std.Target.Cpu.Arch) }, }; if (this_arch != cpu_arch) { - log.err("mismatched cpu architecture: expected {}, found {}", .{ cpu_arch, this_arch }); + log.err("mismatched cpu architecture: expected {s}, found {s}", .{ + @tagName(cpu_arch), + @tagName(this_arch), + }); return error.MismatchedCpuArchitecture; } - try self.load_commands.ensureUnusedCapacity(allocator, self.header.ncmds); - - var i: u16 = 0; - while (i < self.header.ncmds) : (i += 1) { - var cmd = try macho.LoadCommand.read(allocator, reader); + var it = LoadCommandIterator{ + .ncmds = self.header.ncmds, + .buffer = self.contents[@sizeOf(macho.mach_header_64)..][0..self.header.sizeofcmds], + }; + while (it.next()) |cmd| { switch (cmd.cmd()) { .SEGMENT_64 => { - self.segment_cmd_index = i; - var seg = cmd.segment; - for (seg.sections.items) |*sect, j| { - const index = @intCast(u16, j); - const segname = sect.segName(); - const sectname = sect.sectName(); - if (mem.eql(u8, segname, "__DWARF")) { - if (mem.eql(u8, sectname, "__debug_info")) { - self.dwarf_debug_info_index = index; - } else if (mem.eql(u8, sectname, "__debug_abbrev")) { - self.dwarf_debug_abbrev_index = index; - } else if (mem.eql(u8, sectname, "__debug_str")) { - self.dwarf_debug_str_index = index; - } else if (mem.eql(u8, sectname, "__debug_line")) { - self.dwarf_debug_line_index = index; - } else if (mem.eql(u8, sectname, "__debug_line_str")) { - self.dwarf_debug_line_str_index = index; - } else if (mem.eql(u8, sectname, "__debug_ranges")) { - self.dwarf_debug_ranges_index = index; - } - } else if (mem.eql(u8, segname, "__TEXT")) { - if 
(mem.eql(u8, sectname, "__text")) { - self.text_section_index = index; - } - } - - sect.offset += file_offset; - if (sect.reloff > 0) { - sect.reloff += file_offset; - } + const segment = cmd.cast(macho.segment_command_64).?; + try self.sections.ensureUnusedCapacity(allocator, segment.nsects); + for (cmd.getSections()) |sect| { + self.sections.appendAssumeCapacity(sect); } - - seg.inner.fileoff += file_offset; }, .SYMTAB => { - self.symtab_cmd_index = i; - cmd.symtab.symoff += file_offset; - cmd.symtab.stroff += file_offset; - }, - .DYSYMTAB => { - self.dysymtab_cmd_index = i; - }, - .BUILD_VERSION => { - self.build_version_cmd_index = i; - }, - .DATA_IN_CODE => { - self.data_in_code_cmd_index = i; - cmd.linkedit_data.dataoff += file_offset; - }, - else => { - log.debug("Unknown load command detected: 0x{x}.", .{@enumToInt(cmd.cmd())}); + const symtab = cmd.cast(macho.symtab_command).?; + self.in_symtab = @ptrCast( + [*]const macho.nlist_64, + @alignCast(@alignOf(macho.nlist_64), &self.contents[symtab.symoff]), + )[0..symtab.nsyms]; + self.in_strtab = self.contents[symtab.stroff..][0..symtab.strsize]; + try self.symtab.appendSlice(allocator, self.in_symtab); }, + else => {}, } - self.load_commands.appendAssumeCapacity(cmd); } - - try self.parseSymtab(allocator); } const Context = struct { - symtab: []const macho.nlist_64, - strtab: []const u8, + object: *const Object, }; const SymbolAtIndex = struct { index: u32, fn getSymbol(self: SymbolAtIndex, ctx: Context) macho.nlist_64 { - return ctx.symtab[self.index]; + return ctx.object.getSourceSymbol(self.index).?; } fn getSymbolName(self: SymbolAtIndex, ctx: Context) []const u8 { const sym = self.getSymbol(ctx); - assert(sym.n_strx < ctx.strtab.len); - return mem.sliceTo(@ptrCast([*:0]const u8, ctx.strtab.ptr + sym.n_strx), 0); + return ctx.object.getString(sym.n_strx); } /// Returns whether lhs is less than rhs by allocated address in object file. 
@@ -293,7 +222,6 @@ pub fn splitIntoAtomsOneShot(self: *Object, macho_file: *MachO, object_id: u32) defer tracy.end(); const gpa = macho_file.base.allocator; - const seg = self.load_commands.items[self.segment_cmd_index.?].segment; log.debug("splitting object({d}, {s}) into atoms: one-shot mode", .{ object_id, self.name }); @@ -302,13 +230,12 @@ pub fn splitIntoAtomsOneShot(self: *Object, macho_file: *MachO, object_id: u32) // the GO compiler does not necessarily respect that therefore we sort immediately by type // and address within. const context = Context{ - .symtab = self.getSourceSymtab(), - .strtab = self.strtab, + .object = self, }; - var sorted_all_syms = try std.ArrayList(SymbolAtIndex).initCapacity(gpa, context.symtab.len); + var sorted_all_syms = try std.ArrayList(SymbolAtIndex).initCapacity(gpa, self.in_symtab.len); defer sorted_all_syms.deinit(); - for (context.symtab) |_, index| { + for (self.in_symtab) |_, index| { sorted_all_syms.appendAssumeCapacity(.{ .index = @intCast(u32, index) }); } @@ -320,36 +247,36 @@ pub fn splitIntoAtomsOneShot(self: *Object, macho_file: *MachO, object_id: u32) // Well, shit, sometimes compilers skip the dysymtab load command altogether, meaning we // have to infer the start of undef section in the symtab ourselves. 
- const iundefsym = if (self.dysymtab_cmd_index) |cmd_index| blk: { - const dysymtab = self.load_commands.items[cmd_index].dysymtab; + const iundefsym = blk: { + const dysymtab = self.parseDysymtab() orelse { + var iundefsym: usize = sorted_all_syms.items.len; + while (iundefsym > 0) : (iundefsym -= 1) { + const sym = sorted_all_syms.items[iundefsym - 1].getSymbol(context); + if (sym.sect()) break; + } + break :blk iundefsym; + }; break :blk dysymtab.iundefsym; - } else blk: { - var iundefsym: usize = sorted_all_syms.items.len; - while (iundefsym > 0) : (iundefsym -= 1) { - const sym = sorted_all_syms.items[iundefsym - 1].getSymbol(context); - if (sym.sect()) break; - } - break :blk iundefsym; }; // We only care about defined symbols, so filter every other out. const sorted_syms = sorted_all_syms.items[0..iundefsym]; const subsections_via_symbols = self.header.flags & macho.MH_SUBSECTIONS_VIA_SYMBOLS != 0; - for (seg.sections.items) |sect, id| { + for (self.sections.items) |sect, id| { const sect_id = @intCast(u8, id); log.debug("splitting section '{s},{s}' into atoms", .{ sect.segName(), sect.sectName() }); // Get matching segment/section in the final artifact. 
- const match = (try macho_file.getMatchingSection(sect)) orelse { + const match = (try macho_file.getOutputSection(sect)) orelse { log.debug(" unhandled section", .{}); continue; }; log.debug(" output sect({d}, '{s},{s}')", .{ - macho_file.getSectionOrdinal(match), - macho_file.getSection(match).segName(), - macho_file.getSection(match).sectName(), + match + 1, + macho_file.sections.items(.header)[match].segName(), + macho_file.sections.items(.header)[match].sectName(), }); const cpu_arch = macho_file.base.options.target.cpu.arch; @@ -359,14 +286,13 @@ pub fn splitIntoAtomsOneShot(self: *Object, macho_file: *MachO, object_id: u32) }; // Read section's code - const code: ?[]const u8 = if (!is_zerofill) try self.getSectionContents(sect_id) else null; + const code: ?[]const u8 = if (!is_zerofill) try self.getSectionContents(sect) else null; // Read section's list of relocations - const raw_relocs = self.contents[sect.reloff..][0 .. sect.nreloc * @sizeOf(macho.relocation_info)]; - const relocs = mem.bytesAsSlice( - macho.relocation_info, - @alignCast(@alignOf(macho.relocation_info), raw_relocs), - ); + const relocs = @ptrCast( + [*]const macho.relocation_info, + @alignCast(@alignOf(macho.relocation_info), &self.contents[sect.reloff]), + )[0..sect.nreloc]; // Symbols within this section only. 
const filtered_syms = filterSymbolsByAddress( @@ -387,7 +313,7 @@ pub fn splitIntoAtomsOneShot(self: *Object, macho_file: *MachO, object_id: u32) try self.symtab.append(gpa, .{ .n_strx = 0, .n_type = macho.N_SECT, - .n_sect = macho_file.getSectionOrdinal(match), + .n_sect = match + 1, .n_desc = 0, .n_value = sect.addr, }); @@ -476,7 +402,7 @@ pub fn splitIntoAtomsOneShot(self: *Object, macho_file: *MachO, object_id: u32) try self.symtab.append(gpa, .{ .n_strx = 0, .n_type = macho.N_SECT, - .n_sect = macho_file.getSectionOrdinal(match), + .n_sect = match + 1, .n_desc = 0, .n_value = addr, }); @@ -501,7 +427,7 @@ pub fn splitIntoAtomsOneShot(self: *Object, macho_file: *MachO, object_id: u32) try self.symtab.append(gpa, .{ .n_strx = 0, .n_type = macho.N_SECT, - .n_sect = macho_file.getSectionOrdinal(match), + .n_sect = match + 1, .n_desc = 0, .n_value = sect.addr, }); @@ -535,21 +461,21 @@ fn createAtomFromSubsection( code: ?[]const u8, relocs: []const macho.relocation_info, indexes: []const SymbolAtIndex, - match: MatchingSection, + match: u8, sect: macho.section_64, ) !*Atom { const gpa = macho_file.base.allocator; const sym = self.symtab.items[sym_index]; const atom = try MachO.createEmptyAtom(gpa, sym_index, size, alignment); atom.file = object_id; - self.symtab.items[sym_index].n_sect = macho_file.getSectionOrdinal(match); + self.symtab.items[sym_index].n_sect = match + 1; log.debug("creating ATOM(%{d}, '{s}') in sect({d}, '{s},{s}') in object({d})", .{ sym_index, self.getString(sym.n_strx), - macho_file.getSectionOrdinal(match), - macho_file.getSection(match).segName(), - macho_file.getSection(match).sectName(), + match + 1, + macho_file.sections.items(.header)[match].segName(), + macho_file.sections.items(.header)[match].sectName(), object_id, }); @@ -577,7 +503,7 @@ fn createAtomFromSubsection( try atom.contained.ensureTotalCapacity(gpa, indexes.len); for (indexes) |inner_sym_index| { const inner_sym = &self.symtab.items[inner_sym_index.index]; - 
inner_sym.n_sect = macho_file.getSectionOrdinal(match); + inner_sym.n_sect = match + 1; atom.contained.appendAssumeCapacity(.{ .sym_index = inner_sym_index.index, .offset = inner_sym.n_value - sym.n_value, @@ -589,48 +515,84 @@ fn createAtomFromSubsection( return atom; } -fn parseSymtab(self: *Object, allocator: Allocator) !void { - const index = self.symtab_cmd_index orelse return; - const symtab = self.load_commands.items[index].symtab; - try self.symtab.appendSlice(allocator, self.getSourceSymtab()); - self.strtab = self.contents[symtab.stroff..][0..symtab.strsize]; +pub fn getSourceSymbol(self: Object, index: u32) ?macho.nlist_64 { + if (index >= self.in_symtab.len) return null; + return self.in_symtab[index]; } -pub fn getSourceSymtab(self: Object) []const macho.nlist_64 { - const index = self.symtab_cmd_index orelse return &[0]macho.nlist_64{}; - const symtab = self.load_commands.items[index].symtab; - const symtab_size = @sizeOf(macho.nlist_64) * symtab.nsyms; - const raw_symtab = self.contents[symtab.symoff..][0..symtab_size]; - return mem.bytesAsSlice( - macho.nlist_64, - @alignCast(@alignOf(macho.nlist_64), raw_symtab), - ); +pub fn getSourceSection(self: Object, index: u16) macho.section_64 { + assert(index < self.sections.items.len); + return self.sections.items[index]; } -pub fn getSourceSymbol(self: Object, index: u32) ?macho.nlist_64 { - const symtab = self.getSourceSymtab(); - if (index >= symtab.len) return null; - return symtab[index]; +pub fn parseDataInCode(self: Object) ?[]const macho.data_in_code_entry { + var it = LoadCommandIterator{ + .ncmds = self.header.ncmds, + .buffer = self.contents[@sizeOf(macho.mach_header_64)..][0..self.header.sizeofcmds], + }; + while (it.next()) |cmd| { + switch (cmd.cmd()) { + .DATA_IN_CODE => { + const dice = cmd.cast(macho.linkedit_data_command).?; + const ndice = @divExact(dice.datasize, @sizeOf(macho.data_in_code_entry)); + return @ptrCast( + [*]const macho.data_in_code_entry, + 
@alignCast(@alignOf(macho.data_in_code_entry), &self.contents[dice.dataoff]), + )[0..ndice]; + }, + else => {}, + } + } else return null; } -pub fn getSourceSection(self: Object, index: u16) macho.section_64 { - const seg = self.load_commands.items[self.segment_cmd_index.?].segment; - assert(index < seg.sections.items.len); - return seg.sections.items[index]; +fn parseDysymtab(self: Object) ?macho.dysymtab_command { + var it = LoadCommandIterator{ + .ncmds = self.header.ncmds, + .buffer = self.contents[@sizeOf(macho.mach_header_64)..][0..self.header.sizeofcmds], + }; + while (it.next()) |cmd| { + switch (cmd.cmd()) { + .DYSYMTAB => { + return cmd.cast(macho.dysymtab_command).?; + }, + else => {}, + } + } else return null; } -pub fn parseDataInCode(self: Object) ?[]const macho.data_in_code_entry { - const index = self.data_in_code_cmd_index orelse return null; - const data_in_code = self.load_commands.items[index].linkedit_data; - const raw_dice = self.contents[data_in_code.dataoff..][0..data_in_code.datasize]; - return mem.bytesAsSlice( - macho.data_in_code_entry, - @alignCast(@alignOf(macho.data_in_code_entry), raw_dice), - ); +pub fn parseDwarfInfo(self: Object) error{Overflow}!dwarf.DwarfInfo { + var di = dwarf.DwarfInfo{ + .endian = .Little, + .debug_info = &[0]u8{}, + .debug_abbrev = &[0]u8{}, + .debug_str = &[0]u8{}, + .debug_line = &[0]u8{}, + .debug_line_str = &[0]u8{}, + .debug_ranges = &[0]u8{}, + }; + for (self.sections.items) |sect| { + const segname = sect.segName(); + const sectname = sect.sectName(); + if (mem.eql(u8, segname, "__DWARF")) { + if (mem.eql(u8, sectname, "__debug_info")) { + di.debug_info = try self.getSectionContents(sect); + } else if (mem.eql(u8, sectname, "__debug_abbrev")) { + di.debug_abbrev = try self.getSectionContents(sect); + } else if (mem.eql(u8, sectname, "__debug_str")) { + di.debug_str = try self.getSectionContents(sect); + } else if (mem.eql(u8, sectname, "__debug_line")) { + di.debug_line = try 
self.getSectionContents(sect); + } else if (mem.eql(u8, sectname, "__debug_line_str")) { + di.debug_line_str = try self.getSectionContents(sect); + } else if (mem.eql(u8, sectname, "__debug_ranges")) { + di.debug_ranges = try self.getSectionContents(sect); + } + } + } + return di; } -pub fn getSectionContents(self: Object, index: u16) error{Overflow}![]const u8 { - const sect = self.getSourceSection(index); +pub fn getSectionContents(self: Object, sect: macho.section_64) error{Overflow}![]const u8 { const size = math.cast(usize, sect.size) orelse return error.Overflow; log.debug("getting {s},{s} data at 0x{x} - 0x{x}", .{ sect.segName(), @@ -642,8 +604,8 @@ pub fn getSectionContents(self: Object, index: u16) error{Overflow}![]const u8 { } pub fn getString(self: Object, off: u32) []const u8 { - assert(off < self.strtab.len); - return mem.sliceTo(@ptrCast([*:0]const u8, self.strtab.ptr + off), 0); + assert(off < self.in_strtab.len); + return mem.sliceTo(@ptrCast([*:0]const u8, self.in_strtab.ptr + off), 0); } pub fn getAtomForSymbol(self: Object, sym_index: u32) ?*Atom { diff --git a/src/link/MachO/dead_strip.zig b/src/link/MachO/dead_strip.zig index 909a0450d6..12f46c9f26 100644 --- a/src/link/MachO/dead_strip.zig +++ b/src/link/MachO/dead_strip.zig @@ -8,7 +8,6 @@ const mem = std.mem; const Allocator = mem.Allocator; const Atom = @import("Atom.zig"); const MachO = @import("../MachO.zig"); -const MatchingSection = MachO.MatchingSection; pub fn gcAtoms(macho_file: *MachO) !void { const gpa = macho_file.base.allocator; @@ -25,12 +24,12 @@ pub fn gcAtoms(macho_file: *MachO) !void { try prune(arena, alive, macho_file); } -fn removeAtomFromSection(atom: *Atom, match: MatchingSection, macho_file: *MachO) void { - const sect = macho_file.getSectionPtr(match); +fn removeAtomFromSection(atom: *Atom, match: u8, macho_file: *MachO) void { + var section = macho_file.sections.get(match); // If we want to enable GC for incremental codepath, we need to take into // account any 
padding that might have been left here. - sect.size -= atom.size; + section.header.size -= atom.size; if (atom.prev) |prev| { prev.next = atom.next; @@ -38,15 +37,16 @@ fn removeAtomFromSection(atom: *Atom, match: MatchingSection, macho_file: *MachO if (atom.next) |next| { next.prev = atom.prev; } else { - const last = macho_file.atoms.getPtr(match).?; if (atom.prev) |prev| { - last.* = prev; + section.last_atom = prev; } else { // The section will be GCed in the next step. - last.* = undefined; - sect.size = 0; + section.last_atom = null; + section.header.size = 0; } } + + macho_file.sections.set(match, section); } fn collectRoots(roots: *std.AutoHashMap(*Atom, void), macho_file: *MachO) !void { @@ -173,19 +173,19 @@ fn mark( fn prune(arena: Allocator, alive: std.AutoHashMap(*Atom, void), macho_file: *MachO) !void { // Any section that ends up here will be updated, that is, // its size and alignment recalculated. - var gc_sections = std.AutoHashMap(MatchingSection, void).init(arena); + var gc_sections = std.AutoHashMap(u8, void).init(arena); var loop: bool = true; while (loop) { loop = false; for (macho_file.objects.items) |object| { - for (object.getSourceSymtab()) |_, source_index| { + for (object.in_symtab) |_, source_index| { const atom = object.getAtomForSymbol(@intCast(u32, source_index)) orelse continue; if (alive.contains(atom)) continue; const global = atom.getSymbolWithLoc(); const sym = atom.getSymbolPtr(macho_file); - const match = macho_file.getMatchingSectionFromOrdinal(sym.n_sect); + const match = sym.n_sect - 1; if (sym.n_desc == MachO.N_DESC_GCED) continue; if (!sym.ext() and !refersDead(atom, macho_file)) continue; @@ -232,7 +232,7 @@ fn prune(arena: Allocator, alive: std.AutoHashMap(*Atom, void), macho_file: *Mac // TODO tombstone const atom = entry.getAtom(macho_file); - const match = macho_file.getMatchingSectionFromOrdinal(sym.n_sect); + const match = sym.n_sect - 1; removeAtomFromSection(atom, match, macho_file); _ = try 
gc_sections.put(match, {}); _ = macho_file.got_entries_table.remove(entry.target); @@ -244,7 +244,7 @@ fn prune(arena: Allocator, alive: std.AutoHashMap(*Atom, void), macho_file: *Mac // TODO tombstone const atom = entry.getAtom(macho_file); - const match = macho_file.getMatchingSectionFromOrdinal(sym.n_sect); + const match = sym.n_sect - 1; removeAtomFromSection(atom, match, macho_file); _ = try gc_sections.put(match, {}); _ = macho_file.stubs_table.remove(entry.target); @@ -256,7 +256,7 @@ fn prune(arena: Allocator, alive: std.AutoHashMap(*Atom, void), macho_file: *Mac // TODO tombstone const atom = entry.getAtom(macho_file); - const match = macho_file.getMatchingSectionFromOrdinal(sym.n_sect); + const match = sym.n_sect - 1; removeAtomFromSection(atom, match, macho_file); _ = try gc_sections.put(match, {}); _ = macho_file.tlv_ptr_entries_table.remove(entry.target); @@ -265,13 +265,13 @@ fn prune(arena: Allocator, alive: std.AutoHashMap(*Atom, void), macho_file: *Mac var gc_sections_it = gc_sections.iterator(); while (gc_sections_it.next()) |entry| { const match = entry.key_ptr.*; - const sect = macho_file.getSectionPtr(match); - if (sect.size == 0) continue; // Pruning happens automatically in next step. + var section = macho_file.sections.get(match); + if (section.header.size == 0) continue; // Pruning happens automatically in next step. 
- sect.@"align" = 0; - sect.size = 0; + section.header.@"align" = 0; + section.header.size = 0; - var atom = macho_file.atoms.get(match).?; + var atom = section.last_atom.?; while (atom.prev) |prev| { atom = prev; @@ -279,14 +279,16 @@ fn prune(arena: Allocator, alive: std.AutoHashMap(*Atom, void), macho_file: *Mac while (true) { const atom_alignment = try math.powi(u32, 2, atom.alignment); - const aligned_end_addr = mem.alignForwardGeneric(u64, sect.size, atom_alignment); - const padding = aligned_end_addr - sect.size; - sect.size += padding + atom.size; - sect.@"align" = @maximum(sect.@"align", atom.alignment); + const aligned_end_addr = mem.alignForwardGeneric(u64, section.header.size, atom_alignment); + const padding = aligned_end_addr - section.header.size; + section.header.size += padding + atom.size; + section.header.@"align" = @maximum(section.header.@"align", atom.alignment); if (atom.next) |next| { atom = next; } else break; } + + macho_file.sections.set(match, section); } } diff --git a/src/link/MachO/fat.zig b/src/link/MachO/fat.zig index 1511f274a8..7c328c1418 100644 --- a/src/link/MachO/fat.zig +++ b/src/link/MachO/fat.zig @@ -46,7 +46,9 @@ pub fn getLibraryOffset(reader: anytype, cpu_arch: std.Target.Cpu.Arch) !u64 { return fat_arch.offset; } } else { - log.err("Could not find matching cpu architecture in fat library: expected {}", .{cpu_arch}); + log.err("Could not find matching cpu architecture in fat library: expected {s}", .{ + @tagName(cpu_arch), + }); return error.MismatchedCpuArchitecture; } } |
