| author | Jakub Konka <kubkon@jakubkonka.com> | 2020-12-22 14:23:55 +0100 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2020-12-22 14:23:55 +0100 |
| commit | 43dbe86226fe89c6364fa0261297f1a9d8eb2a58 (patch) | |
| tree | 44d2b398b48d411ca9d845a952a73837e881ae6b /src | |
| parent | 286077fec8f381c7b4d4d5bf351d963564a1dd69 (diff) | |
| parent | 34663abc9090d7b3afc2bc83d159c1d950b23e1d (diff) | |
| download | zig-43dbe86226fe89c6364fa0261297f1a9d8eb2a58.tar.gz zig-43dbe86226fe89c6364fa0261297f1a9d8eb2a58.zip | |
Merge pull request #7516 from kubkon/macho-better-space-alloc
macho: space preallocation, and various cleanups and fixes
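A central cleanup in this diff is replacing the single `cmd_table_dirty` flag with per-table flags (`header_dirty`, `load_commands_dirty`, `binding_info_dirty`, `lazy_binding_info_dirty`, `export_info_dirty`, `string_table_dirty`, `offset_table_count_dirty`), so each write path can return early when its data is unchanged. The sketch below condenses that pattern from `writeStringTable` in the diff; the struct and names are illustrative rather than the linker's actual layout, and relocation of tables that outgrow their allocation is omitted.

```zig
const std = @import("std");

// Illustrative condensation of the dirty-flag scheme (Zig 0.7-era style,
// matching the diff): mutations set a flag, writers early-return when the
// flag is clear and clear it after a successful write.
const Tables = struct {
    string_table: std.ArrayListUnmanaged(u8) = .{},
    string_table_dirty: bool = false,
    load_commands_dirty: bool = false,

    fn makeString(self: *Tables, gpa: *std.mem.Allocator, bytes: []const u8) !u32 {
        const off = @intCast(u32, self.string_table.items.len);
        try self.string_table.appendSlice(gpa, bytes);
        try self.string_table.append(gpa, 0);
        self.string_table_dirty = true; // remember to flush on the next flushModule
        return off;
    }

    fn writeStringTable(self: *Tables, file: std.fs.File, stroff: u64) !void {
        if (!self.string_table_dirty) return; // nothing changed since the last flush
        try file.pwriteAll(self.string_table.items, stroff);
        self.load_commands_dirty = true; // strsize in LC_SYMTAB changed as well
        self.string_table_dirty = false;
    }
};
```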
Diffstat (limited to 'src')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | src/link/MachO.zig | 851 |
| -rw-r--r-- | src/link/MachO/CodeSignature.zig | 6 |
| -rw-r--r-- | src/link/MachO/Trie.zig | 8 |
| -rw-r--r-- | src/link/MachO/commands.zig | 32 |
4 files changed, 575 insertions, 322 deletions
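The other theme is preallocating file space instead of growing segments ad hoc: the header plus load commands get a fixed `header_pad` of 0x1000 bytes, the `__TEXT` segment is sized up front from the program-code and symbol-count hints, and free space is then searched for within a given segment (`findFreeSpace` now takes a `SegmentCommand`). The sketch below shows the sizing arithmetic from `populateMissingMetadata`, including the `satMul` helper added by the diff; `alloc_num`/`alloc_den` are the file's existing over-allocation ratio, and both the values used here and the wrapper function name are placeholders for illustration.

```zig
const std = @import("std");
const mem = std.mem;

// Saturating multiplication, as introduced in the diff: overflow clamps to
// maxInt instead of erroring when padding very large sizes.
fn satMul(a: anytype, b: anytype) @TypeOf(a, b) {
    const T = @TypeOf(a, b);
    return std.math.mul(T, a, b) catch std.math.maxInt(T);
}

// Placeholder over-allocation ratio (the real constants live elsewhere in MachO.zig).
const alloc_num = 4;
const alloc_den = 3;

// Hypothetical wrapper around the __TEXT sizing done in populateMissingMetadata:
// header + load-command pad, plus hints for code and the offset table, padded
// by alloc_num/alloc_den and rounded up to a page boundary.
fn initialTextSegmentSize(header_pad: u64, code_size_hint: u64, symbol_count_hint: u64, page_size: u64) u64 {
    const offset_table_size_hint = @sizeOf(u64) * symbol_count_hint;
    const ideal_size = header_pad + code_size_hint + offset_table_size_hint;
    return mem.alignForwardGeneric(u64, satMul(ideal_size, alloc_num) / alloc_den, page_size);
}
```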
diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 13704d8839..65dbe4e5f6 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -37,6 +37,10 @@ page_size: u16, /// Mach-O header header: ?macho.mach_header_64 = null, +/// We commit 0x1000 = 4096 bytes of space to the header and +/// the table of load commands. This should be plenty for any +/// potential future extensions. +header_pad: u16 = 0x1000, /// Table of all load commands load_commands: std.ArrayListUnmanaged(LoadCommand) = .{}, @@ -75,14 +79,11 @@ code_signature_cmd_index: ?u16 = null, /// Index into __TEXT,__text section. text_section_index: ?u16 = null, -/// Index into __TEXT,__got section. +/// Index into __TEXT,__ziggot section. got_section_index: ?u16 = null, /// The absolute address of the entry point. entry_addr: ?u64 = null, -/// TODO move this into each Segment aggregator -linkedit_segment_next_offset: ?u32 = null, - /// Table of all local symbols /// Internally references string table for names (which are optional). local_symbols: std.ArrayListUnmanaged(macho.nlist_64) = .{}, @@ -100,9 +101,7 @@ dyld_stub_binder_index: ?u16 = null, /// Table of symbol names aka the string table. string_table: std.ArrayListUnmanaged(u8) = .{}, -/// Table of symbol vaddr values. The values is the absolute vaddr value. -/// If the vaddr of the executable __TEXT segment vaddr changes, the entire offset -/// table needs to be rewritten. +/// Table of trampolines to the actual symbols in __text section. offset_table: std.ArrayListUnmanaged(u64) = .{}, /// Table of binding info entries. @@ -112,7 +111,13 @@ lazy_binding_info_table: LazyBindingInfoTable = .{}, error_flags: File.ErrorFlags = File.ErrorFlags{}, -cmd_table_dirty: bool = false, +offset_table_count_dirty: bool = false, +header_dirty: bool = false, +load_commands_dirty: bool = false, +binding_info_dirty: bool = false, +lazy_binding_info_dirty: bool = false, +export_info_dirty: bool = false, +string_table_dirty: bool = false, /// A list of text blocks that have surplus capacity. This list can have false /// positives, as functions grow and shrink over time, only sometimes being added @@ -317,10 +322,14 @@ pub fn flushModule(self: *MachO, comp: *Compilation) !void { const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment; const main_cmd = &self.load_commands.items[self.main_cmd_index.?].Main; main_cmd.entryoff = addr - text_segment.inner.vmaddr; + self.load_commands_dirty = true; } + try self.writeBindingInfoTable(); + try self.writeLazyBindingInfoTable(); try self.writeExportTrie(); - try self.writeSymbolTable(); + try self.writeAllGlobalAndUndefSymbols(); try self.writeStringTable(); + try self.updateLinkeditSegmentSizes(); if (target.cpu.arch == .aarch64) { // Preallocate space for the code signature. @@ -335,21 +344,24 @@ pub fn flushModule(self: *MachO, comp: *Compilation) !void { .Lib => return error.TODOImplementWritingLibFiles, } - if (self.cmd_table_dirty) { - try self.writeLoadCommands(); - try self.writeHeader(); - self.cmd_table_dirty = false; - } + try self.writeLoadCommands(); + try self.writeHeader(); if (self.entry_addr == null and self.base.options.output_mode == .Exe) { - log.debug("flushing. no_entry_point_found = true\n", .{}); + log.debug("flushing. no_entry_point_found = true", .{}); self.error_flags.no_entry_point_found = true; } else { - log.debug("flushing. no_entry_point_found = false\n", .{}); + log.debug("flushing. 
no_entry_point_found = false", .{}); self.error_flags.no_entry_point_found = false; } - assert(!self.cmd_table_dirty); + assert(!self.offset_table_count_dirty); + assert(!self.header_dirty); + assert(!self.load_commands_dirty); + assert(!self.binding_info_dirty); + assert(!self.lazy_binding_info_dirty); + assert(!self.export_info_dirty); + assert(!self.string_table_dirty); if (target.cpu.arch == .aarch64) { switch (output_mode) { @@ -768,9 +780,9 @@ fn linkWithLLD(self: *MachO, comp: *Compilation) !void { const needed_size = @sizeOf(macho.linkedit_data_command) * alloc_num / alloc_den; if (needed_size + after_last_cmd_offset > text_section.offset) { - std.log.err("Unable to extend padding between the end of load commands and start of __text section.", .{}); - std.log.err("Re-run the linker with '-headerpad 0x{x}' option if available, or", .{needed_size}); - std.log.err("fall back to the system linker by exporting 'ZIG_SYSTEM_LINKER_HACK=1'.", .{}); + log.err("Unable to extend padding between the end of load commands and start of __text section.", .{}); + log.err("Re-run the linker with '-headerpad 0x{x}' option if available, or", .{needed_size}); + log.err("fall back to the system linker by exporting 'ZIG_SYSTEM_LINKER_HACK=1'.", .{}); return error.NotEnoughPadding; } @@ -806,10 +818,12 @@ fn linkWithLLD(self: *MachO, comp: *Compilation) !void { mem.set(u8, dylib_cmd.data, 0); mem.copy(u8, dylib_cmd.data, mem.spanZ(LIB_SYSTEM_PATH)); try self.load_commands.append(self.base.allocator, .{ .Dylib = dylib_cmd }); + self.header_dirty = true; + self.load_commands_dirty = true; if (self.symtab_cmd_index == null or self.dysymtab_cmd_index == null) { - std.log.err("Incomplete Mach-O binary: no LC_SYMTAB or LC_DYSYMTAB load command found!", .{}); - std.log.err("Without the symbol table, it is not possible to patch up the binary for cross-compilation.", .{}); + log.err("Incomplete Mach-O binary: no LC_SYMTAB or LC_DYSYMTAB load command found!", .{}); + log.err("Without the symbol table, it is not possible to patch up the binary for cross-compilation.", .{}); return error.NoSymbolTableFound; } @@ -823,7 +837,7 @@ fn linkWithLLD(self: *MachO, comp: *Compilation) !void { symbol.dylib_ordinal = next_ordinal; } - // Write update dyld info + // Write updated dyld info. const dyld_info = self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly; { const size = try self.binding_info_table.calcSize(); @@ -853,6 +867,9 @@ fn linkWithLLD(self: *MachO, comp: *Compilation) !void { // Write updated load commands and the header try self.writeLoadCommands(); try self.writeHeader(); + + assert(!self.header_dirty); + assert(!self.load_commands_dirty); } if (self.code_signature_cmd_index == null) outer: { if (target.cpu.arch != .aarch64) break :outer; // This is currently needed only for aarch64 targets. 
@@ -862,15 +879,12 @@ fn linkWithLLD(self: *MachO, comp: *Compilation) !void { const needed_size = @sizeOf(macho.linkedit_data_command) * alloc_num / alloc_den; if (needed_size + after_last_cmd_offset > text_section.offset) { - std.log.err("Unable to extend padding between the end of load commands and start of __text section.", .{}); - std.log.err("Re-run the linker with '-headerpad 0x{x}' option if available, or", .{needed_size}); - std.log.err("fall back to the system linker by exporting 'ZIG_SYSTEM_LINKER_HACK=1'.", .{}); + log.err("Unable to extend padding between the end of load commands and start of __text section.", .{}); + log.err("Re-run the linker with '-headerpad 0x{x}' option if available, or", .{needed_size}); + log.err("fall back to the system linker by exporting 'ZIG_SYSTEM_LINKER_HACK=1'.", .{}); return error.NotEnoughPadding; } - const linkedit_segment = self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; - // TODO This is clunky. - self.linkedit_segment_next_offset = @intCast(u32, mem.alignForwardGeneric(u64, linkedit_segment.inner.fileoff + linkedit_segment.inner.filesize, @sizeOf(u64))); // Add code signature load command self.code_signature_cmd_index = @intCast(u16, self.load_commands.items.len); try self.load_commands.append(self.base.allocator, .{ @@ -881,6 +895,8 @@ fn linkWithLLD(self: *MachO, comp: *Compilation) !void { .datasize = 0, }, }); + self.header_dirty = true; + self.load_commands_dirty = true; // Pad out space for code signature try self.writeCodeSignaturePadding(); @@ -889,6 +905,9 @@ fn linkWithLLD(self: *MachO, comp: *Compilation) !void { try self.writeHeader(); // Generate adhoc code signature try self.writeCodeSignature(); + + assert(!self.header_dirty); + assert(!self.load_commands_dirty); } } } @@ -1002,10 +1021,10 @@ pub fn allocateDeclIndexes(self: *MachO, decl: *Module.Decl) !void { try self.offset_table.ensureCapacity(self.base.allocator, self.offset_table.items.len + 1); if (self.local_symbol_free_list.popOrNull()) |i| { - log.debug("reusing symbol index {} for {}\n", .{ i, decl.name }); + log.debug("reusing symbol index {} for {}", .{ i, decl.name }); decl.link.macho.local_sym_index = i; } else { - log.debug("allocating symbol index {} for {}\n", .{ self.local_symbols.items.len, decl.name }); + log.debug("allocating symbol index {} for {}", .{ self.local_symbols.items.len, decl.name }); decl.link.macho.local_sym_index = @intCast(u32, self.local_symbols.items.len); _ = self.local_symbols.addOneAssumeCapacity(); } @@ -1015,6 +1034,7 @@ pub fn allocateDeclIndexes(self: *MachO, decl: *Module.Decl) !void { } else { decl.link.macho.offset_table_index = @intCast(u32, self.offset_table.items.len); _ = self.offset_table.addOneAssumeCapacity(); + self.offset_table_count_dirty = true; } self.local_symbols.items[decl.link.macho.local_sym_index] = .{ @@ -1056,10 +1076,10 @@ pub fn updateDecl(self: *MachO, module: *Module, decl: *Module.Decl) !void { const need_realloc = code.len > capacity or !mem.isAlignedGeneric(u64, symbol.n_value, required_alignment); if (need_realloc) { const vaddr = try self.growTextBlock(&decl.link.macho, code.len, required_alignment); - log.debug("growing {} from 0x{x} to 0x{x}\n", .{ decl.name, symbol.n_value, vaddr }); + log.debug("growing {} from 0x{x} to 0x{x}", .{ decl.name, symbol.n_value, vaddr }); if (vaddr != symbol.n_value) { symbol.n_value = vaddr; - log.debug(" (writing new offset table entry)\n", .{}); + log.debug(" (writing new offset table entry)", .{}); 
self.offset_table.items[decl.link.macho.offset_table_index] = vaddr; try self.writeOffsetTableEntry(decl.link.macho.offset_table_index); } @@ -1071,11 +1091,13 @@ pub fn updateDecl(self: *MachO, module: *Module, decl: *Module.Decl) !void { symbol.n_type = macho.N_SECT; symbol.n_sect = @intCast(u8, self.text_section_index.?) + 1; symbol.n_desc = 0; + + try self.writeLocalSymbol(decl.link.macho.local_sym_index); } else { const decl_name = mem.spanZ(decl.name); const name_str_index = try self.makeString(decl_name); const addr = try self.allocateTextBlock(&decl.link.macho, code.len, required_alignment); - log.debug("allocated text block for {} at 0x{x}\n", .{ decl_name, addr }); + log.debug("allocated text block for {} at 0x{x}", .{ decl_name, addr }); errdefer self.freeTextBlock(&decl.link.macho); symbol.* = .{ @@ -1086,6 +1108,8 @@ pub fn updateDecl(self: *MachO, module: *Module, decl: *Module.Decl) !void { .n_value = addr, }; self.offset_table.items[decl.link.macho.offset_table_index] = addr; + + try self.writeLocalSymbol(decl.link.macho.local_sym_index); try self.writeOffsetTableEntry(decl.link.macho.offset_table_index); } @@ -1151,7 +1175,6 @@ pub fn updateDeclExports( .Strong => blk: { if (mem.eql(u8, exp.options.name, "_start")) { self.entry_addr = decl_sym.n_value; - self.cmd_table_dirty = true; // TODO This should be handled more granularly instead of invalidating all commands. } break :blk macho.REFERENCE_FLAG_DEFINED; }, @@ -1179,6 +1202,7 @@ pub fn updateDeclExports( const name_str_index = try self.makeString(exp.options.name); const i = if (self.global_symbol_free_list.popOrNull()) |i| i else blk: { _ = self.global_symbols.addOneAssumeCapacity(); + self.export_info_dirty = true; break :blk self.global_symbols.items.len - 1; }; self.global_symbols.items[i] = .{ @@ -1271,6 +1295,7 @@ pub fn populateMissingMetadata(self: *MachO) !void { } header.reserved = 0; self.header = header; + self.header_dirty = true; } if (self.pagezero_segment_cmd_index == null) { self.pagezero_segment_cmd_index = @intCast(u16, self.load_commands.items.len); @@ -1289,107 +1314,117 @@ pub fn populateMissingMetadata(self: *MachO) !void { .flags = 0, }), }); - self.cmd_table_dirty = true; + self.header_dirty = true; + self.load_commands_dirty = true; } if (self.text_segment_cmd_index == null) { self.text_segment_cmd_index = @intCast(u16, self.load_commands.items.len); const maxprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE | macho.VM_PROT_EXECUTE; const initprot = macho.VM_PROT_READ | macho.VM_PROT_EXECUTE; + + const program_code_size_hint = self.base.options.program_code_size_hint; + const offset_table_size_hint = @sizeOf(u64) * self.base.options.symbol_count_hint; + const ideal_size = self.header_pad + program_code_size_hint + offset_table_size_hint; + const needed_size = mem.alignForwardGeneric(u64, satMul(ideal_size, alloc_num) / alloc_den, self.page_size); + + log.debug("found __TEXT segment free space 0x{x} to 0x{x}", .{ 0, needed_size }); + try self.load_commands.append(self.base.allocator, .{ .Segment = SegmentCommand.empty(.{ .cmd = macho.LC_SEGMENT_64, .cmdsize = @sizeOf(macho.segment_command_64), .segname = makeStaticString("__TEXT"), .vmaddr = 0x100000000, // always starts at 4GB - .vmsize = 0, + .vmsize = needed_size, .fileoff = 0, - .filesize = 0, + .filesize = needed_size, .maxprot = maxprot, .initprot = initprot, .nsects = 0, .flags = 0, }), }); - self.cmd_table_dirty = true; + self.header_dirty = true; + self.load_commands_dirty = true; } if (self.text_section_index == null) { const 
text_segment = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; self.text_section_index = @intCast(u16, text_segment.sections.items.len); - const program_code_size_hint = self.base.options.program_code_size_hint; - const file_size = mem.alignForwardGeneric(u64, program_code_size_hint, self.page_size); - const off = @intCast(u32, self.findFreeSpace(file_size, self.page_size)); // TODO maybe findFreeSpace should return u32 directly? + const alignment: u2 = switch (self.base.options.target.cpu.arch) { + .x86_64 => 0, + .aarch64 => 2, + else => unreachable, // unhandled architecture type + }; + const flags = macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS; + const needed_size = self.base.options.program_code_size_hint; + const off = self.findFreeSpace(text_segment, needed_size, @as(u16, 1) << alignment); - log.debug("found __text section free space 0x{x} to 0x{x}\n", .{ off, off + file_size }); + log.debug("found __text section free space 0x{x} to 0x{x}", .{ off, off + needed_size }); - try text_segment.sections.append(self.base.allocator, .{ + try text_segment.addSection(self.base.allocator, .{ .sectname = makeStaticString("__text"), .segname = makeStaticString("__TEXT"), .addr = text_segment.inner.vmaddr + off, - .size = file_size, - .offset = off, - .@"align" = if (self.base.options.target.cpu.arch == .aarch64) 2 else 0, // 2^2 for aarch64, 2^0 for x86_64 + .size = @intCast(u32, needed_size), + .offset = @intCast(u32, off), + .@"align" = alignment, .reloff = 0, .nreloc = 0, - .flags = macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, + .flags = flags, .reserved1 = 0, .reserved2 = 0, .reserved3 = 0, }); - - text_segment.inner.vmsize = file_size + off; // We add off here since __TEXT segment includes everything prior to __text section. - text_segment.inner.filesize = file_size + off; - text_segment.inner.cmdsize += @sizeOf(macho.section_64); - text_segment.inner.nsects += 1; - self.cmd_table_dirty = true; + self.header_dirty = true; + self.load_commands_dirty = true; } if (self.got_section_index == null) { const text_segment = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; const text_section = &text_segment.sections.items[self.text_section_index.?]; self.got_section_index = @intCast(u16, text_segment.sections.items.len); - const file_size = @sizeOf(u64) * self.base.options.symbol_count_hint; - // TODO looking for free space should be done *within* a segment it belongs to - const off = @intCast(u32, text_section.offset + text_section.size); + const flags = macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS; + const needed_size = @sizeOf(u64) * self.base.options.symbol_count_hint; + const off = self.findFreeSpace(text_segment, needed_size, @alignOf(u64)); + assert(off + needed_size <= text_segment.inner.fileoff + text_segment.inner.filesize); // TODO Must expand __TEXT segment. 
- log.debug("found __got section free space 0x{x} to 0x{x}\n", .{ off, off + file_size }); + log.debug("found __ziggot section free space 0x{x} to 0x{x}", .{ off, off + needed_size }); - try text_segment.sections.append(self.base.allocator, .{ - .sectname = makeStaticString("__got"), + try text_segment.addSection(self.base.allocator, .{ + .sectname = makeStaticString("__ziggot"), .segname = makeStaticString("__TEXT"), - .addr = text_section.addr + text_section.size, - .size = file_size, - .offset = off, - .@"align" = if (self.base.options.target.cpu.arch == .aarch64) 2 else 0, + .addr = text_segment.inner.vmaddr + off, + .size = needed_size, + .offset = @intCast(u32, off), + .@"align" = @sizeOf(u64), .reloff = 0, .nreloc = 0, - .flags = macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, + .flags = flags, .reserved1 = 0, .reserved2 = 0, .reserved3 = 0, }); - - const added_size = mem.alignForwardGeneric(u64, file_size, self.page_size); - text_segment.inner.vmsize += added_size; - text_segment.inner.filesize += added_size; - text_segment.inner.cmdsize += @sizeOf(macho.section_64); - text_segment.inner.nsects += 1; - self.cmd_table_dirty = true; + self.header_dirty = true; + self.load_commands_dirty = true; } if (self.linkedit_segment_cmd_index == null) { self.linkedit_segment_cmd_index = @intCast(u16, self.load_commands.items.len); - const text_segment = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; + const maxprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE | macho.VM_PROT_EXECUTE; const initprot = macho.VM_PROT_READ; - const off = text_segment.inner.fileoff + text_segment.inner.filesize; + const address_and_offset = self.nextSegmentAddressAndOffset(); + + log.debug("found __LINKEDIT segment free space at 0x{x}", .{address_and_offset.offset}); + try self.load_commands.append(self.base.allocator, .{ .Segment = SegmentCommand.empty(.{ .cmd = macho.LC_SEGMENT_64, .cmdsize = @sizeOf(macho.segment_command_64), .segname = makeStaticString("__LINKEDIT"), - .vmaddr = text_segment.inner.vmaddr + text_segment.inner.vmsize, + .vmaddr = address_and_offset.address, .vmsize = 0, - .fileoff = off, + .fileoff = address_and_offset.offset, .filesize = 0, .maxprot = maxprot, .initprot = initprot, @@ -1397,11 +1432,19 @@ pub fn populateMissingMetadata(self: *MachO) !void { .flags = 0, }), }); - self.linkedit_segment_next_offset = @intCast(u32, off); - self.cmd_table_dirty = true; + self.header_dirty = true; + self.load_commands_dirty = true; } if (self.dyld_info_cmd_index == null) { self.dyld_info_cmd_index = @intCast(u16, self.load_commands.items.len); + const linkedit_segment = self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; + + // TODO Preallocate rebase, binding, and lazy binding info. 
+ const export_size = 2; + const export_off = self.findFreeSpace(&linkedit_segment, export_size, 1); + + log.debug("found export info free space 0x{x} to 0x{x}", .{ export_off, export_off + export_size }); + try self.load_commands.append(self.base.allocator, .{ .DyldInfoOnly = .{ .cmd = macho.LC_DYLD_INFO_ONLY, @@ -1414,28 +1457,48 @@ pub fn populateMissingMetadata(self: *MachO) !void { .weak_bind_size = 0, .lazy_bind_off = 0, .lazy_bind_size = 0, - .export_off = 0, - .export_size = 0, + .export_off = @intCast(u32, export_off), + .export_size = export_size, }, }); - self.cmd_table_dirty = true; + self.header_dirty = true; + self.load_commands_dirty = true; } if (self.symtab_cmd_index == null) { self.symtab_cmd_index = @intCast(u16, self.load_commands.items.len); + const linkedit_segment = self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; + + const symtab_size = self.base.options.symbol_count_hint * @sizeOf(macho.nlist_64); + const symtab_off = self.findFreeSpace(&linkedit_segment, symtab_size, @sizeOf(macho.nlist_64)); + + log.debug("found symbol table free space 0x{x} to 0x{x}", .{ symtab_off, symtab_off + symtab_size }); + + try self.string_table.append(self.base.allocator, 0); // Need a null at position 0. + const strtab_size = self.string_table.items.len; + const strtab_off = self.findFreeSpace(&linkedit_segment, strtab_size, 1); + + log.debug("found string table free space 0x{x} to 0x{x}", .{ strtab_off, strtab_off + strtab_size }); + try self.load_commands.append(self.base.allocator, .{ .Symtab = .{ .cmd = macho.LC_SYMTAB, .cmdsize = @sizeOf(macho.symtab_command), - .symoff = 0, - .nsyms = 0, - .stroff = 0, - .strsize = 0, + .symoff = @intCast(u32, symtab_off), + .nsyms = @intCast(u32, self.base.options.symbol_count_hint), + .stroff = @intCast(u32, strtab_off), + .strsize = @intCast(u32, strtab_size), }, }); - self.cmd_table_dirty = true; + try self.writeLocalSymbol(0); + self.header_dirty = true; + self.load_commands_dirty = true; + self.string_table_dirty = true; } if (self.dysymtab_cmd_index == null) { self.dysymtab_cmd_index = @intCast(u16, self.load_commands.items.len); + + // TODO Preallocate space for indirect symbol table. 
+ try self.load_commands.append(self.base.allocator, .{ .Dysymtab = .{ .cmd = macho.LC_DYSYMTAB, @@ -1460,7 +1523,8 @@ pub fn populateMissingMetadata(self: *MachO) !void { .nlocrel = 0, }, }); - self.cmd_table_dirty = true; + self.header_dirty = true; + self.load_commands_dirty = true; } if (self.dylinker_cmd_index == null) { self.dylinker_cmd_index = @intCast(u16, self.load_commands.items.len); @@ -1474,7 +1538,8 @@ pub fn populateMissingMetadata(self: *MachO) !void { mem.set(u8, dylinker_cmd.data, 0); mem.copy(u8, dylinker_cmd.data, mem.spanZ(DEFAULT_DYLD_PATH)); try self.load_commands.append(self.base.allocator, .{ .Dylinker = dylinker_cmd }); - self.cmd_table_dirty = true; + self.header_dirty = true; + self.load_commands_dirty = true; } if (self.libsystem_cmd_index == null) { self.libsystem_cmd_index = @intCast(u16, self.load_commands.items.len); @@ -1496,7 +1561,8 @@ pub fn populateMissingMetadata(self: *MachO) !void { mem.set(u8, dylib_cmd.data, 0); mem.copy(u8, dylib_cmd.data, mem.spanZ(LIB_SYSTEM_PATH)); try self.load_commands.append(self.base.allocator, .{ .Dylib = dylib_cmd }); - self.cmd_table_dirty = true; + self.header_dirty = true; + self.load_commands_dirty = true; } if (self.main_cmd_index == null) { self.main_cmd_index = @intCast(u16, self.load_commands.items.len); @@ -1508,7 +1574,8 @@ pub fn populateMissingMetadata(self: *MachO) !void { .stacksize = 0, }, }); - self.cmd_table_dirty = true; + self.header_dirty = true; + self.load_commands_dirty = true; } if (self.version_min_cmd_index == null) { self.version_min_cmd_index = @intCast(u16, self.load_commands.items.len); @@ -1529,6 +1596,8 @@ pub fn populateMissingMetadata(self: *MachO) !void { .sdk = version, }, }); + self.header_dirty = true; + self.load_commands_dirty = true; } if (self.source_version_cmd_index == null) { self.source_version_cmd_index = @intCast(u16, self.load_commands.items.len); @@ -1539,9 +1608,12 @@ pub fn populateMissingMetadata(self: *MachO) !void { .version = 0x0, }, }); + self.header_dirty = true; + self.load_commands_dirty = true; } if (self.code_signature_cmd_index == null) { self.code_signature_cmd_index = @intCast(u16, self.load_commands.items.len); + const linkedit_segment = self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; try self.load_commands.append(self.base.allocator, .{ .LinkeditData = .{ .cmd = macho.LC_CODE_SIGNATURE, @@ -1550,6 +1622,8 @@ pub fn populateMissingMetadata(self: *MachO) !void { .datasize = 0, }, }); + self.header_dirty = true; + self.load_commands_dirty = true; } if (self.dyld_stub_binder_index == null) { self.dyld_stub_binder_index = @intCast(u16, self.undef_symbols.items.len); @@ -1631,14 +1705,13 @@ fn allocateTextBlock(self: *MachO, text_block: *TextBlock, new_block_size: u64, const expand_text_section = block_placement == null or block_placement.?.next == null; if (expand_text_section) { - const text_capacity = self.allocatedSize(text_section.offset); const needed_size = (vaddr + new_block_size) - text_section.addr; - assert(needed_size <= text_capacity); // TODO must move the entire text section. + assert(needed_size <= text_segment.inner.filesize); // TODO must move the entire text section. self.last_text_block = text_block; text_section.size = needed_size; - self.cmd_table_dirty = true; // TODO Make more granular. + self.load_commands_dirty = true; // TODO Make more granular. 
} text_block.size = new_block_size; @@ -1667,7 +1740,7 @@ fn allocateTextBlock(self: *MachO, text_block: *TextBlock, new_block_size: u64, pub fn makeStaticString(comptime bytes: []const u8) [16]u8 { var buf = [_]u8{0} ** 16; if (bytes.len > buf.len) @compileError("string too long; max 16 bytes"); - mem.copy(u8, buf[0..], bytes); + mem.copy(u8, &buf, bytes); return buf; } @@ -1676,6 +1749,7 @@ fn makeString(self: *MachO, bytes: []const u8) !u32 { const result = self.string_table.items.len; self.string_table.appendSliceAssumeCapacity(bytes); self.string_table.appendAssumeCapacity(0); + self.string_table_dirty = true; return @intCast(u32, result); } @@ -1692,103 +1766,196 @@ fn updateString(self: *MachO, old_str_off: u32, new_name: []const u8) !u32 { return self.makeString(new_name); } -fn detectAllocCollision(self: *MachO, start: u64, size: u64) ?u64 { - const hdr_size: u64 = @sizeOf(macho.mach_header_64); - if (start < hdr_size) return hdr_size; - const end = start + satMul(size, alloc_num) / alloc_den; - { - const off = @sizeOf(macho.mach_header_64); - var tight_size: u64 = 0; - for (self.load_commands.items) |cmd| { - tight_size += cmd.cmdsize(); +const NextSegmentAddressAndOffset = struct { + address: u64, + offset: u64, +}; + +fn nextSegmentAddressAndOffset(self: *MachO) NextSegmentAddressAndOffset { + const prev_segment_idx = blk: { + if (self.data_segment_cmd_index) |idx| { + break :blk idx; + } else if (self.text_segment_cmd_index) |idx| { + break :blk idx; + } else { + unreachable; // unhandled LC_SEGMENT_64 load command before __TEXT + } + }; + const prev_segment = self.load_commands.items[prev_segment_idx].Segment; + const address = prev_segment.inner.vmaddr + prev_segment.inner.vmsize; + const offset = prev_segment.inner.fileoff + prev_segment.inner.filesize; + return .{ + .address = address, + .offset = offset, + }; +} + +fn allocatedSize(self: *MachO, segment: *const SegmentCommand, start: u64) u64 { + assert(start > 0); + var min_pos: u64 = std.math.maxInt(u64); + + if (parseAndCmpName(&segment.inner.segname, "__LINKEDIT")) { + assert(segment.sections.items.len == 0); + // __LINKEDIT is a weird segment where sections get their own load commands so we + // special-case it. 
+ if (self.dyld_info_cmd_index) |idx| { + const dyld_info = self.load_commands.items[idx].DyldInfoOnly; + if (dyld_info.rebase_off > start and dyld_info.rebase_off < min_pos) min_pos = dyld_info.rebase_off; + if (dyld_info.bind_off > start and dyld_info.bind_off < min_pos) min_pos = dyld_info.bind_off; + if (dyld_info.weak_bind_off > start and dyld_info.weak_bind_off < min_pos) min_pos = dyld_info.weak_bind_off; + if (dyld_info.lazy_bind_off > start and dyld_info.lazy_bind_off < min_pos) min_pos = dyld_info.lazy_bind_off; + if (dyld_info.export_off > start and dyld_info.export_off < min_pos) min_pos = dyld_info.export_off; } - const increased_size = satMul(tight_size, alloc_num) / alloc_den; - const test_end = off + increased_size; - if (end > off and start < test_end) { - return test_end; + + if (self.function_starts_cmd_index) |idx| { + const fstart = self.load_commands.items[idx].LinkeditData; + if (fstart.dataoff > start and fstart.dataoff < min_pos) min_pos = fstart.dataoff; + } + + if (self.data_in_code_cmd_index) |idx| { + const dic = self.load_commands.items[idx].LinkeditData; + if (dic.dataoff > start and dic.dataoff < min_pos) min_pos = dic.dataoff; } + + if (self.dysymtab_cmd_index) |idx| { + const dysymtab = self.load_commands.items[idx].Dysymtab; + if (dysymtab.indirectsymoff > start and dysymtab.indirectsymoff < min_pos) min_pos = dysymtab.indirectsymoff; + // TODO Handle more dynamic symbol table sections. + } + + if (self.symtab_cmd_index) |idx| { + const symtab = self.load_commands.items[idx].Symtab; + if (symtab.symoff > start and symtab.symoff < min_pos) min_pos = symtab.symoff; + if (symtab.stroff > start and symtab.stroff < min_pos) min_pos = symtab.stroff; + } + } else { + for (segment.sections.items) |section| { + if (section.offset > start and section.offset < min_pos) min_pos = section.offset; + } + } + + return min_pos - start; +} + +inline fn checkForCollision(start: u64, end: u64, off: u64, size: u64) ?u64 { + const increased_size = satMul(size, alloc_num) / alloc_den; + const test_end = off + increased_size; + if (end > off and start < test_end) { + return test_end; } - if (self.text_segment_cmd_index) |text_index| { - const text_segment = self.load_commands.items[text_index].Segment; - for (text_segment.sections.items) |section| { - const increased_size = satMul(section.size, alloc_num) / alloc_den; - const test_end = section.offset + increased_size; - if (end > section.offset and start < test_end) { - return test_end; + return null; +} + +fn detectAllocCollision(self: *MachO, segment: *const SegmentCommand, start: u64, size: u64) ?u64 { + const end = start + satMul(size, alloc_num) / alloc_den; + + if (parseAndCmpName(&segment.inner.segname, "__LINKEDIT")) { + assert(segment.sections.items.len == 0); + // __LINKEDIT is a weird segment where sections get their own load commands so we + // special-case it. 
+ if (self.dyld_info_cmd_index) |idx| outer: { + if (self.load_commands.items.len == idx) break :outer; + const dyld_info = self.load_commands.items[idx].DyldInfoOnly; + if (checkForCollision(start, end, dyld_info.rebase_off, dyld_info.rebase_size)) |pos| { + return pos; + } + // Binding info + if (checkForCollision(start, end, dyld_info.bind_off, dyld_info.bind_size)) |pos| { + return pos; + } + // Weak binding info + if (checkForCollision(start, end, dyld_info.weak_bind_off, dyld_info.weak_bind_size)) |pos| { + return pos; + } + // Lazy binding info + if (checkForCollision(start, end, dyld_info.lazy_bind_off, dyld_info.lazy_bind_size)) |pos| { + return pos; + } + // Export info + if (checkForCollision(start, end, dyld_info.export_off, dyld_info.export_size)) |pos| { + return pos; } } - } - if (self.dyld_info_cmd_index) |dyld_info_index| { - const dyld_info = self.load_commands.items[dyld_info_index].DyldInfoOnly; - const tight_size = dyld_info.export_size; - const increased_size = satMul(tight_size, alloc_num) / alloc_den; - const test_end = dyld_info.export_off + increased_size; - if (end > dyld_info.export_off and start < test_end) { - return test_end; + + if (self.function_starts_cmd_index) |idx| outer: { + if (self.load_commands.items.len == idx) break :outer; + const fstart = self.load_commands.items[idx].LinkeditData; + if (checkForCollision(start, end, fstart.dataoff, fstart.datasize)) |pos| { + return pos; + } } - } - if (self.symtab_cmd_index) |symtab_index| { - const symtab = self.load_commands.items[symtab_index].Symtab; - { - const tight_size = @sizeOf(macho.nlist_64) * symtab.nsyms; - const increased_size = satMul(tight_size, alloc_num) / alloc_den; - const test_end = symtab.symoff + increased_size; - if (end > symtab.symoff and start < test_end) { - return test_end; + + if (self.data_in_code_cmd_index) |idx| outer: { + if (self.load_commands.items.len == idx) break :outer; + const dic = self.load_commands.items[idx].LinkeditData; + if (checkForCollision(start, end, dic.dataoff, dic.datasize)) |pos| { + return pos; } } - { - const increased_size = satMul(symtab.strsize, alloc_num) / alloc_den; - const test_end = symtab.stroff + increased_size; - if (end > symtab.stroff and start < test_end) { - return test_end; + + if (self.dysymtab_cmd_index) |idx| outer: { + if (self.load_commands.items.len == idx) break :outer; + const dysymtab = self.load_commands.items[idx].Dysymtab; + // Indirect symbol table + const nindirectsize = dysymtab.nindirectsyms * @sizeOf(u32); + if (checkForCollision(start, end, dysymtab.indirectsymoff, nindirectsize)) |pos| { + return pos; } + // TODO Handle more dynamic symbol table sections. 
} - } - return null; -} -fn allocatedSize(self: *MachO, start: u64) u64 { - if (start == 0) - return 0; - var min_pos: u64 = std.math.maxInt(u64); - { - const off = @sizeOf(macho.mach_header_64); - if (off > start and off < min_pos) min_pos = off; - } - if (self.text_segment_cmd_index) |text_index| { - const text_segment = self.load_commands.items[text_index].Segment; - for (text_segment.sections.items) |section| { - if (section.offset <= start) continue; - if (section.offset < min_pos) min_pos = section.offset; + if (self.symtab_cmd_index) |idx| outer: { + if (self.load_commands.items.len == idx) break :outer; + const symtab = self.load_commands.items[idx].Symtab; + // Symbol table + const symsize = symtab.nsyms * @sizeOf(macho.nlist_64); + if (checkForCollision(start, end, symtab.symoff, symsize)) |pos| { + return pos; + } + // String table + if (checkForCollision(start, end, symtab.stroff, symtab.strsize)) |pos| { + return pos; + } + } + } else { + for (segment.sections.items) |section| { + if (checkForCollision(start, end, section.offset, section.size)) |pos| { + return pos; + } } } - if (self.dyld_info_cmd_index) |dyld_info_index| { - const dyld_info = self.load_commands.items[dyld_info_index].DyldInfoOnly; - if (dyld_info.export_off > start and dyld_info.export_off < min_pos) min_pos = dyld_info.export_off; - } - if (self.symtab_cmd_index) |symtab_index| { - const symtab = self.load_commands.items[symtab_index].Symtab; - if (symtab.symoff > start and symtab.symoff < min_pos) min_pos = symtab.symoff; - if (symtab.stroff > start and symtab.stroff < min_pos) min_pos = symtab.stroff; - } - return min_pos - start; + + return null; } -fn findFreeSpace(self: *MachO, object_size: u64, min_alignment: u16) u64 { - var start: u64 = 0; - while (self.detectAllocCollision(start, object_size)) |item_end| { +fn findFreeSpace(self: *MachO, segment: *const SegmentCommand, object_size: u64, min_alignment: u16) u64 { + var start: u64 = if (parseAndCmpName(&segment.inner.segname, "__TEXT")) + self.header_pad + else + segment.inner.fileoff; + while (self.detectAllocCollision(segment, start, object_size)) |item_end| { start = mem.alignForwardGeneric(u64, item_end, min_alignment); } return start; } +/// Saturating multiplication +fn satMul(a: anytype, b: anytype) @TypeOf(a, b) { + const T = @TypeOf(a, b); + return std.math.mul(T, a, b) catch std.math.maxInt(T); +} + fn writeOffsetTableEntry(self: *MachO, index: usize) !void { - const text_semgent = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const sect = &text_semgent.sections.items[self.got_section_index.?]; + const text_segment = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; + const sect = &text_segment.sections.items[self.got_section_index.?]; const off = sect.offset + @sizeOf(u64) * index; const vmaddr = sect.addr + @sizeOf(u64) * index; + if (self.offset_table_count_dirty) { + // TODO relocate. 
+ self.offset_table_count_dirty = false; + } + var code: [8]u8 = undefined; switch (self.base.options.target.cpu.arch) { .x86_64 => { @@ -1812,75 +1979,114 @@ fn writeOffsetTableEntry(self: *MachO, index: usize) !void { }, else => unreachable, // unsupported target architecture } - log.debug("writing offset table entry 0x{x} at 0x{x}\n", .{ self.offset_table.items[index], off }); + log.debug("writing offset table entry 0x{x} at 0x{x}", .{ self.offset_table.items[index], off }); try self.base.file.?.pwriteAll(&code, off); } -fn writeSymbolTable(self: *MachO) !void { - // TODO workout how we can cache these so that we only overwrite symbols that were updated +fn relocateSymbolTable(self: *MachO) !void { const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab; + const nlocals = self.local_symbols.items.len; + const nglobals = self.global_symbols.items.len; + const nundefs = self.undef_symbols.items.len; + const nsyms = nlocals + nglobals + nundefs; + + if (symtab.nsyms < nsyms) { + const linkedit_segment = self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; + const needed_size = nsyms * @sizeOf(macho.nlist_64); + if (needed_size > self.allocatedSize(&linkedit_segment, symtab.symoff)) { + // Move the entire symbol table to a new location + const new_symoff = self.findFreeSpace(&linkedit_segment, needed_size, @alignOf(macho.nlist_64)); + const existing_size = symtab.nsyms * @sizeOf(macho.nlist_64); + + log.debug("relocating symbol table from 0x{x}-0x{x} to 0x{x}-0x{x}", .{ + symtab.symoff, + symtab.symoff + existing_size, + new_symoff, + new_symoff + existing_size, + }); - const locals_off = self.linkedit_segment_next_offset.?; - const locals_size = self.local_symbols.items.len * @sizeOf(macho.nlist_64); - log.debug("writing local symbols from 0x{x} to 0x{x}\n", .{ locals_off, locals_size + locals_off }); - try self.base.file.?.pwriteAll(mem.sliceAsBytes(self.local_symbols.items), locals_off); + const amt = try self.base.file.?.copyRangeAll(symtab.symoff, self.base.file.?, new_symoff, existing_size); + if (amt != existing_size) return error.InputOutput; + symtab.symoff = @intCast(u32, new_symoff); + } + symtab.nsyms = @intCast(u32, nsyms); + self.load_commands_dirty = true; + } +} + +fn writeLocalSymbol(self: *MachO, index: usize) !void { + const tracy = trace(@src()); + defer tracy.end(); + try self.relocateSymbolTable(); + const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab; + const off = symtab.symoff + @sizeOf(macho.nlist_64) * index; + log.debug("writing local symbol {} at 0x{x}", .{ index, off }); + try self.base.file.?.pwriteAll(mem.asBytes(&self.local_symbols.items[index]), off); +} + +fn writeAllGlobalAndUndefSymbols(self: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + + try self.relocateSymbolTable(); + const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab; + const nlocals = self.local_symbols.items.len; + const nglobals = self.global_symbols.items.len; + const nundefs = self.undef_symbols.items.len; + + const locals_off = symtab.symoff; + const locals_size = nlocals * @sizeOf(macho.nlist_64); const globals_off = locals_off + locals_size; - const globals_size = self.global_symbols.items.len * @sizeOf(macho.nlist_64); - log.debug("writing global symbols from 0x{x} to 0x{x}\n", .{ globals_off, globals_size + globals_off }); + const globals_size = nglobals * @sizeOf(macho.nlist_64); + log.debug("writing global symbols from 0x{x} to 0x{x}", .{ globals_off, globals_size + globals_off }); try 
self.base.file.?.pwriteAll(mem.sliceAsBytes(self.global_symbols.items), globals_off); const undefs_off = globals_off + globals_size; - const undefs_size = self.undef_symbols.items.len * @sizeOf(macho.nlist_64); - log.debug("writing undef symbols from 0x{x} to 0x{x}\n", .{ undefs_off, undefs_size + undefs_off }); + const undefs_size = nundefs * @sizeOf(macho.nlist_64); + log.debug("writing undef symbols from 0x{x} to 0x{x}", .{ undefs_off, undefs_size + undefs_off }); try self.base.file.?.pwriteAll(mem.sliceAsBytes(self.undef_symbols.items), undefs_off); - // Update symbol table. - const nlocals = @intCast(u32, self.local_symbols.items.len); - const nglobals = @intCast(u32, self.global_symbols.items.len); - const nundefs = @intCast(u32, self.undef_symbols.items.len); - symtab.symoff = self.linkedit_segment_next_offset.?; - symtab.nsyms = nlocals + nglobals + nundefs; - self.linkedit_segment_next_offset = symtab.symoff + symtab.nsyms * @sizeOf(macho.nlist_64); - // Update dynamic symbol table. const dysymtab = &self.load_commands.items[self.dysymtab_cmd_index.?].Dysymtab; - dysymtab.nlocalsym = nlocals; - dysymtab.iextdefsym = nlocals; - dysymtab.nextdefsym = nglobals; - dysymtab.iundefsym = nlocals + nglobals; - dysymtab.nundefsym = nundefs; - - // Advance size of __LINKEDIT segment - const linkedit = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; - linkedit.inner.filesize += symtab.nsyms * @sizeOf(macho.nlist_64); - if (linkedit.inner.vmsize < linkedit.inner.filesize) { - linkedit.inner.vmsize = mem.alignForwardGeneric(u64, linkedit.inner.filesize, self.page_size); - } - self.cmd_table_dirty = true; + dysymtab.nlocalsym = @intCast(u32, nlocals); + dysymtab.iextdefsym = @intCast(u32, nlocals); + dysymtab.nextdefsym = @intCast(u32, nglobals); + dysymtab.iundefsym = @intCast(u32, nlocals + nglobals); + dysymtab.nundefsym = @intCast(u32, nundefs); + self.load_commands_dirty = true; } fn writeCodeSignaturePadding(self: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + + const linkedit_segment = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; const code_sig_cmd = &self.load_commands.items[self.code_signature_cmd_index.?].LinkeditData; - const fileoff = self.linkedit_segment_next_offset.?; - const datasize = CodeSignature.calcCodeSignaturePadding(self.base.options.emit.?.sub_path, fileoff); - code_sig_cmd.dataoff = fileoff; - code_sig_cmd.datasize = datasize; - - self.linkedit_segment_next_offset = fileoff + datasize; - // Advance size of __LINKEDIT segment - const linkedit = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; - linkedit.inner.filesize += datasize; - if (linkedit.inner.vmsize < linkedit.inner.filesize) { - linkedit.inner.vmsize = mem.alignForwardGeneric(u64, linkedit.inner.filesize, self.page_size); - } - log.debug("writing code signature padding from 0x{x} to 0x{x}\n", .{ fileoff, fileoff + datasize }); - // Pad out the space. We need to do this to calculate valid hashes for everything in the file - // except for code signature data. 
- try self.base.file.?.pwriteAll(&[_]u8{0}, fileoff + datasize - 1); + const fileoff = linkedit_segment.inner.fileoff + linkedit_segment.inner.filesize; + const needed_size = CodeSignature.calcCodeSignaturePadding(self.base.options.emit.?.sub_path, fileoff); + + if (code_sig_cmd.datasize < needed_size) { + code_sig_cmd.dataoff = @intCast(u32, fileoff); + code_sig_cmd.datasize = needed_size; + + // Advance size of __LINKEDIT segment + linkedit_segment.inner.filesize += needed_size; + if (linkedit_segment.inner.vmsize < linkedit_segment.inner.filesize) { + linkedit_segment.inner.vmsize = mem.alignForwardGeneric(u64, linkedit_segment.inner.filesize, self.page_size); + } + log.debug("writing code signature padding from 0x{x} to 0x{x}", .{ fileoff, fileoff + needed_size }); + // Pad out the space. We need to do this to calculate valid hashes for everything in the file + // except for code signature data. + try self.base.file.?.pwriteAll(&[_]u8{0}, fileoff + needed_size - 1); + self.load_commands_dirty = true; + } } fn writeCodeSignature(self: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment; const code_sig_cmd = self.load_commands.items[self.code_signature_cmd_index.?].LinkeditData; @@ -1898,14 +2104,18 @@ fn writeCodeSignature(self: *MachO) !void { defer self.base.allocator.free(buffer); code_sig.write(buffer); - log.debug("writing code signature from 0x{x} to 0x{x}\n", .{ code_sig_cmd.dataoff, code_sig_cmd.dataoff + buffer.len }); + log.debug("writing code signature from 0x{x} to 0x{x}", .{ code_sig_cmd.dataoff, code_sig_cmd.dataoff + buffer.len }); try self.base.file.?.pwriteAll(buffer, code_sig_cmd.dataoff); } fn writeExportTrie(self: *MachO) !void { + if (!self.export_info_dirty) return; if (self.global_symbols.items.len == 0) return; + const tracy = trace(@src()); + defer tracy.end(); + var trie = Trie.init(self.base.allocator); defer trie.deinit(); @@ -1928,118 +2138,156 @@ fn writeExportTrie(self: *MachO) !void { const nwritten = try trie.write(stream.writer()); assert(nwritten == trie.size); + const linkedit_segment = self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly; - const export_size = @intCast(u32, mem.alignForward(buffer.len, @sizeOf(u64))); - dyld_info.export_off = self.linkedit_segment_next_offset.?; - dyld_info.export_size = export_size; + const allocated_size = self.allocatedSize(&linkedit_segment, dyld_info.export_off); + const needed_size = mem.alignForwardGeneric(u64, buffer.len, @alignOf(u64)); - log.debug("writing export trie from 0x{x} to 0x{x}\n", .{ dyld_info.export_off, dyld_info.export_off + export_size }); - - if (export_size > buffer.len) { - // Pad out to align(8). 
- try self.base.file.?.pwriteAll(&[_]u8{0}, dyld_info.export_off + export_size); + if (needed_size > allocated_size) { + dyld_info.export_off = 0; + dyld_info.export_off = @intCast(u32, self.findFreeSpace(&linkedit_segment, needed_size, 1)); } - try self.base.file.?.pwriteAll(buffer, dyld_info.export_off); + dyld_info.export_size = @intCast(u32, needed_size); + log.debug("writing export info from 0x{x} to 0x{x}", .{ dyld_info.export_off, dyld_info.export_off + dyld_info.export_size }); - self.linkedit_segment_next_offset = dyld_info.export_off + dyld_info.export_size; - // Advance size of __LINKEDIT segment - const linkedit = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; - linkedit.inner.filesize += dyld_info.export_size; - if (linkedit.inner.vmsize < linkedit.inner.filesize) { - linkedit.inner.vmsize = mem.alignForwardGeneric(u64, linkedit.inner.filesize, self.page_size); - } - self.cmd_table_dirty = true; + try self.base.file.?.pwriteAll(buffer, dyld_info.export_off); + self.load_commands_dirty = true; + self.export_info_dirty = false; } fn writeBindingInfoTable(self: *MachO) !void { - const size = self.binding_info_table.calcSize(); + if (!self.binding_info_dirty) return; + + const tracy = trace(@src()); + defer tracy.end(); + + const size = try self.binding_info_table.calcSize(); var buffer = try self.base.allocator.alloc(u8, size); defer self.base.allocator.free(buffer); var stream = std.io.fixedBufferStream(buffer); try self.binding_info_table.write(stream.writer()); + const linkedit_segment = self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly; - const bind_size = @intCast(u32, mem.alignForward(buffer.len, @sizeOf(u64))); - dyld_info.bind_off = self.linkedit_segment_next_offset.?; - dyld_info.bind_size = bind_size; + const allocated_size = self.allocatedSize(&linkedit_segment, dyld_info.bind_off); + const needed_size = mem.alignForwardGeneric(u64, buffer.len, @alignOf(u64)); - log.debug("writing binding info table from 0x{x} to 0x{x}\n", .{ dyld_info.bind_off, dyld_info.bind_off + bind_size }); - - if (bind_size > buffer.len) { - // Pad out to align(8). 
- try self.base.file.?.pwriteAll(&[_]u8{0}, dyld_info.bind_off + bind_size); + if (needed_size > allocated_size) { + dyld_info.bind_off = 0; + dyld_info.bind_off = @intCast(u32, self.findFreeSpace(&linkedit_segment, needed_size, 1)); } - try self.base.file.?.pwriteAll(buffer, dyld_info.bind_off); - self.linkedit_segment_next_offset = dyld_info.bind_off + dyld_info.bind_size; - // Advance size of __LINKEDIT segment - const linkedit = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; - linkedit.inner.filesize += dyld_info.bind_size; - if (linkedit.inner.vmsize < linkedit.inner.filesize) { - linkedit.inner.vmsize = mem.alignForwardGeneric(u64, linkedit.inner.filesize, self.page_size); - } - self.cmd_table_dirty = true; + dyld_info.bind_size = @intCast(u32, needed_size); + log.debug("writing binding info from 0x{x} to 0x{x}", .{ dyld_info.bind_off, dyld_info.bind_off + dyld_info.bind_size }); + + try self.base.file.?.pwriteAll(buffer, dyld_info.bind_off); + self.load_commands_dirty = true; + self.binding_info_dirty = false; } fn writeLazyBindingInfoTable(self: *MachO) !void { - const size = self.lazy_binding_info_table.calcSize(); + if (!self.lazy_binding_info_dirty) return; + + const size = try self.lazy_binding_info_table.calcSize(); var buffer = try self.base.allocator.alloc(u8, size); defer self.base.allocator.free(buffer); var stream = std.io.fixedBufferStream(buffer); try self.lazy_binding_info_table.write(stream.writer()); + const linkedit_segment = self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly; - const bind_size = @intCast(u32, mem.alignForward(buffer.len, @sizeOf(u64))); - dyld_info.lazy_bind_off = self.linkedit_segment_next_offset.?; - dyld_info.lazy_bind_size = bind_size; - - log.debug("writing lazy binding info table from 0x{x} to 0x{x}\n", .{ dyld_info.lazy_bind_off, dyld_info.lazy_bind_off + bind_size }); + const allocated_size = self.allocatedSize(&linkedit_segment, dyld_info.lazy_bind_off); + const needed_size = mem.alignForwardGeneric(u64, buffer.len, @alignOf(u64)); - if (bind_size > buffer.len) { - // Pad out to align(8). 
- try self.base.file.?.pwriteAll(&[_]u8{0}, dyld_info.lazy_bind_off + bind_size); + if (needed_size > allocated_size) { + dyld_info.lazy_bind_off = 0; + dyld_info.lazy_bind_off = @intCast(u32, self.findFreeSpace(&linkedit_segment, needed_size, 1)); } - try self.base.file.?.pwriteAll(buffer, dyld_info.lazy_bind_off); - self.linkedit_segment_next_offset = dyld_info.lazy_bind_off + dyld_info.lazy_bind_size; - // Advance size of __LINKEDIT segment - const linkedit = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; - linkedit.inner.filesize += dyld_info.lazy_bind_size; - if (linkedit.inner.vmsize < linkedit.inner.filesize) { - linkedit.inner.vmsize = mem.alignForwardGeneric(u64, linkedit.inner.filesize, self.page_size); - } - self.cmd_table_dirty = true; + dyld_info.lazy_bind_size = @intCast(u32, needed_size); + log.debug("writing lazy binding info from 0x{x} to 0x{x}", .{ dyld_info.lazy_bind_off, dyld_info.lazy_bind_off + dyld_info.lazy_bind_size }); + + try self.base.file.?.pwriteAll(buffer, dyld_info.lazy_bind_off); + self.load_commands_dirty = true; + self.lazy_binding_info_dirty = false; } fn writeStringTable(self: *MachO) !void { - const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab; - const needed_size = self.string_table.items.len; + if (!self.string_table_dirty) return; - symtab.stroff = self.linkedit_segment_next_offset.?; - symtab.strsize = @intCast(u32, mem.alignForward(needed_size, @sizeOf(u64))); + const tracy = trace(@src()); + defer tracy.end(); - log.debug("writing string table from 0x{x} to 0x{x}\n", .{ symtab.stroff, symtab.stroff + symtab.strsize }); + const linkedit_segment = self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; + const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab; + const allocated_size = self.allocatedSize(&linkedit_segment, symtab.stroff); + const needed_size = mem.alignForwardGeneric(u64, self.string_table.items.len, @alignOf(u64)); - if (symtab.strsize > needed_size) { - // Pad out to align(8); - try self.base.file.?.pwriteAll(&[_]u8{0}, symtab.stroff + symtab.strsize); + if (needed_size > allocated_size) { + symtab.strsize = 0; + symtab.stroff = @intCast(u32, self.findFreeSpace(&linkedit_segment, needed_size, 1)); } + symtab.strsize = @intCast(u32, needed_size); + log.debug("writing string table from 0x{x} to 0x{x}", .{ symtab.stroff, symtab.stroff + symtab.strsize }); + try self.base.file.?.pwriteAll(self.string_table.items, symtab.stroff); + self.load_commands_dirty = true; + self.string_table_dirty = false; +} - self.linkedit_segment_next_offset = symtab.stroff + symtab.strsize; - // Advance size of __LINKEDIT segment - const linkedit = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; - linkedit.inner.filesize += symtab.strsize; - if (linkedit.inner.vmsize < linkedit.inner.filesize) { - linkedit.inner.vmsize = mem.alignForwardGeneric(u64, linkedit.inner.filesize, self.page_size); - } - self.cmd_table_dirty = true; +fn updateLinkeditSegmentSizes(self: *MachO) !void { + if (!self.load_commands_dirty) return; + + const tracy = trace(@src()); + defer tracy.end(); + + // Now, we are in position to update __LINKEDIT segment sizes. + // TODO Add checkpointing so that we don't have to do this every single time. 
+ const linkedit_segment = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; + var final_offset = linkedit_segment.inner.fileoff; + + if (self.dyld_info_cmd_index) |idx| { + const dyld_info = self.load_commands.items[idx].DyldInfoOnly; + final_offset = std.math.max(final_offset, dyld_info.rebase_off + dyld_info.rebase_size); + final_offset = std.math.max(final_offset, dyld_info.bind_off + dyld_info.bind_size); + final_offset = std.math.max(final_offset, dyld_info.weak_bind_off + dyld_info.weak_bind_size); + final_offset = std.math.max(final_offset, dyld_info.lazy_bind_off + dyld_info.lazy_bind_size); + final_offset = std.math.max(final_offset, dyld_info.export_off + dyld_info.export_size); + } + if (self.function_starts_cmd_index) |idx| { + const fstart = self.load_commands.items[idx].LinkeditData; + final_offset = std.math.max(final_offset, fstart.dataoff + fstart.datasize); + } + if (self.data_in_code_cmd_index) |idx| { + const dic = self.load_commands.items[idx].LinkeditData; + final_offset = std.math.max(final_offset, dic.dataoff + dic.datasize); + } + if (self.dysymtab_cmd_index) |idx| { + const dysymtab = self.load_commands.items[idx].Dysymtab; + const nindirectsize = dysymtab.nindirectsyms * @sizeOf(u32); + final_offset = std.math.max(final_offset, dysymtab.indirectsymoff + nindirectsize); + // TODO Handle more dynamic symbol table sections. + } + if (self.symtab_cmd_index) |idx| { + const symtab = self.load_commands.items[idx].Symtab; + const symsize = symtab.nsyms * @sizeOf(macho.nlist_64); + final_offset = std.math.max(final_offset, symtab.symoff + symsize); + final_offset = std.math.max(final_offset, symtab.stroff + symtab.strsize); + } + + const filesize = final_offset - linkedit_segment.inner.fileoff; + linkedit_segment.inner.filesize = filesize; + linkedit_segment.inner.vmsize = mem.alignForwardGeneric(u64, filesize, self.page_size); + try self.base.file.?.pwriteAll(&[_]u8{ 0 }, final_offset); + self.load_commands_dirty = true; } /// Writes all load commands and section headers. fn writeLoadCommands(self: *MachO) !void { + if (!self.load_commands_dirty) return; + var sizeofcmds: usize = 0; for (self.load_commands.items) |lc| { sizeofcmds += lc.cmdsize(); @@ -2052,26 +2300,25 @@ fn writeLoadCommands(self: *MachO) !void { try lc.write(writer); } - try self.base.file.?.pwriteAll(buffer, @sizeOf(macho.mach_header_64)); + const off = @sizeOf(macho.mach_header_64); + log.debug("writing {} load commands from 0x{x} to 0x{x}", .{self.load_commands.items.len, off, off + sizeofcmds}); + try self.base.file.?.pwriteAll(buffer, off); + self.load_commands_dirty = false; } /// Writes Mach-O file header. fn writeHeader(self: *MachO) !void { + if (!self.header_dirty) return; + self.header.?.ncmds = @intCast(u32, self.load_commands.items.len); var sizeofcmds: u32 = 0; for (self.load_commands.items) |cmd| { sizeofcmds += cmd.cmdsize(); } self.header.?.sizeofcmds = sizeofcmds; - log.debug("writing Mach-O header {}\n", .{self.header.?}); - const slice = [1]macho.mach_header_64{self.header.?}; - try self.base.file.?.pwriteAll(mem.sliceAsBytes(slice[0..1]), 0); -} - -/// Saturating multiplication -fn satMul(a: anytype, b: anytype) @TypeOf(a, b) { - const T = @TypeOf(a, b); - return std.math.mul(T, a, b) catch std.math.maxInt(T); + log.debug("writing Mach-O header {}", .{self.header.?}); + try self.base.file.?.pwriteAll(mem.asBytes(&self.header.?), 0); + self.header_dirty = false; } /// Parse MachO contents from existing binary file. 
@@ -2088,18 +2335,18 @@ fn parseFromFile(self: *MachO, file: fs.File) !void {
 switch (cmd.cmd()) {
 macho.LC_SEGMENT_64 => {
 const x = cmd.Segment;
- if (parseAndCmpName(x.inner.segname[0..], "__PAGEZERO")) {
+ if (parseAndCmpName(&x.inner.segname, "__PAGEZERO")) {
 self.pagezero_segment_cmd_index = i;
- } else if (parseAndCmpName(x.inner.segname[0..], "__LINKEDIT")) {
+ } else if (parseAndCmpName(&x.inner.segname, "__LINKEDIT")) {
 self.linkedit_segment_cmd_index = i;
- } else if (parseAndCmpName(x.inner.segname[0..], "__TEXT")) {
+ } else if (parseAndCmpName(&x.inner.segname, "__TEXT")) {
 self.text_segment_cmd_index = i;
 for (x.sections.items) |sect, j| {
- if (parseAndCmpName(sect.sectname[0..], "__text")) {
+ if (parseAndCmpName(&sect.sectname, "__text")) {
 self.text_section_index = @intCast(u16, j);
 }
 }
- } else if (parseAndCmpName(x.inner.segname[0..], "__DATA")) {
+ } else if (parseAndCmpName(&x.inner.segname, "__DATA")) {
 self.data_segment_cmd_index = i;
 }
 },
@@ -2140,7 +2387,7 @@ fn parseFromFile(self: *MachO, file: fs.File) !void {
 self.code_signature_cmd_index = i;
 },
 else => {
- std.log.warn("Unknown load command detected: 0x{x}.", .{cmd.cmd()});
+ log.warn("Unknown load command detected: 0x{x}.", .{cmd.cmd()});
 },
 }
 self.load_commands.appendAssumeCapacity(cmd);
@@ -2149,7 +2396,7 @@ fn parseFromFile(self: *MachO, file: fs.File) !void {
 }
 fn parseAndCmpName(name: []const u8, needle: []const u8) bool {
- const len = mem.indexOfScalar(u8, name[0..], @as(u8, 0)) orelse name.len;
+ const len = mem.indexOfScalar(u8, name, @as(u8, 0)) orelse name.len;
 return mem.eql(u8, name[0..len], needle);
 }
diff --git a/src/link/MachO/CodeSignature.zig b/src/link/MachO/CodeSignature.zig
index 442132dac1..0d8f307a4e 100644
--- a/src/link/MachO/CodeSignature.zig
+++ b/src/link/MachO/CodeSignature.zig
@@ -126,7 +126,7 @@ pub fn calcAdhocSignature(
 Sha256.hash(buffer[0..fsize], &hash, .{});
- cdir.data.appendSliceAssumeCapacity(hash[0..]);
+ cdir.data.appendSliceAssumeCapacity(&hash);
 cdir.inner.nCodeSlots += 1;
 }
@@ -174,10 +174,10 @@ test "CodeSignature header" {
 defer code_sig.deinit();
 var buffer: [@sizeOf(macho.SuperBlob)]u8 = undefined;
- code_sig.writeHeader(buffer[0..]);
+ code_sig.writeHeader(&buffer);
 const expected = &[_]u8{ 0xfa, 0xde, 0x0c, 0xc0, 0x0, 0x0, 0x0, 0xc, 0x0, 0x0, 0x0, 0x0 };
- testing.expect(mem.eql(u8, expected[0..], buffer[0..]));
+ testing.expect(mem.eql(u8, expected, &buffer));
 }
 pub fn calcCodeSignaturePadding(id: []const u8, file_size: u64) u32 {
diff --git a/src/link/MachO/Trie.zig b/src/link/MachO/Trie.zig
index e445ea4fef..858b6aad60 100644
--- a/src/link/MachO/Trie.zig
+++ b/src/link/MachO/Trie.zig
@@ -531,14 +531,14 @@ test "write Trie to a byte stream" {
 {
 const nwritten = try trie.write(stream.writer());
 testing.expect(nwritten == trie.size);
- testing.expect(mem.eql(u8, buffer, exp_buffer[0..]));
+ testing.expect(mem.eql(u8, buffer, &exp_buffer));
 }
 { // Writing finalized trie again should yield the same result.
 try stream.seekTo(0);
 const nwritten = try trie.write(stream.writer());
 testing.expect(nwritten == trie.size);
- testing.expect(mem.eql(u8, buffer, exp_buffer[0..]));
+ testing.expect(mem.eql(u8, buffer, &exp_buffer));
 }
 }
@@ -556,7 +556,7 @@ test "parse Trie from byte stream" {
 0x3, 0x0, 0x80, 0x20, 0x0, // terminal node
 };
- var in_stream = std.io.fixedBufferStream(in_buffer[0..]);
+ var in_stream = std.io.fixedBufferStream(&in_buffer);
 var trie = Trie.init(gpa);
 defer trie.deinit();
 const nread = try trie.read(in_stream.reader());
@@ -571,5 +571,5 @@ test "parse Trie from byte stream" {
 const nwritten = try trie.write(out_stream.writer());
 testing.expect(nwritten == trie.size);
- testing.expect(mem.eql(u8, in_buffer[0..], out_buffer));
+ testing.expect(mem.eql(u8, &in_buffer, out_buffer));
 }
diff --git a/src/link/MachO/commands.zig b/src/link/MachO/commands.zig
index bc1e5fab5f..2fa1f867f5 100644
--- a/src/link/MachO/commands.zig
+++ b/src/link/MachO/commands.zig
@@ -7,7 +7,7 @@
 const macho = std.macho;
 const testing = std.testing;
 const Allocator = std.mem.Allocator;
-const makeName = @import("../MachO.zig").makeStaticString;
+const makeStaticString = @import("../MachO.zig").makeStaticString;
 pub const LoadCommand = union(enum) {
 Segment: SegmentCommand,
@@ -26,9 +26,9 @@ pub const LoadCommand = union(enum) {
 const header = try reader.readStruct(macho.load_command);
 var buffer = try allocator.alloc(u8, header.cmdsize);
 defer allocator.free(buffer);
- mem.copy(u8, buffer[0..], mem.asBytes(&header));
+ mem.copy(u8, buffer, mem.asBytes(&header));
 try reader.readNoEof(buffer[@sizeOf(macho.load_command)..]);
- var stream = io.fixedBufferStream(buffer[0..]);
+ var stream = io.fixedBufferStream(buffer);
 return switch (header.cmd) {
 macho.LC_SEGMENT_64 => LoadCommand{
@@ -155,6 +155,12 @@ pub const SegmentCommand = struct {
 return .{ .inner = inner };
 }
+ pub fn addSection(self: *SegmentCommand, alloc: *Allocator, section: macho.section_64) !void {
+ try self.sections.append(alloc, section);
+ self.inner.cmdsize += @sizeOf(macho.section_64);
+ self.inner.nsects += 1;
+ }
+
 pub fn read(alloc: *Allocator, reader: anytype) !SegmentCommand {
 const inner = try reader.readStruct(macho.segment_command_64);
 var segment = SegmentCommand{
@@ -210,7 +216,7 @@ pub fn GenericCommandWithData(comptime Cmd: type) type {
 const inner = try reader.readStruct(Cmd);
 var data = try allocator.alloc(u8, inner.cmdsize - @sizeOf(Cmd));
 errdefer allocator.free(data);
- try reader.readNoEof(data[0..]);
+ try reader.readNoEof(data);
 return Self{
 .inner = inner,
 .data = data,
@@ -277,7 +283,7 @@ test "read-write segment command" {
 .inner = .{
 .cmd = macho.LC_SEGMENT_64,
 .cmdsize = 152,
- .segname = makeName("__TEXT"),
+ .segname = makeStaticString("__TEXT"),
 .vmaddr = 4294967296,
 .vmsize = 294912,
 .fileoff = 0,
@@ -289,8 +295,8 @@ test "read-write segment command" {
 },
 };
 try cmd.sections.append(gpa, .{
- .sectname = makeName("__text"),
- .segname = makeName("__TEXT"),
+ .sectname = makeStaticString("__text"),
+ .segname = makeStaticString("__TEXT"),
 .addr = 4294983680,
 .size = 448,
 .offset = 16384,
@@ -303,10 +309,10 @@ test "read-write segment command" {
 .reserved3 = 0,
 });
 defer cmd.deinit(gpa);
- try testRead(gpa, in_buffer[0..], LoadCommand{ .Segment = cmd });
+ try testRead(gpa, in_buffer, LoadCommand{ .Segment = cmd });
 var out_buffer: [in_buffer.len]u8 = undefined;
- try testWrite(out_buffer[0..], LoadCommand{ .Segment = cmd }, in_buffer[0..]);
+ try testWrite(&out_buffer, LoadCommand{ .Segment = cmd }, in_buffer);
 }
 test "read-write generic command with data" {
@@ -342,10 +348,10 @@ test "read-write generic command with data" {
 cmd.data[5] = 0x0;
 cmd.data[6] = 0x0;
 cmd.data[7] = 0x0;
- try testRead(gpa, in_buffer[0..], LoadCommand{ .Dylib = cmd });
+ try testRead(gpa, in_buffer, LoadCommand{ .Dylib = cmd });
 var out_buffer: [in_buffer.len]u8 = undefined;
- try testWrite(out_buffer[0..], LoadCommand{ .Dylib = cmd }, in_buffer[0..]);
+ try testWrite(&out_buffer, LoadCommand{ .Dylib = cmd }, in_buffer);
 }
 test "read-write C struct command" {
@@ -362,8 +368,8 @@ test "read-write C struct command" {
 .entryoff = 16644,
 .stacksize = 0,
 };
- try testRead(gpa, in_buffer[0..], LoadCommand{ .Main = cmd });
+ try testRead(gpa, in_buffer, LoadCommand{ .Main = cmd });
 var out_buffer: [in_buffer.len]u8 = undefined;
- try testWrite(out_buffer[0..], LoadCommand{ .Main = cmd }, in_buffer[0..]);
+ try testWrite(&out_buffer, LoadCommand{ .Main = cmd }, in_buffer);
 }
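For reference, the new updateLinkeditSegmentSizes in src/link/MachO.zig above amounts to taking the maximum end offset (offset + size) over every blob that lives in __LINKEDIT, then deriving the segment's file size from that and page-aligning its in-memory size. A minimal standalone sketch of that computation, with a hypothetical Blob type and made-up offsets rather than code from this patch, assuming the Zig version this tree builds with:

    const std = @import("std");
    const mem = std.mem;

    // Hypothetical stand-in for one __LINKEDIT blob (rebase info, bind info,
    // symbol table, string table, ...): a file offset plus a size in bytes.
    const Blob = struct { off: u64, size: u64 };
    const Sizes = struct { filesize: u64, vmsize: u64 };

    fn linkeditSizes(fileoff: u64, page_size: u64, blobs: []const Blob) Sizes {
        var final_offset = fileoff;
        for (blobs) |blob| {
            // A blob that was never allocated (off == 0, size == 0) cannot push
            // the end of the segment past its file offset.
            final_offset = std.math.max(final_offset, blob.off + blob.size);
        }
        const filesize = final_offset - fileoff;
        return .{
            .filesize = filesize,
            .vmsize = mem.alignForwardGeneric(u64, filesize, page_size),
        };
    }

    test "linkedit size covers the furthest blob and is page-aligned in memory" {
        const sizes = linkeditSizes(0x4000, 0x1000, &[_]Blob{
            .{ .off = 0x4000, .size = 0x80 }, // e.g. rebase info
            .{ .off = 0x5000, .size = 0x200 }, // e.g. string table
        });
        std.testing.expect(sizes.filesize == 0x1200);
        std.testing.expect(sizes.vmsize == 0x2000);
    }

Tracking filesize as the last byte actually written while keeping vmsize rounded up to a page mirrors what the patch does for the __LINKEDIT segment command.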
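The per-blob dirty flags introduced in src/link/MachO.zig (header_dirty, load_commands_dirty, string_table_dirty, and friends, replacing the single cmd_table_dirty) follow a simple contract: each write* routine returns early when its blob is clean, and re-dirties the load commands whenever it moves or resizes the blob, so writeLoadCommands and writeHeader run again at the end of flushModule. A toy sketch of that contract, using a hypothetical Linker type rather than the repository's code:

    const std = @import("std");

    const Linker = struct {
        string_table_dirty: bool = false,
        load_commands_dirty: bool = false,
        writes: u32 = 0, // stand-in for the pwriteAll calls

        fn writeStringTable(self: *Linker) void {
            if (!self.string_table_dirty) return; // nothing changed since the last flush
            self.writes += 1; // pretend we rewrote the string table blob
            self.load_commands_dirty = true; // symtab.stroff/strsize were updated
            self.string_table_dirty = false;
        }
    };

    test "a clean blob is not rewritten on the next flush" {
        var linker = Linker{ .string_table_dirty = true };
        linker.writeStringTable();
        linker.writeStringTable(); // second flush is a no-op
        std.testing.expect(linker.writes == 1);
        std.testing.expect(linker.load_commands_dirty);
    }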
