| author    | Andrew Kelley <andrew@ziglang.org>       | 2021-01-13 15:11:35 -0800 |
|-----------|------------------------------------------|---------------------------|
| committer | GitHub <noreply@github.com>              | 2021-01-13 15:11:35 -0800 |
| commit    | ec1541de26137cf5d1e353023adbf92e1be8ef16 |                           |
| tree      | 8477de646121c223dad7cf30c6c90bfda92fb89c | /src/link                 |
| parent    | 5487dd13ea23ad7e547995b9a088ba37bfe17737 |                           |
| parent    | 212814932578f5916bff2dd04d501e1d30be740c |                           |
Merge pull request #7746 from kubkon/macho-extern-fn
macho: extern functions come to MachO!
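
For context, what this merge adds: the self-hosted MachO linker learns to link calls to functions that live in dynamic libraries, by emitting symbol stubs, lazy/non-lazy pointer sections, and the dyld rebase/bind metadata that describes them. A minimal sketch of the kind of program this enables (my own illustrative example, not taken from the PR):

```zig
const std = @import("std");

// An extern function provided by libSystem.dylib. Resolving this call at
// run time is exactly what the stub and lazy-binding machinery added in
// this diff is for.
extern "c" fn write(fd: c_int, buf: [*]const u8, count: usize) isize;

pub fn main() void {
    const msg = "hello from a MachO stub\n";
    _ = write(1, msg, msg.len);
}
```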
Diffstat (limited to 'src/link')

| -rw-r--r-- | src/link/MachO.zig              | 867 |
| -rw-r--r-- | src/link/MachO/DebugSymbols.zig |  11 |
| -rw-r--r-- | src/link/MachO/imports.zig      | 393 |

3 files changed, 881 insertions(+), 390 deletions(-)
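
Before the diff itself, a quick sketch of the dyld lazy-binding chain that the new code builds, since each of the new functions below (`writeStub`, `writeStubInStubHelper`, `writeLazySymbolPointer`) emits one leg of it: a call lands on an entry in `__TEXT,__stubs`, which jumps through its slot in `__DATA,__la_symbol_ptr`; on first use that slot still points at the per-symbol entry in `__TEXT,__stub_helper`, which pushes an offset into the lazy bind info and branches to the common prologue, which in turn tail-calls `dyld_stub_binder` through `__DATA_CONST,__got`; the binder resolves the symbol and overwrites the lazy pointer so later calls go straight to the dylib. As a rough model of one leg, the 6-byte x86_64 stub entry written by `writeStub` can be encoded like this (a sketch assuming a little-endian target and a displacement that fits in 32 bits):

```zig
const std = @import("std");

/// Encode the x86_64 `__TEXT,__stubs` entry `jmp qword ptr [rip + disp32]`,
/// mirroring what `writeStub` in this diff writes. `stub_addr` is the
/// address of the stub itself, `la_ptr_addr` the matching lazy pointer slot.
fn encodeStub(stub_addr: u64, la_ptr_addr: u64) [6]u8 {
    var code: [6]u8 = undefined;
    code[0] = 0xff; // jmp opcode
    code[1] = 0x25; // ModRM byte selecting RIP-relative indirect jump
    // RIP-relative displacements are measured from the end of the instruction.
    const disp = @intCast(u32, la_ptr_addr - stub_addr - code.len);
    std.mem.writeIntLittle(u32, code[2..6], disp);
    return code;
}
```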
diff --git a/src/link/MachO.zig b/src/link/MachO.zig
index dbc982f5e2..27c395da95 100644
--- a/src/link/MachO.zig
+++ b/src/link/MachO.zig
@@ -52,6 +52,8 @@ load_commands: std.ArrayListUnmanaged(LoadCommand) = .{},
 pagezero_segment_cmd_index: ?u16 = null,
 /// __TEXT segment
 text_segment_cmd_index: ?u16 = null,
+/// __DATA_CONST segment
+data_const_segment_cmd_index: ?u16 = null,
 /// __DATA segment
 data_segment_cmd_index: ?u16 = null,
 /// __LINKEDIT segment
@@ -87,22 +89,34 @@ code_signature_cmd_index: ?u16 = null,
 text_section_index: ?u16 = null,
 /// Index into __TEXT,__ziggot section.
 got_section_index: ?u16 = null,
+/// Index into __TEXT,__stubs section.
+stubs_section_index: ?u16 = null,
+/// Index into __TEXT,__stub_helper section.
+stub_helper_section_index: ?u16 = null,
+/// Index into __DATA_CONST,__got section.
+data_got_section_index: ?u16 = null,
+/// Index into __DATA,__la_symbol_ptr section.
+la_symbol_ptr_section_index: ?u16 = null,
+/// Index into __DATA,__data section.
+data_section_index: ?u16 = null,
 /// The absolute address of the entry point.
 entry_addr: ?u64 = null,

 /// Table of all local symbols
 /// Internally references string table for names (which are optional).
 local_symbols: std.ArrayListUnmanaged(macho.nlist_64) = .{},
-/// Table of all defined global symbols
+/// Table of all global symbols
 global_symbols: std.ArrayListUnmanaged(macho.nlist_64) = .{},
-/// Table of all undefined symbols
-undef_symbols: std.ArrayListUnmanaged(macho.nlist_64) = .{},
+/// Table of all extern nonlazy symbols, indexed by name.
+extern_nonlazy_symbols: std.StringArrayHashMapUnmanaged(ExternSymbol) = .{},
+/// Table of all extern lazy symbols, indexed by name.
+extern_lazy_symbols: std.StringArrayHashMapUnmanaged(ExternSymbol) = .{},

 local_symbol_free_list: std.ArrayListUnmanaged(u32) = .{},
 global_symbol_free_list: std.ArrayListUnmanaged(u32) = .{},
 offset_table_free_list: std.ArrayListUnmanaged(u32) = .{},

-dyld_stub_binder_index: ?u16 = null,
+stub_helper_stubs_start_off: ?u64 = null,

 /// Table of symbol names aka the string table.
 string_table: std.ArrayListUnmanaged(u8) = .{},
@@ -110,16 +124,12 @@ string_table: std.ArrayListUnmanaged(u8) = .{},
 /// Table of trampolines to the actual symbols in __text section.
 offset_table: std.ArrayListUnmanaged(u64) = .{},

-/// Table of binding info entries.
-binding_info_table: BindingInfoTable = .{},
-/// Table of lazy binding info entries.
-lazy_binding_info_table: LazyBindingInfoTable = .{},
-
 error_flags: File.ErrorFlags = File.ErrorFlags{},

 offset_table_count_dirty: bool = false,
 header_dirty: bool = false,
 load_commands_dirty: bool = false,
+rebase_info_dirty: bool = false,
 binding_info_dirty: bool = false,
 lazy_binding_info_dirty: bool = false,
 export_info_dirty: bool = false,
@@ -149,6 +159,12 @@ last_text_block: ?*TextBlock = null,
 /// prior to calling `generateSymbol`, and then immediately deallocated
 /// rather than sitting in the global scope.
 pie_fixups: std.ArrayListUnmanaged(PieFixup) = .{},
+/// A list of all stub (extern decls) fixups required for this run of the linker.
+/// Warning, this is currently NOT thread-safe. See the TODO below.
+/// TODO Move this list inside `updateDecl` where it should be allocated
+/// prior to calling `generateSymbol`, and then immediately deallocated
+/// rather than sitting in the global scope.
+stub_fixups: std.ArrayListUnmanaged(StubFixup) = .{},

 pub const PieFixup = struct {
     /// Target address we wanted to address in absolute terms.
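
The `extern_nonlazy_symbols` and `extern_lazy_symbols` tables added above feed dyld's bind and lazy-bind opcode streams (emitted by `writeBindInfo`/`writeLazyBindInfo` in `imports.zig` further down). As a worked example of what such a stream looks like: binding `dyld_stub_binder` from dylib ordinal 1 into a pointer slot at offset 0 of segment 2 encodes to the bytes below. The segment index and offset are hypothetical; the opcode values are the standard ones from `<mach-o/loader.h>`.

```zig
// Hand-assembled bind opcode stream for dyld_stub_binder, matching the
// order writeBindInfo emits: dylib ordinal, bind type, symbol name,
// segment/offset, then the bind itself.
const bind_stream =
    "\x11" ++ // BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | 1
    "\x51" ++ // BIND_OPCODE_SET_TYPE_IMM | BIND_TYPE_POINTER
    "\x40" ++ // BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM (flags = 0)
    "dyld_stub_binder\x00" ++ // NUL-terminated symbol name
    "\x72" ++ // BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | segment 2
    "\x00" ++ // offset 0, LEB128-encoded
    "\x90" ++ // BIND_OPCODE_DO_BIND
    "\x00"; // BIND_OPCODE_DONE
```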
@@ -160,6 +176,19 @@ pub const PieFixup = struct {
     len: usize,
 };

+pub const StubFixup = struct {
+    /// Id of extern (lazy) symbol.
+    symbol: u32,
+    /// Signals whether the symbol has already been declared before. If so,
+    /// then there is no need to rewrite the stub entry and related.
+    already_defined: bool,
+    /// Where in the byte stream we should perform the fixup.
+    start: usize,
+    /// The length of the byte stream. For x86_64, this will be
+    /// variable. For aarch64, it will be fixed at 4 bytes.
+    len: usize,
+};
+
 /// `alloc_num / alloc_den` is the factor of padding when allocating.
 pub const alloc_num = 4;
 pub const alloc_den = 3;
@@ -379,10 +408,12 @@ pub fn flushModule(self: *MachO, comp: *Compilation) !void {
         main_cmd.entryoff = addr - text_segment.inner.vmaddr;
         self.load_commands_dirty = true;
     }
+    try self.writeRebaseInfoTable();
     try self.writeBindingInfoTable();
     try self.writeLazyBindingInfoTable();
     try self.writeExportTrie();
     try self.writeAllGlobalAndUndefSymbols();
+    try self.writeIndirectSymbolTable();
     try self.writeStringTable();
     try self.updateLinkeditSegmentSizes();
@@ -418,6 +449,7 @@ pub fn flushModule(self: *MachO, comp: *Compilation) !void {
     assert(!self.offset_table_count_dirty);
     assert(!self.header_dirty);
     assert(!self.load_commands_dirty);
+    assert(!self.rebase_info_dirty);
     assert(!self.binding_info_dirty);
     assert(!self.lazy_binding_info_dirty);
     assert(!self.export_info_dirty);
@@ -891,42 +923,9 @@ fn linkWithLLD(self: *MachO, comp: *Compilation) !void {
         return error.NoSymbolTableFound;
     }

-    // Parse dyld info
-    try self.parseBindingInfoTable();
-    try self.parseLazyBindingInfoTable();
-
-    // Update the dylib ordinals.
-    self.binding_info_table.dylib_ordinal = next_ordinal;
-    for (self.lazy_binding_info_table.symbols.items) |*symbol| {
-        symbol.dylib_ordinal = next_ordinal;
-    }
-
-    // Write updated dyld info.
-    const dyld_info = self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly;
-    {
-        const size = try self.binding_info_table.calcSize();
-        assert(dyld_info.bind_size >= size);
-
-        var buffer = try self.base.allocator.alloc(u8, @intCast(usize, size));
-        defer self.base.allocator.free(buffer);
-
-        var stream = std.io.fixedBufferStream(buffer);
-        try self.binding_info_table.write(stream.writer());
-
-        try self.base.file.?.pwriteAll(buffer, dyld_info.bind_off);
-    }
-    {
-        const size = try self.lazy_binding_info_table.calcSize();
-        assert(dyld_info.lazy_bind_size >= size);
-
-        var buffer = try self.base.allocator.alloc(u8, @intCast(usize, size));
-        defer self.base.allocator.free(buffer);
-
-        var stream = std.io.fixedBufferStream(buffer);
-        try self.lazy_binding_info_table.write(stream.writer());
-
-        try self.base.file.?.pwriteAll(buffer, dyld_info.lazy_bind_off);
-    }
+    // Patch dyld info
+    try self.fixupBindInfo(next_ordinal);
+    try self.fixupLazyBindInfo(next_ordinal);

     // Write updated load commands and the header
     try self.writeLoadCommands();
@@ -1008,14 +1007,20 @@ pub fn deinit(self: *MachO) void {
     if (self.d_sym) |*ds| {
         ds.deinit(self.base.allocator);
     }
-    self.binding_info_table.deinit(self.base.allocator);
-    self.lazy_binding_info_table.deinit(self.base.allocator);
+    for (self.extern_lazy_symbols.items()) |*entry| {
+        entry.value.deinit(self.base.allocator);
+    }
+    self.extern_lazy_symbols.deinit(self.base.allocator);
+    for (self.extern_nonlazy_symbols.items()) |*entry| {
+        entry.value.deinit(self.base.allocator);
+    }
+    self.extern_nonlazy_symbols.deinit(self.base.allocator);
     self.pie_fixups.deinit(self.base.allocator);
+    self.stub_fixups.deinit(self.base.allocator);
     self.text_block_free_list.deinit(self.base.allocator);
     self.offset_table.deinit(self.base.allocator);
     self.offset_table_free_list.deinit(self.base.allocator);
     self.string_table.deinit(self.base.allocator);
-    self.undef_symbols.deinit(self.base.allocator);
     self.global_symbols.deinit(self.base.allocator);
     self.global_symbol_free_list.deinit(self.base.allocator);
     self.local_symbols.deinit(self.base.allocator);
@@ -1211,7 +1216,7 @@ pub fn updateDecl(self: *MachO, module: *Module, decl: *Module.Decl) !void {
     }

     // Perform PIE fixups (if any)
-    const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment;
+    const text_segment = &self.load_commands.items[self.text_segment_cmd_index.?].Segment;
     const got_section = text_segment.sections.items[self.got_section_index.?];
     while (self.pie_fixups.popOrNull()) |fixup| {
         const target_addr = fixup.address;
@@ -1231,6 +1236,38 @@ pub fn updateDecl(self: *MachO, module: *Module, decl: *Module.Decl) !void {
         }
     }

+    // Resolve stubs (if any)
+    const stubs = text_segment.sections.items[self.stubs_section_index.?];
+    for (self.stub_fixups.items) |fixup| {
+        const stub_addr = stubs.addr + fixup.symbol * stubs.reserved2;
+        const text_addr = symbol.n_value + fixup.start;
+        switch (self.base.options.target.cpu.arch) {
+            .x86_64 => {
+                const displacement = @intCast(u32, stub_addr - text_addr - fixup.len);
+                var placeholder = code_buffer.items[fixup.start + fixup.len - @sizeOf(u32) ..][0..@sizeOf(u32)];
+                mem.writeIntSliceLittle(u32, placeholder, displacement);
+            },
+            .aarch64 => {
+                const displacement = @intCast(u32, stub_addr - text_addr);
+                var placeholder = code_buffer.items[fixup.start..][0..fixup.len];
+                mem.writeIntSliceLittle(u32, placeholder, aarch64.Instruction.bl(@intCast(i28, displacement)).toU32());
+            },
+            else => unreachable, // unsupported target architecture
+        }
+        if (!fixup.already_defined) {
+            try self.writeStub(fixup.symbol);
+            try self.writeStubInStubHelper(fixup.symbol);
+            try self.writeLazySymbolPointer(fixup.symbol);
+
+            const extern_sym = &self.extern_lazy_symbols.items()[fixup.symbol].value;
+            extern_sym.segment = self.data_segment_cmd_index.?;
+            extern_sym.offset = fixup.symbol * @sizeOf(u64);
+            self.rebase_info_dirty = true;
+            self.lazy_binding_info_dirty = true;
+        }
+    }
+    self.stub_fixups.shrinkRetainingCapacity(0);
+
     const text_section = text_segment.sections.items[self.text_section_index.?];
     const section_offset = symbol.n_value - text_section.addr;
     const file_offset = text_section.offset + section_offset;
@@ -1435,7 +1472,7 @@ pub fn populateMissingMetadata(self: *MachO) !void {
         const program_code_size_hint = self.base.options.program_code_size_hint;
         const offset_table_size_hint = @sizeOf(u64) * self.base.options.symbol_count_hint;
-        const ideal_size = self.header_pad + program_code_size_hint + offset_table_size_hint;
+        const ideal_size = self.header_pad + program_code_size_hint + 3 * offset_table_size_hint;
         const needed_size = mem.alignForwardGeneric(u64, satMul(ideal_size, alloc_num) / alloc_den, self.page_size);

         log.debug("found __TEXT segment free space 0x{x} to 0x{x}", .{ 0, needed_size });
@@ -1492,9 +1529,13 @@ pub fn populateMissingMetadata(self: *MachO) !void {
     }
     if (self.got_section_index == null) {
         const text_segment = &self.load_commands.items[self.text_segment_cmd_index.?].Segment;
-        const text_section = &text_segment.sections.items[self.text_section_index.?];
         self.got_section_index = @intCast(u16, text_segment.sections.items.len);

+        const alignment: u2 = switch (self.base.options.target.cpu.arch) {
+            .x86_64 => 0,
+            .aarch64 => 2,
+            else => unreachable, // unhandled architecture type
+        };
         const flags = macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS;
         const needed_size = @sizeOf(u64) * self.base.options.symbol_count_hint;
         const off = text_segment.findFreeSpace(needed_size, @alignOf(u64), self.header_pad);
@@ -1508,7 +1549,220 @@ pub fn populateMissingMetadata(self: *MachO) !void {
             .addr = text_segment.inner.vmaddr + off,
             .size = needed_size,
             .offset = @intCast(u32, off),
-            .@"align" = 3, // 2^@sizeOf(u64)
+            .@"align" = alignment,
+            .reloff = 0,
+            .nreloc = 0,
+            .flags = flags,
+            .reserved1 = 0,
+            .reserved2 = 0,
+            .reserved3 = 0,
+        });
+        self.header_dirty = true;
+        self.load_commands_dirty = true;
+    }
+    if (self.stubs_section_index == null) {
+        const text_segment = &self.load_commands.items[self.text_segment_cmd_index.?].Segment;
+        self.stubs_section_index = @intCast(u16, text_segment.sections.items.len);
+
+        const alignment: u2 = switch (self.base.options.target.cpu.arch) {
+            .x86_64 => 0,
+            .aarch64 => 2,
+            else => unreachable, // unhandled architecture type
+        };
+        const stub_size: u4 = switch (self.base.options.target.cpu.arch) {
+            .x86_64 => 6,
+            .aarch64 => 2 * @sizeOf(u32),
+            else => unreachable, // unhandled architecture type
+        };
+        const flags = macho.S_SYMBOL_STUBS | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS;
+        const needed_size = @sizeOf(u64) * self.base.options.symbol_count_hint;
+        const off = text_segment.findFreeSpace(needed_size, @alignOf(u64), self.header_pad);
+        assert(off + needed_size <= text_segment.inner.fileoff + text_segment.inner.filesize); // TODO Must expand __TEXT segment.
+ + log.debug("found __stubs section free space 0x{x} to 0x{x}", .{ off, off + needed_size }); + + try text_segment.addSection(self.base.allocator, .{ + .sectname = makeStaticString("__stubs"), + .segname = makeStaticString("__TEXT"), + .addr = text_segment.inner.vmaddr + off, + .size = needed_size, + .offset = @intCast(u32, off), + .@"align" = alignment, + .reloff = 0, + .nreloc = 0, + .flags = flags, + .reserved1 = 0, + .reserved2 = stub_size, + .reserved3 = 0, + }); + self.header_dirty = true; + self.load_commands_dirty = true; + } + if (self.stub_helper_section_index == null) { + const text_segment = &self.load_commands.items[self.text_segment_cmd_index.?].Segment; + self.stub_helper_section_index = @intCast(u16, text_segment.sections.items.len); + + const alignment: u2 = switch (self.base.options.target.cpu.arch) { + .x86_64 => 0, + .aarch64 => 2, + else => unreachable, // unhandled architecture type + }; + const flags = macho.S_REGULAR | macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS; + const needed_size = @sizeOf(u64) * self.base.options.symbol_count_hint; + const off = text_segment.findFreeSpace(needed_size, @alignOf(u64), self.header_pad); + assert(off + needed_size <= text_segment.inner.fileoff + text_segment.inner.filesize); // TODO Must expand __TEXT segment. + + log.debug("found __stub_helper section free space 0x{x} to 0x{x}", .{ off, off + needed_size }); + + try text_segment.addSection(self.base.allocator, .{ + .sectname = makeStaticString("__stub_helper"), + .segname = makeStaticString("__TEXT"), + .addr = text_segment.inner.vmaddr + off, + .size = needed_size, + .offset = @intCast(u32, off), + .@"align" = alignment, + .reloff = 0, + .nreloc = 0, + .flags = flags, + .reserved1 = 0, + .reserved2 = 0, + .reserved3 = 0, + }); + self.header_dirty = true; + self.load_commands_dirty = true; + } + if (self.data_const_segment_cmd_index == null) { + self.data_const_segment_cmd_index = @intCast(u16, self.load_commands.items.len); + const maxprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE | macho.VM_PROT_EXECUTE; + const initprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE; + const address_and_offset = self.nextSegmentAddressAndOffset(); + + const ideal_size = @sizeOf(u64) * self.base.options.symbol_count_hint; + const needed_size = mem.alignForwardGeneric(u64, satMul(ideal_size, alloc_num) / alloc_den, self.page_size); + + log.debug("found __DATA_CONST segment free space 0x{x} to 0x{x}", .{ address_and_offset.offset, address_and_offset.offset + needed_size }); + + try self.load_commands.append(self.base.allocator, .{ + .Segment = SegmentCommand.empty(.{ + .cmd = macho.LC_SEGMENT_64, + .cmdsize = @sizeOf(macho.segment_command_64), + .segname = makeStaticString("__DATA_CONST"), + .vmaddr = address_and_offset.address, + .vmsize = needed_size, + .fileoff = address_and_offset.offset, + .filesize = needed_size, + .maxprot = maxprot, + .initprot = initprot, + .nsects = 0, + .flags = 0, + }), + }); + self.header_dirty = true; + self.load_commands_dirty = true; + } + if (self.data_got_section_index == null) { + const dc_segment = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment; + self.data_got_section_index = @intCast(u16, dc_segment.sections.items.len); + + const flags = macho.S_NON_LAZY_SYMBOL_POINTERS; + const needed_size = @sizeOf(u64) * self.base.options.symbol_count_hint; + const off = dc_segment.findFreeSpace(needed_size, @alignOf(u64), null); + assert(off + needed_size <= dc_segment.inner.fileoff + dc_segment.inner.filesize); // TODO Must expand 
__DATA_CONST segment. + + log.debug("found __got section free space 0x{x} to 0x{x}", .{ off, off + needed_size }); + + try dc_segment.addSection(self.base.allocator, .{ + .sectname = makeStaticString("__got"), + .segname = makeStaticString("__DATA_CONST"), + .addr = dc_segment.inner.vmaddr + off - dc_segment.inner.fileoff, + .size = needed_size, + .offset = @intCast(u32, off), + .@"align" = 3, // 2^3 = @sizeOf(u64) + .reloff = 0, + .nreloc = 0, + .flags = flags, + .reserved1 = 0, + .reserved2 = 0, + .reserved3 = 0, + }); + self.header_dirty = true; + self.load_commands_dirty = true; + } + if (self.data_segment_cmd_index == null) { + self.data_segment_cmd_index = @intCast(u16, self.load_commands.items.len); + const maxprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE | macho.VM_PROT_EXECUTE; + const initprot = macho.VM_PROT_READ | macho.VM_PROT_WRITE; + const address_and_offset = self.nextSegmentAddressAndOffset(); + + const ideal_size = 2 * @sizeOf(u64) * self.base.options.symbol_count_hint; + const needed_size = mem.alignForwardGeneric(u64, satMul(ideal_size, alloc_num) / alloc_den, self.page_size); + + log.debug("found __DATA segment free space 0x{x} to 0x{x}", .{ address_and_offset.offset, address_and_offset.offset + needed_size }); + + try self.load_commands.append(self.base.allocator, .{ + .Segment = SegmentCommand.empty(.{ + .cmd = macho.LC_SEGMENT_64, + .cmdsize = @sizeOf(macho.segment_command_64), + .segname = makeStaticString("__DATA"), + .vmaddr = address_and_offset.address, + .vmsize = needed_size, + .fileoff = address_and_offset.offset, + .filesize = needed_size, + .maxprot = maxprot, + .initprot = initprot, + .nsects = 0, + .flags = 0, + }), + }); + self.header_dirty = true; + self.load_commands_dirty = true; + } + if (self.la_symbol_ptr_section_index == null) { + const data_segment = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; + self.la_symbol_ptr_section_index = @intCast(u16, data_segment.sections.items.len); + + const flags = macho.S_LAZY_SYMBOL_POINTERS; + const needed_size = @sizeOf(u64) * self.base.options.symbol_count_hint; + const off = data_segment.findFreeSpace(needed_size, @alignOf(u64), null); + assert(off + needed_size <= data_segment.inner.fileoff + data_segment.inner.filesize); // TODO Must expand __DATA segment. + + log.debug("found __la_symbol_ptr section free space 0x{x} to 0x{x}", .{ off, off + needed_size }); + + try data_segment.addSection(self.base.allocator, .{ + .sectname = makeStaticString("__la_symbol_ptr"), + .segname = makeStaticString("__DATA"), + .addr = data_segment.inner.vmaddr + off - data_segment.inner.fileoff, + .size = needed_size, + .offset = @intCast(u32, off), + .@"align" = 3, // 2^3 = @sizeOf(u64) + .reloff = 0, + .nreloc = 0, + .flags = flags, + .reserved1 = 0, + .reserved2 = 0, + .reserved3 = 0, + }); + self.header_dirty = true; + self.load_commands_dirty = true; + } + if (self.data_section_index == null) { + const data_segment = &self.load_commands.items[self.data_segment_cmd_index.?].Segment; + self.data_section_index = @intCast(u16, data_segment.sections.items.len); + + const flags = macho.S_REGULAR; + const needed_size = @sizeOf(u64) * self.base.options.symbol_count_hint; + const off = data_segment.findFreeSpace(needed_size, @alignOf(u64), null); + assert(off + needed_size <= data_segment.inner.fileoff + data_segment.inner.filesize); // TODO Must expand __DATA segment. 
+ + log.debug("found __data section free space 0x{x} to 0x{x}", .{ off, off + needed_size }); + + try data_segment.addSection(self.base.allocator, .{ + .sectname = makeStaticString("__data"), + .segname = makeStaticString("__DATA"), + .addr = data_segment.inner.vmaddr + off - data_segment.inner.fileoff, + .size = needed_size, + .offset = @intCast(u32, off), + .@"align" = 3, // 2^3 = @sizeOf(u64) .reloff = 0, .nreloc = 0, .flags = flags, @@ -1549,12 +1803,6 @@ pub fn populateMissingMetadata(self: *MachO) !void { if (self.dyld_info_cmd_index == null) { self.dyld_info_cmd_index = @intCast(u16, self.load_commands.items.len); - // TODO Preallocate rebase, binding, and lazy binding info. - const export_size = 2; - const export_off = self.findFreeSpaceLinkedit(export_size, 1); - - log.debug("found export info free space 0x{x} to 0x{x}", .{ export_off, export_off + export_size }); - try self.load_commands.append(self.base.allocator, .{ .DyldInfoOnly = .{ .cmd = macho.LC_DYLD_INFO_ONLY, @@ -1567,37 +1815,67 @@ pub fn populateMissingMetadata(self: *MachO) !void { .weak_bind_size = 0, .lazy_bind_off = 0, .lazy_bind_size = 0, - .export_off = @intCast(u32, export_off), - .export_size = export_size, + .export_off = 0, + .export_size = 0, }, }); + + const dyld = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly; + + // Preallocate rebase, binding, lazy binding info, and export info. + const expected_size = 48; // TODO This is totally random. + const rebase_off = self.findFreeSpaceLinkedit(expected_size, 1); + log.debug("found rebase info free space 0x{x} to 0x{x}", .{ rebase_off, rebase_off + expected_size }); + dyld.rebase_off = @intCast(u32, rebase_off); + dyld.rebase_size = expected_size; + + const bind_off = self.findFreeSpaceLinkedit(expected_size, 1); + log.debug("found binding info free space 0x{x} to 0x{x}", .{ bind_off, bind_off + expected_size }); + dyld.bind_off = @intCast(u32, bind_off); + dyld.bind_size = expected_size; + + const lazy_bind_off = self.findFreeSpaceLinkedit(expected_size, 1); + log.debug("found lazy binding info free space 0x{x} to 0x{x}", .{ lazy_bind_off, lazy_bind_off + expected_size }); + dyld.lazy_bind_off = @intCast(u32, lazy_bind_off); + dyld.lazy_bind_size = expected_size; + + const export_off = self.findFreeSpaceLinkedit(expected_size, 1); + log.debug("found export info free space 0x{x} to 0x{x}", .{ export_off, export_off + expected_size }); + dyld.export_off = @intCast(u32, export_off); + dyld.export_size = expected_size; + self.header_dirty = true; self.load_commands_dirty = true; } if (self.symtab_cmd_index == null) { self.symtab_cmd_index = @intCast(u16, self.load_commands.items.len); + try self.load_commands.append(self.base.allocator, .{ + .Symtab = .{ + .cmd = macho.LC_SYMTAB, + .cmdsize = @sizeOf(macho.symtab_command), + .symoff = 0, + .nsyms = 0, + .stroff = 0, + .strsize = 0, + }, + }); + + const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab; + const symtab_size = self.base.options.symbol_count_hint * @sizeOf(macho.nlist_64); const symtab_off = self.findFreeSpaceLinkedit(symtab_size, @sizeOf(macho.nlist_64)); - log.debug("found symbol table free space 0x{x} to 0x{x}", .{ symtab_off, symtab_off + symtab_size }); + symtab.symoff = @intCast(u32, symtab_off); + symtab.nsyms = @intCast(u32, self.base.options.symbol_count_hint); try self.string_table.append(self.base.allocator, 0); // Need a null at position 0. 
const strtab_size = self.string_table.items.len; const strtab_off = self.findFreeSpaceLinkedit(strtab_size, 1); - log.debug("found string table free space 0x{x} to 0x{x}", .{ strtab_off, strtab_off + strtab_size }); + symtab.stroff = @intCast(u32, strtab_off); + symtab.strsize = @intCast(u32, strtab_size); - try self.load_commands.append(self.base.allocator, .{ - .Symtab = .{ - .cmd = macho.LC_SYMTAB, - .cmdsize = @sizeOf(macho.symtab_command), - .symoff = @intCast(u32, symtab_off), - .nsyms = @intCast(u32, self.base.options.symbol_count_hint), - .stroff = @intCast(u32, strtab_off), - .strsize = @intCast(u32, strtab_size), - }, - }); self.header_dirty = true; self.load_commands_dirty = true; self.string_table_dirty = true; @@ -1605,7 +1883,11 @@ pub fn populateMissingMetadata(self: *MachO) !void { if (self.dysymtab_cmd_index == null) { self.dysymtab_cmd_index = @intCast(u16, self.load_commands.items.len); - // TODO Preallocate space for indirect symbol table. + // Preallocate space for indirect symbol table. + const indsymtab_size = self.base.options.symbol_count_hint * @sizeOf(u64); // Each entry is just a u64. + const indsymtab_off = self.findFreeSpaceLinkedit(indsymtab_size, @sizeOf(u64)); + + log.debug("found indirect symbol table free space 0x{x} to 0x{x}", .{ indsymtab_off, indsymtab_off + indsymtab_size }); try self.load_commands.append(self.base.allocator, .{ .Dysymtab = .{ @@ -1623,8 +1905,8 @@ pub fn populateMissingMetadata(self: *MachO) !void { .nmodtab = 0, .extrefsymoff = 0, .nextrefsyms = 0, - .indirectsymoff = 0, - .nindirectsyms = 0, + .indirectsymoff = @intCast(u32, indsymtab_off), + .nindirectsyms = @intCast(u32, self.base.options.symbol_count_hint), .extreloff = 0, .nextrel = 0, .locreloff = 0, @@ -1752,16 +2034,73 @@ pub fn populateMissingMetadata(self: *MachO) !void { self.header_dirty = true; self.load_commands_dirty = true; } - if (self.dyld_stub_binder_index == null) { - self.dyld_stub_binder_index = @intCast(u16, self.undef_symbols.items.len); - const name = try self.makeString("dyld_stub_binder"); - try self.undef_symbols.append(self.base.allocator, .{ - .n_strx = name, - .n_type = macho.N_UNDF | macho.N_EXT, - .n_sect = 0, - .n_desc = macho.REFERENCE_FLAG_UNDEFINED_NON_LAZY | macho.N_SYMBOL_RESOLVER, - .n_value = 0, + if (!self.extern_nonlazy_symbols.contains("dyld_stub_binder")) { + const index = @intCast(u32, self.extern_nonlazy_symbols.items().len); + const name = try std.fmt.allocPrint(self.base.allocator, "dyld_stub_binder", .{}); + try self.extern_nonlazy_symbols.putNoClobber(self.base.allocator, name, .{ + .name = name, + .dylib_ordinal = 1, // TODO this is currently hardcoded. 
+            .segment = self.data_const_segment_cmd_index.?,
+            .offset = index * @sizeOf(u64),
         });
+        self.binding_info_dirty = true;
+    }
+    if (self.stub_helper_stubs_start_off == null) {
+        const text_segment = &self.load_commands.items[self.text_segment_cmd_index.?].Segment;
+        const stub_helper = &text_segment.sections.items[self.stub_helper_section_index.?];
+        const data_segment = &self.load_commands.items[self.data_segment_cmd_index.?].Segment;
+        const data = &data_segment.sections.items[self.data_section_index.?];
+        const data_const_segment = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment;
+        const got = &data_const_segment.sections.items[self.data_got_section_index.?];
+        switch (self.base.options.target.cpu.arch) {
+            .x86_64 => {
+                const code_size = 15;
+                var code: [code_size]u8 = undefined;
+                // lea %r11, [rip + disp]
+                code[0] = 0x4c;
+                code[1] = 0x8d;
+                code[2] = 0x1d;
+                {
+                    const displacement = @intCast(u32, data.addr - stub_helper.addr - 7);
+                    mem.writeIntLittle(u32, code[3..7], displacement);
+                }
+                // push %r11
+                code[7] = 0x41;
+                code[8] = 0x53;
+                // jmp [rip + disp]
+                code[9] = 0xff;
+                code[10] = 0x25;
+                {
+                    const displacement = @intCast(u32, got.addr - stub_helper.addr - code_size);
+                    mem.writeIntLittle(u32, code[11..], displacement);
+                }
+                self.stub_helper_stubs_start_off = stub_helper.offset + code_size;
+                try self.base.file.?.pwriteAll(&code, stub_helper.offset);
+            },
+            .aarch64 => {
+                var code: [4 * @sizeOf(u32)]u8 = undefined;
+                {
+                    const displacement = data.addr - stub_helper.addr;
+                    mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.adr(.x17, @intCast(i21, displacement)).toU32());
+                }
+                mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.stp(
+                    .x16,
+                    .x17,
+                    aarch64.Register.sp,
+                    aarch64.Instruction.LoadStorePairOffset.pre_index(-16),
+                ).toU32());
+                {
+                    const displacement = got.addr - stub_helper.addr - 2 * @sizeOf(u32);
+                    mem.writeIntLittle(u32, code[8..12], aarch64.Instruction.ldr(.x16, .{
+                        .literal = @intCast(u19, displacement / 4),
+                    }).toU32());
+                }
+                mem.writeIntLittle(u32, code[12..16], aarch64.Instruction.br(.x16).toU32());
+                self.stub_helper_stubs_start_off = stub_helper.offset + 4 * @sizeOf(u32);
+                try self.base.file.?.pwriteAll(&code, stub_helper.offset);
+            },
+            else => unreachable,
+        }
     }
 }
@@ -1877,7 +2216,7 @@ pub fn makeStaticString(comptime bytes: []const u8) [16]u8 {
     return buf;
 }

-fn makeString(self: *MachO, bytes: []const u8) !u32 {
+pub fn makeString(self: *MachO, bytes: []const u8) !u32 {
     try self.string_table.ensureCapacity(self.base.allocator, self.string_table.items.len + bytes.len + 1);
     const result = @intCast(u32, self.string_table.items.len);
     self.string_table.appendSliceAssumeCapacity(bytes);
@@ -1907,16 +2246,13 @@ const NextSegmentAddressAndOffset = struct {
 };

 fn nextSegmentAddressAndOffset(self: *MachO) NextSegmentAddressAndOffset {
-    const prev_segment_idx = blk: {
-        if (self.data_segment_cmd_index) |idx| {
-            break :blk idx;
-        } else if (self.text_segment_cmd_index) |idx| {
-            break :blk idx;
-        } else {
-            unreachable; // unhandled LC_SEGMENT_64 load command before __TEXT
+    var prev_segment_idx: ?usize = null; // We use optional here for safety.
+    for (self.load_commands.items) |cmd, i| {
+        if (cmd == .Segment) {
+            prev_segment_idx = i;
         }
-    };
-    const prev_segment = self.load_commands.items[prev_segment_idx].Segment;
+    }
+    const prev_segment = self.load_commands.items[prev_segment_idx.?].Segment;
     const address = prev_segment.inner.vmaddr + prev_segment.inner.vmsize;
     const offset = prev_segment.inner.fileoff + prev_segment.inner.filesize;
     return .{
@@ -2100,11 +2436,98 @@ fn writeOffsetTableEntry(self: *MachO, index: usize) !void {
     try self.base.file.?.pwriteAll(&code, off);
 }

+fn writeLazySymbolPointer(self: *MachO, index: u32) !void {
+    const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment;
+    const stub_helper = text_segment.sections.items[self.stub_helper_section_index.?];
+    const data_segment = self.load_commands.items[self.data_segment_cmd_index.?].Segment;
+    const la_symbol_ptr = data_segment.sections.items[self.la_symbol_ptr_section_index.?];
+
+    const stub_size: u4 = switch (self.base.options.target.cpu.arch) {
+        .x86_64 => 10,
+        .aarch64 => 3 * @sizeOf(u32),
+        else => unreachable,
+    };
+    const stub_off = self.stub_helper_stubs_start_off.? + index * stub_size;
+    const end = stub_helper.addr + stub_off - stub_helper.offset;
+    var buf: [@sizeOf(u64)]u8 = undefined;
+    mem.writeIntLittle(u64, &buf, end);
+    const off = la_symbol_ptr.offset + index * @sizeOf(u64);
+    log.debug("writing lazy symbol pointer entry 0x{x} at 0x{x}", .{ end, off });
+    try self.base.file.?.pwriteAll(&buf, off);
+}
+
+fn writeStub(self: *MachO, index: u32) !void {
+    const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment;
+    const stubs = text_segment.sections.items[self.stubs_section_index.?];
+    const data_segment = self.load_commands.items[self.data_segment_cmd_index.?].Segment;
+    const la_symbol_ptr = data_segment.sections.items[self.la_symbol_ptr_section_index.?];
+
+    const stub_off = stubs.offset + index * stubs.reserved2;
+    const stub_addr = stubs.addr + index * stubs.reserved2;
+    const la_ptr_addr = la_symbol_ptr.addr + index * @sizeOf(u64);
+    log.debug("writing stub at 0x{x}", .{stub_off});
+    var code = try self.base.allocator.alloc(u8, stubs.reserved2);
+    defer self.base.allocator.free(code);
+    switch (self.base.options.target.cpu.arch) {
+        .x86_64 => {
+            const displacement = @intCast(u32, la_ptr_addr - stub_addr - stubs.reserved2);
+            // jmp
+            code[0] = 0xff;
+            code[1] = 0x25;
+            mem.writeIntLittle(u32, code[2..][0..4], displacement);
+        },
+        .aarch64 => {
+            const displacement = la_ptr_addr - stub_addr;
+            mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.ldr(.x16, .{
+                .literal = @intCast(u19, displacement / 4),
+            }).toU32());
+            mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.br(.x16).toU32());
+        },
+        else => unreachable,
+    }
+    try self.base.file.?.pwriteAll(code, stub_off);
+}
+
+fn writeStubInStubHelper(self: *MachO, index: u32) !void {
+    const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment;
+    const stub_helper = text_segment.sections.items[self.stub_helper_section_index.?];
+
+    const stub_size: u4 = switch (self.base.options.target.cpu.arch) {
+        .x86_64 => 10,
+        .aarch64 => 3 * @sizeOf(u32),
+        else => unreachable,
+    };
+    const stub_off = self.stub_helper_stubs_start_off.? + index * stub_size;
+    var code = try self.base.allocator.alloc(u8, stub_size);
+    defer self.base.allocator.free(code);
+    switch (self.base.options.target.cpu.arch) {
+        .x86_64 => {
+            const displacement = @intCast(i32, @intCast(i64, stub_helper.offset) - @intCast(i64, stub_off) - stub_size);
+            // pushq
+            code[0] = 0x68;
+            mem.writeIntLittle(u32, code[1..][0..4], 0x0); // Just a placeholder populated in `populateLazyBindOffsetsInStubHelper`.
+            // jmpq
+            code[5] = 0xe9;
+            mem.writeIntLittle(u32, code[6..][0..4], @bitCast(u32, displacement));
+        },
+        .aarch64 => {
+            const displacement = @intCast(i64, stub_helper.offset) - @intCast(i64, stub_off) - 4;
+            mem.writeIntLittle(u32, code[0..4], aarch64.Instruction.ldr(.w16, .{
+                .literal = 0x2,
+            }).toU32());
+            mem.writeIntLittle(u32, code[4..8], aarch64.Instruction.b(@intCast(i28, displacement)).toU32());
+            mem.writeIntLittle(u32, code[8..12], 0x0); // Just a placeholder populated in `populateLazyBindOffsetsInStubHelper`.
+        },
+        else => unreachable,
+    }
+    try self.base.file.?.pwriteAll(code, stub_off);
+}
+
 fn relocateSymbolTable(self: *MachO) !void {
     const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab;
     const nlocals = self.local_symbols.items.len;
     const nglobals = self.global_symbols.items.len;
-    const nundefs = self.undef_symbols.items.len;
+    const nundefs = self.extern_lazy_symbols.items().len + self.extern_nonlazy_symbols.items().len;
     const nsyms = nlocals + nglobals + nundefs;

     if (symtab.nsyms < nsyms) {
@@ -2149,7 +2572,31 @@ fn writeAllGlobalAndUndefSymbols(self: *MachO) !void {
     const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab;
     const nlocals = self.local_symbols.items.len;
     const nglobals = self.global_symbols.items.len;
-    const nundefs = self.undef_symbols.items.len;
+
+    const nundefs = self.extern_lazy_symbols.items().len + self.extern_nonlazy_symbols.items().len;
+    var undefs = std.ArrayList(macho.nlist_64).init(self.base.allocator);
+    defer undefs.deinit();
+    try undefs.ensureCapacity(nundefs);
+    for (self.extern_lazy_symbols.items()) |entry| {
+        const name = try self.makeString(entry.key);
+        undefs.appendAssumeCapacity(.{
+            .n_strx = name,
+            .n_type = std.macho.N_UNDF | std.macho.N_EXT,
+            .n_sect = 0,
+            .n_desc = std.macho.REFERENCE_FLAG_UNDEFINED_NON_LAZY | std.macho.N_SYMBOL_RESOLVER,
+            .n_value = 0,
+        });
+    }
+    for (self.extern_nonlazy_symbols.items()) |entry| {
+        const name = try self.makeString(entry.key);
+        undefs.appendAssumeCapacity(.{
+            .n_strx = name,
+            .n_type = std.macho.N_UNDF | std.macho.N_EXT,
+            .n_sect = 0,
+            .n_desc = std.macho.REFERENCE_FLAG_UNDEFINED_NON_LAZY | std.macho.N_SYMBOL_RESOLVER,
+            .n_value = 0,
+        });
+    }

     const locals_off = symtab.symoff;
     const locals_size = nlocals * @sizeOf(macho.nlist_64);
@@ -2161,8 +2608,8 @@ fn writeAllGlobalAndUndefSymbols(self: *MachO) !void {
     const undefs_off = globals_off + globals_size;
     const undefs_size = nundefs * @sizeOf(macho.nlist_64);

-    log.debug("writing undef symbols from 0x{x} to 0x{x}", .{ undefs_off, undefs_size + undefs_off });
-    try self.base.file.?.pwriteAll(mem.sliceAsBytes(self.undef_symbols.items), undefs_off);
+    log.debug("writing extern symbols from 0x{x} to 0x{x}", .{ undefs_off, undefs_size + undefs_off });
+    try self.base.file.?.pwriteAll(mem.sliceAsBytes(undefs.items), undefs_off);

     // Update dynamic symbol table.
     const dysymtab = &self.load_commands.items[self.dysymtab_cmd_index.?].Dysymtab;
@@ -2174,6 +2621,49 @@ fn writeAllGlobalAndUndefSymbols(self: *MachO) !void {
     self.load_commands_dirty = true;
 }

+fn writeIndirectSymbolTable(self: *MachO) !void {
+    const text_segment = &self.load_commands.items[self.text_segment_cmd_index.?].Segment;
+    const stubs = &text_segment.sections.items[self.stubs_section_index.?];
+    const data_const_seg = &self.load_commands.items[self.data_const_segment_cmd_index.?].Segment;
+    const got = &data_const_seg.sections.items[self.data_got_section_index.?];
+    const data_segment = &self.load_commands.items[self.data_segment_cmd_index.?].Segment;
+    const la_symbol_ptr = &data_segment.sections.items[self.la_symbol_ptr_section_index.?];
+    const dysymtab = &self.load_commands.items[self.dysymtab_cmd_index.?].Dysymtab;
+    dysymtab.nindirectsyms = 0;
+    // TODO check if we have allocated enough size.
+
+    var buf: [@sizeOf(u32)]u8 = undefined;
+    var off = dysymtab.indirectsymoff;
+
+    stubs.reserved1 = 0;
+    for (self.extern_lazy_symbols.items()) |_, i| {
+        const symtab_idx = @intCast(u32, dysymtab.iundefsym + i);
+        mem.writeIntLittle(u32, &buf, symtab_idx);
+        try self.base.file.?.pwriteAll(&buf, off);
+        off += @sizeOf(u32);
+        dysymtab.nindirectsyms += 1;
+    }
+
+    const base_id = @intCast(u32, self.extern_lazy_symbols.items().len);
+    got.reserved1 = base_id;
+    for (self.extern_nonlazy_symbols.items()) |_, i| {
+        const symtab_idx = @intCast(u32, dysymtab.iundefsym + i + base_id);
+        mem.writeIntLittle(u32, &buf, symtab_idx);
+        try self.base.file.?.pwriteAll(&buf, off);
+        off += @sizeOf(u32);
+        dysymtab.nindirectsyms += 1;
+    }
+
+    la_symbol_ptr.reserved1 = got.reserved1 + @intCast(u32, self.extern_nonlazy_symbols.items().len);
+    for (self.extern_lazy_symbols.items()) |_, i| {
+        const symtab_idx = @intCast(u32, dysymtab.iundefsym + i);
+        mem.writeIntLittle(u32, &buf, symtab_idx);
+        try self.base.file.?.pwriteAll(&buf, off);
+        off += @sizeOf(u32);
+        dysymtab.nindirectsyms += 1;
+    }
+}
+
 fn writeCodeSignaturePadding(self: *MachO) !void {
     const tracy = trace(@src());
     defer tracy.end();
@@ -2272,18 +2762,63 @@ fn writeExportTrie(self: *MachO) !void {
     self.export_info_dirty = false;
 }

+fn writeRebaseInfoTable(self: *MachO) !void {
+    if (!self.rebase_info_dirty) return;
+
+    const tracy = trace(@src());
+    defer tracy.end();
+
+    var symbols = try self.base.allocator.alloc(*const ExternSymbol, self.extern_lazy_symbols.items().len);
+    defer self.base.allocator.free(symbols);
+
+    for (self.extern_lazy_symbols.items()) |*entry, i| {
+        symbols[i] = &entry.value;
+    }
+
+    const size = try rebaseInfoSize(symbols);
+    var buffer = try self.base.allocator.alloc(u8, @intCast(usize, size));
+    defer self.base.allocator.free(buffer);
+
+    var stream = std.io.fixedBufferStream(buffer);
+    try writeRebaseInfo(symbols, stream.writer());
+
+    const linkedit_segment = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment;
+    const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly;
+    const allocated_size = self.allocatedSizeLinkedit(dyld_info.rebase_off);
+    const needed_size = mem.alignForwardGeneric(u64, buffer.len, @alignOf(u64));
+
+    if (needed_size > allocated_size) {
+        dyld_info.rebase_off = 0;
+        dyld_info.rebase_off = @intCast(u32, self.findFreeSpaceLinkedit(needed_size, 1));
+    }
+
+    dyld_info.rebase_size = @intCast(u32, needed_size);
+    log.debug("writing rebase info from 0x{x} to 0x{x}", .{ dyld_info.rebase_off, dyld_info.rebase_off + dyld_info.rebase_size });
+
+    try self.base.file.?.pwriteAll(buffer, dyld_info.rebase_off);
+    self.load_commands_dirty = true;
+    self.rebase_info_dirty = false;
+}
+
 fn writeBindingInfoTable(self: *MachO) !void {
     if (!self.binding_info_dirty) return;

     const tracy = trace(@src());
     defer tracy.end();

-    const size = try self.binding_info_table.calcSize();
+    var symbols = try self.base.allocator.alloc(*const ExternSymbol, self.extern_nonlazy_symbols.items().len);
+    defer self.base.allocator.free(symbols);
+
+    for (self.extern_nonlazy_symbols.items()) |*entry, i| {
+        symbols[i] = &entry.value;
+    }
+
+    const size = try bindInfoSize(symbols);
     var buffer = try self.base.allocator.alloc(u8, @intCast(usize, size));
     defer self.base.allocator.free(buffer);

     var stream = std.io.fixedBufferStream(buffer);
-    try self.binding_info_table.write(stream.writer());
+    try writeBindInfo(symbols, stream.writer());

     const linkedit_segment = self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment;
     const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly;
@@ -2306,12 +2841,19 @@ fn writeBindingInfoTable(self: *MachO) !void {
 fn writeLazyBindingInfoTable(self: *MachO) !void {
     if (!self.lazy_binding_info_dirty) return;

-    const size = try self.lazy_binding_info_table.calcSize();
+    var symbols = try self.base.allocator.alloc(*const ExternSymbol, self.extern_lazy_symbols.items().len);
+    defer self.base.allocator.free(symbols);
+
+    for (self.extern_lazy_symbols.items()) |*entry, i| {
+        symbols[i] = &entry.value;
+    }
+
+    const size = try lazyBindInfoSize(symbols);
     var buffer = try self.base.allocator.alloc(u8, @intCast(usize, size));
     defer self.base.allocator.free(buffer);

     var stream = std.io.fixedBufferStream(buffer);
-    try self.lazy_binding_info_table.write(stream.writer());
+    try writeLazyBindInfo(symbols, stream.writer());

     const linkedit_segment = self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment;
     const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly;
@@ -2327,10 +2869,78 @@ fn writeLazyBindingInfoTable(self: *MachO) !void {
     log.debug("writing lazy binding info from 0x{x} to 0x{x}", .{ dyld_info.lazy_bind_off, dyld_info.lazy_bind_off + dyld_info.lazy_bind_size });

     try self.base.file.?.pwriteAll(buffer, dyld_info.lazy_bind_off);
+    try self.populateLazyBindOffsetsInStubHelper(buffer);
     self.load_commands_dirty = true;
     self.lazy_binding_info_dirty = false;
 }

+fn populateLazyBindOffsetsInStubHelper(self: *MachO, buffer: []const u8) !void {
+    if (self.extern_lazy_symbols.items().len == 0) return;
+
+    var stream = std.io.fixedBufferStream(buffer);
+    var reader = stream.reader();
+    var offsets = std.ArrayList(u32).init(self.base.allocator);
+    try offsets.append(0);
+    defer offsets.deinit();
+    var valid_block = false;
+
+    while (true) {
+        const inst = reader.readByte() catch |err| switch (err) {
+            error.EndOfStream => break,
+            else => return err,
+        };
+        const imm: u8 = inst & macho.BIND_IMMEDIATE_MASK;
+        const opcode: u8 = inst & macho.BIND_OPCODE_MASK;
+
+        switch (opcode) {
+            macho.BIND_OPCODE_DO_BIND => {
+                valid_block = true;
+            },
+            macho.BIND_OPCODE_DONE => {
+                if (valid_block) {
+                    const offset = try stream.getPos();
+                    try offsets.append(@intCast(u32, offset));
+                }
+                valid_block = false;
+            },
+            macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM => {
+                var next = try reader.readByte();
+                while (next != @as(u8, 0)) {
+                    next = try reader.readByte();
+                }
+            },
+            macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB => {
+                _ = try std.leb.readULEB128(u64, reader);
+            },
+            macho.BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB => {
+                _ = try std.leb.readULEB128(u64, reader);
+            },
+            macho.BIND_OPCODE_SET_ADDEND_SLEB => {
+                _ = try std.leb.readILEB128(i64, reader);
+            },
+            else => {},
+        }
+    }
+    assert(self.extern_lazy_symbols.items().len <= offsets.items.len);
+
+    const stub_size: u4 = switch (self.base.options.target.cpu.arch) {
+        .x86_64 => 10,
+        .aarch64 => 3 * @sizeOf(u32),
+        else => unreachable,
+    };
+    const off: u4 = switch (self.base.options.target.cpu.arch) {
+        .x86_64 => 1,
+        .aarch64 => 2 * @sizeOf(u32),
+        else => unreachable,
+    };
+    var buf: [@sizeOf(u32)]u8 = undefined;
+    for (self.extern_lazy_symbols.items()) |_, i| {
+        const placeholder_off = self.stub_helper_stubs_start_off.? + i * stub_size + off;
+        mem.writeIntLittle(u32, &buf, offsets.items[i]);
+        try self.base.file.?.pwriteAll(&buf, placeholder_off);
+    }
+}
+
 fn writeStringTable(self: *MachO) !void {
     if (!self.string_table_dirty) return;
@@ -2438,8 +3048,6 @@ fn writeHeader(self: *MachO) !void {
 }

 /// Parse MachO contents from existing binary file.
-/// TODO This method is incomplete and currently parses only the header
-/// plus the load commands.
 fn parseFromFile(self: *MachO, file: fs.File) !void {
     self.base.file = file;
     var reader = file.reader();
@@ -2464,6 +3072,8 @@ fn parseFromFile(self: *MachO, file: fs.File) !void {
                     }
                 } else if (parseAndCmpName(&x.inner.segname, "__DATA")) {
                     self.data_segment_cmd_index = i;
+                } else if (parseAndCmpName(&x.inner.segname, "__DATA_CONST")) {
+                    self.data_const_segment_cmd_index = i;
                 }
             },
             macho.LC_DYLD_INFO_ONLY => {
@@ -2549,24 +3159,61 @@ fn parseStringTable(self: *MachO) !void {
     self.string_table.appendSliceAssumeCapacity(buffer);
 }

-fn parseBindingInfoTable(self: *MachO) !void {
+fn fixupBindInfo(self: *MachO, dylib_ordinal: u32) !void {
     const dyld_info = self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly;
     var buffer = try self.base.allocator.alloc(u8, dyld_info.bind_size);
     defer self.base.allocator.free(buffer);
     const nread = try self.base.file.?.preadAll(buffer, dyld_info.bind_off);
     assert(nread == buffer.len);
-
-    var stream = std.io.fixedBufferStream(buffer);
-    try self.binding_info_table.read(stream.reader(), self.base.allocator);
+    try self.fixupInfoCommon(buffer, dylib_ordinal);
+    try self.base.file.?.pwriteAll(buffer, dyld_info.bind_off);
 }

-fn parseLazyBindingInfoTable(self: *MachO) !void {
+fn fixupLazyBindInfo(self: *MachO, dylib_ordinal: u32) !void {
     const dyld_info = self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly;
     var buffer = try self.base.allocator.alloc(u8, dyld_info.lazy_bind_size);
     defer self.base.allocator.free(buffer);
     const nread = try self.base.file.?.preadAll(buffer, dyld_info.lazy_bind_off);
     assert(nread == buffer.len);
+    try self.fixupInfoCommon(buffer, dylib_ordinal);
+    try self.base.file.?.pwriteAll(buffer, dyld_info.lazy_bind_off);
+}

+fn fixupInfoCommon(self: *MachO, buffer: []u8, dylib_ordinal: u32) !void {
     var stream = std.io.fixedBufferStream(buffer);
-    try self.lazy_binding_info_table.read(stream.reader(), self.base.allocator);
+    var reader = stream.reader();
+
+    while (true) {
+        const inst = reader.readByte() catch |err| switch (err) {
+            error.EndOfStream => break,
+            else => return err,
+        };
+        const imm: u8 = inst & macho.BIND_IMMEDIATE_MASK;
+        const opcode: u8 = inst & macho.BIND_OPCODE_MASK;
+
+        switch (opcode) {
+            macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM => {
+                var next = try reader.readByte();
+                while (next != @as(u8, 0)) {
+                    next = try reader.readByte();
+                }
+            },
+            macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB => {
+                _ = try std.leb.readULEB128(u64, reader);
+            },
+            macho.BIND_OPCODE_SET_DYLIB_SPECIAL_IMM, macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM => {
+                // Perform the fixup.
+                try stream.seekBy(-1);
+                var writer = stream.writer();
+                try writer.writeByte(macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | @truncate(u4, dylib_ordinal));
+            },
+            macho.BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB => {
+                _ = try std.leb.readULEB128(u64, reader);
+            },
+            macho.BIND_OPCODE_SET_ADDEND_SLEB => {
+                _ = try std.leb.readILEB128(i64, reader);
+            },
+            else => {},
+        }
+    }
 }
diff --git a/src/link/MachO/DebugSymbols.zig b/src/link/MachO/DebugSymbols.zig
index 11f87d5495..81a016ce42 100644
--- a/src/link/MachO/DebugSymbols.zig
+++ b/src/link/MachO/DebugSymbols.zig
@@ -39,6 +39,8 @@ load_commands: std.ArrayListUnmanaged(LoadCommand) = .{},
 pagezero_segment_cmd_index: ?u16 = null,
 /// __TEXT segment
 text_segment_cmd_index: ?u16 = null,
+/// __DATA_CONST segment
+data_const_segment_cmd_index: ?u16 = null,
 /// __DATA segment
 data_segment_cmd_index: ?u16 = null,
 /// __LINKEDIT segment
@@ -171,6 +173,15 @@ pub fn populateMissingMetadata(self: *DebugSymbols, allocator: *Allocator) !void
         self.header_dirty = true;
         self.load_commands_dirty = true;
     }
+    if (self.data_const_segment_cmd_index == null) outer: {
+        if (self.base.data_const_segment_cmd_index == null) break :outer; // __DATA_CONST is optional
+        self.data_const_segment_cmd_index = @intCast(u16, self.load_commands.items.len);
+        const base_cmd = self.base.load_commands.items[self.base.data_const_segment_cmd_index.?].Segment;
+        const cmd = try self.copySegmentCommand(allocator, base_cmd);
+        try self.load_commands.append(allocator, .{ .Segment = cmd });
+        self.header_dirty = true;
+        self.load_commands_dirty = true;
+    }
     if (self.data_segment_cmd_index == null) outer: {
         if (self.base.data_segment_cmd_index == null) break :outer; // __DATA is optional
         self.data_segment_cmd_index = @intCast(u16, self.load_commands.items.len);
diff --git a/src/link/MachO/imports.zig b/src/link/MachO/imports.zig
index c7f8d5c6a5..c5f6211f1a 100644
--- a/src/link/MachO/imports.zig
+++ b/src/link/MachO/imports.zig
@@ -6,323 +6,156 @@ const mem = std.mem;
 const assert = std.debug.assert;
 const Allocator = mem.Allocator;

-/// Table of binding info entries used to tell the dyld which
-/// symbols to bind at loading time.
-pub const BindingInfoTable = struct {
+pub const ExternSymbol = struct {
+    /// Symbol name.
+    /// We own the memory, therefore we'll need to free it by calling `deinit`.
+    /// In self-hosted, we don't expect it to be null ever.
+    /// However, this is for backwards compatibility with LLD when
+    /// we'll be patching things up post mortem.
+    name: ?[]u8 = null,
+
+    /// Id of the dynamic library where the specified entries can be found.
+    /// Id of 0 means self.
+    /// TODO this should really be an id into the table of all defined
+    /// dylibs.
     dylib_ordinal: i64 = 0,

-    /// Binding type; defaults to pointer type.
-    binding_type: u8 = macho.BIND_TYPE_POINTER,
-
-    symbols: std.ArrayListUnmanaged(Symbol) = .{},
-
-    pub const Symbol = struct {
-        /// Symbol name.
-        name: ?[]u8 = null,
-
-        /// Id of the segment where to bind this symbol to.
-        segment: u8,
+    /// Id of the segment where this symbol is defined (will have its address
+    /// resolved).
+    segment: u16 = 0,

-        /// Offset of this symbol wrt to the segment id encoded in `segment`.
-        offset: i64,
+    /// Offset relative to the start address of the `segment`.
+    offset: u32 = 0,

-        /// Addend value (if any).
-        addend: ?i64 = null,
-    };
-
-    pub fn deinit(self: *BindingInfoTable, allocator: *Allocator) void {
-        for (self.symbols.items) |*symbol| {
-            if (symbol.name) |name| {
-                allocator.free(name);
-            }
+    pub fn deinit(self: *ExternSymbol, allocator: *Allocator) void {
+        if (self.name) |name| {
+            allocator.free(name);
         }
-        self.symbols.deinit(allocator);
     }
+};

-    /// Parse the binding info table from byte stream.
-    pub fn read(self: *BindingInfoTable, reader: anytype, allocator: *Allocator) !void {
-        var symbol: Symbol = .{
-            .segment = 0,
-            .offset = 0,
-        };
-
-        var dylib_ordinal_set = false;
-        var done = false;
-        while (true) {
-            const inst = reader.readByte() catch |err| switch (err) {
-                error.EndOfStream => break,
-                else => return err,
-            };
-            const imm: u8 = inst & macho.BIND_IMMEDIATE_MASK;
-            const opcode: u8 = inst & macho.BIND_OPCODE_MASK;
+pub fn rebaseInfoSize(symbols: []*const ExternSymbol) !u64 {
+    var stream = std.io.countingWriter(std.io.null_writer);
+    var writer = stream.writer();
+    var size: u64 = 0;

-            switch (opcode) {
-                macho.BIND_OPCODE_DO_BIND => {
-                    try self.symbols.append(allocator, symbol);
-                    symbol = .{
-                        .segment = 0,
-                        .offset = 0,
-                    };
-                },
-                macho.BIND_OPCODE_DONE => {
-                    done = true;
-                    break;
-                },
-                macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM => {
-                    var name = std.ArrayList(u8).init(allocator);
-                    var next = try reader.readByte();
-                    while (next != @as(u8, 0)) {
-                        try name.append(next);
-                        next = try reader.readByte();
-                    }
-                    symbol.name = name.toOwnedSlice();
-                },
-                macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB => {
-                    symbol.segment = imm;
-                    symbol.offset = try leb.readILEB128(i64, reader);
-                },
-                macho.BIND_OPCODE_SET_DYLIB_SPECIAL_IMM, macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM => {
-                    assert(!dylib_ordinal_set);
-                    self.dylib_ordinal = imm;
-                },
-                macho.BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB => {
-                    assert(!dylib_ordinal_set);
-                    self.dylib_ordinal = try leb.readILEB128(i64, reader);
-                },
-                macho.BIND_OPCODE_SET_TYPE_IMM => {
-                    self.binding_type = imm;
-                },
-                macho.BIND_OPCODE_SET_ADDEND_SLEB => {
-                    symbol.addend = try leb.readILEB128(i64, reader);
-                },
-                else => {
-                    std.log.warn("unhandled BIND_OPCODE_: 0x{x}", .{opcode});
-                },
-            }
-        }
-        assert(done);
+    for (symbols) |symbol| {
+        size += 2;
+        try leb.writeILEB128(writer, symbol.offset);
+        size += 1;
     }

-    /// Write the binding info table to byte stream.
-    pub fn write(self: BindingInfoTable, writer: anytype) !void {
-        if (self.dylib_ordinal > 15) {
-            try writer.writeByte(macho.BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB);
-            try leb.writeULEB128(writer, @bitCast(u64, self.dylib_ordinal));
-        } else if (self.dylib_ordinal > 0) {
-            try writer.writeByte(macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | @truncate(u4, @bitCast(u64, self.dylib_ordinal)));
-        } else {
-            try writer.writeByte(macho.BIND_OPCODE_SET_DYLIB_SPECIAL_IMM | @truncate(u4, @bitCast(u64, self.dylib_ordinal)));
-        }
-        try writer.writeByte(macho.BIND_OPCODE_SET_TYPE_IMM | @truncate(u4, self.binding_type));
-
-        for (self.symbols.items) |symbol| {
-            if (symbol.name) |name| {
-                try writer.writeByte(macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM); // TODO Sometimes we might want to add flags.
-                try writer.writeAll(name);
-                try writer.writeByte(0);
-            }
+    size += 1 + stream.bytes_written;
+    return size;
+}

-            try writer.writeByte(macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | @truncate(u4, symbol.segment));
-            try leb.writeILEB128(writer, symbol.offset);
-
-            if (symbol.addend) |addend| {
-                try writer.writeByte(macho.BIND_OPCODE_SET_ADDEND_SLEB);
-                try leb.writeILEB128(writer, addend);
-            }
-
-            try writer.writeByte(macho.BIND_OPCODE_DO_BIND);
-        }
-
-        try writer.writeByte(macho.BIND_OPCODE_DONE);
+pub fn writeRebaseInfo(symbols: []*const ExternSymbol, writer: anytype) !void {
+    for (symbols) |symbol| {
+        try writer.writeByte(macho.REBASE_OPCODE_SET_TYPE_IMM | @truncate(u4, macho.REBASE_TYPE_POINTER));
+        try writer.writeByte(macho.REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | @truncate(u4, symbol.segment));
+        try leb.writeILEB128(writer, symbol.offset);
+        try writer.writeByte(macho.REBASE_OPCODE_DO_REBASE_IMM_TIMES | @truncate(u4, 1));
     }
+    try writer.writeByte(macho.REBASE_OPCODE_DONE);
+}

-    /// Calculate size in bytes of this binding info table.
-    pub fn calcSize(self: *BindingInfoTable) !u64 {
-        var stream = std.io.countingWriter(std.io.null_writer);
-        var writer = stream.writer();
-        var size: u64 = 1;
+pub fn bindInfoSize(symbols: []*const ExternSymbol) !u64 {
+    var stream = std.io.countingWriter(std.io.null_writer);
+    var writer = stream.writer();
+    var size: u64 = 0;

-        if (self.dylib_ordinal > 15) {
-            try leb.writeULEB128(writer, @bitCast(u64, self.dylib_ordinal));
+    for (symbols) |symbol| {
+        size += 1;
+        if (symbol.dylib_ordinal > 15) {
+            try leb.writeULEB128(writer, @bitCast(u64, symbol.dylib_ordinal));
         }
-        size += 1;
-
-        for (self.symbols.items) |symbol| {
-            if (symbol.name) |name| {
-                size += 1;
-                size += name.len;
-                size += 1;
-            }
-
+        if (symbol.name) |name| {
             size += 1;
-            try leb.writeILEB128(writer, symbol.offset);
-
-            if (symbol.addend) |addend| {
-                size += 1;
-                try leb.writeILEB128(writer, addend);
-            }
-
+            size += name.len;
             size += 1;
         }
-        size += 1 + stream.bytes_written;
-        return size;
+        size += 1;
+        try leb.writeILEB128(writer, symbol.offset);
+        size += 2;
     }
-};

-/// Table of lazy binding info entries used to tell the dyld which
-/// symbols to lazily bind at first load of a dylib.
-pub const LazyBindingInfoTable = struct {
-    symbols: std.ArrayListUnmanaged(Symbol) = .{},
+    size += stream.bytes_written;
+    return size;
+}

-    pub const Symbol = struct {
-        /// Symbol name.
-        name: ?[]u8 = null,
+pub fn writeBindInfo(symbols: []*const ExternSymbol, writer: anytype) !void {
+    for (symbols) |symbol| {
+        if (symbol.dylib_ordinal > 15) {
+            try writer.writeByte(macho.BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB);
+            try leb.writeULEB128(writer, @bitCast(u64, symbol.dylib_ordinal));
+        } else if (symbol.dylib_ordinal > 0) {
+            try writer.writeByte(macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | @truncate(u4, @bitCast(u64, symbol.dylib_ordinal)));
+        } else {
+            try writer.writeByte(macho.BIND_OPCODE_SET_DYLIB_SPECIAL_IMM | @truncate(u4, @bitCast(u64, symbol.dylib_ordinal)));
        }

-        /// Offset of this symbol wrt to the segment id encoded in `segment`.
-        offset: i64,
-
-        /// Id of the dylib where this symbol is expected to reside.
-        /// Positive ordinals point at dylibs imported with LC_LOAD_DYLIB,
-        /// 0 means this binary, -1 the main executable, and -2 flat lookup.
-        dylib_ordinal: i64,
-
-        /// Id of the segment where to bind this symbol to.
-        segment: u8,
-
-        /// Addend value (if any).
-        addend: ?i64 = null,
-    };
-
-    pub fn deinit(self: *LazyBindingInfoTable, allocator: *Allocator) void {
-        for (self.symbols.items) |*symbol| {
-            if (symbol.name) |name| {
-                allocator.free(name);
-            }
        }
-        self.symbols.deinit(allocator);
-    }
+        try writer.writeByte(macho.BIND_OPCODE_SET_TYPE_IMM | @truncate(u4, macho.BIND_TYPE_POINTER));

-    /// Parse the binding info table from byte stream.
-    pub fn read(self: *LazyBindingInfoTable, reader: anytype, allocator: *Allocator) !void {
-        var symbol: Symbol = .{
-            .offset = 0,
-            .segment = 0,
-            .dylib_ordinal = 0,
-        };
-
-        var done = false;
-        while (true) {
-            const inst = reader.readByte() catch |err| switch (err) {
-                error.EndOfStream => break,
-                else => return err,
-            };
-            const imm: u8 = inst & macho.BIND_IMMEDIATE_MASK;
-            const opcode: u8 = inst & macho.BIND_OPCODE_MASK;
-
-            switch (opcode) {
-                macho.BIND_OPCODE_DO_BIND => {
-                    try self.symbols.append(allocator, symbol);
-                },
-                macho.BIND_OPCODE_DONE => {
-                    done = true;
-                    symbol = .{
-                        .offset = 0,
-                        .segment = 0,
-                        .dylib_ordinal = 0,
-                    };
-                },
-                macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM => {
-                    var name = std.ArrayList(u8).init(allocator);
-                    var next = try reader.readByte();
-                    while (next != @as(u8, 0)) {
-                        try name.append(next);
-                        next = try reader.readByte();
-                    }
-                    symbol.name = name.toOwnedSlice();
-                },
-                macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB => {
-                    symbol.segment = imm;
-                    symbol.offset = try leb.readILEB128(i64, reader);
-                },
-                macho.BIND_OPCODE_SET_DYLIB_SPECIAL_IMM, macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM => {
-                    symbol.dylib_ordinal = imm;
-                },
-                macho.BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB => {
-                    symbol.dylib_ordinal = try leb.readILEB128(i64, reader);
-                },
-                macho.BIND_OPCODE_SET_ADDEND_SLEB => {
-                    symbol.addend = try leb.readILEB128(i64, reader);
-                },
-                else => {
-                    std.log.warn("unhandled BIND_OPCODE_: 0x{x}", .{opcode});
-                },
-            }
+        if (symbol.name) |name| {
+            try writer.writeByte(macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM); // TODO Sometimes we might want to add flags.
+            try writer.writeAll(name);
+            try writer.writeByte(0);
         }
-        assert(done);
-    }

-    /// Write the binding info table to byte stream.
-    pub fn write(self: LazyBindingInfoTable, writer: anytype) !void {
-        for (self.symbols.items) |symbol| {
-            try writer.writeByte(macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | @truncate(u4, symbol.segment));
-            try leb.writeILEB128(writer, symbol.offset);
-
-            if (symbol.addend) |addend| {
-                try writer.writeByte(macho.BIND_OPCODE_SET_ADDEND_SLEB);
-                try leb.writeILEB128(writer, addend);
-            }
-
-            if (symbol.dylib_ordinal > 15) {
-                try writer.writeByte(macho.BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB);
-                try leb.writeULEB128(writer, @bitCast(u64, symbol.dylib_ordinal));
-            } else if (symbol.dylib_ordinal > 0) {
-                try writer.writeByte(macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | @truncate(u4, @bitCast(u64, symbol.dylib_ordinal)));
-            } else {
-                try writer.writeByte(macho.BIND_OPCODE_SET_DYLIB_SPECIAL_IMM | @truncate(u4, @bitCast(u64, symbol.dylib_ordinal)));
-            }
+        try writer.writeByte(macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | @truncate(u4, symbol.segment));
+        try leb.writeILEB128(writer, symbol.offset);
+        try writer.writeByte(macho.BIND_OPCODE_DO_BIND);
+        try writer.writeByte(macho.BIND_OPCODE_DONE);
+    }
+}

-            if (symbol.name) |name| {
-                try writer.writeByte(macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM); // TODO Sometimes we might want to add flags.
-                try writer.writeAll(name);
-                try writer.writeByte(0);
-            }
+pub fn lazyBindInfoSize(symbols: []*const ExternSymbol) !u64 {
+    var stream = std.io.countingWriter(std.io.null_writer);
+    var writer = stream.writer();
+    var size: u64 = 0;

-            try writer.writeByte(macho.BIND_OPCODE_DO_BIND);
-            try writer.writeByte(macho.BIND_OPCODE_DONE);
+    for (symbols) |symbol| {
+        size += 1;
+        try leb.writeILEB128(writer, symbol.offset);
+        size += 1;
+        if (symbol.dylib_ordinal > 15) {
+            try leb.writeULEB128(writer, @bitCast(u64, symbol.dylib_ordinal));
         }
+        if (symbol.name) |name| {
+            size += 1;
+            size += name.len;
+            size += 1;
+        }
+        size += 2;
     }

-    /// Calculate size in bytes of this binding info table.
-    pub fn calcSize(self: *LazyBindingInfoTable) !u64 {
-        var stream = std.io.countingWriter(std.io.null_writer);
-        var writer = stream.writer();
-        var size: u64 = 0;
+    size += stream.bytes_written;
+    return size;
+}

-        for (self.symbols.items) |symbol| {
-            size += 1;
-            try leb.writeILEB128(writer, symbol.offset);
+pub fn writeLazyBindInfo(symbols: []*const ExternSymbol, writer: anytype) !void {
+    for (symbols) |symbol| {
+        try writer.writeByte(macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | @truncate(u4, symbol.segment));
+        try leb.writeILEB128(writer, symbol.offset);

-            if (symbol.addend) |addend| {
-                size += 1;
-                try leb.writeILEB128(writer, addend);
-            }
+        if (symbol.dylib_ordinal > 15) {
+            try writer.writeByte(macho.BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB);
+            try leb.writeULEB128(writer, @bitCast(u64, symbol.dylib_ordinal));
+        } else if (symbol.dylib_ordinal > 0) {
+            try writer.writeByte(macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | @truncate(u4, @bitCast(u64, symbol.dylib_ordinal)));
+        } else {
+            try writer.writeByte(macho.BIND_OPCODE_SET_DYLIB_SPECIAL_IMM | @truncate(u4, @bitCast(u64, symbol.dylib_ordinal)));
        }

-            size += 1;
-            if (symbol.dylib_ordinal > 15) {
-                try leb.writeULEB128(writer, @bitCast(u64, symbol.dylib_ordinal));
-            }
-            if (symbol.name) |name| {
-                size += 1;
-                size += name.len;
-                size += 1;
-            }
-            size += 2;
+        if (symbol.name) |name| {
+            try writer.writeByte(macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM); // TODO Sometimes we might want to add flags.
+            try writer.writeAll(name);
+            try writer.writeByte(0);
         }
-        size += stream.bytes_written;
-        return size;
+        try writer.writeByte(macho.BIND_OPCODE_DO_BIND);
+        try writer.writeByte(macho.BIND_OPCODE_DONE);
     }
-};
+}
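
One closing note on the new rebase info: the `__la_symbol_ptr` slots written by `writeLazySymbolPointer` hold absolute addresses, so under ASLR dyld must slide them by the image's load delta, and registering those slots is what `writeRebaseInfoTable` and `writeRebaseInfo` above do. Decoded by hand for a single pointer slot (hypothetical segment index 3, offset 0), the stream `writeRebaseInfo` produces looks like this:

```zig
// Hand-decoded rebase opcode stream for one lazy pointer slot, following
// writeRebaseInfo: set type, set segment/offset, rebase once, done.
const rebase_stream =
    "\x11" ++ // REBASE_OPCODE_SET_TYPE_IMM | REBASE_TYPE_POINTER
    "\x23" ++ // REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | segment 3
    "\x00" ++ // offset 0, LEB128-encoded
    "\x51" ++ // REBASE_OPCODE_DO_REBASE_IMM_TIMES | 1
    "\x00"; // REBASE_OPCODE_DONE
```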
