diff options
| author | Jakub Konka <kubkon@jakubkonka.com> | 2020-12-19 12:13:03 +0100 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2020-12-19 12:13:03 +0100 |
| commit | b090451646904006ac41b2b99e532489d89ea837 (patch) | |
| tree | b0a5ec423dc42f5bf6dcf533b90f8c67a69e9b99 /src | |
| parent | 506af7e52e0985b410ea089bf5fa3247ab2377cb (diff) | |
| parent | 3f81ddb735bfc8e6fb1776df7407ace213816252 (diff) | |
| download | zig-b090451646904006ac41b2b99e532489d89ea837.tar.gz zig-b090451646904006ac41b2b99e532489d89ea837.zip | |
Merge pull request #7318 from kubkon/cc-macho
stage1: cross compile to x86_64 and arm64 macOS from anywhere with LLVM
Diffstat (limited to 'src')
| -rw-r--r-- | src/link/MachO.zig | 309 | ||||
| -rw-r--r-- | src/link/MachO/Trie.zig | 35 | ||||
| -rw-r--r-- | src/link/MachO/imports.zig | 328 | ||||
| -rw-r--r-- | src/target.zig | 1 |
4 files changed, 604 insertions, 69 deletions
diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 6fd0561cb1..13704d8839 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -25,6 +25,7 @@ const Trie = @import("MachO/Trie.zig"); const CodeSignature = @import("MachO/CodeSignature.zig"); usingnamespace @import("MachO/commands.zig"); +usingnamespace @import("MachO/imports.zig"); pub const base_tag: File.Tag = File.Tag.macho; @@ -104,6 +105,11 @@ string_table: std.ArrayListUnmanaged(u8) = .{}, /// table needs to be rewritten. offset_table: std.ArrayListUnmanaged(u64) = .{}, +/// Table of binding info entries. +binding_info_table: BindingInfoTable = .{}, +/// Table of lazy binding info entries. +lazy_binding_info_table: LazyBindingInfoTable = .{}, + error_flags: File.ErrorFlags = File.ErrorFlags{}, cmd_table_dirty: bool = false, @@ -752,44 +758,137 @@ fn linkWithLLD(self: *MachO, comp: *Compilation) !void { // At this stage, LLD has done its job. It is time to patch the resultant // binaries up! - // This is currently needed only for aarch64 targets. 
- if (target.cpu.arch == .aarch64) { - const out_file = try directory.handle.openFile(self.base.options.emit.?.sub_path, .{ .write = true }); - try self.parseFromFile(out_file); - if (self.code_signature_cmd_index == null) { - const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment; - const text_section = text_segment.sections.items[self.text_section_index.?]; - const after_last_cmd_offset = self.header.?.sizeofcmds + @sizeOf(macho.mach_header_64); - const needed_size = @sizeOf(macho.linkedit_data_command) * alloc_num / alloc_den; - - if (needed_size + after_last_cmd_offset > text_section.offset) { - std.log.err("Unable to extend padding between the end of load commands and start of __text section.", .{}); - std.log.err("Re-run the linker with '-headerpad 0x{x}' option if available, or", .{needed_size}); - std.log.err("fall back to the system linker by exporting 'ZIG_SYSTEM_LINKER_HACK=1'.", .{}); - return error.NotEnoughPadding; + const out_file = try directory.handle.openFile(self.base.options.emit.?.sub_path, .{ .write = true }); + try self.parseFromFile(out_file); + + if (self.libsystem_cmd_index == null and self.header.?.filetype == macho.MH_EXECUTE) { + const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment; + const text_section = text_segment.sections.items[self.text_section_index.?]; + const after_last_cmd_offset = self.header.?.sizeofcmds + @sizeOf(macho.mach_header_64); + const needed_size = @sizeOf(macho.linkedit_data_command) * alloc_num / alloc_den; + + if (needed_size + after_last_cmd_offset > text_section.offset) { + std.log.err("Unable to extend padding between the end of load commands and start of __text section.", .{}); + std.log.err("Re-run the linker with '-headerpad 0x{x}' option if available, or", .{needed_size}); + std.log.err("fall back to the system linker by exporting 'ZIG_SYSTEM_LINKER_HACK=1'.", .{}); + return error.NotEnoughPadding; + } + + // Calculate next available dylib 
ordinal. + const next_ordinal = blk: { + var ordinal: u32 = 1; + for (self.load_commands.items) |cmd| { + switch (cmd) { + .Dylib => ordinal += 1, + else => {}, + } } + break :blk ordinal; + }; + + // Add load dylib load command + self.libsystem_cmd_index = @intCast(u16, self.load_commands.items.len); + const cmdsize = mem.alignForwardGeneric(u64, @sizeOf(macho.dylib_command) + mem.lenZ(LIB_SYSTEM_PATH), @sizeOf(u64)); + // TODO Find a way to work out runtime version from the OS version triple stored in std.Target. + // In the meantime, we're gonna hardcode to the minimum compatibility version of 0.0.0. + const min_version = 0x0; + var dylib_cmd = emptyGenericCommandWithData(macho.dylib_command{ + .cmd = macho.LC_LOAD_DYLIB, + .cmdsize = @intCast(u32, cmdsize), + .dylib = .{ + .name = @sizeOf(macho.dylib_command), + .timestamp = 2, // not sure why not simply 0; this is reverse engineered from Mach-O files + .current_version = min_version, + .compatibility_version = min_version, + }, + }); + dylib_cmd.data = try self.base.allocator.alloc(u8, cmdsize - dylib_cmd.inner.dylib.name); + mem.set(u8, dylib_cmd.data, 0); + mem.copy(u8, dylib_cmd.data, mem.spanZ(LIB_SYSTEM_PATH)); + try self.load_commands.append(self.base.allocator, .{ .Dylib = dylib_cmd }); + + if (self.symtab_cmd_index == null or self.dysymtab_cmd_index == null) { + std.log.err("Incomplete Mach-O binary: no LC_SYMTAB or LC_DYSYMTAB load command found!", .{}); + std.log.err("Without the symbol table, it is not possible to patch up the binary for cross-compilation.", .{}); + return error.NoSymbolTableFound; + } - const linkedit_segment = self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; - // TODO This is clunky. 
- self.linkedit_segment_next_offset = @intCast(u32, mem.alignForwardGeneric(u64, linkedit_segment.inner.fileoff + linkedit_segment.inner.filesize, @sizeOf(u64))); - // Add code signature load command - self.code_signature_cmd_index = @intCast(u16, self.load_commands.items.len); - try self.load_commands.append(self.base.allocator, .{ - .LinkeditData = .{ - .cmd = macho.LC_CODE_SIGNATURE, - .cmdsize = @sizeOf(macho.linkedit_data_command), - .dataoff = 0, - .datasize = 0, - }, - }); - // Pad out space for code signature - try self.writeCodeSignaturePadding(); - // Write updated load commands and the header - try self.writeLoadCommands(); - try self.writeHeader(); - // Generate adhoc code signature - try self.writeCodeSignature(); + // Parse dyld info + try self.parseBindingInfoTable(); + try self.parseLazyBindingInfoTable(); + + // Update the dylib ordinals. + self.binding_info_table.dylib_ordinal = next_ordinal; + for (self.lazy_binding_info_table.symbols.items) |*symbol| { + symbol.dylib_ordinal = next_ordinal; + } + + // Write update dyld info + const dyld_info = self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly; + { + const size = try self.binding_info_table.calcSize(); + assert(dyld_info.bind_size >= size); + + var buffer = try self.base.allocator.alloc(u8, size); + defer self.base.allocator.free(buffer); + + var stream = std.io.fixedBufferStream(buffer); + try self.binding_info_table.write(stream.writer()); + + try self.base.file.?.pwriteAll(buffer, dyld_info.bind_off); } + { + const size = try self.lazy_binding_info_table.calcSize(); + assert(dyld_info.lazy_bind_size >= size); + + var buffer = try self.base.allocator.alloc(u8, size); + defer self.base.allocator.free(buffer); + + var stream = std.io.fixedBufferStream(buffer); + try self.lazy_binding_info_table.write(stream.writer()); + + try self.base.file.?.pwriteAll(buffer, dyld_info.lazy_bind_off); + } + + // Write updated load commands and the header + try self.writeLoadCommands(); + try 
self.writeHeader(); + } + if (self.code_signature_cmd_index == null) outer: { + if (target.cpu.arch != .aarch64) break :outer; // This is currently needed only for aarch64 targets. + const text_segment = self.load_commands.items[self.text_segment_cmd_index.?].Segment; + const text_section = text_segment.sections.items[self.text_section_index.?]; + const after_last_cmd_offset = self.header.?.sizeofcmds + @sizeOf(macho.mach_header_64); + const needed_size = @sizeOf(macho.linkedit_data_command) * alloc_num / alloc_den; + + if (needed_size + after_last_cmd_offset > text_section.offset) { + std.log.err("Unable to extend padding between the end of load commands and start of __text section.", .{}); + std.log.err("Re-run the linker with '-headerpad 0x{x}' option if available, or", .{needed_size}); + std.log.err("fall back to the system linker by exporting 'ZIG_SYSTEM_LINKER_HACK=1'.", .{}); + return error.NotEnoughPadding; + } + + const linkedit_segment = self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; + // TODO This is clunky. 
+ self.linkedit_segment_next_offset = @intCast(u32, mem.alignForwardGeneric(u64, linkedit_segment.inner.fileoff + linkedit_segment.inner.filesize, @sizeOf(u64))); + // Add code signature load command + self.code_signature_cmd_index = @intCast(u16, self.load_commands.items.len); + try self.load_commands.append(self.base.allocator, .{ + .LinkeditData = .{ + .cmd = macho.LC_CODE_SIGNATURE, + .cmdsize = @sizeOf(macho.linkedit_data_command), + .dataoff = 0, + .datasize = 0, + }, + }); + + // Pad out space for code signature + try self.writeCodeSignaturePadding(); + // Write updated load commands and the header + try self.writeLoadCommands(); + try self.writeHeader(); + // Generate adhoc code signature + try self.writeCodeSignature(); } } } @@ -823,6 +922,8 @@ fn darwinArchString(arch: std.Target.Cpu.Arch) []const u8 { } pub fn deinit(self: *MachO) void { + self.binding_info_table.deinit(self.base.allocator); + self.lazy_binding_info_table.deinit(self.base.allocator); self.pie_fixups.deinit(self.base.allocator); self.text_block_free_list.deinit(self.base.allocator); self.offset_table.deinit(self.base.allocator); @@ -1850,6 +1951,68 @@ fn writeExportTrie(self: *MachO) !void { self.cmd_table_dirty = true; } +fn writeBindingInfoTable(self: *MachO) !void { + const size = self.binding_info_table.calcSize(); + var buffer = try self.base.allocator.alloc(u8, size); + defer self.base.allocator.free(buffer); + + var stream = std.io.fixedBufferStream(buffer); + try self.binding_info_table.write(stream.writer()); + + const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly; + const bind_size = @intCast(u32, mem.alignForward(buffer.len, @sizeOf(u64))); + dyld_info.bind_off = self.linkedit_segment_next_offset.?; + dyld_info.bind_size = bind_size; + + log.debug("writing binding info table from 0x{x} to 0x{x}\n", .{ dyld_info.bind_off, dyld_info.bind_off + bind_size }); + + if (bind_size > buffer.len) { + // Pad out to align(8). 
+ try self.base.file.?.pwriteAll(&[_]u8{0}, dyld_info.bind_off + bind_size); + } + try self.base.file.?.pwriteAll(buffer, dyld_info.bind_off); + + self.linkedit_segment_next_offset = dyld_info.bind_off + dyld_info.bind_size; + // Advance size of __LINKEDIT segment + const linkedit = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; + linkedit.inner.filesize += dyld_info.bind_size; + if (linkedit.inner.vmsize < linkedit.inner.filesize) { + linkedit.inner.vmsize = mem.alignForwardGeneric(u64, linkedit.inner.filesize, self.page_size); + } + self.cmd_table_dirty = true; +} + +fn writeLazyBindingInfoTable(self: *MachO) !void { + const size = self.lazy_binding_info_table.calcSize(); + var buffer = try self.base.allocator.alloc(u8, size); + defer self.base.allocator.free(buffer); + + var stream = std.io.fixedBufferStream(buffer); + try self.lazy_binding_info_table.write(stream.writer()); + + const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly; + const bind_size = @intCast(u32, mem.alignForward(buffer.len, @sizeOf(u64))); + dyld_info.lazy_bind_off = self.linkedit_segment_next_offset.?; + dyld_info.lazy_bind_size = bind_size; + + log.debug("writing lazy binding info table from 0x{x} to 0x{x}\n", .{ dyld_info.lazy_bind_off, dyld_info.lazy_bind_off + bind_size }); + + if (bind_size > buffer.len) { + // Pad out to align(8). 
+ try self.base.file.?.pwriteAll(&[_]u8{0}, dyld_info.lazy_bind_off + bind_size); + } + try self.base.file.?.pwriteAll(buffer, dyld_info.lazy_bind_off); + + self.linkedit_segment_next_offset = dyld_info.lazy_bind_off + dyld_info.lazy_bind_size; + // Advance size of __LINKEDIT segment + const linkedit = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; + linkedit.inner.filesize += dyld_info.lazy_bind_size; + if (linkedit.inner.vmsize < linkedit.inner.filesize) { + linkedit.inner.vmsize = mem.alignForwardGeneric(u64, linkedit.inner.filesize, self.page_size); + } + self.cmd_table_dirty = true; +} + fn writeStringTable(self: *MachO) !void { const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab; const needed_size = self.string_table.items.len; @@ -1925,18 +2088,18 @@ fn parseFromFile(self: *MachO, file: fs.File) !void { switch (cmd.cmd()) { macho.LC_SEGMENT_64 => { const x = cmd.Segment; - if (isSegmentOrSection(&x.inner.segname, "__PAGEZERO")) { + if (parseAndCmpName(x.inner.segname[0..], "__PAGEZERO")) { self.pagezero_segment_cmd_index = i; - } else if (isSegmentOrSection(&x.inner.segname, "__LINKEDIT")) { + } else if (parseAndCmpName(x.inner.segname[0..], "__LINKEDIT")) { self.linkedit_segment_cmd_index = i; - } else if (isSegmentOrSection(&x.inner.segname, "__TEXT")) { + } else if (parseAndCmpName(x.inner.segname[0..], "__TEXT")) { self.text_segment_cmd_index = i; for (x.sections.items) |sect, j| { - if (isSegmentOrSection(&sect.sectname, "__text")) { + if (parseAndCmpName(sect.sectname[0..], "__text")) { self.text_section_index = @intCast(u16, j); } } - } else if (isSegmentOrSection(&x.inner.segname, "__DATA")) { + } else if (parseAndCmpName(x.inner.segname[0..], "__DATA")) { self.data_segment_cmd_index = i; } }, @@ -1962,7 +2125,10 @@ fn parseFromFile(self: *MachO, file: fs.File) !void { self.main_cmd_index = i; }, macho.LC_LOAD_DYLIB => { - self.libsystem_cmd_index = i; // TODO This is incorrect, but we'll fixup later. 
+ const x = cmd.Dylib; + if (parseAndCmpName(x.data, mem.spanZ(LIB_SYSTEM_PATH))) { + self.libsystem_cmd_index = i; + } }, macho.LC_FUNCTION_STARTS => { self.function_starts_cmd_index = i; @@ -1973,19 +2139,68 @@ fn parseFromFile(self: *MachO, file: fs.File) !void { macho.LC_CODE_SIGNATURE => { self.code_signature_cmd_index = i; }, - // TODO populate more MachO fields else => { - std.log.err("Unknown load command detected: 0x{x}.", .{cmd.cmd()}); - return error.UnknownLoadCommand; + std.log.warn("Unknown load command detected: 0x{x}.", .{cmd.cmd()}); }, } self.load_commands.appendAssumeCapacity(cmd); } self.header = header; +} - // TODO parse memory mapped segments +fn parseAndCmpName(name: []const u8, needle: []const u8) bool { + const len = mem.indexOfScalar(u8, name[0..], @as(u8, 0)) orelse name.len; + return mem.eql(u8, name[0..len], needle); } -fn isSegmentOrSection(name: *const [16]u8, needle: []const u8) bool { - return mem.eql(u8, mem.trimRight(u8, name.*[0..], &[_]u8{0}), needle); +fn parseSymbolTable(self: *MachO) !void { + const symtab = self.load_commands.items[self.symtab_cmd_index.?].Symtab; + const dysymtab = self.load_commands.items[self.dysymtab_cmd_index.?].Dysymtab; + + var buffer = try self.base.allocator.alloc(macho.nlist_64, symtab.nsyms); + defer self.base.allocator.free(buffer); + const nread = try self.base.file.?.preadAll(@ptrCast([*]u8, buffer)[0 .. symtab.nsyms * @sizeOf(macho.nlist_64)], symtab.symoff); + assert(@divExact(nread, @sizeOf(macho.nlist_64)) == buffer.len); + + try self.local_symbols.ensureCapacity(self.base.allocator, dysymtab.nlocalsym); + try self.global_symbols.ensureCapacity(self.base.allocator, dysymtab.nextdefsym); + try self.undef_symbols.ensureCapacity(self.base.allocator, dysymtab.nundefsym); + + self.local_symbols.appendSliceAssumeCapacity(buffer[dysymtab.ilocalsym .. dysymtab.ilocalsym + dysymtab.nlocalsym]); + self.global_symbols.appendSliceAssumeCapacity(buffer[dysymtab.iextdefsym .. 
dysymtab.iextdefsym + dysymtab.nextdefsym]); + self.undef_symbols.appendSliceAssumeCapacity(buffer[dysymtab.iundefsym .. dysymtab.iundefsym + dysymtab.nundefsym]); +} + +fn parseStringTable(self: *MachO) !void { + const symtab = self.load_commands.items[self.symtab_cmd_index.?].Symtab; + + var buffer = try self.base.allocator.alloc(u8, symtab.strsize); + defer self.base.allocator.free(buffer); + const nread = try self.base.file.?.preadAll(buffer, symtab.stroff); + assert(nread == buffer.len); + + try self.string_table.ensureCapacity(self.base.allocator, symtab.strsize); + self.string_table.appendSliceAssumeCapacity(buffer); +} + +fn parseBindingInfoTable(self: *MachO) !void { + const dyld_info = self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly; + var buffer = try self.base.allocator.alloc(u8, dyld_info.bind_size); + defer self.base.allocator.free(buffer); + const nread = try self.base.file.?.preadAll(buffer, dyld_info.bind_off); + assert(nread == buffer.len); + + var stream = std.io.fixedBufferStream(buffer); + try self.binding_info_table.read(stream.reader(), self.base.allocator); +} + +fn parseLazyBindingInfoTable(self: *MachO) !void { + const dyld_info = self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly; + var buffer = try self.base.allocator.alloc(u8, dyld_info.lazy_bind_size); + defer self.base.allocator.free(buffer); + const nread = try self.base.file.?.preadAll(buffer, dyld_info.lazy_bind_off); + assert(nread == buffer.len); + + var stream = std.io.fixedBufferStream(buffer); + try self.lazy_binding_info_table.read(stream.reader(), self.base.allocator); } diff --git a/src/link/MachO/Trie.zig b/src/link/MachO/Trie.zig index 0016ff329c..e445ea4fef 100644 --- a/src/link/MachO/Trie.zig +++ b/src/link/MachO/Trie.zig @@ -241,12 +241,15 @@ pub const Node = struct { }; /// Updates offset of this node in the output byte stream. 
- fn finalize(self: *Node, offset_in_trie: usize) FinalizeResult { + fn finalize(self: *Node, offset_in_trie: usize) !FinalizeResult { + var stream = std.io.countingWriter(std.io.null_writer); + var writer = stream.writer(); + var node_size: usize = 0; if (self.terminal_info) |info| { - node_size += sizeULEB128Mem(info.export_flags); - node_size += sizeULEB128Mem(info.vmaddr_offset); - node_size += sizeULEB128Mem(node_size); + try leb.writeULEB128(writer, info.export_flags); + try leb.writeULEB128(writer, info.vmaddr_offset); + try leb.writeULEB128(writer, stream.bytes_written); } else { node_size += 1; // 0x0 for non-terminal nodes } @@ -254,27 +257,17 @@ pub const Node = struct { for (self.edges.items) |edge| { const next_node_offset = edge.to.trie_offset orelse 0; - node_size += edge.label.len + 1 + sizeULEB128Mem(next_node_offset); + node_size += edge.label.len + 1; + try leb.writeULEB128(writer, next_node_offset); } const trie_offset = self.trie_offset orelse 0; const updated = offset_in_trie != trie_offset; self.trie_offset = offset_in_trie; self.node_dirty = false; + node_size += stream.bytes_written; - return .{ .node_size = node_size, .updated = updated }; - } - - /// Calculates number of bytes in ULEB128 encoding of value. - fn sizeULEB128Mem(value: u64) usize { - var res: usize = 0; - var v = value; - while (true) { - v = v >> 7; - res += 1; - if (v == 0) break; - } - return res; + return FinalizeResult{ .node_size = node_size, .updated = updated }; } }; @@ -358,7 +351,7 @@ pub fn finalize(self: *Trie) !void { self.size = 0; more = false; for (self.ordered_nodes.items) |node| { - const res = node.finalize(self.size); + const res = try node.finalize(self.size); self.size += res.node_size; if (res.updated) more = true; } @@ -380,9 +373,7 @@ pub fn read(self: *Trie, reader: anytype) ReadError!usize { } /// Write the trie to a byte stream. -/// Caller owns the memory and needs to free it. 
-/// Panics if the trie was not finalized using `finalize` -/// before calling this method. +/// Panics if the trie was not finalized using `finalize` before calling this method. pub fn write(self: Trie, writer: anytype) !usize { assert(!self.trie_dirty); var counting_writer = std.io.countingWriter(writer); diff --git a/src/link/MachO/imports.zig b/src/link/MachO/imports.zig new file mode 100644 index 0000000000..988c72c151 --- /dev/null +++ b/src/link/MachO/imports.zig @@ -0,0 +1,328 @@ +const std = @import("std"); +const leb = std.leb; +const macho = std.macho; +const mem = std.mem; + +const assert = std.debug.assert; +const Allocator = mem.Allocator; + +/// Table of binding info entries used to tell the dyld which +/// symbols to bind at loading time. +pub const BindingInfoTable = struct { + /// Id of the dynamic library where the specified entries can be found. + dylib_ordinal: i64 = 0, + + /// Binding type; defaults to pointer type. + binding_type: u8 = macho.BIND_TYPE_POINTER, + + symbols: std.ArrayListUnmanaged(Symbol) = .{}, + + pub const Symbol = struct { + /// Symbol name. + name: ?[]u8 = null, + + /// Id of the segment where to bind this symbol to. + segment: u8, + + /// Offset of this symbol wrt to the segment id encoded in `segment`. + offset: i64, + + /// Addend value (if any). + addend: ?i64 = null, + }; + + pub fn deinit(self: *BindingInfoTable, allocator: *Allocator) void { + for (self.symbols.items) |*symbol| { + if (symbol.name) |name| { + allocator.free(name); + } + } + self.symbols.deinit(allocator); + } + + /// Parse the binding info table from byte stream. 
+ pub fn read(self: *BindingInfoTable, reader: anytype, allocator: *Allocator) !void { + var symbol: Symbol = .{ + .segment = 0, + .offset = 0, + }; + + var dylib_ordinal_set = false; + var done = false; + while (true) { + const inst = reader.readByte() catch |err| switch (err) { + error.EndOfStream => break, + else => return err, + }; + const imm: u8 = inst & macho.BIND_IMMEDIATE_MASK; + const opcode: u8 = inst & macho.BIND_OPCODE_MASK; + + switch (opcode) { + macho.BIND_OPCODE_DO_BIND => { + try self.symbols.append(allocator, symbol); + symbol = .{ + .segment = 0, + .offset = 0, + }; + }, + macho.BIND_OPCODE_DONE => { + done = true; + break; + }, + macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM => { + var name = std.ArrayList(u8).init(allocator); + var next = try reader.readByte(); + while (next != @as(u8, 0)) { + try name.append(next); + next = try reader.readByte(); + } + symbol.name = name.toOwnedSlice(); + }, + macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB => { + symbol.segment = imm; + symbol.offset = try leb.readILEB128(i64, reader); + }, + macho.BIND_OPCODE_SET_DYLIB_SPECIAL_IMM, macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM => { + assert(!dylib_ordinal_set); + self.dylib_ordinal = imm; + }, + macho.BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB => { + assert(!dylib_ordinal_set); + self.dylib_ordinal = try leb.readILEB128(i64, reader); + }, + macho.BIND_OPCODE_SET_TYPE_IMM => { + self.binding_type = imm; + }, + macho.BIND_OPCODE_SET_ADDEND_SLEB => { + symbol.addend = try leb.readILEB128(i64, reader); + }, + else => { + std.log.warn("unhandled BIND_OPCODE_: 0x{x}", .{opcode}); + }, + } + } + assert(done); + } + + /// Write the binding info table to byte stream. 
+ pub fn write(self: BindingInfoTable, writer: anytype) !void { + if (self.dylib_ordinal > 15) { + try writer.writeByte(macho.BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB); + try leb.writeULEB128(writer, @bitCast(u64, self.dylib_ordinal)); + } else if (self.dylib_ordinal > 0) { + try writer.writeByte(macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | @truncate(u4, @bitCast(u64, self.dylib_ordinal))); + } else { + try writer.writeByte(macho.BIND_OPCODE_SET_DYLIB_SPECIAL_IMM | @truncate(u4, @bitCast(u64, self.dylib_ordinal))); + } + try writer.writeByte(macho.BIND_OPCODE_SET_TYPE_IMM | @truncate(u4, self.binding_type)); + + for (self.symbols.items) |symbol| { + if (symbol.name) |name| { + try writer.writeByte(macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM); // TODO Sometimes we might want to add flags. + try writer.writeAll(name); + try writer.writeByte(0); + } + + try writer.writeByte(macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | @truncate(u4, symbol.segment)); + try leb.writeILEB128(writer, symbol.offset); + + if (symbol.addend) |addend| { + try writer.writeByte(macho.BIND_OPCODE_SET_ADDEND_SLEB); + try leb.writeILEB128(writer, addend); + } + + try writer.writeByte(macho.BIND_OPCODE_DO_BIND); + } + + try writer.writeByte(macho.BIND_OPCODE_DONE); + } + + /// Calculate size in bytes of this binding info table. 
+ pub fn calcSize(self: *BindingInfoTable) !usize { + var stream = std.io.countingWriter(std.io.null_writer); + var writer = stream.writer(); + var size: usize = 1; + + if (self.dylib_ordinal > 15) { + try leb.writeULEB128(writer, @bitCast(u64, self.dylib_ordinal)); + } + + size += 1; + + for (self.symbols.items) |symbol| { + if (symbol.name) |name| { + size += 1; + size += name.len; + size += 1; + } + + size += 1; + try leb.writeILEB128(writer, symbol.offset); + + if (symbol.addend) |addend| { + size += 1; + try leb.writeILEB128(writer, addend); + } + + size += 1; + } + + size += 1 + stream.bytes_written; + return size; + } +}; + +/// Table of lazy binding info entries used to tell the dyld which +/// symbols to lazily bind at first load of a dylib. +pub const LazyBindingInfoTable = struct { + symbols: std.ArrayListUnmanaged(Symbol) = .{}, + + pub const Symbol = struct { + /// Symbol name. + name: ?[]u8 = null, + + /// Offset of this symbol wrt to the segment id encoded in `segment`. + offset: i64, + + /// Id of the dylib where this symbol is expected to reside. + /// Positive ordinals point at dylibs imported with LC_LOAD_DYLIB, + /// 0 means this binary, -1 the main executable, and -2 flat lookup. + dylib_ordinal: i64, + + /// Id of the segment where to bind this symbol to. + segment: u8, + + /// Addend value (if any). + addend: ?i64 = null, + }; + + pub fn deinit(self: *LazyBindingInfoTable, allocator: *Allocator) void { + for (self.symbols.items) |*symbol| { + if (symbol.name) |name| { + allocator.free(name); + } + } + self.symbols.deinit(allocator); + } + + /// Parse the binding info table from byte stream. 
+ pub fn read(self: *LazyBindingInfoTable, reader: anytype, allocator: *Allocator) !void { + var symbol: Symbol = .{ + .offset = 0, + .segment = 0, + .dylib_ordinal = 0, + }; + + var done = false; + while (true) { + const inst = reader.readByte() catch |err| switch (err) { + error.EndOfStream => break, + else => return err, + }; + const imm: u8 = inst & macho.BIND_IMMEDIATE_MASK; + const opcode: u8 = inst & macho.BIND_OPCODE_MASK; + + switch (opcode) { + macho.BIND_OPCODE_DO_BIND => { + try self.symbols.append(allocator, symbol); + }, + macho.BIND_OPCODE_DONE => { + done = true; + symbol = .{ + .offset = 0, + .segment = 0, + .dylib_ordinal = 0, + }; + }, + macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM => { + var name = std.ArrayList(u8).init(allocator); + var next = try reader.readByte(); + while (next != @as(u8, 0)) { + try name.append(next); + next = try reader.readByte(); + } + symbol.name = name.toOwnedSlice(); + }, + macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB => { + symbol.segment = imm; + symbol.offset = try leb.readILEB128(i64, reader); + }, + macho.BIND_OPCODE_SET_DYLIB_SPECIAL_IMM, macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM => { + symbol.dylib_ordinal = imm; + }, + macho.BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB => { + symbol.dylib_ordinal = try leb.readILEB128(i64, reader); + }, + macho.BIND_OPCODE_SET_ADDEND_SLEB => { + symbol.addend = try leb.readILEB128(i64, reader); + }, + else => { + std.log.warn("unhandled BIND_OPCODE_: 0x{x}", .{opcode}); + }, + } + } + assert(done); + } + + /// Write the binding info table to byte stream. 
+ pub fn write(self: LazyBindingInfoTable, writer: anytype) !void { + for (self.symbols.items) |symbol| { + try writer.writeByte(macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | @truncate(u4, symbol.segment)); + try leb.writeILEB128(writer, symbol.offset); + + if (symbol.addend) |addend| { + try writer.writeByte(macho.BIND_OPCODE_SET_ADDEND_SLEB); + try leb.writeILEB128(writer, addend); + } + + if (symbol.dylib_ordinal > 15) { + try writer.writeByte(macho.BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB); + try leb.writeULEB128(writer, @bitCast(u64, symbol.dylib_ordinal)); + } else if (symbol.dylib_ordinal > 0) { + try writer.writeByte(macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | @truncate(u4, @bitCast(u64, symbol.dylib_ordinal))); + } else { + try writer.writeByte(macho.BIND_OPCODE_SET_DYLIB_SPECIAL_IMM | @truncate(u4, @bitCast(u64, symbol.dylib_ordinal))); + } + + if (symbol.name) |name| { + try writer.writeByte(macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM); // TODO Sometimes we might want to add flags. + try writer.writeAll(name); + try writer.writeByte(0); + } + + try writer.writeByte(macho.BIND_OPCODE_DO_BIND); + try writer.writeByte(macho.BIND_OPCODE_DONE); + } + } + + /// Calculate size in bytes of this binding info table. 
+ pub fn calcSize(self: *LazyBindingInfoTable) !usize { + var stream = std.io.countingWriter(std.io.null_writer); + var writer = stream.writer(); + var size: usize = 0; + + for (self.symbols.items) |symbol| { + size += 1; + try leb.writeILEB128(writer, symbol.offset); + + if (symbol.addend) |addend| { + size += 1; + try leb.writeILEB128(writer, addend); + } + + size += 1; + if (symbol.dylib_ordinal > 15) { + try leb.writeULEB128(writer, @bitCast(u64, symbol.dylib_ordinal)); + } + if (symbol.name) |name| { + size += 1; + size += name.len; + size += 1; + } + size += 2; + } + + size += stream.bytes_written; + return size; + } +}; diff --git a/src/target.zig b/src/target.zig index 647237b680..9749675a89 100644 --- a/src/target.zig +++ b/src/target.zig @@ -14,6 +14,7 @@ pub const available_libcs = [_]ArchOsAbi{ .{ .arch = .aarch64, .os = .linux, .abi = .gnu }, .{ .arch = .aarch64, .os = .linux, .abi = .musl }, .{ .arch = .aarch64, .os = .windows, .abi = .gnu }, + .{ .arch = .aarch64, .os = .macos, .abi = .gnu }, .{ .arch = .armeb, .os = .linux, .abi = .gnueabi }, .{ .arch = .armeb, .os = .linux, .abi = .gnueabihf }, .{ .arch = .armeb, .os = .linux, .abi = .musleabi }, |
