diff options
| author | Jakub Konka <kubkon@jakubkonka.com> | 2023-01-19 00:03:31 +0100 |
|---|---|---|
| committer | Jakub Konka <kubkon@jakubkonka.com> | 2023-01-19 00:05:45 +0100 |
| commit | 61e38d0c34337b63a2174ff89cc5d34c06eecb22 (patch) | |
| tree | 2ce2db8d27d8d85b97b4e7a4178c48173bffb563 | |
| parent | 72c09b7b3b2b804bcc57befb5d82c7e89b6deeac (diff) | |
| download | zig-61e38d0c34337b63a2174ff89cc5d34c06eecb22.tar.gz zig-61e38d0c34337b63a2174ff89cc5d34c06eecb22.zip | |
macho+zld: add improved dyld opcodes emitters
| -rw-r--r-- | src/link/MachO.zig | 210 | ||||
| -rw-r--r-- | src/link/MachO/bind.zig | 138 | ||||
| -rw-r--r-- | src/link/MachO/dyld_info/Rebase.zig | 574 | ||||
| -rw-r--r-- | src/link/MachO/dyld_info/bind.zig | 740 | ||||
| -rw-r--r-- | src/link/MachO/zld.zig | 239 |
5 files changed, 1457 insertions, 444 deletions
diff --git a/src/link/MachO.zig b/src/link/MachO.zig index be770574b8..97e87a45b6 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -14,7 +14,6 @@ const mem = std.mem; const meta = std.meta; const aarch64 = @import("../arch/aarch64/bits.zig"); -const bind = @import("MachO/bind.zig"); const codegen = @import("../codegen.zig"); const dead_strip = @import("MachO/dead_strip.zig"); const fat = @import("MachO/fat.zig"); @@ -50,6 +49,10 @@ const Value = @import("../value.zig").Value; pub const DebugSymbols = @import("MachO/DebugSymbols.zig"); +const Bind = @import("MachO/dyld_info/bind.zig").Bind(*const MachO, MachO.SymbolWithLoc); +const LazyBind = @import("MachO/dyld_info/bind.zig").LazyBind(*const MachO, MachO.SymbolWithLoc); +const Rebase = @import("MachO/dyld_info/Rebase.zig"); + pub const base_tag: File.Tag = File.Tag.macho; pub const SearchStrategy = enum { @@ -3192,32 +3195,14 @@ fn writeLinkeditSegmentData(self: *MachO) !void { seg.vmsize = mem.alignForwardGeneric(u64, seg.filesize, self.page_size); } -const AtomLessThanByAddressContext = struct { - macho_file: *MachO, -}; - -fn atomLessThanByAddress(ctx: AtomLessThanByAddressContext, lhs: *Atom, rhs: *Atom) bool { - return lhs.getSymbol(ctx.macho_file).n_value < rhs.getSymbol(ctx.macho_file).n_value; -} - -fn collectRebaseData(self: *MachO, pointers: *std.ArrayList(bind.Pointer)) !void { +fn collectRebaseData(self: *MachO, rebase: *Rebase) !void { const gpa = self.base.allocator; - - var sorted_atoms_by_address = std.ArrayList(*Atom).init(gpa); - defer sorted_atoms_by_address.deinit(); - try sorted_atoms_by_address.ensureTotalCapacityPrecise(self.rebases.count()); - + const slice = self.sections.slice(); var it = self.rebases.keyIterator(); - while (it.next()) |key_ptr| { - sorted_atoms_by_address.appendAssumeCapacity(key_ptr.*); - } - std.sort.sort(*Atom, sorted_atoms_by_address.items, AtomLessThanByAddressContext{ - .macho_file = self, - }, atomLessThanByAddress); + while (it.next()) |key_ptr| { + 
const atom = key_ptr.*; - const slice = self.sections.slice(); - for (sorted_atoms_by_address.items) |atom| { log.debug(" ATOM(%{d}, '{s}')", .{ atom.sym_index, atom.getName(self) }); const sym = atom.getSymbol(self); @@ -3227,36 +3212,29 @@ fn collectRebaseData(self: *MachO, pointers: *std.ArrayList(bind.Pointer)) !void const base_offset = sym.n_value - seg.vmaddr; const rebases = self.rebases.get(atom).?; - try pointers.ensureUnusedCapacity(rebases.items.len); + try rebase.entries.ensureUnusedCapacity(gpa, rebases.items.len); + for (rebases.items) |offset| { log.debug(" | rebase at {x}", .{base_offset + offset}); - pointers.appendAssumeCapacity(.{ + rebase.entries.appendAssumeCapacity(.{ .offset = base_offset + offset, .segment_id = segment_index, }); } } + + try rebase.finalize(gpa); } -fn collectBindData(self: *MachO, pointers: *std.ArrayList(bind.Pointer), raw_bindings: anytype) !void { +fn collectBindData(self: *MachO, bind: anytype, raw_bindings: anytype) !void { const gpa = self.base.allocator; - - var sorted_atoms_by_address = std.ArrayList(*Atom).init(gpa); - defer sorted_atoms_by_address.deinit(); - try sorted_atoms_by_address.ensureTotalCapacityPrecise(raw_bindings.count()); - + const slice = self.sections.slice(); var it = raw_bindings.keyIterator(); - while (it.next()) |key_ptr| { - sorted_atoms_by_address.appendAssumeCapacity(key_ptr.*); - } - std.sort.sort(*Atom, sorted_atoms_by_address.items, AtomLessThanByAddressContext{ - .macho_file = self, - }, atomLessThanByAddress); + while (it.next()) |key_ptr| { + const atom = key_ptr.*; - const slice = self.sections.slice(); - for (sorted_atoms_by_address.items) |atom| { log.debug(" ATOM(%{d}, '{s}')", .{ atom.sym_index, atom.getName(self) }); const sym = atom.getSymbol(self); @@ -3266,7 +3244,8 @@ fn collectBindData(self: *MachO, pointers: *std.ArrayList(bind.Pointer), raw_bin const base_offset = sym.n_value - seg.vmaddr; const bindings = raw_bindings.get(atom).?; - try 
pointers.ensureUnusedCapacity(bindings.items.len); + try bind.entries.ensureUnusedCapacity(gpa, bindings.items.len); + for (bindings.items) |binding| { const bind_sym = self.getSymbol(binding.target); const bind_sym_name = self.getSymbolName(binding.target); @@ -3274,7 +3253,6 @@ fn collectBindData(self: *MachO, pointers: *std.ArrayList(bind.Pointer), raw_bin @bitCast(i16, bind_sym.n_desc), macho.N_SYMBOL_RESOLVER, ); - var flags: u4 = 0; log.debug(" | bind at {x}, import('{s}') in dylib({d})", .{ binding.offset + base_offset, bind_sym_name, @@ -3282,17 +3260,17 @@ fn collectBindData(self: *MachO, pointers: *std.ArrayList(bind.Pointer), raw_bin }); if (bind_sym.weakRef()) { log.debug(" | marking as weak ref ", .{}); - flags |= @truncate(u4, macho.BIND_SYMBOL_FLAGS_WEAK_IMPORT); } - pointers.appendAssumeCapacity(.{ + bind.entries.appendAssumeCapacity(.{ + .target = binding.target, .offset = binding.offset + base_offset, .segment_id = segment_index, - .dylib_ordinal = dylib_ordinal, - .name = bind_sym_name, - .bind_flags = flags, + .addend = 0, }); } } + + try bind.finalize(gpa, self); } fn collectExportData(self: *MachO, trie: *Trie) !void { @@ -3345,17 +3323,17 @@ fn writeDyldInfoData(self: *MachO) !void { const gpa = self.base.allocator; - var rebase_pointers = std.ArrayList(bind.Pointer).init(gpa); - defer rebase_pointers.deinit(); - try self.collectRebaseData(&rebase_pointers); + var rebase = Rebase{}; + defer rebase.deinit(gpa); + try self.collectRebaseData(&rebase); - var bind_pointers = std.ArrayList(bind.Pointer).init(gpa); - defer bind_pointers.deinit(); - try self.collectBindData(&bind_pointers, self.bindings); + var bind = Bind{}; + defer bind.deinit(gpa); + try self.collectBindData(&bind, self.bindings); - var lazy_bind_pointers = std.ArrayList(bind.Pointer).init(gpa); - defer lazy_bind_pointers.deinit(); - try self.collectBindData(&lazy_bind_pointers, self.lazy_bindings); + var lazy_bind = LazyBind{}; + defer lazy_bind.deinit(gpa); + try 
self.collectBindData(&lazy_bind, self.lazy_bindings); var trie: Trie = .{}; defer trie.deinit(gpa); @@ -3364,17 +3342,17 @@ fn writeDyldInfoData(self: *MachO) !void { const link_seg = self.getLinkeditSegmentPtr(); assert(mem.isAlignedGeneric(u64, link_seg.fileoff, @alignOf(u64))); const rebase_off = link_seg.fileoff; - const rebase_size = try bind.rebaseInfoSize(rebase_pointers.items); + const rebase_size = rebase.size(); const rebase_size_aligned = mem.alignForwardGeneric(u64, rebase_size, @alignOf(u64)); log.debug("writing rebase info from 0x{x} to 0x{x}", .{ rebase_off, rebase_off + rebase_size_aligned }); const bind_off = rebase_off + rebase_size_aligned; - const bind_size = try bind.bindInfoSize(bind_pointers.items); + const bind_size = bind.size(); const bind_size_aligned = mem.alignForwardGeneric(u64, bind_size, @alignOf(u64)); log.debug("writing bind info from 0x{x} to 0x{x}", .{ bind_off, bind_off + bind_size_aligned }); const lazy_bind_off = bind_off + bind_size_aligned; - const lazy_bind_size = try bind.lazyBindInfoSize(lazy_bind_pointers.items); + const lazy_bind_size = lazy_bind.size(); const lazy_bind_size_aligned = mem.alignForwardGeneric(u64, lazy_bind_size, @alignOf(u64)); log.debug("writing lazy bind info from 0x{x} to 0x{x}", .{ lazy_bind_off, @@ -3398,13 +3376,13 @@ fn writeDyldInfoData(self: *MachO) !void { var stream = std.io.fixedBufferStream(buffer); const writer = stream.writer(); - try bind.writeRebaseInfo(rebase_pointers.items, writer); + try rebase.write(writer); try stream.seekTo(bind_off - rebase_off); - try bind.writeBindInfo(bind_pointers.items, writer); + try bind.write(writer); try stream.seekTo(lazy_bind_off - rebase_off); - try bind.writeLazyBindInfo(lazy_bind_pointers.items, writer); + try lazy_bind.write(writer); try stream.seekTo(export_off - rebase_off); _ = try trie.write(writer); @@ -3415,9 +3393,7 @@ fn writeDyldInfoData(self: *MachO) !void { }); try self.base.file.?.pwriteAll(buffer, rebase_off); - const start = 
math.cast(usize, lazy_bind_off - rebase_off) orelse return error.Overflow; - const end = start + (math.cast(usize, lazy_bind_size) orelse return error.Overflow); - try self.populateLazyBindOffsetsInStubHelper(buffer[start..end]); + try self.populateLazyBindOffsetsInStubHelper(lazy_bind); self.dyld_info_cmd.rebase_off = @intCast(u32, rebase_off); self.dyld_info_cmd.rebase_size = @intCast(u32, rebase_size_aligned); @@ -3429,102 +3405,33 @@ fn writeDyldInfoData(self: *MachO) !void { self.dyld_info_cmd.export_size = @intCast(u32, export_size_aligned); } -fn populateLazyBindOffsetsInStubHelper(self: *MachO, buffer: []const u8) !void { - const gpa = self.base.allocator; +fn populateLazyBindOffsetsInStubHelper(self: *MachO, lazy_bind: LazyBind) !void { + if (lazy_bind.size() == 0) return; - const stub_helper_section_index = self.stub_helper_section_index orelse return; - if (self.stub_helper_preamble_atom == null) return; + const stub_helper_section_index = self.stub_helper_section_index.?; + assert(self.stub_helper_preamble_atom != null); const section = self.sections.get(stub_helper_section_index); - const last_atom = section.last_atom orelse return; - if (last_atom == self.stub_helper_preamble_atom.?) return; // TODO is this a redundant check? 
- - var table = std.AutoHashMap(i64, *Atom).init(gpa); - defer table.deinit(); - { - var stub_atom = last_atom; - var laptr_atom = self.sections.items(.last_atom)[self.la_symbol_ptr_section_index.?].?; - const base_addr = self.getSegment(self.la_symbol_ptr_section_index.?).vmaddr; - - while (true) { - const laptr_off = blk: { - const sym = laptr_atom.getSymbol(self); - break :blk @intCast(i64, sym.n_value - base_addr); - }; - try table.putNoClobber(laptr_off, stub_atom); - if (laptr_atom.prev) |prev| { - laptr_atom = prev; - stub_atom = stub_atom.prev.?; - } else break; - } - } - - var stream = std.io.fixedBufferStream(buffer); - var reader = stream.reader(); - var offsets = std.ArrayList(struct { sym_offset: i64, offset: u32 }).init(gpa); - try offsets.append(.{ .sym_offset = undefined, .offset = 0 }); - defer offsets.deinit(); - var valid_block = false; - - while (true) { - const inst = reader.readByte() catch |err| switch (err) { - error.EndOfStream => break, - }; - const opcode: u8 = inst & macho.BIND_OPCODE_MASK; - - switch (opcode) { - macho.BIND_OPCODE_DO_BIND => { - valid_block = true; - }, - macho.BIND_OPCODE_DONE => { - if (valid_block) { - const offset = try stream.getPos(); - try offsets.append(.{ .sym_offset = undefined, .offset = @intCast(u32, offset) }); - } - valid_block = false; - }, - macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM => { - var next = try reader.readByte(); - while (next != @as(u8, 0)) { - next = try reader.readByte(); - } - }, - macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB => { - var inserted = offsets.pop(); - inserted.sym_offset = try std.leb.readILEB128(i64, reader); - try offsets.append(inserted); - }, - macho.BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB => { - _ = try std.leb.readULEB128(u64, reader); - }, - macho.BIND_OPCODE_SET_ADDEND_SLEB => { - _ = try std.leb.readILEB128(i64, reader); - }, - else => {}, - } - } - - const header = self.sections.items(.header)[stub_helper_section_index]; const stub_offset: u4 = switch 
(self.base.options.target.cpu.arch) { .x86_64 => 1, .aarch64 => 2 * @sizeOf(u32), else => unreachable, }; - var buf: [@sizeOf(u32)]u8 = undefined; - _ = offsets.pop(); + const header = section.header; + var atom = section.last_atom.?; - while (offsets.popOrNull()) |bind_offset| { - const atom = table.get(bind_offset.sym_offset).?; + var index: usize = lazy_bind.offsets.items.len; + while (index > 0) : (index -= 1) { const sym = atom.getSymbol(self); const file_offset = header.offset + sym.n_value - header.addr + stub_offset; - mem.writeIntLittle(u32, &buf, bind_offset.offset); - log.debug("writing lazy bind offset in stub helper of 0x{x} for symbol {s} at offset 0x{x}", .{ - bind_offset.offset, - atom.getName(self), - file_offset, - }); - try self.base.file.?.pwriteAll(&buf, file_offset); + const bind_offset = lazy_bind.offsets.items[index - 1]; + + log.debug("writing lazy bind offset 0x{x} in stub helper at 0x{x}", .{ bind_offset, file_offset }); + + try self.base.file.?.pwriteAll(mem.asBytes(&bind_offset), file_offset); + + atom = atom.prev.?; } } @@ -3912,12 +3819,13 @@ pub fn getSymbolPtr(self: *MachO, sym_with_loc: SymbolWithLoc) *macho.nlist_64 { } /// Returns symbol described by `sym_with_loc` descriptor. -pub fn getSymbol(self: *MachO, sym_with_loc: SymbolWithLoc) macho.nlist_64 { - return self.getSymbolPtr(sym_with_loc).*; +pub fn getSymbol(self: *const MachO, sym_with_loc: SymbolWithLoc) macho.nlist_64 { + assert(sym_with_loc.file == null); + return self.locals.items[sym_with_loc.sym_index]; } /// Returns name of the symbol described by `sym_with_loc` descriptor. 
-pub fn getSymbolName(self: *MachO, sym_with_loc: SymbolWithLoc) []const u8 { +pub fn getSymbolName(self: *const MachO, sym_with_loc: SymbolWithLoc) []const u8 { assert(sym_with_loc.file == null); const sym = self.locals.items[sym_with_loc.sym_index]; return self.strtab.get(sym.n_strx).?; diff --git a/src/link/MachO/bind.zig b/src/link/MachO/bind.zig deleted file mode 100644 index 9e34581a23..0000000000 --- a/src/link/MachO/bind.zig +++ /dev/null @@ -1,138 +0,0 @@ -const std = @import("std"); -const leb = std.leb; -const macho = std.macho; - -pub const Pointer = struct { - offset: u64, - segment_id: u16, - dylib_ordinal: ?i64 = null, - name: ?[]const u8 = null, - bind_flags: u4 = 0, -}; - -pub fn rebaseInfoSize(pointers: []const Pointer) !u64 { - var stream = std.io.countingWriter(std.io.null_writer); - var writer = stream.writer(); - var size: u64 = 0; - - for (pointers) |pointer| { - size += 2; - try leb.writeILEB128(writer, pointer.offset); - size += 1; - } - - size += 1 + stream.bytes_written; - return size; -} - -pub fn writeRebaseInfo(pointers: []const Pointer, writer: anytype) !void { - for (pointers) |pointer| { - try writer.writeByte(macho.REBASE_OPCODE_SET_TYPE_IMM | @truncate(u4, macho.REBASE_TYPE_POINTER)); - try writer.writeByte(macho.REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | @truncate(u4, pointer.segment_id)); - - try leb.writeILEB128(writer, pointer.offset); - try writer.writeByte(macho.REBASE_OPCODE_DO_REBASE_IMM_TIMES | @truncate(u4, 1)); - } - try writer.writeByte(macho.REBASE_OPCODE_DONE); -} - -pub fn bindInfoSize(pointers: []const Pointer) !u64 { - var stream = std.io.countingWriter(std.io.null_writer); - var writer = stream.writer(); - var size: u64 = 0; - - for (pointers) |pointer| { - size += 1; - if (pointer.dylib_ordinal.? 
> 15) { - try leb.writeULEB128(writer, @bitCast(u64, pointer.dylib_ordinal.?)); - } - size += 1; - - size += 1; - size += pointer.name.?.len; - size += 1; - - size += 1; - - try leb.writeILEB128(writer, pointer.offset); - size += 1; - } - - size += stream.bytes_written + 1; - return size; -} - -pub fn writeBindInfo(pointers: []const Pointer, writer: anytype) !void { - for (pointers) |pointer| { - if (pointer.dylib_ordinal.? > 15) { - try writer.writeByte(macho.BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB); - try leb.writeULEB128(writer, @bitCast(u64, pointer.dylib_ordinal.?)); - } else if (pointer.dylib_ordinal.? > 0) { - try writer.writeByte(macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | @truncate(u4, @bitCast(u64, pointer.dylib_ordinal.?))); - } else { - try writer.writeByte(macho.BIND_OPCODE_SET_DYLIB_SPECIAL_IMM | @truncate(u4, @bitCast(u64, pointer.dylib_ordinal.?))); - } - try writer.writeByte(macho.BIND_OPCODE_SET_TYPE_IMM | @truncate(u4, macho.BIND_TYPE_POINTER)); - - try writer.writeByte(macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM | pointer.bind_flags); - try writer.writeAll(pointer.name.?); - try writer.writeByte(0); - - try writer.writeByte(macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | @truncate(u4, pointer.segment_id)); - - try leb.writeILEB128(writer, pointer.offset); - try writer.writeByte(macho.BIND_OPCODE_DO_BIND); - } - - try writer.writeByte(macho.BIND_OPCODE_DONE); -} - -pub fn lazyBindInfoSize(pointers: []const Pointer) !u64 { - var stream = std.io.countingWriter(std.io.null_writer); - var writer = stream.writer(); - var size: u64 = 0; - - for (pointers) |pointer| { - size += 1; - - try leb.writeILEB128(writer, pointer.offset); - - size += 1; - if (pointer.dylib_ordinal.? 
> 15) { - try leb.writeULEB128(writer, @bitCast(u64, pointer.dylib_ordinal.?)); - } - - size += 1; - size += pointer.name.?.len; - size += 1; - - size += 2; - } - - size += stream.bytes_written; - return size; -} - -pub fn writeLazyBindInfo(pointers: []const Pointer, writer: anytype) !void { - for (pointers) |pointer| { - try writer.writeByte(macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | @truncate(u4, pointer.segment_id)); - - try leb.writeILEB128(writer, pointer.offset); - - if (pointer.dylib_ordinal.? > 15) { - try writer.writeByte(macho.BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB); - try leb.writeULEB128(writer, @bitCast(u64, pointer.dylib_ordinal.?)); - } else if (pointer.dylib_ordinal.? > 0) { - try writer.writeByte(macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | @truncate(u4, @bitCast(u64, pointer.dylib_ordinal.?))); - } else { - try writer.writeByte(macho.BIND_OPCODE_SET_DYLIB_SPECIAL_IMM | @truncate(u4, @bitCast(u64, pointer.dylib_ordinal.?))); - } - - try writer.writeByte(macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM | pointer.bind_flags); - try writer.writeAll(pointer.name.?); - try writer.writeByte(0); - - try writer.writeByte(macho.BIND_OPCODE_DO_BIND); - try writer.writeByte(macho.BIND_OPCODE_DONE); - } -} diff --git a/src/link/MachO/dyld_info/Rebase.zig b/src/link/MachO/dyld_info/Rebase.zig new file mode 100644 index 0000000000..5004262b4d --- /dev/null +++ b/src/link/MachO/dyld_info/Rebase.zig @@ -0,0 +1,574 @@ +const Rebase = @This(); + +const std = @import("std"); +const assert = std.debug.assert; +const leb = std.leb; +const log = std.log.scoped(.dyld_info); +const macho = std.macho; +const testing = std.testing; + +const Allocator = std.mem.Allocator; + +entries: std.ArrayListUnmanaged(Entry) = .{}, +buffer: std.ArrayListUnmanaged(u8) = .{}, + +const Entry = struct { + offset: u64, + segment_id: u8, + + pub fn lessThan(ctx: void, entry: Entry, other: Entry) bool { + _ = ctx; + if (entry.segment_id == other.segment_id) { + return entry.offset < other.offset; 
+ } + return entry.segment_id < other.segment_id; + } +}; + +pub fn deinit(rebase: *Rebase, gpa: Allocator) void { + rebase.entries.deinit(gpa); + rebase.buffer.deinit(gpa); +} + +pub fn size(rebase: Rebase) u64 { + return @intCast(u64, rebase.buffer.items.len); +} + +pub fn finalize(rebase: *Rebase, gpa: Allocator) !void { + if (rebase.entries.items.len == 0) return; + + const writer = rebase.buffer.writer(gpa); + + std.sort.sort(Entry, rebase.entries.items, {}, Entry.lessThan); + + try setTypePointer(writer); + + var start: usize = 0; + var seg_id: ?u8 = null; + for (rebase.entries.items) |entry, i| { + if (seg_id != null and seg_id.? == entry.segment_id) continue; + try finalizeSegment(rebase.entries.items[start..i], writer); + seg_id = entry.segment_id; + start = i; + } + + try finalizeSegment(rebase.entries.items[start..], writer); + try done(writer); +} + +fn finalizeSegment(entries: []const Entry, writer: anytype) !void { + if (entries.len == 0) return; + + const segment_id = entries[0].segment_id; + var offset = entries[0].offset; + try setSegmentOffset(segment_id, offset, writer); + + var count: usize = 0; + var skip: u64 = 0; + var state: enum { + start, + times, + times_skip, + } = .times; + + var i: usize = 0; + while (i < entries.len) : (i += 1) { + log.debug("{x}, {d}, {x}, {s}", .{ offset, count, skip, @tagName(state) }); + const current_offset = entries[i].offset; + log.debug(" => {x}", .{current_offset}); + switch (state) { + .start => { + if (offset < current_offset) { + const delta = current_offset - offset; + try addAddr(delta, writer); + offset += delta; + } + state = .times; + offset += @sizeOf(u64); + count = 1; + }, + .times => { + const delta = current_offset - offset; + if (delta == 0) { + count += 1; + offset += @sizeOf(u64); + continue; + } + if (count == 1) { + state = .times_skip; + skip = delta; + offset += skip; + i -= 1; + } else { + try rebaseTimes(count, writer); + state = .start; + i -= 1; + } + }, + .times_skip => { + if 
(current_offset < offset) { + count -= 1; + if (count == 1) { + try rebaseAddAddr(skip, writer); + } else { + try rebaseTimesSkip(count, skip, writer); + } + state = .start; + offset = offset - (@sizeOf(u64) + skip); + i -= 2; + continue; + } + + const delta = current_offset - offset; + if (delta == 0) { + count += 1; + offset += @sizeOf(u64) + skip; + } else { + try rebaseTimesSkip(count, skip, writer); + state = .start; + i -= 1; + } + }, + } + } + + switch (state) { + .start => unreachable, + .times => { + try rebaseTimes(count, writer); + }, + .times_skip => { + try rebaseTimesSkip(count, skip, writer); + }, + } +} + +fn setTypePointer(writer: anytype) !void { + log.debug(">>> set type: {d}", .{macho.REBASE_TYPE_POINTER}); + try writer.writeByte(macho.REBASE_OPCODE_SET_TYPE_IMM | @truncate(u4, macho.REBASE_TYPE_POINTER)); +} + +fn setSegmentOffset(segment_id: u8, offset: u64, writer: anytype) !void { + log.debug(">>> set segment: {d} and offset: {x}", .{ segment_id, offset }); + try writer.writeByte(macho.REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | @truncate(u4, segment_id)); + try std.leb.writeULEB128(writer, offset); +} + +fn rebaseAddAddr(addr: u64, writer: anytype) !void { + log.debug(">>> rebase with add: {x}", .{addr}); + try writer.writeByte(macho.REBASE_OPCODE_DO_REBASE_ADD_ADDR_ULEB); + try std.leb.writeULEB128(writer, addr); +} + +fn rebaseTimes(count: usize, writer: anytype) !void { + log.debug(">>> rebase with count: {d}", .{count}); + if (count <= 0xf) { + try writer.writeByte(macho.REBASE_OPCODE_DO_REBASE_IMM_TIMES | @truncate(u4, count)); + } else { + try writer.writeByte(macho.REBASE_OPCODE_DO_REBASE_ULEB_TIMES); + try std.leb.writeULEB128(writer, count); + } +} + +fn rebaseTimesSkip(count: usize, skip: u64, writer: anytype) !void { + log.debug(">>> rebase with count: {d} and skip: {x}", .{ count, skip }); + try writer.writeByte(macho.REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB); + try std.leb.writeULEB128(writer, count); + try 
std.leb.writeULEB128(writer, skip); +} + +fn addAddr(addr: u64, writer: anytype) !void { + log.debug(">>> add: {x}", .{addr}); + if (std.mem.isAligned(addr, @sizeOf(u64))) { + const imm = @divExact(addr, @sizeOf(u64)); + if (imm <= 0xf) { + try writer.writeByte(macho.REBASE_OPCODE_ADD_ADDR_IMM_SCALED | @truncate(u4, imm)); + return; + } + } + try writer.writeByte(macho.REBASE_OPCODE_ADD_ADDR_ULEB); + try std.leb.writeULEB128(writer, addr); +} + +fn done(writer: anytype) !void { + log.debug(">>> done", .{}); + try writer.writeByte(macho.REBASE_OPCODE_DONE); +} + +pub fn write(rebase: Rebase, writer: anytype) !void { + if (rebase.size() == 0) return; + try writer.writeAll(rebase.buffer.items); +} + +test "rebase - no entries" { + const gpa = testing.allocator; + + var rebase = Rebase{}; + defer rebase.deinit(gpa); + + try rebase.finalize(gpa); + try testing.expectEqual(@as(u64, 0), rebase.size()); +} + +test "rebase - single entry" { + const gpa = testing.allocator; + + var rebase = Rebase{}; + defer rebase.deinit(gpa); + try rebase.entries.append(gpa, .{ + .segment_id = 1, + .offset = 0x10, + }); + try rebase.finalize(gpa); + try testing.expectEqualSlices(u8, &[_]u8{ + macho.REBASE_OPCODE_SET_TYPE_IMM | macho.REBASE_TYPE_POINTER, + macho.REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | 1, + 0x10, + macho.REBASE_OPCODE_DO_REBASE_IMM_TIMES | 1, + macho.REBASE_OPCODE_DONE, + }, rebase.buffer.items); +} + +test "rebase - emitTimes - IMM" { + const gpa = testing.allocator; + + var rebase = Rebase{}; + defer rebase.deinit(gpa); + + var i: u64 = 0; + while (i < 10) : (i += 1) { + try rebase.entries.append(gpa, .{ + .segment_id = 1, + .offset = i * @sizeOf(u64), + }); + } + + try rebase.finalize(gpa); + + try testing.expectEqualSlices(u8, &[_]u8{ + macho.REBASE_OPCODE_SET_TYPE_IMM | macho.REBASE_TYPE_POINTER, + macho.REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | 1, + 0x0, + macho.REBASE_OPCODE_DO_REBASE_IMM_TIMES | 10, + macho.REBASE_OPCODE_DONE, + }, rebase.buffer.items); +} + 
+test "rebase - emitTimes - ULEB" { + const gpa = testing.allocator; + + var rebase = Rebase{}; + defer rebase.deinit(gpa); + + var i: u64 = 0; + while (i < 100) : (i += 1) { + try rebase.entries.append(gpa, .{ + .segment_id = 1, + .offset = i * @sizeOf(u64), + }); + } + + try rebase.finalize(gpa); + + try testing.expectEqualSlices(u8, &[_]u8{ + macho.REBASE_OPCODE_SET_TYPE_IMM | macho.REBASE_TYPE_POINTER, + macho.REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | 1, + 0x0, + macho.REBASE_OPCODE_DO_REBASE_ULEB_TIMES, + 0x64, + macho.REBASE_OPCODE_DONE, + }, rebase.buffer.items); +} + +test "rebase - emitTimes followed by addAddr followed by emitTimes" { + const gpa = testing.allocator; + + var rebase = Rebase{}; + defer rebase.deinit(gpa); + + var offset: u64 = 0; + var i: u64 = 0; + while (i < 15) : (i += 1) { + try rebase.entries.append(gpa, .{ + .segment_id = 1, + .offset = offset, + }); + offset += @sizeOf(u64); + } + + offset += @sizeOf(u64); + + try rebase.entries.append(gpa, .{ + .segment_id = 1, + .offset = offset, + }); + + try rebase.finalize(gpa); + + try testing.expectEqualSlices(u8, &[_]u8{ + macho.REBASE_OPCODE_SET_TYPE_IMM | macho.REBASE_TYPE_POINTER, + macho.REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | 1, + 0x0, + macho.REBASE_OPCODE_DO_REBASE_IMM_TIMES | 15, + macho.REBASE_OPCODE_ADD_ADDR_IMM_SCALED | 1, + macho.REBASE_OPCODE_DO_REBASE_IMM_TIMES | 1, + macho.REBASE_OPCODE_DONE, + }, rebase.buffer.items); +} + +test "rebase - emitTimesSkip" { + const gpa = testing.allocator; + + var rebase = Rebase{}; + defer rebase.deinit(gpa); + + var offset: u64 = 0; + var i: u64 = 0; + while (i < 15) : (i += 1) { + try rebase.entries.append(gpa, .{ + .segment_id = 1, + .offset = offset, + }); + offset += 2 * @sizeOf(u64); + } + + try rebase.finalize(gpa); + + try testing.expectEqualSlices(u8, &[_]u8{ + macho.REBASE_OPCODE_SET_TYPE_IMM | macho.REBASE_TYPE_POINTER, + macho.REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | 1, + 0x0, + 
macho.REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB, + 0xf, + 0x8, + macho.REBASE_OPCODE_DONE, + }, rebase.buffer.items); +} + +test "rebase - complex" { + const gpa = testing.allocator; + + var rebase = Rebase{}; + defer rebase.deinit(gpa); + + try rebase.entries.append(gpa, .{ + .segment_id = 1, + .offset = 0, + }); + try rebase.entries.append(gpa, .{ + .segment_id = 1, + .offset = 0x10, + }); + try rebase.entries.append(gpa, .{ + .segment_id = 1, + .offset = 0x40, + }); + try rebase.entries.append(gpa, .{ + .segment_id = 1, + .offset = 0x48, + }); + try rebase.entries.append(gpa, .{ + .segment_id = 1, + .offset = 0x50, + }); + try rebase.entries.append(gpa, .{ + .segment_id = 1, + .offset = 0x58, + }); + try rebase.entries.append(gpa, .{ + .segment_id = 1, + .offset = 0x70, + }); + try rebase.finalize(gpa); + + try testing.expectEqualSlices(u8, &[_]u8{ + macho.REBASE_OPCODE_SET_TYPE_IMM | macho.REBASE_TYPE_POINTER, + macho.REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | 1, + 0x0, + macho.REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB, + 0x2, + 0x8, + macho.REBASE_OPCODE_ADD_ADDR_IMM_SCALED | 4, + macho.REBASE_OPCODE_DO_REBASE_IMM_TIMES | 4, + macho.REBASE_OPCODE_ADD_ADDR_IMM_SCALED | 2, + macho.REBASE_OPCODE_DO_REBASE_IMM_TIMES | 1, + macho.REBASE_OPCODE_DONE, + }, rebase.buffer.items); +} + +test "rebase - complex 2" { + const gpa = testing.allocator; + + var rebase = Rebase{}; + defer rebase.deinit(gpa); + + try rebase.entries.append(gpa, .{ + .segment_id = 1, + .offset = 0, + }); + try rebase.entries.append(gpa, .{ + .segment_id = 1, + .offset = 0x10, + }); + try rebase.entries.append(gpa, .{ + .segment_id = 1, + .offset = 0x28, + }); + try rebase.entries.append(gpa, .{ + .segment_id = 1, + .offset = 0x48, + }); + try rebase.entries.append(gpa, .{ + .segment_id = 1, + .offset = 0x78, + }); + try rebase.entries.append(gpa, .{ + .segment_id = 1, + .offset = 0xb8, + }); + try rebase.entries.append(gpa, .{ + .segment_id = 2, + .offset = 0x0, + }); + try 
rebase.entries.append(gpa, .{ + .segment_id = 2, + .offset = 0x8, + }); + try rebase.entries.append(gpa, .{ + .segment_id = 2, + .offset = 0x10, + }); + try rebase.entries.append(gpa, .{ + .segment_id = 2, + .offset = 0x18, + }); + try rebase.entries.append(gpa, .{ + .segment_id = 3, + .offset = 0x0, + }); + try rebase.entries.append(gpa, .{ + .segment_id = 3, + .offset = 0x20, + }); + try rebase.entries.append(gpa, .{ + .segment_id = 3, + .offset = 0x40, + }); + try rebase.entries.append(gpa, .{ + .segment_id = 3, + .offset = 0x60, + }); + try rebase.entries.append(gpa, .{ + .segment_id = 3, + .offset = 0x68, + }); + try rebase.finalize(gpa); + + try testing.expectEqualSlices(u8, &[_]u8{ + macho.REBASE_OPCODE_SET_TYPE_IMM | macho.REBASE_TYPE_POINTER, + macho.REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | 1, + 0x0, + macho.REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB, + 0x2, + 0x8, + macho.REBASE_OPCODE_ADD_ADDR_IMM_SCALED | 1, + macho.REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB, + 0x2, + 0x18, + macho.REBASE_OPCODE_ADD_ADDR_IMM_SCALED | 2, + macho.REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB, + 0x2, + 0x38, + macho.REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | 2, + 0x0, + macho.REBASE_OPCODE_DO_REBASE_IMM_TIMES | 4, + macho.REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | 3, + 0x0, + macho.REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB, + 0x3, + 0x18, + macho.REBASE_OPCODE_DO_REBASE_IMM_TIMES | 2, + macho.REBASE_OPCODE_DONE, + }, rebase.buffer.items); +} + +test "rebase - composite" { + const gpa = testing.allocator; + + var rebase = Rebase{}; + defer rebase.deinit(gpa); + + try rebase.entries.append(gpa, .{ + .segment_id = 1, + .offset = 0x8, + }); + try rebase.entries.append(gpa, .{ + .segment_id = 1, + .offset = 0x38, + }); + try rebase.entries.append(gpa, .{ + .segment_id = 1, + .offset = 0xa0, + }); + try rebase.entries.append(gpa, .{ + .segment_id = 1, + .offset = 0xa8, + }); + try rebase.entries.append(gpa, .{ + .segment_id = 1, + .offset = 0xb0, + }); 
+    try rebase.entries.append(gpa, .{
+        .segment_id = 1,
+        .offset = 0xc0,
+    });
+    try rebase.entries.append(gpa, .{
+        .segment_id = 1,
+        .offset = 0xc8,
+    });
+    try rebase.entries.append(gpa, .{
+        .segment_id = 1,
+        .offset = 0xd0,
+    });
+    try rebase.entries.append(gpa, .{
+        .segment_id = 1,
+        .offset = 0xd8,
+    });
+    try rebase.entries.append(gpa, .{
+        .segment_id = 1,
+        .offset = 0xe0,
+    });
+    try rebase.entries.append(gpa, .{
+        .segment_id = 1,
+        .offset = 0xe8,
+    });
+    try rebase.entries.append(gpa, .{
+        .segment_id = 1,
+        .offset = 0xf0,
+    });
+    try rebase.entries.append(gpa, .{
+        .segment_id = 1,
+        .offset = 0xf8,
+    });
+    try rebase.entries.append(gpa, .{
+        .segment_id = 1,
+        .offset = 0x108,
+    });
+    try rebase.finalize(gpa);
+
+    try testing.expectEqualSlices(u8, &[_]u8{
+        macho.REBASE_OPCODE_SET_TYPE_IMM | macho.REBASE_TYPE_POINTER,
+        macho.REBASE_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | 1,
+        0x8,
+        macho.REBASE_OPCODE_DO_REBASE_ULEB_TIMES_SKIPPING_ULEB,
+        0x2,
+        0x28,
+        macho.REBASE_OPCODE_ADD_ADDR_IMM_SCALED | 7,
+        macho.REBASE_OPCODE_DO_REBASE_IMM_TIMES | 3,
+        macho.REBASE_OPCODE_ADD_ADDR_IMM_SCALED | 1,
+        macho.REBASE_OPCODE_DO_REBASE_IMM_TIMES | 8,
+        macho.REBASE_OPCODE_ADD_ADDR_IMM_SCALED | 1,
+        macho.REBASE_OPCODE_DO_REBASE_IMM_TIMES | 1,
+        macho.REBASE_OPCODE_DONE,
+    }, rebase.buffer.items);
+}
diff --git a/src/link/MachO/dyld_info/bind.zig b/src/link/MachO/dyld_info/bind.zig
new file mode 100644
index 0000000000..7f7dc498b8
--- /dev/null
+++ b/src/link/MachO/dyld_info/bind.zig
@@ -0,0 +1,740 @@
+//! Emitters for the dyld bind and lazy-bind opcode streams.
+//!
+//! `Bind` and `LazyBind` collect entries (target symbol, segment id, offset
+//! and addend) and encode them into a byte buffer of `BIND_OPCODE_*` opcodes
+//! when `finalize` is called.
+
+const std = @import("std");
+const assert = std.debug.assert;
+const leb = std.leb;
+const log = std.log.scoped(.dyld_info);
+const macho = std.macho;
+const testing = std.testing;
+
+const Allocator = std.mem.Allocator;
+
+/// Returns a collector/encoder type for non-lazy bind entries.
+///
+/// `Ctx` must provide `getSymbol(Target)` (the result is queried with
+/// `weakRef()` and read through `n_desc`) and `getSymbolName(Target)`.
+/// `Target` must provide `eql(Target) bool`.
+pub fn Bind(comptime Ctx: type, comptime Target: type) type {
+    return struct {
+        /// Entries to encode; callers append to this list before `finalize`.
+        entries: std.ArrayListUnmanaged(Entry) = .{},
+        /// Encoded opcode stream; filled by `finalize`.
+        buffer: std.ArrayListUnmanaged(u8) = .{},
+
+        const Self = @This();
+
+        const Entry = struct {
+            target: Target,
+            offset: u64,
+            segment_id: u8,
+            addend: i64,
+
+            /// Sort order used by `finalize`: by segment id, then by symbol
+            /// name, and for entries with the same target by offset.
+            pub fn lessThan(ctx: Ctx, entry: Entry, other: Entry) bool {
+                if (entry.segment_id == other.segment_id) {
+                    if (entry.target.eql(other.target)) {
+                        return entry.offset < other.offset;
+                    }
+                    const entry_name = ctx.getSymbolName(entry.target);
+                    const other_name = ctx.getSymbolName(other.target);
+                    return std.mem.lessThan(u8, entry_name, other_name);
+                }
+                return entry.segment_id < other.segment_id;
+            }
+        };
+
+        /// Frees the entry list and the encoded buffer.
+        pub fn deinit(self: *Self, gpa: Allocator) void {
+            self.entries.deinit(gpa);
+            self.buffer.deinit(gpa);
+        }
+
+        /// Number of bytes in the encoded opcode stream.
+        pub fn size(self: Self) u64 {
+            return @intCast(u64, self.buffer.items.len);
+        }
+
+        /// Sorts the entries in place, encodes them one segment at a time into
+        /// `buffer`, and terminates the stream with `BIND_OPCODE_DONE`.
+        /// Does nothing when there are no entries.
+        pub fn finalize(self: *Self, gpa: Allocator, ctx: Ctx) !void {
+            if (self.entries.items.len == 0) return;
+
+            const writer = self.buffer.writer(gpa);
+
+            std.sort.sort(Entry, self.entries.items, ctx, Entry.lessThan);
+
+            // The addend is part of the interpreter state of the whole opcode
+            // stream; setting a new segment does not reset it. It is therefore
+            // tracked here, across segments, rather than per segment.
+            var addend: i64 = 0;
+            var start: usize = 0;
+            var seg_id: ?u8 = null;
+            for (self.entries.items) |entry, i| {
+                if (seg_id != null and seg_id.? == entry.segment_id) continue;
+                // On the first iteration this is an empty slice and a no-op.
+                try finalizeSegment(self.entries.items[start..i], ctx, &addend, writer);
+                seg_id = entry.segment_id;
+                start = i;
+            }
+
+            try finalizeSegment(self.entries.items[start..], ctx, &addend, writer);
+            try done(writer);
+        }
+
+        /// Encodes `entries`, which must all share one segment id and be
+        /// sorted with `Entry.lessThan`.
+        ///
+        /// `addend` is the addend most recently written to the stream (0 if
+        /// none was written yet); it is updated whenever a new addend is
+        /// emitted.
+        fn finalizeSegment(entries: []const Entry, ctx: Ctx, addend: *i64, writer: anytype) !void {
+            if (entries.len == 0) return;
+
+            const seg_id = entries[0].segment_id;
+            try setSegmentOffset(seg_id, 0, writer);
+
+            // `offset` mirrors the address cursor the opcodes emitted so far
+            // leave behind, relative to the segment start.
+            var offset: u64 = 0;
+            var count: usize = 0;
+            var skip: u64 = 0;
+            var target: ?Target = null;
+
+            var state: enum {
+                start,
+                bind_single,
+                bind_times_skip,
+            } = .start;
+
+            var i: usize = 0;
+            while (i < entries.len) : (i += 1) {
+                const current = entries[i];
+                const target_changed = target == null or !target.?.eql(current.target);
+                const addend_changed = current.addend != addend.*;
+                if (target_changed or addend_changed) {
+                    // Pending binds belong to the previous symbol/addend and
+                    // must be written out before either of them changes.
+                    switch (state) {
+                        .start => {},
+                        .bind_single => try doBind(writer),
+                        .bind_times_skip => try doBindTimesSkip(count, skip, writer),
+                    }
+                    state = .start;
+
+                    if (target_changed) {
+                        target = current.target;
+
+                        const sym = ctx.getSymbol(current.target);
+                        const name = ctx.getSymbolName(current.target);
+                        const flags: u8 = if (sym.weakRef()) macho.BIND_SYMBOL_FLAGS_WEAK_IMPORT else 0;
+                        const ordinal = @divTrunc(@bitCast(i16, sym.n_desc), macho.N_SYMBOL_RESOLVER);
+
+                        try setSymbol(name, flags, writer);
+                        try setTypePointer(writer);
+                        try setDylibOrdinal(ordinal, writer);
+                    }
+
+                    if (addend_changed) {
+                        addend.* = current.addend;
+                        try setAddend(current.addend, writer);
+                    }
+                }
+
+                log.debug("{x}, {d}, {x}, {?x}, {s}", .{ offset, count, skip, addend.*, @tagName(state) });
+                log.debug(" => {x}", .{current.offset});
+                switch (state) {
+                    .start => {
+                        // Move the cursor to the entry; a backwards move is
+                        // encoded as a wrapping unsigned add.
+                        if (current.offset < offset) {
+                            try addAddr(@bitCast(u64, @intCast(i64, current.offset) - @intCast(i64, offset)), writer);
+                            offset = offset - (offset - current.offset);
+                        } else if (current.offset > offset) {
+                            const delta = current.offset - offset;
+                            try addAddr(delta, writer);
+                            offset += delta;
+                        }
+                        state = .bind_single;
+                        offset += @sizeOf(u64);
+                        count = 1;
+                    },
+                    .bind_single => {
+                        if (current.offset == offset) {
+                            try doBind(writer);
+                            state = .start;
+                        } else if (current.offset > offset) {
+                            const delta = current.offset - offset;
+                            state = .bind_times_skip;
+                            skip = @intCast(u64, delta);
+                            offset += skip;
+                        } else unreachable;
+                        // Re-examine the same entry in the new state.
+                        i -= 1;
+                    },
+                    .bind_times_skip => {
+                        if (current.offset < offset) {
+                            // The entry breaks the stride: drop the last
+                            // counted entry from the run and restart from it.
+                            count -= 1;
+                            if (count == 1) {
+                                try doBindAddAddr(skip, writer);
+                            } else {
+                                try doBindTimesSkip(count, skip, writer);
+                            }
+                            state = .start;
+                            offset = offset - (@sizeOf(u64) + skip);
+                            i -= 2;
+                        } else if (current.offset == offset) {
+                            count += 1;
+                            offset += @sizeOf(u64) + skip;
+                        } else {
+                            try doBindTimesSkip(count, skip, writer);
+                            state = .start;
+                            i -= 1;
+                        }
+                    },
+                }
+            }
+
+            switch (state) {
+                .start => unreachable,
+                .bind_single => try doBind(writer),
+                .bind_times_skip => try doBindTimesSkip(count, skip, writer),
+            }
+        }
+
+        /// Writes the encoded opcode stream to `writer`; writes nothing when
+        /// the stream is empty.
+        pub fn write(self: Self, writer: anytype) !void {
+            if (self.size() == 0) return;
+            try writer.writeAll(self.buffer.items);
+        }
+    };
+}
+
+/// Returns a collector/encoder type for lazy bind entries.
+///
+/// `Ctx` and `Target` are used as in `Bind`, except that `Target.eql` is not
+/// needed: entries are encoded in insertion order, one after another.
+pub fn LazyBind(comptime Ctx: type, comptime Target: type) type {
+    return struct {
+        /// Entries to encode; callers append to this list before `finalize`.
+        entries: std.ArrayListUnmanaged(Entry) = .{},
+        /// Encoded opcode stream; filled by `finalize`.
+        buffer: std.ArrayListUnmanaged(u8) = .{},
+        /// For each entry, in order, the byte offset into `buffer` at which
+        /// its opcodes start; filled by `finalize`.
+        offsets: std.ArrayListUnmanaged(u32) = .{},
+
+        const Self = @This();
+
+        const Entry = struct {
+            target: Target,
+            offset: u64,
+            segment_id: u8,
+            addend: i64,
+        };
+
+        /// Frees the entry list, the encoded buffer and the offsets table.
+        pub fn deinit(self: *Self, gpa: Allocator) void {
+            self.entries.deinit(gpa);
+            self.buffer.deinit(gpa);
+            self.offsets.deinit(gpa);
+        }
+
+        /// Number of bytes in the encoded opcode stream.
+        pub fn size(self: Self) u64 {
+            return @intCast(u64, self.buffer.items.len);
+        }
+
+        /// Encodes every entry as its own opcode sequence terminated by
+        /// `BIND_OPCODE_DONE`, recording in `offsets` where each sequence
+        /// begins. Does nothing when there are no entries.
+        pub fn finalize(self: *Self, gpa: Allocator, ctx: Ctx) !void {
+            if (self.entries.items.len == 0) return;
+
+            try self.offsets.ensureTotalCapacityPrecise(gpa, self.entries.items.len);
+
+            var cwriter = std.io.countingWriter(self.buffer.writer(gpa));
+            const writer = cwriter.writer();
+
+            // An addend is only written when it differs from the addend of
+            // the previously encoded entry.
+            var addend: i64 = 0;
+
+            for (self.entries.items) |entry| {
+                self.offsets.appendAssumeCapacity(@intCast(u32, cwriter.bytes_written));
+
+                const sym = ctx.getSymbol(entry.target);
+                const name = ctx.getSymbolName(entry.target);
+                const flags: u8 = if (sym.weakRef()) macho.BIND_SYMBOL_FLAGS_WEAK_IMPORT else 0;
+                const ordinal = @divTrunc(@bitCast(i16, sym.n_desc), macho.N_SYMBOL_RESOLVER);
+
+                try setSegmentOffset(entry.segment_id, entry.offset, writer);
+                try setSymbol(name, flags, writer);
+                try setDylibOrdinal(ordinal, writer);
+
+                if (entry.addend != addend) {
+                    try setAddend(entry.addend, writer);
+                    addend = entry.addend;
+                }
+
+                try doBind(writer);
+                try done(writer);
+            }
+        }
+
+        /// Writes the encoded opcode stream to `writer`; writes nothing when
+        /// the stream is empty.
+        pub fn write(self: Self, writer: anytype) !void {
+            if (self.size() == 0) return;
+            try writer.writeAll(self.buffer.items);
+        }
+    };
+}
+
+/// Emits `BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB` with the low 4 bits of
+/// `segment_id` as immediate, followed by `offset` as ULEB128.
+fn setSegmentOffset(segment_id: u8, offset: u64, writer: anytype) !void {
+    log.debug(">>> set segment: {d} and offset: {x}", .{ segment_id, offset });
+    try writer.writeByte(macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | @truncate(u4, segment_id));
+    try std.leb.writeULEB128(writer, offset);
+}
+
+/// Emits `BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM` with the low 4 bits of
+/// `flags` as immediate, followed by `name` and a terminating zero byte.
+fn setSymbol(name: []const u8, flags: u8, writer: anytype) !void {
+    log.debug(">>> set symbol: {s} with flags: {x}", .{ name, flags });
+    try writer.writeByte(macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM | @truncate(u4, flags));
+    try writer.writeAll(name);
+    try writer.writeByte(0);
+}
+
+/// Emits `BIND_OPCODE_SET_TYPE_IMM` with `BIND_TYPE_POINTER` as immediate.
+fn setTypePointer(writer: anytype) !void {
+    log.debug(">>> set type: {d}", .{macho.BIND_TYPE_POINTER});
+    try writer.writeByte(macho.BIND_OPCODE_SET_TYPE_IMM | @truncate(u4, macho.BIND_TYPE_POINTER));
+}
+
+/// Emits the opcode selecting the dylib to bind against.
+///
+/// Ordinals <= 0 must be one of the `BIND_SPECIAL_DYLIB_*` values and are
+/// written as a 4-bit immediate; positive ordinals are written as immediate
+/// when they fit in 4 bits and as ULEB128 otherwise.
+fn setDylibOrdinal(ordinal: i16, writer: anytype) !void {
+    if (ordinal <= 0) {
+        switch (ordinal) {
+            macho.BIND_SPECIAL_DYLIB_SELF,
+            macho.BIND_SPECIAL_DYLIB_MAIN_EXECUTABLE,
+            macho.BIND_SPECIAL_DYLIB_FLAT_LOOKUP,
+            => {},
+            else => unreachable, // Invalid dylib special binding
+        }
+        log.debug(">>> set dylib special: {d}", .{ordinal});
+        const cast = @bitCast(u16, ordinal);
+        try writer.writeByte(macho.BIND_OPCODE_SET_DYLIB_SPECIAL_IMM | @truncate(u4, cast));
+    } else {
+        const cast = @bitCast(u16, ordinal);
+        log.debug(">>> set dylib ordinal: {d}", .{ordinal});
+        if (cast <= 0xf) {
+            try writer.writeByte(macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | @truncate(u4, cast));
+        } else {
+            try writer.writeByte(macho.BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB);
+            try std.leb.writeULEB128(writer, cast);
+        }
+    }
+}
+
+/// Emits `BIND_OPCODE_SET_ADDEND_SLEB` followed by `addend` as SLEB128.
+fn setAddend(addend: i64, writer: anytype) !void {
+    log.debug(">>> set addend: {x}", .{addend});
+    try writer.writeByte(macho.BIND_OPCODE_SET_ADDEND_SLEB);
+    try std.leb.writeILEB128(writer, addend);
+}
+
+/// Emits `BIND_OPCODE_DO_BIND`.
+fn doBind(writer: anytype) !void {
+    log.debug(">>> bind", .{});
+    try writer.writeByte(macho.BIND_OPCODE_DO_BIND);
+}
+
+/// Emits a bind followed by an address increment of `addr`.
+///
+/// Uses the scaled-immediate form when `addr` is a multiple of 8 whose
+/// quotient fits in 4 bits, and the ULEB128 form otherwise.
+fn doBindAddAddr(addr: u64, writer: anytype) !void {
+    log.debug(">>> bind with add: {x}", .{addr});
+    if (std.mem.isAligned(addr, @sizeOf(u64))) {
+        const imm = @divExact(addr, @sizeOf(u64));
+        if (imm <= 0xf) {
+            try writer.writeByte(
+                macho.BIND_OPCODE_DO_BIND_ADD_ADDR_IMM_SCALED | @truncate(u4, imm),
+            );
+            return;
+        }
+    }
+    try writer.writeByte(macho.BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB);
+    try std.leb.writeULEB128(writer, addr);
+}
+
+/// Emits `BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB` followed by `count`
+/// and `skip`, both as ULEB128.
+fn doBindTimesSkip(count: usize, skip: u64, writer: anytype) !void {
+    log.debug(">>> bind with count: {d} and skip: {x}", .{ count, skip });
+    try writer.writeByte(macho.BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB);
+    try std.leb.writeULEB128(writer, count);
+    try std.leb.writeULEB128(writer, skip);
+}
+
+/// Emits `BIND_OPCODE_ADD_ADDR_ULEB` followed by `addr` as ULEB128.
+fn addAddr(addr: u64, writer: anytype) !void {
+    log.debug(">>> add: {x}", .{addr});
+    try writer.writeByte(macho.BIND_OPCODE_ADD_ADDR_ULEB);
+    try std.leb.writeULEB128(writer, addr);
+}
+
+/// Emits `BIND_OPCODE_DONE`.
+fn done(writer: anytype) !void {
+    log.debug(">>> done", .{});
+    try writer.writeByte(macho.BIND_OPCODE_DONE);
+}
+
+/// Minimal `Ctx` implementation for the tests below: a symbol table plus a
+/// string table holding the zero-terminated symbol names.
+const TestContext = struct {
+    symbols: std.ArrayListUnmanaged(macho.nlist_64) = .{},
+    strtab: std.ArrayListUnmanaged(u8) = .{},
+
+    /// Index into `symbols`.
+    const Target = struct {
+        index: u32,
+
+        fn eql(this: Target, other: Target) bool {
+            return this.index == other.index;
+        }
+    };
+
+    fn deinit(ctx: *TestContext, gpa: Allocator) void {
+        ctx.symbols.deinit(gpa);
+        ctx.strtab.deinit(gpa);
+    }
+
+    /// Appends a symbol named `name` whose `n_desc` is
+    /// `ordinal * N_SYMBOL_RESOLVER` or-ed with `flags`.
+    fn addSymbol(ctx: *TestContext, gpa: Allocator, name: []const u8, ordinal: i16, flags: u16) !void {
+        const n_strx = try ctx.addString(gpa, name);
+        var n_desc = @bitCast(u16, ordinal * macho.N_SYMBOL_RESOLVER);
+        n_desc |= flags;
+        try ctx.symbols.append(gpa, .{
+            .n_value = 0,
+            .n_strx = n_strx,
+            .n_desc = n_desc,
+            .n_type = macho.N_EXT,
+            .n_sect = 0,
+        });
+    }
+
+    /// Appends `name` plus a zero terminator to the string table and returns
+    /// the offset at which it was stored.
+    fn addString(ctx: *TestContext, gpa: Allocator, name: []const u8) !u32 {
+        const n_strx = @intCast(u32, ctx.strtab.items.len);
+        try ctx.strtab.appendSlice(gpa, name);
+        try ctx.strtab.append(gpa, 0);
+        return n_strx;
+    }
+
+    fn getSymbol(ctx: TestContext, target: Target) macho.nlist_64 {
+        return ctx.symbols.items[target.index];
+    }
+
+    fn getSymbolName(ctx: TestContext, target: Target) []const u8 {
+        const sym = ctx.getSymbol(target);
+        assert(sym.n_strx < ctx.strtab.items.len);
+        return std.mem.sliceTo(@ptrCast([*:0]const u8, ctx.strtab.items.ptr + sym.n_strx), 0);
+    }
+};
+
+/// Builds a context with six imports; the caller must `deinit` it with
+/// `testing.allocator`.
+fn generateTestContext() !TestContext {
+    const gpa = testing.allocator;
+    var ctx = TestContext{};
+    try ctx.addSymbol(gpa, "_import_1", 1, 0);
+    try ctx.addSymbol(gpa, "_import_2", 1, 0);
+    try ctx.addSymbol(gpa, "_import_3", 1, 0);
+    try ctx.addSymbol(gpa, "_import_4", 2, 0);
+    try ctx.addSymbol(gpa, "_import_5_weak", 2, macho.N_WEAK_REF);
+    try ctx.addSymbol(gpa, "_import_6", 2, 0);
+    return ctx;
+}
+
+test "bind - no entries" {
+    const gpa = testing.allocator;
+
+    var test_context = try generateTestContext();
+    defer test_context.deinit(gpa);
+
+    var bind = Bind(TestContext, TestContext.Target){};
+    defer bind.deinit(gpa);
+
+    try bind.finalize(gpa, test_context);
+    try testing.expectEqual(@as(u64, 0), bind.size());
+}
+
+test "bind - single entry" {
+    const gpa = testing.allocator;
+
+    var test_context = try generateTestContext();
+    defer test_context.deinit(gpa);
+
+    var bind = Bind(TestContext, TestContext.Target){};
+    defer bind.deinit(gpa);
+
+    try bind.entries.append(gpa, .{
+        .offset = 0x10,
+        .segment_id = 1,
+        .target = TestContext.Target{ .index = 0 },
+        .addend = 0,
+    });
+    try bind.finalize(gpa, test_context);
+    try testing.expectEqualSlices(u8, &[_]u8{
+        macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | 1,
+        0x0,
+        macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM | 0,
+        0x5f,
+        0x69,
+        0x6d,
+        0x70,
+        0x6f,
+        0x72,
+        0x74,
+        0x5f,
+        0x31,
+        0x0,
+        macho.BIND_OPCODE_SET_TYPE_IMM | 1,
+        macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | 1,
+        macho.BIND_OPCODE_ADD_ADDR_ULEB,
+        0x10,
+        macho.BIND_OPCODE_DO_BIND,
+        macho.BIND_OPCODE_DONE,
+    }, bind.buffer.items);
+}
+
+test "bind - multiple occurrences within the same segment" {
+    const gpa = testing.allocator;
+
+    var test_context = try generateTestContext();
+    defer test_context.deinit(gpa);
+
+    var bind = Bind(TestContext, TestContext.Target){};
+    defer bind.deinit(gpa);
+
+    try bind.entries.append(gpa, .{
+        .offset = 0x10,
+        .segment_id = 1,
+        .target = TestContext.Target{ .index = 0 },
+        .addend = 0,
+    });
+    try bind.entries.append(gpa, .{
+        .offset = 0x18,
+        .segment_id = 1,
+        .target = TestContext.Target{ .index = 0 },
+        .addend = 0,
+    });
+    try bind.entries.append(gpa, .{
+        .offset = 0x20,
+        .segment_id = 1,
+        .target = TestContext.Target{ .index = 0 },
+        .addend = 0,
+    });
+    try bind.entries.append(gpa, .{
+        .offset = 0x28,
+        .segment_id = 1,
+        .target = TestContext.Target{ .index = 0 },
+        .addend = 0,
+    });
+
+    try bind.finalize(gpa, test_context);
+    try testing.expectEqualSlices(u8, &[_]u8{
+        macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | 1,
+        0x0,
+        macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM | 0,
+        0x5f,
+        0x69,
+        0x6d,
+        0x70,
+        0x6f,
+        0x72,
+        0x74,
+        0x5f,
+        0x31,
+        0x0,
+        macho.BIND_OPCODE_SET_TYPE_IMM | 1,
+        macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | 1,
+        macho.BIND_OPCODE_ADD_ADDR_ULEB,
+        0x10,
+        macho.BIND_OPCODE_DO_BIND,
+        macho.BIND_OPCODE_DO_BIND,
+        macho.BIND_OPCODE_DO_BIND,
+        macho.BIND_OPCODE_DO_BIND,
+        macho.BIND_OPCODE_DONE,
+    }, bind.buffer.items);
+}
+
+test "bind - multiple occurrences with skip and addend" {
+    const gpa = testing.allocator;
+
+    var test_context = try generateTestContext();
+    defer test_context.deinit(gpa);
+
+    var bind = Bind(TestContext, TestContext.Target){};
+    defer bind.deinit(gpa);
+
+    try bind.entries.append(gpa, .{
+        .offset = 0x0,
+        .segment_id = 1,
+        .target = TestContext.Target{ .index = 0 },
+        .addend = 0x10,
+    });
+    try bind.entries.append(gpa, .{
+        .offset = 0x10,
+        .segment_id = 1,
+        .target = TestContext.Target{ .index = 0 },
+        .addend = 0x10,
+    });
+    try bind.entries.append(gpa, .{
+        .offset = 0x20,
+        .segment_id = 1,
+        .target = TestContext.Target{ .index = 0 },
+        .addend = 0x10,
+    });
+    try bind.entries.append(gpa, .{
+        .offset = 0x30,
+        .segment_id = 1,
+        .target = TestContext.Target{ .index = 0 },
+        .addend = 0x10,
+    });
+
+    try bind.finalize(gpa, test_context);
+    try testing.expectEqualSlices(u8, &[_]u8{
+        macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | 1,
+        0x0,
+        macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM | 0,
+        0x5f,
+        0x69,
+        0x6d,
+        0x70,
+        0x6f,
+        0x72,
+        0x74,
+        0x5f,
+        0x31,
+        0x0,
+        macho.BIND_OPCODE_SET_TYPE_IMM | 1,
+        macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | 1,
+        macho.BIND_OPCODE_SET_ADDEND_SLEB,
+        0x10,
+        macho.BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB,
+        0x4,
+        0x8,
+        macho.BIND_OPCODE_DONE,
+    }, bind.buffer.items);
+}
+
+test "bind - complex" {
+    const gpa = testing.allocator;
+
+    var test_context = try generateTestContext();
+    defer test_context.deinit(gpa);
+
+    var bind = Bind(TestContext, TestContext.Target){};
+    defer bind.deinit(gpa);
+
+    try bind.entries.append(gpa, .{
+        .offset = 0x58,
+        .segment_id = 1,
+        .target = TestContext.Target{ .index = 0 },
+        .addend = 0,
+    });
+    try bind.entries.append(gpa, .{
+        .offset = 0x100,
+        .segment_id = 1,
+        .target = TestContext.Target{ .index = 1 },
+        .addend = 0x10,
+    });
+    try bind.entries.append(gpa, .{
+        .offset = 0x110,
+        .segment_id = 1,
+        .target = TestContext.Target{ .index = 1 },
+        .addend = 0x10,
+    });
+    try bind.entries.append(gpa, .{
+        .offset = 0x130,
+        .segment_id = 1,
+        .target = TestContext.Target{ .index = 1 },
+        .addend = 0x10,
+    });
+    try bind.entries.append(gpa, .{
+        .offset = 0x140,
+        .segment_id = 1,
+        .target = TestContext.Target{ .index = 1 },
+        .addend = 0x10,
+    });
+    try bind.entries.append(gpa, .{
+        .offset = 0x148,
+        .segment_id = 1,
+        .target = TestContext.Target{ .index = 2 },
+        .addend = 0,
+    });
+
+    try bind.finalize(gpa, test_context);
+    try testing.expectEqualSlices(u8, &[_]u8{
+        macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | 1,
+        0x0,
+        macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM | 0,
+        0x5f,
+        0x69,
+        0x6d,
+        0x70,
+        0x6f,
+        0x72,
+        0x74,
+        0x5f,
+        0x31,
+        0x0,
+        macho.BIND_OPCODE_SET_TYPE_IMM | 1,
+        macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | 1,
+        macho.BIND_OPCODE_ADD_ADDR_ULEB,
+        0x58,
+        macho.BIND_OPCODE_DO_BIND,
+        macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM | 0,
+        0x5f,
+        0x69,
+        0x6d,
+        0x70,
+        0x6f,
+        0x72,
+        0x74,
+        0x5f,
+        0x32,
+        0x0,
+        macho.BIND_OPCODE_SET_TYPE_IMM | 1,
+        macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | 1,
+        macho.BIND_OPCODE_SET_ADDEND_SLEB,
+        0x10,
+        macho.BIND_OPCODE_ADD_ADDR_ULEB,
+        0xa0,
+        0x1,
+        macho.BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB,
+        0x2,
+        0x8,
+        macho.BIND_OPCODE_ADD_ADDR_ULEB,
+        0x10,
+        macho.BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB,
+        0x2,
+        0x8,
+        macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM | 0,
+        0x5f,
+        0x69,
+        0x6d,
+        0x70,
+        0x6f,
+        0x72,
+        0x74,
+        0x5f,
+        0x33,
+        0x0,
+        macho.BIND_OPCODE_SET_TYPE_IMM | 1,
+        macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | 1,
+        macho.BIND_OPCODE_SET_ADDEND_SLEB,
+        0x0,
+        macho.BIND_OPCODE_ADD_ADDR_ULEB,
+        0xf8,
+        0xff,
+        0xff,
+        0xff,
+        0xff,
+        0xff,
+        0xff,
+        0xff,
+        0xff,
+        0x1,
+        macho.BIND_OPCODE_DO_BIND,
+        macho.BIND_OPCODE_DONE,
+    }, bind.buffer.items);
+}
+
+test "bind - same target with differing addends" {
+    const gpa = testing.allocator;
+
+    var test_context = try generateTestContext();
+    defer test_context.deinit(gpa);
+
+    var bind = Bind(TestContext, TestContext.Target){};
+    defer bind.deinit(gpa);
+
+    try bind.entries.append(gpa, .{
+        .offset = 0x0,
+        .segment_id = 1,
+        .target = TestContext.Target{ .index = 0 },
+        .addend = 0,
+    });
+    try bind.entries.append(gpa, .{
+        .offset = 0x8,
+        .segment_id = 1,
+        .target = TestContext.Target{ .index = 0 },
+        .addend = 0x10,
+    });
+
+    try bind.finalize(gpa, test_context);
+    try testing.expectEqualSlices(u8, &[_]u8{
+        macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | 1,
+        0x0,
+        macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM | 0,
+        0x5f,
+        0x69,
+        0x6d,
+        0x70,
+        0x6f,
+        0x72,
+        0x74,
+        0x5f,
+        0x31,
+        0x0,
+        macho.BIND_OPCODE_SET_TYPE_IMM | 1,
+        macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | 1,
+        macho.BIND_OPCODE_DO_BIND,
+        macho.BIND_OPCODE_SET_ADDEND_SLEB,
+        0x10,
+        macho.BIND_OPCODE_DO_BIND,
+        macho.BIND_OPCODE_DONE,
+    }, bind.buffer.items);
+}
+
+test "bind - addend carried across segments" {
+    const gpa = testing.allocator;
+
+    var test_context = try generateTestContext();
+    defer test_context.deinit(gpa);
+
+    var bind = Bind(TestContext, TestContext.Target){};
+    defer bind.deinit(gpa);
+
+    try bind.entries.append(gpa, .{
+        .offset = 0x0,
+        .segment_id = 1,
+        .target = TestContext.Target{ .index = 0 },
+        .addend = 0x10,
+    });
+    try bind.entries.append(gpa, .{
+        .offset = 0x0,
+        .segment_id = 2,
+        .target = TestContext.Target{ .index = 0 },
+        .addend = 0,
+    });
+
+    try bind.finalize(gpa, test_context);
+    try testing.expectEqualSlices(u8, &[_]u8{
+        macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | 1,
+        0x0,
+        macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM | 0,
+        0x5f,
+        0x69,
+        0x6d,
+        0x70,
+        0x6f,
+        0x72,
+        0x74,
+        0x5f,
+        0x31,
+        0x0,
+        macho.BIND_OPCODE_SET_TYPE_IMM | 1,
+        macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | 1,
+        macho.BIND_OPCODE_SET_ADDEND_SLEB,
+        0x10,
+        macho.BIND_OPCODE_DO_BIND,
+        macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | 2,
+        0x0,
+        macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM | 0,
+        0x5f,
+        0x69,
+        0x6d,
+        0x70,
+        0x6f,
+        0x72,
+        0x74,
+        0x5f,
+        0x31,
+        0x0,
+        macho.BIND_OPCODE_SET_TYPE_IMM | 1,
+        macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | 1,
+        macho.BIND_OPCODE_SET_ADDEND_SLEB,
+        0x0,
+        macho.BIND_OPCODE_DO_BIND,
+        macho.BIND_OPCODE_DONE,
+    }, bind.buffer.items);
+}
+
+test "lazy bind" { + const gpa = testing.allocator; + + var test_context = try generateTestContext(); + defer test_context.deinit(gpa); + + var bind = LazyBind(TestContext, TestContext.Target){}; + defer bind.deinit(gpa); + + try bind.entries.append(gpa, .{ + .offset = 0x10, + .segment_id = 1, + .target = TestContext.Target{ .index = 0 }, + .addend = 0, + }); + try bind.entries.append(gpa, .{ + .offset = 0x20, + .segment_id = 2, + .target = TestContext.Target{ .index = 1 }, + .addend = 0x10, + }); + + try bind.finalize(gpa, test_context); + try testing.expectEqualSlices(u8, &[_]u8{ + macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | 1, + 0x10, + macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM | 0, + 0x5f, + 0x69, + 0x6d, + 0x70, + 0x6f, + 0x72, + 0x74, + 0x5f, + 0x31, + 0x0, + macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | 1, + macho.BIND_OPCODE_DO_BIND, + macho.BIND_OPCODE_DONE, + macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | 2, + 0x20, + macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM | 0, + 0x5f, + 0x69, + 0x6d, + 0x70, + 0x6f, + 0x72, + 0x74, + 0x5f, + 0x32, + 0x0, + macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | 1, + macho.BIND_OPCODE_SET_ADDEND_SLEB, + 0x10, + macho.BIND_OPCODE_DO_BIND, + macho.BIND_OPCODE_DONE, + }, bind.buffer.items); +} diff --git a/src/link/MachO/zld.zig b/src/link/MachO/zld.zig index 3305267b62..9b3022f3e3 100644 --- a/src/link/MachO/zld.zig +++ b/src/link/MachO/zld.zig @@ -9,7 +9,6 @@ const math = std.math; const mem = std.mem; const aarch64 = @import("../../arch/aarch64/bits.zig"); -const bind = @import("bind.zig"); const dead_strip = @import("dead_strip.zig"); const fat = @import("fat.zig"); const link = @import("../../link.zig"); @@ -32,6 +31,10 @@ const Object = @import("Object.zig"); const StringTable = @import("../strtab.zig").StringTable; const Trie = @import("Trie.zig"); +const Bind = @import("dyld_info/bind.zig").Bind(*const Zld, SymbolWithLoc); +const LazyBind = @import("dyld_info/bind.zig").LazyBind(*const Zld, SymbolWithLoc); +const 
Rebase = @import("dyld_info/Rebase.zig"); + pub const Zld = struct { gpa: Allocator, file: fs.File, @@ -1778,14 +1781,14 @@ pub const Zld = struct { fn collectRebaseDataFromContainer( self: *Zld, sect_id: u8, - pointers: *std.ArrayList(bind.Pointer), + rebase: *Rebase, container: anytype, ) !void { const slice = self.sections.slice(); const segment_index = slice.items(.segment_index)[sect_id]; const seg = self.getSegment(sect_id); - try pointers.ensureUnusedCapacity(container.items.len); + try rebase.entries.ensureUnusedCapacity(self.gpa, container.items.len); for (container.items) |entry| { const target_sym = entry.getTargetSymbol(self); @@ -1796,19 +1799,19 @@ pub const Zld = struct { log.debug(" | rebase at {x}", .{base_offset}); - pointers.appendAssumeCapacity(.{ + rebase.entries.appendAssumeCapacity(.{ .offset = base_offset, .segment_id = segment_index, }); } } - fn collectRebaseData(self: *Zld, pointers: *std.ArrayList(bind.Pointer)) !void { + fn collectRebaseData(self: *Zld, rebase: *Rebase, reverse_lookups: [][]u32) !void { log.debug("collecting rebase data", .{}); // First, unpack GOT entries if (self.getSectionByName("__DATA_CONST", "__got")) |sect_id| { - try self.collectRebaseDataFromContainer(sect_id, pointers, self.got_entries); + try self.collectRebaseDataFromContainer(sect_id, rebase, self.got_entries); } const slice = self.sections.slice(); @@ -1820,7 +1823,7 @@ pub const Zld = struct { const seg = self.getSegment(sect_id); var atom_index = slice.items(.first_atom_index)[sect_id]; - try pointers.ensureUnusedCapacity(self.stubs.items.len); + try rebase.entries.ensureUnusedCapacity(self.gpa, self.stubs.items.len); while (true) { const atom = self.getAtom(atom_index); @@ -1829,7 +1832,7 @@ pub const Zld = struct { log.debug(" | rebase at {x}", .{base_offset}); - pointers.appendAssumeCapacity(.{ + rebase.entries.appendAssumeCapacity(.{ .offset = base_offset, .segment_id = segment_index, }); @@ -1896,13 +1899,16 @@ pub const Zld = struct { }, else => 
unreachable, } + const target = Atom.parseRelocTarget(self, atom_index, rel, reverse_lookups[atom.getFile().?]); + const target_sym = self.getSymbol(target); + if (target_sym.undf()) continue; const base_offset = @intCast(i32, sym.n_value - segment.vmaddr); const rel_offset = rel.r_address - base_rel_offset; const offset = @intCast(u64, base_offset + rel_offset); log.debug(" | rebase at {x}", .{offset}); - try pointers.append(.{ + try rebase.entries.append(self.gpa, .{ .offset = offset, .segment_id = segment_index, }); @@ -1914,19 +1920,21 @@ pub const Zld = struct { } else break; } } + + try rebase.finalize(self.gpa); } fn collectBindDataFromContainer( self: *Zld, sect_id: u8, - pointers: *std.ArrayList(bind.Pointer), + bind: *Bind, container: anytype, ) !void { const slice = self.sections.slice(); const segment_index = slice.items(.segment_index)[sect_id]; const seg = self.getSegment(sect_id); - try pointers.ensureUnusedCapacity(container.items.len); + try bind.entries.ensureUnusedCapacity(self.gpa, container.items.len); for (container.items) |entry| { const bind_sym_name = entry.getTargetSymbolName(self); @@ -1937,7 +1945,6 @@ pub const Zld = struct { const base_offset = sym.n_value - seg.vmaddr; const dylib_ordinal = @divTrunc(@bitCast(i16, bind_sym.n_desc), macho.N_SYMBOL_RESOLVER); - var flags: u4 = 0; log.debug(" | bind at {x}, import('{s}') in dylib({d})", .{ base_offset, bind_sym_name, @@ -1945,29 +1952,27 @@ pub const Zld = struct { }); if (bind_sym.weakRef()) { log.debug(" | marking as weak ref ", .{}); - flags |= @truncate(u4, macho.BIND_SYMBOL_FLAGS_WEAK_IMPORT); } - pointers.appendAssumeCapacity(.{ + bind.entries.appendAssumeCapacity(.{ + .target = entry.target, .offset = base_offset, .segment_id = segment_index, - .dylib_ordinal = dylib_ordinal, - .name = bind_sym_name, - .bind_flags = flags, + .addend = 0, }); } } - fn collectBindData(self: *Zld, pointers: *std.ArrayList(bind.Pointer), reverse_lookups: [][]u32) !void { + fn collectBindData(self: 
*Zld, bind: *Bind, reverse_lookups: [][]u32) !void { log.debug("collecting bind data", .{}); // First, unpack GOT section if (self.getSectionByName("__DATA_CONST", "__got")) |sect_id| { - try self.collectBindDataFromContainer(sect_id, pointers, self.got_entries); + try self.collectBindDataFromContainer(sect_id, bind, self.got_entries); } // Next, unpack TLV pointers section if (self.getSectionByName("__DATA", "__thread_ptrs")) |sect_id| { - try self.collectBindDataFromContainer(sect_id, pointers, self.tlv_ptr_entries); + try self.collectBindDataFromContainer(sect_id, bind, self.tlv_ptr_entries); } // Finally, unpack the rest. @@ -2033,27 +2038,27 @@ pub const Zld = struct { const bind_sym = self.getSymbol(global); if (!bind_sym.undf()) continue; - const base_offset = @intCast(i32, sym.n_value - segment.vmaddr); - const rel_offset = rel.r_address - base_rel_offset; + const base_offset = sym.n_value - segment.vmaddr; + const rel_offset = @intCast(u32, rel.r_address - base_rel_offset); const offset = @intCast(u64, base_offset + rel_offset); + const code = Atom.getAtomCode(self, atom_index); + const addend = mem.readIntLittle(i64, code[rel_offset..][0..8]); const dylib_ordinal = @divTrunc(@bitCast(i16, bind_sym.n_desc), macho.N_SYMBOL_RESOLVER); - var flags: u4 = 0; log.debug(" | bind at {x}, import('{s}') in dylib({d})", .{ base_offset, bind_sym_name, dylib_ordinal, }); + log.debug(" | with addend {x}", .{addend}); if (bind_sym.weakRef()) { log.debug(" | marking as weak ref ", .{}); - flags |= @truncate(u4, macho.BIND_SYMBOL_FLAGS_WEAK_IMPORT); } - try pointers.append(.{ + try bind.entries.append(self.gpa, .{ + .target = global, .offset = offset, .segment_id = segment_index, - .dylib_ordinal = dylib_ordinal, - .name = bind_sym_name, - .bind_flags = flags, + .addend = addend, }); } } @@ -2062,9 +2067,11 @@ pub const Zld = struct { } else break; } } + + try bind.finalize(self.gpa, self); } - fn collectLazyBindData(self: *Zld, pointers: *std.ArrayList(bind.Pointer)) 
!void { + fn collectLazyBindData(self: *Zld, lazy_bind: *LazyBind) !void { const sect_id = self.getSectionByName("__DATA", "__la_symbol_ptr") orelse return; log.debug("collecting lazy bind data", .{}); @@ -2075,7 +2082,7 @@ pub const Zld = struct { var atom_index = slice.items(.first_atom_index)[sect_id]; // TODO: we actually don't need to store lazy pointer atoms as they are synthetically generated by the linker - try pointers.ensureUnusedCapacity(self.stubs.items.len); + try lazy_bind.entries.ensureUnusedCapacity(self.gpa, self.stubs.items.len); var count: u32 = 0; while (true) : (count += 1) { @@ -2090,7 +2097,6 @@ pub const Zld = struct { const bind_sym = stub_entry.getTargetSymbol(self); const bind_sym_name = stub_entry.getTargetSymbolName(self); const dylib_ordinal = @divTrunc(@bitCast(i16, bind_sym.n_desc), macho.N_SYMBOL_RESOLVER); - var flags: u4 = 0; log.debug(" | lazy bind at {x}, import('{s}') in dylib({d})", .{ base_offset, bind_sym_name, @@ -2098,20 +2104,20 @@ pub const Zld = struct { }); if (bind_sym.weakRef()) { log.debug(" | marking as weak ref ", .{}); - flags |= @truncate(u4, macho.BIND_SYMBOL_FLAGS_WEAK_IMPORT); } - pointers.appendAssumeCapacity(.{ + lazy_bind.entries.appendAssumeCapacity(.{ + .target = stub_entry.target, .offset = base_offset, .segment_id = segment_index, - .dylib_ordinal = dylib_ordinal, - .name = bind_sym_name, - .bind_flags = flags, + .addend = 0, }); if (atom.next_index) |next_index| { atom_index = next_index; } else break; } + + try lazy_bind.finalize(self.gpa, self); } fn collectExportData(self: *Zld, trie: *Trie) !void { @@ -2161,17 +2167,17 @@ pub const Zld = struct { fn writeDyldInfoData(self: *Zld, reverse_lookups: [][]u32) !void { const gpa = self.gpa; - var rebase_pointers = std.ArrayList(bind.Pointer).init(gpa); - defer rebase_pointers.deinit(); - try self.collectRebaseData(&rebase_pointers); + var rebase = Rebase{}; + defer rebase.deinit(gpa); + try self.collectRebaseData(&rebase, reverse_lookups); - var 
bind_pointers = std.ArrayList(bind.Pointer).init(gpa); - defer bind_pointers.deinit(); - try self.collectBindData(&bind_pointers, reverse_lookups); + var bind = Bind{}; + defer bind.deinit(gpa); + try self.collectBindData(&bind, reverse_lookups); - var lazy_bind_pointers = std.ArrayList(bind.Pointer).init(gpa); - defer lazy_bind_pointers.deinit(); - try self.collectLazyBindData(&lazy_bind_pointers); + var lazy_bind = LazyBind{}; + defer lazy_bind.deinit(gpa); + try self.collectLazyBindData(&lazy_bind); var trie = Trie{}; defer trie.deinit(gpa); @@ -2180,17 +2186,17 @@ pub const Zld = struct { const link_seg = self.getLinkeditSegmentPtr(); assert(mem.isAlignedGeneric(u64, link_seg.fileoff, @alignOf(u64))); const rebase_off = link_seg.fileoff; - const rebase_size = try bind.rebaseInfoSize(rebase_pointers.items); + const rebase_size = rebase.size(); const rebase_size_aligned = mem.alignForwardGeneric(u64, rebase_size, @alignOf(u64)); log.debug("writing rebase info from 0x{x} to 0x{x}", .{ rebase_off, rebase_off + rebase_size_aligned }); const bind_off = rebase_off + rebase_size_aligned; - const bind_size = try bind.bindInfoSize(bind_pointers.items); + const bind_size = bind.size(); const bind_size_aligned = mem.alignForwardGeneric(u64, bind_size, @alignOf(u64)); log.debug("writing bind info from 0x{x} to 0x{x}", .{ bind_off, bind_off + bind_size_aligned }); const lazy_bind_off = bind_off + bind_size_aligned; - const lazy_bind_size = try bind.lazyBindInfoSize(lazy_bind_pointers.items); + const lazy_bind_size = lazy_bind.size(); const lazy_bind_size_aligned = mem.alignForwardGeneric(u64, lazy_bind_size, @alignOf(u64)); log.debug("writing lazy bind info from 0x{x} to 0x{x}", .{ lazy_bind_off, @@ -2214,13 +2220,13 @@ pub const Zld = struct { var stream = std.io.fixedBufferStream(buffer); const writer = stream.writer(); - try bind.writeRebaseInfo(rebase_pointers.items, writer); + try rebase.write(writer); try stream.seekTo(bind_off - rebase_off); - try 
bind.writeBindInfo(bind_pointers.items, writer); + try bind.write(writer); try stream.seekTo(lazy_bind_off - rebase_off); - try bind.writeLazyBindInfo(lazy_bind_pointers.items, writer); + try lazy_bind.write(writer); try stream.seekTo(export_off - rebase_off); _ = try trie.write(writer); @@ -2231,10 +2237,7 @@ pub const Zld = struct { }); try self.file.pwriteAll(buffer, rebase_off); - - const offset = math.cast(usize, lazy_bind_off - rebase_off) orelse return error.Overflow; - const size = math.cast(usize, lazy_bind_size) orelse return error.Overflow; - try self.populateLazyBindOffsetsInStubHelper(buffer[offset..][0..size]); + try self.populateLazyBindOffsetsInStubHelper(lazy_bind); self.dyld_info_cmd.rebase_off = @intCast(u32, rebase_off); self.dyld_info_cmd.rebase_size = @intCast(u32, rebase_size_aligned); @@ -2246,116 +2249,37 @@ pub const Zld = struct { self.dyld_info_cmd.export_size = @intCast(u32, export_size_aligned); } - fn populateLazyBindOffsetsInStubHelper(self: *Zld, buffer: []const u8) !void { - const gpa = self.gpa; - - const stub_helper_section_index = self.getSectionByName("__TEXT", "__stub_helper") orelse return; - const la_symbol_ptr_section_index = self.getSectionByName("__DATA", "__la_symbol_ptr") orelse return; + fn populateLazyBindOffsetsInStubHelper(self: *Zld, lazy_bind: LazyBind) !void { + if (lazy_bind.size() == 0) return; - if (self.stub_helper_preamble_sym_index == null) return; + const stub_helper_section_index = self.getSectionByName("__TEXT", "__stub_helper").?; + assert(self.stub_helper_preamble_sym_index != null); const section = self.sections.get(stub_helper_section_index); - const last_atom_index = section.last_atom_index; - - var table = std.AutoHashMap(i64, AtomIndex).init(gpa); - defer table.deinit(); - - { - var stub_atom_index = last_atom_index; - var laptr_atom_index = self.sections.items(.last_atom_index)[la_symbol_ptr_section_index]; - - const base_addr = blk: { - const segment_index = self.getSegmentByName("__DATA").?; - 
const seg = self.segments.items[segment_index]; - break :blk seg.vmaddr; - }; - - while (true) { - const stub_atom = self.getAtom(stub_atom_index); - const laptr_atom = self.getAtom(laptr_atom_index); - const laptr_off = blk: { - const sym = self.getSymbolPtr(laptr_atom.getSymbolWithLoc()); - break :blk @intCast(i64, sym.n_value - base_addr); - }; - - try table.putNoClobber(laptr_off, stub_atom_index); - - if (laptr_atom.prev_index) |prev_index| { - laptr_atom_index = prev_index; - stub_atom_index = stub_atom.prev_index.?; - } else break; - } - } - - var stream = std.io.fixedBufferStream(buffer); - var reader = stream.reader(); - var offsets = std.ArrayList(struct { sym_offset: i64, offset: u32 }).init(gpa); - try offsets.append(.{ .sym_offset = undefined, .offset = 0 }); - defer offsets.deinit(); - var valid_block = false; - - while (true) { - const inst = reader.readByte() catch |err| switch (err) { - error.EndOfStream => break, - }; - const opcode: u8 = inst & macho.BIND_OPCODE_MASK; - - switch (opcode) { - macho.BIND_OPCODE_DO_BIND => { - valid_block = true; - }, - macho.BIND_OPCODE_DONE => { - if (valid_block) { - const offset = try stream.getPos(); - try offsets.append(.{ .sym_offset = undefined, .offset = @intCast(u32, offset) }); - } - valid_block = false; - }, - macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM => { - var next = try reader.readByte(); - while (next != @as(u8, 0)) { - next = try reader.readByte(); - } - }, - macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB => { - var inserted = offsets.pop(); - inserted.sym_offset = try std.leb.readILEB128(i64, reader); - try offsets.append(inserted); - }, - macho.BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB => { - _ = try std.leb.readULEB128(u64, reader); - }, - macho.BIND_OPCODE_SET_ADDEND_SLEB => { - _ = try std.leb.readILEB128(i64, reader); - }, - else => {}, - } - } - - const header = self.sections.items(.header)[stub_helper_section_index]; const stub_offset: u4 = switch (self.options.target.cpu.arch) { .x86_64 => 1, 
.aarch64 => 2 * @sizeOf(u32), else => unreachable, }; - var buf: [@sizeOf(u32)]u8 = undefined; - _ = offsets.pop(); + const header = section.header; + var atom_index = section.first_atom_index; + atom_index = self.getAtom(atom_index).next_index.?; // skip preamble - while (offsets.popOrNull()) |bind_offset| { - const atom_index = table.get(bind_offset.sym_offset).?; + var index: usize = 0; + while (true) { const atom = self.getAtom(atom_index); - const sym = self.getSymbol(atom.getSymbolWithLoc()); + const atom_sym = self.getSymbol(atom.getSymbolWithLoc()); + const file_offset = header.offset + atom_sym.n_value - header.addr + stub_offset; + const bind_offset = lazy_bind.offsets.items[index]; - const file_offset = header.offset + sym.n_value - header.addr + stub_offset; - mem.writeIntLittle(u32, &buf, bind_offset.offset); + log.debug("writing lazy bind offset 0x{x} in stub helper at 0x{x}", .{ bind_offset, file_offset }); - log.debug("writing lazy bind offset in stub helper of 0x{x} for symbol {s} at offset 0x{x}", .{ - bind_offset.offset, - self.getSymbolName(atom.getSymbolWithLoc()), - file_offset, - }); + try self.file.pwriteAll(mem.asBytes(&bind_offset), file_offset); - try self.file.pwriteAll(&buf, file_offset); + if (atom.next_index) |next_index| { + atom_index = next_index; + index += 1; + } else break; } } @@ -3018,12 +2942,17 @@ pub const Zld = struct { } /// Returns symbol described by `sym_with_loc` descriptor. - pub fn getSymbol(self: *Zld, sym_with_loc: SymbolWithLoc) macho.nlist_64 { - return self.getSymbolPtr(sym_with_loc).*; + pub fn getSymbol(self: *const Zld, sym_with_loc: SymbolWithLoc) macho.nlist_64 { + if (sym_with_loc.getFile()) |file| { + const object = &self.objects.items[file]; + return object.symtab[sym_with_loc.sym_index]; + } else { + return self.locals.items[sym_with_loc.sym_index]; + } } /// Returns name of the symbol described by `sym_with_loc` descriptor. 
- pub fn getSymbolName(self: *Zld, sym_with_loc: SymbolWithLoc) []const u8 { + pub fn getSymbolName(self: *const Zld, sym_with_loc: SymbolWithLoc) []const u8 { if (sym_with_loc.getFile()) |file| { const object = self.objects.items[file]; return object.getSymbolName(sym_with_loc.sym_index); |
