diff options
| author | Jakub Konka <kubkon@jakubkonka.com> | 2020-12-13 21:26:57 +0100 |
|---|---|---|
| committer | Jakub Konka <kubkon@jakubkonka.com> | 2020-12-17 10:04:53 +0100 |
| commit | ae535111a40ad0a5fe87304e171fd093a575494e (patch) | |
| tree | efa2d1c03dbf676e9bc8f61121b86503ae63dfc0 /src | |
| parent | 5e913c9c2c41e44620911ffce0ebc20d9af041ca (diff) | |
| download | zig-ae535111a40ad0a5fe87304e171fd093a575494e.tar.gz zig-ae535111a40ad0a5fe87304e171fd093a575494e.zip | |
macho: cleanup (lazy) binding info tables
Diffstat (limited to 'src')
| -rw-r--r-- | src/link/MachO.zig | 147 | ||||
| -rw-r--r-- | src/link/MachO/Trie.zig | 25 | ||||
| -rw-r--r-- | src/link/MachO/imports.zig | 188 |
3 files changed, 267 insertions, 93 deletions
diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 22df059718..f6ff809cb9 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -810,47 +810,50 @@ fn linkWithLLD(self: *MachO, comp: *Compilation) !void { if (self.symtab_cmd_index == null or self.dysymtab_cmd_index == null) { std.log.err("Incomplete Mach-O binary: no LC_SYMTAB or LC_DYSYMTAB load command found!", .{}); std.log.err("Without the symbol table, it is not possible to patch up the binary for cross-compilation.", .{}); - return error.NoSymbolTable; + return error.NoSymbolTableFound; } // Parse symbol and string tables. try self.parseSymbolTable(); try self.parseStringTable(); - std.debug.print("Undef symbols\n", .{}); - for (self.undef_symbols.items) |sym| { - const name = self.string_table.items[sym.n_strx..]; - const len = blk: { - var end: usize = 0; - while (true) { - if (name[end] == @as(u8, 0)) break; - end += 1; - } - break :blk end; - }; - std.debug.print("name={},sym={}\n", .{ name[0..len], sym }); + // Parse dyld info + try self.parseBindingInfoTable(); + try self.parseLazyBindingInfoTable(); + + // Update the dylib ordinals. + self.binding_info_table.dylib_ordinal = next_ordinal; + for (self.lazy_binding_info_table.symbols.items) |*symbol| { + symbol.dylib_ordinal = next_ordinal; } - // Parse dyld info - var symbols_by_name = std.StringHashMap(u16).init(self.base.allocator); - defer symbols_by_name.deinit(); - try symbols_by_name.ensureCapacity(@intCast(u32, self.undef_symbols.items.len)); - - for (self.undef_symbols.items) |sym, i| { - const name = self.string_table.items[sym.n_strx..]; - const len = blk: { - var end: usize = 0; - while (true) { - if (name[end] == @as(u8, 0)) break; - end += 1; - } - break :blk end; - }; - symbols_by_name.putAssumeCapacityNoClobber(name[0..len], @intCast(u16, i)); + // Write update dyld info + const dyld_info = self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly; + { + const size = self.binding_info_table.calcSize(); + assert(dyld_info.bind_size == size); + + var buffer = try self.base.allocator.alloc(u8, size); + defer self.base.allocator.free(buffer); + + var stream = std.io.fixedBufferStream(buffer); + try self.binding_info_table.write(stream.writer()); + + try self.base.file.?.pwriteAll(buffer, dyld_info.bind_off); + } + { + const size = self.lazy_binding_info_table.calcSize(); + assert(dyld_info.lazy_bind_size == size); + + var buffer = try self.base.allocator.alloc(u8, size); + defer self.base.allocator.free(buffer); + + var stream = std.io.fixedBufferStream(buffer); + try self.lazy_binding_info_table.write(stream.writer()); + + try self.base.file.?.pwriteAll(buffer, dyld_info.lazy_bind_off); } - try self.parseBindingInfoTable(symbols_by_name); - try self.parseLazyBindingInfoTable(symbols_by_name); // Write updated load commands and the header try self.writeLoadCommands(); try self.writeHeader(); @@ -1952,6 +1955,68 @@ fn writeExportTrie(self: *MachO) !void { self.cmd_table_dirty = true; } +fn writeBindingInfoTable(self: *MachO) !void { + const size = self.binding_info_table.calcSize(); + var buffer = try self.base.allocator.alloc(u8, size); + defer self.base.allocator.free(buffer); + + var stream = std.io.fixedBufferStream(buffer); + try self.binding_info_table.write(stream.writer()); + + const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly; + const bind_size = @intCast(u32, mem.alignForward(buffer.len, @sizeOf(u64))); + dyld_info.bind_off = self.linkedit_segment_next_offset.?; + dyld_info.bind_size = bind_size; + + log.debug("writing binding info table from 0x{x} to 0x{x}\n", .{ dyld_info.bind_off, dyld_info.bind_off + bind_size }); + + if (bind_size > buffer.len) { + // Pad out to align(8). + try self.base.file.?.pwriteAll(&[_]u8{0}, dyld_info.bind_off + bind_size); + } + try self.base.file.?.pwriteAll(buffer, dyld_info.bind_off); + + self.linkedit_segment_next_offset = dyld_info.bind_off + dyld_info.bind_size; + // Advance size of __LINKEDIT segment + const linkedit = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; + linkedit.inner.filesize += dyld_info.bind_size; + if (linkedit.inner.vmsize < linkedit.inner.filesize) { + linkedit.inner.vmsize = mem.alignForwardGeneric(u64, linkedit.inner.filesize, self.page_size); + } + self.cmd_table_dirty = true; +} + +fn writeLazyBindingInfoTable(self: *MachO) !void { + const size = self.lazy_binding_info_table.calcSize(); + var buffer = try self.base.allocator.alloc(u8, size); + defer self.base.allocator.free(buffer); + + var stream = std.io.fixedBufferStream(buffer); + try self.lazy_binding_info_table.write(stream.writer()); + + const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly; + const bind_size = @intCast(u32, mem.alignForward(buffer.len, @sizeOf(u64))); + dyld_info.lazy_bind_off = self.linkedit_segment_next_offset.?; + dyld_info.lazy_bind_size = bind_size; + + log.debug("writing lazy binding info table from 0x{x} to 0x{x}\n", .{ dyld_info.lazy_bind_off, dyld_info.lazy_bind_off + bind_size }); + + if (bind_size > buffer.len) { + // Pad out to align(8). + try self.base.file.?.pwriteAll(&[_]u8{0}, dyld_info.lazy_bind_off + bind_size); + } + try self.base.file.?.pwriteAll(buffer, dyld_info.lazy_bind_off); + + self.linkedit_segment_next_offset = dyld_info.lazy_bind_off + dyld_info.lazy_bind_size; + // Advance size of __LINKEDIT segment + const linkedit = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment; + linkedit.inner.filesize += dyld_info.lazy_bind_size; + if (linkedit.inner.vmsize < linkedit.inner.filesize) { + linkedit.inner.vmsize = mem.alignForwardGeneric(u64, linkedit.inner.filesize, self.page_size); + } + self.cmd_table_dirty = true; +} + fn writeStringTable(self: *MachO) !void { const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab; const needed_size = self.string_table.items.len; @@ -2122,7 +2187,7 @@ fn parseStringTable(self: *MachO) !void { self.string_table.appendSliceAssumeCapacity(buffer); } -fn parseBindingInfoTable(self: *MachO, symbols_by_name: std.StringHashMap(u16)) !void { +fn parseBindingInfoTable(self: *MachO) !void { const dyld_info = self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly; var buffer = try self.base.allocator.alloc(u8, dyld_info.bind_size); defer self.base.allocator.free(buffer); @@ -2130,10 +2195,10 @@ fn parseBindingInfoTable(self: *MachO, symbols_by_name: std.StringHashMap(u16)) assert(nread == buffer.len); var stream = std.io.fixedBufferStream(buffer); - try self.binding_info_table.read(self.base.allocator, symbols_by_name, stream.reader()); + try self.binding_info_table.read(stream.reader(), self.base.allocator); } -fn parseLazyBindingInfoTable(self: *MachO, symbols_by_name: std.StringHashMap(u16)) !void { +fn parseLazyBindingInfoTable(self: *MachO) !void { const dyld_info = self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly; var buffer = try self.base.allocator.alloc(u8, dyld_info.lazy_bind_size); defer self.base.allocator.free(buffer); @@ -2141,5 +2206,17 @@ fn parseLazyBindingInfoTable(self: *MachO, symbols_by_name: std.StringHashMap(u1 assert(nread == buffer.len); var stream = std.io.fixedBufferStream(buffer); - try self.lazy_binding_info_table.read(self.base.allocator, symbols_by_name, stream.reader()); + try self.lazy_binding_info_table.read(stream.reader(), self.base.allocator); +} + +/// Calculates number of bytes in LEB128 encoding of value. +pub fn sizeLEB128(value: anytype) usize { + var res: usize = 0; + var v = value; + while (true) { + v = v >> 7; + res += 1; + if (v == 0) break; + } + return res; } diff --git a/src/link/MachO/Trie.zig b/src/link/MachO/Trie.zig index 0016ff329c..690588cd53 100644 --- a/src/link/MachO/Trie.zig +++ b/src/link/MachO/Trie.zig @@ -38,6 +38,7 @@ const macho = std.macho; const testing = std.testing; const assert = std.debug.assert; const Allocator = mem.Allocator; +const sizeLEB128 = @import("../MachO.zig").sizeLEB128; pub const Node = struct { base: *Trie, @@ -244,9 +245,9 @@ pub const Node = struct { fn finalize(self: *Node, offset_in_trie: usize) FinalizeResult { var node_size: usize = 0; if (self.terminal_info) |info| { - node_size += sizeULEB128Mem(info.export_flags); - node_size += sizeULEB128Mem(info.vmaddr_offset); - node_size += sizeULEB128Mem(node_size); + node_size += sizeLEB128(info.export_flags); + node_size += sizeLEB128(info.vmaddr_offset); + node_size += sizeLEB128(node_size); } else { node_size += 1; // 0x0 for non-terminal nodes } @@ -254,7 +255,7 @@ pub const Node = struct { for (self.edges.items) |edge| { const next_node_offset = edge.to.trie_offset orelse 0; - node_size += edge.label.len + 1 + sizeULEB128Mem(next_node_offset); + node_size += edge.label.len + 1 + sizeLEB128(next_node_offset); } const trie_offset = self.trie_offset orelse 0; @@ -264,18 +265,6 @@ pub const Node = struct { return .{ .node_size = node_size, .updated = updated }; } - - /// Calculates number of bytes in ULEB128 encoding of value. - fn sizeULEB128Mem(value: u64) usize { - var res: usize = 0; - var v = value; - while (true) { - v = v >> 7; - res += 1; - if (v == 0) break; - } - return res; - } }; /// The root node of the trie. @@ -380,9 +369,7 @@ pub fn read(self: *Trie, reader: anytype) ReadError!usize { } /// Write the trie to a byte stream. -/// Caller owns the memory and needs to free it. -/// Panics if the trie was not finalized using `finalize` -/// before calling this method. +/// Panics if the trie was not finalized using `finalize` before calling this method. pub fn write(self: Trie, writer: anytype) !usize { assert(!self.trie_dirty); var counting_writer = std.io.countingWriter(writer); diff --git a/src/link/MachO/imports.zig b/src/link/MachO/imports.zig index c9917139ea..6c03649b84 100644 --- a/src/link/MachO/imports.zig +++ b/src/link/MachO/imports.zig @@ -5,16 +5,22 @@ const mem = std.mem; const assert = std.debug.assert; const Allocator = mem.Allocator; +const sizeLEB128 = @import("../MachO.zig").sizeLEB128; +/// Table of binding info entries used to tell the dyld which +/// symbols to bind at loading time. pub const BindingInfoTable = struct { + /// Id of the dynamic library where the specified entries can be found. dylib_ordinal: i64 = 0, + + /// Binding type; defaults to pointer type. binding_type: u8 = macho.BIND_TYPE_POINTER, - entries: std.ArrayListUnmanaged(Entry) = .{}, - pub const Entry = struct { - /// Id of the symbol in the undef symbol table. - /// Can be null. - symbol: ?u16 = null, + symbols: std.ArrayListUnmanaged(Symbol) = .{}, + + pub const Symbol = struct { + /// Symbol name. + name: ?[]u8 = null, /// Id of the segment where to bind this symbol to. segment: u8, @@ -24,14 +30,17 @@ pub const BindingInfoTable = struct { }; pub fn deinit(self: *BindingInfoTable, allocator: *Allocator) void { - self.entries.deinit(allocator); + for (self.symbols.items) |*symbol| { + if (symbol.name) |name| { + allocator.free(name); + } + } + self.symbols.deinit(allocator); } - pub fn read(self: *BindingInfoTable, allocator: *Allocator, symbols_by_name: anytype, reader: anytype) !void { - var name = std.ArrayList(u8).init(allocator); - defer name.deinit(); - - var entry: Entry = .{ + /// Parse the binding info table from byte stream. + pub fn read(self: *BindingInfoTable, reader: anytype, allocator: *Allocator) !void { + var symbol: Symbol = .{ .segment = 0, .offset = 0, }; @@ -48,8 +57,8 @@ pub const BindingInfoTable = struct { switch (opcode) { macho.BIND_OPCODE_DO_BIND => { - try self.entries.append(allocator, entry); - entry = .{ + try self.symbols.append(allocator, symbol); + symbol = .{ .segment = 0, .offset = 0, }; @@ -59,17 +68,17 @@ pub const BindingInfoTable = struct { break; }, macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM => { - name.shrinkRetainingCapacity(0); + var name = std.ArrayList(u8).init(allocator); var next = try reader.readByte(); while (next != @as(u8, 0)) { try name.append(next); next = try reader.readByte(); } - entry.symbol = symbols_by_name.get(name.items[0..]); + symbol.name = name.toOwnedSlice(); }, macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB => { - entry.segment = imm; - entry.offset = try leb.readILEB128(i64, reader); + symbol.segment = imm; + symbol.offset = try leb.readILEB128(i64, reader); }, macho.BIND_OPCODE_SET_DYLIB_SPECIAL_IMM, macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM => { assert(!dylib_ordinal_set); @@ -90,15 +99,69 @@ pub const BindingInfoTable = struct { assert(done); } - pub fn write(self: BindingInfoTable, writer: anytype) !void {} + /// Write the binding info table to byte stream. + pub fn write(self: BindingInfoTable, writer: anytype) !void { + if (self.dylib_ordinal > 15) { + try writer.writeByte(macho.BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB); + try leb.writeULEB128(writer, @bitCast(u64, self.dylib_ordinal)); + } else if (self.dylib_ordinal > 0) { + try writer.writeByte(macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | @truncate(u4, @bitCast(u64, self.dylib_ordinal))); + } else { + try writer.writeByte(macho.BIND_OPCODE_SET_DYLIB_SPECIAL_IMM | @truncate(u4, @bitCast(u64, self.dylib_ordinal))); + } + try writer.writeByte(macho.BIND_OPCODE_SET_TYPE_IMM | @truncate(u4, self.binding_type)); + + for (self.symbols.items) |symbol| { + if (symbol.name) |name| { + try writer.writeByte(macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM); // TODO Sometimes we might want to add flags. + try writer.writeAll(name); + try writer.writeByte(0); + } + + try writer.writeByte(macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | @truncate(u4, symbol.segment)); + try leb.writeILEB128(writer, symbol.offset); + + try writer.writeByte(macho.BIND_OPCODE_DO_BIND); + } + + try writer.writeByte(macho.BIND_OPCODE_DONE); + } + + /// Calculate size in bytes of this binding info table. + pub fn calcSize(self: *BindingInfoTable) usize { + var size: usize = 1; + if (self.dylib_ordinal > 15) { + size += sizeLEB128(self.dylib_ordinal); + } + + size += 1; + + for (self.symbols.items) |symbol| { + if (symbol.name) |name| { + size += 1; + size += name.len; + size += 1; + } + + size += 1; + size += sizeLEB128(symbol.offset); + + size += 1; + } + + size += 1; + return size; + } }; +/// Table of lazy binding info entries used to tell the dyld which +/// symbols to lazily bind at first load of a dylib. pub const LazyBindingInfoTable = struct { - entries: std.ArrayListUnmanaged(Entry) = .{}, + symbols: std.ArrayListUnmanaged(Symbol) = .{}, - pub const Entry = struct { - /// Id of the symbol in the undef symbol table. - symbol: u16, + pub const Symbol = struct { + /// Symbol name. + name: ?[]u8 = null, /// Offset of this symbol wrt to the segment id encoded in `segment`. offset: i64, @@ -113,15 +176,17 @@ pub const LazyBindingInfoTable = struct { }; pub fn deinit(self: *LazyBindingInfoTable, allocator: *Allocator) void { - self.entries.deinit(allocator); + for (self.symbols.items) |*symbol| { + if (symbol.name) |name| { + allocator.free(name); + } + } + self.symbols.deinit(allocator); } - pub fn read(self: *LazyBindingInfoTable, allocator: *Allocator, symbols_by_name: anytype, reader: anytype) !void { - var name = std.ArrayList(u8).init(allocator); - defer name.deinit(); - - var entry: Entry = .{ - .symbol = 0, + /// Parse the binding info table from byte stream. + pub fn read(self: *LazyBindingInfoTable, reader: anytype, allocator: *Allocator) !void { + var symbol: Symbol = .{ .offset = 0, .segment = 0, .dylib_ordinal = 0, @@ -138,35 +203,34 @@ pub const LazyBindingInfoTable = struct { switch (opcode) { macho.BIND_OPCODE_DO_BIND => { - try self.entries.append(allocator, entry); + try self.symbols.append(allocator, symbol); }, macho.BIND_OPCODE_DONE => { done = true; - entry = .{ - .symbol = 0, + symbol = .{ .offset = 0, .segment = 0, .dylib_ordinal = 0, }; }, macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM => { - name.shrinkRetainingCapacity(0); + var name = std.ArrayList(u8).init(allocator); var next = try reader.readByte(); while (next != @as(u8, 0)) { try name.append(next); next = try reader.readByte(); } - entry.symbol = symbols_by_name.get(name.items[0..]) orelse unreachable; + symbol.name = name.toOwnedSlice(); }, macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB => { - entry.segment = imm; - entry.offset = try leb.readILEB128(i64, reader); + symbol.segment = imm; + symbol.offset = try leb.readILEB128(i64, reader); }, macho.BIND_OPCODE_SET_DYLIB_SPECIAL_IMM, macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM => { - entry.dylib_ordinal = imm; + symbol.dylib_ordinal = imm; }, macho.BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB => { - entry.dylib_ordinal = try leb.readILEB128(i64, reader); + symbol.dylib_ordinal = try leb.readILEB128(i64, reader); }, else => { std.log.warn("unhandled BIND_OPCODE_: 0x{x}", .{opcode}); @@ -176,5 +240,51 @@ pub const LazyBindingInfoTable = struct { assert(done); } - pub fn write(self: LazyBindingInfoTable, writer: anytype) !void {} + /// Write the binding info table to byte stream. + pub fn write(self: LazyBindingInfoTable, writer: anytype) !void { + for (self.symbols.items) |symbol| { + try writer.writeByte(macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | @truncate(u4, symbol.segment)); + try leb.writeILEB128(writer, symbol.offset); + + if (symbol.dylib_ordinal > 15) { + try writer.writeByte(macho.BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB); + try leb.writeULEB128(writer, @bitCast(u64, symbol.dylib_ordinal)); + } else if (symbol.dylib_ordinal > 0) { + try writer.writeByte(macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | @truncate(u4, @bitCast(u64, symbol.dylib_ordinal))); + } else { + try writer.writeByte(macho.BIND_OPCODE_SET_DYLIB_SPECIAL_IMM | @truncate(u4, @bitCast(u64, symbol.dylib_ordinal))); + } + + if (symbol.name) |name| { + try writer.writeByte(macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM); // TODO Sometimes we might want to add flags. + try writer.writeAll(name); + try writer.writeByte(0); + } + + try writer.writeByte(macho.BIND_OPCODE_DO_BIND); + try writer.writeByte(macho.BIND_OPCODE_DONE); + } + } + + /// Calculate size in bytes of this binding info table. + pub fn calcSize(self: *LazyBindingInfoTable) usize { + var size: usize = 0; + + for (self.symbols.items) |symbol| { + size += 1; + size += sizeLEB128(symbol.offset); + size += 1; + if (symbol.dylib_ordinal > 15) { + size += sizeLEB128(symbol.dylib_ordinal); + } + if (symbol.name) |name| { + size += 1; + size += name.len; + size += 1; + } + size += 2; + } + + return size; + } }; |
