aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorJakub Konka <kubkon@jakubkonka.com>2020-12-13 21:26:57 +0100
committerJakub Konka <kubkon@jakubkonka.com>2020-12-17 10:04:53 +0100
commitae535111a40ad0a5fe87304e171fd093a575494e (patch)
treeefa2d1c03dbf676e9bc8f61121b86503ae63dfc0 /src
parent5e913c9c2c41e44620911ffce0ebc20d9af041ca (diff)
downloadzig-ae535111a40ad0a5fe87304e171fd093a575494e.tar.gz
zig-ae535111a40ad0a5fe87304e171fd093a575494e.zip
macho: cleanup (lazy) binding info tables
Diffstat (limited to 'src')
-rw-r--r--src/link/MachO.zig147
-rw-r--r--src/link/MachO/Trie.zig25
-rw-r--r--src/link/MachO/imports.zig188
3 files changed, 267 insertions, 93 deletions
diff --git a/src/link/MachO.zig b/src/link/MachO.zig
index 22df059718..f6ff809cb9 100644
--- a/src/link/MachO.zig
+++ b/src/link/MachO.zig
@@ -810,47 +810,50 @@ fn linkWithLLD(self: *MachO, comp: *Compilation) !void {
if (self.symtab_cmd_index == null or self.dysymtab_cmd_index == null) {
std.log.err("Incomplete Mach-O binary: no LC_SYMTAB or LC_DYSYMTAB load command found!", .{});
std.log.err("Without the symbol table, it is not possible to patch up the binary for cross-compilation.", .{});
- return error.NoSymbolTable;
+ return error.NoSymbolTableFound;
}
// Parse symbol and string tables.
try self.parseSymbolTable();
try self.parseStringTable();
- std.debug.print("Undef symbols\n", .{});
- for (self.undef_symbols.items) |sym| {
- const name = self.string_table.items[sym.n_strx..];
- const len = blk: {
- var end: usize = 0;
- while (true) {
- if (name[end] == @as(u8, 0)) break;
- end += 1;
- }
- break :blk end;
- };
- std.debug.print("name={},sym={}\n", .{ name[0..len], sym });
+ // Parse dyld info
+ try self.parseBindingInfoTable();
+ try self.parseLazyBindingInfoTable();
+
+ // Update the dylib ordinals.
+ self.binding_info_table.dylib_ordinal = next_ordinal;
+ for (self.lazy_binding_info_table.symbols.items) |*symbol| {
+ symbol.dylib_ordinal = next_ordinal;
}
- // Parse dyld info
- var symbols_by_name = std.StringHashMap(u16).init(self.base.allocator);
- defer symbols_by_name.deinit();
- try symbols_by_name.ensureCapacity(@intCast(u32, self.undef_symbols.items.len));
-
- for (self.undef_symbols.items) |sym, i| {
- const name = self.string_table.items[sym.n_strx..];
- const len = blk: {
- var end: usize = 0;
- while (true) {
- if (name[end] == @as(u8, 0)) break;
- end += 1;
- }
- break :blk end;
- };
- symbols_by_name.putAssumeCapacityNoClobber(name[0..len], @intCast(u16, i));
+ // Write updated dyld info
+ const dyld_info = self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly;
+ {
+ const size = self.binding_info_table.calcSize();
+ assert(dyld_info.bind_size == size);
+
+ var buffer = try self.base.allocator.alloc(u8, size);
+ defer self.base.allocator.free(buffer);
+
+ var stream = std.io.fixedBufferStream(buffer);
+ try self.binding_info_table.write(stream.writer());
+
+ try self.base.file.?.pwriteAll(buffer, dyld_info.bind_off);
+ }
+ {
+ const size = self.lazy_binding_info_table.calcSize();
+ assert(dyld_info.lazy_bind_size == size);
+
+ var buffer = try self.base.allocator.alloc(u8, size);
+ defer self.base.allocator.free(buffer);
+
+ var stream = std.io.fixedBufferStream(buffer);
+ try self.lazy_binding_info_table.write(stream.writer());
+
+ try self.base.file.?.pwriteAll(buffer, dyld_info.lazy_bind_off);
}
- try self.parseBindingInfoTable(symbols_by_name);
- try self.parseLazyBindingInfoTable(symbols_by_name);
// Write updated load commands and the header
try self.writeLoadCommands();
try self.writeHeader();
@@ -1952,6 +1955,68 @@ fn writeExportTrie(self: *MachO) !void {
self.cmd_table_dirty = true;
}
+fn writeBindingInfoTable(self: *MachO) !void { // Serializes `binding_info_table` and writes it into the file at `linkedit_segment_next_offset`.
+ const size = self.binding_info_table.calcSize();
+ var buffer = try self.base.allocator.alloc(u8, size); // scratch buffer sized by calcSize(); freed on scope exit
+ defer self.base.allocator.free(buffer);
+
+ var stream = std.io.fixedBufferStream(buffer);
+ try self.binding_info_table.write(stream.writer());
+
+ const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly;
+ const bind_size = @intCast(u32, mem.alignForward(buffer.len, @sizeOf(u64))); // serialized length rounded up to a multiple of 8
+ dyld_info.bind_off = self.linkedit_segment_next_offset.?;
+ dyld_info.bind_size = bind_size;
+
+ log.debug("writing binding info table from 0x{x} to 0x{x}\n", .{ dyld_info.bind_off, dyld_info.bind_off + bind_size });
+
+ if (bind_size > buffer.len) {
+ // Pad out to align(8). NOTE(review): the zero byte is written at bind_off + bind_size, one past the last byte of the padded range - confirm this is intended.
+ try self.base.file.?.pwriteAll(&[_]u8{0}, dyld_info.bind_off + bind_size);
+ }
+ try self.base.file.?.pwriteAll(buffer, dyld_info.bind_off);
+
+ self.linkedit_segment_next_offset = dyld_info.bind_off + dyld_info.bind_size; // move the next free offset past the padded table
+ // Advance size of __LINKEDIT segment by the padded table size; vmsize is only raised (page-aligned) when filesize outgrows it.
+ const linkedit = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment;
+ linkedit.inner.filesize += dyld_info.bind_size;
+ if (linkedit.inner.vmsize < linkedit.inner.filesize) {
+ linkedit.inner.vmsize = mem.alignForwardGeneric(u64, linkedit.inner.filesize, self.page_size);
+ }
+ self.cmd_table_dirty = true; // bind_off/bind_size and the segment sizes were modified above
+}
+
+fn writeLazyBindingInfoTable(self: *MachO) !void { // Serializes `lazy_binding_info_table` and writes it into the file at `linkedit_segment_next_offset`.
+ const size = self.lazy_binding_info_table.calcSize();
+ var buffer = try self.base.allocator.alloc(u8, size); // scratch buffer sized by calcSize(); freed on scope exit
+ defer self.base.allocator.free(buffer);
+
+ var stream = std.io.fixedBufferStream(buffer);
+ try self.lazy_binding_info_table.write(stream.writer());
+
+ const dyld_info = &self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly;
+ const bind_size = @intCast(u32, mem.alignForward(buffer.len, @sizeOf(u64))); // serialized length rounded up to a multiple of 8
+ dyld_info.lazy_bind_off = self.linkedit_segment_next_offset.?;
+ dyld_info.lazy_bind_size = bind_size;
+
+ log.debug("writing lazy binding info table from 0x{x} to 0x{x}\n", .{ dyld_info.lazy_bind_off, dyld_info.lazy_bind_off + bind_size });
+
+ if (bind_size > buffer.len) {
+ // Pad out to align(8). NOTE(review): the zero byte is written at lazy_bind_off + bind_size, one past the last byte of the padded range - confirm this is intended.
+ try self.base.file.?.pwriteAll(&[_]u8{0}, dyld_info.lazy_bind_off + bind_size);
+ }
+ try self.base.file.?.pwriteAll(buffer, dyld_info.lazy_bind_off);
+
+ self.linkedit_segment_next_offset = dyld_info.lazy_bind_off + dyld_info.lazy_bind_size; // move the next free offset past the padded table
+ // Advance size of __LINKEDIT segment by the padded table size; vmsize is only raised (page-aligned) when filesize outgrows it.
+ const linkedit = &self.load_commands.items[self.linkedit_segment_cmd_index.?].Segment;
+ linkedit.inner.filesize += dyld_info.lazy_bind_size;
+ if (linkedit.inner.vmsize < linkedit.inner.filesize) {
+ linkedit.inner.vmsize = mem.alignForwardGeneric(u64, linkedit.inner.filesize, self.page_size);
+ }
+ self.cmd_table_dirty = true; // lazy_bind_off/lazy_bind_size and the segment sizes were modified above
+}
+
fn writeStringTable(self: *MachO) !void {
const symtab = &self.load_commands.items[self.symtab_cmd_index.?].Symtab;
const needed_size = self.string_table.items.len;
@@ -2122,7 +2187,7 @@ fn parseStringTable(self: *MachO) !void {
self.string_table.appendSliceAssumeCapacity(buffer);
}
-fn parseBindingInfoTable(self: *MachO, symbols_by_name: std.StringHashMap(u16)) !void {
+fn parseBindingInfoTable(self: *MachO) !void {
const dyld_info = self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly;
var buffer = try self.base.allocator.alloc(u8, dyld_info.bind_size);
defer self.base.allocator.free(buffer);
@@ -2130,10 +2195,10 @@ fn parseBindingInfoTable(self: *MachO, symbols_by_name: std.StringHashMap(u16))
assert(nread == buffer.len);
var stream = std.io.fixedBufferStream(buffer);
- try self.binding_info_table.read(self.base.allocator, symbols_by_name, stream.reader());
+ try self.binding_info_table.read(stream.reader(), self.base.allocator);
}
-fn parseLazyBindingInfoTable(self: *MachO, symbols_by_name: std.StringHashMap(u16)) !void {
+fn parseLazyBindingInfoTable(self: *MachO) !void {
const dyld_info = self.load_commands.items[self.dyld_info_cmd_index.?].DyldInfoOnly;
var buffer = try self.base.allocator.alloc(u8, dyld_info.lazy_bind_size);
defer self.base.allocator.free(buffer);
@@ -2141,5 +2206,17 @@ fn parseLazyBindingInfoTable(self: *MachO, symbols_by_name: std.StringHashMap(u1
assert(nread == buffer.len);
var stream = std.io.fixedBufferStream(buffer);
- try self.lazy_binding_info_table.read(self.base.allocator, symbols_by_name, stream.reader());
+ try self.lazy_binding_info_table.read(stream.reader(), self.base.allocator);
+}
+
+/// Calculates number of bytes in LEB128 encoding of value: one byte per 7-bit group, and never fewer than one byte.
+pub fn sizeLEB128(value: anytype) usize {
+ var res: usize = 0;
+ var v = value;
+ while (true) {
+ v = v >> 7; // NOTE(review): for a negative signed `value` this shift presumably never reaches 0, so the loop would not end - confirm callers only pass non-negative values.
+ res += 1; // counted before the zero check, so a zero input still yields 1
+ if (v == 0) break;
+ }
+ return res;
}
diff --git a/src/link/MachO/Trie.zig b/src/link/MachO/Trie.zig
index 0016ff329c..690588cd53 100644
--- a/src/link/MachO/Trie.zig
+++ b/src/link/MachO/Trie.zig
@@ -38,6 +38,7 @@ const macho = std.macho;
const testing = std.testing;
const assert = std.debug.assert;
const Allocator = mem.Allocator;
+const sizeLEB128 = @import("../MachO.zig").sizeLEB128;
pub const Node = struct {
base: *Trie,
@@ -244,9 +245,9 @@ pub const Node = struct {
fn finalize(self: *Node, offset_in_trie: usize) FinalizeResult {
var node_size: usize = 0;
if (self.terminal_info) |info| {
- node_size += sizeULEB128Mem(info.export_flags);
- node_size += sizeULEB128Mem(info.vmaddr_offset);
- node_size += sizeULEB128Mem(node_size);
+ node_size += sizeLEB128(info.export_flags);
+ node_size += sizeLEB128(info.vmaddr_offset);
+ node_size += sizeLEB128(node_size);
} else {
node_size += 1; // 0x0 for non-terminal nodes
}
@@ -254,7 +255,7 @@ pub const Node = struct {
for (self.edges.items) |edge| {
const next_node_offset = edge.to.trie_offset orelse 0;
- node_size += edge.label.len + 1 + sizeULEB128Mem(next_node_offset);
+ node_size += edge.label.len + 1 + sizeLEB128(next_node_offset);
}
const trie_offset = self.trie_offset orelse 0;
@@ -264,18 +265,6 @@ pub const Node = struct {
return .{ .node_size = node_size, .updated = updated };
}
-
- /// Calculates number of bytes in ULEB128 encoding of value.
- fn sizeULEB128Mem(value: u64) usize {
- var res: usize = 0;
- var v = value;
- while (true) {
- v = v >> 7;
- res += 1;
- if (v == 0) break;
- }
- return res;
- }
};
/// The root node of the trie.
@@ -380,9 +369,7 @@ pub fn read(self: *Trie, reader: anytype) ReadError!usize {
}
/// Write the trie to a byte stream.
-/// Caller owns the memory and needs to free it.
-/// Panics if the trie was not finalized using `finalize`
-/// before calling this method.
+/// Panics if the trie was not finalized using `finalize` before calling this method.
pub fn write(self: Trie, writer: anytype) !usize {
assert(!self.trie_dirty);
var counting_writer = std.io.countingWriter(writer);
diff --git a/src/link/MachO/imports.zig b/src/link/MachO/imports.zig
index c9917139ea..6c03649b84 100644
--- a/src/link/MachO/imports.zig
+++ b/src/link/MachO/imports.zig
@@ -5,16 +5,22 @@ const mem = std.mem;
const assert = std.debug.assert;
const Allocator = mem.Allocator;
+const sizeLEB128 = @import("../MachO.zig").sizeLEB128;
+/// Table of binding info entries used to tell the dyld which
+/// symbols to bind at loading time.
pub const BindingInfoTable = struct {
+ /// Id of the dynamic library where the specified entries can be found.
dylib_ordinal: i64 = 0,
+
+ /// Binding type; defaults to pointer type.
binding_type: u8 = macho.BIND_TYPE_POINTER,
- entries: std.ArrayListUnmanaged(Entry) = .{},
- pub const Entry = struct {
- /// Id of the symbol in the undef symbol table.
- /// Can be null.
- symbol: ?u16 = null,
+ symbols: std.ArrayListUnmanaged(Symbol) = .{},
+
+ pub const Symbol = struct {
+ /// Symbol name.
+ name: ?[]u8 = null,
/// Id of the segment where to bind this symbol to.
segment: u8,
@@ -24,14 +30,17 @@ pub const BindingInfoTable = struct {
};
pub fn deinit(self: *BindingInfoTable, allocator: *Allocator) void {
- self.entries.deinit(allocator);
+ for (self.symbols.items) |*symbol| {
+ if (symbol.name) |name| {
+ allocator.free(name);
+ }
+ }
+ self.symbols.deinit(allocator);
}
- pub fn read(self: *BindingInfoTable, allocator: *Allocator, symbols_by_name: anytype, reader: anytype) !void {
- var name = std.ArrayList(u8).init(allocator);
- defer name.deinit();
-
- var entry: Entry = .{
+ /// Parse the binding info table from byte stream.
+ pub fn read(self: *BindingInfoTable, reader: anytype, allocator: *Allocator) !void {
+ var symbol: Symbol = .{
.segment = 0,
.offset = 0,
};
@@ -48,8 +57,8 @@ pub const BindingInfoTable = struct {
switch (opcode) {
macho.BIND_OPCODE_DO_BIND => {
- try self.entries.append(allocator, entry);
- entry = .{
+ try self.symbols.append(allocator, symbol);
+ symbol = .{
.segment = 0,
.offset = 0,
};
@@ -59,17 +68,17 @@ pub const BindingInfoTable = struct {
break;
},
macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM => {
- name.shrinkRetainingCapacity(0);
+ var name = std.ArrayList(u8).init(allocator);
var next = try reader.readByte();
while (next != @as(u8, 0)) {
try name.append(next);
next = try reader.readByte();
}
- entry.symbol = symbols_by_name.get(name.items[0..]);
+ symbol.name = name.toOwnedSlice();
},
macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB => {
- entry.segment = imm;
- entry.offset = try leb.readILEB128(i64, reader);
+ symbol.segment = imm;
+ symbol.offset = try leb.readILEB128(i64, reader);
},
macho.BIND_OPCODE_SET_DYLIB_SPECIAL_IMM, macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM => {
assert(!dylib_ordinal_set);
@@ -90,15 +99,69 @@ pub const BindingInfoTable = struct {
assert(done);
}
- pub fn write(self: BindingInfoTable, writer: anytype) !void {}
+ /// Write the binding info table to byte stream: dylib ordinal opcode, binding type opcode, one opcode run per symbol, then BIND_OPCODE_DONE.
+ pub fn write(self: BindingInfoTable, writer: anytype) !void {
+ if (self.dylib_ordinal > 15) { // does not fit the 4-bit immediate, so the ordinal follows as ULEB128
+ try writer.writeByte(macho.BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB);
+ try leb.writeULEB128(writer, @bitCast(u64, self.dylib_ordinal));
+ } else if (self.dylib_ordinal > 0) { // 1..15: ordinal is packed into the low 4 bits of the opcode byte
+ try writer.writeByte(macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | @truncate(u4, @bitCast(u64, self.dylib_ordinal)));
+ } else { // zero or negative: low 4 bits of the ordinal go into the SPECIAL_IMM opcode byte
+ try writer.writeByte(macho.BIND_OPCODE_SET_DYLIB_SPECIAL_IMM | @truncate(u4, @bitCast(u64, self.dylib_ordinal)));
+ }
+ try writer.writeByte(macho.BIND_OPCODE_SET_TYPE_IMM | @truncate(u4, self.binding_type));
+
+ for (self.symbols.items) |symbol| {
+ if (symbol.name) |name| { // symbols without a name emit no set-symbol opcode
+ try writer.writeByte(macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM); // TODO Sometimes we might want to add flags.
+ try writer.writeAll(name);
+ try writer.writeByte(0); // NUL terminator after the name bytes
+ }
+
+ try writer.writeByte(macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | @truncate(u4, symbol.segment));
+ try leb.writeILEB128(writer, symbol.offset); // NOTE(review): signed encoder used under a *_ULEB opcode - confirm intended
+
+ try writer.writeByte(macho.BIND_OPCODE_DO_BIND);
+ }
+
+ try writer.writeByte(macho.BIND_OPCODE_DONE);
+ }
+
+ /// Calculate size in bytes of this binding info table; the additions follow the order of the bytes emitted by `write`.
+ pub fn calcSize(self: *BindingInfoTable) usize {
+ var size: usize = 1; // dylib ordinal opcode byte
+ if (self.dylib_ordinal > 15) {
+ size += sizeLEB128(self.dylib_ordinal); // ordinal operand, only present above the 4-bit immediate range
+ }
+
+ size += 1; // binding type opcode byte
+
+ for (self.symbols.items) |symbol| {
+ if (symbol.name) |name| {
+ size += 1; // set-symbol opcode byte
+ size += name.len;
+ size += 1; // NUL terminator
+ }
+
+ size += 1; // set-segment-and-offset opcode byte
+ size += sizeLEB128(symbol.offset); // NOTE(review): `write` emits this offset with writeILEB128 - confirm both agree on the byte count
+
+ size += 1; // BIND_OPCODE_DO_BIND
+ }
+
+ size += 1; // BIND_OPCODE_DONE
+ return size;
+ }
};
+/// Table of lazy binding info entries used to tell the dyld which
+/// symbols to lazily bind at first load of a dylib.
pub const LazyBindingInfoTable = struct {
- entries: std.ArrayListUnmanaged(Entry) = .{},
+ symbols: std.ArrayListUnmanaged(Symbol) = .{},
- pub const Entry = struct {
- /// Id of the symbol in the undef symbol table.
- symbol: u16,
+ pub const Symbol = struct {
+ /// Symbol name.
+ name: ?[]u8 = null,
/// Offset of this symbol wrt to the segment id encoded in `segment`.
offset: i64,
@@ -113,15 +176,17 @@ pub const LazyBindingInfoTable = struct {
};
pub fn deinit(self: *LazyBindingInfoTable, allocator: *Allocator) void {
- self.entries.deinit(allocator);
+ for (self.symbols.items) |*symbol| {
+ if (symbol.name) |name| {
+ allocator.free(name);
+ }
+ }
+ self.symbols.deinit(allocator);
}
- pub fn read(self: *LazyBindingInfoTable, allocator: *Allocator, symbols_by_name: anytype, reader: anytype) !void {
- var name = std.ArrayList(u8).init(allocator);
- defer name.deinit();
-
- var entry: Entry = .{
- .symbol = 0,
+ /// Parse the lazy binding info table from byte stream.
+ pub fn read(self: *LazyBindingInfoTable, reader: anytype, allocator: *Allocator) !void {
+ var symbol: Symbol = .{
.offset = 0,
.segment = 0,
.dylib_ordinal = 0,
@@ -138,35 +203,34 @@ pub const LazyBindingInfoTable = struct {
switch (opcode) {
macho.BIND_OPCODE_DO_BIND => {
- try self.entries.append(allocator, entry);
+ try self.symbols.append(allocator, symbol);
},
macho.BIND_OPCODE_DONE => {
done = true;
- entry = .{
- .symbol = 0,
+ symbol = .{
.offset = 0,
.segment = 0,
.dylib_ordinal = 0,
};
},
macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM => {
- name.shrinkRetainingCapacity(0);
+ var name = std.ArrayList(u8).init(allocator);
var next = try reader.readByte();
while (next != @as(u8, 0)) {
try name.append(next);
next = try reader.readByte();
}
- entry.symbol = symbols_by_name.get(name.items[0..]) orelse unreachable;
+ symbol.name = name.toOwnedSlice();
},
macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB => {
- entry.segment = imm;
- entry.offset = try leb.readILEB128(i64, reader);
+ symbol.segment = imm;
+ symbol.offset = try leb.readILEB128(i64, reader);
},
macho.BIND_OPCODE_SET_DYLIB_SPECIAL_IMM, macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM => {
- entry.dylib_ordinal = imm;
+ symbol.dylib_ordinal = imm;
},
macho.BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB => {
- entry.dylib_ordinal = try leb.readILEB128(i64, reader);
+ symbol.dylib_ordinal = try leb.readILEB128(i64, reader);
},
else => {
std.log.warn("unhandled BIND_OPCODE_: 0x{x}", .{opcode});
@@ -176,5 +240,51 @@ pub const LazyBindingInfoTable = struct {
assert(done);
}
- pub fn write(self: LazyBindingInfoTable, writer: anytype) !void {}
+ /// Write the lazy binding info table to byte stream; each symbol gets its own opcode run that ends with DO_BIND followed by DONE.
+ pub fn write(self: LazyBindingInfoTable, writer: anytype) !void {
+ for (self.symbols.items) |symbol| {
+ try writer.writeByte(macho.BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB | @truncate(u4, symbol.segment));
+ try leb.writeILEB128(writer, symbol.offset); // NOTE(review): signed encoder used under a *_ULEB opcode - confirm intended
+
+ if (symbol.dylib_ordinal > 15) { // does not fit the 4-bit immediate, so the ordinal follows as ULEB128
+ try writer.writeByte(macho.BIND_OPCODE_SET_DYLIB_ORDINAL_ULEB);
+ try leb.writeULEB128(writer, @bitCast(u64, symbol.dylib_ordinal));
+ } else if (symbol.dylib_ordinal > 0) { // 1..15: ordinal is packed into the low 4 bits of the opcode byte
+ try writer.writeByte(macho.BIND_OPCODE_SET_DYLIB_ORDINAL_IMM | @truncate(u4, @bitCast(u64, symbol.dylib_ordinal)));
+ } else { // zero or negative: low 4 bits of the ordinal go into the SPECIAL_IMM opcode byte
+ try writer.writeByte(macho.BIND_OPCODE_SET_DYLIB_SPECIAL_IMM | @truncate(u4, @bitCast(u64, symbol.dylib_ordinal)));
+ }
+
+ if (symbol.name) |name| { // symbols without a name emit no set-symbol opcode
+ try writer.writeByte(macho.BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM); // TODO Sometimes we might want to add flags.
+ try writer.writeAll(name);
+ try writer.writeByte(0); // NUL terminator after the name bytes
+ }
+
+ try writer.writeByte(macho.BIND_OPCODE_DO_BIND);
+ try writer.writeByte(macho.BIND_OPCODE_DONE); // emitted once per symbol, inside the loop
+ }
+ }
+
+ /// Calculate size in bytes of this lazy binding info table; the additions follow the bytes emitted per symbol by `write`.
+ pub fn calcSize(self: *LazyBindingInfoTable) usize {
+ var size: usize = 0; // an empty table occupies no bytes
+
+ for (self.symbols.items) |symbol| {
+ size += 1; // set-segment-and-offset opcode byte
+ size += sizeLEB128(symbol.offset); // NOTE(review): `write` emits this offset with writeILEB128 - confirm both agree on the byte count
+ size += 1; // dylib ordinal opcode byte
+ if (symbol.dylib_ordinal > 15) {
+ size += sizeLEB128(symbol.dylib_ordinal); // ordinal operand, only present above the 4-bit immediate range
+ }
+ if (symbol.name) |name| {
+ size += 1; // set-symbol opcode byte
+ size += name.len;
+ size += 1; // NUL terminator
+ }
+ size += 2; // BIND_OPCODE_DO_BIND + BIND_OPCODE_DONE
+ }
+
+ return size;
+ }
};