aboutsummaryrefslogtreecommitdiff
path: root/src/link/MachO/TextBlock.zig
diff options
context:
space:
mode:
author: Jakub Konka <kubkon@jakubkonka.com> 2021-09-09 18:32:03 +0200
committer: Jakub Konka <kubkon@jakubkonka.com> 2021-09-09 18:32:03 +0200
commit: aaacfc0d0a23918c6712272e10bb1cdca1daaf04 (patch)
tree: bb5bdeb8dd441f1024d7236fa7b55e35c64fe149 /src/link/MachO/TextBlock.zig
parent: 56fdada577d5d7f871bed8e5ae74e395291d4140 (diff)
download: zig-aaacfc0d0a23918c6712272e10bb1cdca1daaf04.tar.gz
zig-aaacfc0d0a23918c6712272e10bb1cdca1daaf04.zip
macho: init process of renaming TextBlock to Atom
Initially, internally within the linker.
Diffstat (limited to 'src/link/MachO/TextBlock.zig')
-rw-r--r--src/link/MachO/TextBlock.zig1301
1 files changed, 0 insertions, 1301 deletions
diff --git a/src/link/MachO/TextBlock.zig b/src/link/MachO/TextBlock.zig
deleted file mode 100644
index 57e93543b1..0000000000
--- a/src/link/MachO/TextBlock.zig
+++ /dev/null
@@ -1,1301 +0,0 @@
-const TextBlock = @This();
-
-const std = @import("std");
-const build_options = @import("build_options");
-const aarch64 = @import("../../codegen/aarch64.zig");
-const assert = std.debug.assert;
-const commands = @import("commands.zig");
-const log = std.log.scoped(.text_block);
-const macho = std.macho;
-const math = std.math;
-const mem = std.mem;
-const meta = std.meta;
-
-const Allocator = mem.Allocator;
-const Arch = std.Target.Cpu.Arch;
-const MachO = @import("../MachO.zig");
-const Object = @import("Object.zig");
-
-/// Each decl always gets a local symbol with the fully qualified name.
-/// The vaddr and size are found here directly.
-/// The file offset is found by computing the vaddr offset from the section vaddr
-/// the symbol references, and adding that to the file offset of the section.
-/// If this field is 0, it means the codegen size = 0 and there is no symbol or
-/// offset table entry.
-local_sym_index: u32,
-
-/// List of symbol aliases pointing to the same block via different nlists
-aliases: std.ArrayListUnmanaged(u32) = .{},
-
-/// List of symbols contained within this block
-contained: std.ArrayListUnmanaged(SymbolAtOffset) = .{},
-
-/// Code (may be non-relocated) this block represents
-code: std.ArrayListUnmanaged(u8) = .{},
-
-/// Size and alignment of this text block
-/// Unlike in Elf, we need to store the size of this symbol as part of
-/// the TextBlock since macho.nlist_64 lacks this information.
-size: u64,
-alignment: u32,
-
-relocs: std.ArrayListUnmanaged(Relocation) = .{},
-
-/// List of offsets contained within this block that need rebasing by the dynamic
-/// loader in presence of ASLR
-rebases: std.ArrayListUnmanaged(u64) = .{},
-
-/// List of offsets contained within this block that will be dynamically bound
-/// by the dynamic loader and contain pointers to resolved (at load time) extern
-/// symbols (aka proxies aka imports)
-bindings: std.ArrayListUnmanaged(SymbolAtOffset) = .{},
-
-/// List of lazy bindings
-lazy_bindings: std.ArrayListUnmanaged(SymbolAtOffset) = .{},
-
-/// List of data-in-code entries. This is currently specific to x86_64 only.
-dices: std.ArrayListUnmanaged(macho.data_in_code_entry) = .{},
-
-/// Stab entry for this block. This is currently specific to a binary created
-/// by linking object files in a traditional sense - in incremental sense, we
-/// bypass stabs altogether to produce dSYM bundle directly with fully relocated
-/// DWARF sections.
-stab: ?Stab = null,
-
-/// Points to the previous and next neighbours
-next: ?*TextBlock,
-prev: ?*TextBlock,
-
-/// Previous/next linked list pointers.
-/// This is the linked list node for this Decl's corresponding .debug_info tag.
-dbg_info_prev: ?*TextBlock,
-dbg_info_next: ?*TextBlock,
-/// Offset into .debug_info pointing to the tag for this Decl.
-dbg_info_off: u32,
-/// Size of the .debug_info tag for this Decl, not including padding.
-dbg_info_len: u32,
-
-dirty: bool = true,
-
-/// Pairs a symbol index with an offset and an optional stab description.
-pub const SymbolAtOffset = struct {
-    /// Symbol index this entry refers to.
-    local_sym_index: u32,
-    /// Offset paired with the symbol; `parseRelocs` stores a relocation's
-    /// offset within the block's code buffer here when recording bindings.
-    offset: u64,
-    /// Optional stab description attached to the symbol.
-    stab: ?Stab = null,
-
-    /// Renders the entry as `{ <index>: .offset = <offset>[, .stab = <stab>] }`.
-    /// The format string and the format options are ignored.
-    pub fn format(
-        self: SymbolAtOffset,
-        comptime fmt: []const u8,
-        options: std.fmt.FormatOptions,
-        writer: anytype,
-    ) !void {
-        _ = options;
-        _ = fmt;
-        try std.fmt.format(writer, "{{ {d}: .offset = {d}", .{ self.local_sym_index, self.offset });
-        if (self.stab) |attached| try std.fmt.format(writer, ", .stab = {any}", .{attached});
-        try std.fmt.format(writer, " }}", .{});
-    }
-};
-
-/// Kind of stab (debug symbol table entry) to emit for a symbol.
-pub const Stab = union(enum) {
-    /// Function stab; `asNlists` emits the payload as the `n_value` of the
-    /// size-carrying N_FUN entry and of the closing N_ENSYM entry.
-    function: u64,
-    static,
-    global,
-
-    /// Expands this stab into the nlist entries describing the symbol stored at
-    /// `macho_file.locals.items[local_sym_index]`:
-    /// * `.function` => N_BNSYM, N_FUN (name + address), N_FUN (size), N_ENSYM
-    /// * `.global`   => a single N_GSYM
-    /// * `.static`   => a single N_STSYM
-    /// The slice is allocated with `macho_file.base.allocator`; caller owns it.
-    pub fn asNlists(stab: Stab, local_sym_index: u32, macho_file: anytype) ![]macho.nlist_64 {
-        var out = std.ArrayList(macho.nlist_64).init(macho_file.base.allocator);
-        defer out.deinit();
-
-        const sym = macho_file.locals.items[local_sym_index];
-        switch (stab) {
-            .function => |size| {
-                const entries = [_]macho.nlist_64{
-                    .{
-                        .n_strx = 0,
-                        .n_type = macho.N_BNSYM,
-                        .n_sect = sym.n_sect,
-                        .n_desc = 0,
-                        .n_value = sym.n_value,
-                    },
-                    .{
-                        .n_strx = sym.n_strx,
-                        .n_type = macho.N_FUN,
-                        .n_sect = sym.n_sect,
-                        .n_desc = 0,
-                        .n_value = sym.n_value,
-                    },
-                    .{
-                        .n_strx = 0,
-                        .n_type = macho.N_FUN,
-                        .n_sect = 0,
-                        .n_desc = 0,
-                        .n_value = size,
-                    },
-                    .{
-                        .n_strx = 0,
-                        .n_type = macho.N_ENSYM,
-                        .n_sect = sym.n_sect,
-                        .n_desc = 0,
-                        .n_value = size,
-                    },
-                };
-                try out.appendSlice(&entries);
-            },
-            .global => try out.append(.{
-                .n_strx = sym.n_strx,
-                .n_type = macho.N_GSYM,
-                .n_sect = 0,
-                .n_desc = 0,
-                .n_value = 0,
-            }),
-            .static => try out.append(.{
-                .n_strx = sym.n_strx,
-                .n_type = macho.N_STSYM,
-                .n_sect = sym.n_sect,
-                .n_desc = 0,
-                .n_value = sym.n_value,
-            }),
-        }
-
-        return out.toOwnedSlice();
-    }
-};
-
-pub const Relocation = struct {
- /// Offset within the `block`s code buffer.
- /// Note relocation size can be inferred by relocation's kind.
- offset: u32,
-
- where: enum {
- local,
- undef,
- },
-
- where_index: u32,
-
- payload: union(enum) {
- unsigned: Unsigned,
- branch: Branch,
- page: Page,
- page_off: PageOff,
- pointer_to_got: PointerToGot,
- signed: Signed,
- load: Load,
- },
-
- const ResolveArgs = struct {
- block: *TextBlock,
- offset: u32,
- source_addr: u64,
- target_addr: u64,
- macho_file: *MachO,
- };
-
- pub const Unsigned = struct {
-     /// Index into the linker's locals of the symbol whose address is
-     /// subtracted from the target address, if any.
-     subtractor: ?u32,
-
-     /// Addend embedded directly in the relocation slot
-     addend: i64,
-
-     /// Extracted from r_length:
-     /// => 3 implies true
-     /// => 2 implies false
-     /// => * is unreachable
-     is_64bit: bool,
-
-     /// Writes `target - subtractor + addend` (or `target + addend` when there
-     /// is no subtractor) into the relocation slot in little-endian order,
-     /// as 8 bytes when `is_64bit` is set and truncated to 4 bytes otherwise.
-     pub fn resolve(self: Unsigned, args: ResolveArgs) !void {
-         const target = @intCast(i64, args.target_addr);
-         const value = if (self.subtractor) |sub_index|
-             target - @intCast(i64, args.macho_file.locals.items[sub_index].n_value) + self.addend
-         else
-             target + self.addend;
-
-         const bits = @bitCast(u64, value);
-         const slot = args.block.code.items[args.offset..];
-         if (self.is_64bit) {
-             mem.writeIntLittle(u64, slot[0..8], bits);
-         } else {
-             mem.writeIntLittle(u32, slot[0..4], @truncate(u32, bits));
-         }
-     }
-
-     /// Prints the subtractor (when present), the addend and the slot width
-     /// in bytes. The format string and the format options are ignored.
-     pub fn format(self: Unsigned, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void {
-         _ = options;
-         _ = fmt;
-         try std.fmt.format(writer, "Unsigned {{ ", .{});
-         if (self.subtractor) |sub_index| try std.fmt.format(writer, ".subtractor = {}, ", .{sub_index});
-         try std.fmt.format(writer, ".addend = {}, ", .{self.addend});
-         const width: usize = if (self.is_64bit) 8 else 4;
-         try std.fmt.format(writer, ".length = {}, ", .{width});
-         try std.fmt.format(writer, "}}", .{});
-     }
- };
-
- pub const Branch = struct {
-     /// CPU architecture whose branch encoding is patched.
-     arch: Arch,
-
-     /// Patches the branch displacement at `args.offset` in the block's code.
-     /// * aarch64: rewrites imm26 of the existing instruction with
-     ///   `(target - source) >> 2`; a displacement that does not fit in an i28
-     ///   is logged and reported as `error.TODOImplementBranchIslands`.
-     /// * x86_64: writes `target - source - 4` as a little-endian 32-bit value,
-     ///   returning `error.Overflow` when it does not fit in an i32.
-     /// Any other architecture yields `error.UnsupportedCpuArchitecture`.
-     pub fn resolve(self: Branch, args: ResolveArgs) !void {
-         switch (self.arch) {
-             .aarch64 => {
-                 const delta = @intCast(i64, args.target_addr) - @intCast(i64, args.source_addr);
-                 const displacement = math.cast(i28, delta) catch |err| switch (err) {
-                     error.Overflow => {
-                         log.err("jump too big to encode as i28 displacement value", .{});
-                         log.err("  (target - source) = displacement => 0x{x} - 0x{x} = 0x{x}", .{
-                             args.target_addr,
-                             args.source_addr,
-                             delta,
-                         });
-                         log.err("  TODO implement branch islands to extend jump distance for arm64", .{});
-                         return error.TODOImplementBranchIslands;
-                     },
-                 };
-
-                 const Payload = meta.TagPayload(
-                     aarch64.Instruction,
-                     aarch64.Instruction.unconditional_branch_immediate,
-                 );
-                 const code = args.block.code.items[args.offset..][0..4];
-                 var inst = aarch64.Instruction{
-                     .unconditional_branch_immediate = mem.bytesToValue(Payload, code),
-                 };
-                 // The two low bits of the byte displacement are not encoded.
-                 inst.unconditional_branch_immediate.imm26 = @truncate(u26, @bitCast(u28, displacement >> 2));
-                 mem.writeIntLittle(u32, code, inst.toU32());
-             },
-             .x86_64 => {
-                 const delta = @intCast(i64, args.target_addr) - @intCast(i64, args.source_addr) - 4;
-                 const displacement = try math.cast(i32, delta);
-                 const slot = args.block.code.items[args.offset..][0..4];
-                 mem.writeIntLittle(u32, slot, @bitCast(u32, displacement));
-             },
-             else => return error.UnsupportedCpuArchitecture,
-         }
-     }
-
-     /// Prints a fixed `Branch {}` marker; all arguments other than the
-     /// writer are ignored.
-     pub fn format(self: Branch, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void {
-         _ = options;
-         _ = fmt;
-         _ = self;
-         try std.fmt.format(writer, "Branch {{}}", .{});
-     }
- };
-
- pub const Page = struct {
-     kind: enum {
-         page,
-         got,
-         tlvp,
-     },
-     /// Added to the target address before its page is computed.
-     addend: u32 = 0,
-
-     /// Patches the `pc_relative_address` instruction at `args.offset` with the
-     /// signed difference between the 4 KiB page of `target + addend` and the
-     /// page of the source address.
-     /// Returns `error.Overflow` when the page difference does not fit in the
-     /// 21-bit immediate, matching how the other relocation kinds report
-     /// out-of-range displacements (previously this was an unchecked cast).
-     pub fn resolve(self: Page, args: ResolveArgs) !void {
-         const target_addr = args.target_addr + self.addend;
-         const source_page = @intCast(i32, args.source_addr >> 12);
-         const target_page = @intCast(i32, target_addr >> 12);
-         const page_delta = try math.cast(i21, target_page - source_page);
-         const pages = @bitCast(u21, page_delta);
-
-         const code = args.block.code.items[args.offset..][0..4];
-         var inst = aarch64.Instruction{
-             .pc_relative_address = mem.bytesToValue(meta.TagPayload(
-                 aarch64.Instruction,
-                 aarch64.Instruction.pc_relative_address,
-             ), code),
-         };
-         // The immediate is split: low 2 bits in immlo, remaining 19 in immhi.
-         inst.pc_relative_address.immhi = @truncate(u19, pages >> 2);
-         inst.pc_relative_address.immlo = @truncate(u2, pages);
-
-         mem.writeIntLittle(u32, code, inst.toU32());
-     }
-
-     /// Prints the kind (omitted for `.page`) followed by the addend.
-     /// The format string and the format options are ignored.
-     pub fn format(self: Page, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void {
-         _ = fmt;
-         _ = options;
-         try std.fmt.format(writer, "Page {{ ", .{});
-         switch (self.kind) {
-             .page => {},
-             .got => {
-                 try std.fmt.format(writer, ".got, ", .{});
-             },
-             .tlvp => {
-                 // Needs the same ", " separator as `.got`; without it the
-                 // output ran into the next field (".tlvp.addend = ...").
-                 try std.fmt.format(writer, ".tlvp, ", .{});
-             },
-         }
-         try std.fmt.format(writer, ".addend = {}, ", .{self.addend});
-         try std.fmt.format(writer, "}}", .{});
-     }
- };
-
- pub const PageOff = struct {
-     kind: enum {
-         page,
-         got,
-         tlvp,
-     },
-     /// Added to the target address for `.page` relocations only.
-     addend: u32 = 0,
-     /// Which instruction form is patched; must be set when `kind == .page`.
-     op_kind: ?OpKind = null,
-
-     pub const OpKind = enum {
-         arithmetic,
-         load,
-     };
-
-     /// Patches the low 12 bits of the target address into the instruction at
-     /// `args.offset`:
-     /// * `.page`: into imm12 of an add/sub immediate, or into the offset of
-     ///   a load/store after dividing by the access size (`error` from
-     ///   `math.divExact` if the address is not a multiple of it).
-     /// * `.got`: into a load/store offset, divided by 8.
-     /// * `.tlvp`: replaces the instruction with an add-immediate that reuses
-     ///   the destination/base registers of the original instruction.
-     pub fn resolve(self: PageOff, args: ResolveArgs) !void {
-         const AddSub = meta.TagPayload(aarch64.Instruction, aarch64.Instruction.add_subtract_immediate);
-         const LoadStore = meta.TagPayload(aarch64.Instruction, aarch64.Instruction.load_store_register);
-
-         const code = args.block.code.items[args.offset..][0..4];
-
-         switch (self.kind) {
-             .page => {
-                 const narrowed = @truncate(u12, args.target_addr + self.addend);
-
-                 switch (self.op_kind orelse unreachable) {
-                     .arithmetic => {
-                         var inst = aarch64.Instruction{
-                             .add_subtract_immediate = mem.bytesToValue(AddSub, code),
-                         };
-                         inst.add_subtract_immediate.imm12 = narrowed;
-                         mem.writeIntLittle(u32, code, inst.toU32());
-                     },
-                     .load => {
-                         var inst = aarch64.Instruction{
-                             .load_store_register = mem.bytesToValue(LoadStore, code),
-                         };
-                         const scaled: u12 = blk: {
-                             if (inst.load_store_register.size != 0) {
-                                 // Offset is scaled by 2^size bytes.
-                                 const denom: u4 = try math.powi(u4, 2, inst.load_store_register.size);
-                                 break :blk try math.divExact(u12, narrowed, denom);
-                             }
-                             if (inst.load_store_register.v == 1) {
-                                 // 128-bit SIMD is scaled by 16.
-                                 break :blk try math.divExact(u12, narrowed, 16);
-                             }
-                             // Otherwise, 8-bit SIMD or ldrb.
-                             break :blk narrowed;
-                         };
-                         inst.load_store_register.offset = scaled;
-                         mem.writeIntLittle(u32, code, inst.toU32());
-                     },
-                 }
-             },
-             .got => {
-                 const narrowed = @truncate(u12, args.target_addr);
-                 var inst = aarch64.Instruction{
-                     .load_store_register = mem.bytesToValue(LoadStore, code),
-                 };
-                 inst.load_store_register.offset = try math.divExact(u12, narrowed, 8);
-                 mem.writeIntLittle(u32, code, inst.toU32());
-             },
-             .tlvp => {
-                 const RegInfo = struct {
-                     rd: u5,
-                     rn: u5,
-                     size: u1,
-                 };
-                 // Pick up the registers and operand width of whatever
-                 // instruction currently occupies the slot.
-                 const regs: RegInfo = blk: {
-                     if (isArithmeticOp(code)) {
-                         const old = mem.bytesToValue(AddSub, code);
-                         break :blk .{
-                             .rd = old.rd,
-                             .rn = old.rn,
-                             .size = old.sf,
-                         };
-                     } else {
-                         const old = mem.bytesToValue(LoadStore, code);
-                         break :blk .{
-                             .rd = old.rt,
-                             .rn = old.rn,
-                             .size = @truncate(u1, old.size),
-                         };
-                     }
-                 };
-                 var inst = aarch64.Instruction{
-                     .add_subtract_immediate = .{
-                         .rd = regs.rd,
-                         .rn = regs.rn,
-                         .imm12 = @truncate(u12, args.target_addr),
-                         .sh = 0,
-                         .s = 0,
-                         .op = 0,
-                         .sf = regs.size,
-                     },
-                 };
-                 mem.writeIntLittle(u32, code, inst.toU32());
-             },
-         }
-     }
-
-     /// Prints the kind (omitted for `.page`), the addend and the op kind.
-     /// The format string and the format options are ignored.
-     pub fn format(self: PageOff, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void {
-         _ = options;
-         _ = fmt;
-         try std.fmt.format(writer, "PageOff {{ ", .{});
-         switch (self.kind) {
-             .page => {},
-             .got => try std.fmt.format(writer, ".got, ", .{}),
-             .tlvp => try std.fmt.format(writer, ".tlvp, ", .{}),
-         }
-         try std.fmt.format(writer, ".addend = {}, ", .{self.addend});
-         try std.fmt.format(writer, ".op_kind = {s}, ", .{self.op_kind});
-         try std.fmt.format(writer, "}}", .{});
-     }
- };
-
- pub const PointerToGot = struct {
-     /// Writes `target - source` into the relocation slot as a little-endian
-     /// 32-bit value; returns `error.Overflow` if it does not fit in an i32.
-     pub fn resolve(_: PointerToGot, args: ResolveArgs) !void {
-         const delta = @intCast(i64, args.target_addr) - @intCast(i64, args.source_addr);
-         const narrowed = try math.cast(i32, delta);
-         const slot = args.block.code.items[args.offset..][0..4];
-         mem.writeIntLittle(u32, slot, @bitCast(u32, narrowed));
-     }
-
-     /// Prints a fixed `PointerToGot {}` marker; all arguments other than the
-     /// writer are ignored.
-     pub fn format(self: PointerToGot, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void {
-         _ = options;
-         _ = fmt;
-         _ = self;
-         try std.fmt.format(writer, "PointerToGot {{}}", .{});
-     }
- };
-
- pub const Signed = struct {
-     /// Added to the target address before the displacement is computed.
-     addend: i64,
-     /// Extra bytes added to the source address on top of the fixed 4.
-     correction: u3,
-
-     /// Writes `(target + addend) - (source + correction + 4)` into the
-     /// relocation slot as a little-endian 32-bit value; returns
-     /// `error.Overflow` if it does not fit in an i32.
-     pub fn resolve(self: Signed, args: ResolveArgs) !void {
-         const adjusted_target = @intCast(i64, args.target_addr) + self.addend;
-         const adjusted_source = @intCast(i64, args.source_addr + self.correction + 4);
-         const displacement = try math.cast(i32, adjusted_target - adjusted_source);
-         const slot = args.block.code.items[args.offset..][0..4];
-         mem.writeIntLittle(u32, slot, @bitCast(u32, displacement));
-     }
-
-     /// Prints the addend and the correction. The format string and the
-     /// format options are ignored.
-     pub fn format(self: Signed, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void {
-         _ = options;
-         _ = fmt;
-         try std.fmt.format(writer, "Signed {{ ", .{});
-         try std.fmt.format(writer, ".addend = {}, ", .{self.addend});
-         try std.fmt.format(writer, ".correction = {}, ", .{self.correction});
-         try std.fmt.format(writer, "}}", .{});
-     }
- };
-
- pub const Load = struct {
-     kind: enum {
-         got,
-         tlvp,
-     },
-     /// Added to the computed displacement.
-     addend: i32 = 0,
-
-     /// Writes `target - source - 4 + addend` into the relocation slot as a
-     /// little-endian 32-bit value; returns `error.Overflow` if it does not
-     /// fit in an i32. For `.tlvp` the byte two positions before the slot is
-     /// overwritten with 0x8d first.
-     pub fn resolve(self: Load, args: ResolveArgs) !void {
-         switch (self.kind) {
-             // We need to rewrite the opcode from movq to leaq.
-             .tlvp => args.block.code.items[args.offset - 2] = 0x8d,
-             .got => {},
-         }
-         const delta = @intCast(i64, args.target_addr) - @intCast(i64, args.source_addr) - 4 + self.addend;
-         const displacement = try math.cast(i32, delta);
-         const slot = args.block.code.items[args.offset..][0..4];
-         mem.writeIntLittle(u32, slot, @bitCast(u32, displacement));
-     }
-
-     /// Prints the kind and the addend. The format string and the format
-     /// options are ignored.
-     pub fn format(self: Load, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void {
-         _ = options;
-         _ = fmt;
-         try std.fmt.format(writer, "Load {{ ", .{});
-         try std.fmt.format(writer, "{s}, ", .{self.kind});
-         try std.fmt.format(writer, ".addend = {}, ", .{self.addend});
-         try std.fmt.format(writer, "}}", .{});
-     }
- };
-
- /// Dispatches to the `resolve` method of whichever payload is active,
- /// forwarding `args` unchanged and propagating its error.
- pub fn resolve(self: Relocation, args: ResolveArgs) !void {
-     switch (self.payload) {
-         .unsigned => |payload| return payload.resolve(args),
-         .branch => |payload| return payload.resolve(args),
-         .page => |payload| return payload.resolve(args),
-         .page_off => |payload| return payload.resolve(args),
-         .pointer_to_got => |payload| return payload.resolve(args),
-         .signed => |payload| return payload.resolve(args),
-         .load => |payload| return payload.resolve(args),
-     }
- }
-
- /// Prints the common relocation fields followed by the active payload,
- /// which is rendered by its own `format` method with the same `fmt` and
- /// `options`.
- pub fn format(self: Relocation, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void {
-     try std.fmt.format(writer, "Relocation {{ ", .{});
-     try std.fmt.format(writer, ".offset = {}, ", .{self.offset});
-     try std.fmt.format(writer, ".where = {}, ", .{self.where});
-     try std.fmt.format(writer, ".where_index = {d}, ", .{self.where_index});
-
-     switch (self.payload) {
-         .unsigned => |payload| try payload.format(fmt, options, writer),
-         .branch => |payload| try payload.format(fmt, options, writer),
-         .page => |payload| try payload.format(fmt, options, writer),
-         .page_off => |payload| try payload.format(fmt, options, writer),
-         .pointer_to_got => |payload| try payload.format(fmt, options, writer),
-         .signed => |payload| try payload.format(fmt, options, writer),
-         .load => |payload| try payload.format(fmt, options, writer),
-     }
-
-     try std.fmt.format(writer, "}}", .{});
- }
-};
-
-/// A block with symbol index 0, zero size and alignment, and no neighbours.
-/// The `.debug_info` offset and length are left undefined and must be set
-/// before they are read.
-pub const empty = TextBlock{
-    .local_sym_index = 0,
-    .size = 0,
-    .alignment = 0,
-    .next = null,
-    .prev = null,
-    .dbg_info_next = null,
-    .dbg_info_prev = null,
-    .dbg_info_len = undefined,
-    .dbg_info_off = undefined,
-};
-
-/// Frees every list owned by this block using `allocator`.
-/// The `stab` and the neighbour pointers are left untouched.
-pub fn deinit(self: *TextBlock, allocator: *Allocator) void {
-    self.aliases.deinit(allocator);
-    self.contained.deinit(allocator);
-    self.code.deinit(allocator);
-    self.relocs.deinit(allocator);
-    self.rebases.deinit(allocator);
-    self.bindings.deinit(allocator);
-    self.lazy_bindings.deinit(allocator);
-    self.dices.deinit(allocator);
-}
-
-/// Returns how far this block can grow in virtual address space: the distance
-/// from its symbol's address to the address of the next block's symbol.
-/// File offset relocation happens transparently and is not part of this figure.
-pub fn capacity(self: TextBlock, macho_file: MachO) u64 {
-    const start_addr = macho_file.locals.items[self.local_sym_index].n_value;
-    const end_addr = if (self.next) |following|
-        macho_file.locals.items[following.local_sym_index].n_value
-    else
-        // We are the last block, so only the virtual address space limits us.
-        std.math.maxInt(u64);
-    return end_addr - start_addr;
-}
-
-/// Reports whether the gap between this block and the next one exceeds the
-/// block's ideal (padded) size by at least `MachO.min_text_capacity`.
-/// Always false for the last block, which needs no free list node.
-pub fn freeListEligible(self: TextBlock, macho_file: MachO) bool {
-    const following = self.next orelse return false;
-    const start_addr = macho_file.locals.items[self.local_sym_index].n_value;
-    const end_addr = macho_file.locals.items[following.local_sym_index].n_value;
-    const cap = end_addr - start_addr;
-    const ideal_cap = MachO.padToIdeal(self.size);
-    // `and` short-circuits, so the subtraction only runs when cap > ideal_cap.
-    return cap > ideal_cap and (cap - ideal_cap) >= MachO.min_text_capacity;
-}
-
-/// Everything `parseRelocs` and its helpers need besides the block itself.
-const RelocContext = struct {
-    /// Used for the relocation, binding and rebase lists and for any symbols
-    /// created while parsing.
-    allocator: *Allocator,
-    /// Object file whose relocations are being parsed.
-    object: *Object,
-    /// Linker state: symbol tables, string table, GOT and stub maps.
-    macho_file: *MachO,
-    /// Per-section record of the last atom appended while parsing.
-    parsed_atoms: *Object.ParsedAtoms,
-    /// Added to a relocation's block-relative offset by `parseSigned` when it
-    /// adjusts the addend of a non-extern relocation.
-    base_addr: u64 = 0,
-    /// Subtracted from `r_address` to obtain the offset within the block; also
-    /// the lower bound passed to `filterRelocs`.
-    base_offset: u64 = 0,
-};
-
-/// Builds a `Relocation` for `rel` with its offset made relative to the block
-/// (`r_address - context.base_offset`) and its target resolved to either a
-/// local or an undefined symbol. The payload is left undefined for the caller
-/// to fill in.
-/// For non-extern relocations a local symbol standing in for the target
-/// section is looked up, and created and cached on first use.
-fn initRelocFromObject(rel: macho.relocation_info, context: RelocContext) !Relocation {
-    var parsed_rel = Relocation{
-        .offset = @intCast(u32, @intCast(u64, rel.r_address) - context.base_offset),
-        .where = undefined,
-        .where_index = undefined,
-        .payload = undefined,
-    };
-
-    if (rel.r_extern == 0) {
-        // r_symbolnum is a 1-based section ordinal here.
-        const sect_id = @intCast(u16, rel.r_symbolnum - 1);
-
-        const section_sym_index = context.object.sections_as_symbols.get(sect_id) orelse new_sym: {
-            const seg = context.object.load_commands.items[context.object.segment_cmd_index.?].Segment;
-            const sect = seg.sections.items[sect_id];
-            const match = (try context.macho_file.getMatchingSection(sect)) orelse unreachable;
-            const new_index = @intCast(u32, context.macho_file.locals.items.len);
-            const sym_name = try std.fmt.allocPrint(context.allocator, "l_{s}_{s}_{s}", .{
-                context.object.name,
-                commands.segmentName(sect),
-                commands.sectionName(sect),
-            });
-            defer context.allocator.free(sym_name);
-
-            try context.macho_file.locals.append(context.allocator, .{
-                .n_strx = try context.macho_file.makeString(sym_name),
-                .n_type = macho.N_SECT,
-                .n_sect = @intCast(u8, context.macho_file.section_ordinals.getIndex(match).? + 1),
-                .n_desc = 0,
-                .n_value = 0,
-            });
-            try context.object.sections_as_symbols.putNoClobber(context.allocator, sect_id, new_index);
-            break :new_sym new_index;
-        };
-
-        parsed_rel.where = .local;
-        parsed_rel.where_index = section_sym_index;
-        return parsed_rel;
-    }
-
-    const sym = context.object.symtab.items[rel.r_symbolnum];
-    const sym_name = context.object.getString(sym.n_strx);
-
-    if (MachO.symbolIsSect(sym) and !MachO.symbolIsExt(sym)) {
-        // Non-external section symbol: already mapped to a linker local.
-        parsed_rel.where = .local;
-        parsed_rel.where_index = context.object.symbol_mapping.get(rel.r_symbolnum) orelse unreachable;
-        return parsed_rel;
-    }
-
-    // Everything else goes through the global symbol resolver by name.
-    const n_strx = context.macho_file.strtab_dir.getAdapted(@as([]const u8, sym_name), MachO.StringSliceAdapter{
-        .strtab = &context.macho_file.strtab,
-    }) orelse unreachable;
-    const resolv = context.macho_file.symbol_resolver.get(n_strx) orelse unreachable;
-    switch (resolv.where) {
-        .global => {
-            parsed_rel.where = .local;
-            parsed_rel.where_index = resolv.local_sym_index;
-        },
-        .undef => {
-            parsed_rel.where = .undef;
-            parsed_rel.where_index = resolv.where_index;
-        },
-    }
-    return parsed_rel;
-}
-
-/// Parses the relocations selected by `filterRelocs` for the range
-/// `[context.base_offset, context.base_offset + self.size)` and appends one
-/// `Relocation` per effective entry to `self.relocs`.
-///
-/// ADDEND (arm64) and SUBTRACTOR entries are not stored on their own; their
-/// value is carried over into the relocation that follows them, which must be
-/// PAGE21/PAGEOFF12 respectively UNSIGNED, otherwise
-/// `error.UnexpectedRelocationType` is returned.
-///
-/// Side effects besides `self.relocs`:
-/// * GOT-based relocations create a GOT atom for targets not yet in
-///   `got_entries_map`.
-/// * Unsigned relocations record a binding (undefined target) or, for 64-bit
-///   slots in the data segments, a rebase.
-/// * Branches to undefined symbols create stub helper, lazy pointer and stub
-///   atoms for targets not yet in `stubs_map`.
-/// Newly created atoms are appended to the per-section lists in
-/// `context.parsed_atoms`.
-pub fn parseRelocs(self: *TextBlock, relocs: []macho.relocation_info, context: RelocContext) !void {
-    const filtered_relocs = filterRelocs(relocs, context.base_offset, context.base_offset + self.size);
-    var it = RelocIterator{
-        .buffer = filtered_relocs,
-    };
-
-    var addend: u32 = 0;
-    var subtractor: ?u32 = null;
-    const arch = context.macho_file.base.options.target.cpu.arch;
-
-    while (it.next()) |rel| {
-        if (isAddend(rel, arch)) {
-            // Addend is not a relocation with effect on the TextBlock, so
-            // parse it and carry on.
-            assert(addend == 0); // Oh no, addend was not reset!
-            addend = rel.r_symbolnum;
-
-            // Verify ADDEND is followed by a PAGE21 or PAGEOFF12.
-            const next = @intToEnum(macho.reloc_type_arm64, it.peek().r_type);
-            switch (next) {
-                .ARM64_RELOC_PAGE21, .ARM64_RELOC_PAGEOFF12 => {},
-                else => {
-                    log.err("unexpected relocation type: expected PAGE21 or PAGEOFF12, found {s}", .{next});
-                    return error.UnexpectedRelocationType;
-                },
-            }
-            continue;
-        }
-
-        if (isSubtractor(rel, arch)) {
-            // Subtractor is not a relocation with effect on the TextBlock, so
-            // parse it and carry on.
-            assert(subtractor == null); // Oh no, subtractor was not reset!
-            assert(rel.r_extern == 1);
-            const sym = context.object.symtab.items[rel.r_symbolnum];
-            const sym_name = context.object.getString(sym.n_strx);
-
-            if (MachO.symbolIsSect(sym) and !MachO.symbolIsExt(sym)) {
-                const where_index = context.object.symbol_mapping.get(rel.r_symbolnum) orelse unreachable;
-                subtractor = where_index;
-            } else {
-                const n_strx = context.macho_file.strtab_dir.getAdapted(@as([]const u8, sym_name), MachO.StringSliceAdapter{
-                    .strtab = &context.macho_file.strtab,
-                }) orelse unreachable;
-                const resolv = context.macho_file.symbol_resolver.get(n_strx) orelse unreachable;
-                assert(resolv.where == .global);
-                subtractor = resolv.local_sym_index;
-            }
-
-            // Verify SUBTRACTOR is followed by UNSIGNED.
-            switch (arch) {
-                .aarch64 => {
-                    const next = @intToEnum(macho.reloc_type_arm64, it.peek().r_type);
-                    if (next != .ARM64_RELOC_UNSIGNED) {
-                        log.err("unexpected relocation type: expected UNSIGNED, found {s}", .{next});
-                        return error.UnexpectedRelocationType;
-                    }
-                },
-                .x86_64 => {
-                    const next = @intToEnum(macho.reloc_type_x86_64, it.peek().r_type);
-                    if (next != .X86_64_RELOC_UNSIGNED) {
-                        log.err("unexpected relocation type: expected UNSIGNED, found {s}", .{next});
-                        return error.UnexpectedRelocationType;
-                    }
-                },
-                else => unreachable,
-            }
-            continue;
-        }
-
-        var parsed_rel = try initRelocFromObject(rel, context);
-
-        switch (arch) {
-            .aarch64 => {
-                const rel_type = @intToEnum(macho.reloc_type_arm64, rel.r_type);
-                switch (rel_type) {
-                    .ARM64_RELOC_ADDEND => unreachable,
-                    .ARM64_RELOC_SUBTRACTOR => unreachable,
-                    .ARM64_RELOC_BRANCH26 => {
-                        self.parseBranch(rel, &parsed_rel, context);
-                    },
-                    .ARM64_RELOC_UNSIGNED => {
-                        self.parseUnsigned(rel, &parsed_rel, subtractor, context);
-                        subtractor = null;
-                    },
-                    .ARM64_RELOC_PAGE21,
-                    .ARM64_RELOC_GOT_LOAD_PAGE21,
-                    .ARM64_RELOC_TLVP_LOAD_PAGE21,
-                    => {
-                        self.parsePage(rel, &parsed_rel, addend);
-                        if (rel_type == .ARM64_RELOC_PAGE21)
-                            addend = 0;
-                    },
-                    .ARM64_RELOC_PAGEOFF12,
-                    .ARM64_RELOC_GOT_LOAD_PAGEOFF12,
-                    .ARM64_RELOC_TLVP_LOAD_PAGEOFF12,
-                    => {
-                        self.parsePageOff(rel, &parsed_rel, addend);
-                        if (rel_type == .ARM64_RELOC_PAGEOFF12)
-                            addend = 0;
-                    },
-                    .ARM64_RELOC_POINTER_TO_GOT => {
-                        self.parsePointerToGot(rel, &parsed_rel);
-                    },
-                }
-            },
-            .x86_64 => {
-                switch (@intToEnum(macho.reloc_type_x86_64, rel.r_type)) {
-                    .X86_64_RELOC_SUBTRACTOR => unreachable,
-                    .X86_64_RELOC_BRANCH => {
-                        self.parseBranch(rel, &parsed_rel, context);
-                    },
-                    .X86_64_RELOC_UNSIGNED => {
-                        self.parseUnsigned(rel, &parsed_rel, subtractor, context);
-                        subtractor = null;
-                    },
-                    .X86_64_RELOC_SIGNED,
-                    .X86_64_RELOC_SIGNED_1,
-                    .X86_64_RELOC_SIGNED_2,
-                    .X86_64_RELOC_SIGNED_4,
-                    => {
-                        self.parseSigned(rel, &parsed_rel, context);
-                    },
-                    .X86_64_RELOC_GOT_LOAD,
-                    .X86_64_RELOC_GOT,
-                    .X86_64_RELOC_TLV,
-                    => {
-                        self.parseLoad(rel, &parsed_rel);
-                    },
-                }
-            },
-            else => unreachable,
-        }
-
-        try self.relocs.append(context.allocator, parsed_rel);
-
-        const is_via_got = switch (parsed_rel.payload) {
-            .pointer_to_got => true,
-            .load => |load| load.kind == .got,
-            .page => |page| page.kind == .got,
-            .page_off => |page_off| page_off.kind == .got,
-            else => false,
-        };
-
-        if (is_via_got) blk: {
-            const key = MachO.GotIndirectionKey{
-                .where = switch (parsed_rel.where) {
-                    .local => .local,
-                    .undef => .undef,
-                },
-                .where_index = parsed_rel.where_index,
-            };
-            // One GOT entry per target is enough.
-            if (context.macho_file.got_entries_map.contains(key)) break :blk;
-
-            const atom = try context.macho_file.createGotAtom(key);
-            try context.macho_file.got_entries_map.putNoClobber(context.macho_file.base.allocator, key, atom);
-            try appendParsedAtom(context.parsed_atoms, .{
-                .seg = context.macho_file.data_const_segment_cmd_index.?,
-                .sect = context.macho_file.got_section_index.?,
-            }, atom);
-        } else if (parsed_rel.payload == .unsigned) {
-            switch (parsed_rel.where) {
-                .undef => {
-                    try self.bindings.append(context.allocator, .{
-                        .local_sym_index = parsed_rel.where_index,
-                        .offset = parsed_rel.offset,
-                    });
-                },
-                .local => {
-                    const source_sym = context.macho_file.locals.items[self.local_sym_index];
-                    const match = context.macho_file.section_ordinals.keys()[source_sym.n_sect - 1];
-                    const seg = context.macho_file.load_commands.items[match.seg].Segment;
-                    const sect = seg.sections.items[match.sect];
-                    const sect_type = commands.sectionType(sect);
-
-                    const should_rebase = rebase: {
-                        if (!parsed_rel.payload.unsigned.is_64bit) break :rebase false;
-
-                        // TODO actually, a check similar to what dyld is doing, that is, verifying
-                        // that the segment is writable should be enough here.
-                        const is_right_segment = blk: {
-                            if (context.macho_file.data_segment_cmd_index) |idx| {
-                                if (match.seg == idx) {
-                                    break :blk true;
-                                }
-                            }
-                            if (context.macho_file.data_const_segment_cmd_index) |idx| {
-                                if (match.seg == idx) {
-                                    break :blk true;
-                                }
-                            }
-                            break :blk false;
-                        };
-
-                        if (!is_right_segment) break :rebase false;
-                        if (sect_type != macho.S_LITERAL_POINTERS and
-                            sect_type != macho.S_REGULAR and
-                            sect_type != macho.S_MOD_INIT_FUNC_POINTERS and
-                            sect_type != macho.S_MOD_TERM_FUNC_POINTERS)
-                        {
-                            break :rebase false;
-                        }
-
-                        break :rebase true;
-                    };
-
-                    if (should_rebase) {
-                        try self.rebases.append(context.allocator, parsed_rel.offset);
-                    }
-                },
-            }
-        } else if (parsed_rel.payload == .branch) blk: {
-            // Only branches to undefined symbols need a stub, and only one
-            // stub per target.
-            if (parsed_rel.where != .undef) break :blk;
-            if (context.macho_file.stubs_map.contains(parsed_rel.where_index)) break :blk;
-
-            const stub_helper_atom = try context.macho_file.createStubHelperAtom();
-            const laptr_atom = try context.macho_file.createLazyPointerAtom(
-                stub_helper_atom.local_sym_index,
-                parsed_rel.where_index,
-            );
-            const stub_atom = try context.macho_file.createStubAtom(laptr_atom.local_sym_index);
-            try context.macho_file.stubs_map.putNoClobber(context.allocator, parsed_rel.where_index, stub_atom);
-
-            try appendParsedAtom(context.parsed_atoms, .{
-                .seg = context.macho_file.text_segment_cmd_index.?,
-                .sect = context.macho_file.stub_helper_section_index.?,
-            }, stub_helper_atom);
-            try appendParsedAtom(context.parsed_atoms, .{
-                .seg = context.macho_file.text_segment_cmd_index.?,
-                .sect = context.macho_file.stubs_section_index.?,
-            }, stub_atom);
-            try appendParsedAtom(context.parsed_atoms, .{
-                .seg = context.macho_file.data_segment_cmd_index.?,
-                .sect = context.macho_file.la_symbol_ptr_section_index.?,
-            }, laptr_atom);
-        }
-    }
-}
-
-/// Links `atom` after the last atom recorded for `match` in `parsed_atoms` and
-/// makes it the new last atom; if nothing is recorded for `match` yet, `atom`
-/// becomes the first entry.
-fn appendParsedAtom(parsed_atoms: *Object.ParsedAtoms, match: MachO.MatchingSection, atom: *TextBlock) !void {
-    if (parsed_atoms.getPtr(match)) |last| {
-        last.*.next = atom;
-        atom.prev = last.*;
-        last.* = atom;
-    } else {
-        try parsed_atoms.putNoClobber(match, atom);
-    }
-}
-
-/// True when `rel` is an ARM64_RELOC_ADDEND entry. Only aarch64 has this
-/// relocation type, so every other architecture yields false.
-fn isAddend(rel: macho.relocation_info, arch: Arch) bool {
-    return arch == .aarch64 and
-        @intToEnum(macho.reloc_type_arm64, rel.r_type) == .ARM64_RELOC_ADDEND;
-}
-
-/// True when `rel` is the SUBTRACTOR relocation type of `arch`.
-/// Only aarch64 and x86_64 are supported; anything else is unreachable.
-fn isSubtractor(rel: macho.relocation_info, arch: Arch) bool {
-    switch (arch) {
-        .aarch64 => return @intToEnum(macho.reloc_type_arm64, rel.r_type) == .ARM64_RELOC_SUBTRACTOR,
-        .x86_64 => return @intToEnum(macho.reloc_type_x86_64, rel.r_type) == .X86_64_RELOC_SUBTRACTOR,
-        else => unreachable,
-    }
-}
-
-/// Fills `out.payload` with an `Unsigned` relocation.
-/// The addend is read from the relocation slot in this block's code (8 bytes
-/// for `r_length == 3`, 4 bytes for `r_length == 2`); for non-extern
-/// relocations the address of the target section in the object file is
-/// subtracted from it.
-fn parseUnsigned(
-    self: TextBlock,
-    rel: macho.relocation_info,
-    out: *Relocation,
-    subtractor: ?u32,
-    context: RelocContext,
-) void {
-    assert(rel.r_pcrel == 0);
-
-    const is_64bit = switch (rel.r_length) {
-        3 => true,
-        2 => false,
-        else => unreachable,
-    };
-
-    const slot = self.code.items[out.offset..];
-    const embedded: i64 = if (is_64bit)
-        mem.readIntLittle(i64, slot[0..8])
-    else
-        mem.readIntLittle(i32, slot[0..4]);
-
-    // r_symbolnum is a 1-based section ordinal for non-extern relocations.
-    const section_base: i64 = if (rel.r_extern == 0) base: {
-        const seg = context.object.load_commands.items[context.object.segment_cmd_index.?].Segment;
-        break :base @intCast(i64, seg.sections.items[rel.r_symbolnum - 1].addr);
-    } else 0;
-
-    out.payload = .{
-        .unsigned = .{
-            .subtractor = subtractor,
-            .is_64bit = is_64bit,
-            .addend = embedded - section_base,
-        },
-    };
-}
-
-/// Fills `out.payload` with a `Branch` relocation for the target CPU
-/// architecture. Branch relocations must be PC-relative and 4 bytes wide.
-fn parseBranch(self: TextBlock, rel: macho.relocation_info, out: *Relocation, context: RelocContext) void {
-    _ = self;
-    assert(rel.r_pcrel == 1);
-    assert(rel.r_length == 2);
-
-    const target_arch = context.macho_file.base.options.target.cpu.arch;
-    out.payload = .{ .branch = .{ .arch = target_arch } };
-}
-
-/// Fills `out.payload` with a `Page` relocation whose kind is derived from the
-/// arm64 relocation type and whose addend is `addend`.
-/// Page relocations must be PC-relative and 4 bytes wide.
-fn parsePage(self: TextBlock, rel: macho.relocation_info, out: *Relocation, addend: u32) void {
-    _ = self;
-    assert(rel.r_pcrel == 1);
-    assert(rel.r_length == 2);
-
-    const rel_type = @intToEnum(macho.reloc_type_arm64, rel.r_type);
-    out.payload = .{
-        .page = .{
-            .kind = switch (rel_type) {
-                .ARM64_RELOC_PAGE21 => .page,
-                .ARM64_RELOC_GOT_LOAD_PAGE21 => .got,
-                .ARM64_RELOC_TLVP_LOAD_PAGE21 => .tlvp,
-                else => unreachable,
-            },
-            .addend = addend,
-        },
-    };
-}
-
-/// Fills `out.payload` with a `.page_off` relocation description.
-///
-/// The kind is derived from the arm64 relocation type (PAGEOFF12,
-/// GOT_LOAD_PAGEOFF12 or TLVP_LOAD_PAGEOFF12; anything else is treated as
-/// unreachable). Only for plain PAGEOFF12 is the instruction at `out.offset`
-/// inspected to record whether it is an arithmetic op or a load; for the other
-/// kinds `op_kind` stays null.
-/// The relocation must be non-PC-relative with `r_length == 2`.
-fn parsePageOff(self: TextBlock, rel: macho.relocation_info, out: *Relocation, addend: u32) void {
-    assert(rel.r_pcrel == 0);
-    assert(rel.r_length == 2);
-
-    const OpKind = Relocation.PageOff.OpKind;
-    const rel_type = @intToEnum(macho.reloc_type_arm64, rel.r_type);
-
-    var op_kind: ?OpKind = null;
-    if (rel_type == .ARM64_RELOC_PAGEOFF12) {
-        const inst = self.code.items[out.offset..][0..4];
-        op_kind = if (isArithmeticOp(inst)) OpKind.arithmetic else OpKind.load;
-    }
-
-    out.payload = .{
-        .page_off = .{
-            .kind = switch (rel_type) {
-                .ARM64_RELOC_PAGEOFF12 => .page,
-                .ARM64_RELOC_GOT_LOAD_PAGEOFF12 => .got,
-                .ARM64_RELOC_TLVP_LOAD_PAGEOFF12 => .tlvp,
-                else => unreachable,
-            },
-            .addend = addend,
-            .op_kind = op_kind,
-        },
-    };
-}
-
-/// Fills `out.payload` with an empty `.pointer_to_got` relocation description.
-/// The relocation must be PC-relative with `r_length == 2`.
-fn parsePointerToGot(self: TextBlock, rel: macho.relocation_info, out: *Relocation) void {
-    _ = self;
-    assert(rel.r_pcrel == 1);
-    assert(rel.r_length == 2);
-
-    out.payload = .{ .pointer_to_got = .{} };
-}
-
-/// Fills `out.payload` with a `.signed` relocation description (x86_64
-/// `X86_64_RELOC_SIGNED{,_1,_2,_4}`; any other type is treated as unreachable).
-///
-/// The addend is the 32-bit little-endian value stored at `out.offset` plus the
-/// correction implied by the relocation type (0, 1, 2 or 4). For a non-extern
-/// relocation the addend is additionally rebased: the address just past the
-/// relocated field (`context.base_addr + out.offset + correction + 4`) is added
-/// and the address of the section selected by `rel.r_symbolnum` (1-based) in
-/// the source object is subtracted.
-///
-/// The relocation must be PC-relative with `r_length == 2`.
-fn parseSigned(self: TextBlock, rel: macho.relocation_info, out: *Relocation, context: RelocContext) void {
-    assert(rel.r_pcrel == 1);
-    assert(rel.r_length == 2);
-
-    const rel_type = @intToEnum(macho.reloc_type_x86_64, rel.r_type);
-    const correction: u3 = switch (rel_type) {
-        .X86_64_RELOC_SIGNED => 0,
-        .X86_64_RELOC_SIGNED_1 => 1,
-        .X86_64_RELOC_SIGNED_2 => 2,
-        .X86_64_RELOC_SIGNED_4 => 4,
-        else => unreachable,
-    };
-    // Widen to i64 *before* adding the correction: adding in i32 would overflow
-    // for embedded addends within `correction` of maxInt(i32).
-    const embedded: i64 = mem.readIntLittle(i32, self.code.items[out.offset..][0..4]);
-    var addend: i64 = embedded + correction;
-
-    if (rel.r_extern == 0) {
-        // Section-relative relocation: r_symbolnum is a 1-based section ordinal.
-        const seg = context.object.load_commands.items[context.object.segment_cmd_index.?].Segment;
-        const target_sect_base_addr = seg.sections.items[rel.r_symbolnum - 1].addr;
-        addend += @intCast(i64, context.base_addr + out.offset + correction + 4) - @intCast(i64, target_sect_base_addr);
-    }
-
-    out.payload = .{
-        .signed = .{
-            .correction = correction,
-            .addend = addend,
-        },
-    };
-}
-
-/// Fills `out.payload` with a `.load` relocation description (x86_64
-/// `X86_64_RELOC_GOT_LOAD`, `X86_64_RELOC_GOT` or `X86_64_RELOC_TLV`; any other
-/// type is treated as unreachable).
-///
-/// Only `X86_64_RELOC_GOT` carries an addend, read as a 32-bit little-endian
-/// value from the block's code at `out.offset`; the other types use 0.
-/// The relocation must be PC-relative with `r_length == 2`.
-fn parseLoad(self: TextBlock, rel: macho.relocation_info, out: *Relocation) void {
-    assert(rel.r_pcrel == 1);
-    assert(rel.r_length == 2);
-
-    const rel_type = @intToEnum(macho.reloc_type_x86_64, rel.r_type);
-
-    var addend: i32 = 0;
-    if (rel_type == .X86_64_RELOC_GOT) {
-        addend = mem.readIntLittle(i32, self.code.items[out.offset..][0..4]);
-    }
-
-    out.payload = .{
-        .load = .{
-            .kind = switch (rel_type) {
-                .X86_64_RELOC_GOT_LOAD, .X86_64_RELOC_GOT => .got,
-                .X86_64_RELOC_TLV => .tlvp,
-                else => unreachable,
-            },
-            .addend = addend,
-        },
-    };
-}
-
-/// Resolves every relocation recorded on this block.
-///
-/// For each relocation the source address is the block's symbol address plus
-/// the relocation offset. The target address is chosen as follows:
-///   * relocations that go through the GOT use the address of the matching
-///     GOT entry atom (a missing entry is reported as an internal linker error);
-///   * local targets use the target symbol's address, or, when this block lives
-///     in a thread-local-variables section, the displacement of the target from
-///     the first TLV initializer section (`__thread_data`, else `__thread_bss`);
-///   * undefined targets use the address of the matching stub atom, else the
-///     address of a global with the same name, else 0.
-///
-/// Returns `error.FailedToResolveRelocationTarget` when a required GOT entry or
-/// TLV initializer section is missing, and propagates any error from
-/// `rel.resolve`.
-pub fn resolveRelocs(self: *TextBlock, macho_file: *MachO) !void {
-    for (self.relocs.items) |rel| {
-        log.debug("relocating {}", .{rel});
-
-        const source_addr = macho_file.locals.items[self.local_sym_index].n_value + rel.offset;
-
-        const target_addr = target: {
-            const goes_through_got = switch (rel.payload) {
-                .pointer_to_got => true,
-                .page => |page| page.kind == .got,
-                .page_off => |page_off| page_off.kind == .got,
-                .load => |load| load.kind == .got,
-                else => false,
-            };
-
-            if (goes_through_got) {
-                const got_atom = macho_file.got_entries_map.get(.{
-                    .where = switch (rel.where) {
-                        .local => .local,
-                        .undef => .undef,
-                    },
-                    .where_index = rel.where_index,
-                }) orelse {
-                    const missing_sym = switch (rel.where) {
-                        .local => macho_file.locals.items[rel.where_index],
-                        .undef => macho_file.undefs.items[rel.where_index],
-                    };
-                    log.err("expected GOT entry for symbol '{s}'", .{macho_file.getString(missing_sym.n_strx)});
-                    log.err("  this is an internal linker error", .{});
-                    return error.FailedToResolveRelocationTarget;
-                };
-                break :target macho_file.locals.items[got_atom.local_sym_index].n_value;
-            }
-
-            switch (rel.where) {
-                .local => {
-                    const target_sym = macho_file.locals.items[rel.where_index];
-
-                    // Look up the section this (source) block was placed in.
-                    const source_sym = macho_file.locals.items[self.local_sym_index];
-                    const match = macho_file.section_ordinals.keys()[source_sym.n_sect - 1];
-                    const source_sect = macho_file.load_commands.items[match.seg].Segment.sections.items[match.sect];
-
-                    if (commands.sectionType(source_sect) != macho.S_THREAD_LOCAL_VARIABLES) {
-                        break :target target_sym.n_value;
-                    }
-
-                    // For TLV relocations, the value specified as a relocation is the displacement from the
-                    // TLV initializer (either value in __thread_data or zero-init in __thread_bss) to the first
-                    // defined TLV template init section in the following order:
-                    //   * wrt to __thread_data if defined, then
-                    //   * wrt to __thread_bss
-                    const data_seg = macho_file.load_commands.items[macho_file.data_segment_cmd_index.?].Segment;
-                    const init_sect_index = macho_file.tlv_data_section_index orelse
-                        macho_file.tlv_bss_section_index orelse
-                        {
-                        log.err("threadlocal variables present but no initializer sections found", .{});
-                        log.err("  __thread_data not found", .{});
-                        log.err("  __thread_bss not found", .{});
-                        return error.FailedToResolveRelocationTarget;
-                    };
-                    break :target target_sym.n_value - data_seg.sections.items[init_sect_index].addr;
-                },
-                .undef => {
-                    const stub_atom = macho_file.stubs_map.get(rel.where_index) orelse {
-                        // TODO this is required for incremental when we don't have every symbol
-                        // resolved when creating relocations. In this case, we will insert a branch
-                        // reloc to an undef symbol which may happen to be defined within the binary.
-                        // Then, the undef we point at will be a null symbol (free symbol) which we
-                        // should remove/repurpose. To circumvent this (for now), we check if the symbol
-                        // we point to is garbage, and if so we fall back to symbol resolver to find by name.
-                        const n_strx = macho_file.undefs.items[rel.where_index].n_strx;
-                        if (macho_file.symbol_resolver.get(n_strx)) |resolv| {
-                            if (resolv.where == .global) {
-                                break :target macho_file.globals.items[resolv.where_index].n_value;
-                            }
-                        }
-
-                        // TODO verify in TextBlock that the symbol is indeed dynamically bound.
-                        break :target 0; // Dynamically bound by dyld.
-                    };
-
-                    break :target macho_file.locals.items[stub_atom.local_sym_index].n_value;
-                },
-            }
-        };
-
-        log.debug("  | source_addr = 0x{x}", .{source_addr});
-        log.debug("  | target_addr = 0x{x}", .{target_addr});
-
-        try rel.resolve(.{
-            .block = self,
-            .offset = rel.offset,
-            .source_addr = source_addr,
-            .target_addr = target_addr,
-            .macho_file = macho_file,
-        });
-    }
-}
-
-/// `std.fmt` hook: writes a one-line debug rendering of the block to `writer`.
-/// The format string and options are ignored. `.code` is printed as a pointer
-/// rather than as bytes, and `.stab` is only emitted when it is set.
-pub fn format(self: TextBlock, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void {
-    _ = fmt;
-    _ = options;
-
-    const emit = std.fmt.format;
-
-    try emit(writer, "TextBlock {{ ", .{});
-    try emit(writer, ".local_sym_index = {d}, ", .{self.local_sym_index});
-    try emit(writer, ".aliases = {any}, ", .{self.aliases.items});
-    try emit(writer, ".contained = {any}, ", .{self.contained.items});
-    try emit(writer, ".code = {*}, ", .{self.code.items});
-    try emit(writer, ".size = {d}, ", .{self.size});
-    try emit(writer, ".alignment = {d}, ", .{self.alignment});
-    try emit(writer, ".relocs = {any}, ", .{self.relocs.items});
-    try emit(writer, ".rebases = {any}, ", .{self.rebases.items});
-    try emit(writer, ".bindings = {any}, ", .{self.bindings.items});
-    try emit(writer, ".dices = {any}, ", .{self.dices.items});
-    if (self.stab) |stab| try emit(writer, ".stab = {any}, ", .{stab});
-    try emit(writer, "}}", .{});
-}
-
-/// Forward iterator over a slice of raw Mach-O relocation records with
-/// one-element lookahead.
-const RelocIterator = struct {
-    buffer: []const macho.relocation_info,
-    /// Index of the record most recently returned by `next`; -1 before the
-    /// first call.
-    index: i32 = -1,
-
-    /// Advances the iterator and returns the record it now points at, or null
-    /// once the buffer is exhausted.
-    pub fn next(self: *RelocIterator) ?macho.relocation_info {
-        self.index += 1;
-        if (self.index >= self.buffer.len) return null;
-        return self.buffer[@intCast(u32, self.index)];
-    }
-
-    /// Returns the record the following `next` call would yield without
-    /// advancing. Asserts that such a record exists.
-    pub fn peek(self: RelocIterator) macho.relocation_info {
-        const upcoming = self.index + 1;
-        assert(upcoming < self.buffer.len);
-        return self.buffer[@intCast(u32, upcoming)];
-    }
-};
-
-/// Returns the sub-slice of `relocs` delimited by two `MachO.findFirst`
-/// searches: the first, from index 0, for a record with
-/// `r_address < end_addr`; the second, from that position, for a record with
-/// `r_address < start_addr`.
-/// NOTE(review): this presumably relies on `relocs` being sorted by descending
-/// `r_address`, so that the result covers `[start_addr, end_addr)` — confirm
-/// against the callers and `MachO.findFirst`.
-fn filterRelocs(relocs: []macho.relocation_info, start_addr: u64, end_addr: u64) []macho.relocation_info {
-    const BelowAddr = struct {
-        addr: u64,
-
-        pub fn predicate(self: @This(), rel: macho.relocation_info) bool {
-            return rel.r_address < self.addr;
-        }
-    };
-
-    const first = MachO.findFirst(macho.relocation_info, relocs, 0, BelowAddr{ .addr = end_addr });
-    const last = MachO.findFirst(macho.relocation_info, relocs, first, BelowAddr{ .addr = start_addr });
-    return relocs[first..last];
-}
-
-/// Returns true when bits 4..2 of the last instruction byte equal 0b100.
-/// NOTE(review): with a little-endian A64 instruction word these are bits
-/// 28..26, presumably selecting the "data processing (immediate)" encoding
-/// group — confirm against the Arm ARM.
-inline fn isArithmeticOp(inst: *const [4]u8) bool {
-    return (inst[3] & 0b00011100) == 0b00010000;
-}