From aaacfc0d0a23918c6712272e10bb1cdca1daaf04 Mon Sep 17 00:00:00 2001 From: Jakub Konka Date: Thu, 9 Sep 2021 18:32:03 +0200 Subject: macho: init process of renaming TextBlock to Atom Initially, internally within the linker. --- src/link/MachO.zig | 4 +- src/link/MachO/Atom.zig | 1305 ++++++++++++++++++++++++++++++++++++++++++ src/link/MachO/Object.zig | 4 +- src/link/MachO/TextBlock.zig | 1301 ----------------------------------------- 4 files changed, 1311 insertions(+), 1303 deletions(-) create mode 100644 src/link/MachO/Atom.zig delete mode 100644 src/link/MachO/TextBlock.zig (limited to 'src') diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 2ccedd70ea..2705d47a85 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -24,6 +24,7 @@ const trace = @import("../tracy.zig").trace; const Air = @import("../Air.zig"); const Allocator = mem.Allocator; const Archive = @import("MachO/Archive.zig"); +const Atom = @import("MachO/Atom.zig"); const Cache = @import("../Cache.zig"); const CodeSignature = @import("MachO/CodeSignature.zig"); const Compilation = @import("../Compilation.zig"); @@ -37,9 +38,10 @@ const LlvmObject = @import("../codegen/llvm.zig").Object; const LoadCommand = commands.LoadCommand; const Module = @import("../Module.zig"); const SegmentCommand = commands.SegmentCommand; -pub const TextBlock = @import("MachO/TextBlock.zig"); const Trie = @import("MachO/Trie.zig"); +pub const TextBlock = Atom; + pub const base_tag: File.Tag = File.Tag.macho; base: File, diff --git a/src/link/MachO/Atom.zig b/src/link/MachO/Atom.zig new file mode 100644 index 0000000000..41e34bc6f7 --- /dev/null +++ b/src/link/MachO/Atom.zig @@ -0,0 +1,1305 @@ +const Atom = @This(); + +const std = @import("std"); +const build_options = @import("build_options"); +const aarch64 = @import("../../codegen/aarch64.zig"); +const assert = std.debug.assert; +const commands = @import("commands.zig"); +const log = std.log.scoped(.text_block); +const macho = std.macho; +const math = 
std.math; +const mem = std.mem; +const meta = std.meta; + +const Allocator = mem.Allocator; +const Arch = std.Target.Cpu.Arch; +const MachO = @import("../MachO.zig"); +const Object = @import("Object.zig"); + +/// Each decl always gets a local symbol with the fully qualified name. +/// The vaddr and size are found here directly. +/// The file offset is found by computing the vaddr offset from the section vaddr +/// the symbol references, and adding that to the file offset of the section. +/// If this field is 0, it means the codegen size = 0 and there is no symbol or +/// offset table entry. +local_sym_index: u32, + +/// List of symbol aliases pointing to the same atom via different nlists +aliases: std.ArrayListUnmanaged(u32) = .{}, + +/// List of symbols contained within this atom +contained: std.ArrayListUnmanaged(SymbolAtOffset) = .{}, + +/// Code (may be non-relocated) this atom represents +code: std.ArrayListUnmanaged(u8) = .{}, + +/// Size of this atom. +/// Unlike in Elf, we need to store the size of this symbol as part of +/// the atom since macho.nlist_64 lacks this information. +size: u64, + +/// Alignment of this atom as a power of 2. +/// For instance, alignment of 0 should be read as 2^0 = 1 byte aligned. +alignment: u32, + +/// List of relocations belonging to this atom. +relocs: std.ArrayListUnmanaged(Relocation) = .{}, + +/// List of offsets contained within this atom that need rebasing by the dynamic +/// loader in presence of ASLR. +rebases: std.ArrayListUnmanaged(u64) = .{}, + +/// List of offsets contained within this atom that will be dynamically bound +/// by the dynamic loader and contain pointers to resolved (at load time) extern +/// symbols (aka proxies aka imports) +bindings: std.ArrayListUnmanaged(SymbolAtOffset) = .{}, + +/// List of lazy bindings +lazy_bindings: std.ArrayListUnmanaged(SymbolAtOffset) = .{}, + +/// List of data-in-code entries. This is currently specific to x86_64 only. 
+dices: std.ArrayListUnmanaged(macho.data_in_code_entry) = .{}, + +/// Stab entry for this atom. This is currently specific to a binary created +/// by linking object files in a traditional sense - in incremental sense, we +/// bypass stabs altogether to produce dSYM bundle directly with fully relocated +/// DWARF sections. +stab: ?Stab = null, + +/// Points to the previous and next neighbours +next: ?*Atom, +prev: ?*Atom, + +/// Previous/next linked list pointers. +/// This is the linked list node for this Decl's corresponding .debug_info tag. +dbg_info_prev: ?*Atom, +dbg_info_next: ?*Atom, +/// Offset into .debug_info pointing to the tag for this Decl. +dbg_info_off: u32, +/// Size of the .debug_info tag for this Decl, not including padding. +dbg_info_len: u32, + +dirty: bool = true, + +pub const SymbolAtOffset = struct { + local_sym_index: u32, + offset: u64, + stab: ?Stab = null, + + pub fn format( + self: SymbolAtOffset, + comptime fmt: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, + ) !void { + _ = fmt; + _ = options; + try std.fmt.format(writer, "{{ {d}: .offset = {d}", .{ self.local_sym_index, self.offset }); + if (self.stab) |stab| { + try std.fmt.format(writer, ", .stab = {any}", .{stab}); + } + try std.fmt.format(writer, " }}", .{}); + } +}; + +pub const Stab = union(enum) { + function: u64, + static, + global, + + pub fn asNlists(stab: Stab, local_sym_index: u32, macho_file: anytype) ![]macho.nlist_64 { + var nlists = std.ArrayList(macho.nlist_64).init(macho_file.base.allocator); + defer nlists.deinit(); + + const sym = macho_file.locals.items[local_sym_index]; + switch (stab) { + .function => |size| { + try nlists.ensureUnusedCapacity(4); + nlists.appendAssumeCapacity(.{ + .n_strx = 0, + .n_type = macho.N_BNSYM, + .n_sect = sym.n_sect, + .n_desc = 0, + .n_value = sym.n_value, + }); + nlists.appendAssumeCapacity(.{ + .n_strx = sym.n_strx, + .n_type = macho.N_FUN, + .n_sect = sym.n_sect, + .n_desc = 0, + .n_value = sym.n_value, + }); + 
nlists.appendAssumeCapacity(.{ + .n_strx = 0, + .n_type = macho.N_FUN, + .n_sect = 0, + .n_desc = 0, + .n_value = size, + }); + nlists.appendAssumeCapacity(.{ + .n_strx = 0, + .n_type = macho.N_ENSYM, + .n_sect = sym.n_sect, + .n_desc = 0, + .n_value = size, + }); + }, + .global => { + try nlists.append(.{ + .n_strx = sym.n_strx, + .n_type = macho.N_GSYM, + .n_sect = 0, + .n_desc = 0, + .n_value = 0, + }); + }, + .static => { + try nlists.append(.{ + .n_strx = sym.n_strx, + .n_type = macho.N_STSYM, + .n_sect = sym.n_sect, + .n_desc = 0, + .n_value = sym.n_value, + }); + }, + } + + return nlists.toOwnedSlice(); + } +}; + +pub const Relocation = struct { + /// Offset within the atom's code buffer. + /// Note relocation size can be inferred by relocation's kind. + offset: u32, + + where: enum { + local, + undef, + }, + + where_index: u32, + + payload: union(enum) { + unsigned: Unsigned, + branch: Branch, + page: Page, + page_off: PageOff, + pointer_to_got: PointerToGot, + signed: Signed, + load: Load, + }, + + const ResolveArgs = struct { + block: *Atom, + offset: u32, + source_addr: u64, + target_addr: u64, + macho_file: *MachO, + }; + + pub const Unsigned = struct { + subtractor: ?u32, + + /// Addend embedded directly in the relocation slot + addend: i64, + + /// Extracted from r_length: + /// => 3 implies true + /// => 2 implies false + /// => * is unreachable + is_64bit: bool, + + pub fn resolve(self: Unsigned, args: ResolveArgs) !void { + const result = blk: { + if (self.subtractor) |subtractor| { + const sym = args.macho_file.locals.items[subtractor]; + break :blk @intCast(i64, args.target_addr) - @intCast(i64, sym.n_value) + self.addend; + } else { + break :blk @intCast(i64, args.target_addr) + self.addend; + } + }; + + if (self.is_64bit) { + mem.writeIntLittle(u64, args.block.code.items[args.offset..][0..8], @bitCast(u64, result)); + } else { + mem.writeIntLittle(u32, args.block.code.items[args.offset..][0..4], @truncate(u32, @bitCast(u64, result))); + } + } + 
+ pub fn format(self: Unsigned, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = fmt; + _ = options; + try std.fmt.format(writer, "Unsigned {{ ", .{}); + if (self.subtractor) |sub| { + try std.fmt.format(writer, ".subtractor = {}, ", .{sub}); + } + try std.fmt.format(writer, ".addend = {}, ", .{self.addend}); + const length: usize = if (self.is_64bit) 8 else 4; + try std.fmt.format(writer, ".length = {}, ", .{length}); + try std.fmt.format(writer, "}}", .{}); + } + }; + + pub const Branch = struct { + arch: Arch, + + pub fn resolve(self: Branch, args: ResolveArgs) !void { + switch (self.arch) { + .aarch64 => { + const displacement = math.cast( + i28, + @intCast(i64, args.target_addr) - @intCast(i64, args.source_addr), + ) catch |err| switch (err) { + error.Overflow => { + log.err("jump too big to encode as i28 displacement value", .{}); + log.err(" (target - source) = displacement => 0x{x} - 0x{x} = 0x{x}", .{ + args.target_addr, + args.source_addr, + @intCast(i64, args.target_addr) - @intCast(i64, args.source_addr), + }); + log.err(" TODO implement branch islands to extend jump distance for arm64", .{}); + return error.TODOImplementBranchIslands; + }, + }; + const code = args.block.code.items[args.offset..][0..4]; + var inst = aarch64.Instruction{ + .unconditional_branch_immediate = mem.bytesToValue(meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.unconditional_branch_immediate, + ), code), + }; + inst.unconditional_branch_immediate.imm26 = @truncate(u26, @bitCast(u28, displacement >> 2)); + mem.writeIntLittle(u32, code, inst.toU32()); + }, + .x86_64 => { + const displacement = try math.cast( + i32, + @intCast(i64, args.target_addr) - @intCast(i64, args.source_addr) - 4, + ); + mem.writeIntLittle(u32, args.block.code.items[args.offset..][0..4], @bitCast(u32, displacement)); + }, + else => return error.UnsupportedCpuArchitecture, + } + } + + pub fn format(self: Branch, comptime fmt: []const u8, options: 
std.fmt.FormatOptions, writer: anytype) !void { + _ = self; + _ = fmt; + _ = options; + try std.fmt.format(writer, "Branch {{}}", .{}); + } + }; + + pub const Page = struct { + kind: enum { + page, + got, + tlvp, + }, + addend: u32 = 0, + + pub fn resolve(self: Page, args: ResolveArgs) !void { + const target_addr = args.target_addr + self.addend; + const source_page = @intCast(i32, args.source_addr >> 12); + const target_page = @intCast(i32, target_addr >> 12); + const pages = @bitCast(u21, @intCast(i21, target_page - source_page)); + + const code = args.block.code.items[args.offset..][0..4]; + var inst = aarch64.Instruction{ + .pc_relative_address = mem.bytesToValue(meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.pc_relative_address, + ), code), + }; + inst.pc_relative_address.immhi = @truncate(u19, pages >> 2); + inst.pc_relative_address.immlo = @truncate(u2, pages); + + mem.writeIntLittle(u32, code, inst.toU32()); + } + + pub fn format(self: Page, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = fmt; + _ = options; + try std.fmt.format(writer, "Page {{ ", .{}); + switch (self.kind) { + .page => {}, + .got => { + try std.fmt.format(writer, ".got, ", .{}); + }, + .tlvp => { + try std.fmt.format(writer, ".tlvp, ", .{}); + }, + } + try std.fmt.format(writer, ".addend = {}, ", .{self.addend}); + try std.fmt.format(writer, "}}", .{}); + } + }; + + pub const PageOff = struct { + kind: enum { + page, + got, + tlvp, + }, + addend: u32 = 0, + op_kind: ?OpKind = null, + + pub const OpKind = enum { + arithmetic, + load, + }; + + pub fn resolve(self: PageOff, args: ResolveArgs) !void { + const code = args.block.code.items[args.offset..][0..4]; + + switch (self.kind) { + .page => { + const target_addr = args.target_addr + self.addend; + const narrowed = @truncate(u12, target_addr); + + const op_kind = self.op_kind orelse unreachable; + var inst: aarch64.Instruction = blk: { + switch (op_kind) { + .arithmetic => { + break 
:blk .{ + .add_subtract_immediate = mem.bytesToValue(meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.add_subtract_immediate, + ), code), + }; + }, + .load => { + break :blk .{ + .load_store_register = mem.bytesToValue(meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.load_store_register, + ), code), + }; + }, + } + }; + + if (op_kind == .arithmetic) { + inst.add_subtract_immediate.imm12 = narrowed; + } else { + const offset: u12 = blk: { + if (inst.load_store_register.size == 0) { + if (inst.load_store_register.v == 1) { + // 128-bit SIMD is scaled by 16. + break :blk try math.divExact(u12, narrowed, 16); + } + // Otherwise, 8-bit SIMD or ldrb. + break :blk narrowed; + } else { + const denom: u4 = try math.powi(u4, 2, inst.load_store_register.size); + break :blk try math.divExact(u12, narrowed, denom); + } + }; + inst.load_store_register.offset = offset; + } + + mem.writeIntLittle(u32, code, inst.toU32()); + }, + .got => { + const narrowed = @truncate(u12, args.target_addr); + var inst: aarch64.Instruction = .{ + .load_store_register = mem.bytesToValue(meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.load_store_register, + ), code), + }; + const offset = try math.divExact(u12, narrowed, 8); + inst.load_store_register.offset = offset; + mem.writeIntLittle(u32, code, inst.toU32()); + }, + .tlvp => { + const RegInfo = struct { + rd: u5, + rn: u5, + size: u1, + }; + const reg_info: RegInfo = blk: { + if (isArithmeticOp(code)) { + const inst = mem.bytesToValue(meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.add_subtract_immediate, + ), code); + break :blk .{ + .rd = inst.rd, + .rn = inst.rn, + .size = inst.sf, + }; + } else { + const inst = mem.bytesToValue(meta.TagPayload( + aarch64.Instruction, + aarch64.Instruction.load_store_register, + ), code); + break :blk .{ + .rd = inst.rt, + .rn = inst.rn, + .size = @truncate(u1, inst.size), + }; + } + }; + const narrowed = @truncate(u12, args.target_addr); + var inst = 
aarch64.Instruction{ + .add_subtract_immediate = .{ + .rd = reg_info.rd, + .rn = reg_info.rn, + .imm12 = narrowed, + .sh = 0, + .s = 0, + .op = 0, + .sf = reg_info.size, + }, + }; + mem.writeIntLittle(u32, code, inst.toU32()); + }, + } + } + + pub fn format(self: PageOff, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = fmt; + _ = options; + try std.fmt.format(writer, "PageOff {{ ", .{}); + switch (self.kind) { + .page => {}, + .got => { + try std.fmt.format(writer, ".got, ", .{}); + }, + .tlvp => { + try std.fmt.format(writer, ".tlvp, ", .{}); + }, + } + try std.fmt.format(writer, ".addend = {}, ", .{self.addend}); + try std.fmt.format(writer, ".op_kind = {s}, ", .{self.op_kind}); + try std.fmt.format(writer, "}}", .{}); + } + }; + + pub const PointerToGot = struct { + pub fn resolve(_: PointerToGot, args: ResolveArgs) !void { + const result = try math.cast(i32, @intCast(i64, args.target_addr) - @intCast(i64, args.source_addr)); + mem.writeIntLittle(u32, args.block.code.items[args.offset..][0..4], @bitCast(u32, result)); + } + + pub fn format(self: PointerToGot, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = self; + _ = fmt; + _ = options; + try std.fmt.format(writer, "PointerToGot {{}}", .{}); + } + }; + + pub const Signed = struct { + addend: i64, + correction: u3, + + pub fn resolve(self: Signed, args: ResolveArgs) !void { + const target_addr = @intCast(i64, args.target_addr) + self.addend; + const displacement = try math.cast( + i32, + target_addr - @intCast(i64, args.source_addr + self.correction + 4), + ); + mem.writeIntLittle(u32, args.block.code.items[args.offset..][0..4], @bitCast(u32, displacement)); + } + + pub fn format(self: Signed, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = fmt; + _ = options; + try std.fmt.format(writer, "Signed {{ ", .{}); + try std.fmt.format(writer, ".addend = {}, ", .{self.addend}); + try 
std.fmt.format(writer, ".correction = {}, ", .{self.correction}); + try std.fmt.format(writer, "}}", .{}); + } + }; + + pub const Load = struct { + kind: enum { + got, + tlvp, + }, + addend: i32 = 0, + + pub fn resolve(self: Load, args: ResolveArgs) !void { + if (self.kind == .tlvp) { + // We need to rewrite the opcode from movq to leaq. + args.block.code.items[args.offset - 2] = 0x8d; + } + const displacement = try math.cast( + i32, + @intCast(i64, args.target_addr) - @intCast(i64, args.source_addr) - 4 + self.addend, + ); + mem.writeIntLittle(u32, args.block.code.items[args.offset..][0..4], @bitCast(u32, displacement)); + } + + pub fn format(self: Load, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = fmt; + _ = options; + try std.fmt.format(writer, "Load {{ ", .{}); + try std.fmt.format(writer, "{s}, ", .{self.kind}); + try std.fmt.format(writer, ".addend = {}, ", .{self.addend}); + try std.fmt.format(writer, "}}", .{}); + } + }; + + pub fn resolve(self: Relocation, args: ResolveArgs) !void { + switch (self.payload) { + .unsigned => |unsigned| try unsigned.resolve(args), + .branch => |branch| try branch.resolve(args), + .page => |page| try page.resolve(args), + .page_off => |page_off| try page_off.resolve(args), + .pointer_to_got => |pointer_to_got| try pointer_to_got.resolve(args), + .signed => |signed| try signed.resolve(args), + .load => |load| try load.resolve(args), + } + } + + pub fn format(self: Relocation, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + try std.fmt.format(writer, "Relocation {{ ", .{}); + try std.fmt.format(writer, ".offset = {}, ", .{self.offset}); + try std.fmt.format(writer, ".where = {}, ", .{self.where}); + try std.fmt.format(writer, ".where_index = {d}, ", .{self.where_index}); + + switch (self.payload) { + .unsigned => |unsigned| try unsigned.format(fmt, options, writer), + .branch => |branch| try branch.format(fmt, options, writer), + .page => |page| try 
page.format(fmt, options, writer), + .page_off => |page_off| try page_off.format(fmt, options, writer), + .pointer_to_got => |pointer_to_got| try pointer_to_got.format(fmt, options, writer), + .signed => |signed| try signed.format(fmt, options, writer), + .load => |load| try load.format(fmt, options, writer), + } + + try std.fmt.format(writer, "}}", .{}); + } +}; + +pub const empty = Atom{ + .local_sym_index = 0, + .size = 0, + .alignment = 0, + .prev = null, + .next = null, + .dbg_info_prev = null, + .dbg_info_next = null, + .dbg_info_off = undefined, + .dbg_info_len = undefined, +}; + +pub fn deinit(self: *Atom, allocator: *Allocator) void { + self.dices.deinit(allocator); + self.lazy_bindings.deinit(allocator); + self.bindings.deinit(allocator); + self.rebases.deinit(allocator); + self.relocs.deinit(allocator); + self.contained.deinit(allocator); + self.aliases.deinit(allocator); + self.code.deinit(allocator); +} + +/// Returns how much room there is to grow in virtual address space. +/// File offset relocation happens transparently, so it is not included in +/// this calculation. +pub fn capacity(self: Atom, macho_file: MachO) u64 { + const self_sym = macho_file.locals.items[self.local_sym_index]; + if (self.next) |next| { + const next_sym = macho_file.locals.items[next.local_sym_index]; + return next_sym.n_value - self_sym.n_value; + } else { + // We are the last atom. + // The capacity is limited only by virtual address space. + return std.math.maxInt(u64) - self_sym.n_value; + } +} + +pub fn freeListEligible(self: Atom, macho_file: MachO) bool { + // No need to keep a free list node for the last atom. 
+ const next = self.next orelse return false; + const self_sym = macho_file.locals.items[self.local_sym_index]; + const next_sym = macho_file.locals.items[next.local_sym_index]; + const cap = next_sym.n_value - self_sym.n_value; + const ideal_cap = MachO.padToIdeal(self.size); + if (cap <= ideal_cap) return false; + const surplus = cap - ideal_cap; + return surplus >= MachO.min_text_capacity; +} + +const RelocContext = struct { + base_addr: u64 = 0, + base_offset: u64 = 0, + allocator: *Allocator, + object: *Object, + macho_file: *MachO, + parsed_atoms: *Object.ParsedAtoms, +}; + +fn initRelocFromObject(rel: macho.relocation_info, context: RelocContext) !Relocation { + var parsed_rel = Relocation{ + .offset = @intCast(u32, @intCast(u64, rel.r_address) - context.base_offset), + .where = undefined, + .where_index = undefined, + .payload = undefined, + }; + + if (rel.r_extern == 0) { + const sect_id = @intCast(u16, rel.r_symbolnum - 1); + + const local_sym_index = context.object.sections_as_symbols.get(sect_id) orelse blk: { + const seg = context.object.load_commands.items[context.object.segment_cmd_index.?].Segment; + const sect = seg.sections.items[sect_id]; + const match = (try context.macho_file.getMatchingSection(sect)) orelse unreachable; + const local_sym_index = @intCast(u32, context.macho_file.locals.items.len); + const sym_name = try std.fmt.allocPrint(context.allocator, "l_{s}_{s}_{s}", .{ + context.object.name, + commands.segmentName(sect), + commands.sectionName(sect), + }); + defer context.allocator.free(sym_name); + + try context.macho_file.locals.append(context.allocator, .{ + .n_strx = try context.macho_file.makeString(sym_name), + .n_type = macho.N_SECT, + .n_sect = @intCast(u8, context.macho_file.section_ordinals.getIndex(match).? 
+ 1), + .n_desc = 0, + .n_value = 0, + }); + try context.object.sections_as_symbols.putNoClobber(context.allocator, sect_id, local_sym_index); + break :blk local_sym_index; + }; + + parsed_rel.where = .local; + parsed_rel.where_index = local_sym_index; + } else { + const sym = context.object.symtab.items[rel.r_symbolnum]; + const sym_name = context.object.getString(sym.n_strx); + + if (MachO.symbolIsSect(sym) and !MachO.symbolIsExt(sym)) { + const where_index = context.object.symbol_mapping.get(rel.r_symbolnum) orelse unreachable; + parsed_rel.where = .local; + parsed_rel.where_index = where_index; + } else { + const n_strx = context.macho_file.strtab_dir.getAdapted(@as([]const u8, sym_name), MachO.StringSliceAdapter{ + .strtab = &context.macho_file.strtab, + }) orelse unreachable; + const resolv = context.macho_file.symbol_resolver.get(n_strx) orelse unreachable; + switch (resolv.where) { + .global => { + parsed_rel.where = .local; + parsed_rel.where_index = resolv.local_sym_index; + }, + .undef => { + parsed_rel.where = .undef; + parsed_rel.where_index = resolv.where_index; + }, + } + } + } + + return parsed_rel; +} + +pub fn parseRelocs(self: *Atom, relocs: []macho.relocation_info, context: RelocContext) !void { + const filtered_relocs = filterRelocs(relocs, context.base_offset, context.base_offset + self.size); + var it = RelocIterator{ + .buffer = filtered_relocs, + }; + + var addend: u32 = 0; + var subtractor: ?u32 = null; + const arch = context.macho_file.base.options.target.cpu.arch; + + while (it.next()) |rel| { + if (isAddend(rel, arch)) { + // Addend is not a relocation with effect on the Atom, so + // parse it and carry on. + assert(addend == 0); // Oh no, addend was not reset! + addend = rel.r_symbolnum; + + // Verify ADDEND is followed by a PAGE21 or PAGEOFF12. 
+ const next = @intToEnum(macho.reloc_type_arm64, it.peek().r_type); + switch (next) { + .ARM64_RELOC_PAGE21, .ARM64_RELOC_PAGEOFF12 => {}, + else => { + log.err("unexpected relocation type: expected PAGE21 or PAGEOFF12, found {s}", .{next}); + return error.UnexpectedRelocationType; + }, + } + continue; + } + + if (isSubtractor(rel, arch)) { + // Subtractor is not a relocation with effect on the Atom, so + // parse it and carry on. + assert(subtractor == null); // Oh no, subtractor was not reset! + assert(rel.r_extern == 1); + const sym = context.object.symtab.items[rel.r_symbolnum]; + const sym_name = context.object.getString(sym.n_strx); + + if (MachO.symbolIsSect(sym) and !MachO.symbolIsExt(sym)) { + const where_index = context.object.symbol_mapping.get(rel.r_symbolnum) orelse unreachable; + subtractor = where_index; + } else { + const n_strx = context.macho_file.strtab_dir.getAdapted(@as([]const u8, sym_name), MachO.StringSliceAdapter{ + .strtab = &context.macho_file.strtab, + }) orelse unreachable; + const resolv = context.macho_file.symbol_resolver.get(n_strx) orelse unreachable; + assert(resolv.where == .global); + subtractor = resolv.local_sym_index; + } + + // Verify SUBTRACTOR is followed by UNSIGNED. 
+ switch (arch) { + .aarch64 => { + const next = @intToEnum(macho.reloc_type_arm64, it.peek().r_type); + if (next != .ARM64_RELOC_UNSIGNED) { + log.err("unexpected relocation type: expected UNSIGNED, found {s}", .{next}); + return error.UnexpectedRelocationType; + } + }, + .x86_64 => { + const next = @intToEnum(macho.reloc_type_x86_64, it.peek().r_type); + if (next != .X86_64_RELOC_UNSIGNED) { + log.err("unexpected relocation type: expected UNSIGNED, found {s}", .{next}); + return error.UnexpectedRelocationType; + } + }, + else => unreachable, + } + continue; + } + + var parsed_rel = try initRelocFromObject(rel, context); + + switch (arch) { + .aarch64 => { + const rel_type = @intToEnum(macho.reloc_type_arm64, rel.r_type); + switch (rel_type) { + .ARM64_RELOC_ADDEND => unreachable, + .ARM64_RELOC_SUBTRACTOR => unreachable, + .ARM64_RELOC_BRANCH26 => { + self.parseBranch(rel, &parsed_rel, context); + }, + .ARM64_RELOC_UNSIGNED => { + self.parseUnsigned(rel, &parsed_rel, subtractor, context); + subtractor = null; + }, + .ARM64_RELOC_PAGE21, + .ARM64_RELOC_GOT_LOAD_PAGE21, + .ARM64_RELOC_TLVP_LOAD_PAGE21, + => { + self.parsePage(rel, &parsed_rel, addend); + if (rel_type == .ARM64_RELOC_PAGE21) + addend = 0; + }, + .ARM64_RELOC_PAGEOFF12, + .ARM64_RELOC_GOT_LOAD_PAGEOFF12, + .ARM64_RELOC_TLVP_LOAD_PAGEOFF12, + => { + self.parsePageOff(rel, &parsed_rel, addend); + if (rel_type == .ARM64_RELOC_PAGEOFF12) + addend = 0; + }, + .ARM64_RELOC_POINTER_TO_GOT => { + self.parsePointerToGot(rel, &parsed_rel); + }, + } + }, + .x86_64 => { + switch (@intToEnum(macho.reloc_type_x86_64, rel.r_type)) { + .X86_64_RELOC_SUBTRACTOR => unreachable, + .X86_64_RELOC_BRANCH => { + self.parseBranch(rel, &parsed_rel, context); + }, + .X86_64_RELOC_UNSIGNED => { + self.parseUnsigned(rel, &parsed_rel, subtractor, context); + subtractor = null; + }, + .X86_64_RELOC_SIGNED, + .X86_64_RELOC_SIGNED_1, + .X86_64_RELOC_SIGNED_2, + .X86_64_RELOC_SIGNED_4, + => { + self.parseSigned(rel, &parsed_rel, 
context); + }, + .X86_64_RELOC_GOT_LOAD, + .X86_64_RELOC_GOT, + .X86_64_RELOC_TLV, + => { + self.parseLoad(rel, &parsed_rel); + }, + } + }, + else => unreachable, + } + + try self.relocs.append(context.allocator, parsed_rel); + + const is_via_got = switch (parsed_rel.payload) { + .pointer_to_got => true, + .load => |load| load.kind == .got, + .page => |page| page.kind == .got, + .page_off => |page_off| page_off.kind == .got, + else => false, + }; + + if (is_via_got) blk: { + const key = MachO.GotIndirectionKey{ + .where = switch (parsed_rel.where) { + .local => .local, + .undef => .undef, + }, + .where_index = parsed_rel.where_index, + }; + if (context.macho_file.got_entries_map.contains(key)) break :blk; + + const atom = try context.macho_file.createGotAtom(key); + try context.macho_file.got_entries_map.putNoClobber(context.macho_file.base.allocator, key, atom); + const match = MachO.MatchingSection{ + .seg = context.macho_file.data_const_segment_cmd_index.?, + .sect = context.macho_file.got_section_index.?, + }; + + if (context.parsed_atoms.getPtr(match)) |last| { + last.*.next = atom; + atom.prev = last.*; + last.* = atom; + } else { + try context.parsed_atoms.putNoClobber(match, atom); + } + } else if (parsed_rel.payload == .unsigned) { + switch (parsed_rel.where) { + .undef => { + try self.bindings.append(context.allocator, .{ + .local_sym_index = parsed_rel.where_index, + .offset = parsed_rel.offset, + }); + }, + .local => { + const source_sym = context.macho_file.locals.items[self.local_sym_index]; + const match = context.macho_file.section_ordinals.keys()[source_sym.n_sect - 1]; + const seg = context.macho_file.load_commands.items[match.seg].Segment; + const sect = seg.sections.items[match.sect]; + const sect_type = commands.sectionType(sect); + + const should_rebase = rebase: { + if (!parsed_rel.payload.unsigned.is_64bit) break :rebase false; + + // TODO actually, a check similar to what dyld is doing, that is, verifying + // that the segment is writable 
should be enough here. + const is_right_segment = blk: { + if (context.macho_file.data_segment_cmd_index) |idx| { + if (match.seg == idx) { + break :blk true; + } + } + if (context.macho_file.data_const_segment_cmd_index) |idx| { + if (match.seg == idx) { + break :blk true; + } + } + break :blk false; + }; + + if (!is_right_segment) break :rebase false; + if (sect_type != macho.S_LITERAL_POINTERS and + sect_type != macho.S_REGULAR and + sect_type != macho.S_MOD_INIT_FUNC_POINTERS and + sect_type != macho.S_MOD_TERM_FUNC_POINTERS) + { + break :rebase false; + } + + break :rebase true; + }; + + if (should_rebase) { + try self.rebases.append(context.allocator, parsed_rel.offset); + } + }, + } + } else if (parsed_rel.payload == .branch) blk: { + if (parsed_rel.where != .undef) break :blk; + if (context.macho_file.stubs_map.contains(parsed_rel.where_index)) break :blk; + + const stub_helper_atom = try context.macho_file.createStubHelperAtom(); + const laptr_atom = try context.macho_file.createLazyPointerAtom( + stub_helper_atom.local_sym_index, + parsed_rel.where_index, + ); + const stub_atom = try context.macho_file.createStubAtom(laptr_atom.local_sym_index); + try context.macho_file.stubs_map.putNoClobber(context.allocator, parsed_rel.where_index, stub_atom); + // TODO clean this up! 
+ if (context.parsed_atoms.getPtr(.{ + .seg = context.macho_file.text_segment_cmd_index.?, + .sect = context.macho_file.stub_helper_section_index.?, + })) |last| { + last.*.next = stub_helper_atom; + stub_helper_atom.prev = last.*; + last.* = stub_helper_atom; + } else { + try context.parsed_atoms.putNoClobber(.{ + .seg = context.macho_file.text_segment_cmd_index.?, + .sect = context.macho_file.stub_helper_section_index.?, + }, stub_helper_atom); + } + if (context.parsed_atoms.getPtr(.{ + .seg = context.macho_file.text_segment_cmd_index.?, + .sect = context.macho_file.stubs_section_index.?, + })) |last| { + last.*.next = stub_atom; + stub_atom.prev = last.*; + last.* = stub_atom; + } else { + try context.parsed_atoms.putNoClobber(.{ + .seg = context.macho_file.text_segment_cmd_index.?, + .sect = context.macho_file.stubs_section_index.?, + }, stub_atom); + } + if (context.parsed_atoms.getPtr(.{ + .seg = context.macho_file.data_segment_cmd_index.?, + .sect = context.macho_file.la_symbol_ptr_section_index.?, + })) |last| { + last.*.next = laptr_atom; + laptr_atom.prev = last.*; + last.* = laptr_atom; + } else { + try context.parsed_atoms.putNoClobber(.{ + .seg = context.macho_file.data_segment_cmd_index.?, + .sect = context.macho_file.la_symbol_ptr_section_index.?, + }, laptr_atom); + } + } + } +} + +fn isAddend(rel: macho.relocation_info, arch: Arch) bool { + if (arch != .aarch64) return false; + return @intToEnum(macho.reloc_type_arm64, rel.r_type) == .ARM64_RELOC_ADDEND; +} + +fn isSubtractor(rel: macho.relocation_info, arch: Arch) bool { + return switch (arch) { + .aarch64 => @intToEnum(macho.reloc_type_arm64, rel.r_type) == .ARM64_RELOC_SUBTRACTOR, + .x86_64 => @intToEnum(macho.reloc_type_x86_64, rel.r_type) == .X86_64_RELOC_SUBTRACTOR, + else => unreachable, + }; +} + +fn parseUnsigned( + self: Atom, + rel: macho.relocation_info, + out: *Relocation, + subtractor: ?u32, + context: RelocContext, +) void { + assert(rel.r_pcrel == 0); + + const is_64bit: bool = 
switch (rel.r_length) { + 3 => true, + 2 => false, + else => unreachable, + }; + + var addend: i64 = if (is_64bit) + mem.readIntLittle(i64, self.code.items[out.offset..][0..8]) + else + mem.readIntLittle(i32, self.code.items[out.offset..][0..4]); + + if (rel.r_extern == 0) { + const seg = context.object.load_commands.items[context.object.segment_cmd_index.?].Segment; + const target_sect_base_addr = seg.sections.items[rel.r_symbolnum - 1].addr; + addend -= @intCast(i64, target_sect_base_addr); + } + + out.payload = .{ + .unsigned = .{ + .subtractor = subtractor, + .is_64bit = is_64bit, + .addend = addend, + }, + }; +} + +fn parseBranch(self: Atom, rel: macho.relocation_info, out: *Relocation, context: RelocContext) void { + _ = self; + assert(rel.r_pcrel == 1); + assert(rel.r_length == 2); + + out.payload = .{ + .branch = .{ + .arch = context.macho_file.base.options.target.cpu.arch, + }, + }; +} + +fn parsePage(self: Atom, rel: macho.relocation_info, out: *Relocation, addend: u32) void { + _ = self; + assert(rel.r_pcrel == 1); + assert(rel.r_length == 2); + + out.payload = .{ + .page = .{ + .kind = switch (@intToEnum(macho.reloc_type_arm64, rel.r_type)) { + .ARM64_RELOC_PAGE21 => .page, + .ARM64_RELOC_GOT_LOAD_PAGE21 => .got, + .ARM64_RELOC_TLVP_LOAD_PAGE21 => .tlvp, + else => unreachable, + }, + .addend = addend, + }, + }; +} + +fn parsePageOff(self: Atom, rel: macho.relocation_info, out: *Relocation, addend: u32) void { + assert(rel.r_pcrel == 0); + assert(rel.r_length == 2); + + const rel_type = @intToEnum(macho.reloc_type_arm64, rel.r_type); + const op_kind: ?Relocation.PageOff.OpKind = blk: { + if (rel_type != .ARM64_RELOC_PAGEOFF12) break :blk null; + const op_kind: Relocation.PageOff.OpKind = if (isArithmeticOp(self.code.items[out.offset..][0..4])) + .arithmetic + else + .load; + break :blk op_kind; + }; + + out.payload = .{ + .page_off = .{ + .kind = switch (rel_type) { + .ARM64_RELOC_PAGEOFF12 => .page, + .ARM64_RELOC_GOT_LOAD_PAGEOFF12 => .got, + 
.ARM64_RELOC_TLVP_LOAD_PAGEOFF12 => .tlvp, + else => unreachable, + }, + .addend = addend, + .op_kind = op_kind, + }, + }; +} + +fn parsePointerToGot(self: Atom, rel: macho.relocation_info, out: *Relocation) void { + _ = self; + assert(rel.r_pcrel == 1); + assert(rel.r_length == 2); + + out.payload = .{ + .pointer_to_got = .{}, + }; +} + +fn parseSigned(self: Atom, rel: macho.relocation_info, out: *Relocation, context: RelocContext) void { + assert(rel.r_pcrel == 1); + assert(rel.r_length == 2); + + const rel_type = @intToEnum(macho.reloc_type_x86_64, rel.r_type); + const correction: u3 = switch (rel_type) { + .X86_64_RELOC_SIGNED => 0, + .X86_64_RELOC_SIGNED_1 => 1, + .X86_64_RELOC_SIGNED_2 => 2, + .X86_64_RELOC_SIGNED_4 => 4, + else => unreachable, + }; + var addend: i64 = mem.readIntLittle(i32, self.code.items[out.offset..][0..4]) + correction; + + if (rel.r_extern == 0) { + const seg = context.object.load_commands.items[context.object.segment_cmd_index.?].Segment; + const target_sect_base_addr = seg.sections.items[rel.r_symbolnum - 1].addr; + addend += @intCast(i64, context.base_addr + out.offset + correction + 4) - @intCast(i64, target_sect_base_addr); + } + + out.payload = .{ + .signed = .{ + .correction = correction, + .addend = addend, + }, + }; +} + +fn parseLoad(self: Atom, rel: macho.relocation_info, out: *Relocation) void { + assert(rel.r_pcrel == 1); + assert(rel.r_length == 2); + + const rel_type = @intToEnum(macho.reloc_type_x86_64, rel.r_type); + const addend: i32 = if (rel_type == .X86_64_RELOC_GOT) + mem.readIntLittle(i32, self.code.items[out.offset..][0..4]) + else + 0; + + out.payload = .{ + .load = .{ + .kind = switch (rel_type) { + .X86_64_RELOC_GOT_LOAD, .X86_64_RELOC_GOT => .got, + .X86_64_RELOC_TLV => .tlvp, + else => unreachable, + }, + .addend = addend, + }, + }; +} + +pub fn resolveRelocs(self: *Atom, macho_file: *MachO) !void { + for (self.relocs.items) |rel| { + log.debug("relocating {}", .{rel}); + + const source_addr = blk: { + const 
sym = macho_file.locals.items[self.local_sym_index]; + break :blk sym.n_value + rel.offset; + }; + const target_addr = blk: { + const is_via_got = switch (rel.payload) { + .pointer_to_got => true, + .page => |page| page.kind == .got, + .page_off => |page_off| page_off.kind == .got, + .load => |load| load.kind == .got, + else => false, + }; + + if (is_via_got) { + const atom = macho_file.got_entries_map.get(.{ + .where = switch (rel.where) { + .local => .local, + .undef => .undef, + }, + .where_index = rel.where_index, + }) orelse { + const sym = switch (rel.where) { + .local => macho_file.locals.items[rel.where_index], + .undef => macho_file.undefs.items[rel.where_index], + }; + log.err("expected GOT entry for symbol '{s}'", .{macho_file.getString(sym.n_strx)}); + log.err(" this is an internal linker error", .{}); + return error.FailedToResolveRelocationTarget; + }; + break :blk macho_file.locals.items[atom.local_sym_index].n_value; + } + + switch (rel.where) { + .local => { + const sym = macho_file.locals.items[rel.where_index]; + const is_tlv = is_tlv: { + const source_sym = macho_file.locals.items[self.local_sym_index]; + const match = macho_file.section_ordinals.keys()[source_sym.n_sect - 1]; + const seg = macho_file.load_commands.items[match.seg].Segment; + const sect = seg.sections.items[match.sect]; + break :is_tlv commands.sectionType(sect) == macho.S_THREAD_LOCAL_VARIABLES; + }; + if (is_tlv) { + // For TLV relocations, the value specified as a relocation is the displacement from the + // TLV initializer (either value in __thread_data or zero-init in __thread_bss) to the first + // defined TLV template init section in the following order: + // * wrt to __thread_data if defined, then + // * wrt to __thread_bss + const seg = macho_file.load_commands.items[macho_file.data_segment_cmd_index.?].Segment; + const base_address = inner: { + if (macho_file.tlv_data_section_index) |i| { + break :inner seg.sections.items[i].addr; + } else if 
(macho_file.tlv_bss_section_index) |i| { + break :inner seg.sections.items[i].addr; + } else { + log.err("threadlocal variables present but no initializer sections found", .{}); + log.err(" __thread_data not found", .{}); + log.err(" __thread_bss not found", .{}); + return error.FailedToResolveRelocationTarget; + } + }; + break :blk sym.n_value - base_address; + } + + break :blk sym.n_value; + }, + .undef => { + const atom = macho_file.stubs_map.get(rel.where_index) orelse { + // TODO this is required for incremental when we don't have every symbol + // resolved when creating relocations. In this case, we will insert a branch + // reloc to an undef symbol which may happen to be defined within the binary. + // Then, the undef we point at will be a null symbol (free symbol) which we + // should remove/repurpose. To circumvent this (for now), we check if the symbol + // we point to is garbage, and if so we fall back to symbol resolver to find by name. + const n_strx = macho_file.undefs.items[rel.where_index].n_strx; + if (macho_file.symbol_resolver.get(n_strx)) |resolv| inner: { + if (resolv.where != .global) break :inner; + break :blk macho_file.globals.items[resolv.where_index].n_value; + } + + // TODO verify in TextBlock that the symbol is indeed dynamically bound. + break :blk 0; // Dynamically bound by dyld. 
+ }; + + break :blk macho_file.locals.items[atom.local_sym_index].n_value; + }, + } + }; + + log.debug(" | source_addr = 0x{x}", .{source_addr}); + log.debug(" | target_addr = 0x{x}", .{target_addr}); + + try rel.resolve(.{ + .block = self, + .offset = rel.offset, + .source_addr = source_addr, + .target_addr = target_addr, + .macho_file = macho_file, + }); + } +} + +pub fn format(self: Atom, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = fmt; + _ = options; + try std.fmt.format(writer, "TextBlock {{ ", .{}); + try std.fmt.format(writer, ".local_sym_index = {d}, ", .{self.local_sym_index}); + try std.fmt.format(writer, ".aliases = {any}, ", .{self.aliases.items}); + try std.fmt.format(writer, ".contained = {any}, ", .{self.contained.items}); + try std.fmt.format(writer, ".code = {*}, ", .{self.code.items}); + try std.fmt.format(writer, ".size = {d}, ", .{self.size}); + try std.fmt.format(writer, ".alignment = {d}, ", .{self.alignment}); + try std.fmt.format(writer, ".relocs = {any}, ", .{self.relocs.items}); + try std.fmt.format(writer, ".rebases = {any}, ", .{self.rebases.items}); + try std.fmt.format(writer, ".bindings = {any}, ", .{self.bindings.items}); + try std.fmt.format(writer, ".dices = {any}, ", .{self.dices.items}); + if (self.stab) |stab| { + try std.fmt.format(writer, ".stab = {any}, ", .{stab}); + } + try std.fmt.format(writer, "}}", .{}); +} + +const RelocIterator = struct { + buffer: []const macho.relocation_info, + index: i32 = -1, + + pub fn next(self: *RelocIterator) ?macho.relocation_info { + self.index += 1; + if (self.index < self.buffer.len) { + return self.buffer[@intCast(u32, self.index)]; + } + return null; + } + + pub fn peek(self: RelocIterator) macho.relocation_info { + assert(self.index + 1 < self.buffer.len); + return self.buffer[@intCast(u32, self.index + 1)]; + } +}; + +fn filterRelocs(relocs: []macho.relocation_info, start_addr: u64, end_addr: u64) []macho.relocation_info { + const Predicate 
= struct { + addr: u64, + + pub fn predicate(self: @This(), rel: macho.relocation_info) bool { + return rel.r_address < self.addr; + } + }; + + const start = MachO.findFirst(macho.relocation_info, relocs, 0, Predicate{ .addr = end_addr }); + const end = MachO.findFirst(macho.relocation_info, relocs, start, Predicate{ .addr = start_addr }); + + return relocs[start..end]; +} + +inline fn isArithmeticOp(inst: *const [4]u8) bool { + const group_decode = @truncate(u5, inst[3]); + return ((group_decode >> 2) == 4); +} diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index 3bfd6c9f1a..cacf2721a7 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -16,9 +16,11 @@ const segmentName = commands.segmentName; const sectionName = commands.sectionName; const Allocator = mem.Allocator; +const Atom = @import("Atom.zig"); const LoadCommand = commands.LoadCommand; const MachO = @import("../MachO.zig"); -const TextBlock = @import("TextBlock.zig"); + +const TextBlock = Atom; file: fs.File, name: []const u8, diff --git a/src/link/MachO/TextBlock.zig b/src/link/MachO/TextBlock.zig deleted file mode 100644 index 57e93543b1..0000000000 --- a/src/link/MachO/TextBlock.zig +++ /dev/null @@ -1,1301 +0,0 @@ -const TextBlock = @This(); - -const std = @import("std"); -const build_options = @import("build_options"); -const aarch64 = @import("../../codegen/aarch64.zig"); -const assert = std.debug.assert; -const commands = @import("commands.zig"); -const log = std.log.scoped(.text_block); -const macho = std.macho; -const math = std.math; -const mem = std.mem; -const meta = std.meta; - -const Allocator = mem.Allocator; -const Arch = std.Target.Cpu.Arch; -const MachO = @import("../MachO.zig"); -const Object = @import("Object.zig"); - -/// Each decl always gets a local symbol with the fully qualified name. -/// The vaddr and size are found here directly. 
-/// The file offset is found by computing the vaddr offset from the section vaddr -/// the symbol references, and adding that to the file offset of the section. -/// If this field is 0, it means the codegen size = 0 and there is no symbol or -/// offset table entry. -local_sym_index: u32, - -/// List of symbol aliases pointing to the same block via different nlists -aliases: std.ArrayListUnmanaged(u32) = .{}, - -/// List of symbols contained within this block -contained: std.ArrayListUnmanaged(SymbolAtOffset) = .{}, - -/// Code (may be non-relocated) this block represents -code: std.ArrayListUnmanaged(u8) = .{}, - -/// Size and alignment of this text block -/// Unlike in Elf, we need to store the size of this symbol as part of -/// the TextBlock since macho.nlist_64 lacks this information. -size: u64, -alignment: u32, - -relocs: std.ArrayListUnmanaged(Relocation) = .{}, - -/// List of offsets contained within this block that need rebasing by the dynamic -/// loader in presence of ASLR -rebases: std.ArrayListUnmanaged(u64) = .{}, - -/// List of offsets contained within this block that will be dynamically bound -/// by the dynamic loader and contain pointers to resolved (at load time) extern -/// symbols (aka proxies aka imports) -bindings: std.ArrayListUnmanaged(SymbolAtOffset) = .{}, - -/// List of lazy bindings -lazy_bindings: std.ArrayListUnmanaged(SymbolAtOffset) = .{}, - -/// List of data-in-code entries. This is currently specific to x86_64 only. -dices: std.ArrayListUnmanaged(macho.data_in_code_entry) = .{}, - -/// Stab entry for this block. This is currently specific to a binary created -/// by linking object files in a traditional sense - in incremental sense, we -/// bypass stabs altogether to produce dSYM bundle directly with fully relocated -/// DWARF sections. -stab: ?Stab = null, - -/// Points to the previous and next neighbours -next: ?*TextBlock, -prev: ?*TextBlock, - -/// Previous/next linked list pointers. 
-/// This is the linked list node for this Decl's corresponding .debug_info tag. -dbg_info_prev: ?*TextBlock, -dbg_info_next: ?*TextBlock, -/// Offset into .debug_info pointing to the tag for this Decl. -dbg_info_off: u32, -/// Size of the .debug_info tag for this Decl, not including padding. -dbg_info_len: u32, - -dirty: bool = true, - -pub const SymbolAtOffset = struct { - local_sym_index: u32, - offset: u64, - stab: ?Stab = null, - - pub fn format( - self: SymbolAtOffset, - comptime fmt: []const u8, - options: std.fmt.FormatOptions, - writer: anytype, - ) !void { - _ = fmt; - _ = options; - try std.fmt.format(writer, "{{ {d}: .offset = {d}", .{ self.local_sym_index, self.offset }); - if (self.stab) |stab| { - try std.fmt.format(writer, ", .stab = {any}", .{stab}); - } - try std.fmt.format(writer, " }}", .{}); - } -}; - -pub const Stab = union(enum) { - function: u64, - static, - global, - - pub fn asNlists(stab: Stab, local_sym_index: u32, macho_file: anytype) ![]macho.nlist_64 { - var nlists = std.ArrayList(macho.nlist_64).init(macho_file.base.allocator); - defer nlists.deinit(); - - const sym = macho_file.locals.items[local_sym_index]; - switch (stab) { - .function => |size| { - try nlists.ensureUnusedCapacity(4); - nlists.appendAssumeCapacity(.{ - .n_strx = 0, - .n_type = macho.N_BNSYM, - .n_sect = sym.n_sect, - .n_desc = 0, - .n_value = sym.n_value, - }); - nlists.appendAssumeCapacity(.{ - .n_strx = sym.n_strx, - .n_type = macho.N_FUN, - .n_sect = sym.n_sect, - .n_desc = 0, - .n_value = sym.n_value, - }); - nlists.appendAssumeCapacity(.{ - .n_strx = 0, - .n_type = macho.N_FUN, - .n_sect = 0, - .n_desc = 0, - .n_value = size, - }); - nlists.appendAssumeCapacity(.{ - .n_strx = 0, - .n_type = macho.N_ENSYM, - .n_sect = sym.n_sect, - .n_desc = 0, - .n_value = size, - }); - }, - .global => { - try nlists.append(.{ - .n_strx = sym.n_strx, - .n_type = macho.N_GSYM, - .n_sect = 0, - .n_desc = 0, - .n_value = 0, - }); - }, - .static => { - try nlists.append(.{ - 
.n_strx = sym.n_strx, - .n_type = macho.N_STSYM, - .n_sect = sym.n_sect, - .n_desc = 0, - .n_value = sym.n_value, - }); - }, - } - - return nlists.toOwnedSlice(); - } -}; - -pub const Relocation = struct { - /// Offset within the `block`s code buffer. - /// Note relocation size can be inferred by relocation's kind. - offset: u32, - - where: enum { - local, - undef, - }, - - where_index: u32, - - payload: union(enum) { - unsigned: Unsigned, - branch: Branch, - page: Page, - page_off: PageOff, - pointer_to_got: PointerToGot, - signed: Signed, - load: Load, - }, - - const ResolveArgs = struct { - block: *TextBlock, - offset: u32, - source_addr: u64, - target_addr: u64, - macho_file: *MachO, - }; - - pub const Unsigned = struct { - subtractor: ?u32, - - /// Addend embedded directly in the relocation slot - addend: i64, - - /// Extracted from r_length: - /// => 3 implies true - /// => 2 implies false - /// => * is unreachable - is_64bit: bool, - - pub fn resolve(self: Unsigned, args: ResolveArgs) !void { - const result = blk: { - if (self.subtractor) |subtractor| { - const sym = args.macho_file.locals.items[subtractor]; - break :blk @intCast(i64, args.target_addr) - @intCast(i64, sym.n_value) + self.addend; - } else { - break :blk @intCast(i64, args.target_addr) + self.addend; - } - }; - - if (self.is_64bit) { - mem.writeIntLittle(u64, args.block.code.items[args.offset..][0..8], @bitCast(u64, result)); - } else { - mem.writeIntLittle(u32, args.block.code.items[args.offset..][0..4], @truncate(u32, @bitCast(u64, result))); - } - } - - pub fn format(self: Unsigned, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { - _ = fmt; - _ = options; - try std.fmt.format(writer, "Unsigned {{ ", .{}); - if (self.subtractor) |sub| { - try std.fmt.format(writer, ".subtractor = {}, ", .{sub}); - } - try std.fmt.format(writer, ".addend = {}, ", .{self.addend}); - const length: usize = if (self.is_64bit) 8 else 4; - try std.fmt.format(writer, ".length = {}, 
", .{length}); - try std.fmt.format(writer, "}}", .{}); - } - }; - - pub const Branch = struct { - arch: Arch, - - pub fn resolve(self: Branch, args: ResolveArgs) !void { - switch (self.arch) { - .aarch64 => { - const displacement = math.cast( - i28, - @intCast(i64, args.target_addr) - @intCast(i64, args.source_addr), - ) catch |err| switch (err) { - error.Overflow => { - log.err("jump too big to encode as i28 displacement value", .{}); - log.err(" (target - source) = displacement => 0x{x} - 0x{x} = 0x{x}", .{ - args.target_addr, - args.source_addr, - @intCast(i64, args.target_addr) - @intCast(i64, args.source_addr), - }); - log.err(" TODO implement branch islands to extend jump distance for arm64", .{}); - return error.TODOImplementBranchIslands; - }, - }; - const code = args.block.code.items[args.offset..][0..4]; - var inst = aarch64.Instruction{ - .unconditional_branch_immediate = mem.bytesToValue(meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.unconditional_branch_immediate, - ), code), - }; - inst.unconditional_branch_immediate.imm26 = @truncate(u26, @bitCast(u28, displacement >> 2)); - mem.writeIntLittle(u32, code, inst.toU32()); - }, - .x86_64 => { - const displacement = try math.cast( - i32, - @intCast(i64, args.target_addr) - @intCast(i64, args.source_addr) - 4, - ); - mem.writeIntLittle(u32, args.block.code.items[args.offset..][0..4], @bitCast(u32, displacement)); - }, - else => return error.UnsupportedCpuArchitecture, - } - } - - pub fn format(self: Branch, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { - _ = self; - _ = fmt; - _ = options; - try std.fmt.format(writer, "Branch {{}}", .{}); - } - }; - - pub const Page = struct { - kind: enum { - page, - got, - tlvp, - }, - addend: u32 = 0, - - pub fn resolve(self: Page, args: ResolveArgs) !void { - const target_addr = args.target_addr + self.addend; - const source_page = @intCast(i32, args.source_addr >> 12); - const target_page = @intCast(i32, target_addr 
>> 12); - const pages = @bitCast(u21, @intCast(i21, target_page - source_page)); - - const code = args.block.code.items[args.offset..][0..4]; - var inst = aarch64.Instruction{ - .pc_relative_address = mem.bytesToValue(meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.pc_relative_address, - ), code), - }; - inst.pc_relative_address.immhi = @truncate(u19, pages >> 2); - inst.pc_relative_address.immlo = @truncate(u2, pages); - - mem.writeIntLittle(u32, code, inst.toU32()); - } - - pub fn format(self: Page, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { - _ = fmt; - _ = options; - try std.fmt.format(writer, "Page {{ ", .{}); - switch (self.kind) { - .page => {}, - .got => { - try std.fmt.format(writer, ".got, ", .{}); - }, - .tlvp => { - try std.fmt.format(writer, ".tlvp", .{}); - }, - } - try std.fmt.format(writer, ".addend = {}, ", .{self.addend}); - try std.fmt.format(writer, "}}", .{}); - } - }; - - pub const PageOff = struct { - kind: enum { - page, - got, - tlvp, - }, - addend: u32 = 0, - op_kind: ?OpKind = null, - - pub const OpKind = enum { - arithmetic, - load, - }; - - pub fn resolve(self: PageOff, args: ResolveArgs) !void { - const code = args.block.code.items[args.offset..][0..4]; - - switch (self.kind) { - .page => { - const target_addr = args.target_addr + self.addend; - const narrowed = @truncate(u12, target_addr); - - const op_kind = self.op_kind orelse unreachable; - var inst: aarch64.Instruction = blk: { - switch (op_kind) { - .arithmetic => { - break :blk .{ - .add_subtract_immediate = mem.bytesToValue(meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.add_subtract_immediate, - ), code), - }; - }, - .load => { - break :blk .{ - .load_store_register = mem.bytesToValue(meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.load_store_register, - ), code), - }; - }, - } - }; - - if (op_kind == .arithmetic) { - inst.add_subtract_immediate.imm12 = narrowed; - } else { - const offset: u12 = 
blk: { - if (inst.load_store_register.size == 0) { - if (inst.load_store_register.v == 1) { - // 128-bit SIMD is scaled by 16. - break :blk try math.divExact(u12, narrowed, 16); - } - // Otherwise, 8-bit SIMD or ldrb. - break :blk narrowed; - } else { - const denom: u4 = try math.powi(u4, 2, inst.load_store_register.size); - break :blk try math.divExact(u12, narrowed, denom); - } - }; - inst.load_store_register.offset = offset; - } - - mem.writeIntLittle(u32, code, inst.toU32()); - }, - .got => { - const narrowed = @truncate(u12, args.target_addr); - var inst: aarch64.Instruction = .{ - .load_store_register = mem.bytesToValue(meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.load_store_register, - ), code), - }; - const offset = try math.divExact(u12, narrowed, 8); - inst.load_store_register.offset = offset; - mem.writeIntLittle(u32, code, inst.toU32()); - }, - .tlvp => { - const RegInfo = struct { - rd: u5, - rn: u5, - size: u1, - }; - const reg_info: RegInfo = blk: { - if (isArithmeticOp(code)) { - const inst = mem.bytesToValue(meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.add_subtract_immediate, - ), code); - break :blk .{ - .rd = inst.rd, - .rn = inst.rn, - .size = inst.sf, - }; - } else { - const inst = mem.bytesToValue(meta.TagPayload( - aarch64.Instruction, - aarch64.Instruction.load_store_register, - ), code); - break :blk .{ - .rd = inst.rt, - .rn = inst.rn, - .size = @truncate(u1, inst.size), - }; - } - }; - const narrowed = @truncate(u12, args.target_addr); - var inst = aarch64.Instruction{ - .add_subtract_immediate = .{ - .rd = reg_info.rd, - .rn = reg_info.rn, - .imm12 = narrowed, - .sh = 0, - .s = 0, - .op = 0, - .sf = reg_info.size, - }, - }; - mem.writeIntLittle(u32, code, inst.toU32()); - }, - } - } - - pub fn format(self: PageOff, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { - _ = fmt; - _ = options; - try std.fmt.format(writer, "PageOff {{ ", .{}); - switch (self.kind) { - .page => 
{}, - .got => { - try std.fmt.format(writer, ".got, ", .{}); - }, - .tlvp => { - try std.fmt.format(writer, ".tlvp, ", .{}); - }, - } - try std.fmt.format(writer, ".addend = {}, ", .{self.addend}); - try std.fmt.format(writer, ".op_kind = {s}, ", .{self.op_kind}); - try std.fmt.format(writer, "}}", .{}); - } - }; - - pub const PointerToGot = struct { - pub fn resolve(_: PointerToGot, args: ResolveArgs) !void { - const result = try math.cast(i32, @intCast(i64, args.target_addr) - @intCast(i64, args.source_addr)); - mem.writeIntLittle(u32, args.block.code.items[args.offset..][0..4], @bitCast(u32, result)); - } - - pub fn format(self: PointerToGot, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { - _ = self; - _ = fmt; - _ = options; - try std.fmt.format(writer, "PointerToGot {{}}", .{}); - } - }; - - pub const Signed = struct { - addend: i64, - correction: u3, - - pub fn resolve(self: Signed, args: ResolveArgs) !void { - const target_addr = @intCast(i64, args.target_addr) + self.addend; - const displacement = try math.cast( - i32, - target_addr - @intCast(i64, args.source_addr + self.correction + 4), - ); - mem.writeIntLittle(u32, args.block.code.items[args.offset..][0..4], @bitCast(u32, displacement)); - } - - pub fn format(self: Signed, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { - _ = fmt; - _ = options; - try std.fmt.format(writer, "Signed {{ ", .{}); - try std.fmt.format(writer, ".addend = {}, ", .{self.addend}); - try std.fmt.format(writer, ".correction = {}, ", .{self.correction}); - try std.fmt.format(writer, "}}", .{}); - } - }; - - pub const Load = struct { - kind: enum { - got, - tlvp, - }, - addend: i32 = 0, - - pub fn resolve(self: Load, args: ResolveArgs) !void { - if (self.kind == .tlvp) { - // We need to rewrite the opcode from movq to leaq. 
- args.block.code.items[args.offset - 2] = 0x8d; - } - const displacement = try math.cast( - i32, - @intCast(i64, args.target_addr) - @intCast(i64, args.source_addr) - 4 + self.addend, - ); - mem.writeIntLittle(u32, args.block.code.items[args.offset..][0..4], @bitCast(u32, displacement)); - } - - pub fn format(self: Load, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { - _ = fmt; - _ = options; - try std.fmt.format(writer, "Load {{ ", .{}); - try std.fmt.format(writer, "{s}, ", .{self.kind}); - try std.fmt.format(writer, ".addend = {}, ", .{self.addend}); - try std.fmt.format(writer, "}}", .{}); - } - }; - - pub fn resolve(self: Relocation, args: ResolveArgs) !void { - switch (self.payload) { - .unsigned => |unsigned| try unsigned.resolve(args), - .branch => |branch| try branch.resolve(args), - .page => |page| try page.resolve(args), - .page_off => |page_off| try page_off.resolve(args), - .pointer_to_got => |pointer_to_got| try pointer_to_got.resolve(args), - .signed => |signed| try signed.resolve(args), - .load => |load| try load.resolve(args), - } - } - - pub fn format(self: Relocation, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { - try std.fmt.format(writer, "Relocation {{ ", .{}); - try std.fmt.format(writer, ".offset = {}, ", .{self.offset}); - try std.fmt.format(writer, ".where = {}, ", .{self.where}); - try std.fmt.format(writer, ".where_index = {d}, ", .{self.where_index}); - - switch (self.payload) { - .unsigned => |unsigned| try unsigned.format(fmt, options, writer), - .branch => |branch| try branch.format(fmt, options, writer), - .page => |page| try page.format(fmt, options, writer), - .page_off => |page_off| try page_off.format(fmt, options, writer), - .pointer_to_got => |pointer_to_got| try pointer_to_got.format(fmt, options, writer), - .signed => |signed| try signed.format(fmt, options, writer), - .load => |load| try load.format(fmt, options, writer), - } - - try 
std.fmt.format(writer, "}}", .{}); - } -}; - -pub const empty = TextBlock{ - .local_sym_index = 0, - .size = 0, - .alignment = 0, - .prev = null, - .next = null, - .dbg_info_prev = null, - .dbg_info_next = null, - .dbg_info_off = undefined, - .dbg_info_len = undefined, -}; - -pub fn deinit(self: *TextBlock, allocator: *Allocator) void { - self.dices.deinit(allocator); - self.lazy_bindings.deinit(allocator); - self.bindings.deinit(allocator); - self.rebases.deinit(allocator); - self.relocs.deinit(allocator); - self.contained.deinit(allocator); - self.aliases.deinit(allocator); - self.code.deinit(allocator); -} - -/// Returns how much room there is to grow in virtual address space. -/// File offset relocation happens transparently, so it is not included in -/// this calculation. -pub fn capacity(self: TextBlock, macho_file: MachO) u64 { - const self_sym = macho_file.locals.items[self.local_sym_index]; - if (self.next) |next| { - const next_sym = macho_file.locals.items[next.local_sym_index]; - return next_sym.n_value - self_sym.n_value; - } else { - // We are the last block. - // The capacity is limited only by virtual address space. - return std.math.maxInt(u64) - self_sym.n_value; - } -} - -pub fn freeListEligible(self: TextBlock, macho_file: MachO) bool { - // No need to keep a free list node for the last block. 
- const next = self.next orelse return false; - const self_sym = macho_file.locals.items[self.local_sym_index]; - const next_sym = macho_file.locals.items[next.local_sym_index]; - const cap = next_sym.n_value - self_sym.n_value; - const ideal_cap = MachO.padToIdeal(self.size); - if (cap <= ideal_cap) return false; - const surplus = cap - ideal_cap; - return surplus >= MachO.min_text_capacity; -} - -const RelocContext = struct { - base_addr: u64 = 0, - base_offset: u64 = 0, - allocator: *Allocator, - object: *Object, - macho_file: *MachO, - parsed_atoms: *Object.ParsedAtoms, -}; - -fn initRelocFromObject(rel: macho.relocation_info, context: RelocContext) !Relocation { - var parsed_rel = Relocation{ - .offset = @intCast(u32, @intCast(u64, rel.r_address) - context.base_offset), - .where = undefined, - .where_index = undefined, - .payload = undefined, - }; - - if (rel.r_extern == 0) { - const sect_id = @intCast(u16, rel.r_symbolnum - 1); - - const local_sym_index = context.object.sections_as_symbols.get(sect_id) orelse blk: { - const seg = context.object.load_commands.items[context.object.segment_cmd_index.?].Segment; - const sect = seg.sections.items[sect_id]; - const match = (try context.macho_file.getMatchingSection(sect)) orelse unreachable; - const local_sym_index = @intCast(u32, context.macho_file.locals.items.len); - const sym_name = try std.fmt.allocPrint(context.allocator, "l_{s}_{s}_{s}", .{ - context.object.name, - commands.segmentName(sect), - commands.sectionName(sect), - }); - defer context.allocator.free(sym_name); - - try context.macho_file.locals.append(context.allocator, .{ - .n_strx = try context.macho_file.makeString(sym_name), - .n_type = macho.N_SECT, - .n_sect = @intCast(u8, context.macho_file.section_ordinals.getIndex(match).? 
+ 1), - .n_desc = 0, - .n_value = 0, - }); - try context.object.sections_as_symbols.putNoClobber(context.allocator, sect_id, local_sym_index); - break :blk local_sym_index; - }; - - parsed_rel.where = .local; - parsed_rel.where_index = local_sym_index; - } else { - const sym = context.object.symtab.items[rel.r_symbolnum]; - const sym_name = context.object.getString(sym.n_strx); - - if (MachO.symbolIsSect(sym) and !MachO.symbolIsExt(sym)) { - const where_index = context.object.symbol_mapping.get(rel.r_symbolnum) orelse unreachable; - parsed_rel.where = .local; - parsed_rel.where_index = where_index; - } else { - const n_strx = context.macho_file.strtab_dir.getAdapted(@as([]const u8, sym_name), MachO.StringSliceAdapter{ - .strtab = &context.macho_file.strtab, - }) orelse unreachable; - const resolv = context.macho_file.symbol_resolver.get(n_strx) orelse unreachable; - switch (resolv.where) { - .global => { - parsed_rel.where = .local; - parsed_rel.where_index = resolv.local_sym_index; - }, - .undef => { - parsed_rel.where = .undef; - parsed_rel.where_index = resolv.where_index; - }, - } - } - } - - return parsed_rel; -} - -pub fn parseRelocs(self: *TextBlock, relocs: []macho.relocation_info, context: RelocContext) !void { - const filtered_relocs = filterRelocs(relocs, context.base_offset, context.base_offset + self.size); - var it = RelocIterator{ - .buffer = filtered_relocs, - }; - - var addend: u32 = 0; - var subtractor: ?u32 = null; - const arch = context.macho_file.base.options.target.cpu.arch; - - while (it.next()) |rel| { - if (isAddend(rel, arch)) { - // Addend is not a relocation with effect on the TextBlock, so - // parse it and carry on. - assert(addend == 0); // Oh no, addend was not reset! - addend = rel.r_symbolnum; - - // Verify ADDEND is followed by a PAGE21 or PAGEOFF12. 
- const next = @intToEnum(macho.reloc_type_arm64, it.peek().r_type); - switch (next) { - .ARM64_RELOC_PAGE21, .ARM64_RELOC_PAGEOFF12 => {}, - else => { - log.err("unexpected relocation type: expected PAGE21 or PAGEOFF12, found {s}", .{next}); - return error.UnexpectedRelocationType; - }, - } - continue; - } - - if (isSubtractor(rel, arch)) { - // Subtractor is not a relocation with effect on the TextBlock, so - // parse it and carry on. - assert(subtractor == null); // Oh no, subtractor was not reset! - assert(rel.r_extern == 1); - const sym = context.object.symtab.items[rel.r_symbolnum]; - const sym_name = context.object.getString(sym.n_strx); - - if (MachO.symbolIsSect(sym) and !MachO.symbolIsExt(sym)) { - const where_index = context.object.symbol_mapping.get(rel.r_symbolnum) orelse unreachable; - subtractor = where_index; - } else { - const n_strx = context.macho_file.strtab_dir.getAdapted(@as([]const u8, sym_name), MachO.StringSliceAdapter{ - .strtab = &context.macho_file.strtab, - }) orelse unreachable; - const resolv = context.macho_file.symbol_resolver.get(n_strx) orelse unreachable; - assert(resolv.where == .global); - subtractor = resolv.local_sym_index; - } - - // Verify SUBTRACTOR is followed by UNSIGNED. 
- switch (arch) { - .aarch64 => { - const next = @intToEnum(macho.reloc_type_arm64, it.peek().r_type); - if (next != .ARM64_RELOC_UNSIGNED) { - log.err("unexpected relocation type: expected UNSIGNED, found {s}", .{next}); - return error.UnexpectedRelocationType; - } - }, - .x86_64 => { - const next = @intToEnum(macho.reloc_type_x86_64, it.peek().r_type); - if (next != .X86_64_RELOC_UNSIGNED) { - log.err("unexpected relocation type: expected UNSIGNED, found {s}", .{next}); - return error.UnexpectedRelocationType; - } - }, - else => unreachable, - } - continue; - } - - var parsed_rel = try initRelocFromObject(rel, context); - - switch (arch) { - .aarch64 => { - const rel_type = @intToEnum(macho.reloc_type_arm64, rel.r_type); - switch (rel_type) { - .ARM64_RELOC_ADDEND => unreachable, - .ARM64_RELOC_SUBTRACTOR => unreachable, - .ARM64_RELOC_BRANCH26 => { - self.parseBranch(rel, &parsed_rel, context); - }, - .ARM64_RELOC_UNSIGNED => { - self.parseUnsigned(rel, &parsed_rel, subtractor, context); - subtractor = null; - }, - .ARM64_RELOC_PAGE21, - .ARM64_RELOC_GOT_LOAD_PAGE21, - .ARM64_RELOC_TLVP_LOAD_PAGE21, - => { - self.parsePage(rel, &parsed_rel, addend); - if (rel_type == .ARM64_RELOC_PAGE21) - addend = 0; - }, - .ARM64_RELOC_PAGEOFF12, - .ARM64_RELOC_GOT_LOAD_PAGEOFF12, - .ARM64_RELOC_TLVP_LOAD_PAGEOFF12, - => { - self.parsePageOff(rel, &parsed_rel, addend); - if (rel_type == .ARM64_RELOC_PAGEOFF12) - addend = 0; - }, - .ARM64_RELOC_POINTER_TO_GOT => { - self.parsePointerToGot(rel, &parsed_rel); - }, - } - }, - .x86_64 => { - switch (@intToEnum(macho.reloc_type_x86_64, rel.r_type)) { - .X86_64_RELOC_SUBTRACTOR => unreachable, - .X86_64_RELOC_BRANCH => { - self.parseBranch(rel, &parsed_rel, context); - }, - .X86_64_RELOC_UNSIGNED => { - self.parseUnsigned(rel, &parsed_rel, subtractor, context); - subtractor = null; - }, - .X86_64_RELOC_SIGNED, - .X86_64_RELOC_SIGNED_1, - .X86_64_RELOC_SIGNED_2, - .X86_64_RELOC_SIGNED_4, - => { - self.parseSigned(rel, &parsed_rel, 
context); - }, - .X86_64_RELOC_GOT_LOAD, - .X86_64_RELOC_GOT, - .X86_64_RELOC_TLV, - => { - self.parseLoad(rel, &parsed_rel); - }, - } - }, - else => unreachable, - } - - try self.relocs.append(context.allocator, parsed_rel); - - const is_via_got = switch (parsed_rel.payload) { - .pointer_to_got => true, - .load => |load| load.kind == .got, - .page => |page| page.kind == .got, - .page_off => |page_off| page_off.kind == .got, - else => false, - }; - - if (is_via_got) blk: { - const key = MachO.GotIndirectionKey{ - .where = switch (parsed_rel.where) { - .local => .local, - .undef => .undef, - }, - .where_index = parsed_rel.where_index, - }; - if (context.macho_file.got_entries_map.contains(key)) break :blk; - - const atom = try context.macho_file.createGotAtom(key); - try context.macho_file.got_entries_map.putNoClobber(context.macho_file.base.allocator, key, atom); - const match = MachO.MatchingSection{ - .seg = context.macho_file.data_const_segment_cmd_index.?, - .sect = context.macho_file.got_section_index.?, - }; - - if (context.parsed_atoms.getPtr(match)) |last| { - last.*.next = atom; - atom.prev = last.*; - last.* = atom; - } else { - try context.parsed_atoms.putNoClobber(match, atom); - } - } else if (parsed_rel.payload == .unsigned) { - switch (parsed_rel.where) { - .undef => { - try self.bindings.append(context.allocator, .{ - .local_sym_index = parsed_rel.where_index, - .offset = parsed_rel.offset, - }); - }, - .local => { - const source_sym = context.macho_file.locals.items[self.local_sym_index]; - const match = context.macho_file.section_ordinals.keys()[source_sym.n_sect - 1]; - const seg = context.macho_file.load_commands.items[match.seg].Segment; - const sect = seg.sections.items[match.sect]; - const sect_type = commands.sectionType(sect); - - const should_rebase = rebase: { - if (!parsed_rel.payload.unsigned.is_64bit) break :rebase false; - - // TODO actually, a check similar to what dyld is doing, that is, verifying - // that the segment is writable 
should be enough here. - const is_right_segment = blk: { - if (context.macho_file.data_segment_cmd_index) |idx| { - if (match.seg == idx) { - break :blk true; - } - } - if (context.macho_file.data_const_segment_cmd_index) |idx| { - if (match.seg == idx) { - break :blk true; - } - } - break :blk false; - }; - - if (!is_right_segment) break :rebase false; - if (sect_type != macho.S_LITERAL_POINTERS and - sect_type != macho.S_REGULAR and - sect_type != macho.S_MOD_INIT_FUNC_POINTERS and - sect_type != macho.S_MOD_TERM_FUNC_POINTERS) - { - break :rebase false; - } - - break :rebase true; - }; - - if (should_rebase) { - try self.rebases.append(context.allocator, parsed_rel.offset); - } - }, - } - } else if (parsed_rel.payload == .branch) blk: { - if (parsed_rel.where != .undef) break :blk; - if (context.macho_file.stubs_map.contains(parsed_rel.where_index)) break :blk; - - const stub_helper_atom = try context.macho_file.createStubHelperAtom(); - const laptr_atom = try context.macho_file.createLazyPointerAtom( - stub_helper_atom.local_sym_index, - parsed_rel.where_index, - ); - const stub_atom = try context.macho_file.createStubAtom(laptr_atom.local_sym_index); - try context.macho_file.stubs_map.putNoClobber(context.allocator, parsed_rel.where_index, stub_atom); - // TODO clean this up! 
- if (context.parsed_atoms.getPtr(.{ - .seg = context.macho_file.text_segment_cmd_index.?, - .sect = context.macho_file.stub_helper_section_index.?, - })) |last| { - last.*.next = stub_helper_atom; - stub_helper_atom.prev = last.*; - last.* = stub_helper_atom; - } else { - try context.parsed_atoms.putNoClobber(.{ - .seg = context.macho_file.text_segment_cmd_index.?, - .sect = context.macho_file.stub_helper_section_index.?, - }, stub_helper_atom); - } - if (context.parsed_atoms.getPtr(.{ - .seg = context.macho_file.text_segment_cmd_index.?, - .sect = context.macho_file.stubs_section_index.?, - })) |last| { - last.*.next = stub_atom; - stub_atom.prev = last.*; - last.* = stub_atom; - } else { - try context.parsed_atoms.putNoClobber(.{ - .seg = context.macho_file.text_segment_cmd_index.?, - .sect = context.macho_file.stubs_section_index.?, - }, stub_atom); - } - if (context.parsed_atoms.getPtr(.{ - .seg = context.macho_file.data_segment_cmd_index.?, - .sect = context.macho_file.la_symbol_ptr_section_index.?, - })) |last| { - last.*.next = laptr_atom; - laptr_atom.prev = last.*; - last.* = laptr_atom; - } else { - try context.parsed_atoms.putNoClobber(.{ - .seg = context.macho_file.data_segment_cmd_index.?, - .sect = context.macho_file.la_symbol_ptr_section_index.?, - }, laptr_atom); - } - } - } -} - -fn isAddend(rel: macho.relocation_info, arch: Arch) bool { - if (arch != .aarch64) return false; - return @intToEnum(macho.reloc_type_arm64, rel.r_type) == .ARM64_RELOC_ADDEND; -} - -fn isSubtractor(rel: macho.relocation_info, arch: Arch) bool { - return switch (arch) { - .aarch64 => @intToEnum(macho.reloc_type_arm64, rel.r_type) == .ARM64_RELOC_SUBTRACTOR, - .x86_64 => @intToEnum(macho.reloc_type_x86_64, rel.r_type) == .X86_64_RELOC_SUBTRACTOR, - else => unreachable, - }; -} - -fn parseUnsigned( - self: TextBlock, - rel: macho.relocation_info, - out: *Relocation, - subtractor: ?u32, - context: RelocContext, -) void { - assert(rel.r_pcrel == 0); - - const is_64bit: bool = 
switch (rel.r_length) { - 3 => true, - 2 => false, - else => unreachable, - }; - - var addend: i64 = if (is_64bit) - mem.readIntLittle(i64, self.code.items[out.offset..][0..8]) - else - mem.readIntLittle(i32, self.code.items[out.offset..][0..4]); - - if (rel.r_extern == 0) { - const seg = context.object.load_commands.items[context.object.segment_cmd_index.?].Segment; - const target_sect_base_addr = seg.sections.items[rel.r_symbolnum - 1].addr; - addend -= @intCast(i64, target_sect_base_addr); - } - - out.payload = .{ - .unsigned = .{ - .subtractor = subtractor, - .is_64bit = is_64bit, - .addend = addend, - }, - }; -} - -fn parseBranch(self: TextBlock, rel: macho.relocation_info, out: *Relocation, context: RelocContext) void { - _ = self; - assert(rel.r_pcrel == 1); - assert(rel.r_length == 2); - - out.payload = .{ - .branch = .{ - .arch = context.macho_file.base.options.target.cpu.arch, - }, - }; -} - -fn parsePage(self: TextBlock, rel: macho.relocation_info, out: *Relocation, addend: u32) void { - _ = self; - assert(rel.r_pcrel == 1); - assert(rel.r_length == 2); - - out.payload = .{ - .page = .{ - .kind = switch (@intToEnum(macho.reloc_type_arm64, rel.r_type)) { - .ARM64_RELOC_PAGE21 => .page, - .ARM64_RELOC_GOT_LOAD_PAGE21 => .got, - .ARM64_RELOC_TLVP_LOAD_PAGE21 => .tlvp, - else => unreachable, - }, - .addend = addend, - }, - }; -} - -fn parsePageOff(self: TextBlock, rel: macho.relocation_info, out: *Relocation, addend: u32) void { - assert(rel.r_pcrel == 0); - assert(rel.r_length == 2); - - const rel_type = @intToEnum(macho.reloc_type_arm64, rel.r_type); - const op_kind: ?Relocation.PageOff.OpKind = blk: { - if (rel_type != .ARM64_RELOC_PAGEOFF12) break :blk null; - const op_kind: Relocation.PageOff.OpKind = if (isArithmeticOp(self.code.items[out.offset..][0..4])) - .arithmetic - else - .load; - break :blk op_kind; - }; - - out.payload = .{ - .page_off = .{ - .kind = switch (rel_type) { - .ARM64_RELOC_PAGEOFF12 => .page, - .ARM64_RELOC_GOT_LOAD_PAGEOFF12 => 
.got, - .ARM64_RELOC_TLVP_LOAD_PAGEOFF12 => .tlvp, - else => unreachable, - }, - .addend = addend, - .op_kind = op_kind, - }, - }; -} - -fn parsePointerToGot(self: TextBlock, rel: macho.relocation_info, out: *Relocation) void { - _ = self; - assert(rel.r_pcrel == 1); - assert(rel.r_length == 2); - - out.payload = .{ - .pointer_to_got = .{}, - }; -} - -fn parseSigned(self: TextBlock, rel: macho.relocation_info, out: *Relocation, context: RelocContext) void { - assert(rel.r_pcrel == 1); - assert(rel.r_length == 2); - - const rel_type = @intToEnum(macho.reloc_type_x86_64, rel.r_type); - const correction: u3 = switch (rel_type) { - .X86_64_RELOC_SIGNED => 0, - .X86_64_RELOC_SIGNED_1 => 1, - .X86_64_RELOC_SIGNED_2 => 2, - .X86_64_RELOC_SIGNED_4 => 4, - else => unreachable, - }; - var addend: i64 = mem.readIntLittle(i32, self.code.items[out.offset..][0..4]) + correction; - - if (rel.r_extern == 0) { - const seg = context.object.load_commands.items[context.object.segment_cmd_index.?].Segment; - const target_sect_base_addr = seg.sections.items[rel.r_symbolnum - 1].addr; - addend += @intCast(i64, context.base_addr + out.offset + correction + 4) - @intCast(i64, target_sect_base_addr); - } - - out.payload = .{ - .signed = .{ - .correction = correction, - .addend = addend, - }, - }; -} - -fn parseLoad(self: TextBlock, rel: macho.relocation_info, out: *Relocation) void { - assert(rel.r_pcrel == 1); - assert(rel.r_length == 2); - - const rel_type = @intToEnum(macho.reloc_type_x86_64, rel.r_type); - const addend: i32 = if (rel_type == .X86_64_RELOC_GOT) - mem.readIntLittle(i32, self.code.items[out.offset..][0..4]) - else - 0; - - out.payload = .{ - .load = .{ - .kind = switch (rel_type) { - .X86_64_RELOC_GOT_LOAD, .X86_64_RELOC_GOT => .got, - .X86_64_RELOC_TLV => .tlvp, - else => unreachable, - }, - .addend = addend, - }, - }; -} - -pub fn resolveRelocs(self: *TextBlock, macho_file: *MachO) !void { - for (self.relocs.items) |rel| { - log.debug("relocating {}", .{rel}); - - const 
source_addr = blk: { - const sym = macho_file.locals.items[self.local_sym_index]; - break :blk sym.n_value + rel.offset; - }; - const target_addr = blk: { - const is_via_got = switch (rel.payload) { - .pointer_to_got => true, - .page => |page| page.kind == .got, - .page_off => |page_off| page_off.kind == .got, - .load => |load| load.kind == .got, - else => false, - }; - - if (is_via_got) { - const atom = macho_file.got_entries_map.get(.{ - .where = switch (rel.where) { - .local => .local, - .undef => .undef, - }, - .where_index = rel.where_index, - }) orelse { - const sym = switch (rel.where) { - .local => macho_file.locals.items[rel.where_index], - .undef => macho_file.undefs.items[rel.where_index], - }; - log.err("expected GOT entry for symbol '{s}'", .{macho_file.getString(sym.n_strx)}); - log.err(" this is an internal linker error", .{}); - return error.FailedToResolveRelocationTarget; - }; - break :blk macho_file.locals.items[atom.local_sym_index].n_value; - } - - switch (rel.where) { - .local => { - const sym = macho_file.locals.items[rel.where_index]; - const is_tlv = is_tlv: { - const source_sym = macho_file.locals.items[self.local_sym_index]; - const match = macho_file.section_ordinals.keys()[source_sym.n_sect - 1]; - const seg = macho_file.load_commands.items[match.seg].Segment; - const sect = seg.sections.items[match.sect]; - break :is_tlv commands.sectionType(sect) == macho.S_THREAD_LOCAL_VARIABLES; - }; - if (is_tlv) { - // For TLV relocations, the value specified as a relocation is the displacement from the - // TLV initializer (either value in __thread_data or zero-init in __thread_bss) to the first - // defined TLV template init section in the following order: - // * wrt to __thread_data if defined, then - // * wrt to __thread_bss - const seg = macho_file.load_commands.items[macho_file.data_segment_cmd_index.?].Segment; - const base_address = inner: { - if (macho_file.tlv_data_section_index) |i| { - break :inner seg.sections.items[i].addr; - } else 
if (macho_file.tlv_bss_section_index) |i| { - break :inner seg.sections.items[i].addr; - } else { - log.err("threadlocal variables present but no initializer sections found", .{}); - log.err(" __thread_data not found", .{}); - log.err(" __thread_bss not found", .{}); - return error.FailedToResolveRelocationTarget; - } - }; - break :blk sym.n_value - base_address; - } - - break :blk sym.n_value; - }, - .undef => { - const atom = macho_file.stubs_map.get(rel.where_index) orelse { - // TODO this is required for incremental when we don't have every symbol - // resolved when creating relocations. In this case, we will insert a branch - // reloc to an undef symbol which may happen to be defined within the binary. - // Then, the undef we point at will be a null symbol (free symbol) which we - // should remove/repurpose. To circumvent this (for now), we check if the symbol - // we point to is garbage, and if so we fall back to symbol resolver to find by name. - const n_strx = macho_file.undefs.items[rel.where_index].n_strx; - if (macho_file.symbol_resolver.get(n_strx)) |resolv| inner: { - if (resolv.where != .global) break :inner; - break :blk macho_file.globals.items[resolv.where_index].n_value; - } - - // TODO verify in TextBlock that the symbol is indeed dynamically bound. - break :blk 0; // Dynamically bound by dyld. 
- }; - - break :blk macho_file.locals.items[atom.local_sym_index].n_value; - }, - } - }; - - log.debug(" | source_addr = 0x{x}", .{source_addr}); - log.debug(" | target_addr = 0x{x}", .{target_addr}); - - try rel.resolve(.{ - .block = self, - .offset = rel.offset, - .source_addr = source_addr, - .target_addr = target_addr, - .macho_file = macho_file, - }); - } -} - -pub fn format(self: TextBlock, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { - _ = fmt; - _ = options; - try std.fmt.format(writer, "TextBlock {{ ", .{}); - try std.fmt.format(writer, ".local_sym_index = {d}, ", .{self.local_sym_index}); - try std.fmt.format(writer, ".aliases = {any}, ", .{self.aliases.items}); - try std.fmt.format(writer, ".contained = {any}, ", .{self.contained.items}); - try std.fmt.format(writer, ".code = {*}, ", .{self.code.items}); - try std.fmt.format(writer, ".size = {d}, ", .{self.size}); - try std.fmt.format(writer, ".alignment = {d}, ", .{self.alignment}); - try std.fmt.format(writer, ".relocs = {any}, ", .{self.relocs.items}); - try std.fmt.format(writer, ".rebases = {any}, ", .{self.rebases.items}); - try std.fmt.format(writer, ".bindings = {any}, ", .{self.bindings.items}); - try std.fmt.format(writer, ".dices = {any}, ", .{self.dices.items}); - if (self.stab) |stab| { - try std.fmt.format(writer, ".stab = {any}, ", .{stab}); - } - try std.fmt.format(writer, "}}", .{}); -} - -const RelocIterator = struct { - buffer: []const macho.relocation_info, - index: i32 = -1, - - pub fn next(self: *RelocIterator) ?macho.relocation_info { - self.index += 1; - if (self.index < self.buffer.len) { - return self.buffer[@intCast(u32, self.index)]; - } - return null; - } - - pub fn peek(self: RelocIterator) macho.relocation_info { - assert(self.index + 1 < self.buffer.len); - return self.buffer[@intCast(u32, self.index + 1)]; - } -}; - -fn filterRelocs(relocs: []macho.relocation_info, start_addr: u64, end_addr: u64) []macho.relocation_info { - const 
Predicate = struct { - addr: u64, - - pub fn predicate(self: @This(), rel: macho.relocation_info) bool { - return rel.r_address < self.addr; - } - }; - - const start = MachO.findFirst(macho.relocation_info, relocs, 0, Predicate{ .addr = end_addr }); - const end = MachO.findFirst(macho.relocation_info, relocs, start, Predicate{ .addr = start_addr }); - - return relocs[start..end]; -} - -inline fn isArithmeticOp(inst: *const [4]u8) bool { - const group_decode = @truncate(u5, inst[3]); - return ((group_decode >> 2) == 4); -} -- cgit v1.2.3