diff options
| author | Jakub Konka <kubkon@jakubkonka.com> | 2021-05-02 23:40:08 +0200 |
|---|---|---|
| committer | Jakub Konka <kubkon@jakubkonka.com> | 2021-05-04 13:09:32 +0200 |
| commit | 86ab6ca56c4e6d115b017eed40dc62815a6a8e3d (patch) | |
| tree | 7d429b531800dfe8614ea4789686338d5fd9a5df /src/link | |
| parent | b6be28ddcc50bd4bf085294ffcb696e31a7a1de5 (diff) | |
| download | zig-86ab6ca56c4e6d115b017eed40dc62815a6a8e3d.tar.gz zig-86ab6ca56c4e6d115b017eed40dc62815a6a8e3d.zip | |
zld: rewrite Object to include pointers to Symbols
Diffstat (limited to 'src/link')
| -rw-r--r-- | src/link/MachO/Object.zig | 178 | ||||
| -rw-r--r-- | src/link/MachO/Symbol.zig | 111 | ||||
| -rw-r--r-- | src/link/MachO/reloc.zig | 10 | ||||
| -rw-r--r-- | src/link/MachO/reloc/aarch64.zig | 16 | ||||
| -rw-r--r-- | src/link/MachO/reloc/x86_64.zig | 16 |
5 files changed, 186 insertions, 145 deletions
diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index 6703a5bfb7..d599a6edbe 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -43,17 +43,13 @@ dwarf_debug_str_index: ?u16 = null, dwarf_debug_line_index: ?u16 = null, dwarf_debug_ranges_index: ?u16 = null, -symtab: std.ArrayListUnmanaged(macho.nlist_64) = .{}, -strtab: std.ArrayListUnmanaged(u8) = .{}, +symbols: std.ArrayListUnmanaged(*Symbol) = .{}, +initializers: std.ArrayListUnmanaged(*Symbol) = .{}, +data_in_code_entries: std.ArrayListUnmanaged(macho.data_in_code_entry) = .{}, -locals: std.StringArrayHashMapUnmanaged(Symbol) = .{}, -stabs: std.ArrayListUnmanaged(Stab) = .{}, tu_path: ?[]const u8 = null, tu_mtime: ?u64 = null, -initializers: std.ArrayListUnmanaged(CppStatic) = .{}, -data_in_code_entries: std.ArrayListUnmanaged(macho.data_in_code_entry) = .{}, - pub const Section = struct { inner: macho.section_64, code: []u8, @@ -71,23 +67,6 @@ pub const Section = struct { } }; -const CppStatic = struct { - symbol: u32, - target_addr: u64, -}; - -const Stab = struct { - tag: Tag, - symbol: u32, - size: ?u64 = null, - - const Tag = enum { - function, - global, - static, - }; -}; - const DebugInfo = struct { inner: dwarf.DwarfInfo, debug_info: []u8, @@ -169,14 +148,12 @@ pub fn deinit(self: *Object) void { } self.sections.deinit(self.allocator); - for (self.locals.items()) |*entry| { - entry.value.deinit(self.allocator); + for (self.symbols.items) |sym| { + sym.deinit(self.allocator); + self.allocator.destroy(sym); } - self.locals.deinit(self.allocator); + self.symbols.deinit(self.allocator); - self.symtab.deinit(self.allocator); - self.strtab.deinit(self.allocator); - self.stabs.deinit(self.allocator); self.data_in_code_entries.deinit(self.allocator); self.initializers.deinit(self.allocator); @@ -222,9 +199,9 @@ pub fn parse(self: *Object) !void { } try self.readLoadCommands(reader); + try self.parseSymbols(); try self.parseSections(); - if (self.symtab_cmd_index != null) 
try self.parseSymtab(); - if (self.data_in_code_cmd_index != null) try self.readDataInCode(); + try self.parseDataInCode(); try self.parseInitializers(); try self.parseDebugInfo(); } @@ -298,9 +275,10 @@ pub fn readLoadCommands(self: *Object, reader: anytype) !void { } pub fn parseSections(self: *Object) !void { - log.debug("parsing sections in {s}", .{self.name.?}); const seg = self.load_commands.items[self.segment_cmd_index.?].Segment; + log.debug("parsing sections in {s}", .{self.name.?}); + try self.sections.ensureCapacity(self.allocator, seg.sections.items.len); for (seg.sections.items) |sect| { @@ -327,6 +305,7 @@ pub fn parseSections(self: *Object) !void { self.arch.?, section.code, mem.bytesAsSlice(macho.relocation_info, raw_relocs), + self.symbols.items, ); } @@ -344,60 +323,70 @@ pub fn parseInitializers(self: *Object) !void { const relocs = section.relocs orelse unreachable; try self.initializers.ensureCapacity(self.allocator, relocs.len); for (relocs) |rel| { - self.initializers.appendAssumeCapacity(.{ - .symbol = rel.target.symbol, - .target_addr = undefined, - }); + self.initializers.appendAssumeCapacity(rel.target.symbol); } - mem.reverse(CppStatic, self.initializers.items); - - for (self.initializers.items) |initializer| { - const sym = self.symtab.items[initializer.symbol]; - const sym_name = self.getString(sym.n_strx); - log.debug(" | {s}", .{sym_name}); - } + mem.reverse(*Symbol, self.initializers.items); } -pub fn parseSymtab(self: *Object) !void { - const symtab_cmd = self.load_commands.items[self.symtab_cmd_index.?].Symtab; +pub fn parseSymbols(self: *Object) !void { + const index = self.symtab_cmd_index orelse return; + const symtab_cmd = self.load_commands.items[index].Symtab; var symtab = try self.allocator.alloc(u8, @sizeOf(macho.nlist_64) * symtab_cmd.nsyms); defer self.allocator.free(symtab); - _ = try self.file.?.preadAll(symtab, symtab_cmd.symoff); const slice = @alignCast(@alignOf(macho.nlist_64), mem.bytesAsSlice(macho.nlist_64, 
symtab)); - try self.symtab.appendSlice(self.allocator, slice); var strtab = try self.allocator.alloc(u8, symtab_cmd.strsize); defer self.allocator.free(strtab); - _ = try self.file.?.preadAll(strtab, symtab_cmd.stroff); - try self.strtab.appendSlice(self.allocator, strtab); - for (self.symtab.items) |sym, sym_id| { - if (Symbol.isStab(sym) or Symbol.isUndef(sym)) continue; + for (slice) |sym| { + if (Symbol.isStab(sym)) { + log.err("TODO handle stabs embedded within object files", .{}); + return error.HandleStabsInObjects; + } - const sym_name = self.getString(sym.n_strx); - const tag: Symbol.Tag = tag: { - if (Symbol.isLocal(sym)) { - if (self.arch.? == .aarch64 and mem.startsWith(u8, sym_name, "l")) continue; - break :tag .local; - } - if (Symbol.isWeakDef(sym)) { - break :tag .weak; + const sym_name = mem.spanZ(@ptrCast([*:0]const u8, strtab.ptr + sym.n_strx)); + const name = try self.allocator.dupe(u8, sym_name); + + const symbol: *Symbol = symbol: { + if (Symbol.isSect(sym)) { + const linkage: Symbol.Regular.Linkage = linkage: { + if (!Symbol.isExt(sym)) break :linkage .translation_unit; + if (Symbol.isWeakDef(sym) or Symbol.isPext(sym)) break :linkage .linkage_unit; + break :linkage .global; + }; + const regular = try self.allocator.create(Symbol.Regular); + errdefer self.allocator.destroy(regular); + regular.* = .{ + .base = .{ + .@"type" = .regular, + .name = name, + }, + .linkage = .translation_unit, + .address = sym.n_value, + .section = sym.n_sect - 1, + .weak_ref = Symbol.isWeakRef(sym), + .file = self, + }; + break :symbol &regular.base; } - break :tag .strong; + + const undef = try self.allocator.create(Symbol.Unresolved); + errdefer self.allocator.destroy(undef); + undef.* = .{ + .base = .{ + .@"type" = .unresolved, + .name = name, + }, + .file = self, + }; + break :symbol &undef.base; }; - const name = try self.allocator.dupe(u8, sym_name); - try self.locals.putNoClobber(self.allocator, name, .{ - .tag = tag, - .name = name, - .address = 0, - .section
= 0, - .index = @intCast(u32, sym_id), - }); + try self.symbols.append(self.allocator, symbol); } } @@ -429,38 +418,31 @@ pub fn parseDebugInfo(self: *Object) !void { break :mtime @intCast(u64, @divFloor(stat.mtime, 1_000_000_000)); }; - for (self.locals.items()) |entry, index| { - const local = entry.value; - const source_sym = self.symtab.items[local.index.?]; - const size = blk: for (debug_info.inner.func_list.items) |func| { - if (func.pc_range) |range| { - if (source_sym.n_value >= range.start and source_sym.n_value < range.end) { - break :blk range.end - range.start; + for (self.symbols.items) |sym| { + if (sym.cast(Symbol.Regular)) |reg| { + const size: u64 = blk: for (debug_info.inner.func_list.items) |func| { + if (func.pc_range) |range| { + if (reg.address >= range.start and reg.address < range.end) { + break :blk range.end - range.start; + } } - } - } else null; - const tag: Stab.Tag = tag: { - if (size != null) break :tag .function; - switch (local.tag) { - .weak, .strong => break :tag .global, - else => break :tag .static, - } - }; - - try self.stabs.append(self.allocator, .{ - .tag = tag, - .size = size, - .symbol = @intCast(u32, index), - }); + } else 0; + + reg.stab = .{ + .kind = kind: { + if (size > 0) break :kind .function; + switch (reg.linkage) { + .translation_unit => break :kind .static, + else => break :kind .global, + } + }, + .size = size, + }; + } } } -pub fn getString(self: *const Object, str_off: u32) []const u8 { - assert(str_off < self.strtab.items.len); - return mem.spanZ(@ptrCast([*:0]const u8, self.strtab.items.ptr + str_off)); -} - -pub fn readSection(self: Object, allocator: *Allocator, index: u16) ![]u8 { +fn readSection(self: Object, allocator: *Allocator, index: u16) ![]u8 { const seg = self.load_commands.items[self.segment_cmd_index.?].Segment; const sect = seg.sections.items[index]; var buffer = try allocator.alloc(u8, sect.size); @@ -468,7 +450,7 @@ pub fn readSection(self: Object, allocator: *Allocator, index: u16) ![]u8 { 
return buffer; } -pub fn readDataInCode(self: *Object) !void { +pub fn parseDataInCode(self: *Object) !void { const index = self.data_in_code_cmd_index orelse return; const data_in_code = self.load_commands.items[index].LinkeditData; diff --git a/src/link/MachO/Symbol.zig b/src/link/MachO/Symbol.zig index 9e6c2bf68a..a907146e25 100644 --- a/src/link/MachO/Symbol.zig +++ b/src/link/MachO/Symbol.zig @@ -2,31 +2,93 @@ const Symbol = @This(); const std = @import("std"); const macho = std.macho; +const mem = std.mem; -const Allocator = std.mem.Allocator; +const Allocator = mem.Allocator; +const Object = @import("Object.zig"); -pub const Tag = enum { - local, - weak, - strong, - import, - undef, +pub const Type = enum { + regular, + proxy, + unresolved, }; -tag: Tag, +/// Symbol type. +@"type": Type, + +/// Symbol name. Owned slice. name: []u8, -address: u64, -section: u8, -/// Index of file where to locate this symbol. -/// Depending on context, this is either an object file, or a dylib. -file: ?u16 = null, +pub const Regular = struct { + base: Symbol, + + /// Linkage type. + linkage: Linkage, + + /// Symbol address. + address: u64, + + /// Section ID where the symbol resides. + section: u8, + + /// Whether the symbol is a weak ref. + weak_ref: bool, + + /// File where to locate this symbol. + file: *Object, + + /// Debug stab if defined. + stab: ?struct { + /// Stab kind + kind: enum { + function, + global, + static, + }, -/// Index of this symbol within the file's symbol table. -index: ?u32 = null, + /// Size of the stab. + size: u64, + } = null, -pub fn deinit(self: *Symbol, allocator: *Allocator) void { - allocator.free(self.name); + pub const base_type: Symbol.Type = .regular; + + pub const Linkage = enum { + translation_unit, + linkage_unit, + global, + }; +}; + +pub const Proxy = struct { + base: Symbol, + + /// Dylib ordinal. + dylib: u16, + + pub const base_type: Symbol.Type = .proxy; +}; + +pub const Unresolved = struct { + base: Symbol, + + /// Alias of. 
+ alias: ?*Symbol = null, + + /// File where this symbol was referenced. + file: *Object, + + pub const base_type: Symbol.Type = .unresolved; +}; + +pub fn deinit(base: *Symbol, allocator: *Allocator) void { + allocator.free(base.name); +} + +pub fn cast(base: *Symbol, comptime T: type) ?*T { + if (base.@"type" != T.base_type) { + return null; + } + return @fieldParentPtr(T, "base", base); } pub fn isStab(sym: macho.nlist_64) bool { @@ -55,17 +117,6 @@ pub fn isWeakDef(sym: macho.nlist_64) bool { return (sym.n_desc & macho.N_WEAK_DEF) != 0; } -/// Symbol is local if it is defined and not an extern. -pub fn isLocal(sym: macho.nlist_64) bool { - return isSect(sym) and !isExt(sym); -} - -/// Symbol is global if it is defined and an extern. -pub fn isGlobal(sym: macho.nlist_64) bool { - return isSect(sym) and isExt(sym); -} - -/// Symbol is undefined if it is not defined and an extern. -pub fn isUndef(sym: macho.nlist_64) bool { - return isUndf(sym) and isExt(sym); +pub fn isWeakRef(sym: macho.nlist_64) bool { + return (sym.n_desc & macho.N_WEAK_REF) != 0; } diff --git a/src/link/MachO/reloc.zig b/src/link/MachO/reloc.zig index 57825149d1..1ce9fa2c2d 100644 --- a/src/link/MachO/reloc.zig +++ b/src/link/MachO/reloc.zig @@ -10,6 +10,7 @@ const aarch64 = @import("reloc/aarch64.zig"); const x86_64 = @import("reloc/x86_64.zig"); const Allocator = mem.Allocator; +const Symbol = @import("Symbol.zig"); pub const Relocation = struct { @"type": Type, @@ -75,12 +76,12 @@ pub const Relocation = struct { }; pub const Target = union(enum) { - symbol: u32, + symbol: *Symbol, section: u16, - pub fn from_reloc(reloc: macho.relocation_info) Target { + pub fn from_reloc(reloc: macho.relocation_info, symbols: []*Symbol) Target { return if (reloc.r_extern == 1) .{ - .symbol = reloc.r_symbolnum, + .symbol = symbols[reloc.r_symbolnum], } else .{ .section = @intCast(u16, reloc.r_symbolnum - 1), }; @@ -136,6 +137,7 @@ pub fn parse( arch: std.Target.Cpu.Arch, code: []u8, relocs: []const 
macho.relocation_info, + symbols: []*Symbol, ) ![]*Relocation { var it = RelocIterator{ .buffer = relocs, @@ -148,6 +150,7 @@ pub fn parse( .it = &it, .code = code, .parsed = std.ArrayList(*Relocation).init(allocator), + .symbols = symbols, }; defer parser.deinit(); try parser.parse(); @@ -160,6 +163,7 @@ pub fn parse( .it = &it, .code = code, .parsed = std.ArrayList(*Relocation).init(allocator), + .symbols = symbols, }; defer parser.deinit(); try parser.parse(); diff --git a/src/link/MachO/reloc/aarch64.zig b/src/link/MachO/reloc/aarch64.zig index d8e7cebddd..dbc233b3a5 100644 --- a/src/link/MachO/reloc/aarch64.zig +++ b/src/link/MachO/reloc/aarch64.zig @@ -10,6 +10,7 @@ const reloc = @import("../reloc.zig"); const Allocator = mem.Allocator; const Relocation = reloc.Relocation; +const Symbol = @import("../Symbol.zig"); pub const Branch = struct { base: Relocation, @@ -188,6 +189,7 @@ pub const Parser = struct { it: *reloc.RelocIterator, code: []u8, parsed: std.ArrayList(*Relocation), + symbols: []*Symbol, addend: ?u32 = null, subtractor: ?Relocation.Target = null, @@ -273,7 +275,7 @@ pub const Parser = struct { var branch = try parser.allocator.create(Branch); errdefer parser.allocator.destroy(branch); - const target = Relocation.Target.from_reloc(rel); + const target = Relocation.Target.from_reloc(rel, parser.symbols); branch.* = .{ .base = .{ @@ -294,7 +296,7 @@ pub const Parser = struct { assert(rel.r_length == 2); const rel_type = @intToEnum(macho.reloc_type_arm64, rel.r_type); - const target = Relocation.Target.from_reloc(rel); + const target = Relocation.Target.from_reloc(rel, parser.symbols); const offset = @intCast(u32, rel.r_address); const inst = parser.code[offset..][0..4]; @@ -400,7 +402,7 @@ pub const Parser = struct { aarch64.Instruction.load_store_register, ), inst) }; } - const target = Relocation.Target.from_reloc(rel); + const target = Relocation.Target.from_reloc(rel, parser.symbols); var page_off = try parser.allocator.create(PageOff); errdefer 
parser.allocator.destroy(page_off); @@ -437,7 +439,7 @@ pub const Parser = struct { ), inst); assert(parsed_inst.size == 3); - const target = Relocation.Target.from_reloc(rel); + const target = Relocation.Target.from_reloc(rel, parser.symbols); var page_off = try parser.allocator.create(GotPageOff); errdefer parser.allocator.destroy(page_off); @@ -496,7 +498,7 @@ pub const Parser = struct { } }; - const target = Relocation.Target.from_reloc(rel); + const target = Relocation.Target.from_reloc(rel, parser.symbols); var page_off = try parser.allocator.create(TlvpPageOff); errdefer parser.allocator.destroy(page_off); @@ -531,7 +533,7 @@ pub const Parser = struct { assert(rel.r_pcrel == 0); assert(parser.subtractor == null); - parser.subtractor = Relocation.Target.from_reloc(rel); + parser.subtractor = Relocation.Target.from_reloc(rel, parser.symbols); // Verify SUBTRACTOR is followed by UNSIGNED. const next = @intToEnum(macho.reloc_type_arm64, parser.it.peek().r_type); @@ -554,7 +556,7 @@ pub const Parser = struct { var unsigned = try parser.allocator.create(reloc.Unsigned); errdefer parser.allocator.destroy(unsigned); - const target = Relocation.Target.from_reloc(rel); + const target = Relocation.Target.from_reloc(rel, parser.symbols); const is_64bit: bool = switch (rel.r_length) { 3 => true, 2 => false, diff --git a/src/link/MachO/reloc/x86_64.zig b/src/link/MachO/reloc/x86_64.zig index cdc90aac90..32f83924e8 100644 --- a/src/link/MachO/reloc/x86_64.zig +++ b/src/link/MachO/reloc/x86_64.zig @@ -9,6 +9,7 @@ const reloc = @import("../reloc.zig"); const Allocator = mem.Allocator; const Relocation = reloc.Relocation; +const Symbol = @import("../Symbol.zig"); pub const Branch = struct { base: Relocation, @@ -95,6 +96,7 @@ pub const Parser = struct { it: *reloc.RelocIterator, code: []u8, parsed: std.ArrayList(*Relocation), + symbols: []*Symbol, subtractor: ?Relocation.Target = null, pub fn deinit(parser: *Parser) void { @@ -145,7 +147,7 @@ pub const Parser = struct { var 
branch = try parser.allocator.create(Branch); errdefer parser.allocator.destroy(branch); - const target = Relocation.Target.from_reloc(rel); + const target = Relocation.Target.from_reloc(rel, parser.symbols); branch.* = .{ .base = .{ @@ -165,7 +167,7 @@ pub const Parser = struct { assert(rel.r_length == 2); const rel_type = @intToEnum(macho.reloc_type_x86_64, rel.r_type); - const target = Relocation.Target.from_reloc(rel); + const target = Relocation.Target.from_reloc(rel, parser.symbols); const is_extern = rel.r_extern == 1; const offset = @intCast(u32, rel.r_address); @@ -211,7 +213,7 @@ pub const Parser = struct { const offset = @intCast(u32, rel.r_address); const inst = parser.code[offset..][0..4]; - const target = Relocation.Target.from_reloc(rel); + const target = Relocation.Target.from_reloc(rel, parser.symbols); var got_load = try parser.allocator.create(GotLoad); errdefer parser.allocator.destroy(got_load); @@ -237,7 +239,7 @@ pub const Parser = struct { const offset = @intCast(u32, rel.r_address); const inst = parser.code[offset..][0..4]; - const target = Relocation.Target.from_reloc(rel); + const target = Relocation.Target.from_reloc(rel, parser.symbols); var got = try parser.allocator.create(Got); errdefer parser.allocator.destroy(got); @@ -263,7 +265,7 @@ pub const Parser = struct { const offset = @intCast(u32, rel.r_address); const inst = parser.code[offset..][0..4]; - const target = Relocation.Target.from_reloc(rel); + const target = Relocation.Target.from_reloc(rel, parser.symbols); var tlv = try parser.allocator.create(Tlv); errdefer parser.allocator.destroy(tlv); @@ -288,7 +290,7 @@ pub const Parser = struct { assert(rel.r_pcrel == 0); assert(parser.subtractor == null); - parser.subtractor = Relocation.Target.from_reloc(rel); + parser.subtractor = Relocation.Target.from_reloc(rel, parser.symbols); // Verify SUBTRACTOR is followed by UNSIGNED. 
const next = @intToEnum(macho.reloc_type_x86_64, parser.it.peek().r_type); @@ -311,7 +313,7 @@ pub const Parser = struct { var unsigned = try parser.allocator.create(reloc.Unsigned); errdefer parser.allocator.destroy(unsigned); - const target = Relocation.Target.from_reloc(rel); + const target = Relocation.Target.from_reloc(rel, parser.symbols); const is_64bit: bool = switch (rel.r_length) { 3 => true, 2 => false, |
