diff options
| author | Jakub Konka <kubkon@jakubkonka.com> | 2023-09-13 10:07:07 +0200 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2023-09-13 10:07:07 +0200 |
| commit | 4d29b3967873d6ff7e7426b2ab99c00c9e0fa284 (patch) | |
| tree | 90bfc83c9ef5aafbdbd626d7e1767db18730dbeb /src | |
| parent | 89ea67aee2aeb4c041f55625f22fb9a1c56cf9ea (diff) | |
| parent | 8142349d699140ff71801d31f1d1958599f5adda (diff) | |
| download | zig-4d29b3967873d6ff7e7426b2ab99c00c9e0fa284.tar.gz zig-4d29b3967873d6ff7e7426b2ab99c00c9e0fa284.zip | |
Merge pull request #17113 from ziglang/elf-linker
elf: upstream zld/ELF functionality, part 1
Diffstat (limited to 'src')
| -rw-r--r-- | src/arch/aarch64/CodeGen.zig | 8 | ||||
| -rw-r--r-- | src/arch/arm/CodeGen.zig | 8 | ||||
| -rw-r--r-- | src/arch/riscv64/CodeGen.zig | 8 | ||||
| -rw-r--r-- | src/arch/sparc64/CodeGen.zig | 8 | ||||
| -rw-r--r-- | src/arch/x86_64/CodeGen.zig | 40 | ||||
| -rw-r--r-- | src/arch/x86_64/Emit.zig | 10 | ||||
| -rw-r--r-- | src/codegen.zig | 11 | ||||
| -rw-r--r-- | src/link.zig | 3 | ||||
| -rw-r--r-- | src/link/Dwarf.zig | 24 | ||||
| -rw-r--r-- | src/link/Elf.zig | 2705 | ||||
| -rw-r--r-- | src/link/Elf/Atom.zig | 644 | ||||
| -rw-r--r-- | src/link/Elf/LinkerDefined.zig | 112 | ||||
| -rw-r--r-- | src/link/Elf/Object.zig | 872 | ||||
| -rw-r--r-- | src/link/Elf/Symbol.zig | 362 | ||||
| -rw-r--r-- | src/link/Elf/ZigModule.zig | 295 | ||||
| -rw-r--r-- | src/link/Elf/eh_frame.zig | 449 | ||||
| -rw-r--r-- | src/link/Elf/file.zig | 105 | ||||
| -rw-r--r-- | src/link/Elf/synthetic_sections.zig | 433 | ||||
| -rw-r--r-- | src/link/strtab.zig | 4 |
19 files changed, 4871 insertions(+), 1230 deletions(-)
diff --git a/src/arch/aarch64/CodeGen.zig b/src/arch/aarch64/CodeGen.zig index 0f088fa867..2c6e3f5a7b 100644 --- a/src/arch/aarch64/CodeGen.zig +++ b/src/arch/aarch64/CodeGen.zig @@ -4314,10 +4314,10 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallModifier if (try self.air.value(callee, mod)) |func_value| { if (func_value.getFunction(mod)) |func| { if (self.bin_file.cast(link.File.Elf)) |elf_file| { - const atom_index = try elf_file.getOrCreateAtomForDecl(func.owner_decl); - const atom = elf_file.getAtom(atom_index); - _ = try atom.getOrCreateOffsetTableEntry(elf_file); - const got_addr = @as(u32, @intCast(atom.getOffsetTableAddress(elf_file))); + const sym_index = try elf_file.getOrCreateMetadataForDecl(func.owner_decl); + const sym = elf_file.symbol(sym_index); + _ = try sym.getOrCreateGotEntry(elf_file); + const got_addr = @as(u32, @intCast(sym.gotAddress(elf_file))); try self.genSetReg(Type.usize, .x30, .{ .memory = got_addr }); } else if (self.bin_file.cast(link.File.MachO)) |macho_file| { const atom = try macho_file.getOrCreateAtomForDecl(func.owner_decl); diff --git a/src/arch/arm/CodeGen.zig b/src/arch/arm/CodeGen.zig index 135f118731..937d4aa1dd 100644 --- a/src/arch/arm/CodeGen.zig +++ b/src/arch/arm/CodeGen.zig @@ -4294,10 +4294,10 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallModifier if (try self.air.value(callee, mod)) |func_value| { if (func_value.getFunction(mod)) |func| { if (self.bin_file.cast(link.File.Elf)) |elf_file| { - const atom_index = try elf_file.getOrCreateAtomForDecl(func.owner_decl); - const atom = elf_file.getAtom(atom_index); - _ = try atom.getOrCreateOffsetTableEntry(elf_file); - const got_addr = @as(u32, @intCast(atom.getOffsetTableAddress(elf_file))); + const sym_index = try elf_file.getOrCreateMetadataForDecl(func.owner_decl); + const sym = elf_file.symbol(sym_index); + _ = try sym.getOrCreateGotEntry(elf_file); + const got_addr = @as(u32, 
@intCast(sym.gotAddress(elf_file))); try self.genSetReg(Type.usize, .lr, .{ .memory = got_addr }); } else if (self.bin_file.cast(link.File.MachO)) |_| { unreachable; // unsupported architecture for MachO diff --git a/src/arch/riscv64/CodeGen.zig b/src/arch/riscv64/CodeGen.zig index e43778510b..d3f162ae0f 100644 --- a/src/arch/riscv64/CodeGen.zig +++ b/src/arch/riscv64/CodeGen.zig @@ -1747,10 +1747,10 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallModifier if (try self.air.value(callee, mod)) |func_value| { switch (mod.intern_pool.indexToKey(func_value.ip_index)) { .func => |func| { - const atom_index = try elf_file.getOrCreateAtomForDecl(func.owner_decl); - const atom = elf_file.getAtom(atom_index); - _ = try atom.getOrCreateOffsetTableEntry(elf_file); - const got_addr = @as(u32, @intCast(atom.getOffsetTableAddress(elf_file))); + const sym_index = try elf_file.getOrCreateMetadataForDecl(func.owner_decl); + const sym = elf_file.symbol(sym_index); + _ = try sym.getOrCreateGotEntry(elf_file); + const got_addr = @as(u32, @intCast(sym.gotAddress(elf_file))); try self.genSetReg(Type.usize, .ra, .{ .memory = got_addr }); _ = try self.addInst(.{ .tag = .jalr, diff --git a/src/arch/sparc64/CodeGen.zig b/src/arch/sparc64/CodeGen.zig index 6d575a9d45..df1a7cf970 100644 --- a/src/arch/sparc64/CodeGen.zig +++ b/src/arch/sparc64/CodeGen.zig @@ -1349,10 +1349,10 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallModifier switch (mod.intern_pool.indexToKey(func_value.ip_index)) { .func => |func| { const got_addr = if (self.bin_file.cast(link.File.Elf)) |elf_file| blk: { - const atom_index = try elf_file.getOrCreateAtomForDecl(func.owner_decl); - const atom = elf_file.getAtom(atom_index); - _ = try atom.getOrCreateOffsetTableEntry(elf_file); - break :blk @as(u32, @intCast(atom.getOffsetTableAddress(elf_file))); + const sym_index = try elf_file.getOrCreateMetadataForDecl(func.owner_decl); + const sym = elf_file.symbol(sym_index); + 
_ = try sym.getOrCreateGotEntry(elf_file); + break :blk @as(u32, @intCast(sym.gotAddress(elf_file))); } else unreachable; try self.genSetReg(Type.usize, .o7, .{ .memory = got_addr }); diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index aec28b992c..e535894604 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -125,7 +125,9 @@ const Owner = union(enum) { .func_index => |func_index| { const mod = ctx.bin_file.options.module.?; const decl_index = mod.funcOwnerDeclIndex(func_index); - if (ctx.bin_file.cast(link.File.MachO)) |macho_file| { + if (ctx.bin_file.cast(link.File.Elf)) |elf_file| { + return elf_file.getOrCreateMetadataForDecl(decl_index); + } else if (ctx.bin_file.cast(link.File.MachO)) |macho_file| { const atom = try macho_file.getOrCreateAtomForDecl(decl_index); return macho_file.getAtom(atom).getSymbolIndex().?; } else if (ctx.bin_file.cast(link.File.Coff)) |coff_file| { @@ -136,7 +138,10 @@ const Owner = union(enum) { } else unreachable; }, .lazy_sym => |lazy_sym| { - if (ctx.bin_file.cast(link.File.MachO)) |macho_file| { + if (ctx.bin_file.cast(link.File.Elf)) |elf_file| { + return elf_file.getOrCreateMetadataForLazySymbol(lazy_sym) catch |err| + ctx.fail("{s} creating lazy symbol", .{@errorName(err)}); + } else if (ctx.bin_file.cast(link.File.MachO)) |macho_file| { const atom = macho_file.getOrCreateAtomForLazySymbol(lazy_sym) catch |err| return ctx.fail("{s} creating lazy symbol", .{@errorName(err)}); return macho_file.getAtom(atom).getSymbolIndex().?; @@ -8149,10 +8154,11 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallModifier else => null, }) |owner_decl| { if (self.bin_file.cast(link.File.Elf)) |elf_file| { - const atom_index = try elf_file.getOrCreateAtomForDecl(owner_decl); - const atom = elf_file.getAtom(atom_index); - _ = try atom.getOrCreateOffsetTableEntry(elf_file); - const got_addr = atom.getOffsetTableAddress(elf_file); + const sym_index = try 
elf_file.getOrCreateMetadataForDecl(owner_decl); + const sym = elf_file.symbol(sym_index); + sym.flags.needs_got = true; + _ = try sym.getOrCreateGotEntry(elf_file); + const got_addr = sym.gotAddress(elf_file); try self.asmMemory(.{ ._, .call }, Memory.sib(.qword, .{ .base = .{ .reg = .ds }, .disp = @intCast(got_addr), @@ -8178,7 +8184,18 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallModifier } else if (func_value.getExternFunc(mod)) |extern_func| { const decl_name = mod.intern_pool.stringToSlice(mod.declPtr(extern_func.decl).name); const lib_name = mod.intern_pool.stringToSliceUnwrap(extern_func.lib_name); - if (self.bin_file.cast(link.File.Coff)) |coff_file| { + if (self.bin_file.cast(link.File.Elf)) |elf_file| { + const atom_index = try self.owner.getSymbolIndex(self); + const sym_index = try elf_file.getGlobalSymbol(decl_name, lib_name); + _ = try self.addInst(.{ + .tag = .call, + .ops = .extern_fn_reloc, + .data = .{ .reloc = .{ + .atom_index = atom_index, + .sym_index = sym_index, + } }, + }); + } else if (self.bin_file.cast(link.File.Coff)) |coff_file| { const atom_index = try self.owner.getSymbolIndex(self); const sym_index = try coff_file.getGlobalSymbol(decl_name, lib_name); _ = try self.addInst(.{ @@ -10215,11 +10232,12 @@ fn genLazySymbolRef( lazy_sym: link.File.LazySymbol, ) InnerError!void { if (self.bin_file.cast(link.File.Elf)) |elf_file| { - const atom_index = elf_file.getOrCreateAtomForLazySymbol(lazy_sym) catch |err| + const sym_index = elf_file.getOrCreateMetadataForLazySymbol(lazy_sym) catch |err| return self.fail("{s} creating lazy symbol", .{@errorName(err)}); - const atom = elf_file.getAtom(atom_index); - _ = try atom.getOrCreateOffsetTableEntry(elf_file); - const got_addr = atom.getOffsetTableAddress(elf_file); + const sym = elf_file.symbol(sym_index); + sym.flags.needs_got = true; + _ = try sym.getOrCreateGotEntry(elf_file); + const got_addr = sym.gotAddress(elf_file); const got_mem = Memory.sib(.qword, .{ 
.base = .{ .reg = .ds }, .disp = @intCast(got_addr) }); switch (tag) { diff --git a/src/arch/x86_64/Emit.zig b/src/arch/x86_64/Emit.zig index 050772b2ce..ab1d63e64c 100644 --- a/src/arch/x86_64/Emit.zig +++ b/src/arch/x86_64/Emit.zig @@ -41,7 +41,15 @@ pub fn emitMir(emit: *Emit) Error!void { .offset = end_offset - 4, .length = @as(u5, @intCast(end_offset - start_offset)), }), - .linker_extern_fn => |symbol| if (emit.bin_file.cast(link.File.MachO)) |macho_file| { + .linker_extern_fn => |symbol| if (emit.bin_file.cast(link.File.Elf)) |elf_file| { + // Add relocation to the decl. + const atom_ptr = elf_file.symbol(symbol.atom_index).atom(elf_file).?; + try atom_ptr.addReloc(elf_file, .{ + .r_offset = end_offset - 4, + .r_info = (@as(u64, @intCast(symbol.sym_index)) << 32) | std.elf.R_X86_64_PLT32, + .r_addend = -4, + }); + } else if (emit.bin_file.cast(link.File.MachO)) |macho_file| { // Add relocation to the decl. const atom_index = macho_file.getAtomIndexForSymbol(.{ .sym_index = symbol.atom_index }).?; const target = macho_file.getGlobalByIndex(symbol.sym_index); diff --git a/src/codegen.zig b/src/codegen.zig index 4d1993434a..3d86e4b6bc 100644 --- a/src/codegen.zig +++ b/src/codegen.zig @@ -858,10 +858,11 @@ fn genDeclRef( const is_threadlocal = tv.val.isPtrToThreadLocal(mod) and !bin_file.options.single_threaded; if (bin_file.cast(link.File.Elf)) |elf_file| { - const atom_index = try elf_file.getOrCreateAtomForDecl(decl_index); - const atom = elf_file.getAtom(atom_index); - _ = try atom.getOrCreateOffsetTableEntry(elf_file); - return GenResult.mcv(.{ .memory = atom.getOffsetTableAddress(elf_file) }); + const sym_index = try elf_file.getOrCreateMetadataForDecl(decl_index); + const sym = elf_file.symbol(sym_index); + sym.flags.needs_got = true; + _ = try sym.getOrCreateGotEntry(elf_file); + return GenResult.mcv(.{ .memory = sym.gotAddress(elf_file) }); } else if (bin_file.cast(link.File.MachO)) |macho_file| { const atom_index = try 
macho_file.getOrCreateAtomForDecl(decl_index); const sym_index = macho_file.getAtom(atom_index).getSymbolIndex().?; @@ -896,7 +897,7 @@ fn genUnnamedConst( return GenResult.fail(bin_file.allocator, src_loc, "lowering unnamed constant failed: {s}", .{@errorName(err)}); }; if (bin_file.cast(link.File.Elf)) |elf_file| { - return GenResult.mcv(.{ .memory = elf_file.getSymbol(local_sym_index).st_value }); + return GenResult.mcv(.{ .memory = elf_file.symbol(local_sym_index).value }); } else if (bin_file.cast(link.File.MachO)) |_| { return GenResult.mcv(.{ .load_direct = local_sym_index }); } else if (bin_file.cast(link.File.Coff)) |_| { diff --git a/src/link.zig b/src/link.zig index 634f9679c4..4db946658a 100644 --- a/src/link.zig +++ b/src/link.zig @@ -549,7 +549,7 @@ pub const File = struct { switch (base.tag) { // zig fmt: off .coff => return @fieldParentPtr(Coff, "base", base).getGlobalSymbol(name, lib_name), - .elf => unreachable, + .elf => return @fieldParentPtr(Elf, "base", base).getGlobalSymbol(name, lib_name), .macho => return @fieldParentPtr(MachO, "base", base).getGlobalSymbol(name, lib_name), .plan9 => unreachable, .spirv => unreachable, @@ -849,6 +849,7 @@ pub const File = struct { pub fn miscErrors(base: *File) []const ErrorMsg { switch (base.tag) { + .elf => return @fieldParentPtr(Elf, "base", base).misc_errors.items, .macho => return @fieldParentPtr(MachO, "base", base).misc_errors.items, else => return &.{}, } diff --git a/src/link/Dwarf.zig b/src/link/Dwarf.zig index 48c828e54a..52d6550bcb 100644 --- a/src/link/Dwarf.zig +++ b/src/link/Dwarf.zig @@ -1108,7 +1108,7 @@ pub fn commitDeclState( switch (self.bin_file.tag) { .elf => { const elf_file = self.bin_file.cast(File.Elf).?; - const debug_line_sect = &elf_file.sections.items(.shdr)[elf_file.debug_line_section_index.?]; + const debug_line_sect = &elf_file.shdrs.items[elf_file.debug_line_section_index.?]; const file_pos = debug_line_sect.sh_offset + src_fn.off; try 
pwriteDbgLineNops(elf_file.base.file.?, file_pos, 0, &[0]u8{}, src_fn.len); }, @@ -1170,7 +1170,7 @@ pub fn commitDeclState( const elf_file = self.bin_file.cast(File.Elf).?; const shdr_index = elf_file.debug_line_section_index.?; try elf_file.growNonAllocSection(shdr_index, needed_size, 1, true); - const debug_line_sect = elf_file.sections.items(.shdr)[shdr_index]; + const debug_line_sect = elf_file.shdrs.items[shdr_index]; const file_pos = debug_line_sect.sh_offset + src_fn.off; try pwriteDbgLineNops( elf_file.base.file.?, @@ -1337,7 +1337,7 @@ fn updateDeclDebugInfoAllocation(self: *Dwarf, atom_index: Atom.Index, len: u32) switch (self.bin_file.tag) { .elf => { const elf_file = self.bin_file.cast(File.Elf).?; - const debug_info_sect = &elf_file.sections.items(.shdr)[elf_file.debug_info_section_index.?]; + const debug_info_sect = &elf_file.shdrs.items[elf_file.debug_info_section_index.?]; const file_pos = debug_info_sect.sh_offset + atom.off; try pwriteDbgInfoNops(elf_file.base.file.?, file_pos, 0, &[0]u8{}, atom.len, false); }, @@ -1415,7 +1415,7 @@ fn writeDeclDebugInfo(self: *Dwarf, atom_index: Atom.Index, dbg_info_buf: []cons const elf_file = self.bin_file.cast(File.Elf).?; const shdr_index = elf_file.debug_info_section_index.?; try elf_file.growNonAllocSection(shdr_index, needed_size, 1, true); - const debug_info_sect = elf_file.sections.items(.shdr)[shdr_index]; + const debug_info_sect = &elf_file.shdrs.items[shdr_index]; const file_pos = debug_info_sect.sh_offset + atom.off; try pwriteDbgInfoNops( elf_file.base.file.?, @@ -1496,7 +1496,7 @@ pub fn updateDeclLineNumber(self: *Dwarf, mod: *Module, decl_index: Module.Decl. 
switch (self.bin_file.tag) { .elf => { const elf_file = self.bin_file.cast(File.Elf).?; - const shdr = elf_file.sections.items(.shdr)[elf_file.debug_line_section_index.?]; + const shdr = elf_file.shdrs.items[elf_file.debug_line_section_index.?]; const file_pos = shdr.sh_offset + atom.off + self.getRelocDbgLineOff(); try elf_file.base.file.?.pwriteAll(&data, file_pos); }, @@ -1713,7 +1713,7 @@ pub fn writeDbgAbbrev(self: *Dwarf) !void { const elf_file = self.bin_file.cast(File.Elf).?; const shdr_index = elf_file.debug_abbrev_section_index.?; try elf_file.growNonAllocSection(shdr_index, needed_size, 1, false); - const debug_abbrev_sect = elf_file.sections.items(.shdr)[shdr_index]; + const debug_abbrev_sect = &elf_file.shdrs.items[shdr_index]; const file_pos = debug_abbrev_sect.sh_offset + abbrev_offset; try elf_file.base.file.?.pwriteAll(&abbrev_buf, file_pos); }, @@ -1828,7 +1828,7 @@ pub fn writeDbgInfoHeader(self: *Dwarf, module: *Module, low_pc: u64, high_pc: u switch (self.bin_file.tag) { .elf => { const elf_file = self.bin_file.cast(File.Elf).?; - const debug_info_sect = elf_file.sections.items(.shdr)[elf_file.debug_info_section_index.?]; + const debug_info_sect = &elf_file.shdrs.items[elf_file.debug_info_section_index.?]; const file_pos = debug_info_sect.sh_offset; try pwriteDbgInfoNops(elf_file.base.file.?, file_pos, 0, di_buf.items, jmp_amt, false); }, @@ -2147,7 +2147,7 @@ pub fn writeDbgAranges(self: *Dwarf, addr: u64, size: u64) !void { const elf_file = self.bin_file.cast(File.Elf).?; const shdr_index = elf_file.debug_aranges_section_index.?; try elf_file.growNonAllocSection(shdr_index, needed_size, 16, false); - const debug_aranges_sect = elf_file.sections.items(.shdr)[shdr_index]; + const debug_aranges_sect = &elf_file.shdrs.items[shdr_index]; const file_pos = debug_aranges_sect.sh_offset; try elf_file.base.file.?.pwriteAll(di_buf.items, file_pos); }, @@ -2312,9 +2312,9 @@ pub fn writeDbgLineHeader(self: *Dwarf) !void { .elf => { const elf_file = 
self.bin_file.cast(File.Elf).?; const shdr_index = elf_file.debug_line_section_index.?; - const needed_size = elf_file.sections.items(.shdr)[shdr_index].sh_size + delta; + const needed_size = elf_file.shdrs.items[shdr_index].sh_size + delta; try elf_file.growNonAllocSection(shdr_index, needed_size, 1, true); - const file_pos = elf_file.sections.items(.shdr)[shdr_index].sh_offset + first_fn.off; + const file_pos = elf_file.shdrs.items[shdr_index].sh_offset + first_fn.off; const amt = try elf_file.base.file.?.preadAll(buffer, file_pos); if (amt != buffer.len) return error.InputOutput; @@ -2377,7 +2377,7 @@ pub fn writeDbgLineHeader(self: *Dwarf) !void { switch (self.bin_file.tag) { .elf => { const elf_file = self.bin_file.cast(File.Elf).?; - const debug_line_sect = elf_file.sections.items(.shdr)[elf_file.debug_line_section_index.?]; + const debug_line_sect = &elf_file.shdrs.items[elf_file.debug_line_section_index.?]; const file_pos = debug_line_sect.sh_offset; try pwriteDbgLineNops(elf_file.base.file.?, file_pos, 0, di_buf.items, jmp_amt); }, @@ -2500,7 +2500,7 @@ pub fn flushModule(self: *Dwarf, module: *Module) !void { switch (self.bin_file.tag) { .elf => { const elf_file = self.bin_file.cast(File.Elf).?; - const debug_info_sect = &elf_file.sections.items(.shdr)[elf_file.debug_info_section_index.?]; + const debug_info_sect = &elf_file.shdrs.items[elf_file.debug_info_section_index.?]; break :blk debug_info_sect.sh_offset; }, .macho => { diff --git a/src/link/Elf.zig b/src/link/Elf.zig index b039b8d761..3a600662bc 100644 --- a/src/link/Elf.zig +++ b/src/link/Elf.zig @@ -1,100 +1,4 @@ -const Elf = @This(); - -const std = @import("std"); -const build_options = @import("build_options"); -const builtin = @import("builtin"); -const assert = std.debug.assert; -const elf = std.elf; -const fs = std.fs; -const log = std.log.scoped(.link); -const math = std.math; -const mem = std.mem; - -const codegen = @import("../codegen.zig"); -const glibc = @import("../glibc.zig"); -const 
link = @import("../link.zig"); -const lldMain = @import("../main.zig").lldMain; -const musl = @import("../musl.zig"); -const target_util = @import("../target.zig"); -const trace = @import("../tracy.zig").trace; - -const Air = @import("../Air.zig"); -const Allocator = std.mem.Allocator; -pub const Atom = @import("Elf/Atom.zig"); -const Cache = std.Build.Cache; -const Compilation = @import("../Compilation.zig"); -const Dwarf = @import("Dwarf.zig"); -const File = link.File; -const Liveness = @import("../Liveness.zig"); -const LlvmObject = @import("../codegen/llvm.zig").Object; -const Module = @import("../Module.zig"); -const InternPool = @import("../InternPool.zig"); -const Package = @import("../Package.zig"); -const StringTable = @import("strtab.zig").StringTable; -const TableSection = @import("table_section.zig").TableSection; -const Type = @import("../type.zig").Type; -const TypedValue = @import("../TypedValue.zig"); -const Value = @import("../value.zig").Value; - -const default_entry_addr = 0x8000000; - -pub const base_tag: File.Tag = .elf; - -const Section = struct { - shdr: elf.Elf64_Shdr, - phdr_index: u16, - - /// Index of the last allocated atom in this section. - last_atom_index: ?Atom.Index = null, - - /// A list of atoms that have surplus capacity. This list can have false - /// positives, as functions grow and shrink over time, only sometimes being added - /// or removed from the freelist. - /// - /// An atom has surplus capacity when its overcapacity value is greater than - /// padToIdeal(minimum_atom_size). That is, when it has so - /// much extra capacity, that we could fit a small new symbol in it, itself with - /// ideal_capacity or more. - /// - /// Ideal capacity is defined by size + (size / ideal_factor) - /// - /// Overcapacity is measured by actual_capacity - ideal_capacity. Note that - /// overcapacity can be negative. 
A simple way to have negative overcapacity is to - /// allocate a fresh text block, which will have ideal capacity, and then grow it - /// by 1 byte. It will then have -1 overcapacity. - free_list: std.ArrayListUnmanaged(Atom.Index) = .{}, -}; - -const LazySymbolMetadata = struct { - const State = enum { unused, pending_flush, flushed }; - text_atom: Atom.Index = undefined, - rodata_atom: Atom.Index = undefined, - text_state: State = .unused, - rodata_state: State = .unused, -}; - -const DeclMetadata = struct { - atom: Atom.Index, - shdr: u16, - /// A list of all exports aliases of this Decl. - exports: std.ArrayListUnmanaged(u32) = .{}, - - fn getExport(m: DeclMetadata, elf_file: *const Elf, name: []const u8) ?u32 { - for (m.exports.items) |exp| { - if (mem.eql(u8, name, elf_file.getGlobalName(exp))) return exp; - } - return null; - } - - fn getExportPtr(m: *DeclMetadata, elf_file: *Elf, name: []const u8) ?*u32 { - for (m.exports.items) |*exp| { - if (mem.eql(u8, name, elf_file.getGlobalName(exp.*))) return exp; - } - return null; - } -}; - -base: File, +base: link.File, dwarf: ?Dwarf = null, ptr_width: PtrWidth, @@ -102,14 +6,25 @@ ptr_width: PtrWidth, /// If this is not null, an object file is created by LLVM and linked with LLD afterwards. llvm_object: ?*LlvmObject = null, +/// A list of all input files. +/// Index of each input file also encodes the priority or precedence of one input file +/// over another. +files: std.MultiArrayList(File.Entry) = .{}, +zig_module_index: ?File.Index = null, +linker_defined_index: ?File.Index = null, +objects: std.ArrayListUnmanaged(File.Index) = .{}, + /// Stored in native-endian format, depending on target endianness needs to be bswapped on read/write. /// Same order as in the file. -sections: std.MultiArrayList(Section) = .{}, +shdrs: std.ArrayListUnmanaged(elf.Elf64_Shdr) = .{}, +/// Given index to a section, pulls index of containing phdr if any. 
+phdr_to_shdr_table: std.AutoHashMapUnmanaged(u16, u16) = .{}, +/// File offset into the shdr table. shdr_table_offset: ?u64 = null, /// Stored in native-endian format, depending on target endianness needs to be bswapped on read/write. /// Same order as in the file. -program_headers: std.ArrayListUnmanaged(elf.Elf64_Phdr) = .{}, +phdrs: std.ArrayListUnmanaged(elf.Elf64_Phdr) = .{}, /// The index into the program headers of the PT_PHDR program header phdr_table_index: ?u16 = null, /// The index into the program headers of the PT_LOAD program header containing the phdr @@ -128,17 +43,26 @@ phdr_load_rw_index: ?u16 = null, entry_addr: ?u64 = null, page_size: u32, +default_sym_version: elf.Elf64_Versym, /// .shstrtab buffer shstrtab: StringTable(.strtab) = .{}, /// .strtab buffer strtab: StringTable(.strtab) = .{}, -symtab_section_index: ?u16 = null, +/// Representation of the GOT table as committed to the file. +got: GotSection = .{}, + text_section_index: ?u16 = null, rodata_section_index: ?u16 = null, -got_section_index: ?u16 = null, data_section_index: ?u16 = null, +eh_frame_section_index: ?u16 = null, +eh_frame_hdr_section_index: ?u16 = null, +dynamic_section_index: ?u16 = null, +got_section_index: ?u16 = null, +got_plt_section_index: ?u16 = null, +plt_section_index: ?u16 = null, +rela_dyn_section_index: ?u16 = null, debug_info_section_index: ?u16 = null, debug_abbrev_section_index: ?u16 = null, debug_str_section_index: ?u16 = null, @@ -146,24 +70,36 @@ debug_aranges_section_index: ?u16 = null, debug_line_section_index: ?u16 = null, shstrtab_section_index: ?u16 = null, strtab_section_index: ?u16 = null, +symtab_section_index: ?u16 = null, -/// The same order as in the file. ELF requires global symbols to all be after the -/// local symbols, they cannot be mixed. So we must buffer all the global symbols and -/// write them at the end. These are only the local symbols. The length of this array -/// is the value used for sh_info in the .symtab section. 
-local_symbols: std.ArrayListUnmanaged(elf.Elf64_Sym) = .{}, -global_symbols: std.ArrayListUnmanaged(elf.Elf64_Sym) = .{}, - -local_symbol_free_list: std.ArrayListUnmanaged(u32) = .{}, -global_symbol_free_list: std.ArrayListUnmanaged(u32) = .{}, - -got_table: TableSection(u32) = .{}, +// Linker-defined symbols +dynamic_index: ?Symbol.Index = null, +ehdr_start_index: ?Symbol.Index = null, +init_array_start_index: ?Symbol.Index = null, +init_array_end_index: ?Symbol.Index = null, +fini_array_start_index: ?Symbol.Index = null, +fini_array_end_index: ?Symbol.Index = null, +preinit_array_start_index: ?Symbol.Index = null, +preinit_array_end_index: ?Symbol.Index = null, +got_index: ?Symbol.Index = null, +plt_index: ?Symbol.Index = null, +end_index: ?Symbol.Index = null, +gnu_eh_frame_hdr_index: ?Symbol.Index = null, +dso_handle_index: ?Symbol.Index = null, +rela_iplt_start_index: ?Symbol.Index = null, +rela_iplt_end_index: ?Symbol.Index = null, +start_stop_indexes: std.ArrayListUnmanaged(u32) = .{}, + +/// An array of symbols parsed across all input files. +symbols: std.ArrayListUnmanaged(Symbol) = .{}, +symbols_extra: std.ArrayListUnmanaged(u32) = .{}, +resolver: std.AutoArrayHashMapUnmanaged(u32, Symbol.Index) = .{}, +symbols_free_list: std.ArrayListUnmanaged(Symbol.Index) = .{}, phdr_table_dirty: bool = false, shdr_table_dirty: bool = false, shstrtab_dirty: bool = false, strtab_dirty: bool = false, -got_table_count_dirty: bool = false, debug_strtab_dirty: bool = false, debug_abbrev_section_dirty: bool = false, @@ -171,7 +107,8 @@ debug_aranges_section_dirty: bool = false, debug_info_header_dirty: bool = false, debug_line_header_dirty: bool = false, -error_flags: File.ErrorFlags = File.ErrorFlags{}, +error_flags: link.File.ErrorFlags = link.File.ErrorFlags{}, +misc_errors: std.ArrayListUnmanaged(link.File.ErrorMsg) = .{}, /// Table of tracked LazySymbols. 
lazy_syms: LazySymbolTable = .{}, @@ -181,9 +118,8 @@ decls: std.AutoHashMapUnmanaged(Module.Decl.Index, DeclMetadata) = .{}, /// List of atoms that are owned directly by the linker. atoms: std.ArrayListUnmanaged(Atom) = .{}, - -/// Table of atoms indexed by the symbol index. -atom_by_index_table: std.AutoHashMapUnmanaged(u32, Atom.Index) = .{}, +/// Table of last atom index in a section and matching atom free list if any. +last_atom_and_free_list_table: std.AutoArrayHashMapUnmanaged(u16, LastAtomAndFreeList) = .{}, /// Table of unnamed constants associated with a parent `Decl`. /// We store them here so that we can free the constants whenever the `Decl` @@ -204,15 +140,13 @@ atom_by_index_table: std.AutoHashMapUnmanaged(u32, Atom.Index) = .{}, /// /// value assigned to label `foo` is an unnamed constant belonging/associated /// with `Decl` `main`, and lives as long as that `Decl`. -unnamed_const_atoms: UnnamedConstTable = .{}, +unnamed_consts: UnnamedConstTable = .{}, -/// A table of relocations indexed by the owning them `TextBlock`. -/// Note that once we refactor `TextBlock`'s lifetime and ownership rules, -/// this will be a table indexed by index into the list of Atoms. 
-relocs: RelocTable = .{}, +comdat_groups: std.ArrayListUnmanaged(ComdatGroup) = .{}, +comdat_groups_owners: std.ArrayListUnmanaged(ComdatGroupOwner) = .{}, +comdat_groups_table: std.AutoHashMapUnmanaged(u32, ComdatGroupOwner.Index) = .{}, -const RelocTable = std.AutoHashMapUnmanaged(Atom.Index, std.ArrayListUnmanaged(Atom.Reloc)); -const UnnamedConstTable = std.AutoHashMapUnmanaged(Module.Decl.Index, std.ArrayListUnmanaged(Atom.Index)); +const UnnamedConstTable = std.AutoHashMapUnmanaged(Module.Decl.Index, std.ArrayListUnmanaged(Symbol.Index)); const LazySymbolTable = std.AutoArrayHashMapUnmanaged(Module.Decl.OptionalIndex, LazySymbolMetadata); /// When allocating, the ideal_capacity is calculated by @@ -237,40 +171,34 @@ pub fn openPath(allocator: Allocator, sub_path: []const u8, options: link.Option const self = try createEmpty(allocator, options); errdefer self.base.destroy(); - const file = try options.emit.?.directory.handle.createFile(sub_path, .{ + self.base.file = try options.emit.?.directory.handle.createFile(sub_path, .{ .truncate = false, .read = true, .mode = link.determineMode(options), }); - self.base.file = file; self.shdr_table_dirty = true; // Index 0 is always a null symbol. - try self.local_symbols.append(allocator, .{ - .st_name = 0, - .st_info = 0, - .st_other = 0, - .st_shndx = 0, - .st_value = 0, - .st_size = 0, - }); - - // There must always be a null section in index 0 - try self.sections.append(allocator, .{ - .shdr = .{ - .sh_name = 0, - .sh_type = elf.SHT_NULL, - .sh_flags = 0, - .sh_addr = 0, - .sh_offset = 0, - .sh_size = 0, - .sh_link = 0, - .sh_info = 0, - .sh_addralign = 0, - .sh_entsize = 0, - }, - .phdr_index = undefined, + try self.symbols.append(allocator, .{}); + // Index 0 is always a null symbol. 
+ try self.symbols_extra.append(allocator, 0); + // Allocate atom index 0 to null atom + try self.atoms.append(allocator, .{}); + // Append null file at index 0 + try self.files.append(allocator, .null); + // There must always be a null shdr in index 0 + try self.shdrs.append(allocator, .{ + .sh_name = 0, + .sh_type = elf.SHT_NULL, + .sh_flags = 0, + .sh_addr = 0, + .sh_offset = 0, + .sh_size = 0, + .sh_link = 0, + .sh_info = 0, + .sh_addralign = 0, + .sh_entsize = 0, }); try self.populateMissingMetadata(); @@ -292,6 +220,10 @@ pub fn createEmpty(gpa: Allocator, options: link.Options) !*Elf { .sparc64 => 0x2000, else => 0x1000, }; + const default_sym_version: elf.Elf64_Versym = if (options.output_mode == .Lib and options.link_mode == .Dynamic) + elf.VER_NDX_GLOBAL + else + elf.VER_NDX_LOCAL; var dwarf: ?Dwarf = if (!options.strip and options.module != null) Dwarf.init(gpa, &self.base, options.target) @@ -308,11 +240,13 @@ pub fn createEmpty(gpa: Allocator, options: link.Options) !*Elf { .dwarf = dwarf, .ptr_width = ptr_width, .page_size = page_size, + .default_sym_version = default_sym_version, }; const use_llvm = options.use_llvm; if (use_llvm) { self.llvm_object = try LlvmObject.create(gpa, options); } + return self; } @@ -321,19 +255,27 @@ pub fn deinit(self: *Elf) void { if (self.llvm_object) |llvm_object| llvm_object.destroy(gpa); - for (self.sections.items(.free_list)) |*free_list| { - free_list.deinit(gpa); - } - self.sections.deinit(gpa); + for (self.files.items(.tags), self.files.items(.data)) |tag, *data| switch (tag) { + .null => {}, + .zig_module => data.zig_module.deinit(gpa), + .linker_defined => data.linker_defined.deinit(gpa), + .object => data.object.deinit(gpa), + // .shared_object => data.shared_object.deinit(gpa), + }; + self.files.deinit(gpa); + self.objects.deinit(gpa); - self.program_headers.deinit(gpa); + self.shdrs.deinit(gpa); + self.phdr_to_shdr_table.deinit(gpa); + self.phdrs.deinit(gpa); self.shstrtab.deinit(gpa); 
self.strtab.deinit(gpa); - self.local_symbols.deinit(gpa); - self.global_symbols.deinit(gpa); - self.global_symbol_free_list.deinit(gpa); - self.local_symbol_free_list.deinit(gpa); - self.got_table.deinit(gpa); + self.symbols.deinit(gpa); + self.symbols_extra.deinit(gpa); + self.symbols_free_list.deinit(gpa); + self.got.deinit(gpa); + self.resolver.deinit(gpa); + self.start_stop_indexes.deinit(gpa); { var it = self.decls.iterator(); @@ -344,43 +286,41 @@ pub fn deinit(self: *Elf) void { } self.atoms.deinit(gpa); - self.atom_by_index_table.deinit(gpa); - self.lazy_syms.deinit(gpa); - - { - var it = self.unnamed_const_atoms.valueIterator(); - while (it.next()) |atoms| { - atoms.deinit(gpa); - } - self.unnamed_const_atoms.deinit(gpa); + for (self.last_atom_and_free_list_table.values()) |*value| { + value.free_list.deinit(gpa); } + self.last_atom_and_free_list_table.deinit(gpa); + self.lazy_syms.deinit(gpa); { - var it = self.relocs.valueIterator(); - while (it.next()) |relocs| { - relocs.deinit(gpa); + var it = self.unnamed_consts.valueIterator(); + while (it.next()) |syms| { + syms.deinit(gpa); } - self.relocs.deinit(gpa); + self.unnamed_consts.deinit(gpa); } if (self.dwarf) |*dw| { dw.deinit(); } + + self.misc_errors.deinit(gpa); + self.comdat_groups.deinit(gpa); + self.comdat_groups_owners.deinit(gpa); + self.comdat_groups_table.deinit(gpa); } -pub fn getDeclVAddr(self: *Elf, decl_index: Module.Decl.Index, reloc_info: File.RelocInfo) !u64 { +pub fn getDeclVAddr(self: *Elf, decl_index: Module.Decl.Index, reloc_info: link.File.RelocInfo) !u64 { assert(self.llvm_object == null); - const this_atom_index = try self.getOrCreateAtomForDecl(decl_index); - const this_atom = self.getAtom(this_atom_index); - const target = this_atom.getSymbolIndex().?; - const vaddr = this_atom.getSymbol(self).st_value; - const atom_index = self.getAtomIndexForSymbol(reloc_info.parent_atom_index).?; - try Atom.addRelocation(self, atom_index, .{ - .target = target, - .offset = 
reloc_info.offset, - .addend = reloc_info.addend, - .prev_vaddr = vaddr, + const this_sym_index = try self.getOrCreateMetadataForDecl(decl_index); + const this_sym = self.symbol(this_sym_index); + const vaddr = this_sym.value; + const parent_atom = self.symbol(reloc_info.parent_atom_index).atom(self).?; + try parent_atom.addReloc(self, .{ + .r_offset = reloc_info.offset, + .r_info = (@as(u64, @intCast(this_sym.esym_index)) << 32) | elf.R_X86_64_64, + .r_addend = reloc_info.addend, }); return vaddr; @@ -397,7 +337,7 @@ fn detectAllocCollision(self: *Elf, start: u64, size: u64) ?u64 { if (self.shdr_table_offset) |off| { const shdr_size: u64 = if (small_ptr) @sizeOf(elf.Elf32_Shdr) else @sizeOf(elf.Elf64_Shdr); - const tight_size = self.sections.slice().len * shdr_size; + const tight_size = self.shdrs.items.len * shdr_size; const increased_size = padToIdeal(tight_size); const test_end = off + increased_size; if (end > off and start < test_end) { @@ -405,17 +345,17 @@ fn detectAllocCollision(self: *Elf, start: u64, size: u64) ?u64 { } } - for (self.sections.items(.shdr)) |section| { + for (self.shdrs.items) |section| { const increased_size = padToIdeal(section.sh_size); const test_end = section.sh_offset + increased_size; if (end > section.sh_offset and start < test_end) { return test_end; } } - for (self.program_headers.items) |program_header| { - const increased_size = padToIdeal(program_header.p_filesz); - const test_end = program_header.p_offset + increased_size; - if (end > program_header.p_offset and start < test_end) { + for (self.phdrs.items) |phdr| { + const increased_size = padToIdeal(phdr.p_filesz); + const test_end = phdr.p_offset + increased_size; + if (end > phdr.p_offset and start < test_end) { return test_end; } } @@ -429,13 +369,13 @@ pub fn allocatedSize(self: *Elf, start: u64) u64 { if (self.shdr_table_offset) |off| { if (off > start and off < min_pos) min_pos = off; } - for (self.sections.items(.shdr)) |section| { + for (self.shdrs.items) |section| 
{ if (section.sh_offset <= start) continue; if (section.sh_offset < min_pos) min_pos = section.sh_offset; } - for (self.program_headers.items) |program_header| { - if (program_header.p_offset <= start) continue; - if (program_header.p_offset < min_pos) min_pos = program_header.p_offset; + for (self.phdrs.items) |phdr| { + if (phdr.p_offset <= start) continue; + if (phdr.p_offset < min_pos) min_pos = phdr.p_offset; } return min_pos - start; } @@ -457,19 +397,20 @@ pub fn populateMissingMetadata(self: *Elf) !void { .p64 => false, }; const ptr_size: u8 = self.ptrWidthBytes(); + const image_base = self.calcImageBase(); if (self.phdr_table_index == null) { - self.phdr_table_index = @as(u16, @intCast(self.program_headers.items.len)); + self.phdr_table_index = @as(u16, @intCast(self.phdrs.items.len)); const p_align: u16 = switch (self.ptr_width) { .p32 => @alignOf(elf.Elf32_Phdr), .p64 => @alignOf(elf.Elf64_Phdr), }; - try self.program_headers.append(gpa, .{ + try self.phdrs.append(gpa, .{ .p_type = elf.PT_PHDR, .p_offset = 0, .p_filesz = 0, - .p_vaddr = 0, - .p_paddr = 0, + .p_vaddr = image_base, + .p_paddr = image_base, .p_memsz = 0, .p_align = p_align, .p_flags = elf.PF_R, @@ -478,31 +419,29 @@ pub fn populateMissingMetadata(self: *Elf) !void { } if (self.phdr_table_load_index == null) { - self.phdr_table_load_index = @as(u16, @intCast(self.program_headers.items.len)); + self.phdr_table_load_index = @as(u16, @intCast(self.phdrs.items.len)); // TODO Same as for GOT - const phdr_addr: u64 = if (self.base.options.target.ptrBitWidth() >= 32) 0x1000000 else 0x1000; - const p_align = self.page_size; - try self.program_headers.append(gpa, .{ + try self.phdrs.append(gpa, .{ .p_type = elf.PT_LOAD, .p_offset = 0, .p_filesz = 0, - .p_vaddr = phdr_addr, - .p_paddr = phdr_addr, + .p_vaddr = image_base, + .p_paddr = image_base, .p_memsz = 0, - .p_align = p_align, + .p_align = self.page_size, .p_flags = elf.PF_R, }); self.phdr_table_dirty = true; } if (self.phdr_load_re_index == 
null) { - self.phdr_load_re_index = @as(u16, @intCast(self.program_headers.items.len)); + self.phdr_load_re_index = @as(u16, @intCast(self.phdrs.items.len)); const file_size = self.base.options.program_code_size_hint; const p_align = self.page_size; const off = self.findFreeSpace(file_size, p_align); log.debug("found PT_LOAD RE free space 0x{x} to 0x{x}", .{ off, off + file_size }); - const entry_addr: u64 = self.entry_addr orelse if (self.base.options.target.cpu.arch == .spu_2) @as(u64, 0) else default_entry_addr; - try self.program_headers.append(gpa, .{ + const entry_addr = self.defaultEntryAddress(); + try self.phdrs.append(gpa, .{ .p_type = elf.PT_LOAD, .p_offset = off, .p_filesz = file_size, @@ -517,7 +456,7 @@ pub fn populateMissingMetadata(self: *Elf) !void { } if (self.phdr_got_index == null) { - self.phdr_got_index = @as(u16, @intCast(self.program_headers.items.len)); + self.phdr_got_index = @as(u16, @intCast(self.phdrs.items.len)); const file_size = @as(u64, ptr_size) * self.base.options.symbol_count_hint; // We really only need ptr alignment but since we are using PROGBITS, linux requires // page align. @@ -528,7 +467,7 @@ pub fn populateMissingMetadata(self: *Elf) !void { // we'll need to re-use that function anyway, in case the GOT grows and overlaps something // else in virtual memory. const got_addr: u32 = if (self.base.options.target.ptrBitWidth() >= 32) 0x4000000 else 0x8000; - try self.program_headers.append(gpa, .{ + try self.phdrs.append(gpa, .{ .p_type = elf.PT_LOAD, .p_offset = off, .p_filesz = file_size, @@ -542,7 +481,7 @@ pub fn populateMissingMetadata(self: *Elf) !void { } if (self.phdr_load_ro_index == null) { - self.phdr_load_ro_index = @as(u16, @intCast(self.program_headers.items.len)); + self.phdr_load_ro_index = @as(u16, @intCast(self.phdrs.items.len)); // TODO Find a hint about how much data need to be in rodata ? 
const file_size = 1024; // Same reason as for GOT @@ -551,7 +490,7 @@ pub fn populateMissingMetadata(self: *Elf) !void { log.debug("found PT_LOAD RO free space 0x{x} to 0x{x}", .{ off, off + file_size }); // TODO Same as for GOT const rodata_addr: u32 = if (self.base.options.target.ptrBitWidth() >= 32) 0xc000000 else 0xa000; - try self.program_headers.append(gpa, .{ + try self.phdrs.append(gpa, .{ .p_type = elf.PT_LOAD, .p_offset = off, .p_filesz = file_size, @@ -565,7 +504,7 @@ pub fn populateMissingMetadata(self: *Elf) !void { } if (self.phdr_load_rw_index == null) { - self.phdr_load_rw_index = @as(u16, @intCast(self.program_headers.items.len)); + self.phdr_load_rw_index = @as(u16, @intCast(self.phdrs.items.len)); // TODO Find a hint about how much data need to be in data ? const file_size = 1024; // Same reason as for GOT @@ -574,7 +513,7 @@ pub fn populateMissingMetadata(self: *Elf) !void { log.debug("found PT_LOAD RW free space 0x{x} to 0x{x}", .{ off, off + file_size }); // TODO Same as for GOT const rwdata_addr: u32 = if (self.base.options.target.ptrBitWidth() >= 32) 0x10000000 else 0xc000; - try self.program_headers.append(gpa, .{ + try self.phdrs.append(gpa, .{ .p_type = elf.PT_LOAD, .p_offset = off, .p_filesz = file_size, @@ -588,197 +527,174 @@ pub fn populateMissingMetadata(self: *Elf) !void { } if (self.shstrtab_section_index == null) { - self.shstrtab_section_index = @as(u16, @intCast(self.sections.slice().len)); + self.shstrtab_section_index = @as(u16, @intCast(self.shdrs.items.len)); assert(self.shstrtab.buffer.items.len == 0); try self.shstrtab.buffer.append(gpa, 0); // need a 0 at position 0 const off = self.findFreeSpace(self.shstrtab.buffer.items.len, 1); log.debug("found .shstrtab free space 0x{x} to 0x{x}", .{ off, off + self.shstrtab.buffer.items.len }); - try self.sections.append(gpa, .{ - .shdr = .{ - .sh_name = try self.shstrtab.insert(gpa, ".shstrtab"), - .sh_type = elf.SHT_STRTAB, - .sh_flags = 0, - .sh_addr = 0, - .sh_offset = off, - 
.sh_size = self.shstrtab.buffer.items.len, - .sh_link = 0, - .sh_info = 0, - .sh_addralign = 1, - .sh_entsize = 0, - }, - .phdr_index = undefined, + try self.shdrs.append(gpa, .{ + .sh_name = try self.shstrtab.insert(gpa, ".shstrtab"), + .sh_type = elf.SHT_STRTAB, + .sh_flags = 0, + .sh_addr = 0, + .sh_offset = off, + .sh_size = self.shstrtab.buffer.items.len, + .sh_link = 0, + .sh_info = 0, + .sh_addralign = 1, + .sh_entsize = 0, }); self.shstrtab_dirty = true; self.shdr_table_dirty = true; } if (self.strtab_section_index == null) { - self.strtab_section_index = @as(u16, @intCast(self.sections.slice().len)); + self.strtab_section_index = @as(u16, @intCast(self.shdrs.items.len)); assert(self.strtab.buffer.items.len == 0); try self.strtab.buffer.append(gpa, 0); // need a 0 at position 0 const off = self.findFreeSpace(self.strtab.buffer.items.len, 1); log.debug("found .strtab free space 0x{x} to 0x{x}", .{ off, off + self.strtab.buffer.items.len }); - try self.sections.append(gpa, .{ - .shdr = .{ - .sh_name = try self.shstrtab.insert(gpa, ".strtab"), - .sh_type = elf.SHT_STRTAB, - .sh_flags = 0, - .sh_addr = 0, - .sh_offset = off, - .sh_size = self.strtab.buffer.items.len, - .sh_link = 0, - .sh_info = 0, - .sh_addralign = 1, - .sh_entsize = 0, - }, - .phdr_index = undefined, + try self.shdrs.append(gpa, .{ + .sh_name = try self.shstrtab.insert(gpa, ".strtab"), + .sh_type = elf.SHT_STRTAB, + .sh_flags = 0, + .sh_addr = 0, + .sh_offset = off, + .sh_size = self.strtab.buffer.items.len, + .sh_link = 0, + .sh_info = 0, + .sh_addralign = 1, + .sh_entsize = 0, }); self.strtab_dirty = true; self.shdr_table_dirty = true; } if (self.text_section_index == null) { - self.text_section_index = @as(u16, @intCast(self.sections.slice().len)); - const phdr = &self.program_headers.items[self.phdr_load_re_index.?]; - - try self.sections.append(gpa, .{ - .shdr = .{ - .sh_name = try self.shstrtab.insert(gpa, ".text"), - .sh_type = elf.SHT_PROGBITS, - .sh_flags = elf.SHF_ALLOC | 
elf.SHF_EXECINSTR, - .sh_addr = phdr.p_vaddr, - .sh_offset = phdr.p_offset, - .sh_size = phdr.p_filesz, - .sh_link = 0, - .sh_info = 0, - .sh_addralign = 1, - .sh_entsize = 0, - }, - .phdr_index = self.phdr_load_re_index.?, + self.text_section_index = @as(u16, @intCast(self.shdrs.items.len)); + const phdr = &self.phdrs.items[self.phdr_load_re_index.?]; + try self.shdrs.append(gpa, .{ + .sh_name = try self.shstrtab.insert(gpa, ".text"), + .sh_type = elf.SHT_PROGBITS, + .sh_flags = elf.SHF_ALLOC | elf.SHF_EXECINSTR, + .sh_addr = phdr.p_vaddr, + .sh_offset = phdr.p_offset, + .sh_size = phdr.p_filesz, + .sh_link = 0, + .sh_info = 0, + .sh_addralign = 1, + .sh_entsize = 0, }); + try self.phdr_to_shdr_table.putNoClobber(gpa, self.text_section_index.?, self.phdr_load_re_index.?); + try self.last_atom_and_free_list_table.putNoClobber(gpa, self.text_section_index.?, .{}); self.shdr_table_dirty = true; } if (self.got_section_index == null) { - self.got_section_index = @as(u16, @intCast(self.sections.slice().len)); - const phdr = &self.program_headers.items[self.phdr_got_index.?]; - - try self.sections.append(gpa, .{ - .shdr = .{ - .sh_name = try self.shstrtab.insert(gpa, ".got"), - .sh_type = elf.SHT_PROGBITS, - .sh_flags = elf.SHF_ALLOC, - .sh_addr = phdr.p_vaddr, - .sh_offset = phdr.p_offset, - .sh_size = phdr.p_filesz, - .sh_link = 0, - .sh_info = 0, - .sh_addralign = @as(u16, ptr_size), - .sh_entsize = 0, - }, - .phdr_index = self.phdr_got_index.?, + self.got_section_index = @as(u16, @intCast(self.shdrs.items.len)); + const phdr = &self.phdrs.items[self.phdr_got_index.?]; + try self.shdrs.append(gpa, .{ + .sh_name = try self.shstrtab.insert(gpa, ".got"), + .sh_type = elf.SHT_PROGBITS, + .sh_flags = elf.SHF_ALLOC, + .sh_addr = phdr.p_vaddr, + .sh_offset = phdr.p_offset, + .sh_size = phdr.p_filesz, + .sh_link = 0, + .sh_info = 0, + .sh_addralign = @as(u16, ptr_size), + .sh_entsize = 0, }); + try self.phdr_to_shdr_table.putNoClobber(gpa, self.got_section_index.?, 
self.phdr_got_index.?); self.shdr_table_dirty = true; } if (self.rodata_section_index == null) { - self.rodata_section_index = @as(u16, @intCast(self.sections.slice().len)); - const phdr = &self.program_headers.items[self.phdr_load_ro_index.?]; - - try self.sections.append(gpa, .{ - .shdr = .{ - .sh_name = try self.shstrtab.insert(gpa, ".rodata"), - .sh_type = elf.SHT_PROGBITS, - .sh_flags = elf.SHF_ALLOC, - .sh_addr = phdr.p_vaddr, - .sh_offset = phdr.p_offset, - .sh_size = phdr.p_filesz, - .sh_link = 0, - .sh_info = 0, - .sh_addralign = 1, - .sh_entsize = 0, - }, - .phdr_index = self.phdr_load_ro_index.?, + self.rodata_section_index = @as(u16, @intCast(self.shdrs.items.len)); + const phdr = &self.phdrs.items[self.phdr_load_ro_index.?]; + try self.shdrs.append(gpa, .{ + .sh_name = try self.shstrtab.insert(gpa, ".rodata"), + .sh_type = elf.SHT_PROGBITS, + .sh_flags = elf.SHF_ALLOC, + .sh_addr = phdr.p_vaddr, + .sh_offset = phdr.p_offset, + .sh_size = phdr.p_filesz, + .sh_link = 0, + .sh_info = 0, + .sh_addralign = 1, + .sh_entsize = 0, }); + try self.phdr_to_shdr_table.putNoClobber(gpa, self.rodata_section_index.?, self.phdr_load_ro_index.?); + try self.last_atom_and_free_list_table.putNoClobber(gpa, self.rodata_section_index.?, .{}); self.shdr_table_dirty = true; } if (self.data_section_index == null) { - self.data_section_index = @as(u16, @intCast(self.sections.slice().len)); - const phdr = &self.program_headers.items[self.phdr_load_rw_index.?]; - - try self.sections.append(gpa, .{ - .shdr = .{ - .sh_name = try self.shstrtab.insert(gpa, ".data"), - .sh_type = elf.SHT_PROGBITS, - .sh_flags = elf.SHF_WRITE | elf.SHF_ALLOC, - .sh_addr = phdr.p_vaddr, - .sh_offset = phdr.p_offset, - .sh_size = phdr.p_filesz, - .sh_link = 0, - .sh_info = 0, - .sh_addralign = @as(u16, ptr_size), - .sh_entsize = 0, - }, - .phdr_index = self.phdr_load_rw_index.?, + self.data_section_index = @as(u16, @intCast(self.shdrs.items.len)); + const phdr = 
&self.phdrs.items[self.phdr_load_rw_index.?]; + try self.shdrs.append(gpa, .{ + .sh_name = try self.shstrtab.insert(gpa, ".data"), + .sh_type = elf.SHT_PROGBITS, + .sh_flags = elf.SHF_WRITE | elf.SHF_ALLOC, + .sh_addr = phdr.p_vaddr, + .sh_offset = phdr.p_offset, + .sh_size = phdr.p_filesz, + .sh_link = 0, + .sh_info = 0, + .sh_addralign = @as(u16, ptr_size), + .sh_entsize = 0, }); + try self.phdr_to_shdr_table.putNoClobber(gpa, self.data_section_index.?, self.phdr_load_rw_index.?); + try self.last_atom_and_free_list_table.putNoClobber(gpa, self.data_section_index.?, .{}); self.shdr_table_dirty = true; } if (self.symtab_section_index == null) { - self.symtab_section_index = @as(u16, @intCast(self.sections.slice().len)); + self.symtab_section_index = @as(u16, @intCast(self.shdrs.items.len)); const min_align: u16 = if (small_ptr) @alignOf(elf.Elf32_Sym) else @alignOf(elf.Elf64_Sym); const each_size: u64 = if (small_ptr) @sizeOf(elf.Elf32_Sym) else @sizeOf(elf.Elf64_Sym); const file_size = self.base.options.symbol_count_hint * each_size; const off = self.findFreeSpace(file_size, min_align); log.debug("found symtab free space 0x{x} to 0x{x}", .{ off, off + file_size }); - - try self.sections.append(gpa, .{ - .shdr = .{ - .sh_name = try self.shstrtab.insert(gpa, ".symtab"), - .sh_type = elf.SHT_SYMTAB, - .sh_flags = 0, - .sh_addr = 0, - .sh_offset = off, - .sh_size = file_size, - // The section header index of the associated string table. - .sh_link = self.strtab_section_index.?, - .sh_info = @as(u32, @intCast(self.local_symbols.items.len)), - .sh_addralign = min_align, - .sh_entsize = each_size, - }, - .phdr_index = undefined, + try self.shdrs.append(gpa, .{ + .sh_name = try self.shstrtab.insert(gpa, ".symtab"), + .sh_type = elf.SHT_SYMTAB, + .sh_flags = 0, + .sh_addr = 0, + .sh_offset = off, + .sh_size = file_size, + // The section header index of the associated string table. 
+ .sh_link = self.strtab_section_index.?, + .sh_info = @as(u32, @intCast(self.symbols.items.len)), + .sh_addralign = min_align, + .sh_entsize = each_size, }); self.shdr_table_dirty = true; } if (self.dwarf) |*dw| { if (self.debug_str_section_index == null) { - self.debug_str_section_index = @as(u16, @intCast(self.sections.slice().len)); + self.debug_str_section_index = @as(u16, @intCast(self.shdrs.items.len)); assert(dw.strtab.buffer.items.len == 0); try dw.strtab.buffer.append(gpa, 0); - try self.sections.append(gpa, .{ - .shdr = .{ - .sh_name = try self.shstrtab.insert(gpa, ".debug_str"), - .sh_type = elf.SHT_PROGBITS, - .sh_flags = elf.SHF_MERGE | elf.SHF_STRINGS, - .sh_addr = 0, - .sh_offset = 0, - .sh_size = 0, - .sh_link = 0, - .sh_info = 0, - .sh_addralign = 1, - .sh_entsize = 1, - }, - .phdr_index = undefined, + try self.shdrs.append(gpa, .{ + .sh_name = try self.shstrtab.insert(gpa, ".debug_str"), + .sh_type = elf.SHT_PROGBITS, + .sh_flags = elf.SHF_MERGE | elf.SHF_STRINGS, + .sh_addr = 0, + .sh_offset = 0, + .sh_size = 0, + .sh_link = 0, + .sh_info = 0, + .sh_addralign = 1, + .sh_entsize = 1, }); self.debug_strtab_dirty = true; self.shdr_table_dirty = true; } if (self.debug_info_section_index == null) { - self.debug_info_section_index = @as(u16, @intCast(self.sections.slice().len)); - + self.debug_info_section_index = @as(u16, @intCast(self.shdrs.items.len)); const file_size_hint = 200; const p_align = 1; const off = self.findFreeSpace(file_size_hint, p_align); @@ -786,28 +702,24 @@ pub fn populateMissingMetadata(self: *Elf) !void { off, off + file_size_hint, }); - try self.sections.append(gpa, .{ - .shdr = .{ - .sh_name = try self.shstrtab.insert(gpa, ".debug_info"), - .sh_type = elf.SHT_PROGBITS, - .sh_flags = 0, - .sh_addr = 0, - .sh_offset = off, - .sh_size = file_size_hint, - .sh_link = 0, - .sh_info = 0, - .sh_addralign = p_align, - .sh_entsize = 0, - }, - .phdr_index = undefined, + try self.shdrs.append(gpa, .{ + .sh_name = try 
self.shstrtab.insert(gpa, ".debug_info"), + .sh_type = elf.SHT_PROGBITS, + .sh_flags = 0, + .sh_addr = 0, + .sh_offset = off, + .sh_size = file_size_hint, + .sh_link = 0, + .sh_info = 0, + .sh_addralign = p_align, + .sh_entsize = 0, }); self.shdr_table_dirty = true; self.debug_info_header_dirty = true; } if (self.debug_abbrev_section_index == null) { - self.debug_abbrev_section_index = @as(u16, @intCast(self.sections.slice().len)); - + self.debug_abbrev_section_index = @as(u16, @intCast(self.shdrs.items.len)); const file_size_hint = 128; const p_align = 1; const off = self.findFreeSpace(file_size_hint, p_align); @@ -815,28 +727,24 @@ pub fn populateMissingMetadata(self: *Elf) !void { off, off + file_size_hint, }); - try self.sections.append(gpa, .{ - .shdr = .{ - .sh_name = try self.shstrtab.insert(gpa, ".debug_abbrev"), - .sh_type = elf.SHT_PROGBITS, - .sh_flags = 0, - .sh_addr = 0, - .sh_offset = off, - .sh_size = file_size_hint, - .sh_link = 0, - .sh_info = 0, - .sh_addralign = p_align, - .sh_entsize = 0, - }, - .phdr_index = undefined, + try self.shdrs.append(gpa, .{ + .sh_name = try self.shstrtab.insert(gpa, ".debug_abbrev"), + .sh_type = elf.SHT_PROGBITS, + .sh_flags = 0, + .sh_addr = 0, + .sh_offset = off, + .sh_size = file_size_hint, + .sh_link = 0, + .sh_info = 0, + .sh_addralign = p_align, + .sh_entsize = 0, }); self.shdr_table_dirty = true; self.debug_abbrev_section_dirty = true; } if (self.debug_aranges_section_index == null) { - self.debug_aranges_section_index = @as(u16, @intCast(self.sections.slice().len)); - + self.debug_aranges_section_index = @as(u16, @intCast(self.shdrs.items.len)); const file_size_hint = 160; const p_align = 16; const off = self.findFreeSpace(file_size_hint, p_align); @@ -844,28 +752,24 @@ pub fn populateMissingMetadata(self: *Elf) !void { off, off + file_size_hint, }); - try self.sections.append(gpa, .{ - .shdr = .{ - .sh_name = try self.shstrtab.insert(gpa, ".debug_aranges"), - .sh_type = elf.SHT_PROGBITS, - .sh_flags = 0, - 
.sh_addr = 0, - .sh_offset = off, - .sh_size = file_size_hint, - .sh_link = 0, - .sh_info = 0, - .sh_addralign = p_align, - .sh_entsize = 0, - }, - .phdr_index = undefined, + try self.shdrs.append(gpa, .{ + .sh_name = try self.shstrtab.insert(gpa, ".debug_aranges"), + .sh_type = elf.SHT_PROGBITS, + .sh_flags = 0, + .sh_addr = 0, + .sh_offset = off, + .sh_size = file_size_hint, + .sh_link = 0, + .sh_info = 0, + .sh_addralign = p_align, + .sh_entsize = 0, }); self.shdr_table_dirty = true; self.debug_aranges_section_dirty = true; } if (self.debug_line_section_index == null) { - self.debug_line_section_index = @as(u16, @intCast(self.sections.slice().len)); - + self.debug_line_section_index = @as(u16, @intCast(self.shdrs.items.len)); const file_size_hint = 250; const p_align = 1; const off = self.findFreeSpace(file_size_hint, p_align); @@ -873,20 +777,17 @@ pub fn populateMissingMetadata(self: *Elf) !void { off, off + file_size_hint, }); - try self.sections.append(gpa, .{ - .shdr = .{ - .sh_name = try self.shstrtab.insert(gpa, ".debug_line"), - .sh_type = elf.SHT_PROGBITS, - .sh_flags = 0, - .sh_addr = 0, - .sh_offset = off, - .sh_size = file_size_hint, - .sh_link = 0, - .sh_info = 0, - .sh_addralign = p_align, - .sh_entsize = 0, - }, - .phdr_index = undefined, + try self.shdrs.append(gpa, .{ + .sh_name = try self.shstrtab.insert(gpa, ".debug_line"), + .sh_type = elf.SHT_PROGBITS, + .sh_flags = 0, + .sh_addr = 0, + .sh_offset = off, + .sh_size = file_size_hint, + .sh_link = 0, + .sh_info = 0, + .sh_addralign = p_align, + .sh_entsize = 0, }); self.shdr_table_dirty = true; self.debug_line_header_dirty = true; @@ -902,13 +803,13 @@ pub fn populateMissingMetadata(self: *Elf) !void { .p64 => @alignOf(elf.Elf64_Shdr), }; if (self.shdr_table_offset == null) { - self.shdr_table_offset = self.findFreeSpace(self.sections.slice().len * shsize, shalign); + self.shdr_table_offset = self.findFreeSpace(self.shdrs.items.len * shsize, shalign); self.shdr_table_dirty = true; } { // 
Iterate over symbols, populating free_list and last_text_block. - if (self.local_symbols.items.len != 1) { + if (self.symbols.items.len != 1) { @panic("TODO implement setting up free_list and last_text_block from existing ELF file"); } // We are starting with an empty file. The default values are correct, null and empty list. @@ -920,7 +821,7 @@ pub fn populateMissingMetadata(self: *Elf) !void { // offset + it's filesize. var max_file_offset: u64 = 0; - for (self.sections.items(.shdr)) |shdr| { + for (self.shdrs.items) |shdr| { if (shdr.sh_offset + shdr.sh_size > max_file_offset) { max_file_offset = shdr.sh_offset + shdr.sh_size; } @@ -928,25 +829,47 @@ pub fn populateMissingMetadata(self: *Elf) !void { try self.base.file.?.pwriteAll(&[_]u8{0}, max_file_offset); } + + if (self.base.options.module) |module| { + if (self.zig_module_index == null) { + const index = @as(File.Index, @intCast(try self.files.addOne(gpa))); + self.files.set(index, .{ .zig_module = .{ + .index = index, + .path = module.main_pkg.root_src_path, + } }); + self.zig_module_index = index; + const zig_module = self.file(index).?.zig_module; + + const name_off = try self.strtab.insert(gpa, std.fs.path.stem(module.main_pkg.root_src_path)); + const symbol_index = try self.addSymbol(); + try zig_module.local_symbols.append(gpa, symbol_index); + const symbol_ptr = self.symbol(symbol_index); + symbol_ptr.file_index = zig_module.index; + symbol_ptr.name_offset = name_off; + + const esym_index = try zig_module.addLocalEsym(gpa); + const esym = &zig_module.local_esyms.items[esym_index]; + esym.st_name = name_off; + esym.st_info |= elf.STT_FILE; + esym.st_shndx = elf.SHN_ABS; + symbol_ptr.esym_index = esym_index; + } + } } -fn growAllocSection(self: *Elf, shdr_index: u16, needed_size: u64) !void { +pub fn growAllocSection(self: *Elf, shdr_index: u16, needed_size: u64) !void { // TODO Also detect virtual address collisions. 
- const shdr = &self.sections.items(.shdr)[shdr_index]; - const phdr_index = self.sections.items(.phdr_index)[shdr_index]; - const phdr = &self.program_headers.items[phdr_index]; - const maybe_last_atom_index = self.sections.items(.last_atom_index)[shdr_index]; + const shdr = &self.shdrs.items[shdr_index]; + const phdr_index = self.phdr_to_shdr_table.get(shdr_index).?; + const phdr = &self.phdrs.items[phdr_index]; if (needed_size > self.allocatedSize(shdr.sh_offset)) { // Must move the entire section. const new_offset = self.findFreeSpace(needed_size, self.page_size); - const existing_size = if (maybe_last_atom_index) |last_atom_index| blk: { - const last = self.getAtom(last_atom_index); - const sym = last.getSymbol(self); - break :blk (sym.st_value + sym.st_size) - phdr.p_vaddr; - } else if (shdr_index == self.got_section_index.?) blk: { - break :blk shdr.sh_size; - } else 0; + const existing_size = if (self.last_atom_and_free_list_table.get(shdr_index)) |meta| blk: { + const last = self.atom(meta.last_atom_index) orelse break :blk 0; + break :blk (last.value + last.size) - phdr.p_vaddr; + } else shdr.sh_size; shdr.sh_size = 0; log.debug("new '{?s}' file offset 0x{x} to 0x{x}", .{ @@ -976,7 +899,7 @@ pub fn growNonAllocSection( min_alignment: u32, requires_file_copy: bool, ) !void { - const shdr = &self.sections.items(.shdr)[shdr_index]; + const shdr = &self.shdrs.items[shdr_index]; if (needed_size > self.allocatedSize(shdr.sh_offset)) { const existing_size = if (self.symtab_section_index.? == shdr_index) blk: { @@ -989,7 +912,12 @@ pub fn growNonAllocSection( shdr.sh_size = 0; // Move all the symbols to a new file location. 
const new_offset = self.findFreeSpace(needed_size, min_alignment); - log.debug("moving '{?s}' from 0x{x} to 0x{x}", .{ self.shstrtab.get(shdr.sh_name), shdr.sh_offset, new_offset }); + + log.debug("moving '{?s}' from 0x{x} to 0x{x}", .{ + self.shstrtab.get(shdr.sh_name), + shdr.sh_offset, + new_offset, + }); if (requires_file_copy) { const amt = try self.base.file.?.copyRangeAll( @@ -1065,21 +993,50 @@ pub fn flushModule(self: *Elf, comp: *Compilation, prog_node: *std.Progress.Node // corresponds to the Zig source code. const module = self.base.options.module orelse return error.LinkingWithoutZigSourceUnimplemented; + const compiler_rt_path: ?[]const u8 = blk: { + if (comp.compiler_rt_lib) |x| break :blk x.full_object_path; + if (comp.compiler_rt_obj) |x| break :blk x.full_object_path; + break :blk null; + }; + _ = compiler_rt_path; + + // Here we will parse input positional and library files (if referenced). + // This will roughly match in any linker backend we support. + var positionals = std.ArrayList(Compilation.LinkObject).init(gpa); + defer positionals.deinit(); + try positionals.ensureUnusedCapacity(self.base.options.objects.len); + positionals.appendSliceAssumeCapacity(self.base.options.objects); + + // This is a set of object files emitted by clang in a single `build-exe` invocation. + // For instance, the implicit `a.o` as compiled by `zig build-exe a.c` will end up + // in this set. + for (comp.c_object_table.keys()) |key| { + try positionals.append(.{ .path = key.status.success.object_path }); + } + + for (positionals.items) |obj| { + const in_file = try std.fs.cwd().openFile(obj.path, .{}); + defer in_file.close(); + + var parse_ctx: ParseErrorCtx = .{ .detected_cpu_arch = undefined }; + self.parsePositional(in_file, obj.path, obj.must_link, &parse_ctx) catch |err| + try self.handleAndReportParseError(obj.path, err, &parse_ctx); + } + + // Handle any lazy symbols that were emitted by incremental compilation. 
if (self.lazy_syms.getPtr(.none)) |metadata| { // Most lazy symbols can be updated on first use, but // anyerror needs to wait for everything to be flushed. - if (metadata.text_state != .unused) self.updateLazySymbolAtom( - File.LazySymbol.initDecl(.code, null, module), - metadata.text_atom, - self.text_section_index.?, + if (metadata.text_state != .unused) self.updateLazySymbol( + link.File.LazySymbol.initDecl(.code, null, module), + metadata.text_symbol_index, ) catch |err| return switch (err) { error.CodegenFail => error.FlushFailure, else => |e| e, }; - if (metadata.rodata_state != .unused) self.updateLazySymbolAtom( - File.LazySymbol.initDecl(.const_data, null, module), - metadata.rodata_atom, - self.rodata_section_index.?, + if (metadata.rodata_state != .unused) self.updateLazySymbol( + link.File.LazySymbol.initDecl(.const_data, null, module), + metadata.rodata_symbol_index, ) catch |err| return switch (err) { error.CodegenFail => error.FlushFailure, else => |e| e, @@ -1097,46 +1054,59 @@ pub fn flushModule(self: *Elf, comp: *Compilation, prog_node: *std.Progress.Node try dw.flushModule(module); } - { - var it = self.relocs.iterator(); - while (it.next()) |entry| { - const atom_index = entry.key_ptr.*; - const relocs = entry.value_ptr.*; - const atom = self.getAtom(atom_index); - const source_sym = atom.getSymbol(self); - const source_shdr = self.sections.items(.shdr)[source_sym.st_shndx]; - - log.debug("relocating '{?s}'", .{self.strtab.get(source_sym.st_name)}); - - for (relocs.items) |*reloc| { - const target_sym = self.local_symbols.items[reloc.target]; - const target_vaddr = target_sym.st_value + reloc.addend; - - if (target_vaddr == reloc.prev_vaddr) continue; - - const section_offset = (source_sym.st_value + reloc.offset) - source_shdr.sh_addr; - const file_offset = source_shdr.sh_offset + section_offset; - - log.debug(" ({x}: [() => 0x{x}] ({?s}))", .{ - reloc.offset, - target_vaddr, - self.strtab.get(target_sym.st_name), - }); - - switch 
(self.ptr_width) { - .p32 => try self.base.file.?.pwriteAll(mem.asBytes(&@as(u32, @intCast(target_vaddr))), file_offset), - .p64 => try self.base.file.?.pwriteAll(mem.asBytes(&target_vaddr), file_offset), - } - - reloc.prev_vaddr = target_vaddr; - } - } - } - - try self.writeSymbols(); - + // If we haven't already, create a linker-generated input file comprising of + // linker-defined synthetic symbols only such as `_DYNAMIC`, etc. + if (self.linker_defined_index == null) { + const index = @as(File.Index, @intCast(try self.files.addOne(gpa))); + self.files.set(index, .{ .linker_defined = .{ .index = index } }); + self.linker_defined_index = index; + } + try self.addLinkerDefinedSymbols(); + + // Now, we are ready to resolve the symbols across all input files. + // We will first resolve the files in the ZigModule, next in the parsed + // input Object files. + // Any qualifing unresolved symbol will be upgraded to an absolute, weak + // symbol for potential resolution at load-time. + self.resolveSymbols(); + self.markImportsExports(); + self.claimUnresolved(); + + // Scan and create missing synthetic entries such as GOT indirection. + try self.scanRelocs(); + + // Allocate atoms parsed from input object files, followed by allocating + // linker-defined synthetic symbols. + try self.allocateObjects(); + self.allocateLinkerDefinedSymbols(); + + // Beyond this point, everything has been allocated a virtual address and we can resolve + // the relocations, and commit objects to file. 
+ if (self.zig_module_index) |index| { + for (self.file(index).?.zig_module.atoms.keys()) |atom_index| { + const atom_ptr = self.atom(atom_index).?; + if (!atom_ptr.alive) continue; + const shdr = &self.shdrs.items[atom_ptr.output_section_index]; + const file_offset = shdr.sh_offset + atom_ptr.value - shdr.sh_addr; + const size = math.cast(usize, atom_ptr.size) orelse return error.Overflow; + const code = try gpa.alloc(u8, size); + defer gpa.free(code); + const amt = try self.base.file.?.preadAll(code, file_offset); + if (amt != code.len) return error.InputOutput; + try atom_ptr.resolveRelocs(self, code); + try self.base.file.?.pwriteAll(code, file_offset); + } + } + try self.writeObjects(); + + // Generate and emit the symbol table. + try self.updateSymtabSize(); + try self.writeSymtab(); + + // Dump the state for easy debugging. + // State can be dumped via `--debug-log link_state`. if (build_options.enable_logging) { - self.logSymtab(); + state_log.debug("{}", .{self.dumpState()}); } if (self.dwarf) |*dw| { @@ -1144,7 +1114,7 @@ pub fn flushModule(self: *Elf, comp: *Compilation, prog_node: *std.Progress.Node try dw.writeDbgAbbrev(); if (!self.shdr_table_dirty) { // Then it won't get written with the others and we need to do it. - try self.writeSectHeader(self.debug_abbrev_section_index.?); + try self.writeShdr(self.debug_abbrev_section_index.?); } self.debug_abbrev_section_dirty = false; } @@ -1152,7 +1122,7 @@ pub fn flushModule(self: *Elf, comp: *Compilation, prog_node: *std.Progress.Node if (self.debug_info_header_dirty) { // Currently only one compilation unit is supported, so the address range is simply // identical to the main program header virtual address and memory size. 
- const text_phdr = &self.program_headers.items[self.phdr_load_re_index.?]; + const text_phdr = &self.phdrs.items[self.phdr_load_re_index.?]; const low_pc = text_phdr.p_vaddr; const high_pc = text_phdr.p_vaddr + text_phdr.p_memsz; try dw.writeDbgInfoHeader(module, low_pc, high_pc); @@ -1162,11 +1132,11 @@ pub fn flushModule(self: *Elf, comp: *Compilation, prog_node: *std.Progress.Node if (self.debug_aranges_section_dirty) { // Currently only one compilation unit is supported, so the address range is simply // identical to the main program header virtual address and memory size. - const text_phdr = &self.program_headers.items[self.phdr_load_re_index.?]; + const text_phdr = &self.phdrs.items[self.phdr_load_re_index.?]; try dw.writeDbgAranges(text_phdr.p_vaddr, text_phdr.p_memsz); if (!self.shdr_table_dirty) { // Then it won't get written with the others and we need to do it. - try self.writeSectHeader(self.debug_aranges_section_index.?); + try self.writeShdr(self.debug_aranges_section_index.?); } self.debug_aranges_section_dirty = false; } @@ -1184,11 +1154,11 @@ pub fn flushModule(self: *Elf, comp: *Compilation, prog_node: *std.Progress.Node }; const phdr_table_index = self.phdr_table_index.?; - const phdr_table = &self.program_headers.items[phdr_table_index]; - const phdr_table_load = &self.program_headers.items[self.phdr_table_load_index.?]; + const phdr_table = &self.phdrs.items[phdr_table_index]; + const phdr_table_load = &self.phdrs.items[self.phdr_table_load_index.?]; const allocated_size = self.allocatedSize(phdr_table.p_offset); - const needed_size = self.program_headers.items.len * phsize; + const needed_size = self.phdrs.items.len * phsize; if (needed_size > allocated_size) { phdr_table.p_offset = 0; // free the space @@ -1207,11 +1177,11 @@ pub fn flushModule(self: *Elf, comp: *Compilation, prog_node: *std.Progress.Node switch (self.ptr_width) { .p32 => { - const buf = try gpa.alloc(elf.Elf32_Phdr, self.program_headers.items.len); + const buf = try 
gpa.alloc(elf.Elf32_Phdr, self.phdrs.items.len); defer gpa.free(buf); for (buf, 0..) |*phdr, i| { - phdr.* = progHeaderTo32(self.program_headers.items[i]); + phdr.* = phdrTo32(self.phdrs.items[i]); if (foreign_endian) { mem.byteSwapAllFields(elf.Elf32_Phdr, phdr); } @@ -1219,11 +1189,11 @@ pub fn flushModule(self: *Elf, comp: *Compilation, prog_node: *std.Progress.Node try self.base.file.?.pwriteAll(mem.sliceAsBytes(buf), phdr_table.p_offset); }, .p64 => { - const buf = try gpa.alloc(elf.Elf64_Phdr, self.program_headers.items.len); + const buf = try gpa.alloc(elf.Elf64_Phdr, self.phdrs.items.len); defer gpa.free(buf); for (buf, 0..) |*phdr, i| { - phdr.* = self.program_headers.items[i]; + phdr.* = self.phdrs.items[i]; if (foreign_endian) { mem.byteSwapAllFields(elf.Elf64_Phdr, phdr); } @@ -1241,9 +1211,9 @@ pub fn flushModule(self: *Elf, comp: *Compilation, prog_node: *std.Progress.Node { const shdr_index = self.shstrtab_section_index.?; - if (self.shstrtab_dirty or self.shstrtab.buffer.items.len != self.sections.items(.shdr)[shdr_index].sh_size) { + if (self.shstrtab_dirty or self.shstrtab.buffer.items.len != self.shdrs.items[shdr_index].sh_size) { try self.growNonAllocSection(shdr_index, self.shstrtab.buffer.items.len, 1, false); - const shstrtab_sect = self.sections.items(.shdr)[shdr_index]; + const shstrtab_sect = &self.shdrs.items[shdr_index]; try self.base.file.?.pwriteAll(self.shstrtab.buffer.items, shstrtab_sect.sh_offset); self.shstrtab_dirty = false; } @@ -1251,9 +1221,9 @@ pub fn flushModule(self: *Elf, comp: *Compilation, prog_node: *std.Progress.Node { const shdr_index = self.strtab_section_index.?; - if (self.strtab_dirty or self.strtab.buffer.items.len != self.sections.items(.shdr)[shdr_index].sh_size) { + if (self.strtab_dirty or self.strtab.buffer.items.len != self.shdrs.items[shdr_index].sh_size) { try self.growNonAllocSection(shdr_index, self.strtab.buffer.items.len, 1, false); - const strtab_sect = self.sections.items(.shdr)[shdr_index]; + const 
strtab_sect = self.shdrs.items[shdr_index]; try self.base.file.?.pwriteAll(self.strtab.buffer.items, strtab_sect.sh_offset); self.strtab_dirty = false; } @@ -1261,9 +1231,9 @@ pub fn flushModule(self: *Elf, comp: *Compilation, prog_node: *std.Progress.Node if (self.dwarf) |dwarf| { const shdr_index = self.debug_str_section_index.?; - if (self.debug_strtab_dirty or dwarf.strtab.buffer.items.len != self.sections.items(.shdr)[shdr_index].sh_size) { + if (self.debug_strtab_dirty or dwarf.strtab.buffer.items.len != self.shdrs.items[shdr_index].sh_size) { try self.growNonAllocSection(shdr_index, dwarf.strtab.buffer.items.len, 1, false); - const debug_strtab_sect = self.sections.items(.shdr)[shdr_index]; + const debug_strtab_sect = self.shdrs.items[shdr_index]; try self.base.file.?.pwriteAll(dwarf.strtab.buffer.items, debug_strtab_sect.sh_offset); self.debug_strtab_dirty = false; } @@ -1279,7 +1249,7 @@ pub fn flushModule(self: *Elf, comp: *Compilation, prog_node: *std.Progress.Node .p64 => @alignOf(elf.Elf64_Shdr), }; const allocated_size = self.allocatedSize(self.shdr_table_offset.?); - const needed_size = self.sections.slice().len * shsize; + const needed_size = self.shdrs.items.len * shsize; if (needed_size > allocated_size) { self.shdr_table_offset = null; // free the space @@ -1288,12 +1258,11 @@ pub fn flushModule(self: *Elf, comp: *Compilation, prog_node: *std.Progress.Node switch (self.ptr_width) { .p32 => { - const slice = self.sections.slice(); - const buf = try gpa.alloc(elf.Elf32_Shdr, slice.len); + const buf = try gpa.alloc(elf.Elf32_Shdr, self.shdrs.items.len); defer gpa.free(buf); for (buf, 0..) 
|*shdr, i| { - shdr.* = sectHeaderTo32(slice.items(.shdr)[i]); + shdr.* = shdrTo32(self.shdrs.items[i]); log.debug("writing section {?s}: {}", .{ self.shstrtab.get(shdr.sh_name), shdr.* }); if (foreign_endian) { mem.byteSwapAllFields(elf.Elf32_Shdr, shdr); @@ -1302,12 +1271,11 @@ pub fn flushModule(self: *Elf, comp: *Compilation, prog_node: *std.Progress.Node try self.base.file.?.pwriteAll(mem.sliceAsBytes(buf), self.shdr_table_offset.?); }, .p64 => { - const slice = self.sections.slice(); - const buf = try gpa.alloc(elf.Elf64_Shdr, slice.len); + const buf = try gpa.alloc(elf.Elf64_Shdr, self.shdrs.items.len); defer gpa.free(buf); for (buf, 0..) |*shdr, i| { - shdr.* = slice.items(.shdr)[i]; + shdr.* = self.shdrs.items[i]; log.debug("writing section {?s}: {}", .{ self.shstrtab.get(shdr.sh_name), shdr.* }); if (foreign_endian) { mem.byteSwapAllFields(elf.Elf64_Shdr, shdr); @@ -1338,7 +1306,194 @@ pub fn flushModule(self: *Elf, comp: *Compilation, prog_node: *std.Progress.Node assert(!self.shstrtab_dirty); assert(!self.strtab_dirty); assert(!self.debug_strtab_dirty); - assert(!self.got_table_count_dirty); + assert(!self.got.dirty); +} + +const ParseError = error{ + UnknownFileType, + InvalidCpuArch, + OutOfMemory, + Overflow, + InputOutput, + EndOfStream, + FileSystem, + NotSupported, +} || std.os.SeekError || std.fs.File.OpenError || std.fs.File.ReadError; + +fn parsePositional( + self: *Elf, + in_file: std.fs.File, + path: []const u8, + must_link: bool, + ctx: *ParseErrorCtx, +) ParseError!void { + const tracy = trace(@src()); + defer tracy.end(); + _ = must_link; + + if (Object.isObject(in_file)) { + try self.parseObject(in_file, path, ctx); + } else return error.UnknownFileType; +} + +fn parseObject(self: *Elf, in_file: std.fs.File, path: []const u8, ctx: *ParseErrorCtx) ParseError!void { + const tracy = trace(@src()); + defer tracy.end(); + + const gpa = self.base.allocator; + const data = try in_file.readToEndAlloc(gpa, std.math.maxInt(u32)); + const index = 
@as(File.Index, @intCast(try self.files.addOne(gpa))); + self.files.set(index, .{ .object = .{ + .path = path, + .data = data, + .index = index, + } }); + try self.objects.append(gpa, index); + + const object = self.file(index).?.object; + try object.parse(self); + + ctx.detected_cpu_arch = object.header.?.e_machine.toTargetCpuArch().?; + if (ctx.detected_cpu_arch != self.base.options.target.cpu.arch) return error.InvalidCpuArch; +} + +fn resolveSymbols(self: *Elf) void { + if (self.zig_module_index) |index| { + const zig_module = self.file(index).?.zig_module; + zig_module.resolveSymbols(self); + } + + for (self.objects.items) |index| { + const object = self.file(index).?.object; + object.resolveSymbols(self); + } +} + +fn markImportsExports(self: *Elf) void { + const mark = struct { + fn mark(elf_file: *Elf, file_index: File.Index) void { + for (elf_file.file(file_index).?.globals()) |global_index| { + const global = elf_file.symbol(global_index); + if (global.version_index == elf.VER_NDX_LOCAL) continue; + const file_ptr = global.file(elf_file) orelse continue; + const vis = @as(elf.STV, @enumFromInt(global.elfSym(elf_file).st_other)); + if (vis == .HIDDEN) continue; + // if (file == .shared and !global.isAbs(self)) { + // global.flags.import = true; + // continue; + // } + if (file_ptr.index() == file_index) { + global.flags.@"export" = true; + if (elf_file.isDynLib() and vis != .PROTECTED) { + global.flags.import = true; + } + } + } + } + }.mark; + + if (self.zig_module_index) |index| { + mark(self, index); + } + + for (self.objects.items) |index| { + mark(self, index); + } +} + +fn claimUnresolved(self: *Elf) void { + if (self.zig_module_index) |index| { + const zig_module = self.file(index).?.zig_module; + zig_module.claimUnresolved(self); + } + for (self.objects.items) |index| { + const object = self.file(index).?.object; + object.claimUnresolved(self); + } +} + +/// In scanRelocs we will go over all live atoms and scan their relocs. 
+/// This will help us work out what synthetics to emit, GOT indirection, etc. +/// This is also the point where we will report undefined symbols for any +/// alloc sections. +fn scanRelocs(self: *Elf) !void { + const gpa = self.base.allocator; + + var undefs = std.AutoHashMap(Symbol.Index, std.ArrayList(Atom.Index)).init(gpa); + defer { + var it = undefs.iterator(); + while (it.next()) |entry| { + entry.value_ptr.deinit(); + } + undefs.deinit(); + } + + if (self.zig_module_index) |index| { + const zig_module = self.file(index).?.zig_module; + try zig_module.scanRelocs(self, &undefs); + } + for (self.objects.items) |index| { + const object = self.file(index).?.object; + try object.scanRelocs(self, &undefs); + } + + try self.reportUndefined(&undefs); + + for (self.symbols.items) |*sym| { + if (sym.flags.needs_got) { + log.debug("'{s}' needs GOT", .{sym.name(self)}); + // TODO how can we tell we need to write it again, aka the entry is dirty? + const gop = try sym.getOrCreateGotEntry(self); + try self.got.writeEntry(self, gop.index); + } + } +} + +fn allocateObjects(self: *Elf) !void { + for (self.objects.items) |index| { + const object = self.file(index).?.object; + for (object.atoms.items) |atom_index| { + const atom_ptr = self.atom(atom_index) orelse continue; + if (!atom_ptr.alive) continue; + try atom_ptr.allocate(self); + } + + for (object.locals()) |local_index| { + const local = self.symbol(local_index); + const atom_ptr = local.atom(self) orelse continue; + if (!atom_ptr.alive) continue; + local.value = atom_ptr.value; + } + + for (object.globals()) |global_index| { + const global = self.symbol(global_index); + const atom_ptr = global.atom(self) orelse continue; + if (!atom_ptr.alive) continue; + if (global.file_index == index) { + global.value = atom_ptr.value; + } + } + } +} + +fn writeObjects(self: *Elf) !void { + const gpa = self.base.allocator; + + for (self.objects.items) |index| { + const object = self.file(index).?.object; + for (object.atoms.items) 
|atom_index| { + const atom_ptr = self.atom(atom_index) orelse continue; + if (!atom_ptr.alive) continue; + + const shdr = &self.shdrs.items[atom_ptr.output_section_index]; + const file_offset = shdr.sh_offset + atom_ptr.value - shdr.sh_addr; + const code = try atom_ptr.codeInObjectUncompressAlloc(self); + defer gpa.free(code); + + try atom_ptr.resolveRelocs(self, code); + try self.base.file.?.pwriteAll(code, file_offset); + } + } } fn linkWithLLD(self: *Elf, comp: *Compilation, prog_node: *std.Progress.Node) !void { @@ -1563,9 +1718,9 @@ fn linkWithLLD(self: *Elf, comp: *Compilation, prog_node: *std.Progress.Node) !v try argv.append(entry); } - for (self.base.options.force_undefined_symbols.keys()) |symbol| { + for (self.base.options.force_undefined_symbols.keys()) |sym| { try argv.append("-u"); - try argv.append(symbol); + try argv.append(sym); } switch (self.base.options.hash_style) { @@ -2092,7 +2247,7 @@ fn writeElfHeader(self: *Elf) !void { const e_entry = if (elf_type == .REL) 0 else self.entry_addr.?; - const phdr_table_offset = self.program_headers.items[self.phdr_table_index.?].p_offset; + const phdr_table_offset = self.phdrs.items[self.phdr_table_index.?].p_offset; switch (self.ptr_width) { .p32 => { mem.writeInt(u32, hdr_buf[index..][0..4], @as(u32, @intCast(e_entry)), endian); @@ -2139,7 +2294,7 @@ fn writeElfHeader(self: *Elf) !void { mem.writeInt(u16, hdr_buf[index..][0..2], e_phentsize, endian); index += 2; - const e_phnum = @as(u16, @intCast(self.program_headers.items.len)); + const e_phnum = @as(u16, @intCast(self.phdrs.items.len)); mem.writeInt(u16, hdr_buf[index..][0..2], e_phnum, endian); index += 2; @@ -2150,7 +2305,7 @@ fn writeElfHeader(self: *Elf) !void { mem.writeInt(u16, hdr_buf[index..][0..2], e_shentsize, endian); index += 2; - const e_shnum = @as(u16, @intCast(self.sections.slice().len)); + const e_shnum = @as(u16, @intCast(self.shdrs.items.len)); mem.writeInt(u16, hdr_buf[index..][0..2], e_shnum, endian); index += 2; @@ -2162,268 
+2317,23 @@ fn writeElfHeader(self: *Elf) !void { try self.base.file.?.pwriteAll(hdr_buf[0..index], 0); } -fn freeAtom(self: *Elf, atom_index: Atom.Index) void { - const atom = self.getAtom(atom_index); - log.debug("freeAtom {d} ({s})", .{ atom_index, atom.getName(self) }); - - Atom.freeRelocations(self, atom_index); - - const gpa = self.base.allocator; - const shndx = atom.getSymbol(self).st_shndx; - const free_list = &self.sections.items(.free_list)[shndx]; - var already_have_free_list_node = false; - { - var i: usize = 0; - // TODO turn free_list into a hash map - while (i < free_list.items.len) { - if (free_list.items[i] == atom_index) { - _ = free_list.swapRemove(i); - continue; - } - if (free_list.items[i] == atom.prev_index) { - already_have_free_list_node = true; - } - i += 1; - } - } - - const maybe_last_atom_index = &self.sections.items(.last_atom_index)[shndx]; - if (maybe_last_atom_index.*) |last_atom_index| { - if (last_atom_index == atom_index) { - if (atom.prev_index) |prev_index| { - // TODO shrink the section size here - maybe_last_atom_index.* = prev_index; - } else { - maybe_last_atom_index.* = null; - } - } - } - - if (atom.prev_index) |prev_index| { - const prev = self.getAtomPtr(prev_index); - prev.next_index = atom.next_index; - - if (!already_have_free_list_node and prev.*.freeListEligible(self)) { - // The free list is heuristics, it doesn't have to be perfect, so we can - // ignore the OOM here. - free_list.append(gpa, prev_index) catch {}; - } - } else { - self.getAtomPtr(atom_index).prev_index = null; - } - - if (atom.next_index) |next_index| { - self.getAtomPtr(next_index).prev_index = atom.prev_index; - } else { - self.getAtomPtr(atom_index).next_index = null; - } - - // Appending to free lists is allowed to fail because the free lists are heuristics based anyway. 
- const local_sym_index = atom.getSymbolIndex().?; - - log.debug("adding %{d} to local symbols free list", .{local_sym_index}); - self.local_symbol_free_list.append(gpa, local_sym_index) catch {}; - self.local_symbols.items[local_sym_index] = .{ - .st_name = 0, - .st_info = 0, - .st_other = 0, - .st_shndx = 0, - .st_value = 0, - .st_size = 0, - }; - _ = self.atom_by_index_table.remove(local_sym_index); - self.getAtomPtr(atom_index).local_sym_index = 0; - - self.got_table.freeEntry(gpa, local_sym_index); -} - -fn shrinkAtom(self: *Elf, atom_index: Atom.Index, new_block_size: u64) void { - _ = self; - _ = atom_index; - _ = new_block_size; -} - -fn growAtom(self: *Elf, atom_index: Atom.Index, new_block_size: u64, alignment: u64) !u64 { - const atom = self.getAtom(atom_index); - const sym = atom.getSymbol(self); - const align_ok = mem.alignBackward(u64, sym.st_value, alignment) == sym.st_value; - const need_realloc = !align_ok or new_block_size > atom.capacity(self); - if (!need_realloc) return sym.st_value; - return self.allocateAtom(atom_index, new_block_size, alignment); -} - -pub fn createAtom(self: *Elf) !Atom.Index { - const gpa = self.base.allocator; - const atom_index = @as(Atom.Index, @intCast(self.atoms.items.len)); - const atom = try self.atoms.addOne(gpa); - const local_sym_index = try self.allocateLocalSymbol(); - try self.atom_by_index_table.putNoClobber(gpa, local_sym_index, atom_index); - atom.* = .{ - .local_sym_index = local_sym_index, - .prev_index = null, - .next_index = null, - }; - log.debug("creating ATOM(%{d}) at index {d}", .{ local_sym_index, atom_index }); - return atom_index; -} - -fn allocateAtom(self: *Elf, atom_index: Atom.Index, new_block_size: u64, alignment: u64) !u64 { - const atom = self.getAtom(atom_index); - const sym = atom.getSymbol(self); - const phdr_index = self.sections.items(.phdr_index)[sym.st_shndx]; - const phdr = &self.program_headers.items[phdr_index]; - const shdr = &self.sections.items(.shdr)[sym.st_shndx]; - const 
free_list = &self.sections.items(.free_list)[sym.st_shndx]; - const maybe_last_atom_index = &self.sections.items(.last_atom_index)[sym.st_shndx]; - const new_atom_ideal_capacity = padToIdeal(new_block_size); - - // We use these to indicate our intention to update metadata, placing the new atom, - // and possibly removing a free list node. - // It would be simpler to do it inside the for loop below, but that would cause a - // problem if an error was returned later in the function. So this action - // is actually carried out at the end of the function, when errors are no longer possible. - var atom_placement: ?Atom.Index = null; - var free_list_removal: ?usize = null; - - // First we look for an appropriately sized free list node. - // The list is unordered. We'll just take the first thing that works. - const vaddr = blk: { - var i: usize = if (self.base.child_pid == null) 0 else free_list.items.len; - while (i < free_list.items.len) { - const big_atom_index = free_list.items[i]; - const big_atom = self.getAtom(big_atom_index); - // We now have a pointer to a live atom that has too much capacity. - // Is it enough that we could fit this new atom? - const big_atom_sym = big_atom.getSymbol(self); - const capacity = big_atom.capacity(self); - const ideal_capacity = padToIdeal(capacity); - const ideal_capacity_end_vaddr = std.math.add(u64, big_atom_sym.st_value, ideal_capacity) catch ideal_capacity; - const capacity_end_vaddr = big_atom_sym.st_value + capacity; - const new_start_vaddr_unaligned = capacity_end_vaddr - new_atom_ideal_capacity; - const new_start_vaddr = mem.alignBackward(u64, new_start_vaddr_unaligned, alignment); - if (new_start_vaddr < ideal_capacity_end_vaddr) { - // Additional bookkeeping here to notice if this free list node - // should be deleted because the block that it points to has grown to take up - // more of the extra capacity. 
- if (!big_atom.freeListEligible(self)) { - _ = free_list.swapRemove(i); - } else { - i += 1; - } - continue; - } - // At this point we know that we will place the new block here. But the - // remaining question is whether there is still yet enough capacity left - // over for there to still be a free list node. - const remaining_capacity = new_start_vaddr - ideal_capacity_end_vaddr; - const keep_free_list_node = remaining_capacity >= min_text_capacity; - - // Set up the metadata to be updated, after errors are no longer possible. - atom_placement = big_atom_index; - if (!keep_free_list_node) { - free_list_removal = i; - } - break :blk new_start_vaddr; - } else if (maybe_last_atom_index.*) |last_index| { - const last = self.getAtom(last_index); - const last_sym = last.getSymbol(self); - const ideal_capacity = padToIdeal(last_sym.st_size); - const ideal_capacity_end_vaddr = last_sym.st_value + ideal_capacity; - const new_start_vaddr = mem.alignForward(u64, ideal_capacity_end_vaddr, alignment); - // Set up the metadata to be updated, after errors are no longer possible. - atom_placement = last_index; - break :blk new_start_vaddr; - } else { - break :blk phdr.p_vaddr; - } - }; - - const expand_section = if (atom_placement) |placement_index| - self.getAtom(placement_index).next_index == null - else - true; - if (expand_section) { - const needed_size = (vaddr + new_block_size) - phdr.p_vaddr; - try self.growAllocSection(sym.st_shndx, needed_size); - maybe_last_atom_index.* = atom_index; - - if (self.dwarf) |_| { - // The .debug_info section has `low_pc` and `high_pc` values which is the virtual address - // range of the compilation unit. When we expand the text section, this range changes, - // so the DW_TAG.compile_unit tag of the .debug_info section becomes dirty. - self.debug_info_header_dirty = true; - // This becomes dirty for the same reason. We could potentially make this more - // fine-grained with the addition of support for more compilation units. 
It is planned to - // model each package as a different compilation unit. - self.debug_aranges_section_dirty = true; - } - } - shdr.sh_addralign = @max(shdr.sh_addralign, alignment); - - // This function can also reallocate an atom. - // In this case we need to "unplug" it from its previous location before - // plugging it in to its new location. - if (atom.prev_index) |prev_index| { - const prev = self.getAtomPtr(prev_index); - prev.next_index = atom.next_index; - } - if (atom.next_index) |next_index| { - const next = self.getAtomPtr(next_index); - next.prev_index = atom.prev_index; - } - - if (atom_placement) |big_atom_index| { - const big_atom = self.getAtomPtr(big_atom_index); - const atom_ptr = self.getAtomPtr(atom_index); - atom_ptr.prev_index = big_atom_index; - atom_ptr.next_index = big_atom.next_index; - big_atom.next_index = atom_index; - } else { - const atom_ptr = self.getAtomPtr(atom_index); - atom_ptr.prev_index = null; - atom_ptr.next_index = null; - } - if (free_list_removal) |i| { - _ = free_list.swapRemove(i); - } - return vaddr; -} - -pub fn allocateLocalSymbol(self: *Elf) !u32 { - try self.local_symbols.ensureUnusedCapacity(self.base.allocator, 1); - - const index = blk: { - if (self.local_symbol_free_list.popOrNull()) |index| { - log.debug(" (reusing symbol index {d})", .{index}); - break :blk index; - } else { - log.debug(" (allocating symbol index {d})", .{self.local_symbols.items.len}); - const index = @as(u32, @intCast(self.local_symbols.items.len)); - _ = self.local_symbols.addOneAssumeCapacity(); - break :blk index; - } - }; - - self.local_symbols.items[index] = .{ - .st_name = 0, - .st_info = 0, - .st_other = 0, - .st_shndx = 0, - .st_value = 0, - .st_size = 0, - }; - - return index; -} - fn freeUnnamedConsts(self: *Elf, decl_index: Module.Decl.Index) void { - const unnamed_consts = self.unnamed_const_atoms.getPtr(decl_index) orelse return; - for (unnamed_consts.items) |atom| { - self.freeAtom(atom); + const unnamed_consts = 
self.unnamed_consts.getPtr(decl_index) orelse return; + for (unnamed_consts.items) |sym_index| { + self.freeDeclMetadata(sym_index); } unnamed_consts.clearAndFree(self.base.allocator); } +fn freeDeclMetadata(self: *Elf, sym_index: Symbol.Index) void { + const sym = self.symbol(sym_index); + sym.atom(self).?.free(self); + log.debug("adding %{d} to local symbols free list", .{sym_index}); + self.symbols_free_list.append(self.base.allocator, sym_index) catch {}; + self.symbols.items[sym_index] = .{}; + // TODO free GOT entry here +} + pub fn freeDecl(self: *Elf, decl_index: Module.Decl.Index) void { if (self.llvm_object) |llvm_object| return llvm_object.freeDecl(decl_index); @@ -2434,7 +2344,8 @@ pub fn freeDecl(self: *Elf, decl_index: Module.Decl.Index) void { if (self.decls.fetchRemove(decl_index)) |const_kv| { var kv = const_kv; - self.freeAtom(kv.value.atom); + const sym_index = kv.value.symbol_index; + self.freeDeclMetadata(sym_index); self.freeUnnamedConsts(decl_index); kv.value.exports.deinit(self.base.allocator); } @@ -2444,40 +2355,50 @@ pub fn freeDecl(self: *Elf, decl_index: Module.Decl.Index) void { } } -pub fn getOrCreateAtomForLazySymbol(self: *Elf, sym: File.LazySymbol) !Atom.Index { +pub fn getOrCreateMetadataForLazySymbol(self: *Elf, sym: link.File.LazySymbol) !Symbol.Index { const mod = self.base.options.module.?; const gop = try self.lazy_syms.getOrPut(self.base.allocator, sym.getDecl(mod)); errdefer _ = if (!gop.found_existing) self.lazy_syms.pop(); if (!gop.found_existing) gop.value_ptr.* = .{}; - const metadata: struct { atom: *Atom.Index, state: *LazySymbolMetadata.State } = switch (sym.kind) { - .code => .{ .atom = &gop.value_ptr.text_atom, .state = &gop.value_ptr.text_state }, - .const_data => .{ .atom = &gop.value_ptr.rodata_atom, .state = &gop.value_ptr.rodata_state }, + const metadata: struct { + symbol_index: *Symbol.Index, + state: *LazySymbolMetadata.State, + } = switch (sym.kind) { + .code => .{ + .symbol_index = 
&gop.value_ptr.text_symbol_index, + .state = &gop.value_ptr.text_state, + }, + .const_data => .{ + .symbol_index = &gop.value_ptr.rodata_symbol_index, + .state = &gop.value_ptr.rodata_state, + }, }; + const zig_module = self.file(self.zig_module_index.?).?.zig_module; switch (metadata.state.*) { - .unused => metadata.atom.* = try self.createAtom(), - .pending_flush => return metadata.atom.*, + .unused => metadata.symbol_index.* = try zig_module.addAtom(switch (sym.kind) { + .code => self.text_section_index.?, + .const_data => self.rodata_section_index.?, + }, self), + .pending_flush => return metadata.symbol_index.*, .flushed => {}, } metadata.state.* = .pending_flush; - const atom = metadata.atom.*; + const symbol_index = metadata.symbol_index.*; // anyerror needs to be deferred until flushModule - if (sym.getDecl(mod) != .none) try self.updateLazySymbolAtom(sym, atom, switch (sym.kind) { - .code => self.text_section_index.?, - .const_data => self.rodata_section_index.?, - }); - return atom; + if (sym.getDecl(mod) != .none) try self.updateLazySymbol(sym, symbol_index); + return symbol_index; } -pub fn getOrCreateAtomForDecl(self: *Elf, decl_index: Module.Decl.Index) !Atom.Index { +pub fn getOrCreateMetadataForDecl(self: *Elf, decl_index: Module.Decl.Index) !Symbol.Index { const gop = try self.decls.getOrPut(self.base.allocator, decl_index); if (!gop.found_existing) { + const zig_module = self.file(self.zig_module_index.?).?.zig_module; gop.value_ptr.* = .{ - .atom = try self.createAtom(), - .shdr = self.getDeclShdrIndex(decl_index), + .symbol_index = try zig_module.addAtom(self.getDeclShdrIndex(decl_index), self), .exports = .{}, }; } - return gop.value_ptr.atom; + return gop.value_ptr.symbol_index; } fn getDeclShdrIndex(self: *Elf, decl_index: Module.Decl.Index) u16 { @@ -2506,9 +2427,16 @@ fn getDeclShdrIndex(self: *Elf, decl_index: Module.Decl.Index) u16 { return shdr_index; } -fn updateDeclCode(self: *Elf, decl_index: Module.Decl.Index, code: []const u8, 
stt_bits: u8) !*elf.Elf64_Sym { +fn updateDeclCode( + self: *Elf, + decl_index: Module.Decl.Index, + sym_index: Symbol.Index, + code: []const u8, + stt_bits: u8, +) !void { const gpa = self.base.allocator; const mod = self.base.options.module.?; + const zig_module = self.file(self.zig_module_index.?).?.zig_module; const decl = mod.declPtr(decl_index); const decl_name = mod.intern_pool.stringToSlice(try decl.getFullyQualifiedName(mod)); @@ -2516,63 +2444,60 @@ fn updateDeclCode(self: *Elf, decl_index: Module.Decl.Index, code: []const u8, s log.debug("updateDeclCode {s}{*}", .{ decl_name, decl }); const required_alignment = decl.getAlignment(mod); - const decl_metadata = self.decls.get(decl_index).?; - const atom_index = decl_metadata.atom; - const atom = self.getAtom(atom_index); - const local_sym_index = atom.getSymbolIndex().?; - - const shdr_index = decl_metadata.shdr; - if (atom.getSymbol(self).st_size != 0 and self.base.child_pid == null) { - const local_sym = atom.getSymbolPtr(self); - local_sym.st_name = try self.strtab.insert(gpa, decl_name); - local_sym.st_info = (elf.STB_LOCAL << 4) | stt_bits; - local_sym.st_other = 0; - local_sym.st_shndx = shdr_index; - - const capacity = atom.capacity(self); - const need_realloc = code.len > capacity or - !mem.isAlignedGeneric(u64, local_sym.st_value, required_alignment); - + const sym = self.symbol(sym_index); + const esym = &zig_module.local_esyms.items[sym.esym_index]; + const atom_ptr = sym.atom(self).?; + const shdr_index = sym.output_section_index; + + sym.name_offset = try self.strtab.insert(gpa, decl_name); + atom_ptr.alive = true; + atom_ptr.name_offset = sym.name_offset; + esym.st_name = sym.name_offset; + esym.st_info |= stt_bits; + esym.st_size = code.len; + + const old_size = atom_ptr.size; + const old_vaddr = atom_ptr.value; + atom_ptr.alignment = math.log2_int(u64, required_alignment); + atom_ptr.size = code.len; + + if (old_size > 0 and self.base.child_pid == null) { + const capacity = 
atom_ptr.capacity(self); + const need_realloc = code.len > capacity or !mem.isAlignedGeneric(u64, sym.value, required_alignment); if (need_realloc) { - const vaddr = try self.growAtom(atom_index, code.len, required_alignment); - log.debug("growing {s} from 0x{x} to 0x{x}", .{ decl_name, local_sym.st_value, vaddr }); - if (vaddr != local_sym.st_value) { - local_sym.st_value = vaddr; + try atom_ptr.grow(self); + log.debug("growing {s} from 0x{x} to 0x{x}", .{ decl_name, old_vaddr, atom_ptr.value }); + if (old_vaddr != atom_ptr.value) { + sym.value = atom_ptr.value; + esym.st_value = atom_ptr.value; log.debug(" (writing new offset table entry)", .{}); - const got_entry_index = self.got_table.lookup.get(local_sym_index).?; - self.got_table.entries.items[got_entry_index] = local_sym_index; - try self.writeOffsetTableEntry(got_entry_index); + const extra = sym.extra(self).?; + try self.got.writeEntry(self, extra.got); } - } else if (code.len < local_sym.st_size) { - self.shrinkAtom(atom_index, code.len); + } else if (code.len < old_size) { + atom_ptr.shrink(self); } - local_sym.st_size = code.len; } else { - const local_sym = atom.getSymbolPtr(self); - local_sym.* = .{ - .st_name = try self.strtab.insert(gpa, decl_name), - .st_info = (elf.STB_LOCAL << 4) | stt_bits, - .st_other = 0, - .st_shndx = shdr_index, - .st_value = 0, - .st_size = 0, - }; - const vaddr = try self.allocateAtom(atom_index, code.len, required_alignment); - errdefer self.freeAtom(atom_index); - log.debug("allocated text block for {s} at 0x{x}", .{ decl_name, vaddr }); + try atom_ptr.allocate(self); + errdefer self.freeDeclMetadata(sym_index); + log.debug("allocated atom for {s} at 0x{x} to 0x{x}", .{ + decl_name, + atom_ptr.value, + atom_ptr.value + atom_ptr.size, + }); - local_sym.st_value = vaddr; - local_sym.st_size = code.len; + sym.value = atom_ptr.value; + esym.st_value = atom_ptr.value; - const got_entry_index = try atom.getOrCreateOffsetTableEntry(self); - try 
self.writeOffsetTableEntry(got_entry_index); + sym.flags.needs_got = true; + const gop = try sym.getOrCreateGotEntry(self); + try self.got.writeEntry(self, gop.index); } - const local_sym = atom.getSymbolPtr(self); - const phdr_index = self.sections.items(.phdr_index)[shdr_index]; - const section_offset = local_sym.st_value - self.program_headers.items[phdr_index].p_vaddr; - const file_offset = self.sections.items(.shdr)[shdr_index].sh_offset + section_offset; + const phdr_index = self.phdr_to_shdr_table.get(shdr_index).?; + const section_offset = sym.value - self.phdrs.items[phdr_index].p_vaddr; + const file_offset = self.shdrs.items[shdr_index].sh_offset + section_offset; if (self.base.child_pid) |pid| { switch (builtin.os.tag) { @@ -2582,7 +2507,7 @@ fn updateDeclCode(self: *Elf, decl_index: Module.Decl.Index, code: []const u8, s .iov_len = code.len, }}; var remote_vec: [1]std.os.iovec_const = .{.{ - .iov_base = @as([*]u8, @ptrFromInt(@as(usize, @intCast(local_sym.st_value)))), + .iov_base = @as([*]u8, @ptrFromInt(@as(usize, @intCast(sym.value)))), .iov_len = code.len, }}; const rc = std.os.linux.process_vm_writev(pid, &code_vec, &remote_vec, 0); @@ -2596,8 +2521,6 @@ fn updateDeclCode(self: *Elf, decl_index: Module.Decl.Index, code: []const u8, s } try self.base.file.?.pwriteAll(code, file_offset); - - return local_sym; } pub fn updateFunc(self: *Elf, mod: *Module, func_index: InternPool.Index, air: Air, liveness: Liveness) !void { @@ -2613,9 +2536,9 @@ pub fn updateFunc(self: *Elf, mod: *Module, func_index: InternPool.Index, air: A const decl_index = func.owner_decl; const decl = mod.declPtr(decl_index); - const atom_index = try self.getOrCreateAtomForDecl(decl_index); + const sym_index = try self.getOrCreateMetadataForDecl(decl_index); self.freeUnnamedConsts(decl_index); - Atom.freeRelocations(self, atom_index); + self.symbol(sym_index).atom(self).?.freeRelocs(self); var code_buffer = std.ArrayList(u8).init(self.base.allocator); defer code_buffer.deinit(); @@ 
-2638,13 +2561,14 @@ pub fn updateFunc(self: *Elf, mod: *Module, func_index: InternPool.Index, air: A return; }, }; - const local_sym = try self.updateDeclCode(decl_index, code, elf.STT_FUNC); + try self.updateDeclCode(decl_index, sym_index, code, elf.STT_FUNC); if (decl_state) |*ds| { + const sym = self.symbol(sym_index); try self.dwarf.?.commitDeclState( mod, decl_index, - local_sym.st_value, - local_sym.st_size, + sym.value, + sym.atom(self).?.size, ds, ); } @@ -2658,7 +2582,7 @@ pub fn updateDecl( self: *Elf, mod: *Module, decl_index: Module.Decl.Index, -) File.UpdateDeclError!void { +) link.File.UpdateDeclError!void { if (build_options.skip_non_native and builtin.object_format != .elf) { @panic("Attempted to compile for object format that was disabled by build configuration"); } @@ -2678,9 +2602,8 @@ pub fn updateDecl( } } - const atom_index = try self.getOrCreateAtomForDecl(decl_index); - Atom.freeRelocations(self, atom_index); - const atom = self.getAtom(atom_index); + const sym_index = try self.getOrCreateMetadataForDecl(decl_index); + self.symbol(sym_index).atom(self).?.freeRelocs(self); var code_buffer = std.ArrayList(u8).init(self.base.allocator); defer code_buffer.deinit(); @@ -2697,14 +2620,14 @@ pub fn updateDecl( }, &code_buffer, .{ .dwarf = ds, }, .{ - .parent_atom_index = atom.getSymbolIndex().?, + .parent_atom_index = sym_index, }) else try codegen.generateSymbol(&self.base, decl.srcLoc(mod), .{ .ty = decl.ty, .val = decl_val, }, &code_buffer, .none, .{ - .parent_atom_index = atom.getSymbolIndex().?, + .parent_atom_index = sym_index, }); const code = switch (res) { @@ -2716,13 +2639,14 @@ pub fn updateDecl( }, }; - const local_sym = try self.updateDeclCode(decl_index, code, elf.STT_OBJECT); + try self.updateDeclCode(decl_index, sym_index, code, elf.STT_OBJECT); if (decl_state) |*ds| { + const sym = self.symbol(sym_index); try self.dwarf.?.commitDeclState( mod, decl_index, - local_sym.st_value, - local_sym.st_size, + sym.value, + 
sym.atom(self).?.size, ds, ); } @@ -2732,14 +2656,10 @@ pub fn updateDecl( return self.updateDeclExports(mod, decl_index, mod.getDeclExports(decl_index)); } -fn updateLazySymbolAtom( - self: *Elf, - sym: File.LazySymbol, - atom_index: Atom.Index, - shdr_index: u16, -) !void { +fn updateLazySymbol(self: *Elf, sym: link.File.LazySymbol, symbol_index: Symbol.Index) !void { const gpa = self.base.allocator; const mod = self.base.options.module.?; + const zig_module = self.file(self.zig_module_index.?).?.zig_module; var required_alignment: u32 = undefined; var code_buffer = std.ArrayList(u8).init(gpa); @@ -2755,9 +2675,6 @@ fn updateLazySymbolAtom( }; const name = self.strtab.get(name_str_index).?; - const atom = self.getAtom(atom_index); - const local_sym_index = atom.getSymbolIndex().?; - const src = if (sym.ty.getOwnerDeclOrNull(mod)) |owner_decl| mod.declPtr(owner_decl).srcLoc(mod) else @@ -2773,7 +2690,7 @@ fn updateLazySymbolAtom( &required_alignment, &code_buffer, .none, - .{ .parent_atom_index = local_sym_index }, + .{ .parent_atom_index = symbol_index }, ); const code = switch (res) { .ok => code_buffer.items, @@ -2783,28 +2700,37 @@ fn updateLazySymbolAtom( }, }; - const phdr_index = self.sections.items(.phdr_index)[shdr_index]; - const local_sym = atom.getSymbolPtr(self); - local_sym.* = .{ - .st_name = name_str_index, - .st_info = (elf.STB_LOCAL << 4) | elf.STT_OBJECT, - .st_other = 0, - .st_shndx = shdr_index, - .st_value = 0, - .st_size = 0, - }; - const vaddr = try self.allocateAtom(atom_index, code.len, required_alignment); - errdefer self.freeAtom(atom_index); - log.debug("allocated text block for {s} at 0x{x}", .{ name, vaddr }); + const local_sym = self.symbol(symbol_index); + const phdr_index = self.phdr_to_shdr_table.get(local_sym.output_section_index).?; + local_sym.name_offset = name_str_index; + const local_esym = &zig_module.local_esyms.items[local_sym.esym_index]; + local_esym.st_name = name_str_index; + local_esym.st_info |= elf.STT_OBJECT; + 
local_esym.st_size = code.len; + const atom_ptr = local_sym.atom(self).?; + atom_ptr.alive = true; + atom_ptr.name_offset = name_str_index; + atom_ptr.alignment = math.log2_int(u64, required_alignment); + atom_ptr.size = code.len; + + try atom_ptr.allocate(self); + errdefer self.freeDeclMetadata(symbol_index); + + log.debug("allocated atom for {s} at 0x{x} to 0x{x}", .{ + name, + atom_ptr.value, + atom_ptr.value + atom_ptr.size, + }); - local_sym.st_value = vaddr; - local_sym.st_size = code.len; + local_sym.value = atom_ptr.value; + local_esym.st_value = atom_ptr.value; - const got_entry_index = try atom.getOrCreateOffsetTableEntry(self); - try self.writeOffsetTableEntry(got_entry_index); + local_sym.flags.needs_got = true; + const gop = try local_sym.getOrCreateGotEntry(self); + try self.got.writeEntry(self, gop.index); - const section_offset = vaddr - self.program_headers.items[phdr_index].p_vaddr; - const file_offset = self.sections.items(.shdr)[shdr_index].sh_offset + section_offset; + const section_offset = atom_ptr.value - self.phdrs.items[phdr_index].p_vaddr; + const file_offset = self.shdrs.items[local_sym.output_section_index].sh_offset + section_offset; try self.base.file.?.pwriteAll(code, file_offset); } @@ -2815,7 +2741,7 @@ pub fn lowerUnnamedConst(self: *Elf, typed_value: TypedValue, decl_index: Module defer code_buffer.deinit(); const mod = self.base.options.module.?; - const gop = try self.unnamed_const_atoms.getOrPut(gpa, decl_index); + const gop = try self.unnamed_consts.getOrPut(gpa, decl_index); if (!gop.found_existing) { gop.value_ptr.* = .{}; } @@ -2831,12 +2757,13 @@ pub fn lowerUnnamedConst(self: *Elf, typed_value: TypedValue, decl_index: Module }; const name = self.strtab.get(name_str_index).?; - const atom_index = try self.createAtom(); + const zig_module = self.file(self.zig_module_index.?).?.zig_module; + const sym_index = try zig_module.addAtom(self.rodata_section_index.?, self); const res = try codegen.generateSymbol(&self.base, 
decl.srcLoc(mod), typed_value, &code_buffer, .{ .none = {}, }, .{ - .parent_atom_index = self.getAtom(atom_index).getSymbolIndex().?, + .parent_atom_index = sym_index, }); const code = switch (res) { .ok => code_buffer.items, @@ -2850,25 +2777,34 @@ pub fn lowerUnnamedConst(self: *Elf, typed_value: TypedValue, decl_index: Module const required_alignment = typed_value.ty.abiAlignment(mod); const shdr_index = self.rodata_section_index.?; - const phdr_index = self.sections.items(.phdr_index)[shdr_index]; - const local_sym = self.getAtom(atom_index).getSymbolPtr(self); - local_sym.st_name = name_str_index; - local_sym.st_info = (elf.STB_LOCAL << 4) | elf.STT_OBJECT; - local_sym.st_other = 0; - local_sym.st_shndx = shdr_index; - local_sym.st_size = code.len; - local_sym.st_value = try self.allocateAtom(atom_index, code.len, required_alignment); - errdefer self.freeAtom(atom_index); - - log.debug("allocated text block for {s} at 0x{x}", .{ name, local_sym.st_value }); - - try unnamed_consts.append(gpa, atom_index); - - const section_offset = local_sym.st_value - self.program_headers.items[phdr_index].p_vaddr; - const file_offset = self.sections.items(.shdr)[shdr_index].sh_offset + section_offset; + const phdr_index = self.phdr_to_shdr_table.get(shdr_index).?; + const local_sym = self.symbol(sym_index); + local_sym.name_offset = name_str_index; + const local_esym = &zig_module.local_esyms.items[local_sym.esym_index]; + local_esym.st_name = name_str_index; + local_esym.st_info |= elf.STT_OBJECT; + local_esym.st_size = code.len; + const atom_ptr = local_sym.atom(self).?; + atom_ptr.alive = true; + atom_ptr.name_offset = name_str_index; + atom_ptr.alignment = math.log2_int(u64, required_alignment); + atom_ptr.size = code.len; + + try atom_ptr.allocate(self); + errdefer self.freeDeclMetadata(sym_index); + + log.debug("allocated atom for {s} at 0x{x} to 0x{x}", .{ name, atom_ptr.value, atom_ptr.value + atom_ptr.size }); + + local_sym.value = atom_ptr.value; + 
local_esym.st_value = atom_ptr.value; + + try unnamed_consts.append(gpa, atom_ptr.atom_index); + + const section_offset = atom_ptr.value - self.phdrs.items[phdr_index].p_vaddr; + const file_offset = self.shdrs.items[shdr_index].sh_offset + section_offset; try self.base.file.?.pwriteAll(code, file_offset); - return self.getAtom(atom_index).getSymbolIndex().?; + return sym_index; } pub fn updateDeclExports( @@ -2876,7 +2812,7 @@ pub fn updateDeclExports( mod: *Module, decl_index: Module.Decl.Index, exports: []const *Module.Export, -) File.UpdateDeclExportsError!void { +) link.File.UpdateDeclExportsError!void { if (build_options.skip_non_native and builtin.object_format != .elf) { @panic("Attempted to compile for object format that was disabled by build configuration"); } @@ -2889,14 +2825,12 @@ pub fn updateDeclExports( const gpa = self.base.allocator; + const zig_module = self.file(self.zig_module_index.?).?.zig_module; const decl = mod.declPtr(decl_index); - const atom_index = try self.getOrCreateAtomForDecl(decl_index); - const atom = self.getAtom(atom_index); - const decl_sym = atom.getSymbol(self); + const decl_sym_index = try self.getOrCreateMetadataForDecl(decl_index); + const decl_sym = self.symbol(decl_sym_index); + const decl_esym = zig_module.local_esyms.items[decl_sym.esym_index]; const decl_metadata = self.decls.getPtr(decl_index).?; - const shdr_index = decl_metadata.shdr; - - try self.global_symbols.ensureUnusedCapacity(gpa, exports.len); for (exports) |exp| { const exp_name = mod.intern_pool.stringToSlice(exp.opts.name); @@ -2905,7 +2839,7 @@ pub fn updateDeclExports( try mod.failed_exports.ensureUnusedCapacity(mod.gpa, 1); mod.failed_exports.putAssumeCapacityNoClobber( exp, - try Module.ErrorMsg.create(self.base.allocator, decl.srcLoc(mod), "Unimplemented: ExportOptions.section", .{}), + try Module.ErrorMsg.create(gpa, decl.srcLoc(mod), "Unimplemented: ExportOptions.section", .{}), ); continue; } @@ -2915,7 +2849,7 @@ pub fn updateDeclExports( 
.Strong => blk: { const entry_name = self.base.options.entry orelse "_start"; if (mem.eql(u8, exp_name, entry_name)) { - self.entry_addr = decl_sym.st_value; + self.entry_addr = decl_sym.value; } break :blk elf.STB_GLOBAL; }, @@ -2924,37 +2858,30 @@ pub fn updateDeclExports( try mod.failed_exports.ensureUnusedCapacity(mod.gpa, 1); mod.failed_exports.putAssumeCapacityNoClobber( exp, - try Module.ErrorMsg.create(self.base.allocator, decl.srcLoc(mod), "Unimplemented: GlobalLinkage.LinkOnce", .{}), + try Module.ErrorMsg.create(gpa, decl.srcLoc(mod), "Unimplemented: GlobalLinkage.LinkOnce", .{}), ); continue; }, }; - const stt_bits: u8 = @as(u4, @truncate(decl_sym.st_info)); - if (decl_metadata.getExport(self, exp_name)) |i| { - const sym = &self.global_symbols.items[i]; - sym.* = .{ - .st_name = try self.strtab.insert(gpa, exp_name), - .st_info = (stb_bits << 4) | stt_bits, - .st_other = 0, - .st_shndx = shdr_index, - .st_value = decl_sym.st_value, - .st_size = decl_sym.st_size, - }; - } else { - const i = if (self.global_symbol_free_list.popOrNull()) |i| i else blk: { - _ = self.global_symbols.addOneAssumeCapacity(); - break :blk self.global_symbols.items.len - 1; - }; - try decl_metadata.exports.append(gpa, @as(u32, @intCast(i))); - self.global_symbols.items[i] = .{ - .st_name = try self.strtab.insert(gpa, exp_name), - .st_info = (stb_bits << 4) | stt_bits, - .st_other = 0, - .st_shndx = shdr_index, - .st_value = decl_sym.st_value, - .st_size = decl_sym.st_size, - }; - } + const stt_bits: u8 = @as(u4, @truncate(decl_esym.st_info)); + + const name_off = try self.strtab.insert(gpa, exp_name); + const sym_index = if (decl_metadata.@"export"(self, exp_name)) |exp_index| exp_index.* else blk: { + const sym_index = try zig_module.addGlobalEsym(gpa); + const lookup_gop = try zig_module.globals_lookup.getOrPut(gpa, name_off); + const esym = zig_module.elfSym(sym_index); + esym.st_name = name_off; + lookup_gop.value_ptr.* = sym_index; + try decl_metadata.exports.append(gpa, 
sym_index); + const gop = try self.getOrPutGlobal(name_off); + try zig_module.global_symbols.append(gpa, gop.index); + break :blk sym_index; + }; + const esym = &zig_module.global_esyms.items[sym_index & 0x0fffffff]; + esym.st_value = decl_sym.value; + esym.st_shndx = decl_sym.atom_index; + esym.st_info = (stb_bits << 4) | stt_bits; + esym.st_name = name_off; } } @@ -2982,124 +2909,224 @@ pub fn deleteDeclExport( if (self.llvm_object) |_| return; const metadata = self.decls.getPtr(decl_index) orelse return; const mod = self.base.options.module.?; - const sym_index = metadata.getExportPtr(self, mod.intern_pool.stringToSlice(name)) orelse return; - self.global_symbol_free_list.append(self.base.allocator, sym_index.*) catch {}; - self.global_symbols.items[sym_index.*].st_info = 0; - sym_index.* = 0; + const zig_module = self.file(self.zig_module_index.?).?.zig_module; + const exp_name = mod.intern_pool.stringToSlice(name); + const esym_index = metadata.@"export"(self, exp_name) orelse return; + log.debug("deleting export '{s}'", .{exp_name}); + const esym = &zig_module.global_esyms.items[esym_index.*]; + _ = zig_module.globals_lookup.remove(esym.st_name); + const sym_index = self.resolver.get(esym.st_name).?; + const sym = self.symbol(sym_index); + if (sym.file_index == zig_module.index) { + _ = self.resolver.swapRemove(esym.st_name); + sym.* = .{}; + } + esym.* = null_sym; } -fn writeProgHeader(self: *Elf, index: usize) !void { - const foreign_endian = self.base.options.target.cpu.arch.endian() != builtin.cpu.arch.endian(); - const offset = self.program_headers.items[index].p_offset; - switch (self.ptr_width) { - .p32 => { - var phdr = [1]elf.Elf32_Phdr{progHeaderTo32(self.program_headers.items[index])}; - if (foreign_endian) { - mem.byteSwapAllFields(elf.Elf32_Phdr, &phdr[0]); - } - return self.base.file.?.pwriteAll(mem.sliceAsBytes(&phdr), offset); - }, - .p64 => { - var phdr = [1]elf.Elf64_Phdr{self.program_headers.items[index]}; - if (foreign_endian) { - 
mem.byteSwapAllFields(elf.Elf64_Phdr, &phdr[0]); - } - return self.base.file.?.pwriteAll(mem.sliceAsBytes(&phdr), offset); - }, - } +fn addLinkerDefinedSymbols(self: *Elf) !void { + const linker_defined_index = self.linker_defined_index orelse return; + const linker_defined = self.file(linker_defined_index).?.linker_defined; + self.dynamic_index = try linker_defined.addGlobal("_DYNAMIC", self); + self.ehdr_start_index = try linker_defined.addGlobal("__ehdr_start", self); + self.init_array_start_index = try linker_defined.addGlobal("__init_array_start", self); + self.init_array_end_index = try linker_defined.addGlobal("__init_array_end", self); + self.fini_array_start_index = try linker_defined.addGlobal("__fini_array_start", self); + self.fini_array_end_index = try linker_defined.addGlobal("__fini_array_end", self); + self.preinit_array_start_index = try linker_defined.addGlobal("__preinit_array_start", self); + self.preinit_array_end_index = try linker_defined.addGlobal("__preinit_array_end", self); + self.got_index = try linker_defined.addGlobal("_GLOBAL_OFFSET_TABLE_", self); + self.plt_index = try linker_defined.addGlobal("_PROCEDURE_LINKAGE_TABLE_", self); + self.end_index = try linker_defined.addGlobal("_end", self); + + if (self.base.options.eh_frame_hdr) { + self.gnu_eh_frame_hdr_index = try linker_defined.addGlobal("__GNU_EH_FRAME_HDR", self); + } + + if (self.globalByName("__dso_handle")) |index| { + if (self.symbol(index).file(self) == null) + self.dso_handle_index = try linker_defined.addGlobal("__dso_handle", self); + } + + self.rela_iplt_start_index = try linker_defined.addGlobal("__rela_iplt_start", self); + self.rela_iplt_end_index = try linker_defined.addGlobal("__rela_iplt_end", self); + + // for (self.objects.items) |index| { + // const object = self.getFile(index).?.object; + // for (object.atoms.items) |atom_index| { + // if (self.getStartStopBasename(atom_index)) |name| { + // const gpa = self.base.allocator; + // try 
self.start_stop_indexes.ensureUnusedCapacity(gpa, 2); + + // const start = try std.fmt.allocPrintZ(gpa, "__start_{s}", .{name}); + // defer gpa.free(start); + // const stop = try std.fmt.allocPrintZ(gpa, "__stop_{s}", .{name}); + // defer gpa.free(stop); + + // self.start_stop_indexes.appendAssumeCapacity(try internal.addSyntheticGlobal(start, self)); + // self.start_stop_indexes.appendAssumeCapacity(try internal.addSyntheticGlobal(stop, self)); + // } + // } + // } + + linker_defined.resolveSymbols(self); } -fn writeSectHeader(self: *Elf, index: usize) !void { - const foreign_endian = self.base.options.target.cpu.arch.endian() != builtin.cpu.arch.endian(); - switch (self.ptr_width) { - .p32 => { - var shdr: [1]elf.Elf32_Shdr = undefined; - shdr[0] = sectHeaderTo32(self.sections.items(.shdr)[index]); - if (foreign_endian) { - mem.byteSwapAllFields(elf.Elf32_Shdr, &shdr[0]); - } - const offset = self.shdr_table_offset.? + index * @sizeOf(elf.Elf32_Shdr); - return self.base.file.?.pwriteAll(mem.sliceAsBytes(&shdr), offset); - }, - .p64 => { - var shdr = [1]elf.Elf64_Shdr{self.sections.items(.shdr)[index]}; - if (foreign_endian) { - mem.byteSwapAllFields(elf.Elf64_Shdr, &shdr[0]); +fn allocateLinkerDefinedSymbols(self: *Elf) void { + // _DYNAMIC + if (self.dynamic_section_index) |shndx| { + const shdr = &self.shdrs.items[shndx]; + const symbol_ptr = self.symbol(self.dynamic_index.?); + symbol_ptr.value = shdr.sh_addr; + symbol_ptr.output_section_index = shndx; + } + + // __ehdr_start + { + const symbol_ptr = self.symbol(self.ehdr_start_index.?); + symbol_ptr.value = self.calcImageBase(); + symbol_ptr.output_section_index = 1; + } + + // __init_array_start, __init_array_end + if (self.sectionByName(".init_array")) |shndx| { + const start_sym = self.symbol(self.init_array_start_index.?); + const end_sym = self.symbol(self.init_array_end_index.?); + const shdr = &self.shdrs.items[shndx]; + start_sym.output_section_index = shndx; + start_sym.value = shdr.sh_addr; + 
end_sym.output_section_index = shndx; + end_sym.value = shdr.sh_addr + shdr.sh_size; + } + + // __fini_array_start, __fini_array_end + if (self.sectionByName(".fini_array")) |shndx| { + const start_sym = self.symbol(self.fini_array_start_index.?); + const end_sym = self.symbol(self.fini_array_end_index.?); + const shdr = &self.shdrs.items[shndx]; + start_sym.output_section_index = shndx; + start_sym.value = shdr.sh_addr; + end_sym.output_section_index = shndx; + end_sym.value = shdr.sh_addr + shdr.sh_size; + } + + // __preinit_array_start, __preinit_array_end + if (self.sectionByName(".preinit_array")) |shndx| { + const start_sym = self.symbol(self.preinit_array_start_index.?); + const end_sym = self.symbol(self.preinit_array_end_index.?); + const shdr = &self.shdrs.items[shndx]; + start_sym.output_section_index = shndx; + start_sym.value = shdr.sh_addr; + end_sym.output_section_index = shndx; + end_sym.value = shdr.sh_addr + shdr.sh_size; + } + + // _GLOBAL_OFFSET_TABLE_ + if (self.got_plt_section_index) |shndx| { + const shdr = &self.shdrs.items[shndx]; + const symbol_ptr = self.symbol(self.got_index.?); + symbol_ptr.value = shdr.sh_addr; + symbol_ptr.output_section_index = shndx; + } + + // _PROCEDURE_LINKAGE_TABLE_ + if (self.plt_section_index) |shndx| { + const shdr = &self.shdrs.items[shndx]; + const symbol_ptr = self.symbol(self.plt_index.?); + symbol_ptr.value = shdr.sh_addr; + symbol_ptr.output_section_index = shndx; + } + + // __dso_handle + if (self.dso_handle_index) |index| { + const shdr = &self.shdrs.items[1]; + const symbol_ptr = self.symbol(index); + symbol_ptr.value = shdr.sh_addr; + symbol_ptr.output_section_index = 0; + } + + // __GNU_EH_FRAME_HDR + if (self.eh_frame_hdr_section_index) |shndx| { + const shdr = &self.shdrs.items[shndx]; + const symbol_ptr = self.symbol(self.gnu_eh_frame_hdr_index.?); + symbol_ptr.value = shdr.sh_addr; + symbol_ptr.output_section_index = shndx; + } + + // __rela_iplt_start, __rela_iplt_end + if 
(self.rela_dyn_section_index) |shndx| blk: { + if (self.base.options.link_mode != .Static or self.base.options.pie) break :blk; + const shdr = &self.shdrs.items[shndx]; + const end_addr = shdr.sh_addr + shdr.sh_size; + const start_addr = end_addr - self.calcNumIRelativeRelocs() * @sizeOf(elf.Elf64_Rela); + const start_sym = self.symbol(self.rela_iplt_start_index.?); + const end_sym = self.symbol(self.rela_iplt_end_index.?); + start_sym.value = start_addr; + start_sym.output_section_index = shndx; + end_sym.value = end_addr; + end_sym.output_section_index = shndx; + } + + // _end + { + const end_symbol = self.symbol(self.end_index.?); + for (self.shdrs.items, 0..) |*shdr, shndx| { + if (shdr.sh_flags & elf.SHF_ALLOC != 0) { + end_symbol.value = shdr.sh_addr + shdr.sh_size; + end_symbol.output_section_index = @intCast(shndx); } - const offset = self.shdr_table_offset.? + index * @sizeOf(elf.Elf64_Shdr); - return self.base.file.?.pwriteAll(mem.sliceAsBytes(&shdr), offset); - }, + } + } + + // __start_*, __stop_* + { + var index: usize = 0; + while (index < self.start_stop_indexes.items.len) : (index += 2) { + const start = self.symbol(self.start_stop_indexes.items[index]); + const name = start.name(self); + const stop = self.symbol(self.start_stop_indexes.items[index + 1]); + const shndx = self.sectionByName(name["__start_".len..]).?; + const shdr = &self.shdrs.items[shndx]; + start.value = shdr.sh_addr; + start.output_section_index = shndx; + stop.value = shdr.sh_addr + shdr.sh_size; + stop.output_section_index = shndx; + } } } -fn writeOffsetTableEntry(self: *Elf, index: @TypeOf(self.got_table).Index) !void { - const entry_size: u16 = self.archPtrWidthBytes(); - if (self.got_table_count_dirty) { - const needed_size = self.got_table.entries.items.len * entry_size; - try self.growAllocSection(self.got_section_index.?, needed_size); - self.got_table_count_dirty = false; +fn updateSymtabSize(self: *Elf) !void { + var sizes = SymtabSize{}; + + if (self.zig_module_index) 
|index| { + const zig_module = self.file(index).?.zig_module; + zig_module.updateSymtabSize(self); + sizes.nlocals += zig_module.output_symtab_size.nlocals; + sizes.nglobals += zig_module.output_symtab_size.nglobals; } - const endian = self.base.options.target.cpu.arch.endian(); - const shdr = &self.sections.items(.shdr)[self.got_section_index.?]; - const off = shdr.sh_offset + @as(u64, entry_size) * index; - const phdr = &self.program_headers.items[self.phdr_got_index.?]; - const vaddr = phdr.p_vaddr + @as(u64, entry_size) * index; - const got_entry = self.got_table.entries.items[index]; - const got_value = self.getSymbol(got_entry).st_value; - switch (entry_size) { - 2 => { - var buf: [2]u8 = undefined; - mem.writeInt(u16, &buf, @as(u16, @intCast(got_value)), endian); - try self.base.file.?.pwriteAll(&buf, off); - }, - 4 => { - var buf: [4]u8 = undefined; - mem.writeInt(u32, &buf, @as(u32, @intCast(got_value)), endian); - try self.base.file.?.pwriteAll(&buf, off); - }, - 8 => { - var buf: [8]u8 = undefined; - mem.writeInt(u64, &buf, got_value, endian); - try self.base.file.?.pwriteAll(&buf, off); - - if (self.base.child_pid) |pid| { - switch (builtin.os.tag) { - .linux => { - var local_vec: [1]std.os.iovec_const = .{.{ - .iov_base = &buf, - .iov_len = buf.len, - }}; - var remote_vec: [1]std.os.iovec_const = .{.{ - .iov_base = @as([*]u8, @ptrFromInt(@as(usize, @intCast(vaddr)))), - .iov_len = buf.len, - }}; - const rc = std.os.linux.process_vm_writev(pid, &local_vec, &remote_vec, 0); - switch (std.os.errno(rc)) { - .SUCCESS => assert(rc == buf.len), - else => |errno| log.warn("process_vm_writev failure: {s}", .{@tagName(errno)}), - } - }, - else => return error.HotSwapUnavailableOnHostOperatingSystem, - } - } - }, - else => unreachable, + + for (self.objects.items) |index| { + const object = self.file(index).?.object; + object.updateSymtabSize(self); + sizes.nlocals += object.output_symtab_size.nlocals; + sizes.nglobals += object.output_symtab_size.nglobals; } -} 
-fn elf32SymFromSym(sym: elf.Elf64_Sym, out: *elf.Elf32_Sym) void { - out.* = .{ - .st_name = sym.st_name, - .st_value = @as(u32, @intCast(sym.st_value)), - .st_size = @as(u32, @intCast(sym.st_size)), - .st_info = sym.st_info, - .st_other = sym.st_other, - .st_shndx = sym.st_shndx, - }; -} + if (self.got_section_index) |_| { + self.got.updateSymtabSize(self); + sizes.nlocals += self.got.output_symtab_size.nlocals; + } + + if (self.linker_defined_index) |index| { + const linker_defined = self.file(index).?.linker_defined; + linker_defined.updateSymtabSize(self); + sizes.nlocals += linker_defined.output_symtab_size.nlocals; + } + + const shdr = &self.shdrs.items[self.symtab_section_index.?]; + shdr.sh_info = sizes.nlocals + 1; + self.markDirty(self.symtab_section_index.?, null); -fn writeSymbols(self: *Elf) !void { - const gpa = self.base.allocator; const sym_size: u64 = switch (self.ptr_width) { .p32 => @sizeOf(elf.Elf32_Sym), .p64 => @sizeOf(elf.Elf64_Sym), @@ -3108,54 +3135,80 @@ fn writeSymbols(self: *Elf) !void { .p32 => @alignOf(elf.Elf32_Sym), .p64 => @alignOf(elf.Elf64_Sym), }; + const needed_size = (sizes.nlocals + sizes.nglobals + 1) * sym_size; + try self.growNonAllocSection(self.symtab_section_index.?, needed_size, sym_align, true); +} - const shdr = &self.sections.items(.shdr)[self.symtab_section_index.?]; - shdr.sh_info = @intCast(self.local_symbols.items.len); - self.markDirty(self.symtab_section_index.?, null); +fn writeSymtab(self: *Elf) !void { + const gpa = self.base.allocator; + const shdr = &self.shdrs.items[self.symtab_section_index.?]; + const sym_size: u64 = switch (self.ptr_width) { + .p32 => @sizeOf(elf.Elf32_Sym), + .p64 => @sizeOf(elf.Elf64_Sym), + }; + const nsyms = math.cast(usize, @divExact(shdr.sh_size, sym_size)) orelse return error.Overflow; - const nsyms = self.local_symbols.items.len + self.global_symbols.items.len; - const needed_size = nsyms * sym_size; - try self.growNonAllocSection(self.symtab_section_index.?, needed_size, 
sym_align, true); + log.debug("writing {d} symbols at 0x{x}", .{ nsyms, shdr.sh_offset }); + + const symtab = try gpa.alloc(elf.Elf64_Sym, nsyms); + defer gpa.free(symtab); + symtab[0] = null_sym; + + var ctx: struct { ilocal: usize, iglobal: usize, symtab: []elf.Elf64_Sym } = .{ + .ilocal = 1, + .iglobal = shdr.sh_info, + .symtab = symtab, + }; + + if (self.zig_module_index) |index| { + const zig_module = self.file(index).?.zig_module; + zig_module.writeSymtab(self, ctx); + ctx.ilocal += zig_module.output_symtab_size.nlocals; + ctx.iglobal += zig_module.output_symtab_size.nglobals; + } + + for (self.objects.items) |index| { + const object = self.file(index).?.object; + object.writeSymtab(self, ctx); + ctx.ilocal += object.output_symtab_size.nlocals; + ctx.iglobal += object.output_symtab_size.nglobals; + } + + if (self.got_section_index) |_| { + try self.got.writeSymtab(self, ctx); + ctx.ilocal += self.got.output_symtab_size.nlocals; + } + + if (self.linker_defined_index) |index| { + const linker_defined = self.file(index).?.linker_defined; + linker_defined.writeSymtab(self, ctx); + ctx.ilocal += linker_defined.output_symtab_size.nlocals; + } const foreign_endian = self.base.options.target.cpu.arch.endian() != builtin.cpu.arch.endian(); - log.debug("writing {d} symbols at 0x{x}", .{ nsyms, shdr.sh_offset }); switch (self.ptr_width) { .p32 => { - const buf = try gpa.alloc(elf.Elf32_Sym, nsyms); + const buf = try gpa.alloc(elf.Elf32_Sym, symtab.len); defer gpa.free(buf); - for (buf[0..self.local_symbols.items.len], self.local_symbols.items) |*sym, local| { - elf32SymFromSym(local, sym); - if (foreign_endian) { - mem.byteSwapAllFields(elf.Elf32_Sym, sym); - } - } - - for (buf[self.local_symbols.items.len..], self.global_symbols.items) |*sym, global| { - elf32SymFromSym(global, sym); - if (foreign_endian) { - mem.byteSwapAllFields(elf.Elf32_Sym, sym); - } + for (buf, symtab) |*out, sym| { + out.* = .{ + .st_name = sym.st_name, + .st_info = sym.st_info, + .st_other = 
sym.st_other, + .st_shndx = sym.st_shndx, + .st_value = @as(u32, @intCast(sym.st_value)), + .st_size = @as(u32, @intCast(sym.st_size)), + }; + if (foreign_endian) mem.byteSwapAllFields(elf.Elf32_Sym, out); } try self.base.file.?.pwriteAll(mem.sliceAsBytes(buf), shdr.sh_offset); }, .p64 => { - const buf = try gpa.alloc(elf.Elf64_Sym, nsyms); - defer gpa.free(buf); - for (buf[0..self.local_symbols.items.len], self.local_symbols.items) |*sym, local| { - sym.* = local; - if (foreign_endian) { - mem.byteSwapAllFields(elf.Elf64_Sym, sym); - } - } - - for (buf[self.local_symbols.items.len..], self.global_symbols.items) |*sym, global| { - sym.* = global; - if (foreign_endian) { - mem.byteSwapAllFields(elf.Elf64_Sym, sym); - } + if (foreign_endian) { + for (symtab) |*sym| mem.byteSwapAllFields(elf.Elf64_Sym, sym); } - try self.base.file.?.pwriteAll(mem.sliceAsBytes(buf), shdr.sh_offset); + try self.base.file.?.pwriteAll(mem.sliceAsBytes(symtab), shdr.sh_offset); }, } } @@ -3170,11 +3223,11 @@ fn ptrWidthBytes(self: Elf) u8 { /// Does not necessarily match `ptrWidthBytes` for example can be 2 bytes /// in a 32-bit ELF file. 
-fn archPtrWidthBytes(self: Elf) u8 { - return @as(u8, @intCast(self.base.options.target.ptrBitWidth() / 8)); +pub fn archPtrWidthBytes(self: Elf) u8 { + return @as(u8, @intCast(@divExact(self.base.options.target.ptrBitWidth(), 8))); } -fn progHeaderTo32(phdr: elf.Elf64_Phdr) elf.Elf32_Phdr { +fn phdrTo32(phdr: elf.Elf64_Phdr) elf.Elf32_Phdr { return .{ .p_type = phdr.p_type, .p_flags = phdr.p_flags, @@ -3187,7 +3240,30 @@ fn progHeaderTo32(phdr: elf.Elf64_Phdr) elf.Elf32_Phdr { }; } -fn sectHeaderTo32(shdr: elf.Elf64_Shdr) elf.Elf32_Shdr { +fn writeShdr(self: *Elf, index: usize) !void { + const foreign_endian = self.base.options.target.cpu.arch.endian() != builtin.cpu.arch.endian(); + switch (self.ptr_width) { + .p32 => { + var shdr: [1]elf.Elf32_Shdr = undefined; + shdr[0] = shdrTo32(self.shdrs.items[index]); + if (foreign_endian) { + mem.byteSwapAllFields(elf.Elf32_Shdr, &shdr[0]); + } + const offset = self.shdr_table_offset.? + index * @sizeOf(elf.Elf32_Shdr); + return self.base.file.?.pwriteAll(mem.sliceAsBytes(&shdr), offset); + }, + .p64 => { + var shdr = [1]elf.Elf64_Shdr{self.shdrs.items[index]}; + if (foreign_endian) { + mem.byteSwapAllFields(elf.Elf64_Shdr, &shdr[0]); + } + const offset = self.shdr_table_offset.? + index * @sizeOf(elf.Elf64_Shdr); + return self.base.file.?.pwriteAll(mem.sliceAsBytes(&shdr), offset); + }, + } +} + +fn shdrTo32(shdr: elf.Elf64_Shdr) elf.Elf32_Shdr { return .{ .sh_name = shdr.sh_name, .sh_type = shdr.sh_type, @@ -3448,61 +3524,474 @@ const CsuObjects = struct { } }; -fn logSymtab(self: Elf) void { - log.debug("locals:", .{}); - for (self.local_symbols.items, 0..) 
|sym, id| { - log.debug(" {d}: {?s}: @{x} in {d}", .{ id, self.strtab.get(sym.st_name), sym.st_value, sym.st_shndx }); +pub fn calcImageBase(self: Elf) u64 { + if (self.base.options.pic) return 0; // TODO flag an error if PIC and image_base_override + return self.base.options.image_base_override orelse switch (self.ptr_width) { + .p32 => 0x1000, + .p64 => 0x1000000, + }; +} + +pub fn defaultEntryAddress(self: Elf) u64 { + if (self.entry_addr) |addr| return addr; + return switch (self.base.options.target.cpu.arch) { + .spu_2 => 0, + else => default_entry_addr, + }; +} + +pub fn isDynLib(self: Elf) bool { + return self.base.options.output_mode == .Lib and self.base.options.link_mode == .Dynamic; +} + +pub fn sectionByName(self: *Elf, name: [:0]const u8) ?u16 { + for (self.shdrs.items, 0..) |*shdr, i| { + const this_name = self.shstrtab.getAssumeExists(shdr.sh_name); + if (mem.eql(u8, this_name, name)) return @as(u16, @intCast(i)); + } else return null; +} + +pub fn calcNumIRelativeRelocs(self: *Elf) u64 { + _ = self; + unreachable; // TODO +} + +pub fn atom(self: *Elf, atom_index: Atom.Index) ?*Atom { + if (atom_index == 0) return null; + assert(atom_index < self.atoms.items.len); + return &self.atoms.items[atom_index]; +} + +pub fn addAtom(self: *Elf) !Atom.Index { + const index = @as(Atom.Index, @intCast(self.atoms.items.len)); + const atom_ptr = try self.atoms.addOne(self.base.allocator); + atom_ptr.* = .{ .atom_index = index }; + return index; +} + +pub fn file(self: *Elf, index: File.Index) ?File { + const tag = self.files.items(.tags)[index]; + return switch (tag) { + .null => null, + .linker_defined => .{ .linker_defined = &self.files.items(.data)[index].linker_defined }, + .zig_module => .{ .zig_module = &self.files.items(.data)[index].zig_module }, + .object => .{ .object = &self.files.items(.data)[index].object }, + }; +} + +/// Returns pointer-to-symbol described at sym_index. 
+pub fn symbol(self: *Elf, sym_index: Symbol.Index) *Symbol { + return &self.symbols.items[sym_index]; +} + +pub fn addSymbol(self: *Elf) !Symbol.Index { + try self.symbols.ensureUnusedCapacity(self.base.allocator, 1); + const index = blk: { + if (self.symbols_free_list.popOrNull()) |index| { + log.debug(" (reusing symbol index {d})", .{index}); + break :blk index; + } else { + log.debug(" (allocating symbol index {d})", .{self.symbols.items.len}); + const index = @as(Symbol.Index, @intCast(self.symbols.items.len)); + _ = self.symbols.addOneAssumeCapacity(); + break :blk index; + } + }; + self.symbols.items[index] = .{ .index = index }; + return index; +} + +pub fn addSymbolExtra(self: *Elf, extra: Symbol.Extra) !u32 { + const fields = @typeInfo(Symbol.Extra).Struct.fields; + try self.symbols_extra.ensureUnusedCapacity(self.base.allocator, fields.len); + return self.addSymbolExtraAssumeCapacity(extra); +} + +pub fn addSymbolExtraAssumeCapacity(self: *Elf, extra: Symbol.Extra) u32 { + const index = @as(u32, @intCast(self.symbols_extra.items.len)); + const fields = @typeInfo(Symbol.Extra).Struct.fields; + inline for (fields) |field| { + self.symbols_extra.appendAssumeCapacity(switch (field.type) { + u32 => @field(extra, field.name), + else => @compileError("bad field type"), + }); + } + return index; +} + +pub fn symbolExtra(self: *Elf, index: u32) ?Symbol.Extra { + if (index == 0) return null; + const fields = @typeInfo(Symbol.Extra).Struct.fields; + var i: usize = index; + var result: Symbol.Extra = undefined; + inline for (fields) |field| { + @field(result, field.name) = switch (field.type) { + u32 => self.symbols_extra.items[i], + else => @compileError("bad field type"), + }; + i += 1; + } + return result; +} + +pub fn setSymbolExtra(self: *Elf, index: u32, extra: Symbol.Extra) void { + assert(index > 0); + const fields = @typeInfo(Symbol.Extra).Struct.fields; + inline for (fields, 0..) 
|field, i| { + self.symbols_extra.items[index + i] = switch (field.type) { + u32 => @field(extra, field.name), + else => @compileError("bad field type"), + }; + } +} + +const GetOrPutGlobalResult = struct { + found_existing: bool, + index: Symbol.Index, +}; + +pub fn getOrPutGlobal(self: *Elf, name_off: u32) !GetOrPutGlobalResult { + const gpa = self.base.allocator; + const gop = try self.resolver.getOrPut(gpa, name_off); + if (!gop.found_existing) { + const index = try self.addSymbol(); + const global = self.symbol(index); + global.name_offset = name_off; + gop.value_ptr.* = index; } - log.debug("globals:", .{}); - for (self.global_symbols.items, 0..) |sym, id| { - log.debug(" {d}: {?s}: @{x} in {d}", .{ id, self.strtab.get(sym.st_name), sym.st_value, sym.st_shndx }); + return .{ + .found_existing = gop.found_existing, + .index = gop.value_ptr.*, + }; +} + +pub fn globalByName(self: *Elf, name: []const u8) ?Symbol.Index { + const name_off = self.strtab.getOffset(name) orelse return null; + return self.resolver.get(name_off); +} + +pub fn getGlobalSymbol(self: *Elf, name: []const u8, lib_name: ?[]const u8) !u32 { + _ = lib_name; + const gpa = self.base.allocator; + const off = try self.strtab.insert(gpa, name); + const zig_module = self.file(self.zig_module_index.?).?.zig_module; + const lookup_gop = try zig_module.globals_lookup.getOrPut(gpa, off); + if (!lookup_gop.found_existing) { + const esym_index = try zig_module.addGlobalEsym(gpa); + const esym = zig_module.elfSym(esym_index); + esym.st_name = off; + lookup_gop.value_ptr.* = esym_index; + const gop = try self.getOrPutGlobal(off); + try zig_module.global_symbols.append(gpa, gop.index); + } + return lookup_gop.value_ptr.*; +} + +const GetOrCreateComdatGroupOwnerResult = struct { + found_existing: bool, + index: ComdatGroupOwner.Index, +}; + +pub fn getOrCreateComdatGroupOwner(self: *Elf, off: u32) !GetOrCreateComdatGroupOwnerResult { + const gpa = self.base.allocator; + const gop = try 
self.comdat_groups_table.getOrPut(gpa, off); + if (!gop.found_existing) { + const index = @as(ComdatGroupOwner.Index, @intCast(self.comdat_groups_owners.items.len)); + const owner = try self.comdat_groups_owners.addOne(gpa); + owner.* = .{}; + gop.value_ptr.* = index; } + return .{ + .found_existing = gop.found_existing, + .index = gop.value_ptr.*, + }; } -pub fn getProgramHeader(self: *const Elf, shdr_index: u16) elf.Elf64_Phdr { - const index = self.sections.items(.phdr_index)[shdr_index]; - return self.program_headers.items[index]; +pub fn addComdatGroup(self: *Elf) !ComdatGroup.Index { + const index = @as(ComdatGroup.Index, @intCast(self.comdat_groups.items.len)); + _ = try self.comdat_groups.addOne(self.base.allocator); + return index; } -pub fn getProgramHeaderPtr(self: *Elf, shdr_index: u16) *elf.Elf64_Phdr { - const index = self.sections.items(.phdr_index)[shdr_index]; - return &self.program_headers.items[index]; +pub fn comdatGroup(self: *Elf, index: ComdatGroup.Index) *ComdatGroup { + assert(index < self.comdat_groups.items.len); + return &self.comdat_groups.items[index]; } -/// Returns pointer-to-symbol described at sym_index. -pub fn getSymbolPtr(self: *Elf, sym_index: u32) *elf.Elf64_Sym { - return &self.local_symbols.items[sym_index]; +pub fn comdatGroupOwner(self: *Elf, index: ComdatGroupOwner.Index) *ComdatGroupOwner { + assert(index < self.comdat_groups_owners.items.len); + return &self.comdat_groups_owners.items[index]; } -/// Returns symbol at sym_index. 
-pub fn getSymbol(self: *const Elf, sym_index: u32) elf.Elf64_Sym { - return self.local_symbols.items[sym_index]; +fn reportUndefined(self: *Elf, undefs: anytype) !void { + const gpa = self.base.allocator; + const max_notes = 4; + + try self.misc_errors.ensureUnusedCapacity(gpa, undefs.count()); + + var it = undefs.iterator(); + while (it.next()) |entry| { + const undef_index = entry.key_ptr.*; + const atoms = entry.value_ptr.*.items; + const nnotes = @min(atoms.len, max_notes); + + var notes = try std.ArrayList(link.File.ErrorMsg).initCapacity(gpa, max_notes + 1); + defer notes.deinit(); + + for (atoms[0..nnotes]) |atom_index| { + const atom_ptr = self.atom(atom_index).?; + const file_ptr = self.file(atom_ptr.file_index).?; + const note = try std.fmt.allocPrint(gpa, "referenced by {s}:{s}", .{ + file_ptr.fmtPath(), + atom_ptr.name(self), + }); + notes.appendAssumeCapacity(.{ .msg = note }); + } + + if (atoms.len > max_notes) { + const remaining = atoms.len - max_notes; + const note = try std.fmt.allocPrint(gpa, "referenced {d} more times", .{remaining}); + notes.appendAssumeCapacity(.{ .msg = note }); + } + + var err_msg = link.File.ErrorMsg{ + .msg = try std.fmt.allocPrint(gpa, "undefined symbol: {s}", .{self.symbol(undef_index).name(self)}), + }; + err_msg.notes = try notes.toOwnedSlice(); + + self.misc_errors.appendAssumeCapacity(err_msg); + } } -/// Returns name of the symbol at sym_index. 
-pub fn getSymbolName(self: *const Elf, sym_index: u32) []const u8 { - const sym = self.local_symbols.items[sym_index]; - return self.strtab.get(sym.st_name).?; +const ParseErrorCtx = struct { + detected_cpu_arch: std.Target.Cpu.Arch, +}; + +fn handleAndReportParseError( + self: *Elf, + path: []const u8, + err: ParseError, + ctx: *const ParseErrorCtx, +) error{OutOfMemory}!void { + const cpu_arch = self.base.options.target.cpu.arch; + switch (err) { + error.UnknownFileType => try self.reportParseError(path, "unknown file type", .{}), + error.InvalidCpuArch => try self.reportParseError( + path, + "invalid cpu architecture: expected '{s}', but found '{s}'", + .{ @tagName(cpu_arch), @tagName(ctx.detected_cpu_arch) }, + ), + else => |e| try self.reportParseError( + path, + "unexpected error: parsing object failed with error {s}", + .{@errorName(e)}, + ), + } } -/// Returns name of the global symbol at index. -pub fn getGlobalName(self: *const Elf, index: u32) []const u8 { - const sym = self.global_symbols.items[index]; - return self.strtab.get(sym.st_name).?; +fn reportParseError( + self: *Elf, + path: []const u8, + comptime format: []const u8, + args: anytype, +) error{OutOfMemory}!void { + const gpa = self.base.allocator; + try self.misc_errors.ensureUnusedCapacity(gpa, 1); + var notes = try gpa.alloc(link.File.ErrorMsg, 1); + errdefer gpa.free(notes); + notes[0] = .{ .msg = try std.fmt.allocPrint(gpa, "while parsing {s}", .{path}) }; + self.misc_errors.appendAssumeCapacity(.{ + .msg = try std.fmt.allocPrint(gpa, format, args), + .notes = notes, + }); } -pub fn getAtom(self: *const Elf, atom_index: Atom.Index) Atom { - assert(atom_index < self.atoms.items.len); - return self.atoms.items[atom_index]; +fn dumpState(self: *Elf) std.fmt.Formatter(fmtDumpState) { + return .{ .data = self }; } -pub fn getAtomPtr(self: *Elf, atom_index: Atom.Index) *Atom { - assert(atom_index < self.atoms.items.len); - return &self.atoms.items[atom_index]; +fn fmtDumpState( + self: *Elf, + 
comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, +) !void { + _ = unused_fmt_string; + _ = options; + + if (self.zig_module_index) |index| { + const zig_module = self.file(index).?.zig_module; + try writer.print("zig_module({d}) : {s}\n", .{ index, zig_module.path }); + try writer.print("{}\n", .{zig_module.fmtSymtab(self)}); + } + + for (self.objects.items) |index| { + const object = self.file(index).?.object; + try writer.print("object({d}) : {}", .{ index, object.fmtPath() }); + if (!object.alive) try writer.writeAll(" : [*]"); + try writer.writeByte('\n'); + try writer.print("{}{}{}{}{}\n", .{ + object.fmtAtoms(self), + object.fmtCies(self), + object.fmtFdes(self), + object.fmtSymtab(self), + object.fmtComdatGroups(self), + }); + } + + if (self.linker_defined_index) |index| { + const linker_defined = self.file(index).?.linker_defined; + try writer.print("linker_defined({d}) : (linker defined)\n", .{index}); + try writer.print("{}\n", .{linker_defined.fmtSymtab(self)}); + } + try writer.print("{}\n", .{self.got.fmt(self)}); +} + +/// Binary search +pub fn bsearch(comptime T: type, haystack: []align(1) const T, predicate: anytype) usize { + if (!@hasDecl(@TypeOf(predicate), "predicate")) + @compileError("Predicate is required to define fn predicate(@This(), T) bool"); + + var min: usize = 0; + var max: usize = haystack.len; + while (min < max) { + const index = (min + max) / 2; + const curr = haystack[index]; + if (predicate.predicate(curr)) { + min = index + 1; + } else { + max = index; + } + } + return min; } -/// Returns atom if there is an atom referenced by the symbol. -/// Returns null on failure. 
-pub fn getAtomIndexForSymbol(self: *Elf, sym_index: u32) ?Atom.Index { - return self.atom_by_index_table.get(sym_index); +/// Linear search +pub fn lsearch(comptime T: type, haystack: []align(1) const T, predicate: anytype) usize { + if (!@hasDecl(@TypeOf(predicate), "predicate")) + @compileError("Predicate is required to define fn predicate(@This(), T) bool"); + + var i: usize = 0; + while (i < haystack.len) : (i += 1) { + if (predicate.predicate(haystack[i])) break; + } + return i; } + +const default_entry_addr = 0x8000000; + +pub const base_tag: link.File.Tag = .elf; + +const LastAtomAndFreeList = struct { + /// Index of the last allocated atom in this section. + last_atom_index: Atom.Index = 0, + + /// A list of atoms that have surplus capacity. This list can have false + /// positives, as functions grow and shrink over time, only sometimes being added + /// or removed from the freelist. + /// + /// An atom has surplus capacity when its overcapacity value is greater than + /// padToIdeal(minimum_atom_size). That is, when it has so + /// much extra capacity, that we could fit a small new symbol in it, itself with + /// ideal_capacity or more. + /// + /// Ideal capacity is defined by size + (size / ideal_factor) + /// + /// Overcapacity is measured by actual_capacity - ideal_capacity. Note that + /// overcapacity can be negative. A simple way to have negative overcapacity is to + /// allocate a fresh text block, which will have ideal capacity, and then grow it + /// by 1 byte. It will then have -1 overcapacity. + free_list: std.ArrayListUnmanaged(Atom.Index) = .{}, +}; + +const LazySymbolMetadata = struct { + const State = enum { unused, pending_flush, flushed }; + text_symbol_index: Symbol.Index = undefined, + rodata_symbol_index: Symbol.Index = undefined, + text_state: State = .unused, + rodata_state: State = .unused, +}; + +const DeclMetadata = struct { + symbol_index: Symbol.Index, + /// A list of all exports aliases of this Decl. 
+ exports: std.ArrayListUnmanaged(Symbol.Index) = .{}, + + fn @"export"(m: DeclMetadata, elf_file: *Elf, name: []const u8) ?*u32 { + const zig_module = elf_file.file(elf_file.zig_module_index.?).?.zig_module; + for (m.exports.items) |*exp| { + const exp_name = elf_file.strtab.getAssumeExists(zig_module.elfSym(exp.*).st_name); + if (mem.eql(u8, name, exp_name)) return exp; + } + return null; + } +}; + +const ComdatGroupOwner = struct { + file: File.Index = 0, + const Index = u32; +}; + +pub const ComdatGroup = struct { + owner: ComdatGroupOwner.Index, + shndx: u16, + pub const Index = u32; +}; + +pub const SymtabSize = struct { + nlocals: u32 = 0, + nglobals: u32 = 0, +}; + +pub const null_sym = elf.Elf64_Sym{ + .st_name = 0, + .st_info = 0, + .st_other = 0, + .st_shndx = 0, + .st_value = 0, + .st_size = 0, +}; + +const std = @import("std"); +const build_options = @import("build_options"); +const builtin = @import("builtin"); +const assert = std.debug.assert; +const elf = std.elf; +const fs = std.fs; +const log = std.log.scoped(.link); +const state_log = std.log.scoped(.link_state); +const math = std.math; +const mem = std.mem; + +const codegen = @import("../codegen.zig"); +const glibc = @import("../glibc.zig"); +const link = @import("../link.zig"); +const lldMain = @import("../main.zig").lldMain; +const musl = @import("../musl.zig"); +const target_util = @import("../target.zig"); +const trace = @import("../tracy.zig").trace; +const synthetic_sections = @import("Elf/synthetic_sections.zig"); + +const Air = @import("../Air.zig"); +const Allocator = std.mem.Allocator; +pub const Atom = @import("Elf/Atom.zig"); +const Cache = std.Build.Cache; +const Compilation = @import("../Compilation.zig"); +const Dwarf = @import("Dwarf.zig"); +const Elf = @This(); +const File = @import("Elf/file.zig").File; +const GotSection = synthetic_sections.GotSection; +const LinkerDefined = @import("Elf/LinkerDefined.zig"); +const Liveness = @import("../Liveness.zig"); +const LlvmObject = 
@import("../codegen/llvm.zig").Object; +const Module = @import("../Module.zig"); +const Object = @import("Elf/Object.zig"); +const InternPool = @import("../InternPool.zig"); +const Package = @import("../Package.zig"); +const Symbol = @import("Elf/Symbol.zig"); +const StringTable = @import("strtab.zig").StringTable; +const TableSection = @import("table_section.zig").TableSection; +const Type = @import("../type.zig").Type; +const TypedValue = @import("../TypedValue.zig"); +const Value = @import("../value.zig").Value; +const ZigModule = @import("Elf/ZigModule.zig"); diff --git a/src/link/Elf/Atom.zig b/src/link/Elf/Atom.zig index b437be3282..82c2b46d1d 100644 --- a/src/link/Elf/Atom.zig +++ b/src/link/Elf/Atom.zig @@ -1,108 +1,604 @@ -const Atom = @This(); +/// Address allocated for this Atom. +value: u64 = 0, -const std = @import("std"); -const assert = std.debug.assert; -const elf = std.elf; +/// Name of this Atom. +name_offset: u32 = 0, -const Elf = @import("../Elf.zig"); +/// Index into linker's input file table. +file_index: File.Index = 0, -/// Each decl always gets a local symbol with the fully qualified name. -/// The vaddr and size are found here directly. -/// The file offset is found by computing the vaddr offset from the section vaddr -/// the symbol references, and adding that to the file offset of the section. -/// If this field is 0, it means the codegen size = 0 and there is no symbol or -/// offset table entry. -local_sym_index: u32, +/// Size of this atom +size: u64 = 0, -/// Points to the previous and next neighbors, based on the `text_offset`. -/// This can be used to find, for example, the capacity of this `TextBlock`. -prev_index: ?Index, -next_index: ?Index, +/// Alignment of this atom as a power of two. +alignment: u8 = 0, -pub const Index = u32; +/// Index of the input section. +input_section_index: Index = 0, -pub const Reloc = struct { - target: u32, - offset: u64, - addend: u32, - prev_vaddr: u64, -}; +/// Index of the output section. 
+output_section_index: Index = 0, -pub fn getSymbolIndex(self: Atom) ?u32 { - if (self.local_sym_index == 0) return null; - return self.local_sym_index; -} +/// Index of the input section containing this atom's relocs. +relocs_section_index: Index = 0, + +/// Index of this atom in the linker's atoms table. +atom_index: Index = 0, + +/// Specifies whether this atom is alive or has been garbage collected. +alive: bool = false, + +/// Specifies if the atom has been visited during garbage collection. +visited: bool = false, + +/// Start index of FDEs referencing this atom. +fde_start: u32 = 0, -pub fn getSymbol(self: Atom, elf_file: *const Elf) elf.Elf64_Sym { - return elf_file.getSymbol(self.getSymbolIndex().?); +/// End index of FDEs referencing this atom. +fde_end: u32 = 0, + +/// Points to the previous and next neighbors, based on the `text_offset`. +/// This can be used to find, for example, the capacity of this `TextBlock`. +prev_index: Index = 0, +next_index: Index = 0, + +pub fn name(self: Atom, elf_file: *Elf) []const u8 { + return elf_file.strtab.getAssumeExists(self.name_offset); } -pub fn getSymbolPtr(self: Atom, elf_file: *Elf) *elf.Elf64_Sym { - return elf_file.getSymbolPtr(self.getSymbolIndex().?); +pub fn inputShdr(self: Atom, elf_file: *Elf) elf.Elf64_Shdr { + const object = elf_file.file(self.file_index).?.object; + return object.shdrs.items[self.input_section_index]; } -pub fn getName(self: Atom, elf_file: *const Elf) []const u8 { - return elf_file.getSymbolName(self.getSymbolIndex().?); +pub fn codeInObject(self: Atom, elf_file: *Elf) error{Overflow}![]const u8 { + const object = elf_file.file(self.file_index).?.object; + return object.shdrContents(self.input_section_index); } -/// If entry already exists, returns index to it. -/// Otherwise, creates a new entry in the Global Offset Table for this Atom. 
-pub fn getOrCreateOffsetTableEntry(self: Atom, elf_file: *Elf) !u32 { - const sym_index = self.getSymbolIndex().?; - if (elf_file.got_table.lookup.get(sym_index)) |index| return index; - const index = try elf_file.got_table.allocateEntry(elf_file.base.allocator, sym_index); - elf_file.got_table_count_dirty = true; - return index; +/// Returns atom's code and optionally uncompresses data if required (for compressed sections). +/// Caller owns the memory. +pub fn codeInObjectUncompressAlloc(self: Atom, elf_file: *Elf) ![]u8 { + const gpa = elf_file.base.allocator; + const data = try self.codeInObject(elf_file); + const shdr = self.inputShdr(elf_file); + if (shdr.sh_flags & elf.SHF_COMPRESSED != 0) { + const chdr = @as(*align(1) const elf.Elf64_Chdr, @ptrCast(data.ptr)).*; + switch (chdr.ch_type) { + .ZLIB => { + var stream = std.io.fixedBufferStream(data[@sizeOf(elf.Elf64_Chdr)..]); + var zlib_stream = std.compress.zlib.decompressStream(gpa, stream.reader()) catch + return error.InputOutput; + defer zlib_stream.deinit(); + const size = std.math.cast(usize, chdr.ch_size) orelse return error.Overflow; + const decomp = try gpa.alloc(u8, size); + const nread = zlib_stream.reader().readAll(decomp) catch return error.InputOutput; + if (nread != decomp.len) { + return error.InputOutput; + } + return decomp; + }, + else => @panic("TODO unhandled compression scheme"), + } + } else return gpa.dupe(u8, data); } -pub fn getOffsetTableAddress(self: Atom, elf_file: *Elf) u64 { - const sym_index = self.getSymbolIndex().?; - const got_entry_index = elf_file.got_table.lookup.get(sym_index).?; - const target = elf_file.base.options.target; - const ptr_bits = target.ptrBitWidth(); - const ptr_bytes: u64 = @divExact(ptr_bits, 8); - const got = elf_file.program_headers.items[elf_file.phdr_got_index.?]; - return got.p_vaddr + got_entry_index * ptr_bytes; +pub fn priority(self: Atom, elf_file: *Elf) u64 { + const index = elf_file.file(self.file_index).?.index(); + return (@as(u64, 
@intCast(index)) << 32) | @as(u64, @intCast(self.input_section_index)); } /// Returns how much room there is to grow in virtual address space. /// File offset relocation happens transparently, so it is not included in /// this calculation. -pub fn capacity(self: Atom, elf_file: *const Elf) u64 { - const self_sym = self.getSymbol(elf_file); - if (self.next_index) |next_index| { - const next = elf_file.getAtom(next_index); - const next_sym = next.getSymbol(elf_file); - return next_sym.st_value - self_sym.st_value; - } else { - // We are the last block. The capacity is limited only by virtual address space. - return std.math.maxInt(u32) - self_sym.st_value; - } +pub fn capacity(self: Atom, elf_file: *Elf) u64 { + const next_value = if (elf_file.atom(self.next_index)) |next| next.value else std.math.maxInt(u32); + return next_value - self.value; } -pub fn freeListEligible(self: Atom, elf_file: *const Elf) bool { +pub fn freeListEligible(self: Atom, elf_file: *Elf) bool { // No need to keep a free list node for the last block. 
- const next_index = self.next_index orelse return false; - const next = elf_file.getAtom(next_index); - const self_sym = self.getSymbol(elf_file); - const next_sym = next.getSymbol(elf_file); - const cap = next_sym.st_value - self_sym.st_value; - const ideal_cap = Elf.padToIdeal(self_sym.st_size); + const next = elf_file.atom(self.next_index) orelse return false; + const cap = next.value - self.value; + const ideal_cap = Elf.padToIdeal(self.size); if (cap <= ideal_cap) return false; const surplus = cap - ideal_cap; return surplus >= Elf.min_text_capacity; } -pub fn addRelocation(elf_file: *Elf, atom_index: Index, reloc: Reloc) !void { +pub fn allocate(self: *Atom, elf_file: *Elf) !void { + const shdr = &elf_file.shdrs.items[self.output_section_index]; + const meta = elf_file.last_atom_and_free_list_table.getPtr(self.output_section_index).?; + const free_list = &meta.free_list; + const last_atom_index = &meta.last_atom_index; + const new_atom_ideal_capacity = Elf.padToIdeal(self.size); + const alignment = try std.math.powi(u64, 2, self.alignment); + + // We use these to indicate our intention to update metadata, placing the new atom, + // and possibly removing a free list node. + // It would be simpler to do it inside the for loop below, but that would cause a + // problem if an error was returned later in the function. So this action + // is actually carried out at the end of the function, when errors are no longer possible. + var atom_placement: ?Atom.Index = null; + var free_list_removal: ?usize = null; + + // First we look for an appropriately sized free list node. + // The list is unordered. We'll just take the first thing that works. + self.value = blk: { + var i: usize = if (elf_file.base.child_pid == null) 0 else free_list.items.len; + while (i < free_list.items.len) { + const big_atom_index = free_list.items[i]; + const big_atom = elf_file.atom(big_atom_index).?; + // We now have a pointer to a live atom that has too much capacity. 
+ // Is it enough that we could fit this new atom? + const cap = big_atom.capacity(elf_file); + const ideal_capacity = Elf.padToIdeal(cap); + const ideal_capacity_end_vaddr = std.math.add(u64, big_atom.value, ideal_capacity) catch ideal_capacity; + const capacity_end_vaddr = big_atom.value + cap; + const new_start_vaddr_unaligned = capacity_end_vaddr - new_atom_ideal_capacity; + const new_start_vaddr = std.mem.alignBackward(u64, new_start_vaddr_unaligned, alignment); + if (new_start_vaddr < ideal_capacity_end_vaddr) { + // Additional bookkeeping here to notice if this free list node + // should be deleted because the block that it points to has grown to take up + // more of the extra capacity. + if (!big_atom.freeListEligible(elf_file)) { + _ = free_list.swapRemove(i); + } else { + i += 1; + } + continue; + } + // At this point we know that we will place the new block here. But the + // remaining question is whether there is still yet enough capacity left + // over for there to still be a free list node. + const remaining_capacity = new_start_vaddr - ideal_capacity_end_vaddr; + const keep_free_list_node = remaining_capacity >= Elf.min_text_capacity; + + // Set up the metadata to be updated, after errors are no longer possible. + atom_placement = big_atom_index; + if (!keep_free_list_node) { + free_list_removal = i; + } + break :blk new_start_vaddr; + } else if (elf_file.atom(last_atom_index.*)) |last| { + const ideal_capacity = Elf.padToIdeal(last.size); + const ideal_capacity_end_vaddr = last.value + ideal_capacity; + const new_start_vaddr = std.mem.alignForward(u64, ideal_capacity_end_vaddr, alignment); + // Set up the metadata to be updated, after errors are no longer possible. 
+ atom_placement = last.atom_index; + break :blk new_start_vaddr; + } else { + break :blk shdr.sh_addr; + } + }; + + const expand_section = if (atom_placement) |placement_index| + elf_file.atom(placement_index).?.next_index == 0 + else + true; + if (expand_section) { + const needed_size = (self.value + self.size) - shdr.sh_addr; + try elf_file.growAllocSection(self.output_section_index, needed_size); + last_atom_index.* = self.atom_index; + + if (elf_file.dwarf) |_| { + // The .debug_info section has `low_pc` and `high_pc` values which is the virtual address + // range of the compilation unit. When we expand the text section, this range changes, + // so the DW_TAG.compile_unit tag of the .debug_info section becomes dirty. + elf_file.debug_info_header_dirty = true; + // This becomes dirty for the same reason. We could potentially make this more + // fine-grained with the addition of support for more compilation units. It is planned to + // model each package as a different compilation unit. + elf_file.debug_aranges_section_dirty = true; + } + } + shdr.sh_addralign = @max(shdr.sh_addralign, alignment); + + // This function can also reallocate an atom. + // In this case we need to "unplug" it from its previous location before + // plugging it in to its new location. 
+ if (elf_file.atom(self.prev_index)) |prev| { + prev.next_index = self.next_index; + } + if (elf_file.atom(self.next_index)) |next| { + next.prev_index = self.prev_index; + } + + if (atom_placement) |big_atom_index| { + const big_atom = elf_file.atom(big_atom_index).?; + self.prev_index = big_atom_index; + self.next_index = big_atom.next_index; + big_atom.next_index = self.atom_index; + } else { + self.prev_index = 0; + self.next_index = 0; + } + if (free_list_removal) |i| { + _ = free_list.swapRemove(i); + } +} + +pub fn shrink(self: *Atom, elf_file: *Elf) void { + _ = self; + _ = elf_file; +} + +pub fn grow(self: *Atom, elf_file: *Elf) !void { + const alignment = try std.math.powi(u64, 2, self.alignment); + const align_ok = std.mem.alignBackward(u64, self.value, alignment) == self.value; + const need_realloc = !align_ok or self.size > self.capacity(elf_file); + if (need_realloc) try self.allocate(elf_file); +} + +pub fn free(self: *Atom, elf_file: *Elf) void { + log.debug("freeAtom {d} ({s})", .{ self.atom_index, self.name(elf_file) }); + + const gpa = elf_file.base.allocator; + const zig_module = elf_file.file(self.file_index).?.zig_module; + const shndx = self.output_section_index; + const meta = elf_file.last_atom_and_free_list_table.getPtr(shndx).?; + const free_list = &meta.free_list; + const last_atom_index = &meta.last_atom_index; + var already_have_free_list_node = false; + { + var i: usize = 0; + // TODO turn free_list into a hash map + while (i < free_list.items.len) { + if (free_list.items[i] == self.atom_index) { + _ = free_list.swapRemove(i); + continue; + } + if (free_list.items[i] == self.prev_index) { + already_have_free_list_node = true; + } + i += 1; + } + } + + if (elf_file.atom(last_atom_index.*)) |last_atom| { + if (last_atom.atom_index == self.atom_index) { + if (elf_file.atom(self.prev_index)) |_| { + // TODO shrink the section size here + last_atom_index.* = self.prev_index; + } else { + last_atom_index.* = 0; + } + } + } + + if 
(elf_file.atom(self.prev_index)) |prev| { + prev.next_index = self.next_index; + if (!already_have_free_list_node and prev.*.freeListEligible(elf_file)) { + // The free list is heuristics, it doesn't have to be perfect, so we can + // ignore the OOM here. + free_list.append(gpa, prev.atom_index) catch {}; + } + } else { + self.prev_index = 0; + } + + if (elf_file.atom(self.next_index)) |next| { + next.prev_index = self.prev_index; + } else { + self.next_index = 0; + } + + // TODO create relocs free list + self.freeRelocs(elf_file); + assert(zig_module.atoms.swapRemove(self.atom_index)); + self.* = .{}; +} + +pub fn relocs(self: Atom, elf_file: *Elf) error{Overflow}![]align(1) const elf.Elf64_Rela { + return switch (elf_file.file(self.file_index).?) { + .zig_module => |x| x.relocs.items[self.relocs_section_index].items, + .object => |x| x.getRelocs(self.relocs_section_index), + else => unreachable, + }; +} + +pub fn addReloc(self: Atom, elf_file: *Elf, reloc: elf.Elf64_Rela) !void { const gpa = elf_file.base.allocator; - const gop = try elf_file.relocs.getOrPut(gpa, atom_index); - if (!gop.found_existing) { - gop.value_ptr.* = .{}; + const file_ptr = elf_file.file(self.file_index).?; + assert(file_ptr == .zig_module); + const zig_module = file_ptr.zig_module; + const rels = &zig_module.relocs.items[self.relocs_section_index]; + try rels.append(gpa, reloc); +} + +pub fn freeRelocs(self: Atom, elf_file: *Elf) void { + const file_ptr = elf_file.file(self.file_index).?; + assert(file_ptr == .zig_module); + const zig_module = file_ptr.zig_module; + zig_module.relocs.items[self.relocs_section_index].clearRetainingCapacity(); +} + +pub fn scanRelocs(self: Atom, elf_file: *Elf, undefs: anytype) !void { + const file_ptr = elf_file.file(self.file_index).?; + const rels = try self.relocs(elf_file); + var i: usize = 0; + while (i < rels.len) : (i += 1) { + const rel = rels[i]; + + if (rel.r_type() == elf.R_X86_64_NONE) continue; + + const symbol = switch (file_ptr) { + 
.zig_module => |x| elf_file.symbol(x.symbol(rel.r_sym())), + .object => |x| elf_file.symbol(x.symbols.items[rel.r_sym()]), + else => unreachable, + }; + + // Check for violation of One Definition Rule for COMDATs. + if (symbol.file(elf_file) == null) { + // TODO convert into an error + log.debug("{}: {s}: {s} refers to a discarded COMDAT section", .{ + file_ptr.fmtPath(), + self.name(elf_file), + symbol.name(elf_file), + }); + continue; + } + + // Report an undefined symbol. + try self.reportUndefined(elf_file, symbol, rel, undefs); + + // While traversing relocations, mark symbols that require special handling such as + // pointer indirection via GOT, or a stub trampoline via PLT. + switch (rel.r_type()) { + elf.R_X86_64_64 => {}, + + elf.R_X86_64_32, + elf.R_X86_64_32S, + => {}, + + elf.R_X86_64_GOT32, + elf.R_X86_64_GOT64, + elf.R_X86_64_GOTPC32, + elf.R_X86_64_GOTPC64, + elf.R_X86_64_GOTPCREL, + elf.R_X86_64_GOTPCREL64, + elf.R_X86_64_GOTPCRELX, + elf.R_X86_64_REX_GOTPCRELX, + => { + symbol.flags.needs_got = true; + }, + + elf.R_X86_64_PLT32, + elf.R_X86_64_PLTOFF64, + => { + if (symbol.flags.import) { + symbol.flags.needs_plt = true; + } + }, + + elf.R_X86_64_PC32 => {}, + + else => @panic("TODO"), + } + } +} + +// This function will report any undefined non-weak symbols that are not imports. +fn reportUndefined(self: Atom, elf_file: *Elf, sym: *const Symbol, rel: elf.Elf64_Rela, undefs: anytype) !void { + const rel_esym = switch (elf_file.file(self.file_index).?) 
{ + .zig_module => |x| x.elfSym(rel.r_sym()).*, + .object => |x| x.symtab[rel.r_sym()], + else => unreachable, + }; + const esym = sym.elfSym(elf_file); + if (rel_esym.st_shndx == elf.SHN_UNDEF and + rel_esym.st_bind() == elf.STB_GLOBAL and + sym.esym_index > 0 and + !sym.flags.import and + esym.st_shndx == elf.SHN_UNDEF) + { + const gop = try undefs.getOrPut(sym.index); + if (!gop.found_existing) { + gop.value_ptr.* = std.ArrayList(Atom.Index).init(elf_file.base.allocator); + } + try gop.value_ptr.append(self.atom_index); + } +} + +/// TODO mark relocs dirty +pub fn resolveRelocs(self: Atom, elf_file: *Elf, code: []u8) !void { + relocs_log.debug("0x{x}: {s}", .{ self.value, self.name(elf_file) }); + + const file_ptr = elf_file.file(self.file_index).?; + var stream = std.io.fixedBufferStream(code); + const cwriter = stream.writer(); + + for (try self.relocs(elf_file)) |rel| { + const r_type = rel.r_type(); + if (r_type == elf.R_X86_64_NONE) continue; + + const target = switch (file_ptr) { + .zig_module => |x| elf_file.symbol(x.symbol(rel.r_sym())), + .object => |x| elf_file.symbol(x.symbols.items[rel.r_sym()]), + else => unreachable, + }; + + // We will use equation format to resolve relocations: + // https://intezer.com/blog/malware-analysis/executable-and-linkable-format-101-part-3-relocations/ + // + // Address of the source atom. + const P = @as(i64, @intCast(self.value + rel.r_offset)); + // Addend from the relocation. + const A = rel.r_addend; + // Address of the target symbol - can be address of the symbol within an atom or address of PLT stub. + const S = @as(i64, @intCast(target.address(.{}, elf_file))); + // Address of the global offset table. 
+ const GOT = blk: { + const shndx = if (elf_file.got_plt_section_index) |shndx| + shndx + else if (elf_file.got_section_index) |shndx| + shndx + else + null; + break :blk if (shndx) |index| @as(i64, @intCast(elf_file.shdrs.items[index].sh_addr)) else 0; + }; + // Relative offset to the start of the global offset table. + const G = @as(i64, @intCast(target.gotAddress(elf_file))) - GOT; + // // Address of the thread pointer. + // const TP = @as(i64, @intCast(elf_file.getTpAddress())); + // // Address of the dynamic thread pointer. + // const DTP = @as(i64, @intCast(elf_file.getDtpAddress())); + + relocs_log.debug(" {s}: {x}: [{x} => {x}] G({x}) ({s})", .{ + fmtRelocType(r_type), + rel.r_offset, + P, + S + A, + G + GOT + A, + target.name(elf_file), + }); + + try stream.seekTo(rel.r_offset); + + switch (rel.r_type()) { + elf.R_X86_64_NONE => unreachable, + + elf.R_X86_64_64 => try cwriter.writeIntLittle(i64, S + A), + + elf.R_X86_64_PLT32, + elf.R_X86_64_PC32, + => try cwriter.writeIntLittle(i32, @as(i32, @intCast(S + A - P))), + + else => { + log.err("TODO: unhandled relocation type {}", .{fmtRelocType(rel.r_type())}); + @panic("TODO unhandled relocation type"); + }, + } } - try gop.value_ptr.append(gpa, reloc); } -pub fn freeRelocations(elf_file: *Elf, atom_index: Index) void { - var removed_relocs = elf_file.relocs.fetchRemove(atom_index); - if (removed_relocs) |*relocs| relocs.value.deinit(elf_file.base.allocator); +pub fn fmtRelocType(r_type: u32) std.fmt.Formatter(formatRelocType) { + return .{ .data = r_type }; +} + +fn formatRelocType( + r_type: u32, + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, +) !void { + _ = unused_fmt_string; + _ = options; + const str = switch (r_type) { + elf.R_X86_64_NONE => "R_X86_64_NONE", + elf.R_X86_64_64 => "R_X86_64_64", + elf.R_X86_64_PC32 => "R_X86_64_PC32", + elf.R_X86_64_GOT32 => "R_X86_64_GOT32", + elf.R_X86_64_PLT32 => "R_X86_64_PLT32", + elf.R_X86_64_COPY => "R_X86_64_COPY", 
+ elf.R_X86_64_GLOB_DAT => "R_X86_64_GLOB_DAT", + elf.R_X86_64_JUMP_SLOT => "R_X86_64_JUMP_SLOT", + elf.R_X86_64_RELATIVE => "R_X86_64_RELATIVE", + elf.R_X86_64_GOTPCREL => "R_X86_64_GOTPCREL", + elf.R_X86_64_32 => "R_X86_64_32", + elf.R_X86_64_32S => "R_X86_64_32S", + elf.R_X86_64_16 => "R_X86_64_16", + elf.R_X86_64_PC16 => "R_X86_64_PC16", + elf.R_X86_64_8 => "R_X86_64_8", + elf.R_X86_64_PC8 => "R_X86_64_PC8", + elf.R_X86_64_DTPMOD64 => "R_X86_64_DTPMOD64", + elf.R_X86_64_DTPOFF64 => "R_X86_64_DTPOFF64", + elf.R_X86_64_TPOFF64 => "R_X86_64_TPOFF64", + elf.R_X86_64_TLSGD => "R_X86_64_TLSGD", + elf.R_X86_64_TLSLD => "R_X86_64_TLSLD", + elf.R_X86_64_DTPOFF32 => "R_X86_64_DTPOFF32", + elf.R_X86_64_GOTTPOFF => "R_X86_64_GOTTPOFF", + elf.R_X86_64_TPOFF32 => "R_X86_64_TPOFF32", + elf.R_X86_64_PC64 => "R_X86_64_PC64", + elf.R_X86_64_GOTOFF64 => "R_X86_64_GOTOFF64", + elf.R_X86_64_GOTPC32 => "R_X86_64_GOTPC32", + elf.R_X86_64_GOT64 => "R_X86_64_GOT64", + elf.R_X86_64_GOTPCREL64 => "R_X86_64_GOTPCREL64", + elf.R_X86_64_GOTPC64 => "R_X86_64_GOTPC64", + elf.R_X86_64_GOTPLT64 => "R_X86_64_GOTPLT64", + elf.R_X86_64_PLTOFF64 => "R_X86_64_PLTOFF64", + elf.R_X86_64_SIZE32 => "R_X86_64_SIZE32", + elf.R_X86_64_SIZE64 => "R_X86_64_SIZE64", + elf.R_X86_64_GOTPC32_TLSDESC => "R_X86_64_GOTPC32_TLSDESC", + elf.R_X86_64_TLSDESC_CALL => "R_X86_64_TLSDESC_CALL", + elf.R_X86_64_TLSDESC => "R_X86_64_TLSDESC", + elf.R_X86_64_IRELATIVE => "R_X86_64_IRELATIVE", + elf.R_X86_64_RELATIVE64 => "R_X86_64_RELATIVE64", + elf.R_X86_64_GOTPCRELX => "R_X86_64_GOTPCRELX", + elf.R_X86_64_REX_GOTPCRELX => "R_X86_64_REX_GOTPCRELX", + elf.R_X86_64_NUM => "R_X86_64_NUM", + else => "R_X86_64_UNKNOWN", + }; + try writer.print("{s}", .{str}); +} + +pub fn format( + atom: Atom, + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, +) !void { + _ = atom; + _ = unused_fmt_string; + _ = options; + _ = writer; + @compileError("do not format symbols directly"); +} + +pub fn 
fmt(atom: Atom, elf_file: *Elf) std.fmt.Formatter(format2) { + return .{ .data = .{ + .atom = atom, + .elf_file = elf_file, + } }; } + +const FormatContext = struct { + atom: Atom, + elf_file: *Elf, +}; + +fn format2( + ctx: FormatContext, + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, +) !void { + _ = options; + _ = unused_fmt_string; + const atom = ctx.atom; + const elf_file = ctx.elf_file; + try writer.print("atom({d}) : {s} : @{x} : sect({d}) : align({x}) : size({x})", .{ + atom.atom_index, atom.name(elf_file), atom.value, + atom.output_section_index, atom.alignment, atom.size, + }); + // if (atom.fde_start != atom.fde_end) { + // try writer.writeAll(" : fdes{ "); + // for (atom.getFdes(elf_file), atom.fde_start..) |fde, i| { + // try writer.print("{d}", .{i}); + // if (!fde.alive) try writer.writeAll("([*])"); + // if (i < atom.fde_end - 1) try writer.writeAll(", "); + // } + // try writer.writeAll(" }"); + // } + const gc_sections = if (elf_file.base.options.gc_sections) |gc_sections| gc_sections else false; + if (gc_sections and !atom.alive) { + try writer.writeAll(" : [*]"); + } +} + +// TODO this has to be u32 but for now, to avoid redesigning elfSym machinery for +// ZigModule, keep it at u16 with the intention of bumping it to u32 in the near +// future. 
+pub const Index = u16; + +const std = @import("std"); +const assert = std.debug.assert; +const elf = std.elf; +const log = std.log.scoped(.link); +const relocs_log = std.log.scoped(.link_relocs); + +const Allocator = std.mem.Allocator; +const Atom = @This(); +const Elf = @import("../Elf.zig"); +const File = @import("file.zig").File; +const Symbol = @import("Symbol.zig"); diff --git a/src/link/Elf/LinkerDefined.zig b/src/link/Elf/LinkerDefined.zig new file mode 100644 index 0000000000..0c6666e8bb --- /dev/null +++ b/src/link/Elf/LinkerDefined.zig @@ -0,0 +1,112 @@ +index: File.Index, +symtab: std.ArrayListUnmanaged(elf.Elf64_Sym) = .{}, +symbols: std.ArrayListUnmanaged(Symbol.Index) = .{}, + +output_symtab_size: Elf.SymtabSize = .{}, + +pub fn deinit(self: *LinkerDefined, allocator: Allocator) void { + self.symtab.deinit(allocator); + self.symbols.deinit(allocator); +} + +pub fn addGlobal(self: *LinkerDefined, name: [:0]const u8, elf_file: *Elf) !u32 { + const gpa = elf_file.base.allocator; + try self.symtab.ensureUnusedCapacity(gpa, 1); + try self.symbols.ensureUnusedCapacity(gpa, 1); + self.symtab.appendAssumeCapacity(.{ + .st_name = try elf_file.strtab.insert(gpa, name), + .st_info = elf.STB_GLOBAL << 4, + .st_other = @intFromEnum(elf.STV.HIDDEN), + .st_shndx = elf.SHN_ABS, + .st_value = 0, + .st_size = 0, + }); + const off = try elf_file.strtab.insert(gpa, name); + const gop = try elf_file.getOrPutGlobal(off); + self.symbols.addOneAssumeCapacity().* = gop.index; + return gop.index; +} + +pub fn resolveSymbols(self: *LinkerDefined, elf_file: *Elf) void { + for (self.symbols.items, 0..) 
|index, i| { + const sym_idx = @as(Symbol.Index, @intCast(i)); + const this_sym = self.symtab.items[sym_idx]; + + if (this_sym.st_shndx == elf.SHN_UNDEF) continue; + + const global = elf_file.symbol(index); + if (self.asFile().symbolRank(this_sym, false) < global.symbolRank(elf_file)) { + global.value = 0; + global.name_offset = global.name_offset; + global.atom_index = 0; + global.file_index = self.index; + global.esym_index = sym_idx; + global.version_index = elf_file.default_sym_version; + } + } +} + +pub fn updateSymtabSize(self: *LinkerDefined, elf_file: *Elf) void { + for (self.globals()) |global_index| { + const global = elf_file.symbol(global_index); + if (global.file(elf_file)) |file| if (file.index() != self.index) continue; + global.flags.output_symtab = true; + self.output_symtab_size.nlocals += 1; + } +} + +pub fn writeSymtab(self: *LinkerDefined, elf_file: *Elf, ctx: anytype) void { + var ilocal = ctx.ilocal; + for (self.globals()) |global_index| { + const global = elf_file.symbol(global_index); + if (global.file(elf_file)) |file| if (file.index() != self.index) continue; + if (!global.flags.output_symtab) continue; + global.setOutputSym(elf_file, &ctx.symtab[ilocal]); + ilocal += 1; + } +} + +pub fn globals(self: *LinkerDefined) []const Symbol.Index { + return self.symbols.items; +} + +pub fn asFile(self: *LinkerDefined) File { + return .{ .linker_defined = self }; +} + +pub fn fmtSymtab(self: *LinkerDefined, elf_file: *Elf) std.fmt.Formatter(formatSymtab) { + return .{ .data = .{ + .self = self, + .elf_file = elf_file, + } }; +} + +const FormatContext = struct { + self: *LinkerDefined, + elf_file: *Elf, +}; + +fn formatSymtab( + ctx: FormatContext, + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, +) !void { + _ = unused_fmt_string; + _ = options; + try writer.writeAll(" globals\n"); + for (ctx.self.globals()) |index| { + const global = ctx.elf_file.symbol(index); + try writer.print(" {}\n", 
.{global.fmt(ctx.elf_file)}); + } +} + +const std = @import("std"); +const elf = std.elf; + +const Allocator = std.mem.Allocator; +const Elf = @import("../Elf.zig"); +const File = @import("file.zig").File; +const LinkerDefined = @This(); +// const Object = @import("Object.zig"); +const Symbol = @import("Symbol.zig"); diff --git a/src/link/Elf/Object.zig b/src/link/Elf/Object.zig new file mode 100644 index 0000000000..b0a6ef2a1c --- /dev/null +++ b/src/link/Elf/Object.zig @@ -0,0 +1,872 @@ +archive: ?[]const u8 = null, +path: []const u8, +data: []const u8, +index: File.Index, + +header: ?elf.Elf64_Ehdr = null, +shdrs: std.ArrayListUnmanaged(elf.Elf64_Shdr) = .{}, +strings: StringTable(.object_strings) = .{}, +symtab: []align(1) const elf.Elf64_Sym = &[0]elf.Elf64_Sym{}, +strtab: []const u8 = &[0]u8{}, +first_global: ?Symbol.Index = null, + +symbols: std.ArrayListUnmanaged(Symbol.Index) = .{}, +atoms: std.ArrayListUnmanaged(Atom.Index) = .{}, +comdat_groups: std.ArrayListUnmanaged(Elf.ComdatGroup.Index) = .{}, + +fdes: std.ArrayListUnmanaged(Fde) = .{}, +cies: std.ArrayListUnmanaged(Cie) = .{}, + +alive: bool = true, +num_dynrelocs: u32 = 0, + +output_symtab_size: Elf.SymtabSize = .{}, + +pub fn isObject(file: std.fs.File) bool { + const reader = file.reader(); + const header = reader.readStruct(elf.Elf64_Ehdr) catch return false; + defer file.seekTo(0) catch {}; + if (!mem.eql(u8, header.e_ident[0..4], "\x7fELF")) return false; + if (header.e_ident[elf.EI_VERSION] != 1) return false; + if (header.e_type != elf.ET.REL) return false; + if (header.e_version != 1) return false; + return true; +} + +pub fn deinit(self: *Object, allocator: Allocator) void { + allocator.free(self.data); + self.shdrs.deinit(allocator); + self.strings.deinit(allocator); + self.symbols.deinit(allocator); + self.atoms.deinit(allocator); + self.comdat_groups.deinit(allocator); + self.fdes.deinit(allocator); + self.cies.deinit(allocator); +} + +pub fn parse(self: *Object, elf_file: *Elf) !void { 
+ var stream = std.io.fixedBufferStream(self.data); + const reader = stream.reader(); + + self.header = try reader.readStruct(elf.Elf64_Ehdr); + + if (self.header.?.e_shnum == 0) return; + + const gpa = elf_file.base.allocator; + + const shoff = math.cast(usize, self.header.?.e_shoff) orelse return error.Overflow; + const shdrs = @as( + [*]align(1) const elf.Elf64_Shdr, + @ptrCast(self.data.ptr + shoff), + )[0..self.header.?.e_shnum]; + try self.shdrs.appendUnalignedSlice(gpa, shdrs); + try self.strings.buffer.appendSlice(gpa, try self.shdrContents(self.header.?.e_shstrndx)); + + const symtab_index = for (self.shdrs.items, 0..) |shdr, i| switch (shdr.sh_type) { + elf.SHT_SYMTAB => break @as(u16, @intCast(i)), + else => {}, + } else null; + + if (symtab_index) |index| { + const shdr = shdrs[index]; + self.first_global = shdr.sh_info; + + const symtab = try self.shdrContents(index); + const nsyms = @divExact(symtab.len, @sizeOf(elf.Elf64_Sym)); + self.symtab = @as([*]align(1) const elf.Elf64_Sym, @ptrCast(symtab.ptr))[0..nsyms]; + self.strtab = try self.shdrContents(@as(u16, @intCast(shdr.sh_link))); + } + + try self.initAtoms(elf_file); + try self.initSymtab(elf_file); + + // for (self.shdrs.items, 0..) |shdr, i| { + // const atom = elf_file.atom(self.atoms.items[i]) orelse continue; + // if (!atom.alive) continue; + // if (shdr.sh_type == elf.SHT_X86_64_UNWIND or mem.eql(u8, atom.name(elf_file), ".eh_frame")) + // try self.parseEhFrame(@as(u16, @intCast(i)), elf_file); + // } +} + +fn initAtoms(self: *Object, elf_file: *Elf) !void { + const shdrs = self.shdrs.items; + try self.atoms.resize(elf_file.base.allocator, shdrs.len); + @memset(self.atoms.items, 0); + + for (shdrs, 0..) 
|shdr, i| { + if (shdr.sh_flags & elf.SHF_EXCLUDE != 0 and + shdr.sh_flags & elf.SHF_ALLOC == 0 and + shdr.sh_type != elf.SHT_LLVM_ADDRSIG) continue; + + switch (shdr.sh_type) { + elf.SHT_GROUP => { + if (shdr.sh_info >= self.symtab.len) { + // TODO convert into an error + log.debug("{}: invalid symbol index in sh_info", .{self.fmtPath()}); + continue; + } + const group_info_sym = self.symtab[shdr.sh_info]; + const group_signature = blk: { + if (group_info_sym.st_name == 0 and group_info_sym.st_type() == elf.STT_SECTION) { + const sym_shdr = shdrs[group_info_sym.st_shndx]; + break :blk self.strings.getAssumeExists(sym_shdr.sh_name); + } + break :blk self.getString(group_info_sym.st_name); + }; + + const shndx = @as(u16, @intCast(i)); + const group_raw_data = try self.shdrContents(shndx); + const group_nmembers = @divExact(group_raw_data.len, @sizeOf(u32)); + const group_members = @as([*]align(1) const u32, @ptrCast(group_raw_data.ptr))[0..group_nmembers]; + + if (group_members[0] != 0x1) { // GRP_COMDAT + // TODO convert into an error + log.debug("{}: unknown SHT_GROUP format", .{self.fmtPath()}); + continue; + } + + const group_signature_off = try self.strings.insert(elf_file.base.allocator, group_signature); + const gop = try elf_file.getOrCreateComdatGroupOwner(group_signature_off); + const comdat_group_index = try elf_file.addComdatGroup(); + const comdat_group = elf_file.comdatGroup(comdat_group_index); + comdat_group.* = .{ + .owner = gop.index, + .shndx = shndx, + }; + try self.comdat_groups.append(elf_file.base.allocator, comdat_group_index); + }, + + elf.SHT_SYMTAB_SHNDX => @panic("TODO"), + + elf.SHT_NULL, + elf.SHT_REL, + elf.SHT_RELA, + elf.SHT_SYMTAB, + elf.SHT_STRTAB, + => {}, + + else => { + const name = self.strings.getAssumeExists(shdr.sh_name); + const shndx = @as(u16, @intCast(i)); + if (self.skipShdr(shndx, elf_file)) continue; + try self.addAtom(shdr, shndx, name, elf_file); + }, + } + } + + // Parse relocs sections if any. + for (shdrs, 0..) 
|shdr, i| switch (shdr.sh_type) { + elf.SHT_REL, elf.SHT_RELA => { + const atom_index = self.atoms.items[shdr.sh_info]; + if (elf_file.atom(atom_index)) |atom| { + atom.relocs_section_index = @as(u16, @intCast(i)); + } + }, + else => {}, + }; +} + +fn addAtom(self: *Object, shdr: elf.Elf64_Shdr, shndx: u16, name: [:0]const u8, elf_file: *Elf) !void { + const atom_index = try elf_file.addAtom(); + const atom = elf_file.atom(atom_index).?; + atom.atom_index = atom_index; + atom.name_offset = try elf_file.strtab.insert(elf_file.base.allocator, name); + atom.file_index = self.index; + atom.input_section_index = shndx; + atom.output_section_index = self.getOutputSectionIndex(elf_file, shdr); + atom.alive = true; + self.atoms.items[shndx] = atom_index; + + if (shdr.sh_flags & elf.SHF_COMPRESSED != 0) { + const data = try self.shdrContents(shndx); + const chdr = @as(*align(1) const elf.Elf64_Chdr, @ptrCast(data.ptr)).*; + atom.size = chdr.ch_size; + atom.alignment = math.log2_int(u64, chdr.ch_addralign); + } else { + atom.size = shdr.sh_size; + atom.alignment = math.log2_int(u64, shdr.sh_addralign); + } +} + +fn getOutputSectionIndex(self: *Object, elf_file: *Elf, shdr: elf.Elf64_Shdr) u16 { + const name = blk: { + const name = self.strings.getAssumeExists(shdr.sh_name); + // if (shdr.sh_flags & elf.SHF_MERGE != 0) break :blk name; + const sh_name_prefixes: []const [:0]const u8 = &.{ + ".text", ".data.rel.ro", ".data", ".rodata", ".bss.rel.ro", ".bss", + ".init_array", ".fini_array", ".tbss", ".tdata", ".gcc_except_table", ".ctors", + ".dtors", ".gnu.warning", + }; + inline for (sh_name_prefixes) |prefix| { + if (std.mem.eql(u8, name, prefix) or std.mem.startsWith(u8, name, prefix ++ ".")) { + break :blk prefix; + } + } + break :blk name; + }; + const @"type" = switch (shdr.sh_type) { + elf.SHT_NULL => unreachable, + elf.SHT_PROGBITS => blk: { + if (std.mem.eql(u8, name, ".init_array") or std.mem.startsWith(u8, name, ".init_array.")) + break :blk elf.SHT_INIT_ARRAY; + if 
(std.mem.eql(u8, name, ".fini_array") or std.mem.startsWith(u8, name, ".fini_array.")) + break :blk elf.SHT_FINI_ARRAY; + break :blk shdr.sh_type; + }, + elf.SHT_X86_64_UNWIND => elf.SHT_PROGBITS, + else => shdr.sh_type, + }; + const flags = blk: { + const flags = shdr.sh_flags & ~@as(u64, elf.SHF_COMPRESSED | elf.SHF_GROUP | elf.SHF_GNU_RETAIN); + break :blk switch (@"type") { + elf.SHT_INIT_ARRAY, elf.SHT_FINI_ARRAY => flags | elf.SHF_WRITE, + else => flags, + }; + }; + _ = flags; + const out_shndx = elf_file.sectionByName(name) orelse { + log.err("{}: output section {s} not found", .{ self.fmtPath(), name }); + @panic("TODO: missing output section!"); + }; + return out_shndx; +} + +fn skipShdr(self: *Object, index: u16, elf_file: *Elf) bool { + _ = elf_file; + const shdr = self.shdrs.items[index]; + const name = self.strings.getAssumeExists(shdr.sh_name); + const ignore = blk: { + if (mem.startsWith(u8, name, ".note")) break :blk true; + if (mem.startsWith(u8, name, ".comment")) break :blk true; + if (mem.startsWith(u8, name, ".llvm_addrsig")) break :blk true; + if (mem.startsWith(u8, name, ".eh_frame")) break :blk true; + // if (elf_file.base.options.strip and shdr.sh_flags & elf.SHF_ALLOC == 0 and + // mem.startsWith(u8, name, ".debug")) break :blk true; + if (shdr.sh_flags & elf.SHF_ALLOC == 0 and mem.startsWith(u8, name, ".debug")) break :blk true; + break :blk false; + }; + return ignore; +} + +fn initSymtab(self: *Object, elf_file: *Elf) !void { + const gpa = elf_file.base.allocator; + const first_global = self.first_global orelse self.symtab.len; + const shdrs = self.shdrs.items; + + try self.symbols.ensureTotalCapacityPrecise(gpa, self.symtab.len); + + for (self.symtab[0..first_global], 0..) 
|sym, i| { + const index = try elf_file.addSymbol(); + self.symbols.appendAssumeCapacity(index); + const sym_ptr = elf_file.symbol(index); + const name = blk: { + if (sym.st_name == 0 and sym.st_type() == elf.STT_SECTION) { + const shdr = shdrs[sym.st_shndx]; + break :blk self.strings.getAssumeExists(shdr.sh_name); + } + break :blk self.getString(sym.st_name); + }; + sym_ptr.value = sym.st_value; + sym_ptr.name_offset = try elf_file.strtab.insert(gpa, name); + sym_ptr.esym_index = @as(u32, @intCast(i)); + sym_ptr.atom_index = if (sym.st_shndx == elf.SHN_ABS) 0 else self.atoms.items[sym.st_shndx]; + sym_ptr.file_index = self.index; + sym_ptr.output_section_index = if (sym_ptr.atom(elf_file)) |atom_ptr| + atom_ptr.output_section_index + else + 0; + } + + for (self.symtab[first_global..]) |sym| { + const name = self.getString(sym.st_name); + const off = try elf_file.strtab.insert(gpa, name); + const gop = try elf_file.getOrPutGlobal(off); + self.symbols.addOneAssumeCapacity().* = gop.index; + } +} + +fn parseEhFrame(self: *Object, shndx: u16, elf_file: *Elf) !void { + const relocs_shndx = for (self.shdrs.items, 0..) 
|shdr, i| switch (shdr.sh_type) { + elf.SHT_RELA => if (shdr.sh_info == shndx) break @as(u16, @intCast(i)), + else => {}, + } else { + log.debug("{s}: missing reloc section for unwind info section", .{self.fmtPath()}); + return; + }; + + const gpa = elf_file.base.allocator; + const raw = try self.shdrContents(shndx); + const relocs = try self.getRelocs(relocs_shndx); + const fdes_start = self.fdes.items.len; + const cies_start = self.cies.items.len; + + var it = eh_frame.Iterator{ .data = raw }; + while (try it.next()) |rec| { + const rel_range = filterRelocs(relocs, rec.offset, rec.size + 4); + switch (rec.tag) { + .cie => try self.cies.append(gpa, .{ + .offset = rec.offset, + .size = rec.size, + .rel_index = @as(u32, @intCast(rel_range.start)), + .rel_num = @as(u32, @intCast(rel_range.len)), + .rel_section_index = relocs_shndx, + .input_section_index = shndx, + .file_index = self.index, + }), + .fde => try self.fdes.append(gpa, .{ + .offset = rec.offset, + .size = rec.size, + .cie_index = undefined, + .rel_index = @as(u32, @intCast(rel_range.start)), + .rel_num = @as(u32, @intCast(rel_range.len)), + .rel_section_index = relocs_shndx, + .input_section_index = shndx, + .file_index = self.index, + }), + } + } + + // Tie each FDE to its CIE + for (self.fdes.items[fdes_start..]) |*fde| { + const cie_ptr = fde.offset + 4 - fde.ciePointer(elf_file); + const cie_index = for (self.cies.items[cies_start..], cies_start..) 
|cie, cie_index| { + if (cie.offset == cie_ptr) break @as(u32, @intCast(cie_index)); + } else { + // TODO convert into an error + log.debug("{s}: no matching CIE found for FDE at offset {x}", .{ + self.fmtPath(), + fde.offset, + }); + continue; + }; + fde.cie_index = cie_index; + } + + // Tie each FDE record to its matching atom + const SortFdes = struct { + pub fn lessThan(ctx: *Elf, lhs: Fde, rhs: Fde) bool { + const lhs_atom = lhs.atom(ctx); + const rhs_atom = rhs.atom(ctx); + return lhs_atom.priority(ctx) < rhs_atom.priority(ctx); + } + }; + mem.sort(Fde, self.fdes.items[fdes_start..], elf_file, SortFdes.lessThan); + + // Create a back-link from atom to FDEs + var i: u32 = @as(u32, @intCast(fdes_start)); + while (i < self.fdes.items.len) { + const fde = self.fdes.items[i]; + const atom = fde.atom(elf_file); + atom.fde_start = i; + i += 1; + while (i < self.fdes.items.len) : (i += 1) { + const next_fde = self.fdes.items[i]; + if (atom.atom_index != next_fde.atom(elf_file).atom_index) break; + } + atom.fde_end = i; + } +} + +fn filterRelocs( + relocs: []align(1) const elf.Elf64_Rela, + start: u64, + len: u64, +) struct { start: u64, len: u64 } { + const Predicate = struct { + value: u64, + + pub fn predicate(self: @This(), rel: elf.Elf64_Rela) bool { + return rel.r_offset < self.value; + } + }; + const LPredicate = struct { + value: u64, + + pub fn predicate(self: @This(), rel: elf.Elf64_Rela) bool { + return rel.r_offset >= self.value; + } + }; + + const f_start = Elf.bsearch(elf.Elf64_Rela, relocs, Predicate{ .value = start }); + const f_len = Elf.lsearch(elf.Elf64_Rela, relocs[f_start..], LPredicate{ .value = start + len }); + + return .{ .start = f_start, .len = f_len }; +} + +pub fn scanRelocs(self: *Object, elf_file: *Elf, undefs: anytype) !void { + for (self.atoms.items) |atom_index| { + const atom = elf_file.atom(atom_index) orelse continue; + if (!atom.alive) continue; + const shdr = atom.inputShdr(elf_file); + if (shdr.sh_flags & elf.SHF_ALLOC == 0) 
continue; + if (shdr.sh_type == elf.SHT_NOBITS) continue; + try atom.scanRelocs(elf_file, undefs); + } + + for (self.cies.items) |cie| { + for (try cie.relocs(elf_file)) |rel| { + const sym = elf_file.symbol(self.symbols.items[rel.r_sym()]); + if (sym.flags.import) { + if (sym.type(elf_file) != elf.STT_FUNC) + // TODO convert into an error + log.debug("{s}: {s}: CIE referencing external data reference", .{ + self.fmtPath(), + sym.name(elf_file), + }); + sym.flags.needs_plt = true; + } + } + } +} + +pub fn resolveSymbols(self: *Object, elf_file: *Elf) void { + const first_global = self.first_global orelse return; + for (self.globals(), 0..) |index, i| { + const esym_index = @as(Symbol.Index, @intCast(first_global + i)); + const esym = self.symtab[esym_index]; + + if (esym.st_shndx == elf.SHN_UNDEF) continue; + + if (esym.st_shndx != elf.SHN_ABS and esym.st_shndx != elf.SHN_COMMON) { + const atom_index = self.atoms.items[esym.st_shndx]; + const atom = elf_file.atom(atom_index) orelse continue; + if (!atom.alive) continue; + } + + const global = elf_file.symbol(index); + if (self.asFile().symbolRank(esym, !self.alive) < global.symbolRank(elf_file)) { + const atom_index = switch (esym.st_shndx) { + elf.SHN_ABS, elf.SHN_COMMON => 0, + else => self.atoms.items[esym.st_shndx], + }; + const output_section_index = if (elf_file.atom(atom_index)) |atom| + atom.output_section_index + else + 0; + global.value = esym.st_value; + global.atom_index = atom_index; + global.esym_index = esym_index; + global.file_index = self.index; + global.output_section_index = output_section_index; + global.version_index = elf_file.default_sym_version; + if (esym.st_bind() == elf.STB_WEAK) global.flags.weak = true; + } + } +} + +pub fn claimUnresolved(self: *Object, elf_file: *Elf) void { + const first_global = self.first_global orelse return; + for (self.globals(), 0..) 
|index, i| { + const esym_index = @as(u32, @intCast(first_global + i)); + const esym = self.symtab[esym_index]; + if (esym.st_shndx != elf.SHN_UNDEF) continue; + + const global = elf_file.symbol(index); + if (global.file(elf_file)) |_| { + if (global.elfSym(elf_file).st_shndx != elf.SHN_UNDEF) continue; + } + + const is_import = blk: { + if (!elf_file.isDynLib()) break :blk false; + const vis = @as(elf.STV, @enumFromInt(esym.st_other)); + if (vis == .HIDDEN) break :blk false; + break :blk true; + }; + + global.value = 0; + global.atom_index = 0; + global.esym_index = esym_index; + global.file_index = self.index; + global.version_index = if (is_import) elf.VER_NDX_LOCAL else elf_file.default_sym_version; + global.flags.import = is_import; + } +} + +pub fn resetGlobals(self: *Object, elf_file: *Elf) void { + for (self.globals()) |index| { + const global = elf_file.symbol(index); + const name = global.name; + global.* = .{}; + global.name = name; + } +} + +pub fn markLive(self: *Object, elf_file: *Elf) void { + const first_global = self.first_global orelse return; + for (self.globals(), 0..) |index, i| { + const sym_idx = first_global + i; + const sym = self.symtab[sym_idx]; + if (sym.st_bind() == elf.STB_WEAK) continue; + + const global = elf_file.symbol(index); + const file = global.getFile(elf_file) orelse continue; + const should_keep = sym.st_shndx == elf.SHN_UNDEF or + (sym.st_shndx == elf.SHN_COMMON and global.elfSym(elf_file).st_shndx != elf.SHN_COMMON); + if (should_keep and !file.isAlive()) { + file.setAlive(); + file.markLive(elf_file); + } + } +} + +pub fn checkDuplicates(self: *Object, elf_file: *Elf) void { + const first_global = self.first_global orelse return; + for (self.globals(), 0..) 
|index, i| { + const sym_idx = @as(u32, @intCast(first_global + i)); + const this_sym = self.symtab[sym_idx]; + const global = elf_file.symbol(index); + const global_file = global.getFile(elf_file) orelse continue; + + if (self.index == global_file.getIndex() or + this_sym.st_shndx == elf.SHN_UNDEF or + this_sym.st_bind() == elf.STB_WEAK or + this_sym.st_shndx == elf.SHN_COMMON) continue; + + if (this_sym.st_shndx != elf.SHN_ABS) { + const atom_index = self.atoms.items[this_sym.st_shndx]; + const atom = elf_file.atom(atom_index) orelse continue; + if (!atom.alive) continue; + } + + elf_file.base.fatal("multiple definition: {}: {}: {s}", .{ + self.fmtPath(), + global_file.fmtPath(), + global.getName(elf_file), + }); + } +} + +/// We will create dummy shdrs per each resolved common symbols to make it +/// play nicely with the rest of the system. +pub fn convertCommonSymbols(self: *Object, elf_file: *Elf) !void { + const first_global = self.first_global orelse return; + for (self.globals(), 0..) 
|index, i| { + const sym_idx = @as(u32, @intCast(first_global + i)); + const this_sym = self.symtab[sym_idx]; + if (this_sym.st_shndx != elf.SHN_COMMON) continue; + + const global = elf_file.symbol(index); + const global_file = global.getFile(elf_file).?; + if (global_file.getIndex() != self.index) { + if (elf_file.options.warn_common) { + elf_file.base.warn("{}: multiple common symbols: {s}", .{ + self.fmtPath(), + global.getName(elf_file), + }); + } + continue; + } + + const gpa = elf_file.base.allocator; + + const atom_index = try elf_file.addAtom(); + try self.atoms.append(gpa, atom_index); + + const is_tls = global.getType(elf_file) == elf.STT_TLS; + const name = if (is_tls) ".tls_common" else ".common"; + + const atom = elf_file.atom(atom_index).?; + atom.atom_index = atom_index; + atom.name = try elf_file.strtab.insert(gpa, name); + atom.file = self.index; + atom.size = this_sym.st_size; + const alignment = this_sym.st_value; + atom.alignment = math.log2_int(u64, alignment); + + var sh_flags: u32 = elf.SHF_ALLOC | elf.SHF_WRITE; + if (is_tls) sh_flags |= elf.SHF_TLS; + const shndx = @as(u16, @intCast(self.shdrs.items.len)); + const shdr = try self.shdrs.addOne(gpa); + shdr.* = .{ + .sh_name = try self.strings.insert(gpa, name), + .sh_type = elf.SHT_NOBITS, + .sh_flags = sh_flags, + .sh_addr = 0, + .sh_offset = 0, + .sh_size = this_sym.st_size, + .sh_link = 0, + .sh_info = 0, + .sh_addralign = alignment, + .sh_entsize = 0, + }; + atom.shndx = shndx; + + global.value = 0; + global.atom = atom_index; + global.flags.weak = false; + } +} + +pub fn updateSymtabSize(self: *Object, elf_file: *Elf) void { + for (self.locals()) |local_index| { + const local = elf_file.symbol(local_index); + if (local.atom(elf_file)) |atom| if (!atom.alive) continue; + const esym = local.elfSym(elf_file); + switch (esym.st_type()) { + elf.STT_SECTION, elf.STT_NOTYPE => continue, + else => {}, + } + local.flags.output_symtab = true; + self.output_symtab_size.nlocals += 1; + } + + for 
(self.globals()) |global_index| { + const global = elf_file.symbol(global_index); + if (global.file(elf_file)) |file| if (file.index() != self.index) continue; + if (global.atom(elf_file)) |atom| if (!atom.alive) continue; + global.flags.output_symtab = true; + if (global.isLocal()) { + self.output_symtab_size.nlocals += 1; + } else { + self.output_symtab_size.nglobals += 1; + } + } +} + +pub fn writeSymtab(self: *Object, elf_file: *Elf, ctx: anytype) void { + var ilocal = ctx.ilocal; + for (self.locals()) |local_index| { + const local = elf_file.symbol(local_index); + if (!local.flags.output_symtab) continue; + local.setOutputSym(elf_file, &ctx.symtab[ilocal]); + ilocal += 1; + } + + var iglobal = ctx.iglobal; + for (self.globals()) |global_index| { + const global = elf_file.symbol(global_index); + if (global.file(elf_file)) |file| if (file.index() != self.index) continue; + if (!global.flags.output_symtab) continue; + if (global.isLocal()) { + global.setOutputSym(elf_file, &ctx.symtab[ilocal]); + ilocal += 1; + } else { + global.setOutputSym(elf_file, &ctx.symtab[iglobal]); + iglobal += 1; + } + } +} + +pub fn locals(self: *Object) []const Symbol.Index { + const end = self.first_global orelse self.symbols.items.len; + return self.symbols.items[0..end]; +} + +pub fn globals(self: *Object) []const Symbol.Index { + const start = self.first_global orelse self.symbols.items.len; + return self.symbols.items[start..]; +} + +pub fn shdrContents(self: *Object, index: u32) error{Overflow}![]const u8 { + assert(index < self.shdrs.items.len); + const shdr = self.shdrs.items[index]; + const offset = math.cast(usize, shdr.sh_offset) orelse return error.Overflow; + const size = math.cast(usize, shdr.sh_size) orelse return error.Overflow; + return self.data[offset..][0..size]; +} + +fn getString(self: *Object, off: u32) [:0]const u8 { + assert(off < self.strtab.len); + return mem.sliceTo(@as([*:0]const u8, @ptrCast(self.strtab.ptr + off)), 0); +} + +pub fn 
comdatGroupMembers(self: *Object, index: u16) error{Overflow}![]align(1) const u32 { + const raw = try self.shdrContents(index); + const nmembers = @divExact(raw.len, @sizeOf(u32)); + const members = @as([*]align(1) const u32, @ptrCast(raw.ptr))[1..nmembers]; + return members; +} + +pub fn asFile(self: *Object) File { + return .{ .object = self }; +} + +pub fn getRelocs(self: *Object, shndx: u32) error{Overflow}![]align(1) const elf.Elf64_Rela { + const raw = try self.shdrContents(shndx); + const num = @divExact(raw.len, @sizeOf(elf.Elf64_Rela)); + return @as([*]align(1) const elf.Elf64_Rela, @ptrCast(raw.ptr))[0..num]; +} + +pub fn format( + self: *Object, + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, +) !void { + _ = self; + _ = unused_fmt_string; + _ = options; + _ = writer; + @compileError("do not format objects directly"); +} + +pub fn fmtSymtab(self: *Object, elf_file: *Elf) std.fmt.Formatter(formatSymtab) { + return .{ .data = .{ + .object = self, + .elf_file = elf_file, + } }; +} + +const FormatContext = struct { + object: *Object, + elf_file: *Elf, +}; + +fn formatSymtab( + ctx: FormatContext, + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, +) !void { + _ = unused_fmt_string; + _ = options; + const object = ctx.object; + try writer.writeAll(" locals\n"); + for (object.locals()) |index| { + const local = ctx.elf_file.symbol(index); + try writer.print(" {}\n", .{local.fmt(ctx.elf_file)}); + } + try writer.writeAll(" globals\n"); + for (object.globals()) |index| { + const global = ctx.elf_file.symbol(index); + try writer.print(" {}\n", .{global.fmt(ctx.elf_file)}); + } +} + +pub fn fmtAtoms(self: *Object, elf_file: *Elf) std.fmt.Formatter(formatAtoms) { + return .{ .data = .{ + .object = self, + .elf_file = elf_file, + } }; +} + +fn formatAtoms( + ctx: FormatContext, + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, +) 
!void { + _ = unused_fmt_string; + _ = options; + const object = ctx.object; + try writer.writeAll(" atoms\n"); + for (object.atoms.items) |atom_index| { + const atom = ctx.elf_file.atom(atom_index) orelse continue; + try writer.print(" {}\n", .{atom.fmt(ctx.elf_file)}); + } +} + +pub fn fmtCies(self: *Object, elf_file: *Elf) std.fmt.Formatter(formatCies) { + return .{ .data = .{ + .object = self, + .elf_file = elf_file, + } }; +} + +fn formatCies( + ctx: FormatContext, + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, +) !void { + _ = unused_fmt_string; + _ = options; + const object = ctx.object; + try writer.writeAll(" cies\n"); + for (object.cies.items, 0..) |cie, i| { + try writer.print(" cie({d}) : {}\n", .{ i, cie.fmt(ctx.elf_file) }); + } +} + +pub fn fmtFdes(self: *Object, elf_file: *Elf) std.fmt.Formatter(formatFdes) { + return .{ .data = .{ + .object = self, + .elf_file = elf_file, + } }; +} + +fn formatFdes( + ctx: FormatContext, + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, +) !void { + _ = unused_fmt_string; + _ = options; + const object = ctx.object; + try writer.writeAll(" fdes\n"); + for (object.fdes.items, 0..) 
|fde, i| { + try writer.print(" fde({d}) : {}\n", .{ i, fde.fmt(ctx.elf_file) }); + } +} + +pub fn fmtComdatGroups(self: *Object, elf_file: *Elf) std.fmt.Formatter(formatComdatGroups) { + return .{ .data = .{ + .object = self, + .elf_file = elf_file, + } }; +} + +fn formatComdatGroups( + ctx: FormatContext, + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, +) !void { + _ = unused_fmt_string; + _ = options; + const object = ctx.object; + const elf_file = ctx.elf_file; + try writer.writeAll(" comdat groups\n"); + for (object.comdat_groups.items) |cg_index| { + const cg = elf_file.comdatGroup(cg_index); + const cg_owner = elf_file.comdatGroupOwner(cg.owner); + if (cg_owner.file != object.index) continue; + const cg_members = object.comdatGroupMembers(cg.shndx) catch continue; + for (cg_members) |shndx| { + const atom_index = object.atoms.items[shndx]; + const atom = elf_file.atom(atom_index) orelse continue; + try writer.print(" atom({d}) : {s}\n", .{ atom_index, atom.name(elf_file) }); + } + } +} + +pub fn fmtPath(self: *Object) std.fmt.Formatter(formatPath) { + return .{ .data = self }; +} + +fn formatPath( + object: *Object, + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, +) !void { + _ = unused_fmt_string; + _ = options; + if (object.archive) |path| { + try writer.writeAll(path); + try writer.writeByte('('); + try writer.writeAll(object.path); + try writer.writeByte(')'); + } else try writer.writeAll(object.path); +} + +const Object = @This(); + +const std = @import("std"); +const assert = std.debug.assert; +const eh_frame = @import("eh_frame.zig"); +const elf = std.elf; +const fs = std.fs; +const log = std.log.scoped(.link); +const math = std.math; +const mem = std.mem; + +const Allocator = mem.Allocator; +const Atom = @import("Atom.zig"); +const Cie = eh_frame.Cie; +const Elf = @import("../Elf.zig"); +const Fde = eh_frame.Fde; +const File = @import("file.zig").File; 
+const StringTable = @import("../strtab.zig").StringTable; +const Symbol = @import("Symbol.zig"); diff --git a/src/link/Elf/Symbol.zig b/src/link/Elf/Symbol.zig new file mode 100644 index 0000000000..42b9b81ef9 --- /dev/null +++ b/src/link/Elf/Symbol.zig @@ -0,0 +1,362 @@ +//! Represents a defined symbol. + +index: Index = 0, + +/// Allocated address value of this symbol. +value: u64 = 0, + +/// Offset into the linker's string table. +name_offset: u32 = 0, + +/// Index of file where this symbol is defined. +file_index: File.Index = 0, + +/// Index of atom containing this symbol. +/// Index of 0 means there is no associated atom with this symbol. +/// Use `atom` to get the pointer to the atom. +atom_index: Atom.Index = 0, + +/// Assigned output section index for this atom. +output_section_index: u16 = 0, + +/// Index of the source symbol this symbol references. +/// Use `elfSym` to pull the source symbol from the relevant file. +esym_index: Index = 0, + +/// Index of the source version symbol this symbol references if any. +/// If the symbol is unversioned it will have either VER_NDX_LOCAL or VER_NDX_GLOBAL. +version_index: elf.Elf64_Versym = elf.VER_NDX_LOCAL, + +/// Misc flags for the symbol packaged as packed struct for compression. 
+flags: Flags = .{}, + +extra_index: u32 = 0, + +pub fn isAbs(symbol: Symbol, elf_file: *Elf) bool { + const file_ptr = symbol.file(elf_file).?; + // if (file_ptr == .shared) return symbol.sourceSymbol(elf_file).st_shndx == elf.SHN_ABS; + return !symbol.flags.import and symbol.atom(elf_file) == null and symbol.output_section_index == 0 and + file_ptr != .linker_defined; +} + +pub fn isLocal(symbol: Symbol) bool { + return !(symbol.flags.import or symbol.flags.@"export"); +} + +pub fn isIFunc(symbol: Symbol, elf_file: *Elf) bool { + return symbol.type(elf_file) == elf.STT_GNU_IFUNC; +} + +pub fn @"type"(symbol: Symbol, elf_file: *Elf) u4 { + const s_sym = symbol.elfSym(elf_file); + // const file_ptr = symbol.file(elf_file).?; + // if (s_sym.st_type() == elf.STT_GNU_IFUNC and file_ptr == .shared) return elf.STT_FUNC; + return s_sym.st_type(); +} + +pub fn name(symbol: Symbol, elf_file: *Elf) [:0]const u8 { + return elf_file.strtab.getAssumeExists(symbol.name_offset); +} + +pub fn atom(symbol: Symbol, elf_file: *Elf) ?*Atom { + return elf_file.atom(symbol.atom_index); +} + +pub fn file(symbol: Symbol, elf_file: *Elf) ?File { + return elf_file.file(symbol.file_index); +} + +pub fn elfSym(symbol: Symbol, elf_file: *Elf) elf.Elf64_Sym { + const file_ptr = symbol.file(elf_file).?; + switch (file_ptr) { + .zig_module => |x| return x.elfSym(symbol.esym_index).*, + .linker_defined => |x| return x.symtab.items[symbol.esym_index], + .object => |x| return x.symtab[symbol.esym_index], + } +} + +pub fn symbolRank(symbol: Symbol, elf_file: *Elf) u32 { + const file_ptr = symbol.file(elf_file) orelse return std.math.maxInt(u32); + const sym = symbol.elfSym(elf_file); + const in_archive = switch (file_ptr) { + .object => |x| !x.alive, + else => false, + }; + return file_ptr.symbolRank(sym, in_archive); +} + +pub fn address(symbol: Symbol, opts: struct { + plt: bool = true, +}, elf_file: *Elf) u64 { + _ = elf_file; + _ = opts; + // if (symbol.flags.copy_rel) { + // return 
elf_file.sectionAddress(elf_file.copy_rel_sect_index.?) + symbol.value; + // } + // if (symbol.flags.plt and opts.plt) { + // const extra = symbol.getExtra(elf_file).?; + // if (!symbol.flags.is_canonical and symbol.flags.got) { + // // We have a non-lazy bound function pointer, use that! + // return elf_file.getPltGotEntryAddress(extra.plt_got); + // } + // // Lazy-bound function it is! + // return elf_file.getPltEntryAddress(extra.plt); + // } + return symbol.value; +} + +pub fn gotAddress(symbol: Symbol, elf_file: *Elf) u64 { + if (!symbol.flags.has_got) return 0; + const extras = symbol.extra(elf_file).?; + const entry = elf_file.got.entries.items[extras.got]; + return entry.address(elf_file); +} + +const GetOrCreateGotEntryResult = struct { + found_existing: bool, + index: GotSection.Index, +}; + +pub fn getOrCreateGotEntry(symbol: *Symbol, elf_file: *Elf) !GetOrCreateGotEntryResult { + assert(symbol.flags.needs_got); + if (symbol.flags.has_got) return .{ .found_existing = true, .index = symbol.extra(elf_file).?.got }; + const index = try elf_file.got.addGotSymbol(symbol.index, elf_file); + symbol.flags.has_got = true; + return .{ .found_existing = false, .index = index }; +} + +// pub fn tlsGdAddress(symbol: Symbol, elf_file: *Elf) u64 { +// if (!symbol.flags.tlsgd) return 0; +// const extra = symbol.getExtra(elf_file).?; +// return elf_file.getGotEntryAddress(extra.tlsgd); +// } + +// pub fn gotTpAddress(symbol: Symbol, elf_file: *Elf) u64 { +// if (!symbol.flags.gottp) return 0; +// const extra = symbol.getExtra(elf_file).?; +// return elf_file.getGotEntryAddress(extra.gottp); +// } + +// pub fn tlsDescAddress(symbol: Symbol, elf_file: *Elf) u64 { +// if (!symbol.flags.tlsdesc) return 0; +// const extra = symbol.getExtra(elf_file).?; +// return elf_file.getGotEntryAddress(extra.tlsdesc); +// } + +// pub fn alignment(symbol: Symbol, elf_file: *Elf) !u64 { +// const file = symbol.getFile(elf_file) orelse return 0; +// const shared = file.shared; +// const 
s_sym = symbol.getSourceSymbol(elf_file); +// const shdr = shared.getShdrs()[s_sym.st_shndx]; +// const alignment = @max(1, shdr.sh_addralign); +// return if (s_sym.st_value == 0) +// alignment +// else +// @min(alignment, try std.math.powi(u64, 2, @ctz(s_sym.st_value))); +// } + +pub fn addExtra(symbol: *Symbol, extras: Extra, elf_file: *Elf) !void { + symbol.extra_index = try elf_file.addSymbolExtra(extras); +} + +pub fn extra(symbol: Symbol, elf_file: *Elf) ?Extra { + return elf_file.symbolExtra(symbol.extra_index); +} + +pub fn setExtra(symbol: Symbol, extras: Extra, elf_file: *Elf) void { + elf_file.setSymbolExtra(symbol.extra_index, extras); +} + +pub fn setOutputSym(symbol: Symbol, elf_file: *Elf, out: *elf.Elf64_Sym) void { + const file_ptr = symbol.file(elf_file) orelse { + out.* = Elf.null_sym; + return; + }; + const esym = symbol.elfSym(elf_file); + const st_type = symbol.type(elf_file); + const st_bind: u8 = blk: { + if (symbol.isLocal()) break :blk 0; + if (symbol.flags.weak) break :blk elf.STB_WEAK; + // if (file_ptr == .shared) break :blk elf.STB_GLOBAL; + break :blk esym.st_bind(); + }; + const st_shndx = blk: { + // if (symbol.flags.copy_rel) break :blk elf_file.copy_rel_sect_index.?; + // if (file_ptr == .shared or s_sym.st_shndx == elf.SHN_UNDEF) break :blk elf.SHN_UNDEF; + if (symbol.atom(elf_file) == null and file_ptr != .linker_defined) + break :blk elf.SHN_ABS; + break :blk symbol.output_section_index; + }; + const st_value = blk: { + // if (symbol.flags.copy_rel) break :blk symbol.address(.{}, elf_file); + // if (file_ptr == .shared or s_sym.st_shndx == elf.SHN_UNDEF) { + // if (symbol.flags.is_canonical) break :blk symbol.address(.{}, elf_file); + // break :blk 0; + // } + // if (st_shndx == elf.SHN_ABS) break :blk symbol.value; + // const shdr = &elf_file.sections.items(.shdr)[st_shndx]; + // if (Elf.shdrIsTls(shdr)) break :blk symbol.value - elf_file.getTlsAddress(); + break :blk symbol.value; + }; + out.* = .{ + .st_name = 
symbol.name_offset, + .st_info = (st_bind << 4) | st_type, + .st_other = esym.st_other, + .st_shndx = st_shndx, + .st_value = st_value, + .st_size = esym.st_size, + }; +} + +pub fn format( + symbol: Symbol, + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, +) !void { + _ = symbol; + _ = unused_fmt_string; + _ = options; + _ = writer; + @compileError("do not format symbols directly"); +} + +const FormatContext = struct { + symbol: Symbol, + elf_file: *Elf, +}; + +pub fn fmtName(symbol: Symbol, elf_file: *Elf) std.fmt.Formatter(formatName) { + return .{ .data = .{ + .symbol = symbol, + .elf_file = elf_file, + } }; +} + +fn formatName( + ctx: FormatContext, + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, +) !void { + _ = options; + _ = unused_fmt_string; + const elf_file = ctx.elf_file; + const symbol = ctx.symbol; + try writer.writeAll(symbol.name(elf_file)); + switch (symbol.version_index & elf.VERSYM_VERSION) { + elf.VER_NDX_LOCAL, elf.VER_NDX_GLOBAL => {}, + else => { + unreachable; + // const shared = symbol.getFile(elf_file).?.shared; + // try writer.print("@{s}", .{shared.getVersionString(symbol.version_index)}); + }, + } +} + +pub fn fmt(symbol: Symbol, elf_file: *Elf) std.fmt.Formatter(format2) { + return .{ .data = .{ + .symbol = symbol, + .elf_file = elf_file, + } }; +} + +fn format2( + ctx: FormatContext, + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, +) !void { + _ = options; + _ = unused_fmt_string; + const symbol = ctx.symbol; + try writer.print("%{d} : {s} : @{x}", .{ symbol.index, symbol.fmtName(ctx.elf_file), symbol.value }); + if (symbol.file(ctx.elf_file)) |file_ptr| { + if (symbol.isAbs(ctx.elf_file)) { + if (symbol.elfSym(ctx.elf_file).st_shndx == elf.SHN_UNDEF) { + try writer.writeAll(" : undef"); + } else { + try writer.writeAll(" : absolute"); + } + } else if (symbol.output_section_index != 0) { + try 
writer.print(" : sect({d})", .{symbol.output_section_index}); + } + if (symbol.atom(ctx.elf_file)) |atom_ptr| { + try writer.print(" : atom({d})", .{atom_ptr.atom_index}); + } + var buf: [2]u8 = .{'_'} ** 2; + if (symbol.flags.@"export") buf[0] = 'E'; + if (symbol.flags.import) buf[1] = 'I'; + try writer.print(" : {s}", .{&buf}); + if (symbol.flags.weak) try writer.writeAll(" : weak"); + switch (file_ptr) { + inline else => |x| try writer.print(" : {s}({d})", .{ @tagName(file_ptr), x.index }), + } + } else try writer.writeAll(" : unresolved"); +} + +pub const Flags = packed struct { + /// Whether the symbol is imported at runtime. + import: bool = false, + + /// Whether the symbol is exported at runtime. + @"export": bool = false, + + /// Whether this symbol is weak. + weak: bool = false, + + /// Whether the symbol makes into the output symtab or not. + output_symtab: bool = false, + + /// Whether the symbol contains GOT indirection. + needs_got: bool = false, + has_got: bool = false, + + /// Whether the symbol contains PLT indirection. + needs_plt: bool = false, + plt: bool = false, + /// Whether the PLT entry is canonical. + is_canonical: bool = false, + + /// Whether the symbol contains COPYREL directive. + copy_rel: bool = false, + has_copy_rel: bool = false, + has_dynamic: bool = false, + + /// Whether the symbol contains TLSGD indirection. + tlsgd: bool = false, + + /// Whether the symbol contains GOTTP indirection. + gottp: bool = false, + + /// Whether the symbol contains TLSDESC indirection. 
+ tlsdesc: bool = false, +}; + +pub const Extra = struct { + got: u32 = 0, + plt: u32 = 0, + plt_got: u32 = 0, + dynamic: u32 = 0, + copy_rel: u32 = 0, + tlsgd: u32 = 0, + gottp: u32 = 0, + tlsdesc: u32 = 0, +}; + +pub const Index = u32; + +const assert = std.debug.assert; +const elf = std.elf; +const std = @import("std"); +const synthetic_sections = @import("synthetic_sections.zig"); + +const Atom = @import("Atom.zig"); +const Elf = @import("../Elf.zig"); +const File = @import("file.zig").File; +const GotSection = synthetic_sections.GotSection; +const LinkerDefined = @import("LinkerDefined.zig"); +// const Object = @import("Object.zig"); +// const SharedObject = @import("SharedObject.zig"); +const Symbol = @This(); +const ZigModule = @import("ZigModule.zig"); diff --git a/src/link/Elf/ZigModule.zig b/src/link/Elf/ZigModule.zig new file mode 100644 index 0000000000..46a382abf9 --- /dev/null +++ b/src/link/Elf/ZigModule.zig @@ -0,0 +1,295 @@ +//! ZigModule encapsulates the state of the incrementally compiled Zig module. +//! It stores the associated input local and global symbols, allocated atoms, +//! and any relocations that may have been emitted. +//! Think about this as fake in-memory Object file for the Zig module. + +/// Path is owned by Module and lives as long as *Module. 
+path: []const u8, +index: File.Index, + +local_esyms: std.ArrayListUnmanaged(elf.Elf64_Sym) = .{}, +global_esyms: std.ArrayListUnmanaged(elf.Elf64_Sym) = .{}, +local_symbols: std.ArrayListUnmanaged(Symbol.Index) = .{}, +global_symbols: std.ArrayListUnmanaged(Symbol.Index) = .{}, +globals_lookup: std.AutoHashMapUnmanaged(u32, Symbol.Index) = .{}, + +atoms: std.AutoArrayHashMapUnmanaged(Atom.Index, void) = .{}, +relocs: std.ArrayListUnmanaged(std.ArrayListUnmanaged(elf.Elf64_Rela)) = .{}, + +output_symtab_size: Elf.SymtabSize = .{}, + +pub fn deinit(self: *ZigModule, allocator: Allocator) void { + self.local_esyms.deinit(allocator); + self.global_esyms.deinit(allocator); + self.local_symbols.deinit(allocator); + self.global_symbols.deinit(allocator); + self.globals_lookup.deinit(allocator); + self.atoms.deinit(allocator); + for (self.relocs.items) |*list| { + list.deinit(allocator); + } + self.relocs.deinit(allocator); +} + +pub fn addLocalEsym(self: *ZigModule, allocator: Allocator) !Symbol.Index { + try self.local_esyms.ensureUnusedCapacity(allocator, 1); + const index = @as(Symbol.Index, @intCast(self.local_esyms.items.len)); + const esym = self.local_esyms.addOneAssumeCapacity(); + esym.* = Elf.null_sym; + esym.st_info = elf.STB_LOCAL << 4; + return index; +} + +pub fn addGlobalEsym(self: *ZigModule, allocator: Allocator) !Symbol.Index { + try self.global_esyms.ensureUnusedCapacity(allocator, 1); + const index = @as(Symbol.Index, @intCast(self.global_esyms.items.len)); + const esym = self.global_esyms.addOneAssumeCapacity(); + esym.* = Elf.null_sym; + esym.st_info = elf.STB_GLOBAL << 4; + return index | 0x10000000; +} + +pub fn addAtom(self: *ZigModule, output_section_index: u16, elf_file: *Elf) !Symbol.Index { + const gpa = elf_file.base.allocator; + + const atom_index = try elf_file.addAtom(); + try self.atoms.putNoClobber(gpa, atom_index, {}); + const atom_ptr = elf_file.atom(atom_index).?; + atom_ptr.file_index = self.index; + atom_ptr.output_section_index = 
output_section_index; + + const symbol_index = try elf_file.addSymbol(); + try self.local_symbols.append(gpa, symbol_index); + const symbol_ptr = elf_file.symbol(symbol_index); + symbol_ptr.file_index = self.index; + symbol_ptr.atom_index = atom_index; + symbol_ptr.output_section_index = output_section_index; + + const esym_index = try self.addLocalEsym(gpa); + const esym = &self.local_esyms.items[esym_index]; + esym.st_shndx = atom_index; + symbol_ptr.esym_index = esym_index; + + const relocs_index = @as(Atom.Index, @intCast(self.relocs.items.len)); + const relocs = try self.relocs.addOne(gpa); + relocs.* = .{}; + atom_ptr.relocs_section_index = relocs_index; + + return symbol_index; +} + +pub fn resolveSymbols(self: *ZigModule, elf_file: *Elf) void { + for (self.globals(), 0..) |index, i| { + const esym_index = @as(Symbol.Index, @intCast(i)) | 0x10000000; + const esym = self.global_esyms.items[i]; + + if (esym.st_shndx == elf.SHN_UNDEF) continue; + + if (esym.st_shndx != elf.SHN_ABS and esym.st_shndx != elf.SHN_COMMON) { + const atom_index = esym.st_shndx; + const atom = elf_file.atom(atom_index) orelse continue; + if (!atom.alive) continue; + } + + const global = elf_file.symbol(index); + if (self.asFile().symbolRank(esym, false) < global.symbolRank(elf_file)) { + const atom_index = switch (esym.st_shndx) { + elf.SHN_ABS, elf.SHN_COMMON => 0, + else => esym.st_shndx, + }; + const output_section_index = if (elf_file.atom(atom_index)) |atom| + atom.output_section_index + else + 0; + global.value = esym.st_value; + global.atom_index = atom_index; + global.esym_index = esym_index; + global.file_index = self.index; + global.output_section_index = output_section_index; + global.version_index = elf_file.default_sym_version; + if (esym.st_bind() == elf.STB_WEAK) global.flags.weak = true; + } + } +} + +pub fn claimUnresolved(self: *ZigModule, elf_file: *Elf) void { + for (self.globals(), 0..) 
|index, i| { + const esym_index = @as(Symbol.Index, @intCast(i)) | 0x10000000; + const esym = self.global_esyms.items[i]; + + if (esym.st_shndx != elf.SHN_UNDEF) continue; + + const global = elf_file.symbol(index); + if (global.file(elf_file)) |_| { + if (global.elfSym(elf_file).st_shndx != elf.SHN_UNDEF) continue; + } + + const is_import = blk: { + if (!elf_file.isDynLib()) break :blk false; + const vis = @as(elf.STV, @enumFromInt(esym.st_other)); + if (vis == .HIDDEN) break :blk false; + break :blk true; + }; + + global.value = 0; + global.atom_index = 0; + global.esym_index = esym_index; + global.file_index = self.index; + global.version_index = if (is_import) elf.VER_NDX_LOCAL else elf_file.default_sym_version; + global.flags.import = is_import; + } +} + +pub fn scanRelocs(self: *ZigModule, elf_file: *Elf, undefs: anytype) !void { + for (self.atoms.keys()) |atom_index| { + const atom = elf_file.atom(atom_index) orelse continue; + if (!atom.alive) continue; + try atom.scanRelocs(elf_file, undefs); + } +} + +pub fn updateSymtabSize(self: *ZigModule, elf_file: *Elf) void { + for (self.locals()) |local_index| { + const local = elf_file.symbol(local_index); + const esym = local.elfSym(elf_file); + switch (esym.st_type()) { + elf.STT_SECTION, elf.STT_NOTYPE => { + local.flags.output_symtab = false; + continue; + }, + else => {}, + } + local.flags.output_symtab = true; + self.output_symtab_size.nlocals += 1; + } + + for (self.globals()) |global_index| { + const global = elf_file.symbol(global_index); + if (global.file(elf_file)) |file| if (file.index() != self.index) { + global.flags.output_symtab = false; + continue; + }; + global.flags.output_symtab = true; + if (global.isLocal()) { + self.output_symtab_size.nlocals += 1; + } else { + self.output_symtab_size.nglobals += 1; + } + } +} + +pub fn writeSymtab(self: *ZigModule, elf_file: *Elf, ctx: anytype) void { + var ilocal = ctx.ilocal; + for (self.locals()) |local_index| { + const local = 
elf_file.symbol(local_index); + if (!local.flags.output_symtab) continue; + local.setOutputSym(elf_file, &ctx.symtab[ilocal]); + ilocal += 1; + } + + var iglobal = ctx.iglobal; + for (self.globals()) |global_index| { + const global = elf_file.symbol(global_index); + if (global.file(elf_file)) |file| if (file.index() != self.index) continue; + if (!global.flags.output_symtab) continue; + if (global.isLocal()) { + global.setOutputSym(elf_file, &ctx.symtab[ilocal]); + ilocal += 1; + } else { + global.setOutputSym(elf_file, &ctx.symtab[iglobal]); + iglobal += 1; + } + } +} + +pub fn symbol(self: *ZigModule, index: Symbol.Index) Symbol.Index { + const is_global = index & 0x10000000 != 0; + const actual_index = index & 0x0fffffff; + if (is_global) return self.global_symbols.items[actual_index]; + return self.local_symbols.items[actual_index]; +} + +pub fn elfSym(self: *ZigModule, index: Symbol.Index) *elf.Elf64_Sym { + const is_global = index & 0x10000000 != 0; + const actual_index = index & 0x0fffffff; + if (is_global) return &self.global_esyms.items[actual_index]; + return &self.local_esyms.items[actual_index]; +} + +pub fn locals(self: *ZigModule) []const Symbol.Index { + return self.local_symbols.items; +} + +pub fn globals(self: *ZigModule) []const Symbol.Index { + return self.global_symbols.items; +} + +pub fn asFile(self: *ZigModule) File { + return .{ .zig_module = self }; +} + +pub fn fmtSymtab(self: *ZigModule, elf_file: *Elf) std.fmt.Formatter(formatSymtab) { + return .{ .data = .{ + .self = self, + .elf_file = elf_file, + } }; +} + +const FormatContext = struct { + self: *ZigModule, + elf_file: *Elf, +}; + +fn formatSymtab( + ctx: FormatContext, + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, +) !void { + _ = unused_fmt_string; + _ = options; + try writer.writeAll(" locals\n"); + for (ctx.self.locals()) |index| { + const local = ctx.elf_file.symbol(index); + try writer.print(" {}\n", .{local.fmt(ctx.elf_file)}); 
+ } + try writer.writeAll(" globals\n"); + for (ctx.self.globals()) |index| { + const global = ctx.elf_file.symbol(index); + try writer.print(" {}\n", .{global.fmt(ctx.elf_file)}); + } +} + +pub fn fmtAtoms(self: *ZigModule, elf_file: *Elf) std.fmt.Formatter(formatAtoms) { + return .{ .data = .{ + .self = self, + .elf_file = elf_file, + } }; +} + +fn formatAtoms( + ctx: FormatContext, + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, +) !void { + _ = unused_fmt_string; + _ = options; + try writer.writeAll(" atoms\n"); + for (ctx.self.atoms.keys()) |atom_index| { + const atom = ctx.elf_file.atom(atom_index) orelse continue; + try writer.print(" {}\n", .{atom.fmt(ctx.elf_file)}); + } +} + +const assert = std.debug.assert; +const std = @import("std"); +const elf = std.elf; + +const Allocator = std.mem.Allocator; +const Atom = @import("Atom.zig"); +const Elf = @import("../Elf.zig"); +const File = @import("file.zig").File; +const Module = @import("../../Module.zig"); +const Symbol = @import("Symbol.zig"); +const ZigModule = @This(); diff --git a/src/link/Elf/eh_frame.zig b/src/link/Elf/eh_frame.zig new file mode 100644 index 0000000000..8c676504e2 --- /dev/null +++ b/src/link/Elf/eh_frame.zig @@ -0,0 +1,449 @@ +pub const Fde = struct { + /// Includes 4byte size cell. + offset: u64, + size: u64, + cie_index: u32, + rel_index: u32 = 0, + rel_num: u32 = 0, + rel_section_index: u32 = 0, + input_section_index: u32 = 0, + file_index: u32 = 0, + alive: bool = true, + /// Includes 4byte size cell. 
+ out_offset: u64 = 0, + + pub fn address(fde: Fde, elf_file: *Elf) u64 { + const base: u64 = if (elf_file.eh_frame_section_index) |shndx| + elf_file.shdrs.items[shndx].sh_addr + else + 0; + return base + fde.out_offset; + } + + pub fn data(fde: Fde, elf_file: *Elf) error{Overflow}![]const u8 { + const object = elf_file.file(fde.file_index).?.object; + const contents = try object.shdrContents(fde.input_section_index); + return contents[fde.offset..][0..fde.calcSize()]; + } + + pub fn cie(fde: Fde, elf_file: *Elf) Cie { + const object = elf_file.file(fde.file_index).?.object; + return object.cies.items[fde.cie_index]; + } + + pub fn ciePointer(fde: Fde, elf_file: *Elf) u32 { + return std.mem.readIntLittle(u32, fde.data(elf_file)[4..8]); + } + + pub fn calcSize(fde: Fde) u64 { + return fde.size + 4; + } + + pub fn atom(fde: Fde, elf_file: *Elf) error{Overflow}!*Atom { + const object = elf_file.file(fde.file_index).?.object; + const rel = (try fde.relocs(elf_file))[0]; + const sym = object.symtab[rel.r_sym()]; + const atom_index = object.atoms.items[sym.st_shndx]; + return elf_file.atom(atom_index).?; + } + + pub fn relocs(fde: Fde, elf_file: *Elf) error{Overflow}![]align(1) const elf.Elf64_Rela { + const object = elf_file.file(fde.file_index).?.object; + return (try object.getRelocs(fde.rel_section_index))[fde.rel_index..][0..fde.rel_num]; + } + + pub fn format( + fde: Fde, + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, + ) !void { + _ = fde; + _ = unused_fmt_string; + _ = options; + _ = writer; + @compileError("do not format FDEs directly"); + } + + pub fn fmt(fde: Fde, elf_file: *Elf) std.fmt.Formatter(format2) { + return .{ .data = .{ + .fde = fde, + .elf_file = elf_file, + } }; + } + + const FdeFormatContext = struct { + fde: Fde, + elf_file: *Elf, + }; + + fn format2( + ctx: FdeFormatContext, + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, + ) !void { + _ = 
unused_fmt_string; + _ = options; + const fde = ctx.fde; + const elf_file = ctx.elf_file; + const base_addr = fde.address(elf_file); + const atom_name = if (fde.atom(elf_file)) |atom_ptr| + atom_ptr.name(elf_file) + else |_| + ""; + try writer.print("@{x} : size({x}) : cie({d}) : {s}", .{ + base_addr + fde.out_offset, + fde.calcSize(), + fde.cie_index, + atom_name, + }); + if (!fde.alive) try writer.writeAll(" : [*]"); + } +}; + +pub const Cie = struct { + /// Includes 4byte size cell. + offset: u64, + size: u64, + rel_index: u32 = 0, + rel_num: u32 = 0, + rel_section_index: u32 = 0, + input_section_index: u32 = 0, + file_index: u32 = 0, + /// Includes 4byte size cell. + out_offset: u64 = 0, + alive: bool = false, + + pub fn address(cie: Cie, elf_file: *Elf) u64 { + const base: u64 = if (elf_file.eh_frame_section_index) |shndx| + elf_file.shdrs.items[shndx].sh_addr + else + 0; + return base + cie.out_offset; + } + + pub fn data(cie: Cie, elf_file: *Elf) error{Overflow}![]const u8 { + const object = elf_file.file(cie.file_index).?.object; + const contents = try object.shdrContents(cie.input_section_index); + return contents[cie.offset..][0..cie.calcSize()]; + } + + pub fn calcSize(cie: Cie) u64 { + return cie.size + 4; + } + + pub fn relocs(cie: Cie, elf_file: *Elf) error{Overflow}![]align(1) const elf.Elf64_Rela { + const object = elf_file.file(cie.file_index).?.object; + return (try object.getRelocs(cie.rel_section_index))[cie.rel_index..][0..cie.rel_num]; + } + + pub fn eql(cie: Cie, other: Cie, elf_file: *Elf) error{Overflow}!bool { + if (!std.mem.eql(u8, try cie.data(elf_file), try other.data(elf_file))) return false; + + const cie_relocs = try cie.relocs(elf_file); + const other_relocs = try other.relocs(elf_file); + if (cie_relocs.len != other_relocs.len) return false; + + for (cie_relocs, other_relocs) |cie_rel, other_rel| { + if (cie_rel.r_offset - cie.offset != other_rel.r_offset - other.offset) return false; + if (cie_rel.r_type() != other_rel.r_type()) 
return false; + if (cie_rel.r_addend != other_rel.r_addend) return false; + + const cie_object = elf_file.file(cie.file_index).?.object; + const other_object = elf_file.file(other.file_index).?.object; + const cie_sym = cie_object.symbol(cie_rel.r_sym(), elf_file); + const other_sym = other_object.symbol(other_rel.r_sym(), elf_file); + if (!std.mem.eql(u8, std.mem.asBytes(&cie_sym), std.mem.asBytes(&other_sym))) return false; + } + return true; + } + + pub fn format( + cie: Cie, + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, + ) !void { + _ = cie; + _ = unused_fmt_string; + _ = options; + _ = writer; + @compileError("do not format CIEs directly"); + } + + pub fn fmt(cie: Cie, elf_file: *Elf) std.fmt.Formatter(format2) { + return .{ .data = .{ + .cie = cie, + .elf_file = elf_file, + } }; + } + + const CieFormatContext = struct { + cie: Cie, + elf_file: *Elf, + }; + + fn format2( + ctx: CieFormatContext, + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, + ) !void { + _ = unused_fmt_string; + _ = options; + const cie = ctx.cie; + const elf_file = ctx.elf_file; + const base_addr = cie.address(elf_file); + try writer.print("@{x} : size({x})", .{ + base_addr + cie.out_offset, + cie.calcSize(), + }); + if (!cie.alive) try writer.writeAll(" : [*]"); + } +}; + +pub const Iterator = struct { + data: []const u8, + pos: u64 = 0, + + pub const Record = struct { + tag: enum { fde, cie }, + offset: u64, + size: u64, + }; + + pub fn next(it: *Iterator) !?Record { + if (it.pos >= it.data.len) return null; + + var stream = std.io.fixedBufferStream(it.data[it.pos..]); + const reader = stream.reader(); + + var size = try reader.readIntLittle(u32); + if (size == 0xFFFFFFFF) @panic("TODO"); + + const id = try reader.readIntLittle(u32); + const record = Record{ + .tag = if (id == 0) .cie else .fde, + .offset = it.pos, + .size = size, + }; + it.pos += size + 4; + + return record; + } +}; + +pub fn 
calcEhFrameSize(elf_file: *Elf) !usize { + var offset: u64 = 0; + + var cies = std.ArrayList(Cie).init(elf_file.base.allocator); + defer cies.deinit(); + + for (elf_file.objects.items) |index| { + const object = elf_file.file(index).?.object; + + outer: for (object.cies.items) |*cie| { + for (cies.items) |other| { + if (other.eql(cie.*, elf_file)) { + // We already have a CIE record that has the exact same contents, so instead of + // duplicating them, we mark this one dead and set its output offset to be + // equal to that of the alive record. This way, we won't have to rewrite + // Fde.cie_index field when committing the records to file. + cie.out_offset = other.out_offset; + continue :outer; + } + } + cie.alive = true; + cie.out_offset = offset; + offset += cie.calcSize(); + try cies.append(cie.*); + } + } + + for (elf_file.objects.items) |index| { + const object = elf_file.file(index).?.object; + for (object.fdes.items) |*fde| { + if (!fde.alive) continue; + fde.out_offset = offset; + offset += fde.calcSize(); + } + } + + return offset + 4; // NULL terminator +} + +pub fn calcEhFrameHdrSize(elf_file: *Elf) usize { + var count: usize = 0; + for (elf_file.objects.items) |index| { + for (elf_file.file(index).?.object.fdes.items) |fde| { + if (!fde.alive) continue; + count += 1; + } + } + return eh_frame_hdr_header_size + count * 8; +} + +fn resolveReloc(rec: anytype, sym: *const Symbol, rel: elf.Elf64_Rela, elf_file: *Elf, contents: []u8) !void { + const offset = rel.r_offset - rec.offset; + const P = @as(i64, @intCast(rec.address(elf_file) + offset)); + const S = @as(i64, @intCast(sym.address(.{}, elf_file))); + const A = rel.r_addend; + + relocs_log.debug(" {s}: {x}: [{x} => {x}] ({s})", .{ + Atom.fmtRelocType(rel.r_type()), + offset, + P, + S + A, + sym.name(elf_file), + }); + + var where = contents[offset..]; + switch (rel.r_type()) { + elf.R_X86_64_32 => std.mem.writeIntLittle(i32, where[0..4], @as(i32, @truncate(S + A))), + elf.R_X86_64_64 => 
std.mem.writeIntLittle(i64, where[0..8], S + A), + elf.R_X86_64_PC32 => std.mem.writeIntLittle(i32, where[0..4], @as(i32, @intCast(S - P + A))), + elf.R_X86_64_PC64 => std.mem.writeIntLittle(i64, where[0..8], S - P + A), + else => unreachable, + } +} + +pub fn writeEhFrame(elf_file: *Elf, writer: anytype) !void { + const gpa = elf_file.base.allocator; + + relocs_log.debug("{x}: .eh_frame", .{elf_file.shdrs.items[elf_file.eh_frame_section_index.?].sh_addr}); + + for (elf_file.objects.items) |index| { + const object = elf_file.file(index).?.object; + + for (object.cies.items) |cie| { + if (!cie.alive) continue; + + const contents = try gpa.dupe(u8, try cie.data(elf_file)); + defer gpa.free(contents); + + for (try cie.relocs(elf_file)) |rel| { + const sym = object.symbol(rel.r_sym(), elf_file); + try resolveReloc(cie, sym, rel, elf_file, contents); + } + + try writer.writeAll(contents); + } + } + + for (elf_file.objects.items) |index| { + const object = elf_file.file(index).?.object; + + for (object.fdes.items) |fde| { + if (!fde.alive) continue; + + const contents = try gpa.dupe(u8, try fde.data(elf_file)); + defer gpa.free(contents); + + std.mem.writeIntLittle( + i32, + contents[4..8], + @as(i32, @truncate(@as(i64, @intCast(fde.out_offset + 4)) - @as(i64, @intCast(fde.cie(elf_file).out_offset)))), + ); + + for (try fde.relocs(elf_file)) |rel| { + const sym = object.symbol(rel.r_sym(), elf_file); + try resolveReloc(fde, sym, rel, elf_file, contents); + } + + try writer.writeAll(contents); + } + } + + try writer.writeIntLittle(u32, 0); +} + +pub fn writeEhFrameHdr(elf_file: *Elf, writer: anytype) !void { + try writer.writeByte(1); // version + try writer.writeByte(EH_PE.pcrel | EH_PE.sdata4); + try writer.writeByte(EH_PE.udata4); + try writer.writeByte(EH_PE.datarel | EH_PE.sdata4); + + const eh_frame_shdr = elf_file.shdrs.items[elf_file.eh_frame_section_index.?]; + const eh_frame_hdr_shdr = elf_file.shdrs.items[elf_file.eh_frame_hdr_section_index.?]; + const num_fdes 
= @as(u32, @intCast(@divExact(eh_frame_hdr_shdr.sh_size - eh_frame_hdr_header_size, 8))); + try writer.writeIntLittle( + u32, + @as(u32, @bitCast(@as( + i32, + @truncate(@as(i64, @intCast(eh_frame_shdr.sh_addr)) - @as(i64, @intCast(eh_frame_hdr_shdr.sh_addr)) - 4), + ))), + ); + try writer.writeIntLittle(u32, num_fdes); + + const Entry = struct { + init_addr: u32, + fde_addr: u32, + + pub fn lessThan(ctx: void, lhs: @This(), rhs: @This()) bool { + _ = ctx; + return lhs.init_addr < rhs.init_addr; + } + }; + + var entries = std.ArrayList(Entry).init(elf_file.base.allocator); + defer entries.deinit(); + try entries.ensureTotalCapacityPrecise(num_fdes); + + for (elf_file.objects.items) |index| { + const object = elf_file.file(index).?.object; + for (object.fdes.items) |fde| { + if (!fde.alive) continue; + + const relocs = try fde.relocs(elf_file); + assert(relocs.len > 0); // Should this be an error? Things are completely broken anyhow if this trips... + const rel = relocs[0]; + const sym = object.symbol(rel.r_sym(), elf_file); + const P = @as(i64, @intCast(fde.address(elf_file))); + const S = @as(i64, @intCast(sym.address(.{}, elf_file))); + const A = rel.r_addend; + entries.appendAssumeCapacity(.{ + .init_addr = @as(u32, @bitCast(@as(i32, @truncate(S + A - @as(i64, @intCast(eh_frame_hdr_shdr.sh_addr)))))), + .fde_addr = @as( + u32, + @bitCast(@as(i32, @truncate(P - @as(i64, @intCast(eh_frame_hdr_shdr.sh_addr))))), + ), + }); + } + } + + std.mem.sort(Entry, entries.items, {}, Entry.lessThan); + try writer.writeAll(std.mem.sliceAsBytes(entries.items)); +} + +const eh_frame_hdr_header_size: u64 = 12; + +const EH_PE = struct { + pub const absptr = 0x00; + pub const uleb128 = 0x01; + pub const udata2 = 0x02; + pub const udata4 = 0x03; + pub const udata8 = 0x04; + pub const sleb128 = 0x09; + pub const sdata2 = 0x0A; + pub const sdata4 = 0x0B; + pub const sdata8 = 0x0C; + pub const pcrel = 0x10; + pub const textrel = 0x20; + pub const datarel = 0x30; + pub const funcrel = 
0x40; + pub const aligned = 0x50; + pub const indirect = 0x80; + pub const omit = 0xFF; +}; + +const std = @import("std"); +const assert = std.debug.assert; +const elf = std.elf; +const relocs_log = std.log.scoped(.link_relocs); + +const Allocator = std.mem.Allocator; +const Atom = @import("Atom.zig"); +const Elf = @import("../Elf.zig"); +const Object = @import("Object.zig"); +const Symbol = @import("Symbol.zig"); diff --git a/src/link/Elf/file.zig b/src/link/Elf/file.zig new file mode 100644 index 0000000000..2b49f43bf1 --- /dev/null +++ b/src/link/Elf/file.zig @@ -0,0 +1,105 @@ +pub const File = union(enum) { + zig_module: *ZigModule, + linker_defined: *LinkerDefined, + object: *Object, + // shared_object: *SharedObject, + + pub fn index(file: File) Index { + return switch (file) { + inline else => |x| x.index, + }; + } + + pub fn fmtPath(file: File) std.fmt.Formatter(formatPath) { + return .{ .data = file }; + } + + fn formatPath( + file: File, + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, + ) !void { + _ = unused_fmt_string; + _ = options; + switch (file) { + .zig_module => |x| try writer.print("{s}", .{x.path}), + .linker_defined => try writer.writeAll("(linker defined)"), + .object => |x| try writer.print("{}", .{x.fmtPath()}), + // .shared_object => |x| try writer.writeAll(x.path), + } + } + + pub fn isAlive(file: File) bool { + return switch (file) { + .zig_module => true, + .linker_defined => true, + inline else => |x| x.alive, + }; + } + + /// Encodes symbol rank so that the following ordering applies: + /// * strong defined + /// * weak defined + /// * strong in lib (dso/archive) + /// * weak in lib (dso/archive) + /// * common + /// * common in lib (archive) + /// * unclaimed + pub fn symbolRank(file: File, sym: elf.Elf64_Sym, in_archive: bool) u32 { + const base: u3 = blk: { + if (sym.st_shndx == elf.SHN_COMMON) break :blk if (in_archive) 6 else 5; + // if (file == .shared or in_archive) break :blk 
switch (sym.st_bind()) { + if (in_archive) break :blk switch (sym.st_bind()) { + elf.STB_GLOBAL => 3, + else => 4, + }; + break :blk switch (sym.st_bind()) { + elf.STB_GLOBAL => 1, + else => 2, + }; + }; + return (@as(u32, base) << 24) + file.index(); + } + + pub fn setAlive(file: File) void { + switch (file) { + .zig_module, .linker_defined => {}, + inline else => |x| x.alive = true, + } + } + + pub fn markLive(file: File, elf_file: *Elf) void { + switch (file) { + .zig_module, .linker_defined => {}, + inline else => |x| x.markLive(elf_file), + } + } + + pub fn globals(file: File) []const Symbol.Index { + return switch (file) { + inline else => |x| x.globals(), + }; + } + + pub const Index = u32; + + pub const Entry = union(enum) { + null: void, + zig_module: ZigModule, + linker_defined: LinkerDefined, + object: Object, + // shared_object: SharedObject, + }; +}; + +const std = @import("std"); +const elf = std.elf; + +const Allocator = std.mem.Allocator; +const Elf = @import("../Elf.zig"); +const LinkerDefined = @import("LinkerDefined.zig"); +const Object = @import("Object.zig"); +// const SharedObject = @import("SharedObject.zig"); +const Symbol = @import("Symbol.zig"); +const ZigModule = @import("ZigModule.zig"); diff --git a/src/link/Elf/synthetic_sections.zig b/src/link/Elf/synthetic_sections.zig new file mode 100644 index 0000000000..f178f1370f --- /dev/null +++ b/src/link/Elf/synthetic_sections.zig @@ -0,0 +1,433 @@ +pub const GotSection = struct { + entries: std.ArrayListUnmanaged(Entry) = .{}, + needs_rela: bool = false, + dirty: bool = false, + output_symtab_size: Elf.SymtabSize = .{}, + + pub const Index = u32; + + const Tag = enum { + got, + tlsld, + tlsgd, + gottp, + tlsdesc, + }; + + const Entry = struct { + tag: Tag, + symbol_index: Symbol.Index, + cell_index: Index, + + /// Returns how many indexes in the GOT this entry uses. 
+ pub inline fn len(entry: Entry) usize { + return switch (entry.tag) { + .got, .gottp => 1, + .tlsld, .tlsgd, .tlsdesc => 2, + }; + } + + pub fn address(entry: Entry, elf_file: *Elf) u64 { + const ptr_bytes = @as(u64, elf_file.archPtrWidthBytes()); + const shdr = &elf_file.shdrs.items[elf_file.got_section_index.?]; + return shdr.sh_addr + @as(u64, entry.cell_index) * ptr_bytes; + } + }; + + pub fn deinit(got: *GotSection, allocator: Allocator) void { + got.entries.deinit(allocator); + } + + fn allocateEntry(got: *GotSection, allocator: Allocator) !Index { + try got.entries.ensureUnusedCapacity(allocator, 1); + // TODO add free list + const index = @as(Index, @intCast(got.entries.items.len)); + const entry = got.entries.addOneAssumeCapacity(); + const cell_index: Index = if (index > 0) blk: { + const last = got.entries.items[index - 1]; + break :blk last.cell_index + @as(Index, @intCast(last.len())); + } else 0; + entry.* = .{ .tag = undefined, .symbol_index = undefined, .cell_index = cell_index }; + got.dirty = true; + return index; + } + + pub fn addGotSymbol(got: *GotSection, sym_index: Symbol.Index, elf_file: *Elf) !Index { + const index = try got.allocateEntry(elf_file.base.allocator); + const entry = &got.entries.items[index]; + entry.tag = .got; + entry.symbol_index = sym_index; + const symbol = elf_file.symbol(sym_index); + if (symbol.flags.import or symbol.isIFunc(elf_file) or (elf_file.base.options.pic and !symbol.isAbs(elf_file))) + got.needs_rela = true; + if (symbol.extra(elf_file)) |extra| { + var new_extra = extra; + new_extra.got = index; + symbol.setExtra(new_extra, elf_file); + } else try symbol.addExtra(.{ .got = index }, elf_file); + return index; + } + + // pub fn addTlsGdSymbol(got: *GotSection, sym_index: Symbol.Index, elf_file: *Elf) !void { + // const index = got.next_index; + // const symbol = elf_file.getSymbol(sym_index); + // if (symbol.flags.import or elf_file.options.output_mode == .lib) got.needs_rela = true; + // if 
(symbol.getExtra(elf_file)) |extra| { + // var new_extra = extra; + // new_extra.tlsgd = index; + // symbol.setExtra(new_extra, elf_file); + // } else try symbol.addExtra(.{ .tlsgd = index }, elf_file); + // try got.symbols.append(elf_file.base.allocator, .{ .tlsgd = sym_index }); + // got.next_index += 2; + // } + + // pub fn addGotTpSymbol(got: *GotSection, sym_index: Symbol.Index, elf_file: *Elf) !void { + // const index = got.next_index; + // const symbol = elf_file.getSymbol(sym_index); + // if (symbol.flags.import or elf_file.options.output_mode == .lib) got.needs_rela = true; + // if (symbol.getExtra(elf_file)) |extra| { + // var new_extra = extra; + // new_extra.gottp = index; + // symbol.setExtra(new_extra, elf_file); + // } else try symbol.addExtra(.{ .gottp = index }, elf_file); + // try got.symbols.append(elf_file.base.allocator, .{ .gottp = sym_index }); + // got.next_index += 1; + // } + + // pub fn addTlsDescSymbol(got: *GotSection, sym_index: Symbol.Index, elf_file: *Elf) !void { + // const index = got.next_index; + // const symbol = elf_file.getSymbol(sym_index); + // got.needs_rela = true; + // if (symbol.getExtra(elf_file)) |extra| { + // var new_extra = extra; + // new_extra.tlsdesc = index; + // symbol.setExtra(new_extra, elf_file); + // } else try symbol.addExtra(.{ .tlsdesc = index }, elf_file); + // try got.symbols.append(elf_file.base.allocator, .{ .tlsdesc = sym_index }); + // got.next_index += 2; + // } + + pub fn size(got: GotSection, elf_file: *Elf) usize { + var s: usize = 0; + for (got.entries.items) |entry| { + s += elf_file.archPtrWidthBytes() * entry.len(); + } + return s; + } + + pub fn writeEntry(got: *GotSection, elf_file: *Elf, index: Index) !void { + const entry_size: u16 = elf_file.archPtrWidthBytes(); + if (got.dirty) { + const needed_size = got.size(elf_file); + try elf_file.growAllocSection(elf_file.got_section_index.?, needed_size); + got.dirty = false; + } + const endian = elf_file.base.options.target.cpu.arch.endian(); 
+ const entry = got.entries.items[index]; + const shdr = &elf_file.shdrs.items[elf_file.got_section_index.?]; + const off = shdr.sh_offset + @as(u64, entry_size) * entry.cell_index; + const vaddr = shdr.sh_addr + @as(u64, entry_size) * entry.cell_index; + const value = elf_file.symbol(entry.symbol_index).value; + switch (entry_size) { + 2 => { + var buf: [2]u8 = undefined; + std.mem.writeInt(u16, &buf, @as(u16, @intCast(value)), endian); + try elf_file.base.file.?.pwriteAll(&buf, off); + }, + 4 => { + var buf: [4]u8 = undefined; + std.mem.writeInt(u32, &buf, @as(u32, @intCast(value)), endian); + try elf_file.base.file.?.pwriteAll(&buf, off); + }, + 8 => { + var buf: [8]u8 = undefined; + std.mem.writeInt(u64, &buf, value, endian); + try elf_file.base.file.?.pwriteAll(&buf, off); + + if (elf_file.base.child_pid) |pid| { + switch (builtin.os.tag) { + .linux => { + var local_vec: [1]std.os.iovec_const = .{.{ + .iov_base = &buf, + .iov_len = buf.len, + }}; + var remote_vec: [1]std.os.iovec_const = .{.{ + .iov_base = @as([*]u8, @ptrFromInt(@as(usize, @intCast(vaddr)))), + .iov_len = buf.len, + }}; + const rc = std.os.linux.process_vm_writev(pid, &local_vec, &remote_vec, 0); + switch (std.os.errno(rc)) { + .SUCCESS => assert(rc == buf.len), + else => |errno| log.warn("process_vm_writev failure: {s}", .{@tagName(errno)}), + } + }, + else => return error.HotSwapUnavailableOnHostOperatingSystem, + } + } + }, + else => unreachable, + } + } + + // pub fn write(got: GotSection, elf_file: *Elf, writer: anytype) !void { + // const is_shared = elf_file.options.output_mode == .lib; + // const apply_relocs = elf_file.options.apply_dynamic_relocs; + + // for (got.symbols.items) |sym| { + // const symbol = elf_file.getSymbol(sym.getIndex()); + // switch (sym) { + // .got => { + // const value: u64 = blk: { + // const value = symbol.getAddress(.{ .plt = false }, elf_file); + // if (symbol.flags.import) break :blk 0; + // if (symbol.isIFunc(elf_file)) + // break :blk if (apply_relocs) 
value else 0; + // if (elf_file.options.pic and !symbol.isAbs(elf_file)) + // break :blk if (apply_relocs) value else 0; + // break :blk value; + // }; + // try writer.writeIntLittle(u64, value); + // }, + + // .tlsgd => { + // if (symbol.flags.import) { + // try writer.writeIntLittle(u64, 0); + // try writer.writeIntLittle(u64, 0); + // } else { + // try writer.writeIntLittle(u64, if (is_shared) @as(u64, 0) else 1); + // const offset = symbol.getAddress(.{}, elf_file) - elf_file.getDtpAddress(); + // try writer.writeIntLittle(u64, offset); + // } + // }, + + // .gottp => { + // if (symbol.flags.import) { + // try writer.writeIntLittle(u64, 0); + // } else if (is_shared) { + // const offset = if (apply_relocs) + // symbol.getAddress(.{}, elf_file) - elf_file.getTlsAddress() + // else + // 0; + // try writer.writeIntLittle(u64, offset); + // } else { + // const offset = @as(i64, @intCast(symbol.getAddress(.{}, elf_file))) - + // @as(i64, @intCast(elf_file.getTpAddress())); + // try writer.writeIntLittle(u64, @as(u64, @bitCast(offset))); + // } + // }, + + // .tlsdesc => { + // try writer.writeIntLittle(u64, 0); + // try writer.writeIntLittle(u64, 0); + // }, + // } + // } + + // if (got.emit_tlsld) { + // try writer.writeIntLittle(u64, if (is_shared) @as(u64, 0) else 1); + // try writer.writeIntLittle(u64, 0); + // } + // } + + // pub fn addRela(got: GotSection, elf_file: *Elf) !void { + // const is_shared = elf_file.options.output_mode == .lib; + // try elf_file.rela_dyn.ensureUnusedCapacity(elf_file.base.allocator, got.numRela(elf_file)); + + // for (got.symbols.items) |sym| { + // const symbol = elf_file.getSymbol(sym.getIndex()); + // const extra = symbol.getExtra(elf_file).?; + + // switch (sym) { + // .got => { + // const offset = symbol.gotAddress(elf_file); + + // if (symbol.flags.import) { + // elf_file.addRelaDynAssumeCapacity(.{ + // .offset = offset, + // .sym = extra.dynamic, + // .type = elf.R_X86_64_GLOB_DAT, + // }); + // continue; + // } + + // if 
(symbol.isIFunc(elf_file)) { + // elf_file.addRelaDynAssumeCapacity(.{ + // .offset = offset, + // .type = elf.R_X86_64_IRELATIVE, + // .addend = @intCast(symbol.getAddress(.{ .plt = false }, elf_file)), + // }); + // continue; + // } + + // if (elf_file.options.pic and !symbol.isAbs(elf_file)) { + // elf_file.addRelaDynAssumeCapacity(.{ + // .offset = offset, + // .type = elf.R_X86_64_RELATIVE, + // .addend = @intCast(symbol.getAddress(.{ .plt = false }, elf_file)), + // }); + // } + // }, + + // .tlsgd => { + // const offset = symbol.getTlsGdAddress(elf_file); + // if (symbol.flags.import) { + // elf_file.addRelaDynAssumeCapacity(.{ + // .offset = offset, + // .sym = extra.dynamic, + // .type = elf.R_X86_64_DTPMOD64, + // }); + // elf_file.addRelaDynAssumeCapacity(.{ + // .offset = offset + 8, + // .sym = extra.dynamic, + // .type = elf.R_X86_64_DTPOFF64, + // }); + // } else if (is_shared) { + // elf_file.addRelaDynAssumeCapacity(.{ + // .offset = offset, + // .sym = extra.dynamic, + // .type = elf.R_X86_64_DTPMOD64, + // }); + // } + // }, + + // .gottp => { + // const offset = symbol.getGotTpAddress(elf_file); + // if (symbol.flags.import) { + // elf_file.addRelaDynAssumeCapacity(.{ + // .offset = offset, + // .sym = extra.dynamic, + // .type = elf.R_X86_64_TPOFF64, + // }); + // } else if (is_shared) { + // elf_file.addRelaDynAssumeCapacity(.{ + // .offset = offset, + // .type = elf.R_X86_64_TPOFF64, + // .addend = @intCast(symbol.getAddress(.{}, elf_file) - elf_file.getTlsAddress()), + // }); + // } + // }, + + // .tlsdesc => { + // const offset = symbol.getTlsDescAddress(elf_file); + // elf_file.addRelaDynAssumeCapacity(.{ + // .offset = offset, + // .sym = extra.dynamic, + // .type = elf.R_X86_64_TLSDESC, + // }); + // }, + // } + // } + + // if (is_shared and got.emit_tlsld) { + // const offset = elf_file.getTlsLdAddress(); + // elf_file.addRelaDynAssumeCapacity(.{ + // .offset = offset, + // .type = elf.R_X86_64_DTPMOD64, + // }); + // } + // } + + // 
pub fn numRela(got: GotSection, elf_file: *Elf) usize { + // const is_shared = elf_file.options.output_mode == .lib; + // var num: usize = 0; + // for (got.symbols.items) |sym| { + // const symbol = elf_file.symbol(sym.index()); + // switch (sym) { + // .got => if (symbol.flags.import or + // symbol.isIFunc(elf_file) or (elf_file.options.pic and !symbol.isAbs(elf_file))) + // { + // num += 1; + // }, + + // .tlsgd => if (symbol.flags.import) { + // num += 2; + // } else if (is_shared) { + // num += 1; + // }, + + // .gottp => if (symbol.flags.import or is_shared) { + // num += 1; + // }, + + // .tlsdesc => num += 1, + // } + // } + // if (is_shared and got.emit_tlsld) num += 1; + // return num; + // } + + pub fn updateSymtabSize(got: *GotSection, elf_file: *Elf) void { + _ = elf_file; + got.output_symtab_size.nlocals = @as(u32, @intCast(got.entries.items.len)); + } + + pub fn writeSymtab(got: GotSection, elf_file: *Elf, ctx: anytype) !void { + const gpa = elf_file.base.allocator; + for (got.entries.items, ctx.ilocal..) 
|entry, ilocal| { + const suffix = switch (entry.tag) { + .tlsld => "$tlsld", + .tlsgd => "$tlsgd", + .got => "$got", + .gottp => "$gottp", + .tlsdesc => "$tlsdesc", + }; + const symbol = elf_file.symbol(entry.symbol_index); + const name = try std.fmt.allocPrint(gpa, "{s}{s}", .{ symbol.name(elf_file), suffix }); + defer gpa.free(name); + const st_name = try elf_file.strtab.insert(gpa, name); + const st_value = switch (entry.tag) { + .got => symbol.gotAddress(elf_file), + else => unreachable, + }; + const st_size: u64 = entry.len() * elf_file.archPtrWidthBytes(); + ctx.symtab[ilocal] = .{ + .st_name = st_name, + .st_info = elf.STT_OBJECT, + .st_other = 0, + .st_shndx = elf_file.got_section_index.?, + .st_value = st_value, + .st_size = st_size, + }; + } + } + + const FormatCtx = struct { + got: GotSection, + elf_file: *Elf, + }; + + pub fn fmt(got: GotSection, elf_file: *Elf) std.fmt.Formatter(format2) { + return .{ .data = .{ .got = got, .elf_file = elf_file } }; + } + + pub fn format2( + ctx: FormatCtx, + comptime unused_fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, + ) !void { + _ = options; + _ = unused_fmt_string; + try writer.writeAll("GOT\n"); + for (ctx.got.entries.items) |entry| { + const symbol = ctx.elf_file.symbol(entry.symbol_index); + try writer.print(" {d}@0x{x} => {d}@0x{x} ({s})\n", .{ + entry.cell_index, + entry.address(ctx.elf_file), + entry.symbol_index, + symbol.address(.{}, ctx.elf_file), + symbol.name(ctx.elf_file), + }); + } + } +}; + +const assert = std.debug.assert; +const builtin = @import("builtin"); +const elf = std.elf; +const log = std.log.scoped(.link); +const std = @import("std"); + +const Allocator = std.mem.Allocator; +const Elf = @import("../Elf.zig"); +const Symbol = @import("Symbol.zig"); diff --git a/src/link/strtab.zig b/src/link/strtab.zig index 0d71c9bf83..f854225ef6 100644 --- a/src/link/strtab.zig +++ b/src/link/strtab.zig @@ -100,13 +100,13 @@ pub fn StringTable(comptime log_scope: 
@Type(.EnumLiteral)) type { }); } - pub fn get(self: Self, off: u32) ?[]const u8 { + pub fn get(self: Self, off: u32) ?[:0]const u8 { log.debug("getting string at 0x{x}", .{off}); if (off >= self.buffer.items.len) return null; return mem.sliceTo(@as([*:0]const u8, @ptrCast(self.buffer.items.ptr + off)), 0); } - pub fn getAssumeExists(self: Self, off: u32) []const u8 { + pub fn getAssumeExists(self: Self, off: u32) [:0]const u8 { return self.get(off) orelse unreachable; } |
