| author | Jakub Konka <kubkon@jakubkonka.com> | 2023-09-30 08:43:33 +0200 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2023-09-30 08:43:33 +0200 |
| commit | 873c695c41dffd89ba7ef1b3ed6662e429bfa00d | |
| tree | b2131a824259cf307d2e626b0f38941336af97a8 /src | |
| parent | 101df768a06ef85753efdd6dc558bca68d50d1a5 | |
| parent | e72fd185e01aac14d7962f2eeb718653dc0c8e68 | |
Merge pull request #17319 from ziglang/elf-tls
elf: add basic TLS segment handling
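The change introduces a PT_TLS program header that spans the virtual range of `.tdata` followed by `.tbss`: only `.tdata` contributes file bytes (`p_filesz`), while `p_memsz` stretches to the end of `.tbss`, and the new `tpAddress()` helper places the thread pointer at the end of that range rounded up to the segment alignment. Below is a minimal sketch of that arithmetic using hypothetical section ranges and a stand-in struct, not the linker's real `Elf64_Phdr` bookkeeping:

```zig
const std = @import("std");

/// Hypothetical stand-in for the relevant elf.Elf64_Phdr fields; the real
/// linker tracks separate PT_LOAD segments for .tdata and .tbss and then
/// derives the PT_TLS header from them.
const TlsSegment = struct {
    p_vaddr: u64,
    p_filesz: u64,
    p_memsz: u64,
    p_align: u64,
};

/// .tbss overlaps .tdata in the file (it has no file bytes), so PT_TLS takes
/// its file size from .tdata alone while p_memsz extends to the end of .tbss.
fn buildTlsSegment(
    tdata_vaddr: u64,
    tdata_filesz: u64,
    tbss_vaddr: u64,
    tbss_memsz: u64,
    alignment: u64,
) TlsSegment {
    return .{
        .p_vaddr = tdata_vaddr,
        .p_filesz = tdata_filesz,
        .p_memsz = tbss_vaddr + tbss_memsz - tdata_vaddr,
        .p_align = alignment,
    };
}

/// On x86_64 the thread pointer sits just past the TLS block, rounded up to
/// the segment alignment, so TP-relative offsets of TLS symbols are negative.
fn tpAddress(tls: TlsSegment) u64 {
    return std.mem.alignForward(u64, tls.p_vaddr + tls.p_memsz, tls.p_align);
}

test "PT_TLS spans .tdata and .tbss" {
    const tls = buildTlsSegment(0x1000, 0x10, 0x1010, 0x20, 8);
    try std.testing.expectEqual(@as(u64, 0x30), tls.p_memsz);
    try std.testing.expectEqual(@as(u64, 0x1030), tpAddress(tls));
}
```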
Diffstat (limited to 'src')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | src/link/Elf.zig | 270 |
| -rw-r--r-- | src/link/Elf/Atom.zig | 215 |
| -rw-r--r-- | src/link/Elf/Object.zig | 48 |
| -rw-r--r-- | src/link/Elf/Symbol.zig | 13 |
| -rw-r--r-- | src/link/Elf/ZigModule.zig | 25 |
5 files changed, 433 insertions, 138 deletions
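Most of the new relocation handling in Atom.zig resolves the local-exec relocations R_X86_64_TPOFF32/TPOFF64 as S + A - TP, where TP is the thread-pointer address derived from PT_TLS. A rough sketch of that write, with made-up addresses and a plain byte buffer rather than the linker's actual Atom/Symbol types:

```zig
const std = @import("std");

/// R_X86_64_TPOFF32 stores the TP-relative offset of a TLS symbol as a
/// little-endian 32-bit value at the relocation offset in the atom's code.
fn writeTpOff32(code: []u8, r_offset: usize, s: i64, a: i64, tp: i64) void {
    const value: i32 = @truncate(s + a - tp);
    std.mem.writeIntLittle(i32, code[r_offset..][0..4], value);
}

test "TPOFF32 offsets are negative on x86_64" {
    var code = [_]u8{0} ** 8;
    // Hypothetical numbers: symbol at 0x1008 inside a TLS block whose TP is 0x1030.
    writeTpOff32(&code, 4, 0x1008, 0, 0x1030);
    try std.testing.expectEqual(@as(i32, -0x28), std.mem.readIntLittle(i32, code[4..][0..4]));
}
```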
diff --git a/src/link/Elf.zig b/src/link/Elf.zig index bf5c232d5c..4e43a20433 100644 --- a/src/link/Elf.zig +++ b/src/link/Elf.zig @@ -43,6 +43,12 @@ phdr_load_ro_index: ?u16 = null, phdr_load_rw_index: ?u16 = null, /// The index into the program headers of a PT_LOAD program header with zerofill data. phdr_load_zerofill_index: ?u16 = null, +/// The index into the program headers of the PT_TLS program header. +phdr_tls_index: ?u16 = null, +/// The index into the program headers of a PT_LOAD program header with TLS data. +phdr_load_tls_data_index: ?u16 = null, +/// The index into the program headers of a PT_LOAD program header with TLS zerofill data. +phdr_load_tls_zerofill_index: ?u16 = null, entry_addr: ?u64 = null, page_size: u32, @@ -56,10 +62,13 @@ strtab: StringTable(.strtab) = .{}, /// Representation of the GOT table as committed to the file. got: GotSection = .{}, +/// Tracked section headers text_section_index: ?u16 = null, rodata_section_index: ?u16 = null, data_section_index: ?u16 = null, bss_section_index: ?u16 = null, +tdata_section_index: ?u16 = null, +tbss_section_index: ?u16 = null, eh_frame_section_index: ?u16 = null, eh_frame_hdr_section_index: ?u16 = null, dynamic_section_index: ?u16 = null, @@ -238,7 +247,8 @@ pub fn createEmpty(gpa: Allocator, options: link.Options) !*Elf { else elf.VER_NDX_LOCAL; - var dwarf: ?Dwarf = if (!options.strip and options.module != null) + const use_llvm = options.use_llvm; + var dwarf: ?Dwarf = if (!options.strip and options.module != null and !use_llvm) Dwarf.init(gpa, &self.base, options.target) else null; @@ -255,7 +265,6 @@ pub fn createEmpty(gpa: Allocator, options: link.Options) !*Elf { .page_size = page_size, .default_sym_version = default_sym_version, }; - const use_llvm = options.use_llvm; if (use_llvm and options.module != null) { self.llvm_object = try LlvmObject.create(gpa, options); } @@ -358,10 +367,12 @@ fn detectAllocCollision(self: *Elf, start: u64, size: u64) ?u64 { } } - for (self.shdrs.items) |section| { - const increased_size = padToIdeal(section.sh_size); - const test_end = section.sh_offset + increased_size; - if (end > section.sh_offset and start < test_end) { + for (self.shdrs.items) |shdr| { + // SHT_NOBITS takes no physical space in the output file so set its size to 0. 
+ const sh_size = if (shdr.sh_type == elf.SHT_NOBITS) 0 else shdr.sh_size; + const increased_size = padToIdeal(sh_size); + const test_end = shdr.sh_offset + increased_size; + if (end > shdr.sh_offset and start < test_end) { return test_end; } } @@ -429,15 +440,15 @@ pub fn allocateSegment(self: *Elf, opts: AllocateSegmentOpts) error{OutOfMemory} const addr = opts.addr orelse blk: { const reserved_capacity = self.calcImageBase() * 4; // Calculate largest VM address - const count = self.phdrs.items.len; var addresses = std.ArrayList(u64).init(gpa); defer addresses.deinit(); - try addresses.ensureTotalCapacityPrecise(count); + try addresses.ensureTotalCapacityPrecise(self.phdrs.items.len); for (self.phdrs.items) |phdr| { + if (phdr.p_type != elf.PT_LOAD) continue; addresses.appendAssumeCapacity(phdr.p_vaddr + reserved_capacity); } mem.sort(u64, addresses.items, {}, std.sort.asc(u64)); - break :blk mem.alignForward(u64, addresses.items[count - 1], opts.alignment); + break :blk mem.alignForward(u64, addresses.pop(), opts.alignment); }; log.debug("allocating phdr({d})({c}{c}{c}) from 0x{x} to 0x{x} (0x{x} - 0x{x})", .{ index, @@ -492,7 +503,7 @@ pub fn allocateAllocSection(self: *Elf, opts: AllocateAllocSectionOpts) error{Ou .sh_flags = opts.flags, .sh_addr = phdr.p_vaddr, .sh_offset = phdr.p_offset, - .sh_size = phdr.p_filesz, + .sh_size = phdr.p_memsz, .sh_link = 0, .sh_info = 0, .sh_addralign = opts.alignment, @@ -543,7 +554,6 @@ pub fn populateMissingMetadata(self: *Elf) !void { }; const ptr_size: u8 = self.ptrWidthBytes(); const is_linux = self.base.options.target.os.tag == .linux; - const large_addrspace = self.base.options.target.ptrBitWidth() >= 32; const image_base = self.calcImageBase(); if (self.phdr_table_index == null) { @@ -566,23 +576,16 @@ pub fn populateMissingMetadata(self: *Elf) !void { } if (self.phdr_table_load_index == null) { - self.phdr_table_load_index = @intCast(self.phdrs.items.len); - try self.phdrs.append(gpa, .{ - .p_type = elf.PT_LOAD, - .p_offset = 0, - .p_filesz = 0, - .p_vaddr = image_base, - .p_paddr = image_base, - .p_memsz = 0, - .p_align = self.page_size, - .p_flags = elf.PF_R, + self.phdr_table_load_index = try self.allocateSegment(.{ + .addr = image_base, + .size = 0, + .alignment = self.page_size, }); self.phdr_table_dirty = true; } if (self.phdr_load_re_index == null) { self.phdr_load_re_index = try self.allocateSegment(.{ - .addr = self.defaultEntryAddress(), .size = self.base.options.program_code_size_hint, .alignment = self.page_size, .flags = elf.PF_X | elf.PF_R | elf.PF_W, @@ -591,12 +594,10 @@ pub fn populateMissingMetadata(self: *Elf) !void { } if (self.phdr_got_index == null) { - const addr: u64 = if (large_addrspace) 0x4000000 else 0x8000; // We really only need ptr alignment but since we are using PROGBITS, linux requires // page align. 
const alignment = if (is_linux) self.page_size else @as(u16, ptr_size); self.phdr_got_index = try self.allocateSegment(.{ - .addr = addr, .size = @as(u64, ptr_size) * self.base.options.symbol_count_hint, .alignment = alignment, .flags = elf.PF_R | elf.PF_W, @@ -604,10 +605,8 @@ pub fn populateMissingMetadata(self: *Elf) !void { } if (self.phdr_load_ro_index == null) { - const addr: u64 = if (large_addrspace) 0xc000000 else 0xa000; const alignment = if (is_linux) self.page_size else @as(u16, ptr_size); self.phdr_load_ro_index = try self.allocateSegment(.{ - .addr = addr, .size = 1024, .alignment = alignment, .flags = elf.PF_R | elf.PF_W, @@ -615,10 +614,8 @@ pub fn populateMissingMetadata(self: *Elf) !void { } if (self.phdr_load_rw_index == null) { - const addr: u64 = if (large_addrspace) 0x10000000 else 0xc000; const alignment = if (is_linux) self.page_size else @as(u16, ptr_size); self.phdr_load_rw_index = try self.allocateSegment(.{ - .addr = addr, .size = 1024, .alignment = alignment, .flags = elf.PF_R | elf.PF_W, @@ -626,10 +623,8 @@ pub fn populateMissingMetadata(self: *Elf) !void { } if (self.phdr_load_zerofill_index == null) { - const addr: u64 = if (large_addrspace) 0x14000000 else 0xf000; const alignment = if (is_linux) self.page_size else @as(u16, ptr_size); self.phdr_load_zerofill_index = try self.allocateSegment(.{ - .addr = addr, .size = 0, .alignment = alignment, .flags = elf.PF_R | elf.PF_W, @@ -639,6 +634,53 @@ pub fn populateMissingMetadata(self: *Elf) !void { phdr.p_memsz = 1024; } + if (!self.base.options.single_threaded) { + if (self.phdr_load_tls_data_index == null) { + const alignment = if (is_linux) self.page_size else @as(u16, ptr_size); + self.phdr_load_tls_data_index = try self.allocateSegment(.{ + .size = 1024, + .alignment = alignment, + .flags = elf.PF_R | elf.PF_W, + }); + } + + if (self.phdr_load_tls_zerofill_index == null) { + // TODO .tbss doesn't need any physical or memory representation (aka a loadable segment) + // since the loader only cares about the PT_TLS to work out TLS size. However, when + // relocating we need to have .tdata and .tbss contiguously laid out so that we can + // work out correct offsets to the start/end of the TLS segment. I am thinking that + // perhaps it's possible to completely spoof it by having an abstracted mechanism + // for this that wouldn't require us to explicitly track .tbss. Anyhow, for now, + // we go the savage route of treating .tbss like .bss. 
+ const alignment = if (is_linux) self.page_size else @as(u16, ptr_size); + self.phdr_load_tls_zerofill_index = try self.allocateSegment(.{ + .size = 0, + .alignment = alignment, + .flags = elf.PF_R | elf.PF_W, + }); + const phdr = &self.phdrs.items[self.phdr_load_tls_zerofill_index.?]; + phdr.p_offset = self.phdrs.items[self.phdr_load_tls_data_index.?].p_offset; // .tbss overlaps .tdata + phdr.p_memsz = 1024; + } + + if (self.phdr_tls_index == null) { + self.phdr_tls_index = @intCast(self.phdrs.items.len); + const phdr_tdata = &self.phdrs.items[self.phdr_load_tls_data_index.?]; + const phdr_tbss = &self.phdrs.items[self.phdr_load_tls_zerofill_index.?]; + try self.phdrs.append(gpa, .{ + .p_type = elf.PT_TLS, + .p_offset = phdr_tdata.p_offset, + .p_vaddr = phdr_tdata.p_vaddr, + .p_paddr = phdr_tdata.p_paddr, + .p_filesz = phdr_tdata.p_filesz, + .p_memsz = phdr_tbss.p_vaddr + phdr_tbss.p_memsz - phdr_tdata.p_vaddr, + .p_align = ptr_size, + .p_flags = elf.PF_R, + }); + self.phdr_table_dirty = true; + } + } + if (self.shstrtab_section_index == null) { assert(self.shstrtab.buffer.items.len == 0); try self.shstrtab.buffer.append(gpa, 0); // need a 0 at position 0 @@ -707,6 +749,31 @@ pub fn populateMissingMetadata(self: *Elf) !void { try self.last_atom_and_free_list_table.putNoClobber(gpa, self.bss_section_index.?, .{}); } + if (self.phdr_load_tls_data_index) |phdr_index| { + if (self.tdata_section_index == null) { + self.tdata_section_index = try self.allocateAllocSection(.{ + .name = ".tdata", + .phdr_index = phdr_index, + .alignment = ptr_size, + .flags = elf.SHF_ALLOC | elf.SHF_WRITE | elf.SHF_TLS, + }); + try self.last_atom_and_free_list_table.putNoClobber(gpa, self.tdata_section_index.?, .{}); + } + } + + if (self.phdr_load_tls_zerofill_index) |phdr_index| { + if (self.tbss_section_index == null) { + self.tbss_section_index = try self.allocateAllocSection(.{ + .name = ".tbss", + .phdr_index = phdr_index, + .alignment = ptr_size, + .flags = elf.SHF_ALLOC | elf.SHF_WRITE | elf.SHF_TLS, + .type = elf.SHT_NOBITS, + }); + try self.last_atom_and_free_list_table.putNoClobber(gpa, self.tbss_section_index.?, .{}); + } + } + if (self.symtab_section_index == null) { const min_align: u16 = if (small_ptr) @alignOf(elf.Elf32_Sym) else @alignOf(elf.Elf64_Sym); const each_size: u64 = if (small_ptr) @sizeOf(elf.Elf32_Sym) else @sizeOf(elf.Elf64_Sym); @@ -844,10 +911,7 @@ pub fn growAllocSection(self: *Elf, shdr_index: u16, needed_size: u64) !void { if (needed_size > self.allocatedSize(shdr.sh_offset) and !is_zerofill) { // Must move the entire section. const new_offset = self.findFreeSpace(needed_size, self.page_size); - const existing_size = if (self.last_atom_and_free_list_table.get(shdr_index)) |meta| blk: { - const last = self.atom(meta.last_atom_index) orelse break :blk 0; - break :blk (last.value + last.size) - phdr.p_vaddr; - } else shdr.sh_size; + const existing_size = shdr.sh_size; shdr.sh_size = 0; log.debug("new '{s}' file offset 0x{x} to 0x{x}", .{ @@ -857,12 +921,18 @@ pub fn growAllocSection(self: *Elf, shdr_index: u16, needed_size: u64) !void { }); const amt = try self.base.file.?.copyRangeAll(shdr.sh_offset, self.base.file.?, new_offset, existing_size); + // TODO figure out what to about this error condition - how to communicate it up. 
if (amt != existing_size) return error.InputOutput; shdr.sh_offset = new_offset; phdr.p_offset = new_offset; } + shdr.sh_size = needed_size; + if (!is_zerofill) { + phdr.p_filesz = needed_size; + } + const mem_capacity = self.allocatedVirtualSize(phdr.p_vaddr); if (needed_size > mem_capacity) { // We are exceeding our allocated VM capacity so we need to shift everything in memory @@ -889,13 +959,8 @@ pub fn growAllocSection(self: *Elf, shdr_index: u16, needed_size: u64) !void { } } - shdr.sh_size = needed_size; phdr.p_memsz = needed_size; - if (!is_zerofill) { - phdr.p_filesz = needed_size; - } - self.markDirty(shdr_index, phdr_index); } @@ -965,21 +1030,15 @@ pub fn growNonAllocSection( const shdr = &self.shdrs.items[shdr_index]; if (needed_size > self.allocatedSize(shdr.sh_offset)) { - const existing_size = if (self.symtab_section_index.? == shdr_index) blk: { - const sym_size: u64 = switch (self.ptr_width) { - .p32 => @sizeOf(elf.Elf32_Sym), - .p64 => @sizeOf(elf.Elf64_Sym), - }; - break :blk @as(u64, shdr.sh_info) * sym_size; - } else shdr.sh_size; + const existing_size = shdr.sh_size; shdr.sh_size = 0; // Move all the symbols to a new file location. const new_offset = self.findFreeSpace(needed_size, min_alignment); - log.debug("moving '{?s}' from 0x{x} to 0x{x}", .{ - self.shstrtab.get(shdr.sh_name), - shdr.sh_offset, + log.debug("new '{s}' file offset 0x{x} to 0x{x}", .{ + self.shstrtab.getAssumeExists(shdr.sh_name), new_offset, + new_offset + existing_size, }); if (requires_file_copy) { @@ -1223,19 +1282,48 @@ pub fn flushModule(self: *Elf, comp: *Compilation, prog_node: *std.Progress.Node try self.allocateObjects(); self.allocateLinkerDefinedSymbols(); + // .bss always overlaps .data in file offset, but is zero-sized in file so it doesn't + // get mapped by the loader + if (self.data_section_index) |data_shndx| blk: { + const bss_shndx = self.bss_section_index orelse break :blk; + const data_phndx = self.phdr_to_shdr_table.get(data_shndx).?; + const bss_phndx = self.phdr_to_shdr_table.get(bss_shndx).?; + self.shdrs.items[bss_shndx].sh_offset = self.shdrs.items[data_shndx].sh_offset; + self.phdrs.items[bss_phndx].p_offset = self.phdrs.items[data_phndx].p_offset; + } + + // Same treatment for .tbss section. + if (self.tdata_section_index) |tdata_shndx| blk: { + const tbss_shndx = self.tbss_section_index orelse break :blk; + const tdata_phndx = self.phdr_to_shdr_table.get(tdata_shndx).?; + const tbss_phndx = self.phdr_to_shdr_table.get(tbss_shndx).?; + self.shdrs.items[tbss_shndx].sh_offset = self.shdrs.items[tdata_shndx].sh_offset; + self.phdrs.items[tbss_phndx].p_offset = self.phdrs.items[tdata_phndx].p_offset; + } + + if (self.phdr_tls_index) |tls_index| { + const tdata_phdr = &self.phdrs.items[self.phdr_load_tls_data_index.?]; + const tbss_phdr = &self.phdrs.items[self.phdr_load_tls_zerofill_index.?]; + const phdr = &self.phdrs.items[tls_index]; + phdr.p_offset = tdata_phdr.p_offset; + phdr.p_filesz = tdata_phdr.p_filesz; + phdr.p_vaddr = tdata_phdr.p_vaddr; + phdr.p_paddr = tdata_phdr.p_vaddr; + phdr.p_memsz = tbss_phdr.p_vaddr + tbss_phdr.p_memsz - tdata_phdr.p_vaddr; + } + // Beyond this point, everything has been allocated a virtual address and we can resolve // the relocations, and commit objects to file. 
if (self.zig_module_index) |index| { - for (self.file(index).?.zig_module.atoms.keys()) |atom_index| { + const zig_module = self.file(index).?.zig_module; + for (zig_module.atoms.keys()) |atom_index| { const atom_ptr = self.atom(atom_index).?; if (!atom_ptr.flags.alive) continue; const shdr = &self.shdrs.items[atom_ptr.outputShndx().?]; - const file_offset = shdr.sh_offset + atom_ptr.value - shdr.sh_addr; - const size = math.cast(usize, atom_ptr.size) orelse return error.Overflow; - const code = try gpa.alloc(u8, size); + if (shdr.sh_type == elf.SHT_NOBITS) continue; + const code = try zig_module.codeAlloc(self, atom_index); defer gpa.free(code); - const amt = try self.base.file.?.preadAll(code, file_offset); - if (amt != code.len) return error.InputOutput; + const file_offset = shdr.sh_offset + atom_ptr.value - shdr.sh_addr; try atom_ptr.resolveRelocs(self, code); try self.base.file.?.pwriteAll(code, file_offset); } @@ -1268,22 +1356,6 @@ pub fn flushModule(self: *Elf, comp: *Compilation, prog_node: *std.Progress.Node try self.updateSymtabSize(); try self.writeSymtab(); - // .bss always overlaps .data in file offset, but is zero-sized in file so it doesn't - // get mapped by the loader - if (self.data_section_index) |data_shndx| blk: { - const bss_shndx = self.bss_section_index orelse break :blk; - const data_phndx = self.phdr_to_shdr_table.get(data_shndx).?; - const bss_phndx = self.phdr_to_shdr_table.get(bss_shndx).?; - self.shdrs.items[bss_shndx].sh_offset = self.shdrs.items[data_shndx].sh_offset; - self.phdrs.items[bss_phndx].p_offset = self.phdrs.items[data_phndx].p_offset; - } - - // Dump the state for easy debugging. - // State can be dumped via `--debug-log link_state`. - if (build_options.enable_logging) { - state_log.debug("{}", .{self.dumpState()}); - } - if (self.dwarf) |*dw| { if (self.debug_abbrev_section_dirty) { try dw.writeDbgAbbrev(); @@ -1470,6 +1542,12 @@ pub fn flushModule(self: *Elf, comp: *Compilation, prog_node: *std.Progress.Node try self.writeElfHeader(); } + // Dump the state for easy debugging. + // State can be dumped via `--debug-log link_state`. + if (build_options.enable_logging) { + state_log.debug("{}", .{self.dumpState()}); + } + // The point of flush() is to commit changes, so in theory, nothing should // be dirty after this. 
However, it is possible for some things to remain // dirty because they fail to be written in the event of compile errors, @@ -1779,7 +1857,7 @@ fn writeObjects(self: *Elf) !void { const file_offset = shdr.sh_offset + atom_ptr.value - shdr.sh_addr; log.debug("writing atom({d}) at 0x{x}", .{ atom_ptr.atom_index, file_offset }); - const code = try atom_ptr.codeInObjectUncompressAlloc(self); + const code = try object.codeDecompressAlloc(self, atom_ptr.atom_index); defer gpa.free(code); try atom_ptr.resolveRelocs(self, code); @@ -2785,10 +2863,6 @@ fn updateDeclCode( try self.got.writeEntry(self, gop.index); } - const phdr_index = self.phdr_to_shdr_table.get(shdr_index).?; - const section_offset = sym.value - self.phdrs.items[phdr_index].p_vaddr; - const file_offset = self.shdrs.items[shdr_index].sh_offset + section_offset; - if (self.base.child_pid) |pid| { switch (builtin.os.tag) { .linux => { @@ -2810,7 +2884,13 @@ fn updateDeclCode( } } - try self.base.file.?.pwriteAll(code, file_offset); + const shdr = self.shdrs.items[shdr_index]; + if (shdr.sh_type != elf.SHT_NOBITS) { + const phdr_index = self.phdr_to_shdr_table.get(shdr_index).?; + const section_offset = sym.value - self.phdrs.items[phdr_index].p_vaddr; + const file_offset = shdr.sh_offset + section_offset; + try self.base.file.?.pwriteAll(code, file_offset); + } } pub fn updateFunc(self: *Elf, mod: *Module, func_index: InternPool.Index, air: Air, liveness: Liveness) !void { @@ -3358,9 +3438,14 @@ fn allocateLinkerDefinedSymbols(self: *Elf) void { // _end { const end_symbol = self.symbol(self.end_index.?); + end_symbol.value = 0; for (self.shdrs.items, 0..) |*shdr, shndx| { - if (shdr.sh_flags & elf.SHF_ALLOC != 0) { - end_symbol.value = shdr.sh_addr + shdr.sh_size; + if (shdr.sh_flags & elf.SHF_ALLOC == 0) continue; + const phdr_index = self.phdr_to_shdr_table.get(@intCast(shndx)).?; + const phdr = self.phdrs.items[phdr_index]; + const value = phdr.p_vaddr + phdr.p_memsz; + if (end_symbol.value < value) { + end_symbol.value = value; end_symbol.output_section_index = @intCast(shndx); } } @@ -3424,6 +3509,7 @@ fn updateSymtabSize(self: *Elf) !void { .p64 => @alignOf(elf.Elf64_Sym), }; const needed_size = (sizes.nlocals + sizes.nglobals + 1) * sym_size; + shdr.sh_size = needed_size; try self.growNonAllocSection(self.symtab_section_index.?, needed_size, sym_align, true); } @@ -3820,12 +3906,8 @@ pub fn calcImageBase(self: Elf) u64 { }; } -pub fn defaultEntryAddress(self: Elf) u64 { - if (self.entry_addr) |addr| return addr; - return switch (self.base.options.target.cpu.arch) { - .spu_2 => 0, - else => default_entry_addr, - }; +pub fn isStatic(self: Elf) bool { + return self.base.options.link_mode == .Static; } pub fn isDynLib(self: Elf) bool { @@ -4011,6 +4093,22 @@ pub fn comdatGroupOwner(self: *Elf, index: ComdatGroupOwner.Index) *ComdatGroupO return &self.comdat_groups_owners.items[index]; } +pub fn tpAddress(self: *Elf) u64 { + const index = self.phdr_tls_index orelse return 0; + const phdr = self.phdrs.items[index]; + return mem.alignForward(u64, phdr.p_vaddr + phdr.p_memsz, phdr.p_align); +} + +pub fn dtpAddress(self: *Elf) u64 { + return self.tlsAddress(); +} + +pub fn tlsAddress(self: *Elf) u64 { + const index = self.phdr_tls_index orelse return 0; + const phdr = self.phdrs.items[index]; + return phdr.p_vaddr; +} + const ErrorWithNotes = struct { /// Allocated index in misc_errors array. 
index: usize, @@ -4043,7 +4141,7 @@ const ErrorWithNotes = struct { } }; -fn addErrorWithNotes(self: *Elf, note_count: usize) error{OutOfMemory}!ErrorWithNotes { +pub fn addErrorWithNotes(self: *Elf, note_count: usize) error{OutOfMemory}!ErrorWithNotes { try self.misc_errors.ensureUnusedCapacity(self.base.allocator, 1); return self.addErrorWithNotesAssumeCapacity(note_count); } diff --git a/src/link/Elf/Atom.zig b/src/link/Elf/Atom.zig index d044ef40a2..5b91dddff4 100644 --- a/src/link/Elf/Atom.zig +++ b/src/link/Elf/Atom.zig @@ -59,38 +59,6 @@ pub fn outputShndx(self: Atom) ?u16 { return self.output_section_index; } -pub fn codeInObject(self: Atom, elf_file: *Elf) error{Overflow}![]const u8 { - const object = self.file(elf_file).?.object; - return object.shdrContents(self.input_section_index); -} - -/// Returns atom's code and optionally uncompresses data if required (for compressed sections). -/// Caller owns the memory. -pub fn codeInObjectUncompressAlloc(self: Atom, elf_file: *Elf) ![]u8 { - const gpa = elf_file.base.allocator; - const data = try self.codeInObject(elf_file); - const shdr = self.inputShdr(elf_file); - if (shdr.sh_flags & elf.SHF_COMPRESSED != 0) { - const chdr = @as(*align(1) const elf.Elf64_Chdr, @ptrCast(data.ptr)).*; - switch (chdr.ch_type) { - .ZLIB => { - var stream = std.io.fixedBufferStream(data[@sizeOf(elf.Elf64_Chdr)..]); - var zlib_stream = std.compress.zlib.decompressStream(gpa, stream.reader()) catch - return error.InputOutput; - defer zlib_stream.deinit(); - const size = std.math.cast(usize, chdr.ch_size) orelse return error.Overflow; - const decomp = try gpa.alloc(u8, size); - const nread = zlib_stream.reader().readAll(decomp) catch return error.InputOutput; - if (nread != decomp.len) { - return error.InputOutput; - } - return decomp; - }, - else => @panic("TODO unhandled compression scheme"), - } - } else return gpa.dupe(u8, data); -} - pub fn priority(self: Atom, elf_file: *Elf) u64 { const index = self.file(elf_file).?.index(); return (@as(u64, @intCast(index)) << 32) | @as(u64, @intCast(self.input_section_index)); @@ -327,7 +295,15 @@ pub fn freeRelocs(self: Atom, elf_file: *Elf) void { zig_module.relocs.items[self.relocs_section_index].clearRetainingCapacity(); } -pub fn scanRelocs(self: Atom, elf_file: *Elf, undefs: anytype) !void { +pub fn scanRelocsRequiresCode(self: Atom, elf_file: *Elf) error{Overflow}!bool { + for (try self.relocs(elf_file)) |rel| { + if (rel.r_type() == elf.R_X86_64_GOTTPOFF) return true; + } + return false; +} + +pub fn scanRelocs(self: Atom, elf_file: *Elf, code: ?[]const u8, undefs: anytype) !void { + const is_dyn_lib = elf_file.isDynLib(); const file_ptr = self.file(elf_file).?; const rels = try self.relocs(elf_file); var i: usize = 0; @@ -336,6 +312,8 @@ pub fn scanRelocs(self: Atom, elf_file: *Elf, undefs: anytype) !void { if (rel.r_type() == elf.R_X86_64_NONE) continue; + const r_offset = std.math.cast(usize, rel.r_offset) orelse return error.Overflow; + const symbol_index = switch (file_ptr) { .zig_module => |x| x.symbol(rel.r_sym()), .object => |x| x.symbols.items[rel.r_sym()], @@ -388,7 +366,54 @@ pub fn scanRelocs(self: Atom, elf_file: *Elf, undefs: anytype) !void { elf.R_X86_64_PC32 => {}, - else => @panic("TODO"), + elf.R_X86_64_TPOFF32, + elf.R_X86_64_TPOFF64, + => { + if (is_dyn_lib) { + // TODO + // self.picError(symbol, rel, elf_file); + } + }, + + elf.R_X86_64_TLSGD => { + // TODO verify followed by appropriate relocation such as PLT32 __tls_get_addr + + if (elf_file.isStatic() or + (!symbol.flags.import and 
!is_dyn_lib)) + { + // Relax if building with -static flag as __tls_get_addr() will not be present in libc.a + // We skip the next relocation. + i += 1; + } else if (!symbol.flags.import and is_dyn_lib) { + symbol.flags.needs_gottp = true; + i += 1; + } else { + symbol.flags.needs_tlsgd = true; + } + }, + + elf.R_X86_64_GOTTPOFF => { + const should_relax = blk: { + // if (!elf_file.options.relax or is_shared or symbol.flags.import) break :blk false; + if (!x86_64.canRelaxGotTpOff(code.?[r_offset - 3 ..])) break :blk false; + break :blk true; + }; + if (!should_relax) { + symbol.flags.needs_gottp = true; + } + }, + + else => { + var err = try elf_file.addErrorWithNotes(1); + try err.addMsg(elf_file, "fatal linker error: unhandled relocation type {}", .{ + fmtRelocType(rel.r_type()), + }); + try err.addNote(elf_file, "in {}:{s} at offset 0x{x}", .{ + self.file(elf_file).?.fmtPath(), + self.name(elf_file), + r_offset, + }); + }, } } } @@ -430,7 +455,10 @@ pub fn resolveRelocs(self: Atom, elf_file: *Elf, code: []u8) !void { var stream = std.io.fixedBufferStream(code); const cwriter = stream.writer(); - for (try self.relocs(elf_file)) |rel| { + const rels = try self.relocs(elf_file); + var i: usize = 0; + while (i < rels.len) : (i += 1) { + const rel = rels[i]; const r_type = rel.r_type(); if (r_type == elf.R_X86_64_NONE) continue; @@ -463,9 +491,9 @@ pub fn resolveRelocs(self: Atom, elf_file: *Elf, code: []u8) !void { // Relative offset to the start of the global offset table. const G = @as(i64, @intCast(target.gotAddress(elf_file))) - GOT; // // Address of the thread pointer. - // const TP = @as(i64, @intCast(elf_file.getTpAddress())); + const TP = @as(i64, @intCast(elf_file.tpAddress())); // // Address of the dynamic thread pointer. - // const DTP = @as(i64, @intCast(elf_file.getDtpAddress())); + // const DTP = @as(i64, @intCast(elf_file.dtpAddress())); relocs_log.debug(" {s}: {x}: [{x} => {x}] G({x}) ({s})", .{ fmtRelocType(r_type), @@ -512,10 +540,43 @@ pub fn resolveRelocs(self: Atom, elf_file: *Elf, code: []u8) !void { try cwriter.writeIntLittle(i32, @as(i32, @intCast(G + GOT + A - P))); }, - else => { - log.err("TODO: unhandled relocation type {}", .{fmtRelocType(rel.r_type())}); - @panic("TODO unhandled relocation type"); + elf.R_X86_64_TPOFF32 => try cwriter.writeIntLittle(i32, @as(i32, @truncate(S + A - TP))), + elf.R_X86_64_TPOFF64 => try cwriter.writeIntLittle(i64, S + A - TP), + + elf.R_X86_64_TLSGD => { + if (target.flags.has_tlsgd) { + // TODO + // const S_ = @as(i64, @intCast(target.tlsGdAddress(elf_file))); + // try cwriter.writeIntLittle(i32, @as(i32, @intCast(S_ + A - P))); + } else if (target.flags.has_gottp) { + // TODO + // const S_ = @as(i64, @intCast(target.getGotTpAddress(elf_file))); + // try relaxTlsGdToIe(relocs[i .. i + 2], @intCast(S_ - P), elf_file, &stream); + i += 1; + } else { + try x86_64.relaxTlsGdToLe( + self, + rels[i .. 
i + 2], + @as(i32, @intCast(S - TP)), + elf_file, + &stream, + ); + i += 1; + } + }, + + elf.R_X86_64_GOTTPOFF => { + if (target.flags.has_gottp) { + // TODO + // const S_ = @as(i64, @intCast(target.gotTpAddress(elf_file))); + // try cwriter.writeIntLittle(i32, @as(i32, @intCast(S_ + A - P))); + } else { + x86_64.relaxGotTpOff(code[r_offset - 3 ..]) catch unreachable; + try cwriter.writeIntLittle(i32, @as(i32, @intCast(S - TP))); + } }, + + else => {}, } } } @@ -681,6 +742,80 @@ const x86_64 = struct { } } + pub fn canRelaxGotTpOff(code: []const u8) bool { + const old_inst = disassemble(code) orelse return false; + switch (old_inst.encoding.mnemonic) { + .mov => if (Instruction.new(old_inst.prefix, .mov, &.{ + old_inst.ops[0], + // TODO: hack to force imm32s in the assembler + .{ .imm = Immediate.s(-129) }, + })) |inst| { + inst.encode(std.io.null_writer, .{}) catch return false; + return true; + } else |_| return false, + else => return false, + } + } + + pub fn relaxGotTpOff(code: []u8) !void { + const old_inst = disassemble(code) orelse return error.RelaxFail; + switch (old_inst.encoding.mnemonic) { + .mov => { + const inst = try Instruction.new(old_inst.prefix, .mov, &.{ + old_inst.ops[0], + // TODO: hack to force imm32s in the assembler + .{ .imm = Immediate.s(-129) }, + }); + relocs_log.debug(" relaxing {} => {}", .{ old_inst.encoding, inst.encoding }); + encode(&.{inst}, code) catch return error.RelaxFail; + }, + else => return error.RelaxFail, + } + } + + pub fn relaxTlsGdToLe( + self: Atom, + rels: []align(1) const elf.Elf64_Rela, + value: i32, + elf_file: *Elf, + stream: anytype, + ) !void { + assert(rels.len == 2); + const writer = stream.writer(); + switch (rels[1].r_type()) { + elf.R_X86_64_PC32, + elf.R_X86_64_PLT32, + elf.R_X86_64_GOTPCREL, + elf.R_X86_64_GOTPCRELX, + => { + var insts = [_]u8{ + 0x64, 0x48, 0x8b, 0x04, 0x25, 0, 0, 0, 0, // movq %fs:0,%rax + 0x48, 0x81, 0xc0, 0, 0, 0, 0, // add $tp_offset, %rax + }; + std.mem.writeIntLittle(i32, insts[12..][0..4], value); + try stream.seekBy(-4); + try writer.writeAll(&insts); + relocs_log.debug(" relaxing {} and {}", .{ + fmtRelocType(rels[0].r_type()), + fmtRelocType(rels[1].r_type()), + }); + }, + + else => { + var err = try elf_file.addErrorWithNotes(1); + try err.addMsg(elf_file, "fatal linker error: rewrite {} when followed by {}", .{ + fmtRelocType(rels[0].r_type()), + fmtRelocType(rels[1].r_type()), + }); + try err.addNote(elf_file, "in {}:{s} at offset 0x{x}", .{ + self.file(elf_file).?.fmtPath(), + self.name(elf_file), + rels[0].r_offset, + }); + }, + } + } + fn disassemble(code: []const u8) ?Instruction { var disas = Disassembler.init(code); const inst = disas.next() catch return null; diff --git a/src/link/Elf/Object.zig b/src/link/Elf/Object.zig index fe14831d1a..191f6774a5 100644 --- a/src/link/Elf/Object.zig +++ b/src/link/Elf/Object.zig @@ -208,6 +208,8 @@ fn getOutputSectionIndex(self: *Object, elf_file: *Elf, shdr: elf.Elf64_Shdr) er break :blk prefix; } } + if (std.mem.eql(u8, name, ".tcommon")) break :blk ".tbss"; + if (std.mem.eql(u8, name, ".common")) break :blk ".bss"; break :blk name; }; const @"type" = switch (shdr.sh_type) { @@ -233,8 +235,7 @@ fn getOutputSectionIndex(self: *Object, elf_file: *Elf, shdr: elf.Elf64_Shdr) er const is_alloc = flags & elf.SHF_ALLOC != 0; const is_write = flags & elf.SHF_WRITE != 0; const is_exec = flags & elf.SHF_EXECINSTR != 0; - const is_tls = flags & elf.SHF_TLS != 0; - if (!is_alloc or is_tls) { + if (!is_alloc) { log.err("{}: output section {s} not found", .{ 
self.fmtPath(), name }); @panic("TODO: missing output section!"); } @@ -243,7 +244,7 @@ fn getOutputSectionIndex(self: *Object, elf_file: *Elf, shdr: elf.Elf64_Shdr) er if (is_exec) phdr_flags |= elf.PF_X; const phdr_index = try elf_file.allocateSegment(.{ .size = Elf.padToIdeal(shdr.sh_size), - .alignment = if (is_tls) shdr.sh_addralign else elf_file.page_size, + .alignment = elf_file.page_size, .flags = phdr_flags, }); const shndx = try elf_file.allocateAllocSection(.{ @@ -428,7 +429,13 @@ pub fn scanRelocs(self: *Object, elf_file: *Elf, undefs: anytype) !void { const shdr = atom.inputShdr(elf_file); if (shdr.sh_flags & elf.SHF_ALLOC == 0) continue; if (shdr.sh_type == elf.SHT_NOBITS) continue; - try atom.scanRelocs(elf_file, undefs); + if (try atom.scanRelocsRequiresCode(elf_file)) { + // TODO ideally, we don't have to decompress at this stage (should already be done) + // and we just fetch the code slice. + const code = try self.codeDecompressAlloc(elf_file, atom_index); + defer elf_file.base.allocator.free(code); + try atom.scanRelocs(elf_file, code, undefs); + } else try atom.scanRelocs(elf_file, null, undefs); } for (self.cies.items) |cie| { @@ -591,7 +598,7 @@ pub fn convertCommonSymbols(self: *Object, elf_file: *Elf) !void { try self.atoms.append(gpa, atom_index); const is_tls = global.getType(elf_file) == elf.STT_TLS; - const name = if (is_tls) ".tls_common" else ".common"; + const name = if (is_tls) ".tbss" else ".bss"; const atom = elf_file.atom(atom_index).?; atom.atom_index = atom_index; @@ -685,7 +692,7 @@ pub fn globals(self: *Object) []const Symbol.Index { return self.symbols.items[start..]; } -pub fn shdrContents(self: *Object, index: u32) error{Overflow}![]const u8 { +fn shdrContents(self: Object, index: u32) error{Overflow}![]const u8 { assert(index < self.shdrs.items.len); const shdr = self.shdrs.items[index]; const offset = math.cast(usize, shdr.sh_offset) orelse return error.Overflow; @@ -693,6 +700,35 @@ pub fn shdrContents(self: *Object, index: u32) error{Overflow}![]const u8 { return self.data[offset..][0..size]; } +/// Returns atom's code and optionally uncompresses data if required (for compressed sections). +/// Caller owns the memory. 
+pub fn codeDecompressAlloc(self: Object, elf_file: *Elf, atom_index: Atom.Index) ![]u8 { + const gpa = elf_file.base.allocator; + const atom_ptr = elf_file.atom(atom_index).?; + assert(atom_ptr.file_index == self.index); + const data = try self.shdrContents(atom_ptr.input_section_index); + const shdr = atom_ptr.inputShdr(elf_file); + if (shdr.sh_flags & elf.SHF_COMPRESSED != 0) { + const chdr = @as(*align(1) const elf.Elf64_Chdr, @ptrCast(data.ptr)).*; + switch (chdr.ch_type) { + .ZLIB => { + var stream = std.io.fixedBufferStream(data[@sizeOf(elf.Elf64_Chdr)..]); + var zlib_stream = std.compress.zlib.decompressStream(gpa, stream.reader()) catch + return error.InputOutput; + defer zlib_stream.deinit(); + const size = std.math.cast(usize, chdr.ch_size) orelse return error.Overflow; + const decomp = try gpa.alloc(u8, size); + const nread = zlib_stream.reader().readAll(decomp) catch return error.InputOutput; + if (nread != decomp.len) { + return error.InputOutput; + } + return decomp; + }, + else => @panic("TODO unhandled compression scheme"), + } + } else return gpa.dupe(u8, data); +} + fn getString(self: *Object, off: u32) [:0]const u8 { assert(off < self.strtab.len); return mem.sliceTo(@as([*:0]const u8, @ptrCast(self.strtab.ptr + off)), 0); diff --git a/src/link/Elf/Symbol.zig b/src/link/Elf/Symbol.zig index 1be66c33e2..a0236f4c87 100644 --- a/src/link/Elf/Symbol.zig +++ b/src/link/Elf/Symbol.zig @@ -196,9 +196,10 @@ pub fn setOutputSym(symbol: Symbol, elf_file: *Elf, out: *elf.Elf64_Sym) void { // if (symbol.flags.is_canonical) break :blk symbol.address(.{}, elf_file); // break :blk 0; // } - // if (st_shndx == elf.SHN_ABS) break :blk symbol.value; - // const shdr = &elf_file.sections.items(.shdr)[st_shndx]; - // if (Elf.shdrIsTls(shdr)) break :blk symbol.value - elf_file.getTlsAddress(); + if (st_shndx == elf.SHN_ABS) break :blk symbol.value; + const shdr = &elf_file.shdrs.items[st_shndx]; + if (shdr.sh_flags & elf.SHF_TLS != 0 and file_ptr != .linker_defined) + break :blk symbol.value - elf_file.tlsAddress(); break :blk symbol.value; }; out.* = .{ @@ -327,10 +328,12 @@ pub const Flags = packed struct { has_dynamic: bool = false, /// Whether the symbol contains TLSGD indirection. - tlsgd: bool = false, + needs_tlsgd: bool = false, + has_tlsgd: bool = false, /// Whether the symbol contains GOTTP indirection. - gottp: bool = false, + needs_gottp: bool = false, + has_gottp: bool = false, /// Whether the symbol contains TLSDESC indirection. tlsdesc: bool = false, diff --git a/src/link/Elf/ZigModule.zig b/src/link/Elf/ZigModule.zig index 93908e5f1f..c79680dbf5 100644 --- a/src/link/Elf/ZigModule.zig +++ b/src/link/Elf/ZigModule.zig @@ -144,7 +144,14 @@ pub fn scanRelocs(self: *ZigModule, elf_file: *Elf, undefs: anytype) !void { for (self.atoms.keys()) |atom_index| { const atom = elf_file.atom(atom_index) orelse continue; if (!atom.flags.alive) continue; - try atom.scanRelocs(elf_file, undefs); + if (try atom.scanRelocsRequiresCode(elf_file)) { + // TODO ideally we don't have to fetch the code here. + // Perhaps it would make sense to save the code until flushModule where we + // would free all of generated code? + const code = try self.codeAlloc(elf_file, atom_index); + defer elf_file.base.allocator.free(code); + try atom.scanRelocs(elf_file, code, undefs); + } else try atom.scanRelocs(elf_file, null, undefs); } } @@ -253,6 +260,22 @@ pub fn asFile(self: *ZigModule) File { return .{ .zig_module = self }; } +/// Returns atom's code. +/// Caller owns the memory. 
+pub fn codeAlloc(self: ZigModule, elf_file: *Elf, atom_index: Atom.Index) ![]u8 { + const gpa = elf_file.base.allocator; + const atom = elf_file.atom(atom_index).?; + assert(atom.file_index == self.index); + const shdr = &elf_file.shdrs.items[atom.outputShndx().?]; + const file_offset = shdr.sh_offset + atom.value - shdr.sh_addr; + const size = std.math.cast(usize, atom.size) orelse return error.Overflow; + const code = try gpa.alloc(u8, size); + errdefer gpa.free(code); + const amt = try elf_file.base.file.?.preadAll(code, file_offset); + if (amt != code.len) return error.InputOutput; + return code; +} + pub fn fmtSymtab(self: *ZigModule, elf_file: *Elf) std.fmt.Formatter(formatSymtab) { return .{ .data = .{ .self = self, |
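For reference, the GD-to-LE relaxation added above rewrites the two-instruction `__tls_get_addr` call sequence into a direct thread-pointer load plus add. This is a simplified byte-level sketch that assumes the caller passes in exactly the 16 bytes being rewritten; it glosses over the -4 seek relative to `r_offset` that the real `relaxTlsGdToLe` performs on its output stream:

```zig
const std = @import("std");

/// Replace the TLSGD code sequence with
///   movq %fs:0, %rax
///   add  $tp_offset, %rax
/// where tp_offset = S - TP is the symbol's offset from the thread pointer.
fn relaxTlsGdToLe(code: *[16]u8, tp_offset: i32) void {
    var insts = [_]u8{
        0x64, 0x48, 0x8b, 0x04, 0x25, 0, 0, 0, 0, // movq %fs:0, %rax
        0x48, 0x81, 0xc0, 0, 0, 0, 0, // add $tp_offset, %rax
    };
    std.mem.writeIntLittle(i32, insts[12..], tp_offset);
    @memcpy(code, &insts);
}

test "relaxed sequence carries the TP-relative offset" {
    var code: [16]u8 = undefined;
    relaxTlsGdToLe(&code, -0x28);
    try std.testing.expectEqual(@as(i32, -0x28), std.mem.readIntLittle(i32, code[12..]));
}
```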
