diff options
| author | Jakub Konka <kubkon@jakubkonka.com> | 2024-05-19 22:42:35 +0200 |
|---|---|---|
| committer | Jakub Konka <kubkon@jakubkonka.com> | 2024-05-23 12:04:17 +0200 |
| commit | 434e69482ed29de26ceea16dbc5679f32281c502 (patch) | |
| tree | df307c783b90dd598ae3a5afe24506b6fb59905d /src | |
| parent | 9be8a9000faead40b1aec4877506ff10b066659c (diff) | |
| download | zig-434e69482ed29de26ceea16dbc5679f32281c502.tar.gz zig-434e69482ed29de26ceea16dbc5679f32281c502.zip | |
link/macho: dedup literals in objects and internal object file
Diffstat (limited to 'src')
| -rw-r--r-- | src/link/MachO.zig | 116 | ||||
| -rw-r--r-- | src/link/MachO/Atom.zig | 19 | ||||
| -rw-r--r-- | src/link/MachO/InternalObject.zig | 129 | ||||
| -rw-r--r-- | src/link/MachO/Object.zig | 257 | ||||
| -rw-r--r-- | src/link/MachO/Symbol.zig | 4 | ||||
| -rw-r--r-- | src/link/MachO/ZigObject.zig | 7 | ||||
| -rw-r--r-- | src/link/MachO/relocatable.zig | 21 |
7 files changed, 453 insertions, 100 deletions
diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 4255b298bc..5dadf8a60c 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -539,6 +539,7 @@ pub fn flushModule(self: *MachO, arena: Allocator, prog_node: *std.Progress.Node try self.convertTentativeDefinitions(); try self.createObjcSections(); + try self.dedupLiterals(); try self.claimUnresolved(); if (self.base.gc_sections) { @@ -1491,6 +1492,22 @@ fn createObjcSections(self: *MachO) !void { const name = eatPrefix(sym.getName(self), "_objc_msgSend$").?; const selrefs_index = try internal.addObjcMsgsendSections(name, self); try sym.addExtra(.{ .objc_selrefs = selrefs_index }, self); + sym.flags.objc_stubs = true; + } +} + +pub fn dedupLiterals(self: *MachO) !void { + const gpa = self.base.comp.gpa; + var lp: LiteralPool = .{}; + defer lp.deinit(gpa); + if (self.getZigObject()) |zo| { + try zo.dedupLiterals(&lp, self); + } + for (self.objects.items) |index| { + try self.getFile(index).?.object.dedupLiterals(&lp, self); + } + if (self.getInternalObject()) |object| { + try object.dedupLiterals(&lp, self); } } @@ -1728,20 +1745,18 @@ fn initOutputSections(self: *MachO) !void { atom.out_n_sect = try Atom.initOutputSection(atom.getInputSection(self), self); } } - if (self.text_sect_index == null) { - self.text_sect_index = try self.addSection("__TEXT", "__text", .{ - .alignment = switch (self.getTarget().cpu.arch) { - .x86_64 => 0, - .aarch64 => 2, - else => unreachable, - }, - .flags = macho.S_REGULAR | - macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, - }); - } - if (self.data_sect_index == null) { - self.data_sect_index = try self.addSection("__DATA", "__data", .{}); - } + self.text_sect_index = self.getSectionByName("__TEXT", "__text") orelse + try self.addSection("__TEXT", "__text", .{ + .alignment = switch (self.getTarget().cpu.arch) { + .x86_64 => 0, + .aarch64 => 2, + else => unreachable, + }, + .flags = macho.S_REGULAR | + macho.S_ATTR_PURE_INSTRUCTIONS | macho.S_ATTR_SOME_INSTRUCTIONS, + }); + self.data_sect_index = self.getSectionByName("__DATA", "__data") orelse + try self.addSection("__DATA", "__data", .{}); } fn initSyntheticSections(self: *MachO) !void { @@ -4387,6 +4402,78 @@ const Section = struct { last_atom_index: Atom.Index = 0, }; +pub const LiteralPool = struct { + table: std.AutoArrayHashMapUnmanaged(void, void) = .{}, + keys: std.ArrayListUnmanaged(Key) = .{}, + values: std.ArrayListUnmanaged(Atom.Index) = .{}, + data: std.ArrayListUnmanaged(u8) = .{}, + + pub fn deinit(lp: *LiteralPool, allocator: Allocator) void { + lp.table.deinit(allocator); + lp.keys.deinit(allocator); + lp.values.deinit(allocator); + lp.data.deinit(allocator); + } + + const InsertResult = struct { + found_existing: bool, + atom: *Atom.Index, + }; + + pub fn insert(lp: *LiteralPool, allocator: Allocator, @"type": u8, string: []const u8) !InsertResult { + const size: u32 = @intCast(string.len); + try lp.data.ensureUnusedCapacity(allocator, size); + const off: u32 = @intCast(lp.data.items.len); + lp.data.appendSliceAssumeCapacity(string); + const adapter = Adapter{ .lp = lp }; + const key = Key{ .off = off, .size = size, .seed = @"type" }; + const gop = try lp.table.getOrPutAdapted(allocator, key, adapter); + if (!gop.found_existing) { + try lp.keys.append(allocator, key); + _ = try lp.values.addOne(allocator); + } + return .{ + .found_existing = gop.found_existing, + .atom = &lp.values.items[gop.index], + }; + } + + const Key = struct { + off: u32, + size: u32, + seed: u8, + + fn getData(key: Key, lp: *const LiteralPool) []const u8 { + return lp.data.items[key.off..][0..key.size]; + } + + fn eql(key: Key, other: Key, lp: *const LiteralPool) bool { + const key_data = key.getData(lp); + const other_data = other.getData(lp); + return mem.eql(u8, key_data, other_data); + } + + fn hash(key: Key, lp: *const LiteralPool) u32 { + const data = key.getData(lp); + return @truncate(Hash.hash(key.seed, data)); + } + }; + + const Adapter = struct { + lp: *const LiteralPool, + + pub fn eql(ctx: @This(), key: Key, b_void: void, b_map_index: usize) bool { + _ = b_void; + const other = ctx.lp.keys.items[b_map_index]; + return key.eql(other, ctx.lp); + } + + pub fn hash(ctx: @This(), key: Key) u32 { + return key.hash(ctx.lp); + } + }; +}; + const HotUpdateState = struct { mach_task: ?std.c.MachTask = null, }; @@ -4738,6 +4825,7 @@ const Dylib = @import("MachO/Dylib.zig"); const ExportTrieSection = synthetic.ExportTrieSection; const File = @import("MachO/file.zig").File; const GotSection = synthetic.GotSection; +const Hash = std.hash.Wyhash; const Indsymtab = synthetic.Indsymtab; const InternalObject = @import("MachO/InternalObject.zig"); const ObjcStubsSection = synthetic.ObjcStubsSection; diff --git a/src/link/MachO/Atom.zig b/src/link/MachO/Atom.zig index b39157e588..e37412dd53 100644 --- a/src/link/MachO/Atom.zig +++ b/src/link/MachO/Atom.zig @@ -143,6 +143,16 @@ pub inline fn setExtra(atom: Atom, extra: Extra, macho_file: *MachO) void { } pub fn initOutputSection(sect: macho.section_64, macho_file: *MachO) !u8 { + if (macho_file.base.isRelocatable()) { + const osec = macho_file.getSectionByName(sect.segName(), sect.sectName()) orelse + try macho_file.addSection( + sect.segName(), + sect.sectName(), + .{ .flags = sect.flags }, + ); + return osec; + } + const segname, const sectname, const flags = blk: { if (sect.isCode()) break :blk .{ "__TEXT", @@ -200,18 +210,11 @@ pub fn initOutputSection(sect: macho.section_64, macho_file: *MachO) !u8 { else => break :blk .{ sect.segName(), sect.sectName(), sect.flags }, } }; - const osec = macho_file.getSectionByName(segname, sectname) orelse try macho_file.addSection( + return macho_file.getSectionByName(segname, sectname) orelse try macho_file.addSection( segname, sectname, .{ .flags = flags }, ); - if (mem.eql(u8, segname, "__TEXT") and mem.eql(u8, sectname, "__text")) { - macho_file.text_sect_index = osec; - } - if (mem.eql(u8, segname, "__DATA") and mem.eql(u8, sectname, "__data")) { - macho_file.data_sect_index = osec; - } - return osec; } /// Returns how much room there is to grow in virtual address space. diff --git a/src/link/MachO/InternalObject.zig b/src/link/MachO/InternalObject.zig index 9f42eca114..f25508f037 100644 --- a/src/link/MachO/InternalObject.zig +++ b/src/link/MachO/InternalObject.zig @@ -3,7 +3,6 @@ index: File.Index, sections: std.MultiArrayList(Section) = .{}, atoms: std.ArrayListUnmanaged(Atom.Index) = .{}, symbols: std.ArrayListUnmanaged(Symbol.Index) = .{}, -strtab: std.ArrayListUnmanaged(u8) = .{}, objc_methnames: std.ArrayListUnmanaged(u8) = .{}, objc_selrefs: [@sizeOf(u64)]u8 = [_]u8{0} ** @sizeOf(u64), @@ -18,7 +17,6 @@ pub fn deinit(self: *InternalObject, allocator: Allocator) void { self.sections.deinit(allocator); self.atoms.deinit(allocator); self.symbols.deinit(allocator); - self.strtab.deinit(allocator); self.objc_methnames.deinit(allocator); } @@ -38,9 +36,9 @@ pub fn addSymbol(self: *InternalObject, name: [:0]const u8, macho_file: *MachO) } /// Creates a fake input sections __TEXT,__objc_methname and __DATA,__objc_selrefs. -pub fn addObjcMsgsendSections(self: *InternalObject, sym_name: []const u8, macho_file: *MachO) !u32 { +pub fn addObjcMsgsendSections(self: *InternalObject, sym_name: []const u8, macho_file: *MachO) !Atom.Index { const methname_atom_index = try self.addObjcMethnameSection(sym_name, macho_file); - return try self.addObjcSelrefsSection(sym_name, methname_atom_index, macho_file); + return try self.addObjcSelrefsSection(methname_atom_index, macho_file); } fn addObjcMethnameSection(self: *InternalObject, methname: []const u8, macho_file: *MachO) !Atom.Index { @@ -48,11 +46,8 @@ fn addObjcMethnameSection(self: *InternalObject, methname: []const u8, macho_fil const atom_index = try macho_file.addAtom(); try self.atoms.append(gpa, atom_index); - const name = try std.fmt.allocPrintZ(gpa, "__TEXT$__objc_methname${s}", .{methname}); - defer gpa.free(name); const atom = macho_file.getAtom(atom_index).?; atom.atom_index = atom_index; - atom.name = try self.addString(gpa, name); atom.file = self.index; atom.size = methname.len + 1; atom.alignment = .@"1"; @@ -72,21 +67,13 @@ fn addObjcMethnameSection(self: *InternalObject, methname: []const u8, macho_fil return atom_index; } -fn addObjcSelrefsSection( - self: *InternalObject, - methname: []const u8, - methname_atom_index: Atom.Index, - macho_file: *MachO, -) !Atom.Index { +fn addObjcSelrefsSection(self: *InternalObject, methname_atom_index: Atom.Index, macho_file: *MachO) !Atom.Index { const gpa = macho_file.base.comp.gpa; const atom_index = try macho_file.addAtom(); try self.atoms.append(gpa, atom_index); - const name = try std.fmt.allocPrintZ(gpa, "__DATA$__objc_selrefs${s}", .{methname}); - defer gpa.free(name); const atom = macho_file.getAtom(atom_index).?; atom.atom_index = atom_index; - atom.name = try self.addString(gpa, name); atom.file = self.index; atom.size = @sizeOf(u64); atom.alignment = .@"8"; @@ -122,6 +109,83 @@ fn addObjcSelrefsSection( return atom_index; } +pub fn dedupLiterals(self: InternalObject, lp: *MachO.LiteralPool, macho_file: *MachO) !void { + const gpa = macho_file.base.comp.gpa; + + var killed_atoms = std.AutoHashMap(Atom.Index, Atom.Index).init(gpa); + defer killed_atoms.deinit(); + + var buffer = std.ArrayList(u8).init(gpa); + defer buffer.deinit(); + + const slice = self.sections.slice(); + for (slice.items(.header), self.atoms.items, 0..) |header, atom_index, n_sect| { + if (Object.isCstringLiteral(header) or Object.isFixedSizeLiteral(header)) { + const data = try self.getSectionData(@intCast(n_sect)); + const atom = macho_file.getAtom(atom_index).?; + const res = try lp.insert(gpa, header.type(), data); + if (!res.found_existing) { + res.atom.* = atom_index; + continue; + } + atom.flags.alive = false; + try killed_atoms.putNoClobber(atom_index, res.atom.*); + } else if (Object.isPtrLiteral(header)) { + const atom = macho_file.getAtom(atom_index).?; + const relocs = atom.getRelocs(macho_file); + assert(relocs.len == 1); + const rel = relocs[0]; + assert(rel.tag == .local); + const target = macho_file.getAtom(rel.target).?; + const addend = std.math.cast(u32, rel.addend) orelse return error.Overflow; + try buffer.ensureUnusedCapacity(target.size); + buffer.resize(target.size) catch unreachable; + try target.getData(macho_file, buffer.items); + const res = try lp.insert(gpa, header.type(), buffer.items[addend..]); + buffer.clearRetainingCapacity(); + if (!res.found_existing) { + res.atom.* = atom_index; + continue; + } + atom.flags.alive = false; + try killed_atoms.putNoClobber(atom_index, res.atom.*); + } + } + + for (self.atoms.items) |atom_index| { + if (killed_atoms.get(atom_index)) |_| continue; + const atom = macho_file.getAtom(atom_index) orelse continue; + if (!atom.flags.alive) continue; + if (!atom.flags.relocs) continue; + + const relocs = blk: { + const extra = atom.getExtra(macho_file).?; + const relocs = slice.items(.relocs)[atom.n_sect].items; + break :blk relocs[extra.rel_index..][0..extra.rel_count]; + }; + for (relocs) |*rel| switch (rel.tag) { + .local => if (killed_atoms.get(rel.target)) |new_target| { + rel.target = new_target; + }, + .@"extern" => { + const target = rel.getTargetSymbol(macho_file); + if (killed_atoms.get(target.atom)) |new_atom| { + target.atom = new_atom; + } + }, + }; + } + + for (self.symbols.items) |sym_index| { + const sym = macho_file.getSymbol(sym_index); + if (!sym.flags.objc_stubs) continue; + const extra = sym.getExtra(macho_file).?; + if (killed_atoms.get(extra.objc_selrefs)) |new_atom| { + try sym.addExtra(.{ .objc_selrefs = new_atom }, macho_file); + } + } +} + pub fn calcSymtabSize(self: *InternalObject, macho_file: *MachO) !void { for (self.symbols.items) |sym_index| { const sym = macho_file.getSymbol(sym_index); @@ -167,18 +231,23 @@ fn addSection(self: *InternalObject, allocator: Allocator, segname: []const u8, return n_sect; } -pub fn getAtomData(self: *const InternalObject, atom: Atom, buffer: []u8) !void { - assert(buffer.len == atom.size); +fn getSectionData(self: *const InternalObject, index: u32) error{Overflow}![]const u8 { const slice = self.sections.slice(); - const sect = slice.items(.header)[atom.n_sect]; - const extra = slice.items(.extra)[atom.n_sect]; - const data = if (extra.is_objc_methname) blk: { + assert(index < slice.items(.header).len); + const sect = slice.items(.header)[index]; + const extra = slice.items(.extra)[index]; + if (extra.is_objc_methname) { const size = std.math.cast(usize, sect.size) orelse return error.Overflow; - break :blk self.objc_methnames.items[sect.offset..][0..size]; + return self.objc_methnames.items[sect.offset..][0..size]; } else if (extra.is_objc_selref) - &self.objc_selrefs + return &self.objc_selrefs else @panic("ref to non-existent section"); +} + +pub fn getAtomData(self: *const InternalObject, atom: Atom, buffer: []u8) error{Overflow}!void { + assert(buffer.len == atom.size); + const data = try self.getSectionData(atom.n_sect); const off = std.math.cast(usize, atom.off) orelse return error.Overflow; const size = std.math.cast(usize, atom.size) orelse return error.Overflow; @memcpy(buffer, data[off..][0..size]); @@ -191,17 +260,11 @@ pub fn getAtomRelocs(self: *const InternalObject, atom: Atom, macho_file: *MachO return relocs.items[extra.rel_index..][0..extra.rel_count]; } -fn addString(self: *InternalObject, allocator: Allocator, name: [:0]const u8) error{OutOfMemory}!u32 { - const off: u32 = @intCast(self.strtab.items.len); - try self.strtab.ensureUnusedCapacity(allocator, name.len + 1); - self.strtab.appendSliceAssumeCapacity(name); - self.strtab.appendAssumeCapacity(0); - return off; -} - pub fn getString(self: InternalObject, off: u32) [:0]const u8 { - assert(off < self.strtab.items.len); - return mem.sliceTo(@as([*:0]const u8, @ptrCast(self.strtab.items.ptr + off)), 0); + _ = self; + _ = off; + // We don't have any local strings for synthetic atoms. + return ""; } pub fn asFile(self: *InternalObject) File { diff --git a/src/link/MachO/Object.zig b/src/link/MachO/Object.zig index 62b27b2bb7..d32ace7a3e 100644 --- a/src/link/MachO/Object.zig +++ b/src/link/MachO/Object.zig @@ -208,7 +208,9 @@ pub fn parse(self: *Object, macho_file: *MachO) !void { try self.initSections(nlists.items, macho_file); } - try self.initLiteralSections(macho_file); + try self.initCstringLiterals(macho_file); + try self.initFixedSizeLiterals(macho_file); + try self.initPointerLiterals(macho_file); try self.linkNlistToAtom(macho_file); try self.sortAtoms(macho_file); @@ -263,25 +265,33 @@ pub fn parse(self: *Object, macho_file: *MachO) !void { } } -inline fn isLiteral(sect: macho.section_64) bool { +pub fn isCstringLiteral(sect: macho.section_64) bool { + return sect.type() == macho.S_CSTRING_LITERALS; +} + +pub fn isFixedSizeLiteral(sect: macho.section_64) bool { return switch (sect.type()) { - macho.S_CSTRING_LITERALS, macho.S_4BYTE_LITERALS, macho.S_8BYTE_LITERALS, macho.S_16BYTE_LITERALS, - macho.S_LITERAL_POINTERS, => true, else => false, }; } +pub fn isPtrLiteral(sect: macho.section_64) bool { + return sect.type() == macho.S_LITERAL_POINTERS; +} + fn initSubsections(self: *Object, nlists: anytype, macho_file: *MachO) !void { const tracy = trace(@src()); defer tracy.end(); const gpa = macho_file.base.comp.gpa; const slice = self.sections.slice(); for (slice.items(.header), slice.items(.subsections), 0..) |sect, *subsections, n_sect| { - if (isLiteral(sect)) continue; + if (isCstringLiteral(sect)) continue; + if (isFixedSizeLiteral(sect)) continue; + if (isPtrLiteral(sect)) continue; const nlist_start = for (nlists, 0..) |nlist, i| { if (nlist.nlist.n_sect - 1 == n_sect) break i; @@ -352,7 +362,9 @@ fn initSections(self: *Object, nlists: anytype, macho_file: *MachO) !void { try self.atoms.ensureUnusedCapacity(gpa, self.sections.items(.header).len); for (slice.items(.header), 0..) |sect, n_sect| { - if (isLiteral(sect)) continue; + if (isCstringLiteral(sect)) continue; + if (isFixedSizeLiteral(sect)) continue; + if (isPtrLiteral(sect)) continue; const name = try std.fmt.allocPrintZ(gpa, "{s}${s}", .{ sect.segName(), sect.sectName() }); defer gpa.free(name); @@ -393,6 +405,206 @@ fn initSections(self: *Object, nlists: anytype, macho_file: *MachO) !void { } } +fn initCstringLiterals(self: *Object, macho_file: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + + const gpa = macho_file.base.comp.gpa; + const slice = self.sections.slice(); + + for (slice.items(.header), 0..) |sect, n_sect| { + if (!isCstringLiteral(sect)) continue; + + const data = try self.getSectionData(@intCast(n_sect), macho_file); + defer gpa.free(data); + + var start: u32 = 0; + while (start < data.len) { + var end = start; + while (end < data.len - 1 and data[end] != 0) : (end += 1) {} + if (data[end] != 0) { + try macho_file.reportParseError2( + self.index, + "string not null terminated in '{s},{s}'", + .{ sect.segName(), sect.sectName() }, + ); + return error.MalformedObject; + } + end += 1; + + const atom_index = try self.addAtom(.{ + .name = 0, + .n_sect = @intCast(n_sect), + .off = start, + .size = end - start, + .alignment = sect.@"align", + }, macho_file); + try slice.items(.subsections)[n_sect].append(gpa, .{ + .atom = atom_index, + .off = start, + }); + + start = end; + } + } +} + +fn initFixedSizeLiterals(self: *Object, macho_file: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + + const gpa = macho_file.base.comp.gpa; + const slice = self.sections.slice(); + + for (slice.items(.header), 0..) |sect, n_sect| { + if (!isFixedSizeLiteral(sect)) continue; + const rec_size: u8 = switch (sect.type()) { + macho.S_4BYTE_LITERALS => 4, + macho.S_8BYTE_LITERALS => 8, + macho.S_16BYTE_LITERALS => 16, + else => unreachable, + }; + if (sect.size % rec_size != 0) { + try macho_file.reportParseError2( + self.index, + "size not multiple of record size in '{s},{s}'", + .{ sect.segName(), sect.sectName() }, + ); + return error.MalformedObject; + } + var pos: u32 = 0; + while (pos < sect.size) : (pos += rec_size) { + const atom_index = try self.addAtom(.{ + .name = 0, + .n_sect = @intCast(n_sect), + .off = pos, + .size = rec_size, + .alignment = sect.@"align", + }, macho_file); + try slice.items(.subsections)[n_sect].append(gpa, .{ + .atom = atom_index, + .off = pos, + }); + } + } +} + +fn initPointerLiterals(self: *Object, macho_file: *MachO) !void { + const tracy = trace(@src()); + defer tracy.end(); + + const gpa = macho_file.base.comp.gpa; + const slice = self.sections.slice(); + + for (slice.items(.header), 0..) |sect, n_sect| { + if (!isPtrLiteral(sect)) continue; + + const rec_size: u8 = 8; + if (sect.size % rec_size != 0) { + try macho_file.reportParseError2( + self.index, + "size not multiple of record size in '{s},{s}'", + .{ sect.segName(), sect.sectName() }, + ); + return error.MalformedObject; + } + const num_ptrs = @divExact(sect.size, rec_size); + + for (0..num_ptrs) |i| { + const pos: u32 = @as(u32, @intCast(i)) * rec_size; + const atom_index = try self.addAtom(.{ + .name = 0, + .n_sect = @intCast(n_sect), + .off = pos, + .size = rec_size, + .alignment = sect.@"align", + }, macho_file); + try slice.items(.subsections)[n_sect].append(gpa, .{ + .atom = atom_index, + .off = pos, + }); + } + } +} + +pub fn dedupLiterals(self: Object, lp: *MachO.LiteralPool, macho_file: *MachO) !void { + const gpa = macho_file.base.comp.gpa; + + var killed_atoms = std.AutoHashMap(Atom.Index, Atom.Index).init(gpa); + defer killed_atoms.deinit(); + + var buffer = std.ArrayList(u8).init(gpa); + defer buffer.deinit(); + + const slice = self.sections.slice(); + for (slice.items(.header), slice.items(.subsections), 0..) |header, subs, n_sect| { + if (isCstringLiteral(header) or isFixedSizeLiteral(header)) { + const data = try self.getSectionData(@intCast(n_sect), macho_file); + defer gpa.free(data); + + for (subs.items) |sub| { + const atom = macho_file.getAtom(sub.atom).?; + const atom_data = data[atom.off..][0..atom.size]; + const res = try lp.insert(gpa, header.type(), atom_data); + if (!res.found_existing) { + res.atom.* = sub.atom; + continue; + } + atom.flags.alive = false; + try killed_atoms.putNoClobber(sub.atom, res.atom.*); + } + } else if (isPtrLiteral(header)) { + for (subs.items) |sub| { + const atom = macho_file.getAtom(sub.atom).?; + const relocs = atom.getRelocs(macho_file); + assert(relocs.len == 1); + const rel = relocs[0]; + const target = switch (rel.tag) { + .local => rel.target, + .@"extern" => rel.getTargetSymbol(macho_file).atom, + }; + const addend = math.cast(u32, rel.addend) orelse return error.Overflow; + const target_atom = macho_file.getAtom(target).?; + try buffer.ensureUnusedCapacity(target_atom.size); + buffer.resize(target_atom.size) catch unreachable; + try target_atom.getData(macho_file, buffer.items); + const res = try lp.insert(gpa, header.type(), buffer.items[addend..]); + buffer.clearRetainingCapacity(); + if (!res.found_existing) { + res.atom.* = sub.atom; + continue; + } + atom.flags.alive = false; + try killed_atoms.putNoClobber(sub.atom, res.atom.*); + } + } + } + + for (self.atoms.items) |atom_index| { + if (killed_atoms.get(atom_index)) |_| continue; + const atom = macho_file.getAtom(atom_index) orelse continue; + if (!atom.flags.alive) continue; + if (!atom.flags.relocs) continue; + + const relocs = blk: { + const extra = atom.getExtra(macho_file).?; + const relocs = slice.items(.relocs)[atom.n_sect].items; + break :blk relocs[extra.rel_index..][0..extra.rel_count]; + }; + for (relocs) |*rel| switch (rel.tag) { + .local => if (killed_atoms.get(rel.target)) |new_target| { + rel.target = new_target; + }, + .@"extern" => { + const target = rel.getTargetSymbol(macho_file); + if (killed_atoms.get(target.atom)) |new_atom| { + target.atom = new_atom; + } + }, + }; + } +} + const AddAtomArgs = struct { name: u32, n_sect: u8, @@ -416,34 +628,6 @@ fn addAtom(self: *Object, args: AddAtomArgs, macho_file: *MachO) !Atom.Index { return atom_index; } -fn initLiteralSections(self: *Object, macho_file: *MachO) !void { - const tracy = trace(@src()); - defer tracy.end(); - // TODO here we should split into equal-sized records, hash the contents, and then - // deduplicate - ICF. - // For now, we simply cover each literal section with one large atom. - const gpa = macho_file.base.comp.gpa; - const slice = self.sections.slice(); - - try self.atoms.ensureUnusedCapacity(gpa, self.sections.items(.header).len); - - for (slice.items(.header), 0..) |sect, n_sect| { - if (!isLiteral(sect)) continue; - - const name = try std.fmt.allocPrintZ(gpa, "{s}${s}", .{ sect.segName(), sect.sectName() }); - defer gpa.free(name); - - const atom_index = try self.addAtom(.{ - .name = try self.addString(gpa, name), - .n_sect = @intCast(n_sect), - .off = 0, - .size = sect.size, - .alignment = sect.@"align", - }, macho_file); - try slice.items(.subsections)[n_sect].append(gpa, .{ .atom = atom_index, .off = 0 }); - } -} - pub fn findAtom(self: Object, addr: u64) ?Atom.Index { const tracy = trace(@src()); defer tracy.end(); @@ -1369,7 +1553,10 @@ pub fn calcSymtabSize(self: *Object, macho_file: *MachO) !void { const name = sym.getName(macho_file); // TODO in -r mode, we actually want to merge symbol names and emit only one // work it out when emitting relocs - if (name.len > 0 and (name[0] == 'L' or name[0] == 'l') and !macho_file.base.isObject()) continue; + if (name.len > 0 and + (name[0] == 'L' or name[0] == 'l' or + mem.startsWith(u8, name, "_OBJC_SELECTOR_REFERENCES_")) and + !macho_file.base.isObject()) continue; sym.flags.output_symtab = true; if (sym.isLocal()) { try sym.addExtra(.{ .symtab = self.output_symtab_ctx.nlocals }, macho_file); diff --git a/src/link/MachO/Symbol.zig b/src/link/MachO/Symbol.zig index c85918457b..e413fe818d 100644 --- a/src/link/MachO/Symbol.zig +++ b/src/link/MachO/Symbol.zig @@ -14,8 +14,8 @@ file: File.Index = 0, /// Use `getAtom` to get the pointer to the atom. atom: Atom.Index = 0, -/// Assigned output section index for this atom. -out_n_sect: u16 = 0, +/// Assigned output section index for this symbol. +out_n_sect: u8 = 0, /// Index of the source nlist this symbol references. /// Use `getNlist` to pull the nlist from the relevant file. diff --git a/src/link/MachO/ZigObject.zig b/src/link/MachO/ZigObject.zig index 1e0297f5ec..338840e521 100644 --- a/src/link/MachO/ZigObject.zig +++ b/src/link/MachO/ZigObject.zig @@ -314,6 +314,13 @@ pub fn checkDuplicates(self: *ZigObject, dupes: anytype, macho_file: *MachO) !vo } } +pub fn dedupLiterals(self: *ZigObject, lp: *MachO.LiteralPool, macho_file: *MachO) !void { + _ = self; + _ = lp; + _ = macho_file; + // TODO +} + /// This is just a temporary helper function that allows us to re-read what we wrote to file into a buffer. /// We need this so that we can write to an archive. /// TODO implement writing ZigObject data directly to a buffer instead. diff --git a/src/link/MachO/relocatable.zig b/src/link/MachO/relocatable.zig index 711aa01fb4..4f7a7cfa2d 100644 --- a/src/link/MachO/relocatable.zig +++ b/src/link/MachO/relocatable.zig @@ -46,6 +46,7 @@ pub fn flushObject(macho_file: *MachO, comp: *Compilation, module_obj_path: ?[]c try macho_file.addUndefinedGlobals(); try macho_file.resolveSymbols(); + try macho_file.dedupLiterals(); markExports(macho_file); claimUnresolved(macho_file); try initOutputSections(macho_file); @@ -542,6 +543,9 @@ fn writeAtoms(macho_file: *MachO) !void { const cpu_arch = macho_file.getTarget().cpu.arch; const slice = macho_file.sections.slice(); + var relocs = std.ArrayList(macho.relocation_info).init(gpa); + defer relocs.deinit(); + for (slice.items(.header), slice.items(.atoms), 0..) |header, atoms, i| { if (atoms.items.len == 0) continue; if (header.isZerofill()) continue; @@ -553,8 +557,7 @@ fn writeAtoms(macho_file: *MachO) !void { const padding_byte: u8 = if (header.isCode() and cpu_arch == .x86_64) 0xcc else 0; @memset(code, padding_byte); - var relocs = try std.ArrayList(macho.relocation_info).initCapacity(gpa, header.nreloc); - defer relocs.deinit(); + try relocs.ensureTotalCapacity(header.nreloc); for (atoms.items) |atom_index| { const atom = macho_file.getAtom(atom_index).?; @@ -572,22 +575,24 @@ fn writeAtoms(macho_file: *MachO) !void { // TODO scattered writes? try macho_file.base.file.?.pwriteAll(code, header.offset); try macho_file.base.file.?.pwriteAll(mem.sliceAsBytes(relocs.items), header.reloff); + + relocs.clearRetainingCapacity(); } if (macho_file.getZigObject()) |zo| { // TODO: this is ugly; perhaps we should aggregrate before? - var relocs = std.AutoArrayHashMap(u8, std.ArrayList(macho.relocation_info)).init(gpa); + var zo_relocs = std.AutoArrayHashMap(u8, std.ArrayList(macho.relocation_info)).init(gpa); defer { - for (relocs.values()) |*list| { + for (zo_relocs.values()) |*list| { list.deinit(); } - relocs.deinit(); + zo_relocs.deinit(); } for (macho_file.sections.items(.header), 0..) |header, n_sect| { if (header.isZerofill()) continue; if (!macho_file.isZigSection(@intCast(n_sect)) and !macho_file.isDebugSection(@intCast(n_sect))) continue; - const gop = try relocs.getOrPut(@intCast(n_sect)); + const gop = try zo_relocs.getOrPut(@intCast(n_sect)); if (gop.found_existing) continue; gop.value_ptr.* = try std.ArrayList(macho.relocation_info).initCapacity(gpa, header.nreloc); } @@ -618,12 +623,12 @@ fn writeAtoms(macho_file: *MachO) !void { }, }; const file_offset = header.offset + atom.value; - const rels = relocs.getPtr(atom.out_n_sect).?; + const rels = zo_relocs.getPtr(atom.out_n_sect).?; try atom.writeRelocs(macho_file, code, rels); try macho_file.base.file.?.pwriteAll(code, file_offset); } - for (relocs.keys(), relocs.values()) |sect_id, rels| { + for (zo_relocs.keys(), zo_relocs.values()) |sect_id, rels| { const header = macho_file.sections.items(.header)[sect_id]; assert(rels.items.len == header.nreloc); mem.sort(macho.relocation_info, rels.items, {}, sortReloc); |
