diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/link/MachO.zig | 19 | ||||
| -rw-r--r-- | src/link/MachO/uuid.zig | 46 | ||||
| -rw-r--r-- | src/link/MachO/zld.zig | 168 |
3 files changed, 67 insertions, 166 deletions
diff --git a/src/link/MachO.zig b/src/link/MachO.zig index 024fe1f8d9..18fb37babf 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -13,6 +13,7 @@ const mem = std.mem; const meta = std.meta; const aarch64 = @import("../arch/aarch64/bits.zig"); +const calcUuid = @import("MachO/uuid.zig").calcUuid; const codegen = @import("../codegen.zig"); const dead_strip = @import("MachO/dead_strip.zig"); const fat = @import("MachO/fat.zig"); @@ -756,11 +757,7 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No }); try load_commands.writeBuildVersionLC(&self.base.options, lc_writer); - if (self.cold_start) { - std.crypto.random.bytes(&self.uuid_cmd.uuid); - Md5.hash(&self.uuid_cmd.uuid, &self.uuid_cmd.uuid, .{}); - conformUuid(&self.uuid_cmd.uuid); - } + const uuid_cmd_offset = @sizeOf(macho.mach_header_64) + @intCast(u32, lc_buffer.items.len); try lc_writer.writeStruct(self.uuid_cmd); try load_commands.writeLoadDylibLCs(self.dylibs.items, self.referenced_dylibs.keys(), lc_writer); @@ -769,10 +766,10 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No try lc_writer.writeStruct(self.codesig_cmd); } - try self.base.file.?.pwriteAll(lc_buffer.items, @sizeOf(macho.mach_header_64)); - const ncmds = load_commands.calcNumOfLCs(lc_buffer.items); + try self.base.file.?.pwriteAll(lc_buffer.items, @sizeOf(macho.mach_header_64)); try self.writeHeader(ncmds, @intCast(u32, lc_buffer.items.len)); + try self.writeUuid(comp, uuid_cmd_offset); if (codesig) |*csig| { try self.writeCodeSignature(comp, csig); // code signing always comes last @@ -3510,6 +3507,14 @@ fn writeDysymtab(self: *MachO, ctx: SymtabCtx) !void { self.dysymtab_cmd.nindirectsyms = nindirectsyms; } +fn writeUuid(self: *MachO, comp: *const Compilation, uuid_cmd_offset: u32) !void { + const seg = self.getLinkeditSegmentPtr(); + const file_size = seg.fileoff + seg.filesize; + try calcUuid(comp, self.base.file.?, file_size, &self.uuid_cmd.uuid); + const offset = uuid_cmd_offset + @sizeOf(macho.load_command); + try self.base.file.?.pwriteAll(&self.uuid_cmd.uuid, offset); +} + fn writeCodeSignaturePadding(self: *MachO, code_sig: *CodeSignature) !void { const seg = self.getLinkeditSegmentPtr(); // Code signature data has to be 16-bytes aligned for Apple tools to recognize the file diff --git a/src/link/MachO/uuid.zig b/src/link/MachO/uuid.zig new file mode 100644 index 0000000000..486bf43b99 --- /dev/null +++ b/src/link/MachO/uuid.zig @@ -0,0 +1,46 @@ +const std = @import("std"); +const fs = std.fs; +const mem = std.mem; + +const Allocator = mem.Allocator; +const Compilation = @import("../../Compilation.zig"); +const Md5 = std.crypto.hash.Md5; +const Hasher = @import("hasher.zig").ParallelHasher; + +/// Somewhat random chunk size for MD5 hash calculation. +pub const chunk_size = 0x4000; + +/// Calculates Md5 hash of each chunk in parallel and then hashes all Md5 hashes to produce +/// the final digest. +/// While this is NOT a correct MD5 hash of the contents, this methodology is used by LLVM/LLD +/// and we will use it too as it seems accepted by Apple OSes. +/// TODO LLD also hashes the output filename to disambiguate between same builds with different +/// output files. Should we also do that? +pub fn calcUuid(comp: *const Compilation, file: fs.File, file_size: u64, out: *[Md5.digest_length]u8) !void { + const total_hashes = mem.alignForward(u64, file_size, chunk_size) / chunk_size; + + const hashes = try comp.gpa.alloc([Md5.digest_length]u8, total_hashes); + defer comp.gpa.free(hashes); + + var hasher = Hasher(Md5){}; + try hasher.hash(comp.gpa, comp.thread_pool, file, hashes, .{ + .chunk_size = chunk_size, + .max_file_size = file_size, + }); + + const final_buffer = try comp.gpa.alloc(u8, total_hashes * Md5.digest_length); + defer comp.gpa.free(final_buffer); + + for (hashes, 0..) |hash, i| { + mem.copy(u8, final_buffer[i * Md5.digest_length ..][0..Md5.digest_length], &hash); + } + + Md5.hash(final_buffer, out, .{}); + conform(out); +} + +inline fn conform(out: *[Md5.digest_length]u8) void { + // LC_UUID uuids should conform to RFC 4122 UUID version 4 & UUID version 5 formats + out[6] = (out[6] & 0x0F) | (3 << 4); + out[8] = (out[8] & 0x3F) | 0x80; +} diff --git a/src/link/MachO/zld.zig b/src/link/MachO/zld.zig index 7902d67d87..a938a1cf90 100644 --- a/src/link/MachO/zld.zig +++ b/src/link/MachO/zld.zig @@ -9,14 +9,15 @@ const math = std.math; const mem = std.mem; const aarch64 = @import("../../arch/aarch64/bits.zig"); +const calcUuid = @import("uuid.zig").calcUuid; const dead_strip = @import("dead_strip.zig"); const eh_frame = @import("eh_frame.zig"); const fat = @import("fat.zig"); const link = @import("../../link.zig"); const load_commands = @import("load_commands.zig"); +const stub_helpers = @import("stubs.zig"); const thunks = @import("thunks.zig"); const trace = @import("../../tracy.zig").trace; -const stub_helpers = @import("stubs.zig"); const Allocator = mem.Allocator; const Archive = @import("Archive.zig"); @@ -2575,150 +2576,12 @@ pub const Zld = struct { self.dysymtab_cmd.nindirectsyms = nindirectsyms; } - fn writeUuid(self: *Zld, comp: *const Compilation, args: struct { - linkedit_cmd_offset: u32, - symtab_cmd_offset: u32, - uuid_cmd_offset: u32, - codesig_cmd_offset: ?u32, - }) !void { - _ = comp; - switch (self.options.optimize_mode) { - .Debug => { - // In Debug we don't really care about reproducibility, so put in a random value - // and be done with it. - std.crypto.random.bytes(&self.uuid_cmd.uuid); - Md5.hash(&self.uuid_cmd.uuid, &self.uuid_cmd.uuid, .{}); - conformUuid(&self.uuid_cmd.uuid); - }, - else => { - // We set the max file size to the actual strtab buffer length to exclude any strtab padding. - const max_file_end = @intCast(u32, self.symtab_cmd.stroff + self.strtab.buffer.items.len); - - const FileSubsection = struct { - start: u32, - end: u32, - }; - - var subsections: [5]FileSubsection = undefined; - var count: usize = 0; - - // Exclude LINKEDIT segment command as it contains file size that includes stabs contribution - // and code signature. - subsections[count] = .{ - .start = 0, - .end = args.linkedit_cmd_offset, - }; - count += 1; - - // Exclude SYMTAB and DYSYMTAB commands for the same reason. - subsections[count] = .{ - .start = subsections[count - 1].end + @sizeOf(macho.segment_command_64), - .end = args.symtab_cmd_offset, - }; - count += 1; - - // Exclude CODE_SIGNATURE command (if present). - if (args.codesig_cmd_offset) |offset| { - subsections[count] = .{ - .start = subsections[count - 1].end + @sizeOf(macho.symtab_command) + @sizeOf(macho.dysymtab_command), - .end = offset, - }; - count += 1; - } - - if (!self.options.strip) { - // Exclude region comprising all symbol stabs. - const nlocals = self.dysymtab_cmd.nlocalsym; - - const locals = try self.gpa.alloc(macho.nlist_64, nlocals); - defer self.gpa.free(locals); - - const locals_buf = @ptrCast([*]u8, locals.ptr)[0 .. @sizeOf(macho.nlist_64) * nlocals]; - const amt = try self.file.preadAll(locals_buf, self.symtab_cmd.symoff); - if (amt != locals_buf.len) return error.InputOutput; - - const istab: usize = for (locals, 0..) |local, i| { - if (local.stab()) break i; - } else locals.len; - const nstabs = locals.len - istab; - - if (nstabs == 0) { - subsections[count] = .{ - .start = subsections[count - 1].end + if (args.codesig_cmd_offset == null) - @as(u32, @sizeOf(macho.symtab_command) + @sizeOf(macho.dysymtab_command)) - else - @sizeOf(macho.linkedit_data_command), - .end = max_file_end, - }; - count += 1; - } else { - // Exclude a subsection of the strtab with names of the stabs. - // We do not care about anything succeeding strtab as it is the code signature data which is - // not part of the UUID calculation anyway. - const stab_stroff = locals[istab].n_strx; - - subsections[count] = .{ - .start = subsections[count - 1].end + if (args.codesig_cmd_offset == null) - @as(u32, @sizeOf(macho.symtab_command) + @sizeOf(macho.dysymtab_command)) - else - @sizeOf(macho.linkedit_data_command), - .end = @intCast(u32, self.symtab_cmd.symoff + istab * @sizeOf(macho.nlist_64)), - }; - count += 1; - - subsections[count] = .{ - .start = subsections[count - 1].end + @intCast(u32, nstabs * @sizeOf(macho.nlist_64)), - .end = self.symtab_cmd.stroff + stab_stroff, - }; - count += 1; - } - } else { - subsections[count] = .{ - .start = subsections[count - 1].end + if (args.codesig_cmd_offset == null) - @as(u32, @sizeOf(macho.symtab_command) + @sizeOf(macho.dysymtab_command)) - else - @sizeOf(macho.linkedit_data_command), - .end = max_file_end, - }; - count += 1; - } - - const chunk_size = 0x4000; - - var hasher = Md5.init(.{}); - var buffer: [chunk_size]u8 = undefined; - - for (subsections[0..count]) |cut| { - const size = cut.end - cut.start; - const num_chunks = mem.alignForward(usize, size, chunk_size) / chunk_size; - - var i: usize = 0; - while (i < num_chunks) : (i += 1) { - const fstart = cut.start + i * chunk_size; - const fsize = if (fstart + chunk_size > cut.end) - cut.end - fstart - else - chunk_size; - const amt = try self.file.preadAll(buffer[0..fsize], fstart); - if (amt != fsize) return error.InputOutput; - - hasher.update(buffer[0..fsize]); - } - } - - hasher.final(&self.uuid_cmd.uuid); - conformUuid(&self.uuid_cmd.uuid); - }, - } - - const in_file = args.uuid_cmd_offset + @sizeOf(macho.load_command); - try self.file.pwriteAll(&self.uuid_cmd.uuid, in_file); - } - - inline fn conformUuid(out: *[Md5.digest_length]u8) void { - // LC_UUID uuids should conform to RFC 4122 UUID version 4 & UUID version 5 formats - out[6] = (out[6] & 0x0F) | (3 << 4); - out[8] = (out[8] & 0x3F) | 0x80; + fn writeUuid(self: *Zld, comp: *const Compilation, uuid_cmd_offset: u32) !void { + const seg = self.getLinkeditSegmentPtr(); + const file_size = seg.fileoff + seg.filesize; + try calcUuid(comp, self.file, file_size, &self.uuid_cmd.uuid); + const offset = uuid_cmd_offset + @sizeOf(macho.load_command); + try self.file.pwriteAll(&self.uuid_cmd.uuid, offset); } fn writeCodeSignaturePadding(self: *Zld, code_sig: *CodeSignature) !void { @@ -4041,16 +3904,11 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr const lc_writer = lc_buffer.writer(); try zld.writeSegmentHeaders(lc_writer); - const linkedit_cmd_offset = @sizeOf(macho.mach_header_64) + @intCast(u32, lc_buffer.items.len - @sizeOf(macho.segment_command_64)); - try lc_writer.writeStruct(zld.dyld_info_cmd); try lc_writer.writeStruct(zld.function_starts_cmd); try lc_writer.writeStruct(zld.data_in_code_cmd); - - const symtab_cmd_offset = @sizeOf(macho.mach_header_64) + @intCast(u32, lc_buffer.items.len); try lc_writer.writeStruct(zld.symtab_cmd); try lc_writer.writeStruct(zld.dysymtab_cmd); - try load_commands.writeDylinkerLC(lc_writer); if (zld.options.output_mode == .Exe) { @@ -4088,22 +3946,14 @@ pub fn linkWithZld(macho_file: *MachO, comp: *Compilation, prog_node: *std.Progr try load_commands.writeLoadDylibLCs(zld.dylibs.items, zld.referenced_dylibs.keys(), lc_writer); - var codesig_cmd_offset: ?u32 = null; if (requires_codesig) { - codesig_cmd_offset = @sizeOf(macho.mach_header_64) + @intCast(u32, lc_buffer.items.len); try lc_writer.writeStruct(zld.codesig_cmd); } const ncmds = load_commands.calcNumOfLCs(lc_buffer.items); try zld.file.pwriteAll(lc_buffer.items, @sizeOf(macho.mach_header_64)); try zld.writeHeader(ncmds, @intCast(u32, lc_buffer.items.len)); - - try zld.writeUuid(comp, .{ - .linkedit_cmd_offset = linkedit_cmd_offset, - .symtab_cmd_offset = symtab_cmd_offset, - .uuid_cmd_offset = uuid_cmd_offset, - .codesig_cmd_offset = codesig_cmd_offset, - }); + try zld.writeUuid(comp, uuid_cmd_offset); if (codesig) |*csig| { try zld.writeCodeSignature(comp, csig); // code signing always comes last |
