diff options
| author | Jakub Konka <kubkon@jakubkonka.com> | 2022-12-15 15:10:35 +0100 |
|---|---|---|
| committer | Jakub Konka <kubkon@jakubkonka.com> | 2022-12-16 18:05:58 +0100 |
| commit | 660270b7a9c492dbd7c0b76a823bcba5a13da71c (patch) | |
| tree | 959e27c42fcdeb5f71b0649e9c501f763cba972f /src | |
| parent | 09dee744145fc423feb2b74ffa22cc1679a2749e (diff) | |
| download | zig-660270b7a9c492dbd7c0b76a823bcba5a13da71c.tar.gz zig-660270b7a9c492dbd7c0b76a823bcba5a13da71c.zip | |
macho: calculate UUID excluding stabs and part of contributing strtab
Diffstat (limited to 'src')
| -rw-r--r-- | src/link/MachO.zig | 8 | ||||
| -rw-r--r-- | src/link/MachO/hasher.zig | 10 | ||||
| -rw-r--r-- | src/link/MachO/uuid.zig | 69 | ||||
| -rw-r--r-- | src/link/MachO/zld.zig | 88 |
4 files changed, 102 insertions, 73 deletions
diff --git a/src/link/MachO.zig b/src/link/MachO.zig index b06552bc2a..72a24b0ac6 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -39,6 +39,7 @@ const Object = @import("MachO/Object.zig"); const LibStub = @import("tapi.zig").LibStub; const Liveness = @import("../Liveness.zig"); const LlvmObject = @import("../codegen/llvm.zig").Object; +const Md5 = std.crypto.hash.Md5; const Module = @import("../Module.zig"); const Relocation = @import("MachO/Relocation.zig"); const StringTable = @import("strtab.zig").StringTable; @@ -598,6 +599,8 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No if (self.cold_start) { std.crypto.random.bytes(&self.uuid_cmd.uuid); + Md5.hash(&self.uuid_cmd.uuid, &self.uuid_cmd.uuid, .{}); + conformUuid(&self.uuid_cmd.uuid); } try lc_writer.writeStruct(self.uuid_cmd); @@ -662,6 +665,11 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No self.cold_start = false; } +inline fn conformUuid(out: *[Md5.digest_length]u8) void { + // LC_UUID uuids should conform to RFC 4122 UUID version 4 & UUID version 5 formats + out[6] = (out[6] & 0x0F) | (3 << 4); + out[8] = (out[8] & 0x3F) | 0x80; +} pub fn resolveLibSystem( arena: Allocator, diff --git a/src/link/MachO/hasher.zig b/src/link/MachO/hasher.zig index 29099ad2d9..bb0531286c 100644 --- a/src/link/MachO/hasher.zig +++ b/src/link/MachO/hasher.zig @@ -13,6 +13,7 @@ pub fn ParallelHasher(comptime Hasher: type) type { return struct { pub fn hash(self: @This(), gpa: Allocator, pool: *ThreadPool, file: fs.File, out: [][hash_size]u8, opts: struct { chunk_size: u16 = 0x4000, + file_pos: u64 = 0, max_file_size: ?u64 = null, }) !void { _ = self; @@ -38,7 +39,14 @@ pub fn ParallelHasher(comptime Hasher: type) type { const fstart = i * opts.chunk_size; const fsize = if (fstart + opts.chunk_size > file_size) file_size - fstart else opts.chunk_size; wg.start(); - try pool.spawn(worker, .{ file, fstart, buffer[fstart..][0..fsize], &out[i], &results[i], &wg }); + try pool.spawn(worker, .{ + file, + fstart + opts.file_pos, + buffer[fstart..][0..fsize], + &out[i], + &results[i], + &wg, + }); } } for (results) |result| _ = try result; diff --git a/src/link/MachO/uuid.zig b/src/link/MachO/uuid.zig deleted file mode 100644 index 987b156a4b..0000000000 --- a/src/link/MachO/uuid.zig +++ /dev/null @@ -1,69 +0,0 @@ -const std = @import("std"); -const fs = std.fs; -const mem = std.mem; - -const Allocator = mem.Allocator; -const Compilation = @import("../../Compilation.zig"); -const Md5 = std.crypto.hash.Md5; -const Hasher = @import("hasher.zig").ParallelHasher; - -/// Somewhat random chunk size for MD5 hash calculation. -pub const chunk_size = 0x4000; - -/// Calculates Md5 hash of the file contents. -/// Hash is calculated in a streaming manner which may be slow. -pub fn calcUuidStreaming(file: fs.File, file_size: u64, out: *[Md5.digest_length]u8) !void { - const total_num_chunks = mem.alignForward(file_size, chunk_size) / chunk_size; - - var hasher = Md5.init(.{}); - var buffer: [chunk_size]u8 = undefined; - - var i: usize = 0; - while (i < total_num_chunks) : (i += 1) { - const start = i * chunk_size; - const size = if (start + chunk_size > file_size) - file_size - start - else - chunk_size; - const amt = try file.preadAll(&buffer, start); - if (amt != size) return error.InputOutput; - - hasher.update(buffer[0..size]); - } - - hasher.final(out); - conform(out); -} - -/// Calculates Md5 hash of each chunk in parallel and then hashes all Md5 hashes to produce -/// the final digest. -/// While this is NOT a correct MD5 hash of the contents, this methodology is used by LLVM/LLD -/// and we will use it too as it seems accepted by Apple OSes. -pub fn calcUuidParallel(comp: *const Compilation, file: fs.File, file_size: u64, out: *[Md5.digest_length]u8) !void { - const total_hashes = mem.alignForward(file_size, chunk_size) / chunk_size; - - const hashes = try comp.gpa.alloc([Md5.digest_length]u8, total_hashes); - defer comp.gpa.free(hashes); - - var hasher = Hasher(Md5){}; - try hasher.hash(comp.gpa, comp.thread_pool, file, hashes, .{ - .chunk_size = chunk_size, - .max_file_size = file_size, - }); - - const final_buffer = try comp.gpa.alloc(u8, total_hashes * Md5.digest_length); - defer comp.gpa.free(final_buffer); - - for (hashes) |hash, i| { - mem.copy(u8, final_buffer[i * Md5.digest_length ..][0..Md5.digest_length], &hash); - } - - Md5.hash(final_buffer, out, .{}); - conform(out); -} - -inline fn conform(out: *[Md5.digest_length]u8) void { - // LC_UUID uuids should conform to RFC 4122 UUID version 4 & UUID version 5 formats - out[6] = (out[6] & 0x0F) | (3 << 4); - out[8] = (out[8] & 0x3F) | 0x80; -} diff --git a/src/link/MachO/zld.zig b/src/link/MachO/zld.zig index 5ca1afd98c..e8646f5dd9 100644 --- a/src/link/MachO/zld.zig +++ b/src/link/MachO/zld.zig @@ -16,7 +16,6 @@ const link = @import("../../link.zig"); const load_commands = @import("load_commands.zig"); const thunks = @import("thunks.zig"); const trace = @import("../../tracy.zig").trace; -const uuid = @import("uuid.zig"); const Allocator = mem.Allocator; const Archive = @import("Archive.zig"); @@ -26,7 +25,9 @@ const CodeSignature = @import("CodeSignature.zig"); const Compilation = @import("../../Compilation.zig"); const DwarfInfo = @import("DwarfInfo.zig"); const Dylib = @import("Dylib.zig"); +const Hasher = @import("hasher.zig").ParallelHasher; const MachO = @import("../MachO.zig"); +const Md5 = std.crypto.hash.Md5; const LibStub = @import("../tapi.zig").LibStub; const Object = @import("Object.zig"); const StringTable = @import("../strtab.zig").StringTable; @@ -2680,17 +2681,98 @@ pub const Zld = struct { // In Debug we don't really care about reproducibility, so put in a random value // and be done with it. std.crypto.random.bytes(&self.uuid_cmd.uuid); + Md5.hash(&self.uuid_cmd.uuid, &self.uuid_cmd.uuid, .{}); + conformUuid(&self.uuid_cmd.uuid); }, else => { const seg = self.getLinkeditSegmentPtr(); - const file_size = seg.fileoff + seg.filesize; - try uuid.calcUuidParallel(comp, self.file, file_size, &self.uuid_cmd.uuid); + const max_file_size = @intCast(u32, seg.fileoff + seg.filesize); + + var hashes = std.ArrayList([Md5.digest_length]u8).init(self.gpa); + defer hashes.deinit(); + + if (!self.options.strip) { + // First exclusion region will comprise all symbol stabs. + const nlocals = self.dysymtab_cmd.nlocalsym; + + const locals_buf = try self.gpa.alloc(u8, nlocals * @sizeOf(macho.nlist_64)); + defer self.gpa.free(locals_buf); + + const amt = try self.file.preadAll(locals_buf, self.symtab_cmd.symoff); + if (amt != locals_buf.len) return error.InputOutput; + const locals = @ptrCast([*]macho.nlist_64, @alignCast(@alignOf(macho.nlist_64), locals_buf))[0..nlocals]; + + const istab: usize = for (locals) |local, i| { + if (local.stab()) break i; + } else locals.len; + const nstabs = locals.len - istab; + + // Next, a subsection of the strtab. + // We do not care about anything succeeding strtab as it is the code signature data which is + // not part of the UUID calculation anyway. + const stab_stroff = locals[istab].n_strx; + + const first_cut = FileSubsection{ + .start = 0, + .end = @intCast(u32, self.symtab_cmd.symoff + istab * @sizeOf(macho.nlist_64)), + }; + const second_cut = FileSubsection{ + .start = first_cut.end + @intCast(u32, nstabs * @sizeOf(macho.nlist_64)), + .end = self.symtab_cmd.stroff + stab_stroff, + }; + + for (&[_]FileSubsection{ first_cut, second_cut }) |cut| { + try self.calcUuidHashes(comp, cut, &hashes); + } + } else { + try self.calcUuidHashes(comp, .{ .start = 0, .end = max_file_size }, &hashes); + } + + const final_buffer = try self.gpa.alloc(u8, hashes.items.len * Md5.digest_length); + defer self.gpa.free(final_buffer); + + for (hashes.items) |hash, i| { + mem.copy(u8, final_buffer[i * Md5.digest_length ..][0..Md5.digest_length], &hash); + } + + Md5.hash(final_buffer, &self.uuid_cmd.uuid, .{}); + conformUuid(&self.uuid_cmd.uuid); }, } + const in_file = @sizeOf(macho.mach_header_64) + offset + @sizeOf(macho.load_command); try self.file.pwriteAll(&self.uuid_cmd.uuid, in_file); } + inline fn conformUuid(out: *[Md5.digest_length]u8) void { + // LC_UUID uuids should conform to RFC 4122 UUID version 4 & UUID version 5 formats + out[6] = (out[6] & 0x0F) | (3 << 4); + out[8] = (out[8] & 0x3F) | 0x80; + } + + const FileSubsection = struct { + start: u32, + end: u32, + }; + + fn calcUuidHashes( + self: *Zld, + comp: *const Compilation, + cut: FileSubsection, + hashes: *std.ArrayList([Md5.digest_length]u8), + ) !void { + const chunk_size = 0x4000; + const total_hashes = mem.alignForward(cut.end - cut.start, chunk_size) / chunk_size; + try hashes.resize(hashes.items.len + total_hashes); + + var hasher = Hasher(Md5){}; + try hasher.hash(self.gpa, comp.thread_pool, self.file, hashes.items, .{ + .chunk_size = chunk_size, + .file_pos = cut.start, + .max_file_size = cut.end - cut.start, + }); + } + fn writeCodeSignaturePadding(self: *Zld, code_sig: *CodeSignature) !void { const seg = self.getLinkeditSegmentPtr(); // Code signature data has to be 16-bytes aligned for Apple tools to recognize the file |
