aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorJakub Konka <kubkon@jakubkonka.com>2022-12-15 15:10:35 +0100
committerJakub Konka <kubkon@jakubkonka.com>2022-12-16 18:05:58 +0100
commit660270b7a9c492dbd7c0b76a823bcba5a13da71c (patch)
tree959e27c42fcdeb5f71b0649e9c501f763cba972f /src
parent09dee744145fc423feb2b74ffa22cc1679a2749e (diff)
downloadzig-660270b7a9c492dbd7c0b76a823bcba5a13da71c.tar.gz
zig-660270b7a9c492dbd7c0b76a823bcba5a13da71c.zip
macho: calculate UUID excluding stabs and part of contributing strtab
Diffstat (limited to 'src')
-rw-r--r--src/link/MachO.zig8
-rw-r--r--src/link/MachO/hasher.zig10
-rw-r--r--src/link/MachO/uuid.zig69
-rw-r--r--src/link/MachO/zld.zig88
4 files changed, 102 insertions, 73 deletions
diff --git a/src/link/MachO.zig b/src/link/MachO.zig
index b06552bc2a..72a24b0ac6 100644
--- a/src/link/MachO.zig
+++ b/src/link/MachO.zig
@@ -39,6 +39,7 @@ const Object = @import("MachO/Object.zig");
const LibStub = @import("tapi.zig").LibStub;
const Liveness = @import("../Liveness.zig");
const LlvmObject = @import("../codegen/llvm.zig").Object;
+const Md5 = std.crypto.hash.Md5;
const Module = @import("../Module.zig");
const Relocation = @import("MachO/Relocation.zig");
const StringTable = @import("strtab.zig").StringTable;
@@ -598,6 +599,8 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No
if (self.cold_start) {
std.crypto.random.bytes(&self.uuid_cmd.uuid);
+ Md5.hash(&self.uuid_cmd.uuid, &self.uuid_cmd.uuid, .{});
+ conformUuid(&self.uuid_cmd.uuid);
}
try lc_writer.writeStruct(self.uuid_cmd);
@@ -662,6 +665,11 @@ pub fn flushModule(self: *MachO, comp: *Compilation, prog_node: *std.Progress.No
self.cold_start = false;
}
+inline fn conformUuid(out: *[Md5.digest_length]u8) void {
+ // LC_UUID uuids should conform to RFC 4122; the version nibble written below marks version 3 (MD5-based), matching the MD5 digest used here
+ out[6] = (out[6] & 0x0F) | (3 << 4);
+ out[8] = (out[8] & 0x3F) | 0x80;
+}
pub fn resolveLibSystem(
arena: Allocator,
diff --git a/src/link/MachO/hasher.zig b/src/link/MachO/hasher.zig
index 29099ad2d9..bb0531286c 100644
--- a/src/link/MachO/hasher.zig
+++ b/src/link/MachO/hasher.zig
@@ -13,6 +13,7 @@ pub fn ParallelHasher(comptime Hasher: type) type {
return struct {
pub fn hash(self: @This(), gpa: Allocator, pool: *ThreadPool, file: fs.File, out: [][hash_size]u8, opts: struct {
chunk_size: u16 = 0x4000,
+ file_pos: u64 = 0,
max_file_size: ?u64 = null,
}) !void {
_ = self;
@@ -38,7 +39,14 @@ pub fn ParallelHasher(comptime Hasher: type) type {
const fstart = i * opts.chunk_size;
const fsize = if (fstart + opts.chunk_size > file_size) file_size - fstart else opts.chunk_size;
wg.start();
- try pool.spawn(worker, .{ file, fstart, buffer[fstart..][0..fsize], &out[i], &results[i], &wg });
+ try pool.spawn(worker, .{
+ file,
+ fstart + opts.file_pos,
+ buffer[fstart..][0..fsize],
+ &out[i],
+ &results[i],
+ &wg,
+ });
}
}
for (results) |result| _ = try result;
diff --git a/src/link/MachO/uuid.zig b/src/link/MachO/uuid.zig
deleted file mode 100644
index 987b156a4b..0000000000
--- a/src/link/MachO/uuid.zig
+++ /dev/null
@@ -1,69 +0,0 @@
-const std = @import("std");
-const fs = std.fs;
-const mem = std.mem;
-
-const Allocator = mem.Allocator;
-const Compilation = @import("../../Compilation.zig");
-const Md5 = std.crypto.hash.Md5;
-const Hasher = @import("hasher.zig").ParallelHasher;
-
-/// Somewhat random chunk size for MD5 hash calculation.
-pub const chunk_size = 0x4000;
-
-/// Calculates Md5 hash of the file contents.
-/// Hash is calculated in a streaming manner which may be slow.
-pub fn calcUuidStreaming(file: fs.File, file_size: u64, out: *[Md5.digest_length]u8) !void {
- const total_num_chunks = mem.alignForward(file_size, chunk_size) / chunk_size;
-
- var hasher = Md5.init(.{});
- var buffer: [chunk_size]u8 = undefined;
-
- var i: usize = 0;
- while (i < total_num_chunks) : (i += 1) {
- const start = i * chunk_size;
- const size = if (start + chunk_size > file_size)
- file_size - start
- else
- chunk_size;
- const amt = try file.preadAll(&buffer, start);
- if (amt != size) return error.InputOutput;
-
- hasher.update(buffer[0..size]);
- }
-
- hasher.final(out);
- conform(out);
-}
-
-/// Calculates Md5 hash of each chunk in parallel and then hashes all Md5 hashes to produce
-/// the final digest.
-/// While this is NOT a correct MD5 hash of the contents, this methodology is used by LLVM/LLD
-/// and we will use it too as it seems accepted by Apple OSes.
-pub fn calcUuidParallel(comp: *const Compilation, file: fs.File, file_size: u64, out: *[Md5.digest_length]u8) !void {
- const total_hashes = mem.alignForward(file_size, chunk_size) / chunk_size;
-
- const hashes = try comp.gpa.alloc([Md5.digest_length]u8, total_hashes);
- defer comp.gpa.free(hashes);
-
- var hasher = Hasher(Md5){};
- try hasher.hash(comp.gpa, comp.thread_pool, file, hashes, .{
- .chunk_size = chunk_size,
- .max_file_size = file_size,
- });
-
- const final_buffer = try comp.gpa.alloc(u8, total_hashes * Md5.digest_length);
- defer comp.gpa.free(final_buffer);
-
- for (hashes) |hash, i| {
- mem.copy(u8, final_buffer[i * Md5.digest_length ..][0..Md5.digest_length], &hash);
- }
-
- Md5.hash(final_buffer, out, .{});
- conform(out);
-}
-
-inline fn conform(out: *[Md5.digest_length]u8) void {
- // LC_UUID uuids should conform to RFC 4122 UUID version 4 & UUID version 5 formats
- out[6] = (out[6] & 0x0F) | (3 << 4);
- out[8] = (out[8] & 0x3F) | 0x80;
-}
diff --git a/src/link/MachO/zld.zig b/src/link/MachO/zld.zig
index 5ca1afd98c..e8646f5dd9 100644
--- a/src/link/MachO/zld.zig
+++ b/src/link/MachO/zld.zig
@@ -16,7 +16,6 @@ const link = @import("../../link.zig");
const load_commands = @import("load_commands.zig");
const thunks = @import("thunks.zig");
const trace = @import("../../tracy.zig").trace;
-const uuid = @import("uuid.zig");
const Allocator = mem.Allocator;
const Archive = @import("Archive.zig");
@@ -26,7 +25,9 @@ const CodeSignature = @import("CodeSignature.zig");
const Compilation = @import("../../Compilation.zig");
const DwarfInfo = @import("DwarfInfo.zig");
const Dylib = @import("Dylib.zig");
+const Hasher = @import("hasher.zig").ParallelHasher;
const MachO = @import("../MachO.zig");
+const Md5 = std.crypto.hash.Md5;
const LibStub = @import("../tapi.zig").LibStub;
const Object = @import("Object.zig");
const StringTable = @import("../strtab.zig").StringTable;
@@ -2680,17 +2681,98 @@ pub const Zld = struct {
// In Debug we don't really care about reproducibility, so put in a random value
// and be done with it.
std.crypto.random.bytes(&self.uuid_cmd.uuid);
+ Md5.hash(&self.uuid_cmd.uuid, &self.uuid_cmd.uuid, .{});
+ conformUuid(&self.uuid_cmd.uuid);
},
else => {
const seg = self.getLinkeditSegmentPtr();
- const file_size = seg.fileoff + seg.filesize;
- try uuid.calcUuidParallel(comp, self.file, file_size, &self.uuid_cmd.uuid);
+ const max_file_size = @intCast(u32, seg.fileoff + seg.filesize);
+
+ var hashes = std.ArrayList([Md5.digest_length]u8).init(self.gpa);
+ defer hashes.deinit();
+
+ if (!self.options.strip) {
+ // First exclusion region will comprise all symbol stabs.
+ const nlocals = self.dysymtab_cmd.nlocalsym;
+
+ const locals_buf = try self.gpa.alloc(u8, nlocals * @sizeOf(macho.nlist_64));
+ defer self.gpa.free(locals_buf);
+
+ const amt = try self.file.preadAll(locals_buf, self.symtab_cmd.symoff);
+ if (amt != locals_buf.len) return error.InputOutput;
+ const locals = @ptrCast([*]macho.nlist_64, @alignCast(@alignOf(macho.nlist_64), locals_buf))[0..nlocals];
+
+ const istab: usize = for (locals) |local, i| {
+ if (local.stab()) break i;
+ } else locals.len;
+ const nstabs = locals.len - istab;
+
+ // Next, a subsection of the strtab.
+ // We do not care about anything succeeding strtab as it is the code signature data which is
+ // not part of the UUID calculation anyway.
+ const stab_stroff = locals[istab].n_strx;
+
+ const first_cut = FileSubsection{
+ .start = 0,
+ .end = @intCast(u32, self.symtab_cmd.symoff + istab * @sizeOf(macho.nlist_64)),
+ };
+ const second_cut = FileSubsection{
+ .start = first_cut.end + @intCast(u32, nstabs * @sizeOf(macho.nlist_64)),
+ .end = self.symtab_cmd.stroff + stab_stroff,
+ };
+
+ for (&[_]FileSubsection{ first_cut, second_cut }) |cut| {
+ try self.calcUuidHashes(comp, cut, &hashes);
+ }
+ } else {
+ try self.calcUuidHashes(comp, .{ .start = 0, .end = max_file_size }, &hashes);
+ }
+
+ const final_buffer = try self.gpa.alloc(u8, hashes.items.len * Md5.digest_length);
+ defer self.gpa.free(final_buffer);
+
+ for (hashes.items) |hash, i| {
+ mem.copy(u8, final_buffer[i * Md5.digest_length ..][0..Md5.digest_length], &hash);
+ }
+
+ Md5.hash(final_buffer, &self.uuid_cmd.uuid, .{});
+ conformUuid(&self.uuid_cmd.uuid);
},
}
+
const in_file = @sizeOf(macho.mach_header_64) + offset + @sizeOf(macho.load_command);
try self.file.pwriteAll(&self.uuid_cmd.uuid, in_file);
}
+ inline fn conformUuid(out: *[Md5.digest_length]u8) void {
+ // LC_UUID uuids should conform to RFC 4122; the version nibble written below marks version 3 (MD5-based), matching the MD5 digest used here
+ out[6] = (out[6] & 0x0F) | (3 << 4);
+ out[8] = (out[8] & 0x3F) | 0x80;
+ }
+
+ const FileSubsection = struct {
+ start: u32,
+ end: u32,
+ };
+
+ fn calcUuidHashes(
+ self: *Zld,
+ comp: *const Compilation,
+ cut: FileSubsection,
+ hashes: *std.ArrayList([Md5.digest_length]u8),
+ ) !void {
+ const chunk_size = 0x4000;
+ const total_hashes = mem.alignForward(cut.end - cut.start, chunk_size) / chunk_size;
+ try hashes.resize(hashes.items.len + total_hashes);
+
+ var hasher = Hasher(Md5){};
+ try hasher.hash(self.gpa, comp.thread_pool, self.file, hashes.items, .{
+ .chunk_size = chunk_size,
+ .file_pos = cut.start,
+ .max_file_size = cut.end - cut.start,
+ });
+ }
+
fn writeCodeSignaturePadding(self: *Zld, code_sig: *CodeSignature) !void {
const seg = self.getLinkeditSegmentPtr();
// Code signature data has to be 16-bytes aligned for Apple tools to recognize the file