| author | Andrew Kelley <andrew@ziglang.org> | 2023-01-10 00:38:36 -0700 |
|---|---|---|
| committer | Andrew Kelley <andrew@ziglang.org> | 2023-01-11 15:39:49 -0800 |
| commit | e0401498e928a539677f0f9eed843a0453bc8c33 (patch) | |
| tree | 3ce6ead2fddf85de3df29723929c497b91a56bc4 /src/Package.zig | |
| parent | d4e829d0a0f260e4dacbe293cd282af8ba2e4a30 (diff) | |
| download | zig-e0401498e928a539677f0f9eed843a0453bc8c33.tar.gz / zig-e0401498e928a539677f0f9eed843a0453bc8c33.zip | |
package manager: compute hash, move tmp dir into global cache
Diffstat (limited to 'src/Package.zig')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | src/Package.zig | 244 |

1 file changed, 195 insertions, 49 deletions
```diff
diff --git a/src/Package.zig b/src/Package.zig
index 20df256459..321743412a 100644
--- a/src/Package.zig
+++ b/src/Package.zig
@@ -10,6 +10,7 @@ const Hash = std.crypto.hash.sha2.Sha256;
 const Compilation = @import("Compilation.zig");
 const Module = @import("Module.zig");
 const ThreadPool = @import("ThreadPool.zig");
+const WaitGroup = @import("WaitGroup.zig");
 
 pub const Table = std.StringHashMapUnmanaged(*Package);
 
@@ -201,7 +202,13 @@ pub fn fetchAndAddDependencies(
             continue;
         };
 
-        const sub_pkg = try fetchAndUnpack(http_client, global_cache_directory, url, expected_hash);
+        const sub_pkg = try fetchAndUnpack(
+            thread_pool,
+            http_client,
+            global_cache_directory,
+            url,
+            expected_hash,
+        );
 
         try sub_pkg.fetchAndAddDependencies(
             thread_pool,
@@ -218,6 +225,7 @@ pub fn fetchAndAddDependencies(
 }
 
 fn fetchAndUnpack(
+    thread_pool: *ThreadPool,
     http_client: *std.http.Client,
     global_cache_directory: Compilation.Directory,
     url: []const u8,
@@ -225,71 +233,99 @@ fn fetchAndUnpack(
 ) !*Package {
     const gpa = http_client.allocator;
 
-    // TODO check if the expected_hash is already present in the global package cache, and
-    // thereby avoid both fetching and unpacking.
+    // Check if the expected_hash is already present in the global package
+    // cache, and thereby avoid both fetching and unpacking.
+    const s = fs.path.sep_str;
+    if (expected_hash) |h| {
+        const pkg_dir_sub_path = "p" ++ s ++ hexDigest(h);
+        _ = pkg_dir_sub_path;
+        @panic("TODO check the p dir for the package");
+    }
 
     const uri = try std.Uri.parse(url);
 
-    var tmp_directory: Compilation.Directory = d: {
-        const s = fs.path.sep_str;
-        const rand_int = std.crypto.random.int(u64);
+    const rand_int = std.crypto.random.int(u64);
+    const tmp_dir_sub_path = "tmp" ++ s ++ hex64(rand_int);
 
-        const tmp_dir_sub_path = try std.fmt.allocPrint(gpa, "tmp" ++ s ++ "{x}", .{rand_int});
+    const actual_hash = a: {
+        var tmp_directory: Compilation.Directory = d: {
+            const path = try global_cache_directory.join(gpa, &.{tmp_dir_sub_path});
+            errdefer gpa.free(path);
 
-        const path = try global_cache_directory.join(gpa, &.{tmp_dir_sub_path});
-        errdefer gpa.free(path);
+            const iterable_dir = try global_cache_directory.handle.makeOpenPathIterable(tmp_dir_sub_path, .{});
+            errdefer iterable_dir.close();
 
-        const handle = try global_cache_directory.handle.makeOpenPath(tmp_dir_sub_path, .{});
-        errdefer handle.close();
-
-        break :d .{
-            .path = path,
-            .handle = handle,
+            break :d .{
+                .path = path,
+                .handle = iterable_dir.dir,
+            };
         };
-    };
-    defer tmp_directory.closeAndFree(gpa);
+        defer tmp_directory.closeAndFree(gpa);
 
-    var req = try http_client.request(uri, .{}, .{});
-    defer req.deinit();
+        var req = try http_client.request(uri, .{}, .{});
+        defer req.deinit();
 
-    if (mem.endsWith(u8, uri.path, ".tar.gz")) {
-        // I observed the gzip stream to read 1 byte at a time, so I am using a
-        // buffered reader on the front of it.
-        var br = std.io.bufferedReaderSize(std.crypto.tls.max_ciphertext_record_len, req.reader());
+        if (mem.endsWith(u8, uri.path, ".tar.gz")) {
+            // I observed the gzip stream to read 1 byte at a time, so I am using a
+            // buffered reader on the front of it.
+            var br = std.io.bufferedReaderSize(std.crypto.tls.max_ciphertext_record_len, req.reader());
 
-        var gzip_stream = try std.compress.gzip.gzipStream(gpa, br.reader());
-        defer gzip_stream.deinit();
+            var gzip_stream = try std.compress.gzip.gzipStream(gpa, br.reader());
+            defer gzip_stream.deinit();
 
-        try std.tar.pipeToFileSystem(tmp_directory.handle, gzip_stream.reader(), .{});
-    } else {
-        // TODO: show the build.zig.ini file and line number
-        std.log.err("{s}: unknown package extension for path '{s}'", .{ url, uri.path });
-        return error.UnknownPackageExtension;
-    }
+            try std.tar.pipeToFileSystem(tmp_directory.handle, gzip_stream.reader(), .{
+                .strip_components = 1,
+            });
+        } else {
+            // TODO: show the build.zig.ini file and line number
+            std.log.err("{s}: unknown package extension for path '{s}'", .{ url, uri.path });
+            return error.UnknownPackageExtension;
+        }
 
-    // TODO: delete files not included in the package prior to computing the package hash.
-    // for example, if the ini file has directives to include/not include certain files,
-    // apply those rules directly to the filesystem right here. This ensures that files
-    // not protected by the hash are not present on the file system.
+        // TODO: delete files not included in the package prior to computing the package hash.
+        // for example, if the ini file has directives to include/not include certain files,
+        // apply those rules directly to the filesystem right here. This ensures that files
+        // not protected by the hash are not present on the file system.
 
-    const actual_hash = try computePackageHash(tmp_directory);
+        const actual_hash = try computePackageHash(thread_pool, .{ .dir = tmp_directory.handle });
 
-    if (expected_hash) |h| {
-        if (!mem.eql(u8, &h, &actual_hash)) {
-            // TODO: show the build.zig.ini file and line number
-            std.log.err("{s}: hash mismatch: expected: {s}, actual: {s}", .{
-                url, h, actual_hash,
-            });
-            return error.PackageHashMismatch;
+        if (expected_hash) |h| {
+            if (!mem.eql(u8, &h, &actual_hash)) {
+                // TODO: show the build.zig.ini file and line number
+                std.log.err("{s}: hash mismatch: expected: {s}, actual: {s}", .{
+                    url, h, actual_hash,
+                });
+                return error.PackageHashMismatch;
+            }
         }
-    }
 
-    if (true) @panic("TODO move the tmp dir into place");
+        break :a actual_hash;
+    };
+
+    {
+        // Rename the temporary directory into the global package cache.
+        const pkg_dir_sub_path = "p" ++ s ++ hexDigest(actual_hash);
+        var handled_missing_dir = false;
+        while (true) {
+            global_cache_directory.handle.rename(tmp_dir_sub_path, pkg_dir_sub_path) catch |err| switch (err) {
+                error.FileNotFound => {
+                    if (handled_missing_dir) return err;
+                    global_cache_directory.handle.makeDir("p") catch |mkd_err| switch (mkd_err) {
+                        error.PathAlreadyExists => handled_missing_dir = true,
+                        else => |e| return e,
+                    };
+                    continue;
+                },
+                else => |e| return e,
+            };
+            break;
+        }
+    }
 
     if (expected_hash == null) {
         // TODO: show the build.zig.ini file and line number
         std.log.err("{s}: missing hash:\nhash={s}", .{
-            url, actual_hash,
+            url, std.fmt.fmtSliceHexLower(&actual_hash),
         });
         return error.PackageDependencyMissingHash;
     }
@@ -303,7 +339,117 @@ fn fetchAndUnpack(
     //root_src_path: []const u8,
 }
 
-fn computePackageHash(pkg_directory: Compilation.Directory) ![Hash.digest_length]u8 {
-    _ = pkg_directory;
-    @panic("TODO computePackageHash");
+const HashedFile = struct {
+    path: []const u8,
+    hash: [Hash.digest_length]u8,
+    failure: Error!void,
+
+    const Error = fs.File.OpenError || fs.File.ReadError;
+
+    fn lessThan(context: void, lhs: *const HashedFile, rhs: *const HashedFile) bool {
+        _ = context;
+        return mem.lessThan(u8, lhs.path, rhs.path);
+    }
+};
+
+fn computePackageHash(
+    thread_pool: *ThreadPool,
+    pkg_dir: fs.IterableDir,
+) ![Hash.digest_length]u8 {
+    const gpa = thread_pool.allocator;
+
+    // We'll use an arena allocator for the path name strings since they all
+    // need to be in memory for sorting.
+    var arena_instance = std.heap.ArenaAllocator.init(gpa);
+    defer arena_instance.deinit();
+    const arena = arena_instance.allocator();
+
+    // Collect all files, recursively, then sort.
+    var all_files = std.ArrayList(*HashedFile).init(gpa);
+    defer all_files.deinit();
+
+    var walker = try pkg_dir.walk(gpa);
+    defer walker.deinit();
+
+    {
+        // The final hash will be a hash of each file hashed independently. This
+        // allows hashing in parallel.
+        var wait_group: WaitGroup = .{};
+        defer wait_group.wait();
+
+        while (try walker.next()) |entry| {
+            switch (entry.kind) {
+                .Directory => continue,
+                .File => {},
+                else => return error.IllegalFileTypeInPackage,
+            }
+            const hashed_file = try arena.create(HashedFile);
+            hashed_file.* = .{
+                .path = try arena.dupe(u8, entry.path),
+                .hash = undefined, // to be populated by the worker
+                .failure = undefined, // to be populated by the worker
+            };
+
+            wait_group.start();
+            try thread_pool.spawn(workerHashFile, .{ pkg_dir.dir, hashed_file, &wait_group });
+            try all_files.append(hashed_file);
+        }
+    }
+
+    std.sort.sort(*HashedFile, all_files.items, {}, HashedFile.lessThan);
+
+    var hasher = Hash.init(.{});
+    var any_failures = false;
+    for (all_files.items) |hashed_file| {
+        hashed_file.failure catch |err| {
+            any_failures = true;
+            std.log.err("unable to hash '{s}': {s}", .{ hashed_file.path, @errorName(err) });
+        };
+        hasher.update(&hashed_file.hash);
+    }
+    if (any_failures) return error.PackageHashUnavailable;
+    return hasher.finalResult();
+}
+
+fn workerHashFile(dir: fs.Dir, hashed_file: *HashedFile, wg: *WaitGroup) void {
+    defer wg.finish();
+    hashed_file.failure = hashFileFallible(dir, hashed_file);
+}
+
+fn hashFileFallible(dir: fs.Dir, hashed_file: *HashedFile) HashedFile.Error!void {
+    var buf: [8000]u8 = undefined;
+    var file = try dir.openFile(hashed_file.path, .{});
+    defer file.close();
+    var hasher = Hash.init(.{});
+    while (true) {
+        const bytes_read = try file.read(&buf);
+        if (bytes_read == 0) break;
+        hasher.update(buf[0..bytes_read]);
+    }
+    hasher.final(&hashed_file.hash);
+}
+
+const hex_charset = "0123456789abcdef";
+
+fn hex64(x: u64) [16]u8 {
+    var result: [16]u8 = undefined;
+    var i: usize = 0;
+    while (i < 8) : (i += 1) {
+        const byte = @truncate(u8, x >> @intCast(u6, 8 * i));
+        result[i * 2 + 0] = hex_charset[byte >> 4];
+        result[i * 2 + 1] = hex_charset[byte & 15];
+    }
+    return result;
+}
+
+test hex64 {
+    const s = "[" ++ hex64(0x12345678_abcdef00) ++ "]";
+    try std.testing.expectEqualStrings("[00efcdab78563412]", s);
+}
+
+fn hexDigest(digest: [Hash.digest_length]u8) [Hash.digest_length * 2]u8 {
+    var result: [Hash.digest_length * 2]u8 = undefined;
+    for (digest) |byte, i| {
+        result[i * 2 + 0] = hex_charset[byte >> 4];
+        result[i * 2 + 1] = hex_charset[byte & 15];
+    }
+    return result;
 }
```
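For readers following along outside the compiler tree, here is the unpack path in isolation, as a hedged sketch: it reuses the same calls as the hunk above (`std.io.bufferedReader*`, `std.compress.gzip.gzipStream`, `std.tar.pipeToFileSystem`), but reads a local tarball instead of an HTTP response body. The filename `example-package.tar.gz` and the `unpacked` destination are hypothetical, and the gzip API shown is the one this commit targets; later Zig releases reorganized `std.compress`.

```zig
const std = @import("std");

pub fn main() !void {
    var gpa_state = std.heap.GeneralPurposeAllocator(.{}){};
    defer _ = gpa_state.deinit();
    const gpa = gpa_state.allocator();

    // Hypothetical input tarball and destination directory.
    var file = try std.fs.cwd().openFile("example-package.tar.gz", .{});
    defer file.close();
    var dest = try std.fs.cwd().makeOpenPath("unpacked", .{});
    defer dest.close();

    // Same trick as the commit: the gzip decompressor issues tiny reads,
    // so interpose a buffered reader in front of the raw reader. The commit
    // sizes the buffer to the TLS ciphertext record length; the default
    // buffered reader suffices for a local file.
    var br = std.io.bufferedReader(file.reader());
    var gzip_stream = try std.compress.gzip.gzipStream(gpa, br.reader());
    defer gzip_stream.deinit();

    // strip_components = 1 drops the top-level directory that archive
    // tools conventionally wrap package contents in.
    try std.tar.pipeToFileSystem(dest, gzip_stream.reader(), .{
        .strip_components = 1,
    });
}
```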
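The hashing strategy in `computePackageHash` is worth seeing in isolation: each file is hashed independently (which is what makes the thread-pool fan-out legal), the per-file digests are ordered by file path, and the package hash is the SHA-256 of the concatenated digests. A minimal sketch over in-memory data, with made-up file contents:

```zig
const std = @import("std");
const Sha256 = std.crypto.hash.sha2.Sha256;

pub fn main() void {
    // Hypothetical package contents, already ordered by path; in the real
    // code this ordering comes from sorting the HashedFile entries.
    const files = [_]struct { path: []const u8, bytes: []const u8 }{
        .{ .path = "build.zig", .bytes = "// build script" },
        .{ .path = "src/main.zig", .bytes = "pub fn main() void {}" },
    };

    var final_hasher = Sha256.init(.{});
    for (files) |f| {
        // Per-file digest: these are the units that can be computed on
        // separate threads, since they share no state.
        var digest: [Sha256.digest_length]u8 = undefined;
        Sha256.hash(f.bytes, &digest, .{});
        final_hasher.update(&digest);
    }
    const package_hash = final_hasher.finalResult();
    std.debug.print("{s}\n", .{std.fmt.fmtSliceHexLower(&package_hash)});
}
```

Note that only the content digests feed the final hash; paths influence it solely through the sort order, so as committed, renaming a file changes the package hash only if the rename changes that order.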
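`WaitGroup` here is the compiler's own `src/WaitGroup.zig`, not a std type. Its contract, as used above, is: the producer calls `start()` before handing a task to the thread pool, each worker calls `finish()` when it completes, and `wait()` blocks until the counter drains, which is why the `defer wait_group.wait()` at the top of the inner block guarantees every hash is populated before the sort runs. Below is a minimal stand-in with the same contract, built on `std.Thread.Mutex` and `std.Thread.Condition`; it is an illustration of the protocol, not the actual implementation.

```zig
const std = @import("std");

const WaitGroup = struct {
    mutex: std.Thread.Mutex = .{},
    cond: std.Thread.Condition = .{},
    active: usize = 0,

    // Called by the producer before a task is submitted, so the counter
    // can never be observed at zero while work is still being queued.
    fn start(wg: *WaitGroup) void {
        wg.mutex.lock();
        defer wg.mutex.unlock();
        wg.active += 1;
    }

    // Called by each worker when its task completes.
    fn finish(wg: *WaitGroup) void {
        wg.mutex.lock();
        defer wg.mutex.unlock();
        wg.active -= 1;
        if (wg.active == 0) wg.cond.broadcast();
    }

    // Blocks until every start() has been matched by a finish().
    fn wait(wg: *WaitGroup) void {
        wg.mutex.lock();
        defer wg.mutex.unlock();
        while (wg.active != 0) wg.cond.wait(&wg.mutex);
    }
};
```

The ordering in `workerHashFile` matters for the same reason: `defer wg.finish()` is registered before any work happens, so the counter drains even when hashing fails and the failure is reported through `hashed_file.failure` instead.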
