diff options
Diffstat (limited to 'lib/std/Build/Cache.zig')
| -rw-r--r-- | lib/std/Build/Cache.zig | 1253 |
1 files changed, 1253 insertions, 0 deletions
diff --git a/lib/std/Build/Cache.zig b/lib/std/Build/Cache.zig new file mode 100644 index 0000000000..c459fca633 --- /dev/null +++ b/lib/std/Build/Cache.zig @@ -0,0 +1,1253 @@
//! Manages `zig-cache` directories.
//! This is not a general-purpose cache. It is designed to be fast and simple,
//! not to withstand attacks using specially-crafted input.

/// An open directory handle paired with the textual path it was opened from.
pub const Directory = struct {
    /// This field is redundant for operations that can act on the open directory handle
    /// directly, but it is needed when passing the directory to a child process.
    /// `null` means cwd.
    path: ?[]const u8,
    handle: std.fs.Dir,

    /// Joins `paths` into one path and, when `path` is non-null, prepends it.
    /// The returned slice is allocated with `allocator`; the caller frees it.
    pub fn join(self: Directory, allocator: Allocator, paths: []const []const u8) ![]u8 {
        const base = self.path orelse return std.fs.path.join(allocator, paths);

        // TODO clean way to do this with only 1 allocation
        const tail = try std.fs.path.join(allocator, paths);
        defer allocator.free(tail);
        return std.fs.path.join(allocator, &[_][]const u8{ base, tail });
    }

    /// Same as `join`, but the result carries a 0 sentinel.
    pub fn joinZ(self: Directory, allocator: Allocator, paths: []const []const u8) ![:0]u8 {
        const base = self.path orelse return std.fs.path.joinZ(allocator, paths);

        // TODO clean way to do this with only 1 allocation
        const tail = try std.fs.path.join(allocator, paths);
        defer allocator.free(tail);
        return std.fs.path.joinZ(allocator, &[_][]const u8{ base, tail });
    }

    /// Whether or not the handle should be closed, or the path should be freed
    /// is determined by usage, however this function is provided for convenience
    /// if it happens to be what the caller needs.
    ///
    /// Closes `handle`, frees `path` with `gpa` when it is non-null, and leaves
    /// `self` undefined.
    pub fn closeAndFree(self: *Directory, gpa: Allocator) void {
        self.handle.close();
        if (self.path) |owned_path| {
            gpa.free(owned_path);
        }
        self.* = undefined;
    }
};

gpa: Allocator,
manifest_dir: fs.Dir,
hash: HashHelper = .{},
/// This value is accessed from multiple threads, protected by mutex.
recent_problematic_timestamp: i128 = 0,
mutex: std.Thread.Mutex = .{},

/// A set of strings such as the zig library directory or project source root, which
/// are stripped from the file paths before putting into the cache. They
/// are replaced with single-character indicators. This is not to save
/// space but to eliminate absolute file paths. This improves portability
/// and usefulness of the cache for advanced use cases.
prefixes_buffer: [4]Directory = undefined,
prefixes_len: usize = 0,

pub const DepTokenizer = @import("Cache/DepTokenizer.zig");

const Cache = @This();
const std = @import("std");
const builtin = @import("builtin");
const crypto = std.crypto;
const fs = std.fs;
const assert = std.debug.assert;
const testing = std.testing;
const mem = std.mem;
const fmt = std.fmt;
const Allocator = std.mem.Allocator;
const log = std.log.scoped(.cache);

/// Appends `directory` to the list of known prefixes.
/// The list lives in the fixed-size `prefixes_buffer`; the slot written is the
/// one at index `prefixes_len`, which is then incremented.
pub fn addPrefix(cache: *Cache, directory: Directory) void {
    const slot = cache.prefixes_len;
    cache.prefixes_buffer[slot] = directory;
    cache.prefixes_len = slot + 1;
}

/// Creates a `Manifest` that starts from a copy of the cache-wide hash state
/// and has no manifest file open yet.
/// Be sure to call `Manifest.deinit` after successful initialization.
pub fn obtain(cache: *Cache) Manifest {
    return .{
        .cache = cache,
        .hash = cache.hash,
        .hex_digest = undefined,
        .manifest_file = null,
        .manifest_dirty = false,
    };
}

/// Returns the prefixes registered so far, in registration order.
pub fn prefixes(cache: *const Cache) []const Directory {
    const count = cache.prefixes_len;
    return cache.prefixes_buffer[0..count];
}

/// A file path expressed as an index into `prefixes()` plus the remaining path.
const PrefixedPath = struct {
    prefix: u8,
    sub_path: []u8,
};

/// Resolves `file_path` and expresses it relative to one of the prefixes.
/// The `sub_path` of the result is allocated with `cache.gpa`.
fn findPrefix(cache: *const Cache, file_path: []const u8) !PrefixedPath {
    const gpa = cache.gpa;
    const resolved = try fs.path.resolve(gpa, &[_][]const u8{file_path});
    // `findPrefixResolved` only takes ownership on success.
    errdefer gpa.free(resolved);
    return cache.findPrefixResolved(resolved);
}

/// Takes ownership of `resolved_path` on success.
/// Expresses `resolved_path` relative to the first registered prefix (other than
/// prefix 0) that contains it. When such a prefix is found, the returned
/// `sub_path` is a fresh allocation from `cache.gpa` and `resolved_path` is freed.
/// Otherwise the result uses prefix 0 and `resolved_path` itself as `sub_path`.
/// On error nothing has been freed and the caller still owns `resolved_path`.
fn findPrefixResolved(cache: *const Cache, resolved_path: []u8) !PrefixedPath {
    const gpa = cache.gpa;
    const prefixes_slice = cache.prefixes();
    var i: u8 = 1; // Start at 1 to skip over checking the null prefix.
    while (i < prefixes_slice.len) : (i += 1) {
        const p = prefixes_slice[i].path.?;
        const below = subPathBelowPrefix(p, resolved_path) orelse continue;
        const sub_path = try gpa.dupe(u8, below);
        // `below` pointed into `resolved_path`; it has been copied, so the
        // original buffer is no longer needed.
        gpa.free(resolved_path);
        return PrefixedPath{
            .prefix = i,
            .sub_path = sub_path,
        };
    }

    return PrefixedPath{
        .prefix = 0,
        .sub_path = resolved_path,
    };
}

/// Returns the non-empty remainder of `path` below the directory `prefix`, or
/// `null` when `path` is not strictly inside `prefix`.
///
/// A plain `startsWith` check is not sufficient: "/foo/bar" is a textual prefix
/// of "/foo/barbaz/x" without being one of its parent directories, and a path
/// equal to the prefix has nothing after it to slice. Both cases yield `null`,
/// as does an empty `prefix`.
fn subPathBelowPrefix(prefix: []const u8, path: []const u8) ?[]const u8 {
    if (prefix.len == 0 or path.len <= prefix.len) return null;
    if (!mem.startsWith(u8, path, prefix)) return null;

    const rest = if (fs.path.isSep(prefix[prefix.len - 1]))
        // The prefix (e.g. a filesystem root) already ends with the separator.
        path[prefix.len..]
    else if (fs.path.isSep(path[prefix.len]))
        // +1 to skip over the path separator here
        path[prefix.len + 1 ..]
    else
        return null;

    // An empty sub-path cannot be stored in a manifest line.
    return if (rest.len == 0) null else rest;
}

/// This is 128 bits - Even with 2^54 cache entries, the probability of a collision would be under 10^-6
pub const bin_digest_len = 16;
pub const hex_digest_len = bin_digest_len * 2;
pub const BinDigest = [bin_digest_len]u8;

const manifest_file_size_max = 50 * 1024 * 1024;

/// The type used for hashing file contents. Currently, this is SipHash128(1, 3), because it
/// provides enough collision resistance for the Manifest use cases, while being one of our
/// fastest options right now.
pub const Hasher = crypto.auth.siphash.SipHash128(1, 3);

/// Initial state, that can be copied.
+pub const hasher_init: Hasher = Hasher.init(&[_]u8{0} ** Hasher.key_length); + +pub const File = struct { + prefixed_path: ?PrefixedPath, + max_file_size: ?usize, + stat: Stat, + bin_digest: BinDigest, + contents: ?[]const u8, + + pub const Stat = struct { + inode: fs.File.INode, + size: u64, + mtime: i128, + }; + + pub fn deinit(self: *File, gpa: Allocator) void { + if (self.prefixed_path) |pp| { + gpa.free(pp.sub_path); + self.prefixed_path = null; + } + if (self.contents) |contents| { + gpa.free(contents); + self.contents = null; + } + self.* = undefined; + } +}; + +pub const HashHelper = struct { + hasher: Hasher = hasher_init, + + /// Record a slice of bytes as an dependency of the process being cached + pub fn addBytes(hh: *HashHelper, bytes: []const u8) void { + hh.hasher.update(mem.asBytes(&bytes.len)); + hh.hasher.update(bytes); + } + + pub fn addOptionalBytes(hh: *HashHelper, optional_bytes: ?[]const u8) void { + hh.add(optional_bytes != null); + hh.addBytes(optional_bytes orelse return); + } + + pub fn addListOfBytes(hh: *HashHelper, list_of_bytes: []const []const u8) void { + hh.add(list_of_bytes.len); + for (list_of_bytes) |bytes| hh.addBytes(bytes); + } + + /// Convert the input value into bytes and record it as a dependency of the process being cached. 
+ pub fn add(hh: *HashHelper, x: anytype) void { + switch (@TypeOf(x)) { + std.builtin.Version => { + hh.add(x.major); + hh.add(x.minor); + hh.add(x.patch); + }, + std.Target.Os.TaggedVersionRange => { + switch (x) { + .linux => |linux| { + hh.add(linux.range.min); + hh.add(linux.range.max); + hh.add(linux.glibc); + }, + .windows => |windows| { + hh.add(windows.min); + hh.add(windows.max); + }, + .semver => |semver| { + hh.add(semver.min); + hh.add(semver.max); + }, + .none => {}, + } + }, + else => switch (@typeInfo(@TypeOf(x))) { + .Bool, .Int, .Enum, .Array => hh.addBytes(mem.asBytes(&x)), + else => @compileError("unable to hash type " ++ @typeName(@TypeOf(x))), + }, + } + } + + pub fn addOptional(hh: *HashHelper, optional: anytype) void { + hh.add(optional != null); + hh.add(optional orelse return); + } + + /// Returns a hex encoded hash of the inputs, without modifying state. + pub fn peek(hh: HashHelper) [hex_digest_len]u8 { + var copy = hh; + return copy.final(); + } + + pub fn peekBin(hh: HashHelper) BinDigest { + var copy = hh; + var bin_digest: BinDigest = undefined; + copy.hasher.final(&bin_digest); + return bin_digest; + } + + /// Returns a hex encoded hash of the inputs, mutating the state of the hasher. + pub fn final(hh: *HashHelper) [hex_digest_len]u8 { + var bin_digest: BinDigest = undefined; + hh.hasher.final(&bin_digest); + + var out_digest: [hex_digest_len]u8 = undefined; + _ = std.fmt.bufPrint( + &out_digest, + "{s}", + .{std.fmt.fmtSliceHexLower(&bin_digest)}, + ) catch unreachable; + return out_digest; + } +}; + +pub const Lock = struct { + manifest_file: fs.File, + + pub fn release(lock: *Lock) void { + if (builtin.os.tag == .windows) { + // Windows does not guarantee that locks are immediately unlocked when + // the file handle is closed. See LockFileEx documentation. 
+ lock.manifest_file.unlock(); + } + + lock.manifest_file.close(); + lock.* = undefined; + } +}; + +pub const Manifest = struct { + cache: *Cache, + /// Current state for incremental hashing. + hash: HashHelper, + manifest_file: ?fs.File, + manifest_dirty: bool, + /// Set this flag to true before calling hit() in order to indicate that + /// upon a cache hit, the code using the cache will not modify the files + /// within the cache directory. This allows multiple processes to utilize + /// the same cache directory at the same time. + want_shared_lock: bool = true, + have_exclusive_lock: bool = false, + // Indicate that we want isProblematicTimestamp to perform a filesystem write in + // order to obtain a problematic timestamp for the next call. Calls after that + // will then use the same timestamp, to avoid unnecessary filesystem writes. + want_refresh_timestamp: bool = true, + files: std.ArrayListUnmanaged(File) = .{}, + hex_digest: [hex_digest_len]u8, + /// Populated when hit() returns an error because of one + /// of the files listed in the manifest. + failed_file_index: ?usize = null, + /// Keeps track of the last time we performed a file system write to observe + /// what time the file system thinks it is, according to its own granularity. + recent_problematic_timestamp: i128 = 0, + + /// Add a file as a dependency of process being cached. When `hit` is + /// called, the file's contents will be checked to ensure that it matches + /// the contents from previous times. + /// + /// Max file size will be used to determine the amount of space the file contents + /// are allowed to take up in memory. If max_file_size is null, then the contents + /// will not be loaded into memory. + /// + /// Returns the index of the entry in the `files` array list. 
You can use it + /// to access the contents of the file after calling `hit()` like so: + /// + /// ``` + /// var file_contents = cache_hash.files.items[file_index].contents.?; + /// ``` + pub fn addFile(self: *Manifest, file_path: []const u8, max_file_size: ?usize) !usize { + assert(self.manifest_file == null); + + const gpa = self.cache.gpa; + try self.files.ensureUnusedCapacity(gpa, 1); + const prefixed_path = try self.cache.findPrefix(file_path); + errdefer gpa.free(prefixed_path.sub_path); + + self.files.addOneAssumeCapacity().* = .{ + .prefixed_path = prefixed_path, + .contents = null, + .max_file_size = max_file_size, + .stat = undefined, + .bin_digest = undefined, + }; + + self.hash.add(prefixed_path.prefix); + self.hash.addBytes(prefixed_path.sub_path); + + return self.files.items.len - 1; + } + + pub fn addOptionalFile(self: *Manifest, optional_file_path: ?[]const u8) !void { + self.hash.add(optional_file_path != null); + const file_path = optional_file_path orelse return; + _ = try self.addFile(file_path, null); + } + + pub fn addListOfFiles(self: *Manifest, list_of_files: []const []const u8) !void { + self.hash.add(list_of_files.len); + for (list_of_files) |file_path| { + _ = try self.addFile(file_path, null); + } + } + + /// Check the cache to see if the input exists in it. If it exists, returns `true`. + /// A hex encoding of its hash is available by calling `final`. + /// + /// This function will also acquire an exclusive lock to the manifest file. This means + /// that a process holding a Manifest will block any other process attempting to + /// acquire the lock. If `want_shared_lock` is `true`, a cache hit guarantees the + /// manifest file to be locked in shared mode, and a cache miss guarantees the manifest + /// file to be locked in exclusive mode. + /// + /// The lock on the manifest file is released when `deinit` is called. As another + /// option, one may call `toOwnedLock` to obtain a smaller object which can represent + /// the lock. 
`deinit` is safe to call whether or not `toOwnedLock` has been called. + pub fn hit(self: *Manifest) !bool { + const gpa = self.cache.gpa; + assert(self.manifest_file == null); + + self.failed_file_index = null; + + const ext = ".txt"; + var manifest_file_path: [self.hex_digest.len + ext.len]u8 = undefined; + + var bin_digest: BinDigest = undefined; + self.hash.hasher.final(&bin_digest); + + _ = std.fmt.bufPrint( + &self.hex_digest, + "{s}", + .{std.fmt.fmtSliceHexLower(&bin_digest)}, + ) catch unreachable; + + self.hash.hasher = hasher_init; + self.hash.hasher.update(&bin_digest); + + mem.copy(u8, &manifest_file_path, &self.hex_digest); + manifest_file_path[self.hex_digest.len..][0..ext.len].* = ext.*; + + if (self.files.items.len == 0) { + // If there are no file inputs, we check if the manifest file exists instead of + // comparing the hashes on the files used for the cached item + while (true) { + if (self.cache.manifest_dir.openFile(&manifest_file_path, .{ + .mode = .read_write, + .lock = .Exclusive, + .lock_nonblocking = self.want_shared_lock, + })) |manifest_file| { + self.manifest_file = manifest_file; + self.have_exclusive_lock = true; + break; + } else |open_err| switch (open_err) { + error.WouldBlock => { + self.manifest_file = try self.cache.manifest_dir.openFile(&manifest_file_path, .{ + .lock = .Shared, + }); + break; + }, + error.FileNotFound => { + if (self.cache.manifest_dir.createFile(&manifest_file_path, .{ + .read = true, + .truncate = false, + .lock = .Exclusive, + .lock_nonblocking = self.want_shared_lock, + })) |manifest_file| { + self.manifest_file = manifest_file; + self.manifest_dirty = true; + self.have_exclusive_lock = true; + return false; // cache miss; exclusive lock already held + } else |err| switch (err) { + error.WouldBlock => continue, + else => |e| return e, + } + }, + else => |e| return e, + } + } + } else { + if (self.cache.manifest_dir.createFile(&manifest_file_path, .{ + .read = true, + .truncate = false, + .lock = 
.Exclusive, + .lock_nonblocking = self.want_shared_lock, + })) |manifest_file| { + self.manifest_file = manifest_file; + self.have_exclusive_lock = true; + } else |err| switch (err) { + error.WouldBlock => { + self.manifest_file = try self.cache.manifest_dir.openFile(&manifest_file_path, .{ + .lock = .Shared, + }); + }, + else => |e| return e, + } + } + + self.want_refresh_timestamp = true; + + const file_contents = try self.manifest_file.?.reader().readAllAlloc(gpa, manifest_file_size_max); + defer gpa.free(file_contents); + + const input_file_count = self.files.items.len; + var any_file_changed = false; + var line_iter = mem.tokenize(u8, file_contents, "\n"); + var idx: usize = 0; + while (line_iter.next()) |line| { + defer idx += 1; + + const cache_hash_file = if (idx < input_file_count) &self.files.items[idx] else blk: { + const new = try self.files.addOne(gpa); + new.* = .{ + .prefixed_path = null, + .contents = null, + .max_file_size = null, + .stat = undefined, + .bin_digest = undefined, + }; + break :blk new; + }; + + var iter = mem.tokenize(u8, line, " "); + const size = iter.next() orelse return error.InvalidFormat; + const inode = iter.next() orelse return error.InvalidFormat; + const mtime_nsec_str = iter.next() orelse return error.InvalidFormat; + const digest_str = iter.next() orelse return error.InvalidFormat; + const prefix_str = iter.next() orelse return error.InvalidFormat; + const file_path = iter.rest(); + + cache_hash_file.stat.size = fmt.parseInt(u64, size, 10) catch return error.InvalidFormat; + cache_hash_file.stat.inode = fmt.parseInt(fs.File.INode, inode, 10) catch return error.InvalidFormat; + cache_hash_file.stat.mtime = fmt.parseInt(i64, mtime_nsec_str, 10) catch return error.InvalidFormat; + _ = std.fmt.hexToBytes(&cache_hash_file.bin_digest, digest_str) catch return error.InvalidFormat; + const prefix = fmt.parseInt(u8, prefix_str, 10) catch return error.InvalidFormat; + if (prefix >= self.cache.prefixes_len) return 
error.InvalidFormat; + + if (file_path.len == 0) { + return error.InvalidFormat; + } + if (cache_hash_file.prefixed_path) |pp| { + if (pp.prefix != prefix or !mem.eql(u8, file_path, pp.sub_path)) { + return error.InvalidFormat; + } + } + + if (cache_hash_file.prefixed_path == null) { + cache_hash_file.prefixed_path = .{ + .prefix = prefix, + .sub_path = try gpa.dupe(u8, file_path), + }; + } + + const pp = cache_hash_file.prefixed_path.?; + const dir = self.cache.prefixes()[pp.prefix].handle; + const this_file = dir.openFile(pp.sub_path, .{ .mode = .read_only }) catch |err| switch (err) { + error.FileNotFound => { + try self.upgradeToExclusiveLock(); + return false; + }, + else => return error.CacheUnavailable, + }; + defer this_file.close(); + + const actual_stat = this_file.stat() catch |err| { + self.failed_file_index = idx; + return err; + }; + const size_match = actual_stat.size == cache_hash_file.stat.size; + const mtime_match = actual_stat.mtime == cache_hash_file.stat.mtime; + const inode_match = actual_stat.inode == cache_hash_file.stat.inode; + + if (!size_match or !mtime_match or !inode_match) { + self.manifest_dirty = true; + + cache_hash_file.stat = .{ + .size = actual_stat.size, + .mtime = actual_stat.mtime, + .inode = actual_stat.inode, + }; + + if (self.isProblematicTimestamp(cache_hash_file.stat.mtime)) { + // The actual file has an unreliable timestamp, force it to be hashed + cache_hash_file.stat.mtime = 0; + cache_hash_file.stat.inode = 0; + } + + var actual_digest: BinDigest = undefined; + hashFile(this_file, &actual_digest) catch |err| { + self.failed_file_index = idx; + return err; + }; + + if (!mem.eql(u8, &cache_hash_file.bin_digest, &actual_digest)) { + cache_hash_file.bin_digest = actual_digest; + // keep going until we have the input file digests + any_file_changed = true; + } + } + + if (!any_file_changed) { + self.hash.hasher.update(&cache_hash_file.bin_digest); + } + } + + if (any_file_changed) { + // cache miss + // keep the manifest 
file open + self.unhit(bin_digest, input_file_count); + try self.upgradeToExclusiveLock(); + return false; + } + + if (idx < input_file_count) { + self.manifest_dirty = true; + while (idx < input_file_count) : (idx += 1) { + const ch_file = &self.files.items[idx]; + self.populateFileHash(ch_file) catch |err| { + self.failed_file_index = idx; + return err; + }; + } + try self.upgradeToExclusiveLock(); + return false; + } + + if (self.want_shared_lock) { + try self.downgradeToSharedLock(); + } + + return true; + } + + pub fn unhit(self: *Manifest, bin_digest: BinDigest, input_file_count: usize) void { + // Reset the hash. + self.hash.hasher = hasher_init; + self.hash.hasher.update(&bin_digest); + + // Remove files not in the initial hash. + for (self.files.items[input_file_count..]) |*file| { + file.deinit(self.cache.gpa); + } + self.files.shrinkRetainingCapacity(input_file_count); + + for (self.files.items) |file| { + self.hash.hasher.update(&file.bin_digest); + } + } + + fn isProblematicTimestamp(man: *Manifest, file_time: i128) bool { + // If the file_time is prior to the most recent problematic timestamp + // then we don't need to access the filesystem. + if (file_time < man.recent_problematic_timestamp) + return false; + + // Next we will check the globally shared Cache timestamp, which is accessed + // from multiple threads. + man.cache.mutex.lock(); + defer man.cache.mutex.unlock(); + + // Save the global one to our local one to avoid locking next time. + man.recent_problematic_timestamp = man.cache.recent_problematic_timestamp; + if (file_time < man.recent_problematic_timestamp) + return false; + + // This flag prevents multiple filesystem writes for the same hit() call. 
+ if (man.want_refresh_timestamp) { + man.want_refresh_timestamp = false; + + var file = man.cache.manifest_dir.createFile("timestamp", .{ + .read = true, + .truncate = true, + }) catch return true; + defer file.close(); + + // Save locally and also save globally (we still hold the global lock). + man.recent_problematic_timestamp = (file.stat() catch return true).mtime; + man.cache.recent_problematic_timestamp = man.recent_problematic_timestamp; + } + + return file_time >= man.recent_problematic_timestamp; + } + + fn populateFileHash(self: *Manifest, ch_file: *File) !void { + const pp = ch_file.prefixed_path.?; + const dir = self.cache.prefixes()[pp.prefix].handle; + const file = try dir.openFile(pp.sub_path, .{}); + defer file.close(); + + const actual_stat = try file.stat(); + ch_file.stat = .{ + .size = actual_stat.size, + .mtime = actual_stat.mtime, + .inode = actual_stat.inode, + }; + + if (self.isProblematicTimestamp(ch_file.stat.mtime)) { + // The actual file has an unreliable timestamp, force it to be hashed + ch_file.stat.mtime = 0; + ch_file.stat.inode = 0; + } + + if (ch_file.max_file_size) |max_file_size| { + if (ch_file.stat.size > max_file_size) { + return error.FileTooBig; + } + + const contents = try self.cache.gpa.alloc(u8, @intCast(usize, ch_file.stat.size)); + errdefer self.cache.gpa.free(contents); + + // Hash while reading from disk, to keep the contents in the cpu cache while + // doing hashing. 
+ var hasher = hasher_init; + var off: usize = 0; + while (true) { + // give me everything you've got, captain + const bytes_read = try file.read(contents[off..]); + if (bytes_read == 0) break; + hasher.update(contents[off..][0..bytes_read]); + off += bytes_read; + } + hasher.final(&ch_file.bin_digest); + + ch_file.contents = contents; + } else { + try hashFile(file, &ch_file.bin_digest); + } + + self.hash.hasher.update(&ch_file.bin_digest); + } + + /// Add a file as a dependency of process being cached, after the initial hash has been + /// calculated. This is useful for processes that don't know all the files that + /// are depended on ahead of time. For example, a source file that can import other files + /// will need to be recompiled if the imported file is changed. + pub fn addFilePostFetch(self: *Manifest, file_path: []const u8, max_file_size: usize) ![]const u8 { + assert(self.manifest_file != null); + + const gpa = self.cache.gpa; + const prefixed_path = try self.cache.findPrefix(file_path); + errdefer gpa.free(prefixed_path.sub_path); + + const new_ch_file = try self.files.addOne(gpa); + new_ch_file.* = .{ + .prefixed_path = prefixed_path, + .max_file_size = max_file_size, + .stat = undefined, + .bin_digest = undefined, + .contents = null, + }; + errdefer self.files.shrinkRetainingCapacity(self.files.items.len - 1); + + try self.populateFileHash(new_ch_file); + + return new_ch_file.contents.?; + } + + /// Add a file as a dependency of process being cached, after the initial hash has been + /// calculated. This is useful for processes that don't know the all the files that + /// are depended on ahead of time. For example, a source file that can import other files + /// will need to be recompiled if the imported file is changed. 
+ pub fn addFilePost(self: *Manifest, file_path: []const u8) !void { + assert(self.manifest_file != null); + + const gpa = self.cache.gpa; + const prefixed_path = try self.cache.findPrefix(file_path); + errdefer gpa.free(prefixed_path.sub_path); + + const new_ch_file = try self.files.addOne(gpa); + new_ch_file.* = .{ + .prefixed_path = prefixed_path, + .max_file_size = null, + .stat = undefined, + .bin_digest = undefined, + .contents = null, + }; + errdefer self.files.shrinkRetainingCapacity(self.files.items.len - 1); + + try self.populateFileHash(new_ch_file); + } + + /// Like `addFilePost` but when the file contents have already been loaded from disk. + /// On success, cache takes ownership of `resolved_path`. + pub fn addFilePostContents( + self: *Manifest, + resolved_path: []u8, + bytes: []const u8, + stat: File.Stat, + ) error{OutOfMemory}!void { + assert(self.manifest_file != null); + const gpa = self.cache.gpa; + + const ch_file = try self.files.addOne(gpa); + errdefer self.files.shrinkRetainingCapacity(self.files.items.len - 1); + + const prefixed_path = try self.cache.findPrefixResolved(resolved_path); + errdefer gpa.free(prefixed_path.sub_path); + + ch_file.* = .{ + .prefixed_path = prefixed_path, + .max_file_size = null, + .stat = stat, + .bin_digest = undefined, + .contents = null, + }; + + if (self.isProblematicTimestamp(ch_file.stat.mtime)) { + // The actual file has an unreliable timestamp, force it to be hashed + ch_file.stat.mtime = 0; + ch_file.stat.inode = 0; + } + + { + var hasher = hasher_init; + hasher.update(bytes); + hasher.final(&ch_file.bin_digest); + } + + self.hash.hasher.update(&ch_file.bin_digest); + } + + pub fn addDepFilePost(self: *Manifest, dir: fs.Dir, dep_file_basename: []const u8) !void { + assert(self.manifest_file != null); + + const dep_file_contents = try dir.readFileAlloc(self.cache.gpa, dep_file_basename, manifest_file_size_max); + defer self.cache.gpa.free(dep_file_contents); + + var error_buf = 
std.ArrayList(u8).init(self.cache.gpa); + defer error_buf.deinit(); + + var it: DepTokenizer = .{ .bytes = dep_file_contents }; + + // Skip first token: target. + switch (it.next() orelse return) { // Empty dep file OK. + .target, .target_must_resolve, .prereq => {}, + else => |err| { + try err.printError(error_buf.writer()); + log.err("failed parsing {s}: {s}", .{ dep_file_basename, error_buf.items }); + return error.InvalidDepFile; + }, + } + // Process 0+ preqreqs. + // Clang is invoked in single-source mode so we never get more targets. + while (true) { + switch (it.next() orelse return) { + .target, .target_must_resolve => return, + .prereq => |file_path| try self.addFilePost(file_path), + else => |err| { + try err.printError(error_buf.writer()); + log.err("failed parsing {s}: {s}", .{ dep_file_basename, error_buf.items }); + return error.InvalidDepFile; + }, + } + } + } + + /// Returns a hex encoded hash of the inputs. + pub fn final(self: *Manifest) [hex_digest_len]u8 { + assert(self.manifest_file != null); + + // We don't close the manifest file yet, because we want to + // keep it locked until the API user is done using it. + // We also don't write out the manifest yet, because until + // cache_release is called we still might be working on creating + // the artifacts to cache. + + var bin_digest: BinDigest = undefined; + self.hash.hasher.final(&bin_digest); + + var out_digest: [hex_digest_len]u8 = undefined; + _ = std.fmt.bufPrint( + &out_digest, + "{s}", + .{std.fmt.fmtSliceHexLower(&bin_digest)}, + ) catch unreachable; + + return out_digest; + } + + /// If `want_shared_lock` is true, this function automatically downgrades the + /// lock from exclusive to shared. 
+ pub fn writeManifest(self: *Manifest) !void { + assert(self.have_exclusive_lock); + + const manifest_file = self.manifest_file.?; + if (self.manifest_dirty) { + self.manifest_dirty = false; + + var contents = std.ArrayList(u8).init(self.cache.gpa); + defer contents.deinit(); + + const writer = contents.writer(); + var encoded_digest: [hex_digest_len]u8 = undefined; + + for (self.files.items) |file| { + _ = std.fmt.bufPrint( + &encoded_digest, + "{s}", + .{std.fmt.fmtSliceHexLower(&file.bin_digest)}, + ) catch unreachable; + try writer.print("{d} {d} {d} {s} {d} {s}\n", .{ + file.stat.size, + file.stat.inode, + file.stat.mtime, + &encoded_digest, + file.prefixed_path.?.prefix, + file.prefixed_path.?.sub_path, + }); + } + + try manifest_file.setEndPos(contents.items.len); + try manifest_file.pwriteAll(contents.items, 0); + } + + if (self.want_shared_lock) { + try self.downgradeToSharedLock(); + } + } + + fn downgradeToSharedLock(self: *Manifest) !void { + if (!self.have_exclusive_lock) return; + + // WASI does not currently support flock, so we bypass it here. + // TODO: If/when flock is supported on WASI, this check should be removed. + // See https://github.com/WebAssembly/wasi-filesystem/issues/2 + if (builtin.os.tag != .wasi or std.process.can_spawn or !builtin.single_threaded) { + const manifest_file = self.manifest_file.?; + try manifest_file.downgradeLock(); + } + + self.have_exclusive_lock = false; + } + + fn upgradeToExclusiveLock(self: *Manifest) !void { + if (self.have_exclusive_lock) return; + assert(self.manifest_file != null); + + // WASI does not currently support flock, so we bypass it here. + // TODO: If/when flock is supported on WASI, this check should be removed. 
+ // See https://github.com/WebAssembly/wasi-filesystem/issues/2 + if (builtin.os.tag != .wasi or std.process.can_spawn or !builtin.single_threaded) { + const manifest_file = self.manifest_file.?; + // Here we intentionally have a period where the lock is released, in case there are + // other processes holding a shared lock. + manifest_file.unlock(); + try manifest_file.lock(.Exclusive); + } + self.have_exclusive_lock = true; + } + + /// Obtain only the data needed to maintain a lock on the manifest file. + /// The `Manifest` remains safe to deinit. + /// Don't forget to call `writeManifest` before this! + pub fn toOwnedLock(self: *Manifest) Lock { + const lock: Lock = .{ + .manifest_file = self.manifest_file.?, + }; + + self.manifest_file = null; + return lock; + } + + /// Releases the manifest file and frees any memory the Manifest was using. + /// `Manifest.hit` must be called first. + /// Don't forget to call `writeManifest` before this! + pub fn deinit(self: *Manifest) void { + if (self.manifest_file) |file| { + if (builtin.os.tag == .windows) { + // See Lock.release for why this is required on Windows + file.unlock(); + } + + file.close(); + } + for (self.files.items) |*file| { + file.deinit(self.cache.gpa); + } + self.files.deinit(self.cache.gpa); + } +}; + +/// On operating systems that support symlinks, does a readlink. On other operating systems, +/// uses the file contents. Windows supports symlinks but only with elevated privileges, so +/// it is treated as not supporting symlinks. +pub fn readSmallFile(dir: fs.Dir, sub_path: []const u8, buffer: []u8) ![]u8 { + if (builtin.os.tag == .windows) { + return dir.readFile(sub_path, buffer); + } else { + return dir.readLink(sub_path, buffer); + } +} + +/// On operating systems that support symlinks, does a symlink. On other operating systems, +/// uses the file contents. Windows supports symlinks but only with elevated privileges, so +/// it is treated as not supporting symlinks. 
/// `data` must be a valid UTF-8 encoded file path and 255 bytes or fewer.
/// The length limit is enforced with `assert`, i.e. violating it is a programmer error and
/// not a returned error. On non-Windows targets `data` becomes the symlink target and
/// `sub_path` the name of the link, both relative to `dir`.
pub fn writeSmallFile(dir: fs.Dir, sub_path: []const u8, data: []const u8) !void {
    assert(data.len <= 255);
    if (builtin.os.tag == .windows) {
        return dir.writeFile(sub_path, data);
    } else {
        return dir.symLink(data, sub_path, .{});
    }
}

/// Feeds everything `file.read` yields into a fresh copy of `hasher_init`, 1 KiB at a time,
/// and stores the resulting digest in `bin_digest`.
fn hashFile(file: fs.File, bin_digest: *[Hasher.mac_length]u8) !void {
    var buf: [1024]u8 = undefined;

    var hasher = hasher_init;
    while (true) {
        const bytes_read = try file.read(&buf);
        // A zero-length read signals end of file.
        if (bytes_read == 0) break;
        hasher.update(buf[0..bytes_read]);
    }

    hasher.final(bin_digest);
}

/// Creates (or truncates) a scratch file in the cwd and returns its `stat().mtime`.
/// The tests call this in a loop to wait until the file system timestamp has ticked.
/// The scratch file is removed again (best effort) so the test run leaves nothing behind.
fn testGetCurrentFileTimestamp() !i128 {
    const scratch_file = "test-filetimestamp.tmp";

    var file = try fs.cwd().createFile(scratch_file, .{
        .read = true,
        .truncate = true,
    });
    defer {
        // Close first: an open handle can prevent deletion on Windows.
        file.close();
        fs.cwd().deleteFile(scratch_file) catch {};
    }

    return (try file.stat()).mtime;
}

test "cache file and then recall it" {
    if (builtin.os.tag == .wasi) {
        // https://github.com/ziglang/zig/issues/5437
        return error.SkipZigTest;
    }

    const cwd = fs.cwd();

    const temp_file = "test.txt";
    const temp_manifest_dir = "temp_manifest_dir";

    // Leftovers from an earlier failed run would turn the first lookup into a cache hit.
    try cwd.deleteTree(temp_manifest_dir);
    // Deferred so that the artifacts are also removed when an assertion below fails.
    defer cwd.deleteTree(temp_manifest_dir) catch {};
    defer cwd.deleteFile(temp_file) catch {};

    try cwd.writeFile(temp_file, "Hello, world!\n");

    // Wait for file timestamps to tick
    const initial_time = try testGetCurrentFileTimestamp();
    while ((try testGetCurrentFileTimestamp()) == initial_time) {
        std.time.sleep(1);
    }

    var digest1: [hex_digest_len]u8 = undefined;
    var digest2: [hex_digest_len]u8 = undefined;

    {
        var cache = Cache{
            .gpa = testing.allocator,
            .manifest_dir = try cwd.makeOpenPath(temp_manifest_dir, .{}),
        };
        // Runs at the end of this inner scope, i.e. before the deferred deleteTree above.
        defer cache.manifest_dir.close();
        cache.addPrefix(.{ .path = null, .handle = fs.cwd() });

        {
            var ch = cache.obtain();
            defer ch.deinit();

            ch.hash.add(true);
            ch.hash.add(@as(u16, 1234));
            ch.hash.addBytes("1234");
            _ = try ch.addFile(temp_file, null);

            // There should be nothing in the cache
            try testing.expectEqual(false, try ch.hit());

            digest1 = ch.final();
            try ch.writeManifest();
        }
        {
            var ch = cache.obtain();
            defer ch.deinit();

            ch.hash.add(true);
            ch.hash.add(@as(u16, 1234));
            ch.hash.addBytes("1234");
            _ = try ch.addFile(temp_file, null);

            // Cache hit! We just "built" the same file
            try testing.expect(try ch.hit());
            digest2 = ch.final();

            try testing.expectEqual(false, ch.have_exclusive_lock);
        }

        try testing.expectEqual(digest1, digest2);
    }
}

test "check that changing a file makes cache fail" {
    if (builtin.os.tag == .wasi) {
        // https://github.com/ziglang/zig/issues/5437
        return error.SkipZigTest;
    }
    const cwd = fs.cwd();

    const temp_file = "cache_hash_change_file_test.txt";
    const temp_manifest_dir = "cache_hash_change_file_manifest_dir";
    const original_temp_file_contents = "Hello, world!\n";
    const updated_temp_file_contents = "Hello, world; but updated!\n";

    try cwd.deleteTree(temp_manifest_dir);
    try cwd.deleteTree(temp_file);
    // Deferred so that the artifacts are also removed when an assertion below fails.
    defer cwd.deleteTree(temp_manifest_dir) catch {};
    defer cwd.deleteTree(temp_file) catch {};

    try cwd.writeFile(temp_file, original_temp_file_contents);

    // Wait for file timestamps to tick
    const initial_time = try testGetCurrentFileTimestamp();
    while ((try testGetCurrentFileTimestamp()) == initial_time) {
        std.time.sleep(1);
    }

    var digest1: [hex_digest_len]u8 = undefined;
    var digest2: [hex_digest_len]u8 = undefined;

    {
        var cache = Cache{
            .gpa = testing.allocator,
            .manifest_dir = try cwd.makeOpenPath(temp_manifest_dir, .{}),
        };
        // Runs at the end of this inner scope, i.e. before the deferred deleteTree above.
        defer cache.manifest_dir.close();
        cache.addPrefix(.{ .path = null, .handle = fs.cwd() });

        {
            var ch = cache.obtain();
            defer ch.deinit();

            ch.hash.addBytes("1234");
            const temp_file_idx = try ch.addFile(temp_file, 100);

            // There should be nothing in the cache
            try testing.expectEqual(false, try ch.hit());

            try testing.expectEqualStrings(
                original_temp_file_contents,
                ch.files.items[temp_file_idx].contents.?,
            );

            digest1 = ch.final();

            try ch.writeManifest();
        }

        try cwd.writeFile(temp_file, updated_temp_file_contents);

        {
            var ch = cache.obtain();
            defer ch.deinit();

            ch.hash.addBytes("1234");
            const temp_file_idx = try ch.addFile(temp_file, 100);

            // A file that we depend on has been updated, so the cache should not contain an entry for it
            try testing.expectEqual(false, try ch.hit());

            // The cache system does not keep the contents of re-hashed input files.
            try testing.expect(ch.files.items[temp_file_idx].contents == null);

            digest2 = ch.final();

            try ch.writeManifest();
        }

        try testing.expect(!mem.eql(u8, digest1[0..], digest2[0..]));
    }
}

test "no file inputs" {
    if (builtin.os.tag == .wasi) {
        // https://github.com/ziglang/zig/issues/5437
        return error.SkipZigTest;
    }
    const cwd = fs.cwd();
    const temp_manifest_dir = "no_file_inputs_manifest_dir";

    // Leftovers from an earlier aborted run would turn the first lookup into a cache hit.
    try cwd.deleteTree(temp_manifest_dir);
    defer cwd.deleteTree(temp_manifest_dir) catch {};

    var digest1: [hex_digest_len]u8 = undefined;
    var digest2: [hex_digest_len]u8 = undefined;

    var cache = Cache{
        .gpa = testing.allocator,
        .manifest_dir = try cwd.makeOpenPath(temp_manifest_dir, .{}),
    };
    // Registered after the deleteTree defer, so the handle is closed first (LIFO).
    defer cache.manifest_dir.close();
    cache.addPrefix(.{ .path = null, .handle = fs.cwd() });

    {
        var man = cache.obtain();
        defer man.deinit();

        man.hash.addBytes("1234");

        // There should be nothing in the cache
        try testing.expectEqual(false, try man.hit());

        digest1 = man.final();

        try man.writeManifest();
    }
    {
        var man = cache.obtain();
        defer man.deinit();

        man.hash.addBytes("1234");

        try testing.expect(try man.hit());
        digest2 = man.final();
        try testing.expectEqual(false, man.have_exclusive_lock);
    }

    try testing.expectEqual(digest1, digest2);
}

test "Manifest with files added after initial hash work" {
    if (builtin.os.tag == .wasi) {
        // https://github.com/ziglang/zig/issues/5437
        return error.SkipZigTest;
    }
    const cwd = fs.cwd();

    const temp_file1 = "cache_hash_post_file_test1.txt";
    const temp_file2 = "cache_hash_post_file_test2.txt";
    const temp_manifest_dir = "cache_hash_post_file_manifest_dir";

    // Leftovers from an earlier failed run would turn the first lookup into a cache hit.
    try cwd.deleteTree(temp_manifest_dir);
    // Deferred so that the artifacts are also removed when an assertion below fails.
    defer cwd.deleteTree(temp_manifest_dir) catch {};
    defer cwd.deleteFile(temp_file1) catch {};
    defer cwd.deleteFile(temp_file2) catch {};

    try cwd.writeFile(temp_file1, "Hello, world!\n");
    try cwd.writeFile(temp_file2, "Hello world the second!\n");

    // Wait for file timestamps to tick
    const initial_time = try testGetCurrentFileTimestamp();
    while ((try testGetCurrentFileTimestamp()) == initial_time) {
        std.time.sleep(1);
    }

    var digest1: [hex_digest_len]u8 = undefined;
    var digest2: [hex_digest_len]u8 = undefined;
    var digest3: [hex_digest_len]u8 = undefined;

    {
        var cache = Cache{
            .gpa = testing.allocator,
            .manifest_dir = try cwd.makeOpenPath(temp_manifest_dir, .{}),
        };
        // Runs at the end of this inner scope, i.e. before the deferred deleteTree above.
        defer cache.manifest_dir.close();
        cache.addPrefix(.{ .path = null, .handle = fs.cwd() });

        {
            var ch = cache.obtain();
            defer ch.deinit();

            ch.hash.addBytes("1234");
            _ = try ch.addFile(temp_file1, null);

            // There should be nothing in the cache
            try testing.expectEqual(false, try ch.hit());

            _ = try ch.addFilePost(temp_file2);

            digest1 = ch.final();
            try ch.writeManifest();
        }
        {
            var ch = cache.obtain();
            defer ch.deinit();

            ch.hash.addBytes("1234");
            _ = try ch.addFile(temp_file1, null);

            try testing.expect(try ch.hit());
            digest2 = ch.final();

            try testing.expectEqual(false, ch.have_exclusive_lock);
        }
        try testing.expectEqual(digest1, digest2);

        // Modify the file added after initial hash
        try cwd.writeFile(temp_file2, "Hello world the second, updated\n");

        // Wait for file timestamps to tick
        const initial_time2 = try testGetCurrentFileTimestamp();
        while ((try testGetCurrentFileTimestamp()) == initial_time2) {
            std.time.sleep(1);
        }

        {
            var ch = cache.obtain();
            defer ch.deinit();

            ch.hash.addBytes("1234");
            _ = try ch.addFile(temp_file1, null);

            // A file that we depend on has been updated, so the cache should not contain an entry for it
            try testing.expectEqual(false, try ch.hit());

            _ = try ch.addFilePost(temp_file2);

            digest3 = ch.final();

            try ch.writeManifest();
        }

        try testing.expect(!mem.eql(u8, &digest1, &digest3));
    }
}
