From 8778dc4bb2bb20779bee3fc964f79437ff3ef9fe Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Thu, 21 Mar 2024 16:11:59 -0700 Subject: extract std.Build.Cache.Directory into separate file --- lib/std/Build/Cache.zig | 72 +------------------------------------------------ 1 file changed, 1 insertion(+), 71 deletions(-) (limited to 'lib/std/Build/Cache.zig') diff --git a/lib/std/Build/Cache.zig b/lib/std/Build/Cache.zig index c18a748de2..a33e574871 100644 --- a/lib/std/Build/Cache.zig +++ b/lib/std/Build/Cache.zig @@ -2,77 +2,6 @@ //! This is not a general-purpose cache. It is designed to be fast and simple, //! not to withstand attacks using specially-crafted input. -pub const Directory = struct { - /// This field is redundant for operations that can act on the open directory handle - /// directly, but it is needed when passing the directory to a child process. - /// `null` means cwd. - path: ?[]const u8, - handle: fs.Dir, - - pub fn clone(d: Directory, arena: Allocator) Allocator.Error!Directory { - return .{ - .path = if (d.path) |p| try arena.dupe(u8, p) else null, - .handle = d.handle, - }; - } - - pub fn cwd() Directory { - return .{ - .path = null, - .handle = fs.cwd(), - }; - } - - pub fn join(self: Directory, allocator: Allocator, paths: []const []const u8) ![]u8 { - if (self.path) |p| { - // TODO clean way to do this with only 1 allocation - const part2 = try fs.path.join(allocator, paths); - defer allocator.free(part2); - return fs.path.join(allocator, &[_][]const u8{ p, part2 }); - } else { - return fs.path.join(allocator, paths); - } - } - - pub fn joinZ(self: Directory, allocator: Allocator, paths: []const []const u8) ![:0]u8 { - if (self.path) |p| { - // TODO clean way to do this with only 1 allocation - const part2 = try fs.path.join(allocator, paths); - defer allocator.free(part2); - return fs.path.joinZ(allocator, &[_][]const u8{ p, part2 }); - } else { - return fs.path.joinZ(allocator, paths); - } - } - - /// Whether or not the handle should be closed, or the path should be freed - /// is determined by usage, however this function is provided for convenience - /// if it happens to be what the caller needs. - pub fn closeAndFree(self: *Directory, gpa: Allocator) void { - self.handle.close(); - if (self.path) |p| gpa.free(p); - self.* = undefined; - } - - pub fn format( - self: Directory, - comptime fmt_string: []const u8, - options: fmt.FormatOptions, - writer: anytype, - ) !void { - _ = options; - if (fmt_string.len != 0) fmt.invalidFmtError(fmt_string, self); - if (self.path) |p| { - try writer.writeAll(p); - try writer.writeAll(fs.path.sep_str); - } - } - - pub fn eql(self: Directory, other: Directory) bool { - return self.handle.fd == other.handle.fd; - } -}; - gpa: Allocator, manifest_dir: fs.Dir, hash: HashHelper = .{}, @@ -88,6 +17,7 @@ mutex: std.Thread.Mutex = .{}, prefixes_buffer: [4]Directory = undefined, prefixes_len: usize = 0, +pub const Directory = @import("Cache/Directory.zig"); pub const DepTokenizer = @import("Cache/DepTokenizer.zig"); const Cache = @This(); -- cgit v1.2.3 From 7bc0b74b6d57ff1a350a4f430f7d9e799a90edd0 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Thu, 21 Mar 2024 16:16:47 -0700 Subject: move Package.Path to std.Build.Cache.Path --- lib/std/Build/Cache.zig | 1 + lib/std/Build/Cache/Path.zig | 154 +++++++++++++++++++++++++++++++++++++++++ src/Package.zig | 159 ------------------------------------------- src/Package/Fetch.zig | 12 ++-- src/Package/Module.zig | 6 +- src/main.zig | 4 +- 6 files changed, 166 insertions(+), 170 deletions(-) create mode 100644 lib/std/Build/Cache/Path.zig (limited to 'lib/std/Build/Cache.zig') diff --git a/lib/std/Build/Cache.zig b/lib/std/Build/Cache.zig index a33e574871..d846917fd3 100644 --- a/lib/std/Build/Cache.zig +++ b/lib/std/Build/Cache.zig @@ -17,6 +17,7 @@ mutex: std.Thread.Mutex = .{}, prefixes_buffer: [4]Directory = undefined, prefixes_len: usize = 0, +pub const Path = @import("Cache/Path.zig"); pub const Directory = @import("Cache/Directory.zig"); pub const DepTokenizer = @import("Cache/DepTokenizer.zig"); diff --git a/lib/std/Build/Cache/Path.zig b/lib/std/Build/Cache/Path.zig new file mode 100644 index 0000000000..99ce2e12ee --- /dev/null +++ b/lib/std/Build/Cache/Path.zig @@ -0,0 +1,154 @@ +root_dir: Cache.Directory, +/// The path, relative to the root dir, that this `Path` represents. +/// Empty string means the root_dir is the path. +sub_path: []const u8 = "", + +pub fn clone(p: Path, arena: Allocator) Allocator.Error!Path { + return .{ + .root_dir = try p.root_dir.clone(arena), + .sub_path = try arena.dupe(u8, p.sub_path), + }; +} + +pub fn cwd() Path { + return .{ .root_dir = Cache.Directory.cwd() }; +} + +pub fn join(p: Path, arena: Allocator, sub_path: []const u8) Allocator.Error!Path { + if (sub_path.len == 0) return p; + const parts: []const []const u8 = + if (p.sub_path.len == 0) &.{sub_path} else &.{ p.sub_path, sub_path }; + return .{ + .root_dir = p.root_dir, + .sub_path = try fs.path.join(arena, parts), + }; +} + +pub fn resolvePosix(p: Path, arena: Allocator, sub_path: []const u8) Allocator.Error!Path { + if (sub_path.len == 0) return p; + return .{ + .root_dir = p.root_dir, + .sub_path = try fs.path.resolvePosix(arena, &.{ p.sub_path, sub_path }), + }; +} + +pub fn joinString(p: Path, allocator: Allocator, sub_path: []const u8) Allocator.Error![]u8 { + const parts: []const []const u8 = + if (p.sub_path.len == 0) &.{sub_path} else &.{ p.sub_path, sub_path }; + return p.root_dir.join(allocator, parts); +} + +pub fn joinStringZ(p: Path, allocator: Allocator, sub_path: []const u8) Allocator.Error![:0]u8 { + const parts: []const []const u8 = + if (p.sub_path.len == 0) &.{sub_path} else &.{ p.sub_path, sub_path }; + return p.root_dir.joinZ(allocator, parts); +} + +pub fn openFile( + p: Path, + sub_path: []const u8, + flags: fs.File.OpenFlags, +) !fs.File { + var buf: [fs.MAX_PATH_BYTES]u8 = undefined; + const joined_path = if (p.sub_path.len == 0) sub_path else p: { + break :p std.fmt.bufPrint(&buf, "{s}" ++ fs.path.sep_str ++ "{s}", .{ + p.sub_path, sub_path, + }) catch return error.NameTooLong; + }; + return p.root_dir.handle.openFile(joined_path, flags); +} + +pub fn makeOpenPath(p: Path, sub_path: []const u8, opts: fs.OpenDirOptions) !fs.Dir { + var buf: [fs.MAX_PATH_BYTES]u8 = undefined; + const joined_path = if (p.sub_path.len == 0) sub_path else p: { + break :p std.fmt.bufPrint(&buf, "{s}" ++ fs.path.sep_str ++ "{s}", .{ + p.sub_path, sub_path, + }) catch return error.NameTooLong; + }; + return p.root_dir.handle.makeOpenPath(joined_path, opts); +} + +pub fn statFile(p: Path, sub_path: []const u8) !fs.Dir.Stat { + var buf: [fs.MAX_PATH_BYTES]u8 = undefined; + const joined_path = if (p.sub_path.len == 0) sub_path else p: { + break :p std.fmt.bufPrint(&buf, "{s}" ++ fs.path.sep_str ++ "{s}", .{ + p.sub_path, sub_path, + }) catch return error.NameTooLong; + }; + return p.root_dir.handle.statFile(joined_path); +} + +pub fn atomicFile( + p: Path, + sub_path: []const u8, + options: fs.Dir.AtomicFileOptions, + buf: *[fs.MAX_PATH_BYTES]u8, +) !fs.AtomicFile { + const joined_path = if (p.sub_path.len == 0) sub_path else p: { + break :p std.fmt.bufPrint(buf, "{s}" ++ fs.path.sep_str ++ "{s}", .{ + p.sub_path, sub_path, + }) catch return error.NameTooLong; + }; + return p.root_dir.handle.atomicFile(joined_path, options); +} + +pub fn access(p: Path, sub_path: []const u8, flags: fs.File.OpenFlags) !void { + var buf: [fs.MAX_PATH_BYTES]u8 = undefined; + const joined_path = if (p.sub_path.len == 0) sub_path else p: { + break :p std.fmt.bufPrint(&buf, "{s}" ++ fs.path.sep_str ++ "{s}", .{ + p.sub_path, sub_path, + }) catch return error.NameTooLong; + }; + return p.root_dir.handle.access(joined_path, flags); +} + +pub fn makePath(p: Path, sub_path: []const u8) !void { + var buf: [fs.MAX_PATH_BYTES]u8 = undefined; + const joined_path = if (p.sub_path.len == 0) sub_path else p: { + break :p std.fmt.bufPrint(&buf, "{s}" ++ fs.path.sep_str ++ "{s}", .{ + p.sub_path, sub_path, + }) catch return error.NameTooLong; + }; + return p.root_dir.handle.makePath(joined_path); +} + +pub fn format( + self: Path, + comptime fmt_string: []const u8, + options: std.fmt.FormatOptions, + writer: anytype, +) !void { + if (fmt_string.len == 1) { + // Quote-escape the string. + const stringEscape = std.zig.stringEscape; + const f = switch (fmt_string[0]) { + 'q' => "", + '\'' => '\'', + else => @compileError("unsupported format string: " ++ fmt_string), + }; + if (self.root_dir.path) |p| { + try stringEscape(p, f, options, writer); + if (self.sub_path.len > 0) try stringEscape(fs.path.sep_str, f, options, writer); + } + if (self.sub_path.len > 0) { + try stringEscape(self.sub_path, f, options, writer); + } + return; + } + if (fmt_string.len > 0) + std.fmt.invalidFmtError(fmt_string, self); + if (self.root_dir.path) |p| { + try writer.writeAll(p); + try writer.writeAll(fs.path.sep_str); + } + if (self.sub_path.len > 0) { + try writer.writeAll(self.sub_path); + try writer.writeAll(fs.path.sep_str); + } +} + +const Path = @This(); +const std = @import("../../std.zig"); +const fs = std.fs; +const Allocator = std.mem.Allocator; +const Cache = std.Build.Cache; diff --git a/src/Package.zig b/src/Package.zig index 1bb02c5a5a..e173665e11 100644 --- a/src/Package.zig +++ b/src/Package.zig @@ -2,162 +2,3 @@ pub const Module = @import("Package/Module.zig"); pub const Fetch = @import("Package/Fetch.zig"); pub const build_zig_basename = "build.zig"; pub const Manifest = @import("Package/Manifest.zig"); - -pub const Path = struct { - root_dir: Cache.Directory, - /// The path, relative to the root dir, that this `Path` represents. - /// Empty string means the root_dir is the path. - sub_path: []const u8 = "", - - pub fn clone(p: Path, arena: Allocator) Allocator.Error!Path { - return .{ - .root_dir = try p.root_dir.clone(arena), - .sub_path = try arena.dupe(u8, p.sub_path), - }; - } - - pub fn cwd() Path { - return .{ .root_dir = Cache.Directory.cwd() }; - } - - pub fn join(p: Path, arena: Allocator, sub_path: []const u8) Allocator.Error!Path { - if (sub_path.len == 0) return p; - const parts: []const []const u8 = - if (p.sub_path.len == 0) &.{sub_path} else &.{ p.sub_path, sub_path }; - return .{ - .root_dir = p.root_dir, - .sub_path = try fs.path.join(arena, parts), - }; - } - - pub fn resolvePosix(p: Path, arena: Allocator, sub_path: []const u8) Allocator.Error!Path { - if (sub_path.len == 0) return p; - return .{ - .root_dir = p.root_dir, - .sub_path = try fs.path.resolvePosix(arena, &.{ p.sub_path, sub_path }), - }; - } - - pub fn joinString(p: Path, allocator: Allocator, sub_path: []const u8) Allocator.Error![]u8 { - const parts: []const []const u8 = - if (p.sub_path.len == 0) &.{sub_path} else &.{ p.sub_path, sub_path }; - return p.root_dir.join(allocator, parts); - } - - pub fn joinStringZ(p: Path, allocator: Allocator, sub_path: []const u8) Allocator.Error![:0]u8 { - const parts: []const []const u8 = - if (p.sub_path.len == 0) &.{sub_path} else &.{ p.sub_path, sub_path }; - return p.root_dir.joinZ(allocator, parts); - } - - pub fn openFile( - p: Path, - sub_path: []const u8, - flags: fs.File.OpenFlags, - ) !fs.File { - var buf: [fs.MAX_PATH_BYTES]u8 = undefined; - const joined_path = if (p.sub_path.len == 0) sub_path else p: { - break :p std.fmt.bufPrint(&buf, "{s}" ++ fs.path.sep_str ++ "{s}", .{ - p.sub_path, sub_path, - }) catch return error.NameTooLong; - }; - return p.root_dir.handle.openFile(joined_path, flags); - } - - pub fn makeOpenPath(p: Path, sub_path: []const u8, opts: fs.OpenDirOptions) !fs.Dir { - var buf: [fs.MAX_PATH_BYTES]u8 = undefined; - const joined_path = if (p.sub_path.len == 0) sub_path else p: { - break :p std.fmt.bufPrint(&buf, "{s}" ++ fs.path.sep_str ++ "{s}", .{ - p.sub_path, sub_path, - }) catch return error.NameTooLong; - }; - return p.root_dir.handle.makeOpenPath(joined_path, opts); - } - - pub fn statFile(p: Path, sub_path: []const u8) !fs.Dir.Stat { - var buf: [fs.MAX_PATH_BYTES]u8 = undefined; - const joined_path = if (p.sub_path.len == 0) sub_path else p: { - break :p std.fmt.bufPrint(&buf, "{s}" ++ fs.path.sep_str ++ "{s}", .{ - p.sub_path, sub_path, - }) catch return error.NameTooLong; - }; - return p.root_dir.handle.statFile(joined_path); - } - - pub fn atomicFile( - p: Path, - sub_path: []const u8, - options: fs.Dir.AtomicFileOptions, - buf: *[fs.MAX_PATH_BYTES]u8, - ) !fs.AtomicFile { - const joined_path = if (p.sub_path.len == 0) sub_path else p: { - break :p std.fmt.bufPrint(buf, "{s}" ++ fs.path.sep_str ++ "{s}", .{ - p.sub_path, sub_path, - }) catch return error.NameTooLong; - }; - return p.root_dir.handle.atomicFile(joined_path, options); - } - - pub fn access(p: Path, sub_path: []const u8, flags: fs.File.OpenFlags) !void { - var buf: [fs.MAX_PATH_BYTES]u8 = undefined; - const joined_path = if (p.sub_path.len == 0) sub_path else p: { - break :p std.fmt.bufPrint(&buf, "{s}" ++ fs.path.sep_str ++ "{s}", .{ - p.sub_path, sub_path, - }) catch return error.NameTooLong; - }; - return p.root_dir.handle.access(joined_path, flags); - } - - pub fn makePath(p: Path, sub_path: []const u8) !void { - var buf: [fs.MAX_PATH_BYTES]u8 = undefined; - const joined_path = if (p.sub_path.len == 0) sub_path else p: { - break :p std.fmt.bufPrint(&buf, "{s}" ++ fs.path.sep_str ++ "{s}", .{ - p.sub_path, sub_path, - }) catch return error.NameTooLong; - }; - return p.root_dir.handle.makePath(joined_path); - } - - pub fn format( - self: Path, - comptime fmt_string: []const u8, - options: std.fmt.FormatOptions, - writer: anytype, - ) !void { - if (fmt_string.len == 1) { - // Quote-escape the string. - const stringEscape = std.zig.stringEscape; - const f = switch (fmt_string[0]) { - 'q' => "", - '\'' => '\'', - else => @compileError("unsupported format string: " ++ fmt_string), - }; - if (self.root_dir.path) |p| { - try stringEscape(p, f, options, writer); - if (self.sub_path.len > 0) try stringEscape(fs.path.sep_str, f, options, writer); - } - if (self.sub_path.len > 0) { - try stringEscape(self.sub_path, f, options, writer); - } - return; - } - if (fmt_string.len > 0) - std.fmt.invalidFmtError(fmt_string, self); - if (self.root_dir.path) |p| { - try writer.writeAll(p); - try writer.writeAll(fs.path.sep_str); - } - if (self.sub_path.len > 0) { - try writer.writeAll(self.sub_path); - try writer.writeAll(fs.path.sep_str); - } - } -}; - -const Package = @This(); -const builtin = @import("builtin"); -const std = @import("std"); -const fs = std.fs; -const Allocator = std.mem.Allocator; -const assert = std.debug.assert; -const Cache = std.Build.Cache; diff --git a/src/Package/Fetch.zig b/src/Package/Fetch.zig index a40bb539f7..d0cfd5ab94 100644 --- a/src/Package/Fetch.zig +++ b/src/Package/Fetch.zig @@ -33,7 +33,7 @@ location_tok: std.zig.Ast.TokenIndex, hash_tok: std.zig.Ast.TokenIndex, name_tok: std.zig.Ast.TokenIndex, lazy_status: LazyStatus, -parent_package_root: Package.Path, +parent_package_root: Cache.Path, parent_manifest_ast: ?*const std.zig.Ast, prog_node: *std.Progress.Node, job_queue: *JobQueue, @@ -50,7 +50,7 @@ allow_missing_paths_field: bool, /// This will either be relative to `global_cache`, or to the build root of /// the root package. -package_root: Package.Path, +package_root: Cache.Path, error_bundle: ErrorBundle.Wip, manifest: ?Manifest, manifest_ast: std.zig.Ast, @@ -263,7 +263,7 @@ pub const JobQueue = struct { pub const Location = union(enum) { remote: Remote, /// A directory found inside the parent package. - relative_path: Package.Path, + relative_path: Cache.Path, /// Recursive Fetch tasks will never use this Location, but it may be /// passed in by the CLI. Indicates the file contents here should be copied /// into the global package cache. It may be a file relative to the cwd or @@ -564,7 +564,7 @@ fn checkBuildFileExistence(f: *Fetch) RunError!void { } /// This function populates `f.manifest` or leaves it `null`. -fn loadManifest(f: *Fetch, pkg_root: Package.Path) RunError!void { +fn loadManifest(f: *Fetch, pkg_root: Cache.Path) RunError!void { const eb = &f.error_bundle; const arena = f.arena.allocator(); const manifest_bytes = pkg_root.root_dir.handle.readFileAllocOptions( @@ -722,7 +722,7 @@ fn queueJobsForDeps(f: *Fetch) RunError!void { } pub fn relativePathDigest( - pkg_root: Package.Path, + pkg_root: Cache.Path, cache_root: Cache.Directory, ) Manifest.MultiHashHexDigest { var hasher = Manifest.Hash.init(.{}); @@ -1658,7 +1658,7 @@ const Filter = struct { }; pub fn depDigest( - pkg_root: Package.Path, + pkg_root: Cache.Path, cache_root: Cache.Directory, dep: Manifest.Dependency, ) ?Manifest.MultiHashHexDigest { diff --git a/src/Package/Module.zig b/src/Package/Module.zig index d6b89efb41..f9cb4475ad 100644 --- a/src/Package/Module.zig +++ b/src/Package/Module.zig @@ -3,7 +3,7 @@ //! to Zcu. https://github.com/ziglang/zig/issues/14307 /// Only files inside this directory can be imported. -root: Package.Path, +root: Cache.Path, /// Relative to `root`. May contain path separators. root_src_path: []const u8, /// Name used in compile errors. Looks like "root.foo.bar". @@ -69,7 +69,7 @@ pub const CreateOptions = struct { builtin_modules: ?*std.StringHashMapUnmanaged(*Module), pub const Paths = struct { - root: Package.Path, + root: Cache.Path, /// Relative to `root`. May contain path separators. root_src_path: []const u8, }; @@ -463,7 +463,7 @@ pub fn create(arena: Allocator, options: CreateOptions) !*Package.Module { /// All fields correspond to `CreateOptions`. pub const LimitedOptions = struct { - root: Package.Path, + root: Cache.Path, root_src_path: []const u8, fully_qualified_name: []const u8, }; diff --git a/src/main.zig b/src/main.zig index be2083a0f8..9629d2bf18 100644 --- a/src/main.zig +++ b/src/main.zig @@ -6133,7 +6133,7 @@ fn cmdAstCheck( } file.mod = try Package.Module.createLimited(arena, .{ - .root = Package.Path.cwd(), + .root = Cache.Path.cwd(), .root_src_path = file.sub_file_path, .fully_qualified_name = "root", }); @@ -6306,7 +6306,7 @@ fn cmdChangelist( }; file.mod = try Package.Module.createLimited(arena, .{ - .root = Package.Path.cwd(), + .root = Cache.Path.cwd(), .root_src_path = file.sub_file_path, .fully_qualified_name = "root", }); -- cgit v1.2.3 From 2f4bbd6c637782eb985860255cf70011bbadd452 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Thu, 21 Mar 2024 19:53:24 -0700 Subject: std.Build.Cache: use an array hash map for files Rather than an ArrayList. Provides deduplication. --- lib/std/Build/Cache.zig | 216 +++++++++++++++++++++++++++++++++--------------- lib/std/Build/Step.zig | 2 +- src/Compilation.zig | 4 +- src/glibc.zig | 2 +- 4 files changed, 152 insertions(+), 72 deletions(-) (limited to 'lib/std/Build/Cache.zig') diff --git a/lib/std/Build/Cache.zig b/lib/std/Build/Cache.zig index d846917fd3..cd57d3ad40 100644 --- a/lib/std/Build/Cache.zig +++ b/lib/std/Build/Cache.zig @@ -55,7 +55,15 @@ pub fn prefixes(cache: *const Cache) []const Directory { const PrefixedPath = struct { prefix: u8, - sub_path: []u8, + sub_path: []const u8, + + fn eql(a: PrefixedPath, b: PrefixedPath) bool { + return a.prefix == b.prefix and std.mem.eql(u8, a.sub_path, b.sub_path); + } + + fn hash(pp: PrefixedPath) u32 { + return @truncate(std.hash.Wyhash.hash(pp.prefix, pp.sub_path)); + } }; fn findPrefix(cache: *const Cache, file_path: []const u8) !PrefixedPath { @@ -132,7 +140,7 @@ pub const hasher_init: Hasher = Hasher.init(&[_]u8{ }); pub const File = struct { - prefixed_path: ?PrefixedPath, + prefixed_path: PrefixedPath, max_file_size: ?usize, stat: Stat, bin_digest: BinDigest, @@ -145,16 +153,18 @@ pub const File = struct { }; pub fn deinit(self: *File, gpa: Allocator) void { - if (self.prefixed_path) |pp| { - gpa.free(pp.sub_path); - self.prefixed_path = null; - } + gpa.free(self.prefixed_path.sub_path); if (self.contents) |contents| { gpa.free(contents); self.contents = null; } self.* = undefined; } + + pub fn updateMaxSize(file: *File, new_max_size: ?usize) void { + const new = new_max_size orelse return; + file.max_file_size = if (file.max_file_size) |old| @max(old, new) else new; + } }; pub const HashHelper = struct { @@ -296,7 +306,7 @@ pub const Manifest = struct { // order to obtain a problematic timestamp for the next call. Calls after that // will then use the same timestamp, to avoid unnecessary filesystem writes. want_refresh_timestamp: bool = true, - files: std.ArrayListUnmanaged(File) = .{}, + files: Files = .{}, hex_digest: HexDigest, /// Populated when hit() returns an error because of one /// of the files listed in the manifest. @@ -305,6 +315,34 @@ pub const Manifest = struct { /// what time the file system thinks it is, according to its own granularity. recent_problematic_timestamp: i128 = 0, + pub const Files = std.ArrayHashMapUnmanaged(File, void, FilesContext, false); + + pub const FilesContext = struct { + pub fn hash(fc: FilesContext, file: File) u32 { + _ = fc; + return file.prefixed_path.hash(); + } + + pub fn eql(fc: FilesContext, a: File, b: File, b_index: usize) bool { + _ = fc; + _ = b_index; + return a.prefixed_path.eql(b.prefixed_path); + } + }; + + const FilesAdapter = struct { + pub fn eql(context: @This(), a: PrefixedPath, b: File, b_index: usize) bool { + _ = context; + _ = b_index; + return a.eql(b.prefixed_path); + } + + pub fn hash(context: @This(), key: PrefixedPath) u32 { + _ = context; + return key.hash(); + } + }; + /// Add a file as a dependency of process being cached. When `hit` is /// called, the file's contents will be checked to ensure that it matches /// the contents from previous times. @@ -317,7 +355,7 @@ pub const Manifest = struct { /// to access the contents of the file after calling `hit()` like so: /// /// ``` - /// var file_contents = cache_hash.files.items[file_index].contents.?; + /// var file_contents = cache_hash.files.keys()[file_index].contents.?; /// ``` pub fn addFile(self: *Manifest, file_path: []const u8, max_file_size: ?usize) !usize { assert(self.manifest_file == null); @@ -327,7 +365,12 @@ pub const Manifest = struct { const prefixed_path = try self.cache.findPrefix(file_path); errdefer gpa.free(prefixed_path.sub_path); - self.files.addOneAssumeCapacity().* = .{ + const gop = self.files.getOrPutAssumeCapacityAdapted(prefixed_path, FilesAdapter{}); + if (gop.found_existing) { + gop.key_ptr.updateMaxSize(max_file_size); + return gop.index; + } + gop.key_ptr.* = .{ .prefixed_path = prefixed_path, .contents = null, .max_file_size = max_file_size, @@ -338,7 +381,7 @@ pub const Manifest = struct { self.hash.add(prefixed_path.prefix); self.hash.addBytes(prefixed_path.sub_path); - return self.files.items.len - 1; + return gop.index; } pub fn addOptionalFile(self: *Manifest, optional_file_path: ?[]const u8) !void { @@ -418,7 +461,7 @@ pub const Manifest = struct { self.want_refresh_timestamp = true; - const input_file_count = self.files.items.len; + const input_file_count = self.files.entries.len; while (true) : (self.unhit(bin_digest, input_file_count)) { const file_contents = try self.manifest_file.?.reader().readAllAlloc(gpa, manifest_file_size_max); defer gpa.free(file_contents); @@ -430,7 +473,7 @@ pub const Manifest = struct { if (try self.upgradeToExclusiveLock()) continue; self.manifest_dirty = true; while (idx < input_file_count) : (idx += 1) { - const ch_file = &self.files.items[idx]; + const ch_file = &self.files.keys()[idx]; self.populateFileHash(ch_file) catch |err| { self.failed_file_index = idx; return err; @@ -441,18 +484,6 @@ pub const Manifest = struct { while (line_iter.next()) |line| { defer idx += 1; - const cache_hash_file = if (idx < input_file_count) &self.files.items[idx] else blk: { - const new = try self.files.addOne(gpa); - new.* = .{ - .prefixed_path = null, - .contents = null, - .max_file_size = null, - .stat = undefined, - .bin_digest = undefined, - }; - break :blk new; - }; - var iter = mem.tokenizeScalar(u8, line, ' '); const size = iter.next() orelse return error.InvalidFormat; const inode = iter.next() orelse return error.InvalidFormat; @@ -461,30 +492,61 @@ pub const Manifest = struct { const prefix_str = iter.next() orelse return error.InvalidFormat; const file_path = iter.rest(); - cache_hash_file.stat.size = fmt.parseInt(u64, size, 10) catch return error.InvalidFormat; - cache_hash_file.stat.inode = fmt.parseInt(fs.File.INode, inode, 10) catch return error.InvalidFormat; - cache_hash_file.stat.mtime = fmt.parseInt(i64, mtime_nsec_str, 10) catch return error.InvalidFormat; - _ = fmt.hexToBytes(&cache_hash_file.bin_digest, digest_str) catch return error.InvalidFormat; + const stat_size = fmt.parseInt(u64, size, 10) catch return error.InvalidFormat; + const stat_inode = fmt.parseInt(fs.File.INode, inode, 10) catch return error.InvalidFormat; + const stat_mtime = fmt.parseInt(i64, mtime_nsec_str, 10) catch return error.InvalidFormat; + const file_bin_digest = b: { + if (digest_str.len != hex_digest_len) return error.InvalidFormat; + var bd: BinDigest = undefined; + _ = fmt.hexToBytes(&bd, digest_str) catch return error.InvalidFormat; + break :b bd; + }; + const prefix = fmt.parseInt(u8, prefix_str, 10) catch return error.InvalidFormat; if (prefix >= self.cache.prefixes_len) return error.InvalidFormat; - if (file_path.len == 0) { - return error.InvalidFormat; - } - if (cache_hash_file.prefixed_path) |pp| { - if (pp.prefix != prefix or !mem.eql(u8, file_path, pp.sub_path)) { - return error.InvalidFormat; - } - } + if (file_path.len == 0) return error.InvalidFormat; - if (cache_hash_file.prefixed_path == null) { - cache_hash_file.prefixed_path = .{ + const cache_hash_file = f: { + const prefixed_path: PrefixedPath = .{ .prefix = prefix, - .sub_path = try gpa.dupe(u8, file_path), + .sub_path = file_path, // expires with file_contents }; - } + if (idx < input_file_count) { + const file = &self.files.keys()[idx]; + if (!file.prefixed_path.eql(prefixed_path)) + return error.InvalidFormat; + + file.stat = .{ + .size = stat_size, + .inode = stat_inode, + .mtime = stat_mtime, + }; + file.bin_digest = file_bin_digest; + break :f file; + } + const gop = try self.files.getOrPutAdapted(gpa, prefixed_path, FilesAdapter{}); + errdefer assert(self.files.popOrNull() != null); + if (!gop.found_existing) { + gop.key_ptr.* = .{ + .prefixed_path = .{ + .prefix = prefix, + .sub_path = try gpa.dupe(u8, file_path), + }, + .contents = null, + .max_file_size = null, + .stat = .{ + .size = stat_size, + .inode = stat_inode, + .mtime = stat_mtime, + }, + .bin_digest = file_bin_digest, + }; + } + break :f gop.key_ptr; + }; - const pp = cache_hash_file.prefixed_path.?; + const pp = cache_hash_file.prefixed_path; const dir = self.cache.prefixes()[pp.prefix].handle; const this_file = dir.openFile(pp.sub_path, .{ .mode = .read_only }) catch |err| switch (err) { error.FileNotFound => { @@ -548,7 +610,7 @@ pub const Manifest = struct { if (try self.upgradeToExclusiveLock()) continue; self.manifest_dirty = true; while (idx < input_file_count) : (idx += 1) { - const ch_file = &self.files.items[idx]; + const ch_file = &self.files.keys()[idx]; self.populateFileHash(ch_file) catch |err| { self.failed_file_index = idx; return err; @@ -571,12 +633,12 @@ pub const Manifest = struct { self.hash.hasher.update(&bin_digest); // Remove files not in the initial hash. - for (self.files.items[input_file_count..]) |*file| { + for (self.files.keys()[input_file_count..]) |*file| { file.deinit(self.cache.gpa); } self.files.shrinkRetainingCapacity(input_file_count); - for (self.files.items) |file| { + for (self.files.keys()) |file| { self.hash.hasher.update(&file.bin_digest); } } @@ -616,7 +678,7 @@ pub const Manifest = struct { } fn populateFileHash(self: *Manifest, ch_file: *File) !void { - const pp = ch_file.prefixed_path.?; + const pp = ch_file.prefixed_path; const dir = self.cache.prefixes()[pp.prefix].handle; const file = try dir.openFile(pp.sub_path, .{}); defer file.close(); @@ -682,7 +744,7 @@ pub const Manifest = struct { .bin_digest = undefined, .contents = null, }; - errdefer self.files.shrinkRetainingCapacity(self.files.items.len - 1); + errdefer self.files.shrinkRetainingCapacity(self.files.entries.len - 1); try self.populateFileHash(new_ch_file); @@ -690,9 +752,11 @@ pub const Manifest = struct { } /// Add a file as a dependency of process being cached, after the initial hash has been - /// calculated. This is useful for processes that don't know the all the files that - /// are depended on ahead of time. For example, a source file that can import other files - /// will need to be recompiled if the imported file is changed. + /// calculated. + /// + /// This is useful for processes that don't know the all the files that are + /// depended on ahead of time. For example, a source file that can import + /// other files will need to be recompiled if the imported file is changed. pub fn addFilePost(self: *Manifest, file_path: []const u8) !void { assert(self.manifest_file != null); @@ -700,17 +764,26 @@ pub const Manifest = struct { const prefixed_path = try self.cache.findPrefix(file_path); errdefer gpa.free(prefixed_path.sub_path); - const new_ch_file = try self.files.addOne(gpa); - new_ch_file.* = .{ + const gop = try self.files.getOrPutAdapted(gpa, prefixed_path, FilesAdapter{}); + errdefer assert(self.files.popOrNull() != null); + + if (gop.found_existing) { + gpa.free(prefixed_path.sub_path); + return; + } + + gop.key_ptr.* = .{ .prefixed_path = prefixed_path, .max_file_size = null, .stat = undefined, .bin_digest = undefined, .contents = null, }; - errdefer self.files.shrinkRetainingCapacity(self.files.items.len - 1); - try self.populateFileHash(new_ch_file); + self.files.lockPointers(); + defer self.files.unlockPointers(); + + try self.populateFileHash(gop.key_ptr); } /// Like `addFilePost` but when the file contents have already been loaded from disk. @@ -724,13 +797,20 @@ pub const Manifest = struct { assert(self.manifest_file != null); const gpa = self.cache.gpa; - const ch_file = try self.files.addOne(gpa); - errdefer self.files.shrinkRetainingCapacity(self.files.items.len - 1); - const prefixed_path = try self.cache.findPrefixResolved(resolved_path); errdefer gpa.free(prefixed_path.sub_path); - ch_file.* = .{ + const gop = try self.files.getOrPutAdapted(gpa, prefixed_path, FilesAdapter{}); + errdefer assert(self.files.popOrNull() != null); + + if (gop.found_existing) { + gpa.free(prefixed_path.sub_path); + return; + } + + const new_file = gop.key_ptr; + + new_file.* = .{ .prefixed_path = prefixed_path, .max_file_size = null, .stat = stat, @@ -738,19 +818,19 @@ pub const Manifest = struct { .contents = null, }; - if (self.isProblematicTimestamp(ch_file.stat.mtime)) { + if (self.isProblematicTimestamp(new_file.stat.mtime)) { // The actual file has an unreliable timestamp, force it to be hashed - ch_file.stat.mtime = 0; - ch_file.stat.inode = 0; + new_file.stat.mtime = 0; + new_file.stat.inode = 0; } { var hasher = hasher_init; hasher.update(bytes); - hasher.final(&ch_file.bin_digest); + hasher.final(&new_file.bin_digest); } - self.hash.hasher.update(&ch_file.bin_digest); + self.hash.hasher.update(&new_file.bin_digest); } pub fn addDepFilePost(self: *Manifest, dir: fs.Dir, dep_file_basename: []const u8) !void { @@ -816,14 +896,14 @@ pub const Manifest = struct { const writer = contents.writer(); try writer.writeAll(manifest_header ++ "\n"); - for (self.files.items) |file| { + for (self.files.keys()) |file| { try writer.print("{d} {d} {d} {} {d} {s}\n", .{ file.stat.size, file.stat.inode, file.stat.mtime, fmt.fmtSliceHexLower(&file.bin_digest), - file.prefixed_path.?.prefix, - file.prefixed_path.?.sub_path, + file.prefixed_path.prefix, + file.prefixed_path.sub_path, }); } @@ -892,7 +972,7 @@ pub const Manifest = struct { file.close(); } - for (self.files.items) |*file| { + for (self.files.keys()) |*file| { file.deinit(self.cache.gpa); } self.files.deinit(self.cache.gpa); @@ -1061,7 +1141,7 @@ test "check that changing a file makes cache fail" { // There should be nothing in the cache try testing.expectEqual(false, try ch.hit()); - try testing.expect(mem.eql(u8, original_temp_file_contents, ch.files.items[temp_file_idx].contents.?)); + try testing.expect(mem.eql(u8, original_temp_file_contents, ch.files.keys()[temp_file_idx].contents.?)); digest1 = ch.final(); @@ -1081,7 +1161,7 @@ test "check that changing a file makes cache fail" { try testing.expectEqual(false, try ch.hit()); // The cache system does not keep the contents of re-hashed input files. - try testing.expect(ch.files.items[temp_file_idx].contents == null); + try testing.expect(ch.files.keys()[temp_file_idx].contents == null); digest2 = ch.final(); diff --git a/lib/std/Build/Step.zig b/lib/std/Build/Step.zig index f67cba6c3f..48af9e54d6 100644 --- a/lib/std/Build/Step.zig +++ b/lib/std/Build/Step.zig @@ -544,7 +544,7 @@ pub fn cacheHit(s: *Step, man: *std.Build.Cache.Manifest) !bool { fn failWithCacheError(s: *Step, man: *const std.Build.Cache.Manifest, err: anyerror) anyerror { const i = man.failed_file_index orelse return err; - const pp = man.files.items[i].prefixed_path orelse return err; + const pp = man.files.keys()[i].prefixed_path; const prefix = man.cache.prefixes()[pp.prefix].path orelse ""; return s.fail("{s}: {s}/{s}", .{ @errorName(err), prefix, pp.sub_path }); } diff --git a/src/Compilation.zig b/src/Compilation.zig index 7a84848b80..2c047504e9 100644 --- a/src/Compilation.zig +++ b/src/Compilation.zig @@ -1999,7 +1999,7 @@ pub fn update(comp: *Compilation, main_progress_node: *std.Progress.Node) !void const is_hit = man.hit() catch |err| { const i = man.failed_file_index orelse return err; - const pp = man.files.items[i].prefixed_path orelse return err; + const pp = man.files.keys()[i].prefixed_path; const prefix = man.cache.prefixes()[pp.prefix]; return comp.setMiscFailure( .check_whole_cache, @@ -4147,7 +4147,7 @@ pub fn cImport(comp: *Compilation, c_src: []const u8, owner_mod: *Package.Module const prev_hash_state = man.hash.peekBin(); const actual_hit = hit: { _ = try man.hit(); - if (man.files.items.len == 0) { + if (man.files.entries.len == 0) { man.unhit(prev_hash_state, 0); break :hit false; } diff --git a/src/glibc.zig b/src/glibc.zig index 9765e0ad78..365f5cab2b 100644 --- a/src/glibc.zig +++ b/src/glibc.zig @@ -713,7 +713,7 @@ pub fn buildSharedObjects(comp: *Compilation, prog_node: *std.Progress.Node) !vo }; defer o_directory.handle.close(); - const abilists_contents = man.files.items[abilists_index].contents.?; + const abilists_contents = man.files.keys()[abilists_index].contents.?; const metadata = try loadMetaData(comp.gpa, abilists_contents); defer metadata.destroy(comp.gpa); -- cgit v1.2.3 From 950359071bca707dbc9763f1bf3ebc79cd52ebca Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Thu, 21 Mar 2024 19:56:47 -0700 Subject: std.Build.Cache: bump manifest_file_size_max to 100M Some users are hitting this limit. I think it's primarily due to not deduplicating (solved in the previous commit) but this seems like a better limit regardless. --- lib/std/Build/Cache.zig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/std/Build/Cache.zig') diff --git a/lib/std/Build/Cache.zig b/lib/std/Build/Cache.zig index cd57d3ad40..c4145a9f3f 100644 --- a/lib/std/Build/Cache.zig +++ b/lib/std/Build/Cache.zig @@ -122,7 +122,7 @@ pub const HexDigest = [hex_digest_len]u8; /// This is currently just an arbitrary non-empty string that can't match another manifest line. const manifest_header = "0"; -const manifest_file_size_max = 50 * 1024 * 1024; +const manifest_file_size_max = 100 * 1024 * 1024; /// The type used for hashing file contents. Currently, this is SipHash128(1, 3), because it /// provides enough collision resistance for the Manifest use cases, while being one of our -- cgit v1.2.3