From e3bed8d81dfd7198dd4c496f19a6791e27e41f26 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Wed, 29 Dec 2021 20:49:16 -0700 Subject: stage2: introduce CacheMode The two CacheMode values are `whole` and `incremental`. `incremental` is what we had before; `whole` is new. Whole cache mode uses everything as inputs to the cache hash, and when a hit occurs it skips everything, including linking. This is ideal for when source files change rarely and for backends that do not have good incremental compilation support, for example compiler-rt or libc compiled with LLVM with optimizations on. This is the main motivation for the additional mode, so that we can have LLVM-optimized compiler-rt/libc builds, without waiting for the LLVM backend every single time Zig is invoked. Incremental cache mode hashes only the input file path and a few target options, intentionally relying on collisions to locate already-existing build artifacts which can then be incrementally updated. The bespoke logic for caching stage1 backend build artifacts is removed since we now have a global caching mechanism for when we want to cache the entire compilation, *including* linking. Previously we had to get "creative" with libs.txt and a special byte in the hash id to communicate flags, so that when the cached artifacts were re-linked, we had this information from stage1 even though we didn't actually run it. Now that `CacheMode.whole` includes linking, this extra information does not need to be preserved for cache hits. So although this changeset introduces complexity, it also removes complexity. The main trickiness here comes from the inherent differences between the two modes: `incremental` wants a directory immediately to operate on, while `whole` doesn't know the output directory until the compilation is complete.
This commit deals with this problem mostly inside `update()`, where, on a cache miss, it replaces `zig_cache_artifact_directory` with a temporary directory, and then renames it into place once the compilation is complete. Items remaining before this branch can be merged: * [ ] make sure these things make it into the cache manifest: - @import files - @embedFile files - we already add dep files from c but make sure the main .c files make it in there too, not just the included files * [ ] double check that the emit paths of other things besides the binary are working correctly. * [ ] test `-fno-emit-bin` + `-fstage1` * [ ] test `-femit-bin=foo` + `-fstage1` * [ ] implib emit directory copies bin_file_emit directory in create() and needs to be adjusted to be overridden as well. * [ ] make sure emit-h is handled correctly in the cache hash * [ ] Cache: detect duplicate files added to the manifest Some preliminary performance measurements of wall clock time and peak RSS used: stage1 behavior (1077 tests), llvm backend, release build: * cold global cache: 4.6s, 1.1 GiB * warm global cache: 3.4s, 980 MiB stage2 master branch behavior (575 tests), llvm backend, release build: * cold global cache: 0.62s, 191 MiB * warm global cache: 0.40s, 128 MiB stage2 this branch behavior (575 tests), llvm backend, release build: * cold global cache: 0.62s, 179 MiB * warm global cache: 0.27s, 90 MiB --- src/Module.zig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/Module.zig') diff --git a/src/Module.zig b/src/Module.zig index ef01aa2c54..fa79783c6a 100644 --- a/src/Module.zig +++ b/src/Module.zig @@ -33,7 +33,7 @@ const build_options = @import("build_options"); gpa: Allocator, comp: *Compilation, -/// Where our incremental compilation metadata serialization will go. +/// Where build artifacts and incremental compilation metadata serialization go. zig_cache_artifact_directory: Compilation.Directory, /// Pointer to externally managed resource. 
root_pkg: *Package, -- cgit v1.2.3 From e36718165cdc29b777392a3a343d92ccd1c6acf3 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Thu, 30 Dec 2021 16:42:32 -0700 Subject: stage2: add `@import` and `@embedFile` to CacheHash when using `CacheMode.whole`. Also, I verified that `addDepFilePost` is in fact including the original C source file in addition to the files it depends on. --- src/Cache.zig | 57 ++++++++++++++++++++++++++++++++++++++++++++++++++++++--- src/Module.zig | 23 +++++++++++++++++++++++ 2 files changed, 77 insertions(+), 3 deletions(-) (limited to 'src/Module.zig') diff --git a/src/Cache.zig b/src/Cache.zig index 94ad947f69..a5995f64ea 100644 --- a/src/Cache.zig +++ b/src/Cache.zig @@ -47,10 +47,16 @@ pub const hasher_init: Hasher = Hasher.init(&[_]u8{0} ** Hasher.key_length); pub const File = struct { path: ?[]const u8, max_file_size: ?usize, - stat: fs.File.Stat, + stat: Stat, bin_digest: BinDigest, contents: ?[]const u8, + pub const Stat = struct { + inode: fs.File.INode, + size: u64, + mtime: i128, + }; + pub fn deinit(self: *File, allocator: Allocator) void { if (self.path) |owned_slice| { allocator.free(owned_slice); @@ -424,7 +430,11 @@ pub const Manifest = struct { if (!size_match or !mtime_match or !inode_match) { self.manifest_dirty = true; - cache_hash_file.stat = actual_stat; + cache_hash_file.stat = .{ + .size = actual_stat.size, + .mtime = actual_stat.mtime, + .inode = actual_stat.inode, + }; if (self.isProblematicTimestamp(cache_hash_file.stat.mtime)) { // The actual file has an unreliable timestamp, force it to be hashed @@ -530,7 +540,12 @@ pub const Manifest = struct { const file = try fs.cwd().openFile(ch_file.path.?, .{}); defer file.close(); - ch_file.stat = try file.stat(); + const actual_stat = try file.stat(); + ch_file.stat = .{ + .size = actual_stat.size, + .mtime = actual_stat.mtime, + .inode = actual_stat.inode, + }; if (self.isProblematicTimestamp(ch_file.stat.mtime)) { // The actual file has an unreliable timestamp, 
force it to be hashed @@ -615,6 +630,42 @@ pub const Manifest = struct { try self.populateFileHash(new_ch_file); } + /// Like `addFilePost` but when the file contents have already been loaded from disk. + /// On success, cache takes ownership of `resolved_path`. + pub fn addFilePostContents( + self: *Manifest, + resolved_path: []const u8, + bytes: []const u8, + stat: File.Stat, + ) error{OutOfMemory}!void { + assert(self.manifest_file != null); + + const ch_file = try self.files.addOne(self.cache.gpa); + errdefer self.files.shrinkRetainingCapacity(self.files.items.len - 1); + + ch_file.* = .{ + .path = resolved_path, + .max_file_size = null, + .stat = stat, + .bin_digest = undefined, + .contents = null, + }; + + if (self.isProblematicTimestamp(ch_file.stat.mtime)) { + // The actual file has an unreliable timestamp, force it to be hashed + ch_file.stat.mtime = 0; + ch_file.stat.inode = 0; + } + + { + var hasher = hasher_init; + hasher.update(bytes); + hasher.final(&ch_file.bin_digest); + } + + self.hash.hasher.update(&ch_file.bin_digest); + } + pub fn addDepFilePost(self: *Manifest, dir: fs.Dir, dep_file_basename: []const u8) !void { assert(self.manifest_file != null); diff --git a/src/Module.zig b/src/Module.zig index fa79783c6a..6742ddd486 100644 --- a/src/Module.zig +++ b/src/Module.zig @@ -3380,6 +3380,19 @@ pub fn semaFile(mod: *Module, file: *File) SemaError!void { error.OutOfMemory => return error.OutOfMemory, error.AnalysisFail => {}, } + + if (mod.comp.whole_cache_manifest) |man| { + assert(file.source_loaded); + const resolved_path = try file.pkg.root_src_directory.join(gpa, &.{ + file.sub_file_path, + }); + errdefer gpa.free(resolved_path); + try man.addFilePostContents(resolved_path, file.source, .{ + .size = file.stat_size, + .inode = file.stat_inode, + .mtime = file.stat_mtime, + }); + } } else { new_decl.analysis = .file_failure; } @@ -3836,6 +3849,16 @@ pub fn embedFile(mod: *Module, cur_file: *File, rel_file_path: []const u8) !*Emb 
resolved_root_path, resolved_path, sub_file_path, rel_file_path, }); + if (mod.comp.whole_cache_manifest) |man| { + const copied_resolved_path = try gpa.dupe(u8, resolved_path); + errdefer gpa.free(copied_resolved_path); + try man.addFilePostContents(copied_resolved_path, bytes, .{ + .size = stat.size, + .inode = stat.inode, + .mtime = stat.mtime, + }); + } + keep_resolved_path = true; // It's now owned by embed_table. gop.value_ptr.* = new_file; new_file.* = .{ -- cgit v1.2.3 From 0ad2a99675d331c686847a7b2a84feddfcce6573 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Thu, 30 Dec 2021 20:49:02 -0700 Subject: stage2: CacheMode.whole: trigger loading zig source files Previously the code asserted source files were already loaded, but this is not the case when cached ZIR is loaded. Now it will trigger .zig source code to be loaded for the purposes of hashing the source for `CacheMode.whole`. This additionally refactors stat_size, stat_inode, and stat_mtime fields into using the `Cache.File.Stat` struct. 
--- src/Compilation.zig | 4 +- src/Module.zig | 156 ++++++++++++++++++++++++++++++++++------------------ src/main.zig | 34 ++++++------ 3 files changed, 121 insertions(+), 73 deletions(-) (limited to 'src/Module.zig') diff --git a/src/Compilation.zig b/src/Compilation.zig index ddf331d4f3..e71c84af33 100644 --- a/src/Compilation.zig +++ b/src/Compilation.zig @@ -421,7 +421,7 @@ pub const AllErrors = struct { const module_note = module_err_msg.notes[i]; const source = try module_note.src_loc.file_scope.getSource(module.gpa); const byte_offset = try module_note.src_loc.byteOffset(module.gpa); - const loc = std.zig.findLineColumn(source, byte_offset); + const loc = std.zig.findLineColumn(source.bytes, byte_offset); const file_path = try module_note.src_loc.file_scope.fullPath(allocator); note.* = .{ .src = .{ @@ -444,7 +444,7 @@ pub const AllErrors = struct { } const source = try module_err_msg.src_loc.file_scope.getSource(module.gpa); const byte_offset = try module_err_msg.src_loc.byteOffset(module.gpa); - const loc = std.zig.findLineColumn(source, byte_offset); + const loc = std.zig.findLineColumn(source.bytes, byte_offset); const file_path = try module_err_msg.src_loc.file_scope.fullPath(allocator); try errors.append(.{ .src = .{ diff --git a/src/Module.zig b/src/Module.zig index 6742ddd486..0cbf75c735 100644 --- a/src/Module.zig +++ b/src/Module.zig @@ -1463,11 +1463,7 @@ pub const File = struct { /// Whether this is populated depends on `source_loaded`. source: [:0]const u8, /// Whether this is populated depends on `status`. - stat_size: u64, - /// Whether this is populated depends on `status`. - stat_inode: std.fs.File.INode, - /// Whether this is populated depends on `status`. - stat_mtime: i128, + stat: Cache.File.Stat, /// Whether this is populated or not depends on `tree_loaded`. tree: Ast, /// Whether this is populated or not depends on `zir_loaded`. 
@@ -1535,8 +1531,16 @@ pub const File = struct { file.* = undefined; } - pub fn getSource(file: *File, gpa: Allocator) ![:0]const u8 { - if (file.source_loaded) return file.source; + pub const Source = struct { + bytes: [:0]const u8, + stat: Cache.File.Stat, + }; + + pub fn getSource(file: *File, gpa: Allocator) !Source { + if (file.source_loaded) return Source{ + .bytes = file.source, + .stat = file.stat, + }; const root_dir_path = file.pkg.root_src_directory.path orelse "."; log.debug("File.getSource, not cached. pkgdir={s} sub_file_path={s}", .{ @@ -1565,14 +1569,21 @@ pub const File = struct { file.source = source; file.source_loaded = true; - return source; + return Source{ + .bytes = source, + .stat = .{ + .size = stat.size, + .inode = stat.inode, + .mtime = stat.mtime, + }, + }; } pub fn getTree(file: *File, gpa: Allocator) !*const Ast { if (file.tree_loaded) return &file.tree; const source = try file.getSource(gpa); - file.tree = try std.zig.parse(gpa, source); + file.tree = try std.zig.parse(gpa, source.bytes); file.tree_loaded = true; return &file.tree; } @@ -1631,9 +1642,7 @@ pub const EmbedFile = struct { /// Memory is stored in gpa, owned by EmbedFile. sub_file_path: []const u8, bytes: [:0]const u8, - stat_size: u64, - stat_inode: std.fs.File.INode, - stat_mtime: i128, + stat: Cache.File.Stat, /// Package that this file is a part of, managed externally. pkg: *Package, /// The Decl that was created from the `@embedFile` to own this resource. 
@@ -2704,9 +2713,11 @@ pub fn astGenFile(mod: *Module, file: *File) !void { keep_zir = true; file.zir = zir; file.zir_loaded = true; - file.stat_size = header.stat_size; - file.stat_inode = header.stat_inode; - file.stat_mtime = header.stat_mtime; + file.stat = .{ + .size = header.stat_size, + .inode = header.stat_inode, + .mtime = header.stat_mtime, + }; file.status = .success_zir; log.debug("AstGen cached success: {s}", .{file.sub_file_path}); @@ -2724,9 +2735,9 @@ pub fn astGenFile(mod: *Module, file: *File) !void { }, .parse_failure, .astgen_failure, .success_zir => { const unchanged_metadata = - stat.size == file.stat_size and - stat.mtime == file.stat_mtime and - stat.inode == file.stat_inode; + stat.size == file.stat.size and + stat.mtime == file.stat.mtime and + stat.inode == file.stat.inode; if (unchanged_metadata) { log.debug("unmodified metadata of file: {s}", .{file.sub_file_path}); @@ -2787,9 +2798,11 @@ pub fn astGenFile(mod: *Module, file: *File) !void { if (amt != stat.size) return error.UnexpectedEndOfFile; - file.stat_size = stat.size; - file.stat_inode = stat.inode; - file.stat_mtime = stat.mtime; + file.stat = .{ + .size = stat.size, + .inode = stat.inode, + .mtime = stat.mtime, + }; file.source = source; file.source_loaded = true; @@ -3069,9 +3082,11 @@ pub fn populateBuiltinFile(mod: *Module) !void { try writeBuiltinFile(file, builtin_pkg); } else { - file.stat_size = stat.size; - file.stat_inode = stat.inode; - file.stat_mtime = stat.mtime; + file.stat = .{ + .size = stat.size, + .inode = stat.inode, + .mtime = stat.mtime, + }; } } else |err| switch (err) { error.BadPathName => unreachable, // it's always "builtin.zig" @@ -3099,9 +3114,11 @@ pub fn writeBuiltinFile(file: *File, builtin_pkg: *Package) !void { try af.file.writeAll(file.source); try af.finish(); - file.stat_size = file.source.len; - file.stat_inode = 0; // dummy value - file.stat_mtime = 0; // dummy value + file.stat = .{ + .size = file.source.len, + .inode = 0, // dummy value + 
.mtime = 0, // dummy value + }; } pub fn mapOldZirToNew( @@ -3382,16 +3399,16 @@ pub fn semaFile(mod: *Module, file: *File) SemaError!void { } if (mod.comp.whole_cache_manifest) |man| { - assert(file.source_loaded); + const source = file.getSource(gpa) catch |err| { + try reportRetryableFileError(mod, file, "unable to load source: {s}", .{@errorName(err)}); + return error.AnalysisFail; + }; const resolved_path = try file.pkg.root_src_directory.join(gpa, &.{ file.sub_file_path, }); errdefer gpa.free(resolved_path); - try man.addFilePostContents(resolved_path, file.source, .{ - .size = file.stat_size, - .inode = file.stat_inode, - .mtime = file.stat_mtime, - }); + + try man.addFilePostContents(resolved_path, source.bytes, source.stat); } } else { new_decl.analysis = .file_failure; @@ -3723,9 +3740,7 @@ pub fn importPkg(mod: *Module, pkg: *Package) !ImportFileResult { .source_loaded = false, .tree_loaded = false, .zir_loaded = false, - .stat_size = undefined, - .stat_inode = undefined, - .stat_mtime = undefined, + .stat = undefined, .tree = undefined, .zir = undefined, .status = .never_loaded, @@ -3793,9 +3808,7 @@ pub fn importFile( .source_loaded = false, .tree_loaded = false, .zir_loaded = false, - .stat_size = undefined, - .stat_inode = undefined, - .stat_mtime = undefined, + .stat = undefined, .tree = undefined, .zir = undefined, .status = .never_loaded, @@ -3840,8 +3853,13 @@ pub fn embedFile(mod: *Module, cur_file: *File, rel_file_path: []const u8) !*Emb var file = try cur_file.pkg.root_src_directory.handle.openFile(sub_file_path, .{}); defer file.close(); - const stat = try file.stat(); - const size_usize = try std.math.cast(usize, stat.size); + const actual_stat = try file.stat(); + const stat: Cache.File.Stat = .{ + .size = actual_stat.size, + .inode = actual_stat.inode, + .mtime = actual_stat.mtime, + }; + const size_usize = try std.math.cast(usize, actual_stat.size); const bytes = try file.readToEndAllocOptions(gpa, std.math.maxInt(u32), size_usize, 1, 0); 
errdefer gpa.free(bytes); @@ -3852,11 +3870,7 @@ pub fn embedFile(mod: *Module, cur_file: *File, rel_file_path: []const u8) !*Emb if (mod.comp.whole_cache_manifest) |man| { const copied_resolved_path = try gpa.dupe(u8, resolved_path); errdefer gpa.free(copied_resolved_path); - try man.addFilePostContents(copied_resolved_path, bytes, .{ - .size = stat.size, - .inode = stat.inode, - .mtime = stat.mtime, - }); + try man.addFilePostContents(copied_resolved_path, bytes, stat); } keep_resolved_path = true; // It's now owned by embed_table. @@ -3864,9 +3878,7 @@ pub fn embedFile(mod: *Module, cur_file: *File, rel_file_path: []const u8) !*Emb new_file.* = .{ .sub_file_path = sub_file_path, .bytes = bytes, - .stat_size = stat.size, - .stat_inode = stat.inode, - .stat_mtime = stat.mtime, + .stat = stat, .pkg = cur_file.pkg, .owner_decl = undefined, // Set by Sema immediately after this function returns. }; @@ -3880,9 +3892,9 @@ pub fn detectEmbedFileUpdate(mod: *Module, embed_file: *EmbedFile) !void { const stat = try file.stat(); const unchanged_metadata = - stat.size == embed_file.stat_size and - stat.mtime == embed_file.stat_mtime and - stat.inode == embed_file.stat_inode; + stat.size == embed_file.stat.size and + stat.mtime == embed_file.stat.mtime and + stat.inode == embed_file.stat.inode; if (unchanged_metadata) return; @@ -3891,9 +3903,11 @@ pub fn detectEmbedFileUpdate(mod: *Module, embed_file: *EmbedFile) !void { const bytes = try file.readToEndAllocOptions(gpa, std.math.maxInt(u32), size_usize, 1, 0); gpa.free(embed_file.bytes); embed_file.bytes = bytes; - embed_file.stat_size = stat.size; - embed_file.stat_mtime = stat.mtime; - embed_file.stat_inode = stat.inode; + embed_file.stat = .{ + .size = stat.size, + .mtime = stat.mtime, + .inode = stat.inode, + }; mod.comp.mutex.lock(); defer mod.comp.mutex.unlock(); @@ -5024,3 +5038,35 @@ pub fn linkerUpdateDecl(mod: *Module, decl: *Decl) !void { }, }; } + +fn reportRetryableFileError( + mod: *Module, + file: *File, + 
comptime format: []const u8, + args: anytype, +) error{OutOfMemory}!void { + file.status = .retryable_failure; + + const err_msg = try ErrorMsg.create( + mod.gpa, + .{ + .file_scope = file, + .parent_decl_node = 0, + .lazy = .entire_file, + }, + format, + args, + ); + errdefer err_msg.destroy(mod.gpa); + + mod.comp.mutex.lock(); + defer mod.comp.mutex.unlock(); + + const gop = try mod.failed_files.getOrPut(mod.gpa, file); + if (gop.found_existing) { + if (gop.value_ptr.*) |old_err_msg| { + old_err_msg.destroy(mod.gpa); + } + } + gop.value_ptr.* = err_msg; +} diff --git a/src/main.zig b/src/main.zig index 6f55e46eb1..4747772b8a 100644 --- a/src/main.zig +++ b/src/main.zig @@ -3664,9 +3664,7 @@ pub fn cmdFmt(gpa: Allocator, arena: Allocator, args: []const []const u8) !void .zir_loaded = false, .sub_file_path = "", .source = source_code, - .stat_size = undefined, - .stat_inode = undefined, - .stat_mtime = undefined, + .stat = undefined, .tree = tree, .tree_loaded = true, .zir = undefined, @@ -3860,9 +3858,11 @@ fn fmtPathFile( .zir_loaded = false, .sub_file_path = file_path, .source = source_code, - .stat_size = stat.size, - .stat_inode = stat.inode, - .stat_mtime = stat.mtime, + .stat = .{ + .size = stat.size, + .inode = stat.inode, + .mtime = stat.mtime, + }, .tree = tree, .tree_loaded = true, .zir = undefined, @@ -4458,9 +4458,7 @@ pub fn cmdAstCheck( .zir_loaded = false, .sub_file_path = undefined, .source = undefined, - .stat_size = undefined, - .stat_inode = undefined, - .stat_mtime = undefined, + .stat = undefined, .tree = undefined, .zir = undefined, .pkg = undefined, @@ -4485,9 +4483,11 @@ pub fn cmdAstCheck( file.sub_file_path = file_name; file.source = source; file.source_loaded = true; - file.stat_size = stat.size; - file.stat_inode = stat.inode; - file.stat_mtime = stat.mtime; + file.stat = .{ + .size = stat.size, + .inode = stat.inode, + .mtime = stat.mtime, + }; } else { const stdin = io.getStdIn(); const source = readSourceFileToEndAlloc(arena, &stdin, 
null) catch |err| { @@ -4496,7 +4496,7 @@ pub fn cmdAstCheck( file.sub_file_path = ""; file.source = source; file.source_loaded = true; - file.stat_size = source.len; + file.stat.size = source.len; } file.pkg = try Package.create(gpa, null, file.sub_file_path); @@ -4609,9 +4609,11 @@ pub fn cmdChangelist( .zir_loaded = false, .sub_file_path = old_source_file, .source = undefined, - .stat_size = stat.size, - .stat_inode = stat.inode, - .stat_mtime = stat.mtime, + .stat = .{ + .size = stat.size, + .inode = stat.inode, + .mtime = stat.mtime, + }, .tree = undefined, .zir = undefined, .pkg = undefined, -- cgit v1.2.3