From e3bed8d81dfd7198dd4c496f19a6791e27e41f26 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Wed, 29 Dec 2021 20:49:16 -0700 Subject: stage2: introduce CacheMode The two CacheMode values are `whole` and `incremental`. `incremental` is what we had before; `whole` is new. Whole cache mode uses everything as inputs to the cache hash; and when a hit occurs it skips everything including linking. This is ideal for when source files change rarely and for backends that do not have good incremental compilation support, for example compiler-rt or libc compiled with LLVM with optimizations on. This is the main motivation for the additional mode, so that we can have LLVM-optimized compiler-rt/libc builds, without waiting for the LLVM backend every single time Zig is invoked. Incremental cache mode hashes only the input file path and a few target options, intentionally relying on collisions to locate already-existing build artifacts which can then be incrementally updated. The bespoke logic for caching stage1 backend build artifacts is removed since we now have a global caching mechanism for when we want to cache the entire compilation, *including* linking. Previously we had to get "creative" with libs.txt and a special byte in the hash id to communicate flags, so that when the cached artifacts were re-linked, we had this information from stage1 even though we didn't actually run it. Now that `CacheMode.whole` includes linking, this extra information does not need to be preserved for cache hits. So although this changeset introduces complexity, it also removes complexity. The main trickiness here comes from the inherent differences between the two modes: `incremental` wants a directory immediately to operate on, while `whole` doesn't know the output directory until the compilation is complete. This commit deals with this problem mostly inside `update()`, where, on a cache miss, it replaces `zig_cache_artifact_directory` with a temporary directory, and then renames it into place once the compilation is complete. Items remaining before this branch can be merged: * [ ] make sure these things make it into the cache manifest: - @import files - @embedFile files - we already add dep files from c but make sure the main .c files make it in there too, not just the included files * [ ] double check that the emit paths of other things besides the binary are working correctly. * [ ] test `-fno-emit-bin` + `-fstage1` * [ ] test `-femit-bin=foo` + `-fstage1` * [ ] implib emit directory copies bin_file_emit directory in create() and needs to be adjusted to be overridden as well. * [ ] make sure emit-h is handled correctly in the cache hash * [ ] Cache: detect duplicate files added to the manifest Some preliminary performance measurements of wall clock time and peak RSS used: stage1 behavior (1077 tests), llvm backend, release build: * cold global cache: 4.6s, 1.1 GiB * warm global cache: 3.4s, 980 MiB stage2 master branch behavior (575 tests), llvm backend, release build: * cold global cache: 0.62s, 191 MiB * warm global cache: 0.40s, 128 MiB stage2 this branch behavior (575 tests), llvm backend, release build: * cold global cache: 0.62s, 179 MiB * warm global cache: 0.27s, 90 MiB --- src/Compilation.zig | 589 +++++++++++++++++++++++++++++++++------------------- 1 file changed, 380 insertions(+), 209 deletions(-) (limited to 'src/Compilation.zig') diff --git a/src/Compilation.zig b/src/Compilation.zig index cd4794554e..8afb373699 100644 --- a/src/Compilation.zig +++ b/src/Compilation.zig @@ -41,8 +41,8 @@ gpa: Allocator, arena_state: std.heap.ArenaAllocator.State, bin_file: *link.File, c_object_table: std.AutoArrayHashMapUnmanaged(*CObject, void) = .{}, -stage1_lock: ?Cache.Lock = null, -stage1_cache_manifest: *Cache.Manifest = undefined, +/// This is a pointer to a local variable inside `update()`. +whole_cache_manifest: ?*Cache.Manifest = null, link_error_flags: link.File.ErrorFlags = .{}, @@ -98,6 +98,9 @@ clang_argv: []const []const u8, cache_parent: *Cache, /// Path to own executable for invoking `zig clang`. self_exe_path: ?[]const u8, +/// null means -fno-emit-bin. Contains the basename of the +/// outputted binary file in case we don't know the directory yet. +whole_bin_basename: ?[]const u8, zig_lib_directory: Directory, local_cache_directory: Directory, global_cache_directory: Directory, @@ -612,6 +615,15 @@ pub const Directory = struct { return std.fs.path.joinZ(allocator, paths); } } + + /// Whether or not the handle should be closed, or the path should be freed + /// is determined by usage, however this function is provided for convenience + /// if it happens to be what the caller needs. + pub fn closeAndFree(self: *Directory, gpa: Allocator) void { + self.handle.close(); + if (self.path) |p| gpa.free(p); + self.* = undefined; + } }; pub const EmitLoc = struct { @@ -631,6 +643,7 @@ pub const ClangPreprocessorMode = enum { }; pub const SystemLib = link.SystemLib; +pub const CacheMode = link.CacheMode; pub const InitOptions = struct { zig_lib_directory: Directory, @@ -668,6 +681,7 @@ pub const InitOptions = struct { /// is externally modified - essentially anything other than zig-cache - then /// this flag would be set to disable this machinery to avoid false positives. disable_lld_caching: bool = false, + cache_mode: CacheMode = .incremental, object_format: ?std.Target.ObjectFormat = null, optimize_mode: std.builtin.Mode = .Debug, keep_source_files_loaded: bool = false, @@ -885,6 +899,8 @@ pub fn create(gpa: Allocator, options: InitOptions) !*Compilation { break :blk build_options.is_stage1; }; + const cache_mode = if (use_stage1) CacheMode.whole else options.cache_mode; + // Make a decision on whether to use LLVM or our own backend. const use_llvm = build_options.have_llvm and blk: { if (options.use_llvm) |explicit| @@ -1219,18 +1235,26 @@ pub fn create(gpa: Allocator, options: InitOptions) !*Compilation { // modified between incremental updates. var hash = cache.hash; - // Here we put the root source file path name, but *not* with addFile. We want the - // hash to be the same regardless of the contents of the source file, because - // incremental compilation will handle it, but we do want to namespace different - // source file names because they are likely different compilations and therefore this - // would be likely to cause cache hits. - hash.addBytes(main_pkg.root_src_path); - hash.addOptionalBytes(main_pkg.root_src_directory.path); - { - var local_arena = std.heap.ArenaAllocator.init(gpa); - defer local_arena.deinit(); - var seen_table = std.AutoHashMap(*Package, void).init(local_arena.allocator()); - try addPackageTableToCacheHash(&hash, &local_arena, main_pkg.table, &seen_table, .path_bytes); + switch (cache_mode) { + .incremental => { + // Here we put the root source file path name, but *not* with addFile. + // We want the hash to be the same regardless of the contents of the + // source file, because incremental compilation will handle it, but we + // do want to namespace different source file names because they are + // likely different compilations and therefore this would be likely to + // cause cache hits. + hash.addBytes(main_pkg.root_src_path); + hash.addOptionalBytes(main_pkg.root_src_directory.path); + { + var seen_table = std.AutoHashMap(*Package, void).init(arena); + try addPackageTableToCacheHash(&hash, &arena_allocator, main_pkg.table, &seen_table, .path_bytes); + } + }, + .whole => { + // In this case, we postpone adding the input source file until + // we create the cache manifest, in update(), because we want to + // track it and packages as files. + }, } hash.add(valgrind); hash.add(single_threaded); @@ -1238,9 +1262,35 @@ pub fn create(gpa: Allocator, options: InitOptions) !*Compilation { hash.add(use_llvm); hash.add(dll_export_fns); hash.add(options.is_test); + hash.add(options.test_evented_io); + hash.addOptionalBytes(options.test_filter); + hash.addOptionalBytes(options.test_name_prefix); hash.add(options.skip_linker_dependencies); hash.add(options.parent_compilation_link_libc); + // In the case of incremental cache mode, this `zig_cache_artifact_directory` + // is computed based on a hash of non-linker inputs, and it is where all + // build artifacts are stored (even while in-progress). + // + // For whole cache mode, it is still used for builtin.zig so that the file + // path to builtin.zig can remain consistent during a debugging session at + // runtime. However, we don't know where to put outputs from the linker + // or stage1 backend object files until the final cache hash, which is available + // after the compilation is complete. + // + // Therefore, in whole cache mode, we additionally create a temporary cache + // directory for these two kinds of build artifacts, and then rename it + // into place after the final hash is known. However, we don't want + // to create the temporary directory here, because in the case of a cache hit, + // this would have been wasted syscalls to make the directory and then not + // use it (or delete it). + // + // In summary, for whole cache mode, we simulate `-fno-emit-bin` in this + // function, and `zig_cache_artifact_directory` is *wrong* except for builtin.zig, + // and then at the beginning of `update()` when we find out whether we need + // a temporary directory, we patch up all the places that the incorrect + // `zig_cache_artifact_directory` was passed to various components of the compiler. + const digest = hash.final(); const artifact_sub_dir = try std.fs.path.join(arena, &[_][]const u8{ "o", &digest }); var artifact_dir = try options.local_cache_directory.handle.makeOpenPath(artifact_sub_dir, .{}); @@ -1374,6 +1424,11 @@ pub fn create(gpa: Allocator, options: InitOptions) !*Compilation { }; } + switch (cache_mode) { + .whole => break :blk null, + .incremental => {}, + } + if (module) |zm| { break :blk link.Emit{ .directory = zm.zig_cache_artifact_directory, @@ -1425,6 +1480,18 @@ pub fn create(gpa: Allocator, options: InitOptions) !*Compilation { }; }; + // This is so that when doing `CacheMode.whole`, the mechanism in update() + // can use it for communicating the result directory via `bin_file.emit`. + // This is used to distinguish between -fno-emit-bin and -femit-bin + // for `CacheMode.whole`. + const whole_bin_basename: ?[]const u8 = if (options.emit_bin) |x| + if (x.directory == null) + x.basename + else + null + else + null; + var system_libs: std.StringArrayHashMapUnmanaged(SystemLib) = .{}; errdefer system_libs.deinit(gpa); try system_libs.ensureTotalCapacity(gpa, options.system_lib_names.len); @@ -1512,7 +1579,8 @@ pub fn create(gpa: Allocator, options: InitOptions) !*Compilation { .skip_linker_dependencies = options.skip_linker_dependencies, .parent_compilation_link_libc = options.parent_compilation_link_libc, .each_lib_rpath = options.each_lib_rpath orelse options.is_native_os, - .disable_lld_caching = options.disable_lld_caching, + .cache_mode = cache_mode, + .disable_lld_caching = options.disable_lld_caching or cache_mode == .whole, .subsystem = options.subsystem, .is_test = options.is_test, .wasi_exec_model = wasi_exec_model, @@ -1529,6 +1597,7 @@ pub fn create(gpa: Allocator, options: InitOptions) !*Compilation { .local_cache_directory = options.local_cache_directory, .global_cache_directory = options.global_cache_directory, .bin_file = bin_file, + .whole_bin_basename = whole_bin_basename, .emit_asm = options.emit_asm, .emit_llvm_ir = options.emit_llvm_ir, .emit_llvm_bc = options.emit_llvm_bc, @@ -1725,20 +1794,11 @@ pub fn create(gpa: Allocator, options: InitOptions) !*Compilation { return comp; } -fn releaseStage1Lock(comp: *Compilation) void { - if (comp.stage1_lock) |*lock| { - lock.release(); - comp.stage1_lock = null; - } -} - pub fn destroy(self: *Compilation) void { const optional_module = self.bin_file.options.module; self.bin_file.destroy(); if (optional_module) |module| module.deinit(); - self.releaseStage1Lock(); - const gpa = self.gpa; self.work_queue.deinit(); self.anon_work_queue.deinit(); @@ -1815,22 +1875,135 @@ pub fn getTarget(self: Compilation) Target { return self.bin_file.options.target; } +fn restorePrevZigCacheArtifactDirectory(comp: *Compilation, directory: *Directory) void { + if (directory.path) |p| comp.gpa.free(p); + + // Restore the Module's previous zig_cache_artifact_directory + // This is only for cleanup purposes; Module.deinit calls close + // on the handle of zig_cache_artifact_directory. + if (comp.bin_file.options.module) |module| { + const builtin_pkg = module.main_pkg.table.get("builtin").?; + module.zig_cache_artifact_directory = builtin_pkg.root_src_directory; + } +} + +fn cleanupTmpArtifactDirectory( + comp: *Compilation, + tmp_artifact_directory: *?Directory, + tmp_dir_sub_path: []const u8, +) void { + comp.gpa.free(tmp_dir_sub_path); + if (tmp_artifact_directory.*) |*directory| { + directory.handle.close(); + restorePrevZigCacheArtifactDirectory(comp, directory); + } +} + /// Detect changes to source files, perform semantic analysis, and update the output files. -pub fn update(self: *Compilation) !void { +pub fn update(comp: *Compilation) !void { const tracy_trace = trace(@src()); defer tracy_trace.end(); - self.clearMiscFailures(); + comp.clearMiscFailures(); + + var man: Cache.Manifest = undefined; + defer if (comp.whole_cache_manifest != null) man.deinit(); + + var tmp_dir_sub_path: []const u8 = &.{}; + var tmp_artifact_directory: ?Directory = null; + defer cleanupTmpArtifactDirectory(comp, &tmp_artifact_directory, tmp_dir_sub_path); + + // If using the whole caching strategy, we check for *everything* up front, including + // C source files. + if (comp.bin_file.options.cache_mode == .whole) { + // We are about to obtain this lock, so here we give other processes a chance first. + comp.bin_file.releaseLock(); + + man = comp.cache_parent.obtain(); + try comp.addNonIncrementalStuffToCacheManifest(&man); + + const is_hit = man.hit() catch |err| { + // TODO properly bubble these up instead of emitting a warning + const i = man.failed_file_index orelse return err; + const file_path = man.files.items[i].path orelse return err; + std.log.warn("{s}: {s}", .{ @errorName(err), file_path }); + return err; + }; + if (is_hit) { + const digest = man.final(); + + // Communicate the output binary location to parent Compilations. + if (comp.whole_bin_basename) |basename| { + const new_sub_path = try std.fs.path.join(comp.gpa, &.{ + "o", &digest, basename, + }); + if (comp.bin_file.options.emit) |emit| { + comp.gpa.free(emit.sub_path); + } + comp.bin_file.options.emit = .{ + .directory = comp.local_cache_directory, + .sub_path = new_sub_path, + }; + } + + comp.emitOthers(); + + assert(comp.bin_file.lock == null); + comp.bin_file.lock = man.toOwnedLock(); + return; + } + comp.whole_cache_manifest = &man; + + // Initialize `bin_file.emit` with a temporary Directory so that compilation can + // continue on the same path as incremental, using the temporary Directory. + tmp_artifact_directory = d: { + const s = std.fs.path.sep_str; + const rand_int = std.crypto.random.int(u64); + + tmp_dir_sub_path = try std.fmt.allocPrint(comp.gpa, "tmp" ++ s ++ "{x}", .{rand_int}); + + const path = try comp.local_cache_directory.join(comp.gpa, &.{tmp_dir_sub_path}); + errdefer comp.gpa.free(path); + + const handle = try comp.local_cache_directory.handle.makeOpenPath(tmp_dir_sub_path, .{}); + errdefer handle.close(); + + break :d .{ + .path = path, + .handle = handle, + }; + }; + + // This updates the output directory for stage1 backend and linker outputs. + if (comp.bin_file.options.module) |module| { + module.zig_cache_artifact_directory = tmp_artifact_directory.?; + } + + // This resets the link.File to operate as if we called openPath() in create() + // instead of simulating -fno-emit-bin. + var options = comp.bin_file.options; + if (comp.whole_bin_basename) |basename| { + if (options.emit) |emit| { + comp.gpa.free(emit.sub_path); + } + options.emit = .{ + .directory = tmp_artifact_directory.?, + .sub_path = basename, + }; + } + comp.bin_file.destroy(); + comp.bin_file = try link.File.openPath(comp.gpa, options); + } // For compiling C objects, we rely on the cache hash system to avoid duplicating work. // Add a Job for each C object. - try self.c_object_work_queue.ensureUnusedCapacity(self.c_object_table.count()); - for (self.c_object_table.keys()) |key| { - self.c_object_work_queue.writeItemAssumeCapacity(key); + try comp.c_object_work_queue.ensureUnusedCapacity(comp.c_object_table.count()); + for (comp.c_object_table.keys()) |key| { + comp.c_object_work_queue.writeItemAssumeCapacity(key); } - const use_stage1 = build_options.is_stage1 and self.bin_file.options.use_stage1; - if (self.bin_file.options.module) |module| { + const use_stage1 = build_options.is_stage1 and comp.bin_file.options.use_stage1; + if (comp.bin_file.options.module) |module| { module.compile_log_text.shrinkAndFree(module.gpa, 0); module.generation += 1; @@ -1845,7 +2018,7 @@ pub fn update(self: *Compilation) !void { // import_table here. // Likewise, in the case of `zig test`, the test runner is the root source file, // and so there is nothing to import the main file. - if (use_stage1 or self.bin_file.options.is_test) { + if (use_stage1 or comp.bin_file.options.is_test) { _ = try module.importPkg(module.main_pkg); } @@ -1854,34 +2027,34 @@ pub fn update(self: *Compilation) !void { // to update it. // We still want AstGen work items for stage1 so that we expose compile errors // that are implemented in stage2 but not stage1. - try self.astgen_work_queue.ensureUnusedCapacity(module.import_table.count()); + try comp.astgen_work_queue.ensureUnusedCapacity(module.import_table.count()); for (module.import_table.values()) |value| { - self.astgen_work_queue.writeItemAssumeCapacity(value); + comp.astgen_work_queue.writeItemAssumeCapacity(value); } if (!use_stage1) { // Put a work item in for checking if any files used with `@embedFile` changed. { - try self.embed_file_work_queue.ensureUnusedCapacity(module.embed_table.count()); + try comp.embed_file_work_queue.ensureUnusedCapacity(module.embed_table.count()); var it = module.embed_table.iterator(); while (it.next()) |entry| { const embed_file = entry.value_ptr.*; - self.embed_file_work_queue.writeItemAssumeCapacity(embed_file); + comp.embed_file_work_queue.writeItemAssumeCapacity(embed_file); } } - try self.work_queue.writeItem(.{ .analyze_pkg = std_pkg }); - if (self.bin_file.options.is_test) { - try self.work_queue.writeItem(.{ .analyze_pkg = module.main_pkg }); + try comp.work_queue.writeItem(.{ .analyze_pkg = std_pkg }); + if (comp.bin_file.options.is_test) { + try comp.work_queue.writeItem(.{ .analyze_pkg = module.main_pkg }); } } } - try self.performAllTheWork(); + try comp.performAllTheWork(); if (!use_stage1) { - if (self.bin_file.options.module) |module| { - if (self.bin_file.options.is_test and self.totalErrorCount() == 0) { + if (comp.bin_file.options.module) |module| { + if (comp.bin_file.options.is_test and comp.totalErrorCount() == 0) { // The `test_functions` decl has been intentionally postponed until now, // at which point we must populate it with the list of test functions that // have been discovered and not filtered out. @@ -1910,39 +2083,184 @@ pub fn update(self: *Compilation) !void { } } - if (self.totalErrorCount() != 0) { + if (comp.totalErrorCount() != 0) { // Skip flushing. - self.link_error_flags = .{}; + comp.link_error_flags = .{}; return; } // This is needed before reading the error flags. - try self.bin_file.flush(self); - self.link_error_flags = self.bin_file.errorFlags(); + try comp.bin_file.flush(comp); + comp.link_error_flags = comp.bin_file.errorFlags(); if (!use_stage1) { - if (self.bin_file.options.module) |module| { + if (comp.bin_file.options.module) |module| { try link.File.C.flushEmitH(module); } } // Flush takes care of -femit-bin, but we still have -femit-llvm-ir, -femit-llvm-bc, and // -femit-asm to handle, in the case of C objects. - self.emitOthers(); + comp.emitOthers(); // If there are any errors, we anticipate the source files being loaded // to report error messages. Otherwise we unload all source files to save memory. // The ZIR needs to stay loaded in memory because (1) Decl objects contain references // to it, and (2) generic instantiations, comptime calls, inline calls will need // to reference the ZIR. - if (self.totalErrorCount() == 0 and !self.keep_source_files_loaded) { - if (self.bin_file.options.module) |module| { + if (!comp.keep_source_files_loaded) { + if (comp.bin_file.options.module) |module| { for (module.import_table.values()) |file| { - file.unloadTree(self.gpa); - file.unloadSource(self.gpa); + file.unloadTree(comp.gpa); + file.unloadSource(comp.gpa); + } + } + } + + if (comp.whole_cache_manifest != null) { + const digest = man.final(); + + // Rename the temporary directory into place. + var directory = tmp_artifact_directory.?; + tmp_artifact_directory = null; + + directory.handle.close(); + defer restorePrevZigCacheArtifactDirectory(comp, &directory); + + const o_sub_path = try std.fs.path.join(comp.gpa, &[_][]const u8{ "o", &digest }); + defer comp.gpa.free(o_sub_path); + + try std.fs.rename( + comp.local_cache_directory.handle, + tmp_dir_sub_path, + comp.local_cache_directory.handle, + o_sub_path, + ); + + // Failure here only means an unnecessary cache miss. + man.writeManifest() catch |err| { + log.warn("failed to write cache manifest: {s}", .{@errorName(err)}); + }; + + // Communicate the output binary location to parent Compilations. + if (comp.whole_bin_basename) |basename| { + comp.bin_file.options.emit = .{ + .directory = comp.local_cache_directory, + .sub_path = try std.fs.path.join(comp.gpa, &.{ "o", &digest, basename }), + }; + } + + assert(comp.bin_file.lock == null); + comp.bin_file.lock = man.toOwnedLock(); + return; + } +} + +/// This is only observed at compile-time and used to emit a compile error +/// to remind the programmer to update multiple related pieces of code that +/// are in different locations. Bump this number when adding or deleting +/// anything from the link cache manifest. +pub const link_hash_implementation_version = 1; + +fn addNonIncrementalStuffToCacheManifest(comp: *Compilation, man: *Cache.Manifest) !void { + const gpa = comp.gpa; + const target = comp.getTarget(); + + var arena_allocator = std.heap.ArenaAllocator.init(gpa); + errdefer arena_allocator.deinit(); + const arena = arena_allocator.allocator(); + + comptime assert(link_hash_implementation_version == 1); + + if (comp.bin_file.options.module) |mod| { + const main_zig_file = try mod.main_pkg.root_src_directory.join(arena, &[_][]const u8{ + mod.main_pkg.root_src_path, + }); + _ = try man.addFile(main_zig_file, null); + { + var seen_table = std.AutoHashMap(*Package, void).init(arena); + + // Skip builtin.zig; it is useless as an input, and we don't want to have to + // write it before checking for a cache hit. + const builtin_pkg = mod.main_pkg.table.get("builtin").?; + try seen_table.put(builtin_pkg, {}); + + try addPackageTableToCacheHash(&man.hash, &arena_allocator, mod.main_pkg.table, &seen_table, .{ .files = man }); + } + } + + try man.addOptionalFile(comp.bin_file.options.linker_script); + try man.addOptionalFile(comp.bin_file.options.version_script); + try man.addListOfFiles(comp.bin_file.options.objects); + + for (comp.c_object_table.keys()) |key| { + _ = try man.addFile(key.src.src_path, null); + man.hash.addListOfBytes(key.src.extra_flags); + } + + man.hash.addOptionalEmitLoc(comp.emit_asm); + man.hash.addOptionalEmitLoc(comp.emit_llvm_ir); + man.hash.addOptionalEmitLoc(comp.emit_llvm_bc); + man.hash.addOptionalEmitLoc(comp.emit_analysis); + man.hash.addOptionalEmitLoc(comp.emit_docs); + + man.hash.addListOfBytes(comp.clang_argv); + + man.hash.addOptional(comp.bin_file.options.stack_size_override); + man.hash.addOptional(comp.bin_file.options.image_base_override); + man.hash.addOptional(comp.bin_file.options.gc_sections); + man.hash.add(comp.bin_file.options.eh_frame_hdr); + man.hash.add(comp.bin_file.options.emit_relocs); + man.hash.add(comp.bin_file.options.rdynamic); + man.hash.addListOfBytes(comp.bin_file.options.lib_dirs); + man.hash.addListOfBytes(comp.bin_file.options.rpath_list); + man.hash.add(comp.bin_file.options.each_lib_rpath); + man.hash.add(comp.bin_file.options.skip_linker_dependencies); + man.hash.add(comp.bin_file.options.z_nodelete); + man.hash.add(comp.bin_file.options.z_notext); + man.hash.add(comp.bin_file.options.z_defs); + man.hash.add(comp.bin_file.options.z_origin); + man.hash.add(comp.bin_file.options.z_noexecstack); + man.hash.add(comp.bin_file.options.z_now); + man.hash.add(comp.bin_file.options.z_relro); + man.hash.add(comp.bin_file.options.include_compiler_rt); + if (comp.bin_file.options.link_libc) { + man.hash.add(comp.bin_file.options.libc_installation != null); + if (comp.bin_file.options.libc_installation) |libc_installation| { + man.hash.addBytes(libc_installation.crt_dir.?); + if (target.abi == .msvc) { + man.hash.addBytes(libc_installation.msvc_lib_dir.?); + man.hash.addBytes(libc_installation.kernel32_lib_dir.?); } } + man.hash.addOptionalBytes(comp.bin_file.options.dynamic_linker); } + man.hash.addOptionalBytes(comp.bin_file.options.soname); + man.hash.addOptional(comp.bin_file.options.version); + link.hashAddSystemLibs(&man.hash, comp.bin_file.options.system_libs); + man.hash.addOptional(comp.bin_file.options.allow_shlib_undefined); + man.hash.add(comp.bin_file.options.bind_global_refs_locally); + man.hash.add(comp.bin_file.options.tsan); + man.hash.addOptionalBytes(comp.bin_file.options.sysroot); + man.hash.add(comp.bin_file.options.linker_optimization); + + // WASM specific stuff + man.hash.add(comp.bin_file.options.import_memory); + man.hash.addOptional(comp.bin_file.options.initial_memory); + man.hash.addOptional(comp.bin_file.options.max_memory); + man.hash.addOptional(comp.bin_file.options.global_base); + + // Mach-O specific stuff + man.hash.addListOfBytes(comp.bin_file.options.framework_dirs); + man.hash.addListOfBytes(comp.bin_file.options.frameworks); + + // COFF specific stuff + man.hash.addOptional(comp.bin_file.options.subsystem); + man.hash.add(comp.bin_file.options.tsaware); + man.hash.add(comp.bin_file.options.nxcompat); + man.hash.add(comp.bin_file.options.dynamicbase); + man.hash.addOptional(comp.bin_file.options.major_subsystem_version); + man.hash.addOptional(comp.bin_file.options.minor_subsystem_version); } fn emitOthers(comp: *Compilation) void { @@ -2988,7 +3306,9 @@ pub fn cImport(comp: *Compilation, c_src: []const u8) !CImportResult { const dep_basename = std.fs.path.basename(out_dep_path); try man.addDepFilePost(zig_cache_tmp_dir, dep_basename); - if (build_options.is_stage1 and comp.bin_file.options.use_stage1) try comp.stage1_cache_manifest.addDepFilePost(zig_cache_tmp_dir, dep_basename); + if (comp.whole_cache_manifest) |whole_cache_manifest| { + try whole_cache_manifest.addDepFilePost(zig_cache_tmp_dir, dep_basename); + } const digest = man.final(); const o_sub_path = try std.fs.path.join(arena, &[_][]const u8{ "o", &digest }); @@ -3351,13 +3671,13 @@ fn updateCObject(comp: *Compilation, c_object: *CObject, c_obj_prog_node: *std.P }; } -pub fn tmpFilePath(comp: *Compilation, arena: Allocator, suffix: []const u8) error{OutOfMemory}![]const u8 { +pub fn tmpFilePath(comp: *Compilation, ally: Allocator, suffix: []const u8) error{OutOfMemory}![]const u8 { const s = std.fs.path.sep_str; const rand_int = std.crypto.random.int(u64); if (comp.local_cache_directory.path) |p| { - return std.fmt.allocPrint(arena, "{s}" ++ s ++ "tmp" ++ s ++ "{x}-{s}", .{ p, rand_int, suffix }); + return std.fmt.allocPrint(ally, "{s}" ++ s ++ "tmp" ++ s ++ "{x}-{s}", .{ p, rand_int, suffix }); } else { - return std.fmt.allocPrint(arena, "tmp" ++ s ++ "{x}-{s}", .{ rand_int, suffix }); + return std.fmt.allocPrint(ally, "tmp" ++ s ++ "{x}-{s}", .{ rand_int, suffix }); } } @@ -4424,6 +4744,7 @@ fn buildOutputFromZig( .global_cache_directory = comp.global_cache_directory, .local_cache_directory = comp.global_cache_directory, .zig_lib_directory = comp.zig_lib_directory, + .cache_mode = .whole, .target = target, .root_name = root_name, .main_pkg = &main_pkg, @@ -4501,10 +4822,7 @@ fn updateStage1Module(comp: *Compilation, main_progress_node: *std.Progress.Node mod.main_pkg.root_src_path, }); const zig_lib_dir = comp.zig_lib_directory.path.?; - const builtin_zig_path = try directory.join(arena, &[_][]const u8{"builtin.zig"}); const target = comp.getTarget(); - const id_symlink_basename = "stage1.id"; - const libs_txt_basename = "libs.txt"; // The include_compiler_rt stored in the bin file options here means that we need // compiler-rt symbols *somehow*. However, in the context of using the stage1 backend @@ -4516,115 +4834,6 @@ fn updateStage1Module(comp: *Compilation, main_progress_node: *std.Progress.Node const include_compiler_rt = comp.bin_file.options.output_mode == .Obj and comp.bin_file.options.include_compiler_rt; - // We are about to obtain this lock, so here we give other processes a chance first. - comp.releaseStage1Lock(); - - // Unlike with the self-hosted Zig module, stage1 does not support incremental compilation, - // so we input all the zig source files into the cache hash system. We're going to keep - // the artifact directory the same, however, so we take the same strategy as linking - // does where we have a file which specifies the hash of the output directory so that we can - // skip the expensive compilation step if the hash matches. - var man = comp.cache_parent.obtain(); - defer man.deinit(); - - _ = try man.addFile(main_zig_file, null); - { - var seen_table = std.AutoHashMap(*Package, void).init(arena_allocator.allocator()); - try addPackageTableToCacheHash(&man.hash, &arena_allocator, mod.main_pkg.table, &seen_table, .{ .files = &man }); - } - man.hash.add(comp.bin_file.options.valgrind); - man.hash.add(comp.bin_file.options.single_threaded); - man.hash.add(target.os.getVersionRange()); - man.hash.add(comp.bin_file.options.dll_export_fns); - man.hash.add(comp.bin_file.options.function_sections); - man.hash.add(include_compiler_rt); - man.hash.add(comp.bin_file.options.is_test); - man.hash.add(comp.bin_file.options.emit != null); - man.hash.add(mod.emit_h != null); - if (mod.emit_h) |emit_h| { - man.hash.addEmitLoc(emit_h.loc); - } - man.hash.addOptionalEmitLoc(comp.emit_asm); - man.hash.addOptionalEmitLoc(comp.emit_llvm_ir); - man.hash.addOptionalEmitLoc(comp.emit_llvm_bc); - man.hash.addOptionalEmitLoc(comp.emit_analysis); - man.hash.addOptionalEmitLoc(comp.emit_docs); - man.hash.add(comp.test_evented_io); - man.hash.addOptionalBytes(comp.test_filter); - man.hash.addOptionalBytes(comp.test_name_prefix); - man.hash.addListOfBytes(comp.clang_argv); - - // Capture the state in case we come back from this branch where the hash doesn't match. - const prev_hash_state = man.hash.peekBin(); - const input_file_count = man.files.items.len; - - const hit = man.hit() catch |err| { - const i = man.failed_file_index orelse return err; - const file_path = man.files.items[i].path orelse return err; - fatal("unable to build stage1 zig object: {s}: {s}", .{ @errorName(err), file_path }); - }; - if (hit) { - const digest = man.final(); - - // We use an extra hex-encoded byte here to store some flags. - var prev_digest_buf: [digest.len + 2]u8 = undefined; - const prev_digest: []u8 = Cache.readSmallFile( - directory.handle, - id_symlink_basename, - &prev_digest_buf, - ) catch |err| blk: { - log.debug("stage1 {s} new_digest={s} error: {s}", .{ - mod.main_pkg.root_src_path, - std.fmt.fmtSliceHexLower(&digest), - @errorName(err), - }); - // Handle this as a cache miss. - break :blk prev_digest_buf[0..0]; - }; - if (prev_digest.len >= digest.len + 2) hit: { - if (!mem.eql(u8, prev_digest[0..digest.len], &digest)) - break :hit; - - log.debug("stage1 {s} digest={s} match - skipping invocation", .{ - mod.main_pkg.root_src_path, - std.fmt.fmtSliceHexLower(&digest), - }); - var flags_bytes: [1]u8 = undefined; - _ = std.fmt.hexToBytes(&flags_bytes, prev_digest[digest.len..]) catch { - log.warn("bad cache stage1 digest: '{s}'", .{std.fmt.fmtSliceHexLower(prev_digest)}); - break :hit; - }; - - if (directory.handle.readFileAlloc(comp.gpa, libs_txt_basename, 10 * 1024 * 1024)) |libs_txt| { - var it = mem.tokenize(u8, libs_txt, "\n"); - while (it.next()) |lib_name| { - try comp.stage1AddLinkLib(lib_name); - } - } else |err| switch (err) { - error.FileNotFound => {}, // That's OK, it just means 0 libs. - else => { - log.warn("unable to read cached list of link libs: {s}", .{@errorName(err)}); - break :hit; - }, - } - comp.stage1_lock = man.toOwnedLock(); - mod.stage1_flags = @bitCast(@TypeOf(mod.stage1_flags), flags_bytes[0]); - return; - } - log.debug("stage1 {s} prev_digest={s} new_digest={s}", .{ - mod.main_pkg.root_src_path, - std.fmt.fmtSliceHexLower(prev_digest), - std.fmt.fmtSliceHexLower(&digest), - }); - man.unhit(prev_hash_state, input_file_count); - } - - // We are about to change the output file to be different, so we invalidate the build hash now. - directory.handle.deleteFile(id_symlink_basename) catch |err| switch (err) { - error.FileNotFound => {}, - else => |e| return e, - }; - const stage2_target = try arena.create(stage1.Stage2Target); stage2_target.* = .{ .arch = @enumToInt(target.cpu.arch) + 1, // skip over ZigLLVM_UnknownArch @@ -4637,9 +4846,9 @@ fn updateStage1Module(comp: *Compilation, main_progress_node: *std.Progress.Node .llvm_target_abi = if (target_util.llvmMachineAbi(target)) |s| s.ptr else null, }; - comp.stage1_cache_manifest = &man; - const main_pkg_path = mod.main_pkg.root_src_directory.path orelse ""; + const builtin_pkg = mod.main_pkg.table.get("builtin").?; + const builtin_zig_path = try builtin_pkg.root_src_directory.join(arena, &.{builtin_pkg.root_src_path}); const stage1_module = stage1.create( @enumToInt(comp.bin_file.options.optimize_mode), @@ -4740,19 +4949,8 @@ fn updateStage1Module(comp: *Compilation, main_progress_node: *std.Progress.Node .have_dllmain_crt_startup = false, }; - const inferred_lib_start_index = comp.bin_file.options.system_libs.count(); stage1_module.build_object(); - if (comp.bin_file.options.system_libs.count() > inferred_lib_start_index) { - // We need to save the inferred link libs to the cache, otherwise if we get a cache hit - // next time we will be missing these libs. - var libs_txt = std.ArrayList(u8).init(arena); - for (comp.bin_file.options.system_libs.keys()[inferred_lib_start_index..]) |key| { - try libs_txt.writer().print("{s}\n", .{key}); - } - try directory.handle.writeFile(libs_txt_basename, libs_txt.items); - } - mod.stage1_flags = .{ .have_c_main = stage1_module.have_c_main, .have_winmain = stage1_module.have_winmain, @@ -4763,34 +4961,6 @@ fn updateStage1Module(comp: *Compilation, main_progress_node: *std.Progress.Node }; stage1_module.destroy(); - - const digest = man.final(); - - // Update the small file with the digest. If it fails we can continue; it only - // means that the next invocation will have an unnecessary cache miss. - const stage1_flags_byte = @bitCast(u8, mod.stage1_flags); - log.debug("stage1 {s} final digest={s} flags={x}", .{ - mod.main_pkg.root_src_path, std.fmt.fmtSliceHexLower(&digest), stage1_flags_byte, - }); - var digest_plus_flags: [digest.len + 2]u8 = undefined; - digest_plus_flags[0..digest.len].* = digest; - assert(std.fmt.formatIntBuf(digest_plus_flags[digest.len..], stage1_flags_byte, 16, .lower, .{ - .width = 2, - .fill = '0', - }) == 2); - log.debug("saved digest + flags: '{s}' (byte = {}) have_winmain_crt_startup={}", .{ - digest_plus_flags, stage1_flags_byte, mod.stage1_flags.have_winmain_crt_startup, - }); - Cache.writeSmallFile(directory.handle, id_symlink_basename, &digest_plus_flags) catch |err| { - log.warn("failed to save stage1 hash digest file: {s}", .{@errorName(err)}); - }; - // Failure here only means an unnecessary cache miss. - man.writeManifest() catch |err| { - log.warn("failed to write cache manifest when linking: {s}", .{@errorName(err)}); - }; - // We hang on to this lock so that the output file path can be used without - // other processes clobbering it. - comp.stage1_lock = man.toOwnedLock(); } fn stage1LocPath(arena: Allocator, opt_loc: ?EmitLoc, cache_directory: Directory) ![]const u8 { @@ -4862,6 +5032,7 @@ pub fn build_crt_file( .local_cache_directory = comp.global_cache_directory, .global_cache_directory = comp.global_cache_directory, .zig_lib_directory = comp.zig_lib_directory, + .cache_mode = .whole, .target = target, .root_name = root_name, .main_pkg = null, -- cgit v1.2.3 From 67e31807df73ad2da992293fcdf1f6ea7ca67d2a Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Thu, 30 Dec 2021 15:32:50 -0700 Subject: stage2: CacheMode.whole fixes * Logic to check whether a bin file is not emitted is more complicated in between `Compilation.create` and `Compilation.update`. Fixed the logic that decides whether to build compiler-rt and other support artifacts. * Basically, one cannot inspect the value of `comp.bin_file.emit` until after update() is called - fixed another instance of this happening in the CLI. * In the CLI, `runOrTest` is updated to properly use the result value of `comp.bin_file.options.emit` rather than guessing whether the output binary is. * Don't assume that the emit output has no directory components in sub_path. In other words, don't assume that the emit directory is the final directory; there may be sub-directories. --- src/Compilation.zig | 11 ++++++++++- src/main.zig | 30 ++++++++++++++++-------------- 2 files changed, 26 insertions(+), 15 deletions(-) (limited to 'src/Compilation.zig') diff --git a/src/Compilation.zig b/src/Compilation.zig index 8afb373699..ac48804848 100644 --- a/src/Compilation.zig +++ b/src/Compilation.zig @@ -1662,7 +1662,9 @@ pub fn create(gpa: Allocator, options: InitOptions) !*Compilation { comp.c_object_table.putAssumeCapacityNoClobber(c_object, {}); } - if (comp.bin_file.options.emit != null and !comp.bin_file.options.skip_linker_dependencies) { + const have_bin_emit = comp.bin_file.options.emit != null or comp.whole_bin_basename != null; + + if (have_bin_emit and !comp.bin_file.options.skip_linker_dependencies) { // If we need to build glibc for the target, add work items for it. // We go through the work queue so that building can be done in parallel. if (comp.wantBuildGLibCFromSource()) { @@ -1767,8 +1769,10 @@ pub fn create(gpa: Allocator, options: InitOptions) !*Compilation { if (comp.bin_file.options.include_compiler_rt and capable_of_building_compiler_rt) { if (is_exe_or_dyn_lib) { + log.debug("queuing a job to build compiler_rt_lib", .{}); try comp.work_queue.writeItem(.{ .compiler_rt_lib = {} }); } else if (options.output_mode != .Obj) { + log.debug("queuing a job to build compiler_rt_obj", .{}); // If build-obj with -fcompiler-rt is requested, that is handled specially // elsewhere. In this case we are making a static library, so we ask // for a compiler-rt object to put in it. @@ -1930,6 +1934,7 @@ pub fn update(comp: *Compilation) !void { return err; }; if (is_hit) { + log.debug("CacheMode.whole cache hit for {s}", .{comp.bin_file.options.root_name}); const digest = man.final(); // Communicate the output binary location to parent Compilations. @@ -1952,6 +1957,8 @@ pub fn update(comp: *Compilation) !void { comp.bin_file.lock = man.toOwnedLock(); return; } + log.debug("CacheMode.whole cache miss for {s}", .{comp.bin_file.options.root_name}); + comp.whole_cache_manifest = &man; // Initialize `bin_file.emit` with a temporary Directory so that compilation can @@ -2187,6 +2194,8 @@ fn addNonIncrementalStuffToCacheManifest(comp: *Compilation, man: *Cache.Manifes try addPackageTableToCacheHash(&man.hash, &arena_allocator, mod.main_pkg.table, &seen_table, .{ .files = man }); } + + man.hash.add(mod.emit_h != null); } try man.addOptionalFile(comp.bin_file.options.linker_script); diff --git a/src/main.zig b/src/main.zig index 53e7df3520..6f55e46eb1 100644 --- a/src/main.zig +++ b/src/main.zig @@ -2564,9 +2564,7 @@ fn buildOutputType( switch (emit_bin) { .no => break :blk .none, - .yes_default_path => break :blk .{ - .print = comp.bin_file.options.emit.?.directory.path orelse ".", - }, + .yes_default_path => break :blk .print_emit_bin_dir_path, .yes => |full_path| break :blk .{ .update = full_path }, .yes_a_out => break :blk .{ .update = a_out_basename }, } @@ -2598,7 +2596,6 @@ fn buildOutputType( comp, gpa, arena, - emit_bin_loc, test_exec_args.items, self_exe_path, arg_mode, @@ -2671,7 +2668,6 @@ fn buildOutputType( comp, gpa, arena, - emit_bin_loc, test_exec_args.items, self_exe_path, arg_mode, @@ -2697,7 +2693,6 @@ fn buildOutputType( comp, gpa, arena, - emit_bin_loc, test_exec_args.items, self_exe_path, arg_mode, @@ -2762,7 +2757,6 @@ fn runOrTest( comp: *Compilation, gpa: Allocator, arena: Allocator, - emit_bin_loc: ?Compilation.EmitLoc, test_exec_args: []const ?[]const u8, self_exe_path: []const u8, arg_mode: ArgMode, @@ -2773,10 +2767,11 @@ fn runOrTest( runtime_args_start: ?usize, link_libc: bool, ) !void { - const exe_loc = emit_bin_loc orelse return; - const exe_directory = exe_loc.directory orelse comp.bin_file.options.emit.?.directory; + const exe_emit = comp.bin_file.options.emit orelse return; + // A naive `directory.join` here will indeed get the correct path to the binary, + // however, in the case of cwd, we actually want `./foo` so that the path can be executed. const exe_path = try fs.path.join(arena, &[_][]const u8{ - exe_directory.path orelse ".", exe_loc.basename, + exe_emit.directory.path orelse ".", exe_emit.sub_path, }); var argv = std.ArrayList([]const u8).init(gpa); @@ -2880,7 +2875,7 @@ fn runOrTest( const AfterUpdateHook = union(enum) { none, - print: []const u8, + print_emit_bin_dir_path, update: []const u8, }; @@ -2906,7 +2901,13 @@ fn updateModule(gpa: Allocator, comp: *Compilation, hook: AfterUpdateHook) !void return error.SemanticAnalyzeFail; } else switch (hook) { .none => {}, - .print => |bin_path| try io.getStdOut().writer().print("{s}\n", .{bin_path}), + .print_emit_bin_dir_path => { + const emit = comp.bin_file.options.emit.?; + const full_path = try emit.directory.join(gpa, &.{emit.sub_path}); + defer gpa.free(full_path); + const dir_path = fs.path.dirname(full_path).?; + try io.getStdOut().writer().print("{s}\n", .{dir_path}); + }, .update => |full_path| { const bin_sub_path = comp.bin_file.options.emit.?.sub_path; const cwd = fs.cwd(); @@ -3469,9 +3470,10 @@ pub fn cmdBuild(gpa: Allocator, arena: Allocator, args: []const []const u8) !voi }; try comp.makeBinFileExecutable(); - child_argv.items[argv_index_exe] = try comp.bin_file.options.emit.?.directory.join( + const emit = comp.bin_file.options.emit.?; + child_argv.items[argv_index_exe] = try emit.directory.join( arena, - &[_][]const u8{exe_basename}, + &[_][]const u8{emit.sub_path}, ); break :argv child_argv.items; -- cgit v1.2.3 From d6c5602d4665ba4e9e7c0b7f42bd00b2489d420c Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Thu, 30 Dec 2021 17:05:17 -0700 Subject: stage2: fix CLI not populating output binary files This fixes a regression in this branch that can be reproduced with the following steps: 1. `zig build-exe hello.zig` 2. delete the "hello" binary 3. `zig build-exe hello.zig` 4. observe that the "hello" binary is missing This happened because it was a cache hit, but nothing got copied to the output directory. This commit sets CacheMode to incremental - even for stage1 - when the CLI requests `disable_lld_caching` (this option should be renamed), resulting in the main Compilation to be repeated (uncached) for stage1, populating the binary into the cwd as expected. For stage2 the result is even better: the incremental compilation system will look for build artifacts to incrementally compile, and start fresh if not found. --- src/Compilation.zig | 7 ++++--- src/stage1.zig | 5 ++++- 2 files changed, 8 insertions(+), 4 deletions(-) (limited to 'src/Compilation.zig') diff --git a/src/Compilation.zig b/src/Compilation.zig index ac48804848..ddf331d4f3 100644 --- a/src/Compilation.zig +++ b/src/Compilation.zig @@ -899,7 +899,10 @@ pub fn create(gpa: Allocator, options: InitOptions) !*Compilation { break :blk build_options.is_stage1; }; - const cache_mode = if (use_stage1) CacheMode.whole else options.cache_mode; + const cache_mode = if (use_stage1 and !options.disable_lld_caching) + CacheMode.whole + else + options.cache_mode; // Make a decision on whether to use LLVM or our own backend. const use_llvm = build_options.have_llvm and blk: { @@ -1951,8 +1954,6 @@ pub fn update(comp: *Compilation) !void { }; } - comp.emitOthers(); - assert(comp.bin_file.lock == null); comp.bin_file.lock = man.toOwnedLock(); return; diff --git a/src/stage1.zig b/src/stage1.zig index a5be1b78fe..384988dc07 100644 --- a/src/stage1.zig +++ b/src/stage1.zig @@ -458,7 +458,10 @@ export fn stage2_fetch_file( const comp = @intToPtr(*Compilation, stage1.userdata); const file_path = path_ptr[0..path_len]; const max_file_size = std.math.maxInt(u32); - const contents = comp.whole_cache_manifest.?.addFilePostFetch(file_path, max_file_size) catch return null; + const contents = if (comp.whole_cache_manifest) |man| + man.addFilePostFetch(file_path, max_file_size) catch return null + else + std.fs.cwd().readFileAlloc(comp.gpa, file_path, max_file_size) catch return null; result_len.* = contents.len; // TODO https://github.com/ziglang/zig/issues/3328#issuecomment-716749475 if (contents.len == 0) return @intToPtr(?[*]const u8, 0x1); -- cgit v1.2.3 From 0ad2a99675d331c686847a7b2a84feddfcce6573 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Thu, 30 Dec 2021 20:49:02 -0700 Subject: stage2: CacheMode.whole: trigger loading zig source files Previously the code asserted source files were already loaded, but this is not the case when cached ZIR is loaded. Now it will trigger .zig source code to be loaded for the purposes of hashing the source for `CacheMode.whole`. This additionally refactors stat_size, stat_inode, and stat_mtime fields into using the `Cache.File.Stat` struct. --- src/Compilation.zig | 4 +- src/Module.zig | 156 ++++++++++++++++++++++++++++++++++------------------ src/main.zig | 34 ++++++------ 3 files changed, 121 insertions(+), 73 deletions(-) (limited to 'src/Compilation.zig') diff --git a/src/Compilation.zig b/src/Compilation.zig index ddf331d4f3..e71c84af33 100644 --- a/src/Compilation.zig +++ b/src/Compilation.zig @@ -421,7 +421,7 @@ pub const AllErrors = struct { const module_note = module_err_msg.notes[i]; const source = try module_note.src_loc.file_scope.getSource(module.gpa); const byte_offset = try module_note.src_loc.byteOffset(module.gpa); - const loc = std.zig.findLineColumn(source, byte_offset); + const loc = std.zig.findLineColumn(source.bytes, byte_offset); const file_path = try module_note.src_loc.file_scope.fullPath(allocator); note.* = .{ .src = .{ @@ -444,7 +444,7 @@ pub const AllErrors = struct { } const source = try module_err_msg.src_loc.file_scope.getSource(module.gpa); const byte_offset = try module_err_msg.src_loc.byteOffset(module.gpa); - const loc = std.zig.findLineColumn(source, byte_offset); + const loc = std.zig.findLineColumn(source.bytes, byte_offset); const file_path = try module_err_msg.src_loc.file_scope.fullPath(allocator); try errors.append(.{ .src = .{ diff --git a/src/Module.zig b/src/Module.zig index 6742ddd486..0cbf75c735 100644 --- a/src/Module.zig +++ b/src/Module.zig @@ -1463,11 +1463,7 @@ pub const File = struct { /// Whether this is populated depends on `source_loaded`. source: [:0]const u8, /// Whether this is populated depends on `status`. - stat_size: u64, - /// Whether this is populated depends on `status`. - stat_inode: std.fs.File.INode, - /// Whether this is populated depends on `status`. - stat_mtime: i128, + stat: Cache.File.Stat, /// Whether this is populated or not depends on `tree_loaded`. tree: Ast, /// Whether this is populated or not depends on `zir_loaded`. @@ -1535,8 +1531,16 @@ pub const File = struct { file.* = undefined; } - pub fn getSource(file: *File, gpa: Allocator) ![:0]const u8 { - if (file.source_loaded) return file.source; + pub const Source = struct { + bytes: [:0]const u8, + stat: Cache.File.Stat, + }; + + pub fn getSource(file: *File, gpa: Allocator) !Source { + if (file.source_loaded) return Source{ + .bytes = file.source, + .stat = file.stat, + }; const root_dir_path = file.pkg.root_src_directory.path orelse "."; log.debug("File.getSource, not cached. pkgdir={s} sub_file_path={s}", .{ @@ -1565,14 +1569,21 @@ pub const File = struct { file.source = source; file.source_loaded = true; - return source; + return Source{ + .bytes = source, + .stat = .{ + .size = stat.size, + .inode = stat.inode, + .mtime = stat.mtime, + }, + }; } pub fn getTree(file: *File, gpa: Allocator) !*const Ast { if (file.tree_loaded) return &file.tree; const source = try file.getSource(gpa); - file.tree = try std.zig.parse(gpa, source); + file.tree = try std.zig.parse(gpa, source.bytes); file.tree_loaded = true; return &file.tree; } @@ -1631,9 +1642,7 @@ pub const EmbedFile = struct { /// Memory is stored in gpa, owned by EmbedFile. sub_file_path: []const u8, bytes: [:0]const u8, - stat_size: u64, - stat_inode: std.fs.File.INode, - stat_mtime: i128, + stat: Cache.File.Stat, /// Package that this file is a part of, managed externally. pkg: *Package, /// The Decl that was created from the `@embedFile` to own this resource. @@ -2704,9 +2713,11 @@ pub fn astGenFile(mod: *Module, file: *File) !void { keep_zir = true; file.zir = zir; file.zir_loaded = true; - file.stat_size = header.stat_size; - file.stat_inode = header.stat_inode; - file.stat_mtime = header.stat_mtime; + file.stat = .{ + .size = header.stat_size, + .inode = header.stat_inode, + .mtime = header.stat_mtime, + }; file.status = .success_zir; log.debug("AstGen cached success: {s}", .{file.sub_file_path}); @@ -2724,9 +2735,9 @@ pub fn astGenFile(mod: *Module, file: *File) !void { }, .parse_failure, .astgen_failure, .success_zir => { const unchanged_metadata = - stat.size == file.stat_size and - stat.mtime == file.stat_mtime and - stat.inode == file.stat_inode; + stat.size == file.stat.size and + stat.mtime == file.stat.mtime and + stat.inode == file.stat.inode; if (unchanged_metadata) { log.debug("unmodified metadata of file: {s}", .{file.sub_file_path}); @@ -2787,9 +2798,11 @@ pub fn astGenFile(mod: *Module, file: *File) !void { if (amt != stat.size) return error.UnexpectedEndOfFile; - file.stat_size = stat.size; - file.stat_inode = stat.inode; - file.stat_mtime = stat.mtime; + file.stat = .{ + .size = stat.size, + .inode = stat.inode, + .mtime = stat.mtime, + }; file.source = source; file.source_loaded = true; @@ -3069,9 +3082,11 @@ pub fn populateBuiltinFile(mod: *Module) !void { try writeBuiltinFile(file, builtin_pkg); } else { - file.stat_size = stat.size; - file.stat_inode = stat.inode; - file.stat_mtime = stat.mtime; + file.stat = .{ + .size = stat.size, + .inode = stat.inode, + .mtime = stat.mtime, + }; } } else |err| switch (err) { error.BadPathName => unreachable, // it's always "builtin.zig" @@ -3099,9 +3114,11 @@ pub fn writeBuiltinFile(file: *File, builtin_pkg: *Package) !void { try af.file.writeAll(file.source); try af.finish(); - file.stat_size = file.source.len; - file.stat_inode = 0; // dummy value - file.stat_mtime = 0; // dummy value + file.stat = .{ + .size = file.source.len, + .inode = 0, // dummy value + .mtime = 0, // dummy value + }; } pub fn mapOldZirToNew( @@ -3382,16 +3399,16 @@ pub fn semaFile(mod: *Module, file: *File) SemaError!void { } if (mod.comp.whole_cache_manifest) |man| { - assert(file.source_loaded); + const source = file.getSource(gpa) catch |err| { + try reportRetryableFileError(mod, file, "unable to load source: {s}", .{@errorName(err)}); + return error.AnalysisFail; + }; const resolved_path = try file.pkg.root_src_directory.join(gpa, &.{ file.sub_file_path, }); errdefer gpa.free(resolved_path); - try man.addFilePostContents(resolved_path, file.source, .{ - .size = file.stat_size, - .inode = file.stat_inode, - .mtime = file.stat_mtime, - }); + + try man.addFilePostContents(resolved_path, source.bytes, source.stat); } } else { new_decl.analysis = .file_failure; @@ -3723,9 +3740,7 @@ pub fn importPkg(mod: *Module, pkg: *Package) !ImportFileResult { .source_loaded = false, .tree_loaded = false, .zir_loaded = false, - .stat_size = undefined, - .stat_inode = undefined, - .stat_mtime = undefined, + .stat = undefined, .tree = undefined, .zir = undefined, .status = .never_loaded, @@ -3793,9 +3808,7 @@ pub fn importFile( .source_loaded = false, .tree_loaded = false, .zir_loaded = false, - .stat_size = undefined, - .stat_inode = undefined, - .stat_mtime = undefined, + .stat = undefined, .tree = undefined, .zir = undefined, .status = .never_loaded, @@ -3840,8 +3853,13 @@ pub fn embedFile(mod: *Module, cur_file: *File, rel_file_path: []const u8) !*Emb var file = try cur_file.pkg.root_src_directory.handle.openFile(sub_file_path, .{}); defer file.close(); - const stat = try file.stat(); - const size_usize = try std.math.cast(usize, stat.size); + const actual_stat = try file.stat(); + const stat: Cache.File.Stat = .{ + .size = actual_stat.size, + .inode = actual_stat.inode, + .mtime = actual_stat.mtime, + }; + const size_usize = try std.math.cast(usize, actual_stat.size); const bytes = try file.readToEndAllocOptions(gpa, std.math.maxInt(u32), size_usize, 1, 0); errdefer gpa.free(bytes); @@ -3852,11 +3870,7 @@ pub fn embedFile(mod: *Module, cur_file: *File, rel_file_path: []const u8) !*Emb if (mod.comp.whole_cache_manifest) |man| { const copied_resolved_path = try gpa.dupe(u8, resolved_path); errdefer gpa.free(copied_resolved_path); - try man.addFilePostContents(copied_resolved_path, bytes, .{ - .size = stat.size, - .inode = stat.inode, - .mtime = stat.mtime, - }); + try man.addFilePostContents(copied_resolved_path, bytes, stat); } keep_resolved_path = true; // It's now owned by embed_table. @@ -3864,9 +3878,7 @@ pub fn embedFile(mod: *Module, cur_file: *File, rel_file_path: []const u8) !*Emb new_file.* = .{ .sub_file_path = sub_file_path, .bytes = bytes, - .stat_size = stat.size, - .stat_inode = stat.inode, - .stat_mtime = stat.mtime, + .stat = stat, .pkg = cur_file.pkg, .owner_decl = undefined, // Set by Sema immediately after this function returns. }; @@ -3880,9 +3892,9 @@ pub fn detectEmbedFileUpdate(mod: *Module, embed_file: *EmbedFile) !void { const stat = try file.stat(); const unchanged_metadata = - stat.size == embed_file.stat_size and - stat.mtime == embed_file.stat_mtime and - stat.inode == embed_file.stat_inode; + stat.size == embed_file.stat.size and + stat.mtime == embed_file.stat.mtime and + stat.inode == embed_file.stat.inode; if (unchanged_metadata) return; @@ -3891,9 +3903,11 @@ pub fn detectEmbedFileUpdate(mod: *Module, embed_file: *EmbedFile) !void { const bytes = try file.readToEndAllocOptions(gpa, std.math.maxInt(u32), size_usize, 1, 0); gpa.free(embed_file.bytes); embed_file.bytes = bytes; - embed_file.stat_size = stat.size; - embed_file.stat_mtime = stat.mtime; - embed_file.stat_inode = stat.inode; + embed_file.stat = .{ + .size = stat.size, + .mtime = stat.mtime, + .inode = stat.inode, + }; mod.comp.mutex.lock(); defer mod.comp.mutex.unlock(); @@ -5024,3 +5038,35 @@ pub fn linkerUpdateDecl(mod: *Module, decl: *Decl) !void { }, }; } + +fn reportRetryableFileError( + mod: *Module, + file: *File, + comptime format: []const u8, + args: anytype, +) error{OutOfMemory}!void { + file.status = .retryable_failure; + + const err_msg = try ErrorMsg.create( + mod.gpa, + .{ + .file_scope = file, + .parent_decl_node = 0, + .lazy = .entire_file, + }, + format, + args, + ); + errdefer err_msg.destroy(mod.gpa); + + mod.comp.mutex.lock(); + defer mod.comp.mutex.unlock(); + + const gop = try mod.failed_files.getOrPut(mod.gpa, file); + if (gop.found_existing) { + if (gop.value_ptr.*) |old_err_msg| { + old_err_msg.destroy(mod.gpa); + } + } + gop.value_ptr.* = err_msg; +} diff --git a/src/main.zig b/src/main.zig index 6f55e46eb1..4747772b8a 100644 --- a/src/main.zig +++ b/src/main.zig @@ -3664,9 +3664,7 @@ pub fn cmdFmt(gpa: Allocator, arena: Allocator, args: []const []const u8) !void .zir_loaded = false, .sub_file_path = "", .source = source_code, - .stat_size = undefined, - .stat_inode = undefined, - .stat_mtime = undefined, + .stat = undefined, .tree = tree, .tree_loaded = true, .zir = undefined, @@ -3860,9 +3858,11 @@ fn fmtPathFile( .zir_loaded = false, .sub_file_path = file_path, .source = source_code, - .stat_size = stat.size, - .stat_inode = stat.inode, - .stat_mtime = stat.mtime, + .stat = .{ + .size = stat.size, + .inode = stat.inode, + .mtime = stat.mtime, + }, .tree = tree, .tree_loaded = true, .zir = undefined, @@ -4458,9 +4458,7 @@ pub fn cmdAstCheck( .zir_loaded = false, .sub_file_path = undefined, .source = undefined, - .stat_size = undefined, - .stat_inode = undefined, - .stat_mtime = undefined, + .stat = undefined, .tree = undefined, .zir = undefined, .pkg = undefined, @@ -4485,9 +4483,11 @@ pub fn cmdAstCheck( file.sub_file_path = file_name; file.source = source; file.source_loaded = true; - file.stat_size = stat.size; - file.stat_inode = stat.inode; - file.stat_mtime = stat.mtime; + file.stat = .{ + .size = stat.size, + .inode = stat.inode, + .mtime = stat.mtime, + }; } else { const stdin = io.getStdIn(); const source = readSourceFileToEndAlloc(arena, &stdin, null) catch |err| { @@ -4496,7 +4496,7 @@ pub fn cmdAstCheck( file.sub_file_path = ""; file.source = source; file.source_loaded = true; - file.stat_size = source.len; + file.stat.size = source.len; } file.pkg = try Package.create(gpa, null, file.sub_file_path); @@ -4609,9 +4609,11 @@ pub fn cmdChangelist( .zir_loaded = false, .sub_file_path = old_source_file, .source = undefined, - .stat_size = stat.size, - .stat_inode = stat.inode, - .stat_mtime = stat.mtime, + .stat = .{ + .size = stat.size, + .inode = stat.inode, + .mtime = stat.mtime, + }, .tree = undefined, .zir = undefined, .pkg = undefined, -- cgit v1.2.3 From 208a6c7d6a58c1fc46ed832acaa8a882f1c6a1dd Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Thu, 30 Dec 2021 21:01:20 -0700 Subject: stage2: fix not calling deinit() on whole_cache_manifest Need to mark it as "needing cleanup" a bit earlier in the function. --- src/Compilation.zig | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'src/Compilation.zig') diff --git a/src/Compilation.zig b/src/Compilation.zig index e71c84af33..9361159c4e 100644 --- a/src/Compilation.zig +++ b/src/Compilation.zig @@ -1926,6 +1926,7 @@ pub fn update(comp: *Compilation) !void { // We are about to obtain this lock, so here we give other processes a chance first. comp.bin_file.releaseLock(); + comp.whole_cache_manifest = &man; man = comp.cache_parent.obtain(); try comp.addNonIncrementalStuffToCacheManifest(&man); @@ -1960,8 +1961,6 @@ pub fn update(comp: *Compilation) !void { } log.debug("CacheMode.whole cache miss for {s}", .{comp.bin_file.options.root_name}); - comp.whole_cache_manifest = &man; - // Initialize `bin_file.emit` with a temporary Directory so that compilation can // continue on the same path as incremental, using the temporary Directory. tmp_artifact_directory = d: { -- cgit v1.2.3 From 73ba6bf30be02d65e19304b0ec45c9a2a495698e Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Thu, 30 Dec 2021 21:28:56 -0700 Subject: stage2: fix memory leak of emit_bin.sub_path Instead of juggling GPA-allocated sub_path (and ultimately dropping the ball, in this analogy), `Compilation.create` allocates an already-exactly-correct size `sub_path` that has the digest unpopulated. This is then overwritten in place as necessary and used as the `emit_bin.sub_path` value, and no allocations/frees are performed for this file path. --- src/Compilation.zig | 60 +++++++++++++++++++++++++---------------------------- 1 file changed, 28 insertions(+), 32 deletions(-) (limited to 'src/Compilation.zig') diff --git a/src/Compilation.zig b/src/Compilation.zig index 9361159c4e..4c31f8a998 100644 --- a/src/Compilation.zig +++ b/src/Compilation.zig @@ -98,9 +98,11 @@ clang_argv: []const []const u8, cache_parent: *Cache, /// Path to own executable for invoking `zig clang`. self_exe_path: ?[]const u8, -/// null means -fno-emit-bin. Contains the basename of the -/// outputted binary file in case we don't know the directory yet. -whole_bin_basename: ?[]const u8, +/// null means -fno-emit-bin. +/// This is mutable memory allocated into the Compilation-lifetime arena (`arena_state`) +/// of exactly the correct size for "o/[digest]/[basename]". +/// The basename is of the outputted binary file in case we don't know the directory yet. +whole_bin_sub_path: ?[]u8, zig_lib_directory: Directory, local_cache_directory: Directory, global_cache_directory: Directory, @@ -1487,9 +1489,12 @@ pub fn create(gpa: Allocator, options: InitOptions) !*Compilation { // can use it for communicating the result directory via `bin_file.emit`. // This is used to distinguish between -fno-emit-bin and -femit-bin // for `CacheMode.whole`. - const whole_bin_basename: ?[]const u8 = if (options.emit_bin) |x| + // This memory will be overwritten with the real digest in update() but + // the basename will be preserved. + const whole_bin_sub_path: ?[]u8 = if (options.emit_bin) |x| if (x.directory == null) - x.basename + try std.fmt.allocPrint(arena, "o" ++ std.fs.path.sep_str ++ + ("x" ** Cache.hex_digest_len) ++ std.fs.path.sep_str ++ "{s}", .{x.basename}) else null else @@ -1600,7 +1605,7 @@ pub fn create(gpa: Allocator, options: InitOptions) !*Compilation { .local_cache_directory = options.local_cache_directory, .global_cache_directory = options.global_cache_directory, .bin_file = bin_file, - .whole_bin_basename = whole_bin_basename, + .whole_bin_sub_path = whole_bin_sub_path, .emit_asm = options.emit_asm, .emit_llvm_ir = options.emit_llvm_ir, .emit_llvm_bc = options.emit_llvm_bc, @@ -1665,7 +1670,7 @@ pub fn create(gpa: Allocator, options: InitOptions) !*Compilation { comp.c_object_table.putAssumeCapacityNoClobber(c_object, {}); } - const have_bin_emit = comp.bin_file.options.emit != null or comp.whole_bin_basename != null; + const have_bin_emit = comp.bin_file.options.emit != null or comp.whole_bin_sub_path != null; if (have_bin_emit and !comp.bin_file.options.skip_linker_dependencies) { // If we need to build glibc for the target, add work items for it. @@ -1941,19 +1946,7 @@ pub fn update(comp: *Compilation) !void { log.debug("CacheMode.whole cache hit for {s}", .{comp.bin_file.options.root_name}); const digest = man.final(); - // Communicate the output binary location to parent Compilations. - if (comp.whole_bin_basename) |basename| { - const new_sub_path = try std.fs.path.join(comp.gpa, &.{ - "o", &digest, basename, - }); - if (comp.bin_file.options.emit) |emit| { - comp.gpa.free(emit.sub_path); - } - comp.bin_file.options.emit = .{ - .directory = comp.local_cache_directory, - .sub_path = new_sub_path, - }; - } + comp.wholeCacheModeSetBinFilePath(&digest); assert(comp.bin_file.lock == null); comp.bin_file.lock = man.toOwnedLock(); @@ -1989,13 +1982,10 @@ pub fn update(comp: *Compilation) !void { // This resets the link.File to operate as if we called openPath() in create() // instead of simulating -fno-emit-bin. var options = comp.bin_file.options; - if (comp.whole_bin_basename) |basename| { - if (options.emit) |emit| { - comp.gpa.free(emit.sub_path); - } + if (comp.whole_bin_sub_path) |sub_path| { options.emit = .{ .directory = tmp_artifact_directory.?, - .sub_path = basename, + .sub_path = std.fs.path.basename(sub_path), }; } comp.bin_file.destroy(); @@ -2149,13 +2139,7 @@ pub fn update(comp: *Compilation) !void { log.warn("failed to write cache manifest: {s}", .{@errorName(err)}); }; - // Communicate the output binary location to parent Compilations. - if (comp.whole_bin_basename) |basename| { - comp.bin_file.options.emit = .{ - .directory = comp.local_cache_directory, - .sub_path = try std.fs.path.join(comp.gpa, &.{ "o", &digest, basename }), - }; - } + comp.wholeCacheModeSetBinFilePath(&digest); assert(comp.bin_file.lock == null); comp.bin_file.lock = man.toOwnedLock(); @@ -2163,6 +2147,18 @@ pub fn update(comp: *Compilation) !void { } } +/// Communicate the output binary location to parent Compilations. +fn wholeCacheModeSetBinFilePath(comp: *Compilation, digest: *const [Cache.hex_digest_len]u8) void { + const sub_path = comp.whole_bin_sub_path orelse return; + const digest_start = 2; // "o/[digest]/[basename]" + mem.copy(u8, sub_path[digest_start..], digest); + + comp.bin_file.options.emit = .{ + .directory = comp.local_cache_directory, + .sub_path = sub_path, + }; +} + /// This is only observed at compile-time and used to emit a compile error /// to remind the programmer to update multiple related pieces of code that /// are in different locations. Bump this number when adding or deleting -- cgit v1.2.3 From 013c286e432eacbd55a9f842cd277be11f72cc49 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Thu, 30 Dec 2021 21:35:24 -0700 Subject: stage2: fix memory leak in addNonIncrementalStuffToCacheManifest I accidentally used errdefer instead of defer for the function-local arena. --- src/Compilation.zig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/Compilation.zig') diff --git a/src/Compilation.zig b/src/Compilation.zig index 4c31f8a998..c17b187f4f 100644 --- a/src/Compilation.zig +++ b/src/Compilation.zig @@ -2170,7 +2170,7 @@ fn addNonIncrementalStuffToCacheManifest(comp: *Compilation, man: *Cache.Manifes const target = comp.getTarget(); var arena_allocator = std.heap.ArenaAllocator.init(gpa); - errdefer arena_allocator.deinit(); + defer arena_allocator.deinit(); const arena = arena_allocator.allocator(); comptime assert(link_hash_implementation_version == 1); -- cgit v1.2.3 From 55243cfb31e40f09503fbc5b4bb63e7a5d3d87eb Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Thu, 30 Dec 2021 22:04:18 -0700 Subject: stage2: fix implibs Broken by the introduction of `CacheMode.whole`, they are now working again by using the same mechanism as emitted binary files. --- src/Compilation.zig | 57 ++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 43 insertions(+), 14 deletions(-) (limited to 'src/Compilation.zig') diff --git a/src/Compilation.zig b/src/Compilation.zig index c17b187f4f..5b14eacc06 100644 --- a/src/Compilation.zig +++ b/src/Compilation.zig @@ -103,6 +103,8 @@ self_exe_path: ?[]const u8, /// of exactly the correct size for "o/[digest]/[basename]". /// The basename is of the outputted binary file in case we don't know the directory yet. whole_bin_sub_path: ?[]u8, +/// Same as `whole_bin_sub_path` but for implibs. +whole_implib_sub_path: ?[]u8, zig_lib_directory: Directory, local_cache_directory: Directory, global_cache_directory: Directory, @@ -1477,6 +1479,12 @@ pub fn create(gpa: Allocator, options: InitOptions) !*Compilation { }; } + // This is here for the same reason as in `bin_file_emit` above. + switch (cache_mode) { + .whole => break :blk null, + .incremental => {}, + } + // Use the same directory as the bin. The CLI already emits an // error if -fno-emit-bin is combined with -femit-implib. break :blk link.Emit{ @@ -1491,14 +1499,9 @@ pub fn create(gpa: Allocator, options: InitOptions) !*Compilation { // for `CacheMode.whole`. // This memory will be overwritten with the real digest in update() but // the basename will be preserved. - const whole_bin_sub_path: ?[]u8 = if (options.emit_bin) |x| - if (x.directory == null) - try std.fmt.allocPrint(arena, "o" ++ std.fs.path.sep_str ++ - ("x" ** Cache.hex_digest_len) ++ std.fs.path.sep_str ++ "{s}", .{x.basename}) - else - null - else - null; + const whole_bin_sub_path: ?[]u8 = try prepareWholeEmitSubPath(arena, options.emit_bin); + // Same thing but for implibs. + const whole_implib_sub_path: ?[]u8 = try prepareWholeEmitSubPath(arena, options.emit_implib); var system_libs: std.StringArrayHashMapUnmanaged(SystemLib) = .{}; errdefer system_libs.deinit(gpa); @@ -1606,6 +1609,7 @@ pub fn create(gpa: Allocator, options: InitOptions) !*Compilation { .global_cache_directory = options.global_cache_directory, .bin_file = bin_file, .whole_bin_sub_path = whole_bin_sub_path, + .whole_implib_sub_path = whole_implib_sub_path, .emit_asm = options.emit_asm, .emit_llvm_ir = options.emit_llvm_ir, .emit_llvm_bc = options.emit_llvm_bc, @@ -1988,6 +1992,12 @@ pub fn update(comp: *Compilation) !void { .sub_path = std.fs.path.basename(sub_path), }; } + if (comp.whole_implib_sub_path) |sub_path| { + options.implib_emit = .{ + .directory = tmp_artifact_directory.?, + .sub_path = std.fs.path.basename(sub_path), + }; + } comp.bin_file.destroy(); comp.bin_file = try link.File.openPath(comp.gpa, options); } @@ -2149,14 +2159,33 @@ pub fn update(comp: *Compilation) !void { /// Communicate the output binary location to parent Compilations. fn wholeCacheModeSetBinFilePath(comp: *Compilation, digest: *const [Cache.hex_digest_len]u8) void { - const sub_path = comp.whole_bin_sub_path orelse return; const digest_start = 2; // "o/[digest]/[basename]" - mem.copy(u8, sub_path[digest_start..], digest); - comp.bin_file.options.emit = .{ - .directory = comp.local_cache_directory, - .sub_path = sub_path, - }; + if (comp.whole_bin_sub_path) |sub_path| { + mem.copy(u8, sub_path[digest_start..], digest); + + comp.bin_file.options.emit = .{ + .directory = comp.local_cache_directory, + .sub_path = sub_path, + }; + } + + if (comp.whole_implib_sub_path) |sub_path| { + mem.copy(u8, sub_path[digest_start..], digest); + + comp.bin_file.options.implib_emit = .{ + .directory = comp.local_cache_directory, + .sub_path = sub_path, + }; + } +} + +fn prepareWholeEmitSubPath(arena: Allocator, opt_emit: ?EmitLoc) error{OutOfMemory}!?[]u8 { + const emit = opt_emit orelse return null; + if (emit.directory != null) return null; + const s = std.fs.path.sep_str; + const format = "o" ++ s ++ ("x" ** Cache.hex_digest_len) ++ s ++ "{s}"; + return try std.fmt.allocPrint(arena, format, .{emit.basename}); } /// This is only observed at compile-time and used to emit a compile error -- cgit v1.2.3 From 9dc25dd0b695c907e861bfd5a974cf454e657228 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Thu, 30 Dec 2021 23:14:34 -0700 Subject: stage2: fix UAF of system_libs --- src/Compilation.zig | 2 +- src/link.zig | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'src/Compilation.zig') diff --git a/src/Compilation.zig b/src/Compilation.zig index 5b14eacc06..8b761cd450 100644 --- a/src/Compilation.zig +++ b/src/Compilation.zig @@ -1985,7 +1985,7 @@ pub fn update(comp: *Compilation) !void { // This resets the link.File to operate as if we called openPath() in create() // instead of simulating -fno-emit-bin. - var options = comp.bin_file.options; + var options = comp.bin_file.options.move(); if (comp.whole_bin_sub_path) |sub_path| { options.emit = .{ .directory = tmp_artifact_directory.?, diff --git a/src/link.zig b/src/link.zig index 215d24adca..a720bdada2 100644 --- a/src/link.zig +++ b/src/link.zig @@ -167,6 +167,12 @@ pub const Options = struct { pub fn effectiveOutputMode(options: Options) std.builtin.OutputMode { return if (options.use_lld) .Obj else options.output_mode; } + + pub fn move(self: *Options) Options { + const copied_state = self.*; + self.system_libs = .{}; + return copied_state; + } }; pub const File = struct { -- cgit v1.2.3 From 663ffa5a7a1d53d1adf9c1ea6991aad8bf82aadc Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Sun, 2 Jan 2022 18:54:16 -0700 Subject: stage2: fix missing items from whole cache mode hash Without this, Zig would re-use a compiler-rt built with stage2 when one built by stage1 was needed. --- src/Compilation.zig | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) (limited to 'src/Compilation.zig') diff --git a/src/Compilation.zig b/src/Compilation.zig index 8b761cd450..b01b4bbae4 100644 --- a/src/Compilation.zig +++ b/src/Compilation.zig @@ -1263,6 +1263,8 @@ pub fn create(gpa: Allocator, options: InitOptions) !*Compilation { // track it and packages as files. }, } + + // Synchronize with other matching comments: ZigOnlyHashStuff hash.add(valgrind); hash.add(single_threaded); hash.add(use_stage1); @@ -1304,11 +1306,11 @@ pub fn create(gpa: Allocator, options: InitOptions) !*Compilation { errdefer artifact_dir.close(); const zig_cache_artifact_directory: Directory = .{ .handle = artifact_dir, - .path = if (options.local_cache_directory.path) |p| - try std.fs.path.join(arena, &[_][]const u8{ p, artifact_sub_dir }) - else - artifact_sub_dir, + .path = try options.local_cache_directory.join(arena, &[_][]const u8{artifact_sub_dir}), }; + log.debug("zig_cache_artifact_directory='{s}' use_stage1={}", .{ + zig_cache_artifact_directory.path, use_stage1, + }); const builtin_pkg = try Package.createWithDir( gpa, @@ -2220,6 +2222,18 @@ fn addNonIncrementalStuffToCacheManifest(comp: *Compilation, man: *Cache.Manifes try addPackageTableToCacheHash(&man.hash, &arena_allocator, mod.main_pkg.table, &seen_table, .{ .files = man }); } + // Synchronize with other matching comments: ZigOnlyHashStuff + man.hash.add(comp.bin_file.options.valgrind); + man.hash.add(comp.bin_file.options.single_threaded); + man.hash.add(comp.bin_file.options.use_stage1); + man.hash.add(comp.bin_file.options.use_llvm); + man.hash.add(comp.bin_file.options.dll_export_fns); + man.hash.add(comp.bin_file.options.is_test); + man.hash.add(comp.test_evented_io); + man.hash.addOptionalBytes(comp.test_filter); + man.hash.addOptionalBytes(comp.test_name_prefix); + man.hash.add(comp.bin_file.options.skip_linker_dependencies); + man.hash.add(comp.bin_file.options.parent_compilation_link_libc); man.hash.add(mod.emit_h != null); } -- cgit v1.2.3 From d94303be2bcee33e7efba22a186fd06eaa809707 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Sun, 2 Jan 2022 20:57:04 -0700 Subject: stage2: introduce renameTmpIntoCache into the linker API Doc comments reproduced here: This function is called by the frontend before flush(). It communicates that `options.bin_file.emit` directory needs to be renamed from `[zig-cache]/tmp/[random]` to `[zig-cache]/o/[digest]`. The frontend would like to simply perform a file system rename, however, some linker backends care about the file paths of the objects they are linking. So this function call tells linker backends to rename the paths of object files to observe the new directory path. Linker backends which do not have this requirement can fall back to the simple implementation at the bottom of this function. This function is only called when CacheMode is `whole`. This solves stack trace regressions on Windows and macOS because the linker backends do not observe object file paths until flush(). --- src/Compilation.zig | 84 +++++++++++++++++++++++++++++++--------------------- src/codegen/llvm.zig | 10 ++++--- src/link.zig | 59 +++++++++++++++++++++++++++++------- src/link/Coff.zig | 21 ++++++++----- src/link/Elf.zig | 26 +++++++++++----- src/link/MachO.zig | 23 +++++++++----- src/link/Wasm.zig | 22 +++++++++----- 7 files changed, 166 insertions(+), 79 deletions(-) (limited to 'src/Compilation.zig') diff --git a/src/Compilation.zig b/src/Compilation.zig index b01b4bbae4..cbb44c1e18 100644 --- a/src/Compilation.zig +++ b/src/Compilation.zig @@ -2093,39 +2093,15 @@ pub fn update(comp: *Compilation) !void { } if (comp.totalErrorCount() != 0) { - // Skip flushing. + // Skip flushing and keep source files loaded for error reporting. comp.link_error_flags = .{}; return; } - // This is needed before reading the error flags. - try comp.bin_file.flush(comp); - comp.link_error_flags = comp.bin_file.errorFlags(); - - if (!use_stage1) { - if (comp.bin_file.options.module) |module| { - try link.File.C.flushEmitH(module); - } - } - // Flush takes care of -femit-bin, but we still have -femit-llvm-ir, -femit-llvm-bc, and // -femit-asm to handle, in the case of C objects. comp.emitOthers(); - // If there are any errors, we anticipate the source files being loaded - // to report error messages. Otherwise we unload all source files to save memory. - // The ZIR needs to stay loaded in memory because (1) Decl objects contain references - // to it, and (2) generic instantiations, comptime calls, inline calls will need - // to reference the ZIR. - if (!comp.keep_source_files_loaded) { - if (comp.bin_file.options.module) |module| { - for (module.import_table.values()) |file| { - file.unloadTree(comp.gpa); - file.unloadSource(comp.gpa); - } - } - } - if (comp.whole_cache_manifest != null) { const digest = man.final(); @@ -2139,23 +2115,63 @@ pub fn update(comp: *Compilation) !void { const o_sub_path = try std.fs.path.join(comp.gpa, &[_][]const u8{ "o", &digest }); defer comp.gpa.free(o_sub_path); - try std.fs.rename( - comp.local_cache_directory.handle, - tmp_dir_sub_path, - comp.local_cache_directory.handle, - o_sub_path, - ); + try comp.bin_file.renameTmpIntoCache(comp.local_cache_directory, tmp_dir_sub_path, o_sub_path); + comp.wholeCacheModeSetBinFilePath(&digest); + + // This is intentionally sandwiched between renameTmpIntoCache() and writeManifest(). + if (comp.bin_file.options.module) |module| { + // We need to set the zig_cache_artifact_directory for -femit-asm, -femit-llvm-ir, + // etc to know where to output to. + var artifact_dir = try comp.local_cache_directory.handle.openDir(o_sub_path, .{}); + defer artifact_dir.close(); + + var dir_path = try comp.local_cache_directory.join(comp.gpa, &.{o_sub_path}); + defer comp.gpa.free(dir_path); + + module.zig_cache_artifact_directory = .{ + .handle = artifact_dir, + .path = dir_path, + }; + + try comp.flush(); + } else { + try comp.flush(); + } // Failure here only means an unnecessary cache miss. man.writeManifest() catch |err| { log.warn("failed to write cache manifest: {s}", .{@errorName(err)}); }; - comp.wholeCacheModeSetBinFilePath(&digest); - assert(comp.bin_file.lock == null); comp.bin_file.lock = man.toOwnedLock(); - return; + } else { + try comp.flush(); + } + + // Unload all source files to save memory. + // The ZIR needs to stay loaded in memory because (1) Decl objects contain references + // to it, and (2) generic instantiations, comptime calls, inline calls will need + // to reference the ZIR. + if (!comp.keep_source_files_loaded) { + if (comp.bin_file.options.module) |module| { + for (module.import_table.values()) |file| { + file.unloadTree(comp.gpa); + file.unloadSource(comp.gpa); + } + } + } +} + +fn flush(comp: *Compilation) !void { + try comp.bin_file.flush(comp); // This is needed before reading the error flags. + comp.link_error_flags = comp.bin_file.errorFlags(); + + const use_stage1 = build_options.is_stage1 and comp.bin_file.options.use_stage1; + if (!use_stage1) { + if (comp.bin_file.options.module) |module| { + try link.File.C.flushEmitH(module); + } } } diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig index a248fb1718..e29116476d 100644 --- a/src/codegen/llvm.zig +++ b/src/codegen/llvm.zig @@ -324,10 +324,12 @@ pub const Object = struct { const mod = comp.bin_file.options.module.?; const cache_dir = mod.zig_cache_artifact_directory; - const emit_bin_path: ?[*:0]const u8 = if (comp.bin_file.options.emit) |emit| - try emit.directory.joinZ(arena, &[_][]const u8{self.sub_path}) - else - null; + const emit_bin_path: ?[*:0]const u8 = if (comp.bin_file.options.emit) |emit| blk: { + const full_out_path = try emit.directory.join(arena, &[_][]const u8{emit.sub_path}); + break :blk try std.fs.path.joinZ(arena, &.{ + std.fs.path.dirname(full_out_path).?, self.sub_path, + }); + } else null; const emit_asm_path = try locPath(arena, comp.emit_asm, cache_dir); const emit_llvm_ir_path = try locPath(arena, comp.emit_llvm_ir, cache_dir); diff --git a/src/link.zig b/src/link.zig index a720bdada2..5c33ce3b70 100644 --- a/src/link.zig +++ b/src/link.zig @@ -636,6 +636,36 @@ pub const File = struct { } } + /// This function is called by the frontend before flush(). It communicates that + /// `options.bin_file.emit` directory needs to be renamed from + /// `[zig-cache]/tmp/[random]` to `[zig-cache]/o/[digest]`. + /// The frontend would like to simply perform a file system rename, however, + /// some linker backends care about the file paths of the objects they are linking. + /// So this function call tells linker backends to rename the paths of object files + /// to observe the new directory path. + /// Linker backends which do not have this requirement can fall back to the simple + /// implementation at the bottom of this function. + /// This function is only called when CacheMode is `whole`. + pub fn renameTmpIntoCache( + base: *File, + cache_directory: Compilation.Directory, + tmp_dir_sub_path: []const u8, + o_sub_path: []const u8, + ) !void { + // So far, none of the linker backends need to respond to this event, however, + // it makes sense that they might want to. So we leave this mechanism here + // for now. Once the linker backends get more mature, if it turns out this + // is not needed we can refactor this into having the frontend do the rename + // directly, and remove this function from link.zig. + _ = base; + try std.fs.rename( + cache_directory.handle, + tmp_dir_sub_path, + cache_directory.handle, + o_sub_path, + ); + } + pub fn linkAsArchive(base: *File, comp: *Compilation) !void { const tracy = trace(@src()); defer tracy.end(); @@ -645,9 +675,11 @@ pub const File = struct { const arena = arena_allocator.allocator(); const directory = base.options.emit.?.directory; // Just an alias to make it shorter to type. + const full_out_path = try directory.join(arena, &[_][]const u8{base.options.emit.?.sub_path}); + const full_out_path_z = try arena.dupeZ(u8, full_out_path); - // If there is no Zig code to compile, then we should skip flushing the output file because it - // will not be part of the linker line anyway. + // If there is no Zig code to compile, then we should skip flushing the output file + // because it will not be part of the linker line anyway. const module_obj_path: ?[]const u8 = if (base.options.module) |module| blk: { const use_stage1 = build_options.is_stage1 and base.options.use_stage1; if (use_stage1) { @@ -656,20 +688,28 @@ pub const File = struct { .target = base.options.target, .output_mode = .Obj, }); - const o_directory = module.zig_cache_artifact_directory; - const full_obj_path = try o_directory.join(arena, &[_][]const u8{obj_basename}); - break :blk full_obj_path; + switch (base.options.cache_mode) { + .incremental => break :blk try module.zig_cache_artifact_directory.join( + arena, + &[_][]const u8{obj_basename}, + ), + .whole => break :blk try fs.path.join(arena, &.{ + fs.path.dirname(full_out_path_z).?, obj_basename, + }), + } } if (base.options.object_format == .macho) { try base.cast(MachO).?.flushObject(comp); } else { try base.flushModule(comp); } - const obj_basename = base.intermediary_basename.?; - const full_obj_path = try directory.join(arena, &[_][]const u8{obj_basename}); - break :blk full_obj_path; + break :blk try fs.path.join(arena, &.{ + fs.path.dirname(full_out_path_z).?, base.intermediary_basename.?, + }); } else null; + log.debug("module_obj_path={s}", .{if (module_obj_path) |s| s else "(null)"}); + const compiler_rt_path: ?[]const u8 = if (base.options.include_compiler_rt) comp.compiler_rt_obj.?.full_object_path else @@ -742,9 +782,6 @@ pub const File = struct { object_files.appendAssumeCapacity(try arena.dupeZ(u8, p)); } - const full_out_path = try directory.join(arena, &[_][]const u8{base.options.emit.?.sub_path}); - const full_out_path_z = try arena.dupeZ(u8, full_out_path); - if (base.options.verbose_link) { std.debug.print("ar rcs {s}", .{full_out_path_z}); for (object_files.items) |arg| { diff --git a/src/link/Coff.zig b/src/link/Coff.zig index e6788120a0..ec06c28b44 100644 --- a/src/link/Coff.zig +++ b/src/link/Coff.zig @@ -880,6 +880,7 @@ fn linkWithLLD(self: *Coff, comp: *Compilation) !void { const arena = arena_allocator.allocator(); const directory = self.base.options.emit.?.directory; // Just an alias to make it shorter to type. + const full_out_path = try directory.join(arena, &[_][]const u8{self.base.options.emit.?.sub_path}); // If there is no Zig code to compile, then we should skip flushing the output file because it // will not be part of the linker line anyway. @@ -891,15 +892,22 @@ fn linkWithLLD(self: *Coff, comp: *Compilation) !void { .target = self.base.options.target, .output_mode = .Obj, }); - const o_directory = module.zig_cache_artifact_directory; - const full_obj_path = try o_directory.join(arena, &[_][]const u8{obj_basename}); - break :blk full_obj_path; + switch (self.base.options.cache_mode) { + .incremental => break :blk try module.zig_cache_artifact_directory.join( + arena, + &[_][]const u8{obj_basename}, + ), + .whole => break :blk try fs.path.join(arena, &.{ + fs.path.dirname(full_out_path).?, obj_basename, + }), + } } try self.flushModule(comp); - const obj_basename = self.base.intermediary_basename.?; - const full_obj_path = try directory.join(arena, &[_][]const u8{obj_basename}); - break :blk full_obj_path; + + break :blk try fs.path.join(arena, &.{ + fs.path.dirname(full_out_path).?, self.base.intermediary_basename.?, + }); } else null; const is_lib = self.base.options.output_mode == .Lib; @@ -978,7 +986,6 @@ fn linkWithLLD(self: *Coff, comp: *Compilation) !void { }; } - const full_out_path = try directory.join(arena, &[_][]const u8{self.base.options.emit.?.sub_path}); if (self.base.options.output_mode == .Obj) { // LLD's COFF driver does not support the equivalent of `-r` so we do a simple file copy // here. TODO: think carefully about how we can avoid this redundant operation when doing diff --git a/src/link/Elf.zig b/src/link/Elf.zig index 5f3771a3cb..105b012fbf 100644 --- a/src/link/Elf.zig +++ b/src/link/Elf.zig @@ -297,6 +297,7 @@ pub fn createEmpty(gpa: Allocator, options: link.Options) !*Elf { else => return error.UnsupportedELFArchitecture, }; const self = try gpa.create(Elf); + errdefer gpa.destroy(self); self.* = .{ .base = .{ .tag = .elf, @@ -306,6 +307,9 @@ pub fn createEmpty(gpa: Allocator, options: link.Options) !*Elf { }, .ptr_width = ptr_width, }; + // TODO get rid of the sub_path parameter to LlvmObject.create + // and create the llvm_object here. Also openPath needs to + // not override this field or there will be a memory leak. return self; } @@ -1298,6 +1302,7 @@ fn linkWithLLD(self: *Elf, comp: *Compilation) !void { const arena = arena_allocator.allocator(); const directory = self.base.options.emit.?.directory; // Just an alias to make it shorter to type. + const full_out_path = try directory.join(arena, &[_][]const u8{self.base.options.emit.?.sub_path}); // If there is no Zig code to compile, then we should skip flushing the output file because it // will not be part of the linker line anyway. @@ -1309,15 +1314,22 @@ fn linkWithLLD(self: *Elf, comp: *Compilation) !void { .target = self.base.options.target, .output_mode = .Obj, }); - const o_directory = module.zig_cache_artifact_directory; - const full_obj_path = try o_directory.join(arena, &[_][]const u8{obj_basename}); - break :blk full_obj_path; + switch (self.base.options.cache_mode) { + .incremental => break :blk try module.zig_cache_artifact_directory.join( + arena, + &[_][]const u8{obj_basename}, + ), + .whole => break :blk try fs.path.join(arena, &.{ + fs.path.dirname(full_out_path).?, obj_basename, + }), + } } try self.flushModule(comp); - const obj_basename = self.base.intermediary_basename.?; - const full_obj_path = try directory.join(arena, &[_][]const u8{obj_basename}); - break :blk full_obj_path; + + break :blk try fs.path.join(arena, &.{ + fs.path.dirname(full_out_path).?, self.base.intermediary_basename.?, + }); } else null; const is_obj = self.base.options.output_mode == .Obj; @@ -1434,8 +1446,6 @@ fn linkWithLLD(self: *Elf, comp: *Compilation) !void { }; } - const full_out_path = try directory.join(arena, &[_][]const u8{self.base.options.emit.?.sub_path}); - // Due to a deficiency in LLD, we need to special-case BPF to a simple file copy when generating // relocatables. Normally, we would expect `lld -r` to work. However, because LLD wants to resolve // BPF relocations which it shouldn't, it fails before even generating the relocatable. diff --git a/src/link/MachO.zig b/src/link/MachO.zig index f970ffb5f2..ed720080af 100644 --- a/src/link/MachO.zig +++ b/src/link/MachO.zig @@ -423,6 +423,7 @@ pub fn flushModule(self: *MachO, comp: *Compilation) !void { const arena = arena_allocator.allocator(); const directory = self.base.options.emit.?.directory; // Just an alias to make it shorter to type. + const full_out_path = try directory.join(arena, &[_][]const u8{self.base.options.emit.?.sub_path}); // If there is no Zig code to compile, then we should skip flushing the output file because it // will not be part of the linker line anyway. @@ -433,15 +434,24 @@ pub fn flushModule(self: *MachO, comp: *Compilation) !void { .target = self.base.options.target, .output_mode = .Obj, }); - const o_directory = module.zig_cache_artifact_directory; - const full_obj_path = try o_directory.join(arena, &[_][]const u8{obj_basename}); - break :blk full_obj_path; + switch (self.base.options.cache_mode) { + .incremental => break :blk try module.zig_cache_artifact_directory.join( + arena, + &[_][]const u8{obj_basename}, + ), + .whole => break :blk try fs.path.join(arena, &.{ + fs.path.dirname(full_out_path).?, obj_basename, + }), + } } const obj_basename = self.base.intermediary_basename orelse break :blk null; + try self.flushObject(comp); - const full_obj_path = try directory.join(arena, &[_][]const u8{obj_basename}); - break :blk full_obj_path; + + break :blk try fs.path.join(arena, &.{ + fs.path.dirname(full_out_path).?, obj_basename, + }); } else null; const is_lib = self.base.options.output_mode == .Lib; @@ -534,7 +544,6 @@ pub fn flushModule(self: *MachO, comp: *Compilation) !void { else => |e| return e, }; } - const full_out_path = try directory.join(arena, &[_][]const u8{self.base.options.emit.?.sub_path}); if (self.base.options.output_mode == .Obj) { // LLD's MachO driver does not support the equivalent of `-r` so we do a simple file copy @@ -1269,7 +1278,7 @@ fn parseInputFiles(self: *MachO, files: []const []const u8, syslibroot: ?[]const for (files) |file_name| { const full_path = full_path: { var buffer: [fs.MAX_PATH_BYTES]u8 = undefined; - const path = try std.fs.realpath(file_name, &buffer); + const path = try fs.realpath(file_name, &buffer); break :full_path try self.base.allocator.dupe(u8, path); }; defer self.base.allocator.free(full_path); diff --git a/src/link/Wasm.zig b/src/link/Wasm.zig index 5f8f48d1b1..220ab2f53c 100644 --- a/src/link/Wasm.zig +++ b/src/link/Wasm.zig @@ -1050,6 +1050,7 @@ fn linkWithLLD(self: *Wasm, comp: *Compilation) !void { const arena = arena_allocator.allocator(); const directory = self.base.options.emit.?.directory; // Just an alias to make it shorter to type. + const full_out_path = try directory.join(arena, &[_][]const u8{self.base.options.emit.?.sub_path}); // If there is no Zig code to compile, then we should skip flushing the output file because it // will not be part of the linker line anyway. @@ -1061,15 +1062,22 @@ fn linkWithLLD(self: *Wasm, comp: *Compilation) !void { .target = self.base.options.target, .output_mode = .Obj, }); - const o_directory = module.zig_cache_artifact_directory; - const full_obj_path = try o_directory.join(arena, &[_][]const u8{obj_basename}); - break :blk full_obj_path; + switch (self.base.options.cache_mode) { + .incremental => break :blk try module.zig_cache_artifact_directory.join( + arena, + &[_][]const u8{obj_basename}, + ), + .whole => break :blk try fs.path.join(arena, &.{ + fs.path.dirname(full_out_path).?, obj_basename, + }), + } } try self.flushModule(comp); - const obj_basename = self.base.intermediary_basename.?; - const full_obj_path = try directory.join(arena, &[_][]const u8{obj_basename}); - break :blk full_obj_path; + + break :blk try fs.path.join(arena, &.{ + fs.path.dirname(full_out_path).?, self.base.intermediary_basename.?, + }); } else null; const is_obj = self.base.options.output_mode == .Obj; @@ -1143,8 +1151,6 @@ fn linkWithLLD(self: *Wasm, comp: *Compilation) !void { }; } - const full_out_path = try directory.join(arena, &[_][]const u8{self.base.options.emit.?.sub_path}); - if (self.base.options.output_mode == .Obj) { // LLD's WASM driver does not support the equivalent of `-r` so we do a simple file copy // here. TODO: think carefully about how we can avoid this redundant operation when doing -- cgit v1.2.3