diff options
| author | Andrew Kelley <andrew@ziglang.org> | 2021-10-17 18:57:54 -0700 |
|---|---|---|
| committer | Andrew Kelley <andrew@ziglang.org> | 2021-10-17 18:59:11 -0700 |
| commit | e5dac0a0b391f227605e496a09f32b453ac3280d (patch) | |
| tree | df9fc28a45b801727a28ef56a8abc08deff43bc9 | |
| parent | ad17108bddc3bc198190407ab5b00820b2c17cd5 (diff) | |
| download | zig-e5dac0a0b391f227605e496a09f32b453ac3280d.tar.gz zig-e5dac0a0b391f227605e496a09f32b453ac3280d.zip | |
stage2: implement `@embedFile`
| -rw-r--r-- | src/Compilation.zig | 100 |
|---|---|---|
| -rw-r--r-- | src/Module.zig | 138 |
| -rw-r--r-- | src/Sema.zig | 45 |
3 files changed, 275 insertions, 8 deletions
diff --git a/src/Compilation.zig b/src/Compilation.zig index f997b53388..63a6b50d5b 100644 --- a/src/Compilation.zig +++ b/src/Compilation.zig @@ -55,6 +55,10 @@ c_object_work_queue: std.fifo.LinearFifo(*CObject, .Dynamic), /// since the last compilation, as well as scan for `@import` and queue up /// additional jobs corresponding to those new files. astgen_work_queue: std.fifo.LinearFifo(*Module.File, .Dynamic), +/// These jobs are to inspect the file system stat() and if the embedded file has changed +/// on disk, mark the corresponding Decl outdated and queue up an `analyze_decl` +/// task for it. +embed_file_work_queue: std.fifo.LinearFifo(*Module.EmbedFile, .Dynamic), /// The ErrorMsg memory is owned by the `CObject`, using Compilation's general purpose allocator. /// This data is accessed by multiple threads and is protected by `mutex`. @@ -181,6 +185,10 @@ const Job = union(enum) { /// It may have already be analyzed, or it may have been determined /// to be outdated; in this case perform semantic analysis again. analyze_decl: *Module.Decl, + /// The file that was loaded with `@embedFile` has changed on disk + /// and has been re-loaded into memory. All Decls that depend on it + /// need to be re-analyzed. + update_embed_file: *Module.EmbedFile, /// The source file containing the Decl has been updated, and so the /// Decl may need its line number information updated in the debug info. 
update_line_number: *Module.Decl, @@ -1447,6 +1455,7 @@ pub fn create(gpa: *Allocator, options: InitOptions) !*Compilation { .work_queue = std.fifo.LinearFifo(Job, .Dynamic).init(gpa), .c_object_work_queue = std.fifo.LinearFifo(*CObject, .Dynamic).init(gpa), .astgen_work_queue = std.fifo.LinearFifo(*Module.File, .Dynamic).init(gpa), + .embed_file_work_queue = std.fifo.LinearFifo(*Module.EmbedFile, .Dynamic).init(gpa), .keep_source_files_loaded = options.keep_source_files_loaded, .use_clang = use_clang, .clang_argv = options.clang_argv, @@ -1632,6 +1641,7 @@ pub fn destroy(self: *Compilation) void { self.work_queue.deinit(); self.c_object_work_queue.deinit(); self.astgen_work_queue.deinit(); + self.embed_file_work_queue.deinit(); { var it = self.crt_files.iterator(); @@ -1747,6 +1757,16 @@ pub fn update(self: *Compilation) !void { } if (!use_stage1) { + // Put a work item in for checking if any files used with `@embedFile` changed. + { + try self.embed_file_work_queue.ensureUnusedCapacity(module.embed_table.count()); + var it = module.embed_table.iterator(); + while (it.next()) |entry| { + const embed_file = entry.value_ptr.*; + self.embed_file_work_queue.writeItemAssumeCapacity(embed_file); + } + } + try self.work_queue.writeItem(.{ .analyze_pkg = std_pkg }); if (self.bin_file.options.is_test) { try self.work_queue.writeItem(.{ .analyze_pkg = module.main_pkg }); @@ -1870,6 +1890,7 @@ pub fn totalErrorCount(self: *Compilation) usize { if (self.bin_file.options.module) |module| { total += module.failed_exports.count(); + total += module.failed_embed_files.count(); { var it = module.failed_files.iterator(); @@ -1967,6 +1988,13 @@ pub fn getAllErrorsAlloc(self: *Compilation) !AllErrors { } } { + var it = module.failed_embed_files.iterator(); + while (it.next()) |entry| { + const msg = entry.value_ptr.*; + try AllErrors.add(module, &arena, &errors, msg.*); + } + } + { var it = module.failed_decls.iterator(); while (it.next()) |entry| { // Skip errors for Decls within 
files that had a parse failure. @@ -2065,6 +2093,9 @@ pub fn performAllTheWork(self: *Compilation) error{ TimerUnsupported, OutOfMemor var c_obj_prog_node = main_progress_node.start("Compile C Objects", self.c_source_files.len); defer c_obj_prog_node.end(); + var embed_file_prog_node = main_progress_node.start("Detect @embedFile updates", self.embed_file_work_queue.count); + defer embed_file_prog_node.end(); + self.work_queue_wait_group.reset(); defer self.work_queue_wait_group.wait(); @@ -2079,6 +2110,13 @@ pub fn performAllTheWork(self: *Compilation) error{ TimerUnsupported, OutOfMemor }); } + while (self.embed_file_work_queue.readItem()) |embed_file| { + self.astgen_wait_group.start(); + try self.thread_pool.spawn(workerCheckEmbedFile, .{ + self, embed_file, &embed_file_prog_node, &self.astgen_wait_group, + }); + } + while (self.c_object_work_queue.readItem()) |c_object| { self.work_queue_wait_group.start(); try self.thread_pool.spawn(workerUpdateCObject, .{ @@ -2260,6 +2298,15 @@ pub fn performAllTheWork(self: *Compilation) error{ TimerUnsupported, OutOfMemor error.AnalysisFail => continue, }; }, + .update_embed_file => |embed_file| { + if (build_options.omit_stage2) + @panic("sadly stage2 is omitted from this build to save memory on the CI server"); + const module = self.bin_file.options.module.?; + module.updateEmbedFile(embed_file) catch |err| switch (err) { + error.OutOfMemory => return error.OutOfMemory, + error.AnalysisFail => continue, + }; + }, .update_line_number => |decl| { if (build_options.omit_stage2) @panic("sadly stage2 is omitted from this build to save memory on the CI server"); @@ -2542,6 +2589,29 @@ fn workerAstGenFile( } } +fn workerCheckEmbedFile( + comp: *Compilation, + embed_file: *Module.EmbedFile, + prog_node: *std.Progress.Node, + wg: *WaitGroup, +) void { + defer wg.finish(); + + var child_prog_node = prog_node.start(embed_file.sub_file_path, 0); + child_prog_node.activate(); + defer child_prog_node.end(); + + const mod = 
comp.bin_file.options.module.?; + mod.detectEmbedFileUpdate(embed_file) catch |err| { + comp.reportRetryableEmbedFileError(embed_file, err) catch |oom| switch (oom) { + // Swallowing this error is OK because it's implied to be OOM when + // there is a missing `failed_embed_files` error message. + error.OutOfMemory => {}, + }; + return; + }; +} + pub fn obtainCObjectCacheManifest(comp: *const Compilation) Cache.Manifest { var man = comp.cache_parent.obtain(); @@ -2790,6 +2860,36 @@ fn reportRetryableAstGenError( } } +fn reportRetryableEmbedFileError( + comp: *Compilation, + embed_file: *Module.EmbedFile, + err: anyerror, +) error{OutOfMemory}!void { + const mod = comp.bin_file.options.module.?; + const gpa = mod.gpa; + + const src_loc: Module.SrcLoc = embed_file.owner_decl.srcLoc(); + + const err_msg = if (embed_file.pkg.root_src_directory.path) |dir_path| + try Module.ErrorMsg.create( + gpa, + src_loc, + "unable to load '{s}" ++ std.fs.path.sep_str ++ "{s}': {s}", + .{ dir_path, embed_file.sub_file_path, @errorName(err) }, + ) + else + try Module.ErrorMsg.create(gpa, src_loc, "unable to load '{s}': {s}", .{ + embed_file.sub_file_path, @errorName(err), + }); + errdefer err_msg.destroy(gpa); + + { + const lock = comp.mutex.acquire(); + defer lock.release(); + try mod.failed_embed_files.putNoClobber(gpa, embed_file, err_msg); + } +} + fn updateCObject(comp: *Compilation, c_object: *CObject, c_obj_prog_node: *std.Progress.Node) !void { if (!build_options.have_llvm) { return comp.failCObj(c_object, "clang not available: compiler built without LLVM extensions", .{}); diff --git a/src/Module.zig b/src/Module.zig index f52e1c8ef7..13181ae326 100644 --- a/src/Module.zig +++ b/src/Module.zig @@ -55,11 +55,17 @@ decl_exports: std.AutoArrayHashMapUnmanaged(*Decl, []*Export) = .{}, /// is performing the export of another Decl. /// This table owns the Export memory. export_owners: std.AutoArrayHashMapUnmanaged(*Decl, []*Export) = .{}, -/// The set of all the files in the Module. 
We keep track of this in order to iterate -/// over it and check which source files have been modified on the file system when +/// The set of all the Zig source files in the Module. We keep track of this in order +/// to iterate over it and check which source files have been modified on the file system when /// an update is requested, as well as to cache `@import` results. /// Keys are fully resolved file paths. This table owns the keys and values. import_table: std.StringArrayHashMapUnmanaged(*File) = .{}, +/// The set of all the files which have been loaded with `@embedFile` in the Module. +/// We keep track of this in order to iterate over it and check which files have been +/// modified on the file system when an update is requested, as well as to cache +/// `@embedFile` results. +/// Keys are fully resolved file paths. This table owns the keys and values. +embed_table: std.StringHashMapUnmanaged(*EmbedFile) = .{}, /// The set of all the generic function instantiations. This is used so that when a generic /// function is called twice with the same comptime parameter arguments, both calls dispatch @@ -87,6 +93,8 @@ compile_log_decls: std.AutoArrayHashMapUnmanaged(*Decl, i32) = .{}, /// Using a map here for consistency with the other fields here. /// The ErrorMsg memory is owned by the `File`, using Module's general purpose allocator. failed_files: std.AutoArrayHashMapUnmanaged(*File, ?*ErrorMsg) = .{}, +/// The ErrorMsg memory is owned by the `EmbedFile`, using Module's general purpose allocator. +failed_embed_files: std.AutoArrayHashMapUnmanaged(*EmbedFile, *ErrorMsg) = .{}, /// Using a map here for consistency with the other fields here. /// The ErrorMsg memory is owned by the `Export`, using Module's general purpose allocator. failed_exports: std.AutoArrayHashMapUnmanaged(*Export, *ErrorMsg) = .{}, @@ -1534,6 +1542,23 @@ pub const File = struct { } }; +/// Represents the contents of a file loaded with `@embedFile`. 
+pub const EmbedFile = struct { + /// Relative to the owning package's root_src_dir. + /// Memory is stored in gpa, owned by EmbedFile. + sub_file_path: []const u8, + bytes: [:0]const u8, + stat_size: u64, + stat_inode: std.fs.File.INode, + stat_mtime: i128, + /// Package that this file is a part of, managed externally. + pkg: *Package, + /// The Decl that was created from the `@embedFile` to own this resource. + /// This is how zig knows what other Decl objects to invalidate if the file + /// changes on disk. + owner_decl: *Decl, +}; + /// This struct holds data necessary to construct API-facing `AllErrors.Message`. /// Its memory is managed with the general purpose allocator so that they /// can be created and destroyed in response to incremental updates. @@ -2364,6 +2389,11 @@ pub fn deinit(mod: *Module) void { } mod.failed_files.deinit(gpa); + for (mod.failed_embed_files.values()) |msg| { + msg.destroy(gpa); + } + mod.failed_embed_files.deinit(gpa); + for (mod.failed_exports.values()) |value| { value.destroy(gpa); } @@ -3060,6 +3090,32 @@ pub fn ensureDeclAnalyzed(mod: *Module, decl: *Decl) SemaError!void { } } +pub fn updateEmbedFile(mod: *Module, embed_file: *EmbedFile) SemaError!void { + const tracy = trace(@src()); + defer tracy.end(); + + // TODO we can potentially relax this if we store some more information along + // with decl dependency edges + for (embed_file.owner_decl.dependants.keys()) |dep| { + switch (dep.analysis) { + .unreferenced => unreachable, + .in_progress => continue, // already doing analysis, ok + .outdated => continue, // already queued for update + + .file_failure, + .dependency_failure, + .sema_failure, + .sema_failure_retryable, + .codegen_failure, + .codegen_failure_retryable, + .complete, + => if (dep.generation != mod.generation) { + try mod.markOutdatedDecl(dep); + }, + } + } +} + pub fn semaPkg(mod: *Module, pkg: *Package) !void { const file = (try mod.importPkg(pkg)).file; return mod.semaFile(file); @@ -3551,6 +3607,84 @@ pub 
fn importFile( }; } +pub fn embedFile(mod: *Module, cur_file: *File, rel_file_path: []const u8) !*EmbedFile { + const gpa = mod.gpa; + + // The resolved path is used as the key in the table, to detect if + // a file refers to the same as another, despite different relative paths. + const cur_pkg_dir_path = cur_file.pkg.root_src_directory.path orelse "."; + const resolved_path = try std.fs.path.resolve(gpa, &[_][]const u8{ + cur_pkg_dir_path, cur_file.sub_file_path, "..", rel_file_path, + }); + var keep_resolved_path = false; + defer if (!keep_resolved_path) gpa.free(resolved_path); + + const gop = try mod.embed_table.getOrPut(gpa, resolved_path); + if (gop.found_existing) return gop.value_ptr.*; + keep_resolved_path = true; // It's now owned by embed_table. + + const new_file = try gpa.create(EmbedFile); + errdefer gpa.destroy(new_file); + + const resolved_root_path = try std.fs.path.resolve(gpa, &[_][]const u8{cur_pkg_dir_path}); + defer gpa.free(resolved_root_path); + + if (!mem.startsWith(u8, resolved_path, resolved_root_path)) { + return error.ImportOutsidePkgPath; + } + // +1 for the directory separator here. + const sub_file_path = try gpa.dupe(u8, resolved_path[resolved_root_path.len + 1 ..]); + errdefer gpa.free(sub_file_path); + + var file = try cur_file.pkg.root_src_directory.handle.openFile(sub_file_path, .{}); + defer file.close(); + + const stat = try file.stat(); + const bytes = try file.readToEndAllocOptions(gpa, std.math.maxInt(u32), stat.size, 1, 0); + + log.debug("new embedFile. resolved_root_path={s}, resolved_path={s}, sub_file_path={s}, rel_file_path={s}", .{ + resolved_root_path, resolved_path, sub_file_path, rel_file_path, + }); + + gop.value_ptr.* = new_file; + new_file.* = .{ + .sub_file_path = sub_file_path, + .bytes = bytes, + .stat_size = stat.size, + .stat_inode = stat.inode, + .stat_mtime = stat.mtime, + .pkg = cur_file.pkg, + .owner_decl = undefined, // Set by Sema immediately after this function returns. 
+ }; + return new_file; +} + +pub fn detectEmbedFileUpdate(mod: *Module, embed_file: *EmbedFile) !void { + var file = try embed_file.pkg.root_src_directory.handle.openFile(embed_file.sub_file_path, .{}); + defer file.close(); + + const stat = try file.stat(); + + const unchanged_metadata = + stat.size == embed_file.stat_size and + stat.mtime == embed_file.stat_mtime and + stat.inode == embed_file.stat_inode; + + if (unchanged_metadata) return; + + const gpa = mod.gpa; + const bytes = try file.readToEndAllocOptions(gpa, std.math.maxInt(u32), stat.size, 1, 0); + gpa.free(embed_file.bytes); + embed_file.bytes = bytes; + embed_file.stat_size = stat.size; + embed_file.stat_mtime = stat.mtime; + embed_file.stat_inode = stat.inode; + + const lock = mod.comp.mutex.acquire(); + defer lock.release(); + try mod.comp.work_queue.writeItem(.{ .update_embed_file = embed_file }); +} + pub fn scanNamespace( mod: *Module, namespace: *Namespace, diff --git a/src/Sema.zig b/src/Sema.zig index 2211e69fb0..c96b890785 100644 --- a/src/Sema.zig +++ b/src/Sema.zig @@ -6467,6 +6467,45 @@ fn zirImport(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air. return sema.addConstant(file_root_decl.ty, file_root_decl.val); } +fn zirEmbedFile(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air.Inst.Ref { + const tracy = trace(@src()); + defer tracy.end(); + + const mod = sema.mod; + const inst_data = sema.code.instructions.items(.data)[inst].un_node; + const src = inst_data.src(); + const name = try sema.resolveConstString(block, src, inst_data.operand); + + const embed_file = mod.embedFile(block.getFileScope(), name) catch |err| switch (err) { + error.ImportOutsidePkgPath => { + return sema.fail(block, src, "embed of file outside package path: '{s}'", .{name}); + }, + else => { + // TODO: these errors are file system errors; make sure an update() will + // retry this and not cache the file system error, which may be transient. 
+ return sema.fail(block, src, "unable to open '{s}': {s}", .{ name, @errorName(err) }); + }, + }; + + var anon_decl = try block.startAnonDecl(); + defer anon_decl.deinit(); + + const bytes_including_null = embed_file.bytes[0 .. embed_file.bytes.len + 1]; + + // TODO instead of using `Value.Tag.bytes`, create a new value tag for pointing at + // a `*Module.EmbedFile`. The purpose of this would be: + // - If only the length is read and the bytes are not inspected by comptime code, + // there can be an optimization where the codegen backend does a copy_file_range + // into the final binary, and never loads the data into memory. + // - When a Decl is destroyed, it can free the `*Module.EmbedFile`. + embed_file.owner_decl = try anon_decl.finish( + try Type.Tag.array_u8_sentinel_0.create(anon_decl.arena(), embed_file.bytes.len), + try Value.Tag.bytes.create(anon_decl.arena(), bytes_including_null), + ); + + return sema.analyzeDeclRef(embed_file.owner_decl); +} + fn zirRetErrValueCode(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air.Inst.Ref { _ = block; _ = inst; @@ -9020,12 +9059,6 @@ fn zirBoolToInt(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!A return block.addUnOp(.bool_to_int, operand); } -fn zirEmbedFile(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air.Inst.Ref { - const inst_data = sema.code.instructions.items(.data)[inst].un_node; - const src = inst_data.src(); - return sema.fail(block, src, "TODO: Sema.zirEmbedFile", .{}); -} - fn zirErrorName(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air.Inst.Ref { const inst_data = sema.code.instructions.items(.data)[inst].un_node; const src = inst_data.src(); |
