aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndrew Kelley <andrew@ziglang.org>2021-10-17 18:57:54 -0700
committerAndrew Kelley <andrew@ziglang.org>2021-10-17 18:59:11 -0700
commite5dac0a0b391f227605e496a09f32b453ac3280d (patch)
treedf9fc28a45b801727a28ef56a8abc08deff43bc9
parentad17108bddc3bc198190407ab5b00820b2c17cd5 (diff)
downloadzig-e5dac0a0b391f227605e496a09f32b453ac3280d.tar.gz
zig-e5dac0a0b391f227605e496a09f32b453ac3280d.zip
stage2: implement `@embedFile`
-rw-r--r--src/Compilation.zig100
-rw-r--r--src/Module.zig138
-rw-r--r--src/Sema.zig45
3 files changed, 275 insertions, 8 deletions
diff --git a/src/Compilation.zig b/src/Compilation.zig
index f997b53388..63a6b50d5b 100644
--- a/src/Compilation.zig
+++ b/src/Compilation.zig
@@ -55,6 +55,10 @@ c_object_work_queue: std.fifo.LinearFifo(*CObject, .Dynamic),
/// since the last compilation, as well as scan for `@import` and queue up
/// additional jobs corresponding to those new files.
astgen_work_queue: std.fifo.LinearFifo(*Module.File, .Dynamic),
+/// These jobs are to inspect the file system stat() and if the embedded file has changed
+/// on disk, mark the corresponding Decl outdated and queue up an `analyze_decl`
+/// task for it.
+embed_file_work_queue: std.fifo.LinearFifo(*Module.EmbedFile, .Dynamic),
/// The ErrorMsg memory is owned by the `CObject`, using Compilation's general purpose allocator.
/// This data is accessed by multiple threads and is protected by `mutex`.
@@ -181,6 +185,10 @@ const Job = union(enum) {
/// It may have already been analyzed, or it may have been determined
/// to be outdated; in this case perform semantic analysis again.
analyze_decl: *Module.Decl,
+ /// The file that was loaded with `@embedFile` has changed on disk
+ /// and has been re-loaded into memory. All Decls that depend on it
+ /// need to be re-analyzed.
+ update_embed_file: *Module.EmbedFile,
/// The source file containing the Decl has been updated, and so the
/// Decl may need its line number information updated in the debug info.
update_line_number: *Module.Decl,
@@ -1447,6 +1455,7 @@ pub fn create(gpa: *Allocator, options: InitOptions) !*Compilation {
.work_queue = std.fifo.LinearFifo(Job, .Dynamic).init(gpa),
.c_object_work_queue = std.fifo.LinearFifo(*CObject, .Dynamic).init(gpa),
.astgen_work_queue = std.fifo.LinearFifo(*Module.File, .Dynamic).init(gpa),
+ .embed_file_work_queue = std.fifo.LinearFifo(*Module.EmbedFile, .Dynamic).init(gpa),
.keep_source_files_loaded = options.keep_source_files_loaded,
.use_clang = use_clang,
.clang_argv = options.clang_argv,
@@ -1632,6 +1641,7 @@ pub fn destroy(self: *Compilation) void {
self.work_queue.deinit();
self.c_object_work_queue.deinit();
self.astgen_work_queue.deinit();
+ self.embed_file_work_queue.deinit();
{
var it = self.crt_files.iterator();
@@ -1747,6 +1757,16 @@ pub fn update(self: *Compilation) !void {
}
if (!use_stage1) {
+ // Put a work item in for checking if any files used with `@embedFile` changed.
+ {
+ try self.embed_file_work_queue.ensureUnusedCapacity(module.embed_table.count());
+ var it = module.embed_table.iterator();
+ while (it.next()) |entry| {
+ const embed_file = entry.value_ptr.*;
+ self.embed_file_work_queue.writeItemAssumeCapacity(embed_file);
+ }
+ }
+
try self.work_queue.writeItem(.{ .analyze_pkg = std_pkg });
if (self.bin_file.options.is_test) {
try self.work_queue.writeItem(.{ .analyze_pkg = module.main_pkg });
@@ -1870,6 +1890,7 @@ pub fn totalErrorCount(self: *Compilation) usize {
if (self.bin_file.options.module) |module| {
total += module.failed_exports.count();
+ total += module.failed_embed_files.count();
{
var it = module.failed_files.iterator();
@@ -1967,6 +1988,13 @@ pub fn getAllErrorsAlloc(self: *Compilation) !AllErrors {
}
}
{
+ var it = module.failed_embed_files.iterator();
+ while (it.next()) |entry| {
+ const msg = entry.value_ptr.*;
+ try AllErrors.add(module, &arena, &errors, msg.*);
+ }
+ }
+ {
var it = module.failed_decls.iterator();
while (it.next()) |entry| {
// Skip errors for Decls within files that had a parse failure.
@@ -2065,6 +2093,9 @@ pub fn performAllTheWork(self: *Compilation) error{ TimerUnsupported, OutOfMemor
var c_obj_prog_node = main_progress_node.start("Compile C Objects", self.c_source_files.len);
defer c_obj_prog_node.end();
+ var embed_file_prog_node = main_progress_node.start("Detect @embedFile updates", self.embed_file_work_queue.count);
+ defer embed_file_prog_node.end();
+
self.work_queue_wait_group.reset();
defer self.work_queue_wait_group.wait();
@@ -2079,6 +2110,13 @@ pub fn performAllTheWork(self: *Compilation) error{ TimerUnsupported, OutOfMemor
});
}
+ while (self.embed_file_work_queue.readItem()) |embed_file| {
+ self.astgen_wait_group.start();
+ try self.thread_pool.spawn(workerCheckEmbedFile, .{
+ self, embed_file, &embed_file_prog_node, &self.astgen_wait_group,
+ });
+ }
+
while (self.c_object_work_queue.readItem()) |c_object| {
self.work_queue_wait_group.start();
try self.thread_pool.spawn(workerUpdateCObject, .{
@@ -2260,6 +2298,15 @@ pub fn performAllTheWork(self: *Compilation) error{ TimerUnsupported, OutOfMemor
error.AnalysisFail => continue,
};
},
+ .update_embed_file => |embed_file| {
+ if (build_options.omit_stage2)
+ @panic("sadly stage2 is omitted from this build to save memory on the CI server");
+ const module = self.bin_file.options.module.?;
+ module.updateEmbedFile(embed_file) catch |err| switch (err) {
+ error.OutOfMemory => return error.OutOfMemory,
+ error.AnalysisFail => continue,
+ };
+ },
.update_line_number => |decl| {
if (build_options.omit_stage2)
@panic("sadly stage2 is omitted from this build to save memory on the CI server");
@@ -2542,6 +2589,29 @@ fn workerAstGenFile(
}
}
+/// Thread pool task: checks whether the file behind `embed_file` changed on disk.
+/// Reports progress under `prog_node` and signals `wg` when done. A failure from
+/// `detectEmbedFileUpdate` is recorded through `reportRetryableEmbedFileError`
+/// rather than returned, since this function returns `void`.
+fn workerCheckEmbedFile(
+    comp: *Compilation,
+    embed_file: *Module.EmbedFile,
+    prog_node: *std.Progress.Node,
+    wg: *WaitGroup,
+) void {
+    defer wg.finish();
+
+    var file_prog_node = prog_node.start(embed_file.sub_file_path, 0);
+    file_prog_node.activate();
+    defer file_prog_node.end();
+
+    const module = comp.bin_file.options.module.?;
+    module.detectEmbedFileUpdate(embed_file) catch |detect_err| {
+        comp.reportRetryableEmbedFileError(embed_file, detect_err) catch |report_err| switch (report_err) {
+            // Swallowing this error is OK because it's implied to be OOM when
+            // there is a missing `failed_embed_files` error message.
+            error.OutOfMemory => {},
+        };
+    };
+}
+
pub fn obtainCObjectCacheManifest(comp: *const Compilation) Cache.Manifest {
var man = comp.cache_parent.obtain();
@@ -2790,6 +2860,36 @@ fn reportRetryableAstGenError(
}
}
+/// Creates an "unable to load" `ErrorMsg` for `embed_file`, located at the
+/// source location of its owner Decl, and stores it in the module's
+/// `failed_embed_files` table while holding `comp.mutex`.
+/// The message includes the package root directory path when one is set.
+fn reportRetryableEmbedFileError(
+    comp: *Compilation,
+    embed_file: *Module.EmbedFile,
+    err: anyerror,
+) error{OutOfMemory}!void {
+    const module = comp.bin_file.options.module.?;
+    const gpa = module.gpa;
+    const src_loc: Module.SrcLoc = embed_file.owner_decl.srcLoc();
+
+    const msg = blk: {
+        const dir_path = embed_file.pkg.root_src_directory.path orelse {
+            // No root directory path: report the sub path on its own.
+            break :blk try Module.ErrorMsg.create(gpa, src_loc, "unable to load '{s}': {s}", .{
+                embed_file.sub_file_path, @errorName(err),
+            });
+        };
+        break :blk try Module.ErrorMsg.create(
+            gpa,
+            src_loc,
+            "unable to load '{s}" ++ std.fs.path.sep_str ++ "{s}': {s}",
+            .{ dir_path, embed_file.sub_file_path, @errorName(err) },
+        );
+    };
+    // If the table insertion fails, the message has no owner; free it.
+    errdefer msg.destroy(gpa);
+
+    const held = comp.mutex.acquire();
+    defer held.release();
+    try module.failed_embed_files.putNoClobber(gpa, embed_file, msg);
+}
+
fn updateCObject(comp: *Compilation, c_object: *CObject, c_obj_prog_node: *std.Progress.Node) !void {
if (!build_options.have_llvm) {
return comp.failCObj(c_object, "clang not available: compiler built without LLVM extensions", .{});
diff --git a/src/Module.zig b/src/Module.zig
index f52e1c8ef7..13181ae326 100644
--- a/src/Module.zig
+++ b/src/Module.zig
@@ -55,11 +55,17 @@ decl_exports: std.AutoArrayHashMapUnmanaged(*Decl, []*Export) = .{},
/// is performing the export of another Decl.
/// This table owns the Export memory.
export_owners: std.AutoArrayHashMapUnmanaged(*Decl, []*Export) = .{},
-/// The set of all the files in the Module. We keep track of this in order to iterate
-/// over it and check which source files have been modified on the file system when
+/// The set of all the Zig source files in the Module. We keep track of this in order
+/// to iterate over it and check which source files have been modified on the file system when
/// an update is requested, as well as to cache `@import` results.
/// Keys are fully resolved file paths. This table owns the keys and values.
import_table: std.StringArrayHashMapUnmanaged(*File) = .{},
+/// The set of all the files which have been loaded with `@embedFile` in the Module.
+/// We keep track of this in order to iterate over it and check which files have been
+/// modified on the file system when an update is requested, as well as to cache
+/// `@embedFile` results.
+/// Keys are fully resolved file paths. This table owns the keys and values.
+embed_table: std.StringHashMapUnmanaged(*EmbedFile) = .{},
/// The set of all the generic function instantiations. This is used so that when a generic
/// function is called twice with the same comptime parameter arguments, both calls dispatch
@@ -87,6 +93,8 @@ compile_log_decls: std.AutoArrayHashMapUnmanaged(*Decl, i32) = .{},
/// Using a map here for consistency with the other fields here.
/// The ErrorMsg memory is owned by the `File`, using Module's general purpose allocator.
failed_files: std.AutoArrayHashMapUnmanaged(*File, ?*ErrorMsg) = .{},
+/// The ErrorMsg memory is owned by the `EmbedFile`, using Module's general purpose allocator.
+failed_embed_files: std.AutoArrayHashMapUnmanaged(*EmbedFile, *ErrorMsg) = .{},
/// Using a map here for consistency with the other fields here.
/// The ErrorMsg memory is owned by the `Export`, using Module's general purpose allocator.
failed_exports: std.AutoArrayHashMapUnmanaged(*Export, *ErrorMsg) = .{},
@@ -1534,6 +1542,23 @@ pub const File = struct {
}
};
+/// Represents the contents of a file loaded with `@embedFile`.
+pub const EmbedFile = struct {
+ /// Relative to the owning package's root_src_dir.
+ /// Memory is stored in gpa, owned by EmbedFile.
+ sub_file_path: []const u8,
+ /// The file contents as a 0-terminated slice. Allocated with gpa when the
+ /// file is first loaded by `Module.embedFile`, and freed and replaced by
+ /// `Module.detectEmbedFileUpdate` when a change is detected.
+ bytes: [:0]const u8,
+ /// Size, inode, and mtime taken from the `stat()` result at the time `bytes`
+ /// was last read. `Module.detectEmbedFileUpdate` compares these against a
+ /// fresh `stat()` to decide whether the file must be re-read.
+ stat_size: u64,
+ stat_inode: std.fs.File.INode,
+ stat_mtime: i128,
+ /// Package that this file is a part of, managed externally.
+ pkg: *Package,
+ /// The Decl that was created from the `@embedFile` to own this resource.
+ /// This is how zig knows what other Decl objects to invalidate if the file
+ /// changes on disk.
+ owner_decl: *Decl,
+};
+
/// This struct holds data necessary to construct API-facing `AllErrors.Message`.
/// Its memory is managed with the general purpose allocator so that they
/// can be created and destroyed in response to incremental updates.
@@ -2364,6 +2389,11 @@ pub fn deinit(mod: *Module) void {
}
mod.failed_files.deinit(gpa);
+ for (mod.failed_embed_files.values()) |msg| {
+ msg.destroy(gpa);
+ }
+ mod.failed_embed_files.deinit(gpa);
+
for (mod.failed_exports.values()) |value| {
value.destroy(gpa);
}
@@ -3060,6 +3090,32 @@ pub fn ensureDeclAnalyzed(mod: *Module, decl: *Decl) SemaError!void {
}
}
+/// Walks the dependants of `embed_file.owner_decl` and marks each one outdated
+/// via `markOutdatedDecl`, skipping those that are in progress, already
+/// outdated, or whose `generation` equals `mod.generation`.
+pub fn updateEmbedFile(mod: *Module, embed_file: *EmbedFile) SemaError!void {
+    const tracy = trace(@src());
+    defer tracy.end();
+
+    // TODO we can potentially relax this if we store some more information along
+    // with decl dependency edges
+    const dependants = embed_file.owner_decl.dependants.keys();
+    for (dependants) |dependant| {
+        switch (dependant.analysis) {
+            .unreferenced => unreachable,
+
+            // Either analysis is already underway, or the Decl is already
+            // queued for an update; nothing more to do for it.
+            .in_progress, .outdated => {},
+
+            .file_failure,
+            .dependency_failure,
+            .sema_failure,
+            .sema_failure_retryable,
+            .codegen_failure,
+            .codegen_failure_retryable,
+            .complete,
+            => {
+                if (dependant.generation == mod.generation) continue;
+                try mod.markOutdatedDecl(dependant);
+            },
+        }
+    }
+}
+
pub fn semaPkg(mod: *Module, pkg: *Package) !void {
const file = (try mod.importPkg(pkg)).file;
return mod.semaFile(file);
@@ -3551,6 +3607,84 @@ pub fn importFile(
};
}
+/// Returns the `EmbedFile` for `rel_file_path`, resolved relative to the
+/// directory containing `cur_file`.
+///
+/// If the resolved path is already present in `embed_table`, the existing
+/// entry is returned. Otherwise the file is opened relative to the package
+/// root directory, read fully into gpa-allocated 0-terminated memory, and a
+/// new entry is inserted. The returned `owner_decl` field is `undefined`; the
+/// caller must set it.
+///
+/// Returns `error.ImportOutsidePkgPath` when the resolved path does not start
+/// with the resolved package root path; allocation and file system errors are
+/// propagated. On any error no entry is left behind in `embed_table`.
+pub fn embedFile(mod: *Module, cur_file: *File, rel_file_path: []const u8) !*EmbedFile {
+    const gpa = mod.gpa;
+
+    // The resolved path is used as the key in the table, to detect if
+    // a file refers to the same as another, despite different relative paths.
+    const cur_pkg_dir_path = cur_file.pkg.root_src_directory.path orelse ".";
+    const resolved_path = try std.fs.path.resolve(gpa, &[_][]const u8{
+        cur_pkg_dir_path, cur_file.sub_file_path, "..", rel_file_path,
+    });
+    var keep_resolved_path = false;
+    defer if (!keep_resolved_path) gpa.free(resolved_path);
+
+    const gop = try mod.embed_table.getOrPut(gpa, resolved_path);
+    if (gop.found_existing) return gop.value_ptr.*;
+    // A new slot was just inserted whose value is not yet initialized. If
+    // anything below fails, remove it again; otherwise later iteration over
+    // `embed_table` would observe an uninitialized `*EmbedFile`. This errdefer
+    // runs before the deferred free of `resolved_path` above, so the key is
+    // still valid for the lookup.
+    errdefer _ = mod.embed_table.remove(resolved_path);
+
+    const new_file = try gpa.create(EmbedFile);
+    errdefer gpa.destroy(new_file);
+
+    const resolved_root_path = try std.fs.path.resolve(gpa, &[_][]const u8{cur_pkg_dir_path});
+    defer gpa.free(resolved_root_path);
+
+    if (!mem.startsWith(u8, resolved_path, resolved_root_path)) {
+        return error.ImportOutsidePkgPath;
+    }
+    // NOTE(review): this slice assumes `resolved_path` is strictly longer than
+    // `resolved_root_path` and that a separator follows the prefix - confirm.
+    // +1 for the directory separator here.
+    const sub_file_path = try gpa.dupe(u8, resolved_path[resolved_root_path.len + 1 ..]);
+    errdefer gpa.free(sub_file_path);
+
+    var file = try cur_file.pkg.root_src_directory.handle.openFile(sub_file_path, .{});
+    defer file.close();
+
+    const stat = try file.stat();
+    const bytes = try file.readToEndAllocOptions(gpa, std.math.maxInt(u32), stat.size, 1, 0);
+
+    log.debug("new embedFile. resolved_root_path={s}, resolved_path={s}, sub_file_path={s}, rel_file_path={s}", .{
+        resolved_root_path, resolved_path, sub_file_path, rel_file_path,
+    });
+
+    // Nothing below can fail: hand ownership of the key to embed_table.
+    keep_resolved_path = true;
+    gop.value_ptr.* = new_file;
+    new_file.* = .{
+        .sub_file_path = sub_file_path,
+        .bytes = bytes,
+        .stat_size = stat.size,
+        .stat_inode = stat.inode,
+        .stat_mtime = stat.mtime,
+        .pkg = cur_file.pkg,
+        .owner_decl = undefined, // Set by Sema immediately after this function returns.
+    };
+    return new_file;
+}
+
+/// Re-stats the file behind `embed_file`. If its size, mtime, or inode differs
+/// from the recorded values, the contents are re-read into fresh gpa memory,
+/// the old bytes are freed, the recorded stat fields are refreshed, and an
+/// `update_embed_file` job is pushed onto the compilation work queue while
+/// holding `comp.mutex`. Returns without side effects when nothing changed.
+pub fn detectEmbedFileUpdate(mod: *Module, embed_file: *EmbedFile) !void {
+    const gpa = mod.gpa;
+
+    var file = try embed_file.pkg.root_src_directory.handle.openFile(embed_file.sub_file_path, .{});
+    defer file.close();
+
+    const new_stat = try file.stat();
+
+    const metadata_changed =
+        new_stat.size != embed_file.stat_size or
+        new_stat.mtime != embed_file.stat_mtime or
+        new_stat.inode != embed_file.stat_inode;
+    if (!metadata_changed) return;
+
+    // The new contents are read before the old ones are released, so a failed
+    // read leaves `embed_file` unmodified.
+    const new_bytes = try file.readToEndAllocOptions(gpa, std.math.maxInt(u32), new_stat.size, 1, 0);
+    gpa.free(embed_file.bytes);
+    embed_file.bytes = new_bytes;
+    embed_file.stat_size = new_stat.size;
+    embed_file.stat_mtime = new_stat.mtime;
+    embed_file.stat_inode = new_stat.inode;
+
+    const held = mod.comp.mutex.acquire();
+    defer held.release();
+    try mod.comp.work_queue.writeItem(.{ .update_embed_file = embed_file });
+}
+
pub fn scanNamespace(
mod: *Module,
namespace: *Namespace,
diff --git a/src/Sema.zig b/src/Sema.zig
index 2211e69fb0..c96b890785 100644
--- a/src/Sema.zig
+++ b/src/Sema.zig
@@ -6467,6 +6467,45 @@ fn zirImport(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air.
return sema.addConstant(file_root_decl.ty, file_root_decl.val);
}
+/// Analyzes an `@embedFile` ZIR instruction. Resolves the operand to a
+/// comptime string, obtains the corresponding `Module.EmbedFile` through
+/// `mod.embedFile`, creates an anonymous Decl holding the file bytes as a
+/// 0-terminated `u8` array, records that Decl as the embed file's
+/// `owner_decl`, and returns a reference to it.
+/// Failures from `mod.embedFile` are reported as compile errors at the
+/// instruction's source location.
+fn zirEmbedFile(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air.Inst.Ref {
+ const tracy = trace(@src());
+ defer tracy.end();
+
+ const mod = sema.mod;
+ const inst_data = sema.code.instructions.items(.data)[inst].un_node;
+ const src = inst_data.src();
+ const name = try sema.resolveConstString(block, src, inst_data.operand);
+
+ const embed_file = mod.embedFile(block.getFileScope(), name) catch |err| switch (err) {
+ error.ImportOutsidePkgPath => {
+ return sema.fail(block, src, "embed of file outside package path: '{s}'", .{name});
+ },
+ else => {
+ // TODO: these errors are file system errors; make sure an update() will
+ // retry this and not cache the file system error, which may be transient.
+ return sema.fail(block, src, "unable to open '{s}': {s}", .{ name, @errorName(err) });
+ },
+ };
+
+ var anon_decl = try block.startAnonDecl();
+ defer anon_decl.deinit();
+
+ // `bytes` is a 0-terminated slice, so the sentinel sits one past `len`.
+ // Widen the slice by one so the value stored in the Decl includes it.
+ const bytes_including_null = embed_file.bytes[0 .. embed_file.bytes.len + 1];
+
+ // TODO instead of using `Value.Tag.bytes`, create a new value tag for pointing at
+ // a `*Module.EmbedFile`. The purpose of this would be:
+ // - If only the length is read and the bytes are not inspected by comptime code,
+ // there can be an optimization where the codegen backend does a copy_file_range
+ // into the final binary, and never loads the data into memory.
+ // - When a Decl is destroyed, it can free the `*Module.EmbedFile`.
+ // NOTE(review): `mod.embedFile` returns the existing entry when the same
+ // resolved path was embedded before; in that case this assignment replaces
+ // the previously recorded `owner_decl` - confirm that is intended.
+ embed_file.owner_decl = try anon_decl.finish(
+ try Type.Tag.array_u8_sentinel_0.create(anon_decl.arena(), embed_file.bytes.len),
+ try Value.Tag.bytes.create(anon_decl.arena(), bytes_including_null),
+ );
+
+ return sema.analyzeDeclRef(embed_file.owner_decl);
+}
+
fn zirRetErrValueCode(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air.Inst.Ref {
_ = block;
_ = inst;
@@ -9020,12 +9059,6 @@ fn zirBoolToInt(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!A
return block.addUnOp(.bool_to_int, operand);
}
-fn zirEmbedFile(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air.Inst.Ref {
- const inst_data = sema.code.instructions.items(.data)[inst].un_node;
- const src = inst_data.src();
- return sema.fail(block, src, "TODO: Sema.zirEmbedFile", .{});
-}
-
fn zirErrorName(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air.Inst.Ref {
const inst_data = sema.code.instructions.items(.data)[inst].un_node;
const src = inst_data.src();