| author | Jacob Young <jacobly0@users.noreply.github.com> | 2024-02-21 16:21:14 +0100 |
|---|---|---|
| committer | Andrew Kelley <andrew@ziglang.org> | 2024-02-22 12:33:53 -0800 |
| commit | e60d66711185fa2e3164f5af92b9786e04c4fa19 (patch) | |
| tree | 58a35ac4bb210ba0cfc4f781fb977d494c2c2066 /src | |
| parent | 241e100827fffde710eb0722691eeee592854744 (diff) | |
| download | zig-e60d66711185fa2e3164f5af92b9786e04c4fa19.tar.gz zig-e60d66711185fa2e3164f5af92b9786e04c4fa19.zip | |
Module: fix `@embedFile` of files containing zero bytes
If an adapted string key containing embedded nulls was put into a hash
map using `std.hash_map.StringIndexAdapter`, an incorrect hash was
recorded for that entry. A later lookup of the key matching the original
key's prefix up to the first null would then sometimes match this entry
due to hash collisions, and sometimes not, for example after a grow and
rehash, so the same key could end up stored under two different indices.
That breaks every string equality comparison based on index identity:
a container type could be reported as not having a field because the
field name stored in the struct and the identifier being looked up are
equal strings with different string indices. This could perhaps be fixed
by changing `std.hash_map.StringIndexAdapter.hash` to hash only up to
the first null, ensuring the entry's hash is correct and all future
lookups are consistent, but I don't trust that to be the only problem,
so instead I assert that there are no embedded nulls.
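For illustration only, and not part of this change: a minimal sketch of the mismatch described above, assuming (as the message implies) that the stored index is later re-hashed as a null-terminated string while the adapter hashed the full slice including the embedded null. All names below are hypothetical.

```zig
const std = @import("std");

// Hypothetical illustration: hash the key as the adapter saw it (with the
// embedded null) versus the null-terminated prefix that index-based hashing
// would see. The two hashes differ, so a later lookup of "foo" only lands on
// the stale entry by bucket collision, and may stop matching after a rehash.
pub fn main() void {
    const inserted_key: []const u8 = "foo\x00bar";
    const prefix = std.mem.sliceTo(inserted_key, 0); // "foo"

    const stored_hash = std.hash.Wyhash.hash(0, inserted_key);
    const lookup_hash = std.hash.Wyhash.hash(0, prefix);

    std.debug.print("stored 0x{x}\nlookup 0x{x}\n", .{ stored_hash, lookup_hash });
}
```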
Diffstat (limited to 'src')
| -rw-r--r-- | src/AstGen.zig | 8 |
| -rw-r--r-- | src/InternPool.zig | 3 |
| -rw-r--r-- | src/Module.zig | 11 |
3 files changed, 14 insertions, 8 deletions
diff --git a/src/AstGen.zig b/src/AstGen.zig
index bb2cab5c70..b080a36d4f 100644
--- a/src/AstGen.zig
+++ b/src/AstGen.zig
@@ -11461,6 +11461,10 @@ fn strLitAsString(astgen: *AstGen, str_lit_token: Ast.TokenIndex) !IndexSlice {
     const token_bytes = astgen.tree.tokenSlice(str_lit_token);
     try astgen.parseStrLit(str_lit_token, string_bytes, token_bytes, 0);
     const key: []const u8 = string_bytes.items[str_index..];
+    if (std.mem.indexOfScalar(u8, key, 0)) |_| return .{
+        .index = @enumFromInt(str_index),
+        .len = @intCast(key.len),
+    };
     const gop = try astgen.string_table.getOrPutContextAdapted(gpa, key, StringIndexAdapter{
         .bytes = string_bytes,
     }, StringIndexContext{
@@ -11468,7 +11472,7 @@ fn strLitAsString(astgen: *AstGen, str_lit_token: Ast.TokenIndex) !IndexSlice {
     });
     if (gop.found_existing) {
         string_bytes.shrinkRetainingCapacity(str_index);
-        return IndexSlice{
+        return .{
             .index = @enumFromInt(gop.key_ptr.*),
             .len = @intCast(key.len),
         };
@@ -11478,7 +11482,7 @@ fn strLitAsString(astgen: *AstGen, str_lit_token: Ast.TokenIndex) !IndexSlice {
     // to lookup null terminated strings, so if we get a match, it has to
     // be null terminated for that to work.
     try string_bytes.append(gpa, 0);
-    return IndexSlice{
+    return .{
         .index = @enumFromInt(str_index),
         .len = @intCast(key.len),
     };
diff --git a/src/InternPool.zig b/src/InternPool.zig
index 379a4f76c6..19be12c129 100644
--- a/src/InternPool.zig
+++ b/src/InternPool.zig
@@ -7985,7 +7985,8 @@ pub fn getTrailingAggregate(
 ) Allocator.Error!Index {
     try ip.items.ensureUnusedCapacity(gpa, 1);
     try ip.extra.ensureUnusedCapacity(gpa, @typeInfo(Bytes).Struct.fields.len);
-    const str: String = @enumFromInt(@intFromEnum(try getOrPutTrailingString(ip, gpa, len)));
+
+    const str: String = @enumFromInt(ip.string_bytes.items.len - len);
     const adapter: KeyAdapter = .{ .intern_pool = ip };
     const gop = try ip.map.getOrPutAdapted(gpa, Key{ .aggregate = .{
         .ty = ty,
diff --git a/src/Module.zig b/src/Module.zig
index 66d6aa3fe5..2f32fa8197 100644
--- a/src/Module.zig
+++ b/src/Module.zig
@@ -4400,6 +4400,7 @@ fn newEmbedFile(
     src_loc: SrcLoc,
 ) !InternPool.Index {
     const gpa = mod.gpa;
+    const ip = &mod.intern_pool;
 
     const new_file = try gpa.create(EmbedFile);
     errdefer gpa.destroy(new_file);
@@ -4414,11 +4415,11 @@ fn newEmbedFile(
         .mtime = actual_stat.mtime,
     };
     const size = std.math.cast(usize, actual_stat.size) orelse return error.Overflow;
 
-    const ip = &mod.intern_pool;
-    const ptr = try ip.string_bytes.addManyAsSlice(gpa, size);
-    const actual_read = try file.readAll(ptr);
+    const bytes = try ip.string_bytes.addManyAsSlice(gpa, try std.math.add(usize, size, 1));
+    const actual_read = try file.readAll(bytes[0..size]);
     if (actual_read != size) return error.UnexpectedEndOfFile;
+    bytes[size] = 0;
 
     const comp = mod.comp;
     switch (comp.cache_use) {
@@ -4427,7 +4428,7 @@ fn newEmbedFile(
             errdefer gpa.free(copied_resolved_path);
             whole.cache_manifest_mutex.lock();
             defer whole.cache_manifest_mutex.unlock();
-            try man.addFilePostContents(copied_resolved_path, ptr, stat);
+            try man.addFilePostContents(copied_resolved_path, bytes[0..size], stat);
         },
         .incremental => {},
     }
@@ -4437,7 +4438,7 @@ fn newEmbedFile(
         .sentinel = .zero_u8,
         .child = .u8_type,
     } });
-    const array_val = try ip.getTrailingAggregate(gpa, array_ty, size);
+    const array_val = try ip.getTrailingAggregate(gpa, array_ty, bytes.len);
     const ptr_ty = (try mod.ptrType(.{
         .child = array_ty,
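A sketch of the read pattern used in `newEmbedFile` above, under the assumption of an arbitrary readable path; `readFileZ` is a hypothetical helper, not code from this commit. It mirrors the new logic: reserve one extra byte, read the file into the prefix, then write the 0 sentinel so the buffer can back a null-terminated value even when the file's own bytes contain zeros.

```zig
const std = @import("std");

// Hypothetical helper mirroring the pattern in newEmbedFile above: the
// sentinel lives in the extra byte after the data, so embedded 0x00 bytes
// in the file itself are preserved verbatim.
fn readFileZ(gpa: std.mem.Allocator, path: []const u8) ![:0]u8 {
    const file = try std.fs.cwd().openFile(path, .{});
    defer file.close();

    const size = std.math.cast(usize, try file.getEndPos()) orelse return error.Overflow;
    const buf = try gpa.alloc(u8, try std.math.add(usize, size, 1));
    errdefer gpa.free(buf);

    const actual_read = try file.readAll(buf[0..size]);
    if (actual_read != size) return error.UnexpectedEndOfFile;
    buf[size] = 0; // write the sentinel after the file contents

    return buf[0..size :0];
}
```

Storing the sentinel directly in `string_bytes` also appears to be why `getTrailingAggregate` now derives `str` from the trailing bytes of `string_bytes` instead of re-interning them.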
