From 818fbd9c567b907031c44961e4299ce1f9059be6 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Tue, 24 May 2022 00:10:56 -0700 Subject: stage2: string literal interning This is a temporary addition to stage2 in order to match stage1 behavior, however the end-game once the lang spec is settled will be to use a global InternPool for comptime memoized objects, making this behavior consistent across all types, not only string literals. Or, we might decide to not guarantee string literals to have equal comptime pointers, in which case this commit can be reverted. --- src/Module.zig | 67 ++++++++++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 61 insertions(+), 6 deletions(-) (limited to 'src/Module.zig') diff --git a/src/Module.zig b/src/Module.zig index 11549ccda6..24495d8591 100644 --- a/src/Module.zig +++ b/src/Module.zig @@ -70,6 +70,17 @@ import_table: std.StringArrayHashMapUnmanaged(*File) = .{}, /// Keys are fully resolved file paths. This table owns the keys and values. embed_table: std.StringHashMapUnmanaged(*EmbedFile) = .{}, +/// This is a temporary addition to stage2 in order to match stage1 behavior, +/// however the end-game once the lang spec is settled will be to use a global +/// InternPool for comptime memoized objects, making this behavior consistent across all types, +/// not only string literals. Or, we might decide to not guarantee string literals +/// to have equal comptime pointers, in which case this field can be deleted (perhaps +/// the commit that introduced it can simply be reverted). +/// This table uses an optional index so that when a Decl is destroyed, the string literal +/// is still reclaimable by a future Decl. +string_literal_table: std.HashMapUnmanaged(StringLiteralContext.Key, Decl.OptionalIndex, StringLiteralContext, std.hash_map.default_max_load_percentage) = .{}, +string_literal_bytes: std.ArrayListUnmanaged(u8) = .{}, + /// The set of all the generic function instantiations. This is used so that when a generic /// function is called twice with the same comptime parameter arguments, both calls dispatch /// to the same function. @@ -157,6 +168,39 @@ decls_free_list: std.ArrayListUnmanaged(Decl.Index) = .{}, global_assembly: std.AutoHashMapUnmanaged(Decl.Index, []u8) = .{}, +pub const StringLiteralContext = struct { + bytes: *std.ArrayListUnmanaged(u8), + + pub const Key = struct { + index: u32, + len: u32, + }; + + pub fn eql(self: @This(), a: Key, b: Key) bool { + _ = self; + return a.index == b.index and a.len == b.len; + } + + pub fn hash(self: @This(), x: Key) u64 { + const x_slice = self.bytes.items[x.index..][0..x.len]; + return std.hash_map.hashString(x_slice); + } +}; + +pub const StringLiteralAdapter = struct { + bytes: *std.ArrayListUnmanaged(u8), + + pub fn eql(self: @This(), a_slice: []const u8, b: StringLiteralContext.Key) bool { + const b_slice = self.bytes.items[b.index..][0..b.len]; + return mem.eql(u8, a_slice, b_slice); + } + + pub fn hash(self: @This(), adapted_key: []const u8) u64 { + _ = self; + return std.hash_map.hashString(adapted_key); + } +}; + const MonomorphedFuncsSet = std.HashMapUnmanaged( *Fn, void, @@ -507,7 +551,8 @@ pub const Decl = struct { decl.name = undefined; } - pub fn clearValues(decl: *Decl, gpa: Allocator) void { + pub fn clearValues(decl: *Decl, mod: *Module) void { + const gpa = mod.gpa; if (decl.getExternFn()) |extern_fn| { extern_fn.deinit(gpa); gpa.destroy(extern_fn); @@ -521,6 +566,13 @@ pub const Decl = struct { gpa.destroy(variable); } if (decl.value_arena) |arena_state| { + if (decl.owns_tv) { + if (decl.val.castTag(.str_lit)) |str_lit| { + mod.string_literal_table.getPtrContext(str_lit.data, .{ + .bytes = &mod.string_literal_bytes, + }).?.* = .none; + } + } arena_state.promote(gpa).deinit(); decl.value_arena = null; decl.has_tv = false; @@ -2839,6 +2891,9 @@ pub fn deinit(mod: *Module) void { mod.decls_free_list.deinit(gpa); mod.allocated_decls.deinit(gpa); mod.global_assembly.deinit(gpa); + + mod.string_literal_table.deinit(gpa); + mod.string_literal_bytes.deinit(gpa); } pub fn destroyDecl(mod: *Module, decl_index: Decl.Index) void { @@ -2857,7 +2912,7 @@ pub fn destroyDecl(mod: *Module, decl_index: Decl.Index) void { if (decl.getInnerNamespace()) |namespace| { namespace.destroyDecls(mod); } - decl.clearValues(gpa); + decl.clearValues(mod); } decl.dependants.deinit(gpa); decl.dependencies.deinit(gpa); @@ -4034,7 +4089,7 @@ fn semaDecl(mod: *Module, decl_index: Decl.Index) !bool { if (decl.getFunction()) |prev_func| { prev_is_inline = prev_func.state == .inline_only; } - decl.clearValues(gpa); + decl.clearValues(mod); } decl.ty = try decl_tv.ty.copy(decl_arena_allocator); @@ -4080,7 +4135,7 @@ fn semaDecl(mod: *Module, decl_index: Decl.Index) !bool { var type_changed = true; if (decl.has_tv) { type_changed = !decl.ty.eql(decl_tv.ty, mod); - decl.clearValues(gpa); + decl.clearValues(mod); } decl.owns_tv = false; @@ -4694,7 +4749,7 @@ pub fn clearDecl( if (decl.getInnerNamespace()) |namespace| { try namespace.deleteAllDecls(mod, outdated_decls); } - decl.clearValues(gpa); + decl.clearValues(mod); } if (decl.deletion_flag) { @@ -5623,7 +5678,7 @@ pub fn populateTestFunctions(mod: *Module) !void { // Since we are replacing the Decl's value we must perform cleanup on the // previous value. - decl.clearValues(gpa); + decl.clearValues(mod); decl.ty = new_ty; decl.val = new_val; decl.has_tv = true; -- cgit v1.2.3