From 717b0e827511b55375de82258f570709c07cc59d Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Mon, 31 Aug 2020 23:34:58 -0700 Subject: stage2: introduce the ability for Scope.Block to be comptime This gives zir_sema analysis the ability to check if the current scope is expected to be comptime. --- src-self-hosted/Module.zig | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'src-self-hosted/Module.zig') diff --git a/src-self-hosted/Module.zig b/src-self-hosted/Module.zig index c476c307d2..78586dd096 100644 --- a/src-self-hosted/Module.zig +++ b/src-self-hosted/Module.zig @@ -725,6 +725,7 @@ pub const Scope = struct { /// Points to the arena allocator of DeclAnalysis arena: *Allocator, label: ?Label = null, + is_comptime: bool, pub const Label = struct { zir_block: *zir.Inst.Block, @@ -1320,6 +1321,7 @@ fn astGenAndAnalyzeDecl(self: *Module, decl: *Decl) !bool { .decl = decl, .instructions = .{}, .arena = &decl_arena.allocator, + .is_comptime = false, }; defer block_scope.instructions.deinit(self.gpa); @@ -1457,6 +1459,7 @@ fn astGenAndAnalyzeDecl(self: *Module, decl: *Decl) !bool { .decl = decl, .instructions = .{}, .arena = &decl_arena.allocator, + .is_comptime = true, }; defer block_scope.instructions.deinit(self.gpa); @@ -1528,7 +1531,6 @@ fn astGenAndAnalyzeDecl(self: *Module, decl: *Decl) !bool { defer gen_scope.instructions.deinit(self.gpa); const src = tree.token_locs[init_node.firstToken()].start; - // TODO comptime scope here const init_inst = try astgen.expr(self, &gen_scope.base, .none, init_node); _ = try astgen.addZIRUnOp(self, &gen_scope.base, src, .@"return", init_inst); @@ -1538,6 +1540,7 @@ fn astGenAndAnalyzeDecl(self: *Module, decl: *Decl) !bool { .decl = decl, .instructions = .{}, .arena = &gen_scope_arena.allocator, + .is_comptime = true, }; defer inner_block.instructions.deinit(self.gpa); try zir_sema.analyzeBody(self, &inner_block.base, .{ .instructions = gen_scope.instructions.items }); @@ -1628,8 +1631,7 @@ fn astGenAndAnalyzeDecl(self: *Module, decl: *Decl) !bool { }; defer gen_scope.instructions.deinit(self.gpa); - // TODO comptime scope here - _ = try astgen.expr(self, &gen_scope.base, .none, comptime_decl.expr); + _ = try astgen.comptimeExpr(self, &gen_scope.base, .none, comptime_decl.expr); var block_scope: Scope.Block = .{ .parent = null, @@ -1637,6 +1639,7 @@ fn astGenAndAnalyzeDecl(self: *Module, decl: *Decl) !bool { .decl = decl, .instructions = .{}, .arena = &analysis_arena.allocator, + .is_comptime = true, }; defer block_scope.instructions.deinit(self.gpa); @@ -2007,6 +2010,7 @@ fn analyzeFnBody(self: *Module, decl: *Decl, func: *Fn) !void { .decl = decl, .instructions = .{}, .arena = &arena.allocator, + .is_comptime = false, }; defer inner_block.instructions.deinit(self.gpa); @@ -3432,6 +3436,7 @@ pub fn addSafetyCheck(mod: *Module, parent_block: *Scope.Block, ok: *Inst, panic .decl = parent_block.decl, .instructions = .{}, .arena = parent_block.arena, + .is_comptime = parent_block.is_comptime, }; defer fail_block.instructions.deinit(mod.gpa); -- cgit v1.2.3 From 4c13d020dbecbd7664b99765de33f230e98f3322 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Tue, 1 Sep 2020 12:39:47 -0700 Subject: stage2: proper split of requireRuntimeBlock and requireFunctionBlock * improve the ZIR generated of variable decls - utilize the same ZIR for the type and init value when possible - init value gets a result location with the variable type. no manual coercion is required. 
* no longer use return instructions to extract values out of comptime blocks. Instead run the analysis and then look at the corresponding analyzed instruction, relying on the comptime mechanism to report errors when something could not be comptime evaluated. --- src-self-hosted/Module.zig | 122 ++++++++++++++++++++++--------------------- src-self-hosted/test.zig | 22 ++++---- src-self-hosted/zir_sema.zig | 22 ++++---- test/stage2/test.zig | 21 +++++--- 4 files changed, 98 insertions(+), 89 deletions(-) (limited to 'src-self-hosted/Module.zig') diff --git a/src-self-hosted/Module.zig b/src-self-hosted/Module.zig index 78586dd096..72597975c9 100644 --- a/src-self-hosted/Module.zig +++ b/src-self-hosted/Module.zig @@ -1308,7 +1308,6 @@ fn astGenAndAnalyzeDecl(self: *Module, decl: *Decl) !bool { .return_type = return_type_inst, .param_types = param_types, }, .{}); - _ = try astgen.addZIRUnOp(self, &fn_type_scope.base, fn_src, .@"return", fn_type_inst); // We need the memory for the Type to go into the arena for the Decl var decl_arena = std.heap.ArenaAllocator.init(self.gpa); @@ -1325,7 +1324,7 @@ fn astGenAndAnalyzeDecl(self: *Module, decl: *Decl) !bool { }; defer block_scope.instructions.deinit(self.gpa); - const fn_type = try zir_sema.analyzeBodyValueAsType(self, &block_scope, .{ + const fn_type = try zir_sema.analyzeBodyValueAsType(self, &block_scope, fn_type_inst, .{ .instructions = fn_type_scope.instructions.items, }); const new_func = try decl_arena.allocator.create(Fn); @@ -1492,35 +1491,7 @@ fn astGenAndAnalyzeDecl(self: *Module, decl: *Decl) !bool { return self.failNode(&block_scope.base, sect_expr, "TODO implement function section expression", .{}); } - const explicit_type = blk: { - const type_node = var_decl.getTypeNode() orelse - break :blk null; - - // Temporary arena for the zir instructions. 
- var type_scope_arena = std.heap.ArenaAllocator.init(self.gpa); - defer type_scope_arena.deinit(); - var type_scope: Scope.GenZIR = .{ - .decl = decl, - .arena = &type_scope_arena.allocator, - .parent = decl.scope, - }; - defer type_scope.instructions.deinit(self.gpa); - - const src = tree.token_locs[type_node.firstToken()].start; - const type_type = try astgen.addZIRInstConst(self, &type_scope.base, src, .{ - .ty = Type.initTag(.type), - .val = Value.initTag(.type_type), - }); - const var_type = try astgen.expr(self, &type_scope.base, .{ .ty = type_type }, type_node); - _ = try astgen.addZIRUnOp(self, &type_scope.base, src, .@"return", var_type); - - break :blk try zir_sema.analyzeBodyValueAsType(self, &block_scope, .{ - .instructions = type_scope.instructions.items, - }); - }; - - var var_type: Type = undefined; - const value: ?Value = if (var_decl.getInitNode()) |init_node| blk: { + const var_info: struct { ty: Type, val: ?Value } = if (var_decl.getInitNode()) |init_node| vi: { var gen_scope_arena = std.heap.ArenaAllocator.init(self.gpa); defer gen_scope_arena.deinit(); var gen_scope: Scope.GenZIR = .{ @@ -1529,10 +1500,19 @@ fn astGenAndAnalyzeDecl(self: *Module, decl: *Decl) !bool { .parent = decl.scope, }; defer gen_scope.instructions.deinit(self.gpa); - const src = tree.token_locs[init_node.firstToken()].start; - const init_inst = try astgen.expr(self, &gen_scope.base, .none, init_node); - _ = try astgen.addZIRUnOp(self, &gen_scope.base, src, .@"return", init_inst); + const init_result_loc: astgen.ResultLoc = if (var_decl.getTypeNode()) |type_node| rl: { + const src = tree.token_locs[type_node.firstToken()].start; + const type_type = try astgen.addZIRInstConst(self, &gen_scope.base, src, .{ + .ty = Type.initTag(.type), + .val = Value.initTag(.type_type), + }); + const var_type = try astgen.expr(self, &gen_scope.base, .{ .ty = type_type }, type_node); + break :rl .{ .ty = var_type }; + } else .none; + + const src = tree.token_locs[init_node.firstToken()].start; + const init_inst = try astgen.expr(self, &gen_scope.base, init_result_loc, init_node); var inner_block: Scope.Block = .{ .parent = null, @@ -1545,38 +1525,53 @@ fn astGenAndAnalyzeDecl(self: *Module, decl: *Decl) !bool { defer inner_block.instructions.deinit(self.gpa); try zir_sema.analyzeBody(self, &inner_block.base, .{ .instructions = gen_scope.instructions.items }); - for (inner_block.instructions.items) |inst| { - if (inst.castTag(.ret)) |ret| { - const coerced = if (explicit_type) |some| - try self.coerce(&inner_block.base, some, ret.operand) - else - ret.operand; - const val = coerced.value() orelse - return self.fail(&block_scope.base, inst.src, "unable to resolve comptime value", .{}); - - var_type = explicit_type orelse try ret.operand.ty.copy(block_scope.arena); - break :blk try val.copy(block_scope.arena); - } else { - return self.fail(&block_scope.base, inst.src, "unable to resolve comptime value", .{}); - } - } - unreachable; + // The result location guarantees the type coercion. + const analyzed_init_inst = init_inst.analyzed_inst.?; + // The is_comptime in the Scope.Block guarantees the result is comptime-known. 
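+ // If the initializer could not be comptime evaluated, analyzeBody already failed with "unable to resolve comptime value", so `value()` cannot return null here.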
+ const val = analyzed_init_inst.value().?; + + const ty = try analyzed_init_inst.ty.copy(block_scope.arena); + break :vi .{ + .ty = ty, + .val = try val.copy(block_scope.arena), + }; } else if (!is_extern) { return self.failTok(&block_scope.base, var_decl.firstToken(), "variables must be initialized", .{}); - } else if (explicit_type) |some| blk: { - var_type = some; - break :blk null; + } else if (var_decl.getTypeNode()) |type_node| vi: { + // Temporary arena for the zir instructions. + var type_scope_arena = std.heap.ArenaAllocator.init(self.gpa); + defer type_scope_arena.deinit(); + var type_scope: Scope.GenZIR = .{ + .decl = decl, + .arena = &type_scope_arena.allocator, + .parent = decl.scope, + }; + defer type_scope.instructions.deinit(self.gpa); + + const src = tree.token_locs[type_node.firstToken()].start; + const type_type = try astgen.addZIRInstConst(self, &type_scope.base, src, .{ + .ty = Type.initTag(.type), + .val = Value.initTag(.type_type), + }); + const var_type = try astgen.expr(self, &type_scope.base, .{ .ty = type_type }, type_node); + const ty = try zir_sema.analyzeBodyValueAsType(self, &block_scope, var_type, .{ + .instructions = type_scope.instructions.items, + }); + break :vi .{ + .ty = ty, + .val = null, + }; } else { return self.failTok(&block_scope.base, var_decl.firstToken(), "unable to infer variable type", .{}); }; - if (is_mutable and !var_type.isValidVarType(is_extern)) { - return self.failTok(&block_scope.base, var_decl.firstToken(), "variable of type '{}' must be const", .{var_type}); + if (is_mutable and !var_info.ty.isValidVarType(is_extern)) { + return self.failTok(&block_scope.base, var_decl.firstToken(), "variable of type '{}' must be const", .{var_info.ty}); } var type_changed = true; if (decl.typedValueManaged()) |tvm| { - type_changed = !tvm.typed_value.ty.eql(var_type); + type_changed = !tvm.typed_value.ty.eql(var_info.ty); tvm.deinit(self.gpa); } @@ -1585,7 +1580,7 @@ fn astGenAndAnalyzeDecl(self: *Module, decl: *Decl) !bool { const var_payload = try decl_arena.allocator.create(Value.Payload.Variable); new_variable.* = .{ .owner_decl = decl, - .init = value orelse undefined, + .init = var_info.val orelse undefined, .is_extern = is_extern, .is_mutable = is_mutable, .is_threadlocal = is_threadlocal, @@ -1596,7 +1591,7 @@ fn astGenAndAnalyzeDecl(self: *Module, decl: *Decl) !bool { decl.typed_value = .{ .most_recent = .{ .typed_value = .{ - .ty = var_type, + .ty = var_info.ty, .val = Value.initPayload(&var_payload.base), }, .arena = decl_arena_state, @@ -2096,12 +2091,19 @@ pub fn getErrorValue(self: *Module, name: []const u8) !std.StringHashMapUnmanage return gop.entry.*; } -/// TODO split this into `requireRuntimeBlock` and `requireFunctionBlock` and audit callsites. 
-pub fn requireRuntimeBlock(self: *Module, scope: *Scope, src: usize) !*Scope.Block { +pub fn requireFunctionBlock(self: *Module, scope: *Scope, src: usize) !*Scope.Block { return scope.cast(Scope.Block) orelse return self.fail(scope, src, "instruction illegal outside function body", .{}); } +pub fn requireRuntimeBlock(self: *Module, scope: *Scope, src: usize) !*Scope.Block { + const block = try self.requireFunctionBlock(scope, src); + if (block.is_comptime) { + return self.fail(scope, src, "unable to resolve comptime value", .{}); + } + return block; +} + pub fn resolveConstValue(self: *Module, scope: *Scope, base: *Inst) !Value { return (try self.resolveDefinedValue(scope, base)) orelse return self.fail(scope, base.src, "unable to resolve comptime value", .{}); diff --git a/src-self-hosted/test.zig b/src-self-hosted/test.zig index f9c9121817..aef48e198b 100644 --- a/src-self-hosted/test.zig +++ b/src-self-hosted/test.zig @@ -474,15 +474,15 @@ pub const TestContext = struct { var all_errors = try module.getAllErrorsAlloc(); defer all_errors.deinit(allocator); if (all_errors.list.len != 0) { - std.debug.warn("\nErrors occurred updating the module:\n================\n", .{}); + std.debug.print("\nErrors occurred updating the module:\n================\n", .{}); for (all_errors.list) |err| { - std.debug.warn(":{}:{}: error: {}\n================\n", .{ err.line + 1, err.column + 1, err.msg }); + std.debug.print(":{}:{}: error: {}\n================\n", .{ err.line + 1, err.column + 1, err.msg }); } if (case.cbe) { const C = module.bin_file.cast(link.File.C).?; - std.debug.warn("Generated C: \n===============\n{}\n\n===========\n\n", .{C.main.items}); + std.debug.print("Generated C: \n===============\n{}\n\n===========\n\n", .{C.main.items}); } - std.debug.warn("Test failed.\n", .{}); + std.debug.print("Test failed.\n", .{}); std.process.exit(1); } } @@ -497,12 +497,12 @@ pub const TestContext = struct { var out = file.reader().readAllAlloc(arena, 1024 * 1024) catch @panic("Unable to read C output!"); if (expected_output.len != out.len) { - std.debug.warn("\nTransformed C length differs:\n================\nExpected:\n================\n{}\n================\nFound:\n================\n{}\n================\nTest failed.\n", .{ expected_output, out }); + std.debug.print("\nTransformed C length differs:\n================\nExpected:\n================\n{}\n================\nFound:\n================\n{}\n================\nTest failed.\n", .{ expected_output, out }); std.process.exit(1); } for (expected_output) |e, i| { if (out[i] != e) { - std.debug.warn("\nTransformed C differs:\n================\nExpected:\n================\n{}\n================\nFound:\n================\n{}\n================\nTest failed.\n", .{ expected_output, out }); + std.debug.print("\nTransformed C differs:\n================\nExpected:\n================\n{}\n================\nFound:\n================\n{}\n================\nTest failed.\n", .{ expected_output, out }); std.process.exit(1); } } @@ -526,12 +526,12 @@ pub const TestContext = struct { defer test_node.end(); if (expected_output.len != out_zir.items.len) { - std.debug.warn("{}\nTransformed ZIR length differs:\n================\nExpected:\n================\n{}\n================\nFound:\n================\n{}\n================\nTest failed.\n", .{ case.name, expected_output, out_zir.items }); + std.debug.print("{}\nTransformed ZIR length differs:\n================\nExpected:\n================\n{}\n================\nFound:\n================\n{}\n================\nTest 
failed.\n", .{ case.name, expected_output, out_zir.items }); std.process.exit(1); } for (expected_output) |e, i| { if (out_zir.items[i] != e) { - std.debug.warn("{}\nTransformed ZIR differs:\n================\nExpected:\n================\n{}\n================\nFound:\n================\n{}\n================\nTest failed.\n", .{ case.name, expected_output, out_zir.items }); + std.debug.print("{}\nTransformed ZIR differs:\n================\nExpected:\n================\n{}\n================\nFound:\n================\n{}\n================\nTest failed.\n", .{ case.name, expected_output, out_zir.items }); std.process.exit(1); } } @@ -554,7 +554,7 @@ pub const TestContext = struct { break; } } else { - std.debug.warn("{}\nUnexpected error:\n================\n:{}:{}: error: {}\n================\nTest failed.\n", .{ case.name, a.line + 1, a.column + 1, a.msg }); + std.debug.print("{}\nUnexpected error:\n================\n:{}:{}: error: {}\n================\nTest failed.\n", .{ case.name, a.line + 1, a.column + 1, a.msg }); std.process.exit(1); } } @@ -562,7 +562,7 @@ pub const TestContext = struct { for (handled_errors) |h, i| { if (!h) { const er = e[i]; - std.debug.warn("{}\nDid not receive error:\n================\n{}:{}: {}\n================\nTest failed.\n", .{ case.name, er.line, er.column, er.msg }); + std.debug.print("{}\nDid not receive error:\n================\n{}:{}: {}\n================\nTest failed.\n", .{ case.name, er.line, er.column, er.msg }); std.process.exit(1); } } @@ -643,7 +643,7 @@ pub const TestContext = struct { switch (exec_result.term) { .Exited => |code| { if (code != 0) { - std.debug.warn("elf file exited with code {}\n", .{code}); + std.debug.print("elf file exited with code {}\n", .{code}); return error.BinaryBadExitCode; } }, diff --git a/src-self-hosted/zir_sema.zig b/src-self-hosted/zir_sema.zig index 2f2f1ec1bb..b4dafac1da 100644 --- a/src-self-hosted/zir_sema.zig +++ b/src-self-hosted/zir_sema.zig @@ -150,18 +150,16 @@ pub fn analyzeBody(mod: *Module, scope: *Scope, body: zir.Module.Body) !void { } } -/// TODO improve this to use .block_comptime_flat -pub fn analyzeBodyValueAsType(mod: *Module, block_scope: *Scope.Block, body: zir.Module.Body) !Type { +pub fn analyzeBodyValueAsType( + mod: *Module, + block_scope: *Scope.Block, + zir_result_inst: *zir.Inst, + body: zir.Module.Body, +) !Type { try analyzeBody(mod, &block_scope.base, body); - for (block_scope.instructions.items) |inst| { - if (inst.castTag(.ret)) |ret| { - const val = try mod.resolveConstValue(&block_scope.base, ret.operand); - return val.toType(block_scope.base.arena()); - } else { - return mod.fail(&block_scope.base, inst.src, "unable to resolve comptime value", .{}); - } - } - unreachable; + const result_inst = zir_result_inst.analyzed_inst.?; + const val = try mod.resolveConstValue(&block_scope.base, result_inst); + return val.toType(block_scope.base.arena()); } pub fn analyzeZirDecl(mod: *Module, decl: *Decl, src_decl: *zir.Decl) InnerError!bool { @@ -366,7 +364,7 @@ fn analyzeInstRef(mod: *Module, scope: *Scope, inst: *zir.Inst.UnOp) InnerError! 
} fn analyzeInstRetType(mod: *Module, scope: *Scope, inst: *zir.Inst.NoOp) InnerError!*Inst { - const b = try mod.requireRuntimeBlock(scope, inst.base.src); + const b = try mod.requireFunctionBlock(scope, inst.base.src); const fn_ty = b.func.?.owner_decl.typed_value.most_recent.typed_value.ty; const ret_type = fn_ty.fnReturnType(); return mod.constType(scope, inst.base.src, ret_type); diff --git a/test/stage2/test.zig b/test/stage2/test.zig index c8f8c19cf7..b631e37b97 100644 --- a/test/stage2/test.zig +++ b/test/stage2/test.zig @@ -967,10 +967,19 @@ pub fn addCases(ctx: *TestContext) !void { \\fn entry() void {} , &[_][]const u8{":2:4: error: redefinition of 'entry'"}); - ctx.compileError("extern variable has no type", linux_x64, - \\comptime { - \\ _ = foo; - \\} - \\extern var foo; - , &[_][]const u8{":4:1: error: unable to infer variable type"}); + { + var case = ctx.obj("extern variable has no type", linux_x64); + case.addError( + \\comptime { + \\ _ = foo; + \\} + \\extern var foo; + , &[_][]const u8{":2:5: error: unable to resolve comptime value"}); + case.addError( + \\export fn entry() void { + \\ _ = foo; + \\} + \\extern var foo; + , &[_][]const u8{":4:1: error: unable to infer variable type"}); + } } -- cgit v1.2.3 From 575fbd5e3592cff70cbfc5153884d919e6bed89f Mon Sep 17 00:00:00 2001 From: Sahnvour Date: Sun, 2 Aug 2020 23:24:03 +0200 Subject: hash_map: rename to ArrayHashMap and add new HashMap implementation --- lib/std/array_hash_map.zig | 1087 ++++++++++++++++++++ lib/std/buf_set.zig | 3 +- lib/std/hash_map.zig | 1543 +++++++++++++++------------- lib/std/heap/general_purpose_allocator.zig | 5 +- lib/std/http/headers.zig | 9 +- lib/std/std.zig | 7 + src-self-hosted/Module.zig | 27 +- src-self-hosted/codegen.zig | 8 +- src-self-hosted/codegen/c.zig | 3 +- src-self-hosted/link.zig | 2 +- src-self-hosted/link/Elf.zig | 9 +- src-self-hosted/liveness.zig | 41 +- src-self-hosted/translate_c.zig | 25 +- src-self-hosted/type.zig | 4 +- src-self-hosted/value.zig | 3 +- src-self-hosted/zir.zig | 6 +- src-self-hosted/zir_sema.zig | 2 +- 17 files changed, 2017 insertions(+), 767 deletions(-) create mode 100644 lib/std/array_hash_map.zig (limited to 'src-self-hosted/Module.zig') diff --git a/lib/std/array_hash_map.zig b/lib/std/array_hash_map.zig new file mode 100644 index 0000000000..f8c3623ef2 --- /dev/null +++ b/lib/std/array_hash_map.zig @@ -0,0 +1,1087 @@ +// SPDX-License-Identifier: MIT +// Copyright (c) 2015-2020 Zig Contributors +// This file is part of [zig](https://ziglang.org/), which is MIT licensed. +// The MIT license requires this copyright notice to be included in all copies +// and substantial portions of the software. +const std = @import("std.zig"); +const debug = std.debug; +const assert = debug.assert; +const testing = std.testing; +const math = std.math; +const mem = std.mem; +const meta = std.meta; +const trait = meta.trait; +const autoHash = std.hash.autoHash; +const Wyhash = std.hash.Wyhash; +const Allocator = mem.Allocator; +const builtin = @import("builtin"); +const hash_map = @This(); + +pub fn AutoArrayHashMap(comptime K: type, comptime V: type) type { + return ArrayHashMap(K, V, getAutoHashFn(K), getAutoEqlFn(K), autoEqlIsCheap(K)); +} + +pub fn AutoArrayHashMapUnmanaged(comptime K: type, comptime V: type) type { + return ArrayHashMapUnmanaged(K, V, getAutoHashFn(K), getAutoEqlFn(K), autoEqlIsCheap(K)); +} + +/// Builtin hashmap for strings as keys. 
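+/// The key strings are not copied; the caller must keep them alive for as long as they remain in the map.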
+pub fn StringArrayHashMap(comptime V: type) type { + return ArrayHashMap([]const u8, V, hashString, eqlString, true); +} + +pub fn StringArrayHashMapUnmanaged(comptime V: type) type { + return ArrayHashMapUnmanaged([]const u8, V, hashString, eqlString, true); +} + +pub fn eqlString(a: []const u8, b: []const u8) bool { + return mem.eql(u8, a, b); +} + +pub fn hashString(s: []const u8) u32 { + return @truncate(u32, std.hash.Wyhash.hash(0, s)); +} + +/// Insertion order is preserved. +/// Deletions perform a "swap removal" on the entries list. +/// Modifying the hash map while iterating is allowed, however one must understand +/// the (well defined) behavior when mixing insertions and deletions with iteration. +/// For a hash map that can be initialized directly that does not store an Allocator +/// field, see `ArrayHashMapUnmanaged`. +/// When `store_hash` is `false`, this data structure is biased towards cheap `eql` +/// functions. It does not store each item's hash in the table. Setting `store_hash` +/// to `true` incurs slightly more memory cost by storing each key's hash in the table +/// but only has to call `eql` for hash collisions. +/// If typical operations (except iteration over entries) need to be faster, prefer +/// the alternative `std.HashMap`. +pub fn ArrayHashMap( + comptime K: type, + comptime V: type, + comptime hash: fn (key: K) u32, + comptime eql: fn (a: K, b: K) bool, + comptime store_hash: bool, +) type { + return struct { + unmanaged: Unmanaged, + allocator: *Allocator, + + pub const Unmanaged = ArrayHashMapUnmanaged(K, V, hash, eql, store_hash); + pub const Entry = Unmanaged.Entry; + pub const Hash = Unmanaged.Hash; + pub const GetOrPutResult = Unmanaged.GetOrPutResult; + + /// Deprecated. Iterate using `items`. + pub const Iterator = struct { + hm: *const Self, + /// Iterator through the entry array. + index: usize, + + pub fn next(it: *Iterator) ?*Entry { + if (it.index >= it.hm.unmanaged.entries.items.len) return null; + const result = &it.hm.unmanaged.entries.items[it.index]; + it.index += 1; + return result; + } + + /// Reset the iterator to the initial index + pub fn reset(it: *Iterator) void { + it.index = 0; + } + }; + + const Self = @This(); + const Index = Unmanaged.Index; + + pub fn init(allocator: *Allocator) Self { + return .{ + .unmanaged = .{}, + .allocator = allocator, + }; + } + + pub fn deinit(self: *Self) void { + self.unmanaged.deinit(self.allocator); + self.* = undefined; + } + + pub fn clearRetainingCapacity(self: *Self) void { + return self.unmanaged.clearRetainingCapacity(); + } + + pub fn clearAndFree(self: *Self) void { + return self.unmanaged.clearAndFree(self.allocator); + } + + /// Deprecated. Use `items().len`. + pub fn count(self: Self) usize { + return self.items().len; + } + + /// Deprecated. Iterate using `items`. + pub fn iterator(self: *const Self) Iterator { + return Iterator{ + .hm = self, + .index = 0, + }; + } + + /// If key exists this function cannot fail. + /// If there is an existing item with `key`, then the result + /// `Entry` pointer points to it, and found_existing is true. + /// Otherwise, puts a new item with undefined value, and + /// the `Entry` pointer points to it. Caller should then initialize + /// the value (but not the key). + pub fn getOrPut(self: *Self, key: K) !GetOrPutResult { + return self.unmanaged.getOrPut(self.allocator, key); + } + + /// If there is an existing item with `key`, then the result + /// `Entry` pointer points to it, and found_existing is true. 
+ /// Otherwise, puts a new item with undefined value, and + /// the `Entry` pointer points to it. Caller should then initialize + /// the value (but not the key). + /// If a new entry needs to be stored, this function asserts there + /// is enough capacity to store it. + pub fn getOrPutAssumeCapacity(self: *Self, key: K) GetOrPutResult { + return self.unmanaged.getOrPutAssumeCapacity(key); + } + + pub fn getOrPutValue(self: *Self, key: K, value: V) !*Entry { + return self.unmanaged.getOrPutValue(self.allocator, key, value); + } + + /// Increases capacity, guaranteeing that insertions up until the + /// `expected_count` will not cause an allocation, and therefore cannot fail. + pub fn ensureCapacity(self: *Self, new_capacity: usize) !void { + return self.unmanaged.ensureCapacity(self.allocator, new_capacity); + } + + /// Returns the number of total elements which may be present before it is + /// no longer guaranteed that no allocations will be performed. + pub fn capacity(self: *Self) usize { + return self.unmanaged.capacity(); + } + + /// Clobbers any existing data. To detect if a put would clobber + /// existing data, see `getOrPut`. + pub fn put(self: *Self, key: K, value: V) !void { + return self.unmanaged.put(self.allocator, key, value); + } + + /// Inserts a key-value pair into the hash map, asserting that no previous + /// entry with the same key is already present + pub fn putNoClobber(self: *Self, key: K, value: V) !void { + return self.unmanaged.putNoClobber(self.allocator, key, value); + } + + /// Asserts there is enough capacity to store the new key-value pair. + /// Clobbers any existing data. To detect if a put would clobber + /// existing data, see `getOrPutAssumeCapacity`. + pub fn putAssumeCapacity(self: *Self, key: K, value: V) void { + return self.unmanaged.putAssumeCapacity(key, value); + } + + /// Asserts there is enough capacity to store the new key-value pair. + /// Asserts that it does not clobber any existing data. + /// To detect if a put would clobber existing data, see `getOrPutAssumeCapacity`. + pub fn putAssumeCapacityNoClobber(self: *Self, key: K, value: V) void { + return self.unmanaged.putAssumeCapacityNoClobber(key, value); + } + + /// Inserts a new `Entry` into the hash map, returning the previous one, if any. + pub fn fetchPut(self: *Self, key: K, value: V) !?Entry { + return self.unmanaged.fetchPut(self.allocator, key, value); + } + + /// Inserts a new `Entry` into the hash map, returning the previous one, if any. + /// If insertion happuns, asserts there is enough capacity without allocating. + pub fn fetchPutAssumeCapacity(self: *Self, key: K, value: V) ?Entry { + return self.unmanaged.fetchPutAssumeCapacity(key, value); + } + + pub fn getEntry(self: Self, key: K) ?*Entry { + return self.unmanaged.getEntry(key); + } + + pub fn getIndex(self: Self, key: K) ?usize { + return self.unmanaged.getIndex(key); + } + + pub fn get(self: Self, key: K) ?V { + return self.unmanaged.get(key); + } + + pub fn contains(self: Self, key: K) bool { + return self.unmanaged.contains(key); + } + + /// If there is an `Entry` with a matching key, it is deleted from + /// the hash map, and then returned from this function. + pub fn remove(self: *Self, key: K) ?Entry { + return self.unmanaged.remove(key); + } + + /// Asserts there is an `Entry` with matching key, deletes it from the hash map, + /// and discards it. 
+ pub fn removeAssertDiscard(self: *Self, key: K) void { + return self.unmanaged.removeAssertDiscard(key); + } + + pub fn items(self: Self) []Entry { + return self.unmanaged.items(); + } + + pub fn clone(self: Self) !Self { + var other = try self.unmanaged.clone(self.allocator); + return other.promote(self.allocator); + } + }; +} + +/// General purpose hash table. +/// Insertion order is preserved. +/// Deletions perform a "swap removal" on the entries list. +/// Modifying the hash map while iterating is allowed, however one must understand +/// the (well defined) behavior when mixing insertions and deletions with iteration. +/// This type does not store an Allocator field - the Allocator must be passed in +/// with each function call that requires it. See `ArrayHashMap` for a type that stores +/// an Allocator field for convenience. +/// Can be initialized directly using the default field values. +/// This type is designed to have low overhead for small numbers of entries. When +/// `store_hash` is `false` and the number of entries in the map is less than 9, +/// the overhead cost of using `ArrayHashMapUnmanaged` rather than `std.ArrayList` is +/// only a single pointer-sized integer. +/// When `store_hash` is `false`, this data structure is biased towards cheap `eql` +/// functions. It does not store each item's hash in the table. Setting `store_hash` +/// to `true` incurs slightly more memory cost by storing each key's hash in the table +/// but guarantees only one call to `eql` per insertion/deletion. +pub fn ArrayHashMapUnmanaged( + comptime K: type, + comptime V: type, + comptime hash: fn (key: K) u32, + comptime eql: fn (a: K, b: K) bool, + comptime store_hash: bool, +) type { + return struct { + /// It is permitted to access this field directly. + entries: std.ArrayListUnmanaged(Entry) = .{}, + + /// When entries length is less than `linear_scan_max`, this remains `null`. + /// Once entries length grows big enough, this field is allocated. There is + /// an IndexHeader followed by an array of Index(I) structs, where I is defined + /// by how many total indexes there are. + index_header: ?*IndexHeader = null, + + /// Modifying the key is illegal behavior. + /// Modifying the value is allowed. + /// Entry pointers become invalid whenever this ArrayHashMap is modified, + /// unless `ensureCapacity` was previously used. + pub const Entry = struct { + /// This field is `void` if `store_hash` is `false`. 
+ hash: Hash, + key: K, + value: V, + }; + + pub const Hash = if (store_hash) u32 else void; + + pub const GetOrPutResult = struct { + entry: *Entry, + found_existing: bool, + }; + + pub const Managed = ArrayHashMap(K, V, hash, eql, store_hash); + + const Self = @This(); + + const linear_scan_max = 8; + + pub fn promote(self: Self, allocator: *Allocator) Managed { + return .{ + .unmanaged = self, + .allocator = allocator, + }; + } + + pub fn deinit(self: *Self, allocator: *Allocator) void { + self.entries.deinit(allocator); + if (self.index_header) |header| { + header.free(allocator); + } + self.* = undefined; + } + + pub fn clearRetainingCapacity(self: *Self) void { + self.entries.items.len = 0; + if (self.index_header) |header| { + header.max_distance_from_start_index = 0; + switch (header.capacityIndexType()) { + .u8 => mem.set(Index(u8), header.indexes(u8), Index(u8).empty), + .u16 => mem.set(Index(u16), header.indexes(u16), Index(u16).empty), + .u32 => mem.set(Index(u32), header.indexes(u32), Index(u32).empty), + .usize => mem.set(Index(usize), header.indexes(usize), Index(usize).empty), + } + } + } + + pub fn clearAndFree(self: *Self, allocator: *Allocator) void { + self.entries.shrink(allocator, 0); + if (self.index_header) |header| { + header.free(allocator); + self.index_header = null; + } + } + + /// If key exists this function cannot fail. + /// If there is an existing item with `key`, then the result + /// `Entry` pointer points to it, and found_existing is true. + /// Otherwise, puts a new item with undefined value, and + /// the `Entry` pointer points to it. Caller should then initialize + /// the value (but not the key). + pub fn getOrPut(self: *Self, allocator: *Allocator, key: K) !GetOrPutResult { + self.ensureCapacity(allocator, self.entries.items.len + 1) catch |err| { + // "If key exists this function cannot fail." + return GetOrPutResult{ + .entry = self.getEntry(key) orelse return err, + .found_existing = true, + }; + }; + return self.getOrPutAssumeCapacity(key); + } + + /// If there is an existing item with `key`, then the result + /// `Entry` pointer points to it, and found_existing is true. + /// Otherwise, puts a new item with undefined value, and + /// the `Entry` pointer points to it. Caller should then initialize + /// the value (but not the key). + /// If a new entry needs to be stored, this function asserts there + /// is enough capacity to store it. + pub fn getOrPutAssumeCapacity(self: *Self, key: K) GetOrPutResult { + const header = self.index_header orelse { + // Linear scan. 
+ const h = if (store_hash) hash(key) else {}; + for (self.entries.items) |*item| { + if (item.hash == h and eql(key, item.key)) { + return GetOrPutResult{ + .entry = item, + .found_existing = true, + }; + } + } + const new_entry = self.entries.addOneAssumeCapacity(); + new_entry.* = .{ + .hash = if (store_hash) h else {}, + .key = key, + .value = undefined, + }; + return GetOrPutResult{ + .entry = new_entry, + .found_existing = false, + }; + }; + + switch (header.capacityIndexType()) { + .u8 => return self.getOrPutInternal(key, header, u8), + .u16 => return self.getOrPutInternal(key, header, u16), + .u32 => return self.getOrPutInternal(key, header, u32), + .usize => return self.getOrPutInternal(key, header, usize), + } + } + + pub fn getOrPutValue(self: *Self, allocator: *Allocator, key: K, value: V) !*Entry { + const res = try self.getOrPut(allocator, key); + if (!res.found_existing) + res.entry.value = value; + + return res.entry; + } + + /// Increases capacity, guaranteeing that insertions up until the + /// `expected_count` will not cause an allocation, and therefore cannot fail. + pub fn ensureCapacity(self: *Self, allocator: *Allocator, new_capacity: usize) !void { + try self.entries.ensureCapacity(allocator, new_capacity); + if (new_capacity <= linear_scan_max) return; + + // Ensure that the indexes will be at most 60% full if + // `new_capacity` items are put into it. + const needed_len = new_capacity * 5 / 3; + if (self.index_header) |header| { + if (needed_len > header.indexes_len) { + // An overflow here would mean the amount of memory required would not + // be representable in the address space. + const new_indexes_len = math.ceilPowerOfTwo(usize, needed_len) catch unreachable; + const new_header = try IndexHeader.alloc(allocator, new_indexes_len); + self.insertAllEntriesIntoNewHeader(new_header); + header.free(allocator); + self.index_header = new_header; + } + } else { + // An overflow here would mean the amount of memory required would not + // be representable in the address space. + const new_indexes_len = math.ceilPowerOfTwo(usize, needed_len) catch unreachable; + const header = try IndexHeader.alloc(allocator, new_indexes_len); + self.insertAllEntriesIntoNewHeader(header); + self.index_header = header; + } + } + + /// Returns the number of total elements which may be present before it is + /// no longer guaranteed that no allocations will be performed. + pub fn capacity(self: Self) usize { + const entry_cap = self.entries.capacity; + const header = self.index_header orelse return math.min(linear_scan_max, entry_cap); + const indexes_cap = (header.indexes_len + 1) * 3 / 4; + return math.min(entry_cap, indexes_cap); + } + + /// Clobbers any existing data. To detect if a put would clobber + /// existing data, see `getOrPut`. + pub fn put(self: *Self, allocator: *Allocator, key: K, value: V) !void { + const result = try self.getOrPut(allocator, key); + result.entry.value = value; + } + + /// Inserts a key-value pair into the hash map, asserting that no previous + /// entry with the same key is already present + pub fn putNoClobber(self: *Self, allocator: *Allocator, key: K, value: V) !void { + const result = try self.getOrPut(allocator, key); + assert(!result.found_existing); + result.entry.value = value; + } + + /// Asserts there is enough capacity to store the new key-value pair. + /// Clobbers any existing data. To detect if a put would clobber + /// existing data, see `getOrPutAssumeCapacity`. 
+ pub fn putAssumeCapacity(self: *Self, key: K, value: V) void { + const result = self.getOrPutAssumeCapacity(key); + result.entry.value = value; + } + + /// Asserts there is enough capacity to store the new key-value pair. + /// Asserts that it does not clobber any existing data. + /// To detect if a put would clobber existing data, see `getOrPutAssumeCapacity`. + pub fn putAssumeCapacityNoClobber(self: *Self, key: K, value: V) void { + const result = self.getOrPutAssumeCapacity(key); + assert(!result.found_existing); + result.entry.value = value; + } + + /// Inserts a new `Entry` into the hash map, returning the previous one, if any. + pub fn fetchPut(self: *Self, allocator: *Allocator, key: K, value: V) !?Entry { + const gop = try self.getOrPut(allocator, key); + var result: ?Entry = null; + if (gop.found_existing) { + result = gop.entry.*; + } + gop.entry.value = value; + return result; + } + + /// Inserts a new `Entry` into the hash map, returning the previous one, if any. + /// If insertion happens, asserts there is enough capacity without allocating. + pub fn fetchPutAssumeCapacity(self: *Self, key: K, value: V) ?Entry { + const gop = self.getOrPutAssumeCapacity(key); + var result: ?Entry = null; + if (gop.found_existing) { + result = gop.entry.*; + } + gop.entry.value = value; + return result; + } + + pub fn getEntry(self: Self, key: K) ?*Entry { + const index = self.getIndex(key) orelse return null; + return &self.entries.items[index]; + } + + pub fn getIndex(self: Self, key: K) ?usize { + const header = self.index_header orelse { + // Linear scan. + const h = if (store_hash) hash(key) else {}; + for (self.entries.items) |*item, i| { + if (item.hash == h and eql(key, item.key)) { + return i; + } + } + return null; + }; + switch (header.capacityIndexType()) { + .u8 => return self.getInternal(key, header, u8), + .u16 => return self.getInternal(key, header, u16), + .u32 => return self.getInternal(key, header, u32), + .usize => return self.getInternal(key, header, usize), + } + } + + pub fn get(self: Self, key: K) ?V { + return if (self.getEntry(key)) |entry| entry.value else null; + } + + pub fn contains(self: Self, key: K) bool { + return self.getEntry(key) != null; + } + + /// If there is an `Entry` with a matching key, it is deleted from + /// the hash map, and then returned from this function. + pub fn remove(self: *Self, key: K) ?Entry { + const header = self.index_header orelse { + // Linear scan. + const h = if (store_hash) hash(key) else {}; + for (self.entries.items) |item, i| { + if (item.hash == h and eql(key, item.key)) { + return self.entries.swapRemove(i); + } + } + return null; + }; + switch (header.capacityIndexType()) { + .u8 => return self.removeInternal(key, header, u8), + .u16 => return self.removeInternal(key, header, u16), + .u32 => return self.removeInternal(key, header, u32), + .usize => return self.removeInternal(key, header, usize), + } + } + + /// Asserts there is an `Entry` with matching key, deletes it from the hash map, + /// and discards it. 
+ pub fn removeAssertDiscard(self: *Self, key: K) void { + assert(self.remove(key) != null); + } + + pub fn items(self: Self) []Entry { + return self.entries.items; + } + + pub fn clone(self: Self, allocator: *Allocator) !Self { + var other: Self = .{}; + try other.entries.appendSlice(allocator, self.entries.items); + + if (self.index_header) |header| { + const new_header = try IndexHeader.alloc(allocator, header.indexes_len); + other.insertAllEntriesIntoNewHeader(new_header); + other.index_header = new_header; + } + return other; + } + + fn removeInternal(self: *Self, key: K, header: *IndexHeader, comptime I: type) ?Entry { + const indexes = header.indexes(I); + const h = hash(key); + const start_index = header.constrainIndex(h); + var roll_over: usize = 0; + while (roll_over <= header.max_distance_from_start_index) : (roll_over += 1) { + const index_index = header.constrainIndex(start_index + roll_over); + var index = &indexes[index_index]; + if (index.isEmpty()) + return null; + + const entry = &self.entries.items[index.entry_index]; + + const hash_match = if (store_hash) h == entry.hash else true; + if (!hash_match or !eql(key, entry.key)) + continue; + + const removed_entry = self.entries.swapRemove(index.entry_index); + if (self.entries.items.len > 0 and self.entries.items.len != index.entry_index) { + // Because of the swap remove, now we need to update the index that was + // pointing to the last entry and is now pointing to this removed item slot. + self.updateEntryIndex(header, self.entries.items.len, index.entry_index, I, indexes); + } + + // Now we have to shift over the following indexes. + roll_over += 1; + while (roll_over < header.indexes_len) : (roll_over += 1) { + const next_index_index = header.constrainIndex(start_index + roll_over); + const next_index = &indexes[next_index_index]; + if (next_index.isEmpty() or next_index.distance_from_start_index == 0) { + index.setEmpty(); + return removed_entry; + } + index.* = next_index.*; + index.distance_from_start_index -= 1; + index = next_index; + } + unreachable; + } + return null; + } + + fn updateEntryIndex( + self: *Self, + header: *IndexHeader, + old_entry_index: usize, + new_entry_index: usize, + comptime I: type, + indexes: []Index(I), + ) void { + const h = if (store_hash) self.entries.items[new_entry_index].hash else hash(self.entries.items[new_entry_index].key); + const start_index = header.constrainIndex(h); + var roll_over: usize = 0; + while (roll_over <= header.max_distance_from_start_index) : (roll_over += 1) { + const index_index = header.constrainIndex(start_index + roll_over); + const index = &indexes[index_index]; + if (index.entry_index == old_entry_index) { + index.entry_index = @intCast(I, new_entry_index); + return; + } + } + unreachable; + } + + /// Must ensureCapacity before calling this. 
+ fn getOrPutInternal(self: *Self, key: K, header: *IndexHeader, comptime I: type) GetOrPutResult { + const indexes = header.indexes(I); + const h = hash(key); + const start_index = header.constrainIndex(h); + var roll_over: usize = 0; + var distance_from_start_index: usize = 0; + while (roll_over <= header.indexes_len) : ({ + roll_over += 1; + distance_from_start_index += 1; + }) { + const index_index = header.constrainIndex(start_index + roll_over); + const index = indexes[index_index]; + if (index.isEmpty()) { + indexes[index_index] = .{ + .distance_from_start_index = @intCast(I, distance_from_start_index), + .entry_index = @intCast(I, self.entries.items.len), + }; + header.maybeBumpMax(distance_from_start_index); + const new_entry = self.entries.addOneAssumeCapacity(); + new_entry.* = .{ + .hash = if (store_hash) h else {}, + .key = key, + .value = undefined, + }; + return .{ + .found_existing = false, + .entry = new_entry, + }; + } + + // This pointer survives the following append because we call + // entries.ensureCapacity before getOrPutInternal. + const entry = &self.entries.items[index.entry_index]; + const hash_match = if (store_hash) h == entry.hash else true; + if (hash_match and eql(key, entry.key)) { + return .{ + .found_existing = true, + .entry = entry, + }; + } + if (index.distance_from_start_index < distance_from_start_index) { + // In this case, we did not find the item. We will put a new entry. + // However, we will use this index for the new entry, and move + // the previous index down the line, to keep the max_distance_from_start_index + // as small as possible. + indexes[index_index] = .{ + .distance_from_start_index = @intCast(I, distance_from_start_index), + .entry_index = @intCast(I, self.entries.items.len), + }; + header.maybeBumpMax(distance_from_start_index); + const new_entry = self.entries.addOneAssumeCapacity(); + new_entry.* = .{ + .hash = if (store_hash) h else {}, + .key = key, + .value = undefined, + }; + + distance_from_start_index = index.distance_from_start_index; + var prev_entry_index = index.entry_index; + + // Find somewhere to put the index we replaced by shifting + // following indexes backwards. 
+ roll_over += 1; + distance_from_start_index += 1; + while (roll_over < header.indexes_len) : ({ + roll_over += 1; + distance_from_start_index += 1; + }) { + const next_index_index = header.constrainIndex(start_index + roll_over); + const next_index = indexes[next_index_index]; + if (next_index.isEmpty()) { + header.maybeBumpMax(distance_from_start_index); + indexes[next_index_index] = .{ + .entry_index = prev_entry_index, + .distance_from_start_index = @intCast(I, distance_from_start_index), + }; + return .{ + .found_existing = false, + .entry = new_entry, + }; + } + if (next_index.distance_from_start_index < distance_from_start_index) { + header.maybeBumpMax(distance_from_start_index); + indexes[next_index_index] = .{ + .entry_index = prev_entry_index, + .distance_from_start_index = @intCast(I, distance_from_start_index), + }; + distance_from_start_index = next_index.distance_from_start_index; + prev_entry_index = next_index.entry_index; + } + } + unreachable; + } + } + unreachable; + } + + fn getInternal(self: Self, key: K, header: *IndexHeader, comptime I: type) ?usize { + const indexes = header.indexes(I); + const h = hash(key); + const start_index = header.constrainIndex(h); + var roll_over: usize = 0; + while (roll_over <= header.max_distance_from_start_index) : (roll_over += 1) { + const index_index = header.constrainIndex(start_index + roll_over); + const index = indexes[index_index]; + if (index.isEmpty()) + return null; + + const entry = &self.entries.items[index.entry_index]; + const hash_match = if (store_hash) h == entry.hash else true; + if (hash_match and eql(key, entry.key)) + return index.entry_index; + } + return null; + } + + fn insertAllEntriesIntoNewHeader(self: *Self, header: *IndexHeader) void { + switch (header.capacityIndexType()) { + .u8 => return self.insertAllEntriesIntoNewHeaderGeneric(header, u8), + .u16 => return self.insertAllEntriesIntoNewHeaderGeneric(header, u16), + .u32 => return self.insertAllEntriesIntoNewHeaderGeneric(header, u32), + .usize => return self.insertAllEntriesIntoNewHeaderGeneric(header, usize), + } + } + + fn insertAllEntriesIntoNewHeaderGeneric(self: *Self, header: *IndexHeader, comptime I: type) void { + const indexes = header.indexes(I); + entry_loop: for (self.entries.items) |entry, i| { + const h = if (store_hash) entry.hash else hash(entry.key); + const start_index = header.constrainIndex(h); + var entry_index = i; + var roll_over: usize = 0; + var distance_from_start_index: usize = 0; + while (roll_over < header.indexes_len) : ({ + roll_over += 1; + distance_from_start_index += 1; + }) { + const index_index = header.constrainIndex(start_index + roll_over); + const next_index = indexes[index_index]; + if (next_index.isEmpty()) { + header.maybeBumpMax(distance_from_start_index); + indexes[index_index] = .{ + .distance_from_start_index = @intCast(I, distance_from_start_index), + .entry_index = @intCast(I, entry_index), + }; + continue :entry_loop; + } + if (next_index.distance_from_start_index < distance_from_start_index) { + header.maybeBumpMax(distance_from_start_index); + indexes[index_index] = .{ + .distance_from_start_index = @intCast(I, distance_from_start_index), + .entry_index = @intCast(I, entry_index), + }; + distance_from_start_index = next_index.distance_from_start_index; + entry_index = next_index.entry_index; + } + } + unreachable; + } + } + }; +} + +const CapacityIndexType = enum { u8, u16, u32, usize }; + +fn capacityIndexType(indexes_len: usize) CapacityIndexType { + if (indexes_len < math.maxInt(u8)) + return .u8; 
+ if (indexes_len < math.maxInt(u16)) + return .u16; + if (indexes_len < math.maxInt(u32)) + return .u32; + return .usize; +} + +fn capacityIndexSize(indexes_len: usize) usize { + switch (capacityIndexType(indexes_len)) { + .u8 => return @sizeOf(Index(u8)), + .u16 => return @sizeOf(Index(u16)), + .u32 => return @sizeOf(Index(u32)), + .usize => return @sizeOf(Index(usize)), + } +} + +fn Index(comptime I: type) type { + return extern struct { + entry_index: I, + distance_from_start_index: I, + + const Self = @This(); + + const empty = Self{ + .entry_index = math.maxInt(I), + .distance_from_start_index = undefined, + }; + + fn isEmpty(idx: Self) bool { + return idx.entry_index == math.maxInt(I); + } + + fn setEmpty(idx: *Self) void { + idx.entry_index = math.maxInt(I); + } + }; +} + +/// This struct is trailed by an array of `Index(I)`, where `I` +/// and the array length are determined by `indexes_len`. +const IndexHeader = struct { + max_distance_from_start_index: usize, + indexes_len: usize, + + fn constrainIndex(header: IndexHeader, i: usize) usize { + // This is an optimization for modulo of power of two integers; + // it requires `indexes_len` to always be a power of two. + return i & (header.indexes_len - 1); + } + + fn indexes(header: *IndexHeader, comptime I: type) []Index(I) { + const start = @ptrCast([*]Index(I), @ptrCast([*]u8, header) + @sizeOf(IndexHeader)); + return start[0..header.indexes_len]; + } + + fn capacityIndexType(header: IndexHeader) CapacityIndexType { + return hash_map.capacityIndexType(header.indexes_len); + } + + fn maybeBumpMax(header: *IndexHeader, distance_from_start_index: usize) void { + if (distance_from_start_index > header.max_distance_from_start_index) { + header.max_distance_from_start_index = distance_from_start_index; + } + } + + fn alloc(allocator: *Allocator, len: usize) !*IndexHeader { + const index_size = hash_map.capacityIndexSize(len); + const nbytes = @sizeOf(IndexHeader) + index_size * len; + const bytes = try allocator.allocAdvanced(u8, @alignOf(IndexHeader), nbytes, .exact); + @memset(bytes.ptr + @sizeOf(IndexHeader), 0xff, bytes.len - @sizeOf(IndexHeader)); + const result = @ptrCast(*IndexHeader, bytes.ptr); + result.* = .{ + .max_distance_from_start_index = 0, + .indexes_len = len, + }; + return result; + } + + fn free(header: *IndexHeader, allocator: *Allocator) void { + const index_size = hash_map.capacityIndexSize(header.indexes_len); + const ptr = @ptrCast([*]u8, header); + const slice = ptr[0 .. 
@sizeOf(IndexHeader) + header.indexes_len * index_size]; + allocator.free(slice); + } +}; + +test "basic hash map usage" { + var map = AutoArrayHashMap(i32, i32).init(std.testing.allocator); + defer map.deinit(); + + testing.expect((try map.fetchPut(1, 11)) == null); + testing.expect((try map.fetchPut(2, 22)) == null); + testing.expect((try map.fetchPut(3, 33)) == null); + testing.expect((try map.fetchPut(4, 44)) == null); + + try map.putNoClobber(5, 55); + testing.expect((try map.fetchPut(5, 66)).?.value == 55); + testing.expect((try map.fetchPut(5, 55)).?.value == 66); + + const gop1 = try map.getOrPut(5); + testing.expect(gop1.found_existing == true); + testing.expect(gop1.entry.value == 55); + gop1.entry.value = 77; + testing.expect(map.getEntry(5).?.value == 77); + + const gop2 = try map.getOrPut(99); + testing.expect(gop2.found_existing == false); + gop2.entry.value = 42; + testing.expect(map.getEntry(99).?.value == 42); + + const gop3 = try map.getOrPutValue(5, 5); + testing.expect(gop3.value == 77); + + const gop4 = try map.getOrPutValue(100, 41); + testing.expect(gop4.value == 41); + + testing.expect(map.contains(2)); + testing.expect(map.getEntry(2).?.value == 22); + testing.expect(map.get(2).? == 22); + + const rmv1 = map.remove(2); + testing.expect(rmv1.?.key == 2); + testing.expect(rmv1.?.value == 22); + testing.expect(map.remove(2) == null); + testing.expect(map.getEntry(2) == null); + testing.expect(map.get(2) == null); + + map.removeAssertDiscard(3); +} + +test "iterator hash map" { + // https://github.com/ziglang/zig/issues/5127 + if (std.Target.current.cpu.arch == .mips) return error.SkipZigTest; + + var reset_map = AutoArrayHashMap(i32, i32).init(std.testing.allocator); + defer reset_map.deinit(); + + // test ensureCapacity with a 0 parameter + try reset_map.ensureCapacity(0); + + try reset_map.putNoClobber(0, 11); + try reset_map.putNoClobber(1, 22); + try reset_map.putNoClobber(2, 33); + + var keys = [_]i32{ + 0, 2, 1, + }; + + var values = [_]i32{ + 11, 33, 22, + }; + + var buffer = [_]i32{ + 0, 0, 0, + }; + + var it = reset_map.iterator(); + const first_entry = it.next().?; + it.reset(); + + var count: usize = 0; + while (it.next()) |entry| : (count += 1) { + buffer[@intCast(usize, entry.key)] = entry.value; + } + testing.expect(count == 3); + testing.expect(it.next() == null); + + for (buffer) |v, i| { + testing.expect(buffer[@intCast(usize, keys[i])] == values[i]); + } + + it.reset(); + count = 0; + while (it.next()) |entry| { + buffer[@intCast(usize, entry.key)] = entry.value; + count += 1; + if (count >= 2) break; + } + + for (buffer[0..2]) |v, i| { + testing.expect(buffer[@intCast(usize, keys[i])] == values[i]); + } + + it.reset(); + var entry = it.next().?; + testing.expect(entry.key == first_entry.key); + testing.expect(entry.value == first_entry.value); +} + +test "ensure capacity" { + var map = AutoArrayHashMap(i32, i32).init(std.testing.allocator); + defer map.deinit(); + + try map.ensureCapacity(20); + const initial_capacity = map.capacity(); + testing.expect(initial_capacity >= 20); + var i: i32 = 0; + while (i < 20) : (i += 1) { + testing.expect(map.fetchPutAssumeCapacity(i, i + 10) == null); + } + // shouldn't resize from putAssumeCapacity + testing.expect(initial_capacity == map.capacity()); +} + +test "clone" { + var original = AutoArrayHashMap(i32, i32).init(std.testing.allocator); + defer original.deinit(); + + // put more than `linear_scan_max` so we can test that the index header is properly cloned + var i: u8 = 0; + while (i < 10) : (i += 1) { + 
try original.putNoClobber(i, i * 10); + } + + var copy = try original.clone(); + defer copy.deinit(); + + i = 0; + while (i < 10) : (i += 1) { + testing.expect(copy.get(i).? == i * 10); + } +} + +pub fn getHashPtrAddrFn(comptime K: type) (fn (K) u32) { + return struct { + fn hash(key: K) u32 { + return getAutoHashFn(usize)(@ptrToInt(key)); + } + }.hash; +} + +pub fn getTrivialEqlFn(comptime K: type) (fn (K, K) bool) { + return struct { + fn eql(a: K, b: K) bool { + return a == b; + } + }.eql; +} + +pub fn getAutoHashFn(comptime K: type) (fn (K) u32) { + return struct { + fn hash(key: K) u32 { + if (comptime trait.hasUniqueRepresentation(K)) { + return @truncate(u32, Wyhash.hash(0, std.mem.asBytes(&key))); + } else { + var hasher = Wyhash.init(0); + autoHash(&hasher, key); + return @truncate(u32, hasher.final()); + } + } + }.hash; +} + +pub fn getAutoEqlFn(comptime K: type) (fn (K, K) bool) { + return struct { + fn eql(a: K, b: K) bool { + return meta.eql(a, b); + } + }.eql; +} + +pub fn autoEqlIsCheap(comptime K: type) bool { + return switch (@typeInfo(K)) { + .Bool, + .Int, + .Float, + .Pointer, + .ComptimeFloat, + .ComptimeInt, + .Enum, + .Fn, + .ErrorSet, + .AnyFrame, + .EnumLiteral, + => true, + else => false, + }; +} + +pub fn getAutoHashStratFn(comptime K: type, comptime strategy: std.hash.Strategy) (fn (K) u32) { + return struct { + fn hash(key: K) u32 { + var hasher = Wyhash.init(0); + std.hash.autoHashStrat(&hasher, key, strategy); + return @truncate(u32, hasher.final()); + } + }.hash; +} diff --git a/lib/std/buf_set.zig b/lib/std/buf_set.zig index 1f5dda09c2..f48e6c594c 100644 --- a/lib/std/buf_set.zig +++ b/lib/std/buf_set.zig @@ -20,7 +20,8 @@ pub const BufSet = struct { } pub fn deinit(self: *BufSet) void { - for (self.hash_map.items()) |entry| { + var it = self.hash_map.iterator(); + while (it.next()) |entry| { self.free(entry.key); } self.hash_map.deinit(); diff --git a/lib/std/hash_map.zig b/lib/std/hash_map.zig index 8966737a0c..144a512edc 100644 --- a/lib/std/hash_map.zig +++ b/lib/std/hash_map.zig @@ -4,91 +4,94 @@ // The MIT license requires this copyright notice to be included in all copies // and substantial portions of the software. 
const std = @import("std.zig"); -const debug = std.debug; +const builtin = @import("builtin"); const assert = debug.assert; -const testing = std.testing; +const autoHash = std.hash.autoHash; +const debug = std.debug; +const warn = debug.warn; const math = std.math; const mem = std.mem; const meta = std.meta; const trait = meta.trait; -const autoHash = std.hash.autoHash; -const Wyhash = std.hash.Wyhash; const Allocator = mem.Allocator; -const builtin = @import("builtin"); -const hash_map = @This(); +const Wyhash = std.hash.Wyhash; + +pub fn getAutoHashFn(comptime K: type) (fn (K) u64) { + return struct { + fn hash(key: K) u64 { + if (comptime trait.hasUniqueRepresentation(K)) { + return Wyhash.hash(0, std.mem.asBytes(&key)); + } else { + var hasher = Wyhash.init(0); + autoHash(&hasher, key); + return hasher.final(); + } + } + }.hash; +} + +pub fn getAutoEqlFn(comptime K: type) (fn (K, K) bool) { + return struct { + fn eql(a: K, b: K) bool { + return meta.eql(a, b); + } + }.eql; +} pub fn AutoHashMap(comptime K: type, comptime V: type) type { - return HashMap(K, V, getAutoHashFn(K), getAutoEqlFn(K), autoEqlIsCheap(K)); + return HashMap(K, V, getAutoHashFn(K), getAutoEqlFn(K), DefaultMaxLoadPercentage); } pub fn AutoHashMapUnmanaged(comptime K: type, comptime V: type) type { - return HashMapUnmanaged(K, V, getAutoHashFn(K), getAutoEqlFn(K), autoEqlIsCheap(K)); + return HashMapUnmanaged(K, V, getAutoHashFn(K), getAutoEqlFn(K), DefaultMaxLoadPercentage); } /// Builtin hashmap for strings as keys. pub fn StringHashMap(comptime V: type) type { - return HashMap([]const u8, V, hashString, eqlString, true); + return HashMap([]const u8, V, hashString, eqlString, DefaultMaxLoadPercentage); } pub fn StringHashMapUnmanaged(comptime V: type) type { - return HashMapUnmanaged([]const u8, V, hashString, eqlString, true); + return HashMapUnmanaged([]const u8, V, hashString, eqlString, DefaultMaxLoadPercentage); } pub fn eqlString(a: []const u8, b: []const u8) bool { return mem.eql(u8, a, b); } -pub fn hashString(s: []const u8) u32 { - return @truncate(u32, std.hash.Wyhash.hash(0, s)); +pub fn hashString(s: []const u8) u64 { + return std.hash.Wyhash.hash(0, s); } -/// Insertion order is preserved. -/// Deletions perform a "swap removal" on the entries list. -/// Modifying the hash map while iterating is allowed, however one must understand -/// the (well defined) behavior when mixing insertions and deletions with iteration. +pub const DefaultMaxLoadPercentage = 80; + +/// General purpose hash table. +/// No order is guaranteed and any modification invalidates live iterators. +/// It provides fast operations (lookup, insertion, deletion) with quite high +/// load factors (up to 80% by default) for a low memory usage. /// For a hash map that can be initialized directly that does not store an Allocator /// field, see `HashMapUnmanaged`. -/// When `store_hash` is `false`, this data structure is biased towards cheap `eql` -/// functions. It does not store each item's hash in the table. Setting `store_hash` -/// to `true` incurs slightly more memory cost by storing each key's hash in the table -/// but only has to call `eql` for hash collisions. +/// If iterating over the table entries is a strong usecase and needs to be fast, +/// prefer the alternative `std.ArrayHashMap`. 
pub fn HashMap( comptime K: type, comptime V: type, - comptime hash: fn (key: K) u32, - comptime eql: fn (a: K, b: K) bool, - comptime store_hash: bool, + comptime hashFn: fn (key: K) u64, + comptime eqlFn: fn (a: K, b: K) bool, + comptime MaxLoadPercentage: u64, ) type { return struct { unmanaged: Unmanaged, allocator: *Allocator, - pub const Unmanaged = HashMapUnmanaged(K, V, hash, eql, store_hash); + pub const Unmanaged = HashMapUnmanaged(K, V, hashFn, eqlFn, MaxLoadPercentage); pub const Entry = Unmanaged.Entry; pub const Hash = Unmanaged.Hash; + pub const Iterator = Unmanaged.Iterator; + pub const Size = Unmanaged.Size; pub const GetOrPutResult = Unmanaged.GetOrPutResult; - /// Deprecated. Iterate using `items`. - pub const Iterator = struct { - hm: *const Self, - /// Iterator through the entry array. - index: usize, - - pub fn next(it: *Iterator) ?*Entry { - if (it.index >= it.hm.unmanaged.entries.items.len) return null; - const result = &it.hm.unmanaged.entries.items[it.index]; - it.index += 1; - return result; - } - - /// Reset the iterator to the initial index - pub fn reset(it: *Iterator) void { - it.index = 0; - } - }; - const Self = @This(); - const Index = Unmanaged.Index; pub fn init(allocator: *Allocator) Self { return .{ @@ -110,17 +113,12 @@ pub fn HashMap( return self.unmanaged.clearAndFree(self.allocator); } - /// Deprecated. Use `items().len`. pub fn count(self: Self) usize { - return self.items().len; + return self.unmanaged.count(); } - /// Deprecated. Iterate using `items`. pub fn iterator(self: *const Self) Iterator { - return Iterator{ - .hm = self, - .index = 0, - }; + return self.unmanaged.iterator(); } /// If key exists this function cannot fail. @@ -150,13 +148,13 @@ pub fn HashMap( /// Increases capacity, guaranteeing that insertions up until the /// `expected_count` will not cause an allocation, and therefore cannot fail. - pub fn ensureCapacity(self: *Self, new_capacity: usize) !void { - return self.unmanaged.ensureCapacity(self.allocator, new_capacity); + pub fn ensureCapacity(self: *Self, expected_count: Size) !void { + return self.unmanaged.ensureCapacity(self.allocator, expected_count); } /// Returns the number of total elements which may be present before it is /// no longer guaranteed that no allocations will be performed. - pub fn capacity(self: *Self) usize { + pub fn capacity(self: *Self) Size { return self.unmanaged.capacity(); } @@ -197,18 +195,14 @@ pub fn HashMap( return self.unmanaged.fetchPutAssumeCapacity(key, value); } - pub fn getEntry(self: Self, key: K) ?*Entry { - return self.unmanaged.getEntry(key); - } - - pub fn getIndex(self: Self, key: K) ?usize { - return self.unmanaged.getIndex(key); - } - pub fn get(self: Self, key: K) ?V { return self.unmanaged.get(key); } + pub fn getEntry(self: Self, key: K) ?*Entry { + return self.unmanaged.getEntry(key); + } + pub fn contains(self: Self, key: K) bool { return self.unmanaged.contains(key); } @@ -225,10 +219,6 @@ pub fn HashMap( return self.unmanaged.removeAssertDiscard(key); } - pub fn items(self: Self) []Entry { - return self.unmanaged.items(); - } - pub fn clone(self: Self) !Self { var other = try self.unmanaged.clone(self.allocator); return other.promote(self.allocator); @@ -236,63 +226,152 @@ pub fn HashMap( }; } -/// General purpose hash table. -/// Insertion order is preserved. -/// Deletions perform a "swap removal" on the entries list. 
-/// Modifying the hash map while iterating is allowed, however one must understand -/// the (well defined) behavior when mixing insertions and deletions with iteration. -/// This type does not store an Allocator field - the Allocator must be passed in -/// with each function call that requires it. See `HashMap` for a type that stores -/// an Allocator field for convenience. -/// Can be initialized directly using the default field values. -/// This type is designed to have low overhead for small numbers of entries. When -/// `store_hash` is `false` and the number of entries in the map is less than 9, -/// the overhead cost of using `HashMapUnmanaged` rather than `std.ArrayList` is -/// only a single pointer-sized integer. -/// When `store_hash` is `false`, this data structure is biased towards cheap `eql` -/// functions. It does not store each item's hash in the table. Setting `store_hash` -/// to `true` incurs slightly more memory cost by storing each key's hash in the table -/// but guarantees only one call to `eql` per insertion/deletion. +/// A HashMap based on open addressing and linear probing. +/// A lookup or modification typically occurs only 2 cache misses. +/// No order is guaranteed and any modification invalidates live iterators. +/// It achieves good performance with quite high load factors (by default, +/// grow is triggered at 80% full) and only one byte of overhead per element. +/// The struct itself is only 16 bytes for a small footprint. This comes at +/// the price of handling size with u32, which should be reasonnable enough +/// for almost all uses. +/// Deletions are achieved with tombstones. pub fn HashMapUnmanaged( comptime K: type, comptime V: type, - comptime hash: fn (key: K) u32, - comptime eql: fn (a: K, b: K) bool, - comptime store_hash: bool, + hashFn: fn (key: K) u64, + eqlFn: fn (a: K, b: K) bool, + comptime MaxLoadPercentage: u64, ) type { + comptime assert(MaxLoadPercentage > 0 and MaxLoadPercentage < 100); + return struct { - /// It is permitted to access this field directly. - entries: std.ArrayListUnmanaged(Entry) = .{}, - - /// When entries length is less than `linear_scan_max`, this remains `null`. - /// Once entries length grows big enough, this field is allocated. There is - /// an IndexHeader followed by an array of Index(I) structs, where I is defined - /// by how many total indexes there are. - index_header: ?*IndexHeader = null, - - /// Modifying the key is illegal behavior. - /// Modifying the value is allowed. - /// Entry pointers become invalid whenever this HashMap is modified, - /// unless `ensureCapacity` was previously used. + const Self = @This(); + + // This is actually a midway pointer to the single buffer containing + // a `Header` field, the `Metadata`s and `Entry`s. + // At `-@sizeOf(Header)` is the Header field. + // At `sizeOf(Metadata) * capacity + offset`, which is pointed to by + // self.header().entries, is the array of entries. + // This means that the hashmap only holds one live allocation, to + // reduce memory fragmentation and struct size. + /// Pointer to the metadata. + metadata: ?[*]Metadata = null, + + /// Current number of elements in the hashmap. + size: Size = 0, + + // Having a countdown to grow reduces the number of instructions to + // execute when determining if the hashmap has enough capacity already. + /// Number of available slots before a grow is needed to satisfy the + /// `MaxLoadPercentage`. + available: Size = 0, + + // This is purely empirical and not a /very smart magic constantâ„¢/. 
+ /// Capacity of the first grow when bootstrapping the hashmap. + const MinimalCapacity = 8; + + // This hashmap is specially designed for sizes that fit in a u32. + const Size = u32; + + // u64 hashes guarantee us that the fingerprint bits will never be used + // to compute the index of a slot, maximizing the use of entropy. + const Hash = u64; + pub const Entry = struct { - /// This field is `void` if `store_hash` is `false`. - hash: Hash, key: K, value: V, }; - pub const Hash = if (store_hash) u32 else void; + const Header = packed struct { + entries: [*]Entry, + capacity: Size, + }; + + /// Metadata for a slot. It can be in three states: empty, used or + /// tombstone. Tombstones indicate that an entry was previously used, + /// they are a simple way to handle removal. + /// To this state, we add 6 bits from the slot's key hash. These are + /// used as a fast way to disambiguate between entries without + /// having to use the equality function. If two fingerprints are + /// different, we know that we don't have to compare the keys at all. + /// The 6 bits are the highest ones from a 64 bit hash. This way, not + /// only we use the `log2(capacity)` lowest bits from the hash to determine + /// a slot index, but we use 6 more bits to quickly resolve collisions + /// when multiple elements with different hashes end up wanting to be in / the same slot. + /// Not using the equality function means we don't have to read into + /// the entries array, avoiding a likely cache miss. + const Metadata = packed struct { + const FingerPrint = u6; + + used: u1 = 0, + tombstone: u1 = 0, + fingerprint: FingerPrint = 0, + + pub fn isUsed(self: Metadata) bool { + return self.used == 1; + } + + pub fn isTombstone(self: Metadata) bool { + return self.tombstone == 1; + } + + pub fn takeFingerprint(hash: Hash) FingerPrint { + const hash_bits = @typeInfo(Hash).Int.bits; + const fp_bits = @typeInfo(FingerPrint).Int.bits; + return @truncate(FingerPrint, hash >> (hash_bits - fp_bits)); + } + + pub fn fill(self: *Metadata, fp: FingerPrint) void { + self.used = 1; + self.tombstone = 0; + self.fingerprint = fp; + } + + pub fn remove(self: *Metadata) void { + self.used = 0; + self.tombstone = 1; + self.fingerprint = 0; + } + }; + + comptime { + assert(@sizeOf(Metadata) == 1); + assert(@alignOf(Metadata) == 1); + } + + const Iterator = struct { + hm: *const Self, + index: Size = 0, + + pub fn next(it: *Iterator) ?*Entry { + assert(it.index <= it.hm.capacity()); + if (it.hm.size == 0) return null; + + const cap = it.hm.capacity(); + const end = it.hm.metadata.? + cap; + var metadata = it.hm.metadata.? 
+ it.index; + + while (metadata != end) : ({ + metadata += 1; + it.index += 1; + }) { + if (metadata[0].isUsed()) { + const entry = &it.hm.entries()[it.index]; + it.index += 1; + return entry; + } + } + + return null; + } + }; pub const GetOrPutResult = struct { entry: *Entry, found_existing: bool, }; - pub const Managed = HashMap(K, V, hash, eql, store_hash); - - const Self = @This(); - - const linear_scan_max = 8; + pub const Managed = HashMap(K, V, hashFn, eqlFn, MaxLoadPercentage); pub fn promote(self: Self, allocator: *Allocator) Managed { return .{ @@ -301,167 +380,156 @@ pub fn HashMapUnmanaged( }; } + fn isUnderMaxLoadPercentage(size: Size, cap: Size) bool { + return size * 100 < MaxLoadPercentage * cap; + } + + pub fn init(allocator: *Allocator) Self { + return .{}; + } + pub fn deinit(self: *Self, allocator: *Allocator) void { - self.entries.deinit(allocator); - if (self.index_header) |header| { - header.free(allocator); - } + self.deallocate(allocator); self.* = undefined; } - pub fn clearRetainingCapacity(self: *Self) void { - self.entries.items.len = 0; - if (self.index_header) |header| { - header.max_distance_from_start_index = 0; - switch (header.capacityIndexType()) { - .u8 => mem.set(Index(u8), header.indexes(u8), Index(u8).empty), - .u16 => mem.set(Index(u16), header.indexes(u16), Index(u16).empty), - .u32 => mem.set(Index(u32), header.indexes(u32), Index(u32).empty), - .usize => mem.set(Index(usize), header.indexes(usize), Index(usize).empty), - } - } - } + fn deallocate(self: *Self, allocator: *Allocator) void { + if (self.metadata == null) return; - pub fn clearAndFree(self: *Self, allocator: *Allocator) void { - self.entries.shrink(allocator, 0); - if (self.index_header) |header| { - header.free(allocator); - self.index_header = null; - } + const cap = self.capacity(); + const meta_size = @sizeOf(Header) + cap * @sizeOf(Metadata); + + const alignment = @alignOf(Entry) - 1; + const entries_size = @as(usize, cap) * @sizeOf(Entry) + alignment; + + const total_size = meta_size + entries_size; + + var slice: []u8 = undefined; + slice.ptr = @intToPtr([*]u8, @ptrToInt(self.header())); + slice.len = total_size; + allocator.free(slice); + + self.metadata = null; + self.available = 0; } - /// If key exists this function cannot fail. - /// If there is an existing item with `key`, then the result - /// `Entry` pointer points to it, and found_existing is true. - /// Otherwise, puts a new item with undefined value, and - /// the `Entry` pointer points to it. Caller should then initialize - /// the value (but not the key). - pub fn getOrPut(self: *Self, allocator: *Allocator, key: K) !GetOrPutResult { - self.ensureCapacity(allocator, self.entries.items.len + 1) catch |err| { - // "If key exists this function cannot fail." - return GetOrPutResult{ - .entry = self.getEntry(key) orelse return err, - .found_existing = true, - }; - }; - return self.getOrPutAssumeCapacity(key); + fn capacityForSize(size: Size) Size { + var new_cap = @truncate(u32, (@as(u64, size) * 100) / MaxLoadPercentage + 1); + new_cap = math.ceilPowerOfTwo(u32, new_cap) catch unreachable; + return new_cap; } - /// If there is an existing item with `key`, then the result - /// `Entry` pointer points to it, and found_existing is true. - /// Otherwise, puts a new item with undefined value, and - /// the `Entry` pointer points to it. Caller should then initialize - /// the value (but not the key). - /// If a new entry needs to be stored, this function asserts there - /// is enough capacity to store it. 
- pub fn getOrPutAssumeCapacity(self: *Self, key: K) GetOrPutResult { - const header = self.index_header orelse { - // Linear scan. - const h = if (store_hash) hash(key) else {}; - for (self.entries.items) |*item| { - if (item.hash == h and eql(key, item.key)) { - return GetOrPutResult{ - .entry = item, - .found_existing = true, - }; - } - } - const new_entry = self.entries.addOneAssumeCapacity(); - new_entry.* = .{ - .hash = if (store_hash) h else {}, - .key = key, - .value = undefined, - }; - return GetOrPutResult{ - .entry = new_entry, - .found_existing = false, - }; - }; + pub fn ensureCapacity(self: *Self, allocator: *Allocator, new_size: Size) !void { + if (new_size > self.size) + try self.growIfNeeded(allocator, new_size - self.size); + } - switch (header.capacityIndexType()) { - .u8 => return self.getOrPutInternal(key, header, u8), - .u16 => return self.getOrPutInternal(key, header, u16), - .u32 => return self.getOrPutInternal(key, header, u32), - .usize => return self.getOrPutInternal(key, header, usize), + pub fn clearRetainingCapacity(self: *Self) void { + if (self.metadata) |_| { + self.initMetadatas(); + self.size = 0; + self.available = 0; } } - pub fn getOrPutValue(self: *Self, allocator: *Allocator, key: K, value: V) !*Entry { - const res = try self.getOrPut(allocator, key); - if (!res.found_existing) - res.entry.value = value; + pub fn clearAndFree(self: *Self, allocator: *Allocator) void { + self.deallocate(allocator); + self.size = 0; + self.available = 0; + } - return res.entry; + pub fn count(self: *const Self) Size { + return self.size; } - /// Increases capacity, guaranteeing that insertions up until the - /// `expected_count` will not cause an allocation, and therefore cannot fail. - pub fn ensureCapacity(self: *Self, allocator: *Allocator, new_capacity: usize) !void { - try self.entries.ensureCapacity(allocator, new_capacity); - if (new_capacity <= linear_scan_max) return; - - // Ensure that the indexes will be at most 60% full if - // `new_capacity` items are put into it. - const needed_len = new_capacity * 5 / 3; - if (self.index_header) |header| { - if (needed_len > header.indexes_len) { - // An overflow here would mean the amount of memory required would not - // be representable in the address space. - const new_indexes_len = math.ceilPowerOfTwo(usize, needed_len) catch unreachable; - const new_header = try IndexHeader.alloc(allocator, new_indexes_len); - self.insertAllEntriesIntoNewHeader(new_header); - header.free(allocator); - self.index_header = new_header; - } - } else { - // An overflow here would mean the amount of memory required would not - // be representable in the address space. - const new_indexes_len = math.ceilPowerOfTwo(usize, needed_len) catch unreachable; - const header = try IndexHeader.alloc(allocator, new_indexes_len); - self.insertAllEntriesIntoNewHeader(header); - self.index_header = header; - } + fn header(self: *const Self) *Header { + return @ptrCast(*Header, @ptrCast([*]Header, self.metadata.?) - 1); } - /// Returns the number of total elements which may be present before it is - /// no longer guaranteed that no allocations will be performed. - pub fn capacity(self: Self) usize { - const entry_cap = self.entries.capacity; - const header = self.index_header orelse return math.min(linear_scan_max, entry_cap); - const indexes_cap = (header.indexes_len + 1) * 3 / 4; - return math.min(entry_cap, indexes_cap); + fn entries(self: *const Self) [*]Entry { + return self.header().entries; } - /// Clobbers any existing data. 
To detect if a put would clobber - /// existing data, see `getOrPut`. - pub fn put(self: *Self, allocator: *Allocator, key: K, value: V) !void { - const result = try self.getOrPut(allocator, key); - result.entry.value = value; + pub fn capacity(self: *const Self) Size { + if (self.metadata == null) return 0; + + return self.header().capacity; } - /// Inserts a key-value pair into the hash map, asserting that no previous - /// entry with the same key is already present + pub fn iterator(self: *const Self) Iterator { + return .{ .hm = self }; + } + + /// Insert an entry in the map. Assumes it is not already present. pub fn putNoClobber(self: *Self, allocator: *Allocator, key: K, value: V) !void { - const result = try self.getOrPut(allocator, key); - assert(!result.found_existing); - result.entry.value = value; + assert(!self.contains(key)); + try self.growIfNeeded(allocator, 1); + + self.putAssumeCapacityNoClobber(key, value); } - /// Asserts there is enough capacity to store the new key-value pair. - /// Clobbers any existing data. To detect if a put would clobber - /// existing data, see `getOrPutAssumeCapacity`. pub fn putAssumeCapacity(self: *Self, key: K, value: V) void { - const result = self.getOrPutAssumeCapacity(key); - result.entry.value = value; + const hash = hashFn(key); + const mask = self.capacity() - 1; + const fingerprint = Metadata.takeFingerprint(hash); + var idx = @truncate(usize, hash & mask); + + var first_tombstone_idx: usize = self.capacity(); // invalid index + var metadata = self.metadata.? + idx; + while (metadata[0].isUsed() or metadata[0].isTombstone()) { + if (metadata[0].isUsed() and metadata[0].fingerprint == fingerprint) { + const entry = &self.entries()[idx]; + if (eqlFn(entry.key, key)) { + return; + } + } else if (first_tombstone_idx == self.capacity() and metadata[0].isTombstone()) { + first_tombstone_idx = idx; + } + + idx = (idx + 1) & mask; + metadata = self.metadata.? + idx; + } + + if (first_tombstone_idx < self.capacity()) { + // Cheap try to lower probing lengths after deletions. Recycle a tombstone. + idx = first_tombstone_idx; + metadata = self.metadata.? + idx; + } else { + // We're using a slot previously free. + self.available -= 1; + } + + metadata[0].fill(fingerprint); + const entry = &self.entries()[idx]; + entry.* = .{ .key = key, .value = undefined }; + self.size += 1; } - /// Asserts there is enough capacity to store the new key-value pair. - /// Asserts that it does not clobber any existing data. - /// To detect if a put would clobber existing data, see `getOrPutAssumeCapacity`. + /// Insert an entry in the map. Assumes it is not already present, + /// and that no allocation is needed. pub fn putAssumeCapacityNoClobber(self: *Self, key: K, value: V) void { - const result = self.getOrPutAssumeCapacity(key); - assert(!result.found_existing); - result.entry.value = value; + assert(!self.contains(key)); + + const hash = hashFn(key); + const mask = self.capacity() - 1; + var idx = @truncate(usize, hash & mask); + + var metadata = self.metadata.? + idx; + while (metadata[0].isUsed()) { + idx = (idx + 1) & mask; + metadata = self.metadata.? + idx; + } + + if (!metadata[0].isTombstone()) { + assert(self.available > 0); + self.available -= 1; + } + + const fingerprint = Metadata.takeFingerprint(hash); + metadata[0].fill(fingerprint); + self.entries()[idx] = Entry{ .key = key, .value = value }; + + self.size += 1; } /// Inserts a new `Entry` into the hash map, returning the previous one, if any. 
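
The probing logic in this hunk composes three pieces: a power-of-two mask over the low hash bits to pick the starting slot, linear stepping with wrap-around, and a 6-bit fingerprint taken from the top of the 64-bit hash so most mismatching slots are rejected without calling `eqlFn`. A standalone sketch of that split (the helper names here are illustrative, not part of the patch):

const std = @import("std");
const assert = std.debug.assert;

// Start index for linear probing: capacity is a power of two, so masking
// the low bits of the hash is equivalent to a modulo.
fn probeStart(hash: u64, capacity: u64) u64 {
    assert(std.math.isPowerOfTwo(capacity));
    return hash & (capacity - 1);
}

// Fingerprint: the 6 highest bits of the hash, the bits least likely to
// overlap with the ones already consumed by the slot index.
fn takeFingerprint(hash: u64) u6 {
    return @truncate(u6, hash >> (64 - 6));
}

test "hash split used by the probe loop (sketch)" {
    const h = std.hash.Wyhash.hash(0, "some key");
    const cap: u64 = 64;
    var idx = probeStart(h, cap);
    std.testing.expect(idx < cap);
    // Advancing the probe wraps around thanks to the same mask.
    idx = (idx + 1) & (cap - 1);
    std.testing.expect(idx < cap);
    _ = takeFingerprint(h);
}

The put path above additionally recycles the first tombstone it passes, which keeps probe sequences short after deletions.
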
@@ -488,400 +556,622 @@ pub fn HashMapUnmanaged( } pub fn getEntry(self: Self, key: K) ?*Entry { - const index = self.getIndex(key) orelse return null; - return &self.entries.items[index]; - } + if (self.size == 0) { + return null; + } - pub fn getIndex(self: Self, key: K) ?usize { - const header = self.index_header orelse { - // Linear scan. - const h = if (store_hash) hash(key) else {}; - for (self.entries.items) |*item, i| { - if (item.hash == h and eql(key, item.key)) { - return i; + const hash = hashFn(key); + const mask = self.capacity() - 1; + const fingerprint = Metadata.takeFingerprint(hash); + var idx = @truncate(usize, hash & mask); + + var metadata = self.metadata.? + idx; + while (metadata[0].isUsed() or metadata[0].isTombstone()) { + if (metadata[0].isUsed() and metadata[0].fingerprint == fingerprint) { + const entry = &self.entries()[idx]; + if (eqlFn(entry.key, key)) { + return entry; } } - return null; - }; - switch (header.capacityIndexType()) { - .u8 => return self.getInternal(key, header, u8), - .u16 => return self.getInternal(key, header, u16), - .u32 => return self.getInternal(key, header, u32), - .usize => return self.getInternal(key, header, usize), + idx = (idx + 1) & mask; + metadata = self.metadata.? + idx; } - } - pub fn get(self: Self, key: K) ?V { - return if (self.getEntry(key)) |entry| entry.value else null; + return null; } - pub fn contains(self: Self, key: K) bool { - return self.getEntry(key) != null; + /// Insert an entry if the associated key is not already present, otherwise update preexisting value. + /// Returns true if the key was already present. + pub fn put(self: *Self, allocator: *Allocator, key: K, value: V) !void { + const result = try self.getOrPut(allocator, key); + result.entry.value = value; } - /// If there is an `Entry` with a matching key, it is deleted from - /// the hash map, and then returned from this function. - pub fn remove(self: *Self, key: K) ?Entry { - const header = self.index_header orelse { - // Linear scan. - const h = if (store_hash) hash(key) else {}; - for (self.entries.items) |item, i| { - if (item.hash == h and eql(key, item.key)) { - return self.entries.swapRemove(i); + /// Get an optional pointer to the value associated with key, if present. + pub fn get(self: Self, key: K) ?V { + if (self.size == 0) { + return null; + } + + const hash = hashFn(key); + const mask = self.capacity() - 1; + const fingerprint = Metadata.takeFingerprint(hash); + var idx = @truncate(usize, hash & mask); + + var metadata = self.metadata.? + idx; + while (metadata[0].isUsed() or metadata[0].isTombstone()) { + if (metadata[0].isUsed() and metadata[0].fingerprint == fingerprint) { + const entry = &self.entries()[idx]; + if (eqlFn(entry.key, key)) { + return entry.value; } } - return null; - }; - switch (header.capacityIndexType()) { - .u8 => return self.removeInternal(key, header, u8), - .u16 => return self.removeInternal(key, header, u16), - .u32 => return self.removeInternal(key, header, u32), - .usize => return self.removeInternal(key, header, usize), + idx = (idx + 1) & mask; + metadata = self.metadata.? + idx; } - } - /// Asserts there is an `Entry` with matching key, deletes it from the hash map, - /// and discards it. 
- pub fn removeAssertDiscard(self: *Self, key: K) void { - assert(self.remove(key) != null); + return null; } - pub fn items(self: Self) []Entry { - return self.entries.items; + pub fn getOrPut(self: *Self, allocator: *Allocator, key: K) !GetOrPutResult { + try self.growIfNeeded(allocator, 1); + + return self.getOrPutAssumeCapacity(key); } - pub fn clone(self: Self, allocator: *Allocator) !Self { - var other: Self = .{}; - try other.entries.appendSlice(allocator, self.entries.items); + pub fn getOrPutAssumeCapacity(self: *Self, key: K) GetOrPutResult { + const hash = hashFn(key); + const mask = self.capacity() - 1; + const fingerprint = Metadata.takeFingerprint(hash); + var idx = @truncate(usize, hash & mask); + + var first_tombstone_idx: usize = self.capacity(); // invalid index + var metadata = self.metadata.? + idx; + while (metadata[0].isUsed() or metadata[0].isTombstone()) { + if (metadata[0].isUsed() and metadata[0].fingerprint == fingerprint) { + const entry = &self.entries()[idx]; + if (eqlFn(entry.key, key)) { + return GetOrPutResult{ .entry = entry, .found_existing = true }; + } + } else if (first_tombstone_idx == self.capacity() and metadata[0].isTombstone()) { + first_tombstone_idx = idx; + } - if (self.index_header) |header| { - const new_header = try IndexHeader.alloc(allocator, header.indexes_len); - other.insertAllEntriesIntoNewHeader(new_header); - other.index_header = new_header; + idx = (idx + 1) & mask; + metadata = self.metadata.? + idx; } - return other; + + if (first_tombstone_idx < self.capacity()) { + // Cheap try to lower probing lengths after deletions. Recycle a tombstone. + idx = first_tombstone_idx; + metadata = self.metadata.? + idx; + } else { + // We're using a slot previously free. + self.available -= 1; + } + + metadata[0].fill(fingerprint); + const entry = &self.entries()[idx]; + entry.* = .{ .key = key, .value = undefined }; + self.size += 1; + + return GetOrPutResult{ .entry = entry, .found_existing = false }; } - fn removeInternal(self: *Self, key: K, header: *IndexHeader, comptime I: type) ?Entry { - const indexes = header.indexes(I); - const h = hash(key); - const start_index = header.constrainIndex(h); - var roll_over: usize = 0; - while (roll_over <= header.max_distance_from_start_index) : (roll_over += 1) { - const index_index = header.constrainIndex(start_index + roll_over); - var index = &indexes[index_index]; - if (index.isEmpty()) - return null; - - const entry = &self.entries.items[index.entry_index]; - - const hash_match = if (store_hash) h == entry.hash else true; - if (!hash_match or !eql(key, entry.key)) - continue; - - const removed_entry = self.entries.swapRemove(index.entry_index); - if (self.entries.items.len > 0 and self.entries.items.len != index.entry_index) { - // Because of the swap remove, now we need to update the index that was - // pointing to the last entry and is now pointing to this removed item slot. - self.updateEntryIndex(header, self.entries.items.len, index.entry_index, I, indexes); - } + pub fn getOrPutValue(self: *Self, allocator: *Allocator, key: K, value: V) !*Entry { + const res = try self.getOrPut(allocator, key); + if (!res.found_existing) res.entry.value = value; + return res.entry; + } - // Now we have to shift over the following indexes. 
- roll_over += 1; - while (roll_over < header.indexes_len) : (roll_over += 1) { - const next_index_index = header.constrainIndex(start_index + roll_over); - const next_index = &indexes[next_index_index]; - if (next_index.isEmpty() or next_index.distance_from_start_index == 0) { - index.setEmpty(); + /// Return true if there is a value associated with key in the map. + pub fn contains(self: *const Self, key: K) bool { + return self.get(key) != null; + } + + /// If there is an `Entry` with a matching key, it is deleted from + /// the hash map, and then returned from this function. + pub fn remove(self: *Self, key: K) ?Entry { + if (self.size == 0) return null; + + const hash = hashFn(key); + const mask = self.capacity() - 1; + const fingerprint = Metadata.takeFingerprint(hash); + var idx = @truncate(usize, hash & mask); + + var metadata = self.metadata.? + idx; + while (metadata[0].isUsed() or metadata[0].isTombstone()) { + if (metadata[0].isUsed() and metadata[0].fingerprint == fingerprint) { + const entry = &self.entries()[idx]; + if (eqlFn(entry.key, key)) { + const removed_entry = entry.*; + metadata[0].remove(); + entry.* = undefined; + self.size -= 1; return removed_entry; } - index.* = next_index.*; - index.distance_from_start_index -= 1; - index = next_index; } - unreachable; + idx = (idx + 1) & mask; + metadata = self.metadata.? + idx; } + return null; } - fn updateEntryIndex( - self: *Self, - header: *IndexHeader, - old_entry_index: usize, - new_entry_index: usize, - comptime I: type, - indexes: []Index(I), - ) void { - const h = if (store_hash) self.entries.items[new_entry_index].hash else hash(self.entries.items[new_entry_index].key); - const start_index = header.constrainIndex(h); - var roll_over: usize = 0; - while (roll_over <= header.max_distance_from_start_index) : (roll_over += 1) { - const index_index = header.constrainIndex(start_index + roll_over); - const index = &indexes[index_index]; - if (index.entry_index == old_entry_index) { - index.entry_index = @intCast(I, new_entry_index); - return; + /// Asserts there is an `Entry` with matching key, deletes it from the hash map, + /// and discards it. + pub fn removeAssertDiscard(self: *Self, key: K) void { + assert(self.contains(key)); + + const hash = hashFn(key); + const mask = self.capacity() - 1; + const fingerprint = Metadata.takeFingerprint(hash); + var idx = @truncate(usize, hash & mask); + + var metadata = self.metadata.? + idx; + while (metadata[0].isUsed() or metadata[0].isTombstone()) { + if (metadata[0].isUsed() and metadata[0].fingerprint == fingerprint) { + const entry = &self.entries()[idx]; + if (eqlFn(entry.key, key)) { + metadata[0].remove(); + entry.* = undefined; + self.size -= 1; + return; + } } + idx = (idx + 1) & mask; + metadata = self.metadata.? + idx; } + unreachable; } - /// Must ensureCapacity before calling this. 
- fn getOrPutInternal(self: *Self, key: K, header: *IndexHeader, comptime I: type) GetOrPutResult { - const indexes = header.indexes(I); - const h = hash(key); - const start_index = header.constrainIndex(h); - var roll_over: usize = 0; - var distance_from_start_index: usize = 0; - while (roll_over <= header.indexes_len) : ({ - roll_over += 1; - distance_from_start_index += 1; - }) { - const index_index = header.constrainIndex(start_index + roll_over); - const index = indexes[index_index]; - if (index.isEmpty()) { - indexes[index_index] = .{ - .distance_from_start_index = @intCast(I, distance_from_start_index), - .entry_index = @intCast(I, self.entries.items.len), - }; - header.maybeBumpMax(distance_from_start_index); - const new_entry = self.entries.addOneAssumeCapacity(); - new_entry.* = .{ - .hash = if (store_hash) h else {}, - .key = key, - .value = undefined, - }; - return .{ - .found_existing = false, - .entry = new_entry, - }; - } + fn initMetadatas(self: *Self) void { + @memset(@ptrCast([*]u8, self.metadata.?), 0, @sizeOf(Metadata) * self.capacity()); + } - // This pointer survives the following append because we call - // entries.ensureCapacity before getOrPutInternal. - const entry = &self.entries.items[index.entry_index]; - const hash_match = if (store_hash) h == entry.hash else true; - if (hash_match and eql(key, entry.key)) { - return .{ - .found_existing = true, - .entry = entry, - }; - } - if (index.distance_from_start_index < distance_from_start_index) { - // In this case, we did not find the item. We will put a new entry. - // However, we will use this index for the new entry, and move - // the previous index down the line, to keep the max_distance_from_start_index - // as small as possible. - indexes[index_index] = .{ - .distance_from_start_index = @intCast(I, distance_from_start_index), - .entry_index = @intCast(I, self.entries.items.len), - }; - header.maybeBumpMax(distance_from_start_index); - const new_entry = self.entries.addOneAssumeCapacity(); - new_entry.* = .{ - .hash = if (store_hash) h else {}, - .key = key, - .value = undefined, - }; - - distance_from_start_index = index.distance_from_start_index; - var prev_entry_index = index.entry_index; - - // Find somewhere to put the index we replaced by shifting - // following indexes backwards. - roll_over += 1; - distance_from_start_index += 1; - while (roll_over < header.indexes_len) : ({ - roll_over += 1; - distance_from_start_index += 1; - }) { - const next_index_index = header.constrainIndex(start_index + roll_over); - const next_index = indexes[next_index_index]; - if (next_index.isEmpty()) { - header.maybeBumpMax(distance_from_start_index); - indexes[next_index_index] = .{ - .entry_index = prev_entry_index, - .distance_from_start_index = @intCast(I, distance_from_start_index), - }; - return .{ - .found_existing = false, - .entry = new_entry, - }; - } - if (next_index.distance_from_start_index < distance_from_start_index) { - header.maybeBumpMax(distance_from_start_index); - indexes[next_index_index] = .{ - .entry_index = prev_entry_index, - .distance_from_start_index = @intCast(I, distance_from_start_index), - }; - distance_from_start_index = next_index.distance_from_start_index; - prev_entry_index = next_index.entry_index; - } - } - unreachable; - } - } - unreachable; + // This counts the number of occupied slots, used + tombstones, which is + // what has to stay under the MaxLoadPercentage of capacity. 
+ fn load(self: *const Self) Size { + const max_load = (self.capacity() * MaxLoadPercentage) / 100; + assert(max_load >= self.available); + return @truncate(Size, max_load - self.available); } - fn getInternal(self: Self, key: K, header: *IndexHeader, comptime I: type) ?usize { - const indexes = header.indexes(I); - const h = hash(key); - const start_index = header.constrainIndex(h); - var roll_over: usize = 0; - while (roll_over <= header.max_distance_from_start_index) : (roll_over += 1) { - const index_index = header.constrainIndex(start_index + roll_over); - const index = indexes[index_index]; - if (index.isEmpty()) - return null; - - const entry = &self.entries.items[index.entry_index]; - const hash_match = if (store_hash) h == entry.hash else true; - if (hash_match and eql(key, entry.key)) - return index.entry_index; + fn growIfNeeded(self: *Self, allocator: *Allocator, new_count: Size) !void { + if (new_count > self.available) { + try self.grow(allocator, capacityForSize(self.load() + new_count)); } - return null; } - fn insertAllEntriesIntoNewHeader(self: *Self, header: *IndexHeader) void { - switch (header.capacityIndexType()) { - .u8 => return self.insertAllEntriesIntoNewHeaderGeneric(header, u8), - .u16 => return self.insertAllEntriesIntoNewHeaderGeneric(header, u16), - .u32 => return self.insertAllEntriesIntoNewHeaderGeneric(header, u32), - .usize => return self.insertAllEntriesIntoNewHeaderGeneric(header, usize), + pub fn clone(self: Self, allocator: *Allocator) !Self { + var other = Self{}; + if (self.size == 0) + return other; + + const new_cap = capacityForSize(self.size); + try other.allocate(allocator, new_cap); + other.initMetadatas(); + other.available = @truncate(u32, (new_cap * MaxLoadPercentage) / 100); + + var i: Size = 0; + var metadata = self.metadata.?; + var entr = self.entries(); + while (i < self.capacity()) : (i += 1) { + if (metadata[i].isUsed()) { + const entry = &entr[i]; + other.putAssumeCapacityNoClobber(entry.key, entry.value); + if (other.size == self.size) + break; + } } + + return other; } - fn insertAllEntriesIntoNewHeaderGeneric(self: *Self, header: *IndexHeader, comptime I: type) void { - const indexes = header.indexes(I); - entry_loop: for (self.entries.items) |entry, i| { - const h = if (store_hash) entry.hash else hash(entry.key); - const start_index = header.constrainIndex(h); - var entry_index = i; - var roll_over: usize = 0; - var distance_from_start_index: usize = 0; - while (roll_over < header.indexes_len) : ({ - roll_over += 1; - distance_from_start_index += 1; - }) { - const index_index = header.constrainIndex(start_index + roll_over); - const next_index = indexes[index_index]; - if (next_index.isEmpty()) { - header.maybeBumpMax(distance_from_start_index); - indexes[index_index] = .{ - .distance_from_start_index = @intCast(I, distance_from_start_index), - .entry_index = @intCast(I, entry_index), - }; - continue :entry_loop; - } - if (next_index.distance_from_start_index < distance_from_start_index) { - header.maybeBumpMax(distance_from_start_index); - indexes[index_index] = .{ - .distance_from_start_index = @intCast(I, distance_from_start_index), - .entry_index = @intCast(I, entry_index), - }; - distance_from_start_index = next_index.distance_from_start_index; - entry_index = next_index.entry_index; + fn grow(self: *Self, allocator: *Allocator, new_capacity: Size) !void { + const new_cap = std.math.max(new_capacity, MinimalCapacity); + assert(new_cap > self.capacity()); + assert(std.math.isPowerOfTwo(new_cap)); + + var map = Self{}; + 
defer map.deinit(allocator); + try map.allocate(allocator, new_cap); + map.initMetadatas(); + map.available = @truncate(u32, (new_cap * MaxLoadPercentage) / 100); + + if (self.size != 0) { + const old_capacity = self.capacity(); + var i: Size = 0; + var metadata = self.metadata.?; + var entr = self.entries(); + while (i < old_capacity) : (i += 1) { + if (metadata[i].isUsed()) { + const entry = &entr[i]; + map.putAssumeCapacityNoClobber(entry.key, entry.value); + if (map.size == self.size) + break; } } - unreachable; } + + self.size = 0; + std.mem.swap(Self, self, &map); + } + + fn allocate(self: *Self, allocator: *Allocator, new_capacity: Size) !void { + const meta_size = @sizeOf(Header) + new_capacity * @sizeOf(Metadata); + + const alignment = @alignOf(Entry) - 1; + const entries_size = @as(usize, new_capacity) * @sizeOf(Entry) + alignment; + + const total_size = meta_size + entries_size; + + const slice = try allocator.alignedAlloc(u8, @alignOf(Header), total_size); + const ptr = @ptrToInt(slice.ptr); + + const metadata = ptr + @sizeOf(Header); + var entry_ptr = ptr + meta_size; + entry_ptr = (entry_ptr + alignment) & ~@as(usize, alignment); + assert(entry_ptr + @as(usize, new_capacity) * @sizeOf(Entry) <= ptr + total_size); + + const hdr = @intToPtr(*Header, ptr); + hdr.entries = @intToPtr([*]Entry, entry_ptr); + hdr.capacity = new_capacity; + self.metadata = @intToPtr([*]Metadata, metadata); } }; } -const CapacityIndexType = enum { u8, u16, u32, usize }; +const testing = std.testing; +const expect = std.testing.expect; +const expectEqual = std.testing.expectEqual; + +test "std.hash_map basic usage" { + var map = AutoHashMap(u32, u32).init(std.testing.allocator); + defer map.deinit(); + + const count = 5; + var i: u32 = 0; + var total: u32 = 0; + while (i < count) : (i += 1) { + try map.put(i, i); + total += i; + } + + var sum: u32 = 0; + var it = map.iterator(); + while (it.next()) |kv| { + sum += kv.key; + } + expect(sum == total); + + i = 0; + sum = 0; + while (i < count) : (i += 1) { + expectEqual(map.get(i).?, i); + sum += map.get(i).?; + } + expectEqual(total, sum); +} + +test "std.hash_map ensureCapacity" { + var map = AutoHashMap(i32, i32).init(std.testing.allocator); + defer map.deinit(); -fn capacityIndexType(indexes_len: usize) CapacityIndexType { - if (indexes_len < math.maxInt(u8)) - return .u8; - if (indexes_len < math.maxInt(u16)) - return .u16; - if (indexes_len < math.maxInt(u32)) - return .u32; - return .usize; + try map.ensureCapacity(20); + const initial_capacity = map.capacity(); + testing.expect(initial_capacity >= 20); + var i: i32 = 0; + while (i < 20) : (i += 1) { + testing.expect(map.fetchPutAssumeCapacity(i, i + 10) == null); + } + // shouldn't resize from putAssumeCapacity + testing.expect(initial_capacity == map.capacity()); } -fn capacityIndexSize(indexes_len: usize) usize { - switch (capacityIndexType(indexes_len)) { - .u8 => return @sizeOf(Index(u8)), - .u16 => return @sizeOf(Index(u16)), - .u32 => return @sizeOf(Index(u32)), - .usize => return @sizeOf(Index(usize)), +test "std.hash_map ensureCapacity with tombstones" { + var map = AutoHashMap(i32, i32).init(std.testing.allocator); + defer map.deinit(); + + var i: i32 = 0; + while (i < 100) : (i += 1) { + try map.ensureCapacity(@intCast(u32, map.count() + 1)); + map.putAssumeCapacity(i, i); + // Remove to create tombstones that still count as load in the hashmap. 
+ _ = map.remove(i); } } -fn Index(comptime I: type) type { - return extern struct { - entry_index: I, - distance_from_start_index: I, +test "std.hash_map clearRetainingCapacity" { + var map = AutoHashMap(u32, u32).init(std.testing.allocator); + defer map.deinit(); + + map.clearRetainingCapacity(); - const Self = @This(); + try map.put(1, 1); + expectEqual(map.get(1).?, 1); + expectEqual(map.count(), 1); - const empty = Self{ - .entry_index = math.maxInt(I), - .distance_from_start_index = undefined, - }; + const cap = map.capacity(); + expect(cap > 0); + + map.clearRetainingCapacity(); + map.clearRetainingCapacity(); + expectEqual(map.count(), 0); + expectEqual(map.capacity(), cap); + expect(!map.contains(1)); +} + +test "std.hash_map grow" { + var map = AutoHashMap(u32, u32).init(std.testing.allocator); + defer map.deinit(); - fn isEmpty(idx: Self) bool { - return idx.entry_index == math.maxInt(I); + const growTo = 12456; + + var i: u32 = 0; + while (i < growTo) : (i += 1) { + try map.put(i, i); + } + expectEqual(map.count(), growTo); + + i = 0; + var it = map.iterator(); + while (it.next()) |kv| { + expectEqual(kv.key, kv.value); + i += 1; + } + expectEqual(i, growTo); + + i = 0; + while (i < growTo) : (i += 1) { + expectEqual(map.get(i).?, i); + } +} + +test "std.hash_map clone" { + var map = AutoHashMap(u32, u32).init(std.testing.allocator); + defer map.deinit(); + + var a = try map.clone(); + defer a.deinit(); + + expectEqual(a.count(), 0); + + try a.put(1, 1); + try a.put(2, 2); + try a.put(3, 3); + + var b = try a.clone(); + defer b.deinit(); + + expectEqual(b.count(), 3); + expectEqual(b.get(1), 1); + expectEqual(b.get(2), 2); + expectEqual(b.get(3), 3); +} + +test "std.hash_map ensureCapacity with existing elements" { + var map = AutoHashMap(u32, u32).init(std.testing.allocator); + defer map.deinit(); + + try map.put(0, 0); + expectEqual(map.count(), 1); + expectEqual(map.capacity(), @TypeOf(map).Unmanaged.MinimalCapacity); + + try map.ensureCapacity(65); + expectEqual(map.count(), 1); + expectEqual(map.capacity(), 128); +} + +test "std.hash_map ensureCapacity satisfies max load factor" { + var map = AutoHashMap(u32, u32).init(std.testing.allocator); + defer map.deinit(); + + try map.ensureCapacity(127); + expectEqual(map.capacity(), 256); +} + +test "std.hash_map remove" { + var map = AutoHashMap(u32, u32).init(std.testing.allocator); + defer map.deinit(); + + var i: u32 = 0; + while (i < 16) : (i += 1) { + try map.put(i, i); + } + + i = 0; + while (i < 16) : (i += 1) { + if (i % 3 == 0) { + _ = map.remove(i); } + } + expectEqual(map.count(), 10); + var it = map.iterator(); + while (it.next()) |kv| { + expectEqual(kv.key, kv.value); + expect(kv.key % 3 != 0); + } - fn setEmpty(idx: *Self) void { - idx.entry_index = math.maxInt(I); + i = 0; + while (i < 16) : (i += 1) { + if (i % 3 == 0) { + expect(!map.contains(i)); + } else { + expectEqual(map.get(i).?, i); } - }; + } } -/// This struct is trailed by an array of `Index(I)`, where `I` -/// and the array length are determined by `indexes_len`. -const IndexHeader = struct { - max_distance_from_start_index: usize, - indexes_len: usize, +test "std.hash_map reverse removes" { + var map = AutoHashMap(u32, u32).init(std.testing.allocator); + defer map.deinit(); - fn constrainIndex(header: IndexHeader, i: usize) usize { - // This is an optimization for modulo of power of two integers; - // it requires `indexes_len` to always be a power of two. 
- return i & (header.indexes_len - 1); + var i: u32 = 0; + while (i < 16) : (i += 1) { + try map.putNoClobber(i, i); } - fn indexes(header: *IndexHeader, comptime I: type) []Index(I) { - const start = @ptrCast([*]Index(I), @ptrCast([*]u8, header) + @sizeOf(IndexHeader)); - return start[0..header.indexes_len]; + i = 16; + while (i > 0) : (i -= 1) { + _ = map.remove(i - 1); + expect(!map.contains(i - 1)); + var j: u32 = 0; + while (j < i - 1) : (j += 1) { + expectEqual(map.get(j).?, j); + } } - fn capacityIndexType(header: IndexHeader) CapacityIndexType { - return hash_map.capacityIndexType(header.indexes_len); + expectEqual(map.count(), 0); +} + +test "std.hash_map multiple removes on same metadata" { + var map = AutoHashMap(u32, u32).init(std.testing.allocator); + defer map.deinit(); + + var i: u32 = 0; + while (i < 16) : (i += 1) { + try map.put(i, i); } - fn maybeBumpMax(header: *IndexHeader, distance_from_start_index: usize) void { - if (distance_from_start_index > header.max_distance_from_start_index) { - header.max_distance_from_start_index = distance_from_start_index; + _ = map.remove(7); + _ = map.remove(15); + _ = map.remove(14); + _ = map.remove(13); + expect(!map.contains(7)); + expect(!map.contains(15)); + expect(!map.contains(14)); + expect(!map.contains(13)); + + i = 0; + while (i < 13) : (i += 1) { + if (i == 7) { + expect(!map.contains(i)); + } else { + expectEqual(map.get(i).?, i); } } - fn alloc(allocator: *Allocator, len: usize) !*IndexHeader { - const index_size = hash_map.capacityIndexSize(len); - const nbytes = @sizeOf(IndexHeader) + index_size * len; - const bytes = try allocator.allocAdvanced(u8, @alignOf(IndexHeader), nbytes, .exact); - @memset(bytes.ptr + @sizeOf(IndexHeader), 0xff, bytes.len - @sizeOf(IndexHeader)); - const result = @ptrCast(*IndexHeader, bytes.ptr); - result.* = .{ - .max_distance_from_start_index = 0, - .indexes_len = len, - }; - return result; + try map.put(15, 15); + try map.put(13, 13); + try map.put(14, 14); + try map.put(7, 7); + i = 0; + while (i < 16) : (i += 1) { + expectEqual(map.get(i).?, i); + } +} + +test "std.hash_map put and remove loop in random order" { + var map = AutoHashMap(u32, u32).init(std.testing.allocator); + defer map.deinit(); + + var keys = std.ArrayList(u32).init(std.testing.allocator); + defer keys.deinit(); + + const size = 32; + const iterations = 100; + + var i: u32 = 0; + while (i < size) : (i += 1) { + try keys.append(i); + } + var rng = std.rand.DefaultPrng.init(0); + + while (i < iterations) : (i += 1) { + std.rand.Random.shuffle(&rng.random, u32, keys.items); + + for (keys.items) |key| { + try map.put(key, key); + } + expectEqual(map.count(), size); + + for (keys.items) |key| { + _ = map.remove(key); + } + expectEqual(map.count(), 0); + } +} + +test "std.hash_map remove one million elements in random order" { + const Map = AutoHashMap(u32, u32); + const n = 1000 * 1000; + var map = Map.init(std.heap.page_allocator); + defer map.deinit(); + + var keys = std.ArrayList(u32).init(std.heap.page_allocator); + defer keys.deinit(); + + var i: u32 = 0; + while (i < n) : (i += 1) { + keys.append(i) catch unreachable; + } + + var rng = std.rand.DefaultPrng.init(0); + std.rand.Random.shuffle(&rng.random, u32, keys.items); + + for (keys.items) |key| { + map.put(key, key) catch unreachable; + } + + std.rand.Random.shuffle(&rng.random, u32, keys.items); + i = 0; + while (i < n) : (i += 1) { + const key = keys.items[i]; + _ = map.remove(key); + } +} + +test "std.hash_map put" { + var map = AutoHashMap(u32, 
u32).init(std.testing.allocator); + defer map.deinit(); + + var i: u32 = 0; + while (i < 16) : (i += 1) { + _ = try map.put(i, i); + } + + i = 0; + while (i < 16) : (i += 1) { + expectEqual(map.get(i).?, i); + } + + i = 0; + while (i < 16) : (i += 1) { + try map.put(i, i * 16 + 1); + } + + i = 0; + while (i < 16) : (i += 1) { + expectEqual(map.get(i).?, i * 16 + 1); + } +} + +test "std.hash_map getOrPut" { + var map = AutoHashMap(u32, u32).init(std.testing.allocator); + defer map.deinit(); + + var i: u32 = 0; + while (i < 10) : (i += 1) { + try map.put(i * 2, 2); } - fn free(header: *IndexHeader, allocator: *Allocator) void { - const index_size = hash_map.capacityIndexSize(header.indexes_len); - const ptr = @ptrCast([*]u8, header); - const slice = ptr[0 .. @sizeOf(IndexHeader) + header.indexes_len * index_size]; - allocator.free(slice); + i = 0; + while (i < 20) : (i += 1) { + var n = try map.getOrPutValue(i, 1); } -}; -test "basic hash map usage" { + i = 0; + var sum = i; + while (i < 20) : (i += 1) { + sum += map.get(i).?; + } + + expectEqual(sum, 30); +} + +test "std.hash_map basic hash map usage" { var map = AutoHashMap(i32, i32).init(std.testing.allocator); defer map.deinit(); @@ -925,85 +1215,10 @@ test "basic hash map usage" { map.removeAssertDiscard(3); } -test "iterator hash map" { - // https://github.com/ziglang/zig/issues/5127 - if (std.Target.current.cpu.arch == .mips) return error.SkipZigTest; - - var reset_map = AutoHashMap(i32, i32).init(std.testing.allocator); - defer reset_map.deinit(); - - // test ensureCapacity with a 0 parameter - try reset_map.ensureCapacity(0); - - try reset_map.putNoClobber(0, 11); - try reset_map.putNoClobber(1, 22); - try reset_map.putNoClobber(2, 33); - - var keys = [_]i32{ - 0, 2, 1, - }; - - var values = [_]i32{ - 11, 33, 22, - }; - - var buffer = [_]i32{ - 0, 0, 0, - }; - - var it = reset_map.iterator(); - const first_entry = it.next().?; - it.reset(); - - var count: usize = 0; - while (it.next()) |entry| : (count += 1) { - buffer[@intCast(usize, entry.key)] = entry.value; - } - testing.expect(count == 3); - testing.expect(it.next() == null); - - for (buffer) |v, i| { - testing.expect(buffer[@intCast(usize, keys[i])] == values[i]); - } - - it.reset(); - count = 0; - while (it.next()) |entry| { - buffer[@intCast(usize, entry.key)] = entry.value; - count += 1; - if (count >= 2) break; - } - - for (buffer[0..2]) |v, i| { - testing.expect(buffer[@intCast(usize, keys[i])] == values[i]); - } - - it.reset(); - var entry = it.next().?; - testing.expect(entry.key == first_entry.key); - testing.expect(entry.value == first_entry.value); -} - -test "ensure capacity" { - var map = AutoHashMap(i32, i32).init(std.testing.allocator); - defer map.deinit(); - - try map.ensureCapacity(20); - const initial_capacity = map.capacity(); - testing.expect(initial_capacity >= 20); - var i: i32 = 0; - while (i < 20) : (i += 1) { - testing.expect(map.fetchPutAssumeCapacity(i, i + 10) == null); - } - // shouldn't resize from putAssumeCapacity - testing.expect(initial_capacity == map.capacity()); -} - -test "clone" { +test "std.hash_map clone" { var original = AutoHashMap(i32, i32).init(std.testing.allocator); defer original.deinit(); - // put more than `linear_scan_max` so we can test that the index header is properly cloned var i: u8 = 0; while (i < 10) : (i += 1) { try original.putNoClobber(i, i * 10); @@ -1017,69 +1232,3 @@ test "clone" { testing.expect(copy.get(i).? 
== i * 10); } } - -pub fn getHashPtrAddrFn(comptime K: type) (fn (K) u32) { - return struct { - fn hash(key: K) u32 { - return getAutoHashFn(usize)(@ptrToInt(key)); - } - }.hash; -} - -pub fn getTrivialEqlFn(comptime K: type) (fn (K, K) bool) { - return struct { - fn eql(a: K, b: K) bool { - return a == b; - } - }.eql; -} - -pub fn getAutoHashFn(comptime K: type) (fn (K) u32) { - return struct { - fn hash(key: K) u32 { - if (comptime trait.hasUniqueRepresentation(K)) { - return @truncate(u32, Wyhash.hash(0, std.mem.asBytes(&key))); - } else { - var hasher = Wyhash.init(0); - autoHash(&hasher, key); - return @truncate(u32, hasher.final()); - } - } - }.hash; -} - -pub fn getAutoEqlFn(comptime K: type) (fn (K, K) bool) { - return struct { - fn eql(a: K, b: K) bool { - return meta.eql(a, b); - } - }.eql; -} - -pub fn autoEqlIsCheap(comptime K: type) bool { - return switch (@typeInfo(K)) { - .Bool, - .Int, - .Float, - .Pointer, - .ComptimeFloat, - .ComptimeInt, - .Enum, - .Fn, - .ErrorSet, - .AnyFrame, - .EnumLiteral, - => true, - else => false, - }; -} - -pub fn getAutoHashStratFn(comptime K: type, comptime strategy: std.hash.Strategy) (fn (K) u32) { - return struct { - fn hash(key: K) u32 { - var hasher = Wyhash.init(0); - std.hash.autoHashStrat(&hasher, key, strategy); - return @truncate(u32, hasher.final()); - } - }.hash; -} diff --git a/lib/std/heap/general_purpose_allocator.zig b/lib/std/heap/general_purpose_allocator.zig index 4b9a5aea66..2ae13cba0c 100644 --- a/lib/std/heap/general_purpose_allocator.zig +++ b/lib/std/heap/general_purpose_allocator.zig @@ -325,7 +325,8 @@ pub fn GeneralPurposeAllocator(comptime config: Config) type { break; } } - for (self.large_allocations.items()) |*large_alloc| { + var it = self.large_allocations.iterator(); + while (it.next()) |large_alloc| { log.err("Memory leak detected: {}", .{large_alloc.value.getStackTrace()}); leaks = true; } @@ -584,7 +585,7 @@ pub fn GeneralPurposeAllocator(comptime config: Config) type { if (new_aligned_size > largest_bucket_object_size) { try self.large_allocations.ensureCapacity( self.backing_allocator, - self.large_allocations.entries.items.len + 1, + self.large_allocations.count() + 1, ); const slice = try self.backing_allocator.allocFn(self.backing_allocator, len, ptr_align, len_align, ret_addr); diff --git a/lib/std/http/headers.zig b/lib/std/http/headers.zig index 8c80af512f..0ce642865c 100644 --- a/lib/std/http/headers.zig +++ b/lib/std/http/headers.zig @@ -123,9 +123,9 @@ pub const Headers = struct { pub fn deinit(self: *Self) void { { - for (self.index.items()) |*entry| { - const dex = &entry.value; - dex.deinit(self.allocator); + var it = self.index.iterator(); + while (it.next()) |entry| { + entry.value.deinit(self.allocator); self.allocator.free(entry.key); } self.index.deinit(self.allocator); @@ -333,7 +333,8 @@ pub const Headers = struct { fn rebuildIndex(self: *Self) void { // clear out the indexes - for (self.index.items()) |*entry| { + var it = self.index.iterator(); + while (it.next()) |entry| { entry.value.shrinkRetainingCapacity(0); } // fill up indexes again; we know capacity is fine from before diff --git a/lib/std/std.zig b/lib/std/std.zig index 2ff44f5e41..330f3c253b 100644 --- a/lib/std/std.zig +++ b/lib/std/std.zig @@ -3,11 +3,15 @@ // This file is part of [zig](https://ziglang.org/), which is MIT licensed. // The MIT license requires this copyright notice to be included in all copies // and substantial portions of the software. 
+pub const ArrayHashMap = array_hash_map.ArrayHashMap; +pub const ArrayHashMapUnmanaged = array_hash_map.ArrayHashMapUnmanaged; pub const ArrayList = @import("array_list.zig").ArrayList; pub const ArrayListAligned = @import("array_list.zig").ArrayListAligned; pub const ArrayListAlignedUnmanaged = @import("array_list.zig").ArrayListAlignedUnmanaged; pub const ArrayListSentineled = @import("array_list_sentineled.zig").ArrayListSentineled; pub const ArrayListUnmanaged = @import("array_list.zig").ArrayListUnmanaged; +pub const AutoArrayHashMap = array_hash_map.AutoArrayHashMap; +pub const AutoArrayHashMapUnmanaged = array_hash_map.AutoArrayHashMapUnmanaged; pub const AutoHashMap = hash_map.AutoHashMap; pub const AutoHashMapUnmanaged = hash_map.AutoHashMapUnmanaged; pub const BloomFilter = @import("bloom_filter.zig").BloomFilter; @@ -32,10 +36,13 @@ pub const SinglyLinkedList = @import("linked_list.zig").SinglyLinkedList; pub const SpinLock = @import("spinlock.zig").SpinLock; pub const StringHashMap = hash_map.StringHashMap; pub const StringHashMapUnmanaged = hash_map.StringHashMapUnmanaged; +pub const StringArrayHashMap = array_hash_map.StringArrayHashMap; +pub const StringArrayHashMapUnmanaged = array_hash_map.StringArrayHashMapUnmanaged; pub const TailQueue = @import("linked_list.zig").TailQueue; pub const Target = @import("target.zig").Target; pub const Thread = @import("thread.zig").Thread; +pub const array_hash_map = @import("array_hash_map.zig"); pub const atomic = @import("atomic.zig"); pub const base64 = @import("base64.zig"); pub const build = @import("build.zig"); diff --git a/src-self-hosted/Module.zig b/src-self-hosted/Module.zig index 72597975c9..24dcb541b4 100644 --- a/src-self-hosted/Module.zig +++ b/src-self-hosted/Module.zig @@ -36,17 +36,17 @@ bin_file_path: []const u8, /// It's rare for a decl to be exported, so we save memory by having a sparse map of /// Decl pointers to details about them being exported. /// The Export memory is owned by the `export_owners` table; the slice itself is owned by this table. -decl_exports: std.AutoHashMapUnmanaged(*Decl, []*Export) = .{}, +decl_exports: std.AutoArrayHashMapUnmanaged(*Decl, []*Export) = .{}, /// We track which export is associated with the given symbol name for quick /// detection of symbol collisions. -symbol_exports: std.StringHashMapUnmanaged(*Export) = .{}, +symbol_exports: std.StringArrayHashMapUnmanaged(*Export) = .{}, /// This models the Decls that perform exports, so that `decl_exports` can be updated when a Decl /// is modified. Note that the key of this table is not the Decl being exported, but the Decl that /// is performing the export of another Decl. /// This table owns the Export memory. -export_owners: std.AutoHashMapUnmanaged(*Decl, []*Export) = .{}, +export_owners: std.AutoArrayHashMapUnmanaged(*Decl, []*Export) = .{}, /// Maps fully qualified namespaced names to the Decl struct for them. -decl_table: std.HashMapUnmanaged(Scope.NameHash, *Decl, Scope.name_hash_hash, Scope.name_hash_eql, false) = .{}, +decl_table: std.ArrayHashMapUnmanaged(Scope.NameHash, *Decl, Scope.name_hash_hash, Scope.name_hash_eql, false) = .{}, link_error_flags: link.File.ErrorFlags = .{}, @@ -57,13 +57,13 @@ work_queue: std.fifo.LinearFifo(WorkItem, .Dynamic), /// The ErrorMsg memory is owned by the decl, using Module's allocator. /// Note that a Decl can succeed but the Fn it represents can fail. In this case, /// a Decl can have a failed_decls entry but have analysis status of success. 
-failed_decls: std.AutoHashMapUnmanaged(*Decl, *ErrorMsg) = .{}, +failed_decls: std.AutoArrayHashMapUnmanaged(*Decl, *ErrorMsg) = .{}, /// Using a map here for consistency with the other fields here. /// The ErrorMsg memory is owned by the `Scope`, using Module's allocator. -failed_files: std.AutoHashMapUnmanaged(*Scope, *ErrorMsg) = .{}, +failed_files: std.AutoArrayHashMapUnmanaged(*Scope, *ErrorMsg) = .{}, /// Using a map here for consistency with the other fields here. /// The ErrorMsg memory is owned by the `Export`, using Module's allocator. -failed_exports: std.AutoHashMapUnmanaged(*Export, *ErrorMsg) = .{}, +failed_exports: std.AutoArrayHashMapUnmanaged(*Export, *ErrorMsg) = .{}, /// Incrementing integer used to compare against the corresponding Decl /// field to determine whether a Decl's status applies to an ongoing update, or a @@ -201,9 +201,9 @@ pub const Decl = struct { /// typed_value may need to be regenerated. dependencies: DepsTable = .{}, - /// The reason this is not `std.AutoHashMapUnmanaged` is a workaround for + /// The reason this is not `std.AutoArrayHashMapUnmanaged` is a workaround for /// stage1 compiler giving me: `error: struct 'Module.Decl' depends on itself` - pub const DepsTable = std.HashMapUnmanaged(*Decl, void, std.hash_map.getAutoHashFn(*Decl), std.hash_map.getAutoEqlFn(*Decl), false); + pub const DepsTable = std.ArrayHashMapUnmanaged(*Decl, void, std.array_hash_map.getAutoHashFn(*Decl), std.array_hash_map.getAutoEqlFn(*Decl), false); pub fn destroy(self: *Decl, gpa: *Allocator) void { gpa.free(mem.spanZ(self.name)); @@ -933,7 +933,8 @@ pub fn deinit(self: *Module) void { self.symbol_exports.deinit(gpa); self.root_scope.destroy(gpa); - for (self.global_error_set.items()) |entry| { + var it = self.global_error_set.iterator(); + while (it.next()) |entry| { gpa.free(entry.key); } self.global_error_set.deinit(gpa); @@ -1756,7 +1757,7 @@ fn analyzeRootSrcFile(self: *Module, root_scope: *Scope.File) !void { // Keep track of the decls that we expect to see in this file so that // we know which ones have been deleted. - var deleted_decls = std.AutoHashMap(*Decl, void).init(self.gpa); + var deleted_decls = std.AutoArrayHashMap(*Decl, void).init(self.gpa); defer deleted_decls.deinit(); try deleted_decls.ensureCapacity(root_scope.decls.items.len); for (root_scope.decls.items) |file_decl| { @@ -1877,7 +1878,7 @@ fn analyzeRootZIRModule(self: *Module, root_scope: *Scope.ZIRModule) !void { // Keep track of the decls that we expect to see in this file so that // we know which ones have been deleted. 
- var deleted_decls = std.AutoHashMap(*Decl, void).init(self.gpa); + var deleted_decls = std.AutoArrayHashMap(*Decl, void).init(self.gpa); defer deleted_decls.deinit(); try deleted_decls.ensureCapacity(self.decl_table.items().len); for (self.decl_table.items()) |entry| { @@ -2087,7 +2088,7 @@ pub fn getErrorValue(self: *Module, name: []const u8) !std.StringHashMapUnmanage errdefer self.global_error_set.removeAssertDiscard(name); gop.entry.key = try self.gpa.dupe(u8, name); - gop.entry.value = @intCast(u16, self.global_error_set.items().len - 1); + gop.entry.value = @intCast(u16, self.global_error_set.count() - 1); return gop.entry.*; } diff --git a/src-self-hosted/codegen.zig b/src-self-hosted/codegen.zig index 82c06d8003..d6e3194c12 100644 --- a/src-self-hosted/codegen.zig +++ b/src-self-hosted/codegen.zig @@ -359,7 +359,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { }; const Branch = struct { - inst_table: std.AutoHashMapUnmanaged(*ir.Inst, MCValue) = .{}, + inst_table: std.AutoArrayHashMapUnmanaged(*ir.Inst, MCValue) = .{}, fn deinit(self: *Branch, gpa: *Allocator) void { self.inst_table.deinit(gpa); @@ -750,7 +750,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { const ptr_bits = arch.ptrBitWidth(); const ptr_bytes: u64 = @divExact(ptr_bits, 8); if (abi_size <= ptr_bytes) { - try self.registers.ensureCapacity(self.gpa, self.registers.items().len + 1); + try self.registers.ensureCapacity(self.gpa, self.registers.count() + 1); if (self.allocReg(inst)) |reg| { return MCValue{ .register = registerAlias(reg, abi_size) }; } @@ -788,7 +788,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { /// `reg_owner` is the instruction that gets associated with the register in the register table. /// This can have a side effect of spilling instructions to the stack to free up a register. fn copyToNewRegister(self: *Self, reg_owner: *ir.Inst, mcv: MCValue) !MCValue { - try self.registers.ensureCapacity(self.gpa, self.registers.items().len + 1); + try self.registers.ensureCapacity(self.gpa, @intCast(u32, self.registers.count() + 1)); const reg = self.allocReg(reg_owner) orelse b: { // We'll take over the first register. Move the instruction that was previously @@ -1247,7 +1247,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { if (inst.base.isUnused()) return MCValue.dead; - try self.registers.ensureCapacity(self.gpa, self.registers.items().len + 1); + try self.registers.ensureCapacity(self.gpa, self.registers.count() + 1); const result = self.args[self.arg_index]; self.arg_index += 1; diff --git a/src-self-hosted/codegen/c.zig b/src-self-hosted/codegen/c.zig index 9a7a4888be..c037c55289 100644 --- a/src-self-hosted/codegen/c.zig +++ b/src-self-hosted/codegen/c.zig @@ -110,7 +110,8 @@ const Context = struct { } fn deinit(self: *Context) void { - for (self.inst_map.items()) |kv| { + var it = self.inst_map.iterator(); + while (it.next()) |kv| { self.file.base.allocator.free(kv.value); } self.inst_map.deinit(); diff --git a/src-self-hosted/link.zig b/src-self-hosted/link.zig index 7a5680dfbf..ecf3876582 100644 --- a/src-self-hosted/link.zig +++ b/src-self-hosted/link.zig @@ -47,7 +47,7 @@ pub const File = struct { }; /// For DWARF .debug_info. - pub const DbgInfoTypeRelocsTable = std.HashMapUnmanaged(Type, DbgInfoTypeReloc, Type.hash, Type.eql, true); + pub const DbgInfoTypeRelocsTable = std.HashMapUnmanaged(Type, DbgInfoTypeReloc, Type.hash, Type.eql, std.hash_map.DefaultMaxLoadPercentage); /// For DWARF .debug_info. 
pub const DbgInfoTypeReloc = struct { diff --git a/src-self-hosted/link/Elf.zig b/src-self-hosted/link/Elf.zig index 8bf28557b4..69f1260d20 100644 --- a/src-self-hosted/link/Elf.zig +++ b/src-self-hosted/link/Elf.zig @@ -1629,7 +1629,8 @@ pub fn updateDecl(self: *Elf, module: *Module, decl: *Module.Decl) !void { var dbg_info_type_relocs: File.DbgInfoTypeRelocsTable = .{}; defer { - for (dbg_info_type_relocs.items()) |*entry| { + var it = dbg_info_type_relocs.iterator(); + while (it.next()) |entry| { entry.value.relocs.deinit(self.base.allocator); } dbg_info_type_relocs.deinit(self.base.allocator); @@ -1917,7 +1918,8 @@ pub fn updateDecl(self: *Elf, module: *Module, decl: *Module.Decl) !void { // Now we emit the .debug_info types of the Decl. These will count towards the size of // the buffer, so we have to do it before computing the offset, and we can't perform the actual // relocations yet. - for (dbg_info_type_relocs.items()) |*entry| { + var it = dbg_info_type_relocs.iterator(); + while (it.next()) |entry| { entry.value.off = @intCast(u32, dbg_info_buffer.items.len); try self.addDbgInfoType(entry.key, &dbg_info_buffer); } @@ -1925,7 +1927,8 @@ pub fn updateDecl(self: *Elf, module: *Module, decl: *Module.Decl) !void { try self.updateDeclDebugInfoAllocation(text_block, @intCast(u32, dbg_info_buffer.items.len)); // Now that we have the offset assigned we can finally perform type relocations. - for (dbg_info_type_relocs.items()) |entry| { + it = dbg_info_type_relocs.iterator(); + while (it.next()) |entry| { for (entry.value.relocs.items) |off| { mem.writeInt( u32, diff --git a/src-self-hosted/liveness.zig b/src-self-hosted/liveness.zig index 6ea949d8af..d528e09ce7 100644 --- a/src-self-hosted/liveness.zig +++ b/src-self-hosted/liveness.zig @@ -15,7 +15,7 @@ pub fn analyze( var table = std.AutoHashMap(*ir.Inst, void).init(gpa); defer table.deinit(); - try table.ensureCapacity(body.instructions.len); + try table.ensureCapacity(@intCast(u32, body.instructions.len)); try analyzeWithTable(arena, &table, null, body); } @@ -84,8 +84,11 @@ fn analyzeInst( try analyzeWithTable(arena, table, &then_table, inst.then_body); // Reset the table back to its state from before the branch. - for (then_table.items()) |entry| { - table.removeAssertDiscard(entry.key); + { + var it = then_table.iterator(); + while (it.next()) |entry| { + table.removeAssertDiscard(entry.key); + } } var else_table = std.AutoHashMap(*ir.Inst, void).init(table.allocator); @@ -97,28 +100,36 @@ fn analyzeInst( var else_entry_deaths = std.ArrayList(*ir.Inst).init(table.allocator); defer else_entry_deaths.deinit(); - for (else_table.items()) |entry| { - const else_death = entry.key; - if (!then_table.contains(else_death)) { - try then_entry_deaths.append(else_death); + { + var it = else_table.iterator(); + while (it.next()) |entry| { + const else_death = entry.key; + if (!then_table.contains(else_death)) { + try then_entry_deaths.append(else_death); + } } } // This loop is the same, except it's for the then branch, and it additionally // has to put its items back into the table to undo the reset. 
- for (then_table.items()) |entry| { - const then_death = entry.key; - if (!else_table.contains(then_death)) { - try else_entry_deaths.append(then_death); + { + var it = then_table.iterator(); + while (it.next()) |entry| { + const then_death = entry.key; + if (!else_table.contains(then_death)) { + try else_entry_deaths.append(then_death); + } + _ = try table.put(then_death, {}); } - _ = try table.put(then_death, {}); } // Now we have to correctly populate new_set. if (new_set) |ns| { - try ns.ensureCapacity(ns.items().len + then_table.items().len + else_table.items().len); - for (then_table.items()) |entry| { + try ns.ensureCapacity(@intCast(u32, ns.count() + then_table.count() + else_table.count())); + var it = then_table.iterator(); + while (it.next()) |entry| { _ = ns.putAssumeCapacity(entry.key, {}); } - for (else_table.items()) |entry| { + it = else_table.iterator(); + while (it.next()) |entry| { _ = ns.putAssumeCapacity(entry.key, {}); } } diff --git a/src-self-hosted/translate_c.zig b/src-self-hosted/translate_c.zig index b333d24c51..68d1dabb0e 100644 --- a/src-self-hosted/translate_c.zig +++ b/src-self-hosted/translate_c.zig @@ -19,23 +19,9 @@ pub const Error = error{OutOfMemory}; const TypeError = Error || error{UnsupportedType}; const TransError = TypeError || error{UnsupportedTranslation}; -const DeclTable = std.HashMap(usize, []const u8, addrHash, addrEql, false); +const DeclTable = std.AutoArrayHashMap(usize, []const u8); -fn addrHash(x: usize) u32 { - switch (@typeInfo(usize).Int.bits) { - 32 => return x, - // pointers are usually aligned so we ignore the bits that are probably all 0 anyway - // usually the larger bits of addr space are unused so we just chop em off - 64 => return @truncate(u32, x >> 4), - else => @compileError("unreachable"), - } -} - -fn addrEql(a: usize, b: usize) bool { - return a == b; -} - -const SymbolTable = std.StringHashMap(*ast.Node); +const SymbolTable = std.StringArrayHashMap(*ast.Node); const AliasList = std.ArrayList(struct { alias: []const u8, name: []const u8, @@ -285,7 +271,7 @@ pub const Context = struct { /// a list of names that we found by visiting all the top level decls without /// translating them. The other maps are updated as we translate; this one is updated /// up front in a pre-processing step. 
- global_names: std.StringHashMap(void), + global_names: std.StringArrayHashMap(void), fn getMangle(c: *Context) u32 { c.mangle_count += 1; @@ -380,7 +366,7 @@ pub fn translate( .alias_list = AliasList.init(gpa), .global_scope = try arena.allocator.create(Scope.Root), .clang_context = ZigClangASTUnit_getASTContext(ast_unit).?, - .global_names = std.StringHashMap(void).init(gpa), + .global_names = std.StringArrayHashMap(void).init(gpa), .token_ids = .{}, .token_locs = .{}, .errors = .{}, @@ -6424,7 +6410,8 @@ fn getFnProto(c: *Context, ref: *ast.Node) ?*ast.Node.FnProto { } fn addMacros(c: *Context) !void { - for (c.global_scope.macro_table.items()) |kv| { + var it = c.global_scope.macro_table.iterator(); + while (it.next()) |kv| { if (getFnProto(c, kv.value)) |proto_node| { // If a macro aliases a global variable which is a function pointer, we conclude that // the macro is intended to represent a function that assumes the function pointer diff --git a/src-self-hosted/type.zig b/src-self-hosted/type.zig index 13024f34de..a9a1acf44b 100644 --- a/src-self-hosted/type.zig +++ b/src-self-hosted/type.zig @@ -238,7 +238,7 @@ pub const Type = extern union { } } - pub fn hash(self: Type) u32 { + pub fn hash(self: Type) u64 { var hasher = std.hash.Wyhash.init(0); const zig_type_tag = self.zigTypeTag(); std.hash.autoHash(&hasher, zig_type_tag); @@ -303,7 +303,7 @@ pub const Type = extern union { // TODO implement more type hashing }, } - return @truncate(u32, hasher.final()); + return hasher.final(); } pub fn copy(self: Type, allocator: *Allocator) error{OutOfMemory}!Type { diff --git a/src-self-hosted/value.zig b/src-self-hosted/value.zig index 6a39371ebe..bfd205f4d9 100644 --- a/src-self-hosted/value.zig +++ b/src-self-hosted/value.zig @@ -358,7 +358,8 @@ pub const Value = extern union { .error_set => { const error_set = val.cast(Payload.ErrorSet).?; try out_stream.writeAll("error{"); - for (error_set.fields.items()) |entry| { + var it = error_set.fields.iterator(); + while (it.next()) |entry| { try out_stream.print("{},", .{entry.value}); } return out_stream.writeAll("}"); diff --git a/src-self-hosted/zir.zig b/src-self-hosted/zir.zig index 8915cb0f90..04d3393626 100644 --- a/src-self-hosted/zir.zig +++ b/src-self-hosted/zir.zig @@ -1049,7 +1049,7 @@ pub const Module = struct { defer write.loop_table.deinit(); // First, build a map of *Inst to @ or % indexes - try write.inst_table.ensureCapacity(self.decls.len); + try write.inst_table.ensureCapacity(@intCast(u32, self.decls.len)); for (self.decls) |decl, decl_i| { try write.inst_table.putNoClobber(decl.inst, .{ .inst = decl.inst, .index = null, .name = decl.name }); @@ -1685,7 +1685,7 @@ pub fn emit(allocator: *Allocator, old_module: IrModule) !Module { .arena = std.heap.ArenaAllocator.init(allocator), .old_module = &old_module, .next_auto_name = 0, - .names = std.StringHashMap(void).init(allocator), + .names = std.StringArrayHashMap(void).init(allocator), .primitive_table = std.AutoHashMap(Inst.Primitive.Builtin, *Decl).init(allocator), .indent = 0, .block_table = std.AutoHashMap(*ir.Inst.Block, *Inst.Block).init(allocator), @@ -1758,7 +1758,7 @@ const EmitZIR = struct { arena: std.heap.ArenaAllocator, old_module: *const IrModule, decls: std.ArrayListUnmanaged(*Decl), - names: std.StringHashMap(void), + names: std.StringArrayHashMap(void), next_auto_name: usize, primitive_table: std.AutoHashMap(Inst.Primitive.Builtin, *Decl), indent: usize, diff --git a/src-self-hosted/zir_sema.zig b/src-self-hosted/zir_sema.zig index b4dafac1da..8a61178a68 
100644 --- a/src-self-hosted/zir_sema.zig +++ b/src-self-hosted/zir_sema.zig @@ -812,7 +812,7 @@ fn analyzeInstErrorSet(mod: *Module, scope: *Scope, inst: *zir.Inst.ErrorSet) In .fields = .{}, .decl = undefined, // populated below }; - try payload.fields.ensureCapacity(&new_decl_arena.allocator, inst.positionals.fields.len); + try payload.fields.ensureCapacity(&new_decl_arena.allocator, @intCast(u32, inst.positionals.fields.len)); for (inst.positionals.fields) |field_name| { const entry = try mod.getErrorValue(field_name); -- cgit v1.2.3 From 9a59cdcd41f5a05d70a02d89178afaf8789791c6 Mon Sep 17 00:00:00 2001 From: Vexu Date: Thu, 27 Aug 2020 23:07:39 +0300 Subject: stage2: various small type fixes --- src-self-hosted/Module.zig | 6 ++++++ src-self-hosted/type.zig | 10 +++++----- src-self-hosted/value.zig | 6 +++--- src-self-hosted/zir_sema.zig | 6 ++++++ 4 files changed, 20 insertions(+), 8 deletions(-) (limited to 'src-self-hosted/Module.zig') diff --git a/src-self-hosted/Module.zig b/src-self-hosted/Module.zig index 24dcb541b4..c4b0f70d5c 100644 --- a/src-self-hosted/Module.zig +++ b/src-self-hosted/Module.zig @@ -2801,6 +2801,12 @@ pub fn resolvePeerTypes(self: *Module, scope: *Scope, instructions: []*Inst) !Ty prev_inst = next_inst; continue; } + if (next_inst.ty.zigTypeTag() == .Undefined) + continue; + if (prev_inst.ty.zigTypeTag() == .Undefined) { + prev_inst = next_inst; + continue; + } if (prev_inst.ty.isInt() and next_inst.ty.isInt() and prev_inst.ty.isSignedInt() == next_inst.ty.isSignedInt()) diff --git a/src-self-hosted/type.zig b/src-self-hosted/type.zig index a9a1acf44b..66a7961073 100644 --- a/src-self-hosted/type.zig +++ b/src-self-hosted/type.zig @@ -163,7 +163,7 @@ pub const Type = extern union { // Hot path for common case: if (a.castPointer()) |a_payload| { if (b.castPointer()) |b_payload| { - return eql(a_payload.pointee_type, b_payload.pointee_type); + return a.tag() == b.tag() and eql(a_payload.pointee_type, b_payload.pointee_type); } } const is_slice_a = isSlice(a); @@ -189,7 +189,7 @@ pub const Type = extern union { .Array => { if (a.arrayLen() != b.arrayLen()) return false; - if (a.elemType().eql(b.elemType())) + if (!a.elemType().eql(b.elemType())) return false; const sentinel_a = a.arraySentinel(); const sentinel_b = b.arraySentinel(); @@ -501,9 +501,9 @@ pub const Type = extern union { .noreturn, => return out_stream.writeAll(@tagName(t)), - .enum_literal => return out_stream.writeAll("@TypeOf(.EnumLiteral)"), - .@"null" => return out_stream.writeAll("@TypeOf(null)"), - .@"undefined" => return out_stream.writeAll("@TypeOf(undefined)"), + .enum_literal => return out_stream.writeAll("@Type(.EnumLiteral)"), + .@"null" => return out_stream.writeAll("@Type(.Null)"), + .@"undefined" => return out_stream.writeAll("@Type(.Undefined)"), .@"anyframe" => return out_stream.writeAll("anyframe"), .anyerror_void_error_union => return out_stream.writeAll("anyerror!void"), diff --git a/src-self-hosted/value.zig b/src-self-hosted/value.zig index bfd205f4d9..b65aa06bea 100644 --- a/src-self-hosted/value.zig +++ b/src-self-hosted/value.zig @@ -301,15 +301,15 @@ pub const Value = extern union { .comptime_int_type => return out_stream.writeAll("comptime_int"), .comptime_float_type => return out_stream.writeAll("comptime_float"), .noreturn_type => return out_stream.writeAll("noreturn"), - .null_type => return out_stream.writeAll("@TypeOf(null)"), - .undefined_type => return out_stream.writeAll("@TypeOf(undefined)"), + .null_type => return out_stream.writeAll("@Type(.Null)"), + 
.undefined_type => return out_stream.writeAll("@Type(.Undefined)"), .fn_noreturn_no_args_type => return out_stream.writeAll("fn() noreturn"), .fn_void_no_args_type => return out_stream.writeAll("fn() void"), .fn_naked_noreturn_no_args_type => return out_stream.writeAll("fn() callconv(.Naked) noreturn"), .fn_ccc_void_no_args_type => return out_stream.writeAll("fn() callconv(.C) void"), .single_const_pointer_to_comptime_int_type => return out_stream.writeAll("*const comptime_int"), .const_slice_u8_type => return out_stream.writeAll("[]const u8"), - .enum_literal_type => return out_stream.writeAll("@TypeOf(.EnumLiteral)"), + .enum_literal_type => return out_stream.writeAll("@Type(.EnumLiteral)"), .anyframe_type => return out_stream.writeAll("anyframe"), .null_value => return out_stream.writeAll("null"), diff --git a/src-self-hosted/zir_sema.zig b/src-self-hosted/zir_sema.zig index 676b662077..88a130c1db 100644 --- a/src-self-hosted/zir_sema.zig +++ b/src-self-hosted/zir_sema.zig @@ -1239,6 +1239,12 @@ fn analyzeInstArithmetic(mod: *Module, scope: *Scope, inst: *zir.Inst.BinOp) Inn if (casted_lhs.value()) |lhs_val| { if (casted_rhs.value()) |rhs_val| { + if (lhs_val.isUndef() or rhs_val.isUndef()) { + return mod.constInst(scope, inst.base.src, .{ + .ty = resolved_type, + .val = Value.initTag(.undef), + }); + } return analyzeInstComptimeOp(mod, scope, scalar_type, inst, lhs_val, rhs_val); } } -- cgit v1.2.3 From 6ab0ac161e02c2361b72d124423509556b9332fa Mon Sep 17 00:00:00 2001 From: Vexu Date: Fri, 28 Aug 2020 15:51:27 +0300 Subject: stage2: slice return type analysis --- src-self-hosted/Module.zig | 66 ++++++++++++++++++++++++++ src-self-hosted/codegen.zig | 2 +- src-self-hosted/codegen/c.zig | 2 +- src-self-hosted/type.zig | 108 ++++++++++++++++++++++++++++++++++++------ src-self-hosted/zir.zig | 2 +- src-self-hosted/zir_sema.zig | 12 ++++- 6 files changed, 172 insertions(+), 20 deletions(-) (limited to 'src-self-hosted/Module.zig') diff --git a/src-self-hosted/Module.zig b/src-self-hosted/Module.zig index c4b0f70d5c..93509c6674 100644 --- a/src-self-hosted/Module.zig +++ b/src-self-hosted/Module.zig @@ -2591,6 +2591,72 @@ pub fn analyzeIsErr(self: *Module, scope: *Scope, src: usize, operand: *Inst) In return self.fail(scope, src, "TODO implement analysis of iserr", .{}); } +pub fn analyzeSlice(self: *Module, scope: *Scope, src: usize, array_ptr: *Inst, start: *Inst, end_opt: ?*Inst, sentinel_opt: ?*Inst) InnerError!*Inst { + const ptr_child = switch (array_ptr.ty.zigTypeTag()) { + .Pointer => array_ptr.ty.elemType(), + else => return self.fail(scope, src, "expected pointer, found '{}'", .{array_ptr.ty}), + }; + + var array_type = ptr_child; + const elem_type = switch (ptr_child.zigTypeTag()) { + .Array => ptr_child.elemType(), + .Pointer => blk: { + if (ptr_child.isSinglePointer()) { + if (ptr_child.elemType().zigTypeTag() == .Array) { + array_type = ptr_child.elemType(); + break :blk ptr_child.elemType().elemType(); + } + + return self.fail(scope, src, "slice of single-item pointer", .{}); + } + break :blk ptr_child.elemType(); + }, + else => return self.fail(scope, src, "slice of non-array type '{}'", .{ptr_child}), + }; + + const slice_sentinel = if (sentinel_opt) |sentinel| blk: { + const casted = try self.coerce(scope, elem_type, sentinel); + break :blk try self.resolveConstValue(scope, casted); + } else null; + + var return_ptr_size: std.builtin.TypeInfo.Pointer.Size = .Slice; + var return_elem_type = elem_type; + if (end_opt) |end| { + if (end.value()) |end_val| { + if (start.value()) 
|start_val| { + const start_u64 = start_val.toUnsignedInt(); + const end_u64 = end_val.toUnsignedInt(); + if (start_u64 > end_u64) { + return self.fail(scope, src, "out of bounds slice", .{}); + } + + const len = end_u64 - start_u64; + const array_sentinel = if (array_type.zigTypeTag() == .Array and end_u64 == array_type.arrayLen()) + array_type.sentinel() + else + slice_sentinel; + return_elem_type = try self.arrayType(scope, len, array_sentinel, elem_type); + return_ptr_size = .One; + } + } + } + const return_type = try self.ptrType( + scope, + src, + return_elem_type, + if (end_opt == null) slice_sentinel else null, + 0, // TODO alignment + 0, + 0, + !ptr_child.isConstPtr(), + ptr_child.isAllowzeroPtr(), + ptr_child.isVolatilePtr(), + return_ptr_size, + ); + + return self.fail(scope, src, "TODO implement analysis of slice", .{}); +} + /// Asserts that lhs and rhs types are both numeric. pub fn cmpNumeric( self: *Module, diff --git a/src-self-hosted/codegen.zig b/src-self-hosted/codegen.zig index d6e3194c12..6f08c7a689 100644 --- a/src-self-hosted/codegen.zig +++ b/src-self-hosted/codegen.zig @@ -132,7 +132,7 @@ pub fn generateSymbol( .Array => { // TODO populate .debug_info for the array if (typed_value.val.cast(Value.Payload.Bytes)) |payload| { - if (typed_value.ty.arraySentinel()) |sentinel| { + if (typed_value.ty.sentinel()) |sentinel| { try code.ensureCapacity(code.items.len + payload.data.len + 1); code.appendSliceAssumeCapacity(payload.data); const prev_len = code.items.len; diff --git a/src-self-hosted/codegen/c.zig b/src-self-hosted/codegen/c.zig index c037c55289..34ddcfbb3b 100644 --- a/src-self-hosted/codegen/c.zig +++ b/src-self-hosted/codegen/c.zig @@ -85,7 +85,7 @@ fn genArray(file: *C, decl: *Decl) !void { const name = try map(file.base.allocator, mem.span(decl.name)); defer file.base.allocator.free(name); if (tv.val.cast(Value.Payload.Bytes)) |payload| - if (tv.ty.arraySentinel()) |sentinel| + if (tv.ty.sentinel()) |sentinel| if (sentinel.toUnsignedInt() == 0) try file.constants.writer().print("const char *const {} = \"{}\";\n", .{ name, payload.data }) else diff --git a/src-self-hosted/type.zig b/src-self-hosted/type.zig index 66a7961073..4966395512 100644 --- a/src-self-hosted/type.zig +++ b/src-self-hosted/type.zig @@ -191,8 +191,8 @@ pub const Type = extern union { return false; if (!a.elemType().eql(b.elemType())) return false; - const sentinel_a = a.arraySentinel(); - const sentinel_b = b.arraySentinel(); + const sentinel_a = a.sentinel(); + const sentinel_b = b.sentinel(); if (sentinel_a) |sa| { if (sentinel_b) |sb| { return sa.eql(sb); @@ -630,8 +630,8 @@ pub const Type = extern union { const payload = @fieldParentPtr(Payload.Pointer, "base", ty.ptr_otherwise); if (payload.sentinel) |some| switch (payload.size) { .One, .C => unreachable, - .Many => try out_stream.writeAll("[*:{}]"), - .Slice => try out_stream.writeAll("[:{}]"), + .Many => try out_stream.print("[*:{}]", .{some}), + .Slice => try out_stream.print("[:{}]", .{some}), } else switch (payload.size) { .One => try out_stream.writeAll("*"), .Many => try out_stream.writeAll("[*]"), @@ -1341,6 +1341,81 @@ pub const Type = extern union { }; } + pub fn isAllowzeroPtr(self: Type) bool { + return switch (self.tag()) { + .u8, + .i8, + .u16, + .i16, + .u32, + .i32, + .u64, + .i64, + .usize, + .isize, + .c_short, + .c_ushort, + .c_int, + .c_uint, + .c_long, + .c_ulong, + .c_longlong, + .c_ulonglong, + .c_longdouble, + .f16, + .f32, + .f64, + .f128, + .c_void, + .bool, + .void, + .type, + .anyerror, + .comptime_int, 
+ .comptime_float, + .noreturn, + .@"null", + .@"undefined", + .array, + .array_sentinel, + .array_u8, + .array_u8_sentinel_0, + .fn_noreturn_no_args, + .fn_void_no_args, + .fn_naked_noreturn_no_args, + .fn_ccc_void_no_args, + .function, + .int_unsigned, + .int_signed, + .single_mut_pointer, + .single_const_pointer, + .many_const_pointer, + .many_mut_pointer, + .c_const_pointer, + .c_mut_pointer, + .const_slice, + .mut_slice, + .single_const_pointer_to_comptime_int, + .const_slice_u8, + .optional, + .optional_single_mut_pointer, + .optional_single_const_pointer, + .enum_literal, + .error_union, + .@"anyframe", + .anyframe_T, + .anyerror_void_error_union, + .error_set, + .error_set_single, + => false, + + .pointer => { + const payload = @fieldParentPtr(Payload.Pointer, "base", self.ptr_otherwise); + return payload.@"allowzero"; + }, + }; + } + /// Asserts that the type is an optional pub fn isPtrLikeOptional(self: Type) bool { switch (self.tag()) { @@ -1585,8 +1660,8 @@ pub const Type = extern union { }; } - /// Asserts the type is an array or vector. - pub fn arraySentinel(self: Type) ?Value { + /// Asserts the type is an array, pointer or vector. + pub fn sentinel(self: Type) ?Value { return switch (self.tag()) { .u8, .i8, @@ -1626,16 +1701,8 @@ pub const Type = extern union { .fn_naked_noreturn_no_args, .fn_ccc_void_no_args, .function, - .pointer, - .single_const_pointer, - .single_mut_pointer, - .many_const_pointer, - .many_mut_pointer, - .c_const_pointer, - .c_mut_pointer, .const_slice, .mut_slice, - .single_const_pointer_to_comptime_int, .const_slice_u8, .int_unsigned, .int_signed, @@ -1651,7 +1718,18 @@ pub const Type = extern union { .error_set_single, => unreachable, - .array, .array_u8 => return null, + .single_const_pointer, + .single_mut_pointer, + .many_const_pointer, + .many_mut_pointer, + .c_const_pointer, + .c_mut_pointer, + .single_const_pointer_to_comptime_int, + .array, + .array_u8, + => return null, + + .pointer => return self.cast(Payload.Pointer).?.sentinel, .array_sentinel => return self.cast(Payload.ArraySentinel).?.sentinel, .array_u8_sentinel_0 => return Value.initTag(.zero), }; diff --git a/src-self-hosted/zir.zig b/src-self-hosted/zir.zig index 9d0a5b825e..b6d7fab4c5 100644 --- a/src-self-hosted/zir.zig +++ b/src-self-hosted/zir.zig @@ -2596,7 +2596,7 @@ const EmitZIR = struct { var len_pl = Value.Payload.Int_u64{ .int = ty.arrayLen() }; const len = Value.initPayload(&len_pl.base); - const inst = if (ty.arraySentinel()) |sentinel| blk: { + const inst = if (ty.sentinel()) |sentinel| blk: { const inst = try self.arena.allocator.create(Inst.ArrayTypeSentinel); inst.* = .{ .base = .{ diff --git a/src-self-hosted/zir_sema.zig b/src-self-hosted/zir_sema.zig index 012bc63581..c99da39c04 100644 --- a/src-self-hosted/zir_sema.zig +++ b/src-self-hosted/zir_sema.zig @@ -1175,11 +1175,19 @@ fn analyzeInstElemPtr(mod: *Module, scope: *Scope, inst: *zir.Inst.ElemPtr) Inne } fn analyzeInstSlice(mod: *Module, scope: *Scope, inst: *zir.Inst.Slice) InnerError!*Inst { - return mod.fail(scope, inst.base.src, "TODO implement analyzeInstSlice", .{}); + const array_ptr = try resolveInst(mod, scope, inst.positionals.array_ptr); + const start = try resolveInst(mod, scope, inst.positionals.start); + const end = if (inst.kw_args.end) |end| try resolveInst(mod, scope, end) else null; + const sentinel = if (inst.kw_args.sentinel) |sentinel| try resolveInst(mod, scope, sentinel) else null; + + return mod.analyzeSlice(scope, inst.base.src, array_ptr, start, end, sentinel); } fn 
analyzeInstSliceStart(mod: *Module, scope: *Scope, inst: *zir.Inst.BinOp) InnerError!*Inst { - return mod.fail(scope, inst.base.src, "TODO implement analyzeInstSliceStart", .{}); + const array_ptr = try resolveInst(mod, scope, inst.positionals.lhs); + const start = try resolveInst(mod, scope, inst.positionals.rhs); + + return mod.analyzeSlice(scope, inst.base.src, array_ptr, start, null, null); } fn analyzeInstShl(mod: *Module, scope: *Scope, inst: *zir.Inst.BinOp) InnerError!*Inst { -- cgit v1.2.3 From 6f0126e9573a6bde9cbe5b113208e0a515b2eee7 Mon Sep 17 00:00:00 2001 From: Vexu Date: Thu, 3 Sep 2020 14:58:47 +0300 Subject: stage2: split Scope.Container from Scope.File --- src-self-hosted/Module.zig | 142 +++++++++++++++++++++++++++---------------- src-self-hosted/codegen.zig | 4 +- src-self-hosted/link/Elf.zig | 8 +-- 3 files changed, 96 insertions(+), 58 deletions(-) (limited to 'src-self-hosted/Module.zig') diff --git a/src-self-hosted/Module.zig b/src-self-hosted/Module.zig index 93509c6674..8d7a4d7b36 100644 --- a/src-self-hosted/Module.zig +++ b/src-self-hosted/Module.zig @@ -125,7 +125,7 @@ pub const Decl = struct { /// mapping them to an address in the output file. /// Memory owned by this decl, using Module's allocator. name: [*:0]const u8, - /// The direct parent container of the Decl. This is either a `Scope.File` or `Scope.ZIRModule`. + /// The direct parent container of the Decl. This is either a `Scope.Container` or `Scope.ZIRModule`. /// Reference to externally owned memory. scope: *Scope, /// The AST Node decl index or ZIR Inst index that contains this declaration. @@ -217,9 +217,10 @@ pub const Decl = struct { pub fn src(self: Decl) usize { switch (self.scope.tag) { - .file => { - const file = @fieldParentPtr(Scope.File, "base", self.scope); - const tree = file.contents.tree; + .container => { + const container = @fieldParentPtr(Scope.Container, "base", self.scope); + const tree = container.file_scope.contents.tree; + // TODO Container should have it's own decls() const decl_node = tree.root_node.decls()[self.src_index]; return tree.token_locs[decl_node.firstToken()].start; }, @@ -229,6 +230,7 @@ pub const Decl = struct { const src_decl = module.decls[self.src_index]; return src_decl.inst.src; }, + .file, .block => unreachable, .gen_zir => unreachable, .local_val => unreachable, @@ -359,6 +361,7 @@ pub const Scope = struct { .local_ptr => return self.cast(LocalPtr).?.gen_zir.arena, .zir_module => return &self.cast(ZIRModule).?.contents.module.arena.allocator, .file => unreachable, + .container => unreachable, } } @@ -368,15 +371,16 @@ pub const Scope = struct { return switch (self.tag) { .block => self.cast(Block).?.decl, .gen_zir => self.cast(GenZIR).?.decl, - .local_val => return self.cast(LocalVal).?.gen_zir.decl, - .local_ptr => return self.cast(LocalPtr).?.gen_zir.decl, + .local_val => self.cast(LocalVal).?.gen_zir.decl, + .local_ptr => self.cast(LocalPtr).?.gen_zir.decl, .decl => self.cast(DeclAnalysis).?.decl, .zir_module => null, .file => null, + .container => null, }; } - /// Asserts the scope has a parent which is a ZIRModule or File and + /// Asserts the scope has a parent which is a ZIRModule or Container and /// returns it. 
pub fn namespace(self: *Scope) *Scope { switch (self.tag) { @@ -385,7 +389,8 @@ pub const Scope = struct { .local_val => return self.cast(LocalVal).?.gen_zir.decl.scope, .local_ptr => return self.cast(LocalPtr).?.gen_zir.decl.scope, .decl => return self.cast(DeclAnalysis).?.decl.scope, - .zir_module, .file => return self, + .file => return &self.cast(File).?.root_container.base, + .zir_module, .container => return self, } } @@ -399,8 +404,9 @@ pub const Scope = struct { .local_val => unreachable, .local_ptr => unreachable, .decl => unreachable, + .file => unreachable, .zir_module => return self.cast(ZIRModule).?.fullyQualifiedNameHash(name), - .file => return self.cast(File).?.fullyQualifiedNameHash(name), + .container => return self.cast(Container).?.fullyQualifiedNameHash(name), } } @@ -409,11 +415,12 @@ pub const Scope = struct { switch (self.tag) { .file => return self.cast(File).?.contents.tree, .zir_module => unreachable, - .decl => return self.cast(DeclAnalysis).?.decl.scope.cast(File).?.contents.tree, - .block => return self.cast(Block).?.decl.scope.cast(File).?.contents.tree, - .gen_zir => return self.cast(GenZIR).?.decl.scope.cast(File).?.contents.tree, - .local_val => return self.cast(LocalVal).?.gen_zir.decl.scope.cast(File).?.contents.tree, - .local_ptr => return self.cast(LocalPtr).?.gen_zir.decl.scope.cast(File).?.contents.tree, + .decl => return self.cast(DeclAnalysis).?.decl.scope.cast(Container).?.file_scope.contents.tree, + .block => return self.cast(Block).?.decl.scope.cast(Container).?.file_scope.contents.tree, + .gen_zir => return self.cast(GenZIR).?.decl.scope.cast(Container).?.file_scope.contents.tree, + .local_val => return self.cast(LocalVal).?.gen_zir.decl.scope.cast(Container).?.file_scope.contents.tree, + .local_ptr => return self.cast(LocalPtr).?.gen_zir.decl.scope.cast(Container).?.file_scope.contents.tree, + .container => return self.cast(Container).?.file_scope.contents.tree, } } @@ -427,13 +434,15 @@ pub const Scope = struct { .decl => unreachable, .zir_module => unreachable, .file => unreachable, + .container => unreachable, }; } - /// Asserts the scope has a parent which is a ZIRModule or File and + /// Asserts the scope has a parent which is a ZIRModule, Contaienr or File and /// returns the sub_file_path field. pub fn subFilePath(base: *Scope) []const u8 { switch (base.tag) { + .container => return @fieldParentPtr(Container, "base", base).file_scope.sub_file_path, .file => return @fieldParentPtr(File, "base", base).sub_file_path, .zir_module => return @fieldParentPtr(ZIRModule, "base", base).sub_file_path, .block => unreachable, @@ -453,11 +462,13 @@ pub const Scope = struct { .local_val => unreachable, .local_ptr => unreachable, .decl => unreachable, + .container => unreachable, } } pub fn getSource(base: *Scope, module: *Module) ![:0]const u8 { switch (base.tag) { + .container => return @fieldParentPtr(Container, "base", base).file_scope.getSource(module), .file => return @fieldParentPtr(File, "base", base).getSource(module), .zir_module => return @fieldParentPtr(ZIRModule, "base", base).getSource(module), .gen_zir => unreachable, @@ -471,8 +482,9 @@ pub const Scope = struct { /// Asserts the scope is a namespace Scope and removes the Decl from the namespace. 
pub fn removeDecl(base: *Scope, child: *Decl) void { switch (base.tag) { - .file => return @fieldParentPtr(File, "base", base).removeDecl(child), + .container => return @fieldParentPtr(Container, "base", base).removeDecl(child), .zir_module => return @fieldParentPtr(ZIRModule, "base", base).removeDecl(child), + .file => unreachable, .block => unreachable, .gen_zir => unreachable, .local_val => unreachable, @@ -499,6 +511,7 @@ pub const Scope = struct { .local_val => unreachable, .local_ptr => unreachable, .decl => unreachable, + .container => unreachable, } } @@ -515,6 +528,8 @@ pub const Scope = struct { zir_module, /// .zig source code. file, + /// struct, enum or union, every .file contains one of these. + container, block, decl, gen_zir, @@ -522,6 +537,38 @@ pub const Scope = struct { local_ptr, }; + pub const Container = struct { + pub const base_tag: Tag = .container; + base: Scope = Scope{ .tag = base_tag }, + + file_scope: *Scope.File, + + /// Direct children of the file. + decls: ArrayListUnmanaged(*Decl), + + // TODO implement container types and put this in a status union + // ty: Type + + pub fn deinit(self: *Container, gpa: *Allocator) void { + self.decls.deinit(gpa); + self.* = undefined; + } + + pub fn removeDecl(self: *Container, child: *Decl) void { + for (self.decls.items) |item, i| { + if (item == child) { + _ = self.decls.swapRemove(i); + return; + } + } + } + + pub fn fullyQualifiedNameHash(self: *Container, name: []const u8) NameHash { + // TODO container scope qualified names. + return std.zig.hashSrc(name); + } + }; + pub const File = struct { pub const base_tag: Tag = .file; base: Scope = Scope{ .tag = base_tag }, @@ -544,8 +591,7 @@ pub const Scope = struct { loaded_success, }, - /// Direct children of the file. - decls: ArrayListUnmanaged(*Decl), + root_container: Container, pub fn unload(self: *File, gpa: *Allocator) void { switch (self.status) { @@ -569,20 +615,11 @@ pub const Scope = struct { } pub fn deinit(self: *File, gpa: *Allocator) void { - self.decls.deinit(gpa); + self.root_container.deinit(gpa); self.unload(gpa); self.* = undefined; } - pub fn removeDecl(self: *File, child: *Decl) void { - for (self.decls.items) |item, i| { - if (item == child) { - _ = self.decls.swapRemove(i); - return; - } - } - } - pub fn dumpSrc(self: *File, src: usize) void { const loc = std.zig.findLineColumn(self.source.bytes, src); std.debug.print("{}:{}:{}\n", .{ self.sub_file_path, loc.line + 1, loc.column + 1 }); @@ -604,11 +641,6 @@ pub const Scope = struct { .bytes => |bytes| return bytes, } } - - pub fn fullyQualifiedNameHash(self: *File, name: []const u8) NameHash { - // We don't have struct scopes yet so this is currently just a simple name hash. - return std.zig.hashSrc(name); - } }; pub const ZIRModule = struct { @@ -861,7 +893,10 @@ pub fn init(gpa: *Allocator, options: InitOptions) !Module { .source = .{ .unloaded = {} }, .contents = .{ .not_available = {} }, .status = .never_loaded, - .decls = .{}, + .root_container = .{ + .file_scope = root_scope, + .decls = .{}, + }, }; break :blk &root_scope.base; } else if (mem.endsWith(u8, options.root_pkg.root_src_path, ".zir")) { @@ -969,7 +1004,7 @@ pub fn update(self: *Module) !void { // to force a refresh we unload now. 
if (self.root_scope.cast(Scope.File)) |zig_file| { zig_file.unload(self.gpa); - self.analyzeRootSrcFile(zig_file) catch |err| switch (err) { + self.analyzeContainer(&zig_file.root_container) catch |err| switch (err) { error.AnalysisFail => { assert(self.totalErrorCount() != 0); }, @@ -1237,8 +1272,8 @@ fn astGenAndAnalyzeDecl(self: *Module, decl: *Decl) !bool { const tracy = trace(@src()); defer tracy.end(); - const file_scope = decl.scope.cast(Scope.File).?; - const tree = try self.getAstTree(file_scope); + const container_scope = decl.scope.cast(Scope.Container).?; + const tree = try self.getAstTree(container_scope); const ast_node = tree.root_node.decls()[decl.src_index]; switch (ast_node.tag) { .FnProto => { @@ -1698,10 +1733,12 @@ fn getSrcModule(self: *Module, root_scope: *Scope.ZIRModule) !*zir.Module { } } -fn getAstTree(self: *Module, root_scope: *Scope.File) !*ast.Tree { +fn getAstTree(self: *Module, container_scope: *Scope.Container) !*ast.Tree { const tracy = trace(@src()); defer tracy.end(); + const root_scope = container_scope.file_scope; + switch (root_scope.status) { .never_loaded, .unloaded_success => { try self.failed_files.ensureCapacity(self.gpa, self.failed_files.items().len + 1); @@ -1743,24 +1780,24 @@ fn getAstTree(self: *Module, root_scope: *Scope.File) !*ast.Tree { } } -fn analyzeRootSrcFile(self: *Module, root_scope: *Scope.File) !void { +fn analyzeContainer(self: *Module, container_scope: *Scope.Container) !void { const tracy = trace(@src()); defer tracy.end(); // We may be analyzing it for the first time, or this may be // an incremental update. This code handles both cases. - const tree = try self.getAstTree(root_scope); + const tree = try self.getAstTree(container_scope); const decls = tree.root_node.decls(); try self.work_queue.ensureUnusedCapacity(decls.len); - try root_scope.decls.ensureCapacity(self.gpa, decls.len); + try container_scope.decls.ensureCapacity(self.gpa, decls.len); // Keep track of the decls that we expect to see in this file so that // we know which ones have been deleted. 
var deleted_decls = std.AutoArrayHashMap(*Decl, void).init(self.gpa); defer deleted_decls.deinit(); - try deleted_decls.ensureCapacity(root_scope.decls.items.len); - for (root_scope.decls.items) |file_decl| { + try deleted_decls.ensureCapacity(container_scope.decls.items.len); + for (container_scope.decls.items) |file_decl| { deleted_decls.putAssumeCapacityNoClobber(file_decl, {}); } @@ -1773,7 +1810,7 @@ fn analyzeRootSrcFile(self: *Module, root_scope: *Scope.File) !void { const name_loc = tree.token_locs[name_tok]; const name = tree.tokenSliceLoc(name_loc); - const name_hash = root_scope.fullyQualifiedNameHash(name); + const name_hash = container_scope.fullyQualifiedNameHash(name); const contents_hash = std.zig.hashSrc(tree.getNodeSource(src_decl)); if (self.decl_table.get(name_hash)) |decl| { // Update the AST Node index of the decl, even if its contents are unchanged, it may @@ -1801,8 +1838,8 @@ fn analyzeRootSrcFile(self: *Module, root_scope: *Scope.File) !void { } } } else { - const new_decl = try self.createNewDecl(&root_scope.base, name, decl_i, name_hash, contents_hash); - root_scope.decls.appendAssumeCapacity(new_decl); + const new_decl = try self.createNewDecl(&container_scope.base, name, decl_i, name_hash, contents_hash); + container_scope.decls.appendAssumeCapacity(new_decl); if (fn_proto.getExternExportInlineToken()) |maybe_export_token| { if (tree.token_ids[maybe_export_token] == .Keyword_export) { self.work_queue.writeItemAssumeCapacity(.{ .analyze_decl = new_decl }); @@ -1812,7 +1849,7 @@ fn analyzeRootSrcFile(self: *Module, root_scope: *Scope.File) !void { } else if (src_decl.castTag(.VarDecl)) |var_decl| { const name_loc = tree.token_locs[var_decl.name_token]; const name = tree.tokenSliceLoc(name_loc); - const name_hash = root_scope.fullyQualifiedNameHash(name); + const name_hash = container_scope.fullyQualifiedNameHash(name); const contents_hash = std.zig.hashSrc(tree.getNodeSource(src_decl)); if (self.decl_table.get(name_hash)) |decl| { // Update the AST Node index of the decl, even if its contents are unchanged, it may @@ -1828,8 +1865,8 @@ fn analyzeRootSrcFile(self: *Module, root_scope: *Scope.File) !void { decl.contents_hash = contents_hash; } } else { - const new_decl = try self.createNewDecl(&root_scope.base, name, decl_i, name_hash, contents_hash); - root_scope.decls.appendAssumeCapacity(new_decl); + const new_decl = try self.createNewDecl(&container_scope.base, name, decl_i, name_hash, contents_hash); + container_scope.decls.appendAssumeCapacity(new_decl); if (var_decl.getExternExportToken()) |maybe_export_token| { if (tree.token_ids[maybe_export_token] == .Keyword_export) { self.work_queue.writeItemAssumeCapacity(.{ .analyze_decl = new_decl }); @@ -1841,11 +1878,11 @@ fn analyzeRootSrcFile(self: *Module, root_scope: *Scope.File) !void { const name = try std.fmt.allocPrint(self.gpa, "__comptime_{}", .{name_index}); defer self.gpa.free(name); - const name_hash = root_scope.fullyQualifiedNameHash(name); + const name_hash = container_scope.fullyQualifiedNameHash(name); const contents_hash = std.zig.hashSrc(tree.getNodeSource(src_decl)); - const new_decl = try self.createNewDecl(&root_scope.base, name, decl_i, name_hash, contents_hash); - root_scope.decls.appendAssumeCapacity(new_decl); + const new_decl = try self.createNewDecl(&container_scope.base, name, decl_i, name_hash, contents_hash); + container_scope.decls.appendAssumeCapacity(new_decl); self.work_queue.writeItemAssumeCapacity(.{ .analyze_decl = new_decl }); } else if (src_decl.castTag(.ContainerField)) 
|container_field| { log.err("TODO: analyze container field", .{}); @@ -3124,6 +3161,7 @@ fn failWithOwnedErrorMsg(self: *Module, scope: *Scope, src: usize, err_msg: *Err self.failed_files.putAssumeCapacityNoClobber(scope, err_msg); }, .file => unreachable, + .container => unreachable, } return error.AnalysisFail; } diff --git a/src-self-hosted/codegen.zig b/src-self-hosted/codegen.zig index 6f08c7a689..bad1f59b88 100644 --- a/src-self-hosted/codegen.zig +++ b/src-self-hosted/codegen.zig @@ -436,8 +436,8 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type { try branch_stack.append(.{}); const src_data: struct {lbrace_src: usize, rbrace_src: usize, source: []const u8} = blk: { - if (module_fn.owner_decl.scope.cast(Module.Scope.File)) |scope_file| { - const tree = scope_file.contents.tree; + if (module_fn.owner_decl.scope.cast(Module.Scope.Container)) |container_scope| { + const tree = container_scope.file_scope.contents.tree; const fn_proto = tree.root_node.decls()[module_fn.owner_decl.src_index].castTag(.FnProto).?; const block = fn_proto.getBodyNode().?.castTag(.Block).?; const lbrace_src = tree.token_locs[block.lbrace].start; diff --git a/src-self-hosted/link/Elf.zig b/src-self-hosted/link/Elf.zig index 69f1260d20..451160630a 100644 --- a/src-self-hosted/link/Elf.zig +++ b/src-self-hosted/link/Elf.zig @@ -1656,8 +1656,8 @@ pub fn updateDecl(self: *Elf, module: *Module, decl: *Module.Decl) !void { try dbg_line_buffer.ensureCapacity(26); const line_off: u28 = blk: { - if (decl.scope.cast(Module.Scope.File)) |scope_file| { - const tree = scope_file.contents.tree; + if (decl.scope.cast(Module.Scope.Container)) |container_scope| { + const tree = container_scope.file_scope.contents.tree; const file_ast_decls = tree.root_node.decls(); // TODO Look into improving the performance here by adding a token-index-to-line // lookup table. Currently this involves scanning over the source code for newlines. @@ -2157,8 +2157,8 @@ pub fn updateDeclLineNumber(self: *Elf, module: *Module, decl: *const Module.Dec const tracy = trace(@src()); defer tracy.end(); - const scope_file = decl.scope.cast(Module.Scope.File).?; - const tree = scope_file.contents.tree; + const container_scope = decl.scope.cast(Module.Scope.Container).?; + const tree = container_scope.file_scope.contents.tree; const file_ast_decls = tree.root_node.decls(); // TODO Look into improving the performance here by adding a token-index-to-line // lookup table. Currently this involves scanning over the source code for newlines. -- cgit v1.2.3 From 17f36566de1cf549907d20dfd963596784691c73 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Thu, 3 Sep 2020 15:02:38 -0700 Subject: stage2: upgrade Scope.Container decls from ArrayList to HashMap --- src-self-hosted/Module.zig | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) (limited to 'src-self-hosted/Module.zig') diff --git a/src-self-hosted/Module.zig b/src-self-hosted/Module.zig index 8d7a4d7b36..d273712cd1 100644 --- a/src-self-hosted/Module.zig +++ b/src-self-hosted/Module.zig @@ -230,8 +230,7 @@ pub const Decl = struct { const src_decl = module.decls[self.src_index]; return src_decl.inst.src; }, - .file, - .block => unreachable, + .file, .block => unreachable, .gen_zir => unreachable, .local_val => unreachable, .local_ptr => unreachable, @@ -544,7 +543,7 @@ pub const Scope = struct { file_scope: *Scope.File, /// Direct children of the file. 
- decls: ArrayListUnmanaged(*Decl), + decls: std.AutoArrayHashMapUnmanaged(*Decl, void), // TODO implement container types and put this in a status union // ty: Type @@ -555,12 +554,7 @@ pub const Scope = struct { } pub fn removeDecl(self: *Container, child: *Decl) void { - for (self.decls.items) |item, i| { - if (item == child) { - _ = self.decls.swapRemove(i); - return; - } - } + _ = self.decls.remove(child); } pub fn fullyQualifiedNameHash(self: *Container, name: []const u8) NameHash { @@ -1796,9 +1790,9 @@ fn analyzeContainer(self: *Module, container_scope: *Scope.Container) !void { // we know which ones have been deleted. var deleted_decls = std.AutoArrayHashMap(*Decl, void).init(self.gpa); defer deleted_decls.deinit(); - try deleted_decls.ensureCapacity(container_scope.decls.items.len); - for (container_scope.decls.items) |file_decl| { - deleted_decls.putAssumeCapacityNoClobber(file_decl, {}); + try deleted_decls.ensureCapacity(container_scope.decls.items().len); + for (container_scope.decls.items()) |entry| { + deleted_decls.putAssumeCapacityNoClobber(entry.key, {}); } for (decls) |src_decl, decl_i| { @@ -1839,7 +1833,7 @@ fn analyzeContainer(self: *Module, container_scope: *Scope.Container) !void { } } else { const new_decl = try self.createNewDecl(&container_scope.base, name, decl_i, name_hash, contents_hash); - container_scope.decls.appendAssumeCapacity(new_decl); + container_scope.decls.putAssumeCapacity(new_decl, {}); if (fn_proto.getExternExportInlineToken()) |maybe_export_token| { if (tree.token_ids[maybe_export_token] == .Keyword_export) { self.work_queue.writeItemAssumeCapacity(.{ .analyze_decl = new_decl }); @@ -1866,7 +1860,7 @@ fn analyzeContainer(self: *Module, container_scope: *Scope.Container) !void { } } else { const new_decl = try self.createNewDecl(&container_scope.base, name, decl_i, name_hash, contents_hash); - container_scope.decls.appendAssumeCapacity(new_decl); + container_scope.decls.putAssumeCapacity(new_decl, {}); if (var_decl.getExternExportToken()) |maybe_export_token| { if (tree.token_ids[maybe_export_token] == .Keyword_export) { self.work_queue.writeItemAssumeCapacity(.{ .analyze_decl = new_decl }); @@ -1882,7 +1876,7 @@ fn analyzeContainer(self: *Module, container_scope: *Scope.Container) !void { const contents_hash = std.zig.hashSrc(tree.getNodeSource(src_decl)); const new_decl = try self.createNewDecl(&container_scope.base, name, decl_i, name_hash, contents_hash); - container_scope.decls.appendAssumeCapacity(new_decl); + container_scope.decls.putAssumeCapacity(new_decl, {}); self.work_queue.writeItemAssumeCapacity(.{ .analyze_decl = new_decl }); } else if (src_decl.castTag(.ContainerField)) |container_field| { log.err("TODO: analyze container field", .{}); -- cgit v1.2.3
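The last patch above turns `Scope.Container.decls` from an `ArrayListUnmanaged(*Decl)` into a `std.AutoArrayHashMapUnmanaged(*Decl, void)` used as an insertion-ordered set, so `removeDecl` becomes a single map removal instead of a linear scan plus `swapRemove`, while `analyzeContainer` keeps walking the entries in insertion order via `items()`. Below is a minimal sketch of that pattern under the same assumptions as these commits: it uses only calls that appear in the diffs (`put`, `remove`, `count`, `items`, `deinit`), the `Decl` struct is a hypothetical stand-in for `Module.Decl`, and the entry shape (`entry.key`) reflects the std array-hash-map interface as of this 2020 snapshot rather than any later std API.

    const std = @import("std");

    // Hypothetical stand-in for Module.Decl; only pointer identity matters here.
    const Decl = struct { name: []const u8 };

    pub fn main() !void {
        var gpa_state = std.heap.GeneralPurposeAllocator(.{}){};
        const gpa = &gpa_state.allocator;

        // Before the patch: ArrayListUnmanaged(*Decl), removal by scanning for the
        // matching pointer and calling swapRemove.
        // After the patch: a void-valued array hash map keyed by *Decl.
        var decls: std.AutoArrayHashMapUnmanaged(*Decl, void) = .{};
        defer decls.deinit(gpa);

        var a = Decl{ .name = "a" };
        var b = Decl{ .name = "b" };
        try decls.put(gpa, &a, {});
        try decls.put(gpa, &b, {});

        // removeDecl after the patch: no scan, just a map removal.
        _ = decls.remove(&a);
        std.debug.assert(decls.count() == 1);

        // Iteration still visits entries in insertion order, which is what
        // analyzeContainer relies on when it seeds deleted_decls from the set.
        for (decls.items()) |entry| {
            std.debug.assert(entry.key == &b);
        }
    }

Keeping the value type `void` preserves the set semantics of the old list while the array-backed map keeps deterministic iteration order — the same trade-off made earlier in this series when Module's `decl_exports`, `export_owners`, and related tables moved from `std.AutoHashMapUnmanaged` to `std.AutoArrayHashMapUnmanaged`.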