From 00b2e31589b2f4c3f67ab2bf46e140e00df3f910 Mon Sep 17 00:00:00 2001
From: Luuk de Gram <Luukdegram@users.noreply.github.com>
Date: Sat, 27 Mar 2021 23:26:20 +0100
Subject: Basic "Hello world" working

---
 src/codegen/wasm.zig | 132 ++++++++++++++++++++++++++++++---------------------
 src/link/Wasm.zig    | 121 +++++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 187 insertions(+), 66 deletions(-)

(limited to 'src')

diff --git a/src/codegen/wasm.zig b/src/codegen/wasm.zig
index fbea02e0c3..0b36e7cd9a 100644
--- a/src/codegen/wasm.zig
+++ b/src/codegen/wasm.zig
@@ -163,25 +163,23 @@ fn buildOpcode(args: OpcodeBuildArguments) wasm.Opcode {
         .global_get => return .global_get,
         .global_set => return .global_set,
 
-        .load => if (args.width) |width|
-            switch (width) {
-                8 => switch (args.valtype1.?) {
-                    .i32 => if (args.signedness.? == .signed) return .i32_load8_s else return .i32_load8_u,
-                    .i64 => if (args.signedness.? == .signed) return .i64_load8_s else return .i64_load8_u,
-                    .f32, .f64 => unreachable,
-                },
-                16 => switch (args.valtype1.?) {
-                    .i32 => if (args.signedness.? == .signed) return .i32_load16_s else return .i32_load16_u,
-                    .i64 => if (args.signedness.? == .signed) return .i64_load16_s else return .i64_load16_u,
-                    .f32, .f64 => unreachable,
-                },
-                32 => switch (args.valtype1.?) {
-                    .i64 => if (args.signedness.? == .signed) return .i64_load32_s else return .i64_load32_u,
-                    .i32, .f32, .f64 => unreachable,
-                },
-                else => unreachable,
-            }
-        else switch (args.valtype1.?) {
+        .load => if (args.width) |width| switch (width) {
+            8 => switch (args.valtype1.?) {
+                .i32 => if (args.signedness.? == .signed) return .i32_load8_s else return .i32_load8_u,
+                .i64 => if (args.signedness.? == .signed) return .i64_load8_s else return .i64_load8_u,
+                .f32, .f64 => unreachable,
+            },
+            16 => switch (args.valtype1.?) {
+                .i32 => if (args.signedness.? == .signed) return .i32_load16_s else return .i32_load16_u,
+                .i64 => if (args.signedness.? == .signed) return .i64_load16_s else return .i64_load16_u,
+                .f32, .f64 => unreachable,
+            },
+            32 => switch (args.valtype1.?) {
+                .i64 => if (args.signedness.? == .signed) return .i64_load32_s else return .i64_load32_u,
+                .i32, .f32, .f64 => unreachable,
+            },
+            else => unreachable,
+        } else switch (args.valtype1.?) {
             .i32 => return .i32_load,
             .i64 => return .i64_load,
             .f32 => return .f32_load,
@@ -469,6 +467,13 @@ test "Wasm - buildOpcode" {
     testing.expectEqual(@as(wasm.Opcode, .f64_reinterpret_i64), f64_reinterpret_i64);
 }
 
+pub const Result = union(enum) {
+    /// The generated bytes have been appended to `Context.code`.
+    appended: void,
+    /// The bytes are managed externally and are carried in this `Result` as a slice.
+    externally_managed: []const u8,
+};
+
 /// Hashmap to store generated `WValue` for each `Inst`
 pub const ValueTable = std.AutoHashMapUnmanaged(*Inst, WValue);
 
@@ -504,6 +509,8 @@ pub const Context = struct {
     const InnerError = error{
         OutOfMemory,
         CodegenFail,
+        /// Can occur when dereferencing a pointer that points to a `Decl` of which the analysis has failed
+        AnalysisFail,
     };
 
     pub fn deinit(self: *Context) void {
@@ -604,48 +611,65 @@ pub const Context = struct {
     }
 
     /// Generates the wasm bytecode for the function declaration belonging to `Context`
-    pub fn gen(self: *Context) InnerError!void {
+    pub fn gen(self: *Context) InnerError!Result {
         assert(self.code.items.len == 0);
-        try self.genFunctype();
 
-        // Write instructions
-        // TODO: check for and handle death of instructions
         const tv = self.decl.typed_value.most_recent.typed_value;
-        const mod_fn = blk: {
-            if (tv.val.castTag(.function)) |func| break :blk func.data;
-            if (tv.val.castTag(.extern_fn)) |ext_fn| return; // don't need codegen for extern functions
-            return self.fail(.{ .node_offset = 0 }, "TODO: Wasm codegen for decl type '{s}'", .{tv.ty.tag()});
-        };
-
-        // Reserve space to write the size after generating the code as well as space for locals count
-        try self.code.resize(10);
-
-        try self.genBody(mod_fn.body);
+        switch (tv.ty.zigTypeTag()) {
+            .Fn => {
+                try self.genFunctype();
+
+                // Write instructions
+                // TODO: check for and handle death of instructions
+                const mod_fn = blk: {
+                    if (tv.val.castTag(.function)) |func| break :blk func.data;
+                    if (tv.val.castTag(.extern_fn)) |ext_fn| return Result.appended; // don't need code body for extern functions
+                    return self.fail(.{ .node_offset = 0 }, "TODO: Wasm codegen for decl type '{s}'", .{tv.ty.tag()});
+                };
+
+                // Reserve space to write the size after generating the code as well as space for locals count
+                try self.code.resize(10);
+
+                try self.genBody(mod_fn.body);
+
+                // finally, write our local types at the 'offset' position
+                {
+                    leb.writeUnsignedFixed(5, self.code.items[5..10], @intCast(u32, self.locals.items.len));
+
+                    // offset into 'code' section where we will put our locals types
+                    var local_offset: usize = 10;
+
+                    // emit the actual locals amount
+                    for (self.locals.items) |local| {
+                        var buf: [6]u8 = undefined;
+                        leb.writeUnsignedFixed(5, buf[0..5], @as(u32, 1));
+                        buf[5] = local;
+                        try self.code.insertSlice(local_offset, &buf);
+                        local_offset += 6;
+                    }
+                }
 
-        // finally, write our local types at the 'offset' position
-        {
-            leb.writeUnsignedFixed(5, self.code.items[5..10], @intCast(u32, self.locals.items.len));
+                const writer = self.code.writer();
+                try writer.writeByte(wasm.opcode(.end));
 
-            // offset into 'code' section where we will put our locals types
-            var local_offset: usize = 10;
+                // Fill in the size of the generated code to the reserved space at the
+                // beginning of the buffer.
+                const size = self.code.items.len - 5 + self.decl.fn_link.wasm.?.idx_refs.items.len * 5;
+                leb.writeUnsignedFixed(5, self.code.items[0..5], @intCast(u32, size));
 
-            // emit the actual locals amount
-            for (self.locals.items) |local| {
-                var buf: [6]u8 = undefined;
-                leb.writeUnsignedFixed(5, buf[0..5], @as(u32, 1));
-                buf[5] = local;
-                try self.code.insertSlice(local_offset, &buf);
-                local_offset += 6;
-            }
+                // codegen data has been appended to `code`
+                return Result.appended;
+            },
+            .Array => {
+                if (tv.val.castTag(.bytes)) |payload| {
+                    if (tv.ty.sentinel()) |sentinel| {
+                        // TODO, handle sentinel correctly
+                    }
+                    return Result{ .externally_managed = payload.data };
+                } else return self.fail(.{ .node_offset = 0 }, "TODO implement gen for more kinds of arrays", .{});
+            },
+            else => |tag| return self.fail(.{ .node_offset = 0 }, "TODO: Implement zig type codegen for type: '{s}'", .{tag}),
         }
-
-        const writer = self.code.writer();
-        try writer.writeByte(wasm.opcode(.end));
-
-        // Fill in the size of the generated code to the reserved space at the
-        // beginning of the buffer.
-        const size = self.code.items.len - 5 + self.decl.fn_link.wasm.?.idx_refs.items.len * 5;
-        leb.writeUnsignedFixed(5, self.code.items[0..5], @intCast(u32, size));
     }
 
     fn genInst(self: *Context, inst: *Inst) InnerError!WValue {
diff --git a/src/link/Wasm.zig b/src/link/Wasm.zig
index 523d4c8a64..b732915924 100644
--- a/src/link/Wasm.zig
+++ b/src/link/Wasm.zig
@@ -29,6 +29,32 @@ pub const FnData = struct {
     idx_refs: std.ArrayListUnmanaged(struct { offset: u32, decl: *Module.Decl }) = .{},
 };
 
+/// Data section of the wasm binary
+/// Each declaration will have its own 'data_segment' within the section
+/// where the offset is calculated using the previous segments and the content length
+/// of the data
+pub const DataSection = struct {
+    segments: std.AutoArrayHashMapUnmanaged(*const Module.Decl, []const u8) = .{},
+
+    /// Returns the offset into the data segment based on a given `Decl`
+    pub fn offset(self: DataSection, decl: *const Module.Decl) u32 {
+        var cur_offset: u32 = 0;
+        return for (self.segments.items()) |entry| {
+            if (entry.key == decl) break cur_offset;
+            cur_offset += @intCast(u32, entry.value.len);
+        } else cur_offset;
+    }
+
+    /// Returns the total payload size of the data section
+    pub fn size(self: DataSection) u32 {
+        var total: u32 = 0;
+        for (self.segments.items()) |entry| {
+            total += @intCast(u32, entry.value.len);
+        }
+        return total;
+    }
+};
+
 base: link.File,
 
 /// List of all function Decls to be written to the output file. The index of
@@ -45,6 +71,10 @@ ext_funcs: std.ArrayListUnmanaged(*Module.Decl) = .{},
 /// to support existing code.
 /// TODO: Allow setting this through a flag?
 host_name: []const u8 = "env",
+/// Map of declarations to their bytes payload, used to keep track of all data segments
+/// that need to be emitted when creating the wasm binary.
+/// The `DataSection` must be kept alive until the linking stage.
+data: DataSection = .{},
 
 pub fn openPath(allocator: *Allocator, sub_path: []const u8, options: link.Options) !*Wasm {
     assert(options.object_format == .wasm);
@@ -52,7 +82,7 @@ pub fn openPath(allocator: *Allocator, sub_path: []const u8, options: link.Optio
     if (options.use_llvm) return error.LLVM_BackendIsTODO_ForWasm; // TODO
     if (options.use_lld) return error.LLD_LinkingIsTODO_ForWasm; // TODO
 
-    // TODO: read the file and keep vaild parts instead of truncating
+    // TODO: read the file and keep valid parts instead of truncating
     const file = try options.emit.?.directory.handle.createFile(sub_path, .{ .truncate = true, .read = true });
     errdefer file.close();
 
@@ -92,14 +122,13 @@ pub fn deinit(self: *Wasm) void {
     }
     self.funcs.deinit(self.base.allocator);
     self.ext_funcs.deinit(self.base.allocator);
+    self.data.segments.deinit(self.base.allocator);
 }
 
 // Generate code for the Decl, storing it in memory to be later written to
 // the file on flush().
 pub fn updateDecl(self: *Wasm, module: *Module, decl: *Module.Decl) !void {
     const typed_value = decl.typed_value.most_recent.typed_value;
-    if (typed_value.ty.zigTypeTag() != .Fn)
-        return error.TODOImplementNonFnDeclsForWasm;
 
     if (decl.fn_link.wasm) |*fn_data| {
         fn_data.functype.items.len = 0;
@@ -111,6 +140,7 @@ pub fn updateDecl(self: *Wasm, module: *Module, decl: *Module.Decl) !void {
         switch (decl.typed_value.most_recent.typed_value.val.tag()) {
             .function => try self.funcs.append(self.base.allocator, decl),
             .extern_fn => try self.ext_funcs.append(self.base.allocator, decl),
+            .bytes => {},
             else => return error.TODOImplementNonFnDeclsForWasm,
         }
     }
@@ -132,7 +162,7 @@ pub fn updateDecl(self: *Wasm, module: *Module, decl: *Module.Decl) !void {
     defer context.deinit();
 
     // generate the 'code' section for the function declaration
-    context.gen() catch |err| switch (err) {
+    const result = context.gen() catch |err| switch (err) {
         error.CodegenFail => {
             decl.analysis = .codegen_failure;
             try module.failed_decls.put(module.gpa, decl, context.err_msg);
@@ -141,15 +171,24 @@ pub fn updateDecl(self: *Wasm, module: *Module, decl: *Module.Decl) !void {
         else => |e| return err,
     };
 
-    // as locals are patched afterwards, the offsets of funcidx's are off,
-    // here we update them to correct them
-    for (decl.fn_link.wasm.?.idx_refs.items) |*func| {
-        // For each local, add 6 bytes (count + type)
-        func.offset += @intCast(u32, context.locals.items.len * 6);
-    }
+    switch (typed_value.ty.zigTypeTag()) {
+        .Fn => {
+            // as locals are patched afterwards, the offsets of funcidx's are off,
+            // here we update them to correct them
+            for (decl.fn_link.wasm.?.idx_refs.items) |*func| {
+                // For each local, add 6 bytes (count + type)
+                func.offset += @intCast(u32, context.locals.items.len * 6);
+            }
 
-    fn_data.functype = context.func_type_data.toUnmanaged();
-    fn_data.code = context.code.toUnmanaged();
+            fn_data.functype = context.func_type_data.toUnmanaged();
+            fn_data.code = context.code.toUnmanaged();
+        },
+        .Array => switch (result) {
+            .appended => unreachable,
+            .externally_managed => |payload| try self.data.segments.put(self.base.allocator, decl, payload),
+        },
+        else => return error.TODO,
+    }
 }
 
 pub fn updateDeclExports(
@@ -257,6 +296,22 @@ pub fn flushModule(self: *Wasm, comp: *Compilation) !void {
         );
     }
 
+    // Memory section
+    if (self.data.size() != 0) {
+        const header_offset = try reserveVecSectionHeader(file);
+        const writer = file.writer();
+
+        try leb.writeULEB128(writer, @as(u32, 0));
+        try leb.writeULEB128(writer, @as(u32, 1));
+        try writeVecSectionHeader(
+            file,
+            header_offset,
+            .memory,
+            @intCast(u32, (try file.getPos()) - header_offset - header_size),
+            @as(u32, 1),
+        );
+    }
+
     // Export section
     if (self.base.options.module) |module| {
         const header_offset = try reserveVecSectionHeader(file);
@@ -281,6 +336,16 @@ pub fn flushModule(self: *Wasm, comp: *Compilation) !void {
                 count += 1;
             }
         }
+
+        // export memory if size is not 0
+        if (self.data.size() != 0) {
+            try leb.writeULEB128(writer, @intCast(u32, "memory".len));
+            try writer.writeAll("memory");
+            try writer.writeByte(wasm.externalKind(.memory));
+            try leb.writeULEB128(writer, @as(u32, 0)); // only 1 memory 'object' can exist
+            count += 1;
+        }
+
         try writeVecSectionHeader(
             file,
             header_offset,
@@ -320,6 +385,38 @@ pub fn flushModule(self: *Wasm, comp: *Compilation) !void {
             @intCast(u32, self.funcs.items.len),
         );
     }
+
+    // Data section
+    {
+        const header_offset = try reserveVecSectionHeader(file);
+        const writer = file.writer();
+        var offset: i32 = 0;
+        for (self.data.segments.items()) |entry| {
+            // index to memory section (always 0 in current wasm version)
+            try leb.writeULEB128(writer, @as(u32, 0));
+
+            // offset into data section
+            try writer.writeByte(wasm.opcode(.i32_const));
+            try leb.writeILEB128(writer, offset);
+            try writer.writeByte(wasm.opcode(.end));
+
+            // payload size
+            const len = @intCast(u32, entry.value.len);
+            try leb.writeULEB128(writer, len);
+
+            // write payload
+            try writer.writeAll(entry.value);
+            offset += @bitCast(i32, len);
+        }
+
+        try writeVecSectionHeader(
+            file,
+            header_offset,
+            .data,
+            @intCast(u32, (try file.getPos()) - header_offset - header_size),
+            @intCast(u32, self.data.segments.items().len),
+        );
+    }
 }
 
 fn linkWithLLD(self: *Wasm, comp: *Compilation) !void {
-- 
cgit v1.2.3


From 1bd5552fc1a8fd2ddcb8f0c17f35662e4eb1cbcf Mon Sep 17 00:00:00 2001
From: Luuk de Gram <Luukdegram@users.noreply.github.com>
Date: Fri, 2 Apr 2021 20:59:40 +0200
Subject: Calculate data length to ensure correct pointer offsets

---
 src/Module.zig       |   2 +-
 src/codegen/wasm.zig |  42 ++++++++---
 src/link.zig         |   5 +-
 src/link/Wasm.zig    | 203 +++++++++++++++++++++++++++++++++------------------
 4 files changed, 167 insertions(+), 85 deletions(-)

(limited to 'src')

diff --git a/src/Module.zig b/src/Module.zig
index 933917d948..8360b3245b 100644
--- a/src/Module.zig
+++ b/src/Module.zig
@@ -3842,7 +3842,7 @@ fn allocateNewDecl(
             .elf => .{ .elf = link.File.Elf.SrcFn.empty },
             .macho => .{ .macho = link.File.MachO.SrcFn.empty },
             .c => .{ .c = link.File.C.FnBlock.empty },
-            .wasm => .{ .wasm = null },
+            .wasm => .{ .wasm = .{} },
             .spirv => .{ .spirv = .{} },
         },
         .generation = 0,
diff --git a/src/codegen/wasm.zig b/src/codegen/wasm.zig
index 0b36e7cd9a..3e20f94d6f 100644
--- a/src/codegen/wasm.zig
+++ b/src/codegen/wasm.zig
@@ -16,9 +16,12 @@ const Value = @import("../value.zig").Value;
 const Compilation = @import("../Compilation.zig");
 const AnyMCValue = @import("../codegen.zig").AnyMCValue;
 const LazySrcLoc = Module.LazySrcLoc;
+const link = @import("../link.zig");
+const TypedValue = @import("../TypedValue.zig");
 
 /// Wasm Value, created when generating an instruction
 const WValue = union(enum) {
+    /// May be referenced but is unused
     none: void,
     /// Index of the local variable
     local: u32,
@@ -611,11 +614,8 @@ pub const Context = struct {
     }
 
     /// Generates the wasm bytecode for the function declaration belonging to `Context`
-    pub fn gen(self: *Context) InnerError!Result {
-        assert(self.code.items.len == 0);
-
-        const tv = self.decl.typed_value.most_recent.typed_value;
-        switch (tv.ty.zigTypeTag()) {
+    pub fn gen(self: *Context, typed_value: TypedValue) InnerError!Result {
+        switch (typed_value.ty.zigTypeTag()) {
             .Fn => {
                 try self.genFunctype();
 
@@ -654,21 +654,41 @@ pub const Context = struct {
 
                 // Fill in the size of the generated code to the reserved space at the
                 // beginning of the buffer.
-                const size = self.code.items.len - 5 + self.decl.fn_link.wasm.?.idx_refs.items.len * 5;
+                const size = self.code.items.len - 5 + self.decl.fn_link.wasm.idx_refs.items.len * 5;
                 leb.writeUnsignedFixed(5, self.code.items[0..5], @intCast(u32, size));
 
                 // codegen data has been appended to `code`
                 return Result.appended;
             },
             .Array => {
-                if (tv.val.castTag(.bytes)) |payload| {
-                    if (tv.ty.sentinel()) |sentinel| {
-                        // TODO, handle sentinel correctly
+                if (typed_value.val.castTag(.bytes)) |payload| {
+                    if (typed_value.ty.sentinel()) |sentinel| {
+                        try self.code.appendSlice(payload.data);
+
+                        switch (try self.gen(.{
+                            .ty = typed_value.ty.elemType(),
+                            .val = sentinel,
+                        })) {
+                            .appended => return Result.appended,
+                            .externally_managed => |data| {
+                                try self.code.appendSlice(data);
+                                return Result.appended;
+                            },
+                        }
                     }
                     return Result{ .externally_managed = payload.data };
                 } else return self.fail(.{ .node_offset = 0 }, "TODO implement gen for more kinds of arrays", .{});
             },
-            else => |tag| return self.fail(.{ .node_offset = 0 }, "TODO: Implement zig type codegen for type: '{s}'", .{tag}),
+            .Int => {
+                const info = typed_value.ty.intInfo(self.bin_file.base.options.target);
+                if (info.bits == 8 and info.signedness == .unsigned) {
+                    const int_byte = typed_value.val.toUnsignedInt();
+                    try self.code.append(@intCast(u8, int_byte));
+                    return Result.appended;
+                }
+                return self.fail(self.decl.src(), "TODO: Implement codegen for int type: '{}'", .{typed_value.ty});
+            },
+            else => |tag| return self.fail(self.decl.src(), "TODO: Implement zig type codegen for type: '{s}'", .{tag}),
         }
     }
 
@@ -745,7 +765,7 @@ pub const Context = struct {
 
         // The function index immediate argument will be filled in using this data
         // in link.Wasm.flush().
-        try self.decl.fn_link.wasm.?.idx_refs.append(self.gpa, .{
+        try self.decl.fn_link.wasm.idx_refs.append(self.gpa, .{
             .offset = @intCast(u32, self.code.items.len),
             .decl = target,
         });
diff --git a/src/link.zig b/src/link.zig
index db3e973f84..162b55a0d0 100644
--- a/src/link.zig
+++ b/src/link.zig
@@ -147,7 +147,7 @@ pub const File = struct {
         coff: Coff.SrcFn,
         macho: MachO.SrcFn,
         c: C.FnBlock,
-        wasm: ?Wasm.FnData,
+        wasm: Wasm.FnData,
         spirv: SpirV.FnData,
     };
 
@@ -328,7 +328,8 @@ pub const File = struct {
             .elf => return @fieldParentPtr(Elf, "base", base).allocateDeclIndexes(decl),
             .macho => return @fieldParentPtr(MachO, "base", base).allocateDeclIndexes(decl),
             .c => return @fieldParentPtr(C, "base", base).allocateDeclIndexes(decl),
-            .wasm, .spirv => {},
+            .wasm => return @fieldParentPtr(Wasm, "base", base).allocateDeclIndexes(decl),
+            .spirv => {},
         }
     }
 
diff --git a/src/link/Wasm.zig b/src/link/Wasm.zig
index b732915924..37c9b02d9e 100644
--- a/src/link/Wasm.zig
+++ b/src/link/Wasm.zig
@@ -16,6 +16,7 @@ const link = @import("../link.zig");
 const trace = @import("../tracy.zig").trace;
 const build_options = @import("build_options");
 const Cache = @import("../Cache.zig");
+const TypedValue = @import("../TypedValue.zig");
 
 pub const base_tag = link.File.Tag.wasm;
 
@@ -34,25 +35,33 @@ pub const FnData = struct {
 /// where the offset is calculated using the previous segments and the content length
 /// of the data
 pub const DataSection = struct {
-    segments: std.AutoArrayHashMapUnmanaged(*const Module.Decl, []const u8) = .{},
+    segments: std.AutoArrayHashMapUnmanaged(*Module.Decl, struct { data: [*]const u8, len: u32 }) = .{},
 
     /// Returns the offset into the data segment based on a given `Decl`
     pub fn offset(self: DataSection, decl: *const Module.Decl) u32 {
         var cur_offset: u32 = 0;
         return for (self.segments.items()) |entry| {
             if (entry.key == decl) break cur_offset;
-            cur_offset += @intCast(u32, entry.value.len);
-        } else cur_offset;
+            cur_offset += entry.value.len;
+        } else unreachable; // offset() called on declaration that does not live inside 'data' section
     }
 
     /// Returns the total payload size of the data section
     pub fn size(self: DataSection) u32 {
         var total: u32 = 0;
         for (self.segments.items()) |entry| {
-            total += @intCast(u32, entry.value.len);
+            total += entry.value.len;
         }
         return total;
     }
+
+    /// Points the data segment belonging to `decl` at `data`; only the pointer is stored, the segment's `len` is not touched.
+    /// It is illegal behaviour to call this before `allocateDeclIndexes` was called for `decl`.
+    /// `data` must be managed externally with a lifetime that lasts as long as codegen does.
+    pub fn updateData(self: DataSection, decl: *Module.Decl, data: []const u8) void {
+        const entry = self.segments.getEntry(decl).?; // null here means updateData was called before the decl was added to the data segments
+        entry.value.data = data.ptr;
+    }
 };
 
 base: link.File,
@@ -111,49 +120,92 @@ pub fn createEmpty(gpa: *Allocator, options: link.Options) !*Wasm {
 
 pub fn deinit(self: *Wasm) void {
     for (self.funcs.items) |decl| {
-        decl.fn_link.wasm.?.functype.deinit(self.base.allocator);
-        decl.fn_link.wasm.?.code.deinit(self.base.allocator);
-        decl.fn_link.wasm.?.idx_refs.deinit(self.base.allocator);
+        decl.fn_link.wasm.functype.deinit(self.base.allocator);
+        decl.fn_link.wasm.code.deinit(self.base.allocator);
+        decl.fn_link.wasm.idx_refs.deinit(self.base.allocator);
     }
     for (self.ext_funcs.items) |decl| {
-        decl.fn_link.wasm.?.functype.deinit(self.base.allocator);
-        decl.fn_link.wasm.?.code.deinit(self.base.allocator);
-        decl.fn_link.wasm.?.idx_refs.deinit(self.base.allocator);
+        decl.fn_link.wasm.functype.deinit(self.base.allocator);
+        decl.fn_link.wasm.code.deinit(self.base.allocator);
+        decl.fn_link.wasm.idx_refs.deinit(self.base.allocator);
+    }
+    for (self.data.segments.items()) |entry| {
+        // data segments only use the code section
+        entry.key.fn_link.wasm.code.deinit(self.base.allocator);
     }
     self.funcs.deinit(self.base.allocator);
     self.ext_funcs.deinit(self.base.allocator);
     self.data.segments.deinit(self.base.allocator);
 }
 
+pub fn allocateDeclIndexes(self: *Wasm, decl: *Module.Decl) !void {
+    std.debug.print("INIT: '{s}'\n", .{decl.name}); // NOTE(review): looks like leftover debug output — confirm it should stay
+    const tv = decl.typed_value.most_recent.typed_value;
+    decl.fn_link.wasm = .{}; // reset the decl's wasm link data to its defaults
+
+    switch (tv.ty.zigTypeTag()) {
+        .Array => {
+            // An array decl contributes to the data section, so its length is computed
+            // up front: the offsets of later segments are derived from the lengths of the
+            // segments before them. Any `calcDataLen` error is reported as `AnalysisFail`.
+            const data_len = calcDataLen(self, tv) catch return error.AnalysisFail;
+            try self.data.segments.putNoClobber(self.base.allocator, decl, .{ .data = undefined, .len = data_len }); // `data` is set later through `DataSection.updateData`
+        },
+        .Fn => if (self.getFuncidx(decl) == null) switch (tv.val.tag()) {
+            // not registered yet: append to the list matching the kind of function
+            .function => try self.funcs.append(self.base.allocator, decl),
+            .extern_fn => try self.ext_funcs.append(self.base.allocator, decl),
+            else => unreachable,
+        },
+        else => {},
+    }
+}
+
+// TODO, remove this and use the existing error mechanism
+const DataLenError = error{
+    TODO_WASM_CalcDataLenArray,
+    TODO_WASM_CalcDataLen,
+};
+/// Returns the number of bytes the given `TypedValue` will occupy in the data section; unsupported kinds of values yield a `DataLenError`.
+fn calcDataLen(bin_file: *Wasm, typed_value: TypedValue) DataLenError!u32 {
+    switch (typed_value.ty.zigTypeTag()) {
+        .Array => {
+            if (typed_value.val.castTag(.bytes)) |payload| {
+                if (typed_value.ty.sentinel()) |sentinel| { // sentinel present: payload bytes plus the length computed for the sentinel value
+                    return @intCast(u32, payload.data.len) + try calcDataLen(bin_file, .{
+                        .ty = typed_value.ty.elemType(),
+                        .val = sentinel,
+                    });
+                }
+                return @intCast(u32, payload.data.len);
+            }
+            return error.TODO_WASM_CalcDataLenArray; // only `.bytes` array values are handled so far
+        },
+        .Int => {
+            const info = typed_value.ty.intInfo(bin_file.base.options.target);
+            return info.bits / 8; // NOTE(review): integer division — bit widths that are not a multiple of 8 round down; confirm this is intended
+        },
+        .Pointer => return 4, // NOTE(review): presumably the wasm32 pointer size — confirm
+        else => return error.TODO_WASM_CalcDataLen,
+    }
+}
+
 // Generate code for the Decl, storing it in memory to be later written to
 // the file on flush().
 pub fn updateDecl(self: *Wasm, module: *Module, decl: *Module.Decl) !void {
+    std.debug.print("Updating '{s}'\n", .{decl.name});
     const typed_value = decl.typed_value.most_recent.typed_value;
 
-    if (decl.fn_link.wasm) |*fn_data| {
-        fn_data.functype.items.len = 0;
-        fn_data.code.items.len = 0;
-        fn_data.idx_refs.items.len = 0;
-    } else {
-        decl.fn_link.wasm = .{};
-        // dependent on function type, appends it to the correct list
-        switch (decl.typed_value.most_recent.typed_value.val.tag()) {
-            .function => try self.funcs.append(self.base.allocator, decl),
-            .extern_fn => try self.ext_funcs.append(self.base.allocator, decl),
-            .bytes => {},
-            else => return error.TODOImplementNonFnDeclsForWasm,
-        }
-    }
-    const fn_data = &decl.fn_link.wasm.?;
-
-    var managed_functype = fn_data.functype.toManaged(self.base.allocator);
-    var managed_code = fn_data.code.toManaged(self.base.allocator);
+    const fn_data = &decl.fn_link.wasm;
+    fn_data.functype.items.len = 0;
+    fn_data.code.items.len = 0;
+    fn_data.idx_refs.items.len = 0;
 
     var context = codegen.Context{
         .gpa = self.base.allocator,
         .values = .{},
-        .code = managed_code,
-        .func_type_data = managed_functype,
+        .code = fn_data.code.toManaged(self.base.allocator),
+        .func_type_data = fn_data.functype.toManaged(self.base.allocator),
         .decl = decl,
         .err_msg = undefined,
         .locals = .{},
@@ -162,7 +214,7 @@ pub fn updateDecl(self: *Wasm, module: *Module, decl: *Module.Decl) !void {
     defer context.deinit();
 
     // generate the 'code' section for the function declaration
-    const result = context.gen() catch |err| switch (err) {
+    const result = context.gen(typed_value) catch |err| switch (err) {
         error.CodegenFail => {
             decl.analysis = .codegen_failure;
             try module.failed_decls.put(module.gpa, decl, context.err_msg);
@@ -175,7 +227,7 @@ pub fn updateDecl(self: *Wasm, module: *Module, decl: *Module.Decl) !void {
         .Fn => {
             // as locals are patched afterwards, the offsets of funcidx's are off,
             // here we update them to correct them
-            for (decl.fn_link.wasm.?.idx_refs.items) |*func| {
+            for (decl.fn_link.wasm.idx_refs.items) |*func| {
                 // For each local, add 6 bytes (count + type)
                 func.offset += @intCast(u32, context.locals.items.len * 6);
             }
@@ -184,8 +236,12 @@ pub fn updateDecl(self: *Wasm, module: *Module, decl: *Module.Decl) !void {
             fn_data.code = context.code.toUnmanaged();
         },
         .Array => switch (result) {
-            .appended => unreachable,
-            .externally_managed => |payload| try self.data.segments.put(self.base.allocator, decl, payload),
+            .appended => {
+                fn_data.functype = context.func_type_data.toUnmanaged();
+                fn_data.code = context.code.toUnmanaged();
+                self.data.updateData(decl, fn_data.code.items);
+            },
+            .externally_managed => |payload| self.data.updateData(decl, payload),
         },
         else => return error.TODO,
     }
@@ -199,18 +255,18 @@ pub fn updateDeclExports(
 ) !void {}
 
 pub fn freeDecl(self: *Wasm, decl: *Module.Decl) void {
-    // TODO: remove this assert when non-function Decls are implemented
-    assert(decl.typed_value.most_recent.typed_value.ty.zigTypeTag() == .Fn);
-    const func_idx = self.getFuncidx(decl).?;
-    switch (decl.typed_value.most_recent.typed_value.val.tag()) {
-        .function => _ = self.funcs.swapRemove(func_idx),
-        .extern_fn => _ = self.ext_funcs.swapRemove(func_idx),
-        else => unreachable,
+    if (self.getFuncidx(decl)) |func_idx| {
+        switch (decl.typed_value.most_recent.typed_value.val.tag()) {
+            .function => _ = self.funcs.swapRemove(func_idx),
+            .extern_fn => _ = self.ext_funcs.swapRemove(func_idx),
+            else => unreachable,
+        }
     }
-    decl.fn_link.wasm.?.functype.deinit(self.base.allocator);
-    decl.fn_link.wasm.?.code.deinit(self.base.allocator);
-    decl.fn_link.wasm.?.idx_refs.deinit(self.base.allocator);
-    decl.fn_link.wasm = null;
+    decl.fn_link.wasm.functype.deinit(self.base.allocator);
+    decl.fn_link.wasm.code.deinit(self.base.allocator);
+    decl.fn_link.wasm.idx_refs.deinit(self.base.allocator);
+    _ = self.data.segments.orderedRemove(decl);
+    decl.fn_link.wasm = undefined;
 }
 
 pub fn flush(self: *Wasm, comp: *Compilation) !void {
@@ -238,8 +294,8 @@ pub fn flushModule(self: *Wasm, comp: *Compilation) !void {
 
         // extern functions are defined in the wasm binary first through the `import`
         // section, so define their func types first
-        for (self.ext_funcs.items) |decl| try file.writeAll(decl.fn_link.wasm.?.functype.items);
-        for (self.funcs.items) |decl| try file.writeAll(decl.fn_link.wasm.?.functype.items);
+        for (self.ext_funcs.items) |decl| try file.writeAll(decl.fn_link.wasm.functype.items);
+        for (self.funcs.items) |decl| try file.writeAll(decl.fn_link.wasm.functype.items);
 
         try writeVecSectionHeader(
             file,
@@ -302,13 +358,22 @@ pub fn flushModule(self: *Wasm, comp: *Compilation) !void {
         const writer = file.writer();
 
         try leb.writeULEB128(writer, @as(u32, 0));
-        try leb.writeULEB128(writer, @as(u32, 1));
+        // Calculate the amount of memory pages are required and write them.
+        // Wasm uses 64kB page sizes. Round up to ensure the data segments fit into the memory
+        try leb.writeULEB128(
+            writer,
+            try std.math.divCeil(
+                u32,
+                self.data.size(),
+                std.mem.page_size,
+            ),
+        );
         try writeVecSectionHeader(
             file,
             header_offset,
             .memory,
             @intCast(u32, (try file.getPos()) - header_offset - header_size),
-            @as(u32, 1),
+            @as(u32, 1), // wasm currently only supports 1 linear memory segment
         );
     }
 
@@ -360,7 +425,7 @@ pub fn flushModule(self: *Wasm, comp: *Compilation) !void {
         const header_offset = try reserveVecSectionHeader(file);
         const writer = file.writer();
         for (self.funcs.items) |decl| {
-            const fn_data = &decl.fn_link.wasm.?;
+            const fn_data = &decl.fn_link.wasm;
 
             // Write the already generated code to the file, inserting
             // function indexes where required.
@@ -387,34 +452,30 @@ pub fn flushModule(self: *Wasm, comp: *Compilation) !void {
     }
 
     // Data section
-    {
+    if (self.data.size() != 0) {
         const header_offset = try reserveVecSectionHeader(file);
         const writer = file.writer();
-        var offset: i32 = 0;
-        for (self.data.segments.items()) |entry| {
-            // index to memory section (always 0 in current wasm version)
-            try leb.writeULEB128(writer, @as(u32, 0));
-
-            // offset into data section
-            try writer.writeByte(wasm.opcode(.i32_const));
-            try leb.writeILEB128(writer, offset);
-            try writer.writeByte(wasm.opcode(.end));
-
-            // payload size
-            const len = @intCast(u32, entry.value.len);
-            try leb.writeULEB128(writer, len);
-
-            // write payload
-            try writer.writeAll(entry.value);
-            offset += @bitCast(i32, len);
-        }
+        var len: u32 = 0;
+        // index to memory section (currently, there can only be 1 memory section in wasm)
+        try leb.writeULEB128(writer, @as(u32, 0));
+
+        // offset into data section
+        try writer.writeByte(wasm.opcode(.i32_const));
+        try leb.writeILEB128(writer, @as(i32, 0));
+        try writer.writeByte(wasm.opcode(.end));
+
+        // payload size
+        try leb.writeULEB128(writer, self.data.size());
+
+        // write payload
+        for (self.data.segments.items()) |entry| try writer.writeAll(entry.value.data[0..entry.value.len]);
 
         try writeVecSectionHeader(
             file,
             header_offset,
             .data,
             @intCast(u32, (try file.getPos()) - header_offset - header_size),
-            @intCast(u32, self.data.segments.items().len),
+            @intCast(u32, 1),
         );
     }
 }
@@ -681,7 +742,7 @@ fn linkWithLLD(self: *Wasm, comp: *Compilation) !void {
 /// Get the current index of a given Decl in the function list
 /// This will correctly provide the index, regardless whether the function is extern or not
 /// TODO: we could maintain a hash map to potentially make this simpler
-fn getFuncidx(self: Wasm, decl: *Module.Decl) ?u32 {
+fn getFuncidx(self: Wasm, decl: *const Module.Decl) ?u32 {
     var offset: u32 = 0;
     const slice = switch (decl.typed_value.most_recent.typed_value.val.tag()) {
         .function => blk: {
-- 
cgit v1.2.3


From 9fd1dab58230edae11e2798f30ec43704a0c2178 Mon Sep 17 00:00:00 2001
From: Luuk de Gram <Luukdegram@users.noreply.github.com>
Date: Sat, 3 Apr 2021 20:59:41 +0200
Subject: Handle incremental compilation correctly

---
 src/link/Wasm.zig | 65 ++++++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 48 insertions(+), 17 deletions(-)

(limited to 'src')

diff --git a/src/link/Wasm.zig b/src/link/Wasm.zig
index 37c9b02d9e..e94f61d54a 100644
--- a/src/link/Wasm.zig
+++ b/src/link/Wasm.zig
@@ -35,22 +35,28 @@ pub const FnData = struct {
 /// where the offset is calculated using the previous segments and the content length
 /// of the data
 pub const DataSection = struct {
-    segments: std.AutoArrayHashMapUnmanaged(*Module.Decl, struct { data: [*]const u8, len: u32 }) = .{},
+    /// Every data object will be appended to this list,
+    /// containing its `Decl`, the data in bytes, and its length.
+    segments: std.ArrayListUnmanaged(struct {
+        decl: *Module.Decl,
+        data: [*]const u8,
+        len: u32,
+    }) = .{},
 
     /// Returns the offset into the data segment based on a given `Decl`
     pub fn offset(self: DataSection, decl: *const Module.Decl) u32 {
         var cur_offset: u32 = 0;
-        return for (self.segments.items()) |entry| {
-            if (entry.key == decl) break cur_offset;
-            cur_offset += entry.value.len;
+        return for (self.segments.items) |entry| {
+            if (entry.decl == decl) break cur_offset;
+            cur_offset += entry.len;
         } else unreachable; // offset() called on declaration that does not live inside 'data' section
     }
 
     /// Returns the total payload size of the data section
     pub fn size(self: DataSection) u32 {
         var total: u32 = 0;
-        for (self.segments.items()) |entry| {
-            total += entry.value.len;
+        for (self.segments.items) |entry| {
+            total += entry.len;
         }
         return total;
     }
@@ -59,8 +65,17 @@ pub const DataSection = struct {
     /// It's illegal behaviour to call this before allocateDeclIndexes was called
     /// `data` must be managed externally with a lifetime that last as long as codegen does.
     pub fn updateData(self: DataSection, decl: *Module.Decl, data: []const u8) void {
-        const entry = self.segments.getEntry(decl).?; // called updateData before the declaration was added to data segments
-        entry.value.data = data.ptr;
+        const entry = for (self.segments.items) |*item| {
+            if (item.decl == decl) break item;
+        } else unreachable; // called updateData before the declaration was added to data segments
+        entry.data = data.ptr;
+    }
+
+    /// Returns the index of a declaration and `null` when not found
+    pub fn idx(self: DataSection, decl: *Module.Decl) ?usize {
+        return for (self.segments.items) |entry, i| {
+            if (entry.decl == decl) break i;
+        } else null;
     }
 };
 
@@ -129,9 +144,10 @@ pub fn deinit(self: *Wasm) void {
         decl.fn_link.wasm.code.deinit(self.base.allocator);
         decl.fn_link.wasm.idx_refs.deinit(self.base.allocator);
     }
-    for (self.data.segments.items()) |entry| {
-        // data segments only use the code section
-        entry.key.fn_link.wasm.code.deinit(self.base.allocator);
+    for (self.data.segments.items) |entry| {
+        entry.decl.fn_link.wasm.functype.deinit(self.base.allocator);
+        entry.decl.fn_link.wasm.code.deinit(self.base.allocator);
+        entry.decl.fn_link.wasm.idx_refs.deinit(self.base.allocator);
     }
     self.funcs.deinit(self.base.allocator);
     self.ext_funcs.deinit(self.base.allocator);
@@ -139,7 +155,6 @@ pub fn deinit(self: *Wasm) void {
 }
 
 pub fn allocateDeclIndexes(self: *Wasm, decl: *Module.Decl) !void {
-    std.debug.print("INIT: '{s}'\n", .{decl.name});
     const tv = decl.typed_value.most_recent.typed_value;
     decl.fn_link.wasm = .{};
 
@@ -149,7 +164,22 @@ pub fn allocateDeclIndexes(self: *Wasm, decl: *Module.Decl) !void {
             // we must calculate its data length now so that the data offsets are available
             // to other decls when called
             const data_len = calcDataLen(self, tv) catch return error.AnalysisFail;
-            try self.data.segments.putNoClobber(self.base.allocator, decl, .{ .data = undefined, .len = data_len });
+            try self.data.segments.append(self.base.allocator, .{
+                .decl = decl,
+                .data = undefined,
+                .len = data_len,
+            });
+
+            // detect if we can replace it into a to-be-deleted decl's spot to ensure no gaps are
+            // made in our data segment
+            const idx: ?usize = for (self.data.segments.items) |entry, i| {
+                if (entry.decl.deletion_flag) break i;
+            } else null;
+            if (idx) |id| {
+                const old_decl = self.data.segments.swapRemove(id); // current decl is now in to-be-deleted decl's spot
+                // re-append to end of list so it can be cleaned up by `freeDecl`
+                try self.data.segments.append(self.base.allocator, old_decl);
+            }
         },
         .Fn => if (self.getFuncidx(decl) == null) switch (tv.val.tag()) {
             // dependent on function type, appends it to the correct list
@@ -193,7 +223,6 @@ fn calcDataLen(bin_file: *Wasm, typed_value: TypedValue) DataLenError!u32 {
 // Generate code for the Decl, storing it in memory to be later written to
 // the file on flush().
 pub fn updateDecl(self: *Wasm, module: *Module, decl: *Module.Decl) !void {
-    std.debug.print("Updating '{s}'\n", .{decl.name});
     const typed_value = decl.typed_value.most_recent.typed_value;
 
     const fn_data = &decl.fn_link.wasm;
@@ -262,10 +291,12 @@ pub fn freeDecl(self: *Wasm, decl: *Module.Decl) void {
             else => unreachable,
         }
     }
+    if (self.data.idx(decl)) |idx| {
+        _ = self.data.segments.swapRemove(idx);
+    }
     decl.fn_link.wasm.functype.deinit(self.base.allocator);
     decl.fn_link.wasm.code.deinit(self.base.allocator);
     decl.fn_link.wasm.idx_refs.deinit(self.base.allocator);
-    _ = self.data.segments.orderedRemove(decl);
     decl.fn_link.wasm = undefined;
 }
 
@@ -468,7 +499,7 @@ pub fn flushModule(self: *Wasm, comp: *Compilation) !void {
         try leb.writeULEB128(writer, self.data.size());
 
         // write payload
-        for (self.data.segments.items()) |entry| try writer.writeAll(entry.value.data[0..entry.value.len]);
+        for (self.data.segments.items) |entry| try writer.writeAll(entry.data[0..entry.len]);
 
         try writeVecSectionHeader(
             file,
@@ -742,7 +773,7 @@ fn linkWithLLD(self: *Wasm, comp: *Compilation) !void {
 /// Get the current index of a given Decl in the function list
 /// This will correctly provide the index, regardless whether the function is extern or not
 /// TODO: we could maintain a hash map to potentially make this simpler
-fn getFuncidx(self: Wasm, decl: *const Module.Decl) ?u32 {
+fn getFuncidx(self: Wasm, decl: *Module.Decl) ?u32 {
     var offset: u32 = 0;
     const slice = switch (decl.typed_value.most_recent.typed_value.val.tag()) {
         .function => blk: {
-- 
cgit v1.2.3


From 47f36427887fc7d1646cfb7ed8eeeeb14cd3555b Mon Sep 17 00:00:00 2001
From: Luuk de Gram <Luukdegram@users.noreply.github.com>
Date: Sun, 4 Apr 2021 20:31:35 +0200
Subject: Cleanup

---
 src/codegen/wasm.zig | 10 ++++-----
 src/link/Wasm.zig    | 59 ++++++++++++++++++++++++++--------------------------
 2 files changed, 35 insertions(+), 34 deletions(-)

(limited to 'src')

diff --git a/src/codegen/wasm.zig b/src/codegen/wasm.zig
index 3e20f94d6f..a5069de956 100644
--- a/src/codegen/wasm.zig
+++ b/src/codegen/wasm.zig
@@ -622,9 +622,9 @@ pub const Context = struct {
                 // Write instructions
                 // TODO: check for and handle death of instructions
                 const mod_fn = blk: {
-                    if (tv.val.castTag(.function)) |func| break :blk func.data;
-                    if (tv.val.castTag(.extern_fn)) |ext_fn| return Result.appended; // don't need code body for extern functions
-                    return self.fail(.{ .node_offset = 0 }, "TODO: Wasm codegen for decl type '{s}'", .{tv.ty.tag()});
+                    if (typed_value.val.castTag(.function)) |func| break :blk func.data;
+                    if (typed_value.val.castTag(.extern_fn)) |ext_fn| return Result.appended; // don't need code body for extern functions
+                    return self.fail(.{ .node_offset = 0 }, "TODO: Wasm codegen for decl type '{s}'", .{typed_value.ty.tag()});
                 };
 
                 // Reserve space to write the size after generating the code as well as space for locals count
@@ -686,9 +686,9 @@ pub const Context = struct {
                     try self.code.append(@intCast(u8, int_byte));
                     return Result.appended;
                 }
-                return self.fail(self.decl.src(), "TODO: Implement codegen for int type: '{}'", .{typed_value.ty});
+                return self.fail(.{ .node_offset = 0 }, "TODO: Implement codegen for int type: '{}'", .{typed_value.ty});
             },
-            else => |tag| return self.fail(self.decl.src(), "TODO: Implement zig type codegen for type: '{s}'", .{tag}),
+            else => |tag| return self.fail(.{ .node_offset = 0 }, "TODO: Implement zig type codegen for type: '{s}'", .{tag}),
         }
     }
 
diff --git a/src/link/Wasm.zig b/src/link/Wasm.zig
index e94f61d54a..5f878ca5ac 100644
--- a/src/link/Wasm.zig
+++ b/src/link/Wasm.zig
@@ -38,8 +38,11 @@ pub const DataSection = struct {
     /// Every data object will be appended to this list,
     /// containing its `Decl`, the data in bytes, and its length.
     segments: std.ArrayListUnmanaged(struct {
+        /// The decl that lives inside the 'data' section such as an array
         decl: *Module.Decl,
+        /// The contents of the data in bytes
         data: [*]const u8,
+        /// The length of the contents inside the 'data' section
         len: u32,
     }) = .{},
 
@@ -72,7 +75,7 @@ pub const DataSection = struct {
     }
 
     /// Returns the index of a declaration and `null` when not found
-    pub fn idx(self: DataSection, decl: *Module.Decl) ?usize {
+    pub fn getIdx(self: DataSection, decl: *Module.Decl) ?usize {
         return for (self.segments.items) |entry, i| {
             if (entry.decl == decl) break i;
         } else null;
@@ -145,9 +148,8 @@ pub fn deinit(self: *Wasm) void {
         decl.fn_link.wasm.idx_refs.deinit(self.base.allocator);
     }
     for (self.data.segments.items) |entry| {
-        entry.decl.fn_link.wasm.functype.deinit(self.base.allocator);
+        // decl's that live in data section do not generate idx_refs or func types
         entry.decl.fn_link.wasm.code.deinit(self.base.allocator);
-        entry.decl.fn_link.wasm.idx_refs.deinit(self.base.allocator);
     }
     self.funcs.deinit(self.base.allocator);
     self.ext_funcs.deinit(self.base.allocator);
@@ -155,15 +157,14 @@ pub fn deinit(self: *Wasm) void {
 }
 
 pub fn allocateDeclIndexes(self: *Wasm, decl: *Module.Decl) !void {
-    const tv = decl.typed_value.most_recent.typed_value;
-    decl.fn_link.wasm = .{};
+    const typed_value = decl.typed_value.most_recent.typed_value;
 
-    switch (tv.ty.zigTypeTag()) {
+    switch (typed_value.ty.zigTypeTag()) {
         .Array => {
             // if the codegen of the given decl contributes to the data segment
             // we must calculate its data length now so that the data offsets are available
             // to other decls when called
-            const data_len = calcDataLen(self, tv) catch return error.AnalysisFail;
+            const data_len = self.calcDataLen(typed_value);
             try self.data.segments.append(self.base.allocator, .{
                 .decl = decl,
                 .data = undefined,
@@ -175,13 +176,18 @@ pub fn allocateDeclIndexes(self: *Wasm, decl: *Module.Decl) !void {
             const idx: ?usize = for (self.data.segments.items) |entry, i| {
                 if (entry.decl.deletion_flag) break i;
             } else null;
+
             if (idx) |id| {
-                const old_decl = self.data.segments.swapRemove(id); // current decl is now in to-be-deleted decl's spot
-                // re-append to end of list so it can be cleaned up by `freeDecl`
-                try self.data.segments.append(self.base.allocator, old_decl);
+                // swap to-be-removed decl with newly added to create a contigious valid data segment
+                const items = self.data.segments.items;
+                std.mem.swap(
+                    std.meta.Child(@TypeOf(items)),
+                    &items[id],
+                    &items[items.len - 1],
+                );
             }
         },
-        .Fn => if (self.getFuncidx(decl) == null) switch (tv.val.tag()) {
+        .Fn => if (self.getFuncidx(decl) == null) switch (typed_value.val.tag()) {
             // dependent on function type, appends it to the correct list
             .function => try self.funcs.append(self.base.allocator, decl),
             .extern_fn => try self.ext_funcs.append(self.base.allocator, decl),
@@ -191,32 +197,26 @@ pub fn allocateDeclIndexes(self: *Wasm, decl: *Module.Decl) !void {
     }
 }
 
-// TODO, remove this and use the existing error mechanism
-const DataLenError = error{
-    TODO_WASM_CalcDataLenArray,
-    TODO_WASM_CalcDataLen,
-};
 /// Calculates the length of the data segment that will be occupied by the given `TypedValue`
-fn calcDataLen(bin_file: *Wasm, typed_value: TypedValue) DataLenError!u32 {
+fn calcDataLen(self: *Wasm, typed_value: TypedValue) u32 {
     switch (typed_value.ty.zigTypeTag()) {
         .Array => {
             if (typed_value.val.castTag(.bytes)) |payload| {
                 if (typed_value.ty.sentinel()) |sentinel| {
-                    return @intCast(u32, payload.data.len) + try calcDataLen(bin_file, .{
+                    return @intCast(u32, payload.data.len) + self.calcDataLen(.{
                         .ty = typed_value.ty.elemType(),
                         .val = sentinel,
                     });
                 }
-                return @intCast(u32, payload.data.len);
             }
-            return error.TODO_WASM_CalcDataLenArray;
+            return @intCast(u32, typed_value.ty.arrayLen());
         },
         .Int => {
-            const info = typed_value.ty.intInfo(bin_file.base.options.target);
-            return info.bits / 8;
+            const info = typed_value.ty.intInfo(self.base.options.target);
+            return std.math.divCeil(u32, info.bits, 8) catch unreachable;
         },
         .Pointer => return 4,
-        else => return error.TODO_WASM_CalcDataLen,
+        else => unreachable,
     }
 }
 
@@ -256,7 +256,7 @@ pub fn updateDecl(self: *Wasm, module: *Module, decl: *Module.Decl) !void {
         .Fn => {
             // as locals are patched afterwards, the offsets of funcidx's are off,
             // here we update them to correct them
-            for (decl.fn_link.wasm.idx_refs.items) |*func| {
+            for (fn_data.idx_refs.items) |*func| {
                 // For each local, add 6 bytes (count + type)
                 func.offset += @intCast(u32, context.locals.items.len * 6);
             }
@@ -291,7 +291,7 @@ pub fn freeDecl(self: *Wasm, decl: *Module.Decl) void {
             else => unreachable,
         }
     }
-    if (self.data.idx(decl)) |idx| {
+    if (self.data.getIdx(decl)) |idx| {
         _ = self.data.segments.swapRemove(idx);
     }
     decl.fn_link.wasm.functype.deinit(self.base.allocator);
@@ -314,6 +314,7 @@ pub fn flushModule(self: *Wasm, comp: *Compilation) !void {
 
     const file = self.base.file.?;
     const header_size = 5 + 1;
+    const data_size = self.data.size();
 
     // No need to rewrite the magic/version header
     try file.setEndPos(@sizeOf(@TypeOf(wasm.magic ++ wasm.version)));
@@ -384,7 +385,7 @@ pub fn flushModule(self: *Wasm, comp: *Compilation) !void {
     }
 
     // Memory section
-    if (self.data.size() != 0) {
+    if (data_size != 0) {
         const header_offset = try reserveVecSectionHeader(file);
         const writer = file.writer();
 
@@ -434,7 +435,7 @@ pub fn flushModule(self: *Wasm, comp: *Compilation) !void {
         }
 
         // export memory if size is not 0
-        if (self.data.size() != 0) {
+        if (data_size != 0) {
             try leb.writeULEB128(writer, @intCast(u32, "memory".len));
             try writer.writeAll("memory");
             try writer.writeByte(wasm.externalKind(.memory));
@@ -483,7 +484,7 @@ pub fn flushModule(self: *Wasm, comp: *Compilation) !void {
     }
 
     // Data section
-    if (self.data.size() != 0) {
+    if (data_size != 0) {
         const header_offset = try reserveVecSectionHeader(file);
         const writer = file.writer();
         var len: u32 = 0;
@@ -496,7 +497,7 @@ pub fn flushModule(self: *Wasm, comp: *Compilation) !void {
         try writer.writeByte(wasm.opcode(.end));
 
         // payload size
-        try leb.writeULEB128(writer, self.data.size());
+        try leb.writeULEB128(writer, data_size);
 
         // write payload
         for (self.data.segments.items) |entry| try writer.writeAll(entry.data[0..entry.len]);
-- 
cgit v1.2.3


From ff5774d93d9d952a74fab3666d4480f534c770db Mon Sep 17 00:00:00 2001
From: Luuk de Gram <Luukdegram@users.noreply.github.com>
Date: Thu, 8 Apr 2021 22:44:29 +0200
Subject: Refactor link/wasm.zig to use offset table

This refactor inserts an offset table into wasm's data section
where each offset points to the actual data region.
This means we can keep offset indexes consistent and do not
have to perform any computation to determine where in the data section
something like a static string exists. Instead, during runtime,
it will load the data offset onto the stack.
---
 lib/std/wasm.zig     |   3 +
 src/Module.zig       |  13 +-
 src/codegen/wasm.zig |  39 +++++-
 src/link.zig         |   2 +-
 src/link/Wasm.zig    | 357 ++++++++++++++++++++++++++++-----------------------
 5 files changed, 238 insertions(+), 176 deletions(-)

(limited to 'src')

diff --git a/lib/std/wasm.zig b/lib/std/wasm.zig
index 89ab9b6e12..ad6b947f67 100644
--- a/lib/std/wasm.zig
+++ b/lib/std/wasm.zig
@@ -280,3 +280,6 @@ pub const block_empty: u8 = 0x40;
 // binary constants
 pub const magic = [_]u8{ 0x00, 0x61, 0x73, 0x6D }; // \0asm
 pub const version = [_]u8{ 0x01, 0x00, 0x00, 0x00 }; // version 1
+
+// Each wasm page size is 64kB
+pub const page_size = 64 * 1024;
diff --git a/src/Module.zig b/src/Module.zig
index 8360b3245b..346760728e 100644
--- a/src/Module.zig
+++ b/src/Module.zig
@@ -3029,9 +3029,12 @@ fn astgenAndSemaVarDecl(
         };
         defer gen_scope.instructions.deinit(mod.gpa);
 
-        const init_result_loc: AstGen.ResultLoc = if (var_decl.ast.type_node != 0) .{
-            .ty = try AstGen.expr(&gen_scope, &gen_scope.base, .{ .ty = .type_type }, var_decl.ast.type_node),
-        } else .none;
+        const init_result_loc: AstGen.ResultLoc = if (var_decl.ast.type_node != 0)
+            .{
+                .ty = try AstGen.expr(&gen_scope, &gen_scope.base, .{ .ty = .type_type }, var_decl.ast.type_node),
+            }
+        else
+            .none;
 
         const init_inst = try AstGen.comptimeExpr(
             &gen_scope,
@@ -3834,7 +3837,7 @@ fn allocateNewDecl(
             .elf => .{ .elf = link.File.Elf.TextBlock.empty },
             .macho => .{ .macho = link.File.MachO.TextBlock.empty },
             .c => .{ .c = link.File.C.DeclBlock.empty },
-            .wasm => .{ .wasm = {} },
+            .wasm => .{ .wasm = link.File.Wasm.DeclBlock.empty },
             .spirv => .{ .spirv = {} },
         },
         .fn_link = switch (mod.comp.bin_file.tag) {
@@ -3842,7 +3845,7 @@ fn allocateNewDecl(
             .elf => .{ .elf = link.File.Elf.SrcFn.empty },
             .macho => .{ .macho = link.File.MachO.SrcFn.empty },
             .c => .{ .c = link.File.C.FnBlock.empty },
-            .wasm => .{ .wasm = .{} },
+            .wasm => .{ .wasm = link.File.Wasm.FnData.empty },
             .spirv => .{ .spirv = .{} },
         },
         .generation = 0,
diff --git a/src/codegen/wasm.zig b/src/codegen/wasm.zig
index a5069de956..400a5cd1a3 100644
--- a/src/codegen/wasm.zig
+++ b/src/codegen/wasm.zig
@@ -543,11 +543,20 @@ pub const Context = struct {
 
     /// Using a given `Type`, returns the corresponding wasm Valtype
     fn typeToValtype(self: *Context, src: LazySrcLoc, ty: Type) InnerError!wasm.Valtype {
-        return switch (ty.tag()) {
-            .f32 => .f32,
-            .f64 => .f64,
-            .u32, .i32, .bool => .i32,
-            .u64, .i64 => .i64,
+        return switch (ty.zigTypeTag()) {
+            .Float => blk: {
+                const bits = ty.floatBits(self.target);
+                if (bits == 16 or bits == 32) break :blk wasm.Valtype.f32;
+                if (bits == 64) break :blk wasm.Valtype.f64;
+                return self.fail(src, "Float bit size not supported by wasm: '{d}'", .{bits});
+            },
+            .Int => blk: {
+                const info = ty.intInfo(self.target);
+                if (info.bits <= 32) break :blk wasm.Valtype.i32;
+                if (info.bits > 32 and info.bits <= 64) break :blk wasm.Valtype.i64;
+                return self.fail(src, "Integer bit size not supported by wasm: '{d}'", .{info.bits});
+            },
+            .Bool, .Pointer => wasm.Valtype.i32,
             else => self.fail(src, "TODO - Wasm valtype for type '{s}'", .{ty.tag()}),
         };
     }
@@ -624,7 +633,7 @@ pub const Context = struct {
                 const mod_fn = blk: {
                     if (typed_value.val.castTag(.function)) |func| break :blk func.data;
                     if (typed_value.val.castTag(.extern_fn)) |ext_fn| return Result.appended; // don't need code body for extern functions
-                    return self.fail(.{ .node_offset = 0 }, "TODO: Wasm codegen for decl type '{s}'", .{typed_value.ty.tag()});
+                    unreachable;
                 };
 
                 // Reserve space to write the size after generating the code as well as space for locals count
@@ -680,7 +689,7 @@ pub const Context = struct {
                 } else return self.fail(.{ .node_offset = 0 }, "TODO implement gen for more kinds of arrays", .{});
             },
             .Int => {
-                const info = typed_value.ty.intInfo(self.bin_file.base.options.target);
+                const info = typed_value.ty.intInfo(self.target);
                 if (info.bits == 8 and info.signedness == .unsigned) {
                     const int_byte = typed_value.val.toUnsignedInt();
                     try self.code.append(@intCast(u8, int_byte));
@@ -856,6 +865,22 @@ pub const Context = struct {
                     else => |bits| return self.fail(inst.base.src, "Wasm TODO: emitConstant for float with {d} bits", .{bits}),
                 }
             },
+            .Pointer => {
+                if (inst.val.castTag(.decl_ref)) |payload| {
+                    const decl = payload.data;
+
+                    // offset into the offset table within the 'data' section
+                    const ptr_width = self.target.cpu.arch.ptrBitWidth() / 8;
+                    try writer.writeByte(wasm.opcode(.i32_const));
+                    try leb.writeULEB128(writer, decl.link.wasm.offset_index * ptr_width);
+
+                    // memory instruction followed by their memarg immediate
+                    // memarg ::== x:u32, y:u32 => {align x, offset y}
+                    try writer.writeByte(wasm.opcode(.i32_load));
+                    try leb.writeULEB128(writer, @as(u32, 0));
+                    try leb.writeULEB128(writer, @as(u32, 0));
+                } else return self.fail(inst.base.src, "Wasm TODO: emitConstant for other const pointer tag {s}", .{inst.val.tag()});
+            },
             .Void => {},
             else => |ty| return self.fail(inst.base.src, "Wasm TODO: emitConstant for zigTypeTag {s}", .{ty}),
         }
diff --git a/src/link.zig b/src/link.zig
index 162b55a0d0..c0f9a50b2b 100644
--- a/src/link.zig
+++ b/src/link.zig
@@ -138,7 +138,7 @@ pub const File = struct {
         coff: Coff.TextBlock,
         macho: MachO.TextBlock,
         c: C.DeclBlock,
-        wasm: void,
+        wasm: Wasm.DeclBlock,
         spirv: void,
     };
 
diff --git a/src/link/Wasm.zig b/src/link/Wasm.zig
index 5f878ca5ac..2186afb4a6 100644
--- a/src/link/Wasm.zig
+++ b/src/link/Wasm.zig
@@ -20,70 +20,7 @@ const TypedValue = @import("../TypedValue.zig");
 
 pub const base_tag = link.File.Tag.wasm;
 
-pub const FnData = struct {
-    /// Generated code for the type of the function
-    functype: std.ArrayListUnmanaged(u8) = .{},
-    /// Generated code for the body of the function
-    code: std.ArrayListUnmanaged(u8) = .{},
-    /// Locations in the generated code where function indexes must be filled in.
-    /// This must be kept ordered by offset.
-    idx_refs: std.ArrayListUnmanaged(struct { offset: u32, decl: *Module.Decl }) = .{},
-};
-
-/// Data section of the wasm binary
-/// Each declaration will have its own 'data_segment' within the section
-/// where the offset is calculated using the previous segments and the content length
-/// of the data
-pub const DataSection = struct {
-    /// Every data object will be appended to this list,
-    /// containing its `Decl`, the data in bytes, and its length.
-    segments: std.ArrayListUnmanaged(struct {
-        /// The decl that lives inside the 'data' section such as an array
-        decl: *Module.Decl,
-        /// The contents of the data in bytes
-        data: [*]const u8,
-        /// The length of the contents inside the 'data' section
-        len: u32,
-    }) = .{},
-
-    /// Returns the offset into the data segment based on a given `Decl`
-    pub fn offset(self: DataSection, decl: *const Module.Decl) u32 {
-        var cur_offset: u32 = 0;
-        return for (self.segments.items) |entry| {
-            if (entry.decl == decl) break cur_offset;
-            cur_offset += entry.len;
-        } else unreachable; // offset() called on declaration that does not live inside 'data' section
-    }
-
-    /// Returns the total payload size of the data section
-    pub fn size(self: DataSection) u32 {
-        var total: u32 = 0;
-        for (self.segments.items) |entry| {
-            total += entry.len;
-        }
-        return total;
-    }
-
-    /// Updates the data in the data segment belonging to the given decl.
-    /// It's illegal behaviour to call this before allocateDeclIndexes was called
-    /// `data` must be managed externally with a lifetime that last as long as codegen does.
-    pub fn updateData(self: DataSection, decl: *Module.Decl, data: []const u8) void {
-        const entry = for (self.segments.items) |*item| {
-            if (item.decl == decl) break item;
-        } else unreachable; // called updateData before the declaration was added to data segments
-        entry.data = data.ptr;
-    }
-
-    /// Returns the index of a declaration and `null` when not found
-    pub fn getIdx(self: DataSection, decl: *Module.Decl) ?usize {
-        return for (self.segments.items) |entry, i| {
-            if (entry.decl == decl) break i;
-        } else null;
-    }
-};
-
 base: link.File,
-
 /// List of all function Decls to be written to the output file. The index of
 /// each Decl in this list at the time of writing the binary is used as the
 /// function index. In the event where ext_funcs' size is not 0, the index of
@@ -98,10 +35,67 @@ ext_funcs: std.ArrayListUnmanaged(*Module.Decl) = .{},
 /// to support existing code.
 /// TODO: Allow setting this through a flag?
 host_name: []const u8 = "env",
-/// Map of declarations with its bytes payload, used to keep track of all data segments
-/// that needs to be emit when creating the wasm binary.
-/// The `DataSection`'s lifetime must be kept alive until the linking stage.
-data: DataSection = .{},
+/// The last `DeclBlock` that was initialized will be saved here.
+last_block: ?*DeclBlock = null,
+/// Table with offsets, each element represents an offset with the value being
+/// the offset into the 'data' section where the data lives
+offset_table: std.ArrayListUnmanaged(u32) = .{},
+/// List of offset indexes which are free to be used for new decls.
+/// Each element's value is an index into the offset_table.
+offset_table_free_list: std.ArrayListUnmanaged(u32) = .{},
+/// List of all `Decl` that are currently alive.
+/// This is ment for bookkeeping so we can safely cleanup all codegen memory
+/// when calling `deinit`
+symbols: std.ArrayListUnmanaged(*Module.Decl) = .{},
+/// Contains indexes into `symbols` that are no longer used and can be populated instead,
+/// removing the need to search for a symbol and remove it when it's dereferenced.
+symbols_free_list: std.ArrayListUnmanaged(u32) = .{},
+
+pub const FnData = struct {
+    /// Generated code for the type of the function
+    functype: std.ArrayListUnmanaged(u8),
+    /// Generated code for the body of the function
+    code: std.ArrayListUnmanaged(u8),
+    /// Locations in the generated code where function indexes must be filled in.
+    /// This must be kept ordered by offset.
+    idx_refs: std.ArrayListUnmanaged(struct { offset: u32, decl: *Module.Decl }),
+
+    pub const empty: FnData = .{
+        .functype = .{},
+        .code = .{},
+        .idx_refs = .{},
+    };
+};
+
+pub const DeclBlock = struct {
+    /// Determines whether the `DeclBlock` has been initialized for codegen.
+    init: bool,
+    /// Index into the `symbols` list.
+    symbol_index: u32,
+    /// Index into the offset table
+    offset_index: u32,
+    /// The size of the block, i.e. how many bytes of the data section it occupies.
+    /// Will be 0 when the Decl does not live inside the data section, in which case `data` is undefined.
+    size: u32,
+    /// Points to the previous and next blocks.
+    /// Can be used to find the total size, and used to calculate the `offset` based on the previous block.
+    prev: ?*DeclBlock,
+    next: ?*DeclBlock,
+    /// Pointer to data that will be written to the 'data' section.
+    /// This data either lives in `FnData.code` or is externally managed.
+    /// For data that does not live inside the 'data' section, this field will be undefined. (size == 0).
+    data: [*]const u8,
+
+    pub const empty: DeclBlock = .{
+        .init = false,
+        .symbol_index = 0,
+        .offset_index = 0,
+        .size = 0,
+        .prev = null,
+        .next = null,
+        .data = undefined,
+    };
+};
 
 pub fn openPath(allocator: *Allocator, sub_path: []const u8, options: link.Options) !*Wasm {
     assert(options.object_format == .wasm);
@@ -137,94 +131,66 @@ pub fn createEmpty(gpa: *Allocator, options: link.Options) !*Wasm {
 }
 
 pub fn deinit(self: *Wasm) void {
-    for (self.funcs.items) |decl| {
-        decl.fn_link.wasm.functype.deinit(self.base.allocator);
-        decl.fn_link.wasm.code.deinit(self.base.allocator);
-        decl.fn_link.wasm.idx_refs.deinit(self.base.allocator);
+    while (self.symbols_free_list.popOrNull()) |idx| {
+        //dead decl's so remove them from symbol list before trying to clean them up
+        _ = self.symbols.swapRemove(idx);
     }
-    for (self.ext_funcs.items) |decl| {
+    for (self.symbols.items) |decl| {
         decl.fn_link.wasm.functype.deinit(self.base.allocator);
         decl.fn_link.wasm.code.deinit(self.base.allocator);
         decl.fn_link.wasm.idx_refs.deinit(self.base.allocator);
     }
-    for (self.data.segments.items) |entry| {
-        // decl's that live in data section do not generate idx_refs or func types
-        entry.decl.fn_link.wasm.code.deinit(self.base.allocator);
-    }
+
     self.funcs.deinit(self.base.allocator);
     self.ext_funcs.deinit(self.base.allocator);
-    self.data.segments.deinit(self.base.allocator);
+    self.offset_table.deinit(self.base.allocator);
+    self.offset_table_free_list.deinit(self.base.allocator);
+    self.symbols.deinit(self.base.allocator);
+    self.symbols_free_list.deinit(self.base.allocator);
 }
 
 pub fn allocateDeclIndexes(self: *Wasm, decl: *Module.Decl) !void {
-    const typed_value = decl.typed_value.most_recent.typed_value;
+    if (decl.link.wasm.init) return;
 
-    switch (typed_value.ty.zigTypeTag()) {
-        .Array => {
-            // if the codegen of the given decl contributes to the data segment
-            // we must calculate its data length now so that the data offsets are available
-            // to other decls when called
-            const data_len = self.calcDataLen(typed_value);
-            try self.data.segments.append(self.base.allocator, .{
-                .decl = decl,
-                .data = undefined,
-                .len = data_len,
-            });
+    try self.offset_table.ensureCapacity(self.base.allocator, self.offset_table.items.len + 1);
+    try self.symbols.ensureCapacity(self.base.allocator, self.symbols.items.len + 1);
 
-            // detect if we can replace it into a to-be-deleted decl's spot to ensure no gaps are
-            // made in our data segment
-            const idx: ?usize = for (self.data.segments.items) |entry, i| {
-                if (entry.decl.deletion_flag) break i;
-            } else null;
-
-            if (idx) |id| {
-                // swap to-be-removed decl with newly added to create a contigious valid data segment
-                const items = self.data.segments.items;
-                std.mem.swap(
-                    std.meta.Child(@TypeOf(items)),
-                    &items[id],
-                    &items[items.len - 1],
-                );
-            }
-        },
-        .Fn => if (self.getFuncidx(decl) == null) switch (typed_value.val.tag()) {
+    const block = &decl.link.wasm;
+    block.init = true;
+
+    if (self.symbols_free_list.popOrNull()) |index| {
+        block.symbol_index = index;
+    } else {
+        block.symbol_index = @intCast(u32, self.symbols.items.len);
+        _ = self.symbols.addOneAssumeCapacity();
+    }
+
+    if (self.offset_table_free_list.popOrNull()) |index| {
+        block.offset_index = index;
+    } else {
+        block.offset_index = @intCast(u32, self.offset_table.items.len);
+        _ = self.offset_table.addOneAssumeCapacity();
+    }
+
+    self.offset_table.items[block.offset_index] = 0;
+
+    const typed_value = decl.typed_value.most_recent.typed_value;
+    if (typed_value.ty.zigTypeTag() == .Fn) {
+        switch (typed_value.val.tag()) {
             // dependent on function type, appends it to the correct list
             .function => try self.funcs.append(self.base.allocator, decl),
             .extern_fn => try self.ext_funcs.append(self.base.allocator, decl),
             else => unreachable,
-        },
-        else => {},
-    }
-}
-
-/// Calculates the length of the data segment that will be occupied by the given `TypedValue`
-fn calcDataLen(self: *Wasm, typed_value: TypedValue) u32 {
-    switch (typed_value.ty.zigTypeTag()) {
-        .Array => {
-            if (typed_value.val.castTag(.bytes)) |payload| {
-                if (typed_value.ty.sentinel()) |sentinel| {
-                    return @intCast(u32, payload.data.len) + self.calcDataLen(.{
-                        .ty = typed_value.ty.elemType(),
-                        .val = sentinel,
-                    });
-                }
-            }
-            return @intCast(u32, typed_value.ty.arrayLen());
-        },
-        .Int => {
-            const info = typed_value.ty.intInfo(self.base.options.target);
-            return std.math.divCeil(u32, info.bits, 8) catch unreachable;
-        },
-        .Pointer => return 4,
-        else => unreachable,
+        }
     }
 }
 
 // Generate code for the Decl, storing it in memory to be later written to
 // the file on flush().
 pub fn updateDecl(self: *Wasm, module: *Module, decl: *Module.Decl) !void {
-    const typed_value = decl.typed_value.most_recent.typed_value;
+    std.debug.assert(decl.link.wasm.init); // Must call allocateDeclIndexes()
 
+    const typed_value = decl.typed_value.most_recent.typed_value;
     const fn_data = &decl.fn_link.wasm;
     fn_data.functype.items.len = 0;
     fn_data.code.items.len = 0;
@@ -252,28 +218,43 @@ pub fn updateDecl(self: *Wasm, module: *Module, decl: *Module.Decl) !void {
         else => |e| return err,
     };
 
-    switch (typed_value.ty.zigTypeTag()) {
-        .Fn => {
-            // as locals are patched afterwards, the offsets of funcidx's are off,
-            // here we update them to correct them
-            for (fn_data.idx_refs.items) |*func| {
-                // For each local, add 6 bytes (count + type)
-                func.offset += @intCast(u32, context.locals.items.len * 6);
-            }
+    const code: []const u8 = switch (result) {
+        .appended => @as([]const u8, context.code.items),
+        .externally_managed => |payload| payload,
+    };
 
-            fn_data.functype = context.func_type_data.toUnmanaged();
-            fn_data.code = context.code.toUnmanaged();
-        },
-        .Array => switch (result) {
-            .appended => {
-                fn_data.functype = context.func_type_data.toUnmanaged();
-                fn_data.code = context.code.toUnmanaged();
-                self.data.updateData(decl, fn_data.code.items);
-            },
-            .externally_managed => |payload| self.data.updateData(decl, payload),
-        },
-        else => return error.TODO,
+    fn_data.code = context.code.toUnmanaged();
+    fn_data.functype = context.func_type_data.toUnmanaged();
+
+    const block = &decl.link.wasm;
+    if (typed_value.ty.zigTypeTag() == .Fn) {
+        // as locals are patched afterwards, the offsets of funcidx's are off,
+        // here we update them to correct them
+        for (fn_data.idx_refs.items) |*func| {
+            // For each local, add 6 bytes (count + type)
+            func.offset += @intCast(u32, context.locals.items.len * 6);
+        }
+    } else {
+        block.size = @intCast(u32, code.len);
+        block.data = code.ptr;
+    }
+
+    // If we're updating an existing decl, unplug it first
+    // to avoid infinite loops due to earlier links
+    if (block.prev) |prev| {
+        prev.next = block.next;
+    }
+    if (block.next) |next| {
+        next.prev = block.prev;
     }
+
+    if (self.last_block) |last| {
+        if (last != block) {
+            last.next = block;
+            block.prev = last;
+        }
+    }
+    self.last_block = block;
 }
 
 pub fn updateDeclExports(
@@ -291,9 +272,24 @@ pub fn freeDecl(self: *Wasm, decl: *Module.Decl) void {
             else => unreachable,
         }
     }
-    if (self.data.getIdx(decl)) |idx| {
-        _ = self.data.segments.swapRemove(idx);
+    const block = &decl.link.wasm;
+
+    if (self.last_block == block) {
+        self.last_block = block.prev;
+    }
+
+    if (block.prev) |prev| {
+        prev.next = block.next;
     }
+
+    if (block.next) |next| {
+        next.prev = block.prev;
+    }
+
+    self.offset_table_free_list.append(self.base.allocator, decl.link.wasm.offset_index) catch {};
+    self.symbols_free_list.append(self.base.allocator, decl.link.wasm.symbol_index) catch {};
+    block.init = false;
+
     decl.fn_link.wasm.functype.deinit(self.base.allocator);
     decl.fn_link.wasm.code.deinit(self.base.allocator);
     decl.fn_link.wasm.idx_refs.deinit(self.base.allocator);
@@ -314,7 +310,25 @@ pub fn flushModule(self: *Wasm, comp: *Compilation) !void {
 
     const file = self.base.file.?;
     const header_size = 5 + 1;
-    const data_size = self.data.size();
+    // ptr_width in bytes
+    const ptr_width = self.base.options.target.cpu.arch.ptrBitWidth() / 8;
+    // The size of the offset table in bytes
+    // The table contains an entry for each decl, holding its offset into
+    // the 'data' section
+    const offset_table_size = @intCast(u32, self.offset_table.items.len * ptr_width);
+
+    // The size of the data, this together with `offset_table_size` amounts to the
+    // total size of the 'data' section
+    var first_decl: ?*DeclBlock = null;
+    const data_size: u32 = if (self.last_block) |last| blk: {
+        var size = last.size;
+        var cur = last;
+        while (cur.prev) |prev| : (cur = prev) {
+            size += prev.size;
+        }
+        first_decl = cur;
+        break :blk size;
+    } else 0;
 
     // No need to rewrite the magic/version header
     try file.setEndPos(@sizeOf(@TypeOf(wasm.magic ++ wasm.version)));
@@ -396,8 +410,8 @@ pub fn flushModule(self: *Wasm, comp: *Compilation) !void {
             writer,
             try std.math.divCeil(
                 u32,
-                self.data.size(),
-                std.mem.page_size,
+                offset_table_size + data_size,
+                std.wasm.page_size,
             ),
         );
         try writeVecSectionHeader(
@@ -496,18 +510,35 @@ pub fn flushModule(self: *Wasm, comp: *Compilation) !void {
         try leb.writeILEB128(writer, @as(i32, 0));
         try writer.writeByte(wasm.opcode(.end));
 
-        // payload size
-        try leb.writeULEB128(writer, data_size);
+        const total_size = offset_table_size + data_size;
 
-        // write payload
-        for (self.data.segments.items) |entry| try writer.writeAll(entry.data[0..entry.len]);
+        // offset table + data size
+        try leb.writeULEB128(writer, total_size);
 
+        // fill in the offset table and the data segments
+        const file_offset = try file.getPos();
+        var cur = first_decl;
+        var data_offset = offset_table_size;
+        while (cur) |cur_block| : (cur = cur_block.next) {
+            if (cur_block.size == 0) continue;
+            std.debug.assert(cur_block.init);
+
+            const offset = (cur_block.offset_index) * ptr_width;
+            var buf: [4]u8 = undefined;
+            std.mem.writeIntLittle(u32, &buf, data_offset);
+
+            try file.pwriteAll(&buf, file_offset + offset);
+            try file.pwriteAll(cur_block.data[0..cur_block.size], file_offset + data_offset);
+            data_offset += cur_block.size;
+        }
+
+        try file.seekTo(file_offset + data_offset);
         try writeVecSectionHeader(
             file,
             header_offset,
             .data,
-            @intCast(u32, (try file.getPos()) - header_offset - header_size),
-            @intCast(u32, 1),
+            @intCast(u32, (file_offset + data_offset) - header_offset - header_size),
+            @intCast(u32, 1), // only 1 data section
         );
     }
 }
-- 
cgit v1.2.3


From eaaf75c1579e0202efb1b8b71155ea147d52c56a Mon Sep 17 00:00:00 2001
From: Luuk de Gram <Luukdegram@users.noreply.github.com>
Date: Fri, 9 Apr 2021 09:24:52 +0200
Subject: Fix memory cleanup and update unplugging to avoid infinite loop

---
 src/link/Wasm.zig | 51 +++++++++++++++++++++++----------------------------
 1 file changed, 23 insertions(+), 28 deletions(-)

(limited to 'src')

diff --git a/src/link/Wasm.zig b/src/link/Wasm.zig
index 2186afb4a6..2dd15db1d4 100644
--- a/src/link/Wasm.zig
+++ b/src/link/Wasm.zig
@@ -47,9 +47,6 @@ offset_table_free_list: std.ArrayListUnmanaged(u32) = .{},
 /// This is ment for bookkeeping so we can safely cleanup all codegen memory
 /// when calling `deinit`
 symbols: std.ArrayListUnmanaged(*Module.Decl) = .{},
-/// Contains indexes into `symbols` that are no longer used and can be populated instead,
-/// removing the need to search for a symbol and remove it when it's dereferenced.
-symbols_free_list: std.ArrayListUnmanaged(u32) = .{},
 
 pub const FnData = struct {
     /// Generated code for the type of the function
@@ -95,6 +92,19 @@ pub const DeclBlock = struct {
         .next = null,
         .data = undefined,
     };
+
+    /// Unplugs the `DeclBlock` from the chain
+    fn unplug(self: *DeclBlock) void {
+        if (self.prev) |prev| {
+            prev.next = self.next;
+        }
+
+        if (self.next) |next| {
+            next.prev = self.prev;
+        }
+        self.next = null;
+        self.prev = null;
+    }
 };
 
 pub fn openPath(allocator: *Allocator, sub_path: []const u8, options: link.Options) !*Wasm {
@@ -131,10 +141,6 @@ pub fn createEmpty(gpa: *Allocator, options: link.Options) !*Wasm {
 }
 
 pub fn deinit(self: *Wasm) void {
-    while (self.symbols_free_list.popOrNull()) |idx| {
-        //dead decl's so remove them from symbol list before trying to clean them up
-        _ = self.symbols.swapRemove(idx);
-    }
     for (self.symbols.items) |decl| {
         decl.fn_link.wasm.functype.deinit(self.base.allocator);
         decl.fn_link.wasm.code.deinit(self.base.allocator);
@@ -146,7 +152,6 @@ pub fn deinit(self: *Wasm) void {
     self.offset_table.deinit(self.base.allocator);
     self.offset_table_free_list.deinit(self.base.allocator);
     self.symbols.deinit(self.base.allocator);
-    self.symbols_free_list.deinit(self.base.allocator);
 }
 
 pub fn allocateDeclIndexes(self: *Wasm, decl: *Module.Decl) !void {
@@ -158,12 +163,8 @@ pub fn allocateDeclIndexes(self: *Wasm, decl: *Module.Decl) !void {
     const block = &decl.link.wasm;
     block.init = true;
 
-    if (self.symbols_free_list.popOrNull()) |index| {
-        block.symbol_index = index;
-    } else {
-        block.symbol_index = @intCast(u32, self.symbols.items.len);
-        _ = self.symbols.addOneAssumeCapacity();
-    }
+    block.symbol_index = @intCast(u32, self.symbols.items.len);
+    self.symbols.appendAssumeCapacity(decl);
 
     if (self.offset_table_free_list.popOrNull()) |index| {
         block.offset_index = index;
@@ -241,12 +242,7 @@ pub fn updateDecl(self: *Wasm, module: *Module, decl: *Module.Decl) !void {
 
     // If we're updating an existing decl, unplug it first
     // to avoid infinite loops due to earlier links
-    if (block.prev) |prev| {
-        prev.next = block.next;
-    }
-    if (block.next) |next| {
-        next.prev = block.prev;
-    }
+    block.unplug();
 
     if (self.last_block) |last| {
         if (last != block) {
@@ -278,16 +274,15 @@ pub fn freeDecl(self: *Wasm, decl: *Module.Decl) void {
         self.last_block = block.prev;
     }
 
-    if (block.prev) |prev| {
-        prev.next = block.next;
-    }
-
-    if (block.next) |next| {
-        next.prev = block.prev;
-    }
+    block.unplug();
 
     self.offset_table_free_list.append(self.base.allocator, decl.link.wasm.offset_index) catch {};
-    self.symbols_free_list.append(self.base.allocator, decl.link.wasm.symbol_index) catch {};
+    _ = self.symbols.swapRemove(block.symbol_index);
+
+    // swapRemove moved the last symbol into the removed symbol's position, so update its symbol_index
+    if (block.symbol_index < self.symbols.items.len)
+        self.symbols.items[block.symbol_index].link.wasm.symbol_index = block.symbol_index;
+
     block.init = false;
 
     decl.fn_link.wasm.functype.deinit(self.base.allocator);
-- 
cgit v1.2.3