| field | value | date |
|---|---|---|
| author | Alex Rønne Petersen <alex@alexrp.com> | 2025-08-20 15:45:53 +0200 |
| committer | Alex Rønne Petersen <alex@alexrp.com> | 2025-09-26 02:02:07 +0200 |
| commit | 86077fe6bdac34fe610f4c0b6bac3d6d1b97c22d (patch) | |
| tree | d8f58b4d4e034d5770c816e886690387a1db7ffe /src/codegen/wasm/CodeGen.zig | |
| parent | 212715f62d3b22a2da18904f570dbc918ca8470a (diff) | |
compiler: move self-hosted backends from src/arch to src/codegen
Diffstat (limited to 'src/codegen/wasm/CodeGen.zig')
| mode | file | lines |
|---|---|---|
| -rw-r--r-- | src/codegen/wasm/CodeGen.zig | 7578 |

1 file changed, 7578 insertions, 0 deletions
diff --git a/src/codegen/wasm/CodeGen.zig b/src/codegen/wasm/CodeGen.zig
new file mode 100644
index 0000000000..d8d8933cc3
--- /dev/null
+++ b/src/codegen/wasm/CodeGen.zig
@@ -0,0 +1,7578 @@
+const std = @import("std");
+const builtin = @import("builtin");
+const Allocator = std.mem.Allocator;
+const assert = std.debug.assert;
+const testing = std.testing;
+const mem = std.mem;
+const log = std.log.scoped(.codegen);
+
+const CodeGen = @This();
+const codegen = @import("../../codegen.zig");
+const Zcu = @import("../../Zcu.zig");
+const InternPool = @import("../../InternPool.zig");
+const Decl = Zcu.Decl;
+const Type = @import("../../Type.zig");
+const Value = @import("../../Value.zig");
+const Compilation = @import("../../Compilation.zig");
+const link = @import("../../link.zig");
+const Air = @import("../../Air.zig");
+const Mir = @import("Mir.zig");
+const abi = @import("../../codegen/wasm/abi.zig");
+const Alignment = InternPool.Alignment;
+const errUnionPayloadOffset = codegen.errUnionPayloadOffset;
+const errUnionErrorOffset = codegen.errUnionErrorOffset;
+
+const target_util = @import("../../target.zig");
+const libcFloatPrefix = target_util.libcFloatPrefix;
+const libcFloatSuffix = target_util.libcFloatSuffix;
+const compilerRtFloatAbbrev = target_util.compilerRtFloatAbbrev;
+const compilerRtIntAbbrev = target_util.compilerRtIntAbbrev;
+
+pub fn legalizeFeatures(_: *const std.Target) *const Air.Legalize.Features {
+    return comptime &.initMany(&.{
+        .expand_intcast_safe,
+        .expand_int_from_float_safe,
+        .expand_int_from_float_optimized_safe,
+        .expand_add_safe,
+        .expand_sub_safe,
+        .expand_mul_safe,
+    });
+}
+
+/// Reference to the function declaration the code
+/// section belongs to
+owner_nav: InternPool.Nav.Index,
+/// Current block depth. Used to calculate the relative difference between a break
+/// and block
+block_depth: u32 = 0,
+air: Air,
+liveness: Air.Liveness,
+gpa: mem.Allocator,
+func_index: InternPool.Index,
+/// Contains a list of current branches.
+/// When we return from a branch, the branch will be popped from this list,
+/// which means branches can only contain references from within its own branch,
+/// or a branch higher (lower index) in the tree.
+branches: std.ArrayListUnmanaged(Branch) = .empty,
+/// Table to save `WValue`'s generated by an `Air.Inst`
+// values: ValueTable,
+/// Mapping from Air.Inst.Index to block ids
+blocks: std.AutoArrayHashMapUnmanaged(Air.Inst.Index, struct {
+    label: u32,
+    value: WValue,
+}) = .{},
+/// Maps `loop` instructions to their label. `br` to here repeats the loop.
+loops: std.AutoHashMapUnmanaged(Air.Inst.Index, u32) = .empty,
+/// The index the next local generated will have
+/// NOTE: arguments share the index with locals therefore the first variable
+/// will have the index that comes after the last argument's index
+local_index: u32,
+/// The index of the current argument.
+/// Used to track which argument is being referenced in `airArg`.
+arg_index: u32 = 0,
+/// List of simd128 immediates. Each value is stored as an array of bytes.
+/// This list will only be populated for 128bit-simd values when the target features
+/// are enabled also.
+simd_immediates: std.ArrayListUnmanaged([16]u8) = .empty, +/// The Target we're emitting (used to call intInfo) +target: *const std.Target, +ptr_size: enum { wasm32, wasm64 }, +pt: Zcu.PerThread, +/// List of MIR Instructions +mir_instructions: std.MultiArrayList(Mir.Inst), +/// Contains extra data for MIR +mir_extra: std.ArrayListUnmanaged(u32), +/// List of all locals' types generated throughout this declaration +/// used to emit locals count at start of 'code' section. +mir_locals: std.ArrayListUnmanaged(std.wasm.Valtype), +/// Set of all UAVs referenced by this function. Key is the UAV value, value is the alignment. +/// `.none` means naturally aligned. An explicit alignment is never less than the natural alignment. +mir_uavs: std.AutoArrayHashMapUnmanaged(InternPool.Index, Alignment), +/// Set of all functions whose address this function has taken and which therefore might be called +/// via a `call_indirect` function. +mir_indirect_function_set: std.AutoArrayHashMapUnmanaged(InternPool.Nav.Index, void), +/// Set of all function types used by this function. These must be interned by the linker. +mir_func_tys: std.AutoArrayHashMapUnmanaged(InternPool.Index, void), +/// The number of `error_name_table_ref` instructions emitted. +error_name_table_ref_count: u32, +/// When a function is executing, we store the the current stack pointer's value within this local. +/// This value is then used to restore the stack pointer to the original value at the return of the function. +initial_stack_value: WValue = .none, +/// The current stack pointer subtracted with the stack size. From this value, we will calculate +/// all offsets of the stack values. +bottom_stack_value: WValue = .none, +/// Arguments of this function declaration +/// This will be set after `resolveCallingConventionValues` +args: []WValue, +/// This will only be `.none` if the function returns void, or returns an immediate. +/// When it returns a pointer to the stack, the `.local` tag will be active and must be populated +/// before this function returns its execution to the caller. +return_value: WValue, +/// The size of the stack this function occupies. In the function prologue +/// we will move the stack pointer by this number, forward aligned with the `stack_alignment`. +stack_size: u32 = 0, +/// The stack alignment, which is 16 bytes by default. This is specified by the +/// tool-conventions: https://github.com/WebAssembly/tool-conventions/blob/main/BasicCABI.md +/// and also what the llvm backend will emit. +/// However, local variables or the usage of `incoming_stack_alignment` in a `CallingConvention` can overwrite this default. +stack_alignment: Alignment = .@"16", + +// For each individual Wasm valtype we store a seperate free list which +// allows us to re-use locals that are no longer used. e.g. a temporary local. +/// A list of indexes which represents a local of valtype `i32`. +/// It is illegal to store a non-i32 valtype in this list. +free_locals_i32: std.ArrayListUnmanaged(u32) = .empty, +/// A list of indexes which represents a local of valtype `i64`. +/// It is illegal to store a non-i64 valtype in this list. +free_locals_i64: std.ArrayListUnmanaged(u32) = .empty, +/// A list of indexes which represents a local of valtype `f32`. +/// It is illegal to store a non-f32 valtype in this list. +free_locals_f32: std.ArrayListUnmanaged(u32) = .empty, +/// A list of indexes which represents a local of valtype `f64`. +/// It is illegal to store a non-f64 valtype in this list. 
+free_locals_f64: std.ArrayListUnmanaged(u32) = .empty, +/// A list of indexes which represents a local of valtype `v127`. +/// It is illegal to store a non-v128 valtype in this list. +free_locals_v128: std.ArrayListUnmanaged(u32) = .empty, + +/// When in debug mode, this tracks if no `finishAir` was missed. +/// Forgetting to call `finishAir` will cause the result to not be +/// stored in our `values` map and therefore cause bugs. +air_bookkeeping: @TypeOf(bookkeeping_init) = bookkeeping_init, + +/// Wasm Value, created when generating an instruction +const WValue = union(enum) { + /// `WValue` which has been freed and may no longer hold + /// any references. + dead: void, + /// May be referenced but is unused + none: void, + /// The value lives on top of the stack + stack: void, + /// Index of the local + local: struct { + /// Contains the index to the local + value: u32, + /// The amount of instructions referencing this `WValue` + references: u32, + }, + /// An immediate 32bit value + imm32: u32, + /// An immediate 64bit value + imm64: u64, + /// Index into the list of simd128 immediates. This `WValue` is + /// only possible in very rare cases, therefore it would be + /// a waste of memory to store the value in a 128 bit integer. + imm128: u32, + /// A constant 32bit float value + float32: f32, + /// A constant 64bit float value + float64: f64, + nav_ref: struct { + nav_index: InternPool.Nav.Index, + offset: i32 = 0, + }, + uav_ref: struct { + ip_index: InternPool.Index, + offset: i32 = 0, + orig_ptr_ty: InternPool.Index = .none, + }, + /// Offset from the bottom of the virtual stack, with the offset + /// pointing to where the value lives. + stack_offset: struct { + /// Contains the actual value of the offset + value: u32, + /// The amount of instructions referencing this `WValue` + references: u32, + }, + + /// Returns the offset from the bottom of the stack. This is useful when + /// we use the load or store instruction to ensure we retrieve the value + /// from the correct position, rather than the value that lives at the + /// bottom of the stack. For instances where `WValue` is not `stack_value` + /// this will return 0, which allows us to simply call this function for all + /// loads and stores without requiring checks everywhere. + fn offset(value: WValue) u32 { + switch (value) { + .stack_offset => |stack_offset| return stack_offset.value, + .dead => unreachable, + else => return 0, + } + } + + /// Promotes a `WValue` to a local when given value is on top of the stack. + /// When encountering a `local` or `stack_offset` this is essentially a no-op. + /// All other tags are illegal. + fn toLocal(value: WValue, gen: *CodeGen, ty: Type) InnerError!WValue { + switch (value) { + .stack => { + const new_local = try gen.allocLocal(ty); + try gen.addLocal(.local_set, new_local.local.value); + return new_local; + }, + .local, .stack_offset => return value, + else => unreachable, + } + } + + /// Marks a local as no longer being referenced and essentially allows + /// us to re-use it somewhere else within the function. + /// The valtype of the local is deducted by using the index of the given `WValue`. + fn free(value: *WValue, gen: *CodeGen) void { + if (value.* != .local) return; + const local_value = value.local.value; + const reserved = gen.args.len + @intFromBool(gen.return_value != .none); + if (local_value < reserved + 2) return; // reserved locals may never be re-used. Also accounts for 2 stack locals. 
+ + const index = local_value - reserved; + const valtype = gen.mir_locals.items[index]; + switch (valtype) { + .i32 => gen.free_locals_i32.append(gen.gpa, local_value) catch return, // It's ok to fail any of those, a new local can be allocated instead + .i64 => gen.free_locals_i64.append(gen.gpa, local_value) catch return, + .f32 => gen.free_locals_f32.append(gen.gpa, local_value) catch return, + .f64 => gen.free_locals_f64.append(gen.gpa, local_value) catch return, + .v128 => gen.free_locals_v128.append(gen.gpa, local_value) catch return, + } + log.debug("freed local ({d}) of type {}", .{ local_value, valtype }); + value.* = .dead; + } +}; + +const Op = enum { + @"unreachable", + nop, + block, + loop, + @"if", + @"else", + end, + br, + br_if, + br_table, + @"return", + call, + drop, + select, + global_get, + global_set, + load, + store, + memory_size, + memory_grow, + @"const", + eqz, + eq, + ne, + lt, + gt, + le, + ge, + clz, + ctz, + popcnt, + add, + sub, + mul, + div, + rem, + @"and", + @"or", + xor, + shl, + shr, + rotl, + rotr, + abs, + neg, + ceil, + floor, + trunc, + nearest, + sqrt, + min, + max, + copysign, + wrap, + convert, + demote, + promote, + reinterpret, + extend, +}; + +const OpcodeBuildArguments = struct { + /// First valtype in the opcode (usually represents the type of the output) + valtype1: ?std.wasm.Valtype = null, + /// The operation (e.g. call, unreachable, div, min, sqrt, etc.) + op: Op, + /// Width of the operation (e.g. 8 for i32_load8_s, 16 for i64_extend16_i32_s) + width: ?u8 = null, + /// Second valtype in the opcode name (usually represents the type of the input) + valtype2: ?std.wasm.Valtype = null, + /// Signedness of the op + signedness: ?std.builtin.Signedness = null, +}; + +/// TODO: deprecated, should be split up per tag. +fn buildOpcode(args: OpcodeBuildArguments) std.wasm.Opcode { + switch (args.op) { + .@"unreachable" => unreachable, + .nop => unreachable, + .block => unreachable, + .loop => unreachable, + .@"if" => unreachable, + .@"else" => unreachable, + .end => unreachable, + .br => unreachable, + .br_if => unreachable, + .br_table => unreachable, + .@"return" => unreachable, + .call => unreachable, + .drop => unreachable, + .select => unreachable, + .global_get => unreachable, + .global_set => unreachable, + + .load => if (args.width) |width| switch (width) { + 8 => switch (args.valtype1.?) { + .i32 => if (args.signedness.? == .signed) return .i32_load8_s else return .i32_load8_u, + .i64 => if (args.signedness.? == .signed) return .i64_load8_s else return .i64_load8_u, + .f32, .f64, .v128 => unreachable, + }, + 16 => switch (args.valtype1.?) { + .i32 => if (args.signedness.? == .signed) return .i32_load16_s else return .i32_load16_u, + .i64 => if (args.signedness.? == .signed) return .i64_load16_s else return .i64_load16_u, + .f32, .f64, .v128 => unreachable, + }, + 32 => switch (args.valtype1.?) { + .i64 => if (args.signedness.? == .signed) return .i64_load32_s else return .i64_load32_u, + .i32 => return .i32_load, + .f32 => return .f32_load, + .f64, .v128 => unreachable, + }, + 64 => switch (args.valtype1.?) { + .i64 => return .i64_load, + .f64 => return .f64_load, + else => unreachable, + }, + else => unreachable, + } else switch (args.valtype1.?) { + .i32 => return .i32_load, + .i64 => return .i64_load, + .f32 => return .f32_load, + .f64 => return .f64_load, + .v128 => unreachable, // handled independently + }, + .store => if (args.width) |width| { + switch (width) { + 8 => switch (args.valtype1.?) 
{ + .i32 => return .i32_store8, + .i64 => return .i64_store8, + .f32, .f64, .v128 => unreachable, + }, + 16 => switch (args.valtype1.?) { + .i32 => return .i32_store16, + .i64 => return .i64_store16, + .f32, .f64, .v128 => unreachable, + }, + 32 => switch (args.valtype1.?) { + .i64 => return .i64_store32, + .i32 => return .i32_store, + .f32 => return .f32_store, + .f64, .v128 => unreachable, + }, + 64 => switch (args.valtype1.?) { + .i64 => return .i64_store, + .f64 => return .f64_store, + else => unreachable, + }, + else => unreachable, + } + } else { + switch (args.valtype1.?) { + .i32 => return .i32_store, + .i64 => return .i64_store, + .f32 => return .f32_store, + .f64 => return .f64_store, + .v128 => unreachable, // handled independently + } + }, + + .memory_size => return .memory_size, + .memory_grow => return .memory_grow, + + .@"const" => switch (args.valtype1.?) { + .i32 => return .i32_const, + .i64 => return .i64_const, + .f32 => return .f32_const, + .f64 => return .f64_const, + .v128 => unreachable, // handled independently + }, + + .eqz => switch (args.valtype1.?) { + .i32 => return .i32_eqz, + .i64 => return .i64_eqz, + .f32, .f64, .v128 => unreachable, + }, + .eq => switch (args.valtype1.?) { + .i32 => return .i32_eq, + .i64 => return .i64_eq, + .f32 => return .f32_eq, + .f64 => return .f64_eq, + .v128 => unreachable, // handled independently + }, + .ne => switch (args.valtype1.?) { + .i32 => return .i32_ne, + .i64 => return .i64_ne, + .f32 => return .f32_ne, + .f64 => return .f64_ne, + .v128 => unreachable, // handled independently + }, + + .lt => switch (args.valtype1.?) { + .i32 => if (args.signedness.? == .signed) return .i32_lt_s else return .i32_lt_u, + .i64 => if (args.signedness.? == .signed) return .i64_lt_s else return .i64_lt_u, + .f32 => return .f32_lt, + .f64 => return .f64_lt, + .v128 => unreachable, // handled independently + }, + .gt => switch (args.valtype1.?) { + .i32 => if (args.signedness.? == .signed) return .i32_gt_s else return .i32_gt_u, + .i64 => if (args.signedness.? == .signed) return .i64_gt_s else return .i64_gt_u, + .f32 => return .f32_gt, + .f64 => return .f64_gt, + .v128 => unreachable, // handled independently + }, + .le => switch (args.valtype1.?) { + .i32 => if (args.signedness.? == .signed) return .i32_le_s else return .i32_le_u, + .i64 => if (args.signedness.? == .signed) return .i64_le_s else return .i64_le_u, + .f32 => return .f32_le, + .f64 => return .f64_le, + .v128 => unreachable, // handled independently + }, + .ge => switch (args.valtype1.?) { + .i32 => if (args.signedness.? == .signed) return .i32_ge_s else return .i32_ge_u, + .i64 => if (args.signedness.? == .signed) return .i64_ge_s else return .i64_ge_u, + .f32 => return .f32_ge, + .f64 => return .f64_ge, + .v128 => unreachable, // handled independently + }, + + .clz => switch (args.valtype1.?) { + .i32 => return .i32_clz, + .i64 => return .i64_clz, + .f32, .f64 => unreachable, + .v128 => unreachable, // handled independently + }, + .ctz => switch (args.valtype1.?) { + .i32 => return .i32_ctz, + .i64 => return .i64_ctz, + .f32, .f64 => unreachable, + .v128 => unreachable, // handled independently + }, + .popcnt => switch (args.valtype1.?) { + .i32 => return .i32_popcnt, + .i64 => return .i64_popcnt, + .f32, .f64 => unreachable, + .v128 => unreachable, // handled independently + }, + + .add => switch (args.valtype1.?) 
{ + .i32 => return .i32_add, + .i64 => return .i64_add, + .f32 => return .f32_add, + .f64 => return .f64_add, + .v128 => unreachable, // handled independently + }, + .sub => switch (args.valtype1.?) { + .i32 => return .i32_sub, + .i64 => return .i64_sub, + .f32 => return .f32_sub, + .f64 => return .f64_sub, + .v128 => unreachable, // handled independently + }, + .mul => switch (args.valtype1.?) { + .i32 => return .i32_mul, + .i64 => return .i64_mul, + .f32 => return .f32_mul, + .f64 => return .f64_mul, + .v128 => unreachable, // handled independently + }, + + .div => switch (args.valtype1.?) { + .i32 => if (args.signedness.? == .signed) return .i32_div_s else return .i32_div_u, + .i64 => if (args.signedness.? == .signed) return .i64_div_s else return .i64_div_u, + .f32 => return .f32_div, + .f64 => return .f64_div, + .v128 => unreachable, // handled independently + }, + .rem => switch (args.valtype1.?) { + .i32 => if (args.signedness.? == .signed) return .i32_rem_s else return .i32_rem_u, + .i64 => if (args.signedness.? == .signed) return .i64_rem_s else return .i64_rem_u, + .f32, .f64 => unreachable, + .v128 => unreachable, // handled independently + }, + + .@"and" => switch (args.valtype1.?) { + .i32 => return .i32_and, + .i64 => return .i64_and, + .f32, .f64 => unreachable, + .v128 => unreachable, // handled independently + }, + .@"or" => switch (args.valtype1.?) { + .i32 => return .i32_or, + .i64 => return .i64_or, + .f32, .f64 => unreachable, + .v128 => unreachable, // handled independently + }, + .xor => switch (args.valtype1.?) { + .i32 => return .i32_xor, + .i64 => return .i64_xor, + .f32, .f64 => unreachable, + .v128 => unreachable, // handled independently + }, + + .shl => switch (args.valtype1.?) { + .i32 => return .i32_shl, + .i64 => return .i64_shl, + .f32, .f64 => unreachable, + .v128 => unreachable, // handled independently + }, + .shr => switch (args.valtype1.?) { + .i32 => if (args.signedness.? == .signed) return .i32_shr_s else return .i32_shr_u, + .i64 => if (args.signedness.? == .signed) return .i64_shr_s else return .i64_shr_u, + .f32, .f64 => unreachable, + .v128 => unreachable, // handled independently + }, + .rotl => switch (args.valtype1.?) { + .i32 => return .i32_rotl, + .i64 => return .i64_rotl, + .f32, .f64 => unreachable, + .v128 => unreachable, // handled independently + }, + .rotr => switch (args.valtype1.?) { + .i32 => return .i32_rotr, + .i64 => return .i64_rotr, + .f32, .f64 => unreachable, + .v128 => unreachable, // handled independently + }, + + .abs => switch (args.valtype1.?) { + .i32, .i64 => unreachable, + .f32 => return .f32_abs, + .f64 => return .f64_abs, + .v128 => unreachable, // handled independently + }, + .neg => switch (args.valtype1.?) { + .i32, .i64 => unreachable, + .f32 => return .f32_neg, + .f64 => return .f64_neg, + .v128 => unreachable, // handled independently + }, + .ceil => switch (args.valtype1.?) { + .i64 => unreachable, + .i32 => return .f32_ceil, // when valtype is f16, we store it in i32. + .f32 => return .f32_ceil, + .f64 => return .f64_ceil, + .v128 => unreachable, // handled independently + }, + .floor => switch (args.valtype1.?) { + .i64 => unreachable, + .i32 => return .f32_floor, // when valtype is f16, we store it in i32. + .f32 => return .f32_floor, + .f64 => return .f64_floor, + .v128 => unreachable, // handled independently + }, + .trunc => switch (args.valtype1.?) { + .i32 => if (args.valtype2) |valty| switch (valty) { + .i32 => unreachable, + .i64 => unreachable, + .f32 => if (args.signedness.? 
== .signed) return .i32_trunc_f32_s else return .i32_trunc_f32_u, + .f64 => if (args.signedness.? == .signed) return .i32_trunc_f64_s else return .i32_trunc_f64_u, + .v128 => unreachable, // handled independently + } else return .f32_trunc, // when no valtype2, it's an f16 instead which is stored in an i32. + .i64 => switch (args.valtype2.?) { + .i32 => unreachable, + .i64 => unreachable, + .f32 => if (args.signedness.? == .signed) return .i64_trunc_f32_s else return .i64_trunc_f32_u, + .f64 => if (args.signedness.? == .signed) return .i64_trunc_f64_s else return .i64_trunc_f64_u, + .v128 => unreachable, // handled independently + }, + .f32 => return .f32_trunc, + .f64 => return .f64_trunc, + .v128 => unreachable, // handled independently + }, + .nearest => switch (args.valtype1.?) { + .i32, .i64 => unreachable, + .f32 => return .f32_nearest, + .f64 => return .f64_nearest, + .v128 => unreachable, // handled independently + }, + .sqrt => switch (args.valtype1.?) { + .i32, .i64 => unreachable, + .f32 => return .f32_sqrt, + .f64 => return .f64_sqrt, + .v128 => unreachable, // handled independently + }, + .min => switch (args.valtype1.?) { + .i32, .i64 => unreachable, + .f32 => return .f32_min, + .f64 => return .f64_min, + .v128 => unreachable, // handled independently + }, + .max => switch (args.valtype1.?) { + .i32, .i64 => unreachable, + .f32 => return .f32_max, + .f64 => return .f64_max, + .v128 => unreachable, // handled independently + }, + .copysign => switch (args.valtype1.?) { + .i32, .i64 => unreachable, + .f32 => return .f32_copysign, + .f64 => return .f64_copysign, + .v128 => unreachable, // handled independently + }, + + .wrap => switch (args.valtype1.?) { + .i32 => switch (args.valtype2.?) { + .i32 => unreachable, + .i64 => return .i32_wrap_i64, + .f32, .f64 => unreachable, + .v128 => unreachable, // handled independently + }, + .i64, .f32, .f64 => unreachable, + .v128 => unreachable, // handled independently + }, + .convert => switch (args.valtype1.?) { + .i32, .i64 => unreachable, + .f32 => switch (args.valtype2.?) { + .i32 => if (args.signedness.? == .signed) return .f32_convert_i32_s else return .f32_convert_i32_u, + .i64 => if (args.signedness.? == .signed) return .f32_convert_i64_s else return .f32_convert_i64_u, + .f32, .f64 => unreachable, + .v128 => unreachable, // handled independently + }, + .f64 => switch (args.valtype2.?) { + .i32 => if (args.signedness.? == .signed) return .f64_convert_i32_s else return .f64_convert_i32_u, + .i64 => if (args.signedness.? == .signed) return .f64_convert_i64_s else return .f64_convert_i64_u, + .f32, .f64 => unreachable, + .v128 => unreachable, // handled independently + }, + .v128 => unreachable, // handled independently + }, + .demote => if (args.valtype1.? == .f32 and args.valtype2.? == .f64) return .f32_demote_f64 else unreachable, + .promote => if (args.valtype1.? == .f64 and args.valtype2.? == .f32) return .f64_promote_f32 else unreachable, + .reinterpret => switch (args.valtype1.?) { + .i32 => if (args.valtype2.? == .f32) return .i32_reinterpret_f32 else unreachable, + .i64 => if (args.valtype2.? == .f64) return .i64_reinterpret_f64 else unreachable, + .f32 => if (args.valtype2.? == .i32) return .f32_reinterpret_i32 else unreachable, + .f64 => if (args.valtype2.? == .i64) return .f64_reinterpret_i64 else unreachable, + .v128 => unreachable, // handled independently + }, + .extend => switch (args.valtype1.?) { + .i32 => switch (args.width.?) { + 8 => if (args.signedness.? 
== .signed) return .i32_extend8_s else unreachable, + 16 => if (args.signedness.? == .signed) return .i32_extend16_s else unreachable, + else => unreachable, + }, + .i64 => switch (args.width.?) { + 8 => if (args.signedness.? == .signed) return .i64_extend8_s else unreachable, + 16 => if (args.signedness.? == .signed) return .i64_extend16_s else unreachable, + 32 => if (args.signedness.? == .signed) return .i64_extend32_s else unreachable, + else => unreachable, + }, + .f32, .f64 => unreachable, + .v128 => unreachable, // handled independently + }, + } +} + +test "Wasm - buildOpcode" { + // Make sure buildOpcode is referenced, and test some examples + const i32_const = buildOpcode(.{ .op = .@"const", .valtype1 = .i32 }); + const i64_extend32_s = buildOpcode(.{ .op = .extend, .valtype1 = .i64, .width = 32, .signedness = .signed }); + const f64_reinterpret_i64 = buildOpcode(.{ .op = .reinterpret, .valtype1 = .f64, .valtype2 = .i64 }); + + try testing.expectEqual(@as(std.wasm.Opcode, .i32_const), i32_const); + try testing.expectEqual(@as(std.wasm.Opcode, .i64_extend32_s), i64_extend32_s); + try testing.expectEqual(@as(std.wasm.Opcode, .f64_reinterpret_i64), f64_reinterpret_i64); +} + +/// Hashmap to store generated `WValue` for each `Air.Inst.Ref` +pub const ValueTable = std.AutoArrayHashMapUnmanaged(Air.Inst.Ref, WValue); + +const bookkeeping_init = if (std.debug.runtime_safety) @as(usize, 0) else {}; + +const InnerError = error{ + OutOfMemory, + /// An error occurred when trying to lower AIR to MIR. + CodegenFail, + /// Compiler implementation could not handle a large integer. + Overflow, +} || link.File.UpdateDebugInfoError; + +pub fn deinit(cg: *CodeGen) void { + const gpa = cg.gpa; + for (cg.branches.items) |*branch| branch.deinit(gpa); + cg.branches.deinit(gpa); + cg.blocks.deinit(gpa); + cg.loops.deinit(gpa); + cg.simd_immediates.deinit(gpa); + cg.free_locals_i32.deinit(gpa); + cg.free_locals_i64.deinit(gpa); + cg.free_locals_f32.deinit(gpa); + cg.free_locals_f64.deinit(gpa); + cg.free_locals_v128.deinit(gpa); + cg.mir_instructions.deinit(gpa); + cg.mir_extra.deinit(gpa); + cg.mir_locals.deinit(gpa); + cg.mir_uavs.deinit(gpa); + cg.mir_indirect_function_set.deinit(gpa); + cg.mir_func_tys.deinit(gpa); + cg.* = undefined; +} + +fn fail(cg: *CodeGen, comptime fmt: []const u8, args: anytype) error{ OutOfMemory, CodegenFail } { + const zcu = cg.pt.zcu; + const func = zcu.funcInfo(cg.func_index); + return zcu.codegenFail(func.owner_nav, fmt, args); +} + +/// Resolves the `WValue` for the given instruction `inst` +/// When the given instruction has a `Value`, it returns a constant instead +fn resolveInst(cg: *CodeGen, ref: Air.Inst.Ref) InnerError!WValue { + var branch_index = cg.branches.items.len; + while (branch_index > 0) : (branch_index -= 1) { + const branch = cg.branches.items[branch_index - 1]; + if (branch.values.get(ref)) |value| { + return value; + } + } + + // when we did not find an existing instruction, it + // means we must generate it from a constant. + // We always store constants in the most outer branch as they must never + // be removed. The most outer branch is always at index 0. 
+ const gop = try cg.branches.items[0].values.getOrPut(cg.gpa, ref); + assert(!gop.found_existing); + + const pt = cg.pt; + const zcu = pt.zcu; + const val = (try cg.air.value(ref, pt)).?; + const ty = cg.typeOf(ref); + if (!ty.hasRuntimeBitsIgnoreComptime(zcu) and !ty.isInt(zcu) and !ty.isError(zcu)) { + gop.value_ptr.* = .none; + return .none; + } + + // When we need to pass the value by reference (such as a struct), we will + // leverage `generateSymbol` to lower the constant to bytes and emit it + // to the 'rodata' section. We then return the index into the section as `WValue`. + // + // In the other cases, we will simply lower the constant to a value that fits + // into a single local (such as a pointer, integer, bool, etc). + const result: WValue = if (isByRef(ty, zcu, cg.target)) + .{ .uav_ref = .{ .ip_index = val.toIntern() } } + else + try cg.lowerConstant(val, ty); + + gop.value_ptr.* = result; + return result; +} + +fn resolveValue(cg: *CodeGen, val: Value) InnerError!WValue { + const zcu = cg.pt.zcu; + const ty = val.typeOf(zcu); + + return if (isByRef(ty, zcu, cg.target)) + .{ .uav_ref = .{ .ip_index = val.toIntern() } } + else + try cg.lowerConstant(val, ty); +} + +/// NOTE: if result == .stack, it will be stored in .local +fn finishAir(cg: *CodeGen, inst: Air.Inst.Index, result: WValue, operands: []const Air.Inst.Ref) InnerError!void { + assert(operands.len <= Air.Liveness.bpi - 1); + var tomb_bits = cg.liveness.getTombBits(inst); + for (operands) |operand| { + const dies = @as(u1, @truncate(tomb_bits)) != 0; + tomb_bits >>= 1; + if (!dies) continue; + processDeath(cg, operand); + } + + // results of `none` can never be referenced. + if (result != .none) { + const trackable_result = if (result != .stack) + result + else + try result.toLocal(cg, cg.typeOfIndex(inst)); + const branch = cg.currentBranch(); + branch.values.putAssumeCapacityNoClobber(inst.toRef(), trackable_result); + } + + if (std.debug.runtime_safety) { + cg.air_bookkeeping += 1; + } +} + +const Branch = struct { + values: ValueTable = .{}, + + fn deinit(branch: *Branch, gpa: Allocator) void { + branch.values.deinit(gpa); + branch.* = undefined; + } +}; + +inline fn currentBranch(cg: *CodeGen) *Branch { + return &cg.branches.items[cg.branches.items.len - 1]; +} + +const BigTomb = struct { + gen: *CodeGen, + inst: Air.Inst.Index, + lbt: Air.Liveness.BigTomb, + + fn feed(bt: *BigTomb, op_ref: Air.Inst.Ref) void { + const dies = bt.lbt.feed(); + if (!dies) return; + // This will be a nop for interned constants. + processDeath(bt.gen, op_ref); + } + + fn finishAir(bt: *BigTomb, result: WValue) void { + assert(result != .stack); + if (result != .none) { + bt.gen.currentBranch().values.putAssumeCapacityNoClobber(bt.inst.toRef(), result); + } + + if (std.debug.runtime_safety) { + bt.gen.air_bookkeeping += 1; + } + } +}; + +fn iterateBigTomb(cg: *CodeGen, inst: Air.Inst.Index, operand_count: usize) !BigTomb { + try cg.currentBranch().values.ensureUnusedCapacity(cg.gpa, operand_count + 1); + return BigTomb{ + .gen = cg, + .inst = inst, + .lbt = cg.liveness.iterateBigTomb(inst), + }; +} + +fn processDeath(cg: *CodeGen, ref: Air.Inst.Ref) void { + if (ref.toIndex() == null) return; + // Branches are currently only allowed to free locals allocated + // within their own branch. + // TODO: Upon branch consolidation free any locals if needed. 
+ const value = cg.currentBranch().values.getPtr(ref) orelse return; + if (value.* != .local) return; + const reserved_indexes = cg.args.len + @intFromBool(cg.return_value != .none); + if (value.local.value < reserved_indexes) { + return; // function arguments can never be re-used + } + log.debug("Decreasing reference for ref: %{d}, using local '{d}'", .{ @intFromEnum(ref.toIndex().?), value.local.value }); + value.local.references -= 1; // if this panics, a call to `reuseOperand` was forgotten by the developer + if (value.local.references == 0) { + value.free(cg); + } +} + +fn addInst(cg: *CodeGen, inst: Mir.Inst) error{OutOfMemory}!void { + try cg.mir_instructions.append(cg.gpa, inst); +} + +fn addTag(cg: *CodeGen, tag: Mir.Inst.Tag) error{OutOfMemory}!void { + try cg.addInst(.{ .tag = tag, .data = .{ .tag = {} } }); +} + +fn addExtended(cg: *CodeGen, opcode: std.wasm.MiscOpcode) error{OutOfMemory}!void { + const extra_index: u32 = @intCast(cg.mir_extra.items.len); + try cg.mir_extra.append(cg.gpa, @intFromEnum(opcode)); + try cg.addInst(.{ .tag = .misc_prefix, .data = .{ .payload = extra_index } }); +} + +fn addLabel(cg: *CodeGen, tag: Mir.Inst.Tag, label: u32) error{OutOfMemory}!void { + try cg.addInst(.{ .tag = tag, .data = .{ .label = label } }); +} + +fn addLocal(cg: *CodeGen, tag: Mir.Inst.Tag, local: u32) error{OutOfMemory}!void { + try cg.addInst(.{ .tag = tag, .data = .{ .local = local } }); +} + +/// Accepts an unsigned 32bit integer rather than a signed integer to +/// prevent us from having to bitcast multiple times as most values +/// within codegen are represented as unsigned rather than signed. +fn addImm32(cg: *CodeGen, imm: u32) error{OutOfMemory}!void { + try cg.addInst(.{ .tag = .i32_const, .data = .{ .imm32 = @bitCast(imm) } }); +} + +/// Accepts an unsigned 64bit integer rather than a signed integer to +/// prevent us from having to bitcast multiple times as most values +/// within codegen are represented as unsigned rather than signed. +fn addImm64(cg: *CodeGen, imm: u64) error{OutOfMemory}!void { + const extra_index = try cg.addExtra(Mir.Imm64.init(imm)); + try cg.addInst(.{ .tag = .i64_const, .data = .{ .payload = extra_index } }); +} + +/// Accepts the index into the list of 128bit-immediates +fn addImm128(cg: *CodeGen, index: u32) error{OutOfMemory}!void { + const simd_values = cg.simd_immediates.items[index]; + const extra_index: u32 = @intCast(cg.mir_extra.items.len); + // tag + 128bit value + try cg.mir_extra.ensureUnusedCapacity(cg.gpa, 5); + cg.mir_extra.appendAssumeCapacity(@intFromEnum(std.wasm.SimdOpcode.v128_const)); + cg.mir_extra.appendSliceAssumeCapacity(@alignCast(mem.bytesAsSlice(u32, &simd_values))); + try cg.addInst(.{ .tag = .simd_prefix, .data = .{ .payload = extra_index } }); +} + +fn addFloat64(cg: *CodeGen, float: f64) error{OutOfMemory}!void { + const extra_index = try cg.addExtra(Mir.Float64.init(float)); + try cg.addInst(.{ .tag = .f64_const, .data = .{ .payload = extra_index } }); +} + +/// Inserts an instruction to load/store from/to wasm's linear memory dependent on the given `tag`. +fn addMemArg(cg: *CodeGen, tag: Mir.Inst.Tag, mem_arg: Mir.MemArg) error{OutOfMemory}!void { + const extra_index = try cg.addExtra(mem_arg); + try cg.addInst(.{ .tag = tag, .data = .{ .payload = extra_index } }); +} + +/// Inserts an instruction from the 'atomics' feature which accesses wasm's linear memory dependent on the +/// given `tag`. 
+fn addAtomicMemArg(cg: *CodeGen, tag: std.wasm.AtomicsOpcode, mem_arg: Mir.MemArg) error{OutOfMemory}!void { + const extra_index = try cg.addExtra(@as(struct { val: u32 }, .{ .val = @intFromEnum(tag) })); + _ = try cg.addExtra(mem_arg); + try cg.addInst(.{ .tag = .atomics_prefix, .data = .{ .payload = extra_index } }); +} + +/// Helper function to emit atomic mir opcodes. +fn addAtomicTag(cg: *CodeGen, tag: std.wasm.AtomicsOpcode) error{OutOfMemory}!void { + const extra_index = try cg.addExtra(@as(struct { val: u32 }, .{ .val = @intFromEnum(tag) })); + try cg.addInst(.{ .tag = .atomics_prefix, .data = .{ .payload = extra_index } }); +} + +/// Appends entries to `mir_extra` based on the type of `extra`. +/// Returns the index into `mir_extra` +fn addExtra(cg: *CodeGen, extra: anytype) error{OutOfMemory}!u32 { + const fields = std.meta.fields(@TypeOf(extra)); + try cg.mir_extra.ensureUnusedCapacity(cg.gpa, fields.len); + return cg.addExtraAssumeCapacity(extra); +} + +/// Appends entries to `mir_extra` based on the type of `extra`. +/// Returns the index into `mir_extra` +fn addExtraAssumeCapacity(cg: *CodeGen, extra: anytype) error{OutOfMemory}!u32 { + const fields = std.meta.fields(@TypeOf(extra)); + const result: u32 = @intCast(cg.mir_extra.items.len); + inline for (fields) |field| { + cg.mir_extra.appendAssumeCapacity(switch (field.type) { + u32 => @field(extra, field.name), + i32 => @bitCast(@field(extra, field.name)), + InternPool.Index, + InternPool.Nav.Index, + => @intFromEnum(@field(extra, field.name)), + else => |field_type| @compileError("Unsupported field type " ++ @typeName(field_type)), + }); + } + return result; +} + +/// For `std.builtin.CallingConvention.auto`. +pub fn typeToValtype(ty: Type, zcu: *const Zcu, target: *const std.Target) std.wasm.Valtype { + const ip = &zcu.intern_pool; + return switch (ty.zigTypeTag(zcu)) { + .float => switch (ty.floatBits(target)) { + 16 => .i32, // stored/loaded as u16 + 32 => .f32, + 64 => .f64, + 80, 128 => .i32, + else => unreachable, + }, + .int, .@"enum" => switch (ty.intInfo(zcu).bits) { + 0...32 => .i32, + 33...64 => .i64, + else => .i32, + }, + .@"struct" => blk: { + if (zcu.typeToPackedStruct(ty)) |packed_struct| { + const backing_int_ty = Type.fromInterned(packed_struct.backingIntTypeUnordered(ip)); + break :blk typeToValtype(backing_int_ty, zcu, target); + } else { + break :blk .i32; + } + }, + .vector => switch (CodeGen.determineSimdStoreStrategy(ty, zcu, target)) { + .direct => .v128, + .unrolled => .i32, + }, + .@"union" => switch (ty.containerLayout(zcu)) { + .@"packed" => switch (ty.bitSize(zcu)) { + 0...32 => .i32, + 33...64 => .i64, + else => .i32, + }, + else => .i32, + }, + else => .i32, // all represented as reference/immediate + }; +} + +/// Using a given `Type`, returns the corresponding wasm value type +/// Differently from `typeToValtype` this also allows `void` to create a block +/// with no return type +fn genBlockType(ty: Type, zcu: *const Zcu, target: *const std.Target) std.wasm.BlockType { + return switch (ty.ip_index) { + .void_type, .noreturn_type => .empty, + else => .fromValtype(typeToValtype(ty, zcu, target)), + }; +} + +/// Writes the bytecode depending on the given `WValue` in `val` +fn emitWValue(cg: *CodeGen, value: WValue) InnerError!void { + switch (value) { + .dead => unreachable, // reference to free'd `WValue` (missing reuseOperand?) 
+ .none, .stack => {}, // no-op + .local => |idx| try cg.addLocal(.local_get, idx.value), + .imm32 => |val| try cg.addImm32(val), + .imm64 => |val| try cg.addImm64(val), + .imm128 => |val| try cg.addImm128(val), + .float32 => |val| try cg.addInst(.{ .tag = .f32_const, .data = .{ .float32 = val } }), + .float64 => |val| try cg.addFloat64(val), + .nav_ref => |nav_ref| { + const zcu = cg.pt.zcu; + const ip = &zcu.intern_pool; + if (ip.getNav(nav_ref.nav_index).isFn(ip)) { + assert(nav_ref.offset == 0); + try cg.mir_indirect_function_set.put(cg.gpa, nav_ref.nav_index, {}); + try cg.addInst(.{ .tag = .func_ref, .data = .{ .nav_index = nav_ref.nav_index } }); + } else if (nav_ref.offset == 0) { + try cg.addInst(.{ .tag = .nav_ref, .data = .{ .nav_index = nav_ref.nav_index } }); + } else { + try cg.addInst(.{ + .tag = .nav_ref_off, + .data = .{ + .payload = try cg.addExtra(Mir.NavRefOff{ + .nav_index = nav_ref.nav_index, + .offset = nav_ref.offset, + }), + }, + }); + } + }, + .uav_ref => |uav| { + const zcu = cg.pt.zcu; + const ip = &zcu.intern_pool; + assert(!ip.isFunctionType(ip.typeOf(uav.ip_index))); + const gop = try cg.mir_uavs.getOrPut(cg.gpa, uav.ip_index); + const this_align: Alignment = a: { + if (uav.orig_ptr_ty == .none) break :a .none; + const ptr_type = ip.indexToKey(uav.orig_ptr_ty).ptr_type; + const this_align = ptr_type.flags.alignment; + if (this_align == .none) break :a .none; + const abi_align = Type.fromInterned(ptr_type.child).abiAlignment(zcu); + if (this_align.compare(.lte, abi_align)) break :a .none; + break :a this_align; + }; + if (!gop.found_existing or + gop.value_ptr.* == .none or + (this_align != .none and this_align.compare(.gt, gop.value_ptr.*))) + { + gop.value_ptr.* = this_align; + } + if (uav.offset == 0) { + try cg.addInst(.{ + .tag = .uav_ref, + .data = .{ .ip_index = uav.ip_index }, + }); + } else { + try cg.addInst(.{ + .tag = .uav_ref_off, + .data = .{ .payload = try cg.addExtra(@as(Mir.UavRefOff, .{ + .value = uav.ip_index, + .offset = uav.offset, + })) }, + }); + } + }, + .stack_offset => try cg.addLocal(.local_get, cg.bottom_stack_value.local.value), // caller must ensure to address the offset + } +} + +/// If given a local or stack-offset, increases the reference count by 1. +/// The old `WValue` found at instruction `ref` is then replaced by the +/// modified `WValue` and returned. When given a non-local or non-stack-offset, +/// returns the given `operand` itfunc instead. +fn reuseOperand(cg: *CodeGen, ref: Air.Inst.Ref, operand: WValue) WValue { + if (operand != .local and operand != .stack_offset) return operand; + var new_value = operand; + switch (new_value) { + .local => |*local| local.references += 1, + .stack_offset => |*stack_offset| stack_offset.references += 1, + else => unreachable, + } + const old_value = cg.getResolvedInst(ref); + old_value.* = new_value; + return new_value; +} + +/// From a reference, returns its resolved `WValue`. +/// It's illegal to provide a `Air.Inst.Ref` that hasn't been resolved yet. +fn getResolvedInst(cg: *CodeGen, ref: Air.Inst.Ref) *WValue { + var index = cg.branches.items.len; + while (index > 0) : (index -= 1) { + const branch = cg.branches.items[index - 1]; + if (branch.values.getPtr(ref)) |value| { + return value; + } + } + unreachable; // developer-error: This can only be called on resolved instructions. Use `resolveInst` instead. +} + +/// Creates one locals for a given `Type`. 
+/// Returns a corresponding `Wvalue` with `local` as active tag +fn allocLocal(cg: *CodeGen, ty: Type) InnerError!WValue { + const zcu = cg.pt.zcu; + const valtype = typeToValtype(ty, zcu, cg.target); + const index_or_null = switch (valtype) { + .i32 => cg.free_locals_i32.pop(), + .i64 => cg.free_locals_i64.pop(), + .f32 => cg.free_locals_f32.pop(), + .f64 => cg.free_locals_f64.pop(), + .v128 => cg.free_locals_v128.pop(), + }; + if (index_or_null) |index| { + log.debug("reusing local ({d}) of type {}", .{ index, valtype }); + return .{ .local = .{ .value = index, .references = 1 } }; + } + log.debug("new local of type {}", .{valtype}); + return cg.ensureAllocLocal(ty); +} + +/// Ensures a new local will be created. This is useful when it's useful +/// to use a zero-initialized local. +fn ensureAllocLocal(cg: *CodeGen, ty: Type) InnerError!WValue { + const zcu = cg.pt.zcu; + try cg.mir_locals.append(cg.gpa, typeToValtype(ty, zcu, cg.target)); + const initial_index = cg.local_index; + cg.local_index += 1; + return .{ .local = .{ .value = initial_index, .references = 1 } }; +} + +pub const Error = error{ + OutOfMemory, + /// Compiler was asked to operate on a number larger than supported. + Overflow, + /// Indicates the error is already stored in Zcu `failed_codegen`. + CodegenFail, +}; + +pub fn generate( + bin_file: *link.File, + pt: Zcu.PerThread, + src_loc: Zcu.LazySrcLoc, + func_index: InternPool.Index, + air: *const Air, + liveness: *const ?Air.Liveness, +) Error!Mir { + _ = src_loc; + _ = bin_file; + const zcu = pt.zcu; + const gpa = zcu.gpa; + const cg = zcu.funcInfo(func_index); + const file_scope = zcu.navFileScope(cg.owner_nav); + const target = &file_scope.mod.?.resolved_target.result; + const fn_ty = zcu.navValue(cg.owner_nav).typeOf(zcu); + const fn_info = zcu.typeToFunc(fn_ty).?; + const ret_ty: Type = .fromInterned(fn_info.return_type); + const any_returns = !firstParamSRet(fn_info.cc, ret_ty, zcu, target) and ret_ty.hasRuntimeBitsIgnoreComptime(zcu); + + var cc_result = try resolveCallingConventionValues(zcu, fn_ty, target); + defer cc_result.deinit(gpa); + + var code_gen: CodeGen = .{ + .gpa = gpa, + .pt = pt, + .air = air.*, + .liveness = liveness.*.?, + .owner_nav = cg.owner_nav, + .target = target, + .ptr_size = switch (target.cpu.arch) { + .wasm32 => .wasm32, + .wasm64 => .wasm64, + else => unreachable, + }, + .func_index = func_index, + .args = cc_result.args, + .return_value = cc_result.return_value, + .local_index = cc_result.local_index, + .mir_instructions = .empty, + .mir_extra = .empty, + .mir_locals = .empty, + .mir_uavs = .empty, + .mir_indirect_function_set = .empty, + .mir_func_tys = .empty, + .error_name_table_ref_count = 0, + }; + defer code_gen.deinit(); + + try code_gen.mir_func_tys.putNoClobber(gpa, fn_ty.toIntern(), {}); + + return generateInner(&code_gen, any_returns) catch |err| switch (err) { + error.CodegenFail, + error.OutOfMemory, + error.Overflow, + => |e| return e, + else => |e| return code_gen.fail("failed to generate function: {s}", .{@errorName(e)}), + }; +} + +fn generateInner(cg: *CodeGen, any_returns: bool) InnerError!Mir { + const zcu = cg.pt.zcu; + try cg.branches.append(cg.gpa, .{}); + // clean up outer branch + defer { + var outer_branch = cg.branches.pop().?; + outer_branch.deinit(cg.gpa); + assert(cg.branches.items.len == 0); // missing branch merge + } + // Generate MIR for function body + try cg.genBody(cg.air.getMainBody()); + + // In case we have a return value, but the last instruction is a noreturn (such as a while loop) + // 
we emit an unreachable instruction to tell the stack validator that part will never be reached. + if (any_returns and cg.air.instructions.len > 0) { + const inst: Air.Inst.Index = @enumFromInt(cg.air.instructions.len - 1); + const last_inst_ty = cg.typeOfIndex(inst); + if (!last_inst_ty.hasRuntimeBitsIgnoreComptime(zcu) or last_inst_ty.isNoReturn(zcu)) { + try cg.addTag(.@"unreachable"); + } + } + // End of function body + try cg.addTag(.end); + try cg.addTag(.dbg_epilogue_begin); + + var mir: Mir = .{ + .instructions = cg.mir_instructions.toOwnedSlice(), + .extra = &.{}, // fallible so assigned after errdefer + .locals = &.{}, // fallible so assigned after errdefer + .prologue = if (cg.initial_stack_value == .none) .none else .{ + .sp_local = cg.initial_stack_value.local.value, + .flags = .{ .stack_alignment = cg.stack_alignment }, + .stack_size = cg.stack_size, + .bottom_stack_local = cg.bottom_stack_value.local.value, + }, + .uavs = cg.mir_uavs.move(), + .indirect_function_set = cg.mir_indirect_function_set.move(), + .func_tys = cg.mir_func_tys.move(), + .error_name_table_ref_count = cg.error_name_table_ref_count, + }; + errdefer mir.deinit(cg.gpa); + mir.extra = try cg.mir_extra.toOwnedSlice(cg.gpa); + mir.locals = try cg.mir_locals.toOwnedSlice(cg.gpa); + return mir; +} + +const CallWValues = struct { + args: []WValue, + return_value: WValue, + local_index: u32, + + fn deinit(values: *CallWValues, gpa: Allocator) void { + gpa.free(values.args); + values.* = undefined; + } +}; + +fn resolveCallingConventionValues( + zcu: *const Zcu, + fn_ty: Type, + target: *const std.Target, +) Allocator.Error!CallWValues { + const gpa = zcu.gpa; + const ip = &zcu.intern_pool; + const fn_info = zcu.typeToFunc(fn_ty).?; + const cc = fn_info.cc; + + var result: CallWValues = .{ + .args = &.{}, + .return_value = .none, + .local_index = 0, + }; + if (cc == .naked) return result; + + var args = std.array_list.Managed(WValue).init(gpa); + defer args.deinit(); + + // Check if we store the result as a pointer to the stack rather than + // by value + if (firstParamSRet(fn_info.cc, Type.fromInterned(fn_info.return_type), zcu, target)) { + // the sret arg will be passed as first argument, therefore we + // set the `return_value` before allocating locals for regular args. + result.return_value = .{ .local = .{ .value = result.local_index, .references = 1 } }; + result.local_index += 1; + } + + switch (cc) { + .auto => { + for (fn_info.param_types.get(ip)) |ty| { + if (!Type.fromInterned(ty).hasRuntimeBitsIgnoreComptime(zcu)) { + continue; + } + + try args.append(.{ .local = .{ .value = result.local_index, .references = 1 } }); + result.local_index += 1; + } + }, + .wasm_mvp => { + for (fn_info.param_types.get(ip)) |ty| { + if (!Type.fromInterned(ty).hasRuntimeBitsIgnoreComptime(zcu)) { + continue; + } + switch (abi.classifyType(.fromInterned(ty), zcu)) { + .direct => |scalar_ty| if (!abi.lowerAsDoubleI64(scalar_ty, zcu)) { + try args.append(.{ .local = .{ .value = result.local_index, .references = 1 } }); + result.local_index += 1; + } else { + try args.append(.{ .local = .{ .value = result.local_index, .references = 1 } }); + try args.append(.{ .local = .{ .value = result.local_index + 1, .references = 1 } }); + result.local_index += 2; + }, + .indirect => { + try args.append(.{ .local = .{ .value = result.local_index, .references = 1 } }); + result.local_index += 1; + }, + } + } + }, + else => unreachable, // Frontend is responsible for emitting an error earlier. 
+ } + result.args = try args.toOwnedSlice(); + return result; +} + +pub fn firstParamSRet( + cc: std.builtin.CallingConvention, + return_type: Type, + zcu: *const Zcu, + target: *const std.Target, +) bool { + if (!return_type.hasRuntimeBitsIgnoreComptime(zcu)) return false; + switch (cc) { + .@"inline" => unreachable, + .auto => return isByRef(return_type, zcu, target), + .wasm_mvp => switch (abi.classifyType(return_type, zcu)) { + .direct => |scalar_ty| return abi.lowerAsDoubleI64(scalar_ty, zcu), + .indirect => return true, + }, + else => return false, + } +} + +/// Lowers a Zig type and its value based on a given calling convention to ensure +/// it matches the ABI. +fn lowerArg(cg: *CodeGen, cc: std.builtin.CallingConvention, ty: Type, value: WValue) !void { + if (cc != .wasm_mvp) { + return cg.lowerToStack(value); + } + + const zcu = cg.pt.zcu; + + switch (abi.classifyType(ty, zcu)) { + .direct => |scalar_type| if (!abi.lowerAsDoubleI64(scalar_type, zcu)) { + if (!isByRef(ty, zcu, cg.target)) { + return cg.lowerToStack(value); + } else { + switch (value) { + .nav_ref, .stack_offset => _ = try cg.load(value, scalar_type, 0), + .dead => unreachable, + else => try cg.emitWValue(value), + } + } + } else { + assert(ty.abiSize(zcu) == 16); + // in this case we have an integer or float that must be lowered as 2 i64's. + try cg.emitWValue(value); + try cg.addMemArg(.i64_load, .{ .offset = value.offset(), .alignment = 8 }); + try cg.emitWValue(value); + try cg.addMemArg(.i64_load, .{ .offset = value.offset() + 8, .alignment = 8 }); + }, + .indirect => return cg.lowerToStack(value), + } +} + +/// Lowers a `WValue` to the stack. This means when the `value` results in +/// `.stack_offset` we calculate the pointer of this offset and use that. +/// The value is left on the stack, and not stored in any temporary. +fn lowerToStack(cg: *CodeGen, value: WValue) !void { + switch (value) { + .stack_offset => |offset| { + try cg.emitWValue(value); + if (offset.value > 0) { + switch (cg.ptr_size) { + .wasm32 => { + try cg.addImm32(offset.value); + try cg.addTag(.i32_add); + }, + .wasm64 => { + try cg.addImm64(offset.value); + try cg.addTag(.i64_add); + }, + } + } + }, + else => try cg.emitWValue(value), + } +} + +/// Creates a local for the initial stack value +/// Asserts `initial_stack_value` is `.none` +fn initializeStack(cg: *CodeGen) !void { + assert(cg.initial_stack_value == .none); + // Reserve a local to store the current stack pointer + // We can later use this local to set the stack pointer back to the value + // we have stored here. + cg.initial_stack_value = try cg.ensureAllocLocal(Type.usize); + // Also reserve a local to store the bottom stack value + cg.bottom_stack_value = try cg.ensureAllocLocal(Type.usize); +} + +/// Reads the stack pointer from `Context.initial_stack_value` and writes it +/// to the global stack pointer variable +fn restoreStackPointer(cg: *CodeGen) !void { + // only restore the pointer if it was initialized + if (cg.initial_stack_value == .none) return; + // Get the original stack pointer's value + try cg.emitWValue(cg.initial_stack_value); + + try cg.addTag(.global_set_sp); +} + +/// From a given type, will create space on the virtual stack to store the value of such type. +/// This returns a `WValue` with its active tag set to `local`, containing the index to the local +/// that points to the position on the virtual stack. This function should be used instead of +/// moveStack unless a local was already created to store the pointer. 
+/// +/// Asserts Type has codegenbits +fn allocStack(cg: *CodeGen, ty: Type) !WValue { + const pt = cg.pt; + const zcu = pt.zcu; + assert(ty.hasRuntimeBitsIgnoreComptime(zcu)); + if (cg.initial_stack_value == .none) { + try cg.initializeStack(); + } + + const abi_size = std.math.cast(u32, ty.abiSize(zcu)) orelse { + return cg.fail("Type {f} with ABI size of {d} exceeds stack frame size", .{ + ty.fmt(pt), ty.abiSize(zcu), + }); + }; + const abi_align = ty.abiAlignment(zcu); + + cg.stack_alignment = cg.stack_alignment.max(abi_align); + + const offset: u32 = @intCast(abi_align.forward(cg.stack_size)); + defer cg.stack_size = offset + abi_size; + + return .{ .stack_offset = .{ .value = offset, .references = 1 } }; +} + +/// From a given AIR instruction generates a pointer to the stack where +/// the value of its type will live. +/// This is different from allocStack where this will use the pointer's alignment +/// if it is set, to ensure the stack alignment will be set correctly. +fn allocStackPtr(cg: *CodeGen, inst: Air.Inst.Index) !WValue { + const pt = cg.pt; + const zcu = pt.zcu; + const ptr_ty = cg.typeOfIndex(inst); + const pointee_ty = ptr_ty.childType(zcu); + + if (cg.initial_stack_value == .none) { + try cg.initializeStack(); + } + + if (!pointee_ty.hasRuntimeBitsIgnoreComptime(zcu)) { + return cg.allocStack(Type.usize); // create a value containing just the stack pointer. + } + + const abi_alignment = ptr_ty.ptrAlignment(zcu); + const abi_size = std.math.cast(u32, pointee_ty.abiSize(zcu)) orelse { + return cg.fail("Type {f} with ABI size of {d} exceeds stack frame size", .{ + pointee_ty.fmt(pt), pointee_ty.abiSize(zcu), + }); + }; + cg.stack_alignment = cg.stack_alignment.max(abi_alignment); + + const offset: u32 = @intCast(abi_alignment.forward(cg.stack_size)); + defer cg.stack_size = offset + abi_size; + + return .{ .stack_offset = .{ .value = offset, .references = 1 } }; +} + +/// From given zig bitsize, returns the wasm bitsize +fn toWasmBits(bits: u16) ?u16 { + return for ([_]u16{ 32, 64, 128 }) |wasm_bits| { + if (bits <= wasm_bits) return wasm_bits; + } else null; +} + +/// Performs a copy of bytes for a given type. Copying all bytes +/// from rhs to lhs. +fn memcpy(cg: *CodeGen, dst: WValue, src: WValue, len: WValue) !void { + const len_known_neq_0 = switch (len) { + .imm32 => |val| if (val != 0) true else return, + .imm64 => |val| if (val != 0) true else return, + else => false, + }; + // When bulk_memory is enabled, we lower it to wasm's memcpy instruction. + // If not, we lower it ourselves manually + if (cg.target.cpu.has(.wasm, .bulk_memory)) { + const len0_ok = cg.target.cpu.has(.wasm, .nontrapping_bulk_memory_len0); + const emit_check = !(len0_ok or len_known_neq_0); + + if (emit_check) { + try cg.startBlock(.block, .empty); + + // Even if `len` is zero, the spec requires an implementation to trap if `src + len` or + // `dst + len` are out of memory bounds. This can easily happen in Zig in a case such + // as: + // + // const dst: [*]u8 = undefined; + // const src: [*]u8 = undefined; + // var len: usize = runtime_zero(); + // @memcpy(dst[0..len], src[0..len]); + // + // So explicitly avoid using `memory.copy` in the `len == 0` case. Lovely design. 
+ try cg.emitWValue(len); + try cg.addTag(.i32_eqz); + try cg.addLabel(.br_if, 0); + } + + try cg.lowerToStack(dst); + try cg.lowerToStack(src); + try cg.emitWValue(len); + try cg.addExtended(.memory_copy); + + if (emit_check) { + try cg.endBlock(); + } + + return; + } + + // when the length is comptime-known, rather than a runtime value, we can optimize the generated code by having + // the loop during codegen, rather than inserting a runtime loop into the binary. + switch (len) { + .imm32, .imm64 => blk: { + const length = switch (len) { + .imm32 => |val| val, + .imm64 => |val| val, + else => unreachable, + }; + // if the size (length) is more than 32 bytes, we use a runtime loop instead to prevent + // binary size bloat. + if (length > 32) break :blk; + var offset: u32 = 0; + const lhs_base = dst.offset(); + const rhs_base = src.offset(); + while (offset < length) : (offset += 1) { + // get dst's address to store the result + try cg.emitWValue(dst); + // load byte from src's address + try cg.emitWValue(src); + switch (cg.ptr_size) { + .wasm32 => { + try cg.addMemArg(.i32_load8_u, .{ .offset = rhs_base + offset, .alignment = 1 }); + try cg.addMemArg(.i32_store8, .{ .offset = lhs_base + offset, .alignment = 1 }); + }, + .wasm64 => { + try cg.addMemArg(.i64_load8_u, .{ .offset = rhs_base + offset, .alignment = 1 }); + try cg.addMemArg(.i64_store8, .{ .offset = lhs_base + offset, .alignment = 1 }); + }, + } + } + return; + }, + else => {}, + } + + // allocate a local for the offset, and set it to 0. + // This to ensure that inside loops we correctly re-set the counter. + var offset = try cg.allocLocal(Type.usize); // local for counter + defer offset.free(cg); + switch (cg.ptr_size) { + .wasm32 => try cg.addImm32(0), + .wasm64 => try cg.addImm64(0), + } + try cg.addLocal(.local_set, offset.local.value); + + // outer block to jump to when loop is done + try cg.startBlock(.block, .empty); + try cg.startBlock(.loop, .empty); + + // loop condition (offset == length -> break) + { + try cg.emitWValue(offset); + try cg.emitWValue(len); + switch (cg.ptr_size) { + .wasm32 => try cg.addTag(.i32_eq), + .wasm64 => try cg.addTag(.i64_eq), + } + try cg.addLabel(.br_if, 1); // jump out of loop into outer block (finished) + } + + // get dst ptr + { + try cg.emitWValue(dst); + try cg.emitWValue(offset); + switch (cg.ptr_size) { + .wasm32 => try cg.addTag(.i32_add), + .wasm64 => try cg.addTag(.i64_add), + } + } + + // get src value and also store in dst + { + try cg.emitWValue(src); + try cg.emitWValue(offset); + switch (cg.ptr_size) { + .wasm32 => { + try cg.addTag(.i32_add); + try cg.addMemArg(.i32_load8_u, .{ .offset = src.offset(), .alignment = 1 }); + try cg.addMemArg(.i32_store8, .{ .offset = dst.offset(), .alignment = 1 }); + }, + .wasm64 => { + try cg.addTag(.i64_add); + try cg.addMemArg(.i64_load8_u, .{ .offset = src.offset(), .alignment = 1 }); + try cg.addMemArg(.i64_store8, .{ .offset = dst.offset(), .alignment = 1 }); + }, + } + } + + // increment loop counter + { + try cg.emitWValue(offset); + switch (cg.ptr_size) { + .wasm32 => { + try cg.addImm32(1); + try cg.addTag(.i32_add); + }, + .wasm64 => { + try cg.addImm64(1); + try cg.addTag(.i64_add); + }, + } + try cg.addLocal(.local_set, offset.local.value); + try cg.addLabel(.br, 0); // jump to start of loop + } + try cg.endBlock(); // close off loop block + try cg.endBlock(); // close off outer block +} + +fn ptrSize(cg: *const CodeGen) u16 { + return @divExact(cg.target.ptrBitWidth(), 8); +} + +/// For a given `Type`, will return true when the 
type will be passed
+/// by reference, rather than by value.
+fn isByRef(ty: Type, zcu: *const Zcu, target: *const std.Target) bool {
+    const ip = &zcu.intern_pool;
+    switch (ty.zigTypeTag(zcu)) {
+        .type,
+        .comptime_int,
+        .comptime_float,
+        .enum_literal,
+        .undefined,
+        .null,
+        .@"opaque",
+        => unreachable,
+
+        .noreturn,
+        .void,
+        .bool,
+        .error_set,
+        .@"fn",
+        .@"anyframe",
+        => return false,
+
+        .array,
+        .frame,
+        => return ty.hasRuntimeBitsIgnoreComptime(zcu),
+        .@"union" => {
+            if (zcu.typeToUnion(ty)) |union_obj| {
+                if (union_obj.flagsUnordered(ip).layout == .@"packed") {
+                    return ty.abiSize(zcu) > 8;
+                }
+            }
+            return ty.hasRuntimeBitsIgnoreComptime(zcu);
+        },
+        .@"struct" => {
+            if (zcu.typeToPackedStruct(ty)) |packed_struct| {
+                return isByRef(Type.fromInterned(packed_struct.backingIntTypeUnordered(ip)), zcu, target);
+            }
+            return ty.hasRuntimeBitsIgnoreComptime(zcu);
+        },
+        .vector => return determineSimdStoreStrategy(ty, zcu, target) == .unrolled,
+        .int => return ty.intInfo(zcu).bits > 64,
+        .@"enum" => return ty.intInfo(zcu).bits > 64,
+        .float => return ty.floatBits(target) > 64,
+        .error_union => {
+            const pl_ty = ty.errorUnionPayload(zcu);
+            if (!pl_ty.hasRuntimeBitsIgnoreComptime(zcu)) {
+                return false;
+            }
+            return true;
+        },
+        .optional => {
+            if (ty.isPtrLikeOptional(zcu)) return false;
+            const pl_type = ty.optionalChild(zcu);
+            if (pl_type.zigTypeTag(zcu) == .error_set) return false;
+            return pl_type.hasRuntimeBitsIgnoreComptime(zcu);
+        },
+        .pointer => {
+            // Slices act like structs and are passed by reference.
+            if (ty.isSlice(zcu)) return true;
+            return false;
+        },
+    }
+}
+
+const SimdStoreStrategy = enum {
+    direct,
+    unrolled,
+};
+
+/// For a given vector type, returns the `SimdStoreStrategy`.
+/// This means that when a given type is 128 bits and either the simd128 or relaxed-simd
+/// feature is enabled, the function returns `.direct`. This allows the value to be stored
+/// with a single instruction, rather than as an unrolled sequence.
+pub fn determineSimdStoreStrategy(ty: Type, zcu: *const Zcu, target: *const std.Target) SimdStoreStrategy {
+    assert(ty.zigTypeTag(zcu) == .vector);
+    if (ty.bitSize(zcu) != 128) return .unrolled;
+    if (target.cpu.has(.wasm, .relaxed_simd) or target.cpu.has(.wasm, .simd128)) {
+        return .direct;
+    }
+    return .unrolled;
+}
+
+/// Creates a new local for a pointer that points to memory with the given offset.
+/// This can be used to get a pointer to a struct field, error payload, etc.
+/// By providing `modify` as the action, it will modify the given `ptr_value` instead of making a new
+/// local value to store the pointer. This allows for local re-use and improves binary size.
+fn buildPointerOffset(cg: *CodeGen, ptr_value: WValue, offset: u64, action: enum { modify, new }) InnerError!WValue {
+    // Do not perform arithmetic when the offset is 0.
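+    // Illustrative usage only (the names here are hypothetical, not from this file):
+    // a caller wanting a pointer to byte offset 4 of an aggregate held in `agg_ptr`
+    // could write
+    //     const field_ptr = try cg.buildPointerOffset(agg_ptr, 4, .new);
+    // while passing `.modify` would instead bump the local behind `agg_ptr` in place.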
+ if (offset == 0 and ptr_value.offset() == 0 and action == .modify) return ptr_value; + const result_ptr: WValue = switch (action) { + .new => try cg.ensureAllocLocal(Type.usize), + .modify => ptr_value, + }; + try cg.emitWValue(ptr_value); + if (offset + ptr_value.offset() > 0) { + switch (cg.ptr_size) { + .wasm32 => { + try cg.addImm32(@intCast(offset + ptr_value.offset())); + try cg.addTag(.i32_add); + }, + .wasm64 => { + try cg.addImm64(offset + ptr_value.offset()); + try cg.addTag(.i64_add); + }, + } + } + try cg.addLocal(.local_set, result_ptr.local.value); + return result_ptr; +} + +fn genInst(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const air_tags = cg.air.instructions.items(.tag); + return switch (air_tags[@intFromEnum(inst)]) { + .inferred_alloc, .inferred_alloc_comptime => unreachable, + + .add => cg.airBinOp(inst, .add), + .add_sat => cg.airSatBinOp(inst, .add), + .add_wrap => cg.airWrapBinOp(inst, .add), + .sub => cg.airBinOp(inst, .sub), + .sub_sat => cg.airSatBinOp(inst, .sub), + .sub_wrap => cg.airWrapBinOp(inst, .sub), + .mul => cg.airBinOp(inst, .mul), + .mul_sat => cg.airSatMul(inst), + .mul_wrap => cg.airWrapBinOp(inst, .mul), + .div_float, .div_exact => cg.airDiv(inst), + .div_trunc => cg.airDivTrunc(inst), + .div_floor => cg.airDivFloor(inst), + .bit_and => cg.airBinOp(inst, .@"and"), + .bit_or => cg.airBinOp(inst, .@"or"), + .bool_and => cg.airBinOp(inst, .@"and"), + .bool_or => cg.airBinOp(inst, .@"or"), + .rem => cg.airRem(inst), + .mod => cg.airMod(inst), + .shl => cg.airWrapBinOp(inst, .shl), + .shl_exact => cg.airBinOp(inst, .shl), + .shl_sat => cg.airShlSat(inst), + .shr, .shr_exact => cg.airBinOp(inst, .shr), + .xor => cg.airBinOp(inst, .xor), + .max => cg.airMaxMin(inst, .fmax, .gt), + .min => cg.airMaxMin(inst, .fmin, .lt), + .mul_add => cg.airMulAdd(inst), + + .sqrt => cg.airUnaryFloatOp(inst, .sqrt), + .sin => cg.airUnaryFloatOp(inst, .sin), + .cos => cg.airUnaryFloatOp(inst, .cos), + .tan => cg.airUnaryFloatOp(inst, .tan), + .exp => cg.airUnaryFloatOp(inst, .exp), + .exp2 => cg.airUnaryFloatOp(inst, .exp2), + .log => cg.airUnaryFloatOp(inst, .log), + .log2 => cg.airUnaryFloatOp(inst, .log2), + .log10 => cg.airUnaryFloatOp(inst, .log10), + .floor => cg.airUnaryFloatOp(inst, .floor), + .ceil => cg.airUnaryFloatOp(inst, .ceil), + .round => cg.airUnaryFloatOp(inst, .round), + .trunc_float => cg.airUnaryFloatOp(inst, .trunc), + .neg => cg.airUnaryFloatOp(inst, .neg), + + .abs => cg.airAbs(inst), + + .add_with_overflow => cg.airAddSubWithOverflow(inst, .add), + .sub_with_overflow => cg.airAddSubWithOverflow(inst, .sub), + .shl_with_overflow => cg.airShlWithOverflow(inst), + .mul_with_overflow => cg.airMulWithOverflow(inst), + + .clz => cg.airClz(inst), + .ctz => cg.airCtz(inst), + + .cmp_eq => cg.airCmp(inst, .eq), + .cmp_gte => cg.airCmp(inst, .gte), + .cmp_gt => cg.airCmp(inst, .gt), + .cmp_lte => cg.airCmp(inst, .lte), + .cmp_lt => cg.airCmp(inst, .lt), + .cmp_neq => cg.airCmp(inst, .neq), + + .cmp_vector => cg.airCmpVector(inst), + .cmp_lt_errors_len => cg.airCmpLtErrorsLen(inst), + + .array_elem_val => cg.airArrayElemVal(inst), + .array_to_slice => cg.airArrayToSlice(inst), + .alloc => cg.airAlloc(inst), + .arg => cg.airArg(inst), + .bitcast => cg.airBitcast(inst), + .block => cg.airBlock(inst), + .trap => cg.airTrap(inst), + .breakpoint => cg.airBreakpoint(inst), + .br => cg.airBr(inst), + .repeat => cg.airRepeat(inst), + .switch_dispatch => cg.airSwitchDispatch(inst), + .cond_br => cg.airCondBr(inst), + .intcast => cg.airIntcast(inst), + 
.fptrunc => cg.airFptrunc(inst), + .fpext => cg.airFpext(inst), + .int_from_float => cg.airIntFromFloat(inst), + .float_from_int => cg.airFloatFromInt(inst), + .get_union_tag => cg.airGetUnionTag(inst), + + .@"try" => cg.airTry(inst), + .try_cold => cg.airTry(inst), + .try_ptr => cg.airTryPtr(inst), + .try_ptr_cold => cg.airTryPtr(inst), + + .dbg_stmt => cg.airDbgStmt(inst), + .dbg_empty_stmt => try cg.finishAir(inst, .none, &.{}), + .dbg_inline_block => cg.airDbgInlineBlock(inst), + .dbg_var_ptr => cg.airDbgVar(inst, .local_var, true), + .dbg_var_val => cg.airDbgVar(inst, .local_var, false), + .dbg_arg_inline => cg.airDbgVar(inst, .arg, false), + + .call => cg.airCall(inst, .auto), + .call_always_tail => cg.airCall(inst, .always_tail), + .call_never_tail => cg.airCall(inst, .never_tail), + .call_never_inline => cg.airCall(inst, .never_inline), + + .is_err => cg.airIsErr(inst, .i32_ne, .value), + .is_non_err => cg.airIsErr(inst, .i32_eq, .value), + .is_err_ptr => cg.airIsErr(inst, .i32_ne, .ptr), + .is_non_err_ptr => cg.airIsErr(inst, .i32_eq, .ptr), + + .is_null => cg.airIsNull(inst, .i32_eq, .value), + .is_non_null => cg.airIsNull(inst, .i32_ne, .value), + .is_null_ptr => cg.airIsNull(inst, .i32_eq, .ptr), + .is_non_null_ptr => cg.airIsNull(inst, .i32_ne, .ptr), + + .load => cg.airLoad(inst), + .loop => cg.airLoop(inst), + .memset => cg.airMemset(inst, false), + .memset_safe => cg.airMemset(inst, true), + .not => cg.airNot(inst), + .optional_payload => cg.airOptionalPayload(inst), + .optional_payload_ptr => cg.airOptionalPayloadPtr(inst), + .optional_payload_ptr_set => cg.airOptionalPayloadPtrSet(inst), + .ptr_add => cg.airPtrBinOp(inst, .add), + .ptr_sub => cg.airPtrBinOp(inst, .sub), + .ptr_elem_ptr => cg.airPtrElemPtr(inst), + .ptr_elem_val => cg.airPtrElemVal(inst), + .ret => cg.airRet(inst), + .ret_safe => cg.airRet(inst), // TODO + .ret_ptr => cg.airRetPtr(inst), + .ret_load => cg.airRetLoad(inst), + .splat => cg.airSplat(inst), + .select => cg.airSelect(inst), + .shuffle_one => cg.airShuffleOne(inst), + .shuffle_two => cg.airShuffleTwo(inst), + .reduce => cg.airReduce(inst), + .aggregate_init => cg.airAggregateInit(inst), + .union_init => cg.airUnionInit(inst), + .prefetch => cg.airPrefetch(inst), + .popcount => cg.airPopcount(inst), + .byte_swap => cg.airByteSwap(inst), + .bit_reverse => cg.airBitReverse(inst), + + .slice => cg.airSlice(inst), + .slice_len => cg.airSliceLen(inst), + .slice_elem_val => cg.airSliceElemVal(inst), + .slice_elem_ptr => cg.airSliceElemPtr(inst), + .slice_ptr => cg.airSlicePtr(inst), + .ptr_slice_len_ptr => cg.airPtrSliceFieldPtr(inst, cg.ptrSize()), + .ptr_slice_ptr_ptr => cg.airPtrSliceFieldPtr(inst, 0), + .store => cg.airStore(inst, false), + .store_safe => cg.airStore(inst, true), + + .set_union_tag => cg.airSetUnionTag(inst), + .struct_field_ptr => cg.airStructFieldPtr(inst), + .struct_field_ptr_index_0 => cg.airStructFieldPtrIndex(inst, 0), + .struct_field_ptr_index_1 => cg.airStructFieldPtrIndex(inst, 1), + .struct_field_ptr_index_2 => cg.airStructFieldPtrIndex(inst, 2), + .struct_field_ptr_index_3 => cg.airStructFieldPtrIndex(inst, 3), + .struct_field_val => cg.airStructFieldVal(inst), + .field_parent_ptr => cg.airFieldParentPtr(inst), + + .switch_br => cg.airSwitchBr(inst, false), + .loop_switch_br => cg.airSwitchBr(inst, true), + .trunc => cg.airTrunc(inst), + .unreach => cg.airUnreachable(inst), + + .wrap_optional => cg.airWrapOptional(inst), + .unwrap_errunion_payload => cg.airUnwrapErrUnionPayload(inst, false), + 
.unwrap_errunion_payload_ptr => cg.airUnwrapErrUnionPayload(inst, true), + .unwrap_errunion_err => cg.airUnwrapErrUnionError(inst, false), + .unwrap_errunion_err_ptr => cg.airUnwrapErrUnionError(inst, true), + .wrap_errunion_payload => cg.airWrapErrUnionPayload(inst), + .wrap_errunion_err => cg.airWrapErrUnionErr(inst), + .errunion_payload_ptr_set => cg.airErrUnionPayloadPtrSet(inst), + .error_name => cg.airErrorName(inst), + + .wasm_memory_size => cg.airWasmMemorySize(inst), + .wasm_memory_grow => cg.airWasmMemoryGrow(inst), + + .memcpy, .memmove => cg.airMemcpy(inst), + + .ret_addr => cg.airRetAddr(inst), + .tag_name => cg.airTagName(inst), + + .error_set_has_value => cg.airErrorSetHasValue(inst), + .frame_addr => cg.airFrameAddress(inst), + + .runtime_nav_ptr => cg.airRuntimeNavPtr(inst), + + .assembly, + + .err_return_trace, + .set_err_return_trace, + .save_err_return_trace_index, + .is_named_enum_value, + .addrspace_cast, + .vector_store_elem, + .c_va_arg, + .c_va_copy, + .c_va_end, + .c_va_start, + => |tag| return cg.fail("TODO: Implement wasm inst: {s}", .{@tagName(tag)}), + + .atomic_load => cg.airAtomicLoad(inst), + .atomic_store_unordered, + .atomic_store_monotonic, + .atomic_store_release, + .atomic_store_seq_cst, + // in WebAssembly, all atomic instructions are sequentially ordered. + => cg.airAtomicStore(inst), + .atomic_rmw => cg.airAtomicRmw(inst), + .cmpxchg_weak => cg.airCmpxchg(inst), + .cmpxchg_strong => cg.airCmpxchg(inst), + + .add_optimized, + .sub_optimized, + .mul_optimized, + .div_float_optimized, + .div_trunc_optimized, + .div_floor_optimized, + .div_exact_optimized, + .rem_optimized, + .mod_optimized, + .neg_optimized, + .cmp_lt_optimized, + .cmp_lte_optimized, + .cmp_eq_optimized, + .cmp_gte_optimized, + .cmp_gt_optimized, + .cmp_neq_optimized, + .cmp_vector_optimized, + .reduce_optimized, + .int_from_float_optimized, + => return cg.fail("TODO implement optimized float mode", .{}), + + .add_safe, + .sub_safe, + .mul_safe, + .intcast_safe, + .int_from_float_safe, + .int_from_float_optimized_safe, + => return cg.fail("TODO implement safety_checked_instructions", .{}), + + .work_item_id, + .work_group_size, + .work_group_id, + => unreachable, + }; +} + +fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { + const zcu = cg.pt.zcu; + const ip = &zcu.intern_pool; + + for (body) |inst| { + if (cg.liveness.isUnused(inst) and !cg.air.mustLower(inst, ip)) { + continue; + } + const old_bookkeeping_value = cg.air_bookkeeping; + try cg.currentBranch().values.ensureUnusedCapacity(cg.gpa, Air.Liveness.bpi); + try cg.genInst(inst); + + if (std.debug.runtime_safety and cg.air_bookkeeping < old_bookkeeping_value + 1) { + std.debug.panic("Missing call to `finishAir` in AIR instruction %{d} ('{t}')", .{ + inst, + cg.air.instructions.items(.tag)[@intFromEnum(inst)], + }); + } + } +} + +fn airRet(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const zcu = cg.pt.zcu; + const un_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].un_op; + const operand = try cg.resolveInst(un_op); + const fn_info = zcu.typeToFunc(zcu.navValue(cg.owner_nav).typeOf(zcu)).?; + const ret_ty = Type.fromInterned(fn_info.return_type); + + // result must be stored in the stack and we return a pointer + // to the stack instead + if (cg.return_value != .none) { + try cg.store(cg.return_value, operand, ret_ty, 0); + } else if (fn_info.cc == .wasm_mvp and ret_ty.hasRuntimeBitsIgnoreComptime(zcu)) { + switch (abi.classifyType(ret_ty, zcu)) { + .direct => |scalar_type| { + 
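+                // Illustrative note: a simple scalar return type such as `u32`
+                // classifies as `.direct` here, so the value is just left on the
+                // wasm value stack; by-ref results are loaded from memory first.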
assert(!abi.lowerAsDoubleI64(scalar_type, zcu)); + if (!isByRef(ret_ty, zcu, cg.target)) { + try cg.emitWValue(operand); + } else { + _ = try cg.load(operand, scalar_type, 0); + } + }, + .indirect => unreachable, + } + } else { + if (!ret_ty.hasRuntimeBitsIgnoreComptime(zcu) and ret_ty.isError(zcu)) { + try cg.addImm32(0); + } else { + try cg.emitWValue(operand); + } + } + try cg.restoreStackPointer(); + try cg.addTag(.@"return"); + + return cg.finishAir(inst, .none, &.{un_op}); +} + +fn airRetPtr(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const zcu = cg.pt.zcu; + const child_type = cg.typeOfIndex(inst).childType(zcu); + + const result = result: { + if (!child_type.isFnOrHasRuntimeBitsIgnoreComptime(zcu)) { + break :result try cg.allocStack(Type.usize); // create pointer to void + } + + const fn_info = zcu.typeToFunc(zcu.navValue(cg.owner_nav).typeOf(zcu)).?; + if (firstParamSRet(fn_info.cc, Type.fromInterned(fn_info.return_type), zcu, cg.target)) { + break :result cg.return_value; + } + + break :result try cg.allocStackPtr(inst); + }; + + return cg.finishAir(inst, result, &.{}); +} + +fn airRetLoad(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const zcu = cg.pt.zcu; + const un_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].un_op; + const operand = try cg.resolveInst(un_op); + const ret_ty = cg.typeOf(un_op).childType(zcu); + + const fn_info = zcu.typeToFunc(zcu.navValue(cg.owner_nav).typeOf(zcu)).?; + if (!ret_ty.hasRuntimeBitsIgnoreComptime(zcu)) { + if (ret_ty.isError(zcu)) { + try cg.addImm32(0); + } + } else if (!firstParamSRet(fn_info.cc, Type.fromInterned(fn_info.return_type), zcu, cg.target)) { + // leave on the stack + _ = try cg.load(operand, ret_ty, 0); + } + + try cg.restoreStackPointer(); + try cg.addTag(.@"return"); + return cg.finishAir(inst, .none, &.{un_op}); +} + +fn airCall(cg: *CodeGen, inst: Air.Inst.Index, modifier: std.builtin.CallModifier) InnerError!void { + if (modifier == .always_tail) return cg.fail("TODO implement tail calls for wasm", .{}); + const pl_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].pl_op; + const extra = cg.air.extraData(Air.Call, pl_op.payload); + const args: []const Air.Inst.Ref = @ptrCast(cg.air.extra.items[extra.end..][0..extra.data.args_len]); + const ty = cg.typeOf(pl_op.operand); + + const pt = cg.pt; + const zcu = pt.zcu; + const ip = &zcu.intern_pool; + const fn_ty = switch (ty.zigTypeTag(zcu)) { + .@"fn" => ty, + .pointer => ty.childType(zcu), + else => unreachable, + }; + const ret_ty = fn_ty.fnReturnType(zcu); + const fn_info = zcu.typeToFunc(fn_ty).?; + const first_param_sret = firstParamSRet(fn_info.cc, Type.fromInterned(fn_info.return_type), zcu, cg.target); + + const callee: ?InternPool.Nav.Index = blk: { + const func_val = (try cg.air.value(pl_op.operand, pt)) orelse break :blk null; + + switch (ip.indexToKey(func_val.toIntern())) { + inline .func, .@"extern" => |x| break :blk x.owner_nav, + .ptr => |ptr| if (ptr.byte_offset == 0) switch (ptr.base_addr) { + .nav => |nav| break :blk nav, + else => {}, + }, + else => {}, + } + return cg.fail("unable to lower callee to a function index", .{}); + }; + + const sret: WValue = if (first_param_sret) blk: { + const sret_local = try cg.allocStack(ret_ty); + try cg.lowerToStack(sret_local); + break :blk sret_local; + } else .none; + + for (args) |arg| { + const arg_val = try cg.resolveInst(arg); + + const arg_ty = cg.typeOf(arg); + if (!arg_ty.hasRuntimeBitsIgnoreComptime(zcu)) continue; + + try cg.lowerArg(zcu.typeToFunc(fn_ty).?.cc, arg_ty, 
arg_val); + } + + if (callee) |nav_index| { + try cg.addInst(.{ .tag = .call_nav, .data = .{ .nav_index = nav_index } }); + } else { + // in this case we call a function pointer + // so load its value onto the stack + assert(ty.zigTypeTag(zcu) == .pointer); + const operand = try cg.resolveInst(pl_op.operand); + try cg.emitWValue(operand); + + try cg.mir_func_tys.put(cg.gpa, fn_ty.toIntern(), {}); + try cg.addInst(.{ + .tag = .call_indirect, + .data = .{ .ip_index = fn_ty.toIntern() }, + }); + } + + const result_value = result_value: { + if (!ret_ty.hasRuntimeBitsIgnoreComptime(zcu) and !ret_ty.isError(zcu)) { + break :result_value .none; + } else if (ret_ty.isNoReturn(zcu)) { + try cg.addTag(.@"unreachable"); + break :result_value .none; + } else if (first_param_sret) { + break :result_value sret; + } else if (zcu.typeToFunc(fn_ty).?.cc == .wasm_mvp) { + switch (abi.classifyType(ret_ty, zcu)) { + .direct => |scalar_type| { + assert(!abi.lowerAsDoubleI64(scalar_type, zcu)); + if (!isByRef(ret_ty, zcu, cg.target)) { + const result_local = try cg.allocLocal(ret_ty); + try cg.addLocal(.local_set, result_local.local.value); + break :result_value result_local; + } else { + const result_local = try cg.allocLocal(ret_ty); + try cg.addLocal(.local_set, result_local.local.value); + const result = try cg.allocStack(ret_ty); + try cg.store(result, result_local, scalar_type, 0); + break :result_value result; + } + }, + .indirect => unreachable, + } + } else { + const result_local = try cg.allocLocal(ret_ty); + try cg.addLocal(.local_set, result_local.local.value); + break :result_value result_local; + } + }; + + var bt = try cg.iterateBigTomb(inst, 1 + args.len); + bt.feed(pl_op.operand); + for (args) |arg| bt.feed(arg); + return bt.finishAir(result_value); +} + +fn airAlloc(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const value = try cg.allocStackPtr(inst); + return cg.finishAir(inst, value, &.{}); +} + +fn airStore(cg: *CodeGen, inst: Air.Inst.Index, safety: bool) InnerError!void { + const pt = cg.pt; + const zcu = pt.zcu; + const bin_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].bin_op; + + const lhs = try cg.resolveInst(bin_op.lhs); + const rhs = try cg.resolveInst(bin_op.rhs); + const ptr_ty = cg.typeOf(bin_op.lhs); + const ptr_info = ptr_ty.ptrInfo(zcu); + const ty = ptr_ty.childType(zcu); + + if (!safety and bin_op.rhs == .undef) { + return cg.finishAir(inst, .none, &.{ bin_op.lhs, bin_op.rhs }); + } + + if (ptr_info.packed_offset.host_size == 0) { + try cg.store(lhs, rhs, ty, 0); + } else { + // at this point we have a non-natural alignment, we must + // load the value, and then shift+or the rhs into the result location. 
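+        // A rough sketch of the read-modify-write built below, assuming a host
+        // integer of `host_size` bits holding a field of `bit_size` bits at
+        // `bit_offset`:
+        //     host = (host & ~(field_mask << bit_offset)) | (@as(HostInt, value) << bit_offset)
+        // where `field_mask` has the low `bit_size` bits set and `HostInt` is the
+        // unsigned host integer type.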
+ const host_size = ptr_info.packed_offset.host_size * 8; + const host_ty = try pt.intType(.unsigned, host_size); + const bit_size: u16 = @intCast(ty.bitSize(zcu)); + const bit_offset = ptr_info.packed_offset.bit_offset; + + const mask_val = try cg.resolveValue(val: { + const limbs = try cg.gpa.alloc( + std.math.big.Limb, + std.math.big.int.calcTwosCompLimbCount(host_size) + 1, + ); + defer cg.gpa.free(limbs); + + var mask_bigint: std.math.big.int.Mutable = .{ .limbs = limbs, .positive = undefined, .len = undefined }; + mask_bigint.setTwosCompIntLimit(.max, .unsigned, host_size); + + if (bit_size != host_size) { + mask_bigint.shiftRight(mask_bigint.toConst(), host_size - bit_size); + } + if (bit_offset != 0) { + mask_bigint.shiftLeft(mask_bigint.toConst(), bit_offset); + } + mask_bigint.bitNotWrap(mask_bigint.toConst(), .unsigned, host_size); + + break :val try pt.intValue_big(host_ty, mask_bigint.toConst()); + }); + + const shift_val: WValue = if (33 <= host_size and host_size <= 64) + .{ .imm64 = bit_offset } + else + .{ .imm32 = bit_offset }; + + if (host_size <= 64) { + try cg.emitWValue(lhs); + } + const loaded = if (host_size <= 64) + try cg.load(lhs, host_ty, 0) + else + lhs; + const anded = try cg.binOp(loaded, mask_val, host_ty, .@"and"); + const extended_value = try cg.intcast(rhs, ty, host_ty); + const shifted_value = if (bit_offset > 0) + try cg.binOp(extended_value, shift_val, host_ty, .shl) + else + extended_value; + const result = try cg.binOp(anded, shifted_value, host_ty, .@"or"); + if (host_size <= 64) { + try cg.store(.stack, result, host_ty, lhs.offset()); + } else { + try cg.store(lhs, result, host_ty, lhs.offset()); + } + } + + return cg.finishAir(inst, .none, &.{ bin_op.lhs, bin_op.rhs }); +} + +fn store(cg: *CodeGen, lhs: WValue, rhs: WValue, ty: Type, offset: u32) InnerError!void { + assert(!(lhs != .stack and rhs == .stack)); + const pt = cg.pt; + const zcu = pt.zcu; + const abi_size = ty.abiSize(zcu); + + if (!ty.hasRuntimeBitsIgnoreComptime(zcu)) return; + + switch (ty.zigTypeTag(zcu)) { + .error_union => { + const pl_ty = ty.errorUnionPayload(zcu); + if (!pl_ty.hasRuntimeBitsIgnoreComptime(zcu)) { + return cg.store(lhs, rhs, Type.anyerror, offset); + } + + const len = @as(u32, @intCast(abi_size)); + assert(offset == 0); + return cg.memcpy(lhs, rhs, .{ .imm32 = len }); + }, + .optional => { + if (ty.isPtrLikeOptional(zcu)) { + return cg.store(lhs, rhs, Type.usize, offset); + } + const pl_ty = ty.optionalChild(zcu); + if (!pl_ty.hasRuntimeBitsIgnoreComptime(zcu)) { + return cg.store(lhs, rhs, Type.u8, offset); + } + if (pl_ty.zigTypeTag(zcu) == .error_set) { + return cg.store(lhs, rhs, Type.anyerror, offset); + } + + const len = @as(u32, @intCast(abi_size)); + assert(offset == 0); + return cg.memcpy(lhs, rhs, .{ .imm32 = len }); + }, + .@"struct", .array, .@"union" => if (isByRef(ty, zcu, cg.target)) { + const len = @as(u32, @intCast(abi_size)); + assert(offset == 0); + return cg.memcpy(lhs, rhs, .{ .imm32 = len }); + }, + .vector => switch (determineSimdStoreStrategy(ty, zcu, cg.target)) { + .unrolled => { + const len: u32 = @intCast(abi_size); + return cg.memcpy(lhs, rhs, .{ .imm32 = len }); + }, + .direct => { + try cg.emitWValue(lhs); + try cg.lowerToStack(rhs); + // TODO: Add helper functions for simd opcodes + const extra_index: u32 = @intCast(cg.mir_extra.items.len); + // stores as := opcode, offset, alignment (opcode::memarg) + try cg.mir_extra.appendSlice(cg.gpa, &[_]u32{ + @intFromEnum(std.wasm.SimdOpcode.v128_store), + offset + lhs.offset(), + 
@intCast(ty.abiAlignment(zcu).toByteUnits() orelse 0), + }); + return cg.addInst(.{ .tag = .simd_prefix, .data = .{ .payload = extra_index } }); + }, + }, + .pointer => { + if (ty.isSlice(zcu)) { + assert(offset == 0); + // store pointer first + // lower it to the stack so we do not have to store rhs into a local first + try cg.emitWValue(lhs); + const ptr_local = try cg.load(rhs, Type.usize, 0); + try cg.store(.stack, ptr_local, Type.usize, 0 + lhs.offset()); + + // retrieve length from rhs, and store that alongside lhs as well + try cg.emitWValue(lhs); + const len_local = try cg.load(rhs, Type.usize, cg.ptrSize()); + try cg.store(.stack, len_local, Type.usize, cg.ptrSize() + lhs.offset()); + return; + } + }, + .int, .@"enum", .float => if (abi_size > 8 and abi_size <= 16) { + assert(offset == 0); + try cg.emitWValue(lhs); + const lsb = try cg.load(rhs, Type.u64, 0); + try cg.store(.stack, lsb, Type.u64, 0 + lhs.offset()); + + try cg.emitWValue(lhs); + const msb = try cg.load(rhs, Type.u64, 8); + try cg.store(.stack, msb, Type.u64, 8 + lhs.offset()); + return; + } else if (abi_size > 16) { + assert(offset == 0); + try cg.memcpy(lhs, rhs, .{ .imm32 = @as(u32, @intCast(ty.abiSize(zcu))) }); + }, + else => if (abi_size > 8) { + return cg.fail("TODO: `store` for type `{f}` with abisize `{d}`", .{ ty.fmt(pt), abi_size }); + }, + } + try cg.emitWValue(lhs); + // In this case we're actually interested in storing the stack position + // into lhs, so we calculate that and emit that instead + try cg.lowerToStack(rhs); + + const valtype = typeToValtype(ty, zcu, cg.target); + const opcode = buildOpcode(.{ + .valtype1 = valtype, + .width = @as(u8, @intCast(abi_size * 8)), + .op = .store, + }); + + // store rhs value at stack pointer's location in memory + try cg.addMemArg( + Mir.Inst.Tag.fromOpcode(opcode), + .{ + .offset = offset + lhs.offset(), + .alignment = @intCast(ty.abiAlignment(zcu).toByteUnits().?), + }, + ); +} + +fn airLoad(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const pt = cg.pt; + const zcu = pt.zcu; + const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; + const operand = try cg.resolveInst(ty_op.operand); + const ty = ty_op.ty.toType(); + const ptr_ty = cg.typeOf(ty_op.operand); + const ptr_info = ptr_ty.ptrInfo(zcu); + + if (!ty.hasRuntimeBitsIgnoreComptime(zcu)) return cg.finishAir(inst, .none, &.{ty_op.operand}); + + const result = result: { + if (isByRef(ty, zcu, cg.target)) { + const new_local = try cg.allocStack(ty); + try cg.store(new_local, operand, ty, 0); + break :result new_local; + } + + if (ptr_info.packed_offset.host_size == 0) { + const loaded = try cg.load(operand, ty, 0); + const ty_size = ty.abiSize(zcu); + if (ty.isAbiInt(zcu) and ty_size * 8 > ty.bitSize(zcu)) { + const int_elem_ty = try pt.intType(.unsigned, @intCast(ty_size * 8)); + break :result try cg.trunc(loaded, ty, int_elem_ty); + } else { + break :result loaded; + } + } else { + const int_elem_ty = try pt.intType(.unsigned, ptr_info.packed_offset.host_size * 8); + const shift_val: WValue = if (ptr_info.packed_offset.host_size <= 4) + .{ .imm32 = ptr_info.packed_offset.bit_offset } + else if (ptr_info.packed_offset.host_size <= 8) + .{ .imm64 = ptr_info.packed_offset.bit_offset } + else + .{ .imm32 = ptr_info.packed_offset.bit_offset }; + + const stack_loaded = if (ptr_info.packed_offset.host_size <= 8) + try cg.load(operand, int_elem_ty, 0) + else + operand; + const shifted = try cg.binOp(stack_loaded, shift_val, int_elem_ty, .shr); + break :result try cg.trunc(shifted, 
ty, int_elem_ty); + } + }; + return cg.finishAir(inst, result, &.{ty_op.operand}); +} + +/// Loads an operand from the linear memory section. +/// NOTE: Leaves the value on the stack. +fn load(cg: *CodeGen, operand: WValue, ty: Type, offset: u32) InnerError!WValue { + const zcu = cg.pt.zcu; + // load local's value from memory by its stack position + try cg.emitWValue(operand); + + if (ty.zigTypeTag(zcu) == .vector) { + // TODO: Add helper functions for simd opcodes + const extra_index: u32 = @intCast(cg.mir_extra.items.len); + // stores as := opcode, offset, alignment (opcode::memarg) + try cg.mir_extra.appendSlice(cg.gpa, &[_]u32{ + @intFromEnum(std.wasm.SimdOpcode.v128_load), + offset + operand.offset(), + @intCast(ty.abiAlignment(zcu).toByteUnits().?), + }); + try cg.addInst(.{ .tag = .simd_prefix, .data = .{ .payload = extra_index } }); + return .stack; + } + + const abi_size: u8 = @intCast(ty.abiSize(zcu)); + const opcode = buildOpcode(.{ + .valtype1 = typeToValtype(ty, zcu, cg.target), + .width = abi_size * 8, + .op = .load, + .signedness = if (ty.isSignedInt(zcu)) .signed else .unsigned, + }); + + try cg.addMemArg( + Mir.Inst.Tag.fromOpcode(opcode), + .{ + .offset = offset + operand.offset(), + .alignment = @intCast(ty.abiAlignment(zcu).toByteUnits().?), + }, + ); + + return .stack; +} + +fn airArg(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const pt = cg.pt; + const zcu = pt.zcu; + const arg_index = cg.arg_index; + const arg = cg.args[arg_index]; + const cc = zcu.typeToFunc(zcu.navValue(cg.owner_nav).typeOf(zcu)).?.cc; + const arg_ty = cg.typeOfIndex(inst); + if (cc == .wasm_mvp) { + switch (abi.classifyType(arg_ty, zcu)) { + .direct => |scalar_ty| if (!abi.lowerAsDoubleI64(scalar_ty, zcu)) { + cg.arg_index += 1; + } else { + cg.arg_index += 2; + const result = try cg.allocStack(arg_ty); + try cg.store(result, arg, Type.u64, 0); + try cg.store(result, cg.args[arg_index + 1], Type.u64, 8); + return cg.finishAir(inst, result, &.{}); + }, + .indirect => cg.arg_index += 1, + } + } else { + cg.arg_index += 1; + } + + return cg.finishAir(inst, arg, &.{}); +} + +fn airBinOp(cg: *CodeGen, inst: Air.Inst.Index, op: Op) InnerError!void { + const zcu = cg.pt.zcu; + const bin_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].bin_op; + const lhs = try cg.resolveInst(bin_op.lhs); + const rhs = try cg.resolveInst(bin_op.rhs); + const lhs_ty = cg.typeOf(bin_op.lhs); + const rhs_ty = cg.typeOf(bin_op.rhs); + + // For certain operations, such as shifting, the types are different. + // When converting this to a WebAssembly type, they *must* match to perform + // an operation. For this reason we verify if the WebAssembly type is different, in which + // case we first coerce the operands to the same type before performing the operation. + // For big integers we can ignore this as we will call into compiler-rt which handles this. 
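+    // Illustrative example: for a Zig `u64 << u6`, the lhs lowers to a wasm i64
+    // while the rhs (a `u6`) lowers to an i32, so the rhs is first widened to the
+    // lhs type before the shift opcode is emitted.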
+ const result = switch (op) { + .shr, .shl => result: { + if (lhs_ty.isVector(zcu) and !rhs_ty.isVector(zcu)) { + return cg.fail("TODO: implement vector '{s}' with scalar rhs", .{@tagName(op)}); + } + + const lhs_wasm_bits = toWasmBits(@intCast(lhs_ty.bitSize(zcu))) orelse { + return cg.fail("TODO: implement '{s}' for types larger than 128 bits", .{@tagName(op)}); + }; + const rhs_wasm_bits = toWasmBits(@intCast(rhs_ty.bitSize(zcu))).?; + const new_rhs = if (lhs_wasm_bits != rhs_wasm_bits and lhs_wasm_bits != 128) + try (try cg.intcast(rhs, rhs_ty, lhs_ty)).toLocal(cg, lhs_ty) + else + rhs; + break :result try cg.binOp(lhs, new_rhs, lhs_ty, op); + }, + else => try cg.binOp(lhs, rhs, lhs_ty, op), + }; + + return cg.finishAir(inst, result, &.{ bin_op.lhs, bin_op.rhs }); +} + +/// Performs a binary operation on the given `WValue`'s +/// NOTE: THis leaves the value on top of the stack. +fn binOp(cg: *CodeGen, lhs: WValue, rhs: WValue, ty: Type, op: Op) InnerError!WValue { + const pt = cg.pt; + const zcu = pt.zcu; + assert(!(lhs != .stack and rhs == .stack)); + + if (ty.isAnyFloat()) { + const float_op = FloatOp.fromOp(op); + return cg.floatOp(float_op, ty, &.{ lhs, rhs }); + } + + if (isByRef(ty, zcu, cg.target)) { + if (ty.zigTypeTag(zcu) == .int) { + return cg.binOpBigInt(lhs, rhs, ty, op); + } else { + return cg.fail("TODO: Implement binary operation for type: {f}", .{ty.fmt(pt)}); + } + } + + const opcode: std.wasm.Opcode = buildOpcode(.{ + .op = op, + .valtype1 = typeToValtype(ty, zcu, cg.target), + .signedness = if (ty.isSignedInt(zcu)) .signed else .unsigned, + }); + try cg.emitWValue(lhs); + try cg.emitWValue(rhs); + + try cg.addTag(Mir.Inst.Tag.fromOpcode(opcode)); + + return .stack; +} + +fn binOpBigInt(cg: *CodeGen, lhs: WValue, rhs: WValue, ty: Type, op: Op) InnerError!WValue { + const zcu = cg.pt.zcu; + const int_info = ty.intInfo(zcu); + if (int_info.bits > 128) { + return cg.fail("TODO: Implement binary operation for big integers larger than 128 bits", .{}); + } + + switch (op) { + .mul => return cg.callIntrinsic(.__multi3, &.{ ty.toIntern(), ty.toIntern() }, ty, &.{ lhs, rhs }), + .div => switch (int_info.signedness) { + .signed => return cg.callIntrinsic(.__divti3, &.{ ty.toIntern(), ty.toIntern() }, ty, &.{ lhs, rhs }), + .unsigned => return cg.callIntrinsic(.__udivti3, &.{ ty.toIntern(), ty.toIntern() }, ty, &.{ lhs, rhs }), + }, + .rem => switch (int_info.signedness) { + .signed => return cg.callIntrinsic(.__modti3, &.{ ty.toIntern(), ty.toIntern() }, ty, &.{ lhs, rhs }), + .unsigned => return cg.callIntrinsic(.__umodti3, &.{ ty.toIntern(), ty.toIntern() }, ty, &.{ lhs, rhs }), + }, + .shr => switch (int_info.signedness) { + .signed => return cg.callIntrinsic(.__ashrti3, &.{ ty.toIntern(), .i32_type }, ty, &.{ lhs, rhs }), + .unsigned => return cg.callIntrinsic(.__lshrti3, &.{ ty.toIntern(), .i32_type }, ty, &.{ lhs, rhs }), + }, + .shl => return cg.callIntrinsic(.__ashlti3, &.{ ty.toIntern(), .i32_type }, ty, &.{ lhs, rhs }), + .@"and", .@"or", .xor => { + const result = try cg.allocStack(ty); + try cg.emitWValue(result); + const lhs_lsb = try cg.load(lhs, Type.u64, 0); + const rhs_lsb = try cg.load(rhs, Type.u64, 0); + const op_lsb = try cg.binOp(lhs_lsb, rhs_lsb, Type.u64, op); + try cg.store(.stack, op_lsb, Type.u64, result.offset()); + + try cg.emitWValue(result); + const lhs_msb = try cg.load(lhs, Type.u64, 8); + const rhs_msb = try cg.load(rhs, Type.u64, 8); + const op_msb = try cg.binOp(lhs_msb, rhs_msb, Type.u64, op); + try cg.store(.stack, op_msb, Type.u64, 
result.offset() + 8); + return result; + }, + .add, .sub => { + const result = try cg.allocStack(ty); + var lhs_lsb = try (try cg.load(lhs, Type.u64, 0)).toLocal(cg, Type.u64); + defer lhs_lsb.free(cg); + var rhs_lsb = try (try cg.load(rhs, Type.u64, 0)).toLocal(cg, Type.u64); + defer rhs_lsb.free(cg); + var op_lsb = try (try cg.binOp(lhs_lsb, rhs_lsb, Type.u64, op)).toLocal(cg, Type.u64); + defer op_lsb.free(cg); + + const lhs_msb = try cg.load(lhs, Type.u64, 8); + const rhs_msb = try cg.load(rhs, Type.u64, 8); + const op_msb = try cg.binOp(lhs_msb, rhs_msb, Type.u64, op); + + const lt = if (op == .add) blk: { + break :blk try cg.cmp(op_lsb, rhs_lsb, Type.u64, .lt); + } else if (op == .sub) blk: { + break :blk try cg.cmp(lhs_lsb, rhs_lsb, Type.u64, .lt); + } else unreachable; + const tmp = try cg.intcast(lt, Type.u32, Type.u64); + var tmp_op = try (try cg.binOp(op_msb, tmp, Type.u64, op)).toLocal(cg, Type.u64); + defer tmp_op.free(cg); + + try cg.store(result, op_lsb, Type.u64, 0); + try cg.store(result, tmp_op, Type.u64, 8); + return result; + }, + else => return cg.fail("TODO: Implement binary operation for big integers: '{s}'", .{@tagName(op)}), + } +} + +const FloatOp = enum { + add, + ceil, + cos, + div, + exp, + exp2, + fabs, + floor, + fma, + fmax, + fmin, + fmod, + log, + log10, + log2, + mul, + neg, + round, + sin, + sqrt, + sub, + tan, + trunc, + + pub fn fromOp(op: Op) FloatOp { + return switch (op) { + .add => .add, + .ceil => .ceil, + .div => .div, + .abs => .fabs, + .floor => .floor, + .max => .fmax, + .min => .fmin, + .mul => .mul, + .neg => .neg, + .nearest => .round, + .sqrt => .sqrt, + .sub => .sub, + .trunc => .trunc, + .rem => .fmod, + else => unreachable, + }; + } + + pub fn toOp(float_op: FloatOp) ?Op { + return switch (float_op) { + .add => .add, + .ceil => .ceil, + .div => .div, + .fabs => .abs, + .floor => .floor, + .fmax => .max, + .fmin => .min, + .mul => .mul, + .neg => .neg, + .round => .nearest, + .sqrt => .sqrt, + .sub => .sub, + .trunc => .trunc, + + .cos, + .exp, + .exp2, + .fma, + .fmod, + .log, + .log10, + .log2, + .sin, + .tan, + => null, + }; + } + + fn intrinsic(op: FloatOp, bits: u16) Mir.Intrinsic { + return switch (op) { + inline .add, .sub, .div, .mul => |ct_op| switch (bits) { + inline 16, 80, 128 => |ct_bits| @field( + Mir.Intrinsic, + "__" ++ @tagName(ct_op) ++ compilerRtFloatAbbrev(ct_bits) ++ "f3", + ), + else => unreachable, + }, + + inline .ceil, + .fabs, + .floor, + .fmax, + .fmin, + .round, + .sqrt, + .trunc, + => |ct_op| switch (bits) { + inline 16, 80, 128 => |ct_bits| @field( + Mir.Intrinsic, + libcFloatPrefix(ct_bits) ++ @tagName(ct_op) ++ libcFloatSuffix(ct_bits), + ), + else => unreachable, + }, + + inline .cos, + .exp, + .exp2, + .fma, + .fmod, + .log, + .log10, + .log2, + .sin, + .tan, + => |ct_op| switch (bits) { + inline 16, 32, 64, 80, 128 => |ct_bits| @field( + Mir.Intrinsic, + libcFloatPrefix(ct_bits) ++ @tagName(ct_op) ++ libcFloatSuffix(ct_bits), + ), + else => unreachable, + }, + + .neg => unreachable, + }; + } +}; + +fn airAbs(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const pt = cg.pt; + const zcu = pt.zcu; + const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; + const operand = try cg.resolveInst(ty_op.operand); + const ty = cg.typeOf(ty_op.operand); + const scalar_ty = ty.scalarType(zcu); + + switch (scalar_ty.zigTypeTag(zcu)) { + .int => if (ty.zigTypeTag(zcu) == .vector) { + return cg.fail("TODO implement airAbs for {f}", .{ty.fmt(pt)}); + } else { + const int_bits = 
ty.intInfo(zcu).bits; + const wasm_bits = toWasmBits(int_bits) orelse { + return cg.fail("TODO: airAbs for signed integers larger than '{d}' bits", .{int_bits}); + }; + + switch (wasm_bits) { + 32 => { + try cg.emitWValue(operand); + + try cg.addImm32(31); + try cg.addTag(.i32_shr_s); + + var tmp = try cg.allocLocal(ty); + defer tmp.free(cg); + try cg.addLocal(.local_tee, tmp.local.value); + + try cg.emitWValue(operand); + try cg.addTag(.i32_xor); + try cg.emitWValue(tmp); + try cg.addTag(.i32_sub); + return cg.finishAir(inst, .stack, &.{ty_op.operand}); + }, + 64 => { + try cg.emitWValue(operand); + + try cg.addImm64(63); + try cg.addTag(.i64_shr_s); + + var tmp = try cg.allocLocal(ty); + defer tmp.free(cg); + try cg.addLocal(.local_tee, tmp.local.value); + + try cg.emitWValue(operand); + try cg.addTag(.i64_xor); + try cg.emitWValue(tmp); + try cg.addTag(.i64_sub); + return cg.finishAir(inst, .stack, &.{ty_op.operand}); + }, + 128 => { + const mask = try cg.allocStack(Type.u128); + try cg.emitWValue(mask); + try cg.emitWValue(mask); + + _ = try cg.load(operand, Type.u64, 8); + try cg.addImm64(63); + try cg.addTag(.i64_shr_s); + + var tmp = try cg.allocLocal(Type.u64); + defer tmp.free(cg); + try cg.addLocal(.local_tee, tmp.local.value); + try cg.store(.stack, .stack, Type.u64, mask.offset() + 0); + try cg.emitWValue(tmp); + try cg.store(.stack, .stack, Type.u64, mask.offset() + 8); + + const a = try cg.binOpBigInt(operand, mask, Type.u128, .xor); + const b = try cg.binOpBigInt(a, mask, Type.u128, .sub); + + return cg.finishAir(inst, b, &.{ty_op.operand}); + }, + else => unreachable, + } + }, + .float => { + const result = try cg.floatOp(.fabs, ty, &.{operand}); + return cg.finishAir(inst, result, &.{ty_op.operand}); + }, + else => unreachable, + } +} + +fn airUnaryFloatOp(cg: *CodeGen, inst: Air.Inst.Index, op: FloatOp) InnerError!void { + const un_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].un_op; + const operand = try cg.resolveInst(un_op); + const ty = cg.typeOf(un_op); + + const result = try cg.floatOp(op, ty, &.{operand}); + return cg.finishAir(inst, result, &.{un_op}); +} + +fn floatOp(cg: *CodeGen, float_op: FloatOp, ty: Type, args: []const WValue) InnerError!WValue { + const zcu = cg.pt.zcu; + if (ty.zigTypeTag(zcu) == .vector) { + return cg.fail("TODO: Implement floatOps for vectors", .{}); + } + + const float_bits = ty.floatBits(cg.target); + + if (float_op == .neg) { + return cg.floatNeg(ty, args[0]); + } + + if (float_bits == 32 or float_bits == 64) { + if (float_op.toOp()) |op| { + for (args) |operand| { + try cg.emitWValue(operand); + } + const opcode = buildOpcode(.{ .op = op, .valtype1 = typeToValtype(ty, zcu, cg.target) }); + try cg.addTag(Mir.Inst.Tag.fromOpcode(opcode)); + return .stack; + } + } + + const intrinsic = float_op.intrinsic(float_bits); + + // fma requires three operands + var param_types_buffer: [3]InternPool.Index = .{ ty.ip_index, ty.ip_index, ty.ip_index }; + const param_types = param_types_buffer[0..args.len]; + return cg.callIntrinsic(intrinsic, param_types, ty, args); +} + +/// NOTE: The result value remains on top of the stack. 
+fn floatNeg(cg: *CodeGen, ty: Type, arg: WValue) InnerError!WValue { + const float_bits = ty.floatBits(cg.target); + switch (float_bits) { + 16 => { + try cg.emitWValue(arg); + try cg.addImm32(0x8000); + try cg.addTag(.i32_xor); + return .stack; + }, + 32, 64 => { + try cg.emitWValue(arg); + const val_type: std.wasm.Valtype = if (float_bits == 32) .f32 else .f64; + const opcode = buildOpcode(.{ .op = .neg, .valtype1 = val_type }); + try cg.addTag(Mir.Inst.Tag.fromOpcode(opcode)); + return .stack; + }, + 80, 128 => { + const result = try cg.allocStack(ty); + try cg.emitWValue(result); + try cg.emitWValue(arg); + try cg.addMemArg(.i64_load, .{ .offset = 0 + arg.offset(), .alignment = 2 }); + try cg.addMemArg(.i64_store, .{ .offset = 0 + result.offset(), .alignment = 2 }); + + try cg.emitWValue(result); + try cg.emitWValue(arg); + try cg.addMemArg(.i64_load, .{ .offset = 8 + arg.offset(), .alignment = 2 }); + + if (float_bits == 80) { + try cg.addImm64(0x8000); + try cg.addTag(.i64_xor); + try cg.addMemArg(.i64_store16, .{ .offset = 8 + result.offset(), .alignment = 2 }); + } else { + try cg.addImm64(0x8000000000000000); + try cg.addTag(.i64_xor); + try cg.addMemArg(.i64_store, .{ .offset = 8 + result.offset(), .alignment = 2 }); + } + return result; + }, + else => unreachable, + } +} + +fn airWrapBinOp(cg: *CodeGen, inst: Air.Inst.Index, op: Op) InnerError!void { + const zcu = cg.pt.zcu; + const bin_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].bin_op; + + const lhs = try cg.resolveInst(bin_op.lhs); + const rhs = try cg.resolveInst(bin_op.rhs); + const lhs_ty = cg.typeOf(bin_op.lhs); + const rhs_ty = cg.typeOf(bin_op.rhs); + + if (lhs_ty.isVector(zcu)) { + if ((op == .shr or op == .shl) and !rhs_ty.isVector(zcu)) { + return cg.fail("TODO: implement wrapping vector '{s}' with scalar rhs", .{@tagName(op)}); + } else { + return cg.fail("TODO: implement wrapping '{s}' for vectors", .{@tagName(op)}); + } + } + + // For certain operations, such as shifting, the types are different. + // When converting this to a WebAssembly type, they *must* match to perform + // an operation. For this reason we verify if the WebAssembly type is different, in which + // case we first coerce the operands to the same type before performing the operation. + // For big integers we can ignore this as we will call into compiler-rt which handles this. + const result = switch (op) { + .shr, .shl => result: { + const lhs_wasm_bits = toWasmBits(@intCast(lhs_ty.bitSize(zcu))) orelse { + return cg.fail("TODO: implement '{s}' for types larger than 128 bits", .{@tagName(op)}); + }; + const rhs_wasm_bits = toWasmBits(@intCast(rhs_ty.bitSize(zcu))).?; + const new_rhs = if (lhs_wasm_bits != rhs_wasm_bits and lhs_wasm_bits != 128) + try (try cg.intcast(rhs, rhs_ty, lhs_ty)).toLocal(cg, lhs_ty) + else + rhs; + break :result try cg.wrapBinOp(lhs, new_rhs, lhs_ty, op); + }, + else => try cg.wrapBinOp(lhs, rhs, lhs_ty, op), + }; + + return cg.finishAir(inst, result, &.{ bin_op.lhs, bin_op.rhs }); +} + +/// Performs a wrapping binary operation. +/// Asserts rhs is not a stack value when lhs also isn't. +/// NOTE: Leaves the result on the stack when its Type is <= 64 bits +fn wrapBinOp(cg: *CodeGen, lhs: WValue, rhs: WValue, ty: Type, op: Op) InnerError!WValue { + const bin_local = try cg.binOp(lhs, rhs, ty, op); + return cg.wrapOperand(bin_local, ty); +} + +/// Wraps an operand based on a given type's bitsize. +/// Asserts `Type` is <= 128 bits. 
+/// NOTE: When the Type is <= 64 bits, leaves the value on top of the stack, if wrapping was needed. +fn wrapOperand(cg: *CodeGen, operand: WValue, ty: Type) InnerError!WValue { + const zcu = cg.pt.zcu; + assert(ty.abiSize(zcu) <= 16); + const int_bits: u16 = @intCast(ty.bitSize(zcu)); // TODO use ty.intInfo(zcu).bits + const wasm_bits = toWasmBits(int_bits) orelse { + return cg.fail("TODO: Implement wrapOperand for bitsize '{d}'", .{int_bits}); + }; + + if (wasm_bits == int_bits) return operand; + + switch (wasm_bits) { + 32 => { + try cg.emitWValue(operand); + if (ty.isSignedInt(zcu)) { + try cg.addImm32(32 - int_bits); + try cg.addTag(.i32_shl); + try cg.addImm32(32 - int_bits); + try cg.addTag(.i32_shr_s); + } else { + try cg.addImm32(~@as(u32, 0) >> @intCast(32 - int_bits)); + try cg.addTag(.i32_and); + } + return .stack; + }, + 64 => { + try cg.emitWValue(operand); + if (ty.isSignedInt(zcu)) { + try cg.addImm64(64 - int_bits); + try cg.addTag(.i64_shl); + try cg.addImm64(64 - int_bits); + try cg.addTag(.i64_shr_s); + } else { + try cg.addImm64(~@as(u64, 0) >> @intCast(64 - int_bits)); + try cg.addTag(.i64_and); + } + return .stack; + }, + 128 => { + assert(operand != .stack); + const result = try cg.allocStack(ty); + + try cg.emitWValue(result); + _ = try cg.load(operand, Type.u64, 0); + try cg.store(.stack, .stack, Type.u64, result.offset()); + + try cg.emitWValue(result); + _ = try cg.load(operand, Type.u64, 8); + if (ty.isSignedInt(zcu)) { + try cg.addImm64(128 - int_bits); + try cg.addTag(.i64_shl); + try cg.addImm64(128 - int_bits); + try cg.addTag(.i64_shr_s); + } else { + try cg.addImm64(~@as(u64, 0) >> @intCast(128 - int_bits)); + try cg.addTag(.i64_and); + } + try cg.store(.stack, .stack, Type.u64, result.offset() + 8); + + return result; + }, + else => unreachable, + } +} + +fn lowerPtr(cg: *CodeGen, ptr_val: InternPool.Index, prev_offset: u64) InnerError!WValue { + const pt = cg.pt; + const zcu = pt.zcu; + const ptr = zcu.intern_pool.indexToKey(ptr_val).ptr; + const offset: u64 = prev_offset + ptr.byte_offset; + return switch (ptr.base_addr) { + .nav => |nav| return .{ .nav_ref = .{ .nav_index = nav, .offset = @intCast(offset) } }, + .uav => |uav| return .{ .uav_ref = .{ .ip_index = uav.val, .offset = @intCast(offset), .orig_ptr_ty = uav.orig_ty } }, + .int => return cg.lowerConstant(try pt.intValue(Type.usize, offset), Type.usize), + .eu_payload => |eu_ptr| try cg.lowerPtr( + eu_ptr, + offset + codegen.errUnionPayloadOffset( + Value.fromInterned(eu_ptr).typeOf(zcu).childType(zcu), + zcu, + ), + ), + .opt_payload => |opt_ptr| return cg.lowerPtr(opt_ptr, offset), + .field => |field| { + const base_ptr = Value.fromInterned(field.base); + const base_ty = base_ptr.typeOf(zcu).childType(zcu); + const field_off: u64 = switch (base_ty.zigTypeTag(zcu)) { + .pointer => off: { + assert(base_ty.isSlice(zcu)); + break :off switch (field.index) { + Value.slice_ptr_index => 0, + Value.slice_len_index => @divExact(cg.target.ptrBitWidth(), 8), + else => unreachable, + }; + }, + .@"struct" => switch (base_ty.containerLayout(zcu)) { + .auto => base_ty.structFieldOffset(@intCast(field.index), zcu), + .@"extern", .@"packed" => unreachable, + }, + .@"union" => switch (base_ty.containerLayout(zcu)) { + .auto => base_ty.structFieldOffset(@intCast(field.index), zcu), + .@"extern", .@"packed" => unreachable, + }, + else => unreachable, + }; + return cg.lowerPtr(field.base, offset + field_off); + }, + .arr_elem, .comptime_field, .comptime_alloc => unreachable, + }; +} + +/// Asserts that `isByRef` 
returns `false` for `ty`. +fn lowerConstant(cg: *CodeGen, val: Value, ty: Type) InnerError!WValue { + const pt = cg.pt; + const zcu = pt.zcu; + assert(!isByRef(ty, zcu, cg.target)); + const ip = &zcu.intern_pool; + if (val.isUndef(zcu)) return cg.emitUndefined(ty); + + switch (ip.indexToKey(val.ip_index)) { + .int_type, + .ptr_type, + .array_type, + .vector_type, + .opt_type, + .anyframe_type, + .error_union_type, + .simple_type, + .struct_type, + .tuple_type, + .union_type, + .opaque_type, + .enum_type, + .func_type, + .error_set_type, + .inferred_error_set_type, + => unreachable, // types, not values + + .undef => unreachable, // handled above + .simple_value => |simple_value| switch (simple_value) { + .undefined, + .void, + .null, + .empty_tuple, + .@"unreachable", + => unreachable, // non-runtime values + .false, .true => return .{ .imm32 = switch (simple_value) { + .false => 0, + .true => 1, + else => unreachable, + } }, + }, + .variable, + .@"extern", + .func, + .enum_literal, + .empty_enum_value, + => unreachable, // non-runtime values + .int => { + const int_info = ty.intInfo(zcu); + switch (int_info.signedness) { + .signed => switch (int_info.bits) { + 0...32 => return .{ .imm32 = @bitCast(@as(i32, @intCast(val.toSignedInt(zcu)))) }, + 33...64 => return .{ .imm64 = @bitCast(val.toSignedInt(zcu)) }, + else => unreachable, + }, + .unsigned => switch (int_info.bits) { + 0...32 => return .{ .imm32 = @intCast(val.toUnsignedInt(zcu)) }, + 33...64 => return .{ .imm64 = val.toUnsignedInt(zcu) }, + else => unreachable, + }, + } + }, + .err => |err| { + const int = try pt.getErrorValue(err.name); + return .{ .imm32 = int }; + }, + .error_union => |error_union| { + const err_int_ty = try pt.errorIntType(); + const err_ty, const err_val = switch (error_union.val) { + .err_name => |err_name| .{ + ty.errorUnionSet(zcu), + Value.fromInterned(try pt.intern(.{ .err = .{ + .ty = ty.errorUnionSet(zcu).toIntern(), + .name = err_name, + } })), + }, + .payload => .{ + err_int_ty, + try pt.intValue(err_int_ty, 0), + }, + }; + const payload_type = ty.errorUnionPayload(zcu); + if (!payload_type.hasRuntimeBitsIgnoreComptime(zcu)) { + // We use the error type directly as the type. 
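+            // (Illustration: an `anyerror!void` constant has a zero-bit payload,
+            // so it lowers to nothing more than the error value itself.)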
+ return cg.lowerConstant(err_val, err_ty); + } + + return cg.fail("Wasm TODO: lowerConstant error union with non-zero-bit payload type", .{}); + }, + .enum_tag => |enum_tag| { + const int_tag_ty = ip.typeOf(enum_tag.int); + return cg.lowerConstant(Value.fromInterned(enum_tag.int), Type.fromInterned(int_tag_ty)); + }, + .float => |float| switch (float.storage) { + .f16 => |f16_val| return .{ .imm32 = @as(u16, @bitCast(f16_val)) }, + .f32 => |f32_val| return .{ .float32 = f32_val }, + .f64 => |f64_val| return .{ .float64 = f64_val }, + else => unreachable, + }, + .slice => unreachable, // isByRef == true + .ptr => return cg.lowerPtr(val.toIntern(), 0), + .opt => if (ty.optionalReprIsPayload(zcu)) { + const pl_ty = ty.optionalChild(zcu); + if (val.optionalValue(zcu)) |payload| { + return cg.lowerConstant(payload, pl_ty); + } else { + return .{ .imm32 = 0 }; + } + } else { + return .{ .imm32 = @intFromBool(!val.isNull(zcu)) }; + }, + .aggregate => switch (ip.indexToKey(ty.ip_index)) { + .array_type => return cg.fail("Wasm TODO: LowerConstant for {f}", .{ty.fmt(pt)}), + .vector_type => { + assert(determineSimdStoreStrategy(ty, zcu, cg.target) == .direct); + var buf: [16]u8 = undefined; + val.writeToMemory(pt, &buf) catch unreachable; + return cg.storeSimdImmd(buf); + }, + .struct_type => { + const struct_type = ip.loadStructType(ty.toIntern()); + // non-packed structs are not handled in this function because they + // are by-ref types. + assert(struct_type.layout == .@"packed"); + var buf: [8]u8 = .{0} ** 8; // zero the buffer so we do not read 0xaa as integer + val.writeToPackedMemory(ty, pt, &buf, 0) catch unreachable; + const backing_int_ty = Type.fromInterned(struct_type.backingIntTypeUnordered(ip)); + const int_val = try pt.intValue( + backing_int_ty, + mem.readInt(u64, &buf, .little), + ); + return cg.lowerConstant(int_val, backing_int_ty); + }, + else => unreachable, + }, + .un => { + const int_type = try pt.intType(.unsigned, @intCast(ty.bitSize(zcu))); + + var buf: [8]u8 = .{0} ** 8; // zero the buffer so we do not read 0xaa as integer + val.writeToPackedMemory(ty, pt, &buf, 0) catch unreachable; + const int_val = try pt.intValue( + int_type, + mem.readInt(u64, &buf, .little), + ); + return cg.lowerConstant(int_val, int_type); + }, + .memoized_call => unreachable, + } +} + +/// Stores the value as a 128bit-immediate value by storing it inside +/// the list and returning the index into this list as `WValue`. 
+fn storeSimdImmd(cg: *CodeGen, value: [16]u8) !WValue { + const index = @as(u32, @intCast(cg.simd_immediates.items.len)); + try cg.simd_immediates.append(cg.gpa, value); + return .{ .imm128 = index }; +} + +fn emitUndefined(cg: *CodeGen, ty: Type) InnerError!WValue { + const zcu = cg.pt.zcu; + const ip = &zcu.intern_pool; + switch (ty.zigTypeTag(zcu)) { + .bool, .error_set => return .{ .imm32 = 0xaaaaaaaa }, + .int, .@"enum" => switch (ty.intInfo(zcu).bits) { + 0...32 => return .{ .imm32 = 0xaaaaaaaa }, + 33...64 => return .{ .imm64 = 0xaaaaaaaaaaaaaaaa }, + else => unreachable, + }, + .float => switch (ty.floatBits(cg.target)) { + 16 => return .{ .imm32 = 0xaaaaaaaa }, + 32 => return .{ .float32 = @as(f32, @bitCast(@as(u32, 0xaaaaaaaa))) }, + 64 => return .{ .float64 = @as(f64, @bitCast(@as(u64, 0xaaaaaaaaaaaaaaaa))) }, + else => unreachable, + }, + .pointer => switch (cg.ptr_size) { + .wasm32 => return .{ .imm32 = 0xaaaaaaaa }, + .wasm64 => return .{ .imm64 = 0xaaaaaaaaaaaaaaaa }, + }, + .optional => { + const pl_ty = ty.optionalChild(zcu); + if (ty.optionalReprIsPayload(zcu)) { + return cg.emitUndefined(pl_ty); + } + return .{ .imm32 = 0xaaaaaaaa }; + }, + .error_union => { + return .{ .imm32 = 0xaaaaaaaa }; + }, + .@"struct" => { + const packed_struct = zcu.typeToPackedStruct(ty).?; + return cg.emitUndefined(Type.fromInterned(packed_struct.backingIntTypeUnordered(ip))); + }, + .@"union" => switch (ty.containerLayout(zcu)) { + .@"packed" => switch (ty.bitSize(zcu)) { + 0...32 => return .{ .imm32 = 0xaaaaaaaa }, + 33...64 => return .{ .imm64 = 0xaaaaaaaaaaaaaaaa }, + else => unreachable, + }, + else => unreachable, + }, + else => return cg.fail("Wasm TODO: emitUndefined for type: {t}\n", .{ty.zigTypeTag(zcu)}), + } +} + +fn airBlock(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const ty_pl = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl; + const extra = cg.air.extraData(Air.Block, ty_pl.payload); + try cg.lowerBlock(inst, ty_pl.ty.toType(), @ptrCast(cg.air.extra.items[extra.end..][0..extra.data.body_len])); +} + +fn lowerBlock(cg: *CodeGen, inst: Air.Inst.Index, block_ty: Type, body: []const Air.Inst.Index) InnerError!void { + const zcu = cg.pt.zcu; + // if wasm_block_ty is non-empty, we create a register to store the temporary value + const block_result: WValue = if (block_ty.hasRuntimeBitsIgnoreComptime(zcu)) + try cg.allocLocal(block_ty) + else + .none; + + try cg.startBlock(.block, .empty); + // Here we set the current block idx, so breaks know the depth to jump + // to when breaking out. 
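+    // For example, a `br` emitted three blocks deeper than this one computes its
+    // relative branch depth as `block_depth - label`, i.e. 3 in that case (see
+    // `airBr` below).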
+ try cg.blocks.putNoClobber(cg.gpa, inst, .{ + .label = cg.block_depth, + .value = block_result, + }); + + try cg.genBody(body); + try cg.endBlock(); + + const liveness = cg.liveness.getBlock(inst); + try cg.currentBranch().values.ensureUnusedCapacity(cg.gpa, liveness.deaths.len); + + return cg.finishAir(inst, block_result, &.{}); +} + +/// appends a new wasm block to the code section and increases the `block_depth` by 1 +fn startBlock(cg: *CodeGen, block_tag: std.wasm.Opcode, block_type: std.wasm.BlockType) !void { + cg.block_depth += 1; + try cg.addInst(.{ + .tag = Mir.Inst.Tag.fromOpcode(block_tag), + .data = .{ .block_type = block_type }, + }); +} + +/// Ends the current wasm block and decreases the `block_depth` by 1 +fn endBlock(cg: *CodeGen) !void { + try cg.addTag(.end); + cg.block_depth -= 1; +} + +fn airLoop(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const ty_pl = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl; + const loop = cg.air.extraData(Air.Block, ty_pl.payload); + const body: []const Air.Inst.Index = @ptrCast(cg.air.extra.items[loop.end..][0..loop.data.body_len]); + + // result type of loop is always 'noreturn', meaning we can always + // emit the wasm type 'block_empty'. + try cg.startBlock(.loop, .empty); + + try cg.loops.putNoClobber(cg.gpa, inst, cg.block_depth); + defer assert(cg.loops.remove(inst)); + + try cg.genBody(body); + try cg.endBlock(); + + return cg.finishAir(inst, .none, &.{}); +} + +fn airCondBr(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const pl_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].pl_op; + const condition = try cg.resolveInst(pl_op.operand); + const extra = cg.air.extraData(Air.CondBr, pl_op.payload); + const then_body: []const Air.Inst.Index = @ptrCast(cg.air.extra.items[extra.end..][0..extra.data.then_body_len]); + const else_body: []const Air.Inst.Index = @ptrCast(cg.air.extra.items[extra.end + then_body.len ..][0..extra.data.else_body_len]); + const liveness_condbr = cg.liveness.getCondBr(inst); + + // result type is always noreturn, so use `block_empty` as type. + try cg.startBlock(.block, .empty); + // emit the conditional value + try cg.emitWValue(condition); + + // we inserted the block in front of the condition + // so now check if condition matches. 
If not, break outside this block + // and continue with the then codepath + try cg.addLabel(.br_if, 0); + + try cg.branches.ensureUnusedCapacity(cg.gpa, 2); + { + cg.branches.appendAssumeCapacity(.{}); + try cg.currentBranch().values.ensureUnusedCapacity(cg.gpa, @as(u32, @intCast(liveness_condbr.else_deaths.len))); + defer { + var else_stack = cg.branches.pop().?; + else_stack.deinit(cg.gpa); + } + try cg.genBody(else_body); + try cg.endBlock(); + } + + // Outer block that matches the condition + { + cg.branches.appendAssumeCapacity(.{}); + try cg.currentBranch().values.ensureUnusedCapacity(cg.gpa, @as(u32, @intCast(liveness_condbr.then_deaths.len))); + defer { + var then_stack = cg.branches.pop().?; + then_stack.deinit(cg.gpa); + } + try cg.genBody(then_body); + } + + return cg.finishAir(inst, .none, &.{}); +} + +fn airCmp(cg: *CodeGen, inst: Air.Inst.Index, op: std.math.CompareOperator) InnerError!void { + const bin_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].bin_op; + + const lhs = try cg.resolveInst(bin_op.lhs); + const rhs = try cg.resolveInst(bin_op.rhs); + const operand_ty = cg.typeOf(bin_op.lhs); + const result = try cg.cmp(lhs, rhs, operand_ty, op); + return cg.finishAir(inst, result, &.{ bin_op.lhs, bin_op.rhs }); +} + +/// Compares two operands. +/// Asserts rhs is not a stack value when the lhs isn't a stack value either +/// NOTE: This leaves the result on top of the stack, rather than a new local. +fn cmp(cg: *CodeGen, lhs: WValue, rhs: WValue, ty: Type, op: std.math.CompareOperator) InnerError!WValue { + assert(!(lhs != .stack and rhs == .stack)); + const zcu = cg.pt.zcu; + if (ty.zigTypeTag(zcu) == .optional and !ty.optionalReprIsPayload(zcu)) { + const payload_ty = ty.optionalChild(zcu); + if (payload_ty.hasRuntimeBitsIgnoreComptime(zcu)) { + // When we hit this case, we must check the value of optionals + // that are not pointers. This means first checking against non-null for + // both lhs and rhs, as well as checking the payload are matching of lhs and rhs + return cg.cmpOptionals(lhs, rhs, ty, op); + } + } else if (ty.isAnyFloat()) { + return cg.cmpFloat(ty, lhs, rhs, op); + } else if (isByRef(ty, zcu, cg.target)) { + return cg.cmpBigInt(lhs, rhs, ty, op); + } + + const signedness: std.builtin.Signedness = blk: { + // by default we tell the operand type is unsigned (i.e. bools and enum values) + if (ty.zigTypeTag(zcu) != .int) break :blk .unsigned; + + // incase of an actual integer, we emit the correct signedness + break :blk ty.intInfo(zcu).signedness; + }; + + // ensure that when we compare pointers, we emit + // the true pointer of a stack value, rather than the stack pointer. + try cg.lowerToStack(lhs); + try cg.lowerToStack(rhs); + + const opcode: std.wasm.Opcode = buildOpcode(.{ + .valtype1 = typeToValtype(ty, zcu, cg.target), + .op = switch (op) { + .lt => .lt, + .lte => .le, + .eq => .eq, + .neq => .ne, + .gte => .ge, + .gt => .gt, + }, + .signedness = signedness, + }); + try cg.addTag(Mir.Inst.Tag.fromOpcode(opcode)); + + return .stack; +} + +/// Compares two floats. +/// NOTE: Leaves the result of the comparison on top of the stack. 
+fn cmpFloat(cg: *CodeGen, ty: Type, lhs: WValue, rhs: WValue, cmp_op: std.math.CompareOperator) InnerError!WValue { + const float_bits = ty.floatBits(cg.target); + + const op: Op = switch (cmp_op) { + .lt => .lt, + .lte => .le, + .eq => .eq, + .neq => .ne, + .gte => .ge, + .gt => .gt, + }; + + switch (float_bits) { + 16 => { + _ = try cg.fpext(lhs, Type.f16, Type.f32); + _ = try cg.fpext(rhs, Type.f16, Type.f32); + const opcode = buildOpcode(.{ .op = op, .valtype1 = .f32 }); + try cg.addTag(Mir.Inst.Tag.fromOpcode(opcode)); + return .stack; + }, + 32, 64 => { + try cg.emitWValue(lhs); + try cg.emitWValue(rhs); + const val_type: std.wasm.Valtype = if (float_bits == 32) .f32 else .f64; + const opcode = buildOpcode(.{ .op = op, .valtype1 = val_type }); + try cg.addTag(Mir.Inst.Tag.fromOpcode(opcode)); + return .stack; + }, + 80, 128 => { + const intrinsic = floatCmpIntrinsic(cmp_op, float_bits); + const result = try cg.callIntrinsic(intrinsic, &.{ ty.ip_index, ty.ip_index }, Type.bool, &.{ lhs, rhs }); + return cg.cmp(result, .{ .imm32 = 0 }, Type.i32, cmp_op); + }, + else => unreachable, + } +} + +fn airCmpVector(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + _ = inst; + return cg.fail("TODO implement airCmpVector for wasm", .{}); +} + +fn airCmpLtErrorsLen(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const un_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].un_op; + const operand = try cg.resolveInst(un_op); + + try cg.emitWValue(operand); + const pt = cg.pt; + const err_int_ty = try pt.errorIntType(); + try cg.addTag(.errors_len); + const result = try cg.cmp(.stack, .stack, err_int_ty, .lt); + + return cg.finishAir(inst, result, &.{un_op}); +} + +fn airBr(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const br = cg.air.instructions.items(.data)[@intFromEnum(inst)].br; + const block = cg.blocks.get(br.block_inst).?; + + // if operand has codegen bits we should break with a value + if (block.value != .none) { + const operand = try cg.resolveInst(br.operand); + try cg.lowerToStack(operand); + try cg.addLocal(.local_set, block.value.local.value); + } + + // We map every block to its block index. 
+ // We then determine how far we have to jump to it by subtracting it from current block depth
+ const idx: u32 = cg.block_depth - block.label;
+ try cg.addLabel(.br, idx);
+
+ return cg.finishAir(inst, .none, &.{br.operand});
+}
+
+fn airRepeat(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
+ const repeat = cg.air.instructions.items(.data)[@intFromEnum(inst)].repeat;
+ const loop_label = cg.loops.get(repeat.loop_inst).?;
+
+ const idx: u32 = cg.block_depth - loop_label;
+ try cg.addLabel(.br, idx);
+
+ return cg.finishAir(inst, .none, &.{});
+}
+
+fn airNot(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
+ const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
+
+ const operand = try cg.resolveInst(ty_op.operand);
+ const operand_ty = cg.typeOf(ty_op.operand);
+ const pt = cg.pt;
+ const zcu = pt.zcu;
+
+ const result = result: {
+ if (operand_ty.zigTypeTag(zcu) == .bool) {
+ try cg.emitWValue(operand);
+ try cg.addTag(.i32_eqz);
+ const not_tmp = try cg.allocLocal(operand_ty);
+ try cg.addLocal(.local_set, not_tmp.local.value);
+ break :result not_tmp;
+ } else {
+ const int_info = operand_ty.intInfo(zcu);
+ const wasm_bits = toWasmBits(int_info.bits) orelse {
+ return cg.fail("TODO: Implement binary NOT for {f}", .{operand_ty.fmt(pt)});
+ };
+
+ switch (wasm_bits) {
+ 32 => {
+ try cg.emitWValue(operand);
+ try cg.addImm32(switch (int_info.signedness) {
+ .unsigned => ~@as(u32, 0) >> @intCast(32 - int_info.bits),
+ .signed => ~@as(u32, 0),
+ });
+ try cg.addTag(.i32_xor);
+ break :result .stack;
+ },
+ 64 => {
+ try cg.emitWValue(operand);
+ try cg.addImm64(switch (int_info.signedness) {
+ .unsigned => ~@as(u64, 0) >> @intCast(64 - int_info.bits),
+ .signed => ~@as(u64, 0),
+ });
+ try cg.addTag(.i64_xor);
+ break :result .stack;
+ },
+ 128 => {
+ const ptr = try cg.allocStack(operand_ty);
+
+ try cg.emitWValue(ptr);
+ _ = try cg.load(operand, Type.u64, 0);
+ try cg.addImm64(~@as(u64, 0));
+ try cg.addTag(.i64_xor);
+ try cg.store(.stack, .stack, Type.u64, ptr.offset());
+
+ try cg.emitWValue(ptr);
+ _ = try cg.load(operand, Type.u64, 8);
+ try cg.addImm64(switch (int_info.signedness) {
+ .unsigned => ~@as(u64, 0) >> @intCast(128 - int_info.bits),
+ .signed => ~@as(u64, 0),
+ });
+ try cg.addTag(.i64_xor);
+ try cg.store(.stack, .stack, Type.u64, ptr.offset() + 8);
+
+ break :result ptr;
+ },
+ else => unreachable,
+ }
+ }
+ };
+ return cg.finishAir(inst, result, &.{ty_op.operand});
+}
+
+fn airTrap(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
+ try cg.addTag(.@"unreachable");
+ return cg.finishAir(inst, .none, &.{});
+}
+
+fn airBreakpoint(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
+ // unsupported by wasm itself.
Can be implemented once we support DWARF + // for wasm + try cg.addTag(.@"unreachable"); + return cg.finishAir(inst, .none, &.{}); +} + +fn airUnreachable(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + try cg.addTag(.@"unreachable"); + return cg.finishAir(inst, .none, &.{}); +} + +fn airBitcast(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const zcu = cg.pt.zcu; + const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; + const operand = try cg.resolveInst(ty_op.operand); + const wanted_ty = cg.typeOfIndex(inst); + const given_ty = cg.typeOf(ty_op.operand); + + const bit_size = given_ty.bitSize(zcu); + const needs_wrapping = (given_ty.isSignedInt(zcu) != wanted_ty.isSignedInt(zcu)) and + bit_size != 32 and bit_size != 64 and bit_size != 128; + + const result = result: { + if (given_ty.isAnyFloat() or wanted_ty.isAnyFloat()) { + break :result try cg.bitcast(wanted_ty, given_ty, operand); + } + + if (isByRef(given_ty, zcu, cg.target) and !isByRef(wanted_ty, zcu, cg.target)) { + const loaded_memory = try cg.load(operand, wanted_ty, 0); + if (needs_wrapping) { + break :result try cg.wrapOperand(loaded_memory, wanted_ty); + } else { + break :result loaded_memory; + } + } + if (!isByRef(given_ty, zcu, cg.target) and isByRef(wanted_ty, zcu, cg.target)) { + const stack_memory = try cg.allocStack(wanted_ty); + try cg.store(stack_memory, operand, given_ty, 0); + if (needs_wrapping) { + break :result try cg.wrapOperand(stack_memory, wanted_ty); + } else { + break :result stack_memory; + } + } + + if (needs_wrapping) { + break :result try cg.wrapOperand(operand, wanted_ty); + } + + break :result switch (operand) { + // for stack offset, return a pointer to this offset. + .stack_offset => try cg.buildPointerOffset(operand, 0, .new), + else => cg.reuseOperand(ty_op.operand, operand), + }; + }; + return cg.finishAir(inst, result, &.{ty_op.operand}); +} + +fn bitcast(cg: *CodeGen, wanted_ty: Type, given_ty: Type, operand: WValue) InnerError!WValue { + const zcu = cg.pt.zcu; + // if we bitcast a float to or from an integer we must use the 'reinterpret' instruction + if (!(wanted_ty.isAnyFloat() or given_ty.isAnyFloat())) return operand; + if (wanted_ty.ip_index == .f16_type or given_ty.ip_index == .f16_type) return operand; + if (wanted_ty.bitSize(zcu) > 64) return operand; + assert((wanted_ty.isInt(zcu) and given_ty.isAnyFloat()) or (wanted_ty.isAnyFloat() and given_ty.isInt(zcu))); + + const opcode = buildOpcode(.{ + .op = .reinterpret, + .valtype1 = typeToValtype(wanted_ty, zcu, cg.target), + .valtype2 = typeToValtype(given_ty, zcu, cg.target), + }); + try cg.emitWValue(operand); + try cg.addTag(Mir.Inst.Tag.fromOpcode(opcode)); + return .stack; +} + +fn airStructFieldPtr(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const zcu = cg.pt.zcu; + const ty_pl = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl; + const extra = cg.air.extraData(Air.StructField, ty_pl.payload); + + const struct_ptr = try cg.resolveInst(extra.data.struct_operand); + const struct_ptr_ty = cg.typeOf(extra.data.struct_operand); + const struct_ty = struct_ptr_ty.childType(zcu); + const result = try cg.structFieldPtr(inst, extra.data.struct_operand, struct_ptr, struct_ptr_ty, struct_ty, extra.data.field_index); + return cg.finishAir(inst, result, &.{extra.data.struct_operand}); +} + +fn airStructFieldPtrIndex(cg: *CodeGen, inst: Air.Inst.Index, index: u32) InnerError!void { + const zcu = cg.pt.zcu; + const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; + 
const struct_ptr = try cg.resolveInst(ty_op.operand); + const struct_ptr_ty = cg.typeOf(ty_op.operand); + const struct_ty = struct_ptr_ty.childType(zcu); + + const result = try cg.structFieldPtr(inst, ty_op.operand, struct_ptr, struct_ptr_ty, struct_ty, index); + return cg.finishAir(inst, result, &.{ty_op.operand}); +} + +fn structFieldPtr( + cg: *CodeGen, + inst: Air.Inst.Index, + ref: Air.Inst.Ref, + struct_ptr: WValue, + struct_ptr_ty: Type, + struct_ty: Type, + index: u32, +) InnerError!WValue { + const pt = cg.pt; + const zcu = pt.zcu; + const result_ty = cg.typeOfIndex(inst); + const struct_ptr_ty_info = struct_ptr_ty.ptrInfo(zcu); + + const offset = switch (struct_ty.containerLayout(zcu)) { + .@"packed" => switch (struct_ty.zigTypeTag(zcu)) { + .@"struct" => offset: { + if (result_ty.ptrInfo(zcu).packed_offset.host_size != 0) { + break :offset @as(u32, 0); + } + const struct_type = zcu.typeToStruct(struct_ty).?; + break :offset @divExact(zcu.structPackedFieldBitOffset(struct_type, index) + struct_ptr_ty_info.packed_offset.bit_offset, 8); + }, + .@"union" => 0, + else => unreachable, + }, + else => struct_ty.structFieldOffset(index, zcu), + }; + // save a load and store when we can simply reuse the operand + if (offset == 0) { + return cg.reuseOperand(ref, struct_ptr); + } + switch (struct_ptr) { + .stack_offset => |stack_offset| { + return .{ .stack_offset = .{ .value = stack_offset.value + @as(u32, @intCast(offset)), .references = 1 } }; + }, + else => return cg.buildPointerOffset(struct_ptr, offset, .new), + } +} + +fn airStructFieldVal(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const pt = cg.pt; + const zcu = pt.zcu; + const ip = &zcu.intern_pool; + const ty_pl = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl; + const struct_field = cg.air.extraData(Air.StructField, ty_pl.payload).data; + + const struct_ty = cg.typeOf(struct_field.struct_operand); + const operand = try cg.resolveInst(struct_field.struct_operand); + const field_index = struct_field.field_index; + const field_ty = struct_ty.fieldType(field_index, zcu); + if (!field_ty.hasRuntimeBitsIgnoreComptime(zcu)) return cg.finishAir(inst, .none, &.{struct_field.struct_operand}); + + const result: WValue = switch (struct_ty.containerLayout(zcu)) { + .@"packed" => switch (struct_ty.zigTypeTag(zcu)) { + .@"struct" => result: { + const packed_struct = zcu.typeToPackedStruct(struct_ty).?; + const offset = zcu.structPackedFieldBitOffset(packed_struct, field_index); + const backing_ty = Type.fromInterned(packed_struct.backingIntTypeUnordered(ip)); + const host_bits = backing_ty.intInfo(zcu).bits; + + const const_wvalue: WValue = if (33 <= host_bits and host_bits <= 64) + .{ .imm64 = offset } + else + .{ .imm32 = offset }; + + // for first field we don't require any shifting + const shifted_value = if (offset == 0) + operand + else + try cg.binOp(operand, const_wvalue, backing_ty, .shr); + + if (field_ty.zigTypeTag(zcu) == .float) { + const int_type = try pt.intType(.unsigned, @as(u16, @intCast(field_ty.bitSize(zcu)))); + const truncated = try cg.trunc(shifted_value, int_type, backing_ty); + break :result try cg.bitcast(field_ty, int_type, truncated); + } else if (field_ty.isPtrAtRuntime(zcu) and packed_struct.field_types.len == 1) { + // In this case we do not have to perform any transformations, + // we can simply reuse the operand. 
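+ // (a single-field packed struct holding a pointer is represented by the pointer itself, so no shift or truncation is needed)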
+ break :result cg.reuseOperand(struct_field.struct_operand, operand); + } else if (field_ty.isPtrAtRuntime(zcu)) { + const int_type = try pt.intType(.unsigned, @as(u16, @intCast(field_ty.bitSize(zcu)))); + break :result try cg.trunc(shifted_value, int_type, backing_ty); + } + break :result try cg.trunc(shifted_value, field_ty, backing_ty); + }, + .@"union" => result: { + if (isByRef(struct_ty, zcu, cg.target)) { + if (!isByRef(field_ty, zcu, cg.target)) { + break :result try cg.load(operand, field_ty, 0); + } else { + const new_stack_val = try cg.allocStack(field_ty); + try cg.store(new_stack_val, operand, field_ty, 0); + break :result new_stack_val; + } + } + + const union_int_type = try pt.intType(.unsigned, @as(u16, @intCast(struct_ty.bitSize(zcu)))); + if (field_ty.zigTypeTag(zcu) == .float) { + const int_type = try pt.intType(.unsigned, @as(u16, @intCast(field_ty.bitSize(zcu)))); + const truncated = try cg.trunc(operand, int_type, union_int_type); + break :result try cg.bitcast(field_ty, int_type, truncated); + } else if (field_ty.isPtrAtRuntime(zcu)) { + const int_type = try pt.intType(.unsigned, @as(u16, @intCast(field_ty.bitSize(zcu)))); + break :result try cg.trunc(operand, int_type, union_int_type); + } + break :result try cg.trunc(operand, field_ty, union_int_type); + }, + else => unreachable, + }, + else => result: { + const offset = std.math.cast(u32, struct_ty.structFieldOffset(field_index, zcu)) orelse { + return cg.fail("Field type '{f}' too big to fit into stack frame", .{field_ty.fmt(pt)}); + }; + if (isByRef(field_ty, zcu, cg.target)) { + switch (operand) { + .stack_offset => |stack_offset| { + break :result .{ .stack_offset = .{ .value = stack_offset.value + offset, .references = 1 } }; + }, + else => break :result try cg.buildPointerOffset(operand, offset, .new), + } + } + break :result try cg.load(operand, field_ty, offset); + }, + }; + + return cg.finishAir(inst, result, &.{struct_field.struct_operand}); +} + +fn airSwitchBr(cg: *CodeGen, inst: Air.Inst.Index, is_dispatch_loop: bool) InnerError!void { + const pt = cg.pt; + const zcu = pt.zcu; + + const switch_br = cg.air.unwrapSwitch(inst); + const target_ty = cg.typeOf(switch_br.operand); + + assert(target_ty.hasRuntimeBitsIgnoreComptime(zcu)); + + // swap target value with placeholder local, for dispatching + const target = if (is_dispatch_loop) target: { + const initial_target = try cg.resolveInst(switch_br.operand); + const target: WValue = try cg.allocLocal(target_ty); + try cg.lowerToStack(initial_target); + try cg.addLocal(.local_set, target.local.value); + + try cg.startBlock(.loop, .empty); // dispatch loop start + try cg.blocks.putNoClobber(cg.gpa, inst, .{ + .label = cg.block_depth, + .value = target, + }); + + break :target target; + } else try cg.resolveInst(switch_br.operand); + + const liveness = try cg.liveness.getSwitchBr(cg.gpa, inst, switch_br.cases_len + 1); + defer cg.gpa.free(liveness.deaths); + + const has_else_body = switch_br.else_body_len != 0; + const branch_count = switch_br.cases_len + 1; // if else branch is missing, we trap when failing all conditions + try cg.branches.ensureUnusedCapacity(cg.gpa, switch_br.cases_len + @intFromBool(has_else_body)); + + if (switch_br.cases_len == 0) { + assert(has_else_body); + + var it = switch_br.iterateCases(); + const else_body = it.elseBody(); + + cg.branches.appendAssumeCapacity(.{}); + const else_deaths = liveness.deaths.len - 1; + try cg.currentBranch().values.ensureUnusedCapacity(cg.gpa, liveness.deaths[else_deaths].len); + defer { + var 
else_branch = cg.branches.pop().?; + else_branch.deinit(cg.gpa); + } + try cg.genBody(else_body); + + if (is_dispatch_loop) { + try cg.endBlock(); // dispatch loop end + } + return cg.finishAir(inst, .none, &.{}); + } + + var min: ?Value = null; + var max: ?Value = null; + var branching_size: u32 = 0; // single item +1, range +2 + + { + var cases_it = switch_br.iterateCases(); + while (cases_it.next()) |case| { + for (case.items) |item| { + const val = Value.fromInterned(item.toInterned().?); + if (min == null or val.compareHetero(.lt, min.?, zcu)) min = val; + if (max == null or val.compareHetero(.gt, max.?, zcu)) max = val; + branching_size += 1; + } + for (case.ranges) |range| { + const low = Value.fromInterned(range[0].toInterned().?); + if (min == null or low.compareHetero(.lt, min.?, zcu)) min = low; + const high = Value.fromInterned(range[1].toInterned().?); + if (max == null or high.compareHetero(.gt, max.?, zcu)) max = high; + branching_size += 2; + } + } + } + + var min_space: Value.BigIntSpace = undefined; + const min_bigint = min.?.toBigInt(&min_space, zcu); + var max_space: Value.BigIntSpace = undefined; + const max_bigint = max.?.toBigInt(&max_space, zcu); + const limbs = try cg.gpa.alloc( + std.math.big.Limb, + @max(min_bigint.limbs.len, max_bigint.limbs.len) + 1, + ); + defer cg.gpa.free(limbs); + + const width_maybe: ?u32 = width: { + var width_bigint: std.math.big.int.Mutable = .{ .limbs = limbs, .positive = undefined, .len = undefined }; + width_bigint.sub(max_bigint, min_bigint); + width_bigint.addScalar(width_bigint.toConst(), 1); + break :width width_bigint.toConst().toInt(u32) catch null; + }; + + try cg.startBlock(.block, .empty); // whole switch block start + + for (0..branch_count) |_| { + try cg.startBlock(.block, .empty); + } + + // Heuristic on deciding when to use .br_table instead of .br_if jump table + // 1. Differences between lowest and highest values should fit into u32 + // 2. .br_table should be applied for "dense" switch, we test it by checking .br_if jumps will need more instructions + // 3. 
Do not use .br_table for tiny switches + const use_br_table = cond: { + const width = width_maybe orelse break :cond false; + if (width > 2 * branching_size) break :cond false; + if (width < 2 or branch_count < 2) break :cond false; + break :cond true; + }; + + if (use_br_table) { + const width = width_maybe.?; + + const br_value_original = try cg.binOp(target, try cg.resolveValue(min.?), target_ty, .sub); + _ = try cg.intcast(br_value_original, target_ty, Type.u32); + + const jump_table: Mir.JumpTable = .{ .length = width + 1 }; + const table_extra_index = try cg.addExtra(jump_table); + try cg.addInst(.{ .tag = .br_table, .data = .{ .payload = table_extra_index } }); + + const branch_list = try cg.mir_extra.addManyAsSlice(cg.gpa, width + 1); + @memset(branch_list, branch_count - 1); + + var cases_it = switch_br.iterateCases(); + while (cases_it.next()) |case| { + for (case.items) |item| { + const val = Value.fromInterned(item.toInterned().?); + var val_space: Value.BigIntSpace = undefined; + const val_bigint = val.toBigInt(&val_space, zcu); + var index_bigint: std.math.big.int.Mutable = .{ .limbs = limbs, .positive = undefined, .len = undefined }; + index_bigint.sub(val_bigint, min_bigint); + branch_list[index_bigint.toConst().toInt(u32) catch unreachable] = case.idx; + } + for (case.ranges) |range| { + var low_space: Value.BigIntSpace = undefined; + const low_bigint = Value.fromInterned(range[0].toInterned().?).toBigInt(&low_space, zcu); + var high_space: Value.BigIntSpace = undefined; + const high_bigint = Value.fromInterned(range[1].toInterned().?).toBigInt(&high_space, zcu); + var index_bigint: std.math.big.int.Mutable = .{ .limbs = limbs, .positive = undefined, .len = undefined }; + index_bigint.sub(low_bigint, min_bigint); + const start = index_bigint.toConst().toInt(u32) catch unreachable; + index_bigint.sub(high_bigint, min_bigint); + const end = (index_bigint.toConst().toInt(u32) catch unreachable) + 1; + @memset(branch_list[start..end], case.idx); + } + } + } else { + var cases_it = switch_br.iterateCases(); + while (cases_it.next()) |case| { + for (case.items) |ref| { + const val = try cg.resolveInst(ref); + _ = try cg.cmp(target, val, target_ty, .eq); + try cg.addLabel(.br_if, case.idx); // item match found + } + for (case.ranges) |range| { + const low = try cg.resolveInst(range[0]); + const high = try cg.resolveInst(range[1]); + + const gte = try cg.cmp(target, low, target_ty, .gte); + const lte = try cg.cmp(target, high, target_ty, .lte); + _ = try cg.binOp(gte, lte, Type.bool, .@"and"); + try cg.addLabel(.br_if, case.idx); // range match found + } + } + try cg.addLabel(.br, branch_count - 1); + } + + var cases_it = switch_br.iterateCases(); + while (cases_it.next()) |case| { + try cg.endBlock(); + + cg.branches.appendAssumeCapacity(.{}); + try cg.currentBranch().values.ensureUnusedCapacity(cg.gpa, liveness.deaths[case.idx].len); + defer { + var case_branch = cg.branches.pop().?; + case_branch.deinit(cg.gpa); + } + try cg.genBody(case.body); + + try cg.addLabel(.br, branch_count - case.idx - 1); // matching case found and executed => exit switch + } + + try cg.endBlock(); + if (has_else_body) { + const else_body = cases_it.elseBody(); + + cg.branches.appendAssumeCapacity(.{}); + const else_deaths = liveness.deaths.len - 1; + try cg.currentBranch().values.ensureUnusedCapacity(cg.gpa, liveness.deaths[else_deaths].len); + defer { + var else_branch = cg.branches.pop().?; + else_branch.deinit(cg.gpa); + } + try cg.genBody(else_body); + } else { + try cg.addTag(.@"unreachable"); + 
}
+
+ try cg.endBlock(); // whole switch block end
+
+ if (is_dispatch_loop) {
+ try cg.endBlock(); // dispatch loop end
+ }
+
+ return cg.finishAir(inst, .none, &.{});
+}
+
+fn airSwitchDispatch(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
+ const br = cg.air.instructions.items(.data)[@intFromEnum(inst)].br;
+ const switch_loop = cg.blocks.get(br.block_inst).?;
+
+ const operand = try cg.resolveInst(br.operand);
+ try cg.lowerToStack(operand);
+ try cg.addLocal(.local_set, switch_loop.value.local.value);
+
+ const idx: u32 = cg.block_depth - switch_loop.label;
+ try cg.addLabel(.br, idx);
+
+ return cg.finishAir(inst, .none, &.{br.operand});
+}
+
+fn airIsErr(cg: *CodeGen, inst: Air.Inst.Index, opcode: std.wasm.Opcode, op_kind: enum { value, ptr }) InnerError!void {
+ const zcu = cg.pt.zcu;
+ const un_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].un_op;
+ const operand = try cg.resolveInst(un_op);
+ const err_union_ty = cg.typeOf(un_op);
+ const pl_ty = err_union_ty.errorUnionPayload(zcu);
+
+ const result: WValue = result: {
+ if (err_union_ty.errorUnionSet(zcu).errorSetIsEmpty(zcu)) {
+ switch (opcode) {
+ .i32_ne => break :result .{ .imm32 = 0 },
+ .i32_eq => break :result .{ .imm32 = 1 },
+ else => unreachable,
+ }
+ }
+
+ try cg.emitWValue(operand);
+ if (op_kind == .ptr or pl_ty.hasRuntimeBitsIgnoreComptime(zcu)) {
+ try cg.addMemArg(.i32_load16_u, .{
+ .offset = operand.offset() + @as(u32, @intCast(errUnionErrorOffset(pl_ty, zcu))),
+ .alignment = @intCast(Type.anyerror.abiAlignment(zcu).toByteUnits().?),
+ });
+ }
+
+ // Compare the error value with '0'
+ try cg.addImm32(0);
+ try cg.addTag(Mir.Inst.Tag.fromOpcode(opcode));
+ break :result .stack;
+ };
+ return cg.finishAir(inst, result, &.{un_op});
+}
+
+/// E!T -> T op_is_ptr == false
+/// *(E!T) -> *T op_is_ptr == true
+fn airUnwrapErrUnionPayload(cg: *CodeGen, inst: Air.Inst.Index, op_is_ptr: bool) InnerError!void {
+ const zcu = cg.pt.zcu;
+ const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
+
+ const operand = try cg.resolveInst(ty_op.operand);
+ const op_ty = cg.typeOf(ty_op.operand);
+ const eu_ty = if (op_is_ptr) op_ty.childType(zcu) else op_ty;
+ const payload_ty = eu_ty.errorUnionPayload(zcu);
+
+ const result: WValue = result: {
+ if (!payload_ty.hasRuntimeBitsIgnoreComptime(zcu)) {
+ if (op_is_ptr) {
+ break :result cg.reuseOperand(ty_op.operand, operand);
+ } else {
+ break :result .none;
+ }
+ }
+
+ const pl_offset: u32 = @intCast(errUnionPayloadOffset(payload_ty, zcu));
+ if (op_is_ptr or isByRef(payload_ty, zcu, cg.target)) {
+ break :result try cg.buildPointerOffset(operand, pl_offset, .new);
+ } else {
+ assert(isByRef(eu_ty, zcu, cg.target));
+ break :result try cg.load(operand, payload_ty, pl_offset);
+ }
+ };
+ return cg.finishAir(inst, result, &.{ty_op.operand});
+}
+
+/// E!T -> E op_is_ptr == false
+/// *(E!T) -> E op_is_ptr == true
+/// NOTE: op_is_ptr will not change return type
+fn airUnwrapErrUnionError(cg: *CodeGen, inst: Air.Inst.Index, op_is_ptr: bool) InnerError!void {
+ const zcu = cg.pt.zcu;
+ const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
+
+ const operand = try cg.resolveInst(ty_op.operand);
+ const op_ty = cg.typeOf(ty_op.operand);
+ const eu_ty = if (op_is_ptr) op_ty.childType(zcu) else op_ty;
+ const payload_ty = eu_ty.errorUnionPayload(zcu);
+
+ const result: WValue = result: {
+ if (eu_ty.errorUnionSet(zcu).errorSetIsEmpty(zcu)) {
+ break :result .{ .imm32 = 0 };
+ }
+
+ const err_offset: u32 =
@intCast(errUnionErrorOffset(payload_ty, zcu)); + if (op_is_ptr or isByRef(eu_ty, zcu, cg.target)) { + break :result try cg.load(operand, Type.anyerror, err_offset); + } else { + assert(!payload_ty.hasRuntimeBitsIgnoreComptime(zcu)); + break :result cg.reuseOperand(ty_op.operand, operand); + } + }; + return cg.finishAir(inst, result, &.{ty_op.operand}); +} + +fn airWrapErrUnionPayload(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const zcu = cg.pt.zcu; + const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; + + const operand = try cg.resolveInst(ty_op.operand); + const err_ty = cg.typeOfIndex(inst); + + const pl_ty = cg.typeOf(ty_op.operand); + const result = result: { + if (!pl_ty.hasRuntimeBitsIgnoreComptime(zcu)) { + break :result cg.reuseOperand(ty_op.operand, operand); + } + + const err_union = try cg.allocStack(err_ty); + const payload_ptr = try cg.buildPointerOffset(err_union, @as(u32, @intCast(errUnionPayloadOffset(pl_ty, zcu))), .new); + try cg.store(payload_ptr, operand, pl_ty, 0); + + // ensure we also write '0' to the error part, so any present stack value gets overwritten by it. + try cg.emitWValue(err_union); + try cg.addImm32(0); + const err_val_offset: u32 = @intCast(errUnionErrorOffset(pl_ty, zcu)); + try cg.addMemArg(.i32_store16, .{ + .offset = err_union.offset() + err_val_offset, + .alignment = 2, + }); + break :result err_union; + }; + return cg.finishAir(inst, result, &.{ty_op.operand}); +} + +fn airWrapErrUnionErr(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const zcu = cg.pt.zcu; + const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; + + const operand = try cg.resolveInst(ty_op.operand); + const err_ty = ty_op.ty.toType(); + const pl_ty = err_ty.errorUnionPayload(zcu); + + const result = result: { + if (!pl_ty.hasRuntimeBitsIgnoreComptime(zcu)) { + break :result cg.reuseOperand(ty_op.operand, operand); + } + + const err_union = try cg.allocStack(err_ty); + // store error value + try cg.store(err_union, operand, Type.anyerror, @intCast(errUnionErrorOffset(pl_ty, zcu))); + + // write 'undefined' to the payload + const payload_ptr = try cg.buildPointerOffset(err_union, @as(u32, @intCast(errUnionPayloadOffset(pl_ty, zcu))), .new); + const len = @as(u32, @intCast(err_ty.errorUnionPayload(zcu).abiSize(zcu))); + try cg.memset(Type.u8, payload_ptr, .{ .imm32 = len }, .{ .imm32 = 0xaa }); + + break :result err_union; + }; + return cg.finishAir(inst, result, &.{ty_op.operand}); +} + +fn airIntcast(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; + + const ty = ty_op.ty.toType(); + const operand = try cg.resolveInst(ty_op.operand); + const operand_ty = cg.typeOf(ty_op.operand); + const zcu = cg.pt.zcu; + if (ty.zigTypeTag(zcu) == .vector or operand_ty.zigTypeTag(zcu) == .vector) { + return cg.fail("todo Wasm intcast for vectors", .{}); + } + if (ty.abiSize(zcu) > 16 or operand_ty.abiSize(zcu) > 16) { + return cg.fail("todo Wasm intcast for bitsize > 128", .{}); + } + + const op_bits = toWasmBits(@intCast(operand_ty.bitSize(zcu))).?; + const wanted_bits = toWasmBits(@intCast(ty.bitSize(zcu))).?; + const result = if (op_bits == wanted_bits) + cg.reuseOperand(ty_op.operand, operand) + else + try cg.intcast(operand, operand_ty, ty); + + return cg.finishAir(inst, result, &.{ty_op.operand}); +} + +/// Upcasts or downcasts an integer based on the given and wanted types, +/// and stores the result in a new operand. 
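+/// Widening to 128 bits spills the result to the stack frame as two u64 limbs, sign- or zero-extending the upper limb.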
+/// Asserts type's bitsize <= 128 +/// NOTE: May leave the result on the top of the stack. +fn intcast(cg: *CodeGen, operand: WValue, given: Type, wanted: Type) InnerError!WValue { + const zcu = cg.pt.zcu; + const given_bitsize = @as(u16, @intCast(given.bitSize(zcu))); + const wanted_bitsize = @as(u16, @intCast(wanted.bitSize(zcu))); + assert(given_bitsize <= 128); + assert(wanted_bitsize <= 128); + + const op_bits = toWasmBits(given_bitsize).?; + const wanted_bits = toWasmBits(wanted_bitsize).?; + if (op_bits == wanted_bits) { + return operand; + } + + if (op_bits == 64 and wanted_bits == 32) { + try cg.emitWValue(operand); + try cg.addTag(.i32_wrap_i64); + return .stack; + } else if (op_bits == 32 and wanted_bits == 64) { + try cg.emitWValue(operand); + try cg.addTag(if (wanted.isSignedInt(zcu)) .i64_extend_i32_s else .i64_extend_i32_u); + return .stack; + } else if (wanted_bits == 128) { + // for 128bit integers we store the integer in the virtual stack, rather than a local + const stack_ptr = try cg.allocStack(wanted); + try cg.emitWValue(stack_ptr); + + // for 32 bit integers, we first coerce the value into a 64 bit integer before storing it + // meaning less store operations are required. + const lhs = if (op_bits == 32) blk: { + const sign_ty = if (wanted.isSignedInt(zcu)) Type.i64 else Type.u64; + break :blk try (try cg.intcast(operand, given, sign_ty)).toLocal(cg, sign_ty); + } else operand; + + // store lsb first + try cg.store(.stack, lhs, Type.u64, 0 + stack_ptr.offset()); + + // For signed integers we shift lsb by 63 (64bit integer - 1 sign bit) and store remaining value + if (wanted.isSignedInt(zcu)) { + try cg.emitWValue(stack_ptr); + const shr = try cg.binOp(lhs, .{ .imm64 = 63 }, Type.i64, .shr); + try cg.store(.stack, shr, Type.u64, 8 + stack_ptr.offset()); + } else { + // Ensure memory of msb is zero'd + try cg.store(stack_ptr, .{ .imm64 = 0 }, Type.u64, 8); + } + return stack_ptr; + } else return cg.load(operand, wanted, 0); +} + +fn airIsNull(cg: *CodeGen, inst: Air.Inst.Index, opcode: std.wasm.Opcode, op_kind: enum { value, ptr }) InnerError!void { + const zcu = cg.pt.zcu; + const un_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].un_op; + const operand = try cg.resolveInst(un_op); + + const op_ty = cg.typeOf(un_op); + const optional_ty = if (op_kind == .ptr) op_ty.childType(zcu) else op_ty; + const result = try cg.isNull(operand, optional_ty, opcode); + return cg.finishAir(inst, result, &.{un_op}); +} + +/// For a given type and operand, checks if it's considered `null`. 
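+/// For optionals that use a separate null tag, the tag byte stored directly after the payload is compared against zero.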
+/// NOTE: Leaves the result on the stack +fn isNull(cg: *CodeGen, operand: WValue, optional_ty: Type, opcode: std.wasm.Opcode) InnerError!WValue { + const pt = cg.pt; + const zcu = pt.zcu; + try cg.emitWValue(operand); + const payload_ty = optional_ty.optionalChild(zcu); + if (!optional_ty.optionalReprIsPayload(zcu)) { + // When payload is zero-bits, we can treat operand as a value, rather than + // a pointer to the stack value + if (payload_ty.hasRuntimeBitsIgnoreComptime(zcu)) { + const offset = std.math.cast(u32, payload_ty.abiSize(zcu)) orelse { + return cg.fail("Optional type {f} too big to fit into stack frame", .{optional_ty.fmt(pt)}); + }; + try cg.addMemArg(.i32_load8_u, .{ .offset = operand.offset() + offset, .alignment = 1 }); + } + } else if (payload_ty.isSlice(zcu)) { + switch (cg.ptr_size) { + .wasm32 => try cg.addMemArg(.i32_load, .{ .offset = operand.offset(), .alignment = 4 }), + .wasm64 => try cg.addMemArg(.i64_load, .{ .offset = operand.offset(), .alignment = 8 }), + } + } + + // Compare the null value with '0' + try cg.addImm32(0); + try cg.addTag(Mir.Inst.Tag.fromOpcode(opcode)); + + return .stack; +} + +fn airOptionalPayload(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const zcu = cg.pt.zcu; + const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; + const opt_ty = cg.typeOf(ty_op.operand); + const payload_ty = cg.typeOfIndex(inst); + if (!payload_ty.hasRuntimeBitsIgnoreComptime(zcu)) { + return cg.finishAir(inst, .none, &.{ty_op.operand}); + } + + const result = result: { + const operand = try cg.resolveInst(ty_op.operand); + if (opt_ty.optionalReprIsPayload(zcu)) break :result cg.reuseOperand(ty_op.operand, operand); + + if (isByRef(payload_ty, zcu, cg.target)) { + break :result try cg.buildPointerOffset(operand, 0, .new); + } + + break :result try cg.load(operand, payload_ty, 0); + }; + return cg.finishAir(inst, result, &.{ty_op.operand}); +} + +fn airOptionalPayloadPtr(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const zcu = cg.pt.zcu; + const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; + const operand = try cg.resolveInst(ty_op.operand); + const opt_ty = cg.typeOf(ty_op.operand).childType(zcu); + + const result = result: { + const payload_ty = opt_ty.optionalChild(zcu); + if (!payload_ty.hasRuntimeBitsIgnoreComptime(zcu) or opt_ty.optionalReprIsPayload(zcu)) { + break :result cg.reuseOperand(ty_op.operand, operand); + } + + break :result try cg.buildPointerOffset(operand, 0, .new); + }; + return cg.finishAir(inst, result, &.{ty_op.operand}); +} + +fn airOptionalPayloadPtrSet(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const pt = cg.pt; + const zcu = pt.zcu; + const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; + const operand = try cg.resolveInst(ty_op.operand); + const opt_ty = cg.typeOf(ty_op.operand).childType(zcu); + const payload_ty = opt_ty.optionalChild(zcu); + + if (opt_ty.optionalReprIsPayload(zcu)) { + return cg.finishAir(inst, operand, &.{ty_op.operand}); + } + + const offset = std.math.cast(u32, payload_ty.abiSize(zcu)) orelse { + return cg.fail("Optional type {f} too big to fit into stack frame", .{opt_ty.fmt(pt)}); + }; + + try cg.emitWValue(operand); + try cg.addImm32(1); + try cg.addMemArg(.i32_store8, .{ .offset = operand.offset() + offset, .alignment = 1 }); + + const result = try cg.buildPointerOffset(operand, 0, .new); + return cg.finishAir(inst, result, &.{ty_op.operand}); +} + +fn airWrapOptional(cg: *CodeGen, inst: Air.Inst.Index) 
InnerError!void { + const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; + const payload_ty = cg.typeOf(ty_op.operand); + const pt = cg.pt; + const zcu = pt.zcu; + + const result = result: { + if (!payload_ty.hasRuntimeBitsIgnoreComptime(zcu)) { + const non_null_bit = try cg.allocStack(Type.u1); + try cg.emitWValue(non_null_bit); + try cg.addImm32(1); + try cg.addMemArg(.i32_store8, .{ .offset = non_null_bit.offset(), .alignment = 1 }); + break :result non_null_bit; + } + + const operand = try cg.resolveInst(ty_op.operand); + const op_ty = cg.typeOfIndex(inst); + if (op_ty.optionalReprIsPayload(zcu)) { + break :result cg.reuseOperand(ty_op.operand, operand); + } + const offset = std.math.cast(u32, payload_ty.abiSize(zcu)) orelse { + return cg.fail("Optional type {f} too big to fit into stack frame", .{op_ty.fmt(pt)}); + }; + + // Create optional type, set the non-null bit, and store the operand inside the optional type + const result_ptr = try cg.allocStack(op_ty); + try cg.emitWValue(result_ptr); + try cg.addImm32(1); + try cg.addMemArg(.i32_store8, .{ .offset = result_ptr.offset() + offset, .alignment = 1 }); + + const payload_ptr = try cg.buildPointerOffset(result_ptr, 0, .new); + try cg.store(payload_ptr, operand, payload_ty, 0); + break :result result_ptr; + }; + + return cg.finishAir(inst, result, &.{ty_op.operand}); +} + +fn airSlice(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const ty_pl = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl; + const bin_op = cg.air.extraData(Air.Bin, ty_pl.payload).data; + + const lhs = try cg.resolveInst(bin_op.lhs); + const rhs = try cg.resolveInst(bin_op.rhs); + const slice_ty = cg.typeOfIndex(inst); + + const slice = try cg.allocStack(slice_ty); + try cg.store(slice, lhs, Type.usize, 0); + try cg.store(slice, rhs, Type.usize, cg.ptrSize()); + + return cg.finishAir(inst, slice, &.{ bin_op.lhs, bin_op.rhs }); +} + +fn airSliceLen(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; + + const operand = try cg.resolveInst(ty_op.operand); + return cg.finishAir(inst, try cg.sliceLen(operand), &.{ty_op.operand}); +} + +fn airSliceElemVal(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const zcu = cg.pt.zcu; + const bin_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].bin_op; + + const slice_ty = cg.typeOf(bin_op.lhs); + const slice = try cg.resolveInst(bin_op.lhs); + const index = try cg.resolveInst(bin_op.rhs); + const elem_ty = slice_ty.childType(zcu); + const elem_size = elem_ty.abiSize(zcu); + + // load pointer onto stack + _ = try cg.load(slice, Type.usize, 0); + + // calculate index into slice + try cg.emitWValue(index); + try cg.addImm32(@intCast(elem_size)); + try cg.addTag(.i32_mul); + try cg.addTag(.i32_add); + + const elem_result = if (isByRef(elem_ty, zcu, cg.target)) + .stack + else + try cg.load(.stack, elem_ty, 0); + + return cg.finishAir(inst, elem_result, &.{ bin_op.lhs, bin_op.rhs }); +} + +fn airSliceElemPtr(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const zcu = cg.pt.zcu; + const ty_pl = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl; + const bin_op = cg.air.extraData(Air.Bin, ty_pl.payload).data; + + const elem_ty = ty_pl.ty.toType().childType(zcu); + const elem_size = elem_ty.abiSize(zcu); + + const slice = try cg.resolveInst(bin_op.lhs); + const index = try cg.resolveInst(bin_op.rhs); + + _ = try cg.load(slice, Type.usize, 0); + + // calculate index into slice + try 
cg.emitWValue(index); + try cg.addImm32(@intCast(elem_size)); + try cg.addTag(.i32_mul); + try cg.addTag(.i32_add); + + return cg.finishAir(inst, .stack, &.{ bin_op.lhs, bin_op.rhs }); +} + +fn airSlicePtr(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; + const operand = try cg.resolveInst(ty_op.operand); + return cg.finishAir(inst, try cg.slicePtr(operand), &.{ty_op.operand}); +} + +fn slicePtr(cg: *CodeGen, operand: WValue) InnerError!WValue { + const ptr = try cg.load(operand, Type.usize, 0); + return ptr.toLocal(cg, Type.usize); +} + +fn sliceLen(cg: *CodeGen, operand: WValue) InnerError!WValue { + const len = try cg.load(operand, Type.usize, cg.ptrSize()); + return len.toLocal(cg, Type.usize); +} + +fn airTrunc(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; + + const operand = try cg.resolveInst(ty_op.operand); + const wanted_ty: Type = ty_op.ty.toType(); + const op_ty = cg.typeOf(ty_op.operand); + const zcu = cg.pt.zcu; + + if (wanted_ty.zigTypeTag(zcu) == .vector or op_ty.zigTypeTag(zcu) == .vector) { + return cg.fail("TODO: trunc for vectors", .{}); + } + + const result = if (op_ty.bitSize(zcu) == wanted_ty.bitSize(zcu)) + cg.reuseOperand(ty_op.operand, operand) + else + try cg.trunc(operand, wanted_ty, op_ty); + + return cg.finishAir(inst, result, &.{ty_op.operand}); +} + +/// Truncates a given operand to a given type, discarding any overflown bits. +/// NOTE: Resulting value is left on the stack. +fn trunc(cg: *CodeGen, operand: WValue, wanted_ty: Type, given_ty: Type) InnerError!WValue { + const zcu = cg.pt.zcu; + const given_bits = @as(u16, @intCast(given_ty.bitSize(zcu))); + if (toWasmBits(given_bits) == null) { + return cg.fail("TODO: Implement wasm integer truncation for integer bitsize: {d}", .{given_bits}); + } + + var result = try cg.intcast(operand, given_ty, wanted_ty); + const wanted_bits = @as(u16, @intCast(wanted_ty.bitSize(zcu))); + const wasm_bits = toWasmBits(wanted_bits).?; + if (wasm_bits != wanted_bits) { + result = try cg.wrapOperand(result, wanted_ty); + } + return result; +} + +fn airArrayToSlice(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const zcu = cg.pt.zcu; + const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; + + const operand = try cg.resolveInst(ty_op.operand); + const array_ty = cg.typeOf(ty_op.operand).childType(zcu); + const slice_ty = ty_op.ty.toType(); + + // create a slice on the stack + const slice_local = try cg.allocStack(slice_ty); + + // store the array ptr in the slice + if (array_ty.hasRuntimeBitsIgnoreComptime(zcu)) { + try cg.store(slice_local, operand, Type.usize, 0); + } + + // store the length of the array in the slice + const array_len: u32 = @intCast(array_ty.arrayLen(zcu)); + try cg.store(slice_local, .{ .imm32 = array_len }, Type.usize, cg.ptrSize()); + + return cg.finishAir(inst, slice_local, &.{ty_op.operand}); +} + +fn airPtrElemVal(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const zcu = cg.pt.zcu; + const bin_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].bin_op; + + const ptr_ty = cg.typeOf(bin_op.lhs); + const ptr = try cg.resolveInst(bin_op.lhs); + const index = try cg.resolveInst(bin_op.rhs); + const elem_ty = ptr_ty.childType(zcu); + const elem_size = elem_ty.abiSize(zcu); + + // load pointer onto the stack + if (ptr_ty.isSlice(zcu)) { + _ = try cg.load(ptr, Type.usize, 0); + } else { + try 
cg.lowerToStack(ptr); + } + + // calculate index into slice + try cg.emitWValue(index); + try cg.addImm32(@intCast(elem_size)); + try cg.addTag(.i32_mul); + try cg.addTag(.i32_add); + + const elem_result = if (isByRef(elem_ty, zcu, cg.target)) + .stack + else + try cg.load(.stack, elem_ty, 0); + + return cg.finishAir(inst, elem_result, &.{ bin_op.lhs, bin_op.rhs }); +} + +fn airPtrElemPtr(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const zcu = cg.pt.zcu; + const ty_pl = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl; + const bin_op = cg.air.extraData(Air.Bin, ty_pl.payload).data; + + const ptr_ty = cg.typeOf(bin_op.lhs); + const elem_ty = ty_pl.ty.toType().childType(zcu); + const elem_size = elem_ty.abiSize(zcu); + + const ptr = try cg.resolveInst(bin_op.lhs); + const index = try cg.resolveInst(bin_op.rhs); + + // load pointer onto the stack + if (ptr_ty.isSlice(zcu)) { + _ = try cg.load(ptr, Type.usize, 0); + } else { + try cg.lowerToStack(ptr); + } + + // calculate index into ptr + try cg.emitWValue(index); + try cg.addImm32(@intCast(elem_size)); + try cg.addTag(.i32_mul); + try cg.addTag(.i32_add); + + return cg.finishAir(inst, .stack, &.{ bin_op.lhs, bin_op.rhs }); +} + +fn airPtrBinOp(cg: *CodeGen, inst: Air.Inst.Index, op: Op) InnerError!void { + const zcu = cg.pt.zcu; + const ty_pl = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl; + const bin_op = cg.air.extraData(Air.Bin, ty_pl.payload).data; + + const ptr = try cg.resolveInst(bin_op.lhs); + const offset = try cg.resolveInst(bin_op.rhs); + const ptr_ty = cg.typeOf(bin_op.lhs); + const pointee_ty = switch (ptr_ty.ptrSize(zcu)) { + .one => ptr_ty.childType(zcu).childType(zcu), // ptr to array, so get array element type + else => ptr_ty.childType(zcu), + }; + + const valtype = typeToValtype(Type.usize, zcu, cg.target); + const mul_opcode = buildOpcode(.{ .valtype1 = valtype, .op = .mul }); + const bin_opcode = buildOpcode(.{ .valtype1 = valtype, .op = op }); + + try cg.lowerToStack(ptr); + try cg.emitWValue(offset); + try cg.addImm32(@intCast(pointee_ty.abiSize(zcu))); + try cg.addTag(Mir.Inst.Tag.fromOpcode(mul_opcode)); + try cg.addTag(Mir.Inst.Tag.fromOpcode(bin_opcode)); + + return cg.finishAir(inst, .stack, &.{ bin_op.lhs, bin_op.rhs }); +} + +fn airMemset(cg: *CodeGen, inst: Air.Inst.Index, safety: bool) InnerError!void { + const zcu = cg.pt.zcu; + const bin_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].bin_op; + + const ptr = try cg.resolveInst(bin_op.lhs); + const ptr_ty = cg.typeOf(bin_op.lhs); + const value = try cg.resolveInst(bin_op.rhs); + const len = switch (ptr_ty.ptrSize(zcu)) { + .slice => try cg.sliceLen(ptr), + .one => @as(WValue, .{ .imm32 = @as(u32, @intCast(ptr_ty.childType(zcu).arrayLen(zcu))) }), + .c, .many => unreachable, + }; + + const elem_ty = if (ptr_ty.ptrSize(zcu) == .one) + ptr_ty.childType(zcu).childType(zcu) + else + ptr_ty.childType(zcu); + + if (!safety and bin_op.rhs == .undef) { + return cg.finishAir(inst, .none, &.{ bin_op.lhs, bin_op.rhs }); + } + + const dst_ptr = try cg.sliceOrArrayPtr(ptr, ptr_ty); + try cg.memset(elem_ty, dst_ptr, len, value); + + return cg.finishAir(inst, .none, &.{ bin_op.lhs, bin_op.rhs }); +} + +/// Sets a region of memory at `ptr` to the value of `value` +/// When the user has enabled the bulk_memory feature, we lower +/// this to wasm's memset instruction. When the feature is not present, +/// we implement it manually. 
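+/// The manual fallback emits a loop that stores `value` one element at a time until an end pointer is reached.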
+fn memset(cg: *CodeGen, elem_ty: Type, ptr: WValue, len: WValue, value: WValue) InnerError!void { + const zcu = cg.pt.zcu; + const abi_size = @as(u32, @intCast(elem_ty.abiSize(zcu))); + + // When bulk_memory is enabled, we lower it to wasm's memset instruction. + // If not, we lower it ourselves. + if (cg.target.cpu.has(.wasm, .bulk_memory) and abi_size == 1) { + const len0_ok = cg.target.cpu.has(.wasm, .nontrapping_bulk_memory_len0); + + if (!len0_ok) { + try cg.startBlock(.block, .empty); + + // Even if `len` is zero, the spec requires an implementation to trap if `ptr + len` is + // out of memory bounds. This can easily happen in Zig in a case such as: + // + // const ptr: [*]u8 = undefined; + // var len: usize = runtime_zero(); + // @memset(ptr[0..len], 42); + // + // So explicitly avoid using `memory.fill` in the `len == 0` case. Lovely design. + try cg.emitWValue(len); + try cg.addTag(.i32_eqz); + try cg.addLabel(.br_if, 0); + } + + try cg.lowerToStack(ptr); + try cg.emitWValue(value); + try cg.emitWValue(len); + try cg.addExtended(.memory_fill); + + if (!len0_ok) { + try cg.endBlock(); + } + + return; + } + + const final_len: WValue = switch (len) { + .imm32 => |val| .{ .imm32 = val * abi_size }, + .imm64 => |val| .{ .imm64 = val * abi_size }, + else => if (abi_size != 1) blk: { + const new_len = try cg.ensureAllocLocal(Type.usize); + try cg.emitWValue(len); + switch (cg.ptr_size) { + .wasm32 => { + try cg.emitWValue(.{ .imm32 = abi_size }); + try cg.addTag(.i32_mul); + }, + .wasm64 => { + try cg.emitWValue(.{ .imm64 = abi_size }); + try cg.addTag(.i64_mul); + }, + } + try cg.addLocal(.local_set, new_len.local.value); + break :blk new_len; + } else len, + }; + + var end_ptr = try cg.allocLocal(Type.usize); + defer end_ptr.free(cg); + var new_ptr = try cg.buildPointerOffset(ptr, 0, .new); + defer new_ptr.free(cg); + + // get the loop conditional: if current pointer address equals final pointer's address + try cg.lowerToStack(ptr); + try cg.emitWValue(final_len); + switch (cg.ptr_size) { + .wasm32 => try cg.addTag(.i32_add), + .wasm64 => try cg.addTag(.i64_add), + } + try cg.addLocal(.local_set, end_ptr.local.value); + + // outer block to jump to when loop is done + try cg.startBlock(.block, .empty); + try cg.startBlock(.loop, .empty); + + // check for condition for loop end + try cg.emitWValue(new_ptr); + try cg.emitWValue(end_ptr); + switch (cg.ptr_size) { + .wasm32 => try cg.addTag(.i32_eq), + .wasm64 => try cg.addTag(.i64_eq), + } + try cg.addLabel(.br_if, 1); // jump out of loop into outer block (finished) + + // store the value at the current position of the pointer + try cg.store(new_ptr, value, elem_ty, 0); + + // move the pointer to the next element + try cg.emitWValue(new_ptr); + switch (cg.ptr_size) { + .wasm32 => { + try cg.emitWValue(.{ .imm32 = abi_size }); + try cg.addTag(.i32_add); + }, + .wasm64 => { + try cg.emitWValue(.{ .imm64 = abi_size }); + try cg.addTag(.i64_add); + }, + } + try cg.addLocal(.local_set, new_ptr.local.value); + + // end of loop + try cg.addLabel(.br, 0); // jump to start of loop + try cg.endBlock(); + try cg.endBlock(); +} + +fn airArrayElemVal(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const zcu = cg.pt.zcu; + const bin_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].bin_op; + + const array_ty = cg.typeOf(bin_op.lhs); + const array = try cg.resolveInst(bin_op.lhs); + const index = try cg.resolveInst(bin_op.rhs); + const elem_ty = array_ty.childType(zcu); + const elem_size = elem_ty.abiSize(zcu); + + if (isByRef(array_ty, 
zcu, cg.target)) { + try cg.lowerToStack(array); + try cg.emitWValue(index); + try cg.addImm32(@intCast(elem_size)); + try cg.addTag(.i32_mul); + try cg.addTag(.i32_add); + } else { + assert(array_ty.zigTypeTag(zcu) == .vector); + + switch (index) { + inline .imm32, .imm64 => |lane| { + const opcode: std.wasm.SimdOpcode = switch (elem_ty.bitSize(zcu)) { + 8 => if (elem_ty.isSignedInt(zcu)) .i8x16_extract_lane_s else .i8x16_extract_lane_u, + 16 => if (elem_ty.isSignedInt(zcu)) .i16x8_extract_lane_s else .i16x8_extract_lane_u, + 32 => if (elem_ty.isInt(zcu)) .i32x4_extract_lane else .f32x4_extract_lane, + 64 => if (elem_ty.isInt(zcu)) .i64x2_extract_lane else .f64x2_extract_lane, + else => unreachable, + }; + + var operands = [_]u32{ @intFromEnum(opcode), @as(u8, @intCast(lane)) }; + + try cg.emitWValue(array); + + const extra_index: u32 = @intCast(cg.mir_extra.items.len); + try cg.mir_extra.appendSlice(cg.gpa, &operands); + try cg.addInst(.{ .tag = .simd_prefix, .data = .{ .payload = extra_index } }); + + return cg.finishAir(inst, .stack, &.{ bin_op.lhs, bin_op.rhs }); + }, + else => { + const stack_vec = try cg.allocStack(array_ty); + try cg.store(stack_vec, array, array_ty, 0); + + // Is a non-unrolled vector (v128) + try cg.lowerToStack(stack_vec); + try cg.emitWValue(index); + try cg.addImm32(@intCast(elem_size)); + try cg.addTag(.i32_mul); + try cg.addTag(.i32_add); + }, + } + } + + const elem_result = if (isByRef(elem_ty, zcu, cg.target)) + .stack + else + try cg.load(.stack, elem_ty, 0); + + return cg.finishAir(inst, elem_result, &.{ bin_op.lhs, bin_op.rhs }); +} + +fn airIntFromFloat(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const zcu = cg.pt.zcu; + const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; + + const operand = try cg.resolveInst(ty_op.operand); + const op_ty = cg.typeOf(ty_op.operand); + const op_bits = op_ty.floatBits(cg.target); + + const dest_ty = cg.typeOfIndex(inst); + const dest_info = dest_ty.intInfo(zcu); + + if (dest_info.bits > 128) { + return cg.fail("TODO: intFromFloat for integers/floats with bitsize {}", .{dest_info.bits}); + } + + if ((op_bits != 32 and op_bits != 64) or dest_info.bits > 64) { + const dest_bitsize = if (dest_info.bits <= 32) 32 else std.math.ceilPowerOfTwoAssert(u16, dest_info.bits); + + const intrinsic = switch (dest_info.signedness) { + inline .signed, .unsigned => |ct_s| switch (op_bits) { + inline 16, 32, 64, 80, 128 => |ct_op_bits| switch (dest_bitsize) { + inline 32, 64, 128 => |ct_dest_bits| @field( + Mir.Intrinsic, + "__fix" ++ switch (ct_s) { + .signed => "", + .unsigned => "uns", + } ++ + compilerRtFloatAbbrev(ct_op_bits) ++ "f" ++ + compilerRtIntAbbrev(ct_dest_bits) ++ "i", + ), + else => unreachable, + }, + else => unreachable, + }, + }; + const result = try cg.callIntrinsic(intrinsic, &.{op_ty.ip_index}, dest_ty, &.{operand}); + return cg.finishAir(inst, result, &.{ty_op.operand}); + } + + try cg.emitWValue(operand); + const op = buildOpcode(.{ + .op = .trunc, + .valtype1 = typeToValtype(dest_ty, zcu, cg.target), + .valtype2 = typeToValtype(op_ty, zcu, cg.target), + .signedness = dest_info.signedness, + }); + try cg.addTag(Mir.Inst.Tag.fromOpcode(op)); + const result = try cg.wrapOperand(.stack, dest_ty); + return cg.finishAir(inst, result, &.{ty_op.operand}); +} + +fn airFloatFromInt(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const zcu = cg.pt.zcu; + const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; + + const operand = try cg.resolveInst(ty_op.operand); + 
const op_ty = cg.typeOf(ty_op.operand); + const op_info = op_ty.intInfo(zcu); + + const dest_ty = cg.typeOfIndex(inst); + const dest_bits = dest_ty.floatBits(cg.target); + + if (op_info.bits > 128) { + return cg.fail("TODO: floatFromInt for integers/floats with bitsize {d} bits", .{op_info.bits}); + } + + if (op_info.bits > 64 or (dest_bits > 64 or dest_bits < 32)) { + const op_bitsize = if (op_info.bits <= 32) 32 else std.math.ceilPowerOfTwoAssert(u16, op_info.bits); + + const intrinsic = switch (op_info.signedness) { + inline .signed, .unsigned => |ct_s| switch (op_bitsize) { + inline 32, 64, 128 => |ct_int_bits| switch (dest_bits) { + inline 16, 32, 64, 80, 128 => |ct_float_bits| @field( + Mir.Intrinsic, + "__float" ++ switch (ct_s) { + .signed => "", + .unsigned => "un", + } ++ + compilerRtIntAbbrev(ct_int_bits) ++ "i" ++ + compilerRtFloatAbbrev(ct_float_bits) ++ "f", + ), + else => unreachable, + }, + else => unreachable, + }, + }; + + const result = try cg.callIntrinsic(intrinsic, &.{op_ty.ip_index}, dest_ty, &.{operand}); + return cg.finishAir(inst, result, &.{ty_op.operand}); + } + + try cg.emitWValue(operand); + const op = buildOpcode(.{ + .op = .convert, + .valtype1 = typeToValtype(dest_ty, zcu, cg.target), + .valtype2 = typeToValtype(op_ty, zcu, cg.target), + .signedness = op_info.signedness, + }); + try cg.addTag(Mir.Inst.Tag.fromOpcode(op)); + + return cg.finishAir(inst, .stack, &.{ty_op.operand}); +} + +fn airSplat(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const zcu = cg.pt.zcu; + const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; + const operand = try cg.resolveInst(ty_op.operand); + const ty = cg.typeOfIndex(inst); + const elem_ty = ty.childType(zcu); + + if (determineSimdStoreStrategy(ty, zcu, cg.target) == .direct) blk: { + switch (operand) { + // when the operand lives in the linear memory section, we can directly + // load and splat the value at once. Meaning we do not first have to load + // the scalar value onto the stack. 
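+ // (the v128.loadN_splat instructions take a memarg, so the load and the splat happen as one operation)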
+ .stack_offset, .nav_ref, .uav_ref => { + const opcode = switch (elem_ty.bitSize(zcu)) { + 8 => @intFromEnum(std.wasm.SimdOpcode.v128_load8_splat), + 16 => @intFromEnum(std.wasm.SimdOpcode.v128_load16_splat), + 32 => @intFromEnum(std.wasm.SimdOpcode.v128_load32_splat), + 64 => @intFromEnum(std.wasm.SimdOpcode.v128_load64_splat), + else => break :blk, // Cannot make use of simd-instructions + }; + try cg.emitWValue(operand); + const extra_index: u32 = @intCast(cg.mir_extra.items.len); + // stores as := opcode, offset, alignment (opcode::memarg) + try cg.mir_extra.appendSlice(cg.gpa, &[_]u32{ + opcode, + operand.offset(), + @intCast(elem_ty.abiAlignment(zcu).toByteUnits().?), + }); + try cg.addInst(.{ .tag = .simd_prefix, .data = .{ .payload = extra_index } }); + return cg.finishAir(inst, .stack, &.{ty_op.operand}); + }, + .local => { + const opcode = switch (elem_ty.bitSize(zcu)) { + 8 => @intFromEnum(std.wasm.SimdOpcode.i8x16_splat), + 16 => @intFromEnum(std.wasm.SimdOpcode.i16x8_splat), + 32 => if (elem_ty.isInt(zcu)) @intFromEnum(std.wasm.SimdOpcode.i32x4_splat) else @intFromEnum(std.wasm.SimdOpcode.f32x4_splat), + 64 => if (elem_ty.isInt(zcu)) @intFromEnum(std.wasm.SimdOpcode.i64x2_splat) else @intFromEnum(std.wasm.SimdOpcode.f64x2_splat), + else => break :blk, // Cannot make use of simd-instructions + }; + try cg.emitWValue(operand); + const extra_index: u32 = @intCast(cg.mir_extra.items.len); + try cg.mir_extra.append(cg.gpa, opcode); + try cg.addInst(.{ .tag = .simd_prefix, .data = .{ .payload = extra_index } }); + return cg.finishAir(inst, .stack, &.{ty_op.operand}); + }, + else => unreachable, + } + } + const elem_size = elem_ty.bitSize(zcu); + const vector_len = @as(usize, @intCast(ty.vectorLen(zcu))); + if ((!std.math.isPowerOfTwo(elem_size) or elem_size % 8 != 0) and vector_len > 1) { + return cg.fail("TODO: WebAssembly `@splat` for arbitrary element bitsize {d}", .{elem_size}); + } + + const result = try cg.allocStack(ty); + const elem_byte_size = @as(u32, @intCast(elem_ty.abiSize(zcu))); + var index: usize = 0; + var offset: u32 = 0; + while (index < vector_len) : (index += 1) { + try cg.store(result, operand, elem_ty, offset); + offset += elem_byte_size; + } + + return cg.finishAir(inst, result, &.{ty_op.operand}); +} + +fn airSelect(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const pl_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].pl_op; + const operand = try cg.resolveInst(pl_op.operand); + + _ = operand; + return cg.fail("TODO: Implement wasm airSelect", .{}); +} + +fn airShuffleOne(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const pt = cg.pt; + const zcu = pt.zcu; + + const unwrapped = cg.air.unwrapShuffleOne(zcu, inst); + const result_ty = unwrapped.result_ty; + const mask = unwrapped.mask; + const operand = try cg.resolveInst(unwrapped.operand); + + const elem_ty = result_ty.childType(zcu); + const elem_size = elem_ty.abiSize(zcu); + + // TODO: this function could have an `i8x16_shuffle` fast path like `airShuffleTwo` if we were + // to lower the comptime-known operands to a non-by-ref vector value. + + // TODO: this is incorrect if either operand or the result is *not* by-ref, which is possible. + // I tried to fix it, but I couldn't make much sense of how this backend handles memory. + if (!isByRef(result_ty, zcu, cg.target) or + !isByRef(cg.typeOf(unwrapped.operand), zcu, cg.target)) return cg.fail("TODO: handle mixed by-ref shuffle", .{}); + + const dest_alloc = try cg.allocStack(result_ty); + for (mask, 0..) 
|mask_elem, out_idx| { + try cg.emitWValue(dest_alloc); + const elem_val = switch (mask_elem.unwrap()) { + .elem => |idx| try cg.load(operand, elem_ty, @intCast(elem_size * idx)), + .value => |val| try cg.lowerConstant(.fromInterned(val), elem_ty), + }; + try cg.store(.stack, elem_val, elem_ty, @intCast(dest_alloc.offset() + elem_size * out_idx)); + } + return cg.finishAir(inst, dest_alloc, &.{unwrapped.operand}); +} + +fn airShuffleTwo(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const pt = cg.pt; + const zcu = pt.zcu; + + const unwrapped = cg.air.unwrapShuffleTwo(zcu, inst); + const result_ty = unwrapped.result_ty; + const mask = unwrapped.mask; + const operand_a = try cg.resolveInst(unwrapped.operand_a); + const operand_b = try cg.resolveInst(unwrapped.operand_b); + + const a_ty = cg.typeOf(unwrapped.operand_a); + const b_ty = cg.typeOf(unwrapped.operand_b); + const elem_ty = result_ty.childType(zcu); + const elem_size = elem_ty.abiSize(zcu); + + // WASM has `i8x16_shuffle`, which we can apply if the element type bit size is a multiple of 8 + // and the input and output vectors have a bit size of 128 (and are hence not by-ref). Otherwise, + // we fall back to a naive loop lowering. + if (!isByRef(a_ty, zcu, cg.target) and + !isByRef(b_ty, zcu, cg.target) and + !isByRef(result_ty, zcu, cg.target) and + elem_ty.bitSize(zcu) % 8 == 0) + { + var lane_map: [16]u8 align(4) = undefined; + const lanes_per_elem: usize = @intCast(elem_ty.bitSize(zcu) / 8); + for (mask, 0..) |mask_elem, out_idx| { + const out_first_lane = out_idx * lanes_per_elem; + const in_first_lane = switch (mask_elem.unwrap()) { + .a_elem => |i| i * lanes_per_elem, + .b_elem => |i| i * lanes_per_elem + 16, + .undef => 0, // doesn't matter + }; + for (lane_map[out_first_lane..][0..lanes_per_elem], in_first_lane..) |*out, in| { + out.* = @intCast(in); + } + } + try cg.emitWValue(operand_a); + try cg.emitWValue(operand_b); + const extra_index: u32 = @intCast(cg.mir_extra.items.len); + try cg.mir_extra.appendSlice(cg.gpa, &.{ + @intFromEnum(std.wasm.SimdOpcode.i8x16_shuffle), + @bitCast(lane_map[0..4].*), + @bitCast(lane_map[4..8].*), + @bitCast(lane_map[8..12].*), + @bitCast(lane_map[12..].*), + }); + try cg.addInst(.{ .tag = .simd_prefix, .data = .{ .payload = extra_index } }); + return cg.finishAir(inst, .stack, &.{ unwrapped.operand_a, unwrapped.operand_b }); + } + + // TODO: this is incorrect if either operand or the result is *not* by-ref, which is possible. + // I tried to fix it, but I couldn't make much sense of how this backend handles memory. + if (!isByRef(result_ty, zcu, cg.target) or + !isByRef(a_ty, zcu, cg.target) or + !isByRef(b_ty, zcu, cg.target)) return cg.fail("TODO: handle mixed by-ref shuffle", .{}); + + const dest_alloc = try cg.allocStack(result_ty); + for (mask, 0..) 
|mask_elem, out_idx| { + try cg.emitWValue(dest_alloc); + const elem_val = switch (mask_elem.unwrap()) { + .a_elem => |idx| try cg.load(operand_a, elem_ty, @intCast(elem_size * idx)), + .b_elem => |idx| try cg.load(operand_b, elem_ty, @intCast(elem_size * idx)), + .undef => try cg.emitUndefined(elem_ty), + }; + try cg.store(.stack, elem_val, elem_ty, @intCast(dest_alloc.offset() + elem_size * out_idx)); + } + return cg.finishAir(inst, dest_alloc, &.{ unwrapped.operand_a, unwrapped.operand_b }); +} + +fn airReduce(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const reduce = cg.air.instructions.items(.data)[@intFromEnum(inst)].reduce; + const operand = try cg.resolveInst(reduce.operand); + + _ = operand; + return cg.fail("TODO: Implement wasm airReduce", .{}); +} + +fn airAggregateInit(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const pt = cg.pt; + const zcu = pt.zcu; + const ip = &zcu.intern_pool; + const ty_pl = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl; + const result_ty = cg.typeOfIndex(inst); + const len = @as(usize, @intCast(result_ty.arrayLen(zcu))); + const elements: []const Air.Inst.Ref = @ptrCast(cg.air.extra.items[ty_pl.payload..][0..len]); + + const result: WValue = result_value: { + switch (result_ty.zigTypeTag(zcu)) { + .array => { + const result = try cg.allocStack(result_ty); + const elem_ty = result_ty.childType(zcu); + const elem_size = @as(u32, @intCast(elem_ty.abiSize(zcu))); + const sentinel = result_ty.sentinel(zcu); + + // When the element type is by reference, we must copy the entire + // value. It is therefore safer to move the offset pointer and store + // each value individually, instead of using store offsets. + if (isByRef(elem_ty, zcu, cg.target)) { + // copy stack pointer into a temporary local, which is + // moved for each element to store each value in the right position. + const offset = try cg.buildPointerOffset(result, 0, .new); + for (elements, 0..) |elem, elem_index| { + const elem_val = try cg.resolveInst(elem); + try cg.store(offset, elem_val, elem_ty, 0); + + if (elem_index < elements.len - 1 or sentinel != null) { + _ = try cg.buildPointerOffset(offset, elem_size, .modify); + } + } + if (sentinel) |s| { + const val = try cg.resolveValue(s); + try cg.store(offset, val, elem_ty, 0); + } + } else { + var offset: u32 = 0; + for (elements) |elem| { + const elem_val = try cg.resolveInst(elem); + try cg.store(result, elem_val, elem_ty, offset); + offset += elem_size; + } + if (sentinel) |s| { + const val = try cg.resolveValue(s); + try cg.store(result, val, elem_ty, offset); + } + } + break :result_value result; + }, + .@"struct" => switch (result_ty.containerLayout(zcu)) { + .@"packed" => { + if (isByRef(result_ty, zcu, cg.target)) { + return cg.fail("TODO: airAggregateInit for packed structs larger than 64 bits", .{}); + } + const packed_struct = zcu.typeToPackedStruct(result_ty).?; + const field_types = packed_struct.field_types; + const backing_type = Type.fromInterned(packed_struct.backingIntTypeUnordered(ip)); + + // ensure the result is zero'd + const result = try cg.allocLocal(backing_type); + if (backing_type.bitSize(zcu) <= 32) + try cg.addImm32(0) + else + try cg.addImm64(0); + try cg.addLocal(.local_set, result.local.value); + + var current_bit: u16 = 0; + for (elements, 0..) 
|elem, elem_index| { + const field_ty = Type.fromInterned(field_types.get(ip)[elem_index]); + if (!field_ty.hasRuntimeBitsIgnoreComptime(zcu)) continue; + + const shift_val: WValue = if (backing_type.bitSize(zcu) <= 32) + .{ .imm32 = current_bit } + else + .{ .imm64 = current_bit }; + + const value = try cg.resolveInst(elem); + const value_bit_size: u16 = @intCast(field_ty.bitSize(zcu)); + const int_ty = try pt.intType(.unsigned, value_bit_size); + + // load our current result on stack so we can perform all transformations + // using only stack values. Saving the cost of loads and stores. + try cg.emitWValue(result); + const bitcasted = try cg.bitcast(int_ty, field_ty, value); + const extended_val = try cg.intcast(bitcasted, int_ty, backing_type); + // no need to shift any values when the current offset is 0 + const shifted = if (current_bit != 0) shifted: { + break :shifted try cg.binOp(extended_val, shift_val, backing_type, .shl); + } else extended_val; + // we ignore the result as we keep it on the stack to assign it directly to `result` + _ = try cg.binOp(.stack, shifted, backing_type, .@"or"); + try cg.addLocal(.local_set, result.local.value); + current_bit += value_bit_size; + } + break :result_value result; + }, + else => { + const result = try cg.allocStack(result_ty); + const offset = try cg.buildPointerOffset(result, 0, .new); // pointer to offset + var prev_field_offset: u64 = 0; + for (elements, 0..) |elem, elem_index| { + if (try result_ty.structFieldValueComptime(pt, elem_index) != null) continue; + + const elem_ty = result_ty.fieldType(elem_index, zcu); + const field_offset = result_ty.structFieldOffset(elem_index, zcu); + _ = try cg.buildPointerOffset(offset, @intCast(field_offset - prev_field_offset), .modify); + prev_field_offset = field_offset; + + const value = try cg.resolveInst(elem); + try cg.store(offset, value, elem_ty, 0); + } + + break :result_value result; + }, + }, + .vector => return cg.fail("TODO: Wasm backend: implement airAggregateInit for vectors", .{}), + else => unreachable, + } + }; + + if (elements.len <= Air.Liveness.bpi - 1) { + var buf = [1]Air.Inst.Ref{.none} ** (Air.Liveness.bpi - 1); + @memcpy(buf[0..elements.len], elements); + return cg.finishAir(inst, result, &buf); + } + var bt = try cg.iterateBigTomb(inst, elements.len); + for (elements) |arg| bt.feed(arg); + return bt.finishAir(result); +} + +fn airUnionInit(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const pt = cg.pt; + const zcu = pt.zcu; + const ip = &zcu.intern_pool; + const ty_pl = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl; + const extra = cg.air.extraData(Air.UnionInit, ty_pl.payload).data; + + const result = result: { + const union_ty = cg.typeOfIndex(inst); + const layout = union_ty.unionGetLayout(zcu); + const union_obj = zcu.typeToUnion(union_ty).?; + const field_ty = Type.fromInterned(union_obj.field_types.get(ip)[extra.field_index]); + const field_name = union_obj.loadTagType(ip).names.get(ip)[extra.field_index]; + + const tag_int = blk: { + const tag_ty = union_ty.unionTagTypeHypothetical(zcu); + const enum_field_index = tag_ty.enumFieldIndex(field_name, zcu).?; + const tag_val = try pt.enumValueFieldIndex(tag_ty, enum_field_index); + break :blk try cg.lowerConstant(tag_val, tag_ty); + }; + if (layout.payload_size == 0) { + if (layout.tag_size == 0) { + break :result .none; + } + assert(!isByRef(union_ty, zcu, cg.target)); + break :result tag_int; + } + + if (isByRef(union_ty, zcu, cg.target)) { + const result_ptr = try cg.allocStack(union_ty); + 
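+ // The stores below follow the union layout: when the tag alignment is at least the
+ // payload alignment, the tag lives at offset 0 and the payload at `tag_size`;
+ // otherwise the payload lives at offset 0 and the tag at `payload_size`.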
const payload = try cg.resolveInst(extra.init); + if (layout.tag_align.compare(.gte, layout.payload_align)) { + if (isByRef(field_ty, zcu, cg.target)) { + const payload_ptr = try cg.buildPointerOffset(result_ptr, layout.tag_size, .new); + try cg.store(payload_ptr, payload, field_ty, 0); + } else { + try cg.store(result_ptr, payload, field_ty, @intCast(layout.tag_size)); + } + + if (layout.tag_size > 0) { + try cg.store(result_ptr, tag_int, Type.fromInterned(union_obj.enum_tag_ty), 0); + } + } else { + try cg.store(result_ptr, payload, field_ty, 0); + if (layout.tag_size > 0) { + try cg.store( + result_ptr, + tag_int, + Type.fromInterned(union_obj.enum_tag_ty), + @intCast(layout.payload_size), + ); + } + } + break :result result_ptr; + } else { + const operand = try cg.resolveInst(extra.init); + const union_int_type = try pt.intType(.unsigned, @as(u16, @intCast(union_ty.bitSize(zcu)))); + if (field_ty.zigTypeTag(zcu) == .float) { + const int_type = try pt.intType(.unsigned, @intCast(field_ty.bitSize(zcu))); + const bitcasted = try cg.bitcast(field_ty, int_type, operand); + break :result try cg.trunc(bitcasted, int_type, union_int_type); + } else if (field_ty.isPtrAtRuntime(zcu)) { + const int_type = try pt.intType(.unsigned, @intCast(field_ty.bitSize(zcu))); + break :result try cg.intcast(operand, int_type, union_int_type); + } + break :result try cg.intcast(operand, field_ty, union_int_type); + } + }; + + return cg.finishAir(inst, result, &.{extra.init}); +} + +fn airPrefetch(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const prefetch = cg.air.instructions.items(.data)[@intFromEnum(inst)].prefetch; + return cg.finishAir(inst, .none, &.{prefetch.ptr}); +} + +fn airWasmMemorySize(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const pl_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].pl_op; + + try cg.addLabel(.memory_size, pl_op.payload); + return cg.finishAir(inst, .stack, &.{pl_op.operand}); +} + +fn airWasmMemoryGrow(cg: *CodeGen, inst: Air.Inst.Index) !void { + const pl_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].pl_op; + + const operand = try cg.resolveInst(pl_op.operand); + try cg.emitWValue(operand); + try cg.addLabel(.memory_grow, pl_op.payload); + return cg.finishAir(inst, .stack, &.{pl_op.operand}); +} + +fn cmpOptionals(cg: *CodeGen, lhs: WValue, rhs: WValue, operand_ty: Type, op: std.math.CompareOperator) InnerError!WValue { + const zcu = cg.pt.zcu; + assert(operand_ty.hasRuntimeBitsIgnoreComptime(zcu)); + assert(op == .eq or op == .neq); + const payload_ty = operand_ty.optionalChild(zcu); + assert(!isByRef(payload_ty, zcu, cg.target)); + + var result = try cg.allocLocal(Type.i32); + defer result.free(cg); + + var lhs_null = try cg.allocLocal(Type.i32); + defer lhs_null.free(cg); + + try cg.startBlock(.block, .empty); + + try cg.addImm32(if (op == .eq) 0 else 1); + try cg.addLocal(.local_set, result.local.value); + + _ = try cg.isNull(lhs, operand_ty, .i32_eq); + try cg.addLocal(.local_tee, lhs_null.local.value); + _ = try cg.isNull(rhs, operand_ty, .i32_eq); + try cg.addTag(.i32_ne); + try cg.addLabel(.br_if, 0); // only one is null + + try cg.addImm32(if (op == .eq) 1 else 0); + try cg.addLocal(.local_set, result.local.value); + + try cg.addLocal(.local_get, lhs_null.local.value); + try cg.addLabel(.br_if, 0); // both are null + + _ = try cg.load(lhs, payload_ty, 0); + _ = try cg.load(rhs, payload_ty, 0); + _ = try cg.cmp(.stack, .stack, payload_ty, op); + try cg.addLocal(.local_set, result.local.value); + + try cg.endBlock(); + + try 
cg.addLocal(.local_get, result.local.value); + + return .stack; +} + +/// Compares big integers by checking both its high bits and low bits. +/// NOTE: Leaves the result of the comparison on top of the stack. +/// TODO: Lower this to compiler_rt call when bitsize > 128 +fn cmpBigInt(cg: *CodeGen, lhs: WValue, rhs: WValue, operand_ty: Type, op: std.math.CompareOperator) InnerError!WValue { + const zcu = cg.pt.zcu; + assert(operand_ty.abiSize(zcu) >= 16); + assert(!(lhs != .stack and rhs == .stack)); + if (operand_ty.bitSize(zcu) > 128) { + return cg.fail("TODO: Support cmpBigInt for integer bitsize: '{d}'", .{operand_ty.bitSize(zcu)}); + } + + var lhs_msb = try (try cg.load(lhs, Type.u64, 8)).toLocal(cg, Type.u64); + defer lhs_msb.free(cg); + var rhs_msb = try (try cg.load(rhs, Type.u64, 8)).toLocal(cg, Type.u64); + defer rhs_msb.free(cg); + + switch (op) { + .eq, .neq => { + const xor_high = try cg.binOp(lhs_msb, rhs_msb, Type.u64, .xor); + const lhs_lsb = try cg.load(lhs, Type.u64, 0); + const rhs_lsb = try cg.load(rhs, Type.u64, 0); + const xor_low = try cg.binOp(lhs_lsb, rhs_lsb, Type.u64, .xor); + const or_result = try cg.binOp(xor_high, xor_low, Type.u64, .@"or"); + + switch (op) { + .eq => return cg.cmp(or_result, .{ .imm64 = 0 }, Type.u64, .eq), + .neq => return cg.cmp(or_result, .{ .imm64 = 0 }, Type.u64, .neq), + else => unreachable, + } + }, + else => { + const ty = if (operand_ty.isSignedInt(zcu)) Type.i64 else Type.u64; + // leave those value on top of the stack for '.select' + const lhs_lsb = try cg.load(lhs, Type.u64, 0); + const rhs_lsb = try cg.load(rhs, Type.u64, 0); + _ = try cg.cmp(lhs_lsb, rhs_lsb, Type.u64, op); + _ = try cg.cmp(lhs_msb, rhs_msb, ty, op); + _ = try cg.cmp(lhs_msb, rhs_msb, ty, .eq); + try cg.addTag(.select); + }, + } + + return .stack; +} + +fn airSetUnionTag(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const pt = cg.pt; + const zcu = pt.zcu; + const bin_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].bin_op; + const un_ty = cg.typeOf(bin_op.lhs).childType(zcu); + const tag_ty = cg.typeOf(bin_op.rhs); + const layout = un_ty.unionGetLayout(zcu); + if (layout.tag_size == 0) return cg.finishAir(inst, .none, &.{ bin_op.lhs, bin_op.rhs }); + + const union_ptr = try cg.resolveInst(bin_op.lhs); + const new_tag = try cg.resolveInst(bin_op.rhs); + if (layout.payload_size == 0) { + try cg.store(union_ptr, new_tag, tag_ty, 0); + return cg.finishAir(inst, .none, &.{ bin_op.lhs, bin_op.rhs }); + } + + // when the tag alignment is smaller than the payload, the field will be stored + // after the payload. + const offset: u32 = if (layout.tag_align.compare(.lt, layout.payload_align)) blk: { + break :blk @intCast(layout.payload_size); + } else 0; + try cg.store(union_ptr, new_tag, tag_ty, offset); + return cg.finishAir(inst, .none, &.{ bin_op.lhs, bin_op.rhs }); +} + +fn airGetUnionTag(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const zcu = cg.pt.zcu; + const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; + + const un_ty = cg.typeOf(ty_op.operand); + const tag_ty = cg.typeOfIndex(inst); + const layout = un_ty.unionGetLayout(zcu); + if (layout.tag_size == 0) return cg.finishAir(inst, .none, &.{ty_op.operand}); + + const operand = try cg.resolveInst(ty_op.operand); + // when the tag alignment is smaller than the payload, the field will be stored + // after the payload. 
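+ // This mirrors the tag placement used by airSetUnionTag above.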
+ const offset: u32 = if (layout.tag_align.compare(.lt, layout.payload_align)) + @intCast(layout.payload_size) + else + 0; + const result = try cg.load(operand, tag_ty, offset); + return cg.finishAir(inst, result, &.{ty_op.operand}); +} + +fn airFpext(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; + + const dest_ty = cg.typeOfIndex(inst); + const operand = try cg.resolveInst(ty_op.operand); + const result = try cg.fpext(operand, cg.typeOf(ty_op.operand), dest_ty); + return cg.finishAir(inst, result, &.{ty_op.operand}); +} + +/// Extends a float from a given `Type` to a larger wanted `Type`, leaving the +/// result on the stack. +fn fpext(cg: *CodeGen, operand: WValue, given: Type, wanted: Type) InnerError!WValue { + const given_bits = given.floatBits(cg.target); + const wanted_bits = wanted.floatBits(cg.target); + + const intrinsic: Mir.Intrinsic = switch (given_bits) { + 16 => switch (wanted_bits) { + 32 => { + assert(.stack == try cg.callIntrinsic(.__extendhfsf2, &.{.f16_type}, Type.f32, &.{operand})); + return .stack; + }, + 64 => { + assert(.stack == try cg.callIntrinsic(.__extendhfsf2, &.{.f16_type}, Type.f32, &.{operand})); + try cg.addTag(.f64_promote_f32); + return .stack; + }, + 80 => .__extendhfxf2, + 128 => .__extendhftf2, + else => unreachable, + }, + 32 => switch (wanted_bits) { + 64 => { + try cg.emitWValue(operand); + try cg.addTag(.f64_promote_f32); + return .stack; + }, + 80 => .__extendsfxf2, + 128 => .__extendsftf2, + else => unreachable, + }, + 64 => switch (wanted_bits) { + 80 => .__extenddfxf2, + 128 => .__extenddftf2, + else => unreachable, + }, + 80 => switch (wanted_bits) { + 128 => .__extendxftf2, + else => unreachable, + }, + else => unreachable, + }; + return cg.callIntrinsic(intrinsic, &.{given.ip_index}, wanted, &.{operand}); +} + +fn airFptrunc(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; + + const dest_ty = cg.typeOfIndex(inst); + const operand = try cg.resolveInst(ty_op.operand); + const result = try cg.fptrunc(operand, cg.typeOf(ty_op.operand), dest_ty); + return cg.finishAir(inst, result, &.{ty_op.operand}); +} + +/// Truncates a float from a given `Type` to its wanted `Type`, leaving the +/// result on the stack. 
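+/// Truncations WebAssembly supports natively (f64 -> f32) are emitted inline; all other
+/// cases are lowered to the matching compiler-rt `__trunc*` intrinsic, with f16 results
+/// produced via `__truncsfhf2`.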
+fn fptrunc(cg: *CodeGen, operand: WValue, given: Type, wanted: Type) InnerError!WValue { + const given_bits = given.floatBits(cg.target); + const wanted_bits = wanted.floatBits(cg.target); + + const intrinsic: Mir.Intrinsic = switch (given_bits) { + 32 => switch (wanted_bits) { + 16 => { + return cg.callIntrinsic(.__truncsfhf2, &.{.f32_type}, Type.f16, &.{operand}); + }, + else => unreachable, + }, + 64 => switch (wanted_bits) { + 16 => { + try cg.emitWValue(operand); + try cg.addTag(.f32_demote_f64); + return cg.callIntrinsic(.__truncsfhf2, &.{.f32_type}, Type.f16, &.{.stack}); + }, + 32 => { + try cg.emitWValue(operand); + try cg.addTag(.f32_demote_f64); + return .stack; + }, + else => unreachable, + }, + 80 => switch (wanted_bits) { + 16 => .__truncxfhf2, + 32 => .__truncxfsf2, + 64 => .__truncxfdf2, + else => unreachable, + }, + 128 => switch (wanted_bits) { + 16 => .__trunctfhf2, + 32 => .__trunctfsf2, + 64 => .__trunctfdf2, + 80 => .__trunctfxf2, + else => unreachable, + }, + else => unreachable, + }; + return cg.callIntrinsic(intrinsic, &.{given.ip_index}, wanted, &.{operand}); +} + +fn airErrUnionPayloadPtrSet(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const zcu = cg.pt.zcu; + const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; + + const err_set_ty = cg.typeOf(ty_op.operand).childType(zcu); + const payload_ty = err_set_ty.errorUnionPayload(zcu); + const operand = try cg.resolveInst(ty_op.operand); + + // set error-tag to '0' to annotate error union is non-error + try cg.store( + operand, + .{ .imm32 = 0 }, + Type.anyerror, + @intCast(errUnionErrorOffset(payload_ty, zcu)), + ); + + const result = result: { + if (!payload_ty.hasRuntimeBitsIgnoreComptime(zcu)) { + break :result cg.reuseOperand(ty_op.operand, operand); + } + + break :result try cg.buildPointerOffset(operand, @as(u32, @intCast(errUnionPayloadOffset(payload_ty, zcu))), .new); + }; + return cg.finishAir(inst, result, &.{ty_op.operand}); +} + +fn airFieldParentPtr(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const pt = cg.pt; + const zcu = pt.zcu; + const ty_pl = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl; + const extra = cg.air.extraData(Air.FieldParentPtr, ty_pl.payload).data; + + const field_ptr = try cg.resolveInst(extra.field_ptr); + const parent_ptr_ty = cg.typeOfIndex(inst); + const parent_ty = parent_ptr_ty.childType(zcu); + const field_ptr_ty = cg.typeOf(extra.field_ptr); + const field_index = extra.field_index; + const field_offset = switch (parent_ty.containerLayout(zcu)) { + .auto, .@"extern" => parent_ty.structFieldOffset(field_index, zcu), + .@"packed" => offset: { + const parent_ptr_offset = parent_ptr_ty.ptrInfo(zcu).packed_offset.bit_offset; + const field_offset = if (zcu.typeToStruct(parent_ty)) |loaded_struct| zcu.structPackedFieldBitOffset(loaded_struct, field_index) else 0; + const field_ptr_offset = field_ptr_ty.ptrInfo(zcu).packed_offset.bit_offset; + break :offset @divExact(parent_ptr_offset + field_offset - field_ptr_offset, 8); + }, + }; + + const result = if (field_offset != 0) result: { + const base = try cg.buildPointerOffset(field_ptr, 0, .new); + try cg.addLocal(.local_get, base.local.value); + try cg.addImm32(@intCast(field_offset)); + try cg.addTag(.i32_sub); + try cg.addLocal(.local_set, base.local.value); + break :result base; + } else cg.reuseOperand(extra.field_ptr, field_ptr); + + return cg.finishAir(inst, result, &.{extra.field_ptr}); +} + +fn sliceOrArrayPtr(cg: *CodeGen, ptr: WValue, ptr_ty: Type) InnerError!WValue { + 
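+ // A slice operand carries both a pointer and a length; only the pointer word is
+ // needed here. A single-item pointer to an array is already the address we want.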
const zcu = cg.pt.zcu; + if (ptr_ty.isSlice(zcu)) { + return cg.slicePtr(ptr); + } else { + return ptr; + } +} + +fn airMemcpy(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const zcu = cg.pt.zcu; + const bin_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].bin_op; + const dst = try cg.resolveInst(bin_op.lhs); + const dst_ty = cg.typeOf(bin_op.lhs); + const ptr_elem_ty = dst_ty.childType(zcu); + const src = try cg.resolveInst(bin_op.rhs); + const src_ty = cg.typeOf(bin_op.rhs); + const len = switch (dst_ty.ptrSize(zcu)) { + .slice => blk: { + const slice_len = try cg.sliceLen(dst); + if (ptr_elem_ty.abiSize(zcu) != 1) { + try cg.emitWValue(slice_len); + try cg.emitWValue(.{ .imm32 = @as(u32, @intCast(ptr_elem_ty.abiSize(zcu))) }); + try cg.addTag(.i32_mul); + try cg.addLocal(.local_set, slice_len.local.value); + } + break :blk slice_len; + }, + .one => @as(WValue, .{ + .imm32 = @as(u32, @intCast(ptr_elem_ty.arrayLen(zcu) * ptr_elem_ty.childType(zcu).abiSize(zcu))), + }), + .c, .many => unreachable, + }; + const dst_ptr = try cg.sliceOrArrayPtr(dst, dst_ty); + const src_ptr = try cg.sliceOrArrayPtr(src, src_ty); + try cg.memcpy(dst_ptr, src_ptr, len); + + return cg.finishAir(inst, .none, &.{ bin_op.lhs, bin_op.rhs }); +} + +fn airRetAddr(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + // TODO: Implement this properly once stack serialization is solved + return cg.finishAir(inst, switch (cg.ptr_size) { + .wasm32 => .{ .imm32 = 0 }, + .wasm64 => .{ .imm64 = 0 }, + }, &.{}); +} + +fn airPopcount(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const pt = cg.pt; + const zcu = pt.zcu; + const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; + + const operand = try cg.resolveInst(ty_op.operand); + const op_ty = cg.typeOf(ty_op.operand); + + if (op_ty.zigTypeTag(zcu) == .vector) { + return cg.fail("TODO: Implement @popCount for vectors", .{}); + } + + const int_info = op_ty.intInfo(zcu); + const bits = int_info.bits; + const wasm_bits = toWasmBits(bits) orelse { + return cg.fail("TODO: Implement @popCount for integers with bitsize '{d}'", .{bits}); + }; + + switch (wasm_bits) { + 32 => { + try cg.emitWValue(operand); + if (op_ty.isSignedInt(zcu) and bits != wasm_bits) { + _ = try cg.wrapOperand(.stack, try pt.intType(.unsigned, bits)); + } + try cg.addTag(.i32_popcnt); + }, + 64 => { + try cg.emitWValue(operand); + if (op_ty.isSignedInt(zcu) and bits != wasm_bits) { + _ = try cg.wrapOperand(.stack, try pt.intType(.unsigned, bits)); + } + try cg.addTag(.i64_popcnt); + try cg.addTag(.i32_wrap_i64); + try cg.emitWValue(operand); + }, + 128 => { + _ = try cg.load(operand, Type.u64, 0); + try cg.addTag(.i64_popcnt); + _ = try cg.load(operand, Type.u64, 8); + if (op_ty.isSignedInt(zcu) and bits != wasm_bits) { + _ = try cg.wrapOperand(.stack, try pt.intType(.unsigned, bits - 64)); + } + try cg.addTag(.i64_popcnt); + try cg.addTag(.i64_add); + try cg.addTag(.i32_wrap_i64); + }, + else => unreachable, + } + + return cg.finishAir(inst, .stack, &.{ty_op.operand}); +} + +fn airBitReverse(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const zcu = cg.pt.zcu; + const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; + + const operand = try cg.resolveInst(ty_op.operand); + const ty = cg.typeOf(ty_op.operand); + + if (ty.zigTypeTag(zcu) == .vector) { + return cg.fail("TODO: Implement @bitReverse for vectors", .{}); + } + + const int_info = ty.intInfo(zcu); + const bits = int_info.bits; + const wasm_bits = toWasmBits(bits) orelse { + 
return cg.fail("TODO: Implement @bitReverse for integers with bitsize '{d}'", .{bits}); + }; + + switch (wasm_bits) { + 32 => { + const intrin_ret = try cg.callIntrinsic( + .__bitreversesi2, + &.{.u32_type}, + Type.u32, + &.{operand}, + ); + const result = if (bits == 32) + intrin_ret + else + try cg.binOp(intrin_ret, .{ .imm32 = 32 - bits }, ty, .shr); + return cg.finishAir(inst, result, &.{ty_op.operand}); + }, + 64 => { + const intrin_ret = try cg.callIntrinsic( + .__bitreversedi2, + &.{.u64_type}, + Type.u64, + &.{operand}, + ); + const result = if (bits == 64) + intrin_ret + else + try cg.binOp(intrin_ret, .{ .imm64 = 64 - bits }, ty, .shr); + return cg.finishAir(inst, result, &.{ty_op.operand}); + }, + 128 => { + const result = try cg.allocStack(ty); + + try cg.emitWValue(result); + const first_half = try cg.load(operand, Type.u64, 8); + const intrin_ret_first = try cg.callIntrinsic( + .__bitreversedi2, + &.{.u64_type}, + Type.u64, + &.{first_half}, + ); + try cg.emitWValue(intrin_ret_first); + if (bits < 128) { + try cg.emitWValue(.{ .imm64 = 128 - bits }); + try cg.addTag(.i64_shr_u); + } + try cg.emitWValue(result); + const second_half = try cg.load(operand, Type.u64, 0); + const intrin_ret_second = try cg.callIntrinsic( + .__bitreversedi2, + &.{.u64_type}, + Type.u64, + &.{second_half}, + ); + try cg.emitWValue(intrin_ret_second); + if (bits == 128) { + try cg.store(.stack, .stack, Type.u64, result.offset() + 8); + try cg.store(.stack, .stack, Type.u64, result.offset()); + } else { + var tmp = try cg.allocLocal(Type.u64); + defer tmp.free(cg); + try cg.addLocal(.local_tee, tmp.local.value); + try cg.emitWValue(.{ .imm64 = 128 - bits }); + if (ty.isSignedInt(zcu)) { + try cg.addTag(.i64_shr_s); + } else { + try cg.addTag(.i64_shr_u); + } + try cg.store(.stack, .stack, Type.u64, result.offset() + 8); + try cg.addLocal(.local_get, tmp.local.value); + try cg.emitWValue(.{ .imm64 = bits - 64 }); + try cg.addTag(.i64_shl); + try cg.addTag(.i64_or); + try cg.store(.stack, .stack, Type.u64, result.offset()); + } + return cg.finishAir(inst, result, &.{ty_op.operand}); + }, + else => unreachable, + } +} + +fn airErrorName(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const un_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].un_op; + const operand = try cg.resolveInst(un_op); + // Each entry to this table is a slice (ptr+len). + // The operand in this instruction represents the index within this table. + // This means to get the final name, we emit the base pointer and then perform + // pointer arithmetic to find the pointer to this slice and return that. + // + // As the names are global and the slice elements are constant, we do not have + // to make a copy of the ptr+value but can point towards them directly. + const pt = cg.pt; + const name_ty = Type.slice_const_u8_sentinel_0; + const abi_size = name_ty.abiSize(pt.zcu); + + // Lowers to a i32.const or i64.const with the error table memory address. 
+ cg.error_name_table_ref_count += 1; + try cg.addTag(.error_name_table_ref); + try cg.emitWValue(operand); + switch (cg.ptr_size) { + .wasm32 => { + try cg.addImm32(@intCast(abi_size)); + try cg.addTag(.i32_mul); + try cg.addTag(.i32_add); + }, + .wasm64 => { + try cg.addImm64(abi_size); + try cg.addTag(.i64_mul); + try cg.addTag(.i64_add); + }, + } + + return cg.finishAir(inst, .stack, &.{un_op}); +} + +fn airPtrSliceFieldPtr(cg: *CodeGen, inst: Air.Inst.Index, offset: u32) InnerError!void { + const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; + const slice_ptr = try cg.resolveInst(ty_op.operand); + const result = try cg.buildPointerOffset(slice_ptr, offset, .new); + return cg.finishAir(inst, result, &.{ty_op.operand}); +} + +/// NOTE: Allocates place for result on virtual stack, when integer size > 64 bits +fn intZeroValue(cg: *CodeGen, ty: Type) InnerError!WValue { + const zcu = cg.pt.zcu; + const int_info = ty.intInfo(zcu); + const wasm_bits = toWasmBits(int_info.bits) orelse { + return cg.fail("TODO: Implement intZeroValue for integer bitsize: {d}", .{int_info.bits}); + }; + switch (wasm_bits) { + 32 => return .{ .imm32 = 0 }, + 64 => return .{ .imm64 = 0 }, + 128 => { + const result = try cg.allocStack(ty); + try cg.store(result, .{ .imm64 = 0 }, Type.u64, 0); + try cg.store(result, .{ .imm64 = 0 }, Type.u64, 8); + return result; + }, + else => unreachable, + } +} + +fn airAddSubWithOverflow(cg: *CodeGen, inst: Air.Inst.Index, op: Op) InnerError!void { + assert(op == .add or op == .sub); + const ty_pl = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl; + const extra = cg.air.extraData(Air.Bin, ty_pl.payload).data; + + const lhs = try cg.resolveInst(extra.lhs); + const rhs = try cg.resolveInst(extra.rhs); + const ty = cg.typeOf(extra.lhs); + const pt = cg.pt; + const zcu = pt.zcu; + + if (ty.zigTypeTag(zcu) == .vector) { + return cg.fail("TODO: Implement overflow arithmetic for vectors", .{}); + } + + const int_info = ty.intInfo(zcu); + const is_signed = int_info.signedness == .signed; + if (int_info.bits > 128) { + return cg.fail("TODO: Implement {{add/sub}}_with_overflow for integer bitsize: {d}", .{int_info.bits}); + } + + const op_result = try cg.wrapBinOp(lhs, rhs, ty, op); + var op_tmp = try op_result.toLocal(cg, ty); + defer op_tmp.free(cg); + + const cmp_op: std.math.CompareOperator = switch (op) { + .add => .lt, + .sub => .gt, + else => unreachable, + }; + const overflow_bit = if (is_signed) blk: { + const zero = try intZeroValue(cg, ty); + const rhs_is_neg = try cg.cmp(rhs, zero, ty, .lt); + const overflow_cmp = try cg.cmp(op_tmp, lhs, ty, cmp_op); + break :blk try cg.cmp(rhs_is_neg, overflow_cmp, Type.u1, .neq); + } else try cg.cmp(op_tmp, lhs, ty, cmp_op); + var bit_tmp = try overflow_bit.toLocal(cg, Type.u1); + defer bit_tmp.free(cg); + + const result = try cg.allocStack(cg.typeOfIndex(inst)); + const offset: u32 = @intCast(ty.abiSize(zcu)); + try cg.store(result, op_tmp, ty, 0); + try cg.store(result, bit_tmp, Type.u1, offset); + + return cg.finishAir(inst, result, &.{ extra.lhs, extra.rhs }); +} + +fn airShlWithOverflow(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const pt = cg.pt; + const zcu = pt.zcu; + const ty_pl = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl; + const extra = cg.air.extraData(Air.Bin, ty_pl.payload).data; + + const lhs = try cg.resolveInst(extra.lhs); + const rhs = try cg.resolveInst(extra.rhs); + const ty = cg.typeOf(extra.lhs); + const rhs_ty = cg.typeOf(extra.rhs); + + if 
(ty.isVector(zcu)) { + if (!rhs_ty.isVector(zcu)) { + return cg.fail("TODO: implement vector 'shl_with_overflow' with scalar rhs", .{}); + } else { + return cg.fail("TODO: implement vector 'shl_with_overflow'", .{}); + } + } + + const int_info = ty.intInfo(zcu); + const wasm_bits = toWasmBits(int_info.bits) orelse { + return cg.fail("TODO: implement 'shl_with_overflow' for integer bitsize: {d}", .{int_info.bits}); + }; + + // Ensure rhs is coerced to lhs as they must have the same WebAssembly types + // before we can perform any binary operation. + const rhs_wasm_bits = toWasmBits(rhs_ty.intInfo(zcu).bits).?; + // If wasm_bits == 128, compiler-rt expects i32 for shift + const rhs_final = if (wasm_bits != rhs_wasm_bits and wasm_bits == 64) blk: { + const rhs_casted = try cg.intcast(rhs, rhs_ty, ty); + break :blk try rhs_casted.toLocal(cg, ty); + } else rhs; + + var shl = try (try cg.wrapBinOp(lhs, rhs_final, ty, .shl)).toLocal(cg, ty); + defer shl.free(cg); + + const overflow_bit = blk: { + const shr = try cg.binOp(shl, rhs_final, ty, .shr); + break :blk try cg.cmp(shr, lhs, ty, .neq); + }; + var overflow_local = try overflow_bit.toLocal(cg, Type.u1); + defer overflow_local.free(cg); + + const result = try cg.allocStack(cg.typeOfIndex(inst)); + const offset: u32 = @intCast(ty.abiSize(zcu)); + try cg.store(result, shl, ty, 0); + try cg.store(result, overflow_local, Type.u1, offset); + + return cg.finishAir(inst, result, &.{ extra.lhs, extra.rhs }); +} + +fn airMulWithOverflow(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const ty_pl = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl; + const extra = cg.air.extraData(Air.Bin, ty_pl.payload).data; + + const lhs = try cg.resolveInst(extra.lhs); + const rhs = try cg.resolveInst(extra.rhs); + const ty = cg.typeOf(extra.lhs); + const pt = cg.pt; + const zcu = pt.zcu; + + if (ty.zigTypeTag(zcu) == .vector) { + return cg.fail("TODO: Implement overflow arithmetic for vectors", .{}); + } + + // We store the bit if it's overflowed or not in this. As it's zero-initialized + // we only need to update it if an overflow (or underflow) occurred. 
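+ // Strategy: 32- and 64-bit integers are widened to twice their size, multiplied,
+ // truncated back, and the truncated result is compared with the wide product to detect
+ // overflow. 128-bit multiplications are lowered through compiler-rt instead.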
+ var overflow_bit = try cg.ensureAllocLocal(Type.u1); + defer overflow_bit.free(cg); + + const int_info = ty.intInfo(zcu); + const wasm_bits = toWasmBits(int_info.bits) orelse { + return cg.fail("TODO: Implement `@mulWithOverflow` for integer bitsize: {d}", .{int_info.bits}); + }; + + const zero: WValue = switch (wasm_bits) { + 32 => .{ .imm32 = 0 }, + 64, 128 => .{ .imm64 = 0 }, + else => unreachable, + }; + + // for 32 bit integers we upcast it to a 64bit integer + const mul = if (wasm_bits == 32) blk: { + const new_ty = if (int_info.signedness == .signed) Type.i64 else Type.u64; + const lhs_upcast = try cg.intcast(lhs, ty, new_ty); + const rhs_upcast = try cg.intcast(rhs, ty, new_ty); + const bin_op = try (try cg.binOp(lhs_upcast, rhs_upcast, new_ty, .mul)).toLocal(cg, new_ty); + const res = try (try cg.trunc(bin_op, ty, new_ty)).toLocal(cg, ty); + const res_upcast = try cg.intcast(res, ty, new_ty); + _ = try cg.cmp(res_upcast, bin_op, new_ty, .neq); + try cg.addLocal(.local_set, overflow_bit.local.value); + break :blk res; + } else if (wasm_bits == 64) blk: { + const new_ty = if (int_info.signedness == .signed) Type.i128 else Type.u128; + const lhs_upcast = try cg.intcast(lhs, ty, new_ty); + const rhs_upcast = try cg.intcast(rhs, ty, new_ty); + const bin_op = try (try cg.binOp(lhs_upcast, rhs_upcast, new_ty, .mul)).toLocal(cg, new_ty); + const res = try (try cg.trunc(bin_op, ty, new_ty)).toLocal(cg, ty); + const res_upcast = try cg.intcast(res, ty, new_ty); + _ = try cg.cmp(res_upcast, bin_op, new_ty, .neq); + try cg.addLocal(.local_set, overflow_bit.local.value); + break :blk res; + } else if (int_info.bits == 128 and int_info.signedness == .unsigned) blk: { + var lhs_lsb = try (try cg.load(lhs, Type.u64, 0)).toLocal(cg, Type.u64); + defer lhs_lsb.free(cg); + var lhs_msb = try (try cg.load(lhs, Type.u64, 8)).toLocal(cg, Type.u64); + defer lhs_msb.free(cg); + var rhs_lsb = try (try cg.load(rhs, Type.u64, 0)).toLocal(cg, Type.u64); + defer rhs_lsb.free(cg); + var rhs_msb = try (try cg.load(rhs, Type.u64, 8)).toLocal(cg, Type.u64); + defer rhs_msb.free(cg); + + const cross_1 = try cg.callIntrinsic( + .__multi3, + &[_]InternPool.Index{.i64_type} ** 4, + Type.i128, + &.{ lhs_msb, zero, rhs_lsb, zero }, + ); + const cross_2 = try cg.callIntrinsic( + .__multi3, + &[_]InternPool.Index{.i64_type} ** 4, + Type.i128, + &.{ rhs_msb, zero, lhs_lsb, zero }, + ); + const mul_lsb = try cg.callIntrinsic( + .__multi3, + &[_]InternPool.Index{.i64_type} ** 4, + Type.i128, + &.{ rhs_lsb, zero, lhs_lsb, zero }, + ); + + const rhs_msb_not_zero = try cg.cmp(rhs_msb, zero, Type.u64, .neq); + const lhs_msb_not_zero = try cg.cmp(lhs_msb, zero, Type.u64, .neq); + const both_msb_not_zero = try cg.binOp(rhs_msb_not_zero, lhs_msb_not_zero, Type.bool, .@"and"); + const cross_1_msb = try cg.load(cross_1, Type.u64, 8); + const cross_1_msb_not_zero = try cg.cmp(cross_1_msb, zero, Type.u64, .neq); + const cond_1 = try cg.binOp(both_msb_not_zero, cross_1_msb_not_zero, Type.bool, .@"or"); + const cross_2_msb = try cg.load(cross_2, Type.u64, 8); + const cross_2_msb_not_zero = try cg.cmp(cross_2_msb, zero, Type.u64, .neq); + const cond_2 = try cg.binOp(cond_1, cross_2_msb_not_zero, Type.bool, .@"or"); + + const cross_1_lsb = try cg.load(cross_1, Type.u64, 0); + const cross_2_lsb = try cg.load(cross_2, Type.u64, 0); + const cross_add = try cg.binOp(cross_1_lsb, cross_2_lsb, Type.u64, .add); + + var mul_lsb_msb = try (try cg.load(mul_lsb, Type.u64, 8)).toLocal(cg, Type.u64); + defer mul_lsb_msb.free(cg); + var all_add = try 
(try cg.binOp(cross_add, mul_lsb_msb, Type.u64, .add)).toLocal(cg, Type.u64); + defer all_add.free(cg); + const add_overflow = try cg.cmp(all_add, mul_lsb_msb, Type.u64, .lt); + + // result for overflow bit + _ = try cg.binOp(cond_2, add_overflow, Type.bool, .@"or"); + try cg.addLocal(.local_set, overflow_bit.local.value); + + const tmp_result = try cg.allocStack(Type.u128); + try cg.emitWValue(tmp_result); + const mul_lsb_lsb = try cg.load(mul_lsb, Type.u64, 0); + try cg.store(.stack, mul_lsb_lsb, Type.u64, tmp_result.offset()); + try cg.store(tmp_result, all_add, Type.u64, 8); + break :blk tmp_result; + } else if (int_info.bits == 128 and int_info.signedness == .signed) blk: { + const overflow_ret = try cg.allocStack(Type.i32); + const res = try cg.callIntrinsic( + .__muloti4, + &[_]InternPool.Index{ .i128_type, .i128_type, .usize_type }, + Type.i128, + &.{ lhs, rhs, overflow_ret }, + ); + _ = try cg.load(overflow_ret, Type.i32, 0); + try cg.addLocal(.local_set, overflow_bit.local.value); + break :blk res; + } else return cg.fail("TODO: @mulWithOverflow for {f}", .{ty.fmt(pt)}); + var bin_op_local = try mul.toLocal(cg, ty); + defer bin_op_local.free(cg); + + const result = try cg.allocStack(cg.typeOfIndex(inst)); + const offset: u32 = @intCast(ty.abiSize(zcu)); + try cg.store(result, bin_op_local, ty, 0); + try cg.store(result, overflow_bit, Type.u1, offset); + + return cg.finishAir(inst, result, &.{ extra.lhs, extra.rhs }); +} + +fn airMaxMin( + cg: *CodeGen, + inst: Air.Inst.Index, + op: enum { fmax, fmin }, + cmp_op: std.math.CompareOperator, +) InnerError!void { + const zcu = cg.pt.zcu; + const bin_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].bin_op; + + const ty = cg.typeOfIndex(inst); + if (ty.zigTypeTag(zcu) == .vector) { + return cg.fail("TODO: `@maximum` and `@minimum` for vectors", .{}); + } + + if (ty.abiSize(zcu) > 16) { + return cg.fail("TODO: `@maximum` and `@minimum` for types larger than 16 bytes", .{}); + } + + const lhs = try cg.resolveInst(bin_op.lhs); + const rhs = try cg.resolveInst(bin_op.rhs); + + if (ty.zigTypeTag(zcu) == .float) { + const intrinsic = switch (op) { + inline .fmin, .fmax => |ct_op| switch (ty.floatBits(cg.target)) { + inline 16, 32, 64, 80, 128 => |bits| @field( + Mir.Intrinsic, + libcFloatPrefix(bits) ++ @tagName(ct_op) ++ libcFloatSuffix(bits), + ), + else => unreachable, + }, + }; + const result = try cg.callIntrinsic(intrinsic, &.{ ty.ip_index, ty.ip_index }, ty, &.{ lhs, rhs }); + try cg.lowerToStack(result); + } else { + // operands to select from + try cg.lowerToStack(lhs); + try cg.lowerToStack(rhs); + _ = try cg.cmp(lhs, rhs, ty, cmp_op); + + // based on the result from comparison, return operand 0 or 1. 
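+ // (`select` returns its first operand when the condition on top of the stack is
+ // non-zero, so the comparison result directly picks between lhs and rhs.)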
+ try cg.addTag(.select); + } + + return cg.finishAir(inst, .stack, &.{ bin_op.lhs, bin_op.rhs }); +} + +fn airMulAdd(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const zcu = cg.pt.zcu; + const pl_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].pl_op; + const bin_op = cg.air.extraData(Air.Bin, pl_op.payload).data; + + const ty = cg.typeOfIndex(inst); + if (ty.zigTypeTag(zcu) == .vector) { + return cg.fail("TODO: `@mulAdd` for vectors", .{}); + } + + const addend = try cg.resolveInst(pl_op.operand); + const lhs = try cg.resolveInst(bin_op.lhs); + const rhs = try cg.resolveInst(bin_op.rhs); + + const result = if (ty.floatBits(cg.target) == 16) fl_result: { + const rhs_ext = try cg.fpext(rhs, ty, Type.f32); + const lhs_ext = try cg.fpext(lhs, ty, Type.f32); + const addend_ext = try cg.fpext(addend, ty, Type.f32); + // call to compiler-rt `fn fmaf(f32, f32, f32) f32` + const result = try cg.callIntrinsic( + .fmaf, + &.{ .f32_type, .f32_type, .f32_type }, + Type.f32, + &.{ rhs_ext, lhs_ext, addend_ext }, + ); + break :fl_result try cg.fptrunc(result, Type.f32, ty); + } else result: { + const mul_result = try cg.binOp(lhs, rhs, ty, .mul); + break :result try cg.binOp(mul_result, addend, ty, .add); + }; + + return cg.finishAir(inst, result, &.{ bin_op.lhs, bin_op.rhs, pl_op.operand }); +} + +fn airClz(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const zcu = cg.pt.zcu; + const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; + + const ty = cg.typeOf(ty_op.operand); + if (ty.zigTypeTag(zcu) == .vector) { + return cg.fail("TODO: `@clz` for vectors", .{}); + } + + const operand = try cg.resolveInst(ty_op.operand); + const int_info = ty.intInfo(zcu); + const wasm_bits = toWasmBits(int_info.bits) orelse { + return cg.fail("TODO: `@clz` for integers with bitsize '{d}'", .{int_info.bits}); + }; + + switch (wasm_bits) { + 32 => { + if (int_info.signedness == .signed) { + const mask = ~@as(u32, 0) >> @intCast(32 - int_info.bits); + _ = try cg.binOp(operand, .{ .imm32 = mask }, ty, .@"and"); + } else { + try cg.emitWValue(operand); + } + try cg.addTag(.i32_clz); + }, + 64 => { + if (int_info.signedness == .signed) { + const mask = ~@as(u64, 0) >> @intCast(64 - int_info.bits); + _ = try cg.binOp(operand, .{ .imm64 = mask }, ty, .@"and"); + } else { + try cg.emitWValue(operand); + } + try cg.addTag(.i64_clz); + try cg.addTag(.i32_wrap_i64); + }, + 128 => { + var msb = try (try cg.load(operand, Type.u64, 8)).toLocal(cg, Type.u64); + defer msb.free(cg); + + try cg.emitWValue(msb); + try cg.addTag(.i64_clz); + _ = try cg.load(operand, Type.u64, 0); + try cg.addTag(.i64_clz); + try cg.emitWValue(.{ .imm64 = 64 }); + try cg.addTag(.i64_add); + _ = try cg.cmp(msb, .{ .imm64 = 0 }, Type.u64, .neq); + try cg.addTag(.select); + try cg.addTag(.i32_wrap_i64); + }, + else => unreachable, + } + + if (wasm_bits != int_info.bits) { + try cg.emitWValue(.{ .imm32 = wasm_bits - int_info.bits }); + try cg.addTag(.i32_sub); + } + + return cg.finishAir(inst, .stack, &.{ty_op.operand}); +} + +fn airCtz(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const zcu = cg.pt.zcu; + const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; + + const ty = cg.typeOf(ty_op.operand); + + if (ty.zigTypeTag(zcu) == .vector) { + return cg.fail("TODO: `@ctz` for vectors", .{}); + } + + const operand = try cg.resolveInst(ty_op.operand); + const int_info = ty.intInfo(zcu); + const wasm_bits = toWasmBits(int_info.bits) orelse { + return cg.fail("TODO: `@clz` for integers with 
bitsize '{d}'", .{int_info.bits}); + }; + + switch (wasm_bits) { + 32 => { + if (wasm_bits != int_info.bits) { + const val: u32 = @as(u32, 1) << @as(u5, @intCast(int_info.bits)); + // leave value on the stack + _ = try cg.binOp(operand, .{ .imm32 = val }, ty, .@"or"); + } else try cg.emitWValue(operand); + try cg.addTag(.i32_ctz); + }, + 64 => { + if (wasm_bits != int_info.bits) { + const val: u64 = @as(u64, 1) << @as(u6, @intCast(int_info.bits)); + // leave value on the stack + _ = try cg.binOp(operand, .{ .imm64 = val }, ty, .@"or"); + } else try cg.emitWValue(operand); + try cg.addTag(.i64_ctz); + try cg.addTag(.i32_wrap_i64); + }, + 128 => { + var lsb = try (try cg.load(operand, Type.u64, 0)).toLocal(cg, Type.u64); + defer lsb.free(cg); + + try cg.emitWValue(lsb); + try cg.addTag(.i64_ctz); + _ = try cg.load(operand, Type.u64, 8); + if (wasm_bits != int_info.bits) { + try cg.addImm64(@as(u64, 1) << @as(u6, @intCast(int_info.bits - 64))); + try cg.addTag(.i64_or); + } + try cg.addTag(.i64_ctz); + try cg.addImm64(64); + if (wasm_bits != int_info.bits) { + try cg.addTag(.i64_or); + } else { + try cg.addTag(.i64_add); + } + _ = try cg.cmp(lsb, .{ .imm64 = 0 }, Type.u64, .neq); + try cg.addTag(.select); + try cg.addTag(.i32_wrap_i64); + }, + else => unreachable, + } + + return cg.finishAir(inst, .stack, &.{ty_op.operand}); +} + +fn airDbgStmt(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const dbg_stmt = cg.air.instructions.items(.data)[@intFromEnum(inst)].dbg_stmt; + try cg.addInst(.{ .tag = .dbg_line, .data = .{ + .payload = try cg.addExtra(Mir.DbgLineColumn{ + .line = dbg_stmt.line, + .column = dbg_stmt.column, + }), + } }); + return cg.finishAir(inst, .none, &.{}); +} + +fn airDbgInlineBlock(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const ty_pl = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl; + const extra = cg.air.extraData(Air.DbgInlineBlock, ty_pl.payload); + // TODO + try cg.lowerBlock(inst, ty_pl.ty.toType(), @ptrCast(cg.air.extra.items[extra.end..][0..extra.data.body_len])); +} + +fn airDbgVar( + cg: *CodeGen, + inst: Air.Inst.Index, + local_tag: link.File.Dwarf.WipNav.LocalVarTag, + is_ptr: bool, +) InnerError!void { + _ = is_ptr; + _ = local_tag; + return cg.finishAir(inst, .none, &.{}); +} + +fn airTry(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const pl_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].pl_op; + const err_union = try cg.resolveInst(pl_op.operand); + const extra = cg.air.extraData(Air.Try, pl_op.payload); + const body: []const Air.Inst.Index = @ptrCast(cg.air.extra.items[extra.end..][0..extra.data.body_len]); + const err_union_ty = cg.typeOf(pl_op.operand); + const result = try lowerTry(cg, inst, err_union, body, err_union_ty, false); + return cg.finishAir(inst, result, &.{pl_op.operand}); +} + +fn airTryPtr(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const zcu = cg.pt.zcu; + const ty_pl = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl; + const extra = cg.air.extraData(Air.TryPtr, ty_pl.payload); + const err_union_ptr = try cg.resolveInst(extra.data.ptr); + const body: []const Air.Inst.Index = @ptrCast(cg.air.extra.items[extra.end..][0..extra.data.body_len]); + const err_union_ty = cg.typeOf(extra.data.ptr).childType(zcu); + const result = try lowerTry(cg, inst, err_union_ptr, body, err_union_ty, true); + return cg.finishAir(inst, result, &.{extra.data.ptr}); +} + +fn lowerTry( + cg: *CodeGen, + inst: Air.Inst.Index, + err_union: WValue, + body: []const Air.Inst.Index, + 
err_union_ty: Type, + operand_is_ptr: bool, +) InnerError!WValue { + const zcu = cg.pt.zcu; + + const pl_ty = err_union_ty.errorUnionPayload(zcu); + const pl_has_bits = pl_ty.hasRuntimeBitsIgnoreComptime(zcu); + + if (!err_union_ty.errorUnionSet(zcu).errorSetIsEmpty(zcu)) { + // Block we can jump out of when error is not set + try cg.startBlock(.block, .empty); + + // check if the error tag is set for the error union. + try cg.emitWValue(err_union); + if (pl_has_bits or operand_is_ptr) { + const err_offset: u32 = @intCast(errUnionErrorOffset(pl_ty, zcu)); + try cg.addMemArg(.i32_load16_u, .{ + .offset = err_union.offset() + err_offset, + .alignment = @intCast(Type.anyerror.abiAlignment(zcu).toByteUnits().?), + }); + } + try cg.addTag(.i32_eqz); + try cg.addLabel(.br_if, 0); // jump out of block when error is '0' + + const liveness = cg.liveness.getCondBr(inst); + try cg.branches.append(cg.gpa, .{}); + try cg.currentBranch().values.ensureUnusedCapacity(cg.gpa, liveness.else_deaths.len + liveness.then_deaths.len); + defer { + var branch = cg.branches.pop().?; + branch.deinit(cg.gpa); + } + try cg.genBody(body); + try cg.endBlock(); + } + + // if we reach here it means error was not set, and we want the payload + if (!pl_has_bits and !operand_is_ptr) { + return .none; + } + + const pl_offset: u32 = @intCast(errUnionPayloadOffset(pl_ty, zcu)); + if (operand_is_ptr or isByRef(pl_ty, zcu, cg.target)) { + return buildPointerOffset(cg, err_union, pl_offset, .new); + } + const payload = try cg.load(err_union, pl_ty, pl_offset); + return payload.toLocal(cg, pl_ty); +} + +fn airByteSwap(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const zcu = cg.pt.zcu; + const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; + + const ty = cg.typeOfIndex(inst); + const operand = try cg.resolveInst(ty_op.operand); + + if (ty.zigTypeTag(zcu) == .vector) { + return cg.fail("TODO: @byteSwap for vectors", .{}); + } + const int_info = ty.intInfo(zcu); + const wasm_bits = toWasmBits(int_info.bits) orelse { + return cg.fail("TODO: @byteSwap for integers with bitsize {d}", .{int_info.bits}); + }; + + // bytes are no-op + if (int_info.bits == 8) { + return cg.finishAir(inst, cg.reuseOperand(ty_op.operand, operand), &.{ty_op.operand}); + } + + const result = result: { + switch (wasm_bits) { + 32 => { + const intrin_ret = try cg.callIntrinsic( + .__bswapsi2, + &.{.u32_type}, + Type.u32, + &.{operand}, + ); + break :result if (int_info.bits == 32) + intrin_ret + else + try cg.binOp(intrin_ret, .{ .imm32 = 32 - int_info.bits }, ty, .shr); + }, + 64 => { + const intrin_ret = try cg.callIntrinsic( + .__bswapdi2, + &.{.u64_type}, + Type.u64, + &.{operand}, + ); + break :result if (int_info.bits == 64) + intrin_ret + else + try cg.binOp(intrin_ret, .{ .imm64 = 64 - int_info.bits }, ty, .shr); + }, + else => return cg.fail("TODO: @byteSwap for integers with bitsize {d}", .{int_info.bits}), + } + }; + return cg.finishAir(inst, result, &.{ty_op.operand}); +} + +fn airDiv(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const bin_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].bin_op; + + const ty = cg.typeOfIndex(inst); + const lhs = try cg.resolveInst(bin_op.lhs); + const rhs = try cg.resolveInst(bin_op.rhs); + + const result = try cg.binOp(lhs, rhs, ty, .div); + return cg.finishAir(inst, result, &.{ bin_op.lhs, bin_op.rhs }); +} + +fn airDivTrunc(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const bin_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].bin_op; + + const ty 
= cg.typeOfIndex(inst); + const lhs = try cg.resolveInst(bin_op.lhs); + const rhs = try cg.resolveInst(bin_op.rhs); + + const div_result = try cg.binOp(lhs, rhs, ty, .div); + + if (ty.isAnyFloat()) { + const trunc_result = try cg.floatOp(.trunc, ty, &.{div_result}); + return cg.finishAir(inst, trunc_result, &.{ bin_op.lhs, bin_op.rhs }); + } + + return cg.finishAir(inst, div_result, &.{ bin_op.lhs, bin_op.rhs }); +} + +fn airDivFloor(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const bin_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].bin_op; + + const zcu = cg.pt.zcu; + const ty = cg.typeOfIndex(inst); + const lhs = try cg.resolveInst(bin_op.lhs); + const rhs = try cg.resolveInst(bin_op.rhs); + + if (ty.isUnsignedInt(zcu)) { + _ = try cg.binOp(lhs, rhs, ty, .div); + } else if (ty.isSignedInt(zcu)) { + const int_bits = ty.intInfo(zcu).bits; + const wasm_bits = toWasmBits(int_bits) orelse { + return cg.fail("TODO: `@divFloor` for signed integers larger than 64 bits ({d} bits requested)", .{int_bits}); + }; + + if (wasm_bits > 64) { + return cg.fail("TODO: `@divFloor` for signed integers larger than 64 bits ({d} bits requested)", .{int_bits}); + } + + const zero: WValue = switch (wasm_bits) { + 32 => .{ .imm32 = 0 }, + 64 => .{ .imm64 = 0 }, + else => unreachable, + }; + + // tee leaves the value on the stack and stores it in a local. + const quotient = try cg.allocLocal(ty); + _ = try cg.binOp(lhs, rhs, ty, .div); + try cg.addLocal(.local_tee, quotient.local.value); + + // select takes a 32 bit value as the condition, so in the 64 bit case we use eqz to narrow + // the 64 bit value we want to use as the condition to 32 bits. + // This also inverts the condition (non 0 => 0, 0 => 1), so we put the adjusted and + // non-adjusted quotients on the stack in the opposite order for 32 vs 64 bits. + if (wasm_bits == 64) { + try cg.emitWValue(quotient); + } + + // 0 if the signs of rhs_wasm and lhs_wasm are the same, 1 otherwise. + _ = try cg.binOp(lhs, rhs, ty, .xor); + _ = try cg.cmp(.stack, zero, ty, .lt); + + switch (wasm_bits) { + 32 => { + try cg.addTag(.i32_sub); + try cg.emitWValue(quotient); + }, + 64 => { + try cg.addTag(.i64_extend_i32_u); + try cg.addTag(.i64_sub); + }, + else => unreachable, + } + + _ = try cg.binOp(lhs, rhs, ty, .rem); + + if (wasm_bits == 64) { + try cg.addTag(.i64_eqz); + } + + try cg.addTag(.select); + + // We need to zero the high bits because N bit comparisons consider all 32 or 64 bits, and + // expect all but the lowest N bits to be 0. + // TODO: Should we be zeroing the high bits here or should we be ignoring the high bits + // when performing comparisons? 
+ if (int_bits != wasm_bits) { + _ = try cg.wrapOperand(.stack, ty); + } + } else { + const float_bits = ty.floatBits(cg.target); + if (float_bits > 64) { + return cg.fail("TODO: `@divFloor` for floats with bitsize: {d}", .{float_bits}); + } + const is_f16 = float_bits == 16; + + const lhs_wasm = if (is_f16) try cg.fpext(lhs, Type.f16, Type.f32) else lhs; + const rhs_wasm = if (is_f16) try cg.fpext(rhs, Type.f16, Type.f32) else rhs; + + try cg.emitWValue(lhs_wasm); + try cg.emitWValue(rhs_wasm); + + switch (float_bits) { + 16, 32 => { + try cg.addTag(.f32_div); + try cg.addTag(.f32_floor); + }, + 64 => { + try cg.addTag(.f64_div); + try cg.addTag(.f64_floor); + }, + else => unreachable, + } + + if (is_f16) { + _ = try cg.fptrunc(.stack, Type.f32, Type.f16); + } + } + + return cg.finishAir(inst, .stack, &.{ bin_op.lhs, bin_op.rhs }); +} + +fn airRem(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const bin_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].bin_op; + + const ty = cg.typeOfIndex(inst); + const lhs = try cg.resolveInst(bin_op.lhs); + const rhs = try cg.resolveInst(bin_op.rhs); + + const result = try cg.binOp(lhs, rhs, ty, .rem); + + return cg.finishAir(inst, result, &.{ bin_op.lhs, bin_op.rhs }); +} + +/// Remainder after floor division, defined by: +/// @divFloor(a, b) * b + @mod(a, b) = a +fn airMod(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const bin_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].bin_op; + + const pt = cg.pt; + const zcu = pt.zcu; + const ty = cg.typeOfIndex(inst); + const lhs = try cg.resolveInst(bin_op.lhs); + const rhs = try cg.resolveInst(bin_op.rhs); + + const result = result: { + if (ty.isUnsignedInt(zcu)) { + break :result try cg.binOp(lhs, rhs, ty, .rem); + } + if (ty.isSignedInt(zcu)) { + // The wasm rem instruction gives the remainder after truncating division (rounding towards + // 0), equivalent to @rem. 
+ // We make use of the fact that: + // @mod(a, b) = @rem(@rem(a, b) + b, b) + const int_bits = ty.intInfo(zcu).bits; + const wasm_bits = toWasmBits(int_bits) orelse { + return cg.fail("TODO: `@mod` for signed integers larger than 64 bits ({d} bits requested)", .{int_bits}); + }; + + if (wasm_bits > 64) { + return cg.fail("TODO: `@mod` for signed integers larger than 64 bits ({d} bits requested)", .{int_bits}); + } + + _ = try cg.binOp(lhs, rhs, ty, .rem); + _ = try cg.binOp(.stack, rhs, ty, .add); + break :result try cg.binOp(.stack, rhs, ty, .rem); + } + if (ty.isAnyFloat()) { + const rem = try cg.binOp(lhs, rhs, ty, .rem); + const add = try cg.binOp(rem, rhs, ty, .add); + break :result try cg.binOp(add, rhs, ty, .rem); + } + return cg.fail("TODO: @mod for {f}", .{ty.fmt(pt)}); + }; + + return cg.finishAir(inst, result, &.{ bin_op.lhs, bin_op.rhs }); +} + +fn airSatMul(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const bin_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].bin_op; + + const pt = cg.pt; + const zcu = pt.zcu; + const ty = cg.typeOfIndex(inst); + const int_info = ty.intInfo(zcu); + const is_signed = int_info.signedness == .signed; + + const lhs = try cg.resolveInst(bin_op.lhs); + const rhs = try cg.resolveInst(bin_op.rhs); + const wasm_bits = toWasmBits(int_info.bits) orelse { + return cg.fail("TODO: mul_sat for {f}", .{ty.fmt(pt)}); + }; + + switch (wasm_bits) { + 32 => { + const upcast_ty: Type = if (is_signed) Type.i64 else Type.u64; + const lhs_up = try cg.intcast(lhs, ty, upcast_ty); + const rhs_up = try cg.intcast(rhs, ty, upcast_ty); + var mul_res = try (try cg.binOp(lhs_up, rhs_up, upcast_ty, .mul)).toLocal(cg, upcast_ty); + defer mul_res.free(cg); + if (is_signed) { + const imm_max: WValue = .{ .imm64 = ~@as(u64, 0) >> @intCast(64 - (int_info.bits - 1)) }; + try cg.emitWValue(mul_res); + try cg.emitWValue(imm_max); + _ = try cg.cmp(mul_res, imm_max, upcast_ty, .lt); + try cg.addTag(.select); + + var tmp = try cg.allocLocal(upcast_ty); + defer tmp.free(cg); + try cg.addLocal(.local_set, tmp.local.value); + + const imm_min: WValue = .{ .imm64 = ~@as(u64, 0) << @intCast(int_info.bits - 1) }; + try cg.emitWValue(tmp); + try cg.emitWValue(imm_min); + _ = try cg.cmp(tmp, imm_min, upcast_ty, .gt); + try cg.addTag(.select); + } else { + const imm_max: WValue = .{ .imm64 = ~@as(u64, 0) >> @intCast(64 - int_info.bits) }; + try cg.emitWValue(mul_res); + try cg.emitWValue(imm_max); + _ = try cg.cmp(mul_res, imm_max, upcast_ty, .lt); + try cg.addTag(.select); + } + try cg.addTag(.i32_wrap_i64); + }, + 64 => { + if (!(int_info.bits == 64 and int_info.signedness == .signed)) { + return cg.fail("TODO: mul_sat for {f}", .{ty.fmt(pt)}); + } + const overflow_ret = try cg.allocStack(Type.i32); + _ = try cg.callIntrinsic( + .__mulodi4, + &[_]InternPool.Index{ .i64_type, .i64_type, .usize_type }, + Type.i64, + &.{ lhs, rhs, overflow_ret }, + ); + const xor = try cg.binOp(lhs, rhs, Type.i64, .xor); + const sign_v = try cg.binOp(xor, .{ .imm64 = 63 }, Type.i64, .shr); + _ = try cg.binOp(sign_v, .{ .imm64 = ~@as(u63, 0) }, Type.i64, .xor); + _ = try cg.load(overflow_ret, Type.i32, 0); + try cg.addTag(.i32_eqz); + try cg.addTag(.select); + }, + 128 => { + if (!(int_info.bits == 128 and int_info.signedness == .signed)) { + return cg.fail("TODO: mul_sat for {f}", .{ty.fmt(pt)}); + } + const overflow_ret = try cg.allocStack(Type.i32); + const ret = try cg.callIntrinsic( + .__muloti4, + &[_]InternPool.Index{ .i128_type, .i128_type, .usize_type }, + Type.i128, + &.{ lhs, rhs, 
overflow_ret }, + ); + try cg.lowerToStack(ret); + const xor = try cg.binOp(lhs, rhs, Type.i128, .xor); + const sign_v = try cg.binOp(xor, .{ .imm32 = 127 }, Type.i128, .shr); + + // xor ~@as(u127, 0) + try cg.emitWValue(sign_v); + const lsb = try cg.load(sign_v, Type.u64, 0); + _ = try cg.binOp(lsb, .{ .imm64 = ~@as(u64, 0) }, Type.u64, .xor); + try cg.store(.stack, .stack, Type.u64, sign_v.offset()); + try cg.emitWValue(sign_v); + const msb = try cg.load(sign_v, Type.u64, 8); + _ = try cg.binOp(msb, .{ .imm64 = ~@as(u63, 0) }, Type.u64, .xor); + try cg.store(.stack, .stack, Type.u64, sign_v.offset() + 8); + + try cg.lowerToStack(sign_v); + _ = try cg.load(overflow_ret, Type.i32, 0); + try cg.addTag(.i32_eqz); + try cg.addTag(.select); + }, + else => unreachable, + } + return cg.finishAir(inst, .stack, &.{ bin_op.lhs, bin_op.rhs }); +} + +fn airSatBinOp(cg: *CodeGen, inst: Air.Inst.Index, op: Op) InnerError!void { + assert(op == .add or op == .sub); + const bin_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].bin_op; + + const zcu = cg.pt.zcu; + const ty = cg.typeOfIndex(inst); + const lhs = try cg.resolveInst(bin_op.lhs); + const rhs = try cg.resolveInst(bin_op.rhs); + + const int_info = ty.intInfo(zcu); + const is_signed = int_info.signedness == .signed; + + if (int_info.bits > 64) { + return cg.fail("TODO: saturating arithmetic for integers with bitsize '{d}'", .{int_info.bits}); + } + + if (is_signed) { + const result = try signedSat(cg, lhs, rhs, ty, op); + return cg.finishAir(inst, result, &.{ bin_op.lhs, bin_op.rhs }); + } + + const wasm_bits = toWasmBits(int_info.bits).?; + var bin_result = try (try cg.binOp(lhs, rhs, ty, op)).toLocal(cg, ty); + defer bin_result.free(cg); + if (wasm_bits != int_info.bits and op == .add) { + const val: u64 = @as(u64, @intCast((@as(u65, 1) << @as(u7, @intCast(int_info.bits))) - 1)); + const imm_val: WValue = switch (wasm_bits) { + 32 => .{ .imm32 = @intCast(val) }, + 64 => .{ .imm64 = val }, + else => unreachable, + }; + + try cg.emitWValue(bin_result); + try cg.emitWValue(imm_val); + _ = try cg.cmp(bin_result, imm_val, ty, .lt); + } else { + switch (wasm_bits) { + 32 => try cg.addImm32(if (op == .add) std.math.maxInt(u32) else 0), + 64 => try cg.addImm64(if (op == .add) std.math.maxInt(u64) else 0), + else => unreachable, + } + try cg.emitWValue(bin_result); + _ = try cg.cmp(bin_result, lhs, ty, if (op == .add) .lt else .gt); + } + + try cg.addTag(.select); + return cg.finishAir(inst, .stack, &.{ bin_op.lhs, bin_op.rhs }); +} + +fn signedSat(cg: *CodeGen, lhs: WValue, rhs: WValue, ty: Type, op: Op) InnerError!WValue { + const pt = cg.pt; + const zcu = pt.zcu; + const int_info = ty.intInfo(zcu); + const wasm_bits = toWasmBits(int_info.bits).?; + const is_wasm_bits = wasm_bits == int_info.bits; + const ext_ty = if (!is_wasm_bits) try pt.intType(int_info.signedness, wasm_bits) else ty; + + const max_val: u64 = @as(u64, @intCast((@as(u65, 1) << @as(u7, @intCast(int_info.bits - 1))) - 1)); + const min_val: i64 = (-@as(i64, @intCast(@as(u63, @intCast(max_val))))) - 1; + const max_wvalue: WValue = switch (wasm_bits) { + 32 => .{ .imm32 = @truncate(max_val) }, + 64 => .{ .imm64 = max_val }, + else => unreachable, + }; + const min_wvalue: WValue = switch (wasm_bits) { + 32 => .{ .imm32 = @bitCast(@as(i32, @truncate(min_val))) }, + 64 => .{ .imm64 = @bitCast(min_val) }, + else => unreachable, + }; + + var bin_result = try (try cg.binOp(lhs, rhs, ext_ty, op)).toLocal(cg, ext_ty); + if (!is_wasm_bits) { + defer bin_result.free(cg); // not returned in 
this branch + try cg.emitWValue(bin_result); + try cg.emitWValue(max_wvalue); + _ = try cg.cmp(bin_result, max_wvalue, ext_ty, .lt); + try cg.addTag(.select); + try cg.addLocal(.local_set, bin_result.local.value); // re-use local + + try cg.emitWValue(bin_result); + try cg.emitWValue(min_wvalue); + _ = try cg.cmp(bin_result, min_wvalue, ext_ty, .gt); + try cg.addTag(.select); + try cg.addLocal(.local_set, bin_result.local.value); // re-use local + return (try cg.wrapOperand(bin_result, ty)).toLocal(cg, ty); + } else { + const zero: WValue = switch (wasm_bits) { + 32 => .{ .imm32 = 0 }, + 64 => .{ .imm64 = 0 }, + else => unreachable, + }; + try cg.emitWValue(max_wvalue); + try cg.emitWValue(min_wvalue); + _ = try cg.cmp(bin_result, zero, ty, .lt); + try cg.addTag(.select); + try cg.emitWValue(bin_result); + // leave on stack + const cmp_zero_result = try cg.cmp(rhs, zero, ty, if (op == .add) .lt else .gt); + const cmp_bin_result = try cg.cmp(bin_result, lhs, ty, .lt); + _ = try cg.binOp(cmp_zero_result, cmp_bin_result, Type.u32, .xor); // comparisons always return i32, so provide u32 as type to xor. + try cg.addTag(.select); + try cg.addLocal(.local_set, bin_result.local.value); // re-use local + return bin_result; + } +} + +fn airShlSat(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const bin_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].bin_op; + + const pt = cg.pt; + const zcu = pt.zcu; + + if (cg.typeOf(bin_op.lhs).isVector(zcu) and !cg.typeOf(bin_op.rhs).isVector(zcu)) { + return cg.fail("TODO: implement vector 'shl_sat' with scalar rhs", .{}); + } + + const ty = cg.typeOfIndex(inst); + const int_info = ty.intInfo(zcu); + const is_signed = int_info.signedness == .signed; + if (int_info.bits > 64) { + return cg.fail("TODO: Saturating shifting left for integers with bitsize '{d}'", .{int_info.bits}); + } + + const lhs = try cg.resolveInst(bin_op.lhs); + const rhs = try cg.resolveInst(bin_op.rhs); + const wasm_bits = toWasmBits(int_info.bits).?; + const result = try cg.allocLocal(ty); + + if (wasm_bits == int_info.bits) { + var shl = try (try cg.binOp(lhs, rhs, ty, .shl)).toLocal(cg, ty); + defer shl.free(cg); + var shr = try (try cg.binOp(shl, rhs, ty, .shr)).toLocal(cg, ty); + defer shr.free(cg); + + switch (wasm_bits) { + 32 => blk: { + if (!is_signed) { + try cg.addImm32(std.math.maxInt(u32)); + break :blk; + } + try cg.addImm32(@bitCast(@as(i32, std.math.minInt(i32)))); + try cg.addImm32(@bitCast(@as(i32, std.math.maxInt(i32)))); + _ = try cg.cmp(lhs, .{ .imm32 = 0 }, ty, .lt); + try cg.addTag(.select); + }, + 64 => blk: { + if (!is_signed) { + try cg.addImm64(std.math.maxInt(u64)); + break :blk; + } + try cg.addImm64(@bitCast(@as(i64, std.math.minInt(i64)))); + try cg.addImm64(@bitCast(@as(i64, std.math.maxInt(i64)))); + _ = try cg.cmp(lhs, .{ .imm64 = 0 }, ty, .lt); + try cg.addTag(.select); + }, + else => unreachable, + } + try cg.emitWValue(shl); + _ = try cg.cmp(lhs, shr, ty, .neq); + try cg.addTag(.select); + try cg.addLocal(.local_set, result.local.value); + } else { + const shift_size = wasm_bits - int_info.bits; + const shift_value: WValue = switch (wasm_bits) { + 32 => .{ .imm32 = shift_size }, + 64 => .{ .imm64 = shift_size }, + else => unreachable, + }; + const ext_ty = try pt.intType(int_info.signedness, wasm_bits); + + var shl_res = try (try cg.binOp(lhs, shift_value, ext_ty, .shl)).toLocal(cg, ext_ty); + defer shl_res.free(cg); + var shl = try (try cg.binOp(shl_res, rhs, ext_ty, .shl)).toLocal(cg, ext_ty); + defer shl.free(cg); + var shr = try (try 
cg.binOp(shl, rhs, ext_ty, .shr)).toLocal(cg, ext_ty); + defer shr.free(cg); + + switch (wasm_bits) { + 32 => blk: { + if (!is_signed) { + try cg.addImm32(std.math.maxInt(u32)); + break :blk; + } + + try cg.addImm32(@bitCast(@as(i32, std.math.minInt(i32)))); + try cg.addImm32(@bitCast(@as(i32, std.math.maxInt(i32)))); + _ = try cg.cmp(shl_res, .{ .imm32 = 0 }, ext_ty, .lt); + try cg.addTag(.select); + }, + 64 => blk: { + if (!is_signed) { + try cg.addImm64(std.math.maxInt(u64)); + break :blk; + } + + try cg.addImm64(@bitCast(@as(i64, std.math.minInt(i64)))); + try cg.addImm64(@bitCast(@as(i64, std.math.maxInt(i64)))); + _ = try cg.cmp(shl_res, .{ .imm64 = 0 }, ext_ty, .lt); + try cg.addTag(.select); + }, + else => unreachable, + } + try cg.emitWValue(shl); + _ = try cg.cmp(shl_res, shr, ext_ty, .neq); + try cg.addTag(.select); + try cg.addLocal(.local_set, result.local.value); + var shift_result = try cg.binOp(result, shift_value, ext_ty, .shr); + if (is_signed) { + shift_result = try cg.wrapOperand(shift_result, ty); + } + try cg.addLocal(.local_set, result.local.value); + } + + return cg.finishAir(inst, result, &.{ bin_op.lhs, bin_op.rhs }); +} + +/// Calls a compiler-rt intrinsic by creating an undefined symbol, +/// then lowering the arguments and calling the symbol as a function call. +/// This function call assumes the C-ABI. +/// Asserts arguments are not stack values when the return value is +/// passed as the first parameter. +/// May leave the return value on the stack. +fn callIntrinsic( + cg: *CodeGen, + intrinsic: Mir.Intrinsic, + param_types: []const InternPool.Index, + return_type: Type, + args: []const WValue, +) InnerError!WValue { + assert(param_types.len == args.len); + const zcu = cg.pt.zcu; + + // Always pass over C-ABI + + const want_sret_param = firstParamSRet(.{ .wasm_mvp = .{} }, return_type, zcu, cg.target); + // if we want return as first param, we allocate a pointer to stack, + // and emit it as our first argument + const sret = if (want_sret_param) blk: { + const sret_local = try cg.allocStack(return_type); + try cg.lowerToStack(sret_local); + break :blk sret_local; + } else .none; + + // Lower all arguments to the stack before we call our function + for (args, 0..) 
|arg, arg_i| { + assert(!(want_sret_param and arg == .stack)); + assert(Type.fromInterned(param_types[arg_i]).hasRuntimeBitsIgnoreComptime(zcu)); + try cg.lowerArg(.{ .wasm_mvp = .{} }, Type.fromInterned(param_types[arg_i]), arg); + } + + try cg.addInst(.{ .tag = .call_intrinsic, .data = .{ .intrinsic = intrinsic } }); + + if (!return_type.hasRuntimeBitsIgnoreComptime(zcu)) { + return .none; + } else if (return_type.isNoReturn(zcu)) { + try cg.addTag(.@"unreachable"); + return .none; + } else if (want_sret_param) { + return sret; + } else { + return .stack; + } +} + +fn airTagName(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const un_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].un_op; + const operand = try cg.resolveInst(un_op); + const enum_ty = cg.typeOf(un_op); + + const result_ptr = try cg.allocStack(cg.typeOfIndex(inst)); + try cg.lowerToStack(result_ptr); + try cg.emitWValue(operand); + try cg.addInst(.{ .tag = .call_tag_name, .data = .{ .ip_index = enum_ty.toIntern() } }); + + return cg.finishAir(inst, result_ptr, &.{un_op}); +} + +fn airErrorSetHasValue(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const zcu = cg.pt.zcu; + const ip = &zcu.intern_pool; + const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; + + const operand = try cg.resolveInst(ty_op.operand); + const error_set_ty = ty_op.ty.toType(); + const result = try cg.allocLocal(Type.bool); + + const names = error_set_ty.errorSetNames(zcu); + var values = try std.array_list.Managed(u32).initCapacity(cg.gpa, names.len); + defer values.deinit(); + + var lowest: ?u32 = null; + var highest: ?u32 = null; + for (0..names.len) |name_index| { + const err_int = ip.getErrorValueIfExists(names.get(ip)[name_index]).?; + if (lowest) |*l| { + if (err_int < l.*) { + l.* = err_int; + } + } else { + lowest = err_int; + } + if (highest) |*h| { + if (err_int > h.*) { + highest = err_int; + } + } else { + highest = err_int; + } + + values.appendAssumeCapacity(err_int); + } + + // start block for 'true' branch + try cg.startBlock(.block, .empty); + // start block for 'false' branch + try cg.startBlock(.block, .empty); + // block for the jump table itself + try cg.startBlock(.block, .empty); + + // lower operand to determine jump table target + try cg.emitWValue(operand); + try cg.addImm32(lowest.?); + try cg.addTag(.i32_sub); + + // Account for default branch so always add '1' + const depth = @as(u32, @intCast(highest.? - lowest.? + 1)); + const jump_table: Mir.JumpTable = .{ .length = depth }; + const table_extra_index = try cg.addExtra(jump_table); + try cg.addInst(.{ .tag = .br_table, .data = .{ .payload = table_extra_index } }); + try cg.mir_extra.ensureUnusedCapacity(cg.gpa, depth); + + var value: u32 = lowest.?; + while (value <= highest.?) : (value += 1) { + const idx: u32 = blk: { + for (values.items) |val| { + if (val == value) break :blk 1; + } + break :blk 0; + }; + cg.mir_extra.appendAssumeCapacity(idx); + } + try cg.endBlock(); + + // 'false' branch (i.e. error set does not have value + // ensure we set local to 0 in case the local was re-used. 
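+ // (Branch depth 1 below exits the outermost block opened above, skipping the 'true' branch.)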
+ try cg.addImm32(0); + try cg.addLocal(.local_set, result.local.value); + try cg.addLabel(.br, 1); + try cg.endBlock(); + + // 'true' branch + try cg.addImm32(1); + try cg.addLocal(.local_set, result.local.value); + try cg.addLabel(.br, 0); + try cg.endBlock(); + + return cg.finishAir(inst, result, &.{ty_op.operand}); +} + +inline fn useAtomicFeature(cg: *const CodeGen) bool { + return cg.target.cpu.has(.wasm, .atomics); +} + +fn airCmpxchg(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const zcu = cg.pt.zcu; + const ty_pl = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl; + const extra = cg.air.extraData(Air.Cmpxchg, ty_pl.payload).data; + + const ptr_ty = cg.typeOf(extra.ptr); + const ty = ptr_ty.childType(zcu); + const result_ty = cg.typeOfIndex(inst); + + const ptr_operand = try cg.resolveInst(extra.ptr); + const expected_val = try cg.resolveInst(extra.expected_value); + const new_val = try cg.resolveInst(extra.new_value); + + const cmp_result = try cg.allocLocal(Type.bool); + + const ptr_val = if (cg.useAtomicFeature()) val: { + const val_local = try cg.allocLocal(ty); + try cg.emitWValue(ptr_operand); + try cg.lowerToStack(expected_val); + try cg.lowerToStack(new_val); + try cg.addAtomicMemArg(switch (ty.abiSize(zcu)) { + 1 => .i32_atomic_rmw8_cmpxchg_u, + 2 => .i32_atomic_rmw16_cmpxchg_u, + 4 => .i32_atomic_rmw_cmpxchg, + 8 => .i32_atomic_rmw_cmpxchg, + else => |size| return cg.fail("TODO: implement `@cmpxchg` for types with abi size '{d}'", .{size}), + }, .{ + .offset = ptr_operand.offset(), + .alignment = @intCast(ty.abiAlignment(zcu).toByteUnits().?), + }); + try cg.addLocal(.local_tee, val_local.local.value); + _ = try cg.cmp(.stack, expected_val, ty, .eq); + try cg.addLocal(.local_set, cmp_result.local.value); + break :val val_local; + } else val: { + if (ty.abiSize(zcu) > 8) { + return cg.fail("TODO: Implement `@cmpxchg` for types larger than abi size of 8 bytes", .{}); + } + const ptr_val = try WValue.toLocal(try cg.load(ptr_operand, ty, 0), cg, ty); + + try cg.lowerToStack(ptr_operand); + try cg.lowerToStack(new_val); + try cg.emitWValue(ptr_val); + _ = try cg.cmp(ptr_val, expected_val, ty, .eq); + try cg.addLocal(.local_tee, cmp_result.local.value); + try cg.addTag(.select); + try cg.store(.stack, .stack, ty, 0); + + break :val ptr_val; + }; + + const result = if (isByRef(result_ty, zcu, cg.target)) val: { + try cg.emitWValue(cmp_result); + try cg.addImm32(~@as(u32, 0)); + try cg.addTag(.i32_xor); + try cg.addImm32(1); + try cg.addTag(.i32_and); + const and_result = try WValue.toLocal(.stack, cg, Type.bool); + const result_ptr = try cg.allocStack(result_ty); + try cg.store(result_ptr, and_result, Type.bool, @as(u32, @intCast(ty.abiSize(zcu)))); + try cg.store(result_ptr, ptr_val, ty, 0); + break :val result_ptr; + } else val: { + try cg.addImm32(0); + try cg.emitWValue(ptr_val); + try cg.emitWValue(cmp_result); + try cg.addTag(.select); + break :val .stack; + }; + + return cg.finishAir(inst, result, &.{ extra.ptr, extra.expected_value, extra.new_value }); +} + +fn airAtomicLoad(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const zcu = cg.pt.zcu; + const atomic_load = cg.air.instructions.items(.data)[@intFromEnum(inst)].atomic_load; + const ptr = try cg.resolveInst(atomic_load.ptr); + const ty = cg.typeOfIndex(inst); + + if (cg.useAtomicFeature()) { + const tag: std.wasm.AtomicsOpcode = switch (ty.abiSize(zcu)) { + 1 => .i32_atomic_load8_u, + 2 => .i32_atomic_load16_u, + 4 => .i32_atomic_load, + 8 => .i64_atomic_load, + else => |size| return 
cg.fail("TODO: @atomicLoad for types with abi size {d}", .{size}), + }; + try cg.emitWValue(ptr); + try cg.addAtomicMemArg(tag, .{ + .offset = ptr.offset(), + .alignment = @intCast(ty.abiAlignment(zcu).toByteUnits().?), + }); + } else { + _ = try cg.load(ptr, ty, 0); + } + + return cg.finishAir(inst, .stack, &.{atomic_load.ptr}); +} + +fn airAtomicRmw(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const zcu = cg.pt.zcu; + const pl_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].pl_op; + const extra = cg.air.extraData(Air.AtomicRmw, pl_op.payload).data; + + const ptr = try cg.resolveInst(pl_op.operand); + const operand = try cg.resolveInst(extra.operand); + const ty = cg.typeOfIndex(inst); + const op: std.builtin.AtomicRmwOp = extra.op(); + + if (cg.useAtomicFeature()) { + switch (op) { + .Max, + .Min, + .Nand, + => { + const tmp = try cg.load(ptr, ty, 0); + const value = try tmp.toLocal(cg, ty); + + // create a loop to cmpxchg the new value + try cg.startBlock(.loop, .empty); + + try cg.emitWValue(ptr); + try cg.emitWValue(value); + if (op == .Nand) { + const wasm_bits = toWasmBits(@intCast(ty.bitSize(zcu))).?; + + const and_res = try cg.binOp(value, operand, ty, .@"and"); + if (wasm_bits == 32) + try cg.addImm32(~@as(u32, 0)) + else if (wasm_bits == 64) + try cg.addImm64(~@as(u64, 0)) + else + return cg.fail("TODO: `@atomicRmw` with operator `Nand` for types larger than 64 bits", .{}); + _ = try cg.binOp(and_res, .stack, ty, .xor); + } else { + try cg.emitWValue(value); + try cg.emitWValue(operand); + _ = try cg.cmp(value, operand, ty, if (op == .Max) .gt else .lt); + try cg.addTag(.select); + } + try cg.addAtomicMemArg( + switch (ty.abiSize(zcu)) { + 1 => .i32_atomic_rmw8_cmpxchg_u, + 2 => .i32_atomic_rmw16_cmpxchg_u, + 4 => .i32_atomic_rmw_cmpxchg, + 8 => .i64_atomic_rmw_cmpxchg, + else => return cg.fail("TODO: implement `@atomicRmw` with operation `{s}` for types larger than 64 bits", .{@tagName(op)}), + }, + .{ + .offset = ptr.offset(), + .alignment = @intCast(ty.abiAlignment(zcu).toByteUnits().?), + }, + ); + const select_res = try cg.allocLocal(ty); + try cg.addLocal(.local_tee, select_res.local.value); + _ = try cg.cmp(.stack, value, ty, .neq); // leave on stack so we can use it for br_if + + try cg.emitWValue(select_res); + try cg.addLocal(.local_set, value.local.value); + + try cg.addLabel(.br_if, 0); + try cg.endBlock(); + return cg.finishAir(inst, value, &.{ pl_op.operand, extra.operand }); + }, + + // the other operations have their own instructions for Wasm. 
+ else => { + try cg.emitWValue(ptr); + try cg.emitWValue(operand); + const tag: std.wasm.AtomicsOpcode = switch (ty.abiSize(zcu)) { + 1 => switch (op) { + .Xchg => .i32_atomic_rmw8_xchg_u, + .Add => .i32_atomic_rmw8_add_u, + .Sub => .i32_atomic_rmw8_sub_u, + .And => .i32_atomic_rmw8_and_u, + .Or => .i32_atomic_rmw8_or_u, + .Xor => .i32_atomic_rmw8_xor_u, + else => unreachable, + }, + 2 => switch (op) { + .Xchg => .i32_atomic_rmw16_xchg_u, + .Add => .i32_atomic_rmw16_add_u, + .Sub => .i32_atomic_rmw16_sub_u, + .And => .i32_atomic_rmw16_and_u, + .Or => .i32_atomic_rmw16_or_u, + .Xor => .i32_atomic_rmw16_xor_u, + else => unreachable, + }, + 4 => switch (op) { + .Xchg => .i32_atomic_rmw_xchg, + .Add => .i32_atomic_rmw_add, + .Sub => .i32_atomic_rmw_sub, + .And => .i32_atomic_rmw_and, + .Or => .i32_atomic_rmw_or, + .Xor => .i32_atomic_rmw_xor, + else => unreachable, + }, + 8 => switch (op) { + .Xchg => .i64_atomic_rmw_xchg, + .Add => .i64_atomic_rmw_add, + .Sub => .i64_atomic_rmw_sub, + .And => .i64_atomic_rmw_and, + .Or => .i64_atomic_rmw_or, + .Xor => .i64_atomic_rmw_xor, + else => unreachable, + }, + else => |size| return cg.fail("TODO: Implement `@atomicRmw` for types with abi size {d}", .{size}), + }; + try cg.addAtomicMemArg(tag, .{ + .offset = ptr.offset(), + .alignment = @intCast(ty.abiAlignment(zcu).toByteUnits().?), + }); + return cg.finishAir(inst, .stack, &.{ pl_op.operand, extra.operand }); + }, + } + } else { + const loaded = try cg.load(ptr, ty, 0); + const result = try loaded.toLocal(cg, ty); + + switch (op) { + .Xchg => { + try cg.store(ptr, operand, ty, 0); + }, + .Add, + .Sub, + .And, + .Or, + .Xor, + => { + try cg.emitWValue(ptr); + _ = try cg.binOp(result, operand, ty, switch (op) { + .Add => .add, + .Sub => .sub, + .And => .@"and", + .Or => .@"or", + .Xor => .xor, + else => unreachable, + }); + if (ty.isInt(zcu) and (op == .Add or op == .Sub)) { + _ = try cg.wrapOperand(.stack, ty); + } + try cg.store(.stack, .stack, ty, ptr.offset()); + }, + .Max, + .Min, + => { + try cg.emitWValue(ptr); + try cg.emitWValue(result); + try cg.emitWValue(operand); + _ = try cg.cmp(result, operand, ty, if (op == .Max) .gt else .lt); + try cg.addTag(.select); + try cg.store(.stack, .stack, ty, ptr.offset()); + }, + .Nand => { + const wasm_bits = toWasmBits(@intCast(ty.bitSize(zcu))).?; + + try cg.emitWValue(ptr); + const and_res = try cg.binOp(result, operand, ty, .@"and"); + if (wasm_bits == 32) + try cg.addImm32(~@as(u32, 0)) + else if (wasm_bits == 64) + try cg.addImm64(~@as(u64, 0)) + else + return cg.fail("TODO: `@atomicRmw` with operator `Nand` for types larger than 64 bits", .{}); + _ = try cg.binOp(and_res, .stack, ty, .xor); + try cg.store(.stack, .stack, ty, ptr.offset()); + }, + } + + return cg.finishAir(inst, result, &.{ pl_op.operand, extra.operand }); + } +} + +fn airAtomicStore(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const zcu = cg.pt.zcu; + const bin_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].bin_op; + + const ptr = try cg.resolveInst(bin_op.lhs); + const operand = try cg.resolveInst(bin_op.rhs); + const ptr_ty = cg.typeOf(bin_op.lhs); + const ty = ptr_ty.childType(zcu); + + if (cg.useAtomicFeature()) { + const tag: std.wasm.AtomicsOpcode = switch (ty.abiSize(zcu)) { + 1 => .i32_atomic_store8, + 2 => .i32_atomic_store16, + 4 => .i32_atomic_store, + 8 => .i64_atomic_store, + else => |size| return cg.fail("TODO: @atomicLoad for types with abi size {d}", .{size}), + }; + try cg.emitWValue(ptr); + try cg.lowerToStack(operand); + try 
cg.addAtomicMemArg(tag, .{ + .offset = ptr.offset(), + .alignment = @intCast(ty.abiAlignment(zcu).toByteUnits().?), + }); + } else { + try cg.store(ptr, operand, ty, 0); + } + + return cg.finishAir(inst, .none, &.{ bin_op.lhs, bin_op.rhs }); +} + +fn airFrameAddress(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + if (cg.initial_stack_value == .none) { + try cg.initializeStack(); + } + try cg.emitWValue(cg.bottom_stack_value); + return cg.finishAir(inst, .stack, &.{}); +} + +fn airRuntimeNavPtr(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const ty_nav = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_nav; + const mod = cg.pt.zcu.navFileScope(cg.owner_nav).mod.?; + if (mod.single_threaded) { + const result: WValue = .{ .nav_ref = .{ + .nav_index = ty_nav.nav, + .offset = 0, + } }; + return cg.finishAir(inst, result, &.{}); + } + return cg.fail("TODO: thread-local variables", .{}); +} + +fn typeOf(cg: *CodeGen, inst: Air.Inst.Ref) Type { + const zcu = cg.pt.zcu; + return cg.air.typeOf(inst, &zcu.intern_pool); +} + +fn typeOfIndex(cg: *CodeGen, inst: Air.Inst.Index) Type { + const zcu = cg.pt.zcu; + return cg.air.typeOfIndex(inst, &zcu.intern_pool); +} + +fn floatCmpIntrinsic(op: std.math.CompareOperator, bits: u16) Mir.Intrinsic { + return switch (op) { + .lt => switch (bits) { + 80 => .__ltxf2, + 128 => .__lttf2, + else => unreachable, + }, + .lte => switch (bits) { + 80 => .__lexf2, + 128 => .__letf2, + else => unreachable, + }, + .eq => switch (bits) { + 80 => .__eqxf2, + 128 => .__eqtf2, + else => unreachable, + }, + .neq => switch (bits) { + 80 => .__nexf2, + 128 => .__netf2, + else => unreachable, + }, + .gte => switch (bits) { + 80 => .__gexf2, + 128 => .__getf2, + else => unreachable, + }, + .gt => switch (bits) { + 80 => .__gtxf2, + 128 => .__gttf2, + else => unreachable, + }, + }; +} |
