| field | value | date |
|---|---|---|
| author | Alex Rønne Petersen <alex@alexrp.com> | 2025-08-20 15:45:53 +0200 |
| committer | Alex Rønne Petersen <alex@alexrp.com> | 2025-09-26 02:02:07 +0200 |
| commit | 86077fe6bdac34fe610f4c0b6bac3d6d1b97c22d (patch) | |
| tree | d8f58b4d4e034d5770c816e886690387a1db7ffe /src/codegen/wasm/CodeGen.zig | |
| parent | 212715f62d3b22a2da18904f570dbc918ca8470a (diff) | |
compiler: move self-hosted backends from src/arch to src/codegen
Diffstat (limited to 'src/codegen/wasm/CodeGen.zig')
| mode | file | lines |
|---|---|---|
| -rw-r--r-- | src/codegen/wasm/CodeGen.zig | 7578 |

1 file changed, 7578 insertions, 0 deletions
diff --git a/src/codegen/wasm/CodeGen.zig b/src/codegen/wasm/CodeGen.zig
new file mode 100644
index 0000000000..d8d8933cc3
--- /dev/null
+++ b/src/codegen/wasm/CodeGen.zig
@@ -0,0 +1,7578 @@
+const std = @import("std");
+const builtin = @import("builtin");
+const Allocator = std.mem.Allocator;
+const assert = std.debug.assert;
+const testing = std.testing;
+const mem = std.mem;
+const log = std.log.scoped(.codegen);
+
+const CodeGen = @This();
+const codegen = @import("../../codegen.zig");
+const Zcu = @import("../../Zcu.zig");
+const InternPool = @import("../../InternPool.zig");
+const Decl = Zcu.Decl;
+const Type = @import("../../Type.zig");
+const Value = @import("../../Value.zig");
+const Compilation = @import("../../Compilation.zig");
+const link = @import("../../link.zig");
+const Air = @import("../../Air.zig");
+const Mir = @import("Mir.zig");
+const abi = @import("../../codegen/wasm/abi.zig");
+const Alignment = InternPool.Alignment;
+const errUnionPayloadOffset = codegen.errUnionPayloadOffset;
+const errUnionErrorOffset = codegen.errUnionErrorOffset;
+
+const target_util = @import("../../target.zig");
+const libcFloatPrefix = target_util.libcFloatPrefix;
+const libcFloatSuffix = target_util.libcFloatSuffix;
+const compilerRtFloatAbbrev = target_util.compilerRtFloatAbbrev;
+const compilerRtIntAbbrev = target_util.compilerRtIntAbbrev;
+
+pub fn legalizeFeatures(_: *const std.Target) *const Air.Legalize.Features {
+    return comptime &.initMany(&.{
+        .expand_intcast_safe,
+        .expand_int_from_float_safe,
+        .expand_int_from_float_optimized_safe,
+        .expand_add_safe,
+        .expand_sub_safe,
+        .expand_mul_safe,
+    });
+}
+
+/// Reference to the function declaration the code
+/// section belongs to
+owner_nav: InternPool.Nav.Index,
+/// Current block depth. Used to calculate the relative difference between a break
+/// and block
+block_depth: u32 = 0,
+air: Air,
+liveness: Air.Liveness,
+gpa: mem.Allocator,
+func_index: InternPool.Index,
+/// Contains a list of current branches.
+/// When we return from a branch, the branch will be popped from this list,
+/// which means branches can only contain references from within its own branch,
+/// or a branch higher (lower index) in the tree.
+branches: std.ArrayListUnmanaged(Branch) = .empty,
+/// Table to save `WValue`'s generated by an `Air.Inst`
+// values: ValueTable,
+/// Mapping from Air.Inst.Index to block ids
+blocks: std.AutoArrayHashMapUnmanaged(Air.Inst.Index, struct {
+    label: u32,
+    value: WValue,
+}) = .{},
+/// Maps `loop` instructions to their label. `br` to here repeats the loop.
+loops: std.AutoHashMapUnmanaged(Air.Inst.Index, u32) = .empty,
+/// The index the next local generated will have
+/// NOTE: arguments share the index with locals therefore the first variable
+/// will have the index that comes after the last argument's index
+local_index: u32,
+/// The index of the current argument.
+/// Used to track which argument is being referenced in `airArg`.
+arg_index: u32 = 0,
+/// List of simd128 immediates. Each value is stored as an array of bytes.
+/// This list will only be populated for 128bit-simd values when the target features
+/// are enabled also.
+simd_immediates: std.ArrayListUnmanaged([16]u8) = .empty, +/// The Target we're emitting (used to call intInfo) +target: *const std.Target, +ptr_size: enum { wasm32, wasm64 }, +pt: Zcu.PerThread, +/// List of MIR Instructions +mir_instructions: std.MultiArrayList(Mir.Inst), +/// Contains extra data for MIR +mir_extra: std.ArrayListUnmanaged(u32), +/// List of all locals' types generated throughout this declaration +/// used to emit locals count at start of 'code' section. +mir_locals: std.ArrayListUnmanaged(std.wasm.Valtype), +/// Set of all UAVs referenced by this function. Key is the UAV value, value is the alignment. +/// `.none` means naturally aligned. An explicit alignment is never less than the natural alignment. +mir_uavs: std.AutoArrayHashMapUnmanaged(InternPool.Index, Alignment), +/// Set of all functions whose address this function has taken and which therefore might be called +/// via a `call_indirect` function. +mir_indirect_function_set: std.AutoArrayHashMapUnmanaged(InternPool.Nav.Index, void), +/// Set of all function types used by this function. These must be interned by the linker. +mir_func_tys: std.AutoArrayHashMapUnmanaged(InternPool.Index, void), +/// The number of `error_name_table_ref` instructions emitted. +error_name_table_ref_count: u32, +/// When a function is executing, we store the the current stack pointer's value within this local. +/// This value is then used to restore the stack pointer to the original value at the return of the function. +initial_stack_value: WValue = .none, +/// The current stack pointer subtracted with the stack size. From this value, we will calculate +/// all offsets of the stack values. +bottom_stack_value: WValue = .none, +/// Arguments of this function declaration +/// This will be set after `resolveCallingConventionValues` +args: []WValue, +/// This will only be `.none` if the function returns void, or returns an immediate. +/// When it returns a pointer to the stack, the `.local` tag will be active and must be populated +/// before this function returns its execution to the caller. +return_value: WValue, +/// The size of the stack this function occupies. In the function prologue +/// we will move the stack pointer by this number, forward aligned with the `stack_alignment`. +stack_size: u32 = 0, +/// The stack alignment, which is 16 bytes by default. This is specified by the +/// tool-conventions: https://github.com/WebAssembly/tool-conventions/blob/main/BasicCABI.md +/// and also what the llvm backend will emit. +/// However, local variables or the usage of `incoming_stack_alignment` in a `CallingConvention` can overwrite this default. +stack_alignment: Alignment = .@"16", + +// For each individual Wasm valtype we store a seperate free list which +// allows us to re-use locals that are no longer used. e.g. a temporary local. +/// A list of indexes which represents a local of valtype `i32`. +/// It is illegal to store a non-i32 valtype in this list. +free_locals_i32: std.ArrayListUnmanaged(u32) = .empty, +/// A list of indexes which represents a local of valtype `i64`. +/// It is illegal to store a non-i64 valtype in this list. +free_locals_i64: std.ArrayListUnmanaged(u32) = .empty, +/// A list of indexes which represents a local of valtype `f32`. +/// It is illegal to store a non-f32 valtype in this list. +free_locals_f32: std.ArrayListUnmanaged(u32) = .empty, +/// A list of indexes which represents a local of valtype `f64`. +/// It is illegal to store a non-f64 valtype in this list. 
+free_locals_f64: std.ArrayListUnmanaged(u32) = .empty, +/// A list of indexes which represents a local of valtype `v127`. +/// It is illegal to store a non-v128 valtype in this list. +free_locals_v128: std.ArrayListUnmanaged(u32) = .empty, + +/// When in debug mode, this tracks if no `finishAir` was missed. +/// Forgetting to call `finishAir` will cause the result to not be +/// stored in our `values` map and therefore cause bugs. +air_bookkeeping: @TypeOf(bookkeeping_init) = bookkeeping_init, + +/// Wasm Value, created when generating an instruction +const WValue = union(enum) { + /// `WValue` which has been freed and may no longer hold + /// any references. + dead: void, + /// May be referenced but is unused + none: void, + /// The value lives on top of the stack + stack: void, + /// Index of the local + local: struct { + /// Contains the index to the local + value: u32, + /// The amount of instructions referencing this `WValue` + references: u32, + }, + /// An immediate 32bit value + imm32: u32, + /// An immediate 64bit value + imm64: u64, + /// Index into the list of simd128 immediates. This `WValue` is + /// only possible in very rare cases, therefore it would be + /// a waste of memory to store the value in a 128 bit integer. + imm128: u32, + /// A constant 32bit float value + float32: f32, + /// A constant 64bit float value + float64: f64, + nav_ref: struct { + nav_index: InternPool.Nav.Index, + offset: i32 = 0, + }, + uav_ref: struct { + ip_index: InternPool.Index, + offset: i32 = 0, + orig_ptr_ty: InternPool.Index = .none, + }, + /// Offset from the bottom of the virtual stack, with the offset + /// pointing to where the value lives. + stack_offset: struct { + /// Contains the actual value of the offset + value: u32, + /// The amount of instructions referencing this `WValue` + references: u32, + }, + + /// Returns the offset from the bottom of the stack. This is useful when + /// we use the load or store instruction to ensure we retrieve the value + /// from the correct position, rather than the value that lives at the + /// bottom of the stack. For instances where `WValue` is not `stack_value` + /// this will return 0, which allows us to simply call this function for all + /// loads and stores without requiring checks everywhere. + fn offset(value: WValue) u32 { + switch (value) { + .stack_offset => |stack_offset| return stack_offset.value, + .dead => unreachable, + else => return 0, + } + } + + /// Promotes a `WValue` to a local when given value is on top of the stack. + /// When encountering a `local` or `stack_offset` this is essentially a no-op. + /// All other tags are illegal. + fn toLocal(value: WValue, gen: *CodeGen, ty: Type) InnerError!WValue { + switch (value) { + .stack => { + const new_local = try gen.allocLocal(ty); + try gen.addLocal(.local_set, new_local.local.value); + return new_local; + }, + .local, .stack_offset => return value, + else => unreachable, + } + } + + /// Marks a local as no longer being referenced and essentially allows + /// us to re-use it somewhere else within the function. + /// The valtype of the local is deducted by using the index of the given `WValue`. + fn free(value: *WValue, gen: *CodeGen) void { + if (value.* != .local) return; + const local_value = value.local.value; + const reserved = gen.args.len + @intFromBool(gen.return_value != .none); + if (local_value < reserved + 2) return; // reserved locals may never be re-used. Also accounts for 2 stack locals. 
+ + const index = local_value - reserved; + const valtype = gen.mir_locals.items[index]; + switch (valtype) { + .i32 => gen.free_locals_i32.append(gen.gpa, local_value) catch return, // It's ok to fail any of those, a new local can be allocated instead + .i64 => gen.free_locals_i64.append(gen.gpa, local_value) catch return, + .f32 => gen.free_locals_f32.append(gen.gpa, local_value) catch return, + .f64 => gen.free_locals_f64.append(gen.gpa, local_value) catch return, + .v128 => gen.free_locals_v128.append(gen.gpa, local_value) catch return, + } + log.debug("freed local ({d}) of type {}", .{ local_value, valtype }); + value.* = .dead; + } +}; + +const Op = enum { + @"unreachable", + nop, + block, + loop, + @"if", + @"else", + end, + br, + br_if, + br_table, + @"return", + call, + drop, + select, + global_get, + global_set, + load, + store, + memory_size, + memory_grow, + @"const", + eqz, + eq, + ne, + lt, + gt, + le, + ge, + clz, + ctz, + popcnt, + add, + sub, + mul, + div, + rem, + @"and", + @"or", + xor, + shl, + shr, + rotl, + rotr, + abs, + neg, + ceil, + floor, + trunc, + nearest, + sqrt, + min, + max, + copysign, + wrap, + convert, + demote, + promote, + reinterpret, + extend, +}; + +const OpcodeBuildArguments = struct { + /// First valtype in the opcode (usually represents the type of the output) + valtype1: ?std.wasm.Valtype = null, + /// The operation (e.g. call, unreachable, div, min, sqrt, etc.) + op: Op, + /// Width of the operation (e.g. 8 for i32_load8_s, 16 for i64_extend16_i32_s) + width: ?u8 = null, + /// Second valtype in the opcode name (usually represents the type of the input) + valtype2: ?std.wasm.Valtype = null, + /// Signedness of the op + signedness: ?std.builtin.Signedness = null, +}; + +/// TODO: deprecated, should be split up per tag. +fn buildOpcode(args: OpcodeBuildArguments) std.wasm.Opcode { + switch (args.op) { + .@"unreachable" => unreachable, + .nop => unreachable, + .block => unreachable, + .loop => unreachable, + .@"if" => unreachable, + .@"else" => unreachable, + .end => unreachable, + .br => unreachable, + .br_if => unreachable, + .br_table => unreachable, + .@"return" => unreachable, + .call => unreachable, + .drop => unreachable, + .select => unreachable, + .global_get => unreachable, + .global_set => unreachable, + + .load => if (args.width) |width| switch (width) { + 8 => switch (args.valtype1.?) { + .i32 => if (args.signedness.? == .signed) return .i32_load8_s else return .i32_load8_u, + .i64 => if (args.signedness.? == .signed) return .i64_load8_s else return .i64_load8_u, + .f32, .f64, .v128 => unreachable, + }, + 16 => switch (args.valtype1.?) { + .i32 => if (args.signedness.? == .signed) return .i32_load16_s else return .i32_load16_u, + .i64 => if (args.signedness.? == .signed) return .i64_load16_s else return .i64_load16_u, + .f32, .f64, .v128 => unreachable, + }, + 32 => switch (args.valtype1.?) { + .i64 => if (args.signedness.? == .signed) return .i64_load32_s else return .i64_load32_u, + .i32 => return .i32_load, + .f32 => return .f32_load, + .f64, .v128 => unreachable, + }, + 64 => switch (args.valtype1.?) { + .i64 => return .i64_load, + .f64 => return .f64_load, + else => unreachable, + }, + else => unreachable, + } else switch (args.valtype1.?) { + .i32 => return .i32_load, + .i64 => return .i64_load, + .f32 => return .f32_load, + .f64 => return .f64_load, + .v128 => unreachable, // handled independently + }, + .store => if (args.width) |width| { + switch (width) { + 8 => switch (args.valtype1.?) 
{ + .i32 => return .i32_store8, + .i64 => return .i64_store8, + .f32, .f64, .v128 => unreachable, + }, + 16 => switch (args.valtype1.?) { + .i32 => return .i32_store16, + .i64 => return .i64_store16, + .f32, .f64, .v128 => unreachable, + }, + 32 => switch (args.valtype1.?) { + .i64 => return .i64_store32, + .i32 => return .i32_store, + .f32 => return .f32_store, + .f64, .v128 => unreachable, + }, + 64 => switch (args.valtype1.?) { + .i64 => return .i64_store, + .f64 => return .f64_store, + else => unreachable, + }, + else => unreachable, + } + } else { + switch (args.valtype1.?) { + .i32 => return .i32_store, + .i64 => return .i64_store, + .f32 => return .f32_store, + .f64 => return .f64_store, + .v128 => unreachable, // handled independently + } + }, + + .memory_size => return .memory_size, + .memory_grow => return .memory_grow, + + .@"const" => switch (args.valtype1.?) { + .i32 => return .i32_const, + .i64 => return .i64_const, + .f32 => return .f32_const, + .f64 => return .f64_const, + .v128 => unreachable, // handled independently + }, + + .eqz => switch (args.valtype1.?) { + .i32 => return .i32_eqz, + .i64 => return .i64_eqz, + .f32, .f64, .v128 => unreachable, + }, + .eq => switch (args.valtype1.?) { + .i32 => return .i32_eq, + .i64 => return .i64_eq, + .f32 => return .f32_eq, + .f64 => return .f64_eq, + .v128 => unreachable, // handled independently + }, + .ne => switch (args.valtype1.?) { + .i32 => return .i32_ne, + .i64 => return .i64_ne, + .f32 => return .f32_ne, + .f64 => return .f64_ne, + .v128 => unreachable, // handled independently + }, + + .lt => switch (args.valtype1.?) { + .i32 => if (args.signedness.? == .signed) return .i32_lt_s else return .i32_lt_u, + .i64 => if (args.signedness.? == .signed) return .i64_lt_s else return .i64_lt_u, + .f32 => return .f32_lt, + .f64 => return .f64_lt, + .v128 => unreachable, // handled independently + }, + .gt => switch (args.valtype1.?) { + .i32 => if (args.signedness.? == .signed) return .i32_gt_s else return .i32_gt_u, + .i64 => if (args.signedness.? == .signed) return .i64_gt_s else return .i64_gt_u, + .f32 => return .f32_gt, + .f64 => return .f64_gt, + .v128 => unreachable, // handled independently + }, + .le => switch (args.valtype1.?) { + .i32 => if (args.signedness.? == .signed) return .i32_le_s else return .i32_le_u, + .i64 => if (args.signedness.? == .signed) return .i64_le_s else return .i64_le_u, + .f32 => return .f32_le, + .f64 => return .f64_le, + .v128 => unreachable, // handled independently + }, + .ge => switch (args.valtype1.?) { + .i32 => if (args.signedness.? == .signed) return .i32_ge_s else return .i32_ge_u, + .i64 => if (args.signedness.? == .signed) return .i64_ge_s else return .i64_ge_u, + .f32 => return .f32_ge, + .f64 => return .f64_ge, + .v128 => unreachable, // handled independently + }, + + .clz => switch (args.valtype1.?) { + .i32 => return .i32_clz, + .i64 => return .i64_clz, + .f32, .f64 => unreachable, + .v128 => unreachable, // handled independently + }, + .ctz => switch (args.valtype1.?) { + .i32 => return .i32_ctz, + .i64 => return .i64_ctz, + .f32, .f64 => unreachable, + .v128 => unreachable, // handled independently + }, + .popcnt => switch (args.valtype1.?) { + .i32 => return .i32_popcnt, + .i64 => return .i64_popcnt, + .f32, .f64 => unreachable, + .v128 => unreachable, // handled independently + }, + + .add => switch (args.valtype1.?) 
{ + .i32 => return .i32_add, + .i64 => return .i64_add, + .f32 => return .f32_add, + .f64 => return .f64_add, + .v128 => unreachable, // handled independently + }, + .sub => switch (args.valtype1.?) { + .i32 => return .i32_sub, + .i64 => return .i64_sub, + .f32 => return .f32_sub, + .f64 => return .f64_sub, + .v128 => unreachable, // handled independently + }, + .mul => switch (args.valtype1.?) { + .i32 => return .i32_mul, + .i64 => return .i64_mul, + .f32 => return .f32_mul, + .f64 => return .f64_mul, + .v128 => unreachable, // handled independently + }, + + .div => switch (args.valtype1.?) { + .i32 => if (args.signedness.? == .signed) return .i32_div_s else return .i32_div_u, + .i64 => if (args.signedness.? == .signed) return .i64_div_s else return .i64_div_u, + .f32 => return .f32_div, + .f64 => return .f64_div, + .v128 => unreachable, // handled independently + }, + .rem => switch (args.valtype1.?) { + .i32 => if (args.signedness.? == .signed) return .i32_rem_s else return .i32_rem_u, + .i64 => if (args.signedness.? == .signed) return .i64_rem_s else return .i64_rem_u, + .f32, .f64 => unreachable, + .v128 => unreachable, // handled independently + }, + + .@"and" => switch (args.valtype1.?) { + .i32 => return .i32_and, + .i64 => return .i64_and, + .f32, .f64 => unreachable, + .v128 => unreachable, // handled independently + }, + .@"or" => switch (args.valtype1.?) { + .i32 => return .i32_or, + .i64 => return .i64_or, + .f32, .f64 => unreachable, + .v128 => unreachable, // handled independently + }, + .xor => switch (args.valtype1.?) { + .i32 => return .i32_xor, + .i64 => return .i64_xor, + .f32, .f64 => unreachable, + .v128 => unreachable, // handled independently + }, + + .shl => switch (args.valtype1.?) { + .i32 => return .i32_shl, + .i64 => return .i64_shl, + .f32, .f64 => unreachable, + .v128 => unreachable, // handled independently + }, + .shr => switch (args.valtype1.?) { + .i32 => if (args.signedness.? == .signed) return .i32_shr_s else return .i32_shr_u, + .i64 => if (args.signedness.? == .signed) return .i64_shr_s else return .i64_shr_u, + .f32, .f64 => unreachable, + .v128 => unreachable, // handled independently + }, + .rotl => switch (args.valtype1.?) { + .i32 => return .i32_rotl, + .i64 => return .i64_rotl, + .f32, .f64 => unreachable, + .v128 => unreachable, // handled independently + }, + .rotr => switch (args.valtype1.?) { + .i32 => return .i32_rotr, + .i64 => return .i64_rotr, + .f32, .f64 => unreachable, + .v128 => unreachable, // handled independently + }, + + .abs => switch (args.valtype1.?) { + .i32, .i64 => unreachable, + .f32 => return .f32_abs, + .f64 => return .f64_abs, + .v128 => unreachable, // handled independently + }, + .neg => switch (args.valtype1.?) { + .i32, .i64 => unreachable, + .f32 => return .f32_neg, + .f64 => return .f64_neg, + .v128 => unreachable, // handled independently + }, + .ceil => switch (args.valtype1.?) { + .i64 => unreachable, + .i32 => return .f32_ceil, // when valtype is f16, we store it in i32. + .f32 => return .f32_ceil, + .f64 => return .f64_ceil, + .v128 => unreachable, // handled independently + }, + .floor => switch (args.valtype1.?) { + .i64 => unreachable, + .i32 => return .f32_floor, // when valtype is f16, we store it in i32. + .f32 => return .f32_floor, + .f64 => return .f64_floor, + .v128 => unreachable, // handled independently + }, + .trunc => switch (args.valtype1.?) { + .i32 => if (args.valtype2) |valty| switch (valty) { + .i32 => unreachable, + .i64 => unreachable, + .f32 => if (args.signedness.? 
== .signed) return .i32_trunc_f32_s else return .i32_trunc_f32_u, + .f64 => if (args.signedness.? == .signed) return .i32_trunc_f64_s else return .i32_trunc_f64_u, + .v128 => unreachable, // handled independently + } else return .f32_trunc, // when no valtype2, it's an f16 instead which is stored in an i32. + .i64 => switch (args.valtype2.?) { + .i32 => unreachable, + .i64 => unreachable, + .f32 => if (args.signedness.? == .signed) return .i64_trunc_f32_s else return .i64_trunc_f32_u, + .f64 => if (args.signedness.? == .signed) return .i64_trunc_f64_s else return .i64_trunc_f64_u, + .v128 => unreachable, // handled independently + }, + .f32 => return .f32_trunc, + .f64 => return .f64_trunc, + .v128 => unreachable, // handled independently + }, + .nearest => switch (args.valtype1.?) { + .i32, .i64 => unreachable, + .f32 => return .f32_nearest, + .f64 => return .f64_nearest, + .v128 => unreachable, // handled independently + }, + .sqrt => switch (args.valtype1.?) { + .i32, .i64 => unreachable, + .f32 => return .f32_sqrt, + .f64 => return .f64_sqrt, + .v128 => unreachable, // handled independently + }, + .min => switch (args.valtype1.?) { + .i32, .i64 => unreachable, + .f32 => return .f32_min, + .f64 => return .f64_min, + .v128 => unreachable, // handled independently + }, + .max => switch (args.valtype1.?) { + .i32, .i64 => unreachable, + .f32 => return .f32_max, + .f64 => return .f64_max, + .v128 => unreachable, // handled independently + }, + .copysign => switch (args.valtype1.?) { + .i32, .i64 => unreachable, + .f32 => return .f32_copysign, + .f64 => return .f64_copysign, + .v128 => unreachable, // handled independently + }, + + .wrap => switch (args.valtype1.?) { + .i32 => switch (args.valtype2.?) { + .i32 => unreachable, + .i64 => return .i32_wrap_i64, + .f32, .f64 => unreachable, + .v128 => unreachable, // handled independently + }, + .i64, .f32, .f64 => unreachable, + .v128 => unreachable, // handled independently + }, + .convert => switch (args.valtype1.?) { + .i32, .i64 => unreachable, + .f32 => switch (args.valtype2.?) { + .i32 => if (args.signedness.? == .signed) return .f32_convert_i32_s else return .f32_convert_i32_u, + .i64 => if (args.signedness.? == .signed) return .f32_convert_i64_s else return .f32_convert_i64_u, + .f32, .f64 => unreachable, + .v128 => unreachable, // handled independently + }, + .f64 => switch (args.valtype2.?) { + .i32 => if (args.signedness.? == .signed) return .f64_convert_i32_s else return .f64_convert_i32_u, + .i64 => if (args.signedness.? == .signed) return .f64_convert_i64_s else return .f64_convert_i64_u, + .f32, .f64 => unreachable, + .v128 => unreachable, // handled independently + }, + .v128 => unreachable, // handled independently + }, + .demote => if (args.valtype1.? == .f32 and args.valtype2.? == .f64) return .f32_demote_f64 else unreachable, + .promote => if (args.valtype1.? == .f64 and args.valtype2.? == .f32) return .f64_promote_f32 else unreachable, + .reinterpret => switch (args.valtype1.?) { + .i32 => if (args.valtype2.? == .f32) return .i32_reinterpret_f32 else unreachable, + .i64 => if (args.valtype2.? == .f64) return .i64_reinterpret_f64 else unreachable, + .f32 => if (args.valtype2.? == .i32) return .f32_reinterpret_i32 else unreachable, + .f64 => if (args.valtype2.? == .i64) return .f64_reinterpret_i64 else unreachable, + .v128 => unreachable, // handled independently + }, + .extend => switch (args.valtype1.?) { + .i32 => switch (args.width.?) { + 8 => if (args.signedness.? 
== .signed) return .i32_extend8_s else unreachable, + 16 => if (args.signedness.? == .signed) return .i32_extend16_s else unreachable, + else => unreachable, + }, + .i64 => switch (args.width.?) { + 8 => if (args.signedness.? == .signed) return .i64_extend8_s else unreachable, + 16 => if (args.signedness.? == .signed) return .i64_extend16_s else unreachable, + 32 => if (args.signedness.? == .signed) return .i64_extend32_s else unreachable, + else => unreachable, + }, + .f32, .f64 => unreachable, + .v128 => unreachable, // handled independently + }, + } +} + +test "Wasm - buildOpcode" { + // Make sure buildOpcode is referenced, and test some examples + const i32_const = buildOpcode(.{ .op = .@"const", .valtype1 = .i32 }); + const i64_extend32_s = buildOpcode(.{ .op = .extend, .valtype1 = .i64, .width = 32, .signedness = .signed }); + const f64_reinterpret_i64 = buildOpcode(.{ .op = .reinterpret, .valtype1 = .f64, .valtype2 = .i64 }); + + try testing.expectEqual(@as(std.wasm.Opcode, .i32_const), i32_const); + try testing.expectEqual(@as(std.wasm.Opcode, .i64_extend32_s), i64_extend32_s); + try testing.expectEqual(@as(std.wasm.Opcode, .f64_reinterpret_i64), f64_reinterpret_i64); +} + +/// Hashmap to store generated `WValue` for each `Air.Inst.Ref` +pub const ValueTable = std.AutoArrayHashMapUnmanaged(Air.Inst.Ref, WValue); + +const bookkeeping_init = if (std.debug.runtime_safety) @as(usize, 0) else {}; + +const InnerError = error{ + OutOfMemory, + /// An error occurred when trying to lower AIR to MIR. + CodegenFail, + /// Compiler implementation could not handle a large integer. + Overflow, +} || link.File.UpdateDebugInfoError; + +pub fn deinit(cg: *CodeGen) void { + const gpa = cg.gpa; + for (cg.branches.items) |*branch| branch.deinit(gpa); + cg.branches.deinit(gpa); + cg.blocks.deinit(gpa); + cg.loops.deinit(gpa); + cg.simd_immediates.deinit(gpa); + cg.free_locals_i32.deinit(gpa); + cg.free_locals_i64.deinit(gpa); + cg.free_locals_f32.deinit(gpa); + cg.free_locals_f64.deinit(gpa); + cg.free_locals_v128.deinit(gpa); + cg.mir_instructions.deinit(gpa); + cg.mir_extra.deinit(gpa); + cg.mir_locals.deinit(gpa); + cg.mir_uavs.deinit(gpa); + cg.mir_indirect_function_set.deinit(gpa); + cg.mir_func_tys.deinit(gpa); + cg.* = undefined; +} + +fn fail(cg: *CodeGen, comptime fmt: []const u8, args: anytype) error{ OutOfMemory, CodegenFail } { + const zcu = cg.pt.zcu; + const func = zcu.funcInfo(cg.func_index); + return zcu.codegenFail(func.owner_nav, fmt, args); +} + +/// Resolves the `WValue` for the given instruction `inst` +/// When the given instruction has a `Value`, it returns a constant instead +fn resolveInst(cg: *CodeGen, ref: Air.Inst.Ref) InnerError!WValue { + var branch_index = cg.branches.items.len; + while (branch_index > 0) : (branch_index -= 1) { + const branch = cg.branches.items[branch_index - 1]; + if (branch.values.get(ref)) |value| { + return value; + } + } + + // when we did not find an existing instruction, it + // means we must generate it from a constant. + // We always store constants in the most outer branch as they must never + // be removed. The most outer branch is always at index 0. 
+ const gop = try cg.branches.items[0].values.getOrPut(cg.gpa, ref); + assert(!gop.found_existing); + + const pt = cg.pt; + const zcu = pt.zcu; + const val = (try cg.air.value(ref, pt)).?; + const ty = cg.typeOf(ref); + if (!ty.hasRuntimeBitsIgnoreComptime(zcu) and !ty.isInt(zcu) and !ty.isError(zcu)) { + gop.value_ptr.* = .none; + return .none; + } + + // When we need to pass the value by reference (such as a struct), we will + // leverage `generateSymbol` to lower the constant to bytes and emit it + // to the 'rodata' section. We then return the index into the section as `WValue`. + // + // In the other cases, we will simply lower the constant to a value that fits + // into a single local (such as a pointer, integer, bool, etc). + const result: WValue = if (isByRef(ty, zcu, cg.target)) + .{ .uav_ref = .{ .ip_index = val.toIntern() } } + else + try cg.lowerConstant(val, ty); + + gop.value_ptr.* = result; + return result; +} + +fn resolveValue(cg: *CodeGen, val: Value) InnerError!WValue { + const zcu = cg.pt.zcu; + const ty = val.typeOf(zcu); + + return if (isByRef(ty, zcu, cg.target)) + .{ .uav_ref = .{ .ip_index = val.toIntern() } } + else + try cg.lowerConstant(val, ty); +} + +/// NOTE: if result == .stack, it will be stored in .local +fn finishAir(cg: *CodeGen, inst: Air.Inst.Index, result: WValue, operands: []const Air.Inst.Ref) InnerError!void { + assert(operands.len <= Air.Liveness.bpi - 1); + var tomb_bits = cg.liveness.getTombBits(inst); + for (operands) |operand| { + const dies = @as(u1, @truncate(tomb_bits)) != 0; + tomb_bits >>= 1; + if (!dies) continue; + processDeath(cg, operand); + } + + // results of `none` can never be referenced. + if (result != .none) { + const trackable_result = if (result != .stack) + result + else + try result.toLocal(cg, cg.typeOfIndex(inst)); + const branch = cg.currentBranch(); + branch.values.putAssumeCapacityNoClobber(inst.toRef(), trackable_result); + } + + if (std.debug.runtime_safety) { + cg.air_bookkeeping += 1; + } +} + +const Branch = struct { + values: ValueTable = .{}, + + fn deinit(branch: *Branch, gpa: Allocator) void { + branch.values.deinit(gpa); + branch.* = undefined; + } +}; + +inline fn currentBranch(cg: *CodeGen) *Branch { + return &cg.branches.items[cg.branches.items.len - 1]; +} + +const BigTomb = struct { + gen: *CodeGen, + inst: Air.Inst.Index, + lbt: Air.Liveness.BigTomb, + + fn feed(bt: *BigTomb, op_ref: Air.Inst.Ref) void { + const dies = bt.lbt.feed(); + if (!dies) return; + // This will be a nop for interned constants. + processDeath(bt.gen, op_ref); + } + + fn finishAir(bt: *BigTomb, result: WValue) void { + assert(result != .stack); + if (result != .none) { + bt.gen.currentBranch().values.putAssumeCapacityNoClobber(bt.inst.toRef(), result); + } + + if (std.debug.runtime_safety) { + bt.gen.air_bookkeeping += 1; + } + } +}; + +fn iterateBigTomb(cg: *CodeGen, inst: Air.Inst.Index, operand_count: usize) !BigTomb { + try cg.currentBranch().values.ensureUnusedCapacity(cg.gpa, operand_count + 1); + return BigTomb{ + .gen = cg, + .inst = inst, + .lbt = cg.liveness.iterateBigTomb(inst), + }; +} + +fn processDeath(cg: *CodeGen, ref: Air.Inst.Ref) void { + if (ref.toIndex() == null) return; + // Branches are currently only allowed to free locals allocated + // within their own branch. + // TODO: Upon branch consolidation free any locals if needed. 
+ const value = cg.currentBranch().values.getPtr(ref) orelse return; + if (value.* != .local) return; + const reserved_indexes = cg.args.len + @intFromBool(cg.return_value != .none); + if (value.local.value < reserved_indexes) { + return; // function arguments can never be re-used + } + log.debug("Decreasing reference for ref: %{d}, using local '{d}'", .{ @intFromEnum(ref.toIndex().?), value.local.value }); + value.local.references -= 1; // if this panics, a call to `reuseOperand` was forgotten by the developer + if (value.local.references == 0) { + value.free(cg); + } +} + +fn addInst(cg: *CodeGen, inst: Mir.Inst) error{OutOfMemory}!void { + try cg.mir_instructions.append(cg.gpa, inst); +} + +fn addTag(cg: *CodeGen, tag: Mir.Inst.Tag) error{OutOfMemory}!void { + try cg.addInst(.{ .tag = tag, .data = .{ .tag = {} } }); +} + +fn addExtended(cg: *CodeGen, opcode: std.wasm.MiscOpcode) error{OutOfMemory}!void { + const extra_index: u32 = @intCast(cg.mir_extra.items.len); + try cg.mir_extra.append(cg.gpa, @intFromEnum(opcode)); + try cg.addInst(.{ .tag = .misc_prefix, .data = .{ .payload = extra_index } }); +} + +fn addLabel(cg: *CodeGen, tag: Mir.Inst.Tag, label: u32) error{OutOfMemory}!void { + try cg.addInst(.{ .tag = tag, .data = .{ .label = label } }); +} + +fn addLocal(cg: *CodeGen, tag: Mir.Inst.Tag, local: u32) error{OutOfMemory}!void { + try cg.addInst(.{ .tag = tag, .data = .{ .local = local } }); +} + +/// Accepts an unsigned 32bit integer rather than a signed integer to +/// prevent us from having to bitcast multiple times as most values +/// within codegen are represented as unsigned rather than signed. +fn addImm32(cg: *CodeGen, imm: u32) error{OutOfMemory}!void { + try cg.addInst(.{ .tag = .i32_const, .data = .{ .imm32 = @bitCast(imm) } }); +} + +/// Accepts an unsigned 64bit integer rather than a signed integer to +/// prevent us from having to bitcast multiple times as most values +/// within codegen are represented as unsigned rather than signed. +fn addImm64(cg: *CodeGen, imm: u64) error{OutOfMemory}!void { + const extra_index = try cg.addExtra(Mir.Imm64.init(imm)); + try cg.addInst(.{ .tag = .i64_const, .data = .{ .payload = extra_index } }); +} + +/// Accepts the index into the list of 128bit-immediates +fn addImm128(cg: *CodeGen, index: u32) error{OutOfMemory}!void { + const simd_values = cg.simd_immediates.items[index]; + const extra_index: u32 = @intCast(cg.mir_extra.items.len); + // tag + 128bit value + try cg.mir_extra.ensureUnusedCapacity(cg.gpa, 5); + cg.mir_extra.appendAssumeCapacity(@intFromEnum(std.wasm.SimdOpcode.v128_const)); + cg.mir_extra.appendSliceAssumeCapacity(@alignCast(mem.bytesAsSlice(u32, &simd_values))); + try cg.addInst(.{ .tag = .simd_prefix, .data = .{ .payload = extra_index } }); +} + +fn addFloat64(cg: *CodeGen, float: f64) error{OutOfMemory}!void { + const extra_index = try cg.addExtra(Mir.Float64.init(float)); + try cg.addInst(.{ .tag = .f64_const, .data = .{ .payload = extra_index } }); +} + +/// Inserts an instruction to load/store from/to wasm's linear memory dependent on the given `tag`. +fn addMemArg(cg: *CodeGen, tag: Mir.Inst.Tag, mem_arg: Mir.MemArg) error{OutOfMemory}!void { + const extra_index = try cg.addExtra(mem_arg); + try cg.addInst(.{ .tag = tag, .data = .{ .payload = extra_index } }); +} + +/// Inserts an instruction from the 'atomics' feature which accesses wasm's linear memory dependent on the +/// given `tag`. 
+fn addAtomicMemArg(cg: *CodeGen, tag: std.wasm.AtomicsOpcode, mem_arg: Mir.MemArg) error{OutOfMemory}!void { + const extra_index = try cg.addExtra(@as(struct { val: u32 }, .{ .val = @intFromEnum(tag) })); + _ = try cg.addExtra(mem_arg); + try cg.addInst(.{ .tag = .atomics_prefix, .data = .{ .payload = extra_index } }); +} + +/// Helper function to emit atomic mir opcodes. +fn addAtomicTag(cg: *CodeGen, tag: std.wasm.AtomicsOpcode) error{OutOfMemory}!void { + const extra_index = try cg.addExtra(@as(struct { val: u32 }, .{ .val = @intFromEnum(tag) })); + try cg.addInst(.{ .tag = .atomics_prefix, .data = .{ .payload = extra_index } }); +} + +/// Appends entries to `mir_extra` based on the type of `extra`. +/// Returns the index into `mir_extra` +fn addExtra(cg: *CodeGen, extra: anytype) error{OutOfMemory}!u32 { + const fields = std.meta.fields(@TypeOf(extra)); + try cg.mir_extra.ensureUnusedCapacity(cg.gpa, fields.len); + return cg.addExtraAssumeCapacity(extra); +} + +/// Appends entries to `mir_extra` based on the type of `extra`. +/// Returns the index into `mir_extra` +fn addExtraAssumeCapacity(cg: *CodeGen, extra: anytype) error{OutOfMemory}!u32 { + const fields = std.meta.fields(@TypeOf(extra)); + const result: u32 = @intCast(cg.mir_extra.items.len); + inline for (fields) |field| { + cg.mir_extra.appendAssumeCapacity(switch (field.type) { + u32 => @field(extra, field.name), + i32 => @bitCast(@field(extra, field.name)), + InternPool.Index, + InternPool.Nav.Index, + => @intFromEnum(@field(extra, field.name)), + else => |field_type| @compileError("Unsupported field type " ++ @typeName(field_type)), + }); + } + return result; +} + +/// For `std.builtin.CallingConvention.auto`. +pub fn typeToValtype(ty: Type, zcu: *const Zcu, target: *const std.Target) std.wasm.Valtype { + const ip = &zcu.intern_pool; + return switch (ty.zigTypeTag(zcu)) { + .float => switch (ty.floatBits(target)) { + 16 => .i32, // stored/loaded as u16 + 32 => .f32, + 64 => .f64, + 80, 128 => .i32, + else => unreachable, + }, + .int, .@"enum" => switch (ty.intInfo(zcu).bits) { + 0...32 => .i32, + 33...64 => .i64, + else => .i32, + }, + .@"struct" => blk: { + if (zcu.typeToPackedStruct(ty)) |packed_struct| { + const backing_int_ty = Type.fromInterned(packed_struct.backingIntTypeUnordered(ip)); + break :blk typeToValtype(backing_int_ty, zcu, target); + } else { + break :blk .i32; + } + }, + .vector => switch (CodeGen.determineSimdStoreStrategy(ty, zcu, target)) { + .direct => .v128, + .unrolled => .i32, + }, + .@"union" => switch (ty.containerLayout(zcu)) { + .@"packed" => switch (ty.bitSize(zcu)) { + 0...32 => .i32, + 33...64 => .i64, + else => .i32, + }, + else => .i32, + }, + else => .i32, // all represented as reference/immediate + }; +} + +/// Using a given `Type`, returns the corresponding wasm value type +/// Differently from `typeToValtype` this also allows `void` to create a block +/// with no return type +fn genBlockType(ty: Type, zcu: *const Zcu, target: *const std.Target) std.wasm.BlockType { + return switch (ty.ip_index) { + .void_type, .noreturn_type => .empty, + else => .fromValtype(typeToValtype(ty, zcu, target)), + }; +} + +/// Writes the bytecode depending on the given `WValue` in `val` +fn emitWValue(cg: *CodeGen, value: WValue) InnerError!void { + switch (value) { + .dead => unreachable, // reference to free'd `WValue` (missing reuseOperand?) 
+ .none, .stack => {}, // no-op + .local => |idx| try cg.addLocal(.local_get, idx.value), + .imm32 => |val| try cg.addImm32(val), + .imm64 => |val| try cg.addImm64(val), + .imm128 => |val| try cg.addImm128(val), + .float32 => |val| try cg.addInst(.{ .tag = .f32_const, .data = .{ .float32 = val } }), + .float64 => |val| try cg.addFloat64(val), + .nav_ref => |nav_ref| { + const zcu = cg.pt.zcu; + const ip = &zcu.intern_pool; + if (ip.getNav(nav_ref.nav_index).isFn(ip)) { + assert(nav_ref.offset == 0); + try cg.mir_indirect_function_set.put(cg.gpa, nav_ref.nav_index, {}); + try cg.addInst(.{ .tag = .func_ref, .data = .{ .nav_index = nav_ref.nav_index } }); + } else if (nav_ref.offset == 0) { + try cg.addInst(.{ .tag = .nav_ref, .data = .{ .nav_index = nav_ref.nav_index } }); + } else { + try cg.addInst(.{ + .tag = .nav_ref_off, + .data = .{ + .payload = try cg.addExtra(Mir.NavRefOff{ + .nav_index = nav_ref.nav_index, + .offset = nav_ref.offset, + }), + }, + }); + } + }, + .uav_ref => |uav| { + const zcu = cg.pt.zcu; + const ip = &zcu.intern_pool; + assert(!ip.isFunctionType(ip.typeOf(uav.ip_index))); + const gop = try cg.mir_uavs.getOrPut(cg.gpa, uav.ip_index); + const this_align: Alignment = a: { + if (uav.orig_ptr_ty == .none) break :a .none; + const ptr_type = ip.indexToKey(uav.orig_ptr_ty).ptr_type; + const this_align = ptr_type.flags.alignment; + if (this_align == .none) break :a .none; + const abi_align = Type.fromInterned(ptr_type.child).abiAlignment(zcu); + if (this_align.compare(.lte, abi_align)) break :a .none; + break :a this_align; + }; + if (!gop.found_existing or + gop.value_ptr.* == .none or + (this_align != .none and this_align.compare(.gt, gop.value_ptr.*))) + { + gop.value_ptr.* = this_align; + } + if (uav.offset == 0) { + try cg.addInst(.{ + .tag = .uav_ref, + .data = .{ .ip_index = uav.ip_index }, + }); + } else { + try cg.addInst(.{ + .tag = .uav_ref_off, + .data = .{ .payload = try cg.addExtra(@as(Mir.UavRefOff, .{ + .value = uav.ip_index, + .offset = uav.offset, + })) }, + }); + } + }, + .stack_offset => try cg.addLocal(.local_get, cg.bottom_stack_value.local.value), // caller must ensure to address the offset + } +} + +/// If given a local or stack-offset, increases the reference count by 1. +/// The old `WValue` found at instruction `ref` is then replaced by the +/// modified `WValue` and returned. When given a non-local or non-stack-offset, +/// returns the given `operand` itfunc instead. +fn reuseOperand(cg: *CodeGen, ref: Air.Inst.Ref, operand: WValue) WValue { + if (operand != .local and operand != .stack_offset) return operand; + var new_value = operand; + switch (new_value) { + .local => |*local| local.references += 1, + .stack_offset => |*stack_offset| stack_offset.references += 1, + else => unreachable, + } + const old_value = cg.getResolvedInst(ref); + old_value.* = new_value; + return new_value; +} + +/// From a reference, returns its resolved `WValue`. +/// It's illegal to provide a `Air.Inst.Ref` that hasn't been resolved yet. +fn getResolvedInst(cg: *CodeGen, ref: Air.Inst.Ref) *WValue { + var index = cg.branches.items.len; + while (index > 0) : (index -= 1) { + const branch = cg.branches.items[index - 1]; + if (branch.values.getPtr(ref)) |value| { + return value; + } + } + unreachable; // developer-error: This can only be called on resolved instructions. Use `resolveInst` instead. +} + +/// Creates one locals for a given `Type`. 
+/// Returns a corresponding `Wvalue` with `local` as active tag +fn allocLocal(cg: *CodeGen, ty: Type) InnerError!WValue { + const zcu = cg.pt.zcu; + const valtype = typeToValtype(ty, zcu, cg.target); + const index_or_null = switch (valtype) { + .i32 => cg.free_locals_i32.pop(), + .i64 => cg.free_locals_i64.pop(), + .f32 => cg.free_locals_f32.pop(), + .f64 => cg.free_locals_f64.pop(), + .v128 => cg.free_locals_v128.pop(), + }; + if (index_or_null) |index| { + log.debug("reusing local ({d}) of type {}", .{ index, valtype }); + return .{ .local = .{ .value = index, .references = 1 } }; + } + log.debug("new local of type {}", .{valtype}); + return cg.ensureAllocLocal(ty); +} + +/// Ensures a new local will be created. This is useful when it's useful +/// to use a zero-initialized local. +fn ensureAllocLocal(cg: *CodeGen, ty: Type) InnerError!WValue { + const zcu = cg.pt.zcu; + try cg.mir_locals.append(cg.gpa, typeToValtype(ty, zcu, cg.target)); + const initial_index = cg.local_index; + cg.local_index += 1; + return .{ .local = .{ .value = initial_index, .references = 1 } }; +} + +pub const Error = error{ + OutOfMemory, + /// Compiler was asked to operate on a number larger than supported. + Overflow, + /// Indicates the error is already stored in Zcu `failed_codegen`. + CodegenFail, +}; + +pub fn generate( + bin_file: *link.File, + pt: Zcu.PerThread, + src_loc: Zcu.LazySrcLoc, + func_index: InternPool.Index, + air: *const Air, + liveness: *const ?Air.Liveness, +) Error!Mir { + _ = src_loc; + _ = bin_file; + const zcu = pt.zcu; + const gpa = zcu.gpa; + const cg = zcu.funcInfo(func_index); + const file_scope = zcu.navFileScope(cg.owner_nav); + const target = &file_scope.mod.?.resolved_target.result; + const fn_ty = zcu.navValue(cg.owner_nav).typeOf(zcu); + const fn_info = zcu.typeToFunc(fn_ty).?; + const ret_ty: Type = .fromInterned(fn_info.return_type); + const any_returns = !firstParamSRet(fn_info.cc, ret_ty, zcu, target) and ret_ty.hasRuntimeBitsIgnoreComptime(zcu); + + var cc_result = try resolveCallingConventionValues(zcu, fn_ty, target); + defer cc_result.deinit(gpa); + + var code_gen: CodeGen = .{ + .gpa = gpa, + .pt = pt, + .air = air.*, + .liveness = liveness.*.?, + .owner_nav = cg.owner_nav, + .target = target, + .ptr_size = switch (target.cpu.arch) { + .wasm32 => .wasm32, + .wasm64 => .wasm64, + else => unreachable, + }, + .func_index = func_index, + .args = cc_result.args, + .return_value = cc_result.return_value, + .local_index = cc_result.local_index, + .mir_instructions = .empty, + .mir_extra = .empty, + .mir_locals = .empty, + .mir_uavs = .empty, + .mir_indirect_function_set = .empty, + .mir_func_tys = .empty, + .error_name_table_ref_count = 0, + }; + defer code_gen.deinit(); + + try code_gen.mir_func_tys.putNoClobber(gpa, fn_ty.toIntern(), {}); + + return generateInner(&code_gen, any_returns) catch |err| switch (err) { + error.CodegenFail, + error.OutOfMemory, + error.Overflow, + => |e| return e, + else => |e| return code_gen.fail("failed to generate function: {s}", .{@errorName(e)}), + }; +} + +fn generateInner(cg: *CodeGen, any_returns: bool) InnerError!Mir { + const zcu = cg.pt.zcu; + try cg.branches.append(cg.gpa, .{}); + // clean up outer branch + defer { + var outer_branch = cg.branches.pop().?; + outer_branch.deinit(cg.gpa); + assert(cg.branches.items.len == 0); // missing branch merge + } + // Generate MIR for function body + try cg.genBody(cg.air.getMainBody()); + + // In case we have a return value, but the last instruction is a noreturn (such as a while loop) + // 
we emit an unreachable instruction to tell the stack validator that part will never be reached. + if (any_returns and cg.air.instructions.len > 0) { + const inst: Air.Inst.Index = @enumFromInt(cg.air.instructions.len - 1); + const last_inst_ty = cg.typeOfIndex(inst); + if (!last_inst_ty.hasRuntimeBitsIgnoreComptime(zcu) or last_inst_ty.isNoReturn(zcu)) { + try cg.addTag(.@"unreachable"); + } + } + // End of function body + try cg.addTag(.end); + try cg.addTag(.dbg_epilogue_begin); + + var mir: Mir = .{ + .instructions = cg.mir_instructions.toOwnedSlice(), + .extra = &.{}, // fallible so assigned after errdefer + .locals = &.{}, // fallible so assigned after errdefer + .prologue = if (cg.initial_stack_value == .none) .none else .{ + .sp_local = cg.initial_stack_value.local.value, + .flags = .{ .stack_alignment = cg.stack_alignment }, + .stack_size = cg.stack_size, + .bottom_stack_local = cg.bottom_stack_value.local.value, + }, + .uavs = cg.mir_uavs.move(), + .indirect_function_set = cg.mir_indirect_function_set.move(), + .func_tys = cg.mir_func_tys.move(), + .error_name_table_ref_count = cg.error_name_table_ref_count, + }; + errdefer mir.deinit(cg.gpa); + mir.extra = try cg.mir_extra.toOwnedSlice(cg.gpa); + mir.locals = try cg.mir_locals.toOwnedSlice(cg.gpa); + return mir; +} + +const CallWValues = struct { + args: []WValue, + return_value: WValue, + local_index: u32, + + fn deinit(values: *CallWValues, gpa: Allocator) void { + gpa.free(values.args); + values.* = undefined; + } +}; + +fn resolveCallingConventionValues( + zcu: *const Zcu, + fn_ty: Type, + target: *const std.Target, +) Allocator.Error!CallWValues { + const gpa = zcu.gpa; + const ip = &zcu.intern_pool; + const fn_info = zcu.typeToFunc(fn_ty).?; + const cc = fn_info.cc; + + var result: CallWValues = .{ + .args = &.{}, + .return_value = .none, + .local_index = 0, + }; + if (cc == .naked) return result; + + var args = std.array_list.Managed(WValue).init(gpa); + defer args.deinit(); + + // Check if we store the result as a pointer to the stack rather than + // by value + if (firstParamSRet(fn_info.cc, Type.fromInterned(fn_info.return_type), zcu, target)) { + // the sret arg will be passed as first argument, therefore we + // set the `return_value` before allocating locals for regular args. + result.return_value = .{ .local = .{ .value = result.local_index, .references = 1 } }; + result.local_index += 1; + } + + switch (cc) { + .auto => { + for (fn_info.param_types.get(ip)) |ty| { + if (!Type.fromInterned(ty).hasRuntimeBitsIgnoreComptime(zcu)) { + continue; + } + + try args.append(.{ .local = .{ .value = result.local_index, .references = 1 } }); + result.local_index += 1; + } + }, + .wasm_mvp => { + for (fn_info.param_types.get(ip)) |ty| { + if (!Type.fromInterned(ty).hasRuntimeBitsIgnoreComptime(zcu)) { + continue; + } + switch (abi.classifyType(.fromInterned(ty), zcu)) { + .direct => |scalar_ty| if (!abi.lowerAsDoubleI64(scalar_ty, zcu)) { + try args.append(.{ .local = .{ .value = result.local_index, .references = 1 } }); + result.local_index += 1; + } else { + try args.append(.{ .local = .{ .value = result.local_index, .references = 1 } }); + try args.append(.{ .local = .{ .value = result.local_index + 1, .references = 1 } }); + result.local_index += 2; + }, + .indirect => { + try args.append(.{ .local = .{ .value = result.local_index, .references = 1 } }); + result.local_index += 1; + }, + } + } + }, + else => unreachable, // Frontend is responsible for emitting an error earlier. 
+ } + result.args = try args.toOwnedSlice(); + return result; +} + +pub fn firstParamSRet( + cc: std.builtin.CallingConvention, + return_type: Type, + zcu: *const Zcu, + target: *const std.Target, +) bool { + if (!return_type.hasRuntimeBitsIgnoreComptime(zcu)) return false; + switch (cc) { + .@"inline" => unreachable, + .auto => return isByRef(return_type, zcu, target), + .wasm_mvp => switch (abi.classifyType(return_type, zcu)) { + .direct => |scalar_ty| return abi.lowerAsDoubleI64(scalar_ty, zcu), + .indirect => return true, + }, + else => return false, + } +} + +/// Lowers a Zig type and its value based on a given calling convention to ensure +/// it matches the ABI. +fn lowerArg(cg: *CodeGen, cc: std.builtin.CallingConvention, ty: Type, value: WValue) !void { + if (cc != .wasm_mvp) { + return cg.lowerToStack(value); + } + + const zcu = cg.pt.zcu; + + switch (abi.classifyType(ty, zcu)) { + .direct => |scalar_type| if (!abi.lowerAsDoubleI64(scalar_type, zcu)) { + if (!isByRef(ty, zcu, cg.target)) { + return cg.lowerToStack(value); + } else { + switch (value) { + .nav_ref, .stack_offset => _ = try cg.load(value, scalar_type, 0), + .dead => unreachable, + else => try cg.emitWValue(value), + } + } + } else { + assert(ty.abiSize(zcu) == 16); + // in this case we have an integer or float that must be lowered as 2 i64's. + try cg.emitWValue(value); + try cg.addMemArg(.i64_load, .{ .offset = value.offset(), .alignment = 8 }); + try cg.emitWValue(value); + try cg.addMemArg(.i64_load, .{ .offset = value.offset() + 8, .alignment = 8 }); + }, + .indirect => return cg.lowerToStack(value), + } +} + +/// Lowers a `WValue` to the stack. This means when the `value` results in +/// `.stack_offset` we calculate the pointer of this offset and use that. +/// The value is left on the stack, and not stored in any temporary. +fn lowerToStack(cg: *CodeGen, value: WValue) !void { + switch (value) { + .stack_offset => |offset| { + try cg.emitWValue(value); + if (offset.value > 0) { + switch (cg.ptr_size) { + .wasm32 => { + try cg.addImm32(offset.value); + try cg.addTag(.i32_add); + }, + .wasm64 => { + try cg.addImm64(offset.value); + try cg.addTag(.i64_add); + }, + } + } + }, + else => try cg.emitWValue(value), + } +} + +/// Creates a local for the initial stack value +/// Asserts `initial_stack_value` is `.none` +fn initializeStack(cg: *CodeGen) !void { + assert(cg.initial_stack_value == .none); + // Reserve a local to store the current stack pointer + // We can later use this local to set the stack pointer back to the value + // we have stored here. + cg.initial_stack_value = try cg.ensureAllocLocal(Type.usize); + // Also reserve a local to store the bottom stack value + cg.bottom_stack_value = try cg.ensureAllocLocal(Type.usize); +} + +/// Reads the stack pointer from `Context.initial_stack_value` and writes it +/// to the global stack pointer variable +fn restoreStackPointer(cg: *CodeGen) !void { + // only restore the pointer if it was initialized + if (cg.initial_stack_value == .none) return; + // Get the original stack pointer's value + try cg.emitWValue(cg.initial_stack_value); + + try cg.addTag(.global_set_sp); +} + +/// From a given type, will create space on the virtual stack to store the value of such type. +/// This returns a `WValue` with its active tag set to `local`, containing the index to the local +/// that points to the position on the virtual stack. This function should be used instead of +/// moveStack unless a local was already created to store the pointer. 
+/// +/// Asserts Type has codegenbits +fn allocStack(cg: *CodeGen, ty: Type) !WValue { + const pt = cg.pt; + const zcu = pt.zcu; + assert(ty.hasRuntimeBitsIgnoreComptime(zcu)); + if (cg.initial_stack_value == .none) { + try cg.initializeStack(); + } + + const abi_size = std.math.cast(u32, ty.abiSize(zcu)) orelse { + return cg.fail("Type {f} with ABI size of {d} exceeds stack frame size", .{ + ty.fmt(pt), ty.abiSize(zcu), + }); + }; + const abi_align = ty.abiAlignment(zcu); + + cg.stack_alignment = cg.stack_alignment.max(abi_align); + + const offset: u32 = @intCast(abi_align.forward(cg.stack_size)); + defer cg.stack_size = offset + abi_size; + + return .{ .stack_offset = .{ .value = offset, .references = 1 } }; +} + +/// From a given AIR instruction generates a pointer to the stack where +/// the value of its type will live. +/// This is different from allocStack where this will use the pointer's alignment +/// if it is set, to ensure the stack alignment will be set correctly. +fn allocStackPtr(cg: *CodeGen, inst: Air.Inst.Index) !WValue { + const pt = cg.pt; + const zcu = pt.zcu; + const ptr_ty = cg.typeOfIndex(inst); + const pointee_ty = ptr_ty.childType(zcu); + + if (cg.initial_stack_value == .none) { + try cg.initializeStack(); + } + + if (!pointee_ty.hasRuntimeBitsIgnoreComptime(zcu)) { + return cg.allocStack(Type.usize); // create a value containing just the stack pointer. + } + + const abi_alignment = ptr_ty.ptrAlignment(zcu); + const abi_size = std.math.cast(u32, pointee_ty.abiSize(zcu)) orelse { + return cg.fail("Type {f} with ABI size of {d} exceeds stack frame size", .{ + pointee_ty.fmt(pt), pointee_ty.abiSize(zcu), + }); + }; + cg.stack_alignment = cg.stack_alignment.max(abi_alignment); + + const offset: u32 = @intCast(abi_alignment.forward(cg.stack_size)); + defer cg.stack_size = offset + abi_size; + + return .{ .stack_offset = .{ .value = offset, .references = 1 } }; +} + +/// From given zig bitsize, returns the wasm bitsize +fn toWasmBits(bits: u16) ?u16 { + return for ([_]u16{ 32, 64, 128 }) |wasm_bits| { + if (bits <= wasm_bits) return wasm_bits; + } else null; +} + +/// Performs a copy of bytes for a given type. Copying all bytes +/// from rhs to lhs. +fn memcpy(cg: *CodeGen, dst: WValue, src: WValue, len: WValue) !void { + const len_known_neq_0 = switch (len) { + .imm32 => |val| if (val != 0) true else return, + .imm64 => |val| if (val != 0) true else return, + else => false, + }; + // When bulk_memory is enabled, we lower it to wasm's memcpy instruction. + // If not, we lower it ourselves manually + if (cg.target.cpu.has(.wasm, .bulk_memory)) { + const len0_ok = cg.target.cpu.has(.wasm, .nontrapping_bulk_memory_len0); + const emit_check = !(len0_ok or len_known_neq_0); + + if (emit_check) { + try cg.startBlock(.block, .empty); + + // Even if `len` is zero, the spec requires an implementation to trap if `src + len` or + // `dst + len` are out of memory bounds. This can easily happen in Zig in a case such + // as: + // + // const dst: [*]u8 = undefined; + // const src: [*]u8 = undefined; + // var len: usize = runtime_zero(); + // @memcpy(dst[0..len], src[0..len]); + // + // So explicitly avoid using `memory.copy` in the `len == 0` case. Lovely design. 
+ try cg.emitWValue(len); + try cg.addTag(.i32_eqz); + try cg.addLabel(.br_if, 0); + } + + try cg.lowerToStack(dst); + try cg.lowerToStack(src); + try cg.emitWValue(len); + try cg.addExtended(.memory_copy); + + if (emit_check) { + try cg.endBlock(); + } + + return; + } + + // when the length is comptime-known, rather than a runtime value, we can optimize the generated code by having + // the loop during codegen, rather than inserting a runtime loop into the binary. + switch (len) { + .imm32, .imm64 => blk: { + const length = switch (len) { + .imm32 => |val| val, + .imm64 => |val| val, + else => unreachable, + }; + // if the size (length) is more than 32 bytes, we use a runtime loop instead to prevent + // binary size bloat. + if (length > 32) break :blk; + var offset: u32 = 0; + const lhs_base = dst.offset(); + const rhs_base = src.offset(); + while (offset < length) : (offset += 1) { + // get dst's address to store the result + try cg.emitWValue(dst); + // load byte from src's address + try cg.emitWValue(src); + switch (cg.ptr_size) { + .wasm32 => { + try cg.addMemArg(.i32_load8_u, .{ .offset = rhs_base + offset, .alignment = 1 }); + try cg.addMemArg(.i32_store8, .{ .offset = lhs_base + offset, .alignment = 1 }); + }, + .wasm64 => { + try cg.addMemArg(.i64_load8_u, .{ .offset = rhs_base + offset, .alignment = 1 }); + try cg.addMemArg(.i64_store8, .{ .offset = lhs_base + offset, .alignment = 1 }); + }, + } + } + return; + }, + else => {}, + } + + // allocate a local for the offset, and set it to 0. + // This to ensure that inside loops we correctly re-set the counter. + var offset = try cg.allocLocal(Type.usize); // local for counter + defer offset.free(cg); + switch (cg.ptr_size) { + .wasm32 => try cg.addImm32(0), + .wasm64 => try cg.addImm64(0), + } + try cg.addLocal(.local_set, offset.local.value); + + // outer block to jump to when loop is done + try cg.startBlock(.block, .empty); + try cg.startBlock(.loop, .empty); + + // loop condition (offset == length -> break) + { + try cg.emitWValue(offset); + try cg.emitWValue(len); + switch (cg.ptr_size) { + .wasm32 => try cg.addTag(.i32_eq), + .wasm64 => try cg.addTag(.i64_eq), + } + try cg.addLabel(.br_if, 1); // jump out of loop into outer block (finished) + } + + // get dst ptr + { + try cg.emitWValue(dst); + try cg.emitWValue(offset); + switch (cg.ptr_size) { + .wasm32 => try cg.addTag(.i32_add), + .wasm64 => try cg.addTag(.i64_add), + } + } + + // get src value and also store in dst + { + try cg.emitWValue(src); + try cg.emitWValue(offset); + switch (cg.ptr_size) { + .wasm32 => { + try cg.addTag(.i32_add); + try cg.addMemArg(.i32_load8_u, .{ .offset = src.offset(), .alignment = 1 }); + try cg.addMemArg(.i32_store8, .{ .offset = dst.offset(), .alignment = 1 }); + }, + .wasm64 => { + try cg.addTag(.i64_add); + try cg.addMemArg(.i64_load8_u, .{ .offset = src.offset(), .alignment = 1 }); + try cg.addMemArg(.i64_store8, .{ .offset = dst.offset(), .alignment = 1 }); + }, + } + } + + // increment loop counter + { + try cg.emitWValue(offset); + switch (cg.ptr_size) { + .wasm32 => { + try cg.addImm32(1); + try cg.addTag(.i32_add); + }, + .wasm64 => { + try cg.addImm64(1); + try cg.addTag(.i64_add); + }, + } + try cg.addLocal(.local_set, offset.local.value); + try cg.addLabel(.br, 0); // jump to start of loop + } + try cg.endBlock(); // close off loop block + try cg.endBlock(); // close off outer block +} + +fn ptrSize(cg: *const CodeGen) u16 { + return @divExact(cg.target.ptrBitWidth(), 8); +} + +/// For a given `Type`, will return true when the 
type will be passed
+/// by reference, rather than by value.
+fn isByRef(ty: Type, zcu: *const Zcu, target: *const std.Target) bool {
+    const ip = &zcu.intern_pool;
+    switch (ty.zigTypeTag(zcu)) {
+        .type,
+        .comptime_int,
+        .comptime_float,
+        .enum_literal,
+        .undefined,
+        .null,
+        .@"opaque",
+        => unreachable,
+
+        .noreturn,
+        .void,
+        .bool,
+        .error_set,
+        .@"fn",
+        .@"anyframe",
+        => return false,
+
+        .array,
+        .frame,
+        => return ty.hasRuntimeBitsIgnoreComptime(zcu),
+        .@"union" => {
+            if (zcu.typeToUnion(ty)) |union_obj| {
+                if (union_obj.flagsUnordered(ip).layout == .@"packed") {
+                    return ty.abiSize(zcu) > 8;
+                }
+            }
+            return ty.hasRuntimeBitsIgnoreComptime(zcu);
+        },
+        .@"struct" => {
+            if (zcu.typeToPackedStruct(ty)) |packed_struct| {
+                return isByRef(Type.fromInterned(packed_struct.backingIntTypeUnordered(ip)), zcu, target);
+            }
+            return ty.hasRuntimeBitsIgnoreComptime(zcu);
+        },
+        .vector => return determineSimdStoreStrategy(ty, zcu, target) == .unrolled,
+        .int => return ty.intInfo(zcu).bits > 64,
+        .@"enum" => return ty.intInfo(zcu).bits > 64,
+        .float => return ty.floatBits(target) > 64,
+        .error_union => {
+            const pl_ty = ty.errorUnionPayload(zcu);
+            if (!pl_ty.hasRuntimeBitsIgnoreComptime(zcu)) {
+                return false;
+            }
+            return true;
+        },
+        .optional => {
+            if (ty.isPtrLikeOptional(zcu)) return false;
+            const pl_type = ty.optionalChild(zcu);
+            if (pl_type.zigTypeTag(zcu) == .error_set) return false;
+            return pl_type.hasRuntimeBitsIgnoreComptime(zcu);
+        },
+        .pointer => {
+            // Slices act like structs and are passed by reference.
+            if (ty.isSlice(zcu)) return true;
+            return false;
+        },
+    }
+}
+
+const SimdStoreStrategy = enum {
+    direct,
+    unrolled,
+};
+
+/// For a given vector type, returns the `SimdStoreStrategy`.
+/// This means that when a given type is 128 bits and either the simd128 or relaxed-simd
+/// feature is enabled, the function returns `.direct`. This allows the value to be stored
+/// with a single instruction, rather than as an unrolled sequence.
+pub fn determineSimdStoreStrategy(ty: Type, zcu: *const Zcu, target: *const std.Target) SimdStoreStrategy {
+    assert(ty.zigTypeTag(zcu) == .vector);
+    if (ty.bitSize(zcu) != 128) return .unrolled;
+    if (target.cpu.has(.wasm, .relaxed_simd) or target.cpu.has(.wasm, .simd128)) {
+        return .direct;
+    }
+    return .unrolled;
+}
+
+/// Creates a new local for a pointer that points to memory with the given offset.
+/// This can be used to get a pointer to a struct field, error payload, etc.
+/// By providing `modify` as the action, it will modify the given `ptr_value` instead of making a new
+/// local value to store the pointer. This allows for local re-use and improves binary size.
+fn buildPointerOffset(cg: *CodeGen, ptr_value: WValue, offset: u64, action: enum { modify, new }) InnerError!WValue {
+    // Do not perform arithmetic when the offset is 0.
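+    // Illustrative usage only (the names here are hypothetical, not from this file):
+    // a caller wanting a pointer to byte offset 4 of an aggregate held in `agg_ptr`
+    // could write
+    //     const field_ptr = try cg.buildPointerOffset(agg_ptr, 4, .new);
+    // while passing `.modify` would instead bump the local behind `agg_ptr` in place.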
+ if (offset == 0 and ptr_value.offset() == 0 and action == .modify) return ptr_value; + const result_ptr: WValue = switch (action) { + .new => try cg.ensureAllocLocal(Type.usize), + .modify => ptr_value, + }; + try cg.emitWValue(ptr_value); + if (offset + ptr_value.offset() > 0) { + switch (cg.ptr_size) { + .wasm32 => { + try cg.addImm32(@intCast(offset + ptr_value.offset())); + try cg.addTag(.i32_add); + }, + .wasm64 => { + try cg.addImm64(offset + ptr_value.offset()); + try cg.addTag(.i64_add); + }, + } + } + try cg.addLocal(.local_set, result_ptr.local.value); + return result_ptr; +} + +fn genInst(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const air_tags = cg.air.instructions.items(.tag); + return switch (air_tags[@intFromEnum(inst)]) { + .inferred_alloc, .inferred_alloc_comptime => unreachable, + + .add => cg.airBinOp(inst, .add), + .add_sat => cg.airSatBinOp(inst, .add), + .add_wrap => cg.airWrapBinOp(inst, .add), + .sub => cg.airBinOp(inst, .sub), + .sub_sat => cg.airSatBinOp(inst, .sub), + .sub_wrap => cg.airWrapBinOp(inst, .sub), + .mul => cg.airBinOp(inst, .mul), + .mul_sat => cg.airSatMul(inst), + .mul_wrap => cg.airWrapBinOp(inst, .mul), + .div_float, .div_exact => cg.airDiv(inst), + .div_trunc => cg.airDivTrunc(inst), + .div_floor => cg.airDivFloor(inst), + .bit_and => cg.airBinOp(inst, .@"and"), + .bit_or => cg.airBinOp(inst, .@"or"), + .bool_and => cg.airBinOp(inst, .@"and"), + .bool_or => cg.airBinOp(inst, .@"or"), + .rem => cg.airRem(inst), + .mod => cg.airMod(inst), + .shl => cg.airWrapBinOp(inst, .shl), + .shl_exact => cg.airBinOp(inst, .shl), + .shl_sat => cg.airShlSat(inst), + .shr, .shr_exact => cg.airBinOp(inst, .shr), + .xor => cg.airBinOp(inst, .xor), + .max => cg.airMaxMin(inst, .fmax, .gt), + .min => cg.airMaxMin(inst, .fmin, .lt), + .mul_add => cg.airMulAdd(inst), + + .sqrt => cg.airUnaryFloatOp(inst, .sqrt), + .sin => cg.airUnaryFloatOp(inst, .sin), + .cos => cg.airUnaryFloatOp(inst, .cos), + .tan => cg.airUnaryFloatOp(inst, .tan), + .exp => cg.airUnaryFloatOp(inst, .exp), + .exp2 => cg.airUnaryFloatOp(inst, .exp2), + .log => cg.airUnaryFloatOp(inst, .log), + .log2 => cg.airUnaryFloatOp(inst, .log2), + .log10 => cg.airUnaryFloatOp(inst, .log10), + .floor => cg.airUnaryFloatOp(inst, .floor), + .ceil => cg.airUnaryFloatOp(inst, .ceil), + .round => cg.airUnaryFloatOp(inst, .round), + .trunc_float => cg.airUnaryFloatOp(inst, .trunc), + .neg => cg.airUnaryFloatOp(inst, .neg), + + .abs => cg.airAbs(inst), + + .add_with_overflow => cg.airAddSubWithOverflow(inst, .add), + .sub_with_overflow => cg.airAddSubWithOverflow(inst, .sub), + .shl_with_overflow => cg.airShlWithOverflow(inst), + .mul_with_overflow => cg.airMulWithOverflow(inst), + + .clz => cg.airClz(inst), + .ctz => cg.airCtz(inst), + + .cmp_eq => cg.airCmp(inst, .eq), + .cmp_gte => cg.airCmp(inst, .gte), + .cmp_gt => cg.airCmp(inst, .gt), + .cmp_lte => cg.airCmp(inst, .lte), + .cmp_lt => cg.airCmp(inst, .lt), + .cmp_neq => cg.airCmp(inst, .neq), + + .cmp_vector => cg.airCmpVector(inst), + .cmp_lt_errors_len => cg.airCmpLtErrorsLen(inst), + + .array_elem_val => cg.airArrayElemVal(inst), + .array_to_slice => cg.airArrayToSlice(inst), + .alloc => cg.airAlloc(inst), + .arg => cg.airArg(inst), + .bitcast => cg.airBitcast(inst), + .block => cg.airBlock(inst), + .trap => cg.airTrap(inst), + .breakpoint => cg.airBreakpoint(inst), + .br => cg.airBr(inst), + .repeat => cg.airRepeat(inst), + .switch_dispatch => cg.airSwitchDispatch(inst), + .cond_br => cg.airCondBr(inst), + .intcast => cg.airIntcast(inst), + 
.fptrunc => cg.airFptrunc(inst), + .fpext => cg.airFpext(inst), + .int_from_float => cg.airIntFromFloat(inst), + .float_from_int => cg.airFloatFromInt(inst), + .get_union_tag => cg.airGetUnionTag(inst), + + .@"try" => cg.airTry(inst), + .try_cold => cg.airTry(inst), + .try_ptr => cg.airTryPtr(inst), + .try_ptr_cold => cg.airTryPtr(inst), + + .dbg_stmt => cg.airDbgStmt(inst), + .dbg_empty_stmt => try cg.finishAir(inst, .none, &.{}), + .dbg_inline_block => cg.airDbgInlineBlock(inst), + .dbg_var_ptr => cg.airDbgVar(inst, .local_var, true), + .dbg_var_val => cg.airDbgVar(inst, .local_var, false), + .dbg_arg_inline => cg.airDbgVar(inst, .arg, false), + + .call => cg.airCall(inst, .auto), + .call_always_tail => cg.airCall(inst, .always_tail), + .call_never_tail => cg.airCall(inst, .never_tail), + .call_never_inline => cg.airCall(inst, .never_inline), + + .is_err => cg.airIsErr(inst, .i32_ne, .value), + .is_non_err => cg.airIsErr(inst, .i32_eq, .value), + .is_err_ptr => cg.airIsErr(inst, .i32_ne, .ptr), + .is_non_err_ptr => cg.airIsErr(inst, .i32_eq, .ptr), + + .is_null => cg.airIsNull(inst, .i32_eq, .value), + .is_non_null => cg.airIsNull(inst, .i32_ne, .value), + .is_null_ptr => cg.airIsNull(inst, .i32_eq, .ptr), + .is_non_null_ptr => cg.airIsNull(inst, .i32_ne, .ptr), + + .load => cg.airLoad(inst), + .loop => cg.airLoop(inst), + .memset => cg.airMemset(inst, false), + .memset_safe => cg.airMemset(inst, true), + .not => cg.airNot(inst), + .optional_payload => cg.airOptionalPayload(inst), + .optional_payload_ptr => cg.airOptionalPayloadPtr(inst), + .optional_payload_ptr_set => cg.airOptionalPayloadPtrSet(inst), + .ptr_add => cg.airPtrBinOp(inst, .add), + .ptr_sub => cg.airPtrBinOp(inst, .sub), + .ptr_elem_ptr => cg.airPtrElemPtr(inst), + .ptr_elem_val => cg.airPtrElemVal(inst), + .ret => cg.airRet(inst), + .ret_safe => cg.airRet(inst), // TODO + .ret_ptr => cg.airRetPtr(inst), + .ret_load => cg.airRetLoad(inst), + .splat => cg.airSplat(inst), + .select => cg.airSelect(inst), + .shuffle_one => cg.airShuffleOne(inst), + .shuffle_two => cg.airShuffleTwo(inst), + .reduce => cg.airReduce(inst), + .aggregate_init => cg.airAggregateInit(inst), + .union_init => cg.airUnionInit(inst), + .prefetch => cg.airPrefetch(inst), + .popcount => cg.airPopcount(inst), + .byte_swap => cg.airByteSwap(inst), + .bit_reverse => cg.airBitReverse(inst), + + .slice => cg.airSlice(inst), + .slice_len => cg.airSliceLen(inst), + .slice_elem_val => cg.airSliceElemVal(inst), + .slice_elem_ptr => cg.airSliceElemPtr(inst), + .slice_ptr => cg.airSlicePtr(inst), + .ptr_slice_len_ptr => cg.airPtrSliceFieldPtr(inst, cg.ptrSize()), + .ptr_slice_ptr_ptr => cg.airPtrSliceFieldPtr(inst, 0), + .store => cg.airStore(inst, false), + .store_safe => cg.airStore(inst, true), + + .set_union_tag => cg.airSetUnionTag(inst), + .struct_field_ptr => cg.airStructFieldPtr(inst), + .struct_field_ptr_index_0 => cg.airStructFieldPtrIndex(inst, 0), + .struct_field_ptr_index_1 => cg.airStructFieldPtrIndex(inst, 1), + .struct_field_ptr_index_2 => cg.airStructFieldPtrIndex(inst, 2), + .struct_field_ptr_index_3 => cg.airStructFieldPtrIndex(inst, 3), + .struct_field_val => cg.airStructFieldVal(inst), + .field_parent_ptr => cg.airFieldParentPtr(inst), + + .switch_br => cg.airSwitchBr(inst, false), + .loop_switch_br => cg.airSwitchBr(inst, true), + .trunc => cg.airTrunc(inst), + .unreach => cg.airUnreachable(inst), + + .wrap_optional => cg.airWrapOptional(inst), + .unwrap_errunion_payload => cg.airUnwrapErrUnionPayload(inst, false), + 
.unwrap_errunion_payload_ptr => cg.airUnwrapErrUnionPayload(inst, true), + .unwrap_errunion_err => cg.airUnwrapErrUnionError(inst, false), + .unwrap_errunion_err_ptr => cg.airUnwrapErrUnionError(inst, true), + .wrap_errunion_payload => cg.airWrapErrUnionPayload(inst), + .wrap_errunion_err => cg.airWrapErrUnionErr(inst), + .errunion_payload_ptr_set => cg.airErrUnionPayloadPtrSet(inst), + .error_name => cg.airErrorName(inst), + + .wasm_memory_size => cg.airWasmMemorySize(inst), + .wasm_memory_grow => cg.airWasmMemoryGrow(inst), + + .memcpy, .memmove => cg.airMemcpy(inst), + + .ret_addr => cg.airRetAddr(inst), + .tag_name => cg.airTagName(inst), + + .error_set_has_value => cg.airErrorSetHasValue(inst), + .frame_addr => cg.airFrameAddress(inst), + + .runtime_nav_ptr => cg.airRuntimeNavPtr(inst), + + .assembly, + + .err_return_trace, + .set_err_return_trace, + .save_err_return_trace_index, + .is_named_enum_value, + .addrspace_cast, + .vector_store_elem, + .c_va_arg, + .c_va_copy, + .c_va_end, + .c_va_start, + => |tag| return cg.fail("TODO: Implement wasm inst: {s}", .{@tagName(tag)}), + + .atomic_load => cg.airAtomicLoad(inst), + .atomic_store_unordered, + .atomic_store_monotonic, + .atomic_store_release, + .atomic_store_seq_cst, + // in WebAssembly, all atomic instructions are sequentially ordered. + => cg.airAtomicStore(inst), + .atomic_rmw => cg.airAtomicRmw(inst), + .cmpxchg_weak => cg.airCmpxchg(inst), + .cmpxchg_strong => cg.airCmpxchg(inst), + + .add_optimized, + .sub_optimized, + .mul_optimized, + .div_float_optimized, + .div_trunc_optimized, + .div_floor_optimized, + .div_exact_optimized, + .rem_optimized, + .mod_optimized, + .neg_optimized, + .cmp_lt_optimized, + .cmp_lte_optimized, + .cmp_eq_optimized, + .cmp_gte_optimized, + .cmp_gt_optimized, + .cmp_neq_optimized, + .cmp_vector_optimized, + .reduce_optimized, + .int_from_float_optimized, + => return cg.fail("TODO implement optimized float mode", .{}), + + .add_safe, + .sub_safe, + .mul_safe, + .intcast_safe, + .int_from_float_safe, + .int_from_float_optimized_safe, + => return cg.fail("TODO implement safety_checked_instructions", .{}), + + .work_item_id, + .work_group_size, + .work_group_id, + => unreachable, + }; +} + +fn genBody(cg: *CodeGen, body: []const Air.Inst.Index) InnerError!void { + const zcu = cg.pt.zcu; + const ip = &zcu.intern_pool; + + for (body) |inst| { + if (cg.liveness.isUnused(inst) and !cg.air.mustLower(inst, ip)) { + continue; + } + const old_bookkeeping_value = cg.air_bookkeeping; + try cg.currentBranch().values.ensureUnusedCapacity(cg.gpa, Air.Liveness.bpi); + try cg.genInst(inst); + + if (std.debug.runtime_safety and cg.air_bookkeeping < old_bookkeeping_value + 1) { + std.debug.panic("Missing call to `finishAir` in AIR instruction %{d} ('{t}')", .{ + inst, + cg.air.instructions.items(.tag)[@intFromEnum(inst)], + }); + } + } +} + +fn airRet(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const zcu = cg.pt.zcu; + const un_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].un_op; + const operand = try cg.resolveInst(un_op); + const fn_info = zcu.typeToFunc(zcu.navValue(cg.owner_nav).typeOf(zcu)).?; + const ret_ty = Type.fromInterned(fn_info.return_type); + + // result must be stored in the stack and we return a pointer + // to the stack instead + if (cg.return_value != .none) { + try cg.store(cg.return_value, operand, ret_ty, 0); + } else if (fn_info.cc == .wasm_mvp and ret_ty.hasRuntimeBitsIgnoreComptime(zcu)) { + switch (abi.classifyType(ret_ty, zcu)) { + .direct => |scalar_type| { + 
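+                // Illustrative note: a simple scalar return type such as `u32`
+                // classifies as `.direct` here, so the value is just left on the
+                // wasm value stack; by-ref results are loaded from memory first.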
assert(!abi.lowerAsDoubleI64(scalar_type, zcu)); + if (!isByRef(ret_ty, zcu, cg.target)) { + try cg.emitWValue(operand); + } else { + _ = try cg.load(operand, scalar_type, 0); + } + }, + .indirect => unreachable, + } + } else { + if (!ret_ty.hasRuntimeBitsIgnoreComptime(zcu) and ret_ty.isError(zcu)) { + try cg.addImm32(0); + } else { + try cg.emitWValue(operand); + } + } + try cg.restoreStackPointer(); + try cg.addTag(.@"return"); + + return cg.finishAir(inst, .none, &.{un_op}); +} + +fn airRetPtr(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const zcu = cg.pt.zcu; + const child_type = cg.typeOfIndex(inst).childType(zcu); + + const result = result: { + if (!child_type.isFnOrHasRuntimeBitsIgnoreComptime(zcu)) { + break :result try cg.allocStack(Type.usize); // create pointer to void + } + + const fn_info = zcu.typeToFunc(zcu.navValue(cg.owner_nav).typeOf(zcu)).?; + if (firstParamSRet(fn_info.cc, Type.fromInterned(fn_info.return_type), zcu, cg.target)) { + break :result cg.return_value; + } + + break :result try cg.allocStackPtr(inst); + }; + + return cg.finishAir(inst, result, &.{}); +} + +fn airRetLoad(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const zcu = cg.pt.zcu; + const un_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].un_op; + const operand = try cg.resolveInst(un_op); + const ret_ty = cg.typeOf(un_op).childType(zcu); + + const fn_info = zcu.typeToFunc(zcu.navValue(cg.owner_nav).typeOf(zcu)).?; + if (!ret_ty.hasRuntimeBitsIgnoreComptime(zcu)) { + if (ret_ty.isError(zcu)) { + try cg.addImm32(0); + } + } else if (!firstParamSRet(fn_info.cc, Type.fromInterned(fn_info.return_type), zcu, cg.target)) { + // leave on the stack + _ = try cg.load(operand, ret_ty, 0); + } + + try cg.restoreStackPointer(); + try cg.addTag(.@"return"); + return cg.finishAir(inst, .none, &.{un_op}); +} + +fn airCall(cg: *CodeGen, inst: Air.Inst.Index, modifier: std.builtin.CallModifier) InnerError!void { + if (modifier == .always_tail) return cg.fail("TODO implement tail calls for wasm", .{}); + const pl_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].pl_op; + const extra = cg.air.extraData(Air.Call, pl_op.payload); + const args: []const Air.Inst.Ref = @ptrCast(cg.air.extra.items[extra.end..][0..extra.data.args_len]); + const ty = cg.typeOf(pl_op.operand); + + const pt = cg.pt; + const zcu = pt.zcu; + const ip = &zcu.intern_pool; + const fn_ty = switch (ty.zigTypeTag(zcu)) { + .@"fn" => ty, + .pointer => ty.childType(zcu), + else => unreachable, + }; + const ret_ty = fn_ty.fnReturnType(zcu); + const fn_info = zcu.typeToFunc(fn_ty).?; + const first_param_sret = firstParamSRet(fn_info.cc, Type.fromInterned(fn_info.return_type), zcu, cg.target); + + const callee: ?InternPool.Nav.Index = blk: { + const func_val = (try cg.air.value(pl_op.operand, pt)) orelse break :blk null; + + switch (ip.indexToKey(func_val.toIntern())) { + inline .func, .@"extern" => |x| break :blk x.owner_nav, + .ptr => |ptr| if (ptr.byte_offset == 0) switch (ptr.base_addr) { + .nav => |nav| break :blk nav, + else => {}, + }, + else => {}, + } + return cg.fail("unable to lower callee to a function index", .{}); + }; + + const sret: WValue = if (first_param_sret) blk: { + const sret_local = try cg.allocStack(ret_ty); + try cg.lowerToStack(sret_local); + break :blk sret_local; + } else .none; + + for (args) |arg| { + const arg_val = try cg.resolveInst(arg); + + const arg_ty = cg.typeOf(arg); + if (!arg_ty.hasRuntimeBitsIgnoreComptime(zcu)) continue; + + try cg.lowerArg(zcu.typeToFunc(fn_ty).?.cc, arg_ty, 
arg_val); + } + + if (callee) |nav_index| { + try cg.addInst(.{ .tag = .call_nav, .data = .{ .nav_index = nav_index } }); + } else { + // in this case we call a function pointer + // so load its value onto the stack + assert(ty.zigTypeTag(zcu) == .pointer); + const operand = try cg.resolveInst(pl_op.operand); + try cg.emitWValue(operand); + + try cg.mir_func_tys.put(cg.gpa, fn_ty.toIntern(), {}); + try cg.addInst(.{ + .tag = .call_indirect, + .data = .{ .ip_index = fn_ty.toIntern() }, + }); + } + + const result_value = result_value: { + if (!ret_ty.hasRuntimeBitsIgnoreComptime(zcu) and !ret_ty.isError(zcu)) { + break :result_value .none; + } else if (ret_ty.isNoReturn(zcu)) { + try cg.addTag(.@"unreachable"); + break :result_value .none; + } else if (first_param_sret) { + break :result_value sret; + } else if (zcu.typeToFunc(fn_ty).?.cc == .wasm_mvp) { + switch (abi.classifyType(ret_ty, zcu)) { + .direct => |scalar_type| { + assert(!abi.lowerAsDoubleI64(scalar_type, zcu)); + if (!isByRef(ret_ty, zcu, cg.target)) { + const result_local = try cg.allocLocal(ret_ty); + try cg.addLocal(.local_set, result_local.local.value); + break :result_value result_local; + } else { + const result_local = try cg.allocLocal(ret_ty); + try cg.addLocal(.local_set, result_local.local.value); + const result = try cg.allocStack(ret_ty); + try cg.store(result, result_local, scalar_type, 0); + break :result_value result; + } + }, + .indirect => unreachable, + } + } else { + const result_local = try cg.allocLocal(ret_ty); + try cg.addLocal(.local_set, result_local.local.value); + break :result_value result_local; + } + }; + + var bt = try cg.iterateBigTomb(inst, 1 + args.len); + bt.feed(pl_op.operand); + for (args) |arg| bt.feed(arg); + return bt.finishAir(result_value); +} + +fn airAlloc(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const value = try cg.allocStackPtr(inst); + return cg.finishAir(inst, value, &.{}); +} + +fn airStore(cg: *CodeGen, inst: Air.Inst.Index, safety: bool) InnerError!void { + const pt = cg.pt; + const zcu = pt.zcu; + const bin_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].bin_op; + + const lhs = try cg.resolveInst(bin_op.lhs); + const rhs = try cg.resolveInst(bin_op.rhs); + const ptr_ty = cg.typeOf(bin_op.lhs); + const ptr_info = ptr_ty.ptrInfo(zcu); + const ty = ptr_ty.childType(zcu); + + if (!safety and bin_op.rhs == .undef) { + return cg.finishAir(inst, .none, &.{ bin_op.lhs, bin_op.rhs }); + } + + if (ptr_info.packed_offset.host_size == 0) { + try cg.store(lhs, rhs, ty, 0); + } else { + // at this point we have a non-natural alignment, we must + // load the value, and then shift+or the rhs into the result location. 
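+        // A rough sketch of the read-modify-write built below, assuming a host
+        // integer of `host_size` bits holding a field of `bit_size` bits at
+        // `bit_offset`:
+        //     host = (host & ~(field_mask << bit_offset)) | (@as(HostInt, value) << bit_offset)
+        // where `field_mask` has the low `bit_size` bits set and `HostInt` is the
+        // unsigned host integer type.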
+ const host_size = ptr_info.packed_offset.host_size * 8; + const host_ty = try pt.intType(.unsigned, host_size); + const bit_size: u16 = @intCast(ty.bitSize(zcu)); + const bit_offset = ptr_info.packed_offset.bit_offset; + + const mask_val = try cg.resolveValue(val: { + const limbs = try cg.gpa.alloc( + std.math.big.Limb, + std.math.big.int.calcTwosCompLimbCount(host_size) + 1, + ); + defer cg.gpa.free(limbs); + + var mask_bigint: std.math.big.int.Mutable = .{ .limbs = limbs, .positive = undefined, .len = undefined }; + mask_bigint.setTwosCompIntLimit(.max, .unsigned, host_size); + + if (bit_size != host_size) { + mask_bigint.shiftRight(mask_bigint.toConst(), host_size - bit_size); + } + if (bit_offset != 0) { + mask_bigint.shiftLeft(mask_bigint.toConst(), bit_offset); + } + mask_bigint.bitNotWrap(mask_bigint.toConst(), .unsigned, host_size); + + break :val try pt.intValue_big(host_ty, mask_bigint.toConst()); + }); + + const shift_val: WValue = if (33 <= host_size and host_size <= 64) + .{ .imm64 = bit_offset } + else + .{ .imm32 = bit_offset }; + + if (host_size <= 64) { + try cg.emitWValue(lhs); + } + const loaded = if (host_size <= 64) + try cg.load(lhs, host_ty, 0) + else + lhs; + const anded = try cg.binOp(loaded, mask_val, host_ty, .@"and"); + const extended_value = try cg.intcast(rhs, ty, host_ty); + const shifted_value = if (bit_offset > 0) + try cg.binOp(extended_value, shift_val, host_ty, .shl) + else + extended_value; + const result = try cg.binOp(anded, shifted_value, host_ty, .@"or"); + if (host_size <= 64) { + try cg.store(.stack, result, host_ty, lhs.offset()); + } else { + try cg.store(lhs, result, host_ty, lhs.offset()); + } + } + + return cg.finishAir(inst, .none, &.{ bin_op.lhs, bin_op.rhs }); +} + +fn store(cg: *CodeGen, lhs: WValue, rhs: WValue, ty: Type, offset: u32) InnerError!void { + assert(!(lhs != .stack and rhs == .stack)); + const pt = cg.pt; + const zcu = pt.zcu; + const abi_size = ty.abiSize(zcu); + + if (!ty.hasRuntimeBitsIgnoreComptime(zcu)) return; + + switch (ty.zigTypeTag(zcu)) { + .error_union => { + const pl_ty = ty.errorUnionPayload(zcu); + if (!pl_ty.hasRuntimeBitsIgnoreComptime(zcu)) { + return cg.store(lhs, rhs, Type.anyerror, offset); + } + + const len = @as(u32, @intCast(abi_size)); + assert(offset == 0); + return cg.memcpy(lhs, rhs, .{ .imm32 = len }); + }, + .optional => { + if (ty.isPtrLikeOptional(zcu)) { + return cg.store(lhs, rhs, Type.usize, offset); + } + const pl_ty = ty.optionalChild(zcu); + if (!pl_ty.hasRuntimeBitsIgnoreComptime(zcu)) { + return cg.store(lhs, rhs, Type.u8, offset); + } + if (pl_ty.zigTypeTag(zcu) == .error_set) { + return cg.store(lhs, rhs, Type.anyerror, offset); + } + + const len = @as(u32, @intCast(abi_size)); + assert(offset == 0); + return cg.memcpy(lhs, rhs, .{ .imm32 = len }); + }, + .@"struct", .array, .@"union" => if (isByRef(ty, zcu, cg.target)) { + const len = @as(u32, @intCast(abi_size)); + assert(offset == 0); + return cg.memcpy(lhs, rhs, .{ .imm32 = len }); + }, + .vector => switch (determineSimdStoreStrategy(ty, zcu, cg.target)) { + .unrolled => { + const len: u32 = @intCast(abi_size); + return cg.memcpy(lhs, rhs, .{ .imm32 = len }); + }, + .direct => { + try cg.emitWValue(lhs); + try cg.lowerToStack(rhs); + // TODO: Add helper functions for simd opcodes + const extra_index: u32 = @intCast(cg.mir_extra.items.len); + // stores as := opcode, offset, alignment (opcode::memarg) + try cg.mir_extra.appendSlice(cg.gpa, &[_]u32{ + @intFromEnum(std.wasm.SimdOpcode.v128_store), + offset + lhs.offset(), + 
@intCast(ty.abiAlignment(zcu).toByteUnits() orelse 0), + }); + return cg.addInst(.{ .tag = .simd_prefix, .data = .{ .payload = extra_index } }); + }, + }, + .pointer => { + if (ty.isSlice(zcu)) { + assert(offset == 0); + // store pointer first + // lower it to the stack so we do not have to store rhs into a local first + try cg.emitWValue(lhs); + const ptr_local = try cg.load(rhs, Type.usize, 0); + try cg.store(.stack, ptr_local, Type.usize, 0 + lhs.offset()); + + // retrieve length from rhs, and store that alongside lhs as well + try cg.emitWValue(lhs); + const len_local = try cg.load(rhs, Type.usize, cg.ptrSize()); + try cg.store(.stack, len_local, Type.usize, cg.ptrSize() + lhs.offset()); + return; + } + }, + .int, .@"enum", .float => if (abi_size > 8 and abi_size <= 16) { + assert(offset == 0); + try cg.emitWValue(lhs); + const lsb = try cg.load(rhs, Type.u64, 0); + try cg.store(.stack, lsb, Type.u64, 0 + lhs.offset()); + + try cg.emitWValue(lhs); + const msb = try cg.load(rhs, Type.u64, 8); + try cg.store(.stack, msb, Type.u64, 8 + lhs.offset()); + return; + } else if (abi_size > 16) { + assert(offset == 0); + try cg.memcpy(lhs, rhs, .{ .imm32 = @as(u32, @intCast(ty.abiSize(zcu))) }); + }, + else => if (abi_size > 8) { + return cg.fail("TODO: `store` for type `{f}` with abisize `{d}`", .{ ty.fmt(pt), abi_size }); + }, + } + try cg.emitWValue(lhs); + // In this case we're actually interested in storing the stack position + // into lhs, so we calculate that and emit that instead + try cg.lowerToStack(rhs); + + const valtype = typeToValtype(ty, zcu, cg.target); + const opcode = buildOpcode(.{ + .valtype1 = valtype, + .width = @as(u8, @intCast(abi_size * 8)), + .op = .store, + }); + + // store rhs value at stack pointer's location in memory + try cg.addMemArg( + Mir.Inst.Tag.fromOpcode(opcode), + .{ + .offset = offset + lhs.offset(), + .alignment = @intCast(ty.abiAlignment(zcu).toByteUnits().?), + }, + ); +} + +fn airLoad(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const pt = cg.pt; + const zcu = pt.zcu; + const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; + const operand = try cg.resolveInst(ty_op.operand); + const ty = ty_op.ty.toType(); + const ptr_ty = cg.typeOf(ty_op.operand); + const ptr_info = ptr_ty.ptrInfo(zcu); + + if (!ty.hasRuntimeBitsIgnoreComptime(zcu)) return cg.finishAir(inst, .none, &.{ty_op.operand}); + + const result = result: { + if (isByRef(ty, zcu, cg.target)) { + const new_local = try cg.allocStack(ty); + try cg.store(new_local, operand, ty, 0); + break :result new_local; + } + + if (ptr_info.packed_offset.host_size == 0) { + const loaded = try cg.load(operand, ty, 0); + const ty_size = ty.abiSize(zcu); + if (ty.isAbiInt(zcu) and ty_size * 8 > ty.bitSize(zcu)) { + const int_elem_ty = try pt.intType(.unsigned, @intCast(ty_size * 8)); + break :result try cg.trunc(loaded, ty, int_elem_ty); + } else { + break :result loaded; + } + } else { + const int_elem_ty = try pt.intType(.unsigned, ptr_info.packed_offset.host_size * 8); + const shift_val: WValue = if (ptr_info.packed_offset.host_size <= 4) + .{ .imm32 = ptr_info.packed_offset.bit_offset } + else if (ptr_info.packed_offset.host_size <= 8) + .{ .imm64 = ptr_info.packed_offset.bit_offset } + else + .{ .imm32 = ptr_info.packed_offset.bit_offset }; + + const stack_loaded = if (ptr_info.packed_offset.host_size <= 8) + try cg.load(operand, int_elem_ty, 0) + else + operand; + const shifted = try cg.binOp(stack_loaded, shift_val, int_elem_ty, .shr); + break :result try cg.trunc(shifted, 
ty, int_elem_ty); + } + }; + return cg.finishAir(inst, result, &.{ty_op.operand}); +} + +/// Loads an operand from the linear memory section. +/// NOTE: Leaves the value on the stack. +fn load(cg: *CodeGen, operand: WValue, ty: Type, offset: u32) InnerError!WValue { + const zcu = cg.pt.zcu; + // load local's value from memory by its stack position + try cg.emitWValue(operand); + + if (ty.zigTypeTag(zcu) == .vector) { + // TODO: Add helper functions for simd opcodes + const extra_index: u32 = @intCast(cg.mir_extra.items.len); + // stores as := opcode, offset, alignment (opcode::memarg) + try cg.mir_extra.appendSlice(cg.gpa, &[_]u32{ + @intFromEnum(std.wasm.SimdOpcode.v128_load), + offset + operand.offset(), + @intCast(ty.abiAlignment(zcu).toByteUnits().?), + }); + try cg.addInst(.{ .tag = .simd_prefix, .data = .{ .payload = extra_index } }); + return .stack; + } + + const abi_size: u8 = @intCast(ty.abiSize(zcu)); + const opcode = buildOpcode(.{ + .valtype1 = typeToValtype(ty, zcu, cg.target), + .width = abi_size * 8, + .op = .load, + .signedness = if (ty.isSignedInt(zcu)) .signed else .unsigned, + }); + + try cg.addMemArg( + Mir.Inst.Tag.fromOpcode(opcode), + .{ + .offset = offset + operand.offset(), + .alignment = @intCast(ty.abiAlignment(zcu).toByteUnits().?), + }, + ); + + return .stack; +} + +fn airArg(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const pt = cg.pt; + const zcu = pt.zcu; + const arg_index = cg.arg_index; + const arg = cg.args[arg_index]; + const cc = zcu.typeToFunc(zcu.navValue(cg.owner_nav).typeOf(zcu)).?.cc; + const arg_ty = cg.typeOfIndex(inst); + if (cc == .wasm_mvp) { + switch (abi.classifyType(arg_ty, zcu)) { + .direct => |scalar_ty| if (!abi.lowerAsDoubleI64(scalar_ty, zcu)) { + cg.arg_index += 1; + } else { + cg.arg_index += 2; + const result = try cg.allocStack(arg_ty); + try cg.store(result, arg, Type.u64, 0); + try cg.store(result, cg.args[arg_index + 1], Type.u64, 8); + return cg.finishAir(inst, result, &.{}); + }, + .indirect => cg.arg_index += 1, + } + } else { + cg.arg_index += 1; + } + + return cg.finishAir(inst, arg, &.{}); +} + +fn airBinOp(cg: *CodeGen, inst: Air.Inst.Index, op: Op) InnerError!void { + const zcu = cg.pt.zcu; + const bin_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].bin_op; + const lhs = try cg.resolveInst(bin_op.lhs); + const rhs = try cg.resolveInst(bin_op.rhs); + const lhs_ty = cg.typeOf(bin_op.lhs); + const rhs_ty = cg.typeOf(bin_op.rhs); + + // For certain operations, such as shifting, the types are different. + // When converting this to a WebAssembly type, they *must* match to perform + // an operation. For this reason we verify if the WebAssembly type is different, in which + // case we first coerce the operands to the same type before performing the operation. + // For big integers we can ignore this as we will call into compiler-rt which handles this. 
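+    // Illustrative example: for a Zig `u64 << u6`, the lhs lowers to a wasm i64
+    // while the rhs (a `u6`) lowers to an i32, so the rhs is first widened to the
+    // lhs type before the shift opcode is emitted.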
+ const result = switch (op) { + .shr, .shl => result: { + if (lhs_ty.isVector(zcu) and !rhs_ty.isVector(zcu)) { + return cg.fail("TODO: implement vector '{s}' with scalar rhs", .{@tagName(op)}); + } + + const lhs_wasm_bits = toWasmBits(@intCast(lhs_ty.bitSize(zcu))) orelse { + return cg.fail("TODO: implement '{s}' for types larger than 128 bits", .{@tagName(op)}); + }; + const rhs_wasm_bits = toWasmBits(@intCast(rhs_ty.bitSize(zcu))).?; + const new_rhs = if (lhs_wasm_bits != rhs_wasm_bits and lhs_wasm_bits != 128) + try (try cg.intcast(rhs, rhs_ty, lhs_ty)).toLocal(cg, lhs_ty) + else + rhs; + break :result try cg.binOp(lhs, new_rhs, lhs_ty, op); + }, + else => try cg.binOp(lhs, rhs, lhs_ty, op), + }; + + return cg.finishAir(inst, result, &.{ bin_op.lhs, bin_op.rhs }); +} + +/// Performs a binary operation on the given `WValue`'s +/// NOTE: THis leaves the value on top of the stack. +fn binOp(cg: *CodeGen, lhs: WValue, rhs: WValue, ty: Type, op: Op) InnerError!WValue { + const pt = cg.pt; + const zcu = pt.zcu; + assert(!(lhs != .stack and rhs == .stack)); + + if (ty.isAnyFloat()) { + const float_op = FloatOp.fromOp(op); + return cg.floatOp(float_op, ty, &.{ lhs, rhs }); + } + + if (isByRef(ty, zcu, cg.target)) { + if (ty.zigTypeTag(zcu) == .int) { + return cg.binOpBigInt(lhs, rhs, ty, op); + } else { + return cg.fail("TODO: Implement binary operation for type: {f}", .{ty.fmt(pt)}); + } + } + + const opcode: std.wasm.Opcode = buildOpcode(.{ + .op = op, + .valtype1 = typeToValtype(ty, zcu, cg.target), + .signedness = if (ty.isSignedInt(zcu)) .signed else .unsigned, + }); + try cg.emitWValue(lhs); + try cg.emitWValue(rhs); + + try cg.addTag(Mir.Inst.Tag.fromOpcode(opcode)); + + return .stack; +} + +fn binOpBigInt(cg: *CodeGen, lhs: WValue, rhs: WValue, ty: Type, op: Op) InnerError!WValue { + const zcu = cg.pt.zcu; + const int_info = ty.intInfo(zcu); + if (int_info.bits > 128) { + return cg.fail("TODO: Implement binary operation for big integers larger than 128 bits", .{}); + } + + switch (op) { + .mul => return cg.callIntrinsic(.__multi3, &.{ ty.toIntern(), ty.toIntern() }, ty, &.{ lhs, rhs }), + .div => switch (int_info.signedness) { + .signed => return cg.callIntrinsic(.__divti3, &.{ ty.toIntern(), ty.toIntern() }, ty, &.{ lhs, rhs }), + .unsigned => return cg.callIntrinsic(.__udivti3, &.{ ty.toIntern(), ty.toIntern() }, ty, &.{ lhs, rhs }), + }, + .rem => switch (int_info.signedness) { + .signed => return cg.callIntrinsic(.__modti3, &.{ ty.toIntern(), ty.toIntern() }, ty, &.{ lhs, rhs }), + .unsigned => return cg.callIntrinsic(.__umodti3, &.{ ty.toIntern(), ty.toIntern() }, ty, &.{ lhs, rhs }), + }, + .shr => switch (int_info.signedness) { + .signed => return cg.callIntrinsic(.__ashrti3, &.{ ty.toIntern(), .i32_type }, ty, &.{ lhs, rhs }), + .unsigned => return cg.callIntrinsic(.__lshrti3, &.{ ty.toIntern(), .i32_type }, ty, &.{ lhs, rhs }), + }, + .shl => return cg.callIntrinsic(.__ashlti3, &.{ ty.toIntern(), .i32_type }, ty, &.{ lhs, rhs }), + .@"and", .@"or", .xor => { + const result = try cg.allocStack(ty); + try cg.emitWValue(result); + const lhs_lsb = try cg.load(lhs, Type.u64, 0); + const rhs_lsb = try cg.load(rhs, Type.u64, 0); + const op_lsb = try cg.binOp(lhs_lsb, rhs_lsb, Type.u64, op); + try cg.store(.stack, op_lsb, Type.u64, result.offset()); + + try cg.emitWValue(result); + const lhs_msb = try cg.load(lhs, Type.u64, 8); + const rhs_msb = try cg.load(rhs, Type.u64, 8); + const op_msb = try cg.binOp(lhs_msb, rhs_msb, Type.u64, op); + try cg.store(.stack, op_msb, Type.u64, 
result.offset() + 8); + return result; + }, + .add, .sub => { + const result = try cg.allocStack(ty); + var lhs_lsb = try (try cg.load(lhs, Type.u64, 0)).toLocal(cg, Type.u64); + defer lhs_lsb.free(cg); + var rhs_lsb = try (try cg.load(rhs, Type.u64, 0)).toLocal(cg, Type.u64); + defer rhs_lsb.free(cg); + var op_lsb = try (try cg.binOp(lhs_lsb, rhs_lsb, Type.u64, op)).toLocal(cg, Type.u64); + defer op_lsb.free(cg); + + const lhs_msb = try cg.load(lhs, Type.u64, 8); + const rhs_msb = try cg.load(rhs, Type.u64, 8); + const op_msb = try cg.binOp(lhs_msb, rhs_msb, Type.u64, op); + + const lt = if (op == .add) blk: { + break :blk try cg.cmp(op_lsb, rhs_lsb, Type.u64, .lt); + } else if (op == .sub) blk: { + break :blk try cg.cmp(lhs_lsb, rhs_lsb, Type.u64, .lt); + } else unreachable; + const tmp = try cg.intcast(lt, Type.u32, Type.u64); + var tmp_op = try (try cg.binOp(op_msb, tmp, Type.u64, op)).toLocal(cg, Type.u64); + defer tmp_op.free(cg); + + try cg.store(result, op_lsb, Type.u64, 0); + try cg.store(result, tmp_op, Type.u64, 8); + return result; + }, + else => return cg.fail("TODO: Implement binary operation for big integers: '{s}'", .{@tagName(op)}), + } +} + +const FloatOp = enum { + add, + ceil, + cos, + div, + exp, + exp2, + fabs, + floor, + fma, + fmax, + fmin, + fmod, + log, + log10, + log2, + mul, + neg, + round, + sin, + sqrt, + sub, + tan, + trunc, + + pub fn fromOp(op: Op) FloatOp { + return switch (op) { + .add => .add, + .ceil => .ceil, + .div => .div, + .abs => .fabs, + .floor => .floor, + .max => .fmax, + .min => .fmin, + .mul => .mul, + .neg => .neg, + .nearest => .round, + .sqrt => .sqrt, + .sub => .sub, + .trunc => .trunc, + .rem => .fmod, + else => unreachable, + }; + } + + pub fn toOp(float_op: FloatOp) ?Op { + return switch (float_op) { + .add => .add, + .ceil => .ceil, + .div => .div, + .fabs => .abs, + .floor => .floor, + .fmax => .max, + .fmin => .min, + .mul => .mul, + .neg => .neg, + .round => .nearest, + .sqrt => .sqrt, + .sub => .sub, + .trunc => .trunc, + + .cos, + .exp, + .exp2, + .fma, + .fmod, + .log, + .log10, + .log2, + .sin, + .tan, + => null, + }; + } + + fn intrinsic(op: FloatOp, bits: u16) Mir.Intrinsic { + return switch (op) { + inline .add, .sub, .div, .mul => |ct_op| switch (bits) { + inline 16, 80, 128 => |ct_bits| @field( + Mir.Intrinsic, + "__" ++ @tagName(ct_op) ++ compilerRtFloatAbbrev(ct_bits) ++ "f3", + ), + else => unreachable, + }, + + inline .ceil, + .fabs, + .floor, + .fmax, + .fmin, + .round, + .sqrt, + .trunc, + => |ct_op| switch (bits) { + inline 16, 80, 128 => |ct_bits| @field( + Mir.Intrinsic, + libcFloatPrefix(ct_bits) ++ @tagName(ct_op) ++ libcFloatSuffix(ct_bits), + ), + else => unreachable, + }, + + inline .cos, + .exp, + .exp2, + .fma, + .fmod, + .log, + .log10, + .log2, + .sin, + .tan, + => |ct_op| switch (bits) { + inline 16, 32, 64, 80, 128 => |ct_bits| @field( + Mir.Intrinsic, + libcFloatPrefix(ct_bits) ++ @tagName(ct_op) ++ libcFloatSuffix(ct_bits), + ), + else => unreachable, + }, + + .neg => unreachable, + }; + } +}; + +fn airAbs(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const pt = cg.pt; + const zcu = pt.zcu; + const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; + const operand = try cg.resolveInst(ty_op.operand); + const ty = cg.typeOf(ty_op.operand); + const scalar_ty = ty.scalarType(zcu); + + switch (scalar_ty.zigTypeTag(zcu)) { + .int => if (ty.zigTypeTag(zcu) == .vector) { + return cg.fail("TODO implement airAbs for {f}", .{ty.fmt(pt)}); + } else { + const int_bits = 
ty.intInfo(zcu).bits; + const wasm_bits = toWasmBits(int_bits) orelse { + return cg.fail("TODO: airAbs for signed integers larger than '{d}' bits", .{int_bits}); + }; + + switch (wasm_bits) { + 32 => { + try cg.emitWValue(operand); + + try cg.addImm32(31); + try cg.addTag(.i32_shr_s); + + var tmp = try cg.allocLocal(ty); + defer tmp.free(cg); + try cg.addLocal(.local_tee, tmp.local.value); + + try cg.emitWValue(operand); + try cg.addTag(.i32_xor); + try cg.emitWValue(tmp); + try cg.addTag(.i32_sub); + return cg.finishAir(inst, .stack, &.{ty_op.operand}); + }, + 64 => { + try cg.emitWValue(operand); + + try cg.addImm64(63); + try cg.addTag(.i64_shr_s); + + var tmp = try cg.allocLocal(ty); + defer tmp.free(cg); + try cg.addLocal(.local_tee, tmp.local.value); + + try cg.emitWValue(operand); + try cg.addTag(.i64_xor); + try cg.emitWValue(tmp); + try cg.addTag(.i64_sub); + return cg.finishAir(inst, .stack, &.{ty_op.operand}); + }, + 128 => { + const mask = try cg.allocStack(Type.u128); + try cg.emitWValue(mask); + try cg.emitWValue(mask); + + _ = try cg.load(operand, Type.u64, 8); + try cg.addImm64(63); + try cg.addTag(.i64_shr_s); + + var tmp = try cg.allocLocal(Type.u64); + defer tmp.free(cg); + try cg.addLocal(.local_tee, tmp.local.value); + try cg.store(.stack, .stack, Type.u64, mask.offset() + 0); + try cg.emitWValue(tmp); + try cg.store(.stack, .stack, Type.u64, mask.offset() + 8); + + const a = try cg.binOpBigInt(operand, mask, Type.u128, .xor); + const b = try cg.binOpBigInt(a, mask, Type.u128, .sub); + + return cg.finishAir(inst, b, &.{ty_op.operand}); + }, + else => unreachable, + } + }, + .float => { + const result = try cg.floatOp(.fabs, ty, &.{operand}); + return cg.finishAir(inst, result, &.{ty_op.operand}); + }, + else => unreachable, + } +} + +fn airUnaryFloatOp(cg: *CodeGen, inst: Air.Inst.Index, op: FloatOp) InnerError!void { + const un_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].un_op; + const operand = try cg.resolveInst(un_op); + const ty = cg.typeOf(un_op); + + const result = try cg.floatOp(op, ty, &.{operand}); + return cg.finishAir(inst, result, &.{un_op}); +} + +fn floatOp(cg: *CodeGen, float_op: FloatOp, ty: Type, args: []const WValue) InnerError!WValue { + const zcu = cg.pt.zcu; + if (ty.zigTypeTag(zcu) == .vector) { + return cg.fail("TODO: Implement floatOps for vectors", .{}); + } + + const float_bits = ty.floatBits(cg.target); + + if (float_op == .neg) { + return cg.floatNeg(ty, args[0]); + } + + if (float_bits == 32 or float_bits == 64) { + if (float_op.toOp()) |op| { + for (args) |operand| { + try cg.emitWValue(operand); + } + const opcode = buildOpcode(.{ .op = op, .valtype1 = typeToValtype(ty, zcu, cg.target) }); + try cg.addTag(Mir.Inst.Tag.fromOpcode(opcode)); + return .stack; + } + } + + const intrinsic = float_op.intrinsic(float_bits); + + // fma requires three operands + var param_types_buffer: [3]InternPool.Index = .{ ty.ip_index, ty.ip_index, ty.ip_index }; + const param_types = param_types_buffer[0..args.len]; + return cg.callIntrinsic(intrinsic, param_types, ty, args); +} + +/// NOTE: The result value remains on top of the stack. 
+fn floatNeg(cg: *CodeGen, ty: Type, arg: WValue) InnerError!WValue { + const float_bits = ty.floatBits(cg.target); + switch (float_bits) { + 16 => { + try cg.emitWValue(arg); + try cg.addImm32(0x8000); + try cg.addTag(.i32_xor); + return .stack; + }, + 32, 64 => { + try cg.emitWValue(arg); + const val_type: std.wasm.Valtype = if (float_bits == 32) .f32 else .f64; + const opcode = buildOpcode(.{ .op = .neg, .valtype1 = val_type }); + try cg.addTag(Mir.Inst.Tag.fromOpcode(opcode)); + return .stack; + }, + 80, 128 => { + const result = try cg.allocStack(ty); + try cg.emitWValue(result); + try cg.emitWValue(arg); + try cg.addMemArg(.i64_load, .{ .offset = 0 + arg.offset(), .alignment = 2 }); + try cg.addMemArg(.i64_store, .{ .offset = 0 + result.offset(), .alignment = 2 }); + + try cg.emitWValue(result); + try cg.emitWValue(arg); + try cg.addMemArg(.i64_load, .{ .offset = 8 + arg.offset(), .alignment = 2 }); + + if (float_bits == 80) { + try cg.addImm64(0x8000); + try cg.addTag(.i64_xor); + try cg.addMemArg(.i64_store16, .{ .offset = 8 + result.offset(), .alignment = 2 }); + } else { + try cg.addImm64(0x8000000000000000); + try cg.addTag(.i64_xor); + try cg.addMemArg(.i64_store, .{ .offset = 8 + result.offset(), .alignment = 2 }); + } + return result; + }, + else => unreachable, + } +} + +fn airWrapBinOp(cg: *CodeGen, inst: Air.Inst.Index, op: Op) InnerError!void { + const zcu = cg.pt.zcu; + const bin_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].bin_op; + + const lhs = try cg.resolveInst(bin_op.lhs); + const rhs = try cg.resolveInst(bin_op.rhs); + const lhs_ty = cg.typeOf(bin_op.lhs); + const rhs_ty = cg.typeOf(bin_op.rhs); + + if (lhs_ty.isVector(zcu)) { + if ((op == .shr or op == .shl) and !rhs_ty.isVector(zcu)) { + return cg.fail("TODO: implement wrapping vector '{s}' with scalar rhs", .{@tagName(op)}); + } else { + return cg.fail("TODO: implement wrapping '{s}' for vectors", .{@tagName(op)}); + } + } + + // For certain operations, such as shifting, the types are different. + // When converting this to a WebAssembly type, they *must* match to perform + // an operation. For this reason we verify if the WebAssembly type is different, in which + // case we first coerce the operands to the same type before performing the operation. + // For big integers we can ignore this as we will call into compiler-rt which handles this. + const result = switch (op) { + .shr, .shl => result: { + const lhs_wasm_bits = toWasmBits(@intCast(lhs_ty.bitSize(zcu))) orelse { + return cg.fail("TODO: implement '{s}' for types larger than 128 bits", .{@tagName(op)}); + }; + const rhs_wasm_bits = toWasmBits(@intCast(rhs_ty.bitSize(zcu))).?; + const new_rhs = if (lhs_wasm_bits != rhs_wasm_bits and lhs_wasm_bits != 128) + try (try cg.intcast(rhs, rhs_ty, lhs_ty)).toLocal(cg, lhs_ty) + else + rhs; + break :result try cg.wrapBinOp(lhs, new_rhs, lhs_ty, op); + }, + else => try cg.wrapBinOp(lhs, rhs, lhs_ty, op), + }; + + return cg.finishAir(inst, result, &.{ bin_op.lhs, bin_op.rhs }); +} + +/// Performs a wrapping binary operation. +/// Asserts rhs is not a stack value when lhs also isn't. +/// NOTE: Leaves the result on the stack when its Type is <= 64 bits +fn wrapBinOp(cg: *CodeGen, lhs: WValue, rhs: WValue, ty: Type, op: Op) InnerError!WValue { + const bin_local = try cg.binOp(lhs, rhs, ty, op); + return cg.wrapOperand(bin_local, ty); +} + +/// Wraps an operand based on a given type's bitsize. +/// Asserts `Type` is <= 128 bits. 
+/// NOTE: When the Type is <= 64 bits, leaves the value on top of the stack, if wrapping was needed. +fn wrapOperand(cg: *CodeGen, operand: WValue, ty: Type) InnerError!WValue { + const zcu = cg.pt.zcu; + assert(ty.abiSize(zcu) <= 16); + const int_bits: u16 = @intCast(ty.bitSize(zcu)); // TODO use ty.intInfo(zcu).bits + const wasm_bits = toWasmBits(int_bits) orelse { + return cg.fail("TODO: Implement wrapOperand for bitsize '{d}'", .{int_bits}); + }; + + if (wasm_bits == int_bits) return operand; + + switch (wasm_bits) { + 32 => { + try cg.emitWValue(operand); + if (ty.isSignedInt(zcu)) { + try cg.addImm32(32 - int_bits); + try cg.addTag(.i32_shl); + try cg.addImm32(32 - int_bits); + try cg.addTag(.i32_shr_s); + } else { + try cg.addImm32(~@as(u32, 0) >> @intCast(32 - int_bits)); + try cg.addTag(.i32_and); + } + return .stack; + }, + 64 => { + try cg.emitWValue(operand); + if (ty.isSignedInt(zcu)) { + try cg.addImm64(64 - int_bits); + try cg.addTag(.i64_shl); + try cg.addImm64(64 - int_bits); + try cg.addTag(.i64_shr_s); + } else { + try cg.addImm64(~@as(u64, 0) >> @intCast(64 - int_bits)); + try cg.addTag(.i64_and); + } + return .stack; + }, + 128 => { + assert(operand != .stack); + const result = try cg.allocStack(ty); + + try cg.emitWValue(result); + _ = try cg.load(operand, Type.u64, 0); + try cg.store(.stack, .stack, Type.u64, result.offset()); + + try cg.emitWValue(result); + _ = try cg.load(operand, Type.u64, 8); + if (ty.isSignedInt(zcu)) { + try cg.addImm64(128 - int_bits); + try cg.addTag(.i64_shl); + try cg.addImm64(128 - int_bits); + try cg.addTag(.i64_shr_s); + } else { + try cg.addImm64(~@as(u64, 0) >> @intCast(128 - int_bits)); + try cg.addTag(.i64_and); + } + try cg.store(.stack, .stack, Type.u64, result.offset() + 8); + + return result; + }, + else => unreachable, + } +} + +fn lowerPtr(cg: *CodeGen, ptr_val: InternPool.Index, prev_offset: u64) InnerError!WValue { + const pt = cg.pt; + const zcu = pt.zcu; + const ptr = zcu.intern_pool.indexToKey(ptr_val).ptr; + const offset: u64 = prev_offset + ptr.byte_offset; + return switch (ptr.base_addr) { + .nav => |nav| return .{ .nav_ref = .{ .nav_index = nav, .offset = @intCast(offset) } }, + .uav => |uav| return .{ .uav_ref = .{ .ip_index = uav.val, .offset = @intCast(offset), .orig_ptr_ty = uav.orig_ty } }, + .int => return cg.lowerConstant(try pt.intValue(Type.usize, offset), Type.usize), + .eu_payload => |eu_ptr| try cg.lowerPtr( + eu_ptr, + offset + codegen.errUnionPayloadOffset( + Value.fromInterned(eu_ptr).typeOf(zcu).childType(zcu), + zcu, + ), + ), + .opt_payload => |opt_ptr| return cg.lowerPtr(opt_ptr, offset), + .field => |field| { + const base_ptr = Value.fromInterned(field.base); + const base_ty = base_ptr.typeOf(zcu).childType(zcu); + const field_off: u64 = switch (base_ty.zigTypeTag(zcu)) { + .pointer => off: { + assert(base_ty.isSlice(zcu)); + break :off switch (field.index) { + Value.slice_ptr_index => 0, + Value.slice_len_index => @divExact(cg.target.ptrBitWidth(), 8), + else => unreachable, + }; + }, + .@"struct" => switch (base_ty.containerLayout(zcu)) { + .auto => base_ty.structFieldOffset(@intCast(field.index), zcu), + .@"extern", .@"packed" => unreachable, + }, + .@"union" => switch (base_ty.containerLayout(zcu)) { + .auto => base_ty.structFieldOffset(@intCast(field.index), zcu), + .@"extern", .@"packed" => unreachable, + }, + else => unreachable, + }; + return cg.lowerPtr(field.base, offset + field_off); + }, + .arr_elem, .comptime_field, .comptime_alloc => unreachable, + }; +} + +/// Asserts that `isByRef` 
returns `false` for `ty`. +fn lowerConstant(cg: *CodeGen, val: Value, ty: Type) InnerError!WValue { + const pt = cg.pt; + const zcu = pt.zcu; + assert(!isByRef(ty, zcu, cg.target)); + const ip = &zcu.intern_pool; + if (val.isUndef(zcu)) return cg.emitUndefined(ty); + + switch (ip.indexToKey(val.ip_index)) { + .int_type, + .ptr_type, + .array_type, + .vector_type, + .opt_type, + .anyframe_type, + .error_union_type, + .simple_type, + .struct_type, + .tuple_type, + .union_type, + .opaque_type, + .enum_type, + .func_type, + .error_set_type, + .inferred_error_set_type, + => unreachable, // types, not values + + .undef => unreachable, // handled above + .simple_value => |simple_value| switch (simple_value) { + .undefined, + .void, + .null, + .empty_tuple, + .@"unreachable", + => unreachable, // non-runtime values + .false, .true => return .{ .imm32 = switch (simple_value) { + .false => 0, + .true => 1, + else => unreachable, + } }, + }, + .variable, + .@"extern", + .func, + .enum_literal, + .empty_enum_value, + => unreachable, // non-runtime values + .int => { + const int_info = ty.intInfo(zcu); + switch (int_info.signedness) { + .signed => switch (int_info.bits) { + 0...32 => return .{ .imm32 = @bitCast(@as(i32, @intCast(val.toSignedInt(zcu)))) }, + 33...64 => return .{ .imm64 = @bitCast(val.toSignedInt(zcu)) }, + else => unreachable, + }, + .unsigned => switch (int_info.bits) { + 0...32 => return .{ .imm32 = @intCast(val.toUnsignedInt(zcu)) }, + 33...64 => return .{ .imm64 = val.toUnsignedInt(zcu) }, + else => unreachable, + }, + } + }, + .err => |err| { + const int = try pt.getErrorValue(err.name); + return .{ .imm32 = int }; + }, + .error_union => |error_union| { + const err_int_ty = try pt.errorIntType(); + const err_ty, const err_val = switch (error_union.val) { + .err_name => |err_name| .{ + ty.errorUnionSet(zcu), + Value.fromInterned(try pt.intern(.{ .err = .{ + .ty = ty.errorUnionSet(zcu).toIntern(), + .name = err_name, + } })), + }, + .payload => .{ + err_int_ty, + try pt.intValue(err_int_ty, 0), + }, + }; + const payload_type = ty.errorUnionPayload(zcu); + if (!payload_type.hasRuntimeBitsIgnoreComptime(zcu)) { + // We use the error type directly as the type. 
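+            // (Illustration: an `anyerror!void` constant has a zero-bit payload,
+            // so it lowers to nothing more than the error value itself.)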
+ return cg.lowerConstant(err_val, err_ty); + } + + return cg.fail("Wasm TODO: lowerConstant error union with non-zero-bit payload type", .{}); + }, + .enum_tag => |enum_tag| { + const int_tag_ty = ip.typeOf(enum_tag.int); + return cg.lowerConstant(Value.fromInterned(enum_tag.int), Type.fromInterned(int_tag_ty)); + }, + .float => |float| switch (float.storage) { + .f16 => |f16_val| return .{ .imm32 = @as(u16, @bitCast(f16_val)) }, + .f32 => |f32_val| return .{ .float32 = f32_val }, + .f64 => |f64_val| return .{ .float64 = f64_val }, + else => unreachable, + }, + .slice => unreachable, // isByRef == true + .ptr => return cg.lowerPtr(val.toIntern(), 0), + .opt => if (ty.optionalReprIsPayload(zcu)) { + const pl_ty = ty.optionalChild(zcu); + if (val.optionalValue(zcu)) |payload| { + return cg.lowerConstant(payload, pl_ty); + } else { + return .{ .imm32 = 0 }; + } + } else { + return .{ .imm32 = @intFromBool(!val.isNull(zcu)) }; + }, + .aggregate => switch (ip.indexToKey(ty.ip_index)) { + .array_type => return cg.fail("Wasm TODO: LowerConstant for {f}", .{ty.fmt(pt)}), + .vector_type => { + assert(determineSimdStoreStrategy(ty, zcu, cg.target) == .direct); + var buf: [16]u8 = undefined; + val.writeToMemory(pt, &buf) catch unreachable; + return cg.storeSimdImmd(buf); + }, + .struct_type => { + const struct_type = ip.loadStructType(ty.toIntern()); + // non-packed structs are not handled in this function because they + // are by-ref types. + assert(struct_type.layout == .@"packed"); + var buf: [8]u8 = .{0} ** 8; // zero the buffer so we do not read 0xaa as integer + val.writeToPackedMemory(ty, pt, &buf, 0) catch unreachable; + const backing_int_ty = Type.fromInterned(struct_type.backingIntTypeUnordered(ip)); + const int_val = try pt.intValue( + backing_int_ty, + mem.readInt(u64, &buf, .little), + ); + return cg.lowerConstant(int_val, backing_int_ty); + }, + else => unreachable, + }, + .un => { + const int_type = try pt.intType(.unsigned, @intCast(ty.bitSize(zcu))); + + var buf: [8]u8 = .{0} ** 8; // zero the buffer so we do not read 0xaa as integer + val.writeToPackedMemory(ty, pt, &buf, 0) catch unreachable; + const int_val = try pt.intValue( + int_type, + mem.readInt(u64, &buf, .little), + ); + return cg.lowerConstant(int_val, int_type); + }, + .memoized_call => unreachable, + } +} + +/// Stores the value as a 128bit-immediate value by storing it inside +/// the list and returning the index into this list as `WValue`. 
+fn storeSimdImmd(cg: *CodeGen, value: [16]u8) !WValue { + const index = @as(u32, @intCast(cg.simd_immediates.items.len)); + try cg.simd_immediates.append(cg.gpa, value); + return .{ .imm128 = index }; +} + +fn emitUndefined(cg: *CodeGen, ty: Type) InnerError!WValue { + const zcu = cg.pt.zcu; + const ip = &zcu.intern_pool; + switch (ty.zigTypeTag(zcu)) { + .bool, .error_set => return .{ .imm32 = 0xaaaaaaaa }, + .int, .@"enum" => switch (ty.intInfo(zcu).bits) { + 0...32 => return .{ .imm32 = 0xaaaaaaaa }, + 33...64 => return .{ .imm64 = 0xaaaaaaaaaaaaaaaa }, + else => unreachable, + }, + .float => switch (ty.floatBits(cg.target)) { + 16 => return .{ .imm32 = 0xaaaaaaaa }, + 32 => return .{ .float32 = @as(f32, @bitCast(@as(u32, 0xaaaaaaaa))) }, + 64 => return .{ .float64 = @as(f64, @bitCast(@as(u64, 0xaaaaaaaaaaaaaaaa))) }, + else => unreachable, + }, + .pointer => switch (cg.ptr_size) { + .wasm32 => return .{ .imm32 = 0xaaaaaaaa }, + .wasm64 => return .{ .imm64 = 0xaaaaaaaaaaaaaaaa }, + }, + .optional => { + const pl_ty = ty.optionalChild(zcu); + if (ty.optionalReprIsPayload(zcu)) { + return cg.emitUndefined(pl_ty); + } + return .{ .imm32 = 0xaaaaaaaa }; + }, + .error_union => { + return .{ .imm32 = 0xaaaaaaaa }; + }, + .@"struct" => { + const packed_struct = zcu.typeToPackedStruct(ty).?; + return cg.emitUndefined(Type.fromInterned(packed_struct.backingIntTypeUnordered(ip))); + }, + .@"union" => switch (ty.containerLayout(zcu)) { + .@"packed" => switch (ty.bitSize(zcu)) { + 0...32 => return .{ .imm32 = 0xaaaaaaaa }, + 33...64 => return .{ .imm64 = 0xaaaaaaaaaaaaaaaa }, + else => unreachable, + }, + else => unreachable, + }, + else => return cg.fail("Wasm TODO: emitUndefined for type: {t}\n", .{ty.zigTypeTag(zcu)}), + } +} + +fn airBlock(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const ty_pl = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl; + const extra = cg.air.extraData(Air.Block, ty_pl.payload); + try cg.lowerBlock(inst, ty_pl.ty.toType(), @ptrCast(cg.air.extra.items[extra.end..][0..extra.data.body_len])); +} + +fn lowerBlock(cg: *CodeGen, inst: Air.Inst.Index, block_ty: Type, body: []const Air.Inst.Index) InnerError!void { + const zcu = cg.pt.zcu; + // if wasm_block_ty is non-empty, we create a register to store the temporary value + const block_result: WValue = if (block_ty.hasRuntimeBitsIgnoreComptime(zcu)) + try cg.allocLocal(block_ty) + else + .none; + + try cg.startBlock(.block, .empty); + // Here we set the current block idx, so breaks know the depth to jump + // to when breaking out. 
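+    // For example, a `br` emitted three blocks deeper than this one computes its
+    // relative branch depth as `block_depth - label`, i.e. 3 in that case (see
+    // `airBr` below).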
+ try cg.blocks.putNoClobber(cg.gpa, inst, .{ + .label = cg.block_depth, + .value = block_result, + }); + + try cg.genBody(body); + try cg.endBlock(); + + const liveness = cg.liveness.getBlock(inst); + try cg.currentBranch().values.ensureUnusedCapacity(cg.gpa, liveness.deaths.len); + + return cg.finishAir(inst, block_result, &.{}); +} + +/// appends a new wasm block to the code section and increases the `block_depth` by 1 +fn startBlock(cg: *CodeGen, block_tag: std.wasm.Opcode, block_type: std.wasm.BlockType) !void { + cg.block_depth += 1; + try cg.addInst(.{ + .tag = Mir.Inst.Tag.fromOpcode(block_tag), + .data = .{ .block_type = block_type }, + }); +} + +/// Ends the current wasm block and decreases the `block_depth` by 1 +fn endBlock(cg: *CodeGen) !void { + try cg.addTag(.end); + cg.block_depth -= 1; +} + +fn airLoop(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const ty_pl = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl; + const loop = cg.air.extraData(Air.Block, ty_pl.payload); + const body: []const Air.Inst.Index = @ptrCast(cg.air.extra.items[loop.end..][0..loop.data.body_len]); + + // result type of loop is always 'noreturn', meaning we can always + // emit the wasm type 'block_empty'. + try cg.startBlock(.loop, .empty); + + try cg.loops.putNoClobber(cg.gpa, inst, cg.block_depth); + defer assert(cg.loops.remove(inst)); + + try cg.genBody(body); + try cg.endBlock(); + + return cg.finishAir(inst, .none, &.{}); +} + +fn airCondBr(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const pl_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].pl_op; + const condition = try cg.resolveInst(pl_op.operand); + const extra = cg.air.extraData(Air.CondBr, pl_op.payload); + const then_body: []const Air.Inst.Index = @ptrCast(cg.air.extra.items[extra.end..][0..extra.data.then_body_len]); + const else_body: []const Air.Inst.Index = @ptrCast(cg.air.extra.items[extra.end + then_body.len ..][0..extra.data.else_body_len]); + const liveness_condbr = cg.liveness.getCondBr(inst); + + // result type is always noreturn, so use `block_empty` as type. + try cg.startBlock(.block, .empty); + // emit the conditional value + try cg.emitWValue(condition); + + // we inserted the block in front of the condition + // so now check if condition matches. 
If not, break outside this block + // and continue with the then codepath + try cg.addLabel(.br_if, 0); + + try cg.branches.ensureUnusedCapacity(cg.gpa, 2); + { + cg.branches.appendAssumeCapacity(.{}); + try cg.currentBranch().values.ensureUnusedCapacity(cg.gpa, @as(u32, @intCast(liveness_condbr.else_deaths.len))); + defer { + var else_stack = cg.branches.pop().?; + else_stack.deinit(cg.gpa); + } + try cg.genBody(else_body); + try cg.endBlock(); + } + + // Outer block that matches the condition + { + cg.branches.appendAssumeCapacity(.{}); + try cg.currentBranch().values.ensureUnusedCapacity(cg.gpa, @as(u32, @intCast(liveness_condbr.then_deaths.len))); + defer { + var then_stack = cg.branches.pop().?; + then_stack.deinit(cg.gpa); + } + try cg.genBody(then_body); + } + + return cg.finishAir(inst, .none, &.{}); +} + +fn airCmp(cg: *CodeGen, inst: Air.Inst.Index, op: std.math.CompareOperator) InnerError!void { + const bin_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].bin_op; + + const lhs = try cg.resolveInst(bin_op.lhs); + const rhs = try cg.resolveInst(bin_op.rhs); + const operand_ty = cg.typeOf(bin_op.lhs); + const result = try cg.cmp(lhs, rhs, operand_ty, op); + return cg.finishAir(inst, result, &.{ bin_op.lhs, bin_op.rhs }); +} + +/// Compares two operands. +/// Asserts rhs is not a stack value when the lhs isn't a stack value either +/// NOTE: This leaves the result on top of the stack, rather than a new local. +fn cmp(cg: *CodeGen, lhs: WValue, rhs: WValue, ty: Type, op: std.math.CompareOperator) InnerError!WValue { + assert(!(lhs != .stack and rhs == .stack)); + const zcu = cg.pt.zcu; + if (ty.zigTypeTag(zcu) == .optional and !ty.optionalReprIsPayload(zcu)) { + const payload_ty = ty.optionalChild(zcu); + if (payload_ty.hasRuntimeBitsIgnoreComptime(zcu)) { + // When we hit this case, we must check the value of optionals + // that are not pointers. This means first checking against non-null for + // both lhs and rhs, as well as checking the payload are matching of lhs and rhs + return cg.cmpOptionals(lhs, rhs, ty, op); + } + } else if (ty.isAnyFloat()) { + return cg.cmpFloat(ty, lhs, rhs, op); + } else if (isByRef(ty, zcu, cg.target)) { + return cg.cmpBigInt(lhs, rhs, ty, op); + } + + const signedness: std.builtin.Signedness = blk: { + // by default we tell the operand type is unsigned (i.e. bools and enum values) + if (ty.zigTypeTag(zcu) != .int) break :blk .unsigned; + + // incase of an actual integer, we emit the correct signedness + break :blk ty.intInfo(zcu).signedness; + }; + + // ensure that when we compare pointers, we emit + // the true pointer of a stack value, rather than the stack pointer. + try cg.lowerToStack(lhs); + try cg.lowerToStack(rhs); + + const opcode: std.wasm.Opcode = buildOpcode(.{ + .valtype1 = typeToValtype(ty, zcu, cg.target), + .op = switch (op) { + .lt => .lt, + .lte => .le, + .eq => .eq, + .neq => .ne, + .gte => .ge, + .gt => .gt, + }, + .signedness = signedness, + }); + try cg.addTag(Mir.Inst.Tag.fromOpcode(opcode)); + + return .stack; +} + +/// Compares two floats. +/// NOTE: Leaves the result of the comparison on top of the stack. 
+fn cmpFloat(cg: *CodeGen, ty: Type, lhs: WValue, rhs: WValue, cmp_op: std.math.CompareOperator) InnerError!WValue { + const float_bits = ty.floatBits(cg.target); + + const op: Op = switch (cmp_op) { + .lt => .lt, + .lte => .le, + .eq => .eq, + .neq => .ne, + .gte => .ge, + .gt => .gt, + }; + + switch (float_bits) { + 16 => { + _ = try cg.fpext(lhs, Type.f16, Type.f32); + _ = try cg.fpext(rhs, Type.f16, Type.f32); + const opcode = buildOpcode(.{ .op = op, .valtype1 = .f32 }); + try cg.addTag(Mir.Inst.Tag.fromOpcode(opcode)); + return .stack; + }, + 32, 64 => { + try cg.emitWValue(lhs); + try cg.emitWValue(rhs); + const val_type: std.wasm.Valtype = if (float_bits == 32) .f32 else .f64; + const opcode = buildOpcode(.{ .op = op, .valtype1 = val_type }); + try cg.addTag(Mir.Inst.Tag.fromOpcode(opcode)); + return .stack; + }, + 80, 128 => { + const intrinsic = floatCmpIntrinsic(cmp_op, float_bits); + const result = try cg.callIntrinsic(intrinsic, &.{ ty.ip_index, ty.ip_index }, Type.bool, &.{ lhs, rhs }); + return cg.cmp(result, .{ .imm32 = 0 }, Type.i32, cmp_op); + }, + else => unreachable, + } +} + +fn airCmpVector(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + _ = inst; + return cg.fail("TODO implement airCmpVector for wasm", .{}); +} + +fn airCmpLtErrorsLen(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const un_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].un_op; + const operand = try cg.resolveInst(un_op); + + try cg.emitWValue(operand); + const pt = cg.pt; + const err_int_ty = try pt.errorIntType(); + try cg.addTag(.errors_len); + const result = try cg.cmp(.stack, .stack, err_int_ty, .lt); + + return cg.finishAir(inst, result, &.{un_op}); +} + +fn airBr(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const br = cg.air.instructions.items(.data)[@intFromEnum(inst)].br; + const block = cg.blocks.get(br.block_inst).?; + + // if operand has codegen bits we should break with a value + if (block.value != .none) { + const operand = try cg.resolveInst(br.operand); + try cg.lowerToStack(operand); + try cg.addLocal(.local_set, block.value.local.value); + } + + // We map every block to its block index. 
+ // We then determine how far we have to jump to it by subtracting it from current block depth
+ const idx: u32 = cg.block_depth - block.label;
+ try cg.addLabel(.br, idx);
+
+ return cg.finishAir(inst, .none, &.{br.operand});
+}
+
+fn airRepeat(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
+ const repeat = cg.air.instructions.items(.data)[@intFromEnum(inst)].repeat;
+ const loop_label = cg.loops.get(repeat.loop_inst).?;
+
+ const idx: u32 = cg.block_depth - loop_label;
+ try cg.addLabel(.br, idx);
+
+ return cg.finishAir(inst, .none, &.{});
+}
+
+fn airNot(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
+ const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
+
+ const operand = try cg.resolveInst(ty_op.operand);
+ const operand_ty = cg.typeOf(ty_op.operand);
+ const pt = cg.pt;
+ const zcu = pt.zcu;
+
+ const result = result: {
+ if (operand_ty.zigTypeTag(zcu) == .bool) {
+ try cg.emitWValue(operand);
+ try cg.addTag(.i32_eqz);
+ const not_tmp = try cg.allocLocal(operand_ty);
+ try cg.addLocal(.local_set, not_tmp.local.value);
+ break :result not_tmp;
+ } else {
+ const int_info = operand_ty.intInfo(zcu);
+ const wasm_bits = toWasmBits(int_info.bits) orelse {
+ return cg.fail("TODO: Implement binary NOT for {f}", .{operand_ty.fmt(pt)});
+ };
+
+ switch (wasm_bits) {
+ 32 => {
+ try cg.emitWValue(operand);
+ try cg.addImm32(switch (int_info.signedness) {
+ .unsigned => ~@as(u32, 0) >> @intCast(32 - int_info.bits),
+ .signed => ~@as(u32, 0),
+ });
+ try cg.addTag(.i32_xor);
+ break :result .stack;
+ },
+ 64 => {
+ try cg.emitWValue(operand);
+ try cg.addImm64(switch (int_info.signedness) {
+ .unsigned => ~@as(u64, 0) >> @intCast(64 - int_info.bits),
+ .signed => ~@as(u64, 0),
+ });
+ try cg.addTag(.i64_xor);
+ break :result .stack;
+ },
+ 128 => {
+ const ptr = try cg.allocStack(operand_ty);
+
+ try cg.emitWValue(ptr);
+ _ = try cg.load(operand, Type.u64, 0);
+ try cg.addImm64(~@as(u64, 0));
+ try cg.addTag(.i64_xor);
+ try cg.store(.stack, .stack, Type.u64, ptr.offset());
+
+ try cg.emitWValue(ptr);
+ _ = try cg.load(operand, Type.u64, 8);
+ try cg.addImm64(switch (int_info.signedness) {
+ .unsigned => ~@as(u64, 0) >> @intCast(128 - int_info.bits),
+ .signed => ~@as(u64, 0),
+ });
+ try cg.addTag(.i64_xor);
+ try cg.store(.stack, .stack, Type.u64, ptr.offset() + 8);
+
+ break :result ptr;
+ },
+ else => unreachable,
+ }
+ }
+ };
+ return cg.finishAir(inst, result, &.{ty_op.operand});
+}
+
+fn airTrap(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
+ try cg.addTag(.@"unreachable");
+ return cg.finishAir(inst, .none, &.{});
+}
+
+fn airBreakpoint(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
+ // unsupported by wasm itself.
Can be implemented once we support DWARF + // for wasm + try cg.addTag(.@"unreachable"); + return cg.finishAir(inst, .none, &.{}); +} + +fn airUnreachable(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + try cg.addTag(.@"unreachable"); + return cg.finishAir(inst, .none, &.{}); +} + +fn airBitcast(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const zcu = cg.pt.zcu; + const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; + const operand = try cg.resolveInst(ty_op.operand); + const wanted_ty = cg.typeOfIndex(inst); + const given_ty = cg.typeOf(ty_op.operand); + + const bit_size = given_ty.bitSize(zcu); + const needs_wrapping = (given_ty.isSignedInt(zcu) != wanted_ty.isSignedInt(zcu)) and + bit_size != 32 and bit_size != 64 and bit_size != 128; + + const result = result: { + if (given_ty.isAnyFloat() or wanted_ty.isAnyFloat()) { + break :result try cg.bitcast(wanted_ty, given_ty, operand); + } + + if (isByRef(given_ty, zcu, cg.target) and !isByRef(wanted_ty, zcu, cg.target)) { + const loaded_memory = try cg.load(operand, wanted_ty, 0); + if (needs_wrapping) { + break :result try cg.wrapOperand(loaded_memory, wanted_ty); + } else { + break :result loaded_memory; + } + } + if (!isByRef(given_ty, zcu, cg.target) and isByRef(wanted_ty, zcu, cg.target)) { + const stack_memory = try cg.allocStack(wanted_ty); + try cg.store(stack_memory, operand, given_ty, 0); + if (needs_wrapping) { + break :result try cg.wrapOperand(stack_memory, wanted_ty); + } else { + break :result stack_memory; + } + } + + if (needs_wrapping) { + break :result try cg.wrapOperand(operand, wanted_ty); + } + + break :result switch (operand) { + // for stack offset, return a pointer to this offset. + .stack_offset => try cg.buildPointerOffset(operand, 0, .new), + else => cg.reuseOperand(ty_op.operand, operand), + }; + }; + return cg.finishAir(inst, result, &.{ty_op.operand}); +} + +fn bitcast(cg: *CodeGen, wanted_ty: Type, given_ty: Type, operand: WValue) InnerError!WValue { + const zcu = cg.pt.zcu; + // if we bitcast a float to or from an integer we must use the 'reinterpret' instruction + if (!(wanted_ty.isAnyFloat() or given_ty.isAnyFloat())) return operand; + if (wanted_ty.ip_index == .f16_type or given_ty.ip_index == .f16_type) return operand; + if (wanted_ty.bitSize(zcu) > 64) return operand; + assert((wanted_ty.isInt(zcu) and given_ty.isAnyFloat()) or (wanted_ty.isAnyFloat() and given_ty.isInt(zcu))); + + const opcode = buildOpcode(.{ + .op = .reinterpret, + .valtype1 = typeToValtype(wanted_ty, zcu, cg.target), + .valtype2 = typeToValtype(given_ty, zcu, cg.target), + }); + try cg.emitWValue(operand); + try cg.addTag(Mir.Inst.Tag.fromOpcode(opcode)); + return .stack; +} + +fn airStructFieldPtr(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const zcu = cg.pt.zcu; + const ty_pl = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl; + const extra = cg.air.extraData(Air.StructField, ty_pl.payload); + + const struct_ptr = try cg.resolveInst(extra.data.struct_operand); + const struct_ptr_ty = cg.typeOf(extra.data.struct_operand); + const struct_ty = struct_ptr_ty.childType(zcu); + const result = try cg.structFieldPtr(inst, extra.data.struct_operand, struct_ptr, struct_ptr_ty, struct_ty, extra.data.field_index); + return cg.finishAir(inst, result, &.{extra.data.struct_operand}); +} + +fn airStructFieldPtrIndex(cg: *CodeGen, inst: Air.Inst.Index, index: u32) InnerError!void { + const zcu = cg.pt.zcu; + const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; + 
const struct_ptr = try cg.resolveInst(ty_op.operand); + const struct_ptr_ty = cg.typeOf(ty_op.operand); + const struct_ty = struct_ptr_ty.childType(zcu); + + const result = try cg.structFieldPtr(inst, ty_op.operand, struct_ptr, struct_ptr_ty, struct_ty, index); + return cg.finishAir(inst, result, &.{ty_op.operand}); +} + +fn structFieldPtr( + cg: *CodeGen, + inst: Air.Inst.Index, + ref: Air.Inst.Ref, + struct_ptr: WValue, + struct_ptr_ty: Type, + struct_ty: Type, + index: u32, +) InnerError!WValue { + const pt = cg.pt; + const zcu = pt.zcu; + const result_ty = cg.typeOfIndex(inst); + const struct_ptr_ty_info = struct_ptr_ty.ptrInfo(zcu); + + const offset = switch (struct_ty.containerLayout(zcu)) { + .@"packed" => switch (struct_ty.zigTypeTag(zcu)) { + .@"struct" => offset: { + if (result_ty.ptrInfo(zcu).packed_offset.host_size != 0) { + break :offset @as(u32, 0); + } + const struct_type = zcu.typeToStruct(struct_ty).?; + break :offset @divExact(zcu.structPackedFieldBitOffset(struct_type, index) + struct_ptr_ty_info.packed_offset.bit_offset, 8); + }, + .@"union" => 0, + else => unreachable, + }, + else => struct_ty.structFieldOffset(index, zcu), + }; + // save a load and store when we can simply reuse the operand + if (offset == 0) { + return cg.reuseOperand(ref, struct_ptr); + } + switch (struct_ptr) { + .stack_offset => |stack_offset| { + return .{ .stack_offset = .{ .value = stack_offset.value + @as(u32, @intCast(offset)), .references = 1 } }; + }, + else => return cg.buildPointerOffset(struct_ptr, offset, .new), + } +} + +fn airStructFieldVal(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const pt = cg.pt; + const zcu = pt.zcu; + const ip = &zcu.intern_pool; + const ty_pl = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl; + const struct_field = cg.air.extraData(Air.StructField, ty_pl.payload).data; + + const struct_ty = cg.typeOf(struct_field.struct_operand); + const operand = try cg.resolveInst(struct_field.struct_operand); + const field_index = struct_field.field_index; + const field_ty = struct_ty.fieldType(field_index, zcu); + if (!field_ty.hasRuntimeBitsIgnoreComptime(zcu)) return cg.finishAir(inst, .none, &.{struct_field.struct_operand}); + + const result: WValue = switch (struct_ty.containerLayout(zcu)) { + .@"packed" => switch (struct_ty.zigTypeTag(zcu)) { + .@"struct" => result: { + const packed_struct = zcu.typeToPackedStruct(struct_ty).?; + const offset = zcu.structPackedFieldBitOffset(packed_struct, field_index); + const backing_ty = Type.fromInterned(packed_struct.backingIntTypeUnordered(ip)); + const host_bits = backing_ty.intInfo(zcu).bits; + + const const_wvalue: WValue = if (33 <= host_bits and host_bits <= 64) + .{ .imm64 = offset } + else + .{ .imm32 = offset }; + + // for first field we don't require any shifting + const shifted_value = if (offset == 0) + operand + else + try cg.binOp(operand, const_wvalue, backing_ty, .shr); + + if (field_ty.zigTypeTag(zcu) == .float) { + const int_type = try pt.intType(.unsigned, @as(u16, @intCast(field_ty.bitSize(zcu)))); + const truncated = try cg.trunc(shifted_value, int_type, backing_ty); + break :result try cg.bitcast(field_ty, int_type, truncated); + } else if (field_ty.isPtrAtRuntime(zcu) and packed_struct.field_types.len == 1) { + // In this case we do not have to perform any transformations, + // we can simply reuse the operand. 
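+ // (a single-field packed struct holding a pointer is represented by the pointer itself, so no shift or truncation is needed)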
+ break :result cg.reuseOperand(struct_field.struct_operand, operand); + } else if (field_ty.isPtrAtRuntime(zcu)) { + const int_type = try pt.intType(.unsigned, @as(u16, @intCast(field_ty.bitSize(zcu)))); + break :result try cg.trunc(shifted_value, int_type, backing_ty); + } + break :result try cg.trunc(shifted_value, field_ty, backing_ty); + }, + .@"union" => result: { + if (isByRef(struct_ty, zcu, cg.target)) { + if (!isByRef(field_ty, zcu, cg.target)) { + break :result try cg.load(operand, field_ty, 0); + } else { + const new_stack_val = try cg.allocStack(field_ty); + try cg.store(new_stack_val, operand, field_ty, 0); + break :result new_stack_val; + } + } + + const union_int_type = try pt.intType(.unsigned, @as(u16, @intCast(struct_ty.bitSize(zcu)))); + if (field_ty.zigTypeTag(zcu) == .float) { + const int_type = try pt.intType(.unsigned, @as(u16, @intCast(field_ty.bitSize(zcu)))); + const truncated = try cg.trunc(operand, int_type, union_int_type); + break :result try cg.bitcast(field_ty, int_type, truncated); + } else if (field_ty.isPtrAtRuntime(zcu)) { + const int_type = try pt.intType(.unsigned, @as(u16, @intCast(field_ty.bitSize(zcu)))); + break :result try cg.trunc(operand, int_type, union_int_type); + } + break :result try cg.trunc(operand, field_ty, union_int_type); + }, + else => unreachable, + }, + else => result: { + const offset = std.math.cast(u32, struct_ty.structFieldOffset(field_index, zcu)) orelse { + return cg.fail("Field type '{f}' too big to fit into stack frame", .{field_ty.fmt(pt)}); + }; + if (isByRef(field_ty, zcu, cg.target)) { + switch (operand) { + .stack_offset => |stack_offset| { + break :result .{ .stack_offset = .{ .value = stack_offset.value + offset, .references = 1 } }; + }, + else => break :result try cg.buildPointerOffset(operand, offset, .new), + } + } + break :result try cg.load(operand, field_ty, offset); + }, + }; + + return cg.finishAir(inst, result, &.{struct_field.struct_operand}); +} + +fn airSwitchBr(cg: *CodeGen, inst: Air.Inst.Index, is_dispatch_loop: bool) InnerError!void { + const pt = cg.pt; + const zcu = pt.zcu; + + const switch_br = cg.air.unwrapSwitch(inst); + const target_ty = cg.typeOf(switch_br.operand); + + assert(target_ty.hasRuntimeBitsIgnoreComptime(zcu)); + + // swap target value with placeholder local, for dispatching + const target = if (is_dispatch_loop) target: { + const initial_target = try cg.resolveInst(switch_br.operand); + const target: WValue = try cg.allocLocal(target_ty); + try cg.lowerToStack(initial_target); + try cg.addLocal(.local_set, target.local.value); + + try cg.startBlock(.loop, .empty); // dispatch loop start + try cg.blocks.putNoClobber(cg.gpa, inst, .{ + .label = cg.block_depth, + .value = target, + }); + + break :target target; + } else try cg.resolveInst(switch_br.operand); + + const liveness = try cg.liveness.getSwitchBr(cg.gpa, inst, switch_br.cases_len + 1); + defer cg.gpa.free(liveness.deaths); + + const has_else_body = switch_br.else_body_len != 0; + const branch_count = switch_br.cases_len + 1; // if else branch is missing, we trap when failing all conditions + try cg.branches.ensureUnusedCapacity(cg.gpa, switch_br.cases_len + @intFromBool(has_else_body)); + + if (switch_br.cases_len == 0) { + assert(has_else_body); + + var it = switch_br.iterateCases(); + const else_body = it.elseBody(); + + cg.branches.appendAssumeCapacity(.{}); + const else_deaths = liveness.deaths.len - 1; + try cg.currentBranch().values.ensureUnusedCapacity(cg.gpa, liveness.deaths[else_deaths].len); + defer { + var 
else_branch = cg.branches.pop().?; + else_branch.deinit(cg.gpa); + } + try cg.genBody(else_body); + + if (is_dispatch_loop) { + try cg.endBlock(); // dispatch loop end + } + return cg.finishAir(inst, .none, &.{}); + } + + var min: ?Value = null; + var max: ?Value = null; + var branching_size: u32 = 0; // single item +1, range +2 + + { + var cases_it = switch_br.iterateCases(); + while (cases_it.next()) |case| { + for (case.items) |item| { + const val = Value.fromInterned(item.toInterned().?); + if (min == null or val.compareHetero(.lt, min.?, zcu)) min = val; + if (max == null or val.compareHetero(.gt, max.?, zcu)) max = val; + branching_size += 1; + } + for (case.ranges) |range| { + const low = Value.fromInterned(range[0].toInterned().?); + if (min == null or low.compareHetero(.lt, min.?, zcu)) min = low; + const high = Value.fromInterned(range[1].toInterned().?); + if (max == null or high.compareHetero(.gt, max.?, zcu)) max = high; + branching_size += 2; + } + } + } + + var min_space: Value.BigIntSpace = undefined; + const min_bigint = min.?.toBigInt(&min_space, zcu); + var max_space: Value.BigIntSpace = undefined; + const max_bigint = max.?.toBigInt(&max_space, zcu); + const limbs = try cg.gpa.alloc( + std.math.big.Limb, + @max(min_bigint.limbs.len, max_bigint.limbs.len) + 1, + ); + defer cg.gpa.free(limbs); + + const width_maybe: ?u32 = width: { + var width_bigint: std.math.big.int.Mutable = .{ .limbs = limbs, .positive = undefined, .len = undefined }; + width_bigint.sub(max_bigint, min_bigint); + width_bigint.addScalar(width_bigint.toConst(), 1); + break :width width_bigint.toConst().toInt(u32) catch null; + }; + + try cg.startBlock(.block, .empty); // whole switch block start + + for (0..branch_count) |_| { + try cg.startBlock(.block, .empty); + } + + // Heuristic on deciding when to use .br_table instead of .br_if jump table + // 1. Differences between lowest and highest values should fit into u32 + // 2. .br_table should be applied for "dense" switch, we test it by checking .br_if jumps will need more instructions + // 3. 
Do not use .br_table for tiny switches + const use_br_table = cond: { + const width = width_maybe orelse break :cond false; + if (width > 2 * branching_size) break :cond false; + if (width < 2 or branch_count < 2) break :cond false; + break :cond true; + }; + + if (use_br_table) { + const width = width_maybe.?; + + const br_value_original = try cg.binOp(target, try cg.resolveValue(min.?), target_ty, .sub); + _ = try cg.intcast(br_value_original, target_ty, Type.u32); + + const jump_table: Mir.JumpTable = .{ .length = width + 1 }; + const table_extra_index = try cg.addExtra(jump_table); + try cg.addInst(.{ .tag = .br_table, .data = .{ .payload = table_extra_index } }); + + const branch_list = try cg.mir_extra.addManyAsSlice(cg.gpa, width + 1); + @memset(branch_list, branch_count - 1); + + var cases_it = switch_br.iterateCases(); + while (cases_it.next()) |case| { + for (case.items) |item| { + const val = Value.fromInterned(item.toInterned().?); + var val_space: Value.BigIntSpace = undefined; + const val_bigint = val.toBigInt(&val_space, zcu); + var index_bigint: std.math.big.int.Mutable = .{ .limbs = limbs, .positive = undefined, .len = undefined }; + index_bigint.sub(val_bigint, min_bigint); + branch_list[index_bigint.toConst().toInt(u32) catch unreachable] = case.idx; + } + for (case.ranges) |range| { + var low_space: Value.BigIntSpace = undefined; + const low_bigint = Value.fromInterned(range[0].toInterned().?).toBigInt(&low_space, zcu); + var high_space: Value.BigIntSpace = undefined; + const high_bigint = Value.fromInterned(range[1].toInterned().?).toBigInt(&high_space, zcu); + var index_bigint: std.math.big.int.Mutable = .{ .limbs = limbs, .positive = undefined, .len = undefined }; + index_bigint.sub(low_bigint, min_bigint); + const start = index_bigint.toConst().toInt(u32) catch unreachable; + index_bigint.sub(high_bigint, min_bigint); + const end = (index_bigint.toConst().toInt(u32) catch unreachable) + 1; + @memset(branch_list[start..end], case.idx); + } + } + } else { + var cases_it = switch_br.iterateCases(); + while (cases_it.next()) |case| { + for (case.items) |ref| { + const val = try cg.resolveInst(ref); + _ = try cg.cmp(target, val, target_ty, .eq); + try cg.addLabel(.br_if, case.idx); // item match found + } + for (case.ranges) |range| { + const low = try cg.resolveInst(range[0]); + const high = try cg.resolveInst(range[1]); + + const gte = try cg.cmp(target, low, target_ty, .gte); + const lte = try cg.cmp(target, high, target_ty, .lte); + _ = try cg.binOp(gte, lte, Type.bool, .@"and"); + try cg.addLabel(.br_if, case.idx); // range match found + } + } + try cg.addLabel(.br, branch_count - 1); + } + + var cases_it = switch_br.iterateCases(); + while (cases_it.next()) |case| { + try cg.endBlock(); + + cg.branches.appendAssumeCapacity(.{}); + try cg.currentBranch().values.ensureUnusedCapacity(cg.gpa, liveness.deaths[case.idx].len); + defer { + var case_branch = cg.branches.pop().?; + case_branch.deinit(cg.gpa); + } + try cg.genBody(case.body); + + try cg.addLabel(.br, branch_count - case.idx - 1); // matching case found and executed => exit switch + } + + try cg.endBlock(); + if (has_else_body) { + const else_body = cases_it.elseBody(); + + cg.branches.appendAssumeCapacity(.{}); + const else_deaths = liveness.deaths.len - 1; + try cg.currentBranch().values.ensureUnusedCapacity(cg.gpa, liveness.deaths[else_deaths].len); + defer { + var else_branch = cg.branches.pop().?; + else_branch.deinit(cg.gpa); + } + try cg.genBody(else_body); + } else { + try cg.addTag(.@"unreachable"); + 
}
+
+ try cg.endBlock(); // whole switch block end
+
+ if (is_dispatch_loop) {
+ try cg.endBlock(); // dispatch loop end
+ }
+
+ return cg.finishAir(inst, .none, &.{});
+}
+
+fn airSwitchDispatch(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void {
+ const br = cg.air.instructions.items(.data)[@intFromEnum(inst)].br;
+ const switch_loop = cg.blocks.get(br.block_inst).?;
+
+ const operand = try cg.resolveInst(br.operand);
+ try cg.lowerToStack(operand);
+ try cg.addLocal(.local_set, switch_loop.value.local.value);
+
+ const idx: u32 = cg.block_depth - switch_loop.label;
+ try cg.addLabel(.br, idx);
+
+ return cg.finishAir(inst, .none, &.{br.operand});
+}
+
+fn airIsErr(cg: *CodeGen, inst: Air.Inst.Index, opcode: std.wasm.Opcode, op_kind: enum { value, ptr }) InnerError!void {
+ const zcu = cg.pt.zcu;
+ const un_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].un_op;
+ const operand = try cg.resolveInst(un_op);
+ const err_union_ty = cg.typeOf(un_op);
+ const pl_ty = err_union_ty.errorUnionPayload(zcu);
+
+ const result: WValue = result: {
+ if (err_union_ty.errorUnionSet(zcu).errorSetIsEmpty(zcu)) {
+ switch (opcode) {
+ .i32_ne => break :result .{ .imm32 = 0 },
+ .i32_eq => break :result .{ .imm32 = 1 },
+ else => unreachable,
+ }
+ }
+
+ try cg.emitWValue(operand);
+ if (op_kind == .ptr or pl_ty.hasRuntimeBitsIgnoreComptime(zcu)) {
+ try cg.addMemArg(.i32_load16_u, .{
+ .offset = operand.offset() + @as(u32, @intCast(errUnionErrorOffset(pl_ty, zcu))),
+ .alignment = @intCast(Type.anyerror.abiAlignment(zcu).toByteUnits().?),
+ });
+ }
+
+ // Compare the error value with '0'
+ try cg.addImm32(0);
+ try cg.addTag(Mir.Inst.Tag.fromOpcode(opcode));
+ break :result .stack;
+ };
+ return cg.finishAir(inst, result, &.{un_op});
+}
+
+/// E!T -> T op_is_ptr == false
+/// *(E!T) -> *T op_is_ptr == true
+fn airUnwrapErrUnionPayload(cg: *CodeGen, inst: Air.Inst.Index, op_is_ptr: bool) InnerError!void {
+ const zcu = cg.pt.zcu;
+ const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
+
+ const operand = try cg.resolveInst(ty_op.operand);
+ const op_ty = cg.typeOf(ty_op.operand);
+ const eu_ty = if (op_is_ptr) op_ty.childType(zcu) else op_ty;
+ const payload_ty = eu_ty.errorUnionPayload(zcu);
+
+ const result: WValue = result: {
+ if (!payload_ty.hasRuntimeBitsIgnoreComptime(zcu)) {
+ if (op_is_ptr) {
+ break :result cg.reuseOperand(ty_op.operand, operand);
+ } else {
+ break :result .none;
+ }
+ }
+
+ const pl_offset: u32 = @intCast(errUnionPayloadOffset(payload_ty, zcu));
+ if (op_is_ptr or isByRef(payload_ty, zcu, cg.target)) {
+ break :result try cg.buildPointerOffset(operand, pl_offset, .new);
+ } else {
+ assert(isByRef(eu_ty, zcu, cg.target));
+ break :result try cg.load(operand, payload_ty, pl_offset);
+ }
+ };
+ return cg.finishAir(inst, result, &.{ty_op.operand});
+}
+
+/// E!T -> E op_is_ptr == false
+/// *(E!T) -> E op_is_ptr == true
+/// NOTE: op_is_ptr will not change return type
+fn airUnwrapErrUnionError(cg: *CodeGen, inst: Air.Inst.Index, op_is_ptr: bool) InnerError!void {
+ const zcu = cg.pt.zcu;
+ const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op;
+
+ const operand = try cg.resolveInst(ty_op.operand);
+ const op_ty = cg.typeOf(ty_op.operand);
+ const eu_ty = if (op_is_ptr) op_ty.childType(zcu) else op_ty;
+ const payload_ty = eu_ty.errorUnionPayload(zcu);
+
+ const result: WValue = result: {
+ if (eu_ty.errorUnionSet(zcu).errorSetIsEmpty(zcu)) {
+ break :result .{ .imm32 = 0 };
+ }
+
+ const err_offset: u32 =
@intCast(errUnionErrorOffset(payload_ty, zcu)); + if (op_is_ptr or isByRef(eu_ty, zcu, cg.target)) { + break :result try cg.load(operand, Type.anyerror, err_offset); + } else { + assert(!payload_ty.hasRuntimeBitsIgnoreComptime(zcu)); + break :result cg.reuseOperand(ty_op.operand, operand); + } + }; + return cg.finishAir(inst, result, &.{ty_op.operand}); +} + +fn airWrapErrUnionPayload(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const zcu = cg.pt.zcu; + const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; + + const operand = try cg.resolveInst(ty_op.operand); + const err_ty = cg.typeOfIndex(inst); + + const pl_ty = cg.typeOf(ty_op.operand); + const result = result: { + if (!pl_ty.hasRuntimeBitsIgnoreComptime(zcu)) { + break :result cg.reuseOperand(ty_op.operand, operand); + } + + const err_union = try cg.allocStack(err_ty); + const payload_ptr = try cg.buildPointerOffset(err_union, @as(u32, @intCast(errUnionPayloadOffset(pl_ty, zcu))), .new); + try cg.store(payload_ptr, operand, pl_ty, 0); + + // ensure we also write '0' to the error part, so any present stack value gets overwritten by it. + try cg.emitWValue(err_union); + try cg.addImm32(0); + const err_val_offset: u32 = @intCast(errUnionErrorOffset(pl_ty, zcu)); + try cg.addMemArg(.i32_store16, .{ + .offset = err_union.offset() + err_val_offset, + .alignment = 2, + }); + break :result err_union; + }; + return cg.finishAir(inst, result, &.{ty_op.operand}); +} + +fn airWrapErrUnionErr(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const zcu = cg.pt.zcu; + const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; + + const operand = try cg.resolveInst(ty_op.operand); + const err_ty = ty_op.ty.toType(); + const pl_ty = err_ty.errorUnionPayload(zcu); + + const result = result: { + if (!pl_ty.hasRuntimeBitsIgnoreComptime(zcu)) { + break :result cg.reuseOperand(ty_op.operand, operand); + } + + const err_union = try cg.allocStack(err_ty); + // store error value + try cg.store(err_union, operand, Type.anyerror, @intCast(errUnionErrorOffset(pl_ty, zcu))); + + // write 'undefined' to the payload + const payload_ptr = try cg.buildPointerOffset(err_union, @as(u32, @intCast(errUnionPayloadOffset(pl_ty, zcu))), .new); + const len = @as(u32, @intCast(err_ty.errorUnionPayload(zcu).abiSize(zcu))); + try cg.memset(Type.u8, payload_ptr, .{ .imm32 = len }, .{ .imm32 = 0xaa }); + + break :result err_union; + }; + return cg.finishAir(inst, result, &.{ty_op.operand}); +} + +fn airIntcast(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; + + const ty = ty_op.ty.toType(); + const operand = try cg.resolveInst(ty_op.operand); + const operand_ty = cg.typeOf(ty_op.operand); + const zcu = cg.pt.zcu; + if (ty.zigTypeTag(zcu) == .vector or operand_ty.zigTypeTag(zcu) == .vector) { + return cg.fail("todo Wasm intcast for vectors", .{}); + } + if (ty.abiSize(zcu) > 16 or operand_ty.abiSize(zcu) > 16) { + return cg.fail("todo Wasm intcast for bitsize > 128", .{}); + } + + const op_bits = toWasmBits(@intCast(operand_ty.bitSize(zcu))).?; + const wanted_bits = toWasmBits(@intCast(ty.bitSize(zcu))).?; + const result = if (op_bits == wanted_bits) + cg.reuseOperand(ty_op.operand, operand) + else + try cg.intcast(operand, operand_ty, ty); + + return cg.finishAir(inst, result, &.{ty_op.operand}); +} + +/// Upcasts or downcasts an integer based on the given and wanted types, +/// and stores the result in a new operand. 
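+/// Widening to 128 bits spills the result to the stack frame as two u64 limbs, sign- or zero-extending the upper limb.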
+/// Asserts type's bitsize <= 128 +/// NOTE: May leave the result on the top of the stack. +fn intcast(cg: *CodeGen, operand: WValue, given: Type, wanted: Type) InnerError!WValue { + const zcu = cg.pt.zcu; + const given_bitsize = @as(u16, @intCast(given.bitSize(zcu))); + const wanted_bitsize = @as(u16, @intCast(wanted.bitSize(zcu))); + assert(given_bitsize <= 128); + assert(wanted_bitsize <= 128); + + const op_bits = toWasmBits(given_bitsize).?; + const wanted_bits = toWasmBits(wanted_bitsize).?; + if (op_bits == wanted_bits) { + return operand; + } + + if (op_bits == 64 and wanted_bits == 32) { + try cg.emitWValue(operand); + try cg.addTag(.i32_wrap_i64); + return .stack; + } else if (op_bits == 32 and wanted_bits == 64) { + try cg.emitWValue(operand); + try cg.addTag(if (wanted.isSignedInt(zcu)) .i64_extend_i32_s else .i64_extend_i32_u); + return .stack; + } else if (wanted_bits == 128) { + // for 128bit integers we store the integer in the virtual stack, rather than a local + const stack_ptr = try cg.allocStack(wanted); + try cg.emitWValue(stack_ptr); + + // for 32 bit integers, we first coerce the value into a 64 bit integer before storing it + // meaning less store operations are required. + const lhs = if (op_bits == 32) blk: { + const sign_ty = if (wanted.isSignedInt(zcu)) Type.i64 else Type.u64; + break :blk try (try cg.intcast(operand, given, sign_ty)).toLocal(cg, sign_ty); + } else operand; + + // store lsb first + try cg.store(.stack, lhs, Type.u64, 0 + stack_ptr.offset()); + + // For signed integers we shift lsb by 63 (64bit integer - 1 sign bit) and store remaining value + if (wanted.isSignedInt(zcu)) { + try cg.emitWValue(stack_ptr); + const shr = try cg.binOp(lhs, .{ .imm64 = 63 }, Type.i64, .shr); + try cg.store(.stack, shr, Type.u64, 8 + stack_ptr.offset()); + } else { + // Ensure memory of msb is zero'd + try cg.store(stack_ptr, .{ .imm64 = 0 }, Type.u64, 8); + } + return stack_ptr; + } else return cg.load(operand, wanted, 0); +} + +fn airIsNull(cg: *CodeGen, inst: Air.Inst.Index, opcode: std.wasm.Opcode, op_kind: enum { value, ptr }) InnerError!void { + const zcu = cg.pt.zcu; + const un_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].un_op; + const operand = try cg.resolveInst(un_op); + + const op_ty = cg.typeOf(un_op); + const optional_ty = if (op_kind == .ptr) op_ty.childType(zcu) else op_ty; + const result = try cg.isNull(operand, optional_ty, opcode); + return cg.finishAir(inst, result, &.{un_op}); +} + +/// For a given type and operand, checks if it's considered `null`. 
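+/// For optionals that use a separate null tag, the tag byte stored directly after the payload is compared against zero.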
+/// NOTE: Leaves the result on the stack +fn isNull(cg: *CodeGen, operand: WValue, optional_ty: Type, opcode: std.wasm.Opcode) InnerError!WValue { + const pt = cg.pt; + const zcu = pt.zcu; + try cg.emitWValue(operand); + const payload_ty = optional_ty.optionalChild(zcu); + if (!optional_ty.optionalReprIsPayload(zcu)) { + // When payload is zero-bits, we can treat operand as a value, rather than + // a pointer to the stack value + if (payload_ty.hasRuntimeBitsIgnoreComptime(zcu)) { + const offset = std.math.cast(u32, payload_ty.abiSize(zcu)) orelse { + return cg.fail("Optional type {f} too big to fit into stack frame", .{optional_ty.fmt(pt)}); + }; + try cg.addMemArg(.i32_load8_u, .{ .offset = operand.offset() + offset, .alignment = 1 }); + } + } else if (payload_ty.isSlice(zcu)) { + switch (cg.ptr_size) { + .wasm32 => try cg.addMemArg(.i32_load, .{ .offset = operand.offset(), .alignment = 4 }), + .wasm64 => try cg.addMemArg(.i64_load, .{ .offset = operand.offset(), .alignment = 8 }), + } + } + + // Compare the null value with '0' + try cg.addImm32(0); + try cg.addTag(Mir.Inst.Tag.fromOpcode(opcode)); + + return .stack; +} + +fn airOptionalPayload(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const zcu = cg.pt.zcu; + const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; + const opt_ty = cg.typeOf(ty_op.operand); + const payload_ty = cg.typeOfIndex(inst); + if (!payload_ty.hasRuntimeBitsIgnoreComptime(zcu)) { + return cg.finishAir(inst, .none, &.{ty_op.operand}); + } + + const result = result: { + const operand = try cg.resolveInst(ty_op.operand); + if (opt_ty.optionalReprIsPayload(zcu)) break :result cg.reuseOperand(ty_op.operand, operand); + + if (isByRef(payload_ty, zcu, cg.target)) { + break :result try cg.buildPointerOffset(operand, 0, .new); + } + + break :result try cg.load(operand, payload_ty, 0); + }; + return cg.finishAir(inst, result, &.{ty_op.operand}); +} + +fn airOptionalPayloadPtr(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const zcu = cg.pt.zcu; + const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; + const operand = try cg.resolveInst(ty_op.operand); + const opt_ty = cg.typeOf(ty_op.operand).childType(zcu); + + const result = result: { + const payload_ty = opt_ty.optionalChild(zcu); + if (!payload_ty.hasRuntimeBitsIgnoreComptime(zcu) or opt_ty.optionalReprIsPayload(zcu)) { + break :result cg.reuseOperand(ty_op.operand, operand); + } + + break :result try cg.buildPointerOffset(operand, 0, .new); + }; + return cg.finishAir(inst, result, &.{ty_op.operand}); +} + +fn airOptionalPayloadPtrSet(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const pt = cg.pt; + const zcu = pt.zcu; + const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; + const operand = try cg.resolveInst(ty_op.operand); + const opt_ty = cg.typeOf(ty_op.operand).childType(zcu); + const payload_ty = opt_ty.optionalChild(zcu); + + if (opt_ty.optionalReprIsPayload(zcu)) { + return cg.finishAir(inst, operand, &.{ty_op.operand}); + } + + const offset = std.math.cast(u32, payload_ty.abiSize(zcu)) orelse { + return cg.fail("Optional type {f} too big to fit into stack frame", .{opt_ty.fmt(pt)}); + }; + + try cg.emitWValue(operand); + try cg.addImm32(1); + try cg.addMemArg(.i32_store8, .{ .offset = operand.offset() + offset, .alignment = 1 }); + + const result = try cg.buildPointerOffset(operand, 0, .new); + return cg.finishAir(inst, result, &.{ty_op.operand}); +} + +fn airWrapOptional(cg: *CodeGen, inst: Air.Inst.Index) 
InnerError!void { + const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; + const payload_ty = cg.typeOf(ty_op.operand); + const pt = cg.pt; + const zcu = pt.zcu; + + const result = result: { + if (!payload_ty.hasRuntimeBitsIgnoreComptime(zcu)) { + const non_null_bit = try cg.allocStack(Type.u1); + try cg.emitWValue(non_null_bit); + try cg.addImm32(1); + try cg.addMemArg(.i32_store8, .{ .offset = non_null_bit.offset(), .alignment = 1 }); + break :result non_null_bit; + } + + const operand = try cg.resolveInst(ty_op.operand); + const op_ty = cg.typeOfIndex(inst); + if (op_ty.optionalReprIsPayload(zcu)) { + break :result cg.reuseOperand(ty_op.operand, operand); + } + const offset = std.math.cast(u32, payload_ty.abiSize(zcu)) orelse { + return cg.fail("Optional type {f} too big to fit into stack frame", .{op_ty.fmt(pt)}); + }; + + // Create optional type, set the non-null bit, and store the operand inside the optional type + const result_ptr = try cg.allocStack(op_ty); + try cg.emitWValue(result_ptr); + try cg.addImm32(1); + try cg.addMemArg(.i32_store8, .{ .offset = result_ptr.offset() + offset, .alignment = 1 }); + + const payload_ptr = try cg.buildPointerOffset(result_ptr, 0, .new); + try cg.store(payload_ptr, operand, payload_ty, 0); + break :result result_ptr; + }; + + return cg.finishAir(inst, result, &.{ty_op.operand}); +} + +fn airSlice(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const ty_pl = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl; + const bin_op = cg.air.extraData(Air.Bin, ty_pl.payload).data; + + const lhs = try cg.resolveInst(bin_op.lhs); + const rhs = try cg.resolveInst(bin_op.rhs); + const slice_ty = cg.typeOfIndex(inst); + + const slice = try cg.allocStack(slice_ty); + try cg.store(slice, lhs, Type.usize, 0); + try cg.store(slice, rhs, Type.usize, cg.ptrSize()); + + return cg.finishAir(inst, slice, &.{ bin_op.lhs, bin_op.rhs }); +} + +fn airSliceLen(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; + + const operand = try cg.resolveInst(ty_op.operand); + return cg.finishAir(inst, try cg.sliceLen(operand), &.{ty_op.operand}); +} + +fn airSliceElemVal(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const zcu = cg.pt.zcu; + const bin_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].bin_op; + + const slice_ty = cg.typeOf(bin_op.lhs); + const slice = try cg.resolveInst(bin_op.lhs); + const index = try cg.resolveInst(bin_op.rhs); + const elem_ty = slice_ty.childType(zcu); + const elem_size = elem_ty.abiSize(zcu); + + // load pointer onto stack + _ = try cg.load(slice, Type.usize, 0); + + // calculate index into slice + try cg.emitWValue(index); + try cg.addImm32(@intCast(elem_size)); + try cg.addTag(.i32_mul); + try cg.addTag(.i32_add); + + const elem_result = if (isByRef(elem_ty, zcu, cg.target)) + .stack + else + try cg.load(.stack, elem_ty, 0); + + return cg.finishAir(inst, elem_result, &.{ bin_op.lhs, bin_op.rhs }); +} + +fn airSliceElemPtr(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const zcu = cg.pt.zcu; + const ty_pl = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl; + const bin_op = cg.air.extraData(Air.Bin, ty_pl.payload).data; + + const elem_ty = ty_pl.ty.toType().childType(zcu); + const elem_size = elem_ty.abiSize(zcu); + + const slice = try cg.resolveInst(bin_op.lhs); + const index = try cg.resolveInst(bin_op.rhs); + + _ = try cg.load(slice, Type.usize, 0); + + // calculate index into slice + try 
cg.emitWValue(index); + try cg.addImm32(@intCast(elem_size)); + try cg.addTag(.i32_mul); + try cg.addTag(.i32_add); + + return cg.finishAir(inst, .stack, &.{ bin_op.lhs, bin_op.rhs }); +} + +fn airSlicePtr(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; + const operand = try cg.resolveInst(ty_op.operand); + return cg.finishAir(inst, try cg.slicePtr(operand), &.{ty_op.operand}); +} + +fn slicePtr(cg: *CodeGen, operand: WValue) InnerError!WValue { + const ptr = try cg.load(operand, Type.usize, 0); + return ptr.toLocal(cg, Type.usize); +} + +fn sliceLen(cg: *CodeGen, operand: WValue) InnerError!WValue { + const len = try cg.load(operand, Type.usize, cg.ptrSize()); + return len.toLocal(cg, Type.usize); +} + +fn airTrunc(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; + + const operand = try cg.resolveInst(ty_op.operand); + const wanted_ty: Type = ty_op.ty.toType(); + const op_ty = cg.typeOf(ty_op.operand); + const zcu = cg.pt.zcu; + + if (wanted_ty.zigTypeTag(zcu) == .vector or op_ty.zigTypeTag(zcu) == .vector) { + return cg.fail("TODO: trunc for vectors", .{}); + } + + const result = if (op_ty.bitSize(zcu) == wanted_ty.bitSize(zcu)) + cg.reuseOperand(ty_op.operand, operand) + else + try cg.trunc(operand, wanted_ty, op_ty); + + return cg.finishAir(inst, result, &.{ty_op.operand}); +} + +/// Truncates a given operand to a given type, discarding any overflown bits. +/// NOTE: Resulting value is left on the stack. +fn trunc(cg: *CodeGen, operand: WValue, wanted_ty: Type, given_ty: Type) InnerError!WValue { + const zcu = cg.pt.zcu; + const given_bits = @as(u16, @intCast(given_ty.bitSize(zcu))); + if (toWasmBits(given_bits) == null) { + return cg.fail("TODO: Implement wasm integer truncation for integer bitsize: {d}", .{given_bits}); + } + + var result = try cg.intcast(operand, given_ty, wanted_ty); + const wanted_bits = @as(u16, @intCast(wanted_ty.bitSize(zcu))); + const wasm_bits = toWasmBits(wanted_bits).?; + if (wasm_bits != wanted_bits) { + result = try cg.wrapOperand(result, wanted_ty); + } + return result; +} + +fn airArrayToSlice(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const zcu = cg.pt.zcu; + const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; + + const operand = try cg.resolveInst(ty_op.operand); + const array_ty = cg.typeOf(ty_op.operand).childType(zcu); + const slice_ty = ty_op.ty.toType(); + + // create a slice on the stack + const slice_local = try cg.allocStack(slice_ty); + + // store the array ptr in the slice + if (array_ty.hasRuntimeBitsIgnoreComptime(zcu)) { + try cg.store(slice_local, operand, Type.usize, 0); + } + + // store the length of the array in the slice + const array_len: u32 = @intCast(array_ty.arrayLen(zcu)); + try cg.store(slice_local, .{ .imm32 = array_len }, Type.usize, cg.ptrSize()); + + return cg.finishAir(inst, slice_local, &.{ty_op.operand}); +} + +fn airPtrElemVal(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const zcu = cg.pt.zcu; + const bin_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].bin_op; + + const ptr_ty = cg.typeOf(bin_op.lhs); + const ptr = try cg.resolveInst(bin_op.lhs); + const index = try cg.resolveInst(bin_op.rhs); + const elem_ty = ptr_ty.childType(zcu); + const elem_size = elem_ty.abiSize(zcu); + + // load pointer onto the stack + if (ptr_ty.isSlice(zcu)) { + _ = try cg.load(ptr, Type.usize, 0); + } else { + try 
cg.lowerToStack(ptr); + } + + // calculate index into slice + try cg.emitWValue(index); + try cg.addImm32(@intCast(elem_size)); + try cg.addTag(.i32_mul); + try cg.addTag(.i32_add); + + const elem_result = if (isByRef(elem_ty, zcu, cg.target)) + .stack + else + try cg.load(.stack, elem_ty, 0); + + return cg.finishAir(inst, elem_result, &.{ bin_op.lhs, bin_op.rhs }); +} + +fn airPtrElemPtr(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const zcu = cg.pt.zcu; + const ty_pl = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl; + const bin_op = cg.air.extraData(Air.Bin, ty_pl.payload).data; + + const ptr_ty = cg.typeOf(bin_op.lhs); + const elem_ty = ty_pl.ty.toType().childType(zcu); + const elem_size = elem_ty.abiSize(zcu); + + const ptr = try cg.resolveInst(bin_op.lhs); + const index = try cg.resolveInst(bin_op.rhs); + + // load pointer onto the stack + if (ptr_ty.isSlice(zcu)) { + _ = try cg.load(ptr, Type.usize, 0); + } else { + try cg.lowerToStack(ptr); + } + + // calculate index into ptr + try cg.emitWValue(index); + try cg.addImm32(@intCast(elem_size)); + try cg.addTag(.i32_mul); + try cg.addTag(.i32_add); + + return cg.finishAir(inst, .stack, &.{ bin_op.lhs, bin_op.rhs }); +} + +fn airPtrBinOp(cg: *CodeGen, inst: Air.Inst.Index, op: Op) InnerError!void { + const zcu = cg.pt.zcu; + const ty_pl = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl; + const bin_op = cg.air.extraData(Air.Bin, ty_pl.payload).data; + + const ptr = try cg.resolveInst(bin_op.lhs); + const offset = try cg.resolveInst(bin_op.rhs); + const ptr_ty = cg.typeOf(bin_op.lhs); + const pointee_ty = switch (ptr_ty.ptrSize(zcu)) { + .one => ptr_ty.childType(zcu).childType(zcu), // ptr to array, so get array element type + else => ptr_ty.childType(zcu), + }; + + const valtype = typeToValtype(Type.usize, zcu, cg.target); + const mul_opcode = buildOpcode(.{ .valtype1 = valtype, .op = .mul }); + const bin_opcode = buildOpcode(.{ .valtype1 = valtype, .op = op }); + + try cg.lowerToStack(ptr); + try cg.emitWValue(offset); + try cg.addImm32(@intCast(pointee_ty.abiSize(zcu))); + try cg.addTag(Mir.Inst.Tag.fromOpcode(mul_opcode)); + try cg.addTag(Mir.Inst.Tag.fromOpcode(bin_opcode)); + + return cg.finishAir(inst, .stack, &.{ bin_op.lhs, bin_op.rhs }); +} + +fn airMemset(cg: *CodeGen, inst: Air.Inst.Index, safety: bool) InnerError!void { + const zcu = cg.pt.zcu; + const bin_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].bin_op; + + const ptr = try cg.resolveInst(bin_op.lhs); + const ptr_ty = cg.typeOf(bin_op.lhs); + const value = try cg.resolveInst(bin_op.rhs); + const len = switch (ptr_ty.ptrSize(zcu)) { + .slice => try cg.sliceLen(ptr), + .one => @as(WValue, .{ .imm32 = @as(u32, @intCast(ptr_ty.childType(zcu).arrayLen(zcu))) }), + .c, .many => unreachable, + }; + + const elem_ty = if (ptr_ty.ptrSize(zcu) == .one) + ptr_ty.childType(zcu).childType(zcu) + else + ptr_ty.childType(zcu); + + if (!safety and bin_op.rhs == .undef) { + return cg.finishAir(inst, .none, &.{ bin_op.lhs, bin_op.rhs }); + } + + const dst_ptr = try cg.sliceOrArrayPtr(ptr, ptr_ty); + try cg.memset(elem_ty, dst_ptr, len, value); + + return cg.finishAir(inst, .none, &.{ bin_op.lhs, bin_op.rhs }); +} + +/// Sets a region of memory at `ptr` to the value of `value` +/// When the user has enabled the bulk_memory feature, we lower +/// this to wasm's memset instruction. When the feature is not present, +/// we implement it manually. 
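+/// The manual fallback emits a loop that stores `value` one element at a time until an end pointer is reached.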
+fn memset(cg: *CodeGen, elem_ty: Type, ptr: WValue, len: WValue, value: WValue) InnerError!void { + const zcu = cg.pt.zcu; + const abi_size = @as(u32, @intCast(elem_ty.abiSize(zcu))); + + // When bulk_memory is enabled, we lower it to wasm's memset instruction. + // If not, we lower it ourselves. + if (cg.target.cpu.has(.wasm, .bulk_memory) and abi_size == 1) { + const len0_ok = cg.target.cpu.has(.wasm, .nontrapping_bulk_memory_len0); + + if (!len0_ok) { + try cg.startBlock(.block, .empty); + + // Even if `len` is zero, the spec requires an implementation to trap if `ptr + len` is + // out of memory bounds. This can easily happen in Zig in a case such as: + // + // const ptr: [*]u8 = undefined; + // var len: usize = runtime_zero(); + // @memset(ptr[0..len], 42); + // + // So explicitly avoid using `memory.fill` in the `len == 0` case. Lovely design. + try cg.emitWValue(len); + try cg.addTag(.i32_eqz); + try cg.addLabel(.br_if, 0); + } + + try cg.lowerToStack(ptr); + try cg.emitWValue(value); + try cg.emitWValue(len); + try cg.addExtended(.memory_fill); + + if (!len0_ok) { + try cg.endBlock(); + } + + return; + } + + const final_len: WValue = switch (len) { + .imm32 => |val| .{ .imm32 = val * abi_size }, + .imm64 => |val| .{ .imm64 = val * abi_size }, + else => if (abi_size != 1) blk: { + const new_len = try cg.ensureAllocLocal(Type.usize); + try cg.emitWValue(len); + switch (cg.ptr_size) { + .wasm32 => { + try cg.emitWValue(.{ .imm32 = abi_size }); + try cg.addTag(.i32_mul); + }, + .wasm64 => { + try cg.emitWValue(.{ .imm64 = abi_size }); + try cg.addTag(.i64_mul); + }, + } + try cg.addLocal(.local_set, new_len.local.value); + break :blk new_len; + } else len, + }; + + var end_ptr = try cg.allocLocal(Type.usize); + defer end_ptr.free(cg); + var new_ptr = try cg.buildPointerOffset(ptr, 0, .new); + defer new_ptr.free(cg); + + // get the loop conditional: if current pointer address equals final pointer's address + try cg.lowerToStack(ptr); + try cg.emitWValue(final_len); + switch (cg.ptr_size) { + .wasm32 => try cg.addTag(.i32_add), + .wasm64 => try cg.addTag(.i64_add), + } + try cg.addLocal(.local_set, end_ptr.local.value); + + // outer block to jump to when loop is done + try cg.startBlock(.block, .empty); + try cg.startBlock(.loop, .empty); + + // check for condition for loop end + try cg.emitWValue(new_ptr); + try cg.emitWValue(end_ptr); + switch (cg.ptr_size) { + .wasm32 => try cg.addTag(.i32_eq), + .wasm64 => try cg.addTag(.i64_eq), + } + try cg.addLabel(.br_if, 1); // jump out of loop into outer block (finished) + + // store the value at the current position of the pointer + try cg.store(new_ptr, value, elem_ty, 0); + + // move the pointer to the next element + try cg.emitWValue(new_ptr); + switch (cg.ptr_size) { + .wasm32 => { + try cg.emitWValue(.{ .imm32 = abi_size }); + try cg.addTag(.i32_add); + }, + .wasm64 => { + try cg.emitWValue(.{ .imm64 = abi_size }); + try cg.addTag(.i64_add); + }, + } + try cg.addLocal(.local_set, new_ptr.local.value); + + // end of loop + try cg.addLabel(.br, 0); // jump to start of loop + try cg.endBlock(); + try cg.endBlock(); +} + +fn airArrayElemVal(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const zcu = cg.pt.zcu; + const bin_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].bin_op; + + const array_ty = cg.typeOf(bin_op.lhs); + const array = try cg.resolveInst(bin_op.lhs); + const index = try cg.resolveInst(bin_op.rhs); + const elem_ty = array_ty.childType(zcu); + const elem_size = elem_ty.abiSize(zcu); + + if (isByRef(array_ty, 
zcu, cg.target)) { + try cg.lowerToStack(array); + try cg.emitWValue(index); + try cg.addImm32(@intCast(elem_size)); + try cg.addTag(.i32_mul); + try cg.addTag(.i32_add); + } else { + assert(array_ty.zigTypeTag(zcu) == .vector); + + switch (index) { + inline .imm32, .imm64 => |lane| { + const opcode: std.wasm.SimdOpcode = switch (elem_ty.bitSize(zcu)) { + 8 => if (elem_ty.isSignedInt(zcu)) .i8x16_extract_lane_s else .i8x16_extract_lane_u, + 16 => if (elem_ty.isSignedInt(zcu)) .i16x8_extract_lane_s else .i16x8_extract_lane_u, + 32 => if (elem_ty.isInt(zcu)) .i32x4_extract_lane else .f32x4_extract_lane, + 64 => if (elem_ty.isInt(zcu)) .i64x2_extract_lane else .f64x2_extract_lane, + else => unreachable, + }; + + var operands = [_]u32{ @intFromEnum(opcode), @as(u8, @intCast(lane)) }; + + try cg.emitWValue(array); + + const extra_index: u32 = @intCast(cg.mir_extra.items.len); + try cg.mir_extra.appendSlice(cg.gpa, &operands); + try cg.addInst(.{ .tag = .simd_prefix, .data = .{ .payload = extra_index } }); + + return cg.finishAir(inst, .stack, &.{ bin_op.lhs, bin_op.rhs }); + }, + else => { + const stack_vec = try cg.allocStack(array_ty); + try cg.store(stack_vec, array, array_ty, 0); + + // Is a non-unrolled vector (v128) + try cg.lowerToStack(stack_vec); + try cg.emitWValue(index); + try cg.addImm32(@intCast(elem_size)); + try cg.addTag(.i32_mul); + try cg.addTag(.i32_add); + }, + } + } + + const elem_result = if (isByRef(elem_ty, zcu, cg.target)) + .stack + else + try cg.load(.stack, elem_ty, 0); + + return cg.finishAir(inst, elem_result, &.{ bin_op.lhs, bin_op.rhs }); +} + +fn airIntFromFloat(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const zcu = cg.pt.zcu; + const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; + + const operand = try cg.resolveInst(ty_op.operand); + const op_ty = cg.typeOf(ty_op.operand); + const op_bits = op_ty.floatBits(cg.target); + + const dest_ty = cg.typeOfIndex(inst); + const dest_info = dest_ty.intInfo(zcu); + + if (dest_info.bits > 128) { + return cg.fail("TODO: intFromFloat for integers/floats with bitsize {}", .{dest_info.bits}); + } + + if ((op_bits != 32 and op_bits != 64) or dest_info.bits > 64) { + const dest_bitsize = if (dest_info.bits <= 32) 32 else std.math.ceilPowerOfTwoAssert(u16, dest_info.bits); + + const intrinsic = switch (dest_info.signedness) { + inline .signed, .unsigned => |ct_s| switch (op_bits) { + inline 16, 32, 64, 80, 128 => |ct_op_bits| switch (dest_bitsize) { + inline 32, 64, 128 => |ct_dest_bits| @field( + Mir.Intrinsic, + "__fix" ++ switch (ct_s) { + .signed => "", + .unsigned => "uns", + } ++ + compilerRtFloatAbbrev(ct_op_bits) ++ "f" ++ + compilerRtIntAbbrev(ct_dest_bits) ++ "i", + ), + else => unreachable, + }, + else => unreachable, + }, + }; + const result = try cg.callIntrinsic(intrinsic, &.{op_ty.ip_index}, dest_ty, &.{operand}); + return cg.finishAir(inst, result, &.{ty_op.operand}); + } + + try cg.emitWValue(operand); + const op = buildOpcode(.{ + .op = .trunc, + .valtype1 = typeToValtype(dest_ty, zcu, cg.target), + .valtype2 = typeToValtype(op_ty, zcu, cg.target), + .signedness = dest_info.signedness, + }); + try cg.addTag(Mir.Inst.Tag.fromOpcode(op)); + const result = try cg.wrapOperand(.stack, dest_ty); + return cg.finishAir(inst, result, &.{ty_op.operand}); +} + +fn airFloatFromInt(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const zcu = cg.pt.zcu; + const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; + + const operand = try cg.resolveInst(ty_op.operand); + 
const op_ty = cg.typeOf(ty_op.operand); + const op_info = op_ty.intInfo(zcu); + + const dest_ty = cg.typeOfIndex(inst); + const dest_bits = dest_ty.floatBits(cg.target); + + if (op_info.bits > 128) { + return cg.fail("TODO: floatFromInt for integers/floats with bitsize {d} bits", .{op_info.bits}); + } + + if (op_info.bits > 64 or (dest_bits > 64 or dest_bits < 32)) { + const op_bitsize = if (op_info.bits <= 32) 32 else std.math.ceilPowerOfTwoAssert(u16, op_info.bits); + + const intrinsic = switch (op_info.signedness) { + inline .signed, .unsigned => |ct_s| switch (op_bitsize) { + inline 32, 64, 128 => |ct_int_bits| switch (dest_bits) { + inline 16, 32, 64, 80, 128 => |ct_float_bits| @field( + Mir.Intrinsic, + "__float" ++ switch (ct_s) { + .signed => "", + .unsigned => "un", + } ++ + compilerRtIntAbbrev(ct_int_bits) ++ "i" ++ + compilerRtFloatAbbrev(ct_float_bits) ++ "f", + ), + else => unreachable, + }, + else => unreachable, + }, + }; + + const result = try cg.callIntrinsic(intrinsic, &.{op_ty.ip_index}, dest_ty, &.{operand}); + return cg.finishAir(inst, result, &.{ty_op.operand}); + } + + try cg.emitWValue(operand); + const op = buildOpcode(.{ + .op = .convert, + .valtype1 = typeToValtype(dest_ty, zcu, cg.target), + .valtype2 = typeToValtype(op_ty, zcu, cg.target), + .signedness = op_info.signedness, + }); + try cg.addTag(Mir.Inst.Tag.fromOpcode(op)); + + return cg.finishAir(inst, .stack, &.{ty_op.operand}); +} + +fn airSplat(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const zcu = cg.pt.zcu; + const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; + const operand = try cg.resolveInst(ty_op.operand); + const ty = cg.typeOfIndex(inst); + const elem_ty = ty.childType(zcu); + + if (determineSimdStoreStrategy(ty, zcu, cg.target) == .direct) blk: { + switch (operand) { + // when the operand lives in the linear memory section, we can directly + // load and splat the value at once. Meaning we do not first have to load + // the scalar value onto the stack. 
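+ // (the v128.loadN_splat instructions take a memarg, so the load and the splat happen as one operation)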
+ .stack_offset, .nav_ref, .uav_ref => { + const opcode = switch (elem_ty.bitSize(zcu)) { + 8 => @intFromEnum(std.wasm.SimdOpcode.v128_load8_splat), + 16 => @intFromEnum(std.wasm.SimdOpcode.v128_load16_splat), + 32 => @intFromEnum(std.wasm.SimdOpcode.v128_load32_splat), + 64 => @intFromEnum(std.wasm.SimdOpcode.v128_load64_splat), + else => break :blk, // Cannot make use of simd-instructions + }; + try cg.emitWValue(operand); + const extra_index: u32 = @intCast(cg.mir_extra.items.len); + // stores as := opcode, offset, alignment (opcode::memarg) + try cg.mir_extra.appendSlice(cg.gpa, &[_]u32{ + opcode, + operand.offset(), + @intCast(elem_ty.abiAlignment(zcu).toByteUnits().?), + }); + try cg.addInst(.{ .tag = .simd_prefix, .data = .{ .payload = extra_index } }); + return cg.finishAir(inst, .stack, &.{ty_op.operand}); + }, + .local => { + const opcode = switch (elem_ty.bitSize(zcu)) { + 8 => @intFromEnum(std.wasm.SimdOpcode.i8x16_splat), + 16 => @intFromEnum(std.wasm.SimdOpcode.i16x8_splat), + 32 => if (elem_ty.isInt(zcu)) @intFromEnum(std.wasm.SimdOpcode.i32x4_splat) else @intFromEnum(std.wasm.SimdOpcode.f32x4_splat), + 64 => if (elem_ty.isInt(zcu)) @intFromEnum(std.wasm.SimdOpcode.i64x2_splat) else @intFromEnum(std.wasm.SimdOpcode.f64x2_splat), + else => break :blk, // Cannot make use of simd-instructions + }; + try cg.emitWValue(operand); + const extra_index: u32 = @intCast(cg.mir_extra.items.len); + try cg.mir_extra.append(cg.gpa, opcode); + try cg.addInst(.{ .tag = .simd_prefix, .data = .{ .payload = extra_index } }); + return cg.finishAir(inst, .stack, &.{ty_op.operand}); + }, + else => unreachable, + } + } + const elem_size = elem_ty.bitSize(zcu); + const vector_len = @as(usize, @intCast(ty.vectorLen(zcu))); + if ((!std.math.isPowerOfTwo(elem_size) or elem_size % 8 != 0) and vector_len > 1) { + return cg.fail("TODO: WebAssembly `@splat` for arbitrary element bitsize {d}", .{elem_size}); + } + + const result = try cg.allocStack(ty); + const elem_byte_size = @as(u32, @intCast(elem_ty.abiSize(zcu))); + var index: usize = 0; + var offset: u32 = 0; + while (index < vector_len) : (index += 1) { + try cg.store(result, operand, elem_ty, offset); + offset += elem_byte_size; + } + + return cg.finishAir(inst, result, &.{ty_op.operand}); +} + +fn airSelect(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const pl_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].pl_op; + const operand = try cg.resolveInst(pl_op.operand); + + _ = operand; + return cg.fail("TODO: Implement wasm airSelect", .{}); +} + +fn airShuffleOne(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const pt = cg.pt; + const zcu = pt.zcu; + + const unwrapped = cg.air.unwrapShuffleOne(zcu, inst); + const result_ty = unwrapped.result_ty; + const mask = unwrapped.mask; + const operand = try cg.resolveInst(unwrapped.operand); + + const elem_ty = result_ty.childType(zcu); + const elem_size = elem_ty.abiSize(zcu); + + // TODO: this function could have an `i8x16_shuffle` fast path like `airShuffleTwo` if we were + // to lower the comptime-known operands to a non-by-ref vector value. + + // TODO: this is incorrect if either operand or the result is *not* by-ref, which is possible. + // I tried to fix it, but I couldn't make much sense of how this backend handles memory. + if (!isByRef(result_ty, zcu, cg.target) or + !isByRef(cg.typeOf(unwrapped.operand), zcu, cg.target)) return cg.fail("TODO: handle mixed by-ref shuffle", .{}); + + const dest_alloc = try cg.allocStack(result_ty); + for (mask, 0..) 
|mask_elem, out_idx| { + try cg.emitWValue(dest_alloc); + const elem_val = switch (mask_elem.unwrap()) { + .elem => |idx| try cg.load(operand, elem_ty, @intCast(elem_size * idx)), + .value => |val| try cg.lowerConstant(.fromInterned(val), elem_ty), + }; + try cg.store(.stack, elem_val, elem_ty, @intCast(dest_alloc.offset() + elem_size * out_idx)); + } + return cg.finishAir(inst, dest_alloc, &.{unwrapped.operand}); +} + +fn airShuffleTwo(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const pt = cg.pt; + const zcu = pt.zcu; + + const unwrapped = cg.air.unwrapShuffleTwo(zcu, inst); + const result_ty = unwrapped.result_ty; + const mask = unwrapped.mask; + const operand_a = try cg.resolveInst(unwrapped.operand_a); + const operand_b = try cg.resolveInst(unwrapped.operand_b); + + const a_ty = cg.typeOf(unwrapped.operand_a); + const b_ty = cg.typeOf(unwrapped.operand_b); + const elem_ty = result_ty.childType(zcu); + const elem_size = elem_ty.abiSize(zcu); + + // WASM has `i8x16_shuffle`, which we can apply if the element type bit size is a multiple of 8 + // and the input and output vectors have a bit size of 128 (and are hence not by-ref). Otherwise, + // we fall back to a naive loop lowering. + if (!isByRef(a_ty, zcu, cg.target) and + !isByRef(b_ty, zcu, cg.target) and + !isByRef(result_ty, zcu, cg.target) and + elem_ty.bitSize(zcu) % 8 == 0) + { + var lane_map: [16]u8 align(4) = undefined; + const lanes_per_elem: usize = @intCast(elem_ty.bitSize(zcu) / 8); + for (mask, 0..) |mask_elem, out_idx| { + const out_first_lane = out_idx * lanes_per_elem; + const in_first_lane = switch (mask_elem.unwrap()) { + .a_elem => |i| i * lanes_per_elem, + .b_elem => |i| i * lanes_per_elem + 16, + .undef => 0, // doesn't matter + }; + for (lane_map[out_first_lane..][0..lanes_per_elem], in_first_lane..) |*out, in| { + out.* = @intCast(in); + } + } + try cg.emitWValue(operand_a); + try cg.emitWValue(operand_b); + const extra_index: u32 = @intCast(cg.mir_extra.items.len); + try cg.mir_extra.appendSlice(cg.gpa, &.{ + @intFromEnum(std.wasm.SimdOpcode.i8x16_shuffle), + @bitCast(lane_map[0..4].*), + @bitCast(lane_map[4..8].*), + @bitCast(lane_map[8..12].*), + @bitCast(lane_map[12..].*), + }); + try cg.addInst(.{ .tag = .simd_prefix, .data = .{ .payload = extra_index } }); + return cg.finishAir(inst, .stack, &.{ unwrapped.operand_a, unwrapped.operand_b }); + } + + // TODO: this is incorrect if either operand or the result is *not* by-ref, which is possible. + // I tried to fix it, but I couldn't make much sense of how this backend handles memory. + if (!isByRef(result_ty, zcu, cg.target) or + !isByRef(a_ty, zcu, cg.target) or + !isByRef(b_ty, zcu, cg.target)) return cg.fail("TODO: handle mixed by-ref shuffle", .{}); + + const dest_alloc = try cg.allocStack(result_ty); + for (mask, 0..) 
|mask_elem, out_idx| { + try cg.emitWValue(dest_alloc); + const elem_val = switch (mask_elem.unwrap()) { + .a_elem => |idx| try cg.load(operand_a, elem_ty, @intCast(elem_size * idx)), + .b_elem => |idx| try cg.load(operand_b, elem_ty, @intCast(elem_size * idx)), + .undef => try cg.emitUndefined(elem_ty), + }; + try cg.store(.stack, elem_val, elem_ty, @intCast(dest_alloc.offset() + elem_size * out_idx)); + } + return cg.finishAir(inst, dest_alloc, &.{ unwrapped.operand_a, unwrapped.operand_b }); +} + +fn airReduce(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const reduce = cg.air.instructions.items(.data)[@intFromEnum(inst)].reduce; + const operand = try cg.resolveInst(reduce.operand); + + _ = operand; + return cg.fail("TODO: Implement wasm airReduce", .{}); +} + +fn airAggregateInit(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const pt = cg.pt; + const zcu = pt.zcu; + const ip = &zcu.intern_pool; + const ty_pl = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl; + const result_ty = cg.typeOfIndex(inst); + const len = @as(usize, @intCast(result_ty.arrayLen(zcu))); + const elements: []const Air.Inst.Ref = @ptrCast(cg.air.extra.items[ty_pl.payload..][0..len]); + + const result: WValue = result_value: { + switch (result_ty.zigTypeTag(zcu)) { + .array => { + const result = try cg.allocStack(result_ty); + const elem_ty = result_ty.childType(zcu); + const elem_size = @as(u32, @intCast(elem_ty.abiSize(zcu))); + const sentinel = result_ty.sentinel(zcu); + + // When the element type is by reference, we must copy the entire + // value. It is therefore safer to move the offset pointer and store + // each value individually, instead of using store offsets. + if (isByRef(elem_ty, zcu, cg.target)) { + // copy stack pointer into a temporary local, which is + // moved for each element to store each value in the right position. + const offset = try cg.buildPointerOffset(result, 0, .new); + for (elements, 0..) |elem, elem_index| { + const elem_val = try cg.resolveInst(elem); + try cg.store(offset, elem_val, elem_ty, 0); + + if (elem_index < elements.len - 1 or sentinel != null) { + _ = try cg.buildPointerOffset(offset, elem_size, .modify); + } + } + if (sentinel) |s| { + const val = try cg.resolveValue(s); + try cg.store(offset, val, elem_ty, 0); + } + } else { + var offset: u32 = 0; + for (elements) |elem| { + const elem_val = try cg.resolveInst(elem); + try cg.store(result, elem_val, elem_ty, offset); + offset += elem_size; + } + if (sentinel) |s| { + const val = try cg.resolveValue(s); + try cg.store(result, val, elem_ty, offset); + } + } + break :result_value result; + }, + .@"struct" => switch (result_ty.containerLayout(zcu)) { + .@"packed" => { + if (isByRef(result_ty, zcu, cg.target)) { + return cg.fail("TODO: airAggregateInit for packed structs larger than 64 bits", .{}); + } + const packed_struct = zcu.typeToPackedStruct(result_ty).?; + const field_types = packed_struct.field_types; + const backing_type = Type.fromInterned(packed_struct.backingIntTypeUnordered(ip)); + + // ensure the result is zero'd + const result = try cg.allocLocal(backing_type); + if (backing_type.bitSize(zcu) <= 32) + try cg.addImm32(0) + else + try cg.addImm64(0); + try cg.addLocal(.local_set, result.local.value); + + var current_bit: u16 = 0; + for (elements, 0..) 
|elem, elem_index| { + const field_ty = Type.fromInterned(field_types.get(ip)[elem_index]); + if (!field_ty.hasRuntimeBitsIgnoreComptime(zcu)) continue; + + const shift_val: WValue = if (backing_type.bitSize(zcu) <= 32) + .{ .imm32 = current_bit } + else + .{ .imm64 = current_bit }; + + const value = try cg.resolveInst(elem); + const value_bit_size: u16 = @intCast(field_ty.bitSize(zcu)); + const int_ty = try pt.intType(.unsigned, value_bit_size); + + // load our current result on stack so we can perform all transformations + // using only stack values. Saving the cost of loads and stores. + try cg.emitWValue(result); + const bitcasted = try cg.bitcast(int_ty, field_ty, value); + const extended_val = try cg.intcast(bitcasted, int_ty, backing_type); + // no need to shift any values when the current offset is 0 + const shifted = if (current_bit != 0) shifted: { + break :shifted try cg.binOp(extended_val, shift_val, backing_type, .shl); + } else extended_val; + // we ignore the result as we keep it on the stack to assign it directly to `result` + _ = try cg.binOp(.stack, shifted, backing_type, .@"or"); + try cg.addLocal(.local_set, result.local.value); + current_bit += value_bit_size; + } + break :result_value result; + }, + else => { + const result = try cg.allocStack(result_ty); + const offset = try cg.buildPointerOffset(result, 0, .new); // pointer to offset + var prev_field_offset: u64 = 0; + for (elements, 0..) |elem, elem_index| { + if (try result_ty.structFieldValueComptime(pt, elem_index) != null) continue; + + const elem_ty = result_ty.fieldType(elem_index, zcu); + const field_offset = result_ty.structFieldOffset(elem_index, zcu); + _ = try cg.buildPointerOffset(offset, @intCast(field_offset - prev_field_offset), .modify); + prev_field_offset = field_offset; + + const value = try cg.resolveInst(elem); + try cg.store(offset, value, elem_ty, 0); + } + + break :result_value result; + }, + }, + .vector => return cg.fail("TODO: Wasm backend: implement airAggregateInit for vectors", .{}), + else => unreachable, + } + }; + + if (elements.len <= Air.Liveness.bpi - 1) { + var buf = [1]Air.Inst.Ref{.none} ** (Air.Liveness.bpi - 1); + @memcpy(buf[0..elements.len], elements); + return cg.finishAir(inst, result, &buf); + } + var bt = try cg.iterateBigTomb(inst, elements.len); + for (elements) |arg| bt.feed(arg); + return bt.finishAir(result); +} + +fn airUnionInit(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const pt = cg.pt; + const zcu = pt.zcu; + const ip = &zcu.intern_pool; + const ty_pl = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl; + const extra = cg.air.extraData(Air.UnionInit, ty_pl.payload).data; + + const result = result: { + const union_ty = cg.typeOfIndex(inst); + const layout = union_ty.unionGetLayout(zcu); + const union_obj = zcu.typeToUnion(union_ty).?; + const field_ty = Type.fromInterned(union_obj.field_types.get(ip)[extra.field_index]); + const field_name = union_obj.loadTagType(ip).names.get(ip)[extra.field_index]; + + const tag_int = blk: { + const tag_ty = union_ty.unionTagTypeHypothetical(zcu); + const enum_field_index = tag_ty.enumFieldIndex(field_name, zcu).?; + const tag_val = try pt.enumValueFieldIndex(tag_ty, enum_field_index); + break :blk try cg.lowerConstant(tag_val, tag_ty); + }; + if (layout.payload_size == 0) { + if (layout.tag_size == 0) { + break :result .none; + } + assert(!isByRef(union_ty, zcu, cg.target)); + break :result tag_int; + } + + if (isByRef(union_ty, zcu, cg.target)) { + const result_ptr = try cg.allocStack(union_ty); + 
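+ // The stores below follow the union layout: when the tag alignment is at least the
+ // payload alignment, the tag lives at offset 0 and the payload at `tag_size`;
+ // otherwise the payload lives at offset 0 and the tag at `payload_size`.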
const payload = try cg.resolveInst(extra.init); + if (layout.tag_align.compare(.gte, layout.payload_align)) { + if (isByRef(field_ty, zcu, cg.target)) { + const payload_ptr = try cg.buildPointerOffset(result_ptr, layout.tag_size, .new); + try cg.store(payload_ptr, payload, field_ty, 0); + } else { + try cg.store(result_ptr, payload, field_ty, @intCast(layout.tag_size)); + } + + if (layout.tag_size > 0) { + try cg.store(result_ptr, tag_int, Type.fromInterned(union_obj.enum_tag_ty), 0); + } + } else { + try cg.store(result_ptr, payload, field_ty, 0); + if (layout.tag_size > 0) { + try cg.store( + result_ptr, + tag_int, + Type.fromInterned(union_obj.enum_tag_ty), + @intCast(layout.payload_size), + ); + } + } + break :result result_ptr; + } else { + const operand = try cg.resolveInst(extra.init); + const union_int_type = try pt.intType(.unsigned, @as(u16, @intCast(union_ty.bitSize(zcu)))); + if (field_ty.zigTypeTag(zcu) == .float) { + const int_type = try pt.intType(.unsigned, @intCast(field_ty.bitSize(zcu))); + const bitcasted = try cg.bitcast(field_ty, int_type, operand); + break :result try cg.trunc(bitcasted, int_type, union_int_type); + } else if (field_ty.isPtrAtRuntime(zcu)) { + const int_type = try pt.intType(.unsigned, @intCast(field_ty.bitSize(zcu))); + break :result try cg.intcast(operand, int_type, union_int_type); + } + break :result try cg.intcast(operand, field_ty, union_int_type); + } + }; + + return cg.finishAir(inst, result, &.{extra.init}); +} + +fn airPrefetch(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const prefetch = cg.air.instructions.items(.data)[@intFromEnum(inst)].prefetch; + return cg.finishAir(inst, .none, &.{prefetch.ptr}); +} + +fn airWasmMemorySize(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const pl_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].pl_op; + + try cg.addLabel(.memory_size, pl_op.payload); + return cg.finishAir(inst, .stack, &.{pl_op.operand}); +} + +fn airWasmMemoryGrow(cg: *CodeGen, inst: Air.Inst.Index) !void { + const pl_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].pl_op; + + const operand = try cg.resolveInst(pl_op.operand); + try cg.emitWValue(operand); + try cg.addLabel(.memory_grow, pl_op.payload); + return cg.finishAir(inst, .stack, &.{pl_op.operand}); +} + +fn cmpOptionals(cg: *CodeGen, lhs: WValue, rhs: WValue, operand_ty: Type, op: std.math.CompareOperator) InnerError!WValue { + const zcu = cg.pt.zcu; + assert(operand_ty.hasRuntimeBitsIgnoreComptime(zcu)); + assert(op == .eq or op == .neq); + const payload_ty = operand_ty.optionalChild(zcu); + assert(!isByRef(payload_ty, zcu, cg.target)); + + var result = try cg.allocLocal(Type.i32); + defer result.free(cg); + + var lhs_null = try cg.allocLocal(Type.i32); + defer lhs_null.free(cg); + + try cg.startBlock(.block, .empty); + + try cg.addImm32(if (op == .eq) 0 else 1); + try cg.addLocal(.local_set, result.local.value); + + _ = try cg.isNull(lhs, operand_ty, .i32_eq); + try cg.addLocal(.local_tee, lhs_null.local.value); + _ = try cg.isNull(rhs, operand_ty, .i32_eq); + try cg.addTag(.i32_ne); + try cg.addLabel(.br_if, 0); // only one is null + + try cg.addImm32(if (op == .eq) 1 else 0); + try cg.addLocal(.local_set, result.local.value); + + try cg.addLocal(.local_get, lhs_null.local.value); + try cg.addLabel(.br_if, 0); // both are null + + _ = try cg.load(lhs, payload_ty, 0); + _ = try cg.load(rhs, payload_ty, 0); + _ = try cg.cmp(.stack, .stack, payload_ty, op); + try cg.addLocal(.local_set, result.local.value); + + try cg.endBlock(); + + try 
cg.addLocal(.local_get, result.local.value); + + return .stack; +} + +/// Compares big integers by checking both its high bits and low bits. +/// NOTE: Leaves the result of the comparison on top of the stack. +/// TODO: Lower this to compiler_rt call when bitsize > 128 +fn cmpBigInt(cg: *CodeGen, lhs: WValue, rhs: WValue, operand_ty: Type, op: std.math.CompareOperator) InnerError!WValue { + const zcu = cg.pt.zcu; + assert(operand_ty.abiSize(zcu) >= 16); + assert(!(lhs != .stack and rhs == .stack)); + if (operand_ty.bitSize(zcu) > 128) { + return cg.fail("TODO: Support cmpBigInt for integer bitsize: '{d}'", .{operand_ty.bitSize(zcu)}); + } + + var lhs_msb = try (try cg.load(lhs, Type.u64, 8)).toLocal(cg, Type.u64); + defer lhs_msb.free(cg); + var rhs_msb = try (try cg.load(rhs, Type.u64, 8)).toLocal(cg, Type.u64); + defer rhs_msb.free(cg); + + switch (op) { + .eq, .neq => { + const xor_high = try cg.binOp(lhs_msb, rhs_msb, Type.u64, .xor); + const lhs_lsb = try cg.load(lhs, Type.u64, 0); + const rhs_lsb = try cg.load(rhs, Type.u64, 0); + const xor_low = try cg.binOp(lhs_lsb, rhs_lsb, Type.u64, .xor); + const or_result = try cg.binOp(xor_high, xor_low, Type.u64, .@"or"); + + switch (op) { + .eq => return cg.cmp(or_result, .{ .imm64 = 0 }, Type.u64, .eq), + .neq => return cg.cmp(or_result, .{ .imm64 = 0 }, Type.u64, .neq), + else => unreachable, + } + }, + else => { + const ty = if (operand_ty.isSignedInt(zcu)) Type.i64 else Type.u64; + // leave those value on top of the stack for '.select' + const lhs_lsb = try cg.load(lhs, Type.u64, 0); + const rhs_lsb = try cg.load(rhs, Type.u64, 0); + _ = try cg.cmp(lhs_lsb, rhs_lsb, Type.u64, op); + _ = try cg.cmp(lhs_msb, rhs_msb, ty, op); + _ = try cg.cmp(lhs_msb, rhs_msb, ty, .eq); + try cg.addTag(.select); + }, + } + + return .stack; +} + +fn airSetUnionTag(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const pt = cg.pt; + const zcu = pt.zcu; + const bin_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].bin_op; + const un_ty = cg.typeOf(bin_op.lhs).childType(zcu); + const tag_ty = cg.typeOf(bin_op.rhs); + const layout = un_ty.unionGetLayout(zcu); + if (layout.tag_size == 0) return cg.finishAir(inst, .none, &.{ bin_op.lhs, bin_op.rhs }); + + const union_ptr = try cg.resolveInst(bin_op.lhs); + const new_tag = try cg.resolveInst(bin_op.rhs); + if (layout.payload_size == 0) { + try cg.store(union_ptr, new_tag, tag_ty, 0); + return cg.finishAir(inst, .none, &.{ bin_op.lhs, bin_op.rhs }); + } + + // when the tag alignment is smaller than the payload, the field will be stored + // after the payload. + const offset: u32 = if (layout.tag_align.compare(.lt, layout.payload_align)) blk: { + break :blk @intCast(layout.payload_size); + } else 0; + try cg.store(union_ptr, new_tag, tag_ty, offset); + return cg.finishAir(inst, .none, &.{ bin_op.lhs, bin_op.rhs }); +} + +fn airGetUnionTag(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const zcu = cg.pt.zcu; + const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; + + const un_ty = cg.typeOf(ty_op.operand); + const tag_ty = cg.typeOfIndex(inst); + const layout = un_ty.unionGetLayout(zcu); + if (layout.tag_size == 0) return cg.finishAir(inst, .none, &.{ty_op.operand}); + + const operand = try cg.resolveInst(ty_op.operand); + // when the tag alignment is smaller than the payload, the field will be stored + // after the payload. 
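+ // This mirrors the tag placement used by airSetUnionTag above.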
+ const offset: u32 = if (layout.tag_align.compare(.lt, layout.payload_align)) + @intCast(layout.payload_size) + else + 0; + const result = try cg.load(operand, tag_ty, offset); + return cg.finishAir(inst, result, &.{ty_op.operand}); +} + +fn airFpext(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; + + const dest_ty = cg.typeOfIndex(inst); + const operand = try cg.resolveInst(ty_op.operand); + const result = try cg.fpext(operand, cg.typeOf(ty_op.operand), dest_ty); + return cg.finishAir(inst, result, &.{ty_op.operand}); +} + +/// Extends a float from a given `Type` to a larger wanted `Type`, leaving the +/// result on the stack. +fn fpext(cg: *CodeGen, operand: WValue, given: Type, wanted: Type) InnerError!WValue { + const given_bits = given.floatBits(cg.target); + const wanted_bits = wanted.floatBits(cg.target); + + const intrinsic: Mir.Intrinsic = switch (given_bits) { + 16 => switch (wanted_bits) { + 32 => { + assert(.stack == try cg.callIntrinsic(.__extendhfsf2, &.{.f16_type}, Type.f32, &.{operand})); + return .stack; + }, + 64 => { + assert(.stack == try cg.callIntrinsic(.__extendhfsf2, &.{.f16_type}, Type.f32, &.{operand})); + try cg.addTag(.f64_promote_f32); + return .stack; + }, + 80 => .__extendhfxf2, + 128 => .__extendhftf2, + else => unreachable, + }, + 32 => switch (wanted_bits) { + 64 => { + try cg.emitWValue(operand); + try cg.addTag(.f64_promote_f32); + return .stack; + }, + 80 => .__extendsfxf2, + 128 => .__extendsftf2, + else => unreachable, + }, + 64 => switch (wanted_bits) { + 80 => .__extenddfxf2, + 128 => .__extenddftf2, + else => unreachable, + }, + 80 => switch (wanted_bits) { + 128 => .__extendxftf2, + else => unreachable, + }, + else => unreachable, + }; + return cg.callIntrinsic(intrinsic, &.{given.ip_index}, wanted, &.{operand}); +} + +fn airFptrunc(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; + + const dest_ty = cg.typeOfIndex(inst); + const operand = try cg.resolveInst(ty_op.operand); + const result = try cg.fptrunc(operand, cg.typeOf(ty_op.operand), dest_ty); + return cg.finishAir(inst, result, &.{ty_op.operand}); +} + +/// Truncates a float from a given `Type` to its wanted `Type`, leaving the +/// result on the stack. 
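+/// Truncations WebAssembly supports natively (f64 -> f32) are emitted inline; all other
+/// cases are lowered to the matching compiler-rt `__trunc*` intrinsic, with f16 results
+/// produced via `__truncsfhf2`.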
+fn fptrunc(cg: *CodeGen, operand: WValue, given: Type, wanted: Type) InnerError!WValue { + const given_bits = given.floatBits(cg.target); + const wanted_bits = wanted.floatBits(cg.target); + + const intrinsic: Mir.Intrinsic = switch (given_bits) { + 32 => switch (wanted_bits) { + 16 => { + return cg.callIntrinsic(.__truncsfhf2, &.{.f32_type}, Type.f16, &.{operand}); + }, + else => unreachable, + }, + 64 => switch (wanted_bits) { + 16 => { + try cg.emitWValue(operand); + try cg.addTag(.f32_demote_f64); + return cg.callIntrinsic(.__truncsfhf2, &.{.f32_type}, Type.f16, &.{.stack}); + }, + 32 => { + try cg.emitWValue(operand); + try cg.addTag(.f32_demote_f64); + return .stack; + }, + else => unreachable, + }, + 80 => switch (wanted_bits) { + 16 => .__truncxfhf2, + 32 => .__truncxfsf2, + 64 => .__truncxfdf2, + else => unreachable, + }, + 128 => switch (wanted_bits) { + 16 => .__trunctfhf2, + 32 => .__trunctfsf2, + 64 => .__trunctfdf2, + 80 => .__trunctfxf2, + else => unreachable, + }, + else => unreachable, + }; + return cg.callIntrinsic(intrinsic, &.{given.ip_index}, wanted, &.{operand}); +} + +fn airErrUnionPayloadPtrSet(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const zcu = cg.pt.zcu; + const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; + + const err_set_ty = cg.typeOf(ty_op.operand).childType(zcu); + const payload_ty = err_set_ty.errorUnionPayload(zcu); + const operand = try cg.resolveInst(ty_op.operand); + + // set error-tag to '0' to annotate error union is non-error + try cg.store( + operand, + .{ .imm32 = 0 }, + Type.anyerror, + @intCast(errUnionErrorOffset(payload_ty, zcu)), + ); + + const result = result: { + if (!payload_ty.hasRuntimeBitsIgnoreComptime(zcu)) { + break :result cg.reuseOperand(ty_op.operand, operand); + } + + break :result try cg.buildPointerOffset(operand, @as(u32, @intCast(errUnionPayloadOffset(payload_ty, zcu))), .new); + }; + return cg.finishAir(inst, result, &.{ty_op.operand}); +} + +fn airFieldParentPtr(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const pt = cg.pt; + const zcu = pt.zcu; + const ty_pl = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl; + const extra = cg.air.extraData(Air.FieldParentPtr, ty_pl.payload).data; + + const field_ptr = try cg.resolveInst(extra.field_ptr); + const parent_ptr_ty = cg.typeOfIndex(inst); + const parent_ty = parent_ptr_ty.childType(zcu); + const field_ptr_ty = cg.typeOf(extra.field_ptr); + const field_index = extra.field_index; + const field_offset = switch (parent_ty.containerLayout(zcu)) { + .auto, .@"extern" => parent_ty.structFieldOffset(field_index, zcu), + .@"packed" => offset: { + const parent_ptr_offset = parent_ptr_ty.ptrInfo(zcu).packed_offset.bit_offset; + const field_offset = if (zcu.typeToStruct(parent_ty)) |loaded_struct| zcu.structPackedFieldBitOffset(loaded_struct, field_index) else 0; + const field_ptr_offset = field_ptr_ty.ptrInfo(zcu).packed_offset.bit_offset; + break :offset @divExact(parent_ptr_offset + field_offset - field_ptr_offset, 8); + }, + }; + + const result = if (field_offset != 0) result: { + const base = try cg.buildPointerOffset(field_ptr, 0, .new); + try cg.addLocal(.local_get, base.local.value); + try cg.addImm32(@intCast(field_offset)); + try cg.addTag(.i32_sub); + try cg.addLocal(.local_set, base.local.value); + break :result base; + } else cg.reuseOperand(extra.field_ptr, field_ptr); + + return cg.finishAir(inst, result, &.{extra.field_ptr}); +} + +fn sliceOrArrayPtr(cg: *CodeGen, ptr: WValue, ptr_ty: Type) InnerError!WValue { + 
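+ // A slice operand carries both a pointer and a length; only the pointer word is
+ // needed here. A single-item pointer to an array is already the address we want.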
const zcu = cg.pt.zcu; + if (ptr_ty.isSlice(zcu)) { + return cg.slicePtr(ptr); + } else { + return ptr; + } +} + +fn airMemcpy(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const zcu = cg.pt.zcu; + const bin_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].bin_op; + const dst = try cg.resolveInst(bin_op.lhs); + const dst_ty = cg.typeOf(bin_op.lhs); + const ptr_elem_ty = dst_ty.childType(zcu); + const src = try cg.resolveInst(bin_op.rhs); + const src_ty = cg.typeOf(bin_op.rhs); + const len = switch (dst_ty.ptrSize(zcu)) { + .slice => blk: { + const slice_len = try cg.sliceLen(dst); + if (ptr_elem_ty.abiSize(zcu) != 1) { + try cg.emitWValue(slice_len); + try cg.emitWValue(.{ .imm32 = @as(u32, @intCast(ptr_elem_ty.abiSize(zcu))) }); + try cg.addTag(.i32_mul); + try cg.addLocal(.local_set, slice_len.local.value); + } + break :blk slice_len; + }, + .one => @as(WValue, .{ + .imm32 = @as(u32, @intCast(ptr_elem_ty.arrayLen(zcu) * ptr_elem_ty.childType(zcu).abiSize(zcu))), + }), + .c, .many => unreachable, + }; + const dst_ptr = try cg.sliceOrArrayPtr(dst, dst_ty); + const src_ptr = try cg.sliceOrArrayPtr(src, src_ty); + try cg.memcpy(dst_ptr, src_ptr, len); + + return cg.finishAir(inst, .none, &.{ bin_op.lhs, bin_op.rhs }); +} + +fn airRetAddr(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + // TODO: Implement this properly once stack serialization is solved + return cg.finishAir(inst, switch (cg.ptr_size) { + .wasm32 => .{ .imm32 = 0 }, + .wasm64 => .{ .imm64 = 0 }, + }, &.{}); +} + +fn airPopcount(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const pt = cg.pt; + const zcu = pt.zcu; + const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; + + const operand = try cg.resolveInst(ty_op.operand); + const op_ty = cg.typeOf(ty_op.operand); + + if (op_ty.zigTypeTag(zcu) == .vector) { + return cg.fail("TODO: Implement @popCount for vectors", .{}); + } + + const int_info = op_ty.intInfo(zcu); + const bits = int_info.bits; + const wasm_bits = toWasmBits(bits) orelse { + return cg.fail("TODO: Implement @popCount for integers with bitsize '{d}'", .{bits}); + }; + + switch (wasm_bits) { + 32 => { + try cg.emitWValue(operand); + if (op_ty.isSignedInt(zcu) and bits != wasm_bits) { + _ = try cg.wrapOperand(.stack, try pt.intType(.unsigned, bits)); + } + try cg.addTag(.i32_popcnt); + }, + 64 => { + try cg.emitWValue(operand); + if (op_ty.isSignedInt(zcu) and bits != wasm_bits) { + _ = try cg.wrapOperand(.stack, try pt.intType(.unsigned, bits)); + } + try cg.addTag(.i64_popcnt); + try cg.addTag(.i32_wrap_i64); + try cg.emitWValue(operand); + }, + 128 => { + _ = try cg.load(operand, Type.u64, 0); + try cg.addTag(.i64_popcnt); + _ = try cg.load(operand, Type.u64, 8); + if (op_ty.isSignedInt(zcu) and bits != wasm_bits) { + _ = try cg.wrapOperand(.stack, try pt.intType(.unsigned, bits - 64)); + } + try cg.addTag(.i64_popcnt); + try cg.addTag(.i64_add); + try cg.addTag(.i32_wrap_i64); + }, + else => unreachable, + } + + return cg.finishAir(inst, .stack, &.{ty_op.operand}); +} + +fn airBitReverse(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const zcu = cg.pt.zcu; + const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; + + const operand = try cg.resolveInst(ty_op.operand); + const ty = cg.typeOf(ty_op.operand); + + if (ty.zigTypeTag(zcu) == .vector) { + return cg.fail("TODO: Implement @bitReverse for vectors", .{}); + } + + const int_info = ty.intInfo(zcu); + const bits = int_info.bits; + const wasm_bits = toWasmBits(bits) orelse { + 
return cg.fail("TODO: Implement @bitReverse for integers with bitsize '{d}'", .{bits}); + }; + + switch (wasm_bits) { + 32 => { + const intrin_ret = try cg.callIntrinsic( + .__bitreversesi2, + &.{.u32_type}, + Type.u32, + &.{operand}, + ); + const result = if (bits == 32) + intrin_ret + else + try cg.binOp(intrin_ret, .{ .imm32 = 32 - bits }, ty, .shr); + return cg.finishAir(inst, result, &.{ty_op.operand}); + }, + 64 => { + const intrin_ret = try cg.callIntrinsic( + .__bitreversedi2, + &.{.u64_type}, + Type.u64, + &.{operand}, + ); + const result = if (bits == 64) + intrin_ret + else + try cg.binOp(intrin_ret, .{ .imm64 = 64 - bits }, ty, .shr); + return cg.finishAir(inst, result, &.{ty_op.operand}); + }, + 128 => { + const result = try cg.allocStack(ty); + + try cg.emitWValue(result); + const first_half = try cg.load(operand, Type.u64, 8); + const intrin_ret_first = try cg.callIntrinsic( + .__bitreversedi2, + &.{.u64_type}, + Type.u64, + &.{first_half}, + ); + try cg.emitWValue(intrin_ret_first); + if (bits < 128) { + try cg.emitWValue(.{ .imm64 = 128 - bits }); + try cg.addTag(.i64_shr_u); + } + try cg.emitWValue(result); + const second_half = try cg.load(operand, Type.u64, 0); + const intrin_ret_second = try cg.callIntrinsic( + .__bitreversedi2, + &.{.u64_type}, + Type.u64, + &.{second_half}, + ); + try cg.emitWValue(intrin_ret_second); + if (bits == 128) { + try cg.store(.stack, .stack, Type.u64, result.offset() + 8); + try cg.store(.stack, .stack, Type.u64, result.offset()); + } else { + var tmp = try cg.allocLocal(Type.u64); + defer tmp.free(cg); + try cg.addLocal(.local_tee, tmp.local.value); + try cg.emitWValue(.{ .imm64 = 128 - bits }); + if (ty.isSignedInt(zcu)) { + try cg.addTag(.i64_shr_s); + } else { + try cg.addTag(.i64_shr_u); + } + try cg.store(.stack, .stack, Type.u64, result.offset() + 8); + try cg.addLocal(.local_get, tmp.local.value); + try cg.emitWValue(.{ .imm64 = bits - 64 }); + try cg.addTag(.i64_shl); + try cg.addTag(.i64_or); + try cg.store(.stack, .stack, Type.u64, result.offset()); + } + return cg.finishAir(inst, result, &.{ty_op.operand}); + }, + else => unreachable, + } +} + +fn airErrorName(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const un_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].un_op; + const operand = try cg.resolveInst(un_op); + // Each entry to this table is a slice (ptr+len). + // The operand in this instruction represents the index within this table. + // This means to get the final name, we emit the base pointer and then perform + // pointer arithmetic to find the pointer to this slice and return that. + // + // As the names are global and the slice elements are constant, we do not have + // to make a copy of the ptr+value but can point towards them directly. + const pt = cg.pt; + const name_ty = Type.slice_const_u8_sentinel_0; + const abi_size = name_ty.abiSize(pt.zcu); + + // Lowers to a i32.const or i64.const with the error table memory address. 
+ cg.error_name_table_ref_count += 1; + try cg.addTag(.error_name_table_ref); + try cg.emitWValue(operand); + switch (cg.ptr_size) { + .wasm32 => { + try cg.addImm32(@intCast(abi_size)); + try cg.addTag(.i32_mul); + try cg.addTag(.i32_add); + }, + .wasm64 => { + try cg.addImm64(abi_size); + try cg.addTag(.i64_mul); + try cg.addTag(.i64_add); + }, + } + + return cg.finishAir(inst, .stack, &.{un_op}); +} + +fn airPtrSliceFieldPtr(cg: *CodeGen, inst: Air.Inst.Index, offset: u32) InnerError!void { + const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; + const slice_ptr = try cg.resolveInst(ty_op.operand); + const result = try cg.buildPointerOffset(slice_ptr, offset, .new); + return cg.finishAir(inst, result, &.{ty_op.operand}); +} + +/// NOTE: Allocates place for result on virtual stack, when integer size > 64 bits +fn intZeroValue(cg: *CodeGen, ty: Type) InnerError!WValue { + const zcu = cg.pt.zcu; + const int_info = ty.intInfo(zcu); + const wasm_bits = toWasmBits(int_info.bits) orelse { + return cg.fail("TODO: Implement intZeroValue for integer bitsize: {d}", .{int_info.bits}); + }; + switch (wasm_bits) { + 32 => return .{ .imm32 = 0 }, + 64 => return .{ .imm64 = 0 }, + 128 => { + const result = try cg.allocStack(ty); + try cg.store(result, .{ .imm64 = 0 }, Type.u64, 0); + try cg.store(result, .{ .imm64 = 0 }, Type.u64, 8); + return result; + }, + else => unreachable, + } +} + +fn airAddSubWithOverflow(cg: *CodeGen, inst: Air.Inst.Index, op: Op) InnerError!void { + assert(op == .add or op == .sub); + const ty_pl = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl; + const extra = cg.air.extraData(Air.Bin, ty_pl.payload).data; + + const lhs = try cg.resolveInst(extra.lhs); + const rhs = try cg.resolveInst(extra.rhs); + const ty = cg.typeOf(extra.lhs); + const pt = cg.pt; + const zcu = pt.zcu; + + if (ty.zigTypeTag(zcu) == .vector) { + return cg.fail("TODO: Implement overflow arithmetic for vectors", .{}); + } + + const int_info = ty.intInfo(zcu); + const is_signed = int_info.signedness == .signed; + if (int_info.bits > 128) { + return cg.fail("TODO: Implement {{add/sub}}_with_overflow for integer bitsize: {d}", .{int_info.bits}); + } + + const op_result = try cg.wrapBinOp(lhs, rhs, ty, op); + var op_tmp = try op_result.toLocal(cg, ty); + defer op_tmp.free(cg); + + const cmp_op: std.math.CompareOperator = switch (op) { + .add => .lt, + .sub => .gt, + else => unreachable, + }; + const overflow_bit = if (is_signed) blk: { + const zero = try intZeroValue(cg, ty); + const rhs_is_neg = try cg.cmp(rhs, zero, ty, .lt); + const overflow_cmp = try cg.cmp(op_tmp, lhs, ty, cmp_op); + break :blk try cg.cmp(rhs_is_neg, overflow_cmp, Type.u1, .neq); + } else try cg.cmp(op_tmp, lhs, ty, cmp_op); + var bit_tmp = try overflow_bit.toLocal(cg, Type.u1); + defer bit_tmp.free(cg); + + const result = try cg.allocStack(cg.typeOfIndex(inst)); + const offset: u32 = @intCast(ty.abiSize(zcu)); + try cg.store(result, op_tmp, ty, 0); + try cg.store(result, bit_tmp, Type.u1, offset); + + return cg.finishAir(inst, result, &.{ extra.lhs, extra.rhs }); +} + +fn airShlWithOverflow(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const pt = cg.pt; + const zcu = pt.zcu; + const ty_pl = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl; + const extra = cg.air.extraData(Air.Bin, ty_pl.payload).data; + + const lhs = try cg.resolveInst(extra.lhs); + const rhs = try cg.resolveInst(extra.rhs); + const ty = cg.typeOf(extra.lhs); + const rhs_ty = cg.typeOf(extra.rhs); + + if 
(ty.isVector(zcu)) { + if (!rhs_ty.isVector(zcu)) { + return cg.fail("TODO: implement vector 'shl_with_overflow' with scalar rhs", .{}); + } else { + return cg.fail("TODO: implement vector 'shl_with_overflow'", .{}); + } + } + + const int_info = ty.intInfo(zcu); + const wasm_bits = toWasmBits(int_info.bits) orelse { + return cg.fail("TODO: implement 'shl_with_overflow' for integer bitsize: {d}", .{int_info.bits}); + }; + + // Ensure rhs is coerced to lhs as they must have the same WebAssembly types + // before we can perform any binary operation. + const rhs_wasm_bits = toWasmBits(rhs_ty.intInfo(zcu).bits).?; + // If wasm_bits == 128, compiler-rt expects i32 for shift + const rhs_final = if (wasm_bits != rhs_wasm_bits and wasm_bits == 64) blk: { + const rhs_casted = try cg.intcast(rhs, rhs_ty, ty); + break :blk try rhs_casted.toLocal(cg, ty); + } else rhs; + + var shl = try (try cg.wrapBinOp(lhs, rhs_final, ty, .shl)).toLocal(cg, ty); + defer shl.free(cg); + + const overflow_bit = blk: { + const shr = try cg.binOp(shl, rhs_final, ty, .shr); + break :blk try cg.cmp(shr, lhs, ty, .neq); + }; + var overflow_local = try overflow_bit.toLocal(cg, Type.u1); + defer overflow_local.free(cg); + + const result = try cg.allocStack(cg.typeOfIndex(inst)); + const offset: u32 = @intCast(ty.abiSize(zcu)); + try cg.store(result, shl, ty, 0); + try cg.store(result, overflow_local, Type.u1, offset); + + return cg.finishAir(inst, result, &.{ extra.lhs, extra.rhs }); +} + +fn airMulWithOverflow(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const ty_pl = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl; + const extra = cg.air.extraData(Air.Bin, ty_pl.payload).data; + + const lhs = try cg.resolveInst(extra.lhs); + const rhs = try cg.resolveInst(extra.rhs); + const ty = cg.typeOf(extra.lhs); + const pt = cg.pt; + const zcu = pt.zcu; + + if (ty.zigTypeTag(zcu) == .vector) { + return cg.fail("TODO: Implement overflow arithmetic for vectors", .{}); + } + + // We store the bit if it's overflowed or not in this. As it's zero-initialized + // we only need to update it if an overflow (or underflow) occurred. 
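+ // Strategy: 32- and 64-bit integers are widened to twice their size, multiplied,
+ // truncated back, and the truncated result is compared with the wide product to detect
+ // overflow. 128-bit multiplications are lowered through compiler-rt instead.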
+ var overflow_bit = try cg.ensureAllocLocal(Type.u1); + defer overflow_bit.free(cg); + + const int_info = ty.intInfo(zcu); + const wasm_bits = toWasmBits(int_info.bits) orelse { + return cg.fail("TODO: Implement `@mulWithOverflow` for integer bitsize: {d}", .{int_info.bits}); + }; + + const zero: WValue = switch (wasm_bits) { + 32 => .{ .imm32 = 0 }, + 64, 128 => .{ .imm64 = 0 }, + else => unreachable, + }; + + // for 32 bit integers we upcast it to a 64bit integer + const mul = if (wasm_bits == 32) blk: { + const new_ty = if (int_info.signedness == .signed) Type.i64 else Type.u64; + const lhs_upcast = try cg.intcast(lhs, ty, new_ty); + const rhs_upcast = try cg.intcast(rhs, ty, new_ty); + const bin_op = try (try cg.binOp(lhs_upcast, rhs_upcast, new_ty, .mul)).toLocal(cg, new_ty); + const res = try (try cg.trunc(bin_op, ty, new_ty)).toLocal(cg, ty); + const res_upcast = try cg.intcast(res, ty, new_ty); + _ = try cg.cmp(res_upcast, bin_op, new_ty, .neq); + try cg.addLocal(.local_set, overflow_bit.local.value); + break :blk res; + } else if (wasm_bits == 64) blk: { + const new_ty = if (int_info.signedness == .signed) Type.i128 else Type.u128; + const lhs_upcast = try cg.intcast(lhs, ty, new_ty); + const rhs_upcast = try cg.intcast(rhs, ty, new_ty); + const bin_op = try (try cg.binOp(lhs_upcast, rhs_upcast, new_ty, .mul)).toLocal(cg, new_ty); + const res = try (try cg.trunc(bin_op, ty, new_ty)).toLocal(cg, ty); + const res_upcast = try cg.intcast(res, ty, new_ty); + _ = try cg.cmp(res_upcast, bin_op, new_ty, .neq); + try cg.addLocal(.local_set, overflow_bit.local.value); + break :blk res; + } else if (int_info.bits == 128 and int_info.signedness == .unsigned) blk: { + var lhs_lsb = try (try cg.load(lhs, Type.u64, 0)).toLocal(cg, Type.u64); + defer lhs_lsb.free(cg); + var lhs_msb = try (try cg.load(lhs, Type.u64, 8)).toLocal(cg, Type.u64); + defer lhs_msb.free(cg); + var rhs_lsb = try (try cg.load(rhs, Type.u64, 0)).toLocal(cg, Type.u64); + defer rhs_lsb.free(cg); + var rhs_msb = try (try cg.load(rhs, Type.u64, 8)).toLocal(cg, Type.u64); + defer rhs_msb.free(cg); + + const cross_1 = try cg.callIntrinsic( + .__multi3, + &[_]InternPool.Index{.i64_type} ** 4, + Type.i128, + &.{ lhs_msb, zero, rhs_lsb, zero }, + ); + const cross_2 = try cg.callIntrinsic( + .__multi3, + &[_]InternPool.Index{.i64_type} ** 4, + Type.i128, + &.{ rhs_msb, zero, lhs_lsb, zero }, + ); + const mul_lsb = try cg.callIntrinsic( + .__multi3, + &[_]InternPool.Index{.i64_type} ** 4, + Type.i128, + &.{ rhs_lsb, zero, lhs_lsb, zero }, + ); + + const rhs_msb_not_zero = try cg.cmp(rhs_msb, zero, Type.u64, .neq); + const lhs_msb_not_zero = try cg.cmp(lhs_msb, zero, Type.u64, .neq); + const both_msb_not_zero = try cg.binOp(rhs_msb_not_zero, lhs_msb_not_zero, Type.bool, .@"and"); + const cross_1_msb = try cg.load(cross_1, Type.u64, 8); + const cross_1_msb_not_zero = try cg.cmp(cross_1_msb, zero, Type.u64, .neq); + const cond_1 = try cg.binOp(both_msb_not_zero, cross_1_msb_not_zero, Type.bool, .@"or"); + const cross_2_msb = try cg.load(cross_2, Type.u64, 8); + const cross_2_msb_not_zero = try cg.cmp(cross_2_msb, zero, Type.u64, .neq); + const cond_2 = try cg.binOp(cond_1, cross_2_msb_not_zero, Type.bool, .@"or"); + + const cross_1_lsb = try cg.load(cross_1, Type.u64, 0); + const cross_2_lsb = try cg.load(cross_2, Type.u64, 0); + const cross_add = try cg.binOp(cross_1_lsb, cross_2_lsb, Type.u64, .add); + + var mul_lsb_msb = try (try cg.load(mul_lsb, Type.u64, 8)).toLocal(cg, Type.u64); + defer mul_lsb_msb.free(cg); + var all_add = try 
(try cg.binOp(cross_add, mul_lsb_msb, Type.u64, .add)).toLocal(cg, Type.u64); + defer all_add.free(cg); + const add_overflow = try cg.cmp(all_add, mul_lsb_msb, Type.u64, .lt); + + // result for overflow bit + _ = try cg.binOp(cond_2, add_overflow, Type.bool, .@"or"); + try cg.addLocal(.local_set, overflow_bit.local.value); + + const tmp_result = try cg.allocStack(Type.u128); + try cg.emitWValue(tmp_result); + const mul_lsb_lsb = try cg.load(mul_lsb, Type.u64, 0); + try cg.store(.stack, mul_lsb_lsb, Type.u64, tmp_result.offset()); + try cg.store(tmp_result, all_add, Type.u64, 8); + break :blk tmp_result; + } else if (int_info.bits == 128 and int_info.signedness == .signed) blk: { + const overflow_ret = try cg.allocStack(Type.i32); + const res = try cg.callIntrinsic( + .__muloti4, + &[_]InternPool.Index{ .i128_type, .i128_type, .usize_type }, + Type.i128, + &.{ lhs, rhs, overflow_ret }, + ); + _ = try cg.load(overflow_ret, Type.i32, 0); + try cg.addLocal(.local_set, overflow_bit.local.value); + break :blk res; + } else return cg.fail("TODO: @mulWithOverflow for {f}", .{ty.fmt(pt)}); + var bin_op_local = try mul.toLocal(cg, ty); + defer bin_op_local.free(cg); + + const result = try cg.allocStack(cg.typeOfIndex(inst)); + const offset: u32 = @intCast(ty.abiSize(zcu)); + try cg.store(result, bin_op_local, ty, 0); + try cg.store(result, overflow_bit, Type.u1, offset); + + return cg.finishAir(inst, result, &.{ extra.lhs, extra.rhs }); +} + +fn airMaxMin( + cg: *CodeGen, + inst: Air.Inst.Index, + op: enum { fmax, fmin }, + cmp_op: std.math.CompareOperator, +) InnerError!void { + const zcu = cg.pt.zcu; + const bin_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].bin_op; + + const ty = cg.typeOfIndex(inst); + if (ty.zigTypeTag(zcu) == .vector) { + return cg.fail("TODO: `@maximum` and `@minimum` for vectors", .{}); + } + + if (ty.abiSize(zcu) > 16) { + return cg.fail("TODO: `@maximum` and `@minimum` for types larger than 16 bytes", .{}); + } + + const lhs = try cg.resolveInst(bin_op.lhs); + const rhs = try cg.resolveInst(bin_op.rhs); + + if (ty.zigTypeTag(zcu) == .float) { + const intrinsic = switch (op) { + inline .fmin, .fmax => |ct_op| switch (ty.floatBits(cg.target)) { + inline 16, 32, 64, 80, 128 => |bits| @field( + Mir.Intrinsic, + libcFloatPrefix(bits) ++ @tagName(ct_op) ++ libcFloatSuffix(bits), + ), + else => unreachable, + }, + }; + const result = try cg.callIntrinsic(intrinsic, &.{ ty.ip_index, ty.ip_index }, ty, &.{ lhs, rhs }); + try cg.lowerToStack(result); + } else { + // operands to select from + try cg.lowerToStack(lhs); + try cg.lowerToStack(rhs); + _ = try cg.cmp(lhs, rhs, ty, cmp_op); + + // based on the result from comparison, return operand 0 or 1. 
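+ // (`select` returns its first operand when the condition on top of the stack is
+ // non-zero, so the comparison result directly picks between lhs and rhs.)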
+ try cg.addTag(.select); + } + + return cg.finishAir(inst, .stack, &.{ bin_op.lhs, bin_op.rhs }); +} + +fn airMulAdd(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const zcu = cg.pt.zcu; + const pl_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].pl_op; + const bin_op = cg.air.extraData(Air.Bin, pl_op.payload).data; + + const ty = cg.typeOfIndex(inst); + if (ty.zigTypeTag(zcu) == .vector) { + return cg.fail("TODO: `@mulAdd` for vectors", .{}); + } + + const addend = try cg.resolveInst(pl_op.operand); + const lhs = try cg.resolveInst(bin_op.lhs); + const rhs = try cg.resolveInst(bin_op.rhs); + + const result = if (ty.floatBits(cg.target) == 16) fl_result: { + const rhs_ext = try cg.fpext(rhs, ty, Type.f32); + const lhs_ext = try cg.fpext(lhs, ty, Type.f32); + const addend_ext = try cg.fpext(addend, ty, Type.f32); + // call to compiler-rt `fn fmaf(f32, f32, f32) f32` + const result = try cg.callIntrinsic( + .fmaf, + &.{ .f32_type, .f32_type, .f32_type }, + Type.f32, + &.{ rhs_ext, lhs_ext, addend_ext }, + ); + break :fl_result try cg.fptrunc(result, Type.f32, ty); + } else result: { + const mul_result = try cg.binOp(lhs, rhs, ty, .mul); + break :result try cg.binOp(mul_result, addend, ty, .add); + }; + + return cg.finishAir(inst, result, &.{ bin_op.lhs, bin_op.rhs, pl_op.operand }); +} + +fn airClz(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const zcu = cg.pt.zcu; + const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; + + const ty = cg.typeOf(ty_op.operand); + if (ty.zigTypeTag(zcu) == .vector) { + return cg.fail("TODO: `@clz` for vectors", .{}); + } + + const operand = try cg.resolveInst(ty_op.operand); + const int_info = ty.intInfo(zcu); + const wasm_bits = toWasmBits(int_info.bits) orelse { + return cg.fail("TODO: `@clz` for integers with bitsize '{d}'", .{int_info.bits}); + }; + + switch (wasm_bits) { + 32 => { + if (int_info.signedness == .signed) { + const mask = ~@as(u32, 0) >> @intCast(32 - int_info.bits); + _ = try cg.binOp(operand, .{ .imm32 = mask }, ty, .@"and"); + } else { + try cg.emitWValue(operand); + } + try cg.addTag(.i32_clz); + }, + 64 => { + if (int_info.signedness == .signed) { + const mask = ~@as(u64, 0) >> @intCast(64 - int_info.bits); + _ = try cg.binOp(operand, .{ .imm64 = mask }, ty, .@"and"); + } else { + try cg.emitWValue(operand); + } + try cg.addTag(.i64_clz); + try cg.addTag(.i32_wrap_i64); + }, + 128 => { + var msb = try (try cg.load(operand, Type.u64, 8)).toLocal(cg, Type.u64); + defer msb.free(cg); + + try cg.emitWValue(msb); + try cg.addTag(.i64_clz); + _ = try cg.load(operand, Type.u64, 0); + try cg.addTag(.i64_clz); + try cg.emitWValue(.{ .imm64 = 64 }); + try cg.addTag(.i64_add); + _ = try cg.cmp(msb, .{ .imm64 = 0 }, Type.u64, .neq); + try cg.addTag(.select); + try cg.addTag(.i32_wrap_i64); + }, + else => unreachable, + } + + if (wasm_bits != int_info.bits) { + try cg.emitWValue(.{ .imm32 = wasm_bits - int_info.bits }); + try cg.addTag(.i32_sub); + } + + return cg.finishAir(inst, .stack, &.{ty_op.operand}); +} + +fn airCtz(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const zcu = cg.pt.zcu; + const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; + + const ty = cg.typeOf(ty_op.operand); + + if (ty.zigTypeTag(zcu) == .vector) { + return cg.fail("TODO: `@ctz` for vectors", .{}); + } + + const operand = try cg.resolveInst(ty_op.operand); + const int_info = ty.intInfo(zcu); + const wasm_bits = toWasmBits(int_info.bits) orelse { + return cg.fail("TODO: `@clz` for integers with 
bitsize '{d}'", .{int_info.bits}); + }; + + switch (wasm_bits) { + 32 => { + if (wasm_bits != int_info.bits) { + const val: u32 = @as(u32, 1) << @as(u5, @intCast(int_info.bits)); + // leave value on the stack + _ = try cg.binOp(operand, .{ .imm32 = val }, ty, .@"or"); + } else try cg.emitWValue(operand); + try cg.addTag(.i32_ctz); + }, + 64 => { + if (wasm_bits != int_info.bits) { + const val: u64 = @as(u64, 1) << @as(u6, @intCast(int_info.bits)); + // leave value on the stack + _ = try cg.binOp(operand, .{ .imm64 = val }, ty, .@"or"); + } else try cg.emitWValue(operand); + try cg.addTag(.i64_ctz); + try cg.addTag(.i32_wrap_i64); + }, + 128 => { + var lsb = try (try cg.load(operand, Type.u64, 0)).toLocal(cg, Type.u64); + defer lsb.free(cg); + + try cg.emitWValue(lsb); + try cg.addTag(.i64_ctz); + _ = try cg.load(operand, Type.u64, 8); + if (wasm_bits != int_info.bits) { + try cg.addImm64(@as(u64, 1) << @as(u6, @intCast(int_info.bits - 64))); + try cg.addTag(.i64_or); + } + try cg.addTag(.i64_ctz); + try cg.addImm64(64); + if (wasm_bits != int_info.bits) { + try cg.addTag(.i64_or); + } else { + try cg.addTag(.i64_add); + } + _ = try cg.cmp(lsb, .{ .imm64 = 0 }, Type.u64, .neq); + try cg.addTag(.select); + try cg.addTag(.i32_wrap_i64); + }, + else => unreachable, + } + + return cg.finishAir(inst, .stack, &.{ty_op.operand}); +} + +fn airDbgStmt(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const dbg_stmt = cg.air.instructions.items(.data)[@intFromEnum(inst)].dbg_stmt; + try cg.addInst(.{ .tag = .dbg_line, .data = .{ + .payload = try cg.addExtra(Mir.DbgLineColumn{ + .line = dbg_stmt.line, + .column = dbg_stmt.column, + }), + } }); + return cg.finishAir(inst, .none, &.{}); +} + +fn airDbgInlineBlock(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const ty_pl = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl; + const extra = cg.air.extraData(Air.DbgInlineBlock, ty_pl.payload); + // TODO + try cg.lowerBlock(inst, ty_pl.ty.toType(), @ptrCast(cg.air.extra.items[extra.end..][0..extra.data.body_len])); +} + +fn airDbgVar( + cg: *CodeGen, + inst: Air.Inst.Index, + local_tag: link.File.Dwarf.WipNav.LocalVarTag, + is_ptr: bool, +) InnerError!void { + _ = is_ptr; + _ = local_tag; + return cg.finishAir(inst, .none, &.{}); +} + +fn airTry(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const pl_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].pl_op; + const err_union = try cg.resolveInst(pl_op.operand); + const extra = cg.air.extraData(Air.Try, pl_op.payload); + const body: []const Air.Inst.Index = @ptrCast(cg.air.extra.items[extra.end..][0..extra.data.body_len]); + const err_union_ty = cg.typeOf(pl_op.operand); + const result = try lowerTry(cg, inst, err_union, body, err_union_ty, false); + return cg.finishAir(inst, result, &.{pl_op.operand}); +} + +fn airTryPtr(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const zcu = cg.pt.zcu; + const ty_pl = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl; + const extra = cg.air.extraData(Air.TryPtr, ty_pl.payload); + const err_union_ptr = try cg.resolveInst(extra.data.ptr); + const body: []const Air.Inst.Index = @ptrCast(cg.air.extra.items[extra.end..][0..extra.data.body_len]); + const err_union_ty = cg.typeOf(extra.data.ptr).childType(zcu); + const result = try lowerTry(cg, inst, err_union_ptr, body, err_union_ty, true); + return cg.finishAir(inst, result, &.{extra.data.ptr}); +} + +fn lowerTry( + cg: *CodeGen, + inst: Air.Inst.Index, + err_union: WValue, + body: []const Air.Inst.Index, + 
err_union_ty: Type, + operand_is_ptr: bool, +) InnerError!WValue { + const zcu = cg.pt.zcu; + + const pl_ty = err_union_ty.errorUnionPayload(zcu); + const pl_has_bits = pl_ty.hasRuntimeBitsIgnoreComptime(zcu); + + if (!err_union_ty.errorUnionSet(zcu).errorSetIsEmpty(zcu)) { + // Block we can jump out of when error is not set + try cg.startBlock(.block, .empty); + + // check if the error tag is set for the error union. + try cg.emitWValue(err_union); + if (pl_has_bits or operand_is_ptr) { + const err_offset: u32 = @intCast(errUnionErrorOffset(pl_ty, zcu)); + try cg.addMemArg(.i32_load16_u, .{ + .offset = err_union.offset() + err_offset, + .alignment = @intCast(Type.anyerror.abiAlignment(zcu).toByteUnits().?), + }); + } + try cg.addTag(.i32_eqz); + try cg.addLabel(.br_if, 0); // jump out of block when error is '0' + + const liveness = cg.liveness.getCondBr(inst); + try cg.branches.append(cg.gpa, .{}); + try cg.currentBranch().values.ensureUnusedCapacity(cg.gpa, liveness.else_deaths.len + liveness.then_deaths.len); + defer { + var branch = cg.branches.pop().?; + branch.deinit(cg.gpa); + } + try cg.genBody(body); + try cg.endBlock(); + } + + // if we reach here it means error was not set, and we want the payload + if (!pl_has_bits and !operand_is_ptr) { + return .none; + } + + const pl_offset: u32 = @intCast(errUnionPayloadOffset(pl_ty, zcu)); + if (operand_is_ptr or isByRef(pl_ty, zcu, cg.target)) { + return buildPointerOffset(cg, err_union, pl_offset, .new); + } + const payload = try cg.load(err_union, pl_ty, pl_offset); + return payload.toLocal(cg, pl_ty); +} + +fn airByteSwap(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const zcu = cg.pt.zcu; + const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; + + const ty = cg.typeOfIndex(inst); + const operand = try cg.resolveInst(ty_op.operand); + + if (ty.zigTypeTag(zcu) == .vector) { + return cg.fail("TODO: @byteSwap for vectors", .{}); + } + const int_info = ty.intInfo(zcu); + const wasm_bits = toWasmBits(int_info.bits) orelse { + return cg.fail("TODO: @byteSwap for integers with bitsize {d}", .{int_info.bits}); + }; + + // bytes are no-op + if (int_info.bits == 8) { + return cg.finishAir(inst, cg.reuseOperand(ty_op.operand, operand), &.{ty_op.operand}); + } + + const result = result: { + switch (wasm_bits) { + 32 => { + const intrin_ret = try cg.callIntrinsic( + .__bswapsi2, + &.{.u32_type}, + Type.u32, + &.{operand}, + ); + break :result if (int_info.bits == 32) + intrin_ret + else + try cg.binOp(intrin_ret, .{ .imm32 = 32 - int_info.bits }, ty, .shr); + }, + 64 => { + const intrin_ret = try cg.callIntrinsic( + .__bswapdi2, + &.{.u64_type}, + Type.u64, + &.{operand}, + ); + break :result if (int_info.bits == 64) + intrin_ret + else + try cg.binOp(intrin_ret, .{ .imm64 = 64 - int_info.bits }, ty, .shr); + }, + else => return cg.fail("TODO: @byteSwap for integers with bitsize {d}", .{int_info.bits}), + } + }; + return cg.finishAir(inst, result, &.{ty_op.operand}); +} + +fn airDiv(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const bin_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].bin_op; + + const ty = cg.typeOfIndex(inst); + const lhs = try cg.resolveInst(bin_op.lhs); + const rhs = try cg.resolveInst(bin_op.rhs); + + const result = try cg.binOp(lhs, rhs, ty, .div); + return cg.finishAir(inst, result, &.{ bin_op.lhs, bin_op.rhs }); +} + +fn airDivTrunc(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const bin_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].bin_op; + + const ty 
= cg.typeOfIndex(inst); + const lhs = try cg.resolveInst(bin_op.lhs); + const rhs = try cg.resolveInst(bin_op.rhs); + + const div_result = try cg.binOp(lhs, rhs, ty, .div); + + if (ty.isAnyFloat()) { + const trunc_result = try cg.floatOp(.trunc, ty, &.{div_result}); + return cg.finishAir(inst, trunc_result, &.{ bin_op.lhs, bin_op.rhs }); + } + + return cg.finishAir(inst, div_result, &.{ bin_op.lhs, bin_op.rhs }); +} + +fn airDivFloor(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const bin_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].bin_op; + + const zcu = cg.pt.zcu; + const ty = cg.typeOfIndex(inst); + const lhs = try cg.resolveInst(bin_op.lhs); + const rhs = try cg.resolveInst(bin_op.rhs); + + if (ty.isUnsignedInt(zcu)) { + _ = try cg.binOp(lhs, rhs, ty, .div); + } else if (ty.isSignedInt(zcu)) { + const int_bits = ty.intInfo(zcu).bits; + const wasm_bits = toWasmBits(int_bits) orelse { + return cg.fail("TODO: `@divFloor` for signed integers larger than 64 bits ({d} bits requested)", .{int_bits}); + }; + + if (wasm_bits > 64) { + return cg.fail("TODO: `@divFloor` for signed integers larger than 64 bits ({d} bits requested)", .{int_bits}); + } + + const zero: WValue = switch (wasm_bits) { + 32 => .{ .imm32 = 0 }, + 64 => .{ .imm64 = 0 }, + else => unreachable, + }; + + // tee leaves the value on the stack and stores it in a local. + const quotient = try cg.allocLocal(ty); + _ = try cg.binOp(lhs, rhs, ty, .div); + try cg.addLocal(.local_tee, quotient.local.value); + + // select takes a 32 bit value as the condition, so in the 64 bit case we use eqz to narrow + // the 64 bit value we want to use as the condition to 32 bits. + // This also inverts the condition (non 0 => 0, 0 => 1), so we put the adjusted and + // non-adjusted quotients on the stack in the opposite order for 32 vs 64 bits. + if (wasm_bits == 64) { + try cg.emitWValue(quotient); + } + + // 0 if the signs of rhs_wasm and lhs_wasm are the same, 1 otherwise. + _ = try cg.binOp(lhs, rhs, ty, .xor); + _ = try cg.cmp(.stack, zero, ty, .lt); + + switch (wasm_bits) { + 32 => { + try cg.addTag(.i32_sub); + try cg.emitWValue(quotient); + }, + 64 => { + try cg.addTag(.i64_extend_i32_u); + try cg.addTag(.i64_sub); + }, + else => unreachable, + } + + _ = try cg.binOp(lhs, rhs, ty, .rem); + + if (wasm_bits == 64) { + try cg.addTag(.i64_eqz); + } + + try cg.addTag(.select); + + // We need to zero the high bits because N bit comparisons consider all 32 or 64 bits, and + // expect all but the lowest N bits to be 0. + // TODO: Should we be zeroing the high bits here or should we be ignoring the high bits + // when performing comparisons? 
+ if (int_bits != wasm_bits) { + _ = try cg.wrapOperand(.stack, ty); + } + } else { + const float_bits = ty.floatBits(cg.target); + if (float_bits > 64) { + return cg.fail("TODO: `@divFloor` for floats with bitsize: {d}", .{float_bits}); + } + const is_f16 = float_bits == 16; + + const lhs_wasm = if (is_f16) try cg.fpext(lhs, Type.f16, Type.f32) else lhs; + const rhs_wasm = if (is_f16) try cg.fpext(rhs, Type.f16, Type.f32) else rhs; + + try cg.emitWValue(lhs_wasm); + try cg.emitWValue(rhs_wasm); + + switch (float_bits) { + 16, 32 => { + try cg.addTag(.f32_div); + try cg.addTag(.f32_floor); + }, + 64 => { + try cg.addTag(.f64_div); + try cg.addTag(.f64_floor); + }, + else => unreachable, + } + + if (is_f16) { + _ = try cg.fptrunc(.stack, Type.f32, Type.f16); + } + } + + return cg.finishAir(inst, .stack, &.{ bin_op.lhs, bin_op.rhs }); +} + +fn airRem(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const bin_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].bin_op; + + const ty = cg.typeOfIndex(inst); + const lhs = try cg.resolveInst(bin_op.lhs); + const rhs = try cg.resolveInst(bin_op.rhs); + + const result = try cg.binOp(lhs, rhs, ty, .rem); + + return cg.finishAir(inst, result, &.{ bin_op.lhs, bin_op.rhs }); +} + +/// Remainder after floor division, defined by: +/// @divFloor(a, b) * b + @mod(a, b) = a +fn airMod(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const bin_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].bin_op; + + const pt = cg.pt; + const zcu = pt.zcu; + const ty = cg.typeOfIndex(inst); + const lhs = try cg.resolveInst(bin_op.lhs); + const rhs = try cg.resolveInst(bin_op.rhs); + + const result = result: { + if (ty.isUnsignedInt(zcu)) { + break :result try cg.binOp(lhs, rhs, ty, .rem); + } + if (ty.isSignedInt(zcu)) { + // The wasm rem instruction gives the remainder after truncating division (rounding towards + // 0), equivalent to @rem. 
+ // We make use of the fact that: + // @mod(a, b) = @rem(@rem(a, b) + b, b) + const int_bits = ty.intInfo(zcu).bits; + const wasm_bits = toWasmBits(int_bits) orelse { + return cg.fail("TODO: `@mod` for signed integers larger than 64 bits ({d} bits requested)", .{int_bits}); + }; + + if (wasm_bits > 64) { + return cg.fail("TODO: `@mod` for signed integers larger than 64 bits ({d} bits requested)", .{int_bits}); + } + + _ = try cg.binOp(lhs, rhs, ty, .rem); + _ = try cg.binOp(.stack, rhs, ty, .add); + break :result try cg.binOp(.stack, rhs, ty, .rem); + } + if (ty.isAnyFloat()) { + const rem = try cg.binOp(lhs, rhs, ty, .rem); + const add = try cg.binOp(rem, rhs, ty, .add); + break :result try cg.binOp(add, rhs, ty, .rem); + } + return cg.fail("TODO: @mod for {f}", .{ty.fmt(pt)}); + }; + + return cg.finishAir(inst, result, &.{ bin_op.lhs, bin_op.rhs }); +} + +fn airSatMul(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const bin_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].bin_op; + + const pt = cg.pt; + const zcu = pt.zcu; + const ty = cg.typeOfIndex(inst); + const int_info = ty.intInfo(zcu); + const is_signed = int_info.signedness == .signed; + + const lhs = try cg.resolveInst(bin_op.lhs); + const rhs = try cg.resolveInst(bin_op.rhs); + const wasm_bits = toWasmBits(int_info.bits) orelse { + return cg.fail("TODO: mul_sat for {f}", .{ty.fmt(pt)}); + }; + + switch (wasm_bits) { + 32 => { + const upcast_ty: Type = if (is_signed) Type.i64 else Type.u64; + const lhs_up = try cg.intcast(lhs, ty, upcast_ty); + const rhs_up = try cg.intcast(rhs, ty, upcast_ty); + var mul_res = try (try cg.binOp(lhs_up, rhs_up, upcast_ty, .mul)).toLocal(cg, upcast_ty); + defer mul_res.free(cg); + if (is_signed) { + const imm_max: WValue = .{ .imm64 = ~@as(u64, 0) >> @intCast(64 - (int_info.bits - 1)) }; + try cg.emitWValue(mul_res); + try cg.emitWValue(imm_max); + _ = try cg.cmp(mul_res, imm_max, upcast_ty, .lt); + try cg.addTag(.select); + + var tmp = try cg.allocLocal(upcast_ty); + defer tmp.free(cg); + try cg.addLocal(.local_set, tmp.local.value); + + const imm_min: WValue = .{ .imm64 = ~@as(u64, 0) << @intCast(int_info.bits - 1) }; + try cg.emitWValue(tmp); + try cg.emitWValue(imm_min); + _ = try cg.cmp(tmp, imm_min, upcast_ty, .gt); + try cg.addTag(.select); + } else { + const imm_max: WValue = .{ .imm64 = ~@as(u64, 0) >> @intCast(64 - int_info.bits) }; + try cg.emitWValue(mul_res); + try cg.emitWValue(imm_max); + _ = try cg.cmp(mul_res, imm_max, upcast_ty, .lt); + try cg.addTag(.select); + } + try cg.addTag(.i32_wrap_i64); + }, + 64 => { + if (!(int_info.bits == 64 and int_info.signedness == .signed)) { + return cg.fail("TODO: mul_sat for {f}", .{ty.fmt(pt)}); + } + const overflow_ret = try cg.allocStack(Type.i32); + _ = try cg.callIntrinsic( + .__mulodi4, + &[_]InternPool.Index{ .i64_type, .i64_type, .usize_type }, + Type.i64, + &.{ lhs, rhs, overflow_ret }, + ); + const xor = try cg.binOp(lhs, rhs, Type.i64, .xor); + const sign_v = try cg.binOp(xor, .{ .imm64 = 63 }, Type.i64, .shr); + _ = try cg.binOp(sign_v, .{ .imm64 = ~@as(u63, 0) }, Type.i64, .xor); + _ = try cg.load(overflow_ret, Type.i32, 0); + try cg.addTag(.i32_eqz); + try cg.addTag(.select); + }, + 128 => { + if (!(int_info.bits == 128 and int_info.signedness == .signed)) { + return cg.fail("TODO: mul_sat for {f}", .{ty.fmt(pt)}); + } + const overflow_ret = try cg.allocStack(Type.i32); + const ret = try cg.callIntrinsic( + .__muloti4, + &[_]InternPool.Index{ .i128_type, .i128_type, .usize_type }, + Type.i128, + &.{ lhs, rhs, 
overflow_ret }, + ); + try cg.lowerToStack(ret); + const xor = try cg.binOp(lhs, rhs, Type.i128, .xor); + const sign_v = try cg.binOp(xor, .{ .imm32 = 127 }, Type.i128, .shr); + + // xor ~@as(u127, 0) + try cg.emitWValue(sign_v); + const lsb = try cg.load(sign_v, Type.u64, 0); + _ = try cg.binOp(lsb, .{ .imm64 = ~@as(u64, 0) }, Type.u64, .xor); + try cg.store(.stack, .stack, Type.u64, sign_v.offset()); + try cg.emitWValue(sign_v); + const msb = try cg.load(sign_v, Type.u64, 8); + _ = try cg.binOp(msb, .{ .imm64 = ~@as(u63, 0) }, Type.u64, .xor); + try cg.store(.stack, .stack, Type.u64, sign_v.offset() + 8); + + try cg.lowerToStack(sign_v); + _ = try cg.load(overflow_ret, Type.i32, 0); + try cg.addTag(.i32_eqz); + try cg.addTag(.select); + }, + else => unreachable, + } + return cg.finishAir(inst, .stack, &.{ bin_op.lhs, bin_op.rhs }); +} + +fn airSatBinOp(cg: *CodeGen, inst: Air.Inst.Index, op: Op) InnerError!void { + assert(op == .add or op == .sub); + const bin_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].bin_op; + + const zcu = cg.pt.zcu; + const ty = cg.typeOfIndex(inst); + const lhs = try cg.resolveInst(bin_op.lhs); + const rhs = try cg.resolveInst(bin_op.rhs); + + const int_info = ty.intInfo(zcu); + const is_signed = int_info.signedness == .signed; + + if (int_info.bits > 64) { + return cg.fail("TODO: saturating arithmetic for integers with bitsize '{d}'", .{int_info.bits}); + } + + if (is_signed) { + const result = try signedSat(cg, lhs, rhs, ty, op); + return cg.finishAir(inst, result, &.{ bin_op.lhs, bin_op.rhs }); + } + + const wasm_bits = toWasmBits(int_info.bits).?; + var bin_result = try (try cg.binOp(lhs, rhs, ty, op)).toLocal(cg, ty); + defer bin_result.free(cg); + if (wasm_bits != int_info.bits and op == .add) { + const val: u64 = @as(u64, @intCast((@as(u65, 1) << @as(u7, @intCast(int_info.bits))) - 1)); + const imm_val: WValue = switch (wasm_bits) { + 32 => .{ .imm32 = @intCast(val) }, + 64 => .{ .imm64 = val }, + else => unreachable, + }; + + try cg.emitWValue(bin_result); + try cg.emitWValue(imm_val); + _ = try cg.cmp(bin_result, imm_val, ty, .lt); + } else { + switch (wasm_bits) { + 32 => try cg.addImm32(if (op == .add) std.math.maxInt(u32) else 0), + 64 => try cg.addImm64(if (op == .add) std.math.maxInt(u64) else 0), + else => unreachable, + } + try cg.emitWValue(bin_result); + _ = try cg.cmp(bin_result, lhs, ty, if (op == .add) .lt else .gt); + } + + try cg.addTag(.select); + return cg.finishAir(inst, .stack, &.{ bin_op.lhs, bin_op.rhs }); +} + +fn signedSat(cg: *CodeGen, lhs: WValue, rhs: WValue, ty: Type, op: Op) InnerError!WValue { + const pt = cg.pt; + const zcu = pt.zcu; + const int_info = ty.intInfo(zcu); + const wasm_bits = toWasmBits(int_info.bits).?; + const is_wasm_bits = wasm_bits == int_info.bits; + const ext_ty = if (!is_wasm_bits) try pt.intType(int_info.signedness, wasm_bits) else ty; + + const max_val: u64 = @as(u64, @intCast((@as(u65, 1) << @as(u7, @intCast(int_info.bits - 1))) - 1)); + const min_val: i64 = (-@as(i64, @intCast(@as(u63, @intCast(max_val))))) - 1; + const max_wvalue: WValue = switch (wasm_bits) { + 32 => .{ .imm32 = @truncate(max_val) }, + 64 => .{ .imm64 = max_val }, + else => unreachable, + }; + const min_wvalue: WValue = switch (wasm_bits) { + 32 => .{ .imm32 = @bitCast(@as(i32, @truncate(min_val))) }, + 64 => .{ .imm64 = @bitCast(min_val) }, + else => unreachable, + }; + + var bin_result = try (try cg.binOp(lhs, rhs, ext_ty, op)).toLocal(cg, ext_ty); + if (!is_wasm_bits) { + defer bin_result.free(cg); // not returned in 
this branch + try cg.emitWValue(bin_result); + try cg.emitWValue(max_wvalue); + _ = try cg.cmp(bin_result, max_wvalue, ext_ty, .lt); + try cg.addTag(.select); + try cg.addLocal(.local_set, bin_result.local.value); // re-use local + + try cg.emitWValue(bin_result); + try cg.emitWValue(min_wvalue); + _ = try cg.cmp(bin_result, min_wvalue, ext_ty, .gt); + try cg.addTag(.select); + try cg.addLocal(.local_set, bin_result.local.value); // re-use local + return (try cg.wrapOperand(bin_result, ty)).toLocal(cg, ty); + } else { + const zero: WValue = switch (wasm_bits) { + 32 => .{ .imm32 = 0 }, + 64 => .{ .imm64 = 0 }, + else => unreachable, + }; + try cg.emitWValue(max_wvalue); + try cg.emitWValue(min_wvalue); + _ = try cg.cmp(bin_result, zero, ty, .lt); + try cg.addTag(.select); + try cg.emitWValue(bin_result); + // leave on stack + const cmp_zero_result = try cg.cmp(rhs, zero, ty, if (op == .add) .lt else .gt); + const cmp_bin_result = try cg.cmp(bin_result, lhs, ty, .lt); + _ = try cg.binOp(cmp_zero_result, cmp_bin_result, Type.u32, .xor); // comparisons always return i32, so provide u32 as type to xor. + try cg.addTag(.select); + try cg.addLocal(.local_set, bin_result.local.value); // re-use local + return bin_result; + } +} + +fn airShlSat(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const bin_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].bin_op; + + const pt = cg.pt; + const zcu = pt.zcu; + + if (cg.typeOf(bin_op.lhs).isVector(zcu) and !cg.typeOf(bin_op.rhs).isVector(zcu)) { + return cg.fail("TODO: implement vector 'shl_sat' with scalar rhs", .{}); + } + + const ty = cg.typeOfIndex(inst); + const int_info = ty.intInfo(zcu); + const is_signed = int_info.signedness == .signed; + if (int_info.bits > 64) { + return cg.fail("TODO: Saturating shifting left for integers with bitsize '{d}'", .{int_info.bits}); + } + + const lhs = try cg.resolveInst(bin_op.lhs); + const rhs = try cg.resolveInst(bin_op.rhs); + const wasm_bits = toWasmBits(int_info.bits).?; + const result = try cg.allocLocal(ty); + + if (wasm_bits == int_info.bits) { + var shl = try (try cg.binOp(lhs, rhs, ty, .shl)).toLocal(cg, ty); + defer shl.free(cg); + var shr = try (try cg.binOp(shl, rhs, ty, .shr)).toLocal(cg, ty); + defer shr.free(cg); + + switch (wasm_bits) { + 32 => blk: { + if (!is_signed) { + try cg.addImm32(std.math.maxInt(u32)); + break :blk; + } + try cg.addImm32(@bitCast(@as(i32, std.math.minInt(i32)))); + try cg.addImm32(@bitCast(@as(i32, std.math.maxInt(i32)))); + _ = try cg.cmp(lhs, .{ .imm32 = 0 }, ty, .lt); + try cg.addTag(.select); + }, + 64 => blk: { + if (!is_signed) { + try cg.addImm64(std.math.maxInt(u64)); + break :blk; + } + try cg.addImm64(@bitCast(@as(i64, std.math.minInt(i64)))); + try cg.addImm64(@bitCast(@as(i64, std.math.maxInt(i64)))); + _ = try cg.cmp(lhs, .{ .imm64 = 0 }, ty, .lt); + try cg.addTag(.select); + }, + else => unreachable, + } + try cg.emitWValue(shl); + _ = try cg.cmp(lhs, shr, ty, .neq); + try cg.addTag(.select); + try cg.addLocal(.local_set, result.local.value); + } else { + const shift_size = wasm_bits - int_info.bits; + const shift_value: WValue = switch (wasm_bits) { + 32 => .{ .imm32 = shift_size }, + 64 => .{ .imm64 = shift_size }, + else => unreachable, + }; + const ext_ty = try pt.intType(int_info.signedness, wasm_bits); + + var shl_res = try (try cg.binOp(lhs, shift_value, ext_ty, .shl)).toLocal(cg, ext_ty); + defer shl_res.free(cg); + var shl = try (try cg.binOp(shl_res, rhs, ext_ty, .shl)).toLocal(cg, ext_ty); + defer shl.free(cg); + var shr = try (try 
cg.binOp(shl, rhs, ext_ty, .shr)).toLocal(cg, ext_ty); + defer shr.free(cg); + + switch (wasm_bits) { + 32 => blk: { + if (!is_signed) { + try cg.addImm32(std.math.maxInt(u32)); + break :blk; + } + + try cg.addImm32(@bitCast(@as(i32, std.math.minInt(i32)))); + try cg.addImm32(@bitCast(@as(i32, std.math.maxInt(i32)))); + _ = try cg.cmp(shl_res, .{ .imm32 = 0 }, ext_ty, .lt); + try cg.addTag(.select); + }, + 64 => blk: { + if (!is_signed) { + try cg.addImm64(std.math.maxInt(u64)); + break :blk; + } + + try cg.addImm64(@bitCast(@as(i64, std.math.minInt(i64)))); + try cg.addImm64(@bitCast(@as(i64, std.math.maxInt(i64)))); + _ = try cg.cmp(shl_res, .{ .imm64 = 0 }, ext_ty, .lt); + try cg.addTag(.select); + }, + else => unreachable, + } + try cg.emitWValue(shl); + _ = try cg.cmp(shl_res, shr, ext_ty, .neq); + try cg.addTag(.select); + try cg.addLocal(.local_set, result.local.value); + var shift_result = try cg.binOp(result, shift_value, ext_ty, .shr); + if (is_signed) { + shift_result = try cg.wrapOperand(shift_result, ty); + } + try cg.addLocal(.local_set, result.local.value); + } + + return cg.finishAir(inst, result, &.{ bin_op.lhs, bin_op.rhs }); +} + +/// Calls a compiler-rt intrinsic by creating an undefined symbol, +/// then lowering the arguments and calling the symbol as a function call. +/// This function call assumes the C-ABI. +/// Asserts arguments are not stack values when the return value is +/// passed as the first parameter. +/// May leave the return value on the stack. +fn callIntrinsic( + cg: *CodeGen, + intrinsic: Mir.Intrinsic, + param_types: []const InternPool.Index, + return_type: Type, + args: []const WValue, +) InnerError!WValue { + assert(param_types.len == args.len); + const zcu = cg.pt.zcu; + + // Always pass over C-ABI + + const want_sret_param = firstParamSRet(.{ .wasm_mvp = .{} }, return_type, zcu, cg.target); + // if we want return as first param, we allocate a pointer to stack, + // and emit it as our first argument + const sret = if (want_sret_param) blk: { + const sret_local = try cg.allocStack(return_type); + try cg.lowerToStack(sret_local); + break :blk sret_local; + } else .none; + + // Lower all arguments to the stack before we call our function + for (args, 0..) 
|arg, arg_i| { + assert(!(want_sret_param and arg == .stack)); + assert(Type.fromInterned(param_types[arg_i]).hasRuntimeBitsIgnoreComptime(zcu)); + try cg.lowerArg(.{ .wasm_mvp = .{} }, Type.fromInterned(param_types[arg_i]), arg); + } + + try cg.addInst(.{ .tag = .call_intrinsic, .data = .{ .intrinsic = intrinsic } }); + + if (!return_type.hasRuntimeBitsIgnoreComptime(zcu)) { + return .none; + } else if (return_type.isNoReturn(zcu)) { + try cg.addTag(.@"unreachable"); + return .none; + } else if (want_sret_param) { + return sret; + } else { + return .stack; + } +} + +fn airTagName(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const un_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].un_op; + const operand = try cg.resolveInst(un_op); + const enum_ty = cg.typeOf(un_op); + + const result_ptr = try cg.allocStack(cg.typeOfIndex(inst)); + try cg.lowerToStack(result_ptr); + try cg.emitWValue(operand); + try cg.addInst(.{ .tag = .call_tag_name, .data = .{ .ip_index = enum_ty.toIntern() } }); + + return cg.finishAir(inst, result_ptr, &.{un_op}); +} + +fn airErrorSetHasValue(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const zcu = cg.pt.zcu; + const ip = &zcu.intern_pool; + const ty_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_op; + + const operand = try cg.resolveInst(ty_op.operand); + const error_set_ty = ty_op.ty.toType(); + const result = try cg.allocLocal(Type.bool); + + const names = error_set_ty.errorSetNames(zcu); + var values = try std.array_list.Managed(u32).initCapacity(cg.gpa, names.len); + defer values.deinit(); + + var lowest: ?u32 = null; + var highest: ?u32 = null; + for (0..names.len) |name_index| { + const err_int = ip.getErrorValueIfExists(names.get(ip)[name_index]).?; + if (lowest) |*l| { + if (err_int < l.*) { + l.* = err_int; + } + } else { + lowest = err_int; + } + if (highest) |*h| { + if (err_int > h.*) { + highest = err_int; + } + } else { + highest = err_int; + } + + values.appendAssumeCapacity(err_int); + } + + // start block for 'true' branch + try cg.startBlock(.block, .empty); + // start block for 'false' branch + try cg.startBlock(.block, .empty); + // block for the jump table itself + try cg.startBlock(.block, .empty); + + // lower operand to determine jump table target + try cg.emitWValue(operand); + try cg.addImm32(lowest.?); + try cg.addTag(.i32_sub); + + // Account for default branch so always add '1' + const depth = @as(u32, @intCast(highest.? - lowest.? + 1)); + const jump_table: Mir.JumpTable = .{ .length = depth }; + const table_extra_index = try cg.addExtra(jump_table); + try cg.addInst(.{ .tag = .br_table, .data = .{ .payload = table_extra_index } }); + try cg.mir_extra.ensureUnusedCapacity(cg.gpa, depth); + + var value: u32 = lowest.?; + while (value <= highest.?) : (value += 1) { + const idx: u32 = blk: { + for (values.items) |val| { + if (val == value) break :blk 1; + } + break :blk 0; + }; + cg.mir_extra.appendAssumeCapacity(idx); + } + try cg.endBlock(); + + // 'false' branch (i.e. error set does not have value + // ensure we set local to 0 in case the local was re-used. 
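+ // (Branch depth 1 below exits the outermost block opened above, skipping the 'true' branch.)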
+ try cg.addImm32(0); + try cg.addLocal(.local_set, result.local.value); + try cg.addLabel(.br, 1); + try cg.endBlock(); + + // 'true' branch + try cg.addImm32(1); + try cg.addLocal(.local_set, result.local.value); + try cg.addLabel(.br, 0); + try cg.endBlock(); + + return cg.finishAir(inst, result, &.{ty_op.operand}); +} + +inline fn useAtomicFeature(cg: *const CodeGen) bool { + return cg.target.cpu.has(.wasm, .atomics); +} + +fn airCmpxchg(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const zcu = cg.pt.zcu; + const ty_pl = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_pl; + const extra = cg.air.extraData(Air.Cmpxchg, ty_pl.payload).data; + + const ptr_ty = cg.typeOf(extra.ptr); + const ty = ptr_ty.childType(zcu); + const result_ty = cg.typeOfIndex(inst); + + const ptr_operand = try cg.resolveInst(extra.ptr); + const expected_val = try cg.resolveInst(extra.expected_value); + const new_val = try cg.resolveInst(extra.new_value); + + const cmp_result = try cg.allocLocal(Type.bool); + + const ptr_val = if (cg.useAtomicFeature()) val: { + const val_local = try cg.allocLocal(ty); + try cg.emitWValue(ptr_operand); + try cg.lowerToStack(expected_val); + try cg.lowerToStack(new_val); + try cg.addAtomicMemArg(switch (ty.abiSize(zcu)) { + 1 => .i32_atomic_rmw8_cmpxchg_u, + 2 => .i32_atomic_rmw16_cmpxchg_u, + 4 => .i32_atomic_rmw_cmpxchg, + 8 => .i32_atomic_rmw_cmpxchg, + else => |size| return cg.fail("TODO: implement `@cmpxchg` for types with abi size '{d}'", .{size}), + }, .{ + .offset = ptr_operand.offset(), + .alignment = @intCast(ty.abiAlignment(zcu).toByteUnits().?), + }); + try cg.addLocal(.local_tee, val_local.local.value); + _ = try cg.cmp(.stack, expected_val, ty, .eq); + try cg.addLocal(.local_set, cmp_result.local.value); + break :val val_local; + } else val: { + if (ty.abiSize(zcu) > 8) { + return cg.fail("TODO: Implement `@cmpxchg` for types larger than abi size of 8 bytes", .{}); + } + const ptr_val = try WValue.toLocal(try cg.load(ptr_operand, ty, 0), cg, ty); + + try cg.lowerToStack(ptr_operand); + try cg.lowerToStack(new_val); + try cg.emitWValue(ptr_val); + _ = try cg.cmp(ptr_val, expected_val, ty, .eq); + try cg.addLocal(.local_tee, cmp_result.local.value); + try cg.addTag(.select); + try cg.store(.stack, .stack, ty, 0); + + break :val ptr_val; + }; + + const result = if (isByRef(result_ty, zcu, cg.target)) val: { + try cg.emitWValue(cmp_result); + try cg.addImm32(~@as(u32, 0)); + try cg.addTag(.i32_xor); + try cg.addImm32(1); + try cg.addTag(.i32_and); + const and_result = try WValue.toLocal(.stack, cg, Type.bool); + const result_ptr = try cg.allocStack(result_ty); + try cg.store(result_ptr, and_result, Type.bool, @as(u32, @intCast(ty.abiSize(zcu)))); + try cg.store(result_ptr, ptr_val, ty, 0); + break :val result_ptr; + } else val: { + try cg.addImm32(0); + try cg.emitWValue(ptr_val); + try cg.emitWValue(cmp_result); + try cg.addTag(.select); + break :val .stack; + }; + + return cg.finishAir(inst, result, &.{ extra.ptr, extra.expected_value, extra.new_value }); +} + +fn airAtomicLoad(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const zcu = cg.pt.zcu; + const atomic_load = cg.air.instructions.items(.data)[@intFromEnum(inst)].atomic_load; + const ptr = try cg.resolveInst(atomic_load.ptr); + const ty = cg.typeOfIndex(inst); + + if (cg.useAtomicFeature()) { + const tag: std.wasm.AtomicsOpcode = switch (ty.abiSize(zcu)) { + 1 => .i32_atomic_load8_u, + 2 => .i32_atomic_load16_u, + 4 => .i32_atomic_load, + 8 => .i64_atomic_load, + else => |size| return 
cg.fail("TODO: @atomicLoad for types with abi size {d}", .{size}), + }; + try cg.emitWValue(ptr); + try cg.addAtomicMemArg(tag, .{ + .offset = ptr.offset(), + .alignment = @intCast(ty.abiAlignment(zcu).toByteUnits().?), + }); + } else { + _ = try cg.load(ptr, ty, 0); + } + + return cg.finishAir(inst, .stack, &.{atomic_load.ptr}); +} + +fn airAtomicRmw(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const zcu = cg.pt.zcu; + const pl_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].pl_op; + const extra = cg.air.extraData(Air.AtomicRmw, pl_op.payload).data; + + const ptr = try cg.resolveInst(pl_op.operand); + const operand = try cg.resolveInst(extra.operand); + const ty = cg.typeOfIndex(inst); + const op: std.builtin.AtomicRmwOp = extra.op(); + + if (cg.useAtomicFeature()) { + switch (op) { + .Max, + .Min, + .Nand, + => { + const tmp = try cg.load(ptr, ty, 0); + const value = try tmp.toLocal(cg, ty); + + // create a loop to cmpxchg the new value + try cg.startBlock(.loop, .empty); + + try cg.emitWValue(ptr); + try cg.emitWValue(value); + if (op == .Nand) { + const wasm_bits = toWasmBits(@intCast(ty.bitSize(zcu))).?; + + const and_res = try cg.binOp(value, operand, ty, .@"and"); + if (wasm_bits == 32) + try cg.addImm32(~@as(u32, 0)) + else if (wasm_bits == 64) + try cg.addImm64(~@as(u64, 0)) + else + return cg.fail("TODO: `@atomicRmw` with operator `Nand` for types larger than 64 bits", .{}); + _ = try cg.binOp(and_res, .stack, ty, .xor); + } else { + try cg.emitWValue(value); + try cg.emitWValue(operand); + _ = try cg.cmp(value, operand, ty, if (op == .Max) .gt else .lt); + try cg.addTag(.select); + } + try cg.addAtomicMemArg( + switch (ty.abiSize(zcu)) { + 1 => .i32_atomic_rmw8_cmpxchg_u, + 2 => .i32_atomic_rmw16_cmpxchg_u, + 4 => .i32_atomic_rmw_cmpxchg, + 8 => .i64_atomic_rmw_cmpxchg, + else => return cg.fail("TODO: implement `@atomicRmw` with operation `{s}` for types larger than 64 bits", .{@tagName(op)}), + }, + .{ + .offset = ptr.offset(), + .alignment = @intCast(ty.abiAlignment(zcu).toByteUnits().?), + }, + ); + const select_res = try cg.allocLocal(ty); + try cg.addLocal(.local_tee, select_res.local.value); + _ = try cg.cmp(.stack, value, ty, .neq); // leave on stack so we can use it for br_if + + try cg.emitWValue(select_res); + try cg.addLocal(.local_set, value.local.value); + + try cg.addLabel(.br_if, 0); + try cg.endBlock(); + return cg.finishAir(inst, value, &.{ pl_op.operand, extra.operand }); + }, + + // the other operations have their own instructions for Wasm. 
+ else => { + try cg.emitWValue(ptr); + try cg.emitWValue(operand); + const tag: std.wasm.AtomicsOpcode = switch (ty.abiSize(zcu)) { + 1 => switch (op) { + .Xchg => .i32_atomic_rmw8_xchg_u, + .Add => .i32_atomic_rmw8_add_u, + .Sub => .i32_atomic_rmw8_sub_u, + .And => .i32_atomic_rmw8_and_u, + .Or => .i32_atomic_rmw8_or_u, + .Xor => .i32_atomic_rmw8_xor_u, + else => unreachable, + }, + 2 => switch (op) { + .Xchg => .i32_atomic_rmw16_xchg_u, + .Add => .i32_atomic_rmw16_add_u, + .Sub => .i32_atomic_rmw16_sub_u, + .And => .i32_atomic_rmw16_and_u, + .Or => .i32_atomic_rmw16_or_u, + .Xor => .i32_atomic_rmw16_xor_u, + else => unreachable, + }, + 4 => switch (op) { + .Xchg => .i32_atomic_rmw_xchg, + .Add => .i32_atomic_rmw_add, + .Sub => .i32_atomic_rmw_sub, + .And => .i32_atomic_rmw_and, + .Or => .i32_atomic_rmw_or, + .Xor => .i32_atomic_rmw_xor, + else => unreachable, + }, + 8 => switch (op) { + .Xchg => .i64_atomic_rmw_xchg, + .Add => .i64_atomic_rmw_add, + .Sub => .i64_atomic_rmw_sub, + .And => .i64_atomic_rmw_and, + .Or => .i64_atomic_rmw_or, + .Xor => .i64_atomic_rmw_xor, + else => unreachable, + }, + else => |size| return cg.fail("TODO: Implement `@atomicRmw` for types with abi size {d}", .{size}), + }; + try cg.addAtomicMemArg(tag, .{ + .offset = ptr.offset(), + .alignment = @intCast(ty.abiAlignment(zcu).toByteUnits().?), + }); + return cg.finishAir(inst, .stack, &.{ pl_op.operand, extra.operand }); + }, + } + } else { + const loaded = try cg.load(ptr, ty, 0); + const result = try loaded.toLocal(cg, ty); + + switch (op) { + .Xchg => { + try cg.store(ptr, operand, ty, 0); + }, + .Add, + .Sub, + .And, + .Or, + .Xor, + => { + try cg.emitWValue(ptr); + _ = try cg.binOp(result, operand, ty, switch (op) { + .Add => .add, + .Sub => .sub, + .And => .@"and", + .Or => .@"or", + .Xor => .xor, + else => unreachable, + }); + if (ty.isInt(zcu) and (op == .Add or op == .Sub)) { + _ = try cg.wrapOperand(.stack, ty); + } + try cg.store(.stack, .stack, ty, ptr.offset()); + }, + .Max, + .Min, + => { + try cg.emitWValue(ptr); + try cg.emitWValue(result); + try cg.emitWValue(operand); + _ = try cg.cmp(result, operand, ty, if (op == .Max) .gt else .lt); + try cg.addTag(.select); + try cg.store(.stack, .stack, ty, ptr.offset()); + }, + .Nand => { + const wasm_bits = toWasmBits(@intCast(ty.bitSize(zcu))).?; + + try cg.emitWValue(ptr); + const and_res = try cg.binOp(result, operand, ty, .@"and"); + if (wasm_bits == 32) + try cg.addImm32(~@as(u32, 0)) + else if (wasm_bits == 64) + try cg.addImm64(~@as(u64, 0)) + else + return cg.fail("TODO: `@atomicRmw` with operator `Nand` for types larger than 64 bits", .{}); + _ = try cg.binOp(and_res, .stack, ty, .xor); + try cg.store(.stack, .stack, ty, ptr.offset()); + }, + } + + return cg.finishAir(inst, result, &.{ pl_op.operand, extra.operand }); + } +} + +fn airAtomicStore(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const zcu = cg.pt.zcu; + const bin_op = cg.air.instructions.items(.data)[@intFromEnum(inst)].bin_op; + + const ptr = try cg.resolveInst(bin_op.lhs); + const operand = try cg.resolveInst(bin_op.rhs); + const ptr_ty = cg.typeOf(bin_op.lhs); + const ty = ptr_ty.childType(zcu); + + if (cg.useAtomicFeature()) { + const tag: std.wasm.AtomicsOpcode = switch (ty.abiSize(zcu)) { + 1 => .i32_atomic_store8, + 2 => .i32_atomic_store16, + 4 => .i32_atomic_store, + 8 => .i64_atomic_store, + else => |size| return cg.fail("TODO: @atomicLoad for types with abi size {d}", .{size}), + }; + try cg.emitWValue(ptr); + try cg.lowerToStack(operand); + try 
cg.addAtomicMemArg(tag, .{ + .offset = ptr.offset(), + .alignment = @intCast(ty.abiAlignment(zcu).toByteUnits().?), + }); + } else { + try cg.store(ptr, operand, ty, 0); + } + + return cg.finishAir(inst, .none, &.{ bin_op.lhs, bin_op.rhs }); +} + +fn airFrameAddress(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + if (cg.initial_stack_value == .none) { + try cg.initializeStack(); + } + try cg.emitWValue(cg.bottom_stack_value); + return cg.finishAir(inst, .stack, &.{}); +} + +fn airRuntimeNavPtr(cg: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const ty_nav = cg.air.instructions.items(.data)[@intFromEnum(inst)].ty_nav; + const mod = cg.pt.zcu.navFileScope(cg.owner_nav).mod.?; + if (mod.single_threaded) { + const result: WValue = .{ .nav_ref = .{ + .nav_index = ty_nav.nav, + .offset = 0, + } }; + return cg.finishAir(inst, result, &.{}); + } + return cg.fail("TODO: thread-local variables", .{}); +} + +fn typeOf(cg: *CodeGen, inst: Air.Inst.Ref) Type { + const zcu = cg.pt.zcu; + return cg.air.typeOf(inst, &zcu.intern_pool); +} + +fn typeOfIndex(cg: *CodeGen, inst: Air.Inst.Index) Type { + const zcu = cg.pt.zcu; + return cg.air.typeOfIndex(inst, &zcu.intern_pool); +} + +fn floatCmpIntrinsic(op: std.math.CompareOperator, bits: u16) Mir.Intrinsic { + return switch (op) { + .lt => switch (bits) { + 80 => .__ltxf2, + 128 => .__lttf2, + else => unreachable, + }, + .lte => switch (bits) { + 80 => .__lexf2, + 128 => .__letf2, + else => unreachable, + }, + .eq => switch (bits) { + 80 => .__eqxf2, + 128 => .__eqtf2, + else => unreachable, + }, + .neq => switch (bits) { + 80 => .__nexf2, + 128 => .__netf2, + else => unreachable, + }, + .gte => switch (bits) { + 80 => .__gexf2, + 128 => .__getf2, + else => unreachable, + }, + .gt => switch (bits) { + 80 => .__gtxf2, + 128 => .__gttf2, + else => unreachable, + }, + }; +} |
