Diffstat (limited to 'src/arch')
| -rw-r--r-- | src/arch/aarch64/CodeGen.zig | 747 |
| -rw-r--r-- | src/arch/aarch64/abi.zig | 54 |
| -rw-r--r-- | src/arch/arm/CodeGen.zig | 719 |
| -rw-r--r-- | src/arch/arm/abi.zig | 59 |
| -rw-r--r-- | src/arch/riscv64/CodeGen.zig | 207 |
| -rw-r--r-- | src/arch/riscv64/abi.zig | 26 |
| -rw-r--r-- | src/arch/sparc64/CodeGen.zig | 684 |
| -rw-r--r-- | src/arch/sparc64/Emit.zig | 33 |
| -rw-r--r-- | src/arch/sparc64/Mir.zig | 30 |
| -rw-r--r-- | src/arch/sparc64/bits.zig | 32 |
| -rw-r--r-- | src/arch/wasm/CodeGen.zig | 2569 |
| -rw-r--r-- | src/arch/wasm/Emit.zig | 2 |
| -rw-r--r-- | src/arch/wasm/abi.zig | 63 |
| -rw-r--r-- | src/arch/x86_64/CodeGen.zig | 7371 |
| -rw-r--r-- | src/arch/x86_64/Emit.zig | 280 |
| -rw-r--r-- | src/arch/x86_64/Encoding.zig | 556 |
| -rw-r--r-- | src/arch/x86_64/Lower.zig | 699 |
| -rw-r--r-- | src/arch/x86_64/Mir.zig | 930 |
| -rw-r--r-- | src/arch/x86_64/abi.zig | 126 |
| -rw-r--r-- | src/arch/x86_64/bits.zig | 111 |
| -rw-r--r-- | src/arch/x86_64/encoder.zig | 198 |
| -rw-r--r-- | src/arch/x86_64/encodings.zig | 2401 |
22 files changed, 11706 insertions, 6191 deletions
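The dominant pattern in the diff below is a refactor of how the backends query type information: calls such as `ty.abiSize(self.target.*)`, `ty.zigTypeTag()`, and `self.air.typeOf(...)` become `ty.abiSize(mod)`, `ty.zigTypeTag(mod)`, and `self.typeOf(...)`, where `mod` is the `Module` that now owns the intern pool types live in. The following is a minimal, self-contained sketch of that signature change; the `Target`, `Module`, and `Type` structs here are simplified stand-ins for illustration only, not the real definitions from `CodeGen.zig`.

```zig
const std = @import("std");

// Simplified stand-ins for the compiler's types, only to make the
// signature change concrete; the real definitions are far richer.
const Target = struct { ptr_bits: u16 };
const Module = struct { target: Target };

const Type = struct {
    bits: u16,

    // Before this change: type queries were keyed off a `Target`.
    fn abiSizeOld(ty: Type, target: Target) u64 {
        _ = target;
        return (ty.bits + 7) / 8;
    }

    // After this change: type queries take the `Module`, which owns the
    // target as well as the intern pool that types are now stored in.
    fn abiSize(ty: Type, mod: *const Module) u64 {
        _ = mod;
        return (ty.bits + 7) / 8;
    }
};

pub fn main() void {
    const mod = Module{ .target = .{ .ptr_bits = 64 } };
    const ty = Type{ .bits = 20 };
    // Both forms compute the same size; only the context threaded
    // through the call changes.
    std.debug.print("{d} {d}\n", .{ ty.abiSizeOld(mod.target), ty.abiSize(&mod) });
}
```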
diff --git a/src/arch/aarch64/CodeGen.zig b/src/arch/aarch64/CodeGen.zig index 649edd3b9c..1355f96231 100644 --- a/src/arch/aarch64/CodeGen.zig +++ b/src/arch/aarch64/CodeGen.zig @@ -328,7 +328,7 @@ const Self = @This(); pub fn generate( bin_file: *link.File, src_loc: Module.SrcLoc, - module_fn: *Module.Fn, + module_fn_index: Module.Fn.Index, air: Air, liveness: Liveness, code: *std.ArrayList(u8), @@ -339,6 +339,7 @@ pub fn generate( } const mod = bin_file.options.module.?; + const module_fn = mod.funcPtr(module_fn_index); const fn_owner_decl = mod.declPtr(module_fn.owner_decl); assert(fn_owner_decl.has_tv); const fn_type = fn_owner_decl.ty; @@ -471,7 +472,8 @@ pub fn addExtraAssumeCapacity(self: *Self, extra: anytype) u32 { } fn gen(self: *Self) !void { - const cc = self.fn_type.fnCallingConvention(); + const mod = self.bin_file.options.module.?; + const cc = self.fn_type.fnCallingConvention(mod); if (cc != .Naked) { // stp fp, lr, [sp, #-16]! _ = try self.addInst(.{ @@ -501,7 +503,7 @@ fn gen(self: *Self) !void { // (or w0 when pointer size is 32 bits). As this register // might get overwritten along the way, save the address // to the stack. - const ptr_bits = self.target.cpu.arch.ptrBitWidth(); + const ptr_bits = self.target.ptrBitWidth(); const ptr_bytes = @divExact(ptr_bits, 8); const ret_ptr_reg = self.registerAlias(.x0, Type.usize); @@ -520,10 +522,10 @@ fn gen(self: *Self) !void { const inst = self.air.getMainBody()[arg_index]; assert(self.air.instructions.items(.tag)[inst] == .arg); - const ty = self.air.typeOfIndex(inst); + const ty = self.typeOfIndex(inst); - const abi_size = @intCast(u32, ty.abiSize(self.target.*)); - const abi_align = ty.abiAlignment(self.target.*); + const abi_size = @intCast(u32, ty.abiSize(mod)); + const abi_align = ty.abiAlignment(mod); const stack_offset = try self.allocMem(abi_size, abi_align, inst); try self.genSetStack(ty, stack_offset, MCValue{ .register = reg }); @@ -564,7 +566,7 @@ fn gen(self: *Self) !void { // Backpatch stack offset const total_stack_size = self.max_end_stack + self.saved_regs_stack_space; - const aligned_total_stack_end = mem.alignForwardGeneric(u32, total_stack_size, self.stack_align); + const aligned_total_stack_end = mem.alignForward(u32, total_stack_size, self.stack_align); const stack_size = aligned_total_stack_end - self.saved_regs_stack_space; self.max_end_stack = stack_size; if (math.cast(u12, stack_size)) |size| { @@ -652,13 +654,14 @@ fn gen(self: *Self) !void { } fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { + const mod = self.bin_file.options.module.?; + const ip = &mod.intern_pool; const air_tags = self.air.instructions.items(.tag); for (body) |inst| { // TODO: remove now-redundant isUnused calls from AIR handler functions - if (self.liveness.isUnused(inst) and !self.air.mustLower(inst)) { + if (self.liveness.isUnused(inst) and !self.air.mustLower(inst, ip)) continue; - } const old_air_bookkeeping = self.air_bookkeeping; try self.ensureProcessDeathCapacity(Liveness.bpi); @@ -842,8 +845,7 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .ptr_elem_val => try self.airPtrElemVal(inst), .ptr_elem_ptr => try self.airPtrElemPtr(inst), - .constant => unreachable, // excluded from function bodies - .const_ty => unreachable, // excluded from function bodies + .inferred_alloc, .inferred_alloc_comptime, .interned => unreachable, .unreach => self.finishAirBookkeeping(), .optional_payload => try self.airOptionalPayload(inst), @@ -916,8 +918,7 @@ fn genBody(self: *Self, body: 
[]const Air.Inst.Index) InnerError!void { /// Asserts there is already capacity to insert into top branch inst_table. fn processDeath(self: *Self, inst: Air.Inst.Index) void { - const air_tags = self.air.instructions.items(.tag); - if (air_tags[inst] == .constant) return; // Constants are immortal. + assert(self.air.instructions.items(.tag)[inst] != .interned); // When editing this function, note that the logic must synchronize with `reuseOperand`. const prev_value = self.getResolvedInstValue(inst); const branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; @@ -951,8 +952,8 @@ fn finishAir(self: *Self, inst: Air.Inst.Index, result: MCValue, operands: [Live tomb_bits >>= 1; if (!dies) continue; const op_int = @enumToInt(op); - if (op_int < Air.Inst.Ref.typed_value_map.len) continue; - const op_index = @intCast(Air.Inst.Index, op_int - Air.Inst.Ref.typed_value_map.len); + if (op_int < Air.ref_start_index) continue; + const op_index = @intCast(Air.Inst.Index, op_int - Air.ref_start_index); self.processDeath(op_index); } const is_used = @truncate(u1, tomb_bits) == 0; @@ -1010,7 +1011,7 @@ fn allocMem( std.math.ceilPowerOfTwoAssert(u32, abi_size); // TODO find a free slot instead of always appending - const offset = mem.alignForwardGeneric(u32, self.next_stack_offset, adjusted_align) + abi_size; + const offset = mem.alignForward(u32, self.next_stack_offset, adjusted_align) + abi_size; self.next_stack_offset = offset; self.max_end_stack = @max(self.max_end_stack, self.next_stack_offset); @@ -1026,31 +1027,31 @@ fn allocMem( /// Use a pointer instruction as the basis for allocating stack memory. fn allocMemPtr(self: *Self, inst: Air.Inst.Index) !u32 { - const elem_ty = self.air.typeOfIndex(inst).elemType(); + const mod = self.bin_file.options.module.?; + const elem_ty = self.typeOfIndex(inst).childType(mod); - if (!elem_ty.hasRuntimeBits()) { + if (!elem_ty.hasRuntimeBits(mod)) { // return the stack offset 0. Stack offset 0 will be where all // zero-sized stack allocations live as non-zero-sized // allocations will always have an offset > 0. return @as(u32, 0); } - const abi_size = math.cast(u32, elem_ty.abiSize(self.target.*)) orelse { - const mod = self.bin_file.options.module.?; + const abi_size = math.cast(u32, elem_ty.abiSize(mod)) orelse { return self.fail("type '{}' too big to fit into stack frame", .{elem_ty.fmt(mod)}); }; // TODO swap this for inst.ty.ptrAlign - const abi_align = elem_ty.abiAlignment(self.target.*); + const abi_align = elem_ty.abiAlignment(mod); return self.allocMem(abi_size, abi_align, inst); } fn allocRegOrMem(self: *Self, elem_ty: Type, reg_ok: bool, maybe_inst: ?Air.Inst.Index) !MCValue { - const abi_size = math.cast(u32, elem_ty.abiSize(self.target.*)) orelse { - const mod = self.bin_file.options.module.?; + const mod = self.bin_file.options.module.?; + const abi_size = math.cast(u32, elem_ty.abiSize(mod)) orelse { return self.fail("type '{}' too big to fit into stack frame", .{elem_ty.fmt(mod)}); }; - const abi_align = elem_ty.abiAlignment(self.target.*); + const abi_align = elem_ty.abiAlignment(mod); if (reg_ok) { // Make sure the type can fit in a register before we try to allocate one. 
@@ -1066,7 +1067,7 @@ fn allocRegOrMem(self: *Self, elem_ty: Type, reg_ok: bool, maybe_inst: ?Air.Inst } pub fn spillInstruction(self: *Self, reg: Register, inst: Air.Inst.Index) !void { - const stack_mcv = try self.allocRegOrMem(self.air.typeOfIndex(inst), false, inst); + const stack_mcv = try self.allocRegOrMem(self.typeOfIndex(inst), false, inst); log.debug("spilling {d} to stack mcv {any}", .{ inst, stack_mcv }); const reg_mcv = self.getResolvedInstValue(inst); @@ -1078,14 +1079,14 @@ pub fn spillInstruction(self: *Self, reg: Register, inst: Air.Inst.Index) !void const branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; try branch.inst_table.put(self.gpa, inst, stack_mcv); - try self.genSetStack(self.air.typeOfIndex(inst), stack_mcv.stack_offset, reg_mcv); + try self.genSetStack(self.typeOfIndex(inst), stack_mcv.stack_offset, reg_mcv); } /// Save the current instruction stored in the compare flags if /// occupied fn spillCompareFlagsIfOccupied(self: *Self) !void { if (self.compare_flags_inst) |inst_to_save| { - const ty = self.air.typeOfIndex(inst_to_save); + const ty = self.typeOfIndex(inst_to_save); const mcv = self.getResolvedInstValue(inst_to_save); const new_mcv = switch (mcv) { .compare_flags => try self.allocRegOrMem(ty, true, inst_to_save), @@ -1093,7 +1094,7 @@ fn spillCompareFlagsIfOccupied(self: *Self) !void { else => unreachable, // mcv doesn't occupy the compare flags }; - try self.setRegOrMem(self.air.typeOfIndex(inst_to_save), new_mcv, mcv); + try self.setRegOrMem(self.typeOfIndex(inst_to_save), new_mcv, mcv); log.debug("spilling {d} to mcv {any}", .{ inst_to_save, new_mcv }); const branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; @@ -1125,9 +1126,9 @@ fn copyToTmpRegister(self: *Self, ty: Type, mcv: MCValue) !Register { /// This can have a side effect of spilling instructions to the stack to free up a register. 
fn copyToNewRegister(self: *Self, reg_owner: Air.Inst.Index, mcv: MCValue) !MCValue { const raw_reg = try self.register_manager.allocReg(reg_owner, gp); - const ty = self.air.typeOfIndex(reg_owner); + const ty = self.typeOfIndex(reg_owner); const reg = self.registerAlias(raw_reg, ty); - try self.genSetReg(self.air.typeOfIndex(reg_owner), reg, mcv); + try self.genSetReg(self.typeOfIndex(reg_owner), reg, mcv); return MCValue{ .register = reg }; } @@ -1137,17 +1138,14 @@ fn airAlloc(self: *Self, inst: Air.Inst.Index) !void { } fn airRetPtr(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; const result: MCValue = switch (self.ret_mcv) { .none, .register => .{ .ptr_stack_offset = try self.allocMemPtr(inst) }, .stack_offset => blk: { // self.ret_mcv is an address to where this function // should store its result into - const ret_ty = self.fn_type.fnReturnType(); - var ptr_ty_payload: Type.Payload.ElemType = .{ - .base = .{ .tag = .single_mut_pointer }, - .data = ret_ty, - }; - const ptr_ty = Type.initPayload(&ptr_ty_payload.base); + const ret_ty = self.fn_type.fnReturnType(mod); + const ptr_ty = try mod.singleMutPtrType(ret_ty); // addr_reg will contain the address of where to store the // result into @@ -1177,13 +1175,14 @@ fn airIntCast(self: *Self, inst: Air.Inst.Index) !void { if (self.liveness.isUnused(inst)) return self.finishAir(inst, .dead, .{ ty_op.operand, .none, .none }); + const mod = self.bin_file.options.module.?; const operand = ty_op.operand; const operand_mcv = try self.resolveInst(operand); - const operand_ty = self.air.typeOf(operand); - const operand_info = operand_ty.intInfo(self.target.*); + const operand_ty = self.typeOf(operand); + const operand_info = operand_ty.intInfo(mod); - const dest_ty = self.air.typeOfIndex(inst); - const dest_info = dest_ty.intInfo(self.target.*); + const dest_ty = self.typeOfIndex(inst); + const dest_info = dest_ty.intInfo(mod); const result: MCValue = result: { const operand_lock: ?RegisterLock = switch (operand_mcv) { @@ -1199,14 +1198,14 @@ fn airIntCast(self: *Self, inst: Air.Inst.Index) !void { if (dest_info.bits > operand_info.bits) { const dest_mcv = try self.allocRegOrMem(dest_ty, true, inst); - try self.setRegOrMem(self.air.typeOfIndex(inst), dest_mcv, truncated); + try self.setRegOrMem(self.typeOfIndex(inst), dest_mcv, truncated); break :result dest_mcv; } else { if (self.reuseOperand(inst, operand, 0, truncated)) { break :result truncated; } else { const dest_mcv = try self.allocRegOrMem(dest_ty, true, inst); - try self.setRegOrMem(self.air.typeOfIndex(inst), dest_mcv, truncated); + try self.setRegOrMem(self.typeOfIndex(inst), dest_mcv, truncated); break :result dest_mcv; } } @@ -1257,8 +1256,9 @@ fn trunc( operand_ty: Type, dest_ty: Type, ) !MCValue { - const info_a = operand_ty.intInfo(self.target.*); - const info_b = dest_ty.intInfo(self.target.*); + const mod = self.bin_file.options.module.?; + const info_a = operand_ty.intInfo(mod); + const info_b = dest_ty.intInfo(mod); if (info_b.bits <= 64) { const operand_reg = switch (operand) { @@ -1300,8 +1300,8 @@ fn trunc( fn airTrunc(self: *Self, inst: Air.Inst.Index) !void { const ty_op = self.air.instructions.items(.data)[inst].ty_op; const operand = try self.resolveInst(ty_op.operand); - const operand_ty = self.air.typeOf(ty_op.operand); - const dest_ty = self.air.typeOfIndex(inst); + const operand_ty = self.typeOf(ty_op.operand); + const dest_ty = self.typeOfIndex(inst); const result: MCValue = if (self.liveness.isUnused(inst)) .dead else blk: 
{ break :blk try self.trunc(inst, operand, operand_ty, dest_ty); @@ -1319,15 +1319,16 @@ fn airBoolToInt(self: *Self, inst: Air.Inst.Index) !void { fn airNot(self: *Self, inst: Air.Inst.Index) !void { const ty_op = self.air.instructions.items(.data)[inst].ty_op; + const mod = self.bin_file.options.module.?; const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: { const operand = try self.resolveInst(ty_op.operand); - const operand_ty = self.air.typeOf(ty_op.operand); + const operand_ty = self.typeOf(ty_op.operand); switch (operand) { .dead => unreachable, .unreach => unreachable, .compare_flags => |cond| break :result MCValue{ .compare_flags = cond.negate() }, else => { - switch (operand_ty.zigTypeTag()) { + switch (operand_ty.zigTypeTag(mod)) { .Bool => { // TODO convert this to mvn + and const op_reg = switch (operand) { @@ -1361,7 +1362,7 @@ fn airNot(self: *Self, inst: Air.Inst.Index) !void { }, .Vector => return self.fail("TODO bitwise not for vectors", .{}), .Int => { - const int_info = operand_ty.intInfo(self.target.*); + const int_info = operand_ty.intInfo(mod); if (int_info.bits <= 64) { const op_reg = switch (operand) { .register => |r| r, @@ -1413,13 +1414,13 @@ fn minMax( rhs_ty: Type, maybe_inst: ?Air.Inst.Index, ) !MCValue { - switch (lhs_ty.zigTypeTag()) { + const mod = self.bin_file.options.module.?; + switch (lhs_ty.zigTypeTag(mod)) { .Float => return self.fail("TODO ARM min/max on floats", .{}), .Vector => return self.fail("TODO ARM min/max on vectors", .{}), .Int => { - const mod = self.bin_file.options.module.?; assert(lhs_ty.eql(rhs_ty, mod)); - const int_info = lhs_ty.intInfo(self.target.*); + const int_info = lhs_ty.intInfo(mod); if (int_info.bits <= 64) { var lhs_reg: Register = undefined; var rhs_reg: Register = undefined; @@ -1488,8 +1489,8 @@ fn minMax( fn airMinMax(self: *Self, inst: Air.Inst.Index) !void { const tag = self.air.instructions.items(.tag)[inst]; const bin_op = self.air.instructions.items(.data)[inst].bin_op; - const lhs_ty = self.air.typeOf(bin_op.lhs); - const rhs_ty = self.air.typeOf(bin_op.rhs); + const lhs_ty = self.typeOf(bin_op.lhs); + const rhs_ty = self.typeOf(bin_op.rhs); const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: { const lhs_bind: ReadArg.Bind = .{ .inst = bin_op.lhs }; @@ -1508,11 +1509,11 @@ fn airSlice(self: *Self, inst: Air.Inst.Index) !void { const bin_op = self.air.extraData(Air.Bin, ty_pl.payload).data; const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: { const ptr = try self.resolveInst(bin_op.lhs); - const ptr_ty = self.air.typeOf(bin_op.lhs); + const ptr_ty = self.typeOf(bin_op.lhs); const len = try self.resolveInst(bin_op.rhs); - const len_ty = self.air.typeOf(bin_op.rhs); + const len_ty = self.typeOf(bin_op.rhs); - const ptr_bits = self.target.cpu.arch.ptrBitWidth(); + const ptr_bits = self.target.ptrBitWidth(); const ptr_bytes = @divExact(ptr_bits, 8); const stack_offset = try self.allocMem(ptr_bytes * 2, ptr_bytes * 2, inst); @@ -1907,12 +1908,12 @@ fn addSub( maybe_inst: ?Air.Inst.Index, ) InnerError!MCValue { const mod = self.bin_file.options.module.?; - switch (lhs_ty.zigTypeTag()) { + switch (lhs_ty.zigTypeTag(mod)) { .Float => return self.fail("TODO binary operations on floats", .{}), .Vector => return self.fail("TODO binary operations on vectors", .{}), .Int => { assert(lhs_ty.eql(rhs_ty, mod)); - const int_info = lhs_ty.intInfo(self.target.*); + const int_info = lhs_ty.intInfo(mod); if (int_info.bits <= 64) { const lhs_immediate = try 
lhs_bind.resolveToImmediate(self); const rhs_immediate = try rhs_bind.resolveToImmediate(self); @@ -1968,11 +1969,11 @@ fn mul( maybe_inst: ?Air.Inst.Index, ) InnerError!MCValue { const mod = self.bin_file.options.module.?; - switch (lhs_ty.zigTypeTag()) { + switch (lhs_ty.zigTypeTag(mod)) { .Vector => return self.fail("TODO binary operations on vectors", .{}), .Int => { assert(lhs_ty.eql(rhs_ty, mod)); - const int_info = lhs_ty.intInfo(self.target.*); + const int_info = lhs_ty.intInfo(mod); if (int_info.bits <= 64) { // TODO add optimisations for multiplication // with immediates, for example a * 2 can be @@ -1999,7 +2000,8 @@ fn divFloat( _ = rhs_ty; _ = maybe_inst; - switch (lhs_ty.zigTypeTag()) { + const mod = self.bin_file.options.module.?; + switch (lhs_ty.zigTypeTag(mod)) { .Float => return self.fail("TODO div_float", .{}), .Vector => return self.fail("TODO div_float on vectors", .{}), else => unreachable, @@ -2015,12 +2017,12 @@ fn divTrunc( maybe_inst: ?Air.Inst.Index, ) InnerError!MCValue { const mod = self.bin_file.options.module.?; - switch (lhs_ty.zigTypeTag()) { + switch (lhs_ty.zigTypeTag(mod)) { .Float => return self.fail("TODO div on floats", .{}), .Vector => return self.fail("TODO div on vectors", .{}), .Int => { assert(lhs_ty.eql(rhs_ty, mod)); - const int_info = lhs_ty.intInfo(self.target.*); + const int_info = lhs_ty.intInfo(mod); if (int_info.bits <= 64) { switch (int_info.signedness) { .signed => { @@ -2049,12 +2051,12 @@ fn divFloor( maybe_inst: ?Air.Inst.Index, ) InnerError!MCValue { const mod = self.bin_file.options.module.?; - switch (lhs_ty.zigTypeTag()) { + switch (lhs_ty.zigTypeTag(mod)) { .Float => return self.fail("TODO div on floats", .{}), .Vector => return self.fail("TODO div on vectors", .{}), .Int => { assert(lhs_ty.eql(rhs_ty, mod)); - const int_info = lhs_ty.intInfo(self.target.*); + const int_info = lhs_ty.intInfo(mod); if (int_info.bits <= 64) { switch (int_info.signedness) { .signed => { @@ -2082,12 +2084,12 @@ fn divExact( maybe_inst: ?Air.Inst.Index, ) InnerError!MCValue { const mod = self.bin_file.options.module.?; - switch (lhs_ty.zigTypeTag()) { + switch (lhs_ty.zigTypeTag(mod)) { .Float => return self.fail("TODO div on floats", .{}), .Vector => return self.fail("TODO div on vectors", .{}), .Int => { assert(lhs_ty.eql(rhs_ty, mod)); - const int_info = lhs_ty.intInfo(self.target.*); + const int_info = lhs_ty.intInfo(mod); if (int_info.bits <= 64) { switch (int_info.signedness) { .signed => { @@ -2118,12 +2120,12 @@ fn rem( _ = maybe_inst; const mod = self.bin_file.options.module.?; - switch (lhs_ty.zigTypeTag()) { + switch (lhs_ty.zigTypeTag(mod)) { .Float => return self.fail("TODO rem/mod on floats", .{}), .Vector => return self.fail("TODO rem/mod on vectors", .{}), .Int => { assert(lhs_ty.eql(rhs_ty, mod)); - const int_info = lhs_ty.intInfo(self.target.*); + const int_info = lhs_ty.intInfo(mod); if (int_info.bits <= 64) { var lhs_reg: Register = undefined; var rhs_reg: Register = undefined; @@ -2188,7 +2190,8 @@ fn modulo( _ = rhs_ty; _ = maybe_inst; - switch (lhs_ty.zigTypeTag()) { + const mod = self.bin_file.options.module.?; + switch (lhs_ty.zigTypeTag(mod)) { .Float => return self.fail("TODO mod on floats", .{}), .Vector => return self.fail("TODO mod on vectors", .{}), .Int => return self.fail("TODO mod on ints", .{}), @@ -2205,10 +2208,11 @@ fn wrappingArithmetic( rhs_ty: Type, maybe_inst: ?Air.Inst.Index, ) InnerError!MCValue { - switch (lhs_ty.zigTypeTag()) { + const mod = self.bin_file.options.module.?; + switch 
(lhs_ty.zigTypeTag(mod)) { .Vector => return self.fail("TODO binary operations on vectors", .{}), .Int => { - const int_info = lhs_ty.intInfo(self.target.*); + const int_info = lhs_ty.intInfo(mod); if (int_info.bits <= 64) { // Generate an add/sub/mul const result: MCValue = switch (tag) { @@ -2240,11 +2244,11 @@ fn bitwise( maybe_inst: ?Air.Inst.Index, ) InnerError!MCValue { const mod = self.bin_file.options.module.?; - switch (lhs_ty.zigTypeTag()) { + switch (lhs_ty.zigTypeTag(mod)) { .Vector => return self.fail("TODO binary operations on vectors", .{}), .Int => { assert(lhs_ty.eql(rhs_ty, mod)); - const int_info = lhs_ty.intInfo(self.target.*); + const int_info = lhs_ty.intInfo(mod); if (int_info.bits <= 64) { // TODO implement bitwise operations with immediates const mir_tag: Mir.Inst.Tag = switch (tag) { @@ -2274,10 +2278,11 @@ fn shiftExact( ) InnerError!MCValue { _ = rhs_ty; - switch (lhs_ty.zigTypeTag()) { + const mod = self.bin_file.options.module.?; + switch (lhs_ty.zigTypeTag(mod)) { .Vector => return self.fail("TODO binary operations on vectors", .{}), .Int => { - const int_info = lhs_ty.intInfo(self.target.*); + const int_info = lhs_ty.intInfo(mod); if (int_info.bits <= 64) { const rhs_immediate = try rhs_bind.resolveToImmediate(self); @@ -2323,10 +2328,11 @@ fn shiftNormal( rhs_ty: Type, maybe_inst: ?Air.Inst.Index, ) InnerError!MCValue { - switch (lhs_ty.zigTypeTag()) { + const mod = self.bin_file.options.module.?; + switch (lhs_ty.zigTypeTag(mod)) { .Vector => return self.fail("TODO binary operations on vectors", .{}), .Int => { - const int_info = lhs_ty.intInfo(self.target.*); + const int_info = lhs_ty.intInfo(mod); if (int_info.bits <= 64) { // Generate a shl_exact/shr_exact const result: MCValue = switch (tag) { @@ -2362,7 +2368,8 @@ fn booleanOp( rhs_ty: Type, maybe_inst: ?Air.Inst.Index, ) InnerError!MCValue { - switch (lhs_ty.zigTypeTag()) { + const mod = self.bin_file.options.module.?; + switch (lhs_ty.zigTypeTag(mod)) { .Bool => { assert((try lhs_bind.resolveToImmediate(self)) == null); // should have been handled by Sema assert((try rhs_bind.resolveToImmediate(self)) == null); // should have been handled by Sema @@ -2388,17 +2395,17 @@ fn ptrArithmetic( rhs_ty: Type, maybe_inst: ?Air.Inst.Index, ) InnerError!MCValue { - switch (lhs_ty.zigTypeTag()) { + const mod = self.bin_file.options.module.?; + switch (lhs_ty.zigTypeTag(mod)) { .Pointer => { - const mod = self.bin_file.options.module.?; assert(rhs_ty.eql(Type.usize, mod)); const ptr_ty = lhs_ty; - const elem_ty = switch (ptr_ty.ptrSize()) { - .One => ptr_ty.childType().childType(), // ptr to array, so get array element type - else => ptr_ty.childType(), + const elem_ty = switch (ptr_ty.ptrSize(mod)) { + .One => ptr_ty.childType(mod).childType(mod), // ptr to array, so get array element type + else => ptr_ty.childType(mod), }; - const elem_size = elem_ty.abiSize(self.target.*); + const elem_size = elem_ty.abiSize(mod); const base_tag: Air.Inst.Tag = switch (tag) { .ptr_add => .add, @@ -2426,8 +2433,8 @@ fn ptrArithmetic( fn airBinOp(self: *Self, inst: Air.Inst.Index, tag: Air.Inst.Tag) !void { const bin_op = self.air.instructions.items(.data)[inst].bin_op; - const lhs_ty = self.air.typeOf(bin_op.lhs); - const rhs_ty = self.air.typeOf(bin_op.rhs); + const lhs_ty = self.typeOf(bin_op.lhs); + const rhs_ty = self.typeOf(bin_op.rhs); const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: { const lhs_bind: ReadArg.Bind = .{ .inst = bin_op.lhs }; @@ -2477,8 +2484,8 @@ fn airBinOp(self: *Self, inst: 
Air.Inst.Index, tag: Air.Inst.Tag) !void { fn airPtrArithmetic(self: *Self, inst: Air.Inst.Index, tag: Air.Inst.Tag) !void { const ty_pl = self.air.instructions.items(.data)[inst].ty_pl; const bin_op = self.air.extraData(Air.Bin, ty_pl.payload).data; - const lhs_ty = self.air.typeOf(bin_op.lhs); - const rhs_ty = self.air.typeOf(bin_op.rhs); + const lhs_ty = self.typeOf(bin_op.lhs); + const rhs_ty = self.typeOf(bin_op.rhs); const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: { const lhs_bind: ReadArg.Bind = .{ .inst = bin_op.lhs }; @@ -2511,23 +2518,23 @@ fn airOverflow(self: *Self, inst: Air.Inst.Index) !void { const tag = self.air.instructions.items(.tag)[inst]; const ty_pl = self.air.instructions.items(.data)[inst].ty_pl; const extra = self.air.extraData(Air.Bin, ty_pl.payload).data; + const mod = self.bin_file.options.module.?; const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: { const lhs_bind: ReadArg.Bind = .{ .inst = extra.lhs }; const rhs_bind: ReadArg.Bind = .{ .inst = extra.rhs }; - const lhs_ty = self.air.typeOf(extra.lhs); - const rhs_ty = self.air.typeOf(extra.rhs); + const lhs_ty = self.typeOf(extra.lhs); + const rhs_ty = self.typeOf(extra.rhs); - const tuple_ty = self.air.typeOfIndex(inst); - const tuple_size = @intCast(u32, tuple_ty.abiSize(self.target.*)); - const tuple_align = tuple_ty.abiAlignment(self.target.*); - const overflow_bit_offset = @intCast(u32, tuple_ty.structFieldOffset(1, self.target.*)); + const tuple_ty = self.typeOfIndex(inst); + const tuple_size = @intCast(u32, tuple_ty.abiSize(mod)); + const tuple_align = tuple_ty.abiAlignment(mod); + const overflow_bit_offset = @intCast(u32, tuple_ty.structFieldOffset(1, mod)); - switch (lhs_ty.zigTypeTag()) { + switch (lhs_ty.zigTypeTag(mod)) { .Vector => return self.fail("TODO implement add_with_overflow/sub_with_overflow for vectors", .{}), .Int => { - const mod = self.bin_file.options.module.?; assert(lhs_ty.eql(rhs_ty, mod)); - const int_info = lhs_ty.intInfo(self.target.*); + const int_info = lhs_ty.intInfo(mod); switch (int_info.bits) { 1...31, 33...63 => { const stack_offset = try self.allocMem(tuple_size, tuple_align, inst); @@ -2565,7 +2572,7 @@ fn airOverflow(self: *Self, inst: Air.Inst.Index) !void { }); try self.genSetStack(lhs_ty, stack_offset, .{ .register = truncated_reg }); - try self.genSetStack(Type.initTag(.u1), stack_offset - overflow_bit_offset, .{ .compare_flags = .ne }); + try self.genSetStack(Type.u1, stack_offset - overflow_bit_offset, .{ .compare_flags = .ne }); break :result MCValue{ .stack_offset = stack_offset }; }, @@ -2639,24 +2646,23 @@ fn airMulWithOverflow(self: *Self, inst: Air.Inst.Index) !void { const ty_pl = self.air.instructions.items(.data)[inst].ty_pl; const extra = self.air.extraData(Air.Bin, ty_pl.payload).data; if (self.liveness.isUnused(inst)) return self.finishAir(inst, .dead, .{ extra.lhs, extra.rhs, .none }); + const mod = self.bin_file.options.module.?; const result: MCValue = result: { - const mod = self.bin_file.options.module.?; - const lhs_bind: ReadArg.Bind = .{ .inst = extra.lhs }; const rhs_bind: ReadArg.Bind = .{ .inst = extra.rhs }; - const lhs_ty = self.air.typeOf(extra.lhs); - const rhs_ty = self.air.typeOf(extra.rhs); + const lhs_ty = self.typeOf(extra.lhs); + const rhs_ty = self.typeOf(extra.rhs); - const tuple_ty = self.air.typeOfIndex(inst); - const tuple_size = @intCast(u32, tuple_ty.abiSize(self.target.*)); - const tuple_align = tuple_ty.abiAlignment(self.target.*); - const overflow_bit_offset = 
@intCast(u32, tuple_ty.structFieldOffset(1, self.target.*)); + const tuple_ty = self.typeOfIndex(inst); + const tuple_size = @intCast(u32, tuple_ty.abiSize(mod)); + const tuple_align = tuple_ty.abiAlignment(mod); + const overflow_bit_offset = @intCast(u32, tuple_ty.structFieldOffset(1, mod)); - switch (lhs_ty.zigTypeTag()) { + switch (lhs_ty.zigTypeTag(mod)) { .Vector => return self.fail("TODO implement mul_with_overflow for vectors", .{}), .Int => { assert(lhs_ty.eql(rhs_ty, mod)); - const int_info = lhs_ty.intInfo(self.target.*); + const int_info = lhs_ty.intInfo(mod); if (int_info.bits <= 32) { const stack_offset = try self.allocMem(tuple_size, tuple_align, inst); @@ -2709,7 +2715,7 @@ fn airMulWithOverflow(self: *Self, inst: Air.Inst.Index) !void { } try self.genSetStack(lhs_ty, stack_offset, .{ .register = truncated_reg }); - try self.genSetStack(Type.initTag(.u1), stack_offset - overflow_bit_offset, .{ .compare_flags = .ne }); + try self.genSetStack(Type.u1, stack_offset - overflow_bit_offset, .{ .compare_flags = .ne }); break :result MCValue{ .stack_offset = stack_offset }; } else if (int_info.bits <= 64) { @@ -2849,7 +2855,7 @@ fn airMulWithOverflow(self: *Self, inst: Air.Inst.Index) !void { try self.truncRegister(dest_reg, truncated_reg, int_info.signedness, int_info.bits); try self.genSetStack(lhs_ty, stack_offset, .{ .register = truncated_reg }); - try self.genSetStack(Type.initTag(.u1), stack_offset - overflow_bit_offset, .{ .compare_flags = .ne }); + try self.genSetStack(Type.u1, stack_offset - overflow_bit_offset, .{ .compare_flags = .ne }); break :result MCValue{ .stack_offset = stack_offset }; } else return self.fail("TODO implement mul_with_overflow for integers > u64/i64", .{}); @@ -2864,21 +2870,22 @@ fn airShlWithOverflow(self: *Self, inst: Air.Inst.Index) !void { const ty_pl = self.air.instructions.items(.data)[inst].ty_pl; const extra = self.air.extraData(Air.Bin, ty_pl.payload).data; if (self.liveness.isUnused(inst)) return self.finishAir(inst, .dead, .{ extra.lhs, extra.rhs, .none }); + const mod = self.bin_file.options.module.?; const result: MCValue = result: { const lhs_bind: ReadArg.Bind = .{ .inst = extra.lhs }; const rhs_bind: ReadArg.Bind = .{ .inst = extra.rhs }; - const lhs_ty = self.air.typeOf(extra.lhs); - const rhs_ty = self.air.typeOf(extra.rhs); + const lhs_ty = self.typeOf(extra.lhs); + const rhs_ty = self.typeOf(extra.rhs); - const tuple_ty = self.air.typeOfIndex(inst); - const tuple_size = @intCast(u32, tuple_ty.abiSize(self.target.*)); - const tuple_align = tuple_ty.abiAlignment(self.target.*); - const overflow_bit_offset = @intCast(u32, tuple_ty.structFieldOffset(1, self.target.*)); + const tuple_ty = self.typeOfIndex(inst); + const tuple_size = @intCast(u32, tuple_ty.abiSize(mod)); + const tuple_align = tuple_ty.abiAlignment(mod); + const overflow_bit_offset = @intCast(u32, tuple_ty.structFieldOffset(1, mod)); - switch (lhs_ty.zigTypeTag()) { + switch (lhs_ty.zigTypeTag(mod)) { .Vector => return self.fail("TODO implement shl_with_overflow for vectors", .{}), .Int => { - const int_info = lhs_ty.intInfo(self.target.*); + const int_info = lhs_ty.intInfo(mod); if (int_info.bits <= 64) { const stack_offset = try self.allocMem(tuple_size, tuple_align, inst); @@ -2981,7 +2988,7 @@ fn airShlWithOverflow(self: *Self, inst: Air.Inst.Index) !void { }); try self.genSetStack(lhs_ty, stack_offset, .{ .register = dest_reg }); - try self.genSetStack(Type.initTag(.u1), stack_offset - overflow_bit_offset, .{ .compare_flags = .ne }); + try self.genSetStack(Type.u1, 
stack_offset - overflow_bit_offset, .{ .compare_flags = .ne }); break :result MCValue{ .stack_offset = stack_offset }; } else { @@ -3003,7 +3010,7 @@ fn airShlSat(self: *Self, inst: Air.Inst.Index) !void { fn airOptionalPayload(self: *Self, inst: Air.Inst.Index) !void { const ty_op = self.air.instructions.items(.data)[inst].ty_op; const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: { - const optional_ty = self.air.typeOf(ty_op.operand); + const optional_ty = self.typeOf(ty_op.operand); const mcv = try self.resolveInst(ty_op.operand); break :result try self.optionalPayload(inst, mcv, optional_ty); }; @@ -3011,10 +3018,10 @@ fn airOptionalPayload(self: *Self, inst: Air.Inst.Index) !void { } fn optionalPayload(self: *Self, inst: Air.Inst.Index, mcv: MCValue, optional_ty: Type) !MCValue { - var opt_buf: Type.Payload.ElemType = undefined; - const payload_ty = optional_ty.optionalChild(&opt_buf); - if (!payload_ty.hasRuntimeBits()) return MCValue.none; - if (optional_ty.isPtrLikeOptional()) { + const mod = self.bin_file.options.module.?; + const payload_ty = optional_ty.optionalChild(mod); + if (!payload_ty.hasRuntimeBits(mod)) return MCValue.none; + if (optional_ty.isPtrLikeOptional(mod)) { // TODO should we reuse the operand here? const raw_reg = try self.register_manager.allocReg(inst, gp); const reg = self.registerAlias(raw_reg, payload_ty); @@ -3055,16 +3062,17 @@ fn errUnionErr( error_union_ty: Type, maybe_inst: ?Air.Inst.Index, ) !MCValue { - const err_ty = error_union_ty.errorUnionSet(); - const payload_ty = error_union_ty.errorUnionPayload(); - if (err_ty.errorSetIsEmpty()) { + const mod = self.bin_file.options.module.?; + const err_ty = error_union_ty.errorUnionSet(mod); + const payload_ty = error_union_ty.errorUnionPayload(mod); + if (err_ty.errorSetIsEmpty(mod)) { return MCValue{ .immediate = 0 }; } - if (!payload_ty.hasRuntimeBitsIgnoreComptime()) { + if (!payload_ty.hasRuntimeBitsIgnoreComptime(mod)) { return try error_union_bind.resolveToMcv(self); } - const err_offset = @intCast(u32, errUnionErrorOffset(payload_ty, self.target.*)); + const err_offset = @intCast(u32, errUnionErrorOffset(payload_ty, mod)); switch (try error_union_bind.resolveToMcv(self)) { .register => { var operand_reg: Register = undefined; @@ -3086,7 +3094,7 @@ fn errUnionErr( ); const err_bit_offset = err_offset * 8; - const err_bit_size = @intCast(u32, err_ty.abiSize(self.target.*)) * 8; + const err_bit_size = @intCast(u32, err_ty.abiSize(mod)) * 8; _ = try self.addInst(.{ .tag = .ubfx, // errors are unsigned integers @@ -3120,7 +3128,7 @@ fn airUnwrapErrErr(self: *Self, inst: Air.Inst.Index) !void { const ty_op = self.air.instructions.items(.data)[inst].ty_op; const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: { const error_union_bind: ReadArg.Bind = .{ .inst = ty_op.operand }; - const error_union_ty = self.air.typeOf(ty_op.operand); + const error_union_ty = self.typeOf(ty_op.operand); break :result try self.errUnionErr(error_union_bind, error_union_ty, inst); }; @@ -3134,16 +3142,17 @@ fn errUnionPayload( error_union_ty: Type, maybe_inst: ?Air.Inst.Index, ) !MCValue { - const err_ty = error_union_ty.errorUnionSet(); - const payload_ty = error_union_ty.errorUnionPayload(); - if (err_ty.errorSetIsEmpty()) { + const mod = self.bin_file.options.module.?; + const err_ty = error_union_ty.errorUnionSet(mod); + const payload_ty = error_union_ty.errorUnionPayload(mod); + if (err_ty.errorSetIsEmpty(mod)) { return try error_union_bind.resolveToMcv(self); } - if 
(!payload_ty.hasRuntimeBitsIgnoreComptime()) { + if (!payload_ty.hasRuntimeBitsIgnoreComptime(mod)) { return MCValue.none; } - const payload_offset = @intCast(u32, errUnionPayloadOffset(payload_ty, self.target.*)); + const payload_offset = @intCast(u32, errUnionPayloadOffset(payload_ty, mod)); switch (try error_union_bind.resolveToMcv(self)) { .register => { var operand_reg: Register = undefined; @@ -3165,10 +3174,10 @@ fn errUnionPayload( ); const payload_bit_offset = payload_offset * 8; - const payload_bit_size = @intCast(u32, payload_ty.abiSize(self.target.*)) * 8; + const payload_bit_size = @intCast(u32, payload_ty.abiSize(mod)) * 8; _ = try self.addInst(.{ - .tag = if (payload_ty.isSignedInt()) Mir.Inst.Tag.sbfx else .ubfx, + .tag = if (payload_ty.isSignedInt(mod)) Mir.Inst.Tag.sbfx else .ubfx, .data = .{ .rr_lsb_width = .{ // Set both registers to the X variant to get the full width @@ -3199,7 +3208,7 @@ fn airUnwrapErrPayload(self: *Self, inst: Air.Inst.Index) !void { const ty_op = self.air.instructions.items(.data)[inst].ty_op; const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: { const error_union_bind: ReadArg.Bind = .{ .inst = ty_op.operand }; - const error_union_ty = self.air.typeOf(ty_op.operand); + const error_union_ty = self.typeOf(ty_op.operand); break :result try self.errUnionPayload(error_union_bind, error_union_ty, inst); }; @@ -3245,6 +3254,7 @@ fn airSaveErrReturnTraceIndex(self: *Self, inst: Air.Inst.Index) !void { } fn airWrapOptional(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; const ty_op = self.air.instructions.items(.data)[inst].ty_op; if (self.liveness.isUnused(inst)) { @@ -3252,12 +3262,12 @@ fn airWrapOptional(self: *Self, inst: Air.Inst.Index) !void { } const result: MCValue = result: { - const payload_ty = self.air.typeOf(ty_op.operand); - if (!payload_ty.hasRuntimeBits()) { + const payload_ty = self.typeOf(ty_op.operand); + if (!payload_ty.hasRuntimeBits(mod)) { break :result MCValue{ .immediate = 1 }; } - const optional_ty = self.air.typeOfIndex(inst); + const optional_ty = self.typeOfIndex(inst); const operand = try self.resolveInst(ty_op.operand); const operand_lock: ?RegisterLock = switch (operand) { .register => |reg| self.register_manager.lockRegAssumeUnused(reg), @@ -3265,7 +3275,7 @@ fn airWrapOptional(self: *Self, inst: Air.Inst.Index) !void { }; defer if (operand_lock) |lock| self.register_manager.unlockReg(lock); - if (optional_ty.isPtrLikeOptional()) { + if (optional_ty.isPtrLikeOptional(mod)) { // TODO should we check if we can reuse the operand? 
const raw_reg = try self.register_manager.allocReg(inst, gp); const reg = self.registerAlias(raw_reg, payload_ty); @@ -3273,9 +3283,9 @@ fn airWrapOptional(self: *Self, inst: Air.Inst.Index) !void { break :result MCValue{ .register = reg }; } - const optional_abi_size = @intCast(u32, optional_ty.abiSize(self.target.*)); - const optional_abi_align = optional_ty.abiAlignment(self.target.*); - const offset = @intCast(u32, payload_ty.abiSize(self.target.*)); + const optional_abi_size = @intCast(u32, optional_ty.abiSize(mod)); + const optional_abi_align = optional_ty.abiAlignment(mod); + const offset = @intCast(u32, payload_ty.abiSize(mod)); const stack_offset = try self.allocMem(optional_abi_size, optional_abi_align, inst); try self.genSetStack(payload_ty, stack_offset, operand); @@ -3289,19 +3299,20 @@ fn airWrapOptional(self: *Self, inst: Air.Inst.Index) !void { /// T to E!T fn airWrapErrUnionPayload(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; const ty_op = self.air.instructions.items(.data)[inst].ty_op; const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: { const error_union_ty = self.air.getRefType(ty_op.ty); - const error_ty = error_union_ty.errorUnionSet(); - const payload_ty = error_union_ty.errorUnionPayload(); + const error_ty = error_union_ty.errorUnionSet(mod); + const payload_ty = error_union_ty.errorUnionPayload(mod); const operand = try self.resolveInst(ty_op.operand); - if (!payload_ty.hasRuntimeBitsIgnoreComptime()) break :result operand; + if (!payload_ty.hasRuntimeBitsIgnoreComptime(mod)) break :result operand; - const abi_size = @intCast(u32, error_union_ty.abiSize(self.target.*)); - const abi_align = error_union_ty.abiAlignment(self.target.*); + const abi_size = @intCast(u32, error_union_ty.abiSize(mod)); + const abi_align = error_union_ty.abiAlignment(mod); const stack_offset = try self.allocMem(abi_size, abi_align, inst); - const payload_off = errUnionPayloadOffset(payload_ty, self.target.*); - const err_off = errUnionErrorOffset(payload_ty, self.target.*); + const payload_off = errUnionPayloadOffset(payload_ty, mod); + const err_off = errUnionErrorOffset(payload_ty, mod); try self.genSetStack(payload_ty, stack_offset - @intCast(u32, payload_off), operand); try self.genSetStack(error_ty, stack_offset - @intCast(u32, err_off), .{ .immediate = 0 }); @@ -3314,17 +3325,18 @@ fn airWrapErrUnionPayload(self: *Self, inst: Air.Inst.Index) !void { fn airWrapErrUnionErr(self: *Self, inst: Air.Inst.Index) !void { const ty_op = self.air.instructions.items(.data)[inst].ty_op; const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: { + const mod = self.bin_file.options.module.?; const error_union_ty = self.air.getRefType(ty_op.ty); - const error_ty = error_union_ty.errorUnionSet(); - const payload_ty = error_union_ty.errorUnionPayload(); + const error_ty = error_union_ty.errorUnionSet(mod); + const payload_ty = error_union_ty.errorUnionPayload(mod); const operand = try self.resolveInst(ty_op.operand); - if (!payload_ty.hasRuntimeBitsIgnoreComptime()) break :result operand; + if (!payload_ty.hasRuntimeBitsIgnoreComptime(mod)) break :result operand; - const abi_size = @intCast(u32, error_union_ty.abiSize(self.target.*)); - const abi_align = error_union_ty.abiAlignment(self.target.*); + const abi_size = @intCast(u32, error_union_ty.abiSize(mod)); + const abi_align = error_union_ty.abiAlignment(mod); const stack_offset = try self.allocMem(abi_size, abi_align, inst); - const payload_off = 
errUnionPayloadOffset(payload_ty, self.target.*); - const err_off = errUnionErrorOffset(payload_ty, self.target.*); + const payload_off = errUnionPayloadOffset(payload_ty, mod); + const err_off = errUnionErrorOffset(payload_ty, mod); try self.genSetStack(error_ty, stack_offset - @intCast(u32, err_off), operand); try self.genSetStack(payload_ty, stack_offset - @intCast(u32, payload_off), .undef); @@ -3362,7 +3374,7 @@ fn airSlicePtr(self: *Self, inst: Air.Inst.Index) !void { fn airSliceLen(self: *Self, inst: Air.Inst.Index) !void { const ty_op = self.air.instructions.items(.data)[inst].ty_op; const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: { - const ptr_bits = self.target.cpu.arch.ptrBitWidth(); + const ptr_bits = self.target.ptrBitWidth(); const ptr_bytes = @divExact(ptr_bits, 8); const mcv = try self.resolveInst(ty_op.operand); switch (mcv) { @@ -3386,7 +3398,7 @@ fn airSliceLen(self: *Self, inst: Air.Inst.Index) !void { fn airPtrSliceLenPtr(self: *Self, inst: Air.Inst.Index) !void { const ty_op = self.air.instructions.items(.data)[inst].ty_op; const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: { - const ptr_bits = self.target.cpu.arch.ptrBitWidth(); + const ptr_bits = self.target.ptrBitWidth(); const ptr_bytes = @divExact(ptr_bits, 8); const mcv = try self.resolveInst(ty_op.operand); switch (mcv) { @@ -3416,11 +3428,11 @@ fn airPtrSlicePtrPtr(self: *Self, inst: Air.Inst.Index) !void { } fn airSliceElemVal(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; const bin_op = self.air.instructions.items(.data)[inst].bin_op; - const slice_ty = self.air.typeOf(bin_op.lhs); - const result: MCValue = if (!slice_ty.isVolatilePtr() and self.liveness.isUnused(inst)) .dead else result: { - var buf: Type.SlicePtrFieldTypeBuffer = undefined; - const ptr_ty = slice_ty.slicePtrFieldType(&buf); + const slice_ty = self.typeOf(bin_op.lhs); + const result: MCValue = if (!slice_ty.isVolatilePtr(mod) and self.liveness.isUnused(inst)) .dead else result: { + const ptr_ty = slice_ty.slicePtrFieldType(mod); const slice_mcv = try self.resolveInst(bin_op.lhs); const base_mcv = slicePtr(slice_mcv); @@ -3440,8 +3452,9 @@ fn ptrElemVal( ptr_ty: Type, maybe_inst: ?Air.Inst.Index, ) !MCValue { - const elem_ty = ptr_ty.childType(); - const elem_size = @intCast(u32, elem_ty.abiSize(self.target.*)); + const mod = self.bin_file.options.module.?; + const elem_ty = ptr_ty.childType(mod); + const elem_size = @intCast(u32, elem_ty.abiSize(mod)); // TODO optimize for elem_sizes of 1, 2, 4, 8 switch (elem_size) { @@ -3465,8 +3478,8 @@ fn airSliceElemPtr(self: *Self, inst: Air.Inst.Index) !void { const base_bind: ReadArg.Bind = .{ .mcv = base_mcv }; const index_bind: ReadArg.Bind = .{ .inst = extra.rhs }; - const slice_ty = self.air.typeOf(extra.lhs); - const index_ty = self.air.typeOf(extra.rhs); + const slice_ty = self.typeOf(extra.lhs); + const index_ty = self.typeOf(extra.rhs); const addr = try self.ptrArithmetic(.ptr_add, base_bind, index_bind, slice_ty, index_ty, null); break :result addr; @@ -3481,9 +3494,10 @@ fn airArrayElemVal(self: *Self, inst: Air.Inst.Index) !void { } fn airPtrElemVal(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; const bin_op = self.air.instructions.items(.data)[inst].bin_op; - const ptr_ty = self.air.typeOf(bin_op.lhs); - const result: MCValue = if (!ptr_ty.isVolatilePtr() and self.liveness.isUnused(inst)) .dead else result: { + const ptr_ty = self.typeOf(bin_op.lhs); 
+ const result: MCValue = if (!ptr_ty.isVolatilePtr(mod) and self.liveness.isUnused(inst)) .dead else result: { const base_bind: ReadArg.Bind = .{ .inst = bin_op.lhs }; const index_bind: ReadArg.Bind = .{ .inst = bin_op.rhs }; @@ -3499,8 +3513,8 @@ fn airPtrElemPtr(self: *Self, inst: Air.Inst.Index) !void { const ptr_bind: ReadArg.Bind = .{ .inst = extra.lhs }; const index_bind: ReadArg.Bind = .{ .inst = extra.rhs }; - const ptr_ty = self.air.typeOf(extra.lhs); - const index_ty = self.air.typeOf(extra.rhs); + const ptr_ty = self.typeOf(extra.lhs); + const index_ty = self.typeOf(extra.rhs); const addr = try self.ptrArithmetic(.ptr_add, ptr_bind, index_bind, ptr_ty, index_ty, null); break :result addr; @@ -3597,8 +3611,9 @@ fn reuseOperand( } fn load(self: *Self, dst_mcv: MCValue, ptr: MCValue, ptr_ty: Type) InnerError!void { - const elem_ty = ptr_ty.elemType(); - const elem_size = elem_ty.abiSize(self.target.*); + const mod = self.bin_file.options.module.?; + const elem_ty = ptr_ty.childType(mod); + const elem_size = elem_ty.abiSize(mod); switch (ptr) { .none => unreachable, @@ -3753,14 +3768,14 @@ fn genInlineMemset( ) !void { const dst_reg = switch (dst) { .register => |r| r, - else => try self.copyToTmpRegister(Type.initTag(.manyptr_u8), dst), + else => try self.copyToTmpRegister(Type.manyptr_u8, dst), }; const dst_reg_lock = self.register_manager.lockReg(dst_reg); defer if (dst_reg_lock) |lock| self.register_manager.unlockReg(lock); const val_reg = switch (val) { .register => |r| r, - else => try self.copyToTmpRegister(Type.initTag(.u8), val), + else => try self.copyToTmpRegister(Type.u8, val), }; const val_reg_lock = self.register_manager.lockReg(val_reg); defer if (val_reg_lock) |lock| self.register_manager.unlockReg(lock); @@ -3844,15 +3859,16 @@ fn genInlineMemsetCode( } fn airLoad(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; const ty_op = self.air.instructions.items(.data)[inst].ty_op; - const elem_ty = self.air.typeOfIndex(inst); - const elem_size = elem_ty.abiSize(self.target.*); + const elem_ty = self.typeOfIndex(inst); + const elem_size = elem_ty.abiSize(mod); const result: MCValue = result: { - if (!elem_ty.hasRuntimeBits()) + if (!elem_ty.hasRuntimeBits(mod)) break :result MCValue.none; const ptr = try self.resolveInst(ty_op.operand); - const is_volatile = self.air.typeOf(ty_op.operand).isVolatilePtr(); + const is_volatile = self.typeOf(ty_op.operand).isVolatilePtr(mod); if (self.liveness.isUnused(inst) and !is_volatile) break :result MCValue.dead; @@ -3867,18 +3883,19 @@ fn airLoad(self: *Self, inst: Air.Inst.Index) !void { break :blk try self.allocRegOrMem(elem_ty, true, inst); } }; - try self.load(dst_mcv, ptr, self.air.typeOf(ty_op.operand)); + try self.load(dst_mcv, ptr, self.typeOf(ty_op.operand)); break :result dst_mcv; }; return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); } fn genLdrRegister(self: *Self, value_reg: Register, addr_reg: Register, ty: Type) !void { - const abi_size = ty.abiSize(self.target.*); + const mod = self.bin_file.options.module.?; + const abi_size = ty.abiSize(mod); const tag: Mir.Inst.Tag = switch (abi_size) { - 1 => if (ty.isSignedInt()) Mir.Inst.Tag.ldrsb_immediate else .ldrb_immediate, - 2 => if (ty.isSignedInt()) Mir.Inst.Tag.ldrsh_immediate else .ldrh_immediate, + 1 => if (ty.isSignedInt(mod)) Mir.Inst.Tag.ldrsb_immediate else .ldrb_immediate, + 2 => if (ty.isSignedInt(mod)) Mir.Inst.Tag.ldrsh_immediate else .ldrh_immediate, 4 => .ldr_immediate, 8 => .ldr_immediate, 3, 5, 6, 7 
=> return self.fail("TODO: genLdrRegister for more abi_sizes", .{}), @@ -3896,7 +3913,8 @@ fn genLdrRegister(self: *Self, value_reg: Register, addr_reg: Register, ty: Type } fn genStrRegister(self: *Self, value_reg: Register, addr_reg: Register, ty: Type) !void { - const abi_size = ty.abiSize(self.target.*); + const mod = self.bin_file.options.module.?; + const abi_size = ty.abiSize(mod); const tag: Mir.Inst.Tag = switch (abi_size) { 1 => .strb_immediate, @@ -3917,8 +3935,9 @@ fn genStrRegister(self: *Self, value_reg: Register, addr_reg: Register, ty: Type } fn store(self: *Self, ptr: MCValue, value: MCValue, ptr_ty: Type, value_ty: Type) InnerError!void { + const mod = self.bin_file.options.module.?; log.debug("store: storing {} to {}", .{ value, ptr }); - const abi_size = value_ty.abiSize(self.target.*); + const abi_size = value_ty.abiSize(mod); switch (ptr) { .none => unreachable, @@ -4046,8 +4065,8 @@ fn airStore(self: *Self, inst: Air.Inst.Index, safety: bool) !void { const bin_op = self.air.instructions.items(.data)[inst].bin_op; const ptr = try self.resolveInst(bin_op.lhs); const value = try self.resolveInst(bin_op.rhs); - const ptr_ty = self.air.typeOf(bin_op.lhs); - const value_ty = self.air.typeOf(bin_op.rhs); + const ptr_ty = self.typeOf(bin_op.lhs); + const value_ty = self.typeOf(bin_op.rhs); try self.store(ptr, value, ptr_ty, value_ty); @@ -4069,10 +4088,11 @@ fn airStructFieldPtrIndex(self: *Self, inst: Air.Inst.Index, index: u8) !void { fn structFieldPtr(self: *Self, inst: Air.Inst.Index, operand: Air.Inst.Ref, index: u32) !MCValue { return if (self.liveness.isUnused(inst)) .dead else result: { + const mod = self.bin_file.options.module.?; const mcv = try self.resolveInst(operand); - const ptr_ty = self.air.typeOf(operand); - const struct_ty = ptr_ty.childType(); - const struct_field_offset = @intCast(u32, struct_ty.structFieldOffset(index, self.target.*)); + const ptr_ty = self.typeOf(operand); + const struct_ty = ptr_ty.childType(mod); + const struct_field_offset = @intCast(u32, struct_ty.structFieldOffset(index, mod)); switch (mcv) { .ptr_stack_offset => |off| { break :result MCValue{ .ptr_stack_offset = off - struct_field_offset }; @@ -4093,10 +4113,11 @@ fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void { const operand = extra.struct_operand; const index = extra.field_index; const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: { + const mod = self.bin_file.options.module.?; const mcv = try self.resolveInst(operand); - const struct_ty = self.air.typeOf(operand); - const struct_field_ty = struct_ty.structFieldType(index); - const struct_field_offset = @intCast(u32, struct_ty.structFieldOffset(index, self.target.*)); + const struct_ty = self.typeOf(operand); + const struct_field_ty = struct_ty.structFieldType(index, mod); + const struct_field_offset = @intCast(u32, struct_ty.structFieldOffset(index, mod)); switch (mcv) { .dead, .unreach => unreachable, @@ -4142,12 +4163,13 @@ fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void { } fn airFieldParentPtr(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; const ty_pl = self.air.instructions.items(.data)[inst].ty_pl; const extra = self.air.extraData(Air.FieldParentPtr, ty_pl.payload).data; const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: { const field_ptr = try self.resolveInst(extra.field_ptr); - const struct_ty = self.air.getRefType(ty_pl.ty).childType(); - const struct_field_offset = @intCast(u32, 
struct_ty.structFieldOffset(extra.field_index, self.target.*)); + const struct_ty = self.air.getRefType(ty_pl.ty).childType(mod); + const struct_field_offset = @intCast(u32, struct_ty.structFieldOffset(extra.field_index, mod)); switch (field_ptr) { .ptr_stack_offset => |off| { break :result MCValue{ .ptr_stack_offset = off + struct_field_offset }; @@ -4169,7 +4191,7 @@ fn airArg(self: *Self, inst: Air.Inst.Index) !void { while (self.args[arg_index] == .none) arg_index += 1; self.arg_index = arg_index + 1; - const ty = self.air.typeOfIndex(inst); + const ty = self.typeOfIndex(inst); const tag = self.air.instructions.items(.tag)[inst]; const src_index = self.air.instructions.items(.data)[inst].arg.src_index; const name = self.mod_fn.getParamName(self.bin_file.options.module.?, src_index); @@ -4222,11 +4244,12 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallModifier const callee = pl_op.operand; const extra = self.air.extraData(Air.Call, pl_op.payload); const args = @ptrCast([]const Air.Inst.Ref, self.air.extra[extra.end..][0..extra.data.args_len]); - const ty = self.air.typeOf(callee); + const ty = self.typeOf(callee); + const mod = self.bin_file.options.module.?; - const fn_ty = switch (ty.zigTypeTag()) { + const fn_ty = switch (ty.zigTypeTag(mod)) { .Fn => ty, - .Pointer => ty.childType(), + .Pointer => ty.childType(mod), else => unreachable, }; @@ -4245,18 +4268,14 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallModifier if (info.return_value == .stack_offset) { log.debug("airCall: return by reference", .{}); - const ret_ty = fn_ty.fnReturnType(); - const ret_abi_size = @intCast(u32, ret_ty.abiSize(self.target.*)); - const ret_abi_align = @intCast(u32, ret_ty.abiAlignment(self.target.*)); + const ret_ty = fn_ty.fnReturnType(mod); + const ret_abi_size = @intCast(u32, ret_ty.abiSize(mod)); + const ret_abi_align = @intCast(u32, ret_ty.abiAlignment(mod)); const stack_offset = try self.allocMem(ret_abi_size, ret_abi_align, inst); const ret_ptr_reg = self.registerAlias(.x0, Type.usize); - var ptr_ty_payload: Type.Payload.ElemType = .{ - .base = .{ .tag = .single_mut_pointer }, - .data = ret_ty, - }; - const ptr_ty = Type.initPayload(&ptr_ty_payload.base); + const ptr_ty = try mod.singleMutPtrType(ret_ty); try self.register_manager.getReg(ret_ptr_reg, null); try self.genSetReg(ptr_ty, ret_ptr_reg, .{ .ptr_stack_offset = stack_offset }); @@ -4268,7 +4287,7 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallModifier for (info.args, 0..) |mc_arg, arg_i| { const arg = args[arg_i]; - const arg_ty = self.air.typeOf(arg); + const arg_ty = self.typeOf(arg); const arg_mcv = try self.resolveInst(args[arg_i]); switch (mc_arg) { @@ -4289,21 +4308,18 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallModifier // Due to incremental compilation, how function calls are generated depends // on linking. 
- const mod = self.bin_file.options.module.?; - if (self.air.value(callee)) |func_value| { - if (func_value.castTag(.function)) |func_payload| { - const func = func_payload.data; - + if (try self.air.value(callee, mod)) |func_value| { + if (func_value.getFunction(mod)) |func| { if (self.bin_file.cast(link.File.Elf)) |elf_file| { const atom_index = try elf_file.getOrCreateAtomForDecl(func.owner_decl); const atom = elf_file.getAtom(atom_index); _ = try atom.getOrCreateOffsetTableEntry(elf_file); const got_addr = @intCast(u32, atom.getOffsetTableAddress(elf_file)); - try self.genSetReg(Type.initTag(.usize), .x30, .{ .memory = got_addr }); + try self.genSetReg(Type.usize, .x30, .{ .memory = got_addr }); } else if (self.bin_file.cast(link.File.MachO)) |macho_file| { const atom = try macho_file.getOrCreateAtomForDecl(func.owner_decl); const sym_index = macho_file.getAtom(atom).getSymbolIndex().?; - try self.genSetReg(Type.initTag(.u64), .x30, .{ + try self.genSetReg(Type.u64, .x30, .{ .linker_load = .{ .type = .got, .sym_index = sym_index, @@ -4312,31 +4328,25 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallModifier } else if (self.bin_file.cast(link.File.Coff)) |coff_file| { const atom = try coff_file.getOrCreateAtomForDecl(func.owner_decl); const sym_index = coff_file.getAtom(atom).getSymbolIndex().?; - try self.genSetReg(Type.initTag(.u64), .x30, .{ + try self.genSetReg(Type.u64, .x30, .{ .linker_load = .{ .type = .got, .sym_index = sym_index, }, }); } else if (self.bin_file.cast(link.File.Plan9)) |p9| { - const decl_block_index = try p9.seeDecl(func.owner_decl); - const decl_block = p9.getDeclBlock(decl_block_index); - const ptr_bits = self.target.cpu.arch.ptrBitWidth(); - const ptr_bytes: u64 = @divExact(ptr_bits, 8); - const got_addr = p9.bases.data; - const got_index = decl_block.got_index.?; - const fn_got_addr = got_addr + got_index * ptr_bytes; - try self.genSetReg(Type.initTag(.usize), .x30, .{ .memory = fn_got_addr }); + const atom_index = try p9.seeDecl(func.owner_decl); + const atom = p9.getAtom(atom_index); + try self.genSetReg(Type.usize, .x30, .{ .memory = atom.getOffsetTableAddress(p9) }); } else unreachable; _ = try self.addInst(.{ .tag = .blr, .data = .{ .reg = .x30 }, }); - } else if (func_value.castTag(.extern_fn)) |func_payload| { - const extern_fn = func_payload.data; - const decl_name = mem.sliceTo(mod.declPtr(extern_fn.owner_decl).name, 0); - const lib_name = mem.sliceTo(extern_fn.lib_name, 0); + } else if (func_value.getExternFunc(mod)) |extern_func| { + const decl_name = mod.intern_pool.stringToSlice(mod.declPtr(extern_func.decl).name); + const lib_name = mod.intern_pool.stringToSliceUnwrap(extern_func.lib_name); if (self.bin_file.cast(link.File.MachO)) |macho_file| { const sym_index = try macho_file.getGlobalSymbol(decl_name, lib_name); const atom = try macho_file.getOrCreateAtomForDecl(self.mod_fn.owner_decl); @@ -4352,7 +4362,7 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallModifier }); } else if (self.bin_file.cast(link.File.Coff)) |coff_file| { const sym_index = try coff_file.getGlobalSymbol(decl_name, lib_name); - try self.genSetReg(Type.initTag(.u64), .x30, .{ + try self.genSetReg(Type.u64, .x30, .{ .linker_load = .{ .type = .import, .sym_index = sym_index, @@ -4369,7 +4379,7 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallModifier return self.fail("TODO implement calling bitcasted functions", .{}); } } else { - assert(ty.zigTypeTag() == .Pointer); + assert(ty.zigTypeTag(mod) == 
.Pointer); const mcv = try self.resolveInst(callee); try self.genSetReg(ty, .x30, mcv); @@ -4407,14 +4417,15 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallModifier } fn airRet(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; const un_op = self.air.instructions.items(.data)[inst].un_op; const operand = try self.resolveInst(un_op); - const ret_ty = self.fn_type.fnReturnType(); + const ret_ty = self.fn_type.fnReturnType(mod); switch (self.ret_mcv) { .none => {}, .immediate => { - assert(ret_ty.isError()); + assert(ret_ty.isError(mod)); }, .register => |reg| { // Return result by value @@ -4425,11 +4436,7 @@ fn airRet(self: *Self, inst: Air.Inst.Index) !void { // // self.ret_mcv is an address to where this function // should store its result into - var ptr_ty_payload: Type.Payload.ElemType = .{ - .base = .{ .tag = .single_mut_pointer }, - .data = ret_ty, - }; - const ptr_ty = Type.initPayload(&ptr_ty_payload.base); + const ptr_ty = try mod.singleMutPtrType(ret_ty); try self.store(self.ret_mcv, operand, ptr_ty, ret_ty); }, else => unreachable, @@ -4442,10 +4449,11 @@ fn airRet(self: *Self, inst: Air.Inst.Index) !void { } fn airRetLoad(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; const un_op = self.air.instructions.items(.data)[inst].un_op; const ptr = try self.resolveInst(un_op); - const ptr_ty = self.air.typeOf(un_op); - const ret_ty = self.fn_type.fnReturnType(); + const ptr_ty = self.typeOf(un_op); + const ret_ty = self.fn_type.fnReturnType(mod); switch (self.ret_mcv) { .none => {}, @@ -4465,8 +4473,8 @@ fn airRetLoad(self: *Self, inst: Air.Inst.Index) !void { // location. const op_inst = Air.refToIndex(un_op).?; if (self.air.instructions.items(.tag)[op_inst] != .ret_ptr) { - const abi_size = @intCast(u32, ret_ty.abiSize(self.target.*)); - const abi_align = ret_ty.abiAlignment(self.target.*); + const abi_size = @intCast(u32, ret_ty.abiSize(mod)); + const abi_align = ret_ty.abiAlignment(mod); const offset = try self.allocMem(abi_size, abi_align, null); @@ -4485,7 +4493,7 @@ fn airRetLoad(self: *Self, inst: Air.Inst.Index) !void { fn airCmp(self: *Self, inst: Air.Inst.Index, op: math.CompareOperator) !void { const bin_op = self.air.instructions.items(.data)[inst].bin_op; - const lhs_ty = self.air.typeOf(bin_op.lhs); + const lhs_ty = self.typeOf(bin_op.lhs); const result: MCValue = if (self.liveness.isUnused(inst)) .dead else blk: { break :blk try self.cmp(.{ .inst = bin_op.lhs }, .{ .inst = bin_op.rhs }, lhs_ty, op); @@ -4501,29 +4509,28 @@ fn cmp( lhs_ty: Type, op: math.CompareOperator, ) !MCValue { - var int_buffer: Type.Payload.Bits = undefined; - const int_ty = switch (lhs_ty.zigTypeTag()) { + const mod = self.bin_file.options.module.?; + const int_ty = switch (lhs_ty.zigTypeTag(mod)) { .Optional => blk: { - var opt_buffer: Type.Payload.ElemType = undefined; - const payload_ty = lhs_ty.optionalChild(&opt_buffer); - if (!payload_ty.hasRuntimeBitsIgnoreComptime()) { - break :blk Type.initTag(.u1); - } else if (lhs_ty.isPtrLikeOptional()) { + const payload_ty = lhs_ty.optionalChild(mod); + if (!payload_ty.hasRuntimeBitsIgnoreComptime(mod)) { + break :blk Type.u1; + } else if (lhs_ty.isPtrLikeOptional(mod)) { break :blk Type.usize; } else { return self.fail("TODO ARM cmp non-pointer optionals", .{}); } }, .Float => return self.fail("TODO ARM cmp floats", .{}), - .Enum => lhs_ty.intTagType(&int_buffer), + .Enum => lhs_ty.intTagType(mod), .Int => lhs_ty, - .Bool => Type.initTag(.u1), 
+ .Bool => Type.u1, .Pointer => Type.usize, - .ErrorSet => Type.initTag(.u16), + .ErrorSet => Type.u16, else => unreachable, }; - const int_info = int_ty.intInfo(self.target.*); + const int_info = int_ty.intInfo(mod); if (int_info.bits <= 64) { try self.spillCompareFlagsIfOccupied(); @@ -4609,8 +4616,9 @@ fn airDbgStmt(self: *Self, inst: Air.Inst.Index) !void { } fn airDbgInline(self: *Self, inst: Air.Inst.Index) !void { - const ty_pl = self.air.instructions.items(.data)[inst].ty_pl; - const function = self.air.values[ty_pl.payload].castTag(.function).?.data; + const ty_fn = self.air.instructions.items(.data)[inst].ty_fn; + const mod = self.bin_file.options.module.?; + const function = mod.funcPtr(ty_fn.func); // TODO emit debug info for function change _ = function; return self.finishAir(inst, .dead, .{ .none, .none, .none }); @@ -4625,7 +4633,7 @@ fn airDbgVar(self: *Self, inst: Air.Inst.Index) !void { const pl_op = self.air.instructions.items(.data)[inst].pl_op; const operand = pl_op.operand; const tag = self.air.instructions.items(.tag)[inst]; - const ty = self.air.typeOf(operand); + const ty = self.typeOf(operand); const mcv = try self.resolveInst(operand); const name = self.air.nullTerminatedString(pl_op.payload); @@ -4687,8 +4695,8 @@ fn airCondBr(self: *Self, inst: Air.Inst.Index) !void { // whether it needs to be spilled in the branches if (self.liveness.operandDies(inst, 0)) { const op_int = @enumToInt(pl_op.operand); - if (op_int >= Air.Inst.Ref.typed_value_map.len) { - const op_index = @intCast(Air.Inst.Index, op_int - Air.Inst.Ref.typed_value_map.len); + if (op_int >= Air.ref_start_index) { + const op_index = @intCast(Air.Inst.Index, op_int - Air.ref_start_index); self.processDeath(op_index); } } @@ -4777,7 +4785,7 @@ fn airCondBr(self: *Self, inst: Air.Inst.Index) !void { log.debug("consolidating else_entry {d} {}=>{}", .{ else_key, else_value, canon_mcv }); // TODO make sure the destination stack offset / register does not already have something // going on there. - try self.setRegOrMem(self.air.typeOfIndex(else_key), canon_mcv, else_value); + try self.setRegOrMem(self.typeOfIndex(else_key), canon_mcv, else_value); // TODO track the new register / stack allocation } try parent_branch.inst_table.ensureUnusedCapacity(self.gpa, saved_then_branch.inst_table.count()); @@ -4804,7 +4812,7 @@ fn airCondBr(self: *Self, inst: Air.Inst.Index) !void { log.debug("consolidating then_entry {d} {}=>{}", .{ then_key, parent_mcv, then_value }); // TODO make sure the destination stack offset / register does not already have something // going on there. 
- try self.setRegOrMem(self.air.typeOfIndex(then_key), parent_mcv, then_value); + try self.setRegOrMem(self.typeOfIndex(then_key), parent_mcv, then_value); // TODO track the new register / stack allocation } @@ -4819,13 +4827,13 @@ fn airCondBr(self: *Self, inst: Air.Inst.Index) !void { } fn isNull(self: *Self, operand_bind: ReadArg.Bind, operand_ty: Type) !MCValue { - const sentinel: struct { ty: Type, bind: ReadArg.Bind } = if (!operand_ty.isPtrLikeOptional()) blk: { - var buf: Type.Payload.ElemType = undefined; - const payload_ty = operand_ty.optionalChild(&buf); - if (!payload_ty.hasRuntimeBitsIgnoreComptime()) + const mod = self.bin_file.options.module.?; + const sentinel: struct { ty: Type, bind: ReadArg.Bind } = if (!operand_ty.isPtrLikeOptional(mod)) blk: { + const payload_ty = operand_ty.optionalChild(mod); + if (!payload_ty.hasRuntimeBitsIgnoreComptime(mod)) break :blk .{ .ty = operand_ty, .bind = operand_bind }; - const offset = @intCast(u32, payload_ty.abiSize(self.target.*)); + const offset = @intCast(u32, payload_ty.abiSize(mod)); const operand_mcv = try operand_bind.resolveToMcv(self); const new_mcv: MCValue = switch (operand_mcv) { .register => |source_reg| new: { @@ -4838,7 +4846,7 @@ fn isNull(self: *Self, operand_bind: ReadArg.Bind, operand_ty: Type) !MCValue { try self.genSetReg(payload_ty, dest_reg, operand_mcv); } else { _ = try self.addInst(.{ - .tag = if (payload_ty.isSignedInt()) + .tag = if (payload_ty.isSignedInt(mod)) Mir.Inst.Tag.asr_immediate else Mir.Inst.Tag.lsr_immediate, @@ -4875,9 +4883,10 @@ fn isErr( error_union_bind: ReadArg.Bind, error_union_ty: Type, ) !MCValue { - const error_type = error_union_ty.errorUnionSet(); + const mod = self.bin_file.options.module.?; + const error_type = error_union_ty.errorUnionSet(mod); - if (error_type.errorSetIsEmpty()) { + if (error_type.errorSetIsEmpty(mod)) { return MCValue{ .immediate = 0 }; // always false } @@ -4908,7 +4917,7 @@ fn airIsNull(self: *Self, inst: Air.Inst.Index) !void { const un_op = self.air.instructions.items(.data)[inst].un_op; const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: { const operand = try self.resolveInst(un_op); - const operand_ty = self.air.typeOf(un_op); + const operand_ty = self.typeOf(un_op); break :result try self.isNull(.{ .mcv = operand }, operand_ty); }; @@ -4916,11 +4925,12 @@ fn airIsNull(self: *Self, inst: Air.Inst.Index) !void { } fn airIsNullPtr(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; const un_op = self.air.instructions.items(.data)[inst].un_op; const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: { const operand_ptr = try self.resolveInst(un_op); - const ptr_ty = self.air.typeOf(un_op); - const elem_ty = ptr_ty.elemType(); + const ptr_ty = self.typeOf(un_op); + const elem_ty = ptr_ty.childType(mod); const operand = try self.allocRegOrMem(elem_ty, true, null); try self.load(operand, operand_ptr, ptr_ty); @@ -4934,7 +4944,7 @@ fn airIsNonNull(self: *Self, inst: Air.Inst.Index) !void { const un_op = self.air.instructions.items(.data)[inst].un_op; const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: { const operand = try self.resolveInst(un_op); - const operand_ty = self.air.typeOf(un_op); + const operand_ty = self.typeOf(un_op); break :result try self.isNonNull(.{ .mcv = operand }, operand_ty); }; @@ -4942,11 +4952,12 @@ fn airIsNonNull(self: *Self, inst: Air.Inst.Index) !void { } fn airIsNonNullPtr(self: *Self, inst: Air.Inst.Index) !void { + const mod 
= self.bin_file.options.module.?; const un_op = self.air.instructions.items(.data)[inst].un_op; const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: { const operand_ptr = try self.resolveInst(un_op); - const ptr_ty = self.air.typeOf(un_op); - const elem_ty = ptr_ty.elemType(); + const ptr_ty = self.typeOf(un_op); + const elem_ty = ptr_ty.childType(mod); const operand = try self.allocRegOrMem(elem_ty, true, null); try self.load(operand, operand_ptr, ptr_ty); @@ -4960,7 +4971,7 @@ fn airIsErr(self: *Self, inst: Air.Inst.Index) !void { const un_op = self.air.instructions.items(.data)[inst].un_op; const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: { const error_union_bind: ReadArg.Bind = .{ .inst = un_op }; - const error_union_ty = self.air.typeOf(un_op); + const error_union_ty = self.typeOf(un_op); break :result try self.isErr(error_union_bind, error_union_ty); }; @@ -4968,11 +4979,12 @@ fn airIsErr(self: *Self, inst: Air.Inst.Index) !void { } fn airIsErrPtr(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; const un_op = self.air.instructions.items(.data)[inst].un_op; const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: { const operand_ptr = try self.resolveInst(un_op); - const ptr_ty = self.air.typeOf(un_op); - const elem_ty = ptr_ty.elemType(); + const ptr_ty = self.typeOf(un_op); + const elem_ty = ptr_ty.childType(mod); const operand = try self.allocRegOrMem(elem_ty, true, null); try self.load(operand, operand_ptr, ptr_ty); @@ -4986,7 +4998,7 @@ fn airIsNonErr(self: *Self, inst: Air.Inst.Index) !void { const un_op = self.air.instructions.items(.data)[inst].un_op; const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: { const error_union_bind: ReadArg.Bind = .{ .inst = un_op }; - const error_union_ty = self.air.typeOf(un_op); + const error_union_ty = self.typeOf(un_op); break :result try self.isNonErr(error_union_bind, error_union_ty); }; @@ -4994,11 +5006,12 @@ fn airIsNonErr(self: *Self, inst: Air.Inst.Index) !void { } fn airIsNonErrPtr(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; const un_op = self.air.instructions.items(.data)[inst].un_op; const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: { const operand_ptr = try self.resolveInst(un_op); - const ptr_ty = self.air.typeOf(un_op); - const elem_ty = ptr_ty.elemType(); + const ptr_ty = self.typeOf(un_op); + const elem_ty = ptr_ty.childType(mod); const operand = try self.allocRegOrMem(elem_ty, true, null); try self.load(operand, operand_ptr, ptr_ty); @@ -5065,7 +5078,7 @@ fn airBlock(self: *Self, inst: Air.Inst.Index) !void { fn airSwitch(self: *Self, inst: Air.Inst.Index) !void { const pl_op = self.air.instructions.items(.data)[inst].pl_op; - const condition_ty = self.air.typeOf(pl_op.operand); + const condition_ty = self.typeOf(pl_op.operand); const switch_br = self.air.extraData(Air.SwitchBr, pl_op.payload); const liveness = try self.liveness.getSwitchBr( self.gpa, @@ -5210,9 +5223,10 @@ fn airBr(self: *Self, inst: Air.Inst.Index) !void { } fn br(self: *Self, block: Air.Inst.Index, operand: Air.Inst.Ref) !void { + const mod = self.bin_file.options.module.?; const block_data = self.blocks.getPtr(block).?; - if (self.air.typeOf(operand).hasRuntimeBits()) { + if (self.typeOf(operand).hasRuntimeBits(mod)) { const operand_mcv = try self.resolveInst(operand); const block_mcv = block_data.mcv; if (block_mcv == .none) { @@ -5220,14 +5234,14 @@ 
fn br(self: *Self, block: Air.Inst.Index, operand: Air.Inst.Ref) !void { .none, .dead, .unreach => unreachable, .register, .stack_offset, .memory => operand_mcv, .immediate, .stack_argument_offset, .compare_flags => blk: { - const new_mcv = try self.allocRegOrMem(self.air.typeOfIndex(block), true, block); - try self.setRegOrMem(self.air.typeOfIndex(block), new_mcv, operand_mcv); + const new_mcv = try self.allocRegOrMem(self.typeOfIndex(block), true, block); + try self.setRegOrMem(self.typeOfIndex(block), new_mcv, operand_mcv); break :blk new_mcv; }, else => return self.fail("TODO implement block_data.mcv = operand_mcv for {}", .{operand_mcv}), }; } else { - try self.setRegOrMem(self.air.typeOfIndex(block), block_mcv, operand_mcv); + try self.setRegOrMem(self.typeOfIndex(block), block_mcv, operand_mcv); } } return self.brVoid(block); @@ -5293,7 +5307,7 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void { const arg_mcv = try self.resolveInst(input); try self.register_manager.getReg(reg, null); - try self.genSetReg(self.air.typeOf(input), reg, arg_mcv); + try self.genSetReg(self.typeOf(input), reg, arg_mcv); } { @@ -5386,7 +5400,8 @@ fn setRegOrMem(self: *Self, ty: Type, loc: MCValue, val: MCValue) !void { } fn genSetStack(self: *Self, ty: Type, stack_offset: u32, mcv: MCValue) InnerError!void { - const abi_size = @intCast(u32, ty.abiSize(self.target.*)); + const mod = self.bin_file.options.module.?; + const abi_size = @intCast(u32, ty.abiSize(mod)); switch (mcv) { .dead => unreachable, .unreach, .none => return, // Nothing to do. @@ -5441,11 +5456,11 @@ fn genSetStack(self: *Self, ty: Type, stack_offset: u32, mcv: MCValue) InnerErro const reg_lock = self.register_manager.lockReg(rwo.reg); defer if (reg_lock) |locked_reg| self.register_manager.unlockReg(locked_reg); - const wrapped_ty = ty.structFieldType(0); + const wrapped_ty = ty.structFieldType(0, mod); try self.genSetStack(wrapped_ty, stack_offset, .{ .register = rwo.reg }); - const overflow_bit_ty = ty.structFieldType(1); - const overflow_bit_offset = @intCast(u32, ty.structFieldOffset(1, self.target.*)); + const overflow_bit_ty = ty.structFieldType(1, mod); + const overflow_bit_offset = @intCast(u32, ty.structFieldOffset(1, mod)); const raw_cond_reg = try self.register_manager.allocReg(null, gp); const cond_reg = self.registerAlias(raw_cond_reg, overflow_bit_ty); @@ -5478,11 +5493,7 @@ fn genSetStack(self: *Self, ty: Type, stack_offset: u32, mcv: MCValue) InnerErro const reg = try self.copyToTmpRegister(ty, mcv); return self.genSetStack(ty, stack_offset, MCValue{ .register = reg }); } else { - var ptr_ty_payload: Type.Payload.ElemType = .{ - .base = .{ .tag = .single_mut_pointer }, - .data = ty, - }; - const ptr_ty = Type.initPayload(&ptr_ty_payload.base); + const ptr_ty = try mod.singleMutPtrType(ty); // TODO call extern memcpy const regs = try self.register_manager.allocRegs(5, .{ null, null, null, null, null }, gp); @@ -5559,6 +5570,7 @@ fn genSetStack(self: *Self, ty: Type, stack_offset: u32, mcv: MCValue) InnerErro } fn genSetReg(self: *Self, ty: Type, reg: Register, mcv: MCValue) InnerError!void { + const mod = self.bin_file.options.module.?; switch (mcv) { .dead => unreachable, .unreach, .none => return, // Nothing to do. 
@@ -5669,13 +5681,13 @@ fn genSetReg(self: *Self, ty: Type, reg: Register, mcv: MCValue) InnerError!void try self.genLdrRegister(reg, reg.toX(), ty); }, .stack_offset => |off| { - const abi_size = ty.abiSize(self.target.*); + const abi_size = ty.abiSize(mod); switch (abi_size) { 1, 2, 4, 8 => { const tag: Mir.Inst.Tag = switch (abi_size) { - 1 => if (ty.isSignedInt()) Mir.Inst.Tag.ldrsb_stack else .ldrb_stack, - 2 => if (ty.isSignedInt()) Mir.Inst.Tag.ldrsh_stack else .ldrh_stack, + 1 => if (ty.isSignedInt(mod)) Mir.Inst.Tag.ldrsb_stack else .ldrb_stack, + 2 => if (ty.isSignedInt(mod)) Mir.Inst.Tag.ldrsh_stack else .ldrh_stack, 4, 8 => .ldr_stack, else => unreachable, // unexpected abi size }; @@ -5693,13 +5705,13 @@ fn genSetReg(self: *Self, ty: Type, reg: Register, mcv: MCValue) InnerError!void } }, .stack_argument_offset => |off| { - const abi_size = ty.abiSize(self.target.*); + const abi_size = ty.abiSize(mod); switch (abi_size) { 1, 2, 4, 8 => { const tag: Mir.Inst.Tag = switch (abi_size) { - 1 => if (ty.isSignedInt()) Mir.Inst.Tag.ldrsb_stack_argument else .ldrb_stack_argument, - 2 => if (ty.isSignedInt()) Mir.Inst.Tag.ldrsh_stack_argument else .ldrh_stack_argument, + 1 => if (ty.isSignedInt(mod)) Mir.Inst.Tag.ldrsb_stack_argument else .ldrb_stack_argument, + 2 => if (ty.isSignedInt(mod)) Mir.Inst.Tag.ldrsh_stack_argument else .ldrh_stack_argument, 4, 8 => .ldr_stack_argument, else => unreachable, // unexpected abi size }; @@ -5720,7 +5732,8 @@ fn genSetReg(self: *Self, ty: Type, reg: Register, mcv: MCValue) InnerError!void } fn genSetStackArgument(self: *Self, ty: Type, stack_offset: u32, mcv: MCValue) InnerError!void { - const abi_size = @intCast(u32, ty.abiSize(self.target.*)); + const mod = self.bin_file.options.module.?; + const abi_size = @intCast(u32, ty.abiSize(mod)); switch (mcv) { .dead => unreachable, .none, .unreach => return, @@ -5728,7 +5741,7 @@ fn genSetStackArgument(self: *Self, ty: Type, stack_offset: u32, mcv: MCValue) I if (!self.wantSafety()) return; // The already existing value will do just fine. // TODO Upgrade this to a memset call when we have that available. 
- switch (ty.abiSize(self.target.*)) { + switch (ty.abiSize(mod)) { 1 => return self.genSetStack(ty, stack_offset, .{ .immediate = 0xaa }), 2 => return self.genSetStack(ty, stack_offset, .{ .immediate = 0xaaaa }), 4 => return self.genSetStack(ty, stack_offset, .{ .immediate = 0xaaaaaaaa }), @@ -5798,11 +5811,7 @@ fn genSetStackArgument(self: *Self, ty: Type, stack_offset: u32, mcv: MCValue) I const reg = try self.copyToTmpRegister(ty, mcv); return self.genSetStackArgument(ty, stack_offset, MCValue{ .register = reg }); } else { - var ptr_ty_payload: Type.Payload.ElemType = .{ - .base = .{ .tag = .single_mut_pointer }, - .data = ty, - }; - const ptr_ty = Type.initPayload(&ptr_ty_payload.base); + const ptr_ty = try mod.singleMutPtrType(ty); // TODO call extern memcpy const regs = try self.register_manager.allocRegs(5, .{ null, null, null, null, null }, gp); @@ -5913,7 +5922,7 @@ fn airBitCast(self: *Self, inst: Air.Inst.Index) !void { }; defer if (operand_lock) |lock| self.register_manager.unlockReg(lock); - const dest_ty = self.air.typeOfIndex(inst); + const dest_ty = self.typeOfIndex(inst); const dest = try self.allocRegOrMem(dest_ty, true, inst); try self.setRegOrMem(dest_ty, dest, operand); break :result dest; @@ -5922,19 +5931,20 @@ fn airBitCast(self: *Self, inst: Air.Inst.Index) !void { } fn airArrayToSlice(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; const ty_op = self.air.instructions.items(.data)[inst].ty_op; const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: { - const ptr_ty = self.air.typeOf(ty_op.operand); + const ptr_ty = self.typeOf(ty_op.operand); const ptr = try self.resolveInst(ty_op.operand); - const array_ty = ptr_ty.childType(); - const array_len = @intCast(u32, array_ty.arrayLen()); + const array_ty = ptr_ty.childType(mod); + const array_len = @intCast(u32, array_ty.arrayLen(mod)); - const ptr_bits = self.target.cpu.arch.ptrBitWidth(); + const ptr_bits = self.target.ptrBitWidth(); const ptr_bytes = @divExact(ptr_bits, 8); const stack_offset = try self.allocMem(ptr_bytes * 2, ptr_bytes * 2, inst); try self.genSetStack(ptr_ty, stack_offset, ptr); - try self.genSetStack(Type.initTag(.usize), stack_offset - ptr_bytes, .{ .immediate = array_len }); + try self.genSetStack(Type.usize, stack_offset - ptr_bytes, .{ .immediate = array_len }); break :result MCValue{ .stack_offset = stack_offset }; }; return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); @@ -6044,8 +6054,9 @@ fn airReduce(self: *Self, inst: Air.Inst.Index) !void { } fn airAggregateInit(self: *Self, inst: Air.Inst.Index) !void { - const vector_ty = self.air.typeOfIndex(inst); - const len = vector_ty.vectorLen(); + const mod = self.bin_file.options.module.?; + const vector_ty = self.typeOfIndex(inst); + const len = vector_ty.vectorLen(mod); const ty_pl = self.air.instructions.items(.data)[inst].ty_pl; const elements = @ptrCast([]const Air.Inst.Ref, self.air.extra[ty_pl.payload..][0..len]); const result: MCValue = res: { @@ -6087,14 +6098,15 @@ fn airMulAdd(self: *Self, inst: Air.Inst.Index) !void { } fn airTry(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; const pl_op = self.air.instructions.items(.data)[inst].pl_op; const extra = self.air.extraData(Air.Try, pl_op.payload); const body = self.air.extra[extra.end..][0..extra.data.body_len]; const result: MCValue = result: { const error_union_bind: ReadArg.Bind = .{ .inst = pl_op.operand }; - const error_union_ty = 
self.air.typeOf(pl_op.operand); - const error_union_size = @intCast(u32, error_union_ty.abiSize(self.target.*)); - const error_union_align = error_union_ty.abiAlignment(self.target.*); + const error_union_ty = self.typeOf(pl_op.operand); + const error_union_size = @intCast(u32, error_union_ty.abiSize(mod)); + const error_union_align = error_union_ty.abiAlignment(mod); // The error union will die in the body. However, we need the // error union after the body in order to extract the payload @@ -6123,37 +6135,32 @@ fn airTryPtr(self: *Self, inst: Air.Inst.Index) !void { } fn resolveInst(self: *Self, inst: Air.Inst.Ref) InnerError!MCValue { - // First section of indexes correspond to a set number of constant values. - const ref_int = @enumToInt(inst); - if (ref_int < Air.Inst.Ref.typed_value_map.len) { - const tv = Air.Inst.Ref.typed_value_map[ref_int]; - if (!tv.ty.hasRuntimeBitsIgnoreComptime() and !tv.ty.isError()) { - return MCValue{ .none = {} }; - } - return self.genTypedValue(tv); - } + const mod = self.bin_file.options.module.?; // If the type has no codegen bits, no need to store it. - const inst_ty = self.air.typeOf(inst); - if (!inst_ty.hasRuntimeBitsIgnoreComptime() and !inst_ty.isError()) + const inst_ty = self.typeOf(inst); + if (!inst_ty.hasRuntimeBitsIgnoreComptime(mod) and !inst_ty.isError(mod)) return MCValue{ .none = {} }; - const inst_index = @intCast(Air.Inst.Index, ref_int - Air.Inst.Ref.typed_value_map.len); + const inst_index = Air.refToIndex(inst) orelse return self.genTypedValue(.{ + .ty = inst_ty, + .val = (try self.air.value(inst, mod)).?, + }); + switch (self.air.instructions.items(.tag)[inst_index]) { - .constant => { + .interned => { // Constants have static lifetimes, so they are always memoized in the outer most table. const branch = &self.branch_stack.items[0]; const gop = try branch.inst_table.getOrPut(self.gpa, inst_index); if (!gop.found_existing) { - const ty_pl = self.air.instructions.items(.data)[inst_index].ty_pl; + const interned = self.air.instructions.items(.data)[inst_index].interned; gop.value_ptr.* = try self.genTypedValue(.{ .ty = inst_ty, - .val = self.air.values[ty_pl.payload], + .val = interned.toValue(), }); } return gop.value_ptr.*; }, - .const_ty => unreachable, else => return self.getResolvedInstValue(inst_index), } } @@ -6208,12 +6215,11 @@ const CallMCValues = struct { /// Caller must call `CallMCValues.deinit`. fn resolveCallingConventionValues(self: *Self, fn_ty: Type) !CallMCValues { - const cc = fn_ty.fnCallingConvention(); - const param_types = try self.gpa.alloc(Type, fn_ty.fnParamLen()); - defer self.gpa.free(param_types); - fn_ty.fnParamTypes(param_types); + const mod = self.bin_file.options.module.?; + const fn_info = mod.typeToFunc(fn_ty).?; + const cc = fn_info.cc; var result: CallMCValues = .{ - .args = try self.gpa.alloc(MCValue, param_types.len), + .args = try self.gpa.alloc(MCValue, fn_info.param_types.len), // These undefined values must be populated before returning from this function. 
.return_value = undefined, .stack_byte_count = undefined, @@ -6221,7 +6227,7 @@ fn resolveCallingConventionValues(self: *Self, fn_ty: Type) !CallMCValues { }; errdefer self.gpa.free(result.args); - const ret_ty = fn_ty.fnReturnType(); + const ret_ty = fn_ty.fnReturnType(mod); switch (cc) { .Naked => { @@ -6236,14 +6242,14 @@ fn resolveCallingConventionValues(self: *Self, fn_ty: Type) !CallMCValues { var ncrn: usize = 0; // Next Core Register Number var nsaa: u32 = 0; // Next stacked argument address - if (ret_ty.zigTypeTag() == .NoReturn) { + if (ret_ty.zigTypeTag(mod) == .NoReturn) { result.return_value = .{ .unreach = {} }; - } else if (!ret_ty.hasRuntimeBitsIgnoreComptime() and !ret_ty.isError()) { + } else if (!ret_ty.hasRuntimeBitsIgnoreComptime(mod) and !ret_ty.isError(mod)) { result.return_value = .{ .none = {} }; } else { - const ret_ty_size = @intCast(u32, ret_ty.abiSize(self.target.*)); + const ret_ty_size = @intCast(u32, ret_ty.abiSize(mod)); if (ret_ty_size == 0) { - assert(ret_ty.isError()); + assert(ret_ty.isError(mod)); result.return_value = .{ .immediate = 0 }; } else if (ret_ty_size <= 8) { result.return_value = .{ .register = self.registerAlias(c_abi_int_return_regs[0], ret_ty) }; @@ -6252,8 +6258,8 @@ fn resolveCallingConventionValues(self: *Self, fn_ty: Type) !CallMCValues { } } - for (param_types, 0..) |ty, i| { - const param_size = @intCast(u32, ty.abiSize(self.target.*)); + for (fn_info.param_types, 0..) |ty, i| { + const param_size = @intCast(u32, ty.toType().abiSize(mod)); if (param_size == 0) { result.args[i] = .{ .none = {} }; continue; @@ -6261,14 +6267,14 @@ fn resolveCallingConventionValues(self: *Self, fn_ty: Type) !CallMCValues { // We round up NCRN only for non-Apple platforms which allow the 16-byte aligned // values to spread across odd-numbered registers. - if (ty.abiAlignment(self.target.*) == 16 and !self.target.isDarwin()) { + if (ty.toType().abiAlignment(mod) == 16 and !self.target.isDarwin()) { // Round up NCRN to the next even number ncrn += ncrn % 2; } if (std.math.divCeil(u32, param_size, 8) catch unreachable <= 8 - ncrn) { if (param_size <= 8) { - result.args[i] = .{ .register = self.registerAlias(c_abi_int_param_regs[ncrn], ty) }; + result.args[i] = .{ .register = self.registerAlias(c_abi_int_param_regs[ncrn], ty.toType()) }; ncrn += 1; } else { return self.fail("TODO MCValues with multiple registers", .{}); @@ -6279,7 +6285,7 @@ fn resolveCallingConventionValues(self: *Self, fn_ty: Type) !CallMCValues { ncrn = 8; // TODO Apple allows the arguments on the stack to be non-8-byte aligned provided // that the entire stack space consumed by the arguments is 8-byte aligned. 
- if (ty.abiAlignment(self.target.*) == 8) { + if (ty.toType().abiAlignment(mod) == 8) { if (nsaa % 8 != 0) { nsaa += 8 - (nsaa % 8); } @@ -6294,14 +6300,14 @@ fn resolveCallingConventionValues(self: *Self, fn_ty: Type) !CallMCValues { result.stack_align = 16; }, .Unspecified => { - if (ret_ty.zigTypeTag() == .NoReturn) { + if (ret_ty.zigTypeTag(mod) == .NoReturn) { result.return_value = .{ .unreach = {} }; - } else if (!ret_ty.hasRuntimeBitsIgnoreComptime() and !ret_ty.isError()) { + } else if (!ret_ty.hasRuntimeBitsIgnoreComptime(mod) and !ret_ty.isError(mod)) { result.return_value = .{ .none = {} }; } else { - const ret_ty_size = @intCast(u32, ret_ty.abiSize(self.target.*)); + const ret_ty_size = @intCast(u32, ret_ty.abiSize(mod)); if (ret_ty_size == 0) { - assert(ret_ty.isError()); + assert(ret_ty.isError(mod)); result.return_value = .{ .immediate = 0 }; } else if (ret_ty_size <= 8) { result.return_value = .{ .register = self.registerAlias(.x0, ret_ty) }; @@ -6317,12 +6323,12 @@ fn resolveCallingConventionValues(self: *Self, fn_ty: Type) !CallMCValues { var stack_offset: u32 = 0; - for (param_types, 0..) |ty, i| { - if (ty.abiSize(self.target.*) > 0) { - const param_size = @intCast(u32, ty.abiSize(self.target.*)); - const param_alignment = ty.abiAlignment(self.target.*); + for (fn_info.param_types, 0..) |ty, i| { + if (ty.toType().abiSize(mod) > 0) { + const param_size = @intCast(u32, ty.toType().abiSize(mod)); + const param_alignment = ty.toType().abiAlignment(mod); - stack_offset = std.mem.alignForwardGeneric(u32, stack_offset, param_alignment); + stack_offset = std.mem.alignForward(u32, stack_offset, param_alignment); result.args[i] = .{ .stack_argument_offset = stack_offset }; stack_offset += param_size; } else { @@ -6371,7 +6377,8 @@ fn parseRegName(name: []const u8) ?Register { } fn registerAlias(self: *Self, reg: Register, ty: Type) Register { - const abi_size = ty.abiSize(self.target.*); + const mod = self.bin_file.options.module.?; + const abi_size = ty.abiSize(mod); switch (reg.class()) { .general_purpose => { @@ -6397,3 +6404,13 @@ fn registerAlias(self: *Self, reg: Register, ty: Type) Register { }, } } + +fn typeOf(self: *Self, inst: Air.Inst.Ref) Type { + const mod = self.bin_file.options.module.?; + return self.air.typeOf(inst, &mod.intern_pool); +} + +fn typeOfIndex(self: *Self, inst: Air.Inst.Index) Type { + const mod = self.bin_file.options.module.?; + return self.air.typeOfIndex(inst, &mod.intern_pool); +} diff --git a/src/arch/aarch64/abi.zig b/src/arch/aarch64/abi.zig index 0c48f33ea1..72a6172895 100644 --- a/src/arch/aarch64/abi.zig +++ b/src/arch/aarch64/abi.zig @@ -4,6 +4,7 @@ const bits = @import("bits.zig"); const Register = bits.Register; const RegisterManagerFn = @import("../../register_manager.zig").RegisterManager; const Type = @import("../../type.zig").Type; +const Module = @import("../../Module.zig"); pub const Class = union(enum) { memory, @@ -14,44 +15,44 @@ pub const Class = union(enum) { }; /// For `float_array` the second element will be the amount of floats. 
-pub fn classifyType(ty: Type, target: std.Target) Class { - std.debug.assert(ty.hasRuntimeBitsIgnoreComptime()); +pub fn classifyType(ty: Type, mod: *Module) Class { + std.debug.assert(ty.hasRuntimeBitsIgnoreComptime(mod)); var maybe_float_bits: ?u16 = null; - switch (ty.zigTypeTag()) { + switch (ty.zigTypeTag(mod)) { .Struct => { - if (ty.containerLayout() == .Packed) return .byval; - const float_count = countFloats(ty, target, &maybe_float_bits); + if (ty.containerLayout(mod) == .Packed) return .byval; + const float_count = countFloats(ty, mod, &maybe_float_bits); if (float_count <= sret_float_count) return .{ .float_array = float_count }; - const bit_size = ty.bitSize(target); + const bit_size = ty.bitSize(mod); if (bit_size > 128) return .memory; if (bit_size > 64) return .double_integer; return .integer; }, .Union => { - if (ty.containerLayout() == .Packed) return .byval; - const float_count = countFloats(ty, target, &maybe_float_bits); + if (ty.containerLayout(mod) == .Packed) return .byval; + const float_count = countFloats(ty, mod, &maybe_float_bits); if (float_count <= sret_float_count) return .{ .float_array = float_count }; - const bit_size = ty.bitSize(target); + const bit_size = ty.bitSize(mod); if (bit_size > 128) return .memory; if (bit_size > 64) return .double_integer; return .integer; }, .Int, .Enum, .ErrorSet, .Float, .Bool => return .byval, .Vector => { - const bit_size = ty.bitSize(target); + const bit_size = ty.bitSize(mod); // TODO is this controlled by a cpu feature? if (bit_size > 128) return .memory; return .byval; }, .Optional => { - std.debug.assert(ty.isPtrLikeOptional()); + std.debug.assert(ty.isPtrLikeOptional(mod)); return .byval; }, .Pointer => { - std.debug.assert(!ty.isSlice()); + std.debug.assert(!ty.isSlice(mod)); return .byval; }, .ErrorUnion, @@ -73,14 +74,15 @@ pub fn classifyType(ty: Type, target: std.Target) Class { } const sret_float_count = 4; -fn countFloats(ty: Type, target: std.Target, maybe_float_bits: *?u16) u8 { +fn countFloats(ty: Type, mod: *Module, maybe_float_bits: *?u16) u8 { + const target = mod.getTarget(); const invalid = std.math.maxInt(u8); - switch (ty.zigTypeTag()) { + switch (ty.zigTypeTag(mod)) { .Union => { - const fields = ty.unionFields(); + const fields = ty.unionFields(mod); var max_count: u8 = 0; for (fields.values()) |field| { - const field_count = countFloats(field.ty, target, maybe_float_bits); + const field_count = countFloats(field.ty, mod, maybe_float_bits); if (field_count == invalid) return invalid; if (field_count > max_count) max_count = field_count; if (max_count > sret_float_count) return invalid; @@ -88,12 +90,12 @@ fn countFloats(ty: Type, target: std.Target, maybe_float_bits: *?u16) u8 { return max_count; }, .Struct => { - const fields_len = ty.structFieldCount(); + const fields_len = ty.structFieldCount(mod); var count: u8 = 0; var i: u32 = 0; while (i < fields_len) : (i += 1) { - const field_ty = ty.structFieldType(i); - const field_count = countFloats(field_ty, target, maybe_float_bits); + const field_ty = ty.structFieldType(i, mod); + const field_count = countFloats(field_ty, mod, maybe_float_bits); if (field_count == invalid) return invalid; count += field_count; if (count > sret_float_count) return invalid; @@ -113,21 +115,21 @@ fn countFloats(ty: Type, target: std.Target, maybe_float_bits: *?u16) u8 { } } -pub fn getFloatArrayType(ty: Type) ?Type { - switch (ty.zigTypeTag()) { +pub fn getFloatArrayType(ty: Type, mod: *Module) ?Type { + switch (ty.zigTypeTag(mod)) { .Union => { - const fields = 
ty.unionFields(); + const fields = ty.unionFields(mod); for (fields.values()) |field| { - if (getFloatArrayType(field.ty)) |some| return some; + if (getFloatArrayType(field.ty, mod)) |some| return some; } return null; }, .Struct => { - const fields_len = ty.structFieldCount(); + const fields_len = ty.structFieldCount(mod); var i: u32 = 0; while (i < fields_len) : (i += 1) { - const field_ty = ty.structFieldType(i); - if (getFloatArrayType(field_ty)) |some| return some; + const field_ty = ty.structFieldType(i, mod); + if (getFloatArrayType(field_ty, mod)) |some| return some; } return null; }, diff --git a/src/arch/arm/CodeGen.zig b/src/arch/arm/CodeGen.zig index 5353b78e4d..a2a5a3d4d3 100644 --- a/src/arch/arm/CodeGen.zig +++ b/src/arch/arm/CodeGen.zig @@ -334,7 +334,7 @@ const Self = @This(); pub fn generate( bin_file: *link.File, src_loc: Module.SrcLoc, - module_fn: *Module.Fn, + module_fn_index: Module.Fn.Index, air: Air, liveness: Liveness, code: *std.ArrayList(u8), @@ -345,6 +345,7 @@ pub fn generate( } const mod = bin_file.options.module.?; + const module_fn = mod.funcPtr(module_fn_index); const fn_owner_decl = mod.declPtr(module_fn.owner_decl); assert(fn_owner_decl.has_tv); const fn_type = fn_owner_decl.ty; @@ -477,7 +478,8 @@ pub fn addExtraAssumeCapacity(self: *Self, extra: anytype) u32 { } fn gen(self: *Self) !void { - const cc = self.fn_type.fnCallingConvention(); + const mod = self.bin_file.options.module.?; + const cc = self.fn_type.fnCallingConvention(mod); if (cc != .Naked) { // push {fp, lr} const push_reloc = try self.addNop(); @@ -518,10 +520,10 @@ fn gen(self: *Self) !void { const inst = self.air.getMainBody()[arg_index]; assert(self.air.instructions.items(.tag)[inst] == .arg); - const ty = self.air.typeOfIndex(inst); + const ty = self.typeOfIndex(inst); - const abi_size = @intCast(u32, ty.abiSize(self.target.*)); - const abi_align = ty.abiAlignment(self.target.*); + const abi_size = @intCast(u32, ty.abiSize(mod)); + const abi_align = ty.abiAlignment(mod); const stack_offset = try self.allocMem(abi_size, abi_align, inst); try self.genSetStack(ty, stack_offset, MCValue{ .register = reg }); @@ -558,7 +560,7 @@ fn gen(self: *Self) !void { // Backpatch stack offset const total_stack_size = self.max_end_stack + self.saved_regs_stack_space; - const aligned_total_stack_end = mem.alignForwardGeneric(u32, total_stack_size, self.stack_align); + const aligned_total_stack_end = mem.alignForward(u32, total_stack_size, self.stack_align); const stack_size = aligned_total_stack_end - self.saved_regs_stack_space; self.max_end_stack = stack_size; self.mir_instructions.set(sub_reloc, .{ @@ -636,13 +638,14 @@ fn gen(self: *Self) !void { } fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { + const mod = self.bin_file.options.module.?; + const ip = &mod.intern_pool; const air_tags = self.air.instructions.items(.tag); for (body) |inst| { // TODO: remove now-redundant isUnused calls from AIR handler functions - if (self.liveness.isUnused(inst) and !self.air.mustLower(inst)) { + if (self.liveness.isUnused(inst) and !self.air.mustLower(inst, ip)) continue; - } const old_air_bookkeeping = self.air_bookkeeping; try self.ensureProcessDeathCapacity(Liveness.bpi); @@ -826,8 +829,7 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .ptr_elem_val => try self.airPtrElemVal(inst), .ptr_elem_ptr => try self.airPtrElemPtr(inst), - .constant => unreachable, // excluded from function bodies - .const_ty => unreachable, // excluded from function bodies + 
.inferred_alloc, .inferred_alloc_comptime, .interned => unreachable, .unreach => self.finishAirBookkeeping(), .optional_payload => try self.airOptionalPayload(inst), @@ -900,8 +902,7 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { /// Asserts there is already capacity to insert into top branch inst_table. fn processDeath(self: *Self, inst: Air.Inst.Index) void { - const air_tags = self.air.instructions.items(.tag); - if (air_tags[inst] == .constant) return; // Constants are immortal. + assert(self.air.instructions.items(.tag)[inst] != .interned); // When editing this function, note that the logic must synchronize with `reuseOperand`. const prev_value = self.getResolvedInstValue(inst); const branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; @@ -937,8 +938,8 @@ fn finishAir(self: *Self, inst: Air.Inst.Index, result: MCValue, operands: [Live tomb_bits >>= 1; if (!dies) continue; const op_int = @enumToInt(op); - if (op_int < Air.Inst.Ref.typed_value_map.len) continue; - const op_index = @intCast(Air.Inst.Index, op_int - Air.Inst.Ref.typed_value_map.len); + if (op_int < Air.ref_start_index) continue; + const op_index = @intCast(Air.Inst.Index, op_int - Air.ref_start_index); self.processDeath(op_index); } const is_used = @truncate(u1, tomb_bits) == 0; @@ -990,7 +991,7 @@ fn allocMem( assert(abi_align > 0); // TODO find a free slot instead of always appending - const offset = mem.alignForwardGeneric(u32, self.next_stack_offset, abi_align) + abi_size; + const offset = mem.alignForward(u32, self.next_stack_offset, abi_align) + abi_size; self.next_stack_offset = offset; self.max_end_stack = @max(self.max_end_stack, self.next_stack_offset); @@ -1006,9 +1007,10 @@ fn allocMem( /// Use a pointer instruction as the basis for allocating stack memory. fn allocMemPtr(self: *Self, inst: Air.Inst.Index) !u32 { - const elem_ty = self.air.typeOfIndex(inst).elemType(); + const mod = self.bin_file.options.module.?; + const elem_ty = self.typeOfIndex(inst).childType(mod); - if (!elem_ty.hasRuntimeBits()) { + if (!elem_ty.hasRuntimeBits(mod)) { // As this stack item will never be dereferenced at runtime, // return the stack offset 0. Stack offset 0 will be where all // zero-sized stack allocations live as non-zero-sized @@ -1016,26 +1018,25 @@ fn allocMemPtr(self: *Self, inst: Air.Inst.Index) !u32 { return @as(u32, 0); } - const abi_size = math.cast(u32, elem_ty.abiSize(self.target.*)) orelse { - const mod = self.bin_file.options.module.?; + const abi_size = math.cast(u32, elem_ty.abiSize(mod)) orelse { return self.fail("type '{}' too big to fit into stack frame", .{elem_ty.fmt(mod)}); }; // TODO swap this for inst.ty.ptrAlign - const abi_align = elem_ty.abiAlignment(self.target.*); + const abi_align = elem_ty.abiAlignment(mod); return self.allocMem(abi_size, abi_align, inst); } fn allocRegOrMem(self: *Self, elem_ty: Type, reg_ok: bool, maybe_inst: ?Air.Inst.Index) !MCValue { - const abi_size = math.cast(u32, elem_ty.abiSize(self.target.*)) orelse { - const mod = self.bin_file.options.module.?; + const mod = self.bin_file.options.module.?; + const abi_size = math.cast(u32, elem_ty.abiSize(mod)) orelse { return self.fail("type '{}' too big to fit into stack frame", .{elem_ty.fmt(mod)}); }; - const abi_align = elem_ty.abiAlignment(self.target.*); + const abi_align = elem_ty.abiAlignment(mod); if (reg_ok) { // Make sure the type can fit in a register before we try to allocate one. 
- const ptr_bits = self.target.cpu.arch.ptrBitWidth(); + const ptr_bits = self.target.ptrBitWidth(); const ptr_bytes: u64 = @divExact(ptr_bits, 8); if (abi_size <= ptr_bytes) { if (self.register_manager.tryAllocReg(maybe_inst, gp)) |reg| { @@ -1049,7 +1050,7 @@ fn allocRegOrMem(self: *Self, elem_ty: Type, reg_ok: bool, maybe_inst: ?Air.Inst } pub fn spillInstruction(self: *Self, reg: Register, inst: Air.Inst.Index) !void { - const stack_mcv = try self.allocRegOrMem(self.air.typeOfIndex(inst), false, inst); + const stack_mcv = try self.allocRegOrMem(self.typeOfIndex(inst), false, inst); log.debug("spilling {} (%{d}) to stack mcv {any}", .{ reg, inst, stack_mcv }); const reg_mcv = self.getResolvedInstValue(inst); @@ -1063,14 +1064,14 @@ pub fn spillInstruction(self: *Self, reg: Register, inst: Air.Inst.Index) !void const branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; try branch.inst_table.put(self.gpa, inst, stack_mcv); - try self.genSetStack(self.air.typeOfIndex(inst), stack_mcv.stack_offset, reg_mcv); + try self.genSetStack(self.typeOfIndex(inst), stack_mcv.stack_offset, reg_mcv); } /// Save the current instruction stored in the compare flags if /// occupied fn spillCompareFlagsIfOccupied(self: *Self) !void { if (self.cpsr_flags_inst) |inst_to_save| { - const ty = self.air.typeOfIndex(inst_to_save); + const ty = self.typeOfIndex(inst_to_save); const mcv = self.getResolvedInstValue(inst_to_save); const new_mcv = switch (mcv) { .cpsr_flags => try self.allocRegOrMem(ty, true, inst_to_save), @@ -1080,7 +1081,7 @@ fn spillCompareFlagsIfOccupied(self: *Self) !void { else => unreachable, // mcv doesn't occupy the compare flags }; - try self.setRegOrMem(self.air.typeOfIndex(inst_to_save), new_mcv, mcv); + try self.setRegOrMem(self.typeOfIndex(inst_to_save), new_mcv, mcv); log.debug("spilling {d} to mcv {any}", .{ inst_to_save, new_mcv }); const branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; @@ -1114,17 +1115,14 @@ fn airAlloc(self: *Self, inst: Air.Inst.Index) !void { } fn airRetPtr(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; const result: MCValue = switch (self.ret_mcv) { .none, .register => .{ .ptr_stack_offset = try self.allocMemPtr(inst) }, .stack_offset => blk: { // self.ret_mcv is an address to where this function // should store its result into - const ret_ty = self.fn_type.fnReturnType(); - var ptr_ty_payload: Type.Payload.ElemType = .{ - .base = .{ .tag = .single_mut_pointer }, - .data = ret_ty, - }; - const ptr_ty = Type.initPayload(&ptr_ty_payload.base); + const ret_ty = self.fn_type.fnReturnType(mod); + const ptr_ty = try mod.singleMutPtrType(ret_ty); // addr_reg will contain the address of where to store the // result into @@ -1150,18 +1148,19 @@ fn airFpext(self: *Self, inst: Air.Inst.Index) !void { } fn airIntCast(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; const ty_op = self.air.instructions.items(.data)[inst].ty_op; if (self.liveness.isUnused(inst)) return self.finishAir(inst, .dead, .{ ty_op.operand, .none, .none }); const operand = try self.resolveInst(ty_op.operand); - const operand_ty = self.air.typeOf(ty_op.operand); - const dest_ty = self.air.typeOfIndex(inst); + const operand_ty = self.typeOf(ty_op.operand); + const dest_ty = self.typeOfIndex(inst); - const operand_abi_size = operand_ty.abiSize(self.target.*); - const dest_abi_size = dest_ty.abiSize(self.target.*); - const info_a = operand_ty.intInfo(self.target.*); - const info_b = 
dest_ty.intInfo(self.target.*); + const operand_abi_size = operand_ty.abiSize(mod); + const dest_abi_size = dest_ty.abiSize(mod); + const info_a = operand_ty.intInfo(mod); + const info_b = dest_ty.intInfo(mod); const dst_mcv: MCValue = blk: { if (info_a.bits == info_b.bits) { @@ -1215,8 +1214,9 @@ fn trunc( operand_ty: Type, dest_ty: Type, ) !MCValue { - const info_a = operand_ty.intInfo(self.target.*); - const info_b = dest_ty.intInfo(self.target.*); + const mod = self.bin_file.options.module.?; + const info_a = operand_ty.intInfo(mod); + const info_b = dest_ty.intInfo(mod); if (info_b.bits <= 32) { if (info_a.bits > 32) { @@ -1259,8 +1259,8 @@ fn trunc( fn airTrunc(self: *Self, inst: Air.Inst.Index) !void { const ty_op = self.air.instructions.items(.data)[inst].ty_op; const operand_bind: ReadArg.Bind = .{ .inst = ty_op.operand }; - const operand_ty = self.air.typeOf(ty_op.operand); - const dest_ty = self.air.typeOfIndex(inst); + const operand_ty = self.typeOf(ty_op.operand); + const dest_ty = self.typeOfIndex(inst); const result: MCValue = if (self.liveness.isUnused(inst)) .dead else blk: { break :blk try self.trunc(inst, operand_bind, operand_ty, dest_ty); @@ -1278,15 +1278,16 @@ fn airBoolToInt(self: *Self, inst: Air.Inst.Index) !void { fn airNot(self: *Self, inst: Air.Inst.Index) !void { const ty_op = self.air.instructions.items(.data)[inst].ty_op; + const mod = self.bin_file.options.module.?; const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: { const operand_bind: ReadArg.Bind = .{ .inst = ty_op.operand }; - const operand_ty = self.air.typeOf(ty_op.operand); + const operand_ty = self.typeOf(ty_op.operand); switch (try operand_bind.resolveToMcv(self)) { .dead => unreachable, .unreach => unreachable, .cpsr_flags => |cond| break :result MCValue{ .cpsr_flags = cond.negate() }, else => { - switch (operand_ty.zigTypeTag()) { + switch (operand_ty.zigTypeTag(mod)) { .Bool => { var op_reg: Register = undefined; var dest_reg: Register = undefined; @@ -1319,7 +1320,7 @@ fn airNot(self: *Self, inst: Air.Inst.Index) !void { }, .Vector => return self.fail("TODO bitwise not for vectors", .{}), .Int => { - const int_info = operand_ty.intInfo(self.target.*); + const int_info = operand_ty.intInfo(mod); if (int_info.bits <= 32) { var op_reg: Register = undefined; var dest_reg: Register = undefined; @@ -1373,13 +1374,13 @@ fn minMax( rhs_ty: Type, maybe_inst: ?Air.Inst.Index, ) !MCValue { - switch (lhs_ty.zigTypeTag()) { + const mod = self.bin_file.options.module.?; + switch (lhs_ty.zigTypeTag(mod)) { .Float => return self.fail("TODO ARM min/max on floats", .{}), .Vector => return self.fail("TODO ARM min/max on vectors", .{}), .Int => { - const mod = self.bin_file.options.module.?; assert(lhs_ty.eql(rhs_ty, mod)); - const int_info = lhs_ty.intInfo(self.target.*); + const int_info = lhs_ty.intInfo(mod); if (int_info.bits <= 32) { var lhs_reg: Register = undefined; var rhs_reg: Register = undefined; @@ -1463,8 +1464,8 @@ fn minMax( fn airMinMax(self: *Self, inst: Air.Inst.Index) !void { const tag = self.air.instructions.items(.tag)[inst]; const bin_op = self.air.instructions.items(.data)[inst].bin_op; - const lhs_ty = self.air.typeOf(bin_op.lhs); - const rhs_ty = self.air.typeOf(bin_op.rhs); + const lhs_ty = self.typeOf(bin_op.lhs); + const rhs_ty = self.typeOf(bin_op.rhs); const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: { const lhs_bind: ReadArg.Bind = .{ .inst = bin_op.lhs }; @@ -1483,9 +1484,9 @@ fn airSlice(self: *Self, inst: Air.Inst.Index) !void 
{ const bin_op = self.air.extraData(Air.Bin, ty_pl.payload).data; const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: { const ptr = try self.resolveInst(bin_op.lhs); - const ptr_ty = self.air.typeOf(bin_op.lhs); + const ptr_ty = self.typeOf(bin_op.lhs); const len = try self.resolveInst(bin_op.rhs); - const len_ty = self.air.typeOf(bin_op.rhs); + const len_ty = self.typeOf(bin_op.rhs); const stack_offset = try self.allocMem(8, 4, inst); try self.genSetStack(ptr_ty, stack_offset, ptr); @@ -1497,8 +1498,8 @@ fn airSlice(self: *Self, inst: Air.Inst.Index) !void { fn airBinOp(self: *Self, inst: Air.Inst.Index, tag: Air.Inst.Tag) !void { const bin_op = self.air.instructions.items(.data)[inst].bin_op; - const lhs_ty = self.air.typeOf(bin_op.lhs); - const rhs_ty = self.air.typeOf(bin_op.rhs); + const lhs_ty = self.typeOf(bin_op.lhs); + const rhs_ty = self.typeOf(bin_op.rhs); const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: { const lhs_bind: ReadArg.Bind = .{ .inst = bin_op.lhs }; @@ -1548,8 +1549,8 @@ fn airBinOp(self: *Self, inst: Air.Inst.Index, tag: Air.Inst.Tag) !void { fn airPtrArithmetic(self: *Self, inst: Air.Inst.Index, tag: Air.Inst.Tag) !void { const ty_pl = self.air.instructions.items(.data)[inst].ty_pl; const bin_op = self.air.extraData(Air.Bin, ty_pl.payload).data; - const lhs_ty = self.air.typeOf(bin_op.lhs); - const rhs_ty = self.air.typeOf(bin_op.rhs); + const lhs_ty = self.typeOf(bin_op.lhs); + const rhs_ty = self.typeOf(bin_op.rhs); const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: { const lhs_bind: ReadArg.Bind = .{ .inst = bin_op.lhs }; @@ -1582,23 +1583,23 @@ fn airOverflow(self: *Self, inst: Air.Inst.Index) !void { const tag = self.air.instructions.items(.tag)[inst]; const ty_pl = self.air.instructions.items(.data)[inst].ty_pl; const extra = self.air.extraData(Air.Bin, ty_pl.payload).data; + const mod = self.bin_file.options.module.?; const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: { const lhs_bind: ReadArg.Bind = .{ .inst = extra.lhs }; const rhs_bind: ReadArg.Bind = .{ .inst = extra.rhs }; - const lhs_ty = self.air.typeOf(extra.lhs); - const rhs_ty = self.air.typeOf(extra.rhs); + const lhs_ty = self.typeOf(extra.lhs); + const rhs_ty = self.typeOf(extra.rhs); - const tuple_ty = self.air.typeOfIndex(inst); - const tuple_size = @intCast(u32, tuple_ty.abiSize(self.target.*)); - const tuple_align = tuple_ty.abiAlignment(self.target.*); - const overflow_bit_offset = @intCast(u32, tuple_ty.structFieldOffset(1, self.target.*)); + const tuple_ty = self.typeOfIndex(inst); + const tuple_size = @intCast(u32, tuple_ty.abiSize(mod)); + const tuple_align = tuple_ty.abiAlignment(mod); + const overflow_bit_offset = @intCast(u32, tuple_ty.structFieldOffset(1, mod)); - switch (lhs_ty.zigTypeTag()) { + switch (lhs_ty.zigTypeTag(mod)) { .Vector => return self.fail("TODO implement add_with_overflow/sub_with_overflow for vectors", .{}), .Int => { - const mod = self.bin_file.options.module.?; assert(lhs_ty.eql(rhs_ty, mod)); - const int_info = lhs_ty.intInfo(self.target.*); + const int_info = lhs_ty.intInfo(mod); if (int_info.bits < 32) { const stack_offset = try self.allocMem(tuple_size, tuple_align, inst); @@ -1631,7 +1632,7 @@ fn airOverflow(self: *Self, inst: Air.Inst.Index) !void { }); try self.genSetStack(lhs_ty, stack_offset, .{ .register = truncated_reg }); - try self.genSetStack(Type.initTag(.u1), stack_offset - overflow_bit_offset, .{ .cpsr_flags = .ne }); + try 
self.genSetStack(Type.u1, stack_offset - overflow_bit_offset, .{ .cpsr_flags = .ne }); break :result MCValue{ .stack_offset = stack_offset }; } else if (int_info.bits == 32) { @@ -1695,23 +1696,23 @@ fn airMulWithOverflow(self: *Self, inst: Air.Inst.Index) !void { const ty_pl = self.air.instructions.items(.data)[inst].ty_pl; const extra = self.air.extraData(Air.Bin, ty_pl.payload).data; if (self.liveness.isUnused(inst)) return self.finishAir(inst, .dead, .{ extra.lhs, extra.rhs, .none }); + const mod = self.bin_file.options.module.?; const result: MCValue = result: { const lhs_bind: ReadArg.Bind = .{ .inst = extra.lhs }; const rhs_bind: ReadArg.Bind = .{ .inst = extra.rhs }; - const lhs_ty = self.air.typeOf(extra.lhs); - const rhs_ty = self.air.typeOf(extra.rhs); + const lhs_ty = self.typeOf(extra.lhs); + const rhs_ty = self.typeOf(extra.rhs); - const tuple_ty = self.air.typeOfIndex(inst); - const tuple_size = @intCast(u32, tuple_ty.abiSize(self.target.*)); - const tuple_align = tuple_ty.abiAlignment(self.target.*); - const overflow_bit_offset = @intCast(u32, tuple_ty.structFieldOffset(1, self.target.*)); + const tuple_ty = self.typeOfIndex(inst); + const tuple_size = @intCast(u32, tuple_ty.abiSize(mod)); + const tuple_align = tuple_ty.abiAlignment(mod); + const overflow_bit_offset = @intCast(u32, tuple_ty.structFieldOffset(1, mod)); - switch (lhs_ty.zigTypeTag()) { + switch (lhs_ty.zigTypeTag(mod)) { .Vector => return self.fail("TODO implement mul_with_overflow for vectors", .{}), .Int => { - const mod = self.bin_file.options.module.?; assert(lhs_ty.eql(rhs_ty, mod)); - const int_info = lhs_ty.intInfo(self.target.*); + const int_info = lhs_ty.intInfo(mod); if (int_info.bits <= 16) { const stack_offset = try self.allocMem(tuple_size, tuple_align, inst); @@ -1744,7 +1745,7 @@ fn airMulWithOverflow(self: *Self, inst: Air.Inst.Index) !void { }); try self.genSetStack(lhs_ty, stack_offset, .{ .register = truncated_reg }); - try self.genSetStack(Type.initTag(.u1), stack_offset - overflow_bit_offset, .{ .cpsr_flags = .ne }); + try self.genSetStack(Type.u1, stack_offset - overflow_bit_offset, .{ .cpsr_flags = .ne }); break :result MCValue{ .stack_offset = stack_offset }; } else if (int_info.bits <= 32) { @@ -1842,7 +1843,7 @@ fn airMulWithOverflow(self: *Self, inst: Air.Inst.Index) !void { }); // strb rdlo, [...] 
- try self.genSetStack(Type.initTag(.u1), stack_offset - overflow_bit_offset, .{ .register = rdlo }); + try self.genSetStack(Type.u1, stack_offset - overflow_bit_offset, .{ .register = rdlo }); break :result MCValue{ .stack_offset = stack_offset }; } else { @@ -1859,19 +1860,20 @@ fn airShlWithOverflow(self: *Self, inst: Air.Inst.Index) !void { const ty_pl = self.air.instructions.items(.data)[inst].ty_pl; const extra = self.air.extraData(Air.Bin, ty_pl.payload).data; if (self.liveness.isUnused(inst)) return self.finishAir(inst, .dead, .{ extra.lhs, extra.rhs, .none }); + const mod = self.bin_file.options.module.?; const result: MCValue = result: { - const lhs_ty = self.air.typeOf(extra.lhs); - const rhs_ty = self.air.typeOf(extra.rhs); + const lhs_ty = self.typeOf(extra.lhs); + const rhs_ty = self.typeOf(extra.rhs); - const tuple_ty = self.air.typeOfIndex(inst); - const tuple_size = @intCast(u32, tuple_ty.abiSize(self.target.*)); - const tuple_align = tuple_ty.abiAlignment(self.target.*); - const overflow_bit_offset = @intCast(u32, tuple_ty.structFieldOffset(1, self.target.*)); + const tuple_ty = self.typeOfIndex(inst); + const tuple_size = @intCast(u32, tuple_ty.abiSize(mod)); + const tuple_align = tuple_ty.abiAlignment(mod); + const overflow_bit_offset = @intCast(u32, tuple_ty.structFieldOffset(1, mod)); - switch (lhs_ty.zigTypeTag()) { + switch (lhs_ty.zigTypeTag(mod)) { .Vector => return self.fail("TODO implement shl_with_overflow for vectors", .{}), .Int => { - const int_info = lhs_ty.intInfo(self.target.*); + const int_info = lhs_ty.intInfo(mod); if (int_info.bits <= 32) { const stack_offset = try self.allocMem(tuple_size, tuple_align, inst); @@ -1976,7 +1978,7 @@ fn airShlWithOverflow(self: *Self, inst: Air.Inst.Index) !void { }); try self.genSetStack(lhs_ty, stack_offset, .{ .register = dest_reg }); - try self.genSetStack(Type.initTag(.u1), stack_offset - overflow_bit_offset, .{ .cpsr_flags = .ne }); + try self.genSetStack(Type.u1, stack_offset - overflow_bit_offset, .{ .cpsr_flags = .ne }); break :result MCValue{ .stack_offset = stack_offset }; } else { @@ -2014,10 +2016,11 @@ fn airOptionalPayloadPtrSet(self: *Self, inst: Air.Inst.Index) !void { } fn airWrapOptional(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; const ty_op = self.air.instructions.items(.data)[inst].ty_op; const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: { - const optional_ty = self.air.typeOfIndex(inst); - const abi_size = @intCast(u32, optional_ty.abiSize(self.target.*)); + const optional_ty = self.typeOfIndex(inst); + const abi_size = @intCast(u32, optional_ty.abiSize(mod)); // Optional with a zero-bit payload type is just a boolean true if (abi_size == 1) { @@ -2036,16 +2039,17 @@ fn errUnionErr( error_union_ty: Type, maybe_inst: ?Air.Inst.Index, ) !MCValue { - const err_ty = error_union_ty.errorUnionSet(); - const payload_ty = error_union_ty.errorUnionPayload(); - if (err_ty.errorSetIsEmpty()) { + const mod = self.bin_file.options.module.?; + const err_ty = error_union_ty.errorUnionSet(mod); + const payload_ty = error_union_ty.errorUnionPayload(mod); + if (err_ty.errorSetIsEmpty(mod)) { return MCValue{ .immediate = 0 }; } - if (!payload_ty.hasRuntimeBitsIgnoreComptime()) { + if (!payload_ty.hasRuntimeBitsIgnoreComptime(mod)) { return try error_union_bind.resolveToMcv(self); } - const err_offset = @intCast(u32, errUnionErrorOffset(payload_ty, self.target.*)); + const err_offset = @intCast(u32, errUnionErrorOffset(payload_ty, mod)); switch 
(try error_union_bind.resolveToMcv(self)) { .register => { var operand_reg: Register = undefined; @@ -2067,7 +2071,7 @@ fn errUnionErr( ); const err_bit_offset = err_offset * 8; - const err_bit_size = @intCast(u32, err_ty.abiSize(self.target.*)) * 8; + const err_bit_size = @intCast(u32, err_ty.abiSize(mod)) * 8; _ = try self.addInst(.{ .tag = .ubfx, // errors are unsigned integers @@ -2098,7 +2102,7 @@ fn airUnwrapErrErr(self: *Self, inst: Air.Inst.Index) !void { const ty_op = self.air.instructions.items(.data)[inst].ty_op; const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: { const error_union_bind: ReadArg.Bind = .{ .inst = ty_op.operand }; - const error_union_ty = self.air.typeOf(ty_op.operand); + const error_union_ty = self.typeOf(ty_op.operand); break :result try self.errUnionErr(error_union_bind, error_union_ty, inst); }; @@ -2112,16 +2116,17 @@ fn errUnionPayload( error_union_ty: Type, maybe_inst: ?Air.Inst.Index, ) !MCValue { - const err_ty = error_union_ty.errorUnionSet(); - const payload_ty = error_union_ty.errorUnionPayload(); - if (err_ty.errorSetIsEmpty()) { + const mod = self.bin_file.options.module.?; + const err_ty = error_union_ty.errorUnionSet(mod); + const payload_ty = error_union_ty.errorUnionPayload(mod); + if (err_ty.errorSetIsEmpty(mod)) { return try error_union_bind.resolveToMcv(self); } - if (!payload_ty.hasRuntimeBitsIgnoreComptime()) { + if (!payload_ty.hasRuntimeBitsIgnoreComptime(mod)) { return MCValue.none; } - const payload_offset = @intCast(u32, errUnionPayloadOffset(payload_ty, self.target.*)); + const payload_offset = @intCast(u32, errUnionPayloadOffset(payload_ty, mod)); switch (try error_union_bind.resolveToMcv(self)) { .register => { var operand_reg: Register = undefined; @@ -2143,10 +2148,10 @@ fn errUnionPayload( ); const payload_bit_offset = payload_offset * 8; - const payload_bit_size = @intCast(u32, payload_ty.abiSize(self.target.*)) * 8; + const payload_bit_size = @intCast(u32, payload_ty.abiSize(mod)) * 8; _ = try self.addInst(.{ - .tag = if (payload_ty.isSignedInt()) Mir.Inst.Tag.sbfx else .ubfx, + .tag = if (payload_ty.isSignedInt(mod)) Mir.Inst.Tag.sbfx else .ubfx, .data = .{ .rr_lsb_width = .{ .rd = dest_reg, .rn = operand_reg, @@ -2174,7 +2179,7 @@ fn airUnwrapErrPayload(self: *Self, inst: Air.Inst.Index) !void { const ty_op = self.air.instructions.items(.data)[inst].ty_op; const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: { const error_union_bind: ReadArg.Bind = .{ .inst = ty_op.operand }; - const error_union_ty = self.air.typeOf(ty_op.operand); + const error_union_ty = self.typeOf(ty_op.operand); break :result try self.errUnionPayload(error_union_bind, error_union_ty, inst); }; @@ -2221,19 +2226,20 @@ fn airSaveErrReturnTraceIndex(self: *Self, inst: Air.Inst.Index) !void { /// T to E!T fn airWrapErrUnionPayload(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; const ty_op = self.air.instructions.items(.data)[inst].ty_op; const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: { const error_union_ty = self.air.getRefType(ty_op.ty); - const error_ty = error_union_ty.errorUnionSet(); - const payload_ty = error_union_ty.errorUnionPayload(); + const error_ty = error_union_ty.errorUnionSet(mod); + const payload_ty = error_union_ty.errorUnionPayload(mod); const operand = try self.resolveInst(ty_op.operand); - if (!payload_ty.hasRuntimeBitsIgnoreComptime()) break :result operand; + if (!payload_ty.hasRuntimeBitsIgnoreComptime(mod)) break 
:result operand; - const abi_size = @intCast(u32, error_union_ty.abiSize(self.target.*)); - const abi_align = error_union_ty.abiAlignment(self.target.*); + const abi_size = @intCast(u32, error_union_ty.abiSize(mod)); + const abi_align = error_union_ty.abiAlignment(mod); const stack_offset = @intCast(u32, try self.allocMem(abi_size, abi_align, inst)); - const payload_off = errUnionPayloadOffset(payload_ty, self.target.*); - const err_off = errUnionErrorOffset(payload_ty, self.target.*); + const payload_off = errUnionPayloadOffset(payload_ty, mod); + const err_off = errUnionErrorOffset(payload_ty, mod); try self.genSetStack(payload_ty, stack_offset - @intCast(u32, payload_off), operand); try self.genSetStack(error_ty, stack_offset - @intCast(u32, err_off), .{ .immediate = 0 }); @@ -2244,19 +2250,20 @@ fn airWrapErrUnionPayload(self: *Self, inst: Air.Inst.Index) !void { /// E to E!T fn airWrapErrUnionErr(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; const ty_op = self.air.instructions.items(.data)[inst].ty_op; const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: { const error_union_ty = self.air.getRefType(ty_op.ty); - const error_ty = error_union_ty.errorUnionSet(); - const payload_ty = error_union_ty.errorUnionPayload(); + const error_ty = error_union_ty.errorUnionSet(mod); + const payload_ty = error_union_ty.errorUnionPayload(mod); const operand = try self.resolveInst(ty_op.operand); - if (!payload_ty.hasRuntimeBitsIgnoreComptime()) break :result operand; + if (!payload_ty.hasRuntimeBitsIgnoreComptime(mod)) break :result operand; - const abi_size = @intCast(u32, error_union_ty.abiSize(self.target.*)); - const abi_align = error_union_ty.abiAlignment(self.target.*); + const abi_size = @intCast(u32, error_union_ty.abiSize(mod)); + const abi_align = error_union_ty.abiAlignment(mod); const stack_offset = @intCast(u32, try self.allocMem(abi_size, abi_align, inst)); - const payload_off = errUnionPayloadOffset(payload_ty, self.target.*); - const err_off = errUnionErrorOffset(payload_ty, self.target.*); + const payload_off = errUnionPayloadOffset(payload_ty, mod); + const err_off = errUnionErrorOffset(payload_ty, mod); try self.genSetStack(error_ty, stack_offset - @intCast(u32, err_off), operand); try self.genSetStack(payload_ty, stack_offset - @intCast(u32, payload_off), .undef); @@ -2360,8 +2367,9 @@ fn ptrElemVal( ptr_ty: Type, maybe_inst: ?Air.Inst.Index, ) !MCValue { - const elem_ty = ptr_ty.childType(); - const elem_size = @intCast(u32, elem_ty.abiSize(self.target.*)); + const mod = self.bin_file.options.module.?; + const elem_ty = ptr_ty.childType(mod); + const elem_size = @intCast(u32, elem_ty.abiSize(mod)); switch (elem_size) { 1, 4 => { @@ -2418,11 +2426,11 @@ fn ptrElemVal( } fn airSliceElemVal(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; const bin_op = self.air.instructions.items(.data)[inst].bin_op; - const slice_ty = self.air.typeOf(bin_op.lhs); - const result: MCValue = if (!slice_ty.isVolatilePtr() and self.liveness.isUnused(inst)) .dead else result: { - var buf: Type.SlicePtrFieldTypeBuffer = undefined; - const ptr_ty = slice_ty.slicePtrFieldType(&buf); + const slice_ty = self.typeOf(bin_op.lhs); + const result: MCValue = if (!slice_ty.isVolatilePtr(mod) and self.liveness.isUnused(inst)) .dead else result: { + const ptr_ty = slice_ty.slicePtrFieldType(mod); const slice_mcv = try self.resolveInst(bin_op.lhs); const base_mcv = slicePtr(slice_mcv); @@ -2445,8 +2453,8 @@ fn 
airSliceElemPtr(self: *Self, inst: Air.Inst.Index) !void { const base_bind: ReadArg.Bind = .{ .mcv = base_mcv }; const index_bind: ReadArg.Bind = .{ .inst = extra.rhs }; - const slice_ty = self.air.typeOf(extra.lhs); - const index_ty = self.air.typeOf(extra.rhs); + const slice_ty = self.typeOf(extra.lhs); + const index_ty = self.typeOf(extra.rhs); const addr = try self.ptrArithmetic(.ptr_add, base_bind, index_bind, slice_ty, index_ty, null); break :result addr; @@ -2461,7 +2469,8 @@ fn arrayElemVal( array_ty: Type, maybe_inst: ?Air.Inst.Index, ) InnerError!MCValue { - const elem_ty = array_ty.childType(); + const mod = self.bin_file.options.module.?; + const elem_ty = array_ty.childType(mod); const mcv = try array_bind.resolveToMcv(self); switch (mcv) { @@ -2495,11 +2504,7 @@ fn arrayElemVal( const base_bind: ReadArg.Bind = .{ .mcv = ptr_to_mcv }; - var ptr_ty_payload: Type.Payload.ElemType = .{ - .base = .{ .tag = .single_mut_pointer }, - .data = elem_ty, - }; - const ptr_ty = Type.initPayload(&ptr_ty_payload.base); + const ptr_ty = try mod.singleMutPtrType(elem_ty); return try self.ptrElemVal(base_bind, index_bind, ptr_ty, maybe_inst); }, @@ -2512,7 +2517,7 @@ fn airArrayElemVal(self: *Self, inst: Air.Inst.Index) !void { const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: { const array_bind: ReadArg.Bind = .{ .inst = bin_op.lhs }; const index_bind: ReadArg.Bind = .{ .inst = bin_op.rhs }; - const array_ty = self.air.typeOf(bin_op.lhs); + const array_ty = self.typeOf(bin_op.lhs); break :result try self.arrayElemVal(array_bind, index_bind, array_ty, inst); }; @@ -2520,9 +2525,10 @@ fn airArrayElemVal(self: *Self, inst: Air.Inst.Index) !void { } fn airPtrElemVal(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; const bin_op = self.air.instructions.items(.data)[inst].bin_op; - const ptr_ty = self.air.typeOf(bin_op.lhs); - const result: MCValue = if (!ptr_ty.isVolatilePtr() and self.liveness.isUnused(inst)) .dead else result: { + const ptr_ty = self.typeOf(bin_op.lhs); + const result: MCValue = if (!ptr_ty.isVolatilePtr(mod) and self.liveness.isUnused(inst)) .dead else result: { const base_bind: ReadArg.Bind = .{ .inst = bin_op.lhs }; const index_bind: ReadArg.Bind = .{ .inst = bin_op.rhs }; @@ -2538,8 +2544,8 @@ fn airPtrElemPtr(self: *Self, inst: Air.Inst.Index) !void { const ptr_bind: ReadArg.Bind = .{ .inst = extra.lhs }; const index_bind: ReadArg.Bind = .{ .inst = extra.rhs }; - const ptr_ty = self.air.typeOf(extra.lhs); - const index_ty = self.air.typeOf(extra.rhs); + const ptr_ty = self.typeOf(extra.lhs); + const index_ty = self.typeOf(extra.rhs); const addr = try self.ptrArithmetic(.ptr_add, ptr_bind, index_bind, ptr_ty, index_ty, null); break :result addr; @@ -2646,8 +2652,9 @@ fn reuseOperand( } fn load(self: *Self, dst_mcv: MCValue, ptr: MCValue, ptr_ty: Type) InnerError!void { - const elem_ty = ptr_ty.elemType(); - const elem_size = @intCast(u32, elem_ty.abiSize(self.target.*)); + const mod = self.bin_file.options.module.?; + const elem_ty = ptr_ty.childType(mod); + const elem_size = @intCast(u32, elem_ty.abiSize(mod)); switch (ptr) { .none => unreachable, @@ -2722,19 +2729,20 @@ fn load(self: *Self, dst_mcv: MCValue, ptr: MCValue, ptr_ty: Type) InnerError!vo } fn airLoad(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; const ty_op = self.air.instructions.items(.data)[inst].ty_op; - const elem_ty = self.air.typeOfIndex(inst); + const elem_ty = self.typeOfIndex(inst); const 
result: MCValue = result: { - if (!elem_ty.hasRuntimeBits()) + if (!elem_ty.hasRuntimeBits(mod)) break :result MCValue.none; const ptr = try self.resolveInst(ty_op.operand); - const is_volatile = self.air.typeOf(ty_op.operand).isVolatilePtr(); + const is_volatile = self.typeOf(ty_op.operand).isVolatilePtr(mod); if (self.liveness.isUnused(inst) and !is_volatile) break :result MCValue.dead; const dest_mcv: MCValue = blk: { - const ptr_fits_dest = elem_ty.abiSize(self.target.*) <= 4; + const ptr_fits_dest = elem_ty.abiSize(mod) <= 4; if (ptr_fits_dest and self.reuseOperand(inst, ty_op.operand, 0, ptr)) { // The MCValue that holds the pointer can be re-used as the value. break :blk ptr; @@ -2742,7 +2750,7 @@ fn airLoad(self: *Self, inst: Air.Inst.Index) !void { break :blk try self.allocRegOrMem(elem_ty, true, inst); } }; - try self.load(dest_mcv, ptr, self.air.typeOf(ty_op.operand)); + try self.load(dest_mcv, ptr, self.typeOf(ty_op.operand)); break :result dest_mcv; }; @@ -2750,7 +2758,8 @@ fn airLoad(self: *Self, inst: Air.Inst.Index) !void { } fn store(self: *Self, ptr: MCValue, value: MCValue, ptr_ty: Type, value_ty: Type) InnerError!void { - const elem_size = @intCast(u32, value_ty.abiSize(self.target.*)); + const mod = self.bin_file.options.module.?; + const elem_size = @intCast(u32, value_ty.abiSize(mod)); switch (ptr) { .none => unreachable, @@ -2846,8 +2855,8 @@ fn airStore(self: *Self, inst: Air.Inst.Index, safety: bool) !void { const bin_op = self.air.instructions.items(.data)[inst].bin_op; const ptr = try self.resolveInst(bin_op.lhs); const value = try self.resolveInst(bin_op.rhs); - const ptr_ty = self.air.typeOf(bin_op.lhs); - const value_ty = self.air.typeOf(bin_op.rhs); + const ptr_ty = self.typeOf(bin_op.lhs); + const value_ty = self.typeOf(bin_op.rhs); try self.store(ptr, value, ptr_ty, value_ty); @@ -2869,10 +2878,11 @@ fn airStructFieldPtrIndex(self: *Self, inst: Air.Inst.Index, index: u8) !void { fn structFieldPtr(self: *Self, inst: Air.Inst.Index, operand: Air.Inst.Ref, index: u32) !MCValue { return if (self.liveness.isUnused(inst)) .dead else result: { + const mod = self.bin_file.options.module.?; const mcv = try self.resolveInst(operand); - const ptr_ty = self.air.typeOf(operand); - const struct_ty = ptr_ty.childType(); - const struct_field_offset = @intCast(u32, struct_ty.structFieldOffset(index, self.target.*)); + const ptr_ty = self.typeOf(operand); + const struct_ty = ptr_ty.childType(mod); + const struct_field_offset = @intCast(u32, struct_ty.structFieldOffset(index, mod)); switch (mcv) { .ptr_stack_offset => |off| { break :result MCValue{ .ptr_stack_offset = off - struct_field_offset }; @@ -2892,11 +2902,12 @@ fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void { const extra = self.air.extraData(Air.StructField, ty_pl.payload).data; const operand = extra.struct_operand; const index = extra.field_index; + const mod = self.bin_file.options.module.?; const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: { const mcv = try self.resolveInst(operand); - const struct_ty = self.air.typeOf(operand); - const struct_field_offset = @intCast(u32, struct_ty.structFieldOffset(index, self.target.*)); - const struct_field_ty = struct_ty.structFieldType(index); + const struct_ty = self.typeOf(operand); + const struct_field_offset = @intCast(u32, struct_ty.structFieldOffset(index, mod)); + const struct_field_ty = struct_ty.structFieldType(index, mod); switch (mcv) { .dead, .unreach => unreachable, @@ -2959,10 +2970,10 @@ fn airStructFieldVal(self: 
*Self, inst: Air.Inst.Index) !void { ); const field_bit_offset = struct_field_offset * 8; - const field_bit_size = @intCast(u32, struct_field_ty.abiSize(self.target.*)) * 8; + const field_bit_size = @intCast(u32, struct_field_ty.abiSize(mod)) * 8; _ = try self.addInst(.{ - .tag = if (struct_field_ty.isSignedInt()) Mir.Inst.Tag.sbfx else .ubfx, + .tag = if (struct_field_ty.isSignedInt(mod)) Mir.Inst.Tag.sbfx else .ubfx, .data = .{ .rr_lsb_width = .{ .rd = dest_reg, .rn = operand_reg, @@ -2981,17 +2992,18 @@ fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void { } fn airFieldParentPtr(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; const ty_pl = self.air.instructions.items(.data)[inst].ty_pl; const extra = self.air.extraData(Air.FieldParentPtr, ty_pl.payload).data; const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: { const field_ptr = try self.resolveInst(extra.field_ptr); - const struct_ty = self.air.getRefType(ty_pl.ty).childType(); + const struct_ty = self.air.getRefType(ty_pl.ty).childType(mod); - if (struct_ty.zigTypeTag() == .Union) { + if (struct_ty.zigTypeTag(mod) == .Union) { return self.fail("TODO implement @fieldParentPtr codegen for unions", .{}); } - const struct_field_offset = @intCast(u32, struct_ty.structFieldOffset(extra.field_index, self.target.*)); + const struct_field_offset = @intCast(u32, struct_ty.structFieldOffset(extra.field_index, mod)); switch (field_ptr) { .ptr_stack_offset => |off| { break :result MCValue{ .ptr_stack_offset = off + struct_field_offset }; @@ -3375,12 +3387,12 @@ fn addSub( maybe_inst: ?Air.Inst.Index, ) InnerError!MCValue { const mod = self.bin_file.options.module.?; - switch (lhs_ty.zigTypeTag()) { + switch (lhs_ty.zigTypeTag(mod)) { .Float => return self.fail("TODO ARM binary operations on floats", .{}), .Vector => return self.fail("TODO ARM binary operations on vectors", .{}), .Int => { assert(lhs_ty.eql(rhs_ty, mod)); - const int_info = lhs_ty.intInfo(self.target.*); + const int_info = lhs_ty.intInfo(mod); if (int_info.bits <= 32) { const lhs_immediate = try lhs_bind.resolveToImmediate(self); const rhs_immediate = try rhs_bind.resolveToImmediate(self); @@ -3431,12 +3443,12 @@ fn mul( maybe_inst: ?Air.Inst.Index, ) InnerError!MCValue { const mod = self.bin_file.options.module.?; - switch (lhs_ty.zigTypeTag()) { + switch (lhs_ty.zigTypeTag(mod)) { .Float => return self.fail("TODO ARM binary operations on floats", .{}), .Vector => return self.fail("TODO ARM binary operations on vectors", .{}), .Int => { assert(lhs_ty.eql(rhs_ty, mod)); - const int_info = lhs_ty.intInfo(self.target.*); + const int_info = lhs_ty.intInfo(mod); if (int_info.bits <= 32) { // TODO add optimisations for multiplication // with immediates, for example a * 2 can be @@ -3463,7 +3475,8 @@ fn divFloat( _ = rhs_ty; _ = maybe_inst; - switch (lhs_ty.zigTypeTag()) { + const mod = self.bin_file.options.module.?; + switch (lhs_ty.zigTypeTag(mod)) { .Float => return self.fail("TODO ARM binary operations on floats", .{}), .Vector => return self.fail("TODO ARM binary operations on vectors", .{}), else => unreachable, @@ -3479,12 +3492,12 @@ fn divTrunc( maybe_inst: ?Air.Inst.Index, ) InnerError!MCValue { const mod = self.bin_file.options.module.?; - switch (lhs_ty.zigTypeTag()) { + switch (lhs_ty.zigTypeTag(mod)) { .Float => return self.fail("TODO ARM binary operations on floats", .{}), .Vector => return self.fail("TODO ARM binary operations on vectors", .{}), .Int => { assert(lhs_ty.eql(rhs_ty, mod)); - const 
int_info = lhs_ty.intInfo(self.target.*); + const int_info = lhs_ty.intInfo(mod); if (int_info.bits <= 32) { switch (int_info.signedness) { .signed => { @@ -3522,12 +3535,12 @@ fn divFloor( maybe_inst: ?Air.Inst.Index, ) InnerError!MCValue { const mod = self.bin_file.options.module.?; - switch (lhs_ty.zigTypeTag()) { + switch (lhs_ty.zigTypeTag(mod)) { .Float => return self.fail("TODO ARM binary operations on floats", .{}), .Vector => return self.fail("TODO ARM binary operations on vectors", .{}), .Int => { assert(lhs_ty.eql(rhs_ty, mod)); - const int_info = lhs_ty.intInfo(self.target.*); + const int_info = lhs_ty.intInfo(mod); if (int_info.bits <= 32) { switch (int_info.signedness) { .signed => { @@ -3569,7 +3582,8 @@ fn divExact( _ = rhs_ty; _ = maybe_inst; - switch (lhs_ty.zigTypeTag()) { + const mod = self.bin_file.options.module.?; + switch (lhs_ty.zigTypeTag(mod)) { .Float => return self.fail("TODO ARM binary operations on floats", .{}), .Vector => return self.fail("TODO ARM binary operations on vectors", .{}), .Int => return self.fail("TODO ARM div_exact", .{}), @@ -3586,12 +3600,12 @@ fn rem( maybe_inst: ?Air.Inst.Index, ) InnerError!MCValue { const mod = self.bin_file.options.module.?; - switch (lhs_ty.zigTypeTag()) { + switch (lhs_ty.zigTypeTag(mod)) { .Float => return self.fail("TODO ARM binary operations on floats", .{}), .Vector => return self.fail("TODO ARM binary operations on vectors", .{}), .Int => { assert(lhs_ty.eql(rhs_ty, mod)); - const int_info = lhs_ty.intInfo(self.target.*); + const int_info = lhs_ty.intInfo(mod); if (int_info.bits <= 32) { switch (int_info.signedness) { .signed => { @@ -3654,7 +3668,8 @@ fn modulo( _ = rhs_ty; _ = maybe_inst; - switch (lhs_ty.zigTypeTag()) { + const mod = self.bin_file.options.module.?; + switch (lhs_ty.zigTypeTag(mod)) { .Float => return self.fail("TODO ARM binary operations on floats", .{}), .Vector => return self.fail("TODO ARM binary operations on vectors", .{}), .Int => return self.fail("TODO ARM mod", .{}), @@ -3671,10 +3686,11 @@ fn wrappingArithmetic( rhs_ty: Type, maybe_inst: ?Air.Inst.Index, ) InnerError!MCValue { - switch (lhs_ty.zigTypeTag()) { + const mod = self.bin_file.options.module.?; + switch (lhs_ty.zigTypeTag(mod)) { .Vector => return self.fail("TODO ARM binary operations on vectors", .{}), .Int => { - const int_info = lhs_ty.intInfo(self.target.*); + const int_info = lhs_ty.intInfo(mod); if (int_info.bits <= 32) { // Generate an add/sub/mul const result: MCValue = switch (tag) { @@ -3708,12 +3724,12 @@ fn bitwise( rhs_ty: Type, maybe_inst: ?Air.Inst.Index, ) InnerError!MCValue { - switch (lhs_ty.zigTypeTag()) { + const mod = self.bin_file.options.module.?; + switch (lhs_ty.zigTypeTag(mod)) { .Vector => return self.fail("TODO ARM binary operations on vectors", .{}), .Int => { - const mod = self.bin_file.options.module.?; assert(lhs_ty.eql(rhs_ty, mod)); - const int_info = lhs_ty.intInfo(self.target.*); + const int_info = lhs_ty.intInfo(mod); if (int_info.bits <= 32) { const lhs_immediate = try lhs_bind.resolveToImmediate(self); const rhs_immediate = try rhs_bind.resolveToImmediate(self); @@ -3753,16 +3769,17 @@ fn shiftExact( rhs_ty: Type, maybe_inst: ?Air.Inst.Index, ) InnerError!MCValue { - switch (lhs_ty.zigTypeTag()) { + const mod = self.bin_file.options.module.?; + switch (lhs_ty.zigTypeTag(mod)) { .Vector => return self.fail("TODO ARM binary operations on vectors", .{}), .Int => { - const int_info = lhs_ty.intInfo(self.target.*); + const int_info = lhs_ty.intInfo(mod); if (int_info.bits <= 32) { const 
rhs_immediate = try rhs_bind.resolveToImmediate(self); const mir_tag: Mir.Inst.Tag = switch (tag) { .shl_exact => .lsl, - .shr_exact => switch (lhs_ty.intInfo(self.target.*).signedness) { + .shr_exact => switch (lhs_ty.intInfo(mod).signedness) { .signed => Mir.Inst.Tag.asr, .unsigned => Mir.Inst.Tag.lsr, }, @@ -3791,10 +3808,11 @@ fn shiftNormal( rhs_ty: Type, maybe_inst: ?Air.Inst.Index, ) InnerError!MCValue { - switch (lhs_ty.zigTypeTag()) { + const mod = self.bin_file.options.module.?; + switch (lhs_ty.zigTypeTag(mod)) { .Vector => return self.fail("TODO ARM binary operations on vectors", .{}), .Int => { - const int_info = lhs_ty.intInfo(self.target.*); + const int_info = lhs_ty.intInfo(mod); if (int_info.bits <= 32) { // Generate a shl_exact/shr_exact const result: MCValue = switch (tag) { @@ -3833,7 +3851,8 @@ fn booleanOp( rhs_ty: Type, maybe_inst: ?Air.Inst.Index, ) InnerError!MCValue { - switch (lhs_ty.zigTypeTag()) { + const mod = self.bin_file.options.module.?; + switch (lhs_ty.zigTypeTag(mod)) { .Bool => { const lhs_immediate = try lhs_bind.resolveToImmediate(self); const rhs_immediate = try rhs_bind.resolveToImmediate(self); @@ -3866,17 +3885,17 @@ fn ptrArithmetic( rhs_ty: Type, maybe_inst: ?Air.Inst.Index, ) InnerError!MCValue { - switch (lhs_ty.zigTypeTag()) { + const mod = self.bin_file.options.module.?; + switch (lhs_ty.zigTypeTag(mod)) { .Pointer => { - const mod = self.bin_file.options.module.?; assert(rhs_ty.eql(Type.usize, mod)); const ptr_ty = lhs_ty; - const elem_ty = switch (ptr_ty.ptrSize()) { - .One => ptr_ty.childType().childType(), // ptr to array, so get array element type - else => ptr_ty.childType(), + const elem_ty = switch (ptr_ty.ptrSize(mod)) { + .One => ptr_ty.childType(mod).childType(mod), // ptr to array, so get array element type + else => ptr_ty.childType(mod), }; - const elem_size = @intCast(u32, elem_ty.abiSize(self.target.*)); + const elem_size = @intCast(u32, elem_ty.abiSize(mod)); const base_tag: Air.Inst.Tag = switch (tag) { .ptr_add => .add, @@ -3903,11 +3922,12 @@ fn ptrArithmetic( } fn genLdrRegister(self: *Self, dest_reg: Register, addr_reg: Register, ty: Type) !void { - const abi_size = ty.abiSize(self.target.*); + const mod = self.bin_file.options.module.?; + const abi_size = ty.abiSize(mod); const tag: Mir.Inst.Tag = switch (abi_size) { - 1 => if (ty.isSignedInt()) Mir.Inst.Tag.ldrsb else .ldrb, - 2 => if (ty.isSignedInt()) Mir.Inst.Tag.ldrsh else .ldrh, + 1 => if (ty.isSignedInt(mod)) Mir.Inst.Tag.ldrsb else .ldrb, + 2 => if (ty.isSignedInt(mod)) Mir.Inst.Tag.ldrsh else .ldrh, 3, 4 => .ldr, else => unreachable, }; @@ -3924,7 +3944,7 @@ fn genLdrRegister(self: *Self, dest_reg: Register, addr_reg: Register, ty: Type) } }; const data: Mir.Inst.Data = switch (abi_size) { - 1 => if (ty.isSignedInt()) rr_extra_offset else rr_offset, + 1 => if (ty.isSignedInt(mod)) rr_extra_offset else rr_offset, 2 => rr_extra_offset, 3, 4 => rr_offset, else => unreachable, @@ -3937,7 +3957,8 @@ fn genLdrRegister(self: *Self, dest_reg: Register, addr_reg: Register, ty: Type) } fn genStrRegister(self: *Self, source_reg: Register, addr_reg: Register, ty: Type) !void { - const abi_size = ty.abiSize(self.target.*); + const mod = self.bin_file.options.module.?; + const abi_size = ty.abiSize(mod); const tag: Mir.Inst.Tag = switch (abi_size) { 1 => .strb, @@ -4051,14 +4072,14 @@ fn genInlineMemset( ) !void { const dst_reg = switch (dst) { .register => |r| r, - else => try self.copyToTmpRegister(Type.initTag(.manyptr_u8), dst), + else => try 
self.copyToTmpRegister(Type.manyptr_u8, dst), }; const dst_reg_lock = self.register_manager.lockReg(dst_reg); defer if (dst_reg_lock) |lock| self.register_manager.unlockReg(lock); const val_reg = switch (val) { .register => |r| r, - else => try self.copyToTmpRegister(Type.initTag(.u8), val), + else => try self.copyToTmpRegister(Type.u8, val), }; const val_reg_lock = self.register_manager.lockReg(val_reg); defer if (val_reg_lock) |lock| self.register_manager.unlockReg(lock); @@ -4143,7 +4164,7 @@ fn airArg(self: *Self, inst: Air.Inst.Index) !void { while (self.args[arg_index] == .none) arg_index += 1; self.arg_index = arg_index + 1; - const ty = self.air.typeOfIndex(inst); + const ty = self.typeOfIndex(inst); const tag = self.air.instructions.items(.tag)[inst]; const src_index = self.air.instructions.items(.data)[inst].arg.src_index; const name = self.mod_fn.getParamName(self.bin_file.options.module.?, src_index); @@ -4196,11 +4217,12 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallModifier const callee = pl_op.operand; const extra = self.air.extraData(Air.Call, pl_op.payload); const args = @ptrCast([]const Air.Inst.Ref, self.air.extra[extra.end..][0..extra.data.args_len]); - const ty = self.air.typeOf(callee); + const ty = self.typeOf(callee); + const mod = self.bin_file.options.module.?; - const fn_ty = switch (ty.zigTypeTag()) { + const fn_ty = switch (ty.zigTypeTag(mod)) { .Fn => ty, - .Pointer => ty.childType(), + .Pointer => ty.childType(mod), else => unreachable, }; @@ -4225,16 +4247,12 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallModifier // untouched by the parameter passing code const r0_lock: ?RegisterLock = if (info.return_value == .stack_offset) blk: { log.debug("airCall: return by reference", .{}); - const ret_ty = fn_ty.fnReturnType(); - const ret_abi_size = @intCast(u32, ret_ty.abiSize(self.target.*)); - const ret_abi_align = @intCast(u32, ret_ty.abiAlignment(self.target.*)); + const ret_ty = fn_ty.fnReturnType(mod); + const ret_abi_size = @intCast(u32, ret_ty.abiSize(mod)); + const ret_abi_align = @intCast(u32, ret_ty.abiAlignment(mod)); const stack_offset = try self.allocMem(ret_abi_size, ret_abi_align, inst); - var ptr_ty_payload: Type.Payload.ElemType = .{ - .base = .{ .tag = .single_mut_pointer }, - .data = ret_ty, - }; - const ptr_ty = Type.initPayload(&ptr_ty_payload.base); + const ptr_ty = try mod.singleMutPtrType(ret_ty); try self.register_manager.getReg(.r0, null); try self.genSetReg(ptr_ty, .r0, .{ .ptr_stack_offset = stack_offset }); @@ -4249,7 +4267,7 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallModifier for (info.args, 0..) |mc_arg, arg_i| { const arg = args[arg_i]; - const arg_ty = self.air.typeOf(arg); + const arg_ty = self.typeOf(arg); const arg_mcv = try self.resolveInst(args[arg_i]); switch (mc_arg) { @@ -4270,16 +4288,14 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallModifier // Due to incremental compilation, how function calls are generated depends // on linking. 
- if (self.air.value(callee)) |func_value| { - if (func_value.castTag(.function)) |func_payload| { - const func = func_payload.data; - + if (try self.air.value(callee, mod)) |func_value| { + if (func_value.getFunction(mod)) |func| { if (self.bin_file.cast(link.File.Elf)) |elf_file| { const atom_index = try elf_file.getOrCreateAtomForDecl(func.owner_decl); const atom = elf_file.getAtom(atom_index); _ = try atom.getOrCreateOffsetTableEntry(elf_file); const got_addr = @intCast(u32, atom.getOffsetTableAddress(elf_file)); - try self.genSetReg(Type.initTag(.usize), .lr, .{ .memory = got_addr }); + try self.genSetReg(Type.usize, .lr, .{ .memory = got_addr }); } else if (self.bin_file.cast(link.File.MachO)) |_| { unreachable; // unsupported architecture for MachO } else { @@ -4288,16 +4304,16 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallModifier @tagName(self.target.cpu.arch), }); } - } else if (func_value.castTag(.extern_fn)) |_| { + } else if (func_value.getExternFunc(mod)) |_| { return self.fail("TODO implement calling extern functions", .{}); } else { return self.fail("TODO implement calling bitcasted functions", .{}); } } else { - assert(ty.zigTypeTag() == .Pointer); + assert(ty.zigTypeTag(mod) == .Pointer); const mcv = try self.resolveInst(callee); - try self.genSetReg(Type.initTag(.usize), .lr, mcv); + try self.genSetReg(Type.usize, .lr, mcv); } // TODO: add Instruction.supportedOn @@ -4329,7 +4345,7 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallModifier if (RegisterManager.indexOfRegIntoTracked(reg) == null) { // Save function return value into a tracked register log.debug("airCall: copying {} as it is not tracked", .{reg}); - const new_reg = try self.copyToTmpRegister(fn_ty.fnReturnType(), info.return_value); + const new_reg = try self.copyToTmpRegister(fn_ty.fnReturnType(mod), info.return_value); break :result MCValue{ .register = new_reg }; } }, @@ -4353,14 +4369,15 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallModifier } fn airRet(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; const un_op = self.air.instructions.items(.data)[inst].un_op; const operand = try self.resolveInst(un_op); - const ret_ty = self.fn_type.fnReturnType(); + const ret_ty = self.fn_type.fnReturnType(mod); switch (self.ret_mcv) { .none => {}, .immediate => { - assert(ret_ty.isError()); + assert(ret_ty.isError(mod)); }, .register => |reg| { // Return result by value @@ -4371,11 +4388,7 @@ fn airRet(self: *Self, inst: Air.Inst.Index) !void { // // self.ret_mcv is an address to where this function // should store its result into - var ptr_ty_payload: Type.Payload.ElemType = .{ - .base = .{ .tag = .single_mut_pointer }, - .data = ret_ty, - }; - const ptr_ty = Type.initPayload(&ptr_ty_payload.base); + const ptr_ty = try mod.singleMutPtrType(ret_ty); try self.store(self.ret_mcv, operand, ptr_ty, ret_ty); }, else => unreachable, // invalid return result @@ -4388,10 +4401,11 @@ fn airRet(self: *Self, inst: Air.Inst.Index) !void { } fn airRetLoad(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; const un_op = self.air.instructions.items(.data)[inst].un_op; const ptr = try self.resolveInst(un_op); - const ptr_ty = self.air.typeOf(un_op); - const ret_ty = self.fn_type.fnReturnType(); + const ptr_ty = self.typeOf(un_op); + const ret_ty = self.fn_type.fnReturnType(mod); switch (self.ret_mcv) { .none => {}, @@ -4411,8 +4425,8 @@ fn airRetLoad(self: *Self, inst: 
Air.Inst.Index) !void { // location. const op_inst = Air.refToIndex(un_op).?; if (self.air.instructions.items(.tag)[op_inst] != .ret_ptr) { - const abi_size = @intCast(u32, ret_ty.abiSize(self.target.*)); - const abi_align = ret_ty.abiAlignment(self.target.*); + const abi_size = @intCast(u32, ret_ty.abiSize(mod)); + const abi_align = ret_ty.abiAlignment(mod); const offset = try self.allocMem(abi_size, abi_align, null); @@ -4432,7 +4446,7 @@ fn airRetLoad(self: *Self, inst: Air.Inst.Index) !void { fn airCmp(self: *Self, inst: Air.Inst.Index, op: math.CompareOperator) !void { const bin_op = self.air.instructions.items(.data)[inst].bin_op; - const lhs_ty = self.air.typeOf(bin_op.lhs); + const lhs_ty = self.typeOf(bin_op.lhs); const result: MCValue = if (self.liveness.isUnused(inst)) .dead else blk: { break :blk try self.cmp(.{ .inst = bin_op.lhs }, .{ .inst = bin_op.rhs }, lhs_ty, op); @@ -4448,29 +4462,28 @@ fn cmp( lhs_ty: Type, op: math.CompareOperator, ) !MCValue { - var int_buffer: Type.Payload.Bits = undefined; - const int_ty = switch (lhs_ty.zigTypeTag()) { + const mod = self.bin_file.options.module.?; + const int_ty = switch (lhs_ty.zigTypeTag(mod)) { .Optional => blk: { - var opt_buffer: Type.Payload.ElemType = undefined; - const payload_ty = lhs_ty.optionalChild(&opt_buffer); - if (!payload_ty.hasRuntimeBitsIgnoreComptime()) { - break :blk Type.initTag(.u1); - } else if (lhs_ty.isPtrLikeOptional()) { + const payload_ty = lhs_ty.optionalChild(mod); + if (!payload_ty.hasRuntimeBitsIgnoreComptime(mod)) { + break :blk Type.u1; + } else if (lhs_ty.isPtrLikeOptional(mod)) { break :blk Type.usize; } else { return self.fail("TODO ARM cmp non-pointer optionals", .{}); } }, .Float => return self.fail("TODO ARM cmp floats", .{}), - .Enum => lhs_ty.intTagType(&int_buffer), + .Enum => lhs_ty.intTagType(mod), .Int => lhs_ty, - .Bool => Type.initTag(.u1), + .Bool => Type.u1, .Pointer => Type.usize, - .ErrorSet => Type.initTag(.u16), + .ErrorSet => Type.u16, else => unreachable, }; - const int_info = int_ty.intInfo(self.target.*); + const int_info = int_ty.intInfo(mod); if (int_info.bits <= 32) { try self.spillCompareFlagsIfOccupied(); @@ -4555,8 +4568,9 @@ fn airDbgStmt(self: *Self, inst: Air.Inst.Index) !void { } fn airDbgInline(self: *Self, inst: Air.Inst.Index) !void { - const ty_pl = self.air.instructions.items(.data)[inst].ty_pl; - const function = self.air.values[ty_pl.payload].castTag(.function).?.data; + const ty_fn = self.air.instructions.items(.data)[inst].ty_fn; + const mod = self.bin_file.options.module.?; + const function = mod.funcPtr(ty_fn.func); // TODO emit debug info for function change _ = function; return self.finishAir(inst, .dead, .{ .none, .none, .none }); @@ -4571,7 +4585,7 @@ fn airDbgVar(self: *Self, inst: Air.Inst.Index) !void { const pl_op = self.air.instructions.items(.data)[inst].pl_op; const operand = pl_op.operand; const tag = self.air.instructions.items(.tag)[inst]; - const ty = self.air.typeOf(operand); + const ty = self.typeOf(operand); const mcv = try self.resolveInst(operand); const name = self.air.nullTerminatedString(pl_op.payload); @@ -4636,8 +4650,8 @@ fn airCondBr(self: *Self, inst: Air.Inst.Index) !void { // whether it needs to be spilled in the branches if (self.liveness.operandDies(inst, 0)) { const op_int = @enumToInt(pl_op.operand); - if (op_int >= Air.Inst.Ref.typed_value_map.len) { - const op_index = @intCast(Air.Inst.Index, op_int - Air.Inst.Ref.typed_value_map.len); + if (op_int >= Air.ref_start_index) { + const op_index = @intCast(Air.Inst.Index, 
op_int - Air.ref_start_index); self.processDeath(op_index); } } @@ -4726,7 +4740,7 @@ fn airCondBr(self: *Self, inst: Air.Inst.Index) !void { log.debug("consolidating else_entry {d} {}=>{}", .{ else_key, else_value, canon_mcv }); // TODO make sure the destination stack offset / register does not already have something // going on there. - try self.setRegOrMem(self.air.typeOfIndex(else_key), canon_mcv, else_value); + try self.setRegOrMem(self.typeOfIndex(else_key), canon_mcv, else_value); // TODO track the new register / stack allocation } try parent_branch.inst_table.ensureUnusedCapacity(self.gpa, saved_then_branch.inst_table.count()); @@ -4753,7 +4767,7 @@ fn airCondBr(self: *Self, inst: Air.Inst.Index) !void { log.debug("consolidating then_entry {d} {}=>{}", .{ then_key, parent_mcv, then_value }); // TODO make sure the destination stack offset / register does not already have something // going on there. - try self.setRegOrMem(self.air.typeOfIndex(then_key), parent_mcv, then_value); + try self.setRegOrMem(self.typeOfIndex(then_key), parent_mcv, then_value); // TODO track the new register / stack allocation } @@ -4772,8 +4786,9 @@ fn isNull( operand_bind: ReadArg.Bind, operand_ty: Type, ) !MCValue { - if (operand_ty.isPtrLikeOptional()) { - assert(operand_ty.abiSize(self.target.*) == 4); + const mod = self.bin_file.options.module.?; + if (operand_ty.isPtrLikeOptional(mod)) { + assert(operand_ty.abiSize(mod) == 4); const imm_bind: ReadArg.Bind = .{ .mcv = .{ .immediate = 0 } }; return self.cmp(operand_bind, imm_bind, Type.usize, .eq); @@ -4797,7 +4812,7 @@ fn airIsNull(self: *Self, inst: Air.Inst.Index) !void { const un_op = self.air.instructions.items(.data)[inst].un_op; const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: { const operand_bind: ReadArg.Bind = .{ .inst = un_op }; - const operand_ty = self.air.typeOf(un_op); + const operand_ty = self.typeOf(un_op); break :result try self.isNull(operand_bind, operand_ty); }; @@ -4805,11 +4820,12 @@ fn airIsNull(self: *Self, inst: Air.Inst.Index) !void { } fn airIsNullPtr(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; const un_op = self.air.instructions.items(.data)[inst].un_op; const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: { const operand_ptr = try self.resolveInst(un_op); - const ptr_ty = self.air.typeOf(un_op); - const elem_ty = ptr_ty.elemType(); + const ptr_ty = self.typeOf(un_op); + const elem_ty = ptr_ty.childType(mod); const operand = try self.allocRegOrMem(elem_ty, true, null); try self.load(operand, operand_ptr, ptr_ty); @@ -4823,7 +4839,7 @@ fn airIsNonNull(self: *Self, inst: Air.Inst.Index) !void { const un_op = self.air.instructions.items(.data)[inst].un_op; const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: { const operand_bind: ReadArg.Bind = .{ .inst = un_op }; - const operand_ty = self.air.typeOf(un_op); + const operand_ty = self.typeOf(un_op); break :result try self.isNonNull(operand_bind, operand_ty); }; @@ -4831,11 +4847,12 @@ fn airIsNonNull(self: *Self, inst: Air.Inst.Index) !void { } fn airIsNonNullPtr(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; const un_op = self.air.instructions.items(.data)[inst].un_op; const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: { const operand_ptr = try self.resolveInst(un_op); - const ptr_ty = self.air.typeOf(un_op); - const elem_ty = ptr_ty.elemType(); + const ptr_ty = self.typeOf(un_op); + const 
elem_ty = ptr_ty.childType(mod); const operand = try self.allocRegOrMem(elem_ty, true, null); try self.load(operand, operand_ptr, ptr_ty); @@ -4850,9 +4867,10 @@ fn isErr( error_union_bind: ReadArg.Bind, error_union_ty: Type, ) !MCValue { - const error_type = error_union_ty.errorUnionSet(); + const mod = self.bin_file.options.module.?; + const error_type = error_union_ty.errorUnionSet(mod); - if (error_type.errorSetIsEmpty()) { + if (error_type.errorSetIsEmpty(mod)) { return MCValue{ .immediate = 0 }; // always false } @@ -4883,7 +4901,7 @@ fn airIsErr(self: *Self, inst: Air.Inst.Index) !void { const un_op = self.air.instructions.items(.data)[inst].un_op; const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: { const error_union_bind: ReadArg.Bind = .{ .inst = un_op }; - const error_union_ty = self.air.typeOf(un_op); + const error_union_ty = self.typeOf(un_op); break :result try self.isErr(error_union_bind, error_union_ty); }; @@ -4891,11 +4909,12 @@ fn airIsErr(self: *Self, inst: Air.Inst.Index) !void { } fn airIsErrPtr(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; const un_op = self.air.instructions.items(.data)[inst].un_op; const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: { const operand_ptr = try self.resolveInst(un_op); - const ptr_ty = self.air.typeOf(un_op); - const elem_ty = ptr_ty.elemType(); + const ptr_ty = self.typeOf(un_op); + const elem_ty = ptr_ty.childType(mod); const operand = try self.allocRegOrMem(elem_ty, true, null); try self.load(operand, operand_ptr, ptr_ty); @@ -4909,7 +4928,7 @@ fn airIsNonErr(self: *Self, inst: Air.Inst.Index) !void { const un_op = self.air.instructions.items(.data)[inst].un_op; const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: { const error_union_bind: ReadArg.Bind = .{ .inst = un_op }; - const error_union_ty = self.air.typeOf(un_op); + const error_union_ty = self.typeOf(un_op); break :result try self.isNonErr(error_union_bind, error_union_ty); }; @@ -4917,11 +4936,12 @@ fn airIsNonErr(self: *Self, inst: Air.Inst.Index) !void { } fn airIsNonErrPtr(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; const un_op = self.air.instructions.items(.data)[inst].un_op; const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: { const operand_ptr = try self.resolveInst(un_op); - const ptr_ty = self.air.typeOf(un_op); - const elem_ty = ptr_ty.elemType(); + const ptr_ty = self.typeOf(un_op); + const elem_ty = ptr_ty.childType(mod); const operand = try self.allocRegOrMem(elem_ty, true, null); try self.load(operand, operand_ptr, ptr_ty); @@ -4988,7 +5008,7 @@ fn airBlock(self: *Self, inst: Air.Inst.Index) !void { fn airSwitch(self: *Self, inst: Air.Inst.Index) !void { const pl_op = self.air.instructions.items(.data)[inst].pl_op; - const condition_ty = self.air.typeOf(pl_op.operand); + const condition_ty = self.typeOf(pl_op.operand); const switch_br = self.air.extraData(Air.SwitchBr, pl_op.payload); const liveness = try self.liveness.getSwitchBr( self.gpa, @@ -5131,9 +5151,10 @@ fn airBr(self: *Self, inst: Air.Inst.Index) !void { } fn br(self: *Self, block: Air.Inst.Index, operand: Air.Inst.Ref) !void { + const mod = self.bin_file.options.module.?; const block_data = self.blocks.getPtr(block).?; - if (self.air.typeOf(operand).hasRuntimeBits()) { + if (self.typeOf(operand).hasRuntimeBits(mod)) { const operand_mcv = try self.resolveInst(operand); const block_mcv = block_data.mcv; if 
(block_mcv == .none) { @@ -5141,14 +5162,14 @@ fn br(self: *Self, block: Air.Inst.Index, operand: Air.Inst.Ref) !void { .none, .dead, .unreach => unreachable, .register, .stack_offset, .memory => operand_mcv, .immediate, .stack_argument_offset, .cpsr_flags => blk: { - const new_mcv = try self.allocRegOrMem(self.air.typeOfIndex(block), true, block); - try self.setRegOrMem(self.air.typeOfIndex(block), new_mcv, operand_mcv); + const new_mcv = try self.allocRegOrMem(self.typeOfIndex(block), true, block); + try self.setRegOrMem(self.typeOfIndex(block), new_mcv, operand_mcv); break :blk new_mcv; }, else => return self.fail("TODO implement block_data.mcv = operand_mcv for {}", .{operand_mcv}), }; } else { - try self.setRegOrMem(self.air.typeOfIndex(block), block_mcv, operand_mcv); + try self.setRegOrMem(self.typeOfIndex(block), block_mcv, operand_mcv); } } return self.brVoid(block); @@ -5212,7 +5233,7 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void { const arg_mcv = try self.resolveInst(input); try self.register_manager.getReg(reg, null); - try self.genSetReg(self.air.typeOf(input), reg, arg_mcv); + try self.genSetReg(self.typeOf(input), reg, arg_mcv); } { @@ -5301,7 +5322,8 @@ fn setRegOrMem(self: *Self, ty: Type, loc: MCValue, val: MCValue) !void { } fn genSetStack(self: *Self, ty: Type, stack_offset: u32, mcv: MCValue) InnerError!void { - const abi_size = @intCast(u32, ty.abiSize(self.target.*)); + const mod = self.bin_file.options.module.?; + const abi_size = @intCast(u32, ty.abiSize(mod)); switch (mcv) { .dead => unreachable, .unreach, .none => return, // Nothing to do. @@ -5332,7 +5354,7 @@ fn genSetStack(self: *Self, ty: Type, stack_offset: u32, mcv: MCValue) InnerErro 1, 4 => { const offset = if (math.cast(u12, stack_offset)) |imm| blk: { break :blk Instruction.Offset.imm(imm); - } else Instruction.Offset.reg(try self.copyToTmpRegister(Type.initTag(.u32), MCValue{ .immediate = stack_offset }), .none); + } else Instruction.Offset.reg(try self.copyToTmpRegister(Type.u32, MCValue{ .immediate = stack_offset }), .none); const tag: Mir.Inst.Tag = switch (abi_size) { 1 => .strb, @@ -5355,7 +5377,7 @@ fn genSetStack(self: *Self, ty: Type, stack_offset: u32, mcv: MCValue) InnerErro 2 => { const offset = if (stack_offset <= math.maxInt(u8)) blk: { break :blk Instruction.ExtraLoadStoreOffset.imm(@intCast(u8, stack_offset)); - } else Instruction.ExtraLoadStoreOffset.reg(try self.copyToTmpRegister(Type.initTag(.u32), MCValue{ .immediate = stack_offset })); + } else Instruction.ExtraLoadStoreOffset.reg(try self.copyToTmpRegister(Type.u32, MCValue{ .immediate = stack_offset })); _ = try self.addInst(.{ .tag = .strh, @@ -5378,11 +5400,11 @@ fn genSetStack(self: *Self, ty: Type, stack_offset: u32, mcv: MCValue) InnerErro const reg_lock = self.register_manager.lockReg(reg); defer if (reg_lock) |locked_reg| self.register_manager.unlockReg(locked_reg); - const wrapped_ty = ty.structFieldType(0); + const wrapped_ty = ty.structFieldType(0, mod); try self.genSetStack(wrapped_ty, stack_offset, .{ .register = reg }); - const overflow_bit_ty = ty.structFieldType(1); - const overflow_bit_offset = @intCast(u32, ty.structFieldOffset(1, self.target.*)); + const overflow_bit_ty = ty.structFieldType(1, mod); + const overflow_bit_offset = @intCast(u32, ty.structFieldOffset(1, mod)); const cond_reg = try self.register_manager.allocReg(null, gp); // C flag: movcs reg, #1 @@ -5420,11 +5442,7 @@ fn genSetStack(self: *Self, ty: Type, stack_offset: u32, mcv: MCValue) InnerErro const reg = try self.copyToTmpRegister(ty, 
mcv); return self.genSetStack(ty, stack_offset, MCValue{ .register = reg }); } else { - var ptr_ty_payload: Type.Payload.ElemType = .{ - .base = .{ .tag = .single_mut_pointer }, - .data = ty, - }; - const ptr_ty = Type.initPayload(&ptr_ty_payload.base); + const ptr_ty = try mod.singleMutPtrType(ty); // TODO call extern memcpy const regs = try self.register_manager.allocRegs(5, .{ null, null, null, null, null }, gp); @@ -5466,6 +5484,7 @@ fn genSetStack(self: *Self, ty: Type, stack_offset: u32, mcv: MCValue) InnerErro } fn genSetReg(self: *Self, ty: Type, reg: Register, mcv: MCValue) InnerError!void { + const mod = self.bin_file.options.module.?; switch (mcv) { .dead => unreachable, .unreach, .none => return, // Nothing to do. @@ -5640,17 +5659,17 @@ fn genSetReg(self: *Self, ty: Type, reg: Register, mcv: MCValue) InnerError!void }, .stack_offset => |off| { // TODO: maybe addressing from sp instead of fp - const abi_size = @intCast(u32, ty.abiSize(self.target.*)); + const abi_size = @intCast(u32, ty.abiSize(mod)); const tag: Mir.Inst.Tag = switch (abi_size) { - 1 => if (ty.isSignedInt()) Mir.Inst.Tag.ldrsb else .ldrb, - 2 => if (ty.isSignedInt()) Mir.Inst.Tag.ldrsh else .ldrh, + 1 => if (ty.isSignedInt(mod)) Mir.Inst.Tag.ldrsb else .ldrb, + 2 => if (ty.isSignedInt(mod)) Mir.Inst.Tag.ldrsh else .ldrh, 3, 4 => .ldr, else => unreachable, }; const extra_offset = switch (abi_size) { - 1 => ty.isSignedInt(), + 1 => ty.isSignedInt(mod), 2 => true, 3, 4 => false, else => unreachable, @@ -5659,7 +5678,7 @@ fn genSetReg(self: *Self, ty: Type, reg: Register, mcv: MCValue) InnerError!void if (extra_offset) { const offset = if (off <= math.maxInt(u8)) blk: { break :blk Instruction.ExtraLoadStoreOffset.imm(@intCast(u8, off)); - } else Instruction.ExtraLoadStoreOffset.reg(try self.copyToTmpRegister(Type.initTag(.usize), MCValue{ .immediate = off })); + } else Instruction.ExtraLoadStoreOffset.reg(try self.copyToTmpRegister(Type.usize, MCValue{ .immediate = off })); _ = try self.addInst(.{ .tag = tag, @@ -5675,7 +5694,7 @@ fn genSetReg(self: *Self, ty: Type, reg: Register, mcv: MCValue) InnerError!void } else { const offset = if (off <= math.maxInt(u12)) blk: { break :blk Instruction.Offset.imm(@intCast(u12, off)); - } else Instruction.Offset.reg(try self.copyToTmpRegister(Type.initTag(.usize), MCValue{ .immediate = off }), .none); + } else Instruction.Offset.reg(try self.copyToTmpRegister(Type.usize, MCValue{ .immediate = off }), .none); _ = try self.addInst(.{ .tag = tag, @@ -5691,11 +5710,11 @@ fn genSetReg(self: *Self, ty: Type, reg: Register, mcv: MCValue) InnerError!void } }, .stack_argument_offset => |off| { - const abi_size = ty.abiSize(self.target.*); + const abi_size = ty.abiSize(mod); const tag: Mir.Inst.Tag = switch (abi_size) { - 1 => if (ty.isSignedInt()) Mir.Inst.Tag.ldrsb_stack_argument else .ldrb_stack_argument, - 2 => if (ty.isSignedInt()) Mir.Inst.Tag.ldrsh_stack_argument else .ldrh_stack_argument, + 1 => if (ty.isSignedInt(mod)) Mir.Inst.Tag.ldrsb_stack_argument else .ldrb_stack_argument, + 2 => if (ty.isSignedInt(mod)) Mir.Inst.Tag.ldrsh_stack_argument else .ldrh_stack_argument, 3, 4 => .ldr_stack_argument, else => unreachable, }; @@ -5712,7 +5731,8 @@ fn genSetReg(self: *Self, ty: Type, reg: Register, mcv: MCValue) InnerError!void } fn genSetStackArgument(self: *Self, ty: Type, stack_offset: u32, mcv: MCValue) InnerError!void { - const abi_size = @intCast(u32, ty.abiSize(self.target.*)); + const mod = self.bin_file.options.module.?; + const abi_size = @intCast(u32, ty.abiSize(mod)); 
switch (mcv) { .dead => unreachable, .none, .unreach => return, @@ -5732,7 +5752,7 @@ fn genSetStackArgument(self: *Self, ty: Type, stack_offset: u32, mcv: MCValue) I 1, 4 => { const offset = if (math.cast(u12, stack_offset)) |imm| blk: { break :blk Instruction.Offset.imm(imm); - } else Instruction.Offset.reg(try self.copyToTmpRegister(Type.initTag(.u32), MCValue{ .immediate = stack_offset }), .none); + } else Instruction.Offset.reg(try self.copyToTmpRegister(Type.u32, MCValue{ .immediate = stack_offset }), .none); const tag: Mir.Inst.Tag = switch (abi_size) { 1 => .strb, @@ -5752,7 +5772,7 @@ fn genSetStackArgument(self: *Self, ty: Type, stack_offset: u32, mcv: MCValue) I 2 => { const offset = if (stack_offset <= math.maxInt(u8)) blk: { break :blk Instruction.ExtraLoadStoreOffset.imm(@intCast(u8, stack_offset)); - } else Instruction.ExtraLoadStoreOffset.reg(try self.copyToTmpRegister(Type.initTag(.u32), MCValue{ .immediate = stack_offset })); + } else Instruction.ExtraLoadStoreOffset.reg(try self.copyToTmpRegister(Type.u32, MCValue{ .immediate = stack_offset })); _ = try self.addInst(.{ .tag = .strh, @@ -5779,11 +5799,7 @@ fn genSetStackArgument(self: *Self, ty: Type, stack_offset: u32, mcv: MCValue) I const reg = try self.copyToTmpRegister(ty, mcv); return self.genSetStackArgument(ty, stack_offset, MCValue{ .register = reg }); } else { - var ptr_ty_payload: Type.Payload.ElemType = .{ - .base = .{ .tag = .single_mut_pointer }, - .data = ty, - }; - const ptr_ty = Type.initPayload(&ptr_ty_payload.base); + const ptr_ty = try mod.singleMutPtrType(ty); // TODO call extern memcpy const regs = try self.register_manager.allocRegs(5, .{ null, null, null, null, null }, gp); @@ -5862,7 +5878,7 @@ fn airBitCast(self: *Self, inst: Air.Inst.Index) !void { }; defer if (operand_lock) |lock| self.register_manager.unlockReg(lock); - const dest_ty = self.air.typeOfIndex(inst); + const dest_ty = self.typeOfIndex(inst); const dest = try self.allocRegOrMem(dest_ty, true, inst); try self.setRegOrMem(dest_ty, dest, operand); break :result dest; @@ -5871,16 +5887,17 @@ fn airBitCast(self: *Self, inst: Air.Inst.Index) !void { } fn airArrayToSlice(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; const ty_op = self.air.instructions.items(.data)[inst].ty_op; const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: { - const ptr_ty = self.air.typeOf(ty_op.operand); + const ptr_ty = self.typeOf(ty_op.operand); const ptr = try self.resolveInst(ty_op.operand); - const array_ty = ptr_ty.childType(); - const array_len = @intCast(u32, array_ty.arrayLen()); + const array_ty = ptr_ty.childType(mod); + const array_len = @intCast(u32, array_ty.arrayLen(mod)); const stack_offset = try self.allocMem(8, 8, inst); try self.genSetStack(ptr_ty, stack_offset, ptr); - try self.genSetStack(Type.initTag(.usize), stack_offset - 4, .{ .immediate = array_len }); + try self.genSetStack(Type.usize, stack_offset - 4, .{ .immediate = array_len }); break :result MCValue{ .stack_offset = stack_offset }; }; return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); @@ -5989,8 +6006,9 @@ fn airReduce(self: *Self, inst: Air.Inst.Index) !void { } fn airAggregateInit(self: *Self, inst: Air.Inst.Index) !void { - const vector_ty = self.air.typeOfIndex(inst); - const len = vector_ty.vectorLen(); + const mod = self.bin_file.options.module.?; + const vector_ty = self.typeOfIndex(inst); + const len = vector_ty.vectorLen(mod); const ty_pl = self.air.instructions.items(.data)[inst].ty_pl; 
const elements = @ptrCast([]const Air.Inst.Ref, self.air.extra[ty_pl.payload..][0..len]); const result: MCValue = res: { @@ -6038,9 +6056,10 @@ fn airTry(self: *Self, inst: Air.Inst.Index) !void { const body = self.air.extra[extra.end..][0..extra.data.body_len]; const result: MCValue = result: { const error_union_bind: ReadArg.Bind = .{ .inst = pl_op.operand }; - const error_union_ty = self.air.typeOf(pl_op.operand); - const error_union_size = @intCast(u32, error_union_ty.abiSize(self.target.*)); - const error_union_align = error_union_ty.abiAlignment(self.target.*); + const error_union_ty = self.typeOf(pl_op.operand); + const mod = self.bin_file.options.module.?; + const error_union_size = @intCast(u32, error_union_ty.abiSize(mod)); + const error_union_align = error_union_ty.abiAlignment(mod); // The error union will die in the body. However, we need the // error union after the body in order to extract the payload @@ -6069,37 +6088,32 @@ fn airTryPtr(self: *Self, inst: Air.Inst.Index) !void { } fn resolveInst(self: *Self, inst: Air.Inst.Ref) InnerError!MCValue { - // First section of indexes correspond to a set number of constant values. - const ref_int = @enumToInt(inst); - if (ref_int < Air.Inst.Ref.typed_value_map.len) { - const tv = Air.Inst.Ref.typed_value_map[ref_int]; - if (!tv.ty.hasRuntimeBitsIgnoreComptime() and !tv.ty.isError()) { - return MCValue{ .none = {} }; - } - return self.genTypedValue(tv); - } + const mod = self.bin_file.options.module.?; // If the type has no codegen bits, no need to store it. - const inst_ty = self.air.typeOf(inst); - if (!inst_ty.hasRuntimeBitsIgnoreComptime() and !inst_ty.isError()) + const inst_ty = self.typeOf(inst); + if (!inst_ty.hasRuntimeBitsIgnoreComptime(mod) and !inst_ty.isError(mod)) return MCValue{ .none = {} }; - const inst_index = @intCast(Air.Inst.Index, ref_int - Air.Inst.Ref.typed_value_map.len); + const inst_index = Air.refToIndex(inst) orelse return self.genTypedValue(.{ + .ty = inst_ty, + .val = (try self.air.value(inst, mod)).?, + }); + switch (self.air.instructions.items(.tag)[inst_index]) { - .constant => { + .interned => { // Constants have static lifetimes, so they are always memoized in the outer most table. const branch = &self.branch_stack.items[0]; const gop = try branch.inst_table.getOrPut(self.gpa, inst_index); if (!gop.found_existing) { - const ty_pl = self.air.instructions.items(.data)[inst_index].ty_pl; + const interned = self.air.instructions.items(.data)[inst_index].interned; gop.value_ptr.* = try self.genTypedValue(.{ .ty = inst_ty, - .val = self.air.values[ty_pl.payload], + .val = interned.toValue(), }); } return gop.value_ptr.*; }, - .const_ty => unreachable, else => return self.getResolvedInstValue(inst_index), } } @@ -6152,12 +6166,11 @@ const CallMCValues = struct { /// Caller must call `CallMCValues.deinit`. fn resolveCallingConventionValues(self: *Self, fn_ty: Type) !CallMCValues { - const cc = fn_ty.fnCallingConvention(); - const param_types = try self.gpa.alloc(Type, fn_ty.fnParamLen()); - defer self.gpa.free(param_types); - fn_ty.fnParamTypes(param_types); + const mod = self.bin_file.options.module.?; + const fn_info = mod.typeToFunc(fn_ty).?; + const cc = fn_info.cc; var result: CallMCValues = .{ - .args = try self.gpa.alloc(MCValue, param_types.len), + .args = try self.gpa.alloc(MCValue, fn_info.param_types.len), // These undefined values must be populated before returning from this function. 
.return_value = undefined, .stack_byte_count = undefined, @@ -6165,7 +6178,7 @@ fn resolveCallingConventionValues(self: *Self, fn_ty: Type) !CallMCValues { }; errdefer self.gpa.free(result.args); - const ret_ty = fn_ty.fnReturnType(); + const ret_ty = fn_ty.fnReturnType(mod); switch (cc) { .Naked => { @@ -6180,12 +6193,12 @@ fn resolveCallingConventionValues(self: *Self, fn_ty: Type) !CallMCValues { var ncrn: usize = 0; // Next Core Register Number var nsaa: u32 = 0; // Next stacked argument address - if (ret_ty.zigTypeTag() == .NoReturn) { + if (ret_ty.zigTypeTag(mod) == .NoReturn) { result.return_value = .{ .unreach = {} }; - } else if (!ret_ty.hasRuntimeBitsIgnoreComptime()) { + } else if (!ret_ty.hasRuntimeBitsIgnoreComptime(mod)) { result.return_value = .{ .none = {} }; } else { - const ret_ty_size = @intCast(u32, ret_ty.abiSize(self.target.*)); + const ret_ty_size = @intCast(u32, ret_ty.abiSize(mod)); // TODO handle cases where multiple registers are used if (ret_ty_size <= 4) { result.return_value = .{ .register = c_abi_int_return_regs[0] }; @@ -6199,11 +6212,11 @@ fn resolveCallingConventionValues(self: *Self, fn_ty: Type) !CallMCValues { } } - for (param_types, 0..) |ty, i| { - if (ty.abiAlignment(self.target.*) == 8) - ncrn = std.mem.alignForwardGeneric(usize, ncrn, 2); + for (fn_info.param_types, 0..) |ty, i| { + if (ty.toType().abiAlignment(mod) == 8) + ncrn = std.mem.alignForward(usize, ncrn, 2); - const param_size = @intCast(u32, ty.abiSize(self.target.*)); + const param_size = @intCast(u32, ty.toType().abiSize(mod)); if (std.math.divCeil(u32, param_size, 4) catch unreachable <= 4 - ncrn) { if (param_size <= 4) { result.args[i] = .{ .register = c_abi_int_param_regs[ncrn] }; @@ -6215,8 +6228,8 @@ fn resolveCallingConventionValues(self: *Self, fn_ty: Type) !CallMCValues { return self.fail("TODO MCValues split between registers and stack", .{}); } else { ncrn = 4; - if (ty.abiAlignment(self.target.*) == 8) - nsaa = std.mem.alignForwardGeneric(u32, nsaa, 8); + if (ty.toType().abiAlignment(mod) == 8) + nsaa = std.mem.alignForward(u32, nsaa, 8); result.args[i] = .{ .stack_argument_offset = nsaa }; nsaa += param_size; @@ -6227,14 +6240,14 @@ fn resolveCallingConventionValues(self: *Self, fn_ty: Type) !CallMCValues { result.stack_align = 8; }, .Unspecified => { - if (ret_ty.zigTypeTag() == .NoReturn) { + if (ret_ty.zigTypeTag(mod) == .NoReturn) { result.return_value = .{ .unreach = {} }; - } else if (!ret_ty.hasRuntimeBitsIgnoreComptime() and !ret_ty.isError()) { + } else if (!ret_ty.hasRuntimeBitsIgnoreComptime(mod) and !ret_ty.isError(mod)) { result.return_value = .{ .none = {} }; } else { - const ret_ty_size = @intCast(u32, ret_ty.abiSize(self.target.*)); + const ret_ty_size = @intCast(u32, ret_ty.abiSize(mod)); if (ret_ty_size == 0) { - assert(ret_ty.isError()); + assert(ret_ty.isError(mod)); result.return_value = .{ .immediate = 0 }; } else if (ret_ty_size <= 4) { result.return_value = .{ .register = .r0 }; @@ -6249,12 +6262,12 @@ fn resolveCallingConventionValues(self: *Self, fn_ty: Type) !CallMCValues { var stack_offset: u32 = 0; - for (param_types, 0..) |ty, i| { - if (ty.abiSize(self.target.*) > 0) { - const param_size = @intCast(u32, ty.abiSize(self.target.*)); - const param_alignment = ty.abiAlignment(self.target.*); + for (fn_info.param_types, 0..) 
|ty, i| { + if (ty.toType().abiSize(mod) > 0) { + const param_size = @intCast(u32, ty.toType().abiSize(mod)); + const param_alignment = ty.toType().abiAlignment(mod); - stack_offset = std.mem.alignForwardGeneric(u32, stack_offset, param_alignment); + stack_offset = std.mem.alignForward(u32, stack_offset, param_alignment); result.args[i] = .{ .stack_argument_offset = stack_offset }; stack_offset += param_size; } else { @@ -6301,3 +6314,13 @@ fn parseRegName(name: []const u8) ?Register { } return std.meta.stringToEnum(Register, name); } + +fn typeOf(self: *Self, inst: Air.Inst.Ref) Type { + const mod = self.bin_file.options.module.?; + return self.air.typeOf(inst, &mod.intern_pool); +} + +fn typeOfIndex(self: *Self, inst: Air.Inst.Index) Type { + const mod = self.bin_file.options.module.?; + return self.air.typeOfIndex(inst, &mod.intern_pool); +} diff --git a/src/arch/arm/abi.zig b/src/arch/arm/abi.zig index 8b9ec45e24..a4a4fe472b 100644 --- a/src/arch/arm/abi.zig +++ b/src/arch/arm/abi.zig @@ -1,8 +1,10 @@ const std = @import("std"); +const assert = std.debug.assert; const bits = @import("bits.zig"); const Register = bits.Register; const RegisterManagerFn = @import("../../register_manager.zig").RegisterManager; const Type = @import("../../type.zig").Type; +const Module = @import("../../Module.zig"); pub const Class = union(enum) { memory, @@ -11,7 +13,7 @@ pub const Class = union(enum) { i64_array: u8, fn arrSize(total_size: u64, arr_size: u64) Class { - const count = @intCast(u8, std.mem.alignForwardGeneric(u64, total_size, arr_size) / arr_size); + const count = @intCast(u8, std.mem.alignForward(u64, total_size, arr_size) / arr_size); if (arr_size == 32) { return .{ .i32_array = count }; } else { @@ -22,28 +24,28 @@ pub const Class = union(enum) { pub const Context = enum { ret, arg }; -pub fn classifyType(ty: Type, target: std.Target, ctx: Context) Class { - std.debug.assert(ty.hasRuntimeBitsIgnoreComptime()); +pub fn classifyType(ty: Type, mod: *Module, ctx: Context) Class { + assert(ty.hasRuntimeBitsIgnoreComptime(mod)); var maybe_float_bits: ?u16 = null; const max_byval_size = 512; - switch (ty.zigTypeTag()) { + switch (ty.zigTypeTag(mod)) { .Struct => { - const bit_size = ty.bitSize(target); - if (ty.containerLayout() == .Packed) { + const bit_size = ty.bitSize(mod); + if (ty.containerLayout(mod) == .Packed) { if (bit_size > 64) return .memory; return .byval; } if (bit_size > max_byval_size) return .memory; - const float_count = countFloats(ty, target, &maybe_float_bits); + const float_count = countFloats(ty, mod, &maybe_float_bits); if (float_count <= byval_float_count) return .byval; - const fields = ty.structFieldCount(); + const fields = ty.structFieldCount(mod); var i: u32 = 0; while (i < fields) : (i += 1) { - const field_ty = ty.structFieldType(i); - const field_alignment = ty.structFieldAlign(i, target); - const field_size = field_ty.bitSize(target); + const field_ty = ty.structFieldType(i, mod); + const field_alignment = ty.structFieldAlign(i, mod); + const field_size = field_ty.bitSize(mod); if (field_size > 32 or field_alignment > 32) { return Class.arrSize(bit_size, 64); } @@ -51,17 +53,17 @@ pub fn classifyType(ty: Type, target: std.Target, ctx: Context) Class { return Class.arrSize(bit_size, 32); }, .Union => { - const bit_size = ty.bitSize(target); - if (ty.containerLayout() == .Packed) { + const bit_size = ty.bitSize(mod); + if (ty.containerLayout(mod) == .Packed) { if (bit_size > 64) return .memory; return .byval; } if (bit_size > max_byval_size) return .memory; - 
const float_count = countFloats(ty, target, &maybe_float_bits); + const float_count = countFloats(ty, mod, &maybe_float_bits); if (float_count <= byval_float_count) return .byval; - for (ty.unionFields().values()) |field| { - if (field.ty.bitSize(target) > 32 or field.normalAlignment(target) > 32) { + for (ty.unionFields(mod).values()) |field| { + if (field.ty.bitSize(mod) > 32 or field.normalAlignment(mod) > 32) { return Class.arrSize(bit_size, 64); } } @@ -71,28 +73,28 @@ pub fn classifyType(ty: Type, target: std.Target, ctx: Context) Class { .Int => { // TODO this is incorrect for _BitInt(128) but implementing // this correctly makes implementing compiler-rt impossible. - // const bit_size = ty.bitSize(target); + // const bit_size = ty.bitSize(mod); // if (bit_size > 64) return .memory; return .byval; }, .Enum, .ErrorSet => { - const bit_size = ty.bitSize(target); + const bit_size = ty.bitSize(mod); if (bit_size > 64) return .memory; return .byval; }, .Vector => { - const bit_size = ty.bitSize(target); + const bit_size = ty.bitSize(mod); // TODO is this controlled by a cpu feature? if (ctx == .ret and bit_size > 128) return .memory; if (bit_size > 512) return .memory; return .byval; }, .Optional => { - std.debug.assert(ty.isPtrLikeOptional()); + assert(ty.isPtrLikeOptional(mod)); return .byval; }, .Pointer => { - std.debug.assert(!ty.isSlice()); + assert(!ty.isSlice(mod)); return .byval; }, .ErrorUnion, @@ -114,14 +116,15 @@ pub fn classifyType(ty: Type, target: std.Target, ctx: Context) Class { } const byval_float_count = 4; -fn countFloats(ty: Type, target: std.Target, maybe_float_bits: *?u16) u32 { +fn countFloats(ty: Type, mod: *Module, maybe_float_bits: *?u16) u32 { + const target = mod.getTarget(); const invalid = std.math.maxInt(u32); - switch (ty.zigTypeTag()) { + switch (ty.zigTypeTag(mod)) { .Union => { - const fields = ty.unionFields(); + const fields = ty.unionFields(mod); var max_count: u32 = 0; for (fields.values()) |field| { - const field_count = countFloats(field.ty, target, maybe_float_bits); + const field_count = countFloats(field.ty, mod, maybe_float_bits); if (field_count == invalid) return invalid; if (field_count > max_count) max_count = field_count; if (max_count > byval_float_count) return invalid; @@ -129,12 +132,12 @@ fn countFloats(ty: Type, target: std.Target, maybe_float_bits: *?u16) u32 { return max_count; }, .Struct => { - const fields_len = ty.structFieldCount(); + const fields_len = ty.structFieldCount(mod); var count: u32 = 0; var i: u32 = 0; while (i < fields_len) : (i += 1) { - const field_ty = ty.structFieldType(i); - const field_count = countFloats(field_ty, target, maybe_float_bits); + const field_ty = ty.structFieldType(i, mod); + const field_count = countFloats(field_ty, mod, maybe_float_bits); if (field_count == invalid) return invalid; count += field_count; if (count > byval_float_count) return invalid; diff --git a/src/arch/riscv64/CodeGen.zig b/src/arch/riscv64/CodeGen.zig index d4c7eb0c70..c6ac3255c6 100644 --- a/src/arch/riscv64/CodeGen.zig +++ b/src/arch/riscv64/CodeGen.zig @@ -217,7 +217,7 @@ const Self = @This(); pub fn generate( bin_file: *link.File, src_loc: Module.SrcLoc, - module_fn: *Module.Fn, + module_fn_index: Module.Fn.Index, air: Air, liveness: Liveness, code: *std.ArrayList(u8), @@ -228,6 +228,7 @@ pub fn generate( } const mod = bin_file.options.module.?; + const module_fn = mod.funcPtr(module_fn_index); const fn_owner_decl = mod.declPtr(module_fn.owner_decl); assert(fn_owner_decl.has_tv); const fn_type = fn_owner_decl.ty; @@ 
-347,7 +348,8 @@ pub fn addExtraAssumeCapacity(self: *Self, extra: anytype) u32 { } fn gen(self: *Self) !void { - const cc = self.fn_type.fnCallingConvention(); + const mod = self.bin_file.options.module.?; + const cc = self.fn_type.fnCallingConvention(mod); if (cc != .Naked) { // TODO Finish function prologue and epilogue for riscv64. @@ -470,13 +472,14 @@ fn gen(self: *Self) !void { } fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { + const mod = self.bin_file.options.module.?; + const ip = &mod.intern_pool; const air_tags = self.air.instructions.items(.tag); for (body) |inst| { // TODO: remove now-redundant isUnused calls from AIR handler functions - if (self.liveness.isUnused(inst) and !self.air.mustLower(inst)) { + if (self.liveness.isUnused(inst) and !self.air.mustLower(inst, ip)) continue; - } const old_air_bookkeeping = self.air_bookkeeping; try self.ensureProcessDeathCapacity(Liveness.bpi); @@ -656,8 +659,7 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .ptr_elem_val => try self.airPtrElemVal(inst), .ptr_elem_ptr => try self.airPtrElemPtr(inst), - .constant => unreachable, // excluded from function bodies - .const_ty => unreachable, // excluded from function bodies + .inferred_alloc, .inferred_alloc_comptime, .interned => unreachable, .unreach => self.finishAirBookkeeping(), .optional_payload => try self.airOptionalPayload(inst), @@ -727,8 +729,7 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { /// Asserts there is already capacity to insert into top branch inst_table. fn processDeath(self: *Self, inst: Air.Inst.Index) void { - const air_tags = self.air.instructions.items(.tag); - if (air_tags[inst] == .constant) return; // Constants are immortal. + assert(self.air.instructions.items(.tag)[inst] != .interned); // When editing this function, note that the logic must synchronize with `reuseOperand`. const prev_value = self.getResolvedInstValue(inst); const branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; @@ -755,8 +756,8 @@ fn finishAir(self: *Self, inst: Air.Inst.Index, result: MCValue, operands: [Live tomb_bits >>= 1; if (!dies) continue; const op_int = @enumToInt(op); - if (op_int < Air.Inst.Ref.typed_value_map.len) continue; - const op_index = @intCast(Air.Inst.Index, op_int - Air.Inst.Ref.typed_value_map.len); + if (op_int < Air.ref_start_index) continue; + const op_index = @intCast(Air.Inst.Index, op_int - Air.ref_start_index); self.processDeath(op_index); } const is_used = @truncate(u1, tomb_bits) == 0; @@ -791,7 +792,7 @@ fn allocMem(self: *Self, inst: Air.Inst.Index, abi_size: u32, abi_align: u32) !u if (abi_align > self.stack_align) self.stack_align = abi_align; // TODO find a free slot instead of always appending - const offset = mem.alignForwardGeneric(u32, self.next_stack_offset, abi_align); + const offset = mem.alignForward(u32, self.next_stack_offset, abi_align); self.next_stack_offset = offset + abi_size; if (self.next_stack_offset > self.max_end_stack) self.max_end_stack = self.next_stack_offset; @@ -804,29 +805,29 @@ fn allocMem(self: *Self, inst: Air.Inst.Index, abi_size: u32, abi_align: u32) !u /// Use a pointer instruction as the basis for allocating stack memory. 
fn allocMemPtr(self: *Self, inst: Air.Inst.Index) !u32 { - const elem_ty = self.air.typeOfIndex(inst).elemType(); - const abi_size = math.cast(u32, elem_ty.abiSize(self.target.*)) orelse { - const mod = self.bin_file.options.module.?; + const mod = self.bin_file.options.module.?; + const elem_ty = self.typeOfIndex(inst).childType(mod); + const abi_size = math.cast(u32, elem_ty.abiSize(mod)) orelse { return self.fail("type '{}' too big to fit into stack frame", .{elem_ty.fmt(mod)}); }; // TODO swap this for inst.ty.ptrAlign - const abi_align = elem_ty.abiAlignment(self.target.*); + const abi_align = elem_ty.abiAlignment(mod); return self.allocMem(inst, abi_size, abi_align); } fn allocRegOrMem(self: *Self, inst: Air.Inst.Index, reg_ok: bool) !MCValue { - const elem_ty = self.air.typeOfIndex(inst); - const abi_size = math.cast(u32, elem_ty.abiSize(self.target.*)) orelse { - const mod = self.bin_file.options.module.?; + const mod = self.bin_file.options.module.?; + const elem_ty = self.typeOfIndex(inst); + const abi_size = math.cast(u32, elem_ty.abiSize(mod)) orelse { return self.fail("type '{}' too big to fit into stack frame", .{elem_ty.fmt(mod)}); }; - const abi_align = elem_ty.abiAlignment(self.target.*); + const abi_align = elem_ty.abiAlignment(mod); if (abi_align > self.stack_align) self.stack_align = abi_align; if (reg_ok) { // Make sure the type can fit in a register before we try to allocate one. - const ptr_bits = self.target.cpu.arch.ptrBitWidth(); + const ptr_bits = self.target.ptrBitWidth(); const ptr_bytes: u64 = @divExact(ptr_bits, 8); if (abi_size <= ptr_bytes) { if (self.register_manager.tryAllocReg(inst, gp)) |reg| { @@ -845,7 +846,7 @@ pub fn spillInstruction(self: *Self, reg: Register, inst: Air.Inst.Index) !void assert(reg == reg_mcv.register); const branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; try branch.inst_table.put(self.gpa, inst, stack_mcv); - try self.genSetStack(self.air.typeOfIndex(inst), stack_mcv.stack_offset, reg_mcv); + try self.genSetStack(self.typeOfIndex(inst), stack_mcv.stack_offset, reg_mcv); } /// Copies a value to a register without tracking the register. The register is not considered @@ -862,7 +863,7 @@ fn copyToTmpRegister(self: *Self, ty: Type, mcv: MCValue) !Register { /// This can have a side effect of spilling instructions to the stack to free up a register. 
fn copyToNewRegister(self: *Self, reg_owner: Air.Inst.Index, mcv: MCValue) !MCValue { const reg = try self.register_manager.allocReg(reg_owner, gp); - try self.genSetReg(self.air.typeOfIndex(reg_owner), reg, mcv); + try self.genSetReg(self.typeOfIndex(reg_owner), reg, mcv); return MCValue{ .register = reg }; } @@ -893,10 +894,11 @@ fn airIntCast(self: *Self, inst: Air.Inst.Index) !void { if (self.liveness.isUnused(inst)) return self.finishAir(inst, .dead, .{ ty_op.operand, .none, .none }); - const operand_ty = self.air.typeOf(ty_op.operand); + const mod = self.bin_file.options.module.?; + const operand_ty = self.typeOf(ty_op.operand); const operand = try self.resolveInst(ty_op.operand); - const info_a = operand_ty.intInfo(self.target.*); - const info_b = self.air.typeOfIndex(inst).intInfo(self.target.*); + const info_a = operand_ty.intInfo(mod); + const info_b = self.typeOfIndex(inst).intInfo(mod); if (info_a.signedness != info_b.signedness) return self.fail("TODO gen intcast sign safety in semantic analysis", .{}); @@ -1068,18 +1070,18 @@ fn binOp( lhs_ty: Type, rhs_ty: Type, ) InnerError!MCValue { + const mod = self.bin_file.options.module.?; switch (tag) { // Arithmetic operations on integers and floats .add, .sub, => { - switch (lhs_ty.zigTypeTag()) { + switch (lhs_ty.zigTypeTag(mod)) { .Float => return self.fail("TODO binary operations on floats", .{}), .Vector => return self.fail("TODO binary operations on vectors", .{}), .Int => { - const mod = self.bin_file.options.module.?; assert(lhs_ty.eql(rhs_ty, mod)); - const int_info = lhs_ty.intInfo(self.target.*); + const int_info = lhs_ty.intInfo(mod); if (int_info.bits <= 64) { // TODO immediate operands return try self.binOpRegister(tag, maybe_inst, lhs, rhs, lhs_ty, rhs_ty); @@ -1093,14 +1095,14 @@ fn binOp( .ptr_add, .ptr_sub, => { - switch (lhs_ty.zigTypeTag()) { + switch (lhs_ty.zigTypeTag(mod)) { .Pointer => { const ptr_ty = lhs_ty; - const elem_ty = switch (ptr_ty.ptrSize()) { - .One => ptr_ty.childType().childType(), // ptr to array, so get array element type - else => ptr_ty.childType(), + const elem_ty = switch (ptr_ty.ptrSize(mod)) { + .One => ptr_ty.childType(mod).childType(mod), // ptr to array, so get array element type + else => ptr_ty.childType(mod), }; - const elem_size = elem_ty.abiSize(self.target.*); + const elem_size = elem_ty.abiSize(mod); if (elem_size == 1) { const base_tag: Air.Inst.Tag = switch (tag) { @@ -1125,8 +1127,8 @@ fn airBinOp(self: *Self, inst: Air.Inst.Index, tag: Air.Inst.Tag) !void { const bin_op = self.air.instructions.items(.data)[inst].bin_op; const lhs = try self.resolveInst(bin_op.lhs); const rhs = try self.resolveInst(bin_op.rhs); - const lhs_ty = self.air.typeOf(bin_op.lhs); - const rhs_ty = self.air.typeOf(bin_op.rhs); + const lhs_ty = self.typeOf(bin_op.lhs); + const rhs_ty = self.typeOf(bin_op.rhs); const result: MCValue = if (self.liveness.isUnused(inst)) .dead else try self.binOp(tag, inst, lhs, rhs, lhs_ty, rhs_ty); return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none }); @@ -1137,8 +1139,8 @@ fn airPtrArithmetic(self: *Self, inst: Air.Inst.Index, tag: Air.Inst.Tag) !void const bin_op = self.air.extraData(Air.Bin, ty_pl.payload).data; const lhs = try self.resolveInst(bin_op.lhs); const rhs = try self.resolveInst(bin_op.rhs); - const lhs_ty = self.air.typeOf(bin_op.lhs); - const rhs_ty = self.air.typeOf(bin_op.rhs); + const lhs_ty = self.typeOf(bin_op.lhs); + const rhs_ty = self.typeOf(bin_op.rhs); const result: MCValue = if (self.liveness.isUnused(inst)) .dead else try 
self.binOp(tag, inst, lhs, rhs, lhs_ty, rhs_ty); return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none }); @@ -1331,10 +1333,11 @@ fn airSaveErrReturnTraceIndex(self: *Self, inst: Air.Inst.Index) !void { fn airWrapOptional(self: *Self, inst: Air.Inst.Index) !void { const ty_op = self.air.instructions.items(.data)[inst].ty_op; const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: { - const optional_ty = self.air.typeOfIndex(inst); + const mod = self.bin_file.options.module.?; + const optional_ty = self.typeOfIndex(inst); // Optional with a zero-bit payload type is just a boolean true - if (optional_ty.abiSize(self.target.*) == 1) + if (optional_ty.abiSize(mod) == 1) break :result MCValue{ .immediate = 1 }; return self.fail("TODO implement wrap optional for {}", .{self.target.cpu.arch}); @@ -1498,7 +1501,8 @@ fn reuseOperand(self: *Self, inst: Air.Inst.Index, operand: Air.Inst.Ref, op_ind } fn load(self: *Self, dst_mcv: MCValue, ptr: MCValue, ptr_ty: Type) InnerError!void { - const elem_ty = ptr_ty.elemType(); + const mod = self.bin_file.options.module.?; + const elem_ty = ptr_ty.childType(mod); switch (ptr) { .none => unreachable, .undef => unreachable, @@ -1523,14 +1527,15 @@ fn load(self: *Self, dst_mcv: MCValue, ptr: MCValue, ptr_ty: Type) InnerError!vo } fn airLoad(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; const ty_op = self.air.instructions.items(.data)[inst].ty_op; - const elem_ty = self.air.typeOfIndex(inst); + const elem_ty = self.typeOfIndex(inst); const result: MCValue = result: { - if (!elem_ty.hasRuntimeBits()) + if (!elem_ty.hasRuntimeBits(mod)) break :result MCValue.none; const ptr = try self.resolveInst(ty_op.operand); - const is_volatile = self.air.typeOf(ty_op.operand).isVolatilePtr(); + const is_volatile = self.typeOf(ty_op.operand).isVolatilePtr(mod); if (self.liveness.isUnused(inst) and !is_volatile) break :result MCValue.dead; @@ -1542,7 +1547,7 @@ fn airLoad(self: *Self, inst: Air.Inst.Index) !void { break :blk try self.allocRegOrMem(inst, true); } }; - try self.load(dst_mcv, ptr, self.air.typeOf(ty_op.operand)); + try self.load(dst_mcv, ptr, self.typeOf(ty_op.operand)); break :result dst_mcv; }; return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); @@ -1583,8 +1588,8 @@ fn airStore(self: *Self, inst: Air.Inst.Index, safety: bool) !void { const bin_op = self.air.instructions.items(.data)[inst].bin_op; const ptr = try self.resolveInst(bin_op.lhs); const value = try self.resolveInst(bin_op.rhs); - const ptr_ty = self.air.typeOf(bin_op.lhs); - const value_ty = self.air.typeOf(bin_op.rhs); + const ptr_ty = self.typeOf(bin_op.lhs); + const value_ty = self.typeOf(bin_op.rhs); try self.store(ptr, value, ptr_ty, value_ty); @@ -1644,7 +1649,7 @@ fn airArg(self: *Self, inst: Air.Inst.Index) !void { const arg_index = self.arg_index; self.arg_index += 1; - const ty = self.air.typeOfIndex(inst); + const ty = self.typeOfIndex(inst); _ = ty; const result = self.args[arg_index]; @@ -1698,9 +1703,10 @@ fn airFence(self: *Self) !void { } fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallModifier) !void { + const mod = self.bin_file.options.module.?; if (modifier == .always_tail) return self.fail("TODO implement tail calls for riscv64", .{}); const pl_op = self.air.instructions.items(.data)[inst].pl_op; - const fn_ty = self.air.typeOf(pl_op.operand); + const fn_ty = self.typeOf(pl_op.operand); const callee = pl_op.operand; const extra = self.air.extraData(Air.Call, 
pl_op.payload); const args = @ptrCast([]const Air.Inst.Ref, self.air.extra[extra.end..][0..extra.data.args_len]); @@ -1713,7 +1719,7 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallModifier if (self.bin_file.cast(link.File.Elf)) |elf_file| { for (info.args, 0..) |mc_arg, arg_i| { const arg = args[arg_i]; - const arg_ty = self.air.typeOf(arg); + const arg_ty = self.typeOf(arg); const arg_mcv = try self.resolveInst(args[arg_i]); switch (mc_arg) { @@ -1736,14 +1742,13 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallModifier } } - if (self.air.value(callee)) |func_value| { - if (func_value.castTag(.function)) |func_payload| { - const func = func_payload.data; + if (try self.air.value(callee, mod)) |func_value| { + if (mod.funcPtrUnwrap(mod.intern_pool.indexToFunc(func_value.ip_index))) |func| { const atom_index = try elf_file.getOrCreateAtomForDecl(func.owner_decl); const atom = elf_file.getAtom(atom_index); _ = try atom.getOrCreateOffsetTableEntry(elf_file); const got_addr = @intCast(u32, atom.getOffsetTableAddress(elf_file)); - try self.genSetReg(Type.initTag(.usize), .ra, .{ .memory = got_addr }); + try self.genSetReg(Type.usize, .ra, .{ .memory = got_addr }); _ = try self.addInst(.{ .tag = .jalr, .data = .{ .i_type = .{ @@ -1752,7 +1757,7 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallModifier .imm12 = 0, } }, }); - } else if (func_value.castTag(.extern_fn)) |_| { + } else if (mod.intern_pool.indexToKey(func_value.ip_index) == .extern_func) { return self.fail("TODO implement calling extern functions", .{}); } else { return self.fail("TODO implement calling bitcasted functions", .{}); @@ -1796,7 +1801,8 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallModifier } fn ret(self: *Self, mcv: MCValue) !void { - const ret_ty = self.fn_type.fnReturnType(); + const mod = self.bin_file.options.module.?; + const ret_ty = self.fn_type.fnReturnType(mod); try self.setRegOrMem(ret_ty, self.ret_mcv, mcv); // Just add space for an instruction, patch this later const index = try self.addInst(.{ @@ -1825,10 +1831,10 @@ fn airCmp(self: *Self, inst: Air.Inst.Index, op: math.CompareOperator) !void { const bin_op = self.air.instructions.items(.data)[inst].bin_op; if (self.liveness.isUnused(inst)) return self.finishAir(inst, .dead, .{ bin_op.lhs, bin_op.rhs, .none }); - const ty = self.air.typeOf(bin_op.lhs); + const ty = self.typeOf(bin_op.lhs); const mod = self.bin_file.options.module.?; - assert(ty.eql(self.air.typeOf(bin_op.rhs), mod)); - if (ty.zigTypeTag() == .ErrorSet) + assert(ty.eql(self.typeOf(bin_op.rhs), mod)); + if (ty.zigTypeTag(mod) == .ErrorSet) return self.fail("TODO implement cmp for errors", .{}); const lhs = try self.resolveInst(bin_op.lhs); @@ -1869,8 +1875,9 @@ fn airDbgStmt(self: *Self, inst: Air.Inst.Index) !void { } fn airDbgInline(self: *Self, inst: Air.Inst.Index) !void { - const ty_pl = self.air.instructions.items(.data)[inst].ty_pl; - const function = self.air.values[ty_pl.payload].castTag(.function).?.data; + const ty_fn = self.air.instructions.items(.data)[inst].ty_fn; + const mod = self.bin_file.options.module.?; + const function = mod.funcPtr(ty_fn.func); // TODO emit debug info for function change _ = function; return self.finishAir(inst, .dead, .{ .none, .none, .none }); @@ -1946,7 +1953,7 @@ fn airIsNullPtr(self: *Self, inst: Air.Inst.Index) !void { break :blk try self.allocRegOrMem(inst, true); } }; - try self.load(operand, operand_ptr, self.air.typeOf(un_op)); + try 
self.load(operand, operand_ptr, self.typeOf(un_op)); break :result try self.isNull(operand); }; return self.finishAir(inst, result, .{ un_op, .none, .none }); @@ -1973,7 +1980,7 @@ fn airIsNonNullPtr(self: *Self, inst: Air.Inst.Index) !void { break :blk try self.allocRegOrMem(inst, true); } }; - try self.load(operand, operand_ptr, self.air.typeOf(un_op)); + try self.load(operand, operand_ptr, self.typeOf(un_op)); break :result try self.isNonNull(operand); }; return self.finishAir(inst, result, .{ un_op, .none, .none }); @@ -2000,7 +2007,7 @@ fn airIsErrPtr(self: *Self, inst: Air.Inst.Index) !void { break :blk try self.allocRegOrMem(inst, true); } }; - try self.load(operand, operand_ptr, self.air.typeOf(un_op)); + try self.load(operand, operand_ptr, self.typeOf(un_op)); break :result try self.isErr(operand); }; return self.finishAir(inst, result, .{ un_op, .none, .none }); @@ -2027,7 +2034,7 @@ fn airIsNonErrPtr(self: *Self, inst: Air.Inst.Index) !void { break :blk try self.allocRegOrMem(inst, true); } }; - try self.load(operand, operand_ptr, self.air.typeOf(un_op)); + try self.load(operand, operand_ptr, self.typeOf(un_op)); break :result try self.isNonErr(operand); }; return self.finishAir(inst, result, .{ un_op, .none, .none }); @@ -2107,13 +2114,14 @@ fn airBoolOp(self: *Self, inst: Air.Inst.Index) !void { fn br(self: *Self, block: Air.Inst.Index, operand: Air.Inst.Ref) !void { const block_data = self.blocks.getPtr(block).?; - if (self.air.typeOf(operand).hasRuntimeBits()) { + const mod = self.bin_file.options.module.?; + if (self.typeOf(operand).hasRuntimeBits(mod)) { const operand_mcv = try self.resolveInst(operand); const block_mcv = block_data.mcv; if (block_mcv == .none) { block_data.mcv = operand_mcv; } else { - try self.setRegOrMem(self.air.typeOfIndex(block), block_mcv, operand_mcv); + try self.setRegOrMem(self.typeOfIndex(block), block_mcv, operand_mcv); } } return self.brVoid(block); @@ -2176,7 +2184,7 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void { const arg_mcv = try self.resolveInst(input); try self.register_manager.getReg(reg, null); - try self.genSetReg(self.air.typeOf(input), reg, arg_mcv); + try self.genSetReg(self.typeOf(input), reg, arg_mcv); } { @@ -2372,7 +2380,7 @@ fn airBitCast(self: *Self, inst: Air.Inst.Index) !void { defer if (operand_lock) |lock| self.register_manager.unlockReg(lock); const dest = try self.allocRegOrMem(inst, true); - try self.setRegOrMem(self.air.typeOfIndex(inst), dest, operand); + try self.setRegOrMem(self.typeOfIndex(inst), dest, operand); break :result dest; }; return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); @@ -2489,8 +2497,9 @@ fn airReduce(self: *Self, inst: Air.Inst.Index) !void { } fn airAggregateInit(self: *Self, inst: Air.Inst.Index) !void { - const vector_ty = self.air.typeOfIndex(inst); - const len = vector_ty.vectorLen(); + const mod = self.bin_file.options.module.?; + const vector_ty = self.typeOfIndex(inst); + const len = vector_ty.vectorLen(mod); const ty_pl = self.air.instructions.items(.data)[inst].ty_pl; const elements = @ptrCast([]const Air.Inst.Ref, self.air.extra[ty_pl.payload..][0..len]); const result: MCValue = res: { @@ -2533,37 +2542,32 @@ fn airMulAdd(self: *Self, inst: Air.Inst.Index) !void { } fn resolveInst(self: *Self, inst: Air.Inst.Ref) InnerError!MCValue { - // First section of indexes correspond to a set number of constant values. 
- const ref_int = @enumToInt(inst); - if (ref_int < Air.Inst.Ref.typed_value_map.len) { - const tv = Air.Inst.Ref.typed_value_map[ref_int]; - if (!tv.ty.hasRuntimeBits()) { - return MCValue{ .none = {} }; - } - return self.genTypedValue(tv); - } + const mod = self.bin_file.options.module.?; // If the type has no codegen bits, no need to store it. - const inst_ty = self.air.typeOf(inst); - if (!inst_ty.hasRuntimeBits()) + const inst_ty = self.typeOf(inst); + if (!inst_ty.hasRuntimeBits(mod)) return MCValue{ .none = {} }; - const inst_index = @intCast(Air.Inst.Index, ref_int - Air.Inst.Ref.typed_value_map.len); + const inst_index = Air.refToIndex(inst) orelse return self.genTypedValue(.{ + .ty = inst_ty, + .val = (try self.air.value(inst, mod)).?, + }); + switch (self.air.instructions.items(.tag)[inst_index]) { - .constant => { + .interned => { // Constants have static lifetimes, so they are always memoized in the outer most table. const branch = &self.branch_stack.items[0]; const gop = try branch.inst_table.getOrPut(self.gpa, inst_index); if (!gop.found_existing) { - const ty_pl = self.air.instructions.items(.data)[inst_index].ty_pl; + const interned = self.air.instructions.items(.data)[inst_index].interned; gop.value_ptr.* = try self.genTypedValue(.{ .ty = inst_ty, - .val = self.air.values[ty_pl.payload], + .val = interned.toValue(), }); } return gop.value_ptr.*; }, - .const_ty => unreachable, else => return self.getResolvedInstValue(inst_index), } } @@ -2616,12 +2620,11 @@ const CallMCValues = struct { /// Caller must call `CallMCValues.deinit`. fn resolveCallingConventionValues(self: *Self, fn_ty: Type) !CallMCValues { - const cc = fn_ty.fnCallingConvention(); - const param_types = try self.gpa.alloc(Type, fn_ty.fnParamLen()); - defer self.gpa.free(param_types); - fn_ty.fnParamTypes(param_types); + const mod = self.bin_file.options.module.?; + const fn_info = mod.typeToFunc(fn_ty).?; + const cc = fn_info.cc; var result: CallMCValues = .{ - .args = try self.gpa.alloc(MCValue, param_types.len), + .args = try self.gpa.alloc(MCValue, fn_info.param_types.len), // These undefined values must be populated before returning from this function. .return_value = undefined, .stack_byte_count = undefined, @@ -2629,7 +2632,7 @@ fn resolveCallingConventionValues(self: *Self, fn_ty: Type) !CallMCValues { }; errdefer self.gpa.free(result.args); - const ret_ty = fn_ty.fnReturnType(); + const ret_ty = fn_ty.fnReturnType(mod); switch (cc) { .Naked => { @@ -2649,8 +2652,8 @@ fn resolveCallingConventionValues(self: *Self, fn_ty: Type) !CallMCValues { var next_stack_offset: u32 = 0; const argument_registers = [_]Register{ .a0, .a1, .a2, .a3, .a4, .a5, .a6, .a7 }; - for (param_types, 0..) |ty, i| { - const param_size = @intCast(u32, ty.abiSize(self.target.*)); + for (fn_info.param_types, 0..) 
|ty, i| { + const param_size = @intCast(u32, ty.toType().abiSize(mod)); if (param_size <= 8) { if (next_register < argument_registers.len) { result.args[i] = .{ .register = argument_registers[next_register] }; @@ -2680,14 +2683,14 @@ fn resolveCallingConventionValues(self: *Self, fn_ty: Type) !CallMCValues { else => return self.fail("TODO implement function parameters for {} on riscv64", .{cc}), } - if (ret_ty.zigTypeTag() == .NoReturn) { + if (ret_ty.zigTypeTag(mod) == .NoReturn) { result.return_value = .{ .unreach = {} }; - } else if (!ret_ty.hasRuntimeBits()) { + } else if (!ret_ty.hasRuntimeBits(mod)) { result.return_value = .{ .none = {} }; } else switch (cc) { .Naked => unreachable, .Unspecified, .C => { - const ret_ty_size = @intCast(u32, ret_ty.abiSize(self.target.*)); + const ret_ty_size = @intCast(u32, ret_ty.abiSize(mod)); if (ret_ty_size <= 8) { result.return_value = .{ .register = .a0 }; } else if (ret_ty_size <= 16) { @@ -2731,3 +2734,13 @@ fn parseRegName(name: []const u8) ?Register { } return std.meta.stringToEnum(Register, name); } + +fn typeOf(self: *Self, inst: Air.Inst.Ref) Type { + const mod = self.bin_file.options.module.?; + return self.air.typeOf(inst, &mod.intern_pool); +} + +fn typeOfIndex(self: *Self, inst: Air.Inst.Index) Type { + const mod = self.bin_file.options.module.?; + return self.air.typeOfIndex(inst, &mod.intern_pool); +} diff --git a/src/arch/riscv64/abi.zig b/src/arch/riscv64/abi.zig index 26286a1e22..41a1850635 100644 --- a/src/arch/riscv64/abi.zig +++ b/src/arch/riscv64/abi.zig @@ -3,17 +3,19 @@ const bits = @import("bits.zig"); const Register = bits.Register; const RegisterManagerFn = @import("../../register_manager.zig").RegisterManager; const Type = @import("../../type.zig").Type; +const Module = @import("../../Module.zig"); pub const Class = enum { memory, byval, integer, double_integer }; -pub fn classifyType(ty: Type, target: std.Target) Class { - std.debug.assert(ty.hasRuntimeBitsIgnoreComptime()); +pub fn classifyType(ty: Type, mod: *Module) Class { + const target = mod.getTarget(); + std.debug.assert(ty.hasRuntimeBitsIgnoreComptime(mod)); - const max_byval_size = target.cpu.arch.ptrBitWidth() * 2; - switch (ty.zigTypeTag()) { + const max_byval_size = target.ptrBitWidth() * 2; + switch (ty.zigTypeTag(mod)) { .Struct => { - const bit_size = ty.bitSize(target); - if (ty.containerLayout() == .Packed) { + const bit_size = ty.bitSize(mod); + if (ty.containerLayout(mod) == .Packed) { if (bit_size > max_byval_size) return .memory; return .byval; } @@ -23,8 +25,8 @@ pub fn classifyType(ty: Type, target: std.Target) Class { return .integer; }, .Union => { - const bit_size = ty.bitSize(target); - if (ty.containerLayout() == .Packed) { + const bit_size = ty.bitSize(mod); + if (ty.containerLayout(mod) == .Packed) { if (bit_size > max_byval_size) return .memory; return .byval; } @@ -36,21 +38,21 @@ pub fn classifyType(ty: Type, target: std.Target) Class { .Bool => return .integer, .Float => return .byval, .Int, .Enum, .ErrorSet => { - const bit_size = ty.bitSize(target); + const bit_size = ty.bitSize(mod); if (bit_size > max_byval_size) return .memory; return .byval; }, .Vector => { - const bit_size = ty.bitSize(target); + const bit_size = ty.bitSize(mod); if (bit_size > max_byval_size) return .memory; return .integer; }, .Optional => { - std.debug.assert(ty.isPtrLikeOptional()); + std.debug.assert(ty.isPtrLikeOptional(mod)); return .byval; }, .Pointer => { - std.debug.assert(!ty.isSlice()); + std.debug.assert(!ty.isSlice(mod)); return .byval; }, 
.ErrorUnion, diff --git a/src/arch/sparc64/CodeGen.zig b/src/arch/sparc64/CodeGen.zig index 2686852bab..e339794fd4 100644 --- a/src/arch/sparc64/CodeGen.zig +++ b/src/arch/sparc64/CodeGen.zig @@ -22,6 +22,7 @@ const Type = @import("../../type.zig").Type; const CodeGenError = codegen.CodeGenError; const Result = @import("../../codegen.zig").Result; const DebugInfoOutput = @import("../../codegen.zig").DebugInfoOutput; +const Endian = std.builtin.Endian; const build_options = @import("build_options"); @@ -30,6 +31,7 @@ const abi = @import("abi.zig"); const errUnionPayloadOffset = codegen.errUnionPayloadOffset; const errUnionErrorOffset = codegen.errUnionErrorOffset; const Instruction = bits.Instruction; +const ASI = Instruction.ASI; const ShiftWidth = Instruction.ShiftWidth; const RegisterManager = abi.RegisterManager; const RegisterLock = RegisterManager.RegisterLock; @@ -141,6 +143,8 @@ const MCValue = union(enum) { /// The value is one of the stack variables. /// If the type is a pointer, it means the pointer address is in the stack at this offset. /// Note that this stores the plain value (i.e without the effects of the stack bias). + /// Always convert this value into machine offsets with realStackOffset() before + /// lowering into asm! stack_offset: u32, /// The value is a pointer to one of the stack variables (payload is stack offset). ptr_stack_offset: u32, @@ -256,7 +260,7 @@ const BigTomb = struct { pub fn generate( bin_file: *link.File, src_loc: Module.SrcLoc, - module_fn: *Module.Fn, + module_fn_index: Module.Fn.Index, air: Air, liveness: Liveness, code: *std.ArrayList(u8), @@ -267,12 +271,11 @@ pub fn generate( } const mod = bin_file.options.module.?; + const module_fn = mod.funcPtr(module_fn_index); const fn_owner_decl = mod.declPtr(module_fn.owner_decl); assert(fn_owner_decl.has_tv); const fn_type = fn_owner_decl.ty; - log.debug("fn {s}", .{fn_owner_decl.name}); - var branch_stack = std.ArrayList(Branch).init(bin_file.allocator); defer { assert(branch_stack.items.len == 1); @@ -359,7 +362,8 @@ pub fn generate( } fn gen(self: *Self) !void { - const cc = self.fn_type.fnCallingConvention(); + const mod = self.bin_file.options.module.?; + const cc = self.fn_type.fnCallingConvention(mod); if (cc != .Naked) { // TODO Finish function prologue and epilogue for sparc64. 
@@ -419,7 +423,7 @@ fn gen(self: *Self) !void { // Backpatch stack offset const total_stack_size = self.max_end_stack + abi.stack_reserved_area; - const stack_size = mem.alignForwardGeneric(u32, total_stack_size, self.stack_align); + const stack_size = mem.alignForward(u32, total_stack_size, self.stack_align); if (math.cast(i13, stack_size)) |size| { self.mir_instructions.set(save_inst, .{ .tag = .save, @@ -486,13 +490,14 @@ fn gen(self: *Self) !void { } fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { + const mod = self.bin_file.options.module.?; + const ip = &mod.intern_pool; const air_tags = self.air.instructions.items(.tag); for (body) |inst| { // TODO: remove now-redundant isUnused calls from AIR handler functions - if (self.liveness.isUnused(inst) and !self.air.mustLower(inst)) { + if (self.liveness.isUnused(inst) and !self.air.mustLower(inst, ip)) continue; - } const old_air_bookkeeping = self.air_bookkeeping; try self.ensureProcessDeathCapacity(Liveness.bpi); @@ -595,7 +600,7 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .ret_load => try self.airRetLoad(inst), .store => try self.airStore(inst, false), .store_safe => try self.airStore(inst, true), - .struct_field_ptr=> @panic("TODO try self.airStructFieldPtr(inst)"), + .struct_field_ptr=> try self.airStructFieldPtr(inst), .struct_field_val=> try self.airStructFieldVal(inst), .array_to_slice => try self.airArrayToSlice(inst), .int_to_float => try self.airIntToFloat(inst), @@ -613,7 +618,7 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .clz => try self.airClz(inst), .ctz => try self.airCtz(inst), .popcount => try self.airPopcount(inst), - .byte_swap => @panic("TODO try self.airByteSwap(inst)"), + .byte_swap => try self.airByteSwap(inst), .bit_reverse => try self.airBitReverse(inst), .tag_name => try self.airTagName(inst), .error_name => try self.airErrorName(inst), @@ -663,8 +668,8 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .slice_ptr => try self.airSlicePtr(inst), .slice_len => try self.airSliceLen(inst), - .ptr_slice_len_ptr => @panic("TODO try self.airPtrSliceLenPtr(inst)"), - .ptr_slice_ptr_ptr => @panic("TODO try self.airPtrSlicePtrPtr(inst)"), + .ptr_slice_len_ptr => try self.airPtrSliceLenPtr(inst), + .ptr_slice_ptr_ptr => try self.airPtrSlicePtrPtr(inst), .array_elem_val => try self.airArrayElemVal(inst), .slice_elem_val => try self.airSliceElemVal(inst), @@ -672,8 +677,7 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .ptr_elem_val => try self.airPtrElemVal(inst), .ptr_elem_ptr => try self.airPtrElemPtr(inst), - .constant => unreachable, // excluded from function bodies - .const_ty => unreachable, // excluded from function bodies + .inferred_alloc, .inferred_alloc_comptime, .interned => unreachable, .unreach => self.finishAirBookkeeping(), .optional_payload => try self.airOptionalPayload(inst), @@ -720,10 +724,10 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .error_set_has_value => @panic("TODO implement error_set_has_value"), .vector_store_elem => @panic("TODO implement vector_store_elem"), - .c_va_arg => @panic("TODO implement c_va_arg"), - .c_va_copy => @panic("TODO implement c_va_copy"), - .c_va_end => @panic("TODO implement c_va_end"), - .c_va_start => @panic("TODO implement c_va_start"), + .c_va_arg => return self.fail("TODO implement c_va_arg", .{}), + .c_va_copy => return self.fail("TODO implement c_va_copy", .{}), + .c_va_end => return self.fail("TODO 
implement c_va_end", .{}), + .c_va_start => return self.fail("TODO implement c_va_start", .{}), .wasm_memory_size => unreachable, .wasm_memory_grow => unreachable, @@ -754,18 +758,18 @@ fn airAddSubWithOverflow(self: *Self, inst: Air.Inst.Index) !void { const tag = self.air.instructions.items(.tag)[inst]; const ty_pl = self.air.instructions.items(.data)[inst].ty_pl; const extra = self.air.extraData(Air.Bin, ty_pl.payload).data; + const mod = self.bin_file.options.module.?; const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: { const lhs = try self.resolveInst(extra.lhs); const rhs = try self.resolveInst(extra.rhs); - const lhs_ty = self.air.typeOf(extra.lhs); - const rhs_ty = self.air.typeOf(extra.rhs); + const lhs_ty = self.typeOf(extra.lhs); + const rhs_ty = self.typeOf(extra.rhs); - switch (lhs_ty.zigTypeTag()) { + switch (lhs_ty.zigTypeTag(mod)) { .Vector => return self.fail("TODO implement add_with_overflow/sub_with_overflow for vectors", .{}), .Int => { - const mod = self.bin_file.options.module.?; assert(lhs_ty.eql(rhs_ty, mod)); - const int_info = lhs_ty.intInfo(self.target.*); + const int_info = lhs_ty.intInfo(mod); switch (int_info.bits) { 32, 64 => { // Only say yes if the operation is @@ -832,8 +836,9 @@ fn airAddSubWithOverflow(self: *Self, inst: Air.Inst.Index) !void { } fn airAggregateInit(self: *Self, inst: Air.Inst.Index) !void { - const vector_ty = self.air.typeOfIndex(inst); - const len = vector_ty.vectorLen(); + const mod = self.bin_file.options.module.?; + const vector_ty = self.typeOfIndex(inst); + const len = vector_ty.vectorLen(mod); const ty_pl = self.air.instructions.items(.data)[inst].ty_pl; const elements = @ptrCast([]const Air.Inst.Ref, self.air.extra[ty_pl.payload..][0..len]); const result: MCValue = res: { @@ -865,19 +870,20 @@ fn airArrayElemVal(self: *Self, inst: Air.Inst.Index) !void { } fn airArrayToSlice(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; const ty_op = self.air.instructions.items(.data)[inst].ty_op; const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: { - const ptr_ty = self.air.typeOf(ty_op.operand); + const ptr_ty = self.typeOf(ty_op.operand); const ptr = try self.resolveInst(ty_op.operand); - const array_ty = ptr_ty.childType(); - const array_len = @intCast(u32, array_ty.arrayLen()); + const array_ty = ptr_ty.childType(mod); + const array_len = @intCast(u32, array_ty.arrayLen(mod)); - const ptr_bits = self.target.cpu.arch.ptrBitWidth(); + const ptr_bits = self.target.ptrBitWidth(); const ptr_bytes = @divExact(ptr_bits, 8); const stack_offset = try self.allocMem(inst, ptr_bytes * 2, ptr_bytes * 2); try self.genSetStack(ptr_ty, stack_offset, ptr); - try self.genSetStack(Type.initTag(.usize), stack_offset - ptr_bytes, .{ .immediate = array_len }); + try self.genSetStack(Type.usize, stack_offset - ptr_bytes, .{ .immediate = array_len }); break :result MCValue{ .stack_offset = stack_offset }; }; return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); @@ -931,7 +937,7 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void { const arg_mcv = try self.resolveInst(input); try self.register_manager.getReg(reg, null); - try self.genSetReg(self.air.typeOf(input), reg, arg_mcv); + try self.genSetReg(self.typeOf(input), reg, arg_mcv); } { @@ -1004,17 +1010,17 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void { } fn airArg(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; const arg_index = self.arg_index; 
self.arg_index += 1; - const ty = self.air.typeOfIndex(inst); + const ty = self.typeOfIndex(inst); const arg = self.args[arg_index]; const mcv = blk: { switch (arg) { .stack_offset => |off| { - const mod = self.bin_file.options.module.?; - const abi_size = math.cast(u32, ty.abiSize(self.target.*)) orelse { + const abi_size = math.cast(u32, ty.abiSize(mod)) orelse { return self.fail("type '{}' too big to fit into stack frame", .{ty.fmt(mod)}); }; const offset = off + abi_size; @@ -1059,8 +1065,8 @@ fn airBinOp(self: *Self, inst: Air.Inst.Index, tag: Air.Inst.Tag) !void { const bin_op = self.air.instructions.items(.data)[inst].bin_op; const lhs = try self.resolveInst(bin_op.lhs); const rhs = try self.resolveInst(bin_op.rhs); - const lhs_ty = self.air.typeOf(bin_op.lhs); - const rhs_ty = self.air.typeOf(bin_op.rhs); + const lhs_ty = self.typeOf(bin_op.lhs); + const rhs_ty = self.typeOf(bin_op.rhs); const result: MCValue = if (self.liveness.isUnused(inst)) .dead else @@ -1084,8 +1090,8 @@ fn airPtrArithmetic(self: *Self, inst: Air.Inst.Index, tag: Air.Inst.Tag) !void const bin_op = self.air.extraData(Air.Bin, ty_pl.payload).data; const lhs = try self.resolveInst(bin_op.lhs); const rhs = try self.resolveInst(bin_op.rhs); - const lhs_ty = self.air.typeOf(bin_op.lhs); - const rhs_ty = self.air.typeOf(bin_op.rhs); + const lhs_ty = self.typeOf(bin_op.lhs); + const rhs_ty = self.typeOf(bin_op.rhs); const result: MCValue = if (self.liveness.isUnused(inst)) .dead else @@ -1111,7 +1117,7 @@ fn airBitCast(self: *Self, inst: Air.Inst.Index) !void { defer if (operand_lock) |lock| self.register_manager.unlockReg(lock); const dest = try self.allocRegOrMem(inst, true); - try self.setRegOrMem(self.air.typeOfIndex(inst), dest, operand); + try self.setRegOrMem(self.typeOfIndex(inst), dest, operand); break :result dest; }; return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); @@ -1198,6 +1204,91 @@ fn airBreakpoint(self: *Self) !void { return self.finishAirBookkeeping(); } +fn airByteSwap(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; + const ty_op = self.air.instructions.items(.data)[inst].ty_op; + + // We have a hardware byteswapper in SPARCv9; don't let mainstream compilers mislead you. + // That being said, the strategy to lower this is: + // - If src is an immediate, comptime-swap it. + // - If src is in memory then issue an LD*A with #ASI_P_[opposite-endian] + // - If src is a register then issue an ST*A with #ASI_P_[opposite-endian] + // to a stack slot, then follow with a normal load from said stack slot. + // This is because on some implementations, ASI-tagged memory operations are non-pipelineable + // and loads tend to have longer latency than stores, so the sequence will minimize stalls. + // The result will always be either another immediate or stored in a register. + // TODO: Fold byteswap+store into a single ST*A and load+byteswap into a single LD*A.
+ const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: { + const operand = try self.resolveInst(ty_op.operand); + const operand_ty = self.typeOf(ty_op.operand); + switch (operand_ty.zigTypeTag(mod)) { + .Vector => return self.fail("TODO byteswap for vectors", .{}), + .Int => { + const int_info = operand_ty.intInfo(mod); + if (int_info.bits == 8) break :result operand; + + const abi_size = int_info.bits >> 3; + const abi_align = operand_ty.abiAlignment(mod); + const opposite_endian_asi = switch (self.target.cpu.arch.endian()) { + Endian.Big => ASI.asi_primary_little, + Endian.Little => ASI.asi_primary, + }; + + switch (operand) { + .immediate => |imm| { + const swapped = switch (int_info.bits) { + 16 => @byteSwap(@intCast(u16, imm)), + 24 => @byteSwap(@intCast(u24, imm)), + 32 => @byteSwap(@intCast(u32, imm)), + 40 => @byteSwap(@intCast(u40, imm)), + 48 => @byteSwap(@intCast(u48, imm)), + 56 => @byteSwap(@intCast(u56, imm)), + 64 => @byteSwap(@intCast(u64, imm)), + else => return self.fail("TODO synthesize SPARCv9 byteswap for other integer sizes", .{}), + }; + break :result .{ .immediate = swapped }; + }, + .register => |reg| { + if (int_info.bits > 64 or @popCount(int_info.bits) != 1) + return self.fail("TODO synthesize SPARCv9 byteswap for other integer sizes", .{}); + + const off = try self.allocMem(inst, abi_size, abi_align); + const off_reg = try self.copyToTmpRegister(operand_ty, .{ .immediate = realStackOffset(off) }); + + try self.genStoreASI(reg, .sp, off_reg, abi_size, opposite_endian_asi); + try self.genLoad(reg, .sp, Register, off_reg, abi_size); + break :result .{ .register = reg }; + }, + .memory => { + if (int_info.bits > 64 or @popCount(int_info.bits) != 1) + return self.fail("TODO synthesize SPARCv9 byteswap for other integer sizes", .{}); + + const addr_reg = try self.copyToTmpRegister(operand_ty, operand); + const dst_reg = try self.register_manager.allocReg(null, gp); + + try self.genLoadASI(dst_reg, addr_reg, .g0, abi_size, opposite_endian_asi); + break :result .{ .register = dst_reg }; + }, + .stack_offset => |off| { + if (int_info.bits > 64 or @popCount(int_info.bits) != 1) + return self.fail("TODO synthesize SPARCv9 byteswap for other integer sizes", .{}); + + const off_reg = try self.copyToTmpRegister(operand_ty, .{ .immediate = realStackOffset(off) }); + const dst_reg = try self.register_manager.allocReg(null, gp); + + try self.genLoadASI(dst_reg, .sp, off_reg, abi_size, opposite_endian_asi); + break :result .{ .register = dst_reg }; + }, + else => unreachable, + } + }, + else => unreachable, + } + }; + + return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); +} + fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallModifier) !void { if (modifier == .always_tail) return self.fail("TODO implement tail calls for {}", .{self.target.cpu.arch}); @@ -1205,10 +1296,11 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallModifier const callee = pl_op.operand; const extra = self.air.extraData(Air.Call, pl_op.payload); const args = @ptrCast([]const Air.Inst.Ref, self.air.extra[extra.end .. 
extra.end + extra.data.args_len]); - const ty = self.air.typeOf(callee); - const fn_ty = switch (ty.zigTypeTag()) { + const ty = self.typeOf(callee); + const mod = self.bin_file.options.module.?; + const fn_ty = switch (ty.zigTypeTag(mod)) { .Fn => ty, - .Pointer => ty.childType(), + .Pointer => ty.childType(mod), else => unreachable, }; @@ -1228,7 +1320,7 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallModifier for (info.args, 0..) |mc_arg, arg_i| { const arg = args[arg_i]; - const arg_ty = self.air.typeOf(arg); + const arg_ty = self.typeOf(arg); const arg_mcv = try self.resolveInst(arg); switch (mc_arg) { @@ -1249,10 +1341,9 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallModifier // Due to incremental compilation, how function calls are generated depends // on linking. - if (self.air.value(callee)) |func_value| { + if (try self.air.value(callee, mod)) |func_value| { if (self.bin_file.tag == link.File.Elf.base_tag) { - if (func_value.castTag(.function)) |func_payload| { - const func = func_payload.data; + if (mod.funcPtrUnwrap(mod.intern_pool.indexToFunc(func_value.ip_index))) |func| { const got_addr = if (self.bin_file.cast(link.File.Elf)) |elf_file| blk: { const atom_index = try elf_file.getOrCreateAtomForDecl(func.owner_decl); const atom = elf_file.getAtom(atom_index); @@ -1260,7 +1351,7 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallModifier break :blk @intCast(u32, atom.getOffsetTableAddress(elf_file)); } else unreachable; - try self.genSetReg(Type.initTag(.usize), .o7, .{ .memory = got_addr }); + try self.genSetReg(Type.usize, .o7, .{ .memory = got_addr }); _ = try self.addInst(.{ .tag = .jmpl, @@ -1279,14 +1370,14 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallModifier .tag = .nop, .data = .{ .nop = {} }, }); - } else if (func_value.castTag(.extern_fn)) |_| { + } else if (mod.intern_pool.indexToKey(func_value.ip_index) == .extern_func) { return self.fail("TODO implement calling extern functions", .{}); } else { return self.fail("TODO implement calling bitcasted functions", .{}); } } else @panic("TODO SPARCv9 currently does not support non-ELF binaries"); } else { - assert(ty.zigTypeTag() == .Pointer); + assert(ty.zigTypeTag(mod) == .Pointer); const mcv = try self.resolveInst(callee); try self.genSetReg(ty, .o7, mcv); @@ -1334,25 +1425,24 @@ fn airClz(self: *Self, inst: Air.Inst.Index) !void { fn airCmp(self: *Self, inst: Air.Inst.Index, op: math.CompareOperator) !void { const bin_op = self.air.instructions.items(.data)[inst].bin_op; + const mod = self.bin_file.options.module.?; const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: { const lhs = try self.resolveInst(bin_op.lhs); const rhs = try self.resolveInst(bin_op.rhs); - const lhs_ty = self.air.typeOf(bin_op.lhs); + const lhs_ty = self.typeOf(bin_op.lhs); - var int_buffer: Type.Payload.Bits = undefined; - const int_ty = switch (lhs_ty.zigTypeTag()) { + const int_ty = switch (lhs_ty.zigTypeTag(mod)) { .Vector => unreachable, // Handled by cmp_vector. 
- .Enum => lhs_ty.intTagType(&int_buffer), + .Enum => lhs_ty.intTagType(mod), .Int => lhs_ty, - .Bool => Type.initTag(.u1), + .Bool => Type.u1, .Pointer => Type.usize, - .ErrorSet => Type.initTag(.u16), + .ErrorSet => Type.u16, .Optional => blk: { - var opt_buffer: Type.Payload.ElemType = undefined; - const payload_ty = lhs_ty.optionalChild(&opt_buffer); - if (!payload_ty.hasRuntimeBitsIgnoreComptime()) { - break :blk Type.initTag(.u1); - } else if (lhs_ty.isPtrLikeOptional()) { + const payload_ty = lhs_ty.optionalChild(mod); + if (!payload_ty.hasRuntimeBitsIgnoreComptime(mod)) { + break :blk Type.u1; + } else if (lhs_ty.isPtrLikeOptional(mod)) { break :blk Type.usize; } else { return self.fail("TODO SPARCv9 cmp non-pointer optionals", .{}); @@ -1362,7 +1452,7 @@ fn airCmp(self: *Self, inst: Air.Inst.Index, op: math.CompareOperator) !void { else => unreachable, }; - const int_info = int_ty.intInfo(self.target.*); + const int_info = int_ty.intInfo(mod); if (int_info.bits <= 64) { _ = try self.binOp(.cmp_eq, lhs, rhs, int_ty, int_ty, BinOpMetadata{ .lhs = bin_op.lhs, @@ -1424,8 +1514,8 @@ fn airCondBr(self: *Self, inst: Air.Inst.Index) !void { // whether it needs to be spilled in the branches if (self.liveness.operandDies(inst, 0)) { const op_int = @enumToInt(pl_op.operand); - if (op_int >= Air.Inst.Ref.typed_value_map.len) { - const op_index = @intCast(Air.Inst.Index, op_int - Air.Inst.Ref.typed_value_map.len); + if (op_int >= Air.ref_start_index) { + const op_index = @intCast(Air.Inst.Index, op_int - Air.ref_start_index); self.processDeath(op_index); } } @@ -1515,7 +1605,7 @@ fn airCondBr(self: *Self, inst: Air.Inst.Index) !void { log.debug("consolidating else_entry {d} {}=>{}", .{ else_key, else_value, canon_mcv }); // TODO make sure the destination stack offset / register does not already have something // going on there. - try self.setRegOrMem(self.air.typeOfIndex(else_key), canon_mcv, else_value); + try self.setRegOrMem(self.typeOfIndex(else_key), canon_mcv, else_value); // TODO track the new register / stack allocation } try parent_branch.inst_table.ensureUnusedCapacity(self.gpa, saved_then_branch.inst_table.count()); @@ -1542,7 +1632,7 @@ fn airCondBr(self: *Self, inst: Air.Inst.Index) !void { log.debug("consolidating then_entry {d} {}=>{}", .{ then_key, parent_mcv, then_value }); // TODO make sure the destination stack offset / register does not already have something // going on there. 
- try self.setRegOrMem(self.air.typeOfIndex(then_key), parent_mcv, then_value); + try self.setRegOrMem(self.typeOfIndex(then_key), parent_mcv, then_value); // TODO track the new register / stack allocation } @@ -1568,8 +1658,9 @@ fn airDbgBlock(self: *Self, inst: Air.Inst.Index) !void { } fn airDbgInline(self: *Self, inst: Air.Inst.Index) !void { - const ty_pl = self.air.instructions.items(.data)[inst].ty_pl; - const function = self.air.values[ty_pl.payload].castTag(.function).?.data; + const ty_fn = self.air.instructions.items(.data)[inst].ty_fn; + const mod = self.bin_file.options.module.?; + const function = mod.funcPtr(ty_fn.func); // TODO emit debug info for function change _ = function; return self.finishAir(inst, .dead, .{ .none, .none, .none }); @@ -1664,10 +1755,11 @@ fn airIntCast(self: *Self, inst: Air.Inst.Index) !void { if (self.liveness.isUnused(inst)) return self.finishAir(inst, .dead, .{ ty_op.operand, .none, .none }); - const operand_ty = self.air.typeOf(ty_op.operand); + const mod = self.bin_file.options.module.?; + const operand_ty = self.typeOf(ty_op.operand); const operand = try self.resolveInst(ty_op.operand); - const info_a = operand_ty.intInfo(self.target.*); - const info_b = self.air.typeOfIndex(inst).intInfo(self.target.*); + const info_a = operand_ty.intInfo(mod); + const info_b = self.typeOfIndex(inst).intInfo(mod); if (info_a.signedness != info_b.signedness) return self.fail("TODO gen intcast sign safety in semantic analysis", .{}); @@ -1689,7 +1781,7 @@ fn airIsErr(self: *Self, inst: Air.Inst.Index) !void { const un_op = self.air.instructions.items(.data)[inst].un_op; const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: { const operand = try self.resolveInst(un_op); - const ty = self.air.typeOf(un_op); + const ty = self.typeOf(un_op); break :result try self.isErr(ty, operand); }; return self.finishAir(inst, result, .{ un_op, .none, .none }); @@ -1699,7 +1791,7 @@ fn airIsNonErr(self: *Self, inst: Air.Inst.Index) !void { const un_op = self.air.instructions.items(.data)[inst].un_op; const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: { const operand = try self.resolveInst(un_op); - const ty = self.air.typeOf(un_op); + const ty = self.typeOf(un_op); break :result try self.isNonErr(ty, operand); }; return self.finishAir(inst, result, .{ un_op, .none, .none }); @@ -1724,15 +1816,16 @@ fn airIsNonNull(self: *Self, inst: Air.Inst.Index) !void { } fn airLoad(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; const ty_op = self.air.instructions.items(.data)[inst].ty_op; - const elem_ty = self.air.typeOfIndex(inst); - const elem_size = elem_ty.abiSize(self.target.*); + const elem_ty = self.typeOfIndex(inst); + const elem_size = elem_ty.abiSize(mod); const result: MCValue = result: { - if (!elem_ty.hasRuntimeBits()) + if (!elem_ty.hasRuntimeBits(mod)) break :result MCValue.none; const ptr = try self.resolveInst(ty_op.operand); - const is_volatile = self.air.typeOf(ty_op.operand).isVolatilePtr(); + const is_volatile = self.typeOf(ty_op.operand).isVolatilePtr(mod); if (self.liveness.isUnused(inst) and !is_volatile) break :result MCValue.dead; @@ -1747,7 +1840,7 @@ fn airLoad(self: *Self, inst: Air.Inst.Index) !void { break :blk try self.allocRegOrMem(inst, true); } }; - try self.load(dst_mcv, ptr, self.air.typeOf(ty_op.operand)); + try self.load(dst_mcv, ptr, self.typeOf(ty_op.operand)); break :result dst_mcv; }; return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); @@ 
-1790,8 +1883,8 @@ fn airMinMax(self: *Self, inst: Air.Inst.Index) !void { const bin_op = self.air.instructions.items(.data)[inst].bin_op; const lhs = try self.resolveInst(bin_op.lhs); const rhs = try self.resolveInst(bin_op.rhs); - const lhs_ty = self.air.typeOf(bin_op.lhs); - const rhs_ty = self.air.typeOf(bin_op.rhs); + const lhs_ty = self.typeOf(bin_op.lhs); + const rhs_ty = self.typeOf(bin_op.rhs); const result: MCValue = if (self.liveness.isUnused(inst)) .dead @@ -1805,8 +1898,8 @@ fn airMod(self: *Self, inst: Air.Inst.Index) !void { const bin_op = self.air.instructions.items(.data)[inst].bin_op; const lhs = try self.resolveInst(bin_op.lhs); const rhs = try self.resolveInst(bin_op.rhs); - const lhs_ty = self.air.typeOf(bin_op.lhs); - const rhs_ty = self.air.typeOf(bin_op.rhs); + const lhs_ty = self.typeOf(bin_op.lhs); + const rhs_ty = self.typeOf(bin_op.rhs); assert(lhs_ty.eql(rhs_ty, self.bin_file.options.module.?)); if (self.liveness.isUnused(inst)) @@ -1949,18 +2042,18 @@ fn airMulWithOverflow(self: *Self, inst: Air.Inst.Index) !void { //const tag = self.air.instructions.items(.tag)[inst]; const ty_pl = self.air.instructions.items(.data)[inst].ty_pl; const extra = self.air.extraData(Air.Bin, ty_pl.payload).data; + const mod = self.bin_file.options.module.?; const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: { const lhs = try self.resolveInst(extra.lhs); const rhs = try self.resolveInst(extra.rhs); - const lhs_ty = self.air.typeOf(extra.lhs); - const rhs_ty = self.air.typeOf(extra.rhs); + const lhs_ty = self.typeOf(extra.lhs); + const rhs_ty = self.typeOf(extra.rhs); - switch (lhs_ty.zigTypeTag()) { + switch (lhs_ty.zigTypeTag(mod)) { .Vector => return self.fail("TODO implement mul_with_overflow for vectors", .{}), .Int => { - const mod = self.bin_file.options.module.?; assert(lhs_ty.eql(rhs_ty, mod)); - const int_info = lhs_ty.intInfo(self.target.*); + const int_info = lhs_ty.intInfo(mod); switch (int_info.bits) { 1...32 => { try self.spillConditionFlagsIfOccupied(); @@ -2013,9 +2106,10 @@ fn airMulWithOverflow(self: *Self, inst: Air.Inst.Index) !void { fn airNot(self: *Self, inst: Air.Inst.Index) !void { const ty_op = self.air.instructions.items(.data)[inst].ty_op; + const mod = self.bin_file.options.module.?; const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: { const operand = try self.resolveInst(ty_op.operand); - const operand_ty = self.air.typeOf(ty_op.operand); + const operand_ty = self.typeOf(ty_op.operand); switch (operand) { .dead => unreachable, .unreach => unreachable, @@ -2028,7 +2122,7 @@ fn airNot(self: *Self, inst: Air.Inst.Index) !void { }; }, else => { - switch (operand_ty.zigTypeTag()) { + switch (operand_ty.zigTypeTag(mod)) { .Bool => { const op_reg = switch (operand) { .register => |r| r, @@ -2062,7 +2156,7 @@ fn airNot(self: *Self, inst: Air.Inst.Index) !void { }, .Vector => return self.fail("TODO bitwise not for vectors", .{}), .Int => { - const int_info = operand_ty.intInfo(self.target.*); + const int_info = operand_ty.intInfo(mod); if (int_info.bits <= 64) { const op_reg = switch (operand) { .register => |r| r, @@ -2150,6 +2244,38 @@ fn airPtrElemPtr(self: *Self, inst: Air.Inst.Index) !void { return self.finishAir(inst, result, .{ extra.lhs, extra.rhs, .none }); } +fn airPtrSliceLenPtr(self: *Self, inst: Air.Inst.Index) !void { + const ty_op = self.air.instructions.items(.data)[inst].ty_op; + const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: { + const ptr_bits = 
self.target.ptrBitWidth(); + const ptr_bytes = @divExact(ptr_bits, 8); + const mcv = try self.resolveInst(ty_op.operand); + switch (mcv) { + .dead, .unreach, .none => unreachable, + .ptr_stack_offset => |off| { + break :result MCValue{ .ptr_stack_offset = off - ptr_bytes }; + }, + else => return self.fail("TODO implement ptr_slice_len_ptr for {}", .{mcv}), + } + }; + return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); +} + +fn airPtrSlicePtrPtr(self: *Self, inst: Air.Inst.Index) !void { + const ty_op = self.air.instructions.items(.data)[inst].ty_op; + const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: { + const mcv = try self.resolveInst(ty_op.operand); + switch (mcv) { + .dead, .unreach, .none => unreachable, + .ptr_stack_offset => |off| { + break :result MCValue{ .ptr_stack_offset = off }; + }, + else => return self.fail("TODO implement ptr_slice_len_ptr for {}", .{mcv}), + } + }; + return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); +} + fn airPtrToInt(self: *Self, inst: Air.Inst.Index) !void { const un_op = self.air.instructions.items(.data)[inst].un_op; const result = try self.resolveInst(un_op); @@ -2160,8 +2286,8 @@ fn airRem(self: *Self, inst: Air.Inst.Index) !void { const bin_op = self.air.instructions.items(.data)[inst].bin_op; const lhs = try self.resolveInst(bin_op.lhs); const rhs = try self.resolveInst(bin_op.rhs); - const lhs_ty = self.air.typeOf(bin_op.lhs); - const rhs_ty = self.air.typeOf(bin_op.rhs); + const lhs_ty = self.typeOf(bin_op.lhs); + const rhs_ty = self.typeOf(bin_op.rhs); // TODO add safety check @@ -2212,16 +2338,17 @@ fn airShlSat(self: *Self, inst: Air.Inst.Index) !void { fn airShlWithOverflow(self: *Self, inst: Air.Inst.Index) !void { const ty_pl = self.air.instructions.items(.data)[inst].ty_pl; const extra = self.air.extraData(Air.Bin, ty_pl.payload).data; + const mod = self.bin_file.options.module.?; const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: { const lhs = try self.resolveInst(extra.lhs); const rhs = try self.resolveInst(extra.rhs); - const lhs_ty = self.air.typeOf(extra.lhs); - const rhs_ty = self.air.typeOf(extra.rhs); + const lhs_ty = self.typeOf(extra.lhs); + const rhs_ty = self.typeOf(extra.rhs); - switch (lhs_ty.zigTypeTag()) { + switch (lhs_ty.zigTypeTag(mod)) { .Vector => return self.fail("TODO implement mul_with_overflow for vectors", .{}), .Int => { - const int_info = lhs_ty.intInfo(self.target.*); + const int_info = lhs_ty.intInfo(mod); if (int_info.bits <= 64) { try self.spillConditionFlagsIfOccupied(); @@ -2303,11 +2430,11 @@ fn airSlice(self: *Self, inst: Air.Inst.Index) !void { const bin_op = self.air.extraData(Air.Bin, ty_pl.payload).data; const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: { const ptr = try self.resolveInst(bin_op.lhs); - const ptr_ty = self.air.typeOf(bin_op.lhs); + const ptr_ty = self.typeOf(bin_op.lhs); const len = try self.resolveInst(bin_op.rhs); - const len_ty = self.air.typeOf(bin_op.rhs); + const len_ty = self.typeOf(bin_op.rhs); - const ptr_bits = self.target.cpu.arch.ptrBitWidth(); + const ptr_bits = self.target.ptrBitWidth(); const ptr_bytes = @divExact(ptr_bits, 8); const stack_offset = try self.allocMem(inst, ptr_bytes * 2, ptr_bytes * 2); @@ -2319,6 +2446,7 @@ fn airSlice(self: *Self, inst: Air.Inst.Index) !void { } fn airSliceElemVal(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; const is_volatile = false; // TODO const bin_op = 
self.air.instructions.items(.data)[inst].bin_op; @@ -2327,12 +2455,11 @@ fn airSliceElemVal(self: *Self, inst: Air.Inst.Index) !void { const slice_mcv = try self.resolveInst(bin_op.lhs); const index_mcv = try self.resolveInst(bin_op.rhs); - const slice_ty = self.air.typeOf(bin_op.lhs); - const elem_ty = slice_ty.childType(); - const elem_size = elem_ty.abiSize(self.target.*); + const slice_ty = self.typeOf(bin_op.lhs); + const elem_ty = slice_ty.childType(mod); + const elem_size = elem_ty.abiSize(mod); - var buf: Type.SlicePtrFieldTypeBuffer = undefined; - const slice_ptr_field_type = slice_ty.slicePtrFieldType(&buf); + const slice_ptr_field_type = slice_ty.slicePtrFieldType(mod); const index_lock: ?RegisterLock = if (index_mcv == .register) self.register_manager.lockRegAssumeUnused(index_mcv.register) @@ -2365,7 +2492,7 @@ fn airSliceElemVal(self: *Self, inst: Air.Inst.Index) !void { fn airSliceLen(self: *Self, inst: Air.Inst.Index) !void { const ty_op = self.air.instructions.items(.data)[inst].ty_op; const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: { - const ptr_bits = self.target.cpu.arch.ptrBitWidth(); + const ptr_bits = self.target.ptrBitWidth(); const ptr_bytes = @divExact(ptr_bits, 8); const mcv = try self.resolveInst(ty_op.operand); switch (mcv) { @@ -2417,14 +2544,21 @@ fn airStore(self: *Self, inst: Air.Inst.Index, safety: bool) !void { const bin_op = self.air.instructions.items(.data)[inst].bin_op; const ptr = try self.resolveInst(bin_op.lhs); const value = try self.resolveInst(bin_op.rhs); - const ptr_ty = self.air.typeOf(bin_op.lhs); - const value_ty = self.air.typeOf(bin_op.rhs); + const ptr_ty = self.typeOf(bin_op.lhs); + const value_ty = self.typeOf(bin_op.rhs); try self.store(ptr, value, ptr_ty, value_ty); return self.finishAir(inst, .dead, .{ bin_op.lhs, bin_op.rhs, .none }); } +fn airStructFieldPtr(self: *Self, inst: Air.Inst.Index) !void { + const ty_pl = self.air.instructions.items(.data)[inst].ty_pl; + const extra = self.air.extraData(Air.StructField, ty_pl.payload).data; + const result = try self.structFieldPtr(inst, extra.struct_operand, extra.field_index); + return self.finishAir(inst, result, .{ extra.struct_operand, .none, .none }); +} + fn airStructFieldPtrIndex(self: *Self, inst: Air.Inst.Index, index: u8) !void { const ty_op = self.air.instructions.items(.data)[inst].ty_op; const result = try self.structFieldPtr(inst, ty_op.operand, index); @@ -2437,9 +2571,10 @@ fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void { const operand = extra.struct_operand; const index = extra.field_index; const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: { + const mod = self.bin_file.options.module.?; const mcv = try self.resolveInst(operand); - const struct_ty = self.air.typeOf(operand); - const struct_field_offset = @intCast(u32, struct_ty.structFieldOffset(index, self.target.*)); + const struct_ty = self.typeOf(operand); + const struct_field_offset = @intCast(u32, struct_ty.structFieldOffset(index, mod)); switch (mcv) { .dead, .unreach => unreachable, @@ -2524,8 +2659,8 @@ fn airTagName(self: *Self, inst: Air.Inst.Index) !void { fn airTrunc(self: *Self, inst: Air.Inst.Index) !void { const ty_op = self.air.instructions.items(.data)[inst].ty_op; const operand = try self.resolveInst(ty_op.operand); - const operand_ty = self.air.typeOf(ty_op.operand); - const dest_ty = self.air.typeOfIndex(inst); + const operand_ty = self.typeOf(ty_op.operand); + const dest_ty = self.typeOfIndex(inst); const result: MCValue = if 
(self.liveness.isUnused(inst)) .dead else blk: { break :blk try self.trunc(inst, operand, operand_ty, dest_ty); @@ -2539,7 +2674,7 @@ fn airTry(self: *Self, inst: Air.Inst.Index) !void { const extra = self.air.extraData(Air.Try, pl_op.payload); const body = self.air.extra[extra.end..][0..extra.data.body_len]; const result: MCValue = result: { - const error_union_ty = self.air.typeOf(pl_op.operand); + const error_union_ty = self.typeOf(pl_op.operand); const error_union = try self.resolveInst(pl_op.operand); const is_err_result = try self.isErr(error_union_ty, error_union); const reloc = try self.condBr(is_err_result); @@ -2569,12 +2704,13 @@ fn airUnionInit(self: *Self, inst: Air.Inst.Index) !void { } fn airUnwrapErrErr(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; const ty_op = self.air.instructions.items(.data)[inst].ty_op; const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: { - const error_union_ty = self.air.typeOf(ty_op.operand); - const payload_ty = error_union_ty.errorUnionPayload(); + const error_union_ty = self.typeOf(ty_op.operand); + const payload_ty = error_union_ty.errorUnionPayload(mod); const mcv = try self.resolveInst(ty_op.operand); - if (!payload_ty.hasRuntimeBits()) break :result mcv; + if (!payload_ty.hasRuntimeBits(mod)) break :result mcv; return self.fail("TODO implement unwrap error union error for non-empty payloads", .{}); }; @@ -2582,11 +2718,12 @@ fn airUnwrapErrErr(self: *Self, inst: Air.Inst.Index) !void { } fn airUnwrapErrPayload(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; const ty_op = self.air.instructions.items(.data)[inst].ty_op; const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: { - const error_union_ty = self.air.typeOf(ty_op.operand); - const payload_ty = error_union_ty.errorUnionPayload(); - if (!payload_ty.hasRuntimeBits()) break :result MCValue.none; + const error_union_ty = self.typeOf(ty_op.operand); + const payload_ty = error_union_ty.errorUnionPayload(mod); + if (!payload_ty.hasRuntimeBits(mod)) break :result MCValue.none; return self.fail("TODO implement unwrap error union payload for non-empty payloads", .{}); }; @@ -2595,12 +2732,13 @@ fn airUnwrapErrPayload(self: *Self, inst: Air.Inst.Index) !void { /// E to E!T fn airWrapErrUnionErr(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; const ty_op = self.air.instructions.items(.data)[inst].ty_op; const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: { const error_union_ty = self.air.getRefType(ty_op.ty); - const payload_ty = error_union_ty.errorUnionPayload(); + const payload_ty = error_union_ty.errorUnionPayload(mod); const mcv = try self.resolveInst(ty_op.operand); - if (!payload_ty.hasRuntimeBits()) break :result mcv; + if (!payload_ty.hasRuntimeBits(mod)) break :result mcv; return self.fail("TODO implement wrap errunion error for non-empty payloads", .{}); }; @@ -2615,12 +2753,13 @@ fn airWrapErrUnionPayload(self: *Self, inst: Air.Inst.Index) !void { } fn airWrapOptional(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; const ty_op = self.air.instructions.items(.data)[inst].ty_op; const result: MCValue = if (self.liveness.isUnused(inst)) .dead else result: { - const optional_ty = self.air.typeOfIndex(inst); + const optional_ty = self.typeOfIndex(inst); // Optional with a zero-bit payload type is just a boolean true - if (optional_ty.abiSize(self.target.*) 
== 1) + if (optional_ty.abiSize(mod) == 1) break :result MCValue{ .immediate = 1 }; return self.fail("TODO implement wrap optional for {}", .{self.target.cpu.arch}); @@ -2642,7 +2781,7 @@ fn allocMem(self: *Self, inst: Air.Inst.Index, abi_size: u32, abi_align: u32) !u if (abi_align > self.stack_align) self.stack_align = abi_align; // TODO find a free slot instead of always appending - const offset = mem.alignForwardGeneric(u32, self.next_stack_offset, abi_align) + abi_size; + const offset = mem.alignForward(u32, self.next_stack_offset, abi_align) + abi_size; self.next_stack_offset = offset; if (self.next_stack_offset > self.max_end_stack) self.max_end_stack = self.next_stack_offset; @@ -2655,9 +2794,10 @@ fn allocMem(self: *Self, inst: Air.Inst.Index, abi_size: u32, abi_align: u32) !u /// Use a pointer instruction as the basis for allocating stack memory. fn allocMemPtr(self: *Self, inst: Air.Inst.Index) !u32 { - const elem_ty = self.air.typeOfIndex(inst).elemType(); + const mod = self.bin_file.options.module.?; + const elem_ty = self.typeOfIndex(inst).childType(mod); - if (!elem_ty.hasRuntimeBits()) { + if (!elem_ty.hasRuntimeBits(mod)) { // As this stack item will never be dereferenced at runtime, // return the stack offset 0. Stack offset 0 will be where all // zero-sized stack allocations live as non-zero-sized @@ -2665,22 +2805,21 @@ fn allocMemPtr(self: *Self, inst: Air.Inst.Index) !u32 { return @as(u32, 0); } - const abi_size = math.cast(u32, elem_ty.abiSize(self.target.*)) orelse { - const mod = self.bin_file.options.module.?; + const abi_size = math.cast(u32, elem_ty.abiSize(mod)) orelse { return self.fail("type '{}' too big to fit into stack frame", .{elem_ty.fmt(mod)}); }; // TODO swap this for inst.ty.ptrAlign - const abi_align = elem_ty.abiAlignment(self.target.*); + const abi_align = elem_ty.abiAlignment(mod); return self.allocMem(inst, abi_size, abi_align); } fn allocRegOrMem(self: *Self, inst: Air.Inst.Index, reg_ok: bool) !MCValue { - const elem_ty = self.air.typeOfIndex(inst); - const abi_size = math.cast(u32, elem_ty.abiSize(self.target.*)) orelse { - const mod = self.bin_file.options.module.?; + const mod = self.bin_file.options.module.?; + const elem_ty = self.typeOfIndex(inst); + const abi_size = math.cast(u32, elem_ty.abiSize(mod)) orelse { return self.fail("type '{}' too big to fit into stack frame", .{elem_ty.fmt(mod)}); }; - const abi_align = elem_ty.abiAlignment(self.target.*); + const abi_align = elem_ty.abiAlignment(mod); if (abi_align > self.stack_align) self.stack_align = abi_align; @@ -2733,12 +2872,12 @@ fn binOp( .xor, .cmp_eq, => { - switch (lhs_ty.zigTypeTag()) { + switch (lhs_ty.zigTypeTag(mod)) { .Float => return self.fail("TODO binary operations on floats", .{}), .Vector => return self.fail("TODO binary operations on vectors", .{}), .Int => { assert(lhs_ty.eql(rhs_ty, mod)); - const int_info = lhs_ty.intInfo(self.target.*); + const int_info = lhs_ty.intInfo(mod); if (int_info.bits <= 64) { // Only say yes if the operation is // commutative, i.e. 
we can swap both of the @@ -2807,10 +2946,10 @@ fn binOp( const result = try self.binOp(base_tag, lhs, rhs, lhs_ty, rhs_ty, metadata); // Truncate if necessary - switch (lhs_ty.zigTypeTag()) { + switch (lhs_ty.zigTypeTag(mod)) { .Vector => return self.fail("TODO binary operations on vectors", .{}), .Int => { - const int_info = lhs_ty.intInfo(self.target.*); + const int_info = lhs_ty.intInfo(mod); if (int_info.bits <= 64) { const result_reg = result.register; try self.truncRegister(result_reg, result_reg, int_info.signedness, int_info.bits); @@ -2824,11 +2963,11 @@ fn binOp( }, .div_trunc => { - switch (lhs_ty.zigTypeTag()) { + switch (lhs_ty.zigTypeTag(mod)) { .Vector => return self.fail("TODO binary operations on vectors", .{}), .Int => { assert(lhs_ty.eql(rhs_ty, mod)); - const int_info = lhs_ty.intInfo(self.target.*); + const int_info = lhs_ty.intInfo(mod); if (int_info.bits <= 64) { const rhs_immediate_ok = switch (tag) { .div_trunc => rhs == .immediate and rhs.immediate <= std.math.maxInt(u12), @@ -2857,14 +2996,14 @@ fn binOp( }, .ptr_add => { - switch (lhs_ty.zigTypeTag()) { + switch (lhs_ty.zigTypeTag(mod)) { .Pointer => { const ptr_ty = lhs_ty; - const elem_ty = switch (ptr_ty.ptrSize()) { - .One => ptr_ty.childType().childType(), // ptr to array, so get array element type - else => ptr_ty.childType(), + const elem_ty = switch (ptr_ty.ptrSize(mod)) { + .One => ptr_ty.childType(mod).childType(mod), // ptr to array, so get array element type + else => ptr_ty.childType(mod), }; - const elem_size = elem_ty.abiSize(self.target.*); + const elem_size = elem_ty.abiSize(mod); if (elem_size == 1) { const base_tag: Mir.Inst.Tag = switch (tag) { @@ -2878,7 +3017,7 @@ fn binOp( // multiplying it with elem_size const offset = try self.binOp(.mul, rhs, .{ .immediate = elem_size }, Type.usize, Type.usize, null); - const addr = try self.binOp(tag, lhs, offset, Type.initTag(.manyptr_u8), Type.usize, null); + const addr = try self.binOp(tag, lhs, offset, Type.manyptr_u8, Type.usize, null); return addr; } }, @@ -2889,7 +3028,7 @@ fn binOp( .bool_and, .bool_or, => { - switch (lhs_ty.zigTypeTag()) { + switch (lhs_ty.zigTypeTag(mod)) { .Bool => { assert(lhs != .immediate); // should have been handled by Sema assert(rhs != .immediate); // should have been handled by Sema @@ -2919,10 +3058,10 @@ fn binOp( const result = try self.binOp(base_tag, lhs, rhs, lhs_ty, rhs_ty, metadata); // Truncate if necessary - switch (lhs_ty.zigTypeTag()) { + switch (lhs_ty.zigTypeTag(mod)) { .Vector => return self.fail("TODO binary operations on vectors", .{}), .Int => { - const int_info = lhs_ty.intInfo(self.target.*); + const int_info = lhs_ty.intInfo(mod); if (int_info.bits <= 64) { // 32 and 64 bit operands doesn't need truncating if (int_info.bits == 32 or int_info.bits == 64) return result; @@ -2941,10 +3080,10 @@ fn binOp( .shl_exact, .shr_exact, => { - switch (lhs_ty.zigTypeTag()) { + switch (lhs_ty.zigTypeTag(mod)) { .Vector => return self.fail("TODO binary operations on vectors", .{}), .Int => { - const int_info = lhs_ty.intInfo(self.target.*); + const int_info = lhs_ty.intInfo(mod); if (int_info.bits <= 64) { const rhs_immediate_ok = rhs == .immediate; @@ -3266,7 +3405,8 @@ fn binOpRegister( fn br(self: *Self, block: Air.Inst.Index, operand: Air.Inst.Ref) !void { const block_data = self.blocks.getPtr(block).?; - if (self.air.typeOf(operand).hasRuntimeBits()) { + const mod = self.bin_file.options.module.?; + if (self.typeOf(operand).hasRuntimeBits(mod)) { const operand_mcv = try self.resolveInst(operand); const 
block_mcv = block_data.mcv; if (block_mcv == .none) { @@ -3275,13 +3415,13 @@ fn br(self: *Self, block: Air.Inst.Index, operand: Air.Inst.Ref) !void { .register, .stack_offset, .memory => operand_mcv, .immediate => blk: { const new_mcv = try self.allocRegOrMem(block, true); - try self.setRegOrMem(self.air.typeOfIndex(block), new_mcv, operand_mcv); + try self.setRegOrMem(self.typeOfIndex(block), new_mcv, operand_mcv); break :blk new_mcv; }, else => return self.fail("TODO implement block_data.mcv = operand_mcv for {}", .{operand_mcv}), }; } else { - try self.setRegOrMem(self.air.typeOfIndex(block), block_mcv, operand_mcv); + try self.setRegOrMem(self.typeOfIndex(block), block_mcv, operand_mcv); } } return self.brVoid(block); @@ -3385,16 +3525,17 @@ fn ensureProcessDeathCapacity(self: *Self, additional_count: usize) !void { /// Given an error union, returns the payload fn errUnionPayload(self: *Self, error_union_mcv: MCValue, error_union_ty: Type) !MCValue { - const err_ty = error_union_ty.errorUnionSet(); - const payload_ty = error_union_ty.errorUnionPayload(); - if (err_ty.errorSetIsEmpty()) { + const mod = self.bin_file.options.module.?; + const err_ty = error_union_ty.errorUnionSet(mod); + const payload_ty = error_union_ty.errorUnionPayload(mod); + if (err_ty.errorSetIsEmpty(mod)) { return error_union_mcv; } - if (!payload_ty.hasRuntimeBitsIgnoreComptime()) { + if (!payload_ty.hasRuntimeBitsIgnoreComptime(mod)) { return MCValue.none; } - const payload_offset = @intCast(u32, errUnionPayloadOffset(payload_ty, self.target.*)); + const payload_offset = @intCast(u32, errUnionPayloadOffset(payload_ty, mod)); switch (error_union_mcv) { .register => return self.fail("TODO errUnionPayload for registers", .{}), .stack_offset => |off| { @@ -3428,8 +3569,8 @@ fn finishAir(self: *Self, inst: Air.Inst.Index, result: MCValue, operands: [Live tomb_bits >>= 1; if (!dies) continue; const op_int = @enumToInt(op); - if (op_int < Air.Inst.Ref.typed_value_map.len) continue; - const op_index = @intCast(Air.Inst.Index, op_int - Air.Inst.Ref.typed_value_map.len); + if (op_int < Air.ref_start_index) continue; + const op_index = @intCast(Air.Inst.Index, op_int - Air.ref_start_index); self.processDeath(op_index); } const is_used = @truncate(u1, tomb_bits) == 0; @@ -3574,7 +3715,36 @@ fn genLoad(self: *Self, value_reg: Register, addr_reg: Register, comptime off_ty } } +fn genLoadASI(self: *Self, value_reg: Register, addr_reg: Register, off_reg: Register, abi_size: u64, asi: ASI) !void { + switch (abi_size) { + 1, 2, 4, 8 => { + const tag: Mir.Inst.Tag = switch (abi_size) { + 1 => .lduba, + 2 => .lduha, + 4 => .lduwa, + 8 => .ldxa, + else => unreachable, // unexpected abi size + }; + + _ = try self.addInst(.{ + .tag = tag, + .data = .{ + .mem_asi = .{ + .rd = value_reg, + .rs1 = addr_reg, + .rs2 = off_reg, + .asi = asi, + }, + }, + }); + }, + 3, 5, 6, 7 => return self.fail("TODO: genLoad for more abi_sizes", .{}), + else => unreachable, + } +} + fn genSetReg(self: *Self, ty: Type, reg: Register, mcv: MCValue) InnerError!void { + const mod = self.bin_file.options.module.?; switch (mcv) { .dead => unreachable, .unreach, .none => return, // Nothing to do. 
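genLoadASI above (and genStoreASI further down) picks the "alternate space" load/store mnemonic purely from the ABI size of the value, with sizes 3, 5, 6 and 7 left as a TODO. A minimal sketch of that mapping, using an illustrative enum rather than the backend's Mir.Inst.Tag:

const AsiLoad = enum { lduba, lduha, lduwa, ldxa };

// Pick the "load integer from alternate space" mnemonic from the value's
// ABI size, mirroring the switch in genLoadASI above (sketch only).
fn loadAsiMnemonic(abi_size: u64) ?AsiLoad {
    return switch (abi_size) {
        1 => .lduba, // unsigned byte
        2 => .lduha, // unsigned halfword
        4 => .lduwa, // unsigned word
        8 => .ldxa, // extended (64-bit) word
        else => null, // 3, 5, 6, 7 remain a TODO in the backend
    };
}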
@@ -3644,7 +3814,7 @@ fn genSetReg(self: *Self, ty: Type, reg: Register, mcv: MCValue) InnerError!void return self.genSetReg(ty, reg, .{ .immediate = 0xaaaaaaaaaaaaaaaa }); }, .ptr_stack_offset => |off| { - const real_offset = off + abi.stack_bias + abi.stack_reserved_area; + const real_offset = realStackOffset(off); const simm13 = math.cast(i13, real_offset) orelse return self.fail("TODO larger stack offsets: {}", .{real_offset}); @@ -3773,19 +3943,20 @@ fn genSetReg(self: *Self, ty: Type, reg: Register, mcv: MCValue) InnerError!void // The value is in memory at a hard-coded address. // If the type is a pointer, it means the pointer address is at this memory location. try self.genSetReg(ty, reg, .{ .immediate = addr }); - try self.genLoad(reg, reg, i13, 0, ty.abiSize(self.target.*)); + try self.genLoad(reg, reg, i13, 0, ty.abiSize(mod)); }, .stack_offset => |off| { - const real_offset = off + abi.stack_bias + abi.stack_reserved_area; + const real_offset = realStackOffset(off); const simm13 = math.cast(i13, real_offset) orelse return self.fail("TODO larger stack offsets: {}", .{real_offset}); - try self.genLoad(reg, .sp, i13, simm13, ty.abiSize(self.target.*)); + try self.genLoad(reg, .sp, i13, simm13, ty.abiSize(mod)); }, } } fn genSetStack(self: *Self, ty: Type, stack_offset: u32, mcv: MCValue) InnerError!void { - const abi_size = ty.abiSize(self.target.*); + const mod = self.bin_file.options.module.?; + const abi_size = ty.abiSize(mod); switch (mcv) { .dead => unreachable, .unreach, .none => return, // Nothing to do. @@ -3793,7 +3964,7 @@ fn genSetStack(self: *Self, ty: Type, stack_offset: u32, mcv: MCValue) InnerErro if (!self.wantSafety()) return; // The already existing value will do just fine. // TODO Upgrade this to a memset call when we have that available. 
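When safety is enabled, undefined values are materialized as the 0xaa byte pattern repeated to the value's width, as in the genSetReg hunk above and the genSetStack switch that follows. A small self-contained sketch of how that fill constant relates to the ABI size:

const std = @import("std");

// Build the 0xaa... fill constant for a given byte size (sketch of the
// undefined-value pattern used when safety checks are on).
fn undefFill(bytes: u8) u64 {
    var value: u64 = 0;
    var i: u8 = 0;
    while (i < bytes) : (i += 1) value = (value << 8) | 0xaa;
    return value;
}

test "0xaa safety fill" {
    try std.testing.expectEqual(@as(u64, 0xaa), undefFill(1));
    try std.testing.expectEqual(@as(u64, 0xaaaa_aaaa), undefFill(4));
    try std.testing.expectEqual(@as(u64, 0xaaaa_aaaa_aaaa_aaaa), undefFill(8));
}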
- switch (ty.abiSize(self.target.*)) { + switch (ty.abiSize(mod)) { 1 => return self.genSetStack(ty, stack_offset, .{ .immediate = 0xaa }), 2 => return self.genSetStack(ty, stack_offset, .{ .immediate = 0xaaaa }), 4 => return self.genSetStack(ty, stack_offset, .{ .immediate = 0xaaaaaaaa }), @@ -3810,7 +3981,7 @@ fn genSetStack(self: *Self, ty: Type, stack_offset: u32, mcv: MCValue) InnerErro return self.genSetStack(ty, stack_offset, MCValue{ .register = reg }); }, .register => |reg| { - const real_offset = stack_offset + abi.stack_bias + abi.stack_reserved_area; + const real_offset = realStackOffset(stack_offset); const simm13 = math.cast(i13, real_offset) orelse return self.fail("TODO larger stack offsets: {}", .{real_offset}); return self.genStore(reg, .sp, i13, simm13, abi_size); @@ -3819,11 +3990,11 @@ fn genSetStack(self: *Self, ty: Type, stack_offset: u32, mcv: MCValue) InnerErro const reg_lock = self.register_manager.lockReg(rwo.reg); defer if (reg_lock) |locked_reg| self.register_manager.unlockReg(locked_reg); - const wrapped_ty = ty.structFieldType(0); + const wrapped_ty = ty.structFieldType(0, mod); try self.genSetStack(wrapped_ty, stack_offset, .{ .register = rwo.reg }); - const overflow_bit_ty = ty.structFieldType(1); - const overflow_bit_offset = @intCast(u32, ty.structFieldOffset(1, self.target.*)); + const overflow_bit_ty = ty.structFieldType(1, mod); + const overflow_bit_offset = @intCast(u32, ty.structFieldOffset(1, mod)); const cond_reg = try self.register_manager.allocReg(null, gp); // TODO handle floating point CCRs @@ -3869,11 +4040,7 @@ fn genSetStack(self: *Self, ty: Type, stack_offset: u32, mcv: MCValue) InnerErro const reg = try self.copyToTmpRegister(ty, mcv); return self.genSetStack(ty, stack_offset, MCValue{ .register = reg }); } else { - var ptr_ty_payload: Type.Payload.ElemType = .{ - .base = .{ .tag = .single_mut_pointer }, - .data = ty, - }; - const ptr_ty = Type.initPayload(&ptr_ty_payload.base); + const ptr_ty = try mod.singleMutPtrType(ty); const regs = try self.register_manager.allocRegs(4, .{ null, null, null, null }, gp); const regs_locks = self.register_manager.lockRegsAssumeUnused(4, regs); @@ -3933,6 +4100,34 @@ fn genStore(self: *Self, value_reg: Register, addr_reg: Register, comptime off_t } } +fn genStoreASI(self: *Self, value_reg: Register, addr_reg: Register, off_reg: Register, abi_size: u64, asi: ASI) !void { + switch (abi_size) { + 1, 2, 4, 8 => { + const tag: Mir.Inst.Tag = switch (abi_size) { + 1 => .stba, + 2 => .stha, + 4 => .stwa, + 8 => .stxa, + else => unreachable, // unexpected abi size + }; + + _ = try self.addInst(.{ + .tag = tag, + .data = .{ + .mem_asi = .{ + .rd = value_reg, + .rs1 = addr_reg, + .rs2 = off_reg, + .asi = asi, + }, + }, + }); + }, + 3, 5, 6, 7 => return self.fail("TODO: genLoad for more abi_sizes", .{}), + else => unreachable, + } +} + fn genTypedValue(self: *Self, typed_value: TypedValue) InnerError!MCValue { const mcv: MCValue = switch (try codegen.genTypedValue( self.bin_file, @@ -3969,13 +4164,14 @@ fn getResolvedInstValue(self: *Self, inst: Air.Inst.Index) MCValue { } fn isErr(self: *Self, ty: Type, operand: MCValue) !MCValue { - const error_type = ty.errorUnionSet(); - const payload_type = ty.errorUnionPayload(); + const mod = self.bin_file.options.module.?; + const error_type = ty.errorUnionSet(mod); + const payload_type = ty.errorUnionPayload(mod); - if (!error_type.hasRuntimeBits()) { + if (!error_type.hasRuntimeBits(mod)) { return MCValue{ .immediate = 0 }; // always false - } else if 
(!payload_type.hasRuntimeBits()) { - if (error_type.abiSize(self.target.*) <= 8) { + } else if (!payload_type.hasRuntimeBits(mod)) { + if (error_type.abiSize(mod) <= 8) { const reg_mcv: MCValue = switch (operand) { .register => operand, else => .{ .register = try self.copyToTmpRegister(error_type, operand) }, @@ -4066,8 +4262,9 @@ fn jump(self: *Self, inst: Mir.Inst.Index) !void { } fn load(self: *Self, dst_mcv: MCValue, ptr: MCValue, ptr_ty: Type) InnerError!void { - const elem_ty = ptr_ty.elemType(); - const elem_size = elem_ty.abiSize(self.target.*); + const mod = self.bin_file.options.module.?; + const elem_ty = ptr_ty.childType(mod); + const elem_size = elem_ty.abiSize(mod); switch (ptr) { .none => unreachable, @@ -4138,11 +4335,11 @@ fn minMax( ) InnerError!MCValue { const mod = self.bin_file.options.module.?; assert(lhs_ty.eql(rhs_ty, mod)); - switch (lhs_ty.zigTypeTag()) { + switch (lhs_ty.zigTypeTag(mod)) { .Float => return self.fail("TODO min/max on floats", .{}), .Vector => return self.fail("TODO min/max on vectors", .{}), .Int => { - const int_info = lhs_ty.intInfo(self.target.*); + const int_info = lhs_ty.intInfo(mod); if (int_info.bits <= 64) { // TODO skip register setting when one of the operands // is a small (fits in i13) immediate. @@ -4223,8 +4420,7 @@ fn performReloc(self: *Self, inst: Mir.Inst.Index) !void { /// Asserts there is already capacity to insert into top branch inst_table. fn processDeath(self: *Self, inst: Air.Inst.Index) void { - const air_tags = self.air.instructions.items(.tag); - if (air_tags[inst] == .constant) return; // Constants are immortal. + assert(self.air.instructions.items(.tag)[inst] != .interned); // When editing this function, note that the logic must synchronize with `reuseOperand`. const prev_value = self.getResolvedInstValue(inst); const branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; @@ -4245,14 +4441,24 @@ fn processDeath(self: *Self, inst: Air.Inst.Index) void { } } +/// Turns stack_offset MCV into a real SPARCv9 stack offset usable for asm. +fn realStackOffset(off: u32) u32 { + return off + // SPARCv9 %sp points away from the stack by some amount. + + abi.stack_bias + // The first couple bytes of each stack frame is reserved + // for ABI and hardware purposes. + + abi.stack_reserved_area; + // Only after that we have the usable stack frame portion. +} + /// Caller must call `CallMCValues.deinit`. fn resolveCallingConventionValues(self: *Self, fn_ty: Type, role: RegisterView) !CallMCValues { - const cc = fn_ty.fnCallingConvention(); - const param_types = try self.gpa.alloc(Type, fn_ty.fnParamLen()); - defer self.gpa.free(param_types); - fn_ty.fnParamTypes(param_types); + const mod = self.bin_file.options.module.?; + const fn_info = mod.typeToFunc(fn_ty).?; + const cc = fn_info.cc; var result: CallMCValues = .{ - .args = try self.gpa.alloc(MCValue, param_types.len), + .args = try self.gpa.alloc(MCValue, fn_info.param_types.len), // These undefined values must be populated before returning from this function. .return_value = undefined, .stack_byte_count = undefined, @@ -4260,7 +4466,7 @@ fn resolveCallingConventionValues(self: *Self, fn_ty: Type, role: RegisterView) }; errdefer self.gpa.free(result.args); - const ret_ty = fn_ty.fnReturnType(); + const ret_ty = fn_ty.fnReturnType(mod); switch (cc) { .Naked => { @@ -4283,8 +4489,8 @@ fn resolveCallingConventionValues(self: *Self, fn_ty: Type, role: RegisterView) .callee => abi.c_abi_int_param_regs_callee_view, }; - for (param_types, 0..) 
|ty, i| { - const param_size = @intCast(u32, ty.abiSize(self.target.*)); + for (fn_info.param_types, 0..) |ty, i| { + const param_size = @intCast(u32, ty.toType().abiSize(mod)); if (param_size <= 8) { if (next_register < argument_registers.len) { result.args[i] = .{ .register = argument_registers[next_register] }; @@ -4311,12 +4517,12 @@ fn resolveCallingConventionValues(self: *Self, fn_ty: Type, role: RegisterView) result.stack_byte_count = next_stack_offset; result.stack_align = 16; - if (ret_ty.zigTypeTag() == .NoReturn) { + if (ret_ty.zigTypeTag(mod) == .NoReturn) { result.return_value = .{ .unreach = {} }; - } else if (!ret_ty.hasRuntimeBits()) { + } else if (!ret_ty.hasRuntimeBits(mod)) { result.return_value = .{ .none = {} }; } else { - const ret_ty_size = @intCast(u32, ret_ty.abiSize(self.target.*)); + const ret_ty_size = @intCast(u32, ret_ty.abiSize(mod)); // The callee puts the return values in %i0-%i3, which becomes %o0-%o3 inside the caller. if (ret_ty_size <= 8) { result.return_value = switch (role) { @@ -4334,44 +4540,41 @@ fn resolveCallingConventionValues(self: *Self, fn_ty: Type, role: RegisterView) return result; } -fn resolveInst(self: *Self, inst: Air.Inst.Ref) InnerError!MCValue { - // First section of indexes correspond to a set number of constant values. - const ref_int = @enumToInt(inst); - if (ref_int < Air.Inst.Ref.typed_value_map.len) { - const tv = Air.Inst.Ref.typed_value_map[ref_int]; - if (!tv.ty.hasRuntimeBitsIgnoreComptime() and !tv.ty.isError()) { - return MCValue{ .none = {} }; - } - return self.genTypedValue(tv); - } +fn resolveInst(self: *Self, ref: Air.Inst.Ref) InnerError!MCValue { + const mod = self.bin_file.options.module.?; + const ty = self.typeOf(ref); // If the type has no codegen bits, no need to store it. - const inst_ty = self.air.typeOf(inst); - if (!inst_ty.hasRuntimeBitsIgnoreComptime() and !inst_ty.isError()) - return MCValue{ .none = {} }; - - const inst_index = @intCast(Air.Inst.Index, ref_int - Air.Inst.Ref.typed_value_map.len); - switch (self.air.instructions.items(.tag)[inst_index]) { - .constant => { - // Constants have static lifetimes, so they are always memoized in the outer most table. - const branch = &self.branch_stack.items[0]; - const gop = try branch.inst_table.getOrPut(self.gpa, inst_index); - if (!gop.found_existing) { - const ty_pl = self.air.instructions.items(.data)[inst_index].ty_pl; - gop.value_ptr.* = try self.genTypedValue(.{ - .ty = inst_ty, - .val = self.air.values[ty_pl.payload], - }); - } - return gop.value_ptr.*; - }, - .const_ty => unreachable, - else => return self.getResolvedInstValue(inst_index), + if (!ty.hasRuntimeBitsIgnoreComptime(mod)) return .none; + + if (Air.refToIndex(ref)) |inst| { + switch (self.air.instructions.items(.tag)[inst]) { + .interned => { + // Constants have static lifetimes, so they are always memoized in the outer most table. 
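In resolveCallingConventionValues above, each parameter either lands in the next free integer argument register (when it fits in 8 bytes and a register is still available) or is assigned a stack offset. A simplified, illustrative sketch of that placement decision; the register count and stack-slot handling are deliberately reduced here and do not reproduce the real ABI tables:

const ParamLoc = union(enum) { register: u8, stack_offset: u32 };

// Simplified parameter placement: small arguments take the next free integer
// argument register, everything else falls back to the stack (sketch only).
fn placeParam(param_size: u32, reg_count: u8, next_reg: *u8, next_stack: *u32) ParamLoc {
    if (param_size <= 8 and next_reg.* < reg_count) {
        const reg = next_reg.*;
        next_reg.* += 1;
        return ParamLoc{ .register = reg };
    }
    const off = next_stack.*;
    next_stack.* += param_size;
    return ParamLoc{ .stack_offset = off };
}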
+ const branch = &self.branch_stack.items[0]; + const gop = try branch.inst_table.getOrPut(self.gpa, inst); + if (!gop.found_existing) { + const interned = self.air.instructions.items(.data)[inst].interned; + gop.value_ptr.* = try self.genTypedValue(.{ + .ty = ty, + .val = interned.toValue(), + }); + } + return gop.value_ptr.*; + }, + else => return self.getResolvedInstValue(inst), + } } + + return self.genTypedValue(.{ + .ty = ty, + .val = (try self.air.value(ref, mod)).?, + }); } fn ret(self: *Self, mcv: MCValue) !void { - const ret_ty = self.fn_type.fnReturnType(); + const mod = self.bin_file.options.module.?; + const ret_ty = self.fn_type.fnReturnType(mod); try self.setRegOrMem(ret_ty, self.ret_mcv, mcv); // Just add space for a branch instruction, patch this later @@ -4444,7 +4647,7 @@ fn spillConditionFlagsIfOccupied(self: *Self) !void { else => unreachable, // mcv doesn't occupy the compare flags }; - try self.setRegOrMem(self.air.typeOfIndex(inst_to_save), new_mcv, mcv); + try self.setRegOrMem(self.typeOfIndex(inst_to_save), new_mcv, mcv); log.debug("spilling {d} to mcv {any}", .{ inst_to_save, new_mcv }); const branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; @@ -4468,11 +4671,12 @@ pub fn spillInstruction(self: *Self, reg: Register, inst: Air.Inst.Index) !void assert(reg == reg_mcv.register); const branch = &self.branch_stack.items[self.branch_stack.items.len - 1]; try branch.inst_table.put(self.gpa, inst, stack_mcv); - try self.genSetStack(self.air.typeOfIndex(inst), stack_mcv.stack_offset, reg_mcv); + try self.genSetStack(self.typeOfIndex(inst), stack_mcv.stack_offset, reg_mcv); } fn store(self: *Self, ptr: MCValue, value: MCValue, ptr_ty: Type, value_ty: Type) InnerError!void { - const abi_size = value_ty.abiSize(self.target.*); + const mod = self.bin_file.options.module.?; + const abi_size = value_ty.abiSize(mod); switch (ptr) { .none => unreachable, @@ -4513,10 +4717,11 @@ fn store(self: *Self, ptr: MCValue, value: MCValue, ptr_ty: Type, value_ty: Type fn structFieldPtr(self: *Self, inst: Air.Inst.Index, operand: Air.Inst.Ref, index: u32) !MCValue { return if (self.liveness.isUnused(inst)) .dead else result: { + const mod = self.bin_file.options.module.?; const mcv = try self.resolveInst(operand); - const ptr_ty = self.air.typeOf(operand); - const struct_ty = ptr_ty.childType(); - const struct_field_offset = @intCast(u32, struct_ty.structFieldOffset(index, self.target.*)); + const ptr_ty = self.typeOf(operand); + const struct_ty = ptr_ty.childType(mod); + const struct_field_offset = @intCast(u32, struct_ty.structFieldOffset(index, mod)); switch (mcv) { .ptr_stack_offset => |off| { break :result MCValue{ .ptr_stack_offset = off - struct_field_offset }; @@ -4554,8 +4759,9 @@ fn trunc( operand_ty: Type, dest_ty: Type, ) !MCValue { - const info_a = operand_ty.intInfo(self.target.*); - const info_b = dest_ty.intInfo(self.target.*); + const mod = self.bin_file.options.module.?; + const info_a = operand_ty.intInfo(mod); + const info_b = dest_ty.intInfo(mod); if (info_b.bits <= 64) { const operand_reg = switch (operand) { @@ -4672,3 +4878,13 @@ fn wantSafety(self: *Self) bool { .ReleaseSmall => false, }; } + +fn typeOf(self: *Self, inst: Air.Inst.Ref) Type { + const mod = self.bin_file.options.module.?; + return self.air.typeOf(inst, &mod.intern_pool); +} + +fn typeOfIndex(self: *Self, inst: Air.Inst.Index) Type { + const mod = self.bin_file.options.module.?; + return self.air.typeOfIndex(inst, &mod.intern_pool); +} diff --git a/src/arch/sparc64/Emit.zig 
b/src/arch/sparc64/Emit.zig index 7e71492af7..7d16105348 100644 --- a/src/arch/sparc64/Emit.zig +++ b/src/arch/sparc64/Emit.zig @@ -91,6 +91,11 @@ pub fn emitMir( .lduw => try emit.mirArithmetic3Op(inst), .ldx => try emit.mirArithmetic3Op(inst), + .lduba => try emit.mirMemASI(inst), + .lduha => try emit.mirMemASI(inst), + .lduwa => try emit.mirMemASI(inst), + .ldxa => try emit.mirMemASI(inst), + .@"and" => try emit.mirArithmetic3Op(inst), .@"or" => try emit.mirArithmetic3Op(inst), .xor => try emit.mirArithmetic3Op(inst), @@ -127,6 +132,11 @@ pub fn emitMir( .stw => try emit.mirArithmetic3Op(inst), .stx => try emit.mirArithmetic3Op(inst), + .stba => try emit.mirMemASI(inst), + .stha => try emit.mirMemASI(inst), + .stwa => try emit.mirMemASI(inst), + .stxa => try emit.mirMemASI(inst), + .sub => try emit.mirArithmetic3Op(inst), .subcc => try emit.mirArithmetic3Op(inst), @@ -368,6 +378,29 @@ fn mirConditionalMove(emit: *Emit, inst: Mir.Inst.Index) !void { } } +fn mirMemASI(emit: *Emit, inst: Mir.Inst.Index) !void { + const tag = emit.mir.instructions.items(.tag)[inst]; + const data = emit.mir.instructions.items(.data)[inst].mem_asi; + + const rd = data.rd; + const rs1 = data.rs1; + const rs2 = data.rs2; + const asi = data.asi; + + switch (tag) { + .lduba => try emit.writeInstruction(Instruction.lduba(rs1, rs2, asi, rd)), + .lduha => try emit.writeInstruction(Instruction.lduha(rs1, rs2, asi, rd)), + .lduwa => try emit.writeInstruction(Instruction.lduwa(rs1, rs2, asi, rd)), + .ldxa => try emit.writeInstruction(Instruction.ldxa(rs1, rs2, asi, rd)), + + .stba => try emit.writeInstruction(Instruction.stba(rs1, rs2, asi, rd)), + .stha => try emit.writeInstruction(Instruction.stha(rs1, rs2, asi, rd)), + .stwa => try emit.writeInstruction(Instruction.stwa(rs1, rs2, asi, rd)), + .stxa => try emit.writeInstruction(Instruction.stxa(rs1, rs2, asi, rd)), + else => unreachable, + } +} + fn mirMembar(emit: *Emit, inst: Mir.Inst.Index) !void { const tag = emit.mir.instructions.items(.tag)[inst]; const mask = emit.mir.instructions.items(.data)[inst].membar_mask; diff --git a/src/arch/sparc64/Mir.zig b/src/arch/sparc64/Mir.zig index f854152a2f..f9a4056705 100644 --- a/src/arch/sparc64/Mir.zig +++ b/src/arch/sparc64/Mir.zig @@ -15,6 +15,7 @@ const bits = @import("bits.zig"); const Air = @import("../../Air.zig"); const Instruction = bits.Instruction; +const ASI = bits.Instruction.ASI; const Register = bits.Register; instructions: std.MultiArrayList(Inst).Slice, @@ -70,6 +71,16 @@ pub const Inst = struct { lduw, ldx, + /// A.28 Load Integer from Alternate Space + /// This uses the mem_asi field. + /// Note that the ldda variant of this instruction is deprecated, so do not emit + /// it unless specifically requested (e.g. by inline assembly). + // TODO add other operations. + lduba, + lduha, + lduwa, + ldxa, + /// A.31 Logical Operations /// This uses the arithmetic_3op field. // TODO add other operations. @@ -132,6 +143,16 @@ pub const Inst = struct { stw, stx, + /// A.55 Store Integer into Alternate Space + /// This uses the mem_asi field. + /// Note that the stda variant of this instruction is deprecated, so do not emit + /// it unless specifically requested (e.g. by inline assembly). + // TODO add other operations. + stba, + stha, + stwa, + stxa, + /// A.56 Subtract /// This uses the arithmetic_3op field. // TODO add other operations. @@ -241,6 +262,15 @@ pub const Inst = struct { inst: Index, }, + /// ASI-tagged memory operations. + /// Used by e.g. 
ldxa, stxa + mem_asi: struct { + rd: Register, + rs1: Register, + rs2: Register = .g0, + asi: ASI, + }, + /// Membar mask, controls the barrier behavior /// Used by e.g. membar membar_mask: struct { diff --git a/src/arch/sparc64/bits.zig b/src/arch/sparc64/bits.zig index 0446a84d6d..7c943626f9 100644 --- a/src/arch/sparc64/bits.zig +++ b/src/arch/sparc64/bits.zig @@ -1229,6 +1229,22 @@ pub const Instruction = union(enum) { }; } + pub fn lduba(rs1: Register, rs2: Register, asi: ASI, rd: Register) Instruction { + return format3i(0b11, 0b01_0001, rs1, rs2, rd, asi); + } + + pub fn lduha(rs1: Register, rs2: Register, asi: ASI, rd: Register) Instruction { + return format3i(0b11, 0b01_0010, rs1, rs2, rd, asi); + } + + pub fn lduwa(rs1: Register, rs2: Register, asi: ASI, rd: Register) Instruction { + return format3i(0b11, 0b01_0000, rs1, rs2, rd, asi); + } + + pub fn ldxa(rs1: Register, rs2: Register, asi: ASI, rd: Register) Instruction { + return format3i(0b11, 0b01_1011, rs1, rs2, rd, asi); + } + pub fn @"and"(comptime s2: type, rs1: Register, rs2: s2, rd: Register) Instruction { return switch (s2) { Register => format3a(0b10, 0b00_0001, rs1, rs2, rd), @@ -1417,6 +1433,22 @@ pub const Instruction = union(enum) { }; } + pub fn stba(rs1: Register, rs2: Register, asi: ASI, rd: Register) Instruction { + return format3i(0b11, 0b01_0101, rs1, rs2, rd, asi); + } + + pub fn stha(rs1: Register, rs2: Register, asi: ASI, rd: Register) Instruction { + return format3i(0b11, 0b01_0110, rs1, rs2, rd, asi); + } + + pub fn stwa(rs1: Register, rs2: Register, asi: ASI, rd: Register) Instruction { + return format3i(0b11, 0b01_0100, rs1, rs2, rd, asi); + } + + pub fn stxa(rs1: Register, rs2: Register, asi: ASI, rd: Register) Instruction { + return format3i(0b11, 0b01_1110, rs1, rs2, rd, asi); + } + pub fn sub(comptime s2: type, rs1: Register, rs2: s2, rd: Register) Instruction { return switch (s2) { Register => format3a(0b10, 0b00_0100, rs1, rs2, rd), diff --git a/src/arch/wasm/CodeGen.zig b/src/arch/wasm/CodeGen.zig index 11969d567a..495ca7f6dd 100644 --- a/src/arch/wasm/CodeGen.zig +++ b/src/arch/wasm/CodeGen.zig @@ -11,6 +11,7 @@ const log = std.log.scoped(.codegen); const codegen = @import("../../codegen.zig"); const Module = @import("../../Module.zig"); +const InternPool = @import("../../InternPool.zig"); const Decl = Module.Decl; const Type = @import("../../type.zig").Type; const Value = @import("../../value.zig").Value; @@ -29,6 +30,9 @@ const errUnionErrorOffset = codegen.errUnionErrorOffset; /// Wasm Value, created when generating an instruction const WValue = union(enum) { + /// `WValue` which has been freed and may no longer hold + /// any references. 
+ dead: void, /// May be referenced but is unused none: void, /// The value lives on top of the stack @@ -86,6 +90,7 @@ const WValue = union(enum) { fn offset(value: WValue) u32 { switch (value) { .stack_offset => |stack_offset| return stack_offset.value, + .dead => unreachable, else => return 0, } } @@ -123,7 +128,8 @@ const WValue = union(enum) { .f64 => gen.free_locals_f64.append(gen.gpa, local_value) catch return, .v128 => gen.free_locals_v128.append(gen.gpa, local_value) catch return, } - value.* = undefined; + log.debug("freed local ({d}) of type {}", .{ local_value, valtype }); + value.* = .dead; } }; @@ -759,8 +765,9 @@ pub fn deinit(func: *CodeGen) void { /// Sets `err_msg` on `CodeGen` and returns `error.CodegenFail` which is caught in link/Wasm.zig fn fail(func: *CodeGen, comptime fmt: []const u8, args: anytype) InnerError { + const mod = func.bin_file.base.options.module.?; const src = LazySrcLoc.nodeOffset(0); - const src_loc = src.toSrcLoc(func.decl); + const src_loc = src.toSrcLoc(func.decl, mod); func.err_msg = try Module.ErrorMsg.create(func.gpa, src_loc, fmt, args); return error.CodegenFail; } @@ -783,9 +790,10 @@ fn resolveInst(func: *CodeGen, ref: Air.Inst.Ref) InnerError!WValue { const gop = try func.branches.items[0].values.getOrPut(func.gpa, ref); assert(!gop.found_existing); - const val = func.air.value(ref).?; - const ty = func.air.typeOf(ref); - if (!ty.hasRuntimeBitsIgnoreComptime() and !ty.isInt() and !ty.isError()) { + const mod = func.bin_file.base.options.module.?; + const val = (try func.air.value(ref, mod)).?; + const ty = func.typeOf(ref); + if (!ty.hasRuntimeBitsIgnoreComptime(mod) and !ty.isInt(mod) and !ty.isError(mod)) { gop.value_ptr.* = WValue{ .none = {} }; return gop.value_ptr.*; } @@ -796,7 +804,7 @@ fn resolveInst(func: *CodeGen, ref: Air.Inst.Ref) InnerError!WValue { // // In the other cases, we will simply lower the constant to a value that fits // into a single local (such as a pointer, integer, bool, etc). - const result = if (isByRef(ty, func.target)) blk: { + const result = if (isByRef(ty, mod)) blk: { const sym_index = try func.bin_file.lowerUnnamedConst(.{ .ty = ty, .val = val }, func.decl_index); break :blk WValue{ .memory = sym_index }; } else try func.lowerConstant(val, ty); @@ -832,6 +840,7 @@ const Branch = struct { fn deinit(branch: *Branch, gpa: Allocator) void { branch.values.deinit(gpa); + branch.* = undefined; } }; @@ -874,13 +883,17 @@ fn iterateBigTomb(func: *CodeGen, inst: Air.Inst.Index, operand_count: usize) !B fn processDeath(func: *CodeGen, ref: Air.Inst.Ref) void { const inst = Air.refToIndex(ref) orelse return; - if (func.air.instructions.items(.tag)[inst] == .constant) return; + assert(func.air.instructions.items(.tag)[inst] != .interned); // Branches are currently only allowed to free locals allocated // within their own branch. // TODO: Upon branch consolidation free any locals if needed. 
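The wasm backend reuses locals by reference count: processDeath above decrements the count, and once it reaches zero the local's index goes onto a per-valtype free list that allocLocal pops from later. A self-contained sketch of that release step, with illustrative names rather than the backend's:

const std = @import("std");

const Local = struct { value: u32, references: u32 };

// Drop one reference; when the count hits zero, the index becomes available
// for reuse by pushing it onto the free list for its value type.
fn release(local: *Local, free_list: *std.ArrayList(u32)) !void {
    local.references -= 1;
    if (local.references == 0) {
        try free_list.append(local.value);
    }
}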
const value = func.currentBranch().values.getPtr(ref) orelse return; if (value.* != .local) return; - log.debug("Decreasing reference for ref: %{?d}\n", .{Air.refToIndex(ref)}); + const reserved_indexes = func.args.len + @boolToInt(func.return_value != .none); + if (value.local.value < reserved_indexes) { + return; // function arguments can never be re-used + } + log.debug("Decreasing reference for ref: %{?d}, using local '{d}'", .{ Air.refToIndex(ref), value.local.value }); value.local.references -= 1; // if this panics, a call to `reuseOperand` was forgotten by the developer if (value.local.references == 0) { value.free(func); @@ -977,8 +990,9 @@ fn addExtraAssumeCapacity(func: *CodeGen, extra: anytype) error{OutOfMemory}!u32 } /// Using a given `Type`, returns the corresponding type -fn typeToValtype(ty: Type, target: std.Target) wasm.Valtype { - return switch (ty.zigTypeTag()) { +fn typeToValtype(ty: Type, mod: *Module) wasm.Valtype { + const target = mod.getTarget(); + return switch (ty.zigTypeTag(mod)) { .Float => blk: { const bits = ty.floatBits(target); if (bits == 16) return wasm.Valtype.i32; // stored/loaded as u16 @@ -988,44 +1002,52 @@ fn typeToValtype(ty: Type, target: std.Target) wasm.Valtype { return wasm.Valtype.i32; // represented as pointer to stack }, .Int, .Enum => blk: { - const info = ty.intInfo(target); + const info = ty.intInfo(mod); if (info.bits <= 32) break :blk wasm.Valtype.i32; if (info.bits > 32 and info.bits <= 128) break :blk wasm.Valtype.i64; break :blk wasm.Valtype.i32; // represented as pointer to stack }, - .Struct => switch (ty.containerLayout()) { + .Struct => switch (ty.containerLayout(mod)) { .Packed => { - const struct_obj = ty.castTag(.@"struct").?.data; - return typeToValtype(struct_obj.backing_int_ty, target); + const struct_obj = mod.typeToStruct(ty).?; + return typeToValtype(struct_obj.backing_int_ty, mod); }, else => wasm.Valtype.i32, }, - .Vector => switch (determineSimdStoreStrategy(ty, target)) { + .Vector => switch (determineSimdStoreStrategy(ty, mod)) { .direct => wasm.Valtype.v128, .unrolled => wasm.Valtype.i32, }, + .Union => switch (ty.containerLayout(mod)) { + .Packed => { + const int_ty = mod.intType(.unsigned, @intCast(u16, ty.bitSize(mod))) catch @panic("out of memory"); + return typeToValtype(int_ty, mod); + }, + else => wasm.Valtype.i32, + }, else => wasm.Valtype.i32, // all represented as reference/immediate }; } /// Using a given `Type`, returns the byte representation of its wasm value type -fn genValtype(ty: Type, target: std.Target) u8 { - return wasm.valtype(typeToValtype(ty, target)); +fn genValtype(ty: Type, mod: *Module) u8 { + return wasm.valtype(typeToValtype(ty, mod)); } /// Using a given `Type`, returns the corresponding wasm value type /// Differently from `genValtype` this also allows `void` to create a block /// with no return type -fn genBlockType(ty: Type, target: std.Target) u8 { - return switch (ty.tag()) { - .void, .noreturn => wasm.block_empty, - else => genValtype(ty, target), +fn genBlockType(ty: Type, mod: *Module) u8 { + return switch (ty.ip_index) { + .void_type, .noreturn_type => wasm.block_empty, + else => genValtype(ty, mod), }; } /// Writes the bytecode depending on the given `WValue` in `val` fn emitWValue(func: *CodeGen, value: WValue) InnerError!void { switch (value) { + .dead => unreachable, // reference to free'd `WValue` (missing reuseOperand?) 
.none, .stack => {}, // no-op .local => |idx| try func.addLabel(.local_get, idx.value), .imm32 => |val| try func.addImm32(@bitCast(i32, val)), @@ -1079,30 +1101,31 @@ fn getResolvedInst(func: *CodeGen, ref: Air.Inst.Ref) *WValue { /// Creates one locals for a given `Type`. /// Returns a corresponding `Wvalue` with `local` as active tag fn allocLocal(func: *CodeGen, ty: Type) InnerError!WValue { - const valtype = typeToValtype(ty, func.target); + const mod = func.bin_file.base.options.module.?; + const valtype = typeToValtype(ty, mod); switch (valtype) { .i32 => if (func.free_locals_i32.popOrNull()) |index| { - log.debug("reusing local ({d}) of type {}\n", .{ index, valtype }); + log.debug("reusing local ({d}) of type {}", .{ index, valtype }); return WValue{ .local = .{ .value = index, .references = 1 } }; }, .i64 => if (func.free_locals_i64.popOrNull()) |index| { - log.debug("reusing local ({d}) of type {}\n", .{ index, valtype }); + log.debug("reusing local ({d}) of type {}", .{ index, valtype }); return WValue{ .local = .{ .value = index, .references = 1 } }; }, .f32 => if (func.free_locals_f32.popOrNull()) |index| { - log.debug("reusing local ({d}) of type {}\n", .{ index, valtype }); + log.debug("reusing local ({d}) of type {}", .{ index, valtype }); return WValue{ .local = .{ .value = index, .references = 1 } }; }, .f64 => if (func.free_locals_f64.popOrNull()) |index| { - log.debug("reusing local ({d}) of type {}\n", .{ index, valtype }); + log.debug("reusing local ({d}) of type {}", .{ index, valtype }); return WValue{ .local = .{ .value = index, .references = 1 } }; }, .v128 => if (func.free_locals_v128.popOrNull()) |index| { - log.debug("reusing local ({d}) of type {}\n", .{ index, valtype }); + log.debug("reusing local ({d}) of type {}", .{ index, valtype }); return WValue{ .local = .{ .value = index, .references = 1 } }; }, } - log.debug("new local of type {}\n", .{valtype}); + log.debug("new local of type {}", .{valtype}); // no local was free to be re-used, so allocate a new local instead return func.ensureAllocLocal(ty); } @@ -1110,7 +1133,8 @@ fn allocLocal(func: *CodeGen, ty: Type) InnerError!WValue { /// Ensures a new local will be created. This is useful when it's useful /// to use a zero-initialized local. fn ensureAllocLocal(func: *CodeGen, ty: Type) InnerError!WValue { - try func.locals.append(func.gpa, genValtype(ty, func.target)); + const mod = func.bin_file.base.options.module.?; + try func.locals.append(func.gpa, genValtype(ty, mod)); const initial_index = func.local_index; func.local_index += 1; return WValue{ .local = .{ .value = initial_index, .references = 1 } }; @@ -1118,48 +1142,55 @@ fn ensureAllocLocal(func: *CodeGen, ty: Type) InnerError!WValue { /// Generates a `wasm.Type` from a given function type. /// Memory is owned by the caller. 
-fn genFunctype(gpa: Allocator, cc: std.builtin.CallingConvention, params: []const Type, return_type: Type, target: std.Target) !wasm.Type { +fn genFunctype( + gpa: Allocator, + cc: std.builtin.CallingConvention, + params: []const InternPool.Index, + return_type: Type, + mod: *Module, +) !wasm.Type { var temp_params = std.ArrayList(wasm.Valtype).init(gpa); defer temp_params.deinit(); var returns = std.ArrayList(wasm.Valtype).init(gpa); defer returns.deinit(); - if (firstParamSRet(cc, return_type, target)) { + if (firstParamSRet(cc, return_type, mod)) { try temp_params.append(.i32); // memory address is always a 32-bit handle - } else if (return_type.hasRuntimeBitsIgnoreComptime()) { + } else if (return_type.hasRuntimeBitsIgnoreComptime(mod)) { if (cc == .C) { - const res_classes = abi.classifyType(return_type, target); + const res_classes = abi.classifyType(return_type, mod); assert(res_classes[0] == .direct and res_classes[1] == .none); - const scalar_type = abi.scalarType(return_type, target); - try returns.append(typeToValtype(scalar_type, target)); + const scalar_type = abi.scalarType(return_type, mod); + try returns.append(typeToValtype(scalar_type, mod)); } else { - try returns.append(typeToValtype(return_type, target)); + try returns.append(typeToValtype(return_type, mod)); } - } else if (return_type.isError()) { + } else if (return_type.isError(mod)) { try returns.append(.i32); } // param types - for (params) |param_type| { - if (!param_type.hasRuntimeBitsIgnoreComptime()) continue; + for (params) |param_type_ip| { + const param_type = param_type_ip.toType(); + if (!param_type.hasRuntimeBitsIgnoreComptime(mod)) continue; switch (cc) { .C => { - const param_classes = abi.classifyType(param_type, target); + const param_classes = abi.classifyType(param_type, mod); for (param_classes) |class| { if (class == .none) continue; if (class == .direct) { - const scalar_type = abi.scalarType(param_type, target); - try temp_params.append(typeToValtype(scalar_type, target)); + const scalar_type = abi.scalarType(param_type, mod); + try temp_params.append(typeToValtype(scalar_type, mod)); } else { - try temp_params.append(typeToValtype(param_type, target)); + try temp_params.append(typeToValtype(param_type, mod)); } } }, - else => if (isByRef(param_type, target)) + else => if (isByRef(param_type, mod)) try temp_params.append(.i32) else - try temp_params.append(typeToValtype(param_type, target)), + try temp_params.append(typeToValtype(param_type, mod)), } } @@ -1172,20 +1203,22 @@ fn genFunctype(gpa: Allocator, cc: std.builtin.CallingConvention, params: []cons pub fn generate( bin_file: *link.File, src_loc: Module.SrcLoc, - func: *Module.Fn, + func_index: Module.Fn.Index, air: Air, liveness: Liveness, code: *std.ArrayList(u8), debug_output: codegen.DebugInfoOutput, ) codegen.CodeGenError!codegen.Result { _ = src_loc; + const mod = bin_file.options.module.?; + const func = mod.funcPtr(func_index); var code_gen: CodeGen = .{ .gpa = bin_file.allocator, .air = air, .liveness = liveness, .code = code, .decl_index = func.owner_decl, - .decl = bin_file.options.module.?.declPtr(func.owner_decl), + .decl = mod.declPtr(func.owner_decl), .err_msg = undefined, .locals = .{}, .target = bin_file.options.target, @@ -1204,8 +1237,9 @@ pub fn generate( } fn genFunc(func: *CodeGen) InnerError!void { - const fn_info = func.decl.ty.fnInfo(); - var func_type = try genFunctype(func.gpa, fn_info.cc, fn_info.param_types, fn_info.return_type, func.target); + const mod = func.bin_file.base.options.module.?; + const fn_info 
= mod.typeToFunc(func.decl.ty).?; + var func_type = try genFunctype(func.gpa, fn_info.cc, fn_info.param_types, fn_info.return_type.toType(), mod); defer func_type.deinit(func.gpa); _ = try func.bin_file.storeDeclType(func.decl_index, func_type); @@ -1222,6 +1256,7 @@ fn genFunc(func: *CodeGen) InnerError!void { defer { var outer_branch = func.branches.pop(); outer_branch.deinit(func.gpa); + assert(func.branches.items.len == 0); // missing branch merge } // Generate MIR for function body try func.genBody(func.air.getMainBody()); @@ -1230,8 +1265,8 @@ fn genFunc(func: *CodeGen) InnerError!void { // we emit an unreachable instruction to tell the stack validator that part will never be reached. if (func_type.returns.len != 0 and func.air.instructions.len > 0) { const inst = @intCast(u32, func.air.instructions.len - 1); - const last_inst_ty = func.air.typeOfIndex(inst); - if (!last_inst_ty.hasRuntimeBitsIgnoreComptime() or last_inst_ty.isNoReturn()) { + const last_inst_ty = func.typeOfIndex(inst); + if (!last_inst_ty.hasRuntimeBitsIgnoreComptime(mod) or last_inst_ty.isNoReturn(mod)) { try func.addTag(.@"unreachable"); } } @@ -1242,7 +1277,7 @@ fn genFunc(func: *CodeGen) InnerError!void { // check if we have to initialize and allocate anything into the stack frame. // If so, create enough stack space and insert the instructions at the front of the list. - if (func.stack_size > 0) { + if (func.initial_stack_value != .none) { var prologue = std.ArrayList(Mir.Inst).init(func.gpa); defer prologue.deinit(); @@ -1251,7 +1286,7 @@ fn genFunc(func: *CodeGen) InnerError!void { // store stack pointer so we can restore it when we return from the function try prologue.append(.{ .tag = .local_tee, .data = .{ .label = func.initial_stack_value.local.value } }); // get the total stack size - const aligned_stack = std.mem.alignForwardGeneric(u32, func.stack_size, func.stack_alignment); + const aligned_stack = std.mem.alignForward(u32, func.stack_size, func.stack_alignment); try prologue.append(.{ .tag = .i32_const, .data = .{ .imm32 = @intCast(i32, aligned_stack) } }); // substract it from the current stack pointer try prologue.append(.{ .tag = .i32_sub, .data = .{ .tag = {} } }); @@ -1312,10 +1347,9 @@ const CallWValues = struct { }; fn resolveCallingConventionValues(func: *CodeGen, fn_ty: Type) InnerError!CallWValues { - const cc = fn_ty.fnCallingConvention(); - const param_types = try func.gpa.alloc(Type, fn_ty.fnParamLen()); - defer func.gpa.free(param_types); - fn_ty.fnParamTypes(param_types); + const mod = func.bin_file.base.options.module.?; + const fn_info = mod.typeToFunc(fn_ty).?; + const cc = fn_info.cc; var result: CallWValues = .{ .args = &.{}, .return_value = .none, @@ -1327,8 +1361,7 @@ fn resolveCallingConventionValues(func: *CodeGen, fn_ty: Type) InnerError!CallWV // Check if we store the result as a pointer to the stack rather than // by value - const fn_info = fn_ty.fnInfo(); - if (firstParamSRet(fn_info.cc, fn_info.return_type, func.target)) { + if (firstParamSRet(fn_info.cc, fn_info.return_type.toType(), mod)) { // the sret arg will be passed as first argument, therefore we // set the `return_value` before allocating locals for regular args. 
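// A standalone sketch (illustrative names only, not taken from this change) of
// the shape the sret convention gives a function: instead of returning the
// aggregate on the wasm value stack, the caller passes a pointer as the first
// parameter and the callee writes the result through it.
const BigResult = struct { a: u64, b: u64, c: u64 };

// Source-level `fn makeBigResult() BigResult` behaves, at the ABI level,
// like this explicit out-parameter form, which is why `return_value` is
// claimed before any regular argument locals are assigned:
fn makeBigResultSret(out: *BigResult) void {
    out.* = .{ .a = 1, .b = 2, .c = 3 };
}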
result.return_value = .{ .local = .{ .value = func.local_index, .references = 1 } }; @@ -1337,8 +1370,8 @@ fn resolveCallingConventionValues(func: *CodeGen, fn_ty: Type) InnerError!CallWV switch (cc) { .Unspecified => { - for (param_types) |ty| { - if (!ty.hasRuntimeBitsIgnoreComptime()) { + for (fn_info.param_types) |ty| { + if (!ty.toType().hasRuntimeBitsIgnoreComptime(mod)) { continue; } @@ -1347,8 +1380,8 @@ fn resolveCallingConventionValues(func: *CodeGen, fn_ty: Type) InnerError!CallWV } }, .C => { - for (param_types) |ty| { - const ty_classes = abi.classifyType(ty, func.target); + for (fn_info.param_types) |ty| { + const ty_classes = abi.classifyType(ty.toType(), mod); for (ty_classes) |class| { if (class == .none) continue; try args.append(.{ .local = .{ .value = func.local_index, .references = 1 } }); @@ -1362,11 +1395,11 @@ fn resolveCallingConventionValues(func: *CodeGen, fn_ty: Type) InnerError!CallWV return result; } -fn firstParamSRet(cc: std.builtin.CallingConvention, return_type: Type, target: std.Target) bool { +fn firstParamSRet(cc: std.builtin.CallingConvention, return_type: Type, mod: *Module) bool { switch (cc) { - .Unspecified, .Inline => return isByRef(return_type, target), + .Unspecified, .Inline => return isByRef(return_type, mod), .C => { - const ty_classes = abi.classifyType(return_type, target); + const ty_classes = abi.classifyType(return_type, mod); if (ty_classes[0] == .indirect) return true; if (ty_classes[0] == .direct and ty_classes[1] == .direct) return true; return false; @@ -1382,16 +1415,17 @@ fn lowerArg(func: *CodeGen, cc: std.builtin.CallingConvention, ty: Type, value: return func.lowerToStack(value); } - const ty_classes = abi.classifyType(ty, func.target); + const mod = func.bin_file.base.options.module.?; + const ty_classes = abi.classifyType(ty, mod); assert(ty_classes[0] != .none); - switch (ty.zigTypeTag()) { + switch (ty.zigTypeTag(mod)) { .Struct, .Union => { if (ty_classes[0] == .indirect) { return func.lowerToStack(value); } assert(ty_classes[0] == .direct); - const scalar_type = abi.scalarType(ty, func.target); - const abi_size = scalar_type.abiSize(func.target); + const scalar_type = abi.scalarType(ty, mod); + const abi_size = scalar_type.abiSize(mod); try func.emitWValue(value); // When the value lives in the virtual stack, we must load it onto the actual stack @@ -1399,12 +1433,12 @@ fn lowerArg(func: *CodeGen, cc: std.builtin.CallingConvention, ty: Type, value: const opcode = buildOpcode(.{ .op = .load, .width = @intCast(u8, abi_size), - .signedness = if (scalar_type.isSignedInt()) .signed else .unsigned, - .valtype1 = typeToValtype(scalar_type, func.target), + .signedness = if (scalar_type.isSignedInt(mod)) .signed else .unsigned, + .valtype1 = typeToValtype(scalar_type, mod), }); try func.addMemArg(Mir.Inst.Tag.fromOpcode(opcode), .{ .offset = value.offset(), - .alignment = scalar_type.abiAlignment(func.target), + .alignment = scalar_type.abiAlignment(mod), }); } }, @@ -1413,7 +1447,7 @@ fn lowerArg(func: *CodeGen, cc: std.builtin.CallingConvention, ty: Type, value: return func.lowerToStack(value); } assert(ty_classes[0] == .direct and ty_classes[1] == .direct); - assert(ty.abiSize(func.target) == 16); + assert(ty.abiSize(mod) == 16); // in this case we have an integer or float that must be lowered as 2 i64's. 
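// A standalone sketch of the layout the "2 i64's" comment above relies on:
// wasm linear memory is little-endian, so a 16-byte scalar is read as its low
// 8 bytes at offset 0 and its high 8 bytes at offset 8. (This helper is
// illustrative only; the backend emits the i64 loads and stores directly.)
const std = @import("std");

fn splitU128(value: u128) [2]u64 {
    const lo = @truncate(u64, value); // bytes 0..8 in memory
    const hi = @truncate(u64, value >> 64); // bytes 8..16 in memory
    return .{ lo, hi };
}

test "a 128-bit scalar is lowered as two 64-bit halves" {
    const parts = splitU128(0x1122334455667788_99aabbccddeeff00);
    try std.testing.expectEqual(@as(u64, 0x99aabbccddeeff00), parts[0]);
    try std.testing.expectEqual(@as(u64, 0x1122334455667788), parts[1]);
}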
try func.emitWValue(value); try func.addMemArg(.i64_load, .{ .offset = value.offset(), .alignment = 8 }); @@ -1480,24 +1514,24 @@ fn restoreStackPointer(func: *CodeGen) !void { /// /// Asserts Type has codegenbits fn allocStack(func: *CodeGen, ty: Type) !WValue { - assert(ty.hasRuntimeBitsIgnoreComptime()); + const mod = func.bin_file.base.options.module.?; + assert(ty.hasRuntimeBitsIgnoreComptime(mod)); if (func.initial_stack_value == .none) { try func.initializeStack(); } - const abi_size = std.math.cast(u32, ty.abiSize(func.target)) orelse { - const module = func.bin_file.base.options.module.?; + const abi_size = std.math.cast(u32, ty.abiSize(mod)) orelse { return func.fail("Type {} with ABI size of {d} exceeds stack frame size", .{ - ty.fmt(module), ty.abiSize(func.target), + ty.fmt(mod), ty.abiSize(mod), }); }; - const abi_align = ty.abiAlignment(func.target); + const abi_align = ty.abiAlignment(mod); if (abi_align > func.stack_alignment) { func.stack_alignment = abi_align; } - const offset = std.mem.alignForwardGeneric(u32, func.stack_size, abi_align); + const offset = std.mem.alignForward(u32, func.stack_size, abi_align); defer func.stack_size = offset + abi_size; return WValue{ .stack_offset = .{ .value = offset, .references = 1 } }; @@ -1508,29 +1542,29 @@ fn allocStack(func: *CodeGen, ty: Type) !WValue { /// This is different from allocStack where this will use the pointer's alignment /// if it is set, to ensure the stack alignment will be set correctly. fn allocStackPtr(func: *CodeGen, inst: Air.Inst.Index) !WValue { - const ptr_ty = func.air.typeOfIndex(inst); - const pointee_ty = ptr_ty.childType(); + const mod = func.bin_file.base.options.module.?; + const ptr_ty = func.typeOfIndex(inst); + const pointee_ty = ptr_ty.childType(mod); if (func.initial_stack_value == .none) { try func.initializeStack(); } - if (!pointee_ty.hasRuntimeBitsIgnoreComptime()) { + if (!pointee_ty.hasRuntimeBitsIgnoreComptime(mod)) { return func.allocStack(Type.usize); // create a value containing just the stack pointer. } - const abi_alignment = ptr_ty.ptrAlignment(func.target); - const abi_size = std.math.cast(u32, pointee_ty.abiSize(func.target)) orelse { - const module = func.bin_file.base.options.module.?; + const abi_alignment = ptr_ty.ptrAlignment(mod); + const abi_size = std.math.cast(u32, pointee_ty.abiSize(mod)) orelse { return func.fail("Type {} with ABI size of {d} exceeds stack frame size", .{ - pointee_ty.fmt(module), pointee_ty.abiSize(func.target), + pointee_ty.fmt(mod), pointee_ty.abiSize(mod), }); }; if (abi_alignment > func.stack_alignment) { func.stack_alignment = abi_alignment; } - const offset = std.mem.alignForwardGeneric(u32, func.stack_size, abi_alignment); + const offset = std.mem.alignForward(u32, func.stack_size, abi_alignment); defer func.stack_size = offset + abi_size; return WValue{ .stack_offset = .{ .value = offset, .references = 1 } }; @@ -1593,10 +1627,16 @@ fn memcpy(func: *CodeGen, dst: WValue, src: WValue, len: WValue) !void { else => {}, } - // TODO: We should probably lower this to a call to compiler_rt - // But for now, we implement it manually - var offset = try func.ensureAllocLocal(Type.usize); // local for counter + // allocate a local for the offset, and set it to 0. + // This to ensure that inside loops we correctly re-set the counter. 
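// What the generated instruction sequence below amounts to, written as plain
// Zig (illustrative helper, not part of this change): a counter local that is
// explicitly reset to zero before the copy loop, so re-entering the copy from
// an enclosing loop never reuses a stale counter value.
fn copyBytesManually(dst: [*]u8, src: [*]const u8, len: usize) void {
    var offset: usize = 0; // counter local, always re-initialized to 0
    while (offset < len) : (offset += 1) {
        dst[offset] = src[offset];
    }
}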
+ var offset = try func.allocLocal(Type.usize); // local for counter defer offset.free(func); + switch (func.arch()) { + .wasm32 => try func.addImm32(0), + .wasm64 => try func.addImm64(0), + else => unreachable, + } + try func.addLabel(.local_set, offset.local.value); // outer block to jump to when loop is done try func.startBlock(.block, wasm.block_empty); @@ -1666,7 +1706,7 @@ fn memcpy(func: *CodeGen, dst: WValue, src: WValue, len: WValue) !void { } fn ptrSize(func: *const CodeGen) u16 { - return @divExact(func.target.cpu.arch.ptrBitWidth(), 8); + return @divExact(func.target.ptrBitWidth(), 8); } fn arch(func: *const CodeGen) std.Target.Cpu.Arch { @@ -1675,8 +1715,9 @@ fn arch(func: *const CodeGen) std.Target.Cpu.Arch { /// For a given `Type`, will return true when the type will be passed /// by reference, rather than by value -fn isByRef(ty: Type, target: std.Target) bool { - switch (ty.zigTypeTag()) { +fn isByRef(ty: Type, mod: *Module) bool { + const target = mod.getTarget(); + switch (ty.zigTypeTag(mod)) { .Type, .ComptimeInt, .ComptimeFloat, @@ -1697,37 +1738,42 @@ fn isByRef(ty: Type, target: std.Target) bool { .Array, .Frame, - .Union, - => return ty.hasRuntimeBitsIgnoreComptime(), + => return ty.hasRuntimeBitsIgnoreComptime(mod), + .Union => { + if (mod.typeToUnion(ty)) |union_obj| { + if (union_obj.layout == .Packed) { + return ty.abiSize(mod) > 8; + } + } + return ty.hasRuntimeBitsIgnoreComptime(mod); + }, .Struct => { - if (ty.castTag(.@"struct")) |struct_ty| { - const struct_obj = struct_ty.data; + if (mod.typeToStruct(ty)) |struct_obj| { if (struct_obj.layout == .Packed and struct_obj.haveFieldTypes()) { - return isByRef(struct_obj.backing_int_ty, target); + return isByRef(struct_obj.backing_int_ty, mod); } } - return ty.hasRuntimeBitsIgnoreComptime(); + return ty.hasRuntimeBitsIgnoreComptime(mod); }, - .Vector => return determineSimdStoreStrategy(ty, target) == .unrolled, - .Int => return ty.intInfo(target).bits > 64, + .Vector => return determineSimdStoreStrategy(ty, mod) == .unrolled, + .Int => return ty.intInfo(mod).bits > 64, .Float => return ty.floatBits(target) > 64, .ErrorUnion => { - const pl_ty = ty.errorUnionPayload(); - if (!pl_ty.hasRuntimeBitsIgnoreComptime()) { + const pl_ty = ty.errorUnionPayload(mod); + if (!pl_ty.hasRuntimeBitsIgnoreComptime(mod)) { return false; } return true; }, .Optional => { - if (ty.isPtrLikeOptional()) return false; - var buf: Type.Payload.ElemType = undefined; - const pl_type = ty.optionalChild(&buf); - if (pl_type.zigTypeTag() == .ErrorSet) return false; - return pl_type.hasRuntimeBitsIgnoreComptime(); + if (ty.isPtrLikeOptional(mod)) return false; + const pl_type = ty.optionalChild(mod); + if (pl_type.zigTypeTag(mod) == .ErrorSet) return false; + return pl_type.hasRuntimeBitsIgnoreComptime(mod); }, .Pointer => { // Slices act like struct and will be passed by reference - if (ty.isSlice()) return true; + if (ty.isSlice(mod)) return true; return false; }, } @@ -1742,10 +1788,11 @@ const SimdStoreStrategy = enum { /// This means when a given type is 128 bits and either the simd128 or relaxed-simd /// features are enabled, the function will return `.direct`. This would allow to store /// it using a instruction, rather than an unrolled version. 
-fn determineSimdStoreStrategy(ty: Type, target: std.Target) SimdStoreStrategy { - std.debug.assert(ty.zigTypeTag() == .Vector); - if (ty.bitSize(target) != 128) return .unrolled; +fn determineSimdStoreStrategy(ty: Type, mod: *Module) SimdStoreStrategy { + std.debug.assert(ty.zigTypeTag(mod) == .Vector); + if (ty.bitSize(mod) != 128) return .unrolled; const hasFeature = std.Target.wasm.featureSetHas; + const target = mod.getTarget(); const features = target.cpu.features; if (hasFeature(features, .relaxed_simd) or hasFeature(features, .simd128)) { return .direct; @@ -1785,8 +1832,7 @@ fn buildPointerOffset(func: *CodeGen, ptr_value: WValue, offset: u64, action: en fn genInst(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { const air_tags = func.air.instructions.items(.tag); return switch (air_tags[inst]) { - .constant => unreachable, - .const_ty => unreachable, + .inferred_alloc, .inferred_alloc_comptime, .interned => unreachable, .add => func.airBinOp(inst, .add), .add_sat => func.airSatBinOp(inst, .add), @@ -1796,10 +1842,8 @@ fn genInst(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { .subwrap => func.airWrapBinOp(inst, .sub), .mul => func.airBinOp(inst, .mul), .mulwrap => func.airWrapBinOp(inst, .mul), - .div_float, - .div_exact, - .div_trunc, - => func.airDiv(inst), + .div_float, .div_exact => func.airDiv(inst), + .div_trunc => func.airDivTrunc(inst), .div_floor => func.airDivFloor(inst), .bit_and => func.airBinOp(inst, .@"and"), .bit_or => func.airBinOp(inst, .@"or"), @@ -1963,11 +2007,11 @@ fn genInst(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { .tag_name => func.airTagName(inst), .error_set_has_value => func.airErrorSetHasValue(inst), + .frame_addr => func.airFrameAddress(inst), .mul_sat, .mod, .assembly, - .frame_addr, .bit_reverse, .is_err_ptr, .is_non_err_ptr, @@ -2028,8 +2072,11 @@ fn genInst(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { } fn genBody(func: *CodeGen, body: []const Air.Inst.Index) InnerError!void { + const mod = func.bin_file.base.options.module.?; + const ip = &mod.intern_pool; + for (body) |inst| { - if (func.liveness.isUnused(inst) and !func.air.mustLower(inst)) { + if (func.liveness.isUnused(inst) and !func.air.mustLower(inst, ip)) { continue; } const old_bookkeeping_value = func.air_bookkeeping; @@ -2046,36 +2093,37 @@ fn genBody(func: *CodeGen, body: []const Air.Inst.Index) InnerError!void { } fn airRet(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const mod = func.bin_file.base.options.module.?; const un_op = func.air.instructions.items(.data)[inst].un_op; const operand = try func.resolveInst(un_op); - const fn_info = func.decl.ty.fnInfo(); - const ret_ty = fn_info.return_type; + const fn_info = mod.typeToFunc(func.decl.ty).?; + const ret_ty = fn_info.return_type.toType(); // result must be stored in the stack and we return a pointer // to the stack instead if (func.return_value != .none) { try func.store(func.return_value, operand, ret_ty, 0); - } else if (fn_info.cc == .C and ret_ty.hasRuntimeBitsIgnoreComptime()) { - switch (ret_ty.zigTypeTag()) { + } else if (fn_info.cc == .C and ret_ty.hasRuntimeBitsIgnoreComptime(mod)) { + switch (ret_ty.zigTypeTag(mod)) { // Aggregate types can be lowered as a singular value .Struct, .Union => { - const scalar_type = abi.scalarType(ret_ty, func.target); + const scalar_type = abi.scalarType(ret_ty, mod); try func.emitWValue(operand); const opcode = buildOpcode(.{ .op = .load, - .width = @intCast(u8, scalar_type.abiSize(func.target) * 8), - .signedness = if 
(scalar_type.isSignedInt()) .signed else .unsigned, - .valtype1 = typeToValtype(scalar_type, func.target), + .width = @intCast(u8, scalar_type.abiSize(mod) * 8), + .signedness = if (scalar_type.isSignedInt(mod)) .signed else .unsigned, + .valtype1 = typeToValtype(scalar_type, mod), }); try func.addMemArg(Mir.Inst.Tag.fromOpcode(opcode), .{ .offset = operand.offset(), - .alignment = scalar_type.abiAlignment(func.target), + .alignment = scalar_type.abiAlignment(mod), }); }, else => try func.emitWValue(operand), } } else { - if (!ret_ty.hasRuntimeBitsIgnoreComptime() and ret_ty.isError()) { + if (!ret_ty.hasRuntimeBitsIgnoreComptime(mod) and ret_ty.isError(mod)) { try func.addImm32(0); } else { try func.emitWValue(operand); @@ -2088,15 +2136,16 @@ fn airRet(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { } fn airRetPtr(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { - const child_type = func.air.typeOfIndex(inst).childType(); + const mod = func.bin_file.base.options.module.?; + const child_type = func.typeOfIndex(inst).childType(mod); var result = result: { - if (!child_type.isFnOrHasRuntimeBitsIgnoreComptime()) { + if (!child_type.isFnOrHasRuntimeBitsIgnoreComptime(mod)) { break :result try func.allocStack(Type.usize); // create pointer to void } - const fn_info = func.decl.ty.fnInfo(); - if (firstParamSRet(fn_info.cc, fn_info.return_type, func.target)) { + const fn_info = mod.typeToFunc(func.decl.ty).?; + if (firstParamSRet(fn_info.cc, fn_info.return_type.toType(), mod)) { break :result func.return_value; } @@ -2107,19 +2156,17 @@ fn airRetPtr(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { } fn airRetLoad(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const mod = func.bin_file.base.options.module.?; const un_op = func.air.instructions.items(.data)[inst].un_op; const operand = try func.resolveInst(un_op); - const ret_ty = func.air.typeOf(un_op).childType(); - if (!ret_ty.hasRuntimeBitsIgnoreComptime()) { - if (ret_ty.isError()) { + const ret_ty = func.typeOf(un_op).childType(mod); + + const fn_info = mod.typeToFunc(func.decl.ty).?; + if (!ret_ty.hasRuntimeBitsIgnoreComptime(mod)) { + if (ret_ty.isError(mod)) { try func.addImm32(0); - } else { - return func.finishAir(inst, .none, &.{}); } - } - - const fn_info = func.decl.ty.fnInfo(); - if (!firstParamSRet(fn_info.cc, fn_info.return_type, func.target)) { + } else if (!firstParamSRet(fn_info.cc, fn_info.return_type.toType(), mod)) { // leave on the stack _ = try func.load(operand, ret_ty, 0); } @@ -2134,42 +2181,48 @@ fn airCall(func: *CodeGen, inst: Air.Inst.Index, modifier: std.builtin.CallModif const pl_op = func.air.instructions.items(.data)[inst].pl_op; const extra = func.air.extraData(Air.Call, pl_op.payload); const args = @ptrCast([]const Air.Inst.Ref, func.air.extra[extra.end..][0..extra.data.args_len]); - const ty = func.air.typeOf(pl_op.operand); + const ty = func.typeOf(pl_op.operand); - const fn_ty = switch (ty.zigTypeTag()) { + const mod = func.bin_file.base.options.module.?; + const fn_ty = switch (ty.zigTypeTag(mod)) { .Fn => ty, - .Pointer => ty.childType(), + .Pointer => ty.childType(mod), else => unreachable, }; - const ret_ty = fn_ty.fnReturnType(); - const fn_info = fn_ty.fnInfo(); - const first_param_sret = firstParamSRet(fn_info.cc, fn_info.return_type, func.target); + const ret_ty = fn_ty.fnReturnType(mod); + const fn_info = mod.typeToFunc(fn_ty).?; + const first_param_sret = firstParamSRet(fn_info.cc, fn_info.return_type.toType(), mod); const callee: ?Decl.Index = blk: { - const 
func_val = func.air.value(pl_op.operand) orelse break :blk null; - const module = func.bin_file.base.options.module.?; - - if (func_val.castTag(.function)) |function| { - _ = try func.bin_file.getOrCreateAtomForDecl(function.data.owner_decl); - break :blk function.data.owner_decl; - } else if (func_val.castTag(.extern_fn)) |extern_fn| { - const ext_decl = module.declPtr(extern_fn.data.owner_decl); - const ext_info = ext_decl.ty.fnInfo(); - var func_type = try genFunctype(func.gpa, ext_info.cc, ext_info.param_types, ext_info.return_type, func.target); + const func_val = (try func.air.value(pl_op.operand, mod)) orelse break :blk null; + + if (func_val.getFunction(mod)) |function| { + _ = try func.bin_file.getOrCreateAtomForDecl(function.owner_decl); + break :blk function.owner_decl; + } else if (func_val.getExternFunc(mod)) |extern_func| { + const ext_decl = mod.declPtr(extern_func.decl); + const ext_info = mod.typeToFunc(ext_decl.ty).?; + var func_type = try genFunctype(func.gpa, ext_info.cc, ext_info.param_types, ext_info.return_type.toType(), mod); defer func_type.deinit(func.gpa); - const atom_index = try func.bin_file.getOrCreateAtomForDecl(extern_fn.data.owner_decl); + const atom_index = try func.bin_file.getOrCreateAtomForDecl(extern_func.decl); const atom = func.bin_file.getAtomPtr(atom_index); - const type_index = try func.bin_file.storeDeclType(extern_fn.data.owner_decl, func_type); + const type_index = try func.bin_file.storeDeclType(extern_func.decl, func_type); try func.bin_file.addOrUpdateImport( - mem.sliceTo(ext_decl.name, 0), + mod.intern_pool.stringToSlice(ext_decl.name), atom.getSymbolIndex().?, - ext_decl.getExternFn().?.lib_name, + mod.intern_pool.stringToSliceUnwrap(ext_decl.getOwnedExternFunc(mod).?.lib_name), type_index, ); - break :blk extern_fn.data.owner_decl; - } else if (func_val.castTag(.decl_ref)) |decl_ref| { - _ = try func.bin_file.getOrCreateAtomForDecl(decl_ref.data); - break :blk decl_ref.data; + break :blk extern_func.decl; + } else switch (mod.intern_pool.indexToKey(func_val.ip_index)) { + .ptr => |ptr| switch (ptr.addr) { + .decl => |decl| { + _ = try func.bin_file.getOrCreateAtomForDecl(decl); + break :blk decl; + }, + else => {}, + }, + else => {}, } return func.fail("Expected a function, but instead found type '{}'", .{func_val.tag()}); }; @@ -2183,10 +2236,10 @@ fn airCall(func: *CodeGen, inst: Air.Inst.Index, modifier: std.builtin.CallModif for (args) |arg| { const arg_val = try func.resolveInst(arg); - const arg_ty = func.air.typeOf(arg); - if (!arg_ty.hasRuntimeBitsIgnoreComptime()) continue; + const arg_ty = func.typeOf(arg); + if (!arg_ty.hasRuntimeBitsIgnoreComptime(mod)) continue; - try func.lowerArg(fn_ty.fnInfo().cc, arg_ty, arg_val); + try func.lowerArg(mod.typeToFunc(fn_ty).?.cc, arg_ty, arg_val); } if (callee) |direct| { @@ -2195,11 +2248,11 @@ fn airCall(func: *CodeGen, inst: Air.Inst.Index, modifier: std.builtin.CallModif } else { // in this case we call a function pointer // so load its value onto the stack - std.debug.assert(ty.zigTypeTag() == .Pointer); + std.debug.assert(ty.zigTypeTag(mod) == .Pointer); const operand = try func.resolveInst(pl_op.operand); try func.emitWValue(operand); - var fn_type = try genFunctype(func.gpa, fn_info.cc, fn_info.param_types, fn_info.return_type, func.target); + var fn_type = try genFunctype(func.gpa, fn_info.cc, fn_info.param_types, fn_info.return_type.toType(), mod); defer fn_type.deinit(func.gpa); const fn_type_index = try func.bin_file.putOrGetFuncType(fn_type); @@ -2207,18 +2260,18 @@ fn 
airCall(func: *CodeGen, inst: Air.Inst.Index, modifier: std.builtin.CallModif } const result_value = result_value: { - if (!ret_ty.hasRuntimeBitsIgnoreComptime() and !ret_ty.isError()) { + if (!ret_ty.hasRuntimeBitsIgnoreComptime(mod) and !ret_ty.isError(mod)) { break :result_value WValue{ .none = {} }; - } else if (ret_ty.isNoReturn()) { + } else if (ret_ty.isNoReturn(mod)) { try func.addTag(.@"unreachable"); break :result_value WValue{ .none = {} }; } else if (first_param_sret) { break :result_value sret; // TODO: Make this less fragile and optimize - } else if (fn_ty.fnInfo().cc == .C and ret_ty.zigTypeTag() == .Struct or ret_ty.zigTypeTag() == .Union) { + } else if (mod.typeToFunc(fn_ty).?.cc == .C and ret_ty.zigTypeTag(mod) == .Struct or ret_ty.zigTypeTag(mod) == .Union) { const result_local = try func.allocLocal(ret_ty); try func.addLabel(.local_set, result_local.local.value); - const scalar_type = abi.scalarType(ret_ty, func.target); + const scalar_type = abi.scalarType(ret_ty, mod); const result = try func.allocStack(scalar_type); try func.store(result, result_local, scalar_type, 0); break :result_value result; @@ -2241,6 +2294,7 @@ fn airAlloc(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { } fn airStore(func: *CodeGen, inst: Air.Inst.Index, safety: bool) InnerError!void { + const mod = func.bin_file.base.options.module.?; if (safety) { // TODO if the value is undef, write 0xaa bytes to dest } else { @@ -2250,26 +2304,22 @@ fn airStore(func: *CodeGen, inst: Air.Inst.Index, safety: bool) InnerError!void const lhs = try func.resolveInst(bin_op.lhs); const rhs = try func.resolveInst(bin_op.rhs); - const ptr_ty = func.air.typeOf(bin_op.lhs); - const ptr_info = ptr_ty.ptrInfo().data; - const ty = ptr_ty.childType(); + const ptr_ty = func.typeOf(bin_op.lhs); + const ptr_info = ptr_ty.ptrInfo(mod); + const ty = ptr_ty.childType(mod); if (ptr_info.host_size == 0) { try func.store(lhs, rhs, ty, 0); } else { // at this point we have a non-natural alignment, we must // load the value, and then shift+or the rhs into the result location. 
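// A worked standalone example (host and field widths chosen for illustration)
// of the shift+or store described in the comment above: load the host
// integer, clear the bits belonging to the field, then OR in the shifted new
// value. The exact sequence the backend emits may differ.
const std = @import("std");

fn storeBitField(host: u32, value: u32, bit_offset: u5, bit_size: u5) u32 {
    const field_mask = ((@as(u32, 1) << bit_size) - 1) << bit_offset;
    const keep_mask = ~field_mask; // mirrors the `mask ^= ~@as(u64, 0)` step below
    return (host & keep_mask) | ((value << bit_offset) & field_mask);
}

test "store a 4-bit field at bit offset 8" {
    try std.testing.expectEqual(@as(u32, 0xffff_f5ff), storeBitField(0xffff_ffff, 0x5, 8, 4));
}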
- var int_ty_payload: Type.Payload.Bits = .{ - .base = .{ .tag = .int_unsigned }, - .data = ptr_info.host_size * 8, - }; - const int_elem_ty = Type.initPayload(&int_ty_payload.base); + const int_elem_ty = try mod.intType(.unsigned, ptr_info.host_size * 8); - if (isByRef(int_elem_ty, func.target)) { + if (isByRef(int_elem_ty, mod)) { return func.fail("TODO: airStore for pointers to bitfields with backing type larger than 64bits", .{}); } - var mask = @intCast(u64, (@as(u65, 1) << @intCast(u7, ty.bitSize(func.target))) - 1); + var mask = @intCast(u64, (@as(u65, 1) << @intCast(u7, ty.bitSize(mod))) - 1); mask <<= @intCast(u6, ptr_info.bit_offset); mask ^= ~@as(u64, 0); const shift_val = if (ptr_info.host_size <= 4) @@ -2298,39 +2348,40 @@ fn airStore(func: *CodeGen, inst: Air.Inst.Index, safety: bool) InnerError!void fn store(func: *CodeGen, lhs: WValue, rhs: WValue, ty: Type, offset: u32) InnerError!void { assert(!(lhs != .stack and rhs == .stack)); - switch (ty.zigTypeTag()) { + const mod = func.bin_file.base.options.module.?; + const abi_size = ty.abiSize(mod); + switch (ty.zigTypeTag(mod)) { .ErrorUnion => { - const pl_ty = ty.errorUnionPayload(); - if (!pl_ty.hasRuntimeBitsIgnoreComptime()) { + const pl_ty = ty.errorUnionPayload(mod); + if (!pl_ty.hasRuntimeBitsIgnoreComptime(mod)) { return func.store(lhs, rhs, Type.anyerror, 0); } - const len = @intCast(u32, ty.abiSize(func.target)); + const len = @intCast(u32, abi_size); return func.memcpy(lhs, rhs, .{ .imm32 = len }); }, .Optional => { - if (ty.isPtrLikeOptional()) { + if (ty.isPtrLikeOptional(mod)) { return func.store(lhs, rhs, Type.usize, 0); } - var buf: Type.Payload.ElemType = undefined; - const pl_ty = ty.optionalChild(&buf); - if (!pl_ty.hasRuntimeBitsIgnoreComptime()) { + const pl_ty = ty.optionalChild(mod); + if (!pl_ty.hasRuntimeBitsIgnoreComptime(mod)) { return func.store(lhs, rhs, Type.u8, 0); } - if (pl_ty.zigTypeTag() == .ErrorSet) { + if (pl_ty.zigTypeTag(mod) == .ErrorSet) { return func.store(lhs, rhs, Type.anyerror, 0); } - const len = @intCast(u32, ty.abiSize(func.target)); + const len = @intCast(u32, abi_size); return func.memcpy(lhs, rhs, .{ .imm32 = len }); }, - .Struct, .Array, .Union => if (isByRef(ty, func.target)) { - const len = @intCast(u32, ty.abiSize(func.target)); + .Struct, .Array, .Union => if (isByRef(ty, mod)) { + const len = @intCast(u32, abi_size); return func.memcpy(lhs, rhs, .{ .imm32 = len }); }, - .Vector => switch (determineSimdStoreStrategy(ty, func.target)) { + .Vector => switch (determineSimdStoreStrategy(ty, mod)) { .unrolled => { - const len = @intCast(u32, ty.abiSize(func.target)); + const len = @intCast(u32, abi_size); return func.memcpy(lhs, rhs, .{ .imm32 = len }); }, .direct => { @@ -2342,13 +2393,13 @@ fn store(func: *CodeGen, lhs: WValue, rhs: WValue, ty: Type, offset: u32) InnerE try func.mir_extra.appendSlice(func.gpa, &[_]u32{ std.wasm.simdOpcode(.v128_store), offset + lhs.offset(), - ty.abiAlignment(func.target), + ty.abiAlignment(mod), }); return func.addInst(.{ .tag = .simd_prefix, .data = .{ .payload = extra_index } }); }, }, .Pointer => { - if (ty.isSlice()) { + if (ty.isSlice(mod)) { // store pointer first // lower it to the stack so we do not have to store rhs into a local first try func.emitWValue(lhs); @@ -2362,7 +2413,7 @@ fn store(func: *CodeGen, lhs: WValue, rhs: WValue, ty: Type, offset: u32) InnerE return; } }, - .Int => if (ty.intInfo(func.target).bits > 64) { + .Int, .Float => if (abi_size > 8 and abi_size <= 16) { try func.emitWValue(lhs); const lsb = try 
func.load(rhs, Type.u64, 0); try func.store(.{ .stack = {} }, lsb, Type.u64, 0 + lhs.offset()); @@ -2371,41 +2422,47 @@ fn store(func: *CodeGen, lhs: WValue, rhs: WValue, ty: Type, offset: u32) InnerE const msb = try func.load(rhs, Type.u64, 8); try func.store(.{ .stack = {} }, msb, Type.u64, 8 + lhs.offset()); return; + } else if (abi_size > 16) { + try func.memcpy(lhs, rhs, .{ .imm32 = @intCast(u32, ty.abiSize(mod)) }); + }, + else => if (abi_size > 8) { + return func.fail("TODO: `store` for type `{}` with abisize `{d}`", .{ + ty.fmt(func.bin_file.base.options.module.?), + abi_size, + }); }, - else => {}, } try func.emitWValue(lhs); // In this case we're actually interested in storing the stack position // into lhs, so we calculate that and emit that instead try func.lowerToStack(rhs); - const valtype = typeToValtype(ty, func.target); - const abi_size = @intCast(u8, ty.abiSize(func.target)); - + const valtype = typeToValtype(ty, mod); const opcode = buildOpcode(.{ .valtype1 = valtype, - .width = abi_size * 8, + .width = @intCast(u8, abi_size * 8), .op = .store, }); // store rhs value at stack pointer's location in memory try func.addMemArg( Mir.Inst.Tag.fromOpcode(opcode), - .{ .offset = offset + lhs.offset(), .alignment = ty.abiAlignment(func.target) }, + .{ .offset = offset + lhs.offset(), .alignment = ty.abiAlignment(mod) }, ); } fn airLoad(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const mod = func.bin_file.base.options.module.?; const ty_op = func.air.instructions.items(.data)[inst].ty_op; const operand = try func.resolveInst(ty_op.operand); const ty = func.air.getRefType(ty_op.ty); - const ptr_ty = func.air.typeOf(ty_op.operand); - const ptr_info = ptr_ty.ptrInfo().data; + const ptr_ty = func.typeOf(ty_op.operand); + const ptr_info = ptr_ty.ptrInfo(mod); - if (!ty.hasRuntimeBitsIgnoreComptime()) return func.finishAir(inst, .none, &.{ty_op.operand}); + if (!ty.hasRuntimeBitsIgnoreComptime(mod)) return func.finishAir(inst, .none, &.{ty_op.operand}); const result = result: { - if (isByRef(ty, func.target)) { + if (isByRef(ty, mod)) { const new_local = try func.allocStack(ty); try func.store(new_local, operand, ty, 0); break :result new_local; @@ -2418,11 +2475,7 @@ fn airLoad(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { // at this point we have a non-natural alignment, we must // shift the value to obtain the correct bit. - var int_ty_payload: Type.Payload.Bits = .{ - .base = .{ .tag = .int_unsigned }, - .data = ptr_info.host_size * 8, - }; - const int_elem_ty = Type.initPayload(&int_ty_payload.base); + const int_elem_ty = try mod.intType(.unsigned, ptr_info.host_size * 8); const shift_val = if (ptr_info.host_size <= 4) WValue{ .imm32 = ptr_info.bit_offset } else if (ptr_info.host_size <= 8) @@ -2442,25 +2495,26 @@ fn airLoad(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { /// Loads an operand from the linear memory section. /// NOTE: Leaves the value on the stack. 
fn load(func: *CodeGen, operand: WValue, ty: Type, offset: u32) InnerError!WValue { + const mod = func.bin_file.base.options.module.?; // load local's value from memory by its stack position try func.emitWValue(operand); - if (ty.zigTypeTag() == .Vector) { + if (ty.zigTypeTag(mod) == .Vector) { // TODO: Add helper functions for simd opcodes const extra_index = @intCast(u32, func.mir_extra.items.len); // stores as := opcode, offset, alignment (opcode::memarg) try func.mir_extra.appendSlice(func.gpa, &[_]u32{ std.wasm.simdOpcode(.v128_load), offset + operand.offset(), - ty.abiAlignment(func.target), + ty.abiAlignment(mod), }); try func.addInst(.{ .tag = .simd_prefix, .data = .{ .payload = extra_index } }); return WValue{ .stack = {} }; } - const abi_size = @intCast(u8, ty.abiSize(func.target)); + const abi_size = @intCast(u8, ty.abiSize(mod)); const opcode = buildOpcode(.{ - .valtype1 = typeToValtype(ty, func.target), + .valtype1 = typeToValtype(ty, mod), .width = abi_size * 8, .op = .load, .signedness = .unsigned, @@ -2468,19 +2522,20 @@ fn load(func: *CodeGen, operand: WValue, ty: Type, offset: u32) InnerError!WValu try func.addMemArg( Mir.Inst.Tag.fromOpcode(opcode), - .{ .offset = offset + operand.offset(), .alignment = ty.abiAlignment(func.target) }, + .{ .offset = offset + operand.offset(), .alignment = ty.abiAlignment(mod) }, ); return WValue{ .stack = {} }; } fn airArg(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const mod = func.bin_file.base.options.module.?; const arg_index = func.arg_index; const arg = func.args[arg_index]; - const cc = func.decl.ty.fnInfo().cc; - const arg_ty = func.air.typeOfIndex(inst); + const cc = mod.typeToFunc(func.decl.ty).?.cc; + const arg_ty = func.typeOfIndex(inst); if (cc == .C) { - const arg_classes = abi.classifyType(arg_ty, func.target); + const arg_classes = abi.classifyType(arg_ty, mod); for (arg_classes) |class| { if (class != .none) { func.arg_index += 1; @@ -2490,7 +2545,7 @@ fn airArg(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { // When we have an argument that's passed using more than a single parameter, // we combine them into a single stack value if (arg_classes[0] == .direct and arg_classes[1] == .direct) { - if (arg_ty.zigTypeTag() != .Int and arg_ty.zigTypeTag() != .Float) { + if (arg_ty.zigTypeTag(mod) != .Int and arg_ty.zigTypeTag(mod) != .Float) { return func.fail( "TODO: Implement C-ABI argument for type '{}'", .{arg_ty.fmt(func.bin_file.base.options.module.?)}, @@ -2520,18 +2575,44 @@ fn airArg(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { } fn airBinOp(func: *CodeGen, inst: Air.Inst.Index, op: Op) InnerError!void { + const mod = func.bin_file.base.options.module.?; const bin_op = func.air.instructions.items(.data)[inst].bin_op; const lhs = try func.resolveInst(bin_op.lhs); const rhs = try func.resolveInst(bin_op.rhs); - const ty = func.air.typeOf(bin_op.lhs); + const lhs_ty = func.typeOf(bin_op.lhs); + const rhs_ty = func.typeOf(bin_op.rhs); + + // For certain operations, such as shifting, the types are different. + // When converting this to a WebAssembly type, they *must* match to perform + // an operation. For this reason we verify if the WebAssembly type is different, in which + // case we first coerce the operands to the same type before performing the operation. + // For big integers we can ignore this as we will call into compiler-rt which handles this. 
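// The source-level situation the coercion described above handles (types are
// illustrative): Zig gives the shift amount a narrower type than the shifted
// operand, but wasm's i64.shl / i64.shr_u consume two operands of the same
// valtype, so the rhs must first be extended to the lhs's wasm type.
fn shiftExample(x: u64, amount: u6) u64 {
    // `amount` is a u6 at the Zig level; before emitting i64.shl it is
    // widened so both operands on the wasm stack share one valtype.
    return x << amount;
}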
+ const result = switch (op) { + .shr, .shl => res: { + const lhs_wasm_bits = toWasmBits(@intCast(u16, lhs_ty.bitSize(mod))) orelse { + return func.fail("TODO: implement '{s}' for types larger than 128 bits", .{@tagName(op)}); + }; + const rhs_wasm_bits = toWasmBits(@intCast(u16, rhs_ty.bitSize(mod))).?; + const new_rhs = if (lhs_wasm_bits != rhs_wasm_bits and lhs_wasm_bits != 128) blk: { + const tmp = try func.intcast(rhs, rhs_ty, lhs_ty); + break :blk try tmp.toLocal(func, lhs_ty); + } else rhs; + const stack_result = try func.binOp(lhs, new_rhs, lhs_ty, op); + break :res try stack_result.toLocal(func, lhs_ty); + }, + else => res: { + const stack_result = try func.binOp(lhs, rhs, lhs_ty, op); + break :res try stack_result.toLocal(func, lhs_ty); + }, + }; - const stack_value = try func.binOp(lhs, rhs, ty, op); - func.finishAir(inst, try stack_value.toLocal(func, ty), &.{ bin_op.lhs, bin_op.rhs }); + func.finishAir(inst, result, &.{ bin_op.lhs, bin_op.rhs }); } /// Performs a binary operation on the given `WValue`'s /// NOTE: THis leaves the value on top of the stack. fn binOp(func: *CodeGen, lhs: WValue, rhs: WValue, ty: Type, op: Op) InnerError!WValue { + const mod = func.bin_file.base.options.module.?; assert(!(lhs != .stack and rhs == .stack)); if (ty.isAnyFloat()) { @@ -2539,8 +2620,8 @@ fn binOp(func: *CodeGen, lhs: WValue, rhs: WValue, ty: Type, op: Op) InnerError! return func.floatOp(float_op, ty, &.{ lhs, rhs }); } - if (isByRef(ty, func.target)) { - if (ty.zigTypeTag() == .Int) { + if (isByRef(ty, mod)) { + if (ty.zigTypeTag(mod) == .Int) { return func.binOpBigInt(lhs, rhs, ty, op); } else { return func.fail( @@ -2552,8 +2633,8 @@ fn binOp(func: *CodeGen, lhs: WValue, rhs: WValue, ty: Type, op: Op) InnerError! const opcode: wasm.Opcode = buildOpcode(.{ .op = op, - .valtype1 = typeToValtype(ty, func.target), - .signedness = if (ty.isSignedInt()) .signed else .unsigned, + .valtype1 = typeToValtype(ty, mod), + .signedness = if (ty.isSignedInt(mod)) .signed else .unsigned, }); try func.emitWValue(lhs); try func.emitWValue(rhs); @@ -2564,38 +2645,58 @@ fn binOp(func: *CodeGen, lhs: WValue, rhs: WValue, ty: Type, op: Op) InnerError! 
} fn binOpBigInt(func: *CodeGen, lhs: WValue, rhs: WValue, ty: Type, op: Op) InnerError!WValue { - if (ty.intInfo(func.target).bits > 128) { - return func.fail("TODO: Implement binary operation for big integer", .{}); + const mod = func.bin_file.base.options.module.?; + if (ty.intInfo(mod).bits > 128) { + return func.fail("TODO: Implement binary operation for big integers larger than 128 bits", .{}); } - if (op != .add and op != .sub) { - return func.fail("TODO: Implement binary operation for big integers", .{}); - } - - const result = try func.allocStack(ty); - var lhs_high_bit = try (try func.load(lhs, Type.u64, 0)).toLocal(func, Type.u64); - defer lhs_high_bit.free(func); - var rhs_high_bit = try (try func.load(rhs, Type.u64, 0)).toLocal(func, Type.u64); - defer rhs_high_bit.free(func); - var high_op_res = try (try func.binOp(lhs_high_bit, rhs_high_bit, Type.u64, op)).toLocal(func, Type.u64); - defer high_op_res.free(func); - - const lhs_low_bit = try func.load(lhs, Type.u64, 8); - const rhs_low_bit = try func.load(rhs, Type.u64, 8); - const low_op_res = try func.binOp(lhs_low_bit, rhs_low_bit, Type.u64, op); + switch (op) { + .mul => return func.callIntrinsic("__multi3", &.{ ty.toIntern(), ty.toIntern() }, ty, &.{ lhs, rhs }), + .shr => return func.callIntrinsic("__lshrti3", &.{ ty.toIntern(), .i32_type }, ty, &.{ lhs, rhs }), + .shl => return func.callIntrinsic("__ashlti3", &.{ ty.toIntern(), .i32_type }, ty, &.{ lhs, rhs }), + .xor => { + const result = try func.allocStack(ty); + try func.emitWValue(result); + const lhs_high_bit = try func.load(lhs, Type.u64, 0); + const rhs_high_bit = try func.load(rhs, Type.u64, 0); + const xor_high_bit = try func.binOp(lhs_high_bit, rhs_high_bit, Type.u64, .xor); + try func.store(.stack, xor_high_bit, Type.u64, result.offset()); - const lt = if (op == .add) blk: { - break :blk try func.cmp(high_op_res, rhs_high_bit, Type.u64, .lt); - } else if (op == .sub) blk: { - break :blk try func.cmp(lhs_high_bit, rhs_high_bit, Type.u64, .lt); - } else unreachable; - const tmp = try func.intcast(lt, Type.u32, Type.u64); - var tmp_op = try (try func.binOp(low_op_res, tmp, Type.u64, op)).toLocal(func, Type.u64); - defer tmp_op.free(func); + try func.emitWValue(result); + const lhs_low_bit = try func.load(lhs, Type.u64, 8); + const rhs_low_bit = try func.load(rhs, Type.u64, 8); + const xor_low_bit = try func.binOp(lhs_low_bit, rhs_low_bit, Type.u64, .xor); + try func.store(.stack, xor_low_bit, Type.u64, result.offset() + 8); + return result; + }, + .add, .sub => { + const result = try func.allocStack(ty); + var lhs_high_bit = try (try func.load(lhs, Type.u64, 0)).toLocal(func, Type.u64); + defer lhs_high_bit.free(func); + var rhs_high_bit = try (try func.load(rhs, Type.u64, 0)).toLocal(func, Type.u64); + defer rhs_high_bit.free(func); + var high_op_res = try (try func.binOp(lhs_high_bit, rhs_high_bit, Type.u64, op)).toLocal(func, Type.u64); + defer high_op_res.free(func); - try func.store(result, high_op_res, Type.u64, 0); - try func.store(result, tmp_op, Type.u64, 8); - return result; + const lhs_low_bit = try func.load(lhs, Type.u64, 8); + const rhs_low_bit = try func.load(rhs, Type.u64, 8); + const low_op_res = try func.binOp(lhs_low_bit, rhs_low_bit, Type.u64, op); + + const lt = if (op == .add) blk: { + break :blk try func.cmp(high_op_res, rhs_high_bit, Type.u64, .lt); + } else if (op == .sub) blk: { + break :blk try func.cmp(lhs_high_bit, rhs_high_bit, Type.u64, .lt); + } else unreachable; + const tmp = try func.intcast(lt, Type.u32, Type.u64); + var 
tmp_op = try (try func.binOp(low_op_res, tmp, Type.u64, op)).toLocal(func, Type.u64); + defer tmp_op.free(func); + + try func.store(result, high_op_res, Type.u64, 0); + try func.store(result, tmp_op, Type.u64, 8); + return result; + }, + else => return func.fail("TODO: Implement binary operation for big integers: '{s}'", .{@tagName(op)}), + } } const FloatOp = enum { @@ -2676,14 +2777,15 @@ const FloatOp = enum { fn airUnaryFloatOp(func: *CodeGen, inst: Air.Inst.Index, op: FloatOp) InnerError!void { const un_op = func.air.instructions.items(.data)[inst].un_op; const operand = try func.resolveInst(un_op); - const ty = func.air.typeOf(un_op); + const ty = func.typeOf(un_op); const result = try (try func.floatOp(op, ty, &.{operand})).toLocal(func, ty); func.finishAir(inst, result, &.{un_op}); } fn floatOp(func: *CodeGen, float_op: FloatOp, ty: Type, args: []const WValue) InnerError!WValue { - if (ty.zigTypeTag() == .Vector) { + const mod = func.bin_file.base.options.module.?; + if (ty.zigTypeTag(mod) == .Vector) { return func.fail("TODO: Implement floatOps for vectors", .{}); } @@ -2693,7 +2795,7 @@ fn floatOp(func: *CodeGen, float_op: FloatOp, ty: Type, args: []const WValue) In for (args) |operand| { try func.emitWValue(operand); } - const opcode = buildOpcode(.{ .op = op, .valtype1 = typeToValtype(ty, func.target) }); + const opcode = buildOpcode(.{ .op = op, .valtype1 = typeToValtype(ty, mod) }); try func.addTag(Mir.Inst.Tag.fromOpcode(opcode)); return .stack; } @@ -2741,24 +2843,49 @@ fn floatOp(func: *CodeGen, float_op: FloatOp, ty: Type, args: []const WValue) In }; // fma requires three operands - var param_types_buffer: [3]Type = .{ ty, ty, ty }; + var param_types_buffer: [3]InternPool.Index = .{ ty.ip_index, ty.ip_index, ty.ip_index }; const param_types = param_types_buffer[0..args.len]; return func.callIntrinsic(fn_name, param_types, ty, args); } fn airWrapBinOp(func: *CodeGen, inst: Air.Inst.Index, op: Op) InnerError!void { + const mod = func.bin_file.base.options.module.?; const bin_op = func.air.instructions.items(.data)[inst].bin_op; const lhs = try func.resolveInst(bin_op.lhs); const rhs = try func.resolveInst(bin_op.rhs); - const ty = func.air.typeOf(bin_op.lhs); + const lhs_ty = func.typeOf(bin_op.lhs); + const rhs_ty = func.typeOf(bin_op.rhs); - if (ty.zigTypeTag() == .Vector) { + if (lhs_ty.zigTypeTag(mod) == .Vector or rhs_ty.zigTypeTag(mod) == .Vector) { return func.fail("TODO: Implement wrapping arithmetic for vectors", .{}); } - const result = try (try func.wrapBinOp(lhs, rhs, ty, op)).toLocal(func, ty); - func.finishAir(inst, result, &.{ bin_op.lhs, bin_op.rhs }); + // For certain operations, such as shifting, the types are different. + // When converting this to a WebAssembly type, they *must* match to perform + // an operation. For this reason we verify if the WebAssembly type is different, in which + // case we first coerce the operands to the same type before performing the operation. + // For big integers we can ignore this as we will call into compiler-rt which handles this. 
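// A standalone sketch of the wrapping that wrapOperand (further below)
// applies after the raw operation: an integer narrower than its wasm register
// is masked back down to its declared bit width. Shown here for an unsigned
// 5-bit integer living in a 32-bit wasm value.
const std = @import("std");

fn wrapToBits(value: u32, comptime bit_count: u5) u32 {
    const mask = (@as(u32, 1) << bit_count) - 1;
    return value & mask;
}

test "u5 addition wraps modulo 32" {
    const raw_sum: u32 = 30 + 5; // full i32 result on the wasm stack
    try std.testing.expectEqual(@as(u32, 3), wrapToBits(raw_sum, 5));
}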
+ const result = switch (op) { + .shr, .shl => res: { + const lhs_wasm_bits = toWasmBits(@intCast(u16, lhs_ty.bitSize(mod))) orelse { + return func.fail("TODO: implement '{s}' for types larger than 128 bits", .{@tagName(op)}); + }; + const rhs_wasm_bits = toWasmBits(@intCast(u16, rhs_ty.bitSize(mod))).?; + const new_rhs = if (lhs_wasm_bits != rhs_wasm_bits and lhs_wasm_bits != 128) blk: { + const tmp = try func.intcast(rhs, rhs_ty, lhs_ty); + break :blk try tmp.toLocal(func, lhs_ty); + } else rhs; + const stack_result = try func.wrapBinOp(lhs, new_rhs, lhs_ty, op); + break :res try stack_result.toLocal(func, lhs_ty); + }, + else => res: { + const stack_result = try func.wrapBinOp(lhs, rhs, lhs_ty, op); + break :res try stack_result.toLocal(func, lhs_ty); + }, + }; + + return func.finishAir(inst, result, &.{ bin_op.lhs, bin_op.rhs }); } /// Performs a wrapping binary operation. @@ -2773,8 +2900,9 @@ fn wrapBinOp(func: *CodeGen, lhs: WValue, rhs: WValue, ty: Type, op: Op) InnerEr /// Asserts `Type` is <= 128 bits. /// NOTE: When the Type is <= 64 bits, leaves the value on top of the stack. fn wrapOperand(func: *CodeGen, operand: WValue, ty: Type) InnerError!WValue { - assert(ty.abiSize(func.target) <= 16); - const bitsize = @intCast(u16, ty.bitSize(func.target)); + const mod = func.bin_file.base.options.module.?; + assert(ty.abiSize(mod) <= 16); + const bitsize = @intCast(u16, ty.bitSize(mod)); const wasm_bits = toWasmBits(bitsize) orelse { return func.fail("TODO: Implement wrapOperand for bitsize '{d}'", .{bitsize}); }; @@ -2810,44 +2938,48 @@ fn wrapOperand(func: *CodeGen, operand: WValue, ty: Type) InnerError!WValue { return WValue{ .stack = {} }; } -fn lowerParentPtr(func: *CodeGen, ptr_val: Value, ptr_child_ty: Type) InnerError!WValue { - switch (ptr_val.tag()) { - .decl_ref_mut => { - const decl_index = ptr_val.castTag(.decl_ref_mut).?.data.decl_index; - return func.lowerParentPtrDecl(ptr_val, decl_index); +fn lowerParentPtr(func: *CodeGen, ptr_val: Value, offset: u32) InnerError!WValue { + const mod = func.bin_file.base.options.module.?; + const ptr = mod.intern_pool.indexToKey(ptr_val.ip_index).ptr; + switch (ptr.addr) { + .decl => |decl_index| { + return func.lowerParentPtrDecl(ptr_val, decl_index, offset); }, - .decl_ref => { - const decl_index = ptr_val.castTag(.decl_ref).?.data; - return func.lowerParentPtrDecl(ptr_val, decl_index); + .mut_decl => |mut_decl| { + const decl_index = mut_decl.decl; + return func.lowerParentPtrDecl(ptr_val, decl_index, offset); }, - .variable => { - const decl_index = ptr_val.castTag(.variable).?.data.owner_decl; - return func.lowerParentPtrDecl(ptr_val, decl_index); + .eu_payload => |tag| return func.fail("TODO: Implement lowerParentPtr for {}", .{tag}), + .int => |base| return func.lowerConstant(base.toValue(), Type.usize), + .opt_payload => |base_ptr| return func.lowerParentPtr(base_ptr.toValue(), offset), + .comptime_field => unreachable, + .elem => |elem| { + const index = elem.index; + const elem_type = mod.intern_pool.typeOf(elem.base).toType().elemType2(mod); + const elem_offset = index * elem_type.abiSize(mod); + return func.lowerParentPtr(elem.base.toValue(), @intCast(u32, elem_offset + offset)); }, - .field_ptr => { - const field_ptr = ptr_val.castTag(.field_ptr).?.data; - const parent_ty = field_ptr.container_ty; - const parent_ptr = try func.lowerParentPtr(field_ptr.container_ptr, parent_ty); - - const offset = switch (parent_ty.zigTypeTag()) { - .Struct => switch (parent_ty.containerLayout()) { - .Packed => 
parent_ty.packedStructFieldByteOffset(field_ptr.field_index, func.target), - else => parent_ty.structFieldOffset(field_ptr.field_index, func.target), + .field => |field| { + const parent_ty = mod.intern_pool.typeOf(field.base).toType().childType(mod); + + const field_offset = switch (parent_ty.zigTypeTag(mod)) { + .Struct => switch (parent_ty.containerLayout(mod)) { + .Packed => parent_ty.packedStructFieldByteOffset(@intCast(usize, field.index), mod), + else => parent_ty.structFieldOffset(@intCast(usize, field.index), mod), }, - .Union => switch (parent_ty.containerLayout()) { + .Union => switch (parent_ty.containerLayout(mod)) { .Packed => 0, else => blk: { - const layout: Module.Union.Layout = parent_ty.unionGetLayout(func.target); + const layout: Module.Union.Layout = parent_ty.unionGetLayout(mod); if (layout.payload_size == 0) break :blk 0; if (layout.payload_align > layout.tag_align) break :blk 0; // tag is stored first so calculate offset from where payload starts - const offset = @intCast(u32, std.mem.alignForwardGeneric(u64, layout.tag_size, layout.tag_align)); - break :blk offset; + break :blk @intCast(u32, std.mem.alignForward(u64, layout.tag_size, layout.tag_align)); }, }, - .Pointer => switch (parent_ty.ptrSize()) { - .Slice => switch (field_ptr.field_index) { + .Pointer => switch (parent_ty.ptrSize(mod)) { + .Slice => switch (field.index) { 0 => 0, 1 => func.ptrSize(), else => unreachable, @@ -2856,74 +2988,52 @@ fn lowerParentPtr(func: *CodeGen, ptr_val: Value, ptr_child_ty: Type) InnerError }, else => unreachable, }; - - return switch (parent_ptr) { - .memory => |ptr| WValue{ - .memory_offset = .{ - .pointer = ptr, - .offset = @intCast(u32, offset), - }, - }, - .memory_offset => |mem_off| WValue{ - .memory_offset = .{ - .pointer = mem_off.pointer, - .offset = @intCast(u32, offset) + mem_off.offset, - }, - }, - else => unreachable, - }; - }, - .elem_ptr => { - const elem_ptr = ptr_val.castTag(.elem_ptr).?.data; - const index = elem_ptr.index; - const offset = index * ptr_child_ty.abiSize(func.target); - const array_ptr = try func.lowerParentPtr(elem_ptr.array_ptr, elem_ptr.elem_ty); - - return WValue{ .memory_offset = .{ - .pointer = array_ptr.memory, - .offset = @intCast(u32, offset), - } }; + return func.lowerParentPtr(field.base.toValue(), @intCast(u32, offset + field_offset)); }, - .opt_payload_ptr => { - const payload_ptr = ptr_val.castTag(.opt_payload_ptr).?.data; - return func.lowerParentPtr(payload_ptr.container_ptr, payload_ptr.container_ty); - }, - else => |tag| return func.fail("TODO: Implement lowerParentPtr for tag: {}", .{tag}), } } -fn lowerParentPtrDecl(func: *CodeGen, ptr_val: Value, decl_index: Module.Decl.Index) InnerError!WValue { - const module = func.bin_file.base.options.module.?; - const decl = module.declPtr(decl_index); - module.markDeclAlive(decl); - var ptr_ty_payload: Type.Payload.ElemType = .{ - .base = .{ .tag = .single_mut_pointer }, - .data = decl.ty, - }; - const ptr_ty = Type.initPayload(&ptr_ty_payload.base); - return func.lowerDeclRefValue(.{ .ty = ptr_ty, .val = ptr_val }, decl_index); +fn lowerParentPtrDecl(func: *CodeGen, ptr_val: Value, decl_index: Module.Decl.Index, offset: u32) InnerError!WValue { + const mod = func.bin_file.base.options.module.?; + const decl = mod.declPtr(decl_index); + try mod.markDeclAlive(decl); + const ptr_ty = try mod.singleMutPtrType(decl.ty); + return func.lowerDeclRefValue(.{ .ty = ptr_ty, .val = ptr_val }, decl_index, offset); } -fn lowerDeclRefValue(func: *CodeGen, tv: TypedValue, decl_index: 
Module.Decl.Index) InnerError!WValue { - if (tv.ty.isSlice()) { +fn lowerDeclRefValue(func: *CodeGen, tv: TypedValue, decl_index: Module.Decl.Index, offset: u32) InnerError!WValue { + const mod = func.bin_file.base.options.module.?; + if (tv.ty.isSlice(mod)) { return WValue{ .memory = try func.bin_file.lowerUnnamedConst(tv, decl_index) }; } - const module = func.bin_file.base.options.module.?; - const decl = module.declPtr(decl_index); - if (decl.ty.zigTypeTag() != .Fn and !decl.ty.hasRuntimeBitsIgnoreComptime()) { + const decl = mod.declPtr(decl_index); + // check if decl is an alias to a function, in which case we + // want to lower the actual decl, rather than the alias itself. + if (decl.val.getFunction(mod)) |func_val| { + if (func_val.owner_decl != decl_index) { + return func.lowerDeclRefValue(tv, func_val.owner_decl, offset); + } + } else if (decl.val.getExternFunc(mod)) |func_val| { + if (func_val.decl != decl_index) { + return func.lowerDeclRefValue(tv, func_val.decl, offset); + } + } + if (decl.ty.zigTypeTag(mod) != .Fn and !decl.ty.hasRuntimeBitsIgnoreComptime(mod)) { return WValue{ .imm32 = 0xaaaaaaaa }; } - module.markDeclAlive(decl); + try mod.markDeclAlive(decl); const atom_index = try func.bin_file.getOrCreateAtomForDecl(decl_index); const atom = func.bin_file.getAtom(atom_index); const target_sym_index = atom.sym_index; - if (decl.ty.zigTypeTag() == .Fn) { + if (decl.ty.zigTypeTag(mod) == .Fn) { try func.bin_file.addTableFunction(target_sym_index); return WValue{ .function_index = target_sym_index }; - } else return WValue{ .memory = target_sym_index }; + } else if (offset == 0) { + return WValue{ .memory = target_sym_index }; + } else return WValue{ .memory_offset = .{ .pointer = target_sym_index, .offset = offset } }; } /// Converts a signed integer to its 2's complement form and returns @@ -2943,131 +3053,201 @@ fn toTwosComplement(value: anytype, bits: u7) std.meta.Int(.unsigned, @typeInfo( } fn lowerConstant(func: *CodeGen, arg_val: Value, ty: Type) InnerError!WValue { + const mod = func.bin_file.base.options.module.?; var val = arg_val; - if (val.castTag(.runtime_value)) |rt| { - val = rt.data; - } - if (val.isUndefDeep()) return func.emitUndefined(ty); - if (val.castTag(.decl_ref)) |decl_ref| { - const decl_index = decl_ref.data; - return func.lowerDeclRefValue(.{ .ty = ty, .val = val }, decl_index); - } - if (val.castTag(.decl_ref_mut)) |decl_ref_mut| { - const decl_index = decl_ref_mut.data.decl_index; - return func.lowerDeclRefValue(.{ .ty = ty, .val = val }, decl_index); - } - const target = func.target; - switch (ty.zigTypeTag()) { - .Void => return WValue{ .none = {} }, - .Int => { - const int_info = ty.intInfo(func.target); + switch (mod.intern_pool.indexToKey(val.ip_index)) { + .runtime_value => |rt| val = rt.val.toValue(), + else => {}, + } + if (val.isUndefDeep(mod)) return func.emitUndefined(ty); + + if (val.ip_index == .none) switch (ty.zigTypeTag(mod)) { + .Array => |zig_type| return func.fail("Wasm TODO: LowerConstant for zigTypeTag {}", .{zig_type}), + .Struct => { + const struct_obj = mod.typeToStruct(ty).?; + assert(struct_obj.layout == .Packed); + var buf: [8]u8 = .{0} ** 8; // zero the buffer so we do not read 0xaa as integer + val.writeToPackedMemory(ty, func.bin_file.base.options.module.?, &buf, 0) catch unreachable; + const int_val = try mod.intValue( + struct_obj.backing_int_ty, + std.mem.readIntLittle(u64, &buf), + ); + return func.lowerConstant(int_val, struct_obj.backing_int_ty); + }, + .Vector => { + assert(determineSimdStoreStrategy(ty, 
mod) == .direct); + var buf: [16]u8 = undefined; + val.writeToMemory(ty, mod, &buf) catch unreachable; + return func.storeSimdImmd(buf); + }, + .Frame, + .AnyFrame, + => return func.fail("Wasm TODO: LowerConstant for type {}", .{ty.fmt(mod)}), + .Float, + .Union, + .Optional, + .ErrorUnion, + .ErrorSet, + .Int, + .Enum, + .Bool, + .Pointer, + => unreachable, // handled below + .Type, + .Void, + .NoReturn, + .ComptimeFloat, + .ComptimeInt, + .Undefined, + .Null, + .Opaque, + .EnumLiteral, + .Fn, + => unreachable, // comptime-only types + }; + + switch (mod.intern_pool.indexToKey(val.ip_index)) { + .int_type, + .ptr_type, + .array_type, + .vector_type, + .opt_type, + .anyframe_type, + .error_union_type, + .simple_type, + .struct_type, + .anon_struct_type, + .union_type, + .opaque_type, + .enum_type, + .func_type, + .error_set_type, + .inferred_error_set_type, + => unreachable, // types, not values + + .undef, .runtime_value => unreachable, // handled above + .simple_value => |simple_value| switch (simple_value) { + .undefined, + .void, + .null, + .empty_struct, + .@"unreachable", + .generic_poison, + => unreachable, // non-runtime values + .false, .true => return WValue{ .imm32 = switch (simple_value) { + .false => 0, + .true => 1, + else => unreachable, + } }, + }, + .variable, + .extern_func, + .func, + .enum_literal, + .empty_enum_value, + => unreachable, // non-runtime values + .int => { + const int_info = ty.intInfo(mod); switch (int_info.signedness) { .signed => switch (int_info.bits) { 0...32 => return WValue{ .imm32 = @intCast(u32, toTwosComplement( - val.toSignedInt(target), + val.toSignedInt(mod), @intCast(u6, int_info.bits), )) }, 33...64 => return WValue{ .imm64 = toTwosComplement( - val.toSignedInt(target), + val.toSignedInt(mod), @intCast(u7, int_info.bits), ) }, else => unreachable, }, .unsigned => switch (int_info.bits) { - 0...32 => return WValue{ .imm32 = @intCast(u32, val.toUnsignedInt(target)) }, - 33...64 => return WValue{ .imm64 = val.toUnsignedInt(target) }, + 0...32 => return WValue{ .imm32 = @intCast(u32, val.toUnsignedInt(mod)) }, + 33...64 => return WValue{ .imm64 = val.toUnsignedInt(mod) }, else => unreachable, }, } }, - .Bool => return WValue{ .imm32 = @intCast(u32, val.toUnsignedInt(target)) }, - .Float => switch (ty.floatBits(func.target)) { - 16 => return WValue{ .imm32 = @bitCast(u16, val.toFloat(f16)) }, - 32 => return WValue{ .float32 = val.toFloat(f32) }, - 64 => return WValue{ .float64 = val.toFloat(f64) }, - else => unreachable, - }, - .Pointer => switch (val.tag()) { - .field_ptr, .elem_ptr, .opt_payload_ptr => { - return func.lowerParentPtr(val, ty.childType()); - }, - .int_u64, .one => return WValue{ .imm32 = @intCast(u32, val.toUnsignedInt(target)) }, - .zero, .null_value => return WValue{ .imm32 = 0 }, - else => return func.fail("Wasm TODO: lowerConstant for other const pointer tag {}", .{val.tag()}), + .err => |err| { + const int = try mod.getErrorValue(err.name); + return WValue{ .imm32 = int }; }, - .Enum => { - if (val.castTag(.enum_field_index)) |field_index| { - switch (ty.tag()) { - .enum_simple => return WValue{ .imm32 = field_index.data }, - .enum_full, .enum_nonexhaustive => { - const enum_full = ty.cast(Type.Payload.EnumFull).?.data; - if (enum_full.values.count() != 0) { - const tag_val = enum_full.values.keys()[field_index.data]; - return func.lowerConstant(tag_val, enum_full.tag_ty); - } else { - return WValue{ .imm32 = field_index.data }; - } - }, - .enum_numbered => { - const index = field_index.data; - const enum_data = 
ty.castTag(.enum_numbered).?.data; - const enum_val = enum_data.values.keys()[index]; - return func.lowerConstant(enum_val, enum_data.tag_ty); - }, - else => return func.fail("TODO: lowerConstant for enum tag: {}", .{ty.tag()}), - } - } else { - var int_tag_buffer: Type.Payload.Bits = undefined; - const int_tag_ty = ty.intTagType(&int_tag_buffer); - return func.lowerConstant(val, int_tag_ty); + .error_union => |error_union| { + const err_tv: TypedValue = switch (error_union.val) { + .err_name => |err_name| .{ + .ty = ty.errorUnionSet(mod), + .val = (try mod.intern(.{ .err = .{ + .ty = ty.errorUnionSet(mod).toIntern(), + .name = err_name, + } })).toValue(), + }, + .payload => .{ + .ty = Type.err_int, + .val = try mod.intValue(Type.err_int, 0), + }, + }; + const payload_type = ty.errorUnionPayload(mod); + if (!payload_type.hasRuntimeBitsIgnoreComptime(mod)) { + // We use the error type directly as the type. + return func.lowerConstant(err_tv.val, err_tv.ty); } + + return func.fail("Wasm TODO: lowerConstant error union with non-zero-bit payload type", .{}); }, - .ErrorSet => switch (val.tag()) { - .@"error" => { - const kv = try func.bin_file.base.options.module.?.getErrorValue(val.getError().?); - return WValue{ .imm32 = kv.value }; - }, - else => return WValue{ .imm32 = 0 }, + .enum_tag => |enum_tag| { + const int_tag_ty = mod.intern_pool.typeOf(enum_tag.int); + return func.lowerConstant(enum_tag.int.toValue(), int_tag_ty.toType()); }, - .ErrorUnion => { - const error_type = ty.errorUnionSet(); - const is_pl = val.errorUnionIsPayload(); - const err_val = if (!is_pl) val else Value.initTag(.zero); - return func.lowerConstant(err_val, error_type); + .float => |float| switch (float.storage) { + .f16 => |f16_val| return WValue{ .imm32 = @bitCast(u16, f16_val) }, + .f32 => |f32_val| return WValue{ .float32 = f32_val }, + .f64 => |f64_val| return WValue{ .float64 = f64_val }, + else => unreachable, }, - .Optional => if (ty.optionalReprIsPayload()) { - var buf: Type.Payload.ElemType = undefined; - const pl_ty = ty.optionalChild(&buf); - if (val.castTag(.opt_payload)) |payload| { - return func.lowerConstant(payload.data, pl_ty); - } else if (val.isNull()) { - return WValue{ .imm32 = 0 }; + .ptr => |ptr| switch (ptr.addr) { + .decl => |decl| return func.lowerDeclRefValue(.{ .ty = ty, .val = val }, decl, 0), + .mut_decl => |mut_decl| return func.lowerDeclRefValue(.{ .ty = ty, .val = val }, mut_decl.decl, 0), + .int => |int| return func.lowerConstant(int.toValue(), mod.intern_pool.typeOf(int).toType()), + .opt_payload, .elem, .field => return func.lowerParentPtr(val, 0), + else => return func.fail("Wasm TODO: lowerConstant for other const addr tag {}", .{ptr.addr}), + }, + .opt => if (ty.optionalReprIsPayload(mod)) { + const pl_ty = ty.optionalChild(mod); + if (val.optionalValue(mod)) |payload| { + return func.lowerConstant(payload, pl_ty); } else { - return func.lowerConstant(val, pl_ty); + return WValue{ .imm32 = 0 }; } } else { - const is_pl = val.tag() == .opt_payload; - return WValue{ .imm32 = @boolToInt(is_pl) }; + return WValue{ .imm32 = @boolToInt(!val.isNull(mod)) }; }, - .Struct => { - const struct_obj = ty.castTag(.@"struct").?.data; - assert(struct_obj.layout == .Packed); - var buf: [8]u8 = .{0} ** 8; // zero the buffer so we do not read 0xaa as integer - val.writeToPackedMemory(ty, func.bin_file.base.options.module.?, &buf, 0) catch unreachable; - var payload: Value.Payload.U64 = .{ - .base = .{ .tag = .int_u64 }, - .data = std.mem.readIntLittle(u64, &buf), - }; - const int_val = 
Value.initPayload(&payload.base); - return func.lowerConstant(int_val, struct_obj.backing_int_ty); + .aggregate => switch (mod.intern_pool.indexToKey(ty.ip_index)) { + .array_type => return func.fail("Wasm TODO: LowerConstant for {}", .{ty.fmt(mod)}), + .vector_type => { + assert(determineSimdStoreStrategy(ty, mod) == .direct); + var buf: [16]u8 = undefined; + val.writeToMemory(ty, mod, &buf) catch unreachable; + return func.storeSimdImmd(buf); + }, + .struct_type, .anon_struct_type => { + const struct_obj = mod.typeToStruct(ty).?; + assert(struct_obj.layout == .Packed); + var buf: [8]u8 = .{0} ** 8; // zero the buffer so we do not read 0xaa as integer + val.writeToPackedMemory(ty, func.bin_file.base.options.module.?, &buf, 0) catch unreachable; + const int_val = try mod.intValue( + struct_obj.backing_int_ty, + std.mem.readIntLittle(u64, &buf), + ); + return func.lowerConstant(int_val, struct_obj.backing_int_ty); + }, + else => unreachable, }, - .Vector => { - assert(determineSimdStoreStrategy(ty, target) == .direct); - var buf: [16]u8 = undefined; - val.writeToMemory(ty, func.bin_file.base.options.module.?, &buf) catch unreachable; - return func.storeSimdImmd(buf); + .un => |union_obj| { + // in this case we have a packed union which will not be passed by reference. + const field_index = ty.unionTagFieldIndex(union_obj.tag.toValue(), func.bin_file.base.options.module.?).?; + const field_ty = ty.unionFields(mod).values()[field_index].ty; + return func.lowerConstant(union_obj.val.toValue(), field_ty); }, - else => |zig_type| return func.fail("Wasm TODO: LowerConstant for zigTypeTag {}", .{zig_type}), + .memoized_call => unreachable, } } @@ -3080,9 +3260,10 @@ fn storeSimdImmd(func: *CodeGen, value: [16]u8) !WValue { } fn emitUndefined(func: *CodeGen, ty: Type) InnerError!WValue { - switch (ty.zigTypeTag()) { + const mod = func.bin_file.base.options.module.?; + switch (ty.zigTypeTag(mod)) { .Bool, .ErrorSet => return WValue{ .imm32 = 0xaaaaaaaa }, - .Int => switch (ty.intInfo(func.target).bits) { + .Int, .Enum => switch (ty.intInfo(mod).bits) { 0...32 => return WValue{ .imm32 = 0xaaaaaaaa }, 33...64 => return WValue{ .imm64 = 0xaaaaaaaaaaaaaaaa }, else => unreachable, @@ -3099,9 +3280,8 @@ fn emitUndefined(func: *CodeGen, ty: Type) InnerError!WValue { else => unreachable, }, .Optional => { - var buf: Type.Payload.ElemType = undefined; - const pl_ty = ty.optionalChild(&buf); - if (ty.optionalReprIsPayload()) { + const pl_ty = ty.optionalChild(mod); + if (ty.optionalReprIsPayload(mod)) { return func.emitUndefined(pl_ty); } return WValue{ .imm32 = 0xaaaaaaaa }; @@ -3110,11 +3290,11 @@ fn emitUndefined(func: *CodeGen, ty: Type) InnerError!WValue { return WValue{ .imm32 = 0xaaaaaaaa }; }, .Struct => { - const struct_obj = ty.castTag(.@"struct").?.data; + const struct_obj = mod.typeToStruct(ty).?; assert(struct_obj.layout == .Packed); return func.emitUndefined(struct_obj.backing_int_ty); }, - else => return func.fail("Wasm TODO: emitUndefined for type: {}\n", .{ty.zigTypeTag()}), + else => return func.fail("Wasm TODO: emitUndefined for type: {}\n", .{ty.zigTypeTag(mod)}), } } @@ -3122,56 +3302,52 @@ fn emitUndefined(func: *CodeGen, ty: Type) InnerError!WValue { /// It's illegal to provide a value with a type that cannot be represented /// as an integer value. 
fn valueAsI32(func: *const CodeGen, val: Value, ty: Type) i32 { - const target = func.target; - switch (ty.zigTypeTag()) { - .Enum => { - if (val.castTag(.enum_field_index)) |field_index| { - switch (ty.tag()) { - .enum_simple => return @bitCast(i32, field_index.data), - .enum_full, .enum_nonexhaustive => { - const enum_full = ty.cast(Type.Payload.EnumFull).?.data; - if (enum_full.values.count() != 0) { - const tag_val = enum_full.values.keys()[field_index.data]; - return func.valueAsI32(tag_val, enum_full.tag_ty); - } else return @bitCast(i32, field_index.data); - }, - .enum_numbered => { - const index = field_index.data; - const enum_data = ty.castTag(.enum_numbered).?.data; - return func.valueAsI32(enum_data.values.keys()[index], enum_data.tag_ty); - }, - else => unreachable, - } - } else { - var int_tag_buffer: Type.Payload.Bits = undefined; - const int_tag_ty = ty.intTagType(&int_tag_buffer); - return func.valueAsI32(val, int_tag_ty); - } - }, - .Int => switch (ty.intInfo(func.target).signedness) { - .signed => return @truncate(i32, val.toSignedInt(target)), - .unsigned => return @bitCast(i32, @truncate(u32, val.toUnsignedInt(target))), - }, - .ErrorSet => { - const kv = func.bin_file.base.options.module.?.getErrorValue(val.getError().?) catch unreachable; // passed invalid `Value` to function - return @bitCast(i32, kv.value); + const mod = func.bin_file.base.options.module.?; + + switch (val.ip_index) { + .none => {}, + .bool_true => return 1, + .bool_false => return 0, + else => return switch (mod.intern_pool.indexToKey(val.ip_index)) { + .enum_tag => |enum_tag| intIndexAsI32(&mod.intern_pool, enum_tag.int, mod), + .int => |int| intStorageAsI32(int.storage, mod), + .ptr => |ptr| intIndexAsI32(&mod.intern_pool, ptr.addr.int, mod), + .err => |err| @bitCast(i32, @intCast(Module.ErrorInt, mod.global_error_set.getIndex(err.name).?)), + else => unreachable, }, - .Bool => return @intCast(i32, val.toSignedInt(target)), - .Pointer => return @intCast(i32, val.toSignedInt(target)), - else => unreachable, // Programmer called this function for an illegal type } + + return switch (ty.zigTypeTag(mod)) { + .ErrorSet => @bitCast(i32, val.getErrorInt(mod)), + else => unreachable, // Programmer called this function for an illegal type + }; +} + +fn intIndexAsI32(ip: *const InternPool, int: InternPool.Index, mod: *Module) i32 { + return intStorageAsI32(ip.indexToKey(int).int.storage, mod); +} + +fn intStorageAsI32(storage: InternPool.Key.Int.Storage, mod: *Module) i32 { + return switch (storage) { + .i64 => |x| @intCast(i32, x), + .u64 => |x| @bitCast(i32, @intCast(u32, x)), + .big_int => unreachable, + .lazy_align => |ty| @bitCast(i32, ty.toType().abiAlignment(mod)), + .lazy_size => |ty| @bitCast(i32, @intCast(u32, ty.toType().abiSize(mod))), + }; } fn airBlock(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const mod = func.bin_file.base.options.module.?; const ty_pl = func.air.instructions.items(.data)[inst].ty_pl; const block_ty = func.air.getRefType(ty_pl.ty); - const wasm_block_ty = genBlockType(block_ty, func.target); + const wasm_block_ty = genBlockType(block_ty, mod); const extra = func.air.extraData(Air.Block, ty_pl.payload); const body = func.air.extra[extra.end..][0..extra.data.body_len]; // if wasm_block_ty is non-empty, we create a register to store the temporary value const block_result: WValue = if (wasm_block_ty != wasm.block_empty) blk: { - const ty: Type = if (isByRef(block_ty, func.target)) Type.u32 else block_ty; + const ty: Type = if (isByRef(block_ty, mod)) Type.u32 
else block_ty; break :blk try func.ensureAllocLocal(ty); // make sure it's a clean local as it may never get overwritten } else WValue.none; @@ -3182,9 +3358,13 @@ fn airBlock(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { .label = func.block_depth, .value = block_result, }); + try func.genBody(body); try func.endBlock(); + const liveness = func.liveness.getBlock(inst); + try func.currentBranch().values.ensureUnusedCapacity(func.gpa, liveness.deaths.len); + func.finishAir(inst, block_result, &.{}); } @@ -3239,47 +3419,37 @@ fn airCondBr(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { try func.addLabel(.br_if, 0); try func.branches.ensureUnusedCapacity(func.gpa, 2); - - func.branches.appendAssumeCapacity(.{}); - try func.currentBranch().values.ensureUnusedCapacity(func.gpa, @intCast(u32, liveness_condbr.else_deaths.len)); - try func.genBody(else_body); - try func.endBlock(); - var else_stack = func.branches.pop(); - defer else_stack.deinit(func.gpa); + { + func.branches.appendAssumeCapacity(.{}); + try func.currentBranch().values.ensureUnusedCapacity(func.gpa, @intCast(u32, liveness_condbr.else_deaths.len)); + defer { + var else_stack = func.branches.pop(); + else_stack.deinit(func.gpa); + } + try func.genBody(else_body); + try func.endBlock(); + } // Outer block that matches the condition - func.branches.appendAssumeCapacity(.{}); - try func.currentBranch().values.ensureUnusedCapacity(func.gpa, @intCast(u32, liveness_condbr.then_deaths.len)); - try func.genBody(then_body); - var then_stack = func.branches.pop(); - defer then_stack.deinit(func.gpa); - - try func.mergeBranch(&else_stack); - try func.mergeBranch(&then_stack); + { + func.branches.appendAssumeCapacity(.{}); + try func.currentBranch().values.ensureUnusedCapacity(func.gpa, @intCast(u32, liveness_condbr.then_deaths.len)); + defer { + var then_stack = func.branches.pop(); + then_stack.deinit(func.gpa); + } + try func.genBody(then_body); + } func.finishAir(inst, .none, &.{}); } -fn mergeBranch(func: *CodeGen, branch: *const Branch) !void { - const parent = func.currentBranch(); - - const target_slice = branch.values.entries.slice(); - const target_keys = target_slice.items(.key); - const target_values = target_slice.items(.value); - - try parent.values.ensureTotalCapacity(func.gpa, parent.values.capacity() + branch.values.count()); - for (target_keys, 0..) |key, index| { - // TODO: process deaths from branches - parent.values.putAssumeCapacity(key, target_values[index]); - } -} - fn airCmp(func: *CodeGen, inst: Air.Inst.Index, op: std.math.CompareOperator) InnerError!void { const bin_op = func.air.instructions.items(.data)[inst].bin_op; const lhs = try func.resolveInst(bin_op.lhs); const rhs = try func.resolveInst(bin_op.rhs); - const operand_ty = func.air.typeOf(bin_op.lhs); + const operand_ty = func.typeOf(bin_op.lhs); const result = try (try func.cmp(lhs, rhs, operand_ty, op)).toLocal(func, Type.u32); // comparison result is always 32 bits func.finishAir(inst, result, &.{ bin_op.lhs, bin_op.rhs }); } @@ -3289,16 +3459,16 @@ fn airCmp(func: *CodeGen, inst: Air.Inst.Index, op: std.math.CompareOperator) In /// NOTE: This leaves the result on top of the stack, rather than a new local. 
fn cmp(func: *CodeGen, lhs: WValue, rhs: WValue, ty: Type, op: std.math.CompareOperator) InnerError!WValue { assert(!(lhs != .stack and rhs == .stack)); - if (ty.zigTypeTag() == .Optional and !ty.optionalReprIsPayload()) { - var buf: Type.Payload.ElemType = undefined; - const payload_ty = ty.optionalChild(&buf); - if (payload_ty.hasRuntimeBitsIgnoreComptime()) { + const mod = func.bin_file.base.options.module.?; + if (ty.zigTypeTag(mod) == .Optional and !ty.optionalReprIsPayload(mod)) { + const payload_ty = ty.optionalChild(mod); + if (payload_ty.hasRuntimeBitsIgnoreComptime(mod)) { // When we hit this case, we must check the value of optionals // that are not pointers. This means first checking against non-null for // both lhs and rhs, as well as checking the payload are matching of lhs and rhs return func.cmpOptionals(lhs, rhs, ty, op); } - } else if (isByRef(ty, func.target)) { + } else if (isByRef(ty, mod)) { return func.cmpBigInt(lhs, rhs, ty, op); } else if (ty.isAnyFloat() and ty.floatBits(func.target) == 16) { return func.cmpFloat16(lhs, rhs, op); @@ -3311,13 +3481,13 @@ fn cmp(func: *CodeGen, lhs: WValue, rhs: WValue, ty: Type, op: std.math.CompareO const signedness: std.builtin.Signedness = blk: { // by default we tell the operand type is unsigned (i.e. bools and enum values) - if (ty.zigTypeTag() != .Int) break :blk .unsigned; + if (ty.zigTypeTag(mod) != .Int) break :blk .unsigned; // incase of an actual integer, we emit the correct signedness - break :blk ty.intInfo(func.target).signedness; + break :blk ty.intInfo(mod).signedness; }; const opcode: wasm.Opcode = buildOpcode(.{ - .valtype1 = typeToValtype(ty, func.target), + .valtype1 = typeToValtype(ty, mod), .op = switch (op) { .lt => .lt, .lte => .le, @@ -3374,11 +3544,12 @@ fn airCmpLtErrorsLen(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { } fn airBr(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const mod = func.bin_file.base.options.module.?; const br = func.air.instructions.items(.data)[inst].br; const block = func.blocks.get(br.block_inst).?; // if operand has codegen bits we should break with a value - if (func.air.typeOf(br.operand).hasRuntimeBitsIgnoreComptime()) { + if (func.typeOf(br.operand).hasRuntimeBitsIgnoreComptime(mod)) { const operand = try func.resolveInst(br.operand); try func.lowerToStack(operand); @@ -3399,17 +3570,18 @@ fn airNot(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { const ty_op = func.air.instructions.items(.data)[inst].ty_op; const operand = try func.resolveInst(ty_op.operand); - const operand_ty = func.air.typeOf(ty_op.operand); + const operand_ty = func.typeOf(ty_op.operand); + const mod = func.bin_file.base.options.module.?; const result = result: { - if (operand_ty.zigTypeTag() == .Bool) { + if (operand_ty.zigTypeTag(mod) == .Bool) { try func.emitWValue(operand); try func.addTag(.i32_eqz); const not_tmp = try func.allocLocal(operand_ty); try func.addLabel(.local_set, not_tmp.local.value); break :result not_tmp; } else { - const operand_bits = operand_ty.intInfo(func.target).bits; + const operand_bits = operand_ty.intInfo(mod).bits; const wasm_bits = toWasmBits(operand_bits) orelse { return func.fail("TODO: Implement binary NOT for integer with bitsize '{d}'", .{operand_bits}); }; @@ -3464,8 +3636,8 @@ fn airBitcast(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { const ty_op = func.air.instructions.items(.data)[inst].ty_op; const result = result: { const operand = try func.resolveInst(ty_op.operand); - const wanted_ty = func.air.typeOfIndex(inst); - 
const given_ty = func.air.typeOf(ty_op.operand); + const wanted_ty = func.typeOfIndex(inst); + const given_ty = func.typeOf(ty_op.operand); if (given_ty.isAnyFloat() or wanted_ty.isAnyFloat()) { const bitcast_result = try func.bitcast(wanted_ty, given_ty, operand); break :result try bitcast_result.toLocal(func, wanted_ty); @@ -3476,16 +3648,17 @@ fn airBitcast(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { } fn bitcast(func: *CodeGen, wanted_ty: Type, given_ty: Type, operand: WValue) InnerError!WValue { + const mod = func.bin_file.base.options.module.?; // if we bitcast a float to or from an integer we must use the 'reinterpret' instruction if (!(wanted_ty.isAnyFloat() or given_ty.isAnyFloat())) return operand; - if (wanted_ty.tag() == .f16 or given_ty.tag() == .f16) return operand; - if (wanted_ty.bitSize(func.target) > 64) return operand; - assert((wanted_ty.isInt() and given_ty.isAnyFloat()) or (wanted_ty.isAnyFloat() and given_ty.isInt())); + if (wanted_ty.ip_index == .f16_type or given_ty.ip_index == .f16_type) return operand; + if (wanted_ty.bitSize(mod) > 64) return operand; + assert((wanted_ty.isInt(mod) and given_ty.isAnyFloat()) or (wanted_ty.isAnyFloat() and given_ty.isInt(mod))); const opcode = buildOpcode(.{ .op = .reinterpret, - .valtype1 = typeToValtype(wanted_ty, func.target), - .valtype2 = typeToValtype(given_ty, func.target), + .valtype1 = typeToValtype(wanted_ty, mod), + .valtype2 = typeToValtype(given_ty, mod), }); try func.emitWValue(operand); try func.addTag(Mir.Inst.Tag.fromOpcode(opcode)); @@ -3493,19 +3666,21 @@ fn bitcast(func: *CodeGen, wanted_ty: Type, given_ty: Type, operand: WValue) Inn } fn airStructFieldPtr(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const mod = func.bin_file.base.options.module.?; const ty_pl = func.air.instructions.items(.data)[inst].ty_pl; const extra = func.air.extraData(Air.StructField, ty_pl.payload); const struct_ptr = try func.resolveInst(extra.data.struct_operand); - const struct_ty = func.air.typeOf(extra.data.struct_operand).childType(); + const struct_ty = func.typeOf(extra.data.struct_operand).childType(mod); const result = try func.structFieldPtr(inst, extra.data.struct_operand, struct_ptr, struct_ty, extra.data.field_index); func.finishAir(inst, result, &.{extra.data.struct_operand}); } fn airStructFieldPtrIndex(func: *CodeGen, inst: Air.Inst.Index, index: u32) InnerError!void { + const mod = func.bin_file.base.options.module.?; const ty_op = func.air.instructions.items(.data)[inst].ty_op; const struct_ptr = try func.resolveInst(ty_op.operand); - const struct_ty = func.air.typeOf(ty_op.operand).childType(); + const struct_ty = func.typeOf(ty_op.operand).childType(mod); const result = try func.structFieldPtr(inst, ty_op.operand, struct_ptr, struct_ty, index); func.finishAir(inst, result, &.{ty_op.operand}); @@ -3519,19 +3694,20 @@ fn structFieldPtr( struct_ty: Type, index: u32, ) InnerError!WValue { - const result_ty = func.air.typeOfIndex(inst); - const offset = switch (struct_ty.containerLayout()) { - .Packed => switch (struct_ty.zigTypeTag()) { + const mod = func.bin_file.base.options.module.?; + const result_ty = func.typeOfIndex(inst); + const offset = switch (struct_ty.containerLayout(mod)) { + .Packed => switch (struct_ty.zigTypeTag(mod)) { .Struct => offset: { - if (result_ty.ptrInfo().data.host_size != 0) { + if (result_ty.ptrInfo(mod).host_size != 0) { break :offset @as(u32, 0); } - break :offset struct_ty.packedStructFieldByteOffset(index, func.target); + break :offset 
struct_ty.packedStructFieldByteOffset(index, mod); }, .Union => 0, else => unreachable, }, - else => struct_ty.structFieldOffset(index, func.target), + else => struct_ty.structFieldOffset(index, mod), }; // save a load and store when we can simply reuse the operand if (offset == 0) { @@ -3546,23 +3722,23 @@ fn structFieldPtr( } fn airStructFieldVal(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const mod = func.bin_file.base.options.module.?; const ty_pl = func.air.instructions.items(.data)[inst].ty_pl; const struct_field = func.air.extraData(Air.StructField, ty_pl.payload).data; - const struct_ty = func.air.typeOf(struct_field.struct_operand); + const struct_ty = func.typeOf(struct_field.struct_operand); const operand = try func.resolveInst(struct_field.struct_operand); const field_index = struct_field.field_index; - const field_ty = struct_ty.structFieldType(field_index); - if (!field_ty.hasRuntimeBitsIgnoreComptime()) return func.finishAir(inst, .none, &.{struct_field.struct_operand}); + const field_ty = struct_ty.structFieldType(field_index, mod); + if (!field_ty.hasRuntimeBitsIgnoreComptime(mod)) return func.finishAir(inst, .none, &.{struct_field.struct_operand}); - const result = switch (struct_ty.containerLayout()) { - .Packed => switch (struct_ty.zigTypeTag()) { + const result = switch (struct_ty.containerLayout(mod)) { + .Packed => switch (struct_ty.zigTypeTag(mod)) { .Struct => result: { - const struct_obj = struct_ty.castTag(.@"struct").?.data; - assert(struct_obj.layout == .Packed); - const offset = struct_obj.packedFieldBitOffset(func.target, field_index); + const struct_obj = mod.typeToStruct(struct_ty).?; + const offset = struct_obj.packedFieldBitOffset(mod, field_index); const backing_ty = struct_obj.backing_int_ty; - const wasm_bits = toWasmBits(backing_ty.intInfo(func.target).bits) orelse { + const wasm_bits = toWasmBits(backing_ty.intInfo(mod).bits) orelse { return func.fail("TODO: airStructFieldVal for packed structs larger than 128 bits", .{}); }; const const_wvalue = if (wasm_bits == 32) @@ -3578,40 +3754,56 @@ fn airStructFieldVal(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { else try func.binOp(operand, const_wvalue, backing_ty, .shr); - if (field_ty.zigTypeTag() == .Float) { - var payload: Type.Payload.Bits = .{ - .base = .{ .tag = .int_unsigned }, - .data = @intCast(u16, field_ty.bitSize(func.target)), - }; - const int_type = Type.initPayload(&payload.base); + if (field_ty.zigTypeTag(mod) == .Float) { + const int_type = try mod.intType(.unsigned, @intCast(u16, field_ty.bitSize(mod))); const truncated = try func.trunc(shifted_value, int_type, backing_ty); const bitcasted = try func.bitcast(field_ty, int_type, truncated); break :result try bitcasted.toLocal(func, field_ty); - } else if (field_ty.isPtrAtRuntime() and struct_obj.fields.count() == 1) { + } else if (field_ty.isPtrAtRuntime(mod) and struct_obj.fields.count() == 1) { // In this case we do not have to perform any transformations, // we can simply reuse the operand. 
break :result func.reuseOperand(struct_field.struct_operand, operand); - } else if (field_ty.isPtrAtRuntime()) { - var payload: Type.Payload.Bits = .{ - .base = .{ .tag = .int_unsigned }, - .data = @intCast(u16, field_ty.bitSize(func.target)), - }; - const int_type = Type.initPayload(&payload.base); + } else if (field_ty.isPtrAtRuntime(mod)) { + const int_type = try mod.intType(.unsigned, @intCast(u16, field_ty.bitSize(mod))); const truncated = try func.trunc(shifted_value, int_type, backing_ty); break :result try truncated.toLocal(func, field_ty); } const truncated = try func.trunc(shifted_value, field_ty, backing_ty); break :result try truncated.toLocal(func, field_ty); }, - .Union => return func.fail("TODO: airStructFieldVal for packed unions", .{}), + .Union => result: { + if (isByRef(struct_ty, mod)) { + if (!isByRef(field_ty, mod)) { + const val = try func.load(operand, field_ty, 0); + break :result try val.toLocal(func, field_ty); + } else { + const new_stack_val = try func.allocStack(field_ty); + try func.store(new_stack_val, operand, field_ty, 0); + break :result new_stack_val; + } + } + + const union_int_type = try mod.intType(.unsigned, @intCast(u16, struct_ty.bitSize(mod))); + if (field_ty.zigTypeTag(mod) == .Float) { + const int_type = try mod.intType(.unsigned, @intCast(u16, field_ty.bitSize(mod))); + const truncated = try func.trunc(operand, int_type, union_int_type); + const bitcasted = try func.bitcast(field_ty, int_type, truncated); + break :result try bitcasted.toLocal(func, field_ty); + } else if (field_ty.isPtrAtRuntime(mod)) { + const int_type = try mod.intType(.unsigned, @intCast(u16, field_ty.bitSize(mod))); + const truncated = try func.trunc(operand, int_type, union_int_type); + break :result try truncated.toLocal(func, field_ty); + } + const truncated = try func.trunc(operand, field_ty, union_int_type); + break :result try truncated.toLocal(func, field_ty); + }, else => unreachable, }, else => result: { - const offset = std.math.cast(u32, struct_ty.structFieldOffset(field_index, func.target)) orelse { - const module = func.bin_file.base.options.module.?; - return func.fail("Field type '{}' too big to fit into stack frame", .{field_ty.fmt(module)}); + const offset = std.math.cast(u32, struct_ty.structFieldOffset(field_index, mod)) orelse { + return func.fail("Field type '{}' too big to fit into stack frame", .{field_ty.fmt(mod)}); }; - if (isByRef(field_ty, func.target)) { + if (isByRef(field_ty, mod)) { switch (operand) { .stack_offset => |stack_offset| { break :result WValue{ .stack_offset = .{ .value = stack_offset.value + offset, .references = 1 } }; @@ -3628,11 +3820,12 @@ fn airStructFieldVal(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { } fn airSwitchBr(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const mod = func.bin_file.base.options.module.?; // result type is always 'noreturn' const blocktype = wasm.block_empty; const pl_op = func.air.instructions.items(.data)[inst].pl_op; const target = try func.resolveInst(pl_op.operand); - const target_ty = func.air.typeOf(pl_op.operand); + const target_ty = func.typeOf(pl_op.operand); const switch_br = func.air.extraData(Air.SwitchBr, pl_op.payload); const liveness = try func.liveness.getSwitchBr(func.gpa, inst, switch_br.data.cases_len + 1); defer func.gpa.free(liveness.deaths); @@ -3661,7 +3854,7 @@ fn airSwitchBr(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { errdefer func.gpa.free(values); for (items, 0..) 
|ref, i| { - const item_val = func.air.value(ref).?; + const item_val = (try func.air.value(ref, mod)).?; const int_val = func.valueAsI32(item_val, target_ty); if (lowest_maybe == null or int_val < lowest_maybe.?) { lowest_maybe = int_val; @@ -3684,7 +3877,7 @@ fn airSwitchBr(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { // When the target is an integer size larger than u32, we have no way to use the value // as an index, therefore we also use an if/else-chain for those cases. // TODO: Benchmark this to find a proper value, LLVM seems to draw the line at '40~45'. - const is_sparse = highest - lowest > 50 or target_ty.bitSize(func.target) > 32; + const is_sparse = highest - lowest > 50 or target_ty.bitSize(mod) > 32; const else_body = func.air.extra[extra_index..][0..switch_br.data.else_body_len]; const has_else_body = else_body.len != 0; @@ -3729,7 +3922,7 @@ fn airSwitchBr(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { // for errors that are not present in any branch. This is fine as this default // case will never be hit for those cases but we do save runtime cost and size // by using a jump table for this instead of if-else chains. - break :blk if (has_else_body or target_ty.zigTypeTag() == .ErrorSet) case_i else unreachable; + break :blk if (has_else_body or target_ty.zigTypeTag(mod) == .ErrorSet) case_i else unreachable; }; func.mir_extra.appendAssumeCapacity(idx); } else if (has_else_body) { @@ -3740,10 +3933,10 @@ fn airSwitchBr(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { const signedness: std.builtin.Signedness = blk: { // by default we tell the operand type is unsigned (i.e. bools and enum values) - if (target_ty.zigTypeTag() != .Int) break :blk .unsigned; + if (target_ty.zigTypeTag(mod) != .Int) break :blk .unsigned; // incase of an actual integer, we emit the correct signedness - break :blk target_ty.intInfo(func.target).signedness; + break :blk target_ty.intInfo(mod).signedness; }; try func.branches.ensureUnusedCapacity(func.gpa, case_list.items.len + @boolToInt(has_else_body)); @@ -3756,7 +3949,7 @@ fn airSwitchBr(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { const val = try func.lowerConstant(case.values[0].value, target_ty); try func.emitWValue(val); const opcode = buildOpcode(.{ - .valtype1 = typeToValtype(target_ty, func.target), + .valtype1 = typeToValtype(target_ty, mod), .op = .ne, // not equal, because we want to jump out of this block if it does not match the condition. 
.signedness = signedness, }); @@ -3770,7 +3963,7 @@ fn airSwitchBr(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { const val = try func.lowerConstant(value.value, target_ty); try func.emitWValue(val); const opcode = buildOpcode(.{ - .valtype1 = typeToValtype(target_ty, func.target), + .valtype1 = typeToValtype(target_ty, mod), .op = .eq, .signedness = signedness, }); @@ -3783,42 +3976,38 @@ fn airSwitchBr(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { } } func.branches.appendAssumeCapacity(.{}); - try func.currentBranch().values.ensureUnusedCapacity(func.gpa, liveness.deaths[index].len); - for (liveness.deaths[index]) |operand| { - func.processDeath(Air.indexToRef(operand)); + defer { + var case_branch = func.branches.pop(); + case_branch.deinit(func.gpa); } try func.genBody(case.body); try func.endBlock(); - var case_branch = func.branches.pop(); - defer case_branch.deinit(func.gpa); - try func.mergeBranch(&case_branch); } if (has_else_body) { func.branches.appendAssumeCapacity(.{}); const else_deaths = liveness.deaths.len - 1; try func.currentBranch().values.ensureUnusedCapacity(func.gpa, liveness.deaths[else_deaths].len); - for (liveness.deaths[else_deaths]) |operand| { - func.processDeath(Air.indexToRef(operand)); + defer { + var else_branch = func.branches.pop(); + else_branch.deinit(func.gpa); } try func.genBody(else_body); try func.endBlock(); - var else_branch = func.branches.pop(); - defer else_branch.deinit(func.gpa); - try func.mergeBranch(&else_branch); } func.finishAir(inst, .none, &.{}); } fn airIsErr(func: *CodeGen, inst: Air.Inst.Index, opcode: wasm.Opcode) InnerError!void { + const mod = func.bin_file.base.options.module.?; const un_op = func.air.instructions.items(.data)[inst].un_op; const operand = try func.resolveInst(un_op); - const err_union_ty = func.air.typeOf(un_op); - const pl_ty = err_union_ty.errorUnionPayload(); + const err_union_ty = func.typeOf(un_op); + const pl_ty = err_union_ty.errorUnionPayload(mod); const result = result: { - if (err_union_ty.errorUnionSet().errorSetIsEmpty()) { + if (err_union_ty.errorUnionSet(mod).errorSetIsEmpty(mod)) { switch (opcode) { .i32_ne => break :result WValue{ .imm32 = 0 }, .i32_eq => break :result WValue{ .imm32 = 1 }, @@ -3827,10 +4016,10 @@ fn airIsErr(func: *CodeGen, inst: Air.Inst.Index, opcode: wasm.Opcode) InnerErro } try func.emitWValue(operand); - if (pl_ty.hasRuntimeBitsIgnoreComptime()) { + if (pl_ty.hasRuntimeBitsIgnoreComptime(mod)) { try func.addMemArg(.i32_load16_u, .{ - .offset = operand.offset() + @intCast(u32, errUnionErrorOffset(pl_ty, func.target)), - .alignment = Type.anyerror.abiAlignment(func.target), + .offset = operand.offset() + @intCast(u32, errUnionErrorOffset(pl_ty, mod)), + .alignment = Type.anyerror.abiAlignment(mod), }); } @@ -3846,18 +4035,24 @@ fn airIsErr(func: *CodeGen, inst: Air.Inst.Index, opcode: wasm.Opcode) InnerErro } fn airUnwrapErrUnionPayload(func: *CodeGen, inst: Air.Inst.Index, op_is_ptr: bool) InnerError!void { + const mod = func.bin_file.base.options.module.?; const ty_op = func.air.instructions.items(.data)[inst].ty_op; const operand = try func.resolveInst(ty_op.operand); - const op_ty = func.air.typeOf(ty_op.operand); - const err_ty = if (op_is_ptr) op_ty.childType() else op_ty; - const payload_ty = err_ty.errorUnionPayload(); + const op_ty = func.typeOf(ty_op.operand); + const err_ty = if (op_is_ptr) op_ty.childType(mod) else op_ty; + const payload_ty = err_ty.errorUnionPayload(mod); const result = result: { - if (!payload_ty.hasRuntimeBitsIgnoreComptime()) 
break :result WValue{ .none = {} }; + if (!payload_ty.hasRuntimeBitsIgnoreComptime(mod)) { + if (op_is_ptr) { + break :result func.reuseOperand(ty_op.operand, operand); + } + break :result WValue{ .none = {} }; + } - const pl_offset = @intCast(u32, errUnionPayloadOffset(payload_ty, func.target)); - if (op_is_ptr or isByRef(payload_ty, func.target)) { + const pl_offset = @intCast(u32, errUnionPayloadOffset(payload_ty, mod)); + if (op_is_ptr or isByRef(payload_ty, mod)) { break :result try func.buildPointerOffset(operand, pl_offset, .new); } @@ -3868,48 +4063,50 @@ fn airUnwrapErrUnionPayload(func: *CodeGen, inst: Air.Inst.Index, op_is_ptr: boo } fn airUnwrapErrUnionError(func: *CodeGen, inst: Air.Inst.Index, op_is_ptr: bool) InnerError!void { + const mod = func.bin_file.base.options.module.?; const ty_op = func.air.instructions.items(.data)[inst].ty_op; const operand = try func.resolveInst(ty_op.operand); - const op_ty = func.air.typeOf(ty_op.operand); - const err_ty = if (op_is_ptr) op_ty.childType() else op_ty; - const payload_ty = err_ty.errorUnionPayload(); + const op_ty = func.typeOf(ty_op.operand); + const err_ty = if (op_is_ptr) op_ty.childType(mod) else op_ty; + const payload_ty = err_ty.errorUnionPayload(mod); const result = result: { - if (err_ty.errorUnionSet().errorSetIsEmpty()) { + if (err_ty.errorUnionSet(mod).errorSetIsEmpty(mod)) { break :result WValue{ .imm32 = 0 }; } - if (op_is_ptr or !payload_ty.hasRuntimeBitsIgnoreComptime()) { + if (op_is_ptr or !payload_ty.hasRuntimeBitsIgnoreComptime(mod)) { break :result func.reuseOperand(ty_op.operand, operand); } - const error_val = try func.load(operand, Type.anyerror, @intCast(u32, errUnionErrorOffset(payload_ty, func.target))); + const error_val = try func.load(operand, Type.anyerror, @intCast(u32, errUnionErrorOffset(payload_ty, mod))); break :result try error_val.toLocal(func, Type.anyerror); }; func.finishAir(inst, result, &.{ty_op.operand}); } fn airWrapErrUnionPayload(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const mod = func.bin_file.base.options.module.?; const ty_op = func.air.instructions.items(.data)[inst].ty_op; const operand = try func.resolveInst(ty_op.operand); - const err_ty = func.air.typeOfIndex(inst); + const err_ty = func.typeOfIndex(inst); - const pl_ty = func.air.typeOf(ty_op.operand); + const pl_ty = func.typeOf(ty_op.operand); const result = result: { - if (!pl_ty.hasRuntimeBitsIgnoreComptime()) { + if (!pl_ty.hasRuntimeBitsIgnoreComptime(mod)) { break :result func.reuseOperand(ty_op.operand, operand); } const err_union = try func.allocStack(err_ty); - const payload_ptr = try func.buildPointerOffset(err_union, @intCast(u32, errUnionPayloadOffset(pl_ty, func.target)), .new); + const payload_ptr = try func.buildPointerOffset(err_union, @intCast(u32, errUnionPayloadOffset(pl_ty, mod)), .new); try func.store(payload_ptr, operand, pl_ty, 0); // ensure we also write '0' to the error part, so any present stack value gets overwritten by it. 
try func.emitWValue(err_union); try func.addImm32(0); - const err_val_offset = @intCast(u32, errUnionErrorOffset(pl_ty, func.target)); + const err_val_offset = @intCast(u32, errUnionErrorOffset(pl_ty, mod)); try func.addMemArg(.i32_store16, .{ .offset = err_union.offset() + err_val_offset, .alignment = 2 }); break :result err_union; }; @@ -3917,25 +4114,26 @@ fn airWrapErrUnionPayload(func: *CodeGen, inst: Air.Inst.Index) InnerError!void } fn airWrapErrUnionErr(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const mod = func.bin_file.base.options.module.?; const ty_op = func.air.instructions.items(.data)[inst].ty_op; const operand = try func.resolveInst(ty_op.operand); const err_ty = func.air.getRefType(ty_op.ty); - const pl_ty = err_ty.errorUnionPayload(); + const pl_ty = err_ty.errorUnionPayload(mod); const result = result: { - if (!pl_ty.hasRuntimeBitsIgnoreComptime()) { + if (!pl_ty.hasRuntimeBitsIgnoreComptime(mod)) { break :result func.reuseOperand(ty_op.operand, operand); } const err_union = try func.allocStack(err_ty); // store error value - try func.store(err_union, operand, Type.anyerror, @intCast(u32, errUnionErrorOffset(pl_ty, func.target))); + try func.store(err_union, operand, Type.anyerror, @intCast(u32, errUnionErrorOffset(pl_ty, mod))); // write 'undefined' to the payload - const payload_ptr = try func.buildPointerOffset(err_union, @intCast(u32, errUnionPayloadOffset(pl_ty, func.target)), .new); - const len = @intCast(u32, err_ty.errorUnionPayload().abiSize(func.target)); - try func.memset(payload_ptr, .{ .imm32 = len }, .{ .imm32 = 0xaaaaaaaa }); + const payload_ptr = try func.buildPointerOffset(err_union, @intCast(u32, errUnionPayloadOffset(pl_ty, mod)), .new); + const len = @intCast(u32, err_ty.errorUnionPayload(mod).abiSize(mod)); + try func.memset(Type.u8, payload_ptr, .{ .imm32 = len }, .{ .imm32 = 0xaa }); break :result err_union; }; @@ -3947,15 +4145,22 @@ fn airIntcast(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { const ty = func.air.getRefType(ty_op.ty); const operand = try func.resolveInst(ty_op.operand); - const operand_ty = func.air.typeOf(ty_op.operand); - if (ty.zigTypeTag() == .Vector or operand_ty.zigTypeTag() == .Vector) { + const operand_ty = func.typeOf(ty_op.operand); + const mod = func.bin_file.base.options.module.?; + if (ty.zigTypeTag(mod) == .Vector or operand_ty.zigTypeTag(mod) == .Vector) { return func.fail("todo Wasm intcast for vectors", .{}); } - if (ty.abiSize(func.target) > 16 or operand_ty.abiSize(func.target) > 16) { + if (ty.abiSize(mod) > 16 or operand_ty.abiSize(mod) > 16) { return func.fail("todo Wasm intcast for bitsize > 128", .{}); } - const result = try (try func.intcast(operand, operand_ty, ty)).toLocal(func, ty); + const op_bits = toWasmBits(@intCast(u16, operand_ty.bitSize(mod))).?; + const wanted_bits = toWasmBits(@intCast(u16, ty.bitSize(mod))).?; + const result = if (op_bits == wanted_bits) + func.reuseOperand(ty_op.operand, operand) + else + try (try func.intcast(operand, operand_ty, ty)).toLocal(func, ty); + func.finishAir(inst, result, &.{}); } @@ -3964,8 +4169,9 @@ fn airIntcast(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { /// Asserts type's bitsize <= 128 /// NOTE: May leave the result on the top of the stack. 
fn intcast(func: *CodeGen, operand: WValue, given: Type, wanted: Type) InnerError!WValue { - const given_bitsize = @intCast(u16, given.bitSize(func.target)); - const wanted_bitsize = @intCast(u16, wanted.bitSize(func.target)); + const mod = func.bin_file.base.options.module.?; + const given_bitsize = @intCast(u16, given.bitSize(mod)); + const wanted_bitsize = @intCast(u16, wanted.bitSize(mod)); assert(given_bitsize <= 128); assert(wanted_bitsize <= 128); @@ -3978,7 +4184,7 @@ fn intcast(func: *CodeGen, operand: WValue, given: Type, wanted: Type) InnerErro try func.addTag(.i32_wrap_i64); } else if (op_bits == 32 and wanted_bits > 32 and wanted_bits <= 64) { try func.emitWValue(operand); - try func.addTag(if (wanted.isSignedInt()) .i64_extend_i32_s else .i64_extend_i32_u); + try func.addTag(if (wanted.isSignedInt(mod)) .i64_extend_i32_s else .i64_extend_i32_u); } else if (wanted_bits == 128) { // for 128bit integers we store the integer in the virtual stack, rather than a local const stack_ptr = try func.allocStack(wanted); @@ -3987,14 +4193,14 @@ fn intcast(func: *CodeGen, operand: WValue, given: Type, wanted: Type) InnerErro // for 32 bit integers, we first coerce the value into a 64 bit integer before storing it // meaning less store operations are required. const lhs = if (op_bits == 32) blk: { - break :blk try func.intcast(operand, given, if (wanted.isSignedInt()) Type.i64 else Type.u64); + break :blk try func.intcast(operand, given, if (wanted.isSignedInt(mod)) Type.i64 else Type.u64); } else operand; // store msb first try func.store(.{ .stack = {} }, lhs, Type.u64, 0 + stack_ptr.offset()); // For signed integers we shift msb by 63 (64bit integer - 1 sign bit) and store remaining value - if (wanted.isSignedInt()) { + if (wanted.isSignedInt(mod)) { try func.emitWValue(stack_ptr); const shr = try func.binOp(lhs, .{ .imm64 = 63 }, Type.i64, .shr); try func.store(.{ .stack = {} }, shr, Type.u64, 8 + stack_ptr.offset()); @@ -4009,11 +4215,12 @@ fn intcast(func: *CodeGen, operand: WValue, given: Type, wanted: Type) InnerErro } fn airIsNull(func: *CodeGen, inst: Air.Inst.Index, opcode: wasm.Opcode, op_kind: enum { value, ptr }) InnerError!void { + const mod = func.bin_file.base.options.module.?; const un_op = func.air.instructions.items(.data)[inst].un_op; const operand = try func.resolveInst(un_op); - const op_ty = func.air.typeOf(un_op); - const optional_ty = if (op_kind == .ptr) op_ty.childType() else op_ty; + const op_ty = func.typeOf(un_op); + const optional_ty = if (op_kind == .ptr) op_ty.childType(mod) else op_ty; const is_null = try func.isNull(operand, optional_ty, opcode); const result = try is_null.toLocal(func, optional_ty); func.finishAir(inst, result, &.{un_op}); @@ -4022,20 +4229,19 @@ fn airIsNull(func: *CodeGen, inst: Air.Inst.Index, opcode: wasm.Opcode, op_kind: /// For a given type and operand, checks if it's considered `null`. 
/// NOTE: Leaves the result on the stack fn isNull(func: *CodeGen, operand: WValue, optional_ty: Type, opcode: wasm.Opcode) InnerError!WValue { + const mod = func.bin_file.base.options.module.?; try func.emitWValue(operand); - var buf: Type.Payload.ElemType = undefined; - const payload_ty = optional_ty.optionalChild(&buf); - if (!optional_ty.optionalReprIsPayload()) { + const payload_ty = optional_ty.optionalChild(mod); + if (!optional_ty.optionalReprIsPayload(mod)) { // When payload is zero-bits, we can treat operand as a value, rather than // a pointer to the stack value - if (payload_ty.hasRuntimeBitsIgnoreComptime()) { - const offset = std.math.cast(u32, payload_ty.abiSize(func.target)) orelse { - const module = func.bin_file.base.options.module.?; - return func.fail("Optional type {} too big to fit into stack frame", .{optional_ty.fmt(module)}); + if (payload_ty.hasRuntimeBitsIgnoreComptime(mod)) { + const offset = std.math.cast(u32, payload_ty.abiSize(mod)) orelse { + return func.fail("Optional type {} too big to fit into stack frame", .{optional_ty.fmt(mod)}); }; try func.addMemArg(.i32_load8_u, .{ .offset = operand.offset() + offset, .alignment = 1 }); } - } else if (payload_ty.isSlice()) { + } else if (payload_ty.isSlice(mod)) { switch (func.arch()) { .wasm32 => try func.addMemArg(.i32_load, .{ .offset = operand.offset(), .alignment = 4 }), .wasm64 => try func.addMemArg(.i64_load, .{ .offset = operand.offset(), .alignment = 8 }), @@ -4051,18 +4257,19 @@ fn isNull(func: *CodeGen, operand: WValue, optional_ty: Type, opcode: wasm.Opcod } fn airOptionalPayload(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const mod = func.bin_file.base.options.module.?; const ty_op = func.air.instructions.items(.data)[inst].ty_op; - const opt_ty = func.air.typeOf(ty_op.operand); - const payload_ty = func.air.typeOfIndex(inst); - if (!payload_ty.hasRuntimeBitsIgnoreComptime()) { + const opt_ty = func.typeOf(ty_op.operand); + const payload_ty = func.typeOfIndex(inst); + if (!payload_ty.hasRuntimeBitsIgnoreComptime(mod)) { return func.finishAir(inst, .none, &.{ty_op.operand}); } const result = result: { const operand = try func.resolveInst(ty_op.operand); - if (opt_ty.optionalReprIsPayload()) break :result func.reuseOperand(ty_op.operand, operand); + if (opt_ty.optionalReprIsPayload(mod)) break :result func.reuseOperand(ty_op.operand, operand); - if (isByRef(payload_ty, func.target)) { + if (isByRef(payload_ty, mod)) { break :result try func.buildPointerOffset(operand, 0, .new); } @@ -4073,14 +4280,14 @@ fn airOptionalPayload(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { } fn airOptionalPayloadPtr(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const mod = func.bin_file.base.options.module.?; const ty_op = func.air.instructions.items(.data)[inst].ty_op; const operand = try func.resolveInst(ty_op.operand); - const opt_ty = func.air.typeOf(ty_op.operand).childType(); + const opt_ty = func.typeOf(ty_op.operand).childType(mod); const result = result: { - var buf: Type.Payload.ElemType = undefined; - const payload_ty = opt_ty.optionalChild(&buf); - if (!payload_ty.hasRuntimeBitsIgnoreComptime() or opt_ty.optionalReprIsPayload()) { + const payload_ty = opt_ty.optionalChild(mod); + if (!payload_ty.hasRuntimeBitsIgnoreComptime(mod) or opt_ty.optionalReprIsPayload(mod)) { break :result func.reuseOperand(ty_op.operand, operand); } @@ -4090,22 +4297,21 @@ fn airOptionalPayloadPtr(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { } fn airOptionalPayloadPtrSet(func: *CodeGen, 
inst: Air.Inst.Index) InnerError!void { + const mod = func.bin_file.base.options.module.?; const ty_op = func.air.instructions.items(.data)[inst].ty_op; const operand = try func.resolveInst(ty_op.operand); - const opt_ty = func.air.typeOf(ty_op.operand).childType(); - var buf: Type.Payload.ElemType = undefined; - const payload_ty = opt_ty.optionalChild(&buf); - if (!payload_ty.hasRuntimeBitsIgnoreComptime()) { + const opt_ty = func.typeOf(ty_op.operand).childType(mod); + const payload_ty = opt_ty.optionalChild(mod); + if (!payload_ty.hasRuntimeBitsIgnoreComptime(mod)) { return func.fail("TODO: Implement OptionalPayloadPtrSet for optional with zero-sized type {}", .{payload_ty.fmtDebug()}); } - if (opt_ty.optionalReprIsPayload()) { + if (opt_ty.optionalReprIsPayload(mod)) { return func.finishAir(inst, operand, &.{ty_op.operand}); } - const offset = std.math.cast(u32, payload_ty.abiSize(func.target)) orelse { - const module = func.bin_file.base.options.module.?; - return func.fail("Optional type {} too big to fit into stack frame", .{opt_ty.fmt(module)}); + const offset = std.math.cast(u32, payload_ty.abiSize(mod)) orelse { + return func.fail("Optional type {} too big to fit into stack frame", .{opt_ty.fmt(mod)}); }; try func.emitWValue(operand); @@ -4118,11 +4324,12 @@ fn airOptionalPayloadPtrSet(func: *CodeGen, inst: Air.Inst.Index) InnerError!voi fn airWrapOptional(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { const ty_op = func.air.instructions.items(.data)[inst].ty_op; - const payload_ty = func.air.typeOf(ty_op.operand); + const payload_ty = func.typeOf(ty_op.operand); + const mod = func.bin_file.base.options.module.?; const result = result: { - if (!payload_ty.hasRuntimeBitsIgnoreComptime()) { - const non_null_bit = try func.allocStack(Type.initTag(.u1)); + if (!payload_ty.hasRuntimeBitsIgnoreComptime(mod)) { + const non_null_bit = try func.allocStack(Type.u1); try func.emitWValue(non_null_bit); try func.addImm32(1); try func.addMemArg(.i32_store8, .{ .offset = non_null_bit.offset(), .alignment = 1 }); @@ -4130,13 +4337,12 @@ fn airWrapOptional(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { } const operand = try func.resolveInst(ty_op.operand); - const op_ty = func.air.typeOfIndex(inst); - if (op_ty.optionalReprIsPayload()) { + const op_ty = func.typeOfIndex(inst); + if (op_ty.optionalReprIsPayload(mod)) { break :result func.reuseOperand(ty_op.operand, operand); } - const offset = std.math.cast(u32, payload_ty.abiSize(func.target)) orelse { - const module = func.bin_file.base.options.module.?; - return func.fail("Optional type {} too big to fit into stack frame", .{op_ty.fmt(module)}); + const offset = std.math.cast(u32, payload_ty.abiSize(mod)) orelse { + return func.fail("Optional type {} too big to fit into stack frame", .{op_ty.fmt(mod)}); }; // Create optional type, set the non-null bit, and store the operand inside the optional type @@ -4159,7 +4365,7 @@ fn airSlice(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { const lhs = try func.resolveInst(bin_op.lhs); const rhs = try func.resolveInst(bin_op.rhs); - const slice_ty = func.air.typeOfIndex(inst); + const slice_ty = func.typeOfIndex(inst); const slice = try func.allocStack(slice_ty); try func.store(slice, lhs, Type.usize, 0); @@ -4176,13 +4382,14 @@ fn airSliceLen(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { } fn airSliceElemVal(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const mod = func.bin_file.base.options.module.?; const bin_op = 
func.air.instructions.items(.data)[inst].bin_op; - const slice_ty = func.air.typeOf(bin_op.lhs); + const slice_ty = func.typeOf(bin_op.lhs); const slice = try func.resolveInst(bin_op.lhs); const index = try func.resolveInst(bin_op.rhs); - const elem_ty = slice_ty.childType(); - const elem_size = elem_ty.abiSize(func.target); + const elem_ty = slice_ty.childType(mod); + const elem_size = elem_ty.abiSize(mod); // load pointer onto stack _ = try func.load(slice, Type.usize, 0); @@ -4196,7 +4403,7 @@ fn airSliceElemVal(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { const result_ptr = try func.allocLocal(Type.usize); try func.addLabel(.local_set, result_ptr.local.value); - const result = if (!isByRef(elem_ty, func.target)) result: { + const result = if (!isByRef(elem_ty, mod)) result: { const elem_val = try func.load(result_ptr, elem_ty, 0); break :result try elem_val.toLocal(func, elem_ty); } else result_ptr; @@ -4205,11 +4412,12 @@ fn airSliceElemVal(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { } fn airSliceElemPtr(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const mod = func.bin_file.base.options.module.?; const ty_pl = func.air.instructions.items(.data)[inst].ty_pl; const bin_op = func.air.extraData(Air.Bin, ty_pl.payload).data; - const elem_ty = func.air.getRefType(ty_pl.ty).childType(); - const elem_size = elem_ty.abiSize(func.target); + const elem_ty = func.air.getRefType(ty_pl.ty).childType(mod); + const elem_size = elem_ty.abiSize(mod); const slice = try func.resolveInst(bin_op.lhs); const index = try func.resolveInst(bin_op.rhs); @@ -4248,7 +4456,7 @@ fn airTrunc(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { const operand = try func.resolveInst(ty_op.operand); const wanted_ty = func.air.getRefType(ty_op.ty); - const op_ty = func.air.typeOf(ty_op.operand); + const op_ty = func.typeOf(ty_op.operand); const result = try func.trunc(operand, wanted_ty, op_ty); func.finishAir(inst, try result.toLocal(func, wanted_ty), &.{ty_op.operand}); @@ -4257,13 +4465,14 @@ fn airTrunc(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { /// Truncates a given operand to a given type, discarding any overflown bits. /// NOTE: Resulting value is left on the stack. 
fn trunc(func: *CodeGen, operand: WValue, wanted_ty: Type, given_ty: Type) InnerError!WValue { - const given_bits = @intCast(u16, given_ty.bitSize(func.target)); + const mod = func.bin_file.base.options.module.?; + const given_bits = @intCast(u16, given_ty.bitSize(mod)); if (toWasmBits(given_bits) == null) { return func.fail("TODO: Implement wasm integer truncation for integer bitsize: {d}", .{given_bits}); } var result = try func.intcast(operand, given_ty, wanted_ty); - const wanted_bits = @intCast(u16, wanted_ty.bitSize(func.target)); + const wanted_bits = @intCast(u16, wanted_ty.bitSize(mod)); const wasm_bits = toWasmBits(wanted_bits).?; if (wasm_bits != wanted_bits) { result = try func.wrapOperand(result, wanted_ty); @@ -4280,32 +4489,34 @@ fn airBoolToInt(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { } fn airArrayToSlice(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const mod = func.bin_file.base.options.module.?; const ty_op = func.air.instructions.items(.data)[inst].ty_op; const operand = try func.resolveInst(ty_op.operand); - const array_ty = func.air.typeOf(ty_op.operand).childType(); + const array_ty = func.typeOf(ty_op.operand).childType(mod); const slice_ty = func.air.getRefType(ty_op.ty); // create a slice on the stack const slice_local = try func.allocStack(slice_ty); // store the array ptr in the slice - if (array_ty.hasRuntimeBitsIgnoreComptime()) { + if (array_ty.hasRuntimeBitsIgnoreComptime(mod)) { try func.store(slice_local, operand, Type.usize, 0); } // store the length of the array in the slice - const len = WValue{ .imm32 = @intCast(u32, array_ty.arrayLen()) }; + const len = WValue{ .imm32 = @intCast(u32, array_ty.arrayLen(mod)) }; try func.store(slice_local, len, Type.usize, func.ptrSize()); func.finishAir(inst, slice_local, &.{ty_op.operand}); } fn airPtrToInt(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const mod = func.bin_file.base.options.module.?; const un_op = func.air.instructions.items(.data)[inst].un_op; const operand = try func.resolveInst(un_op); - const ptr_ty = func.air.typeOf(un_op); - const result = if (ptr_ty.isSlice()) + const ptr_ty = func.typeOf(un_op); + const result = if (ptr_ty.isSlice(mod)) try func.slicePtr(operand) else switch (operand) { // for stack offset, return a pointer to this offset. 
@@ -4316,16 +4527,17 @@ fn airPtrToInt(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { } fn airPtrElemVal(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const mod = func.bin_file.base.options.module.?; const bin_op = func.air.instructions.items(.data)[inst].bin_op; - const ptr_ty = func.air.typeOf(bin_op.lhs); + const ptr_ty = func.typeOf(bin_op.lhs); const ptr = try func.resolveInst(bin_op.lhs); const index = try func.resolveInst(bin_op.rhs); - const elem_ty = ptr_ty.childType(); - const elem_size = elem_ty.abiSize(func.target); + const elem_ty = ptr_ty.childType(mod); + const elem_size = elem_ty.abiSize(mod); // load pointer onto the stack - if (ptr_ty.isSlice()) { + if (ptr_ty.isSlice(mod)) { _ = try func.load(ptr, Type.usize, 0); } else { try func.lowerToStack(ptr); @@ -4338,9 +4550,9 @@ fn airPtrElemVal(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { try func.addTag(.i32_add); const elem_result = val: { - var result = try func.allocLocal(elem_ty); + var result = try func.allocLocal(Type.usize); try func.addLabel(.local_set, result.local.value); - if (isByRef(elem_ty, func.target)) { + if (isByRef(elem_ty, mod)) { break :val result; } defer result.free(func); // only free if it's not returned like above @@ -4352,18 +4564,19 @@ fn airPtrElemVal(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { } fn airPtrElemPtr(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const mod = func.bin_file.base.options.module.?; const ty_pl = func.air.instructions.items(.data)[inst].ty_pl; const bin_op = func.air.extraData(Air.Bin, ty_pl.payload).data; - const ptr_ty = func.air.typeOf(bin_op.lhs); - const elem_ty = func.air.getRefType(ty_pl.ty).childType(); - const elem_size = elem_ty.abiSize(func.target); + const ptr_ty = func.typeOf(bin_op.lhs); + const elem_ty = func.air.getRefType(ty_pl.ty).childType(mod); + const elem_size = elem_ty.abiSize(mod); const ptr = try func.resolveInst(bin_op.lhs); const index = try func.resolveInst(bin_op.rhs); // load pointer onto the stack - if (ptr_ty.isSlice()) { + if (ptr_ty.isSlice(mod)) { _ = try func.load(ptr, Type.usize, 0); } else { try func.lowerToStack(ptr); @@ -4381,24 +4594,25 @@ fn airPtrElemPtr(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { } fn airPtrBinOp(func: *CodeGen, inst: Air.Inst.Index, op: Op) InnerError!void { + const mod = func.bin_file.base.options.module.?; const ty_pl = func.air.instructions.items(.data)[inst].ty_pl; const bin_op = func.air.extraData(Air.Bin, ty_pl.payload).data; const ptr = try func.resolveInst(bin_op.lhs); const offset = try func.resolveInst(bin_op.rhs); - const ptr_ty = func.air.typeOf(bin_op.lhs); - const pointee_ty = switch (ptr_ty.ptrSize()) { - .One => ptr_ty.childType().childType(), // ptr to array, so get array element type - else => ptr_ty.childType(), + const ptr_ty = func.typeOf(bin_op.lhs); + const pointee_ty = switch (ptr_ty.ptrSize(mod)) { + .One => ptr_ty.childType(mod).childType(mod), // ptr to array, so get array element type + else => ptr_ty.childType(mod), }; - const valtype = typeToValtype(Type.usize, func.target); + const valtype = typeToValtype(Type.usize, mod); const mul_opcode = buildOpcode(.{ .valtype1 = valtype, .op = .mul }); const bin_opcode = buildOpcode(.{ .valtype1 = valtype, .op = op }); try func.lowerToStack(ptr); try func.emitWValue(offset); - try func.addImm32(@bitCast(i32, @intCast(u32, pointee_ty.abiSize(func.target)))); + try func.addImm32(@bitCast(i32, @intCast(u32, pointee_ty.abiSize(mod)))); try 
func.addTag(Mir.Inst.Tag.fromOpcode(mul_opcode)); try func.addTag(Mir.Inst.Tag.fromOpcode(bin_opcode)); @@ -4408,6 +4622,7 @@ fn airPtrBinOp(func: *CodeGen, inst: Air.Inst.Index, op: Op) InnerError!void { } fn airMemset(func: *CodeGen, inst: Air.Inst.Index, safety: bool) InnerError!void { + const mod = func.bin_file.base.options.module.?; if (safety) { // TODO if the value is undef, write 0xaa bytes to dest } else { @@ -4416,14 +4631,21 @@ fn airMemset(func: *CodeGen, inst: Air.Inst.Index, safety: bool) InnerError!void const bin_op = func.air.instructions.items(.data)[inst].bin_op; const ptr = try func.resolveInst(bin_op.lhs); - const ptr_ty = func.air.typeOf(bin_op.lhs); + const ptr_ty = func.typeOf(bin_op.lhs); const value = try func.resolveInst(bin_op.rhs); - const len = switch (ptr_ty.ptrSize()) { + const len = switch (ptr_ty.ptrSize(mod)) { .Slice => try func.sliceLen(ptr), - .One => @as(WValue, .{ .imm32 = @intCast(u32, ptr_ty.childType().arrayLen()) }), + .One => @as(WValue, .{ .imm32 = @intCast(u32, ptr_ty.childType(mod).arrayLen(mod)) }), .C, .Many => unreachable, }; - try func.memset(ptr, len, value); + + const elem_ty = if (ptr_ty.ptrSize(mod) == .One) + ptr_ty.childType(mod).childType(mod) + else + ptr_ty.childType(mod); + + const dst_ptr = try func.sliceOrArrayPtr(ptr, ptr_ty); + try func.memset(elem_ty, dst_ptr, len, value); func.finishAir(inst, .none, &.{ bin_op.lhs, bin_op.rhs }); } @@ -4432,10 +4654,13 @@ fn airMemset(func: *CodeGen, inst: Air.Inst.Index, safety: bool) InnerError!void /// When the user has enabled the bulk_memory feature, we lower /// this to wasm's memset instruction. When the feature is not present, /// we implement it manually. -fn memset(func: *CodeGen, ptr: WValue, len: WValue, value: WValue) InnerError!void { +fn memset(func: *CodeGen, elem_ty: Type, ptr: WValue, len: WValue, value: WValue) InnerError!void { + const mod = func.bin_file.base.options.module.?; + const abi_size = @intCast(u32, elem_ty.abiSize(mod)); + // When bulk_memory is enabled, we lower it to wasm's memset instruction. - // If not, we lower it ourselves - if (std.Target.wasm.featureSetHas(func.target.cpu.features, .bulk_memory)) { + // If not, we lower it ourselves. 
+ if (std.Target.wasm.featureSetHas(func.target.cpu.features, .bulk_memory) and abi_size == 1) { try func.lowerToStack(ptr); try func.emitWValue(value); try func.emitWValue(len); @@ -4443,101 +4668,107 @@ fn memset(func: *CodeGen, ptr: WValue, len: WValue, value: WValue) InnerError!vo return; } - // When the length is comptime-known we do the loop at codegen, rather - // than emitting a runtime loop into the binary - switch (len) { - .imm32, .imm64 => { - const length = switch (len) { - .imm32 => |val| val, - .imm64 => |val| val, - else => unreachable, - }; - - var offset: u32 = 0; - const base = ptr.offset(); - while (offset < length) : (offset += 1) { - try func.emitWValue(ptr); - try func.emitWValue(value); - switch (func.arch()) { - .wasm32 => { - try func.addMemArg(.i32_store8, .{ .offset = base + offset, .alignment = 1 }); - }, - .wasm64 => { - try func.addMemArg(.i64_store8, .{ .offset = base + offset, .alignment = 1 }); - }, - else => unreachable, - } - } - }, - else => { - // TODO: We should probably lower this to a call to compiler_rt - // But for now, we implement it manually - const offset = try func.ensureAllocLocal(Type.usize); // local for counter - // outer block to jump to when loop is done - try func.startBlock(.block, wasm.block_empty); - try func.startBlock(.loop, wasm.block_empty); - try func.emitWValue(offset); + const final_len = switch (len) { + .imm32 => |val| WValue{ .imm32 = val * abi_size }, + .imm64 => |val| WValue{ .imm64 = val * abi_size }, + else => if (abi_size != 1) blk: { + const new_len = try func.ensureAllocLocal(Type.usize); try func.emitWValue(len); switch (func.arch()) { - .wasm32 => try func.addTag(.i32_eq), - .wasm64 => try func.addTag(.i64_eq), - else => unreachable, - } - try func.addLabel(.br_if, 1); // jump out of loop into outer block (finished) - try func.emitWValue(ptr); - try func.emitWValue(offset); - switch (func.arch()) { - .wasm32 => try func.addTag(.i32_add), - .wasm64 => try func.addTag(.i64_add), - else => unreachable, - } - try func.emitWValue(value); - const mem_store_op: Mir.Inst.Tag = switch (func.arch()) { - .wasm32 => .i32_store8, - .wasm64 => .i64_store8, - else => unreachable, - }; - try func.addMemArg(mem_store_op, .{ .offset = ptr.offset(), .alignment = 1 }); - try func.emitWValue(offset); - try func.addImm32(1); - switch (func.arch()) { - .wasm32 => try func.addTag(.i32_add), - .wasm64 => try func.addTag(.i64_add), + .wasm32 => { + try func.emitWValue(.{ .imm32 = abi_size }); + try func.addTag(.i32_mul); + }, + .wasm64 => { + try func.emitWValue(.{ .imm64 = abi_size }); + try func.addTag(.i64_mul); + }, else => unreachable, } - try func.addLabel(.local_set, offset.local.value); - try func.addLabel(.br, 0); // jump to start of loop - try func.endBlock(); - try func.endBlock(); + try func.addLabel(.local_set, new_len.local.value); + break :blk new_len; + } else len, + }; + + var end_ptr = try func.allocLocal(Type.usize); + defer end_ptr.free(func); + var new_ptr = try func.buildPointerOffset(ptr, 0, .new); + defer new_ptr.free(func); + + // get the loop conditional: if current pointer address equals final pointer's address + try func.lowerToStack(ptr); + try func.emitWValue(final_len); + switch (func.arch()) { + .wasm32 => try func.addTag(.i32_add), + .wasm64 => try func.addTag(.i64_add), + else => unreachable, + } + try func.addLabel(.local_set, end_ptr.local.value); + + // outer block to jump to when loop is done + try func.startBlock(.block, wasm.block_empty); + try func.startBlock(.loop, wasm.block_empty); + + // check 
for condition for loop end + try func.emitWValue(new_ptr); + try func.emitWValue(end_ptr); + switch (func.arch()) { + .wasm32 => try func.addTag(.i32_eq), + .wasm64 => try func.addTag(.i64_eq), + else => unreachable, + } + try func.addLabel(.br_if, 1); // jump out of loop into outer block (finished) + + // store the value at the current position of the pointer + try func.store(new_ptr, value, elem_ty, 0); + + // move the pointer to the next element + try func.emitWValue(new_ptr); + switch (func.arch()) { + .wasm32 => { + try func.emitWValue(.{ .imm32 = abi_size }); + try func.addTag(.i32_add); + }, + .wasm64 => { + try func.emitWValue(.{ .imm64 = abi_size }); + try func.addTag(.i64_add); }, + else => unreachable, + } + try func.addLabel(.local_set, new_ptr.local.value); + + // end of loop + try func.addLabel(.br, 0); // jump to start of loop + try func.endBlock(); + try func.endBlock(); } fn airArrayElemVal(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const mod = func.bin_file.base.options.module.?; const bin_op = func.air.instructions.items(.data)[inst].bin_op; - const array_ty = func.air.typeOf(bin_op.lhs); + const array_ty = func.typeOf(bin_op.lhs); const array = try func.resolveInst(bin_op.lhs); const index = try func.resolveInst(bin_op.rhs); - const elem_ty = array_ty.childType(); - const elem_size = elem_ty.abiSize(func.target); + const elem_ty = array_ty.childType(mod); + const elem_size = elem_ty.abiSize(mod); - if (isByRef(array_ty, func.target)) { + if (isByRef(array_ty, mod)) { try func.lowerToStack(array); try func.emitWValue(index); try func.addImm32(@bitCast(i32, @intCast(u32, elem_size))); try func.addTag(.i32_mul); try func.addTag(.i32_add); } else { - std.debug.assert(array_ty.zigTypeTag() == .Vector); + std.debug.assert(array_ty.zigTypeTag(mod) == .Vector); switch (index) { inline .imm32, .imm64 => |lane| { - const opcode: wasm.SimdOpcode = switch (elem_ty.bitSize(func.target)) { - 8 => if (elem_ty.isSignedInt()) .i8x16_extract_lane_s else .i8x16_extract_lane_u, - 16 => if (elem_ty.isSignedInt()) .i16x8_extract_lane_s else .i16x8_extract_lane_u, - 32 => if (elem_ty.isInt()) .i32x4_extract_lane else .f32x4_extract_lane, - 64 => if (elem_ty.isInt()) .i64x2_extract_lane else .f64x2_extract_lane, + const opcode: wasm.SimdOpcode = switch (elem_ty.bitSize(mod)) { + 8 => if (elem_ty.isSignedInt(mod)) .i8x16_extract_lane_s else .i8x16_extract_lane_u, + 16 => if (elem_ty.isSignedInt(mod)) .i16x8_extract_lane_s else .i16x8_extract_lane_u, + 32 => if (elem_ty.isInt(mod)) .i32x4_extract_lane else .f32x4_extract_lane, + 64 => if (elem_ty.isInt(mod)) .i64x2_extract_lane else .f64x2_extract_lane, else => unreachable, }; @@ -4569,7 +4800,7 @@ fn airArrayElemVal(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { var result = try func.allocLocal(Type.usize); try func.addLabel(.local_set, result.local.value); - if (isByRef(elem_ty, func.target)) { + if (isByRef(elem_ty, mod)) { break :val result; } defer result.free(func); // only free if no longer needed and not returned like above @@ -4582,22 +4813,23 @@ fn airArrayElemVal(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { } fn airFloatToInt(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const mod = func.bin_file.base.options.module.?; const ty_op = func.air.instructions.items(.data)[inst].ty_op; const operand = try func.resolveInst(ty_op.operand); - const dest_ty = func.air.typeOfIndex(inst); - const op_ty = func.air.typeOf(ty_op.operand); + const dest_ty = func.typeOfIndex(inst); + const op_ty = 
func.typeOf(ty_op.operand); - if (op_ty.abiSize(func.target) > 8) { + if (op_ty.abiSize(mod) > 8) { return func.fail("TODO: floatToInt for integers/floats with bitsize larger than 64 bits", .{}); } try func.emitWValue(operand); const op = buildOpcode(.{ .op = .trunc, - .valtype1 = typeToValtype(dest_ty, func.target), - .valtype2 = typeToValtype(op_ty, func.target), - .signedness = if (dest_ty.isSignedInt()) .signed else .unsigned, + .valtype1 = typeToValtype(dest_ty, mod), + .valtype2 = typeToValtype(op_ty, mod), + .signedness = if (dest_ty.isSignedInt(mod)) .signed else .unsigned, }); try func.addTag(Mir.Inst.Tag.fromOpcode(op)); const wrapped = try func.wrapOperand(.{ .stack = {} }, dest_ty); @@ -4606,22 +4838,23 @@ fn airFloatToInt(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { } fn airIntToFloat(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const mod = func.bin_file.base.options.module.?; const ty_op = func.air.instructions.items(.data)[inst].ty_op; const operand = try func.resolveInst(ty_op.operand); - const dest_ty = func.air.typeOfIndex(inst); - const op_ty = func.air.typeOf(ty_op.operand); + const dest_ty = func.typeOfIndex(inst); + const op_ty = func.typeOf(ty_op.operand); - if (op_ty.abiSize(func.target) > 8) { + if (op_ty.abiSize(mod) > 8) { return func.fail("TODO: intToFloat for integers/floats with bitsize larger than 64 bits", .{}); } try func.emitWValue(operand); const op = buildOpcode(.{ .op = .convert, - .valtype1 = typeToValtype(dest_ty, func.target), - .valtype2 = typeToValtype(op_ty, func.target), - .signedness = if (op_ty.isSignedInt()) .signed else .unsigned, + .valtype1 = typeToValtype(dest_ty, mod), + .valtype2 = typeToValtype(op_ty, mod), + .signedness = if (op_ty.isSignedInt(mod)) .signed else .unsigned, }); try func.addTag(Mir.Inst.Tag.fromOpcode(op)); @@ -4631,18 +4864,19 @@ fn airIntToFloat(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { } fn airSplat(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const mod = func.bin_file.base.options.module.?; const ty_op = func.air.instructions.items(.data)[inst].ty_op; const operand = try func.resolveInst(ty_op.operand); - const ty = func.air.typeOfIndex(inst); - const elem_ty = ty.childType(); + const ty = func.typeOfIndex(inst); + const elem_ty = ty.childType(mod); - if (determineSimdStoreStrategy(ty, func.target) == .direct) blk: { + if (determineSimdStoreStrategy(ty, mod) == .direct) blk: { switch (operand) { // when the operand lives in the linear memory section, we can directly // load and splat the value at once. Meaning we do not first have to load // the scalar value onto the stack. 
.stack_offset, .memory, .memory_offset => { - const opcode = switch (elem_ty.bitSize(func.target)) { + const opcode = switch (elem_ty.bitSize(mod)) { 8 => std.wasm.simdOpcode(.v128_load8_splat), 16 => std.wasm.simdOpcode(.v128_load16_splat), 32 => std.wasm.simdOpcode(.v128_load32_splat), @@ -4657,18 +4891,18 @@ fn airSplat(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { try func.mir_extra.appendSlice(func.gpa, &[_]u32{ opcode, operand.offset(), - elem_ty.abiAlignment(func.target), + elem_ty.abiAlignment(mod), }); try func.addInst(.{ .tag = .simd_prefix, .data = .{ .payload = extra_index } }); try func.addLabel(.local_set, result.local.value); return func.finishAir(inst, result, &.{ty_op.operand}); }, .local => { - const opcode = switch (elem_ty.bitSize(func.target)) { + const opcode = switch (elem_ty.bitSize(mod)) { 8 => std.wasm.simdOpcode(.i8x16_splat), 16 => std.wasm.simdOpcode(.i16x8_splat), - 32 => if (elem_ty.isInt()) std.wasm.simdOpcode(.i32x4_splat) else std.wasm.simdOpcode(.f32x4_splat), - 64 => if (elem_ty.isInt()) std.wasm.simdOpcode(.i64x2_splat) else std.wasm.simdOpcode(.f64x2_splat), + 32 => if (elem_ty.isInt(mod)) std.wasm.simdOpcode(.i32x4_splat) else std.wasm.simdOpcode(.f32x4_splat), + 64 => if (elem_ty.isInt(mod)) std.wasm.simdOpcode(.i64x2_splat) else std.wasm.simdOpcode(.f64x2_splat), else => break :blk, // Cannot make use of simd-instructions }; const result = try func.allocLocal(ty); @@ -4682,14 +4916,14 @@ fn airSplat(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { else => unreachable, } } - const elem_size = elem_ty.bitSize(func.target); - const vector_len = @intCast(usize, ty.vectorLen()); + const elem_size = elem_ty.bitSize(mod); + const vector_len = @intCast(usize, ty.vectorLen(mod)); if ((!std.math.isPowerOfTwo(elem_size) or elem_size % 8 != 0) and vector_len > 1) { return func.fail("TODO: WebAssembly `@splat` for arbitrary element bitsize {d}", .{elem_size}); } const result = try func.allocStack(ty); - const elem_byte_size = @intCast(u32, elem_ty.abiSize(func.target)); + const elem_byte_size = @intCast(u32, elem_ty.abiSize(mod)); var index: usize = 0; var offset: u32 = 0; while (index < vector_len) : (index += 1) { @@ -4709,26 +4943,25 @@ fn airSelect(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { } fn airShuffle(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { - const inst_ty = func.air.typeOfIndex(inst); + const mod = func.bin_file.base.options.module.?; + const inst_ty = func.typeOfIndex(inst); const ty_pl = func.air.instructions.items(.data)[inst].ty_pl; const extra = func.air.extraData(Air.Shuffle, ty_pl.payload).data; const a = try func.resolveInst(extra.a); const b = try func.resolveInst(extra.b); - const mask = func.air.values[extra.mask]; + const mask = extra.mask.toValue(); const mask_len = extra.mask_len; - const child_ty = inst_ty.childType(); - const elem_size = child_ty.abiSize(func.target); + const child_ty = inst_ty.childType(mod); + const elem_size = child_ty.abiSize(mod); - const module = func.bin_file.base.options.module.?; // TODO: One of them could be by ref; handle in loop - if (isByRef(func.air.typeOf(extra.a), func.target) or isByRef(inst_ty, func.target)) { + if (isByRef(func.typeOf(extra.a), mod) or isByRef(inst_ty, mod)) { const result = try func.allocStack(inst_ty); for (0..mask_len) |index| { - var buf: Value.ElemValueBuffer = undefined; - const value = mask.elemValueBuffer(module, index, &buf).toSignedInt(func.target); + const value = (try mask.elemValue(mod, index)).toSignedInt(mod); try 
func.emitWValue(result); @@ -4748,8 +4981,7 @@ fn airShuffle(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { var lanes = std.mem.asBytes(operands[1..]); for (0..@intCast(usize, mask_len)) |index| { - var buf: Value.ElemValueBuffer = undefined; - const mask_elem = mask.elemValueBuffer(module, index, &buf).toSignedInt(func.target); + const mask_elem = (try mask.elemValue(mod, index)).toSignedInt(mod); const base_index = if (mask_elem >= 0) @intCast(u8, @intCast(i64, elem_size) * mask_elem) else @@ -4780,22 +5012,26 @@ fn airReduce(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { } fn airAggregateInit(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const mod = func.bin_file.base.options.module.?; const ty_pl = func.air.instructions.items(.data)[inst].ty_pl; - const result_ty = func.air.typeOfIndex(inst); - const len = @intCast(usize, result_ty.arrayLen()); + const result_ty = func.typeOfIndex(inst); + const len = @intCast(usize, result_ty.arrayLen(mod)); const elements = @ptrCast([]const Air.Inst.Ref, func.air.extra[ty_pl.payload..][0..len]); const result: WValue = result_value: { - switch (result_ty.zigTypeTag()) { + switch (result_ty.zigTypeTag(mod)) { .Array => { const result = try func.allocStack(result_ty); - const elem_ty = result_ty.childType(); - const elem_size = @intCast(u32, elem_ty.abiSize(func.target)); + const elem_ty = result_ty.childType(mod); + const elem_size = @intCast(u32, elem_ty.abiSize(mod)); + const sentinel = if (result_ty.sentinel(mod)) |sent| blk: { + break :blk try func.lowerConstant(sent, elem_ty); + } else null; // When the element type is by reference, we must copy the entire // value. It is therefore safer to move the offset pointer and store // each value individually, instead of using store offsets. - if (isByRef(elem_ty, func.target)) { + if (isByRef(elem_ty, mod)) { // copy stack pointer into a temporary local, which is // moved for each element to store each value in the right position. 
const offset = try func.buildPointerOffset(result, 0, .new); @@ -4803,10 +5039,13 @@ fn airAggregateInit(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { const elem_val = try func.resolveInst(elem); try func.store(offset, elem_val, elem_ty, 0); - if (elem_index < elements.len - 1) { + if (elem_index < elements.len - 1 and sentinel == null) { _ = try func.buildPointerOffset(offset, elem_size, .modify); } } + if (sentinel) |sent| { + try func.store(offset, sent, elem_ty, 0); + } } else { var offset: u32 = 0; for (elements) |elem| { @@ -4814,36 +5053,42 @@ fn airAggregateInit(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { try func.store(result, elem_val, elem_ty, offset); offset += elem_size; } + if (sentinel) |sent| { + try func.store(result, sent, elem_ty, offset); + } } break :result_value result; }, - .Struct => switch (result_ty.containerLayout()) { + .Struct => switch (result_ty.containerLayout(mod)) { .Packed => { - if (isByRef(result_ty, func.target)) { + if (isByRef(result_ty, mod)) { return func.fail("TODO: airAggregateInit for packed structs larger than 64 bits", .{}); } - const struct_obj = result_ty.castTag(.@"struct").?.data; + const struct_obj = mod.typeToStruct(result_ty).?; const fields = struct_obj.fields.values(); const backing_type = struct_obj.backing_int_ty; - // we ensure a new local is created so it's zero-initialized - const result = try func.ensureAllocLocal(backing_type); + + // ensure the result is zero'd + const result = try func.allocLocal(backing_type); + if (struct_obj.backing_int_ty.bitSize(mod) <= 32) + try func.addImm32(0) + else + try func.addImm64(0); + try func.addLabel(.local_set, result.local.value); + var current_bit: u16 = 0; for (elements, 0..) |elem, elem_index| { const field = fields[elem_index]; - if (!field.ty.hasRuntimeBitsIgnoreComptime()) continue; + if (!field.ty.hasRuntimeBitsIgnoreComptime(mod)) continue; - const shift_val = if (struct_obj.backing_int_ty.bitSize(func.target) <= 32) + const shift_val = if (struct_obj.backing_int_ty.bitSize(mod) <= 32) WValue{ .imm32 = current_bit } else WValue{ .imm64 = current_bit }; const value = try func.resolveInst(elem); - const value_bit_size = @intCast(u16, field.ty.bitSize(func.target)); - var int_ty_payload: Type.Payload.Bits = .{ - .base = .{ .tag = .int_unsigned }, - .data = value_bit_size, - }; - const int_ty = Type.initPayload(&int_ty_payload.base); + const value_bit_size = @intCast(u16, field.ty.bitSize(mod)); + const int_ty = try mod.intType(.unsigned, value_bit_size); // load our current result on stack so we can perform all transformations // using only stack values. Saving the cost of loads and stores. @@ -4865,10 +5110,10 @@ fn airAggregateInit(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { const result = try func.allocStack(result_ty); const offset = try func.buildPointerOffset(result, 0, .new); // pointer to offset for (elements, 0..) 
|elem, elem_index| { - if (result_ty.structFieldValueComptime(elem_index) != null) continue; + if ((try result_ty.structFieldValueComptime(mod, elem_index)) != null) continue; - const elem_ty = result_ty.structFieldType(elem_index); - const elem_size = @intCast(u32, elem_ty.abiSize(func.target)); + const elem_ty = result_ty.structFieldType(elem_index, mod); + const elem_size = @intCast(u32, elem_ty.abiSize(mod)); const value = try func.resolveInst(elem); try func.store(offset, value, elem_ty, 0); @@ -4884,42 +5129,88 @@ fn airAggregateInit(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { else => unreachable, } }; - // TODO: this is incorrect Liveness handling code - func.finishAir(inst, result, &.{}); + + if (elements.len <= Liveness.bpi - 1) { + var buf = [1]Air.Inst.Ref{.none} ** (Liveness.bpi - 1); + @memcpy(buf[0..elements.len], elements); + return func.finishAir(inst, result, &buf); + } + var bt = try func.iterateBigTomb(inst, elements.len); + for (elements) |arg| bt.feed(arg); + return bt.finishAir(result); } fn airUnionInit(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const mod = func.bin_file.base.options.module.?; const ty_pl = func.air.instructions.items(.data)[inst].ty_pl; const extra = func.air.extraData(Air.UnionInit, ty_pl.payload).data; const result = result: { - const union_ty = func.air.typeOfIndex(inst); - const layout = union_ty.unionGetLayout(func.target); + const union_ty = func.typeOfIndex(inst); + const layout = union_ty.unionGetLayout(mod); + const union_obj = mod.typeToUnion(union_ty).?; + const field = union_obj.fields.values()[extra.field_index]; + const field_name = union_obj.fields.keys()[extra.field_index]; + + const tag_int = blk: { + const tag_ty = union_ty.unionTagTypeHypothetical(mod); + const enum_field_index = tag_ty.enumFieldIndex(field_name, mod).?; + const tag_val = try mod.enumValueFieldIndex(tag_ty, enum_field_index); + break :blk try func.lowerConstant(tag_val, tag_ty); + }; if (layout.payload_size == 0) { if (layout.tag_size == 0) { break :result WValue{ .none = {} }; } - assert(!isByRef(union_ty, func.target)); - break :result WValue{ .imm32 = extra.field_index }; + assert(!isByRef(union_ty, mod)); + break :result tag_int; } - assert(isByRef(union_ty, func.target)); - const result_ptr = try func.allocStack(union_ty); - const payload = try func.resolveInst(extra.init); - const union_obj = union_ty.cast(Type.Payload.Union).?.data; - assert(union_obj.haveFieldTypes()); - const field = union_obj.fields.values()[extra.field_index]; + if (isByRef(union_ty, mod)) { + const result_ptr = try func.allocStack(union_ty); + const payload = try func.resolveInst(extra.init); + if (layout.tag_align >= layout.payload_align) { + if (isByRef(field.ty, mod)) { + const payload_ptr = try func.buildPointerOffset(result_ptr, layout.tag_size, .new); + try func.store(payload_ptr, payload, field.ty, 0); + } else { + try func.store(result_ptr, payload, field.ty, @intCast(u32, layout.tag_size)); + } - if (layout.tag_align >= layout.payload_align) { - const payload_ptr = try func.buildPointerOffset(result_ptr, layout.tag_size, .new); - try func.store(payload_ptr, payload, field.ty, 0); + if (layout.tag_size > 0) { + try func.store(result_ptr, tag_int, union_obj.tag_ty, 0); + } + } else { + try func.store(result_ptr, payload, field.ty, 0); + if (layout.tag_size > 0) { + try func.store( + result_ptr, + tag_int, + union_obj.tag_ty, + @intCast(u32, layout.payload_size), + ); + } + } + break :result result_ptr; } else { - try func.store(result_ptr, payload, 
field.ty, 0); + const operand = try func.resolveInst(extra.init); + const union_int_type = try mod.intType(.unsigned, @intCast(u16, union_ty.bitSize(mod))); + if (field.ty.zigTypeTag(mod) == .Float) { + const int_type = try mod.intType(.unsigned, @intCast(u16, field.ty.bitSize(mod))); + const bitcasted = try func.bitcast(field.ty, int_type, operand); + const casted = try func.trunc(bitcasted, int_type, union_int_type); + break :result try casted.toLocal(func, field.ty); + } else if (field.ty.isPtrAtRuntime(mod)) { + const int_type = try mod.intType(.unsigned, @intCast(u16, field.ty.bitSize(mod))); + const casted = try func.intcast(operand, int_type, union_int_type); + break :result try casted.toLocal(func, field.ty); + } + const casted = try func.intcast(operand, field.ty, union_int_type); + break :result try casted.toLocal(func, field.ty); } - break :result result_ptr; }; - func.finishAir(inst, result, &.{extra.init}); + return func.finishAir(inst, result, &.{extra.init}); } fn airPrefetch(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { @@ -4930,7 +5221,7 @@ fn airPrefetch(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { fn airWasmMemorySize(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { const pl_op = func.air.instructions.items(.data)[inst].pl_op; - const result = try func.allocLocal(func.air.typeOfIndex(inst)); + const result = try func.allocLocal(func.typeOfIndex(inst)); try func.addLabel(.memory_size, pl_op.payload); try func.addLabel(.local_set, result.local.value); func.finishAir(inst, result, &.{pl_op.operand}); @@ -4940,7 +5231,7 @@ fn airWasmMemoryGrow(func: *CodeGen, inst: Air.Inst.Index) !void { const pl_op = func.air.instructions.items(.data)[inst].pl_op; const operand = try func.resolveInst(pl_op.operand); - const result = try func.allocLocal(func.air.typeOfIndex(inst)); + const result = try func.allocLocal(func.typeOfIndex(inst)); try func.emitWValue(operand); try func.addLabel(.memory_grow, pl_op.payload); try func.addLabel(.local_set, result.local.value); @@ -4948,14 +5239,14 @@ fn airWasmMemoryGrow(func: *CodeGen, inst: Air.Inst.Index) !void { } fn cmpOptionals(func: *CodeGen, lhs: WValue, rhs: WValue, operand_ty: Type, op: std.math.CompareOperator) InnerError!WValue { - assert(operand_ty.hasRuntimeBitsIgnoreComptime()); + const mod = func.bin_file.base.options.module.?; + assert(operand_ty.hasRuntimeBitsIgnoreComptime(mod)); assert(op == .eq or op == .neq); - var buf: Type.Payload.ElemType = undefined; - const payload_ty = operand_ty.optionalChild(&buf); + const payload_ty = operand_ty.optionalChild(mod); // We store the final result in here that will be validated // if the optional is truly equal. - var result = try func.ensureAllocLocal(Type.initTag(.i32)); + var result = try func.ensureAllocLocal(Type.i32); defer result.free(func); try func.startBlock(.block, wasm.block_empty); @@ -4966,7 +5257,7 @@ fn cmpOptionals(func: *CodeGen, lhs: WValue, rhs: WValue, operand_ty: Type, op: _ = try func.load(lhs, payload_ty, 0); _ = try func.load(rhs, payload_ty, 0); - const opcode = buildOpcode(.{ .op = .ne, .valtype1 = typeToValtype(payload_ty, func.target) }); + const opcode = buildOpcode(.{ .op = .ne, .valtype1 = typeToValtype(payload_ty, mod) }); try func.addTag(Mir.Inst.Tag.fromOpcode(opcode)); try func.addLabel(.br_if, 0); @@ -4984,10 +5275,11 @@ fn cmpOptionals(func: *CodeGen, lhs: WValue, rhs: WValue, operand_ty: Type, op: /// NOTE: Leaves the result of the comparison on top of the stack. 
/// TODO: Lower this to compiler_rt call when bitsize > 128 fn cmpBigInt(func: *CodeGen, lhs: WValue, rhs: WValue, operand_ty: Type, op: std.math.CompareOperator) InnerError!WValue { - assert(operand_ty.abiSize(func.target) >= 16); + const mod = func.bin_file.base.options.module.?; + assert(operand_ty.abiSize(mod) >= 16); assert(!(lhs != .stack and rhs == .stack)); - if (operand_ty.intInfo(func.target).bits > 128) { - return func.fail("TODO: Support cmpBigInt for integer bitsize: '{d}'", .{operand_ty.intInfo(func.target).bits}); + if (operand_ty.bitSize(mod) > 128) { + return func.fail("TODO: Support cmpBigInt for integer bitsize: '{d}'", .{operand_ty.bitSize(mod)}); } var lhs_high_bit = try (try func.load(lhs, Type.u64, 0)).toLocal(func, Type.u64); @@ -5010,7 +5302,7 @@ fn cmpBigInt(func: *CodeGen, lhs: WValue, rhs: WValue, operand_ty: Type, op: std } }, else => { - const ty = if (operand_ty.isSignedInt()) Type.i64 else Type.u64; + const ty = if (operand_ty.isSignedInt(mod)) Type.i64 else Type.u64; // leave those value on top of the stack for '.select' const lhs_low_bit = try func.load(lhs, Type.u64, 8); const rhs_low_bit = try func.load(rhs, Type.u64, 8); @@ -5025,10 +5317,11 @@ fn cmpBigInt(func: *CodeGen, lhs: WValue, rhs: WValue, operand_ty: Type, op: std } fn airSetUnionTag(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const mod = func.bin_file.base.options.module.?; const bin_op = func.air.instructions.items(.data)[inst].bin_op; - const un_ty = func.air.typeOf(bin_op.lhs).childType(); - const tag_ty = func.air.typeOf(bin_op.rhs); - const layout = un_ty.unionGetLayout(func.target); + const un_ty = func.typeOf(bin_op.lhs).childType(mod); + const tag_ty = func.typeOf(bin_op.rhs); + const layout = un_ty.unionGetLayout(mod); if (layout.tag_size == 0) return func.finishAir(inst, .none, &.{ bin_op.lhs, bin_op.rhs }); const union_ptr = try func.resolveInst(bin_op.lhs); @@ -5048,11 +5341,12 @@ fn airSetUnionTag(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { } fn airGetUnionTag(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const mod = func.bin_file.base.options.module.?; const ty_op = func.air.instructions.items(.data)[inst].ty_op; - const un_ty = func.air.typeOf(ty_op.operand); - const tag_ty = func.air.typeOfIndex(inst); - const layout = un_ty.unionGetLayout(func.target); + const un_ty = func.typeOf(ty_op.operand); + const tag_ty = func.typeOfIndex(inst); + const layout = un_ty.unionGetLayout(mod); if (layout.tag_size == 0) return func.finishAir(inst, .none, &.{ty_op.operand}); const operand = try func.resolveInst(ty_op.operand); @@ -5069,9 +5363,9 @@ fn airGetUnionTag(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { fn airFpext(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { const ty_op = func.air.instructions.items(.data)[inst].ty_op; - const dest_ty = func.air.typeOfIndex(inst); + const dest_ty = func.typeOfIndex(inst); const operand = try func.resolveInst(ty_op.operand); - const extended = try func.fpext(operand, func.air.typeOf(ty_op.operand), dest_ty); + const extended = try func.fpext(operand, func.typeOf(ty_op.operand), dest_ty); const result = try extended.toLocal(func, dest_ty); func.finishAir(inst, result, &.{ty_op.operand}); } @@ -5090,7 +5384,7 @@ fn fpext(func: *CodeGen, operand: WValue, given: Type, wanted: Type) InnerError! 
// call __extendhfsf2(f16) f32 const f32_result = try func.callIntrinsic( "__extendhfsf2", - &.{Type.f16}, + &.{.f16_type}, Type.f32, &.{operand}, ); @@ -5108,15 +5402,15 @@ fn fpext(func: *CodeGen, operand: WValue, given: Type, wanted: Type) InnerError! target_util.compilerRtFloatAbbrev(wanted_bits), }) catch unreachable; - return func.callIntrinsic(fn_name, &.{given}, wanted, &.{operand}); + return func.callIntrinsic(fn_name, &.{given.ip_index}, wanted, &.{operand}); } fn airFptrunc(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { const ty_op = func.air.instructions.items(.data)[inst].ty_op; - const dest_ty = func.air.typeOfIndex(inst); + const dest_ty = func.typeOfIndex(inst); const operand = try func.resolveInst(ty_op.operand); - const truncated = try func.fptrunc(operand, func.air.typeOf(ty_op.operand), dest_ty); + const truncated = try func.fptrunc(operand, func.typeOf(ty_op.operand), dest_ty); const result = try truncated.toLocal(func, dest_ty); func.finishAir(inst, result, &.{ty_op.operand}); } @@ -5139,7 +5433,7 @@ fn fptrunc(func: *CodeGen, operand: WValue, given: Type, wanted: Type) InnerErro } else operand; // call __truncsfhf2(f32) f16 - return func.callIntrinsic("__truncsfhf2", &.{Type.f32}, Type.f16, &.{op}); + return func.callIntrinsic("__truncsfhf2", &.{.f32_type}, Type.f16, &.{op}); } var fn_name_buf: [12]u8 = undefined; @@ -5148,14 +5442,15 @@ fn fptrunc(func: *CodeGen, operand: WValue, given: Type, wanted: Type) InnerErro target_util.compilerRtFloatAbbrev(wanted_bits), }) catch unreachable; - return func.callIntrinsic(fn_name, &.{given}, wanted, &.{operand}); + return func.callIntrinsic(fn_name, &.{given.ip_index}, wanted, &.{operand}); } fn airErrUnionPayloadPtrSet(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const mod = func.bin_file.base.options.module.?; const ty_op = func.air.instructions.items(.data)[inst].ty_op; - const err_set_ty = func.air.typeOf(ty_op.operand).childType(); - const payload_ty = err_set_ty.errorUnionPayload(); + const err_set_ty = func.typeOf(ty_op.operand).childType(mod); + const payload_ty = err_set_ty.errorUnionPayload(mod); const operand = try func.resolveInst(ty_op.operand); // set error-tag to '0' to annotate error union is non-error @@ -5163,26 +5458,27 @@ fn airErrUnionPayloadPtrSet(func: *CodeGen, inst: Air.Inst.Index) InnerError!voi operand, .{ .imm32 = 0 }, Type.anyerror, - @intCast(u32, errUnionErrorOffset(payload_ty, func.target)), + @intCast(u32, errUnionErrorOffset(payload_ty, mod)), ); const result = result: { - if (!payload_ty.hasRuntimeBitsIgnoreComptime()) { + if (!payload_ty.hasRuntimeBitsIgnoreComptime(mod)) { break :result func.reuseOperand(ty_op.operand, operand); } - break :result try func.buildPointerOffset(operand, @intCast(u32, errUnionPayloadOffset(payload_ty, func.target)), .new); + break :result try func.buildPointerOffset(operand, @intCast(u32, errUnionPayloadOffset(payload_ty, mod)), .new); }; func.finishAir(inst, result, &.{ty_op.operand}); } fn airFieldParentPtr(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const mod = func.bin_file.base.options.module.?; const ty_pl = func.air.instructions.items(.data)[inst].ty_pl; const extra = func.air.extraData(Air.FieldParentPtr, ty_pl.payload).data; const field_ptr = try func.resolveInst(extra.field_ptr); - const parent_ty = func.air.getRefType(ty_pl.ty).childType(); - const field_offset = parent_ty.structFieldOffset(extra.field_index, func.target); + const parent_ty = func.air.getRefType(ty_pl.ty).childType(mod); + const field_offset = 
parent_ty.structFieldOffset(extra.field_index, mod); const result = if (field_offset != 0) result: { const base = try func.buildPointerOffset(field_ptr, 0, .new); @@ -5197,7 +5493,8 @@ fn airFieldParentPtr(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { } fn sliceOrArrayPtr(func: *CodeGen, ptr: WValue, ptr_ty: Type) InnerError!WValue { - if (ptr_ty.isSlice()) { + const mod = func.bin_file.base.options.module.?; + if (ptr_ty.isSlice(mod)) { return func.slicePtr(ptr); } else { return ptr; @@ -5205,14 +5502,27 @@ fn sliceOrArrayPtr(func: *CodeGen, ptr: WValue, ptr_ty: Type) InnerError!WValue } fn airMemcpy(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const mod = func.bin_file.base.options.module.?; const bin_op = func.air.instructions.items(.data)[inst].bin_op; const dst = try func.resolveInst(bin_op.lhs); - const dst_ty = func.air.typeOf(bin_op.lhs); + const dst_ty = func.typeOf(bin_op.lhs); + const ptr_elem_ty = dst_ty.childType(mod); const src = try func.resolveInst(bin_op.rhs); - const src_ty = func.air.typeOf(bin_op.rhs); - const len = switch (dst_ty.ptrSize()) { - .Slice => try func.sliceLen(dst), - .One => @as(WValue, .{ .imm64 = dst_ty.childType().arrayLen() }), + const src_ty = func.typeOf(bin_op.rhs); + const len = switch (dst_ty.ptrSize(mod)) { + .Slice => blk: { + const slice_len = try func.sliceLen(dst); + if (ptr_elem_ty.abiSize(mod) != 1) { + try func.emitWValue(slice_len); + try func.emitWValue(.{ .imm32 = @intCast(u32, ptr_elem_ty.abiSize(mod)) }); + try func.addTag(.i32_mul); + try func.addLabel(.local_set, slice_len.local.value); + } + break :blk slice_len; + }, + .One => @as(WValue, .{ + .imm32 = @intCast(u32, ptr_elem_ty.arrayLen(mod) * ptr_elem_ty.childType(mod).abiSize(mod)), + }), .C, .Many => unreachable, }; const dst_ptr = try func.sliceOrArrayPtr(dst, dst_ty); @@ -5232,17 +5542,18 @@ fn airRetAddr(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { } fn airPopcount(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const mod = func.bin_file.base.options.module.?; const ty_op = func.air.instructions.items(.data)[inst].ty_op; const operand = try func.resolveInst(ty_op.operand); - const op_ty = func.air.typeOf(ty_op.operand); - const result_ty = func.air.typeOfIndex(inst); + const op_ty = func.typeOf(ty_op.operand); + const result_ty = func.typeOfIndex(inst); - if (op_ty.zigTypeTag() == .Vector) { + if (op_ty.zigTypeTag(mod) == .Vector) { return func.fail("TODO: Implement @popCount for vectors", .{}); } - const int_info = op_ty.intInfo(func.target); + const int_info = op_ty.intInfo(mod); const bits = int_info.bits; const wasm_bits = toWasmBits(bits) orelse { return func.fail("TODO: Implement @popCount for integers with bitsize '{d}'", .{bits}); @@ -5291,8 +5602,9 @@ fn airErrorName(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { // As the names are global and the slice elements are constant, we do not have // to make a copy of the ptr+value but can point towards them directly. 
const error_table_symbol = try func.bin_file.getErrorTableSymbol(); - const name_ty = Type.initTag(.const_slice_u8_sentinel_0); - const abi_size = name_ty.abiSize(func.target); + const name_ty = Type.slice_const_u8_sentinel_0; + const mod = func.bin_file.base.options.module.?; + const abi_size = name_ty.abiSize(mod); const error_name_value: WValue = .{ .memory = error_table_symbol }; // emitting this will create a relocation try func.emitWValue(error_name_value); @@ -5330,20 +5642,21 @@ fn airAddSubWithOverflow(func: *CodeGen, inst: Air.Inst.Index, op: Op) InnerErro const lhs_op = try func.resolveInst(extra.lhs); const rhs_op = try func.resolveInst(extra.rhs); - const lhs_ty = func.air.typeOf(extra.lhs); + const lhs_ty = func.typeOf(extra.lhs); + const mod = func.bin_file.base.options.module.?; - if (lhs_ty.zigTypeTag() == .Vector) { + if (lhs_ty.zigTypeTag(mod) == .Vector) { return func.fail("TODO: Implement overflow arithmetic for vectors", .{}); } - const int_info = lhs_ty.intInfo(func.target); + const int_info = lhs_ty.intInfo(mod); const is_signed = int_info.signedness == .signed; const wasm_bits = toWasmBits(int_info.bits) orelse { return func.fail("TODO: Implement {{add/sub}}_with_overflow for integer bitsize: {d}", .{int_info.bits}); }; if (wasm_bits == 128) { - const result = try func.addSubWithOverflowBigInt(lhs_op, rhs_op, lhs_ty, func.air.typeOfIndex(inst), op); + const result = try func.addSubWithOverflowBigInt(lhs_op, rhs_op, lhs_ty, func.typeOfIndex(inst), op); return func.finishAir(inst, result, &.{ extra.lhs, extra.rhs }); } @@ -5372,11 +5685,10 @@ fn airAddSubWithOverflow(func: *CodeGen, inst: Air.Inst.Index, op: Op) InnerErro }; var bin_op = try (try func.binOp(lhs, rhs, lhs_ty, op)).toLocal(func, lhs_ty); - defer bin_op.free(func); var result = if (wasm_bits != int_info.bits) blk: { break :blk try (try func.wrapOperand(bin_op, lhs_ty)).toLocal(func, lhs_ty); } else bin_op; - defer result.free(func); // no-op when wasm_bits == int_info.bits + defer result.free(func); const cmp_op: std.math.CompareOperator = if (op == .sub) .gt else .lt; const overflow_bit: WValue = if (is_signed) blk: { @@ -5394,17 +5706,18 @@ fn airAddSubWithOverflow(func: *CodeGen, inst: Air.Inst.Index, op: Op) InnerErro var overflow_local = try overflow_bit.toLocal(func, Type.u32); defer overflow_local.free(func); - const result_ptr = try func.allocStack(func.air.typeOfIndex(inst)); + const result_ptr = try func.allocStack(func.typeOfIndex(inst)); try func.store(result_ptr, result, lhs_ty, 0); - const offset = @intCast(u32, lhs_ty.abiSize(func.target)); - try func.store(result_ptr, overflow_local, Type.initTag(.u1), offset); + const offset = @intCast(u32, lhs_ty.abiSize(mod)); + try func.store(result_ptr, overflow_local, Type.u1, offset); func.finishAir(inst, result_ptr, &.{ extra.lhs, extra.rhs }); } fn addSubWithOverflowBigInt(func: *CodeGen, lhs: WValue, rhs: WValue, ty: Type, result_ty: Type, op: Op) InnerError!WValue { + const mod = func.bin_file.base.options.module.?; assert(op == .add or op == .sub); - const int_info = ty.intInfo(func.target); + const int_info = ty.intInfo(mod); const is_signed = int_info.signedness == .signed; if (int_info.bits != 128) { return func.fail("TODO: Implement @{{add/sub}}WithOverflow for integer bitsize '{d}'", .{int_info.bits}); @@ -5455,36 +5768,46 @@ fn addSubWithOverflowBigInt(func: *CodeGen, lhs: WValue, rhs: WValue, ty: Type, break :blk WValue{ .stack = {} }; }; - var overflow_local = try overflow_bit.toLocal(func, Type.initTag(.u1)); + var overflow_local = 
try overflow_bit.toLocal(func, Type.u1); defer overflow_local.free(func); const result_ptr = try func.allocStack(result_ty); try func.store(result_ptr, high_op_res, Type.u64, 0); try func.store(result_ptr, tmp_op, Type.u64, 8); - try func.store(result_ptr, overflow_local, Type.initTag(.u1), 16); + try func.store(result_ptr, overflow_local, Type.u1, 16); return result_ptr; } fn airShlWithOverflow(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const mod = func.bin_file.base.options.module.?; const ty_pl = func.air.instructions.items(.data)[inst].ty_pl; const extra = func.air.extraData(Air.Bin, ty_pl.payload).data; const lhs = try func.resolveInst(extra.lhs); const rhs = try func.resolveInst(extra.rhs); - const lhs_ty = func.air.typeOf(extra.lhs); + const lhs_ty = func.typeOf(extra.lhs); + const rhs_ty = func.typeOf(extra.rhs); - if (lhs_ty.zigTypeTag() == .Vector) { + if (lhs_ty.zigTypeTag(mod) == .Vector) { return func.fail("TODO: Implement overflow arithmetic for vectors", .{}); } - const int_info = lhs_ty.intInfo(func.target); + const int_info = lhs_ty.intInfo(mod); const is_signed = int_info.signedness == .signed; const wasm_bits = toWasmBits(int_info.bits) orelse { return func.fail("TODO: Implement shl_with_overflow for integer bitsize: {d}", .{int_info.bits}); }; - var shl = try (try func.binOp(lhs, rhs, lhs_ty, .shl)).toLocal(func, lhs_ty); + // Ensure rhs is coerced to lhs as they must have the same WebAssembly types + // before we can perform any binary operation. + const rhs_wasm_bits = toWasmBits(rhs_ty.intInfo(mod).bits).?; + const rhs_final = if (wasm_bits != rhs_wasm_bits) blk: { + const rhs_casted = try func.intcast(rhs, rhs_ty, lhs_ty); + break :blk try rhs_casted.toLocal(func, lhs_ty); + } else rhs; + + var shl = try (try func.binOp(lhs, rhs_final, lhs_ty, .shl)).toLocal(func, lhs_ty); defer shl.free(func); var result = if (wasm_bits != int_info.bits) blk: { break :blk try (try func.wrapOperand(shl, lhs_ty)).toLocal(func, lhs_ty); @@ -5495,20 +5818,20 @@ fn airShlWithOverflow(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { // emit lhs to stack to we can keep 'wrapped' on the stack also try func.emitWValue(lhs); const abs = try func.signAbsValue(shl, lhs_ty); - const wrapped = try func.wrapBinOp(abs, rhs, lhs_ty, .shr); + const wrapped = try func.wrapBinOp(abs, rhs_final, lhs_ty, .shr); break :blk try func.cmp(.{ .stack = {} }, wrapped, lhs_ty, .neq); } else blk: { try func.emitWValue(lhs); - const shr = try func.binOp(result, rhs, lhs_ty, .shr); + const shr = try func.binOp(result, rhs_final, lhs_ty, .shr); break :blk try func.cmp(.{ .stack = {} }, shr, lhs_ty, .neq); }; - var overflow_local = try overflow_bit.toLocal(func, Type.initTag(.u1)); + var overflow_local = try overflow_bit.toLocal(func, Type.u1); defer overflow_local.free(func); - const result_ptr = try func.allocStack(func.air.typeOfIndex(inst)); + const result_ptr = try func.allocStack(func.typeOfIndex(inst)); try func.store(result_ptr, result, lhs_ty, 0); - const offset = @intCast(u32, lhs_ty.abiSize(func.target)); - try func.store(result_ptr, overflow_local, Type.initTag(.u1), offset); + const offset = @intCast(u32, lhs_ty.abiSize(mod)); + try func.store(result_ptr, overflow_local, Type.u1, offset); func.finishAir(inst, result_ptr, &.{ extra.lhs, extra.rhs }); } @@ -5519,29 +5842,26 @@ fn airMulWithOverflow(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { const lhs = try func.resolveInst(extra.lhs); const rhs = try func.resolveInst(extra.rhs); - const lhs_ty = 
func.air.typeOf(extra.lhs); + const lhs_ty = func.typeOf(extra.lhs); + const mod = func.bin_file.base.options.module.?; - if (lhs_ty.zigTypeTag() == .Vector) { + if (lhs_ty.zigTypeTag(mod) == .Vector) { return func.fail("TODO: Implement overflow arithmetic for vectors", .{}); } // We store the bit if it's overflowed or not in this. As it's zero-initialized // we only need to update it if an overflow (or underflow) occurred. - var overflow_bit = try func.ensureAllocLocal(Type.initTag(.u1)); + var overflow_bit = try func.ensureAllocLocal(Type.u1); defer overflow_bit.free(func); - const int_info = lhs_ty.intInfo(func.target); + const int_info = lhs_ty.intInfo(mod); const wasm_bits = toWasmBits(int_info.bits) orelse { - return func.fail("TODO: Implement overflow arithmetic for integer bitsize: {d}", .{int_info.bits}); - }; - - if (wasm_bits > 32) { return func.fail("TODO: Implement `@mulWithOverflow` for integer bitsize: {d}", .{int_info.bits}); - } + }; const zero = switch (wasm_bits) { 32 => WValue{ .imm32 = 0 }, - 64 => WValue{ .imm64 = 0 }, + 64, 128 => WValue{ .imm64 = 0 }, else => unreachable, }; @@ -5568,7 +5888,7 @@ fn airMulWithOverflow(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { try func.addLabel(.local_set, overflow_bit.local.value); break :blk down_cast; } - } else if (int_info.signedness == .signed) blk: { + } else if (int_info.signedness == .signed and wasm_bits == 32) blk: { const lhs_abs = try func.signAbsValue(lhs, lhs_ty); const rhs_abs = try func.signAbsValue(rhs, lhs_ty); const bin_op = try (try func.binOp(lhs_abs, rhs_abs, lhs_ty, .mul)).toLocal(func, lhs_ty); @@ -5576,7 +5896,7 @@ fn airMulWithOverflow(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { _ = try func.cmp(mul_abs, bin_op, lhs_ty, .neq); try func.addLabel(.local_set, overflow_bit.local.value); break :blk try func.wrapOperand(bin_op, lhs_ty); - } else blk: { + } else if (wasm_bits == 32) blk: { var bin_op = try (try func.binOp(lhs, rhs, lhs_ty, .mul)).toLocal(func, lhs_ty); defer bin_op.free(func); const shift_imm = if (wasm_bits == 32) @@ -5587,27 +5907,120 @@ fn airMulWithOverflow(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { _ = try func.cmp(shr, zero, lhs_ty, .neq); try func.addLabel(.local_set, overflow_bit.local.value); break :blk try func.wrapOperand(bin_op, lhs_ty); - }; + } else if (int_info.bits == 64 and int_info.signedness == .unsigned) blk: { + const new_ty = Type.u128; + var lhs_upcast = try (try func.intcast(lhs, lhs_ty, new_ty)).toLocal(func, lhs_ty); + defer lhs_upcast.free(func); + var rhs_upcast = try (try func.intcast(rhs, lhs_ty, new_ty)).toLocal(func, lhs_ty); + defer rhs_upcast.free(func); + const bin_op = try func.binOp(lhs_upcast, rhs_upcast, new_ty, .mul); + const lsb = try func.load(bin_op, lhs_ty, 8); + _ = try func.cmp(lsb, zero, lhs_ty, .neq); + try func.addLabel(.local_set, overflow_bit.local.value); + + break :blk try func.load(bin_op, lhs_ty, 0); + } else if (int_info.bits == 64 and int_info.signedness == .signed) blk: { + const shift_val: WValue = .{ .imm64 = 63 }; + var lhs_shifted = try (try func.binOp(lhs, shift_val, lhs_ty, .shr)).toLocal(func, lhs_ty); + defer lhs_shifted.free(func); + var rhs_shifted = try (try func.binOp(rhs, shift_val, lhs_ty, .shr)).toLocal(func, lhs_ty); + defer rhs_shifted.free(func); + + const bin_op = try func.callIntrinsic( + "__multi3", + &[_]InternPool.Index{.i64_type} ** 4, + Type.i128, + &.{ lhs, lhs_shifted, rhs, rhs_shifted }, + ); + const res = try func.allocLocal(lhs_ty); + const msb = try func.load(bin_op, 
lhs_ty, 0); + try func.addLabel(.local_tee, res.local.value); + const msb_shifted = try func.binOp(msb, shift_val, lhs_ty, .shr); + const lsb = try func.load(bin_op, lhs_ty, 8); + _ = try func.cmp(lsb, msb_shifted, lhs_ty, .neq); + try func.addLabel(.local_set, overflow_bit.local.value); + break :blk res; + } else if (int_info.bits == 128 and int_info.signedness == .unsigned) blk: { + var lhs_msb = try (try func.load(lhs, Type.u64, 0)).toLocal(func, Type.u64); + defer lhs_msb.free(func); + var lhs_lsb = try (try func.load(lhs, Type.u64, 8)).toLocal(func, Type.u64); + defer lhs_lsb.free(func); + var rhs_msb = try (try func.load(rhs, Type.u64, 0)).toLocal(func, Type.u64); + defer rhs_msb.free(func); + var rhs_lsb = try (try func.load(rhs, Type.u64, 8)).toLocal(func, Type.u64); + defer rhs_lsb.free(func); + + const mul1 = try func.callIntrinsic( + "__multi3", + &[_]InternPool.Index{.i64_type} ** 4, + Type.i128, + &.{ lhs_lsb, zero, rhs_msb, zero }, + ); + const mul2 = try func.callIntrinsic( + "__multi3", + &[_]InternPool.Index{.i64_type} ** 4, + Type.i128, + &.{ rhs_lsb, zero, lhs_msb, zero }, + ); + const mul3 = try func.callIntrinsic( + "__multi3", + &[_]InternPool.Index{.i64_type} ** 4, + Type.i128, + &.{ lhs_msb, zero, rhs_msb, zero }, + ); + + const rhs_lsb_not_zero = try func.cmp(rhs_lsb, zero, Type.u64, .neq); + const lhs_lsb_not_zero = try func.cmp(lhs_lsb, zero, Type.u64, .neq); + const lsb_and = try func.binOp(rhs_lsb_not_zero, lhs_lsb_not_zero, Type.bool, .@"and"); + const mul1_lsb = try func.load(mul1, Type.u64, 8); + const mul1_lsb_not_zero = try func.cmp(mul1_lsb, zero, Type.u64, .neq); + const lsb_or1 = try func.binOp(lsb_and, mul1_lsb_not_zero, Type.bool, .@"or"); + const mul2_lsb = try func.load(mul2, Type.u64, 8); + const mul2_lsb_not_zero = try func.cmp(mul2_lsb, zero, Type.u64, .neq); + const lsb_or = try func.binOp(lsb_or1, mul2_lsb_not_zero, Type.bool, .@"or"); + + const mul1_msb = try func.load(mul1, Type.u64, 0); + const mul2_msb = try func.load(mul2, Type.u64, 0); + const mul_add1 = try func.binOp(mul1_msb, mul2_msb, Type.u64, .add); + + var mul3_lsb = try (try func.load(mul3, Type.u64, 8)).toLocal(func, Type.u64); + defer mul3_lsb.free(func); + var mul_add2 = try (try func.binOp(mul_add1, mul3_lsb, Type.u64, .add)).toLocal(func, Type.u64); + defer mul_add2.free(func); + const mul_add_lt = try func.cmp(mul_add2, mul3_lsb, Type.u64, .lt); + + // result for overflow bit + _ = try func.binOp(lsb_or, mul_add_lt, Type.bool, .@"or"); + try func.addLabel(.local_set, overflow_bit.local.value); + + const tmp_result = try func.allocStack(Type.u128); + try func.emitWValue(tmp_result); + const mul3_msb = try func.load(mul3, Type.u64, 0); + try func.store(.stack, mul3_msb, Type.u64, tmp_result.offset()); + try func.store(tmp_result, mul_add2, Type.u64, 8); + break :blk tmp_result; + } else return func.fail("TODO: @mulWithOverflow for integers between 32 and 64 bits", .{}); var bin_op_local = try bin_op.toLocal(func, lhs_ty); defer bin_op_local.free(func); - const result_ptr = try func.allocStack(func.air.typeOfIndex(inst)); + const result_ptr = try func.allocStack(func.typeOfIndex(inst)); try func.store(result_ptr, bin_op_local, lhs_ty, 0); - const offset = @intCast(u32, lhs_ty.abiSize(func.target)); - try func.store(result_ptr, overflow_bit, Type.initTag(.u1), offset); + const offset = @intCast(u32, lhs_ty.abiSize(mod)); + try func.store(result_ptr, overflow_bit, Type.u1, offset); func.finishAir(inst, result_ptr, &.{ extra.lhs, extra.rhs }); } fn airMaxMin(func: *CodeGen, inst: 
Air.Inst.Index, op: enum { max, min }) InnerError!void { + const mod = func.bin_file.base.options.module.?; const bin_op = func.air.instructions.items(.data)[inst].bin_op; - const ty = func.air.typeOfIndex(inst); - if (ty.zigTypeTag() == .Vector) { + const ty = func.typeOfIndex(inst); + if (ty.zigTypeTag(mod) == .Vector) { return func.fail("TODO: `@maximum` and `@minimum` for vectors", .{}); } - if (ty.abiSize(func.target) > 16) { + if (ty.abiSize(mod) > 16) { return func.fail("TODO: `@maximum` and `@minimum` for types larger than 16 bytes", .{}); } @@ -5623,18 +6036,19 @@ fn airMaxMin(func: *CodeGen, inst: Air.Inst.Index, op: enum { max, min }) InnerE try func.addTag(.select); // store result in local - const result_ty = if (isByRef(ty, func.target)) Type.u32 else ty; + const result_ty = if (isByRef(ty, mod)) Type.u32 else ty; const result = try func.allocLocal(result_ty); try func.addLabel(.local_set, result.local.value); func.finishAir(inst, result, &.{ bin_op.lhs, bin_op.rhs }); } fn airMulAdd(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const mod = func.bin_file.base.options.module.?; const pl_op = func.air.instructions.items(.data)[inst].pl_op; const bin_op = func.air.extraData(Air.Bin, pl_op.payload).data; - const ty = func.air.typeOfIndex(inst); - if (ty.zigTypeTag() == .Vector) { + const ty = func.typeOfIndex(inst); + if (ty.zigTypeTag(mod) == .Vector) { return func.fail("TODO: `@mulAdd` for vectors", .{}); } @@ -5649,7 +6063,7 @@ fn airMulAdd(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { // call to compiler-rt `fn fmaf(f32, f32, f32) f32` var result = try func.callIntrinsic( "fmaf", - &.{ Type.f32, Type.f32, Type.f32 }, + &.{ .f32_type, .f32_type, .f32_type }, Type.f32, &.{ rhs_ext, lhs_ext, addend_ext }, ); @@ -5663,16 +6077,17 @@ fn airMulAdd(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { } fn airClz(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const mod = func.bin_file.base.options.module.?; const ty_op = func.air.instructions.items(.data)[inst].ty_op; - const ty = func.air.typeOf(ty_op.operand); - const result_ty = func.air.typeOfIndex(inst); - if (ty.zigTypeTag() == .Vector) { + const ty = func.typeOf(ty_op.operand); + const result_ty = func.typeOfIndex(inst); + if (ty.zigTypeTag(mod) == .Vector) { return func.fail("TODO: `@clz` for vectors", .{}); } const operand = try func.resolveInst(ty_op.operand); - const int_info = ty.intInfo(func.target); + const int_info = ty.intInfo(mod); const wasm_bits = toWasmBits(int_info.bits) orelse { return func.fail("TODO: `@clz` for integers with bitsize '{d}'", .{int_info.bits}); }; @@ -5715,17 +6130,18 @@ fn airClz(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { } fn airCtz(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const mod = func.bin_file.base.options.module.?; const ty_op = func.air.instructions.items(.data)[inst].ty_op; - const ty = func.air.typeOf(ty_op.operand); - const result_ty = func.air.typeOfIndex(inst); + const ty = func.typeOf(ty_op.operand); + const result_ty = func.typeOfIndex(inst); - if (ty.zigTypeTag() == .Vector) { + if (ty.zigTypeTag(mod) == .Vector) { return func.fail("TODO: `@ctz` for vectors", .{}); } const operand = try func.resolveInst(ty_op.operand); - const int_info = ty.intInfo(func.target); + const int_info = ty.intInfo(mod); const wasm_bits = toWasmBits(int_info.bits) orelse { return func.fail("TODO: `@clz` for integers with bitsize '{d}'", .{int_info.bits}); }; @@ -5782,7 +6198,7 @@ fn airDbgVar(func: *CodeGen, inst: Air.Inst.Index, is_ptr: 
bool) !void { if (func.debug_output != .dwarf) return func.finishAir(inst, .none, &.{}); const pl_op = func.air.instructions.items(.data)[inst].pl_op; - const ty = func.air.typeOf(pl_op.operand); + const ty = func.typeOf(pl_op.operand); const operand = try func.resolveInst(pl_op.operand); log.debug("airDbgVar: %{d}: {}, {}", .{ inst, ty.fmtDebug(), operand }); @@ -5820,50 +6236,61 @@ fn airTry(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { const err_union = try func.resolveInst(pl_op.operand); const extra = func.air.extraData(Air.Try, pl_op.payload); const body = func.air.extra[extra.end..][0..extra.data.body_len]; - const err_union_ty = func.air.typeOf(pl_op.operand); - const result = try lowerTry(func, err_union, body, err_union_ty, false); + const err_union_ty = func.typeOf(pl_op.operand); + const result = try lowerTry(func, inst, err_union, body, err_union_ty, false); func.finishAir(inst, result, &.{pl_op.operand}); } fn airTryPtr(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const mod = func.bin_file.base.options.module.?; const ty_pl = func.air.instructions.items(.data)[inst].ty_pl; const extra = func.air.extraData(Air.TryPtr, ty_pl.payload); const err_union_ptr = try func.resolveInst(extra.data.ptr); const body = func.air.extra[extra.end..][0..extra.data.body_len]; - const err_union_ty = func.air.typeOf(extra.data.ptr).childType(); - const result = try lowerTry(func, err_union_ptr, body, err_union_ty, true); + const err_union_ty = func.typeOf(extra.data.ptr).childType(mod); + const result = try lowerTry(func, inst, err_union_ptr, body, err_union_ty, true); func.finishAir(inst, result, &.{extra.data.ptr}); } fn lowerTry( func: *CodeGen, + inst: Air.Inst.Index, err_union: WValue, body: []const Air.Inst.Index, err_union_ty: Type, operand_is_ptr: bool, ) InnerError!WValue { + const mod = func.bin_file.base.options.module.?; if (operand_is_ptr) { return func.fail("TODO: lowerTry for pointers", .{}); } - const pl_ty = err_union_ty.errorUnionPayload(); - const pl_has_bits = pl_ty.hasRuntimeBitsIgnoreComptime(); + const pl_ty = err_union_ty.errorUnionPayload(mod); + const pl_has_bits = pl_ty.hasRuntimeBitsIgnoreComptime(mod); - if (!err_union_ty.errorUnionSet().errorSetIsEmpty()) { + if (!err_union_ty.errorUnionSet(mod).errorSetIsEmpty(mod)) { // Block we can jump out of when error is not set try func.startBlock(.block, wasm.block_empty); // check if the error tag is set for the error union. 
try func.emitWValue(err_union); if (pl_has_bits) { - const err_offset = @intCast(u32, errUnionErrorOffset(pl_ty, func.target)); + const err_offset = @intCast(u32, errUnionErrorOffset(pl_ty, mod)); try func.addMemArg(.i32_load16_u, .{ .offset = err_union.offset() + err_offset, - .alignment = Type.anyerror.abiAlignment(func.target), + .alignment = Type.anyerror.abiAlignment(mod), }); } try func.addTag(.i32_eqz); try func.addLabel(.br_if, 0); // jump out of block when error is '0' + + const liveness = func.liveness.getCondBr(inst); + try func.branches.append(func.gpa, .{}); + try func.currentBranch().values.ensureUnusedCapacity(func.gpa, liveness.else_deaths.len + liveness.then_deaths.len); + defer { + var branch = func.branches.pop(); + branch.deinit(func.gpa); + } try func.genBody(body); try func.endBlock(); } @@ -5873,8 +6300,8 @@ fn lowerTry( return WValue{ .none = {} }; } - const pl_offset = @intCast(u32, errUnionPayloadOffset(pl_ty, func.target)); - if (isByRef(pl_ty, func.target)) { + const pl_offset = @intCast(u32, errUnionPayloadOffset(pl_ty, mod)); + if (isByRef(pl_ty, mod)) { return buildPointerOffset(func, err_union, pl_offset, .new); } const payload = try func.load(err_union, pl_ty, pl_offset); @@ -5882,15 +6309,16 @@ fn lowerTry( } fn airByteSwap(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const mod = func.bin_file.base.options.module.?; const ty_op = func.air.instructions.items(.data)[inst].ty_op; - const ty = func.air.typeOfIndex(inst); + const ty = func.typeOfIndex(inst); const operand = try func.resolveInst(ty_op.operand); - if (ty.zigTypeTag() == .Vector) { + if (ty.zigTypeTag(mod) == .Vector) { return func.fail("TODO: @byteSwap for vectors", .{}); } - const int_info = ty.intInfo(func.target); + const int_info = ty.intInfo(mod); // bytes are no-op if (int_info.bits == 8) { @@ -5952,31 +6380,54 @@ fn airByteSwap(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { } fn airDiv(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const mod = func.bin_file.base.options.module.?; const bin_op = func.air.instructions.items(.data)[inst].bin_op; - const ty = func.air.typeOfIndex(inst); + const ty = func.typeOfIndex(inst); const lhs = try func.resolveInst(bin_op.lhs); const rhs = try func.resolveInst(bin_op.rhs); - const result = if (ty.isSignedInt()) + const result = if (ty.isSignedInt(mod)) try func.divSigned(lhs, rhs, ty) else try (try func.binOp(lhs, rhs, ty, .div)).toLocal(func, ty); func.finishAir(inst, result, &.{ bin_op.lhs, bin_op.rhs }); } +fn airDivTrunc(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const mod = func.bin_file.base.options.module.?; + const bin_op = func.air.instructions.items(.data)[inst].bin_op; + + const ty = func.typeOfIndex(inst); + const lhs = try func.resolveInst(bin_op.lhs); + const rhs = try func.resolveInst(bin_op.rhs); + + const div_result = if (ty.isSignedInt(mod)) + try func.divSigned(lhs, rhs, ty) + else + try (try func.binOp(lhs, rhs, ty, .div)).toLocal(func, ty); + + if (ty.isAnyFloat()) { + const trunc_result = try (try func.floatOp(.trunc, ty, &.{div_result})).toLocal(func, ty); + return func.finishAir(inst, trunc_result, &.{ bin_op.lhs, bin_op.rhs }); + } + + return func.finishAir(inst, div_result, &.{ bin_op.lhs, bin_op.rhs }); +} + fn airDivFloor(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { const bin_op = func.air.instructions.items(.data)[inst].bin_op; - const ty = func.air.typeOfIndex(inst); + const mod = func.bin_file.base.options.module.?; + const ty = func.typeOfIndex(inst); const lhs = 
try func.resolveInst(bin_op.lhs); const rhs = try func.resolveInst(bin_op.rhs); - if (ty.isUnsignedInt()) { + if (ty.isUnsignedInt(mod)) { const result = try (try func.binOp(lhs, rhs, ty, .div)).toLocal(func, ty); return func.finishAir(inst, result, &.{ bin_op.lhs, bin_op.rhs }); - } else if (ty.isSignedInt()) { - const int_bits = ty.intInfo(func.target).bits; + } else if (ty.isSignedInt(mod)) { + const int_bits = ty.intInfo(mod).bits; const wasm_bits = toWasmBits(int_bits) orelse { return func.fail("TODO: `@divFloor` for signed integers larger than '{d}' bits", .{int_bits}); }; @@ -6054,7 +6505,8 @@ fn airDivFloor(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { } fn divSigned(func: *CodeGen, lhs: WValue, rhs: WValue, ty: Type) InnerError!WValue { - const int_bits = ty.intInfo(func.target).bits; + const mod = func.bin_file.base.options.module.?; + const int_bits = ty.intInfo(mod).bits; const wasm_bits = toWasmBits(int_bits) orelse { return func.fail("TODO: Implement signed division for integers with bitsize '{d}'", .{int_bits}); }; @@ -6081,7 +6533,8 @@ fn divSigned(func: *CodeGen, lhs: WValue, rhs: WValue, ty: Type) InnerError!WVal /// Retrieves the absolute value of a signed integer /// NOTE: Leaves the result value on the stack. fn signAbsValue(func: *CodeGen, operand: WValue, ty: Type) InnerError!WValue { - const int_bits = ty.intInfo(func.target).bits; + const mod = func.bin_file.base.options.module.?; + const int_bits = ty.intInfo(mod).bits; const wasm_bits = toWasmBits(int_bits) orelse { return func.fail("TODO: signAbsValue for signed integers larger than '{d}' bits", .{int_bits}); }; @@ -6116,11 +6569,12 @@ fn airSatBinOp(func: *CodeGen, inst: Air.Inst.Index, op: Op) InnerError!void { assert(op == .add or op == .sub); const bin_op = func.air.instructions.items(.data)[inst].bin_op; - const ty = func.air.typeOfIndex(inst); + const mod = func.bin_file.base.options.module.?; + const ty = func.typeOfIndex(inst); const lhs = try func.resolveInst(bin_op.lhs); const rhs = try func.resolveInst(bin_op.rhs); - const int_info = ty.intInfo(func.target); + const int_info = ty.intInfo(mod); const is_signed = int_info.signedness == .signed; if (int_info.bits > 64) { @@ -6163,7 +6617,8 @@ fn airSatBinOp(func: *CodeGen, inst: Air.Inst.Index, op: Op) InnerError!void { } fn signedSat(func: *CodeGen, lhs_operand: WValue, rhs_operand: WValue, ty: Type, op: Op) InnerError!WValue { - const int_info = ty.intInfo(func.target); + const mod = func.bin_file.base.options.module.?; + const int_info = ty.intInfo(mod); const wasm_bits = toWasmBits(int_info.bits).?; const is_wasm_bits = wasm_bits == int_info.bits; @@ -6228,8 +6683,9 @@ fn signedSat(func: *CodeGen, lhs_operand: WValue, rhs_operand: WValue, ty: Type, fn airShlSat(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { const bin_op = func.air.instructions.items(.data)[inst].bin_op; - const ty = func.air.typeOfIndex(inst); - const int_info = ty.intInfo(func.target); + const mod = func.bin_file.base.options.module.?; + const ty = func.typeOfIndex(inst); + const int_info = ty.intInfo(mod); const is_signed = int_info.signedness == .signed; if (int_info.bits > 64) { return func.fail("TODO: Saturating shifting left for integers with bitsize '{d}'", .{int_info.bits}); @@ -6337,7 +6793,7 @@ fn airShlSat(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { fn callIntrinsic( func: *CodeGen, name: []const u8, - param_types: []const Type, + param_types: []const InternPool.Index, return_type: Type, args: []const WValue, ) InnerError!WValue { @@ -6347,12 
+6803,13 @@ fn callIntrinsic( }; // Always pass over C-ABI - var func_type = try genFunctype(func.gpa, .C, param_types, return_type, func.target); + const mod = func.bin_file.base.options.module.?; + var func_type = try genFunctype(func.gpa, .C, param_types, return_type, mod); defer func_type.deinit(func.gpa); const func_type_index = try func.bin_file.putOrGetFuncType(func_type); try func.bin_file.addOrUpdateImport(name, symbol_index, null, func_type_index); - const want_sret_param = firstParamSRet(.C, return_type, func.target); + const want_sret_param = firstParamSRet(.C, return_type, mod); // if we want return as first param, we allocate a pointer to stack, // and emit it as our first argument const sret = if (want_sret_param) blk: { @@ -6364,16 +6821,16 @@ fn callIntrinsic( // Lower all arguments to the stack before we call our function for (args, 0..) |arg, arg_i| { assert(!(want_sret_param and arg == .stack)); - assert(param_types[arg_i].hasRuntimeBitsIgnoreComptime()); - try func.lowerArg(.C, param_types[arg_i], arg); + assert(param_types[arg_i].toType().hasRuntimeBitsIgnoreComptime(mod)); + try func.lowerArg(.C, param_types[arg_i].toType(), arg); } // Actually call our intrinsic try func.addLabel(.call, symbol_index); - if (!return_type.hasRuntimeBitsIgnoreComptime()) { + if (!return_type.hasRuntimeBitsIgnoreComptime(mod)) { return WValue.none; - } else if (return_type.isNoReturn()) { + } else if (return_type.isNoReturn(mod)) { try func.addTag(.@"unreachable"); return WValue.none; } else if (want_sret_param) { @@ -6386,11 +6843,11 @@ fn callIntrinsic( fn airTagName(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { const un_op = func.air.instructions.items(.data)[inst].un_op; const operand = try func.resolveInst(un_op); - const enum_ty = func.air.typeOf(un_op); + const enum_ty = func.typeOf(un_op); const func_sym_index = try func.getTagNameFunction(enum_ty); - const result_ptr = try func.allocStack(func.air.typeOfIndex(inst)); + const result_ptr = try func.allocStack(func.typeOfIndex(inst)); try func.lowerToStack(result_ptr); try func.emitWValue(operand); try func.addLabel(.call, func_sym_index); @@ -6399,15 +6856,14 @@ fn airTagName(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { } fn getTagNameFunction(func: *CodeGen, enum_ty: Type) InnerError!u32 { - const enum_decl_index = enum_ty.getOwnerDecl(); - const module = func.bin_file.base.options.module.?; + const mod = func.bin_file.base.options.module.?; + const enum_decl_index = enum_ty.getOwnerDecl(mod); var arena_allocator = std.heap.ArenaAllocator.init(func.gpa); defer arena_allocator.deinit(); const arena = arena_allocator.allocator(); - const fqn = try module.declPtr(enum_decl_index).getFullyQualifiedName(module); - defer module.gpa.free(fqn); + const fqn = mod.intern_pool.stringToSlice(try mod.declPtr(enum_decl_index).getFullyQualifiedName(mod)); const func_name = try std.fmt.allocPrintZ(arena, "__zig_tag_name_{s}", .{fqn}); // check if we already generated code for this. 
@@ -6415,10 +6871,9 @@ fn getTagNameFunction(func: *CodeGen, enum_ty: Type) InnerError!u32 { return loc.index; } - var int_tag_type_buffer: Type.Payload.Bits = undefined; - const int_tag_ty = enum_ty.intTagType(&int_tag_type_buffer); + const int_tag_ty = enum_ty.intTagType(mod); - if (int_tag_ty.bitSize(func.target) > 64) { + if (int_tag_ty.bitSize(mod) > 64) { return func.fail("TODO: Implement @tagName for enums with tag size larger than 64 bits", .{}); } @@ -6438,36 +6893,22 @@ fn getTagNameFunction(func: *CodeGen, enum_ty: Type) InnerError!u32 { // TODO: Make switch implementation generic so we can use a jump table for this when the tags are not sparse. // generate an if-else chain for each tag value as well as constant. - for (enum_ty.enumFields().keys(), 0..) |tag_name, field_index| { + for (enum_ty.enumFields(mod), 0..) |tag_name_ip, field_index_usize| { + const field_index = @intCast(u32, field_index_usize); + const tag_name = mod.intern_pool.stringToSlice(tag_name_ip); // for each tag name, create an unnamed const, // and then get a pointer to its value. - var name_ty_payload: Type.Payload.Len = .{ - .base = .{ .tag = .array_u8_sentinel_0 }, - .data = @intCast(u64, tag_name.len), - }; - const name_ty = Type.initPayload(&name_ty_payload.base); - const string_bytes = &module.string_literal_bytes; - try string_bytes.ensureUnusedCapacity(module.gpa, tag_name.len); - const gop = try module.string_literal_table.getOrPutContextAdapted(module.gpa, tag_name, Module.StringLiteralAdapter{ - .bytes = string_bytes, - }, Module.StringLiteralContext{ - .bytes = string_bytes, + const name_ty = try mod.arrayType(.{ + .len = tag_name.len, + .child = .u8_type, + .sentinel = .zero_u8, }); - if (!gop.found_existing) { - gop.key_ptr.* = .{ - .index = @intCast(u32, string_bytes.items.len), - .len = @intCast(u32, tag_name.len), - }; - string_bytes.appendSliceAssumeCapacity(tag_name); - gop.value_ptr.* = .none; - } - var name_val_payload: Value.Payload.StrLit = .{ - .base = .{ .tag = .str_lit }, - .data = gop.key_ptr.*, - }; - const name_val = Value.initPayload(&name_val_payload.base); + const name_val = try mod.intern(.{ .aggregate = .{ + .ty = name_ty.toIntern(), + .storage = .{ .bytes = tag_name }, + } }); const tag_sym_index = try func.bin_file.lowerUnnamedConst( - .{ .ty = name_ty, .val = name_val }, + .{ .ty = name_ty, .val = name_val.toValue() }, enum_decl_index, ); @@ -6479,11 +6920,8 @@ fn getTagNameFunction(func: *CodeGen, enum_ty: Type) InnerError!u32 { try writer.writeByte(std.wasm.opcode(.local_get)); try leb.writeULEB128(writer, @as(u32, 1)); - var tag_val_payload: Value.Payload.U32 = .{ - .base = .{ .tag = .enum_field_index }, - .data = @intCast(u32, field_index), - }; - const tag_value = try func.lowerConstant(Value.initPayload(&tag_val_payload.base), enum_ty); + const tag_val = try mod.enumValueFieldIndex(enum_ty, field_index); + const tag_value = try func.lowerConstant(tag_val, enum_ty); switch (tag_value) { .imm32 => |value| { @@ -6568,27 +7006,27 @@ fn getTagNameFunction(func: *CodeGen, enum_ty: Type) InnerError!u32 { // finish function body try writer.writeByte(std.wasm.opcode(.end)); - const slice_ty = Type.initTag(.const_slice_u8_sentinel_0); - const func_type = try genFunctype(arena, .Unspecified, &.{int_tag_ty}, slice_ty, func.target); + const slice_ty = Type.slice_const_u8_sentinel_0; + const func_type = try genFunctype(arena, .Unspecified, &.{int_tag_ty.ip_index}, slice_ty, mod); return func.bin_file.createFunction(func_name, func_type, &body_list, &relocs); } fn 
airErrorSetHasValue(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const mod = func.bin_file.base.options.module.?; const ty_op = func.air.instructions.items(.data)[inst].ty_op; const operand = try func.resolveInst(ty_op.operand); const error_set_ty = func.air.getRefType(ty_op.ty); const result = try func.allocLocal(Type.bool); - const names = error_set_ty.errorSetNames(); + const names = error_set_ty.errorSetNames(mod); var values = try std.ArrayList(u32).initCapacity(func.gpa, names.len); defer values.deinit(); - const module = func.bin_file.base.options.module.?; var lowest: ?u32 = null; var highest: ?u32 = null; for (names) |name| { - const err_int = module.global_error_set.get(name).?; + const err_int = @intCast(Module.ErrorInt, mod.global_error_set.getIndex(name).?); if (lowest) |*l| { if (err_int < l.*) { l.* = err_int; @@ -6659,12 +7097,13 @@ inline fn useAtomicFeature(func: *const CodeGen) bool { } fn airCmpxchg(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const mod = func.bin_file.base.options.module.?; const ty_pl = func.air.instructions.items(.data)[inst].ty_pl; const extra = func.air.extraData(Air.Cmpxchg, ty_pl.payload).data; - const ptr_ty = func.air.typeOf(extra.ptr); - const ty = ptr_ty.childType(); - const result_ty = func.air.typeOfIndex(inst); + const ptr_ty = func.typeOf(extra.ptr); + const ty = ptr_ty.childType(mod); + const result_ty = func.typeOfIndex(inst); const ptr_operand = try func.resolveInst(extra.ptr); const expected_val = try func.resolveInst(extra.expected_value); @@ -6677,7 +7116,7 @@ fn airCmpxchg(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { try func.emitWValue(ptr_operand); try func.lowerToStack(expected_val); try func.lowerToStack(new_val); - try func.addAtomicMemArg(switch (ty.abiSize(func.target)) { + try func.addAtomicMemArg(switch (ty.abiSize(mod)) { 1 => .i32_atomic_rmw8_cmpxchg_u, 2 => .i32_atomic_rmw16_cmpxchg_u, 4 => .i32_atomic_rmw_cmpxchg, @@ -6685,14 +7124,14 @@ fn airCmpxchg(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { else => |size| return func.fail("TODO: implement `@cmpxchg` for types with abi size '{d}'", .{size}), }, .{ .offset = ptr_operand.offset(), - .alignment = ty.abiAlignment(func.target), + .alignment = ty.abiAlignment(mod), }); try func.addLabel(.local_tee, val_local.local.value); _ = try func.cmp(.stack, expected_val, ty, .eq); try func.addLabel(.local_set, cmp_result.local.value); break :val val_local; } else val: { - if (ty.abiSize(func.target) > 8) { + if (ty.abiSize(mod) > 8) { return func.fail("TODO: Implement `@cmpxchg` for types larger than abi size of 8 bytes", .{}); } const ptr_val = try WValue.toLocal(try func.load(ptr_operand, ty, 0), func, ty); @@ -6708,7 +7147,7 @@ fn airCmpxchg(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { break :val ptr_val; }; - const result_ptr = if (isByRef(result_ty, func.target)) val: { + const result_ptr = if (isByRef(result_ty, mod)) val: { try func.emitWValue(cmp_result); try func.addImm32(-1); try func.addTag(.i32_xor); @@ -6716,7 +7155,7 @@ fn airCmpxchg(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { try func.addTag(.i32_and); const and_result = try WValue.toLocal(.stack, func, Type.bool); const result_ptr = try func.allocStack(result_ty); - try func.store(result_ptr, and_result, Type.bool, @intCast(u32, ty.abiSize(func.target))); + try func.store(result_ptr, and_result, Type.bool, @intCast(u32, ty.abiSize(mod))); try func.store(result_ptr, ptr_val, ty, 0); break :val result_ptr; } else val: { @@ -6727,16 +7166,17 @@ fn 
airCmpxchg(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { break :val try WValue.toLocal(.stack, func, result_ty); }; - return func.finishAir(inst, result_ptr, &.{ extra.ptr, extra.new_value, extra.expected_value }); + return func.finishAir(inst, result_ptr, &.{ extra.ptr, extra.expected_value, extra.new_value }); } fn airAtomicLoad(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const mod = func.bin_file.base.options.module.?; const atomic_load = func.air.instructions.items(.data)[inst].atomic_load; const ptr = try func.resolveInst(atomic_load.ptr); - const ty = func.air.typeOfIndex(inst); + const ty = func.typeOfIndex(inst); if (func.useAtomicFeature()) { - const tag: wasm.AtomicsOpcode = switch (ty.abiSize(func.target)) { + const tag: wasm.AtomicsOpcode = switch (ty.abiSize(mod)) { 1 => .i32_atomic_load8_u, 2 => .i32_atomic_load16_u, 4 => .i32_atomic_load, @@ -6746,7 +7186,7 @@ fn airAtomicLoad(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { try func.emitWValue(ptr); try func.addAtomicMemArg(tag, .{ .offset = ptr.offset(), - .alignment = ty.abiAlignment(func.target), + .alignment = ty.abiAlignment(mod), }); } else { _ = try func.load(ptr, ty, 0); @@ -6757,12 +7197,13 @@ fn airAtomicLoad(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { } fn airAtomicRmw(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const mod = func.bin_file.base.options.module.?; const pl_op = func.air.instructions.items(.data)[inst].pl_op; const extra = func.air.extraData(Air.AtomicRmw, pl_op.payload).data; const ptr = try func.resolveInst(pl_op.operand); const operand = try func.resolveInst(extra.operand); - const ty = func.air.typeOfIndex(inst); + const ty = func.typeOfIndex(inst); const op: std.builtin.AtomicRmwOp = extra.op(); if (func.useAtomicFeature()) { @@ -6780,7 +7221,7 @@ fn airAtomicRmw(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { try func.emitWValue(ptr); try func.emitWValue(value); if (op == .Nand) { - const wasm_bits = toWasmBits(@intCast(u16, ty.bitSize(func.target))).?; + const wasm_bits = toWasmBits(@intCast(u16, ty.bitSize(mod))).?; const and_res = try func.binOp(value, operand, ty, .@"and"); if (wasm_bits == 32) @@ -6797,7 +7238,7 @@ fn airAtomicRmw(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { try func.addTag(.select); } try func.addAtomicMemArg( - switch (ty.abiSize(func.target)) { + switch (ty.abiSize(mod)) { 1 => .i32_atomic_rmw8_cmpxchg_u, 2 => .i32_atomic_rmw16_cmpxchg_u, 4 => .i32_atomic_rmw_cmpxchg, @@ -6806,7 +7247,7 @@ fn airAtomicRmw(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { }, .{ .offset = ptr.offset(), - .alignment = ty.abiAlignment(func.target), + .alignment = ty.abiAlignment(mod), }, ); const select_res = try func.allocLocal(ty); @@ -6825,7 +7266,7 @@ fn airAtomicRmw(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { else => { try func.emitWValue(ptr); try func.emitWValue(operand); - const tag: wasm.AtomicsOpcode = switch (ty.abiSize(func.target)) { + const tag: wasm.AtomicsOpcode = switch (ty.abiSize(mod)) { 1 => switch (op) { .Xchg => .i32_atomic_rmw8_xchg_u, .Add => .i32_atomic_rmw8_add_u, @@ -6866,7 +7307,7 @@ fn airAtomicRmw(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { }; try func.addAtomicMemArg(tag, .{ .offset = ptr.offset(), - .alignment = ty.abiAlignment(func.target), + .alignment = ty.abiAlignment(mod), }); const result = try WValue.toLocal(.stack, func, ty); return func.finishAir(inst, result, &.{ pl_op.operand, extra.operand }); @@ -6895,7 +7336,7 @@ fn airAtomicRmw(func: 
*CodeGen, inst: Air.Inst.Index) InnerError!void { .Xor => .xor, else => unreachable, }); - if (ty.isInt() and (op == .Add or op == .Sub)) { + if (ty.isInt(mod) and (op == .Add or op == .Sub)) { _ = try func.wrapOperand(.stack, ty); } try func.store(.stack, .stack, ty, ptr.offset()); @@ -6911,7 +7352,7 @@ fn airAtomicRmw(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { try func.store(.stack, .stack, ty, ptr.offset()); }, .Nand => { - const wasm_bits = toWasmBits(@intCast(u16, ty.bitSize(func.target))).?; + const wasm_bits = toWasmBits(@intCast(u16, ty.bitSize(mod))).?; try func.emitWValue(ptr); const and_res = try func.binOp(result, operand, ty, .@"and"); @@ -6942,15 +7383,16 @@ fn airFence(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { } fn airAtomicStore(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { + const mod = func.bin_file.base.options.module.?; const bin_op = func.air.instructions.items(.data)[inst].bin_op; const ptr = try func.resolveInst(bin_op.lhs); const operand = try func.resolveInst(bin_op.rhs); - const ptr_ty = func.air.typeOf(bin_op.lhs); - const ty = ptr_ty.childType(); + const ptr_ty = func.typeOf(bin_op.lhs); + const ty = ptr_ty.childType(mod); if (func.useAtomicFeature()) { - const tag: wasm.AtomicsOpcode = switch (ty.abiSize(func.target)) { + const tag: wasm.AtomicsOpcode = switch (ty.abiSize(mod)) { 1 => .i32_atomic_store8, 2 => .i32_atomic_store16, 4 => .i32_atomic_store, @@ -6961,7 +7403,7 @@ fn airAtomicStore(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { try func.lowerToStack(operand); try func.addAtomicMemArg(tag, .{ .offset = ptr.offset(), - .alignment = ty.abiAlignment(func.target), + .alignment = ty.abiAlignment(mod), }); } else { try func.store(ptr, operand, ty, 0); @@ -6969,3 +7411,22 @@ fn airAtomicStore(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { return func.finishAir(inst, .none, &.{ bin_op.lhs, bin_op.rhs }); } + +fn airFrameAddress(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { + if (func.initial_stack_value == .none) { + try func.initializeStack(); + } + try func.emitWValue(func.bottom_stack_value); + const result = try WValue.toLocal(.stack, func, Type.usize); + return func.finishAir(inst, result, &.{}); +} + +fn typeOf(func: *CodeGen, inst: Air.Inst.Ref) Type { + const mod = func.bin_file.base.options.module.?; + return func.air.typeOf(inst, &mod.intern_pool); +} + +fn typeOfIndex(func: *CodeGen, inst: Air.Inst.Index) Type { + const mod = func.bin_file.base.options.module.?; + return func.air.typeOfIndex(inst, &mod.intern_pool); +} diff --git a/src/arch/wasm/Emit.zig b/src/arch/wasm/Emit.zig index bfa5324dc6..45ad1d7eb3 100644 --- a/src/arch/wasm/Emit.zig +++ b/src/arch/wasm/Emit.zig @@ -254,7 +254,7 @@ fn fail(emit: *Emit, comptime format: []const u8, args: anytype) InnerError { @setCold(true); std.debug.assert(emit.error_msg == null); const mod = emit.bin_file.base.options.module.?; - emit.error_msg = try Module.ErrorMsg.create(emit.bin_file.base.allocator, mod.declPtr(emit.decl_index).srcLoc(), format, args); + emit.error_msg = try Module.ErrorMsg.create(emit.bin_file.base.allocator, mod.declPtr(emit.decl_index).srcLoc(mod), format, args); return error.EmitFail; } diff --git a/src/arch/wasm/abi.zig b/src/arch/wasm/abi.zig index 4692f65dd1..92b0f4dc40 100644 --- a/src/arch/wasm/abi.zig +++ b/src/arch/wasm/abi.zig @@ -5,9 +5,11 @@ //! Note: Above mentioned document is not an official specification, therefore called a convention. 
const std = @import("std"); -const Type = @import("../../type.zig").Type; const Target = std.Target; +const Type = @import("../../type.zig").Type; +const Module = @import("../../Module.zig"); + /// Defines how to pass a type as part of a function signature, /// both for parameters as well as return values. pub const Class = enum { direct, indirect, none }; @@ -19,27 +21,28 @@ const direct: [2]Class = .{ .direct, .none }; /// Classifies a given Zig type to determine how they must be passed /// or returned as value within a wasm function. /// When all elements result in `.none`, no value must be passed in or returned. -pub fn classifyType(ty: Type, target: Target) [2]Class { - if (!ty.hasRuntimeBitsIgnoreComptime()) return none; - switch (ty.zigTypeTag()) { +pub fn classifyType(ty: Type, mod: *Module) [2]Class { + const target = mod.getTarget(); + if (!ty.hasRuntimeBitsIgnoreComptime(mod)) return none; + switch (ty.zigTypeTag(mod)) { .Struct => { - if (ty.containerLayout() == .Packed) { - if (ty.bitSize(target) <= 64) return direct; + if (ty.containerLayout(mod) == .Packed) { + if (ty.bitSize(mod) <= 64) return direct; return .{ .direct, .direct }; } // When the struct type is non-scalar - if (ty.structFieldCount() > 1) return memory; + if (ty.structFieldCount(mod) > 1) return memory; // When the struct's alignment is non-natural - const field = ty.structFields().values()[0]; + const field = ty.structFields(mod).values()[0]; if (field.abi_align != 0) { - if (field.abi_align > field.ty.abiAlignment(target)) { + if (field.abi_align > field.ty.abiAlignment(mod)) { return memory; } } - return classifyType(field.ty, target); + return classifyType(field.ty, mod); }, .Int, .Enum, .ErrorSet, .Vector => { - const int_bits = ty.intInfo(target).bits; + const int_bits = ty.intInfo(mod).bits; if (int_bits <= 64) return direct; if (int_bits <= 128) return .{ .direct, .direct }; return memory; @@ -53,22 +56,22 @@ pub fn classifyType(ty: Type, target: Target) [2]Class { .Bool => return direct, .Array => return memory, .Optional => { - std.debug.assert(ty.isPtrLikeOptional()); + std.debug.assert(ty.isPtrLikeOptional(mod)); return direct; }, .Pointer => { - std.debug.assert(!ty.isSlice()); + std.debug.assert(!ty.isSlice(mod)); return direct; }, .Union => { - if (ty.containerLayout() == .Packed) { - if (ty.bitSize(target) <= 64) return direct; + if (ty.containerLayout(mod) == .Packed) { + if (ty.bitSize(mod) <= 64) return direct; return .{ .direct, .direct }; } - const layout = ty.unionGetLayout(target); + const layout = ty.unionGetLayout(mod); std.debug.assert(layout.tag_size == 0); - if (ty.unionFields().count() > 1) return memory; - return classifyType(ty.unionFields().values()[0].ty, target); + if (ty.unionFields(mod).count() > 1) return memory; + return classifyType(ty.unionFields(mod).values()[0].ty, mod); }, .ErrorUnion, .Frame, @@ -90,29 +93,29 @@ pub fn classifyType(ty: Type, target: Target) [2]Class { /// Returns the scalar type a given type can represent. /// Asserts given type can be represented as scalar, such as /// a struct with a single scalar field. 
-pub fn scalarType(ty: Type, target: std.Target) Type { - switch (ty.zigTypeTag()) { +pub fn scalarType(ty: Type, mod: *Module) Type { + switch (ty.zigTypeTag(mod)) { .Struct => { - switch (ty.containerLayout()) { + switch (ty.containerLayout(mod)) { .Packed => { - const struct_obj = ty.castTag(.@"struct").?.data; - return scalarType(struct_obj.backing_int_ty, target); + const struct_obj = mod.typeToStruct(ty).?; + return scalarType(struct_obj.backing_int_ty, mod); }, else => { - std.debug.assert(ty.structFieldCount() == 1); - return scalarType(ty.structFieldType(0), target); + std.debug.assert(ty.structFieldCount(mod) == 1); + return scalarType(ty.structFieldType(0, mod), mod); }, } }, .Union => { - if (ty.containerLayout() != .Packed) { - const layout = ty.unionGetLayout(target); + if (ty.containerLayout(mod) != .Packed) { + const layout = ty.unionGetLayout(mod); if (layout.payload_size == 0 and layout.tag_size != 0) { - return scalarType(ty.unionTagTypeSafety().?, target); + return scalarType(ty.unionTagTypeSafety(mod).?, mod); } - std.debug.assert(ty.unionFields().count() == 1); + std.debug.assert(ty.unionFields(mod).count() == 1); } - return scalarType(ty.unionFields().values()[0].ty, target); + return scalarType(ty.unionFields(mod).values()[0].ty, mod); }, else => return ty, } diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index be972d7aea..a33faecca3 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -7,6 +7,8 @@ const leb128 = std.leb; const link = @import("../../link.zig"); const log = std.log.scoped(.codegen); const tracking_log = std.log.scoped(.tracking); +const verbose_tracking_log = std.log.scoped(.verbose_tracking); +const wip_mir_log = std.log.scoped(.wip_mir); const math = std.math; const mem = std.mem; const trace = @import("../../tracy.zig").trace; @@ -24,6 +26,7 @@ const Liveness = @import("../../Liveness.zig"); const Lower = @import("Lower.zig"); const Mir = @import("Mir.zig"); const Module = @import("../../Module.zig"); +const InternPool = @import("../../InternPool.zig"); const Target = std.Target; const Type = @import("../../type.zig").Type; const TypedValue = @import("../../TypedValue.zig"); @@ -48,16 +51,13 @@ const sse = abi.RegisterClass.sse; const InnerError = CodeGenError || error{OutOfRegisters}; -const debug_wip_mir = false; -const debug_tracking = false; - gpa: Allocator, air: Air, liveness: Liveness, bin_file: *link.File, debug_output: DebugInfoOutput, target: *const std.Target, -mod_fn: *const Module.Fn, +owner: Owner, err_msg: ?*ErrorMsg, args: []MCValue, ret_mcv: InstTracking, @@ -109,6 +109,49 @@ const mir_to_air_map_init = if (builtin.mode == .Debug) std.AutoHashMapUnmanaged const FrameAddr = struct { index: FrameIndex, off: i32 = 0 }; const RegisterOffset = struct { reg: Register, off: i32 = 0 }; +const Owner = union(enum) { + mod_fn: *const Module.Fn, + lazy_sym: link.File.LazySymbol, + + fn getDecl(owner: Owner, mod: *Module) Module.Decl.Index { + return switch (owner) { + .mod_fn => |mod_fn| mod_fn.owner_decl, + .lazy_sym => |lazy_sym| lazy_sym.ty.getOwnerDecl(mod), + }; + } + + fn getSymbolIndex(owner: Owner, ctx: *Self) !u32 { + switch (owner) { + .mod_fn => |mod_fn| { + const decl_index = mod_fn.owner_decl; + if (ctx.bin_file.cast(link.File.MachO)) |macho_file| { + const atom = try macho_file.getOrCreateAtomForDecl(decl_index); + return macho_file.getAtom(atom).getSymbolIndex().?; + } else if (ctx.bin_file.cast(link.File.Coff)) |coff_file| { + const atom = try 
coff_file.getOrCreateAtomForDecl(decl_index); + return coff_file.getAtom(atom).getSymbolIndex().?; + } else if (ctx.bin_file.cast(link.File.Plan9)) |p9_file| { + return p9_file.seeDecl(decl_index); + } else unreachable; + }, + .lazy_sym => |lazy_sym| { + if (ctx.bin_file.cast(link.File.MachO)) |macho_file| { + const atom = macho_file.getOrCreateAtomForLazySymbol(lazy_sym) catch |err| + return ctx.fail("{s} creating lazy symbol", .{@errorName(err)}); + return macho_file.getAtom(atom).getSymbolIndex().?; + } else if (ctx.bin_file.cast(link.File.Coff)) |coff_file| { + const atom = coff_file.getOrCreateAtomForLazySymbol(lazy_sym) catch |err| + return ctx.fail("{s} creating lazy symbol", .{@errorName(err)}); + return coff_file.getAtom(atom).getSymbolIndex().?; + } else if (ctx.bin_file.cast(link.File.Plan9)) |p9_file| { + return p9_file.getOrCreateAtomForLazySymbol(lazy_sym) catch |err| + return ctx.fail("{s} creating lazy symbol", .{@errorName(err)}); + } else unreachable; + }, + } + } +}; + pub const MCValue = union(enum) { /// No runtime bits. `void` types, empty structs, u0, enums with 1 tag, etc. /// TODO Look into deleting this tag and using `dead` instead, since every use @@ -168,16 +211,7 @@ pub const MCValue = union(enum) { fn isMemory(mcv: MCValue) bool { return switch (mcv) { - .memory, - .load_direct, - .lea_direct, - .load_got, - .lea_got, - .load_tlv, - .lea_tlv, - .load_frame, - .lea_frame, - => true, + .memory, .indirect, .load_frame => true, else => false, }; } @@ -192,6 +226,14 @@ pub const MCValue = union(enum) { fn isRegister(mcv: MCValue) bool { return switch (mcv) { .register => true, + .register_offset => |reg_off| return reg_off.off == 0, + else => false, + }; + } + + fn isRegisterOffset(mcv: MCValue) bool { + return switch (mcv) { + .register, .register_offset => true, else => false, }; } @@ -220,9 +262,9 @@ pub const MCValue = union(enum) { .dead, .undef, .immediate, + .eflags, .register, .register_offset, - .eflags, .register_overflow, .lea_direct, .lea_got, @@ -298,6 +340,41 @@ pub const MCValue = union(enum) { }; } + fn mem(mcv: MCValue, ptr_size: Memory.PtrSize) Memory { + return switch (mcv) { + .none, + .unreach, + .dead, + .undef, + .immediate, + .eflags, + .register, + .register_offset, + .register_overflow, + .load_direct, + .lea_direct, + .load_got, + .lea_got, + .load_tlv, + .lea_tlv, + .lea_frame, + .reserved_frame, + => unreachable, + .memory => |addr| if (math.cast(i32, @bitCast(i64, addr))) |small_addr| + Memory.sib(ptr_size, .{ .base = .{ .reg = .ds }, .disp = small_addr }) + else + Memory.moffs(.ds, addr), + .indirect => |reg_off| Memory.sib(ptr_size, .{ + .base = .{ .reg = reg_off.reg }, + .disp = reg_off.off, + }), + .load_frame => |frame_addr| Memory.sib(ptr_size, .{ + .base = .{ .frame = frame_addr.index }, + .disp = frame_addr.off, + }), + }; + } + pub fn format( mcv: MCValue, comptime _: []const u8, @@ -376,7 +453,7 @@ const InstTracking = struct { else => unreachable, } tracking_log.debug("spill %{d} from {} to {}", .{ inst, self.short, self.long }); - try function.genCopy(function.air.typeOfIndex(inst), self.long, self.short); + try function.genCopy(function.typeOfIndex(inst), self.long, self.short); } fn reuseFrame(self: *InstTracking) void { @@ -466,7 +543,7 @@ const InstTracking = struct { inst: Air.Inst.Index, target: InstTracking, ) !void { - const ty = function.air.typeOfIndex(inst); + const ty = function.typeOfIndex(inst); if ((self.long == .none or self.long == .reserved_frame) and target.long == .load_frame) try function.genCopy(ty, 
target.long, self.short); try function.genCopy(ty, target.short, self.short); @@ -534,14 +611,14 @@ const FrameAlloc = struct { .ref_count = 0, }; } - fn initType(ty: Type, target: Target) FrameAlloc { - return init(.{ .size = ty.abiSize(target), .alignment = ty.abiAlignment(target) }); + fn initType(ty: Type, mod: *Module) FrameAlloc { + return init(.{ .size = ty.abiSize(mod), .alignment = ty.abiAlignment(mod) }); } }; const StackAllocation = struct { inst: ?Air.Inst.Index, - /// TODO do we need size? should be determined by inst.ty.abiSize(self.target.*) + /// TODO do we need size? should be determined by inst.ty.abiSize(mod) size: u32, }; @@ -560,7 +637,7 @@ const Self = @This(); pub fn generate( bin_file: *link.File, src_loc: Module.SrcLoc, - module_fn: *Module.Fn, + module_fn_index: Module.Fn.Index, air: Air, liveness: Liveness, code: *std.ArrayList(u8), @@ -571,16 +648,11 @@ pub fn generate( } const mod = bin_file.options.module.?; + const module_fn = mod.funcPtr(module_fn_index); const fn_owner_decl = mod.declPtr(module_fn.owner_decl); assert(fn_owner_decl.has_tv); const fn_type = fn_owner_decl.ty; - if (debug_wip_mir) { - const stderr = std.io.getStdErr().writer(); - fn_owner_decl.renderFullyQualifiedName(mod, stderr) catch {}; - stderr.writeAll(":\n") catch {}; - } - const gpa = bin_file.allocator; var function = Self{ .gpa = gpa, @@ -589,7 +661,7 @@ pub fn generate( .target = &bin_file.options.target, .bin_file = bin_file, .debug_output = debug_output, - .mod_fn = module_fn, + .owner = .{ .mod_fn = module_fn }, .err_msg = null, .args = undefined, // populated after `resolveCallingConventionValues` .ret_mcv = undefined, // populated after `resolveCallingConventionValues` @@ -614,12 +686,14 @@ pub fn generate( if (builtin.mode == .Debug) function.mir_to_air_map.deinit(gpa); } + wip_mir_log.debug("{}:", .{function.fmtDecl(module_fn.owner_decl)}); + try function.frame_allocs.resize(gpa, FrameIndex.named_count); function.frame_allocs.set( @enumToInt(FrameIndex.stack_frame), FrameAlloc.init(.{ .size = 0, - .alignment = if (mod.align_stack_fns.get(module_fn)) |set_align_stack| + .alignment = if (mod.align_stack_fns.get(module_fn_index)) |set_align_stack| set_align_stack.alignment else 1, @@ -630,7 +704,8 @@ pub fn generate( FrameAlloc.init(.{ .size = 0, .alignment = 1 }), ); - var call_info = function.resolveCallingConventionValues(fn_type, &.{}, .args_frame) catch |err| switch (err) { + const fn_info = mod.typeToFunc(fn_type).?; + var call_info = function.resolveCallingConventionValues(fn_info, &.{}, .args_frame) catch |err| switch (err) { error.CodegenFail => return Result{ .fail = function.err_msg.? 
}, error.OutOfRegisters => return Result{ .fail = try ErrorMsg.create( @@ -647,12 +722,12 @@ pub fn generate( function.args = call_info.args; function.ret_mcv = call_info.return_value; function.frame_allocs.set(@enumToInt(FrameIndex.ret_addr), FrameAlloc.init(.{ - .size = Type.usize.abiSize(function.target.*), - .alignment = @min(Type.usize.abiAlignment(function.target.*), call_info.stack_align), + .size = Type.usize.abiSize(mod), + .alignment = @min(Type.usize.abiAlignment(mod), call_info.stack_align), })); function.frame_allocs.set(@enumToInt(FrameIndex.base_ptr), FrameAlloc.init(.{ - .size = Type.usize.abiSize(function.target.*), - .alignment = @min(Type.usize.abiAlignment(function.target.*) * 2, call_info.stack_align), + .size = Type.usize.abiSize(mod), + .alignment = @min(Type.usize.abiAlignment(mod) * 2, call_info.stack_align), })); function.frame_allocs.set( @enumToInt(FrameIndex.args_frame), @@ -715,48 +790,190 @@ pub fn generate( } } -fn dumpWipMir(self: *Self, inst: Mir.Inst) !void { - if (!debug_wip_mir) return; - const stderr = std.io.getStdErr().writer(); +pub fn generateLazy( + bin_file: *link.File, + src_loc: Module.SrcLoc, + lazy_sym: link.File.LazySymbol, + code: *std.ArrayList(u8), + debug_output: DebugInfoOutput, +) CodeGenError!Result { + const gpa = bin_file.allocator; + var function = Self{ + .gpa = gpa, + .air = undefined, + .liveness = undefined, + .target = &bin_file.options.target, + .bin_file = bin_file, + .debug_output = debug_output, + .owner = .{ .lazy_sym = lazy_sym }, + .err_msg = null, + .args = undefined, + .ret_mcv = undefined, + .fn_type = undefined, + .arg_index = undefined, + .src_loc = src_loc, + .end_di_line = undefined, // no debug info yet + .end_di_column = undefined, // no debug info yet + }; + defer { + function.mir_instructions.deinit(gpa); + function.mir_extra.deinit(gpa); + } + + function.genLazy(lazy_sym) catch |err| switch (err) { + error.CodegenFail => return Result{ .fail = function.err_msg.? }, + error.OutOfRegisters => return Result{ + .fail = try ErrorMsg.create(bin_file.allocator, src_loc, "CodeGen ran out of registers. This is a bug in the Zig compiler.", .{}), + }, + else => |e| return e, + }; + + var mir = Mir{ + .instructions = function.mir_instructions.toOwnedSlice(), + .extra = try function.mir_extra.toOwnedSlice(bin_file.allocator), + .frame_locs = function.frame_locs.toOwnedSlice(), + }; + defer mir.deinit(bin_file.allocator); + + var emit = Emit{ + .lower = .{ + .allocator = bin_file.allocator, + .mir = mir, + .target = &bin_file.options.target, + .src_loc = src_loc, + }, + .bin_file = bin_file, + .debug_output = debug_output, + .code = code, + .prev_di_pc = undefined, // no debug info yet + .prev_di_line = undefined, // no debug info yet + .prev_di_column = undefined, // no debug info yet + }; + defer emit.deinit(); + emit.emitMir() catch |err| switch (err) { + error.LowerFail, error.EmitFail => return Result{ .fail = emit.lower.err_msg.? 
}, + error.InvalidInstruction, error.CannotEncode => |e| { + const msg = switch (e) { + error.InvalidInstruction => "CodeGen failed to find a viable instruction.", + error.CannotEncode => "CodeGen failed to encode the instruction.", + }; + return Result{ + .fail = try ErrorMsg.create( + bin_file.allocator, + src_loc, + "{s} This is a bug in the Zig compiler.", + .{msg}, + ), + }; + }, + else => |e| return e, + }; + + if (function.err_msg) |em| { + return Result{ .fail = em }; + } else { + return Result.ok; + } +} + +const FormatDeclData = struct { + mod: *Module, + decl_index: Module.Decl.Index, +}; +fn formatDecl( + data: FormatDeclData, + comptime _: []const u8, + _: std.fmt.FormatOptions, + writer: anytype, +) @TypeOf(writer).Error!void { + try data.mod.declPtr(data.decl_index).renderFullyQualifiedName(data.mod, writer); +} +fn fmtDecl(self: *Self, decl_index: Module.Decl.Index) std.fmt.Formatter(formatDecl) { + return .{ .data = .{ + .mod = self.bin_file.options.module.?, + .decl_index = decl_index, + } }; +} + +const FormatAirData = struct { + self: *Self, + inst: Air.Inst.Index, +}; +fn formatAir( + data: FormatAirData, + comptime _: []const u8, + _: std.fmt.FormatOptions, + writer: anytype, +) @TypeOf(writer).Error!void { + @import("../../print_air.zig").dumpInst( + data.inst, + data.self.bin_file.options.module.?, + data.self.air, + data.self.liveness, + ); +} +fn fmtAir(self: *Self, inst: Air.Inst.Index) std.fmt.Formatter(formatAir) { + return .{ .data = .{ .self = self, .inst = inst } }; +} +const FormatWipMirData = struct { + self: *Self, + inst: Mir.Inst.Index, +}; +fn formatWipMir( + data: FormatWipMirData, + comptime _: []const u8, + _: std.fmt.FormatOptions, + writer: anytype, +) @TypeOf(writer).Error!void { var lower = Lower{ - .allocator = self.gpa, + .allocator = data.self.gpa, .mir = .{ - .instructions = self.mir_instructions.slice(), - .extra = self.mir_extra.items, + .instructions = data.self.mir_instructions.slice(), + .extra = data.self.mir_extra.items, .frame_locs = (std.MultiArrayList(Mir.FrameLoc){}).slice(), }, - .target = self.target, - .src_loc = self.src_loc, + .target = data.self.target, + .src_loc = data.self.src_loc, }; - for (lower.lowerMir(inst) catch |err| switch (err) { + for ((lower.lowerMir(data.inst) catch |err| switch (err) { error.LowerFail => { defer { - lower.err_msg.?.deinit(self.gpa); + lower.err_msg.?.deinit(data.self.gpa); lower.err_msg = null; } - try stderr.print("{s}\n", .{lower.err_msg.?.msg}); + try writer.writeAll(lower.err_msg.?.msg); return; }, - error.InvalidInstruction, error.CannotEncode => |e| { - try stderr.writeAll(switch (e) { - error.InvalidInstruction => "CodeGen failed to find a viable instruction.\n", - error.CannotEncode => "CodeGen failed to encode the instruction.\n", + error.OutOfMemory, error.InvalidInstruction, error.CannotEncode => |e| { + try writer.writeAll(switch (e) { + error.OutOfMemory => "Out of memory", + error.InvalidInstruction => "CodeGen failed to find a viable instruction.", + error.CannotEncode => "CodeGen failed to encode the instruction.", }); return; }, else => |e| return e, - }) |lower_inst| { - try stderr.print(" | {}\n", .{lower_inst}); - } + }).insts) |lowered_inst| try writer.print(" | {}", .{lowered_inst}); +} +fn fmtWipMir(self: *Self, inst: Mir.Inst.Index) std.fmt.Formatter(formatWipMir) { + return .{ .data = .{ .self = self, .inst = inst } }; } -fn dumpTracking(self: *Self) !void { - if (!debug_tracking) return; - const stderr = std.io.getStdErr().writer(); - - var it = 
self.inst_tracking.iterator(); - while (it.next()) |entry| try stderr.print("%{d} = {}\n", .{ entry.key_ptr.*, entry.value_ptr.* }); +const FormatTrackingData = struct { + self: *Self, +}; +fn formatTracking( + data: FormatTrackingData, + comptime _: []const u8, + _: std.fmt.FormatOptions, + writer: anytype, +) @TypeOf(writer).Error!void { + var it = data.self.inst_tracking.iterator(); + while (it.next()) |entry| try writer.print("\n%{d} = {}", .{ entry.key_ptr.*, entry.value_ptr.* }); +} +fn fmtTracking(self: *Self) std.fmt.Formatter(formatTracking) { + return .{ .data = .{ .self = self } }; } fn addInst(self: *Self, inst: Mir.Inst) error{OutOfMemory}!Mir.Inst.Index { @@ -764,7 +981,14 @@ fn addInst(self: *Self, inst: Mir.Inst) error{OutOfMemory}!Mir.Inst.Index { try self.mir_instructions.ensureUnusedCapacity(gpa, 1); const result_index = @intCast(Mir.Inst.Index, self.mir_instructions.len); self.mir_instructions.appendAssumeCapacity(inst); - self.dumpWipMir(inst) catch {}; + if (inst.tag != .pseudo or switch (inst.ops) { + else => true, + .pseudo_dbg_prologue_end_none, + .pseudo_dbg_line_line_column, + .pseudo_dbg_epilogue_begin_none, + .pseudo_dead_none, + => false, + }) wip_mir_log.debug("{}", .{self.fmtWipMir(result_index)}); return result_index; } @@ -787,131 +1011,248 @@ fn addExtraAssumeCapacity(self: *Self, extra: anytype) u32 { return result; } -fn asmSetccRegister(self: *Self, reg: Register, cc: bits.Condition) !void { +/// A `cc` of `.z_and_np` clobbers `reg2`! +fn asmCmovccRegisterRegister(self: *Self, reg1: Register, reg2: Register, cc: bits.Condition) !void { _ = try self.addInst(.{ - .tag = .setcc, - .ops = .r_cc, - .data = .{ .r_cc = .{ .r = reg, .cc = cc } }, + .tag = switch (cc) { + else => .cmov, + .z_and_np, .nz_or_p => .pseudo, + }, + .ops = switch (cc) { + else => .rr, + .z_and_np => .pseudo_cmov_z_and_np_rr, + .nz_or_p => .pseudo_cmov_nz_or_p_rr, + }, + .data = .{ .rr = .{ + .fixes = switch (cc) { + else => Mir.Inst.Fixes.fromCondition(cc), + .z_and_np, .nz_or_p => ._, + }, + .r1 = reg1, + .r2 = reg2, + } }, }); } -fn asmSetccMemory(self: *Self, m: Memory, cc: bits.Condition) !void { +/// A `cc` of `.z_and_np` is not supported by this encoding! 
+fn asmCmovccRegisterMemory(self: *Self, reg: Register, m: Memory, cc: bits.Condition) !void { _ = try self.addInst(.{ - .tag = .setcc, - .ops = switch (m) { - .sib => .m_sib_cc, - .rip => .m_rip_cc, - else => unreachable, + .tag = switch (cc) { + else => .cmov, + .z_and_np => unreachable, + .nz_or_p => .pseudo, }, - .data = .{ .x_cc = .{ .cc = cc, .payload = switch (m) { - .sib => try self.addExtra(Mir.MemorySib.encode(m)), - .rip => try self.addExtra(Mir.MemoryRip.encode(m)), - else => unreachable, - } } }, + .ops = switch (cc) { + else => switch (m) { + .sib => .rm_sib, + .rip => .rm_rip, + else => unreachable, + }, + .z_and_np => unreachable, + .nz_or_p => switch (m) { + .sib => .pseudo_cmov_nz_or_p_rm_sib, + .rip => .pseudo_cmov_nz_or_p_rm_rip, + else => unreachable, + }, + }, + .data = .{ .rx = .{ + .fixes = switch (cc) { + else => Mir.Inst.Fixes.fromCondition(cc), + .z_and_np => unreachable, + .nz_or_p => ._, + }, + .r1 = reg, + .payload = switch (m) { + .sib => try self.addExtra(Mir.MemorySib.encode(m)), + .rip => try self.addExtra(Mir.MemoryRip.encode(m)), + else => unreachable, + }, + } }, }); } -fn asmCmovccRegisterRegister(self: *Self, reg1: Register, reg2: Register, cc: bits.Condition) !void { +fn asmSetccRegister(self: *Self, reg: Register, cc: bits.Condition) !void { _ = try self.addInst(.{ - .tag = .cmovcc, - .ops = .rr_cc, - .data = .{ .rr_cc = .{ .r1 = reg1, .r2 = reg2, .cc = cc } }, + .tag = switch (cc) { + else => .set, + .z_and_np, .nz_or_p => .pseudo, + }, + .ops = switch (cc) { + else => .r, + .z_and_np => .pseudo_set_z_and_np_r, + .nz_or_p => .pseudo_set_nz_or_p_r, + }, + .data = switch (cc) { + else => .{ .r = .{ + .fixes = Mir.Inst.Fixes.fromCondition(cc), + .r1 = reg, + } }, + .z_and_np, .nz_or_p => .{ .rr = .{ + .r1 = reg, + .r2 = (try self.register_manager.allocReg(null, gp)).to8(), + } }, + }, }); } -fn asmCmovccRegisterMemory(self: *Self, reg: Register, m: Memory, cc: bits.Condition) !void { +fn asmSetccMemory(self: *Self, m: Memory, cc: bits.Condition) !void { + const payload = switch (m) { + .sib => try self.addExtra(Mir.MemorySib.encode(m)), + .rip => try self.addExtra(Mir.MemoryRip.encode(m)), + else => unreachable, + }; _ = try self.addInst(.{ - .tag = .cmovcc, - .ops = switch (m) { - .sib => .rm_sib_cc, - .rip => .rm_rip_cc, - else => unreachable, + .tag = switch (cc) { + else => .set, + .z_and_np, .nz_or_p => .pseudo, + }, + .ops = switch (cc) { + else => switch (m) { + .sib => .m_sib, + .rip => .m_rip, + else => unreachable, + }, + .z_and_np => switch (m) { + .sib => .pseudo_set_z_and_np_m_sib, + .rip => .pseudo_set_z_and_np_m_rip, + else => unreachable, + }, + .nz_or_p => switch (m) { + .sib => .pseudo_set_nz_or_p_m_sib, + .rip => .pseudo_set_nz_or_p_m_rip, + else => unreachable, + }, + }, + .data = switch (cc) { + else => .{ .x = .{ + .fixes = Mir.Inst.Fixes.fromCondition(cc), + .payload = payload, + } }, + .z_and_np, .nz_or_p => .{ .rx = .{ + .r1 = (try self.register_manager.allocReg(null, gp)).to8(), + .payload = payload, + } }, }, - .data = .{ .rx_cc = .{ .r = reg, .cc = cc, .payload = switch (m) { - .sib => try self.addExtra(Mir.MemorySib.encode(m)), - .rip => try self.addExtra(Mir.MemoryRip.encode(m)), - else => unreachable, - } } }, }); } fn asmJmpReloc(self: *Self, target: Mir.Inst.Index) !Mir.Inst.Index { return self.addInst(.{ - .tag = .jmp_reloc, - .ops = undefined, - .data = .{ .inst = target }, + .tag = .jmp, + .ops = .inst, + .data = .{ .inst = .{ + .inst = target, + } }, }); } fn asmJccReloc(self: *Self, target: Mir.Inst.Index, cc: 
bits.Condition) !Mir.Inst.Index { return self.addInst(.{ - .tag = .jcc, - .ops = .inst_cc, - .data = .{ .inst_cc = .{ .inst = target, .cc = cc } }, + .tag = switch (cc) { + else => .j, + .z_and_np, .nz_or_p => .pseudo, + }, + .ops = switch (cc) { + else => .inst, + .z_and_np => .pseudo_j_z_and_np_inst, + .nz_or_p => .pseudo_j_nz_or_p_inst, + }, + .data = .{ .inst = .{ + .fixes = switch (cc) { + else => Mir.Inst.Fixes.fromCondition(cc), + .z_and_np, .nz_or_p => ._, + }, + .inst = target, + } }, }); } fn asmPlaceholder(self: *Self) !Mir.Inst.Index { return self.addInst(.{ - .tag = .dead, - .ops = undefined, + .tag = .pseudo, + .ops = .pseudo_dead_none, .data = undefined, }); } -fn asmOpOnly(self: *Self, tag: Mir.Inst.Tag) !void { +fn asmOpOnly(self: *Self, tag: Mir.Inst.FixedTag) !void { _ = try self.addInst(.{ - .tag = tag, + .tag = tag[1], .ops = .none, + .data = .{ .none = .{ + .fixes = tag[0], + } }, + }); +} + +fn asmPseudo(self: *Self, ops: Mir.Inst.Ops) !void { + _ = try self.addInst(.{ + .tag = .pseudo, + .ops = ops, .data = undefined, }); } -fn asmRegister(self: *Self, tag: Mir.Inst.Tag, reg: Register) !void { +fn asmRegister(self: *Self, tag: Mir.Inst.FixedTag, reg: Register) !void { _ = try self.addInst(.{ - .tag = tag, + .tag = tag[1], .ops = .r, - .data = .{ .r = reg }, + .data = .{ .r = .{ + .fixes = tag[0], + .r1 = reg, + } }, }); } -fn asmImmediate(self: *Self, tag: Mir.Inst.Tag, imm: Immediate) !void { +fn asmImmediate(self: *Self, tag: Mir.Inst.FixedTag, imm: Immediate) !void { _ = try self.addInst(.{ - .tag = tag, + .tag = tag[1], .ops = switch (imm) { .signed => .i_s, .unsigned => .i_u, }, - .data = .{ .i = switch (imm) { - .signed => |s| @bitCast(u32, s), - .unsigned => |u| @intCast(u32, u), + .data = .{ .i = .{ + .fixes = tag[0], + .i = switch (imm) { + .signed => |s| @bitCast(u32, s), + .unsigned => |u| @intCast(u32, u), + }, } }, }); } -fn asmRegisterRegister(self: *Self, tag: Mir.Inst.Tag, reg1: Register, reg2: Register) !void { +fn asmRegisterRegister(self: *Self, tag: Mir.Inst.FixedTag, reg1: Register, reg2: Register) !void { _ = try self.addInst(.{ - .tag = tag, + .tag = tag[1], .ops = .rr, - .data = .{ .rr = .{ .r1 = reg1, .r2 = reg2 } }, + .data = .{ .rr = .{ + .fixes = tag[0], + .r1 = reg1, + .r2 = reg2, + } }, }); } -fn asmRegisterImmediate(self: *Self, tag: Mir.Inst.Tag, reg: Register, imm: Immediate) !void { +fn asmRegisterImmediate(self: *Self, tag: Mir.Inst.FixedTag, reg: Register, imm: Immediate) !void { const ops: Mir.Inst.Ops = switch (imm) { .signed => .ri_s, .unsigned => |u| if (math.cast(u32, u)) |_| .ri_u else .ri64, }; _ = try self.addInst(.{ - .tag = tag, + .tag = tag[1], .ops = ops, .data = switch (ops) { - .ri_s, .ri_u => .{ .ri = .{ .r = reg, .i = switch (imm) { - .signed => |s| @bitCast(u32, s), - .unsigned => |u| @intCast(u32, u), - } } }, + .ri_s, .ri_u => .{ .ri = .{ + .fixes = tag[0], + .r1 = reg, + .i = switch (imm) { + .signed => |s| @bitCast(u32, s), + .unsigned => |u| @intCast(u32, u), + }, + } }, .ri64 => .{ .rx = .{ - .r = reg, + .fixes = tag[0], + .r1 = reg, .payload = try self.addExtra(Mir.Imm64.encode(imm.unsigned)), } }, else => unreachable, @@ -921,89 +1262,244 @@ fn asmRegisterImmediate(self: *Self, tag: Mir.Inst.Tag, reg: Register, imm: Imme fn asmRegisterRegisterRegister( self: *Self, - tag: Mir.Inst.Tag, + tag: Mir.Inst.FixedTag, reg1: Register, reg2: Register, reg3: Register, ) !void { _ = try self.addInst(.{ - .tag = tag, + .tag = tag[1], .ops = .rrr, - .data = .{ .rrr = .{ .r1 = reg1, .r2 = reg2, .r3 = reg3 } }, + .data = 
.{ .rrr = .{ + .fixes = tag[0], + .r1 = reg1, + .r2 = reg2, + .r3 = reg3, + } }, + }); +} + +fn asmRegisterRegisterRegisterRegister( + self: *Self, + tag: Mir.Inst.FixedTag, + reg1: Register, + reg2: Register, + reg3: Register, + reg4: Register, +) !void { + _ = try self.addInst(.{ + .tag = tag[1], + .ops = .rrrr, + .data = .{ .rrrr = .{ + .fixes = tag[0], + .r1 = reg1, + .r2 = reg2, + .r3 = reg3, + .r4 = reg4, + } }, + }); +} + +fn asmRegisterRegisterRegisterImmediate( + self: *Self, + tag: Mir.Inst.FixedTag, + reg1: Register, + reg2: Register, + reg3: Register, + imm: Immediate, +) !void { + _ = try self.addInst(.{ + .tag = tag[1], + .ops = .rrri, + .data = .{ .rrri = .{ + .fixes = tag[0], + .r1 = reg1, + .r2 = reg2, + .r3 = reg3, + .i = @intCast(u8, imm.unsigned), + } }, }); } fn asmRegisterRegisterImmediate( self: *Self, - tag: Mir.Inst.Tag, + tag: Mir.Inst.FixedTag, reg1: Register, reg2: Register, imm: Immediate, ) !void { _ = try self.addInst(.{ - .tag = tag, + .tag = tag[1], .ops = switch (imm) { .signed => .rri_s, .unsigned => .rri_u, }, - .data = .{ .rri = .{ .r1 = reg1, .r2 = reg2, .i = switch (imm) { - .signed => |s| @bitCast(u32, s), - .unsigned => |u| @intCast(u32, u), - } } }, + .data = .{ .rri = .{ + .fixes = tag[0], + .r1 = reg1, + .r2 = reg2, + .i = switch (imm) { + .signed => |s| @bitCast(u32, s), + .unsigned => |u| @intCast(u32, u), + }, + } }, }); } -fn asmMemory(self: *Self, tag: Mir.Inst.Tag, m: Memory) !void { +fn asmRegisterRegisterMemory( + self: *Self, + tag: Mir.Inst.FixedTag, + reg1: Register, + reg2: Register, + m: Memory, +) !void { _ = try self.addInst(.{ - .tag = tag, + .tag = tag[1], + .ops = switch (m) { + .sib => .rrm_sib, + .rip => .rrm_rip, + else => unreachable, + }, + .data = .{ .rrx = .{ + .fixes = tag[0], + .r1 = reg1, + .r2 = reg2, + .payload = switch (m) { + .sib => try self.addExtra(Mir.MemorySib.encode(m)), + .rip => try self.addExtra(Mir.MemoryRip.encode(m)), + else => unreachable, + }, + } }, + }); +} + +fn asmMemory(self: *Self, tag: Mir.Inst.FixedTag, m: Memory) !void { + _ = try self.addInst(.{ + .tag = tag[1], .ops = switch (m) { .sib => .m_sib, .rip => .m_rip, else => unreachable, }, - .data = .{ .payload = switch (m) { - .sib => try self.addExtra(Mir.MemorySib.encode(m)), - .rip => try self.addExtra(Mir.MemoryRip.encode(m)), - else => unreachable, + .data = .{ .x = .{ + .fixes = tag[0], + .payload = switch (m) { + .sib => try self.addExtra(Mir.MemorySib.encode(m)), + .rip => try self.addExtra(Mir.MemoryRip.encode(m)), + else => unreachable, + }, } }, }); } -fn asmRegisterMemory(self: *Self, tag: Mir.Inst.Tag, reg: Register, m: Memory) !void { +fn asmRegisterMemory(self: *Self, tag: Mir.Inst.FixedTag, reg: Register, m: Memory) !void { _ = try self.addInst(.{ - .tag = tag, + .tag = tag[1], .ops = switch (m) { .sib => .rm_sib, .rip => .rm_rip, else => unreachable, }, - .data = .{ .rx = .{ .r = reg, .payload = switch (m) { - .sib => try self.addExtra(Mir.MemorySib.encode(m)), - .rip => try self.addExtra(Mir.MemoryRip.encode(m)), + .data = .{ .rx = .{ + .fixes = tag[0], + .r1 = reg, + .payload = switch (m) { + .sib => try self.addExtra(Mir.MemorySib.encode(m)), + .rip => try self.addExtra(Mir.MemoryRip.encode(m)), + else => unreachable, + }, + } }, + }); +} + +fn asmRegisterMemoryImmediate( + self: *Self, + tag: Mir.Inst.FixedTag, + reg: Register, + m: Memory, + imm: Immediate, +) !void { + _ = try self.addInst(.{ + .tag = tag[1], + .ops = switch (m) { + .sib => .rmi_sib, + .rip => .rmi_rip, + else => unreachable, + }, + .data = .{ .rix = .{ 
+ .fixes = tag[0], + .r1 = reg, + .i = @intCast(u8, imm.unsigned), + .payload = switch (m) { + .sib => try self.addExtra(Mir.MemorySib.encode(m)), + .rip => try self.addExtra(Mir.MemoryRip.encode(m)), + else => unreachable, + }, + } }, + }); +} + +fn asmRegisterRegisterMemoryImmediate( + self: *Self, + tag: Mir.Inst.FixedTag, + reg1: Register, + reg2: Register, + m: Memory, + imm: Immediate, +) !void { + _ = try self.addInst(.{ + .tag = tag[1], + .ops = switch (m) { + .sib => .rrmi_sib, + .rip => .rrmi_rip, else => unreachable, - } } }, + }, + .data = .{ .rrix = .{ + .fixes = tag[0], + .r1 = reg1, + .r2 = reg2, + .i = @intCast(u8, imm.unsigned), + .payload = switch (m) { + .sib => try self.addExtra(Mir.MemorySib.encode(m)), + .rip => try self.addExtra(Mir.MemoryRip.encode(m)), + else => unreachable, + }, + } }, }); } -fn asmMemoryRegister(self: *Self, tag: Mir.Inst.Tag, m: Memory, reg: Register) !void { +fn asmMemoryRegister(self: *Self, tag: Mir.Inst.FixedTag, m: Memory, reg: Register) !void { _ = try self.addInst(.{ - .tag = tag, + .tag = tag[1], .ops = switch (m) { .sib => .mr_sib, .rip => .mr_rip, else => unreachable, }, - .data = .{ .rx = .{ .r = reg, .payload = switch (m) { - .sib => try self.addExtra(Mir.MemorySib.encode(m)), - .rip => try self.addExtra(Mir.MemoryRip.encode(m)), - else => unreachable, - } } }, + .data = .{ .rx = .{ + .fixes = tag[0], + .r1 = reg, + .payload = switch (m) { + .sib => try self.addExtra(Mir.MemorySib.encode(m)), + .rip => try self.addExtra(Mir.MemoryRip.encode(m)), + else => unreachable, + }, + } }, }); } -fn asmMemoryImmediate(self: *Self, tag: Mir.Inst.Tag, m: Memory, imm: Immediate) !void { +fn asmMemoryImmediate(self: *Self, tag: Mir.Inst.FixedTag, m: Memory, imm: Immediate) !void { + const payload = try self.addExtra(Mir.Imm32{ .imm = switch (imm) { + .signed => |s| @bitCast(u32, s), + .unsigned => |u| @intCast(u32, u), + } }); + assert(payload + 1 == switch (m) { + .sib => try self.addExtra(Mir.MemorySib.encode(m)), + .rip => try self.addExtra(Mir.MemoryRip.encode(m)), + else => unreachable, + }); _ = try self.addInst(.{ - .tag = tag, + .tag = tag[1], .ops = switch (m) { .sib => switch (imm) { .signed => .mi_sib_s, @@ -1015,69 +1511,78 @@ fn asmMemoryImmediate(self: *Self, tag: Mir.Inst.Tag, m: Memory, imm: Immediate) }, else => unreachable, }, - .data = .{ .ix = .{ .i = switch (imm) { - .signed => |s| @bitCast(u32, s), - .unsigned => |u| @intCast(u32, u), - }, .payload = switch (m) { - .sib => try self.addExtra(Mir.MemorySib.encode(m)), - .rip => try self.addExtra(Mir.MemoryRip.encode(m)), - else => unreachable, - } } }, + .data = .{ .x = .{ + .fixes = tag[0], + .payload = payload, + } }, }); } fn asmMemoryRegisterRegister( self: *Self, - tag: Mir.Inst.Tag, + tag: Mir.Inst.FixedTag, m: Memory, reg1: Register, reg2: Register, ) !void { _ = try self.addInst(.{ - .tag = tag, + .tag = tag[1], .ops = switch (m) { .sib => .mrr_sib, .rip => .mrr_rip, else => unreachable, }, - .data = .{ .rrx = .{ .r1 = reg1, .r2 = reg2, .payload = switch (m) { - .sib => try self.addExtra(Mir.MemorySib.encode(m)), - .rip => try self.addExtra(Mir.MemoryRip.encode(m)), - else => unreachable, - } } }, + .data = .{ .rrx = .{ + .fixes = tag[0], + .r1 = reg1, + .r2 = reg2, + .payload = switch (m) { + .sib => try self.addExtra(Mir.MemorySib.encode(m)), + .rip => try self.addExtra(Mir.MemoryRip.encode(m)), + else => unreachable, + }, + } }, }); } fn asmMemoryRegisterImmediate( self: *Self, - tag: Mir.Inst.Tag, + tag: Mir.Inst.FixedTag, m: Memory, reg: Register, imm: Immediate, ) 
!void { _ = try self.addInst(.{ - .tag = tag, + .tag = tag[1], .ops = switch (m) { .sib => .mri_sib, .rip => .mri_rip, else => unreachable, }, - .data = .{ .rix = .{ .r = reg, .i = @intCast(u8, imm.unsigned), .payload = switch (m) { - .sib => try self.addExtra(Mir.MemorySib.encode(m)), - .rip => try self.addExtra(Mir.MemoryRip.encode(m)), - else => unreachable, - } } }, + .data = .{ .rix = .{ + .fixes = tag[0], + .r1 = reg, + .i = @intCast(u8, imm.unsigned), + .payload = switch (m) { + .sib => try self.addExtra(Mir.MemorySib.encode(m)), + .rip => try self.addExtra(Mir.MemoryRip.encode(m)), + else => unreachable, + }, + } }, }); } fn gen(self: *Self) InnerError!void { - const cc = self.fn_type.fnCallingConvention(); + const mod = self.bin_file.options.module.?; + const cc = self.fn_type.fnCallingConvention(mod); if (cc != .Naked) { - try self.asmRegister(.push, .rbp); + try self.asmRegister(.{ ._, .push }, .rbp); const backpatch_push_callee_preserved_regs = try self.asmPlaceholder(); - try self.asmRegisterRegister(.mov, .rbp, .rsp); + try self.asmRegisterRegister(.{ ._, .mov }, .rbp, .rsp); const backpatch_frame_align = try self.asmPlaceholder(); + const backpatch_frame_align_extra = try self.asmPlaceholder(); const backpatch_stack_alloc = try self.asmPlaceholder(); + const backpatch_stack_alloc_extra = try self.asmPlaceholder(); switch (self.ret_mcv.long) { .none, .unreach => {}, @@ -1086,7 +1591,7 @@ fn gen(self: *Self) InnerError!void { // register which the callee is free to clobber. Therefore, we purposely // spill it to stack immediately. const frame_index = - try self.allocFrameIndex(FrameAlloc.initType(Type.usize, self.target.*)); + try self.allocFrameIndex(FrameAlloc.initType(Type.usize, mod)); try self.genSetMem( .{ .frame = frame_index }, 0, @@ -1099,7 +1604,7 @@ fn gen(self: *Self) InnerError!void { else => unreachable, } - try self.asmOpOnly(.dbg_prologue_end); + try self.asmPseudo(.pseudo_dbg_prologue_end_none); try self.genBody(self.air.getMainBody()); @@ -1111,64 +1616,115 @@ fn gen(self: *Self) InnerError!void { // } // Eliding the reloc will cause a miscompilation in this case. 
for (self.exitlude_jump_relocs.items) |jmp_reloc| { - self.mir_instructions.items(.data)[jmp_reloc].inst = + self.mir_instructions.items(.data)[jmp_reloc].inst.inst = @intCast(u32, self.mir_instructions.len); } - try self.asmOpOnly(.dbg_epilogue_begin); + try self.asmPseudo(.pseudo_dbg_epilogue_begin_none); const backpatch_stack_dealloc = try self.asmPlaceholder(); const backpatch_pop_callee_preserved_regs = try self.asmPlaceholder(); - try self.asmRegister(.pop, .rbp); - try self.asmOpOnly(.ret); + try self.asmRegister(.{ ._, .pop }, .rbp); + try self.asmOpOnly(.{ ._, .ret }); const frame_layout = try self.computeFrameLayout(); const need_frame_align = frame_layout.stack_mask != math.maxInt(u32); const need_stack_adjust = frame_layout.stack_adjust > 0; const need_save_reg = frame_layout.save_reg_list.count() > 0; if (need_frame_align) { + const page_align = @as(u32, math.maxInt(u32)) << 12; self.mir_instructions.set(backpatch_frame_align, .{ .tag = .@"and", .ops = .ri_s, - .data = .{ .ri = .{ .r = .rsp, .i = frame_layout.stack_mask } }, + .data = .{ .ri = .{ + .r1 = .rsp, + .i = @max(frame_layout.stack_mask, page_align), + } }, }); + if (frame_layout.stack_mask < page_align) { + self.mir_instructions.set(backpatch_frame_align_extra, .{ + .tag = .pseudo, + .ops = .pseudo_probe_align_ri_s, + .data = .{ .ri = .{ + .r1 = .rsp, + .i = ~frame_layout.stack_mask & page_align, + } }, + }); + } } if (need_stack_adjust) { - self.mir_instructions.set(backpatch_stack_alloc, .{ - .tag = .sub, - .ops = .ri_s, - .data = .{ .ri = .{ .r = .rsp, .i = frame_layout.stack_adjust } }, - }); + const page_size: u32 = 1 << 12; + if (frame_layout.stack_adjust <= page_size) { + self.mir_instructions.set(backpatch_stack_alloc, .{ + .tag = .sub, + .ops = .ri_s, + .data = .{ .ri = .{ + .r1 = .rsp, + .i = frame_layout.stack_adjust, + } }, + }); + } else if (frame_layout.stack_adjust < + page_size * Lower.pseudo_probe_adjust_unrolled_max_insts) + { + self.mir_instructions.set(backpatch_stack_alloc, .{ + .tag = .pseudo, + .ops = .pseudo_probe_adjust_unrolled_ri_s, + .data = .{ .ri = .{ + .r1 = .rsp, + .i = frame_layout.stack_adjust, + } }, + }); + } else { + self.mir_instructions.set(backpatch_stack_alloc, .{ + .tag = .pseudo, + .ops = .pseudo_probe_adjust_setup_rri_s, + .data = .{ .rri = .{ + .r1 = .rsp, + .r2 = .rax, + .i = frame_layout.stack_adjust, + } }, + }); + self.mir_instructions.set(backpatch_stack_alloc_extra, .{ + .tag = .pseudo, + .ops = .pseudo_probe_adjust_loop_rr, + .data = .{ .rr = .{ + .r1 = .rsp, + .r2 = .rax, + } }, + }); + } } if (need_frame_align or need_stack_adjust) { self.mir_instructions.set(backpatch_stack_dealloc, .{ .tag = .mov, .ops = .rr, - .data = .{ .rr = .{ .r1 = .rsp, .r2 = .rbp } }, + .data = .{ .rr = .{ + .r1 = .rsp, + .r2 = .rbp, + } }, }); } if (need_save_reg) { - const save_reg_list = frame_layout.save_reg_list.asInt(); self.mir_instructions.set(backpatch_push_callee_preserved_regs, .{ - .tag = .push_regs, - .ops = undefined, - .data = .{ .payload = save_reg_list }, + .tag = .pseudo, + .ops = .pseudo_push_reg_list, + .data = .{ .reg_list = frame_layout.save_reg_list }, }); self.mir_instructions.set(backpatch_pop_callee_preserved_regs, .{ - .tag = .pop_regs, - .ops = undefined, - .data = .{ .payload = save_reg_list }, + .tag = .pseudo, + .ops = .pseudo_pop_reg_list, + .data = .{ .reg_list = frame_layout.save_reg_list }, }); } } else { - try self.asmOpOnly(.dbg_prologue_end); + try self.asmPseudo(.pseudo_dbg_prologue_end_none); try self.genBody(self.air.getMainBody()); - try 
self.asmOpOnly(.dbg_epilogue_begin); + try self.asmPseudo(.pseudo_dbg_epilogue_begin_none); } // Drop them off at the rbrace. _ = try self.addInst(.{ - .tag = .dbg_line, - .ops = undefined, + .tag = .pseudo, + .ops = .pseudo_dbg_line_line_column, .data = .{ .line_column = .{ .line = self.end_di_line, .column = self.end_di_column, @@ -1177,6 +1733,8 @@ fn gen(self: *Self) InnerError!void { } fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { + const mod = self.bin_file.options.module.?; + const ip = &mod.intern_pool; const air_tags = self.air.instructions.items(.tag); for (body) |inst| { @@ -1185,14 +1743,9 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { try self.mir_to_air_map.put(self.gpa, mir_inst, inst); } - if (self.liveness.isUnused(inst) and !self.air.mustLower(inst)) continue; - if (debug_wip_mir) @import("../../print_air.zig").dumpInst( - inst, - self.bin_file.options.module.?, - self.air, - self.liveness, - ); - self.dumpTracking() catch {}; + if (self.liveness.isUnused(inst) and !self.air.mustLower(inst, ip)) continue; + wip_mir_log.debug("{}", .{self.fmtAir(inst)}); + verbose_tracking_log.debug("{}", .{self.fmtTracking()}); const old_air_bookkeeping = self.air_bookkeeping; try self.inst_tracking.ensureUnusedCapacity(self.gpa, 1); @@ -1230,7 +1783,6 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .shl_sat => try self.airShlSat(inst), .slice => try self.airSlice(inst), - .sqrt, .sin, .cos, .tan, @@ -1239,14 +1791,15 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .log, .log2, .log10, - .fabs, - .floor, - .ceil, .round, - .trunc_float, - .neg, => try self.airUnaryMath(inst), + .floor => try self.airRound(inst, 0b1_0_01), + .ceil => try self.airRound(inst, 0b1_0_10), + .trunc_float => try self.airRound(inst, 0b1_0_11), + .sqrt => try self.airSqrt(inst), + .neg, .fabs => try self.airFloatSign(inst), + .add_with_overflow => try self.airAddSubWithOverflow(inst), .sub_with_overflow => try self.airAddSubWithOverflow(inst), .mul_with_overflow => try self.airMulWithOverflow(inst), @@ -1374,8 +1927,7 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .ptr_elem_val => try self.airPtrElemVal(inst), .ptr_elem_ptr => try self.airPtrElemPtr(inst), - .constant => unreachable, // excluded from function bodies - .const_ty => unreachable, // excluded from function bodies + .inferred_alloc, .inferred_alloc_comptime, .interned => unreachable, .unreach => if (self.wantSafety()) try self.airTrap() else self.finishAirBookkeeping(), .optional_payload => try self.airOptionalPayload(inst), @@ -1453,7 +2005,64 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { } } } - self.dumpTracking() catch {}; + verbose_tracking_log.debug("{}", .{self.fmtTracking()}); +} + +fn genLazy(self: *Self, lazy_sym: link.File.LazySymbol) InnerError!void { + const mod = self.bin_file.options.module.?; + switch (lazy_sym.ty.zigTypeTag(mod)) { + .Enum => { + const enum_ty = lazy_sym.ty; + wip_mir_log.debug("{}.@tagName:", .{enum_ty.fmt(self.bin_file.options.module.?)}); + + const param_regs = abi.getCAbiIntParamRegs(self.target.*); + const param_locks = self.register_manager.lockRegsAssumeUnused(2, param_regs[0..2].*); + defer for (param_locks) |lock| self.register_manager.unlockReg(lock); + + const ret_reg = param_regs[0]; + const enum_mcv = MCValue{ .register = param_regs[1] }; + + var exitlude_jump_relocs = try self.gpa.alloc(u32, enum_ty.enumFieldCount(mod)); + defer 
self.gpa.free(exitlude_jump_relocs); + + const data_reg = try self.register_manager.allocReg(null, gp); + const data_lock = self.register_manager.lockRegAssumeUnused(data_reg); + defer self.register_manager.unlockReg(data_lock); + try self.genLazySymbolRef(.lea, data_reg, .{ .kind = .const_data, .ty = enum_ty }); + + var data_off: i32 = 0; + for (exitlude_jump_relocs, 0..) |*exitlude_jump_reloc, index_usize| { + const index = @intCast(u32, index_usize); + const tag_name = mod.intern_pool.stringToSlice(enum_ty.enumFields(mod)[index_usize]); + const tag_val = try mod.enumValueFieldIndex(enum_ty, index); + const tag_mcv = try self.genTypedValue(.{ .ty = enum_ty, .val = tag_val }); + try self.genBinOpMir(.{ ._, .cmp }, enum_ty, enum_mcv, tag_mcv); + const skip_reloc = try self.asmJccReloc(undefined, .ne); + + try self.genSetMem( + .{ .reg = ret_reg }, + 0, + Type.usize, + .{ .register_offset = .{ .reg = data_reg, .off = data_off } }, + ); + try self.genSetMem(.{ .reg = ret_reg }, 8, Type.usize, .{ .immediate = tag_name.len }); + + exitlude_jump_reloc.* = try self.asmJmpReloc(undefined); + try self.performReloc(skip_reloc); + + data_off += @intCast(i32, tag_name.len + 1); + } + + try self.airTrap(); + + for (exitlude_jump_relocs) |reloc| try self.performReloc(reloc); + try self.asmOpOnly(.{ ._, .ret }); + }, + else => return self.fail( + "TODO implement {s} for {}", + .{ @tagName(lazy_sym.kind), lazy_sym.ty.fmt(self.bin_file.options.module.?) }, + ), + } } fn getValue(self: *Self, value: MCValue, inst: ?Air.Inst.Index) void { @@ -1488,10 +2097,8 @@ fn feed(self: *Self, bt: *Liveness.BigTomb, operand: Air.Inst.Ref) void { /// Asserts there is already capacity to insert into top branch inst_table. fn processDeath(self: *Self, inst: Air.Inst.Index) void { - switch (self.air.instructions.items(.tag)[inst]) { - .constant, .const_ty => unreachable, - else => self.inst_tracking.getPtr(inst).?.die(self, inst), - } + assert(self.air.instructions.items(.tag)[inst] != .interned); + self.inst_tracking.getPtr(inst).?.die(self, inst); } /// Called when there are no operands, and the instruction is always unreferenced. 
@@ -1522,10 +2129,7 @@ fn finishAir(self: *Self, inst: Air.Inst.Index, result: MCValue, operands: [Live const dies = @truncate(u1, tomb_bits) != 0; tomb_bits >>= 1; if (!dies) continue; - const op_int = @enumToInt(op); - if (op_int < Air.Inst.Ref.typed_value_map.len) continue; - const op_index = @intCast(Air.Inst.Index, op_int - Air.Inst.Ref.typed_value_map.len); - self.processDeath(op_index); + self.processDeath(Air.refToIndexAllowNone(op) orelse continue); } self.finishAirResult(inst, result); } @@ -1546,7 +2150,7 @@ fn setFrameLoc( const frame_i = @enumToInt(frame_index); if (aligned) { const alignment = @as(i32, 1) << self.frame_allocs.items(.abi_align)[frame_i]; - offset.* = mem.alignForwardGeneric(i32, offset.*, alignment); + offset.* = mem.alignForward(i32, offset.*, alignment); } self.frame_locs.set(frame_i, .{ .base = base, .disp = offset.* }); offset.* += self.frame_allocs.items(.abi_size)[frame_i]; @@ -1572,7 +2176,7 @@ fn computeFrameLayout(self: *Self) !FrameLayout { } }; const sort_context = SortContext{ .frame_align = frame_align }; - std.sort.sort(FrameIndex, stack_frame_order, sort_context, SortContext.lessThan); + mem.sort(FrameIndex, stack_frame_order, sort_context, SortContext.lessThan); } const call_frame_align = frame_align[@enumToInt(FrameIndex.call_frame)]; @@ -1603,7 +2207,7 @@ fn computeFrameLayout(self: *Self) !FrameLayout { self.setFrameLoc(.stack_frame, .rsp, &rsp_offset, true); for (stack_frame_order) |frame_index| self.setFrameLoc(frame_index, .rsp, &rsp_offset, true); rsp_offset += stack_frame_align_offset; - rsp_offset = mem.alignForwardGeneric(i32, rsp_offset, @as(i32, 1) << needed_align); + rsp_offset = mem.alignForward(i32, rsp_offset, @as(i32, 1) << needed_align); rsp_offset -= stack_frame_align_offset; frame_size[@enumToInt(FrameIndex.call_frame)] = @intCast(u31, rsp_offset - frame_offset[@enumToInt(FrameIndex.stack_frame)]); @@ -1615,19 +2219,29 @@ fn computeFrameLayout(self: *Self) !FrameLayout { }; } +fn getFrameAddrAlignment(self: *Self, frame_addr: FrameAddr) u32 { + const alloc_align = @as(u32, 1) << self.frame_allocs.get(@enumToInt(frame_addr.index)).abi_align; + return @min(alloc_align, @bitCast(u32, frame_addr.off) & (alloc_align - 1)); +} + +fn getFrameAddrSize(self: *Self, frame_addr: FrameAddr) u32 { + return self.frame_allocs.get(@enumToInt(frame_addr.index)).abi_size - @intCast(u31, frame_addr.off); +} + fn allocFrameIndex(self: *Self, alloc: FrameAlloc) !FrameIndex { const frame_allocs_slice = self.frame_allocs.slice(); const frame_size = frame_allocs_slice.items(.abi_size); const frame_align = frame_allocs_slice.items(.abi_align); + + const stack_frame_align = &frame_align[@enumToInt(FrameIndex.stack_frame)]; + stack_frame_align.* = @max(stack_frame_align.*, alloc.abi_align); + for (self.free_frame_indices.keys(), 0..) |frame_index, free_i| { const abi_size = frame_size[@enumToInt(frame_index)]; if (abi_size != alloc.abi_size) continue; const abi_align = &frame_align[@enumToInt(frame_index)]; abi_align.* = @max(abi_align.*, alloc.abi_align); - const stack_frame_align = &frame_align[@enumToInt(FrameIndex.stack_frame)]; - stack_frame_align.* = @max(stack_frame_align.*, alloc.abi_align); - _ = self.free_frame_indices.swapRemoveAt(free_i); return frame_index; } @@ -1638,54 +2252,61 @@ fn allocFrameIndex(self: *Self, alloc: FrameAlloc) !FrameIndex { /// Use a pointer instruction as the basis for allocating stack memory. 
fn allocMemPtr(self: *Self, inst: Air.Inst.Index) !FrameIndex { - const ptr_ty = self.air.typeOfIndex(inst); - const val_ty = ptr_ty.childType(); + const mod = self.bin_file.options.module.?; + const ptr_ty = self.typeOfIndex(inst); + const val_ty = ptr_ty.childType(mod); return self.allocFrameIndex(FrameAlloc.init(.{ - .size = math.cast(u32, val_ty.abiSize(self.target.*)) orelse { - const mod = self.bin_file.options.module.?; + .size = math.cast(u32, val_ty.abiSize(mod)) orelse { return self.fail("type '{}' too big to fit into stack frame", .{val_ty.fmt(mod)}); }, - .alignment = @max(ptr_ty.ptrAlignment(self.target.*), 1), + .alignment = @max(ptr_ty.ptrAlignment(mod), 1), })); } fn allocRegOrMem(self: *Self, inst: Air.Inst.Index, reg_ok: bool) !MCValue { - return self.allocRegOrMemAdvanced(self.air.typeOfIndex(inst), inst, reg_ok); + return self.allocRegOrMemAdvanced(self.typeOfIndex(inst), inst, reg_ok); } fn allocTempRegOrMem(self: *Self, elem_ty: Type, reg_ok: bool) !MCValue { return self.allocRegOrMemAdvanced(elem_ty, null, reg_ok); } -fn allocRegOrMemAdvanced(self: *Self, elem_ty: Type, inst: ?Air.Inst.Index, reg_ok: bool) !MCValue { - const abi_size = math.cast(u32, elem_ty.abiSize(self.target.*)) orelse { - const mod = self.bin_file.options.module.?; - return self.fail("type '{}' too big to fit into stack frame", .{elem_ty.fmt(mod)}); +fn allocRegOrMemAdvanced(self: *Self, ty: Type, inst: ?Air.Inst.Index, reg_ok: bool) !MCValue { + const mod = self.bin_file.options.module.?; + const abi_size = math.cast(u32, ty.abiSize(mod)) orelse { + return self.fail("type '{}' too big to fit into stack frame", .{ty.fmt(mod)}); }; - if (reg_ok) { - // Make sure the type can fit in a register before we try to allocate one. - const ptr_bits = self.target.cpu.arch.ptrBitWidth(); - const ptr_bytes: u64 = @divExact(ptr_bits, 8); - if (abi_size <= ptr_bytes) { - if (self.register_manager.tryAllocReg(inst, try self.regClassForType(elem_ty))) |reg| { + if (reg_ok) need_mem: { + if (abi_size <= @as(u32, switch (ty.zigTypeTag(mod)) { + .Float => switch (ty.floatBits(self.target.*)) { + 16, 32, 64, 128 => 16, + 80 => break :need_mem, + else => unreachable, + }, + .Vector => switch (ty.childType(mod).zigTypeTag(mod)) { + .Float => switch (ty.childType(mod).floatBits(self.target.*)) { + 16, 32, 64, 128 => if (self.hasFeature(.avx)) 32 else 16, + 80 => break :need_mem, + else => unreachable, + }, + else => if (self.hasFeature(.avx)) 32 else 16, + }, + else => 8, + })) { + if (self.register_manager.tryAllocReg(inst, regClassForType(ty, mod))) |reg| { return MCValue{ .register = registerAlias(reg, abi_size) }; } } } - const frame_index = try self.allocFrameIndex(FrameAlloc.initType(elem_ty, self.target.*)); + const frame_index = try self.allocFrameIndex(FrameAlloc.initType(ty, mod)); return .{ .load_frame = .{ .index = frame_index } }; } -fn regClassForType(self: *Self, ty: Type) !RegisterManager.RegisterBitSet { - return switch (ty.zigTypeTag()) { - .Vector => self.fail("TODO regClassForType for {}", .{ty.fmt(self.bin_file.options.module.?)}), - .Float => switch (ty.floatBits(self.target.*)) { - 32 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse)) sse else gp, - 64 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse2)) sse else gp, - else => gp, - }, +fn regClassForType(ty: Type, mod: *Module) RegisterManager.RegisterBitSet { + return switch (ty.zigTypeTag(mod)) { + .Float, .Vector => sse, else => gp, }; } @@ -1828,7 +2449,8 @@ pub fn spillRegisters(self: *Self, registers: []const 
Register) !void { /// allocated. A second call to `copyToTmpRegister` may return the same register. /// This can have a side effect of spilling instructions to the stack to free up a register. fn copyToTmpRegister(self: *Self, ty: Type, mcv: MCValue) !Register { - const reg: Register = try self.register_manager.allocReg(null, try self.regClassForType(ty)); + const mod = self.bin_file.options.module.?; + const reg = try self.register_manager.allocReg(null, regClassForType(ty, mod)); try self.genSetReg(reg, ty, mcv); return reg; } @@ -1843,7 +2465,8 @@ fn copyToRegisterWithInstTracking( ty: Type, mcv: MCValue, ) !MCValue { - const reg: Register = try self.register_manager.allocReg(reg_owner, try self.regClassForType(ty)); + const mod = self.bin_file.options.module.?; + const reg: Register = try self.register_manager.allocReg(reg_owner, regClassForType(ty, mod)); try self.genSetReg(reg, ty, mcv); return MCValue{ .register = reg }; } @@ -1860,7 +2483,7 @@ fn airRetPtr(self: *Self, inst: Air.Inst.Index) !void { .load_frame => .{ .register_offset = .{ .reg = (try self.copyToRegisterWithInstTracking( inst, - self.air.typeOfIndex(inst), + self.typeOfIndex(inst), self.ret_mcv.long, )).register, .off = self.ret_mcv.short.indirect.off, @@ -1871,127 +2494,321 @@ fn airRetPtr(self: *Self, inst: Air.Inst.Index) !void { fn airFptrunc(self: *Self, inst: Air.Inst.Index) !void { const ty_op = self.air.instructions.items(.data)[inst].ty_op; - _ = ty_op; - return self.fail("TODO implement airFptrunc for {}", .{self.target.cpu.arch}); - // return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); + const dst_ty = self.typeOfIndex(inst); + const dst_bits = dst_ty.floatBits(self.target.*); + const src_ty = self.typeOf(ty_op.operand); + const src_bits = src_ty.floatBits(self.target.*); + + const src_mcv = try self.resolveInst(ty_op.operand); + const dst_mcv = if (src_mcv.isRegister() and self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) + src_mcv + else + try self.copyToRegisterWithInstTracking(inst, dst_ty, src_mcv); + const dst_reg = dst_mcv.getReg().?.to128(); + const dst_lock = self.register_manager.lockReg(dst_reg); + defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); + + if (dst_bits == 16 and self.hasFeature(.f16c)) { + switch (src_bits) { + 32 => { + const mat_src_reg = if (src_mcv.isRegister()) + src_mcv.getReg().? + else + try self.copyToTmpRegister(src_ty, src_mcv); + try self.asmRegisterRegisterImmediate( + .{ .v_, .cvtps2ph }, + dst_reg, + mat_src_reg.to128(), + Immediate.u(0b1_00), + ); + }, + else => return self.fail("TODO implement airFptrunc from {} to {}", .{ + src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?), + }), + } + } else if (src_bits == 64 and dst_bits == 32) { + if (self.hasFeature(.avx)) if (src_mcv.isMemory()) try self.asmRegisterRegisterMemory( + .{ .v_ss, .cvtsd2 }, + dst_reg, + dst_reg, + src_mcv.mem(.qword), + ) else try self.asmRegisterRegisterRegister( + .{ .v_ss, .cvtsd2 }, + dst_reg, + dst_reg, + (if (src_mcv.isRegister()) + src_mcv.getReg().? + else + try self.copyToTmpRegister(src_ty, src_mcv)).to128(), + ) else if (src_mcv.isMemory()) try self.asmRegisterMemory( + .{ ._ss, .cvtsd2 }, + dst_reg, + src_mcv.mem(.qword), + ) else try self.asmRegisterRegister( + .{ ._ss, .cvtsd2 }, + dst_reg, + (if (src_mcv.isRegister()) + src_mcv.getReg().? 
+ else + try self.copyToTmpRegister(src_ty, src_mcv)).to128(), + ); + } else return self.fail("TODO implement airFptrunc from {} to {}", .{ + src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?), + }); + return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none }); } fn airFpext(self: *Self, inst: Air.Inst.Index) !void { const ty_op = self.air.instructions.items(.data)[inst].ty_op; - _ = ty_op; - return self.fail("TODO implement airFpext for {}", .{self.target.cpu.arch}); - // return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); + const dst_ty = self.typeOfIndex(inst); + const dst_bits = dst_ty.floatBits(self.target.*); + const src_ty = self.typeOf(ty_op.operand); + const src_bits = src_ty.floatBits(self.target.*); + + const src_mcv = try self.resolveInst(ty_op.operand); + const dst_mcv = if (src_mcv.isRegister() and self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) + src_mcv + else + try self.copyToRegisterWithInstTracking(inst, dst_ty, src_mcv); + const dst_reg = dst_mcv.getReg().?.to128(); + const dst_lock = self.register_manager.lockReg(dst_reg); + defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); + + if (src_bits == 16 and self.hasFeature(.f16c)) { + const mat_src_reg = if (src_mcv.isRegister()) + src_mcv.getReg().? + else + try self.copyToTmpRegister(src_ty, src_mcv); + try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg, mat_src_reg.to128()); + switch (dst_bits) { + 32 => {}, + 64 => try self.asmRegisterRegisterRegister(.{ .v_sd, .cvtss2 }, dst_reg, dst_reg, dst_reg), + else => return self.fail("TODO implement airFpext from {} to {}", .{ + src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?), + }), + } + } else if (src_bits == 32 and dst_bits == 64) { + if (self.hasFeature(.avx)) if (src_mcv.isMemory()) try self.asmRegisterRegisterMemory( + .{ .v_sd, .cvtss2 }, + dst_reg, + dst_reg, + src_mcv.mem(.dword), + ) else try self.asmRegisterRegisterRegister( + .{ .v_sd, .cvtss2 }, + dst_reg, + dst_reg, + (if (src_mcv.isRegister()) + src_mcv.getReg().? + else + try self.copyToTmpRegister(src_ty, src_mcv)).to128(), + ) else if (src_mcv.isMemory()) try self.asmRegisterMemory( + .{ ._sd, .cvtss2 }, + dst_reg, + src_mcv.mem(.dword), + ) else try self.asmRegisterRegister( + .{ ._sd, .cvtss2 }, + dst_reg, + (if (src_mcv.isRegister()) + src_mcv.getReg().? 
+ else + try self.copyToTmpRegister(src_ty, src_mcv)).to128(), + ); + } else return self.fail("TODO implement airFpext from {} to {}", .{ + src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?), + }); + return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none }); } fn airIntCast(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; const ty_op = self.air.instructions.items(.data)[inst].ty_op; + const result: MCValue = result: { + const src_ty = self.typeOf(ty_op.operand); + const src_int_info = src_ty.intInfo(mod); - const src_ty = self.air.typeOf(ty_op.operand); - const src_int_info = src_ty.intInfo(self.target.*); - const src_abi_size = @intCast(u32, src_ty.abiSize(self.target.*)); - const src_mcv = try self.resolveInst(ty_op.operand); - const src_lock = switch (src_mcv) { - .register => |reg| self.register_manager.lockRegAssumeUnused(reg), - else => null, - }; - defer if (src_lock) |lock| self.register_manager.unlockReg(lock); + const dst_ty = self.typeOfIndex(inst); + const dst_int_info = dst_ty.intInfo(mod); + const abi_size = @intCast(u32, dst_ty.abiSize(mod)); - const dst_ty = self.air.typeOfIndex(inst); - const dst_int_info = dst_ty.intInfo(self.target.*); - const dst_abi_size = @intCast(u32, dst_ty.abiSize(self.target.*)); - const dst_mcv = if (dst_abi_size <= src_abi_size and - self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) - src_mcv - else - try self.allocRegOrMem(inst, true); + const min_ty = if (dst_int_info.bits < src_int_info.bits) dst_ty else src_ty; + const extend = switch (src_int_info.signedness) { + .signed => dst_int_info, + .unsigned => src_int_info, + }.signedness; - const min_ty = if (dst_int_info.bits < src_int_info.bits) dst_ty else src_ty; - const signedness: std.builtin.Signedness = if (dst_int_info.signedness == .signed and - src_int_info.signedness == .signed) .signed else .unsigned; - switch (dst_mcv) { - .register => |dst_reg| { - const min_abi_size = @min(dst_abi_size, src_abi_size); - const tag: Mir.Inst.Tag = switch (signedness) { - .signed => .movsx, - .unsigned => if (min_abi_size > 2) .mov else .movzx, - }; - const dst_alias = switch (tag) { - .movsx => dst_reg.to64(), - .mov, .movzx => if (min_abi_size > 4) dst_reg.to64() else dst_reg.to32(), - else => unreachable, - }; - switch (src_mcv) { - .register => |src_reg| { - try self.asmRegisterRegister( - tag, - dst_alias, - registerAlias(src_reg, min_abi_size), + const src_mcv = try self.resolveInst(ty_op.operand); + const src_storage_bits = switch (src_mcv) { + .register, .register_offset => 64, + .load_frame => |frame_addr| self.getFrameAddrSize(frame_addr) * 8, + else => src_int_info.bits, + }; + + const dst_mcv = if (dst_int_info.bits <= src_storage_bits and + self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) src_mcv else dst: { + const dst_mcv = try self.allocRegOrMem(inst, true); + try self.genCopy(min_ty, dst_mcv, src_mcv); + break :dst dst_mcv; + }; + + if (dst_int_info.bits <= src_int_info.bits) break :result if (dst_mcv.isRegister()) + .{ .register = registerAlias(dst_mcv.getReg().?, abi_size) } + else + dst_mcv; + + if (dst_mcv.isRegister()) { + try self.truncateRegister(src_ty, dst_mcv.getReg().?); + break :result .{ .register = registerAlias(dst_mcv.getReg().?, abi_size) }; + } + + const src_limbs_len = std.math.divCeil(u16, src_int_info.bits, 64) catch unreachable; + const dst_limbs_len = std.math.divCeil(u16, dst_int_info.bits, 64) catch unreachable; + + const high_mcv = 
dst_mcv.address().offset((src_limbs_len - 1) * 8).deref(); + const high_reg = try self.copyToTmpRegister(switch (src_int_info.signedness) { + .signed => Type.isize, + .unsigned => Type.usize, + }, high_mcv); + const high_lock = self.register_manager.lockRegAssumeUnused(high_reg); + defer self.register_manager.unlockReg(high_lock); + + const high_bits = src_int_info.bits % 64; + if (high_bits > 0) { + const high_ty = try mod.intType(extend, high_bits); + try self.truncateRegister(high_ty, high_reg); + try self.genCopy(Type.usize, high_mcv, .{ .register = high_reg }); + } + + if (dst_limbs_len > src_limbs_len) try self.genInlineMemset( + dst_mcv.address().offset(src_limbs_len * 8), + switch (extend) { + .signed => extend: { + const extend_mcv = MCValue{ .register = high_reg }; + try self.genShiftBinOpMir( + .{ ._r, .sa }, + Type.isize, + extend_mcv, + .{ .immediate = 63 }, ); + break :extend extend_mcv; }, - .load_frame => |frame_addr| try self.asmRegisterMemory( - tag, - dst_alias, - Memory.sib(Memory.PtrSize.fromSize(min_abi_size), .{ - .base = .{ .frame = frame_addr.index }, - .disp = frame_addr.off, - }), - ), - else => return self.fail("TODO airIntCast from {s} to {s}", .{ - @tagName(src_mcv), - @tagName(dst_mcv), - }), - } - if (self.regExtraBits(min_ty) > 0) try self.truncateRegister(min_ty, dst_reg); - }, - else => { - try self.genCopy(min_ty, dst_mcv, src_mcv); - const extra = dst_abi_size * 8 - dst_int_info.bits; - if (extra > 0) { - try self.genShiftBinOpMir(switch (signedness) { - .signed => .sal, - .unsigned => .shl, - }, dst_ty, dst_mcv, .{ .immediate = extra }); - try self.genShiftBinOpMir(switch (signedness) { - .signed => .sar, - .unsigned => .shr, - }, dst_ty, dst_mcv, .{ .immediate = extra }); - } - }, - } - return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none }); + .unsigned => .{ .immediate = 0 }, + }, + .{ .immediate = (dst_limbs_len - src_limbs_len) * 8 }, + ); + + break :result dst_mcv; + }; + return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); } fn airTrunc(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; const ty_op = self.air.instructions.items(.data)[inst].ty_op; - const dst_ty = self.air.typeOfIndex(inst); - const dst_abi_size = dst_ty.abiSize(self.target.*); - if (dst_abi_size > 8) { - return self.fail("TODO implement trunc for abi sizes larger than 8", .{}); - } + const dst_ty = self.typeOfIndex(inst); + const dst_abi_size = @intCast(u32, dst_ty.abiSize(mod)); + const src_ty = self.typeOf(ty_op.operand); + const src_abi_size = @intCast(u32, src_ty.abiSize(mod)); - const src_mcv = try self.resolveInst(ty_op.operand); - const src_lock = switch (src_mcv) { - .register => |reg| self.register_manager.lockRegAssumeUnused(reg), - else => null, - }; - defer if (src_lock) |lock| self.register_manager.unlockReg(lock); + const result = result: { + const src_mcv = try self.resolveInst(ty_op.operand); + const src_lock = + if (src_mcv.getReg()) |reg| self.register_manager.lockRegAssumeUnused(reg) else null; + defer if (src_lock) |lock| self.register_manager.unlockReg(lock); - const dst_mcv = if (src_mcv.isRegister() and self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) - src_mcv - else - try self.copyToRegisterWithInstTracking(inst, dst_ty, src_mcv); + const dst_mcv = if (src_mcv.isRegister() and self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) + src_mcv + else + try self.copyToRegisterWithInstTracking(inst, dst_ty, src_mcv); + + if (dst_ty.zigTypeTag(mod) == .Vector) { + 
assert(src_ty.zigTypeTag(mod) == .Vector and dst_ty.vectorLen(mod) == src_ty.vectorLen(mod)); + const dst_info = dst_ty.childType(mod).intInfo(mod); + const src_info = src_ty.childType(mod).intInfo(mod); + const mir_tag = if (@as(?Mir.Inst.FixedTag, switch (dst_info.bits) { + 8 => switch (src_info.bits) { + 16 => switch (dst_ty.vectorLen(mod)) { + 1...8 => if (self.hasFeature(.avx)) .{ .vp_b, .ackusw } else .{ .p_b, .ackusw }, + 9...16 => if (self.hasFeature(.avx2)) .{ .vp_b, .ackusw } else null, + else => null, + }, + else => null, + }, + 16 => switch (src_info.bits) { + 32 => switch (dst_ty.vectorLen(mod)) { + 1...4 => if (self.hasFeature(.avx)) + .{ .vp_w, .ackusd } + else if (self.hasFeature(.sse4_1)) + .{ .p_w, .ackusd } + else + null, + 5...8 => if (self.hasFeature(.avx2)) .{ .vp_w, .ackusd } else null, + else => null, + }, + else => null, + }, + else => null, + })) |tag| tag else return self.fail("TODO implement airTrunc for {}", .{ + dst_ty.fmt(self.bin_file.options.module.?), + }); - // when truncating a `u16` to `u5`, for example, those top 3 bits in the result - // have to be removed. this only happens if the dst if not a power-of-two size. - if (self.regExtraBits(dst_ty) > 0) try self.truncateRegister(dst_ty, dst_mcv.register.to64()); + const elem_ty = src_ty.childType(mod); + const mask_val = try mod.intValue(elem_ty, @as(u64, math.maxInt(u64)) >> @intCast(u6, 64 - dst_info.bits)); - return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none }); + const splat_ty = try mod.vectorType(.{ + .len = @intCast(u32, @divExact(@as(u64, if (src_abi_size > 16) 256 else 128), src_info.bits)), + .child = elem_ty.ip_index, + }); + const splat_abi_size = @intCast(u32, splat_ty.abiSize(mod)); + + const splat_val = try mod.intern(.{ .aggregate = .{ + .ty = splat_ty.ip_index, + .storage = .{ .repeated_elem = mask_val.ip_index }, + } }); + + const splat_mcv = try self.genTypedValue(.{ .ty = splat_ty, .val = splat_val.toValue() }); + const splat_addr_mcv: MCValue = switch (splat_mcv) { + .memory, .indirect, .load_frame => splat_mcv.address(), + else => .{ .register = try self.copyToTmpRegister(Type.usize, splat_mcv.address()) }, + }; + + const dst_reg = registerAlias(dst_mcv.getReg().?, src_abi_size); + if (self.hasFeature(.avx)) { + try self.asmRegisterRegisterMemory( + .{ .vp_, .@"and" }, + dst_reg, + dst_reg, + splat_addr_mcv.deref().mem(Memory.PtrSize.fromSize(splat_abi_size)), + ); + try self.asmRegisterRegisterRegister(mir_tag, dst_reg, dst_reg, dst_reg); + } else { + try self.asmRegisterMemory( + .{ .p_, .@"and" }, + dst_reg, + splat_addr_mcv.deref().mem(Memory.PtrSize.fromSize(splat_abi_size)), + ); + try self.asmRegisterRegister(mir_tag, dst_reg, dst_reg); + } + break :result dst_mcv; + } + + if (dst_abi_size > 8) { + return self.fail("TODO implement trunc for abi sizes larger than 8", .{}); + } + + // when truncating a `u16` to `u5`, for example, those top 3 bits in the result + // have to be removed. this only happens if the dst if not a power-of-two size. 
+ if (self.regExtraBits(dst_ty) > 0) + try self.truncateRegister(dst_ty, dst_mcv.register.to64()); + + break :result dst_mcv; + }; + return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); } fn airBoolToInt(self: *Self, inst: Air.Inst.Index) !void { const un_op = self.air.instructions.items(.data)[inst].un_op; - const ty = self.air.typeOfIndex(inst); + const ty = self.typeOfIndex(inst); const operand = try self.resolveInst(un_op); const dst_mcv = if (self.reuseOperand(inst, un_op, 0, operand)) @@ -2003,20 +2820,21 @@ fn airBoolToInt(self: *Self, inst: Air.Inst.Index) !void { } fn airSlice(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; const ty_pl = self.air.instructions.items(.data)[inst].ty_pl; const bin_op = self.air.extraData(Air.Bin, ty_pl.payload).data; - const slice_ty = self.air.typeOfIndex(inst); + const slice_ty = self.typeOfIndex(inst); const ptr = try self.resolveInst(bin_op.lhs); - const ptr_ty = self.air.typeOf(bin_op.lhs); + const ptr_ty = self.typeOf(bin_op.lhs); const len = try self.resolveInst(bin_op.rhs); - const len_ty = self.air.typeOf(bin_op.rhs); + const len_ty = self.typeOf(bin_op.rhs); - const frame_index = try self.allocFrameIndex(FrameAlloc.initType(slice_ty, self.target.*)); + const frame_index = try self.allocFrameIndex(FrameAlloc.initType(slice_ty, mod)); try self.genSetMem(.{ .frame = frame_index }, 0, ptr_ty, ptr); try self.genSetMem( .{ .frame = frame_index }, - @intCast(i32, ptr_ty.abiSize(self.target.*)), + @intCast(i32, ptr_ty.abiSize(mod)), len_ty, len, ); @@ -2045,23 +2863,24 @@ fn airPtrArithmetic(self: *Self, inst: Air.Inst.Index, tag: Air.Inst.Tag) !void } fn activeIntBits(self: *Self, dst_air: Air.Inst.Ref) u16 { + const mod = self.bin_file.options.module.?; const air_tag = self.air.instructions.items(.tag); const air_data = self.air.instructions.items(.data); - const dst_ty = self.air.typeOf(dst_air); - const dst_info = dst_ty.intInfo(self.target.*); + const dst_ty = self.typeOf(dst_air); + const dst_info = dst_ty.intInfo(mod); if (Air.refToIndex(dst_air)) |inst| { switch (air_tag[inst]) { - .constant => { - const src_val = self.air.values[air_data[inst].ty_pl.payload]; + .interned => { + const src_val = air_data[inst].interned.toValue(); var space: Value.BigIntSpace = undefined; - const src_int = src_val.toBigInt(&space, self.target.*); + const src_int = src_val.toBigInt(&space, mod); return @intCast(u16, src_int.bitCountTwosComp()) + @boolToInt(src_int.positive and dst_info.signedness == .signed); }, .intcast => { - const src_ty = self.air.typeOf(air_data[inst].ty_op.operand); - const src_info = src_ty.intInfo(self.target.*); + const src_ty = self.typeOf(air_data[inst].ty_op.operand); + const src_info = src_ty.intInfo(mod); return @min(switch (src_info.signedness) { .signed => switch (dst_info.signedness) { .signed => src_info.bits, @@ -2080,28 +2899,28 @@ fn activeIntBits(self: *Self, dst_air: Air.Inst.Ref) u16 { } fn airMulDivBinOp(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; const bin_op = self.air.instructions.items(.data)[inst].bin_op; const result = result: { const tag = self.air.instructions.items(.tag)[inst]; - const dst_ty = self.air.typeOfIndex(inst); - if (dst_ty.zigTypeTag() == .Float) - break :result try self.genBinOp(inst, tag, bin_op.lhs, bin_op.rhs); - - const dst_info = dst_ty.intInfo(self.target.*); - var src_pl = Type.Payload.Bits{ .base = .{ .tag = switch (dst_info.signedness) { - .signed => .int_signed, - .unsigned => 
.int_unsigned, - } }, .data = switch (tag) { + const dst_ty = self.typeOfIndex(inst); + switch (dst_ty.zigTypeTag(mod)) { + .Float, .Vector => break :result try self.genBinOp(inst, tag, bin_op.lhs, bin_op.rhs), + else => {}, + } + + const dst_info = dst_ty.intInfo(mod); + const src_ty = try mod.intType(dst_info.signedness, switch (tag) { else => unreachable, - .mul, .mulwrap => math.max3( + .mul, .mulwrap => @max( self.activeIntBits(bin_op.lhs), self.activeIntBits(bin_op.rhs), dst_info.bits / 2, ), .div_trunc, .div_floor, .div_exact, .rem, .mod => dst_info.bits, - } }; - const src_ty = Type.initPayload(&src_pl.base); + }); + try self.spillEflagsIfOccupied(); try self.spillRegisters(&.{ .rax, .rdx }); const lhs = try self.resolveInst(bin_op.lhs); const rhs = try self.resolveInst(bin_op.rhs); @@ -2111,8 +2930,9 @@ fn airMulDivBinOp(self: *Self, inst: Air.Inst.Index) !void { } fn airAddSat(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; const bin_op = self.air.instructions.items(.data)[inst].bin_op; - const ty = self.air.typeOf(bin_op.lhs); + const ty = self.typeOf(bin_op.lhs); const lhs_mcv = try self.resolveInst(bin_op.lhs); const dst_mcv = if (lhs_mcv.isRegister() and self.reuseOperand(inst, bin_op.lhs, 0, lhs_mcv)) @@ -2136,34 +2956,62 @@ fn airAddSat(self: *Self, inst: Air.Inst.Index) !void { defer self.register_manager.unlockReg(limit_lock); const reg_bits = self.regBitSize(ty); - const cc: Condition = if (ty.isSignedInt()) cc: { + const reg_extra_bits = self.regExtraBits(ty); + const cc: Condition = if (ty.isSignedInt(mod)) cc: { + if (reg_extra_bits > 0) { + try self.genShiftBinOpMir(.{ ._l, .sa }, ty, dst_mcv, .{ .immediate = reg_extra_bits }); + } try self.genSetReg(limit_reg, ty, dst_mcv); - try self.genShiftBinOpMir(.sar, ty, limit_mcv, .{ .immediate = reg_bits - 1 }); - try self.genBinOpMir(.xor, ty, limit_mcv, .{ + try self.genShiftBinOpMir(.{ ._r, .sa }, ty, limit_mcv, .{ .immediate = reg_bits - 1 }); + try self.genBinOpMir(.{ ._, .xor }, ty, limit_mcv, .{ .immediate = (@as(u64, 1) << @intCast(u6, reg_bits - 1)) - 1, }); + if (reg_extra_bits > 0) { + const shifted_rhs_reg = try self.copyToTmpRegister(ty, rhs_mcv); + const shifted_rhs_mcv = MCValue{ .register = shifted_rhs_reg }; + const shifted_rhs_lock = self.register_manager.lockRegAssumeUnused(shifted_rhs_reg); + defer self.register_manager.unlockReg(shifted_rhs_lock); + + try self.genShiftBinOpMir( + .{ ._l, .sa }, + ty, + shifted_rhs_mcv, + .{ .immediate = reg_extra_bits }, + ); + try self.genBinOpMir(.{ ._, .add }, ty, dst_mcv, shifted_rhs_mcv); + } else try self.genBinOpMir(.{ ._, .add }, ty, dst_mcv, rhs_mcv); break :cc .o; } else cc: { try self.genSetReg(limit_reg, ty, .{ - .immediate = @as(u64, math.maxInt(u64)) >> @intCast(u6, 64 - reg_bits), + .immediate = @as(u64, math.maxInt(u64)) >> @intCast(u6, 64 - ty.bitSize(mod)), }); + + try self.genBinOpMir(.{ ._, .add }, ty, dst_mcv, rhs_mcv); + if (reg_extra_bits > 0) { + try self.genBinOpMir(.{ ._, .cmp }, ty, dst_mcv, limit_mcv); + break :cc .a; + } break :cc .c; }; - try self.genBinOpMir(.add, ty, dst_mcv, rhs_mcv); - const cmov_abi_size = @max(@intCast(u32, ty.abiSize(self.target.*)), 2); + const cmov_abi_size = @max(@intCast(u32, ty.abiSize(mod)), 2); try self.asmCmovccRegisterRegister( registerAlias(dst_reg, cmov_abi_size), registerAlias(limit_reg, cmov_abi_size), cc, ); + if (reg_extra_bits > 0 and ty.isSignedInt(mod)) { + try self.genShiftBinOpMir(.{ ._r, .sa }, ty, dst_mcv, .{ .immediate = reg_extra_bits }); + } + return 
self.finishAir(inst, dst_mcv, .{ bin_op.lhs, bin_op.rhs, .none }); } fn airSubSat(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; const bin_op = self.air.instructions.items(.data)[inst].bin_op; - const ty = self.air.typeOf(bin_op.lhs); + const ty = self.typeOf(bin_op.lhs); const lhs_mcv = try self.resolveInst(bin_op.lhs); const dst_mcv = if (lhs_mcv.isRegister() and self.reuseOperand(inst, bin_op.lhs, 0, lhs_mcv)) @@ -2187,32 +3035,55 @@ fn airSubSat(self: *Self, inst: Air.Inst.Index) !void { defer self.register_manager.unlockReg(limit_lock); const reg_bits = self.regBitSize(ty); - const cc: Condition = if (ty.isSignedInt()) cc: { + const reg_extra_bits = self.regExtraBits(ty); + const cc: Condition = if (ty.isSignedInt(mod)) cc: { + if (reg_extra_bits > 0) { + try self.genShiftBinOpMir(.{ ._l, .sa }, ty, dst_mcv, .{ .immediate = reg_extra_bits }); + } try self.genSetReg(limit_reg, ty, dst_mcv); - try self.genShiftBinOpMir(.sar, ty, limit_mcv, .{ .immediate = reg_bits - 1 }); - try self.genBinOpMir(.xor, ty, limit_mcv, .{ + try self.genShiftBinOpMir(.{ ._r, .sa }, ty, limit_mcv, .{ .immediate = reg_bits - 1 }); + try self.genBinOpMir(.{ ._, .xor }, ty, limit_mcv, .{ .immediate = (@as(u64, 1) << @intCast(u6, reg_bits - 1)) - 1, }); + if (reg_extra_bits > 0) { + const shifted_rhs_reg = try self.copyToTmpRegister(ty, rhs_mcv); + const shifted_rhs_mcv = MCValue{ .register = shifted_rhs_reg }; + const shifted_rhs_lock = self.register_manager.lockRegAssumeUnused(shifted_rhs_reg); + defer self.register_manager.unlockReg(shifted_rhs_lock); + + try self.genShiftBinOpMir( + .{ ._l, .sa }, + ty, + shifted_rhs_mcv, + .{ .immediate = reg_extra_bits }, + ); + try self.genBinOpMir(.{ ._, .sub }, ty, dst_mcv, shifted_rhs_mcv); + } else try self.genBinOpMir(.{ ._, .sub }, ty, dst_mcv, rhs_mcv); break :cc .o; } else cc: { try self.genSetReg(limit_reg, ty, .{ .immediate = 0 }); + try self.genBinOpMir(.{ ._, .sub }, ty, dst_mcv, rhs_mcv); break :cc .c; }; - try self.genBinOpMir(.sub, ty, dst_mcv, rhs_mcv); - const cmov_abi_size = @max(@intCast(u32, ty.abiSize(self.target.*)), 2); + const cmov_abi_size = @max(@intCast(u32, ty.abiSize(mod)), 2); try self.asmCmovccRegisterRegister( registerAlias(dst_reg, cmov_abi_size), registerAlias(limit_reg, cmov_abi_size), cc, ); + if (reg_extra_bits > 0 and ty.isSignedInt(mod)) { + try self.genShiftBinOpMir(.{ ._r, .sa }, ty, dst_mcv, .{ .immediate = reg_extra_bits }); + } + return self.finishAir(inst, dst_mcv, .{ bin_op.lhs, bin_op.rhs, .none }); } fn airMulSat(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; const bin_op = self.air.instructions.items(.data)[inst].bin_op; - const ty = self.air.typeOf(bin_op.lhs); + const ty = self.typeOf(bin_op.lhs); try self.spillRegisters(&.{ .rax, .rdx }); const reg_locks = self.register_manager.lockRegs(2, .{ .rax, .rdx }); @@ -2238,11 +3109,11 @@ fn airMulSat(self: *Self, inst: Air.Inst.Index) !void { defer self.register_manager.unlockReg(limit_lock); const reg_bits = self.regBitSize(ty); - const cc: Condition = if (ty.isSignedInt()) cc: { + const cc: Condition = if (ty.isSignedInt(mod)) cc: { try self.genSetReg(limit_reg, ty, lhs_mcv); - try self.genBinOpMir(.xor, ty, limit_mcv, rhs_mcv); - try self.genShiftBinOpMir(.sar, ty, limit_mcv, .{ .immediate = reg_bits - 1 }); - try self.genBinOpMir(.xor, ty, limit_mcv, .{ + try self.genBinOpMir(.{ ._, .xor }, ty, limit_mcv, rhs_mcv); + try self.genShiftBinOpMir(.{ ._, .sa }, ty, limit_mcv, .{ .immediate = 
reg_bits - 1 }); + try self.genBinOpMir(.{ ._, .xor }, ty, limit_mcv, .{ .immediate = (@as(u64, 1) << @intCast(u6, reg_bits - 1)) - 1, }); break :cc .o; @@ -2254,7 +3125,7 @@ fn airMulSat(self: *Self, inst: Air.Inst.Index) !void { }; const dst_mcv = try self.genMulDivBinOp(.mul, inst, ty, ty, lhs_mcv, rhs_mcv); - const cmov_abi_size = @max(@intCast(u32, ty.abiSize(self.target.*)), 2); + const cmov_abi_size = @max(@intCast(u32, ty.abiSize(mod)), 2); try self.asmCmovccRegisterRegister( registerAlias(dst_mcv.register, cmov_abi_size), registerAlias(limit_reg, cmov_abi_size), @@ -2265,12 +3136,13 @@ fn airMulSat(self: *Self, inst: Air.Inst.Index) !void { } fn airAddSubWithOverflow(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; const ty_pl = self.air.instructions.items(.data)[inst].ty_pl; const bin_op = self.air.extraData(Air.Bin, ty_pl.payload).data; const result: MCValue = result: { const tag = self.air.instructions.items(.tag)[inst]; - const ty = self.air.typeOf(bin_op.lhs); - switch (ty.zigTypeTag()) { + const ty = self.typeOf(bin_op.lhs); + switch (ty.zigTypeTag(mod)) { .Vector => return self.fail("TODO implement add/sub with overflow for Vector type", .{}), .Int => { try self.spillEflagsIfOccupied(); @@ -2280,13 +3152,13 @@ fn airAddSubWithOverflow(self: *Self, inst: Air.Inst.Index) !void { .sub_with_overflow => .sub, else => unreachable, }, bin_op.lhs, bin_op.rhs); - const int_info = ty.intInfo(self.target.*); + const int_info = ty.intInfo(mod); const cc: Condition = switch (int_info.signedness) { .unsigned => .c, .signed => .o, }; - const tuple_ty = self.air.typeOfIndex(inst); + const tuple_ty = self.typeOfIndex(inst); if (int_info.bits >= 8 and math.isPowerOfTwo(int_info.bits)) { switch (partial_mcv) { .register => |reg| { @@ -2297,16 +3169,16 @@ fn airAddSubWithOverflow(self: *Self, inst: Air.Inst.Index) !void { } const frame_index = - try self.allocFrameIndex(FrameAlloc.initType(tuple_ty, self.target.*)); + try self.allocFrameIndex(FrameAlloc.initType(tuple_ty, mod)); try self.genSetMem( .{ .frame = frame_index }, - @intCast(i32, tuple_ty.structFieldOffset(1, self.target.*)), + @intCast(i32, tuple_ty.structFieldOffset(1, mod)), Type.u1, .{ .eflags = cc }, ); try self.genSetMem( .{ .frame = frame_index }, - @intCast(i32, tuple_ty.structFieldOffset(0, self.target.*)), + @intCast(i32, tuple_ty.structFieldOffset(0, mod)), ty, partial_mcv, ); @@ -2314,13 +3186,8 @@ fn airAddSubWithOverflow(self: *Self, inst: Air.Inst.Index) !void { } const frame_index = - try self.allocFrameIndex(FrameAlloc.initType(tuple_ty, self.target.*)); - try self.genSetFrameTruncatedOverflowCompare( - tuple_ty, - frame_index, - partial_mcv.register, - cc, - ); + try self.allocFrameIndex(FrameAlloc.initType(tuple_ty, mod)); + try self.genSetFrameTruncatedOverflowCompare(tuple_ty, frame_index, partial_mcv, cc); break :result .{ .load_frame = .{ .index = frame_index } }; }, else => unreachable, @@ -2330,12 +3197,13 @@ fn airAddSubWithOverflow(self: *Self, inst: Air.Inst.Index) !void { } fn airShlWithOverflow(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; const ty_pl = self.air.instructions.items(.data)[inst].ty_pl; const bin_op = self.air.extraData(Air.Bin, ty_pl.payload).data; const result: MCValue = result: { - const lhs_ty = self.air.typeOf(bin_op.lhs); - const rhs_ty = self.air.typeOf(bin_op.rhs); - switch (lhs_ty.zigTypeTag()) { + const lhs_ty = self.typeOf(bin_op.lhs); + const rhs_ty = self.typeOf(bin_op.rhs); + switch 
(lhs_ty.zigTypeTag(mod)) { .Vector => return self.fail("TODO implement shl with overflow for Vector type", .{}), .Int => { try self.spillEflagsIfOccupied(); @@ -2344,7 +3212,7 @@ fn airShlWithOverflow(self: *Self, inst: Air.Inst.Index) !void { const lhs = try self.resolveInst(bin_op.lhs); const rhs = try self.resolveInst(bin_op.rhs); - const int_info = lhs_ty.intInfo(self.target.*); + const int_info = lhs_ty.intInfo(mod); const partial_mcv = try self.genShiftBinOp(.shl, null, lhs, rhs, lhs_ty, rhs_ty); const partial_lock = switch (partial_mcv) { @@ -2360,10 +3228,10 @@ fn airShlWithOverflow(self: *Self, inst: Air.Inst.Index) !void { }; defer if (tmp_lock) |lock| self.register_manager.unlockReg(lock); - try self.genBinOpMir(.cmp, lhs_ty, tmp_mcv, lhs); + try self.genBinOpMir(.{ ._, .cmp }, lhs_ty, tmp_mcv, lhs); const cc = Condition.ne; - const tuple_ty = self.air.typeOfIndex(inst); + const tuple_ty = self.typeOfIndex(inst); if (int_info.bits >= 8 and math.isPowerOfTwo(int_info.bits)) { switch (partial_mcv) { .register => |reg| { @@ -2374,30 +3242,25 @@ fn airShlWithOverflow(self: *Self, inst: Air.Inst.Index) !void { } const frame_index = - try self.allocFrameIndex(FrameAlloc.initType(tuple_ty, self.target.*)); + try self.allocFrameIndex(FrameAlloc.initType(tuple_ty, mod)); try self.genSetMem( .{ .frame = frame_index }, - @intCast(i32, tuple_ty.structFieldOffset(1, self.target.*)), - tuple_ty.structFieldType(1), + @intCast(i32, tuple_ty.structFieldOffset(1, mod)), + tuple_ty.structFieldType(1, mod), .{ .eflags = cc }, ); try self.genSetMem( .{ .frame = frame_index }, - @intCast(i32, tuple_ty.structFieldOffset(0, self.target.*)), - tuple_ty.structFieldType(0), + @intCast(i32, tuple_ty.structFieldOffset(0, mod)), + tuple_ty.structFieldType(0, mod), partial_mcv, ); break :result .{ .load_frame = .{ .index = frame_index } }; } const frame_index = - try self.allocFrameIndex(FrameAlloc.initType(tuple_ty, self.target.*)); - try self.genSetFrameTruncatedOverflowCompare( - tuple_ty, - frame_index, - partial_mcv.register, - cc, - ); + try self.allocFrameIndex(FrameAlloc.initType(tuple_ty, mod)); + try self.genSetFrameTruncatedOverflowCompare(tuple_ty, frame_index, partial_mcv, cc); break :result .{ .load_frame = .{ .index = frame_index } }; }, else => unreachable, @@ -2410,173 +3273,140 @@ fn genSetFrameTruncatedOverflowCompare( self: *Self, tuple_ty: Type, frame_index: FrameIndex, - reg: Register, - cc: Condition, + src_mcv: MCValue, + overflow_cc: ?Condition, ) !void { - const reg_lock = self.register_manager.lockReg(reg); - defer if (reg_lock) |lock| self.register_manager.unlockReg(lock); - - const ty = tuple_ty.structFieldType(0); - const int_info = ty.intInfo(self.target.*); - const extended_ty = switch (int_info.signedness) { - .signed => Type.isize, - .unsigned => ty, + const mod = self.bin_file.options.module.?; + const src_lock = switch (src_mcv) { + .register => |reg| self.register_manager.lockReg(reg), + else => null, }; + defer if (src_lock) |lock| self.register_manager.unlockReg(lock); + + const ty = tuple_ty.structFieldType(0, mod); + const int_info = ty.intInfo(mod); + + const hi_limb_bits = (int_info.bits - 1) % 64 + 1; + const hi_limb_ty = try mod.intType(int_info.signedness, hi_limb_bits); + + const rest_ty = try mod.intType(.unsigned, int_info.bits - hi_limb_bits); const temp_regs = try self.register_manager.allocRegs(3, .{ null, null, null }, gp); - const temp_regs_locks = self.register_manager.lockRegsAssumeUnused(3, temp_regs); - defer for (temp_regs_locks) |rreg| { - 
self.register_manager.unlockReg(rreg); - }; + const temp_locks = self.register_manager.lockRegsAssumeUnused(3, temp_regs); + defer for (temp_locks) |lock| self.register_manager.unlockReg(lock); const overflow_reg = temp_regs[0]; - try self.asmSetccRegister(overflow_reg.to8(), cc); + if (overflow_cc) |cc| try self.asmSetccRegister(overflow_reg.to8(), cc); const scratch_reg = temp_regs[1]; - try self.genSetReg(scratch_reg, extended_ty, .{ .register = reg }); - try self.truncateRegister(ty, scratch_reg); - try self.genBinOpMir( - .cmp, - extended_ty, - .{ .register = reg }, - .{ .register = scratch_reg }, - ); + const hi_limb_off = if (int_info.bits <= 64) 0 else (int_info.bits - 1) / 64 * 8; + const hi_limb_mcv = if (hi_limb_off > 0) + src_mcv.address().offset(int_info.bits / 64 * 8).deref() + else + src_mcv; + try self.genSetReg(scratch_reg, hi_limb_ty, hi_limb_mcv); + try self.truncateRegister(hi_limb_ty, scratch_reg); + try self.genBinOpMir(.{ ._, .cmp }, hi_limb_ty, .{ .register = scratch_reg }, hi_limb_mcv); const eq_reg = temp_regs[2]; - try self.asmSetccRegister(eq_reg.to8(), .ne); - try self.genBinOpMir( - .@"or", - Type.u8, - .{ .register = overflow_reg }, - .{ .register = eq_reg }, - ); + if (overflow_cc) |_| { + try self.asmSetccRegister(eq_reg.to8(), .ne); + try self.genBinOpMir( + .{ ._, .@"or" }, + Type.u8, + .{ .register = overflow_reg }, + .{ .register = eq_reg }, + ); + } + const payload_off = @intCast(i32, tuple_ty.structFieldOffset(0, mod)); + if (hi_limb_off > 0) try self.genSetMem(.{ .frame = frame_index }, payload_off, rest_ty, src_mcv); try self.genSetMem( .{ .frame = frame_index }, - @intCast(i32, tuple_ty.structFieldOffset(1, self.target.*)), - tuple_ty.structFieldType(1), - .{ .register = overflow_reg.to8() }, + payload_off + hi_limb_off, + hi_limb_ty, + .{ .register = scratch_reg }, ); try self.genSetMem( .{ .frame = frame_index }, - @intCast(i32, tuple_ty.structFieldOffset(0, self.target.*)), - ty, - .{ .register = scratch_reg }, + @intCast(i32, tuple_ty.structFieldOffset(1, mod)), + tuple_ty.structFieldType(1, mod), + if (overflow_cc) |_| .{ .register = overflow_reg.to8() } else .{ .eflags = .ne }, ); } fn airMulWithOverflow(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; const ty_pl = self.air.instructions.items(.data)[inst].ty_pl; const bin_op = self.air.extraData(Air.Bin, ty_pl.payload).data; - const result: MCValue = result: { - const dst_ty = self.air.typeOf(bin_op.lhs); - switch (dst_ty.zigTypeTag()) { - .Vector => return self.fail("TODO implement mul_with_overflow for Vector type", .{}), - .Int => { - try self.spillEflagsIfOccupied(); + const dst_ty = self.typeOf(bin_op.lhs); + const result: MCValue = switch (dst_ty.zigTypeTag(mod)) { + .Vector => return self.fail("TODO implement mul_with_overflow for Vector type", .{}), + .Int => result: { + try self.spillEflagsIfOccupied(); + try self.spillRegisters(&.{ .rax, .rdx }); - const dst_info = dst_ty.intInfo(self.target.*); - const cc: Condition = switch (dst_info.signedness) { - .unsigned => .c, - .signed => .o, - }; + const dst_info = dst_ty.intInfo(mod); + const cc: Condition = switch (dst_info.signedness) { + .unsigned => .c, + .signed => .o, + }; - const tuple_ty = self.air.typeOfIndex(inst); - if (dst_info.bits >= 8 and math.isPowerOfTwo(dst_info.bits)) { - var src_pl = Type.Payload.Bits{ .base = .{ .tag = switch (dst_info.signedness) { - .signed => .int_signed, - .unsigned => .int_unsigned, - } }, .data = math.max3( - self.activeIntBits(bin_op.lhs), - 
self.activeIntBits(bin_op.rhs), - dst_info.bits / 2, - ) }; - const src_ty = Type.initPayload(&src_pl.base); + const lhs_active_bits = self.activeIntBits(bin_op.lhs); + const rhs_active_bits = self.activeIntBits(bin_op.rhs); + const src_bits = @max(lhs_active_bits, rhs_active_bits, dst_info.bits / 2); + const src_ty = try mod.intType(dst_info.signedness, src_bits); - try self.spillRegisters(&.{ .rax, .rdx }); - const lhs = try self.resolveInst(bin_op.lhs); - const rhs = try self.resolveInst(bin_op.rhs); + const lhs = try self.resolveInst(bin_op.lhs); + const rhs = try self.resolveInst(bin_op.rhs); - const partial_mcv = try self.genMulDivBinOp(.mul, null, dst_ty, src_ty, lhs, rhs); - switch (partial_mcv) { - .register => |reg| { - self.eflags_inst = inst; - break :result .{ .register_overflow = .{ .reg = reg, .eflags = cc } }; - }, - else => {}, - } + const tuple_ty = self.typeOfIndex(inst); + const extra_bits = if (dst_info.bits <= 64) + self.regExtraBits(dst_ty) + else + dst_info.bits % 64; + const partial_mcv = try self.genMulDivBinOp(.mul, null, dst_ty, src_ty, lhs, rhs); + switch (partial_mcv) { + .register => |reg| if (extra_bits == 0) { + self.eflags_inst = inst; + break :result .{ .register_overflow = .{ .reg = reg, .eflags = cc } }; + } else { + const frame_index = + try self.allocFrameIndex(FrameAlloc.initType(tuple_ty, mod)); + try self.genSetFrameTruncatedOverflowCompare(tuple_ty, frame_index, partial_mcv, cc); + break :result .{ .load_frame = .{ .index = frame_index } }; + }, + else => { // For now, this is the only supported multiply that doesn't fit in a register. - assert(dst_info.bits == 128 and src_pl.data == 64); + assert(dst_info.bits <= 128 and src_bits == 64); + const frame_index = - try self.allocFrameIndex(FrameAlloc.initType(tuple_ty, self.target.*)); - try self.genSetMem( - .{ .frame = frame_index }, - @intCast(i32, tuple_ty.structFieldOffset(1, self.target.*)), - tuple_ty.structFieldType(1), - .{ .immediate = 0 }, // overflow is impossible for 64-bit*64-bit -> 128-bit - ); - try self.genSetMem( - .{ .frame = frame_index }, - @intCast(i32, tuple_ty.structFieldOffset(0, self.target.*)), - tuple_ty.structFieldType(0), + try self.allocFrameIndex(FrameAlloc.initType(tuple_ty, mod)); + if (dst_info.bits >= lhs_active_bits + rhs_active_bits) { + try self.genSetMem( + .{ .frame = frame_index }, + @intCast(i32, tuple_ty.structFieldOffset(0, mod)), + tuple_ty.structFieldType(0, mod), + partial_mcv, + ); + try self.genSetMem( + .{ .frame = frame_index }, + @intCast(i32, tuple_ty.structFieldOffset(1, mod)), + tuple_ty.structFieldType(1, mod), + .{ .immediate = 0 }, // cc being set is impossible + ); + } else try self.genSetFrameTruncatedOverflowCompare( + tuple_ty, + frame_index, partial_mcv, + null, ); break :result .{ .load_frame = .{ .index = frame_index } }; - } - - const dst_reg: Register = dst_reg: { - switch (dst_info.signedness) { - .signed => { - const lhs = try self.resolveInst(bin_op.lhs); - const rhs = try self.resolveInst(bin_op.rhs); - - const rhs_lock: ?RegisterLock = switch (rhs) { - .register => |reg| self.register_manager.lockRegAssumeUnused(reg), - else => null, - }; - defer if (rhs_lock) |lock| self.register_manager.unlockReg(lock); - - const dst_reg: Register = blk: { - if (lhs.isRegister()) break :blk lhs.register; - break :blk try self.copyToTmpRegister(dst_ty, lhs); - }; - const dst_reg_lock = self.register_manager.lockRegAssumeUnused(dst_reg); - defer self.register_manager.unlockReg(dst_reg_lock); - - const rhs_mcv: MCValue = blk: { - if 
(rhs.isRegister() or rhs.isMemory()) break :blk rhs; - break :blk MCValue{ .register = try self.copyToTmpRegister(dst_ty, rhs) }; - }; - const rhs_mcv_lock: ?RegisterLock = switch (rhs_mcv) { - .register => |reg| self.register_manager.lockReg(reg), - else => null, - }; - defer if (rhs_mcv_lock) |lock| self.register_manager.unlockReg(lock); - - try self.genIntMulComplexOpMir(Type.isize, .{ .register = dst_reg }, rhs_mcv); - - break :dst_reg dst_reg; - }, - .unsigned => { - try self.spillRegisters(&.{ .rax, .rdx }); - - const lhs = try self.resolveInst(bin_op.lhs); - const rhs = try self.resolveInst(bin_op.rhs); - - const dst_mcv = try self.genMulDivBinOp(.mul, null, dst_ty, dst_ty, lhs, rhs); - break :dst_reg dst_mcv.register; - }, - } - }; - - const frame_index = - try self.allocFrameIndex(FrameAlloc.initType(tuple_ty, self.target.*)); - try self.genSetFrameTruncatedOverflowCompare(tuple_ty, frame_index, dst_reg, cc); - break :result .{ .load_frame = .{ .index = frame_index } }; - }, - else => unreachable, - } + }, + } + }, + else => unreachable, }; return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none }); } @@ -2584,28 +3414,26 @@ fn airMulWithOverflow(self: *Self, inst: Air.Inst.Index) !void { /// Generates signed or unsigned integer multiplication/division. /// Clobbers .rax and .rdx registers. /// Quotient is saved in .rax and remainder in .rdx. -fn genIntMulDivOpMir( - self: *Self, - tag: Mir.Inst.Tag, - ty: Type, - lhs: MCValue, - rhs: MCValue, -) !void { - const abi_size = @intCast(u32, ty.abiSize(self.target.*)); +fn genIntMulDivOpMir(self: *Self, tag: Mir.Inst.FixedTag, ty: Type, lhs: MCValue, rhs: MCValue) !void { + const mod = self.bin_file.options.module.?; + const abi_size = @intCast(u32, ty.abiSize(mod)); if (abi_size > 8) { return self.fail("TODO implement genIntMulDivOpMir for ABI size larger than 8", .{}); } try self.genSetReg(.rax, ty, lhs); - switch (tag) { + switch (tag[1]) { else => unreachable, - .mul, .imul => {}, - .div => try self.asmRegisterRegister(.xor, .edx, .edx), - .idiv => switch (self.regBitSize(ty)) { - 8 => try self.asmOpOnly(.cbw), - 16 => try self.asmOpOnly(.cwd), - 32 => try self.asmOpOnly(.cdq), - 64 => try self.asmOpOnly(.cqo), + .mul => {}, + .div => switch (tag[0]) { + ._ => try self.asmRegisterRegister(.{ ._, .xor }, .edx, .edx), + .i_ => switch (self.regBitSize(ty)) { + 8 => try self.asmOpOnly(.{ ._, .cbw }), + 16 => try self.asmOpOnly(.{ ._, .cwd }), + 32 => try self.asmOpOnly(.{ ._, .cdq }), + 64 => try self.asmOpOnly(.{ ._, .cqo }), + else => unreachable, + }, else => unreachable, }, } @@ -2616,19 +3444,9 @@ fn genIntMulDivOpMir( }; switch (mat_rhs) { .register => |reg| try self.asmRegister(tag, registerAlias(reg, abi_size)), - .indirect, .load_frame => try self.asmMemory( + .memory, .indirect, .load_frame => try self.asmMemory( tag, - Memory.sib(Memory.PtrSize.fromSize(abi_size), switch (mat_rhs) { - .indirect => |reg_off| .{ - .base = .{ .reg = reg_off.reg }, - .disp = reg_off.off, - }, - .load_frame => |frame_addr| .{ - .base = .{ .frame = frame_addr.index }, - .disp = frame_addr.off, - }, - else => unreachable, - }), + mat_rhs.mem(Memory.PtrSize.fromSize(abi_size)), ), else => unreachable, } @@ -2637,8 +3455,9 @@ fn genIntMulDivOpMir( /// Always returns a register. /// Clobbers .rax and .rdx registers. 
fn genInlineIntDivFloor(self: *Self, ty: Type, lhs: MCValue, rhs: MCValue) !MCValue { - const abi_size = @intCast(u32, ty.abiSize(self.target.*)); - const int_info = ty.intInfo(self.target.*); + const mod = self.bin_file.options.module.?; + const abi_size = @intCast(u32, ty.abiSize(mod)); + const int_info = ty.intInfo(mod); const dividend: Register = switch (lhs) { .register => |reg| reg, else => try self.copyToTmpRegister(ty, lhs), @@ -2653,23 +3472,28 @@ fn genInlineIntDivFloor(self: *Self, ty: Type, lhs: MCValue, rhs: MCValue) !MCVa const divisor_lock = self.register_manager.lockReg(divisor); defer if (divisor_lock) |lock| self.register_manager.unlockReg(lock); - try self.genIntMulDivOpMir(switch (int_info.signedness) { - .signed => .idiv, - .unsigned => .div, - }, ty, .{ .register = dividend }, .{ .register = divisor }); + try self.genIntMulDivOpMir( + switch (int_info.signedness) { + .signed => .{ .i_, .div }, + .unsigned => .{ ._, .div }, + }, + ty, + .{ .register = dividend }, + .{ .register = divisor }, + ); try self.asmRegisterRegister( - .xor, + .{ ._, .xor }, registerAlias(divisor, abi_size), registerAlias(dividend, abi_size), ); try self.asmRegisterImmediate( - .sar, + .{ ._r, .sa }, registerAlias(divisor, abi_size), Immediate.u(int_info.bits - 1), ); try self.asmRegisterRegister( - .@"test", + .{ ._, .@"test" }, registerAlias(.rdx, abi_size), registerAlias(.rdx, abi_size), ); @@ -2678,7 +3502,7 @@ fn genInlineIntDivFloor(self: *Self, ty: Type, lhs: MCValue, rhs: MCValue) !MCVa registerAlias(.rdx, abi_size), .z, ); - try self.genBinOpMir(.add, ty, .{ .register = divisor }, .{ .register = .rax }); + try self.genBinOpMir(.{ ._, .add }, ty, .{ .register = divisor }, .{ .register = .rax }); return MCValue{ .register = divisor }; } @@ -2691,8 +3515,8 @@ fn airShlShrBinOp(self: *Self, inst: Air.Inst.Index) !void { try self.register_manager.getReg(.rcx, null); const lhs = try self.resolveInst(bin_op.lhs); const rhs = try self.resolveInst(bin_op.rhs); - const lhs_ty = self.air.typeOf(bin_op.lhs); - const rhs_ty = self.air.typeOf(bin_op.rhs); + const lhs_ty = self.typeOf(bin_op.lhs); + const rhs_ty = self.typeOf(bin_op.rhs); const result = try self.genShiftBinOp(tag, inst, lhs, rhs, lhs_ty, rhs_ty); @@ -2709,7 +3533,7 @@ fn airShlSat(self: *Self, inst: Air.Inst.Index) !void { fn airOptionalPayload(self: *Self, inst: Air.Inst.Index) !void { const ty_op = self.air.instructions.items(.data)[inst].ty_op; const result: MCValue = result: { - const pl_ty = self.air.typeOfIndex(inst); + const pl_ty = self.typeOfIndex(inst); const opt_mcv = try self.resolveInst(ty_op.operand); if (self.reuseOperand(inst, ty_op.operand, 0, opt_mcv)) { @@ -2734,7 +3558,7 @@ fn airOptionalPayload(self: *Self, inst: Air.Inst.Index) !void { fn airOptionalPayloadPtr(self: *Self, inst: Air.Inst.Index) !void { const ty_op = self.air.instructions.items(.data)[inst].ty_op; - const dst_ty = self.air.typeOfIndex(inst); + const dst_ty = self.typeOfIndex(inst); const opt_mcv = try self.resolveInst(ty_op.operand); const dst_mcv = if (self.reuseOperand(inst, ty_op.operand, 0, opt_mcv)) @@ -2745,14 +3569,15 @@ fn airOptionalPayloadPtr(self: *Self, inst: Air.Inst.Index) !void { } fn airOptionalPayloadPtrSet(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; const ty_op = self.air.instructions.items(.data)[inst].ty_op; const result = result: { - const dst_ty = self.air.typeOfIndex(inst); - const src_ty = self.air.typeOf(ty_op.operand); - const opt_ty = src_ty.childType(); + const dst_ty = 
self.typeOfIndex(inst); + const src_ty = self.typeOf(ty_op.operand); + const opt_ty = src_ty.childType(mod); const src_mcv = try self.resolveInst(ty_op.operand); - if (opt_ty.optionalReprIsPayload()) { + if (opt_ty.optionalReprIsPayload(mod)) { break :result if (self.liveness.isUnused(inst)) .unreach else if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) @@ -2761,36 +3586,40 @@ fn airOptionalPayloadPtrSet(self: *Self, inst: Air.Inst.Index) !void { try self.copyToRegisterWithInstTracking(inst, dst_ty, src_mcv); } - const dst_mcv = if (src_mcv.isRegister() and self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) + const dst_mcv: MCValue = if (src_mcv.isRegister() and + self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) src_mcv + else if (self.liveness.isUnused(inst)) + .{ .register = try self.copyToTmpRegister(dst_ty, src_mcv) } else try self.copyToRegisterWithInstTracking(inst, dst_ty, src_mcv); - const pl_ty = dst_ty.childType(); - const pl_abi_size = @intCast(i32, pl_ty.abiSize(self.target.*)); - try self.genSetMem(.{ .reg = dst_mcv.register }, pl_abi_size, Type.bool, .{ .immediate = 1 }); + const pl_ty = dst_ty.childType(mod); + const pl_abi_size = @intCast(i32, pl_ty.abiSize(mod)); + try self.genSetMem(.{ .reg = dst_mcv.getReg().? }, pl_abi_size, Type.bool, .{ .immediate = 1 }); break :result if (self.liveness.isUnused(inst)) .unreach else dst_mcv; }; return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); } fn airUnwrapErrUnionErr(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; const ty_op = self.air.instructions.items(.data)[inst].ty_op; - const err_union_ty = self.air.typeOf(ty_op.operand); - const err_ty = err_union_ty.errorUnionSet(); - const payload_ty = err_union_ty.errorUnionPayload(); + const err_union_ty = self.typeOf(ty_op.operand); + const err_ty = err_union_ty.errorUnionSet(mod); + const payload_ty = err_union_ty.errorUnionPayload(mod); const operand = try self.resolveInst(ty_op.operand); const result: MCValue = result: { - if (err_ty.errorSetIsEmpty()) { + if (err_ty.errorSetIsEmpty(mod)) { break :result MCValue{ .immediate = 0 }; } - if (!payload_ty.hasRuntimeBitsIgnoreComptime()) { + if (!payload_ty.hasRuntimeBitsIgnoreComptime(mod)) { break :result operand; } - const err_off = errUnionErrorOffset(payload_ty, self.target.*); + const err_off = errUnionErrorOffset(payload_ty, mod); switch (operand) { .register => |reg| { // TODO reuse operand @@ -2800,7 +3629,12 @@ fn airUnwrapErrUnionErr(self: *Self, inst: Air.Inst.Index) !void { const result = try self.copyToRegisterWithInstTracking(inst, err_union_ty, operand); if (err_off > 0) { const shift = @intCast(u6, err_off * 8); - try self.genShiftBinOpMir(.shr, err_union_ty, result, .{ .immediate = shift }); + try self.genShiftBinOpMir( + .{ ._r, .sh }, + err_union_ty, + result, + .{ .immediate = shift }, + ); } else { try self.truncateRegister(Type.anyerror, result.register); } @@ -2818,7 +3652,7 @@ fn airUnwrapErrUnionErr(self: *Self, inst: Air.Inst.Index) !void { fn airUnwrapErrUnionPayload(self: *Self, inst: Air.Inst.Index) !void { const ty_op = self.air.instructions.items(.data)[inst].ty_op; - const err_union_ty = self.air.typeOf(ty_op.operand); + const err_union_ty = self.typeOf(ty_op.operand); const operand = try self.resolveInst(ty_op.operand); const result = try self.genUnwrapErrorUnionPayloadMir(inst, err_union_ty, operand); return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); @@ -2830,12 +3664,13 @@ fn genUnwrapErrorUnionPayloadMir( 
err_union_ty: Type, err_union: MCValue, ) !MCValue { - const payload_ty = err_union_ty.errorUnionPayload(); + const mod = self.bin_file.options.module.?; + const payload_ty = err_union_ty.errorUnionPayload(mod); const result: MCValue = result: { - if (!payload_ty.hasRuntimeBitsIgnoreComptime()) break :result .none; + if (!payload_ty.hasRuntimeBitsIgnoreComptime(mod)) break :result .none; - const payload_off = errUnionPayloadOffset(payload_ty, self.target.*); + const payload_off = errUnionPayloadOffset(payload_ty, mod); switch (err_union) { .load_frame => |frame_addr| break :result .{ .load_frame = .{ .index = frame_addr.index, @@ -2852,7 +3687,12 @@ fn genUnwrapErrorUnionPayloadMir( .{ .register = try self.copyToTmpRegister(err_union_ty, err_union) }; if (payload_off > 0) { const shift = @intCast(u6, payload_off * 8); - try self.genShiftBinOpMir(.shr, err_union_ty, result_mcv, .{ .immediate = shift }); + try self.genShiftBinOpMir( + .{ ._r, .sh }, + err_union_ty, + result_mcv, + .{ .immediate = shift }, + ); } else { try self.truncateRegister(payload_ty, result_mcv.register); } @@ -2867,9 +3707,10 @@ fn genUnwrapErrorUnionPayloadMir( // *(E!T) -> E fn airUnwrapErrUnionErrPtr(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; const ty_op = self.air.instructions.items(.data)[inst].ty_op; - const src_ty = self.air.typeOf(ty_op.operand); + const src_ty = self.typeOf(ty_op.operand); const src_mcv = try self.resolveInst(ty_op.operand); const src_reg = switch (src_mcv) { .register => |reg| reg, @@ -2883,13 +3724,13 @@ fn airUnwrapErrUnionErrPtr(self: *Self, inst: Air.Inst.Index) !void { const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg); defer self.register_manager.unlockReg(dst_lock); - const eu_ty = src_ty.childType(); - const pl_ty = eu_ty.errorUnionPayload(); - const err_ty = eu_ty.errorUnionSet(); - const err_off = @intCast(i32, errUnionErrorOffset(pl_ty, self.target.*)); - const err_abi_size = @intCast(u32, err_ty.abiSize(self.target.*)); + const eu_ty = src_ty.childType(mod); + const pl_ty = eu_ty.errorUnionPayload(mod); + const err_ty = eu_ty.errorUnionSet(mod); + const err_off = @intCast(i32, errUnionErrorOffset(pl_ty, mod)); + const err_abi_size = @intCast(u32, err_ty.abiSize(mod)); try self.asmRegisterMemory( - .mov, + .{ ._, .mov }, registerAlias(dst_reg, err_abi_size), Memory.sib(Memory.PtrSize.fromSize(err_abi_size), .{ .base = .{ .reg = src_reg }, @@ -2902,9 +3743,10 @@ fn airUnwrapErrUnionErrPtr(self: *Self, inst: Air.Inst.Index) !void { // *(E!T) -> *T fn airUnwrapErrUnionPayloadPtr(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; const ty_op = self.air.instructions.items(.data)[inst].ty_op; - const src_ty = self.air.typeOf(ty_op.operand); + const src_ty = self.typeOf(ty_op.operand); const src_mcv = try self.resolveInst(ty_op.operand); const src_reg = switch (src_mcv) { .register => |reg| reg, @@ -2913,7 +3755,7 @@ fn airUnwrapErrUnionPayloadPtr(self: *Self, inst: Air.Inst.Index) !void { const src_lock = self.register_manager.lockRegAssumeUnused(src_reg); defer self.register_manager.unlockReg(src_lock); - const dst_ty = self.air.typeOfIndex(inst); + const dst_ty = self.typeOfIndex(inst); const dst_reg = if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) src_reg else @@ -2922,12 +3764,12 @@ fn airUnwrapErrUnionPayloadPtr(self: *Self, inst: Air.Inst.Index) !void { const dst_lock = self.register_manager.lockReg(dst_reg); defer if (dst_lock) |lock| 
self.register_manager.unlockReg(lock); - const eu_ty = src_ty.childType(); - const pl_ty = eu_ty.errorUnionPayload(); - const pl_off = @intCast(i32, errUnionPayloadOffset(pl_ty, self.target.*)); - const dst_abi_size = @intCast(u32, dst_ty.abiSize(self.target.*)); + const eu_ty = src_ty.childType(mod); + const pl_ty = eu_ty.errorUnionPayload(mod); + const pl_off = @intCast(i32, errUnionPayloadOffset(pl_ty, mod)); + const dst_abi_size = @intCast(u32, dst_ty.abiSize(mod)); try self.asmRegisterMemory( - .lea, + .{ ._, .lea }, registerAlias(dst_reg, dst_abi_size), Memory.sib(.qword, .{ .base = .{ .reg = src_reg }, .disp = pl_off }), ); @@ -2936,9 +3778,10 @@ fn airUnwrapErrUnionPayloadPtr(self: *Self, inst: Air.Inst.Index) !void { } fn airErrUnionPayloadPtrSet(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; const ty_op = self.air.instructions.items(.data)[inst].ty_op; const result: MCValue = result: { - const src_ty = self.air.typeOf(ty_op.operand); + const src_ty = self.typeOf(ty_op.operand); const src_mcv = try self.resolveInst(ty_op.operand); const src_reg = switch (src_mcv) { .register => |reg| reg, @@ -2947,13 +3790,13 @@ fn airErrUnionPayloadPtrSet(self: *Self, inst: Air.Inst.Index) !void { const src_lock = self.register_manager.lockRegAssumeUnused(src_reg); defer self.register_manager.unlockReg(src_lock); - const eu_ty = src_ty.childType(); - const pl_ty = eu_ty.errorUnionPayload(); - const err_ty = eu_ty.errorUnionSet(); - const err_off = @intCast(i32, errUnionErrorOffset(pl_ty, self.target.*)); - const err_abi_size = @intCast(u32, err_ty.abiSize(self.target.*)); + const eu_ty = src_ty.childType(mod); + const pl_ty = eu_ty.errorUnionPayload(mod); + const err_ty = eu_ty.errorUnionSet(mod); + const err_off = @intCast(i32, errUnionErrorOffset(pl_ty, mod)); + const err_abi_size = @intCast(u32, err_ty.abiSize(mod)); try self.asmMemoryImmediate( - .mov, + .{ ._, .mov }, Memory.sib(Memory.PtrSize.fromSize(err_abi_size), .{ .base = .{ .reg = src_reg }, .disp = err_off, @@ -2963,7 +3806,7 @@ fn airErrUnionPayloadPtrSet(self: *Self, inst: Air.Inst.Index) !void { if (self.liveness.isUnused(inst)) break :result .unreach; - const dst_ty = self.air.typeOfIndex(inst); + const dst_ty = self.typeOfIndex(inst); const dst_reg = if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) src_reg else @@ -2971,10 +3814,10 @@ fn airErrUnionPayloadPtrSet(self: *Self, inst: Air.Inst.Index) !void { const dst_lock = self.register_manager.lockReg(dst_reg); defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); - const pl_off = @intCast(i32, errUnionPayloadOffset(pl_ty, self.target.*)); - const dst_abi_size = @intCast(u32, dst_ty.abiSize(self.target.*)); + const pl_off = @intCast(i32, errUnionPayloadOffset(pl_ty, mod)); + const dst_abi_size = @intCast(u32, dst_ty.abiSize(mod)); try self.asmRegisterMemory( - .lea, + .{ ._, .lea }, registerAlias(dst_reg, dst_abi_size), Memory.sib(.qword, .{ .base = .{ .reg = src_reg }, .disp = pl_off }), ); @@ -3000,14 +3843,15 @@ fn airSaveErrReturnTraceIndex(self: *Self, inst: Air.Inst.Index) !void { } fn airWrapOptional(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; const ty_op = self.air.instructions.items(.data)[inst].ty_op; const result: MCValue = result: { - const pl_ty = self.air.typeOf(ty_op.operand); - if (!pl_ty.hasRuntimeBits()) break :result .{ .immediate = 1 }; + const pl_ty = self.typeOf(ty_op.operand); + if (!pl_ty.hasRuntimeBits(mod)) break :result .{ .immediate = 1 }; - const 
opt_ty = self.air.typeOfIndex(inst); + const opt_ty = self.typeOfIndex(inst); const pl_mcv = try self.resolveInst(ty_op.operand); - const same_repr = opt_ty.optionalReprIsPayload(); + const same_repr = opt_ty.optionalReprIsPayload(mod); if (same_repr and self.reuseOperand(inst, ty_op.operand, 0, pl_mcv)) break :result pl_mcv; const pl_lock: ?RegisterLock = switch (pl_mcv) { @@ -3020,18 +3864,18 @@ fn airWrapOptional(self: *Self, inst: Air.Inst.Index) !void { try self.genCopy(pl_ty, opt_mcv, pl_mcv); if (!same_repr) { - const pl_abi_size = @intCast(i32, pl_ty.abiSize(self.target.*)); + const pl_abi_size = @intCast(i32, pl_ty.abiSize(mod)); switch (opt_mcv) { else => unreachable, .register => |opt_reg| try self.asmRegisterImmediate( - .bts, + .{ ._s, .bt }, opt_reg, Immediate.u(@intCast(u6, pl_abi_size * 8)), ), .load_frame => |frame_addr| try self.asmMemoryImmediate( - .mov, + .{ ._, .mov }, Memory.sib(.byte, .{ .base = .{ .frame = frame_addr.index }, .disp = frame_addr.off + pl_abi_size, @@ -3047,19 +3891,20 @@ fn airWrapOptional(self: *Self, inst: Air.Inst.Index) !void { /// T to E!T fn airWrapErrUnionPayload(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; const ty_op = self.air.instructions.items(.data)[inst].ty_op; const eu_ty = self.air.getRefType(ty_op.ty); - const pl_ty = eu_ty.errorUnionPayload(); - const err_ty = eu_ty.errorUnionSet(); + const pl_ty = eu_ty.errorUnionPayload(mod); + const err_ty = eu_ty.errorUnionSet(mod); const operand = try self.resolveInst(ty_op.operand); const result: MCValue = result: { - if (!pl_ty.hasRuntimeBitsIgnoreComptime()) break :result .{ .immediate = 0 }; + if (!pl_ty.hasRuntimeBitsIgnoreComptime(mod)) break :result .{ .immediate = 0 }; - const frame_index = try self.allocFrameIndex(FrameAlloc.initType(eu_ty, self.target.*)); - const pl_off = @intCast(i32, errUnionPayloadOffset(pl_ty, self.target.*)); - const err_off = @intCast(i32, errUnionErrorOffset(pl_ty, self.target.*)); + const frame_index = try self.allocFrameIndex(FrameAlloc.initType(eu_ty, mod)); + const pl_off = @intCast(i32, errUnionPayloadOffset(pl_ty, mod)); + const err_off = @intCast(i32, errUnionErrorOffset(pl_ty, mod)); try self.genSetMem(.{ .frame = frame_index }, pl_off, pl_ty, operand); try self.genSetMem(.{ .frame = frame_index }, err_off, err_ty, .{ .immediate = 0 }); break :result .{ .load_frame = .{ .index = frame_index } }; @@ -3069,18 +3914,19 @@ fn airWrapErrUnionPayload(self: *Self, inst: Air.Inst.Index) !void { /// E to E!T fn airWrapErrUnionErr(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; const ty_op = self.air.instructions.items(.data)[inst].ty_op; const eu_ty = self.air.getRefType(ty_op.ty); - const pl_ty = eu_ty.errorUnionPayload(); - const err_ty = eu_ty.errorUnionSet(); + const pl_ty = eu_ty.errorUnionPayload(mod); + const err_ty = eu_ty.errorUnionSet(mod); const result: MCValue = result: { - if (!pl_ty.hasRuntimeBitsIgnoreComptime()) break :result try self.resolveInst(ty_op.operand); + if (!pl_ty.hasRuntimeBitsIgnoreComptime(mod)) break :result try self.resolveInst(ty_op.operand); - const frame_index = try self.allocFrameIndex(FrameAlloc.initType(eu_ty, self.target.*)); - const pl_off = @intCast(i32, errUnionPayloadOffset(pl_ty, self.target.*)); - const err_off = @intCast(i32, errUnionErrorOffset(pl_ty, self.target.*)); + const frame_index = try self.allocFrameIndex(FrameAlloc.initType(eu_ty, mod)); + const pl_off = @intCast(i32, errUnionPayloadOffset(pl_ty, mod)); + const err_off = 
@intCast(i32, errUnionErrorOffset(pl_ty, mod)); try self.genSetMem(.{ .frame = frame_index }, pl_off, pl_ty, .undef); const operand = try self.resolveInst(ty_op.operand); try self.genSetMem(.{ .frame = frame_index }, err_off, err_ty, operand); @@ -3096,7 +3942,7 @@ fn airSlicePtr(self: *Self, inst: Air.Inst.Index) !void { if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) break :result src_mcv; const dst_mcv = try self.allocRegOrMem(inst, true); - const dst_ty = self.air.typeOfIndex(inst); + const dst_ty = self.typeOfIndex(inst); try self.genCopy(dst_ty, dst_mcv, src_mcv); break :result dst_mcv; }; @@ -3121,9 +3967,10 @@ fn airSliceLen(self: *Self, inst: Air.Inst.Index) !void { } fn airPtrSliceLenPtr(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; const ty_op = self.air.instructions.items(.data)[inst].ty_op; - const src_ty = self.air.typeOf(ty_op.operand); + const src_ty = self.typeOf(ty_op.operand); const src_mcv = try self.resolveInst(ty_op.operand); const src_reg = switch (src_mcv) { .register => |reg| reg, @@ -3132,7 +3979,7 @@ fn airPtrSliceLenPtr(self: *Self, inst: Air.Inst.Index) !void { const src_lock = self.register_manager.lockRegAssumeUnused(src_reg); defer self.register_manager.unlockReg(src_lock); - const dst_ty = self.air.typeOfIndex(inst); + const dst_ty = self.typeOfIndex(inst); const dst_reg = if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) src_reg else @@ -3141,13 +3988,13 @@ fn airPtrSliceLenPtr(self: *Self, inst: Air.Inst.Index) !void { const dst_lock = self.register_manager.lockReg(dst_reg); defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); - const dst_abi_size = @intCast(u32, dst_ty.abiSize(self.target.*)); + const dst_abi_size = @intCast(u32, dst_ty.abiSize(mod)); try self.asmRegisterMemory( - .lea, + .{ ._, .lea }, registerAlias(dst_reg, dst_abi_size), Memory.sib(.qword, .{ .base = .{ .reg = src_reg }, - .disp = @divExact(self.target.cpu.arch.ptrBitWidth(), 8), + .disp = @divExact(self.target.ptrBitWidth(), 8), }), ); @@ -3157,7 +4004,7 @@ fn airPtrSliceLenPtr(self: *Self, inst: Air.Inst.Index) !void { fn airPtrSlicePtrPtr(self: *Self, inst: Air.Inst.Index) !void { const ty_op = self.air.instructions.items(.data)[inst].ty_op; - const dst_ty = self.air.typeOfIndex(inst); + const dst_ty = self.typeOfIndex(inst); const opt_mcv = try self.resolveInst(ty_op.operand); const dst_mcv = if (self.reuseOperand(inst, ty_op.operand, 0, opt_mcv)) @@ -3188,7 +4035,8 @@ fn elemOffset(self: *Self, index_ty: Type, index: MCValue, elem_size: u64) !Regi } fn genSliceElemPtr(self: *Self, lhs: Air.Inst.Ref, rhs: Air.Inst.Ref) !MCValue { - const slice_ty = self.air.typeOf(lhs); + const mod = self.bin_file.options.module.?; + const slice_ty = self.typeOf(lhs); const slice_mcv = try self.resolveInst(lhs); const slice_mcv_lock: ?RegisterLock = switch (slice_mcv) { .register => |reg| self.register_manager.lockRegAssumeUnused(reg), @@ -3196,12 +4044,11 @@ fn genSliceElemPtr(self: *Self, lhs: Air.Inst.Ref, rhs: Air.Inst.Ref) !MCValue { }; defer if (slice_mcv_lock) |lock| self.register_manager.unlockReg(lock); - const elem_ty = slice_ty.childType(); - const elem_size = elem_ty.abiSize(self.target.*); - var buf: Type.SlicePtrFieldTypeBuffer = undefined; - const slice_ptr_field_type = slice_ty.slicePtrFieldType(&buf); + const elem_ty = slice_ty.childType(mod); + const elem_size = elem_ty.abiSize(mod); + const slice_ptr_field_type = slice_ty.slicePtrFieldType(mod); - const index_ty = self.air.typeOf(rhs); + const index_ty = 
self.typeOf(rhs); const index_mcv = try self.resolveInst(rhs); const index_mcv_lock: ?RegisterLock = switch (index_mcv) { .register => |reg| self.register_manager.lockRegAssumeUnused(reg), @@ -3214,31 +4061,21 @@ fn genSliceElemPtr(self: *Self, lhs: Air.Inst.Ref, rhs: Air.Inst.Ref) !MCValue { defer self.register_manager.unlockReg(offset_reg_lock); const addr_reg = try self.register_manager.allocReg(null, gp); - switch (slice_mcv) { - .load_frame => |frame_addr| try self.asmRegisterMemory( - .mov, - addr_reg.to64(), - Memory.sib(.qword, .{ - .base = .{ .frame = frame_addr.index }, - .disp = frame_addr.off, - }), - ), - else => return self.fail("TODO implement slice_elem_ptr when slice is {}", .{slice_mcv}), - } + try self.genSetReg(addr_reg, Type.usize, slice_mcv); // TODO we could allocate register here, but need to expect addr register and potentially // offset register. - try self.genBinOpMir(.add, slice_ptr_field_type, .{ .register = addr_reg }, .{ + try self.genBinOpMir(.{ ._, .add }, slice_ptr_field_type, .{ .register = addr_reg }, .{ .register = offset_reg, }); return MCValue{ .register = addr_reg.to64() }; } fn airSliceElemVal(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; const bin_op = self.air.instructions.items(.data)[inst].bin_op; - const slice_ty = self.air.typeOf(bin_op.lhs); + const slice_ty = self.typeOf(bin_op.lhs); - var buf: Type.SlicePtrFieldTypeBuffer = undefined; - const slice_ptr_field_type = slice_ty.slicePtrFieldType(&buf); + const slice_ptr_field_type = slice_ty.slicePtrFieldType(mod); const elem_ptr = try self.genSliceElemPtr(bin_op.lhs, bin_op.rhs); const dst_mcv = try self.allocRegOrMem(inst, false); try self.load(dst_mcv, slice_ptr_field_type, elem_ptr); @@ -3254,9 +4091,10 @@ fn airSliceElemPtr(self: *Self, inst: Air.Inst.Index) !void { } fn airArrayElemVal(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; const bin_op = self.air.instructions.items(.data)[inst].bin_op; - const array_ty = self.air.typeOf(bin_op.lhs); + const array_ty = self.typeOf(bin_op.lhs); const array = try self.resolveInst(bin_op.lhs); const array_lock: ?RegisterLock = switch (array) { .register => |reg| self.register_manager.lockRegAssumeUnused(reg), @@ -3264,10 +4102,10 @@ fn airArrayElemVal(self: *Self, inst: Air.Inst.Index) !void { }; defer if (array_lock) |lock| self.register_manager.unlockReg(lock); - const elem_ty = array_ty.childType(); - const elem_abi_size = elem_ty.abiSize(self.target.*); + const elem_ty = array_ty.childType(mod); + const elem_abi_size = elem_ty.abiSize(mod); - const index_ty = self.air.typeOf(bin_op.rhs); + const index_ty = self.typeOf(bin_op.rhs); const index = try self.resolveInst(bin_op.rhs); const index_lock: ?RegisterLock = switch (index) { .register => |reg| self.register_manager.lockRegAssumeUnused(reg), @@ -3282,16 +4120,16 @@ fn airArrayElemVal(self: *Self, inst: Air.Inst.Index) !void { const addr_reg = try self.register_manager.allocReg(null, gp); switch (array) { .register => { - const frame_index = try self.allocFrameIndex(FrameAlloc.initType(array_ty, self.target.*)); + const frame_index = try self.allocFrameIndex(FrameAlloc.initType(array_ty, mod)); try self.genSetMem(.{ .frame = frame_index }, 0, array_ty, array); try self.asmRegisterMemory( - .lea, + .{ ._, .lea }, addr_reg, Memory.sib(.qword, .{ .base = .{ .frame = frame_index } }), ); }, .load_frame => |frame_addr| try self.asmRegisterMemory( - .lea, + .{ ._, .lea }, addr_reg, Memory.sib(.qword, .{ .base = .{ 
.frame = frame_addr.index }, .disp = frame_addr.off }), ), @@ -3307,22 +4145,28 @@ fn airArrayElemVal(self: *Self, inst: Air.Inst.Index) !void { // TODO we could allocate register here, but need to expect addr register and potentially // offset register. const dst_mcv = try self.allocRegOrMem(inst, false); - try self.genBinOpMir(.add, Type.usize, .{ .register = addr_reg }, .{ .register = offset_reg }); + try self.genBinOpMir( + .{ ._, .add }, + Type.usize, + .{ .register = addr_reg }, + .{ .register = offset_reg }, + ); try self.genCopy(elem_ty, dst_mcv, .{ .indirect = .{ .reg = addr_reg } }); return self.finishAir(inst, dst_mcv, .{ bin_op.lhs, bin_op.rhs, .none }); } fn airPtrElemVal(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; const bin_op = self.air.instructions.items(.data)[inst].bin_op; - const ptr_ty = self.air.typeOf(bin_op.lhs); + const ptr_ty = self.typeOf(bin_op.lhs); // this is identical to the `airPtrElemPtr` codegen expect here an // additional `mov` is needed at the end to get the actual value - const elem_ty = ptr_ty.elemType2(); - const elem_abi_size = @intCast(u32, elem_ty.abiSize(self.target.*)); - const index_ty = self.air.typeOf(bin_op.rhs); + const elem_ty = ptr_ty.elemType2(mod); + const elem_abi_size = @intCast(u32, elem_ty.abiSize(mod)); + const index_ty = self.typeOf(bin_op.rhs); const index_mcv = try self.resolveInst(bin_op.rhs); const index_lock = switch (index_mcv) { .register => |reg| self.register_manager.lockRegAssumeUnused(reg), @@ -3341,7 +4185,11 @@ fn airPtrElemVal(self: *Self, inst: Air.Inst.Index) !void { try self.copyToTmpRegister(ptr_ty, ptr_mcv); const elem_ptr_lock = self.register_manager.lockRegAssumeUnused(elem_ptr_reg); defer self.register_manager.unlockReg(elem_ptr_lock); - try self.asmRegisterRegister(.add, elem_ptr_reg, offset_reg); + try self.asmRegisterRegister( + .{ ._, .add }, + elem_ptr_reg, + offset_reg, + ); const dst_mcv = try self.allocRegOrMem(inst, true); const dst_lock = switch (dst_mcv) { @@ -3355,10 +4203,11 @@ fn airPtrElemVal(self: *Self, inst: Air.Inst.Index) !void { } fn airPtrElemPtr(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; const ty_pl = self.air.instructions.items(.data)[inst].ty_pl; const extra = self.air.extraData(Air.Bin, ty_pl.payload).data; - const ptr_ty = self.air.typeOf(extra.lhs); + const ptr_ty = self.typeOf(extra.lhs); const ptr = try self.resolveInst(extra.lhs); const ptr_lock: ?RegisterLock = switch (ptr) { .register => |reg| self.register_manager.lockRegAssumeUnused(reg), @@ -3366,9 +4215,9 @@ fn airPtrElemPtr(self: *Self, inst: Air.Inst.Index) !void { }; defer if (ptr_lock) |lock| self.register_manager.unlockReg(lock); - const elem_ty = ptr_ty.elemType2(); - const elem_abi_size = elem_ty.abiSize(self.target.*); - const index_ty = self.air.typeOf(extra.rhs); + const elem_ty = ptr_ty.elemType2(mod); + const elem_abi_size = elem_ty.abiSize(mod); + const index_ty = self.typeOf(extra.rhs); const index = try self.resolveInst(extra.rhs); const index_lock: ?RegisterLock = switch (index) { .register => |reg| self.register_manager.lockRegAssumeUnused(reg), @@ -3381,17 +4230,18 @@ fn airPtrElemPtr(self: *Self, inst: Air.Inst.Index) !void { defer self.register_manager.unlockReg(offset_reg_lock); const dst_mcv = try self.copyToRegisterWithInstTracking(inst, ptr_ty, ptr); - try self.genBinOpMir(.add, ptr_ty, dst_mcv, .{ .register = offset_reg }); + try self.genBinOpMir(.{ ._, .add }, ptr_ty, dst_mcv, .{ .register = offset_reg }); 
return self.finishAir(inst, dst_mcv, .{ extra.lhs, extra.rhs, .none }); } fn airSetUnionTag(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; const bin_op = self.air.instructions.items(.data)[inst].bin_op; - const ptr_union_ty = self.air.typeOf(bin_op.lhs); - const union_ty = ptr_union_ty.childType(); - const tag_ty = self.air.typeOf(bin_op.rhs); - const layout = union_ty.unionGetLayout(self.target.*); + const ptr_union_ty = self.typeOf(bin_op.lhs); + const union_ty = ptr_union_ty.childType(mod); + const tag_ty = self.typeOf(bin_op.rhs); + const layout = union_ty.unionGetLayout(mod); if (layout.tag_size == 0) { return self.finishAir(inst, .none, .{ bin_op.lhs, bin_op.rhs, .none }); @@ -3414,24 +4264,28 @@ fn airSetUnionTag(self: *Self, inst: Air.Inst.Index) !void { const adjusted_ptr: MCValue = if (layout.payload_size > 0 and layout.tag_align < layout.payload_align) blk: { // TODO reusing the operand const reg = try self.copyToTmpRegister(ptr_union_ty, ptr); - try self.genBinOpMir(.add, ptr_union_ty, .{ .register = reg }, .{ .immediate = layout.payload_size }); + try self.genBinOpMir( + .{ ._, .add }, + ptr_union_ty, + .{ .register = reg }, + .{ .immediate = layout.payload_size }, + ); break :blk MCValue{ .register = reg }; } else ptr; - var ptr_tag_pl = ptr_union_ty.ptrInfo(); - ptr_tag_pl.data.pointee_type = tag_ty; - const ptr_tag_ty = Type.initPayload(&ptr_tag_pl.base); + const ptr_tag_ty = try mod.adjustPtrTypeChild(ptr_union_ty, tag_ty); try self.store(ptr_tag_ty, adjusted_ptr, tag); return self.finishAir(inst, .none, .{ bin_op.lhs, bin_op.rhs, .none }); } fn airGetUnionTag(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; const ty_op = self.air.instructions.items(.data)[inst].ty_op; - const tag_ty = self.air.typeOfIndex(inst); - const union_ty = self.air.typeOf(ty_op.operand); - const layout = union_ty.unionGetLayout(self.target.*); + const tag_ty = self.typeOfIndex(inst); + const union_ty = self.typeOf(ty_op.operand); + const layout = union_ty.unionGetLayout(mod); if (layout.tag_size == 0) { return self.finishAir(inst, .none, .{ ty_op.operand, .none, .none }); @@ -3445,7 +4299,7 @@ fn airGetUnionTag(self: *Self, inst: Air.Inst.Index) !void { }; defer if (operand_lock) |lock| self.register_manager.unlockReg(lock); - const tag_abi_size = tag_ty.abiSize(self.target.*); + const tag_abi_size = tag_ty.abiSize(mod); const dst_mcv: MCValue = blk: { switch (operand) { .load_frame => |frame_addr| { @@ -3467,7 +4321,7 @@ fn airGetUnionTag(self: *Self, inst: Air.Inst.Index) !void { else 0; const result = try self.copyToRegisterWithInstTracking(inst, union_ty, operand); - try self.genShiftBinOpMir(.shr, Type.usize, result, .{ .immediate = shift }); + try self.genShiftBinOpMir(.{ ._r, .sh }, Type.usize, result, .{ .immediate = shift }); break :blk MCValue{ .register = registerAlias(result.register, @intCast(u32, layout.tag_size)), }; @@ -3480,10 +4334,11 @@ fn airGetUnionTag(self: *Self, inst: Air.Inst.Index) !void { } fn airClz(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; const ty_op = self.air.instructions.items(.data)[inst].ty_op; const result = result: { - const dst_ty = self.air.typeOfIndex(inst); - const src_ty = self.air.typeOf(ty_op.operand); + const dst_ty = self.typeOfIndex(inst); + const src_ty = self.typeOf(ty_op.operand); const src_mcv = try self.resolveInst(ty_op.operand); const mat_src_mcv = switch (src_mcv) { @@ -3498,40 +4353,101 @@ fn airClz(self: *Self, 
inst: Air.Inst.Index) !void { const dst_reg = try self.register_manager.allocReg(inst, gp); const dst_mcv = MCValue{ .register = dst_reg }; - const dst_lock = self.register_manager.lockReg(dst_reg); - defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); + const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg); + defer self.register_manager.unlockReg(dst_lock); - if (Target.x86.featureSetHas(self.target.cpu.features, .lzcnt)) { - try self.genBinOpMir(.lzcnt, src_ty, dst_mcv, mat_src_mcv); - const extra_bits = self.regExtraBits(src_ty); - if (extra_bits > 0) { - try self.genBinOpMir(.sub, dst_ty, dst_mcv, .{ .immediate = extra_bits }); - } + const src_bits = src_ty.bitSize(mod); + if (self.hasFeature(.lzcnt)) { + if (src_bits <= 8) { + const wide_reg = try self.copyToTmpRegister(src_ty, mat_src_mcv); + try self.truncateRegister(src_ty, wide_reg); + try self.genBinOpMir(.{ ._, .lzcnt }, Type.u32, dst_mcv, .{ .register = wide_reg }); + try self.genBinOpMir( + .{ ._, .sub }, + dst_ty, + dst_mcv, + .{ .immediate = 8 + self.regExtraBits(src_ty) }, + ); + } else if (src_bits <= 64) { + try self.genBinOpMir(.{ ._, .lzcnt }, src_ty, dst_mcv, mat_src_mcv); + const extra_bits = self.regExtraBits(src_ty); + if (extra_bits > 0) { + try self.genBinOpMir(.{ ._, .sub }, dst_ty, dst_mcv, .{ .immediate = extra_bits }); + } + } else if (src_bits <= 128) { + const tmp_reg = try self.register_manager.allocReg(null, gp); + const tmp_mcv = MCValue{ .register = tmp_reg }; + const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); + defer self.register_manager.unlockReg(tmp_lock); + + try self.genBinOpMir(.{ ._, .lzcnt }, Type.u64, dst_mcv, mat_src_mcv); + try self.genBinOpMir(.{ ._, .add }, dst_ty, dst_mcv, .{ .immediate = 64 }); + try self.genBinOpMir( + .{ ._, .lzcnt }, + Type.u64, + tmp_mcv, + mat_src_mcv.address().offset(8).deref(), + ); + try self.asmCmovccRegisterRegister(dst_reg.to32(), tmp_reg.to32(), .nc); + + if (src_bits < 128) { + try self.genBinOpMir( + .{ ._, .sub }, + dst_ty, + dst_mcv, + .{ .immediate = 128 - src_bits }, + ); + } + } else return self.fail("TODO airClz of {}", .{src_ty.fmt(self.bin_file.options.module.?)}); break :result dst_mcv; } - const src_bits = src_ty.bitSize(self.target.*); + if (src_bits > 64) + return self.fail("TODO airClz of {}", .{src_ty.fmt(mod)}); if (math.isPowerOfTwo(src_bits)) { const imm_reg = try self.copyToTmpRegister(dst_ty, .{ .immediate = src_bits ^ (src_bits - 1), }); - try self.genBinOpMir(.bsr, src_ty, dst_mcv, mat_src_mcv); + const imm_lock = self.register_manager.lockRegAssumeUnused(imm_reg); + defer self.register_manager.unlockReg(imm_lock); + + if (src_bits <= 8) { + const wide_reg = try self.copyToTmpRegister(src_ty, mat_src_mcv); + const wide_lock = self.register_manager.lockRegAssumeUnused(wide_reg); + defer self.register_manager.unlockReg(wide_lock); - const cmov_abi_size = @max(@intCast(u32, dst_ty.abiSize(self.target.*)), 2); + try self.truncateRegister(src_ty, wide_reg); + try self.genBinOpMir(.{ ._, .bsr }, Type.u16, dst_mcv, .{ .register = wide_reg }); + } else try self.genBinOpMir(.{ ._, .bsr }, src_ty, dst_mcv, mat_src_mcv); + + const cmov_abi_size = @max(@intCast(u32, dst_ty.abiSize(mod)), 2); try self.asmCmovccRegisterRegister( registerAlias(dst_reg, cmov_abi_size), registerAlias(imm_reg, cmov_abi_size), .z, ); - try self.genBinOpMir(.xor, dst_ty, dst_mcv, .{ .immediate = src_bits - 1 }); + try self.genBinOpMir(.{ ._, .xor }, dst_ty, dst_mcv, .{ .immediate = src_bits - 1 }); } else { const imm_reg = try 
self.copyToTmpRegister(dst_ty, .{ .immediate = @as(u64, math.maxInt(u64)) >> @intCast(u6, 64 - self.regBitSize(dst_ty)), }); - try self.genBinOpMir(.bsr, src_ty, dst_mcv, mat_src_mcv); + const imm_lock = self.register_manager.lockRegAssumeUnused(imm_reg); + defer self.register_manager.unlockReg(imm_lock); + + const wide_reg = try self.copyToTmpRegister(src_ty, mat_src_mcv); + const wide_lock = self.register_manager.lockRegAssumeUnused(wide_reg); + defer self.register_manager.unlockReg(wide_lock); + + try self.truncateRegister(src_ty, wide_reg); + try self.genBinOpMir( + .{ ._, .bsr }, + if (src_bits <= 8) Type.u16 else src_ty, + dst_mcv, + .{ .register = wide_reg }, + ); - const cmov_abi_size = @max(@intCast(u32, dst_ty.abiSize(self.target.*)), 2); + const cmov_abi_size = @max(@intCast(u32, dst_ty.abiSize(mod)), 2); try self.asmCmovccRegisterRegister( registerAlias(imm_reg, cmov_abi_size), registerAlias(dst_reg, cmov_abi_size), @@ -3539,7 +4455,7 @@ fn airClz(self: *Self, inst: Air.Inst.Index) !void { ); try self.genSetReg(dst_reg, dst_ty, .{ .immediate = src_bits - 1 }); - try self.genBinOpMir(.sub, dst_ty, dst_mcv, .{ .register = imm_reg }); + try self.genBinOpMir(.{ ._, .sub }, dst_ty, dst_mcv, .{ .register = imm_reg }); } break :result dst_mcv; }; @@ -3547,11 +4463,12 @@ fn airClz(self: *Self, inst: Air.Inst.Index) !void { } fn airCtz(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; const ty_op = self.air.instructions.items(.data)[inst].ty_op; const result = result: { - const dst_ty = self.air.typeOfIndex(inst); - const src_ty = self.air.typeOf(ty_op.operand); - const src_bits = src_ty.bitSize(self.target.*); + const dst_ty = self.typeOfIndex(inst); + const src_ty = self.typeOf(ty_op.operand); + const src_bits = src_ty.bitSize(mod); const src_mcv = try self.resolveInst(ty_op.operand); const mat_src_mcv = switch (src_mcv) { @@ -3569,29 +4486,68 @@ fn airCtz(self: *Self, inst: Air.Inst.Index) !void { const dst_lock = self.register_manager.lockReg(dst_reg); defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); - if (Target.x86.featureSetHas(self.target.cpu.features, .bmi)) { - const extra_bits = self.regExtraBits(src_ty); - const masked_mcv = if (extra_bits > 0) masked: { - const mask_mcv = MCValue{ - .immediate = ((@as(u64, 1) << @intCast(u6, extra_bits)) - 1) << - @intCast(u6, src_bits), - }; - const tmp_mcv = tmp: { - if (src_mcv.isImmediate() or self.liveness.operandDies(inst, 0)) break :tmp src_mcv; - try self.genSetReg(dst_reg, src_ty, src_mcv); - break :tmp dst_mcv; - }; - try self.genBinOpMir(.@"or", src_ty, tmp_mcv, mask_mcv); - break :masked tmp_mcv; - } else mat_src_mcv; - try self.genBinOpMir(.tzcnt, src_ty, dst_mcv, masked_mcv); + if (self.hasFeature(.bmi)) { + if (src_bits <= 64) { + const extra_bits = self.regExtraBits(src_ty) + @as(u64, if (src_bits <= 8) 8 else 0); + const wide_ty = if (src_bits <= 8) Type.u16 else src_ty; + const masked_mcv = if (extra_bits > 0) masked: { + const tmp_mcv = tmp: { + if (src_mcv.isImmediate() or self.liveness.operandDies(inst, 0)) + break :tmp src_mcv; + try self.genSetReg(dst_reg, wide_ty, src_mcv); + break :tmp dst_mcv; + }; + try self.genBinOpMir( + .{ ._, .@"or" }, + wide_ty, + tmp_mcv, + .{ .immediate = (@as(u64, math.maxInt(u64)) >> @intCast(u6, 64 - extra_bits)) << + @intCast(u6, src_bits) }, + ); + break :masked tmp_mcv; + } else mat_src_mcv; + try self.genBinOpMir(.{ ._, .tzcnt }, wide_ty, dst_mcv, masked_mcv); + } else if (src_bits <= 128) { + const tmp_reg = try 
self.register_manager.allocReg(null, gp); + const tmp_mcv = MCValue{ .register = tmp_reg }; + const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); + defer self.register_manager.unlockReg(tmp_lock); + + const masked_mcv = if (src_bits < 128) masked: { + try self.genCopy(Type.u64, dst_mcv, mat_src_mcv.address().offset(8).deref()); + try self.genBinOpMir( + .{ ._, .@"or" }, + Type.u64, + dst_mcv, + .{ .immediate = @as(u64, math.maxInt(u64)) << @intCast(u6, src_bits - 64) }, + ); + break :masked dst_mcv; + } else mat_src_mcv.address().offset(8).deref(); + try self.genBinOpMir(.{ ._, .tzcnt }, Type.u64, dst_mcv, masked_mcv); + try self.genBinOpMir(.{ ._, .add }, dst_ty, dst_mcv, .{ .immediate = 64 }); + try self.genBinOpMir(.{ ._, .tzcnt }, Type.u64, tmp_mcv, mat_src_mcv); + try self.asmCmovccRegisterRegister(dst_reg.to32(), tmp_reg.to32(), .nc); + } else return self.fail("TODO airCtz of {}", .{src_ty.fmt(self.bin_file.options.module.?)}); break :result dst_mcv; } + if (src_bits > 64) + return self.fail("TODO airCtz of {}", .{src_ty.fmt(self.bin_file.options.module.?)}); + const width_reg = try self.copyToTmpRegister(dst_ty, .{ .immediate = src_bits }); - try self.genBinOpMir(.bsf, src_ty, dst_mcv, mat_src_mcv); + const width_lock = self.register_manager.lockRegAssumeUnused(width_reg); + defer self.register_manager.unlockReg(width_lock); + + if (src_bits <= 8 or !math.isPowerOfTwo(src_bits)) { + const wide_reg = try self.copyToTmpRegister(src_ty, mat_src_mcv); + const wide_lock = self.register_manager.lockRegAssumeUnused(wide_reg); + defer self.register_manager.unlockReg(wide_lock); + + try self.truncateRegister(src_ty, wide_reg); + try self.genBinOpMir(.{ ._, .bsf }, Type.u16, dst_mcv, .{ .register = wide_reg }); + } else try self.genBinOpMir(.{ ._, .bsf }, src_ty, dst_mcv, mat_src_mcv); - const cmov_abi_size = @max(@intCast(u32, dst_ty.abiSize(self.target.*)), 2); + const cmov_abi_size = @max(@intCast(u32, dst_ty.abiSize(mod)), 2); try self.asmCmovccRegisterRegister( registerAlias(dst_reg, cmov_abi_size), registerAlias(width_reg, cmov_abi_size), @@ -3603,13 +4559,14 @@ fn airCtz(self: *Self, inst: Air.Inst.Index) !void { } fn airPopcount(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; const ty_op = self.air.instructions.items(.data)[inst].ty_op; const result: MCValue = result: { - const src_ty = self.air.typeOf(ty_op.operand); - const src_abi_size = @intCast(u32, src_ty.abiSize(self.target.*)); + const src_ty = self.typeOf(ty_op.operand); + const src_abi_size = @intCast(u32, src_ty.abiSize(mod)); const src_mcv = try self.resolveInst(ty_op.operand); - if (Target.x86.featureSetHas(self.target.cpu.features, .popcnt)) { + if (self.hasFeature(.popcnt)) { const mat_src_mcv = switch (src_mcv) { .immediate => MCValue{ .register = try self.copyToTmpRegister(src_ty, src_mcv) }, else => src_mcv, @@ -3627,7 +4584,7 @@ fn airPopcount(self: *Self, inst: Air.Inst.Index) !void { .{ .register = try self.register_manager.allocReg(inst, gp) }; const popcnt_ty = if (src_abi_size > 1) src_ty else Type.u16; - try self.genBinOpMir(.popcnt, popcnt_ty, dst_mcv, mat_src_mcv); + try self.genBinOpMir(.{ ._, .popcnt }, popcnt_ty, dst_mcv, mat_src_mcv); break :result dst_mcv; } @@ -3658,54 +4615,54 @@ fn airPopcount(self: *Self, inst: Air.Inst.Index) !void { undefined; // dst = operand - try self.asmRegisterRegister(.mov, tmp, dst); + try self.asmRegisterRegister(.{ ._, .mov }, tmp, dst); // tmp = operand - try self.asmRegisterImmediate(.shr, tmp, Immediate.u(1)); + 
try self.asmRegisterImmediate(.{ ._r, .sh }, tmp, Immediate.u(1)); // tmp = operand >> 1 if (src_abi_size > 4) { - try self.asmRegisterImmediate(.mov, imm, imm_0_1); - try self.asmRegisterRegister(.@"and", tmp, imm); - } else try self.asmRegisterImmediate(.@"and", tmp, imm_0_1); + try self.asmRegisterImmediate(.{ ._, .mov }, imm, imm_0_1); + try self.asmRegisterRegister(.{ ._, .@"and" }, tmp, imm); + } else try self.asmRegisterImmediate(.{ ._, .@"and" }, tmp, imm_0_1); // tmp = (operand >> 1) & 0x55...55 - try self.asmRegisterRegister(.sub, dst, tmp); + try self.asmRegisterRegister(.{ ._, .sub }, dst, tmp); // dst = temp1 = operand - ((operand >> 1) & 0x55...55) - try self.asmRegisterRegister(.mov, tmp, dst); + try self.asmRegisterRegister(.{ ._, .mov }, tmp, dst); // tmp = temp1 - try self.asmRegisterImmediate(.shr, dst, Immediate.u(2)); + try self.asmRegisterImmediate(.{ ._r, .sh }, dst, Immediate.u(2)); // dst = temp1 >> 2 if (src_abi_size > 4) { - try self.asmRegisterImmediate(.mov, imm, imm_00_11); - try self.asmRegisterRegister(.@"and", tmp, imm); - try self.asmRegisterRegister(.@"and", dst, imm); + try self.asmRegisterImmediate(.{ ._, .mov }, imm, imm_00_11); + try self.asmRegisterRegister(.{ ._, .@"and" }, tmp, imm); + try self.asmRegisterRegister(.{ ._, .@"and" }, dst, imm); } else { - try self.asmRegisterImmediate(.@"and", tmp, imm_00_11); - try self.asmRegisterImmediate(.@"and", dst, imm_00_11); + try self.asmRegisterImmediate(.{ ._, .@"and" }, tmp, imm_00_11); + try self.asmRegisterImmediate(.{ ._, .@"and" }, dst, imm_00_11); } // tmp = temp1 & 0x33...33 // dst = (temp1 >> 2) & 0x33...33 - try self.asmRegisterRegister(.add, tmp, dst); + try self.asmRegisterRegister(.{ ._, .add }, tmp, dst); // tmp = temp2 = (temp1 & 0x33...33) + ((temp1 >> 2) & 0x33...33) - try self.asmRegisterRegister(.mov, dst, tmp); + try self.asmRegisterRegister(.{ ._, .mov }, dst, tmp); // dst = temp2 - try self.asmRegisterImmediate(.shr, tmp, Immediate.u(4)); + try self.asmRegisterImmediate(.{ ._r, .sh }, tmp, Immediate.u(4)); // tmp = temp2 >> 4 - try self.asmRegisterRegister(.add, dst, tmp); + try self.asmRegisterRegister(.{ ._, .add }, dst, tmp); // dst = temp2 + (temp2 >> 4) if (src_abi_size > 4) { - try self.asmRegisterImmediate(.mov, imm, imm_0000_1111); - try self.asmRegisterImmediate(.mov, tmp, imm_0000_0001); - try self.asmRegisterRegister(.@"and", dst, imm); - try self.asmRegisterRegister(.imul, dst, tmp); + try self.asmRegisterImmediate(.{ ._, .mov }, imm, imm_0000_1111); + try self.asmRegisterImmediate(.{ ._, .mov }, tmp, imm_0000_0001); + try self.asmRegisterRegister(.{ ._, .@"and" }, dst, imm); + try self.asmRegisterRegister(.{ .i_, .mul }, dst, tmp); } else { - try self.asmRegisterImmediate(.@"and", dst, imm_0000_1111); + try self.asmRegisterImmediate(.{ ._, .@"and" }, dst, imm_0000_1111); if (src_abi_size > 1) { - try self.asmRegisterRegisterImmediate(.imul, dst, dst, imm_0000_0001); + try self.asmRegisterRegisterImmediate(.{ .i_, .mul }, dst, dst, imm_0000_0001); } } // dst = temp3 = (temp2 + (temp2 >> 4)) & 0x0f...0f // dst = temp3 * 0x01...01 if (src_abi_size > 1) { - try self.asmRegisterImmediate(.shr, dst, Immediate.u((src_abi_size - 1) * 8)); + try self.asmRegisterImmediate(.{ ._r, .sh }, dst, Immediate.u((src_abi_size - 1) * 8)); } // dst = (temp3 * 0x01...01) >> (bits - 8) } @@ -3734,11 +4691,11 @@ fn byteSwap(self: *Self, inst: Air.Inst.Index, src_ty: Type, src_mcv: MCValue, m 16 => if ((mem_ok or src_mcv.isRegister()) and self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) { - 
try self.genBinOpMir(.rol, src_ty, src_mcv, .{ .immediate = 8 }); + try self.genBinOpMir(.{ ._l, .ro }, src_ty, src_mcv, .{ .immediate = 8 }); return src_mcv; }, 32, 64 => if (src_mcv.isRegister() and self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) { - try self.genUnOpMir(.bswap, src_ty, src_mcv); + try self.genUnOpMir(.{ ._, .bswap }, src_ty, src_mcv); return src_mcv; }, } @@ -3755,10 +4712,10 @@ fn byteSwap(self: *Self, inst: Air.Inst.Index, src_ty: Type, src_mcv: MCValue, m try self.genSetReg(dst_mcv.register, src_ty, src_mcv); switch (src_bits) { else => unreachable, - 16 => try self.genBinOpMir(.rol, src_ty, dst_mcv, .{ .immediate = 8 }), - 32, 64 => try self.genUnOpMir(.bswap, src_ty, dst_mcv), + 16 => try self.genBinOpMir(.{ ._l, .ro }, src_ty, dst_mcv, .{ .immediate = 8 }), + 32, 64 => try self.genUnOpMir(.{ ._, .bswap }, src_ty, dst_mcv), } - } else try self.genBinOpMir(.movbe, src_ty, dst_mcv, src_mcv); + } else try self.genBinOpMir(.{ ._, .movbe }, src_ty, dst_mcv, src_mcv); return dst_mcv; } @@ -3767,21 +4724,22 @@ fn byteSwap(self: *Self, inst: Air.Inst.Index, src_ty: Type, src_mcv: MCValue, m const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg); defer self.register_manager.unlockReg(dst_lock); - try self.genBinOpMir(.movbe, src_ty, dst_mcv, src_mcv); + try self.genBinOpMir(.{ ._, .movbe }, src_ty, dst_mcv, src_mcv); return dst_mcv; } fn airByteSwap(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; const ty_op = self.air.instructions.items(.data)[inst].ty_op; - const src_ty = self.air.typeOf(ty_op.operand); + const src_ty = self.typeOf(ty_op.operand); const src_mcv = try self.resolveInst(ty_op.operand); const dst_mcv = try self.byteSwap(inst, src_ty, src_mcv, true); switch (self.regExtraBits(src_ty)) { 0 => {}, else => |extra| try self.genBinOpMir( - if (src_ty.isSignedInt()) .sar else .shr, + if (src_ty.isSignedInt(mod)) .{ ._r, .sa } else .{ ._r, .sh }, src_ty, dst_mcv, .{ .immediate = extra }, @@ -3792,10 +4750,11 @@ fn airByteSwap(self: *Self, inst: Air.Inst.Index) !void { } fn airBitReverse(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; const ty_op = self.air.instructions.items(.data)[inst].ty_op; - const src_ty = self.air.typeOf(ty_op.operand); - const src_abi_size = @intCast(u32, src_ty.abiSize(self.target.*)); + const src_ty = self.typeOf(ty_op.operand); + const src_abi_size = @intCast(u32, src_ty.abiSize(mod)); const src_mcv = try self.resolveInst(ty_op.operand); const dst_mcv = try self.byteSwap(inst, src_ty, src_mcv, false); @@ -3821,40 +4780,40 @@ fn airBitReverse(self: *Self, inst: Air.Inst.Index) !void { const imm_0_1 = Immediate.u(mask / 0b1_1); // dst = temp1 = bswap(operand) - try self.asmRegisterRegister(.mov, tmp, dst); + try self.asmRegisterRegister(.{ ._, .mov }, tmp, dst); // tmp = temp1 - try self.asmRegisterImmediate(.shr, dst, Immediate.u(4)); + try self.asmRegisterImmediate(.{ ._r, .sh }, dst, Immediate.u(4)); // dst = temp1 >> 4 if (src_abi_size > 4) { - try self.asmRegisterImmediate(.mov, imm, imm_0000_1111); - try self.asmRegisterRegister(.@"and", tmp, imm); - try self.asmRegisterRegister(.@"and", dst, imm); + try self.asmRegisterImmediate(.{ ._, .mov }, imm, imm_0000_1111); + try self.asmRegisterRegister(.{ ._, .@"and" }, tmp, imm); + try self.asmRegisterRegister(.{ ._, .@"and" }, dst, imm); } else { - try self.asmRegisterImmediate(.@"and", tmp, imm_0000_1111); - try self.asmRegisterImmediate(.@"and", dst, imm_0000_1111); + try 
self.asmRegisterImmediate(.{ ._, .@"and" }, tmp, imm_0000_1111); + try self.asmRegisterImmediate(.{ ._, .@"and" }, dst, imm_0000_1111); } // tmp = temp1 & 0x0F...0F // dst = (temp1 >> 4) & 0x0F...0F - try self.asmRegisterImmediate(.shl, tmp, Immediate.u(4)); + try self.asmRegisterImmediate(.{ ._l, .sh }, tmp, Immediate.u(4)); // tmp = (temp1 & 0x0F...0F) << 4 - try self.asmRegisterRegister(.@"or", dst, tmp); + try self.asmRegisterRegister(.{ ._, .@"or" }, dst, tmp); // dst = temp2 = ((temp1 >> 4) & 0x0F...0F) | ((temp1 & 0x0F...0F) << 4) - try self.asmRegisterRegister(.mov, tmp, dst); + try self.asmRegisterRegister(.{ ._, .mov }, tmp, dst); // tmp = temp2 - try self.asmRegisterImmediate(.shr, dst, Immediate.u(2)); + try self.asmRegisterImmediate(.{ ._r, .sh }, dst, Immediate.u(2)); // dst = temp2 >> 2 if (src_abi_size > 4) { - try self.asmRegisterImmediate(.mov, imm, imm_00_11); - try self.asmRegisterRegister(.@"and", tmp, imm); - try self.asmRegisterRegister(.@"and", dst, imm); + try self.asmRegisterImmediate(.{ ._, .mov }, imm, imm_00_11); + try self.asmRegisterRegister(.{ ._, .@"and" }, tmp, imm); + try self.asmRegisterRegister(.{ ._, .@"and" }, dst, imm); } else { - try self.asmRegisterImmediate(.@"and", tmp, imm_00_11); - try self.asmRegisterImmediate(.@"and", dst, imm_00_11); + try self.asmRegisterImmediate(.{ ._, .@"and" }, tmp, imm_00_11); + try self.asmRegisterImmediate(.{ ._, .@"and" }, dst, imm_00_11); } // tmp = temp2 & 0x33...33 // dst = (temp2 >> 2) & 0x33...33 try self.asmRegisterMemory( - .lea, + .{ ._, .lea }, if (src_abi_size > 4) tmp.to64() else tmp.to32(), Memory.sib(.qword, .{ .base = .{ .reg = dst.to64() }, @@ -3862,22 +4821,22 @@ fn airBitReverse(self: *Self, inst: Air.Inst.Index) !void { }), ); // tmp = temp3 = ((temp2 >> 2) & 0x33...33) + ((temp2 & 0x33...33) << 2) - try self.asmRegisterRegister(.mov, dst, tmp); + try self.asmRegisterRegister(.{ ._, .mov }, dst, tmp); // dst = temp3 - try self.asmRegisterImmediate(.shr, tmp, Immediate.u(1)); + try self.asmRegisterImmediate(.{ ._r, .sh }, tmp, Immediate.u(1)); // tmp = temp3 >> 1 if (src_abi_size > 4) { - try self.asmRegisterImmediate(.mov, imm, imm_0_1); - try self.asmRegisterRegister(.@"and", dst, imm); - try self.asmRegisterRegister(.@"and", tmp, imm); + try self.asmRegisterImmediate(.{ ._, .mov }, imm, imm_0_1); + try self.asmRegisterRegister(.{ ._, .@"and" }, dst, imm); + try self.asmRegisterRegister(.{ ._, .@"and" }, tmp, imm); } else { - try self.asmRegisterImmediate(.@"and", dst, imm_0_1); - try self.asmRegisterImmediate(.@"and", tmp, imm_0_1); + try self.asmRegisterImmediate(.{ ._, .@"and" }, dst, imm_0_1); + try self.asmRegisterImmediate(.{ ._, .@"and" }, tmp, imm_0_1); } // dst = temp3 & 0x55...55 // tmp = (temp3 >> 1) & 0x55...55 try self.asmRegisterMemory( - .lea, + .{ ._, .lea }, if (src_abi_size > 4) dst.to64() else dst.to32(), Memory.sib(.qword, .{ .base = .{ .reg = tmp.to64() }, @@ -3890,7 +4849,7 @@ fn airBitReverse(self: *Self, inst: Air.Inst.Index) !void { switch (self.regExtraBits(src_ty)) { 0 => {}, else => |extra| try self.genBinOpMir( - if (src_ty.isSignedInt()) .sar else .shr, + if (src_ty.isSignedInt(mod)) .{ ._r, .sa } else .{ ._r, .sh }, src_ty, dst_mcv, .{ .immediate = extra }, @@ -3900,10 +4859,351 @@ fn airBitReverse(self: *Self, inst: Air.Inst.Index) !void { return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none }); } +fn airFloatSign(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; + const tag = 
self.air.instructions.items(.tag)[inst]; + const un_op = self.air.instructions.items(.data)[inst].un_op; + const ty = self.typeOf(un_op); + const abi_size: u32 = switch (ty.abiSize(mod)) { + 1...16 => 16, + 17...32 => 32, + else => return self.fail("TODO implement airFloatSign for {}", .{ + ty.fmt(mod), + }), + }; + const scalar_bits = ty.scalarType(mod).floatBits(self.target.*); + + const src_mcv = try self.resolveInst(un_op); + const src_lock = if (src_mcv.getReg()) |reg| self.register_manager.lockReg(reg) else null; + defer if (src_lock) |lock| self.register_manager.unlockReg(lock); + + const dst_mcv: MCValue = if (src_mcv.isRegister() and self.reuseOperand(inst, un_op, 0, src_mcv)) + src_mcv + else if (self.hasFeature(.avx)) + .{ .register = try self.register_manager.allocReg(inst, sse) } + else + try self.copyToRegisterWithInstTracking(inst, ty, src_mcv); + const dst_reg = dst_mcv.getReg().?; + const dst_lock = self.register_manager.lockReg(dst_reg); + defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); + + const vec_ty = try mod.vectorType(.{ + .len = @divExact(abi_size * 8, scalar_bits), + .child = (try mod.intType(.signed, scalar_bits)).ip_index, + }); + + const sign_val = switch (tag) { + .neg => try vec_ty.minInt(mod, vec_ty), + .fabs => try vec_ty.maxInt(mod, vec_ty), + else => unreachable, + }; + + const sign_mcv = try self.genTypedValue(.{ .ty = vec_ty, .val = sign_val }); + const sign_mem = if (sign_mcv.isMemory()) + sign_mcv.mem(Memory.PtrSize.fromSize(abi_size)) + else + Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ + .base = .{ .reg = try self.copyToTmpRegister(Type.usize, sign_mcv.address()) }, + }); + + if (self.hasFeature(.avx)) try self.asmRegisterRegisterMemory( + switch (scalar_bits) { + 16, 128 => if (abi_size <= 16 or self.hasFeature(.avx2)) switch (tag) { + .neg => .{ .vp_, .xor }, + .fabs => .{ .vp_, .@"and" }, + else => unreachable, + } else switch (tag) { + .neg => .{ .v_ps, .xor }, + .fabs => .{ .v_ps, .@"and" }, + else => unreachable, + }, + 32 => switch (tag) { + .neg => .{ .v_ps, .xor }, + .fabs => .{ .v_ps, .@"and" }, + else => unreachable, + }, + 64 => switch (tag) { + .neg => .{ .v_pd, .xor }, + .fabs => .{ .v_pd, .@"and" }, + else => unreachable, + }, + 80 => return self.fail("TODO implement airFloatSign for {}", .{ + ty.fmt(self.bin_file.options.module.?), + }), + else => unreachable, + }, + registerAlias(dst_reg, abi_size), + registerAlias(if (src_mcv.isRegister()) + src_mcv.getReg().? 
+ else + try self.copyToTmpRegister(ty, src_mcv), abi_size), + sign_mem, + ) else try self.asmRegisterMemory( + switch (scalar_bits) { + 16, 128 => switch (tag) { + .neg => .{ .p_, .xor }, + .fabs => .{ .p_, .@"and" }, + else => unreachable, + }, + 32 => switch (tag) { + .neg => .{ ._ps, .xor }, + .fabs => .{ ._ps, .@"and" }, + else => unreachable, + }, + 64 => switch (tag) { + .neg => .{ ._pd, .xor }, + .fabs => .{ ._pd, .@"and" }, + else => unreachable, + }, + 80 => return self.fail("TODO implement airFloatSign for {}", .{ + ty.fmt(self.bin_file.options.module.?), + }), + else => unreachable, + }, + registerAlias(dst_reg, abi_size), + sign_mem, + ); + return self.finishAir(inst, dst_mcv, .{ un_op, .none, .none }); +} + +fn airRound(self: *Self, inst: Air.Inst.Index, mode: u4) !void { + const un_op = self.air.instructions.items(.data)[inst].un_op; + const ty = self.typeOf(un_op); + + const src_mcv = try self.resolveInst(un_op); + const dst_mcv = if (src_mcv.isRegister() and self.reuseOperand(inst, un_op, 0, src_mcv)) + src_mcv + else + try self.copyToRegisterWithInstTracking(inst, ty, src_mcv); + const dst_reg = dst_mcv.getReg().?; + const dst_lock = self.register_manager.lockReg(dst_reg); + defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); + try self.genRound(ty, dst_reg, src_mcv, mode); + return self.finishAir(inst, dst_mcv, .{ un_op, .none, .none }); +} + +fn genRound(self: *Self, ty: Type, dst_reg: Register, src_mcv: MCValue, mode: u4) !void { + const mod = self.bin_file.options.module.?; + if (!self.hasFeature(.sse4_1)) + return self.fail("TODO implement genRound without sse4_1 feature", .{}); + + const mir_tag = if (@as(?Mir.Inst.FixedTag, switch (ty.zigTypeTag(mod)) { + .Float => switch (ty.floatBits(self.target.*)) { + 32 => if (self.hasFeature(.avx)) .{ .v_ss, .round } else .{ ._ss, .round }, + 64 => if (self.hasFeature(.avx)) .{ .v_sd, .round } else .{ ._sd, .round }, + 16, 80, 128 => null, + else => unreachable, + }, + .Vector => switch (ty.childType(mod).zigTypeTag(mod)) { + .Float => switch (ty.childType(mod).floatBits(self.target.*)) { + 32 => switch (ty.vectorLen(mod)) { + 1 => if (self.hasFeature(.avx)) .{ .v_ss, .round } else .{ ._ss, .round }, + 2...4 => if (self.hasFeature(.avx)) .{ .v_ps, .round } else .{ ._ps, .round }, + 5...8 => if (self.hasFeature(.avx)) .{ .v_ps, .round } else null, + else => null, + }, + 64 => switch (ty.vectorLen(mod)) { + 1 => if (self.hasFeature(.avx)) .{ .v_sd, .round } else .{ ._sd, .round }, + 2 => if (self.hasFeature(.avx)) .{ .v_pd, .round } else .{ ._pd, .round }, + 3...4 => if (self.hasFeature(.avx)) .{ .v_pd, .round } else null, + else => null, + }, + 16, 80, 128 => null, + else => unreachable, + }, + else => null, + }, + else => unreachable, + })) |tag| tag else return self.fail("TODO implement genRound for {}", .{ + ty.fmt(self.bin_file.options.module.?), + }); + const abi_size = @intCast(u32, ty.abiSize(mod)); + const dst_alias = registerAlias(dst_reg, abi_size); + switch (mir_tag[0]) { + .v_ss, .v_sd => if (src_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate( + mir_tag, + dst_alias, + dst_alias, + src_mcv.mem(Memory.PtrSize.fromSize(abi_size)), + Immediate.u(mode), + ) else try self.asmRegisterRegisterRegisterImmediate( + mir_tag, + dst_alias, + dst_alias, + registerAlias(if (src_mcv.isRegister()) + src_mcv.getReg().? 
+ else + try self.copyToTmpRegister(ty, src_mcv), abi_size), + Immediate.u(mode), + ), + else => if (src_mcv.isMemory()) try self.asmRegisterMemoryImmediate( + mir_tag, + dst_alias, + src_mcv.mem(Memory.PtrSize.fromSize(abi_size)), + Immediate.u(mode), + ) else try self.asmRegisterRegisterImmediate( + mir_tag, + dst_alias, + registerAlias(if (src_mcv.isRegister()) + src_mcv.getReg().? + else + try self.copyToTmpRegister(ty, src_mcv), abi_size), + Immediate.u(mode), + ), + } +} + +fn airSqrt(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; + const un_op = self.air.instructions.items(.data)[inst].un_op; + const ty = self.typeOf(un_op); + const abi_size = @intCast(u32, ty.abiSize(mod)); + + const src_mcv = try self.resolveInst(un_op); + const dst_mcv = if (src_mcv.isRegister() and self.reuseOperand(inst, un_op, 0, src_mcv)) + src_mcv + else + try self.copyToRegisterWithInstTracking(inst, ty, src_mcv); + const dst_reg = registerAlias(dst_mcv.getReg().?, abi_size); + const dst_lock = self.register_manager.lockReg(dst_reg); + defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); + + const result: MCValue = result: { + const mir_tag = if (@as(?Mir.Inst.FixedTag, switch (ty.zigTypeTag(mod)) { + .Float => switch (ty.floatBits(self.target.*)) { + 16 => if (self.hasFeature(.f16c)) { + const mat_src_reg = if (src_mcv.isRegister()) + src_mcv.getReg().? + else + try self.copyToTmpRegister(ty, src_mcv); + try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg, mat_src_reg.to128()); + try self.asmRegisterRegisterRegister(.{ .v_ss, .sqrt }, dst_reg, dst_reg, dst_reg); + try self.asmRegisterRegisterImmediate( + .{ .v_, .cvtps2ph }, + dst_reg, + dst_reg, + Immediate.u(0b1_00), + ); + break :result dst_mcv; + } else null, + 32 => if (self.hasFeature(.avx)) .{ .v_ss, .sqrt } else .{ ._ss, .sqrt }, + 64 => if (self.hasFeature(.avx)) .{ .v_sd, .sqrt } else .{ ._sd, .sqrt }, + 80, 128 => null, + else => unreachable, + }, + .Vector => switch (ty.childType(mod).zigTypeTag(mod)) { + .Float => switch (ty.childType(mod).floatBits(self.target.*)) { + 16 => if (self.hasFeature(.f16c)) switch (ty.vectorLen(mod)) { + 1 => { + try self.asmRegisterRegister( + .{ .v_ps, .cvtph2 }, + dst_reg, + (if (src_mcv.isRegister()) + src_mcv.getReg().? + else + try self.copyToTmpRegister(ty, src_mcv)).to128(), + ); + try self.asmRegisterRegisterRegister( + .{ .v_ss, .sqrt }, + dst_reg, + dst_reg, + dst_reg, + ); + try self.asmRegisterRegisterImmediate( + .{ .v_, .cvtps2ph }, + dst_reg, + dst_reg, + Immediate.u(0b1_00), + ); + break :result dst_mcv; + }, + 2...8 => { + const wide_reg = registerAlias(dst_reg, abi_size * 2); + if (src_mcv.isMemory()) try self.asmRegisterMemory( + .{ .v_ps, .cvtph2 }, + wide_reg, + src_mcv.mem(Memory.PtrSize.fromSize( + @intCast(u32, @divExact(wide_reg.bitSize(), 16)), + )), + ) else try self.asmRegisterRegister( + .{ .v_ps, .cvtph2 }, + wide_reg, + (if (src_mcv.isRegister()) + src_mcv.getReg().? 
+ else + try self.copyToTmpRegister(ty, src_mcv)).to128(), + ); + try self.asmRegisterRegister(.{ .v_ps, .sqrt }, wide_reg, wide_reg); + try self.asmRegisterRegisterImmediate( + .{ .v_, .cvtps2ph }, + dst_reg, + wide_reg, + Immediate.u(0b1_00), + ); + break :result dst_mcv; + }, + else => null, + } else null, + 32 => switch (ty.vectorLen(mod)) { + 1 => if (self.hasFeature(.avx)) .{ .v_ss, .sqrt } else .{ ._ss, .sqrt }, + 2...4 => if (self.hasFeature(.avx)) .{ .v_ps, .sqrt } else .{ ._ps, .sqrt }, + 5...8 => if (self.hasFeature(.avx)) .{ .v_ps, .sqrt } else null, + else => null, + }, + 64 => switch (ty.vectorLen(mod)) { + 1 => if (self.hasFeature(.avx)) .{ .v_sd, .sqrt } else .{ ._sd, .sqrt }, + 2 => if (self.hasFeature(.avx)) .{ .v_pd, .sqrt } else .{ ._pd, .sqrt }, + 3...4 => if (self.hasFeature(.avx)) .{ .v_pd, .sqrt } else null, + else => null, + }, + 80, 128 => null, + else => unreachable, + }, + else => unreachable, + }, + else => unreachable, + })) |tag| tag else return self.fail("TODO implement airSqrt for {}", .{ + ty.fmt(mod), + }); + switch (mir_tag[0]) { + .v_ss, .v_sd => if (src_mcv.isMemory()) try self.asmRegisterRegisterMemory( + mir_tag, + dst_reg, + dst_reg, + src_mcv.mem(Memory.PtrSize.fromSize(abi_size)), + ) else try self.asmRegisterRegisterRegister( + mir_tag, + dst_reg, + dst_reg, + registerAlias(if (src_mcv.isRegister()) + src_mcv.getReg().? + else + try self.copyToTmpRegister(ty, src_mcv), abi_size), + ), + else => if (src_mcv.isMemory()) try self.asmRegisterMemory( + mir_tag, + dst_reg, + src_mcv.mem(Memory.PtrSize.fromSize(abi_size)), + ) else try self.asmRegisterRegister( + mir_tag, + dst_reg, + registerAlias(if (src_mcv.isRegister()) + src_mcv.getReg().? + else + try self.copyToTmpRegister(ty, src_mcv), abi_size), + ), + } + break :result dst_mcv; + }; + return self.finishAir(inst, result, .{ un_op, .none, .none }); +} + fn airUnaryMath(self: *Self, inst: Air.Inst.Index) !void { const un_op = self.air.instructions.items(.data)[inst].un_op; _ = un_op; - return self.fail("TODO implement airUnaryMath for {}", .{self.target.cpu.arch}); + return self.fail("TODO implement airUnaryMath for {}", .{ + self.air.instructions.items(.tag)[inst], + }); //return self.finishAir(inst, result, .{ un_op, .none, .none }); } @@ -3951,11 +5251,12 @@ fn reuseOperandAdvanced( } fn packedLoad(self: *Self, dst_mcv: MCValue, ptr_ty: Type, ptr_mcv: MCValue) InnerError!void { - const ptr_info = ptr_ty.ptrInfo().data; + const mod = self.bin_file.options.module.?; + const ptr_info = ptr_ty.ptrInfo(mod); const val_ty = ptr_info.pointee_type; - const val_abi_size = @intCast(u32, val_ty.abiSize(self.target.*)); - const limb_abi_size = @min(val_abi_size, 8); + const val_abi_size = @intCast(u32, val_ty.abiSize(mod)); + const limb_abi_size: u32 = @min(val_abi_size, 8); const limb_abi_bits = limb_abi_size * 8; const val_byte_off = @intCast(i32, ptr_info.bit_offset / limb_abi_bits * limb_abi_size); const val_bit_off = ptr_info.bit_offset % limb_abi_bits; @@ -3981,14 +5282,14 @@ fn packedLoad(self: *Self, dst_mcv: MCValue, ptr_ty: Type, ptr_mcv: MCValue) Inn if (load_abi_size <= 8) { const load_reg = registerAlias(dst_reg, load_abi_size); try self.asmRegisterMemory( - .mov, + .{ ._, .mov }, load_reg, Memory.sib(Memory.PtrSize.fromSize(load_abi_size), .{ .base = .{ .reg = ptr_reg }, .disp = val_byte_off, }), ); - try self.asmRegisterImmediate(.shr, load_reg, Immediate.u(val_bit_off)); + try self.asmRegisterImmediate(.{ ._r, .sh }, load_reg, Immediate.u(val_bit_off)); } else { const tmp_reg = 
registerAlias(try self.register_manager.allocReg(null, gp), val_abi_size); const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); @@ -3996,7 +5297,7 @@ fn packedLoad(self: *Self, dst_mcv: MCValue, ptr_ty: Type, ptr_mcv: MCValue) Inn const dst_alias = registerAlias(dst_reg, val_abi_size); try self.asmRegisterMemory( - .mov, + .{ ._, .mov }, dst_alias, Memory.sib(Memory.PtrSize.fromSize(val_abi_size), .{ .base = .{ .reg = ptr_reg }, @@ -4004,14 +5305,19 @@ fn packedLoad(self: *Self, dst_mcv: MCValue, ptr_ty: Type, ptr_mcv: MCValue) Inn }), ); try self.asmRegisterMemory( - .mov, + .{ ._, .mov }, tmp_reg, Memory.sib(Memory.PtrSize.fromSize(val_abi_size), .{ .base = .{ .reg = ptr_reg }, .disp = val_byte_off + 1, }), ); - try self.asmRegisterRegisterImmediate(.shrd, dst_alias, tmp_reg, Immediate.u(val_bit_off)); + try self.asmRegisterRegisterImmediate( + .{ ._rd, .sh }, + dst_alias, + tmp_reg, + Immediate.u(val_bit_off), + ); } if (val_extra_bits > 0) try self.truncateRegister(val_ty, dst_reg); @@ -4019,7 +5325,8 @@ fn packedLoad(self: *Self, dst_mcv: MCValue, ptr_ty: Type, ptr_mcv: MCValue) Inn } fn load(self: *Self, dst_mcv: MCValue, ptr_ty: Type, ptr_mcv: MCValue) InnerError!void { - const dst_ty = ptr_ty.childType(); + const mod = self.bin_file.options.module.?; + const dst_ty = ptr_ty.childType(mod); switch (ptr_mcv) { .none, .unreach, @@ -4054,25 +5361,31 @@ fn load(self: *Self, dst_mcv: MCValue, ptr_ty: Type, ptr_mcv: MCValue) InnerErro } fn airLoad(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; const ty_op = self.air.instructions.items(.data)[inst].ty_op; - const elem_ty = self.air.typeOfIndex(inst); - const elem_size = elem_ty.abiSize(self.target.*); + const elem_ty = self.typeOfIndex(inst); const result: MCValue = result: { - if (!elem_ty.hasRuntimeBitsIgnoreComptime()) break :result .none; + if (!elem_ty.hasRuntimeBitsIgnoreComptime(mod)) break :result .none; try self.spillRegisters(&.{ .rdi, .rsi, .rcx }); const reg_locks = self.register_manager.lockRegsAssumeUnused(3, .{ .rdi, .rsi, .rcx }); defer for (reg_locks) |lock| self.register_manager.unlockReg(lock); + const ptr_ty = self.typeOf(ty_op.operand); + const elem_size = elem_ty.abiSize(mod); + + const elem_rc = regClassForType(elem_ty, mod); + const ptr_rc = regClassForType(ptr_ty, mod); + const ptr_mcv = try self.resolveInst(ty_op.operand); - const dst_mcv = if (elem_size <= 8 and self.reuseOperand(inst, ty_op.operand, 0, ptr_mcv)) + const dst_mcv = if (elem_size <= 8 and elem_rc.supersetOf(ptr_rc) and + self.reuseOperand(inst, ty_op.operand, 0, ptr_mcv)) // The MCValue that holds the pointer can be re-used as the value. 
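[Editor's note: an inference drawn only from the replacements visible in these hunks, not part of the patch itself.] Throughout this diff the old single-enum Mir mnemonics are replaced by two-part Mir.Inst.FixedTag tuples of the form .{ fixes, operation }. Reading the substitutions shown here, the pairs recombine into the familiar x86 mnemonics, for example:

    .{ ._,    .mov }   // mov
    .{ ._l,   .sh  }   // shl
    .{ ._r,   .sh  }   // shr
    .{ ._r,   .sa  }   // sar
    .{ ._rd,  .sh  }   // shrd
    .{ ._ss,  .add }   // addss (SSE scalar)
    .{ .v_ss, .add }   // vaddss (AVX scalar)

This mapping is only what the visible before/after pairs suggest; the full encoding scheme lives in Mir.zig, which is not shown in this section.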
ptr_mcv else try self.allocRegOrMem(inst, true); - const ptr_ty = self.air.typeOf(ty_op.operand); - if (ptr_ty.ptrInfo().data.host_size > 0) { + if (ptr_ty.ptrInfo(mod).host_size > 0) { try self.packedLoad(dst_mcv, ptr_ty, ptr_mcv); } else { try self.load(dst_mcv, ptr_ty, ptr_mcv); @@ -4083,13 +5396,14 @@ fn airLoad(self: *Self, inst: Air.Inst.Index) !void { } fn packedStore(self: *Self, ptr_ty: Type, ptr_mcv: MCValue, src_mcv: MCValue) InnerError!void { - const ptr_info = ptr_ty.ptrInfo().data; - const src_ty = ptr_ty.childType(); + const mod = self.bin_file.options.module.?; + const ptr_info = ptr_ty.ptrInfo(mod); + const src_ty = ptr_ty.childType(mod); - const limb_abi_size = @min(ptr_info.host_size, 8); + const limb_abi_size: u16 = @min(ptr_info.host_size, 8); const limb_abi_bits = limb_abi_size * 8; - const src_bit_size = src_ty.bitSize(self.target.*); + const src_bit_size = src_ty.bitSize(mod); const src_byte_off = @intCast(i32, ptr_info.bit_offset / limb_abi_bits * limb_abi_size); const src_bit_off = ptr_info.bit_offset % limb_abi_bits; @@ -4112,13 +5426,13 @@ fn packedStore(self: *Self, ptr_ty: Type, ptr_mcv: MCValue, src_mcv: MCValue) In const part_mask_not = part_mask ^ (@as(u64, math.maxInt(u64)) >> @intCast(u6, 64 - limb_abi_bits)); if (limb_abi_size <= 4) { - try self.asmMemoryImmediate(.@"and", limb_mem, Immediate.u(part_mask_not)); + try self.asmMemoryImmediate(.{ ._, .@"and" }, limb_mem, Immediate.u(part_mask_not)); } else if (math.cast(i32, @bitCast(i64, part_mask_not))) |small| { - try self.asmMemoryImmediate(.@"and", limb_mem, Immediate.s(small)); + try self.asmMemoryImmediate(.{ ._, .@"and" }, limb_mem, Immediate.s(small)); } else { const part_mask_reg = try self.register_manager.allocReg(null, gp); - try self.asmRegisterImmediate(.mov, part_mask_reg, Immediate.u(part_mask_not)); - try self.asmMemoryRegister(.@"and", limb_mem, part_mask_reg); + try self.asmRegisterImmediate(.{ ._, .mov }, part_mask_reg, Immediate.u(part_mask_not)); + try self.asmMemoryRegister(.{ ._, .@"and" }, limb_mem, part_mask_reg); } if (src_bit_size <= 64) { @@ -4129,14 +5443,26 @@ fn packedStore(self: *Self, ptr_ty: Type, ptr_mcv: MCValue, src_mcv: MCValue) In try self.genSetReg(tmp_reg, src_ty, src_mcv); switch (limb_i) { - 0 => try self.genShiftBinOpMir(.shl, src_ty, tmp_mcv, .{ .immediate = src_bit_off }), - 1 => try self.genShiftBinOpMir(.shr, src_ty, tmp_mcv, .{ - .immediate = limb_abi_bits - src_bit_off, - }), + 0 => try self.genShiftBinOpMir( + .{ ._l, .sh }, + src_ty, + tmp_mcv, + .{ .immediate = src_bit_off }, + ), + 1 => try self.genShiftBinOpMir( + .{ ._r, .sh }, + src_ty, + tmp_mcv, + .{ .immediate = limb_abi_bits - src_bit_off }, + ), else => unreachable, } - try self.genBinOpMir(.@"and", src_ty, tmp_mcv, .{ .immediate = part_mask }); - try self.asmMemoryRegister(.@"or", limb_mem, registerAlias(tmp_reg, limb_abi_size)); + try self.genBinOpMir(.{ ._, .@"and" }, src_ty, tmp_mcv, .{ .immediate = part_mask }); + try self.asmMemoryRegister( + .{ ._, .@"or" }, + limb_mem, + registerAlias(tmp_reg, limb_abi_size), + ); } else return self.fail("TODO: implement packed store of {}", .{ src_ty.fmt(self.bin_file.options.module.?), }); @@ -4144,7 +5470,8 @@ fn packedStore(self: *Self, ptr_ty: Type, ptr_mcv: MCValue, src_mcv: MCValue) In } fn store(self: *Self, ptr_ty: Type, ptr_mcv: MCValue, src_mcv: MCValue) InnerError!void { - const src_ty = ptr_ty.childType(); + const mod = self.bin_file.options.module.?; + const src_ty = ptr_ty.childType(mod); switch (ptr_mcv) { .none, .unreach, @@ -4179,6 
+5506,7 @@ fn store(self: *Self, ptr_ty: Type, ptr_mcv: MCValue, src_mcv: MCValue) InnerErr } fn airStore(self: *Self, inst: Air.Inst.Index, safety: bool) !void { + const mod = self.bin_file.options.module.?; if (safety) { // TODO if the value is undef, write 0xaa bytes to dest } else { @@ -4186,9 +5514,9 @@ fn airStore(self: *Self, inst: Air.Inst.Index, safety: bool) !void { } const bin_op = self.air.instructions.items(.data)[inst].bin_op; const ptr_mcv = try self.resolveInst(bin_op.lhs); - const ptr_ty = self.air.typeOf(bin_op.lhs); + const ptr_ty = self.typeOf(bin_op.lhs); const src_mcv = try self.resolveInst(bin_op.rhs); - if (ptr_ty.ptrInfo().data.host_size > 0) { + if (ptr_ty.ptrInfo(mod).host_size > 0) { try self.packedStore(ptr_ty, ptr_mcv, src_mcv); } else { try self.store(ptr_ty, ptr_mcv, src_mcv); @@ -4210,100 +5538,57 @@ fn airStructFieldPtrIndex(self: *Self, inst: Air.Inst.Index, index: u8) !void { } fn fieldPtr(self: *Self, inst: Air.Inst.Index, operand: Air.Inst.Ref, index: u32) !MCValue { - const ptr_field_ty = self.air.typeOfIndex(inst); - const mcv = try self.resolveInst(operand); - const ptr_container_ty = self.air.typeOf(operand); - const container_ty = ptr_container_ty.childType(); - const field_offset = switch (container_ty.containerLayout()) { - .Auto, .Extern => @intCast(u32, container_ty.structFieldOffset(index, self.target.*)), - .Packed => if (container_ty.zigTypeTag() == .Struct and - ptr_field_ty.ptrInfo().data.host_size == 0) - container_ty.packedStructFieldByteOffset(index, self.target.*) + const mod = self.bin_file.options.module.?; + const ptr_field_ty = self.typeOfIndex(inst); + const ptr_container_ty = self.typeOf(operand); + const container_ty = ptr_container_ty.childType(mod); + const field_offset = @intCast(i32, switch (container_ty.containerLayout(mod)) { + .Auto, .Extern => container_ty.structFieldOffset(index, mod), + .Packed => if (container_ty.zigTypeTag(mod) == .Struct and + ptr_field_ty.ptrInfo(mod).host_size == 0) + container_ty.packedStructFieldByteOffset(index, mod) else 0, - }; - - const result: MCValue = result: { - switch (mcv) { - .load_frame, .lea_tlv, .load_tlv => { - const offset_reg = try self.copyToTmpRegister(Type.usize, .{ - .immediate = field_offset, - }); - const offset_reg_lock = self.register_manager.lockRegAssumeUnused(offset_reg); - defer self.register_manager.unlockReg(offset_reg_lock); - - const dst_mcv = try self.copyToRegisterWithInstTracking(inst, Type.usize, switch (mcv) { - .load_tlv => |sym_index| .{ .lea_tlv = sym_index }, - else => mcv, - }); - try self.genBinOpMir(.add, Type.usize, dst_mcv, .{ .register = offset_reg }); - break :result dst_mcv; - }, - .indirect => |reg_off| break :result .{ .indirect = .{ - .reg = reg_off.reg, - .off = reg_off.off + @intCast(i32, field_offset), - } }, - .lea_frame => |frame_addr| break :result .{ .lea_frame = .{ - .index = frame_addr.index, - .off = frame_addr.off + @intCast(i32, field_offset), - } }, - .register, .register_offset => { - const src_reg = mcv.getReg().?; - const src_lock = self.register_manager.lockRegAssumeUnused(src_reg); - defer self.register_manager.unlockReg(src_lock); + }); - const dst_mcv: MCValue = if (self.reuseOperand(inst, operand, 0, mcv)) - mcv - else - .{ .register = try self.copyToTmpRegister(ptr_field_ty, mcv) }; - break :result .{ .register_offset = .{ - .reg = dst_mcv.getReg().?, - .off = switch (dst_mcv) { - .register => 0, - .register_offset => |reg_off| reg_off.off, - else => unreachable, - } + @intCast(i32, field_offset), - } }; - }, - else => 
return self.fail("TODO implement fieldPtr for {}", .{mcv}), - } - }; - return result; + const src_mcv = try self.resolveInst(operand); + const dst_mcv = if (switch (src_mcv) { + .immediate, .lea_frame => true, + .register, .register_offset => self.reuseOperand(inst, operand, 0, src_mcv), + else => false, + }) src_mcv else try self.copyToRegisterWithInstTracking(inst, ptr_field_ty, src_mcv); + return dst_mcv.offset(field_offset); } fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; const ty_pl = self.air.instructions.items(.data)[inst].ty_pl; const extra = self.air.extraData(Air.StructField, ty_pl.payload).data; const result: MCValue = result: { const operand = extra.struct_operand; const index = extra.field_index; - const container_ty = self.air.typeOf(operand); - const field_ty = container_ty.structFieldType(index); - if (!field_ty.hasRuntimeBitsIgnoreComptime()) break :result .none; + const container_ty = self.typeOf(operand); + const container_rc = regClassForType(container_ty, mod); + const field_ty = container_ty.structFieldType(index, mod); + if (!field_ty.hasRuntimeBitsIgnoreComptime(mod)) break :result .none; + const field_rc = regClassForType(field_ty, mod); + const field_is_gp = field_rc.supersetOf(gp); const src_mcv = try self.resolveInst(operand); - const field_off = switch (container_ty.containerLayout()) { - .Auto, .Extern => @intCast(u32, container_ty.structFieldOffset(index, self.target.*) * 8), - .Packed => if (container_ty.castTag(.@"struct")) |struct_obj| - struct_obj.data.packedFieldBitOffset(self.target.*, index) + const field_off = switch (container_ty.containerLayout(mod)) { + .Auto, .Extern => @intCast(u32, container_ty.structFieldOffset(index, mod) * 8), + .Packed => if (mod.typeToStruct(container_ty)) |struct_obj| + struct_obj.packedFieldBitOffset(mod, index) else 0, }; switch (src_mcv) { .load_frame => |frame_addr| { - const field_abi_size = @intCast(u32, field_ty.abiSize(self.target.*)); - const limb_abi_size = @min(field_abi_size, 8); - const limb_abi_bits = limb_abi_size * 8; - const field_byte_off = @intCast(i32, field_off / limb_abi_bits * limb_abi_size); - const field_bit_off = field_off % limb_abi_bits; - - if (field_bit_off == 0) { - const off_mcv = MCValue{ .load_frame = .{ - .index = frame_addr.index, - .off = frame_addr.off + field_byte_off, - } }; + if (field_off % 8 == 0) { + const off_mcv = + src_mcv.address().offset(@intCast(i32, @divExact(field_off, 8))).deref(); if (self.reuseOperand(inst, operand, 0, src_mcv)) break :result off_mcv; const dst_mcv = try self.allocRegOrMem(inst, true); @@ -4311,25 +5596,31 @@ fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void { break :result dst_mcv; } + const field_abi_size = @intCast(u32, field_ty.abiSize(mod)); + const limb_abi_size: u32 = @min(field_abi_size, 8); + const limb_abi_bits = limb_abi_size * 8; + const field_byte_off = @intCast(i32, field_off / limb_abi_bits * limb_abi_size); + const field_bit_off = field_off % limb_abi_bits; + if (field_abi_size > 8) { return self.fail("TODO implement struct_field_val with large packed field", .{}); } - const dst_reg = try self.register_manager.allocReg(inst, gp); + const dst_reg = try self.register_manager.allocReg(if (field_is_gp) inst else null, gp); const field_extra_bits = self.regExtraBits(field_ty); const load_abi_size = if (field_bit_off < field_extra_bits) field_abi_size else field_abi_size * 2; if (load_abi_size <= 8) { const load_reg = registerAlias(dst_reg, load_abi_size); try 
self.asmRegisterMemory( - .mov, + .{ ._, .mov }, load_reg, Memory.sib(Memory.PtrSize.fromSize(load_abi_size), .{ .base = .{ .frame = frame_addr.index }, .disp = frame_addr.off + field_byte_off, }), ); - try self.asmRegisterImmediate(.shr, load_reg, Immediate.u(field_bit_off)); + try self.asmRegisterImmediate(.{ ._r, .sh }, load_reg, Immediate.u(field_bit_off)); } else { const tmp_reg = registerAlias( try self.register_manager.allocReg(null, gp), @@ -4340,7 +5631,7 @@ fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void { const dst_alias = registerAlias(dst_reg, field_abi_size); try self.asmRegisterMemory( - .mov, + .{ ._, .mov }, dst_alias, Memory.sib(Memory.PtrSize.fromSize(field_abi_size), .{ .base = .{ .frame = frame_addr.index }, @@ -4348,7 +5639,7 @@ fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void { }), ); try self.asmRegisterMemory( - .mov, + .{ ._, .mov }, tmp_reg, Memory.sib(Memory.PtrSize.fromSize(field_abi_size), .{ .base = .{ .frame = frame_addr.index }, @@ -4356,7 +5647,7 @@ fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void { }), ); try self.asmRegisterRegisterImmediate( - .shrd, + .{ ._rd, .sh }, dst_alias, tmp_reg, Immediate.u(field_bit_off), @@ -4364,47 +5655,38 @@ fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void { } if (field_extra_bits > 0) try self.truncateRegister(field_ty, dst_reg); - break :result .{ .register = dst_reg }; + + const dst_mcv = MCValue{ .register = dst_reg }; + break :result if (field_is_gp) + dst_mcv + else + try self.copyToRegisterWithInstTracking(inst, field_ty, dst_mcv); }, .register => |reg| { const reg_lock = self.register_manager.lockRegAssumeUnused(reg); defer self.register_manager.unlockReg(reg_lock); - const dst_mcv = if (self.reuseOperand(inst, operand, 0, src_mcv)) - src_mcv + const dst_reg = if (src_mcv.isRegister() and field_rc.supersetOf(container_rc) and + self.reuseOperand(inst, operand, 0, src_mcv)) + src_mcv.getReg().? else - try self.copyToRegisterWithInstTracking( - inst, - Type.usize, - .{ .register = reg.to64() }, - ); - const dst_mcv_lock: ?RegisterLock = switch (dst_mcv) { - .register => |a_reg| self.register_manager.lockReg(a_reg), - else => null, - }; - defer if (dst_mcv_lock) |lock| self.register_manager.unlockReg(lock); - - // Shift by struct_field_offset. 
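[Editor's note: a minimal illustrative sketch, not part of the patch; the function and parameter names below are hypothetical.] Both packedLoad above and the load_frame branch of airStructFieldVal here extract a bit-packed field the same way: load the containing limb, shift right by the bit offset (using shrd across two limbs when the field straddles a limb boundary), then truncate to the field width. For a field that fits in one 64-bit limb, the emitted sequence computes roughly:

    fn extractPackedField(limb: u64, bit_off: u6, field_bits: u7) u64 {
        // shr: drop the bits below the field
        const shifted = limb >> bit_off;
        // truncateRegister: mask away the bits above the field
        if (field_bits >= 64) return shifted;
        const mask = (@as(u64, 1) << @intCast(u6, field_bits)) - 1;
        return shifted & mask;
    }

The straddling case in the diff replaces the single shr with a mov of the next limb followed by shrd with the same bit offset.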
- try self.genShiftBinOpMir(.shr, Type.usize, dst_mcv, .{ .immediate = field_off }); + try self.copyToTmpRegister(Type.usize, .{ .register = reg.to64() }); + const dst_mcv = MCValue{ .register = dst_reg }; + const dst_lock = self.register_manager.lockReg(dst_reg); + defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); - // Mask to field_bit_size bits - const field_bit_size = field_ty.bitSize(self.target.*); - const mask = ~@as(u64, 0) >> @intCast(u6, 64 - field_bit_size); + try self.genShiftBinOpMir( + .{ ._r, .sh }, + Type.usize, + dst_mcv, + .{ .immediate = field_off }, + ); + if (self.regExtraBits(field_ty) > 0) try self.truncateRegister(field_ty, dst_reg); - const tmp_reg = try self.copyToTmpRegister(Type.usize, .{ .immediate = mask }); - try self.genBinOpMir(.@"and", Type.usize, dst_mcv, .{ .register = tmp_reg }); - - const signedness = - if (field_ty.isAbiInt()) field_ty.intInfo(self.target.*).signedness else .unsigned; - const field_byte_size = @intCast(u32, field_ty.abiSize(self.target.*)); - if (signedness == .signed and field_byte_size < 8) { - try self.asmRegisterRegister( - .movsx, - dst_mcv.register, - registerAlias(dst_mcv.register, field_byte_size), - ); - } - break :result dst_mcv; + break :result if (field_rc.supersetOf(gp)) + dst_mcv + else + try self.copyToRegisterWithInstTracking(inst, field_ty, dst_mcv); }, .register_overflow => |ro| { switch (index) { @@ -4436,19 +5718,29 @@ fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void { } fn airFieldParentPtr(self: *Self, inst: Air.Inst.Index) !void { - const ty_op = self.air.instructions.items(.data)[inst].ty_op; - _ = ty_op; - return self.fail("TODO implement airFieldParentPtr for {}", .{self.target.cpu.arch}); - //return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); + const mod = self.bin_file.options.module.?; + const ty_pl = self.air.instructions.items(.data)[inst].ty_pl; + const extra = self.air.extraData(Air.FieldParentPtr, ty_pl.payload).data; + + const inst_ty = self.typeOfIndex(inst); + const parent_ty = inst_ty.childType(mod); + const field_offset = @intCast(i32, parent_ty.structFieldOffset(extra.field_index, mod)); + + const src_mcv = try self.resolveInst(extra.field_ptr); + const dst_mcv = if (src_mcv.isRegisterOffset() and + self.reuseOperand(inst, extra.field_ptr, 0, src_mcv)) + src_mcv + else + try self.copyToRegisterWithInstTracking(inst, inst_ty, src_mcv); + const result = dst_mcv.offset(-field_offset); + return self.finishAir(inst, result, .{ extra.field_ptr, .none, .none }); } fn genUnOp(self: *Self, maybe_inst: ?Air.Inst.Index, tag: Air.Inst.Tag, src_air: Air.Inst.Ref) !MCValue { - const src_ty = self.air.typeOf(src_air); + const mod = self.bin_file.options.module.?; + const src_ty = self.typeOf(src_air); const src_mcv = try self.resolveInst(src_air); - if (src_ty.zigTypeTag() == .Vector) { - return self.fail("TODO implement genUnOp for {}", .{src_ty.fmt(self.bin_file.options.module.?)}); - } - if (src_ty.abiSize(self.target.*) > 8) { + if (src_ty.zigTypeTag(mod) == .Vector) { return self.fail("TODO implement genUnOp for {}", .{src_ty.fmt(self.bin_file.options.module.?)}); } @@ -4466,13 +5758,13 @@ fn genUnOp(self: *Self, maybe_inst: ?Air.Inst.Index, tag: Air.Inst.Tag, src_air: }; defer if (src_lock) |lock| self.register_manager.unlockReg(lock); - const dst_mcv: MCValue = if (maybe_inst) |inst| - if (self.reuseOperand(inst, src_air, 0, src_mcv)) - src_mcv - else - try self.copyToRegisterWithInstTracking(inst, src_ty, src_mcv) - else - .{ .register = try 
self.copyToTmpRegister(src_ty, src_mcv) }; + const dst_mcv: MCValue = dst: { + if (maybe_inst) |inst| if (self.reuseOperand(inst, src_air, 0, src_mcv)) break :dst src_mcv; + + const dst_mcv = try self.allocRegOrMemAdvanced(src_ty, maybe_inst, true); + try self.genCopy(src_ty, dst_mcv, src_mcv); + break :dst dst_mcv; + }; const dst_lock = switch (dst_mcv) { .register => |reg| self.register_manager.lockReg(reg), else => null, @@ -4481,26 +5773,35 @@ fn genUnOp(self: *Self, maybe_inst: ?Air.Inst.Index, tag: Air.Inst.Tag, src_air: switch (tag) { .not => { - const int_info = if (src_ty.tag() == .bool) + const limb_abi_size = @intCast(u16, @min(src_ty.abiSize(mod), 8)); + const int_info = if (src_ty.ip_index == .bool_type) std.builtin.Type.Int{ .signedness = .unsigned, .bits = 1 } else - src_ty.intInfo(self.target.*); - const extra_bits = self.regExtraBits(src_ty); - if (int_info.signedness == .unsigned and extra_bits > 0) { - const mask = (@as(u64, 1) << @intCast(u6, src_ty.bitSize(self.target.*))) - 1; - try self.genBinOpMir(.xor, src_ty, dst_mcv, .{ .immediate = mask }); - } else try self.genUnOpMir(.not, src_ty, dst_mcv); - }, - - .neg => try self.genUnOpMir(.neg, src_ty, dst_mcv), + src_ty.intInfo(mod); + var byte_off: i32 = 0; + while (byte_off * 8 < int_info.bits) : (byte_off += limb_abi_size) { + const limb_bits = @intCast(u16, @min(int_info.bits - byte_off * 8, limb_abi_size * 8)); + const limb_ty = try mod.intType(int_info.signedness, limb_bits); + const limb_mcv = switch (byte_off) { + 0 => dst_mcv, + else => dst_mcv.address().offset(byte_off).deref(), + }; + if (int_info.signedness == .unsigned and self.regExtraBits(limb_ty) > 0) { + const mask = @as(u64, math.maxInt(u64)) >> @intCast(u6, 64 - limb_bits); + try self.genBinOpMir(.{ ._, .xor }, limb_ty, limb_mcv, .{ .immediate = mask }); + } else try self.genUnOpMir(.{ ._, .not }, limb_ty, limb_mcv); + } + }, + .neg => try self.genUnOpMir(.{ ._, .neg }, src_ty, dst_mcv), else => unreachable, } return dst_mcv; } -fn genUnOpMir(self: *Self, mir_tag: Mir.Inst.Tag, dst_ty: Type, dst_mcv: MCValue) !void { - const abi_size = @intCast(u32, dst_ty.abiSize(self.target.*)); +fn genUnOpMir(self: *Self, mir_tag: Mir.Inst.FixedTag, dst_ty: Type, dst_mcv: MCValue) !void { + const mod = self.bin_file.options.module.?; + const abi_size = @intCast(u32, dst_ty.abiSize(mod)); if (abi_size > 8) return self.fail("TODO implement {} for {}", .{ mir_tag, dst_ty.fmt(self.bin_file.options.module.?), @@ -4534,17 +5835,7 @@ fn genUnOpMir(self: *Self, mir_tag: Mir.Inst.Tag, dst_ty: Type, dst_mcv: MCValue }, .indirect, .load_frame => try self.asmMemory( mir_tag, - Memory.sib(Memory.PtrSize.fromSize(abi_size), switch (dst_mcv) { - .indirect => |reg_off| .{ - .base = .{ .reg = reg_off.reg }, - .disp = reg_off.off, - }, - .load_frame => |frame_addr| .{ - .base = .{ .frame = frame_addr.index }, - .disp = frame_addr.off, - }, - else => unreachable, - }), + dst_mcv.mem(Memory.PtrSize.fromSize(abi_size)), ), } } @@ -4552,11 +5843,12 @@ fn genUnOpMir(self: *Self, mir_tag: Mir.Inst.Tag, dst_ty: Type, dst_mcv: MCValue /// Clobbers .rcx for non-immediate shift value. 
fn genShiftBinOpMir( self: *Self, - tag: Mir.Inst.Tag, + tag: Mir.Inst.FixedTag, ty: Type, lhs_mcv: MCValue, shift_mcv: MCValue, ) !void { + const mod = self.bin_file.options.module.?; const rhs_mcv: MCValue = rhs: { switch (shift_mcv) { .immediate => |imm| switch (imm) { @@ -4571,7 +5863,7 @@ fn genShiftBinOpMir( break :rhs .{ .register = .rcx }; }; - const abi_size = @intCast(u32, ty.abiSize(self.target.*)); + const abi_size = @intCast(u32, ty.abiSize(mod)); if (abi_size <= 8) { switch (lhs_mcv) { .register => |lhs_reg| switch (rhs_mcv) { @@ -4637,16 +5929,16 @@ fn genShiftBinOpMir( const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); defer self.register_manager.unlockReg(tmp_lock); - const info: struct { offsets: [2]i32, double_tag: Mir.Inst.Tag } = switch (tag) { - .shl, .sal => .{ .offsets = .{ 0, 8 }, .double_tag = .shld }, - .shr, .sar => .{ .offsets = .{ 8, 0 }, .double_tag = .shrd }, + const info: struct { offsets: [2]i32, double_tag: Mir.Inst.FixedTag } = switch (tag[0]) { + ._l => .{ .offsets = .{ 0, 8 }, .double_tag = .{ ._ld, .sh } }, + ._r => .{ .offsets = .{ 8, 0 }, .double_tag = .{ ._rd, .sh } }, else => unreachable, }; switch (lhs_mcv) { .load_frame => |dst_frame_addr| switch (rhs_mcv) { .immediate => |rhs_imm| if (rhs_imm == 0) {} else if (rhs_imm < 64) { try self.asmRegisterMemory( - .mov, + .{ ._, .mov }, tmp_reg, Memory.sib(.qword, .{ .base = .{ .frame = dst_frame_addr.index }, @@ -4673,7 +5965,7 @@ fn genShiftBinOpMir( } else { assert(rhs_imm < 128); try self.asmRegisterMemory( - .mov, + .{ ._, .mov }, tmp_reg, Memory.sib(.qword, .{ .base = .{ .frame = dst_frame_addr.index }, @@ -4684,34 +5976,30 @@ fn genShiftBinOpMir( try self.asmRegisterImmediate(tag, tmp_reg, Immediate.u(rhs_imm - 64)); } try self.asmMemoryRegister( - .mov, + .{ ._, .mov }, Memory.sib(.qword, .{ .base = .{ .frame = dst_frame_addr.index }, .disp = dst_frame_addr.off + info.offsets[1], }), tmp_reg, ); - switch (tag) { - .shl, .sal, .shr => { - try self.asmRegisterRegister(.xor, tmp_reg.to32(), tmp_reg.to32()); - try self.asmMemoryRegister( - .mov, - Memory.sib(.qword, .{ - .base = .{ .frame = dst_frame_addr.index }, - .disp = dst_frame_addr.off + info.offsets[0], - }), - tmp_reg, - ); - }, - .sar => try self.asmMemoryImmediate( - tag, + if (tag[0] == ._r and tag[1] == .sa) try self.asmMemoryImmediate( + tag, + Memory.sib(.qword, .{ + .base = .{ .frame = dst_frame_addr.index }, + .disp = dst_frame_addr.off + info.offsets[0], + }), + Immediate.u(63), + ) else { + try self.asmRegisterRegister(.{ ._, .xor }, tmp_reg.to32(), tmp_reg.to32()); + try self.asmMemoryRegister( + .{ ._, .mov }, Memory.sib(.qword, .{ .base = .{ .frame = dst_frame_addr.index }, .disp = dst_frame_addr.off + info.offsets[0], }), - Immediate.u(63), - ), - else => unreachable, + tmp_reg, + ); } }, else => { @@ -4725,7 +6013,7 @@ fn genShiftBinOpMir( try self.genSetReg(.cl, Type.u8, rhs_mcv); try self.asmRegisterMemory( - .mov, + .{ ._, .mov }, first_reg, Memory.sib(.qword, .{ .base = .{ .frame = dst_frame_addr.index }, @@ -4733,32 +6021,28 @@ fn genShiftBinOpMir( }), ); try self.asmRegisterMemory( - .mov, + .{ ._, .mov }, second_reg, Memory.sib(.qword, .{ .base = .{ .frame = dst_frame_addr.index }, .disp = dst_frame_addr.off + info.offsets[1], }), ); - switch (tag) { - .shl, .sal, .shr => try self.asmRegisterRegister( - .xor, - tmp_reg.to32(), - tmp_reg.to32(), - ), - .sar => { - try self.asmRegisterRegister(.mov, tmp_reg, first_reg); - try self.asmRegisterImmediate(tag, tmp_reg, Immediate.u(63)); - }, - else => 
unreachable, - } + if (tag[0] == ._r and tag[1] == .sa) { + try self.asmRegisterRegister(.{ ._, .mov }, tmp_reg, first_reg); + try self.asmRegisterImmediate(tag, tmp_reg, Immediate.u(63)); + } else try self.asmRegisterRegister( + .{ ._, .xor }, + tmp_reg.to32(), + tmp_reg.to32(), + ); try self.asmRegisterRegisterRegister(info.double_tag, second_reg, first_reg, .cl); try self.asmRegisterRegister(tag, first_reg, .cl); - try self.asmRegisterImmediate(.cmp, .cl, Immediate.u(64)); + try self.asmRegisterImmediate(.{ ._, .cmp }, .cl, Immediate.u(64)); try self.asmCmovccRegisterRegister(second_reg, first_reg, .ae); try self.asmCmovccRegisterRegister(first_reg, tmp_reg, .ae); try self.asmMemoryRegister( - .mov, + .{ ._, .mov }, Memory.sib(.qword, .{ .base = .{ .frame = dst_frame_addr.index }, .disp = dst_frame_addr.off + info.offsets[1], @@ -4766,7 +6050,7 @@ fn genShiftBinOpMir( second_reg, ); try self.asmMemoryRegister( - .mov, + .{ ._, .mov }, Memory.sib(.qword, .{ .base = .{ .frame = dst_frame_addr.index }, .disp = dst_frame_addr.off + info.offsets[0], @@ -4791,20 +6075,21 @@ fn genShiftBinOpMir( /// Asserts .rcx is free. fn genShiftBinOp( self: *Self, - tag: Air.Inst.Tag, + air_tag: Air.Inst.Tag, maybe_inst: ?Air.Inst.Index, lhs_mcv: MCValue, rhs_mcv: MCValue, lhs_ty: Type, rhs_ty: Type, ) !MCValue { - if (lhs_ty.zigTypeTag() == .Vector) { + const mod = self.bin_file.options.module.?; + if (lhs_ty.zigTypeTag(mod) == .Vector) { return self.fail("TODO implement genShiftBinOp for {}", .{lhs_ty.fmtDebug()}); } - assert(rhs_ty.abiSize(self.target.*) == 1); + assert(rhs_ty.abiSize(mod) == 1); - const lhs_abi_size = lhs_ty.abiSize(self.target.*); + const lhs_abi_size = lhs_ty.abiSize(mod); if (lhs_abi_size > 16) { return self.fail("TODO implement genShiftBinOp for {}", .{lhs_ty.fmtDebug()}); } @@ -4835,15 +6120,15 @@ fn genShiftBinOp( break :dst dst_mcv; }; - const signedness = lhs_ty.intInfo(self.target.*).signedness; - try self.genShiftBinOpMir(switch (tag) { + const signedness = lhs_ty.intInfo(mod).signedness; + try self.genShiftBinOpMir(switch (air_tag) { .shl, .shl_exact => switch (signedness) { - .signed => .sal, - .unsigned => .shl, + .signed => .{ ._l, .sa }, + .unsigned => .{ ._l, .sh }, }, .shr, .shr_exact => switch (signedness) { - .signed => .sar, - .unsigned => .shr, + .signed => .{ ._r, .sa }, + .unsigned => .{ ._r, .sh }, }, else => unreachable, }, lhs_ty, dst_mcv, rhs_mcv); @@ -4862,18 +6147,18 @@ fn genMulDivBinOp( lhs: MCValue, rhs: MCValue, ) !MCValue { - if (dst_ty.zigTypeTag() == .Vector or dst_ty.zigTypeTag() == .Float) { + const mod = self.bin_file.options.module.?; + if (dst_ty.zigTypeTag(mod) == .Vector or dst_ty.zigTypeTag(mod) == .Float) { return self.fail("TODO implement genMulDivBinOp for {}", .{dst_ty.fmtDebug()}); } - const dst_abi_size = @intCast(u32, dst_ty.abiSize(self.target.*)); - const src_abi_size = @intCast(u32, src_ty.abiSize(self.target.*)); + const dst_abi_size = @intCast(u32, dst_ty.abiSize(mod)); + const src_abi_size = @intCast(u32, src_ty.abiSize(mod)); if (switch (tag) { else => unreachable, .mul, .mulwrap => dst_abi_size != src_abi_size and dst_abi_size != src_abi_size * 2, .div_trunc, .div_floor, .div_exact, .rem, .mod => dst_abi_size != src_abi_size, } or src_abi_size > 8) return self.fail("TODO implement genMulDivBinOp from {} to {}", .{ - src_ty.fmt(self.bin_file.options.module.?), - dst_ty.fmt(self.bin_file.options.module.?), + src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?), }); const ty = if (dst_abi_size <= 
8) dst_ty else src_ty; const abi_size = if (dst_abi_size <= 8) dst_abi_size else src_abi_size; @@ -4884,7 +6169,7 @@ fn genMulDivBinOp( const reg_locks = self.register_manager.lockRegs(2, .{ .rax, .rdx }); defer for (reg_locks) |reg_lock| if (reg_lock) |lock| self.register_manager.unlockReg(lock); - const signedness = ty.intInfo(self.target.*).signedness; + const signedness = ty.intInfo(mod).signedness; switch (tag) { .mul, .mulwrap, @@ -4904,20 +6189,18 @@ fn genMulDivBinOp( try self.register_manager.getReg(.rax, track_inst_rax); try self.register_manager.getReg(.rdx, track_inst_rdx); - const mir_tag: Mir.Inst.Tag = switch (signedness) { + try self.genIntMulDivOpMir(switch (signedness) { .signed => switch (tag) { - .mul, .mulwrap => .imul, - .div_trunc, .div_exact, .rem => .idiv, + .mul, .mulwrap => .{ .i_, .mul }, + .div_trunc, .div_exact, .rem => .{ .i_, .div }, else => unreachable, }, .unsigned => switch (tag) { - .mul, .mulwrap => .mul, - .div_trunc, .div_exact, .rem => .div, + .mul, .mulwrap => .{ ._, .mul }, + .div_trunc, .div_exact, .rem => .{ ._, .div }, else => unreachable, }, - }; - - try self.genIntMulDivOpMir(mir_tag, ty, lhs, rhs); + }, ty, lhs, rhs); if (dst_abi_size <= 8) return .{ .register = registerAlias(switch (tag) { .mul, .mulwrap, .div_trunc, .div_exact => .rax, @@ -4927,7 +6210,7 @@ fn genMulDivBinOp( const dst_mcv = try self.allocRegOrMemAdvanced(dst_ty, maybe_inst, false); try self.asmMemoryRegister( - .mov, + .{ ._, .mov }, Memory.sib(.qword, .{ .base = .{ .frame = dst_mcv.load_frame.index }, .disp = dst_mcv.load_frame.off, @@ -4935,7 +6218,7 @@ fn genMulDivBinOp( .rax, ); try self.asmMemoryRegister( - .mov, + .{ ._, .mov }, Memory.sib(.qword, .{ .base = .{ .frame = dst_mcv.load_frame.index }, .disp = dst_mcv.load_frame.off + 8, @@ -4976,12 +6259,12 @@ fn genMulDivBinOp( try self.copyToRegisterWithInstTracking(inst, ty, lhs) else .{ .register = try self.copyToTmpRegister(ty, lhs) }; - try self.genBinOpMir(.sub, ty, result, div_floor); + try self.genBinOpMir(.{ ._, .sub }, ty, result, div_floor); return result; }, .unsigned => { - try self.genIntMulDivOpMir(.div, ty, lhs, rhs); + try self.genIntMulDivOpMir(.{ ._, .div }, ty, lhs, rhs); return .{ .register = registerAlias(.rdx, abi_size) }; }, } @@ -5023,7 +6306,7 @@ fn genMulDivBinOp( switch (signedness) { .signed => return try self.genInlineIntDivFloor(ty, lhs, actual_rhs), .unsigned => { - try self.genIntMulDivOpMir(.div, ty, lhs, actual_rhs); + try self.genIntMulDivOpMir(.{ ._, .div }, ty, lhs, actual_rhs); return .{ .register = registerAlias(.rax, abi_size) }; }, } @@ -5033,25 +6316,37 @@ fn genMulDivBinOp( } } -/// Result is always a register. 
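[Editor's note: an illustrative sketch, not part of the patch; the immediate-bit interpretation is my reading of the SSE/F16C encodings.] genBinOp below, like airSqrt above, handles f16 operands by round-tripping through f32 whenever the f16c feature is available: vcvtph2ps widens the half-precision lane(s) to single precision, the operation runs at f32 (vaddss, vsqrtss, and so on), and vcvtps2ph with immediate 0b1_00 narrows back to f16 (bit 2 set, which I read as "round using the current MXCSR mode"). In effect the emitted sequence computes:

    fn addF16ViaF32(a: f16, b: f16) f16 {
        // what the vcvtph2ps / vaddss / vcvtps2ph sequence evaluates
        return @floatCast(f16, @as(f32, a) + @as(f32, b));
    }
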
fn genBinOp( self: *Self, maybe_inst: ?Air.Inst.Index, - tag: Air.Inst.Tag, + air_tag: Air.Inst.Tag, lhs_air: Air.Inst.Ref, rhs_air: Air.Inst.Ref, ) !MCValue { - const lhs = try self.resolveInst(lhs_air); - const rhs = try self.resolveInst(rhs_air); - const lhs_ty = self.air.typeOf(lhs_air); - const rhs_ty = self.air.typeOf(rhs_air); - if (lhs_ty.zigTypeTag() == .Vector) { - return self.fail("TODO implement genBinOp for {}", .{lhs_ty.fmt(self.bin_file.options.module.?)}); - } + const mod = self.bin_file.options.module.?; + const lhs_ty = self.typeOf(lhs_air); + const rhs_ty = self.typeOf(rhs_air); + const abi_size = @intCast(u32, lhs_ty.abiSize(mod)); + + const maybe_mask_reg = switch (air_tag) { + else => null, + .max, .min => if (lhs_ty.scalarType(mod).isRuntimeFloat()) registerAlias( + if (!self.hasFeature(.avx) and self.hasFeature(.sse4_1)) mask: { + try self.register_manager.getReg(.xmm0, null); + break :mask .xmm0; + } else try self.register_manager.allocReg(null, sse), + abi_size, + ) else null, + }; + const mask_lock = + if (maybe_mask_reg) |mask_reg| self.register_manager.lockRegAssumeUnused(mask_reg) else null; + defer if (mask_lock) |lock| self.register_manager.unlockReg(lock); - switch (lhs) { + const lhs_mcv = try self.resolveInst(lhs_air); + const rhs_mcv = try self.resolveInst(rhs_air); + switch (lhs_mcv) { .immediate => |imm| switch (imm) { - 0 => switch (tag) { + 0 => switch (air_tag) { .sub, .subwrap => return self.genUnOp(maybe_inst, .neg, rhs_air), else => {}, }, @@ -5060,9 +6355,10 @@ fn genBinOp( else => {}, } - const is_commutative = switch (tag) { + const is_commutative = switch (air_tag) { .add, .addwrap, + .mul, .bool_or, .bit_or, .bool_and, @@ -5074,48 +6370,42 @@ fn genBinOp( else => false, }; - const dst_mem_ok = switch (tag) { - .add, - .addwrap, - .sub, - .subwrap, - .mul, - .div_float, - .div_exact, - .div_trunc, - .div_floor, - => !lhs_ty.isRuntimeFloat(), - - else => true, + const vec_op = switch (lhs_ty.zigTypeTag(mod)) { + else => false, + .Float, .Vector => true, }; - const lhs_lock: ?RegisterLock = switch (lhs) { + const lhs_lock: ?RegisterLock = switch (lhs_mcv) { .register => |reg| self.register_manager.lockRegAssumeUnused(reg), else => null, }; defer if (lhs_lock) |lock| self.register_manager.unlockReg(lock); - const rhs_lock: ?RegisterLock = switch (rhs) { + const rhs_lock: ?RegisterLock = switch (rhs_mcv) { .register => |reg| self.register_manager.lockReg(reg), else => null, }; defer if (rhs_lock) |lock| self.register_manager.unlockReg(lock); - var flipped: bool = false; + var flipped = false; + var copied_to_dst = true; const dst_mcv: MCValue = dst: { if (maybe_inst) |inst| { - if ((dst_mem_ok or lhs.isRegister()) and self.reuseOperand(inst, lhs_air, 0, lhs)) { - break :dst lhs; + if ((!vec_op or lhs_mcv.isRegister()) and self.reuseOperand(inst, lhs_air, 0, lhs_mcv)) { + break :dst lhs_mcv; } - if (is_commutative and (dst_mem_ok or rhs.isRegister()) and - self.reuseOperand(inst, rhs_air, 1, rhs)) + if (is_commutative and (!vec_op or rhs_mcv.isRegister()) and + self.reuseOperand(inst, rhs_air, 1, rhs_mcv)) { flipped = true; - break :dst rhs; + break :dst rhs_mcv; } } const dst_mcv = try self.allocRegOrMemAdvanced(lhs_ty, maybe_inst, true); - try self.genCopy(lhs_ty, dst_mcv, lhs); + if (vec_op and lhs_mcv.isRegister() and self.hasFeature(.avx)) + copied_to_dst = false + else + try self.genCopy(lhs_ty, dst_mcv, lhs_mcv); break :dst dst_mcv; }; const dst_lock: ?RegisterLock = switch (dst_mcv) { @@ -5124,96 +6414,61 @@ fn genBinOp( }; defer if 
(dst_lock) |lock| self.register_manager.unlockReg(lock); - const src_mcv = if (flipped) lhs else rhs; - switch (tag) { - .add, - .addwrap, - => try self.genBinOpMir(switch (lhs_ty.tag()) { - else => .add, - .f32 => .addss, - .f64 => .addsd, - }, lhs_ty, dst_mcv, src_mcv), - - .sub, - .subwrap, - => try self.genBinOpMir(switch (lhs_ty.tag()) { - else => .sub, - .f32 => .subss, - .f64 => .subsd, - }, lhs_ty, dst_mcv, src_mcv), - - .mul => try self.genBinOpMir(switch (lhs_ty.tag()) { - .f32 => .mulss, - .f64 => .mulsd, - else => return self.fail("TODO implement genBinOp for {s} {}", .{ @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?) }), - }, lhs_ty, dst_mcv, src_mcv), - - .div_float, - .div_exact, - .div_trunc, - .div_floor, - => { - try self.genBinOpMir(switch (lhs_ty.tag()) { - .f32 => .divss, - .f64 => .divsd, - else => return self.fail("TODO implement genBinOp for {s} {}", .{ - @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?), - }), - }, lhs_ty, dst_mcv, src_mcv); - switch (tag) { - .div_float, - .div_exact, - => {}, - .div_trunc, - .div_floor, - => if (Target.x86.featureSetHas(self.target.cpu.features, .sse4_1)) { - const abi_size = @intCast(u32, lhs_ty.abiSize(self.target.*)); - const dst_alias = registerAlias(dst_mcv.register, abi_size); - try self.asmRegisterRegisterImmediate(switch (lhs_ty.tag()) { - .f32 => .roundss, - .f64 => .roundsd, - else => unreachable, - }, dst_alias, dst_alias, Immediate.u(switch (tag) { - .div_trunc => 0b1_0_11, - .div_floor => 0b1_0_01, - else => unreachable, - })); - } else return self.fail("TODO implement round without sse4_1", .{}), - else => unreachable, - } - }, + const unmat_src_mcv = if (flipped) lhs_mcv else rhs_mcv; + const src_mcv: MCValue = if (maybe_mask_reg) |mask_reg| + if (self.hasFeature(.avx) and unmat_src_mcv.isRegister() and maybe_inst != null and + self.liveness.operandDies(maybe_inst.?, if (flipped) 0 else 1)) unmat_src_mcv else src: { + try self.genSetReg(mask_reg, rhs_ty, unmat_src_mcv); + break :src .{ .register = mask_reg }; + } + else + unmat_src_mcv; - .ptr_add, - .ptr_sub, - => { - const tmp_reg = try self.copyToTmpRegister(rhs_ty, src_mcv); - const tmp_mcv = MCValue{ .register = tmp_reg }; - const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); - defer self.register_manager.unlockReg(tmp_lock); + if (!vec_op) { + switch (air_tag) { + .add, + .addwrap, + => try self.genBinOpMir(.{ ._, .add }, lhs_ty, dst_mcv, src_mcv), - const elem_size = lhs_ty.elemType2().abiSize(self.target.*); - try self.genIntMulComplexOpMir(rhs_ty, tmp_mcv, .{ .immediate = elem_size }); - try self.genBinOpMir(switch (tag) { - .ptr_add => .add, - .ptr_sub => .sub, - else => unreachable, - }, lhs_ty, dst_mcv, tmp_mcv); - }, + .sub, + .subwrap, + => try self.genBinOpMir(.{ ._, .sub }, lhs_ty, dst_mcv, src_mcv), + + .ptr_add, + .ptr_sub, + => { + const tmp_reg = try self.copyToTmpRegister(rhs_ty, src_mcv); + const tmp_mcv = MCValue{ .register = tmp_reg }; + const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); + defer self.register_manager.unlockReg(tmp_lock); + + const elem_size = lhs_ty.elemType2(mod).abiSize(mod); + try self.genIntMulComplexOpMir(rhs_ty, tmp_mcv, .{ .immediate = elem_size }); + try self.genBinOpMir( + switch (air_tag) { + .ptr_add => .{ ._, .add }, + .ptr_sub => .{ ._, .sub }, + else => unreachable, + }, + lhs_ty, + dst_mcv, + tmp_mcv, + ); + }, - .bool_or, - .bit_or, - => try self.genBinOpMir(.@"or", lhs_ty, dst_mcv, src_mcv), + .bool_or, + .bit_or, + => try self.genBinOpMir(.{ ._, .@"or" }, 
lhs_ty, dst_mcv, src_mcv), - .bool_and, - .bit_and, - => try self.genBinOpMir(.@"and", lhs_ty, dst_mcv, src_mcv), + .bool_and, + .bit_and, + => try self.genBinOpMir(.{ ._, .@"and" }, lhs_ty, dst_mcv, src_mcv), - .xor => try self.genBinOpMir(.xor, lhs_ty, dst_mcv, src_mcv), + .xor => try self.genBinOpMir(.{ ._, .xor }, lhs_ty, dst_mcv, src_mcv), - .min, - .max, - => switch (lhs_ty.zigTypeTag()) { - .Int => { + .min, + .max, + => { const mat_src_mcv: MCValue = if (switch (src_mcv) { .immediate, .eflags, @@ -5235,23 +6490,23 @@ fn genBinOp( }; defer if (mat_mcv_lock) |lock| self.register_manager.unlockReg(lock); - try self.genBinOpMir(.cmp, lhs_ty, dst_mcv, mat_src_mcv); + try self.genBinOpMir(.{ ._, .cmp }, lhs_ty, dst_mcv, mat_src_mcv); - const int_info = lhs_ty.intInfo(self.target.*); + const int_info = lhs_ty.intInfo(mod); const cc: Condition = switch (int_info.signedness) { - .unsigned => switch (tag) { + .unsigned => switch (air_tag) { .min => .a, .max => .b, else => unreachable, }, - .signed => switch (tag) { + .signed => switch (air_tag) { .min => .g, .max => .l, else => unreachable, }, }; - const cmov_abi_size = @max(@intCast(u32, lhs_ty.abiSize(self.target.*)), 2); + const cmov_abi_size = @max(@intCast(u32, lhs_ty.abiSize(mod)), 2); const tmp_reg = switch (dst_mcv) { .register => |reg| reg, else => try self.copyToTmpRegister(lhs_ty, dst_mcv), @@ -5303,36 +6558,884 @@ fn genBinOp( } try self.genCopy(lhs_ty, dst_mcv, .{ .register = tmp_reg }); }, - .Float => try self.genBinOpMir(switch (lhs_ty.floatBits(self.target.*)) { - 32 => switch (tag) { - .min => .minss, - .max => .maxss, - else => unreachable, + + else => return self.fail("TODO implement genBinOp for {s} {}", .{ + @tagName(air_tag), lhs_ty.fmt(self.bin_file.options.module.?), + }), + } + return dst_mcv; + } + + const dst_reg = registerAlias(dst_mcv.getReg().?, abi_size); + const mir_tag = if (@as(?Mir.Inst.FixedTag, switch (lhs_ty.zigTypeTag(mod)) { + else => unreachable, + .Float => switch (lhs_ty.floatBits(self.target.*)) { + 16 => if (self.hasFeature(.f16c)) { + const tmp_reg = (try self.register_manager.allocReg(null, sse)).to128(); + const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); + defer self.register_manager.unlockReg(tmp_lock); + + if (src_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate( + .{ .vp_w, .insr }, + dst_reg, + dst_reg, + src_mcv.mem(.word), + Immediate.u(1), + ) else try self.asmRegisterRegisterRegister( + .{ .vp_, .unpcklwd }, + dst_reg, + dst_reg, + (if (src_mcv.isRegister()) + src_mcv.getReg().? 
+ else + try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(), + ); + try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg, dst_reg); + try self.asmRegisterRegister(.{ .v_, .movshdup }, tmp_reg, dst_reg); + try self.asmRegisterRegisterRegister( + switch (air_tag) { + .add => .{ .v_ss, .add }, + .sub => .{ .v_ss, .sub }, + .mul => .{ .v_ss, .mul }, + .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ss, .div }, + .max => .{ .v_ss, .max }, + .min => .{ .v_ss, .max }, + else => unreachable, + }, + dst_reg, + dst_reg, + tmp_reg, + ); + try self.asmRegisterRegisterImmediate( + .{ .v_, .cvtps2ph }, + dst_reg, + dst_reg, + Immediate.u(0b1_00), + ); + return dst_mcv; + } else null, + 32 => switch (air_tag) { + .add => if (self.hasFeature(.avx)) .{ .v_ss, .add } else .{ ._ss, .add }, + .sub => if (self.hasFeature(.avx)) .{ .v_ss, .sub } else .{ ._ss, .sub }, + .mul => if (self.hasFeature(.avx)) .{ .v_ss, .mul } else .{ ._ss, .mul }, + .div_float, + .div_trunc, + .div_floor, + .div_exact, + => if (self.hasFeature(.avx)) .{ .v_ss, .div } else .{ ._ss, .div }, + .max => if (self.hasFeature(.avx)) .{ .v_ss, .max } else .{ ._ss, .max }, + .min => if (self.hasFeature(.avx)) .{ .v_ss, .min } else .{ ._ss, .min }, + else => unreachable, + }, + 64 => switch (air_tag) { + .add => if (self.hasFeature(.avx)) .{ .v_sd, .add } else .{ ._sd, .add }, + .sub => if (self.hasFeature(.avx)) .{ .v_sd, .sub } else .{ ._sd, .sub }, + .mul => if (self.hasFeature(.avx)) .{ .v_sd, .mul } else .{ ._sd, .mul }, + .div_float, + .div_trunc, + .div_floor, + .div_exact, + => if (self.hasFeature(.avx)) .{ .v_sd, .div } else .{ ._sd, .div }, + .max => if (self.hasFeature(.avx)) .{ .v_sd, .max } else .{ ._sd, .max }, + .min => if (self.hasFeature(.avx)) .{ .v_sd, .min } else .{ ._sd, .min }, + else => unreachable, + }, + 80, 128 => null, + else => unreachable, + }, + .Vector => switch (lhs_ty.childType(mod).zigTypeTag(mod)) { + else => null, + .Int => switch (lhs_ty.childType(mod).intInfo(mod).bits) { + 8 => switch (lhs_ty.vectorLen(mod)) { + 1...16 => switch (air_tag) { + .add, + .addwrap, + => if (self.hasFeature(.avx)) .{ .vp_b, .add } else .{ .p_b, .add }, + .sub, + .subwrap, + => if (self.hasFeature(.avx)) .{ .vp_b, .sub } else .{ .p_b, .sub }, + .bit_and => if (self.hasFeature(.avx)) .{ .vp_, .@"and" } else .{ .p_, .@"and" }, + .bit_or => if (self.hasFeature(.avx)) .{ .vp_, .@"or" } else .{ .p_, .@"or" }, + .xor => if (self.hasFeature(.avx)) .{ .vp_, .xor } else .{ .p_, .xor }, + .min => switch (lhs_ty.childType(mod).intInfo(mod).signedness) { + .signed => if (self.hasFeature(.avx)) + .{ .vp_b, .mins } + else if (self.hasFeature(.sse4_1)) + .{ .p_b, .mins } + else + null, + .unsigned => if (self.hasFeature(.avx)) + .{ .vp_b, .minu } + else if (self.hasFeature(.sse4_1)) + .{ .p_b, .minu } + else + null, + }, + .max => switch (lhs_ty.childType(mod).intInfo(mod).signedness) { + .signed => if (self.hasFeature(.avx)) + .{ .vp_b, .maxs } + else if (self.hasFeature(.sse4_1)) + .{ .p_b, .maxs } + else + null, + .unsigned => if (self.hasFeature(.avx)) + .{ .vp_b, .maxu } + else if (self.hasFeature(.sse4_1)) + .{ .p_b, .maxu } + else + null, + }, + else => null, + }, + 17...32 => switch (air_tag) { + .add, + .addwrap, + => if (self.hasFeature(.avx2)) .{ .vp_b, .add } else null, + .sub, + .subwrap, + => if (self.hasFeature(.avx2)) .{ .vp_b, .sub } else null, + .bit_and => if (self.hasFeature(.avx2)) .{ .vp_, .@"and" } else null, + .bit_or => if (self.hasFeature(.avx2)) .{ .vp_, .@"or" } else null, + .xor => if 
(self.hasFeature(.avx2)) .{ .vp_, .xor } else null, + .min => switch (lhs_ty.childType(mod).intInfo(mod).signedness) { + .signed => if (self.hasFeature(.avx2)) .{ .vp_b, .mins } else null, + .unsigned => if (self.hasFeature(.avx)) .{ .vp_b, .minu } else null, + }, + .max => switch (lhs_ty.childType(mod).intInfo(mod).signedness) { + .signed => if (self.hasFeature(.avx2)) .{ .vp_b, .maxs } else null, + .unsigned => if (self.hasFeature(.avx2)) .{ .vp_b, .maxu } else null, + }, + else => null, + }, + else => null, }, - 64 => switch (tag) { - .min => .minsd, - .max => .maxsd, - else => unreachable, + 16 => switch (lhs_ty.vectorLen(mod)) { + 1...8 => switch (air_tag) { + .add, + .addwrap, + => if (self.hasFeature(.avx)) .{ .vp_w, .add } else .{ .p_w, .add }, + .sub, + .subwrap, + => if (self.hasFeature(.avx)) .{ .vp_w, .sub } else .{ .p_w, .sub }, + .mul, + .mulwrap, + => if (self.hasFeature(.avx)) .{ .vp_w, .mull } else .{ .p_d, .mull }, + .bit_and => if (self.hasFeature(.avx)) .{ .vp_, .@"and" } else .{ .p_, .@"and" }, + .bit_or => if (self.hasFeature(.avx)) .{ .vp_, .@"or" } else .{ .p_, .@"or" }, + .xor => if (self.hasFeature(.avx)) .{ .vp_, .xor } else .{ .p_, .xor }, + .min => switch (lhs_ty.childType(mod).intInfo(mod).signedness) { + .signed => if (self.hasFeature(.avx)) + .{ .vp_w, .mins } + else + .{ .p_w, .mins }, + .unsigned => if (self.hasFeature(.avx)) + .{ .vp_w, .minu } + else + .{ .p_w, .minu }, + }, + .max => switch (lhs_ty.childType(mod).intInfo(mod).signedness) { + .signed => if (self.hasFeature(.avx)) + .{ .vp_w, .maxs } + else + .{ .p_w, .maxs }, + .unsigned => if (self.hasFeature(.avx)) + .{ .vp_w, .maxu } + else + .{ .p_w, .maxu }, + }, + else => null, + }, + 9...16 => switch (air_tag) { + .add, + .addwrap, + => if (self.hasFeature(.avx2)) .{ .vp_w, .add } else null, + .sub, + .subwrap, + => if (self.hasFeature(.avx2)) .{ .vp_w, .sub } else null, + .mul, + .mulwrap, + => if (self.hasFeature(.avx2)) .{ .vp_w, .mull } else null, + .bit_and => if (self.hasFeature(.avx2)) .{ .vp_, .@"and" } else null, + .bit_or => if (self.hasFeature(.avx2)) .{ .vp_, .@"or" } else null, + .xor => if (self.hasFeature(.avx2)) .{ .vp_, .xor } else null, + .min => switch (lhs_ty.childType(mod).intInfo(mod).signedness) { + .signed => if (self.hasFeature(.avx2)) .{ .vp_w, .mins } else null, + .unsigned => if (self.hasFeature(.avx)) .{ .vp_w, .minu } else null, + }, + .max => switch (lhs_ty.childType(mod).intInfo(mod).signedness) { + .signed => if (self.hasFeature(.avx2)) .{ .vp_w, .maxs } else null, + .unsigned => if (self.hasFeature(.avx2)) .{ .vp_w, .maxu } else null, + }, + else => null, + }, + else => null, + }, + 32 => switch (lhs_ty.vectorLen(mod)) { + 1...4 => switch (air_tag) { + .add, + .addwrap, + => if (self.hasFeature(.avx)) .{ .vp_d, .add } else .{ .p_d, .add }, + .sub, + .subwrap, + => if (self.hasFeature(.avx)) .{ .vp_d, .sub } else .{ .p_d, .sub }, + .mul, + .mulwrap, + => if (self.hasFeature(.avx)) + .{ .vp_d, .mull } + else if (self.hasFeature(.sse4_1)) + .{ .p_d, .mull } + else + null, + .bit_and => if (self.hasFeature(.avx)) .{ .vp_, .@"and" } else .{ .p_, .@"and" }, + .bit_or => if (self.hasFeature(.avx)) .{ .vp_, .@"or" } else .{ .p_, .@"or" }, + .xor => if (self.hasFeature(.avx)) .{ .vp_, .xor } else .{ .p_, .xor }, + .min => switch (lhs_ty.childType(mod).intInfo(mod).signedness) { + .signed => if (self.hasFeature(.avx)) + .{ .vp_d, .mins } + else if (self.hasFeature(.sse4_1)) + .{ .p_d, .mins } + else + null, + .unsigned => if (self.hasFeature(.avx)) + .{ .vp_d, .minu } + 
else if (self.hasFeature(.sse4_1)) + .{ .p_d, .minu } + else + null, + }, + .max => switch (lhs_ty.childType(mod).intInfo(mod).signedness) { + .signed => if (self.hasFeature(.avx)) + .{ .vp_d, .maxs } + else if (self.hasFeature(.sse4_1)) + .{ .p_d, .maxs } + else + null, + .unsigned => if (self.hasFeature(.avx)) + .{ .vp_d, .maxu } + else if (self.hasFeature(.sse4_1)) + .{ .p_d, .maxu } + else + null, + }, + else => null, + }, + 5...8 => switch (air_tag) { + .add, + .addwrap, + => if (self.hasFeature(.avx2)) .{ .vp_d, .add } else null, + .sub, + .subwrap, + => if (self.hasFeature(.avx2)) .{ .vp_d, .sub } else null, + .mul, + .mulwrap, + => if (self.hasFeature(.avx2)) .{ .vp_d, .mull } else null, + .bit_and => if (self.hasFeature(.avx2)) .{ .vp_, .@"and" } else null, + .bit_or => if (self.hasFeature(.avx2)) .{ .vp_, .@"or" } else null, + .xor => if (self.hasFeature(.avx2)) .{ .vp_, .xor } else null, + .min => switch (lhs_ty.childType(mod).intInfo(mod).signedness) { + .signed => if (self.hasFeature(.avx2)) .{ .vp_d, .mins } else null, + .unsigned => if (self.hasFeature(.avx)) .{ .vp_d, .minu } else null, + }, + .max => switch (lhs_ty.childType(mod).intInfo(mod).signedness) { + .signed => if (self.hasFeature(.avx2)) .{ .vp_d, .maxs } else null, + .unsigned => if (self.hasFeature(.avx2)) .{ .vp_d, .maxu } else null, + }, + else => null, + }, + else => null, + }, + 64 => switch (lhs_ty.vectorLen(mod)) { + 1...2 => switch (air_tag) { + .add, + .addwrap, + => if (self.hasFeature(.avx)) .{ .vp_q, .add } else .{ .p_q, .add }, + .sub, + .subwrap, + => if (self.hasFeature(.avx)) .{ .vp_q, .sub } else .{ .p_q, .sub }, + .bit_and => if (self.hasFeature(.avx)) .{ .vp_, .@"and" } else .{ .p_, .@"and" }, + .bit_or => if (self.hasFeature(.avx)) .{ .vp_, .@"or" } else .{ .p_, .@"or" }, + .xor => if (self.hasFeature(.avx)) .{ .vp_, .xor } else .{ .p_, .xor }, + else => null, + }, + 3...4 => switch (air_tag) { + .add, + .addwrap, + => if (self.hasFeature(.avx2)) .{ .vp_q, .add } else null, + .sub, + .subwrap, + => if (self.hasFeature(.avx2)) .{ .vp_q, .sub } else null, + .bit_and => if (self.hasFeature(.avx2)) .{ .vp_, .@"and" } else null, + .bit_or => if (self.hasFeature(.avx2)) .{ .vp_, .@"or" } else null, + .xor => if (self.hasFeature(.avx2)) .{ .vp_, .xor } else null, + else => null, + }, + else => null, + }, + else => null, + }, + .Float => switch (lhs_ty.childType(mod).floatBits(self.target.*)) { + 16 => if (self.hasFeature(.f16c)) switch (lhs_ty.vectorLen(mod)) { + 1 => { + const tmp_reg = (try self.register_manager.allocReg(null, sse)).to128(); + const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); + defer self.register_manager.unlockReg(tmp_lock); + + if (src_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate( + .{ .vp_w, .insr }, + dst_reg, + dst_reg, + src_mcv.mem(.word), + Immediate.u(1), + ) else try self.asmRegisterRegisterRegister( + .{ .vp_, .unpcklwd }, + dst_reg, + dst_reg, + (if (src_mcv.isRegister()) + src_mcv.getReg().? 
+ else + try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(), + ); + try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg, dst_reg); + try self.asmRegisterRegister(.{ .v_, .movshdup }, tmp_reg, dst_reg); + try self.asmRegisterRegisterRegister( + switch (air_tag) { + .add => .{ .v_ss, .add }, + .sub => .{ .v_ss, .sub }, + .mul => .{ .v_ss, .mul }, + .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ss, .div }, + .max => .{ .v_ss, .max }, + .min => .{ .v_ss, .max }, + else => unreachable, + }, + dst_reg, + dst_reg, + tmp_reg, + ); + try self.asmRegisterRegisterImmediate( + .{ .v_, .cvtps2ph }, + dst_reg, + dst_reg, + Immediate.u(0b1_00), + ); + return dst_mcv; + }, + 2 => { + const tmp_reg = (try self.register_manager.allocReg(null, sse)).to128(); + const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); + defer self.register_manager.unlockReg(tmp_lock); + + if (src_mcv.isMemory()) try self.asmRegisterMemoryImmediate( + .{ .vp_d, .insr }, + dst_reg, + src_mcv.mem(.dword), + Immediate.u(1), + ) else try self.asmRegisterRegisterRegister( + .{ .v_ps, .unpckl }, + dst_reg, + dst_reg, + (if (src_mcv.isRegister()) + src_mcv.getReg().? + else + try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(), + ); + try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg, dst_reg); + try self.asmRegisterRegisterRegister( + .{ .v_ps, .movhl }, + tmp_reg, + dst_reg, + dst_reg, + ); + try self.asmRegisterRegisterRegister( + switch (air_tag) { + .add => .{ .v_ps, .add }, + .sub => .{ .v_ps, .sub }, + .mul => .{ .v_ps, .mul }, + .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ps, .div }, + .max => .{ .v_ps, .max }, + .min => .{ .v_ps, .max }, + else => unreachable, + }, + dst_reg, + dst_reg, + tmp_reg, + ); + try self.asmRegisterRegisterImmediate( + .{ .v_, .cvtps2ph }, + dst_reg, + dst_reg, + Immediate.u(0b1_00), + ); + return dst_mcv; + }, + 3...4 => { + const tmp_reg = (try self.register_manager.allocReg(null, sse)).to128(); + const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); + defer self.register_manager.unlockReg(tmp_lock); + + try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg, dst_reg); + if (src_mcv.isMemory()) try self.asmRegisterMemory( + .{ .v_ps, .cvtph2 }, + tmp_reg, + src_mcv.mem(.qword), + ) else try self.asmRegisterRegister( + .{ .v_ps, .cvtph2 }, + tmp_reg, + (if (src_mcv.isRegister()) + src_mcv.getReg().? + else + try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(), + ); + try self.asmRegisterRegisterRegister( + switch (air_tag) { + .add => .{ .v_ps, .add }, + .sub => .{ .v_ps, .sub }, + .mul => .{ .v_ps, .mul }, + .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ps, .div }, + .max => .{ .v_ps, .max }, + .min => .{ .v_ps, .max }, + else => unreachable, + }, + dst_reg, + dst_reg, + tmp_reg, + ); + try self.asmRegisterRegisterImmediate( + .{ .v_, .cvtps2ph }, + dst_reg, + dst_reg, + Immediate.u(0b1_00), + ); + return dst_mcv; + }, + 5...8 => { + const tmp_reg = (try self.register_manager.allocReg(null, sse)).to256(); + const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); + defer self.register_manager.unlockReg(tmp_lock); + + try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg.to256(), dst_reg); + if (src_mcv.isMemory()) try self.asmRegisterMemory( + .{ .v_ps, .cvtph2 }, + tmp_reg, + src_mcv.mem(.xword), + ) else try self.asmRegisterRegister( + .{ .v_ps, .cvtph2 }, + tmp_reg, + (if (src_mcv.isRegister()) + src_mcv.getReg().? 
+ else + try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(), + ); + try self.asmRegisterRegisterRegister( + switch (air_tag) { + .add => .{ .v_ps, .add }, + .sub => .{ .v_ps, .sub }, + .mul => .{ .v_ps, .mul }, + .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ps, .div }, + .max => .{ .v_ps, .max }, + .min => .{ .v_ps, .max }, + else => unreachable, + }, + dst_reg.to256(), + dst_reg.to256(), + tmp_reg, + ); + try self.asmRegisterRegisterImmediate( + .{ .v_, .cvtps2ph }, + dst_reg, + dst_reg.to256(), + Immediate.u(0b1_00), + ); + return dst_mcv; + }, + else => null, + } else null, + 32 => switch (lhs_ty.vectorLen(mod)) { + 1 => switch (air_tag) { + .add => if (self.hasFeature(.avx)) .{ .v_ss, .add } else .{ ._ss, .add }, + .sub => if (self.hasFeature(.avx)) .{ .v_ss, .sub } else .{ ._ss, .sub }, + .mul => if (self.hasFeature(.avx)) .{ .v_ss, .mul } else .{ ._ss, .mul }, + .div_float, + .div_trunc, + .div_floor, + .div_exact, + => if (self.hasFeature(.avx)) .{ .v_ss, .div } else .{ ._ss, .div }, + .max => if (self.hasFeature(.avx)) .{ .v_ss, .max } else .{ ._ss, .max }, + .min => if (self.hasFeature(.avx)) .{ .v_ss, .min } else .{ ._ss, .min }, + else => unreachable, + }, + 2...4 => switch (air_tag) { + .add => if (self.hasFeature(.avx)) .{ .v_ps, .add } else .{ ._ps, .add }, + .sub => if (self.hasFeature(.avx)) .{ .v_ps, .sub } else .{ ._ps, .sub }, + .mul => if (self.hasFeature(.avx)) .{ .v_ps, .mul } else .{ ._ps, .mul }, + .div_float, + .div_trunc, + .div_floor, + .div_exact, + => if (self.hasFeature(.avx)) .{ .v_ps, .div } else .{ ._ps, .div }, + .max => if (self.hasFeature(.avx)) .{ .v_ps, .max } else .{ ._ps, .max }, + .min => if (self.hasFeature(.avx)) .{ .v_ps, .min } else .{ ._ps, .min }, + else => unreachable, + }, + 5...8 => if (self.hasFeature(.avx)) switch (air_tag) { + .add => .{ .v_ps, .add }, + .sub => .{ .v_ps, .sub }, + .mul => .{ .v_ps, .mul }, + .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_ps, .div }, + .max => .{ .v_ps, .max }, + .min => .{ .v_ps, .min }, + else => unreachable, + } else null, + else => null, }, - else => return self.fail("TODO implement genBinOp for {s} {}", .{ @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?) }), - }, lhs_ty, dst_mcv, src_mcv), - else => return self.fail("TODO implement genBinOp for {s} {}", .{ @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?) 
}), + 64 => switch (lhs_ty.vectorLen(mod)) { + 1 => switch (air_tag) { + .add => if (self.hasFeature(.avx)) .{ .v_sd, .add } else .{ ._sd, .add }, + .sub => if (self.hasFeature(.avx)) .{ .v_sd, .sub } else .{ ._sd, .sub }, + .mul => if (self.hasFeature(.avx)) .{ .v_sd, .mul } else .{ ._sd, .mul }, + .div_float, + .div_trunc, + .div_floor, + .div_exact, + => if (self.hasFeature(.avx)) .{ .v_sd, .div } else .{ ._sd, .div }, + .max => if (self.hasFeature(.avx)) .{ .v_sd, .max } else .{ ._sd, .max }, + .min => if (self.hasFeature(.avx)) .{ .v_sd, .min } else .{ ._sd, .min }, + else => unreachable, + }, + 2 => switch (air_tag) { + .add => if (self.hasFeature(.avx)) .{ .v_pd, .add } else .{ ._pd, .add }, + .sub => if (self.hasFeature(.avx)) .{ .v_pd, .sub } else .{ ._pd, .sub }, + .mul => if (self.hasFeature(.avx)) .{ .v_pd, .mul } else .{ ._pd, .mul }, + .div_float, + .div_trunc, + .div_floor, + .div_exact, + => if (self.hasFeature(.avx)) .{ .v_pd, .div } else .{ ._pd, .div }, + .max => if (self.hasFeature(.avx)) .{ .v_pd, .max } else .{ ._pd, .max }, + .min => if (self.hasFeature(.avx)) .{ .v_pd, .min } else .{ ._pd, .min }, + else => unreachable, + }, + 3...4 => if (self.hasFeature(.avx)) switch (air_tag) { + .add => .{ .v_pd, .add }, + .sub => .{ .v_pd, .sub }, + .mul => .{ .v_pd, .mul }, + .div_float, .div_trunc, .div_floor, .div_exact => .{ .v_pd, .div }, + .max => .{ .v_pd, .max }, + .min => .{ .v_pd, .min }, + else => unreachable, + } else null, + else => null, + }, + 80, 128 => null, + else => unreachable, + }, }, + })) |tag| tag else return self.fail("TODO implement genBinOp for {s} {}", .{ + @tagName(air_tag), lhs_ty.fmt(self.bin_file.options.module.?), + }); + + const lhs_copy_reg = if (maybe_mask_reg) |_| registerAlias( + if (copied_to_dst) try self.copyToTmpRegister(lhs_ty, dst_mcv) else lhs_mcv.getReg().?, + abi_size, + ) else null; + const lhs_copy_lock = if (lhs_copy_reg) |reg| self.register_manager.lockReg(reg) else null; + defer if (lhs_copy_lock) |lock| self.register_manager.unlockReg(lock); + + if (self.hasFeature(.avx)) { + const lhs_reg = + if (copied_to_dst) dst_reg else registerAlias(lhs_mcv.getReg().?, abi_size); + if (src_mcv.isMemory()) try self.asmRegisterRegisterMemory( + mir_tag, + dst_reg, + lhs_reg, + src_mcv.mem(Memory.PtrSize.fromSize(abi_size)), + ) else try self.asmRegisterRegisterRegister( + mir_tag, + dst_reg, + lhs_reg, + registerAlias(if (src_mcv.isRegister()) + src_mcv.getReg().? + else + try self.copyToTmpRegister(rhs_ty, src_mcv), abi_size), + ); + } else { + assert(copied_to_dst); + if (src_mcv.isMemory()) try self.asmRegisterMemory( + mir_tag, + dst_reg, + src_mcv.mem(Memory.PtrSize.fromSize(abi_size)), + ) else try self.asmRegisterRegister( + mir_tag, + dst_reg, + registerAlias(if (src_mcv.isRegister()) + src_mcv.getReg().? 
+ else + try self.copyToTmpRegister(rhs_ty, src_mcv), abi_size), + ); + } + switch (air_tag) { + .add, .addwrap, .sub, .subwrap, .mul, .mulwrap, .div_float, .div_exact => {}, + .div_trunc, .div_floor => if (self.hasFeature(.sse4_1)) try self.genRound( + lhs_ty, + dst_reg, + .{ .register = dst_reg }, + switch (air_tag) { + .div_trunc => 0b1_0_11, + .div_floor => 0b1_0_01, + else => unreachable, + }, + ) else return self.fail("TODO implement genBinOp for {s} {} without sse4_1 feature", .{ + @tagName(air_tag), lhs_ty.fmt(self.bin_file.options.module.?), + }), + .bit_and, .bit_or, .xor => {}, + .max, .min => if (maybe_mask_reg) |mask_reg| if (self.hasFeature(.avx)) { + const rhs_copy_reg = registerAlias(src_mcv.getReg().?, abi_size); + + try self.asmRegisterRegisterRegisterImmediate( + if (@as(?Mir.Inst.FixedTag, switch (lhs_ty.zigTypeTag(mod)) { + .Float => switch (lhs_ty.floatBits(self.target.*)) { + 32 => .{ .v_ss, .cmp }, + 64 => .{ .v_sd, .cmp }, + 16, 80, 128 => null, + else => unreachable, + }, + .Vector => switch (lhs_ty.childType(mod).zigTypeTag(mod)) { + .Float => switch (lhs_ty.childType(mod).floatBits(self.target.*)) { + 32 => switch (lhs_ty.vectorLen(mod)) { + 1 => .{ .v_ss, .cmp }, + 2...8 => .{ .v_ps, .cmp }, + else => null, + }, + 64 => switch (lhs_ty.vectorLen(mod)) { + 1 => .{ .v_sd, .cmp }, + 2...4 => .{ .v_pd, .cmp }, + else => null, + }, + 16, 80, 128 => null, + else => unreachable, + }, + else => unreachable, + }, + else => unreachable, + })) |tag| tag else return self.fail("TODO implement genBinOp for {s} {}", .{ + @tagName(air_tag), lhs_ty.fmt(self.bin_file.options.module.?), + }), + mask_reg, + rhs_copy_reg, + rhs_copy_reg, + Immediate.u(3), // unord + ); + try self.asmRegisterRegisterRegisterRegister( + if (@as(?Mir.Inst.FixedTag, switch (lhs_ty.zigTypeTag(mod)) { + .Float => switch (lhs_ty.floatBits(self.target.*)) { + 32 => .{ .v_ps, .blendv }, + 64 => .{ .v_pd, .blendv }, + 16, 80, 128 => null, + else => unreachable, + }, + .Vector => switch (lhs_ty.childType(mod).zigTypeTag(mod)) { + .Float => switch (lhs_ty.childType(mod).floatBits(self.target.*)) { + 32 => switch (lhs_ty.vectorLen(mod)) { + 1...8 => .{ .v_ps, .blendv }, + else => null, + }, + 64 => switch (lhs_ty.vectorLen(mod)) { + 1...4 => .{ .v_pd, .blendv }, + else => null, + }, + 16, 80, 128 => null, + else => unreachable, + }, + else => unreachable, + }, + else => unreachable, + })) |tag| tag else return self.fail("TODO implement genBinOp for {s} {}", .{ + @tagName(air_tag), lhs_ty.fmt(self.bin_file.options.module.?), + }), + dst_reg, + dst_reg, + lhs_copy_reg.?, + mask_reg, + ); + } else { + const has_blend = self.hasFeature(.sse4_1); + try self.asmRegisterRegisterImmediate( + if (@as(?Mir.Inst.FixedTag, switch (lhs_ty.zigTypeTag(mod)) { + .Float => switch (lhs_ty.floatBits(self.target.*)) { + 32 => .{ ._ss, .cmp }, + 64 => .{ ._sd, .cmp }, + 16, 80, 128 => null, + else => unreachable, + }, + .Vector => switch (lhs_ty.childType(mod).zigTypeTag(mod)) { + .Float => switch (lhs_ty.childType(mod).floatBits(self.target.*)) { + 32 => switch (lhs_ty.vectorLen(mod)) { + 1 => .{ ._ss, .cmp }, + 2...4 => .{ ._ps, .cmp }, + else => null, + }, + 64 => switch (lhs_ty.vectorLen(mod)) { + 1 => .{ ._sd, .cmp }, + 2 => .{ ._pd, .cmp }, + else => null, + }, + 16, 80, 128 => null, + else => unreachable, + }, + else => unreachable, + }, + else => unreachable, + })) |tag| tag else return self.fail("TODO implement genBinOp for {s} {}", .{ + @tagName(air_tag), lhs_ty.fmt(self.bin_file.options.module.?), + }), + mask_reg, + 
mask_reg, + Immediate.u(if (has_blend) 3 else 7), // unord, ord + ); + if (has_blend) try self.asmRegisterRegisterRegister( + if (@as(?Mir.Inst.FixedTag, switch (lhs_ty.zigTypeTag(mod)) { + .Float => switch (lhs_ty.floatBits(self.target.*)) { + 32 => .{ ._ps, .blendv }, + 64 => .{ ._pd, .blendv }, + 16, 80, 128 => null, + else => unreachable, + }, + .Vector => switch (lhs_ty.childType(mod).zigTypeTag(mod)) { + .Float => switch (lhs_ty.childType(mod).floatBits(self.target.*)) { + 32 => switch (lhs_ty.vectorLen(mod)) { + 1...4 => .{ ._ps, .blendv }, + else => null, + }, + 64 => switch (lhs_ty.vectorLen(mod)) { + 1...2 => .{ ._pd, .blendv }, + else => null, + }, + 16, 80, 128 => null, + else => unreachable, + }, + else => unreachable, + }, + else => unreachable, + })) |tag| tag else return self.fail("TODO implement genBinOp for {s} {}", .{ + @tagName(air_tag), lhs_ty.fmt(self.bin_file.options.module.?), + }), + dst_reg, + lhs_copy_reg.?, + mask_reg, + ) else { + try self.asmRegisterRegister( + if (@as(?Mir.Inst.FixedTag, switch (lhs_ty.zigTypeTag(mod)) { + .Float => switch (lhs_ty.floatBits(self.target.*)) { + 32 => .{ ._ps, .@"and" }, + 64 => .{ ._pd, .@"and" }, + 16, 80, 128 => null, + else => unreachable, + }, + .Vector => switch (lhs_ty.childType(mod).zigTypeTag(mod)) { + .Float => switch (lhs_ty.childType(mod).floatBits(self.target.*)) { + 32 => switch (lhs_ty.vectorLen(mod)) { + 1...4 => .{ ._ps, .@"and" }, + else => null, + }, + 64 => switch (lhs_ty.vectorLen(mod)) { + 1...2 => .{ ._pd, .@"and" }, + else => null, + }, + 16, 80, 128 => null, + else => unreachable, + }, + else => unreachable, + }, + else => unreachable, + })) |tag| tag else return self.fail("TODO implement genBinOp for {s} {}", .{ + @tagName(air_tag), lhs_ty.fmt(self.bin_file.options.module.?), + }), + dst_reg, + mask_reg, + ); + try self.asmRegisterRegister( + if (@as(?Mir.Inst.FixedTag, switch (lhs_ty.zigTypeTag(mod)) { + .Float => switch (lhs_ty.floatBits(self.target.*)) { + 32 => .{ ._ps, .andn }, + 64 => .{ ._pd, .andn }, + 16, 80, 128 => null, + else => unreachable, + }, + .Vector => switch (lhs_ty.childType(mod).zigTypeTag(mod)) { + .Float => switch (lhs_ty.childType(mod).floatBits(self.target.*)) { + 32 => switch (lhs_ty.vectorLen(mod)) { + 1...4 => .{ ._ps, .andn }, + else => null, + }, + 64 => switch (lhs_ty.vectorLen(mod)) { + 1...2 => .{ ._pd, .andn }, + else => null, + }, + 16, 80, 128 => null, + else => unreachable, + }, + else => unreachable, + }, + else => unreachable, + })) |tag| tag else return self.fail("TODO implement genBinOp for {s} {}", .{ + @tagName(air_tag), lhs_ty.fmt(self.bin_file.options.module.?), + }), + mask_reg, + lhs_copy_reg.?, + ); + try self.asmRegisterRegister( + if (@as(?Mir.Inst.FixedTag, switch (lhs_ty.zigTypeTag(mod)) { + .Float => switch (lhs_ty.floatBits(self.target.*)) { + 32 => .{ ._ps, .@"or" }, + 64 => .{ ._pd, .@"or" }, + 16, 80, 128 => null, + else => unreachable, + }, + .Vector => switch (lhs_ty.childType(mod).zigTypeTag(mod)) { + .Float => switch (lhs_ty.childType(mod).floatBits(self.target.*)) { + 32 => switch (lhs_ty.vectorLen(mod)) { + 1...4 => .{ ._ps, .@"or" }, + else => null, + }, + 64 => switch (lhs_ty.vectorLen(mod)) { + 1...2 => .{ ._pd, .@"or" }, + else => null, + }, + 16, 80, 128 => null, + else => unreachable, + }, + else => unreachable, + }, + else => unreachable, + })) |tag| tag else return self.fail("TODO implement genBinOp for {s} {}", .{ + @tagName(air_tag), lhs_ty.fmt(self.bin_file.options.module.?), + }), + dst_reg, + mask_reg, + ); + } + }, else => 
unreachable, } + return dst_mcv; } -fn genBinOpMir(self: *Self, mir_tag: Mir.Inst.Tag, ty: Type, dst_mcv: MCValue, src_mcv: MCValue) !void { - const abi_size = @intCast(u32, ty.abiSize(self.target.*)); +fn genBinOpMir( + self: *Self, + mir_tag: Mir.Inst.FixedTag, + ty: Type, + dst_mcv: MCValue, + src_mcv: MCValue, +) !void { + const mod = self.bin_file.options.module.?; + const abi_size = @intCast(u32, ty.abiSize(mod)); switch (dst_mcv) { .none, .unreach, .dead, .undef, .immediate, - .register_offset, .eflags, .register_overflow, .lea_direct, @@ -5341,7 +7444,9 @@ fn genBinOpMir(self: *Self, mir_tag: Mir.Inst.Tag, ty: Type, dst_mcv: MCValue, s .lea_frame, .reserved_frame, => unreachable, // unmodifiable destination - .register => |dst_reg| { + .register, .register_offset => { + assert(dst_mcv.isRegister()); + const dst_reg = dst_mcv.getReg().?; const dst_alias = registerAlias(dst_reg, abi_size); switch (src_mcv) { .none, @@ -5351,21 +7456,11 @@ fn genBinOpMir(self: *Self, mir_tag: Mir.Inst.Tag, ty: Type, dst_mcv: MCValue, s .register_overflow, .reserved_frame, => unreachable, - .register => |src_reg| switch (ty.zigTypeTag()) { - .Float => { - if (!Target.x86.featureSetHas(self.target.cpu.features, .sse)) - return self.fail("TODO genBinOpMir for {s} {} without sse", .{ - @tagName(mir_tag), - ty.fmt(self.bin_file.options.module.?), - }); - return self.asmRegisterRegister(mir_tag, dst_reg.to128(), src_reg.to128()); - }, - else => try self.asmRegisterRegister( - mir_tag, - dst_alias, - registerAlias(src_reg, abi_size), - ), - }, + .register => |src_reg| try self.asmRegisterRegister( + mir_tag, + dst_alias, + registerAlias(src_reg, abi_size), + ), .immediate => |imm| switch (self.regBitSize(ty)) { 8 => try self.asmRegisterImmediate( mir_tag, @@ -5400,44 +7495,74 @@ fn genBinOpMir(self: *Self, mir_tag: Mir.Inst.Tag, ty: Type, dst_mcv: MCValue, s )), else => unreachable, }, - .register_offset, .eflags, + .register_offset, .memory, + .indirect, .load_direct, .lea_direct, .load_got, .lea_got, .load_tlv, .lea_tlv, + .load_frame, .lea_frame, => { - assert(abi_size <= 8); + blk: { + return self.asmRegisterMemory( + mir_tag, + registerAlias(dst_reg, abi_size), + Memory.sib(Memory.PtrSize.fromSize(abi_size), switch (src_mcv) { + .memory => |addr| .{ + .base = .{ .reg = .ds }, + .disp = math.cast(i32, addr) orelse break :blk, + }, + .indirect => |reg_off| .{ + .base = .{ .reg = reg_off.reg }, + .disp = reg_off.off, + }, + .load_frame => |frame_addr| .{ + .base = .{ .frame = frame_addr.index }, + .disp = frame_addr.off, + }, + else => break :blk, + }), + ); + } + const dst_reg_lock = self.register_manager.lockReg(dst_reg); defer if (dst_reg_lock) |lock| self.register_manager.unlockReg(lock); - const reg = try self.copyToTmpRegister(ty, src_mcv); - return self.genBinOpMir(mir_tag, ty, dst_mcv, .{ .register = reg }); - }, - .indirect, .load_frame => try self.asmRegisterMemory( - mir_tag, - registerAlias(dst_reg, abi_size), - Memory.sib(Memory.PtrSize.fromSize(abi_size), switch (src_mcv) { - .indirect => |reg_off| .{ - .base = .{ .reg = reg_off.reg }, - .disp = reg_off.off, + switch (src_mcv) { + .eflags, + .register_offset, + .lea_direct, + .lea_got, + .lea_tlv, + .lea_frame, + => { + const reg = try self.copyToTmpRegister(ty, src_mcv); + return self.genBinOpMir(mir_tag, ty, dst_mcv, .{ .register = reg }); }, - .load_frame => |frame_addr| .{ - .base = .{ .frame = frame_addr.index }, - .disp = frame_addr.off, + .memory, + .load_direct, + .load_got, + .load_tlv, + => { + const ptr_ty = try 
mod.singleConstPtrType(ty); + const addr_reg = try self.copyToTmpRegister(ptr_ty, src_mcv.address()); + return self.genBinOpMir(mir_tag, ty, dst_mcv, .{ + .indirect = .{ .reg = addr_reg }, + }); }, else => unreachable, - }), - ), + } + }, } }, .memory, .indirect, .load_got, .load_direct, .load_tlv, .load_frame => { const OpInfo = ?struct { addr_reg: Register, addr_lock: RegisterLock }; - const limb_abi_size = @min(abi_size, 8); + const limb_abi_size: u32 = @min(abi_size, 8); const dst_info: OpInfo = switch (dst_mcv) { else => unreachable, @@ -5498,21 +7623,21 @@ fn genBinOpMir(self: *Self, mir_tag: Mir.Inst.Tag, ty: Type, dst_mcv: MCValue, s defer if (src_info) |info| self.register_manager.unlockReg(info.addr_lock); const ty_signedness = - if (ty.isAbiInt()) ty.intInfo(self.target.*).signedness else .unsigned; + if (ty.isAbiInt(mod)) ty.intInfo(mod).signedness else .unsigned; const limb_ty = if (abi_size <= 8) ty else switch (ty_signedness) { .signed => Type.usize, .unsigned => Type.isize, }; var off: i32 = 0; while (off < abi_size) : (off += 8) { - const mir_limb_tag = switch (off) { + const mir_limb_tag: Mir.Inst.FixedTag = switch (off) { 0 => mir_tag, - else => switch (mir_tag) { - .add => .adc, - .sub, .cmp => .sbb, + else => switch (mir_tag[1]) { + .add => .{ ._, .adc }, + .sub, .cmp => .{ ._, .sbb }, .@"or", .@"and", .xor => mir_tag, else => return self.fail("TODO genBinOpMir implement large ABI for {s}", .{ - @tagName(mir_tag), + @tagName(mir_tag[1]), }), }, }; @@ -5654,7 +7779,8 @@ fn genBinOpMir(self: *Self, mir_tag: Mir.Inst.Tag, ty: Type, dst_mcv: MCValue, s /// Performs multi-operand integer multiplication between dst_mcv and src_mcv, storing the result in dst_mcv. /// Does not support byte-size operands. fn genIntMulComplexOpMir(self: *Self, dst_ty: Type, dst_mcv: MCValue, src_mcv: MCValue) InnerError!void { - const abi_size = @intCast(u32, dst_ty.abiSize(self.target.*)); + const mod = self.bin_file.options.module.?; + const abi_size = @intCast(u32, dst_ty.abiSize(mod)); switch (dst_mcv) { .none, .unreach, @@ -5684,14 +7810,14 @@ fn genIntMulComplexOpMir(self: *Self, dst_ty: Type, dst_mcv: MCValue, src_mcv: M .reserved_frame, => unreachable, .register => |src_reg| try self.asmRegisterRegister( - .imul, + .{ .i_, .mul }, dst_alias, registerAlias(src_reg, abi_size), ), .immediate => |imm| { if (math.cast(i32, imm)) |small| { try self.asmRegisterRegisterImmediate( - .imul, + .{ .i_, .mul }, dst_alias, dst_alias, Immediate.s(small), @@ -5711,19 +7837,19 @@ fn genIntMulComplexOpMir(self: *Self, dst_ty: Type, dst_mcv: MCValue, src_mcv: M .lea_tlv, .lea_frame, => try self.asmRegisterRegister( - .imul, + .{ .i_, .mul }, dst_alias, registerAlias(try self.copyToTmpRegister(dst_ty, src_mcv), abi_size), ), .memory, .indirect, .load_frame => try self.asmRegisterMemory( - .imul, + .{ .i_, .mul }, dst_alias, Memory.sib(Memory.PtrSize.fromSize(abi_size), switch (src_mcv) { .memory => |addr| .{ .base = .{ .reg = .ds }, .disp = math.cast(i32, @bitCast(i64, addr)) orelse return self.asmRegisterRegister( - .imul, + .{ .i_, .mul }, dst_alias, registerAlias(try self.copyToTmpRegister(dst_ty, src_mcv), abi_size), ), @@ -5754,6 +7880,7 @@ fn genIntMulComplexOpMir(self: *Self, dst_ty: Type, dst_mcv: MCValue, src_mcv: M } fn airArg(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; // skip zero-bit arguments as they don't have a corresponding arg instruction var arg_index = self.arg_index; while (self.args[arg_index] == .none) arg_index += 1; @@ -5767,9 +7894,9 @@ 
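An aside on two of the float strategies in the genBinOp hunks above, since they are easy to lose in the instruction-selection tables: f16 arithmetic is lowered by widening to f32 with vcvtph2ps, operating in single precision, and narrowing back with vcvtps2ph; and @min/@max get an extra compare-unordered + blendv step (or an and/andn/or emulation of blendv on pre-SSE4.1 targets) so that a NaN operand is ignored rather than propagated. A scalar model of both, as an illustrative sketch only — names and rounding details are simplified, this is not the backend code:

```zig
const std = @import("std");

// f16 arithmetic: there is no half-precision ALU, so operands are widened to
// f32 (vcvtph2ps), the operation runs in single precision (e.g. vaddss), and
// the result is narrowed back (vcvtps2ph).
fn f16AddViaF32(a: f16, b: f16) f16 {
    const wa: f32 = a;
    const wb: f32 = b;
    return @floatCast(f16, wa + wb);
}

// @min/@max: minsd/maxsd return the *second* operand whenever either input is
// NaN; the compare-unordered mask plus blendv then puts the lhs back wherever
// the rhs was NaN, so NaN only survives when both inputs are NaN.
fn fminModel(lhs: f64, rhs: f64) f64 {
    if (std.math.isNan(rhs)) return lhs; // blendv selects lhs where rhs is unordered
    if (std.math.isNan(lhs)) return rhs; // minsd already yields the source operand
    return if (lhs < rhs) lhs else rhs; // ordinary minsd result
}

test "float lowering models" {
    try std.testing.expectEqual(@as(f16, 3.0), f16AddViaF32(1.0, 2.0));
    try std.testing.expectEqual(@as(f64, 1.0), fminModel(std.math.nan(f64), 1.0));
    try std.testing.expectEqual(@as(f64, 1.0), fminModel(1.0, std.math.nan(f64)));
    try std.testing.expect(std.math.isNan(fminModel(std.math.nan(f64), std.math.nan(f64))));
}
```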
fn airArg(self: *Self, inst: Air.Inst.Index) !void { else => return self.fail("TODO implement arg for {}", .{dst_mcv}), } - const ty = self.air.typeOfIndex(inst); + const ty = self.typeOfIndex(inst); const src_index = self.air.instructions.items(.data)[inst].arg.src_index; - const name = self.mod_fn.getParamName(self.bin_file.options.module.?, src_index); + const name = self.owner.mod_fn.getParamName(mod, src_index); try self.genArgDbgInfo(ty, name, dst_mcv); break :result dst_mcv; @@ -5778,22 +7905,26 @@ fn airArg(self: *Self, inst: Air.Inst.Index) !void { } fn genArgDbgInfo(self: Self, ty: Type, name: [:0]const u8, mcv: MCValue) !void { + const mod = self.bin_file.options.module.?; switch (self.debug_output) { .dwarf => |dw| { const loc: link.File.Dwarf.DeclState.DbgInfoLoc = switch (mcv) { - .register => |reg| .{ .register = reg.dwarfLocOp() }, + .register => |reg| .{ .register = reg.dwarfNum() }, // TODO use a frame index .load_frame => return, //.stack_offset => |off| .{ // .stack = .{ // // TODO handle -fomit-frame-pointer - // .fp_register = Register.rbp.dwarfLocOpDeref(), + // .fp_register = Register.rbp.dwarfNum(), // .offset = -off, // }, //}, else => unreachable, // not a valid function parameter }; - try dw.genArgDbgInfo(name, ty, self.mod_fn.owner_decl, loc); + // TODO: this might need adjusting like the linkers do. + // Instead of flattening the owner and passing Decl.Index here we may + // want to special case LazySymbol in DWARF linker too. + try dw.genArgDbgInfo(name, ty, self.owner.getDecl(mod), loc); }, .plan9 => {}, .none => {}, @@ -5807,6 +7938,7 @@ fn genVarDbgInfo( mcv: MCValue, name: [:0]const u8, ) !void { + const mod = self.bin_file.options.module.?; const is_ptr = switch (tag) { .dbg_var_ptr => true, .dbg_var_val => false, @@ -5816,11 +7948,11 @@ fn genVarDbgInfo( switch (self.debug_output) { .dwarf => |dw| { const loc: link.File.Dwarf.DeclState.DbgInfoLoc = switch (mcv) { - .register => |reg| .{ .register = reg.dwarfLocOp() }, + .register => |reg| .{ .register = reg.dwarfNum() }, // TODO use a frame index .load_frame, .lea_frame => return, //=> |off| .{ .stack = .{ - // .fp_register = Register.rbp.dwarfLocOpDeref(), + // .fp_register = Register.rbp.dwarfNum(), // .offset = -off, //} }, .memory => |address| .{ .memory = address }, @@ -5834,7 +7966,10 @@ fn genVarDbgInfo( break :blk .nop; }, }; - try dw.genVarDbgInfo(name, ty, self.mod_fn.owner_decl, is_ptr, loc); + // TODO: this might need adjusting like the linkers do. + // Instead of flattening the owner and passing Decl.Index here we may + // want to special case LazySymbol in DWARF linker too. 
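The genBinOpMir hunk just above handles integer operands wider than 64 bits one 8-byte limb at a time: the first limb uses the requested opcode and every later limb switches to the carrying form (add becomes adc, sub/cmp become sbb), while the airCmp hunk further down reduces wide equality to xor-ing corresponding limbs and or-ing the differences into a single accumulator that is tested once. A rough scalar model of both loops, assuming little-endian 64-bit limbs (helper names are invented for illustration):

```zig
/// add/adc chain: the first limb adds, every later limb also adds the carry
/// out of the previous limb (the backend's `.{ ._, .adc }` selection for off > 0).
fn addWide(dst: []u64, src: []const u64) void {
    var carry: u64 = 0;
    for (dst, src) |*d, s| {
        const lo = @addWithOverflow(d.*, s);
        const hi = @addWithOverflow(lo[0], carry);
        d.* = hi[0];
        carry = @as(u64, lo[1]) + hi[1];
    }
}

/// xor/or/test chain: any differing limb sets bits in the accumulator, so one
/// final test decides eq/neq without branching per limb.
fn eqlWide(lhs: []const u64, rhs: []const u64) bool {
    var acc: u64 = 0;
    for (lhs, rhs) |l, r| acc |= l ^ r;
    return acc == 0;
}
```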
+ try dw.genVarDbgInfo(name, ty, self.owner.getDecl(mod), is_ptr, loc); }, .plan9 => {}, .none => {}, @@ -5842,12 +7977,12 @@ fn genVarDbgInfo( } fn airTrap(self: *Self) !void { - try self.asmOpOnly(.ud2); + try self.asmOpOnly(.{ ._, .ud2 }); return self.finishAirBookkeeping(); } fn airBreakpoint(self: *Self) !void { - try self.asmOpOnly(.int3); + try self.asmOpOnly(.{ ._, .int3 }); return self.finishAirBookkeeping(); } @@ -5868,26 +8003,29 @@ fn airFence(self: *Self, inst: Air.Inst.Index) !void { switch (order) { .Unordered, .Monotonic => unreachable, .Acquire, .Release, .AcqRel => {}, - .SeqCst => try self.asmOpOnly(.mfence), + .SeqCst => try self.asmOpOnly(.{ ._, .mfence }), } return self.finishAirBookkeeping(); } fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallModifier) !void { + const mod = self.bin_file.options.module.?; if (modifier == .always_tail) return self.fail("TODO implement tail calls for x86_64", .{}); const pl_op = self.air.instructions.items(.data)[inst].pl_op; const callee = pl_op.operand; const extra = self.air.extraData(Air.Call, pl_op.payload); const args = @ptrCast([]const Air.Inst.Ref, self.air.extra[extra.end..][0..extra.data.args_len]); - const ty = self.air.typeOf(callee); + const ty = self.typeOf(callee); - const fn_ty = switch (ty.zigTypeTag()) { + const fn_ty = switch (ty.zigTypeTag(mod)) { .Fn => ty, - .Pointer => ty.childType(), + .Pointer => ty.childType(mod), else => unreachable, }; - var info = try self.resolveCallingConventionValues(fn_ty, args[fn_ty.fnParamLen()..], .call_frame); + const fn_info = mod.typeToFunc(fn_ty).?; + + var info = try self.resolveCallingConventionValues(fn_info, args[fn_info.param_types.len..], .call_frame); defer info.deinit(self); // We need a properly aligned and sized call frame to be able to call this function. @@ -5914,7 +8052,7 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallModifier else => unreachable, } for (args, info.args) |arg, mc_arg| { - const arg_ty = self.air.typeOf(arg); + const arg_ty = self.typeOf(arg); const arg_mcv = try self.resolveInst(arg); switch (mc_arg) { .none => {}, @@ -5928,8 +8066,8 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallModifier const ret_lock = switch (info.return_value.long) { .none, .unreach => null, .indirect => |reg_off| lock: { - const ret_ty = fn_ty.fnReturnType(); - const frame_index = try self.allocFrameIndex(FrameAlloc.initType(ret_ty, self.target.*)); + const ret_ty = fn_info.return_type.toType(); + const frame_index = try self.allocFrameIndex(FrameAlloc.initType(ret_ty, mod)); try self.genSetReg(reg_off.reg, Type.usize, .{ .lea_frame = .{ .index = frame_index, .off = -reg_off.off }, }); @@ -5941,7 +8079,7 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallModifier defer if (ret_lock) |lock| self.register_manager.unlockReg(lock); for (args, info.args) |arg, mc_arg| { - const arg_ty = self.air.typeOf(arg); + const arg_ty = self.typeOf(arg); const arg_mcv = try self.resolveInst(arg); switch (mc_arg) { .none, .load_frame => {}, @@ -5952,65 +8090,68 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallModifier // Due to incremental compilation, how function calls are generated depends // on linking. 
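As the comment above notes, how the call instruction itself is emitted in the airCall hunk depends on the object format: ELF and Plan9 call indirectly through a linker-maintained address slot, while COFF and MachO first materialize the callee's address in rax (via a GOT symbol) and then call the register. A minimal sketch of the two shapes; the table and parameter names here are hypothetical, not the linkers' actual data structures:

```zig
const Callee = *const fn () void;

/// ELF/Plan9 shape: `call qword ptr [slot]` through an address table the
/// linker keeps up to date, so the emitted code need not change when the
/// callee is recompiled and moves.
fn callThroughTable(table: []const Callee, slot: usize) void {
    table[slot]();
}

/// COFF/MachO shape: the address is first loaded into a register
/// (`mov/lea rax, ...` from a GOT entry), then `call rax`.
fn callThroughRegister(callee: Callee) void {
    callee();
}
```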
- const mod = self.bin_file.options.module.?; - if (self.air.value(callee)) |func_value| { - if (func_value.castTag(.function)) |func_payload| { - const func = func_payload.data; - + if (try self.air.value(callee, mod)) |func_value| { + const func_key = mod.intern_pool.indexToKey(func_value.ip_index); + if (switch (func_key) { + .func => |func| mod.funcPtr(func.index).owner_decl, + .ptr => |ptr| switch (ptr.addr) { + .decl => |decl| decl, + else => null, + }, + else => null, + }) |owner_decl| { if (self.bin_file.cast(link.File.Elf)) |elf_file| { - const atom_index = try elf_file.getOrCreateAtomForDecl(func.owner_decl); + const atom_index = try elf_file.getOrCreateAtomForDecl(owner_decl); const atom = elf_file.getAtom(atom_index); _ = try atom.getOrCreateOffsetTableEntry(elf_file); const got_addr = atom.getOffsetTableAddress(elf_file); - try self.asmMemory(.call, Memory.sib(.qword, .{ + try self.asmMemory(.{ ._, .call }, Memory.sib(.qword, .{ .base = .{ .reg = .ds }, .disp = @intCast(i32, got_addr), })); - } else if (self.bin_file.cast(link.File.Coff)) |_| { - const sym_index = try self.getSymbolIndexForDecl(func.owner_decl); + } else if (self.bin_file.cast(link.File.Coff)) |coff_file| { + const atom = try coff_file.getOrCreateAtomForDecl(owner_decl); + const sym_index = coff_file.getAtom(atom).getSymbolIndex().?; try self.genSetReg(.rax, Type.usize, .{ .lea_got = sym_index }); - try self.asmRegister(.call, .rax); - } else if (self.bin_file.cast(link.File.MachO)) |_| { - const sym_index = try self.getSymbolIndexForDecl(func.owner_decl); + try self.asmRegister(.{ ._, .call }, .rax); + } else if (self.bin_file.cast(link.File.MachO)) |macho_file| { + const atom = try macho_file.getOrCreateAtomForDecl(owner_decl); + const sym_index = macho_file.getAtom(atom).getSymbolIndex().?; try self.genSetReg(.rax, Type.usize, .{ .lea_got = sym_index }); - try self.asmRegister(.call, .rax); + try self.asmRegister(.{ ._, .call }, .rax); } else if (self.bin_file.cast(link.File.Plan9)) |p9| { - const decl_block_index = try p9.seeDecl(func.owner_decl); - const decl_block = p9.getDeclBlock(decl_block_index); - const ptr_bits = self.target.cpu.arch.ptrBitWidth(); - const ptr_bytes: u64 = @divExact(ptr_bits, 8); - const got_addr = p9.bases.data; - const got_index = decl_block.got_index.?; - const fn_got_addr = got_addr + got_index * ptr_bytes; - try self.asmMemory(.call, Memory.sib(.qword, .{ + const atom_index = try p9.seeDecl(owner_decl); + const atom = p9.getAtom(atom_index); + try self.asmMemory(.{ ._, .call }, Memory.sib(.qword, .{ .base = .{ .reg = .ds }, - .disp = @intCast(i32, fn_got_addr), + .disp = @intCast(i32, atom.getOffsetTableAddress(p9)), })); } else unreachable; - } else if (func_value.castTag(.extern_fn)) |func_payload| { - const extern_fn = func_payload.data; - const decl_name = mem.sliceTo(mod.declPtr(extern_fn.owner_decl).name, 0); - const lib_name = mem.sliceTo(extern_fn.lib_name, 0); + } else if (func_value.getExternFunc(mod)) |extern_func| { + const decl_name = mod.intern_pool.stringToSlice(mod.declPtr(extern_func.decl).name); + const lib_name = mod.intern_pool.stringToSliceUnwrap(extern_func.lib_name); if (self.bin_file.cast(link.File.Coff)) |coff_file| { - const atom_index = try self.getSymbolIndexForDecl(self.mod_fn.owner_decl); + const atom_index = try self.owner.getSymbolIndex(self); const sym_index = try coff_file.getGlobalSymbol(decl_name, lib_name); _ = try self.addInst(.{ - .tag = .mov_linker, + .tag = .mov, .ops = .import_reloc, - .data = .{ .payload = try 
self.addExtra(Mir.LeaRegisterReloc{ - .reg = @enumToInt(Register.rax), - .atom_index = atom_index, - .sym_index = sym_index, - }) }, + .data = .{ .rx = .{ + .r1 = .rax, + .payload = try self.addExtra(Mir.Reloc{ + .atom_index = atom_index, + .sym_index = sym_index, + }), + } }, }); - try self.asmRegister(.call, .rax); + try self.asmRegister(.{ ._, .call }, .rax); } else if (self.bin_file.cast(link.File.MachO)) |macho_file| { + const atom_index = try self.owner.getSymbolIndex(self); const sym_index = try macho_file.getGlobalSymbol(decl_name, lib_name); - const atom_index = try self.getSymbolIndexForDecl(self.mod_fn.owner_decl); _ = try self.addInst(.{ - .tag = .call_extern, - .ops = undefined, - .data = .{ .relocation = .{ + .tag = .call, + .ops = .extern_fn_reloc, + .data = .{ .reloc = .{ .atom_index = atom_index, .sym_index = sym_index, } }, @@ -6022,10 +8163,10 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallModifier return self.fail("TODO implement calling bitcasted functions", .{}); } } else { - assert(ty.zigTypeTag() == .Pointer); + assert(ty.zigTypeTag(mod) == .Pointer); const mcv = try self.resolveInst(callee); try self.genSetReg(.rax, Type.usize, mcv); - try self.asmRegister(.call, .rax); + try self.asmRegister(.{ ._, .call }, .rax); } var bt = self.liveness.iterateBigTomb(inst); @@ -6037,9 +8178,10 @@ fn airCall(self: *Self, inst: Air.Inst.Index, modifier: std.builtin.CallModifier } fn airRet(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; const un_op = self.air.instructions.items(.data)[inst].un_op; const operand = try self.resolveInst(un_op); - const ret_ty = self.fn_type.fnReturnType(); + const ret_ty = self.fn_type.fnReturnType(mod); switch (self.ret_mcv.short) { .none => {}, .register => try self.genCopy(ret_ty, self.ret_mcv.short, operand), @@ -6063,7 +8205,7 @@ fn airRet(self: *Self, inst: Air.Inst.Index) !void { fn airRetLoad(self: *Self, inst: Air.Inst.Index) !void { const un_op = self.air.instructions.items(.data)[inst].un_op; const ptr = try self.resolveInst(un_op); - const ptr_ty = self.air.typeOf(un_op); + const ptr_ty = self.typeOf(un_op); switch (self.ret_mcv.short) { .none => {}, .register => try self.load(self.ret_mcv.short, ptr_ty, ptr), @@ -6078,10 +8220,9 @@ fn airRetLoad(self: *Self, inst: Air.Inst.Index) !void { } fn airCmp(self: *Self, inst: Air.Inst.Index, op: math.CompareOperator) !void { + const mod = self.bin_file.options.module.?; const bin_op = self.air.instructions.items(.data)[inst].bin_op; - const ty = self.air.typeOf(bin_op.lhs); - const ty_abi_size = ty.abiSize(self.target.*); - const can_reuse = ty_abi_size <= 8; + const ty = self.typeOf(bin_op.lhs); try self.spillEflagsIfOccupied(); self.eflags_inst = inst; @@ -6100,37 +8241,184 @@ fn airCmp(self: *Self, inst: Air.Inst.Index, op: math.CompareOperator) !void { }; defer if (rhs_lock) |lock| self.register_manager.unlockReg(lock); - const dst_mem_ok = !ty.isRuntimeFloat(); - var flipped = false; - const dst_mcv: MCValue = if (can_reuse and !lhs_mcv.isImmediate() and - (dst_mem_ok or lhs_mcv.isRegister()) and self.liveness.operandDies(inst, 0)) - lhs_mcv - else if (can_reuse and !rhs_mcv.isImmediate() and - (dst_mem_ok or rhs_mcv.isRegister()) and self.liveness.operandDies(inst, 1)) - dst: { - flipped = true; - break :dst rhs_mcv; - } else if (dst_mem_ok) dst: { - const dst_mcv = try self.allocTempRegOrMem(ty, true); - try self.genCopy(ty, dst_mcv, lhs_mcv); - break :dst dst_mcv; - } else .{ .register = try self.copyToTmpRegister(ty, 
lhs_mcv) }; - const dst_lock = switch (dst_mcv) { - .register => |reg| self.register_manager.lockReg(reg), - else => null, - }; - defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); + const result = MCValue{ + .eflags = switch (ty.zigTypeTag(mod)) { + else => result: { + const abi_size = @intCast(u16, ty.abiSize(mod)); + const may_flip: enum { + may_flip, + must_flip, + must_not_flip, + } = if (abi_size > 8) switch (op) { + .lt, .gte => .must_not_flip, + .lte, .gt => .must_flip, + .eq, .neq => .may_flip, + } else .may_flip; + + const flipped = switch (may_flip) { + .may_flip => !lhs_mcv.isRegister() and !lhs_mcv.isMemory(), + .must_flip => true, + .must_not_flip => false, + }; + const unmat_dst_mcv = if (flipped) rhs_mcv else lhs_mcv; + const dst_mcv = if (unmat_dst_mcv.isRegister() or + (abi_size <= 8 and unmat_dst_mcv.isMemory())) unmat_dst_mcv else dst: { + const dst_mcv = try self.allocTempRegOrMem(ty, true); + try self.genCopy(ty, dst_mcv, unmat_dst_mcv); + break :dst dst_mcv; + }; + const dst_lock = + if (dst_mcv.getReg()) |reg| self.register_manager.lockReg(reg) else null; + defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); + + const src_mcv = if (flipped) lhs_mcv else rhs_mcv; + const src_lock = + if (src_mcv.getReg()) |reg| self.register_manager.lockReg(reg) else null; + defer if (src_lock) |lock| self.register_manager.unlockReg(lock); + + break :result Condition.fromCompareOperator( + if (ty.isAbiInt(mod)) ty.intInfo(mod).signedness else .unsigned, + result_op: { + const flipped_op = if (flipped) op.reverse() else op; + if (abi_size > 8) switch (flipped_op) { + .lt, .gte => {}, + .lte, .gt => unreachable, + .eq, .neq => { + const dst_addr_mcv: MCValue = switch (dst_mcv) { + .memory, .indirect, .load_frame => dst_mcv.address(), + else => .{ .register = try self.copyToTmpRegister( + Type.usize, + dst_mcv.address(), + ) }, + }; + const dst_addr_lock = if (dst_addr_mcv.getReg()) |reg| + self.register_manager.lockReg(reg) + else + null; + defer if (dst_addr_lock) |lock| self.register_manager.unlockReg(lock); + + const src_addr_mcv: MCValue = switch (src_mcv) { + .memory, .indirect, .load_frame => src_mcv.address(), + else => .{ .register = try self.copyToTmpRegister( + Type.usize, + src_mcv.address(), + ) }, + }; + const src_addr_lock = if (src_addr_mcv.getReg()) |reg| + self.register_manager.lockReg(reg) + else + null; + defer if (src_addr_lock) |lock| self.register_manager.unlockReg(lock); + + const regs = try self.register_manager.allocRegs(2, .{ null, null }, gp); + const acc_reg = regs[0].to64(); + const locks = self.register_manager.lockRegsAssumeUnused(2, regs); + defer for (locks) |lock| self.register_manager.unlockReg(lock); + + const limbs_len = std.math.divCeil(u16, abi_size, 8) catch unreachable; + var limb_i: u16 = 0; + while (limb_i < limbs_len) : (limb_i += 1) { + const tmp_reg = regs[@min(limb_i, 1)].to64(); + try self.genSetReg( + tmp_reg, + Type.usize, + dst_addr_mcv.offset(limb_i * 8).deref(), + ); + try self.genBinOpMir( + .{ ._, .xor }, + Type.usize, + .{ .register = tmp_reg }, + src_addr_mcv.offset(limb_i * 8).deref(), + ); + if (limb_i > 0) try self.asmRegisterRegister( + .{ ._, .@"or" }, + acc_reg, + tmp_reg, + ); + } + try self.asmRegisterRegister(.{ ._, .@"test" }, acc_reg, acc_reg); + break :result_op flipped_op; + }, + }; + try self.genBinOpMir(.{ ._, .cmp }, ty, dst_mcv, src_mcv); + break :result_op flipped_op; + }, + ); + }, + .Float => result: { + const flipped = switch (op) { + .lt, .lte => true, + .eq, .gte, .gt, .neq => 
false, + }; - const src_mcv = if (flipped) lhs_mcv else rhs_mcv; - try self.genBinOpMir(switch (ty.tag()) { - else => .cmp, - .f32 => .ucomiss, - .f64 => .ucomisd, - }, ty, dst_mcv, src_mcv); + const dst_mcv = if (flipped) rhs_mcv else lhs_mcv; + const dst_reg = if (dst_mcv.isRegister()) + dst_mcv.getReg().? + else + try self.copyToTmpRegister(ty, dst_mcv); + const dst_lock = self.register_manager.lockReg(dst_reg); + defer if (dst_lock) |lock| self.register_manager.unlockReg(lock); + const src_mcv = if (flipped) lhs_mcv else rhs_mcv; + + switch (ty.floatBits(self.target.*)) { + 16 => if (self.hasFeature(.f16c)) { + const tmp1_reg = (try self.register_manager.allocReg(null, sse)).to128(); + const tmp1_mcv = MCValue{ .register = tmp1_reg }; + const tmp1_lock = self.register_manager.lockRegAssumeUnused(tmp1_reg); + defer self.register_manager.unlockReg(tmp1_lock); + + const tmp2_reg = (try self.register_manager.allocReg(null, sse)).to128(); + const tmp2_mcv = MCValue{ .register = tmp2_reg }; + const tmp2_lock = self.register_manager.lockRegAssumeUnused(tmp2_reg); + defer self.register_manager.unlockReg(tmp2_lock); + + if (src_mcv.isMemory()) try self.asmRegisterRegisterMemoryImmediate( + .{ .vp_w, .insr }, + tmp1_reg, + dst_reg.to128(), + src_mcv.mem(.word), + Immediate.u(1), + ) else try self.asmRegisterRegisterRegister( + .{ .vp_, .unpcklwd }, + tmp1_reg, + dst_reg.to128(), + (if (src_mcv.isRegister()) + src_mcv.getReg().? + else + try self.copyToTmpRegister(ty, src_mcv)).to128(), + ); + try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, tmp1_reg, tmp1_reg); + try self.asmRegisterRegister(.{ .v_, .movshdup }, tmp2_reg, tmp1_reg); + try self.genBinOpMir(.{ ._ss, .ucomi }, ty, tmp1_mcv, tmp2_mcv); + } else return self.fail("TODO implement airCmp for {}", .{ + ty.fmt(mod), + }), + 32 => try self.genBinOpMir( + .{ ._ss, .ucomi }, + ty, + .{ .register = dst_reg }, + src_mcv, + ), + 64 => try self.genBinOpMir( + .{ ._sd, .ucomi }, + ty, + .{ .register = dst_reg }, + src_mcv, + ), + else => return self.fail("TODO implement airCmp for {}", .{ + ty.fmt(mod), + }), + } - const signedness = if (ty.isAbiInt()) ty.intInfo(self.target.*).signedness else .unsigned; - const result = MCValue{ - .eflags = Condition.fromCompareOperator(signedness, if (flipped) op.reverse() else op), + break :result switch (if (flipped) op.reverse() else op) { + .lt, .lte => unreachable, // required to have been canonicalized to gt(e) + .gt => .a, + .gte => .ae, + .eq => .z_and_np, + .neq => .nz_or_p, + }; + }, + }, }; return self.finishAir(inst, result, .{ bin_op.lhs, bin_op.rhs, .none }); } @@ -6141,55 +8429,26 @@ fn airCmpVector(self: *Self, inst: Air.Inst.Index) !void { } fn airCmpLtErrorsLen(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; const un_op = self.air.instructions.items(.data)[inst].un_op; const addr_reg = try self.register_manager.allocReg(null, gp); const addr_lock = self.register_manager.lockRegAssumeUnused(addr_reg); defer self.register_manager.unlockReg(addr_lock); - - if (self.bin_file.cast(link.File.Elf)) |elf_file| { - const atom_index = try elf_file.getOrCreateAtomForLazySymbol( - .{ .kind = .const_data, .ty = Type.anyerror }, - 4, // dword alignment - ); - const atom = elf_file.getAtom(atom_index); - _ = try atom.getOrCreateOffsetTableEntry(elf_file); - const got_addr = atom.getOffsetTableAddress(elf_file); - try self.asmRegisterMemory( - .mov, - addr_reg.to64(), - Memory.sib(.qword, .{ .base = .{ .reg = .ds }, .disp = @intCast(i32, got_addr) }), - ); - } else if 
(self.bin_file.cast(link.File.Coff)) |coff_file| { - const atom_index = try coff_file.getOrCreateAtomForLazySymbol( - .{ .kind = .const_data, .ty = Type.anyerror }, - 4, // dword alignment - ); - const sym_index = coff_file.getAtom(atom_index).getSymbolIndex().?; - try self.genSetReg(addr_reg, Type.usize, .{ .lea_got = sym_index }); - } else if (self.bin_file.cast(link.File.MachO)) |macho_file| { - const atom_index = try macho_file.getOrCreateAtomForLazySymbol( - .{ .kind = .const_data, .ty = Type.anyerror }, - 4, // dword alignment - ); - const sym_index = macho_file.getAtom(atom_index).getSymbolIndex().?; - try self.genSetReg(addr_reg, Type.usize, .{ .lea_got = sym_index }); - } else { - return self.fail("TODO implement airCmpLtErrorsLen for x86_64 {s}", .{@tagName(self.bin_file.tag)}); - } + try self.genLazySymbolRef(.lea, addr_reg, link.File.LazySymbol.initDecl(.const_data, null, mod)); try self.spillEflagsIfOccupied(); self.eflags_inst = inst; - const op_ty = self.air.typeOf(un_op); - const op_abi_size = @intCast(u32, op_ty.abiSize(self.target.*)); + const op_ty = self.typeOf(un_op); + const op_abi_size = @intCast(u32, op_ty.abiSize(mod)); const op_mcv = try self.resolveInst(un_op); const dst_reg = switch (op_mcv) { .register => |reg| reg, else => try self.copyToTmpRegister(op_ty, op_mcv), }; try self.asmRegisterMemory( - .cmp, + .{ ._, .cmp }, registerAlias(dst_reg, op_abi_size), Memory.sib(Memory.PtrSize.fromSize(op_abi_size), .{ .base = .{ .reg = addr_reg } }), ); @@ -6201,16 +8460,17 @@ fn airTry(self: *Self, inst: Air.Inst.Index) !void { const pl_op = self.air.instructions.items(.data)[inst].pl_op; const extra = self.air.extraData(Air.Try, pl_op.payload); const body = self.air.extra[extra.end..][0..extra.data.body_len]; - const err_union_ty = self.air.typeOf(pl_op.operand); + const err_union_ty = self.typeOf(pl_op.operand); const result = try self.genTry(inst, pl_op.operand, body, err_union_ty, false); return self.finishAir(inst, result, .{ .none, .none, .none }); } fn airTryPtr(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; const ty_pl = self.air.instructions.items(.data)[inst].ty_pl; const extra = self.air.extraData(Air.TryPtr, ty_pl.payload); const body = self.air.extra[extra.end..][0..extra.data.body_len]; - const err_union_ty = self.air.typeOf(extra.data.ptr).childType(); + const err_union_ty = self.typeOf(extra.data.ptr).childType(mod); const result = try self.genTry(inst, extra.data.ptr, body, err_union_ty, true); return self.finishAir(inst, result, .{ .none, .none, .none }); } @@ -6263,8 +8523,8 @@ fn genTry( fn airDbgStmt(self: *Self, inst: Air.Inst.Index) !void { const dbg_stmt = self.air.instructions.items(.data)[inst].dbg_stmt; _ = try self.addInst(.{ - .tag = .dbg_line, - .ops = undefined, + .tag = .pseudo, + .ops = .pseudo_dbg_line_line_column, .data = .{ .line_column = .{ .line = dbg_stmt.line, .column = dbg_stmt.column, @@ -6274,8 +8534,9 @@ fn airDbgStmt(self: *Self, inst: Air.Inst.Index) !void { } fn airDbgInline(self: *Self, inst: Air.Inst.Index) !void { - const ty_pl = self.air.instructions.items(.data)[inst].ty_pl; - const function = self.air.values[ty_pl.payload].castTag(.function).?.data; + const ty_fn = self.air.instructions.items(.data)[inst].ty_fn; + const mod = self.bin_file.options.module.?; + const function = mod.funcPtr(ty_fn.func); // TODO emit debug info for function change _ = function; return self.finishAir(inst, .unreach, .{ .none, .none, .none }); @@ -6289,7 +8550,7 @@ fn airDbgBlock(self: *Self, inst: 
Air.Inst.Index) !void { fn airDbgVar(self: *Self, inst: Air.Inst.Index) !void { const pl_op = self.air.instructions.items(.data)[inst].pl_op; const operand = pl_op.operand; - const ty = self.air.typeOf(operand); + const ty = self.typeOf(operand); const mcv = try self.resolveInst(operand); const name = self.air.nullTerminatedString(pl_op.payload); @@ -6301,7 +8562,8 @@ fn airDbgVar(self: *Self, inst: Air.Inst.Index) !void { } fn genCondBrMir(self: *Self, ty: Type, mcv: MCValue) !u32 { - const abi_size = ty.abiSize(self.target.*); + const mod = self.bin_file.options.module.?; + const abi_size = ty.abiSize(mod); switch (mcv) { .eflags => |cc| { // Here we map the opposites since the jump is to the false branch. @@ -6309,7 +8571,7 @@ fn genCondBrMir(self: *Self, ty: Type, mcv: MCValue) !u32 { }, .register => |reg| { try self.spillEflagsIfOccupied(); - try self.asmRegisterImmediate(.@"test", reg, Immediate.u(1)); + try self.asmRegisterImmediate(.{ ._, .@"test" }, reg, Immediate.u(1)); return self.asmJccReloc(undefined, .e); }, .immediate, @@ -6330,7 +8592,7 @@ fn genCondBrMir(self: *Self, ty: Type, mcv: MCValue) !u32 { fn airCondBr(self: *Self, inst: Air.Inst.Index) !void { const pl_op = self.air.instructions.items(.data)[inst].pl_op; const cond = try self.resolveInst(pl_op.operand); - const cond_ty = self.air.typeOf(pl_op.operand); + const cond_ty = self.typeOf(pl_op.operand); const extra = self.air.extraData(Air.CondBr, pl_op.payload); const then_body = self.air.extra[extra.end..][0..extra.data.then_body_len]; const else_body = self.air.extra[extra.end + then_body.len ..][0..extra.data.else_body_len]; @@ -6345,35 +8607,26 @@ fn airCondBr(self: *Self, inst: Air.Inst.Index) !void { if (Air.refToIndex(pl_op.operand)) |op_inst| self.processDeath(op_inst); } - const outer_state = try self.saveState(); - { - self.scope_generation += 1; - const inner_state = try self.saveState(); + self.scope_generation += 1; + const state = try self.saveState(); - for (liveness_cond_br.then_deaths) |operand| self.processDeath(operand); - try self.genBody(then_body); - try self.restoreState(inner_state, &.{}, .{ - .emit_instructions = false, - .update_tracking = true, - .resurrect = true, - .close_scope = true, - }); + for (liveness_cond_br.then_deaths) |operand| self.processDeath(operand); + try self.genBody(then_body); + try self.restoreState(state, &.{}, .{ + .emit_instructions = false, + .update_tracking = true, + .resurrect = true, + .close_scope = true, + }); - try self.performReloc(reloc); + try self.performReloc(reloc); - for (liveness_cond_br.else_deaths) |operand| self.processDeath(operand); - try self.genBody(else_body); - try self.restoreState(inner_state, &.{}, .{ - .emit_instructions = false, - .update_tracking = true, - .resurrect = true, - .close_scope = true, - }); - } - try self.restoreState(outer_state, &.{}, .{ + for (liveness_cond_br.else_deaths) |operand| self.processDeath(operand); + try self.genBody(else_body); + try self.restoreState(state, &.{}, .{ .emit_instructions = false, - .update_tracking = false, - .resurrect = false, + .update_tracking = true, + .resurrect = true, .close_scope = true, }); @@ -6383,6 +8636,7 @@ fn airCondBr(self: *Self, inst: Air.Inst.Index) !void { } fn isNull(self: *Self, inst: Air.Inst.Index, opt_ty: Type, opt_mcv: MCValue) !MCValue { + const mod = self.bin_file.options.module.?; switch (opt_mcv) { .register_overflow => |ro| return .{ .eflags = ro.eflags.negate() }, else => {}, @@ -6391,14 +8645,12 @@ fn isNull(self: *Self, inst: Air.Inst.Index, opt_ty: Type, 
opt_mcv: MCValue) !MC try self.spillEflagsIfOccupied(); self.eflags_inst = inst; - var pl_buf: Type.Payload.ElemType = undefined; - const pl_ty = opt_ty.optionalChild(&pl_buf); + const pl_ty = opt_ty.optionalChild(mod); - var ptr_buf: Type.SlicePtrFieldTypeBuffer = undefined; - const some_info: struct { off: i32, ty: Type } = if (opt_ty.optionalReprIsPayload()) - .{ .off = 0, .ty = if (pl_ty.isSlice()) pl_ty.slicePtrFieldType(&ptr_buf) else pl_ty } + const some_info: struct { off: i32, ty: Type } = if (opt_ty.optionalReprIsPayload(mod)) + .{ .off = 0, .ty = if (pl_ty.isSlice(mod)) pl_ty.slicePtrFieldType(mod) else pl_ty } else - .{ .off = @intCast(i32, pl_ty.abiSize(self.target.*)), .ty = Type.bool }; + .{ .off = @intCast(i32, pl_ty.abiSize(mod)), .ty = Type.bool }; switch (opt_mcv) { .none, @@ -6418,16 +8670,16 @@ fn isNull(self: *Self, inst: Air.Inst.Index, opt_ty: Type, opt_mcv: MCValue) !MC .register => |opt_reg| { if (some_info.off == 0) { - const some_abi_size = @intCast(u32, some_info.ty.abiSize(self.target.*)); + const some_abi_size = @intCast(u32, some_info.ty.abiSize(mod)); const alias_reg = registerAlias(opt_reg, some_abi_size); assert(some_abi_size * 8 == alias_reg.bitSize()); - try self.asmRegisterRegister(.@"test", alias_reg, alias_reg); + try self.asmRegisterRegister(.{ ._, .@"test" }, alias_reg, alias_reg); return .{ .eflags = .z }; } - assert(some_info.ty.tag() == .bool); - const opt_abi_size = @intCast(u32, opt_ty.abiSize(self.target.*)); + assert(some_info.ty.ip_index == .bool_type); + const opt_abi_size = @intCast(u32, opt_ty.abiSize(mod)); try self.asmRegisterImmediate( - .bt, + .{ ._, .bt }, registerAlias(opt_reg, opt_abi_size), Immediate.u(@intCast(u6, some_info.off * 8)), ); @@ -6444,9 +8696,9 @@ fn isNull(self: *Self, inst: Air.Inst.Index, opt_ty: Type, opt_mcv: MCValue) !MC defer self.register_manager.unlockReg(addr_reg_lock); try self.genSetReg(addr_reg, Type.usize, opt_mcv.address()); - const some_abi_size = @intCast(u32, some_info.ty.abiSize(self.target.*)); + const some_abi_size = @intCast(u32, some_info.ty.abiSize(mod)); try self.asmMemoryImmediate( - .cmp, + .{ ._, .cmp }, Memory.sib(Memory.PtrSize.fromSize(some_abi_size), .{ .base = .{ .reg = addr_reg }, .disp = some_info.off, @@ -6457,9 +8709,9 @@ fn isNull(self: *Self, inst: Air.Inst.Index, opt_ty: Type, opt_mcv: MCValue) !MC }, .indirect, .load_frame => { - const some_abi_size = @intCast(u32, some_info.ty.abiSize(self.target.*)); + const some_abi_size = @intCast(u32, some_info.ty.abiSize(mod)); try self.asmMemoryImmediate( - .cmp, + .{ ._, .cmp }, Memory.sib(Memory.PtrSize.fromSize(some_abi_size), switch (opt_mcv) { .indirect => |reg_off| .{ .base = .{ .reg = reg_off.reg }, @@ -6479,18 +8731,17 @@ fn isNull(self: *Self, inst: Air.Inst.Index, opt_ty: Type, opt_mcv: MCValue) !MC } fn isNullPtr(self: *Self, inst: Air.Inst.Index, ptr_ty: Type, ptr_mcv: MCValue) !MCValue { + const mod = self.bin_file.options.module.?; try self.spillEflagsIfOccupied(); self.eflags_inst = inst; - const opt_ty = ptr_ty.childType(); - var pl_buf: Type.Payload.ElemType = undefined; - const pl_ty = opt_ty.optionalChild(&pl_buf); + const opt_ty = ptr_ty.childType(mod); + const pl_ty = opt_ty.optionalChild(mod); - var ptr_buf: Type.SlicePtrFieldTypeBuffer = undefined; - const some_info: struct { off: i32, ty: Type } = if (opt_ty.optionalReprIsPayload()) - .{ .off = 0, .ty = if (pl_ty.isSlice()) pl_ty.slicePtrFieldType(&ptr_buf) else pl_ty } + const some_info: struct { off: i32, ty: Type } = if (opt_ty.optionalReprIsPayload(mod)) + .{ 
.off = 0, .ty = if (pl_ty.isSlice(mod)) pl_ty.slicePtrFieldType(mod) else pl_ty } else - .{ .off = @intCast(i32, pl_ty.abiSize(self.target.*)), .ty = Type.bool }; + .{ .off = @intCast(i32, pl_ty.abiSize(mod)), .ty = Type.bool }; const ptr_reg = switch (ptr_mcv) { .register => |reg| reg, @@ -6499,9 +8750,9 @@ fn isNullPtr(self: *Self, inst: Air.Inst.Index, ptr_ty: Type, ptr_mcv: MCValue) const ptr_lock = self.register_manager.lockReg(ptr_reg); defer if (ptr_lock) |lock| self.register_manager.unlockReg(lock); - const some_abi_size = @intCast(u32, some_info.ty.abiSize(self.target.*)); + const some_abi_size = @intCast(u32, some_info.ty.abiSize(mod)); try self.asmMemoryImmediate( - .cmp, + .{ ._, .cmp }, Memory.sib(Memory.PtrSize.fromSize(some_abi_size), .{ .base = .{ .reg = ptr_reg }, .disp = some_info.off, @@ -6512,9 +8763,10 @@ fn isNullPtr(self: *Self, inst: Air.Inst.Index, ptr_ty: Type, ptr_mcv: MCValue) } fn isErr(self: *Self, maybe_inst: ?Air.Inst.Index, ty: Type, operand: MCValue) !MCValue { - const err_type = ty.errorUnionSet(); + const mod = self.bin_file.options.module.?; + const err_type = ty.errorUnionSet(mod); - if (err_type.errorSetIsEmpty()) { + if (err_type.errorSetIsEmpty(mod)) { return MCValue{ .immediate = 0 }; // always false } @@ -6523,7 +8775,7 @@ fn isErr(self: *Self, maybe_inst: ?Air.Inst.Index, ty: Type, operand: MCValue) ! self.eflags_inst = inst; } - const err_off = errUnionErrorOffset(ty.errorUnionPayload(), self.target.*); + const err_off = errUnionErrorOffset(ty.errorUnionPayload(mod), mod); switch (operand) { .register => |reg| { const eu_lock = self.register_manager.lockReg(reg); @@ -6532,14 +8784,24 @@ fn isErr(self: *Self, maybe_inst: ?Air.Inst.Index, ty: Type, operand: MCValue) ! const tmp_reg = try self.copyToTmpRegister(ty, operand); if (err_off > 0) { const shift = @intCast(u6, err_off * 8); - try self.genShiftBinOpMir(.shr, ty, .{ .register = tmp_reg }, .{ .immediate = shift }); + try self.genShiftBinOpMir( + .{ ._r, .sh }, + ty, + .{ .register = tmp_reg }, + .{ .immediate = shift }, + ); } else { try self.truncateRegister(Type.anyerror, tmp_reg); } - try self.genBinOpMir(.cmp, Type.anyerror, .{ .register = tmp_reg }, .{ .immediate = 0 }); + try self.genBinOpMir( + .{ ._, .cmp }, + Type.anyerror, + .{ .register = tmp_reg }, + .{ .immediate = 0 }, + ); }, .load_frame => |frame_addr| try self.genBinOpMir( - .cmp, + .{ ._, .cmp }, Type.anyerror, .{ .load_frame = .{ .index = frame_addr.index, @@ -6571,7 +8833,7 @@ fn isNonErr(self: *Self, inst: Air.Inst.Index, ty: Type, operand: MCValue) !MCVa fn airIsNull(self: *Self, inst: Air.Inst.Index) !void { const un_op = self.air.instructions.items(.data)[inst].un_op; const operand = try self.resolveInst(un_op); - const ty = self.air.typeOf(un_op); + const ty = self.typeOf(un_op); const result = try self.isNull(inst, ty, operand); return self.finishAir(inst, result, .{ un_op, .none, .none }); } @@ -6579,7 +8841,7 @@ fn airIsNull(self: *Self, inst: Air.Inst.Index) !void { fn airIsNullPtr(self: *Self, inst: Air.Inst.Index) !void { const un_op = self.air.instructions.items(.data)[inst].un_op; const operand = try self.resolveInst(un_op); - const ty = self.air.typeOf(un_op); + const ty = self.typeOf(un_op); const result = try self.isNullPtr(inst, ty, operand); return self.finishAir(inst, result, .{ un_op, .none, .none }); } @@ -6587,7 +8849,7 @@ fn airIsNullPtr(self: *Self, inst: Air.Inst.Index) !void { fn airIsNonNull(self: *Self, inst: Air.Inst.Index) !void { const un_op = self.air.instructions.items(.data)[inst].un_op; 
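The isNull/isNullPtr and isErr hunks above reduce to a layout question. For optionals whose representation is the payload itself (pointers and slices, where only the pointer field is compared), null is the all-zero payload; otherwise a bool flag lives right after the payload, at offset abiSize(payload). For error unions, the anyerror-sized field at errUnionErrorOffset is compared against zero. A byte-level sketch of those checks, with illustrative parameter names:

```zig
/// Optional check: either test the payload itself for all-zero, or read the
/// flag byte stored right after the payload.
fn isNullModel(bytes: []const u8, payload_size: usize, repr_is_payload: bool) bool {
    if (repr_is_payload) {
        // e.g. `?*T`: null is the all-zero payload, hence the
        // `test reg, reg` / `cmp [mem], 0` against the payload in the hunk above.
        for (bytes[0..payload_size]) |b| if (b != 0) return false;
        return true;
    }
    return bytes[payload_size] == 0; // trailing bool at abiSize(payload)
}

/// Error-union check: the error code (0 meaning "no error") lives at
/// errUnionErrorOffset; isErr only compares that field against zero.
fn isErrModel(err_code: u16) bool {
    return err_code != 0;
}
```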
const operand = try self.resolveInst(un_op); - const ty = self.air.typeOf(un_op); + const ty = self.typeOf(un_op); const result = switch (try self.isNull(inst, ty, operand)) { .eflags => |cc| .{ .eflags = cc.negate() }, else => unreachable, @@ -6598,7 +8860,7 @@ fn airIsNonNull(self: *Self, inst: Air.Inst.Index) !void { fn airIsNonNullPtr(self: *Self, inst: Air.Inst.Index) !void { const un_op = self.air.instructions.items(.data)[inst].un_op; const operand = try self.resolveInst(un_op); - const ty = self.air.typeOf(un_op); + const ty = self.typeOf(un_op); const result = switch (try self.isNullPtr(inst, ty, operand)) { .eflags => |cc| .{ .eflags = cc.negate() }, else => unreachable, @@ -6609,12 +8871,13 @@ fn airIsNonNullPtr(self: *Self, inst: Air.Inst.Index) !void { fn airIsErr(self: *Self, inst: Air.Inst.Index) !void { const un_op = self.air.instructions.items(.data)[inst].un_op; const operand = try self.resolveInst(un_op); - const ty = self.air.typeOf(un_op); + const ty = self.typeOf(un_op); const result = try self.isErr(inst, ty, operand); return self.finishAir(inst, result, .{ un_op, .none, .none }); } fn airIsErrPtr(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; const un_op = self.air.instructions.items(.data)[inst].un_op; const operand_ptr = try self.resolveInst(un_op); @@ -6632,10 +8895,10 @@ fn airIsErrPtr(self: *Self, inst: Air.Inst.Index) !void { break :blk try self.allocRegOrMem(inst, true); } }; - const ptr_ty = self.air.typeOf(un_op); + const ptr_ty = self.typeOf(un_op); try self.load(operand, ptr_ty, operand_ptr); - const result = try self.isErr(inst, ptr_ty.childType(), operand); + const result = try self.isErr(inst, ptr_ty.childType(mod), operand); return self.finishAir(inst, result, .{ un_op, .none, .none }); } @@ -6643,12 +8906,13 @@ fn airIsErrPtr(self: *Self, inst: Air.Inst.Index) !void { fn airIsNonErr(self: *Self, inst: Air.Inst.Index) !void { const un_op = self.air.instructions.items(.data)[inst].un_op; const operand = try self.resolveInst(un_op); - const ty = self.air.typeOf(un_op); + const ty = self.typeOf(un_op); const result = try self.isNonErr(inst, ty, operand); return self.finishAir(inst, result, .{ un_op, .none, .none }); } fn airIsNonErrPtr(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; const un_op = self.air.instructions.items(.data)[inst].un_op; const operand_ptr = try self.resolveInst(un_op); @@ -6666,10 +8930,10 @@ fn airIsNonErrPtr(self: *Self, inst: Air.Inst.Index) !void { break :blk try self.allocRegOrMem(inst, true); } }; - const ptr_ty = self.air.typeOf(un_op); + const ptr_ty = self.typeOf(un_op); try self.load(operand, ptr_ty, operand_ptr); - const result = try self.isNonErr(inst, ptr_ty.childType(), operand); + const result = try self.isNonErr(inst, ptr_ty.childType(mod), operand); return self.finishAir(inst, result, .{ un_op, .none, .none }); } @@ -6732,7 +8996,7 @@ fn airBlock(self: *Self, inst: Air.Inst.Index) !void { fn airSwitchBr(self: *Self, inst: Air.Inst.Index) !void { const pl_op = self.air.instructions.items(.data)[inst].pl_op; const condition = try self.resolveInst(pl_op.operand); - const condition_ty = self.air.typeOf(pl_op.operand); + const condition_ty = self.typeOf(pl_op.operand); const switch_br = self.air.extraData(Air.SwitchBr, pl_op.payload); var extra_index: usize = switch_br.end; var case_i: u32 = 0; @@ -6746,64 +9010,56 @@ fn airSwitchBr(self: *Self, inst: Air.Inst.Index) !void { if (Air.refToIndex(pl_op.operand)) |op_inst| 
self.processDeath(op_inst); } - const outer_state = try self.saveState(); - { - self.scope_generation += 1; - const inner_state = try self.saveState(); - - while (case_i < switch_br.data.cases_len) : (case_i += 1) { - const case = self.air.extraData(Air.SwitchBr.Case, extra_index); - const items = @ptrCast( - []const Air.Inst.Ref, - self.air.extra[case.end..][0..case.data.items_len], - ); - const case_body = self.air.extra[case.end + items.len ..][0..case.data.body_len]; - extra_index = case.end + items.len + case_body.len; + self.scope_generation += 1; + const state = try self.saveState(); - var relocs = try self.gpa.alloc(u32, items.len); - defer self.gpa.free(relocs); + while (case_i < switch_br.data.cases_len) : (case_i += 1) { + const case = self.air.extraData(Air.SwitchBr.Case, extra_index); + const items = @ptrCast( + []const Air.Inst.Ref, + self.air.extra[case.end..][0..case.data.items_len], + ); + const case_body = self.air.extra[case.end + items.len ..][0..case.data.body_len]; + extra_index = case.end + items.len + case_body.len; - for (items, relocs) |item, *reloc| { - try self.spillEflagsIfOccupied(); - const item_mcv = try self.resolveInst(item); - try self.genBinOpMir(.cmp, condition_ty, condition, item_mcv); - reloc.* = try self.asmJccReloc(undefined, .ne); - } + var relocs = try self.gpa.alloc(u32, items.len); + defer self.gpa.free(relocs); - for (liveness.deaths[case_i]) |operand| self.processDeath(operand); + try self.spillEflagsIfOccupied(); + for (items, relocs, 0..) |item, *reloc, i| { + const item_mcv = try self.resolveInst(item); + try self.genBinOpMir(.{ ._, .cmp }, condition_ty, condition, item_mcv); + reloc.* = try self.asmJccReloc(undefined, if (i < relocs.len - 1) .e else .ne); + } - try self.genBody(case_body); - try self.restoreState(inner_state, &.{}, .{ - .emit_instructions = false, - .update_tracking = true, - .resurrect = true, - .close_scope = true, - }); + for (liveness.deaths[case_i]) |operand| self.processDeath(operand); - for (relocs) |reloc| try self.performReloc(reloc); - } + for (relocs[0 .. 
relocs.len - 1]) |reloc| try self.performReloc(reloc); + try self.genBody(case_body); + try self.restoreState(state, &.{}, .{ + .emit_instructions = false, + .update_tracking = true, + .resurrect = true, + .close_scope = true, + }); - if (switch_br.data.else_body_len > 0) { - const else_body = self.air.extra[extra_index..][0..switch_br.data.else_body_len]; + try self.performReloc(relocs[relocs.len - 1]); + } - const else_deaths = liveness.deaths.len - 1; - for (liveness.deaths[else_deaths]) |operand| self.processDeath(operand); + if (switch_br.data.else_body_len > 0) { + const else_body = self.air.extra[extra_index..][0..switch_br.data.else_body_len]; - try self.genBody(else_body); - try self.restoreState(inner_state, &.{}, .{ - .emit_instructions = false, - .update_tracking = true, - .resurrect = true, - .close_scope = true, - }); - } + const else_deaths = liveness.deaths.len - 1; + for (liveness.deaths[else_deaths]) |operand| self.processDeath(operand); + + try self.genBody(else_body); + try self.restoreState(state, &.{}, .{ + .emit_instructions = false, + .update_tracking = true, + .resurrect = true, + .close_scope = true, + }); } - try self.restoreState(outer_state, &.{}, .{ - .emit_instructions = false, - .update_tracking = false, - .resurrect = false, - .close_scope = true, - }); // We already took care of pl_op.operand earlier, so we're going to pass .none here return self.finishAir(inst, .unreach, .{ .none, .none, .none }); @@ -6812,23 +9068,24 @@ fn airSwitchBr(self: *Self, inst: Air.Inst.Index) !void { fn performReloc(self: *Self, reloc: Mir.Inst.Index) !void { const next_inst = @intCast(u32, self.mir_instructions.len); switch (self.mir_instructions.items(.tag)[reloc]) { - .jcc => { - self.mir_instructions.items(.data)[reloc].inst_cc.inst = next_inst; - }, - .jmp_reloc => { - self.mir_instructions.items(.data)[reloc].inst = next_inst; + .j, .jmp => {}, + .pseudo => switch (self.mir_instructions.items(.ops)[reloc]) { + .pseudo_j_z_and_np_inst, .pseudo_j_nz_or_p_inst => {}, + else => unreachable, }, else => unreachable, } + self.mir_instructions.items(.data)[reloc].inst.inst = next_inst; } fn airBr(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; const br = self.air.instructions.items(.data)[inst].br; const src_mcv = try self.resolveInst(br.operand); - const block_ty = self.air.typeOfIndex(br.block_inst); + const block_ty = self.typeOfIndex(br.block_inst); const block_unused = - !block_ty.hasRuntimeBitsIgnoreComptime() or self.liveness.isUnused(br.block_inst); + !block_ty.hasRuntimeBitsIgnoreComptime(mod) or self.liveness.isUnused(br.block_inst); const block_tracking = self.inst_tracking.getPtr(br.block_inst).?; const block_data = self.blocks.getPtr(br.block_inst).?; const first_br = block_data.relocs.items.len == 0; @@ -6951,7 +9208,7 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void { const arg_mcv = try self.resolveInst(input); try self.register_manager.getReg(reg, null); - try self.genSetReg(reg, self.air.typeOf(input), arg_mcv); + try self.genSetReg(reg, self.typeOf(input), arg_mcv); } { @@ -6967,9 +9224,9 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void { } const asm_source = mem.sliceAsBytes(self.air.extra[extra_i..])[0..extra.data.source_len]; - var line_it = mem.tokenize(u8, asm_source, "\n\r;"); + var line_it = mem.tokenizeAny(u8, asm_source, "\n\r;"); while (line_it.next()) |line| { - var mnem_it = mem.tokenize(u8, line, " \t"); + var mnem_it = mem.tokenizeAny(u8, line, " \t"); const mnem_str = mnem_it.next() orelse 
continue; if (mem.startsWith(u8, mnem_str, "#")) continue; @@ -6983,7 +9240,7 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void { .qword else null; - const mnem = mnem: { + const mnem_tag = Mir.Inst.FixedTag{ ._, mnem: { if (mnem_size) |_| { if (std.meta.stringToEnum(Mir.Inst.Tag, mnem_str[0 .. mnem_str.len - 1])) |mnem| { break :mnem mnem; @@ -6991,9 +9248,9 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void { } break :mnem std.meta.stringToEnum(Mir.Inst.Tag, mnem_str) orelse return self.fail("Invalid mnemonic: '{s}'", .{mnem_str}); - }; + } }; - var op_it = mem.tokenize(u8, mnem_it.rest(), ","); + var op_it = mem.tokenizeScalar(u8, mnem_it.rest(), ','); var ops = [1]encoder.Instruction.Operand{.none} ** 4; for (&ops) |*op| { const op_str = mem.trim(u8, op_it.next() orelse break, " \t"); @@ -7042,51 +9299,51 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void { } else if (op_it.next()) |op_str| return self.fail("Extra operand: '{s}'", .{op_str}); (switch (ops[0]) { - .none => self.asmOpOnly(mnem), + .none => self.asmOpOnly(mnem_tag), .reg => |reg0| switch (ops[1]) { - .none => self.asmRegister(mnem, reg0), + .none => self.asmRegister(mnem_tag, reg0), .reg => |reg1| switch (ops[2]) { - .none => self.asmRegisterRegister(mnem, reg1, reg0), + .none => self.asmRegisterRegister(mnem_tag, reg1, reg0), .reg => |reg2| switch (ops[3]) { - .none => self.asmRegisterRegisterRegister(mnem, reg2, reg1, reg0), + .none => self.asmRegisterRegisterRegister(mnem_tag, reg2, reg1, reg0), else => error.InvalidInstruction, }, .mem => |mem2| switch (ops[3]) { - .none => self.asmMemoryRegisterRegister(mnem, mem2, reg1, reg0), + .none => self.asmMemoryRegisterRegister(mnem_tag, mem2, reg1, reg0), else => error.InvalidInstruction, }, else => error.InvalidInstruction, }, .mem => |mem1| switch (ops[2]) { - .none => self.asmMemoryRegister(mnem, mem1, reg0), + .none => self.asmMemoryRegister(mnem_tag, mem1, reg0), else => error.InvalidInstruction, }, else => error.InvalidInstruction, }, .mem => |mem0| switch (ops[1]) { - .none => self.asmMemory(mnem, mem0), + .none => self.asmMemory(mnem_tag, mem0), .reg => |reg1| switch (ops[2]) { - .none => self.asmRegisterMemory(mnem, reg1, mem0), + .none => self.asmRegisterMemory(mnem_tag, reg1, mem0), else => error.InvalidInstruction, }, else => error.InvalidInstruction, }, .imm => |imm0| switch (ops[1]) { - .none => self.asmImmediate(mnem, imm0), + .none => self.asmImmediate(mnem_tag, imm0), .reg => |reg1| switch (ops[2]) { - .none => self.asmRegisterImmediate(mnem, reg1, imm0), + .none => self.asmRegisterImmediate(mnem_tag, reg1, imm0), .reg => |reg2| switch (ops[3]) { - .none => self.asmRegisterRegisterImmediate(mnem, reg2, reg1, imm0), + .none => self.asmRegisterRegisterImmediate(mnem_tag, reg2, reg1, imm0), else => error.InvalidInstruction, }, .mem => |mem2| switch (ops[3]) { - .none => self.asmMemoryRegisterImmediate(mnem, mem2, reg1, imm0), + .none => self.asmMemoryRegisterImmediate(mnem_tag, mem2, reg1, imm0), else => error.InvalidInstruction, }, else => error.InvalidInstruction, }, .mem => |mem1| switch (ops[2]) { - .none => self.asmMemoryImmediate(mnem, mem1, imm0), + .none => self.asmMemoryImmediate(mnem_tag, mem1, imm0), else => error.InvalidInstruction, }, else => error.InvalidInstruction, @@ -7095,7 +9352,7 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void { error.InvalidInstruction => return self.fail( "Invalid instruction: '{s} {s} {s} {s} {s}'", .{ - @tagName(mnem), + @tagName(mnem_tag[1]), @tagName(ops[0]), @tagName(ops[1]), @tagName(ops[2]), @@ 
-7126,26 +9383,203 @@ fn airAsm(self: *Self, inst: Air.Inst.Index) !void { return self.finishAirResult(inst, result); } -fn movMirTag(self: *Self, ty: Type) !Mir.Inst.Tag { - return switch (ty.zigTypeTag()) { - else => .mov, +const MoveStrategy = union(enum) { + move: Mir.Inst.FixedTag, + insert_extract: InsertExtract, + vex_insert_extract: InsertExtract, + + const InsertExtract = struct { + insert: Mir.Inst.FixedTag, + extract: Mir.Inst.FixedTag, + }; +}; +fn moveStrategy(self: *Self, ty: Type, aligned: bool) !MoveStrategy { + const mod = self.bin_file.options.module.?; + switch (ty.zigTypeTag(mod)) { + else => return .{ .move = .{ ._, .mov } }, .Float => switch (ty.floatBits(self.target.*)) { - 16 => .mov, - 32 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse)) .movss else .mov, - 64 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse2)) .movsd else .mov, - else => return self.fail("TODO movMirTag for {}", .{ - ty.fmt(self.bin_file.options.module.?), - }), + 16 => return if (self.hasFeature(.avx)) .{ .vex_insert_extract = .{ + .insert = .{ .vp_w, .insr }, + .extract = .{ .vp_w, .extr }, + } } else .{ .insert_extract = .{ + .insert = .{ .p_w, .insr }, + .extract = .{ .p_w, .extr }, + } }, + 32 => return .{ .move = if (self.hasFeature(.avx)) .{ .v_ss, .mov } else .{ ._ss, .mov } }, + 64 => return .{ .move = if (self.hasFeature(.avx)) .{ .v_sd, .mov } else .{ ._sd, .mov } }, + 128 => return .{ .move = if (self.hasFeature(.avx)) + if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } + else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } }, + else => {}, }, - }; + .Vector => switch (ty.childType(mod).zigTypeTag(mod)) { + .Int => switch (ty.childType(mod).intInfo(mod).bits) { + 8 => switch (ty.vectorLen(mod)) { + 1 => if (self.hasFeature(.avx)) return .{ .vex_insert_extract = .{ + .insert = .{ .vp_b, .insr }, + .extract = .{ .vp_b, .extr }, + } } else if (self.hasFeature(.sse4_2)) return .{ .insert_extract = .{ + .insert = .{ .p_b, .insr }, + .extract = .{ .p_b, .extr }, + } }, + 2 => return if (self.hasFeature(.avx)) .{ .vex_insert_extract = .{ + .insert = .{ .vp_w, .insr }, + .extract = .{ .vp_w, .extr }, + } } else .{ .insert_extract = .{ + .insert = .{ .p_w, .insr }, + .extract = .{ .p_w, .extr }, + } }, + 3...4 => return .{ .move = if (self.hasFeature(.avx)) + .{ .v_d, .mov } + else + .{ ._d, .mov } }, + 5...8 => return .{ .move = if (self.hasFeature(.avx)) + .{ .v_q, .mov } + else + .{ ._q, .mov } }, + 9...16 => return .{ .move = if (self.hasFeature(.avx)) + if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } + else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } }, + 17...32 => if (self.hasFeature(.avx)) + return .{ .move = if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } }, + else => {}, + }, + 16 => switch (ty.vectorLen(mod)) { + 1 => return if (self.hasFeature(.avx)) .{ .vex_insert_extract = .{ + .insert = .{ .vp_w, .insr }, + .extract = .{ .vp_w, .extr }, + } } else .{ .insert_extract = .{ + .insert = .{ .p_w, .insr }, + .extract = .{ .p_w, .extr }, + } }, + 2 => return .{ .move = if (self.hasFeature(.avx)) + .{ .v_d, .mov } + else + .{ ._d, .mov } }, + 3...4 => return .{ .move = if (self.hasFeature(.avx)) + .{ .v_q, .mov } + else + .{ ._q, .mov } }, + 5...8 => return .{ .move = if (self.hasFeature(.avx)) + if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } + else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } }, + 9...16 => if (self.hasFeature(.avx)) + return .{ .move = if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } }, + else 
=> {}, + }, + 32 => switch (ty.vectorLen(mod)) { + 1 => return .{ .move = if (self.hasFeature(.avx)) + .{ .v_d, .mov } + else + .{ ._d, .mov } }, + 2 => return .{ .move = if (self.hasFeature(.avx)) + .{ .v_q, .mov } + else + .{ ._q, .mov } }, + 3...4 => return .{ .move = if (self.hasFeature(.avx)) + if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } + else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } }, + 5...8 => if (self.hasFeature(.avx)) + return .{ .move = if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } }, + else => {}, + }, + 64 => switch (ty.vectorLen(mod)) { + 1 => return .{ .move = if (self.hasFeature(.avx)) + .{ .v_q, .mov } + else + .{ ._q, .mov } }, + 2 => return .{ .move = if (self.hasFeature(.avx)) + if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } + else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } }, + 3...4 => if (self.hasFeature(.avx)) + return .{ .move = if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } }, + else => {}, + }, + 128 => switch (ty.vectorLen(mod)) { + 1 => return .{ .move = if (self.hasFeature(.avx)) + if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } + else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } }, + 2 => if (self.hasFeature(.avx)) + return .{ .move = if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } }, + else => {}, + }, + 256 => switch (ty.vectorLen(mod)) { + 1 => if (self.hasFeature(.avx)) + return .{ .move = if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } }, + else => {}, + }, + else => {}, + }, + .Float => switch (ty.childType(mod).floatBits(self.target.*)) { + 16 => switch (ty.vectorLen(mod)) { + 1 => return if (self.hasFeature(.avx)) .{ .vex_insert_extract = .{ + .insert = .{ .vp_w, .insr }, + .extract = .{ .vp_w, .extr }, + } } else .{ .insert_extract = .{ + .insert = .{ .p_w, .insr }, + .extract = .{ .p_w, .extr }, + } }, + 2 => return .{ .move = if (self.hasFeature(.avx)) + .{ .v_d, .mov } + else + .{ ._d, .mov } }, + 3...4 => return .{ .move = if (self.hasFeature(.avx)) + .{ .v_q, .mov } + else + .{ ._q, .mov } }, + 5...8 => return .{ .move = if (self.hasFeature(.avx)) + if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } + else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } }, + 9...16 => if (self.hasFeature(.avx)) + return .{ .move = if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } }, + else => {}, + }, + 32 => switch (ty.vectorLen(mod)) { + 1 => return .{ .move = if (self.hasFeature(.avx)) + .{ .v_ss, .mov } + else + .{ ._ss, .mov } }, + 2 => return .{ .move = if (self.hasFeature(.avx)) + .{ .v_sd, .mov } + else + .{ ._sd, .mov } }, + 3...4 => return .{ .move = if (self.hasFeature(.avx)) + if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu } + else if (aligned) .{ ._ps, .mova } else .{ ._ps, .movu } }, + 5...8 => if (self.hasFeature(.avx)) + return .{ .move = if (aligned) .{ .v_ps, .mova } else .{ .v_ps, .movu } }, + else => {}, + }, + 64 => switch (ty.vectorLen(mod)) { + 1 => return .{ .move = if (self.hasFeature(.avx)) + .{ .v_sd, .mov } + else + .{ ._sd, .mov } }, + 2 => return .{ .move = if (self.hasFeature(.avx)) + if (aligned) .{ .v_pd, .mova } else .{ .v_pd, .movu } + else if (aligned) .{ ._pd, .mova } else .{ ._pd, .movu } }, + 3...4 => if (self.hasFeature(.avx)) + return .{ .move = if (aligned) .{ .v_pd, .mova } else .{ .v_pd, .movu } }, + else => {}, + }, + 128 => switch (ty.vectorLen(mod)) { + 1 => return .{ .move = if (self.hasFeature(.avx)) + if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } + else if (aligned) .{ ._, .movdqa } else .{ ._, .movdqu } }, + 2 
=> if (self.hasFeature(.avx)) + return .{ .move = if (aligned) .{ .v_, .movdqa } else .{ .v_, .movdqu } }, + else => {}, + }, + else => {}, + }, + else => {}, + }, + } + return self.fail("TODO moveStrategy for {}", .{ty.fmt(self.bin_file.options.module.?)}); } fn genCopy(self: *Self, ty: Type, dst_mcv: MCValue, src_mcv: MCValue) InnerError!void { - const src_lock = switch (src_mcv) { - .register => |reg| self.register_manager.lockReg(reg), - .register_overflow => |ro| self.register_manager.lockReg(ro.reg), - else => null, - }; + const src_lock = if (src_mcv.getReg()) |reg| self.register_manager.lockReg(reg) else null; defer if (src_lock) |lock| self.register_manager.unlockReg(lock); switch (dst_mcv) { @@ -7206,8 +9640,10 @@ fn genCopy(self: *Self, ty: Type, dst_mcv: MCValue, src_mcv: MCValue) InnerError } fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerError!void { - const abi_size = @intCast(u32, ty.abiSize(self.target.*)); - if (abi_size > 8) return self.fail("genSetReg called with a value larger than one register", .{}); + const mod = self.bin_file.options.module.?; + const abi_size = @intCast(u32, ty.abiSize(mod)); + if (abi_size * 8 > dst_reg.bitSize()) + return self.fail("genSetReg called with a value larger than dst_reg", .{}); switch (src_mcv) { .none, .unreach, @@ -7222,50 +9658,113 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr if (imm == 0) { // 32-bit moves zero-extend to 64-bit, so xoring the 32-bit // register is the fastest way to zero a register. - try self.asmRegisterRegister(.xor, dst_reg.to32(), dst_reg.to32()); + try self.asmRegisterRegister(.{ ._, .xor }, dst_reg.to32(), dst_reg.to32()); } else if (abi_size > 4 and math.cast(u32, imm) != null) { // 32-bit moves zero-extend to 64-bit. 
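// Illustrative sketch, not part of the commit above: the immediate cases of
// genSetReg pick between four encodings. The enum and function names here are
// made up for the example; only the decision logic mirrors the code. Zero is
// cheapest as `xor r32, r32`, values that fit an unsigned 32-bit immediate
// rely on the implicit zero-extension of 32-bit moves, small negative values
// use the sign-extended imm32 form, and everything else pays for a full-width
// immediate (movabs for 64-bit registers).
const std = @import("std");

const ImmStrategy = enum { xor_zero, mov_imm32_zext, mov_imm32_sext, mov_imm_full };

fn pickImmStrategy(abi_size: u32, imm: u64) ImmStrategy {
    if (imm == 0) return .xor_zero;
    if (abi_size > 4 and std.math.cast(u32, imm) != null) return .mov_imm32_zext;
    if (abi_size <= 4 and @bitCast(i64, imm) < 0) return .mov_imm32_sext;
    return .mov_imm_full;
}

test "immediate strategy selection" {
    try std.testing.expectEqual(ImmStrategy.xor_zero, pickImmStrategy(8, 0));
    try std.testing.expectEqual(ImmStrategy.mov_imm32_zext, pickImmStrategy(8, 0xffff_ffff));
    try std.testing.expectEqual(ImmStrategy.mov_imm_full, pickImmStrategy(8, 0x1_0000_0000));
}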
- try self.asmRegisterImmediate(.mov, dst_reg.to32(), Immediate.u(imm)); + try self.asmRegisterImmediate(.{ ._, .mov }, dst_reg.to32(), Immediate.u(imm)); } else if (abi_size <= 4 and @bitCast(i64, imm) < 0) { try self.asmRegisterImmediate( - .mov, + .{ ._, .mov }, registerAlias(dst_reg, abi_size), Immediate.s(@intCast(i32, @bitCast(i64, imm))), ); } else { try self.asmRegisterImmediate( - .mov, + .{ ._, .mov }, registerAlias(dst_reg, abi_size), Immediate.u(imm), ); } }, - .register => |src_reg| if (dst_reg.id() != src_reg.id()) try self.asmRegisterRegister( - if ((dst_reg.class() == .floating_point) == (src_reg.class() == .floating_point)) - try self.movMirTag(ty) - else switch (abi_size) { - 4 => .movd, - 8 => .movq, - else => return self.fail( - "unsupported register copy from {s} to {s}", - .{ @tagName(src_reg), @tagName(dst_reg) }, + .register => |src_reg| if (dst_reg.id() != src_reg.id()) switch (dst_reg.class()) { + .general_purpose => switch (src_reg.class()) { + .general_purpose => try self.asmRegisterRegister( + .{ ._, .mov }, + registerAlias(dst_reg, abi_size), + registerAlias(src_reg, abi_size), + ), + .segment => try self.asmRegisterRegister( + .{ ._, .mov }, + registerAlias(dst_reg, abi_size), + src_reg, + ), + .sse => try self.asmRegisterRegister( + switch (abi_size) { + 1...4 => if (self.hasFeature(.avx)) .{ .v_d, .mov } else .{ ._d, .mov }, + 5...8 => if (self.hasFeature(.avx)) .{ .v_q, .mov } else .{ ._q, .mov }, + else => unreachable, + }, + registerAlias(dst_reg, @max(abi_size, 4)), + src_reg.to128(), ), + .x87, .mmx => unreachable, }, - registerAlias(dst_reg, abi_size), - registerAlias(src_reg, abi_size), - ), - .register_offset, .indirect, .load_frame, .lea_frame => try self.asmRegisterMemory( - switch (src_mcv) { - .register_offset => |reg_off| switch (reg_off.off) { - 0 => return self.genSetReg(dst_reg, ty, .{ .register = reg_off.reg }), - else => .lea, + .segment => try self.asmRegisterRegister( + .{ ._, .mov }, + dst_reg, + switch (src_reg.class()) { + .general_purpose, .segment => registerAlias(src_reg, abi_size), + .sse => try self.copyToTmpRegister(ty, src_mcv), + .x87, .mmx => unreachable, }, - .indirect, .load_frame => try self.movMirTag(ty), - .lea_frame => .lea, - else => unreachable, + ), + .sse => switch (src_reg.class()) { + .general_purpose => try self.asmRegisterRegister( + switch (abi_size) { + 1...4 => if (self.hasFeature(.avx)) .{ .v_d, .mov } else .{ ._d, .mov }, + 5...8 => if (self.hasFeature(.avx)) .{ .v_q, .mov } else .{ ._q, .mov }, + else => unreachable, + }, + dst_reg.to128(), + registerAlias(src_reg, @max(abi_size, 4)), + ), + .segment => try self.genSetReg( + dst_reg, + ty, + .{ .register = try self.copyToTmpRegister(ty, src_mcv) }, + ), + .sse => try self.asmRegisterRegister( + if (@as(?Mir.Inst.FixedTag, switch (ty.scalarType(mod).zigTypeTag(mod)) { + else => switch (abi_size) { + 1...4 => if (self.hasFeature(.avx)) .{ .v_d, .mov } else .{ ._d, .mov }, + 5...8 => if (self.hasFeature(.avx)) .{ .v_q, .mov } else .{ ._q, .mov }, + 9...16 => if (self.hasFeature(.avx)) .{ .v_, .movdqa } else .{ ._, .movdqa }, + 17...32 => if (self.hasFeature(.avx)) .{ .v_, .movdqa } else null, + else => null, + }, + .Float => switch (ty.scalarType(mod).floatBits(self.target.*)) { + 16, 128 => switch (abi_size) { + 2...4 => if (self.hasFeature(.avx)) .{ .v_d, .mov } else .{ ._d, .mov }, + 5...8 => if (self.hasFeature(.avx)) .{ .v_q, .mov } else .{ ._q, .mov }, + 9...16 => if (self.hasFeature(.avx)) + .{ .v_, .movdqa } + else + .{ ._, .movdqa }, + 17...32 => if 
(self.hasFeature(.avx)) .{ .v_, .movdqa } else null, + else => null, + }, + 32 => if (self.hasFeature(.avx)) .{ .v_ps, .mova } else .{ ._ps, .mova }, + 64 => if (self.hasFeature(.avx)) .{ .v_pd, .mova } else .{ ._pd, .mova }, + 80 => null, + else => unreachable, + }, + })) |tag| tag else return self.fail("TODO implement genSetReg for {}", .{ + ty.fmt(self.bin_file.options.module.?), + }), + registerAlias(dst_reg, abi_size), + registerAlias(src_reg, abi_size), + ), + .x87, .mmx => unreachable, }, - registerAlias(dst_reg, abi_size), - Memory.sib(Memory.PtrSize.fromSize(abi_size), switch (src_mcv) { + .x87, .mmx => unreachable, + }, + .register_offset, + .indirect, + .load_frame, + .lea_frame, + => { + const dst_alias = registerAlias(dst_reg, abi_size); + const src_mem = Memory.sib(Memory.PtrSize.fromSize(abi_size), switch (src_mcv) { .register_offset, .indirect => |reg_off| .{ .base = .{ .reg = reg_off.reg }, .disp = reg_off.off, @@ -7275,31 +9774,82 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr .disp = frame_addr.off, }, else => unreachable, - }), - ), + }); + switch (@as(MoveStrategy, switch (src_mcv) { + .register_offset => |reg_off| switch (reg_off.off) { + 0 => return self.genSetReg(dst_reg, ty, .{ .register = reg_off.reg }), + else => .{ .move = .{ ._, .lea } }, + }, + .indirect => try self.moveStrategy(ty, false), + .load_frame => |frame_addr| try self.moveStrategy( + ty, + self.getFrameAddrAlignment(frame_addr) >= ty.abiAlignment(mod), + ), + .lea_frame => .{ .move = .{ ._, .lea } }, + else => unreachable, + })) { + .move => |tag| try self.asmRegisterMemory(tag, dst_alias, src_mem), + .insert_extract => |ie| try self.asmRegisterMemoryImmediate( + ie.insert, + dst_alias, + src_mem, + Immediate.u(0), + ), + .vex_insert_extract => |ie| try self.asmRegisterRegisterMemoryImmediate( + ie.insert, + dst_alias, + dst_alias, + src_mem, + Immediate.u(0), + ), + } + }, .memory, .load_direct, .load_got, .load_tlv => { switch (src_mcv) { - .memory => |addr| if (math.cast(i32, @bitCast(i64, addr))) |small_addr| - return self.asmRegisterMemory( - try self.movMirTag(ty), - registerAlias(dst_reg, abi_size), - Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ - .base = .{ .reg = .ds }, - .disp = small_addr, - }), - ), - .load_direct => |sym_index| if (try self.movMirTag(ty) == .mov) { - const atom_index = try self.getSymbolIndexForDecl(self.mod_fn.owner_decl); - _ = try self.addInst(.{ - .tag = .mov_linker, - .ops = .direct_reloc, - .data = .{ .payload = try self.addExtra(Mir.LeaRegisterReloc{ - .reg = @enumToInt(dst_reg.to64()), - .atom_index = atom_index, - .sym_index = sym_index, - }) }, + .memory => |addr| if (math.cast(i32, @bitCast(i64, addr))) |small_addr| { + const dst_alias = registerAlias(dst_reg, abi_size); + const src_mem = Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ + .base = .{ .reg = .ds }, + .disp = small_addr, }); - return; + switch (try self.moveStrategy(ty, mem.isAlignedGeneric( + u32, + @bitCast(u32, small_addr), + ty.abiAlignment(mod), + ))) { + .move => |tag| try self.asmRegisterMemory(tag, dst_alias, src_mem), + .insert_extract => |ie| try self.asmRegisterMemoryImmediate( + ie.insert, + dst_alias, + src_mem, + Immediate.u(0), + ), + .vex_insert_extract => |ie| try self.asmRegisterRegisterMemoryImmediate( + ie.insert, + dst_alias, + dst_alias, + src_mem, + Immediate.u(0), + ), + } + }, + .load_direct => |sym_index| switch (ty.zigTypeTag(mod)) { + else => { + const atom_index = try self.owner.getSymbolIndex(self); + _ = try self.addInst(.{ 
+ .tag = .mov, + .ops = .direct_reloc, + .data = .{ .rx = .{ + .r1 = dst_reg.to64(), + .payload = try self.addExtra(Mir.Reloc{ + .atom_index = atom_index, + .sym_index = sym_index, + }), + } }, + }); + return; + }, + .Float, .Vector => {}, }, .load_got, .load_tlv => {}, else => unreachable, @@ -7309,18 +9859,33 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr const addr_lock = self.register_manager.lockRegAssumeUnused(addr_reg); defer self.register_manager.unlockReg(addr_lock); - try self.asmRegisterMemory( - try self.movMirTag(ty), - registerAlias(dst_reg, abi_size), - Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ .base = .{ .reg = addr_reg } }), - ); + const dst_alias = registerAlias(dst_reg, abi_size); + const src_mem = Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ + .base = .{ .reg = addr_reg }, + }); + switch (try self.moveStrategy(ty, false)) { + .move => |tag| try self.asmRegisterMemory(tag, dst_alias, src_mem), + .insert_extract => |ie| try self.asmRegisterMemoryImmediate( + ie.insert, + dst_alias, + src_mem, + Immediate.u(0), + ), + .vex_insert_extract => |ie| try self.asmRegisterRegisterMemoryImmediate( + ie.insert, + dst_alias, + dst_alias, + src_mem, + Immediate.u(0), + ), + } }, .lea_direct, .lea_got => |sym_index| { - const atom_index = try self.getSymbolIndexForDecl(self.mod_fn.owner_decl); + const atom_index = try self.owner.getSymbolIndex(self); _ = try self.addInst(.{ .tag = switch (src_mcv) { - .lea_direct => .lea_linker, - .lea_got => .mov_linker, + .lea_direct => .lea, + .lea_got => .mov, else => unreachable, }, .ops = switch (src_mcv) { @@ -7328,27 +9893,31 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr .lea_got => .got_reloc, else => unreachable, }, - .data = .{ .payload = try self.addExtra(Mir.LeaRegisterReloc{ - .reg = @enumToInt(dst_reg.to64()), - .atom_index = atom_index, - .sym_index = sym_index, - }) }, + .data = .{ .rx = .{ + .r1 = dst_reg.to64(), + .payload = try self.addExtra(Mir.Reloc{ + .atom_index = atom_index, + .sym_index = sym_index, + }), + } }, }); }, .lea_tlv => |sym_index| { - const atom_index = try self.getSymbolIndexForDecl(self.mod_fn.owner_decl); + const atom_index = try self.owner.getSymbolIndex(self); if (self.bin_file.cast(link.File.MachO)) |_| { _ = try self.addInst(.{ - .tag = .lea_linker, + .tag = .lea, .ops = .tlv_reloc, - .data = .{ .payload = try self.addExtra(Mir.LeaRegisterReloc{ - .reg = @enumToInt(Register.rdi), - .atom_index = atom_index, - .sym_index = sym_index, - }) }, + .data = .{ .rx = .{ + .r1 = .rdi, + .payload = try self.addExtra(Mir.Reloc{ + .atom_index = atom_index, + .sym_index = sym_index, + }), + } }, }); // TODO: spill registers before calling - try self.asmMemory(.call, Memory.sib(.qword, .{ .base = .{ .reg = .rdi } })); + try self.asmMemory(.{ ._, .call }, Memory.sib(.qword, .{ .base = .{ .reg = .rdi } })); try self.genSetReg(dst_reg.to64(), Type.usize, .{ .register = .rax }); } else return self.fail("TODO emit ptr to TLV sequence on {s}", .{ @tagName(self.bin_file.tag), @@ -7358,7 +9927,8 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr } fn genSetMem(self: *Self, base: Memory.Base, disp: i32, ty: Type, src_mcv: MCValue) InnerError!void { - const abi_size = @intCast(u32, ty.abiSize(self.target.*)); + const mod = self.bin_file.options.module.?; + const abi_size = @intCast(u32, ty.abiSize(mod)); const dst_ptr_mcv: MCValue = switch (base) { .none => .{ .immediate = @bitCast(u64, @as(i64, disp)) }, .reg => 
|base_reg| .{ .register_offset = .{ .reg = base_reg, .off = disp } }, @@ -7370,12 +9940,12 @@ fn genSetMem(self: *Self, base: Memory.Base, disp: i32, ty: Type, src_mcv: MCVal try self.genInlineMemset(dst_ptr_mcv, .{ .immediate = 0xaa }, .{ .immediate = abi_size }), .immediate => |imm| switch (abi_size) { 1, 2, 4 => { - const immediate = if (ty.isSignedInt()) + const immediate = if (ty.isSignedInt(mod)) Immediate.s(@truncate(i32, @bitCast(i64, imm))) else Immediate.u(@intCast(u32, imm)); try self.asmMemoryImmediate( - .mov, + .{ ._, .mov }, Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ .base = base, .disp = disp }), immediate, ); @@ -7383,16 +9953,16 @@ fn genSetMem(self: *Self, base: Memory.Base, disp: i32, ty: Type, src_mcv: MCVal 3, 5...7 => unreachable, else => if (math.cast(i32, @bitCast(i64, imm))) |small| { try self.asmMemoryImmediate( - .mov, + .{ ._, .mov }, Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ .base = base, .disp = disp }), Immediate.s(small), ); } else { var offset: i32 = 0; while (offset < abi_size) : (offset += 4) try self.asmMemoryImmediate( - .mov, + .{ ._, .mov }, Memory.sib(.dword, .{ .base = base, .disp = disp + offset }), - if (ty.isSignedInt()) + if (ty.isSignedInt(mod)) Immediate.s(@truncate( i32, @bitCast(i64, imm) >> (math.cast(u6, offset * 8) orelse 63), @@ -7406,22 +9976,50 @@ fn genSetMem(self: *Self, base: Memory.Base, disp: i32, ty: Type, src_mcv: MCVal }, }, .eflags => |cc| try self.asmSetccMemory(Memory.sib(.byte, .{ .base = base, .disp = disp }), cc), - .register => |reg| try self.asmMemoryRegister( - try self.movMirTag(ty), - Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ .base = base, .disp = disp }), - registerAlias(reg, abi_size), - ), + .register => |src_reg| { + const dst_mem = Memory.sib( + Memory.PtrSize.fromSize(abi_size), + .{ .base = base, .disp = disp }, + ); + const src_alias = registerAlias(src_reg, abi_size); + switch (try self.moveStrategy(ty, switch (base) { + .none => mem.isAlignedGeneric( + u32, + @bitCast(u32, disp), + ty.abiAlignment(mod), + ), + .reg => |reg| switch (reg) { + .es, .cs, .ss, .ds => mem.isAlignedGeneric( + u32, + @bitCast(u32, disp), + ty.abiAlignment(mod), + ), + else => false, + }, + .frame => |frame_index| self.getFrameAddrAlignment( + .{ .index = frame_index, .off = disp }, + ) >= ty.abiAlignment(mod), + })) { + .move => |tag| try self.asmMemoryRegister(tag, dst_mem, src_alias), + .insert_extract, .vex_insert_extract => |ie| try self.asmMemoryRegisterImmediate( + ie.extract, + dst_mem, + src_alias, + Immediate.u(0), + ), + } + }, .register_overflow => |ro| { try self.genSetMem( base, - disp + @intCast(i32, ty.structFieldOffset(0, self.target.*)), - ty.structFieldType(0), + disp + @intCast(i32, ty.structFieldOffset(0, mod)), + ty.structFieldType(0, mod), .{ .register = ro.reg }, ); try self.genSetMem( base, - disp + @intCast(i32, ty.structFieldOffset(1, self.target.*)), - ty.structFieldType(1), + disp + @intCast(i32, ty.structFieldOffset(1, mod)), + ty.structFieldType(1, mod), .{ .eflags = ro.eflags }, ); }, @@ -7450,73 +10048,12 @@ fn genSetMem(self: *Self, base: Memory.Base, disp: i32, ty: Type, src_mcv: MCVal } } -/// Like `genInlineMemcpy` but copies value from a register to an address via dereferencing -/// of destination register. -/// Boils down to MOV r/m64, r64. 
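// Illustrative sketch, not part of the commit above: the `.register_overflow`
// branch of genSetMem stores a two-field tuple, writing the value register to
// field 0 and the saved condition flag to field 1. The language-level shape
// of that pair is what the overflow builtins return:
const std = @import("std");

test "overflow ops produce a value/flag pair" {
    const pair = @addWithOverflow(@as(u8, 250), 10);
    try std.testing.expectEqual(@as(u8, 4), pair[0]); // wrapped result -> field 0
    try std.testing.expectEqual(@as(u1, 1), pair[1]); // overflow flag -> field 1
}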
-fn genInlineMemcpyRegisterRegister( - self: *Self, - ty: Type, - dst_reg: Register, - src_reg: Register, - offset: i32, -) InnerError!void { - assert(dst_reg.bitSize() == 64); - - const dst_reg_lock = self.register_manager.lockReg(dst_reg); - defer if (dst_reg_lock) |lock| self.register_manager.unlockReg(lock); - - const src_reg_lock = self.register_manager.lockReg(src_reg); - defer if (src_reg_lock) |lock| self.register_manager.unlockReg(lock); - - const abi_size = @intCast(u32, ty.abiSize(self.target.*)); - - if (!math.isPowerOfTwo(abi_size)) { - const tmp_reg = try self.copyToTmpRegister(ty, .{ .register = src_reg }); - - var next_offset = offset; - var remainder = abi_size; - while (remainder > 0) { - const nearest_power_of_two = @as(u6, 1) << math.log2_int(u3, @intCast(u3, remainder)); - try self.asmMemoryRegister( - .mov, - Memory.sib(Memory.PtrSize.fromSize(nearest_power_of_two), .{ - .base = dst_reg, - .disp = -next_offset, - }), - registerAlias(tmp_reg, nearest_power_of_two), - ); - - if (nearest_power_of_two > 1) { - try self.genShiftBinOpMir(.shr, ty, .{ .register = tmp_reg }, .{ - .immediate = nearest_power_of_two * 8, - }); - } - - remainder -= nearest_power_of_two; - next_offset -= nearest_power_of_two; - } - } else { - try self.asmMemoryRegister( - switch (src_reg.class()) { - .general_purpose, .segment => .mov, - .floating_point => .movss, - }, - Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ .base = dst_reg, .disp = -offset }), - registerAlias(src_reg, abi_size), - ); - } -} - fn genInlineMemcpy(self: *Self, dst_ptr: MCValue, src_ptr: MCValue, len: MCValue) InnerError!void { try self.spillRegisters(&.{ .rdi, .rsi, .rcx }); try self.genSetReg(.rdi, Type.usize, dst_ptr); try self.genSetReg(.rsi, Type.usize, src_ptr); try self.genSetReg(.rcx, Type.usize, len); - _ = try self.addInst(.{ - .tag = .movs, - .ops = .string, - .data = .{ .string = .{ .repeat = .rep, .width = .b } }, - }); + try self.asmOpOnly(.{ .@"rep _sb", .mov }); } fn genInlineMemset(self: *Self, dst_ptr: MCValue, value: MCValue, len: MCValue) InnerError!void { @@ -7524,11 +10061,90 @@ fn genInlineMemset(self: *Self, dst_ptr: MCValue, value: MCValue, len: MCValue) try self.genSetReg(.rdi, Type.usize, dst_ptr); try self.genSetReg(.al, Type.u8, value); try self.genSetReg(.rcx, Type.usize, len); - _ = try self.addInst(.{ - .tag = .stos, - .ops = .string, - .data = .{ .string = .{ .repeat = .rep, .width = .b } }, - }); + try self.asmOpOnly(.{ .@"rep _sb", .sto }); +} + +fn genLazySymbolRef( + self: *Self, + comptime tag: Mir.Inst.Tag, + reg: Register, + lazy_sym: link.File.LazySymbol, +) InnerError!void { + if (self.bin_file.cast(link.File.Elf)) |elf_file| { + const atom_index = elf_file.getOrCreateAtomForLazySymbol(lazy_sym) catch |err| + return self.fail("{s} creating lazy symbol", .{@errorName(err)}); + const atom = elf_file.getAtom(atom_index); + _ = try atom.getOrCreateOffsetTableEntry(elf_file); + const got_addr = atom.getOffsetTableAddress(elf_file); + const got_mem = + Memory.sib(.qword, .{ .base = .{ .reg = .ds }, .disp = @intCast(i32, got_addr) }); + switch (tag) { + .lea, .mov => try self.asmRegisterMemory(.{ ._, .mov }, reg.to64(), got_mem), + .call => try self.asmMemory(.{ ._, .call }, got_mem), + else => unreachable, + } + switch (tag) { + .lea, .call => {}, + .mov => try self.asmRegisterMemory( + .{ ._, tag }, + reg.to64(), + Memory.sib(.qword, .{ .base = .{ .reg = reg.to64() } }), + ), + else => unreachable, + } + } else if (self.bin_file.cast(link.File.Plan9)) |p9_file| { + const atom_index = 
p9_file.getOrCreateAtomForLazySymbol(lazy_sym) catch |err| + return self.fail("{s} creating lazy symbol", .{@errorName(err)}); + var atom = p9_file.getAtom(atom_index); + _ = atom.getOrCreateOffsetTableEntry(p9_file); + const got_addr = atom.getOffsetTableAddress(p9_file); + const got_mem = + Memory.sib(.qword, .{ .base = .{ .reg = .ds }, .disp = @intCast(i32, got_addr) }); + switch (tag) { + .lea, .mov => try self.asmRegisterMemory(.{ ._, .mov }, reg.to64(), got_mem), + .call => try self.asmMemory(.{ ._, .call }, got_mem), + else => unreachable, + } + switch (tag) { + .lea, .call => {}, + .mov => try self.asmRegisterMemory( + .{ ._, tag }, + reg.to64(), + Memory.sib(.qword, .{ .base = .{ .reg = reg.to64() } }), + ), + else => unreachable, + } + } else if (self.bin_file.cast(link.File.Coff)) |coff_file| { + const atom_index = coff_file.getOrCreateAtomForLazySymbol(lazy_sym) catch |err| + return self.fail("{s} creating lazy symbol", .{@errorName(err)}); + const sym_index = coff_file.getAtom(atom_index).getSymbolIndex().?; + switch (tag) { + .lea, .call => try self.genSetReg(reg, Type.usize, .{ .lea_got = sym_index }), + .mov => try self.genSetReg(reg, Type.usize, .{ .load_got = sym_index }), + else => unreachable, + } + switch (tag) { + .lea, .mov => {}, + .call => try self.asmRegister(.{ ._, .call }, reg), + else => unreachable, + } + } else if (self.bin_file.cast(link.File.MachO)) |macho_file| { + const atom_index = macho_file.getOrCreateAtomForLazySymbol(lazy_sym) catch |err| + return self.fail("{s} creating lazy symbol", .{@errorName(err)}); + const sym_index = macho_file.getAtom(atom_index).getSymbolIndex().?; + switch (tag) { + .lea, .call => try self.genSetReg(reg, Type.usize, .{ .lea_got = sym_index }), + .mov => try self.genSetReg(reg, Type.usize, .{ .load_got = sym_index }), + else => unreachable, + } + switch (tag) { + .lea, .mov => {}, + .call => try self.asmRegister(.{ ._, .call }, reg), + else => unreachable, + } + } else { + return self.fail("TODO implement genLazySymbol for x86_64 {s}", .{@tagName(self.bin_file.tag)}); + } } fn airPtrToInt(self: *Self, inst: Air.Inst.Index) !void { @@ -7539,7 +10155,7 @@ fn airPtrToInt(self: *Self, inst: Air.Inst.Index) !void { if (self.reuseOperand(inst, un_op, 0, src_mcv)) break :result src_mcv; const dst_mcv = try self.allocRegOrMem(inst, true); - const dst_ty = self.air.typeOfIndex(inst); + const dst_ty = self.typeOfIndex(inst); try self.genCopy(dst_ty, dst_mcv, src_mcv); break :result dst_mcv; }; @@ -7547,44 +10163,81 @@ fn airPtrToInt(self: *Self, inst: Air.Inst.Index) !void { } fn airBitCast(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; const ty_op = self.air.instructions.items(.data)[inst].ty_op; - const dst_ty = self.air.typeOfIndex(inst); - const src_ty = self.air.typeOf(ty_op.operand); + const dst_ty = self.typeOfIndex(inst); + const src_ty = self.typeOf(ty_op.operand); const result = result: { - const dst_rc = try self.regClassForType(dst_ty); - const src_rc = try self.regClassForType(src_ty); - const operand = try self.resolveInst(ty_op.operand); - if (dst_rc.eql(src_rc) and self.reuseOperand(inst, ty_op.operand, 0, operand)) break :result operand; + const dst_rc = regClassForType(dst_ty, mod); + const src_rc = regClassForType(src_ty, mod); + const src_mcv = try self.resolveInst(ty_op.operand); - const operand_lock = switch (operand) { - .register => |reg| self.register_manager.lockReg(reg), - .register_overflow => |ro| self.register_manager.lockReg(ro.reg), - else => null, + const 
src_lock = if (src_mcv.getReg()) |reg| self.register_manager.lockReg(reg) else null; + defer if (src_lock) |lock| self.register_manager.unlockReg(lock); + + const dst_mcv = if (dst_rc.supersetOf(src_rc) and + self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) + src_mcv + else dst: { + const dst_mcv = try self.allocRegOrMem(inst, true); + try self.genCopy( + if (!dst_mcv.isMemory() or src_mcv.isMemory()) dst_ty else src_ty, + dst_mcv, + src_mcv, + ); + break :dst dst_mcv; }; - defer if (operand_lock) |lock| self.register_manager.unlockReg(lock); - const dest = try self.allocRegOrMem(inst, true); - try self.genCopy(self.air.typeOfIndex(inst), dest, operand); - break :result dest; + const dst_signedness = + if (dst_ty.isAbiInt(mod)) dst_ty.intInfo(mod).signedness else .unsigned; + const src_signedness = + if (src_ty.isAbiInt(mod)) src_ty.intInfo(mod).signedness else .unsigned; + if (dst_signedness == src_signedness) break :result dst_mcv; + + const abi_size = @intCast(u16, dst_ty.abiSize(mod)); + const bit_size = @intCast(u16, dst_ty.bitSize(mod)); + if (abi_size * 8 <= bit_size) break :result dst_mcv; + + const dst_limbs_len = math.divCeil(i32, bit_size, 64) catch unreachable; + const high_reg = if (dst_mcv.isRegister()) + dst_mcv.getReg().? + else + try self.copyToTmpRegister( + Type.usize, + dst_mcv.address().offset((dst_limbs_len - 1) * 8).deref(), + ); + const high_lock = self.register_manager.lockReg(high_reg); + defer if (high_lock) |lock| self.register_manager.unlockReg(lock); + + const high_ty = try mod.intType(dst_signedness, bit_size % 64); + + try self.truncateRegister(high_ty, high_reg); + if (!dst_mcv.isRegister()) try self.genCopy( + Type.usize, + dst_mcv.address().offset((dst_limbs_len - 1) * 8).deref(), + .{ .register = high_reg }, + ); + break :result dst_mcv; }; return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); } fn airArrayToSlice(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; const ty_op = self.air.instructions.items(.data)[inst].ty_op; - const slice_ty = self.air.typeOfIndex(inst); - const ptr_ty = self.air.typeOf(ty_op.operand); + const slice_ty = self.typeOfIndex(inst); + const ptr_ty = self.typeOf(ty_op.operand); const ptr = try self.resolveInst(ty_op.operand); - const array_ty = ptr_ty.childType(); - const array_len = array_ty.arrayLen(); + const array_ty = ptr_ty.childType(mod); + const array_len = array_ty.arrayLen(mod); - const frame_index = try self.allocFrameIndex(FrameAlloc.initType(slice_ty, self.target.*)); + const frame_index = try self.allocFrameIndex(FrameAlloc.initType(slice_ty, mod)); try self.genSetMem(.{ .frame = frame_index }, 0, ptr_ty, ptr); try self.genSetMem( .{ .frame = frame_index }, - @intCast(i32, ptr_ty.abiSize(self.target.*)), + @intCast(i32, ptr_ty.abiSize(mod)), Type.usize, .{ .immediate = array_len }, ); @@ -7594,135 +10247,179 @@ fn airArrayToSlice(self: *Self, inst: Air.Inst.Index) !void { } fn airIntToFloat(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; const ty_op = self.air.instructions.items(.data)[inst].ty_op; - _ = ty_op; - return self.fail("TODO implement airIntToFloat for {}", .{self.target.cpu.arch}); - //return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); + + const src_ty = self.typeOf(ty_op.operand); + const src_bits = @intCast(u32, src_ty.bitSize(mod)); + const src_signedness = + if (src_ty.isAbiInt(mod)) src_ty.intInfo(mod).signedness else .unsigned; + const dst_ty = self.typeOfIndex(inst); + + const 
src_size = math.divCeil(u32, @max(switch (src_signedness) { + .signed => src_bits, + .unsigned => src_bits + 1, + }, 32), 8) catch unreachable; + if (src_size > 8) return self.fail("TODO implement airIntToFloat from {} to {}", .{ + src_ty.fmt(mod), dst_ty.fmt(mod), + }); + + const src_mcv = try self.resolveInst(ty_op.operand); + const src_reg = if (src_mcv.isRegister()) + src_mcv.getReg().? + else + try self.copyToTmpRegister(src_ty, src_mcv); + const src_lock = self.register_manager.lockRegAssumeUnused(src_reg); + defer self.register_manager.unlockReg(src_lock); + + if (src_bits < src_size * 8) try self.truncateRegister(src_ty, src_reg); + + const dst_reg = try self.register_manager.allocReg(inst, regClassForType(dst_ty, mod)); + const dst_mcv = MCValue{ .register = dst_reg }; + const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg); + defer self.register_manager.unlockReg(dst_lock); + + const mir_tag = if (@as(?Mir.Inst.FixedTag, switch (dst_ty.zigTypeTag(mod)) { + .Float => switch (dst_ty.floatBits(self.target.*)) { + 32 => if (self.hasFeature(.avx)) .{ .v_ss, .cvtsi2 } else .{ ._ss, .cvtsi2 }, + 64 => if (self.hasFeature(.avx)) .{ .v_sd, .cvtsi2 } else .{ ._sd, .cvtsi2 }, + 16, 80, 128 => null, + else => unreachable, + }, + else => null, + })) |tag| tag else return self.fail("TODO implement airIntToFloat from {} to {}", .{ + src_ty.fmt(mod), dst_ty.fmt(mod), + }); + const dst_alias = dst_reg.to128(); + const src_alias = registerAlias(src_reg, src_size); + switch (mir_tag[0]) { + .v_ss, .v_sd => try self.asmRegisterRegisterRegister(mir_tag, dst_alias, dst_alias, src_alias), + else => try self.asmRegisterRegister(mir_tag, dst_alias, src_alias), + } + + return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none }); } fn airFloatToInt(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; const ty_op = self.air.instructions.items(.data)[inst].ty_op; - const src_ty = self.air.typeOf(ty_op.operand); - const dst_ty = self.air.typeOfIndex(inst); - const operand = try self.resolveInst(ty_op.operand); - const src_abi_size = @intCast(u32, src_ty.abiSize(self.target.*)); - const dst_abi_size = @intCast(u32, dst_ty.abiSize(self.target.*)); + const src_ty = self.typeOf(ty_op.operand); + const dst_ty = self.typeOfIndex(inst); + const dst_bits = @intCast(u32, dst_ty.bitSize(mod)); + const dst_signedness = + if (dst_ty.isAbiInt(mod)) dst_ty.intInfo(mod).signedness else .unsigned; + + const dst_size = math.divCeil(u32, @max(switch (dst_signedness) { + .signed => dst_bits, + .unsigned => dst_bits + 1, + }, 32), 8) catch unreachable; + if (dst_size > 8) return self.fail("TODO implement airFloatToInt from {} to {}", .{ + src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?), + }); - switch (src_abi_size) { - 4, 8 => {}, - else => |size| return self.fail("TODO load ST(0) with abiSize={}", .{size}), - } - if (dst_abi_size > 8) { - return self.fail("TODO convert float with abiSize={}", .{dst_abi_size}); - } + const src_mcv = try self.resolveInst(ty_op.operand); + const src_reg = if (src_mcv.isRegister()) + src_mcv.getReg().? 
+ else + try self.copyToTmpRegister(src_ty, src_mcv); + const src_lock = self.register_manager.lockRegAssumeUnused(src_reg); + defer self.register_manager.unlockReg(src_lock); - // move float src to ST(0) - const frame_addr: FrameAddr = switch (operand) { - .load_frame => |frame_addr| frame_addr, - else => frame_addr: { - const frame_index = try self.allocFrameIndex(FrameAlloc.initType(src_ty, self.target.*)); - try self.genSetMem(.{ .frame = frame_index }, 0, src_ty, operand); - break :frame_addr .{ .index = frame_index }; - }, - }; - try self.asmMemory( - .fld, - Memory.sib(Memory.PtrSize.fromSize(src_abi_size), .{ - .base = .{ .frame = frame_addr.index }, - .disp = frame_addr.off, - }), - ); + const dst_reg = try self.register_manager.allocReg(inst, regClassForType(dst_ty, mod)); + const dst_mcv = MCValue{ .register = dst_reg }; + const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg); + defer self.register_manager.unlockReg(dst_lock); - // convert - const stack_dst = try self.allocRegOrMem(inst, false); - try self.asmMemory( - .fisttp, - Memory.sib(Memory.PtrSize.fromSize(dst_abi_size), .{ - .base = .{ .frame = stack_dst.load_frame.index }, - .disp = stack_dst.load_frame.off, + try self.asmRegisterRegister( + if (@as(?Mir.Inst.FixedTag, switch (src_ty.zigTypeTag(mod)) { + .Float => switch (src_ty.floatBits(self.target.*)) { + 32 => if (self.hasFeature(.avx)) .{ .v_, .cvttss2si } else .{ ._, .cvttss2si }, + 64 => if (self.hasFeature(.avx)) .{ .v_, .cvttsd2si } else .{ ._, .cvttsd2si }, + 16, 80, 128 => null, + else => unreachable, + }, + else => null, + })) |tag| tag else return self.fail("TODO implement airFloatToInt from {} to {}", .{ + src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?), }), + registerAlias(dst_reg, dst_size), + src_reg.to128(), ); - return self.finishAir(inst, stack_dst, .{ ty_op.operand, .none, .none }); + if (dst_bits < dst_size * 8) try self.truncateRegister(dst_ty, dst_reg); + + return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none }); } fn airCmpxchg(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; const ty_pl = self.air.instructions.items(.data)[inst].ty_pl; const extra = self.air.extraData(Air.Cmpxchg, ty_pl.payload).data; - const ptr_ty = self.air.typeOf(extra.ptr); - const ptr_mcv = try self.resolveInst(extra.ptr); - const val_ty = self.air.typeOf(extra.expected_value); - const val_abi_size = @intCast(u32, val_ty.abiSize(self.target.*)); + const ptr_ty = self.typeOf(extra.ptr); + const val_ty = self.typeOf(extra.expected_value); + const val_abi_size = @intCast(u32, val_ty.abiSize(mod)); try self.spillRegisters(&.{ .rax, .rdx, .rbx, .rcx }); const regs_lock = self.register_manager.lockRegsAssumeUnused(4, .{ .rax, .rdx, .rbx, .rcx }); - for (regs_lock) |lock| self.register_manager.unlockReg(lock); + defer for (regs_lock) |lock| self.register_manager.unlockReg(lock); const exp_mcv = try self.resolveInst(extra.expected_value); - if (val_abi_size > 8) switch (exp_mcv) { - .load_frame => |frame_addr| { - try self.genSetReg(.rax, Type.usize, .{ .load_frame = .{ - .index = frame_addr.index, - .off = frame_addr.off + 0, - } }); - try self.genSetReg(.rdx, Type.usize, .{ .load_frame = .{ - .index = frame_addr.index, - .off = frame_addr.off + 8, - } }); - }, - else => return self.fail("TODO implement cmpxchg for {s}", .{@tagName(exp_mcv)}), + if (val_abi_size > 8) { + const exp_addr_mcv: MCValue = switch (exp_mcv) { + .memory, .indirect, .load_frame => 
exp_mcv.address(), + else => .{ .register = try self.copyToTmpRegister(Type.usize, exp_mcv.address()) }, + }; + const exp_addr_lock = + if (exp_addr_mcv.getReg()) |reg| self.register_manager.lockReg(reg) else null; + defer if (exp_addr_lock) |lock| self.register_manager.unlockReg(lock); + + try self.genSetReg(.rax, Type.usize, exp_addr_mcv.deref()); + try self.genSetReg(.rdx, Type.usize, exp_addr_mcv.offset(8).deref()); } else try self.genSetReg(.rax, val_ty, exp_mcv); - const rax_lock = self.register_manager.lockRegAssumeUnused(.rax); - defer self.register_manager.unlockReg(rax_lock); const new_mcv = try self.resolveInst(extra.new_value); - const new_reg: Register = if (val_abi_size > 8) switch (new_mcv) { - .load_frame => |frame_addr| new: { - try self.genSetReg(.rbx, Type.usize, .{ .load_frame = .{ - .index = frame_addr.index, - .off = frame_addr.off + 0, - } }); - try self.genSetReg(.rcx, Type.usize, .{ .load_frame = .{ - .index = frame_addr.index, - .off = frame_addr.off + 8, - } }); - break :new undefined; - }, - else => return self.fail("TODO implement cmpxchg for {s}", .{@tagName(exp_mcv)}), + const new_reg = if (val_abi_size > 8) new: { + const new_addr_mcv: MCValue = switch (new_mcv) { + .memory, .indirect, .load_frame => new_mcv.address(), + else => .{ .register = try self.copyToTmpRegister(Type.usize, new_mcv.address()) }, + }; + const new_addr_lock = + if (new_addr_mcv.getReg()) |reg| self.register_manager.lockReg(reg) else null; + defer if (new_addr_lock) |lock| self.register_manager.unlockReg(lock); + + try self.genSetReg(.rbx, Type.usize, new_addr_mcv.deref()); + try self.genSetReg(.rcx, Type.usize, new_addr_mcv.offset(8).deref()); + break :new null; } else try self.copyToTmpRegister(val_ty, new_mcv); - const new_lock = self.register_manager.lockRegAssumeUnused(new_reg); - defer self.register_manager.unlockReg(new_lock); + const new_lock = if (new_reg) |reg| self.register_manager.lockRegAssumeUnused(reg) else null; + defer if (new_lock) |lock| self.register_manager.unlockReg(lock); + const ptr_mcv = try self.resolveInst(extra.ptr); const ptr_size = Memory.PtrSize.fromSize(val_abi_size); const ptr_mem = switch (ptr_mcv) { - .register => |reg| Memory.sib(ptr_size, .{ .base = .{ .reg = reg } }), - .lea_frame => |frame_addr| Memory.sib(ptr_size, .{ - .base = .{ .frame = frame_addr.index }, - .disp = frame_addr.off, + .immediate, .register, .register_offset, .lea_frame => ptr_mcv.deref().mem(ptr_size), + else => Memory.sib(ptr_size, .{ + .base = .{ .reg = try self.copyToTmpRegister(ptr_ty, ptr_mcv) }, }), - else => Memory.sib(ptr_size, .{ .base = .{ - .reg = try self.copyToTmpRegister(ptr_ty, ptr_mcv), - } }), }; - const mem_lock = switch (ptr_mem.base()) { + switch (ptr_mem) { + .sib, .rip => {}, + .moffs => return self.fail("TODO airCmpxchg with {s}", .{@tagName(ptr_mcv)}), + } + const ptr_lock = switch (ptr_mem.base()) { .none, .frame => null, .reg => |reg| self.register_manager.lockReg(reg), }; - defer if (mem_lock) |lock| self.register_manager.unlockReg(lock); + defer if (ptr_lock) |lock| self.register_manager.unlockReg(lock); try self.spillEflagsIfOccupied(); - if (val_abi_size <= 8) { - _ = try self.addInst(.{ .tag = .cmpxchg, .ops = .lock_mr_sib, .data = .{ .rx = .{ - .r = registerAlias(new_reg, val_abi_size), - .payload = try self.addExtra(Mir.MemorySib.encode(ptr_mem)), - } } }); - } else { - _ = try self.addInst(.{ .tag = .cmpxchgb, .ops = .lock_m_sib, .data = .{ - .payload = try self.addExtra(Mir.MemorySib.encode(ptr_mem)), - } }); - } + if (val_abi_size <= 8) try 
self.asmMemoryRegister( + .{ .@"lock _", .cmpxchg }, + ptr_mem, + registerAlias(new_reg.?, val_abi_size), + ) else try self.asmMemory(.{ .@"lock _16b", .cmpxchg }, ptr_mem); const result: MCValue = result: { if (self.liveness.isUnused(inst)) break :result .unreach; @@ -7733,24 +10430,9 @@ fn airCmpxchg(self: *Self, inst: Air.Inst.Index) !void { } const dst_mcv = try self.allocRegOrMem(inst, false); - try self.genSetMem( - .{ .frame = dst_mcv.load_frame.index }, - dst_mcv.load_frame.off + 16, - Type.bool, - .{ .eflags = .ne }, - ); - try self.genSetMem( - .{ .frame = dst_mcv.load_frame.index }, - dst_mcv.load_frame.off + 8, - Type.usize, - .{ .register = .rdx }, - ); - try self.genSetMem( - .{ .frame = dst_mcv.load_frame.index }, - dst_mcv.load_frame.off + 0, - Type.usize, - .{ .register = .rax }, - ); + try self.genCopy(Type.usize, dst_mcv, .{ .register = .rax }); + try self.genCopy(Type.usize, dst_mcv.address().offset(8).deref(), .{ .register = .rdx }); + try self.genCopy(Type.bool, dst_mcv.address().offset(16).deref(), .{ .eflags = .ne }); break :result dst_mcv; }; return self.finishAir(inst, result, .{ extra.ptr, extra.expected_value, extra.new_value }); @@ -7766,6 +10448,7 @@ fn atomicOp( rmw_op: ?std.builtin.AtomicRmwOp, order: std.builtin.AtomicOrder, ) InnerError!MCValue { + const mod = self.bin_file.options.module.?; const ptr_lock = switch (ptr_mcv) { .register => |reg| self.register_manager.lockReg(reg), else => null, @@ -7778,18 +10461,18 @@ fn atomicOp( }; defer if (val_lock) |lock| self.register_manager.unlockReg(lock); - const val_abi_size = @intCast(u32, val_ty.abiSize(self.target.*)); + const val_abi_size = @intCast(u32, val_ty.abiSize(mod)); const ptr_size = Memory.PtrSize.fromSize(val_abi_size); const ptr_mem = switch (ptr_mcv) { - .register => |reg| Memory.sib(ptr_size, .{ .base = .{ .reg = reg } }), - .lea_frame => |frame_addr| Memory.sib(ptr_size, .{ - .base = .{ .frame = frame_addr.index }, - .disp = frame_addr.off, + .immediate, .register, .register_offset, .lea_frame => ptr_mcv.deref().mem(ptr_size), + else => Memory.sib(ptr_size, .{ + .base = .{ .reg = try self.copyToTmpRegister(ptr_ty, ptr_mcv) }, }), - else => Memory.sib(ptr_size, .{ .base = .{ - .reg = try self.copyToTmpRegister(ptr_ty, ptr_mcv), - } }), }; + switch (ptr_mem) { + .sib, .rip => {}, + .moffs => return self.fail("TODO airCmpxchg with {s}", .{@tagName(ptr_mcv)}), + } const mem_lock = switch (ptr_mem.base()) { .none, .frame => null, .reg => |reg| self.register_manager.lockReg(reg), @@ -7835,16 +10518,17 @@ fn atomicOp( try self.genSetReg(dst_reg, val_ty, val_mcv); if (rmw_op == std.builtin.AtomicRmwOp.Sub and tag == .xadd) { - try self.genUnOpMir(.neg, val_ty, dst_mcv); + try self.genUnOpMir(.{ ._, .neg }, val_ty, dst_mcv); } - _ = try self.addInst(.{ .tag = tag, .ops = switch (tag) { - .mov, .xchg => .mr_sib, - .xadd, .add, .sub, .@"and", .@"or", .xor => .lock_mr_sib, - else => unreachable, - }, .data = .{ .rx = .{ - .r = registerAlias(dst_reg, val_abi_size), - .payload = try self.addExtra(Mir.MemorySib.encode(ptr_mem)), - } } }); + try self.asmMemoryRegister( + switch (tag) { + .mov, .xchg => .{ ._, tag }, + .xadd, .add, .sub, .@"and", .@"or", .xor => .{ .@"lock _", tag }, + else => unreachable, + }, + ptr_mem, + registerAlias(dst_reg, val_abi_size), + ); return if (unused) .unreach else dst_mcv; }, @@ -7854,25 +10538,25 @@ fn atomicOp( const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); defer self.register_manager.unlockReg(tmp_lock); - try self.asmRegisterMemory(.mov, 
registerAlias(.rax, val_abi_size), ptr_mem); + try self.asmRegisterMemory(.{ ._, .mov }, registerAlias(.rax, val_abi_size), ptr_mem); const loop = @intCast(u32, self.mir_instructions.len); if (rmw_op != std.builtin.AtomicRmwOp.Xchg) { try self.genSetReg(tmp_reg, val_ty, .{ .register = .rax }); } if (rmw_op) |op| switch (op) { .Xchg => try self.genSetReg(tmp_reg, val_ty, val_mcv), - .Add => try self.genBinOpMir(.add, val_ty, tmp_mcv, val_mcv), - .Sub => try self.genBinOpMir(.sub, val_ty, tmp_mcv, val_mcv), - .And => try self.genBinOpMir(.@"and", val_ty, tmp_mcv, val_mcv), + .Add => try self.genBinOpMir(.{ ._, .add }, val_ty, tmp_mcv, val_mcv), + .Sub => try self.genBinOpMir(.{ ._, .sub }, val_ty, tmp_mcv, val_mcv), + .And => try self.genBinOpMir(.{ ._, .@"and" }, val_ty, tmp_mcv, val_mcv), .Nand => { - try self.genBinOpMir(.@"and", val_ty, tmp_mcv, val_mcv); - try self.genUnOpMir(.not, val_ty, tmp_mcv); + try self.genBinOpMir(.{ ._, .@"and" }, val_ty, tmp_mcv, val_mcv); + try self.genUnOpMir(.{ ._, .not }, val_ty, tmp_mcv); }, - .Or => try self.genBinOpMir(.@"or", val_ty, tmp_mcv, val_mcv), - .Xor => try self.genBinOpMir(.xor, val_ty, tmp_mcv, val_mcv), + .Or => try self.genBinOpMir(.{ ._, .@"or" }, val_ty, tmp_mcv, val_mcv), + .Xor => try self.genBinOpMir(.{ ._, .xor }, val_ty, tmp_mcv, val_mcv), .Min, .Max => { - const cc: Condition = switch (if (val_ty.isAbiInt()) - val_ty.intInfo(self.target.*).signedness + const cc: Condition = switch (if (val_ty.isAbiInt(mod)) + val_ty.intInfo(mod).signedness else .unsigned) { .unsigned => switch (op) { @@ -7887,7 +10571,7 @@ fn atomicOp( }, }; - try self.genBinOpMir(.cmp, val_ty, tmp_mcv, val_mcv); + try self.genBinOpMir(.{ ._, .cmp }, val_ty, tmp_mcv, val_mcv); const cmov_abi_size = @max(val_abi_size, 2); switch (val_mcv) { .register => |val_reg| try self.asmCmovccRegisterRegister( @@ -7895,12 +10579,9 @@ fn atomicOp( registerAlias(val_reg, cmov_abi_size), cc, ), - .load_frame => |frame_addr| try self.asmCmovccRegisterMemory( + .memory, .indirect, .load_frame => try self.asmCmovccRegisterMemory( registerAlias(tmp_reg, cmov_abi_size), - Memory.sib(Memory.PtrSize.fromSize(cmov_abi_size), .{ - .base = .{ .frame = frame_addr.index }, - .disp = frame_addr.off, - }), + val_mcv.mem(Memory.PtrSize.fromSize(cmov_abi_size)), cc, ), else => { @@ -7914,90 +10595,79 @@ fn atomicOp( } }, }; - _ = try self.addInst(.{ .tag = .cmpxchg, .ops = .lock_mr_sib, .data = .{ .rx = .{ - .r = registerAlias(tmp_reg, val_abi_size), - .payload = try self.addExtra(Mir.MemorySib.encode(ptr_mem)), - } } }); + try self.asmMemoryRegister( + .{ .@"lock _", .cmpxchg }, + ptr_mem, + registerAlias(tmp_reg, val_abi_size), + ); _ = try self.asmJccReloc(loop, .ne); return if (unused) .unreach else .{ .register = .rax }; } else { - try self.asmRegisterMemory(.mov, .rax, Memory.sib(.qword, .{ + try self.asmRegisterMemory(.{ ._, .mov }, .rax, Memory.sib(.qword, .{ .base = ptr_mem.sib.base, - .scale_index = ptr_mem.sib.scale_index, + .scale_index = ptr_mem.scaleIndex(), .disp = ptr_mem.sib.disp + 0, })); - try self.asmRegisterMemory(.mov, .rdx, Memory.sib(.qword, .{ + try self.asmRegisterMemory(.{ ._, .mov }, .rdx, Memory.sib(.qword, .{ .base = ptr_mem.sib.base, - .scale_index = ptr_mem.sib.scale_index, + .scale_index = ptr_mem.scaleIndex(), .disp = ptr_mem.sib.disp + 8, })); const loop = @intCast(u32, self.mir_instructions.len); - switch (val_mcv) { - .load_frame => |frame_addr| { - const val_lo_mem = Memory.sib(.qword, .{ - .base = .{ .frame = frame_addr.index }, - .disp = frame_addr.off + 0, 
- }); - const val_hi_mem = Memory.sib(.qword, .{ - .base = .{ .frame = frame_addr.index }, - .disp = frame_addr.off + 8, - }); - - if (rmw_op != std.builtin.AtomicRmwOp.Xchg) { - try self.asmRegisterRegister(.mov, .rbx, .rax); - try self.asmRegisterRegister(.mov, .rcx, .rdx); - } - if (rmw_op) |op| switch (op) { - .Xchg => { - try self.asmRegisterMemory(.mov, .rbx, val_lo_mem); - try self.asmRegisterMemory(.mov, .rcx, val_hi_mem); - }, - .Add => { - try self.asmRegisterMemory(.add, .rbx, val_lo_mem); - try self.asmRegisterMemory(.adc, .rcx, val_hi_mem); - }, - .Sub => { - try self.asmRegisterMemory(.sub, .rbx, val_lo_mem); - try self.asmRegisterMemory(.sbb, .rcx, val_hi_mem); - }, - .And => { - try self.asmRegisterMemory(.@"and", .rbx, val_lo_mem); - try self.asmRegisterMemory(.@"and", .rcx, val_hi_mem); - }, - .Nand => { - try self.asmRegisterMemory(.@"and", .rbx, val_lo_mem); - try self.asmRegisterMemory(.@"and", .rcx, val_hi_mem); - try self.asmRegister(.not, .rbx); - try self.asmRegister(.not, .rcx); - }, - .Or => { - try self.asmRegisterMemory(.@"or", .rbx, val_lo_mem); - try self.asmRegisterMemory(.@"or", .rcx, val_hi_mem); - }, - .Xor => { - try self.asmRegisterMemory(.xor, .rbx, val_lo_mem); - try self.asmRegisterMemory(.xor, .rcx, val_hi_mem); - }, - else => return self.fail( - "TODO implement x86 atomic loop for large abi {s}", - .{@tagName(op)}, - ), - }; - }, - else => return self.fail( - "TODO implement x86 atomic loop for large abi {s}", - .{@tagName(val_mcv)}, - ), + const val_mem_mcv: MCValue = switch (val_mcv) { + .memory, .indirect, .load_frame => val_mcv, + else => .{ .indirect = .{ + .reg = try self.copyToTmpRegister(Type.usize, val_mcv.address()), + } }, + }; + const val_lo_mem = val_mem_mcv.mem(.qword); + const val_hi_mem = val_mem_mcv.address().offset(8).deref().mem(.qword); + if (rmw_op != std.builtin.AtomicRmwOp.Xchg) { + try self.asmRegisterRegister(.{ ._, .mov }, .rbx, .rax); + try self.asmRegisterRegister(.{ ._, .mov }, .rcx, .rdx); } - _ = try self.addInst(.{ .tag = .cmpxchgb, .ops = .lock_m_sib, .data = .{ - .payload = try self.addExtra(Mir.MemorySib.encode(ptr_mem)), - } }); + if (rmw_op) |op| switch (op) { + .Xchg => { + try self.asmRegisterMemory(.{ ._, .mov }, .rbx, val_lo_mem); + try self.asmRegisterMemory(.{ ._, .mov }, .rcx, val_hi_mem); + }, + .Add => { + try self.asmRegisterMemory(.{ ._, .add }, .rbx, val_lo_mem); + try self.asmRegisterMemory(.{ ._, .adc }, .rcx, val_hi_mem); + }, + .Sub => { + try self.asmRegisterMemory(.{ ._, .sub }, .rbx, val_lo_mem); + try self.asmRegisterMemory(.{ ._, .sbb }, .rcx, val_hi_mem); + }, + .And => { + try self.asmRegisterMemory(.{ ._, .@"and" }, .rbx, val_lo_mem); + try self.asmRegisterMemory(.{ ._, .@"and" }, .rcx, val_hi_mem); + }, + .Nand => { + try self.asmRegisterMemory(.{ ._, .@"and" }, .rbx, val_lo_mem); + try self.asmRegisterMemory(.{ ._, .@"and" }, .rcx, val_hi_mem); + try self.asmRegister(.{ ._, .not }, .rbx); + try self.asmRegister(.{ ._, .not }, .rcx); + }, + .Or => { + try self.asmRegisterMemory(.{ ._, .@"or" }, .rbx, val_lo_mem); + try self.asmRegisterMemory(.{ ._, .@"or" }, .rcx, val_hi_mem); + }, + .Xor => { + try self.asmRegisterMemory(.{ ._, .xor }, .rbx, val_lo_mem); + try self.asmRegisterMemory(.{ ._, .xor }, .rcx, val_hi_mem); + }, + else => return self.fail("TODO implement x86 atomic loop for {} {s}", .{ + val_ty.fmt(self.bin_file.options.module.?), @tagName(op), + }), + }; + try self.asmMemory(.{ .@"lock _16b", .cmpxchg }, ptr_mem); _ = try self.asmJccReloc(loop, .ne); if (unused) return 
.unreach; const dst_mcv = try self.allocTempRegOrMem(val_ty, false); try self.asmMemoryRegister( - .mov, + .{ ._, .mov }, Memory.sib(.qword, .{ .base = .{ .frame = dst_mcv.load_frame.index }, .disp = dst_mcv.load_frame.off + 0, @@ -8005,7 +10675,7 @@ fn atomicOp( .rax, ); try self.asmMemoryRegister( - .mov, + .{ ._, .mov }, Memory.sib(.qword, .{ .base = .{ .frame = dst_mcv.load_frame.index }, .disp = dst_mcv.load_frame.off + 8, @@ -8028,10 +10698,10 @@ fn airAtomicRmw(self: *Self, inst: Air.Inst.Index) !void { const unused = self.liveness.isUnused(inst); - const ptr_ty = self.air.typeOf(pl_op.operand); + const ptr_ty = self.typeOf(pl_op.operand); const ptr_mcv = try self.resolveInst(pl_op.operand); - const val_ty = self.air.typeOf(extra.operand); + const val_ty = self.typeOf(extra.operand); const val_mcv = try self.resolveInst(extra.operand); const result = @@ -8042,7 +10712,7 @@ fn airAtomicRmw(self: *Self, inst: Air.Inst.Index) !void { fn airAtomicLoad(self: *Self, inst: Air.Inst.Index) !void { const atomic_load = self.air.instructions.items(.data)[inst].atomic_load; - const ptr_ty = self.air.typeOf(atomic_load.ptr); + const ptr_ty = self.typeOf(atomic_load.ptr); const ptr_mcv = try self.resolveInst(atomic_load.ptr); const ptr_lock = switch (ptr_mcv) { .register => |reg| self.register_manager.lockRegAssumeUnused(reg), @@ -8063,10 +10733,10 @@ fn airAtomicLoad(self: *Self, inst: Air.Inst.Index) !void { fn airAtomicStore(self: *Self, inst: Air.Inst.Index, order: std.builtin.AtomicOrder) !void { const bin_op = self.air.instructions.items(.data)[inst].bin_op; - const ptr_ty = self.air.typeOf(bin_op.lhs); + const ptr_ty = self.typeOf(bin_op.lhs); const ptr_mcv = try self.resolveInst(bin_op.lhs); - const val_ty = self.air.typeOf(bin_op.rhs); + const val_ty = self.typeOf(bin_op.rhs); const val_mcv = try self.resolveInst(bin_op.rhs); const result = try self.atomicOp(ptr_mcv, val_mcv, ptr_ty, val_ty, true, null, order); @@ -8074,6 +10744,7 @@ fn airAtomicStore(self: *Self, inst: Air.Inst.Index, order: std.builtin.AtomicOr } fn airMemset(self: *Self, inst: Air.Inst.Index, safety: bool) !void { + const mod = self.bin_file.options.module.?; if (safety) { // TODO if the value is undef, write 0xaa bytes to dest } else { @@ -8083,7 +10754,7 @@ fn airMemset(self: *Self, inst: Air.Inst.Index, safety: bool) !void { const bin_op = self.air.instructions.items(.data)[inst].bin_op; const dst_ptr = try self.resolveInst(bin_op.lhs); - const dst_ptr_ty = self.air.typeOf(bin_op.lhs); + const dst_ptr_ty = self.typeOf(bin_op.lhs); const dst_ptr_lock: ?RegisterLock = switch (dst_ptr) { .register => |reg| self.register_manager.lockRegAssumeUnused(reg), else => null, @@ -8091,26 +10762,26 @@ fn airMemset(self: *Self, inst: Air.Inst.Index, safety: bool) !void { defer if (dst_ptr_lock) |lock| self.register_manager.unlockReg(lock); const src_val = try self.resolveInst(bin_op.rhs); - const elem_ty = self.air.typeOf(bin_op.rhs); + const elem_ty = self.typeOf(bin_op.rhs); const src_val_lock: ?RegisterLock = switch (src_val) { .register => |reg| self.register_manager.lockRegAssumeUnused(reg), else => null, }; defer if (src_val_lock) |lock| self.register_manager.unlockReg(lock); - const elem_abi_size = @intCast(u31, elem_ty.abiSize(self.target.*)); + const elem_abi_size = @intCast(u31, elem_ty.abiSize(mod)); if (elem_abi_size == 1) { - const ptr: MCValue = switch (dst_ptr_ty.ptrSize()) { + const ptr: MCValue = switch (dst_ptr_ty.ptrSize(mod)) { // TODO: this only handles slices stored in the stack .Slice => dst_ptr, .One => 
dst_ptr, .C, .Many => unreachable, }; - const len: MCValue = switch (dst_ptr_ty.ptrSize()) { + const len: MCValue = switch (dst_ptr_ty.ptrSize(mod)) { // TODO: this only handles slices stored in the stack .Slice => dst_ptr.address().offset(8).deref(), - .One => .{ .immediate = dst_ptr_ty.childType().arrayLen() }, + .One => .{ .immediate = dst_ptr_ty.childType(mod).arrayLen(mod) }, .C, .Many => unreachable, }; const len_lock: ?RegisterLock = switch (len) { @@ -8126,10 +10797,9 @@ fn airMemset(self: *Self, inst: Air.Inst.Index, safety: bool) !void { // Store the first element, and then rely on memcpy copying forwards. // Length zero requires a runtime check - so we handle arrays specially // here to elide it. - switch (dst_ptr_ty.ptrSize()) { + switch (dst_ptr_ty.ptrSize(mod)) { .Slice => { - var buf: Type.SlicePtrFieldTypeBuffer = undefined; - const slice_ptr_ty = dst_ptr_ty.slicePtrFieldType(&buf); + const slice_ptr_ty = dst_ptr_ty.slicePtrFieldType(mod); // TODO: this only handles slices stored in the stack const ptr = dst_ptr; @@ -8157,20 +10827,21 @@ fn airMemset(self: *Self, inst: Air.Inst.Index, safety: bool) !void { .off = elem_abi_size, } }); - try self.genBinOpMir(.sub, Type.usize, len_mcv, .{ .immediate = 1 }); - try self.asmRegisterRegisterImmediate(.imul, len_reg, len_reg, Immediate.u(elem_abi_size)); + try self.genBinOpMir(.{ ._, .sub }, Type.usize, len_mcv, .{ .immediate = 1 }); + try self.asmRegisterRegisterImmediate( + .{ .i_, .mul }, + len_reg, + len_reg, + Immediate.u(elem_abi_size), + ); try self.genInlineMemcpy(second_elem_ptr_mcv, ptr, len_mcv); try self.performReloc(skip_reloc); }, .One => { - var elem_ptr_pl = Type.Payload.ElemType{ - .base = .{ .tag = .single_mut_pointer }, - .data = elem_ty, - }; - const elem_ptr_ty = Type.initPayload(&elem_ptr_pl.base); + const elem_ptr_ty = try mod.singleMutPtrType(elem_ty); - const len = dst_ptr_ty.childType().arrayLen(); + const len = dst_ptr_ty.childType(mod).arrayLen(mod); assert(len != 0); // prevented by Sema try self.store(elem_ptr_ty, dst_ptr, src_val); @@ -8195,10 +10866,11 @@ fn airMemset(self: *Self, inst: Air.Inst.Index, safety: bool) !void { } fn airMemcpy(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; const bin_op = self.air.instructions.items(.data)[inst].bin_op; const dst_ptr = try self.resolveInst(bin_op.lhs); - const dst_ptr_ty = self.air.typeOf(bin_op.lhs); + const dst_ptr_ty = self.typeOf(bin_op.lhs); const dst_ptr_lock: ?RegisterLock = switch (dst_ptr) { .register => |reg| self.register_manager.lockRegAssumeUnused(reg), else => null, @@ -8212,9 +10884,9 @@ fn airMemcpy(self: *Self, inst: Air.Inst.Index) !void { }; defer if (src_ptr_lock) |lock| self.register_manager.unlockReg(lock); - const len: MCValue = switch (dst_ptr_ty.ptrSize()) { + const len: MCValue = switch (dst_ptr_ty.ptrSize(mod)) { .Slice => dst_ptr.address().offset(8).deref(), - .One => .{ .immediate = dst_ptr_ty.childType().arrayLen() }, + .One => .{ .immediate = dst_ptr_ty.childType(mod).arrayLen(mod) }, .C, .Many => unreachable, }; const len_lock: ?RegisterLock = switch (len) { @@ -8230,17 +10902,51 @@ fn airMemcpy(self: *Self, inst: Air.Inst.Index) !void { } fn airTagName(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; const un_op = self.air.instructions.items(.data)[inst].un_op; + const inst_ty = self.typeOfIndex(inst); + const enum_ty = self.typeOf(un_op); + + // We need a properly aligned and sized call frame to be able to call this function. 
+ { + const needed_call_frame = FrameAlloc.init(.{ + .size = inst_ty.abiSize(mod), + .alignment = inst_ty.abiAlignment(mod), + }); + const frame_allocs_slice = self.frame_allocs.slice(); + const stack_frame_size = + &frame_allocs_slice.items(.abi_size)[@enumToInt(FrameIndex.call_frame)]; + stack_frame_size.* = @max(stack_frame_size.*, needed_call_frame.abi_size); + const stack_frame_align = + &frame_allocs_slice.items(.abi_align)[@enumToInt(FrameIndex.call_frame)]; + stack_frame_align.* = @max(stack_frame_align.*, needed_call_frame.abi_align); + } + + try self.spillEflagsIfOccupied(); + try self.spillRegisters(abi.getCallerPreservedRegs(self.target.*)); + + const param_regs = abi.getCAbiIntParamRegs(self.target.*); + + const dst_mcv = try self.allocRegOrMem(inst, false); + try self.genSetReg(param_regs[0], Type.usize, dst_mcv.address()); + const operand = try self.resolveInst(un_op); - _ = operand; - return self.fail("TODO implement airTagName for x86_64", .{}); - //return self.finishAir(inst, result, .{ un_op, .none, .none }); + try self.genSetReg(param_regs[1], enum_ty, operand); + + try self.genLazySymbolRef( + .call, + .rax, + link.File.LazySymbol.initDecl(.code, enum_ty.getOwnerDecl(mod), mod), + ); + + return self.finishAir(inst, dst_mcv, .{ un_op, .none, .none }); } fn airErrorName(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; const un_op = self.air.instructions.items(.data)[inst].un_op; - const err_ty = self.air.typeOf(un_op); + const err_ty = self.typeOf(un_op); const err_mcv = try self.resolveInst(un_op); const err_reg = try self.copyToTmpRegister(err_ty, err_mcv); const err_lock = self.register_manager.lockRegAssumeUnused(err_reg); @@ -8249,37 +10955,7 @@ fn airErrorName(self: *Self, inst: Air.Inst.Index) !void { const addr_reg = try self.register_manager.allocReg(null, gp); const addr_lock = self.register_manager.lockRegAssumeUnused(addr_reg); defer self.register_manager.unlockReg(addr_lock); - - if (self.bin_file.cast(link.File.Elf)) |elf_file| { - const atom_index = try elf_file.getOrCreateAtomForLazySymbol( - .{ .kind = .const_data, .ty = Type.anyerror }, - 4, // dword alignment - ); - const atom = elf_file.getAtom(atom_index); - _ = try atom.getOrCreateOffsetTableEntry(elf_file); - const got_addr = atom.getOffsetTableAddress(elf_file); - try self.asmRegisterMemory( - .mov, - addr_reg.to64(), - Memory.sib(.qword, .{ .base = .{ .reg = .ds }, .disp = @intCast(i32, got_addr) }), - ); - } else if (self.bin_file.cast(link.File.Coff)) |coff_file| { - const atom_index = try coff_file.getOrCreateAtomForLazySymbol( - .{ .kind = .const_data, .ty = Type.anyerror }, - 4, // dword alignment - ); - const sym_index = coff_file.getAtom(atom_index).getSymbolIndex().?; - try self.genSetReg(addr_reg, Type.usize, .{ .lea_got = sym_index }); - } else if (self.bin_file.cast(link.File.MachO)) |macho_file| { - const atom_index = try macho_file.getOrCreateAtomForLazySymbol( - .{ .kind = .const_data, .ty = Type.anyerror }, - 4, // dword alignment - ); - const sym_index = macho_file.getAtom(atom_index).getSymbolIndex().?; - try self.genSetReg(addr_reg, Type.usize, .{ .lea_got = sym_index }); - } else { - return self.fail("TODO implement airErrorName for x86_64 {s}", .{@tagName(self.bin_file.tag)}); - } + try self.genLazySymbolRef(.lea, addr_reg, link.File.LazySymbol.initDecl(.const_data, null, mod)); const start_reg = try self.register_manager.allocReg(null, gp); const start_lock = self.register_manager.lockRegAssumeUnused(start_reg); @@ -8292,7 +10968,7 @@ 
fn airErrorName(self: *Self, inst: Air.Inst.Index) !void { try self.truncateRegister(err_ty, err_reg.to32()); try self.asmRegisterMemory( - .mov, + .{ ._, .mov }, start_reg.to32(), Memory.sib(.dword, .{ .base = .{ .reg = addr_reg.to64() }, @@ -8301,7 +10977,7 @@ fn airErrorName(self: *Self, inst: Air.Inst.Index) !void { }), ); try self.asmRegisterMemory( - .mov, + .{ ._, .mov }, end_reg.to32(), Memory.sib(.dword, .{ .base = .{ .reg = addr_reg.to64() }, @@ -8309,9 +10985,9 @@ fn airErrorName(self: *Self, inst: Air.Inst.Index) !void { .disp = 8, }), ); - try self.asmRegisterRegister(.sub, end_reg.to32(), start_reg.to32()); + try self.asmRegisterRegister(.{ ._, .sub }, end_reg.to32(), start_reg.to32()); try self.asmRegisterMemory( - .lea, + .{ ._, .lea }, start_reg.to64(), Memory.sib(.byte, .{ .base = .{ .reg = addr_reg.to64() }, @@ -8320,7 +10996,7 @@ fn airErrorName(self: *Self, inst: Air.Inst.Index) !void { }), ); try self.asmRegisterMemory( - .lea, + .{ ._, .lea }, end_reg.to32(), Memory.sib(.byte, .{ .base = .{ .reg = end_reg.to64() }, @@ -8330,7 +11006,7 @@ fn airErrorName(self: *Self, inst: Air.Inst.Index) !void { const dst_mcv = try self.allocRegOrMem(inst, false); try self.asmMemoryRegister( - .mov, + .{ ._, .mov }, Memory.sib(.qword, .{ .base = .{ .frame = dst_mcv.load_frame.index }, .disp = dst_mcv.load_frame.off, @@ -8338,7 +11014,7 @@ fn airErrorName(self: *Self, inst: Air.Inst.Index) !void { start_reg.to64(), ); try self.asmMemoryRegister( - .mov, + .{ ._, .mov }, Memory.sib(.qword, .{ .base = .{ .frame = dst_mcv.load_frame.index }, .disp = dst_mcv.load_frame.off + 8, @@ -8350,10 +11026,202 @@ fn airErrorName(self: *Self, inst: Air.Inst.Index) !void { } fn airSplat(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; const ty_op = self.air.instructions.items(.data)[inst].ty_op; - _ = ty_op; - return self.fail("TODO implement airSplat for x86_64", .{}); - //return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); + const vector_ty = self.typeOfIndex(inst); + const dst_rc = regClassForType(vector_ty, mod); + const scalar_ty = vector_ty.scalarType(mod); + + const src_mcv = try self.resolveInst(ty_op.operand); + const result: MCValue = result: { + switch (scalar_ty.zigTypeTag(mod)) { + else => {}, + .Float => switch (scalar_ty.floatBits(self.target.*)) { + 32 => switch (vector_ty.vectorLen(mod)) { + 1 => { + if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) break :result src_mcv; + const dst_reg = try self.register_manager.allocReg(inst, dst_rc); + try self.genSetReg(dst_reg, scalar_ty, src_mcv); + break :result .{ .register = dst_reg }; + }, + 2...4 => { + if (self.hasFeature(.avx)) { + const dst_reg = try self.register_manager.allocReg(inst, dst_rc); + if (src_mcv.isMemory()) try self.asmRegisterMemory( + .{ .v_ss, .broadcast }, + dst_reg.to128(), + src_mcv.mem(.dword), + ) else { + const src_reg = if (src_mcv.isRegister()) + src_mcv.getReg().? 
+ else + try self.copyToTmpRegister(scalar_ty, src_mcv); + try self.asmRegisterRegisterRegisterImmediate( + .{ .v_ps, .shuf }, + dst_reg.to128(), + src_reg.to128(), + src_reg.to128(), + Immediate.u(0), + ); + } + break :result .{ .register = dst_reg }; + } else { + const dst_mcv = if (src_mcv.isRegister() and + self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) + src_mcv + else + try self.copyToRegisterWithInstTracking(inst, scalar_ty, src_mcv); + const dst_reg = dst_mcv.getReg().?; + try self.asmRegisterRegisterImmediate( + .{ ._ps, .shuf }, + dst_reg.to128(), + dst_reg.to128(), + Immediate.u(0), + ); + break :result dst_mcv; + } + }, + 5...8 => if (self.hasFeature(.avx)) { + const dst_reg = try self.register_manager.allocReg(inst, dst_rc); + if (src_mcv.isMemory()) try self.asmRegisterMemory( + .{ .v_ss, .broadcast }, + dst_reg.to256(), + src_mcv.mem(.dword), + ) else { + const src_reg = if (src_mcv.isRegister()) + src_mcv.getReg().? + else + try self.copyToTmpRegister(scalar_ty, src_mcv); + if (self.hasFeature(.avx2)) try self.asmRegisterRegister( + .{ .v_ss, .broadcast }, + dst_reg.to256(), + src_reg.to128(), + ) else { + try self.asmRegisterRegisterRegisterImmediate( + .{ .v_ps, .shuf }, + dst_reg.to128(), + src_reg.to128(), + src_reg.to128(), + Immediate.u(0), + ); + try self.asmRegisterRegisterRegisterImmediate( + .{ .v_f128, .insert }, + dst_reg.to256(), + dst_reg.to256(), + dst_reg.to128(), + Immediate.u(1), + ); + } + } + break :result .{ .register = dst_reg }; + }, + else => {}, + }, + 64 => switch (vector_ty.vectorLen(mod)) { + 1 => { + if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) break :result src_mcv; + const dst_reg = try self.register_manager.allocReg(inst, dst_rc); + try self.genSetReg(dst_reg, scalar_ty, src_mcv); + break :result .{ .register = dst_reg }; + }, + 2 => { + const dst_reg = try self.register_manager.allocReg(inst, dst_rc); + if (self.hasFeature(.sse3)) { + if (src_mcv.isMemory()) try self.asmRegisterMemory( + if (self.hasFeature(.avx)) .{ .v_, .movddup } else .{ ._, .movddup }, + dst_reg.to128(), + src_mcv.mem(.qword), + ) else try self.asmRegisterRegister( + if (self.hasFeature(.avx)) .{ .v_, .movddup } else .{ ._, .movddup }, + dst_reg.to128(), + (if (src_mcv.isRegister()) + src_mcv.getReg().? + else + try self.copyToTmpRegister(scalar_ty, src_mcv)).to128(), + ); + break :result .{ .register = dst_reg }; + } else try self.asmRegisterRegister( + .{ ._ps, .movlh }, + dst_reg.to128(), + (if (src_mcv.isRegister()) + src_mcv.getReg().? + else + try self.copyToTmpRegister(scalar_ty, src_mcv)).to128(), + ); + }, + 3...4 => if (self.hasFeature(.avx)) { + const dst_reg = try self.register_manager.allocReg(inst, dst_rc); + if (src_mcv.isMemory()) try self.asmRegisterMemory( + .{ .v_sd, .broadcast }, + dst_reg.to256(), + src_mcv.mem(.qword), + ) else { + const src_reg = if (src_mcv.isRegister()) + src_mcv.getReg().? 
+ else + try self.copyToTmpRegister(scalar_ty, src_mcv); + if (self.hasFeature(.avx2)) try self.asmRegisterRegister( + .{ .v_sd, .broadcast }, + dst_reg.to256(), + src_reg.to128(), + ) else { + try self.asmRegisterRegister( + .{ .v_, .movddup }, + dst_reg.to128(), + src_reg.to128(), + ); + try self.asmRegisterRegisterRegisterImmediate( + .{ .v_f128, .insert }, + dst_reg.to256(), + dst_reg.to256(), + dst_reg.to128(), + Immediate.u(1), + ); + } + } + break :result .{ .register = dst_reg }; + }, + else => {}, + }, + 128 => switch (vector_ty.vectorLen(mod)) { + 1 => { + if (self.reuseOperand(inst, ty_op.operand, 0, src_mcv)) break :result src_mcv; + const dst_reg = try self.register_manager.allocReg(inst, dst_rc); + try self.genSetReg(dst_reg, scalar_ty, src_mcv); + break :result .{ .register = dst_reg }; + }, + 2 => if (self.hasFeature(.avx)) { + const dst_reg = try self.register_manager.allocReg(inst, dst_rc); + if (src_mcv.isMemory()) try self.asmRegisterMemory( + .{ .v_f128, .broadcast }, + dst_reg.to256(), + src_mcv.mem(.xword), + ) else { + const src_reg = if (src_mcv.isRegister()) + src_mcv.getReg().? + else + try self.copyToTmpRegister(scalar_ty, src_mcv); + try self.asmRegisterRegisterRegisterImmediate( + .{ .v_f128, .insert }, + dst_reg.to256(), + src_reg.to256(), + src_reg.to128(), + Immediate.u(1), + ); + } + break :result .{ .register = dst_reg }; + }, + else => {}, + }, + 16, 80 => {}, + else => unreachable, + }, + } + return self.fail("TODO implement airSplat for {}", .{ + vector_ty.fmt(self.bin_file.options.module.?), + }); + }; + return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); } fn airSelect(self: *Self, inst: Air.Inst.Index) !void { @@ -8365,8 +11233,8 @@ fn airSelect(self: *Self, inst: Air.Inst.Index) !void { } fn airShuffle(self: *Self, inst: Air.Inst.Index) !void { - const ty_op = self.air.instructions.items(.data)[inst].ty_op; - _ = ty_op; + const ty_pl = self.air.instructions.items(.data)[inst].ty_pl; + _ = ty_pl; return self.fail("TODO implement airShuffle for x86_64", .{}); //return self.finishAir(inst, result, .{ ty_op.operand, .none, .none }); } @@ -8379,36 +11247,37 @@ fn airReduce(self: *Self, inst: Air.Inst.Index) !void { } fn airAggregateInit(self: *Self, inst: Air.Inst.Index) !void { - const result_ty = self.air.typeOfIndex(inst); - const len = @intCast(usize, result_ty.arrayLen()); + const mod = self.bin_file.options.module.?; + const result_ty = self.typeOfIndex(inst); + const len = @intCast(usize, result_ty.arrayLen(mod)); const ty_pl = self.air.instructions.items(.data)[inst].ty_pl; const elements = @ptrCast([]const Air.Inst.Ref, self.air.extra[ty_pl.payload..][0..len]); const result: MCValue = result: { - switch (result_ty.zigTypeTag()) { + switch (result_ty.zigTypeTag(mod)) { .Struct => { const frame_index = - try self.allocFrameIndex(FrameAlloc.initType(result_ty, self.target.*)); - if (result_ty.containerLayout() == .Packed) { - const struct_obj = result_ty.castTag(.@"struct").?.data; + try self.allocFrameIndex(FrameAlloc.initType(result_ty, mod)); + if (result_ty.containerLayout(mod) == .Packed) { + const struct_obj = mod.typeToStruct(result_ty).?; try self.genInlineMemset( .{ .lea_frame = .{ .index = frame_index } }, .{ .immediate = 0 }, - .{ .immediate = result_ty.abiSize(self.target.*) }, + .{ .immediate = result_ty.abiSize(mod) }, ); for (elements, 0..) 
|elem, elem_i| { - if (result_ty.structFieldValueComptime(elem_i) != null) continue; + if ((try result_ty.structFieldValueComptime(mod, elem_i)) != null) continue; - const elem_ty = result_ty.structFieldType(elem_i); - const elem_bit_size = @intCast(u32, elem_ty.bitSize(self.target.*)); + const elem_ty = result_ty.structFieldType(elem_i, mod); + const elem_bit_size = @intCast(u32, elem_ty.bitSize(mod)); if (elem_bit_size > 64) { return self.fail( "TODO airAggregateInit implement packed structs with large fields", .{}, ); } - const elem_abi_size = @intCast(u32, elem_ty.abiSize(self.target.*)); + const elem_abi_size = @intCast(u32, elem_ty.abiSize(mod)); const elem_abi_bits = elem_abi_size * 8; - const elem_off = struct_obj.packedFieldBitOffset(self.target.*, elem_i); + const elem_off = struct_obj.packedFieldBitOffset(mod, elem_i); const elem_byte_off = @intCast(i32, elem_off / elem_abi_bits * elem_abi_size); const elem_bit_off = elem_off % elem_abi_bits; const elem_mcv = try self.resolveInst(elem); @@ -8434,13 +11303,13 @@ fn airAggregateInit(self: *Self, inst: Air.Inst.Index) !void { try self.truncateRegister(elem_ty, elem_reg); } if (elem_bit_off > 0) try self.genShiftBinOpMir( - .shl, + .{ ._l, .sh }, elem_ty, .{ .register = elem_reg }, .{ .immediate = elem_bit_off }, ); try self.genBinOpMir( - .@"or", + .{ ._, .@"or" }, elem_ty, .{ .load_frame = .{ .index = frame_index, .off = elem_byte_off } }, .{ .register = elem_reg }, @@ -8451,13 +11320,13 @@ fn airAggregateInit(self: *Self, inst: Air.Inst.Index) !void { try self.truncateRegister(elem_ty, registerAlias(reg, elem_abi_size)); } try self.genShiftBinOpMir( - .shr, + .{ ._r, .sh }, elem_ty, .{ .register = reg }, .{ .immediate = elem_abi_bits - elem_bit_off }, ); try self.genBinOpMir( - .@"or", + .{ ._, .@"or" }, elem_ty, .{ .load_frame = .{ .index = frame_index, @@ -8468,10 +11337,10 @@ fn airAggregateInit(self: *Self, inst: Air.Inst.Index) !void { } } } else for (elements, 0..) |elem, elem_i| { - if (result_ty.structFieldValueComptime(elem_i) != null) continue; + if ((try result_ty.structFieldValueComptime(mod, elem_i)) != null) continue; - const elem_ty = result_ty.structFieldType(elem_i); - const elem_off = @intCast(i32, result_ty.structFieldOffset(elem_i, self.target.*)); + const elem_ty = result_ty.structFieldType(elem_i, mod); + const elem_off = @intCast(i32, result_ty.structFieldOffset(elem_i, mod)); const elem_mcv = try self.resolveInst(elem); const mat_elem_mcv = switch (elem_mcv) { .load_tlv => |sym_index| MCValue{ .lea_tlv = sym_index }, @@ -8483,9 +11352,9 @@ fn airAggregateInit(self: *Self, inst: Air.Inst.Index) !void { }, .Array => { const frame_index = - try self.allocFrameIndex(FrameAlloc.initType(result_ty, self.target.*)); - const elem_ty = result_ty.childType(); - const elem_size = @intCast(u32, elem_ty.abiSize(self.target.*)); + try self.allocFrameIndex(FrameAlloc.initType(result_ty, mod)); + const elem_ty = result_ty.childType(mod); + const elem_size = @intCast(u32, elem_ty.abiSize(mod)); for (elements, 0..) 
|elem, elem_i| { const elem_mcv = try self.resolveInst(elem); @@ -8496,6 +11365,12 @@ fn airAggregateInit(self: *Self, inst: Air.Inst.Index) !void { const elem_off = @intCast(i32, elem_size * elem_i); try self.genSetMem(.{ .frame = frame_index }, elem_off, elem_ty, mat_elem_mcv); } + if (result_ty.sentinel(mod)) |sentinel| try self.genSetMem( + .{ .frame = frame_index }, + @intCast(i32, elem_size * elements.len), + elem_ty, + try self.genTypedValue(.{ .ty = elem_ty, .val = sentinel }), + ); break :result .{ .load_frame = .{ .index = frame_index } }; }, .Vector => return self.fail("TODO implement aggregate_init for vectors", .{}), @@ -8514,11 +11389,47 @@ fn airAggregateInit(self: *Self, inst: Air.Inst.Index) !void { } fn airUnionInit(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; const ty_pl = self.air.instructions.items(.data)[inst].ty_pl; const extra = self.air.extraData(Air.UnionInit, ty_pl.payload).data; - _ = extra; - return self.fail("TODO implement airAggregateInit for x86_64", .{}); - //return self.finishAir(inst, result, .{ extra.init, .none, .none }); + const result: MCValue = result: { + const union_ty = self.typeOfIndex(inst); + const layout = union_ty.unionGetLayout(mod); + + const src_ty = self.typeOf(extra.init); + const src_mcv = try self.resolveInst(extra.init); + if (layout.tag_size == 0) { + if (self.reuseOperand(inst, extra.init, 0, src_mcv)) break :result src_mcv; + + const dst_mcv = try self.allocRegOrMem(inst, true); + try self.genCopy(union_ty, dst_mcv, src_mcv); + break :result dst_mcv; + } + + const dst_mcv = try self.allocRegOrMem(inst, false); + + const union_obj = mod.typeToUnion(union_ty).?; + const field_name = union_obj.fields.keys()[extra.field_index]; + const tag_ty = union_obj.tag_ty; + const field_index = tag_ty.enumFieldIndex(field_name, mod).?; + const tag_val = try mod.enumValueFieldIndex(tag_ty, field_index); + const tag_int_val = try tag_val.enumToInt(tag_ty, mod); + const tag_int = tag_int_val.toUnsignedInt(mod); + const tag_off = if (layout.tag_align < layout.payload_align) + @intCast(i32, layout.payload_size) + else + 0; + try self.genCopy(tag_ty, dst_mcv.address().offset(tag_off).deref(), .{ .immediate = tag_int }); + + const pl_off = if (layout.tag_align < layout.payload_align) + 0 + else + @intCast(i32, layout.tag_size); + try self.genCopy(src_ty, dst_mcv.address().offset(pl_off).deref(), src_mcv); + + break :result dst_mcv; + }; + return self.finishAir(inst, result, .{ extra.init, .none, .none }); } fn airPrefetch(self: *Self, inst: Air.Inst.Index) !void { @@ -8527,30 +11438,172 @@ fn airPrefetch(self: *Self, inst: Air.Inst.Index) !void { } fn airMulAdd(self: *Self, inst: Air.Inst.Index) !void { + const mod = self.bin_file.options.module.?; const pl_op = self.air.instructions.items(.data)[inst].pl_op; const extra = self.air.extraData(Air.Bin, pl_op.payload).data; - _ = extra; - return self.fail("TODO implement airMulAdd for x86_64", .{}); - //return self.finishAir(inst, result, .{ extra.lhs, extra.rhs, pl_op.operand }); + const ty = self.typeOfIndex(inst); + + if (!self.hasFeature(.fma)) return self.fail("TODO implement airMulAdd for {}", .{ + ty.fmt(self.bin_file.options.module.?), + }); + + const ops = [3]Air.Inst.Ref{ extra.lhs, extra.rhs, pl_op.operand }; + var mcvs: [3]MCValue = undefined; + var locks = [1]?RegisterManager.RegisterLock{null} ** 3; + defer for (locks) |reg_lock| if (reg_lock) |lock| self.register_manager.unlockReg(lock); + var order = [1]u2{0} ** 3; + var unused = 
std.StaticBitSet(3).initFull(); + for (ops, &mcvs, &locks, 0..) |op, *mcv, *lock, op_i| { + const op_index = @intCast(u2, op_i); + mcv.* = try self.resolveInst(op); + if (unused.isSet(0) and mcv.isRegister() and self.reuseOperand(inst, op, op_index, mcv.*)) { + order[op_index] = 1; + unused.unset(0); + } else if (unused.isSet(2) and mcv.isMemory()) { + order[op_index] = 3; + unused.unset(2); + } + switch (mcv.*) { + .register => |reg| lock.* = self.register_manager.lockReg(reg), + else => {}, + } + } + for (&order, &mcvs, &locks) |*mop_index, *mcv, *lock| { + if (mop_index.* != 0) continue; + mop_index.* = 1 + @intCast(u2, unused.toggleFirstSet().?); + if (mop_index.* > 1 and mcv.isRegister()) continue; + const reg = try self.copyToTmpRegister(ty, mcv.*); + mcv.* = .{ .register = reg }; + if (lock.*) |old_lock| self.register_manager.unlockReg(old_lock); + lock.* = self.register_manager.lockRegAssumeUnused(reg); + } + + const mir_tag = if (@as( + ?Mir.Inst.FixedTag, + if (mem.eql(u2, &order, &.{ 1, 3, 2 }) or mem.eql(u2, &order, &.{ 3, 1, 2 })) + switch (ty.zigTypeTag(mod)) { + .Float => switch (ty.floatBits(self.target.*)) { + 32 => .{ .v_ss, .fmadd132 }, + 64 => .{ .v_sd, .fmadd132 }, + 16, 80, 128 => null, + else => unreachable, + }, + .Vector => switch (ty.childType(mod).zigTypeTag(mod)) { + .Float => switch (ty.childType(mod).floatBits(self.target.*)) { + 32 => switch (ty.vectorLen(mod)) { + 1 => .{ .v_ss, .fmadd132 }, + 2...8 => .{ .v_ps, .fmadd132 }, + else => null, + }, + 64 => switch (ty.vectorLen(mod)) { + 1 => .{ .v_sd, .fmadd132 }, + 2...4 => .{ .v_pd, .fmadd132 }, + else => null, + }, + 16, 80, 128 => null, + else => unreachable, + }, + else => unreachable, + }, + else => unreachable, + } + else if (mem.eql(u2, &order, &.{ 2, 1, 3 }) or mem.eql(u2, &order, &.{ 1, 2, 3 })) + switch (ty.zigTypeTag(mod)) { + .Float => switch (ty.floatBits(self.target.*)) { + 32 => .{ .v_ss, .fmadd213 }, + 64 => .{ .v_sd, .fmadd213 }, + 16, 80, 128 => null, + else => unreachable, + }, + .Vector => switch (ty.childType(mod).zigTypeTag(mod)) { + .Float => switch (ty.childType(mod).floatBits(self.target.*)) { + 32 => switch (ty.vectorLen(mod)) { + 1 => .{ .v_ss, .fmadd213 }, + 2...8 => .{ .v_ps, .fmadd213 }, + else => null, + }, + 64 => switch (ty.vectorLen(mod)) { + 1 => .{ .v_sd, .fmadd213 }, + 2...4 => .{ .v_pd, .fmadd213 }, + else => null, + }, + 16, 80, 128 => null, + else => unreachable, + }, + else => unreachable, + }, + else => unreachable, + } + else if (mem.eql(u2, &order, &.{ 2, 3, 1 }) or mem.eql(u2, &order, &.{ 3, 2, 1 })) + switch (ty.zigTypeTag(mod)) { + .Float => switch (ty.floatBits(self.target.*)) { + 32 => .{ .v_ss, .fmadd231 }, + 64 => .{ .v_sd, .fmadd231 }, + 16, 80, 128 => null, + else => unreachable, + }, + .Vector => switch (ty.childType(mod).zigTypeTag(mod)) { + .Float => switch (ty.childType(mod).floatBits(self.target.*)) { + 32 => switch (ty.vectorLen(mod)) { + 1 => .{ .v_ss, .fmadd231 }, + 2...8 => .{ .v_ps, .fmadd231 }, + else => null, + }, + 64 => switch (ty.vectorLen(mod)) { + 1 => .{ .v_sd, .fmadd231 }, + 2...4 => .{ .v_pd, .fmadd231 }, + else => null, + }, + 16, 80, 128 => null, + else => unreachable, + }, + else => unreachable, + }, + else => unreachable, + } + else + unreachable, + )) |tag| tag else return self.fail("TODO implement airMulAdd for {}", .{ + ty.fmt(self.bin_file.options.module.?), + }); + + var mops: [3]MCValue = undefined; + for (order, mcvs) |mop_index, mcv| mops[mop_index - 1] = mcv; + + const abi_size = @intCast(u32, ty.abiSize(mod)); + const 
mop1_reg = registerAlias(mops[0].getReg().?, abi_size); + const mop2_reg = registerAlias(mops[1].getReg().?, abi_size); + if (mops[2].isRegister()) try self.asmRegisterRegisterRegister( + mir_tag, + mop1_reg, + mop2_reg, + registerAlias(mops[2].getReg().?, abi_size), + ) else try self.asmRegisterRegisterMemory( + mir_tag, + mop1_reg, + mop2_reg, + mops[2].mem(Memory.PtrSize.fromSize(abi_size)), + ); + return self.finishAir(inst, mops[0], ops); } fn resolveInst(self: *Self, ref: Air.Inst.Ref) InnerError!MCValue { - const ty = self.air.typeOf(ref); + const mod = self.bin_file.options.module.?; + const ty = self.typeOf(ref); // If the type has no codegen bits, no need to store it. - if (!ty.hasRuntimeBitsIgnoreComptime()) return .none; + if (!ty.hasRuntimeBitsIgnoreComptime(mod)) return .none; if (Air.refToIndex(ref)) |inst| { const mcv = switch (self.air.instructions.items(.tag)[inst]) { - .constant => tracking: { + .interned => tracking: { const gop = try self.const_tracking.getOrPut(self.gpa, inst); if (!gop.found_existing) gop.value_ptr.* = InstTracking.init(try self.genTypedValue(.{ .ty = ty, - .val = self.air.value(ref).?, + .val = self.air.instructions.items(.data)[inst].interned.toValue(), })); break :tracking gop.value_ptr; }, - .const_ty => unreachable, else => self.inst_tracking.getPtr(inst).?, }.short; switch (mcv) { @@ -8559,13 +11612,12 @@ fn resolveInst(self: *Self, ref: Air.Inst.Ref) InnerError!MCValue { } } - return self.genTypedValue(.{ .ty = ty, .val = self.air.value(ref).? }); + return self.genTypedValue(.{ .ty = ty, .val = (try self.air.value(ref, mod)).? }); } fn getResolvedInstValue(self: *Self, inst: Air.Inst.Index) *InstTracking { const tracking = switch (self.air.instructions.items(.tag)[inst]) { - .constant => &self.const_tracking, - .const_ty => unreachable, + .interned => &self.const_tracking, else => &self.inst_tracking, }.getPtr(inst).?; return switch (tracking.short) { @@ -8596,12 +11648,8 @@ fn limitImmediateType(self: *Self, operand: Air.Inst.Ref, comptime T: type) !MCV } fn genTypedValue(self: *Self, arg_tv: TypedValue) InnerError!MCValue { - const mcv: MCValue = switch (try codegen.genTypedValue( - self.bin_file, - self.src_loc, - arg_tv, - self.mod_fn.owner_decl, - )) { + const mod = self.bin_file.options.module.?; + return switch (try codegen.genTypedValue(self.bin_file, self.src_loc, arg_tv, self.owner.getDecl(mod))) { .mcv => |mcv| switch (mcv) { .none => .none, .undef => .undef, @@ -8616,7 +11664,6 @@ fn genTypedValue(self: *Self, arg_tv: TypedValue) InnerError!MCValue { return error.CodegenFail; }, }; - return mcv; } const CallMCValues = struct { @@ -8634,17 +11681,23 @@ const CallMCValues = struct { /// Caller must call `CallMCValues.deinit`. 
fn resolveCallingConventionValues( self: *Self, - fn_ty: Type, + fn_info: InternPool.Key.FuncType, var_args: []const Air.Inst.Ref, stack_frame_base: FrameIndex, ) !CallMCValues { - const cc = fn_ty.fnCallingConvention(); - const param_len = fn_ty.fnParamLen(); - const param_types = try self.gpa.alloc(Type, param_len + var_args.len); + const mod = self.bin_file.options.module.?; + const cc = fn_info.cc; + const param_types = try self.gpa.alloc(Type, fn_info.param_types.len + var_args.len); defer self.gpa.free(param_types); - fn_ty.fnParamTypes(param_types); + + for (param_types[0..fn_info.param_types.len], fn_info.param_types) |*dest, src| { + dest.* = src.toType(); + } // TODO: promote var arg types - for (param_types[param_len..], var_args) |*param_ty, arg| param_ty.* = self.air.typeOf(arg); + for (param_types[fn_info.param_types.len..], var_args) |*param_ty, arg| { + param_ty.* = self.typeOf(arg); + } + var result: CallMCValues = .{ .args = try self.gpa.alloc(MCValue, param_types.len), // These undefined values must be populated before returning from this function. @@ -8654,7 +11707,7 @@ fn resolveCallingConventionValues( }; errdefer self.gpa.free(result.args); - const ret_ty = fn_ty.fnReturnType(); + const ret_ty = fn_info.return_type.toType(); switch (cc) { .Naked => { @@ -8664,67 +11717,97 @@ fn resolveCallingConventionValues( }, .C => { var param_reg_i: usize = 0; + var param_sse_reg_i: usize = 0; result.stack_align = 16; switch (self.target.os.tag) { .windows => { // Align the stack to 16bytes before allocating shadow stack space (if any). - result.stack_byte_count += @intCast(u31, 4 * Type.usize.abiSize(self.target.*)); + result.stack_byte_count += @intCast(u31, 4 * Type.usize.abiSize(mod)); }, else => {}, } // Return values - if (ret_ty.zigTypeTag() == .NoReturn) { + if (ret_ty.zigTypeTag(mod) == .NoReturn) { result.return_value = InstTracking.init(.unreach); - } else if (!ret_ty.hasRuntimeBitsIgnoreComptime()) { + } else if (!ret_ty.hasRuntimeBitsIgnoreComptime(mod)) { // TODO: is this even possible for C calling convention? 
result.return_value = InstTracking.init(.none); } else { - const ret_reg = abi.getCAbiIntReturnRegs(self.target.*)[0]; - const ret_ty_size = @intCast(u31, ret_ty.abiSize(self.target.*)); - if (ret_ty_size <= 8) { - const aliased_reg = registerAlias(ret_reg, ret_ty_size); - result.return_value = .{ .short = .{ .register = aliased_reg }, .long = .none }; - } else { - const ret_indirect_reg = abi.getCAbiIntParamRegs(self.target.*)[param_reg_i]; - param_reg_i += 1; - result.return_value = .{ - .short = .{ .indirect = .{ .reg = ret_reg } }, - .long = .{ .indirect = .{ .reg = ret_indirect_reg } }, - }; + const classes = switch (self.target.os.tag) { + .windows => &[1]abi.Class{abi.classifyWindows(ret_ty, mod)}, + else => mem.sliceTo(&abi.classifySystemV(ret_ty, mod, .ret), .none), + }; + if (classes.len > 1) { + return self.fail("TODO handle multiple classes per type", .{}); } + const ret_reg = abi.getCAbiIntReturnRegs(self.target.*)[0]; + result.return_value = switch (classes[0]) { + .integer => InstTracking.init(.{ .register = registerAlias( + ret_reg, + @intCast(u32, ret_ty.abiSize(mod)), + ) }), + .float, .sse => InstTracking.init(.{ .register = .xmm0 }), + .memory => ret: { + const ret_indirect_reg = abi.getCAbiIntParamRegs(self.target.*)[param_reg_i]; + param_reg_i += 1; + break :ret .{ + .short = .{ .indirect = .{ .reg = ret_reg } }, + .long = .{ .indirect = .{ .reg = ret_indirect_reg } }, + }; + }, + else => |class| return self.fail("TODO handle calling convention class {s}", .{ + @tagName(class), + }), + }; } // Input params for (param_types, result.args) |ty, *arg| { - assert(ty.hasRuntimeBitsIgnoreComptime()); + assert(ty.hasRuntimeBitsIgnoreComptime(mod)); - const classes: []const abi.Class = switch (self.target.os.tag) { - .windows => &[1]abi.Class{abi.classifyWindows(ty, self.target.*)}, - else => mem.sliceTo(&abi.classifySystemV(ty, self.target.*, .arg), .none), + const classes = switch (self.target.os.tag) { + .windows => &[1]abi.Class{abi.classifyWindows(ty, mod)}, + else => mem.sliceTo(&abi.classifySystemV(ty, mod, .arg), .none), }; if (classes.len > 1) { return self.fail("TODO handle multiple classes per type", .{}); } switch (classes[0]) { - .integer => blk: { - if (param_reg_i >= abi.getCAbiIntParamRegs(self.target.*).len) break :blk; - const param_reg = abi.getCAbiIntParamRegs(self.target.*)[param_reg_i]; + .integer => if (param_reg_i < abi.getCAbiIntParamRegs(self.target.*).len) { + arg.* = .{ .register = abi.getCAbiIntParamRegs(self.target.*)[param_reg_i] }; param_reg_i += 1; - arg.* = .{ .register = param_reg }; continue; }, + .float, .sse => switch (self.target.os.tag) { + .windows => if (param_reg_i < 4) { + arg.* = .{ .register = @intToEnum( + Register, + @enumToInt(Register.xmm0) + param_reg_i, + ) }; + param_reg_i += 1; + continue; + }, + else => if (param_sse_reg_i < 8) { + arg.* = .{ .register = @intToEnum( + Register, + @enumToInt(Register.xmm0) + param_sse_reg_i, + ) }; + param_sse_reg_i += 1; + continue; + }, + }, .memory => {}, // fallthrough else => |class| return self.fail("TODO handle calling convention class {s}", .{ @tagName(class), }), } - const param_size = @intCast(u31, ty.abiSize(self.target.*)); - const param_align = @intCast(u31, ty.abiAlignment(self.target.*)); + const param_size = @intCast(u31, ty.abiSize(mod)); + const param_align = @intCast(u31, ty.abiAlignment(mod)); result.stack_byte_count = - mem.alignForwardGeneric(u31, result.stack_byte_count, param_align); + mem.alignForward(u31, result.stack_byte_count, param_align); arg.* = .{ 
.load_frame = .{ .index = stack_frame_base, .off = result.stack_byte_count, @@ -8736,13 +11819,13 @@ fn resolveCallingConventionValues( result.stack_align = 16; // Return values - if (ret_ty.zigTypeTag() == .NoReturn) { + if (ret_ty.zigTypeTag(mod) == .NoReturn) { result.return_value = InstTracking.init(.unreach); - } else if (!ret_ty.hasRuntimeBitsIgnoreComptime()) { + } else if (!ret_ty.hasRuntimeBitsIgnoreComptime(mod)) { result.return_value = InstTracking.init(.none); } else { const ret_reg = abi.getCAbiIntReturnRegs(self.target.*)[0]; - const ret_ty_size = @intCast(u31, ret_ty.abiSize(self.target.*)); + const ret_ty_size = @intCast(u31, ret_ty.abiSize(mod)); if (ret_ty_size <= 8 and !ret_ty.isRuntimeFloat()) { const aliased_reg = registerAlias(ret_reg, ret_ty_size); result.return_value = .{ .short = .{ .register = aliased_reg }, .long = .none }; @@ -8757,14 +11840,14 @@ fn resolveCallingConventionValues( // Input params for (param_types, result.args) |ty, *arg| { - if (!ty.hasRuntimeBitsIgnoreComptime()) { + if (!ty.hasRuntimeBitsIgnoreComptime(mod)) { arg.* = .none; continue; } - const param_size = @intCast(u31, ty.abiSize(self.target.*)); - const param_align = @intCast(u31, ty.abiAlignment(self.target.*)); + const param_size = @intCast(u31, ty.abiSize(mod)); + const param_align = @intCast(u31, ty.abiAlignment(mod)); result.stack_byte_count = - mem.alignForwardGeneric(u31, result.stack_byte_count, param_align); + mem.alignForward(u31, result.stack_byte_count, param_align); arg.* = .{ .load_frame = .{ .index = stack_frame_base, .off = result.stack_byte_count, @@ -8775,7 +11858,7 @@ fn resolveCallingConventionValues( else => return self.fail("TODO implement function parameters and return values for {} on x86_64", .{cc}), } - result.stack_byte_count = mem.alignForwardGeneric(u31, result.stack_byte_count, result.stack_align); + result.stack_byte_count = mem.alignForward(u31, result.stack_byte_count, result.stack_align); return result; } @@ -8825,53 +11908,84 @@ fn registerAlias(reg: Register, size_bytes: u32) Register { reg.to64() else unreachable, - .floating_point => if (size_bytes <= 16) + .segment => if (size_bytes <= 2) + reg + else + unreachable, + .x87 => unreachable, + .mmx => if (size_bytes <= 8) + reg + else + unreachable, + .sse => if (size_bytes <= 16) reg.to128() else if (size_bytes <= 32) reg.to256() else unreachable, - .segment => unreachable, }; } /// Truncates the value in the register in place. /// Clobbers any remaining bits. 
fn truncateRegister(self: *Self, ty: Type, reg: Register) !void { - const int_info = if (ty.isAbiInt()) ty.intInfo(self.target.*) else std.builtin.Type.Int{ + const mod = self.bin_file.options.module.?; + const int_info = if (ty.isAbiInt(mod)) ty.intInfo(mod) else std.builtin.Type.Int{ .signedness = .unsigned, - .bits = @intCast(u16, ty.bitSize(self.target.*)), + .bits = @intCast(u16, ty.bitSize(mod)), }; const max_reg_bit_width = Register.rax.bitSize(); switch (int_info.signedness) { .signed => { const shift = @intCast(u6, max_reg_bit_width - int_info.bits); - try self.genShiftBinOpMir(.sal, Type.isize, .{ .register = reg }, .{ .immediate = shift }); - try self.genShiftBinOpMir(.sar, Type.isize, .{ .register = reg }, .{ .immediate = shift }); + try self.genShiftBinOpMir( + .{ ._l, .sa }, + Type.isize, + .{ .register = reg }, + .{ .immediate = shift }, + ); + try self.genShiftBinOpMir( + .{ ._r, .sa }, + Type.isize, + .{ .register = reg }, + .{ .immediate = shift }, + ); }, .unsigned => { const shift = @intCast(u6, max_reg_bit_width - int_info.bits); const mask = (~@as(u64, 0)) >> shift; if (int_info.bits <= 32) { - try self.genBinOpMir(.@"and", Type.u32, .{ .register = reg }, .{ .immediate = mask }); + try self.genBinOpMir( + .{ ._, .@"and" }, + Type.u32, + .{ .register = reg }, + .{ .immediate = mask }, + ); } else { const tmp_reg = try self.copyToTmpRegister(Type.usize, .{ .immediate = mask }); - try self.genBinOpMir(.@"and", Type.usize, .{ .register = reg }, .{ .register = tmp_reg }); + try self.genBinOpMir( + .{ ._, .@"and" }, + Type.usize, + .{ .register = reg }, + .{ .register = tmp_reg }, + ); } }, } } fn regBitSize(self: *Self, ty: Type) u64 { - return switch (ty.zigTypeTag()) { - else => switch (ty.abiSize(self.target.*)) { + const mod = self.bin_file.options.module.?; + const abi_size = ty.abiSize(mod); + return switch (ty.zigTypeTag(mod)) { + else => switch (abi_size) { 1 => 8, 2 => 16, 3...4 => 32, 5...8 => 64, else => unreachable, }, - .Float => switch (ty.abiSize(self.target.*)) { + .Float => switch (abi_size) { 1...16 => 128, 17...32 => 256, else => unreachable, @@ -8880,19 +11994,26 @@ fn regBitSize(self: *Self, ty: Type) u64 { } fn regExtraBits(self: *Self, ty: Type) u64 { - return self.regBitSize(ty) - ty.bitSize(self.target.*); + const mod = self.bin_file.options.module.?; + return self.regBitSize(ty) - ty.bitSize(mod); } -fn hasAvxSupport(target: Target) bool { - return Target.x86.featureSetHasAny(target.cpu.features, .{ .avx, .avx2 }); +fn hasFeature(self: *Self, feature: Target.x86.Feature) bool { + return Target.x86.featureSetHas(self.target.cpu.features, feature); +} +fn hasAnyFeatures(self: *Self, features: anytype) bool { + return Target.x86.featureSetHasAny(self.target.cpu.features, features); +} +fn hasAllFeatures(self: *Self, features: anytype) bool { + return Target.x86.featureSetHasAll(self.target.cpu.features, features); } -fn getSymbolIndexForDecl(self: *Self, decl_index: Module.Decl.Index) !u32 { - if (self.bin_file.cast(link.File.MachO)) |macho_file| { - const atom = try macho_file.getOrCreateAtomForDecl(decl_index); - return macho_file.getAtom(atom).getSymbolIndex().?; - } else if (self.bin_file.cast(link.File.Coff)) |coff_file| { - const atom = try coff_file.getOrCreateAtomForDecl(decl_index); - return coff_file.getAtom(atom).getSymbolIndex().?; - } else unreachable; +fn typeOf(self: *Self, inst: Air.Inst.Ref) Type { + const mod = self.bin_file.options.module.?; + return self.air.typeOf(inst, &mod.intern_pool); +} + +fn typeOfIndex(self: *Self, inst: 
Air.Inst.Index) Type { + const mod = self.bin_file.options.module.?; + return self.air.typeOfIndex(inst, &mod.intern_pool); } diff --git a/src/arch/x86_64/Emit.zig b/src/arch/x86_64/Emit.zig index c6c8f7995c..78ff918715 100644 --- a/src/arch/x86_64/Emit.zig +++ b/src/arch/x86_64/Emit.zig @@ -18,142 +18,160 @@ pub const Error = Lower.Error || error{ }; pub fn emitMir(emit: *Emit) Error!void { - for (0..emit.lower.mir.instructions.len) |i| { - const index = @intCast(Mir.Inst.Index, i); - const inst = emit.lower.mir.instructions.get(index); - - const start_offset = @intCast(u32, emit.code.items.len); - try emit.code_offset_mapping.putNoClobber(emit.lower.allocator, index, start_offset); - for (try emit.lower.lowerMir(inst)) |lower_inst| try lower_inst.encode(emit.code.writer(), .{}); - const end_offset = @intCast(u32, emit.code.items.len); - - switch (inst.tag) { - else => {}, - - .jmp_reloc => try emit.relocs.append(emit.lower.allocator, .{ - .source = start_offset, - .target = inst.data.inst, - .offset = end_offset - 4, - .length = 5, - }), - - .call_extern => if (emit.bin_file.cast(link.File.MachO)) |macho_file| { - // Add relocation to the decl. - const atom_index = macho_file.getAtomIndexForSymbol( - .{ .sym_index = inst.data.relocation.atom_index, .file = null }, - ).?; - const target = macho_file.getGlobalByIndex(inst.data.relocation.sym_index); - try link.File.MachO.Atom.addRelocation(macho_file, atom_index, .{ - .type = .branch, + for (0..emit.lower.mir.instructions.len) |mir_i| { + const mir_index = @intCast(Mir.Inst.Index, mir_i); + try emit.code_offset_mapping.putNoClobber( + emit.lower.allocator, + mir_index, + @intCast(u32, emit.code.items.len), + ); + const lowered = try emit.lower.lowerMir(mir_index); + var lowered_relocs = lowered.relocs; + for (lowered.insts, 0..) |lowered_inst, lowered_index| { + const start_offset = @intCast(u32, emit.code.items.len); + try lowered_inst.encode(emit.code.writer(), .{}); + const end_offset = @intCast(u32, emit.code.items.len); + while (lowered_relocs.len > 0 and + lowered_relocs[0].lowered_inst_index == lowered_index) : ({ + lowered_relocs = lowered_relocs[1..]; + }) switch (lowered_relocs[0].target) { + .inst => |target| try emit.relocs.append(emit.lower.allocator, .{ + .source = start_offset, .target = target, .offset = end_offset - 4, - .addend = 0, - .pcrel = true, - .length = 2, - }); - } else if (emit.bin_file.cast(link.File.Coff)) |coff_file| { - // Add relocation to the decl. 
- const atom_index = coff_file.getAtomIndexForSymbol( - .{ .sym_index = inst.data.relocation.atom_index, .file = null }, - ).?; - const target = coff_file.getGlobalByIndex(inst.data.relocation.sym_index); - try link.File.Coff.Atom.addRelocation(coff_file, atom_index, .{ - .type = .direct, - .target = target, - .offset = end_offset - 4, - .addend = 0, - .pcrel = true, - .length = 2, - }); - } else return emit.fail("TODO implement {} for {}", .{ inst.tag, emit.bin_file.tag }), - - .mov_linker, .lea_linker => if (emit.bin_file.cast(link.File.MachO)) |macho_file| { - const metadata = - emit.lower.mir.extraData(Mir.LeaRegisterReloc, inst.data.payload).data; - const atom_index = macho_file.getAtomIndexForSymbol(.{ - .sym_index = metadata.atom_index, - .file = null, - }).?; - try link.File.MachO.Atom.addRelocation(macho_file, atom_index, .{ - .type = switch (inst.ops) { - .got_reloc => .got, - .direct_reloc => .signed, - .tlv_reloc => .tlv, - else => unreachable, - }, - .target = .{ .sym_index = metadata.sym_index, .file = null }, - .offset = @intCast(u32, end_offset - 4), - .addend = 0, - .pcrel = true, - .length = 2, - }); - } else if (emit.bin_file.cast(link.File.Coff)) |coff_file| { - const metadata = - emit.lower.mir.extraData(Mir.LeaRegisterReloc, inst.data.payload).data; - const atom_index = coff_file.getAtomIndexForSymbol(.{ - .sym_index = metadata.atom_index, - .file = null, - }).?; - try link.File.Coff.Atom.addRelocation(coff_file, atom_index, .{ - .type = switch (inst.ops) { - .got_reloc => .got, - .direct_reloc => .direct, - .import_reloc => .import, - else => unreachable, - }, - .target = switch (inst.ops) { - .got_reloc, - .direct_reloc, - => .{ .sym_index = metadata.sym_index, .file = null }, - .import_reloc => coff_file.getGlobalByIndex(metadata.sym_index), - else => unreachable, - }, - .offset = @intCast(u32, end_offset - 4), - .addend = 0, - .pcrel = true, - .length = 2, - }); - } else return emit.fail("TODO implement {} for {}", .{ inst.tag, emit.bin_file.tag }), - - .jcc => try emit.relocs.append(emit.lower.allocator, .{ - .source = start_offset, - .target = inst.data.inst_cc.inst, - .offset = end_offset - 4, - .length = 6, - }), - - .dbg_line => try emit.dbgAdvancePCAndLine( - inst.data.line_column.line, - inst.data.line_column.column, - ), + .length = @intCast(u5, end_offset - start_offset), + }), + .linker_extern_fn => |symbol| if (emit.bin_file.cast(link.File.MachO)) |macho_file| { + // Add relocation to the decl. + const atom_index = macho_file.getAtomIndexForSymbol( + .{ .sym_index = symbol.atom_index, .file = null }, + ).?; + const target = macho_file.getGlobalByIndex(symbol.sym_index); + try link.File.MachO.Atom.addRelocation(macho_file, atom_index, .{ + .type = .branch, + .target = target, + .offset = end_offset - 4, + .addend = 0, + .pcrel = true, + .length = 2, + }); + } else if (emit.bin_file.cast(link.File.Coff)) |coff_file| { + // Add relocation to the decl. 
+ const atom_index = coff_file.getAtomIndexForSymbol( + .{ .sym_index = symbol.atom_index, .file = null }, + ).?; + const target = coff_file.getGlobalByIndex(symbol.sym_index); + try link.File.Coff.Atom.addRelocation(coff_file, atom_index, .{ + .type = .direct, + .target = target, + .offset = end_offset - 4, + .addend = 0, + .pcrel = true, + .length = 2, + }); + } else return emit.fail("TODO implement extern reloc for {s}", .{ + @tagName(emit.bin_file.tag), + }), + .linker_got, + .linker_direct, + .linker_import, + .linker_tlv, + => |symbol| if (emit.bin_file.cast(link.File.MachO)) |macho_file| { + const atom_index = macho_file.getAtomIndexForSymbol(.{ + .sym_index = symbol.atom_index, + .file = null, + }).?; + try link.File.MachO.Atom.addRelocation(macho_file, atom_index, .{ + .type = switch (lowered_relocs[0].target) { + .linker_got => .got, + .linker_direct => .signed, + .linker_tlv => .tlv, + else => unreachable, + }, + .target = .{ .sym_index = symbol.sym_index, .file = null }, + .offset = @intCast(u32, end_offset - 4), + .addend = 0, + .pcrel = true, + .length = 2, + }); + } else if (emit.bin_file.cast(link.File.Coff)) |coff_file| { + const atom_index = coff_file.getAtomIndexForSymbol(.{ + .sym_index = symbol.atom_index, + .file = null, + }).?; + try link.File.Coff.Atom.addRelocation(coff_file, atom_index, .{ + .type = switch (lowered_relocs[0].target) { + .linker_got => .got, + .linker_direct => .direct, + .linker_import => .import, + else => unreachable, + }, + .target = switch (lowered_relocs[0].target) { + .linker_got, + .linker_direct, + => .{ .sym_index = symbol.sym_index, .file = null }, + .linker_import => coff_file.getGlobalByIndex(symbol.sym_index), + else => unreachable, + }, + .offset = @intCast(u32, end_offset - 4), + .addend = 0, + .pcrel = true, + .length = 2, + }); + } else if (emit.bin_file.cast(link.File.Plan9)) |p9_file| { + const atom_index = symbol.atom_index; + try p9_file.addReloc(atom_index, .{ // TODO we may need to add a .type field to the relocs if they are .linker_got instead of just .linker_direct + .target = symbol.sym_index, // we set sym_index to just be the atom index + .offset = @intCast(u32, end_offset - 4), + .addend = 0, + .pcrel = true, + }); + } else return emit.fail("TODO implement linker reloc for {s}", .{ + @tagName(emit.bin_file.tag), + }), + }; + } + std.debug.assert(lowered_relocs.len == 0); - .dbg_prologue_end => { - switch (emit.debug_output) { - .dwarf => |dw| { - try dw.setPrologueEnd(); - log.debug("mirDbgPrologueEnd (line={d}, col={d})", .{ - emit.prev_di_line, emit.prev_di_column, - }); - try emit.dbgAdvancePCAndLine(emit.prev_di_line, emit.prev_di_column); + if (lowered.insts.len == 0) { + const mir_inst = emit.lower.mir.instructions.get(mir_index); + switch (mir_inst.tag) { + else => unreachable, + .pseudo => switch (mir_inst.ops) { + else => unreachable, + .pseudo_dbg_prologue_end_none => { + switch (emit.debug_output) { + .dwarf => |dw| { + try dw.setPrologueEnd(); + log.debug("mirDbgPrologueEnd (line={d}, col={d})", .{ + emit.prev_di_line, emit.prev_di_column, + }); + try emit.dbgAdvancePCAndLine(emit.prev_di_line, emit.prev_di_column); + }, + .plan9 => {}, + .none => {}, + } }, - .plan9 => {}, - .none => {}, - } - }, - - .dbg_epilogue_begin => { - switch (emit.debug_output) { - .dwarf => |dw| { - try dw.setEpilogueBegin(); - log.debug("mirDbgEpilogueBegin (line={d}, col={d})", .{ - emit.prev_di_line, emit.prev_di_column, - }); - try emit.dbgAdvancePCAndLine(emit.prev_di_line, emit.prev_di_column); + 
.pseudo_dbg_line_line_column => try emit.dbgAdvancePCAndLine( + mir_inst.data.line_column.line, + mir_inst.data.line_column.column, + ), + .pseudo_dbg_epilogue_begin_none => { + switch (emit.debug_output) { + .dwarf => |dw| { + try dw.setEpilogueBegin(); + log.debug("mirDbgEpilogueBegin (line={d}, col={d})", .{ + emit.prev_di_line, emit.prev_di_column, + }); + try emit.dbgAdvancePCAndLine(emit.prev_di_line, emit.prev_di_column); + }, + .plan9 => {}, + .none => {}, + } }, - .plan9 => {}, - .none => {}, - } - }, + .pseudo_dead_none => {}, + }, + } } } try emit.fixupRelocs(); diff --git a/src/arch/x86_64/Encoding.zig b/src/arch/x86_64/Encoding.zig index a977af7842..625a5283b9 100644 --- a/src/arch/x86_64/Encoding.zig +++ b/src/arch/x86_64/Encoding.zig @@ -23,6 +23,7 @@ const Data = struct { opc: [7]u8, modrm_ext: u3, mode: Mode, + feature: Feature, }; pub fn findByMnemonic( @@ -57,9 +58,9 @@ pub fn findByMnemonic( var shortest_len: ?usize = null; next: for (mnemonic_to_encodings_map[@enumToInt(mnemonic)]) |data| { switch (data.mode) { - .rex => if (!rex_required) continue, - .long, .sse2_long => {}, - else => if (rex_required) continue, + .none, .short => if (rex_required) continue, + .rex, .rex_short => if (!rex_required) continue, + else => {}, } for (input_ops, data.ops) |input_op, data_op| if (!input_op.isSubset(data_op)) continue :next; @@ -88,28 +89,13 @@ pub fn findByOpcode(opc: []const u8, prefixes: struct { if (modrm_ext) |ext| if (ext != data.modrm_ext) continue; if (!std.mem.eql(u8, opc, enc.opcode())) continue; if (prefixes.rex.w) { - switch (data.mode) { - .short, .fpu, .sse, .sse2, .sse4_1, .none => continue, - .long, .sse2_long, .rex => {}, - } + if (!data.mode.isLong()) continue; } else if (prefixes.rex.present and !prefixes.rex.isSet()) { - switch (data.mode) { - .rex => {}, - else => continue, - } + if (!data.mode.isRex()) continue; } else if (prefixes.legacy.prefix_66) { - switch (enc.operandBitSize()) { - 16 => {}, - else => continue, - } + if (!data.mode.isShort()) continue; } else { - switch (data.mode) { - .none => switch (enc.operandBitSize()) { - 16 => continue, - else => {}, - }, - else => continue, - } + if (data.mode.isShort()) continue; } return enc; }; @@ -130,30 +116,11 @@ pub fn mandatoryPrefix(encoding: *const Encoding) ?u8 { pub fn modRmExt(encoding: Encoding) u3 { return switch (encoding.data.op_en) { - .m, .mi, .m1, .mc => encoding.data.modrm_ext, + .m, .mi, .m1, .mc, .vmi => encoding.data.modrm_ext, else => unreachable, }; } -pub fn operandBitSize(encoding: Encoding) u64 { - switch (encoding.data.mode) { - .short => return 16, - .long, .sse2_long => return 64, - else => {}, - } - const bit_size: u64 = switch (encoding.data.op_en) { - .np => switch (encoding.data.ops[0]) { - .o16 => 16, - .o32 => 32, - .o64 => 64, - else => 32, - }, - .td => encoding.data.ops[1].bitSize(), - else => encoding.data.ops[0].bitSize(), - }; - return bit_size; -} - pub fn format( encoding: Encoding, comptime fmt: []const u8, @@ -162,14 +129,41 @@ pub fn format( ) !void { _ = options; _ = fmt; - switch (encoding.data.mode) { - .long, .sse2_long => try writer.writeAll("REX.W + "), - else => {}, - } - for (encoding.opcode()) |byte| { - try writer.print("{x:0>2} ", .{byte}); - } + var opc = encoding.opcode(); + if (encoding.data.mode.isVex()) { + try writer.writeAll("VEX."); + + try writer.writeAll(switch (encoding.data.mode) { + .vex_128_w0, .vex_128_w1, .vex_128_wig => "128", + .vex_256_w0, .vex_256_w1, .vex_256_wig => "256", + .vex_lig_w0, .vex_lig_w1, .vex_lig_wig => "LIG", + 
.vex_lz_w0, .vex_lz_w1, .vex_lz_wig => "LZ", + else => unreachable, + }); + + switch (opc[0]) { + else => {}, + 0x66, 0xf3, 0xf2 => { + try writer.print(".{X:0>2}", .{opc[0]}); + opc = opc[1..]; + }, + } + + try writer.print(".{}", .{std.fmt.fmtSliceHexUpper(opc[0 .. opc.len - 1])}); + opc = opc[opc.len - 1 ..]; + + try writer.writeAll(".W"); + try writer.writeAll(switch (encoding.data.mode) { + .vex_128_w0, .vex_256_w0, .vex_lig_w0, .vex_lz_w0 => "0", + .vex_128_w1, .vex_256_w1, .vex_lig_w1, .vex_lz_w1 => "1", + .vex_128_wig, .vex_256_wig, .vex_lig_wig, .vex_lz_wig => "IG", + else => unreachable, + }); + + try writer.writeByte(' '); + } else if (encoding.data.mode.isLong()) try writer.writeAll("REX.W + "); + for (opc) |byte| try writer.print("{x:0>2} ", .{byte}); switch (encoding.data.op_en) { .np, .fd, .td, .i, .zi, .d => {}, @@ -183,16 +177,17 @@ pub fn format( }; try writer.print("+{s} ", .{tag}); }, - .m, .mi, .m1, .mc => try writer.print("/{d} ", .{encoding.modRmExt()}), - .mr, .rm, .rmi, .mri, .mrc => try writer.writeAll("/r "), + .m, .mi, .m1, .mc, .vmi => try writer.print("/{d} ", .{encoding.modRmExt()}), + .mr, .rm, .rmi, .mri, .mrc, .rm0, .rvm, .rvmr, .rvmi, .mvr => try writer.writeAll("/r "), } switch (encoding.data.op_en) { - .i, .d, .zi, .oi, .mi, .rmi, .mri => { + .i, .d, .zi, .oi, .mi, .rmi, .mri, .vmi, .rvmi => { const op = switch (encoding.data.op_en) { .i, .d => encoding.data.ops[0], .zi, .oi, .mi => encoding.data.ops[1], - .rmi, .mri => encoding.data.ops[2], + .rmi, .mri, .vmi => encoding.data.ops[2], + .rvmi => encoding.data.ops[3], else => unreachable, }; const tag = switch (op) { @@ -207,7 +202,8 @@ pub fn format( }; try writer.print("{s} ", .{tag}); }, - .np, .fd, .td, .o, .m, .m1, .mc, .mr, .rm, .mrc => {}, + .rvmr => try writer.writeAll("/is4 "), + .np, .fd, .td, .o, .m, .m1, .mc, .mr, .rm, .mrc, .rm0, .rvm, .mvr => {}, } try writer.print("{s} ", .{@tagName(encoding.mnemonic)}); @@ -238,7 +234,6 @@ pub const Mnemonic = enum { cmpxchg, cmpxchg8b, cmpxchg16b, cqo, cwd, cwde, div, - fisttp, fld, idiv, imul, int3, ja, jae, jb, jbe, jc, jrcxz, je, jg, jge, jl, jle, jna, jnae, jnb, jnbe, jnc, jne, jng, jnge, jnl, jnle, jno, jnp, jns, jnz, jo, jp, jpe, jpo, js, jz, @@ -264,29 +259,127 @@ pub const Mnemonic = enum { @"test", tzcnt, ud2, xadd, xchg, xor, + // X87 + fisttp, fld, // MMX - movd, + movd, movq, + packssdw, packsswb, packuswb, + paddb, paddd, paddq, paddsb, paddsw, paddusb, paddusw, paddw, + pand, pandn, por, pxor, + pmulhw, pmullw, + psubb, psubd, psubq, psubsb, psubsw, psubusb, psubusw, psubw, // SSE - addss, - cmpss, - divss, - maxss, minss, - movss, - mulss, - subss, + addps, addss, + andps, + andnps, + cmpps, cmpss, + cvtpi2ps, cvtps2pi, cvtsi2ss, cvtss2si, cvttps2pi, cvttss2si, + divps, divss, + maxps, maxss, + minps, minss, + movaps, movhlps, movlhps, + movss, movups, + mulps, mulss, + orps, + pextrw, pinsrw, + pmaxsw, pmaxub, pminsw, pminub, + shufps, + sqrtps, sqrtss, + subps, subss, ucomiss, + xorps, // SSE2 - addsd, - //cmpsd, - divsd, - maxsd, minsd, - movq, //movd, movsd, - mulsd, - subsd, + addpd, addsd, + andpd, + andnpd, + cmppd, //cmpsd, + cvtdq2pd, cvtdq2ps, cvtpd2dq, cvtpd2pi, cvtpd2ps, cvtpi2pd, + cvtps2dq, cvtps2pd, cvtsd2si, cvtsd2ss, cvtsi2sd, cvtss2sd, + cvttpd2dq, cvttpd2pi, cvttps2dq, cvttsd2si, + divpd, divsd, + maxpd, maxsd, + minpd, minsd, + movapd, + movdqa, movdqu, + //movsd, + movupd, + mulpd, mulsd, + orpd, + pshufhw, pshuflw, + psrld, psrlq, psrlw, + punpckhbw, punpckhdq, punpckhqdq, punpckhwd, + punpcklbw, punpckldq, 
punpcklqdq, punpcklwd, + shufpd, + sqrtpd, sqrtsd, + subpd, subsd, ucomisd, + xorpd, + // SSE3 + movddup, movshdup, movsldup, // SSE4.1 - roundss, - roundsd, + blendpd, blendps, blendvpd, blendvps, + extractps, + insertps, + packusdw, + pextrb, pextrd, pextrq, + pinsrb, pinsrd, pinsrq, + pmaxsb, pmaxsd, pmaxud, pmaxuw, pminsb, pminsd, pminud, pminuw, + pmulld, + roundpd, roundps, roundsd, roundss, + // AVX + vaddpd, vaddps, vaddsd, vaddss, + vandnpd, vandnps, vandpd, vandps, + vblendpd, vblendps, vblendvpd, vblendvps, + vbroadcastf128, vbroadcastsd, vbroadcastss, + vcmppd, vcmpps, vcmpsd, vcmpss, + vcvtdq2pd, vcvtdq2ps, vcvtpd2dq, vcvtpd2ps, + vcvtps2dq, vcvtps2pd, vcvtsd2si, vcvtsd2ss, + vcvtsi2sd, vcvtsi2ss, vcvtss2sd, vcvtss2si, + vcvttpd2dq, vcvttps2dq, vcvttsd2si, vcvttss2si, + vdivpd, vdivps, vdivsd, vdivss, + vextractf128, vextractps, + vinsertf128, vinsertps, + vmaxpd, vmaxps, vmaxsd, vmaxss, + vminpd, vminps, vminsd, vminss, + vmovapd, vmovaps, + vmovd, + vmovddup, + vmovdqa, vmovdqu, + vmovhlps, vmovlhps, + vmovq, + vmovsd, + vmovshdup, vmovsldup, + vmovss, + vmovupd, vmovups, + vmulpd, vmulps, vmulsd, vmulss, + vorpd, vorps, + vpackssdw, vpacksswb, vpackusdw, vpackuswb, + vpaddb, vpaddd, vpaddq, vpaddsb, vpaddsw, vpaddusb, vpaddusw, vpaddw, + vpand, vpandn, + vpextrb, vpextrd, vpextrq, vpextrw, + vpinsrb, vpinsrd, vpinsrq, vpinsrw, + vpmaxsb, vpmaxsd, vpmaxsw, vpmaxub, vpmaxud, vpmaxuw, + vpminsb, vpminsd, vpminsw, vpminub, vpminud, vpminuw, + vpmulhw, vpmulld, vpmullw, + vpor, + vpshufhw, vpshuflw, + vpsrld, vpsrlq, vpsrlw, + vpsubb, vpsubd, vpsubq, vpsubsb, vpsubsw, vpsubusb, vpsubusw, vpsubw, + vpunpckhbw, vpunpckhdq, vpunpckhqdq, vpunpckhwd, + vpunpcklbw, vpunpckldq, vpunpcklqdq, vpunpcklwd, + vpxor, + vroundpd, vroundps, vroundsd, vroundss, + vshufpd, vshufps, + vsqrtpd, vsqrtps, vsqrtsd, vsqrtss, + vsubpd, vsubps, vsubsd, vsubss, + vxorpd, vxorps, + // F16C + vcvtph2ps, vcvtps2ph, + // FMA + vfmadd132pd, vfmadd213pd, vfmadd231pd, + vfmadd132ps, vfmadd213ps, vfmadd231ps, + vfmadd132sd, vfmadd213sd, vfmadd231sd, + vfmadd132ss, vfmadd213ss, vfmadd231ss, // zig fmt: on }; @@ -299,6 +392,7 @@ pub const OpEn = enum { fd, td, m1, mc, mi, mr, rm, rmi, mri, mrc, + rm0, vmi, rvm, rvmr, rvmi, mvr, // zig fmt: on }; @@ -313,94 +407,143 @@ pub const Op = enum { cl, r8, r16, r32, r64, rm8, rm16, rm32, rm64, - m8, m16, m32, m64, m80, m128, + r32_m8, r32_m16, r64_m16, + m8, m16, m32, m64, m80, m128, m256, rel8, rel16, rel32, m, moffs, sreg, - xmm, xmm_m32, xmm_m64, + st, mm, mm_m64, + xmm0, xmm, xmm_m32, xmm_m64, xmm_m128, + ymm, ymm_m256, // zig fmt: on pub fn fromOperand(operand: Instruction.Operand) Op { - switch (operand) { - .none => return .none, - - .reg => |reg| { - switch (reg.class()) { - .segment => return .sreg, - .floating_point => return switch (reg.bitSize()) { - 128 => .xmm, + return switch (operand) { + .none => .none, + + .reg => |reg| switch (reg.class()) { + .general_purpose => if (reg.to64() == .rax) + switch (reg) { + .al => .al, + .ax => .ax, + .eax => .eax, + .rax => .rax, else => unreachable, - }, - .general_purpose => { - if (reg.to64() == .rax) return switch (reg) { - .al => .al, - .ax => .ax, - .eax => .eax, - .rax => .rax, - else => unreachable, - }; - if (reg == .cl) return .cl; - return switch (reg.bitSize()) { - 8 => .r8, - 16 => .r16, - 32 => .r32, - 64 => .r64, - else => unreachable, - }; - }, - } + } + else if (reg == .cl) + .cl + else switch (reg.bitSize()) { + 8 => .r8, + 16 => .r16, + 32 => .r32, + 64 => .r64, + else => unreachable, + }, + .segment => 
.sreg, + .x87 => .st, + .mmx => .mm, + .sse => if (reg == .xmm0) + .xmm0 + else switch (reg.bitSize()) { + 128 => .xmm, + 256 => .ymm, + else => unreachable, + }, }, .mem => |mem| switch (mem) { - .moffs => return .moffs, - .sib, .rip => { - const bit_size = mem.bitSize(); - return switch (bit_size) { - 8 => .m8, - 16 => .m16, - 32 => .m32, - 64 => .m64, - 80 => .m80, - 128 => .m128, - else => unreachable, - }; + .moffs => .moffs, + .sib, .rip => switch (mem.bitSize()) { + 8 => .m8, + 16 => .m16, + 32 => .m32, + 64 => .m64, + 80 => .m80, + 128 => .m128, + 256 => .m256, + else => unreachable, }, }, - .imm => |imm| { - switch (imm) { - .signed => |x| { - if (x == 1) return .unity; - if (math.cast(i8, x)) |_| return .imm8s; - if (math.cast(i16, x)) |_| return .imm16s; - return .imm32s; - }, - .unsigned => |x| { - if (x == 1) return .unity; - if (math.cast(i8, x)) |_| return .imm8s; - if (math.cast(u8, x)) |_| return .imm8; - if (math.cast(i16, x)) |_| return .imm16s; - if (math.cast(u16, x)) |_| return .imm16; - if (math.cast(i32, x)) |_| return .imm32s; - if (math.cast(u32, x)) |_| return .imm32; - return .imm64; - }, - } + .imm => |imm| switch (imm) { + .signed => |x| if (x == 1) + .unity + else if (math.cast(i8, x)) |_| + .imm8s + else if (math.cast(i16, x)) |_| + .imm16s + else + .imm32s, + .unsigned => |x| if (x == 1) + .unity + else if (math.cast(i8, x)) |_| + .imm8s + else if (math.cast(u8, x)) |_| + .imm8 + else if (math.cast(i16, x)) |_| + .imm16s + else if (math.cast(u16, x)) |_| + .imm16 + else if (math.cast(i32, x)) |_| + .imm32s + else if (math.cast(u32, x)) |_| + .imm32 + else + .imm64, }, - } + }; } - pub fn bitSize(op: Op) u64 { + pub fn immBitSize(op: Op) u64 { return switch (op) { .none, .o16, .o32, .o64, .moffs, .m, .sreg => unreachable, + .al, .cl, .r8, .rm8, .r32_m8 => unreachable, + .ax, .r16, .rm16 => unreachable, + .eax, .r32, .rm32, .r32_m16 => unreachable, + .rax, .r64, .rm64, .r64_m16 => unreachable, + .st, .mm, .mm_m64 => unreachable, + .xmm0, .xmm, .xmm_m32, .xmm_m64, .xmm_m128 => unreachable, + .ymm, .ymm_m256 => unreachable, + .m8, .m16, .m32, .m64, .m80, .m128, .m256 => unreachable, .unity => 1, - .imm8, .imm8s, .al, .cl, .r8, .m8, .rm8, .rel8 => 8, - .imm16, .imm16s, .ax, .r16, .m16, .rm16, .rel16 => 16, - .imm32, .imm32s, .eax, .r32, .m32, .rm32, .rel32, .xmm_m32 => 32, - .imm64, .rax, .r64, .m64, .rm64, .xmm_m64 => 64, + .imm8, .imm8s, .rel8 => 8, + .imm16, .imm16s, .rel16 => 16, + .imm32, .imm32s, .rel32 => 32, + .imm64 => 64, + }; + } + + pub fn regBitSize(op: Op) u64 { + return switch (op) { + .none, .o16, .o32, .o64, .moffs, .m, .sreg => unreachable, + .unity, .imm8, .imm8s, .imm16, .imm16s, .imm32, .imm32s, .imm64 => unreachable, + .rel8, .rel16, .rel32 => unreachable, + .m8, .m16, .m32, .m64, .m80, .m128, .m256 => unreachable, + .al, .cl, .r8, .rm8 => 8, + .ax, .r16, .rm16 => 16, + .eax, .r32, .rm32, .r32_m8, .r32_m16 => 32, + .rax, .r64, .rm64, .r64_m16, .mm, .mm_m64 => 64, + .st => 80, + .xmm0, .xmm, .xmm_m32, .xmm_m64, .xmm_m128 => 128, + .ymm, .ymm_m256 => 256, + }; + } + + pub fn memBitSize(op: Op) u64 { + return switch (op) { + .none, .o16, .o32, .o64, .moffs, .m, .sreg => unreachable, + .unity, .imm8, .imm8s, .imm16, .imm16s, .imm32, .imm32s, .imm64 => unreachable, + .rel8, .rel16, .rel32 => unreachable, + .al, .cl, .r8, .ax, .r16, .eax, .r32, .rax, .r64 => unreachable, + .st, .mm, .xmm0, .xmm, .ymm => unreachable, + .m8, .rm8, .r32_m8 => 8, + .m16, .rm16, .r32_m16, .r64_m16 => 16, + .m32, .rm32, .xmm_m32 => 32, + .m64, .rm64, .mm_m64, .xmm_m64 
=> 64, .m80 => 80, - .m128, .xmm => 128, + .m128, .xmm_m128 => 128, + .m256, .ymm_m256 => 256, }; } @@ -423,8 +566,11 @@ pub const Op = enum { .al, .ax, .eax, .rax, .r8, .r16, .r32, .r64, .rm8, .rm16, .rm32, .rm64, - .xmm, .xmm_m32, .xmm_m64, - => true, + .r32_m8, .r32_m16, .r64_m16, + .st, .mm, .mm_m64, + .xmm0, .xmm, .xmm_m32, .xmm_m64, .xmm_m128, + .ymm, .ymm_m256, + => true, else => false, }; // zig fmt: on @@ -447,9 +593,12 @@ pub const Op = enum { // zig fmt: off return switch (op) { .rm8, .rm16, .rm32, .rm64, - .m8, .m16, .m32, .m64, .m80, .m128, + .r32_m8, .r32_m16, .r64_m16, + .m8, .m16, .m32, .m64, .m80, .m128, .m256, .m, - .xmm_m32, .xmm_m64, + .mm_m64, + .xmm_m32, .xmm_m64, .xmm_m128, + .ymm_m256, => true, else => false, }; @@ -469,15 +618,12 @@ pub const Op = enum { .al, .ax, .eax, .rax, .cl => .general_purpose, .r8, .r16, .r32, .r64 => .general_purpose, .rm8, .rm16, .rm32, .rm64 => .general_purpose, + .r32_m8, .r32_m16, .r64_m16 => .general_purpose, .sreg => .segment, - .xmm, .xmm_m32, .xmm_m64 => .floating_point, - }; - } - - pub fn isFloatingPointRegister(op: Op) bool { - return switch (op) { - .xmm, .xmm_m32, .xmm_m64 => true, - else => false, + .st => .x87, + .mm, .mm_m64 => .mmx, + .xmm0, .xmm, .xmm_m32, .xmm_m64, .xmm_m128 => .sse, + .ymm, .ymm_m256 => .sse, }; } @@ -493,31 +639,28 @@ pub const Op = enum { else => { if (op.isRegister() and target.isRegister()) { return switch (target) { - .cl, .al, .ax, .eax, .rax => op == target, - else => op.class() == target.class() and switch (target.class()) { - .floating_point => true, - else => op.bitSize() == target.bitSize(), - }, + .cl, .al, .ax, .eax, .rax, .xmm0 => op == target, + else => op.class() == target.class() and op.regBitSize() == target.regBitSize(), }; } if (op.isMemory() and target.isMemory()) { switch (target) { .m => return true, - else => return op.bitSize() == target.bitSize(), + else => return op.memBitSize() == target.memBitSize(), } } if (op.isImmediate() and target.isImmediate()) { switch (target) { - .imm64 => if (op.bitSize() <= 64) return true, - .imm32s, .rel32 => if (op.bitSize() < 32 or (op.bitSize() == 32 and op.isSigned())) + .imm64 => if (op.immBitSize() <= 64) return true, + .imm32s, .rel32 => if (op.immBitSize() < 32 or (op.immBitSize() == 32 and op.isSigned())) return true, - .imm32 => if (op.bitSize() <= 32) return true, - .imm16s, .rel16 => if (op.bitSize() < 16 or (op.bitSize() == 16 and op.isSigned())) + .imm32 => if (op.immBitSize() <= 32) return true, + .imm16s, .rel16 => if (op.immBitSize() < 16 or (op.immBitSize() == 16 and op.isSigned())) return true, - .imm16 => if (op.bitSize() <= 16) return true, - .imm8s, .rel8 => if (op.bitSize() < 8 or (op.bitSize() == 8 and op.isSigned())) + .imm16 => if (op.immBitSize() <= 16) return true, + .imm8s, .rel8 => if (op.immBitSize() < 8 or (op.immBitSize() == 8 and op.isSigned())) return true, - .imm8 => if (op.bitSize() <= 8) return true, + .imm8 => if (op.immBitSize() <= 8) return true, else => {}, } return op == target; @@ -529,15 +672,85 @@ pub const Op = enum { }; pub const Mode = enum { + // zig fmt: off none, - short, - fpu, - rex, - long, + short, long, + rex, rex_short, + vex_128_w0, vex_128_w1, vex_128_wig, + vex_256_w0, vex_256_w1, vex_256_wig, + vex_lig_w0, vex_lig_w1, vex_lig_wig, + vex_lz_w0, vex_lz_w1, vex_lz_wig, + // zig fmt: on + + pub fn isShort(mode: Mode) bool { + return switch (mode) { + .short, .rex_short => true, + else => false, + }; + } + + pub fn isLong(mode: Mode) bool { + return switch (mode) { + .long, + .vex_128_w1, + 
.vex_256_w1, + .vex_lig_w1, + .vex_lz_w1, + => true, + else => false, + }; + } + + pub fn isRex(mode: Mode) bool { + return switch (mode) { + else => false, + .rex, .rex_short => true, + }; + } + + pub fn isVex(mode: Mode) bool { + return switch (mode) { + // zig fmt: off + else => false, + .vex_128_w0, .vex_128_w1, .vex_128_wig, + .vex_256_w0, .vex_256_w1, .vex_256_wig, + .vex_lig_w0, .vex_lig_w1, .vex_lig_wig, + .vex_lz_w0, .vex_lz_w1, .vex_lz_wig, + => true, + // zig fmt: on + }; + } + + pub fn isVecLong(mode: Mode) bool { + return switch (mode) { + // zig fmt: off + else => unreachable, + .vex_128_w0, .vex_128_w1, .vex_128_wig, + .vex_lig_w0, .vex_lig_w1, .vex_lig_wig, + .vex_lz_w0, .vex_lz_w1, .vex_lz_wig, + => false, + .vex_256_w0, .vex_256_w1, .vex_256_wig, + => true, + // zig fmt: on + }; + } +}; + +pub const Feature = enum { + none, + avx, + avx2, + bmi, + f16c, + fma, + lzcnt, + movbe, + popcnt, sse, sse2, - sse2_long, + sse3, sse4_1, + x87, }; fn estimateInstructionLength(prefix: Prefix, encoding: Encoding, ops: []const Operand) usize { @@ -554,10 +767,10 @@ fn estimateInstructionLength(prefix: Prefix, encoding: Encoding, ops: []const Op } const mnemonic_to_encodings_map = init: { - @setEvalBranchQuota(100_000); + @setEvalBranchQuota(30_000); const encodings = @import("encodings.zig"); var entries = encodings.table; - std.sort.sort(encodings.Entry, &entries, {}, struct { + std.mem.sort(encodings.Entry, &entries, {}, struct { fn lessThan(_: void, lhs: encodings.Entry, rhs: encodings.Entry) bool { return @enumToInt(lhs[0]) < @enumToInt(rhs[0]); } @@ -574,6 +787,7 @@ const mnemonic_to_encodings_map = init: { .opc = undefined, .modrm_ext = entry[4], .mode = entry[5], + .feature = entry[6], }; // TODO: use `@memcpy` for these. When I did that, I got a false positive // compile error for this copy happening at compile time. diff --git a/src/arch/x86_64/Lower.zig b/src/arch/x86_64/Lower.zig index af0146c6e1..d77ddf3050 100644 --- a/src/arch/x86_64/Lower.zig +++ b/src/arch/x86_64/Lower.zig @@ -5,13 +5,36 @@ mir: Mir, target: *const std.Target, err_msg: ?*ErrorMsg = null, src_loc: Module.SrcLoc, -result: [ +result_insts_len: u8 = undefined, +result_relocs_len: u8 = undefined, +result_insts: [ std.mem.max(usize, &.{ - abi.Win64.callee_preserved_regs.len, - abi.SysV.callee_preserved_regs.len, + 1, // non-pseudo instructions + 2, // cmovcc: cmovcc \ cmovcc + 3, // setcc: setcc \ setcc \ logicop + 2, // jcc: jcc \ jcc + pseudo_probe_align_insts, + pseudo_probe_adjust_unrolled_max_insts, + pseudo_probe_adjust_setup_insts, + pseudo_probe_adjust_loop_insts, + abi.Win64.callee_preserved_regs.len, // push_regs/pop_regs + abi.SysV.callee_preserved_regs.len, // push_regs/pop_regs }) ]Instruction = undefined, -result_len: usize = undefined, +result_relocs: [ + std.mem.max(usize, &.{ + 1, // jmp/jcc/call/mov/lea: jmp/jcc/call/mov/lea + 2, // jcc: jcc \ jcc + 2, // test \ jcc \ probe \ sub \ jmp + 1, // probe \ sub \ jcc + }) +]Reloc = undefined, + +pub const pseudo_probe_align_insts = 5; // test \ jcc \ probe \ sub \ jmp +pub const pseudo_probe_adjust_unrolled_max_insts = + pseudo_probe_adjust_setup_insts + pseudo_probe_adjust_loop_insts; +pub const pseudo_probe_adjust_setup_insts = 2; // mov \ sub +pub const pseudo_probe_adjust_loop_insts = 3; // probe \ sub \ jcc pub const Error = error{ OutOfMemory, @@ -20,135 +43,236 @@ pub const Error = error{ CannotEncode, }; -/// The returned slice is overwritten by the next call to lowerMir. 
-pub fn lowerMir(lower: *Lower, inst: Mir.Inst) Error![]const Instruction { - lower.result = undefined; - errdefer lower.result = undefined; - lower.result_len = 0; - defer lower.result_len = undefined; +pub const Reloc = struct { + lowered_inst_index: u8, + target: Target, + + const Target = union(enum) { + inst: Mir.Inst.Index, + linker_extern_fn: Mir.Reloc, + linker_got: Mir.Reloc, + linker_direct: Mir.Reloc, + linker_import: Mir.Reloc, + linker_tlv: Mir.Reloc, + }; +}; +/// The returned slice is overwritten by the next call to lowerMir. +pub fn lowerMir(lower: *Lower, index: Mir.Inst.Index) Error!struct { + insts: []const Instruction, + relocs: []const Reloc, +} { + lower.result_insts = undefined; + lower.result_relocs = undefined; + errdefer lower.result_insts = undefined; + errdefer lower.result_relocs = undefined; + lower.result_insts_len = 0; + lower.result_relocs_len = 0; + defer lower.result_insts_len = undefined; + defer lower.result_relocs_len = undefined; + + const inst = lower.mir.instructions.get(index); switch (inst.tag) { - .adc, - .add, - .@"and", - .bsf, - .bsr, - .bswap, - .bt, - .btc, - .btr, - .bts, - .call, - .cbw, - .cwde, - .cdqe, - .cwd, - .cdq, - .cqo, - .cmp, - .cmpxchg, - .div, - .fisttp, - .fld, - .idiv, - .imul, - .int3, - .jmp, - .lea, - .lfence, - .lzcnt, - .mfence, - .mov, - .movbe, - .movd, - .movq, - .movzx, - .mul, - .neg, - .nop, - .not, - .@"or", - .pop, - .popcnt, - .push, - .rcl, - .rcr, - .ret, - .rol, - .ror, - .sal, - .sar, - .sbb, - .sfence, - .shl, - .shld, - .shr, - .shrd, - .sub, - .syscall, - .@"test", - .tzcnt, - .ud2, - .xadd, - .xchg, - .xor, - - .addss, - .cmpss, - .divss, - .maxss, - .minss, - .movss, - .mulss, - .roundss, - .subss, - .ucomiss, - .addsd, - .cmpsd, - .divsd, - .maxsd, - .minsd, - .movsd, - .mulsd, - .roundsd, - .subsd, - .ucomisd, - => try lower.mirGeneric(inst), - - .cmps, - .lods, - .movs, - .scas, - .stos, - => try lower.mirString(inst), - - .cmpxchgb => try lower.mirCmpxchgBytes(inst), - - .jmp_reloc => try lower.emit(.none, .jmp, &.{.{ .imm = Immediate.s(0) }}), - - .call_extern => try lower.emit(.none, .call, &.{.{ .imm = Immediate.s(0) }}), - - .lea_linker => try lower.mirLeaLinker(inst), - .mov_linker => try lower.mirMovLinker(inst), - - .mov_moffs => try lower.mirMovMoffs(inst), - - .movsx => try lower.mirMovsx(inst), - .cmovcc => try lower.mirCmovcc(inst), - .setcc => try lower.mirSetcc(inst), - .jcc => try lower.emit(.none, mnem_cc(.j, inst.data.inst_cc.cc), &.{.{ .imm = Immediate.s(0) }}), - - .push_regs => try lower.mirPushPopRegisterList(inst, .push), - .pop_regs => try lower.mirPushPopRegisterList(inst, .pop), + else => try lower.generic(inst), + .pseudo => switch (inst.ops) { + .pseudo_cmov_z_and_np_rr => { + assert(inst.data.rr.fixes == ._); + try lower.emit(.none, .cmovnz, &.{ + .{ .reg = inst.data.rr.r2 }, + .{ .reg = inst.data.rr.r1 }, + }); + try lower.emit(.none, .cmovnp, &.{ + .{ .reg = inst.data.rr.r1 }, + .{ .reg = inst.data.rr.r2 }, + }); + }, + .pseudo_cmov_nz_or_p_rr => { + assert(inst.data.rr.fixes == ._); + try lower.emit(.none, .cmovnz, &.{ + .{ .reg = inst.data.rr.r1 }, + .{ .reg = inst.data.rr.r2 }, + }); + try lower.emit(.none, .cmovp, &.{ + .{ .reg = inst.data.rr.r1 }, + .{ .reg = inst.data.rr.r2 }, + }); + }, + .pseudo_cmov_nz_or_p_rm_sib, + .pseudo_cmov_nz_or_p_rm_rip, + => { + assert(inst.data.rx.fixes == ._); + try lower.emit(.none, .cmovnz, &.{ + .{ .reg = inst.data.rx.r1 }, + .{ .mem = lower.mem(inst.ops, inst.data.rx.payload) }, + }); + try lower.emit(.none, .cmovp, &.{ + .{ .reg 
= inst.data.rx.r1 }, + .{ .mem = lower.mem(inst.ops, inst.data.rx.payload) }, + }); + }, + .pseudo_set_z_and_np_r => { + assert(inst.data.rr.fixes == ._); + try lower.emit(.none, .setz, &.{ + .{ .reg = inst.data.rr.r1 }, + }); + try lower.emit(.none, .setnp, &.{ + .{ .reg = inst.data.rr.r2 }, + }); + try lower.emit(.none, .@"and", &.{ + .{ .reg = inst.data.rr.r1 }, + .{ .reg = inst.data.rr.r2 }, + }); + }, + .pseudo_set_z_and_np_m_sib, + .pseudo_set_z_and_np_m_rip, + => { + assert(inst.data.rx.fixes == ._); + try lower.emit(.none, .setz, &.{ + .{ .mem = lower.mem(inst.ops, inst.data.rx.payload) }, + }); + try lower.emit(.none, .setnp, &.{ + .{ .reg = inst.data.rx.r1 }, + }); + try lower.emit(.none, .@"and", &.{ + .{ .mem = lower.mem(inst.ops, inst.data.rx.payload) }, + .{ .reg = inst.data.rx.r1 }, + }); + }, + .pseudo_set_nz_or_p_r => { + assert(inst.data.rr.fixes == ._); + try lower.emit(.none, .setnz, &.{ + .{ .reg = inst.data.rr.r1 }, + }); + try lower.emit(.none, .setp, &.{ + .{ .reg = inst.data.rr.r2 }, + }); + try lower.emit(.none, .@"or", &.{ + .{ .reg = inst.data.rr.r1 }, + .{ .reg = inst.data.rr.r2 }, + }); + }, + .pseudo_set_nz_or_p_m_sib, + .pseudo_set_nz_or_p_m_rip, + => { + assert(inst.data.rx.fixes == ._); + try lower.emit(.none, .setnz, &.{ + .{ .mem = lower.mem(inst.ops, inst.data.rx.payload) }, + }); + try lower.emit(.none, .setp, &.{ + .{ .reg = inst.data.rx.r1 }, + }); + try lower.emit(.none, .@"or", &.{ + .{ .mem = lower.mem(inst.ops, inst.data.rx.payload) }, + .{ .reg = inst.data.rx.r1 }, + }); + }, + .pseudo_j_z_and_np_inst => { + assert(inst.data.inst.fixes == ._); + try lower.emit(.none, .jnz, &.{ + .{ .imm = lower.reloc(.{ .inst = index + 1 }) }, + }); + try lower.emit(.none, .jnp, &.{ + .{ .imm = lower.reloc(.{ .inst = inst.data.inst.inst }) }, + }); + }, + .pseudo_j_nz_or_p_inst => { + assert(inst.data.inst.fixes == ._); + try lower.emit(.none, .jnz, &.{ + .{ .imm = lower.reloc(.{ .inst = inst.data.inst.inst }) }, + }); + try lower.emit(.none, .jp, &.{ + .{ .imm = lower.reloc(.{ .inst = inst.data.inst.inst }) }, + }); + }, - .dbg_line, - .dbg_prologue_end, - .dbg_epilogue_begin, - .dead, - => {}, + .pseudo_probe_align_ri_s => { + try lower.emit(.none, .@"test", &.{ + .{ .reg = inst.data.ri.r1 }, + .{ .imm = Immediate.s(@bitCast(i32, inst.data.ri.i)) }, + }); + try lower.emit(.none, .jz, &.{ + .{ .imm = lower.reloc(.{ .inst = index + 1 }) }, + }); + try lower.emit(.none, .lea, &.{ + .{ .reg = inst.data.ri.r1 }, + .{ .mem = Memory.sib(.qword, .{ + .base = .{ .reg = inst.data.ri.r1 }, + .disp = -page_size, + }) }, + }); + try lower.emit(.none, .@"test", &.{ + .{ .mem = Memory.sib(.dword, .{ + .base = .{ .reg = inst.data.ri.r1 }, + }) }, + .{ .reg = inst.data.ri.r1.to32() }, + }); + try lower.emit(.none, .jmp, &.{ + .{ .imm = lower.reloc(.{ .inst = index }) }, + }); + assert(lower.result_insts_len == pseudo_probe_align_insts); + }, + .pseudo_probe_adjust_unrolled_ri_s => { + var offset = page_size; + while (offset < @bitCast(i32, inst.data.ri.i)) : (offset += page_size) { + try lower.emit(.none, .@"test", &.{ + .{ .mem = Memory.sib(.dword, .{ + .base = .{ .reg = inst.data.ri.r1 }, + .disp = -offset, + }) }, + .{ .reg = inst.data.ri.r1.to32() }, + }); + } + try lower.emit(.none, .sub, &.{ + .{ .reg = inst.data.ri.r1 }, + .{ .imm = Immediate.s(@bitCast(i32, inst.data.ri.i)) }, + }); + assert(lower.result_insts_len <= pseudo_probe_adjust_unrolled_max_insts); + }, + .pseudo_probe_adjust_setup_rri_s => { + try lower.emit(.none, .mov, &.{ + .{ .reg = 
inst.data.rri.r2.to32() }, + .{ .imm = Immediate.s(@bitCast(i32, inst.data.rri.i)) }, + }); + try lower.emit(.none, .sub, &.{ + .{ .reg = inst.data.rri.r1 }, + .{ .reg = inst.data.rri.r2 }, + }); + assert(lower.result_insts_len == pseudo_probe_adjust_setup_insts); + }, + .pseudo_probe_adjust_loop_rr => { + try lower.emit(.none, .@"test", &.{ + .{ .mem = Memory.sib(.dword, .{ + .base = .{ .reg = inst.data.rr.r1 }, + .scale_index = .{ .scale = 1, .index = inst.data.rr.r2 }, + .disp = -page_size, + }) }, + .{ .reg = inst.data.rr.r1.to32() }, + }); + try lower.emit(.none, .sub, &.{ + .{ .reg = inst.data.rr.r2 }, + .{ .imm = Immediate.s(page_size) }, + }); + try lower.emit(.none, .jae, &.{ + .{ .imm = lower.reloc(.{ .inst = index }) }, + }); + assert(lower.result_insts_len == pseudo_probe_adjust_loop_insts); + }, + .pseudo_push_reg_list => try lower.pushPopRegList(.push, inst), + .pseudo_pop_reg_list => try lower.pushPopRegList(.pop, inst), + + .pseudo_dbg_prologue_end_none, + .pseudo_dbg_line_line_column, + .pseudo_dbg_epilogue_begin_none, + .pseudo_dead_none, + => {}, + else => unreachable, + }, } - return lower.result[0..lower.result_len]; + return .{ + .insts = lower.result_insts[0..lower.result_insts_len], + .relocs = lower.result_relocs[0..lower.result_relocs_len], + }; } pub fn fail(lower: *Lower, comptime format: []const u8, args: anytype) Error { @@ -158,12 +282,6 @@ pub fn fail(lower: *Lower, comptime format: []const u8, args: anytype) Error { return error.LowerFail; } -fn mnem_cc(comptime base: @Type(.EnumLiteral), cc: bits.Condition) Mnemonic { - return switch (cc) { - inline else => |c| @field(Mnemonic, @tagName(base) ++ @tagName(c)), - }; -} - fn imm(lower: Lower, ops: Mir.Inst.Ops, i: u32) Immediate { return switch (ops) { .rri_s, @@ -171,19 +289,22 @@ fn imm(lower: Lower, ops: Mir.Inst.Ops, i: u32) Immediate { .i_s, .mi_sib_s, .mi_rip_s, - .lock_mi_sib_s, - .lock_mi_rip_s, => Immediate.s(@bitCast(i32, i)), + .rrri, .rri_u, .ri_u, .i_u, .mi_sib_u, .mi_rip_u, - .lock_mi_sib_u, - .lock_mi_rip_u, + .rmi_sib, + .rmi_rip, .mri_sib, .mri_rip, + .rrm_sib, + .rrm_rip, + .rrmi_sib, + .rrmi_rip, => Immediate.u(i), .ri64 => Immediate.u(lower.mir.extraData(Mir.Imm64, i).data.decode()), @@ -195,74 +316,111 @@ fn imm(lower: Lower, ops: Mir.Inst.Ops, i: u32) Immediate { fn mem(lower: Lower, ops: Mir.Inst.Ops, payload: u32) Memory { return lower.mir.resolveFrameLoc(switch (ops) { .rm_sib, - .rm_sib_cc, + .rmi_sib, .m_sib, - .m_sib_cc, .mi_sib_u, .mi_sib_s, .mr_sib, .mrr_sib, .mri_sib, - .lock_m_sib, - .lock_mi_sib_u, - .lock_mi_sib_s, - .lock_mr_sib, + .rrm_sib, + .rrmi_sib, + + .pseudo_cmov_nz_or_p_rm_sib, + .pseudo_set_z_and_np_m_sib, + .pseudo_set_nz_or_p_m_sib, => lower.mir.extraData(Mir.MemorySib, payload).data.decode(), .rm_rip, - .rm_rip_cc, + .rmi_rip, .m_rip, - .m_rip_cc, .mi_rip_u, .mi_rip_s, .mr_rip, .mrr_rip, .mri_rip, - .lock_m_rip, - .lock_mi_rip_u, - .lock_mi_rip_s, - .lock_mr_rip, + .rrm_rip, + .rrmi_rip, + + .pseudo_cmov_nz_or_p_rm_rip, + .pseudo_set_z_and_np_m_rip, + .pseudo_set_nz_or_p_m_rip, => lower.mir.extraData(Mir.MemoryRip, payload).data.decode(), .rax_moffs, .moffs_rax, - .lock_moffs_rax, => lower.mir.extraData(Mir.MemoryMoffs, payload).data.decode(), else => unreachable, }); } +fn reloc(lower: *Lower, target: Reloc.Target) Immediate { + lower.result_relocs[lower.result_relocs_len] = .{ + .lowered_inst_index = lower.result_insts_len, + .target = target, + }; + lower.result_relocs_len += 1; + return Immediate.s(0); +} + fn emit(lower: *Lower, prefix: Prefix, mnemonic: 
Mnemonic, ops: []const Operand) Error!void { - lower.result[lower.result_len] = try Instruction.new(prefix, mnemonic, ops); - lower.result_len += 1; + lower.result_insts[lower.result_insts_len] = try Instruction.new(prefix, mnemonic, ops); + lower.result_insts_len += 1; } -fn mirGeneric(lower: *Lower, inst: Mir.Inst) Error!void { - try lower.emit(switch (inst.ops) { - else => .none, - .lock_m_sib, - .lock_m_rip, - .lock_mi_sib_u, - .lock_mi_rip_u, - .lock_mi_sib_s, - .lock_mi_rip_s, - .lock_mr_sib, - .lock_mr_rip, - .lock_moffs_rax, - => .lock, - }, switch (inst.tag) { - inline else => |tag| if (@hasField(Mnemonic, @tagName(tag))) - @field(Mnemonic, @tagName(tag)) +fn generic(lower: *Lower, inst: Mir.Inst) Error!void { + const fixes = switch (inst.ops) { + .none => inst.data.none.fixes, + .inst => inst.data.inst.fixes, + .i_s, .i_u => inst.data.i.fixes, + .r => inst.data.r.fixes, + .rr => inst.data.rr.fixes, + .rrr => inst.data.rrr.fixes, + .rrrr => inst.data.rrrr.fixes, + .rrri => inst.data.rrri.fixes, + .rri_s, .rri_u => inst.data.rri.fixes, + .ri_s, .ri_u => inst.data.ri.fixes, + .ri64, .rm_sib, .rm_rip, .mr_sib, .mr_rip => inst.data.rx.fixes, + .mrr_sib, .mrr_rip, .rrm_sib, .rrm_rip => inst.data.rrx.fixes, + .rmi_sib, .rmi_rip, .mri_sib, .mri_rip => inst.data.rix.fixes, + .rrmi_sib, .rrmi_rip => inst.data.rrix.fixes, + .mi_sib_u, .mi_rip_u, .mi_sib_s, .mi_rip_s => inst.data.x.fixes, + .m_sib, .m_rip, .rax_moffs, .moffs_rax => inst.data.x.fixes, + .extern_fn_reloc, .got_reloc, .direct_reloc, .import_reloc, .tlv_reloc => ._, + else => return lower.fail("TODO lower .{s}", .{@tagName(inst.ops)}), + }; + try lower.emit(switch (fixes) { + inline else => |tag| comptime if (std.mem.indexOfScalar(u8, @tagName(tag), ' ')) |space| + @field(Prefix, @tagName(tag)[0..space]) else - unreachable, + .none, + }, mnemonic: { + @setEvalBranchQuota(2_000); + + comptime var max_len = 0; + inline for (@typeInfo(Mnemonic).Enum.fields) |field| max_len = @max(field.name.len, max_len); + var buf: [max_len]u8 = undefined; + + const fixes_name = @tagName(fixes); + const pattern = fixes_name[if (std.mem.indexOfScalar(u8, fixes_name, ' ')) |i| i + 1 else 0..]; + const wildcard_i = std.mem.indexOfScalar(u8, pattern, '_').?; + const parts = .{ pattern[0..wildcard_i], @tagName(inst.tag), pattern[wildcard_i + 1 ..] 
}; + const err_msg = "unsupported mnemonic: "; + const mnemonic = std.fmt.bufPrint(&buf, "{s}{s}{s}", parts) catch + return lower.fail(err_msg ++ "'{s}{s}{s}'", parts); + break :mnemonic std.meta.stringToEnum(Mnemonic, mnemonic) orelse + return lower.fail(err_msg ++ "'{s}'", .{mnemonic}); }, switch (inst.ops) { .none => &.{}, + .inst => &.{ + .{ .imm = lower.reloc(.{ .inst = inst.data.inst.inst }) }, + }, .i_s, .i_u => &.{ - .{ .imm = lower.imm(inst.ops, inst.data.i) }, + .{ .imm = lower.imm(inst.ops, inst.data.i.i) }, }, .r => &.{ - .{ .reg = inst.data.r }, + .{ .reg = inst.data.r.r1 }, }, .rr => &.{ .{ .reg = inst.data.rr.r1 }, @@ -273,12 +431,24 @@ fn mirGeneric(lower: *Lower, inst: Mir.Inst) Error!void { .{ .reg = inst.data.rrr.r2 }, .{ .reg = inst.data.rrr.r3 }, }, + .rrrr => &.{ + .{ .reg = inst.data.rrrr.r1 }, + .{ .reg = inst.data.rrrr.r2 }, + .{ .reg = inst.data.rrrr.r3 }, + .{ .reg = inst.data.rrrr.r4 }, + }, + .rrri => &.{ + .{ .reg = inst.data.rrri.r1 }, + .{ .reg = inst.data.rrri.r2 }, + .{ .reg = inst.data.rrri.r3 }, + .{ .imm = lower.imm(inst.ops, inst.data.rrri.i) }, + }, .ri_s, .ri_u => &.{ - .{ .reg = inst.data.ri.r }, + .{ .reg = inst.data.ri.r1 }, .{ .imm = lower.imm(inst.ops, inst.data.ri.i) }, }, .ri64 => &.{ - .{ .reg = inst.data.rx.r }, + .{ .reg = inst.data.rx.r1 }, .{ .imm = lower.imm(inst.ops, inst.data.rx.payload) }, }, .rri_s, .rri_u => &.{ @@ -286,28 +456,28 @@ fn mirGeneric(lower: *Lower, inst: Mir.Inst) Error!void { .{ .reg = inst.data.rri.r2 }, .{ .imm = lower.imm(inst.ops, inst.data.rri.i) }, }, - .m_sib, .lock_m_sib, .m_rip, .lock_m_rip => &.{ - .{ .mem = lower.mem(inst.ops, inst.data.payload) }, + .m_sib, .m_rip => &.{ + .{ .mem = lower.mem(inst.ops, inst.data.x.payload) }, }, - .mi_sib_s, - .lock_mi_sib_s, - .mi_sib_u, - .lock_mi_sib_u, - .mi_rip_u, - .lock_mi_rip_u, - .mi_rip_s, - .lock_mi_rip_s, - => &.{ - .{ .mem = lower.mem(inst.ops, inst.data.ix.payload) }, - .{ .imm = lower.imm(inst.ops, inst.data.ix.i) }, + .mi_sib_s, .mi_sib_u, .mi_rip_u, .mi_rip_s => &.{ + .{ .mem = lower.mem(inst.ops, inst.data.x.payload + 1) }, + .{ .imm = lower.imm( + inst.ops, + lower.mir.extraData(Mir.Imm32, inst.data.x.payload).data.imm, + ) }, }, .rm_sib, .rm_rip => &.{ - .{ .reg = inst.data.rx.r }, + .{ .reg = inst.data.rx.r1 }, .{ .mem = lower.mem(inst.ops, inst.data.rx.payload) }, }, - .mr_sib, .lock_mr_sib, .mr_rip, .lock_mr_rip => &.{ + .rmi_sib, .rmi_rip => &.{ + .{ .reg = inst.data.rix.r1 }, + .{ .mem = lower.mem(inst.ops, inst.data.rix.payload) }, + .{ .imm = lower.imm(inst.ops, inst.data.rix.i) }, + }, + .mr_sib, .mr_rip => &.{ .{ .mem = lower.mem(inst.ops, inst.data.rx.payload) }, - .{ .reg = inst.data.rx.r }, + .{ .reg = inst.data.rx.r1 }, }, .mrr_sib, .mrr_rip => &.{ .{ .mem = lower.mem(inst.ops, inst.data.rrx.payload) }, @@ -316,138 +486,63 @@ fn mirGeneric(lower: *Lower, inst: Mir.Inst) Error!void { }, .mri_sib, .mri_rip => &.{ .{ .mem = lower.mem(inst.ops, inst.data.rix.payload) }, - .{ .reg = inst.data.rix.r }, + .{ .reg = inst.data.rix.r1 }, .{ .imm = lower.imm(inst.ops, inst.data.rix.i) }, }, - else => return lower.fail("TODO lower {s} {s}", .{ @tagName(inst.tag), @tagName(inst.ops) }), - }); -} - -fn mirString(lower: *Lower, inst: Mir.Inst) Error!void { - switch (inst.ops) { - .string => try lower.emit(switch (inst.data.string.repeat) { - inline else => |repeat| @field(Prefix, @tagName(repeat)), - }, switch (inst.tag) { - inline .cmps, .lods, .movs, .scas, .stos => |tag| switch (inst.data.string.width) { - inline else => |width| @field(Mnemonic, 
@tagName(tag) ++ @tagName(width)), - }, - else => unreachable, - }, &.{}), - else => return lower.fail("TODO lower {s} {s}", .{ @tagName(inst.tag), @tagName(inst.ops) }), - } -} - -fn mirCmpxchgBytes(lower: *Lower, inst: Mir.Inst) Error!void { - const ops: [1]Operand = switch (inst.ops) { - .m_sib, .lock_m_sib, .m_rip, .lock_m_rip => .{ - .{ .mem = lower.mem(inst.ops, inst.data.payload) }, + .rrm_sib, .rrm_rip => &.{ + .{ .reg = inst.data.rrx.r1 }, + .{ .reg = inst.data.rrx.r2 }, + .{ .mem = lower.mem(inst.ops, inst.data.rrx.payload) }, + }, + .rrmi_sib, .rrmi_rip => &.{ + .{ .reg = inst.data.rrix.r1 }, + .{ .reg = inst.data.rrix.r2 }, + .{ .mem = lower.mem(inst.ops, inst.data.rrix.payload) }, + .{ .imm = lower.imm(inst.ops, inst.data.rrix.i) }, }, - else => return lower.fail("TODO lower {s} {s}", .{ @tagName(inst.tag), @tagName(inst.ops) }), - }; - try lower.emit(switch (inst.ops) { - .m_sib, .m_rip => .none, - .lock_m_sib, .lock_m_rip => .lock, - else => unreachable, - }, switch (@divExact(ops[0].bitSize(), 8)) { - 8 => .cmpxchg8b, - 16 => .cmpxchg16b, - else => return lower.fail("invalid operand for {s}", .{@tagName(inst.tag)}), - }, &ops); -} - -fn mirMovMoffs(lower: *Lower, inst: Mir.Inst) Error!void { - try lower.emit(switch (inst.ops) { - .rax_moffs, .moffs_rax => .none, - .lock_moffs_rax => .lock, - else => return lower.fail("TODO lower {s} {s}", .{ @tagName(inst.tag), @tagName(inst.ops) }), - }, .mov, switch (inst.ops) { .rax_moffs => &.{ .{ .reg = .rax }, - .{ .mem = lower.mem(inst.ops, inst.data.payload) }, + .{ .mem = lower.mem(inst.ops, inst.data.x.payload) }, }, - .moffs_rax, .lock_moffs_rax => &.{ - .{ .mem = lower.mem(inst.ops, inst.data.payload) }, + .moffs_rax => &.{ + .{ .mem = lower.mem(inst.ops, inst.data.x.payload) }, .{ .reg = .rax }, }, - else => unreachable, - }); -} - -fn mirMovsx(lower: *Lower, inst: Mir.Inst) Error!void { - const ops: [2]Operand = switch (inst.ops) { - .rr => .{ - .{ .reg = inst.data.rr.r1 }, - .{ .reg = inst.data.rr.r2 }, + .extern_fn_reloc => &.{ + .{ .imm = lower.reloc(.{ .linker_extern_fn = inst.data.reloc }) }, }, - .rm_sib, .rm_rip => .{ - .{ .reg = inst.data.rx.r }, - .{ .mem = lower.mem(inst.ops, inst.data.rx.payload) }, - }, - else => return lower.fail("TODO lower {s} {s}", .{ @tagName(inst.tag), @tagName(inst.ops) }), - }; - try lower.emit(.none, switch (ops[0].bitSize()) { - 32, 64 => switch (ops[1].bitSize()) { - 32 => .movsxd, - else => .movsx, + .got_reloc, .direct_reloc, .import_reloc, .tlv_reloc => ops: { + const reg = inst.data.rx.r1; + const extra = lower.mir.extraData(Mir.Reloc, inst.data.rx.payload).data; + _ = lower.reloc(switch (inst.ops) { + .got_reloc => .{ .linker_got = extra }, + .direct_reloc => .{ .linker_direct = extra }, + .import_reloc => .{ .linker_import = extra }, + .tlv_reloc => .{ .linker_tlv = extra }, + else => unreachable, + }); + break :ops &.{ + .{ .reg = reg }, + .{ .mem = Memory.rip(Memory.PtrSize.fromBitSize(reg.bitSize()), 0) }, + }; }, - else => .movsx, - }, &ops); -} - -fn mirCmovcc(lower: *Lower, inst: Mir.Inst) Error!void { - switch (inst.ops) { - .rr_cc => try lower.emit(.none, mnem_cc(.cmov, inst.data.rr_cc.cc), &.{ - .{ .reg = inst.data.rr_cc.r1 }, - .{ .reg = inst.data.rr_cc.r2 }, - }), - .rm_sib_cc, .rm_rip_cc => try lower.emit(.none, mnem_cc(.cmov, inst.data.rx_cc.cc), &.{ - .{ .reg = inst.data.rx_cc.r }, - .{ .mem = lower.mem(inst.ops, inst.data.rx_cc.payload) }, - }), else => return lower.fail("TODO lower {s} {s}", .{ @tagName(inst.tag), @tagName(inst.ops) }), - } -} - -fn mirSetcc(lower: 
*Lower, inst: Mir.Inst) Error!void { - switch (inst.ops) { - .r_cc => try lower.emit(.none, mnem_cc(.set, inst.data.r_cc.cc), &.{ - .{ .reg = inst.data.r_cc.r }, - }), - .m_sib_cc, .m_rip_cc => try lower.emit(.none, mnem_cc(.set, inst.data.x_cc.cc), &.{ - .{ .mem = lower.mem(inst.ops, inst.data.x_cc.payload) }, - }), - else => return lower.fail("TODO lower {s} {s}", .{ @tagName(inst.tag), @tagName(inst.ops) }), - } + }); } -fn mirPushPopRegisterList(lower: *Lower, inst: Mir.Inst, comptime mnemonic: Mnemonic) Error!void { - const reg_list = Mir.RegisterList.fromInt(inst.data.payload); +fn pushPopRegList(lower: *Lower, comptime mnemonic: Mnemonic, inst: Mir.Inst) Error!void { const callee_preserved_regs = abi.getCalleePreservedRegs(lower.target.*); - var it = reg_list.iterator(.{ .direction = switch (mnemonic) { + var it = inst.data.reg_list.iterator(.{ .direction = switch (mnemonic) { .push => .reverse, .pop => .forward, else => unreachable, } }); - while (it.next()) |i| try lower.emit(.none, mnemonic, &.{.{ .reg = callee_preserved_regs[i] }}); + while (it.next()) |i| try lower.emit(.none, mnemonic, &.{.{ + .reg = callee_preserved_regs[i], + }}); } -fn mirLeaLinker(lower: *Lower, inst: Mir.Inst) Error!void { - const metadata = lower.mir.extraData(Mir.LeaRegisterReloc, inst.data.payload).data; - const reg = @intToEnum(Register, metadata.reg); - try lower.emit(.none, .lea, &.{ - .{ .reg = reg }, - .{ .mem = Memory.rip(Memory.PtrSize.fromBitSize(reg.bitSize()), 0) }, - }); -} - -fn mirMovLinker(lower: *Lower, inst: Mir.Inst) Error!void { - const metadata = lower.mir.extraData(Mir.LeaRegisterReloc, inst.data.payload).data; - const reg = @intToEnum(Register, metadata.reg); - try lower.emit(.none, .mov, &.{ - .{ .reg = reg }, - .{ .mem = Memory.rip(Memory.PtrSize.fromBitSize(reg.bitSize()), 0) }, - }); -} +const page_size: i32 = 1 << 12; const abi = @import("abi.zig"); const assert = std.debug.assert; diff --git a/src/arch/x86_64/Mir.zig b/src/arch/x86_64/Mir.zig index c8703373d2..96b7742929 100644 --- a/src/arch/x86_64/Mir.zig +++ b/src/arch/x86_64/Mir.zig @@ -32,12 +32,271 @@ pub const Inst = struct { pub const Index = u32; + pub const Fixes = enum(u8) { + /// ___ + @"_", + + /// Integer __ + i_, + + /// ___ Left + _l, + /// ___ Left Double + _ld, + /// ___ Right + _r, + /// ___ Right Double + _rd, + + /// ___ Above + _a, + /// ___ Above Or Equal + _ae, + /// ___ Below + _b, + /// ___ Below Or Equal + _be, + /// ___ Carry + _c, + /// ___ Equal + _e, + /// ___ Greater + _g, + /// ___ Greater Or Equal + _ge, + /// ___ Less + //_l, + /// ___ Less Or Equal + _le, + /// ___ Not Above + _na, + /// ___ Not Above Or Equal + _nae, + /// ___ Not Below + _nb, + /// ___ Not Below Or Equal + _nbe, + /// ___ Not Carry + _nc, + /// ___ Not Equal + _ne, + /// ___ Not Greater + _ng, + /// ___ Not Greater Or Equal + _nge, + /// ___ Not Less + _nl, + /// ___ Not Less Or Equal + _nle, + /// ___ Not Overflow + _no, + /// ___ Not Parity + _np, + /// ___ Not Sign + _ns, + /// ___ Not Zero + _nz, + /// ___ Overflow + _o, + /// ___ Parity + _p, + /// ___ Parity Even + _pe, + /// ___ Parity Odd + _po, + /// ___ Sign + _s, + /// ___ Zero + _z, + + /// ___ Byte + //_b, + /// ___ Word + _w, + /// ___ Doubleword + _d, + /// ___ QuadWord + _q, + + /// ___ String + //_s, + /// ___ String Byte + _sb, + /// ___ String Word + _sw, + /// ___ String Doubleword + _sd, + /// ___ String Quadword + _sq, + + /// Repeat ___ String + @"rep _s", + /// Repeat ___ String Byte + @"rep _sb", + /// Repeat ___ String Word + @"rep _sw", + /// 
Repeat ___ String Doubleword + @"rep _sd", + /// Repeat ___ String Quadword + @"rep _sq", + + /// Repeat Equal ___ String + @"repe _s", + /// Repeat Equal ___ String Byte + @"repe _sb", + /// Repeat Equal ___ String Word + @"repe _sw", + /// Repeat Equal ___ String Doubleword + @"repe _sd", + /// Repeat Equal ___ String Quadword + @"repe _sq", + + /// Repeat Not Equal ___ String + @"repne _s", + /// Repeat Not Equal ___ String Byte + @"repne _sb", + /// Repeat Not Equal ___ String Word + @"repne _sw", + /// Repeat Not Equal ___ String Doubleword + @"repne _sd", + /// Repeat Not Equal ___ String Quadword + @"repne _sq", + + /// Repeat Not Zero ___ String + @"repnz _s", + /// Repeat Not Zero ___ String Byte + @"repnz _sb", + /// Repeat Not Zero ___ String Word + @"repnz _sw", + /// Repeat Not Zero ___ String Doubleword + @"repnz _sd", + /// Repeat Not Zero ___ String Quadword + @"repnz _sq", + + /// Repeat Zero ___ String + @"repz _s", + /// Repeat Zero ___ String Byte + @"repz _sb", + /// Repeat Zero ___ String Word + @"repz _sw", + /// Repeat Zero ___ String Doubleword + @"repz _sd", + /// Repeat Zero ___ String Quadword + @"repz _sq", + + /// Locked ___ + @"lock _", + /// ___ And Complement + //_c, + /// Locked ___ And Complement + @"lock _c", + /// ___ And Reset + //_r, + /// Locked ___ And Reset + @"lock _r", + /// ___ And Set + //_s, + /// Locked ___ And Set + @"lock _s", + /// ___ 8 Bytes + _8b, + /// Locked ___ 8 Bytes + @"lock _8b", + /// ___ 16 Bytes + _16b, + /// Locked ___ 16 Bytes + @"lock _16b", + + /// Float ___ + f_, + /// Float ___ Pop + f_p, + + /// Packed ___ + p_, + /// Packed ___ Byte + p_b, + /// Packed ___ Word + p_w, + /// Packed ___ Doubleword + p_d, + /// Packed ___ Quadword + p_q, + /// Packed ___ Double Quadword + p_dq, + + /// ___ Scalar Single-Precision Values + _ss, + /// ___ Packed Single-Precision Values + _ps, + /// ___ Scalar Double-Precision Values + //_sd, + /// ___ Packed Double-Precision Values + _pd, + + /// VEX-Encoded ___ + v_, + /// VEX-Encoded ___ Byte + v_b, + /// VEX-Encoded ___ Word + v_w, + /// VEX-Encoded ___ Doubleword + v_d, + /// VEX-Encoded ___ QuadWord + v_q, + /// VEX-Encoded Packed ___ + vp_, + /// VEX-Encoded Packed ___ Byte + vp_b, + /// VEX-Encoded Packed ___ Word + vp_w, + /// VEX-Encoded Packed ___ Doubleword + vp_d, + /// VEX-Encoded Packed ___ Quadword + vp_q, + /// VEX-Encoded Packed ___ Double Quadword + vp_dq, + /// VEX-Encoded ___ Scalar Single-Precision Values + v_ss, + /// VEX-Encoded ___ Packed Single-Precision Values + v_ps, + /// VEX-Encoded ___ Scalar Double-Precision Values + v_sd, + /// VEX-Encoded ___ Packed Double-Precision Values + v_pd, + /// VEX-Encoded ___ 128-Bits Of Floating-Point Data + v_f128, + + /// Mask ___ Byte + k_b, + /// Mask ___ Word + k_w, + /// Mask ___ Doubleword + k_d, + /// Mask ___ Quadword + k_q, + + pub fn fromCondition(cc: bits.Condition) Fixes { + return switch (cc) { + inline else => |cc_tag| @field(Fixes, "_" ++ @tagName(cc_tag)), + .z_and_np, .nz_or_p => unreachable, + }; + } + }; + pub const Tag = enum(u8) { /// Add with carry adc, /// Add + /// Add packed integers + /// Add packed single-precision floating-point values + /// Add scalar single-precision floating-point values + /// Add packed double-precision floating-point values + /// Add scalar double-precision floating-point values add, /// Logical and + /// Bitwise logical and of packed single-precision floating-point values + /// Bitwise logical and of packed double-precision floating-point values @"and", /// Bit scan forward bsf, 
@@ -46,49 +305,55 @@ pub const Inst = struct { /// Byte swap bswap, /// Bit test - bt, /// Bit test and complement - btc, /// Bit test and reset - btr, /// Bit test and set - bts, + bt, /// Call call, /// Convert byte to word cbw, - /// Convert word to doubleword - cwde, - /// Convert doubleword to quadword - cdqe, - /// Convert word to doubleword - cwd, /// Convert doubleword to quadword cdq, /// Convert doubleword to quadword - cqo, + cdqe, + /// Conditional move + cmov, /// Logical compare + /// Compare string + /// Compare scalar single-precision floating-point values + /// Compare scalar double-precision floating-point values cmp, /// Compare and exchange - cmpxchg, /// Compare and exchange bytes - cmpxchgb, + cmpxchg, + /// Convert doubleword to quadword + cqo, + /// Convert word to doubleword + cwd, + /// Convert word to doubleword + cwde, /// Unsigned division - div, - /// Store integer with truncation - fisttp, - /// Load floating-point value - fld, /// Signed division - idiv, - /// Signed multiplication - imul, + /// Divide packed single-precision floating-point values + /// Divide scalar single-precision floating-point values + /// Divide packed double-precision floating-point values + /// Divide scalar double-precision floating-point values + div, /// int3, + /// Store integer with truncation + istt, + /// Conditional jump + j, /// Jump jmp, + /// Load floating-point value + ld, /// Load effective address lea, + /// Load string + lod, /// Load fence lfence, /// Count the number of leading zero bits @@ -96,18 +361,24 @@ pub const Inst = struct { /// Memory fence mfence, /// Move + /// Move data from string to string + /// Move scalar single-precision floating-point value + /// Move scalar double-precision floating-point value + /// Move doubleword + /// Move quadword mov, /// Move data after swapping bytes movbe, - /// Move doubleword - movd, - /// Move quadword - movq, /// Move with sign extension movsx, /// Move with zero extension movzx, /// Multiply + /// Signed multiplication + /// Multiply packed single-precision floating-point values + /// Multiply scalar single-precision floating-point values + /// Multiply packed double-precision floating-point values + /// Multiply scalar double-precision floating-point values mul, /// Two's complement negation neg, @@ -116,6 +387,8 @@ pub const Inst = struct { /// One's complement negation not, /// Logical or + /// Bitwise logical or of packed single-precision floating-point values + /// Bitwise logical or of packed double-precision floating-point values @"or", /// Pop pop, @@ -124,33 +397,38 @@ pub const Inst = struct { /// Push push, /// Rotate left through carry - rcl, /// Rotate right through carry - rcr, + rc, /// Return ret, /// Rotate left - rol, /// Rotate right - ror, + ro, /// Arithmetic shift left - sal, /// Arithmetic shift right - sar, + sa, /// Integer subtraction with borrow sbb, + /// Scan string + sca, + /// Set byte on condition + set, /// Store fence sfence, /// Logical shift left - shl, /// Double precision shift left - shld, /// Logical shift right - shr, /// Double precision shift right - shrd, + sh, /// Subtract + /// Subtract packed integers + /// Subtract packed single-precision floating-point values + /// Subtract scalar single-precision floating-point values + /// Subtract packed double-precision floating-point values + /// Subtract scalar double-precision floating-point values sub, + /// Store string + sto, /// Syscall syscall, /// Test condition @@ -164,102 +442,221 @@ pub const Inst = struct { /// 
Exchange register/memory with register xchg, /// Logical exclusive-or + /// Bitwise logical xor of packed single-precision floating-point values + /// Bitwise logical xor of packed double-precision floating-point values xor, - /// Add single precision floating point values - addss, - /// Compare scalar single-precision floating-point values - cmpss, - /// Divide scalar single-precision floating-point values - divss, - /// Return maximum single-precision floating-point value - maxss, - /// Return minimum single-precision floating-point value - minss, - /// Move scalar single-precision floating-point value - movss, - /// Multiply scalar single-precision floating-point values - mulss, - /// Round scalar single-precision floating-point values - roundss, - /// Subtract scalar single-precision floating-point values - subss, + /// Pack with signed saturation + ackssw, + /// Pack with signed saturation + ackssd, + /// Pack with unsigned saturation + ackusw, + /// Add packed signed integers with signed saturation + adds, + /// Add packed unsigned integers with unsigned saturation + addus, + /// Bitwise logical and not of packed single-precision floating-point values + /// Bitwise logical and not of packed double-precision floating-point values + andn, + /// Maximum of packed signed integers + maxs, + /// Maximum of packed unsigned integers + maxu, + /// Minimum of packed signed integers + mins, + /// Minimum of packed unsigned integers + minu, + /// Multiply packed signed integers and store low result + mull, + /// Multiply packed signed integers and store high result + mulh, + /// Subtract packed signed integers with signed saturation + subs, + /// Subtract packed unsigned integers with unsigned saturation + subus, + + /// Convert packed doubleword integers to packed single-precision floating-point values + /// Convert packed doubleword integers to packed double-precision floating-point values + cvtpi2, + /// Convert packed single-precision floating-point values to packed doubleword integers + cvtps2pi, + /// Convert doubleword integer to scalar single-precision floating-point value + /// Convert doubleword integer to scalar double-precision floating-point value + cvtsi2, + /// Convert scalar single-precision floating-point value to doubleword integer + cvtss2si, + /// Convert with truncation packed single-precision floating-point values to packed doubleword integers + cvttps2pi, + /// Convert with truncation scalar single-precision floating-point value to doubleword integer + cvttss2si, + + /// Maximum of packed single-precision floating-point values + /// Maximum of scalar single-precision floating-point values + /// Maximum of packed double-precision floating-point values + /// Maximum of scalar double-precision floating-point values + max, + /// Minimum of packed single-precision floating-point values + /// Minimum of scalar single-precision floating-point values + /// Minimum of packed double-precision floating-point values + /// Minimum of scalar double-precision floating-point values + min, + /// Move aligned packed single-precision floating-point values + /// Move aligned packed double-precision floating-point values + mova, + /// Move packed single-precision floating-point values high to low + movhl, + /// Move packed single-precision floating-point values low to high + movlh, + /// Move unaligned packed single-precision floating-point values + /// Move unaligned packed double-precision floating-point values + movu, + /// Extract byte + /// Extract word + /// Extract doubleword + /// 
Extract quadword + extr, + /// Insert byte + /// Insert word + /// Insert doubleword + /// Insert quadword + insr, + /// Square root of packed single-precision floating-point values + /// Square root of scalar single-precision floating-point value + /// Square root of packed double-precision floating-point values + /// Square root of scalar double-precision floating-point value + sqrt, /// Unordered compare scalar single-precision floating-point values - ucomiss, - /// Add double precision floating point values - addsd, - /// Compare scalar double-precision floating-point values - cmpsd, - /// Divide scalar double-precision floating-point values - divsd, - /// Return maximum double-precision floating-point value - maxsd, - /// Return minimum double-precision floating-point value - minsd, - /// Move scalar double-precision floating-point value - movsd, - /// Multiply scalar double-precision floating-point values - mulsd, - /// Round scalar double-precision floating-point values - roundsd, - /// Subtract scalar double-precision floating-point values - subsd, /// Unordered compare scalar double-precision floating-point values - ucomisd, + ucomi, + /// Unpack and interleave high packed single-precision floating-point values + /// Unpack and interleave high packed double-precision floating-point values + unpckh, + /// Unpack and interleave low packed single-precision floating-point values + /// Unpack and interleave low packed double-precision floating-point values + unpckl, - /// Compare string operands - cmps, - /// Load string - lods, - /// Move data from string to string - movs, - /// Scan string - scas, - /// Store string - stos, - - /// Conditional move - cmovcc, - /// Conditional jump - jcc, - /// Set byte on condition - setcc, + /// Convert packed doubleword integers to packed single-precision floating-point values + /// Convert packed doubleword integers to packed double-precision floating-point values + cvtdq2, + /// Convert packed double-precision floating-point values to packed doubleword integers + cvtpd2dq, + /// Convert packed double-precision floating-point values to packed doubleword integers + cvtpd2pi, + /// Convert packed double-precision floating-point values to packed single-precision floating-point values + cvtpd2, + /// Convert packed single-precision floating-point values to packed doubleword integers + cvtps2dq, + /// Convert packed single-precision floating-point values to packed double-precision floating-point values + cvtps2, + /// Convert scalar double-precision floating-point value to doubleword integer + cvtsd2si, + /// Convert scalar double-precision floating-point value to scalar single-precision floating-point value + cvtsd2, + /// Convert scalar single-precision floating-point value to scalar double-precision floating-point value + cvtss2, + /// Convert with truncation packed double-precision floating-point values to packed doubleword integers + cvttpd2dq, + /// Convert with truncation packed double-precision floating-point values to packed doubleword integers + cvttpd2pi, + /// Convert with truncation packed single-precision floating-point values to packed doubleword integers + cvttps2dq, + /// Convert with truncation scalar double-precision floating-point value to doubleword integer + cvttsd2si, + /// Move aligned packed integer values + movdqa, + /// Move unaligned packed integer values + movdqu, + /// Packed interleave shuffle of quadruplets of single-precision floating-point values + /// Packed interleave shuffle of pairs of double-precision 
floating-point values + shuf, + /// Shuffle packed high words + shufh, + /// Shuffle packed low words + shufl, + /// Shift packed data right logical + /// Shift packed data right logical + /// Shift packed data right logical + srl, + /// Unpack high data + unpckhbw, + /// Unpack high data + unpckhdq, + /// Unpack high data + unpckhqdq, + /// Unpack high data + unpckhwd, + /// Unpack low data + unpcklbw, + /// Unpack low data + unpckldq, + /// Unpack low data + unpcklqdq, + /// Unpack low data + unpcklwd, - /// Mov absolute to/from memory wrt segment register to/from rax - mov_moffs, + /// Replicate double floating-point values + movddup, + /// Replicate single floating-point values + movshdup, + /// Replicate single floating-point values + movsldup, - /// Jump with relocation to another local MIR instruction - /// Uses `inst` payload. - jmp_reloc, + /// Pack with unsigned saturation + ackusd, + /// Blend packed single-precision floating-point values + /// Blend scalar single-precision floating-point values + /// Blend packed double-precision floating-point values + /// Blend scalar double-precision floating-point values + blend, + /// Variable blend packed single-precision floating-point values + /// Variable blend scalar single-precision floating-point values + /// Variable blend packed double-precision floating-point values + /// Variable blend scalar double-precision floating-point values + blendv, + /// Extract packed floating-point values + extract, + /// Insert scalar single-precision floating-point value + /// Insert packed floating-point values + insert, + /// Round packed single-precision floating-point values + /// Round scalar single-precision floating-point value + /// Round packed double-precision floating-point values + /// Round scalar double-precision floating-point value + round, - /// Call to an extern symbol via linker relocation. - /// Uses `relocation` payload. - call_extern, + /// Load with broadcast floating-point data + broadcast, - /// Load effective address of a symbol not yet allocated in VM. - lea_linker, - /// Move address of a symbol not yet allocated in VM. - mov_linker, + /// Convert 16-bit floating-point values to single-precision floating-point values + cvtph2, + /// Convert single-precision floating-point values to 16-bit floating-point values + cvtps2ph, - /// End of prologue - dbg_prologue_end, - /// Start of epilogue - dbg_epilogue_begin, - /// Update debug line - /// Uses `line_column` payload containing the line and column. - dbg_line, - /// Push registers - /// Uses `payload` payload containing `RegisterList.asInt` directly. - push_regs, - /// Pop registers - /// Uses `payload` payload containing `RegisterList.asInt` directly. 
- pop_regs, + /// Fused multiply-add of packed single-precision floating-point values + /// Fused multiply-add of scalar single-precision floating-point values + /// Fused multiply-add of packed double-precision floating-point values + /// Fused multiply-add of scalar double-precision floating-point values + fmadd132, + /// Fused multiply-add of packed single-precision floating-point values + /// Fused multiply-add of scalar single-precision floating-point values + /// Fused multiply-add of packed double-precision floating-point values + /// Fused multiply-add of scalar double-precision floating-point values + fmadd213, + /// Fused multiply-add of packed single-precision floating-point values + /// Fused multiply-add of scalar single-precision floating-point values + /// Fused multiply-add of packed double-precision floating-point values + /// Fused multiply-add of scalar double-precision floating-point values + fmadd231, - /// Tombstone - /// Emitter should skip this instruction. - dead, + /// A pseudo instruction that requires special lowering. + /// This should be the only tag in this enum that doesn't + /// directly correspond to one or more instruction mnemonics. + pseudo, }; + pub const FixedTag = struct { Fixes, Tag }; + pub const Ops = enum(u8) { /// No data associated with this instruction (only mnemonic is used). none, @@ -272,18 +669,18 @@ pub const Inst = struct { /// Register, register, register operands. /// Uses `rrr` payload. rrr, + /// Register, register, register, register operands. + /// Uses `rrrr` payload. + rrrr, + /// Register, register, register, immediate (byte) operands. + /// Uses `rrri` payload. + rrri, /// Register, register, immediate (sign-extended) operands. /// Uses `rri` payload. rri_s, /// Register, register, immediate (unsigned) operands. /// Uses `rri` payload. rri_u, - /// Register with condition code (CC). - /// Uses `r_cc` payload. - r_cc, - /// Register, register with condition code (CC). - /// Uses `rr_cc` payload. - rr_cc, /// Register, immediate (sign-extended) operands. /// Uses `ri` payload. ri_s, @@ -308,35 +705,41 @@ pub const Inst = struct { /// Register, memory (RIP) operands. /// Uses `rx` payload. rm_rip, - /// Register, memory (SIB) operands with condition code (CC). - /// Uses `rx_cc` payload. - rm_sib_cc, - /// Register, memory (RIP) operands with condition code (CC). - /// Uses `rx_cc` payload. - rm_rip_cc, + /// Register, memory (SIB), immediate (byte) operands. + /// Uses `rix` payload with extra data of type `MemorySib`. + rmi_sib, + /// Register, register, memory (RIP). + /// Uses `rrix` payload with extra data of type `MemoryRip`. + rrm_rip, + /// Register, register, memory (SIB). + /// Uses `rrix` payload with extra data of type `MemorySib`. + rrm_sib, + /// Register, register, memory (RIP), immediate (byte) operands. + /// Uses `rrix` payload with extra data of type `MemoryRip`. + rrmi_rip, + /// Register, register, memory (SIB), immediate (byte) operands. + /// Uses `rrix` payload with extra data of type `MemorySib`. + rrmi_sib, + /// Register, memory (RIP), immediate (byte) operands. + /// Uses `rix` payload with extra data of type `MemoryRip`. + rmi_rip, /// Single memory (SIB) operand. - /// Uses `payload` with extra data of type `MemorySib`. + /// Uses `x` with extra data of type `MemorySib`. m_sib, /// Single memory (RIP) operand. - /// Uses `payload` with extra data of type `MemoryRip`. + /// Uses `x` with extra data of type `MemoryRip`. m_rip, - /// Single memory (SIB) operand with condition code (CC). 
- /// Uses `x_cc` with extra data of type `MemorySib`. - m_sib_cc, - /// Single memory (RIP) operand with condition code (CC). - /// Uses `x_cc` with extra data of type `MemoryRip`. - m_rip_cc, /// Memory (SIB), immediate (unsigned) operands. - /// Uses `ix` payload with extra data of type `MemorySib`. + /// Uses `x` payload with extra data of type `Imm32` followed by `MemorySib`. mi_sib_u, /// Memory (RIP), immediate (unsigned) operands. - /// Uses `ix` payload with extra data of type `MemoryRip`. + /// Uses `x` payload with extra data of type `Imm32` followed by `MemoryRip`. mi_rip_u, /// Memory (SIB), immediate (sign-extend) operands. - /// Uses `ix` payload with extra data of type `MemorySib`. + /// Uses `x` payload with extra data of type `Imm32` followed by `MemorySib`. mi_sib_s, /// Memory (RIP), immediate (sign-extend) operands. - /// Uses `ix` payload with extra data of type `MemoryRip`. + /// Uses `x` payload with extra data of type `Imm32` followed by `MemoryRip`. mi_rip_s, /// Memory (SIB), register operands. /// Uses `rx` payload with extra data of type `MemorySib`. @@ -357,161 +760,207 @@ pub const Inst = struct { /// Uses `rix` payload with extra data of type `MemoryRip`. mri_rip, /// Rax, Memory moffs. - /// Uses `payload` with extra data of type `MemoryMoffs`. + /// Uses `x` with extra data of type `MemoryMoffs`. rax_moffs, /// Memory moffs, rax. - /// Uses `payload` with extra data of type `MemoryMoffs`. + /// Uses `x` with extra data of type `MemoryMoffs`. moffs_rax, - /// Single memory (SIB) operand with lock prefix. - /// Uses `payload` with extra data of type `MemorySib`. - lock_m_sib, - /// Single memory (RIP) operand with lock prefix. - /// Uses `payload` with extra data of type `MemoryRip`. - lock_m_rip, - /// Memory (SIB), immediate (unsigned) operands with lock prefix. - /// Uses `xi` payload with extra data of type `MemorySib`. - lock_mi_sib_u, - /// Memory (RIP), immediate (unsigned) operands with lock prefix. - /// Uses `xi` payload with extra data of type `MemoryRip`. - lock_mi_rip_u, - /// Memory (SIB), immediate (sign-extend) operands with lock prefix. - /// Uses `xi` payload with extra data of type `MemorySib`. - lock_mi_sib_s, - /// Memory (RIP), immediate (sign-extend) operands with lock prefix. - /// Uses `xi` payload with extra data of type `MemoryRip`. - lock_mi_rip_s, - /// Memory (SIB), register operands with lock prefix. - /// Uses `rx` payload with extra data of type `MemorySib`. - lock_mr_sib, - /// Memory (RIP), register operands with lock prefix. - /// Uses `rx` payload with extra data of type `MemoryRip`. - lock_mr_rip, - /// Memory moffs, rax with lock prefix. - /// Uses `payload` with extra data of type `MemoryMoffs`. - lock_moffs_rax, /// References another Mir instruction directly. /// Uses `inst` payload. inst, - /// References another Mir instruction directly with condition code (CC). - /// Uses `inst_cc` payload. - inst_cc, - /// String repeat and width - /// Uses `string` payload. - string, + /// Linker relocation - external function. /// Uses `reloc` payload. - reloc, + extern_fn_reloc, /// Linker relocation - GOT indirection. - /// Uses `payload` payload with extra data of type `LeaRegisterReloc`. + /// Uses `rx` payload with extra data of type `Reloc`. got_reloc, /// Linker relocation - direct reference. - /// Uses `payload` payload with extra data of type `LeaRegisterReloc`. + /// Uses `rx` payload with extra data of type `Reloc`. direct_reloc, /// Linker relocation - imports table indirection (binding). 
- /// Uses `payload` payload with extra data of type `LeaRegisterReloc`. + /// Uses `rx` payload with extra data of type `Reloc`. import_reloc, /// Linker relocation - threadlocal variable via GOT indirection. - /// Uses `payload` payload with extra data of type `LeaRegisterReloc`. + /// Uses `rx` payload with extra data of type `Reloc`. tlv_reloc, + + // Pseudo instructions: + + /// Conditional move if zero flag set and parity flag not set + /// Clobbers the source operand! + /// Uses `rr` payload. + pseudo_cmov_z_and_np_rr, + /// Conditional move if zero flag not set or parity flag set + /// Uses `rr` payload. + pseudo_cmov_nz_or_p_rr, + /// Conditional move if zero flag not set or parity flag set + /// Uses `rx` payload. + pseudo_cmov_nz_or_p_rm_sib, + /// Conditional move if zero flag not set or parity flag set + /// Uses `rx` payload. + pseudo_cmov_nz_or_p_rm_rip, + /// Set byte if zero flag set and parity flag not set + /// Requires a scratch register! + /// Uses `rr` payload. + pseudo_set_z_and_np_r, + /// Set byte if zero flag set and parity flag not set + /// Requires a scratch register! + /// Uses `rx` payload. + pseudo_set_z_and_np_m_sib, + /// Set byte if zero flag set and parity flag not set + /// Requires a scratch register! + /// Uses `rx` payload. + pseudo_set_z_and_np_m_rip, + /// Set byte if zero flag not set or parity flag set + /// Requires a scratch register! + /// Uses `rr` payload. + pseudo_set_nz_or_p_r, + /// Set byte if zero flag not set or parity flag set + /// Requires a scratch register! + /// Uses `rx` payload. + pseudo_set_nz_or_p_m_sib, + /// Set byte if zero flag not set or parity flag set + /// Requires a scratch register! + /// Uses `rx` payload. + pseudo_set_nz_or_p_m_rip, + /// Jump if zero flag set and parity flag not set + /// Uses `inst` payload. + pseudo_j_z_and_np_inst, + /// Jump if zero flag not set or parity flag set + /// Uses `inst` payload. + pseudo_j_nz_or_p_inst, + + /// Probe alignment + /// Uses `ri` payload + pseudo_probe_align_ri_s, + /// Probe adjust unrolled + /// Uses `ri` payload + pseudo_probe_adjust_unrolled_ri_s, + /// Probe adjust setup + /// Uses `rri` payload + pseudo_probe_adjust_setup_rri_s, + /// Probe adjust loop + /// Uses `rr` payload + pseudo_probe_adjust_loop_rr, + /// Push registers + /// Uses `reg_list` payload. + pseudo_push_reg_list, + /// Pop registers + /// Uses `reg_list` payload. + pseudo_pop_reg_list, + + /// End of prologue + pseudo_dbg_prologue_end_none, + /// Update debug line + /// Uses `line_column` payload. + pseudo_dbg_line_line_column, + /// Start of epilogue + pseudo_dbg_epilogue_begin_none, + + /// Tombstone + /// Emitter should skip this instruction. + pseudo_dead_none, }; pub const Data = union { + none: struct { + fixes: Fixes = ._, + }, /// References another Mir instruction. - inst: Index, - /// Another instruction with condition code (CC). - /// Used by `jcc`. - inst_cc: struct { - /// Another instruction. + inst: struct { + fixes: Fixes = ._, inst: Index, - /// A condition code for use with EFLAGS register. - cc: bits.Condition, }, /// A 32-bit immediate value. 
- i: u32, - r: Register, + i: struct { + fixes: Fixes = ._, + i: u32, + }, + r: struct { + fixes: Fixes = ._, + r1: Register, + }, rr: struct { + fixes: Fixes = ._, r1: Register, r2: Register, }, rrr: struct { + fixes: Fixes = ._, r1: Register, r2: Register, r3: Register, }, - rri: struct { + rrrr: struct { + fixes: Fixes = ._, r1: Register, r2: Register, - i: u32, - }, - /// Condition code (CC), followed by custom payload found in extra. - x_cc: struct { - cc: bits.Condition, - payload: u32, + r3: Register, + r4: Register, }, - /// Register with condition code (CC). - r_cc: struct { - r: Register, - cc: bits.Condition, + rrri: struct { + fixes: Fixes = ._, + r1: Register, + r2: Register, + r3: Register, + i: u8, }, - /// Register, register with condition code (CC). - rr_cc: struct { + rri: struct { + fixes: Fixes = ._, r1: Register, r2: Register, - cc: bits.Condition, + i: u32, }, /// Register, immediate. ri: struct { - r: Register, + fixes: Fixes = ._, + r1: Register, i: u32, }, /// Register, followed by custom payload found in extra. rx: struct { - r: Register, - payload: u32, - }, - /// Register with condition code (CC), followed by custom payload found in extra. - rx_cc: struct { - r: Register, - cc: bits.Condition, - payload: u32, - }, - /// Immediate, followed by Custom payload found in extra. - ix: struct { - i: u32, + fixes: Fixes = ._, + r1: Register, payload: u32, }, /// Register, register, followed by Custom payload found in extra. rrx: struct { + fixes: Fixes = ._, r1: Register, r2: Register, payload: u32, }, /// Register, byte immediate, followed by Custom payload found in extra. rix: struct { - r: Register, + fixes: Fixes = ._, + r1: Register, + i: u8, + payload: u32, + }, + /// Register, register, byte immediate, followed by Custom payload found in extra. + rrix: struct { + fixes: Fixes = ._, + r1: Register, + r2: Register, i: u8, payload: u32, }, - /// String instruction prefix and width. - string: struct { - repeat: bits.StringRepeat, - width: bits.StringWidth, + /// Custom payload found in extra. + x: struct { + fixes: Fixes = ._, + payload: u32, }, /// Relocation for the linker where: /// * `atom_index` is the index of the source /// * `sym_index` is the index of the target - relocation: struct { - /// Index of the containing atom. - atom_index: u32, - /// Index into the linker's symbol table. - sym_index: u32, - }, + reloc: Reloc, /// Debug line and column position line_column: struct { line: u32, column: u32, }, - /// Index into `extra`. Meaning of what can be found there is context-dependent. - payload: u32, + /// Register list + reg_list: RegisterList, }; // Make sure we don't accidentally make instructions bigger than expected. @@ -523,9 +972,8 @@ pub const Inst = struct { } }; -pub const LeaRegisterReloc = struct { - /// Destination register. - reg: u32, +/// A linker symbol not yet allocated in VM. +pub const Reloc = struct { /// Index of the containing atom. atom_index: u32, /// Index into the linker's symbol table. 
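Note: every payload in the new `Data` union above begins with a `fixes` field, so a single base tag such as `cmov`, `set`, or `j` stands in for a whole family of mnemonics once its affixes are applied. The following is a minimal, hypothetical Zig sketch of that composition — the `Fixes` members shown and the splice-at-`_` convention are illustrative assumptions, not the compiler's actual enum:

const std = @import("std");

// Illustrative subset only: affixes are written around a `_` placeholder.
const Fixes = enum { @"_", @"_ne", @"lock _", @"f_p" };
const Tag = enum { mov, cmov, istt };

fn mnemonic(comptime fixes: Fixes, comptime tag: Tag) []const u8 {
    // Splice the base tag in place of the `_` placeholder.
    const pattern = @tagName(fixes);
    const i = comptime std.mem.indexOfScalar(u8, pattern, '_').?;
    return pattern[0..i] ++ @tagName(tag) ++ pattern[i + 1 ..];
}

test "compose a mnemonic from a FixedTag-style pair" {
    try std.testing.expectEqualStrings("cmovne", comptime mnemonic(.@"_ne", .cmov));
    try std.testing.expectEqualStrings("fisttp", comptime mnemonic(.@"f_p", .istt));
    try std.testing.expectEqualStrings("lock mov", comptime mnemonic(.@"lock _", .mov));
}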
@@ -560,21 +1008,15 @@ pub const RegisterList = struct { return self.bitset.iterator(options); } - pub fn asInt(self: Self) u32 { - return self.bitset.mask; - } - - pub fn fromInt(mask: u32) Self { - return .{ - .bitset = BitSet{ .mask = @intCast(BitSet.MaskInt, mask) }, - }; - } - pub fn count(self: Self) u32 { return @intCast(u32, self.bitset.count()); } }; +pub const Imm32 = struct { + imm: u32, +}; + pub const Imm64 = struct { msb: u32, lsb: u32, diff --git a/src/arch/x86_64/abi.zig b/src/arch/x86_64/abi.zig index ff1a0ee520..69df5dbf4c 100644 --- a/src/arch/x86_64/abi.zig +++ b/src/arch/x86_64/abi.zig @@ -1,10 +1,3 @@ -const std = @import("std"); -const Type = @import("../../type.zig").Type; -const Target = std.Target; -const assert = std.debug.assert; -const Register = @import("bits.zig").Register; -const RegisterManagerFn = @import("../../register_manager.zig").RegisterManager; - pub const Class = enum { integer, sse, @@ -19,7 +12,7 @@ pub const Class = enum { float_combine, }; -pub fn classifyWindows(ty: Type, target: Target) Class { +pub fn classifyWindows(ty: Type, mod: *Module) Class { // https://docs.microsoft.com/en-gb/cpp/build/x64-calling-convention?view=vs-2017 // "There's a strict one-to-one correspondence between a function call's arguments // and the registers used for those arguments. Any argument that doesn't fit in 8 @@ -28,7 +21,7 @@ pub fn classifyWindows(ty: Type, target: Target) Class { // "All floating point operations are done using the 16 XMM registers." // "Structs and unions of size 8, 16, 32, or 64 bits, and __m64 types, are passed // as if they were integers of the same size." - switch (ty.zigTypeTag()) { + switch (ty.zigTypeTag(mod)) { .Pointer, .Int, .Bool, @@ -43,12 +36,12 @@ pub fn classifyWindows(ty: Type, target: Target) Class { .ErrorUnion, .AnyFrame, .Frame, - => switch (ty.abiSize(target)) { + => switch (ty.abiSize(mod)) { 0 => unreachable, 1, 2, 4, 8 => return .integer, - else => switch (ty.zigTypeTag()) { + else => switch (ty.zigTypeTag(mod)) { .Int => return .win_i128, - .Struct, .Union => if (ty.containerLayout() == .Packed) { + .Struct, .Union => if (ty.containerLayout(mod) == .Packed) { return .win_i128; } else { return .memory; @@ -75,14 +68,15 @@ pub const Context = enum { ret, arg, other }; /// There are a maximum of 8 possible return slots. Returned values are in /// the beginning of the array; unused slots are filled with .none. 
-pub fn classifySystemV(ty: Type, target: Target, ctx: Context) [8]Class { +pub fn classifySystemV(ty: Type, mod: *Module, ctx: Context) [8]Class { + const target = mod.getTarget(); const memory_class = [_]Class{ .memory, .none, .none, .none, .none, .none, .none, .none, }; var result = [1]Class{.none} ** 8; - switch (ty.zigTypeTag()) { - .Pointer => switch (ty.ptrSize()) { + switch (ty.zigTypeTag(mod)) { + .Pointer => switch (ty.ptrSize(mod)) { .Slice => { result[0] = .integer; result[1] = .integer; @@ -94,7 +88,7 @@ pub fn classifySystemV(ty: Type, target: Target, ctx: Context) [8]Class { }, }, .Int, .Enum, .ErrorSet => { - const bits = ty.intInfo(target).bits; + const bits = ty.intInfo(mod).bits; if (bits <= 64) { result[0] = .integer; return result; @@ -164,36 +158,8 @@ pub fn classifySystemV(ty: Type, target: Target, ctx: Context) [8]Class { else => unreachable, }, .Vector => { - const elem_ty = ty.childType(); - if (ctx == .arg) { - const bit_size = ty.bitSize(target); - if (bit_size > 128) { - const has_avx512 = target.cpu.features.isEnabled(@enumToInt(std.Target.x86.Feature.avx512f)); - if (has_avx512 and bit_size <= 512) return .{ - .integer, .integer, .integer, .integer, - .integer, .integer, .integer, .integer, - }; - const has_avx = target.cpu.features.isEnabled(@enumToInt(std.Target.x86.Feature.avx)); - if (has_avx and bit_size <= 256) return .{ - .integer, .integer, .integer, .integer, - .none, .none, .none, .none, - }; - return memory_class; - } - if (bit_size > 80) return .{ - .integer, .integer, .none, .none, - .none, .none, .none, .none, - }; - if (bit_size > 64) return .{ - .x87, .none, .none, .none, - .none, .none, .none, .none, - }; - return .{ - .integer, .none, .none, .none, - .none, .none, .none, .none, - }; - } - const bits = elem_ty.bitSize(target) * ty.arrayLen(); + const elem_ty = ty.childType(mod); + const bits = elem_ty.bitSize(mod) * ty.arrayLen(mod); if (bits <= 64) return .{ .sse, .none, .none, .none, .none, .none, .none, .none, @@ -202,6 +168,7 @@ pub fn classifySystemV(ty: Type, target: Target, ctx: Context) [8]Class { .sse, .sseup, .none, .none, .none, .none, .none, .none, }; + if (ctx == .arg and !std.Target.x86.featureSetHas(target.cpu.features, .avx)) return memory_class; if (bits <= 192) return .{ .sse, .sseup, .sseup, .none, .none, .none, .none, .none, @@ -210,6 +177,7 @@ pub fn classifySystemV(ty: Type, target: Target, ctx: Context) [8]Class { .sse, .sseup, .sseup, .sseup, .none, .none, .none, .none, }; + if (ctx == .arg and !std.Target.x86.featureSetHas(target.cpu.features, .avx512f)) return memory_class; if (bits <= 320) return .{ .sse, .sseup, .sseup, .sseup, .sseup, .none, .none, .none, @@ -230,7 +198,7 @@ pub fn classifySystemV(ty: Type, target: Target, ctx: Context) [8]Class { return memory_class; }, .Optional => { - if (ty.isPtrLikeOptional()) { + if (ty.isPtrLikeOptional(mod)) { result[0] = .integer; return result; } @@ -241,8 +209,8 @@ pub fn classifySystemV(ty: Type, target: Target, ctx: Context) [8]Class { // it contains unaligned fields, it has class MEMORY" // "If the size of the aggregate exceeds a single eightbyte, each is classified // separately.". 
- const ty_size = ty.abiSize(target); - if (ty.containerLayout() == .Packed) { + const ty_size = ty.abiSize(mod); + if (ty.containerLayout(mod) == .Packed) { assert(ty_size <= 128); result[0] = .integer; if (ty_size > 64) result[1] = .integer; @@ -253,15 +221,15 @@ pub fn classifySystemV(ty: Type, target: Target, ctx: Context) [8]Class { var result_i: usize = 0; // out of 8 var byte_i: usize = 0; // out of 8 - const fields = ty.structFields(); + const fields = ty.structFields(mod); for (fields.values()) |field| { if (field.abi_align != 0) { - if (field.abi_align < field.ty.abiAlignment(target)) { + if (field.abi_align < field.ty.abiAlignment(mod)) { return memory_class; } } - const field_size = field.ty.abiSize(target); - const field_class_array = classifySystemV(field.ty, target, .other); + const field_size = field.ty.abiSize(mod); + const field_class_array = classifySystemV(field.ty, mod, .other); const field_class = std.mem.sliceTo(&field_class_array, .none); if (byte_i + field_size <= 8) { // Combine this field with the previous one. @@ -360,8 +328,8 @@ pub fn classifySystemV(ty: Type, target: Target, ctx: Context) [8]Class { // it contains unaligned fields, it has class MEMORY" // "If the size of the aggregate exceeds a single eightbyte, each is classified // separately.". - const ty_size = ty.abiSize(target); - if (ty.containerLayout() == .Packed) { + const ty_size = ty.abiSize(mod); + if (ty.containerLayout(mod) == .Packed) { assert(ty_size <= 128); result[0] = .integer; if (ty_size > 64) result[1] = .integer; @@ -370,15 +338,15 @@ pub fn classifySystemV(ty: Type, target: Target, ctx: Context) [8]Class { if (ty_size > 64) return memory_class; - const fields = ty.unionFields(); + const fields = ty.unionFields(mod); for (fields.values()) |field| { if (field.abi_align != 0) { - if (field.abi_align < field.ty.abiAlignment(target)) { + if (field.abi_align < field.ty.abiAlignment(mod)) { return memory_class; } } // Combine this field with the previous one. - const field_class = classifySystemV(field.ty, target, .other); + const field_class = classifySystemV(field.ty, mod, .other); for (&result, 0..) |*result_item, i| { const field_item = field_class[i]; // "If both classes are equal, this is the resulting class." 
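Note: both the struct and union paths above rely on the System V "merge" step when two fields land in the same eightbyte. A small, self-contained sketch of that rule (using a simplified stand-in for this file's `Class` enum) shows how the classic `extern struct { v1: i8, v2: i8, v3: f64 }` case — the test removed further down — ends up as one .integer eightbyte followed by one .sse eightbyte:

const std = @import("std");

const Class = enum { none, integer, sse, sseup, x87, x87up, complex_x87, memory };

fn merge(a: Class, b: Class) Class {
    if (a == b) return a; // "If both classes are equal, this is the resulting class."
    if (a == .none) return b; // A NO_CLASS slot takes the other field's class.
    if (b == .none) return a;
    if (a == .memory or b == .memory) return .memory; // MEMORY always wins.
    if (a == .integer or b == .integer) return .integer; // INTEGER beats SSE.
    if (a == .x87 or a == .x87up or a == .complex_x87 or
        b == .x87 or b == .x87up or b == .complex_x87) return .memory;
    return .sse; // Otherwise both halves are SSE-like.
}

test "extern struct { v1: i8, v2: i8, v3: f64 } classifies as integer + sse" {
    // v1 and v2 share the first eightbyte; v3 alone fills the second.
    try std.testing.expectEqual(Class.integer, merge(.integer, .integer));
    try std.testing.expectEqual(Class.sse, merge(.none, .sse));
}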
@@ -452,7 +420,7 @@ pub fn classifySystemV(ty: Type, target: Target, ctx: Context) [8]Class { return result; }, .Array => { - const ty_size = ty.abiSize(target); + const ty_size = ty.abiSize(mod); if (ty_size <= 64) { result[0] = .integer; return result; @@ -553,10 +521,17 @@ pub const RegisterClass = struct { }; }; +const builtin = @import("builtin"); +const std = @import("std"); +const Target = std.Target; +const assert = std.debug.assert; const testing = std.testing; + const Module = @import("../../Module.zig"); +const Register = @import("bits.zig").Register; +const RegisterManagerFn = @import("../../register_manager.zig").RegisterManager; +const Type = @import("../../type.zig").Type; const Value = @import("../../value.zig").Value; -const builtin = @import("builtin"); fn _field(comptime tag: Type.Tag, offset: u32) Module.Struct.Field { return .{ @@ -567,34 +542,3 @@ fn _field(comptime tag: Type.Tag, offset: u32) Module.Struct.Field { .is_comptime = false, }; } - -test "C_C_D" { - var fields = Module.Struct.Fields{}; - // const C_C_D = extern struct { v1: i8, v2: i8, v3: f64 }; - try fields.ensureTotalCapacity(testing.allocator, 3); - defer fields.deinit(testing.allocator); - fields.putAssumeCapacity("v1", _field(.i8, 0)); - fields.putAssumeCapacity("v2", _field(.i8, 1)); - fields.putAssumeCapacity("v3", _field(.f64, 4)); - - var C_C_D_struct = Module.Struct{ - .fields = fields, - .namespace = undefined, - .owner_decl = undefined, - .zir_index = undefined, - .layout = .Extern, - .status = .fully_resolved, - .known_non_opv = true, - .is_tuple = false, - }; - var C_C_D = Type.Payload.Struct{ .data = &C_C_D_struct }; - - try testing.expectEqual( - [_]Class{ .integer, .sse, .none, .none, .none, .none, .none, .none }, - classifySystemV(Type.initPayload(&C_C_D.base), builtin.target, .ret), - ); - try testing.expectEqual( - [_]Class{ .integer, .sse, .none, .none, .none, .none, .none, .none }, - classifySystemV(Type.initPayload(&C_C_D.base), builtin.target, .arg), - ); -} diff --git a/src/arch/x86_64/bits.zig b/src/arch/x86_64/bits.zig index 5d06865566..923ba31266 100644 --- a/src/arch/x86_64/bits.zig +++ b/src/arch/x86_64/bits.zig @@ -6,9 +6,6 @@ const Allocator = std.mem.Allocator; const ArrayList = std.ArrayList; const DW = std.dwarf; -pub const StringRepeat = enum(u3) { none, rep, repe, repz, repne, repnz }; -pub const StringWidth = enum(u2) { b, w, d, q }; - /// EFLAGS condition codes pub const Condition = enum(u5) { /// above @@ -72,6 +69,12 @@ pub const Condition = enum(u5) { /// zero z, + // Pseudo conditions + /// zero and not parity + z_and_np, + /// not zero or parity + nz_or_p, + /// Converts a std.math.CompareOperator into a condition flag, /// i.e. returns the condition that is true iff the result of the /// comparison is true. Assumes signed comparison @@ -143,6 +146,9 @@ pub const Condition = enum(u5) { .po => .pe, .s => .ns, .z => .nz, + + .z_and_np => .nz_or_p, + .nz_or_p => .z_and_np, }; } }; @@ -169,15 +175,21 @@ pub const Register = enum(u7) { xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, + mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7, + + st0, st1, st2, st3, st4, st5, st6, st7, + es, cs, ss, ds, fs, gs, none, // zig fmt: on - pub const Class = enum(u2) { + pub const Class = enum { general_purpose, - floating_point, segment, + x87, + mmx, + sse, }; pub fn class(reg: Register) Class { @@ -189,8 +201,10 @@ pub const Register = enum(u7) { @enumToInt(Register.al) ... 
@enumToInt(Register.r15b) => .general_purpose, @enumToInt(Register.ah) ... @enumToInt(Register.bh) => .general_purpose, - @enumToInt(Register.ymm0) ... @enumToInt(Register.ymm15) => .floating_point, - @enumToInt(Register.xmm0) ... @enumToInt(Register.xmm15) => .floating_point, + @enumToInt(Register.ymm0) ... @enumToInt(Register.ymm15) => .sse, + @enumToInt(Register.xmm0) ... @enumToInt(Register.xmm15) => .sse, + @enumToInt(Register.mm0) ... @enumToInt(Register.mm7) => .mmx, + @enumToInt(Register.st0) ... @enumToInt(Register.st7) => .x87, @enumToInt(Register.es) ... @enumToInt(Register.gs) => .segment, @@ -210,8 +224,10 @@ pub const Register = enum(u7) { @enumToInt(Register.ymm0) ... @enumToInt(Register.ymm15) => @enumToInt(Register.ymm0) - 16, @enumToInt(Register.xmm0) ... @enumToInt(Register.xmm15) => @enumToInt(Register.xmm0) - 16, + @enumToInt(Register.mm0) ... @enumToInt(Register.mm7) => @enumToInt(Register.mm0) - 32, + @enumToInt(Register.st0) ... @enumToInt(Register.st7) => @enumToInt(Register.st0) - 40, - @enumToInt(Register.es) ... @enumToInt(Register.gs) => @enumToInt(Register.es) - 32, + @enumToInt(Register.es) ... @enumToInt(Register.gs) => @enumToInt(Register.es) - 48, else => unreachable, // zig fmt: on @@ -230,6 +246,8 @@ pub const Register = enum(u7) { @enumToInt(Register.ymm0) ... @enumToInt(Register.ymm15) => 256, @enumToInt(Register.xmm0) ... @enumToInt(Register.xmm15) => 128, + @enumToInt(Register.mm0) ... @enumToInt(Register.mm7) => 64, + @enumToInt(Register.st0) ... @enumToInt(Register.st7) => 80, @enumToInt(Register.es) ... @enumToInt(Register.gs) => 16, @@ -265,6 +283,8 @@ pub const Register = enum(u7) { @enumToInt(Register.ymm0) ... @enumToInt(Register.ymm15) => @enumToInt(Register.ymm0), @enumToInt(Register.xmm0) ... @enumToInt(Register.xmm15) => @enumToInt(Register.xmm0), + @enumToInt(Register.mm0) ... @enumToInt(Register.mm7) => @enumToInt(Register.mm0), + @enumToInt(Register.st0) ... @enumToInt(Register.st7) => @enumToInt(Register.st0), @enumToInt(Register.es) ... 
@enumToInt(Register.gs) => @enumToInt(Register.es), @@ -320,8 +340,8 @@ pub const Register = enum(u7) { return @intToEnum(Register, @enumToInt(reg) - reg.gpBase() + @enumToInt(Register.al)); } - fn fpBase(reg: Register) u7 { - assert(reg.class() == .floating_point); + fn sseBase(reg: Register) u7 { + assert(reg.class() == .sse); return switch (@enumToInt(reg)) { @enumToInt(Register.ymm0)...@enumToInt(Register.ymm15) => @enumToInt(Register.ymm0), @enumToInt(Register.xmm0)...@enumToInt(Register.xmm15) => @enumToInt(Register.xmm0), @@ -330,49 +350,24 @@ pub const Register = enum(u7) { } pub fn to256(reg: Register) Register { - return @intToEnum(Register, @enumToInt(reg) - reg.fpBase() + @enumToInt(Register.ymm0)); + return @intToEnum(Register, @enumToInt(reg) - reg.sseBase() + @enumToInt(Register.ymm0)); } pub fn to128(reg: Register) Register { - return @intToEnum(Register, @enumToInt(reg) - reg.fpBase() + @enumToInt(Register.xmm0)); + return @intToEnum(Register, @enumToInt(reg) - reg.sseBase() + @enumToInt(Register.xmm0)); } - pub fn dwarfLocOp(reg: Register) u8 { + /// DWARF register encoding + pub fn dwarfNum(reg: Register) u6 { return switch (reg.class()) { - .general_purpose => switch (reg.to64()) { - .rax => DW.OP.reg0, - .rdx => DW.OP.reg1, - .rcx => DW.OP.reg2, - .rbx => DW.OP.reg3, - .rsi => DW.OP.reg4, - .rdi => DW.OP.reg5, - .rbp => DW.OP.reg6, - .rsp => DW.OP.reg7, - else => @intCast(u8, @enumToInt(reg) - reg.gpBase()) + DW.OP.reg0, - }, - .floating_point => @intCast(u8, @enumToInt(reg) - reg.fpBase()) + DW.OP.reg17, - else => unreachable, - }; - } - - /// DWARF encodings that push a value onto the DWARF stack that is either - /// the contents of a register or the result of adding the contents a given - /// register to a given signed offset. 
- pub fn dwarfLocOpDeref(reg: Register) u8 { - return switch (reg.class()) { - .general_purpose => switch (reg.to64()) { - .rax => DW.OP.breg0, - .rdx => DW.OP.breg1, - .rcx => DW.OP.breg2, - .rbx => DW.OP.breg3, - .rsi => DW.OP.breg4, - .rdi => DW.OP.breg5, - .rbp => DW.OP.breg6, - .rsp => DW.OP.breg7, - else => @intCast(u8, @enumToInt(reg) - reg.gpBase()) + DW.OP.breg0, - }, - .floating_point => @intCast(u8, @enumToInt(reg) - reg.fpBase()) + DW.OP.breg17, - else => unreachable, + .general_purpose => if (reg.isExtended()) + reg.enc() + else + @truncate(u3, @as(u24, 0o54673120) >> @as(u5, reg.enc()) * 3), + .sse => 17 + @as(u6, reg.enc()), + .x87 => 33 + @as(u6, reg.enc()), + .mmx => 41 + @as(u6, reg.enc()), + .segment => 50 + @as(u6, reg.enc()), }; } }; @@ -386,6 +381,8 @@ test "Register id - different classes" { try expect(Register.ymm0.id() == 0b10000); try expect(Register.ymm0.id() != Register.rax.id()); try expect(Register.xmm0.id() == Register.ymm0.id()); + try expect(Register.xmm0.id() != Register.mm0.id()); + try expect(Register.mm0.id() != Register.st0.id()); try expect(Register.es.id() == 0b100000); } @@ -401,7 +398,9 @@ test "Register enc - different classes" { test "Register classes" { try expect(Register.r11.class() == .general_purpose); - try expect(Register.ymm11.class() == .floating_point); + try expect(Register.ymm11.class() == .sse); + try expect(Register.mm3.class() == .mmx); + try expect(Register.st3.class() == .x87); try expect(Register.fs.class() == .segment); } @@ -476,7 +475,9 @@ pub const Memory = union(enum) { dword, qword, tbyte, - dqword, + xword, + yword, + zword, pub fn fromSize(size: u32) PtrSize { return switch (size) { @@ -484,7 +485,9 @@ pub const Memory = union(enum) { 2...2 => .word, 3...4 => .dword, 5...8 => .qword, - 9...16 => .dqword, + 9...16 => .xword, + 17...32 => .yword, + 33...64 => .zword, else => unreachable, }; } @@ -496,7 +499,9 @@ pub const Memory = union(enum) { 32 => .dword, 64 => .qword, 80 => .tbyte, - 128 => .dqword, + 128 => .xword, + 256 => .yword, + 512 => .zword, else => unreachable, }; } @@ -508,7 +513,9 @@ pub const Memory = union(enum) { .dword => 32, .qword => 64, .tbyte => 80, - .dqword => 128, + .xword => 128, + .yword => 256, + .zword => 512, }; } }; diff --git a/src/arch/x86_64/encoder.zig b/src/arch/x86_64/encoder.zig index 329dfca924..5f9a2f49b3 100644 --- a/src/arch/x86_64/encoder.zig +++ b/src/arch/x86_64/encoder.zig @@ -151,15 +151,12 @@ pub const Instruction = struct { moffs.offset, }), }, - .imm => |imm| try writer.print("0x{x}", .{imm.asUnsigned(enc_op.bitSize())}), + .imm => |imm| try writer.print("0x{x}", .{imm.asUnsigned(enc_op.immBitSize())}), } } pub fn fmtPrint(op: Operand, enc_op: Encoding.Op) std.fmt.Formatter(fmt) { - return .{ .data = .{ - .op = op, - .enc_op = enc_op, - } }; + return .{ .data = .{ .op = op, .enc_op = enc_op } }; } }; @@ -209,10 +206,16 @@ pub const Instruction = struct { const enc = inst.encoding; const data = enc.data; - try inst.encodeLegacyPrefixes(encoder); - try inst.encodeMandatoryPrefix(encoder); - try inst.encodeRexPrefix(encoder); - try inst.encodeOpcode(encoder); + if (data.mode.isVex()) { + try inst.encodeVexPrefix(encoder); + const opc = inst.encoding.opcode(); + try encoder.opcode_1byte(opc[opc.len - 1]); + } else { + try inst.encodeLegacyPrefixes(encoder); + try inst.encodeMandatoryPrefix(encoder); + try inst.encodeRexPrefix(encoder); + try inst.encodeOpcode(encoder); + } switch (data.op_en) { .np, .o => {}, @@ -222,25 +225,28 @@ pub const Instruction = struct { .td => try 
encoder.imm64(inst.ops[0].mem.moffs.offset), else => { const mem_op = switch (data.op_en) { - .m, .mi, .m1, .mc, .mr, .mri, .mrc => inst.ops[0], - .rm, .rmi => inst.ops[1], + .m, .mi, .m1, .mc, .mr, .mri, .mrc, .mvr => inst.ops[0], + .rm, .rmi, .rm0, .vmi => inst.ops[1], + .rvm, .rvmr, .rvmi => inst.ops[2], else => unreachable, }; switch (mem_op) { .reg => |reg| { const rm = switch (data.op_en) { - .m, .mi, .m1, .mc => enc.modRmExt(), + .m, .mi, .m1, .mc, .vmi => enc.modRmExt(), .mr, .mri, .mrc => inst.ops[1].reg.lowEnc(), - .rm, .rmi => inst.ops[0].reg.lowEnc(), + .rm, .rmi, .rm0, .rvm, .rvmr, .rvmi => inst.ops[0].reg.lowEnc(), + .mvr => inst.ops[2].reg.lowEnc(), else => unreachable, }; try encoder.modRm_direct(rm, reg.lowEnc()); }, .mem => |mem| { const op = switch (data.op_en) { - .m, .mi, .m1, .mc => .none, + .m, .mi, .m1, .mc, .vmi => .none, .mr, .mri, .mrc => inst.ops[1], - .rm, .rmi => inst.ops[0], + .rm, .rmi, .rm0, .rvm, .rvmr, .rvmi => inst.ops[0], + .mvr => inst.ops[2], else => unreachable, }; try encodeMemory(enc, mem, op, encoder); @@ -250,7 +256,9 @@ pub const Instruction = struct { switch (data.op_en) { .mi => try encodeImm(inst.ops[1].imm, data.ops[1], encoder), - .rmi, .mri => try encodeImm(inst.ops[2].imm, data.ops[2], encoder), + .rmi, .mri, .vmi => try encodeImm(inst.ops[2].imm, data.ops[2], encoder), + .rvmr => try encoder.imm8(@as(u8, inst.ops[3].reg.enc()) << 4), + .rvmi => try encodeImm(inst.ops[3].imm, data.ops[3], encoder), else => {}, } }, @@ -282,18 +290,16 @@ pub const Instruction = struct { .rep, .repe, .repz => legacy.prefix_f3 = true, } - if (data.mode == .none) { - const bit_size = enc.operandBitSize(); - if (bit_size == 16) { - legacy.set16BitOverride(); - } + switch (data.mode) { + .short, .rex_short => legacy.set16BitOverride(), + else => {}, } const segment_override: ?Register = switch (op_en) { .i, .zi, .o, .oi, .d, .np => null, .fd => inst.ops[1].mem.base().reg, .td => inst.ops[0].mem.base().reg, - .rm, .rmi => if (inst.ops[1].isSegmentRegister()) + .rm, .rmi, .rm0 => if (inst.ops[1].isSegmentRegister()) switch (inst.ops[1]) { .reg => |reg| reg, .mem => |mem| mem.base().reg, @@ -309,6 +315,7 @@ pub const Instruction = struct { } else null, + .vmi, .rvm, .rvmr, .rvmi, .mvr => unreachable, }; if (segment_override) |seg| { legacy.setSegmentOverride(seg); @@ -322,35 +329,92 @@ pub const Instruction = struct { var rex = Rex{}; rex.present = inst.encoding.data.mode == .rex; - switch (inst.encoding.data.mode) { - .long, .sse2_long => rex.w = true, - else => {}, - } + rex.w = inst.encoding.data.mode == .long; switch (op_en) { .np, .i, .zi, .fd, .td, .d => {}, .o, .oi => rex.b = inst.ops[0].reg.isExtended(), - .m, .mi, .m1, .mc, .mr, .rm, .rmi, .mri, .mrc => { + .m, .mi, .m1, .mc, .mr, .rm, .rmi, .mri, .mrc, .rm0 => { const r_op = switch (op_en) { - .rm, .rmi => inst.ops[0], + .rm, .rmi, .rm0 => inst.ops[0], .mr, .mri, .mrc => inst.ops[1], else => .none, }; rex.r = r_op.isBaseExtended(); const b_x_op = switch (op_en) { - .rm, .rmi => inst.ops[1], + .rm, .rmi, .rm0 => inst.ops[1], .m, .mi, .m1, .mc, .mr, .mri, .mrc => inst.ops[0], else => unreachable, }; rex.b = b_x_op.isBaseExtended(); rex.x = b_x_op.isIndexExtended(); }, + .vmi, .rvm, .rvmr, .rvmi, .mvr => unreachable, } try encoder.rex(rex); } + fn encodeVexPrefix(inst: Instruction, encoder: anytype) !void { + const op_en = inst.encoding.data.op_en; + const opc = inst.encoding.opcode(); + const mand_pre = inst.encoding.mandatoryPrefix(); + + var vex = Vex{}; + + vex.w = inst.encoding.data.mode.isLong(); + + 
switch (op_en) { + .np, .i, .zi, .fd, .td, .d => {}, + .o, .oi => vex.b = inst.ops[0].reg.isExtended(), + .m, .mi, .m1, .mc, .mr, .rm, .rmi, .mri, .mrc, .rm0, .vmi, .rvm, .rvmr, .rvmi, .mvr => { + const r_op = switch (op_en) { + .rm, .rmi, .rm0, .rvm, .rvmr, .rvmi => inst.ops[0], + .mr, .mri, .mrc => inst.ops[1], + .mvr => inst.ops[2], + .m, .mi, .m1, .mc, .vmi => .none, + else => unreachable, + }; + vex.r = r_op.isBaseExtended(); + + const b_x_op = switch (op_en) { + .rm, .rmi, .rm0, .vmi => inst.ops[1], + .m, .mi, .m1, .mc, .mr, .mri, .mrc, .mvr => inst.ops[0], + .rvm, .rvmr, .rvmi => inst.ops[2], + else => unreachable, + }; + vex.b = b_x_op.isBaseExtended(); + vex.x = b_x_op.isIndexExtended(); + }, + } + + vex.l = inst.encoding.data.mode.isVecLong(); + + vex.p = if (mand_pre) |mand| switch (mand) { + 0x66 => .@"66", + 0xf2 => .f2, + 0xf3 => .f3, + else => unreachable, + } else .none; + + const leading: usize = if (mand_pre) |_| 1 else 0; + assert(opc[leading] == 0x0f); + vex.m = switch (opc[leading + 1]) { + else => .@"0f", + 0x38 => .@"0f38", + 0x3a => .@"0f3a", + }; + + switch (op_en) { + else => {}, + .vmi => vex.v = inst.ops[0].reg, + .rvm, .rvmr, .rvmi => vex.v = inst.ops[1].reg, + } + + try encoder.vex(vex); + } + fn encodeMandatoryPrefix(inst: Instruction, encoder: anytype) !void { const prefix = inst.encoding.mandatoryPrefix() orelse return; try encoder.opcode_1byte(prefix); @@ -443,8 +507,8 @@ pub const Instruction = struct { } fn encodeImm(imm: Immediate, kind: Encoding.Op, encoder: anytype) !void { - const raw = imm.asUnsigned(kind.bitSize()); - switch (kind.bitSize()) { + const raw = imm.asUnsigned(kind.immBitSize()); + switch (kind.immBitSize()) { 8 => try encoder.imm8(@intCast(u8, raw)), 16 => try encoder.imm16(@intCast(u16, raw)), 32 => try encoder.imm32(@intCast(u32, raw)), @@ -562,17 +626,48 @@ fn Encoder(comptime T: type, comptime opts: Options) type { /// or one of reg, index, r/m, base, or opcode-reg might be extended. /// /// See struct `Rex` for a description of each field. - pub fn rex(self: Self, byte: Rex) !void { - if (!byte.present and !byte.isSet()) return; + pub fn rex(self: Self, fields: Rex) !void { + if (!fields.present and !fields.isSet()) return; - var value: u8 = 0b0100_0000; + var byte: u8 = 0b0100_0000; - if (byte.w) value |= 0b1000; - if (byte.r) value |= 0b0100; - if (byte.x) value |= 0b0010; - if (byte.b) value |= 0b0001; + if (fields.w) byte |= 0b1000; + if (fields.r) byte |= 0b0100; + if (fields.x) byte |= 0b0010; + if (fields.b) byte |= 0b0001; - try self.writer.writeByte(value); + try self.writer.writeByte(byte); + } + + /// Encodes a VEX prefix given all the fields + /// + /// See struct `Vex` for a description of each field. 
+ pub fn vex(self: Self, fields: Vex) !void { + if (fields.is3Byte()) { + try self.writer.writeByte(0b1100_0100); + + try self.writer.writeByte( + @as(u8, ~@boolToInt(fields.r)) << 7 | + @as(u8, ~@boolToInt(fields.x)) << 6 | + @as(u8, ~@boolToInt(fields.b)) << 5 | + @as(u8, @enumToInt(fields.m)) << 0, + ); + + try self.writer.writeByte( + @as(u8, @boolToInt(fields.w)) << 7 | + @as(u8, ~fields.v.enc()) << 3 | + @as(u8, @boolToInt(fields.l)) << 2 | + @as(u8, @enumToInt(fields.p)) << 0, + ); + } else { + try self.writer.writeByte(0b1100_0101); + try self.writer.writeByte( + @as(u8, ~@boolToInt(fields.r)) << 7 | + @as(u8, ~fields.v.enc()) << 3 | + @as(u8, @boolToInt(fields.l)) << 2 | + @as(u8, @enumToInt(fields.p)) << 0, + ); + } } // ------ @@ -848,6 +943,31 @@ pub const Rex = struct { } }; +pub const Vex = struct { + w: bool = false, + r: bool = false, + x: bool = false, + b: bool = false, + l: bool = false, + p: enum(u2) { + none = 0b00, + @"66" = 0b01, + f3 = 0b10, + f2 = 0b11, + } = .none, + m: enum(u5) { + @"0f" = 0b0_0001, + @"0f38" = 0b0_0010, + @"0f3a" = 0b0_0011, + _, + } = .@"0f", + v: Register = .ymm0, + + pub fn is3Byte(vex: Vex) bool { + return vex.w or vex.x or vex.b or vex.m != .@"0f"; + } +}; + // Tests fn expectEqualHexStrings(expected: []const u8, given: []const u8, assembly: []const u8) !void { assert(expected.len > 0); diff --git a/src/arch/x86_64/encodings.zig b/src/arch/x86_64/encodings.zig index 333bdceea8..a0cd1af0a7 100644 --- a/src/arch/x86_64/encodings.zig +++ b/src/arch/x86_64/encodings.zig @@ -3,883 +3,1602 @@ const Mnemonic = Encoding.Mnemonic; const OpEn = Encoding.OpEn; const Op = Encoding.Op; const Mode = Encoding.Mode; +const Feature = Encoding.Feature; const modrm_ext = u3; -pub const Entry = struct { Mnemonic, OpEn, []const Op, []const u8, modrm_ext, Mode }; +pub const Entry = struct { Mnemonic, OpEn, []const Op, []const u8, modrm_ext, Mode, Feature }; // TODO move this into a .zon file when Zig is capable of importing .zon files // zig fmt: off pub const table = [_]Entry{ // General-purpose - .{ .adc, .zi, &.{ .al, .imm8 }, &.{ 0x14 }, 0, .none }, - .{ .adc, .zi, &.{ .ax, .imm16 }, &.{ 0x15 }, 0, .none }, - .{ .adc, .zi, &.{ .eax, .imm32 }, &.{ 0x15 }, 0, .none }, - .{ .adc, .zi, &.{ .rax, .imm32s }, &.{ 0x15 }, 0, .long }, - .{ .adc, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 2, .none }, - .{ .adc, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 2, .rex }, - .{ .adc, .mi, &.{ .rm16, .imm16 }, &.{ 0x81 }, 2, .none }, - .{ .adc, .mi, &.{ .rm32, .imm32 }, &.{ 0x81 }, 2, .none }, - .{ .adc, .mi, &.{ .rm64, .imm32s }, &.{ 0x81 }, 2, .long }, - .{ .adc, .mi, &.{ .rm16, .imm8s }, &.{ 0x83 }, 2, .none }, - .{ .adc, .mi, &.{ .rm32, .imm8s }, &.{ 0x83 }, 2, .none }, - .{ .adc, .mi, &.{ .rm64, .imm8s }, &.{ 0x83 }, 2, .long }, - .{ .adc, .mr, &.{ .rm8, .r8 }, &.{ 0x10 }, 0, .none }, - .{ .adc, .mr, &.{ .rm8, .r8 }, &.{ 0x10 }, 0, .rex }, - .{ .adc, .mr, &.{ .rm16, .r16 }, &.{ 0x11 }, 0, .none }, - .{ .adc, .mr, &.{ .rm32, .r32 }, &.{ 0x11 }, 0, .none }, - .{ .adc, .mr, &.{ .rm64, .r64 }, &.{ 0x11 }, 0, .long }, - .{ .adc, .rm, &.{ .r8, .rm8 }, &.{ 0x12 }, 0, .none }, - .{ .adc, .rm, &.{ .r8, .rm8 }, &.{ 0x12 }, 0, .rex }, - .{ .adc, .rm, &.{ .r16, .rm16 }, &.{ 0x13 }, 0, .none }, - .{ .adc, .rm, &.{ .r32, .rm32 }, &.{ 0x13 }, 0, .none }, - .{ .adc, .rm, &.{ .r64, .rm64 }, &.{ 0x13 }, 0, .long }, - - .{ .add, .zi, &.{ .al, .imm8 }, &.{ 0x04 }, 0, .none }, - .{ .add, .zi, &.{ .ax, .imm16 }, &.{ 0x05 }, 0, .none }, - .{ .add, .zi, &.{ .eax, .imm32 }, &.{ 0x05 }, 0, .none }, - .{ .add, 
.zi, &.{ .rax, .imm32s }, &.{ 0x05 }, 0, .long }, - .{ .add, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 0, .none }, - .{ .add, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 0, .rex }, - .{ .add, .mi, &.{ .rm16, .imm16 }, &.{ 0x81 }, 0, .none }, - .{ .add, .mi, &.{ .rm32, .imm32 }, &.{ 0x81 }, 0, .none }, - .{ .add, .mi, &.{ .rm64, .imm32s }, &.{ 0x81 }, 0, .long }, - .{ .add, .mi, &.{ .rm16, .imm8s }, &.{ 0x83 }, 0, .none }, - .{ .add, .mi, &.{ .rm32, .imm8s }, &.{ 0x83 }, 0, .none }, - .{ .add, .mi, &.{ .rm64, .imm8s }, &.{ 0x83 }, 0, .long }, - .{ .add, .mr, &.{ .rm8, .r8 }, &.{ 0x00 }, 0, .none }, - .{ .add, .mr, &.{ .rm8, .r8 }, &.{ 0x00 }, 0, .rex }, - .{ .add, .mr, &.{ .rm16, .r16 }, &.{ 0x01 }, 0, .none }, - .{ .add, .mr, &.{ .rm32, .r32 }, &.{ 0x01 }, 0, .none }, - .{ .add, .mr, &.{ .rm64, .r64 }, &.{ 0x01 }, 0, .long }, - .{ .add, .rm, &.{ .r8, .rm8 }, &.{ 0x02 }, 0, .none }, - .{ .add, .rm, &.{ .r8, .rm8 }, &.{ 0x02 }, 0, .rex }, - .{ .add, .rm, &.{ .r16, .rm16 }, &.{ 0x03 }, 0, .none }, - .{ .add, .rm, &.{ .r32, .rm32 }, &.{ 0x03 }, 0, .none }, - .{ .add, .rm, &.{ .r64, .rm64 }, &.{ 0x03 }, 0, .long }, - - .{ .@"and", .zi, &.{ .al, .imm8 }, &.{ 0x24 }, 0, .none }, - .{ .@"and", .zi, &.{ .ax, .imm16 }, &.{ 0x25 }, 0, .none }, - .{ .@"and", .zi, &.{ .eax, .imm32 }, &.{ 0x25 }, 0, .none }, - .{ .@"and", .zi, &.{ .rax, .imm32s }, &.{ 0x25 }, 0, .long }, - .{ .@"and", .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 4, .none }, - .{ .@"and", .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 4, .rex }, - .{ .@"and", .mi, &.{ .rm16, .imm16 }, &.{ 0x81 }, 4, .none }, - .{ .@"and", .mi, &.{ .rm32, .imm32 }, &.{ 0x81 }, 4, .none }, - .{ .@"and", .mi, &.{ .rm64, .imm32s }, &.{ 0x81 }, 4, .long }, - .{ .@"and", .mi, &.{ .rm16, .imm8s }, &.{ 0x83 }, 4, .none }, - .{ .@"and", .mi, &.{ .rm32, .imm8s }, &.{ 0x83 }, 4, .none }, - .{ .@"and", .mi, &.{ .rm64, .imm8s }, &.{ 0x83 }, 4, .long }, - .{ .@"and", .mr, &.{ .rm8, .r8 }, &.{ 0x20 }, 0, .none }, - .{ .@"and", .mr, &.{ .rm8, .r8 }, &.{ 0x20 }, 0, .rex }, - .{ .@"and", .mr, &.{ .rm16, .r16 }, &.{ 0x21 }, 0, .none }, - .{ .@"and", .mr, &.{ .rm32, .r32 }, &.{ 0x21 }, 0, .none }, - .{ .@"and", .mr, &.{ .rm64, .r64 }, &.{ 0x21 }, 0, .long }, - .{ .@"and", .rm, &.{ .r8, .rm8 }, &.{ 0x22 }, 0, .none }, - .{ .@"and", .rm, &.{ .r8, .rm8 }, &.{ 0x22 }, 0, .rex }, - .{ .@"and", .rm, &.{ .r16, .rm16 }, &.{ 0x23 }, 0, .none }, - .{ .@"and", .rm, &.{ .r32, .rm32 }, &.{ 0x23 }, 0, .none }, - .{ .@"and", .rm, &.{ .r64, .rm64 }, &.{ 0x23 }, 0, .long }, - - .{ .bsf, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0xbc }, 0, .none }, - .{ .bsf, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0xbc }, 0, .none }, - .{ .bsf, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0xbc }, 0, .long }, - - .{ .bsr, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0xbd }, 0, .none }, - .{ .bsr, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0xbd }, 0, .none }, - .{ .bsr, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0xbd }, 0, .long }, - - .{ .bswap, .o, &.{ .r32 }, &.{ 0x0f, 0xc8 }, 0, .none }, - .{ .bswap, .o, &.{ .r64 }, &.{ 0x0f, 0xc8 }, 0, .long }, - - .{ .bt, .mr, &.{ .rm16, .r16 }, &.{ 0x0f, 0xa3 }, 0, .none }, - .{ .bt, .mr, &.{ .rm32, .r32 }, &.{ 0x0f, 0xa3 }, 0, .none }, - .{ .bt, .mr, &.{ .rm64, .r64 }, &.{ 0x0f, 0xa3 }, 0, .long }, - .{ .bt, .mi, &.{ .rm16, .imm8 }, &.{ 0x0f, 0xba }, 4, .none }, - .{ .bt, .mi, &.{ .rm32, .imm8 }, &.{ 0x0f, 0xba }, 4, .none }, - .{ .bt, .mi, &.{ .rm64, .imm8 }, &.{ 0x0f, 0xba }, 4, .long }, - - .{ .btc, .mr, &.{ .rm16, .r16 }, &.{ 0x0f, 0xbb }, 0, .none }, - .{ .btc, .mr, &.{ .rm32, .r32 }, &.{ 0x0f, 0xbb }, 0, .none }, - .{ .btc, .mr, &.{ .rm64, .r64 }, 
&.{ 0x0f, 0xbb }, 0, .long }, - .{ .btc, .mi, &.{ .rm16, .imm8 }, &.{ 0x0f, 0xba }, 7, .none }, - .{ .btc, .mi, &.{ .rm32, .imm8 }, &.{ 0x0f, 0xba }, 7, .none }, - .{ .btc, .mi, &.{ .rm64, .imm8 }, &.{ 0x0f, 0xba }, 7, .long }, - - .{ .btr, .mr, &.{ .rm16, .r16 }, &.{ 0x0f, 0xb3 }, 0, .none }, - .{ .btr, .mr, &.{ .rm32, .r32 }, &.{ 0x0f, 0xb3 }, 0, .none }, - .{ .btr, .mr, &.{ .rm64, .r64 }, &.{ 0x0f, 0xb3 }, 0, .long }, - .{ .btr, .mi, &.{ .rm16, .imm8 }, &.{ 0x0f, 0xba }, 6, .none }, - .{ .btr, .mi, &.{ .rm32, .imm8 }, &.{ 0x0f, 0xba }, 6, .none }, - .{ .btr, .mi, &.{ .rm64, .imm8 }, &.{ 0x0f, 0xba }, 6, .long }, - - .{ .bts, .mr, &.{ .rm16, .r16 }, &.{ 0x0f, 0xab }, 0, .none }, - .{ .bts, .mr, &.{ .rm32, .r32 }, &.{ 0x0f, 0xab }, 0, .none }, - .{ .bts, .mr, &.{ .rm64, .r64 }, &.{ 0x0f, 0xab }, 0, .long }, - .{ .bts, .mi, &.{ .rm16, .imm8 }, &.{ 0x0f, 0xba }, 5, .none }, - .{ .bts, .mi, &.{ .rm32, .imm8 }, &.{ 0x0f, 0xba }, 5, .none }, - .{ .bts, .mi, &.{ .rm64, .imm8 }, &.{ 0x0f, 0xba }, 5, .long }, + .{ .adc, .zi, &.{ .al, .imm8 }, &.{ 0x14 }, 0, .none, .none }, + .{ .adc, .zi, &.{ .ax, .imm16 }, &.{ 0x15 }, 0, .short, .none }, + .{ .adc, .zi, &.{ .eax, .imm32 }, &.{ 0x15 }, 0, .none, .none }, + .{ .adc, .zi, &.{ .rax, .imm32s }, &.{ 0x15 }, 0, .long, .none }, + .{ .adc, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 2, .none, .none }, + .{ .adc, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 2, .rex, .none }, + .{ .adc, .mi, &.{ .rm16, .imm16 }, &.{ 0x81 }, 2, .short, .none }, + .{ .adc, .mi, &.{ .rm32, .imm32 }, &.{ 0x81 }, 2, .none, .none }, + .{ .adc, .mi, &.{ .rm64, .imm32s }, &.{ 0x81 }, 2, .long, .none }, + .{ .adc, .mi, &.{ .rm16, .imm8s }, &.{ 0x83 }, 2, .short, .none }, + .{ .adc, .mi, &.{ .rm32, .imm8s }, &.{ 0x83 }, 2, .none, .none }, + .{ .adc, .mi, &.{ .rm64, .imm8s }, &.{ 0x83 }, 2, .long, .none }, + .{ .adc, .mr, &.{ .rm8, .r8 }, &.{ 0x10 }, 0, .none, .none }, + .{ .adc, .mr, &.{ .rm8, .r8 }, &.{ 0x10 }, 0, .rex, .none }, + .{ .adc, .mr, &.{ .rm16, .r16 }, &.{ 0x11 }, 0, .short, .none }, + .{ .adc, .mr, &.{ .rm32, .r32 }, &.{ 0x11 }, 0, .none, .none }, + .{ .adc, .mr, &.{ .rm64, .r64 }, &.{ 0x11 }, 0, .long, .none }, + .{ .adc, .rm, &.{ .r8, .rm8 }, &.{ 0x12 }, 0, .none, .none }, + .{ .adc, .rm, &.{ .r8, .rm8 }, &.{ 0x12 }, 0, .rex, .none }, + .{ .adc, .rm, &.{ .r16, .rm16 }, &.{ 0x13 }, 0, .short, .none }, + .{ .adc, .rm, &.{ .r32, .rm32 }, &.{ 0x13 }, 0, .none, .none }, + .{ .adc, .rm, &.{ .r64, .rm64 }, &.{ 0x13 }, 0, .long, .none }, + + .{ .add, .zi, &.{ .al, .imm8 }, &.{ 0x04 }, 0, .none, .none }, + .{ .add, .zi, &.{ .ax, .imm16 }, &.{ 0x05 }, 0, .short, .none }, + .{ .add, .zi, &.{ .eax, .imm32 }, &.{ 0x05 }, 0, .none, .none }, + .{ .add, .zi, &.{ .rax, .imm32s }, &.{ 0x05 }, 0, .long, .none }, + .{ .add, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 0, .none, .none }, + .{ .add, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 0, .rex, .none }, + .{ .add, .mi, &.{ .rm16, .imm16 }, &.{ 0x81 }, 0, .short, .none }, + .{ .add, .mi, &.{ .rm32, .imm32 }, &.{ 0x81 }, 0, .none, .none }, + .{ .add, .mi, &.{ .rm64, .imm32s }, &.{ 0x81 }, 0, .long, .none }, + .{ .add, .mi, &.{ .rm16, .imm8s }, &.{ 0x83 }, 0, .short, .none }, + .{ .add, .mi, &.{ .rm32, .imm8s }, &.{ 0x83 }, 0, .none, .none }, + .{ .add, .mi, &.{ .rm64, .imm8s }, &.{ 0x83 }, 0, .long, .none }, + .{ .add, .mr, &.{ .rm8, .r8 }, &.{ 0x00 }, 0, .none, .none }, + .{ .add, .mr, &.{ .rm8, .r8 }, &.{ 0x00 }, 0, .rex, .none }, + .{ .add, .mr, &.{ .rm16, .r16 }, &.{ 0x01 }, 0, .short, .none }, + .{ .add, .mr, &.{ .rm32, .r32 }, &.{ 0x01 }, 0, .none, .none }, + .{ 
.add, .mr, &.{ .rm64, .r64 }, &.{ 0x01 }, 0, .long, .none }, + .{ .add, .rm, &.{ .r8, .rm8 }, &.{ 0x02 }, 0, .none, .none }, + .{ .add, .rm, &.{ .r8, .rm8 }, &.{ 0x02 }, 0, .rex, .none }, + .{ .add, .rm, &.{ .r16, .rm16 }, &.{ 0x03 }, 0, .short, .none }, + .{ .add, .rm, &.{ .r32, .rm32 }, &.{ 0x03 }, 0, .none, .none }, + .{ .add, .rm, &.{ .r64, .rm64 }, &.{ 0x03 }, 0, .long, .none }, + + .{ .@"and", .zi, &.{ .al, .imm8 }, &.{ 0x24 }, 0, .none, .none }, + .{ .@"and", .zi, &.{ .ax, .imm16 }, &.{ 0x25 }, 0, .short, .none }, + .{ .@"and", .zi, &.{ .eax, .imm32 }, &.{ 0x25 }, 0, .none, .none }, + .{ .@"and", .zi, &.{ .rax, .imm32s }, &.{ 0x25 }, 0, .long, .none }, + .{ .@"and", .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 4, .none, .none }, + .{ .@"and", .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 4, .rex, .none }, + .{ .@"and", .mi, &.{ .rm16, .imm16 }, &.{ 0x81 }, 4, .short, .none }, + .{ .@"and", .mi, &.{ .rm32, .imm32 }, &.{ 0x81 }, 4, .none, .none }, + .{ .@"and", .mi, &.{ .rm64, .imm32s }, &.{ 0x81 }, 4, .long, .none }, + .{ .@"and", .mi, &.{ .rm16, .imm8s }, &.{ 0x83 }, 4, .short, .none }, + .{ .@"and", .mi, &.{ .rm32, .imm8s }, &.{ 0x83 }, 4, .none, .none }, + .{ .@"and", .mi, &.{ .rm64, .imm8s }, &.{ 0x83 }, 4, .long, .none }, + .{ .@"and", .mr, &.{ .rm8, .r8 }, &.{ 0x20 }, 0, .none, .none }, + .{ .@"and", .mr, &.{ .rm8, .r8 }, &.{ 0x20 }, 0, .rex, .none }, + .{ .@"and", .mr, &.{ .rm16, .r16 }, &.{ 0x21 }, 0, .short, .none }, + .{ .@"and", .mr, &.{ .rm32, .r32 }, &.{ 0x21 }, 0, .none, .none }, + .{ .@"and", .mr, &.{ .rm64, .r64 }, &.{ 0x21 }, 0, .long, .none }, + .{ .@"and", .rm, &.{ .r8, .rm8 }, &.{ 0x22 }, 0, .none, .none }, + .{ .@"and", .rm, &.{ .r8, .rm8 }, &.{ 0x22 }, 0, .rex, .none }, + .{ .@"and", .rm, &.{ .r16, .rm16 }, &.{ 0x23 }, 0, .short, .none }, + .{ .@"and", .rm, &.{ .r32, .rm32 }, &.{ 0x23 }, 0, .none, .none }, + .{ .@"and", .rm, &.{ .r64, .rm64 }, &.{ 0x23 }, 0, .long, .none }, + + .{ .bsf, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0xbc }, 0, .short, .none }, + .{ .bsf, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0xbc }, 0, .none, .none }, + .{ .bsf, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0xbc }, 0, .long, .none }, + + .{ .bsr, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0xbd }, 0, .short, .none }, + .{ .bsr, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0xbd }, 0, .none, .none }, + .{ .bsr, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0xbd }, 0, .long, .none }, + + .{ .bswap, .o, &.{ .r32 }, &.{ 0x0f, 0xc8 }, 0, .none, .none }, + .{ .bswap, .o, &.{ .r64 }, &.{ 0x0f, 0xc8 }, 0, .long, .none }, + + .{ .bt, .mr, &.{ .rm16, .r16 }, &.{ 0x0f, 0xa3 }, 0, .short, .none }, + .{ .bt, .mr, &.{ .rm32, .r32 }, &.{ 0x0f, 0xa3 }, 0, .none, .none }, + .{ .bt, .mr, &.{ .rm64, .r64 }, &.{ 0x0f, 0xa3 }, 0, .long, .none }, + .{ .bt, .mi, &.{ .rm16, .imm8 }, &.{ 0x0f, 0xba }, 4, .short, .none }, + .{ .bt, .mi, &.{ .rm32, .imm8 }, &.{ 0x0f, 0xba }, 4, .none, .none }, + .{ .bt, .mi, &.{ .rm64, .imm8 }, &.{ 0x0f, 0xba }, 4, .long, .none }, + + .{ .btc, .mr, &.{ .rm16, .r16 }, &.{ 0x0f, 0xbb }, 0, .short, .none }, + .{ .btc, .mr, &.{ .rm32, .r32 }, &.{ 0x0f, 0xbb }, 0, .none, .none }, + .{ .btc, .mr, &.{ .rm64, .r64 }, &.{ 0x0f, 0xbb }, 0, .long, .none }, + .{ .btc, .mi, &.{ .rm16, .imm8 }, &.{ 0x0f, 0xba }, 7, .short, .none }, + .{ .btc, .mi, &.{ .rm32, .imm8 }, &.{ 0x0f, 0xba }, 7, .none, .none }, + .{ .btc, .mi, &.{ .rm64, .imm8 }, &.{ 0x0f, 0xba }, 7, .long, .none }, + + .{ .btr, .mr, &.{ .rm16, .r16 }, &.{ 0x0f, 0xb3 }, 0, .short, .none }, + .{ .btr, .mr, &.{ .rm32, .r32 }, &.{ 0x0f, 0xb3 }, 0, .none, .none }, + .{ .btr, .mr, &.{ .rm64, .r64 }, &.{ 0x0f, 
0xb3 }, 0, .long, .none }, + .{ .btr, .mi, &.{ .rm16, .imm8 }, &.{ 0x0f, 0xba }, 6, .short, .none }, + .{ .btr, .mi, &.{ .rm32, .imm8 }, &.{ 0x0f, 0xba }, 6, .none, .none }, + .{ .btr, .mi, &.{ .rm64, .imm8 }, &.{ 0x0f, 0xba }, 6, .long, .none }, + + .{ .bts, .mr, &.{ .rm16, .r16 }, &.{ 0x0f, 0xab }, 0, .short, .none }, + .{ .bts, .mr, &.{ .rm32, .r32 }, &.{ 0x0f, 0xab }, 0, .none, .none }, + .{ .bts, .mr, &.{ .rm64, .r64 }, &.{ 0x0f, 0xab }, 0, .long, .none }, + .{ .bts, .mi, &.{ .rm16, .imm8 }, &.{ 0x0f, 0xba }, 5, .short, .none }, + .{ .bts, .mi, &.{ .rm32, .imm8 }, &.{ 0x0f, 0xba }, 5, .none, .none }, + .{ .bts, .mi, &.{ .rm64, .imm8 }, &.{ 0x0f, 0xba }, 5, .long, .none }, // This is M encoding according to Intel, but D makes more sense here. - .{ .call, .d, &.{ .rel32 }, &.{ 0xe8 }, 0, .none }, - .{ .call, .m, &.{ .rm64 }, &.{ 0xff }, 2, .none }, - - .{ .cbw, .np, &.{ .o16 }, &.{ 0x98 }, 0, .none }, - .{ .cwde, .np, &.{ .o32 }, &.{ 0x98 }, 0, .none }, - .{ .cdqe, .np, &.{ .o64 }, &.{ 0x98 }, 0, .long }, - - .{ .cwd, .np, &.{ .o16 }, &.{ 0x99 }, 0, .none }, - .{ .cdq, .np, &.{ .o32 }, &.{ 0x99 }, 0, .none }, - .{ .cqo, .np, &.{ .o64 }, &.{ 0x99 }, 0, .long }, - - .{ .cmova, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x47 }, 0, .none }, - .{ .cmova, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x47 }, 0, .none }, - .{ .cmova, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x47 }, 0, .long }, - .{ .cmovae, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x43 }, 0, .none }, - .{ .cmovae, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x43 }, 0, .none }, - .{ .cmovae, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x43 }, 0, .long }, - .{ .cmovb, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x42 }, 0, .none }, - .{ .cmovb, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x42 }, 0, .none }, - .{ .cmovb, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x42 }, 0, .long }, - .{ .cmovbe, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x46 }, 0, .none }, - .{ .cmovbe, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x46 }, 0, .none }, - .{ .cmovbe, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x46 }, 0, .long }, - .{ .cmovc, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x42 }, 0, .none }, - .{ .cmovc, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x42 }, 0, .none }, - .{ .cmovc, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x42 }, 0, .long }, - .{ .cmove, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x44 }, 0, .none }, - .{ .cmove, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x44 }, 0, .none }, - .{ .cmove, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x44 }, 0, .long }, - .{ .cmovg, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4f }, 0, .none }, - .{ .cmovg, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4f }, 0, .none }, - .{ .cmovg, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4f }, 0, .long }, - .{ .cmovge, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4d }, 0, .none }, - .{ .cmovge, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4d }, 0, .none }, - .{ .cmovge, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4d }, 0, .long }, - .{ .cmovl, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4c }, 0, .none }, - .{ .cmovl, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4c }, 0, .none }, - .{ .cmovl, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4c }, 0, .long }, - .{ .cmovle, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4e }, 0, .none }, - .{ .cmovle, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4e }, 0, .none }, - .{ .cmovle, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4e }, 0, .long }, - .{ .cmovna, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x46 }, 0, .none }, - .{ .cmovna, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x46 }, 0, .none }, - .{ .cmovna, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x46 }, 0, .long }, - .{ .cmovnae, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x42 }, 0, .none }, - .{ .cmovnae, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x42 }, 0, .none }, - .{ 
.cmovnae, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x42 }, 0, .long }, - .{ .cmovnb, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x43 }, 0, .none }, - .{ .cmovnb, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x43 }, 0, .none }, - .{ .cmovnb, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x43 }, 0, .long }, - .{ .cmovnbe, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x47 }, 0, .none }, - .{ .cmovnbe, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x47 }, 0, .none }, - .{ .cmovnbe, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x47 }, 0, .long }, - .{ .cmovnc, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x43 }, 0, .none }, - .{ .cmovnc, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x43 }, 0, .none }, - .{ .cmovnc, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x43 }, 0, .long }, - .{ .cmovne, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x45 }, 0, .none }, - .{ .cmovne, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x45 }, 0, .none }, - .{ .cmovne, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x45 }, 0, .long }, - .{ .cmovng, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4e }, 0, .none }, - .{ .cmovng, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4e }, 0, .none }, - .{ .cmovng, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4e }, 0, .long }, - .{ .cmovnge, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4c }, 0, .none }, - .{ .cmovnge, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4c }, 0, .none }, - .{ .cmovnge, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4c }, 0, .long }, - .{ .cmovnl, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4d }, 0, .none }, - .{ .cmovnl, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4d }, 0, .none }, - .{ .cmovnl, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4d }, 0, .long }, - .{ .cmovnle, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4f }, 0, .none }, - .{ .cmovnle, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4f }, 0, .none }, - .{ .cmovnle, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4f }, 0, .long }, - .{ .cmovno, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x41 }, 0, .none }, - .{ .cmovno, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x41 }, 0, .none }, - .{ .cmovno, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x41 }, 0, .long }, - .{ .cmovnp, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4b }, 0, .none }, - .{ .cmovnp, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4b }, 0, .none }, - .{ .cmovnp, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4b }, 0, .long }, - .{ .cmovns, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x49 }, 0, .none }, - .{ .cmovns, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x49 }, 0, .none }, - .{ .cmovns, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x49 }, 0, .long }, - .{ .cmovnz, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x45 }, 0, .none }, - .{ .cmovnz, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x45 }, 0, .none }, - .{ .cmovnz, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x45 }, 0, .long }, - .{ .cmovo, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x40 }, 0, .none }, - .{ .cmovo, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x40 }, 0, .none }, - .{ .cmovo, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x40 }, 0, .long }, - .{ .cmovp, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4a }, 0, .none }, - .{ .cmovp, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4a }, 0, .none }, - .{ .cmovp, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4a }, 0, .long }, - .{ .cmovpe, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4a }, 0, .none }, - .{ .cmovpe, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4a }, 0, .none }, - .{ .cmovpe, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4a }, 0, .long }, - .{ .cmovpo, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4b }, 0, .none }, - .{ .cmovpo, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4b }, 0, .none }, - .{ .cmovpo, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4b }, 0, .long }, - .{ .cmovs, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x48 }, 0, .none }, - .{ .cmovs, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x48 }, 0, .none }, - .{ .cmovs, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x48 }, 0, .long }, - .{ .cmovz, .rm, &.{ 
.r16, .rm16 }, &.{ 0x0f, 0x44 }, 0, .none }, - .{ .cmovz, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x44 }, 0, .none }, - .{ .cmovz, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x44 }, 0, .long }, - - .{ .cmp, .zi, &.{ .al, .imm8 }, &.{ 0x3c }, 0, .none }, - .{ .cmp, .zi, &.{ .ax, .imm16 }, &.{ 0x3d }, 0, .none }, - .{ .cmp, .zi, &.{ .eax, .imm32 }, &.{ 0x3d }, 0, .none }, - .{ .cmp, .zi, &.{ .rax, .imm32s }, &.{ 0x3d }, 0, .long }, - .{ .cmp, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 7, .none }, - .{ .cmp, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 7, .rex }, - .{ .cmp, .mi, &.{ .rm16, .imm16 }, &.{ 0x81 }, 7, .none }, - .{ .cmp, .mi, &.{ .rm32, .imm32 }, &.{ 0x81 }, 7, .none }, - .{ .cmp, .mi, &.{ .rm64, .imm32s }, &.{ 0x81 }, 7, .long }, - .{ .cmp, .mi, &.{ .rm16, .imm8s }, &.{ 0x83 }, 7, .none }, - .{ .cmp, .mi, &.{ .rm32, .imm8s }, &.{ 0x83 }, 7, .none }, - .{ .cmp, .mi, &.{ .rm64, .imm8s }, &.{ 0x83 }, 7, .long }, - .{ .cmp, .mr, &.{ .rm8, .r8 }, &.{ 0x38 }, 0, .none }, - .{ .cmp, .mr, &.{ .rm8, .r8 }, &.{ 0x38 }, 0, .rex }, - .{ .cmp, .mr, &.{ .rm16, .r16 }, &.{ 0x39 }, 0, .none }, - .{ .cmp, .mr, &.{ .rm32, .r32 }, &.{ 0x39 }, 0, .none }, - .{ .cmp, .mr, &.{ .rm64, .r64 }, &.{ 0x39 }, 0, .long }, - .{ .cmp, .rm, &.{ .r8, .rm8 }, &.{ 0x3a }, 0, .none }, - .{ .cmp, .rm, &.{ .r8, .rm8 }, &.{ 0x3a }, 0, .rex }, - .{ .cmp, .rm, &.{ .r16, .rm16 }, &.{ 0x3b }, 0, .none }, - .{ .cmp, .rm, &.{ .r32, .rm32 }, &.{ 0x3b }, 0, .none }, - .{ .cmp, .rm, &.{ .r64, .rm64 }, &.{ 0x3b }, 0, .long }, - - .{ .cmps, .np, &.{ .m8, .m8 }, &.{ 0xa6 }, 0, .none }, - .{ .cmps, .np, &.{ .m16, .m16 }, &.{ 0xa7 }, 0, .none }, - .{ .cmps, .np, &.{ .m32, .m32 }, &.{ 0xa7 }, 0, .none }, - .{ .cmps, .np, &.{ .m64, .m64 }, &.{ 0xa7 }, 0, .long }, - - .{ .cmpsb, .np, &.{}, &.{ 0xa6 }, 0, .none }, - .{ .cmpsw, .np, &.{}, &.{ 0xa7 }, 0, .short }, - .{ .cmpsd, .np, &.{}, &.{ 0xa7 }, 0, .none }, - .{ .cmpsq, .np, &.{}, &.{ 0xa7 }, 0, .long }, - - .{ .cmpxchg, .mr, &.{ .rm8, .r8 }, &.{ 0x0f, 0xb0 }, 0, .none }, - .{ .cmpxchg, .mr, &.{ .rm8, .r8 }, &.{ 0x0f, 0xb0 }, 0, .rex }, - .{ .cmpxchg, .mr, &.{ .rm16, .r16 }, &.{ 0x0f, 0xb1 }, 0, .none }, - .{ .cmpxchg, .mr, &.{ .rm32, .r32 }, &.{ 0x0f, 0xb1 }, 0, .none }, - .{ .cmpxchg, .mr, &.{ .rm64, .r64 }, &.{ 0x0f, 0xb1 }, 0, .long }, - - .{ .cmpxchg8b , .m, &.{ .m64 }, &.{ 0x0f, 0xc7 }, 1, .none }, - .{ .cmpxchg16b, .m, &.{ .m128 }, &.{ 0x0f, 0xc7 }, 1, .long }, - - .{ .div, .m, &.{ .rm8 }, &.{ 0xf6 }, 6, .none }, - .{ .div, .m, &.{ .rm8 }, &.{ 0xf6 }, 6, .rex }, - .{ .div, .m, &.{ .rm16 }, &.{ 0xf7 }, 6, .none }, - .{ .div, .m, &.{ .rm32 }, &.{ 0xf7 }, 6, .none }, - .{ .div, .m, &.{ .rm64 }, &.{ 0xf7 }, 6, .long }, - - .{ .fisttp, .m, &.{ .m16 }, &.{ 0xdf }, 1, .fpu }, - .{ .fisttp, .m, &.{ .m32 }, &.{ 0xdb }, 1, .fpu }, - .{ .fisttp, .m, &.{ .m64 }, &.{ 0xdd }, 1, .fpu }, - - .{ .fld, .m, &.{ .m32 }, &.{ 0xd9 }, 0, .fpu }, - .{ .fld, .m, &.{ .m64 }, &.{ 0xdd }, 0, .fpu }, - .{ .fld, .m, &.{ .m80 }, &.{ 0xdb }, 5, .fpu }, - - .{ .idiv, .m, &.{ .rm8 }, &.{ 0xf6 }, 7, .none }, - .{ .idiv, .m, &.{ .rm8 }, &.{ 0xf6 }, 7, .rex }, - .{ .idiv, .m, &.{ .rm16 }, &.{ 0xf7 }, 7, .none }, - .{ .idiv, .m, &.{ .rm32 }, &.{ 0xf7 }, 7, .none }, - .{ .idiv, .m, &.{ .rm64 }, &.{ 0xf7 }, 7, .long }, - - .{ .imul, .m, &.{ .rm8 }, &.{ 0xf6 }, 5, .none }, - .{ .imul, .m, &.{ .rm8 }, &.{ 0xf6 }, 5, .rex }, - .{ .imul, .m, &.{ .rm16, }, &.{ 0xf7 }, 5, .none }, - .{ .imul, .m, &.{ .rm32, }, &.{ 0xf7 }, 5, .none }, - .{ .imul, .m, &.{ .rm64, }, &.{ 0xf7 }, 5, .long }, - .{ .imul, .rm, &.{ .r16, .rm16, }, &.{ 0x0f, 0xaf }, 0, .none 
}, - .{ .imul, .rm, &.{ .r32, .rm32, }, &.{ 0x0f, 0xaf }, 0, .none }, - .{ .imul, .rm, &.{ .r64, .rm64, }, &.{ 0x0f, 0xaf }, 0, .long }, - .{ .imul, .rmi, &.{ .r16, .rm16, .imm8s }, &.{ 0x6b }, 0, .none }, - .{ .imul, .rmi, &.{ .r32, .rm32, .imm8s }, &.{ 0x6b }, 0, .none }, - .{ .imul, .rmi, &.{ .r64, .rm64, .imm8s }, &.{ 0x6b }, 0, .long }, - .{ .imul, .rmi, &.{ .r16, .rm16, .imm16 }, &.{ 0x69 }, 0, .none }, - .{ .imul, .rmi, &.{ .r32, .rm32, .imm32 }, &.{ 0x69 }, 0, .none }, - .{ .imul, .rmi, &.{ .r64, .rm64, .imm32 }, &.{ 0x69 }, 0, .long }, - - .{ .int3, .np, &.{}, &.{ 0xcc }, 0, .none }, - - .{ .ja, .d, &.{ .rel32 }, &.{ 0x0f, 0x87 }, 0, .none }, - .{ .jae, .d, &.{ .rel32 }, &.{ 0x0f, 0x83 }, 0, .none }, - .{ .jb, .d, &.{ .rel32 }, &.{ 0x0f, 0x82 }, 0, .none }, - .{ .jbe, .d, &.{ .rel32 }, &.{ 0x0f, 0x86 }, 0, .none }, - .{ .jc, .d, &.{ .rel32 }, &.{ 0x0f, 0x82 }, 0, .none }, - .{ .jrcxz, .d, &.{ .rel32 }, &.{ 0xe3 }, 0, .none }, - .{ .je, .d, &.{ .rel32 }, &.{ 0x0f, 0x84 }, 0, .none }, - .{ .jg, .d, &.{ .rel32 }, &.{ 0x0f, 0x8f }, 0, .none }, - .{ .jge, .d, &.{ .rel32 }, &.{ 0x0f, 0x8d }, 0, .none }, - .{ .jl, .d, &.{ .rel32 }, &.{ 0x0f, 0x8c }, 0, .none }, - .{ .jle, .d, &.{ .rel32 }, &.{ 0x0f, 0x8e }, 0, .none }, - .{ .jna, .d, &.{ .rel32 }, &.{ 0x0f, 0x86 }, 0, .none }, - .{ .jnae, .d, &.{ .rel32 }, &.{ 0x0f, 0x82 }, 0, .none }, - .{ .jnb, .d, &.{ .rel32 }, &.{ 0x0f, 0x83 }, 0, .none }, - .{ .jnbe, .d, &.{ .rel32 }, &.{ 0x0f, 0x87 }, 0, .none }, - .{ .jnc, .d, &.{ .rel32 }, &.{ 0x0f, 0x83 }, 0, .none }, - .{ .jne, .d, &.{ .rel32 }, &.{ 0x0f, 0x85 }, 0, .none }, - .{ .jng, .d, &.{ .rel32 }, &.{ 0x0f, 0x8e }, 0, .none }, - .{ .jnge, .d, &.{ .rel32 }, &.{ 0x0f, 0x8c }, 0, .none }, - .{ .jnl, .d, &.{ .rel32 }, &.{ 0x0f, 0x8d }, 0, .none }, - .{ .jnle, .d, &.{ .rel32 }, &.{ 0x0f, 0x8f }, 0, .none }, - .{ .jno, .d, &.{ .rel32 }, &.{ 0x0f, 0x81 }, 0, .none }, - .{ .jnp, .d, &.{ .rel32 }, &.{ 0x0f, 0x8b }, 0, .none }, - .{ .jns, .d, &.{ .rel32 }, &.{ 0x0f, 0x89 }, 0, .none }, - .{ .jnz, .d, &.{ .rel32 }, &.{ 0x0f, 0x85 }, 0, .none }, - .{ .jo, .d, &.{ .rel32 }, &.{ 0x0f, 0x80 }, 0, .none }, - .{ .jp, .d, &.{ .rel32 }, &.{ 0x0f, 0x8a }, 0, .none }, - .{ .jpe, .d, &.{ .rel32 }, &.{ 0x0f, 0x8a }, 0, .none }, - .{ .jpo, .d, &.{ .rel32 }, &.{ 0x0f, 0x8b }, 0, .none }, - .{ .js, .d, &.{ .rel32 }, &.{ 0x0f, 0x88 }, 0, .none }, - .{ .jz, .d, &.{ .rel32 }, &.{ 0x0f, 0x84 }, 0, .none }, - - .{ .jmp, .d, &.{ .rel32 }, &.{ 0xe9 }, 0, .none }, - .{ .jmp, .m, &.{ .rm64 }, &.{ 0xff }, 4, .none }, - - .{ .lea, .rm, &.{ .r16, .m }, &.{ 0x8d }, 0, .none }, - .{ .lea, .rm, &.{ .r32, .m }, &.{ 0x8d }, 0, .none }, - .{ .lea, .rm, &.{ .r64, .m }, &.{ 0x8d }, 0, .long }, - - .{ .lfence, .np, &.{}, &.{ 0x0f, 0xae, 0xe8 }, 0, .none }, - - .{ .lods, .np, &.{ .m8 }, &.{ 0xac }, 0, .none }, - .{ .lods, .np, &.{ .m16 }, &.{ 0xad }, 0, .none }, - .{ .lods, .np, &.{ .m32 }, &.{ 0xad }, 0, .none }, - .{ .lods, .np, &.{ .m64 }, &.{ 0xad }, 0, .long }, - - .{ .lodsb, .np, &.{}, &.{ 0xac }, 0, .none }, - .{ .lodsw, .np, &.{}, &.{ 0xad }, 0, .short }, - .{ .lodsd, .np, &.{}, &.{ 0xad }, 0, .none }, - .{ .lodsq, .np, &.{}, &.{ 0xad }, 0, .long }, - - .{ .lzcnt, .rm, &.{ .r16, .rm16 }, &.{ 0xf3, 0x0f, 0xbd }, 0, .none }, - .{ .lzcnt, .rm, &.{ .r32, .rm32 }, &.{ 0xf3, 0x0f, 0xbd }, 0, .none }, - .{ .lzcnt, .rm, &.{ .r64, .rm64 }, &.{ 0xf3, 0x0f, 0xbd }, 0, .long }, - - .{ .mfence, .np, &.{}, &.{ 0x0f, 0xae, 0xf0 }, 0, .none }, - - .{ .mov, .mr, &.{ .rm8, .r8 }, &.{ 0x88 }, 0, .none }, - .{ .mov, .mr, &.{ .rm8, .r8 }, &.{ 0x88 
}, 0, .rex }, - .{ .mov, .mr, &.{ .rm16, .r16 }, &.{ 0x89 }, 0, .none }, - .{ .mov, .mr, &.{ .rm32, .r32 }, &.{ 0x89 }, 0, .none }, - .{ .mov, .mr, &.{ .rm64, .r64 }, &.{ 0x89 }, 0, .long }, - .{ .mov, .rm, &.{ .r8, .rm8 }, &.{ 0x8a }, 0, .none }, - .{ .mov, .rm, &.{ .r8, .rm8 }, &.{ 0x8a }, 0, .rex }, - .{ .mov, .rm, &.{ .r16, .rm16 }, &.{ 0x8b }, 0, .none }, - .{ .mov, .rm, &.{ .r32, .rm32 }, &.{ 0x8b }, 0, .none }, - .{ .mov, .rm, &.{ .r64, .rm64 }, &.{ 0x8b }, 0, .long }, - .{ .mov, .mr, &.{ .rm16, .sreg }, &.{ 0x8c }, 0, .none }, - .{ .mov, .mr, &.{ .rm64, .sreg }, &.{ 0x8c }, 0, .long }, - .{ .mov, .rm, &.{ .sreg, .rm16 }, &.{ 0x8e }, 0, .none }, - .{ .mov, .rm, &.{ .sreg, .rm64 }, &.{ 0x8e }, 0, .long }, - .{ .mov, .fd, &.{ .al, .moffs }, &.{ 0xa0 }, 0, .none }, - .{ .mov, .fd, &.{ .ax, .moffs }, &.{ 0xa1 }, 0, .none }, - .{ .mov, .fd, &.{ .eax, .moffs }, &.{ 0xa1 }, 0, .none }, - .{ .mov, .fd, &.{ .rax, .moffs }, &.{ 0xa1 }, 0, .long }, - .{ .mov, .td, &.{ .moffs, .al }, &.{ 0xa2 }, 0, .none }, - .{ .mov, .td, &.{ .moffs, .ax }, &.{ 0xa3 }, 0, .none }, - .{ .mov, .td, &.{ .moffs, .eax }, &.{ 0xa3 }, 0, .none }, - .{ .mov, .td, &.{ .moffs, .rax }, &.{ 0xa3 }, 0, .long }, - .{ .mov, .oi, &.{ .r8, .imm8 }, &.{ 0xb0 }, 0, .none }, - .{ .mov, .oi, &.{ .r8, .imm8 }, &.{ 0xb0 }, 0, .rex }, - .{ .mov, .oi, &.{ .r16, .imm16 }, &.{ 0xb8 }, 0, .none }, - .{ .mov, .oi, &.{ .r32, .imm32 }, &.{ 0xb8 }, 0, .none }, - .{ .mov, .oi, &.{ .r64, .imm64 }, &.{ 0xb8 }, 0, .long }, - .{ .mov, .mi, &.{ .rm8, .imm8 }, &.{ 0xc6 }, 0, .none }, - .{ .mov, .mi, &.{ .rm8, .imm8 }, &.{ 0xc6 }, 0, .rex }, - .{ .mov, .mi, &.{ .rm16, .imm16 }, &.{ 0xc7 }, 0, .none }, - .{ .mov, .mi, &.{ .rm32, .imm32 }, &.{ 0xc7 }, 0, .none }, - .{ .mov, .mi, &.{ .rm64, .imm32s }, &.{ 0xc7 }, 0, .long }, - - .{ .movbe, .rm, &.{ .r16, .m16 }, &.{ 0x0f, 0x38, 0xf0 }, 0, .none }, - .{ .movbe, .rm, &.{ .r32, .m32 }, &.{ 0x0f, 0x38, 0xf0 }, 0, .none }, - .{ .movbe, .rm, &.{ .r64, .m64 }, &.{ 0x0f, 0x38, 0xf0 }, 0, .long }, - .{ .movbe, .mr, &.{ .m16, .r16 }, &.{ 0x0f, 0x38, 0xf1 }, 0, .none }, - .{ .movbe, .mr, &.{ .m32, .r32 }, &.{ 0x0f, 0x38, 0xf1 }, 0, .none }, - .{ .movbe, .mr, &.{ .m64, .r64 }, &.{ 0x0f, 0x38, 0xf1 }, 0, .long }, - - .{ .movs, .np, &.{ .m8, .m8 }, &.{ 0xa4 }, 0, .none }, - .{ .movs, .np, &.{ .m16, .m16 }, &.{ 0xa5 }, 0, .none }, - .{ .movs, .np, &.{ .m32, .m32 }, &.{ 0xa5 }, 0, .none }, - .{ .movs, .np, &.{ .m64, .m64 }, &.{ 0xa5 }, 0, .long }, - - .{ .movsb, .np, &.{}, &.{ 0xa4 }, 0, .none }, - .{ .movsw, .np, &.{}, &.{ 0xa5 }, 0, .short }, - .{ .movsd, .np, &.{}, &.{ 0xa5 }, 0, .none }, - .{ .movsq, .np, &.{}, &.{ 0xa5 }, 0, .long }, - - .{ .movsx, .rm, &.{ .r16, .rm8 }, &.{ 0x0f, 0xbe }, 0, .none }, - .{ .movsx, .rm, &.{ .r16, .rm8 }, &.{ 0x0f, 0xbe }, 0, .rex }, - .{ .movsx, .rm, &.{ .r32, .rm8 }, &.{ 0x0f, 0xbe }, 0, .none }, - .{ .movsx, .rm, &.{ .r32, .rm8 }, &.{ 0x0f, 0xbe }, 0, .rex }, - .{ .movsx, .rm, &.{ .r64, .rm8 }, &.{ 0x0f, 0xbe }, 0, .long }, - .{ .movsx, .rm, &.{ .r32, .rm16 }, &.{ 0x0f, 0xbf }, 0, .none }, - .{ .movsx, .rm, &.{ .r64, .rm16 }, &.{ 0x0f, 0xbf }, 0, .long }, + .{ .call, .d, &.{ .rel32 }, &.{ 0xe8 }, 0, .none, .none }, + .{ .call, .m, &.{ .rm64 }, &.{ 0xff }, 2, .none, .none }, + + .{ .cbw, .np, &.{ .o16 }, &.{ 0x98 }, 0, .short, .none }, + .{ .cwde, .np, &.{ .o32 }, &.{ 0x98 }, 0, .none, .none }, + .{ .cdqe, .np, &.{ .o64 }, &.{ 0x98 }, 0, .long, .none }, + + .{ .cwd, .np, &.{ .o16 }, &.{ 0x99 }, 0, .short, .none }, + .{ .cdq, .np, &.{ .o32 }, &.{ 0x99 }, 0, .none, .none }, + .{ 
.cqo, .np, &.{ .o64 }, &.{ 0x99 }, 0, .long, .none }, + + .{ .cmova, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x47 }, 0, .short, .none }, + .{ .cmova, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x47 }, 0, .none, .none }, + .{ .cmova, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x47 }, 0, .long, .none }, + .{ .cmovae, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x43 }, 0, .short, .none }, + .{ .cmovae, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x43 }, 0, .none, .none }, + .{ .cmovae, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x43 }, 0, .long, .none }, + .{ .cmovb, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x42 }, 0, .short, .none }, + .{ .cmovb, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x42 }, 0, .none, .none }, + .{ .cmovb, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x42 }, 0, .long, .none }, + .{ .cmovbe, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x46 }, 0, .short, .none }, + .{ .cmovbe, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x46 }, 0, .none, .none }, + .{ .cmovbe, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x46 }, 0, .long, .none }, + .{ .cmovc, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x42 }, 0, .short, .none }, + .{ .cmovc, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x42 }, 0, .none, .none }, + .{ .cmovc, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x42 }, 0, .long, .none }, + .{ .cmove, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x44 }, 0, .short, .none }, + .{ .cmove, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x44 }, 0, .none, .none }, + .{ .cmove, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x44 }, 0, .long, .none }, + .{ .cmovg, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4f }, 0, .short, .none }, + .{ .cmovg, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4f }, 0, .none, .none }, + .{ .cmovg, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4f }, 0, .long, .none }, + .{ .cmovge, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4d }, 0, .short, .none }, + .{ .cmovge, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4d }, 0, .none, .none }, + .{ .cmovge, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4d }, 0, .long, .none }, + .{ .cmovl, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4c }, 0, .short, .none }, + .{ .cmovl, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4c }, 0, .none, .none }, + .{ .cmovl, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4c }, 0, .long, .none }, + .{ .cmovle, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4e }, 0, .short, .none }, + .{ .cmovle, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4e }, 0, .none, .none }, + .{ .cmovle, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4e }, 0, .long, .none }, + .{ .cmovna, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x46 }, 0, .short, .none }, + .{ .cmovna, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x46 }, 0, .none, .none }, + .{ .cmovna, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x46 }, 0, .long, .none }, + .{ .cmovnae, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x42 }, 0, .short, .none }, + .{ .cmovnae, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x42 }, 0, .none, .none }, + .{ .cmovnae, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x42 }, 0, .long, .none }, + .{ .cmovnb, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x43 }, 0, .short, .none }, + .{ .cmovnb, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x43 }, 0, .none, .none }, + .{ .cmovnb, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x43 }, 0, .long, .none }, + .{ .cmovnbe, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x47 }, 0, .short, .none }, + .{ .cmovnbe, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x47 }, 0, .none, .none }, + .{ .cmovnbe, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x47 }, 0, .long, .none }, + .{ .cmovnc, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x43 }, 0, .short, .none }, + .{ .cmovnc, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x43 }, 0, .none, .none }, + .{ .cmovnc, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x43 }, 0, .long, .none }, + .{ .cmovne, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x45 }, 0, .short, .none }, + .{ .cmovne, .rm, &.{ .r32, .rm32 }, &.{ 
0x0f, 0x45 }, 0, .none, .none }, + .{ .cmovne, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x45 }, 0, .long, .none }, + .{ .cmovng, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4e }, 0, .short, .none }, + .{ .cmovng, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4e }, 0, .none, .none }, + .{ .cmovng, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4e }, 0, .long, .none }, + .{ .cmovnge, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4c }, 0, .short, .none }, + .{ .cmovnge, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4c }, 0, .none, .none }, + .{ .cmovnge, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4c }, 0, .long, .none }, + .{ .cmovnl, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4d }, 0, .short, .none }, + .{ .cmovnl, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4d }, 0, .none, .none }, + .{ .cmovnl, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4d }, 0, .long, .none }, + .{ .cmovnle, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4f }, 0, .short, .none }, + .{ .cmovnle, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4f }, 0, .none, .none }, + .{ .cmovnle, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4f }, 0, .long, .none }, + .{ .cmovno, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x41 }, 0, .short, .none }, + .{ .cmovno, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x41 }, 0, .none, .none }, + .{ .cmovno, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x41 }, 0, .long, .none }, + .{ .cmovnp, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4b }, 0, .short, .none }, + .{ .cmovnp, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4b }, 0, .none, .none }, + .{ .cmovnp, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4b }, 0, .long, .none }, + .{ .cmovns, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x49 }, 0, .short, .none }, + .{ .cmovns, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x49 }, 0, .none, .none }, + .{ .cmovns, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x49 }, 0, .long, .none }, + .{ .cmovnz, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x45 }, 0, .short, .none }, + .{ .cmovnz, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x45 }, 0, .none, .none }, + .{ .cmovnz, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x45 }, 0, .long, .none }, + .{ .cmovo, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x40 }, 0, .short, .none }, + .{ .cmovo, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x40 }, 0, .none, .none }, + .{ .cmovo, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x40 }, 0, .long, .none }, + .{ .cmovp, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4a }, 0, .short, .none }, + .{ .cmovp, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4a }, 0, .none, .none }, + .{ .cmovp, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4a }, 0, .long, .none }, + .{ .cmovpe, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4a }, 0, .short, .none }, + .{ .cmovpe, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4a }, 0, .none, .none }, + .{ .cmovpe, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4a }, 0, .long, .none }, + .{ .cmovpo, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x4b }, 0, .short, .none }, + .{ .cmovpo, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x4b }, 0, .none, .none }, + .{ .cmovpo, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x4b }, 0, .long, .none }, + .{ .cmovs, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x48 }, 0, .short, .none }, + .{ .cmovs, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x48 }, 0, .none, .none }, + .{ .cmovs, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x48 }, 0, .long, .none }, + .{ .cmovz, .rm, &.{ .r16, .rm16 }, &.{ 0x0f, 0x44 }, 0, .short, .none }, + .{ .cmovz, .rm, &.{ .r32, .rm32 }, &.{ 0x0f, 0x44 }, 0, .none, .none }, + .{ .cmovz, .rm, &.{ .r64, .rm64 }, &.{ 0x0f, 0x44 }, 0, .long, .none }, + + .{ .cmp, .zi, &.{ .al, .imm8 }, &.{ 0x3c }, 0, .none, .none }, + .{ .cmp, .zi, &.{ .ax, .imm16 }, &.{ 0x3d }, 0, .short, .none }, + .{ .cmp, .zi, &.{ .eax, .imm32 }, &.{ 0x3d }, 0, .none, .none }, + .{ .cmp, .zi, &.{ .rax, .imm32s }, &.{ 0x3d }, 0, .long, .none }, + .{ .cmp, .mi, &.{ 
.rm8, .imm8 }, &.{ 0x80 }, 7, .none, .none }, + .{ .cmp, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 7, .rex, .none }, + .{ .cmp, .mi, &.{ .rm16, .imm16 }, &.{ 0x81 }, 7, .short, .none }, + .{ .cmp, .mi, &.{ .rm32, .imm32 }, &.{ 0x81 }, 7, .none, .none }, + .{ .cmp, .mi, &.{ .rm64, .imm32s }, &.{ 0x81 }, 7, .long, .none }, + .{ .cmp, .mi, &.{ .rm16, .imm8s }, &.{ 0x83 }, 7, .short, .none }, + .{ .cmp, .mi, &.{ .rm32, .imm8s }, &.{ 0x83 }, 7, .none, .none }, + .{ .cmp, .mi, &.{ .rm64, .imm8s }, &.{ 0x83 }, 7, .long, .none }, + .{ .cmp, .mr, &.{ .rm8, .r8 }, &.{ 0x38 }, 0, .none, .none }, + .{ .cmp, .mr, &.{ .rm8, .r8 }, &.{ 0x38 }, 0, .rex, .none }, + .{ .cmp, .mr, &.{ .rm16, .r16 }, &.{ 0x39 }, 0, .short, .none }, + .{ .cmp, .mr, &.{ .rm32, .r32 }, &.{ 0x39 }, 0, .none, .none }, + .{ .cmp, .mr, &.{ .rm64, .r64 }, &.{ 0x39 }, 0, .long, .none }, + .{ .cmp, .rm, &.{ .r8, .rm8 }, &.{ 0x3a }, 0, .none, .none }, + .{ .cmp, .rm, &.{ .r8, .rm8 }, &.{ 0x3a }, 0, .rex, .none }, + .{ .cmp, .rm, &.{ .r16, .rm16 }, &.{ 0x3b }, 0, .short, .none }, + .{ .cmp, .rm, &.{ .r32, .rm32 }, &.{ 0x3b }, 0, .none, .none }, + .{ .cmp, .rm, &.{ .r64, .rm64 }, &.{ 0x3b }, 0, .long, .none }, + + .{ .cmps, .np, &.{ .m8, .m8 }, &.{ 0xa6 }, 0, .none, .none }, + .{ .cmps, .np, &.{ .m16, .m16 }, &.{ 0xa7 }, 0, .short, .none }, + .{ .cmps, .np, &.{ .m32, .m32 }, &.{ 0xa7 }, 0, .none, .none }, + .{ .cmps, .np, &.{ .m64, .m64 }, &.{ 0xa7 }, 0, .long, .none }, + + .{ .cmpsb, .np, &.{}, &.{ 0xa6 }, 0, .none, .none }, + .{ .cmpsw, .np, &.{}, &.{ 0xa7 }, 0, .short, .none }, + .{ .cmpsd, .np, &.{}, &.{ 0xa7 }, 0, .none, .none }, + .{ .cmpsq, .np, &.{}, &.{ 0xa7 }, 0, .long, .none }, + + .{ .cmpxchg, .mr, &.{ .rm8, .r8 }, &.{ 0x0f, 0xb0 }, 0, .none, .none }, + .{ .cmpxchg, .mr, &.{ .rm8, .r8 }, &.{ 0x0f, 0xb0 }, 0, .rex, .none }, + .{ .cmpxchg, .mr, &.{ .rm16, .r16 }, &.{ 0x0f, 0xb1 }, 0, .short, .none }, + .{ .cmpxchg, .mr, &.{ .rm32, .r32 }, &.{ 0x0f, 0xb1 }, 0, .none, .none }, + .{ .cmpxchg, .mr, &.{ .rm64, .r64 }, &.{ 0x0f, 0xb1 }, 0, .long, .none }, + + .{ .cmpxchg8b, .m, &.{ .m64 }, &.{ 0x0f, 0xc7 }, 1, .none, .none }, + .{ .cmpxchg16b, .m, &.{ .m128 }, &.{ 0x0f, 0xc7 }, 1, .long, .none }, + + .{ .div, .m, &.{ .rm8 }, &.{ 0xf6 }, 6, .none, .none }, + .{ .div, .m, &.{ .rm8 }, &.{ 0xf6 }, 6, .rex, .none }, + .{ .div, .m, &.{ .rm16 }, &.{ 0xf7 }, 6, .short, .none }, + .{ .div, .m, &.{ .rm32 }, &.{ 0xf7 }, 6, .none, .none }, + .{ .div, .m, &.{ .rm64 }, &.{ 0xf7 }, 6, .long, .none }, + + .{ .idiv, .m, &.{ .rm8 }, &.{ 0xf6 }, 7, .none, .none }, + .{ .idiv, .m, &.{ .rm8 }, &.{ 0xf6 }, 7, .rex, .none }, + .{ .idiv, .m, &.{ .rm16 }, &.{ 0xf7 }, 7, .short, .none }, + .{ .idiv, .m, &.{ .rm32 }, &.{ 0xf7 }, 7, .none, .none }, + .{ .idiv, .m, &.{ .rm64 }, &.{ 0xf7 }, 7, .long, .none }, + + .{ .imul, .m, &.{ .rm8 }, &.{ 0xf6 }, 5, .none, .none }, + .{ .imul, .m, &.{ .rm8 }, &.{ 0xf6 }, 5, .rex, .none }, + .{ .imul, .m, &.{ .rm16, }, &.{ 0xf7 }, 5, .short, .none }, + .{ .imul, .m, &.{ .rm32, }, &.{ 0xf7 }, 5, .none, .none }, + .{ .imul, .m, &.{ .rm64, }, &.{ 0xf7 }, 5, .long, .none }, + .{ .imul, .rm, &.{ .r16, .rm16, }, &.{ 0x0f, 0xaf }, 0, .short, .none }, + .{ .imul, .rm, &.{ .r32, .rm32, }, &.{ 0x0f, 0xaf }, 0, .none, .none }, + .{ .imul, .rm, &.{ .r64, .rm64, }, &.{ 0x0f, 0xaf }, 0, .long, .none }, + .{ .imul, .rmi, &.{ .r16, .rm16, .imm8s }, &.{ 0x6b }, 0, .short, .none }, + .{ .imul, .rmi, &.{ .r32, .rm32, .imm8s }, &.{ 0x6b }, 0, .none, .none }, + .{ .imul, .rmi, &.{ .r64, .rm64, .imm8s }, &.{ 0x6b }, 0, .long, .none }, + .{ .imul, .rmi, 
&.{ .r16, .rm16, .imm16 }, &.{ 0x69 }, 0, .short, .none }, + .{ .imul, .rmi, &.{ .r32, .rm32, .imm32 }, &.{ 0x69 }, 0, .none, .none }, + .{ .imul, .rmi, &.{ .r64, .rm64, .imm32 }, &.{ 0x69 }, 0, .long, .none }, + + .{ .int3, .np, &.{}, &.{ 0xcc }, 0, .none, .none }, + + .{ .ja, .d, &.{ .rel32 }, &.{ 0x0f, 0x87 }, 0, .none, .none }, + .{ .jae, .d, &.{ .rel32 }, &.{ 0x0f, 0x83 }, 0, .none, .none }, + .{ .jb, .d, &.{ .rel32 }, &.{ 0x0f, 0x82 }, 0, .none, .none }, + .{ .jbe, .d, &.{ .rel32 }, &.{ 0x0f, 0x86 }, 0, .none, .none }, + .{ .jc, .d, &.{ .rel32 }, &.{ 0x0f, 0x82 }, 0, .none, .none }, + .{ .jrcxz, .d, &.{ .rel32 }, &.{ 0xe3 }, 0, .none, .none }, + .{ .je, .d, &.{ .rel32 }, &.{ 0x0f, 0x84 }, 0, .none, .none }, + .{ .jg, .d, &.{ .rel32 }, &.{ 0x0f, 0x8f }, 0, .none, .none }, + .{ .jge, .d, &.{ .rel32 }, &.{ 0x0f, 0x8d }, 0, .none, .none }, + .{ .jl, .d, &.{ .rel32 }, &.{ 0x0f, 0x8c }, 0, .none, .none }, + .{ .jle, .d, &.{ .rel32 }, &.{ 0x0f, 0x8e }, 0, .none, .none }, + .{ .jna, .d, &.{ .rel32 }, &.{ 0x0f, 0x86 }, 0, .none, .none }, + .{ .jnae, .d, &.{ .rel32 }, &.{ 0x0f, 0x82 }, 0, .none, .none }, + .{ .jnb, .d, &.{ .rel32 }, &.{ 0x0f, 0x83 }, 0, .none, .none }, + .{ .jnbe, .d, &.{ .rel32 }, &.{ 0x0f, 0x87 }, 0, .none, .none }, + .{ .jnc, .d, &.{ .rel32 }, &.{ 0x0f, 0x83 }, 0, .none, .none }, + .{ .jne, .d, &.{ .rel32 }, &.{ 0x0f, 0x85 }, 0, .none, .none }, + .{ .jng, .d, &.{ .rel32 }, &.{ 0x0f, 0x8e }, 0, .none, .none }, + .{ .jnge, .d, &.{ .rel32 }, &.{ 0x0f, 0x8c }, 0, .none, .none }, + .{ .jnl, .d, &.{ .rel32 }, &.{ 0x0f, 0x8d }, 0, .none, .none }, + .{ .jnle, .d, &.{ .rel32 }, &.{ 0x0f, 0x8f }, 0, .none, .none }, + .{ .jno, .d, &.{ .rel32 }, &.{ 0x0f, 0x81 }, 0, .none, .none }, + .{ .jnp, .d, &.{ .rel32 }, &.{ 0x0f, 0x8b }, 0, .none, .none }, + .{ .jns, .d, &.{ .rel32 }, &.{ 0x0f, 0x89 }, 0, .none, .none }, + .{ .jnz, .d, &.{ .rel32 }, &.{ 0x0f, 0x85 }, 0, .none, .none }, + .{ .jo, .d, &.{ .rel32 }, &.{ 0x0f, 0x80 }, 0, .none, .none }, + .{ .jp, .d, &.{ .rel32 }, &.{ 0x0f, 0x8a }, 0, .none, .none }, + .{ .jpe, .d, &.{ .rel32 }, &.{ 0x0f, 0x8a }, 0, .none, .none }, + .{ .jpo, .d, &.{ .rel32 }, &.{ 0x0f, 0x8b }, 0, .none, .none }, + .{ .js, .d, &.{ .rel32 }, &.{ 0x0f, 0x88 }, 0, .none, .none }, + .{ .jz, .d, &.{ .rel32 }, &.{ 0x0f, 0x84 }, 0, .none, .none }, + + .{ .jmp, .d, &.{ .rel32 }, &.{ 0xe9 }, 0, .none, .none }, + .{ .jmp, .m, &.{ .rm64 }, &.{ 0xff }, 4, .none, .none }, + + .{ .lea, .rm, &.{ .r16, .m }, &.{ 0x8d }, 0, .short, .none }, + .{ .lea, .rm, &.{ .r32, .m }, &.{ 0x8d }, 0, .none, .none }, + .{ .lea, .rm, &.{ .r64, .m }, &.{ 0x8d }, 0, .long, .none }, + + .{ .lfence, .np, &.{}, &.{ 0x0f, 0xae, 0xe8 }, 0, .none, .none }, + + .{ .lods, .np, &.{ .m8 }, &.{ 0xac }, 0, .none, .none }, + .{ .lods, .np, &.{ .m16 }, &.{ 0xad }, 0, .short, .none }, + .{ .lods, .np, &.{ .m32 }, &.{ 0xad }, 0, .none, .none }, + .{ .lods, .np, &.{ .m64 }, &.{ 0xad }, 0, .long, .none }, + + .{ .lodsb, .np, &.{}, &.{ 0xac }, 0, .none, .none }, + .{ .lodsw, .np, &.{}, &.{ 0xad }, 0, .short, .none }, + .{ .lodsd, .np, &.{}, &.{ 0xad }, 0, .none, .none }, + .{ .lodsq, .np, &.{}, &.{ 0xad }, 0, .long, .none }, + + .{ .lzcnt, .rm, &.{ .r16, .rm16 }, &.{ 0xf3, 0x0f, 0xbd }, 0, .short, .lzcnt }, + .{ .lzcnt, .rm, &.{ .r32, .rm32 }, &.{ 0xf3, 0x0f, 0xbd }, 0, .none, .lzcnt }, + .{ .lzcnt, .rm, &.{ .r64, .rm64 }, &.{ 0xf3, 0x0f, 0xbd }, 0, .long, .lzcnt }, + + .{ .mfence, .np, &.{}, &.{ 0x0f, 0xae, 0xf0 }, 0, .none, .none }, + + .{ .mov, .mr, &.{ .rm8, .r8 }, &.{ 0x88 }, 0, .none, .none }, + .{ .mov, .mr, &.{ 
.rm8, .r8 }, &.{ 0x88 }, 0, .rex, .none }, + .{ .mov, .mr, &.{ .rm16, .r16 }, &.{ 0x89 }, 0, .short, .none }, + .{ .mov, .mr, &.{ .rm32, .r32 }, &.{ 0x89 }, 0, .none, .none }, + .{ .mov, .mr, &.{ .rm64, .r64 }, &.{ 0x89 }, 0, .long, .none }, + .{ .mov, .rm, &.{ .r8, .rm8 }, &.{ 0x8a }, 0, .none, .none }, + .{ .mov, .rm, &.{ .r8, .rm8 }, &.{ 0x8a }, 0, .rex, .none }, + .{ .mov, .rm, &.{ .r16, .rm16 }, &.{ 0x8b }, 0, .short, .none }, + .{ .mov, .rm, &.{ .r32, .rm32 }, &.{ 0x8b }, 0, .none, .none }, + .{ .mov, .rm, &.{ .r64, .rm64 }, &.{ 0x8b }, 0, .long, .none }, + .{ .mov, .mr, &.{ .rm16, .sreg }, &.{ 0x8c }, 0, .short, .none }, + .{ .mov, .mr, &.{ .r32_m16, .sreg }, &.{ 0x8c }, 0, .none, .none }, + .{ .mov, .mr, &.{ .r64_m16, .sreg }, &.{ 0x8c }, 0, .long, .none }, + .{ .mov, .rm, &.{ .sreg, .rm16 }, &.{ 0x8e }, 0, .short, .none }, + .{ .mov, .rm, &.{ .sreg, .r32_m16 }, &.{ 0x8e }, 0, .none, .none }, + .{ .mov, .rm, &.{ .sreg, .r64_m16 }, &.{ 0x8e }, 0, .long, .none }, + .{ .mov, .fd, &.{ .al, .moffs }, &.{ 0xa0 }, 0, .none, .none }, + .{ .mov, .fd, &.{ .ax, .moffs }, &.{ 0xa1 }, 0, .none, .none }, + .{ .mov, .fd, &.{ .eax, .moffs }, &.{ 0xa1 }, 0, .none, .none }, + .{ .mov, .fd, &.{ .rax, .moffs }, &.{ 0xa1 }, 0, .long, .none }, + .{ .mov, .td, &.{ .moffs, .al }, &.{ 0xa2 }, 0, .none, .none }, + .{ .mov, .td, &.{ .moffs, .ax }, &.{ 0xa3 }, 0, .none, .none }, + .{ .mov, .td, &.{ .moffs, .eax }, &.{ 0xa3 }, 0, .none, .none }, + .{ .mov, .td, &.{ .moffs, .rax }, &.{ 0xa3 }, 0, .long, .none }, + .{ .mov, .oi, &.{ .r8, .imm8 }, &.{ 0xb0 }, 0, .none, .none }, + .{ .mov, .oi, &.{ .r8, .imm8 }, &.{ 0xb0 }, 0, .rex, .none }, + .{ .mov, .oi, &.{ .r16, .imm16 }, &.{ 0xb8 }, 0, .short, .none }, + .{ .mov, .oi, &.{ .r32, .imm32 }, &.{ 0xb8 }, 0, .none, .none }, + .{ .mov, .oi, &.{ .r64, .imm64 }, &.{ 0xb8 }, 0, .long, .none }, + .{ .mov, .mi, &.{ .rm8, .imm8 }, &.{ 0xc6 }, 0, .none, .none }, + .{ .mov, .mi, &.{ .rm8, .imm8 }, &.{ 0xc6 }, 0, .rex, .none }, + .{ .mov, .mi, &.{ .rm16, .imm16 }, &.{ 0xc7 }, 0, .short, .none }, + .{ .mov, .mi, &.{ .rm32, .imm32 }, &.{ 0xc7 }, 0, .none, .none }, + .{ .mov, .mi, &.{ .rm64, .imm32s }, &.{ 0xc7 }, 0, .long, .none }, + + .{ .movbe, .rm, &.{ .r16, .m16 }, &.{ 0x0f, 0x38, 0xf0 }, 0, .short, .movbe }, + .{ .movbe, .rm, &.{ .r32, .m32 }, &.{ 0x0f, 0x38, 0xf0 }, 0, .none, .movbe }, + .{ .movbe, .rm, &.{ .r64, .m64 }, &.{ 0x0f, 0x38, 0xf0 }, 0, .long, .movbe }, + .{ .movbe, .mr, &.{ .m16, .r16 }, &.{ 0x0f, 0x38, 0xf1 }, 0, .short, .movbe }, + .{ .movbe, .mr, &.{ .m32, .r32 }, &.{ 0x0f, 0x38, 0xf1 }, 0, .none, .movbe }, + .{ .movbe, .mr, &.{ .m64, .r64 }, &.{ 0x0f, 0x38, 0xf1 }, 0, .long, .movbe }, + + .{ .movs, .np, &.{ .m8, .m8 }, &.{ 0xa4 }, 0, .none, .none }, + .{ .movs, .np, &.{ .m16, .m16 }, &.{ 0xa5 }, 0, .short, .none }, + .{ .movs, .np, &.{ .m32, .m32 }, &.{ 0xa5 }, 0, .none, .none }, + .{ .movs, .np, &.{ .m64, .m64 }, &.{ 0xa5 }, 0, .long, .none }, + + .{ .movsb, .np, &.{}, &.{ 0xa4 }, 0, .none, .none }, + .{ .movsw, .np, &.{}, &.{ 0xa5 }, 0, .short, .none }, + .{ .movsd, .np, &.{}, &.{ 0xa5 }, 0, .none, .none }, + .{ .movsq, .np, &.{}, &.{ 0xa5 }, 0, .long, .none }, + + .{ .movsx, .rm, &.{ .r16, .rm8 }, &.{ 0x0f, 0xbe }, 0, .short, .none }, + .{ .movsx, .rm, &.{ .r16, .rm8 }, &.{ 0x0f, 0xbe }, 0, .rex_short, .none }, + .{ .movsx, .rm, &.{ .r32, .rm8 }, &.{ 0x0f, 0xbe }, 0, .none, .none }, + .{ .movsx, .rm, &.{ .r32, .rm8 }, &.{ 0x0f, 0xbe }, 0, .rex, .none }, + .{ .movsx, .rm, &.{ .r64, .rm8 }, &.{ 0x0f, 0xbe }, 0, .long, .none }, + .{ .movsx, .rm, &.{ 
.r32, .rm16 }, &.{ 0x0f, 0xbf }, 0, .none, .none }, + .{ .movsx, .rm, &.{ .r32, .rm16 }, &.{ 0x0f, 0xbf }, 0, .rex, .none }, + .{ .movsx, .rm, &.{ .r64, .rm16 }, &.{ 0x0f, 0xbf }, 0, .long, .none }, // This instruction is discouraged. - .{ .movsxd, .rm, &.{ .r32, .rm32 }, &.{ 0x63 }, 0, .none }, - .{ .movsxd, .rm, &.{ .r64, .rm32 }, &.{ 0x63 }, 0, .long }, - - .{ .movzx, .rm, &.{ .r16, .rm8 }, &.{ 0x0f, 0xb6 }, 0, .none }, - .{ .movzx, .rm, &.{ .r32, .rm8 }, &.{ 0x0f, 0xb6 }, 0, .none }, - .{ .movzx, .rm, &.{ .r64, .rm8 }, &.{ 0x0f, 0xb6 }, 0, .long }, - .{ .movzx, .rm, &.{ .r32, .rm16 }, &.{ 0x0f, 0xb7 }, 0, .none }, - .{ .movzx, .rm, &.{ .r64, .rm16 }, &.{ 0x0f, 0xb7 }, 0, .long }, - - .{ .mul, .m, &.{ .rm8 }, &.{ 0xf6 }, 4, .none }, - .{ .mul, .m, &.{ .rm8 }, &.{ 0xf6 }, 4, .rex }, - .{ .mul, .m, &.{ .rm16 }, &.{ 0xf7 }, 4, .none }, - .{ .mul, .m, &.{ .rm32 }, &.{ 0xf7 }, 4, .none }, - .{ .mul, .m, &.{ .rm64 }, &.{ 0xf7 }, 4, .long }, - - .{ .neg, .m, &.{ .rm8 }, &.{ 0xf6 }, 3, .none }, - .{ .neg, .m, &.{ .rm8 }, &.{ 0xf6 }, 3, .rex }, - .{ .neg, .m, &.{ .rm16 }, &.{ 0xf7 }, 3, .none }, - .{ .neg, .m, &.{ .rm32 }, &.{ 0xf7 }, 3, .none }, - .{ .neg, .m, &.{ .rm64 }, &.{ 0xf7 }, 3, .long }, - - .{ .nop, .np, &.{}, &.{ 0x90 }, 0, .none }, - - .{ .not, .m, &.{ .rm8 }, &.{ 0xf6 }, 2, .none }, - .{ .not, .m, &.{ .rm8 }, &.{ 0xf6 }, 2, .rex }, - .{ .not, .m, &.{ .rm16 }, &.{ 0xf7 }, 2, .none }, - .{ .not, .m, &.{ .rm32 }, &.{ 0xf7 }, 2, .none }, - .{ .not, .m, &.{ .rm64 }, &.{ 0xf7 }, 2, .long }, - - .{ .@"or", .zi, &.{ .al, .imm8 }, &.{ 0x0c }, 0, .none }, - .{ .@"or", .zi, &.{ .ax, .imm16 }, &.{ 0x0d }, 0, .none }, - .{ .@"or", .zi, &.{ .eax, .imm32 }, &.{ 0x0d }, 0, .none }, - .{ .@"or", .zi, &.{ .rax, .imm32s }, &.{ 0x0d }, 0, .long }, - .{ .@"or", .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 1, .none }, - .{ .@"or", .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 1, .rex }, - .{ .@"or", .mi, &.{ .rm16, .imm16 }, &.{ 0x81 }, 1, .none }, - .{ .@"or", .mi, &.{ .rm32, .imm32 }, &.{ 0x81 }, 1, .none }, - .{ .@"or", .mi, &.{ .rm64, .imm32s }, &.{ 0x81 }, 1, .long }, - .{ .@"or", .mi, &.{ .rm16, .imm8s }, &.{ 0x83 }, 1, .none }, - .{ .@"or", .mi, &.{ .rm32, .imm8s }, &.{ 0x83 }, 1, .none }, - .{ .@"or", .mi, &.{ .rm64, .imm8s }, &.{ 0x83 }, 1, .long }, - .{ .@"or", .mr, &.{ .rm8, .r8 }, &.{ 0x08 }, 0, .none }, - .{ .@"or", .mr, &.{ .rm8, .r8 }, &.{ 0x08 }, 0, .rex }, - .{ .@"or", .mr, &.{ .rm16, .r16 }, &.{ 0x09 }, 0, .none }, - .{ .@"or", .mr, &.{ .rm32, .r32 }, &.{ 0x09 }, 0, .none }, - .{ .@"or", .mr, &.{ .rm64, .r64 }, &.{ 0x09 }, 0, .long }, - .{ .@"or", .rm, &.{ .r8, .rm8 }, &.{ 0x0a }, 0, .none }, - .{ .@"or", .rm, &.{ .r8, .rm8 }, &.{ 0x0a }, 0, .rex }, - .{ .@"or", .rm, &.{ .r16, .rm16 }, &.{ 0x0b }, 0, .none }, - .{ .@"or", .rm, &.{ .r32, .rm32 }, &.{ 0x0b }, 0, .none }, - .{ .@"or", .rm, &.{ .r64, .rm64 }, &.{ 0x0b }, 0, .long }, - - .{ .pop, .o, &.{ .r16 }, &.{ 0x58 }, 0, .none }, - .{ .pop, .o, &.{ .r64 }, &.{ 0x58 }, 0, .none }, - .{ .pop, .m, &.{ .rm16 }, &.{ 0x8f }, 0, .none }, - .{ .pop, .m, &.{ .rm64 }, &.{ 0x8f }, 0, .none }, - - .{ .popcnt, .rm, &.{ .r16, .rm16 }, &.{ 0xf3, 0x0f, 0xb8 }, 0, .none }, - .{ .popcnt, .rm, &.{ .r32, .rm32 }, &.{ 0xf3, 0x0f, 0xb8 }, 0, .none }, - .{ .popcnt, .rm, &.{ .r64, .rm64 }, &.{ 0xf3, 0x0f, 0xb8 }, 0, .long }, - - .{ .push, .o, &.{ .r16 }, &.{ 0x50 }, 0, .none }, - .{ .push, .o, &.{ .r64 }, &.{ 0x50 }, 0, .none }, - .{ .push, .m, &.{ .rm16 }, &.{ 0xff }, 6, .none }, - .{ .push, .m, &.{ .rm64 }, &.{ 0xff }, 6, .none }, - .{ .push, .i, &.{ .imm8 }, &.{ 0x6a }, 0, 
.none }, - .{ .push, .i, &.{ .imm16 }, &.{ 0x68 }, 0, .none }, - .{ .push, .i, &.{ .imm32 }, &.{ 0x68 }, 0, .none }, - - .{ .ret, .np, &.{}, &.{ 0xc3 }, 0, .none }, - - .{ .rcl, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 2, .none }, - .{ .rcl, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 2, .rex }, - .{ .rcl, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 2, .none }, - .{ .rcl, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 2, .rex }, - .{ .rcl, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 2, .none }, - .{ .rcl, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 2, .rex }, - .{ .rcl, .m1, &.{ .rm16, .unity }, &.{ 0xd1 }, 2, .none }, - .{ .rcl, .mc, &.{ .rm16, .cl }, &.{ 0xd3 }, 2, .none }, - .{ .rcl, .mi, &.{ .rm16, .imm8 }, &.{ 0xc1 }, 2, .none }, - .{ .rcl, .m1, &.{ .rm32, .unity }, &.{ 0xd1 }, 2, .none }, - .{ .rcl, .m1, &.{ .rm64, .unity }, &.{ 0xd1 }, 2, .long }, - .{ .rcl, .mc, &.{ .rm32, .cl }, &.{ 0xd3 }, 2, .none }, - .{ .rcl, .mc, &.{ .rm64, .cl }, &.{ 0xd3 }, 2, .long }, - .{ .rcl, .mi, &.{ .rm32, .imm8 }, &.{ 0xc1 }, 2, .none }, - .{ .rcl, .mi, &.{ .rm64, .imm8 }, &.{ 0xc1 }, 2, .long }, - - .{ .rcr, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 3, .none }, - .{ .rcr, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 3, .rex }, - .{ .rcr, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 3, .none }, - .{ .rcr, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 3, .rex }, - .{ .rcr, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 3, .none }, - .{ .rcr, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 3, .rex }, - .{ .rcr, .m1, &.{ .rm16, .unity }, &.{ 0xd1 }, 3, .none }, - .{ .rcr, .mc, &.{ .rm16, .cl }, &.{ 0xd3 }, 3, .none }, - .{ .rcr, .mi, &.{ .rm16, .imm8 }, &.{ 0xc1 }, 3, .none }, - .{ .rcr, .m1, &.{ .rm32, .unity }, &.{ 0xd1 }, 3, .none }, - .{ .rcr, .m1, &.{ .rm64, .unity }, &.{ 0xd1 }, 3, .long }, - .{ .rcr, .mc, &.{ .rm32, .cl }, &.{ 0xd3 }, 3, .none }, - .{ .rcr, .mc, &.{ .rm64, .cl }, &.{ 0xd3 }, 3, .long }, - .{ .rcr, .mi, &.{ .rm32, .imm8 }, &.{ 0xc1 }, 3, .none }, - .{ .rcr, .mi, &.{ .rm64, .imm8 }, &.{ 0xc1 }, 3, .long }, - - .{ .rol, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 0, .none }, - .{ .rol, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 0, .rex }, - .{ .rol, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 0, .none }, - .{ .rol, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 0, .rex }, - .{ .rol, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 0, .none }, - .{ .rol, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 0, .rex }, - .{ .rol, .m1, &.{ .rm16, .unity }, &.{ 0xd1 }, 0, .none }, - .{ .rol, .mc, &.{ .rm16, .cl }, &.{ 0xd3 }, 0, .none }, - .{ .rol, .mi, &.{ .rm16, .imm8 }, &.{ 0xc1 }, 0, .none }, - .{ .rol, .m1, &.{ .rm32, .unity }, &.{ 0xd1 }, 0, .none }, - .{ .rol, .m1, &.{ .rm64, .unity }, &.{ 0xd1 }, 0, .long }, - .{ .rol, .mc, &.{ .rm32, .cl }, &.{ 0xd3 }, 0, .none }, - .{ .rol, .mc, &.{ .rm64, .cl }, &.{ 0xd3 }, 0, .long }, - .{ .rol, .mi, &.{ .rm32, .imm8 }, &.{ 0xc1 }, 0, .none }, - .{ .rol, .mi, &.{ .rm64, .imm8 }, &.{ 0xc1 }, 0, .long }, - - .{ .ror, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 1, .none }, - .{ .ror, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 1, .rex }, - .{ .ror, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 1, .none }, - .{ .ror, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 1, .rex }, - .{ .ror, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 1, .none }, - .{ .ror, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 1, .rex }, - .{ .ror, .m1, &.{ .rm16, .unity }, &.{ 0xd1 }, 1, .none }, - .{ .ror, .mc, &.{ .rm16, .cl }, &.{ 0xd3 }, 1, .none }, - .{ .ror, .mi, &.{ .rm16, .imm8 }, &.{ 0xc1 }, 1, .none }, - .{ .ror, .m1, &.{ .rm32, .unity }, &.{ 0xd1 }, 1, .none }, - .{ .ror, .m1, &.{ .rm64, .unity }, &.{ 0xd1 }, 1, .long }, - .{ .ror, .mc, &.{ .rm32, .cl }, &.{ 0xd3 }, 1, .none }, - .{ .ror, .mc, 
&.{ .rm64, .cl }, &.{ 0xd3 }, 1, .long }, - .{ .ror, .mi, &.{ .rm32, .imm8 }, &.{ 0xc1 }, 1, .none }, - .{ .ror, .mi, &.{ .rm64, .imm8 }, &.{ 0xc1 }, 1, .long }, - - .{ .sal, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 4, .none }, - .{ .sal, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 4, .rex }, - .{ .sal, .m1, &.{ .rm16, .unity }, &.{ 0xd1 }, 4, .none }, - .{ .sal, .m1, &.{ .rm32, .unity }, &.{ 0xd1 }, 4, .none }, - .{ .sal, .m1, &.{ .rm64, .unity }, &.{ 0xd1 }, 4, .long }, - .{ .sal, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 4, .none }, - .{ .sal, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 4, .rex }, - .{ .sal, .mc, &.{ .rm16, .cl }, &.{ 0xd3 }, 4, .none }, - .{ .sal, .mc, &.{ .rm32, .cl }, &.{ 0xd3 }, 4, .none }, - .{ .sal, .mc, &.{ .rm64, .cl }, &.{ 0xd3 }, 4, .long }, - .{ .sal, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 4, .none }, - .{ .sal, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 4, .rex }, - .{ .sal, .mi, &.{ .rm16, .imm8 }, &.{ 0xc1 }, 4, .none }, - .{ .sal, .mi, &.{ .rm32, .imm8 }, &.{ 0xc1 }, 4, .none }, - .{ .sal, .mi, &.{ .rm64, .imm8 }, &.{ 0xc1 }, 4, .long }, - - .{ .sar, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 7, .none }, - .{ .sar, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 7, .rex }, - .{ .sar, .m1, &.{ .rm16, .unity }, &.{ 0xd1 }, 7, .none }, - .{ .sar, .m1, &.{ .rm32, .unity }, &.{ 0xd1 }, 7, .none }, - .{ .sar, .m1, &.{ .rm64, .unity }, &.{ 0xd1 }, 7, .long }, - .{ .sar, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 7, .none }, - .{ .sar, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 7, .rex }, - .{ .sar, .mc, &.{ .rm16, .cl }, &.{ 0xd3 }, 7, .none }, - .{ .sar, .mc, &.{ .rm32, .cl }, &.{ 0xd3 }, 7, .none }, - .{ .sar, .mc, &.{ .rm64, .cl }, &.{ 0xd3 }, 7, .long }, - .{ .sar, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 7, .none }, - .{ .sar, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 7, .rex }, - .{ .sar, .mi, &.{ .rm16, .imm8 }, &.{ 0xc1 }, 7, .none }, - .{ .sar, .mi, &.{ .rm32, .imm8 }, &.{ 0xc1 }, 7, .none }, - .{ .sar, .mi, &.{ .rm64, .imm8 }, &.{ 0xc1 }, 7, .long }, - - .{ .sbb, .zi, &.{ .al, .imm8 }, &.{ 0x1c }, 0, .none }, - .{ .sbb, .zi, &.{ .ax, .imm16 }, &.{ 0x1d }, 0, .none }, - .{ .sbb, .zi, &.{ .eax, .imm32 }, &.{ 0x1d }, 0, .none }, - .{ .sbb, .zi, &.{ .rax, .imm32s }, &.{ 0x1d }, 0, .long }, - .{ .sbb, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 3, .none }, - .{ .sbb, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 3, .rex }, - .{ .sbb, .mi, &.{ .rm16, .imm16 }, &.{ 0x81 }, 3, .none }, - .{ .sbb, .mi, &.{ .rm32, .imm32 }, &.{ 0x81 }, 3, .none }, - .{ .sbb, .mi, &.{ .rm64, .imm32s }, &.{ 0x81 }, 3, .long }, - .{ .sbb, .mi, &.{ .rm16, .imm8s }, &.{ 0x83 }, 3, .none }, - .{ .sbb, .mi, &.{ .rm32, .imm8s }, &.{ 0x83 }, 3, .none }, - .{ .sbb, .mi, &.{ .rm64, .imm8s }, &.{ 0x83 }, 3, .long }, - .{ .sbb, .mr, &.{ .rm8, .r8 }, &.{ 0x18 }, 0, .none }, - .{ .sbb, .mr, &.{ .rm8, .r8 }, &.{ 0x18 }, 0, .rex }, - .{ .sbb, .mr, &.{ .rm16, .r16 }, &.{ 0x19 }, 0, .none }, - .{ .sbb, .mr, &.{ .rm32, .r32 }, &.{ 0x19 }, 0, .none }, - .{ .sbb, .mr, &.{ .rm64, .r64 }, &.{ 0x19 }, 0, .long }, - .{ .sbb, .rm, &.{ .r8, .rm8 }, &.{ 0x1a }, 0, .none }, - .{ .sbb, .rm, &.{ .r8, .rm8 }, &.{ 0x1a }, 0, .rex }, - .{ .sbb, .rm, &.{ .r16, .rm16 }, &.{ 0x1b }, 0, .none }, - .{ .sbb, .rm, &.{ .r32, .rm32 }, &.{ 0x1b }, 0, .none }, - .{ .sbb, .rm, &.{ .r64, .rm64 }, &.{ 0x1b }, 0, .long }, - - .{ .scas, .np, &.{ .m8 }, &.{ 0xae }, 0, .none }, - .{ .scas, .np, &.{ .m16 }, &.{ 0xaf }, 0, .none }, - .{ .scas, .np, &.{ .m32 }, &.{ 0xaf }, 0, .none }, - .{ .scas, .np, &.{ .m64 }, &.{ 0xaf }, 0, .long }, - - .{ .scasb, .np, &.{}, &.{ 0xae }, 0, .none }, - .{ .scasw, .np, &.{}, &.{ 0xaf }, 0, .short }, 
- .{ .scasd, .np, &.{}, &.{ 0xaf }, 0, .none }, - .{ .scasq, .np, &.{}, &.{ 0xaf }, 0, .long }, - - .{ .seta, .m, &.{ .rm8 }, &.{ 0x0f, 0x97 }, 0, .none }, - .{ .seta, .m, &.{ .rm8 }, &.{ 0x0f, 0x97 }, 0, .rex }, - .{ .setae, .m, &.{ .rm8 }, &.{ 0x0f, 0x93 }, 0, .none }, - .{ .setae, .m, &.{ .rm8 }, &.{ 0x0f, 0x93 }, 0, .rex }, - .{ .setb, .m, &.{ .rm8 }, &.{ 0x0f, 0x92 }, 0, .none }, - .{ .setb, .m, &.{ .rm8 }, &.{ 0x0f, 0x92 }, 0, .rex }, - .{ .setbe, .m, &.{ .rm8 }, &.{ 0x0f, 0x96 }, 0, .none }, - .{ .setbe, .m, &.{ .rm8 }, &.{ 0x0f, 0x96 }, 0, .rex }, - .{ .setc, .m, &.{ .rm8 }, &.{ 0x0f, 0x92 }, 0, .none }, - .{ .setc, .m, &.{ .rm8 }, &.{ 0x0f, 0x92 }, 0, .rex }, - .{ .sete, .m, &.{ .rm8 }, &.{ 0x0f, 0x94 }, 0, .none }, - .{ .sete, .m, &.{ .rm8 }, &.{ 0x0f, 0x94 }, 0, .rex }, - .{ .setg, .m, &.{ .rm8 }, &.{ 0x0f, 0x9f }, 0, .none }, - .{ .setg, .m, &.{ .rm8 }, &.{ 0x0f, 0x9f }, 0, .rex }, - .{ .setge, .m, &.{ .rm8 }, &.{ 0x0f, 0x9d }, 0, .none }, - .{ .setge, .m, &.{ .rm8 }, &.{ 0x0f, 0x9d }, 0, .rex }, - .{ .setl, .m, &.{ .rm8 }, &.{ 0x0f, 0x9c }, 0, .none }, - .{ .setl, .m, &.{ .rm8 }, &.{ 0x0f, 0x9c }, 0, .rex }, - .{ .setle, .m, &.{ .rm8 }, &.{ 0x0f, 0x9e }, 0, .none }, - .{ .setle, .m, &.{ .rm8 }, &.{ 0x0f, 0x9e }, 0, .rex }, - .{ .setna, .m, &.{ .rm8 }, &.{ 0x0f, 0x96 }, 0, .none }, - .{ .setna, .m, &.{ .rm8 }, &.{ 0x0f, 0x96 }, 0, .rex }, - .{ .setnae, .m, &.{ .rm8 }, &.{ 0x0f, 0x92 }, 0, .none }, - .{ .setnae, .m, &.{ .rm8 }, &.{ 0x0f, 0x92 }, 0, .rex }, - .{ .setnb, .m, &.{ .rm8 }, &.{ 0x0f, 0x93 }, 0, .none }, - .{ .setnb, .m, &.{ .rm8 }, &.{ 0x0f, 0x93 }, 0, .rex }, - .{ .setnbe, .m, &.{ .rm8 }, &.{ 0x0f, 0x97 }, 0, .none }, - .{ .setnbe, .m, &.{ .rm8 }, &.{ 0x0f, 0x97 }, 0, .rex }, - .{ .setnc, .m, &.{ .rm8 }, &.{ 0x0f, 0x93 }, 0, .none }, - .{ .setnc, .m, &.{ .rm8 }, &.{ 0x0f, 0x93 }, 0, .rex }, - .{ .setne, .m, &.{ .rm8 }, &.{ 0x0f, 0x95 }, 0, .none }, - .{ .setne, .m, &.{ .rm8 }, &.{ 0x0f, 0x95 }, 0, .rex }, - .{ .setng, .m, &.{ .rm8 }, &.{ 0x0f, 0x9e }, 0, .none }, - .{ .setng, .m, &.{ .rm8 }, &.{ 0x0f, 0x9e }, 0, .rex }, - .{ .setnge, .m, &.{ .rm8 }, &.{ 0x0f, 0x9c }, 0, .none }, - .{ .setnge, .m, &.{ .rm8 }, &.{ 0x0f, 0x9c }, 0, .rex }, - .{ .setnl, .m, &.{ .rm8 }, &.{ 0x0f, 0x9d }, 0, .none }, - .{ .setnl, .m, &.{ .rm8 }, &.{ 0x0f, 0x9d }, 0, .rex }, - .{ .setnle, .m, &.{ .rm8 }, &.{ 0x0f, 0x9f }, 0, .none }, - .{ .setnle, .m, &.{ .rm8 }, &.{ 0x0f, 0x9f }, 0, .rex }, - .{ .setno, .m, &.{ .rm8 }, &.{ 0x0f, 0x91 }, 0, .none }, - .{ .setno, .m, &.{ .rm8 }, &.{ 0x0f, 0x91 }, 0, .rex }, - .{ .setnp, .m, &.{ .rm8 }, &.{ 0x0f, 0x9b }, 0, .none }, - .{ .setnp, .m, &.{ .rm8 }, &.{ 0x0f, 0x9b }, 0, .rex }, - .{ .setns, .m, &.{ .rm8 }, &.{ 0x0f, 0x99 }, 0, .none }, - .{ .setns, .m, &.{ .rm8 }, &.{ 0x0f, 0x99 }, 0, .rex }, - .{ .setnz, .m, &.{ .rm8 }, &.{ 0x0f, 0x95 }, 0, .none }, - .{ .setnz, .m, &.{ .rm8 }, &.{ 0x0f, 0x95 }, 0, .rex }, - .{ .seto, .m, &.{ .rm8 }, &.{ 0x0f, 0x90 }, 0, .none }, - .{ .seto, .m, &.{ .rm8 }, &.{ 0x0f, 0x90 }, 0, .rex }, - .{ .setp, .m, &.{ .rm8 }, &.{ 0x0f, 0x9a }, 0, .none }, - .{ .setp, .m, &.{ .rm8 }, &.{ 0x0f, 0x9a }, 0, .rex }, - .{ .setpe, .m, &.{ .rm8 }, &.{ 0x0f, 0x9a }, 0, .none }, - .{ .setpe, .m, &.{ .rm8 }, &.{ 0x0f, 0x9a }, 0, .rex }, - .{ .setpo, .m, &.{ .rm8 }, &.{ 0x0f, 0x9b }, 0, .none }, - .{ .setpo, .m, &.{ .rm8 }, &.{ 0x0f, 0x9b }, 0, .rex }, - .{ .sets, .m, &.{ .rm8 }, &.{ 0x0f, 0x98 }, 0, .none }, - .{ .sets, .m, &.{ .rm8 }, &.{ 0x0f, 0x98 }, 0, .rex }, - .{ .setz, .m, &.{ .rm8 }, &.{ 0x0f, 0x94 }, 0, .none }, - .{ .setz, .m, 
&.{ .rm8 }, &.{ 0x0f, 0x94 }, 0, .rex }, - - .{ .sfence, .np, &.{}, &.{ 0x0f, 0xae, 0xf8 }, 0, .none }, - - .{ .shl, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 4, .none }, - .{ .shl, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 4, .rex }, - .{ .shl, .m1, &.{ .rm16, .unity }, &.{ 0xd1 }, 4, .none }, - .{ .shl, .m1, &.{ .rm32, .unity }, &.{ 0xd1 }, 4, .none }, - .{ .shl, .m1, &.{ .rm64, .unity }, &.{ 0xd1 }, 4, .long }, - .{ .shl, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 4, .none }, - .{ .shl, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 4, .rex }, - .{ .shl, .mc, &.{ .rm16, .cl }, &.{ 0xd3 }, 4, .none }, - .{ .shl, .mc, &.{ .rm32, .cl }, &.{ 0xd3 }, 4, .none }, - .{ .shl, .mc, &.{ .rm64, .cl }, &.{ 0xd3 }, 4, .long }, - .{ .shl, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 4, .none }, - .{ .shl, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 4, .rex }, - .{ .shl, .mi, &.{ .rm16, .imm8 }, &.{ 0xc1 }, 4, .none }, - .{ .shl, .mi, &.{ .rm32, .imm8 }, &.{ 0xc1 }, 4, .none }, - .{ .shl, .mi, &.{ .rm64, .imm8 }, &.{ 0xc1 }, 4, .long }, - - .{ .shld, .mri, &.{ .rm16, .r16, .imm8 }, &.{ 0x0f, 0xa4 }, 0, .none }, - .{ .shld, .mrc, &.{ .rm16, .r16, .cl }, &.{ 0x0f, 0xa5 }, 0, .none }, - .{ .shld, .mri, &.{ .rm32, .r32, .imm8 }, &.{ 0x0f, 0xa4 }, 0, .none }, - .{ .shld, .mri, &.{ .rm64, .r64, .imm8 }, &.{ 0x0f, 0xa4 }, 0, .long }, - .{ .shld, .mrc, &.{ .rm32, .r32, .cl }, &.{ 0x0f, 0xa5 }, 0, .none }, - .{ .shld, .mrc, &.{ .rm64, .r64, .cl }, &.{ 0x0f, 0xa5 }, 0, .long }, - - .{ .shr, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 5, .none }, - .{ .shr, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 5, .rex }, - .{ .shr, .m1, &.{ .rm16, .unity }, &.{ 0xd1 }, 5, .none }, - .{ .shr, .m1, &.{ .rm32, .unity }, &.{ 0xd1 }, 5, .none }, - .{ .shr, .m1, &.{ .rm64, .unity }, &.{ 0xd1 }, 5, .long }, - .{ .shr, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 5, .none }, - .{ .shr, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 5, .rex }, - .{ .shr, .mc, &.{ .rm16, .cl }, &.{ 0xd3 }, 5, .none }, - .{ .shr, .mc, &.{ .rm32, .cl }, &.{ 0xd3 }, 5, .none }, - .{ .shr, .mc, &.{ .rm64, .cl }, &.{ 0xd3 }, 5, .long }, - .{ .shr, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 5, .none }, - .{ .shr, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 5, .rex }, - .{ .shr, .mi, &.{ .rm16, .imm8 }, &.{ 0xc1 }, 5, .none }, - .{ .shr, .mi, &.{ .rm32, .imm8 }, &.{ 0xc1 }, 5, .none }, - .{ .shr, .mi, &.{ .rm64, .imm8 }, &.{ 0xc1 }, 5, .long }, - - .{ .shrd, .mri, &.{ .rm16, .r16, .imm8 }, &.{ 0x0f, 0xac }, 0, .none }, - .{ .shrd, .mrc, &.{ .rm16, .r16, .cl }, &.{ 0x0f, 0xad }, 0, .none }, - .{ .shrd, .mri, &.{ .rm32, .r32, .imm8 }, &.{ 0x0f, 0xac }, 0, .none }, - .{ .shrd, .mri, &.{ .rm64, .r64, .imm8 }, &.{ 0x0f, 0xac }, 0, .long }, - .{ .shrd, .mrc, &.{ .rm32, .r32, .cl }, &.{ 0x0f, 0xad }, 0, .none }, - .{ .shrd, .mrc, &.{ .rm64, .r64, .cl }, &.{ 0x0f, 0xad }, 0, .long }, - - .{ .stos, .np, &.{ .m8 }, &.{ 0xaa }, 0, .none }, - .{ .stos, .np, &.{ .m16 }, &.{ 0xab }, 0, .none }, - .{ .stos, .np, &.{ .m32 }, &.{ 0xab }, 0, .none }, - .{ .stos, .np, &.{ .m64 }, &.{ 0xab }, 0, .long }, - - .{ .stosb, .np, &.{}, &.{ 0xaa }, 0, .none }, - .{ .stosw, .np, &.{}, &.{ 0xab }, 0, .short }, - .{ .stosd, .np, &.{}, &.{ 0xab }, 0, .none }, - .{ .stosq, .np, &.{}, &.{ 0xab }, 0, .long }, - - .{ .sub, .zi, &.{ .al, .imm8 }, &.{ 0x2c }, 0, .none }, - .{ .sub, .zi, &.{ .ax, .imm16 }, &.{ 0x2d }, 0, .none }, - .{ .sub, .zi, &.{ .eax, .imm32 }, &.{ 0x2d }, 0, .none }, - .{ .sub, .zi, &.{ .rax, .imm32s }, &.{ 0x2d }, 0, .long }, - .{ .sub, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 5, .none }, - .{ .sub, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 5, .rex }, - .{ .sub, .mi, &.{ .rm16, 
.imm16 }, &.{ 0x81 }, 5, .none }, - .{ .sub, .mi, &.{ .rm32, .imm32 }, &.{ 0x81 }, 5, .none }, - .{ .sub, .mi, &.{ .rm64, .imm32s }, &.{ 0x81 }, 5, .long }, - .{ .sub, .mi, &.{ .rm16, .imm8s }, &.{ 0x83 }, 5, .none }, - .{ .sub, .mi, &.{ .rm32, .imm8s }, &.{ 0x83 }, 5, .none }, - .{ .sub, .mi, &.{ .rm64, .imm8s }, &.{ 0x83 }, 5, .long }, - .{ .sub, .mr, &.{ .rm8, .r8 }, &.{ 0x28 }, 0, .none }, - .{ .sub, .mr, &.{ .rm8, .r8 }, &.{ 0x28 }, 0, .rex }, - .{ .sub, .mr, &.{ .rm16, .r16 }, &.{ 0x29 }, 0, .none }, - .{ .sub, .mr, &.{ .rm32, .r32 }, &.{ 0x29 }, 0, .none }, - .{ .sub, .mr, &.{ .rm64, .r64 }, &.{ 0x29 }, 0, .long }, - .{ .sub, .rm, &.{ .r8, .rm8 }, &.{ 0x2a }, 0, .none }, - .{ .sub, .rm, &.{ .r8, .rm8 }, &.{ 0x2a }, 0, .rex }, - .{ .sub, .rm, &.{ .r16, .rm16 }, &.{ 0x2b }, 0, .none }, - .{ .sub, .rm, &.{ .r32, .rm32 }, &.{ 0x2b }, 0, .none }, - .{ .sub, .rm, &.{ .r64, .rm64 }, &.{ 0x2b }, 0, .long }, - - .{ .syscall, .np, &.{}, &.{ 0x0f, 0x05 }, 0, .none }, - - .{ .@"test", .zi, &.{ .al, .imm8 }, &.{ 0xa8 }, 0, .none }, - .{ .@"test", .zi, &.{ .ax, .imm16 }, &.{ 0xa9 }, 0, .none }, - .{ .@"test", .zi, &.{ .eax, .imm32 }, &.{ 0xa9 }, 0, .none }, - .{ .@"test", .zi, &.{ .rax, .imm32s }, &.{ 0xa9 }, 0, .long }, - .{ .@"test", .mi, &.{ .rm8, .imm8 }, &.{ 0xf6 }, 0, .none }, - .{ .@"test", .mi, &.{ .rm8, .imm8 }, &.{ 0xf6 }, 0, .rex }, - .{ .@"test", .mi, &.{ .rm16, .imm16 }, &.{ 0xf7 }, 0, .none }, - .{ .@"test", .mi, &.{ .rm32, .imm32 }, &.{ 0xf7 }, 0, .none }, - .{ .@"test", .mi, &.{ .rm64, .imm32s }, &.{ 0xf7 }, 0, .long }, - .{ .@"test", .mr, &.{ .rm8, .r8 }, &.{ 0x84 }, 0, .none }, - .{ .@"test", .mr, &.{ .rm8, .r8 }, &.{ 0x84 }, 0, .rex }, - .{ .@"test", .mr, &.{ .rm16, .r16 }, &.{ 0x85 }, 0, .none }, - .{ .@"test", .mr, &.{ .rm32, .r32 }, &.{ 0x85 }, 0, .none }, - .{ .@"test", .mr, &.{ .rm64, .r64 }, &.{ 0x85 }, 0, .long }, - - .{ .tzcnt, .rm, &.{ .r16, .rm16 }, &.{ 0xf3, 0x0f, 0xbc }, 0, .none }, - .{ .tzcnt, .rm, &.{ .r32, .rm32 }, &.{ 0xf3, 0x0f, 0xbc }, 0, .none }, - .{ .tzcnt, .rm, &.{ .r64, .rm64 }, &.{ 0xf3, 0x0f, 0xbc }, 0, .long }, - - .{ .ud2, .np, &.{}, &.{ 0x0f, 0x0b }, 0, .none }, - - .{ .xadd, .mr, &.{ .rm8, .r8 }, &.{ 0x0f, 0xc0 }, 0, .none }, - .{ .xadd, .mr, &.{ .rm8, .r8 }, &.{ 0x0f, 0xc0 }, 0, .rex }, - .{ .xadd, .mr, &.{ .rm16, .r16 }, &.{ 0x0f, 0xc1 }, 0, .none }, - .{ .xadd, .mr, &.{ .rm32, .r32 }, &.{ 0x0f, 0xc1 }, 0, .none }, - .{ .xadd, .mr, &.{ .rm64, .r64 }, &.{ 0x0f, 0xc1 }, 0, .long }, - - .{ .xchg, .o, &.{ .ax, .r16 }, &.{ 0x90 }, 0, .none }, - .{ .xchg, .o, &.{ .r16, .ax }, &.{ 0x90 }, 0, .none }, - .{ .xchg, .o, &.{ .eax, .r32 }, &.{ 0x90 }, 0, .none }, - .{ .xchg, .o, &.{ .rax, .r64 }, &.{ 0x90 }, 0, .long }, - .{ .xchg, .o, &.{ .r32, .eax }, &.{ 0x90 }, 0, .none }, - .{ .xchg, .o, &.{ .r64, .rax }, &.{ 0x90 }, 0, .long }, - .{ .xchg, .mr, &.{ .rm8, .r8 }, &.{ 0x86 }, 0, .none }, - .{ .xchg, .mr, &.{ .rm8, .r8 }, &.{ 0x86 }, 0, .rex }, - .{ .xchg, .rm, &.{ .r8, .rm8 }, &.{ 0x86 }, 0, .none }, - .{ .xchg, .rm, &.{ .r8, .rm8 }, &.{ 0x86 }, 0, .rex }, - .{ .xchg, .mr, &.{ .rm16, .r16 }, &.{ 0x87 }, 0, .none }, - .{ .xchg, .rm, &.{ .r16, .rm16 }, &.{ 0x87 }, 0, .none }, - .{ .xchg, .mr, &.{ .rm32, .r32 }, &.{ 0x87 }, 0, .none }, - .{ .xchg, .mr, &.{ .rm64, .r64 }, &.{ 0x87 }, 0, .long }, - .{ .xchg, .rm, &.{ .r32, .rm32 }, &.{ 0x87 }, 0, .none }, - .{ .xchg, .rm, &.{ .r64, .rm64 }, &.{ 0x87 }, 0, .long }, - - .{ .xor, .zi, &.{ .al, .imm8 }, &.{ 0x34 }, 0, .none }, - .{ .xor, .zi, &.{ .ax, .imm16 }, &.{ 0x35 }, 0, .none }, - .{ .xor, .zi, &.{ .eax,
.imm32 }, &.{ 0x35 }, 0, .none }, - .{ .xor, .zi, &.{ .rax, .imm32s }, &.{ 0x35 }, 0, .long }, - .{ .xor, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 6, .none }, - .{ .xor, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 6, .rex }, - .{ .xor, .mi, &.{ .rm16, .imm16 }, &.{ 0x81 }, 6, .none }, - .{ .xor, .mi, &.{ .rm32, .imm32 }, &.{ 0x81 }, 6, .none }, - .{ .xor, .mi, &.{ .rm64, .imm32s }, &.{ 0x81 }, 6, .long }, - .{ .xor, .mi, &.{ .rm16, .imm8s }, &.{ 0x83 }, 6, .none }, - .{ .xor, .mi, &.{ .rm32, .imm8s }, &.{ 0x83 }, 6, .none }, - .{ .xor, .mi, &.{ .rm64, .imm8s }, &.{ 0x83 }, 6, .long }, - .{ .xor, .mr, &.{ .rm8, .r8 }, &.{ 0x30 }, 0, .none }, - .{ .xor, .mr, &.{ .rm8, .r8 }, &.{ 0x30 }, 0, .rex }, - .{ .xor, .mr, &.{ .rm16, .r16 }, &.{ 0x31 }, 0, .none }, - .{ .xor, .mr, &.{ .rm32, .r32 }, &.{ 0x31 }, 0, .none }, - .{ .xor, .mr, &.{ .rm64, .r64 }, &.{ 0x31 }, 0, .long }, - .{ .xor, .rm, &.{ .r8, .rm8 }, &.{ 0x32 }, 0, .none }, - .{ .xor, .rm, &.{ .r8, .rm8 }, &.{ 0x32 }, 0, .rex }, - .{ .xor, .rm, &.{ .r16, .rm16 }, &.{ 0x33 }, 0, .none }, - .{ .xor, .rm, &.{ .r32, .rm32 }, &.{ 0x33 }, 0, .none }, - .{ .xor, .rm, &.{ .r64, .rm64 }, &.{ 0x33 }, 0, .long }, + .{ .movsxd, .rm, &.{ .r32, .rm32 }, &.{ 0x63 }, 0, .none, .none }, + .{ .movsxd, .rm, &.{ .r64, .rm32 }, &.{ 0x63 }, 0, .long, .none }, + + .{ .movzx, .rm, &.{ .r16, .rm8 }, &.{ 0x0f, 0xb6 }, 0, .short, .none }, + .{ .movzx, .rm, &.{ .r16, .rm8 }, &.{ 0x0f, 0xb6 }, 0, .rex_short, .none }, + .{ .movzx, .rm, &.{ .r32, .rm8 }, &.{ 0x0f, 0xb6 }, 0, .none, .none }, + .{ .movzx, .rm, &.{ .r32, .rm8 }, &.{ 0x0f, 0xb6 }, 0, .rex, .none }, + .{ .movzx, .rm, &.{ .r64, .rm8 }, &.{ 0x0f, 0xb6 }, 0, .long, .none }, + .{ .movzx, .rm, &.{ .r32, .rm16 }, &.{ 0x0f, 0xb7 }, 0, .none, .none }, + .{ .movzx, .rm, &.{ .r32, .rm16 }, &.{ 0x0f, 0xb7 }, 0, .rex, .none }, + .{ .movzx, .rm, &.{ .r64, .rm16 }, &.{ 0x0f, 0xb7 }, 0, .long, .none }, + + .{ .mul, .m, &.{ .rm8 }, &.{ 0xf6 }, 4, .none, .none }, + .{ .mul, .m, &.{ .rm8 }, &.{ 0xf6 }, 4, .rex, .none }, + .{ .mul, .m, &.{ .rm16 }, &.{ 0xf7 }, 4, .short, .none }, + .{ .mul, .m, &.{ .rm32 }, &.{ 0xf7 }, 4, .none, .none }, + .{ .mul, .m, &.{ .rm64 }, &.{ 0xf7 }, 4, .long, .none }, + + .{ .neg, .m, &.{ .rm8 }, &.{ 0xf6 }, 3, .none, .none }, + .{ .neg, .m, &.{ .rm8 }, &.{ 0xf6 }, 3, .rex, .none }, + .{ .neg, .m, &.{ .rm16 }, &.{ 0xf7 }, 3, .short, .none }, + .{ .neg, .m, &.{ .rm32 }, &.{ 0xf7 }, 3, .none, .none }, + .{ .neg, .m, &.{ .rm64 }, &.{ 0xf7 }, 3, .long, .none }, + + .{ .nop, .np, &.{}, &.{ 0x90 }, 0, .none, .none }, + + .{ .not, .m, &.{ .rm8 }, &.{ 0xf6 }, 2, .none, .none }, + .{ .not, .m, &.{ .rm8 }, &.{ 0xf6 }, 2, .rex, .none }, + .{ .not, .m, &.{ .rm16 }, &.{ 0xf7 }, 2, .short, .none }, + .{ .not, .m, &.{ .rm32 }, &.{ 0xf7 }, 2, .none, .none }, + .{ .not, .m, &.{ .rm64 }, &.{ 0xf7 }, 2, .long, .none }, + + .{ .@"or", .zi, &.{ .al, .imm8 }, &.{ 0x0c }, 0, .none, .none }, + .{ .@"or", .zi, &.{ .ax, .imm16 }, &.{ 0x0d }, 0, .short, .none }, + .{ .@"or", .zi, &.{ .eax, .imm32 }, &.{ 0x0d }, 0, .none, .none }, + .{ .@"or", .zi, &.{ .rax, .imm32s }, &.{ 0x0d }, 0, .long, .none }, + .{ .@"or", .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 1, .none, .none }, + .{ .@"or", .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 1, .rex, .none }, + .{ .@"or", .mi, &.{ .rm16, .imm16 }, &.{ 0x81 }, 1, .short, .none }, + .{ .@"or", .mi, &.{ .rm32, .imm32 }, &.{ 0x81 }, 1, .none, .none }, + .{ .@"or", .mi, &.{ .rm64, .imm32s }, &.{ 0x81 }, 1, .long, .none }, + .{ .@"or", .mi, &.{ .rm16, .imm8s }, &.{ 0x83 }, 1, .short, .none }, + .{ .@"or", .mi, &.{ .rm32, 
.imm8s }, &.{ 0x83 }, 1, .none, .none }, + .{ .@"or", .mi, &.{ .rm64, .imm8s }, &.{ 0x83 }, 1, .long, .none }, + .{ .@"or", .mr, &.{ .rm8, .r8 }, &.{ 0x08 }, 0, .none, .none }, + .{ .@"or", .mr, &.{ .rm8, .r8 }, &.{ 0x08 }, 0, .rex, .none }, + .{ .@"or", .mr, &.{ .rm16, .r16 }, &.{ 0x09 }, 0, .short, .none }, + .{ .@"or", .mr, &.{ .rm32, .r32 }, &.{ 0x09 }, 0, .none, .none }, + .{ .@"or", .mr, &.{ .rm64, .r64 }, &.{ 0x09 }, 0, .long, .none }, + .{ .@"or", .rm, &.{ .r8, .rm8 }, &.{ 0x0a }, 0, .none, .none }, + .{ .@"or", .rm, &.{ .r8, .rm8 }, &.{ 0x0a }, 0, .rex, .none }, + .{ .@"or", .rm, &.{ .r16, .rm16 }, &.{ 0x0b }, 0, .short, .none }, + .{ .@"or", .rm, &.{ .r32, .rm32 }, &.{ 0x0b }, 0, .none, .none }, + .{ .@"or", .rm, &.{ .r64, .rm64 }, &.{ 0x0b }, 0, .long, .none }, + + .{ .pop, .o, &.{ .r16 }, &.{ 0x58 }, 0, .short, .none }, + .{ .pop, .o, &.{ .r64 }, &.{ 0x58 }, 0, .none, .none }, + .{ .pop, .m, &.{ .rm16 }, &.{ 0x8f }, 0, .short, .none }, + .{ .pop, .m, &.{ .rm64 }, &.{ 0x8f }, 0, .none, .none }, + + .{ .popcnt, .rm, &.{ .r16, .rm16 }, &.{ 0xf3, 0x0f, 0xb8 }, 0, .short, .popcnt }, + .{ .popcnt, .rm, &.{ .r32, .rm32 }, &.{ 0xf3, 0x0f, 0xb8 }, 0, .none, .popcnt }, + .{ .popcnt, .rm, &.{ .r64, .rm64 }, &.{ 0xf3, 0x0f, 0xb8 }, 0, .long, .popcnt }, + + .{ .push, .o, &.{ .r16 }, &.{ 0x50 }, 0, .short, .none }, + .{ .push, .o, &.{ .r64 }, &.{ 0x50 }, 0, .none, .none }, + .{ .push, .m, &.{ .rm16 }, &.{ 0xff }, 6, .short, .none }, + .{ .push, .m, &.{ .rm64 }, &.{ 0xff }, 6, .none, .none }, + .{ .push, .i, &.{ .imm8 }, &.{ 0x6a }, 0, .none, .none }, + .{ .push, .i, &.{ .imm16 }, &.{ 0x68 }, 0, .short, .none }, + .{ .push, .i, &.{ .imm32 }, &.{ 0x68 }, 0, .none, .none }, + + .{ .ret, .np, &.{}, &.{ 0xc3 }, 0, .none, .none }, + + .{ .rcl, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 2, .none, .none }, + .{ .rcl, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 2, .rex, .none }, + .{ .rcl, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 2, .none, .none }, + .{ .rcl, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 2, .rex, .none }, + .{ .rcl, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 2, .none, .none }, + .{ .rcl, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 2, .rex, .none }, + .{ .rcl, .m1, &.{ .rm16, .unity }, &.{ 0xd1 }, 2, .short, .none }, + .{ .rcl, .mc, &.{ .rm16, .cl }, &.{ 0xd3 }, 2, .short, .none }, + .{ .rcl, .mi, &.{ .rm16, .imm8 }, &.{ 0xc1 }, 2, .short, .none }, + .{ .rcl, .m1, &.{ .rm32, .unity }, &.{ 0xd1 }, 2, .none, .none }, + .{ .rcl, .m1, &.{ .rm64, .unity }, &.{ 0xd1 }, 2, .long, .none }, + .{ .rcl, .mc, &.{ .rm32, .cl }, &.{ 0xd3 }, 2, .none, .none }, + .{ .rcl, .mc, &.{ .rm64, .cl }, &.{ 0xd3 }, 2, .long, .none }, + .{ .rcl, .mi, &.{ .rm32, .imm8 }, &.{ 0xc1 }, 2, .none, .none }, + .{ .rcl, .mi, &.{ .rm64, .imm8 }, &.{ 0xc1 }, 2, .long, .none }, + + .{ .rcr, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 3, .none, .none }, + .{ .rcr, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 3, .rex, .none }, + .{ .rcr, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 3, .none, .none }, + .{ .rcr, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 3, .rex, .none }, + .{ .rcr, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 3, .none, .none }, + .{ .rcr, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 3, .rex, .none }, + .{ .rcr, .m1, &.{ .rm16, .unity }, &.{ 0xd1 }, 3, .short, .none }, + .{ .rcr, .mc, &.{ .rm16, .cl }, &.{ 0xd3 }, 3, .short, .none }, + .{ .rcr, .mi, &.{ .rm16, .imm8 }, &.{ 0xc1 }, 3, .short, .none }, + .{ .rcr, .m1, &.{ .rm32, .unity }, &.{ 0xd1 }, 3, .none, .none }, + .{ .rcr, .m1, &.{ .rm64, .unity }, &.{ 0xd1 }, 3, .long, .none }, + .{ .rcr, .mc, &.{ .rm32, .cl }, &.{ 0xd3 }, 3, .none, .none }, + .{ 
.rcr, .mc, &.{ .rm64, .cl }, &.{ 0xd3 }, 3, .long, .none }, + .{ .rcr, .mi, &.{ .rm32, .imm8 }, &.{ 0xc1 }, 3, .none, .none }, + .{ .rcr, .mi, &.{ .rm64, .imm8 }, &.{ 0xc1 }, 3, .long, .none }, + + .{ .rol, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 0, .none, .none }, + .{ .rol, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 0, .rex, .none }, + .{ .rol, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 0, .none, .none }, + .{ .rol, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 0, .rex, .none }, + .{ .rol, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 0, .none, .none }, + .{ .rol, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 0, .rex, .none }, + .{ .rol, .m1, &.{ .rm16, .unity }, &.{ 0xd1 }, 0, .short, .none }, + .{ .rol, .mc, &.{ .rm16, .cl }, &.{ 0xd3 }, 0, .short, .none }, + .{ .rol, .mi, &.{ .rm16, .imm8 }, &.{ 0xc1 }, 0, .short, .none }, + .{ .rol, .m1, &.{ .rm32, .unity }, &.{ 0xd1 }, 0, .none, .none }, + .{ .rol, .m1, &.{ .rm64, .unity }, &.{ 0xd1 }, 0, .long, .none }, + .{ .rol, .mc, &.{ .rm32, .cl }, &.{ 0xd3 }, 0, .none, .none }, + .{ .rol, .mc, &.{ .rm64, .cl }, &.{ 0xd3 }, 0, .long, .none }, + .{ .rol, .mi, &.{ .rm32, .imm8 }, &.{ 0xc1 }, 0, .none, .none }, + .{ .rol, .mi, &.{ .rm64, .imm8 }, &.{ 0xc1 }, 0, .long, .none }, + + .{ .ror, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 1, .none, .none }, + .{ .ror, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 1, .rex, .none }, + .{ .ror, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 1, .none, .none }, + .{ .ror, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 1, .rex, .none }, + .{ .ror, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 1, .none, .none }, + .{ .ror, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 1, .rex, .none }, + .{ .ror, .m1, &.{ .rm16, .unity }, &.{ 0xd1 }, 1, .short, .none }, + .{ .ror, .mc, &.{ .rm16, .cl }, &.{ 0xd3 }, 1, .short, .none }, + .{ .ror, .mi, &.{ .rm16, .imm8 }, &.{ 0xc1 }, 1, .short, .none }, + .{ .ror, .m1, &.{ .rm32, .unity }, &.{ 0xd1 }, 1, .none, .none }, + .{ .ror, .m1, &.{ .rm64, .unity }, &.{ 0xd1 }, 1, .long, .none }, + .{ .ror, .mc, &.{ .rm32, .cl }, &.{ 0xd3 }, 1, .none, .none }, + .{ .ror, .mc, &.{ .rm64, .cl }, &.{ 0xd3 }, 1, .long, .none }, + .{ .ror, .mi, &.{ .rm32, .imm8 }, &.{ 0xc1 }, 1, .none, .none }, + .{ .ror, .mi, &.{ .rm64, .imm8 }, &.{ 0xc1 }, 1, .long, .none }, + + .{ .sal, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 4, .none, .none }, + .{ .sal, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 4, .rex, .none }, + .{ .sal, .m1, &.{ .rm16, .unity }, &.{ 0xd1 }, 4, .short, .none }, + .{ .sal, .m1, &.{ .rm32, .unity }, &.{ 0xd1 }, 4, .none, .none }, + .{ .sal, .m1, &.{ .rm64, .unity }, &.{ 0xd1 }, 4, .long, .none }, + .{ .sal, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 4, .none, .none }, + .{ .sal, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 4, .rex, .none }, + .{ .sal, .mc, &.{ .rm16, .cl }, &.{ 0xd3 }, 4, .short, .none }, + .{ .sal, .mc, &.{ .rm32, .cl }, &.{ 0xd3 }, 4, .none, .none }, + .{ .sal, .mc, &.{ .rm64, .cl }, &.{ 0xd3 }, 4, .long, .none }, + .{ .sal, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 4, .none, .none }, + .{ .sal, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 4, .rex, .none }, + .{ .sal, .mi, &.{ .rm16, .imm8 }, &.{ 0xc1 }, 4, .short, .none }, + .{ .sal, .mi, &.{ .rm32, .imm8 }, &.{ 0xc1 }, 4, .none, .none }, + .{ .sal, .mi, &.{ .rm64, .imm8 }, &.{ 0xc1 }, 4, .long, .none }, + + .{ .sar, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 7, .none, .none }, + .{ .sar, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 7, .rex, .none }, + .{ .sar, .m1, &.{ .rm16, .unity }, &.{ 0xd1 }, 7, .short, .none }, + .{ .sar, .m1, &.{ .rm32, .unity }, &.{ 0xd1 }, 7, .none, .none }, + .{ .sar, .m1, &.{ .rm64, .unity }, &.{ 0xd1 }, 7, .long, .none }, + .{ .sar, .mc, &.{ .rm8, .cl }, 
&.{ 0xd2 }, 7, .none, .none }, + .{ .sar, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 7, .rex, .none }, + .{ .sar, .mc, &.{ .rm16, .cl }, &.{ 0xd3 }, 7, .short, .none }, + .{ .sar, .mc, &.{ .rm32, .cl }, &.{ 0xd3 }, 7, .none, .none }, + .{ .sar, .mc, &.{ .rm64, .cl }, &.{ 0xd3 }, 7, .long, .none }, + .{ .sar, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 7, .none, .none }, + .{ .sar, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 7, .rex, .none }, + .{ .sar, .mi, &.{ .rm16, .imm8 }, &.{ 0xc1 }, 7, .short, .none }, + .{ .sar, .mi, &.{ .rm32, .imm8 }, &.{ 0xc1 }, 7, .none, .none }, + .{ .sar, .mi, &.{ .rm64, .imm8 }, &.{ 0xc1 }, 7, .long, .none }, + + .{ .sbb, .zi, &.{ .al, .imm8 }, &.{ 0x1c }, 0, .none, .none }, + .{ .sbb, .zi, &.{ .ax, .imm16 }, &.{ 0x1d }, 0, .short, .none }, + .{ .sbb, .zi, &.{ .eax, .imm32 }, &.{ 0x1d }, 0, .none, .none }, + .{ .sbb, .zi, &.{ .rax, .imm32s }, &.{ 0x1d }, 0, .long, .none }, + .{ .sbb, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 3, .none, .none }, + .{ .sbb, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 3, .rex, .none }, + .{ .sbb, .mi, &.{ .rm16, .imm16 }, &.{ 0x81 }, 3, .short, .none }, + .{ .sbb, .mi, &.{ .rm32, .imm32 }, &.{ 0x81 }, 3, .none, .none }, + .{ .sbb, .mi, &.{ .rm64, .imm32s }, &.{ 0x81 }, 3, .long, .none }, + .{ .sbb, .mi, &.{ .rm16, .imm8s }, &.{ 0x83 }, 3, .short, .none }, + .{ .sbb, .mi, &.{ .rm32, .imm8s }, &.{ 0x83 }, 3, .none, .none }, + .{ .sbb, .mi, &.{ .rm64, .imm8s }, &.{ 0x83 }, 3, .long, .none }, + .{ .sbb, .mr, &.{ .rm8, .r8 }, &.{ 0x18 }, 0, .none, .none }, + .{ .sbb, .mr, &.{ .rm8, .r8 }, &.{ 0x18 }, 0, .rex, .none }, + .{ .sbb, .mr, &.{ .rm16, .r16 }, &.{ 0x19 }, 0, .short, .none }, + .{ .sbb, .mr, &.{ .rm32, .r32 }, &.{ 0x19 }, 0, .none, .none }, + .{ .sbb, .mr, &.{ .rm64, .r64 }, &.{ 0x19 }, 0, .long, .none }, + .{ .sbb, .rm, &.{ .r8, .rm8 }, &.{ 0x1a }, 0, .none, .none }, + .{ .sbb, .rm, &.{ .r8, .rm8 }, &.{ 0x1a }, 0, .rex, .none }, + .{ .sbb, .rm, &.{ .r16, .rm16 }, &.{ 0x1b }, 0, .short, .none }, + .{ .sbb, .rm, &.{ .r32, .rm32 }, &.{ 0x1b }, 0, .none, .none }, + .{ .sbb, .rm, &.{ .r64, .rm64 }, &.{ 0x1b }, 0, .long, .none }, + + .{ .scas, .np, &.{ .m8 }, &.{ 0xae }, 0, .none, .none }, + .{ .scas, .np, &.{ .m16 }, &.{ 0xaf }, 0, .short, .none }, + .{ .scas, .np, &.{ .m32 }, &.{ 0xaf }, 0, .none, .none }, + .{ .scas, .np, &.{ .m64 }, &.{ 0xaf }, 0, .long, .none }, + + .{ .scasb, .np, &.{}, &.{ 0xae }, 0, .none, .none }, + .{ .scasw, .np, &.{}, &.{ 0xaf }, 0, .short, .none }, + .{ .scasd, .np, &.{}, &.{ 0xaf }, 0, .none, .none }, + .{ .scasq, .np, &.{}, &.{ 0xaf }, 0, .long, .none }, + + .{ .seta, .m, &.{ .rm8 }, &.{ 0x0f, 0x97 }, 0, .none, .none }, + .{ .seta, .m, &.{ .rm8 }, &.{ 0x0f, 0x97 }, 0, .rex, .none }, + .{ .setae, .m, &.{ .rm8 }, &.{ 0x0f, 0x93 }, 0, .none, .none }, + .{ .setae, .m, &.{ .rm8 }, &.{ 0x0f, 0x93 }, 0, .rex, .none }, + .{ .setb, .m, &.{ .rm8 }, &.{ 0x0f, 0x92 }, 0, .none, .none }, + .{ .setb, .m, &.{ .rm8 }, &.{ 0x0f, 0x92 }, 0, .rex, .none }, + .{ .setbe, .m, &.{ .rm8 }, &.{ 0x0f, 0x96 }, 0, .none, .none }, + .{ .setbe, .m, &.{ .rm8 }, &.{ 0x0f, 0x96 }, 0, .rex, .none }, + .{ .setc, .m, &.{ .rm8 }, &.{ 0x0f, 0x92 }, 0, .none, .none }, + .{ .setc, .m, &.{ .rm8 }, &.{ 0x0f, 0x92 }, 0, .rex, .none }, + .{ .sete, .m, &.{ .rm8 }, &.{ 0x0f, 0x94 }, 0, .none, .none }, + .{ .sete, .m, &.{ .rm8 }, &.{ 0x0f, 0x94 }, 0, .rex, .none }, + .{ .setg, .m, &.{ .rm8 }, &.{ 0x0f, 0x9f }, 0, .none, .none }, + .{ .setg, .m, &.{ .rm8 }, &.{ 0x0f, 0x9f }, 0, .rex, .none }, + .{ .setge, .m, &.{ .rm8 }, &.{ 0x0f, 0x9d }, 0, .none, .none }, + .{ .setge, .m, &.{ .rm8 }, 
&.{ 0x0f, 0x9d }, 0, .rex, .none }, + .{ .setl, .m, &.{ .rm8 }, &.{ 0x0f, 0x9c }, 0, .none, .none }, + .{ .setl, .m, &.{ .rm8 }, &.{ 0x0f, 0x9c }, 0, .rex, .none }, + .{ .setle, .m, &.{ .rm8 }, &.{ 0x0f, 0x9e }, 0, .none, .none }, + .{ .setle, .m, &.{ .rm8 }, &.{ 0x0f, 0x9e }, 0, .rex, .none }, + .{ .setna, .m, &.{ .rm8 }, &.{ 0x0f, 0x96 }, 0, .none, .none }, + .{ .setna, .m, &.{ .rm8 }, &.{ 0x0f, 0x96 }, 0, .rex, .none }, + .{ .setnae, .m, &.{ .rm8 }, &.{ 0x0f, 0x92 }, 0, .none, .none }, + .{ .setnae, .m, &.{ .rm8 }, &.{ 0x0f, 0x92 }, 0, .rex, .none }, + .{ .setnb, .m, &.{ .rm8 }, &.{ 0x0f, 0x93 }, 0, .none, .none }, + .{ .setnb, .m, &.{ .rm8 }, &.{ 0x0f, 0x93 }, 0, .rex, .none }, + .{ .setnbe, .m, &.{ .rm8 }, &.{ 0x0f, 0x97 }, 0, .none, .none }, + .{ .setnbe, .m, &.{ .rm8 }, &.{ 0x0f, 0x97 }, 0, .rex, .none }, + .{ .setnc, .m, &.{ .rm8 }, &.{ 0x0f, 0x93 }, 0, .none, .none }, + .{ .setnc, .m, &.{ .rm8 }, &.{ 0x0f, 0x93 }, 0, .rex, .none }, + .{ .setne, .m, &.{ .rm8 }, &.{ 0x0f, 0x95 }, 0, .none, .none }, + .{ .setne, .m, &.{ .rm8 }, &.{ 0x0f, 0x95 }, 0, .rex, .none }, + .{ .setng, .m, &.{ .rm8 }, &.{ 0x0f, 0x9e }, 0, .none, .none }, + .{ .setng, .m, &.{ .rm8 }, &.{ 0x0f, 0x9e }, 0, .rex, .none }, + .{ .setnge, .m, &.{ .rm8 }, &.{ 0x0f, 0x9c }, 0, .none, .none }, + .{ .setnge, .m, &.{ .rm8 }, &.{ 0x0f, 0x9c }, 0, .rex, .none }, + .{ .setnl, .m, &.{ .rm8 }, &.{ 0x0f, 0x9d }, 0, .none, .none }, + .{ .setnl, .m, &.{ .rm8 }, &.{ 0x0f, 0x9d }, 0, .rex, .none }, + .{ .setnle, .m, &.{ .rm8 }, &.{ 0x0f, 0x9f }, 0, .none, .none }, + .{ .setnle, .m, &.{ .rm8 }, &.{ 0x0f, 0x9f }, 0, .rex, .none }, + .{ .setno, .m, &.{ .rm8 }, &.{ 0x0f, 0x91 }, 0, .none, .none }, + .{ .setno, .m, &.{ .rm8 }, &.{ 0x0f, 0x91 }, 0, .rex, .none }, + .{ .setnp, .m, &.{ .rm8 }, &.{ 0x0f, 0x9b }, 0, .none, .none }, + .{ .setnp, .m, &.{ .rm8 }, &.{ 0x0f, 0x9b }, 0, .rex, .none }, + .{ .setns, .m, &.{ .rm8 }, &.{ 0x0f, 0x99 }, 0, .none, .none }, + .{ .setns, .m, &.{ .rm8 }, &.{ 0x0f, 0x99 }, 0, .rex, .none }, + .{ .setnz, .m, &.{ .rm8 }, &.{ 0x0f, 0x95 }, 0, .none, .none }, + .{ .setnz, .m, &.{ .rm8 }, &.{ 0x0f, 0x95 }, 0, .rex, .none }, + .{ .seto, .m, &.{ .rm8 }, &.{ 0x0f, 0x90 }, 0, .none, .none }, + .{ .seto, .m, &.{ .rm8 }, &.{ 0x0f, 0x90 }, 0, .rex, .none }, + .{ .setp, .m, &.{ .rm8 }, &.{ 0x0f, 0x9a }, 0, .none, .none }, + .{ .setp, .m, &.{ .rm8 }, &.{ 0x0f, 0x9a }, 0, .rex, .none }, + .{ .setpe, .m, &.{ .rm8 }, &.{ 0x0f, 0x9a }, 0, .none, .none }, + .{ .setpe, .m, &.{ .rm8 }, &.{ 0x0f, 0x9a }, 0, .rex, .none }, + .{ .setpo, .m, &.{ .rm8 }, &.{ 0x0f, 0x9b }, 0, .none, .none }, + .{ .setpo, .m, &.{ .rm8 }, &.{ 0x0f, 0x9b }, 0, .rex, .none }, + .{ .sets, .m, &.{ .rm8 }, &.{ 0x0f, 0x98 }, 0, .none, .none }, + .{ .sets, .m, &.{ .rm8 }, &.{ 0x0f, 0x98 }, 0, .rex, .none }, + .{ .setz, .m, &.{ .rm8 }, &.{ 0x0f, 0x94 }, 0, .none, .none }, + .{ .setz, .m, &.{ .rm8 }, &.{ 0x0f, 0x94 }, 0, .rex, .none }, + + .{ .sfence, .np, &.{}, &.{ 0x0f, 0xae, 0xf8 }, 0, .none, .none }, + + .{ .shl, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 4, .none, .none }, + .{ .shl, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 4, .rex, .none }, + .{ .shl, .m1, &.{ .rm16, .unity }, &.{ 0xd1 }, 4, .short, .none }, + .{ .shl, .m1, &.{ .rm32, .unity }, &.{ 0xd1 }, 4, .none, .none }, + .{ .shl, .m1, &.{ .rm64, .unity }, &.{ 0xd1 }, 4, .long, .none }, + .{ .shl, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 4, .none, .none }, + .{ .shl, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 4, .rex, .none }, + .{ .shl, .mc, &.{ .rm16, .cl }, &.{ 0xd3 }, 4, .short, .none }, + .{ .shl, .mc, &.{ .rm32, .cl }, 
&.{ 0xd3 }, 4, .none, .none }, + .{ .shl, .mc, &.{ .rm64, .cl }, &.{ 0xd3 }, 4, .long, .none }, + .{ .shl, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 4, .none, .none }, + .{ .shl, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 4, .rex, .none }, + .{ .shl, .mi, &.{ .rm16, .imm8 }, &.{ 0xc1 }, 4, .short, .none }, + .{ .shl, .mi, &.{ .rm32, .imm8 }, &.{ 0xc1 }, 4, .none, .none }, + .{ .shl, .mi, &.{ .rm64, .imm8 }, &.{ 0xc1 }, 4, .long, .none }, + + .{ .shld, .mri, &.{ .rm16, .r16, .imm8 }, &.{ 0x0f, 0xa4 }, 0, .short, .none }, + .{ .shld, .mrc, &.{ .rm16, .r16, .cl }, &.{ 0x0f, 0xa5 }, 0, .short, .none }, + .{ .shld, .mri, &.{ .rm32, .r32, .imm8 }, &.{ 0x0f, 0xa4 }, 0, .none, .none }, + .{ .shld, .mri, &.{ .rm64, .r64, .imm8 }, &.{ 0x0f, 0xa4 }, 0, .long, .none }, + .{ .shld, .mrc, &.{ .rm32, .r32, .cl }, &.{ 0x0f, 0xa5 }, 0, .none, .none }, + .{ .shld, .mrc, &.{ .rm64, .r64, .cl }, &.{ 0x0f, 0xa5 }, 0, .long, .none }, + + .{ .shr, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 5, .none, .none }, + .{ .shr, .m1, &.{ .rm8, .unity }, &.{ 0xd0 }, 5, .rex, .none }, + .{ .shr, .m1, &.{ .rm16, .unity }, &.{ 0xd1 }, 5, .short, .none }, + .{ .shr, .m1, &.{ .rm32, .unity }, &.{ 0xd1 }, 5, .none, .none }, + .{ .shr, .m1, &.{ .rm64, .unity }, &.{ 0xd1 }, 5, .long, .none }, + .{ .shr, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 5, .none, .none }, + .{ .shr, .mc, &.{ .rm8, .cl }, &.{ 0xd2 }, 5, .rex, .none }, + .{ .shr, .mc, &.{ .rm16, .cl }, &.{ 0xd3 }, 5, .short, .none }, + .{ .shr, .mc, &.{ .rm32, .cl }, &.{ 0xd3 }, 5, .none, .none }, + .{ .shr, .mc, &.{ .rm64, .cl }, &.{ 0xd3 }, 5, .long, .none }, + .{ .shr, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 5, .none, .none }, + .{ .shr, .mi, &.{ .rm8, .imm8 }, &.{ 0xc0 }, 5, .rex, .none }, + .{ .shr, .mi, &.{ .rm16, .imm8 }, &.{ 0xc1 }, 5, .short, .none }, + .{ .shr, .mi, &.{ .rm32, .imm8 }, &.{ 0xc1 }, 5, .none, .none }, + .{ .shr, .mi, &.{ .rm64, .imm8 }, &.{ 0xc1 }, 5, .long, .none }, + + .{ .shrd, .mri, &.{ .rm16, .r16, .imm8 }, &.{ 0x0f, 0xac }, 0, .short, .none }, + .{ .shrd, .mrc, &.{ .rm16, .r16, .cl }, &.{ 0x0f, 0xad }, 0, .short, .none }, + .{ .shrd, .mri, &.{ .rm32, .r32, .imm8 }, &.{ 0x0f, 0xac }, 0, .none, .none }, + .{ .shrd, .mri, &.{ .rm64, .r64, .imm8 }, &.{ 0x0f, 0xac }, 0, .long, .none }, + .{ .shrd, .mrc, &.{ .rm32, .r32, .cl }, &.{ 0x0f, 0xad }, 0, .none, .none }, + .{ .shrd, .mrc, &.{ .rm64, .r64, .cl }, &.{ 0x0f, 0xad }, 0, .long, .none }, + + .{ .stos, .np, &.{ .m8 }, &.{ 0xaa }, 0, .none, .none }, + .{ .stos, .np, &.{ .m16 }, &.{ 0xab }, 0, .short, .none }, + .{ .stos, .np, &.{ .m32 }, &.{ 0xab }, 0, .none, .none }, + .{ .stos, .np, &.{ .m64 }, &.{ 0xab }, 0, .long, .none }, + + .{ .stosb, .np, &.{}, &.{ 0xaa }, 0, .none, .none }, + .{ .stosw, .np, &.{}, &.{ 0xab }, 0, .short, .none }, + .{ .stosd, .np, &.{}, &.{ 0xab }, 0, .none, .none }, + .{ .stosq, .np, &.{}, &.{ 0xab }, 0, .long, .none }, + + .{ .sub, .zi, &.{ .al, .imm8 }, &.{ 0x2c }, 0, .none, .none }, + .{ .sub, .zi, &.{ .ax, .imm16 }, &.{ 0x2d }, 0, .short, .none }, + .{ .sub, .zi, &.{ .eax, .imm32 }, &.{ 0x2d }, 0, .none, .none }, + .{ .sub, .zi, &.{ .rax, .imm32s }, &.{ 0x2d }, 0, .long, .none }, + .{ .sub, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 5, .none, .none }, + .{ .sub, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 5, .rex, .none }, + .{ .sub, .mi, &.{ .rm16, .imm16 }, &.{ 0x81 }, 5, .short, .none }, + .{ .sub, .mi, &.{ .rm32, .imm32 }, &.{ 0x81 }, 5, .none, .none }, + .{ .sub, .mi, &.{ .rm64, .imm32s }, &.{ 0x81 }, 5, .long, .none }, + .{ .sub, .mi, &.{ .rm16, .imm8s }, &.{ 0x83 }, 5, .short, .none }, + .{ .sub, .mi, &.{ 
.rm32, .imm8s }, &.{ 0x83 }, 5, .none, .none }, + .{ .sub, .mi, &.{ .rm64, .imm8s }, &.{ 0x83 }, 5, .long, .none }, + .{ .sub, .mr, &.{ .rm8, .r8 }, &.{ 0x28 }, 0, .none, .none }, + .{ .sub, .mr, &.{ .rm8, .r8 }, &.{ 0x28 }, 0, .rex, .none }, + .{ .sub, .mr, &.{ .rm16, .r16 }, &.{ 0x29 }, 0, .short, .none }, + .{ .sub, .mr, &.{ .rm32, .r32 }, &.{ 0x29 }, 0, .none, .none }, + .{ .sub, .mr, &.{ .rm64, .r64 }, &.{ 0x29 }, 0, .long, .none }, + .{ .sub, .rm, &.{ .r8, .rm8 }, &.{ 0x2a }, 0, .none, .none }, + .{ .sub, .rm, &.{ .r8, .rm8 }, &.{ 0x2a }, 0, .rex, .none }, + .{ .sub, .rm, &.{ .r16, .rm16 }, &.{ 0x2b }, 0, .short, .none }, + .{ .sub, .rm, &.{ .r32, .rm32 }, &.{ 0x2b }, 0, .none, .none }, + .{ .sub, .rm, &.{ .r64, .rm64 }, &.{ 0x2b }, 0, .long, .none }, + + .{ .syscall, .np, &.{}, &.{ 0x0f, 0x05 }, 0, .none, .none }, + + .{ .@"test", .zi, &.{ .al, .imm8 }, &.{ 0xa8 }, 0, .none, .none }, + .{ .@"test", .zi, &.{ .ax, .imm16 }, &.{ 0xa9 }, 0, .short, .none }, + .{ .@"test", .zi, &.{ .eax, .imm32 }, &.{ 0xa9 }, 0, .none, .none }, + .{ .@"test", .zi, &.{ .rax, .imm32s }, &.{ 0xa9 }, 0, .long, .none }, + .{ .@"test", .mi, &.{ .rm8, .imm8 }, &.{ 0xf6 }, 0, .none, .none }, + .{ .@"test", .mi, &.{ .rm8, .imm8 }, &.{ 0xf6 }, 0, .rex, .none }, + .{ .@"test", .mi, &.{ .rm16, .imm16 }, &.{ 0xf7 }, 0, .short, .none }, + .{ .@"test", .mi, &.{ .rm32, .imm32 }, &.{ 0xf7 }, 0, .none, .none }, + .{ .@"test", .mi, &.{ .rm64, .imm32s }, &.{ 0xf7 }, 0, .long, .none }, + .{ .@"test", .mr, &.{ .rm8, .r8 }, &.{ 0x84 }, 0, .none, .none }, + .{ .@"test", .mr, &.{ .rm8, .r8 }, &.{ 0x84 }, 0, .rex, .none }, + .{ .@"test", .mr, &.{ .rm16, .r16 }, &.{ 0x85 }, 0, .short, .none }, + .{ .@"test", .mr, &.{ .rm32, .r32 }, &.{ 0x85 }, 0, .none, .none }, + .{ .@"test", .mr, &.{ .rm64, .r64 }, &.{ 0x85 }, 0, .long, .none }, + + .{ .tzcnt, .rm, &.{ .r16, .rm16 }, &.{ 0xf3, 0x0f, 0xbc }, 0, .short, .bmi }, + .{ .tzcnt, .rm, &.{ .r32, .rm32 }, &.{ 0xf3, 0x0f, 0xbc }, 0, .none, .bmi }, + .{ .tzcnt, .rm, &.{ .r64, .rm64 }, &.{ 0xf3, 0x0f, 0xbc }, 0, .long, .bmi }, + + .{ .ud2, .np, &.{}, &.{ 0x0f, 0x0b }, 0, .none, .none }, + + .{ .xadd, .mr, &.{ .rm8, .r8 }, &.{ 0x0f, 0xc0 }, 0, .none, .none }, + .{ .xadd, .mr, &.{ .rm8, .r8 }, &.{ 0x0f, 0xc0 }, 0, .rex, .none }, + .{ .xadd, .mr, &.{ .rm16, .r16 }, &.{ 0x0f, 0xc1 }, 0, .short, .none }, + .{ .xadd, .mr, &.{ .rm32, .r32 }, &.{ 0x0f, 0xc1 }, 0, .none, .none }, + .{ .xadd, .mr, &.{ .rm64, .r64 }, &.{ 0x0f, 0xc1 }, 0, .long, .none }, + + .{ .xchg, .o, &.{ .ax, .r16 }, &.{ 0x90 }, 0, .short, .none }, + .{ .xchg, .o, &.{ .r16, .ax }, &.{ 0x90 }, 0, .short, .none }, + .{ .xchg, .o, &.{ .eax, .r32 }, &.{ 0x90 }, 0, .none, .none }, + .{ .xchg, .o, &.{ .rax, .r64 }, &.{ 0x90 }, 0, .long, .none }, + .{ .xchg, .o, &.{ .r32, .eax }, &.{ 0x90 }, 0, .none, .none }, + .{ .xchg, .o, &.{ .r64, .rax }, &.{ 0x90 }, 0, .long, .none }, + .{ .xchg, .mr, &.{ .rm8, .r8 }, &.{ 0x86 }, 0, .none, .none }, + .{ .xchg, .mr, &.{ .rm8, .r8 }, &.{ 0x86 }, 0, .rex, .none }, + .{ .xchg, .rm, &.{ .r8, .rm8 }, &.{ 0x86 }, 0, .none, .none }, + .{ .xchg, .rm, &.{ .r8, .rm8 }, &.{ 0x86 }, 0, .rex, .none }, + .{ .xchg, .mr, &.{ .rm16, .r16 }, &.{ 0x87 }, 0, .short, .none }, + .{ .xchg, .rm, &.{ .r16, .rm16 }, &.{ 0x87 }, 0, .short, .none }, + .{ .xchg, .mr, &.{ .rm32, .r32 }, &.{ 0x87 }, 0, .none, .none }, + .{ .xchg, .mr, &.{ .rm64, .r64 }, &.{ 0x87 }, 0, .long, .none }, + .{ .xchg, .rm, &.{ .r32, .rm32 }, &.{ 0x87 }, 0, .none, .none }, + .{ .xchg, .rm, &.{ .r64, .rm64 }, &.{ 0x87 }, 0, .long, .none }, + + .{ .xor, 
.zi, &.{ .al, .imm8 }, &.{ 0x34 }, 0, .none, .none }, + .{ .xor, .zi, &.{ .ax, .imm16 }, &.{ 0x35 }, 0, .short, .none }, + .{ .xor, .zi, &.{ .eax, .imm32 }, &.{ 0x35 }, 0, .none, .none }, + .{ .xor, .zi, &.{ .rax, .imm32s }, &.{ 0x35 }, 0, .long, .none }, + .{ .xor, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 6, .none, .none }, + .{ .xor, .mi, &.{ .rm8, .imm8 }, &.{ 0x80 }, 6, .rex, .none }, + .{ .xor, .mi, &.{ .rm16, .imm16 }, &.{ 0x81 }, 6, .short, .none }, + .{ .xor, .mi, &.{ .rm32, .imm32 }, &.{ 0x81 }, 6, .none, .none }, + .{ .xor, .mi, &.{ .rm64, .imm32s }, &.{ 0x81 }, 6, .long, .none }, + .{ .xor, .mi, &.{ .rm16, .imm8s }, &.{ 0x83 }, 6, .short, .none }, + .{ .xor, .mi, &.{ .rm32, .imm8s }, &.{ 0x83 }, 6, .none, .none }, + .{ .xor, .mi, &.{ .rm64, .imm8s }, &.{ 0x83 }, 6, .long, .none }, + .{ .xor, .mr, &.{ .rm8, .r8 }, &.{ 0x30 }, 0, .none, .none }, + .{ .xor, .mr, &.{ .rm8, .r8 }, &.{ 0x30 }, 0, .rex, .none }, + .{ .xor, .mr, &.{ .rm16, .r16 }, &.{ 0x31 }, 0, .short, .none }, + .{ .xor, .mr, &.{ .rm32, .r32 }, &.{ 0x31 }, 0, .none, .none }, + .{ .xor, .mr, &.{ .rm64, .r64 }, &.{ 0x31 }, 0, .long, .none }, + .{ .xor, .rm, &.{ .r8, .rm8 }, &.{ 0x32 }, 0, .none, .none }, + .{ .xor, .rm, &.{ .r8, .rm8 }, &.{ 0x32 }, 0, .rex, .none }, + .{ .xor, .rm, &.{ .r16, .rm16 }, &.{ 0x33 }, 0, .short, .none }, + .{ .xor, .rm, &.{ .r32, .rm32 }, &.{ 0x33 }, 0, .none, .none }, + .{ .xor, .rm, &.{ .r64, .rm64 }, &.{ 0x33 }, 0, .long, .none }, + + // X87 + .{ .fisttp, .m, &.{ .m16 }, &.{ 0xdf }, 1, .none, .x87 }, + .{ .fisttp, .m, &.{ .m32 }, &.{ 0xdb }, 1, .none, .x87 }, + .{ .fisttp, .m, &.{ .m64 }, &.{ 0xdd }, 1, .none, .x87 }, + + .{ .fld, .m, &.{ .m32 }, &.{ 0xd9 }, 0, .none, .x87 }, + .{ .fld, .m, &.{ .m64 }, &.{ 0xdd }, 0, .none, .x87 }, + .{ .fld, .m, &.{ .m80 }, &.{ 0xdb }, 5, .none, .x87 }, // SSE - .{ .addss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x58 }, 0, .sse }, + .{ .addps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x58 }, 0, .none, .sse }, - .{ .cmpss, .rmi, &.{ .xmm, .xmm_m32, .imm8 }, &.{ 0xf3, 0x0f, 0xc2 }, 0, .sse }, + .{ .addss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x58 }, 0, .none, .sse }, - .{ .divss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5e }, 0, .sse }, + .{ .andnps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x55 }, 0, .none, .sse }, - .{ .maxss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5f }, 0, .sse }, + .{ .andps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x54 }, 0, .none, .sse }, - .{ .minss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5d }, 0, .sse }, + .{ .cmpps, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x0f, 0xc2 }, 0, .none, .sse }, - .{ .movss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x10 }, 0, .sse }, - .{ .movss, .mr, &.{ .xmm_m32, .xmm }, &.{ 0xf3, 0x0f, 0x11 }, 0, .sse }, + .{ .cmpss, .rmi, &.{ .xmm, .xmm_m32, .imm8 }, &.{ 0xf3, 0x0f, 0xc2 }, 0, .none, .sse }, - .{ .mulss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x59 }, 0, .sse }, + .{ .cvtpi2ps, .rm, &.{ .xmm, .mm_m64 }, &.{ 0x0f, 0x2a }, 0, .none, .sse }, - .{ .subss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5c }, 0, .sse }, + .{ .cvtps2pi, .rm, &.{ .mm, .xmm_m64 }, &.{ 0x0f, 0x2d }, 0, .none, .sse }, - .{ .ucomiss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0x0f, 0x2e }, 0, .sse }, + .{ .cvtsi2ss, .rm, &.{ .xmm, .rm32 }, &.{ 0xf3, 0x0f, 0x2a }, 0, .none, .sse }, + .{ .cvtsi2ss, .rm, &.{ .xmm, .rm64 }, &.{ 0xf3, 0x0f, 0x2a }, 0, .long, .sse }, + + .{ .cvtss2si, .rm, &.{ .r32, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x2d }, 0, .none, .sse }, + .{ .cvtss2si, .rm, &.{ .r64, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x2d }, 0, .long, .sse 
}, + + .{ .cvttps2pi, .rm, &.{ .mm, .xmm_m64 }, &.{ 0x0f, 0x2c }, 0, .none, .sse }, + + .{ .cvttss2si, .rm, &.{ .r32, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x2c }, 0, .none, .sse }, + .{ .cvttss2si, .rm, &.{ .r64, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x2c }, 0, .long, .sse }, + + .{ .divps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x5e }, 0, .none, .sse }, + + .{ .divss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5e }, 0, .none, .sse }, + + .{ .maxps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x5f }, 0, .none, .sse }, + + .{ .maxss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5f }, 0, .none, .sse }, + + .{ .minps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x5d }, 0, .none, .sse }, + + .{ .minss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5d }, 0, .none, .sse }, + + .{ .movaps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x28 }, 0, .none, .sse }, + .{ .movaps, .mr, &.{ .xmm_m128, .xmm }, &.{ 0x0f, 0x29 }, 0, .none, .sse }, + + .{ .movhlps, .rm, &.{ .xmm, .xmm }, &.{ 0x0f, 0x12 }, 0, .none, .sse }, + + .{ .movlhps, .rm, &.{ .xmm, .xmm }, &.{ 0x0f, 0x16 }, 0, .none, .sse }, + + .{ .movss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x10 }, 0, .none, .sse }, + .{ .movss, .mr, &.{ .xmm_m32, .xmm }, &.{ 0xf3, 0x0f, 0x11 }, 0, .none, .sse }, + + .{ .movups, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x10 }, 0, .none, .sse }, + .{ .movups, .mr, &.{ .xmm_m128, .xmm }, &.{ 0x0f, 0x11 }, 0, .none, .sse }, + + .{ .mulps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x59 }, 0, .none, .sse }, + + .{ .mulss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x59 }, 0, .none, .sse }, + + .{ .orps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x56 }, 0, .none, .sse }, + + .{ .shufps, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x0f, 0xc6 }, 0, .none, .sse }, + + .{ .sqrtps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x51 }, 0, .none, .sse }, + + .{ .sqrtss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x51 }, 0, .none, .sse }, + + .{ .subps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x5c }, 0, .none, .sse }, + + .{ .subss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5c }, 0, .none, .sse }, + + .{ .ucomiss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0x0f, 0x2e }, 0, .none, .sse }, + + .{ .xorps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x57 }, 0, .none, .sse }, // SSE2 - .{ .addsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x58 }, 0, .sse2 }, + .{ .addpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x58 }, 0, .none, .sse2 }, + + .{ .addsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x58 }, 0, .none, .sse2 }, + + .{ .andnpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x55 }, 0, .none, .sse2 }, + + .{ .andpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x54 }, 0, .none, .sse2 }, + + .{ .cmppd, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0xc2 }, 0, .none, .sse2 }, + + .{ .cmpsd, .rmi, &.{ .xmm, .xmm_m64, .imm8 }, &.{ 0xf2, 0x0f, 0xc2 }, 0, .none, .sse2 }, + + .{ .cvtdq2pd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf3, 0x0f, 0xe6 }, 0, .none, .sse2 }, + + .{ .cvtdq2ps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x5b }, 0, .none, .sse2 }, + + .{ .cvtpd2dq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0xf2, 0x0f, 0xe6 }, 0, .none, .sse2 }, + + .{ .cvtpd2pi, .rm, &.{ .mm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x2d }, 0, .none, .sse2 }, + + .{ .cvtpd2ps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x5a }, 0, .none, .sse2 }, + + .{ .cvtpi2pd, .rm, &.{ .xmm, .mm_m64 }, &.{ 0x66, 0x0f, 0x2a }, 0, .none, .sse2 }, + + .{ .cvtps2dq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x5b }, 0, .none, .sse2 }, + + .{ .cvtps2pd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x0f, 0x5a }, 0, .none, .sse2 }, + + .{ .cvtsd2si, .rm, &.{ .r32, .xmm_m64 }, &.{ 
0xf2, 0x0f, 0x2d }, 0, .none, .sse2 }, + .{ .cvtsd2si, .rm, &.{ .r64, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x2d }, 0, .long, .sse2 }, + + .{ .cvtsd2ss, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5a }, 0, .none, .sse2 }, + + .{ .cvtsi2sd, .rm, &.{ .xmm, .rm32 }, &.{ 0xf2, 0x0f, 0x2a }, 0, .none, .sse2 }, + .{ .cvtsi2sd, .rm, &.{ .xmm, .rm64 }, &.{ 0xf2, 0x0f, 0x2a }, 0, .long, .sse2 }, + + .{ .cvtss2sd, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5a }, 0, .none, .sse2 }, + + .{ .cvttpd2dq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xe6 }, 0, .none, .sse2 }, + + .{ .cvttpd2pi, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x2c }, 0, .none, .sse2 }, + + .{ .cvttps2dq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0xf3, 0x0f, 0x5b }, 0, .none, .sse2 }, + + .{ .cvttsd2si, .rm, &.{ .r32, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x2c }, 0, .none, .sse2 }, + .{ .cvttsd2si, .rm, &.{ .r64, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x2c }, 0, .long, .sse2 }, + + .{ .divpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x5e }, 0, .none, .sse2 }, + + .{ .divsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5e }, 0, .none, .sse2 }, + + .{ .maxpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x5f }, 0, .none, .sse2 }, + + .{ .maxsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5f }, 0, .none, .sse2 }, + + .{ .minpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x5d }, 0, .none, .sse2 }, + + .{ .minsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5d }, 0, .none, .sse2 }, + + .{ .movapd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x28 }, 0, .none, .sse2 }, + .{ .movapd, .mr, &.{ .xmm_m128, .xmm }, &.{ 0x66, 0x0f, 0x29 }, 0, .none, .sse2 }, + + .{ .movd, .rm, &.{ .xmm, .rm32 }, &.{ 0x66, 0x0f, 0x6e }, 0, .none, .sse2 }, + .{ .movq, .rm, &.{ .xmm, .rm64 }, &.{ 0x66, 0x0f, 0x6e }, 0, .long, .sse2 }, + .{ .movd, .mr, &.{ .rm32, .xmm }, &.{ 0x66, 0x0f, 0x7e }, 0, .none, .sse2 }, + .{ .movq, .mr, &.{ .rm64, .xmm }, &.{ 0x66, 0x0f, 0x7e }, 0, .long, .sse2 }, + + .{ .movdqa, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6f }, 0, .none, .sse2 }, + .{ .movdqa, .mr, &.{ .xmm_m128, .xmm }, &.{ 0x66, 0x0f, 0x7f }, 0, .none, .sse2 }, + + .{ .movdqu, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0xf3, 0x0f, 0x6f }, 0, .none, .sse2 }, + .{ .movdqu, .mr, &.{ .xmm_m128, .xmm }, &.{ 0xf3, 0x0f, 0x7f }, 0, .none, .sse2 }, + + .{ .movq, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf3, 0x0f, 0x7e }, 0, .none, .sse2 }, + .{ .movq, .mr, &.{ .xmm_m64, .xmm }, &.{ 0x66, 0x0f, 0xd6 }, 0, .none, .sse2 }, + + .{ .movupd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x10 }, 0, .none, .sse2 }, + .{ .movupd, .mr, &.{ .xmm_m128, .xmm }, &.{ 0x66, 0x0f, 0x11 }, 0, .none, .sse2 }, + + .{ .mulpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x59 }, 0, .none, .sse2 }, + + .{ .mulsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x59 }, 0, .none, .sse2 }, + + .{ .orpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x56 }, 0, .none, .sse2 }, + + .{ .packsswb, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x63 }, 0, .none, .sse2 }, + .{ .packssdw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6b }, 0, .none, .sse2 }, + + .{ .packuswb, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x67 }, 0, .none, .sse2 }, + + .{ .paddb, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xfc }, 0, .none, .sse2 }, + .{ .paddw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xfd }, 0, .none, .sse2 }, + .{ .paddd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xfe }, 0, .none, .sse2 }, + .{ .paddq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd4 }, 0, .none, .sse2 }, + + .{ .paddsb, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xec }, 0, .none, .sse2 }, + .{ 
.paddsw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xed }, 0, .none, .sse2 }, - .{ .cmpsd, .rmi, &.{ .xmm, .xmm_m64, .imm8 }, &.{ 0xf2, 0x0f, 0xc2 }, 0, .sse2 }, + .{ .paddusb, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xdc }, 0, .none, .sse2 }, + .{ .paddusw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xdd }, 0, .none, .sse2 }, - .{ .divsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5e }, 0, .sse2 }, + .{ .pand, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xdb }, 0, .none, .sse2 }, - .{ .maxsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5f }, 0, .sse2 }, + .{ .pandn, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xdf }, 0, .none, .sse2 }, - .{ .minsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5d }, 0, .sse2 }, + .{ .pextrw, .rmi, &.{ .r32, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0xc5 }, 0, .none, .sse2 }, - .{ .movd, .rm, &.{ .xmm, .rm32 }, &.{ 0x66, 0x0f, 0x6e }, 0, .sse2 }, - .{ .movd, .mr, &.{ .rm32, .xmm }, &.{ 0x66, 0x0f, 0x7e }, 0, .sse2 }, + .{ .pinsrw, .rmi, &.{ .xmm, .r32_m16, .imm8 }, &.{ 0x66, 0x0f, 0xc4 }, 0, .none, .sse2 }, - .{ .movq, .rm, &.{ .xmm, .rm64 }, &.{ 0x66, 0x0f, 0x6e }, 0, .sse2_long }, - .{ .movq, .mr, &.{ .rm64, .xmm }, &.{ 0x66, 0x0f, 0x7e }, 0, .sse2_long }, + .{ .pmaxsw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xee }, 0, .none, .sse2 }, - .{ .movq, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf3, 0x0f, 0x7e }, 0, .sse2 }, - .{ .movq, .mr, &.{ .xmm_m64, .xmm }, &.{ 0x66, 0x0f, 0xd6 }, 0, .sse2 }, + .{ .pmaxub, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xde }, 0, .none, .sse2 }, - .{ .mulsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x59 }, 0, .sse2 }, + .{ .pminsw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xea }, 0, .none, .sse2 }, - .{ .subsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5c }, 0, .sse2 }, + .{ .pminub, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xda }, 0, .none, .sse2 }, - .{ .movsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x10 }, 0, .sse2 }, - .{ .movsd, .mr, &.{ .xmm_m64, .xmm }, &.{ 0xf2, 0x0f, 0x11 }, 0, .sse2 }, + .{ .pmulhw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xe5 }, 0, .none, .sse2 }, - .{ .ucomisd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x2e }, 0, .sse2 }, + .{ .pmullw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd5 }, 0, .none, .sse2 }, + + .{ .por, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xeb }, 0, .none, .sse2 }, + + .{ .pshufhw, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0xf3, 0x0f, 0x70 }, 0, .none, .sse2 }, + + .{ .pshuflw, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0xf2, 0x0f, 0x70 }, 0, .none, .sse2 }, + + .{ .psrlw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd1 }, 0, .none, .sse2 }, + .{ .psrlw, .mi, &.{ .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x71 }, 2, .none, .sse2 }, + .{ .psrld, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd2 }, 0, .none, .sse2 }, + .{ .psrld, .mi, &.{ .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x72 }, 2, .none, .sse2 }, + .{ .psrlq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd3 }, 0, .none, .sse2 }, + .{ .psrlq, .mi, &.{ .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x73 }, 2, .none, .sse2 }, + + .{ .psubb, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xf8 }, 0, .none, .sse2 }, + .{ .psubw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xf9 }, 0, .none, .sse2 }, + .{ .psubd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xfa }, 0, .none, .sse2 }, + + .{ .psubsb, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xe8 }, 0, .none, .sse2 }, + .{ .psubsw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xe9 }, 0, .none, .sse2 }, + + .{ .psubq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xfb }, 0, .none, .sse2 }, + + .{ .psubusb, .rm, 
&.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd8 }, 0, .none, .sse2 }, + .{ .psubusw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd9 }, 0, .none, .sse2 }, + + .{ .punpckhbw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x68 }, 0, .none, .sse2 }, + .{ .punpckhwd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x69 }, 0, .none, .sse2 }, + .{ .punpckhdq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6a }, 0, .none, .sse2 }, + .{ .punpckhqdq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6d }, 0, .none, .sse2 }, + + .{ .punpcklbw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x60 }, 0, .none, .sse2 }, + .{ .punpcklwd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x61 }, 0, .none, .sse2 }, + .{ .punpckldq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x62 }, 0, .none, .sse2 }, + .{ .punpcklqdq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6c }, 0, .none, .sse2 }, + + .{ .pxor, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xef }, 0, .none, .sse2 }, + + .{ .shufpd, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0xc6 }, 0, .none, .sse2 }, + + .{ .sqrtpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x51 }, 0, .none, .sse2 }, + + .{ .sqrtsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x51 }, 0, .none, .sse2 }, + + .{ .subpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x5c }, 0, .none, .sse2 }, + + .{ .subsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5c }, 0, .none, .sse2 }, + + .{ .movsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x10 }, 0, .none, .sse2 }, + .{ .movsd, .mr, &.{ .xmm_m64, .xmm }, &.{ 0xf2, 0x0f, 0x11 }, 0, .none, .sse2 }, + + .{ .ucomisd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x2e }, 0, .none, .sse2 }, + + .{ .xorpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x57 }, 0, .none, .sse2 }, + + // SSE3 + .{ .movddup, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x12 }, 0, .none, .sse3 }, + + .{ .movshdup, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0xf3, 0x0f, 0x16 }, 0, .none, .sse3 }, + + .{ .movsldup, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0xf3, 0x0f, 0x12 }, 0, .none, .sse3 }, // SSE4.1 - .{ .roundss, .rmi, &.{ .xmm, .xmm_m32, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0a }, 0, .sse4_1 }, - .{ .roundsd, .rmi, &.{ .xmm, .xmm_m64, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0b }, 0, .sse4_1 }, + .{ .blendpd, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0d }, 0, .none, .sse4_1 }, + + .{ .blendps, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0c }, 0, .none, .sse4_1 }, + + .{ .blendvpd, .rm0, &.{ .xmm, .xmm_m128, .xmm0 }, &.{ 0x66, 0x0f, 0x38, 0x15 }, 0, .none, .sse4_1 }, + + .{ .blendvps, .rm0, &.{ .xmm, .xmm_m128, .xmm0 }, &.{ 0x66, 0x0f, 0x38, 0x14 }, 0, .none, .sse4_1 }, + + .{ .extractps, .mri, &.{ .rm32, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x17 }, 0, .none, .sse4_1 }, + + .{ .insertps, .rmi, &.{ .xmm, .xmm_m32, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x21 }, 0, .none, .sse4_1 }, + + .{ .packusdw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x2b }, 0, .none, .sse4_1 }, + + .{ .pextrb, .mri, &.{ .r32_m8, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x14 }, 0, .none, .sse4_1 }, + .{ .pextrd, .mri, &.{ .rm32, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x16 }, 0, .none, .sse4_1 }, + .{ .pextrq, .mri, &.{ .rm64, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x16 }, 0, .long, .sse4_1 }, + + .{ .pextrw, .mri, &.{ .r32_m16, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x15 }, 0, .none, .sse4_1 }, + + .{ .pinsrb, .rmi, &.{ .xmm, .r32_m8, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x20 }, 0, .none, .sse4_1 }, + .{ .pinsrd, .rmi, &.{ .xmm, .rm32, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x22 }, 0, .none, .sse4_1 }, + .{ .pinsrq, .rmi, &.{ .xmm, .rm64, 
.imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x22 }, 0, .long, .sse4_1 }, + + .{ .pmaxsb, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x3c }, 0, .none, .sse4_1 }, + .{ .pmaxsd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x3d }, 0, .none, .sse4_1 }, + + .{ .pmaxuw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x3e }, 0, .none, .sse4_1 }, + + .{ .pmaxud, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x3f }, 0, .none, .sse4_1 }, + + .{ .pminsb, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x38 }, 0, .none, .sse4_1 }, + .{ .pminsd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x39 }, 0, .none, .sse4_1 }, + + .{ .pminuw, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x3a }, 0, .none, .sse4_1 }, + + .{ .pminud, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x3b }, 0, .none, .sse4_1 }, + + .{ .pmulld, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x40 }, 0, .none, .sse4_1 }, + + .{ .roundpd, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x09 }, 0, .none, .sse4_1 }, + + .{ .roundps, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x08 }, 0, .none, .sse4_1 }, + + .{ .roundsd, .rmi, &.{ .xmm, .xmm_m64, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0b }, 0, .none, .sse4_1 }, + + .{ .roundss, .rmi, &.{ .xmm, .xmm_m32, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0a }, 0, .none, .sse4_1 }, + + // AVX + .{ .vaddpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x58 }, 0, .vex_128_wig, .avx }, + .{ .vaddpd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x58 }, 0, .vex_256_wig, .avx }, + + .{ .vaddps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x0f, 0x58 }, 0, .vex_128_wig, .avx }, + .{ .vaddps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x0f, 0x58 }, 0, .vex_256_wig, .avx }, + + .{ .vaddsd, .rvm, &.{ .xmm, .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x58 }, 0, .vex_lig_wig, .avx }, + + .{ .vaddss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x58 }, 0, .vex_lig_wig, .avx }, + + .{ .vandnpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x55 }, 0, .vex_128_wig, .avx }, + .{ .vandnpd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x55 }, 0, .vex_256_wig, .avx }, + + .{ .vandnps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x0f, 0x55 }, 0, .vex_128_wig, .avx }, + .{ .vandnps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x0f, 0x55 }, 0, .vex_256_wig, .avx }, + + .{ .vandpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x54 }, 0, .vex_128_wig, .avx }, + .{ .vandpd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x54 }, 0, .vex_256_wig, .avx }, + + .{ .vandps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x0f, 0x54 }, 0, .vex_128_wig, .avx }, + .{ .vandps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x0f, 0x54 }, 0, .vex_256_wig, .avx }, + + .{ .vblendpd, .rvmi, &.{ .xmm, .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0d }, 0, .vex_128_wig, .avx }, + .{ .vblendpd, .rvmi, &.{ .ymm, .ymm, .ymm_m256, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0d }, 0, .vex_256_wig, .avx }, + + .{ .vblendps, .rvmi, &.{ .xmm, .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0c }, 0, .vex_128_wig, .avx }, + .{ .vblendps, .rvmi, &.{ .ymm, .ymm, .ymm_m256, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0c }, 0, .vex_256_wig, .avx }, + + .{ .vblendvpd, .rvmr, &.{ .xmm, .xmm, .xmm_m128, .xmm }, &.{ 0x66, 0x0f, 0x3a, 0x4b }, 0, .vex_128_w0, .avx }, + .{ .vblendvpd, .rvmr, &.{ .ymm, .ymm, .ymm_m256, .ymm }, &.{ 0x66, 0x0f, 0x3a, 0x4b }, 0, .vex_256_w0, .avx }, + + .{ .vblendvps, .rvmr, &.{ .xmm, .xmm, .xmm_m128, .xmm }, &.{ 0x66, 0x0f, 0x3a, 0x4a }, 0, .vex_128_w0, .avx }, + .{ .vblendvps, .rvmr, &.{ .ymm, .ymm, .ymm_m256, .ymm }, &.{ 
0x66, 0x0f, 0x3a, 0x4a }, 0, .vex_256_w0, .avx }, + + .{ .vbroadcastss, .rm, &.{ .xmm, .m32 }, &.{ 0x66, 0x0f, 0x38, 0x18 }, 0, .vex_128_w0, .avx }, + .{ .vbroadcastss, .rm, &.{ .ymm, .m32 }, &.{ 0x66, 0x0f, 0x38, 0x18 }, 0, .vex_256_w0, .avx }, + .{ .vbroadcastsd, .rm, &.{ .ymm, .m64 }, &.{ 0x66, 0x0f, 0x38, 0x19 }, 0, .vex_256_w0, .avx }, + .{ .vbroadcastf128, .rm, &.{ .ymm, .m128 }, &.{ 0x66, 0x0f, 0x38, 0x1a }, 0, .vex_256_w0, .avx }, + + .{ .vcmppd, .rvmi, &.{ .xmm, .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0xc2 }, 0, .vex_128_wig, .avx }, + .{ .vcmppd, .rvmi, &.{ .ymm, .ymm, .ymm_m256, .imm8 }, &.{ 0x66, 0x0f, 0xc2 }, 0, .vex_256_wig, .avx }, + + .{ .vcmpps, .rvmi, &.{ .xmm, .xmm, .xmm_m128, .imm8 }, &.{ 0x0f, 0xc2 }, 0, .vex_128_wig, .avx }, + .{ .vcmpps, .rvmi, &.{ .ymm, .ymm, .ymm_m256, .imm8 }, &.{ 0x0f, 0xc2 }, 0, .vex_256_wig, .avx }, + + .{ .vcmpsd, .rvmi, &.{ .xmm, .xmm, .xmm_m64, .imm8 }, &.{ 0xf2, 0x0f, 0xc2 }, 0, .vex_lig_wig, .avx }, + + .{ .vcmpss, .rvmi, &.{ .xmm, .xmm, .xmm_m32, .imm8 }, &.{ 0xf3, 0x0f, 0xc2 }, 0, .vex_lig_wig, .avx }, + + .{ .vcvtdq2pd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf3, 0x0f, 0xe6 }, 0, .vex_128_wig, .avx }, + .{ .vcvtdq2pd, .rm, &.{ .ymm, .xmm_m128 }, &.{ 0xf3, 0x0f, 0xe6 }, 0, .vex_256_wig, .avx }, + + .{ .vcvtdq2ps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x5b }, 0, .vex_128_wig, .avx }, + .{ .vcvtdq2ps, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x0f, 0x5b }, 0, .vex_256_wig, .avx }, + + .{ .vcvtpd2dq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0xf2, 0x0f, 0xe6 }, 0, .vex_128_wig, .avx }, + .{ .vcvtpd2dq, .rm, &.{ .xmm, .ymm_m256 }, &.{ 0xf2, 0x0f, 0xe6 }, 0, .vex_256_wig, .avx }, + + .{ .vcvtpd2ps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x5a }, 0, .vex_128_wig, .avx }, + .{ .vcvtpd2ps, .rm, &.{ .xmm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x5a }, 0, .vex_256_wig, .avx }, + + .{ .vcvtps2dq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x5b }, 0, .vex_128_wig, .avx }, + .{ .vcvtps2dq, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x5b }, 0, .vex_256_wig, .avx }, + + .{ .vcvtps2pd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x0f, 0x5a }, 0, .vex_128_wig, .avx }, + .{ .vcvtps2pd, .rm, &.{ .ymm, .xmm_m128 }, &.{ 0x0f, 0x5a }, 0, .vex_256_wig, .avx }, + + .{ .vcvtsd2si, .rm, &.{ .r32, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x2d }, 0, .vex_lig_w0, .sse2 }, + .{ .vcvtsd2si, .rm, &.{ .r64, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x2d }, 0, .vex_lig_w1, .sse2 }, + + .{ .vcvtsd2ss, .rvm, &.{ .xmm, .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5a }, 0, .vex_lig_wig, .avx }, + + .{ .vcvtsi2sd, .rvm, &.{ .xmm, .xmm, .rm32 }, &.{ 0xf2, 0x0f, 0x2a }, 0, .vex_lig_w0, .avx }, + .{ .vcvtsi2sd, .rvm, &.{ .xmm, .xmm, .rm64 }, &.{ 0xf2, 0x0f, 0x2a }, 0, .vex_lig_w1, .avx }, + + .{ .vcvtsi2ss, .rvm, &.{ .xmm, .xmm, .rm32 }, &.{ 0xf3, 0x0f, 0x2a }, 0, .vex_lig_w0, .avx }, + .{ .vcvtsi2ss, .rvm, &.{ .xmm, .xmm, .rm64 }, &.{ 0xf3, 0x0f, 0x2a }, 0, .vex_lig_w1, .avx }, + + .{ .vcvtss2sd, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5a }, 0, .vex_lig_wig, .avx }, + + .{ .vcvtss2si, .rm, &.{ .r32, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x2d }, 0, .vex_lig_w0, .avx }, + .{ .vcvtss2si, .rm, &.{ .r64, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x2d }, 0, .vex_lig_w1, .avx }, + + .{ .vcvttpd2dq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xe6 }, 0, .vex_128_wig, .avx }, + .{ .vcvttpd2dq, .rm, &.{ .xmm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xe6 }, 0, .vex_256_wig, .avx }, + + .{ .vcvttps2dq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0xf3, 0x0f, 0x5b }, 0, .vex_128_wig, .avx }, + .{ .vcvttps2dq, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0xf3, 0x0f, 0x5b }, 0, .vex_256_wig, .avx }, + + .{ 
.vcvttsd2si, .rm, &.{ .r32, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x2c }, 0, .vex_lig_w0, .sse2 }, + .{ .vcvttsd2si, .rm, &.{ .r64, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x2c }, 0, .vex_lig_w1, .sse2 }, + + .{ .vcvttss2si, .rm, &.{ .r32, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x2c }, 0, .vex_lig_w0, .avx }, + .{ .vcvttss2si, .rm, &.{ .r64, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x2c }, 0, .vex_lig_w1, .avx }, + + .{ .vdivpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x5e }, 0, .vex_128_wig, .avx }, + .{ .vdivpd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x5e }, 0, .vex_256_wig, .avx }, + + .{ .vdivps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x0f, 0x5e }, 0, .vex_128_wig, .avx }, + .{ .vdivps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x0f, 0x5e }, 0, .vex_256_wig, .avx }, + + .{ .vdivsd, .rvm, &.{ .xmm, .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5e }, 0, .vex_lig_wig, .avx }, + + .{ .vdivss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5e }, 0, .vex_lig_wig, .avx }, + + .{ .vextractf128, .mri, &.{ .xmm_m128, .ymm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x19 }, 0, .vex_256_w0, .avx }, + + .{ .vextractps, .mri, &.{ .rm32, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x17 }, 0, .vex_128_wig, .avx }, + + .{ .vinsertf128, .rvmi, &.{ .ymm, .ymm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x18 }, 0, .vex_256_w0, .avx }, + + .{ .vinsertps, .rvmi, &.{ .xmm, .xmm, .xmm_m32, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x21 }, 0, .vex_128_wig, .avx }, + + .{ .vmaxpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x5f }, 0, .vex_128_wig, .avx }, + .{ .vmaxpd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x5f }, 0, .vex_256_wig, .avx }, + + .{ .vmaxps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x0f, 0x5f }, 0, .vex_128_wig, .avx }, + .{ .vmaxps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x0f, 0x5f }, 0, .vex_256_wig, .avx }, + + .{ .vmaxsd, .rvm, &.{ .xmm, .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5f }, 0, .vex_lig_wig, .avx }, + + .{ .vmaxss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5f }, 0, .vex_lig_wig, .avx }, + + .{ .vminpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x5d }, 0, .vex_128_wig, .avx }, + .{ .vminpd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x5d }, 0, .vex_256_wig, .avx }, + + .{ .vminps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x0f, 0x5d }, 0, .vex_128_wig, .avx }, + .{ .vminps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x0f, 0x5d }, 0, .vex_256_wig, .avx }, + + .{ .vminsd, .rvm, &.{ .xmm, .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5d }, 0, .vex_lig_wig, .avx }, + + .{ .vminss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5d }, 0, .vex_lig_wig, .avx }, + + .{ .vmovapd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x28 }, 0, .vex_128_wig, .avx }, + .{ .vmovapd, .mr, &.{ .xmm_m128, .xmm }, &.{ 0x66, 0x0f, 0x29 }, 0, .vex_128_wig, .avx }, + .{ .vmovapd, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x28 }, 0, .vex_256_wig, .avx }, + .{ .vmovapd, .mr, &.{ .ymm_m256, .ymm }, &.{ 0x66, 0x0f, 0x29 }, 0, .vex_256_wig, .avx }, + + .{ .vmovaps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x28 }, 0, .vex_128_wig, .avx }, + .{ .vmovaps, .mr, &.{ .xmm_m128, .xmm }, &.{ 0x0f, 0x29 }, 0, .vex_128_wig, .avx }, + .{ .vmovaps, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x0f, 0x28 }, 0, .vex_256_wig, .avx }, + .{ .vmovaps, .mr, &.{ .ymm_m256, .ymm }, &.{ 0x0f, 0x29 }, 0, .vex_256_wig, .avx }, + + .{ .vmovd, .rm, &.{ .xmm, .rm32 }, &.{ 0x66, 0x0f, 0x6e }, 0, .vex_128_w0, .avx }, + .{ .vmovq, .rm, &.{ .xmm, .rm64 }, &.{ 0x66, 0x0f, 0x6e }, 0, .vex_128_w1, .avx }, + .{ .vmovd, .mr, &.{ .rm32, .xmm }, &.{ 0x66, 0x0f, 0x7e }, 0, .vex_128_w0, .avx }, + .{ 
.vmovq, .mr, &.{ .rm64, .xmm }, &.{ 0x66, 0x0f, 0x7e }, 0, .vex_128_w1, .avx }, + + .{ .vmovddup, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x12 }, 0, .vex_128_wig, .avx }, + .{ .vmovddup, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0xf2, 0x0f, 0x12 }, 0, .vex_256_wig, .avx }, + + .{ .vmovdqa, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6f }, 0, .vex_128_wig, .avx }, + .{ .vmovdqa, .mr, &.{ .xmm_m128, .xmm }, &.{ 0x66, 0x0f, 0x7f }, 0, .vex_128_wig, .avx }, + .{ .vmovdqa, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x6f }, 0, .vex_256_wig, .avx }, + .{ .vmovdqa, .mr, &.{ .ymm_m256, .ymm }, &.{ 0x66, 0x0f, 0x7f }, 0, .vex_256_wig, .avx }, + + .{ .vmovdqu, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0xf3, 0x0f, 0x6f }, 0, .vex_128_wig, .avx }, + .{ .vmovdqu, .mr, &.{ .xmm_m128, .xmm }, &.{ 0xf3, 0x0f, 0x7f }, 0, .vex_128_wig, .avx }, + .{ .vmovdqu, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0xf3, 0x0f, 0x6f }, 0, .vex_256_wig, .avx }, + .{ .vmovdqu, .mr, &.{ .ymm_m256, .ymm }, &.{ 0xf3, 0x0f, 0x7f }, 0, .vex_256_wig, .avx }, + + .{ .vmovhlps, .rvm, &.{ .xmm, .xmm, .xmm }, &.{ 0x0f, 0x12 }, 0, .vex_128_wig, .avx }, + + .{ .vmovlhps, .rvm, &.{ .xmm, .xmm, .xmm }, &.{ 0x0f, 0x16 }, 0, .vex_128_wig, .avx }, + + .{ .vmovq, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf3, 0x0f, 0x7e }, 0, .vex_128_wig, .avx }, + .{ .vmovq, .mr, &.{ .xmm_m64, .xmm }, &.{ 0x66, 0x0f, 0xd6 }, 0, .vex_128_wig, .avx }, + + .{ .vmovsd, .rvm, &.{ .xmm, .xmm, .xmm }, &.{ 0xf2, 0x0f, 0x10 }, 0, .vex_lig_wig, .avx }, + .{ .vmovsd, .rm, &.{ .xmm, .m64 }, &.{ 0xf2, 0x0f, 0x10 }, 0, .vex_lig_wig, .avx }, + .{ .vmovsd, .mvr, &.{ .xmm, .xmm, .xmm }, &.{ 0xf2, 0x0f, 0x11 }, 0, .vex_lig_wig, .avx }, + .{ .vmovsd, .mr, &.{ .m64, .xmm }, &.{ 0xf2, 0x0f, 0x11 }, 0, .vex_lig_wig, .avx }, + + .{ .vmovshdup, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0xf3, 0x0f, 0x16 }, 0, .vex_128_wig, .avx }, + .{ .vmovshdup, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0xf3, 0x0f, 0x16 }, 0, .vex_256_wig, .avx }, + + .{ .vmovsldup, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0xf3, 0x0f, 0x12 }, 0, .vex_128_wig, .avx }, + .{ .vmovsldup, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0xf3, 0x0f, 0x12 }, 0, .vex_256_wig, .avx }, + + .{ .vmovss, .rvm, &.{ .xmm, .xmm, .xmm }, &.{ 0xf3, 0x0f, 0x10 }, 0, .vex_lig_wig, .avx }, + .{ .vmovss, .rm, &.{ .xmm, .m32 }, &.{ 0xf3, 0x0f, 0x10 }, 0, .vex_lig_wig, .avx }, + .{ .vmovss, .mvr, &.{ .xmm, .xmm, .xmm }, &.{ 0xf3, 0x0f, 0x11 }, 0, .vex_lig_wig, .avx }, + .{ .vmovss, .mr, &.{ .m32, .xmm }, &.{ 0xf3, 0x0f, 0x11 }, 0, .vex_lig_wig, .avx }, + + .{ .vmovupd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x10 }, 0, .vex_128_wig, .avx }, + .{ .vmovupd, .mr, &.{ .xmm_m128, .xmm }, &.{ 0x66, 0x0f, 0x11 }, 0, .vex_128_wig, .avx }, + .{ .vmovupd, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x10 }, 0, .vex_256_wig, .avx }, + .{ .vmovupd, .mr, &.{ .ymm_m256, .ymm }, &.{ 0x66, 0x0f, 0x11 }, 0, .vex_256_wig, .avx }, + + .{ .vmovups, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x10 }, 0, .vex_128_wig, .avx }, + .{ .vmovups, .mr, &.{ .xmm_m128, .xmm }, &.{ 0x0f, 0x11 }, 0, .vex_128_wig, .avx }, + .{ .vmovups, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x0f, 0x10 }, 0, .vex_256_wig, .avx }, + .{ .vmovups, .mr, &.{ .ymm_m256, .ymm }, &.{ 0x0f, 0x11 }, 0, .vex_256_wig, .avx }, + + .{ .vmulpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x59 }, 0, .vex_128_wig, .avx }, + .{ .vmulpd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x59 }, 0, .vex_256_wig, .avx }, + + .{ .vmulps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x0f, 0x59 }, 0, .vex_128_wig, .avx }, + .{ .vmulps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x0f, 0x59 }, 0, 
.vex_256_wig, .avx }, + + .{ .vmulsd, .rvm, &.{ .xmm, .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x59 }, 0, .vex_lig_wig, .avx }, + + .{ .vmulss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x59 }, 0, .vex_lig_wig, .avx }, + + .{ .vorpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x56 }, 0, .vex_128_wig, .avx }, + .{ .vorpd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x56 }, 0, .vex_256_wig, .avx }, + + .{ .vorps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x0f, 0x56 }, 0, .vex_128_wig, .avx }, + .{ .vorps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x0f, 0x56 }, 0, .vex_256_wig, .avx }, + + .{ .vpacksswb, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x63 }, 0, .vex_128_wig, .avx }, + .{ .vpackssdw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6b }, 0, .vex_128_wig, .avx }, + + .{ .vpackusdw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x2b }, 0, .vex_128_wig, .avx }, + + .{ .vpackuswb, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x67 }, 0, .vex_128_wig, .avx }, + + .{ .vpaddb, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xfc }, 0, .vex_128_wig, .avx }, + .{ .vpaddw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xfd }, 0, .vex_128_wig, .avx }, + .{ .vpaddd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xfe }, 0, .vex_128_wig, .avx }, + .{ .vpaddq, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd4 }, 0, .vex_128_wig, .avx }, + + .{ .vpaddsb, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xec }, 0, .vex_128_wig, .avx }, + .{ .vpaddsw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xed }, 0, .vex_128_wig, .avx }, + + .{ .vpaddusb, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xdc }, 0, .vex_128_wig, .avx }, + .{ .vpaddusw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xdd }, 0, .vex_128_wig, .avx }, + + .{ .vpand, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xdb }, 0, .vex_128_wig, .avx }, + + .{ .vpandn, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xdf }, 0, .vex_128_wig, .avx }, + + .{ .vpextrb, .mri, &.{ .r32_m8, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x14 }, 0, .vex_128_w0, .avx }, + .{ .vpextrd, .mri, &.{ .rm32, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x16 }, 0, .vex_128_w0, .avx }, + .{ .vpextrq, .mri, &.{ .rm64, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x16 }, 0, .vex_128_w1, .avx }, + + .{ .vpextrw, .rmi, &.{ .r32, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x15 }, 0, .vex_128_wig, .avx }, + .{ .vpextrw, .mri, &.{ .r32_m16, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x15 }, 0, .vex_128_wig, .avx }, + + .{ .vpinsrb, .rmi, &.{ .xmm, .r32_m8, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x20 }, 0, .vex_128_w0, .avx }, + .{ .vpinsrd, .rmi, &.{ .xmm, .rm32, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x22 }, 0, .vex_128_w0, .avx }, + .{ .vpinsrq, .rmi, &.{ .xmm, .rm64, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x22 }, 0, .vex_128_w1, .avx }, + + .{ .vpinsrw, .rvmi, &.{ .xmm, .xmm, .r32_m16, .imm8 }, &.{ 0x66, 0x0f, 0xc4 }, 0, .vex_128_wig, .avx }, + + .{ .vpmaxsb, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x3c }, 0, .vex_128_wig, .avx }, + .{ .vpmaxsw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xee }, 0, .vex_128_wig, .avx }, + .{ .vpmaxsd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x3d }, 0, .vex_128_wig, .avx }, + + .{ .vpmaxub, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xde }, 0, .vex_128_wig, .avx }, + .{ .vpmaxuw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x3e }, 0, .vex_128_wig, .avx }, + + .{ .vpmaxud, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x3f }, 0, .vex_128_wig, 
.avx },
+
+ .{ .vpminsb, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x38 }, 0, .vex_128_wig, .avx },
+ .{ .vpminsw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xea }, 0, .vex_128_wig, .avx },
+ .{ .vpminsd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x39 }, 0, .vex_128_wig, .avx },
+
+ .{ .vpminub, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xda }, 0, .vex_128_wig, .avx },
+ .{ .vpminuw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x3a }, 0, .vex_128_wig, .avx },
+
+ .{ .vpminud, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x3b }, 0, .vex_128_wig, .avx },
+
+ .{ .vpmulhw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xe5 }, 0, .vex_128_wig, .avx },
+
+ .{ .vpmulld, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x40 }, 0, .vex_128_wig, .avx },
+
+ .{ .vpmullw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd5 }, 0, .vex_128_wig, .avx },
+
+ .{ .vpor, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xeb }, 0, .vex_128_wig, .avx },
+
+ .{ .vpsrlw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd1 }, 0, .vex_128_wig, .avx },
+ .{ .vpsrlw, .vmi, &.{ .xmm, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x71 }, 2, .vex_128_wig, .avx },
+ .{ .vpsrld, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd2 }, 0, .vex_128_wig, .avx },
+ .{ .vpsrld, .vmi, &.{ .xmm, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x72 }, 2, .vex_128_wig, .avx },
+ .{ .vpsrlq, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd3 }, 0, .vex_128_wig, .avx },
+ .{ .vpsrlq, .vmi, &.{ .xmm, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x73 }, 2, .vex_128_wig, .avx },
+
+ .{ .vpsubb, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xf8 }, 0, .vex_128_wig, .avx },
+ .{ .vpsubw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xf9 }, 0, .vex_128_wig, .avx },
+ .{ .vpsubd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xfa }, 0, .vex_128_wig, .avx },
+
+ .{ .vpsubsb, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xe8 }, 0, .vex_128_wig, .avx },
+ .{ .vpsubsw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xe9 }, 0, .vex_128_wig, .avx },
+
+ .{ .vpsubq, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xfb }, 0, .vex_128_wig, .avx },
+
+ .{ .vpsubusb, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd8 }, 0, .vex_128_wig, .avx },
+ .{ .vpsubusw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd9 }, 0, .vex_128_wig, .avx },
+
+ .{ .vpunpckhbw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x68 }, 0, .vex_128_wig, .avx },
+ .{ .vpunpckhwd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x69 }, 0, .vex_128_wig, .avx },
+ .{ .vpunpckhdq, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6a }, 0, .vex_128_wig, .avx },
+ .{ .vpunpckhqdq, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6d }, 0, .vex_128_wig, .avx },
+
+ .{ .vpunpcklbw, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x60 }, 0, .vex_128_wig, .avx },
+ .{ .vpunpcklwd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x61 }, 0, .vex_128_wig, .avx },
+ .{ .vpunpckldq, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x62 }, 0, .vex_128_wig, .avx },
+ .{ .vpunpcklqdq, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x6c }, 0, .vex_128_wig, .avx },
+
+ .{ .vpxor, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xef }, 0, .vex_128_wig, .avx },
+
+ .{ .vroundpd, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x09 }, 0, .vex_128_wig, .avx },
+ .{ .vroundpd, .rmi, &.{ .ymm, .ymm_m256, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x09 }, 0, .vex_256_wig, .avx },
+
+ .{ .vroundps, .rmi, &.{ .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x08 }, 0, .vex_128_wig, .avx },
+ .{ .vroundps, .rmi, &.{ .ymm, .ymm_m256, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x08 }, 0, .vex_256_wig, .avx },
+
+ .{ .vroundsd, .rvmi, &.{ .xmm, .xmm, .xmm_m64, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0b }, 0, .vex_lig_wig, .avx },
+
+ .{ .vroundss, .rvmi, &.{ .xmm, .xmm, .xmm_m32, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0a }, 0, .vex_lig_wig, .avx },
+
+ .{ .vshufpd, .rvmi, &.{ .xmm, .xmm, .xmm_m128, .imm8 }, &.{ 0x66, 0x0f, 0xc6 }, 0, .vex_128_wig, .avx },
+ .{ .vshufpd, .rvmi, &.{ .ymm, .ymm, .ymm_m256, .imm8 }, &.{ 0x66, 0x0f, 0xc6 }, 0, .vex_256_wig, .avx },
+
+ .{ .vshufps, .rvmi, &.{ .xmm, .xmm, .xmm_m128, .imm8 }, &.{ 0x0f, 0xc6 }, 0, .vex_128_wig, .avx },
+ .{ .vshufps, .rvmi, &.{ .ymm, .ymm, .ymm_m256, .imm8 }, &.{ 0x0f, 0xc6 }, 0, .vex_256_wig, .avx },
+
+ .{ .vsqrtpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x51 }, 0, .vex_128_wig, .avx },
+ .{ .vsqrtpd, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x51 }, 0, .vex_256_wig, .avx },
+
+ .{ .vsqrtps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x51 }, 0, .vex_128_wig, .avx },
+ .{ .vsqrtps, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x0f, 0x51 }, 0, .vex_256_wig, .avx },
+
+ .{ .vsqrtsd, .rvm, &.{ .xmm, .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x51 }, 0, .vex_lig_wig, .avx },
+
+ .{ .vsqrtss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x51 }, 0, .vex_lig_wig, .avx },
+
+ .{ .vsubpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x5c }, 0, .vex_128_wig, .avx },
+ .{ .vsubpd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x5c }, 0, .vex_256_wig, .avx },
+
+ .{ .vsubps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x0f, 0x5c }, 0, .vex_128_wig, .avx },
+ .{ .vsubps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x0f, 0x5c }, 0, .vex_256_wig, .avx },
+
+ .{ .vsubsd, .rvm, &.{ .xmm, .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5c }, 0, .vex_lig_wig, .avx },
+
+ .{ .vsubss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5c }, 0, .vex_lig_wig, .avx },
+
+ .{ .vxorpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x57 }, 0, .vex_128_wig, .avx },
+ .{ .vxorpd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x57 }, 0, .vex_256_wig, .avx },
+
+ .{ .vxorps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x0f, 0x57 }, 0, .vex_128_wig, .avx },
+ .{ .vxorps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x0f, 0x57 }, 0, .vex_256_wig, .avx },
+
+ // F16C
+ .{ .vcvtph2ps, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x13 }, 0, .vex_128_w0, .f16c },
+ .{ .vcvtph2ps, .rm, &.{ .ymm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x13 }, 0, .vex_256_w0, .f16c },
+
+ .{ .vcvtps2ph, .mri, &.{ .xmm_m64, .xmm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x1d }, 0, .vex_128_w0, .f16c },
+ .{ .vcvtps2ph, .mri, &.{ .xmm_m128, .ymm, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x1d }, 0, .vex_256_w0, .f16c },
+
+ // FMA
+ .{ .vfmadd132pd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x98 }, 0, .vex_128_w1, .fma },
+ .{ .vfmadd213pd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0xa8 }, 0, .vex_128_w1, .fma },
+ .{ .vfmadd231pd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0xb8 }, 0, .vex_128_w1, .fma },
+ .{ .vfmadd132pd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x98 }, 0, .vex_256_w1, .fma },
+ .{ .vfmadd213pd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0xa8 }, 0, .vex_256_w1, .fma },
+ .{ .vfmadd231pd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0xb8 }, 0, .vex_256_w1, .fma },
+
+ .{ .vfmadd132ps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0x98 }, 0, .vex_128_w0, .fma },
+ .{ .vfmadd213ps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0xa8 }, 0, .vex_128_w0, .fma },
+ .{ .vfmadd231ps, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x38, 0xb8 }, 0, .vex_128_w0, .fma },
+ .{ .vfmadd132ps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x98 }, 0, .vex_256_w0, .fma },
+ .{ .vfmadd213ps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0xa8 }, 0, .vex_256_w0, .fma },
+ .{ .vfmadd231ps, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0xb8 }, 0, .vex_256_w0, .fma },
+
+ .{ .vfmadd132sd, .rvm, &.{ .xmm, .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0x99 }, 0, .vex_lig_w1, .fma },
+ .{ .vfmadd213sd, .rvm, &.{ .xmm, .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0xa9 }, 0, .vex_lig_w1, .fma },
+ .{ .vfmadd231sd, .rvm, &.{ .xmm, .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x38, 0xb9 }, 0, .vex_lig_w1, .fma },
+
+ .{ .vfmadd132ss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0x66, 0x0f, 0x38, 0x99 }, 0, .vex_lig_w0, .fma },
+ .{ .vfmadd213ss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0x66, 0x0f, 0x38, 0xa9 }, 0, .vex_lig_w0, .fma },
+ .{ .vfmadd231ss, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0x66, 0x0f, 0x38, 0xb9 }, 0, .vex_lig_w0, .fma },
+
+ // AVX2
+ .{ .vbroadcastss, .rm, &.{ .xmm, .xmm }, &.{ 0x66, 0x0f, 0x38, 0x18 }, 0, .vex_128_w0, .avx2 },
+ .{ .vbroadcastss, .rm, &.{ .ymm, .xmm }, &.{ 0x66, 0x0f, 0x38, 0x18 }, 0, .vex_256_w0, .avx2 },
+ .{ .vbroadcastsd, .rm, &.{ .ymm, .xmm }, &.{ 0x66, 0x0f, 0x38, 0x19 }, 0, .vex_256_w0, .avx2 },
+
+ .{ .vpacksswb, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x63 }, 0, .vex_256_wig, .avx2 },
+ .{ .vpackssdw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x6b }, 0, .vex_256_wig, .avx2 },
+
+ .{ .vpackusdw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x2b }, 0, .vex_256_wig, .avx2 },
+
+ .{ .vpackuswb, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x67 }, 0, .vex_256_wig, .avx2 },
+
+ .{ .vpaddb, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xfc }, 0, .vex_256_wig, .avx2 },
+ .{ .vpaddw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xfd }, 0, .vex_256_wig, .avx2 },
+ .{ .vpaddd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xfe }, 0, .vex_256_wig, .avx2 },
+ .{ .vpaddq, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xd4 }, 0, .vex_256_wig, .avx2 },
+
+ .{ .vpaddsb, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xec }, 0, .vex_256_wig, .avx2 },
+ .{ .vpaddsw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xed }, 0, .vex_256_wig, .avx2 },
+
+ .{ .vpaddusb, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xdc }, 0, .vex_256_wig, .avx2 },
+ .{ .vpaddusw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xdd }, 0, .vex_256_wig, .avx2 },
+
+ .{ .vpand, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xdb }, 0, .vex_256_wig, .avx2 },
+
+ .{ .vpandn, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xdf }, 0, .vex_256_wig, .avx2 },
+
+ .{ .vpmaxsb, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x3c }, 0, .vex_256_wig, .avx },
+ .{ .vpmaxsw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xee }, 0, .vex_256_wig, .avx },
+ .{ .vpmaxsd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x3d }, 0, .vex_256_wig, .avx },
+
+ .{ .vpmaxub, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xde }, 0, .vex_256_wig, .avx },
+ .{ .vpmaxuw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x3e }, 0, .vex_256_wig, .avx },
+
+ .{ .vpmaxud, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x3f }, 0, .vex_256_wig, .avx },
+
+ .{ .vpminsb, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x38 }, 0, .vex_256_wig, .avx },
+ .{ .vpminsw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xea }, 0, .vex_256_wig, .avx },
+ .{ .vpminsd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x39 }, 0, .vex_256_wig, .avx },
+
+ .{ .vpminub, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xda }, 0, .vex_256_wig, .avx },
+ .{ .vpminuw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x3a }, 0, .vex_256_wig, .avx },
+
+ .{ .vpminud, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x3b }, 0, .vex_256_wig, .avx },
+
+ .{ .vpmulhw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xe5 }, 0, .vex_256_wig, .avx },
+
+ .{ .vpmulld, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x38, 0x40 }, 0, .vex_256_wig, .avx },
+
+ .{ .vpmullw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xd5 }, 0, .vex_256_wig, .avx },
+
+ .{ .vpor, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xeb }, 0, .vex_256_wig, .avx2 },
+
+ .{ .vpsrlw, .rvm, &.{ .ymm, .ymm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd1 }, 0, .vex_256_wig, .avx2 },
+ .{ .vpsrlw, .vmi, &.{ .ymm, .ymm, .imm8 }, &.{ 0x66, 0x0f, 0x71 }, 2, .vex_256_wig, .avx2 },
+ .{ .vpsrld, .rvm, &.{ .ymm, .ymm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd2 }, 0, .vex_256_wig, .avx2 },
+ .{ .vpsrld, .vmi, &.{ .ymm, .ymm, .imm8 }, &.{ 0x66, 0x0f, 0x72 }, 2, .vex_256_wig, .avx2 },
+ .{ .vpsrlq, .rvm, &.{ .ymm, .ymm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xd3 }, 0, .vex_256_wig, .avx2 },
+ .{ .vpsrlq, .vmi, &.{ .ymm, .ymm, .imm8 }, &.{ 0x66, 0x0f, 0x73 }, 2, .vex_256_wig, .avx2 },
+
+ .{ .vpsubb, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xf8 }, 0, .vex_256_wig, .avx2 },
+ .{ .vpsubw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xf9 }, 0, .vex_256_wig, .avx2 },
+ .{ .vpsubd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xfa }, 0, .vex_256_wig, .avx2 },
+
+ .{ .vpsubsb, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xe8 }, 0, .vex_256_wig, .avx2 },
+ .{ .vpsubsw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xe9 }, 0, .vex_256_wig, .avx2 },
+
+ .{ .vpsubq, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xfb }, 0, .vex_256_wig, .avx2 },
+
+ .{ .vpsubusb, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xd8 }, 0, .vex_256_wig, .avx2 },
+ .{ .vpsubusw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xd9 }, 0, .vex_256_wig, .avx2 },
+
+ .{ .vpunpckhbw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x68 }, 0, .vex_256_wig, .avx2 },
+ .{ .vpunpckhwd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x69 }, 0, .vex_256_wig, .avx2 },
+ .{ .vpunpckhdq, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x6a }, 0, .vex_256_wig, .avx2 },
+ .{ .vpunpckhqdq, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x6d }, 0, .vex_256_wig, .avx2 },
+
+ .{ .vpunpcklbw, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x60 }, 0, .vex_256_wig, .avx2 },
+ .{ .vpunpcklwd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x61 }, 0, .vex_256_wig, .avx2 },
+ .{ .vpunpckldq, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x62 }, 0, .vex_256_wig, .avx2 },
+ .{ .vpunpcklqdq, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x6c }, 0, .vex_256_wig, .avx2 },
+
+ .{ .vpxor, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xef }, 0, .vex_256_wig, .avx2 },
 };
 // zig fmt: on
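For orientation, every row added above follows one fixed layout: mnemonic, operand-encoding form (.rm, .rvm, .vmi, .rmi, .rvmi, .mri), operand kinds, opcode byte sequence, ModRM /digit opcode extension, VEX prefix/width mode, and the CPU feature that gates the encoding. The following is a minimal, hypothetical Zig sketch of that layout for a single row; the ExampleEntry struct, its field names, and the string spellings are assumptions made for illustration and are not the compiler's actual Encoding type.

const std = @import("std");

// Hypothetical mirror of the tuple layout used by the rows above
// (an assumption for illustration; not the compiler's real Encoding type).
const ExampleEntry = struct {
    mnemonic: []const u8, // instruction name, e.g. "vpsrlw"
    op_en: []const u8, // operand-encoding form, e.g. "vmi"
    ops: []const []const u8, // operand kinds, e.g. ymm, ymm, imm8
    opc: []const u8, // opcode byte sequence
    modrm_ext: u3, // ModRM /digit; nonzero only for opcode-extension forms
    mode: []const u8, // prefix/width mode, e.g. "vex_256_wig"
    feature: []const u8, // CPU feature gating this encoding
};

// The VPSRLW-by-immediate row from the table, rendered in the assumed layout:
// .{ .vpsrlw, .vmi, &.{ .ymm, .ymm, .imm8 }, &.{ 0x66, 0x0f, 0x71 }, 2, .vex_256_wig, .avx2 }
const vpsrlw_imm = ExampleEntry{
    .mnemonic = "vpsrlw",
    .op_en = "vmi",
    .ops = &.{ "ymm", "ymm", "imm8" },
    .opc = &.{ 0x66, 0x0f, 0x71 },
    .modrm_ext = 2, // the /2 extension selects the logical shift-right form
    .mode = "vex_256_wig",
    .feature = "avx2",
};

pub fn main() void {
    std.debug.print("{s} ({s}): opcode {x:0>2} {x:0>2} {x:0>2} /{d}, requires {s}\n", .{
        vpsrlw_imm.mnemonic,
        vpsrlw_imm.op_en,
        vpsrlw_imm.opc[0],
        vpsrlw_imm.opc[1],
        vpsrlw_imm.opc[2],
        vpsrlw_imm.modrm_ext,
        vpsrlw_imm.feature,
    });
}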
