diff options
| author | Jakub Konka <kubkon@jakubkonka.com> | 2022-05-19 17:36:04 +0200 |
|---|---|---|
| committer | Jakub Konka <kubkon@jakubkonka.com> | 2022-05-19 19:39:34 +0200 |
| commit | 5cbfd5819e423cdc6b092d1eb687189fb204b075 (patch) | |
| tree | 813b04e2f28f59af65d3d69709def69f6dd7ce08 /src | |
| parent | 283f40e4e9c44986353ba8abcc760684e9adf6cc (diff) | |
| download | zig-5cbfd5819e423cdc6b092d1eb687189fb204b075.tar.gz zig-5cbfd5819e423cdc6b092d1eb687189fb204b075.zip | |
x64: check for floating-point intrinsics in codegen
Diffstat (limited to 'src')
| -rw-r--r-- | src/arch/x86_64/CodeGen.zig | 283 | ||||
| -rw-r--r-- | src/arch/x86_64/Emit.zig | 12 | ||||
| -rw-r--r-- | src/arch/x86_64/Mir.zig | 12 | ||||
| -rw-r--r-- | src/arch/x86_64/abi.zig | 6 |
4 files changed, 179 insertions, 134 deletions
diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 3109470620..2e4a396c9f 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -39,7 +39,7 @@ const RegisterLock = RegisterManager.RegisterLock; const Register = bits.Register; const gp = abi.RegisterClass.gp; -const avx = abi.RegisterClass.avx; +const sse = abi.RegisterClass.sse; const InnerError = error{ OutOfMemory, @@ -881,15 +881,18 @@ fn allocRegOrMem(self: *Self, inst: Air.Inst.Index, reg_ok: bool) !MCValue { switch (elem_ty.zigTypeTag()) { .Vector => return self.fail("TODO allocRegOrMem for Vector type", .{}), .Float => { - // TODO check if AVX available - const ptr_bytes: u64 = 32; - if (abi_size <= ptr_bytes) { - if (self.register_manager.tryAllocReg(inst, .{ - .selector_mask = avx, - })) |reg| { - return MCValue{ .register = registerAlias(reg, abi_size) }; + if (self.intrinsicsAllowed(elem_ty)) { + const ptr_bytes: u64 = 32; + if (abi_size <= ptr_bytes) { + if (self.register_manager.tryAllocReg(inst, .{ + .selector_mask = sse, + })) |reg| { + return MCValue{ .register = registerAlias(reg, abi_size) }; + } } } + + return self.fail("TODO allocRegOrMem for Float type without SSE/AVX support", .{}); }, else => { // Make sure the type can fit in a register before we try to allocate one. @@ -969,8 +972,11 @@ pub fn spillRegisters(self: *Self, comptime count: comptime_int, registers: [cou /// allocated. A second call to `copyToTmpRegister` may return the same register. /// This can have a side effect of spilling instructions to the stack to free up a register. fn copyToTmpRegister(self: *Self, ty: Type, mcv: MCValue) !Register { - const mask = switch (ty.zigTypeTag()) { - .Float => avx, + const mask: RegisterManager.RegisterBitSet = switch (ty.zigTypeTag()) { + .Float => blk: { + if (self.intrinsicsAllowed(ty)) break :blk sse; + return self.fail("TODO copy {} to register", .{ty.fmtDebug()}); + }, else => gp, }; const reg: Register = try self.register_manager.allocReg(null, .{ @@ -985,8 +991,11 @@ fn copyToTmpRegister(self: *Self, ty: Type, mcv: MCValue) !Register { /// This can have a side effect of spilling instructions to the stack to free up a register. /// WARNING make sure that the allocated register matches the returned MCValue from an instruction! fn copyToRegisterWithInstTracking(self: *Self, reg_owner: Air.Inst.Index, ty: Type, mcv: MCValue) !MCValue { - const mask = switch (ty.zigTypeTag()) { - .Float => avx, + const mask: RegisterManager.RegisterBitSet = switch (ty.zigTypeTag()) { + .Float => blk: { + if (self.intrinsicsAllowed(ty)) break :blk sse; + return self.fail("TODO copy {} to register", .{ty.fmtDebug()}); + }, else => gp, }; const reg: Register = try self.register_manager.allocReg(reg_owner, .{ @@ -3469,27 +3478,32 @@ fn genBinOpMir(self: *Self, mir_tag: Mir.Inst.Tag, dst_ty: Type, dst_mcv: MCValu }, .register => |src_reg| switch (dst_ty.zigTypeTag()) { .Float => { - const actual_tag: Mir.Inst.Tag = switch (dst_ty.tag()) { - .f32 => switch (mir_tag) { - .add => Mir.Inst.Tag.add_f32, - .cmp => Mir.Inst.Tag.cmp_f32, - else => return self.fail("TODO genBinOpMir for f32 register-register with MIR tag {}", .{mir_tag}), - }, - .f64 => switch (mir_tag) { - .add => Mir.Inst.Tag.add_f64, - .cmp => Mir.Inst.Tag.cmp_f64, - else => return self.fail("TODO genBinOpMir for f64 register-register with MIR tag {}", .{mir_tag}), - }, - else => return self.fail("TODO genBinOpMir for float register-register and type {}", .{dst_ty.fmtDebug()}), - }; - _ = try self.addInst(.{ - .tag = actual_tag, - .ops = Mir.Inst.Ops.encode(.{ - .reg1 = dst_reg.to128(), - .reg2 = src_reg.to128(), - }), - .data = undefined, - }); + if (self.intrinsicsAllowed(dst_ty)) { + const actual_tag: Mir.Inst.Tag = switch (dst_ty.tag()) { + .f32 => switch (mir_tag) { + .add => Mir.Inst.Tag.add_f32_avx, + .cmp => Mir.Inst.Tag.cmp_f32_avx, + else => return self.fail("TODO genBinOpMir for f32 register-register with MIR tag {}", .{mir_tag}), + }, + .f64 => switch (mir_tag) { + .add => Mir.Inst.Tag.add_f64_avx, + .cmp => Mir.Inst.Tag.cmp_f64_avx, + else => return self.fail("TODO genBinOpMir for f64 register-register with MIR tag {}", .{mir_tag}), + }, + else => return self.fail("TODO genBinOpMir for float register-register and type {}", .{dst_ty.fmtDebug()}), + }; + _ = try self.addInst(.{ + .tag = actual_tag, + .ops = Mir.Inst.Ops.encode(.{ + .reg1 = dst_reg.to128(), + .reg2 = src_reg.to128(), + }), + .data = undefined, + }); + return; + } + + return self.fail("TODO genBinOpMir for float register-register and no intrinsics", .{}); }, else => { _ = try self.addInst(.{ @@ -5326,24 +5340,29 @@ fn genSetStackArg(self: *Self, ty: Type, stack_offset: i32, mcv: MCValue) InnerE .register => |reg| { switch (ty.zigTypeTag()) { .Float => { - const tag: Mir.Inst.Tag = switch (ty.tag()) { - .f32 => .mov_f32, - .f64 => .mov_f64, - else => return self.fail("TODO genSetStackArg for register for type {}", .{ty.fmtDebug()}), - }; - _ = try self.addInst(.{ - .tag = tag, - .ops = Mir.Inst.Ops.encode(.{ - .reg1 = switch (ty.tag()) { - .f32 => .esp, - .f64 => .rsp, - else => unreachable, - }, - .reg2 = reg.to128(), - .flags = 0b01, - }), - .data = .{ .imm = @bitCast(u32, -stack_offset) }, - }); + if (self.intrinsicsAllowed(ty)) { + const tag: Mir.Inst.Tag = switch (ty.tag()) { + .f32 => .mov_f32_avx, + .f64 => .mov_f64_avx, + else => return self.fail("TODO genSetStackArg for register for type {}", .{ty.fmtDebug()}), + }; + _ = try self.addInst(.{ + .tag = tag, + .ops = Mir.Inst.Ops.encode(.{ + .reg1 = switch (ty.tag()) { + .f32 => .esp, + .f64 => .rsp, + else => unreachable, + }, + .reg2 = reg.to128(), + .flags = 0b01, + }), + .data = .{ .imm = @bitCast(u32, -stack_offset) }, + }); + return; + } + + return self.fail("TODO genSetStackArg for register with no intrinsics", .{}); }, else => { _ = try self.addInst(.{ @@ -5505,24 +5524,29 @@ fn genSetStack(self: *Self, ty: Type, stack_offset: i32, mcv: MCValue, opts: Inl switch (ty.zigTypeTag()) { .Float => { - const tag: Mir.Inst.Tag = switch (ty.tag()) { - .f32 => .mov_f32, - .f64 => .mov_f64, - else => return self.fail("TODO genSetStack for register for type {}", .{ty.fmtDebug()}), - }; - _ = try self.addInst(.{ - .tag = tag, - .ops = Mir.Inst.Ops.encode(.{ - .reg1 = switch (ty.tag()) { - .f32 => base_reg.to32(), - .f64 => base_reg.to64(), - else => unreachable, - }, - .reg2 = reg.to128(), - .flags = 0b01, - }), - .data = .{ .imm = @bitCast(u32, -stack_offset) }, - }); + if (self.intrinsicsAllowed(ty)) { + const tag: Mir.Inst.Tag = switch (ty.tag()) { + .f32 => .mov_f32_avx, + .f64 => .mov_f64_avx, + else => return self.fail("TODO genSetStack for register for type {}", .{ty.fmtDebug()}), + }; + _ = try self.addInst(.{ + .tag = tag, + .ops = Mir.Inst.Ops.encode(.{ + .reg1 = switch (ty.tag()) { + .f32 => base_reg.to32(), + .f64 => base_reg.to64(), + else => unreachable, + }, + .reg2 = reg.to128(), + .flags = 0b01, + }), + .data = .{ .imm = @bitCast(u32, -stack_offset) }, + }); + return; + } + + return self.fail("TODO genSetStack for register for type float with no intrinsics", .{}); }, else => { if (!math.isPowerOfTwo(abi_size)) { @@ -6026,21 +6050,25 @@ fn genSetReg(self: *Self, ty: Type, reg: Register, mcv: MCValue) InnerError!void }, }, .Float => { - const tag: Mir.Inst.Tag = switch (ty.tag()) { - .f32 => .mov_f32, - .f64 => .mov_f64, - else => return self.fail("TODO genSetReg from register for {}", .{ty.fmtDebug()}), - }; - _ = try self.addInst(.{ - .tag = tag, - .ops = Mir.Inst.Ops.encode(.{ - .reg1 = reg.to128(), - .reg2 = src_reg.to128(), - .flags = 0b10, - }), - .data = undefined, - }); - return; + if (self.intrinsicsAllowed(ty)) { + const tag: Mir.Inst.Tag = switch (ty.tag()) { + .f32 => .mov_f32_avx, + .f64 => .mov_f64_avx, + else => return self.fail("TODO genSetReg from register for {}", .{ty.fmtDebug()}), + }; + _ = try self.addInst(.{ + .tag = tag, + .ops = Mir.Inst.Ops.encode(.{ + .reg1 = reg.to128(), + .reg2 = src_reg.to128(), + .flags = 0b10, + }), + .data = undefined, + }); + return; + } + + return self.fail("TODO genSetReg from register for float with no intrinsics", .{}); }, else => {}, } @@ -6073,24 +6101,29 @@ fn genSetReg(self: *Self, ty: Type, reg: Register, mcv: MCValue) InnerError!void const base_reg = try self.register_manager.allocReg(null, .{ .selector_mask = gp }); try self.loadMemPtrIntoRegister(base_reg, Type.usize, mcv); - const tag: Mir.Inst.Tag = switch (ty.tag()) { - .f32 => .mov_f32, - .f64 => .mov_f64, - else => return self.fail("TODO genSetReg from memory for {}", .{ty.fmtDebug()}), - }; + if (self.intrinsicsAllowed(ty)) { + const tag: Mir.Inst.Tag = switch (ty.tag()) { + .f32 => .mov_f32_avx, + .f64 => .mov_f64_avx, + else => return self.fail("TODO genSetReg from memory for {}", .{ty.fmtDebug()}), + }; - _ = try self.addInst(.{ - .tag = tag, - .ops = Mir.Inst.Ops.encode(.{ - .reg1 = reg.to128(), - .reg2 = switch (ty.tag()) { - .f32 => base_reg.to32(), - .f64 => base_reg.to64(), - else => unreachable, - }, - }), - .data = .{ .imm = 0 }, - }); + _ = try self.addInst(.{ + .tag = tag, + .ops = Mir.Inst.Ops.encode(.{ + .reg1 = reg.to128(), + .reg2 = switch (ty.tag()) { + .f32 => base_reg.to32(), + .f64 => base_reg.to64(), + else => unreachable, + }, + }), + .data = .{ .imm = 0 }, + }); + return; + } + + return self.fail("TODO genSetReg from memory for float with no intrinsics", .{}); }, else => { if (x <= math.maxInt(i32)) { @@ -6183,24 +6216,27 @@ fn genSetReg(self: *Self, ty: Type, reg: Register, mcv: MCValue) InnerError!void }, }, .Float => { - const tag: Mir.Inst.Tag = switch (ty.tag()) { - .f32 => .mov_f32, - .f64 => .mov_f64, - else => return self.fail("TODO genSetReg from stack offset for {}", .{ty.fmtDebug()}), - }; - _ = try self.addInst(.{ - .tag = tag, - .ops = Mir.Inst.Ops.encode(.{ - .reg1 = reg.to128(), - .reg2 = switch (ty.tag()) { - .f32 => .ebp, - .f64 => .rbp, - else => unreachable, - }, - }), - .data = .{ .imm = @bitCast(u32, -off) }, - }); - return; + if (self.intrinsicsAllowed(ty)) { + const tag: Mir.Inst.Tag = switch (ty.tag()) { + .f32 => .mov_f32_avx, + .f64 => .mov_f64_avx, + else => return self.fail("TODO genSetReg from stack offset for {}", .{ty.fmtDebug()}), + }; + _ = try self.addInst(.{ + .tag = tag, + .ops = Mir.Inst.Ops.encode(.{ + .reg1 = reg.to128(), + .reg2 = switch (ty.tag()) { + .f32 => .ebp, + .f64 => .rbp, + else => unreachable, + }, + }), + .data = .{ .imm = @bitCast(u32, -off) }, + }); + return; + } + return self.fail("TODO genSetReg from stack offset for float with no intrinsics", .{}); }, else => {}, } @@ -6995,3 +7031,12 @@ fn truncateRegister(self: *Self, ty: Type, reg: Register) !void { }, } } + +fn intrinsicsAllowed(self: *Self, ty: Type) bool { + return switch (ty.tag()) { + .f32, + .f64, + => Target.x86.featureSetHasAny(self.target.cpu.features, .{ .avx, .avx2 }), + else => unreachable, // TODO finish this off + }; +} diff --git a/src/arch/x86_64/Emit.zig b/src/arch/x86_64/Emit.zig index 96f640b610..fbcd8359f7 100644 --- a/src/arch/x86_64/Emit.zig +++ b/src/arch/x86_64/Emit.zig @@ -183,14 +183,14 @@ pub fn lowerMir(emit: *Emit) InnerError!void { .nop => try emit.mirNop(), // AVX instructions - .mov_f64 => try emit.mirMovFloatAvx(.vmovsd, inst), - .mov_f32 => try emit.mirMovFloatAvx(.vmovss, inst), + .mov_f64_avx => try emit.mirMovFloatAvx(.vmovsd, inst), + .mov_f32_avx => try emit.mirMovFloatAvx(.vmovss, inst), - .add_f64 => try emit.mirAddFloatAvx(.vaddsd, inst), - .add_f32 => try emit.mirAddFloatAvx(.vaddss, inst), + .add_f64_avx => try emit.mirAddFloatAvx(.vaddsd, inst), + .add_f32_avx => try emit.mirAddFloatAvx(.vaddss, inst), - .cmp_f64 => try emit.mirCmpFloatAvx(.vucomisd, inst), - .cmp_f32 => try emit.mirCmpFloatAvx(.vucomiss, inst), + .cmp_f64_avx => try emit.mirCmpFloatAvx(.vucomisd, inst), + .cmp_f32_avx => try emit.mirCmpFloatAvx(.vucomiss, inst), // Pseudo-instructions .call_extern => try emit.mirCallExtern(inst), diff --git a/src/arch/x86_64/Mir.zig b/src/arch/x86_64/Mir.zig index dc8c1fa0b2..0f200d43e6 100644 --- a/src/arch/x86_64/Mir.zig +++ b/src/arch/x86_64/Mir.zig @@ -350,18 +350,18 @@ pub const Inst = struct { /// 0b00 reg1, qword ptr [reg2 + imm32] /// 0b01 qword ptr [reg1 + imm32], reg2 /// 0b10 reg1, reg2 - mov_f64, - mov_f32, + mov_f64_avx, + mov_f32_avx, /// ops flags: form: /// 0b00 reg1, reg1, reg2 - add_f64, - add_f32, + add_f64_avx, + add_f32_avx, /// ops flags: form: /// - cmp_f64, - cmp_f32, + cmp_f64_avx, + cmp_f32_avx, /// Pseudo-instructions /// call extern function diff --git a/src/arch/x86_64/abi.zig b/src/arch/x86_64/abi.zig index bf85f002d1..7e2025a23d 100644 --- a/src/arch/x86_64/abi.zig +++ b/src/arch/x86_64/abi.zig @@ -383,11 +383,11 @@ pub const caller_preserved_regs = [_]Register{ .rax, .rcx, .rdx, .rsi, .rdi, .r8 pub const c_abi_int_param_regs = [_]Register{ .rdi, .rsi, .rdx, .rcx, .r8, .r9 }; pub const c_abi_int_return_regs = [_]Register{ .rax, .rdx }; -const avx_regs = [_]Register{ +const sse_avx_regs = [_]Register{ .ymm0, .ymm1, .ymm2, .ymm3, .ymm4, .ymm5, .ymm6, .ymm7, .ymm8, .ymm9, .ymm10, .ymm11, .ymm12, .ymm13, .ymm14, .ymm15, }; -const allocatable_registers = callee_preserved_regs ++ caller_preserved_regs ++ avx_regs; +const allocatable_registers = callee_preserved_regs ++ caller_preserved_regs ++ sse_avx_regs; pub const RegisterManager = RegisterManagerFn(@import("CodeGen.zig"), Register, &allocatable_registers); // Register classes @@ -401,7 +401,7 @@ pub const RegisterClass = struct { }, true); break :blk set; }; - pub const avx: RegisterBitSet = blk: { + pub const sse: RegisterBitSet = blk: { var set = RegisterBitSet.initEmpty(); set.setRangeValue(.{ .start = caller_preserved_regs.len + callee_preserved_regs.len, |
