about summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
author Jakub Konka <kubkon@jakubkonka.com> 2022-05-19 17:36:04 +0200
committer Jakub Konka <kubkon@jakubkonka.com> 2022-05-19 19:39:34 +0200
commit 5cbfd5819e423cdc6b092d1eb687189fb204b075 (patch)
tree 813b04e2f28f59af65d3d69709def69f6dd7ce08 /src
parent 283f40e4e9c44986353ba8abcc760684e9adf6cc (diff)
download zig-5cbfd5819e423cdc6b092d1eb687189fb204b075.tar.gz
zig-5cbfd5819e423cdc6b092d1eb687189fb204b075.zip
x64: check for floating-point intrinsics in codegen
Diffstat (limited to 'src')
-rw-r--r-- src/arch/x86_64/CodeGen.zig 283
-rw-r--r-- src/arch/x86_64/Emit.zig 12
-rw-r--r-- src/arch/x86_64/Mir.zig 12
-rw-r--r-- src/arch/x86_64/abi.zig 6
4 files changed, 179 insertions, 134 deletions
diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig
index 3109470620..2e4a396c9f 100644
--- a/src/arch/x86_64/CodeGen.zig
+++ b/src/arch/x86_64/CodeGen.zig
@@ -39,7 +39,7 @@ const RegisterLock = RegisterManager.RegisterLock;
const Register = bits.Register;
const gp = abi.RegisterClass.gp;
-const avx = abi.RegisterClass.avx;
+const sse = abi.RegisterClass.sse;
const InnerError = error{
OutOfMemory,
@@ -881,15 +881,18 @@ fn allocRegOrMem(self: *Self, inst: Air.Inst.Index, reg_ok: bool) !MCValue {
switch (elem_ty.zigTypeTag()) {
.Vector => return self.fail("TODO allocRegOrMem for Vector type", .{}),
.Float => {
- // TODO check if AVX available
- const ptr_bytes: u64 = 32;
- if (abi_size <= ptr_bytes) {
- if (self.register_manager.tryAllocReg(inst, .{
- .selector_mask = avx,
- })) |reg| {
- return MCValue{ .register = registerAlias(reg, abi_size) };
+ if (self.intrinsicsAllowed(elem_ty)) {
+ const ptr_bytes: u64 = 32;
+ if (abi_size <= ptr_bytes) {
+ if (self.register_manager.tryAllocReg(inst, .{
+ .selector_mask = sse,
+ })) |reg| {
+ return MCValue{ .register = registerAlias(reg, abi_size) };
+ }
}
}
+
+ return self.fail("TODO allocRegOrMem for Float type without SSE/AVX support", .{});
},
else => {
// Make sure the type can fit in a register before we try to allocate one.
@@ -969,8 +972,11 @@ pub fn spillRegisters(self: *Self, comptime count: comptime_int, registers: [cou
/// allocated. A second call to `copyToTmpRegister` may return the same register.
/// This can have a side effect of spilling instructions to the stack to free up a register.
fn copyToTmpRegister(self: *Self, ty: Type, mcv: MCValue) !Register {
- const mask = switch (ty.zigTypeTag()) {
- .Float => avx,
+ const mask: RegisterManager.RegisterBitSet = switch (ty.zigTypeTag()) {
+ .Float => blk: {
+ if (self.intrinsicsAllowed(ty)) break :blk sse;
+ return self.fail("TODO copy {} to register", .{ty.fmtDebug()});
+ },
else => gp,
};
const reg: Register = try self.register_manager.allocReg(null, .{
@@ -985,8 +991,11 @@ fn copyToTmpRegister(self: *Self, ty: Type, mcv: MCValue) !Register {
/// This can have a side effect of spilling instructions to the stack to free up a register.
/// WARNING make sure that the allocated register matches the returned MCValue from an instruction!
fn copyToRegisterWithInstTracking(self: *Self, reg_owner: Air.Inst.Index, ty: Type, mcv: MCValue) !MCValue {
- const mask = switch (ty.zigTypeTag()) {
- .Float => avx,
+ const mask: RegisterManager.RegisterBitSet = switch (ty.zigTypeTag()) {
+ .Float => blk: {
+ if (self.intrinsicsAllowed(ty)) break :blk sse;
+ return self.fail("TODO copy {} to register", .{ty.fmtDebug()});
+ },
else => gp,
};
const reg: Register = try self.register_manager.allocReg(reg_owner, .{
@@ -3469,27 +3478,32 @@ fn genBinOpMir(self: *Self, mir_tag: Mir.Inst.Tag, dst_ty: Type, dst_mcv: MCValu
},
.register => |src_reg| switch (dst_ty.zigTypeTag()) {
.Float => {
- const actual_tag: Mir.Inst.Tag = switch (dst_ty.tag()) {
- .f32 => switch (mir_tag) {
- .add => Mir.Inst.Tag.add_f32,
- .cmp => Mir.Inst.Tag.cmp_f32,
- else => return self.fail("TODO genBinOpMir for f32 register-register with MIR tag {}", .{mir_tag}),
- },
- .f64 => switch (mir_tag) {
- .add => Mir.Inst.Tag.add_f64,
- .cmp => Mir.Inst.Tag.cmp_f64,
- else => return self.fail("TODO genBinOpMir for f64 register-register with MIR tag {}", .{mir_tag}),
- },
- else => return self.fail("TODO genBinOpMir for float register-register and type {}", .{dst_ty.fmtDebug()}),
- };
- _ = try self.addInst(.{
- .tag = actual_tag,
- .ops = Mir.Inst.Ops.encode(.{
- .reg1 = dst_reg.to128(),
- .reg2 = src_reg.to128(),
- }),
- .data = undefined,
- });
+ if (self.intrinsicsAllowed(dst_ty)) {
+ const actual_tag: Mir.Inst.Tag = switch (dst_ty.tag()) {
+ .f32 => switch (mir_tag) {
+ .add => Mir.Inst.Tag.add_f32_avx,
+ .cmp => Mir.Inst.Tag.cmp_f32_avx,
+ else => return self.fail("TODO genBinOpMir for f32 register-register with MIR tag {}", .{mir_tag}),
+ },
+ .f64 => switch (mir_tag) {
+ .add => Mir.Inst.Tag.add_f64_avx,
+ .cmp => Mir.Inst.Tag.cmp_f64_avx,
+ else => return self.fail("TODO genBinOpMir for f64 register-register with MIR tag {}", .{mir_tag}),
+ },
+ else => return self.fail("TODO genBinOpMir for float register-register and type {}", .{dst_ty.fmtDebug()}),
+ };
+ _ = try self.addInst(.{
+ .tag = actual_tag,
+ .ops = Mir.Inst.Ops.encode(.{
+ .reg1 = dst_reg.to128(),
+ .reg2 = src_reg.to128(),
+ }),
+ .data = undefined,
+ });
+ return;
+ }
+
+ return self.fail("TODO genBinOpMir for float register-register and no intrinsics", .{});
},
else => {
_ = try self.addInst(.{
@@ -5326,24 +5340,29 @@ fn genSetStackArg(self: *Self, ty: Type, stack_offset: i32, mcv: MCValue) InnerE
.register => |reg| {
switch (ty.zigTypeTag()) {
.Float => {
- const tag: Mir.Inst.Tag = switch (ty.tag()) {
- .f32 => .mov_f32,
- .f64 => .mov_f64,
- else => return self.fail("TODO genSetStackArg for register for type {}", .{ty.fmtDebug()}),
- };
- _ = try self.addInst(.{
- .tag = tag,
- .ops = Mir.Inst.Ops.encode(.{
- .reg1 = switch (ty.tag()) {
- .f32 => .esp,
- .f64 => .rsp,
- else => unreachable,
- },
- .reg2 = reg.to128(),
- .flags = 0b01,
- }),
- .data = .{ .imm = @bitCast(u32, -stack_offset) },
- });
+ if (self.intrinsicsAllowed(ty)) {
+ const tag: Mir.Inst.Tag = switch (ty.tag()) {
+ .f32 => .mov_f32_avx,
+ .f64 => .mov_f64_avx,
+ else => return self.fail("TODO genSetStackArg for register for type {}", .{ty.fmtDebug()}),
+ };
+ _ = try self.addInst(.{
+ .tag = tag,
+ .ops = Mir.Inst.Ops.encode(.{
+ .reg1 = switch (ty.tag()) {
+ .f32 => .esp,
+ .f64 => .rsp,
+ else => unreachable,
+ },
+ .reg2 = reg.to128(),
+ .flags = 0b01,
+ }),
+ .data = .{ .imm = @bitCast(u32, -stack_offset) },
+ });
+ return;
+ }
+
+ return self.fail("TODO genSetStackArg for register with no intrinsics", .{});
},
else => {
_ = try self.addInst(.{
@@ -5505,24 +5524,29 @@ fn genSetStack(self: *Self, ty: Type, stack_offset: i32, mcv: MCValue, opts: Inl
switch (ty.zigTypeTag()) {
.Float => {
- const tag: Mir.Inst.Tag = switch (ty.tag()) {
- .f32 => .mov_f32,
- .f64 => .mov_f64,
- else => return self.fail("TODO genSetStack for register for type {}", .{ty.fmtDebug()}),
- };
- _ = try self.addInst(.{
- .tag = tag,
- .ops = Mir.Inst.Ops.encode(.{
- .reg1 = switch (ty.tag()) {
- .f32 => base_reg.to32(),
- .f64 => base_reg.to64(),
- else => unreachable,
- },
- .reg2 = reg.to128(),
- .flags = 0b01,
- }),
- .data = .{ .imm = @bitCast(u32, -stack_offset) },
- });
+ if (self.intrinsicsAllowed(ty)) {
+ const tag: Mir.Inst.Tag = switch (ty.tag()) {
+ .f32 => .mov_f32_avx,
+ .f64 => .mov_f64_avx,
+ else => return self.fail("TODO genSetStack for register for type {}", .{ty.fmtDebug()}),
+ };
+ _ = try self.addInst(.{
+ .tag = tag,
+ .ops = Mir.Inst.Ops.encode(.{
+ .reg1 = switch (ty.tag()) {
+ .f32 => base_reg.to32(),
+ .f64 => base_reg.to64(),
+ else => unreachable,
+ },
+ .reg2 = reg.to128(),
+ .flags = 0b01,
+ }),
+ .data = .{ .imm = @bitCast(u32, -stack_offset) },
+ });
+ return;
+ }
+
+ return self.fail("TODO genSetStack for register for type float with no intrinsics", .{});
},
else => {
if (!math.isPowerOfTwo(abi_size)) {
@@ -6026,21 +6050,25 @@ fn genSetReg(self: *Self, ty: Type, reg: Register, mcv: MCValue) InnerError!void
},
},
.Float => {
- const tag: Mir.Inst.Tag = switch (ty.tag()) {
- .f32 => .mov_f32,
- .f64 => .mov_f64,
- else => return self.fail("TODO genSetReg from register for {}", .{ty.fmtDebug()}),
- };
- _ = try self.addInst(.{
- .tag = tag,
- .ops = Mir.Inst.Ops.encode(.{
- .reg1 = reg.to128(),
- .reg2 = src_reg.to128(),
- .flags = 0b10,
- }),
- .data = undefined,
- });
- return;
+ if (self.intrinsicsAllowed(ty)) {
+ const tag: Mir.Inst.Tag = switch (ty.tag()) {
+ .f32 => .mov_f32_avx,
+ .f64 => .mov_f64_avx,
+ else => return self.fail("TODO genSetReg from register for {}", .{ty.fmtDebug()}),
+ };
+ _ = try self.addInst(.{
+ .tag = tag,
+ .ops = Mir.Inst.Ops.encode(.{
+ .reg1 = reg.to128(),
+ .reg2 = src_reg.to128(),
+ .flags = 0b10,
+ }),
+ .data = undefined,
+ });
+ return;
+ }
+
+ return self.fail("TODO genSetReg from register for float with no intrinsics", .{});
},
else => {},
}
@@ -6073,24 +6101,29 @@ fn genSetReg(self: *Self, ty: Type, reg: Register, mcv: MCValue) InnerError!void
const base_reg = try self.register_manager.allocReg(null, .{ .selector_mask = gp });
try self.loadMemPtrIntoRegister(base_reg, Type.usize, mcv);
- const tag: Mir.Inst.Tag = switch (ty.tag()) {
- .f32 => .mov_f32,
- .f64 => .mov_f64,
- else => return self.fail("TODO genSetReg from memory for {}", .{ty.fmtDebug()}),
- };
+ if (self.intrinsicsAllowed(ty)) {
+ const tag: Mir.Inst.Tag = switch (ty.tag()) {
+ .f32 => .mov_f32_avx,
+ .f64 => .mov_f64_avx,
+ else => return self.fail("TODO genSetReg from memory for {}", .{ty.fmtDebug()}),
+ };
- _ = try self.addInst(.{
- .tag = tag,
- .ops = Mir.Inst.Ops.encode(.{
- .reg1 = reg.to128(),
- .reg2 = switch (ty.tag()) {
- .f32 => base_reg.to32(),
- .f64 => base_reg.to64(),
- else => unreachable,
- },
- }),
- .data = .{ .imm = 0 },
- });
+ _ = try self.addInst(.{
+ .tag = tag,
+ .ops = Mir.Inst.Ops.encode(.{
+ .reg1 = reg.to128(),
+ .reg2 = switch (ty.tag()) {
+ .f32 => base_reg.to32(),
+ .f64 => base_reg.to64(),
+ else => unreachable,
+ },
+ }),
+ .data = .{ .imm = 0 },
+ });
+ return;
+ }
+
+ return self.fail("TODO genSetReg from memory for float with no intrinsics", .{});
},
else => {
if (x <= math.maxInt(i32)) {
@@ -6183,24 +6216,27 @@ fn genSetReg(self: *Self, ty: Type, reg: Register, mcv: MCValue) InnerError!void
},
},
.Float => {
- const tag: Mir.Inst.Tag = switch (ty.tag()) {
- .f32 => .mov_f32,
- .f64 => .mov_f64,
- else => return self.fail("TODO genSetReg from stack offset for {}", .{ty.fmtDebug()}),
- };
- _ = try self.addInst(.{
- .tag = tag,
- .ops = Mir.Inst.Ops.encode(.{
- .reg1 = reg.to128(),
- .reg2 = switch (ty.tag()) {
- .f32 => .ebp,
- .f64 => .rbp,
- else => unreachable,
- },
- }),
- .data = .{ .imm = @bitCast(u32, -off) },
- });
- return;
+ if (self.intrinsicsAllowed(ty)) {
+ const tag: Mir.Inst.Tag = switch (ty.tag()) {
+ .f32 => .mov_f32_avx,
+ .f64 => .mov_f64_avx,
+ else => return self.fail("TODO genSetReg from stack offset for {}", .{ty.fmtDebug()}),
+ };
+ _ = try self.addInst(.{
+ .tag = tag,
+ .ops = Mir.Inst.Ops.encode(.{
+ .reg1 = reg.to128(),
+ .reg2 = switch (ty.tag()) {
+ .f32 => .ebp,
+ .f64 => .rbp,
+ else => unreachable,
+ },
+ }),
+ .data = .{ .imm = @bitCast(u32, -off) },
+ });
+ return;
+ }
+ return self.fail("TODO genSetReg from stack offset for float with no intrinsics", .{});
},
else => {},
}
@@ -6995,3 +7031,12 @@ fn truncateRegister(self: *Self, ty: Type, reg: Register) !void {
},
}
}
+
+fn intrinsicsAllowed(self: *Self, ty: Type) bool {
+ return switch (ty.tag()) {
+ .f32,
+ .f64,
+ => Target.x86.featureSetHasAny(self.target.cpu.features, .{ .avx, .avx2 }),
+ else => unreachable, // TODO finish this off
+ };
+}
diff --git a/src/arch/x86_64/Emit.zig b/src/arch/x86_64/Emit.zig
index 96f640b610..fbcd8359f7 100644
--- a/src/arch/x86_64/Emit.zig
+++ b/src/arch/x86_64/Emit.zig
@@ -183,14 +183,14 @@ pub fn lowerMir(emit: *Emit) InnerError!void {
.nop => try emit.mirNop(),
// AVX instructions
- .mov_f64 => try emit.mirMovFloatAvx(.vmovsd, inst),
- .mov_f32 => try emit.mirMovFloatAvx(.vmovss, inst),
+ .mov_f64_avx => try emit.mirMovFloatAvx(.vmovsd, inst),
+ .mov_f32_avx => try emit.mirMovFloatAvx(.vmovss, inst),
- .add_f64 => try emit.mirAddFloatAvx(.vaddsd, inst),
- .add_f32 => try emit.mirAddFloatAvx(.vaddss, inst),
+ .add_f64_avx => try emit.mirAddFloatAvx(.vaddsd, inst),
+ .add_f32_avx => try emit.mirAddFloatAvx(.vaddss, inst),
- .cmp_f64 => try emit.mirCmpFloatAvx(.vucomisd, inst),
- .cmp_f32 => try emit.mirCmpFloatAvx(.vucomiss, inst),
+ .cmp_f64_avx => try emit.mirCmpFloatAvx(.vucomisd, inst),
+ .cmp_f32_avx => try emit.mirCmpFloatAvx(.vucomiss, inst),
// Pseudo-instructions
.call_extern => try emit.mirCallExtern(inst),
diff --git a/src/arch/x86_64/Mir.zig b/src/arch/x86_64/Mir.zig
index dc8c1fa0b2..0f200d43e6 100644
--- a/src/arch/x86_64/Mir.zig
+++ b/src/arch/x86_64/Mir.zig
@@ -350,18 +350,18 @@ pub const Inst = struct {
/// 0b00 reg1, qword ptr [reg2 + imm32]
/// 0b01 qword ptr [reg1 + imm32], reg2
/// 0b10 reg1, reg2
- mov_f64,
- mov_f32,
+ mov_f64_avx,
+ mov_f32_avx,
/// ops flags: form:
/// 0b00 reg1, reg1, reg2
- add_f64,
- add_f32,
+ add_f64_avx,
+ add_f32_avx,
/// ops flags: form:
///
- cmp_f64,
- cmp_f32,
+ cmp_f64_avx,
+ cmp_f32_avx,
/// Pseudo-instructions
/// call extern function
diff --git a/src/arch/x86_64/abi.zig b/src/arch/x86_64/abi.zig
index bf85f002d1..7e2025a23d 100644
--- a/src/arch/x86_64/abi.zig
+++ b/src/arch/x86_64/abi.zig
@@ -383,11 +383,11 @@ pub const caller_preserved_regs = [_]Register{ .rax, .rcx, .rdx, .rsi, .rdi, .r8
pub const c_abi_int_param_regs = [_]Register{ .rdi, .rsi, .rdx, .rcx, .r8, .r9 };
pub const c_abi_int_return_regs = [_]Register{ .rax, .rdx };
-const avx_regs = [_]Register{
+const sse_avx_regs = [_]Register{
.ymm0, .ymm1, .ymm2, .ymm3, .ymm4, .ymm5, .ymm6, .ymm7,
.ymm8, .ymm9, .ymm10, .ymm11, .ymm12, .ymm13, .ymm14, .ymm15,
};
-const allocatable_registers = callee_preserved_regs ++ caller_preserved_regs ++ avx_regs;
+const allocatable_registers = callee_preserved_regs ++ caller_preserved_regs ++ sse_avx_regs;
pub const RegisterManager = RegisterManagerFn(@import("CodeGen.zig"), Register, &allocatable_registers);
// Register classes
@@ -401,7 +401,7 @@ pub const RegisterClass = struct {
}, true);
break :blk set;
};
- pub const avx: RegisterBitSet = blk: {
+ pub const sse: RegisterBitSet = blk: {
var set = RegisterBitSet.initEmpty();
set.setRangeValue(.{
.start = caller_preserved_regs.len + callee_preserved_regs.len,