| field | value | |
|---|---|---|
| author | Jacob Young <jacobly0@users.noreply.github.com> | 2023-04-28 20:39:38 -0400 |
| committer | Jacob Young <jacobly0@users.noreply.github.com> | 2023-05-01 19:22:52 -0400 |
| commit | 4ec49da5f6a6a8e77cdb66b8f814718bf11fffef (patch) | |
| tree | 38e53fbeadbffa706bff35209613a090bcbd5201 /src | |
| parent | 7c9891d7b7d09060630693231702f27669dd0dc9 (diff) | |
| download | zig-4ec49da5f6a6a8e77cdb66b8f814718bf11fffef.tar.gz, zig-4ec49da5f6a6a8e77cdb66b8f814718bf11fffef.zip | |
x86_64: implement a bunch of floating point stuff
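The most interesting trick in this commit is the new `airNeg` lowering (in `src/arch/x86_64/CodeGen.zig` below): IEEE-754 negation only flips the sign bit, so the backend emits a single `xorps`/`xorpd` against a 128-bit constant built by splatting `-0.0`. A minimal userspace sketch of that same identity — the `negViaSignBit` helper is hypothetical and not part of the commit, written against the two-argument builtins of the Zig version this commit targets:

```zig
const std = @import("std");

/// Hypothetical helper: negate a float by XOR-ing its sign bit — the same
/// identity airNeg lowers to one xorps/xorpd against a splat of -0.0.
fn negViaSignBit(comptime F: type, x: F) F {
    const Bits = std.meta.Int(.unsigned, @bitSizeOf(F));
    // -0.0 is all zeros except the sign bit, so it serves as the XOR mask.
    const sign_mask = @bitCast(Bits, @as(F, -0.0));
    return @bitCast(F, @bitCast(Bits, x) ^ sign_mask);
}

test "xor with the sign mask behaves like negation" {
    try std.testing.expectEqual(@as(f32, -1.5), negViaSignBit(f32, 1.5));
    try std.testing.expectEqual(@as(f64, 2.25), negViaSignBit(f64, -2.25));
}
```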
Diffstat (limited to 'src')
| mode | file | lines |
|---|---|---|
| -rw-r--r-- | src/arch/x86_64/CodeGen.zig | 438 |
| -rw-r--r-- | src/arch/x86_64/Encoding.zig | 27 |
| -rw-r--r-- | src/arch/x86_64/Lower.zig | 6 |
| -rw-r--r-- | src/arch/x86_64/Mir.zig | 12 |
| -rw-r--r-- | src/arch/x86_64/encoder.zig | 2 |
| -rw-r--r-- | src/arch/x86_64/encodings.zig | 14 |
| -rw-r--r-- | src/codegen.zig | 103 |
7 files changed, 486 insertions, 116 deletions
```diff
diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig
index b862252561..d9c7298c95 100644
--- a/src/arch/x86_64/CodeGen.zig
+++ b/src/arch/x86_64/CodeGen.zig
@@ -1297,9 +1297,10 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void {
             .ceil,
             .round,
             .trunc_float,
-            .neg,
             => try self.airUnaryMath(inst),
 
+            .neg => try self.airNeg(inst),
+
             .add_with_overflow => try self.airAddSubWithOverflow(inst),
             .sub_with_overflow => try self.airAddSubWithOverflow(inst),
             .mul_with_overflow => try self.airMulWithOverflow(inst),
@@ -1881,7 +1882,7 @@ pub fn spillRegisters(self: *Self, registers: []const Register) !void {
 /// allocated. A second call to `copyToTmpRegister` may return the same register.
 /// This can have a side effect of spilling instructions to the stack to free up a register.
 fn copyToTmpRegister(self: *Self, ty: Type, mcv: MCValue) !Register {
-    const reg: Register = try self.register_manager.allocReg(null, try self.regClassForType(ty));
+    const reg = try self.register_manager.allocReg(null, try self.regClassForType(ty));
     try self.genSetReg(reg, ty, mcv);
     return reg;
 }
@@ -1924,16 +1925,48 @@ fn airRetPtr(self: *Self, inst: Air.Inst.Index) !void {
 
 fn airFptrunc(self: *Self, inst: Air.Inst.Index) !void {
     const ty_op = self.air.instructions.items(.data)[inst].ty_op;
-    _ = ty_op;
-    return self.fail("TODO implement airFptrunc for {}", .{self.target.cpu.arch});
-    // return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
+    const dst_ty = self.air.typeOfIndex(inst);
+    const src_ty = self.air.typeOf(ty_op.operand);
+    if (dst_ty.floatBits(self.target.*) != 32 or src_ty.floatBits(self.target.*) != 64 or
+        !Target.x86.featureSetHas(self.target.cpu.features, .sse2))
+        return self.fail("TODO implement airFptrunc from {} to {}", .{
+            src_ty.fmt(self.bin_file.options.module.?),
+            dst_ty.fmt(self.bin_file.options.module.?),
+        });
+
+    const src_mcv = try self.resolveInst(ty_op.operand);
+    const dst_mcv = if (src_mcv.isRegister() and self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
+        src_mcv
+    else
+        try self.copyToRegisterWithInstTracking(inst, dst_ty, src_mcv);
+    const dst_lock = self.register_manager.lockReg(dst_mcv.register);
+    defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
+
+    try self.genBinOpMir(.cvtsd2ss, src_ty, dst_mcv, src_mcv);
+    return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none });
 }
 
 fn airFpext(self: *Self, inst: Air.Inst.Index) !void {
     const ty_op = self.air.instructions.items(.data)[inst].ty_op;
-    _ = ty_op;
-    return self.fail("TODO implement airFpext for {}", .{self.target.cpu.arch});
-    // return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
+    const dst_ty = self.air.typeOfIndex(inst);
+    const src_ty = self.air.typeOf(ty_op.operand);
+    if (dst_ty.floatBits(self.target.*) != 64 or src_ty.floatBits(self.target.*) != 32 or
+        !Target.x86.featureSetHas(self.target.cpu.features, .sse2))
+        return self.fail("TODO implement airFpext from {} to {}", .{
+            src_ty.fmt(self.bin_file.options.module.?),
+            dst_ty.fmt(self.bin_file.options.module.?),
+        });
+
+    const src_mcv = try self.resolveInst(ty_op.operand);
+    const dst_mcv = if (src_mcv.isRegister() and self.reuseOperand(inst, ty_op.operand, 0, src_mcv))
+        src_mcv
+    else
+        try self.copyToRegisterWithInstTracking(inst, dst_ty, src_mcv);
+    const dst_lock = self.register_manager.lockReg(dst_mcv.register);
+    defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
+
+    try self.genBinOpMir(.cvtss2sd, src_ty, dst_mcv, src_mcv);
+    return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none });
 }
 
 fn airIntCast(self: *Self, inst: Air.Inst.Index) !void {
@@ -3953,10 +3986,65 @@ fn airBitReverse(self: *Self, inst: Air.Inst.Index) !void {
     return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none });
 }
 
+fn airNeg(self: *Self, inst: Air.Inst.Index) !void {
+    const un_op = self.air.instructions.items(.data)[inst].un_op;
+    const ty = self.air.typeOf(un_op);
+    const ty_bits = ty.floatBits(self.target.*);
+
+    var arena = std.heap.ArenaAllocator.init(self.gpa);
+    defer arena.deinit();
+
+    const ExpectedContents = union {
+        f16: Value.Payload.Float_16,
+        f32: Value.Payload.Float_32,
+        f64: Value.Payload.Float_64,
+        f80: Value.Payload.Float_80,
+        f128: Value.Payload.Float_128,
+    };
+    var stack align(@alignOf(ExpectedContents)) =
+        std.heap.stackFallback(@sizeOf(ExpectedContents), arena.allocator());
+
+    var vec_pl = Type.Payload.Array{
+        .base = .{ .tag = .vector },
+        .data = .{
+            .len = @divExact(128, ty_bits),
+            .elem_type = ty,
+        },
+    };
+    const vec_ty = Type.initPayload(&vec_pl.base);
+
+    var sign_pl = Value.Payload.SubValue{
+        .base = .{ .tag = .repeated },
+        .data = try Value.floatToValue(-0.0, stack.get(), ty, self.target.*),
+    };
+    const sign_val = Value.initPayload(&sign_pl.base);
+
+    const sign_mcv = try self.genTypedValue(.{ .ty = vec_ty, .val = sign_val });
+
+    const src_mcv = try self.resolveInst(un_op);
+    const dst_mcv = if (src_mcv.isRegister() and self.reuseOperand(inst, un_op, 0, src_mcv))
+        src_mcv
+    else
+        try self.copyToRegisterWithInstTracking(inst, ty, src_mcv);
+    const dst_lock = self.register_manager.lockReg(dst_mcv.register);
+    defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
+
+    try self.genBinOpMir(switch (ty_bits) {
+        32 => .xorps,
+        64 => .xorpd,
+        else => return self.fail("TODO implement airNeg for {}", .{
+            ty.fmt(self.bin_file.options.module.?),
+        }),
+    }, vec_ty, dst_mcv, sign_mcv);
+    return self.finishAir(inst, dst_mcv, .{ un_op, .none, .none });
+}
+
 fn airUnaryMath(self: *Self, inst: Air.Inst.Index) !void {
     const un_op = self.air.instructions.items(.data)[inst].un_op;
     _ = un_op;
-    return self.fail("TODO implement airUnaryMath for {}", .{self.target.cpu.arch});
+    return self.fail("TODO implement airUnaryMath for {}", .{
+        self.air.instructions.items(.tag)[inst],
+    });
     //return self.finishAir(inst, result, .{ un_op, .none, .none });
 }
 
@@ -4109,7 +4197,6 @@ fn load(self: *Self, dst_mcv: MCValue, ptr_ty: Type, ptr_mcv: MCValue) InnerErro
 fn airLoad(self: *Self, inst: Air.Inst.Index) !void {
     const ty_op = self.air.instructions.items(.data)[inst].ty_op;
     const elem_ty = self.air.typeOfIndex(inst);
-    const elem_size = elem_ty.abiSize(self.target.*);
 
     const result: MCValue = result: {
         if (!elem_ty.hasRuntimeBitsIgnoreComptime()) break :result .none;
@@ -4117,14 +4204,20 @@ fn airLoad(self: *Self, inst: Air.Inst.Index) !void {
         const reg_locks = self.register_manager.lockRegsAssumeUnused(3, .{ .rdi, .rsi, .rcx });
         defer for (reg_locks) |lock| self.register_manager.unlockReg(lock);
 
+        const ptr_ty = self.air.typeOf(ty_op.operand);
+        const elem_size = elem_ty.abiSize(self.target.*);
+
+        const elem_rc = try self.regClassForType(elem_ty);
+        const ptr_rc = try self.regClassForType(ptr_ty);
+
         const ptr_mcv = try self.resolveInst(ty_op.operand);
-        const dst_mcv = if (elem_size <= 8 and self.reuseOperand(inst, ty_op.operand, 0, ptr_mcv))
+        const dst_mcv = if (elem_size <= 8 and elem_rc.supersetOf(ptr_rc) and
+            self.reuseOperand(inst, ty_op.operand, 0, ptr_mcv))
             // The MCValue that holds the pointer can be re-used as the value.
             ptr_mcv
         else
             try self.allocRegOrMem(inst, true);
 
-        const ptr_ty = self.air.typeOf(ty_op.operand);
         if (ptr_ty.ptrInfo().data.host_size > 0) {
             try self.packedLoad(dst_mcv, ptr_ty, ptr_mcv);
         } else {
@@ -4346,17 +4439,9 @@ fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void {
 
         switch (src_mcv) {
             .load_frame => |frame_addr| {
-                const field_abi_size = @intCast(u32, field_ty.abiSize(self.target.*));
-                const limb_abi_size = @min(field_abi_size, 8);
-                const limb_abi_bits = limb_abi_size * 8;
-                const field_byte_off = @intCast(i32, field_off / limb_abi_bits * limb_abi_size);
-                const field_bit_off = field_off % limb_abi_bits;
-
-                if (field_bit_off == 0) {
-                    const off_mcv = MCValue{ .load_frame = .{
-                        .index = frame_addr.index,
-                        .off = frame_addr.off + field_byte_off,
-                    } };
+                if (field_off % 8 == 0) {
+                    const off_mcv =
+                        src_mcv.address().offset(@intCast(i32, @divExact(field_off, 8))).deref();
                     if (self.reuseOperand(inst, operand, 0, src_mcv)) break :result off_mcv;
 
                     const dst_mcv = try self.allocRegOrMem(inst, true);
@@ -4364,6 +4449,12 @@ fn airStructFieldVal(self: *Self, inst: Air.Inst.Index) !void {
                     break :result dst_mcv;
                 }
 
+                const field_abi_size = @intCast(u32, field_ty.abiSize(self.target.*));
+                const limb_abi_size = @min(field_abi_size, 8);
+                const limb_abi_bits = limb_abi_size * 8;
+                const field_byte_off = @intCast(i32, field_off / limb_abi_bits * limb_abi_size);
+                const field_bit_off = field_off % limb_abi_bits;
+
                 if (field_abi_size > 8) {
                     return self.fail("TODO implement struct_field_val with large packed field", .{});
                 }
@@ -5181,24 +5272,69 @@ fn genBinOp(
     switch (tag) {
         .add,
        .addwrap,
-        => try self.genBinOpMir(switch (lhs_ty.tag()) {
+        => try self.genBinOpMir(switch (lhs_ty.zigTypeTag()) {
             else => .add,
-            .f32 => .addss,
-            .f64 => .addsd,
+            .Float => switch (lhs_ty.floatBits(self.target.*)) {
+                32 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse))
+                    .addss
+                else
+                    return self.fail("TODO implement genBinOp for {s} {} without sse", .{
+                        @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?),
+                    }),
+                64 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse2))
+                    .addsd
+                else
+                    return self.fail("TODO implement genBinOp for {s} {} without sse2", .{
+                        @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?),
+                    }),
+                else => return self.fail("TODO implement genBinOp for {s} {}", .{
+                    @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?),
+                }),
+            },
         }, lhs_ty, dst_mcv, src_mcv),
 
         .sub,
         .subwrap,
-        => try self.genBinOpMir(switch (lhs_ty.tag()) {
+        => try self.genBinOpMir(switch (lhs_ty.zigTypeTag()) {
             else => .sub,
-            .f32 => .subss,
-            .f64 => .subsd,
+            .Float => switch (lhs_ty.floatBits(self.target.*)) {
+                32 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse))
+                    .subss
+                else
+                    return self.fail("TODO implement genBinOp for {s} {} without sse", .{
+                        @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?),
+                    }),
+                64 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse2))
+                    .subsd
+                else
+                    return self.fail("TODO implement genBinOp for {s} {} without sse2", .{
+                        @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?),
+                    }),
+                else => return self.fail("TODO implement genBinOp for {s} {}", .{
+                    @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?),
+                }),
+            },
         }, lhs_ty, dst_mcv, src_mcv),
 
-        .mul => try self.genBinOpMir(switch (lhs_ty.tag()) {
-            .f32 => .mulss,
-            .f64 => .mulsd,
+        .mul => try self.genBinOpMir(switch (lhs_ty.zigTypeTag()) {
             else => return self.fail("TODO implement genBinOp for {s} {}", .{ @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?) }),
+            .Float => switch (lhs_ty.floatBits(self.target.*)) {
+                32 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse))
+                    .mulss
+                else
+                    return self.fail("TODO implement genBinOp for {s} {} without sse", .{
+                        @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?),
+                    }),
+                64 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse2))
+                    .mulsd
+                else
+                    return self.fail("TODO implement genBinOp for {s} {} without sse2", .{
+                        @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?),
+                    }),
+                else => return self.fail("TODO implement genBinOp for {s} {}", .{
+                    @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?),
+                }),
+            },
         }, lhs_ty, dst_mcv, src_mcv),
 
         .div_float,
@@ -5206,12 +5342,27 @@ fn genBinOp(
         .div_exact,
         .div_trunc,
         .div_floor,
         => {
-            try self.genBinOpMir(switch (lhs_ty.tag()) {
-                .f32 => .divss,
-                .f64 => .divsd,
+            try self.genBinOpMir(switch (lhs_ty.zigTypeTag()) {
                 else => return self.fail("TODO implement genBinOp for {s} {}", .{
                     @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?),
                 }),
+                .Float => switch (lhs_ty.floatBits(self.target.*)) {
+                    32 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse))
+                        .divss
+                    else
+                        return self.fail("TODO implement genBinOp for {s} {} without sse", .{
+                            @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?),
+                        }),
+                    64 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse2))
+                        .divsd
+                    else
+                        return self.fail("TODO implement genBinOp for {s} {} without sse2", .{
+                            @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?),
+                        }),
+                    else => return self.fail("TODO implement genBinOp for {s} {}", .{
+                        @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?),
+                    }),
+                },
             }, lhs_ty, dst_mcv, src_mcv);
             switch (tag) {
                 .div_float,
@@ -5222,16 +5373,18 @@ fn genBinOp(
                 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse4_1)) {
                     const abi_size = @intCast(u32, lhs_ty.abiSize(self.target.*));
                     const dst_alias = registerAlias(dst_mcv.register, abi_size);
-                    try self.asmRegisterRegisterImmediate(switch (lhs_ty.tag()) {
-                        .f32 => .roundss,
-                        .f64 => .roundsd,
+                    try self.asmRegisterRegisterImmediate(switch (lhs_ty.floatBits(self.target.*)) {
+                        32 => .roundss,
+                        64 => .roundsd,
                         else => unreachable,
                     }, dst_alias, dst_alias, Immediate.u(switch (tag) {
                         .div_trunc => 0b1_0_11,
                         .div_floor => 0b1_0_01,
                         else => unreachable,
                     }));
-                } else return self.fail("TODO implement round without sse4_1", .{}),
+                } else return self.fail("TODO implement genBinOp for {s} {} without sse4_1", .{
+                    @tagName(tag), lhs_ty.fmt(self.bin_file.options.module.?),
+                }),
                 else => unreachable,
             }
         },
@@ -5453,39 +5606,68 @@ fn genBinOpMir(self: *Self, mir_tag: Mir.Inst.Tag, ty: Type, dst_mcv: MCValue, s
                         )),
                     else => unreachable,
                 },
-                .register_offset,
                 .eflags,
+                .register_offset,
                 .memory,
+                .indirect,
                 .load_direct,
                 .lea_direct,
                 .load_got,
                 .lea_got,
                 .load_tlv,
                 .lea_tlv,
+                .load_frame,
                 .lea_frame,
                 => {
-                    assert(abi_size <= 8);
+                    blk: {
+                        return self.asmRegisterMemory(
+                            mir_tag,
+                            registerAlias(dst_reg, abi_size),
+                            Memory.sib(Memory.PtrSize.fromSize(abi_size), switch (src_mcv) {
+                                .memory => |addr| .{
+                                    .base = .{ .reg = .ds },
+                                    .disp = math.cast(i32, addr) orelse break :blk,
+                                },
+                                .indirect => |reg_off| .{
+                                    .base = .{ .reg = reg_off.reg },
+                                    .disp = reg_off.off,
+                                },
+                                .load_frame => |frame_addr| .{
+                                    .base = .{ .frame = frame_addr.index },
+                                    .disp = frame_addr.off,
+                                },
+                                else => break :blk,
+                            }),
+                        );
+                    }
+
                     const dst_reg_lock = self.register_manager.lockReg(dst_reg);
                     defer if (dst_reg_lock) |lock| self.register_manager.unlockReg(lock);
 
-                    const reg = try self.copyToTmpRegister(ty, src_mcv);
-                    return self.genBinOpMir(mir_tag, ty, dst_mcv, .{ .register = reg });
-                },
-                .indirect, .load_frame => try self.asmRegisterMemory(
-                    mir_tag,
-                    registerAlias(dst_reg, abi_size),
-                    Memory.sib(Memory.PtrSize.fromSize(abi_size), switch (src_mcv) {
-                        .indirect => |reg_off| .{
-                            .base = .{ .reg = reg_off.reg },
-                            .disp = reg_off.off,
-                        },
-                        .load_frame => |frame_addr| .{
-                            .base = .{ .frame = frame_addr.index },
-                            .disp = frame_addr.off,
-                        },
+                    switch (src_mcv) {
+                        .eflags,
+                        .register_offset,
+                        .lea_direct,
+                        .lea_got,
+                        .lea_tlv,
+                        .lea_frame,
+                        => {
+                            const reg = try self.copyToTmpRegister(ty, src_mcv);
+                            return self.genBinOpMir(mir_tag, ty, dst_mcv, .{ .register = reg });
+                        },
+                        .memory,
+                        .load_direct,
+                        .load_got,
+                        .load_tlv,
+                        => {
+                            const addr_reg = try self.copyToTmpRegister(ty, src_mcv.address());
+                            return self.genBinOpMir(mir_tag, ty, dst_mcv, .{
+                                .indirect = .{ .reg = addr_reg },
+                            });
+                        },
                         else => unreachable,
-                    }),
-                ),
+                    }
+                },
             }
         },
         .memory, .indirect, .load_got, .load_direct, .load_tlv, .load_frame => {
@@ -6175,10 +6357,25 @@ fn airCmp(self: *Self, inst: Air.Inst.Index, op: math.CompareOperator) !void {
         defer if (dst_lock) |lock| self.register_manager.unlockReg(lock);
 
         const src_mcv = if (flipped) lhs_mcv else rhs_mcv;
-        try self.genBinOpMir(switch (ty.tag()) {
+        try self.genBinOpMir(switch (ty.zigTypeTag()) {
             else => .cmp,
-            .f32 => .ucomiss,
-            .f64 => .ucomisd,
+            .Float => switch (ty.floatBits(self.target.*)) {
+                32 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse))
+                    .ucomiss
+                else
+                    return self.fail("TODO implement airCmp for {} without sse", .{
+                        ty.fmt(self.bin_file.options.module.?),
+                    }),
+                64 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse2))
+                    .ucomisd
+                else
+                    return self.fail("TODO implement airCmp for {} without sse2", .{
+                        ty.fmt(self.bin_file.options.module.?),
+                    }),
+                else => return self.fail("TODO implement airCmp for {}", .{
+                    ty.fmt(self.bin_file.options.module.?),
+                }),
+            },
         }, ty, dst_mcv, src_mcv);
 
         const signedness = if (ty.isAbiInt()) ty.intInfo(self.target.*).signedness else .unsigned;
@@ -7608,7 +7805,8 @@ fn airBitCast(self: *Self, inst: Air.Inst.Index) !void {
         const dst_rc = try self.regClassForType(dst_ty);
         const src_rc = try self.regClassForType(src_ty);
         const operand = try self.resolveInst(ty_op.operand);
-        if (dst_rc.eql(src_rc) and self.reuseOperand(inst, ty_op.operand, 0, operand)) break :result operand;
+        if (dst_rc.supersetOf(src_rc) and self.reuseOperand(inst, ty_op.operand, 0, operand))
+            break :result operand;
 
         const operand_lock = switch (operand) {
            .register => |reg| self.register_manager.lockReg(reg),
@@ -7648,9 +7846,59 @@ fn airArrayToSlice(self: *Self, inst: Air.Inst.Index) !void {
 
 fn airIntToFloat(self: *Self, inst: Air.Inst.Index) !void {
     const ty_op = self.air.instructions.items(.data)[inst].ty_op;
-    _ = ty_op;
-    return self.fail("TODO implement airIntToFloat for {}", .{self.target.cpu.arch});
-    //return self.finishAir(inst, result, .{ ty_op.operand, .none, .none });
+
+    const src_ty = self.air.typeOf(ty_op.operand);
+    const src_bits = @intCast(u32, src_ty.bitSize(self.target.*));
+    const src_signedness =
+        if (src_ty.isAbiInt()) src_ty.intInfo(self.target.*).signedness else .unsigned;
+    const dst_ty = self.air.typeOfIndex(inst);
+
+    const src_size = std.math.divCeil(u32, @max(switch (src_signedness) {
+        .signed => src_bits,
+        .unsigned => src_bits + 1,
+    }, 32), 8) catch unreachable;
+    if (src_size > 8) return self.fail("TODO implement airIntToFloat from {} to {}", .{
+        src_ty.fmt(self.bin_file.options.module.?),
+        dst_ty.fmt(self.bin_file.options.module.?),
+    });
+
+    const src_mcv = try self.resolveInst(ty_op.operand);
+    const src_reg = switch (src_mcv) {
+        .register => |reg| reg,
+        else => try self.copyToTmpRegister(src_ty, src_mcv),
+    };
+    const src_lock = self.register_manager.lockRegAssumeUnused(src_reg);
+    defer self.register_manager.unlockReg(src_lock);
+
+    if (src_bits < src_size * 8) try self.truncateRegister(src_ty, src_reg);
+
+    const dst_reg = try self.register_manager.allocReg(inst, try self.regClassForType(dst_ty));
+    const dst_mcv = MCValue{ .register = dst_reg };
+    const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg);
+    defer self.register_manager.unlockReg(dst_lock);
+
+    try self.asmRegisterRegister(switch (dst_ty.floatBits(self.target.*)) {
+        32 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse))
+            .cvtsi2ss
+        else
+            return self.fail("TODO implement airIntToFloat from {} to {} without sse", .{
+                src_ty.fmt(self.bin_file.options.module.?),
+                dst_ty.fmt(self.bin_file.options.module.?),
+            }),
+        64 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse2))
+            .cvtsi2sd
+        else
+            return self.fail("TODO implement airIntToFloat from {} to {} without sse2", .{
+                src_ty.fmt(self.bin_file.options.module.?),
+                dst_ty.fmt(self.bin_file.options.module.?),
+            }),
+        else => return self.fail("TODO implement airIntToFloat from {} to {}", .{
+            src_ty.fmt(self.bin_file.options.module.?),
+            dst_ty.fmt(self.bin_file.options.module.?),
+        }),
+    }, dst_reg.to128(), registerAlias(src_reg, src_size));
+
+    return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none });
 }
 
 fn airFloatToInt(self: *Self, inst: Air.Inst.Index) !void {
@@ -8717,6 +8965,7 @@ fn resolveCallingConventionValues(
         },
         .C => {
             var param_reg_i: usize = 0;
+            var param_sse_reg_i: usize = 0;
             result.stack_align = 16;
 
             switch (self.target.os.tag) {
@@ -8734,26 +8983,39 @@ fn resolveCallingConventionValues(
                     // TODO: is this even possible for C calling convention?
                    result.return_value = InstTracking.init(.none);
                 } else {
-                    const ret_reg = abi.getCAbiIntReturnRegs(self.target.*)[0];
-                    const ret_ty_size = @intCast(u31, ret_ty.abiSize(self.target.*));
-                    if (ret_ty_size <= 8) {
-                        const aliased_reg = registerAlias(ret_reg, ret_ty_size);
-                        result.return_value = .{ .short = .{ .register = aliased_reg }, .long = .none };
-                    } else {
-                        const ret_indirect_reg = abi.getCAbiIntParamRegs(self.target.*)[param_reg_i];
-                        param_reg_i += 1;
-                        result.return_value = .{
-                            .short = .{ .indirect = .{ .reg = ret_reg } },
-                            .long = .{ .indirect = .{ .reg = ret_indirect_reg } },
-                        };
+                    const classes = switch (self.target.os.tag) {
+                        .windows => &[1]abi.Class{abi.classifyWindows(ret_ty, self.target.*)},
+                        else => mem.sliceTo(&abi.classifySystemV(ret_ty, self.target.*, .ret), .none),
+                    };
+                    if (classes.len > 1) {
+                        return self.fail("TODO handle multiple classes per type", .{});
                     }
+                    const ret_reg = abi.getCAbiIntReturnRegs(self.target.*)[0];
+                    result.return_value = switch (classes[0]) {
+                        .integer => InstTracking.init(.{ .register = registerAlias(
+                            ret_reg,
+                            @intCast(u32, ret_ty.abiSize(self.target.*)),
+                        ) }),
+                        .float, .sse => InstTracking.init(.{ .register = .xmm0 }),
+                        .memory => ret: {
+                            const ret_indirect_reg = abi.getCAbiIntParamRegs(self.target.*)[param_reg_i];
+                            param_reg_i += 1;
+                            break :ret .{
+                                .short = .{ .indirect = .{ .reg = ret_reg } },
+                                .long = .{ .indirect = .{ .reg = ret_indirect_reg } },
+                            };
+                        },
+                        else => |class| return self.fail("TODO handle calling convention class {s}", .{
+                            @tagName(class),
+                        }),
+                    };
                 }
 
                 // Input params
                 for (param_types, result.args) |ty, *arg| {
                     assert(ty.hasRuntimeBitsIgnoreComptime());
-                    const classes: []const abi.Class = switch (self.target.os.tag) {
+                    const classes = switch (self.target.os.tag) {
                         .windows => &[1]abi.Class{abi.classifyWindows(ty, self.target.*)},
                         else => mem.sliceTo(&abi.classifySystemV(ty, self.target.*, .arg), .none),
                     };
@@ -8761,13 +9023,29 @@ fn resolveCallingConventionValues(
                         return self.fail("TODO handle multiple classes per type", .{});
                     }
                     switch (classes[0]) {
-                        .integer => blk: {
-                            if (param_reg_i >= abi.getCAbiIntParamRegs(self.target.*).len) break :blk;
-                            const param_reg = abi.getCAbiIntParamRegs(self.target.*)[param_reg_i];
+                        .integer => if (param_reg_i < abi.getCAbiIntParamRegs(self.target.*).len) {
+                            arg.* = .{ .register = abi.getCAbiIntParamRegs(self.target.*)[param_reg_i] };
                             param_reg_i += 1;
-                            arg.* = .{ .register = param_reg };
                             continue;
                         },
+                        .float, .sse => switch (self.target.os.tag) {
+                            .windows => if (param_reg_i < 4) {
+                                arg.* = .{ .register = @intToEnum(
+                                    Register,
+                                    @enumToInt(Register.xmm0) + param_reg_i,
+                                ) };
+                                param_reg_i += 1;
+                                continue;
+                            },
+                            else => if (param_sse_reg_i < 8) {
+                                arg.* = .{ .register = @intToEnum(
+                                    Register,
+                                    @enumToInt(Register.xmm0) + param_sse_reg_i,
+                                ) };
+                                param_sse_reg_i += 1;
+                                continue;
+                            },
+                        },
                         .memory => {}, // fallthrough
                         else => |class| return self.fail("TODO handle calling convention class {s}", .{
                             @tagName(class),
diff --git a/src/arch/x86_64/Encoding.zig b/src/arch/x86_64/Encoding.zig
index a977af7842..5cb7f7a2d9 100644
--- a/src/arch/x86_64/Encoding.zig
+++ b/src/arch/x86_64/Encoding.zig
@@ -58,7 +58,7 @@ pub fn findByMnemonic(
     next: for (mnemonic_to_encodings_map[@enumToInt(mnemonic)]) |data| {
         switch (data.mode) {
             .rex => if (!rex_required) continue,
-            .long, .sse2_long => {},
+            .long, .sse_long, .sse2_long => {},
             else => if (rex_required) continue,
         }
         for (input_ops, data.ops) |input_op, data_op|
@@ -90,7 +90,7 @@ pub fn findByOpcode(opc: []const u8, prefixes: struct {
         if (prefixes.rex.w) {
             switch (data.mode) {
                 .short, .fpu, .sse, .sse2, .sse4_1, .none => continue,
-                .long, .sse2_long, .rex => {},
+                .long, .sse_long, .sse2_long, .rex => {},
             }
         } else if (prefixes.rex.present and !prefixes.rex.isSet()) {
             switch (data.mode) {
@@ -138,7 +138,7 @@ pub fn modRmExt(encoding: Encoding) u3 {
 pub fn operandBitSize(encoding: Encoding) u64 {
     switch (encoding.data.mode) {
         .short => return 16,
-        .long, .sse2_long => return 64,
+        .long, .sse_long, .sse2_long => return 64,
         else => {},
     }
     const bit_size: u64 = switch (encoding.data.op_en) {
@@ -163,7 +163,7 @@ pub fn format(
     _ = options;
     _ = fmt;
     switch (encoding.data.mode) {
-        .long, .sse2_long => try writer.writeAll("REX.W + "),
+        .long, .sse_long, .sse2_long => try writer.writeAll("REX.W + "),
         else => {},
     }
 
@@ -269,21 +269,25 @@ pub const Mnemonic = enum {
     // SSE
     addss,
     cmpss,
+    cvtsi2ss,
     divss,
     maxss, minss,
     movss,
     mulss,
     subss,
     ucomiss,
+    xorps,
     // SSE2
     addsd,
     //cmpsd,
+    cvtsd2ss, cvtsi2sd, cvtss2sd,
     divsd,
     maxsd, minsd,
     movq, //movd,
     movsd,
     mulsd,
     subsd,
     ucomisd,
+    xorpd,
     // SSE4.1
     roundss, roundsd,
@@ -318,7 +322,7 @@ pub const Op = enum {
     m,
     moffs,
     sreg,
-    xmm, xmm_m32, xmm_m64,
+    xmm, xmm_m32, xmm_m64, xmm_m128,
     // zig fmt: on
 
     pub fn fromOperand(operand: Instruction.Operand) Op {
@@ -400,7 +404,7 @@ pub const Op = enum {
             .imm32, .imm32s, .eax, .r32, .m32, .rm32, .rel32, .xmm_m32 => 32,
             .imm64, .rax, .r64, .m64, .rm64, .xmm_m64 => 64,
             .m80 => 80,
-            .m128, .xmm => 128,
+            .m128, .xmm, .xmm_m128 => 128,
         };
     }
@@ -423,8 +427,8 @@ pub const Op = enum {
             .al, .ax, .eax, .rax,
             .r8, .r16, .r32, .r64,
             .rm8, .rm16, .rm32, .rm64,
-            .xmm, .xmm_m32, .xmm_m64,
-            => true,
+            .xmm, .xmm_m32, .xmm_m64, .xmm_m128,
+            => true,
             else => false,
         };
         // zig fmt: on
@@ -449,7 +453,7 @@ pub const Op = enum {
             .rm8, .rm16, .rm32, .rm64,
             .m8, .m16, .m32, .m64, .m80, .m128, .m,
-            .xmm_m32, .xmm_m64,
+            .xmm_m32, .xmm_m64, .xmm_m128,
             => true,
             else => false,
         };
@@ -470,13 +474,13 @@ pub const Op = enum {
             .r8, .r16, .r32, .r64 => .general_purpose,
             .rm8, .rm16, .rm32, .rm64 => .general_purpose,
             .sreg => .segment,
-            .xmm, .xmm_m32, .xmm_m64 => .floating_point,
+            .xmm, .xmm_m32, .xmm_m64, .xmm_m128 => .floating_point,
         };
     }
 
     pub fn isFloatingPointRegister(op: Op) bool {
         return switch (op) {
-            .xmm, .xmm_m32, .xmm_m64 => true,
+            .xmm, .xmm_m32, .xmm_m64, .xmm_m128 => true,
             else => false,
         };
     }
@@ -535,6 +539,7 @@ pub const Mode = enum {
     rex,
     long,
     sse,
+    sse_long,
     sse2,
     sse2_long,
     sse4_1,
diff --git a/src/arch/x86_64/Lower.zig b/src/arch/x86_64/Lower.zig
index af0146c6e1..a961100687 100644
--- a/src/arch/x86_64/Lower.zig
+++ b/src/arch/x86_64/Lower.zig
@@ -95,6 +95,7 @@ pub fn lowerMir(lower: *Lower, inst: Mir.Inst) Error![]const Instruction {
 
         .addss,
         .cmpss,
+        .cvtsi2ss,
         .divss,
         .maxss,
         .minss,
@@ -103,8 +104,12 @@ pub fn lowerMir(lower: *Lower, inst: Mir.Inst) Error![]const Instruction {
         .roundss,
         .subss,
         .ucomiss,
+        .xorps,
         .addsd,
         .cmpsd,
+        .cvtsd2ss,
+        .cvtsi2sd,
+        .cvtss2sd,
         .divsd,
         .maxsd,
         .minsd,
@@ -113,6 +118,7 @@ pub fn lowerMir(lower: *Lower, inst: Mir.Inst) Error![]const Instruction {
         .roundsd,
         .subsd,
         .ucomisd,
+        .xorpd,
         => try lower.mirGeneric(inst),
 
         .cmps,
diff --git a/src/arch/x86_64/Mir.zig b/src/arch/x86_64/Mir.zig
index c8703373d2..c14338b13d 100644
--- a/src/arch/x86_64/Mir.zig
+++ b/src/arch/x86_64/Mir.zig
@@ -170,6 +170,8 @@ pub const Inst = struct {
         addss,
         /// Compare scalar single-precision floating-point values
         cmpss,
+        /// Convert doubleword integer to scalar single-precision floating-point value
+        cvtsi2ss,
         /// Divide scalar single-precision floating-point values
         divss,
         /// Return maximum single-precision floating-point value
@@ -186,10 +188,18 @@ pub const Inst = struct {
         subss,
         /// Unordered compare scalar single-precision floating-point values
         ucomiss,
+        /// Bitwise logical xor of packed single precision floating-point values
+        xorps,
         /// Add double precision floating point values
         addsd,
         /// Compare scalar double-precision floating-point values
         cmpsd,
+        /// Convert scalar double-precision floating-point value to scalar single-precision floating-point value
+        cvtsd2ss,
+        /// Convert doubleword integer to scalar double-precision floating-point value
+        cvtsi2sd,
+        /// Convert scalar single-precision floating-point value to scalar double-precision floating-point value
+        cvtss2sd,
         /// Divide scalar double-precision floating-point values
         divsd,
         /// Return maximum double-precision floating-point value
@@ -206,6 +216,8 @@ pub const Inst = struct {
         subsd,
         /// Unordered compare scalar double-precision floating-point values
         ucomisd,
+        /// Bitwise logical xor of packed double precision floating-point values
+        xorpd,
 
         /// Compare string operands
         cmps,
diff --git a/src/arch/x86_64/encoder.zig b/src/arch/x86_64/encoder.zig
index 329dfca924..4c900697f5 100644
--- a/src/arch/x86_64/encoder.zig
+++ b/src/arch/x86_64/encoder.zig
@@ -323,7 +323,7 @@ pub const Instruction = struct {
         var rex = Rex{};
         rex.present = inst.encoding.data.mode == .rex;
         switch (inst.encoding.data.mode) {
-            .long, .sse2_long => rex.w = true,
+            .long, .sse_long, .sse2_long => rex.w = true,
             else => {},
         }
 
diff --git a/src/arch/x86_64/encodings.zig b/src/arch/x86_64/encodings.zig
index 333bdceea8..ac427c3633 100644
--- a/src/arch/x86_64/encodings.zig
+++ b/src/arch/x86_64/encodings.zig
@@ -834,6 +834,9 @@ pub const table = [_]Entry{
     .{ .cmpss, .rmi, &.{ .xmm, .xmm_m32, .imm8 }, &.{ 0xf3, 0x0f, 0xc2 }, 0, .sse },
 
+    .{ .cvtsi2ss, .rm, &.{ .xmm, .rm32 }, &.{ 0xf3, 0x0f, 0x2a }, 0, .sse },
+    .{ .cvtsi2ss, .rm, &.{ .xmm, .rm64 }, &.{ 0xf3, 0x0f, 0x2a }, 0, .sse_long },
+
     .{ .divss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5e }, 0, .sse },
 
     .{ .maxss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5f }, 0, .sse },
@@ -849,11 +852,20 @@ pub const table = [_]Entry{
 
     .{ .ucomiss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0x0f, 0x2e }, 0, .sse },
 
+    .{ .xorps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x57 }, 0, .sse },
+
     // SSE2
     .{ .addsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x58 }, 0, .sse2 },
 
     .{ .cmpsd, .rmi, &.{ .xmm, .xmm_m64, .imm8 }, &.{ 0xf2, 0x0f, 0xc2 }, 0, .sse2 },
 
+    .{ .cvtsd2ss, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5a }, 0, .sse2 },
+
+    .{ .cvtsi2sd, .rm, &.{ .xmm, .rm32 }, &.{ 0xf2, 0x0f, 0x2a }, 0, .sse2 },
+    .{ .cvtsi2sd, .rm, &.{ .xmm, .rm64 }, &.{ 0xf2, 0x0f, 0x2a }, 0, .sse2_long },
+
+    .{ .cvtss2sd, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5a }, 0, .sse2 },
+
     .{ .divsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5e }, 0, .sse2 },
 
     .{ .maxsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5f }, 0, .sse2 },
@@ -878,6 +890,8 @@ pub const table = [_]Entry{
 
     .{ .ucomisd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x66, 0x0f, 0x2e }, 0, .sse2 },
 
+    .{ .xorpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x57 }, 0, .sse2 },
+
     // SSE4.1
     .{ .roundss, .rmi, &.{ .xmm, .xmm_m32, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0a }, 0, .sse4_1 },
     .{ .roundsd, .rmi, &.{ .xmm, .xmm_m64, .imm8 }, &.{ 0x66, 0x0f, 0x3a, 0x0b }, 0, .sse4_1 },
diff --git a/src/codegen.zig b/src/codegen.zig
index bf80a90cc3..0043b38a5b 100644
--- a/src/codegen.zig
+++ b/src/codegen.zig
@@ -291,6 +291,20 @@ pub fn generateSymbol(
             },
         },
         .Pointer => switch (typed_value.val.tag()) {
+            .null_value => {
+                switch (target.cpu.arch.ptrBitWidth()) {
+                    32 => {
+                        mem.writeInt(u32, try code.addManyAsArray(4), 0, endian);
+                        if (typed_value.ty.isSlice()) try code.appendNTimes(0xaa, 4);
+                    },
+                    64 => {
+                        mem.writeInt(u64, try code.addManyAsArray(8), 0, endian);
+                        if (typed_value.ty.isSlice()) try code.appendNTimes(0xaa, 8);
+                    },
+                    else => unreachable,
+                }
+                return Result.ok;
+            },
             .zero, .one, .int_u64, .int_big_positive => {
                 switch (target.cpu.arch.ptrBitWidth()) {
                     32 => {
@@ -397,30 +411,15 @@
                     },
                 }
             },
-            .elem_ptr => {
-                const elem_ptr = typed_value.val.castTag(.elem_ptr).?.data;
-                const elem_size = typed_value.ty.childType().abiSize(target);
-                const addend = @intCast(u32, elem_ptr.index * elem_size);
-                const array_ptr = elem_ptr.array_ptr;
-
-                switch (array_ptr.tag()) {
-                    .decl_ref => {
-                        const decl_index = array_ptr.castTag(.decl_ref).?.data;
-                        return lowerDeclRef(bin_file, src_loc, typed_value, decl_index, code, debug_output, .{
-                            .parent_atom_index = reloc_info.parent_atom_index,
-                            .addend = (reloc_info.addend orelse 0) + addend,
-                        });
-                    },
-                    else => return Result{
-                        .fail = try ErrorMsg.create(
-                            bin_file.allocator,
-                            src_loc,
-                            "TODO implement generateSymbol for pointer type value: '{s}'",
-                            .{@tagName(typed_value.val.tag())},
-                        ),
-                    },
-                }
-            },
+            .elem_ptr => return lowerParentPtr(
+                bin_file,
+                src_loc,
+                typed_value,
+                typed_value.val,
+                code,
+                debug_output,
+                reloc_info,
+            ),
             else => return Result{
                 .fail = try ErrorMsg.create(
                     bin_file.allocator,
@@ -838,9 +837,62 @@ pub fn generateSymbol(
     }
 }
 
+fn lowerParentPtr(
+    bin_file: *link.File,
+    src_loc: Module.SrcLoc,
+    typed_value: TypedValue,
+    parent_ptr: Value,
+    code: *std.ArrayList(u8),
+    debug_output: DebugInfoOutput,
+    reloc_info: RelocInfo,
+) CodeGenError!Result {
+    const target = bin_file.options.target;
+
+    switch (parent_ptr.tag()) {
+        .elem_ptr => {
+            const elem_ptr = parent_ptr.castTag(.elem_ptr).?.data;
+            return lowerParentPtr(
+                bin_file,
+                src_loc,
+                typed_value,
+                elem_ptr.array_ptr,
+                code,
+                debug_output,
+                reloc_info.offset(@intCast(u32, elem_ptr.index * elem_ptr.elem_ty.abiSize(target))),
+            );
+        },
+        .decl_ref => {
+            const decl_index = parent_ptr.castTag(.decl_ref).?.data;
+            return lowerDeclRef(
+                bin_file,
+                src_loc,
+                typed_value,
+                decl_index,
+                code,
+                debug_output,
+                reloc_info,
+            );
+        },
+        else => |t| {
+            return Result{
+                .fail = try ErrorMsg.create(
+                    bin_file.allocator,
+                    src_loc,
+                    "TODO implement lowerParentPtr for type '{s}'",
+                    .{@tagName(t)},
+                ),
+            };
+        },
+    }
+}
+
 const RelocInfo = struct {
     parent_atom_index: u32,
     addend: ?u32 = null,
+
+    fn offset(ri: RelocInfo, addend: u32) RelocInfo {
+        return .{ .parent_atom_index = ri.parent_atom_index, .addend = (ri.addend orelse 0) + addend };
+    }
 };
 
 fn lowerDeclRef(
@@ -1095,6 +1147,9 @@ pub fn genTypedValue(
             .Slice => {},
             else => {
                 switch (typed_value.val.tag()) {
+                    .null_value => {
+                        return GenResult.mcv(.{ .immediate = 0 });
+                    },
                     .int_u64 => {
                        return GenResult.mcv(.{ .immediate = typed_value.val.toUnsignedInt(target) });
                    },
```
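One subtlety worth calling out in the new `airIntToFloat`: `cvtsi2ss`/`cvtsi2sd` read their integer operand as *signed*, which is why the code computes `src_size` from `src_bits + 1` for unsigned sources (with a minimum of 32 bits). A `u32` with its top bit set must be widened and converted as an `i64`; converting it as an `i32` would yield a negative float. A small sketch of that reasoning — the `u32ToF64` helper is illustrative only, not part of the commit:

```zig
const std = @import("std");

/// Illustrative mirror of the widening rule in airIntToFloat above:
/// cvtsi2sd treats its source as signed, so a u32 such as 0x8000_0000
/// must be zero-extended to i64 before the conversion.
fn u32ToF64(x: u32) f64 {
    const widened = @as(i64, x); // zero-extend; the result is never negative
    return @intToFloat(f64, widened);
}

test "values above maxInt(i32) stay positive" {
    try std.testing.expectEqual(@as(f64, 2147483648.0), u32ToF64(0x8000_0000));
}
```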
