diff options
| author | Jacob Young <jacobly0@users.noreply.github.com> | 2023-05-13 18:06:16 -0400 |
|---|---|---|
| committer | Jacob Young <jacobly0@users.noreply.github.com> | 2023-05-15 03:07:51 -0400 |
| commit | b6d61028508c5b1e1961a124bc17d4d9bda9686f (patch) | |
| tree | 9b50fbfca9bf08638a44635d7da534284afb684d /src | |
| parent | e08eab664861461b0adbe7984881f72b5a36a979 (diff) | |
| download | zig-b6d61028508c5b1e1961a124bc17d4d9bda9686f.tar.gz zig-b6d61028508c5b1e1961a124bc17d4d9bda9686f.zip | |
x86_64: reimplement `@floatToInt`
Diffstat (limited to 'src')
| -rw-r--r-- | src/arch/x86_64/CodeGen.zig | 181 | ||||
| -rw-r--r-- | src/arch/x86_64/Encoding.zig | 159 | ||||
| -rw-r--r-- | src/arch/x86_64/Mir.zig | 46 | ||||
| -rw-r--r-- | src/arch/x86_64/bits.zig | 83 | ||||
| -rw-r--r-- | src/arch/x86_64/encodings.zig | 111 | ||||
| -rw-r--r-- | src/link/Dwarf.zig | 92 |
6 files changed, 420 insertions, 252 deletions
diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index e4f28e34cf..e5c6925596 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -2501,12 +2501,12 @@ fn airFptrunc(self: *Self, inst: Air.Inst.Index) !void { } } else if (src_bits == 64 and dst_bits == 32) { if (self.hasFeature(.avx)) if (src_mcv.isMemory()) try self.asmRegisterRegisterMemory( - .{ .v_, .cvtsd2ss }, + .{ .v_ss, .cvtsd2 }, dst_reg, dst_reg, src_mcv.mem(.qword), ) else try self.asmRegisterRegisterRegister( - .{ .v_, .cvtsd2ss }, + .{ .v_ss, .cvtsd2 }, dst_reg, dst_reg, (if (src_mcv.isRegister()) @@ -2514,11 +2514,11 @@ fn airFptrunc(self: *Self, inst: Air.Inst.Index) !void { else try self.copyToTmpRegister(src_ty, src_mcv)).to128(), ) else if (src_mcv.isMemory()) try self.asmRegisterMemory( - .{ ._, .cvtsd2ss }, + .{ ._ss, .cvtsd2 }, dst_reg, src_mcv.mem(.qword), ) else try self.asmRegisterRegister( - .{ ._, .cvtsd2ss }, + .{ ._ss, .cvtsd2 }, dst_reg, (if (src_mcv.isRegister()) src_mcv.getReg().? @@ -2552,22 +2552,22 @@ fn airFpext(self: *Self, inst: Air.Inst.Index) !void { src_mcv.getReg().? else try self.copyToTmpRegister(src_ty, src_mcv); - try self.asmRegisterRegister(.{ .v_, .cvtph2ps }, dst_reg, mat_src_reg.to128()); + try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg, mat_src_reg.to128()); switch (dst_bits) { 32 => {}, - 64 => try self.asmRegisterRegisterRegister(.{ .v_, .cvtss2sd }, dst_reg, dst_reg, dst_reg), + 64 => try self.asmRegisterRegisterRegister(.{ .v_sd, .cvtss2 }, dst_reg, dst_reg, dst_reg), else => return self.fail("TODO implement airFpext from {} to {}", .{ src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?), }), } } else if (src_bits == 32 and dst_bits == 64) { if (self.hasFeature(.avx)) if (src_mcv.isMemory()) try self.asmRegisterRegisterMemory( - .{ .v_, .cvtss2sd }, + .{ .v_sd, .cvtss2 }, dst_reg, dst_reg, src_mcv.mem(.dword), ) else try self.asmRegisterRegisterRegister( - .{ .v_, .cvtss2sd }, + .{ .v_sd, .cvtss2 }, dst_reg, dst_reg, (if (src_mcv.isRegister()) @@ -2575,11 +2575,11 @@ fn airFpext(self: *Self, inst: Air.Inst.Index) !void { else try self.copyToTmpRegister(src_ty, src_mcv)).to128(), ) else if (src_mcv.isMemory()) try self.asmRegisterMemory( - .{ ._, .cvtss2sd }, + .{ ._sd, .cvtss2 }, dst_reg, src_mcv.mem(.dword), ) else try self.asmRegisterRegister( - .{ ._, .cvtss2sd }, + .{ ._sd, .cvtss2 }, dst_reg, (if (src_mcv.isRegister()) src_mcv.getReg().? @@ -4789,7 +4789,6 @@ fn genRound(self: *Self, ty: Type, dst_reg: Register, src_mcv: MCValue, mode: u4 })) |tag| tag else return self.fail("TODO implement genRound for {}", .{ ty.fmt(self.bin_file.options.module.?), }); - const abi_size = @intCast(u32, ty.abiSize(self.target.*)); const dst_alias = registerAlias(dst_reg, abi_size); switch (mir_tag[0]) { @@ -4848,7 +4847,7 @@ fn airSqrt(self: *Self, inst: Air.Inst.Index) !void { src_mcv.getReg().? else try self.copyToTmpRegister(ty, src_mcv); - try self.asmRegisterRegister(.{ .v_, .cvtph2ps }, dst_reg, mat_src_reg.to128()); + try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg, mat_src_reg.to128()); try self.asmRegisterRegisterRegister(.{ .v_ss, .sqrt }, dst_reg, dst_reg, dst_reg); try self.asmRegisterRegisterImmediate( .{ .v_, .cvtps2ph }, @@ -4868,7 +4867,7 @@ fn airSqrt(self: *Self, inst: Air.Inst.Index) !void { 16 => if (self.hasFeature(.f16c)) switch (ty.vectorLen()) { 1 => { try self.asmRegisterRegister( - .{ .v_, .cvtph2ps }, + .{ .v_ps, .cvtph2 }, dst_reg, (if (src_mcv.isRegister()) src_mcv.getReg().? @@ -4892,13 +4891,13 @@ fn airSqrt(self: *Self, inst: Air.Inst.Index) !void { 2...8 => { const wide_reg = registerAlias(dst_reg, abi_size * 2); if (src_mcv.isMemory()) try self.asmRegisterMemory( - .{ .v_, .cvtph2ps }, + .{ .v_ps, .cvtph2 }, wide_reg, src_mcv.mem(Memory.PtrSize.fromSize( @intCast(u32, @divExact(wide_reg.bitSize(), 16)), )), ) else try self.asmRegisterRegister( - .{ .v_, .cvtph2ps }, + .{ .v_ps, .cvtph2 }, wide_reg, (if (src_mcv.isRegister()) src_mcv.getReg().? @@ -6347,7 +6346,7 @@ fn genBinOp( else try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(), ); - try self.asmRegisterRegister(.{ .v_, .cvtph2ps }, dst_reg, dst_reg); + try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg, dst_reg); try self.asmRegisterRegister(.{ .v_, .movshdup }, tmp_reg, dst_reg); try self.asmRegisterRegisterRegister( switch (air_tag) { @@ -6424,7 +6423,7 @@ fn genBinOp( else try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(), ); - try self.asmRegisterRegister(.{ .v_, .cvtph2ps }, dst_reg, dst_reg); + try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg, dst_reg); try self.asmRegisterRegister(.{ .v_, .movshdup }, tmp_reg, dst_reg); try self.asmRegisterRegisterRegister( switch (air_tag) { @@ -6467,7 +6466,7 @@ fn genBinOp( else try self.copyToTmpRegister(rhs_ty, src_mcv)).to128(), ); - try self.asmRegisterRegister(.{ .v_, .cvtph2ps }, dst_reg, dst_reg); + try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg, dst_reg); try self.asmRegisterRegisterRegister( .{ .v_ps, .movhl }, tmp_reg, @@ -6501,13 +6500,13 @@ fn genBinOp( const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); defer self.register_manager.unlockReg(tmp_lock); - try self.asmRegisterRegister(.{ .v_, .cvtph2ps }, dst_reg, dst_reg); + try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg, dst_reg); if (src_mcv.isMemory()) try self.asmRegisterMemory( - .{ .v_, .cvtph2ps }, + .{ .v_ps, .cvtph2 }, tmp_reg, src_mcv.mem(.qword), ) else try self.asmRegisterRegister( - .{ .v_, .cvtph2ps }, + .{ .v_ps, .cvtph2 }, tmp_reg, (if (src_mcv.isRegister()) src_mcv.getReg().? @@ -6541,13 +6540,13 @@ fn genBinOp( const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg); defer self.register_manager.unlockReg(tmp_lock); - try self.asmRegisterRegister(.{ .v_, .cvtph2ps }, dst_reg.to256(), dst_reg); + try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, dst_reg.to256(), dst_reg); if (src_mcv.isMemory()) try self.asmRegisterMemory( - .{ .v_, .cvtph2ps }, + .{ .v_ps, .cvtph2 }, tmp_reg, src_mcv.mem(.xword), ) else try self.asmRegisterRegister( - .{ .v_, .cvtph2ps }, + .{ .v_ps, .cvtph2 }, tmp_reg, (if (src_mcv.isRegister()) src_mcv.getReg().? @@ -7199,13 +7198,13 @@ fn genArgDbgInfo(self: Self, ty: Type, name: [:0]const u8, mcv: MCValue) !void { switch (self.debug_output) { .dwarf => |dw| { const loc: link.File.Dwarf.DeclState.DbgInfoLoc = switch (mcv) { - .register => |reg| .{ .register = reg.dwarfLocOp() }, + .register => |reg| .{ .register = reg.dwarfNum() }, // TODO use a frame index .load_frame => return, //.stack_offset => |off| .{ // .stack = .{ // // TODO handle -fomit-frame-pointer - // .fp_register = Register.rbp.dwarfLocOpDeref(), + // .fp_register = Register.rbp.dwarfNum(), // .offset = -off, // }, //}, @@ -7237,11 +7236,11 @@ fn genVarDbgInfo( switch (self.debug_output) { .dwarf => |dw| { const loc: link.File.Dwarf.DeclState.DbgInfoLoc = switch (mcv) { - .register => |reg| .{ .register = reg.dwarfLocOp() }, + .register => |reg| .{ .register = reg.dwarfNum() }, // TODO use a frame index .load_frame, .lea_frame => return, //=> |off| .{ .stack = .{ - // .fp_register = Register.rbp.dwarfLocOpDeref(), + // .fp_register = Register.rbp.dwarfNum(), // .offset = -off, //} }, .memory => |address| .{ .memory = address }, @@ -7595,7 +7594,7 @@ fn airCmp(self: *Self, inst: Air.Inst.Index, op: math.CompareOperator) !void { else try self.copyToTmpRegister(ty, src_mcv)).to128(), ); - try self.asmRegisterRegister(.{ .v_, .cvtph2ps }, tmp1_reg, tmp1_reg); + try self.asmRegisterRegister(.{ .v_ps, .cvtph2 }, tmp1_reg, tmp1_reg); try self.asmRegisterRegister(.{ .v_, .movshdup }, tmp2_reg, tmp1_reg); try self.genBinOpMir(.{ ._ss, .ucomi }, ty, tmp1_mcv, tmp2_mcv); } else return self.fail("TODO implement airCmp for {}", .{ @@ -8862,14 +8861,14 @@ fn genSetReg(self: *Self, dst_reg: Register, ty: Type, src_mcv: MCValue) InnerEr } }, .register => |src_reg| if (dst_reg.id() != src_reg.id()) try self.asmRegisterRegister( - if ((dst_reg.class() == .floating_point) == (src_reg.class() == .floating_point)) + if ((dst_reg.class() == .sse) == (src_reg.class() == .sse)) switch (ty.zigTypeTag()) { else => .{ ._, .mov }, .Float, .Vector => .{ ._ps, .mova }, } else switch (abi_size) { 2 => return try self.asmRegisterRegisterImmediate( - if (dst_reg.class() == .floating_point) .{ .p_w, .insr } else .{ .p_w, .extr }, + if (dst_reg.class() == .sse) .{ .p_w, .insr } else .{ .p_w, .extr }, registerAlias(dst_reg, 4), registerAlias(src_reg, 4), Immediate.u(0), @@ -9222,7 +9221,7 @@ fn genInlineMemcpyRegisterRegister( try self.asmMemoryRegister( switch (src_reg.class()) { .general_purpose, .segment => .{ ._, .mov }, - .floating_point => .{ ._ss, .mov }, + .sse => .{ ._ss, .mov }, }, Memory.sib(Memory.PtrSize.fromSize(abi_size), .{ .base = dst_reg, .disp = -offset }), registerAlias(src_reg, abi_size), @@ -9388,10 +9387,10 @@ fn airIntToFloat(self: *Self, inst: Air.Inst.Index) !void { }); const src_mcv = try self.resolveInst(ty_op.operand); - const src_reg = switch (src_mcv) { - .register => |reg| reg, - else => try self.copyToTmpRegister(src_ty, src_mcv), - }; + const src_reg = if (src_mcv.isRegister()) + src_mcv.getReg().? + else + try self.copyToTmpRegister(src_ty, src_mcv); const src_lock = self.register_manager.lockRegAssumeUnused(src_reg); defer self.register_manager.unlockReg(src_lock); @@ -9402,23 +9401,23 @@ fn airIntToFloat(self: *Self, inst: Air.Inst.Index) !void { const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg); defer self.register_manager.unlockReg(dst_lock); - try self.asmRegisterRegister(switch (dst_ty.floatBits(self.target.*)) { - 32 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse)) - .{ ._, .cvtsi2ss } - else - return self.fail("TODO implement airIntToFloat from {} to {} without sse", .{ - src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?), - }), - 64 => if (Target.x86.featureSetHas(self.target.cpu.features, .sse2)) - .{ ._, .cvtsi2sd } - else - return self.fail("TODO implement airIntToFloat from {} to {} without sse2", .{ - src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?), - }), - else => return self.fail("TODO implement airIntToFloat from {} to {}", .{ - src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?), - }), - }, dst_reg.to128(), registerAlias(src_reg, src_size)); + const mir_tag = if (@as(?Mir.Inst.FixedTag, switch (dst_ty.zigTypeTag()) { + .Float => switch (dst_ty.floatBits(self.target.*)) { + 32 => if (self.hasFeature(.avx)) .{ .v_ss, .cvtsi2 } else .{ ._ss, .cvtsi2 }, + 64 => if (self.hasFeature(.avx)) .{ .v_sd, .cvtsi2 } else .{ ._sd, .cvtsi2 }, + 16, 80, 128 => null, + else => unreachable, + }, + else => null, + })) |tag| tag else return self.fail("TODO implement airIntToFloat from {} to {}", .{ + src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?), + }); + const dst_alias = dst_reg.to128(); + const src_alias = registerAlias(src_reg, src_size); + switch (mir_tag[0]) { + .v_ss, .v_sd => try self.asmRegisterRegisterRegister(mir_tag, dst_alias, dst_alias, src_alias), + else => try self.asmRegisterRegister(mir_tag, dst_alias, src_alias), + } return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none }); } @@ -9428,46 +9427,50 @@ fn airFloatToInt(self: *Self, inst: Air.Inst.Index) !void { const src_ty = self.air.typeOf(ty_op.operand); const dst_ty = self.air.typeOfIndex(inst); - const operand = try self.resolveInst(ty_op.operand); - const src_abi_size = @intCast(u32, src_ty.abiSize(self.target.*)); - const dst_abi_size = @intCast(u32, dst_ty.abiSize(self.target.*)); + const dst_bits = @intCast(u32, dst_ty.bitSize(self.target.*)); + const dst_signedness = + if (dst_ty.isAbiInt()) dst_ty.intInfo(self.target.*).signedness else .unsigned; - switch (src_abi_size) { - 4, 8 => {}, - else => |size| return self.fail("TODO load ST(0) with abiSize={}", .{size}), - } - if (dst_abi_size > 8) { - return self.fail("TODO convert float with abiSize={}", .{dst_abi_size}); - } + const dst_size = std.math.divCeil(u32, @max(switch (dst_signedness) { + .signed => dst_bits, + .unsigned => dst_bits + 1, + }, 32), 8) catch unreachable; + if (dst_size > 8) return self.fail("TODO implement airFloatToInt from {} to {}", .{ + src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?), + }); - // move float src to ST(0) - const frame_addr: FrameAddr = switch (operand) { - .load_frame => |frame_addr| frame_addr, - else => frame_addr: { - const frame_index = try self.allocFrameIndex(FrameAlloc.initType(src_ty, self.target.*)); - try self.genSetMem(.{ .frame = frame_index }, 0, src_ty, operand); - break :frame_addr .{ .index = frame_index }; - }, - }; - try self.asmMemory( - .{ .f_, .ld }, - Memory.sib(Memory.PtrSize.fromSize(src_abi_size), .{ - .base = .{ .frame = frame_addr.index }, - .disp = frame_addr.off, - }), - ); + const src_mcv = try self.resolveInst(ty_op.operand); + const src_reg = if (src_mcv.isRegister()) + src_mcv.getReg().? + else + try self.copyToTmpRegister(src_ty, src_mcv); + const src_lock = self.register_manager.lockRegAssumeUnused(src_reg); + defer self.register_manager.unlockReg(src_lock); - // convert - const stack_dst = try self.allocRegOrMem(inst, false); - try self.asmMemory( - .{ .f_p, .istt }, - Memory.sib(Memory.PtrSize.fromSize(dst_abi_size), .{ - .base = .{ .frame = stack_dst.load_frame.index }, - .disp = stack_dst.load_frame.off, + const dst_reg = try self.register_manager.allocReg(inst, regClassForType(dst_ty)); + const dst_mcv = MCValue{ .register = dst_reg }; + const dst_lock = self.register_manager.lockRegAssumeUnused(dst_reg); + defer self.register_manager.unlockReg(dst_lock); + + try self.asmRegisterRegister( + if (@as(?Mir.Inst.FixedTag, switch (src_ty.zigTypeTag()) { + .Float => switch (src_ty.floatBits(self.target.*)) { + 32 => if (self.hasFeature(.avx)) .{ .v_, .cvttss2si } else .{ ._, .cvttss2si }, + 64 => if (self.hasFeature(.avx)) .{ .v_, .cvttsd2si } else .{ ._, .cvttsd2si }, + 16, 80, 128 => null, + else => unreachable, + }, + else => null, + })) |tag| tag else return self.fail("TODO implement airFloatToInt from {} to {}", .{ + src_ty.fmt(self.bin_file.options.module.?), dst_ty.fmt(self.bin_file.options.module.?), }), + registerAlias(dst_reg, dst_size), + src_reg.to128(), ); - return self.finishAir(inst, stack_dst, .{ ty_op.operand, .none, .none }); + if (dst_bits < dst_size * 8) try self.truncateRegister(dst_ty, dst_reg); + + return self.finishAir(inst, dst_mcv, .{ ty_op.operand, .none, .none }); } fn airCmpxchg(self: *Self, inst: Air.Inst.Index) !void { @@ -10997,13 +11000,13 @@ fn registerAlias(reg: Register, size_bytes: u32) Register { reg.to64() else unreachable, - .floating_point => if (size_bytes <= 16) + .segment, .x87, .mmx => unreachable, + .sse => if (size_bytes <= 16) reg.to128() else if (size_bytes <= 32) reg.to256() else unreachable, - .segment => unreachable, }; } diff --git a/src/arch/x86_64/Encoding.zig b/src/arch/x86_64/Encoding.zig index 537a03fa2a..66a249a3f2 100644 --- a/src/arch/x86_64/Encoding.zig +++ b/src/arch/x86_64/Encoding.zig @@ -233,7 +233,6 @@ pub const Mnemonic = enum { cmpxchg, cmpxchg8b, cmpxchg16b, cqo, cwd, cwde, div, - fisttp, fld, idiv, imul, int3, ja, jae, jb, jbe, jc, jrcxz, je, jg, jge, jl, jle, jna, jnae, jnb, jnbe, jnc, jne, jng, jnge, jnl, jnle, jno, jnp, jns, jnz, jo, jp, jpe, jpo, js, jz, @@ -259,6 +258,8 @@ pub const Mnemonic = enum { @"test", tzcnt, ud2, xadd, xchg, xor, + // X87 + fisttp, fld, // MMX movd, // SSE @@ -266,7 +267,7 @@ pub const Mnemonic = enum { andps, andnps, cmpss, - cvtsi2ss, + cvtpi2ps, cvtps2pi, cvtsi2ss, cvtss2si, cvttps2pi, cvttss2si, divps, divss, maxps, maxss, minps, minss, @@ -285,7 +286,9 @@ pub const Mnemonic = enum { andpd, andnpd, //cmpsd, - cvtsd2ss, cvtsi2sd, cvtss2sd, + cvtdq2pd, cvtdq2ps, cvtpd2dq, cvtpd2pi, cvtpd2ps, cvtpi2pd, + cvtps2dq, cvtps2pd, cvtsd2si, cvtsd2ss, cvtsi2sd, cvtss2sd, + cvttpd2dq, cvttpd2pi, cvttps2dq, cvttsd2si, divpd, divsd, maxpd, maxsd, minpd, minsd, @@ -314,7 +317,10 @@ pub const Mnemonic = enum { // AVX vaddpd, vaddps, vaddsd, vaddss, vbroadcastf128, vbroadcastsd, vbroadcastss, - vcvtsd2ss, vcvtsi2sd, vcvtsi2ss, vcvtss2sd, + vcvtdq2pd, vcvtdq2ps, vcvtpd2dq, vcvtpd2ps, + vcvtps2dq, vcvtps2pd, vcvtsd2si, vcvtsd2ss, + vcvtsi2sd, vcvtsi2ss, vcvtss2sd, vcvtss2si, + vcvttpd2dq, vcvttps2dq, vcvttsd2si, vcvttss2si, vdivpd, vdivps, vdivsd, vdivss, vextractf128, vextractps, vinsertf128, vinsertps, @@ -377,80 +383,84 @@ pub const Op = enum { m, moffs, sreg, + st, mm, mm_m64, xmm, xmm_m32, xmm_m64, xmm_m128, ymm, ymm_m256, // zig fmt: on pub fn fromOperand(operand: Instruction.Operand) Op { - switch (operand) { - .none => return .none, - - .reg => |reg| { - switch (reg.class()) { - .segment => return .sreg, - .floating_point => return switch (reg.bitSize()) { - 128 => .xmm, - 256 => .ymm, + return switch (operand) { + .none => .none, + + .reg => |reg| switch (reg.class()) { + .general_purpose => if (reg.to64() == .rax) + switch (reg) { + .al => .al, + .ax => .ax, + .eax => .eax, + .rax => .rax, else => unreachable, - }, - .general_purpose => { - if (reg.to64() == .rax) return switch (reg) { - .al => .al, - .ax => .ax, - .eax => .eax, - .rax => .rax, - else => unreachable, - }; - if (reg == .cl) return .cl; - return switch (reg.bitSize()) { - 8 => .r8, - 16 => .r16, - 32 => .r32, - 64 => .r64, - else => unreachable, - }; - }, - } + } + else if (reg == .cl) + .cl + else switch (reg.bitSize()) { + 8 => .r8, + 16 => .r16, + 32 => .r32, + 64 => .r64, + else => unreachable, + }, + .segment => .sreg, + .x87 => .st, + .mmx => .mm, + .sse => switch (reg.bitSize()) { + 128 => .xmm, + 256 => .ymm, + else => unreachable, + }, }, .mem => |mem| switch (mem) { - .moffs => return .moffs, - .sib, .rip => { - const bit_size = mem.bitSize(); - return switch (bit_size) { - 8 => .m8, - 16 => .m16, - 32 => .m32, - 64 => .m64, - 80 => .m80, - 128 => .m128, - 256 => .m256, - else => unreachable, - }; + .moffs => .moffs, + .sib, .rip => switch (mem.bitSize()) { + 8 => .m8, + 16 => .m16, + 32 => .m32, + 64 => .m64, + 80 => .m80, + 128 => .m128, + 256 => .m256, + else => unreachable, }, }, - .imm => |imm| { - switch (imm) { - .signed => |x| { - if (x == 1) return .unity; - if (math.cast(i8, x)) |_| return .imm8s; - if (math.cast(i16, x)) |_| return .imm16s; - return .imm32s; - }, - .unsigned => |x| { - if (x == 1) return .unity; - if (math.cast(i8, x)) |_| return .imm8s; - if (math.cast(u8, x)) |_| return .imm8; - if (math.cast(i16, x)) |_| return .imm16s; - if (math.cast(u16, x)) |_| return .imm16; - if (math.cast(i32, x)) |_| return .imm32s; - if (math.cast(u32, x)) |_| return .imm32; - return .imm64; - }, - } + .imm => |imm| switch (imm) { + .signed => |x| if (x == 1) + .unity + else if (math.cast(i8, x)) |_| + .imm8s + else if (math.cast(i16, x)) |_| + .imm16s + else + .imm32s, + .unsigned => |x| if (x == 1) + .unity + else if (math.cast(i8, x)) |_| + .imm8s + else if (math.cast(u8, x)) |_| + .imm8 + else if (math.cast(i16, x)) |_| + .imm16s + else if (math.cast(u16, x)) |_| + .imm16 + else if (math.cast(i32, x)) |_| + .imm32s + else if (math.cast(u32, x)) |_| + .imm32 + else + .imm64, }, - } + }; } pub fn immBitSize(op: Op) u64 { @@ -460,6 +470,7 @@ pub const Op = enum { .ax, .r16, .rm16 => unreachable, .eax, .r32, .rm32, .r32_m16 => unreachable, .rax, .r64, .rm64, .r64_m16 => unreachable, + .st, .mm, .mm_m64 => unreachable, .xmm, .xmm_m32, .xmm_m64, .xmm_m128 => unreachable, .ymm, .ymm_m256 => unreachable, .m8, .m16, .m32, .m64, .m80, .m128, .m256 => unreachable, @@ -480,7 +491,8 @@ pub const Op = enum { .al, .cl, .r8, .rm8 => 8, .ax, .r16, .rm16 => 16, .eax, .r32, .rm32, .r32_m8, .r32_m16 => 32, - .rax, .r64, .rm64, .r64_m16 => 64, + .rax, .r64, .rm64, .r64_m16, .mm, .mm_m64 => 64, + .st => 80, .xmm, .xmm_m32, .xmm_m64, .xmm_m128 => 128, .ymm, .ymm_m256 => 256, }; @@ -491,11 +503,11 @@ pub const Op = enum { .none, .o16, .o32, .o64, .moffs, .m, .sreg => unreachable, .unity, .imm8, .imm8s, .imm16, .imm16s, .imm32, .imm32s, .imm64 => unreachable, .rel8, .rel16, .rel32 => unreachable, - .al, .cl, .r8, .ax, .r16, .eax, .r32, .rax, .r64, .xmm, .ymm => unreachable, + .al, .cl, .r8, .ax, .r16, .eax, .r32, .rax, .r64, .st, .mm, .xmm, .ymm => unreachable, .m8, .rm8, .r32_m8 => 8, .m16, .rm16, .r32_m16, .r64_m16 => 16, .m32, .rm32, .xmm_m32 => 32, - .m64, .rm64, .xmm_m64 => 64, + .m64, .rm64, .mm_m64, .xmm_m64 => 64, .m80 => 80, .m128, .xmm_m128 => 128, .m256, .ymm_m256 => 256, @@ -522,6 +534,7 @@ pub const Op = enum { .r8, .r16, .r32, .r64, .rm8, .rm16, .rm32, .rm64, .r32_m8, .r32_m16, .r64_m16, + .st, .mm, .mm_m64, .xmm, .xmm_m32, .xmm_m64, .xmm_m128, .ymm, .ymm_m256, => true, @@ -550,6 +563,7 @@ pub const Op = enum { .r32_m8, .r32_m16, .r64_m16, .m8, .m16, .m32, .m64, .m80, .m128, .m256, .m, + .mm_m64, .xmm_m32, .xmm_m64, .xmm_m128, .ymm_m256, => true, @@ -573,8 +587,10 @@ pub const Op = enum { .rm8, .rm16, .rm32, .rm64 => .general_purpose, .r32_m8, .r32_m16, .r64_m16 => .general_purpose, .sreg => .segment, - .xmm, .xmm_m32, .xmm_m64, .xmm_m128 => .floating_point, - .ymm, .ymm_m256 => .floating_point, + .st => .x87, + .mm, .mm_m64 => .mmx, + .xmm, .xmm_m32, .xmm_m64, .xmm_m128 => .sse, + .ymm, .ymm_m256 => .sse, }; } @@ -695,6 +711,7 @@ pub const Feature = enum { f16c, fma, lzcnt, + movbe, popcnt, sse, sse2, @@ -717,7 +734,7 @@ fn estimateInstructionLength(prefix: Prefix, encoding: Encoding, ops: []const Op } const mnemonic_to_encodings_map = init: { - @setEvalBranchQuota(20_000); + @setEvalBranchQuota(25_000); const encodings = @import("encodings.zig"); var entries = encodings.table; std.sort.sort(encodings.Entry, &entries, {}, struct { diff --git a/src/arch/x86_64/Mir.zig b/src/arch/x86_64/Mir.zig index f26bf97e82..ef8bbe07b3 100644 --- a/src/arch/x86_64/Mir.zig +++ b/src/arch/x86_64/Mir.zig @@ -439,8 +439,21 @@ pub const Inst = struct { /// Bitwise logical and not of packed single-precision floating-point values /// Bitwise logical and not of packed double-precision floating-point values andn, + /// Convert packed doubleword integers to packed single-precision floating-point values + /// Convert packed doubleword integers to packed double-precision floating-point values + cvtpi2, + /// Convert packed single-precision floating-point values to packed doubleword integers + cvtps2pi, /// Convert doubleword integer to scalar single-precision floating-point value - cvtsi2ss, + /// Convert doubleword integer to scalar double-precision floating-point value + cvtsi2, + /// Convert scalar single-precision floating-point value to doubleword integer + cvtss2si, + /// Convert with truncation packed single-precision floating-point values to packed doubleword integers + cvttps2pi, + /// Convert with truncation scalar single-precision floating-point value to doubleword integer + cvttss2si, + /// Maximum of packed single-precision floating-point values /// Maximum of scalar single-precision floating-point values /// Maximum of packed double-precision floating-point values @@ -486,12 +499,33 @@ pub const Inst = struct { /// Unpack and interleave low packed double-precision floating-point values unpckl, + /// Convert packed doubleword integers to packed single-precision floating-point values + /// Convert packed doubleword integers to packed double-precision floating-point values + cvtdq2, + /// Convert packed double-precision floating-point values to packed doubleword integers + cvtpd2dq, + /// Convert packed double-precision floating-point values to packed doubleword integers + cvtpd2pi, + /// Convert packed double-precision floating-point values to packed single-precision floating-point values + cvtpd2, + /// Convert packed single-precision floating-point values to packed doubleword integers + cvtps2dq, + /// Convert packed single-precision floating-point values to packed double-precision floating-point values + cvtps2, + /// Convert scalar double-precision floating-point value to doubleword integer + cvtsd2si, /// Convert scalar double-precision floating-point value to scalar single-precision floating-point value - cvtsd2ss, - /// Convert doubleword integer to scalar double-precision floating-point value - cvtsi2sd, + cvtsd2, /// Convert scalar single-precision floating-point value to scalar double-precision floating-point value - cvtss2sd, + cvtss2, + /// Convert with truncation packed double-precision floating-point values to packed doubleword integers + cvttpd2dq, + /// Convert with truncation packed double-precision floating-point values to packed doubleword integers + cvttpd2pi, + /// Convert with truncation packed single-precision floating-point values to packed doubleword integers + cvttps2dq, + /// Convert with truncation scalar double-precision floating-point value to doubleword integer + cvttsd2si, /// Packed interleave shuffle of quadruplets of single-precision floating-point values /// Packed interleave shuffle of pairs of double-precision floating-point values shuf, @@ -542,7 +576,7 @@ pub const Inst = struct { broadcast, /// Convert 16-bit floating-point values to single-precision floating-point values - cvtph2ps, + cvtph2, /// Convert single-precision floating-point values to 16-bit floating-point values cvtps2ph, diff --git a/src/arch/x86_64/bits.zig b/src/arch/x86_64/bits.zig index 3343f280b9..923ba31266 100644 --- a/src/arch/x86_64/bits.zig +++ b/src/arch/x86_64/bits.zig @@ -175,15 +175,21 @@ pub const Register = enum(u7) { xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15, + mm0, mm1, mm2, mm3, mm4, mm5, mm6, mm7, + + st0, st1, st2, st3, st4, st5, st6, st7, + es, cs, ss, ds, fs, gs, none, // zig fmt: on - pub const Class = enum(u2) { + pub const Class = enum { general_purpose, - floating_point, segment, + x87, + mmx, + sse, }; pub fn class(reg: Register) Class { @@ -195,8 +201,10 @@ pub const Register = enum(u7) { @enumToInt(Register.al) ... @enumToInt(Register.r15b) => .general_purpose, @enumToInt(Register.ah) ... @enumToInt(Register.bh) => .general_purpose, - @enumToInt(Register.ymm0) ... @enumToInt(Register.ymm15) => .floating_point, - @enumToInt(Register.xmm0) ... @enumToInt(Register.xmm15) => .floating_point, + @enumToInt(Register.ymm0) ... @enumToInt(Register.ymm15) => .sse, + @enumToInt(Register.xmm0) ... @enumToInt(Register.xmm15) => .sse, + @enumToInt(Register.mm0) ... @enumToInt(Register.mm7) => .mmx, + @enumToInt(Register.st0) ... @enumToInt(Register.st7) => .x87, @enumToInt(Register.es) ... @enumToInt(Register.gs) => .segment, @@ -216,8 +224,10 @@ pub const Register = enum(u7) { @enumToInt(Register.ymm0) ... @enumToInt(Register.ymm15) => @enumToInt(Register.ymm0) - 16, @enumToInt(Register.xmm0) ... @enumToInt(Register.xmm15) => @enumToInt(Register.xmm0) - 16, + @enumToInt(Register.mm0) ... @enumToInt(Register.mm7) => @enumToInt(Register.mm0) - 32, + @enumToInt(Register.st0) ... @enumToInt(Register.st7) => @enumToInt(Register.st0) - 40, - @enumToInt(Register.es) ... @enumToInt(Register.gs) => @enumToInt(Register.es) - 32, + @enumToInt(Register.es) ... @enumToInt(Register.gs) => @enumToInt(Register.es) - 48, else => unreachable, // zig fmt: on @@ -236,6 +246,8 @@ pub const Register = enum(u7) { @enumToInt(Register.ymm0) ... @enumToInt(Register.ymm15) => 256, @enumToInt(Register.xmm0) ... @enumToInt(Register.xmm15) => 128, + @enumToInt(Register.mm0) ... @enumToInt(Register.mm7) => 64, + @enumToInt(Register.st0) ... @enumToInt(Register.st7) => 80, @enumToInt(Register.es) ... @enumToInt(Register.gs) => 16, @@ -271,6 +283,8 @@ pub const Register = enum(u7) { @enumToInt(Register.ymm0) ... @enumToInt(Register.ymm15) => @enumToInt(Register.ymm0), @enumToInt(Register.xmm0) ... @enumToInt(Register.xmm15) => @enumToInt(Register.xmm0), + @enumToInt(Register.mm0) ... @enumToInt(Register.mm7) => @enumToInt(Register.mm0), + @enumToInt(Register.st0) ... @enumToInt(Register.st7) => @enumToInt(Register.st0), @enumToInt(Register.es) ... @enumToInt(Register.gs) => @enumToInt(Register.es), @@ -326,8 +340,8 @@ pub const Register = enum(u7) { return @intToEnum(Register, @enumToInt(reg) - reg.gpBase() + @enumToInt(Register.al)); } - fn fpBase(reg: Register) u7 { - assert(reg.class() == .floating_point); + fn sseBase(reg: Register) u7 { + assert(reg.class() == .sse); return switch (@enumToInt(reg)) { @enumToInt(Register.ymm0)...@enumToInt(Register.ymm15) => @enumToInt(Register.ymm0), @enumToInt(Register.xmm0)...@enumToInt(Register.xmm15) => @enumToInt(Register.xmm0), @@ -336,49 +350,24 @@ pub const Register = enum(u7) { } pub fn to256(reg: Register) Register { - return @intToEnum(Register, @enumToInt(reg) - reg.fpBase() + @enumToInt(Register.ymm0)); + return @intToEnum(Register, @enumToInt(reg) - reg.sseBase() + @enumToInt(Register.ymm0)); } pub fn to128(reg: Register) Register { - return @intToEnum(Register, @enumToInt(reg) - reg.fpBase() + @enumToInt(Register.xmm0)); - } - - pub fn dwarfLocOp(reg: Register) u8 { - return switch (reg.class()) { - .general_purpose => switch (reg.to64()) { - .rax => DW.OP.reg0, - .rdx => DW.OP.reg1, - .rcx => DW.OP.reg2, - .rbx => DW.OP.reg3, - .rsi => DW.OP.reg4, - .rdi => DW.OP.reg5, - .rbp => DW.OP.reg6, - .rsp => DW.OP.reg7, - else => @intCast(u8, @enumToInt(reg) - reg.gpBase()) + DW.OP.reg0, - }, - .floating_point => @intCast(u8, @enumToInt(reg) - reg.fpBase()) + DW.OP.reg17, - else => unreachable, - }; + return @intToEnum(Register, @enumToInt(reg) - reg.sseBase() + @enumToInt(Register.xmm0)); } - /// DWARF encodings that push a value onto the DWARF stack that is either - /// the contents of a register or the result of adding the contents a given - /// register to a given signed offset. - pub fn dwarfLocOpDeref(reg: Register) u8 { + /// DWARF register encoding + pub fn dwarfNum(reg: Register) u6 { return switch (reg.class()) { - .general_purpose => switch (reg.to64()) { - .rax => DW.OP.breg0, - .rdx => DW.OP.breg1, - .rcx => DW.OP.breg2, - .rbx => DW.OP.breg3, - .rsi => DW.OP.breg4, - .rdi => DW.OP.breg5, - .rbp => DW.OP.breg6, - .rsp => DW.OP.breg7, - else => @intCast(u8, @enumToInt(reg) - reg.gpBase()) + DW.OP.breg0, - }, - .floating_point => @intCast(u8, @enumToInt(reg) - reg.fpBase()) + DW.OP.breg17, - else => unreachable, + .general_purpose => if (reg.isExtended()) + reg.enc() + else + @truncate(u3, @as(u24, 0o54673120) >> @as(u5, reg.enc()) * 3), + .sse => 17 + @as(u6, reg.enc()), + .x87 => 33 + @as(u6, reg.enc()), + .mmx => 41 + @as(u6, reg.enc()), + .segment => 50 + @as(u6, reg.enc()), }; } }; @@ -392,6 +381,8 @@ test "Register id - different classes" { try expect(Register.ymm0.id() == 0b10000); try expect(Register.ymm0.id() != Register.rax.id()); try expect(Register.xmm0.id() == Register.ymm0.id()); + try expect(Register.xmm0.id() != Register.mm0.id()); + try expect(Register.mm0.id() != Register.st0.id()); try expect(Register.es.id() == 0b100000); } @@ -407,7 +398,9 @@ test "Register enc - different classes" { test "Register classes" { try expect(Register.r11.class() == .general_purpose); - try expect(Register.ymm11.class() == .floating_point); + try expect(Register.ymm11.class() == .sse); + try expect(Register.mm3.class() == .mmx); + try expect(Register.st3.class() == .x87); try expect(Register.fs.class() == .segment); } diff --git a/src/arch/x86_64/encodings.zig b/src/arch/x86_64/encodings.zig index a7a50867c3..3383315bd6 100644 --- a/src/arch/x86_64/encodings.zig +++ b/src/arch/x86_64/encodings.zig @@ -272,14 +272,6 @@ pub const table = [_]Entry{ .{ .div, .m, &.{ .rm32 }, &.{ 0xf7 }, 6, .none, .none }, .{ .div, .m, &.{ .rm64 }, &.{ 0xf7 }, 6, .long, .none }, - .{ .fisttp, .m, &.{ .m16 }, &.{ 0xdf }, 1, .none, .x87 }, - .{ .fisttp, .m, &.{ .m32 }, &.{ 0xdb }, 1, .none, .x87 }, - .{ .fisttp, .m, &.{ .m64 }, &.{ 0xdd }, 1, .none, .x87 }, - - .{ .fld, .m, &.{ .m32 }, &.{ 0xd9 }, 0, .none, .x87 }, - .{ .fld, .m, &.{ .m64 }, &.{ 0xdd }, 0, .none, .x87 }, - .{ .fld, .m, &.{ .m80 }, &.{ 0xdb }, 5, .none, .x87 }, - .{ .idiv, .m, &.{ .rm8 }, &.{ 0xf6 }, 7, .none, .none }, .{ .idiv, .m, &.{ .rm8 }, &.{ 0xf6 }, 7, .rex, .none }, .{ .idiv, .m, &.{ .rm16 }, &.{ 0xf7 }, 7, .short, .none }, @@ -395,12 +387,12 @@ pub const table = [_]Entry{ .{ .mov, .mi, &.{ .rm32, .imm32 }, &.{ 0xc7 }, 0, .none, .none }, .{ .mov, .mi, &.{ .rm64, .imm32s }, &.{ 0xc7 }, 0, .long, .none }, - .{ .movbe, .rm, &.{ .r16, .m16 }, &.{ 0x0f, 0x38, 0xf0 }, 0, .short, .none }, - .{ .movbe, .rm, &.{ .r32, .m32 }, &.{ 0x0f, 0x38, 0xf0 }, 0, .none, .none }, - .{ .movbe, .rm, &.{ .r64, .m64 }, &.{ 0x0f, 0x38, 0xf0 }, 0, .long, .none }, - .{ .movbe, .mr, &.{ .m16, .r16 }, &.{ 0x0f, 0x38, 0xf1 }, 0, .short, .none }, - .{ .movbe, .mr, &.{ .m32, .r32 }, &.{ 0x0f, 0x38, 0xf1 }, 0, .none, .none }, - .{ .movbe, .mr, &.{ .m64, .r64 }, &.{ 0x0f, 0x38, 0xf1 }, 0, .long, .none }, + .{ .movbe, .rm, &.{ .r16, .m16 }, &.{ 0x0f, 0x38, 0xf0 }, 0, .short, .movbe }, + .{ .movbe, .rm, &.{ .r32, .m32 }, &.{ 0x0f, 0x38, 0xf0 }, 0, .none, .movbe }, + .{ .movbe, .rm, &.{ .r64, .m64 }, &.{ 0x0f, 0x38, 0xf0 }, 0, .long, .movbe }, + .{ .movbe, .mr, &.{ .m16, .r16 }, &.{ 0x0f, 0x38, 0xf1 }, 0, .short, .movbe }, + .{ .movbe, .mr, &.{ .m32, .r32 }, &.{ 0x0f, 0x38, 0xf1 }, 0, .none, .movbe }, + .{ .movbe, .mr, &.{ .m64, .r64 }, &.{ 0x0f, 0x38, 0xf1 }, 0, .long, .movbe }, .{ .movs, .np, &.{ .m8, .m8 }, &.{ 0xa4 }, 0, .none, .none }, .{ .movs, .np, &.{ .m16, .m16 }, &.{ 0xa5 }, 0, .short, .none }, @@ -836,6 +828,15 @@ pub const table = [_]Entry{ .{ .xor, .rm, &.{ .r32, .rm32 }, &.{ 0x33 }, 0, .none, .none }, .{ .xor, .rm, &.{ .r64, .rm64 }, &.{ 0x33 }, 0, .long, .none }, + // X87 + .{ .fisttp, .m, &.{ .m16 }, &.{ 0xdf }, 1, .none, .x87 }, + .{ .fisttp, .m, &.{ .m32 }, &.{ 0xdb }, 1, .none, .x87 }, + .{ .fisttp, .m, &.{ .m64 }, &.{ 0xdd }, 1, .none, .x87 }, + + .{ .fld, .m, &.{ .m32 }, &.{ 0xd9 }, 0, .none, .x87 }, + .{ .fld, .m, &.{ .m64 }, &.{ 0xdd }, 0, .none, .x87 }, + .{ .fld, .m, &.{ .m80 }, &.{ 0xdb }, 5, .none, .x87 }, + // SSE .{ .addps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x58 }, 0, .none, .sse }, @@ -847,9 +848,21 @@ pub const table = [_]Entry{ .{ .cmpss, .rmi, &.{ .xmm, .xmm_m32, .imm8 }, &.{ 0xf3, 0x0f, 0xc2 }, 0, .none, .sse }, + .{ .cvtpi2ps, .rm, &.{ .xmm, .mm_m64 }, &.{ 0x0f, 0x2a }, 0, .none, .sse }, + + .{ .cvtps2pi, .rm, &.{ .mm, .xmm_m64 }, &.{ 0x0f, 0x2d }, 0, .none, .sse }, + .{ .cvtsi2ss, .rm, &.{ .xmm, .rm32 }, &.{ 0xf3, 0x0f, 0x2a }, 0, .none, .sse }, .{ .cvtsi2ss, .rm, &.{ .xmm, .rm64 }, &.{ 0xf3, 0x0f, 0x2a }, 0, .long, .sse }, + .{ .cvtss2si, .rm, &.{ .r32, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x2d }, 0, .none, .sse }, + .{ .cvtss2si, .rm, &.{ .r64, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x2d }, 0, .long, .sse }, + + .{ .cvttps2pi, .rm, &.{ .mm, .xmm_m64 }, &.{ 0x0f, 0x2c }, 0, .none, .sse }, + + .{ .cvttss2si, .rm, &.{ .r32, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x2c }, 0, .none, .sse }, + .{ .cvttss2si, .rm, &.{ .r64, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x2c }, 0, .long, .sse }, + .{ .divps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x5e }, 0, .none, .sse }, .{ .divss, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5e }, 0, .none, .sse }, @@ -906,6 +919,25 @@ pub const table = [_]Entry{ .{ .cmpsd, .rmi, &.{ .xmm, .xmm_m64, .imm8 }, &.{ 0xf2, 0x0f, 0xc2 }, 0, .none, .sse2 }, + .{ .cvtdq2pd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf3, 0x0f, 0xe6 }, 0, .none, .sse2 }, + + .{ .cvtdq2ps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x5b }, 0, .none, .sse2 }, + + .{ .cvtpd2dq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0xf2, 0x0f, 0xe6 }, 0, .none, .sse2 }, + + .{ .cvtpd2pi, .rm, &.{ .mm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x2d }, 0, .none, .sse2 }, + + .{ .cvtpd2ps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x5a }, 0, .none, .sse2 }, + + .{ .cvtpi2pd, .rm, &.{ .xmm, .mm_m64 }, &.{ 0x66, 0x0f, 0x2a }, 0, .none, .sse2 }, + + .{ .cvtps2dq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x5b }, 0, .none, .sse2 }, + + .{ .cvtps2pd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x0f, 0x5a }, 0, .none, .sse2 }, + + .{ .cvtsd2si, .rm, &.{ .r32, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x2d }, 0, .none, .sse2 }, + .{ .cvtsd2si, .rm, &.{ .r64, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x2d }, 0, .long, .sse2 }, + .{ .cvtsd2ss, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5a }, 0, .none, .sse2 }, .{ .cvtsi2sd, .rm, &.{ .xmm, .rm32 }, &.{ 0xf2, 0x0f, 0x2a }, 0, .none, .sse2 }, @@ -913,6 +945,15 @@ pub const table = [_]Entry{ .{ .cvtss2sd, .rm, &.{ .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5a }, 0, .none, .sse2 }, + .{ .cvttpd2dq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xe6 }, 0, .none, .sse2 }, + + .{ .cvttpd2pi, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x2c }, 0, .none, .sse2 }, + + .{ .cvttps2dq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0xf3, 0x0f, 0x5b }, 0, .none, .sse2 }, + + .{ .cvttsd2si, .rm, &.{ .r32, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x2c }, 0, .none, .sse2 }, + .{ .cvttsd2si, .rm, &.{ .r64, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x2c }, 0, .long, .sse2 }, + .{ .divpd, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x5e }, 0, .none, .sse2 }, .{ .divsd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5e }, 0, .none, .sse2 }, @@ -1034,15 +1075,51 @@ pub const table = [_]Entry{ .{ .vbroadcastsd, .rm, &.{ .ymm, .m64 }, &.{ 0x66, 0x0f, 0x38, 0x19 }, 0, .vex_256_w0, .avx }, .{ .vbroadcastf128, .rm, &.{ .ymm, .m128 }, &.{ 0x66, 0x0f, 0x38, 0x1a }, 0, .vex_256_w0, .avx }, + .{ .vcvtdq2pd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0xf3, 0x0f, 0xe6 }, 0, .vex_128_wig, .avx }, + .{ .vcvtdq2pd, .rm, &.{ .ymm, .xmm_m128 }, &.{ 0xf3, 0x0f, 0xe6 }, 0, .vex_256_wig, .avx }, + + .{ .vcvtdq2ps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x0f, 0x5b }, 0, .vex_128_wig, .avx }, + .{ .vcvtdq2ps, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x0f, 0x5b }, 0, .vex_256_wig, .avx }, + + .{ .vcvtpd2dq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0xf2, 0x0f, 0xe6 }, 0, .vex_128_wig, .avx }, + .{ .vcvtpd2dq, .rm, &.{ .xmm, .ymm_m256 }, &.{ 0xf2, 0x0f, 0xe6 }, 0, .vex_256_wig, .avx }, + + .{ .vcvtpd2ps, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x5a }, 0, .vex_128_wig, .avx }, + .{ .vcvtpd2ps, .rm, &.{ .xmm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x5a }, 0, .vex_256_wig, .avx }, + + .{ .vcvtps2dq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x5b }, 0, .vex_128_wig, .avx }, + .{ .vcvtps2dq, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x5b }, 0, .vex_256_wig, .avx }, + + .{ .vcvtps2pd, .rm, &.{ .xmm, .xmm_m64 }, &.{ 0x0f, 0x5a }, 0, .vex_128_wig, .avx }, + .{ .vcvtps2pd, .rm, &.{ .ymm, .xmm_m128 }, &.{ 0x0f, 0x5a }, 0, .vex_256_wig, .avx }, + + .{ .vcvtsd2si, .rm, &.{ .r32, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x2d }, 0, .vex_lig_w0, .sse2 }, + .{ .vcvtsd2si, .rm, &.{ .r64, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x2d }, 0, .vex_lig_w1, .sse2 }, + .{ .vcvtsd2ss, .rvm, &.{ .xmm, .xmm, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x5a }, 0, .vex_lig_wig, .avx }, .{ .vcvtsi2sd, .rvm, &.{ .xmm, .xmm, .rm32 }, &.{ 0xf2, 0x0f, 0x2a }, 0, .vex_lig_w0, .avx }, .{ .vcvtsi2sd, .rvm, &.{ .xmm, .xmm, .rm64 }, &.{ 0xf2, 0x0f, 0x2a }, 0, .vex_lig_w1, .avx }, - .{ .vcvtsi2ss, .rvm, &.{ .xmm, .xmm, .rm32 }, &.{ 0xf2, 0x0f, 0x2a }, 0, .vex_lig_w0, .avx }, - .{ .vcvtsi2ss, .rvm, &.{ .xmm, .xmm, .rm64 }, &.{ 0xf2, 0x0f, 0x2a }, 0, .vex_lig_w1, .avx }, + .{ .vcvtsi2ss, .rvm, &.{ .xmm, .xmm, .rm32 }, &.{ 0xf3, 0x0f, 0x2a }, 0, .vex_lig_w0, .avx }, + .{ .vcvtsi2ss, .rvm, &.{ .xmm, .xmm, .rm64 }, &.{ 0xf3, 0x0f, 0x2a }, 0, .vex_lig_w1, .avx }, + + .{ .vcvtss2sd, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x5a }, 0, .vex_lig_wig, .avx }, + + .{ .vcvtss2si, .rm, &.{ .r32, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x2d }, 0, .vex_lig_w0, .avx }, + .{ .vcvtss2si, .rm, &.{ .r64, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x2d }, 0, .vex_lig_w1, .avx }, + + .{ .vcvttpd2dq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0xe6 }, 0, .vex_128_wig, .avx }, + .{ .vcvttpd2dq, .rm, &.{ .xmm, .ymm_m256 }, &.{ 0x66, 0x0f, 0xe6 }, 0, .vex_256_wig, .avx }, + + .{ .vcvttps2dq, .rm, &.{ .xmm, .xmm_m128 }, &.{ 0xf3, 0x0f, 0x5b }, 0, .vex_128_wig, .avx }, + .{ .vcvttps2dq, .rm, &.{ .ymm, .ymm_m256 }, &.{ 0xf3, 0x0f, 0x5b }, 0, .vex_256_wig, .avx }, + + .{ .vcvttsd2si, .rm, &.{ .r32, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x2c }, 0, .vex_lig_w0, .sse2 }, + .{ .vcvttsd2si, .rm, &.{ .r64, .xmm_m64 }, &.{ 0xf2, 0x0f, 0x2c }, 0, .vex_lig_w1, .sse2 }, - .{ .vcvtss2sd, .rvm, &.{ .xmm, .xmm, .xmm_m32 }, &.{ 0xf2, 0x0f, 0x5a }, 0, .vex_lig_wig, .avx }, + .{ .vcvttss2si, .rm, &.{ .r32, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x2c }, 0, .vex_lig_w0, .avx }, + .{ .vcvttss2si, .rm, &.{ .r64, .xmm_m32 }, &.{ 0xf3, 0x0f, 0x2c }, 0, .vex_lig_w1, .avx }, .{ .vdivpd, .rvm, &.{ .xmm, .xmm, .xmm_m128 }, &.{ 0x66, 0x0f, 0x5e }, 0, .vex_128_wig, .avx }, .{ .vdivpd, .rvm, &.{ .ymm, .ymm, .ymm_m256 }, &.{ 0x66, 0x0f, 0x5e }, 0, .vex_256_wig, .avx }, diff --git a/src/link/Dwarf.zig b/src/link/Dwarf.zig index c134f60316..1a064049fc 100644 --- a/src/link/Dwarf.zig +++ b/src/link/Dwarf.zig @@ -608,23 +608,44 @@ pub const DeclState = struct { switch (loc) { .register => |reg| { - try dbg_info.ensureUnusedCapacity(3); + try dbg_info.ensureUnusedCapacity(4); dbg_info.appendAssumeCapacity(@enumToInt(AbbrevKind.parameter)); - dbg_info.appendSliceAssumeCapacity(&[2]u8{ // DW.AT.location, DW.FORM.exprloc - 1, // ULEB128 dwarf expression length - reg, - }); + // DW.AT.location, DW.FORM.exprloc + var expr_len = std.io.countingWriter(std.io.null_writer); + if (reg < 32) { + expr_len.writer().writeByte(DW.OP.reg0 + reg) catch unreachable; + } else { + expr_len.writer().writeByte(DW.OP.regx) catch unreachable; + leb128.writeULEB128(expr_len.writer(), reg) catch unreachable; + } + leb128.writeULEB128(dbg_info.writer(), expr_len.bytes_written) catch unreachable; + if (reg < 32) { + dbg_info.appendAssumeCapacity(DW.OP.reg0 + reg); + } else { + dbg_info.appendAssumeCapacity(DW.OP.regx); + leb128.writeULEB128(dbg_info.writer(), reg) catch unreachable; + } }, .stack => |info| { - try dbg_info.ensureUnusedCapacity(8); + try dbg_info.ensureUnusedCapacity(9); dbg_info.appendAssumeCapacity(@enumToInt(AbbrevKind.parameter)); - const fixup = dbg_info.items.len; - dbg_info.appendSliceAssumeCapacity(&[2]u8{ // DW.AT.location, DW.FORM.exprloc - 1, // we will backpatch it after we encode the displacement in LEB128 - info.fp_register, // frame pointer - }); + // DW.AT.location, DW.FORM.exprloc + var expr_len = std.io.countingWriter(std.io.null_writer); + if (info.fp_register < 32) { + expr_len.writer().writeByte(DW.OP.breg0 + info.fp_register) catch unreachable; + } else { + expr_len.writer().writeByte(DW.OP.bregx) catch unreachable; + leb128.writeULEB128(expr_len.writer(), info.fp_register) catch unreachable; + } + leb128.writeILEB128(expr_len.writer(), info.offset) catch unreachable; + leb128.writeULEB128(dbg_info.writer(), expr_len.bytes_written) catch unreachable; + if (info.fp_register < 32) { + dbg_info.appendAssumeCapacity(DW.OP.breg0 + info.fp_register); + } else { + dbg_info.appendAssumeCapacity(DW.OP.bregx); + leb128.writeULEB128(dbg_info.writer(), info.fp_register) catch unreachable; + } leb128.writeILEB128(dbg_info.writer(), info.offset) catch unreachable; - dbg_info.items[fixup] += @intCast(u8, dbg_info.items.len - fixup - 2); }, .wasm_local => |value| { const leb_size = link.File.Wasm.getULEB128Size(value); @@ -670,22 +691,45 @@ pub const DeclState = struct { switch (loc) { .register => |reg| { - try dbg_info.ensureUnusedCapacity(2); - dbg_info.appendSliceAssumeCapacity(&[2]u8{ // DW.AT.location, DW.FORM.exprloc - 1, // ULEB128 dwarf expression length - reg, - }); + try dbg_info.ensureUnusedCapacity(4); + dbg_info.appendAssumeCapacity(@enumToInt(AbbrevKind.parameter)); + // DW.AT.location, DW.FORM.exprloc + var expr_len = std.io.countingWriter(std.io.null_writer); + if (reg < 32) { + expr_len.writer().writeByte(DW.OP.reg0 + reg) catch unreachable; + } else { + expr_len.writer().writeByte(DW.OP.regx) catch unreachable; + leb128.writeULEB128(expr_len.writer(), reg) catch unreachable; + } + leb128.writeULEB128(dbg_info.writer(), expr_len.bytes_written) catch unreachable; + if (reg < 32) { + dbg_info.appendAssumeCapacity(DW.OP.reg0 + reg); + } else { + dbg_info.appendAssumeCapacity(DW.OP.regx); + leb128.writeULEB128(dbg_info.writer(), reg) catch unreachable; + } }, .stack => |info| { - try dbg_info.ensureUnusedCapacity(7); - const fixup = dbg_info.items.len; - dbg_info.appendSliceAssumeCapacity(&[2]u8{ // DW.AT.location, DW.FORM.exprloc - 1, // we will backpatch it after we encode the displacement in LEB128 - info.fp_register, - }); + try dbg_info.ensureUnusedCapacity(9); + dbg_info.appendAssumeCapacity(@enumToInt(AbbrevKind.parameter)); + // DW.AT.location, DW.FORM.exprloc + var expr_len = std.io.countingWriter(std.io.null_writer); + if (info.fp_register < 32) { + expr_len.writer().writeByte(DW.OP.breg0 + info.fp_register) catch unreachable; + } else { + expr_len.writer().writeByte(DW.OP.bregx) catch unreachable; + leb128.writeULEB128(expr_len.writer(), info.fp_register) catch unreachable; + } + leb128.writeILEB128(expr_len.writer(), info.offset) catch unreachable; + leb128.writeULEB128(dbg_info.writer(), expr_len.bytes_written) catch unreachable; + if (info.fp_register < 32) { + dbg_info.appendAssumeCapacity(DW.OP.breg0 + info.fp_register); + } else { + dbg_info.appendAssumeCapacity(DW.OP.bregx); + leb128.writeULEB128(dbg_info.writer(), info.fp_register) catch unreachable; + } leb128.writeILEB128(dbg_info.writer(), info.offset) catch unreachable; - dbg_info.items[fixup] += @intCast(u8, dbg_info.items.len - fixup - 2); }, .wasm_local => |value| { |
