From c4b83ea02102611a85f75b189f0803d9b6a335c2 Mon Sep 17 00:00:00 2001
From: gracefu <81774659+gracefuu@users.noreply.github.com>
Date: Fri, 9 Apr 2021 13:51:00 +0800
Subject: stage2 x86_64: implement integer mul

This was also an experiment to see if it were easier to implement a new
feature when using the instruction encoder.

Verdict: It's not that much easier, but I think it's certainly much more
readable, because the description of the Instruction annotates what each
field means. Right now, precise knowledge of x86_64 instructions is
still required because things like when to set the 64-bit flag, how to
read x86_64 instruction references, etc. are still not automatically
done for you.

In the future, this interface might make it slightly easier to write an
assembler for x86_64, by abstracting the bit-fiddling aspects of
instruction encoding.
---
 src/Module.zig  |  60 +++++++++++++++++++++++
 src/Sema.zig    |   7 +++
 src/codegen.zig | 149 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 216 insertions(+)

(limited to 'src')

diff --git a/src/Module.zig b/src/Module.zig
index 96b490e2a1..90e1a71bd2 100644
--- a/src/Module.zig
+++ b/src/Module.zig
@@ -4330,6 +4330,33 @@ pub fn intSub(allocator: *Allocator, lhs: Value, rhs: Value) !Value {
     }
 }
 
+pub fn intMul(allocator: *Allocator, lhs: Value, rhs: Value) !Value {
+    // TODO is this a performance issue? maybe we should try the operation without
+    // resorting to BigInt first.
+    var lhs_space: Value.BigIntSpace = undefined;
+    var rhs_space: Value.BigIntSpace = undefined;
+    const lhs_bigint = lhs.toBigInt(&lhs_space);
+    const rhs_bigint = rhs.toBigInt(&rhs_space);
+    const limbs = try allocator.alloc(
+        std.math.big.Limb,
+        lhs_bigint.limbs.len + rhs_bigint.limbs.len + 1,
+    );
+    var result_bigint = BigIntMutable{ .limbs = limbs, .positive = undefined, .len = undefined };
+    var limbs_buffer = try allocator.alloc(
+        std.math.big.Limb,
+        std.math.big.int.calcMulLimbsBufferLen(lhs_bigint.limbs.len, rhs_bigint.limbs.len, 1),
+    );
+    defer allocator.free(limbs_buffer);
+    result_bigint.mul(lhs_bigint, rhs_bigint, limbs_buffer, allocator);
+    const result_limbs = result_bigint.limbs[0..result_bigint.len];
+
+    if (result_bigint.positive) {
+        return Value.Tag.int_big_positive.create(allocator, result_limbs);
+    } else {
+        return Value.Tag.int_big_negative.create(allocator, result_limbs);
+    }
+}
+
 pub fn floatAdd(
     arena: *Allocator,
     float_type: Type,
@@ -4396,6 +4423,39 @@ pub fn floatSub(
     }
 }
 
+pub fn floatMul(
+    arena: *Allocator,
+    float_type: Type,
+    src: LazySrcLoc,
+    lhs: Value,
+    rhs: Value,
+) !Value {
+    switch (float_type.tag()) {
+        .f16 => {
+            @panic("TODO add __trunctfhf2 to compiler-rt");
+            //const lhs_val = lhs.toFloat(f16);
+            //const rhs_val = rhs.toFloat(f16);
+            //return Value.Tag.float_16.create(arena, lhs_val * rhs_val);
+        },
+        .f32 => {
+            const lhs_val = lhs.toFloat(f32);
+            const rhs_val = rhs.toFloat(f32);
+            return Value.Tag.float_32.create(arena, lhs_val * rhs_val);
+        },
+        .f64 => {
+            const lhs_val = lhs.toFloat(f64);
+            const rhs_val = rhs.toFloat(f64);
+            return Value.Tag.float_64.create(arena, lhs_val * rhs_val);
+        },
+        .f128, .comptime_float, .c_longdouble => {
+            const lhs_val = lhs.toFloat(f128);
+            const rhs_val = rhs.toFloat(f128);
+            return Value.Tag.float_128.create(arena, lhs_val * rhs_val);
+        },
+        else => unreachable,
+    }
+}
+
 pub fn simplePtrType(
     mod: *Module,
     arena: *Allocator,
diff --git a/src/Sema.zig b/src/Sema.zig
index 98bff5bf23..74af84b078 100644
--- a/src/Sema.zig
+++ b/src/Sema.zig
@@ -3885,6 +3885,13 @@ fn analyzeArithmetic(
                         try Module.floatSub(sema.arena, scalar_type, src, lhs_val, rhs_val);
                     break :blk val;
                 },
+                .mul => blk: {
+                    const val = if (is_int)
+                        try Module.intMul(sema.arena, lhs_val, rhs_val)
+                    else
+                        try Module.floatMul(sema.arena, scalar_type, src, lhs_val, rhs_val);
+                    break :blk val;
+                },
                 else => return sema.mod.fail(&block.base, src, "TODO Implement arithmetic operand '{s}'", .{@tagName(zir_tag)}),
             };
 
diff --git a/src/codegen.zig b/src/codegen.zig
index 6739acbfa6..2f49e10522 100644
--- a/src/codegen.zig
+++ b/src/codegen.zig
@@ -1079,6 +1079,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
             if (inst.base.isUnused())
                 return MCValue.dead;
             switch (arch) {
+                .x86_64 => return try self.genX8664BinMath(&inst.base, inst.lhs, inst.rhs),
                 .arm, .armeb => return try self.genArmMul(&inst.base, inst.lhs, inst.rhs),
                 else => return self.fail(inst.base.src, "TODO implement mul for {}", .{self.target.cpu.arch}),
             }
@@ -1574,6 +1575,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
                 .sub, .subwrap => try self.genX8664BinMathCode(inst.src, inst.ty, dst_mcv, src_mcv, 5, 0x28),
                 .xor, .not => try self.genX8664BinMathCode(inst.src, inst.ty, dst_mcv, src_mcv, 6, 0x30),
 
+                .mul, .mulwrap => try self.genX8664Imul(inst.src, inst.ty, dst_mcv, src_mcv),
                 else => unreachable,
             }
 
@@ -1795,6 +1797,153 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
             }
         }
 
+        /// Performs integer multiplication between dst_mcv and src_mcv, storing the result in dst_mcv.
+        fn genX8664Imul(
+            self: *Self,
+            src: LazySrcLoc,
+            dst_ty: Type,
+            dst_mcv: MCValue,
+            src_mcv: MCValue,
+        ) !void {
+            switch (dst_mcv) {
+                .none => unreachable,
+                .undef => unreachable,
+                .dead, .unreach, .immediate => unreachable,
+                .compare_flags_unsigned => unreachable,
+                .compare_flags_signed => unreachable,
+                .ptr_stack_offset => unreachable,
+                .ptr_embedded_in_code => unreachable,
+                .register => |dst_reg| {
+                    switch (src_mcv) {
+                        .none => unreachable,
+                        .undef => try self.genSetReg(src, dst_ty, dst_reg, .undef),
+                        .dead, .unreach => unreachable,
+                        .ptr_stack_offset => unreachable,
+                        .ptr_embedded_in_code => unreachable,
+                        .register => |src_reg| {
+                            // register, register
+                            //
+                            // Use the following imul opcode
+                            // 0F AF /r: IMUL r32/64, r/m32/64
+                            try self.encodeX8664Instruction(src, Instruction{
+                                .operand_size_64 = dst_ty.abiSize(self.target.*) == 8, // abiSize is in bytes
+                                .primary_opcode_2b = 0xaf,
+                                // TODO: Explicit optional wrap due to stage 1 miscompilation :(
+                                // https://github.com/ziglang/zig/issues/6515
+                                .modrm = @as(
+                                    ?Instruction.ModrmEffectiveAddress,
+                                    Instruction.ModrmEffectiveAddress{ .reg = src_reg },
+                                ),
+                                .reg = dst_reg,
+                            });
+                        },
+                        .immediate => |imm| {
+                            // register, immediate:
+                            // depends on size of immediate.
+                            //
+                            // immediate fits in i8:
+                            // 6B /r ib: IMUL r32/64, r/m32/64, imm8
+                            //
+                            // immediate fits in i32:
+                            // 69 /r id: IMUL r32/64, r/m32/64, imm32
+                            //
+                            // immediate is huge:
+                            // split into 2 instructions
+                            // 1) copy the 64 bit immediate into a tmp register
+                            // 2) perform register,register mul
+                            // 0F AF /r: IMUL r32/64, r/m32/64
+                            if (math.minInt(i8) <= imm and imm <= math.maxInt(i8)) {
+                                try self.encodeX8664Instruction(src, Instruction{
+                                    .operand_size_64 = dst_ty.abiSize(self.target.*) == 8, // abiSize is in bytes
+                                    .primary_opcode_1b = 0x6B,
+                                    .reg = dst_reg,
+                                    // TODO: Explicit optional wrap due to stage 1 miscompilation :(
+                                    // https://github.com/ziglang/zig/issues/6515
+                                    .modrm = @as(
+                                        ?Instruction.ModrmEffectiveAddress,
+                                        Instruction.ModrmEffectiveAddress{ .reg = dst_reg },
+                                    ),
+                                    .immediate_bytes = 1,
+                                    .immediate = imm,
+                                });
+                            } else if (math.minInt(i32) <= imm and imm <= math.maxInt(i32)) {
+                                try self.encodeX8664Instruction(src, Instruction{
+                                    .operand_size_64 = dst_ty.abiSize(self.target.*) == 8, // abiSize is in bytes
+                                    .primary_opcode_1b = 0x69,
+                                    .reg = dst_reg,
+                                    // TODO: Explicit optional wrap due to stage 1 miscompilation :(
+                                    // https://github.com/ziglang/zig/issues/6515
+                                    .modrm = @as(
+                                        ?Instruction.ModrmEffectiveAddress,
+                                        Instruction.ModrmEffectiveAddress{ .reg = dst_reg },
+                                    ),
+                                    .immediate_bytes = 4,
+                                    .immediate = imm,
+                                });
+                            } else {
+                                const src_reg = try self.copyToTmpRegister(src, dst_ty, src_mcv);
+                                return self.genX8664Imul(src, dst_ty, dst_mcv, MCValue{ .register = src_reg });
+                            }
+                        },
+                        .embedded_in_code, .memory, .stack_offset => {
+                            return self.fail(src, "TODO implement x86 multiply source memory", .{});
+                        },
+                        .compare_flags_unsigned => {
+                            return self.fail(src, "TODO implement x86 multiply source compare flag (unsigned)", .{});
+                        },
+                        .compare_flags_signed => {
+                            return self.fail(src, "TODO implement x86 multiply source compare flag (signed)", .{});
+                        },
+                    }
+                },
+                .stack_offset => |off| {
+                    switch (src_mcv) {
+                        .none => unreachable,
+                        .undef => return self.genSetStack(src, dst_ty, off, .undef),
+                        .dead, .unreach => unreachable,
+                        .ptr_stack_offset => unreachable,
+                        .ptr_embedded_in_code => unreachable,
+                        .register => |src_reg| {
+                            // copy dst to a register
+                            const dst_reg = try self.copyToTmpRegister(src, dst_ty, dst_mcv);
+                            // multiply into dst_reg
+                            // register, register
+                            // Use the following imul opcode
+                            // 0F AF /r: IMUL r32/64, r/m32/64
+                            try self.encodeX8664Instruction(src, Instruction{
+                                .operand_size_64 = dst_ty.abiSize(self.target.*) == 8, // abiSize is in bytes
+                                .primary_opcode_2b = 0xaf,
+                                // TODO: Explicit optional wrap due to stage 1 miscompilation :(
+                                // https://github.com/ziglang/zig/issues/6515
+                                .modrm = @as(
+                                    ?Instruction.ModrmEffectiveAddress,
+                                    Instruction.ModrmEffectiveAddress{ .reg = src_reg },
+                                ),
+                                .reg = dst_reg,
+                            });
+                            // copy dst_reg back out
+                            return self.genSetStack(src, dst_ty, off, MCValue{ .register = dst_reg });
+                        },
+                        .immediate => |imm| {
+                            return self.fail(src, "TODO implement x86 multiply source immediate", .{});
+                        },
+                        .embedded_in_code, .memory, .stack_offset => {
+                            return self.fail(src, "TODO implement x86 multiply source memory", .{});
+                        },
+                        .compare_flags_unsigned => {
+                            return self.fail(src, "TODO implement x86 multiply source compare flag (unsigned)", .{});
+                        },
+                        .compare_flags_signed => {
+                            return self.fail(src, "TODO implement x86 multiply source compare flag (signed)", .{});
+                        },
+                    }
+                },
+                .embedded_in_code, .memory => {
+                    return self.fail(src, "TODO implement x86 multiply destination memory", .{});
+                },
+            }
+        }
+
         fn genX8664ModRMRegToStack(self: *Self, src: LazySrcLoc, ty: Type, off: u32, reg: Register, opcode: u8) !void {
             const abi_size = ty.abiSize(self.target.*);
             const adj_off = off + abi_size;
-- 
cgit v1.2.3