From c4b83ea02102611a85f75b189f0803d9b6a335c2 Mon Sep 17 00:00:00 2001
From: gracefu <81774659+gracefuu@users.noreply.github.com>
Date: Fri, 9 Apr 2021 13:51:00 +0800
Subject: stage2 x86_64: implement integer mul

This was also an experiment to see if it were easier to implement a new
feature when using the instruction encoder.

Verdict: It's not that much easier, but I think it's certainly much more
readable, because the description of the Instruction annotates what each
field means. Right now, precise knowledge of x86_64 instructions is
still required because things like when to set the 64-bit flag, how to
read x86_64 instruction references, etc. are still not automatically
done for you.

In the future, this interface might make it slightly easier to write an
assembler for x86_64, by abstracting the bit-fiddling aspects of
instruction encoding.
---
 src/Module.zig  |  60 +++++++++++++++++++++++
 src/Sema.zig    |   7 +++
 src/codegen.zig | 149 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 216 insertions(+)

(limited to 'src')

diff --git a/src/Module.zig b/src/Module.zig
index 96b490e2a1..90e1a71bd2 100644
--- a/src/Module.zig
+++ b/src/Module.zig
@@ -4330,6 +4330,33 @@ pub fn intSub(allocator: *Allocator, lhs: Value, rhs: Value) !Value {
     }
 }
 
+pub fn intMul(allocator: *Allocator, lhs: Value, rhs: Value) !Value {
+    // Multiplies lhs by rhs using big integers. TODO is this a performance issue?
+    // maybe we should try the operation without resorting to BigInt first.
+    var lhs_space: Value.BigIntSpace = undefined;
+    var rhs_space: Value.BigIntSpace = undefined;
+    const lhs_bigint = lhs.toBigInt(&lhs_space);
+    const rhs_bigint = rhs.toBigInt(&rhs_space);
+    const limbs = try allocator.alloc(
+        std.math.big.Limb,
+        lhs_bigint.limbs.len + rhs_bigint.limbs.len + 1,
+    );
+    var result_bigint = BigIntMutable{ .limbs = limbs, .positive = undefined, .len = undefined };
+    var limbs_buffer = try allocator.alloc(
+        std.math.big.Limb,
+        std.math.big.int.calcMulLimbsBufferLen(lhs_bigint.limbs.len, rhs_bigint.limbs.len, 1),
+    );
+    defer allocator.free(limbs_buffer); // scratch space only; the result is written into `limbs`
+    result_bigint.mul(lhs_bigint, rhs_bigint, limbs_buffer, allocator);
+    const result_limbs = result_bigint.limbs[0..result_bigint.len];
+
+    if (result_bigint.positive) {
+        return Value.Tag.int_big_positive.create(allocator, result_limbs);
+    } else {
+        return Value.Tag.int_big_negative.create(allocator, result_limbs);
+    }
+}
+
 pub fn floatAdd(
     arena: *Allocator,
     float_type: Type,
@@ -4396,6 +4423,39 @@ pub fn floatSub(
     }
 }
 
+pub fn floatMul(
+    arena: *Allocator,
+    float_type: Type,
+    src: LazySrcLoc, // not referenced anywhere in this function body
+    lhs: Value,
+    rhs: Value,
+) !Value {
+    switch (float_type.tag()) {
+        .f16 => {
+            @panic("TODO add __trunctfhf2 to compiler-rt");
+            //const lhs_val = lhs.toFloat(f16);
+            //const rhs_val = rhs.toFloat(f16);
+            //return Value.Tag.float_16.create(arena, lhs_val * rhs_val);
+        },
+        .f32 => {
+            const lhs_val = lhs.toFloat(f32);
+            const rhs_val = rhs.toFloat(f32);
+            return Value.Tag.float_32.create(arena, lhs_val * rhs_val);
+        },
+        .f64 => {
+            const lhs_val = lhs.toFloat(f64);
+            const rhs_val = rhs.toFloat(f64);
+            return Value.Tag.float_64.create(arena, lhs_val * rhs_val);
+        },
+        .f128, .comptime_float, .c_longdouble => { // all three are multiplied as f128
+            const lhs_val = lhs.toFloat(f128);
+            const rhs_val = rhs.toFloat(f128);
+            return Value.Tag.float_128.create(arena, lhs_val * rhs_val);
+        },
+        else => unreachable, // any other type tag is asserted never to reach this function
+    }
+}
+
 pub fn simplePtrType(
     mod: *Module,
     arena: *Allocator,
diff --git a/src/Sema.zig b/src/Sema.zig
index 98bff5bf23..74af84b078 100644
--- a/src/Sema.zig
+++ b/src/Sema.zig
@@ -3885,6 +3885,13 @@ fn analyzeArithmetic(
                         try Module.floatSub(sema.arena, scalar_type, src, lhs_val, rhs_val);
                     break :blk val;
                 },
+                .mul => blk: {
+                    const val = if (is_int)
+                        try Module.intMul(sema.arena, lhs_val, rhs_val)
+                    else
+                        try Module.floatMul(sema.arena, scalar_type, src, lhs_val, rhs_val);
+                    break :blk val;
+                },
                 else => return sema.mod.fail(&block.base, src, "TODO Implement arithmetic operand '{s}'", .{@tagName(zir_tag)}),
             };
 
diff --git a/src/codegen.zig b/src/codegen.zig
index 6739acbfa6..2f49e10522 100644
--- a/src/codegen.zig
+++ b/src/codegen.zig
@@ -1079,6 +1079,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
             if (inst.base.isUnused())
                 return MCValue.dead;
             switch (arch) {
+                .x86_64 => return try self.genX8664BinMath(&inst.base, inst.lhs, inst.rhs),
                 .arm, .armeb => return try self.genArmMul(&inst.base, inst.lhs, inst.rhs),
                 else => return self.fail(inst.base.src, "TODO implement mul for {}", .{self.target.cpu.arch}),
             }
@@ -1574,6 +1575,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
                 .sub, .subwrap => try self.genX8664BinMathCode(inst.src, inst.ty, dst_mcv, src_mcv, 5, 0x28),
                 .xor, .not => try self.genX8664BinMathCode(inst.src, inst.ty, dst_mcv, src_mcv, 6, 0x30),
 
+                .mul, .mulwrap => try self.genX8664Imul(inst.src, inst.ty, dst_mcv, src_mcv),
                 else => unreachable,
             }
 
@@ -1795,6 +1797,153 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
             }
         }
 
+        /// Performs integer multiplication between dst_mcv and src_mcv, storing the result in dst_mcv.
+        fn genX8664Imul(
+            self: *Self,
+            src: LazySrcLoc,
+            dst_ty: Type,
+            dst_mcv: MCValue,
+            src_mcv: MCValue,
+        ) !void {
+            switch (dst_mcv) {
+                .none => unreachable,
+                .undef => unreachable,
+                .dead, .unreach, .immediate => unreachable,
+                .compare_flags_unsigned => unreachable,
+                .compare_flags_signed => unreachable,
+                .ptr_stack_offset => unreachable,
+                .ptr_embedded_in_code => unreachable,
+                .register => |dst_reg| {
+                    switch (src_mcv) {
+                        .none => unreachable,
+                        .undef => try self.genSetReg(src, dst_ty, dst_reg, .undef),
+                        .dead, .unreach => unreachable,
+                        .ptr_stack_offset => unreachable,
+                        .ptr_embedded_in_code => unreachable,
+                        .register => |src_reg| {
+                            // register, register
+                            // abiSize() is in bytes, so 8 (not 64) selects the 64-bit operand size.
+                            // Use the following imul opcode
+                            // 0F AF /r: IMUL r32/64, r/m32/64
+                            try self.encodeX8664Instruction(src, Instruction{
+                                .operand_size_64 = dst_ty.abiSize(self.target.*) == 8,
+                                .primary_opcode_2b = 0xaf,
+                                // TODO: Explicit optional wrap due to stage 1 miscompilation :(
+                                //       https://github.com/ziglang/zig/issues/6515
+                                .modrm = @as(
+                                    ?Instruction.ModrmEffectiveAddress,
+                                    Instruction.ModrmEffectiveAddress{ .reg = src_reg },
+                                ),
+                                .reg = dst_reg,
+                            });
+                        },
+                        .immediate => |imm| {
+                            // register, immediate:
+                            // depends on size of immediate.
+                            //
+                            // immediate fits in i8:
+                            // 6B /r ib: IMUL r32/64, r/m32/64, imm8
+                            //
+                            // immediate fits in i32:
+                            // 69 /r id: IMUL r32/64, r/m32/64, imm32
+                            //
+                            // immediate is huge:
+                            // split into 2 instructions
+                            // 1) copy the 64 bit immediate into a tmp register
+                            // 2) perform register,register mul
+                            // 0F AF /r: IMUL r32/64, r/m32/64
+                            if (math.minInt(i8) <= imm and imm <= math.maxInt(i8)) {
+                                try self.encodeX8664Instruction(src, Instruction{
+                                    .operand_size_64 = dst_ty.abiSize(self.target.*) == 8,
+                                    .primary_opcode_1b = 0x6B,
+                                    .reg = dst_reg,
+                                    // TODO: Explicit optional wrap due to stage 1 miscompilation :(
+                                    //       https://github.com/ziglang/zig/issues/6515
+                                    .modrm = @as(
+                                        ?Instruction.ModrmEffectiveAddress,
+                                        Instruction.ModrmEffectiveAddress{ .reg = dst_reg },
+                                    ),
+                                    .immediate_bytes = 1,
+                                    .immediate = imm,
+                                });
+                            } else if (math.minInt(i32) <= imm and imm <= math.maxInt(i32)) {
+                                try self.encodeX8664Instruction(src, Instruction{
+                                    .operand_size_64 = dst_ty.abiSize(self.target.*) == 8,
+                                    .primary_opcode_1b = 0x69,
+                                    .reg = dst_reg,
+                                    // TODO: Explicit optional wrap due to stage 1 miscompilation :(
+                                    //       https://github.com/ziglang/zig/issues/6515
+                                    .modrm = @as(
+                                        ?Instruction.ModrmEffectiveAddress,
+                                        Instruction.ModrmEffectiveAddress{ .reg = dst_reg },
+                                    ),
+                                    .immediate_bytes = 4,
+                                    .immediate = imm,
+                                });
+                            } else {
+                                const src_reg = try self.copyToTmpRegister(src, dst_ty, src_mcv);
+                                return self.genX8664Imul(src, dst_ty, dst_mcv, MCValue{ .register = src_reg });
+                            }
+                        },
+                        .embedded_in_code, .memory, .stack_offset => {
+                            return self.fail(src, "TODO implement x86 multiply source memory", .{});
+                        },
+                        .compare_flags_unsigned => {
+                            return self.fail(src, "TODO implement x86 multiply source compare flag (unsigned)", .{});
+                        },
+                        .compare_flags_signed => {
+                            return self.fail(src, "TODO implement x86 multiply source compare flag (signed)", .{});
+                        },
+                    }
+                },
+                .stack_offset => |off| {
+                    switch (src_mcv) {
+                        .none => unreachable,
+                        .undef => return self.genSetStack(src, dst_ty, off, .undef),
+                        .dead, .unreach => unreachable,
+                        .ptr_stack_offset => unreachable,
+                        .ptr_embedded_in_code => unreachable,
+                        .register => |src_reg| {
+                            // copy dst to a register
+                            const dst_reg = try self.copyToTmpRegister(src, dst_ty, dst_mcv);
+                            // multiply into dst_reg
+                            // register, register
+                            // Use the following imul opcode
+                            // 0F AF /r: IMUL r32/64, r/m32/64
+                            try self.encodeX8664Instruction(src, Instruction{
+                                .operand_size_64 = dst_ty.abiSize(self.target.*) == 8,
+                                .primary_opcode_2b = 0xaf,
+                                // TODO: Explicit optional wrap due to stage 1 miscompilation :(
+                                //       https://github.com/ziglang/zig/issues/6515
+                                .modrm = @as(
+                                    ?Instruction.ModrmEffectiveAddress,
+                                    Instruction.ModrmEffectiveAddress{ .reg = src_reg },
+                                ),
+                                .reg = dst_reg,
+                            });
+                            // copy dst_reg back out
+                            return self.genSetStack(src, dst_ty, off, MCValue{ .register = dst_reg });
+                        },
+                        .immediate => {
+                            return self.fail(src, "TODO implement x86 multiply source immediate", .{});
+                        },
+                        .embedded_in_code, .memory, .stack_offset => {
+                            return self.fail(src, "TODO implement x86 multiply source memory", .{});
+                        },
+                        .compare_flags_unsigned => {
+                            return self.fail(src, "TODO implement x86 multiply source compare flag (unsigned)", .{});
+                        },
+                        .compare_flags_signed => {
+                            return self.fail(src, "TODO implement x86 multiply source compare flag (signed)", .{});
+                        },
+                    }
+                },
+                .embedded_in_code, .memory => {
+                    return self.fail(src, "TODO implement x86 multiply destination memory", .{});
+                },
+            }
+        }
+
         fn genX8664ModRMRegToStack(self: *Self, src: LazySrcLoc, ty: Type, off: u32, reg: Register, opcode: u8) !void {
             const abi_size = ty.abiSize(self.target.*);
             const adj_off = off + abi_size;
-- 
cgit v1.2.3