aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorgracefu <81774659+gracefuu@users.noreply.github.com>2021-04-09 13:51:00 +0800
committergracefu <81774659+gracefuu@users.noreply.github.com>2021-04-16 15:21:17 +0800
commitc4b83ea02102611a85f75b189f0803d9b6a335c2 (patch)
tree5460625c3766085c5f1210f577e3144296c6a986 /src
parent5bd464e386df35bfe38b062190074ce3c2689001 (diff)
downloadzig-c4b83ea02102611a85f75b189f0803d9b6a335c2.tar.gz
zig-c4b83ea02102611a85f75b189f0803d9b6a335c2.zip
stage2 x86_64: implement integer mul
This was also an experiment to see whether it would be easier to implement a new feature when using the instruction encoder. Verdict: It's not that much easier, but I think it's certainly much more readable, because the description of the Instruction annotates what each field means. Right now, precise knowledge of x86_64 instructions is still required because things like when to set the 64-bit flag, how to read x86_64 instruction references, etc. are still not automatically done for you. In the future, this interface might make it slightly easier to write an assembler for x86_64, by abstracting the bit-fiddling aspects of instruction encoding.
Diffstat (limited to 'src')
-rw-r--r--src/Module.zig60
-rw-r--r--src/Sema.zig7
-rw-r--r--src/codegen.zig149
3 files changed, 216 insertions, 0 deletions
diff --git a/src/Module.zig b/src/Module.zig
index 96b490e2a1..90e1a71bd2 100644
--- a/src/Module.zig
+++ b/src/Module.zig
@@ -4330,6 +4330,33 @@ pub fn intSub(allocator: *Allocator, lhs: Value, rhs: Value) !Value {
}
}
+/// Multiplies two integer `Value`s using big-integer arithmetic and returns the
+/// product as a new `Value`, tagged `int_big_positive` or `int_big_negative`
+/// according to the sign of the result.
+/// The limbs backing the result are allocated from `allocator` and are not freed
+/// here; only the scratch buffer used by the multiplication is released.
+/// Allocation failures from `allocator.alloc` are propagated to the caller.
+pub fn intMul(allocator: *Allocator, lhs: Value, rhs: Value) !Value {
+ // TODO is this a performance issue? maybe we should try the operation without
+ // resorting to BigInt first.
+ var lhs_space: Value.BigIntSpace = undefined;
+ var rhs_space: Value.BigIntSpace = undefined;
+ const lhs_bigint = lhs.toBigInt(&lhs_space);
+ const rhs_bigint = rhs.toBigInt(&rhs_space);
+ // Storage for the product: the limb counts of both operands plus one.
+ const limbs = try allocator.alloc(
+ std.math.big.Limb,
+ lhs_bigint.limbs.len + rhs_bigint.limbs.len + 1,
+ );
+ // `positive` and `len` start out undefined; they are read only after `mul`
+ // below, which presumably fills them in.
+ var result_bigint = BigIntMutable{ .limbs = limbs, .positive = undefined, .len = undefined };
+ // Scratch space for the multiplication, sized by the std helper and freed on exit.
+ var limbs_buffer = try allocator.alloc(
+ std.math.big.Limb,
+ std.math.big.int.calcMulLimbsBufferLen(lhs_bigint.limbs.len, rhs_bigint.limbs.len, 1),
+ );
+ defer allocator.free(limbs_buffer);
+ result_bigint.mul(lhs_bigint, rhs_bigint, limbs_buffer, allocator);
+ // Only the first `len` limbs of the buffer are part of the result.
+ const result_limbs = result_bigint.limbs[0..result_bigint.len];
+
+ if (result_bigint.positive) {
+ return Value.Tag.int_big_positive.create(allocator, result_limbs);
+ } else {
+ return Value.Tag.int_big_negative.create(allocator, result_limbs);
+ }
+}
+
pub fn floatAdd(
arena: *Allocator,
float_type: Type,
@@ -4396,6 +4423,39 @@ pub fn floatSub(
}
}
+/// Multiplies two float `Value`s and returns the product as a new float `Value`
+/// created with `arena`. The precision of the computation is chosen from the tag
+/// of `float_type`:
+///  * `f16` is not implemented yet and panics.
+///  * `f32` and `f64` are computed in their own width.
+///  * `f128`, `comptime_float` and `c_longdouble` are all computed as `f128`.
+/// Any other type tag is treated as unreachable.
+/// `src` is accepted but not used by this function body.
+pub fn floatMul(
+ arena: *Allocator,
+ float_type: Type,
+ src: LazySrcLoc,
+ lhs: Value,
+ rhs: Value,
+) !Value {
+ switch (float_type.tag()) {
+ .f16 => {
+ // Disabled until the compiler-rt routine named in the message exists.
+ @panic("TODO add __trunctfhf2 to compiler-rt");
+ //const lhs_val = lhs.toFloat(f16);
+ //const rhs_val = rhs.toFloat(f16);
+ //return Value.Tag.float_16.create(arena, lhs_val * rhs_val);
+ },
+ .f32 => {
+ const lhs_val = lhs.toFloat(f32);
+ const rhs_val = rhs.toFloat(f32);
+ return Value.Tag.float_32.create(arena, lhs_val * rhs_val);
+ },
+ .f64 => {
+ const lhs_val = lhs.toFloat(f64);
+ const rhs_val = rhs.toFloat(f64);
+ return Value.Tag.float_64.create(arena, lhs_val * rhs_val);
+ },
+ // These three tags share the widest representation handled here.
+ .f128, .comptime_float, .c_longdouble => {
+ const lhs_val = lhs.toFloat(f128);
+ const rhs_val = rhs.toFloat(f128);
+ return Value.Tag.float_128.create(arena, lhs_val * rhs_val);
+ },
+ else => unreachable,
+ }
+}
+
pub fn simplePtrType(
mod: *Module,
arena: *Allocator,
diff --git a/src/Sema.zig b/src/Sema.zig
index 98bff5bf23..74af84b078 100644
--- a/src/Sema.zig
+++ b/src/Sema.zig
@@ -3885,6 +3885,13 @@ fn analyzeArithmetic(
try Module.floatSub(sema.arena, scalar_type, src, lhs_val, rhs_val);
break :blk val;
},
+ .mul => blk: {
+ const val = if (is_int)
+ try Module.intMul(sema.arena, lhs_val, rhs_val)
+ else
+ try Module.floatMul(sema.arena, scalar_type, src, lhs_val, rhs_val);
+ break :blk val;
+ },
else => return sema.mod.fail(&block.base, src, "TODO Implement arithmetic operand '{s}'", .{@tagName(zir_tag)}),
};
diff --git a/src/codegen.zig b/src/codegen.zig
index 6739acbfa6..2f49e10522 100644
--- a/src/codegen.zig
+++ b/src/codegen.zig
@@ -1079,6 +1079,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
if (inst.base.isUnused())
return MCValue.dead;
switch (arch) {
+ .x86_64 => return try self.genX8664BinMath(&inst.base, inst.lhs, inst.rhs),
.arm, .armeb => return try self.genArmMul(&inst.base, inst.lhs, inst.rhs),
else => return self.fail(inst.base.src, "TODO implement mul for {}", .{self.target.cpu.arch}),
}
@@ -1574,6 +1575,7 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
.sub, .subwrap => try self.genX8664BinMathCode(inst.src, inst.ty, dst_mcv, src_mcv, 5, 0x28),
.xor, .not => try self.genX8664BinMathCode(inst.src, inst.ty, dst_mcv, src_mcv, 6, 0x30),
+ .mul, .mulwrap => try self.genX8664Imul(inst.src, inst.ty, dst_mcv, src_mcv),
else => unreachable,
}
@@ -1795,6 +1797,153 @@ fn Function(comptime arch: std.Target.Cpu.Arch) type {
}
}
+ /// Performs integer multiplication between dst_mcv and src_mcv, storing the result in dst_mcv.
+ ///
+ /// Currently handled operand combinations:
+ ///  * register destination with a register or immediate source
+ ///  * stack-offset destination with a register source (the destination is
+ ///    copied into a temporary register, multiplied there, and stored back)
+ /// An `undef` source simply sets the destination to undef. Every other
+ /// combination either reports a "TODO" failure or is unreachable.
+ ///
+ /// The 64-bit operand size flag is set when the ABI size of `dst_ty` is
+ /// 8: `abiSize` is a byte count, so a 64-bit integer has size 8, not 64.
+ fn genX8664Imul(
+ self: *Self,
+ src: LazySrcLoc,
+ dst_ty: Type,
+ dst_mcv: MCValue,
+ src_mcv: MCValue,
+ ) !void {
+ switch (dst_mcv) {
+ .none => unreachable,
+ .undef => unreachable,
+ .dead, .unreach, .immediate => unreachable,
+ .compare_flags_unsigned => unreachable,
+ .compare_flags_signed => unreachable,
+ .ptr_stack_offset => unreachable,
+ .ptr_embedded_in_code => unreachable,
+ .register => |dst_reg| {
+ switch (src_mcv) {
+ .none => unreachable,
+ .undef => try self.genSetReg(src, dst_ty, dst_reg, .undef),
+ .dead, .unreach => unreachable,
+ .ptr_stack_offset => unreachable,
+ .ptr_embedded_in_code => unreachable,
+ .register => |src_reg| {
+ // register, register
+ //
+ // Use the following imul opcode
+ // 0F AF /r: IMUL r32/64, r/m32/64
+ try self.encodeX8664Instruction(src, Instruction{
+ // abiSize() is in bytes: 8 bytes means a 64-bit operand.
+ .operand_size_64 = dst_ty.abiSize(self.target.*) == 8,
+ .primary_opcode_2b = 0xaf,
+ // TODO: Explicit optional wrap due to stage 1 miscompilation :(
+ // https://github.com/ziglang/zig/issues/6515
+ .modrm = @as(
+ ?Instruction.ModrmEffectiveAddress,
+ Instruction.ModrmEffectiveAddress{ .reg = src_reg },
+ ),
+ .reg = dst_reg,
+ });
+ },
+ .immediate => |imm| {
+ // register, immediate:
+ // depends on size of immediate.
+ //
+ // immediate fits in i8:
+ // 6B /r ib: IMUL r32/64, r/m32/64, imm8
+ //
+ // immediate fits in i32:
+ // 69 /r id: IMUL r32/64, r/m32/64, imm32
+ //
+ // immediate is huge:
+ // split into 2 instructions
+ // 1) copy the 64 bit immediate into a tmp register
+ // 2) perform register,register mul
+ // 0F AF /r: IMUL r32/64, r/m32/64
+ if (math.minInt(i8) <= imm and imm <= math.maxInt(i8)) {
+ try self.encodeX8664Instruction(src, Instruction{
+ .operand_size_64 = dst_ty.abiSize(self.target.*) == 8,
+ .primary_opcode_1b = 0x6B,
+ .reg = dst_reg,
+ // TODO: Explicit optional wrap due to stage 1 miscompilation :(
+ // https://github.com/ziglang/zig/issues/6515
+ .modrm = @as(
+ ?Instruction.ModrmEffectiveAddress,
+ Instruction.ModrmEffectiveAddress{ .reg = dst_reg },
+ ),
+ .immediate_bytes = 1,
+ .immediate = imm,
+ });
+ } else if (math.minInt(i32) <= imm and imm <= math.maxInt(i32)) {
+ try self.encodeX8664Instruction(src, Instruction{
+ .operand_size_64 = dst_ty.abiSize(self.target.*) == 8,
+ .primary_opcode_1b = 0x69,
+ .reg = dst_reg,
+ // TODO: Explicit optional wrap due to stage 1 miscompilation :(
+ // https://github.com/ziglang/zig/issues/6515
+ .modrm = @as(
+ ?Instruction.ModrmEffectiveAddress,
+ Instruction.ModrmEffectiveAddress{ .reg = dst_reg },
+ ),
+ .immediate_bytes = 4,
+ .immediate = imm,
+ });
+ } else {
+ // Too large for an imm32: materialize it in a register and
+ // retry through the register,register path above.
+ const src_reg = try self.copyToTmpRegister(src, dst_ty, src_mcv);
+ return self.genX8664Imul(src, dst_ty, dst_mcv, MCValue{ .register = src_reg });
+ }
+ },
+ .embedded_in_code, .memory, .stack_offset => {
+ return self.fail(src, "TODO implement x86 multiply source memory", .{});
+ },
+ .compare_flags_unsigned => {
+ return self.fail(src, "TODO implement x86 multiply source compare flag (unsigned)", .{});
+ },
+ .compare_flags_signed => {
+ return self.fail(src, "TODO implement x86 multiply source compare flag (signed)", .{});
+ },
+ }
+ },
+ .stack_offset => |off| {
+ switch (src_mcv) {
+ .none => unreachable,
+ .undef => return self.genSetStack(src, dst_ty, off, .undef),
+ .dead, .unreach => unreachable,
+ .ptr_stack_offset => unreachable,
+ .ptr_embedded_in_code => unreachable,
+ .register => |src_reg| {
+ // copy dst to a register
+ const dst_reg = try self.copyToTmpRegister(src, dst_ty, dst_mcv);
+ // multiply into dst_reg
+ // register, register
+ // Use the following imul opcode
+ // 0F AF /r: IMUL r32/64, r/m32/64
+ try self.encodeX8664Instruction(src, Instruction{
+ .operand_size_64 = dst_ty.abiSize(self.target.*) == 8,
+ .primary_opcode_2b = 0xaf,
+ // TODO: Explicit optional wrap due to stage 1 miscompilation :(
+ // https://github.com/ziglang/zig/issues/6515
+ .modrm = @as(
+ ?Instruction.ModrmEffectiveAddress,
+ Instruction.ModrmEffectiveAddress{ .reg = src_reg },
+ ),
+ .reg = dst_reg,
+ });
+ // copy dst_reg back out
+ return self.genSetStack(src, dst_ty, off, MCValue{ .register = dst_reg });
+ },
+ .immediate => {
+ return self.fail(src, "TODO implement x86 multiply source immediate", .{});
+ },
+ .embedded_in_code, .memory, .stack_offset => {
+ return self.fail(src, "TODO implement x86 multiply source memory", .{});
+ },
+ .compare_flags_unsigned => {
+ return self.fail(src, "TODO implement x86 multiply source compare flag (unsigned)", .{});
+ },
+ .compare_flags_signed => {
+ return self.fail(src, "TODO implement x86 multiply source compare flag (signed)", .{});
+ },
+ }
+ },
+ .embedded_in_code, .memory => {
+ return self.fail(src, "TODO implement x86 multiply destination memory", .{});
+ },
+ }
+ }
+
fn genX8664ModRMRegToStack(self: *Self, src: LazySrcLoc, ty: Type, off: u32, reg: Register, opcode: u8) !void {
const abi_size = ty.abiSize(self.target.*);
const adj_off = off + abi_size;