diff options
| author | Jakub Konka <kubkon@jakubkonka.com> | 2022-05-03 12:20:27 +0200 |
|---|---|---|
| committer | Jakub Konka <kubkon@jakubkonka.com> | 2022-05-05 21:43:36 +0200 |
| commit | 8715b01005c49ff99327a87264ffaa28fb3807a0 (patch) | |
| tree | 9b93f3921cdabbe1a2d4f1b7dc14234f6d8e39f3 /src/arch | |
| parent | aaacda4df97c03cfcea444c1d77c06f46575049d (diff) | |
| download | zig-8715b01005c49ff99327a87264ffaa28fb3807a0.tar.gz zig-8715b01005c49ff99327a87264ffaa28fb3807a0.zip | |
aarch64: implement mul_with_overflow for <= 32bit ints
Add emitters for `smull`, `umull` and `tst (immediate)` instructions.
Diffstat (limited to 'src/arch')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | src/arch/aarch64/CodeGen.zig | 72 |
| -rw-r--r-- | src/arch/aarch64/Emit.zig | 6 |
| -rw-r--r-- | src/arch/aarch64/Mir.zig | 6 |
| -rw-r--r-- | src/arch/aarch64/bits.zig | 33 |
4 files changed, 115 insertions, 2 deletions
diff --git a/src/arch/aarch64/CodeGen.zig b/src/arch/aarch64/CodeGen.zig index 9b66c91925..3122501dac 100644 --- a/src/arch/aarch64/CodeGen.zig +++ b/src/arch/aarch64/CodeGen.zig @@ -1296,6 +1296,11 @@ fn binOpRegister( const dest_reg = switch (mir_tag) { .cmp_shifted_register => undefined, // cmp has no destination register + .smull, .umull => blk: { + // TODO can we reuse anything for smull and umull? + const raw_reg = try self.register_manager.allocReg(null); + break :blk raw_reg.to64(); + }, else => if (maybe_inst) |inst| blk: { const bin_op = self.air.instructions.items(.data)[inst].bin_op; @@ -1335,6 +1340,8 @@ fn binOpRegister( .shift = .lsl, } }, .mul, + .smull, + .umull, .lsl_register, .asr_register, .lsr_register, @@ -1883,8 +1890,69 @@ fn airOverflow(self: *Self, inst: Air.Inst.Index) !void { } fn airMulWithOverflow(self: *Self, inst: Air.Inst.Index) !void { - _ = inst; - return self.fail("TODO implement airMulWithOverflow for {}", .{self.target.cpu.arch}); + const ty_pl = self.air.instructions.items(.data)[inst].ty_pl; + const extra = self.air.extraData(Air.Bin, ty_pl.payload).data; + if (self.liveness.isUnused(inst)) return self.finishAir(inst, .dead, .{ extra.lhs, extra.rhs, .none }); + const result: MCValue = result: { + const lhs = try self.resolveInst(extra.lhs); + const rhs = try self.resolveInst(extra.rhs); + const lhs_ty = self.air.typeOf(extra.lhs); + const rhs_ty = self.air.typeOf(extra.rhs); + + const tuple_ty = self.air.typeOfIndex(inst); + const tuple_size = @intCast(u32, tuple_ty.abiSize(self.target.*)); + const tuple_align = tuple_ty.abiAlignment(self.target.*); + const overflow_bit_offset = @intCast(u32, tuple_ty.structFieldOffset(1, self.target.*)); + + switch (lhs_ty.zigTypeTag()) { + .Vector => return self.fail("TODO implement mul_with_overflow for vectors", .{}), + .Int => { + const int_info = lhs_ty.intInfo(self.target.*); + + if (int_info.bits <= 32) { + const stack_offset = try self.allocMem(inst, tuple_size, tuple_align); + + try 
self.spillCompareFlagsIfOccupied(); + self.compare_flags_inst = null; + + const base_tag: Mir.Inst.Tag = switch (int_info.signedness) { + .signed => .smull, + .unsigned => .umull, + }; + + const dest = try self.binOpRegister(base_tag, null, lhs, rhs, lhs_ty, rhs_ty); + const dest_reg = dest.register; + self.register_manager.freezeRegs(&.{dest_reg}); + defer self.register_manager.unfreezeRegs(&.{dest_reg}); + + const truncated_reg = try self.register_manager.allocReg(null); + self.register_manager.freezeRegs(&.{truncated_reg}); + defer self.register_manager.unfreezeRegs(&.{truncated_reg}); + + try self.truncRegister(dest_reg, truncated_reg, int_info.signedness, int_info.bits); + _ = try self.binOp( + .cmp_eq, + null, + dest, + .{ .register = truncated_reg }, + Type.usize, + Type.usize, + ); + + try self.genSetStack(lhs_ty, stack_offset, .{ .register = truncated_reg }); + try self.genSetStack(Type.initTag(.u1), stack_offset - overflow_bit_offset, .{ + .compare_flags_unsigned = .neq, + }); + + break :result MCValue{ .stack_offset = stack_offset }; + } else if (int_info.bits <= 64) { + return self.fail("TODO implement mul_with_overflow for ints", .{}); + } else return self.fail("TODO implmenet mul_with_overflow for integers > u64/i64", .{}); + }, + else => unreachable, + } + }; + return self.finishAir(inst, result, .{ extra.lhs, extra.rhs, .none }); } fn airShlWithOverflow(self: *Self, inst: Air.Inst.Index) !void { diff --git a/src/arch/aarch64/Emit.zig b/src/arch/aarch64/Emit.zig index 85389f445e..5c4e221586 100644 --- a/src/arch/aarch64/Emit.zig +++ b/src/arch/aarch64/Emit.zig @@ -106,6 +106,7 @@ pub fn emitMir( .call_extern => try emit.mirCallExtern(inst), .eor_immediate => try emit.mirLogicalImmediate(inst), + .tst_immediate => try emit.mirLogicalImmediate(inst), .add_shifted_register => try emit.mirAddSubtractShiftedRegister(inst), .adds_shifted_register => try emit.mirAddSubtractShiftedRegister(inst), @@ -166,6 +167,8 @@ pub fn emitMir( .movz => try 
emit.mirMoveWideImmediate(inst), .mul => try emit.mirDataProcessing3Source(inst), + .smull => try emit.mirDataProcessing3Source(inst), + .umull => try emit.mirDataProcessing3Source(inst), .nop => try emit.mirNop(), @@ -674,6 +677,7 @@ fn mirLogicalImmediate(emit: *Emit, inst: Mir.Inst.Index) !void { switch (tag) { .eor_immediate => try emit.writeInstruction(Instruction.eorImmediate(rd, rn, imms, immr, n)), + .tst_immediate => try emit.writeInstruction(Instruction.tstImmediate(rn, imms, immr, n)), else => unreachable, } } @@ -1000,6 +1004,8 @@ fn mirDataProcessing3Source(emit: *Emit, inst: Mir.Inst.Index) !void { switch (tag) { .mul => try emit.writeInstruction(Instruction.mul(rrr.rd, rrr.rn, rrr.rm)), + .smull => try emit.writeInstruction(Instruction.smull(rrr.rd, rrr.rn, rrr.rm)), + .umull => try emit.writeInstruction(Instruction.umull(rrr.rd, rrr.rn, rrr.rm)), else => unreachable, } } diff --git a/src/arch/aarch64/Mir.zig b/src/arch/aarch64/Mir.zig index 8c5b635649..49ec895290 100644 --- a/src/arch/aarch64/Mir.zig +++ b/src/arch/aarch64/Mir.zig @@ -146,6 +146,8 @@ pub const Inst = struct { ret, /// Signed bitfield extract sbfx, + /// Signed multiply long + smull, /// Signed extend byte sxtb, /// Signed extend halfword @@ -182,8 +184,12 @@ pub const Inst = struct { subs_shifted_register, /// Supervisor Call svc, + /// Test bits (immediate) + tst_immediate, /// Unsigned bitfield extract ubfx, + /// Unsigned multiply long + umull, /// Unsigned extend byte uxtb, /// Unsigned extend halfword diff --git a/src/arch/aarch64/bits.zig b/src/arch/aarch64/bits.zig index 0775ca1f7b..b72891af30 100644 --- a/src/arch/aarch64/bits.zig +++ b/src/arch/aarch64/bits.zig @@ -1409,6 +1409,10 @@ pub const Instruction = union(enum) { return logicalImmediate(0b11, rd, rn, imms, immr, n); } + pub fn tstImmediate(rn: Register, imms: u6, immr: u6, n: u1) Instruction { + return andsImmediate(.xzr, rn, imms, immr, n); + } + // Bitfield pub fn sbfm(rd: Register, rn: Register, immr: u6, imms: 
u6) Instruction { @@ -1564,6 +1568,15 @@ pub const Instruction = union(enum) { return dataProcessing3Source(0b00, 0b000, 0b0, rd, rn, rm, ra); } + pub fn smaddl(rd: Register, rn: Register, rm: Register, ra: Register) Instruction { + return dataProcessing3Source(0b00, 0b001, 0b0, rd, rn, rm, ra); + } + + pub fn umaddl(rd: Register, rn: Register, rm: Register, ra: Register) Instruction { + assert(rd.size() == 64); + return dataProcessing3Source(0b00, 0b101, 0b0, rd, rn, rm, ra); + } + pub fn msub(rd: Register, rn: Register, rm: Register, ra: Register) Instruction { return dataProcessing3Source(0b00, 0b000, 0b1, rd, rn, rm, ra); } @@ -1572,6 +1585,14 @@ pub const Instruction = union(enum) { return madd(rd, rn, rm, .xzr); } + pub fn smull(rd: Register, rn: Register, rm: Register) Instruction { + return smaddl(rd, rn, rm, .xzr); + } + + pub fn umull(rd: Register, rn: Register, rm: Register) Instruction { + return umaddl(rd, rn, rm, .xzr); + } + pub fn mneg(rd: Register, rn: Register, rm: Register) Instruction { return msub(rd, rn, rm, .xzr); } @@ -1790,6 +1811,18 @@ test "serialize instructions" { .inst = Instruction.lsrImmediate(.x4, .x2, 63), .expected = 0b1_10_100110_1_111111_111111_00010_00100, }, + .{ // umull x0, w0, w1 + .inst = Instruction.umull(.x0, .w0, .w1), + .expected = 0b1_00_11011_1_01_00001_0_11111_00000_00000, + }, + .{ // smull x0, w0, w1 + .inst = Instruction.smull(.x0, .w0, .w1), + .expected = 0b1_00_11011_0_01_00001_0_11111_00000_00000, + }, + .{ // tst x0, #0xffffffff00000000 + .inst = Instruction.tstImmediate(.x0, 0b011111, 0b100000, 0b1), + .expected = 0b1_11_100100_1_100000_011111_00000_11111, + }, }; for (testcases) |case| { |
