diff options
| author | Andrew Kelley <andrew@ziglang.org> | 2023-05-11 08:36:33 -0700 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2023-05-11 08:36:33 -0700 |
| commit | 5569e6b49d9b421d35e3175df36eb9fe7e4e8084 (patch) | |
| tree | 4622b826b87d84de1abc3598de27317d92f21041 /src/codegen | |
| parent | c857959372ec87e3988c2548876d818d92df5f9a (diff) | |
| parent | 2e6a6d7564901009aace5fef99e647959ad5bb90 (diff) | |
| download | zig-5569e6b49d9b421d35e3175df36eb9fe7e4e8084.tar.gz zig-5569e6b49d9b421d35e3175df36eb9fe7e4e8084.zip | |
Merge pull request #15639 from jacobly0/signed-mod
llvm/cbe: fix signed `@mod`/`@divFloor` computations
Diffstat (limited to 'src/codegen')
| -rw-r--r-- | src/codegen/c.zig | 13 |
| -rw-r--r-- | src/codegen/llvm.zig | 59 |
2 files changed, 44 insertions, 28 deletions
diff --git a/src/codegen/c.zig b/src/codegen/c.zig index 7da99de5c1..86b74b1429 100644 --- a/src/codegen/c.zig +++ b/src/codegen/c.zig @@ -6508,23 +6508,16 @@ fn airSplat(f: *Function, inst: Air.Inst.Index) !CValue { const inst_ty = f.air.typeOfIndex(inst); const inst_scalar_ty = inst_ty.scalarType(); - const inst_scalar_cty = try f.typeToIndex(inst_scalar_ty, .complete); - const need_memcpy = f.indexToCType(inst_scalar_cty).tag() == .array; const writer = f.object.writer(); const local = try f.allocLocal(inst, inst_ty); const v = try Vectorize.start(f, inst, writer, inst_ty); - if (need_memcpy) try writer.writeAll("memcpy(&"); + const a = try Assignment.init(f, inst_scalar_ty); try f.writeCValue(writer, local, .Other); try v.elem(f, writer); - try writer.writeAll(if (need_memcpy) ", &" else " = "); + try a.assign(f, writer); try f.writeCValue(writer, operand, .Other); - if (need_memcpy) { - try writer.writeAll(", sizeof("); - try f.renderCType(writer, inst_scalar_cty); - try writer.writeAll("))"); - } - try writer.writeAll(";\n"); + try a.end(f, writer); try v.end(f, inst, writer); return local; diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig index 2189663338..991ac04573 100644 --- a/src/codegen/llvm.zig +++ b/src/codegen/llvm.zig @@ -7215,20 +7215,28 @@ pub const FuncGen = struct { return self.buildFloatOp(.floor, inst_ty, 1, .{result}); } if (scalar_ty.isSignedInt()) { - // const d = @divTrunc(a, b); - // const r = @rem(a, b); - // return if (r == 0) d else d - ((a < 0) ^ (b < 0)); - const result_llvm_ty = try self.dg.lowerType(inst_ty); - const zero = result_llvm_ty.constNull(); - const div_trunc = self.builder.buildSDiv(lhs, rhs, ""); + const target = self.dg.module.getTarget(); + const inst_llvm_ty = try self.dg.lowerType(inst_ty); + const scalar_bit_size_minus_one = scalar_ty.bitSize(target) - 1; + const bit_size_minus_one = if (inst_ty.zigTypeTag() == .Vector) const_vector: { + const vec_len = inst_ty.vectorLen(); + const scalar_llvm_ty = try 
self.dg.lowerType(scalar_ty); + + const shifts = try self.gpa.alloc(*llvm.Value, vec_len); + defer self.gpa.free(shifts); + + @memset(shifts, scalar_llvm_ty.constInt(scalar_bit_size_minus_one, .False)); + break :const_vector llvm.constVector(shifts.ptr, vec_len); + } else inst_llvm_ty.constInt(scalar_bit_size_minus_one, .False); + + const div = self.builder.buildSDiv(lhs, rhs, ""); const rem = self.builder.buildSRem(lhs, rhs, ""); - const rem_eq_0 = self.builder.buildICmp(.EQ, rem, zero, ""); - const a_lt_0 = self.builder.buildICmp(.SLT, lhs, zero, ""); - const b_lt_0 = self.builder.buildICmp(.SLT, rhs, zero, ""); - const a_b_xor = self.builder.buildXor(a_lt_0, b_lt_0, ""); - const a_b_xor_ext = self.builder.buildZExt(a_b_xor, div_trunc.typeOf(), ""); - const d_sub_xor = self.builder.buildSub(div_trunc, a_b_xor_ext, ""); - return self.builder.buildSelect(rem_eq_0, div_trunc, d_sub_xor, ""); + const div_sign = self.builder.buildXor(lhs, rhs, ""); + const div_sign_mask = self.builder.buildAShr(div_sign, bit_size_minus_one, ""); + const zero = inst_llvm_ty.constNull(); + const rem_nonzero = self.builder.buildICmp(.NE, rem, zero, ""); + const correction = self.builder.buildSelect(rem_nonzero, div_sign_mask, zero, ""); + return self.builder.buildNSWAdd(div, correction, ""); } return self.builder.buildUDiv(lhs, rhs, ""); } @@ -7280,12 +7288,27 @@ pub const FuncGen = struct { return self.builder.buildSelect(ltz, c, a, ""); } if (scalar_ty.isSignedInt()) { - const a = self.builder.buildSRem(lhs, rhs, ""); - const b = self.builder.buildNSWAdd(a, rhs, ""); - const c = self.builder.buildSRem(b, rhs, ""); + const target = self.dg.module.getTarget(); + const scalar_bit_size_minus_one = scalar_ty.bitSize(target) - 1; + const bit_size_minus_one = if (inst_ty.zigTypeTag() == .Vector) const_vector: { + const vec_len = inst_ty.vectorLen(); + const scalar_llvm_ty = try self.dg.lowerType(scalar_ty); + + const shifts = try self.gpa.alloc(*llvm.Value, vec_len); + defer 
self.gpa.free(shifts); + + @memset(shifts, scalar_llvm_ty.constInt(scalar_bit_size_minus_one, .False)); + break :const_vector llvm.constVector(shifts.ptr, vec_len); + } else inst_llvm_ty.constInt(scalar_bit_size_minus_one, .False); + + const rem = self.builder.buildSRem(lhs, rhs, ""); + const div_sign = self.builder.buildXor(lhs, rhs, ""); + const div_sign_mask = self.builder.buildAShr(div_sign, bit_size_minus_one, ""); + const rhs_masked = self.builder.buildAnd(rhs, div_sign_mask, ""); const zero = inst_llvm_ty.constNull(); - const ltz = self.builder.buildICmp(.SLT, lhs, zero, ""); - return self.builder.buildSelect(ltz, c, a, ""); + const rem_nonzero = self.builder.buildICmp(.NE, rem, zero, ""); + const correction = self.builder.buildSelect(rem_nonzero, rhs_masked, zero, ""); + return self.builder.buildNSWAdd(rem, correction, ""); } return self.builder.buildURem(lhs, rhs, ""); } |
