diff options
| author | John Schmidt <john.schmidt.h@gmail.com> | 2022-03-15 23:25:38 +0100 |
|---|---|---|
| committer | Andrew Kelley <andrew@ziglang.org> | 2022-03-16 20:11:05 -0700 |
| commit | c8ed813097ebb679e858a7764673f6236e638ea4 (patch) | |
| tree | 2f0c506bbebe70a70e9678110f87d43cc204147f /src/codegen/llvm.zig | |
| parent | 312536540baf26728a56304811f63f01a7414b7a (diff) | |
| download | zig-c8ed813097ebb679e858a7764673f6236e638ea4.tar.gz zig-c8ed813097ebb679e858a7764673f6236e638ea4.zip | |
Implement `@mulAdd` for vectors
Diffstat (limited to 'src/codegen/llvm.zig')
| -rw-r--r-- | src/codegen/llvm.zig | 55 |
1 file changed, 45 insertions, 10 deletions
diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig index 67eac94af3..b41611813e 100644 --- a/src/codegen/llvm.zig +++ b/src/codegen/llvm.zig @@ -5166,7 +5166,13 @@ pub const FuncGen = struct { intrinsic, libc: [*:0]const u8, }; - const strat: Strat = switch (ty.floatBits(target)) { + + const scalar_ty = if (ty.zigTypeTag() == .Vector) + ty.elemType() + else + ty; + + const strat: Strat = switch (scalar_ty.floatBits(target)) { 16, 32, 64 => Strat.intrinsic, 80 => if (CType.longdouble.sizeInBits(target) == 80) Strat{ .intrinsic = {} } else Strat{ .libc = "__fmax" }, // LLVM always lowers the fma builtin for f128 to fmal, which is for `long double`. @@ -5175,17 +5181,46 @@ pub const FuncGen = struct { else => unreachable, }; - const llvm_fn = switch (strat) { - .intrinsic => self.getIntrinsic("llvm.fma", &.{llvm_ty}), - .libc => |fn_name| self.dg.object.llvm_module.getNamedFunction(fn_name) orelse b: { - const param_types = [_]*const llvm.Type{ llvm_ty, llvm_ty, llvm_ty }; - const fn_type = llvm.functionType(llvm_ty, &param_types, param_types.len, .False); - break :b self.dg.object.llvm_module.addFunction(fn_name, fn_type); + switch (strat) { + .intrinsic => { + const llvm_fn = self.getIntrinsic("llvm.fma", &.{llvm_ty}); + const params = [_]*const llvm.Value{ mulend1, mulend2, addend }; + return self.builder.buildCall(llvm_fn, &params, params.len, .C, .Auto, ""); }, - }; + .libc => |fn_name| { + const scalar_llvm_ty = try self.dg.llvmType(scalar_ty); + const llvm_fn = self.dg.object.llvm_module.getNamedFunction(fn_name) orelse b: { + const param_types = [_]*const llvm.Type{ scalar_llvm_ty, scalar_llvm_ty, scalar_llvm_ty }; + const fn_type = llvm.functionType(scalar_llvm_ty, &param_types, param_types.len, .False); + break :b self.dg.object.llvm_module.addFunction(fn_name, fn_type); + }; + + if (ty.zigTypeTag() == .Vector) { + const llvm_i32 = self.context.intType(32); + const vector_llvm_ty = try self.dg.llvmType(ty); + + var i: usize = 0; + var vector = 
vector_llvm_ty.getUndef(); + while (i < ty.vectorLen()) : (i += 1) { + const index_i32 = llvm_i32.constInt(i, .False); + + const mulend1_elem = self.builder.buildExtractElement(mulend1, index_i32, ""); + const mulend2_elem = self.builder.buildExtractElement(mulend2, index_i32, ""); + const addend_elem = self.builder.buildExtractElement(addend, index_i32, ""); - const params = [_]*const llvm.Value{ mulend1, mulend2, addend }; - return self.builder.buildCall(llvm_fn, &params, params.len, .C, .Auto, ""); + const params = [_]*const llvm.Value{ mulend1_elem, mulend2_elem, addend_elem }; + const mul_add = self.builder.buildCall(llvm_fn, &params, params.len, .C, .Auto, ""); + + vector = self.builder.buildInsertElement(vector, mul_add, index_i32, ""); + } + + return vector; + } else { + const params = [_]*const llvm.Value{ mulend1, mulend2, addend }; + return self.builder.buildCall(llvm_fn, &params, params.len, .C, .Auto, ""); + } + }, + } } fn airShlWithOverflow(self: *FuncGen, inst: Air.Inst.Index) !?*const llvm.Value { |
