From 7d6a6ce87fde465ffc3bd6b0a8bb6e039c67268f Mon Sep 17 00:00:00 2001 From: Cody Tapscott Date: Tue, 26 Apr 2022 15:19:55 -0700 Subject: stage2: Manually lower softfloat ops when needed Updates stage2 to manually lower softfloat operations for all unary floating point operations and arithmetic. Softfloat support still needs to be added for conversion operators (float<->float and int<->float) --- src/codegen/llvm.zig | 400 +++++++++++++++++++++++++++++++++------------------ 1 file changed, 264 insertions(+), 136 deletions(-) (limited to 'src/codegen/llvm.zig') diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig index b2abc536ef..bf7f4e2ce1 100644 --- a/src/codegen/llvm.zig +++ b/src/codegen/llvm.zig @@ -3518,19 +3518,19 @@ pub const FuncGen = struct { .shr => try self.airShr(inst, false), .shr_exact => try self.airShr(inst, true), - .sqrt => try self.airUnaryOp(inst, "sqrt"), - .sin => try self.airUnaryOp(inst, "sin"), - .cos => try self.airUnaryOp(inst, "cos"), - .exp => try self.airUnaryOp(inst, "exp"), - .exp2 => try self.airUnaryOp(inst, "exp2"), - .log => try self.airUnaryOp(inst, "log"), - .log2 => try self.airUnaryOp(inst, "log2"), - .log10 => try self.airUnaryOp(inst, "log10"), - .fabs => try self.airUnaryOp(inst, "fabs"), - .floor => try self.airUnaryOp(inst, "floor"), - .ceil => try self.airUnaryOp(inst, "ceil"), - .round => try self.airUnaryOp(inst, "round"), - .trunc_float => try self.airUnaryOp(inst, "trunc"), + .sqrt => try self.airUnaryOp(inst, .sqrt), + .sin => try self.airUnaryOp(inst, .sin), + .cos => try self.airUnaryOp(inst, .cos), + .exp => try self.airUnaryOp(inst, .exp), + .exp2 => try self.airUnaryOp(inst, .exp2), + .log => try self.airUnaryOp(inst, .log), + .log2 => try self.airUnaryOp(inst, .log2), + .log10 => try self.airUnaryOp(inst, .log10), + .fabs => try self.airUnaryOp(inst, .fabs), + .floor => try self.airUnaryOp(inst, .floor), + .ceil => try self.airUnaryOp(inst, .ceil), + .round => try self.airUnaryOp(inst, .round), + .trunc_float => try self.airUnaryOp(inst, .trunc), .cmp_eq => try self.airCmp(inst, .eq), .cmp_gt => try self.airCmp(inst, .gt), @@ -3905,7 +3905,7 @@ pub const FuncGen = struct { rhs: *const llvm.Value, operand_ty: Type, op: math.CompareOperator, - ) *const llvm.Value { + ) Allocator.Error!*const llvm.Value { var int_buffer: Type.Payload.Bits = undefined; var opt_buffer: Type.Payload.ElemType = undefined; @@ -3947,7 +3947,7 @@ pub const FuncGen = struct { self.builder.positionBuilderAtEnd(both_pl_block); const lhs_payload = self.optPayloadHandle(lhs, is_by_ref); const rhs_payload = self.optPayloadHandle(rhs, is_by_ref); - const payload_cmp = self.cmp(lhs_payload, rhs_payload, payload_ty, op); + const payload_cmp = try self.cmp(lhs_payload, rhs_payload, payload_ty, op); _ = self.builder.buildBr(end_block); const both_pl_block_end = self.builder.getInsertBlock(); @@ -3983,17 +3983,7 @@ pub const FuncGen = struct { ); return phi_node; }, - .Float => { - const operation: llvm.RealPredicate = switch (op) { - .eq => .OEQ, - .neq => .UNE, - .lt => .OLT, - .lte => .OLE, - .gt => .OGT, - .gte => .OGE, - }; - return self.builder.buildFCmp(operation, lhs, rhs, ""); - }, + .Float => return self.buildFloatCmp(op, operand_ty, &.{ lhs, rhs }), else => unreachable, }; const is_signed = int_ty.isSignedInt(); @@ -5221,7 +5211,7 @@ pub const FuncGen = struct { const inst_ty = self.air.typeOfIndex(inst); const scalar_ty = inst_ty.scalarType(); - if (scalar_ty.isAnyFloat()) return self.builder.buildFAdd(lhs, rhs, ""); + if (scalar_ty.isAnyFloat()) return 
self.buildFloatOp(.add, inst_ty, &.{ lhs, rhs }); if (scalar_ty.isSignedInt()) return self.builder.buildNSWAdd(lhs, rhs, ""); return self.builder.buildNUWAdd(lhs, rhs, ""); } @@ -5260,7 +5250,7 @@ pub const FuncGen = struct { const inst_ty = self.air.typeOfIndex(inst); const scalar_ty = inst_ty.scalarType(); - if (scalar_ty.isAnyFloat()) return self.builder.buildFSub(lhs, rhs, ""); + if (scalar_ty.isAnyFloat()) return self.buildFloatOp(.sub, inst_ty, &.{ lhs, rhs }); if (scalar_ty.isSignedInt()) return self.builder.buildNSWSub(lhs, rhs, ""); return self.builder.buildNUWSub(lhs, rhs, ""); } @@ -5298,7 +5288,7 @@ pub const FuncGen = struct { const inst_ty = self.air.typeOfIndex(inst); const scalar_ty = inst_ty.scalarType(); - if (scalar_ty.isAnyFloat()) return self.builder.buildFMul(lhs, rhs, ""); + if (scalar_ty.isAnyFloat()) return self.buildFloatOp(.mul, inst_ty, &.{ lhs, rhs }); if (scalar_ty.isSignedInt()) return self.builder.buildNSWMul(lhs, rhs, ""); return self.builder.buildNUWMul(lhs, rhs, ""); } @@ -5333,8 +5323,9 @@ pub const FuncGen = struct { const bin_op = self.air.instructions.items(.data)[inst].bin_op; const lhs = try self.resolveInst(bin_op.lhs); const rhs = try self.resolveInst(bin_op.rhs); + const inst_ty = self.air.typeOfIndex(inst); - return self.builder.buildFDiv(lhs, rhs, ""); + return self.buildFloatOp(.div, inst_ty, &.{ lhs, rhs }); } fn airDivTrunc(self: *FuncGen, inst: Air.Inst.Index) !?*const llvm.Value { @@ -5347,8 +5338,8 @@ pub const FuncGen = struct { const scalar_ty = inst_ty.scalarType(); if (scalar_ty.isRuntimeFloat()) { - const result = self.builder.buildFDiv(lhs, rhs, ""); - return self.callTrunc(result, inst_ty); + const result = try self.buildFloatOp(.div, inst_ty, &.{ lhs, rhs }); + return self.buildFloatOp(.trunc, inst_ty, &.{result}); } if (scalar_ty.isSignedInt()) return self.builder.buildSDiv(lhs, rhs, ""); return self.builder.buildUDiv(lhs, rhs, ""); @@ -5364,8 +5355,8 @@ pub const FuncGen = struct { const scalar_ty = inst_ty.scalarType(); if (scalar_ty.isRuntimeFloat()) { - const result = self.builder.buildFDiv(lhs, rhs, ""); - return try self.callFloor(result, inst_ty); + const result = try self.buildFloatOp(.div, inst_ty, &.{ lhs, rhs }); + return self.buildFloatOp(.floor, inst_ty, &.{result}); } if (scalar_ty.isSignedInt()) { // const d = @divTrunc(a, b); @@ -5395,7 +5386,7 @@ pub const FuncGen = struct { const inst_ty = self.air.typeOfIndex(inst); const scalar_ty = inst_ty.scalarType(); - if (scalar_ty.isRuntimeFloat()) return self.builder.buildFDiv(lhs, rhs, ""); + if (scalar_ty.isRuntimeFloat()) return self.buildFloatOp(.div, inst_ty, &.{ lhs, rhs }); if (scalar_ty.isSignedInt()) return self.builder.buildExactSDiv(lhs, rhs, ""); return self.builder.buildExactUDiv(lhs, rhs, ""); } @@ -5409,7 +5400,7 @@ pub const FuncGen = struct { const inst_ty = self.air.typeOfIndex(inst); const scalar_ty = inst_ty.scalarType(); - if (scalar_ty.isRuntimeFloat()) return self.builder.buildFRem(lhs, rhs, ""); + if (scalar_ty.isRuntimeFloat()) return self.buildFloatOp(.rem, inst_ty, &.{ lhs, rhs }); if (scalar_ty.isSignedInt()) return self.builder.buildSRem(lhs, rhs, ""); return self.builder.buildURem(lhs, rhs, ""); } @@ -5425,11 +5416,11 @@ pub const FuncGen = struct { const scalar_ty = inst_ty.scalarType(); if (scalar_ty.isRuntimeFloat()) { - const a = self.builder.buildFRem(lhs, rhs, ""); - const b = self.builder.buildFAdd(a, rhs, ""); - const c = self.builder.buildFRem(b, rhs, ""); + const a = try self.buildFloatOp(.rem, inst_ty, &.{ lhs, rhs }); + const b 
= try self.buildFloatOp(.add, inst_ty, &.{ a, rhs }); + const c = try self.buildFloatOp(.rem, inst_ty, &.{ b, rhs }); const zero = inst_llvm_ty.constNull(); - const ltz = self.builder.buildFCmp(.OLT, lhs, zero, ""); + const ltz = try self.buildFloatCmp(.lt, inst_ty, &.{ lhs, zero }); return self.builder.buildSelect(ltz, c, a, ""); } if (scalar_ty.isSignedInt()) { @@ -5508,75 +5499,253 @@ pub const FuncGen = struct { return result_struct; } - fn airMulAdd(self: *FuncGen, inst: Air.Inst.Index) !?*const llvm.Value { - if (self.liveness.isUnused(inst)) return null; + fn buildElementwiseCall( + self: *FuncGen, + llvm_fn: *const llvm.Value, + args_vectors: []const *const llvm.Value, + result_vector: *const llvm.Value, + vector_len: usize, + ) !*const llvm.Value { + const args_len = @intCast(c_uint, args_vectors.len); + const llvm_i32 = self.context.intType(32); + assert(args_len <= 8); - const pl_op = self.air.instructions.items(.data)[inst].pl_op; - const extra = self.air.extraData(Air.Bin, pl_op.payload).data; + var i: usize = 0; + var result = result_vector; + while (i < vector_len) : (i += 1) { + const index_i32 = llvm_i32.constInt(i, .False); - const mulend1 = try self.resolveInst(extra.lhs); - const mulend2 = try self.resolveInst(extra.rhs); - const addend = try self.resolveInst(pl_op.operand); + var args: [8]*const llvm.Value = undefined; + for (args_vectors) |arg_vector, k| { + args[k] = self.builder.buildExtractElement(arg_vector, index_i32, ""); + } + const result_elem = self.builder.buildCall(llvm_fn, args[0..], args_len, .C, .Auto, ""); + result = self.builder.buildInsertElement(result, result_elem, index_i32, ""); + } + return result; + } - const ty = self.air.typeOfIndex(inst); - const llvm_ty = try self.dg.llvmType(ty); - const scalar_ty = ty.scalarType(); - const target = self.dg.module.getTarget(); + fn getLibcFunction( + self: *FuncGen, + fn_name: [:0]const u8, + param_types: []const *const llvm.Type, + return_type: *const llvm.Type, + ) *const llvm.Value { + return self.dg.object.llvm_module.getNamedFunction(fn_name.ptr) orelse b: { + const alias = self.dg.object.llvm_module.getNamedGlobalAlias(fn_name.ptr, fn_name.len); + break :b if (alias) |a| a.getAliasee() else null; + } orelse b: { + const params_len = @intCast(c_uint, param_types.len); + const fn_type = llvm.functionType(return_type, param_types.ptr, params_len, .False); + const f = self.dg.object.llvm_module.addFunction(fn_name, fn_type); + break :b f; + }; + } - const Strat = union(enum) { - intrinsic, - libc: [*:0]const u8, + fn getMathHTypeAbbrev(ty: Type) []const u8 { + return switch (ty.tag()) { + .f16 => "h", // Non-standard + .f32 => "s", + .f64 => "", + .f80 => "x", // Non-standard + .c_longdouble => "l", + .f128 => "q", // Non-standard (mimics convention in GCC libquadmath) + else => unreachable, }; + } - const strat: Strat = switch (scalar_ty.floatBits(target)) { - 16, 32, 64 => Strat.intrinsic, - 80 => if (CType.longdouble.sizeInBits(target) == 80) Strat{ .intrinsic = {} } else Strat{ .libc = "__fmax" }, - // LLVM always lowers the fma builtin for f128 to fmal, which is for `long double`. - // On some targets this will be correct; on others it will be incorrect. 
- 128 => if (CType.longdouble.sizeInBits(target) == 128) Strat{ .intrinsic = {} } else Strat{ .libc = "fmaq" }, + fn getCompilerRtTypeAbbrev(ty: Type, target: std.Target) []const u8 { + return switch (ty.floatBits(target)) { + 16 => "h", + 32 => "s", + 64 => "d", + 80 => "x", + 128 => "t", else => unreachable, }; + } + + /// Creates a floating point comparison by lowering to the appropriate + /// hardware instruction or softfloat routine for the target + fn buildFloatCmp( + self: *FuncGen, + pred: math.CompareOperator, + ty: Type, + params: []const *const llvm.Value, + ) !*const llvm.Value { + const target = self.dg.module.getTarget(); + const scalar_ty = ty.scalarType(); + const scalar_llvm_ty = try self.dg.llvmType(scalar_ty); + + // LLVM does not support all floating point comparisons for all targets, so we + // may need to manually generate a libc call + const intrinsics_allowed = switch (scalar_ty.tag()) { + .f80 => target.longDoubleIs(f80) and backendSupportsF80(target), + .f128 => target.longDoubleIs(f128), + else => true, + }; + if (intrinsics_allowed) { + const llvm_predicate: llvm.RealPredicate = switch (pred) { + .eq => .OEQ, + .neq => .UNE, + .lt => .OLT, + .lte => .OLE, + .gt => .OGT, + .gte => .OGE, + }; + return self.builder.buildFCmp(llvm_predicate, params[0], params[1], ""); + } + + const compiler_rt_type_abbrev = getCompilerRtTypeAbbrev(scalar_ty, target); + var fn_name_buf: [64]u8 = undefined; + const fn_base_name = switch (pred) { + .neq => "ne", + .eq => "eq", + .lt => "lt", + .lte => "le", + .gt => "gt", + .gte => "ge", + }; + const fn_name = std.fmt.bufPrintZ(&fn_name_buf, "__{s}{s}f2", .{ fn_base_name, compiler_rt_type_abbrev }) catch unreachable; - switch (strat) { - .intrinsic => { - const llvm_fn = self.getIntrinsic("llvm.fma", &.{llvm_ty}); - const params = [_]*const llvm.Value{ mulend1, mulend2, addend }; - return self.builder.buildCall(llvm_fn, ¶ms, params.len, .C, .Auto, ""); - }, - .libc => |fn_name| { - const scalar_llvm_ty = try self.dg.llvmType(scalar_ty); - const llvm_fn = self.dg.object.llvm_module.getNamedFunction(fn_name) orelse b: { - const param_types = [_]*const llvm.Type{ scalar_llvm_ty, scalar_llvm_ty, scalar_llvm_ty }; - const fn_type = llvm.functionType(scalar_llvm_ty, ¶m_types, param_types.len, .False); - break :b self.dg.object.llvm_module.addFunction(fn_name, fn_type); - }; + assert(params.len == 2); + const param_types = [2]*const llvm.Type{ scalar_llvm_ty, scalar_llvm_ty }; + const llvm_i32 = self.context.intType(32); + const libc_fn = self.getLibcFunction(fn_name, param_types[0..], llvm_i32); - if (ty.zigTypeTag() == .Vector) { - const llvm_i32 = self.context.intType(32); - const vector_llvm_ty = try self.dg.llvmType(ty); + const zero = llvm_i32.constInt(0, .False); + const int_pred: llvm.IntPredicate = switch (pred) { + .eq => .EQ, + .neq => .NE, + .lt => .SLT, + .lte => .SLE, + .gt => .SGT, + .gte => .SGE, + }; - var i: usize = 0; - var vector = vector_llvm_ty.getUndef(); - while (i < ty.vectorLen()) : (i += 1) { - const index_i32 = llvm_i32.constInt(i, .False); + if (ty.zigTypeTag() == .Vector) { + const vec_len = ty.vectorLen(); + const vector_result_ty = llvm_i32.vectorType(vec_len); - const mulend1_elem = self.builder.buildExtractElement(mulend1, index_i32, ""); - const mulend2_elem = self.builder.buildExtractElement(mulend2, index_i32, ""); - const addend_elem = self.builder.buildExtractElement(addend, index_i32, ""); + var result = vector_result_ty.getUndef(); + result = try self.buildElementwiseCall(libc_fn, params[0..], result, 
vec_len); - const params = [_]*const llvm.Value{ mulend1_elem, mulend2_elem, addend_elem }; - const mul_add = self.builder.buildCall(llvm_fn, ¶ms, params.len, .C, .Auto, ""); + const zero_vector = self.builder.buildVectorSplat(zero, vec_len, ""); + return self.builder.buildICmp(int_pred, result, zero_vector, ""); + } - vector = self.builder.buildInsertElement(vector, mul_add, index_i32, ""); - } + const result = self.builder.buildCall(libc_fn, params.ptr, 2, .C, .Auto, ""); + return self.builder.buildICmp(int_pred, result, zero, ""); + } - return vector; - } else { - const params = [_]*const llvm.Value{ mulend1, mulend2, addend }; - return self.builder.buildCall(llvm_fn, ¶ms, params.len, .C, .Auto, ""); + /// Creates a floating point operation (add, sub, fma, sqrt, exp, etc.) + /// by lowering to the appropriate hardware instruction or softfloat + /// routine for the target + fn buildFloatOp( + self: *FuncGen, + comptime op: @TypeOf(.EnumLiteral), + ty: Type, + params: []const *const llvm.Value, + ) !*const llvm.Value { + const target = self.dg.module.getTarget(); + const scalar_ty = ty.scalarType(); + const llvm_ty = try self.dg.llvmType(ty); + const scalar_llvm_ty = try self.dg.llvmType(scalar_ty); + + const Strat = union(enum) { + intrinsic: []const u8, + libc: [:0]const u8, + }; + + // LLVM does not support all relevant intrinsics for all targets, so we + // may need to manually generate a libc call + const intrinsics_allowed = switch (scalar_ty.tag()) { + .f80 => target.longDoubleIs(f80) and backendSupportsF80(target), + .f128 => target.longDoubleIs(f128), + else => true, + }; + const strat: Strat = if (intrinsics_allowed) b: { + // Some operations are dedicated LLVM instructions, not available as intrinsics + switch (op) { + .add => return self.builder.buildFAdd(params[0], params[1], ""), + .sub => return self.builder.buildFSub(params[0], params[1], ""), + .mul => return self.builder.buildFMul(params[0], params[1], ""), + .div => return self.builder.buildFDiv(params[0], params[1], ""), + .rem => return self.builder.buildFRem(params[0], params[1], ""), + else => {}, + } + // All other operations are available as intrinsics + break :b .{ + .intrinsic = "llvm." 
++ switch (op) { + .max => "maximum", + .min => "minimum", + .fma, .sqrt, .sin, .cos, .exp, .exp2, .log, .log2, .log10, .fabs, .floor, .ceil, .round, .trunc => @tagName(op), + .add, .sub, .mul, .div, .rem => unreachable, + else => unreachable, + }, + }; + } else b: { + const math_h_type_abbrev = getMathHTypeAbbrev(scalar_ty); + const compiler_rt_type_abbrev = getCompilerRtTypeAbbrev(scalar_ty, target); + var fn_name_buf: [64]u8 = undefined; + break :b switch (op) { + .fma => Strat{ + .libc = switch (scalar_ty.floatBits(target)) { + 80 => "__fmax", + else => std.fmt.bufPrintZ(&fn_name_buf, "fma{s}", .{math_h_type_abbrev}) catch unreachable, + }, + }, + .add, .sub, .div, .mul => Strat{ + .libc = std.fmt.bufPrintZ(&fn_name_buf, "__{s}{s}f3", .{ @tagName(op), compiler_rt_type_abbrev }) catch unreachable, + }, + .rem => Strat{ + .libc = std.fmt.bufPrintZ(&fn_name_buf, "fmod{s}", .{math_h_type_abbrev}) catch unreachable, + }, + .max, .min => Strat{ + .libc = std.fmt.bufPrintZ(&fn_name_buf, "f{s}{s}", .{ @tagName(op), math_h_type_abbrev }) catch unreachable, + }, + .sqrt, .sin, .cos, .exp, .exp2, .log, .log2, .log10, .fabs, .floor, .ceil, .round, .trunc => Strat{ + .libc = std.fmt.bufPrintZ(&fn_name_buf, "{s}{s}", .{ @tagName(op), math_h_type_abbrev }) catch unreachable, + }, + else => unreachable, + }; + }; + + var llvm_fn: *const llvm.Value = switch (strat) { + .intrinsic => |fn_name| self.getIntrinsic(fn_name, &.{llvm_ty}), + .libc => |fn_name| b: { + assert(params.len == switch (op) { + .fma => 3, + .add, .sub, .div, .mul, .rem, .max, .min => 2, + .sqrt, .sin, .cos, .exp, .exp2, .log, .log2, .log10, .fabs, .floor, .ceil, .round, .trunc => 1, + else => unreachable, + }); + const param_types = [3]*const llvm.Type{ scalar_llvm_ty, scalar_llvm_ty, scalar_llvm_ty }; + const libc_fn = self.getLibcFunction(fn_name, param_types[0..params.len], scalar_llvm_ty); + if (ty.zigTypeTag() == .Vector) { + const result = llvm_ty.getUndef(); + return self.buildElementwiseCall(libc_fn, params[0..], result, ty.vectorLen()); } + + break :b libc_fn; }, - } + }; + const params_len = @intCast(c_uint, params.len); + return self.builder.buildCall(llvm_fn, params.ptr, params_len, .C, .Auto, ""); + } + + fn airMulAdd(self: *FuncGen, inst: Air.Inst.Index) !?*const llvm.Value { + if (self.liveness.isUnused(inst)) return null; + + const pl_op = self.air.instructions.items(.data)[inst].pl_op; + const extra = self.air.extraData(Air.Bin, pl_op.payload).data; + + const mulend1 = try self.resolveInst(extra.lhs); + const mulend2 = try self.resolveInst(extra.rhs); + const addend = try self.resolveInst(pl_op.operand); + + const ty = self.air.typeOfIndex(inst); + return self.buildFloatOp(.fma, ty, &.{ mulend1, mulend2, addend }); } fn airShlWithOverflow(self: *FuncGen, inst: Air.Inst.Index) !?*const llvm.Value { @@ -6381,14 +6550,15 @@ pub const FuncGen = struct { } } - fn airUnaryOp(self: *FuncGen, inst: Air.Inst.Index, llvm_fn_name: []const u8) !?*const llvm.Value { + fn airUnaryOp(self: *FuncGen, inst: Air.Inst.Index, comptime op: @TypeOf(.EnumLiteral)) !?*const llvm.Value { if (self.liveness.isUnused(inst)) return null; const un_op = self.air.instructions.items(.data)[inst].un_op; const operand = try self.resolveInst(un_op); const operand_ty = self.air.typeOf(un_op); - return self.callFloatUnary(operand, operand_ty, llvm_fn_name); + const params = [_]*const llvm.Value{operand}; + return self.buildFloatOp(op, operand_ty, ¶ms); } fn airClzCtz(self: *FuncGen, inst: Air.Inst.Index, llvm_fn_name: []const u8) !?*const llvm.Value { @@ 
-7191,48 +7361,6 @@ pub const FuncGen = struct { return self.builder.buildExtractValue(opt_handle, 0, ""); } - fn callFloor(self: *FuncGen, arg: *const llvm.Value, ty: Type) !*const llvm.Value { - return self.callFloatUnary(arg, ty, "floor"); - } - - fn callCeil(self: *FuncGen, arg: *const llvm.Value, ty: Type) !*const llvm.Value { - return self.callFloatUnary(arg, ty, "ceil"); - } - - fn callTrunc(self: *FuncGen, arg: *const llvm.Value, ty: Type) !*const llvm.Value { - return self.callFloatUnary(arg, ty, "trunc"); - } - - fn callFloatUnary( - self: *FuncGen, - arg: *const llvm.Value, - ty: Type, - name: []const u8, - ) !*const llvm.Value { - const target = self.dg.module.getTarget(); - - var fn_name_buf: [100]u8 = undefined; - const llvm_fn_name = switch (ty.zigTypeTag()) { - .Vector => std.fmt.bufPrintZ(&fn_name_buf, "llvm.{s}.v{d}f{d}", .{ - name, ty.vectorLen(), ty.childType().floatBits(target), - }) catch unreachable, - .Float => std.fmt.bufPrintZ(&fn_name_buf, "llvm.{s}.f{d}", .{ - name, ty.floatBits(target), - }) catch unreachable, - else => unreachable, - }; - - const llvm_fn = self.dg.object.llvm_module.getNamedFunction(llvm_fn_name) orelse blk: { - const operand_llvm_ty = try self.dg.llvmType(ty); - const param_types = [_]*const llvm.Type{operand_llvm_ty}; - const fn_type = llvm.functionType(operand_llvm_ty, ¶m_types, param_types.len, .False); - break :blk self.dg.object.llvm_module.addFunction(llvm_fn_name, fn_type); - }; - - const args: [1]*const llvm.Value = .{arg}; - return self.builder.buildCall(llvm_fn, &args, args.len, .C, .Auto, ""); - } - fn fieldPtr( self: *FuncGen, inst: Air.Inst.Index, -- cgit v1.2.3 From 087aedfa38e0eb2a8f0e3055be18625017060b29 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Wed, 27 Apr 2022 13:51:53 -0700 Subject: stage2: fix recent LLVM backend code * std.math.snan: fix compilation error. Also make it and nan inline. * LLVM: use a proper enum type for float op instead of enum literal. Also various cleanups. * LLVM: use LLVMBuildVectorSplat for vector splat AIR instruction. - also the bindings had parameter order wrong * LLVM: additionally handle f16 lowering. For now all targets report OK but I think we will need to add some exceptions to this list. --- lib/std/math/nan.zig | 17 +-- src/codegen/llvm.zig | 260 +++++++++++++++++++++++------------------- src/codegen/llvm/bindings.zig | 5 +- 3 files changed, 147 insertions(+), 135 deletions(-) (limited to 'src/codegen/llvm.zig') diff --git a/lib/std/math/nan.zig b/lib/std/math/nan.zig index 329f67b74e..8a27937242 100644 --- a/lib/std/math/nan.zig +++ b/lib/std/math/nan.zig @@ -1,7 +1,7 @@ const math = @import("../math.zig"); /// Returns the nan representation for type T. -pub fn nan(comptime T: type) T { +pub inline fn nan(comptime T: type) T { return switch (@typeInfo(T).Float.bits) { 16 => math.nan_f16, 32 => math.nan_f32, @@ -13,15 +13,8 @@ pub fn nan(comptime T: type) T { } /// Returns the signalling nan representation for type T. -pub fn snan(comptime T: type) T { - // Note: A signalling nan is identical to a standard right now by may have a different bit - // representation in the future when required. - return switch (@typeInfo(T).Float.bits) { - 16 => math.nan_u16, - 32 => math.nan_u32, - 64 => math.nan_u64, - 80 => math.nan_u80, - 128 => math.nan_u128, - else => @compileError("unreachable"), - }; +/// Note: A signalling nan is identical to a standard right now by may have a different bit +/// representation in the future when required. 
+pub inline fn snan(comptime T: type) T { + return nan(T); } diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig index bf7f4e2ce1..de25a7821d 100644 --- a/src/codegen/llvm.zig +++ b/src/codegen/llvm.zig @@ -3983,7 +3983,7 @@ pub const FuncGen = struct { ); return phi_node; }, - .Float => return self.buildFloatCmp(op, operand_ty, &.{ lhs, rhs }), + .Float => return self.buildFloatCmp(op, operand_ty, .{ lhs, rhs }), else => unreachable, }; const is_signed = int_ty.isSignedInt(); @@ -5211,7 +5211,7 @@ pub const FuncGen = struct { const inst_ty = self.air.typeOfIndex(inst); const scalar_ty = inst_ty.scalarType(); - if (scalar_ty.isAnyFloat()) return self.buildFloatOp(.add, inst_ty, &.{ lhs, rhs }); + if (scalar_ty.isAnyFloat()) return self.buildFloatOp(.add, inst_ty, 2, .{ lhs, rhs }); if (scalar_ty.isSignedInt()) return self.builder.buildNSWAdd(lhs, rhs, ""); return self.builder.buildNUWAdd(lhs, rhs, ""); } @@ -5250,7 +5250,7 @@ pub const FuncGen = struct { const inst_ty = self.air.typeOfIndex(inst); const scalar_ty = inst_ty.scalarType(); - if (scalar_ty.isAnyFloat()) return self.buildFloatOp(.sub, inst_ty, &.{ lhs, rhs }); + if (scalar_ty.isAnyFloat()) return self.buildFloatOp(.sub, inst_ty, 2, .{ lhs, rhs }); if (scalar_ty.isSignedInt()) return self.builder.buildNSWSub(lhs, rhs, ""); return self.builder.buildNUWSub(lhs, rhs, ""); } @@ -5288,7 +5288,7 @@ pub const FuncGen = struct { const inst_ty = self.air.typeOfIndex(inst); const scalar_ty = inst_ty.scalarType(); - if (scalar_ty.isAnyFloat()) return self.buildFloatOp(.mul, inst_ty, &.{ lhs, rhs }); + if (scalar_ty.isAnyFloat()) return self.buildFloatOp(.mul, inst_ty, 2, .{ lhs, rhs }); if (scalar_ty.isSignedInt()) return self.builder.buildNSWMul(lhs, rhs, ""); return self.builder.buildNUWMul(lhs, rhs, ""); } @@ -5325,7 +5325,7 @@ pub const FuncGen = struct { const rhs = try self.resolveInst(bin_op.rhs); const inst_ty = self.air.typeOfIndex(inst); - return self.buildFloatOp(.div, inst_ty, &.{ lhs, rhs }); + return self.buildFloatOp(.div, inst_ty, 2, .{ lhs, rhs }); } fn airDivTrunc(self: *FuncGen, inst: Air.Inst.Index) !?*const llvm.Value { @@ -5338,8 +5338,8 @@ pub const FuncGen = struct { const scalar_ty = inst_ty.scalarType(); if (scalar_ty.isRuntimeFloat()) { - const result = try self.buildFloatOp(.div, inst_ty, &.{ lhs, rhs }); - return self.buildFloatOp(.trunc, inst_ty, &.{result}); + const result = try self.buildFloatOp(.div, inst_ty, 2, .{ lhs, rhs }); + return self.buildFloatOp(.trunc, inst_ty, 1, .{result}); } if (scalar_ty.isSignedInt()) return self.builder.buildSDiv(lhs, rhs, ""); return self.builder.buildUDiv(lhs, rhs, ""); @@ -5355,8 +5355,8 @@ pub const FuncGen = struct { const scalar_ty = inst_ty.scalarType(); if (scalar_ty.isRuntimeFloat()) { - const result = try self.buildFloatOp(.div, inst_ty, &.{ lhs, rhs }); - return self.buildFloatOp(.floor, inst_ty, &.{result}); + const result = try self.buildFloatOp(.div, inst_ty, 2, .{ lhs, rhs }); + return self.buildFloatOp(.floor, inst_ty, 1, .{result}); } if (scalar_ty.isSignedInt()) { // const d = @divTrunc(a, b); @@ -5386,7 +5386,7 @@ pub const FuncGen = struct { const inst_ty = self.air.typeOfIndex(inst); const scalar_ty = inst_ty.scalarType(); - if (scalar_ty.isRuntimeFloat()) return self.buildFloatOp(.div, inst_ty, &.{ lhs, rhs }); + if (scalar_ty.isRuntimeFloat()) return self.buildFloatOp(.div, inst_ty, 2, .{ lhs, rhs }); if (scalar_ty.isSignedInt()) return self.builder.buildExactSDiv(lhs, rhs, ""); return self.builder.buildExactUDiv(lhs, rhs, ""); } @@ -5400,7 
+5400,7 @@ pub const FuncGen = struct { const inst_ty = self.air.typeOfIndex(inst); const scalar_ty = inst_ty.scalarType(); - if (scalar_ty.isRuntimeFloat()) return self.buildFloatOp(.rem, inst_ty, &.{ lhs, rhs }); + if (scalar_ty.isRuntimeFloat()) return self.buildFloatOp(.fmod, inst_ty, 2, .{ lhs, rhs }); if (scalar_ty.isSignedInt()) return self.builder.buildSRem(lhs, rhs, ""); return self.builder.buildURem(lhs, rhs, ""); } @@ -5416,11 +5416,11 @@ pub const FuncGen = struct { const scalar_ty = inst_ty.scalarType(); if (scalar_ty.isRuntimeFloat()) { - const a = try self.buildFloatOp(.rem, inst_ty, &.{ lhs, rhs }); - const b = try self.buildFloatOp(.add, inst_ty, &.{ a, rhs }); - const c = try self.buildFloatOp(.rem, inst_ty, &.{ b, rhs }); + const a = try self.buildFloatOp(.fmod, inst_ty, 2, .{ lhs, rhs }); + const b = try self.buildFloatOp(.add, inst_ty, 2, .{ a, rhs }); + const c = try self.buildFloatOp(.fmod, inst_ty, 2, .{ b, rhs }); const zero = inst_llvm_ty.constNull(); - const ltz = try self.buildFloatCmp(.lt, inst_ty, &.{ lhs, zero }); + const ltz = try self.buildFloatCmp(.lt, inst_ty, .{ lhs, zero }); return self.builder.buildSelect(ltz, c, a, ""); } if (scalar_ty.isSignedInt()) { @@ -5508,18 +5508,18 @@ pub const FuncGen = struct { ) !*const llvm.Value { const args_len = @intCast(c_uint, args_vectors.len); const llvm_i32 = self.context.intType(32); - assert(args_len <= 8); + assert(args_len <= 3); var i: usize = 0; var result = result_vector; while (i < vector_len) : (i += 1) { const index_i32 = llvm_i32.constInt(i, .False); - var args: [8]*const llvm.Value = undefined; + var args: [3]*const llvm.Value = undefined; for (args_vectors) |arg_vector, k| { args[k] = self.builder.buildExtractElement(arg_vector, index_i32, ""); } - const result_elem = self.builder.buildCall(llvm_fn, args[0..], args_len, .C, .Auto, ""); + const result_elem = self.builder.buildCall(llvm_fn, &args, args_len, .C, .Auto, ""); result = self.builder.buildInsertElement(result, result_elem, index_i32, ""); } return result; @@ -5542,20 +5542,27 @@ pub const FuncGen = struct { }; } - fn getMathHTypeAbbrev(ty: Type) []const u8 { - return switch (ty.tag()) { - .f16 => "h", // Non-standard - .f32 => "s", - .f64 => "", - .f80 => "x", // Non-standard - .c_longdouble => "l", - .f128 => "q", // Non-standard (mimics convention in GCC libquadmath) + fn libcFloatPrefix(float_bits: u16) []const u8 { + return switch (float_bits) { + 16, 80 => "__", + 32, 64, 128 => "", else => unreachable, }; } - fn getCompilerRtTypeAbbrev(ty: Type, target: std.Target) []const u8 { - return switch (ty.floatBits(target)) { + fn libcFloatSuffix(float_bits: u16) []const u8 { + return switch (float_bits) { + 16 => "h", // Non-standard + 32 => "s", + 64 => "", + 80 => "x", // Non-standard + 128 => "q", // Non-standard (mimics convention in GCC libquadmath) + else => unreachable, + }; + } + + fn compilerRtFloatAbbrev(float_bits: u16) []const u8 { + return switch (float_bits) { 16 => "h", 32 => "s", 64 => "d", @@ -5571,20 +5578,13 @@ pub const FuncGen = struct { self: *FuncGen, pred: math.CompareOperator, ty: Type, - params: []const *const llvm.Value, + params: [2]*const llvm.Value, ) !*const llvm.Value { const target = self.dg.module.getTarget(); const scalar_ty = ty.scalarType(); const scalar_llvm_ty = try self.dg.llvmType(scalar_ty); - // LLVM does not support all floating point comparisons for all targets, so we - // may need to manually generate a libc call - const intrinsics_allowed = switch (scalar_ty.tag()) { - .f80 => 
target.longDoubleIs(f80) and backendSupportsF80(target), - .f128 => target.longDoubleIs(f128), - else => true, - }; - if (intrinsics_allowed) { + if (intrinsicsAllowed(scalar_ty, target)) { const llvm_predicate: llvm.RealPredicate = switch (pred) { .eq => .OEQ, .neq => .UNE, @@ -5596,7 +5596,8 @@ pub const FuncGen = struct { return self.builder.buildFCmp(llvm_predicate, params[0], params[1], ""); } - const compiler_rt_type_abbrev = getCompilerRtTypeAbbrev(scalar_ty, target); + const float_bits = scalar_ty.floatBits(target); + const compiler_rt_float_abbrev = compilerRtFloatAbbrev(float_bits); var fn_name_buf: [64]u8 = undefined; const fn_base_name = switch (pred) { .neq => "ne", @@ -5606,9 +5607,10 @@ pub const FuncGen = struct { .gt => "gt", .gte => "ge", }; - const fn_name = std.fmt.bufPrintZ(&fn_name_buf, "__{s}{s}f2", .{ fn_base_name, compiler_rt_type_abbrev }) catch unreachable; + const fn_name = std.fmt.bufPrintZ(&fn_name_buf, "__{s}{s}f2", .{ + fn_base_name, compiler_rt_float_abbrev, + }) catch unreachable; - assert(params.len == 2); const param_types = [2]*const llvm.Type{ scalar_llvm_ty, scalar_llvm_ty }; const llvm_i32 = self.context.intType(32); const libc_fn = self.getLibcFunction(fn_name, param_types[0..], llvm_i32); @@ -5628,110 +5630,119 @@ pub const FuncGen = struct { const vector_result_ty = llvm_i32.vectorType(vec_len); var result = vector_result_ty.getUndef(); - result = try self.buildElementwiseCall(libc_fn, params[0..], result, vec_len); + result = try self.buildElementwiseCall(libc_fn, ¶ms, result, vec_len); - const zero_vector = self.builder.buildVectorSplat(zero, vec_len, ""); + const zero_vector = self.builder.buildVectorSplat(vec_len, zero, ""); return self.builder.buildICmp(int_pred, result, zero_vector, ""); } - const result = self.builder.buildCall(libc_fn, params.ptr, 2, .C, .Auto, ""); + const result = self.builder.buildCall(libc_fn, ¶ms, params.len, .C, .Auto, ""); return self.builder.buildICmp(int_pred, result, zero, ""); } + const FloatOp = enum { + add, + ceil, + cos, + div, + exp, + exp2, + fabs, + floor, + fma, + log, + log10, + log2, + fmax, + fmin, + mul, + fmod, + round, + sin, + sqrt, + sub, + trunc, + }; + + const FloatOpStrat = union(enum) { + intrinsic: []const u8, + libc: [:0]const u8, + }; + /// Creates a floating point operation (add, sub, fma, sqrt, exp, etc.) 
/// by lowering to the appropriate hardware instruction or softfloat /// routine for the target fn buildFloatOp( self: *FuncGen, - comptime op: @TypeOf(.EnumLiteral), + comptime op: FloatOp, ty: Type, - params: []const *const llvm.Value, + comptime params_len: usize, + params: [params_len]*const llvm.Value, ) !*const llvm.Value { const target = self.dg.module.getTarget(); const scalar_ty = ty.scalarType(); const llvm_ty = try self.dg.llvmType(ty); const scalar_llvm_ty = try self.dg.llvmType(scalar_ty); - const Strat = union(enum) { - intrinsic: []const u8, - libc: [:0]const u8, - }; - - // LLVM does not support all relevant intrinsics for all targets, so we - // may need to manually generate a libc call - const intrinsics_allowed = switch (scalar_ty.tag()) { - .f80 => target.longDoubleIs(f80) and backendSupportsF80(target), - .f128 => target.longDoubleIs(f128), - else => true, - }; - const strat: Strat = if (intrinsics_allowed) b: { + const intrinsics_allowed = intrinsicsAllowed(scalar_ty, target); + var fn_name_buf: [64]u8 = undefined; + const strat: FloatOpStrat = if (intrinsics_allowed) switch (op) { // Some operations are dedicated LLVM instructions, not available as intrinsics - switch (op) { - .add => return self.builder.buildFAdd(params[0], params[1], ""), - .sub => return self.builder.buildFSub(params[0], params[1], ""), - .mul => return self.builder.buildFMul(params[0], params[1], ""), - .div => return self.builder.buildFDiv(params[0], params[1], ""), - .rem => return self.builder.buildFRem(params[0], params[1], ""), - else => {}, - } - // All other operations are available as intrinsics - break :b .{ - .intrinsic = "llvm." ++ switch (op) { - .max => "maximum", - .min => "minimum", - .fma, .sqrt, .sin, .cos, .exp, .exp2, .log, .log2, .log10, .fabs, .floor, .ceil, .round, .trunc => @tagName(op), - .add, .sub, .mul, .div, .rem => unreachable, - else => unreachable, - }, - }; + .add => return self.builder.buildFAdd(params[0], params[1], ""), + .sub => return self.builder.buildFSub(params[0], params[1], ""), + .mul => return self.builder.buildFMul(params[0], params[1], ""), + .div => return self.builder.buildFDiv(params[0], params[1], ""), + .fmod => return self.builder.buildFRem(params[0], params[1], ""), + .fmax => return self.builder.buildMaxNum(params[0], params[1], ""), + .fmin => return self.builder.buildMinNum(params[0], params[1], ""), + else => .{ .intrinsic = "llvm." 
++ @tagName(op) }, } else b: { - const math_h_type_abbrev = getMathHTypeAbbrev(scalar_ty); - const compiler_rt_type_abbrev = getCompilerRtTypeAbbrev(scalar_ty, target); - var fn_name_buf: [64]u8 = undefined; + const float_bits = scalar_ty.floatBits(target); break :b switch (op) { - .fma => Strat{ - .libc = switch (scalar_ty.floatBits(target)) { - 80 => "__fmax", - else => std.fmt.bufPrintZ(&fn_name_buf, "fma{s}", .{math_h_type_abbrev}) catch unreachable, - }, - }, - .add, .sub, .div, .mul => Strat{ - .libc = std.fmt.bufPrintZ(&fn_name_buf, "__{s}{s}f3", .{ @tagName(op), compiler_rt_type_abbrev }) catch unreachable, - }, - .rem => Strat{ - .libc = std.fmt.bufPrintZ(&fn_name_buf, "fmod{s}", .{math_h_type_abbrev}) catch unreachable, - }, - .max, .min => Strat{ - .libc = std.fmt.bufPrintZ(&fn_name_buf, "f{s}{s}", .{ @tagName(op), math_h_type_abbrev }) catch unreachable, + .add, .sub, .div, .mul => FloatOpStrat{ + .libc = std.fmt.bufPrintZ(&fn_name_buf, "__{s}{s}f3", .{ + @tagName(op), compilerRtFloatAbbrev(float_bits), + }) catch unreachable, }, - .sqrt, .sin, .cos, .exp, .exp2, .log, .log2, .log10, .fabs, .floor, .ceil, .round, .trunc => Strat{ - .libc = std.fmt.bufPrintZ(&fn_name_buf, "{s}{s}", .{ @tagName(op), math_h_type_abbrev }) catch unreachable, + .ceil, + .cos, + .exp, + .exp2, + .fabs, + .floor, + .fma, + .fmax, + .fmin, + .fmod, + .log, + .log10, + .log2, + .round, + .sin, + .sqrt, + .trunc, + => FloatOpStrat{ + .libc = std.fmt.bufPrintZ(&fn_name_buf, "{s}{s}{s}", .{ + libcFloatPrefix(float_bits), @tagName(op), libcFloatSuffix(float_bits), + }) catch unreachable, }, - else => unreachable, }; }; - var llvm_fn: *const llvm.Value = switch (strat) { + const llvm_fn: *const llvm.Value = switch (strat) { .intrinsic => |fn_name| self.getIntrinsic(fn_name, &.{llvm_ty}), .libc => |fn_name| b: { - assert(params.len == switch (op) { - .fma => 3, - .add, .sub, .div, .mul, .rem, .max, .min => 2, - .sqrt, .sin, .cos, .exp, .exp2, .log, .log2, .log10, .fabs, .floor, .ceil, .round, .trunc => 1, - else => unreachable, - }); const param_types = [3]*const llvm.Type{ scalar_llvm_ty, scalar_llvm_ty, scalar_llvm_ty }; const libc_fn = self.getLibcFunction(fn_name, param_types[0..params.len], scalar_llvm_ty); if (ty.zigTypeTag() == .Vector) { const result = llvm_ty.getUndef(); - return self.buildElementwiseCall(libc_fn, params[0..], result, ty.vectorLen()); + return self.buildElementwiseCall(libc_fn, ¶ms, result, ty.vectorLen()); } break :b libc_fn; }, }; - const params_len = @intCast(c_uint, params.len); - return self.builder.buildCall(llvm_fn, params.ptr, params_len, .C, .Auto, ""); + return self.builder.buildCall(llvm_fn, ¶ms, params_len, .C, .Auto, ""); } fn airMulAdd(self: *FuncGen, inst: Air.Inst.Index) !?*const llvm.Value { @@ -5745,7 +5756,7 @@ pub const FuncGen = struct { const addend = try self.resolveInst(pl_op.operand); const ty = self.air.typeOfIndex(inst); - return self.buildFloatOp(.fma, ty, &.{ mulend1, mulend2, addend }); + return self.buildFloatOp(.fma, ty, 3, .{ mulend1, mulend2, addend }); } fn airShlWithOverflow(self: *FuncGen, inst: Air.Inst.Index) !?*const llvm.Value { @@ -6550,15 +6561,14 @@ pub const FuncGen = struct { } } - fn airUnaryOp(self: *FuncGen, inst: Air.Inst.Index, comptime op: @TypeOf(.EnumLiteral)) !?*const llvm.Value { + fn airUnaryOp(self: *FuncGen, inst: Air.Inst.Index, comptime op: FloatOp) !?*const llvm.Value { if (self.liveness.isUnused(inst)) return null; const un_op = self.air.instructions.items(.data)[inst].un_op; const operand = try self.resolveInst(un_op); 
const operand_ty = self.air.typeOf(un_op); - const params = [_]*const llvm.Value{operand}; - return self.buildFloatOp(op, operand_ty, ¶ms); + return self.buildFloatOp(op, operand_ty, 1, .{operand}); } fn airClzCtz(self: *FuncGen, inst: Air.Inst.Index, llvm_fn_name: []const u8) !?*const llvm.Value { @@ -6822,17 +6832,9 @@ pub const FuncGen = struct { const ty_op = self.air.instructions.items(.data)[inst].ty_op; const scalar = try self.resolveInst(ty_op.operand); - const scalar_ty = self.air.typeOf(ty_op.operand); const vector_ty = self.air.typeOfIndex(inst); const len = vector_ty.vectorLen(); - const scalar_llvm_ty = try self.dg.llvmType(scalar_ty); - const op_llvm_ty = scalar_llvm_ty.vectorType(1); - const u32_llvm_ty = self.context.intType(32); - const mask_llvm_ty = u32_llvm_ty.vectorType(len); - const undef_vector = op_llvm_ty.getUndef(); - const u32_zero = u32_llvm_ty.constNull(); - const op_vector = self.builder.buildInsertElement(undef_vector, scalar, u32_zero, ""); - return self.builder.buildShuffleVector(op_vector, undef_vector, mask_llvm_ty.constNull(), ""); + return self.builder.buildVectorSplat(len, scalar, ""); } fn airSelect(self: *FuncGen, inst: Air.Inst.Index) !?*const llvm.Value { @@ -8183,6 +8185,26 @@ fn backendSupportsF80(target: std.Target) bool { }; } +/// This function returns true if we expect LLVM to lower f16 correctly +/// and false if we expect LLVM to crash if it counters an f16 type or +/// if it produces miscompilations. +fn backendSupportsF16(target: std.Target) bool { + return switch (target.cpu.arch) { + else => true, + }; +} + +/// LLVM does not support all relevant intrinsics for all targets, so we +/// may need to manually generate a libc call +fn intrinsicsAllowed(scalar_ty: Type, target: std.Target) bool { + return switch (scalar_ty.tag()) { + .f16 => backendSupportsF16(target), + .f80 => target.longDoubleIs(f80) and backendSupportsF80(target), + .f128 => target.longDoubleIs(f128), + else => true, + }; +} + /// We need to insert extra padding if LLVM's isn't enough. /// However we don't want to ever call LLVMABIAlignmentOfType or /// LLVMABISizeOfType because these functions will trip assertions diff --git a/src/codegen/llvm/bindings.zig b/src/codegen/llvm/bindings.zig index 81b5863aa0..b8dc3e1830 100644 --- a/src/codegen/llvm/bindings.zig +++ b/src/codegen/llvm/bindings.zig @@ -295,9 +295,6 @@ pub const Type = opaque { pub const countStructElementTypes = LLVMCountStructElementTypes; extern fn LLVMCountStructElementTypes(StructTy: *const Type) c_uint; - - pub const getVectorSize = LLVMGetVectorSize; - extern fn LLVMGetVectorSize(VectorTy: *const Type) c_uint; }; pub const Module = opaque { @@ -681,8 +678,8 @@ pub const Builder = opaque { pub const buildVectorSplat = LLVMBuildVectorSplat; extern fn LLVMBuildVectorSplat( *const Builder, - EltVal: *const Value, ElementCount: c_uint, + EltVal: *const Value, Name: [*:0]const u8, ) *const Value; -- cgit v1.2.3 From 09f1d62bdfb5794534b21d1cd9dafc4822697d60 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Wed, 27 Apr 2022 16:45:23 -0700 Subject: add new builtin function `@tan` The reason for having `@tan` is that we already have `@sin` and `@cos` because some targets have machine code instructions for them, but in the case that the implementation needs to go into compiler-rt, sin, cos, and tan all share a common dependency which includes a table of data. To avoid duplicating this table of data, we promote tan to become a builtin alongside sin and cos. 
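(For illustration only, not part of this patch — a minimal use of the new builtin; the test name and constants are hypothetical, with tan(0.5) ≈ 0.5463:)

    const std = @import("std");

    test "@tan builtin" {
        var x: f32 = 0.5;
        // In this series @tan always lowers to a libc call (e.g. tanf for f32),
        // since buildFloatOp excludes .tan from the intrinsic path below.
        const y = @tan(x);
        try std.testing.expectApproxEqAbs(@as(f32, 0.5463025), y, 0.0001);
    }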
ZIR: The tag enum is at capacity so this commit moves `field_call_bind_named` to be `extended`. I measured this as one of the least used tags in the zig codebase. Fix libc math suffix for `f32` being wrong in both stage1 and stage2. stage1: add missing libc prefix for float functions. --- doc/langref.html.in | 16 +++++++++- lib/std/math/complex/tanh.zig | 4 +-- src/Air.zig | 8 +++-- src/AstGen.zig | 6 ++-- src/BuiltinFn.zig | 8 +++++ src/Liveness.zig | 1 + src/Sema.zig | 70 +++++++++++++++++++++---------------------- src/Zir.zig | 33 ++++++++++++-------- src/arch/aarch64/CodeGen.zig | 1 + src/arch/arm/CodeGen.zig | 1 + src/arch/riscv64/CodeGen.zig | 1 + src/arch/sparcv9/CodeGen.zig | 1 + src/arch/wasm/CodeGen.zig | 1 + src/arch/x86_64/CodeGen.zig | 1 + src/codegen/c.zig | 1 + src/codegen/llvm.zig | 7 +++-- src/print_air.zig | 1 + src/print_zir.zig | 12 +++++++- src/stage1/all_types.hpp | 1 + src/stage1/analyze.cpp | 2 ++ src/stage1/astgen.cpp | 1 + src/stage1/codegen.cpp | 41 +++++++++++++++++++------ src/stage1/ir.cpp | 11 +++++++ src/value.zig | 38 +++++++++++++++++++++++ test/behavior/bugs/920.zig | 5 ++-- 25 files changed, 203 insertions(+), 69 deletions(-) (limited to 'src/codegen/llvm.zig') diff --git a/doc/langref.html.in b/doc/langref.html.in index 5cccced446..3c5de6c8d2 100644 --- a/doc/langref.html.in +++ b/doc/langref.html.in @@ -8026,7 +8026,7 @@ fn func(y: *i32) void { only rounds once, and is thus more accurate.

- Supports Floats and Vectors of floats.
+ Supports {#link|Floats#} and {#link|Vectors#} of floats.

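(Illustrative sketch, not part of the diff: the single-rounding behavior described above can be observed directly. The values below are chosen so that the fused and unfused f32 results differ; the test name is hypothetical.)

    const std = @import("std");

    test "@mulAdd rounds once" {
        var a: f32 = 1.0 + 1.0 / 4096.0; // 1 + 2^-12, exactly representable in f32
        // a*a = 1 + 2^-11 + 2^-24; the separate multiply rounds the 2^-24 term away,
        // while @mulAdd keeps the intermediate product exact and rounds only once.
        const fused = @mulAdd(f32, a, a, -1.0); // 2^-11 + 2^-24
        const split = a * a - 1.0; // 2^-11
        try std.testing.expect(fused != split);
    }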
 {#header_close#}
@@ -9440,6 +9440,7 @@ fn doTheTest() !void {
 some float operations are not yet implemented for all float types.

 {#header_close#}
+
 {#header_open|@cos#}
{#syntax#}@cos(value: anytype) @TypeOf(value){#endsyntax#}

@@ -9451,6 +9452,19 @@ fn doTheTest() !void { some float operations are not yet implemented for all float types.

 {#header_close#}
+
+ {#header_open|@tan#}
+ {#syntax#}@tan(value: anytype) @TypeOf(value){#endsyntax#}
+

+ Tangent trigonometric function on a floating point number.
+ Uses a dedicated hardware instruction when available.

+

+ Supports {#link|Floats#} and {#link|Vectors#} of floats, with the caveat that
+ some float operations are not yet implemented for all float types.
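(Illustrative sketch, not part of the docs diff: when the hardware path is unavailable, the LLVM backend above derives the libc routine name from libcFloatPrefix and libcFloatSuffix. The helper name tanLibcName is hypothetical; the resulting names follow the patch.)

    // Mirrors libcFloatPrefix(bits) ++ "tan" ++ libcFloatSuffix(bits) from src/codegen/llvm.zig.
    fn tanLibcName(comptime float_bits: u16) []const u8 {
        return switch (float_bits) {
            16 => "__tanh", // non-standard "__" prefix and "h" suffix
            32 => "tanf", // suffix corrected from "s" to "f" in this series
            64 => "tan",
            80 => "__tanx", // non-standard
            128 => "tanq", // mimics the GCC libquadmath convention
            else => unreachable,
        };
    }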

+ {#header_close#}
+
+
 {#header_open|@exp#}
{#syntax#}@exp(value: anytype) @TypeOf(value){#endsyntax#}

diff --git a/lib/std/math/complex/tanh.zig b/lib/std/math/complex/tanh.zig index e61ec1e95b..d5195d6c73 100644 --- a/lib/std/math/complex/tanh.zig +++ b/lib/std/math/complex/tanh.zig @@ -49,7 +49,7 @@ fn tanh32(z: Complex(f32)) Complex(f32) { } // Kahan's algorithm - const t = math.tan(y); + const t = @tan(y); const beta = 1.0 + t * t; const s = math.sinh(x); const rho = @sqrt(1 + s * s); @@ -92,7 +92,7 @@ fn tanh64(z: Complex(f64)) Complex(f64) { } // Kahan's algorithm - const t = math.tan(y); + const t = @tan(y); const beta = 1.0 + t * t; const s = math.sinh(x); const rho = @sqrt(1 + s * s); diff --git a/src/Air.zig b/src/Air.zig index d02491ff89..0968d95180 100644 --- a/src/Air.zig +++ b/src/Air.zig @@ -249,12 +249,15 @@ pub const Inst = struct { /// Square root of a floating point number. /// Uses the `un_op` field. sqrt, - /// Sine a floating point number. + /// Sine function on a floating point number. /// Uses the `un_op` field. sin, - /// Cosine a floating point number. + /// Cosine function on a floating point number. /// Uses the `un_op` field. cos, + /// Tangent function on a floating point number. + /// Uses the `un_op` field. + tan, /// Base e exponential of a floating point number. /// Uses the `un_op` field. exp, @@ -921,6 +924,7 @@ pub fn typeOfIndex(air: Air, inst: Air.Inst.Index) Type { .sqrt, .sin, .cos, + .tan, .exp, .exp2, .log, diff --git a/src/AstGen.zig b/src/AstGen.zig index 34b29b28fb..230b46a489 100644 --- a/src/AstGen.zig +++ b/src/AstGen.zig @@ -2237,7 +2237,6 @@ fn unusedResultExpr(gz: *GenZir, scope: *Scope, statement: Ast.Node.Index) Inner .field_call_bind, .field_ptr_named, .field_val_named, - .field_call_bind_named, .func, .func_inferred, .int, @@ -2329,6 +2328,7 @@ fn unusedResultExpr(gz: *GenZir, scope: *Scope, statement: Ast.Node.Index) Inner .sqrt, .sin, .cos, + .tan, .exp, .exp2, .log, @@ -7259,6 +7259,7 @@ fn builtinCall( .sqrt => return simpleUnOp(gz, scope, rl, node, .none, params[0], .sqrt), .sin => return simpleUnOp(gz, scope, rl, node, .none, params[0], .sin), .cos => return simpleUnOp(gz, scope, rl, node, .none, params[0], .cos), + .tan => return simpleUnOp(gz, scope, rl, node, .none, params[0], .tan), .exp => return simpleUnOp(gz, scope, rl, node, .none, params[0], .exp), .exp2 => return simpleUnOp(gz, scope, rl, node, .none, params[0], .exp2), .log => return simpleUnOp(gz, scope, rl, node, .none, params[0], .log), @@ -7947,7 +7948,8 @@ fn calleeExpr( if (std.mem.eql(u8, builtin_name, "@field") and params.len == 2) { const lhs = try expr(gz, scope, .ref, params[0]); const field_name = try comptimeExpr(gz, scope, .{ .ty = .const_slice_u8_type }, params[1]); - return gz.addPlNode(.field_call_bind_named, node, Zir.Inst.FieldNamed{ + return gz.addExtendedPayload(.field_call_bind_named, Zir.Inst.FieldNamedNode{ + .node = gz.nodeIndexToRelative(node), .lhs = lhs, .field_name = field_name, }); diff --git a/src/BuiltinFn.zig b/src/BuiltinFn.zig index 3bf7224fab..04cad19354 100644 --- a/src/BuiltinFn.zig +++ b/src/BuiltinFn.zig @@ -89,6 +89,7 @@ pub const Tag = enum { sqrt, sin, cos, + tan, exp, exp2, log, @@ -771,6 +772,13 @@ pub const list = list: { .param_count = 1, }, }, + .{ + "@tan", + .{ + .tag = .tan, + .param_count = 1, + }, + }, .{ "@exp", .{ diff --git a/src/Liveness.zig b/src/Liveness.zig index be4344ab90..e606c15b4b 100644 --- a/src/Liveness.zig +++ b/src/Liveness.zig @@ -422,6 +422,7 @@ fn analyzeInst( .sqrt, .sin, .cos, + .tan, .exp, .exp2, .log, diff --git a/src/Sema.zig b/src/Sema.zig index 5d1d51b58f..3fa0353e9d 100644 --- 
a/src/Sema.zig +++ b/src/Sema.zig @@ -743,7 +743,6 @@ fn analyzeBodyInner( .field_val => try sema.zirFieldVal(block, inst), .field_val_named => try sema.zirFieldValNamed(block, inst), .field_call_bind => try sema.zirFieldCallBind(block, inst), - .field_call_bind_named => try sema.zirFieldCallBindNamed(block, inst), .func => try sema.zirFunc(block, inst, false), .func_inferred => try sema.zirFunc(block, inst, true), .import => try sema.zirImport(block, inst), @@ -855,6 +854,7 @@ fn analyzeBodyInner( .sqrt => try sema.zirUnaryMath(block, inst, .sqrt, Value.sqrt), .sin => try sema.zirUnaryMath(block, inst, .sin, Value.sin), .cos => try sema.zirUnaryMath(block, inst, .cos, Value.cos), + .tan => try sema.zirUnaryMath(block, inst, .tan, Value.tan), .exp => try sema.zirUnaryMath(block, inst, .exp, Value.exp), .exp2 => try sema.zirUnaryMath(block, inst, .exp2, Value.exp2), .log => try sema.zirUnaryMath(block, inst, .log, Value.log), @@ -910,35 +910,36 @@ fn analyzeBodyInner( const extended = datas[inst].extended; break :ext switch (extended.opcode) { // zig fmt: off - .func => try sema.zirFuncExtended( block, extended, inst), - .variable => try sema.zirVarExtended( block, extended), - .struct_decl => try sema.zirStructDecl( block, extended, inst), - .enum_decl => try sema.zirEnumDecl( block, extended), - .union_decl => try sema.zirUnionDecl( block, extended, inst), - .opaque_decl => try sema.zirOpaqueDecl( block, extended), - .ret_ptr => try sema.zirRetPtr( block, extended), - .ret_type => try sema.zirRetType( block, extended), - .this => try sema.zirThis( block, extended), - .ret_addr => try sema.zirRetAddr( block, extended), - .builtin_src => try sema.zirBuiltinSrc( block, extended), - .error_return_trace => try sema.zirErrorReturnTrace( block, extended), - .frame => try sema.zirFrame( block, extended), - .frame_address => try sema.zirFrameAddress( block, extended), - .alloc => try sema.zirAllocExtended( block, extended), - .builtin_extern => try sema.zirBuiltinExtern( block, extended), - .@"asm" => try sema.zirAsm( block, extended), - .typeof_peer => try sema.zirTypeofPeer( block, extended), - .compile_log => try sema.zirCompileLog( block, extended), - .add_with_overflow => try sema.zirOverflowArithmetic(block, extended, extended.opcode), - .sub_with_overflow => try sema.zirOverflowArithmetic(block, extended, extended.opcode), - .mul_with_overflow => try sema.zirOverflowArithmetic(block, extended, extended.opcode), - .shl_with_overflow => try sema.zirOverflowArithmetic(block, extended, extended.opcode), - .c_undef => try sema.zirCUndef( block, extended), - .c_include => try sema.zirCInclude( block, extended), - .c_define => try sema.zirCDefine( block, extended), - .wasm_memory_size => try sema.zirWasmMemorySize( block, extended), - .wasm_memory_grow => try sema.zirWasmMemoryGrow( block, extended), - .prefetch => try sema.zirPrefetch( block, extended), + .func => try sema.zirFuncExtended( block, extended, inst), + .variable => try sema.zirVarExtended( block, extended), + .struct_decl => try sema.zirStructDecl( block, extended, inst), + .enum_decl => try sema.zirEnumDecl( block, extended), + .union_decl => try sema.zirUnionDecl( block, extended, inst), + .opaque_decl => try sema.zirOpaqueDecl( block, extended), + .ret_ptr => try sema.zirRetPtr( block, extended), + .ret_type => try sema.zirRetType( block, extended), + .this => try sema.zirThis( block, extended), + .ret_addr => try sema.zirRetAddr( block, extended), + .builtin_src => try sema.zirBuiltinSrc( block, extended), + .error_return_trace 
=> try sema.zirErrorReturnTrace( block, extended), + .frame => try sema.zirFrame( block, extended), + .frame_address => try sema.zirFrameAddress( block, extended), + .alloc => try sema.zirAllocExtended( block, extended), + .builtin_extern => try sema.zirBuiltinExtern( block, extended), + .@"asm" => try sema.zirAsm( block, extended), + .typeof_peer => try sema.zirTypeofPeer( block, extended), + .compile_log => try sema.zirCompileLog( block, extended), + .add_with_overflow => try sema.zirOverflowArithmetic(block, extended, extended.opcode), + .sub_with_overflow => try sema.zirOverflowArithmetic(block, extended, extended.opcode), + .mul_with_overflow => try sema.zirOverflowArithmetic(block, extended, extended.opcode), + .shl_with_overflow => try sema.zirOverflowArithmetic(block, extended, extended.opcode), + .c_undef => try sema.zirCUndef( block, extended), + .c_include => try sema.zirCInclude( block, extended), + .c_define => try sema.zirCDefine( block, extended), + .wasm_memory_size => try sema.zirWasmMemorySize( block, extended), + .wasm_memory_grow => try sema.zirWasmMemoryGrow( block, extended), + .prefetch => try sema.zirPrefetch( block, extended), + .field_call_bind_named => try sema.zirFieldCallBindNamed(block, extended), // zig fmt: on .dbg_block_begin => { dbg_block_begins += 1; @@ -6938,14 +6939,13 @@ fn zirFieldPtrNamed(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileErr return sema.fieldPtr(block, src, object_ptr, field_name, field_name_src); } -fn zirFieldCallBindNamed(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air.Inst.Ref { +fn zirFieldCallBindNamed(sema: *Sema, block: *Block, extended: Zir.Inst.Extended.InstData) CompileError!Air.Inst.Ref { const tracy = trace(@src()); defer tracy.end(); - const inst_data = sema.code.instructions.items(.data)[inst].pl_node; - const src = inst_data.src(); - const field_name_src: LazySrcLoc = .{ .node_offset_builtin_call_arg1 = inst_data.src_node }; - const extra = sema.code.extraData(Zir.Inst.FieldNamed, inst_data.payload_index).data; + const extra = sema.code.extraData(Zir.Inst.FieldNamedNode, extended.operand).data; + const src: LazySrcLoc = .{ .node_offset = extra.node }; + const field_name_src: LazySrcLoc = .{ .node_offset_builtin_call_arg1 = extra.node }; const object_ptr = sema.resolveInst(extra.lhs); const field_name = try sema.resolveConstString(block, field_name_src, extra.field_name); return sema.fieldCallBind(block, src, object_ptr, field_name, field_name_src); diff --git a/src/Zir.zig b/src/Zir.zig index 8fe5276792..f4c62a6f24 100644 --- a/src/Zir.zig +++ b/src/Zir.zig @@ -407,15 +407,6 @@ pub const Inst = struct { /// The field name is a comptime instruction. Used by @field. /// Uses `pl_node` field. The AST node is the builtin call. Payload is FieldNamed. field_val_named, - /// Given a pointer to a struct or object that contains virtual fields, returns the - /// named field. If there is no named field, searches in the type for a decl that - /// matches the field name. The decl is resolved and we ensure that it's a function - /// which can accept the object as the first parameter, with one pointer fixup. If - /// all of that works, this instruction produces a special "bound function" value - /// which contains both the function and the saved first parameter value. - /// Bound functions may only be used as the function parameter to a `call` or - /// `builtin_call` instruction. Any other use is invalid zir and may crash the compiler. 
- field_call_bind_named, /// Returns a function type, or a function instance, depending on whether /// the body_len is 0. Calling convention is auto. /// Uses the `pl_node` union field. `payload_index` points to a `Func`. @@ -797,6 +788,8 @@ pub const Inst = struct { sin, /// Implement builtin `@cos`. Uses `un_node`. cos, + /// Implement builtin `@tan`. Uses `un_node`. + tan, /// Implement builtin `@exp`. Uses `un_node`. exp, /// Implement builtin `@exp2`. Uses `un_node`. @@ -1069,7 +1062,6 @@ pub const Inst = struct { .field_call_bind, .field_ptr_named, .field_val_named, - .field_call_bind_named, .func, .func_inferred, .has_decl, @@ -1179,6 +1171,7 @@ pub const Inst = struct { .sqrt, .sin, .cos, + .tan, .exp, .exp2, .log, @@ -1358,7 +1351,6 @@ pub const Inst = struct { .field_call_bind, .field_ptr_named, .field_val_named, - .field_call_bind_named, .func, .func_inferred, .has_decl, @@ -1451,6 +1443,7 @@ pub const Inst = struct { .sqrt, .sin, .cos, + .tan, .exp, .exp2, .log, @@ -1607,7 +1600,6 @@ pub const Inst = struct { .field_ptr_named = .pl_node, .field_val_named = .pl_node, .field_call_bind = .pl_node, - .field_call_bind_named = .pl_node, .func = .pl_node, .func_inferred = .pl_node, .import = .str_tok, @@ -1713,6 +1705,7 @@ pub const Inst = struct { .sqrt = .un_node, .sin = .un_node, .cos = .un_node, + .tan = .un_node, .exp = .un_node, .exp2 = .un_node, .log = .un_node, @@ -1928,6 +1921,16 @@ pub const Inst = struct { dbg_block_begin, /// Marks the end of a semantic scope for debug info variables. dbg_block_end, + /// Given a pointer to a struct or object that contains virtual fields, returns the + /// named field. If there is no named field, searches in the type for a decl that + /// matches the field name. The decl is resolved and we ensure that it's a function + /// which can accept the object as the first parameter, with one pointer fixup. If + /// all of that works, this instruction produces a special "bound function" value + /// which contains both the function and the saved first parameter value. + /// Bound functions may only be used as the function parameter to a `call` or + /// `builtin_call` instruction. Any other use is invalid zir and may crash the compiler. + /// Uses `pl_node` field. The AST node is the `@field` builtin. Payload is FieldNamedNode. 
diff --git a/src/arch/aarch64/CodeGen.zig b/src/arch/aarch64/CodeGen.zig
index fc37ae00dd..5ed7b63db3 100644
--- a/src/arch/aarch64/CodeGen.zig
+++ b/src/arch/aarch64/CodeGen.zig
@@ -533,6 +533,7 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void {
             .sqrt,
             .sin,
             .cos,
+            .tan,
             .exp,
             .exp2,
             .log,
diff --git a/src/arch/arm/CodeGen.zig b/src/arch/arm/CodeGen.zig
index 54de053475..73f51f6481 100644
--- a/src/arch/arm/CodeGen.zig
+++ b/src/arch/arm/CodeGen.zig
@@ -571,6 +571,7 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void {
             .sqrt,
             .sin,
             .cos,
+            .tan,
             .exp,
             .exp2,
             .log,
diff --git a/src/arch/riscv64/CodeGen.zig b/src/arch/riscv64/CodeGen.zig
index 15377378cd..61fddee207 100644
--- a/src/arch/riscv64/CodeGen.zig
+++ b/src/arch/riscv64/CodeGen.zig
@@ -500,6 +500,7 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void {
             .sqrt,
             .sin,
             .cos,
+            .tan,
             .exp,
             .exp2,
             .log,
diff --git a/src/arch/sparcv9/CodeGen.zig b/src/arch/sparcv9/CodeGen.zig
index 7e1ecefbb7..bcd8cf8eeb 100644
--- a/src/arch/sparcv9/CodeGen.zig
+++ b/src/arch/sparcv9/CodeGen.zig
@@ -451,6 +451,7 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void {
             .sqrt,
             .sin,
             .cos,
+            .tan,
             .exp,
             .exp2,
             .log,
diff --git a/src/arch/wasm/CodeGen.zig b/src/arch/wasm/CodeGen.zig
index 8eadfe6cd8..5171dfb460 100644
--- a/src/arch/wasm/CodeGen.zig
+++ b/src/arch/wasm/CodeGen.zig
@@ -1559,6 +1559,7 @@ fn genInst(self: *Self, inst: Air.Inst.Index) !WValue {
             .sqrt,
             .sin,
             .cos,
+            .tan,
             .exp,
             .exp2,
             .log,
diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig
index 4097352975..0103f5382f 100644
--- a/src/arch/x86_64/CodeGen.zig
+++ b/src/arch/x86_64/CodeGen.zig
@@ -656,6 +656,7 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void {
             .sqrt,
             .sin,
             .cos,
+            .tan,
             .exp,
             .exp2,
             .log,
diff --git a/src/codegen/c.zig b/src/codegen/c.zig
index 464f144f5a..46fee271cc 100644
--- a/src/codegen/c.zig
+++ b/src/codegen/c.zig
@@ -1749,6 +1749,7 @@ fn genBody(f: *Function, body: []const Air.Inst.Index) error{ AnalysisFail, OutO
             .sqrt,
             .sin,
             .cos,
+            .tan,
             .exp,
             .exp2,
             .log,
diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig
index de25a7821d..c9ea5bebac 100644
--- a/src/codegen/llvm.zig
+++ b/src/codegen/llvm.zig
@@ -3521,6 +3521,7 @@ pub const FuncGen = struct {
             .sqrt => try self.airUnaryOp(inst, .sqrt),
             .sin => try self.airUnaryOp(inst, .sin),
             .cos => try self.airUnaryOp(inst, .cos),
+            .tan => try self.airUnaryOp(inst, .tan),
             .exp => try self.airUnaryOp(inst, .exp),
             .exp2 => try self.airUnaryOp(inst, .exp2),
             .log => try self.airUnaryOp(inst, .log),
@@ -5553,7 +5554,7 @@ pub const FuncGen = struct {
     fn libcFloatSuffix(float_bits: u16) []const u8 {
         return switch (float_bits) {
             16 => "h", // Non-standard
-            32 => "s",
+            32 => "f",
             64 => "",
             80 => "x", // Non-standard
             128 => "q", // Non-standard (mimics convention in GCC libquadmath)
@@ -5661,6 +5662,7 @@ pub const FuncGen = struct {
         sin,
         sqrt,
         sub,
+        tan,
         trunc,
     };
 
@@ -5684,7 +5686,7 @@ pub const FuncGen = struct {
         const llvm_ty = try self.dg.llvmType(ty);
         const scalar_llvm_ty = try self.dg.llvmType(scalar_ty);
 
-        const intrinsics_allowed = intrinsicsAllowed(scalar_ty, target);
+        const intrinsics_allowed = op != .tan and intrinsicsAllowed(scalar_ty, target);
         var fn_name_buf: [64]u8 = undefined;
         const strat: FloatOpStrat = if (intrinsics_allowed) switch (op) {
             // Some operations are dedicated LLVM instructions, not available as intrinsics
@@ -5720,6 +5722,7 @@ pub const FuncGen = struct {
             .round,
             .sin,
             .sqrt,
+            .tan,
             .trunc,
             => FloatOpStrat{
                 .libc = std.fmt.bufPrintZ(&fn_name_buf, "{s}{s}{s}", .{
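
Since LLVM has no `tan` intrinsic, `.tan` always takes the library-call strategy above, and the fallback symbol is composed as prefix, then operation name, then suffix. A sketch of the resulting names; the `libcName` helper mirrors the prefix/suffix tables used by the codegen but is illustrative, not part of the patch:

    const std = @import("std");

    // Illustrative mirror of the naming scheme the codegen uses when it
    // falls back to a libc/compiler-rt call instead of an LLVM intrinsic.
    fn libcName(comptime float_bits: u16, comptime op: []const u8) []const u8 {
        const prefix: []const u8 = switch (float_bits) {
            16, 80 => "__", // Non-standard widths get a compiler-rt style prefix
            32, 64, 128 => "",
            else => unreachable,
        };
        const suffix: []const u8 = switch (float_bits) {
            16 => "h", // Non-standard
            32 => "f",
            64 => "",
            80 => "x", // Non-standard
            128 => "q", // Non-standard (GCC libquadmath convention)
            else => unreachable,
        };
        return prefix ++ op ++ suffix;
    }

    comptime {
        // @tan on f16 resolves to __tanh, f32 to tanf, f64 to plain tan,
        // f80 to __tanx, and f128 to tanq.
        std.debug.assert(std.mem.eql(u8, libcName(32, "tan"), "tanf"));
        std.debug.assert(std.mem.eql(u8, libcName(80, "tan"), "__tanx"));
    }

This is also why the "s" suffix for f32 above was a bug worth fixing: the C library function is `tanf`, not `tans`.
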
diff --git a/src/print_air.zig b/src/print_air.zig
index 27d222f262..6e336e138b 100644
--- a/src/print_air.zig
+++ b/src/print_air.zig
@@ -158,6 +158,7 @@ const Writer = struct {
             .sqrt,
             .sin,
             .cos,
+            .tan,
             .exp,
             .exp2,
             .log,
diff --git a/src/print_zir.zig b/src/print_zir.zig
index e85e69fe7f..776aeffbdc 100644
--- a/src/print_zir.zig
+++ b/src/print_zir.zig
@@ -207,6 +207,7 @@ const Writer = struct {
             .sqrt,
             .sin,
             .cos,
+            .tan,
             .exp,
             .exp2,
             .log,
@@ -400,7 +401,6 @@ const Writer = struct {
 
             .field_ptr_named,
             .field_val_named,
-            .field_call_bind_named,
             => try self.writePlNodeFieldNamed(stream, inst),
 
             .as_node => try self.writeAs(stream, inst),
@@ -509,6 +509,16 @@ const Writer = struct {
                 try stream.writeAll(")) ");
                 try self.writeSrc(stream, src);
             },
+
+            .field_call_bind_named => {
+                const extra = self.code.extraData(Zir.Inst.FieldNamedNode, extended.operand).data;
+                const src: LazySrcLoc = .{ .node_offset = extra.node };
+                try self.writeInstRef(stream, extra.lhs);
+                try stream.writeAll(", ");
+                try self.writeInstRef(stream, extra.field_name);
+                try stream.writeAll(") ");
+                try self.writeSrc(stream, src);
+            },
         }
     }
diff --git a/src/stage1/all_types.hpp b/src/stage1/all_types.hpp
index cbefcd1078..398693e6d8 100644
--- a/src/stage1/all_types.hpp
+++ b/src/stage1/all_types.hpp
@@ -1768,6 +1768,7 @@ enum BuiltinFnId {
     BuiltinFnIdSqrt,
    BuiltinFnIdSin,
     BuiltinFnIdCos,
+    BuiltinFnIdTan,
     BuiltinFnIdExp,
     BuiltinFnIdExp2,
     BuiltinFnIdLog,
diff --git a/src/stage1/analyze.cpp b/src/stage1/analyze.cpp
index 73e3cd0da6..6e49c91fd8 100644
--- a/src/stage1/analyze.cpp
+++ b/src/stage1/analyze.cpp
@@ -10383,6 +10383,8 @@ const char *float_un_op_to_name(BuiltinFnId op) {
             return "sin";
         case BuiltinFnIdCos:
             return "cos";
+        case BuiltinFnIdTan:
+            return "tan";
         case BuiltinFnIdExp:
             return "exp";
         case BuiltinFnIdExp2:
diff --git a/src/stage1/astgen.cpp b/src/stage1/astgen.cpp
index 35566e2143..367bed69cf 100644
--- a/src/stage1/astgen.cpp
+++ b/src/stage1/astgen.cpp
@@ -4497,6 +4497,7 @@ static Stage1ZirInst *astgen_builtin_fn_call(Stage1AstGen *ag, Scope *scope, Ast
         case BuiltinFnIdSqrt:
         case BuiltinFnIdSin:
         case BuiltinFnIdCos:
+        case BuiltinFnIdTan:
         case BuiltinFnIdExp:
         case BuiltinFnIdExp2:
         case BuiltinFnIdLog:
diff --git a/src/stage1/codegen.cpp b/src/stage1/codegen.cpp
index 88e73baa3c..34ae82eb82 100644
--- a/src/stage1/codegen.cpp
+++ b/src/stage1/codegen.cpp
@@ -1629,11 +1629,28 @@ static const char *get_compiler_rt_type_abbrev(ZigType *type) {
     }
 }
 
-static const char *get_math_h_type_abbrev(CodeGen *g, ZigType *float_type) {
+static const char *libc_float_prefix(CodeGen *g, ZigType *float_type) {
+    if (float_type == g->builtin_types.entry_f16)
+        return "__";
+    else if (float_type == g->builtin_types.entry_f32)
+        return "";
+    else if (float_type == g->builtin_types.entry_f64)
+        return "";
+    else if (float_type == g->builtin_types.entry_f80)
+        return "__";
+    else if (float_type == g->builtin_types.entry_c_longdouble)
+        return "l";
+    else if (float_type == g->builtin_types.entry_f128)
+        return "";
+    else
+        zig_unreachable();
+}
+
+static const char *libc_float_suffix(CodeGen *g, ZigType *float_type) {
     if (float_type == g->builtin_types.entry_f16)
         return "h"; // Non-standard
     else if (float_type == g->builtin_types.entry_f32)
-        return "s";
+        return "f";
     else if (float_type == g->builtin_types.entry_f64)
         return "";
     else if (float_type == g->builtin_types.entry_f80)
@@ -2992,10 +3009,12 @@ static LLVMValueRef get_soft_float_fn(CodeGen *g, const char *name, int param_co
 
 static LLVMValueRef gen_soft_float_un_op(CodeGen *g, LLVMValueRef op, ZigType *operand_type, BuiltinFnId op_id) {
     uint32_t vector_len = operand_type->id == ZigTypeIdVector ? operand_type->data.vector.len : 0;
+    ZigType *scalar_type = operand_type->id == ZigTypeIdVector ? operand_type->data.vector.elem_type : operand_type;
 
     char fn_name[64];
-    sprintf(fn_name, "%s%s", float_un_op_to_name(op_id), get_math_h_type_abbrev(g, operand_type));
-    LLVMValueRef func_ref = get_soft_float_fn(g, fn_name, 1, operand_type->llvm_type, operand_type->llvm_type);
+    sprintf(fn_name, "%s%s%s", libc_float_prefix(g, scalar_type),
+            float_un_op_to_name(op_id), libc_float_suffix(g, scalar_type));
+    LLVMValueRef func_ref = get_soft_float_fn(g, fn_name, 1, scalar_type->llvm_type, scalar_type->llvm_type);
 
     LLVMValueRef result;
     if (vector_len == 0) {
@@ -3018,7 +3037,9 @@ static LLVMValueRef gen_float_un_op(CodeGen *g, LLVMValueRef operand, ZigType *o
     assert(operand_type->id == ZigTypeIdFloat || operand_type->id == ZigTypeIdVector);
     ZigType *elem_type = operand_type->id == ZigTypeIdVector ? operand_type->data.vector.elem_type : operand_type;
     if ((elem_type == g->builtin_types.entry_f80 && !target_has_f80(g->zig_target)) ||
-        (elem_type == g->builtin_types.entry_f128 && !target_long_double_is_f128(g->zig_target))) {
+        (elem_type == g->builtin_types.entry_f128 && !target_long_double_is_f128(g->zig_target)) ||
+        op == BuiltinFnIdTan)
+    {
         return gen_soft_float_un_op(g, operand, operand_type, op);
     }
     LLVMValueRef float_op_fn = get_float_fn(g, operand_type, ZigLLVMFnIdFloatOp, op);
@@ -3466,7 +3487,8 @@ static LLVMValueRef gen_soft_float_bin_op(CodeGen *g, LLVMValueRef op1_value, LL
     int param_count = 2;
 
     const char *compiler_rt_type_abbrev = get_compiler_rt_type_abbrev(operand_type);
-    const char *math_h_type_abbrev = get_math_h_type_abbrev(g, operand_type);
+    const char *math_float_prefix = libc_float_prefix(g, operand_type);
+    const char *math_float_suffix = libc_float_suffix(g, operand_type);
 
     char fn_name[64];
     Icmp res_icmp = NONE;
@@ -3523,10 +3545,10 @@ static LLVMValueRef gen_soft_float_bin_op(CodeGen *g, LLVMValueRef op1_value, LL
             res_icmp = EQ_ONE;
             break;
         case IrBinOpMaximum:
-            sprintf(fn_name, "fmax%s", math_h_type_abbrev);
+            sprintf(fn_name, "%sfmax%s", math_float_prefix, math_float_suffix);
             break;
         case IrBinOpMinimum:
-            sprintf(fn_name, "fmin%s", math_h_type_abbrev);
+            sprintf(fn_name, "%sfmin%s", math_float_prefix, math_float_suffix);
             break;
         case IrBinOpMult:
             sprintf(fn_name, "__mul%sf3", compiler_rt_type_abbrev);
@@ -3545,7 +3567,7 @@ static LLVMValueRef gen_soft_float_bin_op(CodeGen *g, LLVMValueRef op1_value, LL
             break;
         case IrBinOpRemRem:
         case IrBinOpRemMod:
-            sprintf(fn_name, "fmod%s", math_h_type_abbrev);
+            sprintf(fn_name, "%sfmod%s", math_float_prefix, math_float_suffix);
             break;
         default:
             zig_unreachable();
@@ -9810,6 +9832,7 @@ static void define_builtin_fns(CodeGen *g) {
     create_builtin_fn(g, BuiltinFnIdSqrt, "sqrt", 1);
     create_builtin_fn(g, BuiltinFnIdSin, "sin", 1);
     create_builtin_fn(g, BuiltinFnIdCos, "cos", 1);
+    create_builtin_fn(g, BuiltinFnIdTan, "tan", 1);
     create_builtin_fn(g, BuiltinFnIdExp, "exp", 1);
     create_builtin_fn(g, BuiltinFnIdExp2, "exp2", 1);
     create_builtin_fn(g, BuiltinFnIdLog, "log", 1);
diff --git a/src/stage1/ir.cpp b/src/stage1/ir.cpp
index 874d068c03..1eef354864 100644
--- a/src/stage1/ir.cpp
+++ b/src/stage1/ir.cpp
@@ -24132,6 +24132,9 @@ static ErrorMsg *ir_eval_float_op(IrAnalyze *ira, Scope *scope, AstNode *source_
         case BuiltinFnIdCos:
             out_val->data.x_f16 = zig_double_to_f16(cos(zig_f16_to_double(op->data.x_f16)));
             break;
+        case BuiltinFnIdTan:
+            out_val->data.x_f16 = zig_double_to_f16(tan(zig_f16_to_double(op->data.x_f16)));
+            break;
         case BuiltinFnIdExp:
             out_val->data.x_f16 = zig_double_to_f16(exp(zig_f16_to_double(op->data.x_f16)));
             break;
@@ -24181,6 +24184,9 @@ static ErrorMsg *ir_eval_float_op(IrAnalyze *ira, Scope *scope, AstNode *source_
         case BuiltinFnIdCos:
             out_val->data.x_f32 = cosf(op->data.x_f32);
             break;
+        case BuiltinFnIdTan:
+            out_val->data.x_f32 = tanf(op->data.x_f32);
+            break;
         case BuiltinFnIdExp:
             out_val->data.x_f32 = expf(op->data.x_f32);
             break;
@@ -24230,6 +24236,9 @@ static ErrorMsg *ir_eval_float_op(IrAnalyze *ira, Scope *scope, AstNode *source_
         case BuiltinFnIdCos:
             out_val->data.x_f64 = cos(op->data.x_f64);
             break;
+        case BuiltinFnIdTan:
+            out_val->data.x_f64 = tan(op->data.x_f64);
+            break;
         case BuiltinFnIdExp:
             out_val->data.x_f64 = exp(op->data.x_f64);
             break;
@@ -24293,6 +24302,7 @@ static ErrorMsg *ir_eval_float_op(IrAnalyze *ira, Scope *scope, AstNode *source_
         case BuiltinFnIdNearbyInt:
         case BuiltinFnIdSin:
         case BuiltinFnIdCos:
+        case BuiltinFnIdTan:
         case BuiltinFnIdExp:
         case BuiltinFnIdExp2:
         case BuiltinFnIdLog:
@@ -24337,6 +24347,7 @@ static ErrorMsg *ir_eval_float_op(IrAnalyze *ira, Scope *scope, AstNode *source_
         case BuiltinFnIdNearbyInt:
         case BuiltinFnIdSin:
         case BuiltinFnIdCos:
+        case BuiltinFnIdTan:
         case BuiltinFnIdExp:
         case BuiltinFnIdExp2:
         case BuiltinFnIdLog:
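
Both compilers also fold `@tan` in comptime contexts: stage1 widens f16 through double and calls the host libm as in the cases above, while stage2's `Value.tan`/`tanScalar` below map each width onto the host `@tan`. An illustrative check, not taken from the patch:

    const std = @import("std");

    test "@tan is folded at comptime" {
        comptime {
            // tan(0) is exactly 0.0 in every float width, so no tolerance is needed.
            std.debug.assert(@tan(@as(f32, 0.0)) == 0.0);
            std.debug.assert(@tan(@as(f64, 0.0)) == 0.0);
        }
    }
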
diff --git a/src/value.zig b/src/value.zig
index e951b075c0..a39984d1d4 100644
--- a/src/value.zig
+++ b/src/value.zig
@@ -4473,6 +4473,44 @@ pub const Value = extern union {
         }
     }
 
+    pub fn tan(val: Value, float_type: Type, arena: Allocator, target: Target) Allocator.Error!Value {
+        if (float_type.zigTypeTag() == .Vector) {
+            const result_data = try arena.alloc(Value, float_type.vectorLen());
+            for (result_data) |*scalar, i| {
+                scalar.* = try tanScalar(val.indexVectorlike(i), float_type.scalarType(), arena, target);
+            }
+            return Value.Tag.aggregate.create(arena, result_data);
+        }
+        return tanScalar(val, float_type, arena, target);
+    }
+
+    pub fn tanScalar(val: Value, float_type: Type, arena: Allocator, target: Target) Allocator.Error!Value {
+        switch (float_type.floatBits(target)) {
+            16 => {
+                const f = val.toFloat(f16);
+                return Value.Tag.float_16.create(arena, @tan(f));
+            },
+            32 => {
+                const f = val.toFloat(f32);
+                return Value.Tag.float_32.create(arena, @tan(f));
+            },
+            64 => {
+                const f = val.toFloat(f64);
+                return Value.Tag.float_64.create(arena, @tan(f));
+            },
+            80 => {
+                const f = val.toFloat(f80);
+                return Value.Tag.float_80.create(arena, @tan(f));
+            },
+            128 => {
+                const f = val.toFloat(f128);
+                return Value.Tag.float_128.create(arena, @tan(f));
+            },
+            else => unreachable,
+        }
+    }
+
     pub fn exp(val: Value, float_type: Type, arena: Allocator, target: Target) Allocator.Error!Value {
         if (float_type.zigTypeTag() == .Vector) {
             const result_data = try arena.alloc(Value, float_type.vectorLen());
diff --git a/test/behavior/bugs/920.zig b/test/behavior/bugs/920.zig
index 380d42e5de..5a7cadc595 100644
--- a/test/behavior/bugs/920.zig
+++ b/test/behavior/bugs/920.zig
@@ -1,5 +1,4 @@
 const std = @import("std");
-const math = std.math;
 const Random = std.rand.Random;
 
 const ZigTable = struct {
@@ -40,10 +39,10 @@ const norm_r = 3.6541528853610088;
 const norm_v = 0.00492867323399;
 
 fn norm_f(x: f64) f64 {
-    return math.exp(-x * x / 2.0);
+    return @exp(-x * x / 2.0);
 }
 fn norm_f_inv(y: f64) f64 {
-    return math.sqrt(-2.0 * math.ln(y));
+    return @sqrt(-2.0 * @log(y));
 }
 fn norm_zero_case(random: *Random, u: f64) f64 {
     _ = random;
-- 
cgit v1.2.3
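
For completeness, a runtime-flavored usage sketch in the style of the behavior tests; the helper function and tolerance are illustrative. A runtime operand exercises the `.tan` AIR instruction, which on the LLVM backend lowers to a `tanf`-style library call as set up above:

    const std = @import("std");

    fn quarterPi() f32 {
        return std.math.pi / 4.0;
    }

    test "@tan with a runtime operand" {
        // tan(pi/4) == 1; routing the operand through a function call
        // keeps it runtime-known rather than comptime-folded.
        try std.testing.expectApproxEqAbs(@as(f32, 1.0), @tan(quarterPi()), 0.001);
    }
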