| author | Andrew Kelley <andrew@ziglang.org> | 2022-03-06 15:23:21 -0700 |
|---|---|---|
| committer | Andrew Kelley <andrew@ziglang.org> | 2022-03-06 16:11:39 -0700 |
| commit | 71b8760d3b145c92dc6e331aefff7dac5cabebeb | |
| tree | 66b92748616634b689eb5c984f143042132d5e6c /src/codegen | |
| parent | 6637335981f7179b449fced78cfd4052b1618051 | |
stage2: rework `@mulAdd`
* mul_add AIR instruction: use `pl_op` instead of `ty_pl`. The type is
always the same as the operand; no need to waste bytes redundantly
storing the type.
* AstGen: use coerced_ty for all the operands except for one which we
use to communicate the type.
* Sema: use the correct source location for requireRuntimeBlock in
handling of `@mulAdd`.
* native backends: handle liveness even for operations whose
  implementations are still TODO.
* C backend: implement `@mulAdd`. It lowers to libc calls.
* LLVM backend: make `@mulAdd` handle all float types.
- improved fptrunc and fpext to handle f80 with compiler-rt calls.
* Value.mulAdd: handle all float types and use the `@mulAdd` builtin.
* behavior tests: revert the changes to testing `@mulAdd`. Those
  changes broke the test coverage, leaving the builtin tested only at
  compile time (a runtime-oriented sketch follows this list).
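For context, `@mulAdd(T, a, b, c)` computes `a * b + c` as a single fused
operation with one rounding step. A minimal runtime-oriented sketch of the
kind of coverage the reverted tests restore (illustrative only, not a test
taken from this commit):

```zig
const std = @import("std");

test "@mulAdd is exercised at runtime" {
    // `var` keeps the operands runtime-known, so this lowers through the
    // backend's mul_add handling instead of folding at compile time.
    var a: f32 = 5.5;
    var b: f32 = 2.5;
    var c: f32 = 6.25;
    // 5.5 * 2.5 + 6.25 == 20.0 exactly, so the == comparison is safe.
    try std.testing.expect(@mulAdd(f32, a, b, c) == 20.0);
}
```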
Improved f80 support:
* std.math.fma handles f80 (a sketch follows this list)
* move fma functions from freestanding libc to compiler-rt
- add __fmax and fmal
- make __fmax and fmaq exported only when they don't alias fmal.
- make their linkage weak just like the rest of compiler-rt symbols.
* removed `longDoubleIsF128` and replaced it with `longDoubleIs`, which
  takes a type as a parameter. The implementation is now more accurate
  and handles more targets. Similarly, in stage2 the function
  `CType.sizeInBits` is now more accurate for `long double` on more targets.
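Since `std.math.fma` now accepts `f80`, here is an illustrative call (a
minimal sketch, assuming the existing `fma(comptime T: type, x: T, y: T, z: T)`
signature; not code from this commit):

```zig
const std = @import("std");

test "std.math.fma handles f80" {
    // fma computes x * y + z with a single rounding; the operands below
    // are exactly representable, so the comparison is exact.
    const r = std.math.fma(f80, 0.5, 8.0, 1.0);
    try std.testing.expect(r == 5.0);
}
```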
Diffstat (limited to 'src/codegen')
| -rw-r--r-- | src/codegen/c.zig | 32 |
| -rw-r--r-- | src/codegen/llvm.zig | 158 |
2 files changed, 158 insertions, 32 deletions
diff --git a/src/codegen/c.zig b/src/codegen/c.zig
index e24ff0a6b0..2a10a8094a 100644
--- a/src/codegen/c.zig
+++ b/src/codegen/c.zig
@@ -16,6 +16,7 @@ const trace = @import("../tracy.zig").trace;
 const LazySrcLoc = Module.LazySrcLoc;
 const Air = @import("../Air.zig");
 const Liveness = @import("../Liveness.zig");
+const CType = @import("../type.zig").CType;
 
 const Mutability = enum { Const, Mut };
 const BigIntConst = std.math.big.int.Const;
@@ -1635,7 +1636,7 @@ fn genBody(f: *Function, body: []const Air.Inst.Index) error{ AnalysisFail, OutO
             .trunc_float,
             => |tag| return f.fail("TODO: C backend: implement unary op for tag '{s}'", .{@tagName(tag)}),
 
-            .mul_add => return f.fail("TODO: C backend: implement @mulAdd", .{}),
+            .mul_add => try airMulAdd(f, inst),
 
             .add_with_overflow => try airAddWithOverflow(f, inst),
             .sub_with_overflow => try airSubWithOverflow(f, inst),
@@ -3623,6 +3624,35 @@ fn airWasmMemoryGrow(f: *Function, inst: Air.Inst.Index) !CValue {
     return local;
 }
 
+fn airMulAdd(f: *Function, inst: Air.Inst.Index) !CValue {
+    if (f.liveness.isUnused(inst)) return CValue.none;
+    const pl_op = f.air.instructions.items(.data)[inst].pl_op;
+    const extra = f.air.extraData(Air.Bin, pl_op.payload).data;
+    const inst_ty = f.air.typeOfIndex(inst);
+    const mulend1 = try f.resolveInst(extra.lhs);
+    const mulend2 = try f.resolveInst(extra.rhs);
+    const addend = try f.resolveInst(pl_op.operand);
+    const writer = f.object.writer();
+    const target = f.object.dg.module.getTarget();
+    const fn_name = switch (inst_ty.floatBits(target)) {
+        16, 32 => "fmaf",
+        64 => "fma",
+        80 => if (CType.longdouble.sizeInBits(target) == 80) "fmal" else "__fmax",
+        128 => if (CType.longdouble.sizeInBits(target) == 128) "fmal" else "fmaq",
+        else => unreachable,
+    };
+    const local = try f.allocLocal(inst_ty, .Const);
+    try writer.writeAll(" = ");
+    try writer.print("{s}(", .{fn_name});
+    try f.writeCValue(writer, mulend1);
+    try writer.writeAll(", ");
+    try f.writeCValue(writer, mulend2);
+    try writer.writeAll(", ");
+    try f.writeCValue(writer, addend);
+    try writer.writeAll(");\n");
+    return local;
+}
+
 fn toMemoryOrder(order: std.builtin.AtomicOrder) [:0]const u8 {
     return switch (order) {
         .Unordered => "memory_order_relaxed",
diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig
index 86573e182e..c837d9a00d 100644
--- a/src/codegen/llvm.zig
+++ b/src/codegen/llvm.zig
@@ -18,6 +18,7 @@ const target_util = @import("../target.zig");
 const Value = @import("../value.zig").Value;
 const Type = @import("../type.zig").Type;
 const LazySrcLoc = Module.LazySrcLoc;
+const CType = @import("../type.zig").CType;
 
 const Error = error{ OutOfMemory, CodegenFail };
 
@@ -2189,12 +2190,12 @@ pub const FuncGen = struct {
             .min => try self.airMin(inst),
             .max => try self.airMax(inst),
             .slice => try self.airSlice(inst),
+            .mul_add => try self.airMulAdd(inst),
 
             .add_with_overflow => try self.airOverflow(inst, "llvm.sadd.with.overflow", "llvm.uadd.with.overflow"),
             .sub_with_overflow => try self.airOverflow(inst, "llvm.ssub.with.overflow", "llvm.usub.with.overflow"),
             .mul_with_overflow => try self.airOverflow(inst, "llvm.smul.with.overflow", "llvm.umul.with.overflow"),
             .shl_with_overflow => try self.airShlWithOverflow(inst),
 
-            .mul_add => try self.airMulAdd(inst),
-
             .bit_and, .bool_and => try self.airAnd(inst),
             .bit_or, .bool_or => try self.airOr(inst),
@@ -3844,43 +3845,43 @@ pub const FuncGen = struct {
     }
 
     fn airMulAdd(self: *FuncGen, inst: Air.Inst.Index) !?*const llvm.Value {
-        if (self.liveness.isUnused(inst))
-            return null;
+        if (self.liveness.isUnused(inst)) return null;
 
-        const ty_pl = self.air.instructions.items(.data)[inst].ty_pl;
-        const extra = self.air.extraData(Air.MulAdd, ty_pl.payload).data;
+        const pl_op = self.air.instructions.items(.data)[inst].pl_op;
+        const extra = self.air.extraData(Air.Bin, pl_op.payload).data;
 
-        const mulend1 = try self.resolveInst(extra.mulend1);
-        const mulend2 = try self.resolveInst(extra.mulend2);
-        const addend = try self.resolveInst(extra.addend);
+        const mulend1 = try self.resolveInst(extra.lhs);
+        const mulend2 = try self.resolveInst(extra.rhs);
+        const addend = try self.resolveInst(pl_op.operand);
 
         const ty = self.air.typeOfIndex(inst);
         const llvm_ty = try self.dg.llvmType(ty);
         const target = self.dg.module.getTarget();
 
-        const fn_val = switch (ty.floatBits(target)) {
-            16, 32, 64 => blk: {
-                break :blk self.getIntrinsic("llvm.fma", &.{llvm_ty});
-            },
-            // TODO: using `llvm.fma` for f80 does not seem to work for all targets, needs further investigation.
-            80 => return self.dg.todo("Implement mulAdd for f80", .{}),
-            128 => blk: {
-                // LLVM incorrectly lowers the fma builtin for f128 to fmal, which is for
-                // `long double`. On some targets this will be correct; on others it will be incorrect.
-                if (target.longDoubleIsF128()) {
-                    break :blk self.getIntrinsic("llvm.fma", &.{llvm_ty});
-                } else {
-                    break :blk self.dg.object.llvm_module.getNamedFunction("fmaq") orelse fn_blk: {
-                        const param_types = [_]*const llvm.Type{ llvm_ty, llvm_ty, llvm_ty };
-                        const fn_type = llvm.functionType(llvm_ty, &param_types, param_types.len, .False);
-                        break :fn_blk self.dg.object.llvm_module.addFunction("fmaq", fn_type);
-                    };
-                }
-            },
+        const Strat = union(enum) {
+            intrinsic,
+            libc: [*:0]const u8,
+        };
+        const strat: Strat = switch (ty.floatBits(target)) {
+            16, 32, 64 => Strat.intrinsic,
+            80 => if (CType.longdouble.sizeInBits(target) == 80) Strat{ .intrinsic = {} } else Strat{ .libc = "__fmax" },
+            // LLVM always lowers the fma builtin for f128 to fmal, which is for `long double`.
+            // On some targets this will be correct; on others it will be incorrect.
+            128 => if (CType.longdouble.sizeInBits(target) == 128) Strat{ .intrinsic = {} } else Strat{ .libc = "fmaq" },
             else => unreachable,
         };
+
+        const llvm_fn = switch (strat) {
+            .intrinsic => self.getIntrinsic("llvm.fma", &.{llvm_ty}),
+            .libc => |fn_name| self.dg.object.llvm_module.getNamedFunction(fn_name) orelse b: {
+                const param_types = [_]*const llvm.Type{ llvm_ty, llvm_ty, llvm_ty };
+                const fn_type = llvm.functionType(llvm_ty, &param_types, param_types.len, .False);
+                break :b self.dg.object.llvm_module.addFunction(fn_name, fn_type);
+            },
+        };
+
         const params = [_]*const llvm.Value{ mulend1, mulend2, addend };
-        return self.builder.buildCall(fn_val, &params, params.len, .C, .Auto, "");
+        return self.builder.buildCall(llvm_fn, &params, params.len, .C, .Auto, "");
     }
 
     fn airShlWithOverflow(self: *FuncGen, inst: Air.Inst.Index) !?*const llvm.Value {
@@ -4061,8 +4062,15 @@ pub const FuncGen = struct {
         const ty_op = self.air.instructions.items(.data)[inst].ty_op;
         const operand = try self.resolveInst(ty_op.operand);
-        const dest_llvm_ty = try self.dg.llvmType(self.air.typeOfIndex(inst));
-
+        const operand_ty = self.air.typeOf(ty_op.operand);
+        const dest_ty = self.air.typeOfIndex(inst);
+        const target = self.dg.module.getTarget();
+        const dest_bits = dest_ty.floatBits(target);
+        const src_bits = operand_ty.floatBits(target);
+        if (!backendSupportsF80(target) and (src_bits == 80 or dest_bits == 80)) {
+            return softF80TruncOrExt(self, operand, src_bits, dest_bits);
+        }
+        const dest_llvm_ty = try self.dg.llvmType(dest_ty);
         return self.builder.buildFPTrunc(operand, dest_llvm_ty, "");
     }
@@ -4072,8 +4080,15 @@ pub const FuncGen = struct {
         const ty_op = self.air.instructions.items(.data)[inst].ty_op;
         const operand = try self.resolveInst(ty_op.operand);
 
+        const operand_ty = self.air.typeOf(ty_op.operand);
+        const dest_ty = self.air.typeOfIndex(inst);
+        const target = self.dg.module.getTarget();
+        const dest_bits = dest_ty.floatBits(target);
+        const src_bits = operand_ty.floatBits(target);
+        if (!backendSupportsF80(target) and (src_bits == 80 or dest_bits == 80)) {
+            return softF80TruncOrExt(self, operand, src_bits, dest_bits);
+        }
         const dest_llvm_ty = try self.dg.llvmType(self.air.typeOfIndex(inst));
-
         return self.builder.buildFPExt(operand, dest_llvm_ty, "");
     }
@@ -5105,6 +5120,87 @@ pub const FuncGen = struct {
         return null;
     }
 
+    fn softF80TruncOrExt(
+        self: *FuncGen,
+        operand: *const llvm.Value,
+        src_bits: u16,
+        dest_bits: u16,
+    ) !?*const llvm.Value {
+        const target = self.dg.module.getTarget();
+
+        var param_llvm_ty: *const llvm.Type = self.context.intType(80);
+        var ret_llvm_ty: *const llvm.Type = param_llvm_ty;
+        var fn_name: [*:0]const u8 = undefined;
+        var arg = operand;
+        var final_cast: ?*const llvm.Type = null;
+
+        assert(src_bits == 80 or dest_bits == 80);
+
+        if (src_bits == 80) switch (dest_bits) {
+            16 => {
+                // See corresponding condition at definition of
+                // __truncxfhf2 in compiler-rt.
+                if (target.cpu.arch.isAARCH64()) {
+                    ret_llvm_ty = self.context.halfType();
+                } else {
+                    ret_llvm_ty = self.context.intType(16);
+                    final_cast = self.context.halfType();
+                }
+                fn_name = "__truncxfhf2";
+            },
+            32 => {
+                ret_llvm_ty = self.context.floatType();
+                fn_name = "__truncxfsf2";
+            },
+            64 => {
+                ret_llvm_ty = self.context.doubleType();
+                fn_name = "__truncxfdf2";
+            },
+            80 => return operand,
+            128 => {
+                ret_llvm_ty = self.context.fp128Type();
+                fn_name = "__extendxftf2";
+            },
+            else => unreachable,
+        } else switch (src_bits) {
+            16 => {
+                // See corresponding condition at definition of
+                // __extendhfxf2 in compiler-rt.
+                param_llvm_ty = if (target.cpu.arch.isAARCH64())
+                    self.context.halfType()
+                else
+                    self.context.intType(16);
+                arg = self.builder.buildBitCast(arg, param_llvm_ty, "");
+                fn_name = "__extendhfxf2";
+            },
+            32 => {
+                param_llvm_ty = self.context.floatType();
+                fn_name = "__extendsfxf2";
+            },
+            64 => {
+                param_llvm_ty = self.context.doubleType();
+                fn_name = "__extenddfxf2";
+            },
+            80 => return operand,
+            128 => {
+                param_llvm_ty = self.context.fp128Type();
+                fn_name = "__trunctfxf2";
+            },
+            else => unreachable,
+        }
+
+        const llvm_fn = self.dg.object.llvm_module.getNamedFunction(fn_name) orelse f: {
+            const param_types = [_]*const llvm.Type{param_llvm_ty};
+            const fn_type = llvm.functionType(ret_llvm_ty, &param_types, param_types.len, .False);
+            break :f self.dg.object.llvm_module.addFunction(fn_name, fn_type);
+        };
+
+        var args: [1]*const llvm.Value = .{arg};
+        const result = self.builder.buildCall(llvm_fn, &args, args.len, .C, .Auto, "");
+
+        const final_cast_llvm_ty = final_cast orelse return result;
+        return self.builder.buildBitCast(result, final_cast_llvm_ty, "");
+    }
+
     fn getErrorNameTable(self: *FuncGen) !*const llvm.Value {
         if (self.dg.object.error_name_table) |table| {
             return table;
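As a usage-level illustration of the new `airFptrunc`/`airFpext` path: on a
target where the LLVM backend lacks native f80 support, a narrowing cast like
the one below is routed through `softF80TruncOrExt` and lowered to a
compiler-rt call such as `__truncxfdf2` (a hedged sketch using the stage2-era
two-argument `@floatCast`; not a test from this commit):

```zig
const std = @import("std");

test "f80 narrowing exercises the soft-float path" {
    var x: f80 = 1.25;
    // fptrunc f80 -> f64: without native f80 support this becomes a call
    // to compiler-rt's __truncxfdf2 instead of an LLVM fptrunc instruction.
    const y = @floatCast(f64, x);
    try std.testing.expect(y == 1.25);
}
```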
