Diffstat (limited to 'src/codegen')
| -rw-r--r-- | src/codegen/llvm.zig | 414 |
1 file changed, 219 insertions, 195 deletions
diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig
index aff7656bd3..4a0978af5b 100644
--- a/src/codegen/llvm.zig
+++ b/src/codegen/llvm.zig
@@ -780,7 +780,7 @@ pub const Object = struct {
             null;
 
         const emit_asm_path = try locPath(arena, comp.emit_asm, cache_dir);
-        const emit_llvm_ir_path = try locPath(arena, comp.emit_llvm_ir, cache_dir);
+        var emit_llvm_ir_path = try locPath(arena, comp.emit_llvm_ir, cache_dir);
         const emit_llvm_bc_path = try locPath(arena, comp.emit_llvm_bc, cache_dir);
 
         const emit_asm_msg = emit_asm_path orelse "(none)";
@@ -791,7 +791,34 @@ pub const Object = struct {
             emit_asm_msg, emit_bin_msg, emit_llvm_ir_msg, emit_llvm_bc_msg,
         });
 
+        // Unfortunately, LLVM shits the bed when we ask for both binary and assembly.
+        // So we call the entire pipeline multiple times if this is requested.
         var error_message: [*:0]const u8 = undefined;
+        if (emit_asm_path != null and emit_bin_path != null) {
+            if (self.target_machine.emitToFile(
+                self.llvm_module,
+                &error_message,
+                comp.bin_file.options.optimize_mode == .Debug,
+                comp.bin_file.options.optimize_mode == .ReleaseSmall,
+                comp.time_report,
+                comp.bin_file.options.tsan,
+                comp.bin_file.options.lto,
+                null,
+                emit_bin_path,
+                emit_llvm_ir_path,
+                null,
+            )) {
+                defer llvm.disposeMessage(error_message);
+
+                log.err("LLVM failed to emit bin={s} ir={s}: {s}", .{
+                    emit_bin_msg, emit_llvm_ir_msg, error_message,
+                });
+                return error.FailedToEmit;
+            }
+            emit_bin_path = null;
+            emit_llvm_ir_path = null;
+        }
+
         if (self.target_machine.emitToFile(
             self.llvm_module,
             &error_message,
@@ -2689,7 +2716,7 @@ pub const DeclGen = struct {
                 return dg.context.intType(bit_count);
             },
             .Float => switch (t.floatBits(target)) {
-                16 => return dg.context.halfType(),
+                16 => return if (backendSupportsF16(target)) dg.context.halfType() else dg.context.intType(16),
                 32 => return dg.context.floatType(),
                 64 => return dg.context.doubleType(),
                 80 => return if (backendSupportsF80(target)) dg.context.x86FP80Type() else dg.context.intType(80),
@@ -3204,7 +3231,15 @@ pub const DeclGen = struct {
             .Float => {
                 const llvm_ty = try dg.lowerType(tv.ty);
                 switch (tv.ty.floatBits(target)) {
-                    16, 32, 64 => return llvm_ty.constReal(tv.val.toFloat(f64)),
+                    16 => if (intrinsicsAllowed(tv.ty, target)) {
+                        return llvm_ty.constReal(tv.val.toFloat(f16));
+                    } else {
+                        const repr = @bitCast(u16, tv.val.toFloat(f16));
+                        const llvm_i16 = dg.context.intType(16);
+                        const int = llvm_i16.constInt(repr, .False);
+                        return int.constBitCast(llvm_ty);
+                    },
+                    32, 64 => return llvm_ty.constReal(tv.val.toFloat(f64)),
                     80 => {
                         const float = tv.val.toFloat(f80);
                         const repr = std.math.break_f80(float);
@@ -4316,13 +4351,17 @@ pub const FuncGen = struct {
         const gop = try self.func_inst_table.getOrPut(self.dg.gpa, inst);
         if (gop.found_existing) return gop.value_ptr.*;
 
-        const val = self.air.value(inst).?;
-        const ty = self.air.typeOf(inst);
-        const llvm_val = try self.dg.lowerValue(.{ .ty = ty, .val = val });
-        if (!isByRef(ty)) {
-            gop.value_ptr.* = llvm_val;
-            return llvm_val;
-        }
+        const llvm_val = try self.resolveValue(.{
+            .ty = self.air.typeOf(inst),
+            .val = self.air.value(inst).?,
+        });
+        gop.value_ptr.* = llvm_val;
+        return llvm_val;
+    }
+
+    fn resolveValue(self: *FuncGen, tv: TypedValue) !*llvm.Value {
+        const llvm_val = try self.dg.lowerValue(tv);
+        if (!isByRef(tv.ty)) return llvm_val;
 
         // We have an LLVM value but we need to create a global constant and
         // set the value as its initializer, and then return a pointer to the global.
@@ -4334,11 +4373,11 @@ pub const FuncGen = struct {
         global.setLinkage(.Private);
         global.setGlobalConstant(.True);
         global.setUnnamedAddr(.True);
-        global.setAlignment(ty.abiAlignment(target));
+        global.setAlignment(tv.ty.abiAlignment(target));
         // Because of LLVM limitations for lowering certain types such as unions,
         // the type of global constants might not match the type it is supposed to
         // be, and so we must bitcast the pointer at the usage sites.
-        const wanted_llvm_ty = try self.dg.lowerType(ty);
+        const wanted_llvm_ty = try self.dg.lowerType(tv.ty);
         const wanted_bitcasted_llvm_ptr_ty = wanted_llvm_ty.pointerType(llvm_actual_addrspace);
         const bitcasted_ptr = global.constBitCast(wanted_bitcasted_llvm_ptr_ty);
         const wanted_llvm_ptr_ty = wanted_llvm_ty.pointerType(llvm_wanted_addrspace);
@@ -4346,7 +4385,6 @@ pub const FuncGen = struct {
             bitcasted_ptr.constAddrSpaceCast(wanted_llvm_ptr_ty)
         else
             bitcasted_ptr;
-        gop.value_ptr.* = casted_ptr;
         return casted_ptr;
     }
 
@@ -7576,13 +7614,25 @@ pub const FuncGen = struct {
         const target = self.dg.module.getTarget();
         const dest_bits = dest_ty.floatBits(target);
         const src_bits = operand_ty.floatBits(target);
-        if (!backendSupportsF80(target) and (src_bits == 80 or dest_bits == 80)) {
-            return softF80TruncOrExt(self, operand, src_bits, dest_bits);
-        } else if (!backendSupportsF128(target) and (src_bits == 128 or dest_bits == 128)) {
-            return softF128TruncOrExt(self, operand, src_bits, dest_bits);
+
+        if (intrinsicsAllowed(dest_ty, target) and intrinsicsAllowed(operand_ty, target)) {
+            const dest_llvm_ty = try self.dg.lowerType(dest_ty);
+            return self.builder.buildFPTrunc(operand, dest_llvm_ty, "");
+        } else {
+            const operand_llvm_ty = try self.dg.lowerType(operand_ty);
+            const dest_llvm_ty = try self.dg.lowerType(dest_ty);
+
+            var fn_name_buf: [64]u8 = undefined;
+            const fn_name = std.fmt.bufPrintZ(&fn_name_buf, "__trunc{s}f{s}f2", .{
+                compilerRtFloatAbbrev(src_bits), compilerRtFloatAbbrev(dest_bits),
+            }) catch unreachable;
+
+            const params = [1]*llvm.Value{operand};
+            const param_types = [1]*llvm.Type{operand_llvm_ty};
+            const llvm_fn = self.getLibcFunction(fn_name, &param_types, dest_llvm_ty);
+
+            return self.builder.buildCall(llvm_fn.globalGetValueType(), llvm_fn, &params, params.len, .C, .Auto, "");
         }
-        const dest_llvm_ty = try self.dg.lowerType(dest_ty);
-        return self.builder.buildFPTrunc(operand, dest_llvm_ty, "");
     }
 
     fn airFpext(self: *FuncGen, inst: Air.Inst.Index) !?*llvm.Value {
@@ -7596,13 +7646,25 @@ pub const FuncGen = struct {
        const target = self.dg.module.getTarget();
        const dest_bits = dest_ty.floatBits(target);
        const src_bits = operand_ty.floatBits(target);
-        if (!backendSupportsF80(target) and (src_bits == 80 or dest_bits == 80)) {
-            return softF80TruncOrExt(self, operand, src_bits, dest_bits);
-        } else if (!backendSupportsF128(target) and (src_bits == 128 or dest_bits == 128)) {
-            return softF128TruncOrExt(self, operand, src_bits, dest_bits);
+
+        if (intrinsicsAllowed(dest_ty, target) and intrinsicsAllowed(operand_ty, target)) {
+            const dest_llvm_ty = try self.dg.lowerType(dest_ty);
+            return self.builder.buildFPExt(operand, dest_llvm_ty, "");
+        } else {
+            const operand_llvm_ty = try self.dg.lowerType(operand_ty);
+            const dest_llvm_ty = try self.dg.lowerType(dest_ty);
+
+            var fn_name_buf: [64]u8 = undefined;
+            const fn_name = std.fmt.bufPrintZ(&fn_name_buf, "__extend{s}f{s}f2", .{
+                compilerRtFloatAbbrev(src_bits), compilerRtFloatAbbrev(dest_bits),
+            }) catch unreachable;
+
+            const params = [1]*llvm.Value{operand};
+            const param_types = [1]*llvm.Type{operand_llvm_ty};
+            const llvm_fn = self.getLibcFunction(fn_name, &param_types, dest_llvm_ty);
+
+            return self.builder.buildCall(llvm_fn.globalGetValueType(), llvm_fn, &params, params.len, .C, .Auto, "");
        }
-        const dest_llvm_ty = try self.dg.lowerType(self.air.typeOfIndex(inst));
-        return self.builder.buildFPExt(operand, dest_llvm_ty, "");
    }
 
    fn airPtrToInt(self: *FuncGen, inst: Air.Inst.Index) !?*llvm.Value {
@@ -8680,12 +8742,78 @@ pub const FuncGen = struct {
         return self.builder.buildShuffleVector(a, b, llvm_mask_value, "");
     }
 
+    /// Reduce a vector by repeatedly applying `llvm_fn` to produce an accumulated result.
+    ///
+    /// Equivalent to:
+    ///   reduce: {
+    ///       var i: usize = 0;
+    ///       var accum: T = init;
+    ///       while (i < vec.len) : (i += 1) {
+    ///           accum = llvm_fn(accum, vec[i]);
+    ///       }
+    ///       break :reduce accum;
+    ///   }
+    ///
+    fn buildReducedCall(
+        self: *FuncGen,
+        llvm_fn: *llvm.Value,
+        operand_vector: *llvm.Value,
+        vector_len: usize,
+        accum_init: *llvm.Value,
+    ) !*llvm.Value {
+        const llvm_usize_ty = try self.dg.lowerType(Type.usize);
+        const llvm_vector_len = llvm_usize_ty.constInt(vector_len, .False);
+        const llvm_result_ty = accum_init.typeOf();
+
+        // Allocate and initialize our mutable variables
+        const i_ptr = self.buildAlloca(llvm_usize_ty, null);
+        _ = self.builder.buildStore(llvm_usize_ty.constInt(0, .False), i_ptr);
+        const accum_ptr = self.buildAlloca(llvm_result_ty, null);
+        _ = self.builder.buildStore(accum_init, accum_ptr);
+
+        // Setup the loop
+        const loop = self.context.appendBasicBlock(self.llvm_func, "ReduceLoop");
+        const loop_exit = self.context.appendBasicBlock(self.llvm_func, "AfterReduce");
+        _ = self.builder.buildBr(loop);
+        {
+            self.builder.positionBuilderAtEnd(loop);
+
+            // while (i < vec.len)
+            const i = self.builder.buildLoad(llvm_usize_ty, i_ptr, "");
+            const cond = self.builder.buildICmp(.ULT, i, llvm_vector_len, "");
+            const loop_then = self.context.appendBasicBlock(self.llvm_func, "ReduceLoopThen");
+
+            _ = self.builder.buildCondBr(cond, loop_then, loop_exit);
+
+            {
+                self.builder.positionBuilderAtEnd(loop_then);
+
+                // accum = f(accum, vec[i]);
+                const accum = self.builder.buildLoad(llvm_result_ty, accum_ptr, "");
+                const element = self.builder.buildExtractElement(operand_vector, i, "");
+                const params = [2]*llvm.Value{ accum, element };
+                const new_accum = self.builder.buildCall(llvm_fn.globalGetValueType(), llvm_fn, &params, params.len, .C, .Auto, "");
+                _ = self.builder.buildStore(new_accum, accum_ptr);
+
+                // i += 1
+                const new_i = self.builder.buildAdd(i, llvm_usize_ty.constInt(1, .False), "");
+                _ = self.builder.buildStore(new_i, i_ptr);
+                _ = self.builder.buildBr(loop);
+            }
+        }
+
+        self.builder.positionBuilderAtEnd(loop_exit);
+        return self.builder.buildLoad(llvm_result_ty, accum_ptr, "");
+    }
+
     fn airReduce(self: *FuncGen, inst: Air.Inst.Index, want_fast_math: bool) !?*llvm.Value {
         if (self.liveness.isUnused(inst)) return null;
         self.builder.setFastMath(want_fast_math);
 
+        const target = self.dg.module.getTarget();
         const reduce = self.air.instructions.items(.data)[inst].reduce;
-        const operand = try self.resolveInst(reduce.operand);
+        var operand = try self.resolveInst(reduce.operand);
+        const operand_ty = self.air.typeOf(reduce.operand);
         const scalar_ty = self.air.typeOfIndex(inst);
 
         // TODO handle the fast math setting
@@ -8696,17 +8824,21 @@ pub const FuncGen = struct {
             .Xor => return self.builder.buildXorReduce(operand),
             .Min => switch (scalar_ty.zigTypeTag()) {
                 .Int => return self.builder.buildIntMinReduce(operand, scalar_ty.isSignedInt()),
-                .Float => return self.builder.buildFPMinReduce(operand),
+                .Float => if (intrinsicsAllowed(scalar_ty, target)) {
+                    return self.builder.buildFPMinReduce(operand);
+                },
                 else => unreachable,
             },
             .Max => switch (scalar_ty.zigTypeTag()) {
                 .Int => return self.builder.buildIntMaxReduce(operand, scalar_ty.isSignedInt()),
-                .Float => return self.builder.buildFPMaxReduce(operand),
+                .Float => if (intrinsicsAllowed(scalar_ty, target)) {
+                    return self.builder.buildFPMaxReduce(operand);
+                },
                 else => unreachable,
            },
            .Add => switch (scalar_ty.zigTypeTag()) {
                .Int => return self.builder.buildAddReduce(operand),
-                .Float => {
+                .Float => if (intrinsicsAllowed(scalar_ty, target)) {
                    const scalar_llvm_ty = try self.dg.lowerType(scalar_ty);
                    const neutral_value = scalar_llvm_ty.constReal(-0.0);
                    return self.builder.buildFPAddReduce(neutral_value, operand);
@@ -8715,7 +8847,7 @@ pub const FuncGen = struct {
            },
            .Mul => switch (scalar_ty.zigTypeTag()) {
                .Int => return self.builder.buildMulReduce(operand),
-                .Float => {
+                .Float => if (intrinsicsAllowed(scalar_ty, target)) {
                    const scalar_llvm_ty = try self.dg.lowerType(scalar_ty);
                    const neutral_value = scalar_llvm_ty.constReal(1.0);
                    return self.builder.buildFPMulReduce(neutral_value, operand);
@@ -8723,6 +8855,44 @@ pub const FuncGen = struct {
                 else => unreachable,
             },
         }
+
+        // Reduction could not be performed with intrinsics.
+        // Use a manual loop over a softfloat call instead.
+        var fn_name_buf: [64]u8 = undefined;
+        const float_bits = scalar_ty.floatBits(target);
+        const fn_name = switch (reduce.operation) {
+            .Min => std.fmt.bufPrintZ(&fn_name_buf, "{s}fmin{s}", .{
+                libcFloatPrefix(float_bits), libcFloatSuffix(float_bits),
+            }) catch unreachable,
+            .Max => std.fmt.bufPrintZ(&fn_name_buf, "{s}fmax{s}", .{
+                libcFloatPrefix(float_bits), libcFloatSuffix(float_bits),
+            }) catch unreachable,
+            .Add => std.fmt.bufPrintZ(&fn_name_buf, "__add{s}f3", .{
+                compilerRtFloatAbbrev(float_bits),
+            }) catch unreachable,
+            .Mul => std.fmt.bufPrintZ(&fn_name_buf, "__mul{s}f3", .{
+                compilerRtFloatAbbrev(float_bits),
+            }) catch unreachable,
+            else => unreachable,
+        };
+        var init_value_payload = Value.Payload.Float_32{
+            .data = switch (reduce.operation) {
+                .Min => std.math.nan(f32),
+                .Max => std.math.nan(f32),
+                .Add => -0.0,
+                .Mul => 1.0,
+                else => unreachable,
+            },
+        };
+
+        const param_llvm_ty = try self.dg.lowerType(scalar_ty);
+        const param_types = [2]*llvm.Type{ param_llvm_ty, param_llvm_ty };
+        const libc_fn = self.getLibcFunction(fn_name, &param_types, param_llvm_ty);
+        const init_value = try self.dg.lowerValue(.{
+            .ty = scalar_ty,
+            .val = Value.initPayload(&init_value_payload.base),
+        });
+        return self.buildReducedCall(libc_fn, operand, operand_ty.vectorLen(), init_value);
     }
 
     fn airAggregateInit(self: *FuncGen, inst: Air.Inst.Index) !?*llvm.Value {
@@ -8849,7 +9019,7 @@ pub const FuncGen = struct {
                     llvm_usize.constInt(@intCast(c_uint, array_info.len), .False),
                 };
                 const elem_ptr = self.builder.buildInBoundsGEP(llvm_result_ty, alloca_inst, &indices, indices.len, "");
-                const llvm_elem = try self.dg.lowerValue(.{
+                const llvm_elem = try self.resolveValue(.{
                     .ty = array_info.elem_type,
                     .val = sent_val,
                 });
@@ -9012,7 +9182,13 @@ pub const FuncGen = struct {
        const target = self.dg.module.getTarget();
        switch (prefetch.cache) {
            .instruction => switch (target.cpu.arch) {
-                .x86_64, .i386 => return null,
+                .x86_64,
+                .i386,
+                .powerpc,
+                .powerpcle,
+                .powerpc64,
+                .powerpc64le,
+                => return null,
                .arm, .armeb, .thumb, .thumbeb => {
                    switch (prefetch.rw) {
                        .write => return null,
@@ -9063,169 +9239,6 @@ pub const FuncGen = struct {
         return self.builder.buildAddrSpaceCast(operand, llvm_dest_ty, "");
     }
 
-    fn softF80TruncOrExt(
-        self: *FuncGen,
-        operand: *llvm.Value,
-        src_bits: u16,
-        dest_bits: u16,
-    ) !?*llvm.Value {
-        const target = self.dg.module.getTarget();
-
-        var param_llvm_ty: *llvm.Type = self.context.intType(80);
-        var ret_llvm_ty: *llvm.Type = param_llvm_ty;
-        var fn_name: [*:0]const u8 = undefined;
-        var arg = operand;
-        var final_cast: ?*llvm.Type = null;
-
-        assert(src_bits == 80 or dest_bits == 80);
-
-        if (src_bits == 80) switch (dest_bits) {
-            16 => {
-                // See corresponding condition at definition of
-                // __truncxfhf2 in compiler-rt.
-                if (target.cpu.arch.isAARCH64()) {
-                    ret_llvm_ty = self.context.halfType();
-                } else {
-                    ret_llvm_ty = self.context.intType(16);
-                    final_cast = self.context.halfType();
-                }
-                fn_name = "__truncxfhf2";
-            },
-            32 => {
-                ret_llvm_ty = self.context.floatType();
-                fn_name = "__truncxfsf2";
-            },
-            64 => {
-                ret_llvm_ty = self.context.doubleType();
-                fn_name = "__truncxfdf2";
-            },
-            80 => return operand,
-            128 => {
-                ret_llvm_ty = self.context.fp128Type();
-                fn_name = "__extendxftf2";
-            },
-            else => unreachable,
-        } else switch (src_bits) {
-            16 => {
-                // See corresponding condition at definition of
-                // __extendhfxf2 in compiler-rt.
-                param_llvm_ty = if (target.cpu.arch.isAARCH64())
-                    self.context.halfType()
-                else
-                    self.context.intType(16);
-                arg = self.builder.buildBitCast(arg, param_llvm_ty, "");
-                fn_name = "__extendhfxf2";
-            },
-            32 => {
-                param_llvm_ty = self.context.floatType();
-                fn_name = "__extendsfxf2";
-            },
-            64 => {
-                param_llvm_ty = self.context.doubleType();
-                fn_name = "__extenddfxf2";
-            },
-            80 => return operand,
-            128 => {
-                param_llvm_ty = self.context.fp128Type();
-                fn_name = "__trunctfxf2";
-            },
-            else => unreachable,
-        }
-
-        const llvm_fn = self.dg.object.llvm_module.getNamedFunction(fn_name) orelse f: {
-            const param_types = [_]*llvm.Type{param_llvm_ty};
-            const fn_type = llvm.functionType(ret_llvm_ty, &param_types, param_types.len, .False);
-            break :f self.dg.object.llvm_module.addFunction(fn_name, fn_type);
-        };
-
-        var args: [1]*llvm.Value = .{arg};
-        const result = self.builder.buildCall(llvm_fn.globalGetValueType(), llvm_fn, &args, args.len, .C, .Auto, "");
-        const final_cast_llvm_ty = final_cast orelse return result;
-        return self.builder.buildBitCast(result, final_cast_llvm_ty, "");
-    }
-
-    fn softF128TruncOrExt(
-        self: *FuncGen,
-        operand: *llvm.Value,
-        src_bits: u16,
-        dest_bits: u16,
-    ) !?*llvm.Value {
-        const target = self.dg.module.getTarget();
-
-        var param_llvm_ty: *llvm.Type = self.context.fp128Type();
-        var ret_llvm_ty: *llvm.Type = param_llvm_ty;
-        var fn_name: [*:0]const u8 = undefined;
-        var arg = operand;
-        var final_cast: ?*llvm.Type = null;
-
-        assert(src_bits == 128 or dest_bits == 128);
-
-        // TODO: Implement proper names and compiler-rt functions for this!!
-        if (src_bits == 128) switch (dest_bits) {
-            16 => {
-                // See corresponding condition at definition of
-                // __truncxfhf2 in compiler-rt.
-                if (target.cpu.arch.isAARCH64()) {
-                    ret_llvm_ty = self.context.halfType();
-                } else {
-                    ret_llvm_ty = self.context.intType(16);
-                    final_cast = self.context.halfType();
-                }
-                fn_name = "__trunctfhf2";
-            },
-            32 => {
-                ret_llvm_ty = self.context.floatType();
-                fn_name = "__trunctfsf2";
-            },
-            64 => {
-                ret_llvm_ty = self.context.doubleType();
-                fn_name = "__trunctfdf2";
-            },
-            80 => {
-                ret_llvm_ty = self.context.intType(80);
-                fn_name = "__trunctfxf2";
-            },
-            128 => return operand,
-            else => unreachable,
-        } else switch (src_bits) {
-            16 => {
-                // See corresponding condition at definition of
-                // __extendhftf2 in compiler-rt.
-                param_llvm_ty = if (target.cpu.arch.isAARCH64())
-                    self.context.halfType()
-                else
-                    self.context.intType(16);
-                arg = self.builder.buildBitCast(arg, param_llvm_ty, "");
-                fn_name = "__extendhftf2";
-            },
-            32 => {
-                param_llvm_ty = self.context.floatType();
-                fn_name = "__extendsftf2";
-            },
-            64 => {
-                param_llvm_ty = self.context.doubleType();
-                fn_name = "__extenddftf2";
-            },
-            80 => {
-                param_llvm_ty = self.context.intType(80);
-                fn_name = "__extendxftf2";
-            },
-            128 => return operand,
-            else => unreachable,
-        }
-
-        const llvm_fn = self.dg.object.llvm_module.getNamedFunction(fn_name) orelse f: {
-            const param_types = [_]*llvm.Type{param_llvm_ty};
-            const fn_type = llvm.functionType(ret_llvm_ty, &param_types, param_types.len, .False);
-            break :f self.dg.object.llvm_module.addFunction(fn_name, fn_type);
-        };
-
-        var args: [1]*llvm.Value = .{arg};
-        const result = self.builder.buildCall(llvm_fn.globalGetValueType(), llvm_fn, &args, args.len, .C, .Auto, "");
-        const final_cast_llvm_ty = final_cast orelse return result;
-        return self.builder.buildBitCast(result, final_cast_llvm_ty, "");
-    }
-
     fn getErrorNameTable(self: *FuncGen) !*llvm.Value {
         if (self.dg.object.error_name_table) |table| {
             return table;
@@ -10573,6 +10586,17 @@ fn backendSupportsF80(target: std.Target) bool {
 /// if it produces miscompilations.
 fn backendSupportsF16(target: std.Target) bool {
     return switch (target.cpu.arch) {
+        .powerpc,
+        .powerpcle,
+        .powerpc64,
+        .powerpc64le,
+        .wasm32,
+        .wasm64,
+        .mips,
+        .mipsel,
+        .mips64,
+        .mips64el,
+        => false,
        else => true,
    };
 }
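A note on the soft-float names built in the airFptrunc/airFpext hunks: the `compilerRtFloatAbbrev` helper is not part of this diff, but the "__trunc{s}f{s}f2" / "__extend{s}f{s}f2" format strings follow compiler-rt's one-letter width encoding. A minimal sketch of the assumed mapping, for orientation only:

```zig
// Sketch of the assumed mapping; the real compilerRtFloatAbbrev helper lives
// elsewhere in llvm.zig and is not shown in this diff.
fn compilerRtFloatAbbrev(float_bits: u16) []const u8 {
    return switch (float_bits) {
        16 => "h", // half
        32 => "s", // single
        64 => "d", // double
        80 => "x", // x87 80-bit extended
        128 => "t", // 128-bit quad
        else => unreachable,
    };
}
```

Under this scheme an f64 -> f16 truncation lowers to a call to __truncdfhf2 and an f16 -> f32 extension to __extendhfsf2, both provided by compiler-rt.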
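Where backendSupportsF16 now returns false, f16 values are carried in an i16 holding the raw IEEE binary16 bits, and constants are materialized with constInt followed by constBitCast as in the lowerValue hunk. A small illustration in plain Zig (not compiler code) of the bit pattern that ends up in the constInt call:

```zig
const std = @import("std");

test "f16 constant as a raw 16-bit pattern" {
    // 1.0 in IEEE binary16: sign 0, biased exponent 0b01111, mantissa 0 => 0x3C00.
    // This is the `repr` value the backend feeds to constInt before bitcasting
    // the resulting i16 to the half type.
    const repr = @bitCast(u16, @as(f16, 1.0));
    try std.testing.expectEqual(@as(u16, 0x3C00), repr);
}
```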
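The airReduce fallback seeds buildReducedCall with a neutral accumulator: NaN for Min/Max (fmin/fmax return the other operand when one argument is NaN), -0.0 for Add, and 1.0 for Mul. A userland sketch of the same fold, using direct Zig arithmetic with illustrative function names rather than the soft-float routines the backend actually calls:

```zig
const std = @import("std");

// Illustrative only: the backend emits this loop as LLVM IR that calls routines
// such as __addhf3 or fminf; here the fold is written directly in Zig.
fn reduceAdd(comptime T: type, elems: []const T) T {
    var accum: T = -0.0; // additive identity that still yields -0.0 for an all -0.0 input
    for (elems) |e| accum += e;
    return accum;
}

fn reduceMin(comptime T: type, elems: []const T) T {
    var accum: T = std.math.nan(T); // like fmin, a NaN accumulator means "no value yet"
    for (elems) |e| {
        // Mirror fmin semantics: prefer the non-NaN operand.
        if (std.math.isNan(accum) or e < accum) accum = e;
    }
    return accum;
}

test "reduce sketches" {
    const v = [_]f32{ 3.0, -1.5, 2.25 };
    try std.testing.expectEqual(@as(f32, 3.75), reduceAdd(f32, &v));
    try std.testing.expectEqual(@as(f32, -1.5), reduceMin(f32, &v));
}
```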
