34 files changed, 588 insertions, 156 deletions
diff --git a/doc/langref.html.in b/doc/langref.html.in index effa974f22..872c305252 100644 --- a/doc/langref.html.in +++ b/doc/langref.html.in @@ -7956,6 +7956,15 @@ fn readFile(allocator: Allocator, filename: []const u8) ![]u8 { The {#syntax#}comptime{#endsyntax#} keyword on a parameter means that the parameter must be known at compile time. </p> + {#header_open|@addrSpaceCast#} + <pre>{#syntax#}@addrSpaceCast(comptime addrspace: std.builtin.AddressSpace, ptr: anytype) anytype{#endsyntax#}</pre> + <p> + Converts a pointer from one address space to another. Depending on the current target and + address spaces, this cast may be a no-op, a complex operation, or illegal. If the cast is + legal, then the resulting pointer points to the same memory location as the pointer operand. + It is always valid to cast a pointer between the same address spaces. + </p> + {#header_close#} {#header_open|@addWithOverflow#} <pre>{#syntax#}@addWithOverflow(comptime T: type, a: T, b: T, result: *T) bool{#endsyntax#}</pre> <p> @@ -64,10 +64,10 @@ pub fn panic(msg: []const u8, error_return_trace: ?*std.builtin.StackTrace, _: ? if (builtin.is_test) { std.debug.panic("{s}", .{msg}); } - if (native_os != .freestanding and native_os != .other) { - std.os.abort(); + switch (native_os) { + .freestanding, .other, .amdhsa, .amdpal => while (true) {}, + else => std.os.abort(), } - while (true) {} } extern fn main(argc: c_int, argv: [*:null]?[*:0]u8) c_int; diff --git a/lib/compiler_rt/atomics.zig b/lib/compiler_rt/atomics.zig index 6935a858aa..8f02600564 100644 --- a/lib/compiler_rt/atomics.zig +++ b/lib/compiler_rt/atomics.zig @@ -35,6 +35,17 @@ const largest_atomic_size = switch (arch) { else => @sizeOf(usize), }; +// The size (in bytes) of the smallest atomic object that the architecture can +// perform fetch/exchange atomically. Note, this does not encompass load and store. +// Objects smaller than this threshold are implemented in terms of compare-exchange +// of a larger value. +const smallest_atomic_fetch_exch_size = switch (arch) { + // On AMDGPU, there are no instructions for atomic operations other than load and store + // (as of LLVM 15), and so these need to be implemented in terms of atomic CAS. 
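To make that concrete (a hedged sketch, not part of the commit): an 8-bit read-modify-write like the one below has no native AMDGPU instruction, so LLVM routes it to a compiler-rt helper (e.g. __atomic_fetch_add_1), and the new smallest_atomic_fetch_exch_size / wideUpdate path implements that helper with a compare-exchange loop on the containing 32-bit word.

    fn bump(counter: *u8) u8 {
        // On amdgcn this is expected to lower to a libcall into
        // lib/compiler_rt/atomics.zig rather than a native atomic RMW.
        return @atomicRmw(u8, counter, .Add, 1, .SeqCst);
    }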
+ .amdgcn => @sizeOf(u32), + else => @sizeOf(u8), +}; + const cache_line_size = 64; const SpinlockTable = struct { @@ -206,6 +217,31 @@ fn __atomic_store_8(dst: *u64, value: u64, model: i32) callconv(.C) void { return atomic_store_N(u64, dst, value, model); } +fn wideUpdate(comptime T: type, ptr: *T, val: T, update: anytype) T { + const WideAtomic = std.meta.Int(.unsigned, smallest_atomic_fetch_exch_size * 8); + + const addr = @ptrToInt(ptr); + const wide_addr = addr & ~(@as(T, smallest_atomic_fetch_exch_size) - 1); + const wide_ptr = @alignCast(smallest_atomic_fetch_exch_size, @intToPtr(*WideAtomic, wide_addr)); + + const inner_offset = addr & (@as(T, smallest_atomic_fetch_exch_size) - 1); + const inner_shift = @intCast(std.math.Log2Int(T), inner_offset * 8); + + const mask = @as(WideAtomic, std.math.maxInt(T)) << inner_shift; + + var wide_old = @atomicLoad(WideAtomic, wide_ptr, .SeqCst); + while (true) { + const old = @truncate(T, (wide_old & mask) >> inner_shift); + const new = update(val, old); + const wide_new = wide_old & ~mask | (@as(WideAtomic, new) << inner_shift); + if (@cmpxchgWeak(WideAtomic, wide_ptr, wide_old, wide_new, .SeqCst, .SeqCst)) |new_wide_old| { + wide_old = new_wide_old; + } else { + return old; + } + } +} + inline fn atomic_exchange_N(comptime T: type, ptr: *T, val: T, model: i32) T { _ = model; if (@sizeOf(T) > largest_atomic_size) { @@ -214,6 +250,15 @@ inline fn atomic_exchange_N(comptime T: type, ptr: *T, val: T, model: i32) T { const value = ptr.*; ptr.* = val; return value; + } else if (@sizeOf(T) < smallest_atomic_fetch_exch_size) { + // Machine does not support this type, but it does support a larger type. + const Updater = struct { + fn update(new: T, old: T) T { + _ = old; + return new; + } + }; + return wideUpdate(T, ptr, val, Updater.update); } else { return @atomicRmw(T, ptr, .Xchg, val, .SeqCst); } @@ -282,22 +327,30 @@ fn __atomic_compare_exchange_8(ptr: *u64, expected: *u64, desired: u64, success: inline fn fetch_op_N(comptime T: type, comptime op: std.builtin.AtomicRmwOp, ptr: *T, val: T, model: i32) T { _ = model; + const Updater = struct { + fn update(new: T, old: T) T { + return switch (op) { + .Add => old +% new, + .Sub => old -% new, + .And => old & new, + .Nand => ~(old & new), + .Or => old | new, + .Xor => old ^ new, + else => @compileError("unsupported atomic op"), + }; + } + }; + if (@sizeOf(T) > largest_atomic_size) { var sl = spinlocks.get(@ptrToInt(ptr)); defer sl.release(); const value = ptr.*; - ptr.* = switch (op) { - .Add => value +% val, - .Sub => value -% val, - .And => value & val, - .Nand => ~(value & val), - .Or => value | val, - .Xor => value ^ val, - else => @compileError("unsupported atomic op"), - }; - + ptr.* = Updater.update(val, value); return value; + } else if (@sizeOf(T) < smallest_atomic_fetch_exch_size) { + // Machine does not support this type, but it does support a larger type. 
+ return wideUpdate(T, ptr, val, Updater.update); } return @atomicRmw(T, ptr, op, val, .SeqCst); diff --git a/lib/std/builtin.zig b/lib/std/builtin.zig index a2d3bcd870..87e8e90df8 100644 --- a/lib/std/builtin.zig +++ b/lib/std/builtin.zig @@ -157,6 +157,7 @@ pub const CallingConvention = enum { SysV, Win64, PtxKernel, + AmdgpuKernel, }; /// This data structure is used by the Zig language code generation and diff --git a/lib/std/math/big/int.zig b/lib/std/math/big/int.zig index 1040226993..b875f73b2e 100644 --- a/lib/std/math/big/int.zig +++ b/lib/std/math/big/int.zig @@ -1859,7 +1859,7 @@ pub const Mutable = struct { /// [1, 2, 3, 4, 0] -> [1, 2, 3, 4] /// [1, 2, 0, 0, 0] -> [1, 2] /// [0, 0, 0, 0, 0] -> [0] - fn normalize(r: *Mutable, length: usize) void { + pub fn normalize(r: *Mutable, length: usize) void { r.len = llnormalize(r.limbs[0..length]); } }; diff --git a/lib/std/target.zig b/lib/std/target.zig index 758113dfcd..201fac222c 100644 --- a/lib/std/target.zig +++ b/lib/std/target.zig @@ -1157,6 +1157,17 @@ pub const Target = struct { }; } + /// Returns whether this architecture supports the address space + pub fn supportsAddressSpace(arch: Arch, address_space: std.builtin.AddressSpace) bool { + const is_nvptx = arch == .nvptx or arch == .nvptx64; + return switch (address_space) { + .generic => true, + .fs, .gs, .ss => arch == .x86_64 or arch == .i386, + .global, .constant, .local, .shared => arch == .amdgcn or is_nvptx, + .param => is_nvptx, + }; + } + pub fn ptrBitWidth(arch: Arch) u16 { switch (arch) { .avr, diff --git a/src/Air.zig b/src/Air.zig index 46ba297003..57479af590 100644 --- a/src/Air.zig +++ b/src/Air.zig @@ -729,6 +729,10 @@ pub const Inst = struct { /// Sets the operand as the current error return trace, set_err_return_trace, + /// Convert the address space of a pointer. + /// Uses the `ty_op` field. 
+ addrspace_cast, + pub fn fromCmpOp(op: std.math.CompareOperator, optimized: bool) Tag { switch (op) { .lt => return if (optimized) .cmp_lt_optimized else .cmp_lt, @@ -1138,6 +1142,7 @@ pub fn typeOfIndex(air: Air, inst: Air.Inst.Index) Type { .popcount, .byte_swap, .bit_reverse, + .addrspace_cast, => return air.getRefType(datas[inst].ty_op.ty), .loop, diff --git a/src/AstGen.zig b/src/AstGen.zig index 7534a0d2cc..7bb2ef765c 100644 --- a/src/AstGen.zig +++ b/src/AstGen.zig @@ -7789,6 +7789,14 @@ fn builtinCall( }); return rvalue(gz, rl, result, node); }, + .addrspace_cast => { + const result = try gz.addExtendedPayload(.addrspace_cast, Zir.Inst.BinNode{ + .lhs = try comptimeExpr(gz, scope, .{ .ty = .address_space_type }, params[0]), + .rhs = try expr(gz, scope, .none, params[1]), + .node = gz.nodeIndexToRelative(node), + }); + return rvalue(gz, rl, result, node); + }, // zig fmt: off .has_decl => return hasDeclOrField(gz, scope, rl, node, params[0], params[1], .has_decl), diff --git a/src/BuiltinFn.zig b/src/BuiltinFn.zig index 3a13dde1ab..eb878873a0 100644 --- a/src/BuiltinFn.zig +++ b/src/BuiltinFn.zig @@ -2,6 +2,7 @@ const std = @import("std"); pub const Tag = enum { add_with_overflow, + addrspace_cast, align_cast, align_of, as, @@ -153,6 +154,13 @@ pub const list = list: { }, }, .{ + "@addrSpaceCast", + .{ + .tag = .addrspace_cast, + .param_count = 2, + }, + }, + .{ "@alignCast", .{ .tag = .align_cast, diff --git a/src/Liveness.zig b/src/Liveness.zig index 5a4bd2265e..54a5041e8b 100644 --- a/src/Liveness.zig +++ b/src/Liveness.zig @@ -268,6 +268,7 @@ pub fn categorizeOperand( .bit_reverse, .splat, .error_set_has_value, + .addrspace_cast, => { const o = air_datas[inst].ty_op; if (o.operand == operand_ref) return matchOperandSmallIndex(l, inst, 0, .none); @@ -844,6 +845,7 @@ fn analyzeInst( .bit_reverse, .splat, .error_set_has_value, + .addrspace_cast, => { const o = inst_datas[inst].ty_op; return trackOperands(a, new_set, inst, main_tomb, .{ o.operand, .none, .none }); diff --git a/src/Module.zig b/src/Module.zig index 44502ab564..7d87bdba53 100644 --- a/src/Module.zig +++ b/src/Module.zig @@ -4617,7 +4617,7 @@ fn semaDecl(mod: *Module, decl_index: Decl.Index) !bool { .constant => target_util.defaultAddressSpace(target, .global_constant), else => unreachable, }, - else => |addrspace_ref| try sema.analyzeAddrspace(&block_scope, address_space_src, addrspace_ref, addrspace_ctx), + else => |addrspace_ref| try sema.analyzeAddressSpace(&block_scope, address_space_src, addrspace_ref, addrspace_ctx), }; }; diff --git a/src/Sema.zig b/src/Sema.zig index 5a0c30d71c..1b2bf84885 100644 --- a/src/Sema.zig +++ b/src/Sema.zig @@ -975,8 +975,9 @@ fn analyzeBodyInner( .reify => try sema.zirReify( block, extended, inst), .builtin_async_call => try sema.zirBuiltinAsyncCall( block, extended), .cmpxchg => try sema.zirCmpxchg( block, extended), - + .addrspace_cast => try sema.zirAddrSpaceCast( block, extended), // zig fmt: on + .fence => { try sema.zirFence(block, extended); i += 1; @@ -5897,7 +5898,7 @@ fn analyzeCall( }, else => {}, } - return sema.fail(block, func_src, "type '{}' not a function", .{callee_ty.fmt(sema.mod)}); + return sema.fail(block, func_src, "type '{}' is not a function", .{callee_ty.fmt(sema.mod)}); }; const func_ty_info = func_ty.fnInfo(); @@ -8141,6 +8142,10 @@ fn funcCommon( .nvptx, .nvptx64 => null, else => @as([]const u8, "nvptx and nvptx64"), }, + .AmdgpuKernel => switch (arch) { + .amdgcn => null, + else => @as([]const u8, "amdgcn"), + }, }) |allowed_platform| { return 
sema.fail(block, cc_src, "callconv '{s}' is only available on {s}, not {s}", .{ @tagName(cc_workaround), @@ -16246,7 +16251,7 @@ fn zirPtrType(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air const address_space = if (inst_data.flags.has_addrspace) blk: { const ref = @intToEnum(Zir.Inst.Ref, sema.code.extra[extra_i]); extra_i += 1; - break :blk try sema.analyzeAddrspace(block, addrspace_src, ref, .pointer); + break :blk try sema.analyzeAddressSpace(block, addrspace_src, ref, .pointer); } else .generic; const bit_offset = if (inst_data.flags.has_bit_range) blk: { @@ -18166,6 +18171,55 @@ fn reifyStruct( return sema.analyzeDeclVal(block, src, new_decl_index); } +fn zirAddrSpaceCast(sema: *Sema, block: *Block, extended: Zir.Inst.Extended.InstData) CompileError!Air.Inst.Ref { + const extra = sema.code.extraData(Zir.Inst.BinNode, extended.operand).data; + const src = LazySrcLoc.nodeOffset(extra.node); + const addrspace_src: LazySrcLoc = .{ .node_offset_builtin_call_arg0 = extra.node }; + const ptr_src: LazySrcLoc = .{ .node_offset_builtin_call_arg1 = extra.node }; + + const dest_addrspace = try sema.analyzeAddressSpace(block, addrspace_src, extra.lhs, .pointer); + const ptr = try sema.resolveInst(extra.rhs); + const ptr_ty = sema.typeOf(ptr); + + try sema.checkPtrOperand(block, ptr_src, ptr_ty); + + var ptr_info = ptr_ty.ptrInfo().data; + const src_addrspace = ptr_info.@"addrspace"; + if (!target_util.addrSpaceCastIsValid(sema.mod.getTarget(), src_addrspace, dest_addrspace)) { + const msg = msg: { + const msg = try sema.errMsg(block, src, "invalid address space cast", .{}); + errdefer msg.destroy(sema.gpa); + try sema.errNote(block, src, msg, "address space '{s}' is not compatible with address space '{s}'", .{ @tagName(src_addrspace), @tagName(dest_addrspace) }); + break :msg msg; + }; + return sema.failWithOwnedErrorMsg(msg); + } + + ptr_info.@"addrspace" = dest_addrspace; + const dest_ptr_ty = try Type.ptr(sema.arena, sema.mod, ptr_info); + const dest_ty = if (ptr_ty.zigTypeTag() == .Optional) + try Type.optional(sema.arena, dest_ptr_ty) + else + dest_ptr_ty; + + if (try sema.resolveMaybeUndefVal(block, ptr_src, ptr)) |val| { + // Pointer value should compatible with both address spaces. + // TODO: Figure out why this generates an invalid bitcast. + return sema.addConstant(dest_ty, val); + } + + try sema.requireRuntimeBlock(block, src, ptr_src); + // TODO: Address space cast safety? 
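At the source level, the analysis above corresponds to uses like the following (a minimal, hypothetical sketch; it assumes a target such as amdgcn where target_util.addrSpaceCastIsValid accepts a .global to .generic cast):

    fn toGeneric(p: *addrspace(.global) u32) *u32 {
        // The result points at the same memory location. For a comptime-known
        // pointer Sema folds the cast; otherwise it emits the new addrspace_cast
        // AIR instruction handled by the backends below.
        return @addrSpaceCast(.generic, p);
    }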
+ + return block.addInst(.{ + .tag = .addrspace_cast, + .data = .{ .ty_op = .{ + .ty = try sema.addType(dest_ty), + .operand = ptr, + } }, + }); +} + fn zirTypeName(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air.Inst.Ref { const inst_data = sema.code.instructions.items(.data)[inst].un_node; const ty_src: LazySrcLoc = .{ .node_offset_builtin_call_arg0 = inst_data.src_node }; @@ -18413,6 +18467,9 @@ fn zirPtrCast(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air if (operand_info.@"volatile" and !dest_info.@"volatile") { return sema.fail(block, src, "cast discards volatile qualifier", .{}); } + if (operand_info.@"addrspace" != dest_info.@"addrspace") { + return sema.fail(block, src, "cast changes pointer address space", .{}); + } const dest_is_slice = dest_ty.isSlice(); const operand_is_slice = operand_ty.isSlice(); @@ -30302,7 +30359,7 @@ pub const AddressSpaceContext = enum { pointer, }; -pub fn analyzeAddrspace( +pub fn analyzeAddressSpace( sema: *Sema, block: *Block, src: LazySrcLoc, @@ -30313,13 +30370,15 @@ pub fn analyzeAddrspace( const address_space = addrspace_tv.val.toEnum(std.builtin.AddressSpace); const target = sema.mod.getTarget(); const arch = target.cpu.arch; - const is_gpu = arch == .nvptx or arch == .nvptx64; + const is_nv = arch == .nvptx or arch == .nvptx64; + const is_gpu = is_nv or arch == .amdgcn; const supported = switch (address_space) { .generic => true, .gs, .fs, .ss => (arch == .i386 or arch == .x86_64) and ctx == .pointer, // TODO: check that .shared and .local are left uninitialized - .global, .param, .shared, .local => is_gpu, + .param => is_nv, + .global, .shared, .local => is_gpu, .constant => is_gpu and (ctx == .constant), }; diff --git a/src/Zir.zig b/src/Zir.zig index 890109fcb0..351330b7c4 100644 --- a/src/Zir.zig +++ b/src/Zir.zig @@ -1969,6 +1969,9 @@ pub const Inst = struct { /// `small` 0=>weak 1=>strong /// `operand` is payload index to `Cmpxchg`. cmpxchg, + /// Implement the builtin `@addrSpaceCast` + /// `Operand` is payload index to `BinNode`. `lhs` is dest type, `rhs` is operand. 
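Alongside the new zirPtrCast check above ("cast changes pointer address space"), the intended division of labour is that @ptrCast may change the element type but not the address space, which is now @addrSpaceCast's job. A hypothetical illustration, assuming .global is supported on the target:

    fn reinterpret(p: *addrspace(.global) u32) *addrspace(.global) f32 {
        // Allowed: the element type changes but the address space is preserved.
        // Writing @ptrCast(*f32, p) instead would now be rejected with
        // "cast changes pointer address space".
        return @ptrCast(*addrspace(.global) f32, p);
    }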
+ addrspace_cast, pub const InstData = struct { opcode: Extended, diff --git a/src/arch/aarch64/CodeGen.zig b/src/arch/aarch64/CodeGen.zig index ed3a281b80..2758fd36df 100644 --- a/src/arch/aarch64/CodeGen.zig +++ b/src/arch/aarch64/CodeGen.zig @@ -677,6 +677,7 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .union_init => try self.airUnionInit(inst), .prefetch => try self.airPrefetch(inst), .mul_add => try self.airMulAdd(inst), + .addrspace_cast => return self.fail("TODO implement addrspace_cast", .{}), .@"try" => try self.airTry(inst), .try_ptr => try self.airTryPtr(inst), diff --git a/src/arch/arm/CodeGen.zig b/src/arch/arm/CodeGen.zig index 95dfb2eea3..855951f5fa 100644 --- a/src/arch/arm/CodeGen.zig +++ b/src/arch/arm/CodeGen.zig @@ -690,6 +690,7 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .union_init => try self.airUnionInit(inst), .prefetch => try self.airPrefetch(inst), .mul_add => try self.airMulAdd(inst), + .addrspace_cast => return self.fail("TODO implement addrspace_cast", .{}), .@"try" => try self.airTry(inst), .try_ptr => try self.airTryPtr(inst), diff --git a/src/arch/riscv64/CodeGen.zig b/src/arch/riscv64/CodeGen.zig index 87e81748f9..dd31bfb6f7 100644 --- a/src/arch/riscv64/CodeGen.zig +++ b/src/arch/riscv64/CodeGen.zig @@ -604,6 +604,7 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .union_init => try self.airUnionInit(inst), .prefetch => try self.airPrefetch(inst), .mul_add => try self.airMulAdd(inst), + .addrspace_cast => @panic("TODO"), .@"try" => @panic("TODO"), .try_ptr => @panic("TODO"), diff --git a/src/arch/sparc64/CodeGen.zig b/src/arch/sparc64/CodeGen.zig index cfcfedf7cc..6217119f34 100644 --- a/src/arch/sparc64/CodeGen.zig +++ b/src/arch/sparc64/CodeGen.zig @@ -618,6 +618,7 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .union_init => @panic("TODO try self.airUnionInit(inst)"), .prefetch => try self.airPrefetch(inst), .mul_add => @panic("TODO try self.airMulAdd(inst)"), + .addrspace_cast => @panic("TODO try self.airAddrSpaceCast(int)"), .@"try" => try self.airTry(inst), .try_ptr => @panic("TODO try self.airTryPtr(inst)"), diff --git a/src/arch/wasm/CodeGen.zig b/src/arch/wasm/CodeGen.zig index f27957d3f5..619addfba1 100644 --- a/src/arch/wasm/CodeGen.zig +++ b/src/arch/wasm/CodeGen.zig @@ -1699,6 +1699,7 @@ fn genInst(self: *Self, inst: Air.Inst.Index) !WValue { .set_err_return_trace, .is_named_enum_value, .error_set_has_value, + .addrspace_cast, => |tag| return self.fail("TODO: Implement wasm inst: {s}", .{@tagName(tag)}), .add_optimized, diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 27eb11c649..abba07b0e8 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -695,6 +695,7 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .union_init => try self.airUnionInit(inst), .prefetch => try self.airPrefetch(inst), .mul_add => try self.airMulAdd(inst), + .addrspace_cast => return self.fail("TODO implement addrspace_cast", .{}), .@"try" => try self.airTry(inst), .try_ptr => try self.airTryPtr(inst), diff --git a/src/codegen/c.zig b/src/codegen/c.zig index b25e05e118..072091d9b2 100644 --- a/src/codegen/c.zig +++ b/src/codegen/c.zig @@ -1871,6 +1871,7 @@ fn genBody(f: *Function, body: []const Air.Inst.Index) error{ AnalysisFail, OutO .aggregate_init => try airAggregateInit(f, inst), .union_init => try airUnionInit(f, inst), .prefetch => try airPrefetch(f, inst), + .addrspace_cast 
=> return f.fail("TODO: C backend: implement addrspace_cast", .{}), .@"try" => try airTry(f, inst), .try_ptr => try airTryPtr(f, inst), diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig index 9ecf475e92..4a0978af5b 100644 --- a/src/codegen/llvm.zig +++ b/src/codegen/llvm.zig @@ -956,8 +956,7 @@ pub const Object = struct { if (isByRef(param_ty)) { const alignment = param_ty.abiAlignment(target); const param_llvm_ty = param.typeOf(); - const arg_ptr = buildAllocaInner(builder, llvm_func, false, param_llvm_ty); - arg_ptr.setAlignment(alignment); + const arg_ptr = buildAllocaInner(builder, llvm_func, false, param_llvm_ty, alignment, target); const store_inst = builder.buildStore(param, arg_ptr); store_inst.setAlignment(alignment); args.appendAssumeCapacity(arg_ptr); @@ -1001,8 +1000,7 @@ pub const Object = struct { param_ty.abiAlignment(target), dg.object.target_data.abiAlignmentOfType(int_llvm_ty), ); - const arg_ptr = buildAllocaInner(builder, llvm_func, false, param_llvm_ty); - arg_ptr.setAlignment(alignment); + const arg_ptr = buildAllocaInner(builder, llvm_func, false, param_llvm_ty, alignment, target); const casted_ptr = builder.buildBitCast(arg_ptr, int_ptr_llvm_ty, ""); const store_inst = builder.buildStore(param, casted_ptr); store_inst.setAlignment(alignment); @@ -1053,8 +1051,7 @@ pub const Object = struct { const param_ty = fn_info.param_types[it.zig_index - 1]; const param_llvm_ty = try dg.lowerType(param_ty); const param_alignment = param_ty.abiAlignment(target); - const arg_ptr = buildAllocaInner(builder, llvm_func, false, param_llvm_ty); - arg_ptr.setAlignment(param_alignment); + const arg_ptr = buildAllocaInner(builder, llvm_func, false, param_llvm_ty, param_alignment, target); var field_types_buf: [8]*llvm.Type = undefined; const field_types = field_types_buf[0..llvm_ints.len]; for (llvm_ints) |int_bits, i| { @@ -1085,8 +1082,7 @@ pub const Object = struct { const param_ty = fn_info.param_types[it.zig_index - 1]; const param_llvm_ty = try dg.lowerType(param_ty); const param_alignment = param_ty.abiAlignment(target); - const arg_ptr = buildAllocaInner(builder, llvm_func, false, param_llvm_ty); - arg_ptr.setAlignment(param_alignment); + const arg_ptr = buildAllocaInner(builder, llvm_func, false, param_llvm_ty, param_alignment, target); var field_types_buf: [8]*llvm.Type = undefined; const field_types = field_types_buf[0..llvm_floats.len]; for (llvm_floats) |float_bits, i| { @@ -1130,8 +1126,7 @@ pub const Object = struct { llvm_arg_i += 1; const alignment = param_ty.abiAlignment(target); - const arg_ptr = buildAllocaInner(builder, llvm_func, false, param_llvm_ty); - arg_ptr.setAlignment(alignment); + const arg_ptr = buildAllocaInner(builder, llvm_func, false, param_llvm_ty, alignment, target); const casted_ptr = builder.buildBitCast(arg_ptr, param.typeOf().pointerType(0), ""); _ = builder.buildStore(param, casted_ptr); @@ -2431,19 +2426,21 @@ pub const DeclGen = struct { // mismatch, because we don't have the LLVM type until the *value* is created, // whereas the global needs to be created based on the type alone, because // lowering the value may reference the global as a pointer. 
+ const llvm_global_addrspace = toLlvmGlobalAddressSpace(decl.@"addrspace", target); const new_global = dg.object.llvm_module.addGlobalInAddressSpace( llvm_init.typeOf(), "", - dg.llvmAddressSpace(decl.@"addrspace"), + llvm_global_addrspace, ); new_global.setLinkage(global.getLinkage()); new_global.setUnnamedAddr(global.getUnnamedAddress()); new_global.setAlignment(global.getAlignment()); if (decl.@"linksection") |section| new_global.setSection(section); new_global.setInitializer(llvm_init); - // replaceAllUsesWith requires the type to be unchanged. So we bitcast + // replaceAllUsesWith requires the type to be unchanged. So we convert // the new global to the old type and use that as the thing to replace // old uses. + // TODO: How should this work then the address space of a global changed? const new_global_ptr = new_global.constBitCast(global.typeOf()); global.replaceAllUsesWith(new_global_ptr); dg.object.decl_map.putAssumeCapacity(decl_index, new_global); @@ -2492,7 +2489,7 @@ pub const DeclGen = struct { const fqn = try decl.getFullyQualifiedName(dg.module); defer dg.gpa.free(fqn); - const llvm_addrspace = dg.llvmAddressSpace(decl.@"addrspace"); + const llvm_addrspace = toLlvmAddressSpace(decl.@"addrspace", target); const llvm_fn = dg.llvmModule().addFunctionInAddressSpace(fqn, fn_type, llvm_addrspace); gop.value_ptr.* = llvm_fn; @@ -2640,9 +2637,16 @@ pub const DeclGen = struct { const fqn = try decl.getFullyQualifiedName(dg.module); defer dg.gpa.free(fqn); + const target = dg.module.getTarget(); + const llvm_type = try dg.lowerType(decl.ty); - const llvm_addrspace = dg.llvmAddressSpace(decl.@"addrspace"); - const llvm_global = dg.object.llvm_module.addGlobalInAddressSpace(llvm_type, fqn, llvm_addrspace); + const llvm_actual_addrspace = toLlvmGlobalAddressSpace(decl.@"addrspace", target); + + const llvm_global = dg.object.llvm_module.addGlobalInAddressSpace( + llvm_type, + fqn, + llvm_actual_addrspace, + ); gop.value_ptr.* = llvm_global; // This is needed for declarations created by `@extern`. @@ -2667,32 +2671,6 @@ pub const DeclGen = struct { return llvm_global; } - fn llvmAddressSpace(self: DeclGen, address_space: std.builtin.AddressSpace) c_uint { - const target = self.module.getTarget(); - return switch (target.cpu.arch) { - .i386, .x86_64 => switch (address_space) { - .generic => llvm.address_space.default, - .gs => llvm.address_space.x86.gs, - .fs => llvm.address_space.x86.fs, - .ss => llvm.address_space.x86.ss, - else => unreachable, - }, - .nvptx, .nvptx64 => switch (address_space) { - .generic => llvm.address_space.default, - .global => llvm.address_space.nvptx.global, - .constant => llvm.address_space.nvptx.constant, - .param => llvm.address_space.nvptx.param, - .shared => llvm.address_space.nvptx.shared, - .local => llvm.address_space.nvptx.local, - else => unreachable, - }, - else => switch (address_space) { - .generic => llvm.address_space.default, - else => unreachable, - }, - }; - } - fn isUnnamedType(dg: *DeclGen, ty: Type, val: *llvm.Value) bool { // Once `lowerType` succeeds, successive calls to it with the same Zig type // are guaranteed to succeed. 
So if a call to `lowerType` fails here it means @@ -2758,7 +2736,7 @@ pub const DeclGen = struct { return dg.context.structType(&fields, fields.len, .False); } const ptr_info = t.ptrInfo().data; - const llvm_addrspace = dg.llvmAddressSpace(ptr_info.@"addrspace"); + const llvm_addrspace = toLlvmAddressSpace(ptr_info.@"addrspace", target); if (ptr_info.host_size != 0) { return dg.context.intType(ptr_info.host_size * 8).pointerType(llvm_addrspace); } @@ -3295,11 +3273,20 @@ pub const DeclGen = struct { const decl_index = tv.val.castTag(.variable).?.data.owner_decl; const decl = dg.module.declPtr(decl_index); dg.module.markDeclAlive(decl); - const val = try dg.resolveGlobalDecl(decl_index); + + const llvm_wanted_addrspace = toLlvmAddressSpace(decl.@"addrspace", target); + const llvm_actual_addrspace = toLlvmGlobalAddressSpace(decl.@"addrspace", target); + const llvm_var_type = try dg.lowerType(tv.ty); - const llvm_addrspace = dg.llvmAddressSpace(decl.@"addrspace"); - const llvm_type = llvm_var_type.pointerType(llvm_addrspace); - return val.constBitCast(llvm_type); + const llvm_actual_ptr_type = llvm_var_type.pointerType(llvm_actual_addrspace); + + const val = try dg.resolveGlobalDecl(decl_index); + const val_ptr = val.constBitCast(llvm_actual_ptr_type); + if (llvm_actual_addrspace != llvm_wanted_addrspace) { + const llvm_wanted_ptr_type = llvm_var_type.pointerType(llvm_wanted_addrspace); + return val_ptr.constAddrSpaceCast(llvm_wanted_ptr_type); + } + return val_ptr; }, .slice => { const slice = tv.val.castTag(.slice).?.data; @@ -4096,11 +4083,20 @@ pub const DeclGen = struct { self.module.markDeclAlive(decl); - const llvm_val = if (is_fn_body) + const llvm_decl_val = if (is_fn_body) try self.resolveLlvmFunction(decl_index) else try self.resolveGlobalDecl(decl_index); + const target = self.module.getTarget(); + const llvm_wanted_addrspace = toLlvmAddressSpace(decl.@"addrspace", target); + const llvm_actual_addrspace = toLlvmGlobalAddressSpace(decl.@"addrspace", target); + const llvm_val = if (llvm_wanted_addrspace != llvm_actual_addrspace) blk: { + const llvm_decl_ty = try self.lowerType(decl.ty); + const llvm_decl_wanted_ptr_ty = llvm_decl_ty.pointerType(llvm_wanted_addrspace); + break :blk llvm_decl_val.constAddrSpaceCast(llvm_decl_wanted_ptr_ty); + } else llvm_decl_val; + const llvm_type = try self.lowerType(tv.ty); if (tv.ty.zigTypeTag() == .Int) { return llvm_val.constPtrToInt(llvm_type); @@ -4370,7 +4366,9 @@ pub const FuncGen = struct { // We have an LLVM value but we need to create a global constant and // set the value as its initializer, and then return a pointer to the global. const target = self.dg.module.getTarget(); - const global = self.dg.object.llvm_module.addGlobal(llvm_val.typeOf(), ""); + const llvm_wanted_addrspace = toLlvmAddressSpace(.generic, target); + const llvm_actual_addrspace = toLlvmGlobalAddressSpace(.generic, target); + const global = self.dg.object.llvm_module.addGlobalInAddressSpace(llvm_val.typeOf(), "", llvm_actual_addrspace); global.setInitializer(llvm_val); global.setLinkage(.Private); global.setGlobalConstant(.True); @@ -4380,8 +4378,14 @@ pub const FuncGen = struct { // the type of global constants might not match the type it is supposed to // be, and so we must bitcast the pointer at the usage sites. 
const wanted_llvm_ty = try self.dg.lowerType(tv.ty); - const wanted_llvm_ptr_ty = wanted_llvm_ty.pointerType(0); - return global.constBitCast(wanted_llvm_ptr_ty); + const wanted_bitcasted_llvm_ptr_ty = wanted_llvm_ty.pointerType(llvm_actual_addrspace); + const bitcasted_ptr = global.constBitCast(wanted_bitcasted_llvm_ptr_ty); + const wanted_llvm_ptr_ty = wanted_llvm_ty.pointerType(llvm_wanted_addrspace); + const casted_ptr = if (llvm_wanted_addrspace != llvm_actual_addrspace) + bitcasted_ptr.constAddrSpaceCast(wanted_llvm_ptr_ty) + else + bitcasted_ptr; + return casted_ptr; } fn genBody(self: *FuncGen, body: []const Air.Inst.Index) Error!void { @@ -4462,7 +4466,7 @@ pub const FuncGen = struct { .cmp_lt => try self.airCmp(inst, .lt, false), .cmp_lte => try self.airCmp(inst, .lte, false), .cmp_neq => try self.airCmp(inst, .neq, false), - + .cmp_eq_optimized => try self.airCmp(inst, .eq, true), .cmp_gt_optimized => try self.airCmp(inst, .gt, true), .cmp_gte_optimized => try self.airCmp(inst, .gte, true), @@ -4548,6 +4552,7 @@ pub const FuncGen = struct { .aggregate_init => try self.airAggregateInit(inst), .union_init => try self.airUnionInit(inst), .prefetch => try self.airPrefetch(inst), + .addrspace_cast => try self.airAddrSpaceCast(inst), .is_named_enum_value => try self.airIsNamedEnumValue(inst), .error_set_has_value => try self.airErrorSetHasValue(inst), @@ -4635,8 +4640,7 @@ pub const FuncGen = struct { const ret_ptr = if (!sret) null else blk: { const llvm_ret_ty = try self.dg.lowerType(return_type); - const ret_ptr = self.buildAlloca(llvm_ret_ty); - ret_ptr.setAlignment(return_type.abiAlignment(target)); + const ret_ptr = self.buildAlloca(llvm_ret_ty, return_type.abiAlignment(target)); try llvm_args.append(ret_ptr); break :blk ret_ptr; }; @@ -4683,8 +4687,7 @@ pub const FuncGen = struct { } else { const alignment = param_ty.abiAlignment(target); const param_llvm_ty = llvm_arg.typeOf(); - const arg_ptr = self.buildAlloca(param_llvm_ty); - arg_ptr.setAlignment(alignment); + const arg_ptr = self.buildAlloca(param_llvm_ty, alignment); const store_inst = self.builder.buildStore(llvm_arg, arg_ptr); store_inst.setAlignment(alignment); try llvm_args.append(arg_ptr); @@ -4711,8 +4714,7 @@ pub const FuncGen = struct { param_ty.abiAlignment(target), self.dg.object.target_data.abiAlignmentOfType(int_llvm_ty), ); - const int_ptr = self.buildAlloca(int_llvm_ty); - int_ptr.setAlignment(alignment); + const int_ptr = self.buildAlloca(int_llvm_ty, alignment); const param_llvm_ty = try self.dg.lowerType(param_ty); const casted_ptr = self.builder.buildBitCast(int_ptr, param_llvm_ty.pointerType(0), ""); const store_inst = self.builder.buildStore(llvm_arg, casted_ptr); @@ -4738,7 +4740,7 @@ pub const FuncGen = struct { const llvm_arg = try self.resolveInst(arg); const is_by_ref = isByRef(param_ty); const arg_ptr = if (is_by_ref) llvm_arg else p: { - const p = self.buildAlloca(llvm_arg.typeOf()); + const p = self.buildAlloca(llvm_arg.typeOf(), null); const store_inst = self.builder.buildStore(llvm_arg, p); store_inst.setAlignment(param_ty.abiAlignment(target)); break :p p; @@ -4767,7 +4769,7 @@ pub const FuncGen = struct { const llvm_arg = try self.resolveInst(arg); const is_by_ref = isByRef(param_ty); const arg_ptr = if (is_by_ref) llvm_arg else p: { - const p = self.buildAlloca(llvm_arg.typeOf()); + const p = self.buildAlloca(llvm_arg.typeOf(), null); const store_inst = self.builder.buildStore(llvm_arg, p); store_inst.setAlignment(param_ty.abiAlignment(target)); break :p p; @@ -4804,7 +4806,7 @@ pub const 
FuncGen = struct { const arg_ty = self.air.typeOf(arg); var llvm_arg = try self.resolveInst(arg); if (!isByRef(arg_ty)) { - const p = self.buildAlloca(llvm_arg.typeOf()); + const p = self.buildAlloca(llvm_arg.typeOf(), null); const store_inst = self.builder.buildStore(llvm_arg, p); store_inst.setAlignment(arg_ty.abiAlignment(target)); llvm_arg = store_inst; @@ -4861,9 +4863,8 @@ pub const FuncGen = struct { // In this case the function return type is honoring the calling convention by having // a different LLVM type than the usual one. We solve this here at the callsite // by bitcasting a pointer to our canonical type, then loading it if necessary. - const rp = self.buildAlloca(llvm_ret_ty); const alignment = return_type.abiAlignment(target); - rp.setAlignment(alignment); + const rp = self.buildAlloca(llvm_ret_ty, alignment); const ptr_abi_ty = abi_ret_ty.pointerType(0); const casted_ptr = self.builder.buildBitCast(rp, ptr_abi_ty, ""); const store_inst = self.builder.buildStore(call, casted_ptr); @@ -4880,9 +4881,8 @@ pub const FuncGen = struct { if (isByRef(return_type)) { // our by-ref status disagrees with sret so we must allocate, store, // and return the allocation pointer. - const rp = self.buildAlloca(llvm_ret_ty); const alignment = return_type.abiAlignment(target); - rp.setAlignment(alignment); + const rp = self.buildAlloca(llvm_ret_ty, alignment); const store_inst = self.builder.buildStore(call, rp); store_inst.setAlignment(alignment); return rp; @@ -4941,8 +4941,7 @@ pub const FuncGen = struct { return null; } - const rp = self.buildAlloca(llvm_ret_ty); - rp.setAlignment(alignment); + const rp = self.buildAlloca(llvm_ret_ty, alignment); const store_inst = self.builder.buildStore(operand, rp); store_inst.setAlignment(alignment); const casted_ptr = self.builder.buildBitCast(rp, ptr_abi_ty, ""); @@ -6060,8 +6059,7 @@ pub const FuncGen = struct { llvm_param_types[llvm_param_i] = arg_llvm_value.typeOf(); } else { const alignment = arg_ty.abiAlignment(target); - const arg_ptr = self.buildAlloca(arg_llvm_value.typeOf()); - arg_ptr.setAlignment(alignment); + const arg_ptr = self.buildAlloca(arg_llvm_value.typeOf(), alignment); const store_inst = self.builder.buildStore(arg_llvm_value, arg_ptr); store_inst.setAlignment(alignment); llvm_param_values[llvm_param_i] = arg_ptr; @@ -6562,8 +6560,7 @@ pub const FuncGen = struct { const llvm_optional_ty = try self.dg.lowerType(optional_ty); if (isByRef(optional_ty)) { const target = self.dg.module.getTarget(); - const optional_ptr = self.buildAlloca(llvm_optional_ty); - optional_ptr.setAlignment(optional_ty.abiAlignment(target)); + const optional_ptr = self.buildAlloca(llvm_optional_ty, optional_ty.abiAlignment(target)); const payload_ptr = self.builder.buildStructGEP(llvm_optional_ty, optional_ptr, 0, ""); var ptr_ty_payload: Type.Payload.ElemType = .{ .base = .{ .tag = .single_mut_pointer }, @@ -6596,8 +6593,7 @@ pub const FuncGen = struct { const payload_offset = errUnionPayloadOffset(payload_ty, target); const error_offset = errUnionErrorOffset(payload_ty, target); if (isByRef(err_un_ty)) { - const result_ptr = self.buildAlloca(err_un_llvm_ty); - result_ptr.setAlignment(err_un_ty.abiAlignment(target)); + const result_ptr = self.buildAlloca(err_un_llvm_ty, err_un_ty.abiAlignment(target)); const err_ptr = self.builder.buildStructGEP(err_un_llvm_ty, result_ptr, error_offset, ""); const store_inst = self.builder.buildStore(ok_err_code, err_ptr); store_inst.setAlignment(Type.anyerror.abiAlignment(target)); @@ -6631,8 +6627,7 @@ pub const FuncGen = 
struct { const payload_offset = errUnionPayloadOffset(payload_ty, target); const error_offset = errUnionErrorOffset(payload_ty, target); if (isByRef(err_un_ty)) { - const result_ptr = self.buildAlloca(err_un_llvm_ty); - result_ptr.setAlignment(err_un_ty.abiAlignment(target)); + const result_ptr = self.buildAlloca(err_un_llvm_ty, err_un_ty.abiAlignment(target)); const err_ptr = self.builder.buildStructGEP(err_un_llvm_ty, result_ptr, error_offset, ""); const store_inst = self.builder.buildStore(operand, err_ptr); store_inst.setAlignment(Type.anyerror.abiAlignment(target)); @@ -7050,9 +7045,8 @@ pub const FuncGen = struct { if (isByRef(dest_ty)) { const target = self.dg.module.getTarget(); - const alloca_inst = self.buildAlloca(llvm_dest_ty); const result_alignment = dest_ty.abiAlignment(target); - alloca_inst.setAlignment(result_alignment); + const alloca_inst = self.buildAlloca(llvm_dest_ty, result_alignment); { const field_ptr = self.builder.buildStructGEP(llvm_dest_ty, alloca_inst, result_index, ""); const store_inst = self.builder.buildStore(result, field_ptr); @@ -7402,9 +7396,8 @@ pub const FuncGen = struct { if (isByRef(dest_ty)) { const target = self.dg.module.getTarget(); - const alloca_inst = self.buildAlloca(llvm_dest_ty); const result_alignment = dest_ty.abiAlignment(target); - alloca_inst.setAlignment(result_alignment); + const alloca_inst = self.buildAlloca(llvm_dest_ty, result_alignment); { const field_ptr = self.builder.buildStructGEP(llvm_dest_ty, alloca_inst, result_index, ""); const store_inst = self.builder.buildStore(result, field_ptr); @@ -7710,7 +7703,7 @@ pub const FuncGen = struct { if (!result_is_ref) { return self.dg.todo("implement bitcast vector to non-ref array", .{}); } - const array_ptr = self.buildAlloca(llvm_dest_ty); + const array_ptr = self.buildAlloca(llvm_dest_ty, null); const bitcast_ok = elem_ty.bitSize(target) == elem_ty.abiSize(target) * 8; if (bitcast_ok) { const llvm_vector_ty = try self.dg.lowerType(operand_ty); @@ -7786,8 +7779,7 @@ pub const FuncGen = struct { if (result_is_ref) { // Bitcast the result pointer, then store. const alignment = @maximum(operand_ty.abiAlignment(target), inst_ty.abiAlignment(target)); - const result_ptr = self.buildAlloca(llvm_dest_ty); - result_ptr.setAlignment(alignment); + const result_ptr = self.buildAlloca(llvm_dest_ty, alignment); const operand_llvm_ty = try self.dg.lowerType(operand_ty); const casted_ptr = self.builder.buildBitCast(result_ptr, operand_llvm_ty.pointerType(0), ""); const store_inst = self.builder.buildStore(operand, casted_ptr); @@ -7800,8 +7792,7 @@ pub const FuncGen = struct { // but LLVM won't let us bitcast struct values. // Therefore, we store operand to bitcasted alloca, then load for result. 
const alignment = @maximum(operand_ty.abiAlignment(target), inst_ty.abiAlignment(target)); - const result_ptr = self.buildAlloca(llvm_dest_ty); - result_ptr.setAlignment(alignment); + const result_ptr = self.buildAlloca(llvm_dest_ty, alignment); const operand_llvm_ty = try self.dg.lowerType(operand_ty); const casted_ptr = self.builder.buildBitCast(result_ptr, operand_llvm_ty.pointerType(0), ""); const store_inst = self.builder.buildStore(operand, casted_ptr); @@ -7877,11 +7868,9 @@ pub const FuncGen = struct { if (!pointee_type.isFnOrHasRuntimeBitsIgnoreComptime()) return self.dg.lowerPtrToVoid(ptr_ty); const pointee_llvm_ty = try self.dg.lowerType(pointee_type); - const alloca_inst = self.buildAlloca(pointee_llvm_ty); const target = self.dg.module.getTarget(); const alignment = ptr_ty.ptrAlignment(target); - alloca_inst.setAlignment(alignment); - return alloca_inst; + return self.buildAlloca(pointee_llvm_ty, alignment); } fn airRetPtr(self: *FuncGen, inst: Air.Inst.Index) !?*llvm.Value { @@ -7892,15 +7881,13 @@ pub const FuncGen = struct { if (self.ret_ptr) |ret_ptr| return ret_ptr; const ret_llvm_ty = try self.dg.lowerType(ret_ty); const target = self.dg.module.getTarget(); - const alloca_inst = self.buildAlloca(ret_llvm_ty); - alloca_inst.setAlignment(ptr_ty.ptrAlignment(target)); - return alloca_inst; + return self.buildAlloca(ret_llvm_ty, ptr_ty.ptrAlignment(target)); } /// Use this instead of builder.buildAlloca, because this function makes sure to /// put the alloca instruction at the top of the function! - fn buildAlloca(self: *FuncGen, llvm_ty: *llvm.Type) *llvm.Value { - return buildAllocaInner(self.builder, self.llvm_func, self.di_scope != null, llvm_ty); + fn buildAlloca(self: *FuncGen, llvm_ty: *llvm.Type, alignment: ?c_uint) *llvm.Value { + return buildAllocaInner(self.builder, self.llvm_func, self.di_scope != null, llvm_ty, alignment, self.dg.module.getTarget()); } fn airStore(self: *FuncGen, inst: Air.Inst.Index) !?*llvm.Value { @@ -8779,9 +8766,9 @@ pub const FuncGen = struct { const llvm_result_ty = accum_init.typeOf(); // Allocate and initialize our mutable variables - const i_ptr = self.buildAlloca(llvm_usize_ty); + const i_ptr = self.buildAlloca(llvm_usize_ty, null); _ = self.builder.buildStore(llvm_usize_ty.constInt(0, .False), i_ptr); - const accum_ptr = self.buildAlloca(llvm_result_ty); + const accum_ptr = self.buildAlloca(llvm_result_ty, null); _ = self.builder.buildStore(accum_init, accum_ptr); // Setup the loop @@ -8966,10 +8953,9 @@ pub const FuncGen = struct { if (isByRef(result_ty)) { const llvm_u32 = self.context.intType(32); - const alloca_inst = self.buildAlloca(llvm_result_ty); // TODO in debug builds init to undef so that the padding will be 0xaa // even if we fully populate the fields. 
- alloca_inst.setAlignment(result_ty.abiAlignment(target)); + const alloca_inst = self.buildAlloca(llvm_result_ty, result_ty.abiAlignment(target)); var indices: [2]*llvm.Value = .{ llvm_u32.constNull(), undefined }; for (elements) |elem, i| { @@ -9007,8 +8993,7 @@ pub const FuncGen = struct { assert(isByRef(result_ty)); const llvm_usize = try self.dg.lowerType(Type.usize); - const alloca_inst = self.buildAlloca(llvm_result_ty); - alloca_inst.setAlignment(result_ty.abiAlignment(target)); + const alloca_inst = self.buildAlloca(llvm_result_ty, result_ty.abiAlignment(target)); const array_info = result_ty.arrayInfo(); var elem_ptr_payload: Type.Payload.Pointer = .{ @@ -9083,7 +9068,7 @@ pub const FuncGen = struct { // necessarily match the format that we need, depending on which tag is active. We // must construct the correct unnamed struct type here and bitcast, in order to // then set the fields appropriately. - const result_ptr = self.buildAlloca(union_llvm_ty); + const result_ptr = self.buildAlloca(union_llvm_ty, null); const llvm_payload = try self.resolveInst(extra.init); assert(union_obj.haveFieldTypes()); const field = union_obj.fields.values()[extra.field_index]; @@ -9243,6 +9228,17 @@ pub const FuncGen = struct { return null; } + fn airAddrSpaceCast(self: *FuncGen, inst: Air.Inst.Index) !?*llvm.Value { + if (self.liveness.isUnused(inst)) return null; + + const ty_op = self.air.instructions.items(.data)[inst].ty_op; + const inst_ty = self.air.typeOfIndex(inst); + const operand = try self.resolveInst(ty_op.operand); + + const llvm_dest_ty = try self.dg.lowerType(inst_ty); + return self.builder.buildAddrSpaceCast(operand, llvm_dest_ty, ""); + } + fn getErrorNameTable(self: *FuncGen) !*llvm.Value { if (self.dg.object.error_name_table) |table| { return table; @@ -9324,9 +9320,8 @@ pub const FuncGen = struct { if (isByRef(optional_ty)) { const target = self.dg.module.getTarget(); - const alloca_inst = self.buildAlloca(optional_llvm_ty); const payload_alignment = optional_ty.abiAlignment(target); - alloca_inst.setAlignment(payload_alignment); + const alloca_inst = self.buildAlloca(optional_llvm_ty, payload_alignment); { const field_ptr = self.builder.buildStructGEP(optional_llvm_ty, alloca_inst, 0, ""); @@ -9450,8 +9445,7 @@ pub const FuncGen = struct { if (isByRef(info.pointee_type)) { const result_align = info.pointee_type.abiAlignment(target); const max_align = @maximum(result_align, ptr_alignment); - const result_ptr = self.buildAlloca(elem_llvm_ty); - result_ptr.setAlignment(max_align); + const result_ptr = self.buildAlloca(elem_llvm_ty, max_align); const llvm_ptr_u8 = self.context.intType(8).pointerType(0); const llvm_usize = self.context.intType(Type.usize.intInfo(target).bits); const size_bytes = info.pointee_type.abiSize(target); @@ -9484,8 +9478,7 @@ pub const FuncGen = struct { if (isByRef(info.pointee_type)) { const result_align = info.pointee_type.abiAlignment(target); - const result_ptr = self.buildAlloca(elem_llvm_ty); - result_ptr.setAlignment(result_align); + const result_ptr = self.buildAlloca(elem_llvm_ty, result_align); const same_size_int = self.context.intType(elem_bits); const truncated_int = self.builder.buildTrunc(shifted_value, same_size_int, ""); @@ -9609,8 +9602,7 @@ pub const FuncGen = struct { .x86_64 => { const array_llvm_ty = usize_llvm_ty.arrayType(6); const array_ptr = fg.valgrind_client_request_array orelse a: { - const array_ptr = fg.buildAlloca(array_llvm_ty); - array_ptr.setAlignment(usize_alignment); + const array_ptr = fg.buildAlloca(array_llvm_ty, 
usize_alignment); fg.valgrind_client_request_array = array_ptr; break :a array_ptr; }; @@ -9905,6 +9897,78 @@ fn toLlvmCallConv(cc: std.builtin.CallingConvention, target: std.Target) llvm.Ca .nvptx, .nvptx64 => .PTX_Kernel, else => unreachable, }, + .AmdgpuKernel => return switch (target.cpu.arch) { + .amdgcn => .AMDGPU_KERNEL, + else => unreachable, + }, + }; +} + +/// Convert a zig-address space to an llvm address space. +fn toLlvmAddressSpace(address_space: std.builtin.AddressSpace, target: std.Target) c_uint { + return switch (target.cpu.arch) { + .i386, .x86_64 => switch (address_space) { + .generic => llvm.address_space.default, + .gs => llvm.address_space.x86.gs, + .fs => llvm.address_space.x86.fs, + .ss => llvm.address_space.x86.ss, + else => unreachable, + }, + .nvptx, .nvptx64 => switch (address_space) { + .generic => llvm.address_space.default, + .global => llvm.address_space.nvptx.global, + .constant => llvm.address_space.nvptx.constant, + .param => llvm.address_space.nvptx.param, + .shared => llvm.address_space.nvptx.shared, + .local => llvm.address_space.nvptx.local, + else => unreachable, + }, + .amdgcn => switch (address_space) { + .generic => llvm.address_space.amdgpu.flat, + .global => llvm.address_space.amdgpu.global, + .constant => llvm.address_space.amdgpu.constant, + .shared => llvm.address_space.amdgpu.local, + .local => llvm.address_space.amdgpu.private, + else => unreachable, + }, + else => switch (address_space) { + .generic => llvm.address_space.default, + else => unreachable, + }, + }; +} + +/// On some targets, local values that are in the generic address space must be generated into a +/// different address, space and then cast back to the generic address space. +/// For example, on GPUs local variable declarations must be generated into the local address space. +/// This function returns the address space local values should be generated into. +fn llvmAllocaAddressSpace(target: std.Target) c_uint { + return switch (target.cpu.arch) { + // On amdgcn, locals should be generated into the private address space. + // To make Zig not impossible to use, these are then converted to addresses in the + // generic address space and treates as regular pointers. This is the way that HIP also does it. + .amdgcn => llvm.address_space.amdgpu.private, + else => llvm.address_space.default, + }; +} + +/// On some targets, global values that are in the generic address space must be generated into a +/// different address space, and then cast back to the generic address space. +fn llvmDefaultGlobalAddressSpace(target: std.Target) c_uint { + return switch (target.cpu.arch) { + // On amdgcn, globals must be explicitly allocated and uploaded so that the program can access + // them. + .amdgcn => llvm.address_space.amdgpu.global, + else => llvm.address_space.default, + }; +} + +/// Return the actual address space that a value should be stored in if its a global address space. +/// When a value is placed in the resulting address space, it needs to be cast back into wanted_address_space. 
+fn toLlvmGlobalAddressSpace(wanted_address_space: std.builtin.AddressSpace, target: std.Target) c_uint { + return switch (wanted_address_space) { + .generic => llvmDefaultGlobalAddressSpace(target), + else => |as| toLlvmAddressSpace(as, target), }; } @@ -10537,13 +10601,23 @@ fn backendSupportsF16(target: std.Target) bool { }; } +/// This function returns true if we expect LLVM to lower f128 correctly, +/// and false if we expect LLVm to crash if it encounters and f128 type +/// or if it produces miscompilations. +fn backendSupportsF128(target: std.Target) bool { + return switch (target.cpu.arch) { + .amdgcn => false, + else => true, + }; +} + /// LLVM does not support all relevant intrinsics for all targets, so we /// may need to manually generate a libc call fn intrinsicsAllowed(scalar_ty: Type, target: std.Target) bool { return switch (scalar_ty.tag()) { .f16 => backendSupportsF16(target), .f80 => target.longDoubleIs(f80) and backendSupportsF80(target), - .f128 => target.longDoubleIs(f128), + .f128 => target.longDoubleIs(f128) and backendSupportsF128(target), else => true, }; } @@ -10620,25 +10694,43 @@ fn buildAllocaInner( llvm_func: *llvm.Value, di_scope_non_null: bool, llvm_ty: *llvm.Type, + maybe_alignment: ?c_uint, + target: std.Target, ) *llvm.Value { - const prev_block = builder.getInsertBlock(); - const prev_debug_location = builder.getCurrentDebugLocation2(); - defer { - builder.positionBuilderAtEnd(prev_block); - if (di_scope_non_null) { - builder.setCurrentDebugLocation2(prev_debug_location); + const address_space = llvmAllocaAddressSpace(target); + + const alloca = blk: { + const prev_block = builder.getInsertBlock(); + const prev_debug_location = builder.getCurrentDebugLocation2(); + defer { + builder.positionBuilderAtEnd(prev_block); + if (di_scope_non_null) { + builder.setCurrentDebugLocation2(prev_debug_location); + } } + + const entry_block = llvm_func.getFirstBasicBlock().?; + if (entry_block.getFirstInstruction()) |first_inst| { + builder.positionBuilder(entry_block, first_inst); + } else { + builder.positionBuilderAtEnd(entry_block); + } + builder.clearCurrentDebugLocation(); + + break :blk builder.buildAllocaInAddressSpace(llvm_ty, address_space, ""); + }; + + if (maybe_alignment) |alignment| { + alloca.setAlignment(alignment); } - const entry_block = llvm_func.getFirstBasicBlock().?; - if (entry_block.getFirstInstruction()) |first_inst| { - builder.positionBuilder(entry_block, first_inst); - } else { - builder.positionBuilderAtEnd(entry_block); + // The pointer returned from this function should have the generic address space, + // if this isn't the case then cast it to the generic address space. 
+ if (address_space != llvm.address_space.default) { + return builder.buildAddrSpaceCast(alloca, llvm_ty.pointerType(llvm.address_space.default), ""); } - builder.clearCurrentDebugLocation(); - return builder.buildAlloca(llvm_ty, ""); + return alloca; } fn errUnionPayloadOffset(payload_ty: Type, target: std.Target) u1 { diff --git a/src/codegen/llvm/bindings.zig b/src/codegen/llvm/bindings.zig index 96f4477daa..a5b01d6ddf 100644 --- a/src/codegen/llvm/bindings.zig +++ b/src/codegen/llvm/bindings.zig @@ -171,6 +171,9 @@ pub const Value = opaque { pub const constAdd = LLVMConstAdd; extern fn LLVMConstAdd(LHSConstant: *Value, RHSConstant: *Value) *Value; + pub const constAddrSpaceCast = LLVMConstAddrSpaceCast; + extern fn LLVMConstAddrSpaceCast(ConstantVal: *Value, ToType: *Type) *Value; + pub const setWeak = LLVMSetWeak; extern fn LLVMSetWeak(CmpXchgInst: *Value, IsWeak: Bool) void; @@ -956,6 +959,12 @@ pub const Builder = opaque { pub const setFastMath = ZigLLVMSetFastMath; extern fn ZigLLVMSetFastMath(B: *Builder, on_state: bool) void; + + pub const buildAddrSpaceCast = LLVMBuildAddrSpaceCast; + extern fn LLVMBuildAddrSpaceCast(B: *Builder, Val: *Value, DestTy: *Type, Name: [*:0]const u8) *Value; + + pub const buildAllocaInAddressSpace = ZigLLVMBuildAllocaInAddressSpace; + extern fn ZigLLVMBuildAllocaInAddressSpace(B: *Builder, Ty: *Type, AddressSpace: c_uint, Name: [*:0]const u8) *Value; }; pub const MDString = opaque { diff --git a/src/print_air.zig b/src/print_air.zig index fb6f7e6cf2..d3523c0fc6 100644 --- a/src/print_air.zig +++ b/src/print_air.zig @@ -244,6 +244,7 @@ const Writer = struct { .byte_swap, .bit_reverse, .error_set_has_value, + .addrspace_cast, => try w.writeTyOp(s, inst), .block, diff --git a/src/print_zir.zig b/src/print_zir.zig index 8f055e9ddd..f2a79d53a4 100644 --- a/src/print_zir.zig +++ b/src/print_zir.zig @@ -512,6 +512,7 @@ const Writer = struct { .err_set_cast, .wasm_memory_grow, .prefetch, + .addrspace_cast, => { const inst_data = self.code.extraData(Zir.Inst.BinNode, extended.operand).data; const src = LazySrcLoc.nodeOffset(inst_data.node); diff --git a/src/stage1/all_types.hpp b/src/stage1/all_types.hpp index d4a2abece9..88dac9107f 100644 --- a/src/stage1/all_types.hpp +++ b/src/stage1/all_types.hpp @@ -85,7 +85,8 @@ enum CallingConvention { CallingConventionAAPCSVFP, CallingConventionSysV, CallingConventionWin64, - CallingConventionPtxKernel + CallingConventionPtxKernel, + CallingConventionAmdgpuKernel }; // Stage 1 supports only the generic address space @@ -94,6 +95,11 @@ enum AddressSpace { AddressSpaceGS, AddressSpaceFS, AddressSpaceSS, + AddressSpaceGlobal, + AddressSpaceConstant, + AddressSpaceParam, + AddressSpaceShared, + AddressSpaceLocal }; // This one corresponds to the builtin.zig enum. @@ -1841,6 +1847,7 @@ enum BuiltinFnId { BuiltinFnIdMaximum, BuiltinFnIdMinimum, BuiltinFnIdPrefetch, + BuiltinFnIdAddrSpaceCast, }; struct BuiltinFnEntry { @@ -2672,6 +2679,7 @@ enum Stage1ZirInstId : uint8_t { Stage1ZirInstIdWasmMemoryGrow, Stage1ZirInstIdSrc, Stage1ZirInstIdPrefetch, + Stage1ZirInstIdAddrSpaceCast, }; // ir_render_* functions in codegen.cpp consume Gen instructions and produce LLVM IR. 
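The AmdgpuKernel calling convention threaded through the commit above (std.builtin.CallingConvention, Sema's funcCommon check, toLlvmCallConv, and the stage1 enums here) is what device entry points are expected to use. A hypothetical kernel signature, assuming an amdgcn target and made-up names:

    export fn kernelMain(out: *addrspace(.global) f32) callconv(.AmdgpuKernel) void {
        // Store through a global-address-space pointer from a device entry point.
        out.* = 1.0;
    }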
@@ -4168,6 +4176,13 @@ struct Stage1AirInstAlignCast { Stage1AirInst *target; }; +struct Stage1ZirInstAddrSpaceCast { + Stage1ZirInst base; + + Stage1ZirInst *addrspace; + Stage1ZirInst *ptr; +}; + struct Stage1ZirInstSetAlignStack { Stage1ZirInst base; diff --git a/src/stage1/analyze.cpp b/src/stage1/analyze.cpp index 2d0624a7c7..59ca43644a 100644 --- a/src/stage1/analyze.cpp +++ b/src/stage1/analyze.cpp @@ -993,6 +993,7 @@ const char *calling_convention_name(CallingConvention cc) { case CallingConventionSysV: return "SysV"; case CallingConventionWin64: return "Win64"; case CallingConventionPtxKernel: return "PtxKernel"; + case CallingConventionAmdgpuKernel: return "AmdgpuKernel"; } zig_unreachable(); } @@ -1017,6 +1018,7 @@ bool calling_convention_allows_zig_types(CallingConvention cc) { case CallingConventionAAPCSVFP: case CallingConventionSysV: case CallingConventionWin64: + case CallingConventionAmdgpuKernel: return false; } zig_unreachable(); @@ -1028,6 +1030,11 @@ const char *address_space_name(AddressSpace as) { case AddressSpaceGS: return "gs"; case AddressSpaceFS: return "fs"; case AddressSpaceSS: return "ss"; + case AddressSpaceGlobal: return "global"; + case AddressSpaceConstant: return "constant"; + case AddressSpaceParam: return "param"; + case AddressSpaceShared: return "shared"; + case AddressSpaceLocal: return "local"; } zig_unreachable(); } @@ -2019,6 +2026,9 @@ Error emit_error_unless_callconv_allowed_for_target(CodeGen *g, AstNode *source_ allowed_platforms = "nvptx and nvptx64"; } break; + case CallingConventionAmdgpuKernel: + if (g->zig_target->arch != ZigLLVM_amdgcn) + allowed_platforms = "amdgcn and amdpal"; } if (allowed_platforms != nullptr) { @@ -3857,6 +3867,7 @@ static void resolve_decl_fn(CodeGen *g, TldFn *tld_fn) { case CallingConventionSysV: case CallingConventionWin64: case CallingConventionPtxKernel: + case CallingConventionAmdgpuKernel: add_fn_export(g, fn_table_entry, buf_ptr(&fn_table_entry->symbol_name), GlobalLinkageIdStrong, fn_cc); break; @@ -6012,7 +6023,7 @@ Error type_has_bits2(CodeGen *g, ZigType *type_entry, bool *result) { bool fn_returns_c_abi_small_struct(FnTypeId *fn_type_id) { ZigType *type = fn_type_id->return_type; - return !calling_convention_allows_zig_types(fn_type_id->cc) && + return !calling_convention_allows_zig_types(fn_type_id->cc) && type->id == ZigTypeIdStruct && type->abi_size <= 16; } @@ -8700,7 +8711,7 @@ static LLVMTypeRef llvm_int_for_size(size_t size) { static LLVMTypeRef llvm_sse_for_size(size_t size) { if (size > 4) return LLVMDoubleType(); - else + else return LLVMFloatType(); } @@ -8758,7 +8769,7 @@ static Error resolve_llvm_c_abi_type(CodeGen *g, ZigType *ty) { LLVMTypeRef return_elem_types[] = { LLVMVoidType(), - LLVMVoidType(), + LLVMVoidType(), }; for (uint32_t i = 0; i <= eightbyte_index; i += 1) { if (type_classes[i] == X64CABIClass_INTEGER) { diff --git a/src/stage1/astgen.cpp b/src/stage1/astgen.cpp index 9eea2e650e..2d053a9e3c 100644 --- a/src/stage1/astgen.cpp +++ b/src/stage1/astgen.cpp @@ -351,6 +351,8 @@ void destroy_instruction_src(Stage1ZirInst *inst) { return heap::c_allocator.destroy(reinterpret_cast<Stage1ZirInstSrc *>(inst)); case Stage1ZirInstIdPrefetch: return heap::c_allocator.destroy(reinterpret_cast<Stage1ZirInstPrefetch *>(inst)); + case Stage1ZirInstIdAddrSpaceCast: + return heap::c_allocator.destroy(reinterpret_cast<Stage1ZirInstAddrSpaceCast *>(inst)); } zig_unreachable(); } @@ -947,6 +949,10 @@ static constexpr Stage1ZirInstId ir_inst_id(Stage1ZirInstPrefetch *) { return 
Stage1ZirInstIdPrefetch; } +static constexpr Stage1ZirInstId ir_inst_id(Stage1ZirInstAddrSpaceCast *) { + return Stage1ZirInstIdAddrSpaceCast; +} + template<typename T> static T *ir_create_instruction(Stage1AstGen *ag, Scope *scope, AstNode *source_node) { T *special_instruction = heap::c_allocator.create<T>(); @@ -2572,6 +2578,19 @@ static Stage1ZirInst *ir_build_align_cast_src(Stage1AstGen *ag, Scope *scope, As return &instruction->base; } +static Stage1ZirInst *ir_build_addrspace_cast(Stage1AstGen *ag, Scope *scope, AstNode *source_node, + Stage1ZirInst *addrspace, Stage1ZirInst *ptr) +{ + Stage1ZirInstAddrSpaceCast *instruction = ir_build_instruction<Stage1ZirInstAddrSpaceCast>(ag, scope, source_node); + instruction->addrspace = addrspace; + instruction->ptr = ptr; + + ir_ref_instruction(addrspace, ag->current_basic_block); + ir_ref_instruction(ptr, ag->current_basic_block); + + return &instruction->base; +} + static Stage1ZirInst *ir_build_resolve_result(Stage1AstGen *ag, Scope *scope, AstNode *source_node, ResultLoc *result_loc, Stage1ZirInst *ty) { @@ -5459,6 +5478,21 @@ static Stage1ZirInst *astgen_builtin_fn_call(Stage1AstGen *ag, Scope *scope, Ast Stage1ZirInst *ir_extern = ir_build_prefetch(ag, scope, node, ptr_value, casted_options_value); return ir_lval_wrap(ag, scope, ir_extern, lval, result_loc); } + case BuiltinFnIdAddrSpaceCast: + { + AstNode *arg0_node = node->data.fn_call_expr.params.at(0); + Stage1ZirInst *arg0_value = astgen_node(ag, arg0_node, scope); + if (arg0_value == ag->codegen->invalid_inst_src) + return arg0_value; + + AstNode* arg1_node = node->data.fn_call_expr.params.at(1); + Stage1ZirInst *arg1_value = astgen_node(ag, arg1_node, scope); + if (arg1_value == ag->codegen->invalid_inst_src) + return arg1_value; + + Stage1ZirInst *addrspace_cast = ir_build_addrspace_cast(ag, scope, node, arg0_value, arg1_value); + return ir_lval_wrap(ag, scope, addrspace_cast, lval, result_loc); + } } zig_unreachable(); } diff --git a/src/stage1/codegen.cpp b/src/stage1/codegen.cpp index e5c694967c..039b088c1e 100644 --- a/src/stage1/codegen.cpp +++ b/src/stage1/codegen.cpp @@ -217,6 +217,9 @@ static ZigLLVM_CallingConv get_llvm_cc(CodeGen *g, CallingConvention cc) { assert(g->zig_target->arch == ZigLLVM_nvptx || g->zig_target->arch == ZigLLVM_nvptx64); return ZigLLVM_PTX_Kernel; + case CallingConventionAmdgpuKernel: + assert(g->zig_target->arch == ZigLLVM_amdgcn); + return ZigLLVM_AMDGPU_KERNEL; } zig_unreachable(); @@ -365,6 +368,7 @@ static bool cc_want_sret_attr(CallingConvention cc) { case CallingConventionSysV: case CallingConventionWin64: case CallingConventionPtxKernel: + case CallingConventionAmdgpuKernel: return true; case CallingConventionAsync: case CallingConventionUnspecified: @@ -3515,7 +3519,7 @@ static LLVMValueRef gen_soft_float_to_int_op(CodeGen *g, LLVMValueRef value_ref, // Handle integers of non-pot bitsize by shortening them on the output if (result_type != wider_type) { - result = gen_widen_or_shorten(g, false, wider_type, result_type, result); + result = gen_widen_or_shorten(g, false, wider_type, result_type, result); } return result; @@ -4370,7 +4374,7 @@ static LLVMValueRef ir_render_binary_not(CodeGen *g, Stage1Air *executable, static LLVMValueRef gen_soft_float_neg(CodeGen *g, ZigType *operand_type, LLVMValueRef operand) { uint32_t vector_len = operand_type->id == ZigTypeIdVector ? operand_type->data.vector.len : 0; - uint16_t num_bits = operand_type->id == ZigTypeIdVector ? + uint16_t num_bits = operand_type->id == ZigTypeIdVector ? 
operand_type->data.vector.elem_type->data.floating.bit_count : operand_type->data.floating.bit_count; @@ -10181,6 +10185,7 @@ static void define_builtin_fns(CodeGen *g) { create_builtin_fn(g, BuiltinFnIdMaximum, "maximum", 2); create_builtin_fn(g, BuiltinFnIdMinimum, "minimum", 2); create_builtin_fn(g, BuiltinFnIdPrefetch, "prefetch", 2); + create_builtin_fn(g, BuiltinFnIdAddrSpaceCast, "addrSpaceCast", 2); } static const char *bool_to_str(bool b) { diff --git a/src/stage1/ir.cpp b/src/stage1/ir.cpp index a5428945a9..9627384fbc 100644 --- a/src/stage1/ir.cpp +++ b/src/stage1/ir.cpp @@ -11753,6 +11753,7 @@ static Stage1AirInst *ir_analyze_instruction_export(IrAnalyze *ira, Stage1ZirIns case CallingConventionSysV: case CallingConventionWin64: case CallingConventionPtxKernel: + case CallingConventionAmdgpuKernel: add_fn_export(ira->codegen, fn_entry, buf_ptr(symbol_name), global_linkage_id, cc); fn_entry->section_name = section_name; break; @@ -23745,6 +23746,50 @@ static Stage1AirInst *ir_analyze_instruction_align_cast(IrAnalyze *ira, Stage1Zi return result; } +static bool ir_resolve_addrspace(IrAnalyze *ira, Stage1AirInst *value, AddressSpace *out) { + if (type_is_invalid(value->value->type)) + return false; + + ZigType *addrspace_type = get_builtin_type(ira->codegen, "AddressSpace"); + + Stage1AirInst *casted_value = ir_implicit_cast(ira, value, addrspace_type); + if (type_is_invalid(casted_value->value->type)) + return false; + + ZigValue *const_val = ir_resolve_const(ira, casted_value, UndefBad); + if (!const_val) + return false; + + *out = (AddressSpace)bigint_as_u32(&const_val->data.x_enum_tag); + return true; +} + +static Stage1AirInst *ir_analyze_instruction_addrspace_cast(IrAnalyze *ira, Stage1ZirInstAddrSpaceCast *instruction) { + Stage1AirInst *ptr_inst = instruction->ptr->child; + ZigType *ptr_type = ptr_inst->value->type; + if (type_is_invalid(ptr_type)) + return ira->codegen->invalid_inst_gen; + + AddressSpace addrspace; + if (!ir_resolve_addrspace(ira, instruction->addrspace->child, &addrspace)) + return ira->codegen->invalid_inst_gen; + + if (addrspace != AddressSpaceGeneric) { + ir_add_error_node(ira, instruction->addrspace->source_node, buf_sprintf( + "address space '%s' not available in stage 1 compiler, must be .generic", + address_space_name(addrspace))); + return ira->codegen->invalid_inst_gen; + } + + if (!is_slice(ptr_type) && get_src_ptr_type(ptr_type) == nullptr) { + ir_add_error_node(ira, instruction->ptr->source_node, + buf_sprintf("expected pointer or slice, found '%s'", buf_ptr(&ptr_type->name))); + return ira->codegen->invalid_inst_gen; + } + + return ptr_inst; +} + static Stage1AirInst *ir_analyze_instruction_set_align_stack(IrAnalyze *ira, Stage1ZirInstSetAlignStack *instruction) { uint32_t align_bytes; Stage1AirInst *align_bytes_inst = instruction->align_bytes->child; @@ -25450,6 +25495,8 @@ static Stage1AirInst *ir_analyze_instruction_base(IrAnalyze *ira, Stage1ZirInst return ir_analyze_instruction_src(ira, (Stage1ZirInstSrc *)instruction); case Stage1ZirInstIdPrefetch: return ir_analyze_instruction_prefetch(ira, (Stage1ZirInstPrefetch *)instruction); + case Stage1ZirInstIdAddrSpaceCast: + return ir_analyze_instruction_addrspace_cast(ira, (Stage1ZirInstAddrSpaceCast *)instruction); } zig_unreachable(); } @@ -25831,6 +25878,7 @@ bool ir_inst_src_has_side_effects(Stage1ZirInst *instruction) { case Stage1ZirInstIdWasmMemorySize: case Stage1ZirInstIdSrc: case Stage1ZirInstIdReduce: + case Stage1ZirInstIdAddrSpaceCast: return false; case Stage1ZirInstIdAsm: diff
--git a/src/stage1/ir_print.cpp b/src/stage1/ir_print.cpp index 9296242a3e..366e48004c 100644 --- a/src/stage1/ir_print.cpp +++ b/src/stage1/ir_print.cpp @@ -373,6 +373,8 @@ const char* ir_inst_src_type_str(Stage1ZirInstId id) { return "SrcSrc"; case Stage1ZirInstIdPrefetch: return "SrcPrefetch"; + case Stage1ZirInstIdAddrSpaceCast: + return "SrcAddrSpaceCast"; } zig_unreachable(); } @@ -2382,6 +2384,14 @@ static void ir_print_align_cast(IrPrintSrc *irp, Stage1ZirInstAlignCast *instruc fprintf(irp->f, ")"); } +static void ir_print_addrspace_cast(IrPrintSrc *irp, Stage1ZirInstAddrSpaceCast *instruction) { + fprintf(irp->f, "@addrSpaceCast("); + ir_print_other_inst_src(irp, instruction->addrspace); + fprintf(irp->f, ","); + ir_print_other_inst_src(irp, instruction->ptr); + fprintf(irp->f, ")"); +} + static void ir_print_align_cast(IrPrintGen *irp, Stage1AirInstAlignCast *instruction) { fprintf(irp->f, "@alignCast("); ir_print_other_inst_gen(irp, instruction->target); @@ -3127,6 +3137,9 @@ static void ir_print_inst_src(IrPrintSrc *irp, Stage1ZirInst *instruction, bool case Stage1ZirInstIdPrefetch: ir_print_prefetch(irp, (Stage1ZirInstPrefetch *)instruction); break; + case Stage1ZirInstIdAddrSpaceCast: + ir_print_addrspace_cast(irp, (Stage1ZirInstAddrSpaceCast *)instruction); + break; } fprintf(irp->f, "\n"); } diff --git a/src/target.zig b/src/target.zig index b7da04e548..3fbaf6abc4 100644 --- a/src/target.zig +++ b/src/target.zig @@ -1,5 +1,6 @@ const std = @import("std"); const Type = @import("type.zig").Type; +const AddressSpace = std.builtin.AddressSpace; pub const ArchOsAbi = struct { arch: std.Target.Cpu.Arch, @@ -635,12 +636,30 @@ pub fn defaultAddressSpace( /// Query the default address space for functions themselves. function, }, -) std.builtin.AddressSpace { +) AddressSpace { _ = target; _ = context; return .generic; } +/// Returns true if pointers in `from` can be converted to a pointer in `to`. 
+pub fn addrSpaceCastIsValid( + target: std.Target, + from: AddressSpace, + to: AddressSpace, +) bool { + const arch = target.cpu.arch; + switch (arch) { + .x86_64, .i386 => return arch.supportsAddressSpace(from) and arch.supportsAddressSpace(to), + .amdgcn => { + const to_generic = arch.supportsAddressSpace(from) and to == .generic; + const from_generic = arch.supportsAddressSpace(to) and from == .generic; + return to_generic or from_generic; + }, + else => return from == .generic and to == .generic, + } +} + pub fn llvmMachineAbi(target: std.Target) ?[:0]const u8 { const have_float = switch (target.abi) { .gnuilp32 => return "ilp32", diff --git a/src/type.zig b/src/type.zig index bd0718481a..3b46546df0 100644 --- a/src/type.zig +++ b/src/type.zig @@ -2786,6 +2786,12 @@ pub const Type = extern union { .pointer => self.castTag(.pointer).?.data.@"addrspace", + .optional => { + var buf: Payload.ElemType = undefined; + const child_type = self.optionalChild(&buf); + return child_type.ptrAddressSpace(); + }, + else => unreachable, }; } @@ -6768,6 +6774,13 @@ pub const CType = enum { }, }, + .amdhsa, .amdpal => switch (self) { + .short, .ushort => return 16, + .int, .uint => return 32, + .long, .ulong, .longlong, .ulonglong => return 64, + .longdouble => return 128, + }, + .cloudabi, .kfreebsd, .lv2, @@ -6777,13 +6790,11 @@ pub const CType = enum { .aix, .cuda, .nvcl, - .amdhsa, .ps4, .ps5, .elfiamcu, .mesa3d, .contiki, - .amdpal, .hermit, .hurd, .opencl, diff --git a/src/zig_llvm.cpp b/src/zig_llvm.cpp index 4f9cd76c6a..b5edb336a5 100644 --- a/src/zig_llvm.cpp +++ b/src/zig_llvm.cpp @@ -512,22 +512,22 @@ LLVMValueRef ZigLLVMBuildUSubSat(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRe LLVMValueRef ZigLLVMBuildSMulFixSat(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS, const char *name) { llvm::Type* types[1] = { - unwrap(LHS)->getType(), + unwrap(LHS)->getType(), }; // pass scale = 0 as third argument llvm::Value* values[3] = {unwrap(LHS), unwrap(RHS), unwrap(B)->getInt32(0)}; - + CallInst *call_inst = unwrap(B)->CreateIntrinsic(Intrinsic::smul_fix_sat, types, values, nullptr, name); return wrap(call_inst); } LLVMValueRef ZigLLVMBuildUMulFixSat(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS, const char *name) { llvm::Type* types[1] = { - unwrap(LHS)->getType(), + unwrap(LHS)->getType(), }; // pass scale = 0 as third argument llvm::Value* values[3] = {unwrap(LHS), unwrap(RHS), unwrap(B)->getInt32(0)}; - + CallInst *call_inst = unwrap(B)->CreateIntrinsic(Intrinsic::umul_fix_sat, types, values, nullptr, name); return wrap(call_inst); } @@ -808,7 +808,7 @@ void ZigLLVMSetCurrentDebugLocation2(LLVMBuilderRef builder, unsigned int line, unsigned int column, ZigLLVMDIScope *scope, ZigLLVMDILocation *inlined_at) { DIScope* di_scope = reinterpret_cast<DIScope*>(scope); - DebugLoc debug_loc = DILocation::get(di_scope->getContext(), line, column, di_scope, + DebugLoc debug_loc = DILocation::get(di_scope->getContext(), line, column, di_scope, reinterpret_cast<DILocation *>(inlined_at), false); unwrap(builder)->SetCurrentDebugLocation(debug_loc); } @@ -1177,9 +1177,14 @@ LLVMValueRef ZigLLVMBuildAShrExact(LLVMBuilderRef builder, LLVMValueRef LHS, LLV return wrap(unwrap(builder)->CreateAShr(unwrap(LHS), unwrap(RHS), name, true)); } +LLVMValueRef ZigLLVMBuildAllocaInAddressSpace(LLVMBuilderRef builder, LLVMTypeRef Ty, + unsigned AddressSpace, const char *Name) { + return wrap(unwrap(builder)->CreateAlloca(unwrap(Ty), AddressSpace, nullptr, Name)); +} + void ZigLLVMSetTailCall(LLVMValueRef Call) { 
unwrap<CallInst>(Call)->setTailCallKind(CallInst::TCK_MustTail); -} +} void ZigLLVMSetCallSret(LLVMValueRef Call, LLVMTypeRef return_type) { CallInst *call_inst = unwrap<CallInst>(Call); diff --git a/src/zig_llvm.h b/src/zig_llvm.h index 7fdddda6a4..1a4d5481b6 100644 --- a/src/zig_llvm.h +++ b/src/zig_llvm.h @@ -162,6 +162,8 @@ ZIG_EXTERN_C LLVMValueRef ZigLLVMBuildLShrExact(LLVMBuilderRef builder, LLVMValu ZIG_EXTERN_C LLVMValueRef ZigLLVMBuildAShrExact(LLVMBuilderRef builder, LLVMValueRef LHS, LLVMValueRef RHS, const char *name); +ZIG_EXTERN_C LLVMValueRef ZigLLVMBuildAllocaInAddressSpace(LLVMBuilderRef builder, LLVMTypeRef Ty, unsigned AddressSpace, + const char *Name); ZIG_EXTERN_C struct ZigLLVMDIType *ZigLLVMCreateDebugPointerType(struct ZigLLVMDIBuilder *dibuilder, struct ZigLLVMDIType *pointee_type, uint64_t size_in_bits, uint64_t align_in_bits, const char *name);
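Closing note on the new rule in src/target.zig: the sketch below is not part of the patch; it mirrors the `.amdgcn` branch of `addrSpaceCastIsValid` to show which casts that branch accepts. `supportedOnAmdgcn` is a stand-in assumption for `arch.supportsAddressSpace`, and same-address-space casts may be handled separately by the caller.

    const std = @import("std");
    const AddressSpace = std.builtin.AddressSpace;

    // Stand-in for `arch.supportsAddressSpace` on amdgcn; the exact set listed
    // here is an assumption for illustration only.
    fn supportedOnAmdgcn(as: AddressSpace) bool {
        return switch (as) {
            .generic, .global, .constant, .param, .shared, .local => true,
            else => false,
        };
    }

    // Mirrors the `.amdgcn` branch of addrSpaceCastIsValid: one side of the cast
    // must be .generic and the other side must be supported by the architecture,
    // so .shared -> .global requires two casts through .generic.
    fn amdgcnCastIsValid(from: AddressSpace, to: AddressSpace) bool {
        const to_generic = supportedOnAmdgcn(from) and to == .generic;
        const from_generic = supportedOnAmdgcn(to) and from == .generic;
        return to_generic or from_generic;
    }

    test "amdgcn address space cast rule" {
        try std.testing.expect(amdgcnCastIsValid(.shared, .generic));
        try std.testing.expect(amdgcnCastIsValid(.generic, .global));
        try std.testing.expect(!amdgcnCastIsValid(.shared, .global));
    }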
