From 6b69dcfdd28145791da43979474fec29a13e24d0 Mon Sep 17 00:00:00 2001 From: Robin Voetter Date: Sat, 6 Aug 2022 18:22:56 +0200 Subject: amdgpu: add AmdgpuKernel calling convention --- src/codegen/llvm.zig | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'src/codegen') diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig index 7038606611..7f411c8373 100644 --- a/src/codegen/llvm.zig +++ b/src/codegen/llvm.zig @@ -4425,7 +4425,7 @@ pub const FuncGen = struct { .cmp_lt => try self.airCmp(inst, .lt, false), .cmp_lte => try self.airCmp(inst, .lte, false), .cmp_neq => try self.airCmp(inst, .neq, false), - + .cmp_eq_optimized => try self.airCmp(inst, .eq, true), .cmp_gt_optimized => try self.airCmp(inst, .gt, true), .cmp_gte_optimized => try self.airCmp(inst, .gte, true), @@ -9807,6 +9807,10 @@ fn toLlvmCallConv(cc: std.builtin.CallingConvention, target: std.Target) llvm.Ca .nvptx, .nvptx64 => .PTX_Kernel, else => unreachable, }, + .AmdgpuKernel => return switch (target.cpu.arch) { + .amdgcn => .AMDGPU_KERNEL, + else => unreachable, + }, }; } -- cgit v1.2.3 From 3392de87dcf516ed2e3fd1904372195bdc75c0eb Mon Sep 17 00:00:00 2001 From: Robin Voetter Date: Sat, 20 Aug 2022 12:42:27 +0200 Subject: allow global/local/shared address spaces on amdgcn --- src/Sema.zig | 6 ++++-- src/codegen/llvm.zig | 8 ++++++++ 2 files changed, 12 insertions(+), 2 deletions(-) (limited to 'src/codegen') diff --git a/src/Sema.zig b/src/Sema.zig index 505764da8a..b357beafdf 100644 --- a/src/Sema.zig +++ b/src/Sema.zig @@ -30303,13 +30303,15 @@ pub fn analyzeAddrspace( const address_space = addrspace_tv.val.toEnum(std.builtin.AddressSpace); const target = sema.mod.getTarget(); const arch = target.cpu.arch; - const is_gpu = arch == .nvptx or arch == .nvptx64; + const is_nv = arch == .nvptx or arch == .nvptx64; + const is_gpu = is_nv or arch == .amdgcn; const supported = switch (address_space) { .generic => true, .gs, .fs, .ss => (arch == .i386 or arch == .x86_64) and ctx == .pointer, // TODO: check that .shared and .local are left uninitialized - .global, .param, .shared, .local => is_gpu, + .param => is_nv, + .global, .shared, .local => is_gpu, .constant => is_gpu and (ctx == .constant), }; diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig index 7f411c8373..d0433be2b9 100644 --- a/src/codegen/llvm.zig +++ b/src/codegen/llvm.zig @@ -2659,6 +2659,14 @@ pub const DeclGen = struct { .local => llvm.address_space.nvptx.local, else => unreachable, }, + .amdgcn => switch (address_space) { + .generic => llvm.address_space.amdgpu.flat, + .global => llvm.address_space.amdgpu.global, + .constant => llvm.address_space.amdgpu.constant, + .shared => llvm.address_space.amdgpu.local, + .local => llvm.address_space.amdgpu.private, + else => unreachable, + }, else => switch (address_space) { .generic => llvm.address_space.default, else => unreachable, -- cgit v1.2.3 From 5f3b91437f5cd23bcae66227932555b7abe32669 Mon Sep 17 00:00:00 2001 From: Robin Voetter Date: Sun, 21 Aug 2022 01:32:19 +0200 Subject: stage2: improve addrspace handling This commit changes the way Zig is intended to deal with variable declaration for exotic targets. Where previously the idea was to enforce local/global variables to be placed into their respective address spaces, depending on the target, this is now fixed to the generic address space. To facilitate this for targets where local variables _must_ be generated into a specific address space (ex.
amdgcn where locals must be generated into the private address space), the variable allocations (alloca) are generated into the right address space and then addrspace-casted back to the generic address space. While this could be less efficient in theory, LLVM will hopefully deal with figuring out the actual correct address space for a pointer for us. HIP seems to do the same thing in this regard. Global variables are handled in a similar way. --- src/codegen/llvm.zig | 307 +++++++++++++++++++++++++----------------- src/codegen/llvm/bindings.zig | 9 ++ src/zig_llvm.cpp | 17 ++- src/zig_llvm.h | 2 + 4 files changed, 203 insertions(+), 132 deletions(-) (limited to 'src/codegen') diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig index d0433be2b9..f400e841ea 100644 --- a/src/codegen/llvm.zig +++ b/src/codegen/llvm.zig @@ -929,8 +929,7 @@ pub const Object = struct { if (isByRef(param_ty)) { const alignment = param_ty.abiAlignment(target); const param_llvm_ty = param.typeOf(); - const arg_ptr = buildAllocaInner(builder, llvm_func, false, param_llvm_ty); - arg_ptr.setAlignment(alignment); + const arg_ptr = buildAllocaInner(builder, llvm_func, false, param_llvm_ty, alignment, target); const store_inst = builder.buildStore(param, arg_ptr); store_inst.setAlignment(alignment); args.appendAssumeCapacity(arg_ptr); @@ -974,8 +973,7 @@ pub const Object = struct { param_ty.abiAlignment(target), dg.object.target_data.abiAlignmentOfType(int_llvm_ty), ); - const arg_ptr = buildAllocaInner(builder, llvm_func, false, param_llvm_ty); - arg_ptr.setAlignment(alignment); + const arg_ptr = buildAllocaInner(builder, llvm_func, false, param_llvm_ty, alignment, target); const casted_ptr = builder.buildBitCast(arg_ptr, int_ptr_llvm_ty, ""); const store_inst = builder.buildStore(param, casted_ptr); store_inst.setAlignment(alignment); @@ -1026,8 +1024,7 @@ pub const Object = struct { const param_ty = fn_info.param_types[it.zig_index - 1]; const param_llvm_ty = try dg.lowerType(param_ty); const param_alignment = param_ty.abiAlignment(target); - const arg_ptr = buildAllocaInner(builder, llvm_func, false, param_llvm_ty); - arg_ptr.setAlignment(param_alignment); + const arg_ptr = buildAllocaInner(builder, llvm_func, false, param_llvm_ty, param_alignment, target); var field_types_buf: [8]*llvm.Type = undefined; const field_types = field_types_buf[0..llvm_ints.len]; for (llvm_ints) |int_bits, i| { @@ -1058,8 +1055,7 @@ pub const Object = struct { const param_ty = fn_info.param_types[it.zig_index - 1]; const param_llvm_ty = try dg.lowerType(param_ty); const param_alignment = param_ty.abiAlignment(target); - const arg_ptr = buildAllocaInner(builder, llvm_func, false, param_llvm_ty); - arg_ptr.setAlignment(param_alignment); + const arg_ptr = buildAllocaInner(builder, llvm_func, false, param_llvm_ty, param_alignment, target); var field_types_buf: [8]*llvm.Type = undefined; const field_types = field_types_buf[0..llvm_floats.len]; for (llvm_floats) |float_bits, i| { @@ -1103,8 +1099,7 @@ pub const Object = struct { llvm_arg_i += 1; const alignment = param_ty.abiAlignment(target); - const arg_ptr = buildAllocaInner(builder, llvm_func, false, param_llvm_ty); - arg_ptr.setAlignment(alignment); + const arg_ptr = buildAllocaInner(builder, llvm_func, false, param_llvm_ty, alignment, target); const casted_ptr = builder.buildBitCast(arg_ptr, param.typeOf().pointerType(0), ""); _ = builder.buildStore(param, casted_ptr); @@ -2404,21 +2399,27 @@ pub const DeclGen = struct { // mismatch, because we don't have the LLVM type until the
*value* is created, // whereas the global needs to be created based on the type alone, because // lowering the value may reference the global as a pointer. + const llvm_addrspace = toLlvmAddressSpace(decl.@"addrspace", target); + const llvm_global_addrspace = toLlvmGlobalAddressSpace(llvm_addrspace, target); const new_global = dg.object.llvm_module.addGlobalInAddressSpace( llvm_init.typeOf(), "", - dg.llvmAddressSpace(decl.@"addrspace"), + llvm_global_addrspace, ); new_global.setLinkage(global.getLinkage()); new_global.setUnnamedAddr(global.getUnnamedAddress()); new_global.setAlignment(global.getAlignment()); if (decl.@"linksection") |section| new_global.setSection(section); new_global.setInitializer(llvm_init); - // replaceAllUsesWith requires the type to be unchanged. So we bitcast + // replaceAllUsesWith requires the type to be unchanged. So we convert // the new global to the old type and use that as the thing to replace // old uses. - const new_global_ptr = new_global.constBitCast(global.typeOf()); - global.replaceAllUsesWith(new_global_ptr); + const new_global_ptr = if (llvm_addrspace != llvm_global_addrspace) + new_global.constAddrSpaceCast(llvm_init.typeOf().pointerType(llvm_addrspace)) + else + new_global; + const new_global_casted_ptr = new_global_ptr.constBitCast(global.typeOf()); + global.replaceAllUsesWith(new_global_casted_ptr); dg.object.decl_map.putAssumeCapacity(decl_index, new_global); new_global.takeName(global); global.deleteGlobal(); @@ -2465,7 +2466,7 @@ pub const DeclGen = struct { const fqn = try decl.getFullyQualifiedName(dg.module); defer dg.gpa.free(fqn); - const llvm_addrspace = dg.llvmAddressSpace(decl.@"addrspace"); + const llvm_addrspace = toLlvmAddressSpace(decl.@"addrspace", target); const llvm_fn = dg.llvmModule().addFunctionInAddressSpace(fqn, fn_type, llvm_addrspace); gop.value_ptr.* = llvm_fn; @@ -2613,9 +2614,15 @@ pub const DeclGen = struct { const fqn = try decl.getFullyQualifiedName(dg.module); defer dg.gpa.free(fqn); + const target = dg.module.getTarget(); + const llvm_type = try dg.lowerType(decl.ty); - const llvm_addrspace = dg.llvmAddressSpace(decl.@"addrspace"); - const llvm_global = dg.object.llvm_module.addGlobalInAddressSpace(llvm_type, fqn, llvm_addrspace); + const llvm_addrspace = toLlvmAddressSpace(decl.@"addrspace", target); + const llvm_global = dg.object.llvm_module.addGlobalInAddressSpace( + llvm_type, + fqn, + toLlvmGlobalAddressSpace(llvm_addrspace, target), + ); gop.value_ptr.* = llvm_global; // This is needed for declarations created by `@extern`. 
@@ -2640,40 +2647,6 @@ pub const DeclGen = struct { return llvm_global; } - fn llvmAddressSpace(self: DeclGen, address_space: std.builtin.AddressSpace) c_uint { - const target = self.module.getTarget(); - return switch (target.cpu.arch) { - .i386, .x86_64 => switch (address_space) { - .generic => llvm.address_space.default, - .gs => llvm.address_space.x86.gs, - .fs => llvm.address_space.x86.fs, - .ss => llvm.address_space.x86.ss, - else => unreachable, - }, - .nvptx, .nvptx64 => switch (address_space) { - .generic => llvm.address_space.default, - .global => llvm.address_space.nvptx.global, - .constant => llvm.address_space.nvptx.constant, - .param => llvm.address_space.nvptx.param, - .shared => llvm.address_space.nvptx.shared, - .local => llvm.address_space.nvptx.local, - else => unreachable, - }, - .amdgcn => switch (address_space) { - .generic => llvm.address_space.amdgpu.flat, - .global => llvm.address_space.amdgpu.global, - .constant => llvm.address_space.amdgpu.constant, - .shared => llvm.address_space.amdgpu.local, - .local => llvm.address_space.amdgpu.private, - else => unreachable, - }, - else => switch (address_space) { - .generic => llvm.address_space.default, - else => unreachable, - }, - }; - } - fn isUnnamedType(dg: *DeclGen, ty: Type, val: *llvm.Value) bool { // Once `lowerType` succeeds, successive calls to it with the same Zig type // are guaranteed to succeed. So if a call to `lowerType` fails here it means @@ -2739,7 +2712,7 @@ pub const DeclGen = struct { return dg.context.structType(&fields, fields.len, .False); } const ptr_info = t.ptrInfo().data; - const llvm_addrspace = dg.llvmAddressSpace(ptr_info.@"addrspace"); + const llvm_addrspace = toLlvmAddressSpace(ptr_info.@"addrspace", target); if (ptr_info.host_size != 0) { return dg.context.intType(ptr_info.host_size * 8).pointerType(llvm_addrspace); } @@ -3268,11 +3241,18 @@ pub const DeclGen = struct { const decl_index = tv.val.castTag(.variable).?.data.owner_decl; const decl = dg.module.declPtr(decl_index); dg.module.markDeclAlive(decl); - const val = try dg.resolveGlobalDecl(decl_index); const llvm_var_type = try dg.lowerType(tv.ty); - const llvm_addrspace = dg.llvmAddressSpace(decl.@"addrspace"); - const llvm_type = llvm_var_type.pointerType(llvm_addrspace); - return val.constBitCast(llvm_type); + const llvm_var_addrspace = toLlvmAddressSpace(decl.@"addrspace", target); + const llvm_global_addrspace = toLlvmGlobalAddressSpace(llvm_var_addrspace, target); + const llvm_var_ptr_type = llvm_var_type.pointerType(llvm_global_addrspace); + + const val = try dg.resolveGlobalDecl(decl_index); + const val_ptr = val.constBitCast(llvm_var_ptr_type); + if (llvm_global_addrspace != llvm_var_addrspace) { + const llvm_ptr_type = llvm_var_type.pointerType(llvm_var_addrspace); + return val_ptr.constAddrSpaceCast(llvm_ptr_type); + } + return val_ptr; }, .slice => { const slice = tv.val.castTag(.slice).?.data; @@ -4069,11 +4049,20 @@ pub const DeclGen = struct { self.module.markDeclAlive(decl); - const llvm_val = if (is_fn_body) + const llvm_decl_val = if (is_fn_body) try self.resolveLlvmFunction(decl_index) else try self.resolveGlobalDecl(decl_index); + const target = self.module.getTarget(); + const llvm_addrspace = toLlvmAddressSpace(decl.@"addrspace", target); + const llvm_global_addrspace = toLlvmGlobalAddressSpace(llvm_addrspace, target); + const llvm_val = if (llvm_addrspace != llvm_global_addrspace) blk: { + const llvm_decl_ty = try self.lowerType(decl.ty); + const llvm_decl_ptr_ty = llvm_decl_ty.pointerType(llvm_addrspace); + break
:blk llvm_decl_val.constAddrSpaceCast(llvm_decl_ptr_ty); + } else llvm_decl_val; + const llvm_type = try self.lowerType(tv.ty); if (tv.ty.zigTypeTag() == .Int) { return llvm_val.constPtrToInt(llvm_type); @@ -4339,7 +4328,9 @@ pub const FuncGen = struct { // We have an LLVM value but we need to create a global constant and // set the value as its initializer, and then return a pointer to the global. const target = self.dg.module.getTarget(); - const global = self.dg.object.llvm_module.addGlobal(llvm_val.typeOf(), ""); + const llvm_addrspace = toLlvmAddressSpace(.generic, target); + const llvm_global_addrspace = toLlvmGlobalAddressSpace(llvm_addrspace, target); + const global = self.dg.object.llvm_module.addGlobalInAddressSpace(llvm_val.typeOf(), "", llvm_global_addrspace); global.setInitializer(llvm_val); global.setLinkage(.Private); global.setGlobalConstant(.True); @@ -4349,8 +4340,10 @@ pub const FuncGen = struct { // the type of global constants might not match the type it is supposed to // be, and so we must bitcast the pointer at the usage sites. const wanted_llvm_ty = try self.dg.lowerType(ty); - const wanted_llvm_ptr_ty = wanted_llvm_ty.pointerType(0); - const casted_ptr = global.constBitCast(wanted_llvm_ptr_ty); + const wanted_bitcasted_llvm_ptr_ty = wanted_llvm_ty.pointerType(llvm_global_addrspace); + const bitcasted_ptr = global.constBitCast(wanted_bitcasted_llvm_ptr_ty); + const wanted_llvm_ptr_ty = wanted_llvm_ty.pointerType(llvm_addrspace); + const casted_ptr = bitcasted_ptr.constAddrSpaceCast(wanted_llvm_ptr_ty); gop.value_ptr.* = casted_ptr; return casted_ptr; } @@ -4606,8 +4599,7 @@ pub const FuncGen = struct { const ret_ptr = if (!sret) null else blk: { const llvm_ret_ty = try self.dg.lowerType(return_type); - const ret_ptr = self.buildAlloca(llvm_ret_ty); - ret_ptr.setAlignment(return_type.abiAlignment(target)); + const ret_ptr = self.buildAlloca(llvm_ret_ty, return_type.abiAlignment(target)); try llvm_args.append(ret_ptr); break :blk ret_ptr; }; @@ -4654,8 +4646,7 @@ pub const FuncGen = struct { } else { const alignment = param_ty.abiAlignment(target); const param_llvm_ty = llvm_arg.typeOf(); - const arg_ptr = self.buildAlloca(param_llvm_ty); - arg_ptr.setAlignment(alignment); + const arg_ptr = self.buildAlloca(param_llvm_ty, alignment); const store_inst = self.builder.buildStore(llvm_arg, arg_ptr); store_inst.setAlignment(alignment); try llvm_args.append(arg_ptr); @@ -4682,8 +4673,7 @@ pub const FuncGen = struct { param_ty.abiAlignment(target), self.dg.object.target_data.abiAlignmentOfType(int_llvm_ty), ); - const int_ptr = self.buildAlloca(int_llvm_ty); - int_ptr.setAlignment(alignment); + const int_ptr = self.buildAlloca(int_llvm_ty, alignment); const param_llvm_ty = try self.dg.lowerType(param_ty); const casted_ptr = self.builder.buildBitCast(int_ptr, param_llvm_ty.pointerType(0), ""); const store_inst = self.builder.buildStore(llvm_arg, casted_ptr); @@ -4709,7 +4699,7 @@ pub const FuncGen = struct { const llvm_arg = try self.resolveInst(arg); const is_by_ref = isByRef(param_ty); const arg_ptr = if (is_by_ref) llvm_arg else p: { - const p = self.buildAlloca(llvm_arg.typeOf()); + const p = self.buildAlloca(llvm_arg.typeOf(), null); const store_inst = self.builder.buildStore(llvm_arg, p); store_inst.setAlignment(param_ty.abiAlignment(target)); break :p p; @@ -4738,7 +4728,7 @@ pub const FuncGen = struct { const llvm_arg = try self.resolveInst(arg); const is_by_ref = isByRef(param_ty); const arg_ptr = if (is_by_ref) llvm_arg else p: { - const p = 
self.buildAlloca(llvm_arg.typeOf()); + const p = self.buildAlloca(llvm_arg.typeOf(), null); const store_inst = self.builder.buildStore(llvm_arg, p); store_inst.setAlignment(param_ty.abiAlignment(target)); break :p p; @@ -4775,7 +4765,7 @@ pub const FuncGen = struct { const arg_ty = self.air.typeOf(arg); var llvm_arg = try self.resolveInst(arg); if (!isByRef(arg_ty)) { - const p = self.buildAlloca(llvm_arg.typeOf()); + const p = self.buildAlloca(llvm_arg.typeOf(), null); const store_inst = self.builder.buildStore(llvm_arg, p); store_inst.setAlignment(arg_ty.abiAlignment(target)); llvm_arg = store_inst; @@ -4832,9 +4822,8 @@ pub const FuncGen = struct { // In this case the function return type is honoring the calling convention by having // a different LLVM type than the usual one. We solve this here at the callsite // by bitcasting a pointer to our canonical type, then loading it if necessary. - const rp = self.buildAlloca(llvm_ret_ty); const alignment = return_type.abiAlignment(target); - rp.setAlignment(alignment); + const rp = self.buildAlloca(llvm_ret_ty, alignment); const ptr_abi_ty = abi_ret_ty.pointerType(0); const casted_ptr = self.builder.buildBitCast(rp, ptr_abi_ty, ""); const store_inst = self.builder.buildStore(call, casted_ptr); @@ -4851,9 +4840,8 @@ pub const FuncGen = struct { if (isByRef(return_type)) { // our by-ref status disagrees with sret so we must allocate, store, // and return the allocation pointer. - const rp = self.buildAlloca(llvm_ret_ty); const alignment = return_type.abiAlignment(target); - rp.setAlignment(alignment); + const rp = self.buildAlloca(llvm_ret_ty, alignment); const store_inst = self.builder.buildStore(call, rp); store_inst.setAlignment(alignment); return rp; @@ -4912,8 +4900,7 @@ pub const FuncGen = struct { return null; } - const rp = self.buildAlloca(llvm_ret_ty); - rp.setAlignment(alignment); + const rp = self.buildAlloca(llvm_ret_ty, alignment); const store_inst = self.builder.buildStore(operand, rp); store_inst.setAlignment(alignment); const casted_ptr = self.builder.buildBitCast(rp, ptr_abi_ty, ""); @@ -6031,8 +6018,7 @@ pub const FuncGen = struct { llvm_param_types[llvm_param_i] = arg_llvm_value.typeOf(); } else { const alignment = arg_ty.abiAlignment(target); - const arg_ptr = self.buildAlloca(arg_llvm_value.typeOf()); - arg_ptr.setAlignment(alignment); + const arg_ptr = self.buildAlloca(arg_llvm_value.typeOf(), alignment); const store_inst = self.builder.buildStore(arg_llvm_value, arg_ptr); store_inst.setAlignment(alignment); llvm_param_values[llvm_param_i] = arg_ptr; @@ -6533,8 +6519,7 @@ pub const FuncGen = struct { const llvm_optional_ty = try self.dg.lowerType(optional_ty); if (isByRef(optional_ty)) { const target = self.dg.module.getTarget(); - const optional_ptr = self.buildAlloca(llvm_optional_ty); - optional_ptr.setAlignment(optional_ty.abiAlignment(target)); + const optional_ptr = self.buildAlloca(llvm_optional_ty, optional_ty.abiAlignment(target)); const payload_ptr = self.builder.buildStructGEP(llvm_optional_ty, optional_ptr, 0, ""); var ptr_ty_payload: Type.Payload.ElemType = .{ .base = .{ .tag = .single_mut_pointer }, @@ -6567,8 +6552,7 @@ pub const FuncGen = struct { const payload_offset = errUnionPayloadOffset(payload_ty, target); const error_offset = errUnionErrorOffset(payload_ty, target); if (isByRef(err_un_ty)) { - const result_ptr = self.buildAlloca(err_un_llvm_ty); - result_ptr.setAlignment(err_un_ty.abiAlignment(target)); + const result_ptr = self.buildAlloca(err_un_llvm_ty, err_un_ty.abiAlignment(target)); const 
err_ptr = self.builder.buildStructGEP(err_un_llvm_ty, result_ptr, error_offset, ""); const store_inst = self.builder.buildStore(ok_err_code, err_ptr); store_inst.setAlignment(Type.anyerror.abiAlignment(target)); @@ -6602,8 +6586,7 @@ pub const FuncGen = struct { const payload_offset = errUnionPayloadOffset(payload_ty, target); const error_offset = errUnionErrorOffset(payload_ty, target); if (isByRef(err_un_ty)) { - const result_ptr = self.buildAlloca(err_un_llvm_ty); - result_ptr.setAlignment(err_un_ty.abiAlignment(target)); + const result_ptr = self.buildAlloca(err_un_llvm_ty, err_un_ty.abiAlignment(target)); const err_ptr = self.builder.buildStructGEP(err_un_llvm_ty, result_ptr, error_offset, ""); const store_inst = self.builder.buildStore(operand, err_ptr); store_inst.setAlignment(Type.anyerror.abiAlignment(target)); @@ -7021,9 +7004,8 @@ pub const FuncGen = struct { if (isByRef(dest_ty)) { const target = self.dg.module.getTarget(); - const alloca_inst = self.buildAlloca(llvm_dest_ty); const result_alignment = dest_ty.abiAlignment(target); - alloca_inst.setAlignment(result_alignment); + const alloca_inst = self.buildAlloca(llvm_dest_ty, result_alignment); { const field_ptr = self.builder.buildStructGEP(llvm_dest_ty, alloca_inst, result_index, ""); const store_inst = self.builder.buildStore(result, field_ptr); @@ -7373,9 +7355,8 @@ pub const FuncGen = struct { if (isByRef(dest_ty)) { const target = self.dg.module.getTarget(); - const alloca_inst = self.buildAlloca(llvm_dest_ty); const result_alignment = dest_ty.abiAlignment(target); - alloca_inst.setAlignment(result_alignment); + const alloca_inst = self.buildAlloca(llvm_dest_ty, result_alignment); { const field_ptr = self.builder.buildStructGEP(llvm_dest_ty, alloca_inst, result_index, ""); const store_inst = self.builder.buildStore(result, field_ptr); @@ -7653,7 +7634,7 @@ pub const FuncGen = struct { if (!result_is_ref) { return self.dg.todo("implement bitcast vector to non-ref array", .{}); } - const array_ptr = self.buildAlloca(llvm_dest_ty); + const array_ptr = self.buildAlloca(llvm_dest_ty, null); const bitcast_ok = elem_ty.bitSize(target) == elem_ty.abiSize(target) * 8; if (bitcast_ok) { const llvm_vector_ty = try self.dg.lowerType(operand_ty); @@ -7729,8 +7710,7 @@ pub const FuncGen = struct { if (result_is_ref) { // Bitcast the result pointer, then store. const alignment = @maximum(operand_ty.abiAlignment(target), inst_ty.abiAlignment(target)); - const result_ptr = self.buildAlloca(llvm_dest_ty); - result_ptr.setAlignment(alignment); + const result_ptr = self.buildAlloca(llvm_dest_ty, alignment); const operand_llvm_ty = try self.dg.lowerType(operand_ty); const casted_ptr = self.builder.buildBitCast(result_ptr, operand_llvm_ty.pointerType(0), ""); const store_inst = self.builder.buildStore(operand, casted_ptr); @@ -7743,8 +7723,7 @@ pub const FuncGen = struct { // but LLVM won't let us bitcast struct values. // Therefore, we store operand to bitcasted alloca, then load for result. 
const alignment = @maximum(operand_ty.abiAlignment(target), inst_ty.abiAlignment(target)); - const result_ptr = self.buildAlloca(llvm_dest_ty); - result_ptr.setAlignment(alignment); + const result_ptr = self.buildAlloca(llvm_dest_ty, alignment); const operand_llvm_ty = try self.dg.lowerType(operand_ty); const casted_ptr = self.builder.buildBitCast(result_ptr, operand_llvm_ty.pointerType(0), ""); const store_inst = self.builder.buildStore(operand, casted_ptr); @@ -7820,11 +7799,9 @@ pub const FuncGen = struct { if (!pointee_type.isFnOrHasRuntimeBitsIgnoreComptime()) return self.dg.lowerPtrToVoid(ptr_ty); const pointee_llvm_ty = try self.dg.lowerType(pointee_type); - const alloca_inst = self.buildAlloca(pointee_llvm_ty); const target = self.dg.module.getTarget(); const alignment = ptr_ty.ptrAlignment(target); - alloca_inst.setAlignment(alignment); - return alloca_inst; + return self.buildAlloca(pointee_llvm_ty, alignment); } fn airRetPtr(self: *FuncGen, inst: Air.Inst.Index) !?*llvm.Value { @@ -7835,15 +7812,13 @@ pub const FuncGen = struct { if (self.ret_ptr) |ret_ptr| return ret_ptr; const ret_llvm_ty = try self.dg.lowerType(ret_ty); const target = self.dg.module.getTarget(); - const alloca_inst = self.buildAlloca(ret_llvm_ty); - alloca_inst.setAlignment(ptr_ty.ptrAlignment(target)); - return alloca_inst; + return self.buildAlloca(ret_llvm_ty, ptr_ty.ptrAlignment(target)); } /// Use this instead of builder.buildAlloca, because this function makes sure to /// put the alloca instruction at the top of the function! - fn buildAlloca(self: *FuncGen, llvm_ty: *llvm.Type) *llvm.Value { - return buildAllocaInner(self.builder, self.llvm_func, self.di_scope != null, llvm_ty); + fn buildAlloca(self: *FuncGen, llvm_ty: *llvm.Type, alignment: ?c_uint) *llvm.Value { + return buildAllocaInner(self.builder, self.llvm_func, self.di_scope != null, llvm_ty, alignment, self.dg.module.getTarget()); } fn airStore(self: *FuncGen, inst: Air.Inst.Index) !?*llvm.Value { @@ -8801,10 +8776,9 @@ pub const FuncGen = struct { if (isByRef(result_ty)) { const llvm_u32 = self.context.intType(32); - const alloca_inst = self.buildAlloca(llvm_result_ty); // TODO in debug builds init to undef so that the padding will be 0xaa // even if we fully populate the fields. - alloca_inst.setAlignment(result_ty.abiAlignment(target)); + const alloca_inst = self.buildAlloca(llvm_result_ty, result_ty.abiAlignment(target)); var indices: [2]*llvm.Value = .{ llvm_u32.constNull(), undefined }; for (elements) |elem, i| { @@ -8842,8 +8816,7 @@ pub const FuncGen = struct { assert(isByRef(result_ty)); const llvm_usize = try self.dg.lowerType(Type.usize); - const alloca_inst = self.buildAlloca(llvm_result_ty); - alloca_inst.setAlignment(result_ty.abiAlignment(target)); + const alloca_inst = self.buildAlloca(llvm_result_ty, result_ty.abiAlignment(target)); const array_info = result_ty.arrayInfo(); var elem_ptr_payload: Type.Payload.Pointer = .{ @@ -8918,7 +8891,7 @@ pub const FuncGen = struct { // necessarily match the format that we need, depending on which tag is active. We // must construct the correct unnamed struct type here and bitcast, in order to // then set the fields appropriately. 
- const result_ptr = self.buildAlloca(union_llvm_ty); + const result_ptr = self.buildAlloca(union_llvm_ty, null); const llvm_payload = try self.resolveInst(extra.init); assert(union_obj.haveFieldTypes()); const field = union_obj.fields.values()[extra.field_index]; @@ -9234,9 +9207,8 @@ pub const FuncGen = struct { if (isByRef(optional_ty)) { const target = self.dg.module.getTarget(); - const alloca_inst = self.buildAlloca(optional_llvm_ty); const payload_alignment = optional_ty.abiAlignment(target); - alloca_inst.setAlignment(payload_alignment); + const alloca_inst = self.buildAlloca(optional_llvm_ty, payload_alignment); { const field_ptr = self.builder.buildStructGEP(optional_llvm_ty, alloca_inst, 0, ""); @@ -9360,8 +9332,7 @@ pub const FuncGen = struct { if (isByRef(info.pointee_type)) { const result_align = info.pointee_type.abiAlignment(target); const max_align = @maximum(result_align, ptr_alignment); - const result_ptr = self.buildAlloca(elem_llvm_ty); - result_ptr.setAlignment(max_align); + const result_ptr = self.buildAlloca(elem_llvm_ty, max_align); const llvm_ptr_u8 = self.context.intType(8).pointerType(0); const llvm_usize = self.context.intType(Type.usize.intInfo(target).bits); const size_bytes = info.pointee_type.abiSize(target); @@ -9394,8 +9365,7 @@ pub const FuncGen = struct { if (isByRef(info.pointee_type)) { const result_align = info.pointee_type.abiAlignment(target); - const result_ptr = self.buildAlloca(elem_llvm_ty); - result_ptr.setAlignment(result_align); + const result_ptr = self.buildAlloca(elem_llvm_ty, result_align); const same_size_int = self.context.intType(elem_bits); const truncated_int = self.builder.buildTrunc(shifted_value, same_size_int, ""); @@ -9519,8 +9489,7 @@ pub const FuncGen = struct { .x86_64 => { const array_llvm_ty = usize_llvm_ty.arrayType(6); const array_ptr = fg.valgrind_client_request_array orelse a: { - const array_ptr = fg.buildAlloca(array_llvm_ty); - array_ptr.setAlignment(usize_alignment); + const array_ptr = fg.buildAlloca(array_llvm_ty, usize_alignment); fg.valgrind_client_request_array = array_ptr; break :a array_ptr; }; @@ -9822,6 +9791,74 @@ fn toLlvmCallConv(cc: std.builtin.CallingConvention, target: std.Target) llvm.Ca }; } +/// Convert a Zig address space to an LLVM address space.
+fn toLlvmAddressSpace(address_space: std.builtin.AddressSpace, target: std.Target) c_uint { + return switch (target.cpu.arch) { + .i386, .x86_64 => switch (address_space) { + .generic => llvm.address_space.default, + .gs => llvm.address_space.x86.gs, + .fs => llvm.address_space.x86.fs, + .ss => llvm.address_space.x86.ss, + else => unreachable, + }, + .nvptx, .nvptx64 => switch (address_space) { + .generic => llvm.address_space.default, + .global => llvm.address_space.nvptx.global, + .constant => llvm.address_space.nvptx.constant, + .param => llvm.address_space.nvptx.param, + .shared => llvm.address_space.nvptx.shared, + .local => llvm.address_space.nvptx.local, + else => unreachable, + }, + .amdgcn => switch (address_space) { + .generic => llvm.address_space.amdgpu.flat, + .global => llvm.address_space.amdgpu.global, + .constant => llvm.address_space.amdgpu.constant, + .shared => llvm.address_space.amdgpu.local, + .local => llvm.address_space.amdgpu.private, + else => unreachable, + }, + else => switch (address_space) { + .generic => llvm.address_space.default, + else => unreachable, + }, + }; +} + +/// On some targets, local values that are in the generic address space must be generated into a +/// different address space, and then cast back to the generic address space. +/// For example, on GPUs local variable declarations must be generated into the local address space. +/// This function returns the address space local values should be generated into. +fn llvmAllocaAddressSpace(target: std.Target) c_uint { + return switch (target.cpu.arch) { + // On amdgcn, locals should be generated into the private address space. + // To make Zig not impossible to use, these are then converted to addresses in the + // generic address space and treated as regular pointers. This is the way that HIP also does it. + .amdgcn => llvm.address_space.amdgpu.private, + else => llvm.address_space.default, + }; +} + +/// On some targets, global values that are in the generic address space must be generated into a +/// different address space, and then cast back to the generic address space. +fn llvmDefaultGlobalAddressSpace(target: std.Target) c_uint { + return switch (target.cpu.arch) { + // On amdgcn, globals must be explicitly allocated and uploaded so that the program can access + // them. + .amdgcn => llvm.address_space.amdgpu.global, + else => llvm.address_space.default, + }; +} + +/// If `llvm_addrspace` is generic, convert it to the actual address space that globals +/// should be stored in by default. +fn toLlvmGlobalAddressSpace(llvm_addrspace: c_uint, target: std.Target) c_uint { + return if (llvm_addrspace == llvm.address_space.default) + llvmDefaultGlobalAddressSpace(target) + else + llvm_addrspace; +} + /// Take into account 0 bit fields and padding. Returns null if an llvm /// field could not be found.
/// This only happens if you want the field index of a zero sized field at @@ -10523,25 +10560,43 @@ fn buildAllocaInner( llvm_func: *llvm.Value, di_scope_non_null: bool, llvm_ty: *llvm.Type, + maybe_alignment: ?c_uint, + target: std.Target, ) *llvm.Value { - const prev_block = builder.getInsertBlock(); - const prev_debug_location = builder.getCurrentDebugLocation2(); - defer { - builder.positionBuilderAtEnd(prev_block); - if (di_scope_non_null) { - builder.setCurrentDebugLocation2(prev_debug_location); + const address_space = llvmAllocaAddressSpace(target); + + const alloca = blk: { + const prev_block = builder.getInsertBlock(); + const prev_debug_location = builder.getCurrentDebugLocation2(); + defer { + builder.positionBuilderAtEnd(prev_block); + if (di_scope_non_null) { + builder.setCurrentDebugLocation2(prev_debug_location); + } + } + + const entry_block = llvm_func.getFirstBasicBlock().?; + if (entry_block.getFirstInstruction()) |first_inst| { + builder.positionBuilder(entry_block, first_inst); + } else { + builder.positionBuilderAtEnd(entry_block); } + builder.clearCurrentDebugLocation(); + + break :blk builder.buildAllocaInAddressSpace(llvm_ty, address_space, ""); + }; + + if (maybe_alignment) |alignment| { + alloca.setAlignment(alignment); } - const entry_block = llvm_func.getFirstBasicBlock().?; - if (entry_block.getFirstInstruction()) |first_inst| { - builder.positionBuilder(entry_block, first_inst); - } else { - builder.positionBuilderAtEnd(entry_block); + // The pointer returned from this function should have the generic address space, + // if this isn't the case then cast it to the generic address space. + if (address_space != llvm.address_space.default) { + return builder.buildAddrSpaceCast(alloca, llvm_ty.pointerType(llvm.address_space.default), ""); } - builder.clearCurrentDebugLocation(); - return builder.buildAlloca(llvm_ty, ""); + return alloca; } fn errUnionPayloadOffset(payload_ty: Type, target: std.Target) u1 { diff --git a/src/codegen/llvm/bindings.zig b/src/codegen/llvm/bindings.zig index 96f4477daa..a5b01d6ddf 100644 --- a/src/codegen/llvm/bindings.zig +++ b/src/codegen/llvm/bindings.zig @@ -171,6 +171,9 @@ pub const Value = opaque { pub const constAdd = LLVMConstAdd; extern fn LLVMConstAdd(LHSConstant: *Value, RHSConstant: *Value) *Value; + pub const constAddrSpaceCast = LLVMConstAddrSpaceCast; + extern fn LLVMConstAddrSpaceCast(ConstantVal: *Value, ToType: *Type) *Value; + pub const setWeak = LLVMSetWeak; extern fn LLVMSetWeak(CmpXchgInst: *Value, IsWeak: Bool) void; @@ -956,6 +959,12 @@ pub const Builder = opaque { pub const setFastMath = ZigLLVMSetFastMath; extern fn ZigLLVMSetFastMath(B: *Builder, on_state: bool) void; + + pub const buildAddrSpaceCast = LLVMBuildAddrSpaceCast; + extern fn LLVMBuildAddrSpaceCast(B: *Builder, Val: *Value, DestTy: *Type, Name: [*:0]const u8) *Value; + + pub const buildAllocaInAddressSpace = ZigLLVMBuildAllocaInAddressSpace; + extern fn ZigLLVMBuildAllocaInAddressSpace(B: *Builder, Ty: *Type, AddressSpace: c_uint, Name: [*:0]const u8) *Value; }; pub const MDString = opaque { diff --git a/src/zig_llvm.cpp b/src/zig_llvm.cpp index 4f9cd76c6a..b5edb336a5 100644 --- a/src/zig_llvm.cpp +++ b/src/zig_llvm.cpp @@ -512,22 +512,22 @@ LLVMValueRef ZigLLVMBuildUSubSat(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRe LLVMValueRef ZigLLVMBuildSMulFixSat(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS, const char *name) { llvm::Type* types[1] = { - unwrap(LHS)->getType(), + unwrap(LHS)->getType(), }; // pass scale = 0 as third argument 
llvm::Value* values[3] = {unwrap(LHS), unwrap(RHS), unwrap(B)->getInt32(0)}; - + CallInst *call_inst = unwrap(B)->CreateIntrinsic(Intrinsic::smul_fix_sat, types, values, nullptr, name); return wrap(call_inst); } LLVMValueRef ZigLLVMBuildUMulFixSat(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS, const char *name) { llvm::Type* types[1] = { - unwrap(LHS)->getType(), + unwrap(LHS)->getType(), }; // pass scale = 0 as third argument llvm::Value* values[3] = {unwrap(LHS), unwrap(RHS), unwrap(B)->getInt32(0)}; - + CallInst *call_inst = unwrap(B)->CreateIntrinsic(Intrinsic::umul_fix_sat, types, values, nullptr, name); return wrap(call_inst); } @@ -808,7 +808,7 @@ void ZigLLVMSetCurrentDebugLocation2(LLVMBuilderRef builder, unsigned int line, unsigned int column, ZigLLVMDIScope *scope, ZigLLVMDILocation *inlined_at) { DIScope* di_scope = reinterpret_cast(scope); - DebugLoc debug_loc = DILocation::get(di_scope->getContext(), line, column, di_scope, + DebugLoc debug_loc = DILocation::get(di_scope->getContext(), line, column, di_scope, reinterpret_cast(inlined_at), false); unwrap(builder)->SetCurrentDebugLocation(debug_loc); } @@ -1177,9 +1177,14 @@ LLVMValueRef ZigLLVMBuildAShrExact(LLVMBuilderRef builder, LLVMValueRef LHS, LLV return wrap(unwrap(builder)->CreateAShr(unwrap(LHS), unwrap(RHS), name, true)); } +LLVMValueRef ZigLLVMBuildAllocaInAddressSpace(LLVMBuilderRef builder, LLVMTypeRef Ty, + unsigned AddressSpace, const char *Name) { + return wrap(unwrap(builder)->CreateAlloca(unwrap(Ty), AddressSpace, nullptr, Name)); +} + void ZigLLVMSetTailCall(LLVMValueRef Call) { unwrap(Call)->setTailCallKind(CallInst::TCK_MustTail); -} +} void ZigLLVMSetCallSret(LLVMValueRef Call, LLVMTypeRef return_type) { CallInst *call_inst = unwrap(Call); diff --git a/src/zig_llvm.h b/src/zig_llvm.h index 7fdddda6a4..1a4d5481b6 100644 --- a/src/zig_llvm.h +++ b/src/zig_llvm.h @@ -162,6 +162,8 @@ ZIG_EXTERN_C LLVMValueRef ZigLLVMBuildLShrExact(LLVMBuilderRef builder, LLVMValu ZIG_EXTERN_C LLVMValueRef ZigLLVMBuildAShrExact(LLVMBuilderRef builder, LLVMValueRef LHS, LLVMValueRef RHS, const char *name); +ZIG_EXTERN_C LLVMValueRef ZigLLVMBuildAllocaInAddressSpace(LLVMBuilderRef builder, LLVMTypeRef Ty, unsigned AddressSpace, + const char *Name); ZIG_EXTERN_C struct ZigLLVMDIType *ZigLLVMCreateDebugPointerType(struct ZigLLVMDIBuilder *dibuilder, struct ZigLLVMDIType *pointee_type, uint64_t size_in_bits, uint64_t align_in_bits, const char *name); -- cgit v1.2.3 From 5d429b03e3d43e937e2b517d594275034a873959 Mon Sep 17 00:00:00 2001 From: Robin Voetter Date: Mon, 22 Aug 2022 00:21:31 +0200 Subject: stage2: add @addrSpaceCast builtin --- src/Air.zig | 5 ++++ src/AstGen.zig | 8 +++++++ src/BuiltinFn.zig | 8 +++++++ src/Liveness.zig | 2 ++ src/Module.zig | 2 +- src/Sema.zig | 54 +++++++++++++++++++++++++++++++++++++++++--- src/Zir.zig | 3 +++ src/arch/aarch64/CodeGen.zig | 1 + src/arch/arm/CodeGen.zig | 1 + src/arch/riscv64/CodeGen.zig | 1 + src/arch/sparc64/CodeGen.zig | 1 + src/arch/wasm/CodeGen.zig | 1 + src/arch/x86_64/CodeGen.zig | 1 + src/codegen/c.zig | 1 + src/codegen/llvm.zig | 12 ++++++++++ src/print_air.zig | 1 + src/print_zir.zig | 1 + 17 files changed, 99 insertions(+), 4 deletions(-) (limited to 'src/codegen') diff --git a/src/Air.zig b/src/Air.zig index 46ba297003..57479af590 100644 --- a/src/Air.zig +++ b/src/Air.zig @@ -729,6 +729,10 @@ pub const Inst = struct { /// Sets the operand as the current error return trace, set_err_return_trace, + /// Convert the address space of a pointer. 
+ /// Uses the `ty_op` field. + addrspace_cast, + pub fn fromCmpOp(op: std.math.CompareOperator, optimized: bool) Tag { switch (op) { .lt => return if (optimized) .cmp_lt_optimized else .cmp_lt, @@ -1138,6 +1142,7 @@ pub fn typeOfIndex(air: Air, inst: Air.Inst.Index) Type { .popcount, .byte_swap, .bit_reverse, + .addrspace_cast, => return air.getRefType(datas[inst].ty_op.ty), .loop, diff --git a/src/AstGen.zig b/src/AstGen.zig index 7534a0d2cc..7bb2ef765c 100644 --- a/src/AstGen.zig +++ b/src/AstGen.zig @@ -7789,6 +7789,14 @@ fn builtinCall( }); return rvalue(gz, rl, result, node); }, + .addrspace_cast => { + const result = try gz.addExtendedPayload(.addrspace_cast, Zir.Inst.BinNode{ + .lhs = try comptimeExpr(gz, scope, .{ .ty = .address_space_type }, params[0]), + .rhs = try expr(gz, scope, .none, params[1]), + .node = gz.nodeIndexToRelative(node), + }); + return rvalue(gz, rl, result, node); + }, // zig fmt: off .has_decl => return hasDeclOrField(gz, scope, rl, node, params[0], params[1], .has_decl), diff --git a/src/BuiltinFn.zig b/src/BuiltinFn.zig index 3a13dde1ab..eb878873a0 100644 --- a/src/BuiltinFn.zig +++ b/src/BuiltinFn.zig @@ -2,6 +2,7 @@ const std = @import("std"); pub const Tag = enum { add_with_overflow, + addrspace_cast, align_cast, align_of, as, @@ -152,6 +153,13 @@ pub const list = list: { .param_count = 4, }, }, + .{ + "@addrSpaceCast", + .{ + .tag = .addrspace_cast, + .param_count = 2, + }, + }, .{ "@alignCast", .{ diff --git a/src/Liveness.zig b/src/Liveness.zig index 5a4bd2265e..54a5041e8b 100644 --- a/src/Liveness.zig +++ b/src/Liveness.zig @@ -268,6 +268,7 @@ pub fn categorizeOperand( .bit_reverse, .splat, .error_set_has_value, + .addrspace_cast, => { const o = air_datas[inst].ty_op; if (o.operand == operand_ref) return matchOperandSmallIndex(l, inst, 0, .none); @@ -844,6 +845,7 @@ fn analyzeInst( .bit_reverse, .splat, .error_set_has_value, + .addrspace_cast, => { const o = inst_datas[inst].ty_op; return trackOperands(a, new_set, inst, main_tomb, .{ o.operand, .none, .none }); diff --git a/src/Module.zig b/src/Module.zig index 44502ab564..7d87bdba53 100644 --- a/src/Module.zig +++ b/src/Module.zig @@ -4617,7 +4617,7 @@ fn semaDecl(mod: *Module, decl_index: Decl.Index) !bool { .constant => target_util.defaultAddressSpace(target, .global_constant), else => unreachable, }, - else => |addrspace_ref| try sema.analyzeAddrspace(&block_scope, address_space_src, addrspace_ref, addrspace_ctx), + else => |addrspace_ref| try sema.analyzeAddressSpace(&block_scope, address_space_src, addrspace_ref, addrspace_ctx), }; }; diff --git a/src/Sema.zig b/src/Sema.zig index b357beafdf..9a6c2acb14 100644 --- a/src/Sema.zig +++ b/src/Sema.zig @@ -975,8 +975,9 @@ fn analyzeBodyInner( .reify => try sema.zirReify( block, extended, inst), .builtin_async_call => try sema.zirBuiltinAsyncCall( block, extended), .cmpxchg => try sema.zirCmpxchg( block, extended), - + .addrspace_cast => try sema.zirAddrSpaceCast( block, extended), // zig fmt: on + .fence => { try sema.zirFence(block, extended); i += 1; @@ -16250,7 +16251,7 @@ fn zirPtrType(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air const address_space = if (inst_data.flags.has_addrspace) blk: { const ref = @intToEnum(Zir.Inst.Ref, sema.code.extra[extra_i]); extra_i += 1; - break :blk try sema.analyzeAddrspace(block, addrspace_src, ref, .pointer); + break :blk try sema.analyzeAddressSpace(block, addrspace_src, ref, .pointer); } else .generic; const bit_offset = if (inst_data.flags.has_bit_range) blk: { @@ -18170,6 +18171,53 @@ 
fn reifyStruct( return sema.analyzeDeclVal(block, src, new_decl_index); } +fn zirAddrSpaceCast(sema: *Sema, block: *Block, extended: Zir.Inst.Extended.InstData) CompileError!Air.Inst.Ref { + const extra = sema.code.extraData(Zir.Inst.BinNode, extended.operand).data; + const src = LazySrcLoc.nodeOffset(extra.node); + const addrspace_src: LazySrcLoc = .{ .node_offset_builtin_call_arg0 = extra.node }; + const ptr_src: LazySrcLoc = .{ .node_offset_builtin_call_arg1 = extra.node }; + + const dest_addrspace = try sema.analyzeAddressSpace(block, addrspace_src, extra.lhs, .pointer); + const ptr = try sema.resolveInst(extra.rhs); + const ptr_ty = sema.typeOf(ptr); + + // TODO in addition to pointers, this instruction is supposed to work for + // pointer-like optionals and slices. + try sema.checkPtrOperand(block, ptr_src, ptr_ty); + + // TODO check address space cast validity. + const src_addrspace = ptr_ty.ptrAddressSpace(); + _ = src_addrspace; + + const ptr_info = ptr_ty.ptrInfo().data; + const dest_ty = try Type.ptr(sema.arena, sema.mod, .{ + .pointee_type = ptr_info.pointee_type, + .@"align" = ptr_info.@"align", + .@"addrspace" = dest_addrspace, + .mutable = ptr_info.mutable, + .@"allowzero" = ptr_info.@"allowzero", + .@"volatile" = ptr_info.@"volatile", + .size = ptr_info.size, + }); + + if (try sema.resolveMaybeUndefVal(block, ptr_src, ptr)) |val| { + // Pointer value should be compatible with both address spaces. + // TODO: Figure out why this generates an invalid bitcast. + return sema.addConstant(dest_ty, val); + } + + try sema.requireRuntimeBlock(block, src, ptr_src); + // TODO: Address space cast safety? + + return block.addInst(.{ + .tag = .addrspace_cast, + .data = .{ .ty_op = .{ + .ty = try sema.addType(dest_ty), + .operand = ptr, + } }, + }); +} + fn zirTypeName(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileError!Air.Inst.Ref { const inst_data = sema.code.instructions.items(.data)[inst].un_node; const ty_src: LazySrcLoc = .{ .node_offset_builtin_call_arg0 = inst_data.src_node }; @@ -30292,7 +30340,7 @@ pub const AddressSpaceContext = enum { pointer, }; -pub fn analyzeAddrspace( +pub fn analyzeAddressSpace( sema: *Sema, block: *Block, src: LazySrcLoc, diff --git a/src/Zir.zig b/src/Zir.zig index 890109fcb0..351330b7c4 100644 --- a/src/Zir.zig +++ b/src/Zir.zig @@ -1969,6 +1969,9 @@ pub const Inst = struct { /// `small` 0=>weak 1=>strong /// `operand` is payload index to `Cmpxchg`. cmpxchg, + /// Implement the builtin `@addrSpaceCast` + /// `operand` is payload index to `BinNode`. `lhs` is dest type, `rhs` is operand.
+ addrspace_cast, pub const InstData = struct { opcode: Extended, diff --git a/src/arch/aarch64/CodeGen.zig b/src/arch/aarch64/CodeGen.zig index ed3a281b80..2758fd36df 100644 --- a/src/arch/aarch64/CodeGen.zig +++ b/src/arch/aarch64/CodeGen.zig @@ -677,6 +677,7 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .union_init => try self.airUnionInit(inst), .prefetch => try self.airPrefetch(inst), .mul_add => try self.airMulAdd(inst), + .addrspace_cast => return self.fail("TODO implement addrspace_cast", .{}), .@"try" => try self.airTry(inst), .try_ptr => try self.airTryPtr(inst), diff --git a/src/arch/arm/CodeGen.zig b/src/arch/arm/CodeGen.zig index 95dfb2eea3..855951f5fa 100644 --- a/src/arch/arm/CodeGen.zig +++ b/src/arch/arm/CodeGen.zig @@ -690,6 +690,7 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .union_init => try self.airUnionInit(inst), .prefetch => try self.airPrefetch(inst), .mul_add => try self.airMulAdd(inst), + .addrspace_cast => return self.fail("TODO implement addrspace_cast", .{}), .@"try" => try self.airTry(inst), .try_ptr => try self.airTryPtr(inst), diff --git a/src/arch/riscv64/CodeGen.zig b/src/arch/riscv64/CodeGen.zig index 87e81748f9..dd31bfb6f7 100644 --- a/src/arch/riscv64/CodeGen.zig +++ b/src/arch/riscv64/CodeGen.zig @@ -604,6 +604,7 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .union_init => try self.airUnionInit(inst), .prefetch => try self.airPrefetch(inst), .mul_add => try self.airMulAdd(inst), + .addrspace_cast => @panic("TODO"), .@"try" => @panic("TODO"), .try_ptr => @panic("TODO"), diff --git a/src/arch/sparc64/CodeGen.zig b/src/arch/sparc64/CodeGen.zig index cfcfedf7cc..6217119f34 100644 --- a/src/arch/sparc64/CodeGen.zig +++ b/src/arch/sparc64/CodeGen.zig @@ -618,6 +618,7 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .union_init => @panic("TODO try self.airUnionInit(inst)"), .prefetch => try self.airPrefetch(inst), .mul_add => @panic("TODO try self.airMulAdd(inst)"), + .addrspace_cast => @panic("TODO try self.airAddrSpaceCast(inst)"), .@"try" => try self.airTry(inst), .try_ptr => @panic("TODO try self.airTryPtr(inst)"), diff --git a/src/arch/wasm/CodeGen.zig b/src/arch/wasm/CodeGen.zig index f27957d3f5..619addfba1 100644 --- a/src/arch/wasm/CodeGen.zig +++ b/src/arch/wasm/CodeGen.zig @@ -1699,6 +1699,7 @@ fn genInst(self: *Self, inst: Air.Inst.Index) !WValue { .set_err_return_trace, .is_named_enum_value, .error_set_has_value, + .addrspace_cast, => |tag| return self.fail("TODO: Implement wasm inst: {s}", .{@tagName(tag)}), .add_optimized, diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 27eb11c649..abba07b0e8 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -695,6 +695,7 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .union_init => try self.airUnionInit(inst), .prefetch => try self.airPrefetch(inst), .mul_add => try self.airMulAdd(inst), + .addrspace_cast => return self.fail("TODO implement addrspace_cast", .{}), .@"try" => try self.airTry(inst), .try_ptr => try self.airTryPtr(inst), diff --git a/src/codegen/c.zig b/src/codegen/c.zig index b25e05e118..072091d9b2 100644 --- a/src/codegen/c.zig +++ b/src/codegen/c.zig @@ -1871,6 +1871,7 @@ fn genBody(f: *Function, body: []const Air.Inst.Index) error{ AnalysisFail, OutO .aggregate_init => try airAggregateInit(f, inst), .union_init => try airUnionInit(f, inst), .prefetch => try airPrefetch(f, inst), + .addrspace_cast
=> return f.fail("TODO: C backend: implement addrspace_cast", .{}), .@"try" => try airTry(f, inst), .try_ptr => try airTryPtr(f, inst), diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig index f400e841ea..aebd7a7dd0 100644 --- a/src/codegen/llvm.zig +++ b/src/codegen/llvm.zig @@ -4512,6 +4512,7 @@ pub const FuncGen = struct { .aggregate_init => try self.airAggregateInit(inst), .union_init => try self.airUnionInit(inst), .prefetch => try self.airPrefetch(inst), + .addrspace_cast => try self.airAddrSpaceCast(inst), .is_named_enum_value => try self.airIsNamedEnumValue(inst), .error_set_has_value => try self.airErrorSetHasValue(inst), @@ -9045,6 +9046,17 @@ pub const FuncGen = struct { return null; } + fn airAddrSpaceCast(self: *FuncGen, inst: Air.Inst.Index) !?*llvm.Value { + if (self.liveness.isUnused(inst)) return null; + + const ty_op = self.air.instructions.items(.data)[inst].ty_op; + const inst_ty = self.air.typeOfIndex(inst); + const operand = try self.resolveInst(ty_op.operand); + + const llvm_dest_ty = try self.dg.lowerType(inst_ty); + return self.builder.buildAddrSpaceCast(operand, llvm_dest_ty, ""); + } + fn softF80TruncOrExt( self: *FuncGen, operand: *llvm.Value, diff --git a/src/print_air.zig b/src/print_air.zig index fb6f7e6cf2..d3523c0fc6 100644 --- a/src/print_air.zig +++ b/src/print_air.zig @@ -244,6 +244,7 @@ const Writer = struct { .byte_swap, .bit_reverse, .error_set_has_value, + .addrspace_cast, => try w.writeTyOp(s, inst), .block, diff --git a/src/print_zir.zig b/src/print_zir.zig index 8f055e9ddd..f2a79d53a4 100644 --- a/src/print_zir.zig +++ b/src/print_zir.zig @@ -512,6 +512,7 @@ const Writer = struct { .err_set_cast, .wasm_memory_grow, .prefetch, + .addrspace_cast, => { const inst_data = self.code.extraData(Zir.Inst.BinNode, extended.operand).data; const src = LazySrcLoc.nodeOffset(inst_data.node); -- cgit v1.2.3 From 8894d1c45eb01fa3fbcc9173bac729e5812307ed Mon Sep 17 00:00:00 2001 From: Robin Voetter Date: Sun, 18 Sep 2022 13:21:49 +0200 Subject: stage2: f128 improvements for targets that do not support it --- src/codegen/llvm.zig | 98 +++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 97 insertions(+), 1 deletion(-) (limited to 'src/codegen') diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig index aebd7a7dd0..9c8551854b 100644 --- a/src/codegen/llvm.zig +++ b/src/codegen/llvm.zig @@ -7576,6 +7576,8 @@ pub const FuncGen = struct { const src_bits = operand_ty.floatBits(target); if (!backendSupportsF80(target) and (src_bits == 80 or dest_bits == 80)) { return softF80TruncOrExt(self, operand, src_bits, dest_bits); + } else if (!backendSupportsF128(target) and (src_bits == 128 or dest_bits == 128)) { + return softF128TruncOrExt(self, operand, src_bits, dest_bits); } const dest_llvm_ty = try self.dg.lowerType(dest_ty); return self.builder.buildFPTrunc(operand, dest_llvm_ty, ""); @@ -7594,6 +7596,8 @@ pub const FuncGen = struct { const src_bits = operand_ty.floatBits(target); if (!backendSupportsF80(target) and (src_bits == 80 or dest_bits == 80)) { return softF80TruncOrExt(self, operand, src_bits, dest_bits); + } else if (!backendSupportsF128(target) and (src_bits == 128 or dest_bits == 128)) { + return softF128TruncOrExt(self, operand, src_bits, dest_bits); } const dest_llvm_ty = try self.dg.lowerType(self.air.typeOfIndex(inst)); return self.builder.buildFPExt(operand, dest_llvm_ty, ""); @@ -9138,6 +9142,88 @@ pub const FuncGen = struct { return self.builder.buildBitCast(result, final_cast_llvm_ty, ""); } + fn softF128TruncOrExt( + self: 
*FuncGen, + operand: *llvm.Value, + src_bits: u16, + dest_bits: u16, + ) !?*llvm.Value { + const target = self.dg.module.getTarget(); + + var param_llvm_ty: *llvm.Type = self.context.fp128Type(); + var ret_llvm_ty: *llvm.Type = param_llvm_ty; + var fn_name: [*:0]const u8 = undefined; + var arg = operand; + var final_cast: ?*llvm.Type = null; + + assert(src_bits == 128 or dest_bits == 128); + + // TODO: Implement proper names and compiler-rt functions for this!! + if (src_bits == 128) switch (dest_bits) { + 16 => { + // See corresponding condition at definition of + // __truncxfhf2 in compiler-rt. + if (target.cpu.arch.isAARCH64()) { + ret_llvm_ty = self.context.halfType(); + } else { + ret_llvm_ty = self.context.intType(16); + final_cast = self.context.halfType(); + } + fn_name = "__trunctfhf2"; + }, + 32 => { + ret_llvm_ty = self.context.floatType(); + fn_name = "__trunctfsf2"; + }, + 64 => { + ret_llvm_ty = self.context.doubleType(); + fn_name = "__trunctfdf2"; + }, + 80 => { + ret_llvm_ty = self.context.intType(80); + fn_name = "__trunctfxf2"; + }, + 128 => return operand, + else => unreachable, + } else switch (src_bits) { + 16 => { + // See corresponding condition at definition of + // __extendhftf2 in compiler-rt. + param_llvm_ty = if (target.cpu.arch.isAARCH64()) + self.context.halfType() + else + self.context.intType(16); + arg = self.builder.buildBitCast(arg, param_llvm_ty, ""); + fn_name = "__extendhftf2"; + }, + 32 => { + param_llvm_ty = self.context.floatType(); + fn_name = "__extendsftf2"; + }, + 64 => { + param_llvm_ty = self.context.doubleType(); + fn_name = "__extenddftf2"; + }, + 80 => { + param_llvm_ty = self.context.intType(80); + fn_name = "__extendxftf2"; + }, + 128 => return operand, + else => unreachable, + } + + const llvm_fn = self.dg.object.llvm_module.getNamedFunction(fn_name) orelse f: { + const param_types = [_]*llvm.Type{param_llvm_ty}; + const fn_type = llvm.functionType(ret_llvm_ty, &param_types, param_types.len, .False); + break :f self.dg.object.llvm_module.addFunction(fn_name, fn_type); + }; + + var args: [1]*llvm.Value = .{arg}; + const result = self.builder.buildCall(llvm_fn.globalGetValueType(), llvm_fn, &args, args.len, .C, .Auto, ""); + const final_cast_llvm_ty = final_cast orelse return result; + return self.builder.buildBitCast(result, final_cast_llvm_ty, ""); + } + fn getErrorNameTable(self: *FuncGen) !*llvm.Value { if (self.dg.object.error_name_table) |table| { return table; @@ -10489,13 +10575,23 @@ fn backendSupportsF16(target: std.Target) bool { }; } +/// This function returns true if we expect LLVM to lower f128 correctly, +/// and false if we expect LLVM to crash if it encounters an f128 type +/// or if it produces miscompilations.
+fn backendSupportsF128(target: std.Target) bool { + return switch (target.cpu.arch) { + .amdgcn => false, + else => true, + }; +} + /// LLVM does not support all relevant intrinsics for all targets, so we /// may need to manually generate a libc call fn intrinsicsAllowed(scalar_ty: Type, target: std.Target) bool { return switch (scalar_ty.tag()) { .f16 => backendSupportsF16(target), .f80 => target.longDoubleIs(f80) and backendSupportsF80(target), - .f128 => target.longDoubleIs(f128), + .f128 => target.longDoubleIs(f128) and backendSupportsF128(target), else => true, }; } -- cgit v1.2.3 From e90a42a80844f49e8755ab92d1c082e9ac906dee Mon Sep 17 00:00:00 2001 From: Robin Voetter Date: Sun, 25 Sep 2022 01:15:33 +0200 Subject: stage2: improve globals with address spaces a little --- src/codegen/llvm.zig | 72 +++++++++++++++++++++++++++------------------------- 1 file changed, 37 insertions(+), 35 deletions(-) (limited to 'src/codegen') diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig index 9c8551854b..aff7656bd3 100644 --- a/src/codegen/llvm.zig +++ b/src/codegen/llvm.zig @@ -2399,8 +2399,7 @@ pub const DeclGen = struct { // mismatch, because we don't have the LLVM type until the *value* is created, // whereas the global needs to be created based on the type alone, because // lowering the value may reference the global as a pointer. - const llvm_addrspace = toLlvmAddressSpace(decl.@"addrspace", target); - const llvm_global_addrspace = toLlvmGlobalAddressSpace(llvm_addrspace, target); + const llvm_global_addrspace = toLlvmGlobalAddressSpace(decl.@"addrspace", target); const new_global = dg.object.llvm_module.addGlobalInAddressSpace( llvm_init.typeOf(), "", @@ -2414,12 +2413,9 @@ pub const DeclGen = struct { // replaceAllUsesWith requires the type to be unchanged. So we convert // the new global to the old type and use that as the thing to replace // old uses. - const new_global_ptr = if (llvm_addrspace != llvm_global_addrspace) - new_global.constAddrSpaceCast(llvm_init.typeOf().pointerType(llvm_addrspace)) - else - new_global; - const new_global_casted_ptr = new_global_ptr.constBitCast(global.typeOf()); - global.replaceAllUsesWith(new_global_casted_ptr); + // TODO: How should this work when the address space of a global changed?
+ const new_global_ptr = new_global.constBitCast(global.typeOf()); + global.replaceAllUsesWith(new_global_ptr); dg.object.decl_map.putAssumeCapacity(decl_index, new_global); new_global.takeName(global); global.deleteGlobal(); @@ -2617,11 +2613,12 @@ pub const DeclGen = struct { const target = dg.module.getTarget(); const llvm_type = try dg.lowerType(decl.ty); - const llvm_addrspace = toLlvmAddressSpace(decl.@"addrspace", target); + const llvm_actual_addrspace = toLlvmGlobalAddressSpace(decl.@"addrspace", target); + const llvm_global = dg.object.llvm_module.addGlobalInAddressSpace( llvm_type, fqn, - toLlvmGlobalAddressSpace(llvm_addrspace, target), + llvm_actual_addrspace, ); gop.value_ptr.* = llvm_global; @@ -3241,16 +3238,18 @@ pub const DeclGen = struct { const decl_index = tv.val.castTag(.variable).?.data.owner_decl; const decl = dg.module.declPtr(decl_index); dg.module.markDeclAlive(decl); + + const llvm_wanted_addrspace = toLlvmAddressSpace(decl.@"addrspace", target); + const llvm_actual_addrspace = toLlvmGlobalAddressSpace(decl.@"addrspace", target); + const llvm_var_type = try dg.lowerType(tv.ty); - const llvm_var_addrspace = toLlvmAddressSpace(decl.@"addrspace", target); - const llvm_global_addrspace = toLlvmGlobalAddressSpace(llvm_var_addrspace, target); - const llvm_var_ptr_type = llvm_var_type.pointerType(llvm_global_addrspace); + const llvm_actual_ptr_type = llvm_var_type.pointerType(llvm_actual_addrspace); const val = try dg.resolveGlobalDecl(decl_index); - const val_ptr = val.constBitCast(llvm_var_ptr_type); - if (llvm_global_addrspace != llvm_var_addrspace) { - const llvm_ptr_type = llvm_var_type.pointerType(llvm_var_addrspace); - return val_ptr.constAddrSpaceCast(llvm_ptr_type); + const val_ptr = val.constBitCast(llvm_actual_ptr_type); + if (llvm_actual_addrspace != llvm_wanted_addrspace) { + const llvm_wanted_ptr_type = llvm_var_type.pointerType(llvm_wanted_addrspace); + return val_ptr.constAddrSpaceCast(llvm_wanted_ptr_type); } return val_ptr; }, @@ -4055,12 +4054,12 @@ pub const DeclGen = struct { try self.resolveGlobalDecl(decl_index); const target = self.module.getTarget(); - const llvm_addrspace = toLlvmAddressSpace(decl.@"addrspace", target); - const llvm_global_addrspace = toLlvmGlobalAddressSpace(llvm_addrspace, target); - const llvm_val = if (llvm_addrspace != llvm_global_addrspace) blk: { + const llvm_wanted_addrspace = toLlvmAddressSpace(decl.@"addrspace", target); + const llvm_actual_addrspace = toLlvmGlobalAddressSpace(decl.@"addrspace", target); + const llvm_val = if (llvm_wanted_addrspace != llvm_actual_addrspace) blk: { const llvm_decl_ty = try self.lowerType(decl.ty); - const llvm_decl_ptr_ty = llvm_decl_ty.pointerType(llvm_addrspace); - break :blk llvm_decl_val.constAddrSpaceCast(llvm_decl_ptr_ty); + const llvm_decl_wanted_ptr_ty = llvm_decl_ty.pointerType(llvm_wanted_addrspace); + break :blk llvm_decl_val.constAddrSpaceCast(llvm_decl_wanted_ptr_ty); } else llvm_decl_val; const llvm_type = try self.lowerType(tv.ty); @@ -4328,9 +4327,9 @@ pub const FuncGen = struct { // We have an LLVM value but we need to create a global constant and // set the value as its initializer, and then return a pointer to the global. 
const target = self.dg.module.getTarget(); - const llvm_addrspace = toLlvmAddressSpace(.generic, target); - const llvm_global_addrspace = toLlvmGlobalAddressSpace(llvm_addrspace, target); - const global = self.dg.object.llvm_module.addGlobalInAddressSpace(llvm_val.typeOf(), "", llvm_global_addrspace); + const llvm_wanted_addrspace = toLlvmAddressSpace(.generic, target); + const llvm_actual_addrspace = toLlvmGlobalAddressSpace(.generic, target); + const global = self.dg.object.llvm_module.addGlobalInAddressSpace(llvm_val.typeOf(), "", llvm_actual_addrspace); global.setInitializer(llvm_val); global.setLinkage(.Private); global.setGlobalConstant(.True); @@ -4340,10 +4339,13 @@ pub const FuncGen = struct { // the type of global constants might not match the type it is supposed to // be, and so we must bitcast the pointer at the usage sites. const wanted_llvm_ty = try self.dg.lowerType(ty); - const wanted_bitcasted_llvm_ptr_ty = wanted_llvm_ty.pointerType(llvm_global_addrspace); - const bitcasted_ptr = global.constBitCast(wanted_bitcasted_llvm_ptr_ty); - const wanted_llvm_ptr_ty = wanted_llvm_ty.pointerType(llvm_addrspace); - const casted_ptr = bitcasted_ptr.constAddrSpaceCast(wanted_llvm_ptr_ty); + const wanted_bitcasted_llvm_ptr_ty = wanted_llvm_ty.pointerType(llvm_actual_addrspace); + const bitcasted_ptr = global.constBitCast(wanted_bitcasted_llvm_ptr_ty); + const wanted_llvm_ptr_ty = wanted_llvm_ty.pointerType(llvm_wanted_addrspace); + const casted_ptr = if (llvm_wanted_addrspace != llvm_actual_addrspace) + bitcasted_ptr.constAddrSpaceCast(wanted_llvm_ptr_ty) + else + bitcasted_ptr; gop.value_ptr.* = casted_ptr; return casted_ptr; } @@ -9948,13 +9950,13 @@ fn llvmDefaultGlobalAddressSpace(target: std.Target) c_uint { }; } -/// If `llvm_addrspace` is generic, convert it to the actual address space that globals -/// should be stored in by default. -fn toLlvmGlobalAddressSpace(llvm_addrspace: c_uint, target: std.Target) c_uint { - return if (llvm_addrspace == llvm.address_space.default) - llvmDefaultGlobalAddressSpace(target) - else - llvm_addrspace; +/// Return the actual address space that a value should be stored in if it is a global. +/// When a value is placed in the resulting address space, it needs to be cast back into wanted_address_space. +fn toLlvmGlobalAddressSpace(wanted_address_space: std.builtin.AddressSpace, target: std.Target) c_uint { + return switch (wanted_address_space) { + .generic => llvmDefaultGlobalAddressSpace(target), + else => |as| toLlvmAddressSpace(as, target), + }; } /// Take into account 0 bit fields and padding. Returns null if an llvm -- cgit v1.2.3
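As a closing illustration (not part of the patch series above): a minimal sketch of the device code this series is building toward, assuming an amdgcn target such as -target amdgcn-amdhsa. The kernel name and body are hypothetical; the sketch exercises the AmdgpuKernel calling convention from the first commit, an addrspace-qualified pointer parameter, and the @addrSpaceCast builtin added here:

    // Hypothetical amdgcn device code; names are illustrative, not from the patches.
    export fn setFirst(out: *addrspace(.global) f32) callconv(.AmdgpuKernel) void {
        // Locals are alloca'd in the private address space and addrspace-cast back
        // to generic by buildAllocaInner, so they behave like ordinary pointers.
        var value: f32 = 1.0;
        // @addrSpaceCast(<address space>, <pointer>) changes only the address space
        // of the pointer type; cast validity checking is still a TODO in zirAddrSpaceCast.
        const generic_out = @addrSpaceCast(.generic, out);
        generic_out.* = value;
    }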