about summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
authorRobin Voetter <robin@voetter.nl>2024-08-14 04:14:34 +0200
committerGitHub <noreply@github.com>2024-08-14 04:14:34 +0200
commit1018cdc0a8d0bdd9c90cf09fed5a38f510f97b62 (patch)
tree38c11449e17222199c11afb30f942437498a3142 /src
parent0b5ea2b902b5802786cac70740e93872d2a0973d (diff)
parentbcfc7cf13cd3eb16f4b864efac5269d68200b070 (diff)
downloadzig-1018cdc0a8d0bdd9c90cf09fed5a38f510f97b62.tar.gz
zig-1018cdc0a8d0bdd9c90cf09fed5a38f510f97b62.zip
Merge pull request #21030 from Snektron/nv-gpu-builtins
nvptx: add implementations for GPU builtins
Diffstat (limited to 'src')
-rw-r--r--src/Sema.zig2
-rw-r--r--src/codegen/llvm.zig54
-rw-r--r--src/codegen/llvm/Builder.zig77
3 files changed, 113 insertions, 20 deletions
diff --git a/src/Sema.zig b/src/Sema.zig
index 4fcb9c98c8..2d8858d7cd 100644
--- a/src/Sema.zig
+++ b/src/Sema.zig
@@ -26703,7 +26703,7 @@ fn zirWorkItem(
switch (target.cpu.arch) {
// TODO: Allow for other GPU targets.
- .amdgcn, .spirv, .spirv64, .spirv32 => {},
+ .amdgcn, .spirv, .spirv64, .spirv32, .nvptx, .nvptx64 => {},
else => {
return sema.fail(block, builtin_src, "builtin only available on GPU targets; targeted architecture is {s}", .{@tagName(target.cpu.arch)});
},
diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig
index 6453ea04a3..e656150584 100644
--- a/src/codegen/llvm.zig
+++ b/src/codegen/llvm.zig
@@ -10286,7 +10286,7 @@ pub const FuncGen = struct {
return self.wip.cast(.addrspacecast, operand, try o.lowerType(inst_ty), "");
}
- fn amdgcnWorkIntrinsic(
+ fn workIntrinsic(
self: *FuncGen,
dimension: u32,
default: u32,
@@ -10303,44 +10303,60 @@ pub const FuncGen = struct {
fn airWorkItemId(self: *FuncGen, inst: Air.Inst.Index) !Builder.Value {
const o = self.ng.object;
const target = o.pt.zcu.getTarget();
- assert(target.cpu.arch == .amdgcn); // TODO is to port this function to other GPU architectures
const pl_op = self.air.instructions.items(.data)[@intFromEnum(inst)].pl_op;
const dimension = pl_op.payload;
- return self.amdgcnWorkIntrinsic(dimension, 0, "amdgcn.workitem.id");
+
+ return switch (target.cpu.arch) {
+ .amdgcn => self.workIntrinsic(dimension, 0, "amdgcn.workitem.id"),
+ .nvptx, .nvptx64 => self.workIntrinsic(dimension, 0, "nvvm.read.ptx.sreg.tid"),
+ else => unreachable,
+ };
}
fn airWorkGroupSize(self: *FuncGen, inst: Air.Inst.Index) !Builder.Value {
const o = self.ng.object;
const target = o.pt.zcu.getTarget();
- assert(target.cpu.arch == .amdgcn); // TODO is to port this function to other GPU architectures
const pl_op = self.air.instructions.items(.data)[@intFromEnum(inst)].pl_op;
const dimension = pl_op.payload;
- if (dimension >= 3) return .@"1";
-
- // Fetch the dispatch pointer, which points to this structure:
- // https://github.com/RadeonOpenCompute/ROCR-Runtime/blob/adae6c61e10d371f7cbc3d0e94ae2c070cab18a4/src/inc/hsa.h#L2913
- const dispatch_ptr =
- try self.wip.callIntrinsic(.normal, .none, .@"amdgcn.dispatch.ptr", &.{}, &.{}, "");
- // Load the work_group_* member from the struct as u16.
- // Just treat the dispatch pointer as an array of u16 to keep things simple.
- const workgroup_size_ptr = try self.wip.gep(.inbounds, .i16, dispatch_ptr, &.{
- try o.builder.intValue(try o.lowerType(Type.usize), 2 + dimension),
- }, "");
- const workgroup_size_alignment = comptime Builder.Alignment.fromByteUnits(2);
- return self.wip.load(.normal, .i16, workgroup_size_ptr, workgroup_size_alignment, "");
+ switch (target.cpu.arch) {
+ .amdgcn => {
+ if (dimension >= 3) return .@"1";
+
+ // Fetch the dispatch pointer, which points to this structure:
+ // https://github.com/RadeonOpenCompute/ROCR-Runtime/blob/adae6c61e10d371f7cbc3d0e94ae2c070cab18a4/src/inc/hsa.h#L2913
+ const dispatch_ptr =
+ try self.wip.callIntrinsic(.normal, .none, .@"amdgcn.dispatch.ptr", &.{}, &.{}, "");
+
+ // Load the work_group_* member from the struct as u16.
+ // Just treat the dispatch pointer as an array of u16 to keep things simple.
+ const workgroup_size_ptr = try self.wip.gep(.inbounds, .i16, dispatch_ptr, &.{
+ try o.builder.intValue(try o.lowerType(Type.usize), 2 + dimension),
+ }, "");
+ const workgroup_size_alignment = comptime Builder.Alignment.fromByteUnits(2);
+ return self.wip.load(.normal, .i16, workgroup_size_ptr, workgroup_size_alignment, "");
+ },
+ .nvptx, .nvptx64 => {
+ return self.workIntrinsic(dimension, 1, "nvvm.read.ptx.sreg.ntid");
+ },
+ else => unreachable,
+ }
}
fn airWorkGroupId(self: *FuncGen, inst: Air.Inst.Index) !Builder.Value {
const o = self.ng.object;
const target = o.pt.zcu.getTarget();
- assert(target.cpu.arch == .amdgcn); // TODO is to port this function to other GPU architectures
const pl_op = self.air.instructions.items(.data)[@intFromEnum(inst)].pl_op;
const dimension = pl_op.payload;
- return self.amdgcnWorkIntrinsic(dimension, 0, "amdgcn.workgroup.id");
+
+ return switch (target.cpu.arch) {
+ .amdgcn => self.workIntrinsic(dimension, 0, "amdgcn.workgroup.id"),
+ .nvptx, .nvptx64 => self.workIntrinsic(dimension, 0, "nvvm.read.ptx.sreg.ctaid"),
+ else => unreachable,
+ };
}
fn getErrorNameTable(self: *FuncGen) Allocator.Error!Builder.Variable.Index {
diff --git a/src/codegen/llvm/Builder.zig b/src/codegen/llvm/Builder.zig
index a23bc86c57..90da3bdd7a 100644
--- a/src/codegen/llvm/Builder.zig
+++ b/src/codegen/llvm/Builder.zig
@@ -2729,6 +2729,17 @@ pub const Intrinsic = enum {
@"amdgcn.workgroup.id.z",
@"amdgcn.dispatch.ptr",
+ // NVPTX
+ @"nvvm.read.ptx.sreg.tid.x",
+ @"nvvm.read.ptx.sreg.tid.y",
+ @"nvvm.read.ptx.sreg.tid.z",
+ @"nvvm.read.ptx.sreg.ntid.x",
+ @"nvvm.read.ptx.sreg.ntid.y",
+ @"nvvm.read.ptx.sreg.ntid.z",
+ @"nvvm.read.ptx.sreg.ctaid.x",
+ @"nvvm.read.ptx.sreg.ctaid.y",
+ @"nvvm.read.ptx.sreg.ctaid.z",
+
// WebAssembly
@"wasm.memory.size",
@"wasm.memory.grow",
@@ -3886,6 +3897,72 @@ pub const Intrinsic = enum {
.attrs = &.{ .nocallback, .nofree, .nosync, .nounwind, .speculatable, .willreturn, .{ .memory = Attribute.Memory.all(.none) } },
},
+ .@"nvvm.read.ptx.sreg.tid.x" = .{
+ .ret_len = 1,
+ .params = &.{
+ .{ .kind = .{ .type = .i32 } },
+ },
+ .attrs = &.{ .nounwind, .readnone },
+ },
+ .@"nvvm.read.ptx.sreg.tid.y" = .{
+ .ret_len = 1,
+ .params = &.{
+ .{ .kind = .{ .type = .i32 } },
+ },
+ .attrs = &.{ .nounwind, .readnone },
+ },
+ .@"nvvm.read.ptx.sreg.tid.z" = .{
+ .ret_len = 1,
+ .params = &.{
+ .{ .kind = .{ .type = .i32 } },
+ },
+ .attrs = &.{ .nounwind, .readnone },
+ },
+
+ .@"nvvm.read.ptx.sreg.ntid.x" = .{
+ .ret_len = 1,
+ .params = &.{
+ .{ .kind = .{ .type = .i32 } },
+ },
+ .attrs = &.{ .nounwind, .readnone },
+ },
+ .@"nvvm.read.ptx.sreg.ntid.y" = .{
+ .ret_len = 1,
+ .params = &.{
+ .{ .kind = .{ .type = .i32 } },
+ },
+ .attrs = &.{ .nounwind, .readnone },
+ },
+ .@"nvvm.read.ptx.sreg.ntid.z" = .{
+ .ret_len = 1,
+ .params = &.{
+ .{ .kind = .{ .type = .i32 } },
+ },
+ .attrs = &.{ .nounwind, .readnone },
+ },
+
+ .@"nvvm.read.ptx.sreg.ctaid.x" = .{
+ .ret_len = 1,
+ .params = &.{
+ .{ .kind = .{ .type = .i32 } },
+ },
+ .attrs = &.{ .nounwind, .readnone },
+ },
+ .@"nvvm.read.ptx.sreg.ctaid.y" = .{
+ .ret_len = 1,
+ .params = &.{
+ .{ .kind = .{ .type = .i32 } },
+ },
+ .attrs = &.{ .nounwind, .readnone },
+ },
+ .@"nvvm.read.ptx.sreg.ctaid.z" = .{
+ .ret_len = 1,
+ .params = &.{
+ .{ .kind = .{ .type = .i32 } },
+ },
+ .attrs = &.{ .nounwind, .readnone },
+ },
+
.@"wasm.memory.size" = .{
.ret_len = 1,
.params = &.{