author    Robin Voetter <robin@voetter.nl>  2023-03-18 15:59:56 +0100
committer Veikka Tuominen <git@vexu.eu>     2023-03-30 12:20:24 +0300
commit    3357c59cebacb6b60da865376b20d2b307d12ec1
tree      51edbb19a1f063888bac386c6a51ba250e361b11 /src/codegen
parent    83051b0cbf31b76e824d3911a7f4a0be3c0cf94d
new builtins: @workItemId, @workGroupId, @workGroupSize
* @workItemId returns the index of the work item in a work group for a dimension.
* @workGroupId returns the index of the work group in the kernel dispatch for a dimension.
* @workGroupSize returns the size of the work group for a dimension.

These builtins are mainly useful for GPU backends. They are currently only implemented for the AMDGCN LLVM backend.
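As a usage illustration (not part of the commit): assuming these builtins take a comptime dimension index (0 = x, 1 = y, 2 = z) and return 32-bit unsigned values, and assuming an AMDGPU kernel target and calling convention (the triple and callconv name below are assumptions), a kernel can combine them to compute its global invocation index:

    // Hypothetical kernel; assumes -target amdgcn-amdhsa and an AMDGPU kernel
    // calling convention as available around the time of this commit.
    export fn writeGlobalId(out: [*]addrspace(.global) u32) callconv(.AmdgpuKernel) void {
        // global index along x = work group index * work group size + work item index
        const gid = @workGroupId(0) * @workGroupSize(0) + @workItemId(0);
        out[gid] = gid;
    }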
Diffstat (limited to 'src/codegen')
-rw-r--r--  src/codegen/c.zig      5
-rw-r--r--  src/codegen/llvm.zig  72
2 files changed, 77 insertions, 0 deletions
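One note on the llvm.zig hunk below: airWorkGroupSize reads the work group size out of the HSA kernel dispatch packet by treating the dispatch pointer as an array of u16. The 2 + dimension index works because two u16 fields (header and setup) precede workgroup_size_x in the packet. A sketch of the packet prefix, restated from the hsa.h header linked in the code (illustrative only; the struct name here is made up):

    // Leading fields of hsa_kernel_dispatch_packet_t, viewed as an array of u16.
    const DispatchPacketPrefix = extern struct {
        header: u16, // u16 index 0
        setup: u16, // u16 index 1
        workgroup_size_x: u16, // u16 index 2, so offset = 2 + dimension
        workgroup_size_y: u16, // u16 index 3
        workgroup_size_z: u16, // u16 index 4
        // ...rest of the packet omitted
    };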
diff --git a/src/codegen/c.zig b/src/codegen/c.zig
index 6c4bb3c688..704a1e31c5 100644
--- a/src/codegen/c.zig
+++ b/src/codegen/c.zig
@@ -2995,6 +2995,11 @@ fn genBodyInner(f: *Function, body: []const Air.Inst.Index) error{ AnalysisFail,
.c_va_arg => try airCVaArg(f, inst),
.c_va_end => try airCVaEnd(f, inst),
.c_va_copy => try airCVaCopy(f, inst),
+
+ .work_item_id,
+ .work_group_size,
+ .work_group_id,
+ => unreachable,
// zig fmt: on
};
if (result_value == .new_local) {
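In the llvm.zig changes that follow, the two ID instructions share one helper, amdgcnWorkIntrinsic, which appends a dimension suffix to an intrinsic base name: dimensions 0 through 2 select the .x/.y/.z variant, and any out-of-range dimension is folded to a constant default instead (0 for the ID builtins; @workGroupSize similarly returns a constant 1), so no intrinsic call is emitted at all. Restated as a standalone sketch (a simplification, not code from the commit):

    // Illustrative restatement of the suffix selection in amdgcnWorkIntrinsic.
    fn workIntrinsicName(comptime basename: []const u8, dimension: u32) ?[]const u8 {
        return switch (dimension) {
            0 => basename ++ ".x",
            1 => basename ++ ".y",
            2 => basename ++ ".z",
            else => null, // caller emits the constant default instead
        };
    }

For example, workIntrinsicName("llvm.amdgcn.workitem.id", 1) yields "llvm.amdgcn.workitem.id.y".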
diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig
index eea68a80e9..ce49fcde78 100644
--- a/src/codegen/llvm.zig
+++ b/src/codegen/llvm.zig
@@ -4745,6 +4745,10 @@ pub const FuncGen = struct {
.c_va_copy => try self.airCVaCopy(inst),
.c_va_end => try self.airCVaEnd(inst),
.c_va_start => try self.airCVaStart(inst),
+
+ .work_item_id => try self.airWorkItemId(inst),
+ .work_group_size => try self.airWorkGroupSize(inst),
+ .work_group_id => try self.airWorkGroupId(inst),
// zig fmt: on
};
if (opt_value) |val| {
@@ -9567,6 +9571,74 @@ pub const FuncGen = struct {
return self.builder.buildAddrSpaceCast(operand, llvm_dest_ty, "");
}

+ fn amdgcnWorkIntrinsic(self: *FuncGen, dimension: u32, default: u32, comptime basename: []const u8) !?*llvm.Value {
+ const llvm_u32 = self.context.intType(32);
+
+ const llvm_fn_name = switch (dimension) {
+ 0 => basename ++ ".x",
+ 1 => basename ++ ".y",
+ 2 => basename ++ ".z",
+ else => return llvm_u32.constInt(default, .False),
+ };
+
+ const args: [0]*llvm.Value = .{};
+ const llvm_fn = self.getIntrinsic(llvm_fn_name, &.{});
+ return self.builder.buildCall(llvm_fn.globalGetValueType(), llvm_fn, &args, args.len, .Fast, .Auto, "");
+ }
+
+ fn airWorkItemId(self: *FuncGen, inst: Air.Inst.Index) !?*llvm.Value {
+ if (self.liveness.isUnused(inst)) return null;
+
+ const target = self.dg.module.getTarget();
+ assert(target.cpu.arch == .amdgcn); // TODO: port this function to other GPU architectures
+
+ const pl_op = self.air.instructions.items(.data)[inst].pl_op;
+ const dimension = pl_op.payload;
+ return self.amdgcnWorkIntrinsic(dimension, 0, "llvm.amdgcn.workitem.id");
+ }
+
+ fn airWorkGroupSize(self: *FuncGen, inst: Air.Inst.Index) !?*llvm.Value {
+ if (self.liveness.isUnused(inst)) return null;
+
+ const target = self.dg.module.getTarget();
+ assert(target.cpu.arch == .amdgcn); // TODO: port this function to other GPU architectures
+
+ const pl_op = self.air.instructions.items(.data)[inst].pl_op;
+ const dimension = pl_op.payload;
+ const llvm_u32 = self.context.intType(32);
+ if (dimension >= 3) {
+ return llvm_u32.constInt(1, .False);
+ }
+
+ // Fetch the dispatch pointer, which points to this structure:
+ // https://github.com/RadeonOpenCompute/ROCR-Runtime/blob/adae6c61e10d371f7cbc3d0e94ae2c070cab18a4/src/inc/hsa.h#L2913
+ const llvm_fn = self.getIntrinsic("llvm.amdgcn.dispatch.ptr", &.{});
+ const args: [0]*llvm.Value = .{};
+ const dispatch_ptr = self.builder.buildCall(llvm_fn.globalGetValueType(), llvm_fn, &args, args.len, .Fast, .Auto, "");
+ dispatch_ptr.setAlignment(4);
+
+ // Load the workgroup_size_* member from the struct as u16.
+ // Just treat the dispatch pointer as an array of u16 to keep things simple.
+ const offset = 2 + dimension;
+ const index = [_]*llvm.Value{llvm_u32.constInt(offset, .False)};
+ const llvm_u16 = self.context.intType(16);
+ const workgroup_size_ptr = self.builder.buildInBoundsGEP(llvm_u16, dispatch_ptr, &index, index.len, "");
+ const workgroup_size = self.builder.buildLoad(llvm_u16, workgroup_size_ptr, "");
+ workgroup_size.setAlignment(2);
+ return workgroup_size;
+ }
+
+ fn airWorkGroupId(self: *FuncGen, inst: Air.Inst.Index) !?*llvm.Value {
+ if (self.liveness.isUnused(inst)) return null;
+
+ const target = self.dg.module.getTarget();
+ assert(target.cpu.arch == .amdgcn); // TODO: port this function to other GPU architectures
+
+ const pl_op = self.air.instructions.items(.data)[inst].pl_op;
+ const dimension = pl_op.payload;
+ return self.amdgcnWorkIntrinsic(dimension, 0, "llvm.amdgcn.workgroup.id");
+ }
+
fn getErrorNameTable(self: *FuncGen) !*llvm.Value {
if (self.dg.object.error_name_table) |table| {
return table;