From 898e4473e8acf664d67474716bb9728ed601c5a0 Mon Sep 17 00:00:00 2001 From: Xavier Bouchoux Date: Sun, 19 Mar 2023 12:56:37 +0000 Subject: CBE: implement aggregateInit() for array of array case. fixes `error(compilation): clang failed with stderr: error: array type 'uint32_t[10]' (aka 'unsigned int[10]') is not assignable` --- src/codegen/c.zig | 40 +++++++++++++++++++++++++++++----------- 1 file changed, 29 insertions(+), 11 deletions(-) (limited to 'src/codegen/c.zig') diff --git a/src/codegen/c.zig b/src/codegen/c.zig index 519b2b45d5..0c85f0f923 100644 --- a/src/codegen/c.zig +++ b/src/codegen/c.zig @@ -6852,17 +6852,35 @@ fn airAggregateInit(f: *Function, inst: Air.Inst.Index) !CValue { switch (inst_ty.zigTypeTag()) { .Array, .Vector => { const elem_ty = inst_ty.childType(); - for (resolved_elements, 0..) |element, i| { - try f.writeCValue(writer, local, .Other); - try writer.print("[{d}] = ", .{i}); - try f.writeCValue(writer, element, .Other); - try writer.writeAll(";\n"); - } - if (inst_ty.sentinel()) |sentinel| { - try f.writeCValue(writer, local, .Other); - try writer.print("[{d}] = ", .{resolved_elements.len}); - try f.object.dg.renderValue(writer, elem_ty, sentinel, .Other); - try writer.writeAll(";\n"); + + const is_array = lowersToArray(elem_ty, target); + const need_memcpy = is_array; + if (need_memcpy) { + for (resolved_elements, 0..) |element, i| { + try writer.writeAll("memcpy("); + try f.writeCValue(writer, local, .Other); + try writer.print("[{d}]", .{i}); + try writer.writeAll(", "); + try f.writeCValue(writer, element, .Other); + try writer.writeAll(", sizeof("); + try f.renderType(writer, elem_ty); + try writer.writeAll("))"); + try writer.writeAll(";\n"); + } + assert(inst_ty.sentinel() == null); + } else { + for (resolved_elements, 0..) |element, i| { + try f.writeCValue(writer, local, .Other); + try writer.print("[{d}] = ", .{i}); + try f.writeCValue(writer, element, .Other); + try writer.writeAll(";\n"); + } + if (inst_ty.sentinel()) |sentinel| { + try f.writeCValue(writer, local, .Other); + try writer.print("[{d}] = ", .{resolved_elements.len}); + try f.object.dg.renderValue(writer, elem_ty, sentinel, .Other); + try writer.writeAll(";\n"); + } } }, .Struct => switch (inst_ty.containerLayout()) { -- cgit v1.2.3 From dd66e0addb30d795a04324096c913ca89ccbcf40 Mon Sep 17 00:00:00 2001 From: Jacob Young Date: Mon, 27 Mar 2023 06:55:48 -0400 Subject: Sema: fix empty slice pointer value We just checked that inst_child_ty was effectively a zero-bit type, so it is certainly not the non-zero alignment we are looking for. 
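In code, the bad path was hit when coercing an empty anonymous literal to a slice. A minimal sketch, mirroring the behavior test added below (the annotations are illustrative, not part of the patch):

```zig
const expect = @import("std").testing.expect;

test "empty slice literal keeps a usable ptr" {
    // `&.{}` coerces to `[]u8`. The slice's pointer value used to take its
    // lazy alignment from the zero-bit source child type instead of the
    // destination pointee type (`u8` here), producing a lazy value the
    // backends could not render.
    const empty_slice: []u8 = &.{};
    const p: [*]u8 = empty_slice.ptr + 0;
    try expect(@ptrToInt(p) == @ptrToInt(empty_slice.ptr));
}
```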
Closes #15085 --- src/Sema.zig | 7 ++++++- src/codegen/c.zig | 2 +- src/codegen/llvm.zig | 2 +- test/behavior/slice.zig | 16 ++++++++++++---- 4 files changed, 20 insertions(+), 7 deletions(-) (limited to 'src/codegen/c.zig') diff --git a/src/Sema.zig b/src/Sema.zig index 13327657a8..1f375853cb 100644 --- a/src/Sema.zig +++ b/src/Sema.zig @@ -25152,7 +25152,7 @@ fn coerceExtra( .ptr = if (dest_info.@"align" != 0) try Value.Tag.int_u64.create(sema.arena, dest_info.@"align") else - try inst_child_ty.lazyAbiAlignment(target, sema.arena), + try dest_info.pointee_type.lazyAbiAlignment(target, sema.arena), .len = Value.zero, }); return sema.addConstant(dest_ty, slice_val); @@ -30213,6 +30213,11 @@ fn resolveLazyValue(sema: *Sema, val: Value) CompileError!void { try sema.resolveLazyValue(elem_val); } }, + .slice => { + const slice = val.castTag(.slice).?.data; + try sema.resolveLazyValue(slice.ptr); + return sema.resolveLazyValue(slice.len); + }, else => return, } } diff --git a/src/codegen/c.zig b/src/codegen/c.zig index 0c85f0f923..6c4bb3c688 100644 --- a/src/codegen/c.zig +++ b/src/codegen/c.zig @@ -1069,7 +1069,7 @@ pub const DeclGen = struct { const extern_fn = val.castTag(.extern_fn).?.data; try dg.renderDeclName(writer, extern_fn.owner_decl, 0); }, - .int_u64, .one => { + .int_u64, .one, .int_big_positive, .lazy_align, .lazy_size => { try writer.writeAll("(("); try dg.renderType(writer, ty); return writer.print("){x})", .{try dg.fmtIntLiteral(Type.usize, val, .Other)}); diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig index dd13087afe..4b28fe2afe 100644 --- a/src/codegen/llvm.zig +++ b/src/codegen/llvm.zig @@ -3397,7 +3397,7 @@ pub const DeclGen = struct { }; return dg.context.constStruct(&fields, fields.len, .False); }, - .int_u64, .one, .int_big_positive => { + .int_u64, .one, .int_big_positive, .lazy_align, .lazy_size => { const llvm_usize = try dg.lowerType(Type.usize); const llvm_int = llvm_usize.constInt(tv.val.toUnsignedInt(target), .False); return llvm_int.constIntToPtr(try dg.lowerType(tv.ty)); diff --git a/test/behavior/slice.zig b/test/behavior/slice.zig index 029f6838d0..a9aa9e50e1 100644 --- a/test/behavior/slice.zig +++ b/test/behavior/slice.zig @@ -723,10 +723,18 @@ test "slice with dereferenced value" { test "empty slice ptr is non null" { if (builtin.zig_backend == .stage2_aarch64 and builtin.os.tag == .macos) return error.SkipZigTest; // TODO - const empty_slice: []u8 = &[_]u8{}; - const p: [*]u8 = empty_slice.ptr + 0; - const t = @ptrCast([*]i8, p); - try expect(@ptrToInt(t) == @ptrToInt(empty_slice.ptr)); + { + const empty_slice: []u8 = &[_]u8{}; + const p: [*]u8 = empty_slice.ptr + 0; + const t = @ptrCast([*]i8, p); + try expect(@ptrToInt(t) == @ptrToInt(empty_slice.ptr)); + } + { + const empty_slice: []u8 = &.{}; + const p: [*]u8 = empty_slice.ptr + 0; + const t = @ptrCast([*]i8, p); + try expect(@ptrToInt(t) == @ptrToInt(empty_slice.ptr)); + } } test "slice decays to many pointer" { -- cgit v1.2.3 From 3357c59cebacb6b60da865376b20d2b307d12ec1 Mon Sep 17 00:00:00 2001 From: Robin Voetter Date: Sat, 18 Mar 2023 15:59:56 +0100 Subject: new builtins: @workItemId, @workGroupId, @workGroupSize * @workItemId returns the index of the work item in a work group for a dimension. * @workGroupId returns the index of the work group in the kernel dispatch for a dimension. * @workGroupSize returns the size of the work group for a dimension. These builtins are mainly useful for GPU backends. They are currently only implemented for the AMDGCN LLVM backend. 
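As a usage sketch (a hypothetical helper, not from the commit; a real kernel entry point would additionally need the GPU calling convention on an amdgcn target):

```zig
// Derive a global linear index for one dimension. The dimension argument
// must be comptime-known (0, 1, or 2), and Sema accepts these builtins
// only when compiling for a GPU architecture (currently just amdgcn).
fn globalInvocationId(comptime dim: u32) u32 {
    return @workGroupId(dim) * @workGroupSize(dim) + @workItemId(dim);
}

// Typical use inside a kernel body: out[globalInvocationId(0)] = value;
```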
--- doc/langref.html.in | 22 ++++++++++++++ src/Air.zig | 21 +++++++++++++ src/AstGen.zig | 34 +++++++++++++++++++++ src/BuiltinFn.zig | 23 ++++++++++++++ src/Liveness.zig | 6 ++++ src/Sema.zig | 39 ++++++++++++++++++++++++ src/Zir.zig | 9 ++++++ src/arch/aarch64/CodeGen.zig | 4 +++ src/arch/arm/CodeGen.zig | 4 +++ src/arch/riscv64/CodeGen.zig | 4 +++ src/arch/sparc64/CodeGen.zig | 4 +++ src/arch/wasm/CodeGen.zig | 5 +++ src/arch/x86_64/CodeGen.zig | 4 +++ src/codegen/c.zig | 5 +++ src/codegen/llvm.zig | 72 ++++++++++++++++++++++++++++++++++++++++++++ src/print_air.zig | 10 ++++++ src/print_zir.zig | 3 ++ 17 files changed, 269 insertions(+) (limited to 'src/codegen/c.zig') diff --git a/doc/langref.html.in b/doc/langref.html.in index 907464867e..19ee6cdab2 100644 --- a/doc/langref.html.in +++ b/doc/langref.html.in @@ -9578,6 +9578,28 @@ fn foo(comptime T: type, ptr: *T) T { Remove {#syntax#}volatile{#endsyntax#} qualifier from a pointer.
     </p>
     {#header_close#}
+
+      {#header_open|@workGroupId#}
+      <pre>{#syntax#}@workGroupId(comptime dimension: u32) u32{#endsyntax#}</pre>
+      <p>
+        Returns the index of the work group in the current kernel invocation in dimension {#syntax#}dimension{#endsyntax#}.
+      </p>
+      {#header_close#}
+
+      {#header_open|@workGroupSize#}
+      <pre>{#syntax#}@workGroupSize(comptime dimension: u32) u32{#endsyntax#}</pre>
+      <p>
+        Returns the number of work items that a work group has in dimension {#syntax#}dimension{#endsyntax#}.
+      </p>
+      {#header_close#}
+
+      {#header_open|@workItemId#}
+      <pre>{#syntax#}@workItemId(comptime dimension: u32) u32{#endsyntax#}</pre>
+      <p>
+        Returns the index of the work item in the work group in dimension {#syntax#}dimension{#endsyntax#}. This function returns values between {#syntax#}0{#endsyntax#} (inclusive) and {#syntax#}@workGroupSize(dimension){#endsyntax#} (exclusive).
+      </p>
+ {#header_close#} + {#header_close#} {#header_open|Build Mode#} diff --git a/src/Air.zig b/src/Air.zig index 4646dcc89e..c63e9826f9 100644 --- a/src/Air.zig +++ b/src/Air.zig @@ -761,6 +761,22 @@ pub const Inst = struct { /// Uses the `ty` field. c_va_start, + /// Implements @workItemId builtin. + /// Result type is always `u32` + /// Uses the `pl_op` field, payload is the dimension to get the work item id for. + /// Operand is unused and set to Ref.none + work_item_id, + /// Implements @workGroupSize builtin. + /// Result type is always `u32` + /// Uses the `pl_op` field, payload is the dimension to get the work group size for. + /// Operand is unused and set to Ref.none + work_group_size, + /// Implements @workGroupId builtin. + /// Result type is always `u32` + /// Uses the `pl_op` field, payload is the dimension to get the work group id for. + /// Operand is unused and set to Ref.none + work_group_id, + pub fn fromCmpOp(op: std.math.CompareOperator, optimized: bool) Tag { switch (op) { .lt => return if (optimized) .cmp_lt_optimized else .cmp_lt, @@ -1267,6 +1283,11 @@ pub fn typeOfIndex(air: Air, inst: Air.Inst.Index) Type { const err_union_ty = air.typeOf(datas[inst].pl_op.operand); return err_union_ty.errorUnionPayload(); }, + + .work_item_id, + .work_group_size, + .work_group_id, + => return Type.u32, } } diff --git a/src/AstGen.zig b/src/AstGen.zig index c91303cdb1..4f786edce8 100644 --- a/src/AstGen.zig +++ b/src/AstGen.zig @@ -8549,6 +8549,40 @@ fn builtinCall( } return rvalue(gz, ri, try gz.addNodeExtended(.c_va_start, node), node); }, + + .work_item_id => { + if (astgen.fn_block == null) { + return astgen.failNode(node, "'@workItemId' outside function scope", .{}); + } + const operand = try comptimeExpr(gz, scope, .{ .rl = .{ .coerced_ty = .u32_type } }, params[0]); + const result = try gz.addExtendedPayload(.work_item_id, Zir.Inst.UnNode{ + .node = gz.nodeIndexToRelative(node), + .operand = operand, + }); + return rvalue(gz, ri, result, node); + }, + .work_group_size => { + if (astgen.fn_block == null) { + return astgen.failNode(node, "'@workGroupSize' outside function scope", .{}); + } + const operand = try comptimeExpr(gz, scope, .{ .rl = .{ .coerced_ty = .u32_type } }, params[0]); + const result = try gz.addExtendedPayload(.work_group_size, Zir.Inst.UnNode{ + .node = gz.nodeIndexToRelative(node), + .operand = operand, + }); + return rvalue(gz, ri, result, node); + }, + .work_group_id => { + if (astgen.fn_block == null) { + return astgen.failNode(node, "'@workGroupId' outside function scope", .{}); + } + const operand = try comptimeExpr(gz, scope, .{ .rl = .{ .coerced_ty = .u32_type } }, params[0]); + const result = try gz.addExtendedPayload(.work_group_id, Zir.Inst.UnNode{ + .node = gz.nodeIndexToRelative(node), + .operand = operand, + }); + return rvalue(gz, ri, result, node); + }, } } diff --git a/src/BuiltinFn.zig b/src/BuiltinFn.zig index 79c6617483..4a98a5a615 100644 --- a/src/BuiltinFn.zig +++ b/src/BuiltinFn.zig @@ -118,6 +118,9 @@ pub const Tag = enum { union_init, Vector, volatile_cast, + work_item_id, + work_group_size, + work_group_id, }; pub const MemLocRequirement = enum { @@ -980,5 +983,25 @@ pub const list = list: { .param_count = 1, }, }, + .{ + "@workItemId", .{ + .tag = .work_item_id, + .param_count = 1, + }, + }, + .{ + "@workGroupSize", + .{ + .tag = .work_group_size, + .param_count = 1, + }, + }, + .{ + "@workGroupId", + .{ + .tag = .work_group_id, + .param_count = 1, + }, + }, }); }; diff --git a/src/Liveness.zig b/src/Liveness.zig index 
8dc81aa165..1d57b80097 100644 --- a/src/Liveness.zig +++ b/src/Liveness.zig @@ -240,6 +240,9 @@ pub fn categorizeOperand( .err_return_trace, .save_err_return_trace_index, .c_va_start, + .work_item_id, + .work_group_size, + .work_group_id, => return .none, .fence => return .write, @@ -864,6 +867,9 @@ fn analyzeInst( .err_return_trace, .save_err_return_trace_index, .c_va_start, + .work_item_id, + .work_group_size, + .work_group_id, => return trackOperands(a, new_set, inst, main_tomb, .{ .none, .none, .none }), .not, diff --git a/src/Sema.zig b/src/Sema.zig index 1f375853cb..da93a2906a 100644 --- a/src/Sema.zig +++ b/src/Sema.zig @@ -1164,6 +1164,9 @@ fn analyzeBodyInner( .c_va_start => try sema.zirCVaStart( block, extended), .const_cast, => try sema.zirConstCast( block, extended), .volatile_cast, => try sema.zirVolatileCast( block, extended), + .work_item_id => try sema.zirWorkItem( block, extended, extended.opcode), + .work_group_size => try sema.zirWorkItem( block, extended, extended.opcode), + .work_group_id => try sema.zirWorkItem( block, extended, extended.opcode), // zig fmt: on .fence => { @@ -22437,6 +22440,42 @@ fn zirBuiltinExtern( return sema.addConstant(ty, ref); } +fn zirWorkItem( + sema: *Sema, + block: *Block, + extended: Zir.Inst.Extended.InstData, + zir_tag: Zir.Inst.Extended, +) CompileError!Air.Inst.Ref { + const extra = sema.code.extraData(Zir.Inst.UnNode, extended.operand).data; + const dimension_src: LazySrcLoc = .{ .node_offset_builtin_call_arg0 = extra.node }; + const builtin_src = LazySrcLoc.nodeOffset(extra.node); + const target = sema.mod.getTarget(); + + switch (target.cpu.arch) { + // TODO: Allow for other GPU targets. + .amdgcn => {}, + else => { + return sema.fail(block, builtin_src, "builtin only available on GPU targets; targeted architecture is {s}", .{@tagName(target.cpu.arch)}); + }, + } + + const dimension = @intCast(u32, try sema.resolveInt(block, dimension_src, extra.operand, Type.u32, "dimension must be comptime-known")); + try sema.requireRuntimeBlock(block, builtin_src, null); + + return block.addInst(.{ + .tag = switch (zir_tag) { + .work_item_id => .work_item_id, + .work_group_size => .work_group_size, + .work_group_id => .work_group_id, + else => unreachable, + }, + .data = .{ .pl_op = .{ + .operand = .none, + .payload = dimension, + } }, + }); +} + fn requireRuntimeBlock(sema: *Sema, block: *Block, src: LazySrcLoc, runtime_src: ?LazySrcLoc) !void { if (block.is_comptime) { const msg = msg: { diff --git a/src/Zir.zig b/src/Zir.zig index bc5202c8aa..7a8df49fda 100644 --- a/src/Zir.zig +++ b/src/Zir.zig @@ -2032,6 +2032,15 @@ pub const Inst = struct { /// Implements the `@volatileCast` builtin. /// `operand` is payload index to `UnNode`. volatile_cast, + /// Implements the `@workItemId` builtin. + /// `operand` is payload index to `UnNode`. + work_item_id, + /// Implements the `@workGroupSize` builtin. + /// `operand` is payload index to `UnNode`. + work_group_size, + /// Implements the `@workGroupId` builtin. + /// `operand` is payload index to `UnNode`. 
+ work_group_id, pub const InstData = struct { opcode: Extended, diff --git a/src/arch/aarch64/CodeGen.zig b/src/arch/aarch64/CodeGen.zig index ee23696950..1e07e7e719 100644 --- a/src/arch/aarch64/CodeGen.zig +++ b/src/arch/aarch64/CodeGen.zig @@ -890,6 +890,10 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .wasm_memory_size => unreachable, .wasm_memory_grow => unreachable, + + .work_item_id => unreachable, + .work_group_size => unreachable, + .work_group_id => unreachable, // zig fmt: on } diff --git a/src/arch/arm/CodeGen.zig b/src/arch/arm/CodeGen.zig index 8de5ae006a..1cf0e2981e 100644 --- a/src/arch/arm/CodeGen.zig +++ b/src/arch/arm/CodeGen.zig @@ -874,6 +874,10 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .wasm_memory_size => unreachable, .wasm_memory_grow => unreachable, + + .work_item_id => unreachable, + .work_group_size => unreachable, + .work_group_id => unreachable, // zig fmt: on } diff --git a/src/arch/riscv64/CodeGen.zig b/src/arch/riscv64/CodeGen.zig index 68df794bf7..11dbb2cb08 100644 --- a/src/arch/riscv64/CodeGen.zig +++ b/src/arch/riscv64/CodeGen.zig @@ -704,6 +704,10 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .wasm_memory_size => unreachable, .wasm_memory_grow => unreachable, + + .work_item_id => unreachable, + .work_group_size => unreachable, + .work_group_id => unreachable, // zig fmt: on } if (std.debug.runtime_safety) { diff --git a/src/arch/sparc64/CodeGen.zig b/src/arch/sparc64/CodeGen.zig index 30df999267..a1b1be3a76 100644 --- a/src/arch/sparc64/CodeGen.zig +++ b/src/arch/sparc64/CodeGen.zig @@ -720,6 +720,10 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .wasm_memory_size => unreachable, .wasm_memory_grow => unreachable, + + .work_item_id => unreachable, + .work_group_size => unreachable, + .work_group_id => unreachable, // zig fmt: on } diff --git a/src/arch/wasm/CodeGen.zig b/src/arch/wasm/CodeGen.zig index 199ddada65..fc5d13e5a4 100644 --- a/src/arch/wasm/CodeGen.zig +++ b/src/arch/wasm/CodeGen.zig @@ -1997,6 +1997,11 @@ fn genInst(func: *CodeGen, inst: Air.Inst.Index) InnerError!void { .reduce_optimized, .float_to_int_optimized, => return func.fail("TODO implement optimized float mode", .{}), + + .work_item_id, + .work_group_size, + .work_group_id, + => unreachable, }; } diff --git a/src/arch/x86_64/CodeGen.zig b/src/arch/x86_64/CodeGen.zig index 5ddc9c77ca..604052ee7e 100644 --- a/src/arch/x86_64/CodeGen.zig +++ b/src/arch/x86_64/CodeGen.zig @@ -1132,6 +1132,10 @@ fn genBody(self: *Self, body: []const Air.Inst.Index) InnerError!void { .wasm_memory_size => unreachable, .wasm_memory_grow => unreachable, + + .work_item_id => unreachable, + .work_group_size => unreachable, + .work_group_id => unreachable, // zig fmt: on } diff --git a/src/codegen/c.zig b/src/codegen/c.zig index 6c4bb3c688..704a1e31c5 100644 --- a/src/codegen/c.zig +++ b/src/codegen/c.zig @@ -2995,6 +2995,11 @@ fn genBodyInner(f: *Function, body: []const Air.Inst.Index) error{ AnalysisFail, .c_va_arg => try airCVaArg(f, inst), .c_va_end => try airCVaEnd(f, inst), .c_va_copy => try airCVaCopy(f, inst), + + .work_item_id, + .work_group_size, + .work_group_id, + => unreachable, // zig fmt: on }; if (result_value == .new_local) { diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig index eea68a80e9..ce49fcde78 100644 --- a/src/codegen/llvm.zig +++ b/src/codegen/llvm.zig @@ -4745,6 +4745,10 @@ pub const FuncGen = struct { .c_va_copy => try self.airCVaCopy(inst), .c_va_end => 
try self.airCVaEnd(inst), .c_va_start => try self.airCVaStart(inst), + + .work_item_id => try self.airWorkItemId(inst), + .work_group_size => try self.airWorkGroupSize(inst), + .work_group_id => try self.airWorkGroupId(inst), // zig fmt: on }; if (opt_value) |val| { @@ -9567,6 +9571,74 @@ pub const FuncGen = struct { return self.builder.buildAddrSpaceCast(operand, llvm_dest_ty, ""); } + fn amdgcnWorkIntrinsic(self: *FuncGen, dimension: u32, default: u32, comptime basename: []const u8) !?*llvm.Value { + const llvm_u32 = self.context.intType(32); + + const llvm_fn_name = switch (dimension) { + 0 => basename ++ ".x", + 1 => basename ++ ".y", + 2 => basename ++ ".z", + else => return llvm_u32.constInt(default, .False), + }; + + const args: [0]*llvm.Value = .{}; + const llvm_fn = self.getIntrinsic(llvm_fn_name, &.{}); + return self.builder.buildCall(llvm_fn.globalGetValueType(), llvm_fn, &args, args.len, .Fast, .Auto, ""); + } + + fn airWorkItemId(self: *FuncGen, inst: Air.Inst.Index) !?*llvm.Value { + if (self.liveness.isUnused(inst)) return null; + + const target = self.dg.module.getTarget(); + assert(target.cpu.arch == .amdgcn); // TODO is to port this function to other GPU architectures + + const pl_op = self.air.instructions.items(.data)[inst].pl_op; + const dimension = pl_op.payload; + return self.amdgcnWorkIntrinsic(dimension, 0, "llvm.amdgcn.workitem.id"); + } + + fn airWorkGroupSize(self: *FuncGen, inst: Air.Inst.Index) !?*llvm.Value { + if (self.liveness.isUnused(inst)) return null; + + const target = self.dg.module.getTarget(); + assert(target.cpu.arch == .amdgcn); // TODO is to port this function to other GPU architectures + + const pl_op = self.air.instructions.items(.data)[inst].pl_op; + const dimension = pl_op.payload; + const llvm_u32 = self.context.intType(32); + if (dimension >= 3) { + return llvm_u32.constInt(1, .False); + } + + // Fetch the dispatch pointer, which points to this structure: + // https://github.com/RadeonOpenCompute/ROCR-Runtime/blob/adae6c61e10d371f7cbc3d0e94ae2c070cab18a4/src/inc/hsa.h#L2913 + const llvm_fn = self.getIntrinsic("llvm.amdgcn.dispatch.ptr", &.{}); + const args: [0]*llvm.Value = .{}; + const dispatch_ptr = self.builder.buildCall(llvm_fn.globalGetValueType(), llvm_fn, &args, args.len, .Fast, .Auto, ""); + dispatch_ptr.setAlignment(4); + + // Load the work_group_* member from the struct as u16. + // Just treat the dispatch pointer as an array of u16 to keep things simple. 
+ const offset = 2 + dimension; + const index = [_]*llvm.Value{llvm_u32.constInt(offset, .False)}; + const llvm_u16 = self.context.intType(16); + const workgroup_size_ptr = self.builder.buildInBoundsGEP(llvm_u16, dispatch_ptr, &index, index.len, ""); + const workgroup_size = self.builder.buildLoad(llvm_u16, workgroup_size_ptr, ""); + workgroup_size.setAlignment(2); + return workgroup_size; + } + + fn airWorkGroupId(self: *FuncGen, inst: Air.Inst.Index) !?*llvm.Value { + if (self.liveness.isUnused(inst)) return null; + + const target = self.dg.module.getTarget(); + assert(target.cpu.arch == .amdgcn); // TODO is to port this function to other GPU architectures + + const pl_op = self.air.instructions.items(.data)[inst].pl_op; + const dimension = pl_op.payload; + return self.amdgcnWorkIntrinsic(dimension, 0, "llvm.amdgcn.workgroup.id"); + } + fn getErrorNameTable(self: *FuncGen) !*llvm.Value { if (self.dg.object.error_name_table) |table| { return table; diff --git a/src/print_air.zig b/src/print_air.zig index 8d29a272ca..803a0f2886 100644 --- a/src/print_air.zig +++ b/src/print_air.zig @@ -328,6 +328,11 @@ const Writer = struct { .vector_store_elem => try w.writeVectorStoreElem(s, inst), .dbg_block_begin, .dbg_block_end => {}, + + .work_item_id, + .work_group_size, + .work_group_id, + => try w.writeWorkDimension(s, inst), } try s.writeAll(")\n"); } @@ -869,6 +874,11 @@ const Writer = struct { try w.writeOperand(s, inst, 0, pl_op.operand); } + fn writeWorkDimension(w: *Writer, s: anytype, inst: Air.Inst.Index) @TypeOf(s).Error!void { + const pl_op = w.air.instructions.items(.data)[inst].pl_op; + try s.print("{d}", .{pl_op.payload}); + } + fn writeOperand( w: *Writer, s: anytype, diff --git a/src/print_zir.zig b/src/print_zir.zig index 755107cd1a..b70a4bbf67 100644 --- a/src/print_zir.zig +++ b/src/print_zir.zig @@ -512,6 +512,9 @@ const Writer = struct { .c_va_end, .const_cast, .volatile_cast, + .work_item_id, + .work_group_size, + .work_group_id, => { const inst_data = self.code.extraData(Zir.Inst.UnNode, extended.operand).data; const src = LazySrcLoc.nodeOffset(inst_data.node); -- cgit v1.2.3 From f4b411314ccf8e852d3febddc8b31ce1f533938b Mon Sep 17 00:00:00 2001 From: Jacob Young Date: Fri, 31 Mar 2023 16:23:01 -0400 Subject: Sema: defer stores to inferred allocs This lets us generate the store with knowledge of the type to be stored. Therefore, we can avoid generating garbage Air with stores through pointers to comptime-only types which backends cannot lower. Closes #13410 Closes #15122 --- src/Sema.zig | 67 +++++++++++++++++++++++++++---------------------- src/codegen/c.zig | 8 +----- src/codegen/llvm.zig | 1 - test/behavior/if.zig | 6 ----- test/behavior/union.zig | 16 ++++++++++++ 5 files changed, 54 insertions(+), 44 deletions(-) (limited to 'src/codegen/c.zig') diff --git a/src/Sema.zig b/src/Sema.zig index da93a2906a..972efcff72 100644 --- a/src/Sema.zig +++ b/src/Sema.zig @@ -3866,8 +3866,8 @@ fn zirResolveInferredAlloc(sema: *Sema, block: *Block, inst: Zir.Inst.Index) Com const dummy_ptr = try trash_block.addTy(.alloc, mut_final_ptr_ty); const empty_trash_count = trash_block.instructions.items.len; - for (placeholders, 0..) 
|bitcast_inst, i| { - const sub_ptr_ty = sema.typeOf(Air.indexToRef(bitcast_inst)); + for (peer_inst_list, placeholders) |peer_inst, placeholder_inst| { + const sub_ptr_ty = sema.typeOf(Air.indexToRef(placeholder_inst)); if (mut_final_ptr_ty.eql(sub_ptr_ty, sema.mod)) { // New result location type is the same as the old one; nothing @@ -3875,39 +3875,54 @@ fn zirResolveInferredAlloc(sema: *Sema, block: *Block, inst: Zir.Inst.Index) Com continue; } - var bitcast_block = block.makeSubBlock(); - defer bitcast_block.instructions.deinit(gpa); + var replacement_block = block.makeSubBlock(); + defer replacement_block.instructions.deinit(gpa); - trash_block.instructions.shrinkRetainingCapacity(empty_trash_count); - const sub_ptr = try sema.coerceResultPtr(&bitcast_block, src, ptr, dummy_ptr, peer_inst_list[i], &trash_block); + const result = switch (sema.air_instructions.items(.tag)[placeholder_inst]) { + .bitcast => result: { + trash_block.instructions.shrinkRetainingCapacity(empty_trash_count); + const sub_ptr = try sema.coerceResultPtr(&replacement_block, src, ptr, dummy_ptr, peer_inst, &trash_block); + + assert(replacement_block.instructions.items.len > 0); + break :result sub_ptr; + }, + .store => result: { + const bin_op = sema.air_instructions.items(.data)[placeholder_inst].bin_op; + try sema.storePtr2(&replacement_block, src, bin_op.lhs, src, bin_op.rhs, src, .bitcast); + break :result .void_value; + }, + else => unreachable, + }; - assert(bitcast_block.instructions.items.len > 0); // If only one instruction is produced then we can replace the bitcast // placeholder instruction with this instruction; no need for an entire block. - if (bitcast_block.instructions.items.len == 1) { - const only_inst = bitcast_block.instructions.items[0]; - sema.air_instructions.set(bitcast_inst, sema.air_instructions.get(only_inst)); + if (replacement_block.instructions.items.len == 1) { + const only_inst = replacement_block.instructions.items[0]; + sema.air_instructions.set(placeholder_inst, sema.air_instructions.get(only_inst)); continue; } // Here we replace the placeholder bitcast instruction with a block // that does the coerce_result_ptr logic. 
- _ = try bitcast_block.addBr(bitcast_inst, sub_ptr); - const ty_inst = sema.air_instructions.items(.data)[bitcast_inst].ty_op.ty; + _ = try replacement_block.addBr(placeholder_inst, result); + const ty_inst = if (result == .void_value) + .void_type + else + sema.air_instructions.items(.data)[placeholder_inst].ty_op.ty; try sema.air_extra.ensureUnusedCapacity( gpa, - @typeInfo(Air.Block).Struct.fields.len + bitcast_block.instructions.items.len, + @typeInfo(Air.Block).Struct.fields.len + replacement_block.instructions.items.len, ); - sema.air_instructions.set(bitcast_inst, .{ + sema.air_instructions.set(placeholder_inst, .{ .tag = .block, .data = .{ .ty_pl = .{ .ty = ty_inst, .payload = sema.addExtraAssumeCapacity(Air.Block{ - .body_len = @intCast(u32, bitcast_block.instructions.items.len), + .body_len = @intCast(u32, replacement_block.instructions.items.len), }), } }, }); - sema.air_extra.appendSliceAssumeCapacity(bitcast_block.instructions.items); + sema.air_extra.appendSliceAssumeCapacity(replacement_block.instructions.items); } }, else => unreachable, @@ -4916,7 +4931,7 @@ fn zirStoreToBlockPtr(sema: *Sema, block: *Block, inst: Zir.Inst.Index) CompileE }, .inferred_alloc => { const inferred_alloc = ptr_val.castTag(.inferred_alloc).?; - return sema.storeToInferredAlloc(block, src, ptr, operand, inferred_alloc); + return sema.storeToInferredAlloc(block, ptr, operand, inferred_alloc); }, else => break :blk, } @@ -4945,7 +4960,7 @@ fn zirStoreToInferredPtr(sema: *Sema, block: *Block, inst: Zir.Inst.Index) Compi }, .inferred_alloc => { const inferred_alloc = ptr_val.castTag(.inferred_alloc).?; - return sema.storeToInferredAlloc(block, src, ptr, operand, inferred_alloc); + return sema.storeToInferredAlloc(block, ptr, operand, inferred_alloc); }, else => unreachable, } @@ -4954,27 +4969,19 @@ fn zirStoreToInferredPtr(sema: *Sema, block: *Block, inst: Zir.Inst.Index) Compi fn storeToInferredAlloc( sema: *Sema, block: *Block, - src: LazySrcLoc, ptr: Air.Inst.Ref, operand: Air.Inst.Ref, inferred_alloc: *Value.Payload.InferredAlloc, ) CompileError!void { - const operand_ty = sema.typeOf(operand); - // Create a runtime bitcast instruction with exactly the type the pointer wants. - const target = sema.mod.getTarget(); - const ptr_ty = try Type.ptr(sema.arena, sema.mod, .{ - .pointee_type = operand_ty, - .@"align" = inferred_alloc.data.alignment, - .@"addrspace" = target_util.defaultAddressSpace(target, .local), - }); - const bitcasted_ptr = try block.addBitCast(ptr_ty, ptr); + // Create a store instruction as a placeholder. This will be replaced by a + // proper store sequence once we know the stored type. + const dummy_store = try block.addBinOp(.store, ptr, operand); // Add the stored instruction to the set we will use to resolve peer types // for the inferred allocation. 
try inferred_alloc.data.prongs.append(sema.arena, .{ .stored_inst = operand, - .placeholder = Air.refToIndex(bitcasted_ptr).?, + .placeholder = Air.refToIndex(dummy_store).?, }); - return sema.storePtr2(block, src, bitcasted_ptr, src, operand, src, .bitcast); } fn storeToInferredAllocComptime( diff --git a/src/codegen/c.zig b/src/codegen/c.zig index 704a1e31c5..a3758bac69 100644 --- a/src/codegen/c.zig +++ b/src/codegen/c.zig @@ -3597,10 +3597,6 @@ fn airStore(f: *Function, inst: Air.Inst.Index) !CValue { const ptr_ty = f.air.typeOf(bin_op.lhs); const ptr_scalar_ty = ptr_ty.scalarType(); const ptr_info = ptr_scalar_ty.ptrInfo().data; - if (!ptr_info.pointee_type.hasRuntimeBitsIgnoreComptime()) { - try reap(f, inst, &.{ bin_op.lhs, bin_op.rhs }); - return .none; - } const ptr_val = try f.resolveInst(bin_op.lhs); const src_ty = f.air.typeOf(bin_op.rhs); @@ -4461,9 +4457,7 @@ fn airBr(f: *Function, inst: Air.Inst.Index) !CValue { fn airBitcast(f: *Function, inst: Air.Inst.Index) !CValue { const ty_op = f.air.instructions.items(.data)[inst].ty_op; const dest_ty = f.air.typeOfIndex(inst); - // No IgnoreComptime until Sema stops giving us garbage Air. - // https://github.com/ziglang/zig/issues/13410 - if (f.liveness.isUnused(inst) or !dest_ty.hasRuntimeBits()) { + if (f.liveness.isUnused(inst)) { try reap(f, inst, &.{ty_op.operand}); return .none; } diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig index ce49fcde78..f32047fe64 100644 --- a/src/codegen/llvm.zig +++ b/src/codegen/llvm.zig @@ -8216,7 +8216,6 @@ pub const FuncGen = struct { const dest_ptr = try self.resolveInst(bin_op.lhs); const ptr_ty = self.air.typeOf(bin_op.lhs); const operand_ty = ptr_ty.childType(); - if (!operand_ty.isFnOrHasRuntimeBitsIgnoreComptime()) return null; // TODO Sema should emit a different instruction when the store should // possibly do the safety 0xaa bytes for undefined. diff --git a/test/behavior/if.zig b/test/behavior/if.zig index 730c0713c6..2294a2bcfd 100644 --- a/test/behavior/if.zig +++ b/test/behavior/if.zig @@ -140,12 +140,6 @@ test "if-else expression with runtime condition result location is inferred opti } test "result location with inferred type ends up being pointer to comptime_int" { - if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; - if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO - var a: ?u32 = 1234; var b: u32 = 2000; var c = if (a) |d| blk: { diff --git a/test/behavior/union.zig b/test/behavior/union.zig index 20ad0a60ff..e8a9f4c831 100644 --- a/test/behavior/union.zig +++ b/test/behavior/union.zig @@ -1540,3 +1540,19 @@ test "access the tag of a global tagged union" { }; try expect(U.u == .a); } + +test "coerce enum literal to union in result loc" { + if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO + + const U = union(enum) { + a, + b: u8, + + fn doTest(c: bool) !void { + var u = if (c) .a else @This(){ .b = 0 }; + try expect(u == .a); + } + }; + try U.doTest(true); + comptime try U.doTest(true); +} -- cgit v1.2.3
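The user-visible effect of deferring these stores shows up in result-location coercions like the union test above; a condensed, annotated sketch of the same shape (illustrative, not part of the patch):

```zig
const expect = @import("std").testing.expect;

const U = union(enum) { a, b: u8 };

fn doTest(c: bool) !void {
    // `.a` is an untyped enum literal. Its store into the inferred
    // allocation used to be emitted eagerly through a placeholder bitcast
    // pointer; it is now recorded as a placeholder `store` and materialized
    // only after the allocation's type resolves to `U`, so backends never
    // see a store through a pointer to a comptime-only type.
    var u = if (c) .a else U{ .b = 0 };
    try expect(u == .a);
}

test "enum literal coerced to union in result location" {
    try doTest(true);
    comptime try doTest(true);
}
```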