1 files changed, 255 insertions, 8 deletions
diff --git a/lib/std/target/amdgpu.zig b/lib/std/target/amdgpu.zig
index f8002071be..2fb8a6fa80 100644
--- a/lib/std/target/amdgpu.zig
+++ b/lib/std/target/amdgpu.zig
@@ -9,6 +9,7 @@ pub const Feature = enum {
     a16,
     add_no_carry_insts,
     aperture_regs,
+    architected_flat_scratch,
     atomic_fadd_insts,
     auto_waitcnt_before_barrier,
     ci_insts,
@@ -20,11 +21,14 @@ pub const Feature = enum {
     dot4_insts,
     dot5_insts,
     dot6_insts,
+    dot7_insts,
     dpp,
     dpp8,
+    dpp_64bit,
     ds_src2_insts,
     enable_ds128,
     enable_prt_strict_null,
+    extended_image_insts,
     fast_denormal_f32,
     fast_fmaf,
     flat_address_space,
@@ -36,16 +40,19 @@ pub const Feature = enum {
     fma_mix_insts,
     fmaf,
     fp64,
+    full_rate_64_ops,
     g16,
     gcn3_encoding,
     get_wave_id_inst,
     gfx10,
     gfx10_3_insts,
+    gfx10_a_encoding,
     gfx10_b_encoding,
     gfx10_insts,
     gfx7_gfx8_gfx9_insts,
     gfx8_insts,
     gfx9,
+    gfx90a_insts,
     gfx9_insts,
     half_rate_64_ops,
     image_gather4_d16_bug,
@@ -70,11 +77,18 @@ pub const Feature = enum {
     mfma_inline_literal_bug,
     mimg_r128,
     movrel,
+    negative_scratch_offset_bug,
+    negative_unaligned_scratch_offset_bug,
     no_data_dep_hazard,
     no_sdst_cmpx,
+    nsa_clause_bug,
     nsa_encoding,
+    nsa_max_size_13,
+    nsa_max_size_5,
     nsa_to_vmem_bug,
     offset_3f_bug,
+    packed_fp32_ops,
+    packed_tid,
     pk_fmac_f16_inst,
     promote_alloca,
     r128_a16,
@@ -92,11 +106,13 @@ pub const Feature = enum {
     sdwa_sdst,
     sea_islands,
     sgpr_init_bug,
+    shader_cycles_register,
     si_scheduler,
     smem_to_vector_write_hazard,
     southern_islands,
     sramecc,
     sramecc_support,
+    tgsplit,
     trap_handler,
     trig_reduced_range,
     unaligned_access_mode,
@@ -149,6 +165,11 @@ pub const all_features = blk: {
         .description = "Has Memory Aperture Base and Size Registers",
         .dependencies = featureSet(&[_]Feature{}),
     };
+    result[@enumToInt(Feature.architected_flat_scratch)] = .{ // slot index is the enum tag's integer value
+        .llvm_name = "architected-flat-scratch",
+        .description = "Flat Scratch register is a readonly SPI initialized architected register",
+        .dependencies = featureSet(&[_]Feature{}), // empty list: no other feature is named here
+    };
     result[@enumToInt(Feature.atomic_fadd_insts)] = .{
         .llvm_name = "atomic-fadd-insts",
         .description = "Has buffer_atomic_add_f32, buffer_atomic_pk_add_f16, global_atomic_add_f32, global_atomic_pk_add_f16 instructions",
@@ -183,7 +204,7 @@ pub const all_features = blk: {
     };
     result[@enumToInt(Feature.dot2_insts)] = .{
         .llvm_name = "dot2-insts",
-        .description = "Has v_dot2_f32_f16, v_dot2_i32_i16, v_dot2_u32_u16, v_dot4_u32_u8, v_dot8_u32_u4 instructions",
+        .description = "Has v_dot2_i32_i16, v_dot2_u32_u16 instructions", // v_dot2_f32_f16, v_dot4_u32_u8 and v_dot8_u32_u4 are described by dot7_insts
         .dependencies = featureSet(&[_]Feature{}),
     };
     result[@enumToInt(Feature.dot3_insts)] = .{
@@ -206,6 +227,11 @@ pub const all_features = blk: {
         .description = "Has v_dot4c_i32_i8 instruction",
         .dependencies = featureSet(&[_]Feature{}),
     };
+    result[@enumToInt(Feature.dot7_insts)] = .{ // slot index is the enum tag's integer value
+        .llvm_name = "dot7-insts",
+        .description = "Has v_dot2_f32_f16, v_dot4_u32_u8, v_dot8_u32_u4 instructions",
+        .dependencies = featureSet(&[_]Feature{}), // does not name dot2_insts; CPU models such as gfx906 list both explicitly
+    };
     result[@enumToInt(Feature.dpp)] = .{
         .llvm_name = "dpp",
         .description = "Support DPP (Data Parallel Primitives) extension",
@@ -216,6 +242,11 @@ pub const all_features = blk: {
         .description = "Support DPP8 (Data Parallel Primitives) extension",
         .dependencies = featureSet(&[_]Feature{}),
     };
+    result[@enumToInt(Feature.dpp_64bit)] = .{ // slot index is the enum tag's integer value
+        .llvm_name = "dpp-64bit",
+        .description = "Support DPP (Data Parallel Primitives) extension in DP ALU", // the plain `dpp` entry uses the same text without the DP ALU qualifier
+        .dependencies = featureSet(&[_]Feature{}), // empty list: `dpp` is not named as a dependency
+    };
     result[@enumToInt(Feature.ds_src2_insts)] = .{
         .llvm_name = "ds-src2-insts",
         .description = "Has ds_*_src2 instructions",
@@ -231,6 +262,11 @@ pub const all_features = blk: {
         .description = "Enable zeroing of result registers for sparse texture fetches",
         .dependencies = featureSet(&[_]Feature{}),
     };
+    result[@enumToInt(Feature.extended_image_insts)] = .{ // slot index is the enum tag's integer value
+        .llvm_name = "extended-image-insts",
+        .description = "Support mips != 0, lod != 0, gather4, and get_lod",
+        .dependencies = featureSet(&[_]Feature{}), // empty here; other entries (e.g. the SOUTHERN_ISLANDS generation) name this feature in their own lists
+    };
     result[@enumToInt(Feature.fast_denormal_f32)] = .{
         .llvm_name = "fast-denormal-f32",
         .description = "Enabling denormals does not cause f32 instructions to run at f64 rates",
@@ -268,7 +304,7 @@ pub const all_features = blk: {
     };
     result[@enumToInt(Feature.flat_segment_offset_bug)] = .{
         .llvm_name = "flat-segment-offset-bug",
-        .description = "GFX10 bug, inst_offset ignored in flat segment",
+        .description = "GFX10 bug where inst_offset is ignored when flat instructions access global memory",
         .dependencies = featureSet(&[_]Feature{}),
     };
     result[@enumToInt(Feature.fma_mix_insts)] = .{
@@ -286,6 +322,11 @@ pub const all_features = blk: {
         .description = "Enable double precision operations",
         .dependencies = featureSet(&[_]Feature{}),
     };
+    result[@enumToInt(Feature.full_rate_64_ops)] = .{ // slot index is the enum tag's integer value
+        .llvm_name = "full-rate-64-ops",
+        .description = "Most fp64 instructions are full rate", // gfx90a lists this feature; gfx906 lists half_rate_64_ops instead
+        .dependencies = featureSet(&[_]Feature{}), // empty list: no other feature is named here
+    };
     result[@enumToInt(Feature.g16)] = .{
         .llvm_name = "g16",
         .description = "Support G16 for 16-bit gradient image operands",
@@ -312,6 +353,7 @@ pub const all_features = blk: {
             .ci_insts,
             .dpp,
             .dpp8,
+            .extended_image_insts,
             .fast_denormal_f32,
             .fast_fmaf,
             .flat_address_space,
@@ -334,6 +376,7 @@ pub const all_features = blk: {
             .pk_fmac_f16_inst,
             .register_banking,
             .s_memrealtime,
+            .s_memtime_inst,
             .sdwa,
             .sdwa_omod,
             .sdwa_scalar,
@@ -350,6 +393,11 @@ pub const all_features = blk: {
         .description = "Additional instructions for GFX10.3",
         .dependencies = featureSet(&[_]Feature{}),
     };
+    result[@enumToInt(Feature.gfx10_a_encoding)] = .{ // slot index is the enum tag's integer value
+        .llvm_name = "gfx10_a-encoding", // underscore then hyphen, same pattern as "gfx10_b-encoding"
+        .description = "Has BVH ray tracing instructions",
+        .dependencies = featureSet(&[_]Feature{}), // empty list: no other feature is named here
+    };
     result[@enumToInt(Feature.gfx10_b_encoding)] = .{
         .llvm_name = "gfx10_b-encoding",
         .description = "Encoding format GFX10_B",
@@ -379,7 +427,6 @@ pub const all_features = blk: {
             .aperture_regs,
             .ci_insts,
             .dpp,
-            .ds_src2_insts,
             .fast_denormal_f32,
             .fast_fmaf,
             .flat_address_space,
@@ -394,7 +441,7 @@ pub const all_features = blk: {
             .int_clamp_insts,
             .inv_2pi_inline_imm,
             .localmemorysize65536,
-            .mad_mac_f32_insts,
+            .negative_scratch_offset_bug,
             .r128_a16,
             .s_memrealtime,
             .s_memtime_inst,
@@ -413,6 +460,11 @@ pub const all_features = blk: {
             .xnack_support,
         }),
     };
+    result[@enumToInt(Feature.gfx90a_insts)] = .{ // slot index is the enum tag's integer value
+        .llvm_name = "gfx90a-insts",
+        .description = "Additional instructions for GFX90A+", // listed by the gfx90a CPU model
+        .dependencies = featureSet(&[_]Feature{}), // empty list: gfx9_insts is not named as a dependency
+    };
     result[@enumToInt(Feature.gfx9_insts)] = .{
         .llvm_name = "gfx9-insts",
         .description = "Additional instructions for GFX9+",
@@ -533,6 +585,16 @@ pub const all_features = blk: {
         .description = "Has v_movrel*_b32 instructions",
         .dependencies = featureSet(&[_]Feature{}),
     };
+    result[@enumToInt(Feature.negative_scratch_offset_bug)] = .{ // slot index is the enum tag's integer value
+        .llvm_name = "negative-scratch-offset-bug",
+        .description = "Negative immediate offsets in scratch instructions with an SGPR offset page fault on GFX9",
+        .dependencies = featureSet(&[_]Feature{}), // empty list: no other feature is named here
+    };
+    result[@enumToInt(Feature.negative_unaligned_scratch_offset_bug)] = .{ // separate entry; its description refers to GFX10 rather than GFX9
+        .llvm_name = "negative-unaligned-scratch-offset-bug",
+        .description = "Scratch instructions with a VGPR offset and a negative immediate offset that is not a multiple of 4 read wrong memory on GFX10",
+        .dependencies = featureSet(&[_]Feature{}), // empty list; CPU models such as gfx1013 list this feature directly
+    };
     result[@enumToInt(Feature.no_data_dep_hazard)] = .{
         .llvm_name = "no-data-dep-hazard",
         .description = "Does not need SW waitstates",
@@ -543,11 +605,26 @@ pub const all_features = blk: {
         .description = "V_CMPX does not write VCC/SGPR in addition to EXEC",
         .dependencies = featureSet(&[_]Feature{}),
     };
+    result[@enumToInt(Feature.nsa_clause_bug)] = .{ // slot index is the enum tag's integer value
+        .llvm_name = "nsa-clause-bug",
+        .description = "MIMG-NSA in a hard clause has unpredictable results on GFX10.1",
+        .dependencies = featureSet(&[_]Feature{}), // empty list: nsa_encoding is not named; gfx1013 lists both explicitly
+    };
     result[@enumToInt(Feature.nsa_encoding)] = .{
         .llvm_name = "nsa-encoding",
         .description = "Support NSA encoding for image instructions",
         .dependencies = featureSet(&[_]Feature{}),
     };
+    result[@enumToInt(Feature.nsa_max_size_13)] = .{ // the limit (13) appears only in the tag and the LLVM name
+        .llvm_name = "nsa-max-size-13",
+        .description = "The maximum non-sequential address size in VGPRs.", // same text as nsa_max_size_5
+        .dependencies = featureSet(&[_]Feature{}), // empty list: nsa_encoding is not named as a dependency
+    };
+    result[@enumToInt(Feature.nsa_max_size_5)] = .{ // the limit (5) appears only in the tag and the LLVM name
+        .llvm_name = "nsa-max-size-5",
+        .description = "The maximum non-sequential address size in VGPRs.", // same text as nsa_max_size_13
+        .dependencies = featureSet(&[_]Feature{}), // empty list: nsa_encoding is not named as a dependency
+    };
     result[@enumToInt(Feature.nsa_to_vmem_bug)] = .{
         .llvm_name = "nsa-to-vmem-bug",
         .description = "MIMG-NSA followed by VMEM fail if EXEC_LO or EXEC_HI equals zero",
@@ -558,6 +635,16 @@ pub const all_features = blk: {
         .description = "Branch offset of 3f hardware bug",
         .dependencies = featureSet(&[_]Feature{}),
     };
+    result[@enumToInt(Feature.packed_fp32_ops)] = .{ // slot index is the enum tag's integer value
+        .llvm_name = "packed-fp32-ops",
+        .description = "Support packed fp32 instructions", // listed by the gfx90a CPU model
+        .dependencies = featureSet(&[_]Feature{}), // empty list: no other feature is named here
+    };
+    result[@enumToInt(Feature.packed_tid)] = .{ // slot index is the enum tag's integer value
+        .llvm_name = "packed-tid",
+        .description = "Workitem IDs are packed into v0 at kernel launch", // listed by the gfx90a CPU model
+        .dependencies = featureSet(&[_]Feature{}), // empty list: no other feature is named here
+    };
     result[@enumToInt(Feature.pk_fmac_f16_inst)] = .{
         .llvm_name = "pk-fmac-f16-inst",
         .description = "Has v_pk_fmac_f16 instruction",
@@ -639,6 +726,7 @@ pub const all_features = blk: {
         .dependencies = featureSet(&[_]Feature{
             .ci_insts,
             .ds_src2_insts,
+            .extended_image_insts,
             .flat_address_space,
             .fp64,
             .gfx7_gfx8_gfx9_insts,
@@ -657,6 +745,11 @@ pub const all_features = blk: {
         .description = "VI SGPR initialization bug requiring a fixed SGPR allocation size",
         .dependencies = featureSet(&[_]Feature{}),
     };
+    result[@enumToInt(Feature.shader_cycles_register)] = .{ // slot index is the enum tag's integer value
+        .llvm_name = "shader-cycles-register",
+        .description = "Has SHADER_CYCLES hardware register", // listed by CPU models such as gfx1034 and gfx1035
+        .dependencies = featureSet(&[_]Feature{}), // empty list: no other feature is named here
+    };
     result[@enumToInt(Feature.si_scheduler)] = .{
         .llvm_name = "si-scheduler",
         .description = "Enable SI Machine Scheduler",
@@ -672,6 +765,7 @@ pub const all_features = blk: {
         .description = "SOUTHERN_ISLANDS GPU generation",
         .dependencies = featureSet(&[_]Feature{
             .ds_src2_insts,
+            .extended_image_insts,
             .fp64,
             .ldsbankcount32,
             .localmemorysize32768,
@@ -693,6 +787,11 @@ pub const all_features = blk: {
         .description = "Hardware supports SRAMECC",
         .dependencies = featureSet(&[_]Feature{}),
     };
+    result[@enumToInt(Feature.tgsplit)] = .{ // slot index is the enum tag's integer value
+        .llvm_name = "tgsplit", // single word: tag and LLVM name are spelled identically
+        .description = "Enable threadgroup split execution",
+        .dependencies = featureSet(&[_]Feature{}), // empty list: no other feature is named here
+    };
     result[@enumToInt(Feature.trap_handler)] = .{
         .llvm_name = "trap-handler",
         .description = "Trap handler support",
@@ -761,6 +860,7 @@ pub const all_features = blk: {
             .ci_insts,
             .dpp,
             .ds_src2_insts,
+            .extended_image_insts,
             .fast_denormal_f32,
             .flat_address_space,
             .fp64,
@@ -892,10 +992,12 @@ pub const cpu = struct {
             .lds_misaligned_bug,
             .ldsbankcount32,
             .mad_mac_f32_insts,
+            .negative_unaligned_scratch_offset_bug,
+            .nsa_clause_bug,
             .nsa_encoding,
+            .nsa_max_size_5,
             .nsa_to_vmem_bug,
             .offset_3f_bug,
-            .s_memtime_inst,
             .scalar_atomics,
             .scalar_flat_scratch_insts,
             .scalar_stores,
@@ -916,6 +1018,7 @@ pub const cpu = struct {
             .dot2_insts,
             .dot5_insts,
             .dot6_insts,
+            .dot7_insts,
             .ds_src2_insts,
             .flat_segment_offset_bug,
             .get_wave_id_inst,
@@ -925,10 +1028,12 @@ pub const cpu = struct {
             .lds_misaligned_bug,
             .ldsbankcount32,
             .mad_mac_f32_insts,
+            .negative_unaligned_scratch_offset_bug,
+            .nsa_clause_bug,
             .nsa_encoding,
+            .nsa_max_size_5,
             .nsa_to_vmem_bug,
             .offset_3f_bug,
-            .s_memtime_inst,
             .scalar_atomics,
             .scalar_flat_scratch_insts,
             .scalar_stores,
@@ -949,6 +1054,7 @@ pub const cpu = struct {
             .dot2_insts,
             .dot5_insts,
             .dot6_insts,
+            .dot7_insts,
             .ds_src2_insts,
             .flat_segment_offset_bug,
             .get_wave_id_inst,
@@ -958,10 +1064,44 @@ pub const cpu = struct {
             .lds_misaligned_bug,
             .ldsbankcount32,
             .mad_mac_f32_insts,
+            .negative_unaligned_scratch_offset_bug,
+            .nsa_clause_bug,
             .nsa_encoding,
+            .nsa_max_size_5,
+            .nsa_to_vmem_bug,
+            .offset_3f_bug,
+            .scalar_atomics,
+            .scalar_flat_scratch_insts,
+            .scalar_stores,
+            .smem_to_vector_write_hazard,
+            .vcmpx_exec_war_hazard,
+            .vcmpx_permlane_hazard,
+            .vmem_to_scalar_write_hazard,
+            .wavefrontsize32,
+            .xnack_support,
+        }),
+    };
+    pub const gfx1013 = CpuModel{
+        .name = "gfx1013",
+        .llvm_name = "gfx1013",
+        .features = featureSet(&[_]Feature{
+            .dl_insts,
+            .ds_src2_insts,
+            .flat_segment_offset_bug,
+            .get_wave_id_inst,
+            .gfx10,
+            .gfx10_a_encoding,
+            .inst_fwd_prefetch_bug,
+            .lds_branch_vmem_war_hazard,
+            .lds_misaligned_bug,
+            .ldsbankcount32,
+            .mad_mac_f32_insts,
+            .negative_unaligned_scratch_offset_bug,
+            .nsa_clause_bug,
+            .nsa_encoding,
+            .nsa_max_size_5,
             .nsa_to_vmem_bug,
             .offset_3f_bug,
-            .s_memtime_inst,
             .scalar_atomics,
             .scalar_flat_scratch_insts,
             .scalar_stores,
@@ -982,11 +1122,15 @@ pub const cpu = struct {
             .dot2_insts,
             .dot5_insts,
             .dot6_insts,
+            .dot7_insts,
             .gfx10,
             .gfx10_3_insts,
+            .gfx10_a_encoding,
             .gfx10_b_encoding,
             .ldsbankcount32,
             .nsa_encoding,
+            .nsa_max_size_13,
+            .shader_cycles_register,
             .wavefrontsize32,
         }),
     };
@@ -999,11 +1143,15 @@ pub const cpu = struct {
             .dot2_insts,
             .dot5_insts,
             .dot6_insts,
+            .dot7_insts,
             .gfx10,
             .gfx10_3_insts,
+            .gfx10_a_encoding,
             .gfx10_b_encoding,
             .ldsbankcount32,
             .nsa_encoding,
+            .nsa_max_size_13,
+            .shader_cycles_register,
             .wavefrontsize32,
         }),
     };
@@ -1016,11 +1164,15 @@ pub const cpu = struct {
             .dot2_insts,
             .dot5_insts,
             .dot6_insts,
+            .dot7_insts,
             .gfx10,
             .gfx10_3_insts,
+            .gfx10_a_encoding,
             .gfx10_b_encoding,
             .ldsbankcount32,
             .nsa_encoding,
+            .nsa_max_size_13,
+            .shader_cycles_register,
             .wavefrontsize32,
         }),
     };
@@ -1033,11 +1185,57 @@ pub const cpu = struct {
             .dot2_insts,
             .dot5_insts,
             .dot6_insts,
+            .dot7_insts,
+            .gfx10,
+            .gfx10_3_insts,
+            .gfx10_a_encoding,
+            .gfx10_b_encoding,
+            .ldsbankcount32,
+            .nsa_encoding,
+            .nsa_max_size_13,
+            .shader_cycles_register,
+            .wavefrontsize32,
+        }),
+    };
+    pub const gfx1034 = CpuModel{ // feature list matches gfx1035's entry for entry
+        .name = "gfx1034",
+        .llvm_name = "gfx1034", // same spelling as .name
+        .features = featureSet(&[_]Feature{
+            .dl_insts,
+            .dot1_insts,
+            .dot2_insts,
+            .dot5_insts,
+            .dot6_insts,
+            .dot7_insts, // v_dot2_f32_f16, v_dot4_u32_u8, v_dot8_u32_u4
             .gfx10,
             .gfx10_3_insts,
+            .gfx10_a_encoding, // described as "Has BVH ray tracing instructions"
             .gfx10_b_encoding,
             .ldsbankcount32,
             .nsa_encoding,
+            .nsa_max_size_13, // gfx1013 lists nsa_max_size_5 instead
+            .shader_cycles_register, // SHADER_CYCLES hardware register
+            .wavefrontsize32,
+        }),
+    };
+    pub const gfx1035 = CpuModel{ // feature list matches gfx1034's entry for entry
+        .name = "gfx1035",
+        .llvm_name = "gfx1035", // same spelling as .name
+        .features = featureSet(&[_]Feature{
+            .dl_insts,
+            .dot1_insts,
+            .dot2_insts,
+            .dot5_insts,
+            .dot6_insts,
+            .dot7_insts, // v_dot2_f32_f16, v_dot4_u32_u8, v_dot8_u32_u4
+            .gfx10,
+            .gfx10_3_insts,
+            .gfx10_a_encoding, // described as "Has BVH ray tracing instructions"
+            .gfx10_b_encoding,
+            .ldsbankcount32,
+            .nsa_encoding,
+            .nsa_max_size_13, // gfx1013 lists nsa_max_size_5 instead
+            .shader_cycles_register, // SHADER_CYCLES hardware register
             .wavefrontsize32,
         }),
     };
@@ -1171,9 +1369,12 @@ pub const cpu = struct {
         .name = "gfx900",
         .llvm_name = "gfx900",
         .features = featureSet(&[_]Feature{
+            .ds_src2_insts,
+            .extended_image_insts,
             .gfx9,
             .image_gather4_d16_bug,
             .ldsbankcount32,
+            .mad_mac_f32_insts,
             .mad_mix_insts,
         }),
     };
@@ -1181,9 +1382,12 @@ pub const cpu = struct {
         .name = "gfx902",
         .llvm_name = "gfx902",
         .features = featureSet(&[_]Feature{
+            .ds_src2_insts,
+            .extended_image_insts,
             .gfx9,
             .image_gather4_d16_bug,
             .ldsbankcount32,
+            .mad_mac_f32_insts,
             .mad_mix_insts,
         }),
     };
@@ -1191,10 +1395,13 @@ pub const cpu = struct {
         .name = "gfx904",
         .llvm_name = "gfx904",
         .features = featureSet(&[_]Feature{
+            .ds_src2_insts,
+            .extended_image_insts,
             .fma_mix_insts,
             .gfx9,
             .image_gather4_d16_bug,
             .ldsbankcount32,
+            .mad_mac_f32_insts,
         }),
     };
     pub const gfx906 = CpuModel{
@@ -1204,11 +1411,15 @@ pub const cpu = struct {
             .dl_insts,
             .dot1_insts,
             .dot2_insts,
+            .dot7_insts,
+            .ds_src2_insts,
+            .extended_image_insts,
             .fma_mix_insts,
             .gfx9,
             .half_rate_64_ops,
             .image_gather4_d16_bug,
             .ldsbankcount32,
+            .mad_mac_f32_insts,
             .sramecc_support,
         }),
     };
@@ -1224,11 +1435,15 @@ pub const cpu = struct {
             .dot4_insts,
             .dot5_insts,
             .dot6_insts,
+            .dot7_insts,
+            .ds_src2_insts,
+            .extended_image_insts,
             .fma_mix_insts,
             .gfx9,
             .half_rate_64_ops,
             .image_gather4_d16_bug,
             .ldsbankcount32,
+            .mad_mac_f32_insts,
             .mai_insts,
             .mfma_inline_literal_bug,
             .pk_fmac_f16_inst,
@@ -1239,21 +1454,53 @@ pub const cpu = struct {
         .name = "gfx909",
         .llvm_name = "gfx909",
         .features = featureSet(&[_]Feature{
+            .ds_src2_insts,
+            .extended_image_insts,
             .gfx9,
             .image_gather4_d16_bug,
             .ldsbankcount32,
+            .mad_mac_f32_insts,
             .mad_mix_insts,
         }),
     };
+    pub const gfx90a = CpuModel{ // unlike the other gfx90x models shown in this file, omits ds_src2_insts, extended_image_insts and image_gather4_d16_bug
+        .name = "gfx90a",
+        .llvm_name = "gfx90a", // same spelling as .name
+        .features = featureSet(&[_]Feature{
+            .atomic_fadd_insts,
+            .dl_insts,
+            .dot1_insts,
+            .dot2_insts,
+            .dot3_insts,
+            .dot4_insts,
+            .dot5_insts,
+            .dot6_insts,
+            .dot7_insts, // v_dot2_f32_f16, v_dot4_u32_u8, v_dot8_u32_u4
+            .dpp_64bit,
+            .fma_mix_insts,
+            .full_rate_64_ops, // gfx906 lists half_rate_64_ops instead
+            .gfx9,
+            .gfx90a_insts, // "Additional instructions for GFX90A+"
+            .ldsbankcount32,
+            .mad_mac_f32_insts,
+            .mai_insts,
+            .packed_fp32_ops, // packed fp32 instructions
+            .packed_tid, // workitem IDs packed into v0 at kernel launch
+            .pk_fmac_f16_inst,
+            .sramecc_support,
+        }),
+    };
     pub const gfx90c = CpuModel{
         .name = "gfx90c",
         .llvm_name = "gfx90c",
         .features = featureSet(&[_]Feature{
+            .ds_src2_insts, // also listed per model by gfx900, gfx902 and gfx909
+            .extended_image_insts, // "Support mips != 0, lod != 0, gather4, and get_lod"
             .gfx9,
             .image_gather4_d16_bug,
             .ldsbankcount32,
+            .mad_mac_f32_insts, // also listed per model by gfx900, gfx902 and gfx909
             .mad_mix_insts,
-            .xnack,
         }),
     };
     pub const hainan = CpuModel{