aboutsummaryrefslogtreecommitdiff
path: root/lib/std/Target/amdgpu.zig
diff options
context:
space:
mode:
authorAndrew Kelley <andrew@ziglang.org>2024-01-02 14:11:27 -0800
committerGitHub <noreply@github.com>2024-01-02 14:11:27 -0800
commit289ae45c1b58e952867c4fa1e246d0ef7bc2ff64 (patch)
tree5dd034143a2354b7b44496e684f1c764e2f9664c /lib/std/Target/amdgpu.zig
parentc89bb3e141ee215add0b52930d48bffd8dae8342 (diff)
parentc546ddb3edc557fae4b932e5239b9dcb66117832 (diff)
downloadzig-289ae45c1b58e952867c4fa1e246d0ef7bc2ff64.tar.gz
zig-289ae45c1b58e952867c4fa1e246d0ef7bc2ff64.zip
Merge pull request #18160 from ziglang/std-build-module
Move many settings from being per-Compilation to being per-Module
Diffstat (limited to 'lib/std/Target/amdgpu.zig')
-rw-r--r--lib/std/Target/amdgpu.zig2153
1 files changed, 2153 insertions, 0 deletions
diff --git a/lib/std/Target/amdgpu.zig b/lib/std/Target/amdgpu.zig
new file mode 100644
index 0000000000..012f652088
--- /dev/null
+++ b/lib/std/Target/amdgpu.zig
@@ -0,0 +1,2153 @@
+//! This file is auto-generated by tools/update_cpu_features.zig.
+
+const std = @import("../std.zig");
+const CpuFeature = std.Target.Cpu.Feature;
+const CpuModel = std.Target.Cpu.Model;
+
+pub const Feature = enum {
+ @"16_bit_insts",
+ a16,
+ add_no_carry_insts,
+ aperture_regs,
+ architected_flat_scratch,
+ architected_sgprs,
+ atomic_buffer_global_pk_add_f16_insts,
+ atomic_buffer_global_pk_add_f16_no_rtn_insts,
+ atomic_ds_pk_add_16_insts,
+ atomic_fadd_no_rtn_insts,
+ atomic_fadd_rtn_insts,
+ atomic_flat_pk_add_16_insts,
+ atomic_global_pk_add_bf16_inst,
+ auto_waitcnt_before_barrier,
+ back_off_barrier,
+ ci_insts,
+ cumode,
+ dl_insts,
+ dot10_insts,
+ dot1_insts,
+ dot2_insts,
+ dot3_insts,
+ dot4_insts,
+ dot5_insts,
+ dot6_insts,
+ dot7_insts,
+ dot8_insts,
+ dot9_insts,
+ dpp,
+ dpp8,
+ dpp_64bit,
+ ds128,
+ ds_src2_insts,
+ extended_image_insts,
+ fast_denormal_f32,
+ fast_fmaf,
+ flat_address_space,
+ flat_atomic_fadd_f32_inst,
+ flat_for_global,
+ flat_global_insts,
+ flat_inst_offsets,
+ flat_scratch,
+ flat_scratch_insts,
+ flat_segment_offset_bug,
+ fma_mix_insts,
+ fmacf64_inst,
+ fmaf,
+ force_store_sc0_sc1,
+ fp64,
+ fp8_insts,
+ full_rate_64_ops,
+ g16,
+ gcn3_encoding,
+ get_wave_id_inst,
+ gfx10,
+ gfx10_3_insts,
+ gfx10_a_encoding,
+ gfx10_b_encoding,
+ gfx10_insts,
+ gfx11,
+ gfx11_full_vgprs,
+ gfx11_insts,
+ gfx7_gfx8_gfx9_insts,
+ gfx8_insts,
+ gfx9,
+ gfx90a_insts,
+ gfx940_insts,
+ gfx9_insts,
+ half_rate_64_ops,
+ image_gather4_d16_bug,
+ image_insts,
+ image_store_d16_bug,
+ inst_fwd_prefetch_bug,
+ int_clamp_insts,
+ inv_2pi_inline_imm,
+ lds_branch_vmem_war_hazard,
+ lds_misaligned_bug,
+ ldsbankcount16,
+ ldsbankcount32,
+ load_store_opt,
+ localmemorysize32768,
+ localmemorysize65536,
+ mad_intra_fwd_bug,
+ mad_mac_f32_insts,
+ mad_mix_insts,
+ mai_insts,
+ max_private_element_size_16,
+ max_private_element_size_4,
+ max_private_element_size_8,
+ mfma_inline_literal_bug,
+ mimg_r128,
+ movrel,
+ negative_scratch_offset_bug,
+ negative_unaligned_scratch_offset_bug,
+ no_data_dep_hazard,
+ no_sdst_cmpx,
+ nsa_clause_bug,
+ nsa_encoding,
+ nsa_to_vmem_bug,
+ offset_3f_bug,
+ packed_fp32_ops,
+ packed_tid,
+ partial_nsa_encoding,
+ pk_fmac_f16_inst,
+ promote_alloca,
+ prt_strict_null,
+ r128_a16,
+ s_memrealtime,
+ s_memtime_inst,
+ scalar_atomics,
+ scalar_flat_scratch_insts,
+ scalar_stores,
+ sdwa,
+ sdwa_mav,
+ sdwa_omod,
+ sdwa_out_mods_vopc,
+ sdwa_scalar,
+ sdwa_sdst,
+ sea_islands,
+ sgpr_init_bug,
+ shader_cycles_register,
+ si_scheduler,
+ smem_to_vector_write_hazard,
+ southern_islands,
+ sramecc,
+ sramecc_support,
+ tgsplit,
+ trap_handler,
+ trig_reduced_range,
+ true16,
+ unaligned_access_mode,
+ unaligned_buffer_access,
+ unaligned_ds_access,
+ unaligned_scratch_access,
+ unpacked_d16_vmem,
+ unsafe_ds_offset_folding,
+ user_sgpr_init16_bug,
+ valu_trans_use_hazard,
+ vcmpx_exec_war_hazard,
+ vcmpx_permlane_hazard,
+ vgpr_index_mode,
+ vmem_to_scalar_write_hazard,
+ volcanic_islands,
+ vop3_literal,
+ vop3p,
+ vopd,
+ vscnt,
+ wavefrontsize16,
+ wavefrontsize32,
+ wavefrontsize64,
+ xnack,
+ xnack_support,
+};
+
+pub const featureSet = CpuFeature.feature_set_fns(Feature).featureSet;
+pub const featureSetHas = CpuFeature.feature_set_fns(Feature).featureSetHas;
+pub const featureSetHasAny = CpuFeature.feature_set_fns(Feature).featureSetHasAny;
+pub const featureSetHasAll = CpuFeature.feature_set_fns(Feature).featureSetHasAll;
+
+pub const all_features = blk: {
+ const len = @typeInfo(Feature).Enum.fields.len;
+ std.debug.assert(len <= CpuFeature.Set.needed_bit_count);
+ var result: [len]CpuFeature = undefined;
+ result[@intFromEnum(Feature.@"16_bit_insts")] = .{
+ .llvm_name = "16-bit-insts",
+ .description = "Has i16/f16 instructions",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.a16)] = .{
+ .llvm_name = "a16",
+ .description = "Support A16 for 16-bit coordinates/gradients/lod/clamp/mip image operands",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.add_no_carry_insts)] = .{
+ .llvm_name = "add-no-carry-insts",
+ .description = "Have VALU add/sub instructions without carry out",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.aperture_regs)] = .{
+ .llvm_name = "aperture-regs",
+ .description = "Has Memory Aperture Base and Size Registers",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.architected_flat_scratch)] = .{
+ .llvm_name = "architected-flat-scratch",
+ .description = "Flat Scratch register is a readonly SPI initialized architected register",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.architected_sgprs)] = .{
+ .llvm_name = "architected-sgprs",
+ .description = "Enable the architected SGPRs",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.atomic_buffer_global_pk_add_f16_insts)] = .{
+ .llvm_name = "atomic-buffer-global-pk-add-f16-insts",
+ .description = "Has buffer_atomic_pk_add_f16 and global_atomic_pk_add_f16 instructions that can return original value",
+ .dependencies = featureSet(&[_]Feature{
+ .flat_global_insts,
+ }),
+ };
+ result[@intFromEnum(Feature.atomic_buffer_global_pk_add_f16_no_rtn_insts)] = .{
+ .llvm_name = "atomic-buffer-global-pk-add-f16-no-rtn-insts",
+ .description = "Has buffer_atomic_pk_add_f16 and global_atomic_pk_add_f16 instructions that don't return original value",
+ .dependencies = featureSet(&[_]Feature{
+ .flat_global_insts,
+ }),
+ };
+ result[@intFromEnum(Feature.atomic_ds_pk_add_16_insts)] = .{
+ .llvm_name = "atomic-ds-pk-add-16-insts",
+ .description = "Has ds_pk_add_bf16, ds_pk_add_f16, ds_pk_add_rtn_bf16, ds_pk_add_rtn_f16 instructions",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.atomic_fadd_no_rtn_insts)] = .{
+ .llvm_name = "atomic-fadd-no-rtn-insts",
+ .description = "Has buffer_atomic_add_f32 and global_atomic_add_f32 instructions that don't return original value",
+ .dependencies = featureSet(&[_]Feature{
+ .flat_global_insts,
+ }),
+ };
+ result[@intFromEnum(Feature.atomic_fadd_rtn_insts)] = .{
+ .llvm_name = "atomic-fadd-rtn-insts",
+ .description = "Has buffer_atomic_add_f32 and global_atomic_add_f32 instructions that return original value",
+ .dependencies = featureSet(&[_]Feature{
+ .flat_global_insts,
+ }),
+ };
+ result[@intFromEnum(Feature.atomic_flat_pk_add_16_insts)] = .{
+ .llvm_name = "atomic-flat-pk-add-16-insts",
+ .description = "Has flat_atomic_pk_add_f16 and flat_atomic_pk_add_bf16 instructions",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.atomic_global_pk_add_bf16_inst)] = .{
+ .llvm_name = "atomic-global-pk-add-bf16-inst",
+ .description = "Has global_atomic_pk_add_bf16 instruction",
+ .dependencies = featureSet(&[_]Feature{
+ .flat_global_insts,
+ }),
+ };
+ result[@intFromEnum(Feature.auto_waitcnt_before_barrier)] = .{
+ .llvm_name = "auto-waitcnt-before-barrier",
+ .description = "Hardware automatically inserts waitcnt before barrier",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.back_off_barrier)] = .{
+ .llvm_name = "back-off-barrier",
+ .description = "Hardware supports backing off s_barrier if an exception occurs",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.ci_insts)] = .{
+ .llvm_name = "ci-insts",
+ .description = "Additional instructions for CI+",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.cumode)] = .{
+ .llvm_name = "cumode",
+ .description = "Enable CU wavefront execution mode",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.dl_insts)] = .{
+ .llvm_name = "dl-insts",
+ .description = "Has v_fmac_f32 and v_xnor_b32 instructions",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.dot10_insts)] = .{
+ .llvm_name = "dot10-insts",
+ .description = "Has v_dot2_f32_f16 instruction",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.dot1_insts)] = .{
+ .llvm_name = "dot1-insts",
+ .description = "Has v_dot4_i32_i8 and v_dot8_i32_i4 instructions",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.dot2_insts)] = .{
+ .llvm_name = "dot2-insts",
+ .description = "Has v_dot2_i32_i16, v_dot2_u32_u16 instructions",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.dot3_insts)] = .{
+ .llvm_name = "dot3-insts",
+ .description = "Has v_dot8c_i32_i4 instruction",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.dot4_insts)] = .{
+ .llvm_name = "dot4-insts",
+ .description = "Has v_dot2c_i32_i16 instruction",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.dot5_insts)] = .{
+ .llvm_name = "dot5-insts",
+ .description = "Has v_dot2c_f32_f16 instruction",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.dot6_insts)] = .{
+ .llvm_name = "dot6-insts",
+ .description = "Has v_dot4c_i32_i8 instruction",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.dot7_insts)] = .{
+ .llvm_name = "dot7-insts",
+ .description = "Has v_dot4_u32_u8, v_dot8_u32_u4 instructions",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.dot8_insts)] = .{
+ .llvm_name = "dot8-insts",
+ .description = "Has v_dot4_i32_iu8, v_dot8_i32_iu4 instructions",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.dot9_insts)] = .{
+ .llvm_name = "dot9-insts",
+ .description = "Has v_dot2_f16_f16, v_dot2_bf16_bf16, v_dot2_f32_bf16 instructions",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.dpp)] = .{
+ .llvm_name = "dpp",
+ .description = "Support DPP (Data Parallel Primitives) extension",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.dpp8)] = .{
+ .llvm_name = "dpp8",
+ .description = "Support DPP8 (Data Parallel Primitives) extension",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.dpp_64bit)] = .{
+ .llvm_name = "dpp-64bit",
+ .description = "Support DPP (Data Parallel Primitives) extension",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.ds128)] = .{
+ .llvm_name = "enable-ds128",
+ .description = "Use ds_{read|write}_b128",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.ds_src2_insts)] = .{
+ .llvm_name = "ds-src2-insts",
+ .description = "Has ds_*_src2 instructions",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.extended_image_insts)] = .{
+ .llvm_name = "extended-image-insts",
+ .description = "Support mips != 0, lod != 0, gather4, and get_lod",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.fast_denormal_f32)] = .{
+ .llvm_name = "fast-denormal-f32",
+ .description = "Enabling denormals does not cause f32 instructions to run at f64 rates",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.fast_fmaf)] = .{
+ .llvm_name = "fast-fmaf",
+ .description = "Assuming f32 fma is at least as fast as mul + add",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.flat_address_space)] = .{
+ .llvm_name = "flat-address-space",
+ .description = "Support flat address space",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.flat_atomic_fadd_f32_inst)] = .{
+ .llvm_name = "flat-atomic-fadd-f32-inst",
+ .description = "Has flat_atomic_add_f32 instruction",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.flat_for_global)] = .{
+ .llvm_name = "flat-for-global",
+ .description = "Force to generate flat instruction for global",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.flat_global_insts)] = .{
+ .llvm_name = "flat-global-insts",
+ .description = "Have global_* flat memory instructions",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.flat_inst_offsets)] = .{
+ .llvm_name = "flat-inst-offsets",
+ .description = "Flat instructions have immediate offset addressing mode",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.flat_scratch)] = .{
+ .llvm_name = "enable-flat-scratch",
+ .description = "Use scratch_* flat memory instructions to access scratch",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.flat_scratch_insts)] = .{
+ .llvm_name = "flat-scratch-insts",
+ .description = "Have scratch_* flat memory instructions",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.flat_segment_offset_bug)] = .{
+ .llvm_name = "flat-segment-offset-bug",
+ .description = "GFX10 bug where inst_offset is ignored when flat instructions access global memory",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.fma_mix_insts)] = .{
+ .llvm_name = "fma-mix-insts",
+ .description = "Has v_fma_mix_f32, v_fma_mixlo_f16, v_fma_mixhi_f16 instructions",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.fmacf64_inst)] = .{
+ .llvm_name = "fmacf64-inst",
+ .description = "Has v_fmac_f64 instruction",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.fmaf)] = .{
+ .llvm_name = "fmaf",
+ .description = "Enable single precision FMA (not as fast as mul+add, but fused)",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.force_store_sc0_sc1)] = .{
+ .llvm_name = "force-store-sc0-sc1",
+ .description = "Has SC0 and SC1 on stores",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.fp64)] = .{
+ .llvm_name = "fp64",
+ .description = "Enable double precision operations",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.fp8_insts)] = .{
+ .llvm_name = "fp8-insts",
+ .description = "Has fp8 and bf8 instructions",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.full_rate_64_ops)] = .{
+ .llvm_name = "full-rate-64-ops",
+ .description = "Most fp64 instructions are full rate",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.g16)] = .{
+ .llvm_name = "g16",
+ .description = "Support G16 for 16-bit gradient image operands",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.gcn3_encoding)] = .{
+ .llvm_name = "gcn3-encoding",
+ .description = "Encoding format for VI",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.get_wave_id_inst)] = .{
+ .llvm_name = "get-wave-id-inst",
+ .description = "Has s_get_waveid_in_workgroup instruction",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.gfx10)] = .{
+ .llvm_name = "gfx10",
+ .description = "GFX10 GPU generation",
+ .dependencies = featureSet(&[_]Feature{
+ .@"16_bit_insts",
+ .a16,
+ .add_no_carry_insts,
+ .aperture_regs,
+ .ci_insts,
+ .dpp,
+ .dpp8,
+ .extended_image_insts,
+ .fast_denormal_f32,
+ .fast_fmaf,
+ .flat_address_space,
+ .flat_global_insts,
+ .flat_inst_offsets,
+ .flat_scratch_insts,
+ .fma_mix_insts,
+ .fp64,
+ .g16,
+ .gfx10_insts,
+ .gfx8_insts,
+ .gfx9_insts,
+ .image_insts,
+ .int_clamp_insts,
+ .inv_2pi_inline_imm,
+ .localmemorysize65536,
+ .mimg_r128,
+ .movrel,
+ .no_data_dep_hazard,
+ .no_sdst_cmpx,
+ .pk_fmac_f16_inst,
+ .s_memrealtime,
+ .s_memtime_inst,
+ .sdwa,
+ .sdwa_omod,
+ .sdwa_scalar,
+ .sdwa_sdst,
+ .unaligned_buffer_access,
+ .unaligned_ds_access,
+ .vop3_literal,
+ .vop3p,
+ .vscnt,
+ }),
+ };
+ result[@intFromEnum(Feature.gfx10_3_insts)] = .{
+ .llvm_name = "gfx10-3-insts",
+ .description = "Additional instructions for GFX10.3",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.gfx10_a_encoding)] = .{
+ .llvm_name = "gfx10_a-encoding",
+ .description = "Has BVH ray tracing instructions",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.gfx10_b_encoding)] = .{
+ .llvm_name = "gfx10_b-encoding",
+ .description = "Encoding format GFX10_B",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.gfx10_insts)] = .{
+ .llvm_name = "gfx10-insts",
+ .description = "Additional instructions for GFX10+",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.gfx11)] = .{
+ .llvm_name = "gfx11",
+ .description = "GFX11 GPU generation",
+ .dependencies = featureSet(&[_]Feature{
+ .@"16_bit_insts",
+ .a16,
+ .add_no_carry_insts,
+ .aperture_regs,
+ .ci_insts,
+ .dpp,
+ .dpp8,
+ .extended_image_insts,
+ .fast_denormal_f32,
+ .fast_fmaf,
+ .flat_address_space,
+ .flat_global_insts,
+ .flat_inst_offsets,
+ .flat_scratch_insts,
+ .fma_mix_insts,
+ .fp64,
+ .g16,
+ .gfx10_3_insts,
+ .gfx10_a_encoding,
+ .gfx10_b_encoding,
+ .gfx10_insts,
+ .gfx11_insts,
+ .gfx8_insts,
+ .gfx9_insts,
+ .int_clamp_insts,
+ .inv_2pi_inline_imm,
+ .localmemorysize65536,
+ .mimg_r128,
+ .movrel,
+ .no_data_dep_hazard,
+ .no_sdst_cmpx,
+ .pk_fmac_f16_inst,
+ .true16,
+ .unaligned_buffer_access,
+ .unaligned_ds_access,
+ .vop3_literal,
+ .vop3p,
+ .vopd,
+ .vscnt,
+ }),
+ };
+ result[@intFromEnum(Feature.gfx11_full_vgprs)] = .{
+ .llvm_name = "gfx11-full-vgprs",
+ .description = "GFX11 with 50% more physical VGPRs and 50% larger allocation granule than GFX10",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.gfx11_insts)] = .{
+ .llvm_name = "gfx11-insts",
+ .description = "Additional instructions for GFX11+",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.gfx7_gfx8_gfx9_insts)] = .{
+ .llvm_name = "gfx7-gfx8-gfx9-insts",
+ .description = "Instructions shared in GFX7, GFX8, GFX9",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.gfx8_insts)] = .{
+ .llvm_name = "gfx8-insts",
+ .description = "Additional instructions for GFX8+",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.gfx9)] = .{
+ .llvm_name = "gfx9",
+ .description = "GFX9 GPU generation",
+ .dependencies = featureSet(&[_]Feature{
+ .@"16_bit_insts",
+ .a16,
+ .add_no_carry_insts,
+ .aperture_regs,
+ .ci_insts,
+ .dpp,
+ .fast_denormal_f32,
+ .fast_fmaf,
+ .flat_address_space,
+ .flat_global_insts,
+ .flat_inst_offsets,
+ .flat_scratch_insts,
+ .fp64,
+ .gcn3_encoding,
+ .gfx7_gfx8_gfx9_insts,
+ .gfx8_insts,
+ .gfx9_insts,
+ .int_clamp_insts,
+ .inv_2pi_inline_imm,
+ .localmemorysize65536,
+ .negative_scratch_offset_bug,
+ .r128_a16,
+ .s_memrealtime,
+ .s_memtime_inst,
+ .scalar_atomics,
+ .scalar_flat_scratch_insts,
+ .scalar_stores,
+ .sdwa,
+ .sdwa_omod,
+ .sdwa_scalar,
+ .sdwa_sdst,
+ .unaligned_buffer_access,
+ .unaligned_ds_access,
+ .vgpr_index_mode,
+ .vop3p,
+ .wavefrontsize64,
+ .xnack_support,
+ }),
+ };
+ result[@intFromEnum(Feature.gfx90a_insts)] = .{
+ .llvm_name = "gfx90a-insts",
+ .description = "Additional instructions for GFX90A+",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.gfx940_insts)] = .{
+ .llvm_name = "gfx940-insts",
+ .description = "Additional instructions for GFX940+",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.gfx9_insts)] = .{
+ .llvm_name = "gfx9-insts",
+ .description = "Additional instructions for GFX9+",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.half_rate_64_ops)] = .{
+ .llvm_name = "half-rate-64-ops",
+ .description = "Most fp64 instructions are half rate instead of quarter",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.image_gather4_d16_bug)] = .{
+ .llvm_name = "image-gather4-d16-bug",
+ .description = "Image Gather4 D16 hardware bug",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.image_insts)] = .{
+ .llvm_name = "image-insts",
+ .description = "Support image instructions",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.image_store_d16_bug)] = .{
+ .llvm_name = "image-store-d16-bug",
+ .description = "Image Store D16 hardware bug",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.inst_fwd_prefetch_bug)] = .{
+ .llvm_name = "inst-fwd-prefetch-bug",
+ .description = "S_INST_PREFETCH instruction causes shader to hang",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.int_clamp_insts)] = .{
+ .llvm_name = "int-clamp-insts",
+ .description = "Support clamp for integer destination",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.inv_2pi_inline_imm)] = .{
+ .llvm_name = "inv-2pi-inline-imm",
+ .description = "Has 1 / (2 * pi) as inline immediate",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.lds_branch_vmem_war_hazard)] = .{
+ .llvm_name = "lds-branch-vmem-war-hazard",
+ .description = "Switching between LDS and VMEM-tex not waiting VM_VSRC=0",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.lds_misaligned_bug)] = .{
+ .llvm_name = "lds-misaligned-bug",
+ .description = "Some GFX10 bug with multi-dword LDS and flat access that is not naturally aligned in WGP mode",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.ldsbankcount16)] = .{
+ .llvm_name = "ldsbankcount16",
+ .description = "The number of LDS banks per compute unit.",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.ldsbankcount32)] = .{
+ .llvm_name = "ldsbankcount32",
+ .description = "The number of LDS banks per compute unit.",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.load_store_opt)] = .{
+ .llvm_name = "load-store-opt",
+ .description = "Enable SI load/store optimizer pass",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.localmemorysize32768)] = .{
+ .llvm_name = "localmemorysize32768",
+ .description = "The size of local memory in bytes",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.localmemorysize65536)] = .{
+ .llvm_name = "localmemorysize65536",
+ .description = "The size of local memory in bytes",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.mad_intra_fwd_bug)] = .{
+ .llvm_name = "mad-intra-fwd-bug",
+ .description = "MAD_U64/I64 intra instruction forwarding bug",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.mad_mac_f32_insts)] = .{
+ .llvm_name = "mad-mac-f32-insts",
+ .description = "Has v_mad_f32/v_mac_f32/v_madak_f32/v_madmk_f32 instructions",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.mad_mix_insts)] = .{
+ .llvm_name = "mad-mix-insts",
+ .description = "Has v_mad_mix_f32, v_mad_mixlo_f16, v_mad_mixhi_f16 instructions",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.mai_insts)] = .{
+ .llvm_name = "mai-insts",
+ .description = "Has mAI instructions",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.max_private_element_size_16)] = .{
+ .llvm_name = "max-private-element-size-16",
+ .description = "Maximum private access size may be 16",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.max_private_element_size_4)] = .{
+ .llvm_name = "max-private-element-size-4",
+ .description = "Maximum private access size may be 4",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.max_private_element_size_8)] = .{
+ .llvm_name = "max-private-element-size-8",
+ .description = "Maximum private access size may be 8",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.mfma_inline_literal_bug)] = .{
+ .llvm_name = "mfma-inline-literal-bug",
+ .description = "MFMA cannot use inline literal as SrcC",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.mimg_r128)] = .{
+ .llvm_name = "mimg-r128",
+ .description = "Support 128-bit texture resources",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.movrel)] = .{
+ .llvm_name = "movrel",
+ .description = "Has v_movrel*_b32 instructions",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.negative_scratch_offset_bug)] = .{
+ .llvm_name = "negative-scratch-offset-bug",
+ .description = "Negative immediate offsets in scratch instructions with an SGPR offset page fault on GFX9",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.negative_unaligned_scratch_offset_bug)] = .{
+ .llvm_name = "negative-unaligned-scratch-offset-bug",
+ .description = "Scratch instructions with a VGPR offset and a negative immediate offset that is not a multiple of 4 read wrong memory on GFX10",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.no_data_dep_hazard)] = .{
+ .llvm_name = "no-data-dep-hazard",
+ .description = "Does not need SW waitstates",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.no_sdst_cmpx)] = .{
+ .llvm_name = "no-sdst-cmpx",
+ .description = "V_CMPX does not write VCC/SGPR in addition to EXEC",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.nsa_clause_bug)] = .{
+ .llvm_name = "nsa-clause-bug",
+ .description = "MIMG-NSA in a hard clause has unpredictable results on GFX10.1",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.nsa_encoding)] = .{
+ .llvm_name = "nsa-encoding",
+ .description = "Support NSA encoding for image instructions",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.nsa_to_vmem_bug)] = .{
+ .llvm_name = "nsa-to-vmem-bug",
+ .description = "MIMG-NSA followed by VMEM fail if EXEC_LO or EXEC_HI equals zero",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.offset_3f_bug)] = .{
+ .llvm_name = "offset-3f-bug",
+ .description = "Branch offset of 3f hardware bug",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.packed_fp32_ops)] = .{
+ .llvm_name = "packed-fp32-ops",
+ .description = "Support packed fp32 instructions",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.packed_tid)] = .{
+ .llvm_name = "packed-tid",
+ .description = "Workitem IDs are packed into v0 at kernel launch",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.partial_nsa_encoding)] = .{
+ .llvm_name = "partial-nsa-encoding",
+ .description = "Support partial NSA encoding for image instructions",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.pk_fmac_f16_inst)] = .{
+ .llvm_name = "pk-fmac-f16-inst",
+ .description = "Has v_pk_fmac_f16 instruction",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.promote_alloca)] = .{
+ .llvm_name = "promote-alloca",
+ .description = "Enable promote alloca pass",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.prt_strict_null)] = .{
+ .llvm_name = "enable-prt-strict-null",
+ .description = "Enable zeroing of result registers for sparse texture fetches",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.r128_a16)] = .{
+ .llvm_name = "r128-a16",
+ .description = "Support gfx9-style A16 for 16-bit coordinates/gradients/lod/clamp/mip image operands, where a16 is aliased with r128",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.s_memrealtime)] = .{
+ .llvm_name = "s-memrealtime",
+ .description = "Has s_memrealtime instruction",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.s_memtime_inst)] = .{
+ .llvm_name = "s-memtime-inst",
+ .description = "Has s_memtime instruction",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.scalar_atomics)] = .{
+ .llvm_name = "scalar-atomics",
+ .description = "Has atomic scalar memory instructions",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.scalar_flat_scratch_insts)] = .{
+ .llvm_name = "scalar-flat-scratch-insts",
+ .description = "Have s_scratch_* flat memory instructions",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.scalar_stores)] = .{
+ .llvm_name = "scalar-stores",
+ .description = "Has store scalar memory instructions",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.sdwa)] = .{
+ .llvm_name = "sdwa",
+ .description = "Support SDWA (Sub-DWORD Addressing) extension",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.sdwa_mav)] = .{
+ .llvm_name = "sdwa-mav",
+ .description = "Support v_mac_f32/f16 with SDWA (Sub-DWORD Addressing) extension",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.sdwa_omod)] = .{
+ .llvm_name = "sdwa-omod",
+ .description = "Support OMod with SDWA (Sub-DWORD Addressing) extension",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.sdwa_out_mods_vopc)] = .{
+ .llvm_name = "sdwa-out-mods-vopc",
+ .description = "Support clamp for VOPC with SDWA (Sub-DWORD Addressing) extension",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.sdwa_scalar)] = .{
+ .llvm_name = "sdwa-scalar",
+ .description = "Support scalar register with SDWA (Sub-DWORD Addressing) extension",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.sdwa_sdst)] = .{
+ .llvm_name = "sdwa-sdst",
+ .description = "Support scalar dst for VOPC with SDWA (Sub-DWORD Addressing) extension",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.sea_islands)] = .{
+ .llvm_name = "sea-islands",
+ .description = "SEA_ISLANDS GPU generation",
+ .dependencies = featureSet(&[_]Feature{
+ .ci_insts,
+ .ds_src2_insts,
+ .extended_image_insts,
+ .flat_address_space,
+ .fp64,
+ .gfx7_gfx8_gfx9_insts,
+ .image_insts,
+ .localmemorysize65536,
+ .mad_mac_f32_insts,
+ .mimg_r128,
+ .movrel,
+ .s_memtime_inst,
+ .trig_reduced_range,
+ .unaligned_buffer_access,
+ .wavefrontsize64,
+ }),
+ };
+ result[@intFromEnum(Feature.sgpr_init_bug)] = .{
+ .llvm_name = "sgpr-init-bug",
+ .description = "VI SGPR initialization bug requiring a fixed SGPR allocation size",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.shader_cycles_register)] = .{
+ .llvm_name = "shader-cycles-register",
+ .description = "Has SHADER_CYCLES hardware register",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.si_scheduler)] = .{
+ .llvm_name = "si-scheduler",
+ .description = "Enable SI Machine Scheduler",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.smem_to_vector_write_hazard)] = .{
+ .llvm_name = "smem-to-vector-write-hazard",
+ .description = "s_load_dword followed by v_cmp page faults",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.southern_islands)] = .{
+ .llvm_name = "southern-islands",
+ .description = "SOUTHERN_ISLANDS GPU generation",
+ .dependencies = featureSet(&[_]Feature{
+ .ds_src2_insts,
+ .extended_image_insts,
+ .fp64,
+ .image_insts,
+ .ldsbankcount32,
+ .localmemorysize32768,
+ .mad_mac_f32_insts,
+ .mimg_r128,
+ .movrel,
+ .s_memtime_inst,
+ .trig_reduced_range,
+ .wavefrontsize64,
+ }),
+ };
+ result[@intFromEnum(Feature.sramecc)] = .{
+ .llvm_name = "sramecc",
+ .description = "Enable SRAMECC",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.sramecc_support)] = .{
+ .llvm_name = "sramecc-support",
+ .description = "Hardware supports SRAMECC",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.tgsplit)] = .{
+ .llvm_name = "tgsplit",
+ .description = "Enable threadgroup split execution",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.trap_handler)] = .{
+ .llvm_name = "trap-handler",
+ .description = "Trap handler support",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.trig_reduced_range)] = .{
+ .llvm_name = "trig-reduced-range",
+ .description = "Requires use of fract on arguments to trig instructions",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.true16)] = .{
+ .llvm_name = "true16",
+ .description = "True 16-bit operand instructions",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.unaligned_access_mode)] = .{
+ .llvm_name = "unaligned-access-mode",
+ .description = "Enable unaligned global, local and region loads and stores if the hardware supports it",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.unaligned_buffer_access)] = .{
+ .llvm_name = "unaligned-buffer-access",
+ .description = "Hardware supports unaligned global loads and stores",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.unaligned_ds_access)] = .{
+ .llvm_name = "unaligned-ds-access",
+ .description = "Hardware supports unaligned local and region loads and stores",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.unaligned_scratch_access)] = .{
+ .llvm_name = "unaligned-scratch-access",
+ .description = "Support unaligned scratch loads and stores",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.unpacked_d16_vmem)] = .{
+ .llvm_name = "unpacked-d16-vmem",
+ .description = "Has unpacked d16 vmem instructions",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.unsafe_ds_offset_folding)] = .{
+ .llvm_name = "unsafe-ds-offset-folding",
+ .description = "Force using DS instruction immediate offsets on SI",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.user_sgpr_init16_bug)] = .{
+ .llvm_name = "user-sgpr-init16-bug",
+ .description = "Bug requiring at least 16 user+system SGPRs to be enabled",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.valu_trans_use_hazard)] = .{
+ .llvm_name = "valu-trans-use-hazard",
+ .description = "Hazard when TRANS instructions are closely followed by a use of the result",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.vcmpx_exec_war_hazard)] = .{
+ .llvm_name = "vcmpx-exec-war-hazard",
+ .description = "V_CMPX WAR hazard on EXEC (V_CMPX issue ONLY)",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.vcmpx_permlane_hazard)] = .{
+ .llvm_name = "vcmpx-permlane-hazard",
+ .description = "TODO: describe me",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.vgpr_index_mode)] = .{
+ .llvm_name = "vgpr-index-mode",
+ .description = "Has VGPR mode register indexing",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.vmem_to_scalar_write_hazard)] = .{
+ .llvm_name = "vmem-to-scalar-write-hazard",
+ .description = "VMEM instruction followed by scalar writing to EXEC mask, M0 or SGPR leads to incorrect execution.",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.volcanic_islands)] = .{
+ .llvm_name = "volcanic-islands",
+ .description = "VOLCANIC_ISLANDS GPU generation",
+ .dependencies = featureSet(&[_]Feature{
+ .@"16_bit_insts",
+ .ci_insts,
+ .dpp,
+ .ds_src2_insts,
+ .extended_image_insts,
+ .fast_denormal_f32,
+ .flat_address_space,
+ .fp64,
+ .gcn3_encoding,
+ .gfx7_gfx8_gfx9_insts,
+ .gfx8_insts,
+ .image_insts,
+ .int_clamp_insts,
+ .inv_2pi_inline_imm,
+ .localmemorysize65536,
+ .mad_mac_f32_insts,
+ .mimg_r128,
+ .movrel,
+ .s_memrealtime,
+ .s_memtime_inst,
+ .scalar_stores,
+ .sdwa,
+ .sdwa_mav,
+ .sdwa_out_mods_vopc,
+ .trig_reduced_range,
+ .unaligned_buffer_access,
+ .vgpr_index_mode,
+ .wavefrontsize64,
+ }),
+ };
+ result[@intFromEnum(Feature.vop3_literal)] = .{
+ .llvm_name = "vop3-literal",
+ .description = "Can use one literal in VOP3",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.vop3p)] = .{
+ .llvm_name = "vop3p",
+ .description = "Has VOP3P packed instructions",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.vopd)] = .{
+ .llvm_name = "vopd",
+ .description = "Has VOPD dual issue wave32 instructions",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.vscnt)] = .{
+ .llvm_name = "vscnt",
+ .description = "Has separate store vscnt counter",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.wavefrontsize16)] = .{
+ .llvm_name = "wavefrontsize16",
+ .description = "The number of threads per wavefront",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.wavefrontsize32)] = .{
+ .llvm_name = "wavefrontsize32",
+ .description = "The number of threads per wavefront",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.wavefrontsize64)] = .{
+ .llvm_name = "wavefrontsize64",
+ .description = "The number of threads per wavefront",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.xnack)] = .{
+ .llvm_name = "xnack",
+ .description = "Enable XNACK support",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@intFromEnum(Feature.xnack_support)] = .{
+ .llvm_name = "xnack-support",
+ .description = "Hardware supports XNACK",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ const ti = @typeInfo(Feature);
+ for (&result, 0..) |*elem, i| {
+ elem.index = i;
+ elem.name = ti.Enum.fields[i].name;
+ }
+ break :blk result;
+};
+
+pub const cpu = struct {
+ pub const bonaire = CpuModel{
+ .name = "bonaire",
+ .llvm_name = "bonaire",
+ .features = featureSet(&[_]Feature{
+ .ldsbankcount32,
+ .sea_islands,
+ }),
+ };
+ pub const carrizo = CpuModel{
+ .name = "carrizo",
+ .llvm_name = "carrizo",
+ .features = featureSet(&[_]Feature{
+ .fast_fmaf,
+ .half_rate_64_ops,
+ .ldsbankcount32,
+ .unpacked_d16_vmem,
+ .volcanic_islands,
+ .xnack_support,
+ }),
+ };
+ pub const fiji = CpuModel{
+ .name = "fiji",
+ .llvm_name = "fiji",
+ .features = featureSet(&[_]Feature{
+ .ldsbankcount32,
+ .unpacked_d16_vmem,
+ .volcanic_islands,
+ }),
+ };
+ pub const generic = CpuModel{
+ .name = "generic",
+ .llvm_name = "generic",
+ .features = featureSet(&[_]Feature{
+ .wavefrontsize64,
+ }),
+ };
+ pub const generic_hsa = CpuModel{
+ .name = "generic_hsa",
+ .llvm_name = "generic-hsa",
+ .features = featureSet(&[_]Feature{
+ .flat_address_space,
+ .wavefrontsize64,
+ }),
+ };
+ pub const gfx1010 = CpuModel{
+ .name = "gfx1010",
+ .llvm_name = "gfx1010",
+ .features = featureSet(&[_]Feature{
+ .back_off_barrier,
+ .dl_insts,
+ .ds_src2_insts,
+ .flat_segment_offset_bug,
+ .get_wave_id_inst,
+ .gfx10,
+ .inst_fwd_prefetch_bug,
+ .lds_branch_vmem_war_hazard,
+ .lds_misaligned_bug,
+ .ldsbankcount32,
+ .mad_mac_f32_insts,
+ .negative_unaligned_scratch_offset_bug,
+ .nsa_clause_bug,
+ .nsa_encoding,
+ .nsa_to_vmem_bug,
+ .offset_3f_bug,
+ .scalar_atomics,
+ .scalar_flat_scratch_insts,
+ .scalar_stores,
+ .smem_to_vector_write_hazard,
+ .vcmpx_exec_war_hazard,
+ .vcmpx_permlane_hazard,
+ .vmem_to_scalar_write_hazard,
+ .wavefrontsize32,
+ .xnack_support,
+ }),
+ };
+ pub const gfx1011 = CpuModel{
+ .name = "gfx1011",
+ .llvm_name = "gfx1011",
+ .features = featureSet(&[_]Feature{
+ .back_off_barrier,
+ .dl_insts,
+ .dot10_insts,
+ .dot1_insts,
+ .dot2_insts,
+ .dot5_insts,
+ .dot6_insts,
+ .dot7_insts,
+ .ds_src2_insts,
+ .flat_segment_offset_bug,
+ .get_wave_id_inst,
+ .gfx10,
+ .inst_fwd_prefetch_bug,
+ .lds_branch_vmem_war_hazard,
+ .lds_misaligned_bug,
+ .ldsbankcount32,
+ .mad_mac_f32_insts,
+ .negative_unaligned_scratch_offset_bug,
+ .nsa_clause_bug,
+ .nsa_encoding,
+ .nsa_to_vmem_bug,
+ .offset_3f_bug,
+ .scalar_atomics,
+ .scalar_flat_scratch_insts,
+ .scalar_stores,
+ .smem_to_vector_write_hazard,
+ .vcmpx_exec_war_hazard,
+ .vcmpx_permlane_hazard,
+ .vmem_to_scalar_write_hazard,
+ .wavefrontsize32,
+ .xnack_support,
+ }),
+ };
+ pub const gfx1012 = CpuModel{
+ .name = "gfx1012",
+ .llvm_name = "gfx1012",
+ .features = featureSet(&[_]Feature{
+ .back_off_barrier,
+ .dl_insts,
+ .dot10_insts,
+ .dot1_insts,
+ .dot2_insts,
+ .dot5_insts,
+ .dot6_insts,
+ .dot7_insts,
+ .ds_src2_insts,
+ .flat_segment_offset_bug,
+ .get_wave_id_inst,
+ .gfx10,
+ .inst_fwd_prefetch_bug,
+ .lds_branch_vmem_war_hazard,
+ .lds_misaligned_bug,
+ .ldsbankcount32,
+ .mad_mac_f32_insts,
+ .negative_unaligned_scratch_offset_bug,
+ .nsa_clause_bug,
+ .nsa_encoding,
+ .nsa_to_vmem_bug,
+ .offset_3f_bug,
+ .scalar_atomics,
+ .scalar_flat_scratch_insts,
+ .scalar_stores,
+ .smem_to_vector_write_hazard,
+ .vcmpx_exec_war_hazard,
+ .vcmpx_permlane_hazard,
+ .vmem_to_scalar_write_hazard,
+ .wavefrontsize32,
+ .xnack_support,
+ }),
+ };
+ pub const gfx1013 = CpuModel{
+ .name = "gfx1013",
+ .llvm_name = "gfx1013",
+ .features = featureSet(&[_]Feature{
+ .back_off_barrier,
+ .dl_insts,
+ .ds_src2_insts,
+ .flat_segment_offset_bug,
+ .get_wave_id_inst,
+ .gfx10,
+ .gfx10_a_encoding,
+ .inst_fwd_prefetch_bug,
+ .lds_branch_vmem_war_hazard,
+ .lds_misaligned_bug,
+ .ldsbankcount32,
+ .mad_mac_f32_insts,
+ .negative_unaligned_scratch_offset_bug,
+ .nsa_clause_bug,
+ .nsa_encoding,
+ .nsa_to_vmem_bug,
+ .offset_3f_bug,
+ .scalar_atomics,
+ .scalar_flat_scratch_insts,
+ .scalar_stores,
+ .smem_to_vector_write_hazard,
+ .vcmpx_exec_war_hazard,
+ .vcmpx_permlane_hazard,
+ .vmem_to_scalar_write_hazard,
+ .wavefrontsize32,
+ .xnack_support,
+ }),
+ };
+ pub const gfx1030 = CpuModel{
+ .name = "gfx1030",
+ .llvm_name = "gfx1030",
+ .features = featureSet(&[_]Feature{
+ .back_off_barrier,
+ .dl_insts,
+ .dot10_insts,
+ .dot1_insts,
+ .dot2_insts,
+ .dot5_insts,
+ .dot6_insts,
+ .dot7_insts,
+ .gfx10,
+ .gfx10_3_insts,
+ .gfx10_a_encoding,
+ .gfx10_b_encoding,
+ .ldsbankcount32,
+ .nsa_encoding,
+ .shader_cycles_register,
+ .wavefrontsize32,
+ }),
+ };
+ pub const gfx1031 = CpuModel{
+ .name = "gfx1031",
+ .llvm_name = "gfx1031",
+ .features = featureSet(&[_]Feature{
+ .back_off_barrier,
+ .dl_insts,
+ .dot10_insts,
+ .dot1_insts,
+ .dot2_insts,
+ .dot5_insts,
+ .dot6_insts,
+ .dot7_insts,
+ .gfx10,
+ .gfx10_3_insts,
+ .gfx10_a_encoding,
+ .gfx10_b_encoding,
+ .ldsbankcount32,
+ .nsa_encoding,
+ .shader_cycles_register,
+ .wavefrontsize32,
+ }),
+ };
+ pub const gfx1032 = CpuModel{
+ .name = "gfx1032",
+ .llvm_name = "gfx1032",
+ .features = featureSet(&[_]Feature{
+ .back_off_barrier,
+ .dl_insts,
+ .dot10_insts,
+ .dot1_insts,
+ .dot2_insts,
+ .dot5_insts,
+ .dot6_insts,
+ .dot7_insts,
+ .gfx10,
+ .gfx10_3_insts,
+ .gfx10_a_encoding,
+ .gfx10_b_encoding,
+ .ldsbankcount32,
+ .nsa_encoding,
+ .shader_cycles_register,
+ .wavefrontsize32,
+ }),
+ };
+ pub const gfx1033 = CpuModel{
+ .name = "gfx1033",
+ .llvm_name = "gfx1033",
+ .features = featureSet(&[_]Feature{
+ .back_off_barrier,
+ .dl_insts,
+ .dot10_insts,
+ .dot1_insts,
+ .dot2_insts,
+ .dot5_insts,
+ .dot6_insts,
+ .dot7_insts,
+ .gfx10,
+ .gfx10_3_insts,
+ .gfx10_a_encoding,
+ .gfx10_b_encoding,
+ .ldsbankcount32,
+ .nsa_encoding,
+ .shader_cycles_register,
+ .wavefrontsize32,
+ }),
+ };
+ pub const gfx1034 = CpuModel{
+ .name = "gfx1034",
+ .llvm_name = "gfx1034",
+ .features = featureSet(&[_]Feature{
+ .back_off_barrier,
+ .dl_insts,
+ .dot10_insts,
+ .dot1_insts,
+ .dot2_insts,
+ .dot5_insts,
+ .dot6_insts,
+ .dot7_insts,
+ .gfx10,
+ .gfx10_3_insts,
+ .gfx10_a_encoding,
+ .gfx10_b_encoding,
+ .ldsbankcount32,
+ .nsa_encoding,
+ .shader_cycles_register,
+ .wavefrontsize32,
+ }),
+ };
+ pub const gfx1035 = CpuModel{
+ .name = "gfx1035",
+ .llvm_name = "gfx1035",
+ .features = featureSet(&[_]Feature{
+ .back_off_barrier,
+ .dl_insts,
+ .dot10_insts,
+ .dot1_insts,
+ .dot2_insts,
+ .dot5_insts,
+ .dot6_insts,
+ .dot7_insts,
+ .gfx10,
+ .gfx10_3_insts,
+ .gfx10_a_encoding,
+ .gfx10_b_encoding,
+ .ldsbankcount32,
+ .nsa_encoding,
+ .shader_cycles_register,
+ .wavefrontsize32,
+ }),
+ };
+ pub const gfx1036 = CpuModel{
+ .name = "gfx1036",
+ .llvm_name = "gfx1036",
+ .features = featureSet(&[_]Feature{
+ .back_off_barrier,
+ .dl_insts,
+ .dot10_insts,
+ .dot1_insts,
+ .dot2_insts,
+ .dot5_insts,
+ .dot6_insts,
+ .dot7_insts,
+ .gfx10,
+ .gfx10_3_insts,
+ .gfx10_a_encoding,
+ .gfx10_b_encoding,
+ .ldsbankcount32,
+ .nsa_encoding,
+ .shader_cycles_register,
+ .wavefrontsize32,
+ }),
+ };
+ pub const gfx1100 = CpuModel{
+ .name = "gfx1100",
+ .llvm_name = "gfx1100",
+ .features = featureSet(&[_]Feature{
+ .architected_flat_scratch,
+ .atomic_fadd_no_rtn_insts,
+ .atomic_fadd_rtn_insts,
+ .dl_insts,
+ .dot10_insts,
+ .dot5_insts,
+ .dot7_insts,
+ .dot8_insts,
+ .dot9_insts,
+ .flat_atomic_fadd_f32_inst,
+ .gfx11,
+ .gfx11_full_vgprs,
+ .image_insts,
+ .ldsbankcount32,
+ .mad_intra_fwd_bug,
+ .nsa_encoding,
+ .packed_tid,
+ .partial_nsa_encoding,
+ .shader_cycles_register,
+ .user_sgpr_init16_bug,
+ .valu_trans_use_hazard,
+ .vcmpx_permlane_hazard,
+ .wavefrontsize32,
+ }),
+ };
+ pub const gfx1101 = CpuModel{
+ .name = "gfx1101",
+ .llvm_name = "gfx1101",
+ .features = featureSet(&[_]Feature{
+ .architected_flat_scratch,
+ .atomic_fadd_no_rtn_insts,
+ .atomic_fadd_rtn_insts,
+ .dl_insts,
+ .dot10_insts,
+ .dot5_insts,
+ .dot7_insts,
+ .dot8_insts,
+ .dot9_insts,
+ .flat_atomic_fadd_f32_inst,
+ .gfx11,
+ .gfx11_full_vgprs,
+ .image_insts,
+ .ldsbankcount32,
+ .mad_intra_fwd_bug,
+ .nsa_encoding,
+ .packed_tid,
+ .partial_nsa_encoding,
+ .shader_cycles_register,
+ .valu_trans_use_hazard,
+ .vcmpx_permlane_hazard,
+ .wavefrontsize32,
+ }),
+ };
+ pub const gfx1102 = CpuModel{
+ .name = "gfx1102",
+ .llvm_name = "gfx1102",
+ .features = featureSet(&[_]Feature{
+ .architected_flat_scratch,
+ .atomic_fadd_no_rtn_insts,
+ .atomic_fadd_rtn_insts,
+ .dl_insts,
+ .dot10_insts,
+ .dot5_insts,
+ .dot7_insts,
+ .dot8_insts,
+ .dot9_insts,
+ .flat_atomic_fadd_f32_inst,
+ .gfx11,
+ .image_insts,
+ .ldsbankcount32,
+ .mad_intra_fwd_bug,
+ .nsa_encoding,
+ .packed_tid,
+ .partial_nsa_encoding,
+ .shader_cycles_register,
+ .user_sgpr_init16_bug,
+ .valu_trans_use_hazard,
+ .vcmpx_permlane_hazard,
+ .wavefrontsize32,
+ }),
+ };
+ pub const gfx1103 = CpuModel{
+ .name = "gfx1103",
+ .llvm_name = "gfx1103",
+ .features = featureSet(&[_]Feature{
+ .architected_flat_scratch,
+ .atomic_fadd_no_rtn_insts,
+ .atomic_fadd_rtn_insts,
+ .dl_insts,
+ .dot10_insts,
+ .dot5_insts,
+ .dot7_insts,
+ .dot8_insts,
+ .dot9_insts,
+ .flat_atomic_fadd_f32_inst,
+ .gfx11,
+ .image_insts,
+ .ldsbankcount32,
+ .mad_intra_fwd_bug,
+ .nsa_encoding,
+ .packed_tid,
+ .partial_nsa_encoding,
+ .shader_cycles_register,
+ .valu_trans_use_hazard,
+ .vcmpx_permlane_hazard,
+ .wavefrontsize32,
+ }),
+ };
+ pub const gfx1150 = CpuModel{
+ .name = "gfx1150",
+ .llvm_name = "gfx1150",
+ .features = featureSet(&[_]Feature{
+ .architected_flat_scratch,
+ .atomic_fadd_no_rtn_insts,
+ .atomic_fadd_rtn_insts,
+ .dl_insts,
+ .dot10_insts,
+ .dot5_insts,
+ .dot7_insts,
+ .dot8_insts,
+ .dot9_insts,
+ .flat_atomic_fadd_f32_inst,
+ .gfx11,
+ .image_insts,
+ .ldsbankcount32,
+ .mad_intra_fwd_bug,
+ .nsa_encoding,
+ .packed_tid,
+ .partial_nsa_encoding,
+ .shader_cycles_register,
+ .vcmpx_permlane_hazard,
+ .wavefrontsize32,
+ }),
+ };
+ pub const gfx1151 = CpuModel{
+ .name = "gfx1151",
+ .llvm_name = "gfx1151",
+ .features = featureSet(&[_]Feature{
+ .architected_flat_scratch,
+ .atomic_fadd_no_rtn_insts,
+ .atomic_fadd_rtn_insts,
+ .dl_insts,
+ .dot10_insts,
+ .dot5_insts,
+ .dot7_insts,
+ .dot8_insts,
+ .dot9_insts,
+ .flat_atomic_fadd_f32_inst,
+ .gfx11,
+ .gfx11_full_vgprs,
+ .image_insts,
+ .ldsbankcount32,
+ .mad_intra_fwd_bug,
+ .nsa_encoding,
+ .packed_tid,
+ .partial_nsa_encoding,
+ .shader_cycles_register,
+ .vcmpx_permlane_hazard,
+ .wavefrontsize32,
+ }),
+ };
+ pub const gfx600 = CpuModel{
+ .name = "gfx600",
+ .llvm_name = "gfx600",
+ .features = featureSet(&[_]Feature{
+ .fast_fmaf,
+ .half_rate_64_ops,
+ .southern_islands,
+ }),
+ };
+ pub const gfx601 = CpuModel{
+ .name = "gfx601",
+ .llvm_name = "gfx601",
+ .features = featureSet(&[_]Feature{
+ .southern_islands,
+ }),
+ };
+ pub const gfx602 = CpuModel{
+ .name = "gfx602",
+ .llvm_name = "gfx602",
+ .features = featureSet(&[_]Feature{
+ .southern_islands,
+ }),
+ };
+ pub const gfx700 = CpuModel{
+ .name = "gfx700",
+ .llvm_name = "gfx700",
+ .features = featureSet(&[_]Feature{
+ .ldsbankcount32,
+ .sea_islands,
+ }),
+ };
+ pub const gfx701 = CpuModel{
+ .name = "gfx701",
+ .llvm_name = "gfx701",
+ .features = featureSet(&[_]Feature{
+ .fast_fmaf,
+ .half_rate_64_ops,
+ .ldsbankcount32,
+ .sea_islands,
+ }),
+ };
+ pub const gfx702 = CpuModel{
+ .name = "gfx702",
+ .llvm_name = "gfx702",
+ .features = featureSet(&[_]Feature{
+ .fast_fmaf,
+ .ldsbankcount16,
+ .sea_islands,
+ }),
+ };
+ pub const gfx703 = CpuModel{
+ .name = "gfx703",
+ .llvm_name = "gfx703",
+ .features = featureSet(&[_]Feature{
+ .ldsbankcount16,
+ .sea_islands,
+ }),
+ };
+ pub const gfx704 = CpuModel{
+ .name = "gfx704",
+ .llvm_name = "gfx704",
+ .features = featureSet(&[_]Feature{
+ .ldsbankcount32,
+ .sea_islands,
+ }),
+ };
+ pub const gfx705 = CpuModel{
+ .name = "gfx705",
+ .llvm_name = "gfx705",
+ .features = featureSet(&[_]Feature{
+ .ldsbankcount16,
+ .sea_islands,
+ }),
+ };
+ pub const gfx801 = CpuModel{
+ .name = "gfx801",
+ .llvm_name = "gfx801",
+ .features = featureSet(&[_]Feature{
+ .fast_fmaf,
+ .half_rate_64_ops,
+ .ldsbankcount32,
+ .unpacked_d16_vmem,
+ .volcanic_islands,
+ .xnack_support,
+ }),
+ };
+ pub const gfx802 = CpuModel{
+ .name = "gfx802",
+ .llvm_name = "gfx802",
+ .features = featureSet(&[_]Feature{
+ .ldsbankcount32,
+ .sgpr_init_bug,
+ .unpacked_d16_vmem,
+ .volcanic_islands,
+ }),
+ };
+ pub const gfx803 = CpuModel{
+ .name = "gfx803",
+ .llvm_name = "gfx803",
+ .features = featureSet(&[_]Feature{
+ .ldsbankcount32,
+ .unpacked_d16_vmem,
+ .volcanic_islands,
+ }),
+ };
+ pub const gfx805 = CpuModel{
+ .name = "gfx805",
+ .llvm_name = "gfx805",
+ .features = featureSet(&[_]Feature{
+ .ldsbankcount32,
+ .sgpr_init_bug,
+ .unpacked_d16_vmem,
+ .volcanic_islands,
+ }),
+ };
+ pub const gfx810 = CpuModel{
+ .name = "gfx810",
+ .llvm_name = "gfx810",
+ .features = featureSet(&[_]Feature{
+ .image_gather4_d16_bug,
+ .image_store_d16_bug,
+ .ldsbankcount16,
+ .volcanic_islands,
+ .xnack_support,
+ }),
+ };
+ pub const gfx900 = CpuModel{
+ .name = "gfx900",
+ .llvm_name = "gfx900",
+ .features = featureSet(&[_]Feature{
+ .ds_src2_insts,
+ .extended_image_insts,
+ .gfx9,
+ .image_gather4_d16_bug,
+ .image_insts,
+ .ldsbankcount32,
+ .mad_mac_f32_insts,
+ .mad_mix_insts,
+ }),
+ };
+ pub const gfx902 = CpuModel{
+ .name = "gfx902",
+ .llvm_name = "gfx902",
+ .features = featureSet(&[_]Feature{
+ .ds_src2_insts,
+ .extended_image_insts,
+ .gfx9,
+ .image_gather4_d16_bug,
+ .image_insts,
+ .ldsbankcount32,
+ .mad_mac_f32_insts,
+ .mad_mix_insts,
+ }),
+ };
+ pub const gfx904 = CpuModel{
+ .name = "gfx904",
+ .llvm_name = "gfx904",
+ .features = featureSet(&[_]Feature{
+ .ds_src2_insts,
+ .extended_image_insts,
+ .fma_mix_insts,
+ .gfx9,
+ .image_gather4_d16_bug,
+ .image_insts,
+ .ldsbankcount32,
+ .mad_mac_f32_insts,
+ }),
+ };
+ pub const gfx906 = CpuModel{
+ .name = "gfx906",
+ .llvm_name = "gfx906",
+ .features = featureSet(&[_]Feature{
+ .dl_insts,
+ .dot10_insts,
+ .dot1_insts,
+ .dot2_insts,
+ .dot7_insts,
+ .ds_src2_insts,
+ .extended_image_insts,
+ .fma_mix_insts,
+ .gfx9,
+ .half_rate_64_ops,
+ .image_gather4_d16_bug,
+ .image_insts,
+ .ldsbankcount32,
+ .mad_mac_f32_insts,
+ .sramecc_support,
+ }),
+ };
+ pub const gfx908 = CpuModel{
+ .name = "gfx908",
+ .llvm_name = "gfx908",
+ .features = featureSet(&[_]Feature{
+ .atomic_buffer_global_pk_add_f16_no_rtn_insts,
+ .atomic_fadd_no_rtn_insts,
+ .dl_insts,
+ .dot10_insts,
+ .dot1_insts,
+ .dot2_insts,
+ .dot3_insts,
+ .dot4_insts,
+ .dot5_insts,
+ .dot6_insts,
+ .dot7_insts,
+ .ds_src2_insts,
+ .extended_image_insts,
+ .fma_mix_insts,
+ .gfx9,
+ .half_rate_64_ops,
+ .image_gather4_d16_bug,
+ .image_insts,
+ .ldsbankcount32,
+ .mad_mac_f32_insts,
+ .mai_insts,
+ .mfma_inline_literal_bug,
+ .pk_fmac_f16_inst,
+ .sramecc_support,
+ }),
+ };
+ pub const gfx909 = CpuModel{
+ .name = "gfx909",
+ .llvm_name = "gfx909",
+ .features = featureSet(&[_]Feature{
+ .ds_src2_insts,
+ .extended_image_insts,
+ .gfx9,
+ .image_gather4_d16_bug,
+ .image_insts,
+ .ldsbankcount32,
+ .mad_mac_f32_insts,
+ .mad_mix_insts,
+ }),
+ };
+ pub const gfx90a = CpuModel{
+ .name = "gfx90a",
+ .llvm_name = "gfx90a",
+ .features = featureSet(&[_]Feature{
+ .atomic_buffer_global_pk_add_f16_insts,
+ .atomic_fadd_no_rtn_insts,
+ .atomic_fadd_rtn_insts,
+ .back_off_barrier,
+ .dl_insts,
+ .dot10_insts,
+ .dot1_insts,
+ .dot2_insts,
+ .dot3_insts,
+ .dot4_insts,
+ .dot5_insts,
+ .dot6_insts,
+ .dot7_insts,
+ .dpp_64bit,
+ .fma_mix_insts,
+ .fmacf64_inst,
+ .full_rate_64_ops,
+ .gfx9,
+ .gfx90a_insts,
+ .image_insts,
+ .ldsbankcount32,
+ .mad_mac_f32_insts,
+ .mai_insts,
+ .packed_fp32_ops,
+ .packed_tid,
+ .pk_fmac_f16_inst,
+ .sramecc_support,
+ }),
+ };
+ pub const gfx90c = CpuModel{
+ .name = "gfx90c",
+ .llvm_name = "gfx90c",
+ .features = featureSet(&[_]Feature{
+ .ds_src2_insts,
+ .extended_image_insts,
+ .gfx9,
+ .image_gather4_d16_bug,
+ .image_insts,
+ .ldsbankcount32,
+ .mad_mac_f32_insts,
+ .mad_mix_insts,
+ }),
+ };
+ pub const gfx940 = CpuModel{
+ .name = "gfx940",
+ .llvm_name = "gfx940",
+ .features = featureSet(&[_]Feature{
+ .architected_flat_scratch,
+ .atomic_buffer_global_pk_add_f16_insts,
+ .atomic_ds_pk_add_16_insts,
+ .atomic_fadd_no_rtn_insts,
+ .atomic_fadd_rtn_insts,
+ .atomic_flat_pk_add_16_insts,
+ .atomic_global_pk_add_bf16_inst,
+ .back_off_barrier,
+ .dl_insts,
+ .dot10_insts,
+ .dot1_insts,
+ .dot2_insts,
+ .dot3_insts,
+ .dot4_insts,
+ .dot5_insts,
+ .dot6_insts,
+ .dot7_insts,
+ .dpp_64bit,
+ .flat_atomic_fadd_f32_inst,
+ .fma_mix_insts,
+ .fmacf64_inst,
+ .force_store_sc0_sc1,
+ .fp8_insts,
+ .full_rate_64_ops,
+ .gfx9,
+ .gfx90a_insts,
+ .gfx940_insts,
+ .ldsbankcount32,
+ .mai_insts,
+ .packed_fp32_ops,
+ .packed_tid,
+ .pk_fmac_f16_inst,
+ .sramecc_support,
+ }),
+ };
+ pub const gfx941 = CpuModel{
+ .name = "gfx941",
+ .llvm_name = "gfx941",
+ .features = featureSet(&[_]Feature{
+ .architected_flat_scratch,
+ .atomic_buffer_global_pk_add_f16_insts,
+ .atomic_ds_pk_add_16_insts,
+ .atomic_fadd_no_rtn_insts,
+ .atomic_fadd_rtn_insts,
+ .atomic_flat_pk_add_16_insts,
+ .atomic_global_pk_add_bf16_inst,
+ .back_off_barrier,
+ .dl_insts,
+ .dot10_insts,
+ .dot1_insts,
+ .dot2_insts,
+ .dot3_insts,
+ .dot4_insts,
+ .dot5_insts,
+ .dot6_insts,
+ .dot7_insts,
+ .dpp_64bit,
+ .flat_atomic_fadd_f32_inst,
+ .fma_mix_insts,
+ .fmacf64_inst,
+ .force_store_sc0_sc1,
+ .fp8_insts,
+ .full_rate_64_ops,
+ .gfx9,
+ .gfx90a_insts,
+ .gfx940_insts,
+ .ldsbankcount32,
+ .mai_insts,
+ .packed_fp32_ops,
+ .packed_tid,
+ .pk_fmac_f16_inst,
+ .sramecc_support,
+ }),
+ };
+ pub const gfx942 = CpuModel{
+ .name = "gfx942",
+ .llvm_name = "gfx942",
+ .features = featureSet(&[_]Feature{
+ .architected_flat_scratch,
+ .atomic_buffer_global_pk_add_f16_insts,
+ .atomic_ds_pk_add_16_insts,
+ .atomic_fadd_no_rtn_insts,
+ .atomic_fadd_rtn_insts,
+ .atomic_flat_pk_add_16_insts,
+ .atomic_global_pk_add_bf16_inst,
+ .back_off_barrier,
+ .dl_insts,
+ .dot10_insts,
+ .dot1_insts,
+ .dot2_insts,
+ .dot3_insts,
+ .dot4_insts,
+ .dot5_insts,
+ .dot6_insts,
+ .dot7_insts,
+ .dpp_64bit,
+ .flat_atomic_fadd_f32_inst,
+ .fma_mix_insts,
+ .fmacf64_inst,
+ .fp8_insts,
+ .full_rate_64_ops,
+ .gfx9,
+ .gfx90a_insts,
+ .gfx940_insts,
+ .ldsbankcount32,
+ .mai_insts,
+ .packed_fp32_ops,
+ .packed_tid,
+ .pk_fmac_f16_inst,
+ .sramecc_support,
+ }),
+ };
+ pub const hainan = CpuModel{
+ .name = "hainan",
+ .llvm_name = "hainan",
+ .features = featureSet(&[_]Feature{
+ .southern_islands,
+ }),
+ };
+ pub const hawaii = CpuModel{
+ .name = "hawaii",
+ .llvm_name = "hawaii",
+ .features = featureSet(&[_]Feature{
+ .fast_fmaf,
+ .half_rate_64_ops,
+ .ldsbankcount32,
+ .sea_islands,
+ }),
+ };
+ pub const iceland = CpuModel{
+ .name = "iceland",
+ .llvm_name = "iceland",
+ .features = featureSet(&[_]Feature{
+ .ldsbankcount32,
+ .sgpr_init_bug,
+ .unpacked_d16_vmem,
+ .volcanic_islands,
+ }),
+ };
+ pub const kabini = CpuModel{
+ .name = "kabini",
+ .llvm_name = "kabini",
+ .features = featureSet(&[_]Feature{
+ .ldsbankcount16,
+ .sea_islands,
+ }),
+ };
+ pub const kaveri = CpuModel{
+ .name = "kaveri",
+ .llvm_name = "kaveri",
+ .features = featureSet(&[_]Feature{
+ .ldsbankcount32,
+ .sea_islands,
+ }),
+ };
+ pub const mullins = CpuModel{
+ .name = "mullins",
+ .llvm_name = "mullins",
+ .features = featureSet(&[_]Feature{
+ .ldsbankcount16,
+ .sea_islands,
+ }),
+ };
+ pub const oland = CpuModel{
+ .name = "oland",
+ .llvm_name = "oland",
+ .features = featureSet(&[_]Feature{
+ .southern_islands,
+ }),
+ };
+ pub const pitcairn = CpuModel{
+ .name = "pitcairn",
+ .llvm_name = "pitcairn",
+ .features = featureSet(&[_]Feature{
+ .southern_islands,
+ }),
+ };
+ pub const polaris10 = CpuModel{
+ .name = "polaris10",
+ .llvm_name = "polaris10",
+ .features = featureSet(&[_]Feature{
+ .ldsbankcount32,
+ .unpacked_d16_vmem,
+ .volcanic_islands,
+ }),
+ };
+ pub const polaris11 = CpuModel{
+ .name = "polaris11",
+ .llvm_name = "polaris11",
+ .features = featureSet(&[_]Feature{
+ .ldsbankcount32,
+ .unpacked_d16_vmem,
+ .volcanic_islands,
+ }),
+ };
+ pub const stoney = CpuModel{
+ .name = "stoney",
+ .llvm_name = "stoney",
+ .features = featureSet(&[_]Feature{
+ .image_gather4_d16_bug,
+ .image_store_d16_bug,
+ .ldsbankcount16,
+ .volcanic_islands,
+ .xnack_support,
+ }),
+ };
+ pub const tahiti = CpuModel{
+ .name = "tahiti",
+ .llvm_name = "tahiti",
+ .features = featureSet(&[_]Feature{
+ .fast_fmaf,
+ .half_rate_64_ops,
+ .southern_islands,
+ }),
+ };
+ pub const tonga = CpuModel{
+ .name = "tonga",
+ .llvm_name = "tonga",
+ .features = featureSet(&[_]Feature{
+ .ldsbankcount32,
+ .sgpr_init_bug,
+ .unpacked_d16_vmem,
+ .volcanic_islands,
+ }),
+ };
+ pub const tongapro = CpuModel{
+ .name = "tongapro",
+ .llvm_name = "tongapro",
+ .features = featureSet(&[_]Feature{
+ .ldsbankcount32,
+ .sgpr_init_bug,
+ .unpacked_d16_vmem,
+ .volcanic_islands,
+ }),
+ };
+ pub const verde = CpuModel{
+ .name = "verde",
+ .llvm_name = "verde",
+ .features = featureSet(&[_]Feature{
+ .southern_islands,
+ }),
+ };
+};