diff options
| author | Andrew Kelley <andrew@ziglang.org> | 2024-01-02 14:11:27 -0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-01-02 14:11:27 -0800 |
| commit | 289ae45c1b58e952867c4fa1e246d0ef7bc2ff64 (patch) | |
| tree | 5dd034143a2354b7b44496e684f1c764e2f9664c /lib/std/Target/amdgpu.zig | |
| parent | c89bb3e141ee215add0b52930d48bffd8dae8342 (diff) | |
| parent | c546ddb3edc557fae4b932e5239b9dcb66117832 (diff) | |
| download | zig-289ae45c1b58e952867c4fa1e246d0ef7bc2ff64.tar.gz zig-289ae45c1b58e952867c4fa1e246d0ef7bc2ff64.zip | |
Merge pull request #18160 from ziglang/std-build-module
Move many settings from being per-Compilation to being per-Module
Diffstat (limited to 'lib/std/Target/amdgpu.zig')
| -rw-r--r-- | lib/std/Target/amdgpu.zig | 2153 |
1 files changed, 2153 insertions, 0 deletions
diff --git a/lib/std/Target/amdgpu.zig b/lib/std/Target/amdgpu.zig new file mode 100644 index 0000000000..012f652088 --- /dev/null +++ b/lib/std/Target/amdgpu.zig @@ -0,0 +1,2153 @@ +//! This file is auto-generated by tools/update_cpu_features.zig. + +const std = @import("../std.zig"); +const CpuFeature = std.Target.Cpu.Feature; +const CpuModel = std.Target.Cpu.Model; + +pub const Feature = enum { + @"16_bit_insts", + a16, + add_no_carry_insts, + aperture_regs, + architected_flat_scratch, + architected_sgprs, + atomic_buffer_global_pk_add_f16_insts, + atomic_buffer_global_pk_add_f16_no_rtn_insts, + atomic_ds_pk_add_16_insts, + atomic_fadd_no_rtn_insts, + atomic_fadd_rtn_insts, + atomic_flat_pk_add_16_insts, + atomic_global_pk_add_bf16_inst, + auto_waitcnt_before_barrier, + back_off_barrier, + ci_insts, + cumode, + dl_insts, + dot10_insts, + dot1_insts, + dot2_insts, + dot3_insts, + dot4_insts, + dot5_insts, + dot6_insts, + dot7_insts, + dot8_insts, + dot9_insts, + dpp, + dpp8, + dpp_64bit, + ds128, + ds_src2_insts, + extended_image_insts, + fast_denormal_f32, + fast_fmaf, + flat_address_space, + flat_atomic_fadd_f32_inst, + flat_for_global, + flat_global_insts, + flat_inst_offsets, + flat_scratch, + flat_scratch_insts, + flat_segment_offset_bug, + fma_mix_insts, + fmacf64_inst, + fmaf, + force_store_sc0_sc1, + fp64, + fp8_insts, + full_rate_64_ops, + g16, + gcn3_encoding, + get_wave_id_inst, + gfx10, + gfx10_3_insts, + gfx10_a_encoding, + gfx10_b_encoding, + gfx10_insts, + gfx11, + gfx11_full_vgprs, + gfx11_insts, + gfx7_gfx8_gfx9_insts, + gfx8_insts, + gfx9, + gfx90a_insts, + gfx940_insts, + gfx9_insts, + half_rate_64_ops, + image_gather4_d16_bug, + image_insts, + image_store_d16_bug, + inst_fwd_prefetch_bug, + int_clamp_insts, + inv_2pi_inline_imm, + lds_branch_vmem_war_hazard, + lds_misaligned_bug, + ldsbankcount16, + ldsbankcount32, + load_store_opt, + localmemorysize32768, + localmemorysize65536, + mad_intra_fwd_bug, + mad_mac_f32_insts, + mad_mix_insts, + mai_insts, + max_private_element_size_16, + max_private_element_size_4, + max_private_element_size_8, + mfma_inline_literal_bug, + mimg_r128, + movrel, + negative_scratch_offset_bug, + negative_unaligned_scratch_offset_bug, + no_data_dep_hazard, + no_sdst_cmpx, + nsa_clause_bug, + nsa_encoding, + nsa_to_vmem_bug, + offset_3f_bug, + packed_fp32_ops, + packed_tid, + partial_nsa_encoding, + pk_fmac_f16_inst, + promote_alloca, + prt_strict_null, + r128_a16, + s_memrealtime, + s_memtime_inst, + scalar_atomics, + scalar_flat_scratch_insts, + scalar_stores, + sdwa, + sdwa_mav, + sdwa_omod, + sdwa_out_mods_vopc, + sdwa_scalar, + sdwa_sdst, + sea_islands, + sgpr_init_bug, + shader_cycles_register, + si_scheduler, + smem_to_vector_write_hazard, + southern_islands, + sramecc, + sramecc_support, + tgsplit, + trap_handler, + trig_reduced_range, + true16, + unaligned_access_mode, + unaligned_buffer_access, + unaligned_ds_access, + unaligned_scratch_access, + unpacked_d16_vmem, + unsafe_ds_offset_folding, + user_sgpr_init16_bug, + valu_trans_use_hazard, + vcmpx_exec_war_hazard, + vcmpx_permlane_hazard, + vgpr_index_mode, + vmem_to_scalar_write_hazard, + volcanic_islands, + vop3_literal, + vop3p, + vopd, + vscnt, + wavefrontsize16, + wavefrontsize32, + wavefrontsize64, + xnack, + xnack_support, +}; + +pub const featureSet = CpuFeature.feature_set_fns(Feature).featureSet; +pub const featureSetHas = CpuFeature.feature_set_fns(Feature).featureSetHas; +pub const featureSetHasAny = CpuFeature.feature_set_fns(Feature).featureSetHasAny; +pub const featureSetHasAll = CpuFeature.feature_set_fns(Feature).featureSetHasAll; + +pub const all_features = blk: { + const len = @typeInfo(Feature).Enum.fields.len; + std.debug.assert(len <= CpuFeature.Set.needed_bit_count); + var result: [len]CpuFeature = undefined; + result[@intFromEnum(Feature.@"16_bit_insts")] = .{ + .llvm_name = "16-bit-insts", + .description = "Has i16/f16 instructions", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.a16)] = .{ + .llvm_name = "a16", + .description = "Support A16 for 16-bit coordinates/gradients/lod/clamp/mip image operands", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.add_no_carry_insts)] = .{ + .llvm_name = "add-no-carry-insts", + .description = "Have VALU add/sub instructions without carry out", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.aperture_regs)] = .{ + .llvm_name = "aperture-regs", + .description = "Has Memory Aperture Base and Size Registers", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.architected_flat_scratch)] = .{ + .llvm_name = "architected-flat-scratch", + .description = "Flat Scratch register is a readonly SPI initialized architected register", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.architected_sgprs)] = .{ + .llvm_name = "architected-sgprs", + .description = "Enable the architected SGPRs", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.atomic_buffer_global_pk_add_f16_insts)] = .{ + .llvm_name = "atomic-buffer-global-pk-add-f16-insts", + .description = "Has buffer_atomic_pk_add_f16 and global_atomic_pk_add_f16 instructions that can return original value", + .dependencies = featureSet(&[_]Feature{ + .flat_global_insts, + }), + }; + result[@intFromEnum(Feature.atomic_buffer_global_pk_add_f16_no_rtn_insts)] = .{ + .llvm_name = "atomic-buffer-global-pk-add-f16-no-rtn-insts", + .description = "Has buffer_atomic_pk_add_f16 and global_atomic_pk_add_f16 instructions that don't return original value", + .dependencies = featureSet(&[_]Feature{ + .flat_global_insts, + }), + }; + result[@intFromEnum(Feature.atomic_ds_pk_add_16_insts)] = .{ + .llvm_name = "atomic-ds-pk-add-16-insts", + .description = "Has ds_pk_add_bf16, ds_pk_add_f16, ds_pk_add_rtn_bf16, ds_pk_add_rtn_f16 instructions", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.atomic_fadd_no_rtn_insts)] = .{ + .llvm_name = "atomic-fadd-no-rtn-insts", + .description = "Has buffer_atomic_add_f32 and global_atomic_add_f32 instructions that don't return original value", + .dependencies = featureSet(&[_]Feature{ + .flat_global_insts, + }), + }; + result[@intFromEnum(Feature.atomic_fadd_rtn_insts)] = .{ + .llvm_name = "atomic-fadd-rtn-insts", + .description = "Has buffer_atomic_add_f32 and global_atomic_add_f32 instructions that return original value", + .dependencies = featureSet(&[_]Feature{ + .flat_global_insts, + }), + }; + result[@intFromEnum(Feature.atomic_flat_pk_add_16_insts)] = .{ + .llvm_name = "atomic-flat-pk-add-16-insts", + .description = "Has flat_atomic_pk_add_f16 and flat_atomic_pk_add_bf16 instructions", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.atomic_global_pk_add_bf16_inst)] = .{ + .llvm_name = "atomic-global-pk-add-bf16-inst", + .description = "Has global_atomic_pk_add_bf16 instruction", + .dependencies = featureSet(&[_]Feature{ + .flat_global_insts, + }), + }; + result[@intFromEnum(Feature.auto_waitcnt_before_barrier)] = .{ + .llvm_name = "auto-waitcnt-before-barrier", + .description = "Hardware automatically inserts waitcnt before barrier", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.back_off_barrier)] = .{ + .llvm_name = "back-off-barrier", + .description = "Hardware supports backing off s_barrier if an exception occurs", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.ci_insts)] = .{ + .llvm_name = "ci-insts", + .description = "Additional instructions for CI+", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.cumode)] = .{ + .llvm_name = "cumode", + .description = "Enable CU wavefront execution mode", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.dl_insts)] = .{ + .llvm_name = "dl-insts", + .description = "Has v_fmac_f32 and v_xnor_b32 instructions", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.dot10_insts)] = .{ + .llvm_name = "dot10-insts", + .description = "Has v_dot2_f32_f16 instruction", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.dot1_insts)] = .{ + .llvm_name = "dot1-insts", + .description = "Has v_dot4_i32_i8 and v_dot8_i32_i4 instructions", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.dot2_insts)] = .{ + .llvm_name = "dot2-insts", + .description = "Has v_dot2_i32_i16, v_dot2_u32_u16 instructions", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.dot3_insts)] = .{ + .llvm_name = "dot3-insts", + .description = "Has v_dot8c_i32_i4 instruction", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.dot4_insts)] = .{ + .llvm_name = "dot4-insts", + .description = "Has v_dot2c_i32_i16 instruction", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.dot5_insts)] = .{ + .llvm_name = "dot5-insts", + .description = "Has v_dot2c_f32_f16 instruction", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.dot6_insts)] = .{ + .llvm_name = "dot6-insts", + .description = "Has v_dot4c_i32_i8 instruction", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.dot7_insts)] = .{ + .llvm_name = "dot7-insts", + .description = "Has v_dot4_u32_u8, v_dot8_u32_u4 instructions", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.dot8_insts)] = .{ + .llvm_name = "dot8-insts", + .description = "Has v_dot4_i32_iu8, v_dot8_i32_iu4 instructions", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.dot9_insts)] = .{ + .llvm_name = "dot9-insts", + .description = "Has v_dot2_f16_f16, v_dot2_bf16_bf16, v_dot2_f32_bf16 instructions", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.dpp)] = .{ + .llvm_name = "dpp", + .description = "Support DPP (Data Parallel Primitives) extension", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.dpp8)] = .{ + .llvm_name = "dpp8", + .description = "Support DPP8 (Data Parallel Primitives) extension", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.dpp_64bit)] = .{ + .llvm_name = "dpp-64bit", + .description = "Support DPP (Data Parallel Primitives) extension", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.ds128)] = .{ + .llvm_name = "enable-ds128", + .description = "Use ds_{read|write}_b128", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.ds_src2_insts)] = .{ + .llvm_name = "ds-src2-insts", + .description = "Has ds_*_src2 instructions", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.extended_image_insts)] = .{ + .llvm_name = "extended-image-insts", + .description = "Support mips != 0, lod != 0, gather4, and get_lod", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.fast_denormal_f32)] = .{ + .llvm_name = "fast-denormal-f32", + .description = "Enabling denormals does not cause f32 instructions to run at f64 rates", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.fast_fmaf)] = .{ + .llvm_name = "fast-fmaf", + .description = "Assuming f32 fma is at least as fast as mul + add", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.flat_address_space)] = .{ + .llvm_name = "flat-address-space", + .description = "Support flat address space", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.flat_atomic_fadd_f32_inst)] = .{ + .llvm_name = "flat-atomic-fadd-f32-inst", + .description = "Has flat_atomic_add_f32 instruction", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.flat_for_global)] = .{ + .llvm_name = "flat-for-global", + .description = "Force to generate flat instruction for global", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.flat_global_insts)] = .{ + .llvm_name = "flat-global-insts", + .description = "Have global_* flat memory instructions", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.flat_inst_offsets)] = .{ + .llvm_name = "flat-inst-offsets", + .description = "Flat instructions have immediate offset addressing mode", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.flat_scratch)] = .{ + .llvm_name = "enable-flat-scratch", + .description = "Use scratch_* flat memory instructions to access scratch", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.flat_scratch_insts)] = .{ + .llvm_name = "flat-scratch-insts", + .description = "Have scratch_* flat memory instructions", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.flat_segment_offset_bug)] = .{ + .llvm_name = "flat-segment-offset-bug", + .description = "GFX10 bug where inst_offset is ignored when flat instructions access global memory", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.fma_mix_insts)] = .{ + .llvm_name = "fma-mix-insts", + .description = "Has v_fma_mix_f32, v_fma_mixlo_f16, v_fma_mixhi_f16 instructions", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.fmacf64_inst)] = .{ + .llvm_name = "fmacf64-inst", + .description = "Has v_fmac_f64 instruction", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.fmaf)] = .{ + .llvm_name = "fmaf", + .description = "Enable single precision FMA (not as fast as mul+add, but fused)", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.force_store_sc0_sc1)] = .{ + .llvm_name = "force-store-sc0-sc1", + .description = "Has SC0 and SC1 on stores", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.fp64)] = .{ + .llvm_name = "fp64", + .description = "Enable double precision operations", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.fp8_insts)] = .{ + .llvm_name = "fp8-insts", + .description = "Has fp8 and bf8 instructions", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.full_rate_64_ops)] = .{ + .llvm_name = "full-rate-64-ops", + .description = "Most fp64 instructions are full rate", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.g16)] = .{ + .llvm_name = "g16", + .description = "Support G16 for 16-bit gradient image operands", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.gcn3_encoding)] = .{ + .llvm_name = "gcn3-encoding", + .description = "Encoding format for VI", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.get_wave_id_inst)] = .{ + .llvm_name = "get-wave-id-inst", + .description = "Has s_get_waveid_in_workgroup instruction", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.gfx10)] = .{ + .llvm_name = "gfx10", + .description = "GFX10 GPU generation", + .dependencies = featureSet(&[_]Feature{ + .@"16_bit_insts", + .a16, + .add_no_carry_insts, + .aperture_regs, + .ci_insts, + .dpp, + .dpp8, + .extended_image_insts, + .fast_denormal_f32, + .fast_fmaf, + .flat_address_space, + .flat_global_insts, + .flat_inst_offsets, + .flat_scratch_insts, + .fma_mix_insts, + .fp64, + .g16, + .gfx10_insts, + .gfx8_insts, + .gfx9_insts, + .image_insts, + .int_clamp_insts, + .inv_2pi_inline_imm, + .localmemorysize65536, + .mimg_r128, + .movrel, + .no_data_dep_hazard, + .no_sdst_cmpx, + .pk_fmac_f16_inst, + .s_memrealtime, + .s_memtime_inst, + .sdwa, + .sdwa_omod, + .sdwa_scalar, + .sdwa_sdst, + .unaligned_buffer_access, + .unaligned_ds_access, + .vop3_literal, + .vop3p, + .vscnt, + }), + }; + result[@intFromEnum(Feature.gfx10_3_insts)] = .{ + .llvm_name = "gfx10-3-insts", + .description = "Additional instructions for GFX10.3", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.gfx10_a_encoding)] = .{ + .llvm_name = "gfx10_a-encoding", + .description = "Has BVH ray tracing instructions", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.gfx10_b_encoding)] = .{ + .llvm_name = "gfx10_b-encoding", + .description = "Encoding format GFX10_B", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.gfx10_insts)] = .{ + .llvm_name = "gfx10-insts", + .description = "Additional instructions for GFX10+", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.gfx11)] = .{ + .llvm_name = "gfx11", + .description = "GFX11 GPU generation", + .dependencies = featureSet(&[_]Feature{ + .@"16_bit_insts", + .a16, + .add_no_carry_insts, + .aperture_regs, + .ci_insts, + .dpp, + .dpp8, + .extended_image_insts, + .fast_denormal_f32, + .fast_fmaf, + .flat_address_space, + .flat_global_insts, + .flat_inst_offsets, + .flat_scratch_insts, + .fma_mix_insts, + .fp64, + .g16, + .gfx10_3_insts, + .gfx10_a_encoding, + .gfx10_b_encoding, + .gfx10_insts, + .gfx11_insts, + .gfx8_insts, + .gfx9_insts, + .int_clamp_insts, + .inv_2pi_inline_imm, + .localmemorysize65536, + .mimg_r128, + .movrel, + .no_data_dep_hazard, + .no_sdst_cmpx, + .pk_fmac_f16_inst, + .true16, + .unaligned_buffer_access, + .unaligned_ds_access, + .vop3_literal, + .vop3p, + .vopd, + .vscnt, + }), + }; + result[@intFromEnum(Feature.gfx11_full_vgprs)] = .{ + .llvm_name = "gfx11-full-vgprs", + .description = "GFX11 with 50% more physical VGPRs and 50% larger allocation granule than GFX10", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.gfx11_insts)] = .{ + .llvm_name = "gfx11-insts", + .description = "Additional instructions for GFX11+", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.gfx7_gfx8_gfx9_insts)] = .{ + .llvm_name = "gfx7-gfx8-gfx9-insts", + .description = "Instructions shared in GFX7, GFX8, GFX9", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.gfx8_insts)] = .{ + .llvm_name = "gfx8-insts", + .description = "Additional instructions for GFX8+", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.gfx9)] = .{ + .llvm_name = "gfx9", + .description = "GFX9 GPU generation", + .dependencies = featureSet(&[_]Feature{ + .@"16_bit_insts", + .a16, + .add_no_carry_insts, + .aperture_regs, + .ci_insts, + .dpp, + .fast_denormal_f32, + .fast_fmaf, + .flat_address_space, + .flat_global_insts, + .flat_inst_offsets, + .flat_scratch_insts, + .fp64, + .gcn3_encoding, + .gfx7_gfx8_gfx9_insts, + .gfx8_insts, + .gfx9_insts, + .int_clamp_insts, + .inv_2pi_inline_imm, + .localmemorysize65536, + .negative_scratch_offset_bug, + .r128_a16, + .s_memrealtime, + .s_memtime_inst, + .scalar_atomics, + .scalar_flat_scratch_insts, + .scalar_stores, + .sdwa, + .sdwa_omod, + .sdwa_scalar, + .sdwa_sdst, + .unaligned_buffer_access, + .unaligned_ds_access, + .vgpr_index_mode, + .vop3p, + .wavefrontsize64, + .xnack_support, + }), + }; + result[@intFromEnum(Feature.gfx90a_insts)] = .{ + .llvm_name = "gfx90a-insts", + .description = "Additional instructions for GFX90A+", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.gfx940_insts)] = .{ + .llvm_name = "gfx940-insts", + .description = "Additional instructions for GFX940+", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.gfx9_insts)] = .{ + .llvm_name = "gfx9-insts", + .description = "Additional instructions for GFX9+", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.half_rate_64_ops)] = .{ + .llvm_name = "half-rate-64-ops", + .description = "Most fp64 instructions are half rate instead of quarter", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.image_gather4_d16_bug)] = .{ + .llvm_name = "image-gather4-d16-bug", + .description = "Image Gather4 D16 hardware bug", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.image_insts)] = .{ + .llvm_name = "image-insts", + .description = "Support image instructions", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.image_store_d16_bug)] = .{ + .llvm_name = "image-store-d16-bug", + .description = "Image Store D16 hardware bug", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.inst_fwd_prefetch_bug)] = .{ + .llvm_name = "inst-fwd-prefetch-bug", + .description = "S_INST_PREFETCH instruction causes shader to hang", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.int_clamp_insts)] = .{ + .llvm_name = "int-clamp-insts", + .description = "Support clamp for integer destination", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.inv_2pi_inline_imm)] = .{ + .llvm_name = "inv-2pi-inline-imm", + .description = "Has 1 / (2 * pi) as inline immediate", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.lds_branch_vmem_war_hazard)] = .{ + .llvm_name = "lds-branch-vmem-war-hazard", + .description = "Switching between LDS and VMEM-tex not waiting VM_VSRC=0", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.lds_misaligned_bug)] = .{ + .llvm_name = "lds-misaligned-bug", + .description = "Some GFX10 bug with multi-dword LDS and flat access that is not naturally aligned in WGP mode", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.ldsbankcount16)] = .{ + .llvm_name = "ldsbankcount16", + .description = "The number of LDS banks per compute unit.", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.ldsbankcount32)] = .{ + .llvm_name = "ldsbankcount32", + .description = "The number of LDS banks per compute unit.", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.load_store_opt)] = .{ + .llvm_name = "load-store-opt", + .description = "Enable SI load/store optimizer pass", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.localmemorysize32768)] = .{ + .llvm_name = "localmemorysize32768", + .description = "The size of local memory in bytes", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.localmemorysize65536)] = .{ + .llvm_name = "localmemorysize65536", + .description = "The size of local memory in bytes", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.mad_intra_fwd_bug)] = .{ + .llvm_name = "mad-intra-fwd-bug", + .description = "MAD_U64/I64 intra instruction forwarding bug", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.mad_mac_f32_insts)] = .{ + .llvm_name = "mad-mac-f32-insts", + .description = "Has v_mad_f32/v_mac_f32/v_madak_f32/v_madmk_f32 instructions", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.mad_mix_insts)] = .{ + .llvm_name = "mad-mix-insts", + .description = "Has v_mad_mix_f32, v_mad_mixlo_f16, v_mad_mixhi_f16 instructions", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.mai_insts)] = .{ + .llvm_name = "mai-insts", + .description = "Has mAI instructions", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.max_private_element_size_16)] = .{ + .llvm_name = "max-private-element-size-16", + .description = "Maximum private access size may be 16", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.max_private_element_size_4)] = .{ + .llvm_name = "max-private-element-size-4", + .description = "Maximum private access size may be 4", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.max_private_element_size_8)] = .{ + .llvm_name = "max-private-element-size-8", + .description = "Maximum private access size may be 8", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.mfma_inline_literal_bug)] = .{ + .llvm_name = "mfma-inline-literal-bug", + .description = "MFMA cannot use inline literal as SrcC", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.mimg_r128)] = .{ + .llvm_name = "mimg-r128", + .description = "Support 128-bit texture resources", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.movrel)] = .{ + .llvm_name = "movrel", + .description = "Has v_movrel*_b32 instructions", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.negative_scratch_offset_bug)] = .{ + .llvm_name = "negative-scratch-offset-bug", + .description = "Negative immediate offsets in scratch instructions with an SGPR offset page fault on GFX9", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.negative_unaligned_scratch_offset_bug)] = .{ + .llvm_name = "negative-unaligned-scratch-offset-bug", + .description = "Scratch instructions with a VGPR offset and a negative immediate offset that is not a multiple of 4 read wrong memory on GFX10", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.no_data_dep_hazard)] = .{ + .llvm_name = "no-data-dep-hazard", + .description = "Does not need SW waitstates", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.no_sdst_cmpx)] = .{ + .llvm_name = "no-sdst-cmpx", + .description = "V_CMPX does not write VCC/SGPR in addition to EXEC", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.nsa_clause_bug)] = .{ + .llvm_name = "nsa-clause-bug", + .description = "MIMG-NSA in a hard clause has unpredictable results on GFX10.1", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.nsa_encoding)] = .{ + .llvm_name = "nsa-encoding", + .description = "Support NSA encoding for image instructions", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.nsa_to_vmem_bug)] = .{ + .llvm_name = "nsa-to-vmem-bug", + .description = "MIMG-NSA followed by VMEM fail if EXEC_LO or EXEC_HI equals zero", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.offset_3f_bug)] = .{ + .llvm_name = "offset-3f-bug", + .description = "Branch offset of 3f hardware bug", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.packed_fp32_ops)] = .{ + .llvm_name = "packed-fp32-ops", + .description = "Support packed fp32 instructions", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.packed_tid)] = .{ + .llvm_name = "packed-tid", + .description = "Workitem IDs are packed into v0 at kernel launch", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.partial_nsa_encoding)] = .{ + .llvm_name = "partial-nsa-encoding", + .description = "Support partial NSA encoding for image instructions", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.pk_fmac_f16_inst)] = .{ + .llvm_name = "pk-fmac-f16-inst", + .description = "Has v_pk_fmac_f16 instruction", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.promote_alloca)] = .{ + .llvm_name = "promote-alloca", + .description = "Enable promote alloca pass", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.prt_strict_null)] = .{ + .llvm_name = "enable-prt-strict-null", + .description = "Enable zeroing of result registers for sparse texture fetches", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.r128_a16)] = .{ + .llvm_name = "r128-a16", + .description = "Support gfx9-style A16 for 16-bit coordinates/gradients/lod/clamp/mip image operands, where a16 is aliased with r128", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.s_memrealtime)] = .{ + .llvm_name = "s-memrealtime", + .description = "Has s_memrealtime instruction", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.s_memtime_inst)] = .{ + .llvm_name = "s-memtime-inst", + .description = "Has s_memtime instruction", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.scalar_atomics)] = .{ + .llvm_name = "scalar-atomics", + .description = "Has atomic scalar memory instructions", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.scalar_flat_scratch_insts)] = .{ + .llvm_name = "scalar-flat-scratch-insts", + .description = "Have s_scratch_* flat memory instructions", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.scalar_stores)] = .{ + .llvm_name = "scalar-stores", + .description = "Has store scalar memory instructions", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.sdwa)] = .{ + .llvm_name = "sdwa", + .description = "Support SDWA (Sub-DWORD Addressing) extension", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.sdwa_mav)] = .{ + .llvm_name = "sdwa-mav", + .description = "Support v_mac_f32/f16 with SDWA (Sub-DWORD Addressing) extension", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.sdwa_omod)] = .{ + .llvm_name = "sdwa-omod", + .description = "Support OMod with SDWA (Sub-DWORD Addressing) extension", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.sdwa_out_mods_vopc)] = .{ + .llvm_name = "sdwa-out-mods-vopc", + .description = "Support clamp for VOPC with SDWA (Sub-DWORD Addressing) extension", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.sdwa_scalar)] = .{ + .llvm_name = "sdwa-scalar", + .description = "Support scalar register with SDWA (Sub-DWORD Addressing) extension", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.sdwa_sdst)] = .{ + .llvm_name = "sdwa-sdst", + .description = "Support scalar dst for VOPC with SDWA (Sub-DWORD Addressing) extension", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.sea_islands)] = .{ + .llvm_name = "sea-islands", + .description = "SEA_ISLANDS GPU generation", + .dependencies = featureSet(&[_]Feature{ + .ci_insts, + .ds_src2_insts, + .extended_image_insts, + .flat_address_space, + .fp64, + .gfx7_gfx8_gfx9_insts, + .image_insts, + .localmemorysize65536, + .mad_mac_f32_insts, + .mimg_r128, + .movrel, + .s_memtime_inst, + .trig_reduced_range, + .unaligned_buffer_access, + .wavefrontsize64, + }), + }; + result[@intFromEnum(Feature.sgpr_init_bug)] = .{ + .llvm_name = "sgpr-init-bug", + .description = "VI SGPR initialization bug requiring a fixed SGPR allocation size", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.shader_cycles_register)] = .{ + .llvm_name = "shader-cycles-register", + .description = "Has SHADER_CYCLES hardware register", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.si_scheduler)] = .{ + .llvm_name = "si-scheduler", + .description = "Enable SI Machine Scheduler", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.smem_to_vector_write_hazard)] = .{ + .llvm_name = "smem-to-vector-write-hazard", + .description = "s_load_dword followed by v_cmp page faults", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.southern_islands)] = .{ + .llvm_name = "southern-islands", + .description = "SOUTHERN_ISLANDS GPU generation", + .dependencies = featureSet(&[_]Feature{ + .ds_src2_insts, + .extended_image_insts, + .fp64, + .image_insts, + .ldsbankcount32, + .localmemorysize32768, + .mad_mac_f32_insts, + .mimg_r128, + .movrel, + .s_memtime_inst, + .trig_reduced_range, + .wavefrontsize64, + }), + }; + result[@intFromEnum(Feature.sramecc)] = .{ + .llvm_name = "sramecc", + .description = "Enable SRAMECC", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.sramecc_support)] = .{ + .llvm_name = "sramecc-support", + .description = "Hardware supports SRAMECC", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.tgsplit)] = .{ + .llvm_name = "tgsplit", + .description = "Enable threadgroup split execution", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.trap_handler)] = .{ + .llvm_name = "trap-handler", + .description = "Trap handler support", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.trig_reduced_range)] = .{ + .llvm_name = "trig-reduced-range", + .description = "Requires use of fract on arguments to trig instructions", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.true16)] = .{ + .llvm_name = "true16", + .description = "True 16-bit operand instructions", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.unaligned_access_mode)] = .{ + .llvm_name = "unaligned-access-mode", + .description = "Enable unaligned global, local and region loads and stores if the hardware supports it", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.unaligned_buffer_access)] = .{ + .llvm_name = "unaligned-buffer-access", + .description = "Hardware supports unaligned global loads and stores", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.unaligned_ds_access)] = .{ + .llvm_name = "unaligned-ds-access", + .description = "Hardware supports unaligned local and region loads and stores", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.unaligned_scratch_access)] = .{ + .llvm_name = "unaligned-scratch-access", + .description = "Support unaligned scratch loads and stores", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.unpacked_d16_vmem)] = .{ + .llvm_name = "unpacked-d16-vmem", + .description = "Has unpacked d16 vmem instructions", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.unsafe_ds_offset_folding)] = .{ + .llvm_name = "unsafe-ds-offset-folding", + .description = "Force using DS instruction immediate offsets on SI", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.user_sgpr_init16_bug)] = .{ + .llvm_name = "user-sgpr-init16-bug", + .description = "Bug requiring at least 16 user+system SGPRs to be enabled", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.valu_trans_use_hazard)] = .{ + .llvm_name = "valu-trans-use-hazard", + .description = "Hazard when TRANS instructions are closely followed by a use of the result", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.vcmpx_exec_war_hazard)] = .{ + .llvm_name = "vcmpx-exec-war-hazard", + .description = "V_CMPX WAR hazard on EXEC (V_CMPX issue ONLY)", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.vcmpx_permlane_hazard)] = .{ + .llvm_name = "vcmpx-permlane-hazard", + .description = "TODO: describe me", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.vgpr_index_mode)] = .{ + .llvm_name = "vgpr-index-mode", + .description = "Has VGPR mode register indexing", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.vmem_to_scalar_write_hazard)] = .{ + .llvm_name = "vmem-to-scalar-write-hazard", + .description = "VMEM instruction followed by scalar writing to EXEC mask, M0 or SGPR leads to incorrect execution.", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.volcanic_islands)] = .{ + .llvm_name = "volcanic-islands", + .description = "VOLCANIC_ISLANDS GPU generation", + .dependencies = featureSet(&[_]Feature{ + .@"16_bit_insts", + .ci_insts, + .dpp, + .ds_src2_insts, + .extended_image_insts, + .fast_denormal_f32, + .flat_address_space, + .fp64, + .gcn3_encoding, + .gfx7_gfx8_gfx9_insts, + .gfx8_insts, + .image_insts, + .int_clamp_insts, + .inv_2pi_inline_imm, + .localmemorysize65536, + .mad_mac_f32_insts, + .mimg_r128, + .movrel, + .s_memrealtime, + .s_memtime_inst, + .scalar_stores, + .sdwa, + .sdwa_mav, + .sdwa_out_mods_vopc, + .trig_reduced_range, + .unaligned_buffer_access, + .vgpr_index_mode, + .wavefrontsize64, + }), + }; + result[@intFromEnum(Feature.vop3_literal)] = .{ + .llvm_name = "vop3-literal", + .description = "Can use one literal in VOP3", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.vop3p)] = .{ + .llvm_name = "vop3p", + .description = "Has VOP3P packed instructions", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.vopd)] = .{ + .llvm_name = "vopd", + .description = "Has VOPD dual issue wave32 instructions", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.vscnt)] = .{ + .llvm_name = "vscnt", + .description = "Has separate store vscnt counter", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.wavefrontsize16)] = .{ + .llvm_name = "wavefrontsize16", + .description = "The number of threads per wavefront", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.wavefrontsize32)] = .{ + .llvm_name = "wavefrontsize32", + .description = "The number of threads per wavefront", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.wavefrontsize64)] = .{ + .llvm_name = "wavefrontsize64", + .description = "The number of threads per wavefront", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.xnack)] = .{ + .llvm_name = "xnack", + .description = "Enable XNACK support", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@intFromEnum(Feature.xnack_support)] = .{ + .llvm_name = "xnack-support", + .description = "Hardware supports XNACK", + .dependencies = featureSet(&[_]Feature{}), + }; + const ti = @typeInfo(Feature); + for (&result, 0..) |*elem, i| { + elem.index = i; + elem.name = ti.Enum.fields[i].name; + } + break :blk result; +}; + +pub const cpu = struct { + pub const bonaire = CpuModel{ + .name = "bonaire", + .llvm_name = "bonaire", + .features = featureSet(&[_]Feature{ + .ldsbankcount32, + .sea_islands, + }), + }; + pub const carrizo = CpuModel{ + .name = "carrizo", + .llvm_name = "carrizo", + .features = featureSet(&[_]Feature{ + .fast_fmaf, + .half_rate_64_ops, + .ldsbankcount32, + .unpacked_d16_vmem, + .volcanic_islands, + .xnack_support, + }), + }; + pub const fiji = CpuModel{ + .name = "fiji", + .llvm_name = "fiji", + .features = featureSet(&[_]Feature{ + .ldsbankcount32, + .unpacked_d16_vmem, + .volcanic_islands, + }), + }; + pub const generic = CpuModel{ + .name = "generic", + .llvm_name = "generic", + .features = featureSet(&[_]Feature{ + .wavefrontsize64, + }), + }; + pub const generic_hsa = CpuModel{ + .name = "generic_hsa", + .llvm_name = "generic-hsa", + .features = featureSet(&[_]Feature{ + .flat_address_space, + .wavefrontsize64, + }), + }; + pub const gfx1010 = CpuModel{ + .name = "gfx1010", + .llvm_name = "gfx1010", + .features = featureSet(&[_]Feature{ + .back_off_barrier, + .dl_insts, + .ds_src2_insts, + .flat_segment_offset_bug, + .get_wave_id_inst, + .gfx10, + .inst_fwd_prefetch_bug, + .lds_branch_vmem_war_hazard, + .lds_misaligned_bug, + .ldsbankcount32, + .mad_mac_f32_insts, + .negative_unaligned_scratch_offset_bug, + .nsa_clause_bug, + .nsa_encoding, + .nsa_to_vmem_bug, + .offset_3f_bug, + .scalar_atomics, + .scalar_flat_scratch_insts, + .scalar_stores, + .smem_to_vector_write_hazard, + .vcmpx_exec_war_hazard, + .vcmpx_permlane_hazard, + .vmem_to_scalar_write_hazard, + .wavefrontsize32, + .xnack_support, + }), + }; + pub const gfx1011 = CpuModel{ + .name = "gfx1011", + .llvm_name = "gfx1011", + .features = featureSet(&[_]Feature{ + .back_off_barrier, + .dl_insts, + .dot10_insts, + .dot1_insts, + .dot2_insts, + .dot5_insts, + .dot6_insts, + .dot7_insts, + .ds_src2_insts, + .flat_segment_offset_bug, + .get_wave_id_inst, + .gfx10, + .inst_fwd_prefetch_bug, + .lds_branch_vmem_war_hazard, + .lds_misaligned_bug, + .ldsbankcount32, + .mad_mac_f32_insts, + .negative_unaligned_scratch_offset_bug, + .nsa_clause_bug, + .nsa_encoding, + .nsa_to_vmem_bug, + .offset_3f_bug, + .scalar_atomics, + .scalar_flat_scratch_insts, + .scalar_stores, + .smem_to_vector_write_hazard, + .vcmpx_exec_war_hazard, + .vcmpx_permlane_hazard, + .vmem_to_scalar_write_hazard, + .wavefrontsize32, + .xnack_support, + }), + }; + pub const gfx1012 = CpuModel{ + .name = "gfx1012", + .llvm_name = "gfx1012", + .features = featureSet(&[_]Feature{ + .back_off_barrier, + .dl_insts, + .dot10_insts, + .dot1_insts, + .dot2_insts, + .dot5_insts, + .dot6_insts, + .dot7_insts, + .ds_src2_insts, + .flat_segment_offset_bug, + .get_wave_id_inst, + .gfx10, + .inst_fwd_prefetch_bug, + .lds_branch_vmem_war_hazard, + .lds_misaligned_bug, + .ldsbankcount32, + .mad_mac_f32_insts, + .negative_unaligned_scratch_offset_bug, + .nsa_clause_bug, + .nsa_encoding, + .nsa_to_vmem_bug, + .offset_3f_bug, + .scalar_atomics, + .scalar_flat_scratch_insts, + .scalar_stores, + .smem_to_vector_write_hazard, + .vcmpx_exec_war_hazard, + .vcmpx_permlane_hazard, + .vmem_to_scalar_write_hazard, + .wavefrontsize32, + .xnack_support, + }), + }; + pub const gfx1013 = CpuModel{ + .name = "gfx1013", + .llvm_name = "gfx1013", + .features = featureSet(&[_]Feature{ + .back_off_barrier, + .dl_insts, + .ds_src2_insts, + .flat_segment_offset_bug, + .get_wave_id_inst, + .gfx10, + .gfx10_a_encoding, + .inst_fwd_prefetch_bug, + .lds_branch_vmem_war_hazard, + .lds_misaligned_bug, + .ldsbankcount32, + .mad_mac_f32_insts, + .negative_unaligned_scratch_offset_bug, + .nsa_clause_bug, + .nsa_encoding, + .nsa_to_vmem_bug, + .offset_3f_bug, + .scalar_atomics, + .scalar_flat_scratch_insts, + .scalar_stores, + .smem_to_vector_write_hazard, + .vcmpx_exec_war_hazard, + .vcmpx_permlane_hazard, + .vmem_to_scalar_write_hazard, + .wavefrontsize32, + .xnack_support, + }), + }; + pub const gfx1030 = CpuModel{ + .name = "gfx1030", + .llvm_name = "gfx1030", + .features = featureSet(&[_]Feature{ + .back_off_barrier, + .dl_insts, + .dot10_insts, + .dot1_insts, + .dot2_insts, + .dot5_insts, + .dot6_insts, + .dot7_insts, + .gfx10, + .gfx10_3_insts, + .gfx10_a_encoding, + .gfx10_b_encoding, + .ldsbankcount32, + .nsa_encoding, + .shader_cycles_register, + .wavefrontsize32, + }), + }; + pub const gfx1031 = CpuModel{ + .name = "gfx1031", + .llvm_name = "gfx1031", + .features = featureSet(&[_]Feature{ + .back_off_barrier, + .dl_insts, + .dot10_insts, + .dot1_insts, + .dot2_insts, + .dot5_insts, + .dot6_insts, + .dot7_insts, + .gfx10, + .gfx10_3_insts, + .gfx10_a_encoding, + .gfx10_b_encoding, + .ldsbankcount32, + .nsa_encoding, + .shader_cycles_register, + .wavefrontsize32, + }), + }; + pub const gfx1032 = CpuModel{ + .name = "gfx1032", + .llvm_name = "gfx1032", + .features = featureSet(&[_]Feature{ + .back_off_barrier, + .dl_insts, + .dot10_insts, + .dot1_insts, + .dot2_insts, + .dot5_insts, + .dot6_insts, + .dot7_insts, + .gfx10, + .gfx10_3_insts, + .gfx10_a_encoding, + .gfx10_b_encoding, + .ldsbankcount32, + .nsa_encoding, + .shader_cycles_register, + .wavefrontsize32, + }), + }; + pub const gfx1033 = CpuModel{ + .name = "gfx1033", + .llvm_name = "gfx1033", + .features = featureSet(&[_]Feature{ + .back_off_barrier, + .dl_insts, + .dot10_insts, + .dot1_insts, + .dot2_insts, + .dot5_insts, + .dot6_insts, + .dot7_insts, + .gfx10, + .gfx10_3_insts, + .gfx10_a_encoding, + .gfx10_b_encoding, + .ldsbankcount32, + .nsa_encoding, + .shader_cycles_register, + .wavefrontsize32, + }), + }; + pub const gfx1034 = CpuModel{ + .name = "gfx1034", + .llvm_name = "gfx1034", + .features = featureSet(&[_]Feature{ + .back_off_barrier, + .dl_insts, + .dot10_insts, + .dot1_insts, + .dot2_insts, + .dot5_insts, + .dot6_insts, + .dot7_insts, + .gfx10, + .gfx10_3_insts, + .gfx10_a_encoding, + .gfx10_b_encoding, + .ldsbankcount32, + .nsa_encoding, + .shader_cycles_register, + .wavefrontsize32, + }), + }; + pub const gfx1035 = CpuModel{ + .name = "gfx1035", + .llvm_name = "gfx1035", + .features = featureSet(&[_]Feature{ + .back_off_barrier, + .dl_insts, + .dot10_insts, + .dot1_insts, + .dot2_insts, + .dot5_insts, + .dot6_insts, + .dot7_insts, + .gfx10, + .gfx10_3_insts, + .gfx10_a_encoding, + .gfx10_b_encoding, + .ldsbankcount32, + .nsa_encoding, + .shader_cycles_register, + .wavefrontsize32, + }), + }; + pub const gfx1036 = CpuModel{ + .name = "gfx1036", + .llvm_name = "gfx1036", + .features = featureSet(&[_]Feature{ + .back_off_barrier, + .dl_insts, + .dot10_insts, + .dot1_insts, + .dot2_insts, + .dot5_insts, + .dot6_insts, + .dot7_insts, + .gfx10, + .gfx10_3_insts, + .gfx10_a_encoding, + .gfx10_b_encoding, + .ldsbankcount32, + .nsa_encoding, + .shader_cycles_register, + .wavefrontsize32, + }), + }; + pub const gfx1100 = CpuModel{ + .name = "gfx1100", + .llvm_name = "gfx1100", + .features = featureSet(&[_]Feature{ + .architected_flat_scratch, + .atomic_fadd_no_rtn_insts, + .atomic_fadd_rtn_insts, + .dl_insts, + .dot10_insts, + .dot5_insts, + .dot7_insts, + .dot8_insts, + .dot9_insts, + .flat_atomic_fadd_f32_inst, + .gfx11, + .gfx11_full_vgprs, + .image_insts, + .ldsbankcount32, + .mad_intra_fwd_bug, + .nsa_encoding, + .packed_tid, + .partial_nsa_encoding, + .shader_cycles_register, + .user_sgpr_init16_bug, + .valu_trans_use_hazard, + .vcmpx_permlane_hazard, + .wavefrontsize32, + }), + }; + pub const gfx1101 = CpuModel{ + .name = "gfx1101", + .llvm_name = "gfx1101", + .features = featureSet(&[_]Feature{ + .architected_flat_scratch, + .atomic_fadd_no_rtn_insts, + .atomic_fadd_rtn_insts, + .dl_insts, + .dot10_insts, + .dot5_insts, + .dot7_insts, + .dot8_insts, + .dot9_insts, + .flat_atomic_fadd_f32_inst, + .gfx11, + .gfx11_full_vgprs, + .image_insts, + .ldsbankcount32, + .mad_intra_fwd_bug, + .nsa_encoding, + .packed_tid, + .partial_nsa_encoding, + .shader_cycles_register, + .valu_trans_use_hazard, + .vcmpx_permlane_hazard, + .wavefrontsize32, + }), + }; + pub const gfx1102 = CpuModel{ + .name = "gfx1102", + .llvm_name = "gfx1102", + .features = featureSet(&[_]Feature{ + .architected_flat_scratch, + .atomic_fadd_no_rtn_insts, + .atomic_fadd_rtn_insts, + .dl_insts, + .dot10_insts, + .dot5_insts, + .dot7_insts, + .dot8_insts, + .dot9_insts, + .flat_atomic_fadd_f32_inst, + .gfx11, + .image_insts, + .ldsbankcount32, + .mad_intra_fwd_bug, + .nsa_encoding, + .packed_tid, + .partial_nsa_encoding, + .shader_cycles_register, + .user_sgpr_init16_bug, + .valu_trans_use_hazard, + .vcmpx_permlane_hazard, + .wavefrontsize32, + }), + }; + pub const gfx1103 = CpuModel{ + .name = "gfx1103", + .llvm_name = "gfx1103", + .features = featureSet(&[_]Feature{ + .architected_flat_scratch, + .atomic_fadd_no_rtn_insts, + .atomic_fadd_rtn_insts, + .dl_insts, + .dot10_insts, + .dot5_insts, + .dot7_insts, + .dot8_insts, + .dot9_insts, + .flat_atomic_fadd_f32_inst, + .gfx11, + .image_insts, + .ldsbankcount32, + .mad_intra_fwd_bug, + .nsa_encoding, + .packed_tid, + .partial_nsa_encoding, + .shader_cycles_register, + .valu_trans_use_hazard, + .vcmpx_permlane_hazard, + .wavefrontsize32, + }), + }; + pub const gfx1150 = CpuModel{ + .name = "gfx1150", + .llvm_name = "gfx1150", + .features = featureSet(&[_]Feature{ + .architected_flat_scratch, + .atomic_fadd_no_rtn_insts, + .atomic_fadd_rtn_insts, + .dl_insts, + .dot10_insts, + .dot5_insts, + .dot7_insts, + .dot8_insts, + .dot9_insts, + .flat_atomic_fadd_f32_inst, + .gfx11, + .image_insts, + .ldsbankcount32, + .mad_intra_fwd_bug, + .nsa_encoding, + .packed_tid, + .partial_nsa_encoding, + .shader_cycles_register, + .vcmpx_permlane_hazard, + .wavefrontsize32, + }), + }; + pub const gfx1151 = CpuModel{ + .name = "gfx1151", + .llvm_name = "gfx1151", + .features = featureSet(&[_]Feature{ + .architected_flat_scratch, + .atomic_fadd_no_rtn_insts, + .atomic_fadd_rtn_insts, + .dl_insts, + .dot10_insts, + .dot5_insts, + .dot7_insts, + .dot8_insts, + .dot9_insts, + .flat_atomic_fadd_f32_inst, + .gfx11, + .gfx11_full_vgprs, + .image_insts, + .ldsbankcount32, + .mad_intra_fwd_bug, + .nsa_encoding, + .packed_tid, + .partial_nsa_encoding, + .shader_cycles_register, + .vcmpx_permlane_hazard, + .wavefrontsize32, + }), + }; + pub const gfx600 = CpuModel{ + .name = "gfx600", + .llvm_name = "gfx600", + .features = featureSet(&[_]Feature{ + .fast_fmaf, + .half_rate_64_ops, + .southern_islands, + }), + }; + pub const gfx601 = CpuModel{ + .name = "gfx601", + .llvm_name = "gfx601", + .features = featureSet(&[_]Feature{ + .southern_islands, + }), + }; + pub const gfx602 = CpuModel{ + .name = "gfx602", + .llvm_name = "gfx602", + .features = featureSet(&[_]Feature{ + .southern_islands, + }), + }; + pub const gfx700 = CpuModel{ + .name = "gfx700", + .llvm_name = "gfx700", + .features = featureSet(&[_]Feature{ + .ldsbankcount32, + .sea_islands, + }), + }; + pub const gfx701 = CpuModel{ + .name = "gfx701", + .llvm_name = "gfx701", + .features = featureSet(&[_]Feature{ + .fast_fmaf, + .half_rate_64_ops, + .ldsbankcount32, + .sea_islands, + }), + }; + pub const gfx702 = CpuModel{ + .name = "gfx702", + .llvm_name = "gfx702", + .features = featureSet(&[_]Feature{ + .fast_fmaf, + .ldsbankcount16, + .sea_islands, + }), + }; + pub const gfx703 = CpuModel{ + .name = "gfx703", + .llvm_name = "gfx703", + .features = featureSet(&[_]Feature{ + .ldsbankcount16, + .sea_islands, + }), + }; + pub const gfx704 = CpuModel{ + .name = "gfx704", + .llvm_name = "gfx704", + .features = featureSet(&[_]Feature{ + .ldsbankcount32, + .sea_islands, + }), + }; + pub const gfx705 = CpuModel{ + .name = "gfx705", + .llvm_name = "gfx705", + .features = featureSet(&[_]Feature{ + .ldsbankcount16, + .sea_islands, + }), + }; + pub const gfx801 = CpuModel{ + .name = "gfx801", + .llvm_name = "gfx801", + .features = featureSet(&[_]Feature{ + .fast_fmaf, + .half_rate_64_ops, + .ldsbankcount32, + .unpacked_d16_vmem, + .volcanic_islands, + .xnack_support, + }), + }; + pub const gfx802 = CpuModel{ + .name = "gfx802", + .llvm_name = "gfx802", + .features = featureSet(&[_]Feature{ + .ldsbankcount32, + .sgpr_init_bug, + .unpacked_d16_vmem, + .volcanic_islands, + }), + }; + pub const gfx803 = CpuModel{ + .name = "gfx803", + .llvm_name = "gfx803", + .features = featureSet(&[_]Feature{ + .ldsbankcount32, + .unpacked_d16_vmem, + .volcanic_islands, + }), + }; + pub const gfx805 = CpuModel{ + .name = "gfx805", + .llvm_name = "gfx805", + .features = featureSet(&[_]Feature{ + .ldsbankcount32, + .sgpr_init_bug, + .unpacked_d16_vmem, + .volcanic_islands, + }), + }; + pub const gfx810 = CpuModel{ + .name = "gfx810", + .llvm_name = "gfx810", + .features = featureSet(&[_]Feature{ + .image_gather4_d16_bug, + .image_store_d16_bug, + .ldsbankcount16, + .volcanic_islands, + .xnack_support, + }), + }; + pub const gfx900 = CpuModel{ + .name = "gfx900", + .llvm_name = "gfx900", + .features = featureSet(&[_]Feature{ + .ds_src2_insts, + .extended_image_insts, + .gfx9, + .image_gather4_d16_bug, + .image_insts, + .ldsbankcount32, + .mad_mac_f32_insts, + .mad_mix_insts, + }), + }; + pub const gfx902 = CpuModel{ + .name = "gfx902", + .llvm_name = "gfx902", + .features = featureSet(&[_]Feature{ + .ds_src2_insts, + .extended_image_insts, + .gfx9, + .image_gather4_d16_bug, + .image_insts, + .ldsbankcount32, + .mad_mac_f32_insts, + .mad_mix_insts, + }), + }; + pub const gfx904 = CpuModel{ + .name = "gfx904", + .llvm_name = "gfx904", + .features = featureSet(&[_]Feature{ + .ds_src2_insts, + .extended_image_insts, + .fma_mix_insts, + .gfx9, + .image_gather4_d16_bug, + .image_insts, + .ldsbankcount32, + .mad_mac_f32_insts, + }), + }; + pub const gfx906 = CpuModel{ + .name = "gfx906", + .llvm_name = "gfx906", + .features = featureSet(&[_]Feature{ + .dl_insts, + .dot10_insts, + .dot1_insts, + .dot2_insts, + .dot7_insts, + .ds_src2_insts, + .extended_image_insts, + .fma_mix_insts, + .gfx9, + .half_rate_64_ops, + .image_gather4_d16_bug, + .image_insts, + .ldsbankcount32, + .mad_mac_f32_insts, + .sramecc_support, + }), + }; + pub const gfx908 = CpuModel{ + .name = "gfx908", + .llvm_name = "gfx908", + .features = featureSet(&[_]Feature{ + .atomic_buffer_global_pk_add_f16_no_rtn_insts, + .atomic_fadd_no_rtn_insts, + .dl_insts, + .dot10_insts, + .dot1_insts, + .dot2_insts, + .dot3_insts, + .dot4_insts, + .dot5_insts, + .dot6_insts, + .dot7_insts, + .ds_src2_insts, + .extended_image_insts, + .fma_mix_insts, + .gfx9, + .half_rate_64_ops, + .image_gather4_d16_bug, + .image_insts, + .ldsbankcount32, + .mad_mac_f32_insts, + .mai_insts, + .mfma_inline_literal_bug, + .pk_fmac_f16_inst, + .sramecc_support, + }), + }; + pub const gfx909 = CpuModel{ + .name = "gfx909", + .llvm_name = "gfx909", + .features = featureSet(&[_]Feature{ + .ds_src2_insts, + .extended_image_insts, + .gfx9, + .image_gather4_d16_bug, + .image_insts, + .ldsbankcount32, + .mad_mac_f32_insts, + .mad_mix_insts, + }), + }; + pub const gfx90a = CpuModel{ + .name = "gfx90a", + .llvm_name = "gfx90a", + .features = featureSet(&[_]Feature{ + .atomic_buffer_global_pk_add_f16_insts, + .atomic_fadd_no_rtn_insts, + .atomic_fadd_rtn_insts, + .back_off_barrier, + .dl_insts, + .dot10_insts, + .dot1_insts, + .dot2_insts, + .dot3_insts, + .dot4_insts, + .dot5_insts, + .dot6_insts, + .dot7_insts, + .dpp_64bit, + .fma_mix_insts, + .fmacf64_inst, + .full_rate_64_ops, + .gfx9, + .gfx90a_insts, + .image_insts, + .ldsbankcount32, + .mad_mac_f32_insts, + .mai_insts, + .packed_fp32_ops, + .packed_tid, + .pk_fmac_f16_inst, + .sramecc_support, + }), + }; + pub const gfx90c = CpuModel{ + .name = "gfx90c", + .llvm_name = "gfx90c", + .features = featureSet(&[_]Feature{ + .ds_src2_insts, + .extended_image_insts, + .gfx9, + .image_gather4_d16_bug, + .image_insts, + .ldsbankcount32, + .mad_mac_f32_insts, + .mad_mix_insts, + }), + }; + pub const gfx940 = CpuModel{ + .name = "gfx940", + .llvm_name = "gfx940", + .features = featureSet(&[_]Feature{ + .architected_flat_scratch, + .atomic_buffer_global_pk_add_f16_insts, + .atomic_ds_pk_add_16_insts, + .atomic_fadd_no_rtn_insts, + .atomic_fadd_rtn_insts, + .atomic_flat_pk_add_16_insts, + .atomic_global_pk_add_bf16_inst, + .back_off_barrier, + .dl_insts, + .dot10_insts, + .dot1_insts, + .dot2_insts, + .dot3_insts, + .dot4_insts, + .dot5_insts, + .dot6_insts, + .dot7_insts, + .dpp_64bit, + .flat_atomic_fadd_f32_inst, + .fma_mix_insts, + .fmacf64_inst, + .force_store_sc0_sc1, + .fp8_insts, + .full_rate_64_ops, + .gfx9, + .gfx90a_insts, + .gfx940_insts, + .ldsbankcount32, + .mai_insts, + .packed_fp32_ops, + .packed_tid, + .pk_fmac_f16_inst, + .sramecc_support, + }), + }; + pub const gfx941 = CpuModel{ + .name = "gfx941", + .llvm_name = "gfx941", + .features = featureSet(&[_]Feature{ + .architected_flat_scratch, + .atomic_buffer_global_pk_add_f16_insts, + .atomic_ds_pk_add_16_insts, + .atomic_fadd_no_rtn_insts, + .atomic_fadd_rtn_insts, + .atomic_flat_pk_add_16_insts, + .atomic_global_pk_add_bf16_inst, + .back_off_barrier, + .dl_insts, + .dot10_insts, + .dot1_insts, + .dot2_insts, + .dot3_insts, + .dot4_insts, + .dot5_insts, + .dot6_insts, + .dot7_insts, + .dpp_64bit, + .flat_atomic_fadd_f32_inst, + .fma_mix_insts, + .fmacf64_inst, + .force_store_sc0_sc1, + .fp8_insts, + .full_rate_64_ops, + .gfx9, + .gfx90a_insts, + .gfx940_insts, + .ldsbankcount32, + .mai_insts, + .packed_fp32_ops, + .packed_tid, + .pk_fmac_f16_inst, + .sramecc_support, + }), + }; + pub const gfx942 = CpuModel{ + .name = "gfx942", + .llvm_name = "gfx942", + .features = featureSet(&[_]Feature{ + .architected_flat_scratch, + .atomic_buffer_global_pk_add_f16_insts, + .atomic_ds_pk_add_16_insts, + .atomic_fadd_no_rtn_insts, + .atomic_fadd_rtn_insts, + .atomic_flat_pk_add_16_insts, + .atomic_global_pk_add_bf16_inst, + .back_off_barrier, + .dl_insts, + .dot10_insts, + .dot1_insts, + .dot2_insts, + .dot3_insts, + .dot4_insts, + .dot5_insts, + .dot6_insts, + .dot7_insts, + .dpp_64bit, + .flat_atomic_fadd_f32_inst, + .fma_mix_insts, + .fmacf64_inst, + .fp8_insts, + .full_rate_64_ops, + .gfx9, + .gfx90a_insts, + .gfx940_insts, + .ldsbankcount32, + .mai_insts, + .packed_fp32_ops, + .packed_tid, + .pk_fmac_f16_inst, + .sramecc_support, + }), + }; + pub const hainan = CpuModel{ + .name = "hainan", + .llvm_name = "hainan", + .features = featureSet(&[_]Feature{ + .southern_islands, + }), + }; + pub const hawaii = CpuModel{ + .name = "hawaii", + .llvm_name = "hawaii", + .features = featureSet(&[_]Feature{ + .fast_fmaf, + .half_rate_64_ops, + .ldsbankcount32, + .sea_islands, + }), + }; + pub const iceland = CpuModel{ + .name = "iceland", + .llvm_name = "iceland", + .features = featureSet(&[_]Feature{ + .ldsbankcount32, + .sgpr_init_bug, + .unpacked_d16_vmem, + .volcanic_islands, + }), + }; + pub const kabini = CpuModel{ + .name = "kabini", + .llvm_name = "kabini", + .features = featureSet(&[_]Feature{ + .ldsbankcount16, + .sea_islands, + }), + }; + pub const kaveri = CpuModel{ + .name = "kaveri", + .llvm_name = "kaveri", + .features = featureSet(&[_]Feature{ + .ldsbankcount32, + .sea_islands, + }), + }; + pub const mullins = CpuModel{ + .name = "mullins", + .llvm_name = "mullins", + .features = featureSet(&[_]Feature{ + .ldsbankcount16, + .sea_islands, + }), + }; + pub const oland = CpuModel{ + .name = "oland", + .llvm_name = "oland", + .features = featureSet(&[_]Feature{ + .southern_islands, + }), + }; + pub const pitcairn = CpuModel{ + .name = "pitcairn", + .llvm_name = "pitcairn", + .features = featureSet(&[_]Feature{ + .southern_islands, + }), + }; + pub const polaris10 = CpuModel{ + .name = "polaris10", + .llvm_name = "polaris10", + .features = featureSet(&[_]Feature{ + .ldsbankcount32, + .unpacked_d16_vmem, + .volcanic_islands, + }), + }; + pub const polaris11 = CpuModel{ + .name = "polaris11", + .llvm_name = "polaris11", + .features = featureSet(&[_]Feature{ + .ldsbankcount32, + .unpacked_d16_vmem, + .volcanic_islands, + }), + }; + pub const stoney = CpuModel{ + .name = "stoney", + .llvm_name = "stoney", + .features = featureSet(&[_]Feature{ + .image_gather4_d16_bug, + .image_store_d16_bug, + .ldsbankcount16, + .volcanic_islands, + .xnack_support, + }), + }; + pub const tahiti = CpuModel{ + .name = "tahiti", + .llvm_name = "tahiti", + .features = featureSet(&[_]Feature{ + .fast_fmaf, + .half_rate_64_ops, + .southern_islands, + }), + }; + pub const tonga = CpuModel{ + .name = "tonga", + .llvm_name = "tonga", + .features = featureSet(&[_]Feature{ + .ldsbankcount32, + .sgpr_init_bug, + .unpacked_d16_vmem, + .volcanic_islands, + }), + }; + pub const tongapro = CpuModel{ + .name = "tongapro", + .llvm_name = "tongapro", + .features = featureSet(&[_]Feature{ + .ldsbankcount32, + .sgpr_init_bug, + .unpacked_d16_vmem, + .volcanic_islands, + }), + }; + pub const verde = CpuModel{ + .name = "verde", + .llvm_name = "verde", + .features = featureSet(&[_]Feature{ + .southern_islands, + }), + }; +}; |
