diff options
Diffstat (limited to 'lib/std/target/amdgpu.zig')
| -rw-r--r-- | lib/std/target/amdgpu.zig | 263 |
1 files changed, 255 insertions, 8 deletions
diff --git a/lib/std/target/amdgpu.zig b/lib/std/target/amdgpu.zig index f8002071be..2fb8a6fa80 100644 --- a/lib/std/target/amdgpu.zig +++ b/lib/std/target/amdgpu.zig @@ -9,6 +9,7 @@ pub const Feature = enum { a16, add_no_carry_insts, aperture_regs, + architected_flat_scratch, atomic_fadd_insts, auto_waitcnt_before_barrier, ci_insts, @@ -20,11 +21,14 @@ pub const Feature = enum { dot4_insts, dot5_insts, dot6_insts, + dot7_insts, dpp, dpp8, + dpp_64bit, ds_src2_insts, enable_ds128, enable_prt_strict_null, + extended_image_insts, fast_denormal_f32, fast_fmaf, flat_address_space, @@ -36,16 +40,19 @@ pub const Feature = enum { fma_mix_insts, fmaf, fp64, + full_rate_64_ops, g16, gcn3_encoding, get_wave_id_inst, gfx10, gfx10_3_insts, + gfx10_a_encoding, gfx10_b_encoding, gfx10_insts, gfx7_gfx8_gfx9_insts, gfx8_insts, gfx9, + gfx90a_insts, gfx9_insts, half_rate_64_ops, image_gather4_d16_bug, @@ -70,11 +77,18 @@ pub const Feature = enum { mfma_inline_literal_bug, mimg_r128, movrel, + negative_scratch_offset_bug, + negative_unaligned_scratch_offset_bug, no_data_dep_hazard, no_sdst_cmpx, + nsa_clause_bug, nsa_encoding, + nsa_max_size_13, + nsa_max_size_5, nsa_to_vmem_bug, offset_3f_bug, + packed_fp32_ops, + packed_tid, pk_fmac_f16_inst, promote_alloca, r128_a16, @@ -92,11 +106,13 @@ pub const Feature = enum { sdwa_sdst, sea_islands, sgpr_init_bug, + shader_cycles_register, si_scheduler, smem_to_vector_write_hazard, southern_islands, sramecc, sramecc_support, + tgsplit, trap_handler, trig_reduced_range, unaligned_access_mode, @@ -149,6 +165,11 @@ pub const all_features = blk: { .description = "Has Memory Aperture Base and Size Registers", .dependencies = featureSet(&[_]Feature{}), }; + result[@enumToInt(Feature.architected_flat_scratch)] = .{ + .llvm_name = "architected-flat-scratch", + .description = "Flat Scratch register is a readonly SPI initialized architected register", + .dependencies = featureSet(&[_]Feature{}), + }; result[@enumToInt(Feature.atomic_fadd_insts)] = .{ .llvm_name = "atomic-fadd-insts", .description = "Has buffer_atomic_add_f32, buffer_atomic_pk_add_f16, global_atomic_add_f32, global_atomic_pk_add_f16 instructions", @@ -183,7 +204,7 @@ pub const all_features = blk: { }; result[@enumToInt(Feature.dot2_insts)] = .{ .llvm_name = "dot2-insts", - .description = "Has v_dot2_f32_f16, v_dot2_i32_i16, v_dot2_u32_u16, v_dot4_u32_u8, v_dot8_u32_u4 instructions", + .description = "Has v_dot2_i32_i16, v_dot2_u32_u16 instructions", .dependencies = featureSet(&[_]Feature{}), }; result[@enumToInt(Feature.dot3_insts)] = .{ @@ -206,6 +227,11 @@ pub const all_features = blk: { .description = "Has v_dot4c_i32_i8 instruction", .dependencies = featureSet(&[_]Feature{}), }; + result[@enumToInt(Feature.dot7_insts)] = .{ + .llvm_name = "dot7-insts", + .description = "Has v_dot2_f32_f16, v_dot4_u32_u8, v_dot8_u32_u4 instructions", + .dependencies = featureSet(&[_]Feature{}), + }; result[@enumToInt(Feature.dpp)] = .{ .llvm_name = "dpp", .description = "Support DPP (Data Parallel Primitives) extension", @@ -216,6 +242,11 @@ pub const all_features = blk: { .description = "Support DPP8 (Data Parallel Primitives) extension", .dependencies = featureSet(&[_]Feature{}), }; + result[@enumToInt(Feature.dpp_64bit)] = .{ + .llvm_name = "dpp-64bit", + .description = "Support DPP (Data Parallel Primitives) extension", + .dependencies = featureSet(&[_]Feature{}), + }; result[@enumToInt(Feature.ds_src2_insts)] = .{ .llvm_name = "ds-src2-insts", .description = "Has ds_*_src2 instructions", @@ -231,6 +262,11 @@ pub const all_features = blk: { .description = "Enable zeroing of result registers for sparse texture fetches", .dependencies = featureSet(&[_]Feature{}), }; + result[@enumToInt(Feature.extended_image_insts)] = .{ + .llvm_name = "extended-image-insts", + .description = "Support mips != 0, lod != 0, gather4, and get_lod", + .dependencies = featureSet(&[_]Feature{}), + }; result[@enumToInt(Feature.fast_denormal_f32)] = .{ .llvm_name = "fast-denormal-f32", .description = "Enabling denormals does not cause f32 instructions to run at f64 rates", @@ -268,7 +304,7 @@ pub const all_features = blk: { }; result[@enumToInt(Feature.flat_segment_offset_bug)] = .{ .llvm_name = "flat-segment-offset-bug", - .description = "GFX10 bug, inst_offset ignored in flat segment", + .description = "GFX10 bug where inst_offset is ignored when flat instructions access global memory", .dependencies = featureSet(&[_]Feature{}), }; result[@enumToInt(Feature.fma_mix_insts)] = .{ @@ -286,6 +322,11 @@ pub const all_features = blk: { .description = "Enable double precision operations", .dependencies = featureSet(&[_]Feature{}), }; + result[@enumToInt(Feature.full_rate_64_ops)] = .{ + .llvm_name = "full-rate-64-ops", + .description = "Most fp64 instructions are full rate", + .dependencies = featureSet(&[_]Feature{}), + }; result[@enumToInt(Feature.g16)] = .{ .llvm_name = "g16", .description = "Support G16 for 16-bit gradient image operands", @@ -312,6 +353,7 @@ pub const all_features = blk: { .ci_insts, .dpp, .dpp8, + .extended_image_insts, .fast_denormal_f32, .fast_fmaf, .flat_address_space, @@ -334,6 +376,7 @@ pub const all_features = blk: { .pk_fmac_f16_inst, .register_banking, .s_memrealtime, + .s_memtime_inst, .sdwa, .sdwa_omod, .sdwa_scalar, @@ -350,6 +393,11 @@ pub const all_features = blk: { .description = "Additional instructions for GFX10.3", .dependencies = featureSet(&[_]Feature{}), }; + result[@enumToInt(Feature.gfx10_a_encoding)] = .{ + .llvm_name = "gfx10_a-encoding", + .description = "Has BVH ray tracing instructions", + .dependencies = featureSet(&[_]Feature{}), + }; result[@enumToInt(Feature.gfx10_b_encoding)] = .{ .llvm_name = "gfx10_b-encoding", .description = "Encoding format GFX10_B", @@ -379,7 +427,6 @@ pub const all_features = blk: { .aperture_regs, .ci_insts, .dpp, - .ds_src2_insts, .fast_denormal_f32, .fast_fmaf, .flat_address_space, @@ -394,7 +441,7 @@ pub const all_features = blk: { .int_clamp_insts, .inv_2pi_inline_imm, .localmemorysize65536, - .mad_mac_f32_insts, + .negative_scratch_offset_bug, .r128_a16, .s_memrealtime, .s_memtime_inst, @@ -413,6 +460,11 @@ pub const all_features = blk: { .xnack_support, }), }; + result[@enumToInt(Feature.gfx90a_insts)] = .{ + .llvm_name = "gfx90a-insts", + .description = "Additional instructions for GFX90A+", + .dependencies = featureSet(&[_]Feature{}), + }; result[@enumToInt(Feature.gfx9_insts)] = .{ .llvm_name = "gfx9-insts", .description = "Additional instructions for GFX9+", @@ -533,6 +585,16 @@ pub const all_features = blk: { .description = "Has v_movrel*_b32 instructions", .dependencies = featureSet(&[_]Feature{}), }; + result[@enumToInt(Feature.negative_scratch_offset_bug)] = .{ + .llvm_name = "negative-scratch-offset-bug", + .description = "Negative immediate offsets in scratch instructions with an SGPR offset page fault on GFX9", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@enumToInt(Feature.negative_unaligned_scratch_offset_bug)] = .{ + .llvm_name = "negative-unaligned-scratch-offset-bug", + .description = "Scratch instructions with a VGPR offset and a negative immediate offset that is not a multiple of 4 read wrong memory on GFX10", + .dependencies = featureSet(&[_]Feature{}), + }; result[@enumToInt(Feature.no_data_dep_hazard)] = .{ .llvm_name = "no-data-dep-hazard", .description = "Does not need SW waitstates", @@ -543,11 +605,26 @@ pub const all_features = blk: { .description = "V_CMPX does not write VCC/SGPR in addition to EXEC", .dependencies = featureSet(&[_]Feature{}), }; + result[@enumToInt(Feature.nsa_clause_bug)] = .{ + .llvm_name = "nsa-clause-bug", + .description = "MIMG-NSA in a hard clause has unpredictable results on GFX10.1", + .dependencies = featureSet(&[_]Feature{}), + }; result[@enumToInt(Feature.nsa_encoding)] = .{ .llvm_name = "nsa-encoding", .description = "Support NSA encoding for image instructions", .dependencies = featureSet(&[_]Feature{}), }; + result[@enumToInt(Feature.nsa_max_size_13)] = .{ + .llvm_name = "nsa-max-size-13", + .description = "The maximum non-sequential address size in VGPRs.", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@enumToInt(Feature.nsa_max_size_5)] = .{ + .llvm_name = "nsa-max-size-5", + .description = "The maximum non-sequential address size in VGPRs.", + .dependencies = featureSet(&[_]Feature{}), + }; result[@enumToInt(Feature.nsa_to_vmem_bug)] = .{ .llvm_name = "nsa-to-vmem-bug", .description = "MIMG-NSA followed by VMEM fail if EXEC_LO or EXEC_HI equals zero", @@ -558,6 +635,16 @@ pub const all_features = blk: { .description = "Branch offset of 3f hardware bug", .dependencies = featureSet(&[_]Feature{}), }; + result[@enumToInt(Feature.packed_fp32_ops)] = .{ + .llvm_name = "packed-fp32-ops", + .description = "Support packed fp32 instructions", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@enumToInt(Feature.packed_tid)] = .{ + .llvm_name = "packed-tid", + .description = "Workitem IDs are packed into v0 at kernel launch", + .dependencies = featureSet(&[_]Feature{}), + }; result[@enumToInt(Feature.pk_fmac_f16_inst)] = .{ .llvm_name = "pk-fmac-f16-inst", .description = "Has v_pk_fmac_f16 instruction", @@ -639,6 +726,7 @@ pub const all_features = blk: { .dependencies = featureSet(&[_]Feature{ .ci_insts, .ds_src2_insts, + .extended_image_insts, .flat_address_space, .fp64, .gfx7_gfx8_gfx9_insts, @@ -657,6 +745,11 @@ pub const all_features = blk: { .description = "VI SGPR initialization bug requiring a fixed SGPR allocation size", .dependencies = featureSet(&[_]Feature{}), }; + result[@enumToInt(Feature.shader_cycles_register)] = .{ + .llvm_name = "shader-cycles-register", + .description = "Has SHADER_CYCLES hardware register", + .dependencies = featureSet(&[_]Feature{}), + }; result[@enumToInt(Feature.si_scheduler)] = .{ .llvm_name = "si-scheduler", .description = "Enable SI Machine Scheduler", @@ -672,6 +765,7 @@ pub const all_features = blk: { .description = "SOUTHERN_ISLANDS GPU generation", .dependencies = featureSet(&[_]Feature{ .ds_src2_insts, + .extended_image_insts, .fp64, .ldsbankcount32, .localmemorysize32768, @@ -693,6 +787,11 @@ pub const all_features = blk: { .description = "Hardware supports SRAMECC", .dependencies = featureSet(&[_]Feature{}), }; + result[@enumToInt(Feature.tgsplit)] = .{ + .llvm_name = "tgsplit", + .description = "Enable threadgroup split execution", + .dependencies = featureSet(&[_]Feature{}), + }; result[@enumToInt(Feature.trap_handler)] = .{ .llvm_name = "trap-handler", .description = "Trap handler support", @@ -761,6 +860,7 @@ pub const all_features = blk: { .ci_insts, .dpp, .ds_src2_insts, + .extended_image_insts, .fast_denormal_f32, .flat_address_space, .fp64, @@ -892,10 +992,12 @@ pub const cpu = struct { .lds_misaligned_bug, .ldsbankcount32, .mad_mac_f32_insts, + .negative_unaligned_scratch_offset_bug, + .nsa_clause_bug, .nsa_encoding, + .nsa_max_size_5, .nsa_to_vmem_bug, .offset_3f_bug, - .s_memtime_inst, .scalar_atomics, .scalar_flat_scratch_insts, .scalar_stores, @@ -916,6 +1018,7 @@ pub const cpu = struct { .dot2_insts, .dot5_insts, .dot6_insts, + .dot7_insts, .ds_src2_insts, .flat_segment_offset_bug, .get_wave_id_inst, @@ -925,10 +1028,12 @@ pub const cpu = struct { .lds_misaligned_bug, .ldsbankcount32, .mad_mac_f32_insts, + .negative_unaligned_scratch_offset_bug, + .nsa_clause_bug, .nsa_encoding, + .nsa_max_size_5, .nsa_to_vmem_bug, .offset_3f_bug, - .s_memtime_inst, .scalar_atomics, .scalar_flat_scratch_insts, .scalar_stores, @@ -949,6 +1054,7 @@ pub const cpu = struct { .dot2_insts, .dot5_insts, .dot6_insts, + .dot7_insts, .ds_src2_insts, .flat_segment_offset_bug, .get_wave_id_inst, @@ -958,10 +1064,44 @@ pub const cpu = struct { .lds_misaligned_bug, .ldsbankcount32, .mad_mac_f32_insts, + .negative_unaligned_scratch_offset_bug, + .nsa_clause_bug, .nsa_encoding, + .nsa_max_size_5, + .nsa_to_vmem_bug, + .offset_3f_bug, + .scalar_atomics, + .scalar_flat_scratch_insts, + .scalar_stores, + .smem_to_vector_write_hazard, + .vcmpx_exec_war_hazard, + .vcmpx_permlane_hazard, + .vmem_to_scalar_write_hazard, + .wavefrontsize32, + .xnack_support, + }), + }; + pub const gfx1013 = CpuModel{ + .name = "gfx1013", + .llvm_name = "gfx1013", + .features = featureSet(&[_]Feature{ + .dl_insts, + .ds_src2_insts, + .flat_segment_offset_bug, + .get_wave_id_inst, + .gfx10, + .gfx10_a_encoding, + .inst_fwd_prefetch_bug, + .lds_branch_vmem_war_hazard, + .lds_misaligned_bug, + .ldsbankcount32, + .mad_mac_f32_insts, + .negative_unaligned_scratch_offset_bug, + .nsa_clause_bug, + .nsa_encoding, + .nsa_max_size_5, .nsa_to_vmem_bug, .offset_3f_bug, - .s_memtime_inst, .scalar_atomics, .scalar_flat_scratch_insts, .scalar_stores, @@ -982,11 +1122,15 @@ pub const cpu = struct { .dot2_insts, .dot5_insts, .dot6_insts, + .dot7_insts, .gfx10, .gfx10_3_insts, + .gfx10_a_encoding, .gfx10_b_encoding, .ldsbankcount32, .nsa_encoding, + .nsa_max_size_13, + .shader_cycles_register, .wavefrontsize32, }), }; @@ -999,11 +1143,15 @@ pub const cpu = struct { .dot2_insts, .dot5_insts, .dot6_insts, + .dot7_insts, .gfx10, .gfx10_3_insts, + .gfx10_a_encoding, .gfx10_b_encoding, .ldsbankcount32, .nsa_encoding, + .nsa_max_size_13, + .shader_cycles_register, .wavefrontsize32, }), }; @@ -1016,11 +1164,15 @@ pub const cpu = struct { .dot2_insts, .dot5_insts, .dot6_insts, + .dot7_insts, .gfx10, .gfx10_3_insts, + .gfx10_a_encoding, .gfx10_b_encoding, .ldsbankcount32, .nsa_encoding, + .nsa_max_size_13, + .shader_cycles_register, .wavefrontsize32, }), }; @@ -1033,11 +1185,57 @@ pub const cpu = struct { .dot2_insts, .dot5_insts, .dot6_insts, + .dot7_insts, + .gfx10, + .gfx10_3_insts, + .gfx10_a_encoding, + .gfx10_b_encoding, + .ldsbankcount32, + .nsa_encoding, + .nsa_max_size_13, + .shader_cycles_register, + .wavefrontsize32, + }), + }; + pub const gfx1034 = CpuModel{ + .name = "gfx1034", + .llvm_name = "gfx1034", + .features = featureSet(&[_]Feature{ + .dl_insts, + .dot1_insts, + .dot2_insts, + .dot5_insts, + .dot6_insts, + .dot7_insts, .gfx10, .gfx10_3_insts, + .gfx10_a_encoding, .gfx10_b_encoding, .ldsbankcount32, .nsa_encoding, + .nsa_max_size_13, + .shader_cycles_register, + .wavefrontsize32, + }), + }; + pub const gfx1035 = CpuModel{ + .name = "gfx1035", + .llvm_name = "gfx1035", + .features = featureSet(&[_]Feature{ + .dl_insts, + .dot1_insts, + .dot2_insts, + .dot5_insts, + .dot6_insts, + .dot7_insts, + .gfx10, + .gfx10_3_insts, + .gfx10_a_encoding, + .gfx10_b_encoding, + .ldsbankcount32, + .nsa_encoding, + .nsa_max_size_13, + .shader_cycles_register, .wavefrontsize32, }), }; @@ -1171,9 +1369,12 @@ pub const cpu = struct { .name = "gfx900", .llvm_name = "gfx900", .features = featureSet(&[_]Feature{ + .ds_src2_insts, + .extended_image_insts, .gfx9, .image_gather4_d16_bug, .ldsbankcount32, + .mad_mac_f32_insts, .mad_mix_insts, }), }; @@ -1181,9 +1382,12 @@ pub const cpu = struct { .name = "gfx902", .llvm_name = "gfx902", .features = featureSet(&[_]Feature{ + .ds_src2_insts, + .extended_image_insts, .gfx9, .image_gather4_d16_bug, .ldsbankcount32, + .mad_mac_f32_insts, .mad_mix_insts, }), }; @@ -1191,10 +1395,13 @@ pub const cpu = struct { .name = "gfx904", .llvm_name = "gfx904", .features = featureSet(&[_]Feature{ + .ds_src2_insts, + .extended_image_insts, .fma_mix_insts, .gfx9, .image_gather4_d16_bug, .ldsbankcount32, + .mad_mac_f32_insts, }), }; pub const gfx906 = CpuModel{ @@ -1204,11 +1411,15 @@ pub const cpu = struct { .dl_insts, .dot1_insts, .dot2_insts, + .dot7_insts, + .ds_src2_insts, + .extended_image_insts, .fma_mix_insts, .gfx9, .half_rate_64_ops, .image_gather4_d16_bug, .ldsbankcount32, + .mad_mac_f32_insts, .sramecc_support, }), }; @@ -1224,11 +1435,15 @@ pub const cpu = struct { .dot4_insts, .dot5_insts, .dot6_insts, + .dot7_insts, + .ds_src2_insts, + .extended_image_insts, .fma_mix_insts, .gfx9, .half_rate_64_ops, .image_gather4_d16_bug, .ldsbankcount32, + .mad_mac_f32_insts, .mai_insts, .mfma_inline_literal_bug, .pk_fmac_f16_inst, @@ -1239,21 +1454,53 @@ pub const cpu = struct { .name = "gfx909", .llvm_name = "gfx909", .features = featureSet(&[_]Feature{ + .ds_src2_insts, + .extended_image_insts, .gfx9, .image_gather4_d16_bug, .ldsbankcount32, + .mad_mac_f32_insts, .mad_mix_insts, }), }; + pub const gfx90a = CpuModel{ + .name = "gfx90a", + .llvm_name = "gfx90a", + .features = featureSet(&[_]Feature{ + .atomic_fadd_insts, + .dl_insts, + .dot1_insts, + .dot2_insts, + .dot3_insts, + .dot4_insts, + .dot5_insts, + .dot6_insts, + .dot7_insts, + .dpp_64bit, + .fma_mix_insts, + .full_rate_64_ops, + .gfx9, + .gfx90a_insts, + .ldsbankcount32, + .mad_mac_f32_insts, + .mai_insts, + .packed_fp32_ops, + .packed_tid, + .pk_fmac_f16_inst, + .sramecc_support, + }), + }; pub const gfx90c = CpuModel{ .name = "gfx90c", .llvm_name = "gfx90c", .features = featureSet(&[_]Feature{ + .ds_src2_insts, + .extended_image_insts, .gfx9, .image_gather4_d16_bug, .ldsbankcount32, + .mad_mac_f32_insts, .mad_mix_insts, - .xnack, }), }; pub const hainan = CpuModel{ |
