about summary refs log tree commit diff
path: root/lib/std/target/amdgpu.zig
diff options
context:
space:
mode:
Diffstat (limited to 'lib/std/target/amdgpu.zig')
-rw-r--r-- lib/std/target/amdgpu.zig 263
1 files changed, 255 insertions, 8 deletions
diff --git a/lib/std/target/amdgpu.zig b/lib/std/target/amdgpu.zig
index f8002071be..2fb8a6fa80 100644
--- a/lib/std/target/amdgpu.zig
+++ b/lib/std/target/amdgpu.zig
@@ -9,6 +9,7 @@ pub const Feature = enum {
a16,
add_no_carry_insts,
aperture_regs,
+ architected_flat_scratch,
atomic_fadd_insts,
auto_waitcnt_before_barrier,
ci_insts,
@@ -20,11 +21,14 @@ pub const Feature = enum {
dot4_insts,
dot5_insts,
dot6_insts,
+ dot7_insts,
dpp,
dpp8,
+ dpp_64bit,
ds_src2_insts,
enable_ds128,
enable_prt_strict_null,
+ extended_image_insts,
fast_denormal_f32,
fast_fmaf,
flat_address_space,
@@ -36,16 +40,19 @@ pub const Feature = enum {
fma_mix_insts,
fmaf,
fp64,
+ full_rate_64_ops,
g16,
gcn3_encoding,
get_wave_id_inst,
gfx10,
gfx10_3_insts,
+ gfx10_a_encoding,
gfx10_b_encoding,
gfx10_insts,
gfx7_gfx8_gfx9_insts,
gfx8_insts,
gfx9,
+ gfx90a_insts,
gfx9_insts,
half_rate_64_ops,
image_gather4_d16_bug,
@@ -70,11 +77,18 @@ pub const Feature = enum {
mfma_inline_literal_bug,
mimg_r128,
movrel,
+ negative_scratch_offset_bug,
+ negative_unaligned_scratch_offset_bug,
no_data_dep_hazard,
no_sdst_cmpx,
+ nsa_clause_bug,
nsa_encoding,
+ nsa_max_size_13,
+ nsa_max_size_5,
nsa_to_vmem_bug,
offset_3f_bug,
+ packed_fp32_ops,
+ packed_tid,
pk_fmac_f16_inst,
promote_alloca,
r128_a16,
@@ -92,11 +106,13 @@ pub const Feature = enum {
sdwa_sdst,
sea_islands,
sgpr_init_bug,
+ shader_cycles_register,
si_scheduler,
smem_to_vector_write_hazard,
southern_islands,
sramecc,
sramecc_support,
+ tgsplit,
trap_handler,
trig_reduced_range,
unaligned_access_mode,
@@ -149,6 +165,11 @@ pub const all_features = blk: {
.description = "Has Memory Aperture Base and Size Registers",
.dependencies = featureSet(&[_]Feature{}),
};
+ result[@enumToInt(Feature.architected_flat_scratch)] = .{
+ .llvm_name = "architected-flat-scratch",
+ .description = "Flat Scratch register is a readonly SPI initialized architected register",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
result[@enumToInt(Feature.atomic_fadd_insts)] = .{
.llvm_name = "atomic-fadd-insts",
.description = "Has buffer_atomic_add_f32, buffer_atomic_pk_add_f16, global_atomic_add_f32, global_atomic_pk_add_f16 instructions",
@@ -183,7 +204,7 @@ pub const all_features = blk: {
};
result[@enumToInt(Feature.dot2_insts)] = .{
.llvm_name = "dot2-insts",
- .description = "Has v_dot2_f32_f16, v_dot2_i32_i16, v_dot2_u32_u16, v_dot4_u32_u8, v_dot8_u32_u4 instructions",
+ .description = "Has v_dot2_i32_i16, v_dot2_u32_u16 instructions",
.dependencies = featureSet(&[_]Feature{}),
};
result[@enumToInt(Feature.dot3_insts)] = .{
@@ -206,6 +227,11 @@ pub const all_features = blk: {
.description = "Has v_dot4c_i32_i8 instruction",
.dependencies = featureSet(&[_]Feature{}),
};
+ result[@enumToInt(Feature.dot7_insts)] = .{
+ .llvm_name = "dot7-insts",
+ .description = "Has v_dot2_f32_f16, v_dot4_u32_u8, v_dot8_u32_u4 instructions",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
result[@enumToInt(Feature.dpp)] = .{
.llvm_name = "dpp",
.description = "Support DPP (Data Parallel Primitives) extension",
@@ -216,6 +242,11 @@ pub const all_features = blk: {
.description = "Support DPP8 (Data Parallel Primitives) extension",
.dependencies = featureSet(&[_]Feature{}),
};
+ result[@enumToInt(Feature.dpp_64bit)] = .{
+ .llvm_name = "dpp-64bit",
+ .description = "Support DPP (Data Parallel Primitives) extension",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
result[@enumToInt(Feature.ds_src2_insts)] = .{
.llvm_name = "ds-src2-insts",
.description = "Has ds_*_src2 instructions",
@@ -231,6 +262,11 @@ pub const all_features = blk: {
.description = "Enable zeroing of result registers for sparse texture fetches",
.dependencies = featureSet(&[_]Feature{}),
};
+ result[@enumToInt(Feature.extended_image_insts)] = .{
+ .llvm_name = "extended-image-insts",
+ .description = "Support mips != 0, lod != 0, gather4, and get_lod",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
result[@enumToInt(Feature.fast_denormal_f32)] = .{
.llvm_name = "fast-denormal-f32",
.description = "Enabling denormals does not cause f32 instructions to run at f64 rates",
@@ -268,7 +304,7 @@ pub const all_features = blk: {
};
result[@enumToInt(Feature.flat_segment_offset_bug)] = .{
.llvm_name = "flat-segment-offset-bug",
- .description = "GFX10 bug, inst_offset ignored in flat segment",
+ .description = "GFX10 bug where inst_offset is ignored when flat instructions access global memory",
.dependencies = featureSet(&[_]Feature{}),
};
result[@enumToInt(Feature.fma_mix_insts)] = .{
@@ -286,6 +322,11 @@ pub const all_features = blk: {
.description = "Enable double precision operations",
.dependencies = featureSet(&[_]Feature{}),
};
+ result[@enumToInt(Feature.full_rate_64_ops)] = .{
+ .llvm_name = "full-rate-64-ops",
+ .description = "Most fp64 instructions are full rate",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
result[@enumToInt(Feature.g16)] = .{
.llvm_name = "g16",
.description = "Support G16 for 16-bit gradient image operands",
@@ -312,6 +353,7 @@ pub const all_features = blk: {
.ci_insts,
.dpp,
.dpp8,
+ .extended_image_insts,
.fast_denormal_f32,
.fast_fmaf,
.flat_address_space,
@@ -334,6 +376,7 @@ pub const all_features = blk: {
.pk_fmac_f16_inst,
.register_banking,
.s_memrealtime,
+ .s_memtime_inst,
.sdwa,
.sdwa_omod,
.sdwa_scalar,
@@ -350,6 +393,11 @@ pub const all_features = blk: {
.description = "Additional instructions for GFX10.3",
.dependencies = featureSet(&[_]Feature{}),
};
+ result[@enumToInt(Feature.gfx10_a_encoding)] = .{
+ .llvm_name = "gfx10_a-encoding",
+ .description = "Has BVH ray tracing instructions",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
result[@enumToInt(Feature.gfx10_b_encoding)] = .{
.llvm_name = "gfx10_b-encoding",
.description = "Encoding format GFX10_B",
@@ -379,7 +427,6 @@ pub const all_features = blk: {
.aperture_regs,
.ci_insts,
.dpp,
- .ds_src2_insts,
.fast_denormal_f32,
.fast_fmaf,
.flat_address_space,
@@ -394,7 +441,7 @@ pub const all_features = blk: {
.int_clamp_insts,
.inv_2pi_inline_imm,
.localmemorysize65536,
- .mad_mac_f32_insts,
+ .negative_scratch_offset_bug,
.r128_a16,
.s_memrealtime,
.s_memtime_inst,
@@ -413,6 +460,11 @@ pub const all_features = blk: {
.xnack_support,
}),
};
+ result[@enumToInt(Feature.gfx90a_insts)] = .{
+ .llvm_name = "gfx90a-insts",
+ .description = "Additional instructions for GFX90A+",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
result[@enumToInt(Feature.gfx9_insts)] = .{
.llvm_name = "gfx9-insts",
.description = "Additional instructions for GFX9+",
@@ -533,6 +585,16 @@ pub const all_features = blk: {
.description = "Has v_movrel*_b32 instructions",
.dependencies = featureSet(&[_]Feature{}),
};
+ result[@enumToInt(Feature.negative_scratch_offset_bug)] = .{
+ .llvm_name = "negative-scratch-offset-bug",
+ .description = "Negative immediate offsets in scratch instructions with an SGPR offset page fault on GFX9",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@enumToInt(Feature.negative_unaligned_scratch_offset_bug)] = .{
+ .llvm_name = "negative-unaligned-scratch-offset-bug",
+ .description = "Scratch instructions with a VGPR offset and a negative immediate offset that is not a multiple of 4 read wrong memory on GFX10",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
result[@enumToInt(Feature.no_data_dep_hazard)] = .{
.llvm_name = "no-data-dep-hazard",
.description = "Does not need SW waitstates",
@@ -543,11 +605,26 @@ pub const all_features = blk: {
.description = "V_CMPX does not write VCC/SGPR in addition to EXEC",
.dependencies = featureSet(&[_]Feature{}),
};
+ result[@enumToInt(Feature.nsa_clause_bug)] = .{
+ .llvm_name = "nsa-clause-bug",
+ .description = "MIMG-NSA in a hard clause has unpredictable results on GFX10.1",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
result[@enumToInt(Feature.nsa_encoding)] = .{
.llvm_name = "nsa-encoding",
.description = "Support NSA encoding for image instructions",
.dependencies = featureSet(&[_]Feature{}),
};
+ result[@enumToInt(Feature.nsa_max_size_13)] = .{
+ .llvm_name = "nsa-max-size-13",
+ .description = "The maximum non-sequential address size in VGPRs.",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@enumToInt(Feature.nsa_max_size_5)] = .{
+ .llvm_name = "nsa-max-size-5",
+ .description = "The maximum non-sequential address size in VGPRs.",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
result[@enumToInt(Feature.nsa_to_vmem_bug)] = .{
.llvm_name = "nsa-to-vmem-bug",
.description = "MIMG-NSA followed by VMEM fail if EXEC_LO or EXEC_HI equals zero",
@@ -558,6 +635,16 @@ pub const all_features = blk: {
.description = "Branch offset of 3f hardware bug",
.dependencies = featureSet(&[_]Feature{}),
};
+ result[@enumToInt(Feature.packed_fp32_ops)] = .{
+ .llvm_name = "packed-fp32-ops",
+ .description = "Support packed fp32 instructions",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
+ result[@enumToInt(Feature.packed_tid)] = .{
+ .llvm_name = "packed-tid",
+ .description = "Workitem IDs are packed into v0 at kernel launch",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
result[@enumToInt(Feature.pk_fmac_f16_inst)] = .{
.llvm_name = "pk-fmac-f16-inst",
.description = "Has v_pk_fmac_f16 instruction",
@@ -639,6 +726,7 @@ pub const all_features = blk: {
.dependencies = featureSet(&[_]Feature{
.ci_insts,
.ds_src2_insts,
+ .extended_image_insts,
.flat_address_space,
.fp64,
.gfx7_gfx8_gfx9_insts,
@@ -657,6 +745,11 @@ pub const all_features = blk: {
.description = "VI SGPR initialization bug requiring a fixed SGPR allocation size",
.dependencies = featureSet(&[_]Feature{}),
};
+ result[@enumToInt(Feature.shader_cycles_register)] = .{
+ .llvm_name = "shader-cycles-register",
+ .description = "Has SHADER_CYCLES hardware register",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
result[@enumToInt(Feature.si_scheduler)] = .{
.llvm_name = "si-scheduler",
.description = "Enable SI Machine Scheduler",
@@ -672,6 +765,7 @@ pub const all_features = blk: {
.description = "SOUTHERN_ISLANDS GPU generation",
.dependencies = featureSet(&[_]Feature{
.ds_src2_insts,
+ .extended_image_insts,
.fp64,
.ldsbankcount32,
.localmemorysize32768,
@@ -693,6 +787,11 @@ pub const all_features = blk: {
.description = "Hardware supports SRAMECC",
.dependencies = featureSet(&[_]Feature{}),
};
+ result[@enumToInt(Feature.tgsplit)] = .{
+ .llvm_name = "tgsplit",
+ .description = "Enable threadgroup split execution",
+ .dependencies = featureSet(&[_]Feature{}),
+ };
result[@enumToInt(Feature.trap_handler)] = .{
.llvm_name = "trap-handler",
.description = "Trap handler support",
@@ -761,6 +860,7 @@ pub const all_features = blk: {
.ci_insts,
.dpp,
.ds_src2_insts,
+ .extended_image_insts,
.fast_denormal_f32,
.flat_address_space,
.fp64,
@@ -892,10 +992,12 @@ pub const cpu = struct {
.lds_misaligned_bug,
.ldsbankcount32,
.mad_mac_f32_insts,
+ .negative_unaligned_scratch_offset_bug,
+ .nsa_clause_bug,
.nsa_encoding,
+ .nsa_max_size_5,
.nsa_to_vmem_bug,
.offset_3f_bug,
- .s_memtime_inst,
.scalar_atomics,
.scalar_flat_scratch_insts,
.scalar_stores,
@@ -916,6 +1018,7 @@ pub const cpu = struct {
.dot2_insts,
.dot5_insts,
.dot6_insts,
+ .dot7_insts,
.ds_src2_insts,
.flat_segment_offset_bug,
.get_wave_id_inst,
@@ -925,10 +1028,12 @@ pub const cpu = struct {
.lds_misaligned_bug,
.ldsbankcount32,
.mad_mac_f32_insts,
+ .negative_unaligned_scratch_offset_bug,
+ .nsa_clause_bug,
.nsa_encoding,
+ .nsa_max_size_5,
.nsa_to_vmem_bug,
.offset_3f_bug,
- .s_memtime_inst,
.scalar_atomics,
.scalar_flat_scratch_insts,
.scalar_stores,
@@ -949,6 +1054,7 @@ pub const cpu = struct {
.dot2_insts,
.dot5_insts,
.dot6_insts,
+ .dot7_insts,
.ds_src2_insts,
.flat_segment_offset_bug,
.get_wave_id_inst,
@@ -958,10 +1064,44 @@ pub const cpu = struct {
.lds_misaligned_bug,
.ldsbankcount32,
.mad_mac_f32_insts,
+ .negative_unaligned_scratch_offset_bug,
+ .nsa_clause_bug,
.nsa_encoding,
+ .nsa_max_size_5,
+ .nsa_to_vmem_bug,
+ .offset_3f_bug,
+ .scalar_atomics,
+ .scalar_flat_scratch_insts,
+ .scalar_stores,
+ .smem_to_vector_write_hazard,
+ .vcmpx_exec_war_hazard,
+ .vcmpx_permlane_hazard,
+ .vmem_to_scalar_write_hazard,
+ .wavefrontsize32,
+ .xnack_support,
+ }),
+ };
+ pub const gfx1013 = CpuModel{
+ .name = "gfx1013",
+ .llvm_name = "gfx1013",
+ .features = featureSet(&[_]Feature{
+ .dl_insts,
+ .ds_src2_insts,
+ .flat_segment_offset_bug,
+ .get_wave_id_inst,
+ .gfx10,
+ .gfx10_a_encoding,
+ .inst_fwd_prefetch_bug,
+ .lds_branch_vmem_war_hazard,
+ .lds_misaligned_bug,
+ .ldsbankcount32,
+ .mad_mac_f32_insts,
+ .negative_unaligned_scratch_offset_bug,
+ .nsa_clause_bug,
+ .nsa_encoding,
+ .nsa_max_size_5,
.nsa_to_vmem_bug,
.offset_3f_bug,
- .s_memtime_inst,
.scalar_atomics,
.scalar_flat_scratch_insts,
.scalar_stores,
@@ -982,11 +1122,15 @@ pub const cpu = struct {
.dot2_insts,
.dot5_insts,
.dot6_insts,
+ .dot7_insts,
.gfx10,
.gfx10_3_insts,
+ .gfx10_a_encoding,
.gfx10_b_encoding,
.ldsbankcount32,
.nsa_encoding,
+ .nsa_max_size_13,
+ .shader_cycles_register,
.wavefrontsize32,
}),
};
@@ -999,11 +1143,15 @@ pub const cpu = struct {
.dot2_insts,
.dot5_insts,
.dot6_insts,
+ .dot7_insts,
.gfx10,
.gfx10_3_insts,
+ .gfx10_a_encoding,
.gfx10_b_encoding,
.ldsbankcount32,
.nsa_encoding,
+ .nsa_max_size_13,
+ .shader_cycles_register,
.wavefrontsize32,
}),
};
@@ -1016,11 +1164,15 @@ pub const cpu = struct {
.dot2_insts,
.dot5_insts,
.dot6_insts,
+ .dot7_insts,
.gfx10,
.gfx10_3_insts,
+ .gfx10_a_encoding,
.gfx10_b_encoding,
.ldsbankcount32,
.nsa_encoding,
+ .nsa_max_size_13,
+ .shader_cycles_register,
.wavefrontsize32,
}),
};
@@ -1033,11 +1185,57 @@ pub const cpu = struct {
.dot2_insts,
.dot5_insts,
.dot6_insts,
+ .dot7_insts,
+ .gfx10,
+ .gfx10_3_insts,
+ .gfx10_a_encoding,
+ .gfx10_b_encoding,
+ .ldsbankcount32,
+ .nsa_encoding,
+ .nsa_max_size_13,
+ .shader_cycles_register,
+ .wavefrontsize32,
+ }),
+ };
+ pub const gfx1034 = CpuModel{
+ .name = "gfx1034",
+ .llvm_name = "gfx1034",
+ .features = featureSet(&[_]Feature{
+ .dl_insts,
+ .dot1_insts,
+ .dot2_insts,
+ .dot5_insts,
+ .dot6_insts,
+ .dot7_insts,
.gfx10,
.gfx10_3_insts,
+ .gfx10_a_encoding,
.gfx10_b_encoding,
.ldsbankcount32,
.nsa_encoding,
+ .nsa_max_size_13,
+ .shader_cycles_register,
+ .wavefrontsize32,
+ }),
+ };
+ pub const gfx1035 = CpuModel{
+ .name = "gfx1035",
+ .llvm_name = "gfx1035",
+ .features = featureSet(&[_]Feature{
+ .dl_insts,
+ .dot1_insts,
+ .dot2_insts,
+ .dot5_insts,
+ .dot6_insts,
+ .dot7_insts,
+ .gfx10,
+ .gfx10_3_insts,
+ .gfx10_a_encoding,
+ .gfx10_b_encoding,
+ .ldsbankcount32,
+ .nsa_encoding,
+ .nsa_max_size_13,
+ .shader_cycles_register,
.wavefrontsize32,
}),
};
@@ -1171,9 +1369,12 @@ pub const cpu = struct {
.name = "gfx900",
.llvm_name = "gfx900",
.features = featureSet(&[_]Feature{
+ .ds_src2_insts,
+ .extended_image_insts,
.gfx9,
.image_gather4_d16_bug,
.ldsbankcount32,
+ .mad_mac_f32_insts,
.mad_mix_insts,
}),
};
@@ -1181,9 +1382,12 @@ pub const cpu = struct {
.name = "gfx902",
.llvm_name = "gfx902",
.features = featureSet(&[_]Feature{
+ .ds_src2_insts,
+ .extended_image_insts,
.gfx9,
.image_gather4_d16_bug,
.ldsbankcount32,
+ .mad_mac_f32_insts,
.mad_mix_insts,
}),
};
@@ -1191,10 +1395,13 @@ pub const cpu = struct {
.name = "gfx904",
.llvm_name = "gfx904",
.features = featureSet(&[_]Feature{
+ .ds_src2_insts,
+ .extended_image_insts,
.fma_mix_insts,
.gfx9,
.image_gather4_d16_bug,
.ldsbankcount32,
+ .mad_mac_f32_insts,
}),
};
pub const gfx906 = CpuModel{
@@ -1204,11 +1411,15 @@ pub const cpu = struct {
.dl_insts,
.dot1_insts,
.dot2_insts,
+ .dot7_insts,
+ .ds_src2_insts,
+ .extended_image_insts,
.fma_mix_insts,
.gfx9,
.half_rate_64_ops,
.image_gather4_d16_bug,
.ldsbankcount32,
+ .mad_mac_f32_insts,
.sramecc_support,
}),
};
@@ -1224,11 +1435,15 @@ pub const cpu = struct {
.dot4_insts,
.dot5_insts,
.dot6_insts,
+ .dot7_insts,
+ .ds_src2_insts,
+ .extended_image_insts,
.fma_mix_insts,
.gfx9,
.half_rate_64_ops,
.image_gather4_d16_bug,
.ldsbankcount32,
+ .mad_mac_f32_insts,
.mai_insts,
.mfma_inline_literal_bug,
.pk_fmac_f16_inst,
@@ -1239,21 +1454,53 @@ pub const cpu = struct {
.name = "gfx909",
.llvm_name = "gfx909",
.features = featureSet(&[_]Feature{
+ .ds_src2_insts,
+ .extended_image_insts,
.gfx9,
.image_gather4_d16_bug,
.ldsbankcount32,
+ .mad_mac_f32_insts,
.mad_mix_insts,
}),
};
+ pub const gfx90a = CpuModel{
+ .name = "gfx90a",
+ .llvm_name = "gfx90a",
+ .features = featureSet(&[_]Feature{
+ .atomic_fadd_insts,
+ .dl_insts,
+ .dot1_insts,
+ .dot2_insts,
+ .dot3_insts,
+ .dot4_insts,
+ .dot5_insts,
+ .dot6_insts,
+ .dot7_insts,
+ .dpp_64bit,
+ .fma_mix_insts,
+ .full_rate_64_ops,
+ .gfx9,
+ .gfx90a_insts,
+ .ldsbankcount32,
+ .mad_mac_f32_insts,
+ .mai_insts,
+ .packed_fp32_ops,
+ .packed_tid,
+ .pk_fmac_f16_inst,
+ .sramecc_support,
+ }),
+ };
pub const gfx90c = CpuModel{
.name = "gfx90c",
.llvm_name = "gfx90c",
.features = featureSet(&[_]Feature{
+ .ds_src2_insts,
+ .extended_image_insts,
.gfx9,
.image_gather4_d16_bug,
.ldsbankcount32,
+ .mad_mac_f32_insts,
.mad_mix_insts,
- .xnack,
}),
};
pub const hainan = CpuModel{