zig/lib/std/Target/amdgpu.zig
2024-01-01 17:51:18 -07:00

2154 lines
71 KiB
Zig

//! This file is auto-generated by tools/update_cpu_features.zig.
const std = @import("../std.zig");
const CpuFeature = std.Target.Cpu.Feature;
const CpuModel = std.Target.Cpu.Model;
/// Every AMDGPU subtarget feature known to this file.
///
/// The declaration order is significant: `@intFromEnum` of a field is used
/// as that feature's slot in `all_features`, so fields must not be reordered
/// by hand.
pub const Feature = enum {
    @"16_bit_insts",
    a16,
    add_no_carry_insts,
    aperture_regs,
    architected_flat_scratch,
    architected_sgprs,
    atomic_buffer_global_pk_add_f16_insts,
    atomic_buffer_global_pk_add_f16_no_rtn_insts,
    atomic_ds_pk_add_16_insts,
    atomic_fadd_no_rtn_insts,
    atomic_fadd_rtn_insts,
    atomic_flat_pk_add_16_insts,
    atomic_global_pk_add_bf16_inst,
    auto_waitcnt_before_barrier,
    back_off_barrier,
    ci_insts,
    cumode,
    dl_insts,
    dot10_insts,
    dot1_insts,
    dot2_insts,
    dot3_insts,
    dot4_insts,
    dot5_insts,
    dot6_insts,
    dot7_insts,
    dot8_insts,
    dot9_insts,
    dpp,
    dpp8,
    dpp_64bit,
    ds128,
    ds_src2_insts,
    extended_image_insts,
    fast_denormal_f32,
    fast_fmaf,
    flat_address_space,
    flat_atomic_fadd_f32_inst,
    flat_for_global,
    flat_global_insts,
    flat_inst_offsets,
    flat_scratch,
    flat_scratch_insts,
    flat_segment_offset_bug,
    fma_mix_insts,
    fmacf64_inst,
    fmaf,
    force_store_sc0_sc1,
    fp64,
    fp8_insts,
    full_rate_64_ops,
    g16,
    gcn3_encoding,
    get_wave_id_inst,
    gfx10,
    gfx10_3_insts,
    gfx10_a_encoding,
    gfx10_b_encoding,
    gfx10_insts,
    gfx11,
    gfx11_full_vgprs,
    gfx11_insts,
    gfx7_gfx8_gfx9_insts,
    gfx8_insts,
    gfx9,
    gfx90a_insts,
    gfx940_insts,
    gfx9_insts,
    half_rate_64_ops,
    image_gather4_d16_bug,
    image_insts,
    image_store_d16_bug,
    inst_fwd_prefetch_bug,
    int_clamp_insts,
    inv_2pi_inline_imm,
    lds_branch_vmem_war_hazard,
    lds_misaligned_bug,
    ldsbankcount16,
    ldsbankcount32,
    load_store_opt,
    localmemorysize32768,
    localmemorysize65536,
    mad_intra_fwd_bug,
    mad_mac_f32_insts,
    mad_mix_insts,
    mai_insts,
    max_private_element_size_16,
    max_private_element_size_4,
    max_private_element_size_8,
    mfma_inline_literal_bug,
    mimg_r128,
    movrel,
    negative_scratch_offset_bug,
    negative_unaligned_scratch_offset_bug,
    no_data_dep_hazard,
    no_sdst_cmpx,
    nsa_clause_bug,
    nsa_encoding,
    nsa_to_vmem_bug,
    offset_3f_bug,
    packed_fp32_ops,
    packed_tid,
    partial_nsa_encoding,
    pk_fmac_f16_inst,
    promote_alloca,
    prt_strict_null,
    r128_a16,
    s_memrealtime,
    s_memtime_inst,
    scalar_atomics,
    scalar_flat_scratch_insts,
    scalar_stores,
    sdwa,
    sdwa_mav,
    sdwa_omod,
    sdwa_out_mods_vopc,
    sdwa_scalar,
    sdwa_sdst,
    sea_islands,
    sgpr_init_bug,
    shader_cycles_register,
    si_scheduler,
    smem_to_vector_write_hazard,
    southern_islands,
    sramecc,
    sramecc_support,
    tgsplit,
    trap_handler,
    trig_reduced_range,
    true16,
    unaligned_access_mode,
    unaligned_buffer_access,
    unaligned_ds_access,
    unaligned_scratch_access,
    unpacked_d16_vmem,
    unsafe_ds_offset_folding,
    user_sgpr_init16_bug,
    valu_trans_use_hazard,
    vcmpx_exec_war_hazard,
    vcmpx_permlane_hazard,
    vgpr_index_mode,
    vmem_to_scalar_write_hazard,
    volcanic_islands,
    vop3_literal,
    vop3p,
    vopd,
    vscnt,
    wavefrontsize16,
    wavefrontsize32,
    wavefrontsize64,
    xnack,
    xnack_support,
};
// The generic feature-set helpers, specialised once for this target's
// `Feature` enum; the public names below simply re-export its members.
const amdgpu_feature_set_fns = CpuFeature.feature_set_fns(Feature);

pub const featureSet = amdgpu_feature_set_fns.featureSet;
pub const featureSetHas = amdgpu_feature_set_fns.featureSetHas;
pub const featureSetHasAny = amdgpu_feature_set_fns.featureSetHasAny;
pub const featureSetHasAll = amdgpu_feature_set_fns.featureSetHasAll;
pub const all_features = blk: {
const len = @typeInfo(Feature).Enum.fields.len;
std.debug.assert(len <= CpuFeature.Set.needed_bit_count);
var result: [len]CpuFeature = undefined;
result[@intFromEnum(Feature.@"16_bit_insts")] = .{
.llvm_name = "16-bit-insts",
.description = "Has i16/f16 instructions",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.a16)] = .{
.llvm_name = "a16",
.description = "Support A16 for 16-bit coordinates/gradients/lod/clamp/mip image operands",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.add_no_carry_insts)] = .{
.llvm_name = "add-no-carry-insts",
.description = "Have VALU add/sub instructions without carry out",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.aperture_regs)] = .{
.llvm_name = "aperture-regs",
.description = "Has Memory Aperture Base and Size Registers",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.architected_flat_scratch)] = .{
.llvm_name = "architected-flat-scratch",
.description = "Flat Scratch register is a readonly SPI initialized architected register",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.architected_sgprs)] = .{
.llvm_name = "architected-sgprs",
.description = "Enable the architected SGPRs",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.atomic_buffer_global_pk_add_f16_insts)] = .{
.llvm_name = "atomic-buffer-global-pk-add-f16-insts",
.description = "Has buffer_atomic_pk_add_f16 and global_atomic_pk_add_f16 instructions that can return original value",
.dependencies = featureSet(&[_]Feature{
.flat_global_insts,
}),
};
result[@intFromEnum(Feature.atomic_buffer_global_pk_add_f16_no_rtn_insts)] = .{
.llvm_name = "atomic-buffer-global-pk-add-f16-no-rtn-insts",
.description = "Has buffer_atomic_pk_add_f16 and global_atomic_pk_add_f16 instructions that don't return original value",
.dependencies = featureSet(&[_]Feature{
.flat_global_insts,
}),
};
result[@intFromEnum(Feature.atomic_ds_pk_add_16_insts)] = .{
.llvm_name = "atomic-ds-pk-add-16-insts",
.description = "Has ds_pk_add_bf16, ds_pk_add_f16, ds_pk_add_rtn_bf16, ds_pk_add_rtn_f16 instructions",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.atomic_fadd_no_rtn_insts)] = .{
.llvm_name = "atomic-fadd-no-rtn-insts",
.description = "Has buffer_atomic_add_f32 and global_atomic_add_f32 instructions that don't return original value",
.dependencies = featureSet(&[_]Feature{
.flat_global_insts,
}),
};
result[@intFromEnum(Feature.atomic_fadd_rtn_insts)] = .{
.llvm_name = "atomic-fadd-rtn-insts",
.description = "Has buffer_atomic_add_f32 and global_atomic_add_f32 instructions that return original value",
.dependencies = featureSet(&[_]Feature{
.flat_global_insts,
}),
};
result[@intFromEnum(Feature.atomic_flat_pk_add_16_insts)] = .{
.llvm_name = "atomic-flat-pk-add-16-insts",
.description = "Has flat_atomic_pk_add_f16 and flat_atomic_pk_add_bf16 instructions",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.atomic_global_pk_add_bf16_inst)] = .{
.llvm_name = "atomic-global-pk-add-bf16-inst",
.description = "Has global_atomic_pk_add_bf16 instruction",
.dependencies = featureSet(&[_]Feature{
.flat_global_insts,
}),
};
result[@intFromEnum(Feature.auto_waitcnt_before_barrier)] = .{
.llvm_name = "auto-waitcnt-before-barrier",
.description = "Hardware automatically inserts waitcnt before barrier",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.back_off_barrier)] = .{
.llvm_name = "back-off-barrier",
.description = "Hardware supports backing off s_barrier if an exception occurs",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.ci_insts)] = .{
.llvm_name = "ci-insts",
.description = "Additional instructions for CI+",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.cumode)] = .{
.llvm_name = "cumode",
.description = "Enable CU wavefront execution mode",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.dl_insts)] = .{
.llvm_name = "dl-insts",
.description = "Has v_fmac_f32 and v_xnor_b32 instructions",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.dot10_insts)] = .{
.llvm_name = "dot10-insts",
.description = "Has v_dot2_f32_f16 instruction",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.dot1_insts)] = .{
.llvm_name = "dot1-insts",
.description = "Has v_dot4_i32_i8 and v_dot8_i32_i4 instructions",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.dot2_insts)] = .{
.llvm_name = "dot2-insts",
.description = "Has v_dot2_i32_i16, v_dot2_u32_u16 instructions",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.dot3_insts)] = .{
.llvm_name = "dot3-insts",
.description = "Has v_dot8c_i32_i4 instruction",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.dot4_insts)] = .{
.llvm_name = "dot4-insts",
.description = "Has v_dot2c_i32_i16 instruction",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.dot5_insts)] = .{
.llvm_name = "dot5-insts",
.description = "Has v_dot2c_f32_f16 instruction",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.dot6_insts)] = .{
.llvm_name = "dot6-insts",
.description = "Has v_dot4c_i32_i8 instruction",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.dot7_insts)] = .{
.llvm_name = "dot7-insts",
.description = "Has v_dot4_u32_u8, v_dot8_u32_u4 instructions",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.dot8_insts)] = .{
.llvm_name = "dot8-insts",
.description = "Has v_dot4_i32_iu8, v_dot8_i32_iu4 instructions",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.dot9_insts)] = .{
.llvm_name = "dot9-insts",
.description = "Has v_dot2_f16_f16, v_dot2_bf16_bf16, v_dot2_f32_bf16 instructions",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.dpp)] = .{
.llvm_name = "dpp",
.description = "Support DPP (Data Parallel Primitives) extension",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.dpp8)] = .{
.llvm_name = "dpp8",
.description = "Support DPP8 (Data Parallel Primitives) extension",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.dpp_64bit)] = .{
.llvm_name = "dpp-64bit",
.description = "Support DPP (Data Parallel Primitives) extension",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.ds128)] = .{
.llvm_name = "enable-ds128",
.description = "Use ds_{read|write}_b128",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.ds_src2_insts)] = .{
.llvm_name = "ds-src2-insts",
.description = "Has ds_*_src2 instructions",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.extended_image_insts)] = .{
.llvm_name = "extended-image-insts",
.description = "Support mips != 0, lod != 0, gather4, and get_lod",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.fast_denormal_f32)] = .{
.llvm_name = "fast-denormal-f32",
.description = "Enabling denormals does not cause f32 instructions to run at f64 rates",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.fast_fmaf)] = .{
.llvm_name = "fast-fmaf",
.description = "Assuming f32 fma is at least as fast as mul + add",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.flat_address_space)] = .{
.llvm_name = "flat-address-space",
.description = "Support flat address space",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.flat_atomic_fadd_f32_inst)] = .{
.llvm_name = "flat-atomic-fadd-f32-inst",
.description = "Has flat_atomic_add_f32 instruction",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.flat_for_global)] = .{
.llvm_name = "flat-for-global",
.description = "Force to generate flat instruction for global",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.flat_global_insts)] = .{
.llvm_name = "flat-global-insts",
.description = "Have global_* flat memory instructions",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.flat_inst_offsets)] = .{
.llvm_name = "flat-inst-offsets",
.description = "Flat instructions have immediate offset addressing mode",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.flat_scratch)] = .{
.llvm_name = "enable-flat-scratch",
.description = "Use scratch_* flat memory instructions to access scratch",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.flat_scratch_insts)] = .{
.llvm_name = "flat-scratch-insts",
.description = "Have scratch_* flat memory instructions",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.flat_segment_offset_bug)] = .{
.llvm_name = "flat-segment-offset-bug",
.description = "GFX10 bug where inst_offset is ignored when flat instructions access global memory",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.fma_mix_insts)] = .{
.llvm_name = "fma-mix-insts",
.description = "Has v_fma_mix_f32, v_fma_mixlo_f16, v_fma_mixhi_f16 instructions",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.fmacf64_inst)] = .{
.llvm_name = "fmacf64-inst",
.description = "Has v_fmac_f64 instruction",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.fmaf)] = .{
.llvm_name = "fmaf",
.description = "Enable single precision FMA (not as fast as mul+add, but fused)",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.force_store_sc0_sc1)] = .{
.llvm_name = "force-store-sc0-sc1",
.description = "Has SC0 and SC1 on stores",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.fp64)] = .{
.llvm_name = "fp64",
.description = "Enable double precision operations",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.fp8_insts)] = .{
.llvm_name = "fp8-insts",
.description = "Has fp8 and bf8 instructions",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.full_rate_64_ops)] = .{
.llvm_name = "full-rate-64-ops",
.description = "Most fp64 instructions are full rate",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.g16)] = .{
.llvm_name = "g16",
.description = "Support G16 for 16-bit gradient image operands",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.gcn3_encoding)] = .{
.llvm_name = "gcn3-encoding",
.description = "Encoding format for VI",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.get_wave_id_inst)] = .{
.llvm_name = "get-wave-id-inst",
.description = "Has s_get_waveid_in_workgroup instruction",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.gfx10)] = .{
.llvm_name = "gfx10",
.description = "GFX10 GPU generation",
.dependencies = featureSet(&[_]Feature{
.@"16_bit_insts",
.a16,
.add_no_carry_insts,
.aperture_regs,
.ci_insts,
.dpp,
.dpp8,
.extended_image_insts,
.fast_denormal_f32,
.fast_fmaf,
.flat_address_space,
.flat_global_insts,
.flat_inst_offsets,
.flat_scratch_insts,
.fma_mix_insts,
.fp64,
.g16,
.gfx10_insts,
.gfx8_insts,
.gfx9_insts,
.image_insts,
.int_clamp_insts,
.inv_2pi_inline_imm,
.localmemorysize65536,
.mimg_r128,
.movrel,
.no_data_dep_hazard,
.no_sdst_cmpx,
.pk_fmac_f16_inst,
.s_memrealtime,
.s_memtime_inst,
.sdwa,
.sdwa_omod,
.sdwa_scalar,
.sdwa_sdst,
.unaligned_buffer_access,
.unaligned_ds_access,
.vop3_literal,
.vop3p,
.vscnt,
}),
};
result[@intFromEnum(Feature.gfx10_3_insts)] = .{
.llvm_name = "gfx10-3-insts",
.description = "Additional instructions for GFX10.3",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.gfx10_a_encoding)] = .{
.llvm_name = "gfx10_a-encoding",
.description = "Has BVH ray tracing instructions",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.gfx10_b_encoding)] = .{
.llvm_name = "gfx10_b-encoding",
.description = "Encoding format GFX10_B",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.gfx10_insts)] = .{
.llvm_name = "gfx10-insts",
.description = "Additional instructions for GFX10+",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.gfx11)] = .{
.llvm_name = "gfx11",
.description = "GFX11 GPU generation",
.dependencies = featureSet(&[_]Feature{
.@"16_bit_insts",
.a16,
.add_no_carry_insts,
.aperture_regs,
.ci_insts,
.dpp,
.dpp8,
.extended_image_insts,
.fast_denormal_f32,
.fast_fmaf,
.flat_address_space,
.flat_global_insts,
.flat_inst_offsets,
.flat_scratch_insts,
.fma_mix_insts,
.fp64,
.g16,
.gfx10_3_insts,
.gfx10_a_encoding,
.gfx10_b_encoding,
.gfx10_insts,
.gfx11_insts,
.gfx8_insts,
.gfx9_insts,
.int_clamp_insts,
.inv_2pi_inline_imm,
.localmemorysize65536,
.mimg_r128,
.movrel,
.no_data_dep_hazard,
.no_sdst_cmpx,
.pk_fmac_f16_inst,
.true16,
.unaligned_buffer_access,
.unaligned_ds_access,
.vop3_literal,
.vop3p,
.vopd,
.vscnt,
}),
};
result[@intFromEnum(Feature.gfx11_full_vgprs)] = .{
.llvm_name = "gfx11-full-vgprs",
.description = "GFX11 with 50% more physical VGPRs and 50% larger allocation granule than GFX10",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.gfx11_insts)] = .{
.llvm_name = "gfx11-insts",
.description = "Additional instructions for GFX11+",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.gfx7_gfx8_gfx9_insts)] = .{
.llvm_name = "gfx7-gfx8-gfx9-insts",
.description = "Instructions shared in GFX7, GFX8, GFX9",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.gfx8_insts)] = .{
.llvm_name = "gfx8-insts",
.description = "Additional instructions for GFX8+",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.gfx9)] = .{
.llvm_name = "gfx9",
.description = "GFX9 GPU generation",
.dependencies = featureSet(&[_]Feature{
.@"16_bit_insts",
.a16,
.add_no_carry_insts,
.aperture_regs,
.ci_insts,
.dpp,
.fast_denormal_f32,
.fast_fmaf,
.flat_address_space,
.flat_global_insts,
.flat_inst_offsets,
.flat_scratch_insts,
.fp64,
.gcn3_encoding,
.gfx7_gfx8_gfx9_insts,
.gfx8_insts,
.gfx9_insts,
.int_clamp_insts,
.inv_2pi_inline_imm,
.localmemorysize65536,
.negative_scratch_offset_bug,
.r128_a16,
.s_memrealtime,
.s_memtime_inst,
.scalar_atomics,
.scalar_flat_scratch_insts,
.scalar_stores,
.sdwa,
.sdwa_omod,
.sdwa_scalar,
.sdwa_sdst,
.unaligned_buffer_access,
.unaligned_ds_access,
.vgpr_index_mode,
.vop3p,
.wavefrontsize64,
.xnack_support,
}),
};
result[@intFromEnum(Feature.gfx90a_insts)] = .{
.llvm_name = "gfx90a-insts",
.description = "Additional instructions for GFX90A+",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.gfx940_insts)] = .{
.llvm_name = "gfx940-insts",
.description = "Additional instructions for GFX940+",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.gfx9_insts)] = .{
.llvm_name = "gfx9-insts",
.description = "Additional instructions for GFX9+",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.half_rate_64_ops)] = .{
.llvm_name = "half-rate-64-ops",
.description = "Most fp64 instructions are half rate instead of quarter",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.image_gather4_d16_bug)] = .{
.llvm_name = "image-gather4-d16-bug",
.description = "Image Gather4 D16 hardware bug",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.image_insts)] = .{
.llvm_name = "image-insts",
.description = "Support image instructions",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.image_store_d16_bug)] = .{
.llvm_name = "image-store-d16-bug",
.description = "Image Store D16 hardware bug",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.inst_fwd_prefetch_bug)] = .{
.llvm_name = "inst-fwd-prefetch-bug",
.description = "S_INST_PREFETCH instruction causes shader to hang",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.int_clamp_insts)] = .{
.llvm_name = "int-clamp-insts",
.description = "Support clamp for integer destination",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.inv_2pi_inline_imm)] = .{
.llvm_name = "inv-2pi-inline-imm",
.description = "Has 1 / (2 * pi) as inline immediate",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.lds_branch_vmem_war_hazard)] = .{
.llvm_name = "lds-branch-vmem-war-hazard",
.description = "Switching between LDS and VMEM-tex not waiting VM_VSRC=0",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.lds_misaligned_bug)] = .{
.llvm_name = "lds-misaligned-bug",
.description = "Some GFX10 bug with multi-dword LDS and flat access that is not naturally aligned in WGP mode",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.ldsbankcount16)] = .{
.llvm_name = "ldsbankcount16",
.description = "The number of LDS banks per compute unit.",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.ldsbankcount32)] = .{
.llvm_name = "ldsbankcount32",
.description = "The number of LDS banks per compute unit.",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.load_store_opt)] = .{
.llvm_name = "load-store-opt",
.description = "Enable SI load/store optimizer pass",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.localmemorysize32768)] = .{
.llvm_name = "localmemorysize32768",
.description = "The size of local memory in bytes",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.localmemorysize65536)] = .{
.llvm_name = "localmemorysize65536",
.description = "The size of local memory in bytes",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.mad_intra_fwd_bug)] = .{
.llvm_name = "mad-intra-fwd-bug",
.description = "MAD_U64/I64 intra instruction forwarding bug",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.mad_mac_f32_insts)] = .{
.llvm_name = "mad-mac-f32-insts",
.description = "Has v_mad_f32/v_mac_f32/v_madak_f32/v_madmk_f32 instructions",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.mad_mix_insts)] = .{
.llvm_name = "mad-mix-insts",
.description = "Has v_mad_mix_f32, v_mad_mixlo_f16, v_mad_mixhi_f16 instructions",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.mai_insts)] = .{
.llvm_name = "mai-insts",
.description = "Has mAI instructions",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.max_private_element_size_16)] = .{
.llvm_name = "max-private-element-size-16",
.description = "Maximum private access size may be 16",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.max_private_element_size_4)] = .{
.llvm_name = "max-private-element-size-4",
.description = "Maximum private access size may be 4",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.max_private_element_size_8)] = .{
.llvm_name = "max-private-element-size-8",
.description = "Maximum private access size may be 8",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.mfma_inline_literal_bug)] = .{
.llvm_name = "mfma-inline-literal-bug",
.description = "MFMA cannot use inline literal as SrcC",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.mimg_r128)] = .{
.llvm_name = "mimg-r128",
.description = "Support 128-bit texture resources",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.movrel)] = .{
.llvm_name = "movrel",
.description = "Has v_movrel*_b32 instructions",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.negative_scratch_offset_bug)] = .{
.llvm_name = "negative-scratch-offset-bug",
.description = "Negative immediate offsets in scratch instructions with an SGPR offset page fault on GFX9",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.negative_unaligned_scratch_offset_bug)] = .{
.llvm_name = "negative-unaligned-scratch-offset-bug",
.description = "Scratch instructions with a VGPR offset and a negative immediate offset that is not a multiple of 4 read wrong memory on GFX10",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.no_data_dep_hazard)] = .{
.llvm_name = "no-data-dep-hazard",
.description = "Does not need SW waitstates",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.no_sdst_cmpx)] = .{
.llvm_name = "no-sdst-cmpx",
.description = "V_CMPX does not write VCC/SGPR in addition to EXEC",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.nsa_clause_bug)] = .{
.llvm_name = "nsa-clause-bug",
.description = "MIMG-NSA in a hard clause has unpredictable results on GFX10.1",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.nsa_encoding)] = .{
.llvm_name = "nsa-encoding",
.description = "Support NSA encoding for image instructions",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.nsa_to_vmem_bug)] = .{
.llvm_name = "nsa-to-vmem-bug",
.description = "MIMG-NSA followed by VMEM fail if EXEC_LO or EXEC_HI equals zero",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.offset_3f_bug)] = .{
.llvm_name = "offset-3f-bug",
.description = "Branch offset of 3f hardware bug",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.packed_fp32_ops)] = .{
.llvm_name = "packed-fp32-ops",
.description = "Support packed fp32 instructions",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.packed_tid)] = .{
.llvm_name = "packed-tid",
.description = "Workitem IDs are packed into v0 at kernel launch",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.partial_nsa_encoding)] = .{
.llvm_name = "partial-nsa-encoding",
.description = "Support partial NSA encoding for image instructions",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.pk_fmac_f16_inst)] = .{
.llvm_name = "pk-fmac-f16-inst",
.description = "Has v_pk_fmac_f16 instruction",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.promote_alloca)] = .{
.llvm_name = "promote-alloca",
.description = "Enable promote alloca pass",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.prt_strict_null)] = .{
.llvm_name = "enable-prt-strict-null",
.description = "Enable zeroing of result registers for sparse texture fetches",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.r128_a16)] = .{
.llvm_name = "r128-a16",
.description = "Support gfx9-style A16 for 16-bit coordinates/gradients/lod/clamp/mip image operands, where a16 is aliased with r128",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.s_memrealtime)] = .{
.llvm_name = "s-memrealtime",
.description = "Has s_memrealtime instruction",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.s_memtime_inst)] = .{
.llvm_name = "s-memtime-inst",
.description = "Has s_memtime instruction",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.scalar_atomics)] = .{
.llvm_name = "scalar-atomics",
.description = "Has atomic scalar memory instructions",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.scalar_flat_scratch_insts)] = .{
.llvm_name = "scalar-flat-scratch-insts",
.description = "Have s_scratch_* flat memory instructions",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.scalar_stores)] = .{
.llvm_name = "scalar-stores",
.description = "Has store scalar memory instructions",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.sdwa)] = .{
.llvm_name = "sdwa",
.description = "Support SDWA (Sub-DWORD Addressing) extension",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.sdwa_mav)] = .{
.llvm_name = "sdwa-mav",
.description = "Support v_mac_f32/f16 with SDWA (Sub-DWORD Addressing) extension",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.sdwa_omod)] = .{
.llvm_name = "sdwa-omod",
.description = "Support OMod with SDWA (Sub-DWORD Addressing) extension",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.sdwa_out_mods_vopc)] = .{
.llvm_name = "sdwa-out-mods-vopc",
.description = "Support clamp for VOPC with SDWA (Sub-DWORD Addressing) extension",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.sdwa_scalar)] = .{
.llvm_name = "sdwa-scalar",
.description = "Support scalar register with SDWA (Sub-DWORD Addressing) extension",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.sdwa_sdst)] = .{
.llvm_name = "sdwa-sdst",
.description = "Support scalar dst for VOPC with SDWA (Sub-DWORD Addressing) extension",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.sea_islands)] = .{
.llvm_name = "sea-islands",
.description = "SEA_ISLANDS GPU generation",
.dependencies = featureSet(&[_]Feature{
.ci_insts,
.ds_src2_insts,
.extended_image_insts,
.flat_address_space,
.fp64,
.gfx7_gfx8_gfx9_insts,
.image_insts,
.localmemorysize65536,
.mad_mac_f32_insts,
.mimg_r128,
.movrel,
.s_memtime_inst,
.trig_reduced_range,
.unaligned_buffer_access,
.wavefrontsize64,
}),
};
result[@intFromEnum(Feature.sgpr_init_bug)] = .{
.llvm_name = "sgpr-init-bug",
.description = "VI SGPR initialization bug requiring a fixed SGPR allocation size",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.shader_cycles_register)] = .{
.llvm_name = "shader-cycles-register",
.description = "Has SHADER_CYCLES hardware register",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.si_scheduler)] = .{
.llvm_name = "si-scheduler",
.description = "Enable SI Machine Scheduler",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.smem_to_vector_write_hazard)] = .{
.llvm_name = "smem-to-vector-write-hazard",
.description = "s_load_dword followed by v_cmp page faults",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.southern_islands)] = .{
.llvm_name = "southern-islands",
.description = "SOUTHERN_ISLANDS GPU generation",
.dependencies = featureSet(&[_]Feature{
.ds_src2_insts,
.extended_image_insts,
.fp64,
.image_insts,
.ldsbankcount32,
.localmemorysize32768,
.mad_mac_f32_insts,
.mimg_r128,
.movrel,
.s_memtime_inst,
.trig_reduced_range,
.wavefrontsize64,
}),
};
result[@intFromEnum(Feature.sramecc)] = .{
.llvm_name = "sramecc",
.description = "Enable SRAMECC",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.sramecc_support)] = .{
.llvm_name = "sramecc-support",
.description = "Hardware supports SRAMECC",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.tgsplit)] = .{
.llvm_name = "tgsplit",
.description = "Enable threadgroup split execution",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.trap_handler)] = .{
.llvm_name = "trap-handler",
.description = "Trap handler support",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.trig_reduced_range)] = .{
.llvm_name = "trig-reduced-range",
.description = "Requires use of fract on arguments to trig instructions",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.true16)] = .{
.llvm_name = "true16",
.description = "True 16-bit operand instructions",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.unaligned_access_mode)] = .{
.llvm_name = "unaligned-access-mode",
.description = "Enable unaligned global, local and region loads and stores if the hardware supports it",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.unaligned_buffer_access)] = .{
.llvm_name = "unaligned-buffer-access",
.description = "Hardware supports unaligned global loads and stores",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.unaligned_ds_access)] = .{
.llvm_name = "unaligned-ds-access",
.description = "Hardware supports unaligned local and region loads and stores",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.unaligned_scratch_access)] = .{
.llvm_name = "unaligned-scratch-access",
.description = "Support unaligned scratch loads and stores",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.unpacked_d16_vmem)] = .{
.llvm_name = "unpacked-d16-vmem",
.description = "Has unpacked d16 vmem instructions",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.unsafe_ds_offset_folding)] = .{
.llvm_name = "unsafe-ds-offset-folding",
.description = "Force using DS instruction immediate offsets on SI",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.user_sgpr_init16_bug)] = .{
.llvm_name = "user-sgpr-init16-bug",
.description = "Bug requiring at least 16 user+system SGPRs to be enabled",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.valu_trans_use_hazard)] = .{
.llvm_name = "valu-trans-use-hazard",
.description = "Hazard when TRANS instructions are closely followed by a use of the result",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.vcmpx_exec_war_hazard)] = .{
.llvm_name = "vcmpx-exec-war-hazard",
.description = "V_CMPX WAR hazard on EXEC (V_CMPX issue ONLY)",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.vcmpx_permlane_hazard)] = .{
.llvm_name = "vcmpx-permlane-hazard",
.description = "TODO: describe me",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.vgpr_index_mode)] = .{
.llvm_name = "vgpr-index-mode",
.description = "Has VGPR mode register indexing",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.vmem_to_scalar_write_hazard)] = .{
.llvm_name = "vmem-to-scalar-write-hazard",
.description = "VMEM instruction followed by scalar writing to EXEC mask, M0 or SGPR leads to incorrect execution.",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.volcanic_islands)] = .{
.llvm_name = "volcanic-islands",
.description = "VOLCANIC_ISLANDS GPU generation",
.dependencies = featureSet(&[_]Feature{
.@"16_bit_insts",
.ci_insts,
.dpp,
.ds_src2_insts,
.extended_image_insts,
.fast_denormal_f32,
.flat_address_space,
.fp64,
.gcn3_encoding,
.gfx7_gfx8_gfx9_insts,
.gfx8_insts,
.image_insts,
.int_clamp_insts,
.inv_2pi_inline_imm,
.localmemorysize65536,
.mad_mac_f32_insts,
.mimg_r128,
.movrel,
.s_memrealtime,
.s_memtime_inst,
.scalar_stores,
.sdwa,
.sdwa_mav,
.sdwa_out_mods_vopc,
.trig_reduced_range,
.unaligned_buffer_access,
.vgpr_index_mode,
.wavefrontsize64,
}),
};
result[@intFromEnum(Feature.vop3_literal)] = .{
.llvm_name = "vop3-literal",
.description = "Can use one literal in VOP3",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.vop3p)] = .{
.llvm_name = "vop3p",
.description = "Has VOP3P packed instructions",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.vopd)] = .{
.llvm_name = "vopd",
.description = "Has VOPD dual issue wave32 instructions",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.vscnt)] = .{
.llvm_name = "vscnt",
.description = "Has separate store vscnt counter",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.wavefrontsize16)] = .{
.llvm_name = "wavefrontsize16",
.description = "The number of threads per wavefront",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.wavefrontsize32)] = .{
.llvm_name = "wavefrontsize32",
.description = "The number of threads per wavefront",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.wavefrontsize64)] = .{
.llvm_name = "wavefrontsize64",
.description = "The number of threads per wavefront",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.xnack)] = .{
.llvm_name = "xnack",
.description = "Enable XNACK support",
.dependencies = featureSet(&[_]Feature{}),
};
result[@intFromEnum(Feature.xnack_support)] = .{
.llvm_name = "xnack-support",
.description = "Hardware supports XNACK",
.dependencies = featureSet(&[_]Feature{}),
};
// The per-feature assignments above set only `llvm_name`, `description` and
// `dependencies`. Fill in the remaining fields for every entry here: `index`
// is the entry's position in `result`, and `name` is the name of the
// `Feature` enum field at that same position.
const ti = @typeInfo(Feature);
for (&result, 0..) |*elem, i| {
elem.index = i;
elem.name = ti.Enum.fields[i].name;
}
break :blk result;
};
/// Namespace of CPU models, one `CpuModel` declaration per model.
///
/// Every declaration provides `name`, `llvm_name`, and the set of `Feature`s
/// listed directly for that model (built with `featureSet`). Where several
/// models list exactly the same features, the doc comments below say so.
pub const cpu = struct {
    /// Same feature list as `gfx700`, `gfx704` and `kaveri`.
    pub const bonaire: CpuModel = .{
        .name = "bonaire",
        .llvm_name = "bonaire",
        .features = featureSet(&[_]Feature{
            .ldsbankcount32,
            .sea_islands,
        }),
    };

    /// Same feature list as `gfx801`.
    pub const carrizo: CpuModel = .{
        .name = "carrizo",
        .llvm_name = "carrizo",
        .features = featureSet(&[_]Feature{
            .fast_fmaf,
            .half_rate_64_ops,
            .ldsbankcount32,
            .unpacked_d16_vmem,
            .volcanic_islands,
            .xnack_support,
        }),
    };

    /// Same feature list as `gfx803`, `polaris10` and `polaris11`.
    pub const fiji: CpuModel = .{
        .name = "fiji",
        .llvm_name = "fiji",
        .features = featureSet(&[_]Feature{
            .ldsbankcount32,
            .unpacked_d16_vmem,
            .volcanic_islands,
        }),
    };

    /// Lists only `wavefrontsize64`.
    pub const generic: CpuModel = .{
        .name = "generic",
        .llvm_name = "generic",
        .features = featureSet(&[_]Feature{
            .wavefrontsize64,
        }),
    };

    /// The `generic` list plus `flat_address_space`.
    /// Note that `name` uses an underscore where `llvm_name` uses a hyphen.
    pub const generic_hsa: CpuModel = .{
        .name = "generic_hsa",
        .llvm_name = "generic-hsa",
        .features = featureSet(&[_]Feature{
            .flat_address_space,
            .wavefrontsize64,
        }),
    };

    /// `gfx1011`/`gfx1012` and `gfx1013` each list these features plus extras.
    pub const gfx1010: CpuModel = .{
        .name = "gfx1010",
        .llvm_name = "gfx1010",
        .features = featureSet(&[_]Feature{
            .back_off_barrier,
            .dl_insts,
            .ds_src2_insts,
            .flat_segment_offset_bug,
            .get_wave_id_inst,
            .gfx10,
            .inst_fwd_prefetch_bug,
            .lds_branch_vmem_war_hazard,
            .lds_misaligned_bug,
            .ldsbankcount32,
            .mad_mac_f32_insts,
            .negative_unaligned_scratch_offset_bug,
            .nsa_clause_bug,
            .nsa_encoding,
            .nsa_to_vmem_bug,
            .offset_3f_bug,
            .scalar_atomics,
            .scalar_flat_scratch_insts,
            .scalar_stores,
            .smem_to_vector_write_hazard,
            .vcmpx_exec_war_hazard,
            .vcmpx_permlane_hazard,
            .vmem_to_scalar_write_hazard,
            .wavefrontsize32,
            .xnack_support,
        }),
    };

    /// The `gfx1010` list plus the `dot1`/`dot2`/`dot5`/`dot6`/`dot7`/`dot10`
    /// instruction features. Same feature list as `gfx1012`.
    pub const gfx1011: CpuModel = .{
        .name = "gfx1011",
        .llvm_name = "gfx1011",
        .features = featureSet(&[_]Feature{
            .back_off_barrier,
            .dl_insts,
            .dot10_insts,
            .dot1_insts,
            .dot2_insts,
            .dot5_insts,
            .dot6_insts,
            .dot7_insts,
            .ds_src2_insts,
            .flat_segment_offset_bug,
            .get_wave_id_inst,
            .gfx10,
            .inst_fwd_prefetch_bug,
            .lds_branch_vmem_war_hazard,
            .lds_misaligned_bug,
            .ldsbankcount32,
            .mad_mac_f32_insts,
            .negative_unaligned_scratch_offset_bug,
            .nsa_clause_bug,
            .nsa_encoding,
            .nsa_to_vmem_bug,
            .offset_3f_bug,
            .scalar_atomics,
            .scalar_flat_scratch_insts,
            .scalar_stores,
            .smem_to_vector_write_hazard,
            .vcmpx_exec_war_hazard,
            .vcmpx_permlane_hazard,
            .vmem_to_scalar_write_hazard,
            .wavefrontsize32,
            .xnack_support,
        }),
    };

    /// Same feature list as `gfx1011`.
    pub const gfx1012: CpuModel = .{
        .name = "gfx1012",
        .llvm_name = "gfx1012",
        .features = featureSet(&[_]Feature{
            .back_off_barrier,
            .dl_insts,
            .dot10_insts,
            .dot1_insts,
            .dot2_insts,
            .dot5_insts,
            .dot6_insts,
            .dot7_insts,
            .ds_src2_insts,
            .flat_segment_offset_bug,
            .get_wave_id_inst,
            .gfx10,
            .inst_fwd_prefetch_bug,
            .lds_branch_vmem_war_hazard,
            .lds_misaligned_bug,
            .ldsbankcount32,
            .mad_mac_f32_insts,
            .negative_unaligned_scratch_offset_bug,
            .nsa_clause_bug,
            .nsa_encoding,
            .nsa_to_vmem_bug,
            .offset_3f_bug,
            .scalar_atomics,
            .scalar_flat_scratch_insts,
            .scalar_stores,
            .smem_to_vector_write_hazard,
            .vcmpx_exec_war_hazard,
            .vcmpx_permlane_hazard,
            .vmem_to_scalar_write_hazard,
            .wavefrontsize32,
            .xnack_support,
        }),
    };

    /// The `gfx1010` list plus `gfx10_a_encoding`.
    pub const gfx1013: CpuModel = .{
        .name = "gfx1013",
        .llvm_name = "gfx1013",
        .features = featureSet(&[_]Feature{
            .back_off_barrier,
            .dl_insts,
            .ds_src2_insts,
            .flat_segment_offset_bug,
            .get_wave_id_inst,
            .gfx10,
            .gfx10_a_encoding,
            .inst_fwd_prefetch_bug,
            .lds_branch_vmem_war_hazard,
            .lds_misaligned_bug,
            .ldsbankcount32,
            .mad_mac_f32_insts,
            .negative_unaligned_scratch_offset_bug,
            .nsa_clause_bug,
            .nsa_encoding,
            .nsa_to_vmem_bug,
            .offset_3f_bug,
            .scalar_atomics,
            .scalar_flat_scratch_insts,
            .scalar_stores,
            .smem_to_vector_write_hazard,
            .vcmpx_exec_war_hazard,
            .vcmpx_permlane_hazard,
            .vmem_to_scalar_write_hazard,
            .wavefrontsize32,
            .xnack_support,
        }),
    };

    /// `gfx1031` through `gfx1036` list exactly the same features as this model.
    pub const gfx1030: CpuModel = .{
        .name = "gfx1030",
        .llvm_name = "gfx1030",
        .features = featureSet(&[_]Feature{
            .back_off_barrier,
            .dl_insts,
            .dot10_insts,
            .dot1_insts,
            .dot2_insts,
            .dot5_insts,
            .dot6_insts,
            .dot7_insts,
            .gfx10,
            .gfx10_3_insts,
            .gfx10_a_encoding,
            .gfx10_b_encoding,
            .ldsbankcount32,
            .nsa_encoding,
            .shader_cycles_register,
            .wavefrontsize32,
        }),
    };

    /// Same feature list as `gfx1030`.
    pub const gfx1031: CpuModel = .{
        .name = "gfx1031",
        .llvm_name = "gfx1031",
        .features = featureSet(&[_]Feature{
            .back_off_barrier,
            .dl_insts,
            .dot10_insts,
            .dot1_insts,
            .dot2_insts,
            .dot5_insts,
            .dot6_insts,
            .dot7_insts,
            .gfx10,
            .gfx10_3_insts,
            .gfx10_a_encoding,
            .gfx10_b_encoding,
            .ldsbankcount32,
            .nsa_encoding,
            .shader_cycles_register,
            .wavefrontsize32,
        }),
    };

    /// Same feature list as `gfx1030`.
    pub const gfx1032: CpuModel = .{
        .name = "gfx1032",
        .llvm_name = "gfx1032",
        .features = featureSet(&[_]Feature{
            .back_off_barrier,
            .dl_insts,
            .dot10_insts,
            .dot1_insts,
            .dot2_insts,
            .dot5_insts,
            .dot6_insts,
            .dot7_insts,
            .gfx10,
            .gfx10_3_insts,
            .gfx10_a_encoding,
            .gfx10_b_encoding,
            .ldsbankcount32,
            .nsa_encoding,
            .shader_cycles_register,
            .wavefrontsize32,
        }),
    };

    /// Same feature list as `gfx1030`.
    pub const gfx1033: CpuModel = .{
        .name = "gfx1033",
        .llvm_name = "gfx1033",
        .features = featureSet(&[_]Feature{
            .back_off_barrier,
            .dl_insts,
            .dot10_insts,
            .dot1_insts,
            .dot2_insts,
            .dot5_insts,
            .dot6_insts,
            .dot7_insts,
            .gfx10,
            .gfx10_3_insts,
            .gfx10_a_encoding,
            .gfx10_b_encoding,
            .ldsbankcount32,
            .nsa_encoding,
            .shader_cycles_register,
            .wavefrontsize32,
        }),
    };

    /// Same feature list as `gfx1030`.
    pub const gfx1034: CpuModel = .{
        .name = "gfx1034",
        .llvm_name = "gfx1034",
        .features = featureSet(&[_]Feature{
            .back_off_barrier,
            .dl_insts,
            .dot10_insts,
            .dot1_insts,
            .dot2_insts,
            .dot5_insts,
            .dot6_insts,
            .dot7_insts,
            .gfx10,
            .gfx10_3_insts,
            .gfx10_a_encoding,
            .gfx10_b_encoding,
            .ldsbankcount32,
            .nsa_encoding,
            .shader_cycles_register,
            .wavefrontsize32,
        }),
    };

    /// Same feature list as `gfx1030`.
    pub const gfx1035: CpuModel = .{
        .name = "gfx1035",
        .llvm_name = "gfx1035",
        .features = featureSet(&[_]Feature{
            .back_off_barrier,
            .dl_insts,
            .dot10_insts,
            .dot1_insts,
            .dot2_insts,
            .dot5_insts,
            .dot6_insts,
            .dot7_insts,
            .gfx10,
            .gfx10_3_insts,
            .gfx10_a_encoding,
            .gfx10_b_encoding,
            .ldsbankcount32,
            .nsa_encoding,
            .shader_cycles_register,
            .wavefrontsize32,
        }),
    };

    /// Same feature list as `gfx1030`.
    pub const gfx1036: CpuModel = .{
        .name = "gfx1036",
        .llvm_name = "gfx1036",
        .features = featureSet(&[_]Feature{
            .back_off_barrier,
            .dl_insts,
            .dot10_insts,
            .dot1_insts,
            .dot2_insts,
            .dot5_insts,
            .dot6_insts,
            .dot7_insts,
            .gfx10,
            .gfx10_3_insts,
            .gfx10_a_encoding,
            .gfx10_b_encoding,
            .ldsbankcount32,
            .nsa_encoding,
            .shader_cycles_register,
            .wavefrontsize32,
        }),
    };

    /// Lists both `gfx11_full_vgprs` and `user_sgpr_init16_bug`; `gfx1101`,
    /// `gfx1102` and `gfx1103` drop one or both of them.
    pub const gfx1100: CpuModel = .{
        .name = "gfx1100",
        .llvm_name = "gfx1100",
        .features = featureSet(&[_]Feature{
            .architected_flat_scratch,
            .atomic_fadd_no_rtn_insts,
            .atomic_fadd_rtn_insts,
            .dl_insts,
            .dot10_insts,
            .dot5_insts,
            .dot7_insts,
            .dot8_insts,
            .dot9_insts,
            .flat_atomic_fadd_f32_inst,
            .gfx11,
            .gfx11_full_vgprs,
            .image_insts,
            .ldsbankcount32,
            .mad_intra_fwd_bug,
            .nsa_encoding,
            .packed_tid,
            .partial_nsa_encoding,
            .shader_cycles_register,
            .user_sgpr_init16_bug,
            .valu_trans_use_hazard,
            .vcmpx_permlane_hazard,
            .wavefrontsize32,
        }),
    };

    /// The `gfx1100` list without `user_sgpr_init16_bug`.
    pub const gfx1101: CpuModel = .{
        .name = "gfx1101",
        .llvm_name = "gfx1101",
        .features = featureSet(&[_]Feature{
            .architected_flat_scratch,
            .atomic_fadd_no_rtn_insts,
            .atomic_fadd_rtn_insts,
            .dl_insts,
            .dot10_insts,
            .dot5_insts,
            .dot7_insts,
            .dot8_insts,
            .dot9_insts,
            .flat_atomic_fadd_f32_inst,
            .gfx11,
            .gfx11_full_vgprs,
            .image_insts,
            .ldsbankcount32,
            .mad_intra_fwd_bug,
            .nsa_encoding,
            .packed_tid,
            .partial_nsa_encoding,
            .shader_cycles_register,
            .valu_trans_use_hazard,
            .vcmpx_permlane_hazard,
            .wavefrontsize32,
        }),
    };

    /// The `gfx1100` list without `gfx11_full_vgprs`.
    pub const gfx1102: CpuModel = .{
        .name = "gfx1102",
        .llvm_name = "gfx1102",
        .features = featureSet(&[_]Feature{
            .architected_flat_scratch,
            .atomic_fadd_no_rtn_insts,
            .atomic_fadd_rtn_insts,
            .dl_insts,
            .dot10_insts,
            .dot5_insts,
            .dot7_insts,
            .dot8_insts,
            .dot9_insts,
            .flat_atomic_fadd_f32_inst,
            .gfx11,
            .image_insts,
            .ldsbankcount32,
            .mad_intra_fwd_bug,
            .nsa_encoding,
            .packed_tid,
            .partial_nsa_encoding,
            .shader_cycles_register,
            .user_sgpr_init16_bug,
            .valu_trans_use_hazard,
            .vcmpx_permlane_hazard,
            .wavefrontsize32,
        }),
    };

    /// The `gfx1100` list without `gfx11_full_vgprs` and `user_sgpr_init16_bug`.
    pub const gfx1103: CpuModel = .{
        .name = "gfx1103",
        .llvm_name = "gfx1103",
        .features = featureSet(&[_]Feature{
            .architected_flat_scratch,
            .atomic_fadd_no_rtn_insts,
            .atomic_fadd_rtn_insts,
            .dl_insts,
            .dot10_insts,
            .dot5_insts,
            .dot7_insts,
            .dot8_insts,
            .dot9_insts,
            .flat_atomic_fadd_f32_inst,
            .gfx11,
            .image_insts,
            .ldsbankcount32,
            .mad_intra_fwd_bug,
            .nsa_encoding,
            .packed_tid,
            .partial_nsa_encoding,
            .shader_cycles_register,
            .valu_trans_use_hazard,
            .vcmpx_permlane_hazard,
            .wavefrontsize32,
        }),
    };

    /// The `gfx1103` list without `valu_trans_use_hazard`.
    pub const gfx1150: CpuModel = .{
        .name = "gfx1150",
        .llvm_name = "gfx1150",
        .features = featureSet(&[_]Feature{
            .architected_flat_scratch,
            .atomic_fadd_no_rtn_insts,
            .atomic_fadd_rtn_insts,
            .dl_insts,
            .dot10_insts,
            .dot5_insts,
            .dot7_insts,
            .dot8_insts,
            .dot9_insts,
            .flat_atomic_fadd_f32_inst,
            .gfx11,
            .image_insts,
            .ldsbankcount32,
            .mad_intra_fwd_bug,
            .nsa_encoding,
            .packed_tid,
            .partial_nsa_encoding,
            .shader_cycles_register,
            .vcmpx_permlane_hazard,
            .wavefrontsize32,
        }),
    };

    /// The `gfx1150` list plus `gfx11_full_vgprs`.
    pub const gfx1151: CpuModel = .{
        .name = "gfx1151",
        .llvm_name = "gfx1151",
        .features = featureSet(&[_]Feature{
            .architected_flat_scratch,
            .atomic_fadd_no_rtn_insts,
            .atomic_fadd_rtn_insts,
            .dl_insts,
            .dot10_insts,
            .dot5_insts,
            .dot7_insts,
            .dot8_insts,
            .dot9_insts,
            .flat_atomic_fadd_f32_inst,
            .gfx11,
            .gfx11_full_vgprs,
            .image_insts,
            .ldsbankcount32,
            .mad_intra_fwd_bug,
            .nsa_encoding,
            .packed_tid,
            .partial_nsa_encoding,
            .shader_cycles_register,
            .vcmpx_permlane_hazard,
            .wavefrontsize32,
        }),
    };

    /// Same feature list as `tahiti`.
    pub const gfx600: CpuModel = .{
        .name = "gfx600",
        .llvm_name = "gfx600",
        .features = featureSet(&[_]Feature{
            .fast_fmaf,
            .half_rate_64_ops,
            .southern_islands,
        }),
    };

    /// Same feature list as `gfx602`, `hainan`, `oland`, `pitcairn` and `verde`.
    pub const gfx601: CpuModel = .{
        .name = "gfx601",
        .llvm_name = "gfx601",
        .features = featureSet(&[_]Feature{
            .southern_islands,
        }),
    };

    /// Same feature list as `gfx601`.
    pub const gfx602: CpuModel = .{
        .name = "gfx602",
        .llvm_name = "gfx602",
        .features = featureSet(&[_]Feature{
            .southern_islands,
        }),
    };

    /// Same feature list as `bonaire`.
    pub const gfx700: CpuModel = .{
        .name = "gfx700",
        .llvm_name = "gfx700",
        .features = featureSet(&[_]Feature{
            .ldsbankcount32,
            .sea_islands,
        }),
    };

    /// Same feature list as `hawaii`.
    pub const gfx701: CpuModel = .{
        .name = "gfx701",
        .llvm_name = "gfx701",
        .features = featureSet(&[_]Feature{
            .fast_fmaf,
            .half_rate_64_ops,
            .ldsbankcount32,
            .sea_islands,
        }),
    };

    /// The `gfx703` list plus `fast_fmaf`.
    pub const gfx702: CpuModel = .{
        .name = "gfx702",
        .llvm_name = "gfx702",
        .features = featureSet(&[_]Feature{
            .fast_fmaf,
            .ldsbankcount16,
            .sea_islands,
        }),
    };

    /// Same feature list as `gfx705`, `kabini` and `mullins`.
    pub const gfx703: CpuModel = .{
        .name = "gfx703",
        .llvm_name = "gfx703",
        .features = featureSet(&[_]Feature{
            .ldsbankcount16,
            .sea_islands,
        }),
    };

    /// Same feature list as `bonaire`.
    pub const gfx704: CpuModel = .{
        .name = "gfx704",
        .llvm_name = "gfx704",
        .features = featureSet(&[_]Feature{
            .ldsbankcount32,
            .sea_islands,
        }),
    };

    /// Same feature list as `gfx703`.
    pub const gfx705: CpuModel = .{
        .name = "gfx705",
        .llvm_name = "gfx705",
        .features = featureSet(&[_]Feature{
            .ldsbankcount16,
            .sea_islands,
        }),
    };

    /// Same feature list as `carrizo`.
    pub const gfx801: CpuModel = .{
        .name = "gfx801",
        .llvm_name = "gfx801",
        .features = featureSet(&[_]Feature{
            .fast_fmaf,
            .half_rate_64_ops,
            .ldsbankcount32,
            .unpacked_d16_vmem,
            .volcanic_islands,
            .xnack_support,
        }),
    };

    /// Same feature list as `gfx805`, `iceland`, `tonga` and `tongapro`.
    pub const gfx802: CpuModel = .{
        .name = "gfx802",
        .llvm_name = "gfx802",
        .features = featureSet(&[_]Feature{
            .ldsbankcount32,
            .sgpr_init_bug,
            .unpacked_d16_vmem,
            .volcanic_islands,
        }),
    };

    /// Same feature list as `fiji`.
    pub const gfx803: CpuModel = .{
        .name = "gfx803",
        .llvm_name = "gfx803",
        .features = featureSet(&[_]Feature{
            .ldsbankcount32,
            .unpacked_d16_vmem,
            .volcanic_islands,
        }),
    };

    /// Same feature list as `gfx802`.
    pub const gfx805: CpuModel = .{
        .name = "gfx805",
        .llvm_name = "gfx805",
        .features = featureSet(&[_]Feature{
            .ldsbankcount32,
            .sgpr_init_bug,
            .unpacked_d16_vmem,
            .volcanic_islands,
        }),
    };

    /// Same feature list as `stoney`.
    pub const gfx810: CpuModel = .{
        .name = "gfx810",
        .llvm_name = "gfx810",
        .features = featureSet(&[_]Feature{
            .image_gather4_d16_bug,
            .image_store_d16_bug,
            .ldsbankcount16,
            .volcanic_islands,
            .xnack_support,
        }),
    };

    /// Same feature list as `gfx902`, `gfx909` and `gfx90c`.
    pub const gfx900: CpuModel = .{
        .name = "gfx900",
        .llvm_name = "gfx900",
        .features = featureSet(&[_]Feature{
            .ds_src2_insts,
            .extended_image_insts,
            .gfx9,
            .image_gather4_d16_bug,
            .image_insts,
            .ldsbankcount32,
            .mad_mac_f32_insts,
            .mad_mix_insts,
        }),
    };

    /// Same feature list as `gfx900`.
    pub const gfx902: CpuModel = .{
        .name = "gfx902",
        .llvm_name = "gfx902",
        .features = featureSet(&[_]Feature{
            .ds_src2_insts,
            .extended_image_insts,
            .gfx9,
            .image_gather4_d16_bug,
            .image_insts,
            .ldsbankcount32,
            .mad_mac_f32_insts,
            .mad_mix_insts,
        }),
    };

    /// The `gfx900` list with `fma_mix_insts` in place of `mad_mix_insts`.
    pub const gfx904: CpuModel = .{
        .name = "gfx904",
        .llvm_name = "gfx904",
        .features = featureSet(&[_]Feature{
            .ds_src2_insts,
            .extended_image_insts,
            .fma_mix_insts,
            .gfx9,
            .image_gather4_d16_bug,
            .image_insts,
            .ldsbankcount32,
            .mad_mac_f32_insts,
        }),
    };

    pub const gfx906: CpuModel = .{
        .name = "gfx906",
        .llvm_name = "gfx906",
        .features = featureSet(&[_]Feature{
            .dl_insts,
            .dot10_insts,
            .dot1_insts,
            .dot2_insts,
            .dot7_insts,
            .ds_src2_insts,
            .extended_image_insts,
            .fma_mix_insts,
            .gfx9,
            .half_rate_64_ops,
            .image_gather4_d16_bug,
            .image_insts,
            .ldsbankcount32,
            .mad_mac_f32_insts,
            .sramecc_support,
        }),
    };

    pub const gfx908: CpuModel = .{
        .name = "gfx908",
        .llvm_name = "gfx908",
        .features = featureSet(&[_]Feature{
            .atomic_buffer_global_pk_add_f16_no_rtn_insts,
            .atomic_fadd_no_rtn_insts,
            .dl_insts,
            .dot10_insts,
            .dot1_insts,
            .dot2_insts,
            .dot3_insts,
            .dot4_insts,
            .dot5_insts,
            .dot6_insts,
            .dot7_insts,
            .ds_src2_insts,
            .extended_image_insts,
            .fma_mix_insts,
            .gfx9,
            .half_rate_64_ops,
            .image_gather4_d16_bug,
            .image_insts,
            .ldsbankcount32,
            .mad_mac_f32_insts,
            .mai_insts,
            .mfma_inline_literal_bug,
            .pk_fmac_f16_inst,
            .sramecc_support,
        }),
    };

    /// Same feature list as `gfx900`.
    pub const gfx909: CpuModel = .{
        .name = "gfx909",
        .llvm_name = "gfx909",
        .features = featureSet(&[_]Feature{
            .ds_src2_insts,
            .extended_image_insts,
            .gfx9,
            .image_gather4_d16_bug,
            .image_insts,
            .ldsbankcount32,
            .mad_mac_f32_insts,
            .mad_mix_insts,
        }),
    };

    pub const gfx90a: CpuModel = .{
        .name = "gfx90a",
        .llvm_name = "gfx90a",
        .features = featureSet(&[_]Feature{
            .atomic_buffer_global_pk_add_f16_insts,
            .atomic_fadd_no_rtn_insts,
            .atomic_fadd_rtn_insts,
            .back_off_barrier,
            .dl_insts,
            .dot10_insts,
            .dot1_insts,
            .dot2_insts,
            .dot3_insts,
            .dot4_insts,
            .dot5_insts,
            .dot6_insts,
            .dot7_insts,
            .dpp_64bit,
            .fma_mix_insts,
            .fmacf64_inst,
            .full_rate_64_ops,
            .gfx9,
            .gfx90a_insts,
            .image_insts,
            .ldsbankcount32,
            .mad_mac_f32_insts,
            .mai_insts,
            .packed_fp32_ops,
            .packed_tid,
            .pk_fmac_f16_inst,
            .sramecc_support,
        }),
    };

    /// Same feature list as `gfx900`.
    pub const gfx90c: CpuModel = .{
        .name = "gfx90c",
        .llvm_name = "gfx90c",
        .features = featureSet(&[_]Feature{
            .ds_src2_insts,
            .extended_image_insts,
            .gfx9,
            .image_gather4_d16_bug,
            .image_insts,
            .ldsbankcount32,
            .mad_mac_f32_insts,
            .mad_mix_insts,
        }),
    };

    /// Same feature list as `gfx941`.
    pub const gfx940: CpuModel = .{
        .name = "gfx940",
        .llvm_name = "gfx940",
        .features = featureSet(&[_]Feature{
            .architected_flat_scratch,
            .atomic_buffer_global_pk_add_f16_insts,
            .atomic_ds_pk_add_16_insts,
            .atomic_fadd_no_rtn_insts,
            .atomic_fadd_rtn_insts,
            .atomic_flat_pk_add_16_insts,
            .atomic_global_pk_add_bf16_inst,
            .back_off_barrier,
            .dl_insts,
            .dot10_insts,
            .dot1_insts,
            .dot2_insts,
            .dot3_insts,
            .dot4_insts,
            .dot5_insts,
            .dot6_insts,
            .dot7_insts,
            .dpp_64bit,
            .flat_atomic_fadd_f32_inst,
            .fma_mix_insts,
            .fmacf64_inst,
            .force_store_sc0_sc1,
            .fp8_insts,
            .full_rate_64_ops,
            .gfx9,
            .gfx90a_insts,
            .gfx940_insts,
            .ldsbankcount32,
            .mai_insts,
            .packed_fp32_ops,
            .packed_tid,
            .pk_fmac_f16_inst,
            .sramecc_support,
        }),
    };

    /// Same feature list as `gfx940`.
    pub const gfx941: CpuModel = .{
        .name = "gfx941",
        .llvm_name = "gfx941",
        .features = featureSet(&[_]Feature{
            .architected_flat_scratch,
            .atomic_buffer_global_pk_add_f16_insts,
            .atomic_ds_pk_add_16_insts,
            .atomic_fadd_no_rtn_insts,
            .atomic_fadd_rtn_insts,
            .atomic_flat_pk_add_16_insts,
            .atomic_global_pk_add_bf16_inst,
            .back_off_barrier,
            .dl_insts,
            .dot10_insts,
            .dot1_insts,
            .dot2_insts,
            .dot3_insts,
            .dot4_insts,
            .dot5_insts,
            .dot6_insts,
            .dot7_insts,
            .dpp_64bit,
            .flat_atomic_fadd_f32_inst,
            .fma_mix_insts,
            .fmacf64_inst,
            .force_store_sc0_sc1,
            .fp8_insts,
            .full_rate_64_ops,
            .gfx9,
            .gfx90a_insts,
            .gfx940_insts,
            .ldsbankcount32,
            .mai_insts,
            .packed_fp32_ops,
            .packed_tid,
            .pk_fmac_f16_inst,
            .sramecc_support,
        }),
    };

    /// The `gfx940` list without `force_store_sc0_sc1`.
    pub const gfx942: CpuModel = .{
        .name = "gfx942",
        .llvm_name = "gfx942",
        .features = featureSet(&[_]Feature{
            .architected_flat_scratch,
            .atomic_buffer_global_pk_add_f16_insts,
            .atomic_ds_pk_add_16_insts,
            .atomic_fadd_no_rtn_insts,
            .atomic_fadd_rtn_insts,
            .atomic_flat_pk_add_16_insts,
            .atomic_global_pk_add_bf16_inst,
            .back_off_barrier,
            .dl_insts,
            .dot10_insts,
            .dot1_insts,
            .dot2_insts,
            .dot3_insts,
            .dot4_insts,
            .dot5_insts,
            .dot6_insts,
            .dot7_insts,
            .dpp_64bit,
            .flat_atomic_fadd_f32_inst,
            .fma_mix_insts,
            .fmacf64_inst,
            .fp8_insts,
            .full_rate_64_ops,
            .gfx9,
            .gfx90a_insts,
            .gfx940_insts,
            .ldsbankcount32,
            .mai_insts,
            .packed_fp32_ops,
            .packed_tid,
            .pk_fmac_f16_inst,
            .sramecc_support,
        }),
    };

    /// Same feature list as `gfx601`.
    pub const hainan: CpuModel = .{
        .name = "hainan",
        .llvm_name = "hainan",
        .features = featureSet(&[_]Feature{
            .southern_islands,
        }),
    };

    /// Same feature list as `gfx701`.
    pub const hawaii: CpuModel = .{
        .name = "hawaii",
        .llvm_name = "hawaii",
        .features = featureSet(&[_]Feature{
            .fast_fmaf,
            .half_rate_64_ops,
            .ldsbankcount32,
            .sea_islands,
        }),
    };

    /// Same feature list as `gfx802`.
    pub const iceland: CpuModel = .{
        .name = "iceland",
        .llvm_name = "iceland",
        .features = featureSet(&[_]Feature{
            .ldsbankcount32,
            .sgpr_init_bug,
            .unpacked_d16_vmem,
            .volcanic_islands,
        }),
    };

    /// Same feature list as `gfx703`.
    pub const kabini: CpuModel = .{
        .name = "kabini",
        .llvm_name = "kabini",
        .features = featureSet(&[_]Feature{
            .ldsbankcount16,
            .sea_islands,
        }),
    };

    /// Same feature list as `bonaire`.
    pub const kaveri: CpuModel = .{
        .name = "kaveri",
        .llvm_name = "kaveri",
        .features = featureSet(&[_]Feature{
            .ldsbankcount32,
            .sea_islands,
        }),
    };

    /// Same feature list as `gfx703`.
    pub const mullins: CpuModel = .{
        .name = "mullins",
        .llvm_name = "mullins",
        .features = featureSet(&[_]Feature{
            .ldsbankcount16,
            .sea_islands,
        }),
    };

    /// Same feature list as `gfx601`.
    pub const oland: CpuModel = .{
        .name = "oland",
        .llvm_name = "oland",
        .features = featureSet(&[_]Feature{
            .southern_islands,
        }),
    };

    /// Same feature list as `gfx601`.
    pub const pitcairn: CpuModel = .{
        .name = "pitcairn",
        .llvm_name = "pitcairn",
        .features = featureSet(&[_]Feature{
            .southern_islands,
        }),
    };

    /// Same feature list as `fiji`.
    pub const polaris10: CpuModel = .{
        .name = "polaris10",
        .llvm_name = "polaris10",
        .features = featureSet(&[_]Feature{
            .ldsbankcount32,
            .unpacked_d16_vmem,
            .volcanic_islands,
        }),
    };

    /// Same feature list as `fiji`.
    pub const polaris11: CpuModel = .{
        .name = "polaris11",
        .llvm_name = "polaris11",
        .features = featureSet(&[_]Feature{
            .ldsbankcount32,
            .unpacked_d16_vmem,
            .volcanic_islands,
        }),
    };

    /// Same feature list as `gfx810`.
    pub const stoney: CpuModel = .{
        .name = "stoney",
        .llvm_name = "stoney",
        .features = featureSet(&[_]Feature{
            .image_gather4_d16_bug,
            .image_store_d16_bug,
            .ldsbankcount16,
            .volcanic_islands,
            .xnack_support,
        }),
    };

    /// Same feature list as `gfx600`.
    pub const tahiti: CpuModel = .{
        .name = "tahiti",
        .llvm_name = "tahiti",
        .features = featureSet(&[_]Feature{
            .fast_fmaf,
            .half_rate_64_ops,
            .southern_islands,
        }),
    };

    /// Same feature list as `gfx802`.
    pub const tonga: CpuModel = .{
        .name = "tonga",
        .llvm_name = "tonga",
        .features = featureSet(&[_]Feature{
            .ldsbankcount32,
            .sgpr_init_bug,
            .unpacked_d16_vmem,
            .volcanic_islands,
        }),
    };

    /// Same feature list as `gfx802`.
    pub const tongapro: CpuModel = .{
        .name = "tongapro",
        .llvm_name = "tongapro",
        .features = featureSet(&[_]Feature{
            .ldsbankcount32,
            .sgpr_init_bug,
            .unpacked_d16_vmem,
            .volcanic_islands,
        }),
    };

    /// Same feature list as `gfx601`.
    pub const verde: CpuModel = .{
        .name = "verde",
        .llvm_name = "verde",
        .features = featureSet(&[_]Feature{
            .southern_islands,
        }),
    };
};