update target CPU features with LLVM 13 rc1 data

This commit is contained in:
Andrew Kelley 2021-08-15 23:09:55 -07:00
parent d6467dcf71
commit 7adf15682c
13 changed files with 720 additions and 80 deletions

View File

@ -430,6 +430,7 @@ pub const Target = struct {
};
pub const aarch64 = @import("target/aarch64.zig");
pub const arc = @import("target/arc.zig");
pub const amdgpu = @import("target/amdgpu.zig");
pub const arm = @import("target/arm.zig");
pub const avr = @import("target/avr.zig");

View File

@ -65,6 +65,7 @@ pub const Feature = enum {
fuse_csel,
fuse_literals,
harden_sls_blr,
harden_sls_nocomdat,
harden_sls_retbr,
hcx,
i8mm,
@ -81,6 +82,7 @@ pub const Feature = enum {
neoverse_n2,
neoverse_v1,
no_neg_immediates,
no_zcz_fp,
nv,
outline_atomics,
pan,
@ -120,6 +122,7 @@ pub const Feature = enum {
reserve_x6,
reserve_x7,
reserve_x9,
rme,
sb,
sel2,
sha2,
@ -128,6 +131,9 @@ pub const Feature = enum {
slow_paired_128,
slow_strqro_store,
sm4,
sme,
sme_f64,
sme_i64,
spe,
spe_eef,
specrestrict,
@ -148,7 +154,6 @@ pub const Feature = enum {
tracev8_4,
trbe,
uaops,
use_aa,
use_experimental_zeroing_pseudos,
use_postra_scheduler,
use_reciprocal_square_root,
@ -166,7 +171,6 @@ pub const Feature = enum {
xs,
zcm,
zcz,
zcz_fp,
zcz_fp_workaround,
zcz_gp,
};
@ -594,6 +598,11 @@ pub const all_features = blk: {
.description = "Harden against straight line speculation across BLR instructions",
.dependencies = featureSet(&[_]Feature{}),
};
result[@enumToInt(Feature.harden_sls_nocomdat)] = .{
.llvm_name = "harden-sls-nocomdat",
.description = "Generate thunk code for SLS mitigation in the normal text section",
.dependencies = featureSet(&[_]Feature{}),
};
result[@enumToInt(Feature.harden_sls_retbr)] = .{
.llvm_name = "harden-sls-retbr",
.description = "Harden against straight line speculation across RET and BR instructions",
@ -660,8 +669,10 @@ pub const all_features = blk: {
.crypto,
.dotprod,
.fullfp16,
.fuse_aes,
.rcpc,
.ssbs,
.use_postra_scheduler,
.v8_2a,
}),
};
@ -672,9 +683,11 @@ pub const all_features = blk: {
.crypto,
.dotprod,
.fullfp16,
.fuse_aes,
.rcpc,
.spe,
.ssbs,
.use_postra_scheduler,
.v8_2a,
}),
};
@ -683,10 +696,13 @@ pub const all_features = blk: {
.description = "Neoverse N2 ARM processors",
.dependencies = featureSet(&[_]Feature{
.bf16,
.crypto,
.ete,
.fuse_aes,
.i8mm,
.mte,
.sve2_bitperm,
.use_postra_scheduler,
.v8_5a,
}),
};
@ -714,6 +730,11 @@ pub const all_features = blk: {
.description = "Convert immediates and instructions to their negated or complemented equivalent when the immediate does not fit in the encoding.",
.dependencies = featureSet(&[_]Feature{}),
};
result[@enumToInt(Feature.no_zcz_fp)] = .{
.llvm_name = "no-zcz-fp",
.description = "Has no zero-cycle zeroing instructions for FP registers",
.dependencies = featureSet(&[_]Feature{}),
};
result[@enumToInt(Feature.nv)] = .{
.llvm_name = "nv",
.description = "Enable v8.4-A Nested Virtualization Enchancement",
@ -913,6 +934,11 @@ pub const all_features = blk: {
.description = "Reserve X9, making it unavailable as a GPR",
.dependencies = featureSet(&[_]Feature{}),
};
result[@enumToInt(Feature.rme)] = .{
.llvm_name = "rme",
.description = "Enable Realm Management Extension",
.dependencies = featureSet(&[_]Feature{}),
};
result[@enumToInt(Feature.sb)] = .{
.llvm_name = "sb",
.description = "Enable v8.5 Speculation Barrier",
@ -959,6 +985,28 @@ pub const all_features = blk: {
.neon,
}),
};
result[@enumToInt(Feature.sme)] = .{
.llvm_name = "sme",
.description = "Enable Scalable Matrix Extension (SME)",
.dependencies = featureSet(&[_]Feature{
.bf16,
.sve2,
}),
};
result[@enumToInt(Feature.sme_f64)] = .{
.llvm_name = "sme-f64",
.description = "Enable Scalable Matrix Extension (SME) F64F64 instructions",
.dependencies = featureSet(&[_]Feature{
.sme,
}),
};
result[@enumToInt(Feature.sme_i64)] = .{
.llvm_name = "sme-i64",
.description = "Enable Scalable Matrix Extension (SME) I16I64 instructions",
.dependencies = featureSet(&[_]Feature{
.sme,
}),
};
result[@enumToInt(Feature.spe)] = .{
.llvm_name = "spe",
.description = "Enable Statistical Profiling extension",
@ -1074,11 +1122,6 @@ pub const all_features = blk: {
.description = "Enable v8.2 UAO PState",
.dependencies = featureSet(&[_]Feature{}),
};
result[@enumToInt(Feature.use_aa)] = .{
.llvm_name = "use-aa",
.description = "Use alias analysis during codegen",
.dependencies = featureSet(&[_]Feature{}),
};
result[@enumToInt(Feature.use_experimental_zeroing_pseudos)] = .{
.llvm_name = "use-experimental-zeroing-pseudos",
.description = "Hint to the compiler that the MOVPRFX instruction is merged with destructive operations",
@ -1251,15 +1294,9 @@ pub const all_features = blk: {
.llvm_name = "zcz",
.description = "Has zero-cycle zeroing instructions",
.dependencies = featureSet(&[_]Feature{
.zcz_fp,
.zcz_gp,
}),
};
result[@enumToInt(Feature.zcz_fp)] = .{
.llvm_name = "zcz-fp",
.description = "Has zero-cycle zeroing instructions for FP registers",
.dependencies = featureSet(&[_]Feature{}),
};
result[@enumToInt(Feature.zcz_fp_workaround)] = .{
.llvm_name = "zcz-fp-workaround",
.description = "The zero-cycle floating-point zeroing instruction has a bug",
@ -1403,7 +1440,62 @@ pub const cpu = struct {
.name = "apple_latest",
.llvm_name = "apple-latest",
.features = featureSet(&[_]Feature{
.apple_a13,
.aggressive_fma,
.alternate_sextload_cvt_f32_pattern,
.altnzcv,
.arith_bcc_fusion,
.arith_cbz_fusion,
.ccdp,
.crypto,
.disable_latency_sched_heuristic,
.fp16fml,
.fptoint,
.fuse_address,
.fuse_aes,
.fuse_arith_logic,
.fuse_crypto_eor,
.fuse_csel,
.fuse_literals,
.perfmon,
.predres,
.sb,
.sha3,
.specrestrict,
.ssbs,
.v8_4a,
.zcm,
.zcz,
}),
};
/// CPU model exposed to LLVM under the name "apple-m1".
/// The feature list names `v8_4a` and spells out every additional feature
/// explicitly instead of pulling them in through another CPU's feature.
pub const apple_m1 = CpuModel{
.name = "apple_m1",
.llvm_name = "apple-m1",
.features = featureSet(&[_]Feature{
.aggressive_fma,
.alternate_sextload_cvt_f32_pattern,
.altnzcv,
.arith_bcc_fusion,
.arith_cbz_fusion,
.ccdp,
.crypto,
.disable_latency_sched_heuristic,
.fp16fml,
.fptoint,
.fuse_address,
.fuse_aes,
.fuse_arith_logic,
.fuse_crypto_eor,
.fuse_csel,
.fuse_literals,
.perfmon,
.predres,
.sb,
.sha3,
.specrestrict,
.ssbs,
.v8_4a,
.zcm,
.zcz,
}),
};
pub const apple_s4 = CpuModel{
@ -1459,7 +1551,6 @@ pub const cpu = struct {
.custom_cheap_as_move,
.fuse_aes,
.perfmon,
.use_aa,
.use_postra_scheduler,
.v8a,
}),
@ -1474,6 +1565,7 @@ pub const cpu = struct {
.fuse_aes,
.perfmon,
.rcpc,
.use_postra_scheduler,
.v8_2a,
}),
};
@ -1649,7 +1741,6 @@ pub const cpu = struct {
.use_postra_scheduler,
.use_reciprocal_square_root,
.v8a,
.zcz_fp,
}),
};
pub const exynos_m2 = CpuModel{
@ -1666,7 +1757,6 @@ pub const cpu = struct {
.slow_paired_128,
.use_postra_scheduler,
.v8a,
.zcz_fp,
}),
};
pub const exynos_m3 = CpuModel{
@ -1686,7 +1776,6 @@ pub const cpu = struct {
.predictable_select_expensive,
.use_postra_scheduler,
.v8a,
.zcz_fp,
}),
};
pub const exynos_m4 = CpuModel{
@ -1824,7 +1913,6 @@ pub const cpu = struct {
.perfmon,
.predictable_select_expensive,
.strict_align,
.use_aa,
.use_postra_scheduler,
.v8_3a,
}),

View File

@ -9,6 +9,7 @@ pub const Feature = enum {
a16,
add_no_carry_insts,
aperture_regs,
architected_flat_scratch,
atomic_fadd_insts,
auto_waitcnt_before_barrier,
ci_insts,
@ -20,11 +21,14 @@ pub const Feature = enum {
dot4_insts,
dot5_insts,
dot6_insts,
dot7_insts,
dpp,
dpp8,
dpp_64bit,
ds_src2_insts,
enable_ds128,
enable_prt_strict_null,
extended_image_insts,
fast_denormal_f32,
fast_fmaf,
flat_address_space,
@ -36,16 +40,19 @@ pub const Feature = enum {
fma_mix_insts,
fmaf,
fp64,
full_rate_64_ops,
g16,
gcn3_encoding,
get_wave_id_inst,
gfx10,
gfx10_3_insts,
gfx10_a_encoding,
gfx10_b_encoding,
gfx10_insts,
gfx7_gfx8_gfx9_insts,
gfx8_insts,
gfx9,
gfx90a_insts,
gfx9_insts,
half_rate_64_ops,
image_gather4_d16_bug,
@ -70,11 +77,18 @@ pub const Feature = enum {
mfma_inline_literal_bug,
mimg_r128,
movrel,
negative_scratch_offset_bug,
negative_unaligned_scratch_offset_bug,
no_data_dep_hazard,
no_sdst_cmpx,
nsa_clause_bug,
nsa_encoding,
nsa_max_size_13,
nsa_max_size_5,
nsa_to_vmem_bug,
offset_3f_bug,
packed_fp32_ops,
packed_tid,
pk_fmac_f16_inst,
promote_alloca,
r128_a16,
@ -92,11 +106,13 @@ pub const Feature = enum {
sdwa_sdst,
sea_islands,
sgpr_init_bug,
shader_cycles_register,
si_scheduler,
smem_to_vector_write_hazard,
southern_islands,
sramecc,
sramecc_support,
tgsplit,
trap_handler,
trig_reduced_range,
unaligned_access_mode,
@ -149,6 +165,11 @@ pub const all_features = blk: {
.description = "Has Memory Aperture Base and Size Registers",
.dependencies = featureSet(&[_]Feature{}),
};
result[@enumToInt(Feature.architected_flat_scratch)] = .{
.llvm_name = "architected-flat-scratch",
.description = "Flat Scratch register is a readonly SPI initialized architected register",
.dependencies = featureSet(&[_]Feature{}),
};
result[@enumToInt(Feature.atomic_fadd_insts)] = .{
.llvm_name = "atomic-fadd-insts",
.description = "Has buffer_atomic_add_f32, buffer_atomic_pk_add_f16, global_atomic_add_f32, global_atomic_pk_add_f16 instructions",
@ -183,7 +204,7 @@ pub const all_features = blk: {
};
// dot2-insts describes only the two 16-bit integer dot products; the
// f16/u8/u4 forms are listed under dot7-insts. A struct literal may
// initialise `.description` only once, so the superseded text is dropped.
result[@enumToInt(Feature.dot2_insts)] = .{
    .llvm_name = "dot2-insts",
    .description = "Has v_dot2_i32_i16, v_dot2_u32_u16 instructions",
    .dependencies = featureSet(&[_]Feature{}),
};
result[@enumToInt(Feature.dot3_insts)] = .{
@ -206,6 +227,11 @@ pub const all_features = blk: {
.description = "Has v_dot4c_i32_i8 instruction",
.dependencies = featureSet(&[_]Feature{}),
};
result[@enumToInt(Feature.dot7_insts)] = .{
.llvm_name = "dot7-insts",
.description = "Has v_dot2_f32_f16, v_dot4_u32_u8, v_dot8_u32_u4 instructions",
.dependencies = featureSet(&[_]Feature{}),
};
result[@enumToInt(Feature.dpp)] = .{
.llvm_name = "dpp",
.description = "Support DPP (Data Parallel Primitives) extension",
@ -216,6 +242,11 @@ pub const all_features = blk: {
.description = "Support DPP8 (Data Parallel Primitives) extension",
.dependencies = featureSet(&[_]Feature{}),
};
result[@enumToInt(Feature.dpp_64bit)] = .{
.llvm_name = "dpp-64bit",
.description = "Support DPP (Data Parallel Primitives) extension",
.dependencies = featureSet(&[_]Feature{}),
};
result[@enumToInt(Feature.ds_src2_insts)] = .{
.llvm_name = "ds-src2-insts",
.description = "Has ds_*_src2 instructions",
@ -231,6 +262,11 @@ pub const all_features = blk: {
.description = "Enable zeroing of result registers for sparse texture fetches",
.dependencies = featureSet(&[_]Feature{}),
};
result[@enumToInt(Feature.extended_image_insts)] = .{
.llvm_name = "extended-image-insts",
.description = "Support mips != 0, lod != 0, gather4, and get_lod",
.dependencies = featureSet(&[_]Feature{}),
};
result[@enumToInt(Feature.fast_denormal_f32)] = .{
.llvm_name = "fast-denormal-f32",
.description = "Enabling denormals does not cause f32 instructions to run at f64 rates",
@ -268,7 +304,7 @@ pub const all_features = blk: {
};
// Only the current wording of the description is kept; a struct literal may
// initialise `.description` once.
result[@enumToInt(Feature.flat_segment_offset_bug)] = .{
    .llvm_name = "flat-segment-offset-bug",
    .description = "GFX10 bug where inst_offset is ignored when flat instructions access global memory",
    .dependencies = featureSet(&[_]Feature{}),
};
result[@enumToInt(Feature.fma_mix_insts)] = .{
@ -286,6 +322,11 @@ pub const all_features = blk: {
.description = "Enable double precision operations",
.dependencies = featureSet(&[_]Feature{}),
};
result[@enumToInt(Feature.full_rate_64_ops)] = .{
.llvm_name = "full-rate-64-ops",
.description = "Most fp64 instructions are full rate",
.dependencies = featureSet(&[_]Feature{}),
};
result[@enumToInt(Feature.g16)] = .{
.llvm_name = "g16",
.description = "Support G16 for 16-bit gradient image operands",
@ -312,6 +353,7 @@ pub const all_features = blk: {
.ci_insts,
.dpp,
.dpp8,
.extended_image_insts,
.fast_denormal_f32,
.fast_fmaf,
.flat_address_space,
@ -334,6 +376,7 @@ pub const all_features = blk: {
.pk_fmac_f16_inst,
.register_banking,
.s_memrealtime,
.s_memtime_inst,
.sdwa,
.sdwa_omod,
.sdwa_scalar,
@ -350,6 +393,11 @@ pub const all_features = blk: {
.description = "Additional instructions for GFX10.3",
.dependencies = featureSet(&[_]Feature{}),
};
result[@enumToInt(Feature.gfx10_a_encoding)] = .{
.llvm_name = "gfx10_a-encoding",
.description = "Has BVH ray tracing instructions",
.dependencies = featureSet(&[_]Feature{}),
};
result[@enumToInt(Feature.gfx10_b_encoding)] = .{
.llvm_name = "gfx10_b-encoding",
.description = "Encoding format GFX10_B",
@ -379,7 +427,6 @@ pub const all_features = blk: {
.aperture_regs,
.ci_insts,
.dpp,
.ds_src2_insts,
.fast_denormal_f32,
.fast_fmaf,
.flat_address_space,
@ -394,7 +441,7 @@ pub const all_features = blk: {
.int_clamp_insts,
.inv_2pi_inline_imm,
.localmemorysize65536,
.mad_mac_f32_insts,
.negative_scratch_offset_bug,
.r128_a16,
.s_memrealtime,
.s_memtime_inst,
@ -413,6 +460,11 @@ pub const all_features = blk: {
.xnack_support,
}),
};
result[@enumToInt(Feature.gfx90a_insts)] = .{
.llvm_name = "gfx90a-insts",
.description = "Additional instructions for GFX90A+",
.dependencies = featureSet(&[_]Feature{}),
};
result[@enumToInt(Feature.gfx9_insts)] = .{
.llvm_name = "gfx9-insts",
.description = "Additional instructions for GFX9+",
@ -533,6 +585,16 @@ pub const all_features = blk: {
.description = "Has v_movrel*_b32 instructions",
.dependencies = featureSet(&[_]Feature{}),
};
result[@enumToInt(Feature.negative_scratch_offset_bug)] = .{
.llvm_name = "negative-scratch-offset-bug",
.description = "Negative immediate offsets in scratch instructions with an SGPR offset page fault on GFX9",
.dependencies = featureSet(&[_]Feature{}),
};
result[@enumToInt(Feature.negative_unaligned_scratch_offset_bug)] = .{
.llvm_name = "negative-unaligned-scratch-offset-bug",
.description = "Scratch instructions with a VGPR offset and a negative immediate offset that is not a multiple of 4 read wrong memory on GFX10",
.dependencies = featureSet(&[_]Feature{}),
};
result[@enumToInt(Feature.no_data_dep_hazard)] = .{
.llvm_name = "no-data-dep-hazard",
.description = "Does not need SW waitstates",
@ -543,11 +605,26 @@ pub const all_features = blk: {
.description = "V_CMPX does not write VCC/SGPR in addition to EXEC",
.dependencies = featureSet(&[_]Feature{}),
};
result[@enumToInt(Feature.nsa_clause_bug)] = .{
.llvm_name = "nsa-clause-bug",
.description = "MIMG-NSA in a hard clause has unpredictable results on GFX10.1",
.dependencies = featureSet(&[_]Feature{}),
};
result[@enumToInt(Feature.nsa_encoding)] = .{
.llvm_name = "nsa-encoding",
.description = "Support NSA encoding for image instructions",
.dependencies = featureSet(&[_]Feature{}),
};
result[@enumToInt(Feature.nsa_max_size_13)] = .{
.llvm_name = "nsa-max-size-13",
.description = "The maximum non-sequential address size in VGPRs.",
.dependencies = featureSet(&[_]Feature{}),
};
result[@enumToInt(Feature.nsa_max_size_5)] = .{
.llvm_name = "nsa-max-size-5",
.description = "The maximum non-sequential address size in VGPRs.",
.dependencies = featureSet(&[_]Feature{}),
};
result[@enumToInt(Feature.nsa_to_vmem_bug)] = .{
.llvm_name = "nsa-to-vmem-bug",
.description = "MIMG-NSA followed by VMEM fail if EXEC_LO or EXEC_HI equals zero",
@ -558,6 +635,16 @@ pub const all_features = blk: {
.description = "Branch offset of 3f hardware bug",
.dependencies = featureSet(&[_]Feature{}),
};
result[@enumToInt(Feature.packed_fp32_ops)] = .{
.llvm_name = "packed-fp32-ops",
.description = "Support packed fp32 instructions",
.dependencies = featureSet(&[_]Feature{}),
};
result[@enumToInt(Feature.packed_tid)] = .{
.llvm_name = "packed-tid",
.description = "Workitem IDs are packed into v0 at kernel launch",
.dependencies = featureSet(&[_]Feature{}),
};
result[@enumToInt(Feature.pk_fmac_f16_inst)] = .{
.llvm_name = "pk-fmac-f16-inst",
.description = "Has v_pk_fmac_f16 instruction",
@ -639,6 +726,7 @@ pub const all_features = blk: {
.dependencies = featureSet(&[_]Feature{
.ci_insts,
.ds_src2_insts,
.extended_image_insts,
.flat_address_space,
.fp64,
.gfx7_gfx8_gfx9_insts,
@ -657,6 +745,11 @@ pub const all_features = blk: {
.description = "VI SGPR initialization bug requiring a fixed SGPR allocation size",
.dependencies = featureSet(&[_]Feature{}),
};
result[@enumToInt(Feature.shader_cycles_register)] = .{
.llvm_name = "shader-cycles-register",
.description = "Has SHADER_CYCLES hardware register",
.dependencies = featureSet(&[_]Feature{}),
};
result[@enumToInt(Feature.si_scheduler)] = .{
.llvm_name = "si-scheduler",
.description = "Enable SI Machine Scheduler",
@ -672,6 +765,7 @@ pub const all_features = blk: {
.description = "SOUTHERN_ISLANDS GPU generation",
.dependencies = featureSet(&[_]Feature{
.ds_src2_insts,
.extended_image_insts,
.fp64,
.ldsbankcount32,
.localmemorysize32768,
@ -693,6 +787,11 @@ pub const all_features = blk: {
.description = "Hardware supports SRAMECC",
.dependencies = featureSet(&[_]Feature{}),
};
result[@enumToInt(Feature.tgsplit)] = .{
.llvm_name = "tgsplit",
.description = "Enable threadgroup split execution",
.dependencies = featureSet(&[_]Feature{}),
};
result[@enumToInt(Feature.trap_handler)] = .{
.llvm_name = "trap-handler",
.description = "Trap handler support",
@ -761,6 +860,7 @@ pub const all_features = blk: {
.ci_insts,
.dpp,
.ds_src2_insts,
.extended_image_insts,
.fast_denormal_f32,
.flat_address_space,
.fp64,
@ -892,10 +992,12 @@ pub const cpu = struct {
.lds_misaligned_bug,
.ldsbankcount32,
.mad_mac_f32_insts,
.negative_unaligned_scratch_offset_bug,
.nsa_clause_bug,
.nsa_encoding,
.nsa_max_size_5,
.nsa_to_vmem_bug,
.offset_3f_bug,
.s_memtime_inst,
.scalar_atomics,
.scalar_flat_scratch_insts,
.scalar_stores,
@ -916,6 +1018,7 @@ pub const cpu = struct {
.dot2_insts,
.dot5_insts,
.dot6_insts,
.dot7_insts,
.ds_src2_insts,
.flat_segment_offset_bug,
.get_wave_id_inst,
@ -925,10 +1028,12 @@ pub const cpu = struct {
.lds_misaligned_bug,
.ldsbankcount32,
.mad_mac_f32_insts,
.negative_unaligned_scratch_offset_bug,
.nsa_clause_bug,
.nsa_encoding,
.nsa_max_size_5,
.nsa_to_vmem_bug,
.offset_3f_bug,
.s_memtime_inst,
.scalar_atomics,
.scalar_flat_scratch_insts,
.scalar_stores,
@ -949,6 +1054,7 @@ pub const cpu = struct {
.dot2_insts,
.dot5_insts,
.dot6_insts,
.dot7_insts,
.ds_src2_insts,
.flat_segment_offset_bug,
.get_wave_id_inst,
@ -958,10 +1064,44 @@ pub const cpu = struct {
.lds_misaligned_bug,
.ldsbankcount32,
.mad_mac_f32_insts,
.negative_unaligned_scratch_offset_bug,
.nsa_clause_bug,
.nsa_encoding,
.nsa_max_size_5,
.nsa_to_vmem_bug,
.offset_3f_bug,
.scalar_atomics,
.scalar_flat_scratch_insts,
.scalar_stores,
.smem_to_vector_write_hazard,
.vcmpx_exec_war_hazard,
.vcmpx_permlane_hazard,
.vmem_to_scalar_write_hazard,
.wavefrontsize32,
.xnack_support,
}),
};
pub const gfx1013 = CpuModel{
.name = "gfx1013",
.llvm_name = "gfx1013",
.features = featureSet(&[_]Feature{
.dl_insts,
.ds_src2_insts,
.flat_segment_offset_bug,
.get_wave_id_inst,
.gfx10,
.gfx10_a_encoding,
.inst_fwd_prefetch_bug,
.lds_branch_vmem_war_hazard,
.lds_misaligned_bug,
.ldsbankcount32,
.mad_mac_f32_insts,
.negative_unaligned_scratch_offset_bug,
.nsa_clause_bug,
.nsa_encoding,
.nsa_max_size_5,
.nsa_to_vmem_bug,
.offset_3f_bug,
.s_memtime_inst,
.scalar_atomics,
.scalar_flat_scratch_insts,
.scalar_stores,
@ -982,11 +1122,15 @@ pub const cpu = struct {
.dot2_insts,
.dot5_insts,
.dot6_insts,
.dot7_insts,
.gfx10,
.gfx10_3_insts,
.gfx10_a_encoding,
.gfx10_b_encoding,
.ldsbankcount32,
.nsa_encoding,
.nsa_max_size_13,
.shader_cycles_register,
.wavefrontsize32,
}),
};
@ -999,11 +1143,15 @@ pub const cpu = struct {
.dot2_insts,
.dot5_insts,
.dot6_insts,
.dot7_insts,
.gfx10,
.gfx10_3_insts,
.gfx10_a_encoding,
.gfx10_b_encoding,
.ldsbankcount32,
.nsa_encoding,
.nsa_max_size_13,
.shader_cycles_register,
.wavefrontsize32,
}),
};
@ -1016,11 +1164,15 @@ pub const cpu = struct {
.dot2_insts,
.dot5_insts,
.dot6_insts,
.dot7_insts,
.gfx10,
.gfx10_3_insts,
.gfx10_a_encoding,
.gfx10_b_encoding,
.ldsbankcount32,
.nsa_encoding,
.nsa_max_size_13,
.shader_cycles_register,
.wavefrontsize32,
}),
};
@ -1033,11 +1185,57 @@ pub const cpu = struct {
.dot2_insts,
.dot5_insts,
.dot6_insts,
.dot7_insts,
.gfx10,
.gfx10_3_insts,
.gfx10_a_encoding,
.gfx10_b_encoding,
.ldsbankcount32,
.nsa_encoding,
.nsa_max_size_13,
.shader_cycles_register,
.wavefrontsize32,
}),
};
/// CPU model exposed to LLVM under the name "gfx1034".
/// Combines `gfx10` with `gfx10_3_insts`, both the `gfx10_a_encoding` and
/// `gfx10_b_encoding` features, `nsa_max_size_13` and `wavefrontsize32`.
pub const gfx1034 = CpuModel{
.name = "gfx1034",
.llvm_name = "gfx1034",
.features = featureSet(&[_]Feature{
.dl_insts,
.dot1_insts,
.dot2_insts,
.dot5_insts,
.dot6_insts,
.dot7_insts,
.gfx10,
.gfx10_3_insts,
.gfx10_a_encoding,
.gfx10_b_encoding,
.ldsbankcount32,
.nsa_encoding,
.nsa_max_size_13,
.shader_cycles_register,
.wavefrontsize32,
}),
};
/// CPU model exposed to LLVM under the name "gfx1035".
/// Its feature list is the same as the one given for `gfx1034`.
pub const gfx1035 = CpuModel{
.name = "gfx1035",
.llvm_name = "gfx1035",
.features = featureSet(&[_]Feature{
.dl_insts,
.dot1_insts,
.dot2_insts,
.dot5_insts,
.dot6_insts,
.dot7_insts,
.gfx10,
.gfx10_3_insts,
.gfx10_a_encoding,
.gfx10_b_encoding,
.ldsbankcount32,
.nsa_encoding,
.nsa_max_size_13,
.shader_cycles_register,
.wavefrontsize32,
}),
};
@ -1171,9 +1369,12 @@ pub const cpu = struct {
.name = "gfx900",
.llvm_name = "gfx900",
.features = featureSet(&[_]Feature{
.ds_src2_insts,
.extended_image_insts,
.gfx9,
.image_gather4_d16_bug,
.ldsbankcount32,
.mad_mac_f32_insts,
.mad_mix_insts,
}),
};
@ -1181,9 +1382,12 @@ pub const cpu = struct {
.name = "gfx902",
.llvm_name = "gfx902",
.features = featureSet(&[_]Feature{
.ds_src2_insts,
.extended_image_insts,
.gfx9,
.image_gather4_d16_bug,
.ldsbankcount32,
.mad_mac_f32_insts,
.mad_mix_insts,
}),
};
@ -1191,10 +1395,13 @@ pub const cpu = struct {
.name = "gfx904",
.llvm_name = "gfx904",
.features = featureSet(&[_]Feature{
.ds_src2_insts,
.extended_image_insts,
.fma_mix_insts,
.gfx9,
.image_gather4_d16_bug,
.ldsbankcount32,
.mad_mac_f32_insts,
}),
};
pub const gfx906 = CpuModel{
@ -1204,11 +1411,15 @@ pub const cpu = struct {
.dl_insts,
.dot1_insts,
.dot2_insts,
.dot7_insts,
.ds_src2_insts,
.extended_image_insts,
.fma_mix_insts,
.gfx9,
.half_rate_64_ops,
.image_gather4_d16_bug,
.ldsbankcount32,
.mad_mac_f32_insts,
.sramecc_support,
}),
};
@ -1224,11 +1435,15 @@ pub const cpu = struct {
.dot4_insts,
.dot5_insts,
.dot6_insts,
.dot7_insts,
.ds_src2_insts,
.extended_image_insts,
.fma_mix_insts,
.gfx9,
.half_rate_64_ops,
.image_gather4_d16_bug,
.ldsbankcount32,
.mad_mac_f32_insts,
.mai_insts,
.mfma_inline_literal_bug,
.pk_fmac_f16_inst,
@ -1239,21 +1454,53 @@ pub const cpu = struct {
.name = "gfx909",
.llvm_name = "gfx909",
.features = featureSet(&[_]Feature{
.ds_src2_insts,
.extended_image_insts,
.gfx9,
.image_gather4_d16_bug,
.ldsbankcount32,
.mad_mac_f32_insts,
.mad_mix_insts,
}),
};
/// CPU model exposed to LLVM under the name "gfx90a".
/// Builds on `gfx9` and adds, among others, `gfx90a_insts`, `dpp_64bit`,
/// `full_rate_64_ops`, `packed_fp32_ops` and `packed_tid`.
/// NOTE(review): this list does not name `ds_src2_insts`,
/// `extended_image_insts` or `image_gather4_d16_bug`, which `gfx909` and
/// `gfx90c` do list — presumably this mirrors the upstream LLVM data; confirm
/// before editing by hand.
pub const gfx90a = CpuModel{
.name = "gfx90a",
.llvm_name = "gfx90a",
.features = featureSet(&[_]Feature{
.atomic_fadd_insts,
.dl_insts,
.dot1_insts,
.dot2_insts,
.dot3_insts,
.dot4_insts,
.dot5_insts,
.dot6_insts,
.dot7_insts,
.dpp_64bit,
.fma_mix_insts,
.full_rate_64_ops,
.gfx9,
.gfx90a_insts,
.ldsbankcount32,
.mad_mac_f32_insts,
.mai_insts,
.packed_fp32_ops,
.packed_tid,
.pk_fmac_f16_inst,
.sramecc_support,
}),
};
pub const gfx90c = CpuModel{
.name = "gfx90c",
.llvm_name = "gfx90c",
.features = featureSet(&[_]Feature{
.ds_src2_insts,
.extended_image_insts,
.gfx9,
.image_gather4_d16_bug,
.ldsbankcount32,
.mad_mac_f32_insts,
.mad_mix_insts,
.xnack,
}),
};
pub const hainan = CpuModel{

39
lib/std/target/arc.zig Normal file
View File

@ -0,0 +1,39 @@
//! This file is auto-generated by tools/update_cpu_features.zig.
const std = @import("../std.zig");
const CpuFeature = std.Target.Cpu.Feature;
const CpuModel = std.Target.Cpu.Model;
/// CPU features known for this target; `norm` is the only one.
pub const Feature = enum {
norm,
};
// Feature-set helpers obtained from `CpuFeature.feature_set_fns`, specialised
// for this file's `Feature` enum and re-exported under their own names.
pub const featureSet = CpuFeature.feature_set_fns(Feature).featureSet;
pub const featureSetHas = CpuFeature.feature_set_fns(Feature).featureSetHas;
pub const featureSetHasAny = CpuFeature.feature_set_fns(Feature).featureSetHasAny;
pub const featureSetHasAll = CpuFeature.feature_set_fns(Feature).featureSetHasAll;
/// Descriptor table with one entry per `Feature`, indexed by the enum's
/// integer value. The per-feature data is assigned first; each entry's
/// `index` and `name` are then filled in from the enum's type info.
pub const all_features = blk: {
    const enum_fields = @typeInfo(Feature).Enum.fields;
    // Every feature must fit in the bit set used to represent feature sets.
    std.debug.assert(enum_fields.len <= CpuFeature.Set.needed_bit_count);
    var table: [enum_fields.len]CpuFeature = undefined;
    table[@enumToInt(Feature.norm)] = .{
        .llvm_name = "norm",
        .description = "Enable support for norm instruction.",
        .dependencies = featureSet(&[_]Feature{}),
    };
    // Position and name come straight from the enum declaration.
    for (table) |*entry, position| {
        entry.index = position;
        entry.name = enum_fields[position].name;
    }
    break :blk table;
};
/// CPU models for this target. Only `generic` is defined, and it enables no
/// features.
pub const cpu = struct {
pub const generic = CpuModel{
.name = "generic",
.llvm_name = "generic",
.features = featureSet(&[_]Feature{}),
};
};

View File

@ -51,6 +51,7 @@ pub const Feature = enum {
fuse_aes,
fuse_literals,
harden_sls_blr,
harden_sls_nocomdat,
harden_sls_retbr,
has_v4t,
has_v5t,
@ -487,6 +488,11 @@ pub const all_features = blk: {
.description = "Harden against straight line speculation across indirect calls",
.dependencies = featureSet(&[_]Feature{}),
};
result[@enumToInt(Feature.harden_sls_nocomdat)] = .{
.llvm_name = "harden-sls-nocomdat",
.description = "Generate thunk code for SLS mitigation in the normal text section",
.dependencies = featureSet(&[_]Feature{}),
};
result[@enumToInt(Feature.harden_sls_retbr)] = .{
.llvm_name = "harden-sls-retbr",
.description = "Harden against straight line speculation across RETurn and BranchRegister instructions",
@ -1245,7 +1251,7 @@ pub const all_features = blk: {
};
result[@enumToInt(Feature.v8_7a)] = .{
.llvm_name = "armv8.7-a",
.description = "ARMv86a architecture",
.description = "ARMv87a architecture",
.dependencies = featureSet(&[_]Feature{
.aclass,
.crc,
@ -1517,13 +1523,6 @@ pub const cpu = struct {
.vfp2,
}),
};
pub const arm1176j_s = CpuModel{
.name = "arm1176j_s",
.llvm_name = "arm1176j-s",
.features = featureSet(&[_]Feature{
.v6kz,
}),
};
pub const arm1176jz_s = CpuModel{
.name = "arm1176jz_s",
.llvm_name = "arm1176jz-s",
@ -1889,6 +1888,7 @@ pub const cpu = struct {
.name = "cortex_m0",
.llvm_name = "cortex-m0",
.features = featureSet(&[_]Feature{
.no_branch_predictor,
.v6m,
}),
};
@ -1896,6 +1896,7 @@ pub const cpu = struct {
.name = "cortex_m0plus",
.llvm_name = "cortex-m0plus",
.features = featureSet(&[_]Feature{
.no_branch_predictor,
.v6m,
}),
};
@ -1903,6 +1904,7 @@ pub const cpu = struct {
.name = "cortex_m1",
.llvm_name = "cortex-m1",
.features = featureSet(&[_]Feature{
.no_branch_predictor,
.v6m,
}),
};
@ -1910,6 +1912,7 @@ pub const cpu = struct {
.name = "cortex_m23",
.llvm_name = "cortex-m23",
.features = featureSet(&[_]Feature{
.no_branch_predictor,
.no_movt,
.v8m,
}),
@ -2225,6 +2228,7 @@ pub const cpu = struct {
.name = "sc000",
.llvm_name = "sc000",
.features = featureSet(&[_]Feature{
.no_branch_predictor,
.v6m,
}),
};

View File

@ -16,6 +16,7 @@ pub const Feature = enum {
hvxv65,
hvxv66,
hvxv67,
hvxv68,
long_calls,
mem_noshuf,
memops,
@ -35,6 +36,7 @@ pub const Feature = enum {
v65,
v66,
v67,
v68,
zreg,
};
@ -117,6 +119,13 @@ pub const all_features = blk: {
.hvxv66,
}),
};
result[@enumToInt(Feature.hvxv68)] = .{
.llvm_name = "hvxv68",
.description = "Hexagon HVX instructions",
.dependencies = featureSet(&[_]Feature{
.hvxv67,
}),
};
result[@enumToInt(Feature.long_calls)] = .{
.llvm_name = "long-calls",
.description = "Use constant-extended calls",
@ -216,6 +225,11 @@ pub const all_features = blk: {
.description = "Enable Hexagon V67 architecture",
.dependencies = featureSet(&[_]Feature{}),
};
result[@enumToInt(Feature.v68)] = .{
.llvm_name = "v68",
.description = "Enable Hexagon V68 architecture",
.dependencies = featureSet(&[_]Feature{}),
};
result[@enumToInt(Feature.zreg)] = .{
.llvm_name = "zreg",
.description = "Hexagon ZReg extension instructions",
@ -385,4 +399,25 @@ pub const cpu = struct {
.v67,
}),
};
/// CPU model exposed to LLVM under the name "hexagonv68".
/// Names each architecture feature from `v5` up to `v68` explicitly, next to
/// the instruction-set features (`compound`, `duplex`, `memops`, ...).
pub const hexagonv68 = CpuModel{
.name = "hexagonv68",
.llvm_name = "hexagonv68",
.features = featureSet(&[_]Feature{
.compound,
.duplex,
.mem_noshuf,
.memops,
.nvj,
.nvs,
.small_data,
.v5,
.v55,
.v60,
.v62,
.v65,
.v66,
.v67,
.v68,
}),
};
};

View File

@ -17,6 +17,8 @@ pub const Feature = enum {
ptx64,
ptx65,
ptx70,
ptx71,
ptx72,
sm_20,
sm_21,
sm_30,
@ -33,6 +35,7 @@ pub const Feature = enum {
sm_72,
sm_75,
sm_80,
sm_86,
};
pub const featureSet = CpuFeature.feature_set_fns(Feature).featureSet;
@ -104,6 +107,16 @@ pub const all_features = blk: {
.description = "Use PTX version 7.0",
.dependencies = featureSet(&[_]Feature{}),
};
result[@enumToInt(Feature.ptx71)] = .{
.llvm_name = "ptx71",
.description = "Use PTX version 7.1",
.dependencies = featureSet(&[_]Feature{}),
};
result[@enumToInt(Feature.ptx72)] = .{
.llvm_name = "ptx72",
.description = "Use PTX version 7.2",
.dependencies = featureSet(&[_]Feature{}),
};
result[@enumToInt(Feature.sm_20)] = .{
.llvm_name = "sm_20",
.description = "Target SM 2.0",
@ -184,6 +197,11 @@ pub const all_features = blk: {
.description = "Target SM 8.0",
.dependencies = featureSet(&[_]Feature{}),
};
result[@enumToInt(Feature.sm_86)] = .{
.llvm_name = "sm_86",
.description = "Target SM 8.6",
.dependencies = featureSet(&[_]Feature{}),
};
const ti = @typeInfo(Feature);
for (result) |*elem, i| {
elem.index = i;
@ -317,4 +335,12 @@ pub const cpu = struct {
.sm_80,
}),
};
/// CPU model exposed to LLVM under the name "sm_86"; pairs the `sm_86`
/// feature with `ptx71`.
pub const sm_86 = CpuModel{
.name = "sm_86",
.llvm_name = "sm_86",
.features = featureSet(&[_]Feature{
.ptx71,
.sm_86,
}),
};
};

View File

@ -37,6 +37,7 @@ pub const Feature = enum {
htm,
icbt,
invariant_function_descriptors,
isa_v207_instructions,
isa_v30_instructions,
isa_v31_instructions,
isel,
@ -62,7 +63,10 @@ pub const Feature = enum {
ppc_prera_sched,
predictable_select_expensive,
prefix_instrs,
privileged,
quadword_atomics,
recipprec,
rop_protect,
secure_plt,
slow_popcntd,
spe,
@ -277,10 +281,17 @@ pub const all_features = blk: {
.description = "Assume function descriptors are invariant",
.dependencies = featureSet(&[_]Feature{}),
};
result[@enumToInt(Feature.isa_v207_instructions)] = .{
.llvm_name = "isa-v207-instructions",
.description = "Enable instructions in ISA 2.07.",
.dependencies = featureSet(&[_]Feature{}),
};
// ISA 3.0 depends on the ISA 2.07 instruction feature. A struct literal may
// initialise `.dependencies` only once, so the empty set is dropped in favour
// of the populated one.
result[@enumToInt(Feature.isa_v30_instructions)] = .{
    .llvm_name = "isa-v30-instructions",
    .description = "Enable instructions in ISA 3.0.",
    .dependencies = featureSet(&[_]Feature{
        .isa_v207_instructions,
    }),
};
result[@enumToInt(Feature.isa_v31_instructions)] = .{
.llvm_name = "isa-v31-instructions",
@ -433,11 +444,26 @@ pub const all_features = blk: {
.power9_altivec,
}),
};
result[@enumToInt(Feature.privileged)] = .{
.llvm_name = "privileged",
.description = "Add privileged instructions",
.dependencies = featureSet(&[_]Feature{}),
};
result[@enumToInt(Feature.quadword_atomics)] = .{
.llvm_name = "quadword-atomics",
.description = "Enable lqarx and stqcx.",
.dependencies = featureSet(&[_]Feature{}),
};
result[@enumToInt(Feature.recipprec)] = .{
.llvm_name = "recipprec",
.description = "Assume higher precision reciprocal estimates",
.dependencies = featureSet(&[_]Feature{}),
};
result[@enumToInt(Feature.rop_protect)] = .{
.llvm_name = "rop-protect",
.description = "Add ROP protect",
.dependencies = featureSet(&[_]Feature{}),
};
result[@enumToInt(Feature.secure_plt)] = .{
.llvm_name = "secure-plt",
.description = "Enable secure plt mode",
@ -696,6 +722,7 @@ pub const cpu = struct {
.ppc_postra_sched,
.ppc_prera_sched,
.predictable_select_expensive,
.quadword_atomics,
.recipprec,
.stfiwx,
.two_const_nr,
@ -790,6 +817,7 @@ pub const cpu = struct {
.fuse_addis_load,
.htm,
.icbt,
.isa_v207_instructions,
.isel,
.ldbrx,
.lfiwax,
@ -798,6 +826,7 @@ pub const cpu = struct {
.popcntd,
.power8_vector,
.predictable_select_expensive,
.quadword_atomics,
.recipprec,
.stfiwx,
.two_const_nr,
@ -837,6 +866,7 @@ pub const cpu = struct {
.ppc_postra_sched,
.ppc_prera_sched,
.predictable_select_expensive,
.quadword_atomics,
.recipprec,
.stfiwx,
.two_const_nr,
@ -989,6 +1019,7 @@ pub const cpu = struct {
.fuse_addis_load,
.htm,
.icbt,
.isa_v207_instructions,
.isel,
.ldbrx,
.lfiwax,
@ -997,6 +1028,7 @@ pub const cpu = struct {
.popcntd,
.power8_vector,
.predictable_select_expensive,
.quadword_atomics,
.recipprec,
.stfiwx,
.two_const_nr,
@ -1033,6 +1065,7 @@ pub const cpu = struct {
.ppc_postra_sched,
.ppc_prera_sched,
.predictable_select_expensive,
.quadword_atomics,
.recipprec,
.stfiwx,
.two_const_nr,

View File

@ -185,7 +185,7 @@ pub const all_features = blk: {
};
result[@enumToInt(Feature.experimental_zvamo)] = .{
.llvm_name = "experimental-zvamo",
.description = "'Zvamo'(Vector AMO Operations)",
.description = "'Zvamo' (Vector AMO Operations)",
.dependencies = featureSet(&[_]Feature{
.experimental_v,
}),

View File

@ -5,6 +5,7 @@ const CpuFeature = std.Target.Cpu.Feature;
const CpuModel = std.Target.Cpu.Model;
pub const Feature = enum {
bear_enhancement,
deflate_conversion,
dfp_packed_conversion,
dfp_zoned_conversion,
@ -31,8 +32,11 @@ pub const Feature = enum {
miscellaneous_extensions,
miscellaneous_extensions_2,
miscellaneous_extensions_3,
nnp_assist,
population_count,
processor_activity_instrumentation,
processor_assist,
reset_dat_protection,
reset_reference_bits_multiple,
soft_float,
transactional_execution,
@ -41,6 +45,7 @@ pub const Feature = enum {
vector_enhancements_2,
vector_packed_decimal,
vector_packed_decimal_enhancement,
vector_packed_decimal_enhancement_2,
};
/// Re-exports the `featureSet` helper that `CpuFeature.feature_set_fns`
/// produces for this file's `Feature` enum; the entries below call it to
/// build their `dependencies` / `features` sets from `Feature` lists.
pub const featureSet = CpuFeature.feature_set_fns(Feature).featureSet;
@ -52,6 +57,11 @@ pub const all_features = blk: {
const len = @typeInfo(Feature).Enum.fields.len;
std.debug.assert(len <= CpuFeature.Set.needed_bit_count);
var result: [len]CpuFeature = undefined;
// `bear_enhancement` -> LLVM name "bear-enhancement"; stored at the slot given
// by the feature's enum value and declared with no dependencies.
result[@enumToInt(Feature.bear_enhancement)] = .{
.llvm_name = "bear-enhancement",
.description = "Assume that the BEAR-enhancement facility is installed",
.dependencies = featureSet(&[_]Feature{}),
};
result[@enumToInt(Feature.deflate_conversion)] = .{
.llvm_name = "deflate-conversion",
.description = "Assume that the deflate-conversion facility is installed",
@ -182,16 +192,31 @@ pub const all_features = blk: {
.description = "Assume that the miscellaneous-extensions facility 3 is installed",
.dependencies = featureSet(&[_]Feature{}),
};
// Five consecutive facility entries. Each is stored at the slot given by the
// feature's enum value, uses the hyphenated spelling of the enum tag as its
// LLVM name, and declares an empty dependency set.

// `nnp_assist` -> "nnp-assist".
result[@enumToInt(Feature.nnp_assist)] = .{
.llvm_name = "nnp-assist",
.description = "Assume that the NNP-assist facility is installed",
.dependencies = featureSet(&[_]Feature{}),
};
// `population_count` -> "population-count".
result[@enumToInt(Feature.population_count)] = .{
.llvm_name = "population-count",
.description = "Assume that the population-count facility is installed",
.dependencies = featureSet(&[_]Feature{}),
};
// `processor_activity_instrumentation` -> "processor-activity-instrumentation".
result[@enumToInt(Feature.processor_activity_instrumentation)] = .{
.llvm_name = "processor-activity-instrumentation",
.description = "Assume that the processor-activity-instrumentation facility is installed",
.dependencies = featureSet(&[_]Feature{}),
};
// `processor_assist` -> "processor-assist".
result[@enumToInt(Feature.processor_assist)] = .{
.llvm_name = "processor-assist",
.description = "Assume that the processor-assist facility is installed",
.dependencies = featureSet(&[_]Feature{}),
};
// `reset_dat_protection` -> "reset-dat-protection".
result[@enumToInt(Feature.reset_dat_protection)] = .{
.llvm_name = "reset-dat-protection",
.description = "Assume that the reset-DAT-protection facility is installed",
.dependencies = featureSet(&[_]Feature{}),
};
result[@enumToInt(Feature.reset_reference_bits_multiple)] = .{
.llvm_name = "reset-reference-bits-multiple",
.description = "Assume that the reset-reference-bits-multiple facility is installed",
@ -232,6 +257,11 @@ pub const all_features = blk: {
.description = "Assume that the vector packed decimal enhancement facility is installed",
.dependencies = featureSet(&[_]Feature{}),
};
// `vector_packed_decimal_enhancement_2` -> LLVM name
// "vector-packed-decimal-enhancement-2"; declared with no dependencies, so
// enabling it does not pull in any other feature through this table.
result[@enumToInt(Feature.vector_packed_decimal_enhancement_2)] = .{
.llvm_name = "vector-packed-decimal-enhancement-2",
.description = "Assume that the vector packed decimal enhancement facility 2 is installed",
.dependencies = featureSet(&[_]Feature{}),
};
const ti = @typeInfo(Feature);
for (result) |*elem, i| {
elem.index = i;
@ -368,6 +398,52 @@ pub const cpu = struct {
.vector_packed_decimal_enhancement,
}),
};
/// CPU model "arch14"; the same string is used as the LLVM CPU name.
///
/// The feature list is kept in alphabetical order. It includes `bear_enhancement`,
/// `nnp_assist`, `processor_activity_instrumentation`, `reset_dat_protection`
/// and `vector_packed_decimal_enhancement_2`, and does not include `soft_float`.
pub const arch14 = CpuModel{
.name = "arch14",
.llvm_name = "arch14",
.features = featureSet(&[_]Feature{
.bear_enhancement,
.deflate_conversion,
.dfp_packed_conversion,
.dfp_zoned_conversion,
.distinct_ops,
.enhanced_dat_2,
.enhanced_sort,
.execution_hint,
.fast_serialization,
.fp_extension,
.guarded_storage,
.high_word,
.insert_reference_bits_multiple,
.interlocked_access1,
.load_and_trap,
.load_and_zero_rightmost_byte,
.load_store_on_cond,
.load_store_on_cond_2,
.message_security_assist_extension3,
.message_security_assist_extension4,
.message_security_assist_extension5,
.message_security_assist_extension7,
.message_security_assist_extension8,
.message_security_assist_extension9,
.miscellaneous_extensions,
.miscellaneous_extensions_2,
.miscellaneous_extensions_3,
.nnp_assist,
.population_count,
.processor_activity_instrumentation,
.processor_assist,
.reset_dat_protection,
.reset_reference_bits_multiple,
.transactional_execution,
.vector,
.vector_enhancements_1,
.vector_enhancements_2,
.vector_packed_decimal,
.vector_packed_decimal_enhancement,
.vector_packed_decimal_enhancement_2,
}),
};
pub const arch8 = CpuModel{
.name = "arch8",
.llvm_name = "arch8",

View File

@ -15,7 +15,6 @@ pub const Feature = enum {
sign_ext,
simd128,
tail_call,
unimplemented_simd128,
};
/// Re-exports the `featureSet` helper that `CpuFeature.feature_set_fns`
/// produces for this file's `Feature` enum; table entries call it to build
/// their dependency sets from `Feature` lists.
pub const featureSet = CpuFeature.feature_set_fns(Feature).featureSet;
@ -77,13 +76,6 @@ pub const all_features = blk: {
.description = "Enable tail call instructions",
.dependencies = featureSet(&[_]Feature{}),
};
// `unimplemented_simd128` -> LLVM name "unimplemented-simd128". Unlike most
// entries it has a non-empty dependency set: it depends on `simd128`.
// NOTE(review): the surrounding hunks drop `unimplemented_simd128` from the
// `Feature` enum — confirm whether this entry is still meant to exist.
result[@enumToInt(Feature.unimplemented_simd128)] = .{
.llvm_name = "unimplemented-simd128",
.description = "Enable 128-bit SIMD not yet implemented in engines",
.dependencies = featureSet(&[_]Feature{
.simd128,
}),
};
const ti = @typeInfo(Feature);
for (result) |*elem, i| {
elem.index = i;

View File

@ -55,10 +55,12 @@ pub const Feature = enum {
fast_gather,
fast_hops,
fast_lzcnt,
fast_movbe,
fast_scalar_fsqrt,
fast_scalar_shift_masks,
fast_shld_rotate,
fast_variable_shuffle,
fast_variable_crosslane_shuffle,
fast_variable_perlane_shuffle,
fast_vector_fsqrt,
fast_vector_shift_masks,
fma,
@ -458,6 +460,11 @@ pub const all_features = blk: {
.description = "LZCNT instructions are as fast as most simple integer ops",
.dependencies = featureSet(&[_]Feature{}),
};
// `fast_movbe` -> LLVM name "fast-movbe". Per its description this is a
// preference hint (choose movbe over load+bswap / bswap+store). It declares no
// dependencies, so it does not itself depend on the separate `movbe` feature.
result[@enumToInt(Feature.fast_movbe)] = .{
.llvm_name = "fast-movbe",
.description = "Prefer a movbe over a single-use load + bswap / single-use bswap + store",
.dependencies = featureSet(&[_]Feature{}),
};
result[@enumToInt(Feature.fast_scalar_fsqrt)] = .{
.llvm_name = "fast-scalar-fsqrt",
.description = "Scalar SQRT is fast (disable Newton-Raphson)",
@ -473,9 +480,14 @@ pub const all_features = blk: {
.description = "SHLD can be used as a faster rotate",
.dependencies = featureSet(&[_]Feature{}),
};
result[@enumToInt(Feature.fast_variable_shuffle)] = .{
.llvm_name = "fast-variable-shuffle",
.description = "Shuffles with variable masks are fast",
// Two separate entries for variable-mask shuffle speed: one for cross-lane
// shuffles and one for per-lane shuffles. Both declare no dependencies, so
// neither implies the other; CPU models list them individually.

// `fast_variable_crosslane_shuffle` -> "fast-variable-crosslane-shuffle".
result[@enumToInt(Feature.fast_variable_crosslane_shuffle)] = .{
.llvm_name = "fast-variable-crosslane-shuffle",
.description = "Cross-lane shuffles with variable masks are fast",
.dependencies = featureSet(&[_]Feature{}),
};
// `fast_variable_perlane_shuffle` -> "fast-variable-perlane-shuffle".
result[@enumToInt(Feature.fast_variable_perlane_shuffle)] = .{
.llvm_name = "fast-variable-perlane-shuffle",
.description = "Per-lane shuffles with variable masks are fast",
.dependencies = featureSet(&[_]Feature{}),
};
result[@enumToInt(Feature.fast_vector_fsqrt)] = .{
@ -1010,26 +1022,27 @@ pub const cpu = struct {
.features = featureSet(&[_]Feature{
.@"64bit",
.adx,
.aes,
.avxvnni,
.bmi,
.bmi2,
.cldemote,
.clflushopt,
.clwb,
.cmov,
.cx16,
.ermsb,
.f16c,
.false_deps_popcnt,
.fast_15bytenop,
.fast_gather,
.fast_scalar_fsqrt,
.fast_shld_rotate,
.fast_variable_shuffle,
.fast_variable_crosslane_shuffle,
.fast_variable_perlane_shuffle,
.fast_vector_fsqrt,
.fma,
.fsgsbase,
.fxsr,
.gfni,
.hreset,
.idivq_to_divl,
.invpcid,
@ -1037,19 +1050,27 @@ pub const cpu = struct {
.macrofusion,
.mmx,
.movbe,
.movdir64b,
.movdiri,
.nopl,
.pclmul,
.pconfig,
.pku,
.popcnt,
.prfchw,
.ptwrite,
.rdpid,
.rdrnd,
.rdseed,
.sahf,
.serialize,
.sgx,
.sha,
.shstk,
.slow_3ops_lea,
.vaes,
.vpclmulqdq,
.vzeroupper,
.waitpkg,
.widekl,
.x87,
.xsavec,
.xsaveopt,
@ -1292,6 +1313,7 @@ pub const cpu = struct {
.f16c,
.fast_11bytenop,
.fast_bextr,
.fast_movbe,
.fast_scalar_shift_masks,
.fma,
.fxsr,
@ -1324,6 +1346,7 @@ pub const cpu = struct {
.f16c,
.fast_11bytenop,
.fast_bextr,
.fast_movbe,
.fast_scalar_shift_masks,
.fma,
.fsgsbase,
@ -1359,6 +1382,7 @@ pub const cpu = struct {
.f16c,
.fast_11bytenop,
.fast_bextr,
.fast_movbe,
.fast_scalar_shift_masks,
.fma,
.fsgsbase,
@ -1424,7 +1448,8 @@ pub const cpu = struct {
.fast_15bytenop,
.fast_scalar_fsqrt,
.fast_shld_rotate,
.fast_variable_shuffle,
.fast_variable_crosslane_shuffle,
.fast_variable_perlane_shuffle,
.fma,
.fsgsbase,
.fxsr,
@ -1485,6 +1510,7 @@ pub const cpu = struct {
.fast_bextr,
.fast_hops,
.fast_lzcnt,
.fast_movbe,
.fast_scalar_shift_masks,
.fast_vector_shift_masks,
.fxsr,
@ -1548,7 +1574,8 @@ pub const cpu = struct {
.fast_gather,
.fast_scalar_fsqrt,
.fast_shld_rotate,
.fast_variable_shuffle,
.fast_variable_crosslane_shuffle,
.fast_variable_perlane_shuffle,
.fast_vector_fsqrt,
.fsgsbase,
.fxsr,
@ -1567,7 +1594,6 @@ pub const cpu = struct {
.rdrnd,
.rdseed,
.sahf,
.sgx,
.sha,
.slow_3ops_lea,
.vzeroupper,
@ -1601,7 +1627,8 @@ pub const cpu = struct {
.fast_gather,
.fast_scalar_fsqrt,
.fast_shld_rotate,
.fast_variable_shuffle,
.fast_variable_crosslane_shuffle,
.fast_variable_perlane_shuffle,
.fast_vector_fsqrt,
.fsgsbase,
.fxsr,
@ -1652,7 +1679,8 @@ pub const cpu = struct {
.fast_gather,
.fast_scalar_fsqrt,
.fast_shld_rotate,
.fast_variable_shuffle,
.fast_variable_crosslane_shuffle,
.fast_variable_perlane_shuffle,
.fast_vector_fsqrt,
.fsgsbase,
.fxsr,
@ -1714,7 +1742,8 @@ pub const cpu = struct {
.fast_15bytenop,
.fast_scalar_fsqrt,
.fast_shld_rotate,
.fast_variable_shuffle,
.fast_variable_crosslane_shuffle,
.fast_variable_perlane_shuffle,
.fma,
.fsgsbase,
.fxsr,
@ -1844,6 +1873,7 @@ pub const cpu = struct {
.cmov,
.cx16,
.false_deps_popcnt,
.fast_movbe,
.fsgsbase,
.fxsr,
.mmx,
@ -1877,6 +1907,7 @@ pub const cpu = struct {
.clflushopt,
.cmov,
.cx16,
.fast_movbe,
.fsgsbase,
.fxsr,
.mmx,
@ -1890,7 +1921,6 @@ pub const cpu = struct {
.rdrnd,
.rdseed,
.sahf,
.sgx,
.sha,
.slow_incdec,
.slow_lea,
@ -1921,7 +1951,8 @@ pub const cpu = struct {
.fast_15bytenop,
.fast_scalar_fsqrt,
.fast_shld_rotate,
.fast_variable_shuffle,
.fast_variable_crosslane_shuffle,
.fast_variable_perlane_shuffle,
.fma,
.fsgsbase,
.fxsr,
@ -1960,7 +1991,6 @@ pub const cpu = struct {
.bmi,
.bmi2,
.clflushopt,
.clwb,
.cmov,
.cx16,
.ermsb,
@ -1968,7 +1998,8 @@ pub const cpu = struct {
.fast_gather,
.fast_scalar_fsqrt,
.fast_shld_rotate,
.fast_variable_shuffle,
.fast_variable_crosslane_shuffle,
.fast_variable_perlane_shuffle,
.fast_vector_fsqrt,
.fsgsbase,
.fsrm,
@ -1989,7 +2020,6 @@ pub const cpu = struct {
.rdrnd,
.rdseed,
.sahf,
.sgx,
.sha,
.slow_3ops_lea,
.vaes,
@ -2027,7 +2057,8 @@ pub const cpu = struct {
.fast_gather,
.fast_scalar_fsqrt,
.fast_shld_rotate,
.fast_variable_shuffle,
.fast_variable_crosslane_shuffle,
.fast_variable_perlane_shuffle,
.fast_vector_fsqrt,
.fsgsbase,
.fsrm,
@ -2049,7 +2080,6 @@ pub const cpu = struct {
.rdrnd,
.rdseed,
.sahf,
.sgx,
.sha,
.slow_3ops_lea,
.vaes,
@ -2175,6 +2205,7 @@ pub const cpu = struct {
.cmov,
.cx16,
.fast_gather,
.fast_movbe,
.fsgsbase,
.fxsr,
.idivq_to_divl,
@ -2214,6 +2245,7 @@ pub const cpu = struct {
.cmov,
.cx16,
.fast_gather,
.fast_movbe,
.fsgsbase,
.fxsr,
.idivq_to_divl,
@ -2472,6 +2504,64 @@ pub const cpu = struct {
.x87,
}),
};
/// CPU model "rocketlake"; the same string is used as the LLVM CPU name.
///
/// The feature list is kept in alphabetical order. It enables both
/// `fast_variable_crosslane_shuffle` and `fast_variable_perlane_shuffle`,
/// and it lists neither `sgx` nor `clwb`.
pub const rocketlake = CpuModel{
.name = "rocketlake",
.llvm_name = "rocketlake",
.features = featureSet(&[_]Feature{
.@"64bit",
.adx,
.avx512bitalg,
.avx512cd,
.avx512dq,
.avx512ifma,
.avx512vbmi,
.avx512vbmi2,
.avx512vl,
.avx512vnni,
.avx512vpopcntdq,
.bmi,
.bmi2,
.clflushopt,
.cmov,
.cx16,
.ermsb,
.fast_15bytenop,
.fast_gather,
.fast_scalar_fsqrt,
.fast_shld_rotate,
.fast_variable_crosslane_shuffle,
.fast_variable_perlane_shuffle,
.fast_vector_fsqrt,
.fsgsbase,
.fsrm,
.fxsr,
.gfni,
.idivq_to_divl,
.invpcid,
.lzcnt,
.macrofusion,
.mmx,
.movbe,
.nopl,
.pku,
.popcnt,
.prefer_256_bit,
.prfchw,
.rdpid,
.rdrnd,
.rdseed,
.sahf,
.sha,
.slow_3ops_lea,
.vaes,
.vpclmulqdq,
.vzeroupper,
.x87,
.xsavec,
.xsaveopt,
.xsaves,
}),
};
pub const sandybridge = CpuModel{
.name = "sandybridge",
.llvm_name = "sandybridge",
@ -2532,7 +2622,8 @@ pub const cpu = struct {
.fast_gather,
.fast_scalar_fsqrt,
.fast_shld_rotate,
.fast_variable_shuffle,
.fast_variable_crosslane_shuffle,
.fast_variable_perlane_shuffle,
.fast_vector_fsqrt,
.fsgsbase,
.fsrm,
@ -2558,7 +2649,6 @@ pub const cpu = struct {
.rdseed,
.sahf,
.serialize,
.sgx,
.sha,
.shstk,
.slow_3ops_lea,
@ -2584,6 +2674,7 @@ pub const cpu = struct {
.cx16,
.false_deps_popcnt,
.fast_7bytenop,
.fast_movbe,
.fxsr,
.idivq_to_divl,
.mmx,
@ -2626,7 +2717,8 @@ pub const cpu = struct {
.fast_gather,
.fast_scalar_fsqrt,
.fast_shld_rotate,
.fast_variable_shuffle,
.fast_variable_crosslane_shuffle,
.fast_variable_perlane_shuffle,
.fast_vector_fsqrt,
.fsgsbase,
.fxsr,
@ -2673,7 +2765,8 @@ pub const cpu = struct {
.fast_gather,
.fast_scalar_fsqrt,
.fast_shld_rotate,
.fast_variable_shuffle,
.fast_variable_crosslane_shuffle,
.fast_variable_perlane_shuffle,
.fast_vector_fsqrt,
.fma,
.fsgsbase,
@ -2691,7 +2784,6 @@ pub const cpu = struct {
.rdrnd,
.rdseed,
.sahf,
.sgx,
.slow_3ops_lea,
.vzeroupper,
.x87,
@ -2723,7 +2815,8 @@ pub const cpu = struct {
.fast_gather,
.fast_scalar_fsqrt,
.fast_shld_rotate,
.fast_variable_shuffle,
.fast_variable_crosslane_shuffle,
.fast_variable_perlane_shuffle,
.fast_vector_fsqrt,
.fsgsbase,
.fxsr,
@ -2759,6 +2852,7 @@ pub const cpu = struct {
.cx16,
.false_deps_popcnt,
.fast_7bytenop,
.fast_movbe,
.fxsr,
.idivq_to_divl,
.mmx,
@ -2805,7 +2899,8 @@ pub const cpu = struct {
.fast_gather,
.fast_scalar_fsqrt,
.fast_shld_rotate,
.fast_variable_shuffle,
.fast_variable_crosslane_shuffle,
.fast_variable_perlane_shuffle,
.fast_vector_fsqrt,
.fsgsbase,
.fsrm,
@ -2828,7 +2923,6 @@ pub const cpu = struct {
.rdrnd,
.rdseed,
.sahf,
.sgx,
.sha,
.shstk,
.slow_3ops_lea,
@ -2851,6 +2945,7 @@ pub const cpu = struct {
.clwb,
.cmov,
.cx16,
.fast_movbe,
.fsgsbase,
.fxsr,
.gfni,
@ -2865,7 +2960,6 @@ pub const cpu = struct {
.rdrnd,
.rdseed,
.sahf,
.sgx,
.sha,
.slow_incdec,
.slow_lea,
@ -2978,7 +3072,8 @@ pub const cpu = struct {
.fast_15bytenop,
.fast_scalar_fsqrt,
.fast_shld_rotate,
.fast_variable_shuffle,
.fast_variable_crosslane_shuffle,
.fast_variable_perlane_shuffle,
.fma,
.fxsr,
.idivq_to_divl,
@ -3013,7 +3108,8 @@ pub const cpu = struct {
.fast_gather,
.fast_scalar_fsqrt,
.fast_shld_rotate,
.fast_variable_shuffle,
.fast_variable_crosslane_shuffle,
.fast_variable_perlane_shuffle,
.fast_vector_fsqrt,
.fxsr,
.idivq_to_divl,
@ -3065,6 +3161,7 @@ pub const cpu = struct {
.fast_15bytenop,
.fast_bextr,
.fast_lzcnt,
.fast_movbe,
.fast_scalar_shift_masks,
.fma,
.fsgsbase,
@ -3110,6 +3207,7 @@ pub const cpu = struct {
.fast_15bytenop,
.fast_bextr,
.fast_lzcnt,
.fast_movbe,
.fast_scalar_shift_masks,
.fma,
.fsgsbase,
@ -3156,13 +3254,16 @@ pub const cpu = struct {
.fast_15bytenop,
.fast_bextr,
.fast_lzcnt,
.fast_movbe,
.fast_scalar_shift_masks,
.fast_variable_perlane_shuffle,
.fma,
.fsgsbase,
.fsrm,
.fxsr,
.invpcid,
.lzcnt,
.macrofusion,
.mmx,
.movbe,
.mwaitx,

View File

@ -219,7 +219,6 @@ const llvm_targets = [_]LlvmTarget{
"use_postra_scheduler",
"use_reciprocal_square_root",
"v8a",
"zcz_fp",
},
},
.{
@ -236,7 +235,6 @@ const llvm_targets = [_]LlvmTarget{
"slow_paired_128",
"use_postra_scheduler",
"v8a",
"zcz_fp",
},
},
.{