diff --git a/lib/std/target.zig b/lib/std/target.zig index 1b9f0084c8..69800c9f8b 100644 --- a/lib/std/target.zig +++ b/lib/std/target.zig @@ -430,6 +430,7 @@ pub const Target = struct { }; pub const aarch64 = @import("target/aarch64.zig"); + pub const arc = @import("target/arc.zig"); pub const amdgpu = @import("target/amdgpu.zig"); pub const arm = @import("target/arm.zig"); pub const avr = @import("target/avr.zig"); diff --git a/lib/std/target/aarch64.zig b/lib/std/target/aarch64.zig index 1073cf1cdb..53363ee982 100644 --- a/lib/std/target/aarch64.zig +++ b/lib/std/target/aarch64.zig @@ -65,6 +65,7 @@ pub const Feature = enum { fuse_csel, fuse_literals, harden_sls_blr, + harden_sls_nocomdat, harden_sls_retbr, hcx, i8mm, @@ -81,6 +82,7 @@ pub const Feature = enum { neoverse_n2, neoverse_v1, no_neg_immediates, + no_zcz_fp, nv, outline_atomics, pan, @@ -120,6 +122,7 @@ pub const Feature = enum { reserve_x6, reserve_x7, reserve_x9, + rme, sb, sel2, sha2, @@ -128,6 +131,9 @@ pub const Feature = enum { slow_paired_128, slow_strqro_store, sm4, + sme, + sme_f64, + sme_i64, spe, spe_eef, specrestrict, @@ -148,7 +154,6 @@ pub const Feature = enum { tracev8_4, trbe, uaops, - use_aa, use_experimental_zeroing_pseudos, use_postra_scheduler, use_reciprocal_square_root, @@ -166,7 +171,6 @@ pub const Feature = enum { xs, zcm, zcz, - zcz_fp, zcz_fp_workaround, zcz_gp, }; @@ -594,6 +598,11 @@ pub const all_features = blk: { .description = "Harden against straight line speculation across BLR instructions", .dependencies = featureSet(&[_]Feature{}), }; + result[@enumToInt(Feature.harden_sls_nocomdat)] = .{ + .llvm_name = "harden-sls-nocomdat", + .description = "Generate thunk code for SLS mitigation in the normal text section", + .dependencies = featureSet(&[_]Feature{}), + }; result[@enumToInt(Feature.harden_sls_retbr)] = .{ .llvm_name = "harden-sls-retbr", .description = "Harden against straight line speculation across RET and BR instructions", @@ -660,8 +669,10 @@ pub const all_features = blk: { .crypto, .dotprod, .fullfp16, + .fuse_aes, .rcpc, .ssbs, + .use_postra_scheduler, .v8_2a, }), }; @@ -672,9 +683,11 @@ pub const all_features = blk: { .crypto, .dotprod, .fullfp16, + .fuse_aes, .rcpc, .spe, .ssbs, + .use_postra_scheduler, .v8_2a, }), }; @@ -683,10 +696,13 @@ pub const all_features = blk: { .description = "Neoverse N2 ARM processors", .dependencies = featureSet(&[_]Feature{ .bf16, + .crypto, .ete, + .fuse_aes, .i8mm, .mte, .sve2_bitperm, + .use_postra_scheduler, .v8_5a, }), }; @@ -714,6 +730,11 @@ pub const all_features = blk: { .description = "Convert immediates and instructions to their negated or complemented equivalent when the immediate does not fit in the encoding.", .dependencies = featureSet(&[_]Feature{}), }; + result[@enumToInt(Feature.no_zcz_fp)] = .{ + .llvm_name = "no-zcz-fp", + .description = "Has no zero-cycle zeroing instructions for FP registers", + .dependencies = featureSet(&[_]Feature{}), + }; result[@enumToInt(Feature.nv)] = .{ .llvm_name = "nv", .description = "Enable v8.4-A Nested Virtualization Enchancement", @@ -913,6 +934,11 @@ pub const all_features = blk: { .description = "Reserve X9, making it unavailable as a GPR", .dependencies = featureSet(&[_]Feature{}), }; + result[@enumToInt(Feature.rme)] = .{ + .llvm_name = "rme", + .description = "Enable Realm Management Extension", + .dependencies = featureSet(&[_]Feature{}), + }; result[@enumToInt(Feature.sb)] = .{ .llvm_name = "sb", .description = "Enable v8.5 Speculation Barrier", @@ -959,6 +985,28 @@ pub const all_features = blk: { .neon, }), }; + result[@enumToInt(Feature.sme)] = .{ + .llvm_name = "sme", + .description = "Enable Scalable Matrix Extension (SME)", + .dependencies = featureSet(&[_]Feature{ + .bf16, + .sve2, + }), + }; + result[@enumToInt(Feature.sme_f64)] = .{ + .llvm_name = "sme-f64", + .description = "Enable Scalable Matrix Extension (SME) F64F64 instructions", + .dependencies = featureSet(&[_]Feature{ + .sme, + }), + }; + result[@enumToInt(Feature.sme_i64)] = .{ + .llvm_name = "sme-i64", + .description = "Enable Scalable Matrix Extension (SME) I16I64 instructions", + .dependencies = featureSet(&[_]Feature{ + .sme, + }), + }; result[@enumToInt(Feature.spe)] = .{ .llvm_name = "spe", .description = "Enable Statistical Profiling extension", @@ -1074,11 +1122,6 @@ pub const all_features = blk: { .description = "Enable v8.2 UAO PState", .dependencies = featureSet(&[_]Feature{}), }; - result[@enumToInt(Feature.use_aa)] = .{ - .llvm_name = "use-aa", - .description = "Use alias analysis during codegen", - .dependencies = featureSet(&[_]Feature{}), - }; result[@enumToInt(Feature.use_experimental_zeroing_pseudos)] = .{ .llvm_name = "use-experimental-zeroing-pseudos", .description = "Hint to the compiler that the MOVPRFX instruction is merged with destructive operations", @@ -1251,15 +1294,9 @@ pub const all_features = blk: { .llvm_name = "zcz", .description = "Has zero-cycle zeroing instructions", .dependencies = featureSet(&[_]Feature{ - .zcz_fp, .zcz_gp, }), }; - result[@enumToInt(Feature.zcz_fp)] = .{ - .llvm_name = "zcz-fp", - .description = "Has zero-cycle zeroing instructions for FP registers", - .dependencies = featureSet(&[_]Feature{}), - }; result[@enumToInt(Feature.zcz_fp_workaround)] = .{ .llvm_name = "zcz-fp-workaround", .description = "The zero-cycle floating-point zeroing instruction has a bug", @@ -1403,7 +1440,62 @@ pub const cpu = struct { .name = "apple_latest", .llvm_name = "apple-latest", .features = featureSet(&[_]Feature{ - .apple_a13, + .aggressive_fma, + .alternate_sextload_cvt_f32_pattern, + .altnzcv, + .arith_bcc_fusion, + .arith_cbz_fusion, + .ccdp, + .crypto, + .disable_latency_sched_heuristic, + .fp16fml, + .fptoint, + .fuse_address, + .fuse_aes, + .fuse_arith_logic, + .fuse_crypto_eor, + .fuse_csel, + .fuse_literals, + .perfmon, + .predres, + .sb, + .sha3, + .specrestrict, + .ssbs, + .v8_4a, + .zcm, + .zcz, + }), + }; + pub const apple_m1 = CpuModel{ + .name = "apple_m1", + .llvm_name = "apple-m1", + .features = featureSet(&[_]Feature{ + .aggressive_fma, + .alternate_sextload_cvt_f32_pattern, + .altnzcv, + .arith_bcc_fusion, + .arith_cbz_fusion, + .ccdp, + .crypto, + .disable_latency_sched_heuristic, + .fp16fml, + .fptoint, + .fuse_address, + .fuse_aes, + .fuse_arith_logic, + .fuse_crypto_eor, + .fuse_csel, + .fuse_literals, + .perfmon, + .predres, + .sb, + .sha3, + .specrestrict, + .ssbs, + .v8_4a, + .zcm, + .zcz, }), }; pub const apple_s4 = CpuModel{ @@ -1459,7 +1551,6 @@ pub const cpu = struct { .custom_cheap_as_move, .fuse_aes, .perfmon, - .use_aa, .use_postra_scheduler, .v8a, }), @@ -1474,6 +1565,7 @@ pub const cpu = struct { .fuse_aes, .perfmon, .rcpc, + .use_postra_scheduler, .v8_2a, }), }; @@ -1649,7 +1741,6 @@ pub const cpu = struct { .use_postra_scheduler, .use_reciprocal_square_root, .v8a, - .zcz_fp, }), }; pub const exynos_m2 = CpuModel{ @@ -1666,7 +1757,6 @@ pub const cpu = struct { .slow_paired_128, .use_postra_scheduler, .v8a, - .zcz_fp, }), }; pub const exynos_m3 = CpuModel{ @@ -1686,7 +1776,6 @@ pub const cpu = struct { .predictable_select_expensive, .use_postra_scheduler, .v8a, - .zcz_fp, }), }; pub const exynos_m4 = CpuModel{ @@ -1824,7 +1913,6 @@ pub const cpu = struct { .perfmon, .predictable_select_expensive, .strict_align, - .use_aa, .use_postra_scheduler, .v8_3a, }), diff --git a/lib/std/target/amdgpu.zig b/lib/std/target/amdgpu.zig index f8002071be..2fb8a6fa80 100644 --- a/lib/std/target/amdgpu.zig +++ b/lib/std/target/amdgpu.zig @@ -9,6 +9,7 @@ pub const Feature = enum { a16, add_no_carry_insts, aperture_regs, + architected_flat_scratch, atomic_fadd_insts, auto_waitcnt_before_barrier, ci_insts, @@ -20,11 +21,14 @@ pub const Feature = enum { dot4_insts, dot5_insts, dot6_insts, + dot7_insts, dpp, dpp8, + dpp_64bit, ds_src2_insts, enable_ds128, enable_prt_strict_null, + extended_image_insts, fast_denormal_f32, fast_fmaf, flat_address_space, @@ -36,16 +40,19 @@ pub const Feature = enum { fma_mix_insts, fmaf, fp64, + full_rate_64_ops, g16, gcn3_encoding, get_wave_id_inst, gfx10, gfx10_3_insts, + gfx10_a_encoding, gfx10_b_encoding, gfx10_insts, gfx7_gfx8_gfx9_insts, gfx8_insts, gfx9, + gfx90a_insts, gfx9_insts, half_rate_64_ops, image_gather4_d16_bug, @@ -70,11 +77,18 @@ pub const Feature = enum { mfma_inline_literal_bug, mimg_r128, movrel, + negative_scratch_offset_bug, + negative_unaligned_scratch_offset_bug, no_data_dep_hazard, no_sdst_cmpx, + nsa_clause_bug, nsa_encoding, + nsa_max_size_13, + nsa_max_size_5, nsa_to_vmem_bug, offset_3f_bug, + packed_fp32_ops, + packed_tid, pk_fmac_f16_inst, promote_alloca, r128_a16, @@ -92,11 +106,13 @@ pub const Feature = enum { sdwa_sdst, sea_islands, sgpr_init_bug, + shader_cycles_register, si_scheduler, smem_to_vector_write_hazard, southern_islands, sramecc, sramecc_support, + tgsplit, trap_handler, trig_reduced_range, unaligned_access_mode, @@ -149,6 +165,11 @@ pub const all_features = blk: { .description = "Has Memory Aperture Base and Size Registers", .dependencies = featureSet(&[_]Feature{}), }; + result[@enumToInt(Feature.architected_flat_scratch)] = .{ + .llvm_name = "architected-flat-scratch", + .description = "Flat Scratch register is a readonly SPI initialized architected register", + .dependencies = featureSet(&[_]Feature{}), + }; result[@enumToInt(Feature.atomic_fadd_insts)] = .{ .llvm_name = "atomic-fadd-insts", .description = "Has buffer_atomic_add_f32, buffer_atomic_pk_add_f16, global_atomic_add_f32, global_atomic_pk_add_f16 instructions", @@ -183,7 +204,7 @@ pub const all_features = blk: { }; result[@enumToInt(Feature.dot2_insts)] = .{ .llvm_name = "dot2-insts", - .description = "Has v_dot2_f32_f16, v_dot2_i32_i16, v_dot2_u32_u16, v_dot4_u32_u8, v_dot8_u32_u4 instructions", + .description = "Has v_dot2_i32_i16, v_dot2_u32_u16 instructions", .dependencies = featureSet(&[_]Feature{}), }; result[@enumToInt(Feature.dot3_insts)] = .{ @@ -206,6 +227,11 @@ pub const all_features = blk: { .description = "Has v_dot4c_i32_i8 instruction", .dependencies = featureSet(&[_]Feature{}), }; + result[@enumToInt(Feature.dot7_insts)] = .{ + .llvm_name = "dot7-insts", + .description = "Has v_dot2_f32_f16, v_dot4_u32_u8, v_dot8_u32_u4 instructions", + .dependencies = featureSet(&[_]Feature{}), + }; result[@enumToInt(Feature.dpp)] = .{ .llvm_name = "dpp", .description = "Support DPP (Data Parallel Primitives) extension", @@ -216,6 +242,11 @@ pub const all_features = blk: { .description = "Support DPP8 (Data Parallel Primitives) extension", .dependencies = featureSet(&[_]Feature{}), }; + result[@enumToInt(Feature.dpp_64bit)] = .{ + .llvm_name = "dpp-64bit", + .description = "Support DPP (Data Parallel Primitives) extension", + .dependencies = featureSet(&[_]Feature{}), + }; result[@enumToInt(Feature.ds_src2_insts)] = .{ .llvm_name = "ds-src2-insts", .description = "Has ds_*_src2 instructions", @@ -231,6 +262,11 @@ pub const all_features = blk: { .description = "Enable zeroing of result registers for sparse texture fetches", .dependencies = featureSet(&[_]Feature{}), }; + result[@enumToInt(Feature.extended_image_insts)] = .{ + .llvm_name = "extended-image-insts", + .description = "Support mips != 0, lod != 0, gather4, and get_lod", + .dependencies = featureSet(&[_]Feature{}), + }; result[@enumToInt(Feature.fast_denormal_f32)] = .{ .llvm_name = "fast-denormal-f32", .description = "Enabling denormals does not cause f32 instructions to run at f64 rates", @@ -268,7 +304,7 @@ pub const all_features = blk: { }; result[@enumToInt(Feature.flat_segment_offset_bug)] = .{ .llvm_name = "flat-segment-offset-bug", - .description = "GFX10 bug, inst_offset ignored in flat segment", + .description = "GFX10 bug where inst_offset is ignored when flat instructions access global memory", .dependencies = featureSet(&[_]Feature{}), }; result[@enumToInt(Feature.fma_mix_insts)] = .{ @@ -286,6 +322,11 @@ pub const all_features = blk: { .description = "Enable double precision operations", .dependencies = featureSet(&[_]Feature{}), }; + result[@enumToInt(Feature.full_rate_64_ops)] = .{ + .llvm_name = "full-rate-64-ops", + .description = "Most fp64 instructions are full rate", + .dependencies = featureSet(&[_]Feature{}), + }; result[@enumToInt(Feature.g16)] = .{ .llvm_name = "g16", .description = "Support G16 for 16-bit gradient image operands", @@ -312,6 +353,7 @@ pub const all_features = blk: { .ci_insts, .dpp, .dpp8, + .extended_image_insts, .fast_denormal_f32, .fast_fmaf, .flat_address_space, @@ -334,6 +376,7 @@ pub const all_features = blk: { .pk_fmac_f16_inst, .register_banking, .s_memrealtime, + .s_memtime_inst, .sdwa, .sdwa_omod, .sdwa_scalar, @@ -350,6 +393,11 @@ pub const all_features = blk: { .description = "Additional instructions for GFX10.3", .dependencies = featureSet(&[_]Feature{}), }; + result[@enumToInt(Feature.gfx10_a_encoding)] = .{ + .llvm_name = "gfx10_a-encoding", + .description = "Has BVH ray tracing instructions", + .dependencies = featureSet(&[_]Feature{}), + }; result[@enumToInt(Feature.gfx10_b_encoding)] = .{ .llvm_name = "gfx10_b-encoding", .description = "Encoding format GFX10_B", @@ -379,7 +427,6 @@ pub const all_features = blk: { .aperture_regs, .ci_insts, .dpp, - .ds_src2_insts, .fast_denormal_f32, .fast_fmaf, .flat_address_space, @@ -394,7 +441,7 @@ pub const all_features = blk: { .int_clamp_insts, .inv_2pi_inline_imm, .localmemorysize65536, - .mad_mac_f32_insts, + .negative_scratch_offset_bug, .r128_a16, .s_memrealtime, .s_memtime_inst, @@ -413,6 +460,11 @@ pub const all_features = blk: { .xnack_support, }), }; + result[@enumToInt(Feature.gfx90a_insts)] = .{ + .llvm_name = "gfx90a-insts", + .description = "Additional instructions for GFX90A+", + .dependencies = featureSet(&[_]Feature{}), + }; result[@enumToInt(Feature.gfx9_insts)] = .{ .llvm_name = "gfx9-insts", .description = "Additional instructions for GFX9+", @@ -533,6 +585,16 @@ pub const all_features = blk: { .description = "Has v_movrel*_b32 instructions", .dependencies = featureSet(&[_]Feature{}), }; + result[@enumToInt(Feature.negative_scratch_offset_bug)] = .{ + .llvm_name = "negative-scratch-offset-bug", + .description = "Negative immediate offsets in scratch instructions with an SGPR offset page fault on GFX9", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@enumToInt(Feature.negative_unaligned_scratch_offset_bug)] = .{ + .llvm_name = "negative-unaligned-scratch-offset-bug", + .description = "Scratch instructions with a VGPR offset and a negative immediate offset that is not a multiple of 4 read wrong memory on GFX10", + .dependencies = featureSet(&[_]Feature{}), + }; result[@enumToInt(Feature.no_data_dep_hazard)] = .{ .llvm_name = "no-data-dep-hazard", .description = "Does not need SW waitstates", @@ -543,11 +605,26 @@ pub const all_features = blk: { .description = "V_CMPX does not write VCC/SGPR in addition to EXEC", .dependencies = featureSet(&[_]Feature{}), }; + result[@enumToInt(Feature.nsa_clause_bug)] = .{ + .llvm_name = "nsa-clause-bug", + .description = "MIMG-NSA in a hard clause has unpredictable results on GFX10.1", + .dependencies = featureSet(&[_]Feature{}), + }; result[@enumToInt(Feature.nsa_encoding)] = .{ .llvm_name = "nsa-encoding", .description = "Support NSA encoding for image instructions", .dependencies = featureSet(&[_]Feature{}), }; + result[@enumToInt(Feature.nsa_max_size_13)] = .{ + .llvm_name = "nsa-max-size-13", + .description = "The maximum non-sequential address size in VGPRs.", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@enumToInt(Feature.nsa_max_size_5)] = .{ + .llvm_name = "nsa-max-size-5", + .description = "The maximum non-sequential address size in VGPRs.", + .dependencies = featureSet(&[_]Feature{}), + }; result[@enumToInt(Feature.nsa_to_vmem_bug)] = .{ .llvm_name = "nsa-to-vmem-bug", .description = "MIMG-NSA followed by VMEM fail if EXEC_LO or EXEC_HI equals zero", @@ -558,6 +635,16 @@ pub const all_features = blk: { .description = "Branch offset of 3f hardware bug", .dependencies = featureSet(&[_]Feature{}), }; + result[@enumToInt(Feature.packed_fp32_ops)] = .{ + .llvm_name = "packed-fp32-ops", + .description = "Support packed fp32 instructions", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@enumToInt(Feature.packed_tid)] = .{ + .llvm_name = "packed-tid", + .description = "Workitem IDs are packed into v0 at kernel launch", + .dependencies = featureSet(&[_]Feature{}), + }; result[@enumToInt(Feature.pk_fmac_f16_inst)] = .{ .llvm_name = "pk-fmac-f16-inst", .description = "Has v_pk_fmac_f16 instruction", @@ -639,6 +726,7 @@ pub const all_features = blk: { .dependencies = featureSet(&[_]Feature{ .ci_insts, .ds_src2_insts, + .extended_image_insts, .flat_address_space, .fp64, .gfx7_gfx8_gfx9_insts, @@ -657,6 +745,11 @@ pub const all_features = blk: { .description = "VI SGPR initialization bug requiring a fixed SGPR allocation size", .dependencies = featureSet(&[_]Feature{}), }; + result[@enumToInt(Feature.shader_cycles_register)] = .{ + .llvm_name = "shader-cycles-register", + .description = "Has SHADER_CYCLES hardware register", + .dependencies = featureSet(&[_]Feature{}), + }; result[@enumToInt(Feature.si_scheduler)] = .{ .llvm_name = "si-scheduler", .description = "Enable SI Machine Scheduler", @@ -672,6 +765,7 @@ pub const all_features = blk: { .description = "SOUTHERN_ISLANDS GPU generation", .dependencies = featureSet(&[_]Feature{ .ds_src2_insts, + .extended_image_insts, .fp64, .ldsbankcount32, .localmemorysize32768, @@ -693,6 +787,11 @@ pub const all_features = blk: { .description = "Hardware supports SRAMECC", .dependencies = featureSet(&[_]Feature{}), }; + result[@enumToInt(Feature.tgsplit)] = .{ + .llvm_name = "tgsplit", + .description = "Enable threadgroup split execution", + .dependencies = featureSet(&[_]Feature{}), + }; result[@enumToInt(Feature.trap_handler)] = .{ .llvm_name = "trap-handler", .description = "Trap handler support", @@ -761,6 +860,7 @@ pub const all_features = blk: { .ci_insts, .dpp, .ds_src2_insts, + .extended_image_insts, .fast_denormal_f32, .flat_address_space, .fp64, @@ -892,10 +992,12 @@ pub const cpu = struct { .lds_misaligned_bug, .ldsbankcount32, .mad_mac_f32_insts, + .negative_unaligned_scratch_offset_bug, + .nsa_clause_bug, .nsa_encoding, + .nsa_max_size_5, .nsa_to_vmem_bug, .offset_3f_bug, - .s_memtime_inst, .scalar_atomics, .scalar_flat_scratch_insts, .scalar_stores, @@ -916,6 +1018,7 @@ pub const cpu = struct { .dot2_insts, .dot5_insts, .dot6_insts, + .dot7_insts, .ds_src2_insts, .flat_segment_offset_bug, .get_wave_id_inst, @@ -925,10 +1028,12 @@ pub const cpu = struct { .lds_misaligned_bug, .ldsbankcount32, .mad_mac_f32_insts, + .negative_unaligned_scratch_offset_bug, + .nsa_clause_bug, .nsa_encoding, + .nsa_max_size_5, .nsa_to_vmem_bug, .offset_3f_bug, - .s_memtime_inst, .scalar_atomics, .scalar_flat_scratch_insts, .scalar_stores, @@ -949,6 +1054,7 @@ pub const cpu = struct { .dot2_insts, .dot5_insts, .dot6_insts, + .dot7_insts, .ds_src2_insts, .flat_segment_offset_bug, .get_wave_id_inst, @@ -958,10 +1064,44 @@ pub const cpu = struct { .lds_misaligned_bug, .ldsbankcount32, .mad_mac_f32_insts, + .negative_unaligned_scratch_offset_bug, + .nsa_clause_bug, .nsa_encoding, + .nsa_max_size_5, + .nsa_to_vmem_bug, + .offset_3f_bug, + .scalar_atomics, + .scalar_flat_scratch_insts, + .scalar_stores, + .smem_to_vector_write_hazard, + .vcmpx_exec_war_hazard, + .vcmpx_permlane_hazard, + .vmem_to_scalar_write_hazard, + .wavefrontsize32, + .xnack_support, + }), + }; + pub const gfx1013 = CpuModel{ + .name = "gfx1013", + .llvm_name = "gfx1013", + .features = featureSet(&[_]Feature{ + .dl_insts, + .ds_src2_insts, + .flat_segment_offset_bug, + .get_wave_id_inst, + .gfx10, + .gfx10_a_encoding, + .inst_fwd_prefetch_bug, + .lds_branch_vmem_war_hazard, + .lds_misaligned_bug, + .ldsbankcount32, + .mad_mac_f32_insts, + .negative_unaligned_scratch_offset_bug, + .nsa_clause_bug, + .nsa_encoding, + .nsa_max_size_5, .nsa_to_vmem_bug, .offset_3f_bug, - .s_memtime_inst, .scalar_atomics, .scalar_flat_scratch_insts, .scalar_stores, @@ -982,11 +1122,15 @@ pub const cpu = struct { .dot2_insts, .dot5_insts, .dot6_insts, + .dot7_insts, .gfx10, .gfx10_3_insts, + .gfx10_a_encoding, .gfx10_b_encoding, .ldsbankcount32, .nsa_encoding, + .nsa_max_size_13, + .shader_cycles_register, .wavefrontsize32, }), }; @@ -999,11 +1143,15 @@ pub const cpu = struct { .dot2_insts, .dot5_insts, .dot6_insts, + .dot7_insts, .gfx10, .gfx10_3_insts, + .gfx10_a_encoding, .gfx10_b_encoding, .ldsbankcount32, .nsa_encoding, + .nsa_max_size_13, + .shader_cycles_register, .wavefrontsize32, }), }; @@ -1016,11 +1164,15 @@ pub const cpu = struct { .dot2_insts, .dot5_insts, .dot6_insts, + .dot7_insts, .gfx10, .gfx10_3_insts, + .gfx10_a_encoding, .gfx10_b_encoding, .ldsbankcount32, .nsa_encoding, + .nsa_max_size_13, + .shader_cycles_register, .wavefrontsize32, }), }; @@ -1033,11 +1185,57 @@ pub const cpu = struct { .dot2_insts, .dot5_insts, .dot6_insts, + .dot7_insts, .gfx10, .gfx10_3_insts, + .gfx10_a_encoding, .gfx10_b_encoding, .ldsbankcount32, .nsa_encoding, + .nsa_max_size_13, + .shader_cycles_register, + .wavefrontsize32, + }), + }; + pub const gfx1034 = CpuModel{ + .name = "gfx1034", + .llvm_name = "gfx1034", + .features = featureSet(&[_]Feature{ + .dl_insts, + .dot1_insts, + .dot2_insts, + .dot5_insts, + .dot6_insts, + .dot7_insts, + .gfx10, + .gfx10_3_insts, + .gfx10_a_encoding, + .gfx10_b_encoding, + .ldsbankcount32, + .nsa_encoding, + .nsa_max_size_13, + .shader_cycles_register, + .wavefrontsize32, + }), + }; + pub const gfx1035 = CpuModel{ + .name = "gfx1035", + .llvm_name = "gfx1035", + .features = featureSet(&[_]Feature{ + .dl_insts, + .dot1_insts, + .dot2_insts, + .dot5_insts, + .dot6_insts, + .dot7_insts, + .gfx10, + .gfx10_3_insts, + .gfx10_a_encoding, + .gfx10_b_encoding, + .ldsbankcount32, + .nsa_encoding, + .nsa_max_size_13, + .shader_cycles_register, .wavefrontsize32, }), }; @@ -1171,9 +1369,12 @@ pub const cpu = struct { .name = "gfx900", .llvm_name = "gfx900", .features = featureSet(&[_]Feature{ + .ds_src2_insts, + .extended_image_insts, .gfx9, .image_gather4_d16_bug, .ldsbankcount32, + .mad_mac_f32_insts, .mad_mix_insts, }), }; @@ -1181,9 +1382,12 @@ pub const cpu = struct { .name = "gfx902", .llvm_name = "gfx902", .features = featureSet(&[_]Feature{ + .ds_src2_insts, + .extended_image_insts, .gfx9, .image_gather4_d16_bug, .ldsbankcount32, + .mad_mac_f32_insts, .mad_mix_insts, }), }; @@ -1191,10 +1395,13 @@ pub const cpu = struct { .name = "gfx904", .llvm_name = "gfx904", .features = featureSet(&[_]Feature{ + .ds_src2_insts, + .extended_image_insts, .fma_mix_insts, .gfx9, .image_gather4_d16_bug, .ldsbankcount32, + .mad_mac_f32_insts, }), }; pub const gfx906 = CpuModel{ @@ -1204,11 +1411,15 @@ pub const cpu = struct { .dl_insts, .dot1_insts, .dot2_insts, + .dot7_insts, + .ds_src2_insts, + .extended_image_insts, .fma_mix_insts, .gfx9, .half_rate_64_ops, .image_gather4_d16_bug, .ldsbankcount32, + .mad_mac_f32_insts, .sramecc_support, }), }; @@ -1224,11 +1435,15 @@ pub const cpu = struct { .dot4_insts, .dot5_insts, .dot6_insts, + .dot7_insts, + .ds_src2_insts, + .extended_image_insts, .fma_mix_insts, .gfx9, .half_rate_64_ops, .image_gather4_d16_bug, .ldsbankcount32, + .mad_mac_f32_insts, .mai_insts, .mfma_inline_literal_bug, .pk_fmac_f16_inst, @@ -1239,21 +1454,53 @@ pub const cpu = struct { .name = "gfx909", .llvm_name = "gfx909", .features = featureSet(&[_]Feature{ + .ds_src2_insts, + .extended_image_insts, .gfx9, .image_gather4_d16_bug, .ldsbankcount32, + .mad_mac_f32_insts, .mad_mix_insts, }), }; + pub const gfx90a = CpuModel{ + .name = "gfx90a", + .llvm_name = "gfx90a", + .features = featureSet(&[_]Feature{ + .atomic_fadd_insts, + .dl_insts, + .dot1_insts, + .dot2_insts, + .dot3_insts, + .dot4_insts, + .dot5_insts, + .dot6_insts, + .dot7_insts, + .dpp_64bit, + .fma_mix_insts, + .full_rate_64_ops, + .gfx9, + .gfx90a_insts, + .ldsbankcount32, + .mad_mac_f32_insts, + .mai_insts, + .packed_fp32_ops, + .packed_tid, + .pk_fmac_f16_inst, + .sramecc_support, + }), + }; pub const gfx90c = CpuModel{ .name = "gfx90c", .llvm_name = "gfx90c", .features = featureSet(&[_]Feature{ + .ds_src2_insts, + .extended_image_insts, .gfx9, .image_gather4_d16_bug, .ldsbankcount32, + .mad_mac_f32_insts, .mad_mix_insts, - .xnack, }), }; pub const hainan = CpuModel{ diff --git a/lib/std/target/arc.zig b/lib/std/target/arc.zig new file mode 100644 index 0000000000..822104b466 --- /dev/null +++ b/lib/std/target/arc.zig @@ -0,0 +1,39 @@ +//! This file is auto-generated by tools/update_cpu_features.zig. + +const std = @import("../std.zig"); +const CpuFeature = std.Target.Cpu.Feature; +const CpuModel = std.Target.Cpu.Model; + +pub const Feature = enum { + norm, +}; + +pub const featureSet = CpuFeature.feature_set_fns(Feature).featureSet; +pub const featureSetHas = CpuFeature.feature_set_fns(Feature).featureSetHas; +pub const featureSetHasAny = CpuFeature.feature_set_fns(Feature).featureSetHasAny; +pub const featureSetHasAll = CpuFeature.feature_set_fns(Feature).featureSetHasAll; + +pub const all_features = blk: { + const len = @typeInfo(Feature).Enum.fields.len; + std.debug.assert(len <= CpuFeature.Set.needed_bit_count); + var result: [len]CpuFeature = undefined; + result[@enumToInt(Feature.norm)] = .{ + .llvm_name = "norm", + .description = "Enable support for norm instruction.", + .dependencies = featureSet(&[_]Feature{}), + }; + const ti = @typeInfo(Feature); + for (result) |*elem, i| { + elem.index = i; + elem.name = ti.Enum.fields[i].name; + } + break :blk result; +}; + +pub const cpu = struct { + pub const generic = CpuModel{ + .name = "generic", + .llvm_name = "generic", + .features = featureSet(&[_]Feature{}), + }; +}; diff --git a/lib/std/target/arm.zig b/lib/std/target/arm.zig index 394ba05638..42e5f3bfdb 100644 --- a/lib/std/target/arm.zig +++ b/lib/std/target/arm.zig @@ -51,6 +51,7 @@ pub const Feature = enum { fuse_aes, fuse_literals, harden_sls_blr, + harden_sls_nocomdat, harden_sls_retbr, has_v4t, has_v5t, @@ -487,6 +488,11 @@ pub const all_features = blk: { .description = "Harden against straight line speculation across indirect calls", .dependencies = featureSet(&[_]Feature{}), }; + result[@enumToInt(Feature.harden_sls_nocomdat)] = .{ + .llvm_name = "harden-sls-nocomdat", + .description = "Generate thunk code for SLS mitigation in the normal text section", + .dependencies = featureSet(&[_]Feature{}), + }; result[@enumToInt(Feature.harden_sls_retbr)] = .{ .llvm_name = "harden-sls-retbr", .description = "Harden against straight line speculation across RETurn and BranchRegister instructions", @@ -1245,7 +1251,7 @@ pub const all_features = blk: { }; result[@enumToInt(Feature.v8_7a)] = .{ .llvm_name = "armv8.7-a", - .description = "ARMv86a architecture", + .description = "ARMv87a architecture", .dependencies = featureSet(&[_]Feature{ .aclass, .crc, @@ -1517,13 +1523,6 @@ pub const cpu = struct { .vfp2, }), }; - pub const arm1176j_s = CpuModel{ - .name = "arm1176j_s", - .llvm_name = "arm1176j-s", - .features = featureSet(&[_]Feature{ - .v6kz, - }), - }; pub const arm1176jz_s = CpuModel{ .name = "arm1176jz_s", .llvm_name = "arm1176jz-s", @@ -1889,6 +1888,7 @@ pub const cpu = struct { .name = "cortex_m0", .llvm_name = "cortex-m0", .features = featureSet(&[_]Feature{ + .no_branch_predictor, .v6m, }), }; @@ -1896,6 +1896,7 @@ pub const cpu = struct { .name = "cortex_m0plus", .llvm_name = "cortex-m0plus", .features = featureSet(&[_]Feature{ + .no_branch_predictor, .v6m, }), }; @@ -1903,6 +1904,7 @@ pub const cpu = struct { .name = "cortex_m1", .llvm_name = "cortex-m1", .features = featureSet(&[_]Feature{ + .no_branch_predictor, .v6m, }), }; @@ -1910,6 +1912,7 @@ pub const cpu = struct { .name = "cortex_m23", .llvm_name = "cortex-m23", .features = featureSet(&[_]Feature{ + .no_branch_predictor, .no_movt, .v8m, }), @@ -2225,6 +2228,7 @@ pub const cpu = struct { .name = "sc000", .llvm_name = "sc000", .features = featureSet(&[_]Feature{ + .no_branch_predictor, .v6m, }), }; diff --git a/lib/std/target/hexagon.zig b/lib/std/target/hexagon.zig index a9922e2866..de0e34aa3d 100644 --- a/lib/std/target/hexagon.zig +++ b/lib/std/target/hexagon.zig @@ -16,6 +16,7 @@ pub const Feature = enum { hvxv65, hvxv66, hvxv67, + hvxv68, long_calls, mem_noshuf, memops, @@ -35,6 +36,7 @@ pub const Feature = enum { v65, v66, v67, + v68, zreg, }; @@ -117,6 +119,13 @@ pub const all_features = blk: { .hvxv66, }), }; + result[@enumToInt(Feature.hvxv68)] = .{ + .llvm_name = "hvxv68", + .description = "Hexagon HVX instructions", + .dependencies = featureSet(&[_]Feature{ + .hvxv67, + }), + }; result[@enumToInt(Feature.long_calls)] = .{ .llvm_name = "long-calls", .description = "Use constant-extended calls", @@ -216,6 +225,11 @@ pub const all_features = blk: { .description = "Enable Hexagon V67 architecture", .dependencies = featureSet(&[_]Feature{}), }; + result[@enumToInt(Feature.v68)] = .{ + .llvm_name = "v68", + .description = "Enable Hexagon V68 architecture", + .dependencies = featureSet(&[_]Feature{}), + }; result[@enumToInt(Feature.zreg)] = .{ .llvm_name = "zreg", .description = "Hexagon ZReg extension instructions", @@ -385,4 +399,25 @@ pub const cpu = struct { .v67, }), }; + pub const hexagonv68 = CpuModel{ + .name = "hexagonv68", + .llvm_name = "hexagonv68", + .features = featureSet(&[_]Feature{ + .compound, + .duplex, + .mem_noshuf, + .memops, + .nvj, + .nvs, + .small_data, + .v5, + .v55, + .v60, + .v62, + .v65, + .v66, + .v67, + .v68, + }), + }; }; diff --git a/lib/std/target/nvptx.zig b/lib/std/target/nvptx.zig index d4eb0f4e3f..0a5581090e 100644 --- a/lib/std/target/nvptx.zig +++ b/lib/std/target/nvptx.zig @@ -17,6 +17,8 @@ pub const Feature = enum { ptx64, ptx65, ptx70, + ptx71, + ptx72, sm_20, sm_21, sm_30, @@ -33,6 +35,7 @@ pub const Feature = enum { sm_72, sm_75, sm_80, + sm_86, }; pub const featureSet = CpuFeature.feature_set_fns(Feature).featureSet; @@ -104,6 +107,16 @@ pub const all_features = blk: { .description = "Use PTX version 7.0", .dependencies = featureSet(&[_]Feature{}), }; + result[@enumToInt(Feature.ptx71)] = .{ + .llvm_name = "ptx71", + .description = "Use PTX version 7.1", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@enumToInt(Feature.ptx72)] = .{ + .llvm_name = "ptx72", + .description = "Use PTX version 7.2", + .dependencies = featureSet(&[_]Feature{}), + }; result[@enumToInt(Feature.sm_20)] = .{ .llvm_name = "sm_20", .description = "Target SM 2.0", @@ -184,6 +197,11 @@ pub const all_features = blk: { .description = "Target SM 8.0", .dependencies = featureSet(&[_]Feature{}), }; + result[@enumToInt(Feature.sm_86)] = .{ + .llvm_name = "sm_86", + .description = "Target SM 8.6", + .dependencies = featureSet(&[_]Feature{}), + }; const ti = @typeInfo(Feature); for (result) |*elem, i| { elem.index = i; @@ -317,4 +335,12 @@ pub const cpu = struct { .sm_80, }), }; + pub const sm_86 = CpuModel{ + .name = "sm_86", + .llvm_name = "sm_86", + .features = featureSet(&[_]Feature{ + .ptx71, + .sm_86, + }), + }; }; diff --git a/lib/std/target/powerpc.zig b/lib/std/target/powerpc.zig index 6fc05a2379..3145b2b14c 100644 --- a/lib/std/target/powerpc.zig +++ b/lib/std/target/powerpc.zig @@ -37,6 +37,7 @@ pub const Feature = enum { htm, icbt, invariant_function_descriptors, + isa_v207_instructions, isa_v30_instructions, isa_v31_instructions, isel, @@ -62,7 +63,10 @@ pub const Feature = enum { ppc_prera_sched, predictable_select_expensive, prefix_instrs, + privileged, + quadword_atomics, recipprec, + rop_protect, secure_plt, slow_popcntd, spe, @@ -277,10 +281,17 @@ pub const all_features = blk: { .description = "Assume function descriptors are invariant", .dependencies = featureSet(&[_]Feature{}), }; + result[@enumToInt(Feature.isa_v207_instructions)] = .{ + .llvm_name = "isa-v207-instructions", + .description = "Enable instructions in ISA 2.07.", + .dependencies = featureSet(&[_]Feature{}), + }; result[@enumToInt(Feature.isa_v30_instructions)] = .{ .llvm_name = "isa-v30-instructions", .description = "Enable instructions in ISA 3.0.", - .dependencies = featureSet(&[_]Feature{}), + .dependencies = featureSet(&[_]Feature{ + .isa_v207_instructions, + }), }; result[@enumToInt(Feature.isa_v31_instructions)] = .{ .llvm_name = "isa-v31-instructions", @@ -433,11 +444,26 @@ pub const all_features = blk: { .power9_altivec, }), }; + result[@enumToInt(Feature.privileged)] = .{ + .llvm_name = "privileged", + .description = "Add privileged instructions", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@enumToInt(Feature.quadword_atomics)] = .{ + .llvm_name = "quadword-atomics", + .description = "Enable lqarx and stqcx.", + .dependencies = featureSet(&[_]Feature{}), + }; result[@enumToInt(Feature.recipprec)] = .{ .llvm_name = "recipprec", .description = "Assume higher precision reciprocal estimates", .dependencies = featureSet(&[_]Feature{}), }; + result[@enumToInt(Feature.rop_protect)] = .{ + .llvm_name = "rop-protect", + .description = "Add ROP protect", + .dependencies = featureSet(&[_]Feature{}), + }; result[@enumToInt(Feature.secure_plt)] = .{ .llvm_name = "secure-plt", .description = "Enable secure plt mode", @@ -696,6 +722,7 @@ pub const cpu = struct { .ppc_postra_sched, .ppc_prera_sched, .predictable_select_expensive, + .quadword_atomics, .recipprec, .stfiwx, .two_const_nr, @@ -790,6 +817,7 @@ pub const cpu = struct { .fuse_addis_load, .htm, .icbt, + .isa_v207_instructions, .isel, .ldbrx, .lfiwax, @@ -798,6 +826,7 @@ pub const cpu = struct { .popcntd, .power8_vector, .predictable_select_expensive, + .quadword_atomics, .recipprec, .stfiwx, .two_const_nr, @@ -837,6 +866,7 @@ pub const cpu = struct { .ppc_postra_sched, .ppc_prera_sched, .predictable_select_expensive, + .quadword_atomics, .recipprec, .stfiwx, .two_const_nr, @@ -989,6 +1019,7 @@ pub const cpu = struct { .fuse_addis_load, .htm, .icbt, + .isa_v207_instructions, .isel, .ldbrx, .lfiwax, @@ -997,6 +1028,7 @@ pub const cpu = struct { .popcntd, .power8_vector, .predictable_select_expensive, + .quadword_atomics, .recipprec, .stfiwx, .two_const_nr, @@ -1033,6 +1065,7 @@ pub const cpu = struct { .ppc_postra_sched, .ppc_prera_sched, .predictable_select_expensive, + .quadword_atomics, .recipprec, .stfiwx, .two_const_nr, diff --git a/lib/std/target/riscv.zig b/lib/std/target/riscv.zig index 28c6bdc366..70a8f73245 100644 --- a/lib/std/target/riscv.zig +++ b/lib/std/target/riscv.zig @@ -185,7 +185,7 @@ pub const all_features = blk: { }; result[@enumToInt(Feature.experimental_zvamo)] = .{ .llvm_name = "experimental-zvamo", - .description = "'Zvamo'(Vector AMO Operations)", + .description = "'Zvamo' (Vector AMO Operations)", .dependencies = featureSet(&[_]Feature{ .experimental_v, }), diff --git a/lib/std/target/systemz.zig b/lib/std/target/systemz.zig index 566a6de333..be97570629 100644 --- a/lib/std/target/systemz.zig +++ b/lib/std/target/systemz.zig @@ -5,6 +5,7 @@ const CpuFeature = std.Target.Cpu.Feature; const CpuModel = std.Target.Cpu.Model; pub const Feature = enum { + bear_enhancement, deflate_conversion, dfp_packed_conversion, dfp_zoned_conversion, @@ -31,8 +32,11 @@ pub const Feature = enum { miscellaneous_extensions, miscellaneous_extensions_2, miscellaneous_extensions_3, + nnp_assist, population_count, + processor_activity_instrumentation, processor_assist, + reset_dat_protection, reset_reference_bits_multiple, soft_float, transactional_execution, @@ -41,6 +45,7 @@ pub const Feature = enum { vector_enhancements_2, vector_packed_decimal, vector_packed_decimal_enhancement, + vector_packed_decimal_enhancement_2, }; pub const featureSet = CpuFeature.feature_set_fns(Feature).featureSet; @@ -52,6 +57,11 @@ pub const all_features = blk: { const len = @typeInfo(Feature).Enum.fields.len; std.debug.assert(len <= CpuFeature.Set.needed_bit_count); var result: [len]CpuFeature = undefined; + result[@enumToInt(Feature.bear_enhancement)] = .{ + .llvm_name = "bear-enhancement", + .description = "Assume that the BEAR-enhancement facility is installed", + .dependencies = featureSet(&[_]Feature{}), + }; result[@enumToInt(Feature.deflate_conversion)] = .{ .llvm_name = "deflate-conversion", .description = "Assume that the deflate-conversion facility is installed", @@ -182,16 +192,31 @@ pub const all_features = blk: { .description = "Assume that the miscellaneous-extensions facility 3 is installed", .dependencies = featureSet(&[_]Feature{}), }; + result[@enumToInt(Feature.nnp_assist)] = .{ + .llvm_name = "nnp-assist", + .description = "Assume that the NNP-assist facility is installed", + .dependencies = featureSet(&[_]Feature{}), + }; result[@enumToInt(Feature.population_count)] = .{ .llvm_name = "population-count", .description = "Assume that the population-count facility is installed", .dependencies = featureSet(&[_]Feature{}), }; + result[@enumToInt(Feature.processor_activity_instrumentation)] = .{ + .llvm_name = "processor-activity-instrumentation", + .description = "Assume that the processor-activity-instrumentation facility is installed", + .dependencies = featureSet(&[_]Feature{}), + }; result[@enumToInt(Feature.processor_assist)] = .{ .llvm_name = "processor-assist", .description = "Assume that the processor-assist facility is installed", .dependencies = featureSet(&[_]Feature{}), }; + result[@enumToInt(Feature.reset_dat_protection)] = .{ + .llvm_name = "reset-dat-protection", + .description = "Assume that the reset-DAT-protection facility is installed", + .dependencies = featureSet(&[_]Feature{}), + }; result[@enumToInt(Feature.reset_reference_bits_multiple)] = .{ .llvm_name = "reset-reference-bits-multiple", .description = "Assume that the reset-reference-bits-multiple facility is installed", @@ -232,6 +257,11 @@ pub const all_features = blk: { .description = "Assume that the vector packed decimal enhancement facility is installed", .dependencies = featureSet(&[_]Feature{}), }; + result[@enumToInt(Feature.vector_packed_decimal_enhancement_2)] = .{ + .llvm_name = "vector-packed-decimal-enhancement-2", + .description = "Assume that the vector packed decimal enhancement facility 2 is installed", + .dependencies = featureSet(&[_]Feature{}), + }; const ti = @typeInfo(Feature); for (result) |*elem, i| { elem.index = i; @@ -368,6 +398,52 @@ pub const cpu = struct { .vector_packed_decimal_enhancement, }), }; + pub const arch14 = CpuModel{ + .name = "arch14", + .llvm_name = "arch14", + .features = featureSet(&[_]Feature{ + .bear_enhancement, + .deflate_conversion, + .dfp_packed_conversion, + .dfp_zoned_conversion, + .distinct_ops, + .enhanced_dat_2, + .enhanced_sort, + .execution_hint, + .fast_serialization, + .fp_extension, + .guarded_storage, + .high_word, + .insert_reference_bits_multiple, + .interlocked_access1, + .load_and_trap, + .load_and_zero_rightmost_byte, + .load_store_on_cond, + .load_store_on_cond_2, + .message_security_assist_extension3, + .message_security_assist_extension4, + .message_security_assist_extension5, + .message_security_assist_extension7, + .message_security_assist_extension8, + .message_security_assist_extension9, + .miscellaneous_extensions, + .miscellaneous_extensions_2, + .miscellaneous_extensions_3, + .nnp_assist, + .population_count, + .processor_activity_instrumentation, + .processor_assist, + .reset_dat_protection, + .reset_reference_bits_multiple, + .transactional_execution, + .vector, + .vector_enhancements_1, + .vector_enhancements_2, + .vector_packed_decimal, + .vector_packed_decimal_enhancement, + .vector_packed_decimal_enhancement_2, + }), + }; pub const arch8 = CpuModel{ .name = "arch8", .llvm_name = "arch8", diff --git a/lib/std/target/wasm.zig b/lib/std/target/wasm.zig index ac14ac318b..f120dfd7a5 100644 --- a/lib/std/target/wasm.zig +++ b/lib/std/target/wasm.zig @@ -15,7 +15,6 @@ pub const Feature = enum { sign_ext, simd128, tail_call, - unimplemented_simd128, }; pub const featureSet = CpuFeature.feature_set_fns(Feature).featureSet; @@ -77,13 +76,6 @@ pub const all_features = blk: { .description = "Enable tail call instructions", .dependencies = featureSet(&[_]Feature{}), }; - result[@enumToInt(Feature.unimplemented_simd128)] = .{ - .llvm_name = "unimplemented-simd128", - .description = "Enable 128-bit SIMD not yet implemented in engines", - .dependencies = featureSet(&[_]Feature{ - .simd128, - }), - }; const ti = @typeInfo(Feature); for (result) |*elem, i| { elem.index = i; diff --git a/lib/std/target/x86.zig b/lib/std/target/x86.zig index 5c57831bd5..c0a64c1077 100644 --- a/lib/std/target/x86.zig +++ b/lib/std/target/x86.zig @@ -55,10 +55,12 @@ pub const Feature = enum { fast_gather, fast_hops, fast_lzcnt, + fast_movbe, fast_scalar_fsqrt, fast_scalar_shift_masks, fast_shld_rotate, - fast_variable_shuffle, + fast_variable_crosslane_shuffle, + fast_variable_perlane_shuffle, fast_vector_fsqrt, fast_vector_shift_masks, fma, @@ -458,6 +460,11 @@ pub const all_features = blk: { .description = "LZCNT instructions are as fast as most simple integer ops", .dependencies = featureSet(&[_]Feature{}), }; + result[@enumToInt(Feature.fast_movbe)] = .{ + .llvm_name = "fast-movbe", + .description = "Prefer a movbe over a single-use load + bswap / single-use bswap + store", + .dependencies = featureSet(&[_]Feature{}), + }; result[@enumToInt(Feature.fast_scalar_fsqrt)] = .{ .llvm_name = "fast-scalar-fsqrt", .description = "Scalar SQRT is fast (disable Newton-Raphson)", @@ -473,9 +480,14 @@ pub const all_features = blk: { .description = "SHLD can be used as a faster rotate", .dependencies = featureSet(&[_]Feature{}), }; - result[@enumToInt(Feature.fast_variable_shuffle)] = .{ - .llvm_name = "fast-variable-shuffle", - .description = "Shuffles with variable masks are fast", + result[@enumToInt(Feature.fast_variable_crosslane_shuffle)] = .{ + .llvm_name = "fast-variable-crosslane-shuffle", + .description = "Cross-lane shuffles with variable masks are fast", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@enumToInt(Feature.fast_variable_perlane_shuffle)] = .{ + .llvm_name = "fast-variable-perlane-shuffle", + .description = "Per-lane shuffles with variable masks are fast", .dependencies = featureSet(&[_]Feature{}), }; result[@enumToInt(Feature.fast_vector_fsqrt)] = .{ @@ -1010,26 +1022,27 @@ pub const cpu = struct { .features = featureSet(&[_]Feature{ .@"64bit", .adx, - .aes, .avxvnni, .bmi, .bmi2, .cldemote, .clflushopt, + .clwb, .cmov, .cx16, - .ermsb, .f16c, .false_deps_popcnt, .fast_15bytenop, .fast_gather, .fast_scalar_fsqrt, .fast_shld_rotate, - .fast_variable_shuffle, + .fast_variable_crosslane_shuffle, + .fast_variable_perlane_shuffle, .fast_vector_fsqrt, .fma, .fsgsbase, .fxsr, + .gfni, .hreset, .idivq_to_divl, .invpcid, @@ -1037,19 +1050,27 @@ pub const cpu = struct { .macrofusion, .mmx, .movbe, + .movdir64b, + .movdiri, .nopl, - .pclmul, + .pconfig, + .pku, .popcnt, .prfchw, .ptwrite, + .rdpid, .rdrnd, .rdseed, .sahf, .serialize, - .sgx, + .sha, + .shstk, .slow_3ops_lea, + .vaes, + .vpclmulqdq, .vzeroupper, .waitpkg, + .widekl, .x87, .xsavec, .xsaveopt, @@ -1292,6 +1313,7 @@ pub const cpu = struct { .f16c, .fast_11bytenop, .fast_bextr, + .fast_movbe, .fast_scalar_shift_masks, .fma, .fxsr, @@ -1324,6 +1346,7 @@ pub const cpu = struct { .f16c, .fast_11bytenop, .fast_bextr, + .fast_movbe, .fast_scalar_shift_masks, .fma, .fsgsbase, @@ -1359,6 +1382,7 @@ pub const cpu = struct { .f16c, .fast_11bytenop, .fast_bextr, + .fast_movbe, .fast_scalar_shift_masks, .fma, .fsgsbase, @@ -1424,7 +1448,8 @@ pub const cpu = struct { .fast_15bytenop, .fast_scalar_fsqrt, .fast_shld_rotate, - .fast_variable_shuffle, + .fast_variable_crosslane_shuffle, + .fast_variable_perlane_shuffle, .fma, .fsgsbase, .fxsr, @@ -1485,6 +1510,7 @@ pub const cpu = struct { .fast_bextr, .fast_hops, .fast_lzcnt, + .fast_movbe, .fast_scalar_shift_masks, .fast_vector_shift_masks, .fxsr, @@ -1548,7 +1574,8 @@ pub const cpu = struct { .fast_gather, .fast_scalar_fsqrt, .fast_shld_rotate, - .fast_variable_shuffle, + .fast_variable_crosslane_shuffle, + .fast_variable_perlane_shuffle, .fast_vector_fsqrt, .fsgsbase, .fxsr, @@ -1567,7 +1594,6 @@ pub const cpu = struct { .rdrnd, .rdseed, .sahf, - .sgx, .sha, .slow_3ops_lea, .vzeroupper, @@ -1601,7 +1627,8 @@ pub const cpu = struct { .fast_gather, .fast_scalar_fsqrt, .fast_shld_rotate, - .fast_variable_shuffle, + .fast_variable_crosslane_shuffle, + .fast_variable_perlane_shuffle, .fast_vector_fsqrt, .fsgsbase, .fxsr, @@ -1652,7 +1679,8 @@ pub const cpu = struct { .fast_gather, .fast_scalar_fsqrt, .fast_shld_rotate, - .fast_variable_shuffle, + .fast_variable_crosslane_shuffle, + .fast_variable_perlane_shuffle, .fast_vector_fsqrt, .fsgsbase, .fxsr, @@ -1714,7 +1742,8 @@ pub const cpu = struct { .fast_15bytenop, .fast_scalar_fsqrt, .fast_shld_rotate, - .fast_variable_shuffle, + .fast_variable_crosslane_shuffle, + .fast_variable_perlane_shuffle, .fma, .fsgsbase, .fxsr, @@ -1844,6 +1873,7 @@ pub const cpu = struct { .cmov, .cx16, .false_deps_popcnt, + .fast_movbe, .fsgsbase, .fxsr, .mmx, @@ -1877,6 +1907,7 @@ pub const cpu = struct { .clflushopt, .cmov, .cx16, + .fast_movbe, .fsgsbase, .fxsr, .mmx, @@ -1890,7 +1921,6 @@ pub const cpu = struct { .rdrnd, .rdseed, .sahf, - .sgx, .sha, .slow_incdec, .slow_lea, @@ -1921,7 +1951,8 @@ pub const cpu = struct { .fast_15bytenop, .fast_scalar_fsqrt, .fast_shld_rotate, - .fast_variable_shuffle, + .fast_variable_crosslane_shuffle, + .fast_variable_perlane_shuffle, .fma, .fsgsbase, .fxsr, @@ -1960,7 +1991,6 @@ pub const cpu = struct { .bmi, .bmi2, .clflushopt, - .clwb, .cmov, .cx16, .ermsb, @@ -1968,7 +1998,8 @@ pub const cpu = struct { .fast_gather, .fast_scalar_fsqrt, .fast_shld_rotate, - .fast_variable_shuffle, + .fast_variable_crosslane_shuffle, + .fast_variable_perlane_shuffle, .fast_vector_fsqrt, .fsgsbase, .fsrm, @@ -1989,7 +2020,6 @@ pub const cpu = struct { .rdrnd, .rdseed, .sahf, - .sgx, .sha, .slow_3ops_lea, .vaes, @@ -2027,7 +2057,8 @@ pub const cpu = struct { .fast_gather, .fast_scalar_fsqrt, .fast_shld_rotate, - .fast_variable_shuffle, + .fast_variable_crosslane_shuffle, + .fast_variable_perlane_shuffle, .fast_vector_fsqrt, .fsgsbase, .fsrm, @@ -2049,7 +2080,6 @@ pub const cpu = struct { .rdrnd, .rdseed, .sahf, - .sgx, .sha, .slow_3ops_lea, .vaes, @@ -2175,6 +2205,7 @@ pub const cpu = struct { .cmov, .cx16, .fast_gather, + .fast_movbe, .fsgsbase, .fxsr, .idivq_to_divl, @@ -2214,6 +2245,7 @@ pub const cpu = struct { .cmov, .cx16, .fast_gather, + .fast_movbe, .fsgsbase, .fxsr, .idivq_to_divl, @@ -2472,6 +2504,64 @@ pub const cpu = struct { .x87, }), }; + pub const rocketlake = CpuModel{ + .name = "rocketlake", + .llvm_name = "rocketlake", + .features = featureSet(&[_]Feature{ + .@"64bit", + .adx, + .avx512bitalg, + .avx512cd, + .avx512dq, + .avx512ifma, + .avx512vbmi, + .avx512vbmi2, + .avx512vl, + .avx512vnni, + .avx512vpopcntdq, + .bmi, + .bmi2, + .clflushopt, + .cmov, + .cx16, + .ermsb, + .fast_15bytenop, + .fast_gather, + .fast_scalar_fsqrt, + .fast_shld_rotate, + .fast_variable_crosslane_shuffle, + .fast_variable_perlane_shuffle, + .fast_vector_fsqrt, + .fsgsbase, + .fsrm, + .fxsr, + .gfni, + .idivq_to_divl, + .invpcid, + .lzcnt, + .macrofusion, + .mmx, + .movbe, + .nopl, + .pku, + .popcnt, + .prefer_256_bit, + .prfchw, + .rdpid, + .rdrnd, + .rdseed, + .sahf, + .sha, + .slow_3ops_lea, + .vaes, + .vpclmulqdq, + .vzeroupper, + .x87, + .xsavec, + .xsaveopt, + .xsaves, + }), + }; pub const sandybridge = CpuModel{ .name = "sandybridge", .llvm_name = "sandybridge", @@ -2532,7 +2622,8 @@ pub const cpu = struct { .fast_gather, .fast_scalar_fsqrt, .fast_shld_rotate, - .fast_variable_shuffle, + .fast_variable_crosslane_shuffle, + .fast_variable_perlane_shuffle, .fast_vector_fsqrt, .fsgsbase, .fsrm, @@ -2558,7 +2649,6 @@ pub const cpu = struct { .rdseed, .sahf, .serialize, - .sgx, .sha, .shstk, .slow_3ops_lea, @@ -2584,6 +2674,7 @@ pub const cpu = struct { .cx16, .false_deps_popcnt, .fast_7bytenop, + .fast_movbe, .fxsr, .idivq_to_divl, .mmx, @@ -2626,7 +2717,8 @@ pub const cpu = struct { .fast_gather, .fast_scalar_fsqrt, .fast_shld_rotate, - .fast_variable_shuffle, + .fast_variable_crosslane_shuffle, + .fast_variable_perlane_shuffle, .fast_vector_fsqrt, .fsgsbase, .fxsr, @@ -2673,7 +2765,8 @@ pub const cpu = struct { .fast_gather, .fast_scalar_fsqrt, .fast_shld_rotate, - .fast_variable_shuffle, + .fast_variable_crosslane_shuffle, + .fast_variable_perlane_shuffle, .fast_vector_fsqrt, .fma, .fsgsbase, @@ -2691,7 +2784,6 @@ pub const cpu = struct { .rdrnd, .rdseed, .sahf, - .sgx, .slow_3ops_lea, .vzeroupper, .x87, @@ -2723,7 +2815,8 @@ pub const cpu = struct { .fast_gather, .fast_scalar_fsqrt, .fast_shld_rotate, - .fast_variable_shuffle, + .fast_variable_crosslane_shuffle, + .fast_variable_perlane_shuffle, .fast_vector_fsqrt, .fsgsbase, .fxsr, @@ -2759,6 +2852,7 @@ pub const cpu = struct { .cx16, .false_deps_popcnt, .fast_7bytenop, + .fast_movbe, .fxsr, .idivq_to_divl, .mmx, @@ -2805,7 +2899,8 @@ pub const cpu = struct { .fast_gather, .fast_scalar_fsqrt, .fast_shld_rotate, - .fast_variable_shuffle, + .fast_variable_crosslane_shuffle, + .fast_variable_perlane_shuffle, .fast_vector_fsqrt, .fsgsbase, .fsrm, @@ -2828,7 +2923,6 @@ pub const cpu = struct { .rdrnd, .rdseed, .sahf, - .sgx, .sha, .shstk, .slow_3ops_lea, @@ -2851,6 +2945,7 @@ pub const cpu = struct { .clwb, .cmov, .cx16, + .fast_movbe, .fsgsbase, .fxsr, .gfni, @@ -2865,7 +2960,6 @@ pub const cpu = struct { .rdrnd, .rdseed, .sahf, - .sgx, .sha, .slow_incdec, .slow_lea, @@ -2978,7 +3072,8 @@ pub const cpu = struct { .fast_15bytenop, .fast_scalar_fsqrt, .fast_shld_rotate, - .fast_variable_shuffle, + .fast_variable_crosslane_shuffle, + .fast_variable_perlane_shuffle, .fma, .fxsr, .idivq_to_divl, @@ -3013,7 +3108,8 @@ pub const cpu = struct { .fast_gather, .fast_scalar_fsqrt, .fast_shld_rotate, - .fast_variable_shuffle, + .fast_variable_crosslane_shuffle, + .fast_variable_perlane_shuffle, .fast_vector_fsqrt, .fxsr, .idivq_to_divl, @@ -3065,6 +3161,7 @@ pub const cpu = struct { .fast_15bytenop, .fast_bextr, .fast_lzcnt, + .fast_movbe, .fast_scalar_shift_masks, .fma, .fsgsbase, @@ -3110,6 +3207,7 @@ pub const cpu = struct { .fast_15bytenop, .fast_bextr, .fast_lzcnt, + .fast_movbe, .fast_scalar_shift_masks, .fma, .fsgsbase, @@ -3156,13 +3254,16 @@ pub const cpu = struct { .fast_15bytenop, .fast_bextr, .fast_lzcnt, + .fast_movbe, .fast_scalar_shift_masks, + .fast_variable_perlane_shuffle, .fma, .fsgsbase, .fsrm, .fxsr, .invpcid, .lzcnt, + .macrofusion, .mmx, .movbe, .mwaitx, diff --git a/tools/update_cpu_features.zig b/tools/update_cpu_features.zig index 34cf6c59b6..59107a4317 100644 --- a/tools/update_cpu_features.zig +++ b/tools/update_cpu_features.zig @@ -219,7 +219,6 @@ const llvm_targets = [_]LlvmTarget{ "use_postra_scheduler", "use_reciprocal_square_root", "v8a", - "zcz_fp", }, }, .{ @@ -236,7 +235,6 @@ const llvm_targets = [_]LlvmTarget{ "slow_paired_128", "use_postra_scheduler", "v8a", - "zcz_fp", }, }, .{