From 59b4facd34dc525d2258bde8f5d94049d02355dc Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Sat, 27 Feb 2021 00:54:40 -0700 Subject: [PATCH] update x86 CPU features --- lib/std/target/x86.zig | 685 ++++++++++++++++++++++++++++------------- 1 file changed, 474 insertions(+), 211 deletions(-) diff --git a/lib/std/target/x86.zig b/lib/std/target/x86.zig index 1a52162969..34fb64d7cc 100644 --- a/lib/std/target/x86.zig +++ b/lib/std/target/x86.zig @@ -1,13 +1,12 @@ -// SPDX-License-Identifier: MIT -// Copyright (c) 2015-2021 Zig Contributors -// This file is part of [zig](https://ziglang.org/), which is MIT licensed. -// The MIT license requires this copyright notice to be included in all copies -// and substantial portions of the software. +//! This file is auto-generated by tools/update_cpu_features.zig. + const std = @import("../std.zig"); const CpuFeature = std.Target.Cpu.Feature; const CpuModel = std.Target.Cpu.Model; pub const Feature = enum { + @"16bit_mode", + @"32bit_mode", @"3dnow", @"3dnowa", @"64bit", @@ -154,6 +153,16 @@ pub const all_features = blk: { const len = @typeInfo(Feature).Enum.fields.len; std.debug.assert(len <= CpuFeature.Set.needed_bit_count); var result: [len]CpuFeature = undefined; + result[@enumToInt(Feature.@"16bit_mode")] = .{ + .llvm_name = "16bit-mode", + .description = "16-bit mode (i8086)", + .dependencies = featureSet(&[_]Feature{}), + }; + result[@enumToInt(Feature.@"32bit_mode")] = .{ + .llvm_name = "32bit-mode", + .description = "32-bit mode (80386)", + .dependencies = featureSet(&[_]Feature{}), + }; result[@enumToInt(Feature.@"3dnow")] = .{ .llvm_name = "3dnow", .description = "Enable 3DNow! instructions", @@ -960,8 +969,6 @@ pub const cpu = struct { .@"64bit", .adx, .aes, - .avx, - .avx2, .avxvnni, .bmi, .bmi2, @@ -969,15 +976,23 @@ pub const cpu = struct { .clflushopt, .cmov, .cx16, - .cx8, .ermsb, .f16c, + .false_deps_popcnt, + .fast_15bytenop, + .fast_gather, + .fast_scalar_fsqrt, + .fast_shld_rotate, + .fast_variable_shuffle, + .fast_vector_fsqrt, .fma, .fsgsbase, .fxsr, .hreset, + .idivq_to_divl, .invpcid, .lzcnt, + .macrofusion, .mmx, .movbe, .nopl, @@ -990,11 +1005,10 @@ pub const cpu = struct { .sahf, .serialize, .sgx, - .sse2, - .sse4_2, + .slow_3ops_lea, + .vzeroupper, .waitpkg, .x87, - .xsave, .xsavec, .xsaveopt, .xsaves, @@ -1008,14 +1022,16 @@ pub const cpu = struct { .@"64bit", .cmov, .cx16, - .cx8, + .fast_scalar_shift_masks, .fxsr, .lzcnt, .nopl, .popcnt, .prfchw, .sahf, + .slow_shld, .sse4a, + .vzeroupper, .x87, }), }; @@ -1027,6 +1043,9 @@ pub const cpu = struct { .cmov, .cx8, .nopl, + .slow_shld, + .slow_unaligned_mem_16, + .vzeroupper, .x87, }), }; @@ -1039,7 +1058,10 @@ pub const cpu = struct { .cx8, .fxsr, .nopl, + .slow_shld, + .slow_unaligned_mem_16, .sse, + .vzeroupper, .x87, }), }; @@ -1051,9 +1073,13 @@ pub const cpu = struct { .@"64bit", .cmov, .cx8, + .fast_scalar_shift_masks, .fxsr, .nopl, + .slow_shld, + .slow_unaligned_mem_16, .sse2, + .vzeroupper, .x87, }), }; @@ -1066,7 +1092,10 @@ pub const cpu = struct { .cx8, .fxsr, .nopl, + .slow_shld, + .slow_unaligned_mem_16, .sse, + .vzeroupper, .x87, }), }; @@ -1078,6 +1107,9 @@ pub const cpu = struct { .cmov, .cx8, .nopl, + .slow_shld, + .slow_unaligned_mem_16, + .vzeroupper, .x87, }), }; @@ -1090,7 +1122,10 @@ pub const cpu = struct { .cx8, .fxsr, .nopl, + .slow_shld, + .slow_unaligned_mem_16, .sse, + .vzeroupper, .x87, }), }; @@ -1102,9 +1137,13 @@ pub const cpu = struct { .@"64bit", .cmov, .cx8, + .fast_scalar_shift_masks, .fxsr, .nopl, + .slow_shld, + .slow_unaligned_mem_16, .sse2, + .vzeroupper, .x87, }), }; @@ -1116,10 +1155,13 @@ pub const cpu = struct { .@"64bit", .cmov, .cx16, - .cx8, + .fast_scalar_shift_masks, .fxsr, .nopl, + .slow_shld, + .slow_unaligned_mem_16, .sse3, + .vzeroupper, .x87, }), }; @@ -1130,13 +1172,20 @@ pub const cpu = struct { .@"64bit", .cmov, .cx16, - .cx8, .fxsr, + .idivl_to_divb, + .idivq_to_divl, + .lea_sp, + .lea_uses_ag, .mmx, .movbe, .nopl, + .pad_short_functions, .sahf, + .slow_two_mem_ops, + .slow_unaligned_mem_16, .ssse3, + .vzeroupper, .x87, }), }; @@ -1148,14 +1197,16 @@ pub const cpu = struct { .@"64bit", .cmov, .cx16, - .cx8, + .fast_scalar_shift_masks, .fxsr, .lzcnt, .nopl, .popcnt, .prfchw, .sahf, + .slow_shld, .sse4a, + .vzeroupper, .x87, }), }; @@ -1165,9 +1216,11 @@ pub const cpu = struct { .features = featureSet(&[_]Feature{ .@"64bit", .aes, + .branchfusion, .cmov, .cx16, - .cx8, + .fast_11bytenop, + .fast_scalar_shift_masks, .fxsr, .lwp, .lzcnt, @@ -1177,6 +1230,8 @@ pub const cpu = struct { .popcnt, .prfchw, .sahf, + .slow_shld, + .vzeroupper, .x87, .xop, .xsave, @@ -1189,11 +1244,13 @@ pub const cpu = struct { .@"64bit", .aes, .bmi, + .branchfusion, .cmov, .cx16, - .cx8, .f16c, + .fast_11bytenop, .fast_bextr, + .fast_scalar_shift_masks, .fma, .fxsr, .lwp, @@ -1204,7 +1261,9 @@ pub const cpu = struct { .popcnt, .prfchw, .sahf, + .slow_shld, .tbm, + .vzeroupper, .x87, .xop, .xsave, @@ -1217,11 +1276,13 @@ pub const cpu = struct { .@"64bit", .aes, .bmi, + .branchfusion, .cmov, .cx16, - .cx8, .f16c, + .fast_11bytenop, .fast_bextr, + .fast_scalar_shift_masks, .fma, .fsgsbase, .fxsr, @@ -1233,10 +1294,11 @@ pub const cpu = struct { .popcnt, .prfchw, .sahf, + .slow_shld, .tbm, + .vzeroupper, .x87, .xop, - .xsave, .xsaveopt, }), }; @@ -1249,11 +1311,13 @@ pub const cpu = struct { .avx2, .bmi, .bmi2, + .branchfusion, .cmov, .cx16, - .cx8, .f16c, + .fast_11bytenop, .fast_bextr, + .fast_scalar_shift_masks, .fma, .fsgsbase, .fxsr, @@ -1268,10 +1332,11 @@ pub const cpu = struct { .prfchw, .rdrnd, .sahf, + .slow_shld, .tbm, + .vzeroupper, .x87, .xop, - .xsave, .xsaveopt, }), }; @@ -1282,13 +1347,20 @@ pub const cpu = struct { .@"64bit", .cmov, .cx16, - .cx8, .fxsr, + .idivl_to_divb, + .idivq_to_divl, + .lea_sp, + .lea_uses_ag, .mmx, .movbe, .nopl, + .pad_short_functions, .sahf, + .slow_two_mem_ops, + .slow_unaligned_mem_16, .ssse3, + .vzeroupper, .x87, }), }; @@ -1298,20 +1370,26 @@ pub const cpu = struct { .features = featureSet(&[_]Feature{ .@"64bit", .adx, - .avx, .avx2, .bmi, .bmi2, .cmov, .cx16, - .cx8, .ermsb, .f16c, + .false_deps_lzcnt_tzcnt, + .false_deps_popcnt, + .fast_15bytenop, + .fast_scalar_fsqrt, + .fast_shld_rotate, + .fast_variable_shuffle, .fma, .fsgsbase, .fxsr, + .idivq_to_divl, .invpcid, .lzcnt, + .macrofusion, .mmx, .movbe, .nopl, @@ -1321,10 +1399,9 @@ pub const cpu = struct { .rdrnd, .rdseed, .sahf, - .sse2, - .sse4_2, + .slow_3ops_lea, + .vzeroupper, .x87, - .xsave, .xsaveopt, }), }; @@ -1335,7 +1412,9 @@ pub const cpu = struct { .@"64bit", .cmov, .cx16, - .cx8, + .fast_15bytenop, + .fast_scalar_shift_masks, + .fast_vector_shift_masks, .fxsr, .lzcnt, .mmx, @@ -1343,8 +1422,10 @@ pub const cpu = struct { .popcnt, .prfchw, .sahf, + .slow_shld, .sse4a, .ssse3, + .vzeroupper, .x87, }), }; @@ -1354,12 +1435,16 @@ pub const cpu = struct { .features = featureSet(&[_]Feature{ .@"64bit", .aes, - .avx, .bmi, .cmov, .cx16, - .cx8, .f16c, + .fast_15bytenop, + .fast_bextr, + .fast_hops, + .fast_lzcnt, + .fast_scalar_shift_masks, + .fast_vector_shift_masks, .fxsr, .lzcnt, .mmx, @@ -1369,10 +1454,9 @@ pub const cpu = struct { .popcnt, .prfchw, .sahf, + .slow_shld, .sse4a, - .ssse3, .x87, - .xsave, .xsaveopt, }), }; @@ -1381,6 +1465,8 @@ pub const cpu = struct { .llvm_name = "c3", .features = featureSet(&[_]Feature{ .@"3dnow", + .slow_unaligned_mem_16, + .vzeroupper, .x87, }), }; @@ -1392,7 +1478,9 @@ pub const cpu = struct { .cx8, .fxsr, .mmx, + .slow_unaligned_mem_16, .sse, + .vzeroupper, .x87, }), }; @@ -1403,12 +1491,8 @@ pub const cpu = struct { .@"64bit", .adx, .aes, - .avx, - .avx2, - .avx512bw, .avx512cd, .avx512dq, - .avx512f, .avx512ifma, .avx512vbmi, .avx512vl, @@ -1417,30 +1501,35 @@ pub const cpu = struct { .clflushopt, .cmov, .cx16, - .cx8, .ermsb, - .f16c, - .fma, + .fast_15bytenop, + .fast_gather, + .fast_scalar_fsqrt, + .fast_shld_rotate, + .fast_variable_shuffle, + .fast_vector_fsqrt, .fsgsbase, .fxsr, + .idivq_to_divl, .invpcid, .lzcnt, + .macrofusion, .mmx, .movbe, .nopl, .pclmul, .pku, .popcnt, + .prefer_256_bit, .prfchw, .rdrnd, .rdseed, .sahf, .sgx, .sha, - .sse2, - .sse4_2, + .slow_3ops_lea, + .vzeroupper, .x87, - .xsave, .xsavec, .xsaveopt, .xsaves, @@ -1453,12 +1542,9 @@ pub const cpu = struct { .@"64bit", .adx, .aes, - .avx, - .avx2, .avx512bw, .avx512cd, .avx512dq, - .avx512f, .avx512vl, .avx512vnni, .bmi, @@ -1467,28 +1553,34 @@ pub const cpu = struct { .clwb, .cmov, .cx16, - .cx8, .ermsb, - .f16c, - .fma, + .false_deps_popcnt, + .fast_15bytenop, + .fast_gather, + .fast_scalar_fsqrt, + .fast_shld_rotate, + .fast_variable_shuffle, + .fast_vector_fsqrt, .fsgsbase, .fxsr, + .idivq_to_divl, .invpcid, .lzcnt, + .macrofusion, .mmx, .movbe, .nopl, .pclmul, .pku, .popcnt, + .prefer_256_bit, .prfchw, .rdrnd, .rdseed, .sahf, - .sse2, - .sse4_2, + .slow_3ops_lea, + .vzeroupper, .x87, - .xsave, .xsavec, .xsaveopt, .xsaves, @@ -1501,13 +1593,9 @@ pub const cpu = struct { .@"64bit", .adx, .aes, - .avx, - .avx2, .avx512bf16, - .avx512bw, .avx512cd, .avx512dq, - .avx512f, .avx512vl, .avx512vnni, .bmi, @@ -1516,28 +1604,34 @@ pub const cpu = struct { .clwb, .cmov, .cx16, - .cx8, .ermsb, - .f16c, - .fma, + .false_deps_popcnt, + .fast_15bytenop, + .fast_gather, + .fast_scalar_fsqrt, + .fast_shld_rotate, + .fast_variable_shuffle, + .fast_vector_fsqrt, .fsgsbase, .fxsr, + .idivq_to_divl, .invpcid, .lzcnt, + .macrofusion, .mmx, .movbe, .nopl, .pclmul, .pku, .popcnt, + .prefer_256_bit, .prfchw, .rdrnd, .rdseed, .sahf, - .sse2, - .sse4_2, + .slow_3ops_lea, + .vzeroupper, .x87, - .xsave, .xsavec, .xsaveopt, .xsaves, @@ -1548,23 +1642,27 @@ pub const cpu = struct { .llvm_name = "core-avx-i", .features = featureSet(&[_]Feature{ .@"64bit", - .avx, .cmov, .cx16, - .cx8, .f16c, + .false_deps_popcnt, + .fast_15bytenop, + .fast_scalar_fsqrt, + .fast_shld_rotate, .fsgsbase, .fxsr, + .idivq_to_divl, + .macrofusion, .mmx, .nopl, .pclmul, .popcnt, .rdrnd, .sahf, - .sse2, - .sse4_2, + .slow_3ops_lea, + .slow_unaligned_mem_32, + .vzeroupper, .x87, - .xsave, .xsaveopt, }), }; @@ -1573,20 +1671,26 @@ pub const cpu = struct { .llvm_name = "core-avx2", .features = featureSet(&[_]Feature{ .@"64bit", - .avx, .avx2, .bmi, .bmi2, .cmov, .cx16, - .cx8, .ermsb, .f16c, + .false_deps_lzcnt_tzcnt, + .false_deps_popcnt, + .fast_15bytenop, + .fast_scalar_fsqrt, + .fast_shld_rotate, + .fast_variable_shuffle, .fma, .fsgsbase, .fxsr, + .idivq_to_divl, .invpcid, .lzcnt, + .macrofusion, .mmx, .movbe, .nopl, @@ -1594,10 +1698,9 @@ pub const cpu = struct { .popcnt, .rdrnd, .sahf, - .sse2, - .sse4_2, + .slow_3ops_lea, + .vzeroupper, .x87, - .xsave, .xsaveopt, }), }; @@ -1608,12 +1711,14 @@ pub const cpu = struct { .@"64bit", .cmov, .cx16, - .cx8, .fxsr, + .macrofusion, .mmx, .nopl, .sahf, + .slow_unaligned_mem_16, .ssse3, + .vzeroupper, .x87, }), }; @@ -1624,14 +1729,14 @@ pub const cpu = struct { .@"64bit", .cmov, .cx16, - .cx8, .fxsr, + .macrofusion, .mmx, .nopl, .popcnt, .sahf, - .sse2, .sse4_2, + .vzeroupper, .x87, }), }; @@ -1643,17 +1748,22 @@ pub const cpu = struct { .avx, .cmov, .cx16, - .cx8, + .false_deps_popcnt, + .fast_15bytenop, + .fast_scalar_fsqrt, + .fast_shld_rotate, .fxsr, + .idivq_to_divl, + .macrofusion, .mmx, .nopl, .pclmul, .popcnt, .sahf, - .sse2, - .sse4_2, + .slow_3ops_lea, + .slow_unaligned_mem_32, + .vzeroupper, .x87, - .xsave, .xsaveopt, }), }; @@ -1661,7 +1771,13 @@ pub const cpu = struct { .name = "generic", .llvm_name = "generic", .features = featureSet(&[_]Feature{ + .@"64bit", .cx8, + .idivq_to_divl, + .macrofusion, + .slow_3ops_lea, + .slow_incdec, + .vzeroupper, .x87, }), }; @@ -1671,6 +1787,8 @@ pub const cpu = struct { .features = featureSet(&[_]Feature{ .@"3dnowa", .cx8, + .slow_unaligned_mem_16, + .vzeroupper, .x87, }), }; @@ -1683,7 +1801,7 @@ pub const cpu = struct { .clflushopt, .cmov, .cx16, - .cx8, + .false_deps_popcnt, .fsgsbase, .fxsr, .mmx, @@ -1696,10 +1814,13 @@ pub const cpu = struct { .rdseed, .sahf, .sha, + .slow_incdec, + .slow_lea, + .slow_two_mem_ops, .sse4_2, - .ssse3, + .use_glm_div_sqrt_costs, + .vzeroupper, .x87, - .xsave, .xsavec, .xsaveopt, .xsaves, @@ -1714,7 +1835,6 @@ pub const cpu = struct { .clflushopt, .cmov, .cx16, - .cx8, .fsgsbase, .fxsr, .mmx, @@ -1730,10 +1850,13 @@ pub const cpu = struct { .sahf, .sgx, .sha, + .slow_incdec, + .slow_lea, + .slow_two_mem_ops, .sse4_2, - .ssse3, + .use_glm_div_sqrt_costs, + .vzeroupper, .x87, - .xsave, .xsavec, .xsaveopt, .xsaves, @@ -1744,20 +1867,26 @@ pub const cpu = struct { .llvm_name = "haswell", .features = featureSet(&[_]Feature{ .@"64bit", - .avx, .avx2, .bmi, .bmi2, .cmov, .cx16, - .cx8, .ermsb, .f16c, + .false_deps_lzcnt_tzcnt, + .false_deps_popcnt, + .fast_15bytenop, + .fast_scalar_fsqrt, + .fast_shld_rotate, + .fast_variable_shuffle, .fma, .fsgsbase, .fxsr, + .idivq_to_divl, .invpcid, .lzcnt, + .macrofusion, .mmx, .movbe, .nopl, @@ -1765,10 +1894,9 @@ pub const cpu = struct { .popcnt, .rdrnd, .sahf, - .sse2, - .sse4_2, + .slow_3ops_lea, + .vzeroupper, .x87, - .xsave, .xsaveopt, }), }; @@ -1776,6 +1904,8 @@ pub const cpu = struct { .name = "_i386", .llvm_name = "i386", .features = featureSet(&[_]Feature{ + .slow_unaligned_mem_16, + .vzeroupper, .x87, }), }; @@ -1783,6 +1913,8 @@ pub const cpu = struct { .name = "_i486", .llvm_name = "i486", .features = featureSet(&[_]Feature{ + .slow_unaligned_mem_16, + .vzeroupper, .x87, }), }; @@ -1791,6 +1923,8 @@ pub const cpu = struct { .llvm_name = "i586", .features = featureSet(&[_]Feature{ .cx8, + .slow_unaligned_mem_16, + .vzeroupper, .x87, }), }; @@ -1800,6 +1934,8 @@ pub const cpu = struct { .features = featureSet(&[_]Feature{ .cmov, .cx8, + .slow_unaligned_mem_16, + .vzeroupper, .x87, }), }; @@ -1809,14 +1945,9 @@ pub const cpu = struct { .features = featureSet(&[_]Feature{ .@"64bit", .adx, - .aes, - .avx, - .avx2, .avx512bitalg, - .avx512bw, .avx512cd, .avx512dq, - .avx512f, .avx512ifma, .avx512vbmi, .avx512vbmi2, @@ -1829,22 +1960,27 @@ pub const cpu = struct { .clwb, .cmov, .cx16, - .cx8, .ermsb, - .f16c, - .fma, + .fast_15bytenop, + .fast_gather, + .fast_scalar_fsqrt, + .fast_shld_rotate, + .fast_variable_shuffle, + .fast_vector_fsqrt, .fsgsbase, .fsrm, .fxsr, .gfni, + .idivq_to_divl, .invpcid, .lzcnt, + .macrofusion, .mmx, .movbe, .nopl, - .pclmul, .pku, .popcnt, + .prefer_256_bit, .prfchw, .rdpid, .rdrnd, @@ -1852,12 +1988,11 @@ pub const cpu = struct { .sahf, .sgx, .sha, - .sse2, - .sse4_2, + .slow_3ops_lea, .vaes, .vpclmulqdq, + .vzeroupper, .x87, - .xsave, .xsavec, .xsaveopt, .xsaves, @@ -1869,14 +2004,9 @@ pub const cpu = struct { .features = featureSet(&[_]Feature{ .@"64bit", .adx, - .aes, - .avx, - .avx2, .avx512bitalg, - .avx512bw, .avx512cd, .avx512dq, - .avx512f, .avx512ifma, .avx512vbmi, .avx512vbmi2, @@ -1889,23 +2019,28 @@ pub const cpu = struct { .clwb, .cmov, .cx16, - .cx8, .ermsb, - .f16c, - .fma, + .fast_15bytenop, + .fast_gather, + .fast_scalar_fsqrt, + .fast_shld_rotate, + .fast_variable_shuffle, + .fast_vector_fsqrt, .fsgsbase, .fsrm, .fxsr, .gfni, + .idivq_to_divl, .invpcid, .lzcnt, + .macrofusion, .mmx, .movbe, .nopl, - .pclmul, .pconfig, .pku, .popcnt, + .prefer_256_bit, .prfchw, .rdpid, .rdrnd, @@ -1913,13 +2048,12 @@ pub const cpu = struct { .sahf, .sgx, .sha, - .sse2, - .sse4_2, + .slow_3ops_lea, .vaes, .vpclmulqdq, + .vzeroupper, .wbnoinvd, .x87, - .xsave, .xsavec, .xsaveopt, .xsaves, @@ -1930,23 +2064,27 @@ pub const cpu = struct { .llvm_name = "ivybridge", .features = featureSet(&[_]Feature{ .@"64bit", - .avx, .cmov, .cx16, - .cx8, .f16c, + .false_deps_popcnt, + .fast_15bytenop, + .fast_scalar_fsqrt, + .fast_shld_rotate, .fsgsbase, .fxsr, + .idivq_to_divl, + .macrofusion, .mmx, .nopl, .pclmul, .popcnt, .rdrnd, .sahf, - .sse2, - .sse4_2, + .slow_3ops_lea, + .slow_unaligned_mem_32, + .vzeroupper, .x87, - .xsave, .xsaveopt, }), }; @@ -1956,6 +2094,8 @@ pub const cpu = struct { .features = featureSet(&[_]Feature{ .cx8, .mmx, + .slow_unaligned_mem_16, + .vzeroupper, .x87, }), }; @@ -1965,6 +2105,8 @@ pub const cpu = struct { .features = featureSet(&[_]Feature{ .@"3dnow", .cx8, + .slow_unaligned_mem_16, + .vzeroupper, .x87, }), }; @@ -1974,6 +2116,8 @@ pub const cpu = struct { .features = featureSet(&[_]Feature{ .@"3dnow", .cx8, + .slow_unaligned_mem_16, + .vzeroupper, .x87, }), }; @@ -1985,9 +2129,13 @@ pub const cpu = struct { .@"64bit", .cmov, .cx8, + .fast_scalar_shift_masks, .fxsr, .nopl, + .slow_shld, + .slow_unaligned_mem_16, .sse2, + .vzeroupper, .x87, }), }; @@ -1999,10 +2147,13 @@ pub const cpu = struct { .@"64bit", .cmov, .cx16, - .cx8, + .fast_scalar_shift_masks, .fxsr, .nopl, + .slow_shld, + .slow_unaligned_mem_16, .sse3, + .vzeroupper, .x87, }), }; @@ -2015,30 +2166,32 @@ pub const cpu = struct { .aes, .avx512cd, .avx512er, - .avx512f, .avx512pf, .bmi, .bmi2, .cmov, .cx16, - .cx8, - .f16c, - .fma, + .fast_gather, .fsgsbase, .fxsr, + .idivq_to_divl, .lzcnt, .mmx, .movbe, .nopl, .pclmul, .popcnt, + .prefer_mask_registers, .prefetchwt1, .prfchw, .rdrnd, .rdseed, .sahf, + .slow_3ops_lea, + .slow_incdec, + .slow_pmaddwd, + .slow_two_mem_ops, .x87, - .xsave, .xsaveopt, }), }; @@ -2051,31 +2204,33 @@ pub const cpu = struct { .aes, .avx512cd, .avx512er, - .avx512f, .avx512pf, .avx512vpopcntdq, .bmi, .bmi2, .cmov, .cx16, - .cx8, - .f16c, - .fma, + .fast_gather, .fsgsbase, .fxsr, + .idivq_to_divl, .lzcnt, .mmx, .movbe, .nopl, .pclmul, .popcnt, + .prefer_mask_registers, .prefetchwt1, .prfchw, .rdrnd, .rdseed, .sahf, + .slow_3ops_lea, + .slow_incdec, + .slow_pmaddwd, + .slow_two_mem_ops, .x87, - .xsave, .xsaveopt, }), }; @@ -2084,6 +2239,8 @@ pub const cpu = struct { .llvm_name = "lakemont", .features = featureSet(&[_]Feature{ .cx8, + .slow_unaligned_mem_16, + .vzeroupper, }), }; pub const nehalem = CpuModel{ @@ -2093,14 +2250,14 @@ pub const cpu = struct { .@"64bit", .cmov, .cx16, - .cx8, .fxsr, + .macrofusion, .mmx, .nopl, .popcnt, .sahf, - .sse2, .sse4_2, + .vzeroupper, .x87, }), }; @@ -2111,11 +2268,12 @@ pub const cpu = struct { .@"64bit", .cmov, .cx16, - .cx8, .fxsr, .mmx, .nopl, + .slow_unaligned_mem_16, .sse3, + .vzeroupper, .x87, }), }; @@ -2127,9 +2285,13 @@ pub const cpu = struct { .@"64bit", .cmov, .cx8, + .fast_scalar_shift_masks, .fxsr, .nopl, + .slow_shld, + .slow_unaligned_mem_16, .sse2, + .vzeroupper, .x87, }), }; @@ -2141,10 +2303,13 @@ pub const cpu = struct { .@"64bit", .cmov, .cx16, - .cx8, + .fast_scalar_shift_masks, .fxsr, .nopl, + .slow_shld, + .slow_unaligned_mem_16, .sse3, + .vzeroupper, .x87, }), }; @@ -2155,12 +2320,14 @@ pub const cpu = struct { .@"64bit", .cmov, .cx16, - .cx8, .fxsr, + .macrofusion, .mmx, .nopl, .sahf, + .slow_unaligned_mem_16, .sse4_1, + .vzeroupper, .x87, }), }; @@ -2169,6 +2336,8 @@ pub const cpu = struct { .llvm_name = "pentium", .features = featureSet(&[_]Feature{ .cx8, + .slow_unaligned_mem_16, + .vzeroupper, .x87, }), }; @@ -2181,7 +2350,9 @@ pub const cpu = struct { .fxsr, .mmx, .nopl, + .slow_unaligned_mem_16, .sse2, + .vzeroupper, .x87, }), }; @@ -2191,6 +2362,8 @@ pub const cpu = struct { .features = featureSet(&[_]Feature{ .cx8, .mmx, + .slow_unaligned_mem_16, + .vzeroupper, .x87, }), }; @@ -2203,6 +2376,8 @@ pub const cpu = struct { .fxsr, .mmx, .nopl, + .slow_unaligned_mem_16, + .vzeroupper, .x87, }), }; @@ -2215,7 +2390,9 @@ pub const cpu = struct { .fxsr, .mmx, .nopl, + .slow_unaligned_mem_16, .sse, + .vzeroupper, .x87, }), }; @@ -2228,7 +2405,9 @@ pub const cpu = struct { .fxsr, .mmx, .nopl, + .slow_unaligned_mem_16, .sse, + .vzeroupper, .x87, }), }; @@ -2241,7 +2420,9 @@ pub const cpu = struct { .fxsr, .mmx, .nopl, + .slow_unaligned_mem_16, .sse2, + .vzeroupper, .x87, }), }; @@ -2254,7 +2435,9 @@ pub const cpu = struct { .fxsr, .mmx, .nopl, + .slow_unaligned_mem_16, .sse2, + .vzeroupper, .x87, }), }; @@ -2265,6 +2448,8 @@ pub const cpu = struct { .cmov, .cx8, .nopl, + .slow_unaligned_mem_16, + .vzeroupper, .x87, }), }; @@ -2277,7 +2462,9 @@ pub const cpu = struct { .fxsr, .mmx, .nopl, + .slow_unaligned_mem_16, .sse3, + .vzeroupper, .x87, }), }; @@ -2289,17 +2476,22 @@ pub const cpu = struct { .avx, .cmov, .cx16, - .cx8, + .false_deps_popcnt, + .fast_15bytenop, + .fast_scalar_fsqrt, + .fast_shld_rotate, .fxsr, + .idivq_to_divl, + .macrofusion, .mmx, .nopl, .pclmul, .popcnt, .sahf, - .sse2, - .sse4_2, + .slow_3ops_lea, + .slow_unaligned_mem_32, + .vzeroupper, .x87, - .xsave, .xsaveopt, }), }; @@ -2309,18 +2501,12 @@ pub const cpu = struct { .features = featureSet(&[_]Feature{ .@"64bit", .adx, - .aes, .amx_bf16, .amx_int8, - .amx_tile, - .avx, - .avx2, .avx512bf16, .avx512bitalg, - .avx512bw, .avx512cd, .avx512dq, - .avx512f, .avx512ifma, .avx512vbmi, .avx512vbmi2, @@ -2336,26 +2522,31 @@ pub const cpu = struct { .clwb, .cmov, .cx16, - .cx8, .enqcmd, .ermsb, - .f16c, - .fma, + .fast_15bytenop, + .fast_gather, + .fast_scalar_fsqrt, + .fast_shld_rotate, + .fast_variable_shuffle, + .fast_vector_fsqrt, .fsgsbase, .fsrm, .fxsr, .gfni, + .idivq_to_divl, .invpcid, .lzcnt, + .macrofusion, .mmx, .movbe, .movdir64b, .movdiri, .nopl, - .pclmul, .pconfig, .pku, .popcnt, + .prefer_256_bit, .prfchw, .ptwrite, .rdpid, @@ -2366,16 +2557,15 @@ pub const cpu = struct { .sgx, .sha, .shstk, - .sse2, - .sse4_2, + .slow_3ops_lea, .tsxldtrk, .uintr, .vaes, .vpclmulqdq, + .vzeroupper, .waitpkg, .wbnoinvd, .x87, - .xsave, .xsavec, .xsaveopt, .xsaves, @@ -2388,8 +2578,10 @@ pub const cpu = struct { .@"64bit", .cmov, .cx16, - .cx8, + .false_deps_popcnt, + .fast_7bytenop, .fxsr, + .idivq_to_divl, .mmx, .movbe, .nopl, @@ -2398,8 +2590,12 @@ pub const cpu = struct { .prfchw, .rdrnd, .sahf, + .slow_incdec, + .slow_lea, + .slow_pmulld, + .slow_two_mem_ops, .sse4_2, - .ssse3, + .vzeroupper, .x87, }), }; @@ -2410,12 +2606,9 @@ pub const cpu = struct { .@"64bit", .adx, .aes, - .avx, - .avx2, .avx512bw, .avx512cd, .avx512dq, - .avx512f, .avx512vl, .bmi, .bmi2, @@ -2423,28 +2616,34 @@ pub const cpu = struct { .clwb, .cmov, .cx16, - .cx8, .ermsb, - .f16c, - .fma, + .false_deps_popcnt, + .fast_15bytenop, + .fast_gather, + .fast_scalar_fsqrt, + .fast_shld_rotate, + .fast_variable_shuffle, + .fast_vector_fsqrt, .fsgsbase, .fxsr, + .idivq_to_divl, .invpcid, .lzcnt, + .macrofusion, .mmx, .movbe, .nopl, .pclmul, .pku, .popcnt, + .prefer_256_bit, .prfchw, .rdrnd, .rdseed, .sahf, - .sse2, - .sse4_2, + .slow_3ops_lea, + .vzeroupper, .x87, - .xsave, .xsavec, .xsaveopt, .xsaves, @@ -2457,21 +2656,28 @@ pub const cpu = struct { .@"64bit", .adx, .aes, - .avx, .avx2, .bmi, .bmi2, .clflushopt, .cmov, .cx16, - .cx8, .ermsb, .f16c, + .false_deps_popcnt, + .fast_15bytenop, + .fast_gather, + .fast_scalar_fsqrt, + .fast_shld_rotate, + .fast_variable_shuffle, + .fast_vector_fsqrt, .fma, .fsgsbase, .fxsr, + .idivq_to_divl, .invpcid, .lzcnt, + .macrofusion, .mmx, .movbe, .nopl, @@ -2482,10 +2688,9 @@ pub const cpu = struct { .rdseed, .sahf, .sgx, - .sse2, - .sse4_2, + .slow_3ops_lea, + .vzeroupper, .x87, - .xsave, .xsavec, .xsaveopt, .xsaves, @@ -2498,12 +2703,9 @@ pub const cpu = struct { .@"64bit", .adx, .aes, - .avx, - .avx2, .avx512bw, .avx512cd, .avx512dq, - .avx512f, .avx512vl, .bmi, .bmi2, @@ -2511,28 +2713,34 @@ pub const cpu = struct { .clwb, .cmov, .cx16, - .cx8, .ermsb, - .f16c, - .fma, + .false_deps_popcnt, + .fast_15bytenop, + .fast_gather, + .fast_scalar_fsqrt, + .fast_shld_rotate, + .fast_variable_shuffle, + .fast_vector_fsqrt, .fsgsbase, .fxsr, + .idivq_to_divl, .invpcid, .lzcnt, + .macrofusion, .mmx, .movbe, .nopl, .pclmul, .pku, .popcnt, + .prefer_256_bit, .prfchw, .rdrnd, .rdseed, .sahf, - .sse2, - .sse4_2, + .slow_3ops_lea, + .vzeroupper, .x87, - .xsave, .xsavec, .xsaveopt, .xsaves, @@ -2545,8 +2753,10 @@ pub const cpu = struct { .@"64bit", .cmov, .cx16, - .cx8, + .false_deps_popcnt, + .fast_7bytenop, .fxsr, + .idivq_to_divl, .mmx, .movbe, .nopl, @@ -2555,8 +2765,12 @@ pub const cpu = struct { .prfchw, .rdrnd, .sahf, + .slow_incdec, + .slow_lea, + .slow_pmulld, + .slow_two_mem_ops, .sse4_2, - .ssse3, + .vzeroupper, .x87, }), }; @@ -2566,14 +2780,9 @@ pub const cpu = struct { .features = featureSet(&[_]Feature{ .@"64bit", .adx, - .aes, - .avx, - .avx2, .avx512bitalg, - .avx512bw, .avx512cd, .avx512dq, - .avx512f, .avx512ifma, .avx512vbmi, .avx512vbmi2, @@ -2587,24 +2796,29 @@ pub const cpu = struct { .clwb, .cmov, .cx16, - .cx8, .ermsb, - .f16c, - .fma, + .fast_15bytenop, + .fast_gather, + .fast_scalar_fsqrt, + .fast_shld_rotate, + .fast_variable_shuffle, + .fast_vector_fsqrt, .fsgsbase, .fsrm, .fxsr, .gfni, + .idivq_to_divl, .invpcid, .lzcnt, + .macrofusion, .mmx, .movbe, .movdir64b, .movdiri, .nopl, - .pclmul, .pku, .popcnt, + .prefer_256_bit, .prfchw, .rdpid, .rdrnd, @@ -2613,12 +2827,11 @@ pub const cpu = struct { .sgx, .sha, .shstk, - .sse2, - .sse4_2, + .slow_3ops_lea, .vaes, .vpclmulqdq, + .vzeroupper, .x87, - .xsave, .xsavec, .xsaveopt, .xsaves, @@ -2634,7 +2847,6 @@ pub const cpu = struct { .clwb, .cmov, .cx16, - .cx8, .fsgsbase, .fxsr, .gfni, @@ -2651,10 +2863,13 @@ pub const cpu = struct { .sahf, .sgx, .sha, + .slow_incdec, + .slow_lea, + .slow_two_mem_ops, .sse4_2, - .ssse3, + .use_glm_div_sqrt_costs, + .vzeroupper, .x87, - .xsave, .xsavec, .xsaveopt, .xsaves, @@ -2667,15 +2882,15 @@ pub const cpu = struct { .@"64bit", .cmov, .cx16, - .cx8, .fxsr, + .macrofusion, .mmx, .nopl, .pclmul, .popcnt, .sahf, - .sse2, .sse4_2, + .vzeroupper, .x87, }), }; @@ -2684,6 +2899,8 @@ pub const cpu = struct { .llvm_name = "winchip-c6", .features = featureSet(&[_]Feature{ .mmx, + .slow_unaligned_mem_16, + .vzeroupper, .x87, }), }; @@ -2692,6 +2909,8 @@ pub const cpu = struct { .llvm_name = "winchip2", .features = featureSet(&[_]Feature{ .@"3dnow", + .slow_unaligned_mem_16, + .vzeroupper, .x87, }), }; @@ -2703,9 +2922,14 @@ pub const cpu = struct { .cmov, .cx8, .fxsr, + .idivq_to_divl, + .macrofusion, .mmx, .nopl, + .slow_3ops_lea, + .slow_incdec, .sse2, + .vzeroupper, .x87, }), }; @@ -2716,14 +2940,21 @@ pub const cpu = struct { .@"64bit", .cmov, .cx16, - .cx8, + .false_deps_popcnt, + .fast_15bytenop, + .fast_scalar_fsqrt, + .fast_shld_rotate, .fxsr, + .idivq_to_divl, + .macrofusion, .mmx, .nopl, .popcnt, .sahf, - .sse2, + .slow_3ops_lea, + .slow_unaligned_mem_32, .sse4_2, + .vzeroupper, .x87, }), }; @@ -2737,18 +2968,25 @@ pub const cpu = struct { .bmi2, .cmov, .cx16, - .cx8, .f16c, + .false_deps_lzcnt_tzcnt, + .false_deps_popcnt, + .fast_15bytenop, + .fast_scalar_fsqrt, + .fast_shld_rotate, + .fast_variable_shuffle, .fma, .fxsr, + .idivq_to_divl, .lzcnt, + .macrofusion, .mmx, .movbe, .nopl, .popcnt, .sahf, - .sse2, - .sse4_2, + .slow_3ops_lea, + .vzeroupper, .x87, .xsave, }), @@ -2758,7 +2996,6 @@ pub const cpu = struct { .llvm_name = "x86-64-v4", .features = featureSet(&[_]Feature{ .@"64bit", - .avx2, .avx512bw, .avx512cd, .avx512dq, @@ -2767,18 +3004,25 @@ pub const cpu = struct { .bmi2, .cmov, .cx16, - .cx8, - .f16c, - .fma, + .false_deps_popcnt, + .fast_15bytenop, + .fast_gather, + .fast_scalar_fsqrt, + .fast_shld_rotate, + .fast_variable_shuffle, + .fast_vector_fsqrt, .fxsr, + .idivq_to_divl, .lzcnt, + .macrofusion, .mmx, .movbe, .nopl, .popcnt, + .prefer_256_bit, .sahf, - .sse2, - .sse4_2, + .slow_3ops_lea, + .vzeroupper, .x87, .xsave, }), @@ -2792,7 +3036,9 @@ pub const cpu = struct { .fxsr, .mmx, .nopl, + .slow_unaligned_mem_16, .sse3, + .vzeroupper, .x87, }), }; @@ -2806,11 +3052,16 @@ pub const cpu = struct { .avx2, .bmi, .bmi2, + .branchfusion, .clflushopt, .clzero, .cmov, .cx16, .f16c, + .fast_15bytenop, + .fast_bextr, + .fast_lzcnt, + .fast_scalar_shift_masks, .fma, .fsgsbase, .fxsr, @@ -2826,9 +3077,10 @@ pub const cpu = struct { .rdseed, .sahf, .sha, + .slow_shld, .sse4a, + .vzeroupper, .x87, - .xsave, .xsavec, .xsaveopt, .xsaves, @@ -2844,12 +3096,17 @@ pub const cpu = struct { .avx2, .bmi, .bmi2, + .branchfusion, .clflushopt, .clwb, .clzero, .cmov, .cx16, .f16c, + .fast_15bytenop, + .fast_bextr, + .fast_lzcnt, + .fast_scalar_shift_masks, .fma, .fsgsbase, .fxsr, @@ -2866,10 +3123,11 @@ pub const cpu = struct { .rdseed, .sahf, .sha, + .slow_shld, .sse4a, + .vzeroupper, .wbnoinvd, .x87, - .xsave, .xsavec, .xsaveopt, .xsaves, @@ -2881,18 +3139,23 @@ pub const cpu = struct { .features = featureSet(&[_]Feature{ .@"64bit", .adx, - .aes, .avx2, .bmi, .bmi2, + .branchfusion, .clflushopt, .clwb, .clzero, .cmov, .cx16, .f16c, + .fast_15bytenop, + .fast_bextr, + .fast_lzcnt, + .fast_scalar_shift_masks, .fma, .fsgsbase, + .fsrm, .fxsr, .invpcid, .lzcnt, @@ -2900,7 +3163,6 @@ pub const cpu = struct { .movbe, .mwaitx, .nopl, - .pclmul, .pku, .popcnt, .prfchw, @@ -2909,12 +3171,13 @@ pub const cpu = struct { .rdseed, .sahf, .sha, + .slow_shld, .sse4a, .vaes, .vpclmulqdq, + .vzeroupper, .wbnoinvd, .x87, - .xsave, .xsavec, .xsaveopt, .xsaves,