mirror of
https://github.com/ziglang/zig.git
synced 2024-11-26 15:12:31 +00:00
Merge branch 'llvm19'
Upgrades the LLVM, Clang, and LLD dependencies to LLVM 19.x. Related to #16270. Big thanks to Alex Rønne Petersen for doing the bulk of the upgrade work in this branch.
This commit is contained in:
commit
c6ad4521c7
2
.github/workflows/ci.yaml
vendored
2
.github/workflows/ci.yaml
vendored
@ -4,7 +4,7 @@ on:
|
||||
push:
|
||||
branches:
|
||||
- master
|
||||
- llvm18
|
||||
- llvm19
|
||||
concurrency:
|
||||
# Cancels pending runs when a PR gets updated.
|
||||
group: ${{ github.head_ref || github.run_id }}-${{ github.actor }}
|
||||
|
@ -137,9 +137,9 @@ else()
|
||||
set(ZIG_SYSTEM_LIBCXX "stdc++" CACHE STRING "system libcxx name for build.zig")
|
||||
endif()
|
||||
|
||||
find_package(llvm 18)
|
||||
find_package(clang 18)
|
||||
find_package(lld 18)
|
||||
find_package(llvm 19)
|
||||
find_package(clang 19)
|
||||
find_package(lld 19)
|
||||
|
||||
if(ZIG_STATIC_ZLIB)
|
||||
if (MSVC)
|
||||
@ -834,6 +834,11 @@ else()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
option(ZIG2_NO_RTLIB "Build zig2 without linking to a compiler runtime library (for `zig cc` only)" OFF)
|
||||
if(ZIG2_NO_RTLIB)
|
||||
set(ZIG2_LINK_FLAGS "${ZIG2_LINK_FLAGS} -rtlib=none")
|
||||
endif()
|
||||
|
||||
set(ZIG1_WASM_MODULE "${PROJECT_SOURCE_DIR}/stage1/zig1.wasm")
|
||||
set(ZIG1_C_SOURCE "${PROJECT_BINARY_DIR}/zig1.c")
|
||||
set(ZIG2_C_SOURCE "${PROJECT_BINARY_DIR}/zig2.c")
|
||||
@ -889,9 +894,7 @@ set(BUILD_COMPILER_RT_ARGS
|
||||
--name compiler_rt
|
||||
-femit-bin="${ZIG_COMPILER_RT_C_SOURCE}"
|
||||
-target "${ZIG_HOST_TARGET_TRIPLE}"
|
||||
--dep "build_options"
|
||||
"-Mroot=lib/compiler_rt.zig"
|
||||
"-Mbuild_options=${ZIG_CONFIG_ZIG_OUT}"
|
||||
)
|
||||
|
||||
add_custom_command(
|
||||
|
@ -170,9 +170,7 @@ int main(int argc, char **argv) {
|
||||
"-ofmt=c", "-OReleaseSmall",
|
||||
"--name", "compiler_rt", "-femit-bin=compiler_rt.c",
|
||||
"-target", host_triple,
|
||||
"--dep", "build_options",
|
||||
"-Mroot=lib/compiler_rt.zig",
|
||||
"-Mbuild_options=config.zig",
|
||||
NULL,
|
||||
};
|
||||
print_and_run(child_argv);
|
||||
|
@ -1099,6 +1099,8 @@ const clang_libs = [_][]const u8{
|
||||
"clangToolingCore",
|
||||
"clangExtractAPI",
|
||||
"clangSupport",
|
||||
"clangInstallAPI",
|
||||
"clangAST",
|
||||
};
|
||||
const lld_libs = [_][]const u8{
|
||||
"lldMinGW",
|
||||
@ -1120,6 +1122,7 @@ const llvm_libs = [_][]const u8{
|
||||
"LLVMTextAPIBinaryReader",
|
||||
"LLVMCoverage",
|
||||
"LLVMLineEditor",
|
||||
"LLVMSandboxIR",
|
||||
"LLVMXCoreDisassembler",
|
||||
"LLVMXCoreCodeGen",
|
||||
"LLVMXCoreDesc",
|
||||
@ -1255,6 +1258,7 @@ const llvm_libs = [_][]const u8{
|
||||
"LLVMDWARFLinkerParallel",
|
||||
"LLVMDWARFLinkerClassic",
|
||||
"LLVMDWARFLinker",
|
||||
"LLVMCodeGenData",
|
||||
"LLVMGlobalISel",
|
||||
"LLVMMIRParser",
|
||||
"LLVMAsmPrinter",
|
||||
|
@ -8,7 +8,7 @@ set -e
|
||||
ARCH="$(uname -m)"
|
||||
TARGET="$ARCH-linux-musl"
|
||||
MCPU="baseline"
|
||||
CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.14.0-dev.418+ebd9efa85"
|
||||
CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.14.0-dev.1622+2ac543388"
|
||||
PREFIX="$HOME/deps/$CACHE_BASENAME"
|
||||
ZIG="$PREFIX/bin/zig"
|
||||
|
||||
|
@ -8,7 +8,7 @@ set -e
|
||||
ARCH="$(uname -m)"
|
||||
TARGET="$ARCH-linux-musl"
|
||||
MCPU="baseline"
|
||||
CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.14.0-dev.418+ebd9efa85"
|
||||
CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.14.0-dev.1622+2ac543388"
|
||||
PREFIX="$HOME/deps/$CACHE_BASENAME"
|
||||
ZIG="$PREFIX/bin/zig"
|
||||
|
||||
|
@ -9,7 +9,7 @@ set -e
|
||||
ZIGDIR="$PWD"
|
||||
TARGET="$ARCH-macos-none"
|
||||
MCPU="baseline"
|
||||
CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.13.0-dev.130+98a30acad"
|
||||
CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.14.0-dev.1622+2ac543388"
|
||||
PREFIX="$HOME/$CACHE_BASENAME"
|
||||
ZIG="$PREFIX/bin/zig"
|
||||
|
||||
|
@ -9,7 +9,7 @@ set -e
|
||||
ZIGDIR="$PWD"
|
||||
TARGET="$ARCH-macos-none"
|
||||
MCPU="baseline"
|
||||
CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.13.0-dev.130+98a30acad"
|
||||
CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.14.0-dev.1622+2ac543388"
|
||||
PREFIX="$HOME/$CACHE_BASENAME"
|
||||
ZIG="$PREFIX/bin/zig"
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
$TARGET = "$($Env:ARCH)-windows-gnu"
|
||||
$ZIG_LLVM_CLANG_LLD_NAME = "zig+llvm+lld+clang-$TARGET-0.13.0-dev.130+98a30acad"
|
||||
$ZIG_LLVM_CLANG_LLD_NAME = "zig+llvm+lld+clang-$TARGET-0.14.0-dev.1622+2ac543388"
|
||||
$MCPU = "baseline"
|
||||
$ZIG_LLVM_CLANG_LLD_URL = "https://ziglang.org/deps/$ZIG_LLVM_CLANG_LLD_NAME.zip"
|
||||
$PREFIX_PATH = "$(Get-Location)\..\$ZIG_LLVM_CLANG_LLD_NAME"
|
||||
|
@ -8,7 +8,7 @@ set -e
|
||||
ARCH="$(uname -m)"
|
||||
TARGET="$ARCH-linux-musl"
|
||||
MCPU="baseline"
|
||||
CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.14.0-dev.418+ebd9efa85"
|
||||
CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.14.0-dev.1622+2ac543388"
|
||||
PREFIX="$HOME/deps/$CACHE_BASENAME"
|
||||
ZIG="$PREFIX/bin/zig"
|
||||
|
||||
|
@ -8,7 +8,7 @@ set -e
|
||||
ARCH="$(uname -m)"
|
||||
TARGET="$ARCH-linux-musl"
|
||||
MCPU="baseline"
|
||||
CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.14.0-dev.418+ebd9efa85"
|
||||
CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.14.0-dev.1622+2ac543388"
|
||||
PREFIX="$HOME/deps/$CACHE_BASENAME"
|
||||
ZIG="$PREFIX/bin/zig"
|
||||
|
||||
|
@ -6,7 +6,7 @@ set -e
|
||||
ZIGDIR="$PWD"
|
||||
TARGET="$ARCH-macos-none"
|
||||
MCPU="baseline"
|
||||
CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.13.0-dev.130+98a30acad"
|
||||
CACHE_BASENAME="zig+llvm+lld+clang-$TARGET-0.14.0-dev.1622+2ac543388"
|
||||
PREFIX="$HOME/$CACHE_BASENAME"
|
||||
JOBS="-j3"
|
||||
ZIG="$PREFIX/bin/zig"
|
||||
|
@ -1,5 +1,5 @@
|
||||
$TARGET = "$($Env:ARCH)-windows-gnu"
|
||||
$ZIG_LLVM_CLANG_LLD_NAME = "zig+llvm+lld+clang-$TARGET-0.13.0-dev.130+98a30acad"
|
||||
$ZIG_LLVM_CLANG_LLD_NAME = "zig+llvm+lld+clang-$TARGET-0.14.0-dev.1622+2ac543388"
|
||||
$MCPU = "baseline"
|
||||
$ZIG_LLVM_CLANG_LLD_URL = "https://ziglang.org/deps/$ZIG_LLVM_CLANG_LLD_NAME.zip"
|
||||
$PREFIX_PATH = "$($Env:USERPROFILE)\$ZIG_LLVM_CLANG_LLD_NAME"
|
||||
|
@ -1,5 +1,5 @@
|
||||
$TARGET = "$($Env:ARCH)-windows-gnu"
|
||||
$ZIG_LLVM_CLANG_LLD_NAME = "zig+llvm+lld+clang-$TARGET-0.13.0-dev.130+98a30acad"
|
||||
$ZIG_LLVM_CLANG_LLD_NAME = "zig+llvm+lld+clang-$TARGET-0.14.0-dev.1622+2ac543388"
|
||||
$MCPU = "baseline"
|
||||
$ZIG_LLVM_CLANG_LLD_URL = "https://ziglang.org/deps/$ZIG_LLVM_CLANG_LLD_NAME.zip"
|
||||
$PREFIX_PATH = "$($Env:USERPROFILE)\$ZIG_LLVM_CLANG_LLD_NAME"
|
||||
|
@ -17,10 +17,10 @@ find_path(CLANG_INCLUDE_DIRS NAMES clang/Frontend/ASTUnit.h
|
||||
if(${LLVM_LINK_MODE} STREQUAL "shared")
|
||||
find_library(CLANG_LIBRARIES
|
||||
NAMES
|
||||
libclang-cpp.so.18
|
||||
libclang-cpp.so.18.1
|
||||
clang-cpp-18.0
|
||||
clang-cpp180
|
||||
libclang-cpp.so.19
|
||||
libclang-cpp.so.19.1
|
||||
clang-cpp-19.0
|
||||
clang-cpp190
|
||||
clang-cpp
|
||||
NAMES_PER_DIR
|
||||
HINTS "${LLVM_LIBDIRS}"
|
||||
@ -68,6 +68,8 @@ else()
|
||||
FIND_AND_ADD_CLANG_LIB(clangToolingCore)
|
||||
FIND_AND_ADD_CLANG_LIB(clangExtractAPI)
|
||||
FIND_AND_ADD_CLANG_LIB(clangSupport)
|
||||
FIND_AND_ADD_CLANG_LIB(clangInstallAPI)
|
||||
FIND_AND_ADD_CLANG_LIB(clangAST)
|
||||
endif()
|
||||
|
||||
if (MSVC)
|
||||
|
@ -9,21 +9,21 @@
|
||||
find_path(LLD_INCLUDE_DIRS NAMES lld/Common/Driver.h
|
||||
HINTS ${LLVM_INCLUDE_DIRS}
|
||||
PATHS
|
||||
/usr/lib/llvm-18/include
|
||||
/usr/local/llvm180/include
|
||||
/usr/local/llvm18/include
|
||||
/usr/local/opt/llvm@18/include
|
||||
/opt/homebrew/opt/llvm@18/include
|
||||
/usr/lib/llvm-19/include
|
||||
/usr/local/llvm190/include
|
||||
/usr/local/llvm19/include
|
||||
/usr/local/opt/llvm@19/include
|
||||
/opt/homebrew/opt/llvm@19/include
|
||||
/mingw64/include)
|
||||
|
||||
find_library(LLD_LIBRARY NAMES lld-18.0 lld180 lld NAMES_PER_DIR
|
||||
find_library(LLD_LIBRARY NAMES lld-19.0 lld190 lld NAMES_PER_DIR
|
||||
HINTS ${LLVM_LIBDIRS}
|
||||
PATHS
|
||||
/usr/lib/llvm-18/lib
|
||||
/usr/local/llvm180/lib
|
||||
/usr/local/llvm18/lib
|
||||
/usr/local/opt/llvm@18/lib
|
||||
/opt/homebrew/opt/llvm@18/lib
|
||||
/usr/lib/llvm-19/lib
|
||||
/usr/local/llvm190/lib
|
||||
/usr/local/llvm19/lib
|
||||
/usr/local/opt/llvm@19/lib
|
||||
/opt/homebrew/opt/llvm@19/lib
|
||||
)
|
||||
if(EXISTS ${LLD_LIBRARY})
|
||||
set(LLD_LIBRARIES ${LLD_LIBRARY})
|
||||
@ -34,11 +34,11 @@ else()
|
||||
HINTS ${LLVM_LIBDIRS}
|
||||
PATHS
|
||||
${LLD_LIBDIRS}
|
||||
/usr/lib/llvm-18/lib
|
||||
/usr/local/llvm180/lib
|
||||
/usr/local/llvm18/lib
|
||||
/usr/local/opt/llvm@18/lib
|
||||
/opt/homebrew/opt/llvm@18/lib
|
||||
/usr/lib/llvm-19/lib
|
||||
/usr/local/llvm190/lib
|
||||
/usr/local/llvm19/lib
|
||||
/usr/local/opt/llvm@19/lib
|
||||
/opt/homebrew/opt/llvm@19/lib
|
||||
/mingw64/lib
|
||||
/c/msys64/mingw64/lib
|
||||
c:/msys64/mingw64/lib)
|
||||
|
@ -17,12 +17,12 @@ if(ZIG_USE_LLVM_CONFIG)
|
||||
# terminate when the right LLVM version is not found.
|
||||
unset(LLVM_CONFIG_EXE CACHE)
|
||||
find_program(LLVM_CONFIG_EXE
|
||||
NAMES llvm-config-18 llvm-config-18.0 llvm-config180 llvm-config18 llvm-config NAMES_PER_DIR
|
||||
NAMES llvm-config-19 llvm-config-19.0 llvm-config190 llvm-config19 llvm-config NAMES_PER_DIR
|
||||
PATHS
|
||||
"/mingw64/bin"
|
||||
"/c/msys64/mingw64/bin"
|
||||
"c:/msys64/mingw64/bin"
|
||||
"C:/Libraries/llvm-18.0.0/bin")
|
||||
"C:/Libraries/llvm-19.0.0/bin")
|
||||
|
||||
if ("${LLVM_CONFIG_EXE}" STREQUAL "LLVM_CONFIG_EXE-NOTFOUND")
|
||||
if (NOT LLVM_CONFIG_ERROR_MESSAGES STREQUAL "")
|
||||
@ -40,9 +40,9 @@ if(ZIG_USE_LLVM_CONFIG)
|
||||
OUTPUT_STRIP_TRAILING_WHITESPACE)
|
||||
|
||||
get_filename_component(LLVM_CONFIG_DIR "${LLVM_CONFIG_EXE}" DIRECTORY)
|
||||
if("${LLVM_CONFIG_VERSION}" VERSION_LESS 18 OR "${LLVM_CONFIG_VERSION}" VERSION_EQUAL 19 OR "${LLVM_CONFIG_VERSION}" VERSION_GREATER 19)
|
||||
if("${LLVM_CONFIG_VERSION}" VERSION_LESS 19 OR "${LLVM_CONFIG_VERSION}" VERSION_EQUAL 20 OR "${LLVM_CONFIG_VERSION}" VERSION_GREATER 20)
|
||||
# Save the error message, in case this is the last llvm-config we find
|
||||
list(APPEND LLVM_CONFIG_ERROR_MESSAGES "expected LLVM 18.x but found ${LLVM_CONFIG_VERSION} using ${LLVM_CONFIG_EXE}")
|
||||
list(APPEND LLVM_CONFIG_ERROR_MESSAGES "expected LLVM 19.x but found ${LLVM_CONFIG_VERSION} using ${LLVM_CONFIG_EXE}")
|
||||
|
||||
# Ignore this directory and try the search again
|
||||
list(APPEND CMAKE_IGNORE_PATH "${LLVM_CONFIG_DIR}")
|
||||
@ -63,12 +63,12 @@ if(ZIG_USE_LLVM_CONFIG)
|
||||
ERROR_VARIABLE LLVM_CONFIG_ERROR
|
||||
ERROR_STRIP_TRAILING_WHITESPACE)
|
||||
|
||||
if (LLVM_CONFIG_ERROR)
|
||||
if (LLVM_CONFIG_ERROR)
|
||||
# Save the error message, in case this is the last llvm-config we find
|
||||
if (ZIG_SHARED_LLVM)
|
||||
list(APPEND LLVM_CONFIG_ERROR_MESSAGES "LLVM 18.x found at ${LLVM_CONFIG_EXE} does not support linking as a shared library")
|
||||
list(APPEND LLVM_CONFIG_ERROR_MESSAGES "LLVM 19.x found at ${LLVM_CONFIG_EXE} does not support linking as a shared library")
|
||||
else()
|
||||
list(APPEND LLVM_CONFIG_ERROR_MESSAGES "LLVM 18.x found at ${LLVM_CONFIG_EXE} does not support linking as a static library")
|
||||
list(APPEND LLVM_CONFIG_ERROR_MESSAGES "LLVM 19.x found at ${LLVM_CONFIG_EXE} does not support linking as a static library")
|
||||
endif()
|
||||
|
||||
# Ignore this directory and try the search again
|
||||
@ -200,6 +200,7 @@ else()
|
||||
FIND_AND_ADD_LLVM_LIB(LLVMTextAPIBinaryReader)
|
||||
FIND_AND_ADD_LLVM_LIB(LLVMCoverage)
|
||||
FIND_AND_ADD_LLVM_LIB(LLVMLineEditor)
|
||||
FIND_AND_ADD_LLVM_LIB(LLVMSandboxIR)
|
||||
FIND_AND_ADD_LLVM_LIB(LLVMXCoreDisassembler)
|
||||
FIND_AND_ADD_LLVM_LIB(LLVMXCoreCodeGen)
|
||||
FIND_AND_ADD_LLVM_LIB(LLVMXCoreDesc)
|
||||
@ -335,6 +336,7 @@ else()
|
||||
FIND_AND_ADD_LLVM_LIB(LLVMDWARFLinkerParallel)
|
||||
FIND_AND_ADD_LLVM_LIB(LLVMDWARFLinkerClassic)
|
||||
FIND_AND_ADD_LLVM_LIB(LLVMDWARFLinker)
|
||||
FIND_AND_ADD_LLVM_LIB(LLVMCodeGenData)
|
||||
FIND_AND_ADD_LLVM_LIB(LLVMGlobalISel)
|
||||
FIND_AND_ADD_LLVM_LIB(LLVMMIRParser)
|
||||
FIND_AND_ADD_LLVM_LIB(LLVMAsmPrinter)
|
||||
|
1
lib/compiler/aro/aro/target.zig
vendored
1
lib/compiler/aro/aro/target.zig
vendored
@ -658,6 +658,7 @@ pub fn toLLVMTriple(target: std.Target, buf: []u8) []const u8 {
|
||||
.shadermodel => "shadermodel",
|
||||
.visionos => "xros",
|
||||
.serenity => "serenity",
|
||||
.bridgeos => "bridgeos",
|
||||
.opencl,
|
||||
.opengl,
|
||||
.vulkan,
|
||||
|
@ -1,8 +1,14 @@
|
||||
const std = @import("std");
|
||||
const builtin = @import("builtin");
|
||||
const native_endian = builtin.cpu.arch.endian();
|
||||
const ofmt_c = builtin.object_format == .c;
|
||||
|
||||
pub const linkage: std.builtin.GlobalLinkage = if (builtin.is_test) .internal else .weak;
|
||||
pub const linkage: std.builtin.GlobalLinkage = if (builtin.is_test)
|
||||
.internal
|
||||
else if (ofmt_c)
|
||||
.strong
|
||||
else
|
||||
.weak;
|
||||
/// Determines the symbol's visibility to other objects.
|
||||
/// For WebAssembly this allows the symbol to be resolved to other modules, but will not
|
||||
/// export it to the host runtime.
|
||||
@ -28,7 +34,7 @@ pub const want_float_exceptions = !builtin.cpu.arch.isWasm();
|
||||
|
||||
// Libcalls that involve u128 on Windows x86-64 are expected by LLVM to use the
|
||||
// calling convention of @Vector(2, u64), rather than what's standard.
|
||||
pub const want_windows_v2u64_abi = builtin.os.tag == .windows and builtin.cpu.arch == .x86_64 and @import("builtin").object_format != .c;
|
||||
pub const want_windows_v2u64_abi = builtin.os.tag == .windows and builtin.cpu.arch == .x86_64 and !ofmt_c;
|
||||
|
||||
/// This governs whether to use these symbol names for f16/f32 conversions
|
||||
/// rather than the standard names:
|
||||
|
4
lib/include/__clang_cuda_intrinsics.h
vendored
4
lib/include/__clang_cuda_intrinsics.h
vendored
@ -215,9 +215,7 @@ inline __device__ unsigned int __activemask() {
|
||||
#if CUDA_VERSION < 9020
|
||||
return __nvvm_vote_ballot(1);
|
||||
#else
|
||||
unsigned int mask;
|
||||
asm volatile("activemask.b32 %0;" : "=r"(mask));
|
||||
return mask;
|
||||
return __nvvm_activemask();
|
||||
#endif
|
||||
}
|
||||
|
||||
|
12
lib/include/__stdarg_header_macro.h
vendored
Normal file
12
lib/include/__stdarg_header_macro.h
vendored
Normal file
@ -0,0 +1,12 @@
|
||||
/*===---- __stdarg_header_macro.h ------------------------------------------===
|
||||
*
|
||||
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
* See https://llvm.org/LICENSE.txt for license information.
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
|
||||
#ifndef __STDARG_H
|
||||
#define __STDARG_H
|
||||
#endif
|
12
lib/include/__stddef_header_macro.h
vendored
Normal file
12
lib/include/__stddef_header_macro.h
vendored
Normal file
@ -0,0 +1,12 @@
|
||||
/*===---- __stddef_header_macro.h ------------------------------------------===
|
||||
*
|
||||
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
* See https://llvm.org/LICENSE.txt for license information.
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
|
||||
#ifndef __STDDEF_H
|
||||
#define __STDDEF_H
|
||||
#endif
|
36
lib/include/arm_acle.h
vendored
36
lib/include/arm_acle.h
vendored
@ -75,6 +75,14 @@ static __inline__ void __attribute__((__always_inline__, __nodebug__)) __yield(v
|
||||
#define __dbg(t) __builtin_arm_dbg(t)
|
||||
#endif
|
||||
|
||||
#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE
|
||||
#define _CHKFEAT_GCS 1
|
||||
static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
|
||||
__chkfeat(uint64_t __features) {
|
||||
return __builtin_arm_chkfeat(__features) ^ __features;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* 7.5 Swap */
|
||||
static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
|
||||
__swp(uint32_t __x, volatile uint32_t *__p) {
|
||||
@ -109,7 +117,7 @@ __swp(uint32_t __x, volatile uint32_t *__p) {
|
||||
#endif
|
||||
|
||||
/* 7.7 NOP */
|
||||
#if !defined(_MSC_VER) || !defined(__aarch64__)
|
||||
#if !defined(_MSC_VER) || (!defined(__aarch64__) && !defined(__arm64ec__))
|
||||
static __inline__ void __attribute__((__always_inline__, __nodebug__)) __nop(void) {
|
||||
__builtin_arm_nop();
|
||||
}
|
||||
@ -313,7 +321,7 @@ __qdbl(int32_t __t) {
|
||||
}
|
||||
#endif
|
||||
|
||||
/* 8.4.3 Accumultating multiplications */
|
||||
/* 8.4.3 Accumulating multiplications */
|
||||
#if defined(__ARM_FEATURE_DSP) && __ARM_FEATURE_DSP
|
||||
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
|
||||
__smlabb(int32_t __a, int32_t __b, int32_t __c) {
|
||||
@ -545,7 +553,7 @@ __usub16(uint16x2_t __a, uint16x2_t __b) {
|
||||
}
|
||||
#endif
|
||||
|
||||
/* 8.5.10 Parallel 16-bit multiplications */
|
||||
/* 8.5.10 Parallel 16-bit multiplication */
|
||||
#if defined(__ARM_FEATURE_SIMD32) && __ARM_FEATURE_SIMD32
|
||||
static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
|
||||
__smlad(int16x2_t __a, int16x2_t __b, int32_t __c) {
|
||||
@ -748,7 +756,7 @@ __arm_st64bv0(void *__addr, data512_t __value) {
|
||||
#define __arm_wsrf(sysreg, v) __arm_wsr(sysreg, __builtin_bit_cast(uint32_t, v))
|
||||
#define __arm_wsrf64(sysreg, v) __arm_wsr64(sysreg, __builtin_bit_cast(uint64_t, v))
|
||||
|
||||
/* 10.3 Memory Tagging Extensions (MTE) Intrinsics */
|
||||
/* 10.3 MTE intrinsics */
|
||||
#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE
|
||||
#define __arm_mte_create_random_tag(__ptr, __mask) __builtin_arm_irg(__ptr, __mask)
|
||||
#define __arm_mte_increment_tag(__ptr, __tag_offset) __builtin_arm_addg(__ptr, __tag_offset)
|
||||
@ -757,7 +765,7 @@ __arm_st64bv0(void *__addr, data512_t __value) {
|
||||
#define __arm_mte_set_tag(__ptr) __builtin_arm_stg(__ptr)
|
||||
#define __arm_mte_ptrdiff(__ptra, __ptrb) __builtin_arm_subp(__ptra, __ptrb)
|
||||
|
||||
/* 18 Memory Operations Intrinsics */
|
||||
/* 18 memcpy family of operations intrinsics - MOPS */
|
||||
#define __arm_mops_memset_tag(__tagged_address, __value, __size) \
|
||||
__builtin_arm_mops_memset_tag(__tagged_address, __value, __size)
|
||||
#endif
|
||||
@ -855,6 +863,24 @@ __rndrrs(uint64_t *__p) {
|
||||
}
|
||||
#endif
|
||||
|
||||
/* 11.2 Guarded Control Stack intrinsics */
|
||||
#if defined(__ARM_64BIT_STATE) && __ARM_64BIT_STATE
|
||||
static __inline__ void * __attribute__((__always_inline__, __nodebug__))
|
||||
__gcspr() {
|
||||
return (void *)__builtin_arm_rsr64("gcspr_el0");
|
||||
}
|
||||
|
||||
static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__, target("gcs")))
|
||||
__gcspopm() {
|
||||
return __builtin_arm_gcspopm(0);
|
||||
}
|
||||
|
||||
static __inline__ const void * __attribute__((__always_inline__, __nodebug__, target("gcs")))
|
||||
__gcsss(const void *__stack) {
|
||||
return __builtin_arm_gcsss(__stack);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(__cplusplus)
|
||||
}
|
||||
#endif
|
||||
|
2
lib/include/arm_fp16.h
vendored
2
lib/include/arm_fp16.h
vendored
@ -29,7 +29,7 @@
|
||||
typedef __fp16 float16_t;
|
||||
#define __ai static __inline__ __attribute__((__always_inline__, __nodebug__))
|
||||
|
||||
#if defined(__aarch64__)
|
||||
#if defined(__aarch64__) || defined(__arm64ec__)
|
||||
#define vabdh_f16(__p0, __p1) __extension__ ({ \
|
||||
float16_t __ret; \
|
||||
float16_t __s0 = __p0; \
|
||||
|
39667
lib/include/arm_neon.h
vendored
39667
lib/include/arm_neon.h
vendored
File diff suppressed because it is too large
Load Diff
409
lib/include/arm_sme.h
vendored
409
lib/include/arm_sme.h
vendored
@ -16,6 +16,8 @@
|
||||
#endif
|
||||
#include <arm_sve.h>
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
/* Function attributes */
|
||||
#define __ai static __inline__ __attribute__((__always_inline__, __nodebug__))
|
||||
|
||||
@ -39,6 +41,11 @@ __ai bool __arm_in_streaming_mode(void) __arm_streaming_compatible {
|
||||
return x0 & 1;
|
||||
}
|
||||
|
||||
void *__arm_sc_memcpy(void *dest, const void *src, size_t n) __arm_streaming_compatible;
|
||||
void *__arm_sc_memmove(void *dest, const void *src, size_t n) __arm_streaming_compatible;
|
||||
void *__arm_sc_memset(void *s, int c, size_t n) __arm_streaming_compatible;
|
||||
void *__arm_sc_memchr(void *s, int c, size_t n) __arm_streaming_compatible;
|
||||
|
||||
__ai __attribute__((target("sme"))) void svundef_za(void) __arm_streaming_compatible __arm_out("za") { }
|
||||
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddha_za32_u32_m)))
|
||||
@ -368,7 +375,7 @@ void svwrite_ver_za8_s8_m(uint64_t, uint32_t, svbool_t, svint8_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svzero_mask_za)))
|
||||
void svzero_mask_za(uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svzero_za)))
|
||||
void svzero_za();
|
||||
void svzero_za(void);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddha_za32_u32_m)))
|
||||
void svaddha_za32_m(uint64_t, svbool_t, svbool_t, svuint32_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svaddha_za32_s32_m)))
|
||||
@ -597,6 +604,78 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za8_u8_
|
||||
void svwrite_ver_za8_m(uint64_t, uint32_t, svbool_t, svuint8_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_ver_za8_s8_m)))
|
||||
void svwrite_ver_za8_m(uint64_t, uint32_t, svbool_t, svint8_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za16_f16_vg1x2)))
|
||||
void svmla_single_za16_f16_vg1x2(uint32_t, svfloat16x2_t, svfloat16_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za16_f16_vg1x4)))
|
||||
void svmla_single_za16_f16_vg1x4(uint32_t, svfloat16x4_t, svfloat16_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za16_f16_vg1x2)))
|
||||
void svmla_lane_za16_f16_vg1x2(uint32_t, svfloat16x2_t, svfloat16_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za16_f16_vg1x4)))
|
||||
void svmla_lane_za16_f16_vg1x4(uint32_t, svfloat16x4_t, svfloat16_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za16_f16_vg1x2)))
|
||||
void svmla_za16_f16_vg1x2(uint32_t, svfloat16x2_t, svfloat16x2_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za16_f16_vg1x4)))
|
||||
void svmla_za16_f16_vg1x4(uint32_t, svfloat16x4_t, svfloat16x4_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za16_f16_vg1x2)))
|
||||
void svmls_single_za16_f16_vg1x2(uint32_t, svfloat16x2_t, svfloat16_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za16_f16_vg1x4)))
|
||||
void svmls_single_za16_f16_vg1x4(uint32_t, svfloat16x4_t, svfloat16_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za16_f16_vg1x2)))
|
||||
void svmls_lane_za16_f16_vg1x2(uint32_t, svfloat16x2_t, svfloat16_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za16_f16_vg1x4)))
|
||||
void svmls_lane_za16_f16_vg1x4(uint32_t, svfloat16x4_t, svfloat16_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za16_f16_vg1x2)))
|
||||
void svmls_za16_f16_vg1x2(uint32_t, svfloat16x2_t, svfloat16x2_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za16_f16_vg1x4)))
|
||||
void svmls_za16_f16_vg1x4(uint32_t, svfloat16x4_t, svfloat16x4_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za16_f16_m)))
|
||||
void svmopa_za16_f16_m(uint64_t, svbool_t, svbool_t, svfloat16_t, svfloat16_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za16_f16_m)))
|
||||
void svmops_za16_f16_m(uint64_t, svbool_t, svbool_t, svfloat16_t, svfloat16_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za16_f16_vg1x2)))
|
||||
void svmla_za16_vg1x2(uint32_t, svfloat16x2_t, svfloat16_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za16_f16_vg1x4)))
|
||||
void svmla_za16_vg1x4(uint32_t, svfloat16x4_t, svfloat16_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za16_f16_vg1x2)))
|
||||
void svmla_lane_za16_vg1x2(uint32_t, svfloat16x2_t, svfloat16_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za16_f16_vg1x4)))
|
||||
void svmla_lane_za16_vg1x4(uint32_t, svfloat16x4_t, svfloat16_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za16_f16_vg1x2)))
|
||||
void svmla_za16_vg1x2(uint32_t, svfloat16x2_t, svfloat16x2_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za16_f16_vg1x4)))
|
||||
void svmla_za16_vg1x4(uint32_t, svfloat16x4_t, svfloat16x4_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za16_f16_vg1x2)))
|
||||
void svmls_za16_vg1x2(uint32_t, svfloat16x2_t, svfloat16_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za16_f16_vg1x4)))
|
||||
void svmls_za16_vg1x4(uint32_t, svfloat16x4_t, svfloat16_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za16_f16_vg1x2)))
|
||||
void svmls_lane_za16_vg1x2(uint32_t, svfloat16x2_t, svfloat16_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za16_f16_vg1x4)))
|
||||
void svmls_lane_za16_vg1x4(uint32_t, svfloat16x4_t, svfloat16_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za16_f16_vg1x2)))
|
||||
void svmls_za16_vg1x2(uint32_t, svfloat16x2_t, svfloat16x2_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za16_f16_vg1x4)))
|
||||
void svmls_za16_vg1x4(uint32_t, svfloat16x4_t, svfloat16x4_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za16_f16_m)))
|
||||
void svmopa_za16_m(uint64_t, svbool_t, svbool_t, svfloat16_t, svfloat16_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za16_f16_m)))
|
||||
void svmops_za16_m(uint64_t, svbool_t, svbool_t, svfloat16_t, svfloat16_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za16_f16_vg1x2)))
|
||||
void svadd_za16_f16_vg1x2(uint32_t, svfloat16x2_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za16_f16_vg1x4)))
|
||||
void svadd_za16_f16_vg1x4(uint32_t, svfloat16x4_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za16_f16_vg1x2)))
|
||||
void svsub_za16_f16_vg1x2(uint32_t, svfloat16x2_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za16_f16_vg1x4)))
|
||||
void svsub_za16_f16_vg1x4(uint32_t, svfloat16x4_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za16_f16_vg1x2)))
|
||||
void svadd_za16_vg1x2(uint32_t, svfloat16x2_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za16_f16_vg1x4)))
|
||||
void svadd_za16_vg1x4(uint32_t, svfloat16x4_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za16_f16_vg1x2)))
|
||||
void svsub_za16_vg1x2(uint32_t, svfloat16x2_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za16_f16_vg1x4)))
|
||||
void svsub_za16_vg1x4(uint32_t, svfloat16x4_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za64_f64_m)))
|
||||
void svmopa_za64_f64_m(uint64_t, svbool_t, svbool_t, svfloat64_t, svfloat64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za64_f64_m)))
|
||||
@ -2059,6 +2138,78 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za8_u8_vg1x
|
||||
void svwrite_za8_vg1x4(uint32_t, svuint8x4_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svwrite_za8_s8_vg1x4)))
|
||||
void svwrite_za8_vg1x4(uint32_t, svint8x4_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za16_bf16_vg1x2)))
|
||||
void svadd_za16_bf16_vg1x2(uint32_t, svbfloat16x2_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za16_bf16_vg1x4)))
|
||||
void svadd_za16_bf16_vg1x4(uint32_t, svbfloat16x4_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za16_bf16_vg1x2)))
|
||||
void svmla_single_za16_bf16_vg1x2(uint32_t, svbfloat16x2_t, svbfloat16_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za16_bf16_vg1x4)))
|
||||
void svmla_single_za16_bf16_vg1x4(uint32_t, svbfloat16x4_t, svbfloat16_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za16_bf16_vg1x2)))
|
||||
void svmla_lane_za16_bf16_vg1x2(uint32_t, svbfloat16x2_t, svbfloat16_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za16_bf16_vg1x4)))
|
||||
void svmla_lane_za16_bf16_vg1x4(uint32_t, svbfloat16x4_t, svbfloat16_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za16_bf16_vg1x2)))
|
||||
void svmla_za16_bf16_vg1x2(uint32_t, svbfloat16x2_t, svbfloat16x2_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za16_bf16_vg1x4)))
|
||||
void svmla_za16_bf16_vg1x4(uint32_t, svbfloat16x4_t, svbfloat16x4_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za16_bf16_vg1x2)))
|
||||
void svmls_single_za16_bf16_vg1x2(uint32_t, svbfloat16x2_t, svbfloat16_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za16_bf16_vg1x4)))
|
||||
void svmls_single_za16_bf16_vg1x4(uint32_t, svbfloat16x4_t, svbfloat16_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za16_bf16_vg1x2)))
|
||||
void svmls_lane_za16_bf16_vg1x2(uint32_t, svbfloat16x2_t, svbfloat16_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za16_bf16_vg1x4)))
|
||||
void svmls_lane_za16_bf16_vg1x4(uint32_t, svbfloat16x4_t, svbfloat16_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za16_bf16_vg1x2)))
|
||||
void svmls_za16_bf16_vg1x2(uint32_t, svbfloat16x2_t, svbfloat16x2_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za16_bf16_vg1x4)))
|
||||
void svmls_za16_bf16_vg1x4(uint32_t, svbfloat16x4_t, svbfloat16x4_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za16_bf16_m)))
|
||||
void svmopa_za16_bf16_m(uint64_t, svbool_t, svbool_t, svbfloat16_t, svbfloat16_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za16_bf16_m)))
|
||||
void svmops_za16_bf16_m(uint64_t, svbool_t, svbool_t, svbfloat16_t, svbfloat16_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za16_bf16_vg1x2)))
|
||||
void svsub_za16_bf16_vg1x2(uint32_t, svbfloat16x2_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za16_bf16_vg1x4)))
|
||||
void svsub_za16_bf16_vg1x4(uint32_t, svbfloat16x4_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za16_bf16_vg1x2)))
|
||||
void svadd_za16_vg1x2(uint32_t, svbfloat16x2_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za16_bf16_vg1x4)))
|
||||
void svadd_za16_vg1x4(uint32_t, svbfloat16x4_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za16_bf16_vg1x2)))
|
||||
void svmla_za16_vg1x2(uint32_t, svbfloat16x2_t, svbfloat16_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_single_za16_bf16_vg1x4)))
|
||||
void svmla_za16_vg1x4(uint32_t, svbfloat16x4_t, svbfloat16_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za16_bf16_vg1x2)))
|
||||
void svmla_lane_za16_vg1x2(uint32_t, svbfloat16x2_t, svbfloat16_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_lane_za16_bf16_vg1x4)))
|
||||
void svmla_lane_za16_vg1x4(uint32_t, svbfloat16x4_t, svbfloat16_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za16_bf16_vg1x2)))
|
||||
void svmla_za16_vg1x2(uint32_t, svbfloat16x2_t, svbfloat16x2_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmla_za16_bf16_vg1x4)))
|
||||
void svmla_za16_vg1x4(uint32_t, svbfloat16x4_t, svbfloat16x4_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za16_bf16_vg1x2)))
|
||||
void svmls_za16_vg1x2(uint32_t, svbfloat16x2_t, svbfloat16_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_single_za16_bf16_vg1x4)))
|
||||
void svmls_za16_vg1x4(uint32_t, svbfloat16x4_t, svbfloat16_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za16_bf16_vg1x2)))
|
||||
void svmls_lane_za16_vg1x2(uint32_t, svbfloat16x2_t, svbfloat16_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_lane_za16_bf16_vg1x4)))
|
||||
void svmls_lane_za16_vg1x4(uint32_t, svbfloat16x4_t, svbfloat16_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za16_bf16_vg1x2)))
|
||||
void svmls_za16_vg1x2(uint32_t, svbfloat16x2_t, svbfloat16x2_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmls_za16_bf16_vg1x4)))
|
||||
void svmls_za16_vg1x4(uint32_t, svbfloat16x4_t, svbfloat16x4_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmopa_za16_bf16_m)))
|
||||
void svmopa_za16_m(uint64_t, svbool_t, svbool_t, svbfloat16_t, svbfloat16_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svmops_za16_bf16_m)))
|
||||
void svmops_za16_m(uint64_t, svbool_t, svbool_t, svbfloat16_t, svbfloat16_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za16_bf16_vg1x2)))
|
||||
void svsub_za16_vg1x2(uint32_t, svbfloat16x2_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svsub_za16_bf16_vg1x4)))
|
||||
void svsub_za16_vg1x4(uint32_t, svbfloat16x4_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za64_f64_vg1x2)))
|
||||
void svadd_za64_f64_vg1x2(uint32_t, svfloat64x2_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svadd_za64_f64_vg1x4)))
|
||||
@ -2403,6 +2554,262 @@ __aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svvdot_lane_za64_s1
|
||||
void svvdot_lane_za64_vg1x4(uint32_t, svint16x4_t, svint16_t, uint64_t);
|
||||
__aio __attribute__((__clang_arm_builtin_alias(__builtin_sme_svvdot_lane_za64_u16_vg1x4)))
|
||||
void svvdot_lane_za64_vg1x4(uint32_t, svuint16x4_t, svuint16_t, uint64_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za128_u8)))
|
||||
svuint8_t svreadz_hor_za128_u8(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za128_u32)))
|
||||
svuint32_t svreadz_hor_za128_u32(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za128_u64)))
|
||||
svuint64_t svreadz_hor_za128_u64(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za128_u16)))
|
||||
svuint16_t svreadz_hor_za128_u16(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za128_bf16)))
|
||||
svbfloat16_t svreadz_hor_za128_bf16(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za128_s8)))
|
||||
svint8_t svreadz_hor_za128_s8(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za128_f64)))
|
||||
svfloat64_t svreadz_hor_za128_f64(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za128_f32)))
|
||||
svfloat32_t svreadz_hor_za128_f32(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za128_f16)))
|
||||
svfloat16_t svreadz_hor_za128_f16(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za128_s32)))
|
||||
svint32_t svreadz_hor_za128_s32(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za128_s64)))
|
||||
svint64_t svreadz_hor_za128_s64(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za128_s16)))
|
||||
svint16_t svreadz_hor_za128_s16(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za16_u16)))
|
||||
svuint16_t svreadz_hor_za16_u16(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za16_bf16)))
|
||||
svbfloat16_t svreadz_hor_za16_bf16(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za16_f16)))
|
||||
svfloat16_t svreadz_hor_za16_f16(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za16_s16)))
|
||||
svint16_t svreadz_hor_za16_s16(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za16_u16_vg2)))
|
||||
svuint16x2_t svreadz_hor_za16_u16_vg2(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za16_bf16_vg2)))
|
||||
svbfloat16x2_t svreadz_hor_za16_bf16_vg2(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za16_f16_vg2)))
|
||||
svfloat16x2_t svreadz_hor_za16_f16_vg2(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za16_s16_vg2)))
|
||||
svint16x2_t svreadz_hor_za16_s16_vg2(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za16_u16_vg4)))
|
||||
svuint16x4_t svreadz_hor_za16_u16_vg4(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za16_bf16_vg4)))
|
||||
svbfloat16x4_t svreadz_hor_za16_bf16_vg4(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za16_f16_vg4)))
|
||||
svfloat16x4_t svreadz_hor_za16_f16_vg4(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za16_s16_vg4)))
|
||||
svint16x4_t svreadz_hor_za16_s16_vg4(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za32_u32)))
|
||||
svuint32_t svreadz_hor_za32_u32(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za32_f32)))
|
||||
svfloat32_t svreadz_hor_za32_f32(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za32_s32)))
|
||||
svint32_t svreadz_hor_za32_s32(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za32_u32_vg2)))
|
||||
svuint32x2_t svreadz_hor_za32_u32_vg2(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za32_f32_vg2)))
|
||||
svfloat32x2_t svreadz_hor_za32_f32_vg2(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za32_s32_vg2)))
|
||||
svint32x2_t svreadz_hor_za32_s32_vg2(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za32_u32_vg4)))
|
||||
svuint32x4_t svreadz_hor_za32_u32_vg4(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za32_f32_vg4)))
|
||||
svfloat32x4_t svreadz_hor_za32_f32_vg4(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za32_s32_vg4)))
|
||||
svint32x4_t svreadz_hor_za32_s32_vg4(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za64_u64)))
|
||||
svuint64_t svreadz_hor_za64_u64(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za64_f64)))
|
||||
svfloat64_t svreadz_hor_za64_f64(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za64_s64)))
|
||||
svint64_t svreadz_hor_za64_s64(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za64_u64_vg2)))
|
||||
svuint64x2_t svreadz_hor_za64_u64_vg2(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za64_f64_vg2)))
|
||||
svfloat64x2_t svreadz_hor_za64_f64_vg2(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za64_s64_vg2)))
|
||||
svint64x2_t svreadz_hor_za64_s64_vg2(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za64_u64_vg4)))
|
||||
svuint64x4_t svreadz_hor_za64_u64_vg4(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za64_f64_vg4)))
|
||||
svfloat64x4_t svreadz_hor_za64_f64_vg4(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za64_s64_vg4)))
|
||||
svint64x4_t svreadz_hor_za64_s64_vg4(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za8_u8)))
|
||||
svuint8_t svreadz_hor_za8_u8(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za8_s8)))
|
||||
svint8_t svreadz_hor_za8_s8(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za8_u8_vg2)))
|
||||
svuint8x2_t svreadz_hor_za8_u8_vg2(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za8_s8_vg2)))
|
||||
svint8x2_t svreadz_hor_za8_s8_vg2(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za8_u8_vg4)))
|
||||
svuint8x4_t svreadz_hor_za8_u8_vg4(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_hor_za8_s8_vg4)))
|
||||
svint8x4_t svreadz_hor_za8_s8_vg4(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za128_u8)))
|
||||
svuint8_t svreadz_ver_za128_u8(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za128_u32)))
|
||||
svuint32_t svreadz_ver_za128_u32(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za128_u64)))
|
||||
svuint64_t svreadz_ver_za128_u64(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za128_u16)))
|
||||
svuint16_t svreadz_ver_za128_u16(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za128_bf16)))
|
||||
svbfloat16_t svreadz_ver_za128_bf16(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za128_s8)))
|
||||
svint8_t svreadz_ver_za128_s8(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za128_f64)))
|
||||
svfloat64_t svreadz_ver_za128_f64(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za128_f32)))
|
||||
svfloat32_t svreadz_ver_za128_f32(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za128_f16)))
|
||||
svfloat16_t svreadz_ver_za128_f16(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za128_s32)))
|
||||
svint32_t svreadz_ver_za128_s32(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za128_s64)))
|
||||
svint64_t svreadz_ver_za128_s64(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za128_s16)))
|
||||
svint16_t svreadz_ver_za128_s16(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za16_u16)))
|
||||
svuint16_t svreadz_ver_za16_u16(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za16_bf16)))
|
||||
svbfloat16_t svreadz_ver_za16_bf16(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za16_f16)))
|
||||
svfloat16_t svreadz_ver_za16_f16(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za16_s16)))
|
||||
svint16_t svreadz_ver_za16_s16(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za16_u16_vg2)))
|
||||
svuint16x2_t svreadz_ver_za16_u16_vg2(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za16_bf16_vg2)))
|
||||
svbfloat16x2_t svreadz_ver_za16_bf16_vg2(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za16_f16_vg2)))
|
||||
svfloat16x2_t svreadz_ver_za16_f16_vg2(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za16_s16_vg2)))
|
||||
svint16x2_t svreadz_ver_za16_s16_vg2(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za16_u16_vg4)))
|
||||
svuint16x4_t svreadz_ver_za16_u16_vg4(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za16_bf16_vg4)))
|
||||
svbfloat16x4_t svreadz_ver_za16_bf16_vg4(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za16_f16_vg4)))
|
||||
svfloat16x4_t svreadz_ver_za16_f16_vg4(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za16_s16_vg4)))
|
||||
svint16x4_t svreadz_ver_za16_s16_vg4(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za32_u32)))
|
||||
svuint32_t svreadz_ver_za32_u32(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za32_f32)))
|
||||
svfloat32_t svreadz_ver_za32_f32(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za32_s32)))
|
||||
svint32_t svreadz_ver_za32_s32(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za32_u32_vg2)))
|
||||
svuint32x2_t svreadz_ver_za32_u32_vg2(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za32_f32_vg2)))
|
||||
svfloat32x2_t svreadz_ver_za32_f32_vg2(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za32_s32_vg2)))
|
||||
svint32x2_t svreadz_ver_za32_s32_vg2(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za32_u32_vg4)))
|
||||
svuint32x4_t svreadz_ver_za32_u32_vg4(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za32_f32_vg4)))
|
||||
svfloat32x4_t svreadz_ver_za32_f32_vg4(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za32_s32_vg4)))
|
||||
svint32x4_t svreadz_ver_za32_s32_vg4(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za64_u64)))
|
||||
svuint64_t svreadz_ver_za64_u64(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za64_f64)))
|
||||
svfloat64_t svreadz_ver_za64_f64(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za64_s64)))
|
||||
svint64_t svreadz_ver_za64_s64(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za64_u64_vg2)))
|
||||
svuint64x2_t svreadz_ver_za64_u64_vg2(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za64_f64_vg2)))
|
||||
svfloat64x2_t svreadz_ver_za64_f64_vg2(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za64_s64_vg2)))
|
||||
svint64x2_t svreadz_ver_za64_s64_vg2(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za64_u64_vg4)))
|
||||
svuint64x4_t svreadz_ver_za64_u64_vg4(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za64_f64_vg4)))
|
||||
svfloat64x4_t svreadz_ver_za64_f64_vg4(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za64_s64_vg4)))
|
||||
svint64x4_t svreadz_ver_za64_s64_vg4(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za8_u8)))
|
||||
svuint8_t svreadz_ver_za8_u8(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za8_s8)))
|
||||
svint8_t svreadz_ver_za8_s8(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za8_u8_vg2)))
|
||||
svuint8x2_t svreadz_ver_za8_u8_vg2(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za8_s8_vg2)))
|
||||
svint8x2_t svreadz_ver_za8_s8_vg2(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za8_u8_vg4)))
|
||||
svuint8x4_t svreadz_ver_za8_u8_vg4(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_ver_za8_s8_vg4)))
|
||||
svint8x4_t svreadz_ver_za8_s8_vg4(uint64_t, uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_za16_u16_vg1x2)))
|
||||
svuint16x2_t svreadz_za16_u16_vg1x2(uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_za16_bf16_vg1x2)))
|
||||
svbfloat16x2_t svreadz_za16_bf16_vg1x2(uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_za16_f16_vg1x2)))
|
||||
svfloat16x2_t svreadz_za16_f16_vg1x2(uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_za16_s16_vg1x2)))
|
||||
svint16x2_t svreadz_za16_s16_vg1x2(uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_za16_u16_vg1x4)))
|
||||
svuint16x4_t svreadz_za16_u16_vg1x4(uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_za16_bf16_vg1x4)))
|
||||
svbfloat16x4_t svreadz_za16_bf16_vg1x4(uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_za16_f16_vg1x4)))
|
||||
svfloat16x4_t svreadz_za16_f16_vg1x4(uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_za16_s16_vg1x4)))
|
||||
svint16x4_t svreadz_za16_s16_vg1x4(uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_za32_u32_vg1x2)))
|
||||
svuint32x2_t svreadz_za32_u32_vg1x2(uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_za32_f32_vg1x2)))
|
||||
svfloat32x2_t svreadz_za32_f32_vg1x2(uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_za32_s32_vg1x2)))
|
||||
svint32x2_t svreadz_za32_s32_vg1x2(uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_za32_u32_vg1x4)))
|
||||
svuint32x4_t svreadz_za32_u32_vg1x4(uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_za32_f32_vg1x4)))
|
||||
svfloat32x4_t svreadz_za32_f32_vg1x4(uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_za32_s32_vg1x4)))
|
||||
svint32x4_t svreadz_za32_s32_vg1x4(uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_za64_u64_vg1x2)))
|
||||
svuint64x2_t svreadz_za64_u64_vg1x2(uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_za64_f64_vg1x2)))
|
||||
svfloat64x2_t svreadz_za64_f64_vg1x2(uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_za64_s64_vg1x2)))
|
||||
svint64x2_t svreadz_za64_s64_vg1x2(uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_za64_u64_vg1x4)))
|
||||
svuint64x4_t svreadz_za64_u64_vg1x4(uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_za64_f64_vg1x4)))
|
||||
svfloat64x4_t svreadz_za64_f64_vg1x4(uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_za64_s64_vg1x4)))
|
||||
svint64x4_t svreadz_za64_s64_vg1x4(uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_za8_u8_vg1x2)))
|
||||
svuint8x2_t svreadz_za8_u8_vg1x2(uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_za8_s8_vg1x2)))
|
||||
svint8x2_t svreadz_za8_s8_vg1x2(uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_za8_u8_vg1x4)))
|
||||
svuint8x4_t svreadz_za8_u8_vg1x4(uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svreadz_za8_s8_vg1x4)))
|
||||
svint8x4_t svreadz_za8_s8_vg1x4(uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svzero_za64_vg1x2)))
|
||||
void svzero_za64_vg1x2(uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svzero_za64_vg1x4)))
|
||||
void svzero_za64_vg1x4(uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svzero_za64_vg2x1)))
|
||||
void svzero_za64_vg2x1(uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svzero_za64_vg2x2)))
|
||||
void svzero_za64_vg2x2(uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svzero_za64_vg2x4)))
|
||||
void svzero_za64_vg2x4(uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svzero_za64_vg4x1)))
|
||||
void svzero_za64_vg4x1(uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svzero_za64_vg4x2)))
|
||||
void svzero_za64_vg4x2(uint32_t);
|
||||
__ai __attribute__((__clang_arm_builtin_alias(__builtin_sme_svzero_za64_vg4x4)))
|
||||
void svzero_za64_vg4x4(uint32_t);
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
33276
lib/include/arm_sve.h
vendored
33276
lib/include/arm_sve.h
vendored
File diff suppressed because it is too large
Load Diff
10
lib/include/arm_vector_types.h
vendored
10
lib/include/arm_vector_types.h
vendored
@ -16,7 +16,7 @@
|
||||
#define __ARM_NEON_TYPES_H
|
||||
typedef float float32_t;
|
||||
typedef __fp16 float16_t;
|
||||
#ifdef __aarch64__
|
||||
#if defined(__aarch64__) || defined(__arm64ec__)
|
||||
typedef double float64_t;
|
||||
#endif
|
||||
|
||||
@ -40,7 +40,7 @@ typedef __attribute__((neon_vector_type(4))) float16_t float16x4_t;
|
||||
typedef __attribute__((neon_vector_type(8))) float16_t float16x8_t;
|
||||
typedef __attribute__((neon_vector_type(2))) float32_t float32x2_t;
|
||||
typedef __attribute__((neon_vector_type(4))) float32_t float32x4_t;
|
||||
#ifdef __aarch64__
|
||||
#if defined(__aarch64__) || defined(__arm64ec__)
|
||||
typedef __attribute__((neon_vector_type(1))) float64_t float64x1_t;
|
||||
typedef __attribute__((neon_vector_type(2))) float64_t float64x2_t;
|
||||
#endif
|
||||
@ -125,7 +125,7 @@ typedef struct float32x4x2_t {
|
||||
float32x4_t val[2];
|
||||
} float32x4x2_t;
|
||||
|
||||
#ifdef __aarch64__
|
||||
#if defined(__aarch64__) || defined(__arm64ec__)
|
||||
typedef struct float64x1x2_t {
|
||||
float64x1_t val[2];
|
||||
} float64x1x2_t;
|
||||
@ -215,7 +215,7 @@ typedef struct float32x4x3_t {
|
||||
float32x4_t val[3];
|
||||
} float32x4x3_t;
|
||||
|
||||
#ifdef __aarch64__
|
||||
#if defined(__aarch64__) || defined(__arm64ec__)
|
||||
typedef struct float64x1x3_t {
|
||||
float64x1_t val[3];
|
||||
} float64x1x3_t;
|
||||
@ -305,7 +305,7 @@ typedef struct float32x4x4_t {
|
||||
float32x4_t val[4];
|
||||
} float32x4x4_t;
|
||||
|
||||
#ifdef __aarch64__
|
||||
#if defined(__aarch64__) || defined(__arm64ec__)
|
||||
typedef struct float64x1x4_t {
|
||||
float64x1_t val[4];
|
||||
} float64x1x4_t;
|
||||
|
271
lib/include/avx512erintrin.h
vendored
271
lib/include/avx512erintrin.h
vendored
@ -1,271 +0,0 @@
|
||||
/*===---- avx512erintrin.h - AVX512ER intrinsics ---------------------------===
|
||||
*
|
||||
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
* See https://llvm.org/LICENSE.txt for license information.
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
#ifndef __IMMINTRIN_H
|
||||
#error "Never use <avx512erintrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef __AVX512ERINTRIN_H
|
||||
#define __AVX512ERINTRIN_H
|
||||
|
||||
/* exp2a23 */
|
||||
#define _mm512_exp2a23_round_pd(A, R) \
|
||||
((__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \
|
||||
(__v8df)_mm512_setzero_pd(), \
|
||||
(__mmask8)-1, (int)(R)))
|
||||
|
||||
#define _mm512_mask_exp2a23_round_pd(S, M, A, R) \
|
||||
((__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \
|
||||
(__v8df)(__m512d)(S), (__mmask8)(M), \
|
||||
(int)(R)))
|
||||
|
||||
#define _mm512_maskz_exp2a23_round_pd(M, A, R) \
|
||||
((__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \
|
||||
(__v8df)_mm512_setzero_pd(), \
|
||||
(__mmask8)(M), (int)(R)))
|
||||
|
||||
#define _mm512_exp2a23_pd(A) \
|
||||
_mm512_exp2a23_round_pd((A), _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm512_mask_exp2a23_pd(S, M, A) \
|
||||
_mm512_mask_exp2a23_round_pd((S), (M), (A), _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm512_maskz_exp2a23_pd(M, A) \
|
||||
_mm512_maskz_exp2a23_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm512_exp2a23_round_ps(A, R) \
|
||||
((__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \
|
||||
(__v16sf)_mm512_setzero_ps(), \
|
||||
(__mmask16)-1, (int)(R)))
|
||||
|
||||
#define _mm512_mask_exp2a23_round_ps(S, M, A, R) \
|
||||
((__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \
|
||||
(__v16sf)(__m512)(S), (__mmask16)(M), \
|
||||
(int)(R)))
|
||||
|
||||
#define _mm512_maskz_exp2a23_round_ps(M, A, R) \
|
||||
((__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \
|
||||
(__v16sf)_mm512_setzero_ps(), \
|
||||
(__mmask16)(M), (int)(R)))
|
||||
|
||||
#define _mm512_exp2a23_ps(A) \
|
||||
_mm512_exp2a23_round_ps((A), _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm512_mask_exp2a23_ps(S, M, A) \
|
||||
_mm512_mask_exp2a23_round_ps((S), (M), (A), _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm512_maskz_exp2a23_ps(M, A) \
|
||||
_mm512_maskz_exp2a23_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
/* rsqrt28 */
|
||||
#define _mm512_rsqrt28_round_pd(A, R) \
|
||||
((__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \
|
||||
(__v8df)_mm512_setzero_pd(), \
|
||||
(__mmask8)-1, (int)(R)))
|
||||
|
||||
#define _mm512_mask_rsqrt28_round_pd(S, M, A, R) \
|
||||
((__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \
|
||||
(__v8df)(__m512d)(S), (__mmask8)(M), \
|
||||
(int)(R)))
|
||||
|
||||
#define _mm512_maskz_rsqrt28_round_pd(M, A, R) \
|
||||
((__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \
|
||||
(__v8df)_mm512_setzero_pd(), \
|
||||
(__mmask8)(M), (int)(R)))
|
||||
|
||||
#define _mm512_rsqrt28_pd(A) \
|
||||
_mm512_rsqrt28_round_pd((A), _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm512_mask_rsqrt28_pd(S, M, A) \
|
||||
_mm512_mask_rsqrt28_round_pd((S), (M), (A), _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm512_maskz_rsqrt28_pd(M, A) \
|
||||
_mm512_maskz_rsqrt28_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm512_rsqrt28_round_ps(A, R) \
|
||||
((__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \
|
||||
(__v16sf)_mm512_setzero_ps(), \
|
||||
(__mmask16)-1, (int)(R)))
|
||||
|
||||
#define _mm512_mask_rsqrt28_round_ps(S, M, A, R) \
|
||||
((__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \
|
||||
(__v16sf)(__m512)(S), (__mmask16)(M), \
|
||||
(int)(R)))
|
||||
|
||||
#define _mm512_maskz_rsqrt28_round_ps(M, A, R) \
|
||||
((__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \
|
||||
(__v16sf)_mm512_setzero_ps(), \
|
||||
(__mmask16)(M), (int)(R)))
|
||||
|
||||
#define _mm512_rsqrt28_ps(A) \
|
||||
_mm512_rsqrt28_round_ps((A), _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm512_mask_rsqrt28_ps(S, M, A) \
|
||||
_mm512_mask_rsqrt28_round_ps((S), (M), A, _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm512_maskz_rsqrt28_ps(M, A) \
|
||||
_mm512_maskz_rsqrt28_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm_rsqrt28_round_ss(A, B, R) \
|
||||
((__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \
|
||||
(__v4sf)(__m128)(B), \
|
||||
(__v4sf)_mm_setzero_ps(), \
|
||||
(__mmask8)-1, (int)(R)))
|
||||
|
||||
#define _mm_mask_rsqrt28_round_ss(S, M, A, B, R) \
|
||||
((__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \
|
||||
(__v4sf)(__m128)(B), \
|
||||
(__v4sf)(__m128)(S), \
|
||||
(__mmask8)(M), (int)(R)))
|
||||
|
||||
#define _mm_maskz_rsqrt28_round_ss(M, A, B, R) \
|
||||
((__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \
|
||||
(__v4sf)(__m128)(B), \
|
||||
(__v4sf)_mm_setzero_ps(), \
|
||||
(__mmask8)(M), (int)(R)))
|
||||
|
||||
#define _mm_rsqrt28_ss(A, B) \
|
||||
_mm_rsqrt28_round_ss((A), (B), _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm_mask_rsqrt28_ss(S, M, A, B) \
|
||||
_mm_mask_rsqrt28_round_ss((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm_maskz_rsqrt28_ss(M, A, B) \
|
||||
_mm_maskz_rsqrt28_round_ss((M), (A), (B), _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm_rsqrt28_round_sd(A, B, R) \
|
||||
((__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \
|
||||
(__v2df)(__m128d)(B), \
|
||||
(__v2df)_mm_setzero_pd(), \
|
||||
(__mmask8)-1, (int)(R)))
|
||||
|
||||
#define _mm_mask_rsqrt28_round_sd(S, M, A, B, R) \
|
||||
((__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \
|
||||
(__v2df)(__m128d)(B), \
|
||||
(__v2df)(__m128d)(S), \
|
||||
(__mmask8)(M), (int)(R)))
|
||||
|
||||
#define _mm_maskz_rsqrt28_round_sd(M, A, B, R) \
|
||||
((__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \
|
||||
(__v2df)(__m128d)(B), \
|
||||
(__v2df)_mm_setzero_pd(), \
|
||||
(__mmask8)(M), (int)(R)))
|
||||
|
||||
#define _mm_rsqrt28_sd(A, B) \
|
||||
_mm_rsqrt28_round_sd((A), (B), _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm_mask_rsqrt28_sd(S, M, A, B) \
|
||||
_mm_mask_rsqrt28_round_sd((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm_maskz_rsqrt28_sd(M, A, B) \
|
||||
_mm_maskz_rsqrt28_round_sd((M), (A), (B), _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
/* rcp28 */
|
||||
#define _mm512_rcp28_round_pd(A, R) \
|
||||
((__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \
|
||||
(__v8df)_mm512_setzero_pd(), \
|
||||
(__mmask8)-1, (int)(R)))
|
||||
|
||||
#define _mm512_mask_rcp28_round_pd(S, M, A, R) \
|
||||
((__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \
|
||||
(__v8df)(__m512d)(S), (__mmask8)(M), \
|
||||
(int)(R)))
|
||||
|
||||
#define _mm512_maskz_rcp28_round_pd(M, A, R) \
|
||||
((__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \
|
||||
(__v8df)_mm512_setzero_pd(), \
|
||||
(__mmask8)(M), (int)(R)))
|
||||
|
||||
#define _mm512_rcp28_pd(A) \
|
||||
_mm512_rcp28_round_pd((A), _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm512_mask_rcp28_pd(S, M, A) \
|
||||
_mm512_mask_rcp28_round_pd((S), (M), (A), _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm512_maskz_rcp28_pd(M, A) \
|
||||
_mm512_maskz_rcp28_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm512_rcp28_round_ps(A, R) \
|
||||
((__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \
|
||||
(__v16sf)_mm512_setzero_ps(), \
|
||||
(__mmask16)-1, (int)(R)))
|
||||
|
||||
#define _mm512_mask_rcp28_round_ps(S, M, A, R) \
|
||||
((__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \
|
||||
(__v16sf)(__m512)(S), (__mmask16)(M), \
|
||||
(int)(R)))
|
||||
|
||||
#define _mm512_maskz_rcp28_round_ps(M, A, R) \
|
||||
((__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \
|
||||
(__v16sf)_mm512_setzero_ps(), \
|
||||
(__mmask16)(M), (int)(R)))
|
||||
|
||||
#define _mm512_rcp28_ps(A) \
|
||||
_mm512_rcp28_round_ps((A), _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm512_mask_rcp28_ps(S, M, A) \
|
||||
_mm512_mask_rcp28_round_ps((S), (M), (A), _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm512_maskz_rcp28_ps(M, A) \
|
||||
_mm512_maskz_rcp28_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm_rcp28_round_ss(A, B, R) \
|
||||
((__m128)__builtin_ia32_rcp28ss_round_mask((__v4sf)(__m128)(A), \
|
||||
(__v4sf)(__m128)(B), \
|
||||
(__v4sf)_mm_setzero_ps(), \
|
||||
(__mmask8)-1, (int)(R)))
|
||||
|
||||
#define _mm_mask_rcp28_round_ss(S, M, A, B, R) \
|
||||
((__m128)__builtin_ia32_rcp28ss_round_mask((__v4sf)(__m128)(A), \
|
||||
(__v4sf)(__m128)(B), \
|
||||
(__v4sf)(__m128)(S), \
|
||||
(__mmask8)(M), (int)(R)))
|
||||
|
||||
#define _mm_maskz_rcp28_round_ss(M, A, B, R) \
|
||||
((__m128)__builtin_ia32_rcp28ss_round_mask((__v4sf)(__m128)(A), \
|
||||
(__v4sf)(__m128)(B), \
|
||||
(__v4sf)_mm_setzero_ps(), \
|
||||
(__mmask8)(M), (int)(R)))
|
||||
|
||||
#define _mm_rcp28_ss(A, B) \
|
||||
_mm_rcp28_round_ss((A), (B), _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm_mask_rcp28_ss(S, M, A, B) \
|
||||
_mm_mask_rcp28_round_ss((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm_maskz_rcp28_ss(M, A, B) \
|
||||
_mm_maskz_rcp28_round_ss((M), (A), (B), _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm_rcp28_round_sd(A, B, R) \
|
||||
((__m128d)__builtin_ia32_rcp28sd_round_mask((__v2df)(__m128d)(A), \
|
||||
(__v2df)(__m128d)(B), \
|
||||
(__v2df)_mm_setzero_pd(), \
|
||||
(__mmask8)-1, (int)(R)))
|
||||
|
||||
#define _mm_mask_rcp28_round_sd(S, M, A, B, R) \
|
||||
((__m128d)__builtin_ia32_rcp28sd_round_mask((__v2df)(__m128d)(A), \
|
||||
(__v2df)(__m128d)(B), \
|
||||
(__v2df)(__m128d)(S), \
|
||||
(__mmask8)(M), (int)(R)))
|
||||
|
||||
#define _mm_maskz_rcp28_round_sd(M, A, B, R) \
|
||||
((__m128d)__builtin_ia32_rcp28sd_round_mask((__v2df)(__m128d)(A), \
|
||||
(__v2df)(__m128d)(B), \
|
||||
(__v2df)_mm_setzero_pd(), \
|
||||
(__mmask8)(M), (int)(R)))
|
||||
|
||||
#define _mm_rcp28_sd(A, B) \
|
||||
_mm_rcp28_round_sd((A), (B), _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm_mask_rcp28_sd(S, M, A, B) \
|
||||
_mm_mask_rcp28_round_sd((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#define _mm_maskz_rcp28_sd(M, A, B) \
|
||||
_mm_maskz_rcp28_round_sd((M), (A), (B), _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
#endif /* __AVX512ERINTRIN_H */
|
76
lib/include/avx512fp16intrin.h
vendored
76
lib/include/avx512fp16intrin.h
vendored
@ -96,8 +96,8 @@ _mm512_set_ph(_Float16 __h1, _Float16 __h2, _Float16 __h3, _Float16 __h4,
|
||||
(h5), (h4), (h3), (h2), (h1))
|
||||
|
||||
static __inline __m512h __DEFAULT_FN_ATTRS512
|
||||
_mm512_set1_pch(_Float16 _Complex h) {
|
||||
return (__m512h)_mm512_set1_ps(__builtin_bit_cast(float, h));
|
||||
_mm512_set1_pch(_Float16 _Complex __h) {
|
||||
return (__m512h)_mm512_set1_ps(__builtin_bit_cast(float, __h));
|
||||
}
|
||||
|
||||
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_castph_ps(__m128h __a) {
|
||||
@ -282,75 +282,75 @@ _mm512_zextph256_ph512(__m256h __a) {
|
||||
#define _mm_comi_sh(A, B, pred) \
|
||||
_mm_comi_round_sh((A), (B), (pred), _MM_FROUND_CUR_DIRECTION)
|
||||
|
||||
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comieq_sh(__m128h A,
|
||||
__m128h B) {
|
||||
return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_EQ_OS,
|
||||
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comieq_sh(__m128h __A,
|
||||
__m128h __B) {
|
||||
return __builtin_ia32_vcomish((__v8hf)__A, (__v8hf)__B, _CMP_EQ_OS,
|
||||
_MM_FROUND_CUR_DIRECTION);
|
||||
}
|
||||
|
||||
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comilt_sh(__m128h A,
|
||||
__m128h B) {
|
||||
return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_LT_OS,
|
||||
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comilt_sh(__m128h __A,
|
||||
__m128h __B) {
|
||||
return __builtin_ia32_vcomish((__v8hf)__A, (__v8hf)__B, _CMP_LT_OS,
|
||||
_MM_FROUND_CUR_DIRECTION);
|
||||
}
|
||||
|
||||
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comile_sh(__m128h A,
|
||||
__m128h B) {
|
||||
return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_LE_OS,
|
||||
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comile_sh(__m128h __A,
|
||||
__m128h __B) {
|
||||
return __builtin_ia32_vcomish((__v8hf)__A, (__v8hf)__B, _CMP_LE_OS,
|
||||
_MM_FROUND_CUR_DIRECTION);
|
||||
}
|
||||
|
||||
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comigt_sh(__m128h A,
|
||||
__m128h B) {
|
||||
return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_GT_OS,
|
||||
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comigt_sh(__m128h __A,
|
||||
__m128h __B) {
|
||||
return __builtin_ia32_vcomish((__v8hf)__A, (__v8hf)__B, _CMP_GT_OS,
|
||||
_MM_FROUND_CUR_DIRECTION);
|
||||
}
|
||||
|
||||
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comige_sh(__m128h A,
|
||||
__m128h B) {
|
||||
return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_GE_OS,
|
||||
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comige_sh(__m128h __A,
|
||||
__m128h __B) {
|
||||
return __builtin_ia32_vcomish((__v8hf)__A, (__v8hf)__B, _CMP_GE_OS,
|
||||
_MM_FROUND_CUR_DIRECTION);
|
||||
}
|
||||
|
||||
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comineq_sh(__m128h A,
|
||||
__m128h B) {
|
||||
return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_NEQ_US,
|
||||
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_comineq_sh(__m128h __A,
|
||||
__m128h __B) {
|
||||
return __builtin_ia32_vcomish((__v8hf)__A, (__v8hf)__B, _CMP_NEQ_US,
|
||||
_MM_FROUND_CUR_DIRECTION);
|
||||
}
|
||||
|
||||
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_ucomieq_sh(__m128h A,
|
||||
__m128h B) {
|
||||
return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_EQ_OQ,
|
||||
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_ucomieq_sh(__m128h __A,
|
||||
__m128h __B) {
|
||||
return __builtin_ia32_vcomish((__v8hf)__A, (__v8hf)__B, _CMP_EQ_OQ,
|
||||
_MM_FROUND_CUR_DIRECTION);
|
||||
}
|
||||
|
||||
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_ucomilt_sh(__m128h A,
|
||||
__m128h B) {
|
||||
return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_LT_OQ,
|
||||
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_ucomilt_sh(__m128h __A,
|
||||
__m128h __B) {
|
||||
return __builtin_ia32_vcomish((__v8hf)__A, (__v8hf)__B, _CMP_LT_OQ,
|
||||
_MM_FROUND_CUR_DIRECTION);
|
||||
}
|
||||
|
||||
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_ucomile_sh(__m128h A,
|
||||
__m128h B) {
|
||||
return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_LE_OQ,
|
||||
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_ucomile_sh(__m128h __A,
|
||||
__m128h __B) {
|
||||
return __builtin_ia32_vcomish((__v8hf)__A, (__v8hf)__B, _CMP_LE_OQ,
|
||||
_MM_FROUND_CUR_DIRECTION);
|
||||
}
|
||||
|
||||
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_ucomigt_sh(__m128h A,
|
||||
__m128h B) {
|
||||
return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_GT_OQ,
|
||||
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_ucomigt_sh(__m128h __A,
|
||||
__m128h __B) {
|
||||
return __builtin_ia32_vcomish((__v8hf)__A, (__v8hf)__B, _CMP_GT_OQ,
|
||||
_MM_FROUND_CUR_DIRECTION);
|
||||
}
|
||||
|
||||
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_ucomige_sh(__m128h A,
|
||||
__m128h B) {
|
||||
return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_GE_OQ,
|
||||
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_ucomige_sh(__m128h __A,
|
||||
__m128h __B) {
|
||||
return __builtin_ia32_vcomish((__v8hf)__A, (__v8hf)__B, _CMP_GE_OQ,
|
||||
_MM_FROUND_CUR_DIRECTION);
|
||||
}
|
||||
|
||||
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_ucomineq_sh(__m128h A,
|
||||
__m128h B) {
|
||||
return __builtin_ia32_vcomish((__v8hf)A, (__v8hf)B, _CMP_NEQ_UQ,
|
||||
static __inline__ int __DEFAULT_FN_ATTRS128 _mm_ucomineq_sh(__m128h __A,
|
||||
__m128h __B) {
|
||||
return __builtin_ia32_vcomish((__v8hf)__A, (__v8hf)__B, _CMP_NEQ_UQ,
|
||||
_MM_FROUND_CUR_DIRECTION);
|
||||
}
|
||||
|
||||
|
92
lib/include/avx512pfintrin.h
vendored
92
lib/include/avx512pfintrin.h
vendored
@ -1,92 +0,0 @@
|
||||
/*===------------- avx512pfintrin.h - PF intrinsics ------------------------===
|
||||
*
|
||||
*
|
||||
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
* See https://llvm.org/LICENSE.txt for license information.
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
#ifndef __IMMINTRIN_H
|
||||
#error "Never use <avx512pfintrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef __AVX512PFINTRIN_H
|
||||
#define __AVX512PFINTRIN_H
|
||||
|
||||
#define _mm512_mask_prefetch_i32gather_pd(index, mask, addr, scale, hint) \
|
||||
__builtin_ia32_gatherpfdpd((__mmask8)(mask), (__v8si)(__m256i)(index), \
|
||||
(void const *)(addr), (int)(scale), \
|
||||
(int)(hint))
|
||||
|
||||
#define _mm512_prefetch_i32gather_pd(index, addr, scale, hint) \
|
||||
__builtin_ia32_gatherpfdpd((__mmask8) -1, (__v8si)(__m256i)(index), \
|
||||
(void const *)(addr), (int)(scale), \
|
||||
(int)(hint))
|
||||
|
||||
#define _mm512_mask_prefetch_i32gather_ps(index, mask, addr, scale, hint) \
|
||||
__builtin_ia32_gatherpfdps((__mmask16)(mask), \
|
||||
(__v16si)(__m512i)(index), (void const *)(addr), \
|
||||
(int)(scale), (int)(hint))
|
||||
|
||||
#define _mm512_prefetch_i32gather_ps(index, addr, scale, hint) \
|
||||
__builtin_ia32_gatherpfdps((__mmask16) -1, \
|
||||
(__v16si)(__m512i)(index), (void const *)(addr), \
|
||||
(int)(scale), (int)(hint))
|
||||
|
||||
#define _mm512_mask_prefetch_i64gather_pd(index, mask, addr, scale, hint) \
|
||||
__builtin_ia32_gatherpfqpd((__mmask8)(mask), (__v8di)(__m512i)(index), \
|
||||
(void const *)(addr), (int)(scale), \
|
||||
(int)(hint))
|
||||
|
||||
#define _mm512_prefetch_i64gather_pd(index, addr, scale, hint) \
|
||||
__builtin_ia32_gatherpfqpd((__mmask8) -1, (__v8di)(__m512i)(index), \
|
||||
(void const *)(addr), (int)(scale), \
|
||||
(int)(hint))
|
||||
|
||||
#define _mm512_mask_prefetch_i64gather_ps(index, mask, addr, scale, hint) \
|
||||
__builtin_ia32_gatherpfqps((__mmask8)(mask), (__v8di)(__m512i)(index), \
|
||||
(void const *)(addr), (int)(scale), (int)(hint))
|
||||
|
||||
#define _mm512_prefetch_i64gather_ps(index, addr, scale, hint) \
|
||||
__builtin_ia32_gatherpfqps((__mmask8) -1, (__v8di)(__m512i)(index), \
|
||||
(void const *)(addr), (int)(scale), (int)(hint))
|
||||
|
||||
#define _mm512_prefetch_i32scatter_pd(addr, index, scale, hint) \
|
||||
__builtin_ia32_scatterpfdpd((__mmask8)-1, (__v8si)(__m256i)(index), \
|
||||
(void *)(addr), (int)(scale), \
|
||||
(int)(hint))
|
||||
|
||||
#define _mm512_mask_prefetch_i32scatter_pd(addr, mask, index, scale, hint) \
|
||||
__builtin_ia32_scatterpfdpd((__mmask8)(mask), (__v8si)(__m256i)(index), \
|
||||
(void *)(addr), (int)(scale), \
|
||||
(int)(hint))
|
||||
|
||||
#define _mm512_prefetch_i32scatter_ps(addr, index, scale, hint) \
|
||||
__builtin_ia32_scatterpfdps((__mmask16)-1, (__v16si)(__m512i)(index), \
|
||||
(void *)(addr), (int)(scale), (int)(hint))
|
||||
|
||||
#define _mm512_mask_prefetch_i32scatter_ps(addr, mask, index, scale, hint) \
|
||||
__builtin_ia32_scatterpfdps((__mmask16)(mask), \
|
||||
(__v16si)(__m512i)(index), (void *)(addr), \
|
||||
(int)(scale), (int)(hint))
|
||||
|
||||
#define _mm512_prefetch_i64scatter_pd(addr, index, scale, hint) \
|
||||
__builtin_ia32_scatterpfqpd((__mmask8)-1, (__v8di)(__m512i)(index), \
|
||||
(void *)(addr), (int)(scale), \
|
||||
(int)(hint))
|
||||
|
||||
#define _mm512_mask_prefetch_i64scatter_pd(addr, mask, index, scale, hint) \
|
||||
__builtin_ia32_scatterpfqpd((__mmask8)(mask), (__v8di)(__m512i)(index), \
|
||||
(void *)(addr), (int)(scale), \
|
||||
(int)(hint))
|
||||
|
||||
#define _mm512_prefetch_i64scatter_ps(addr, index, scale, hint) \
|
||||
__builtin_ia32_scatterpfqps((__mmask8)-1, (__v8di)(__m512i)(index), \
|
||||
(void *)(addr), (int)(scale), (int)(hint))
|
||||
|
||||
#define _mm512_mask_prefetch_i64scatter_ps(addr, mask, index, scale, hint) \
|
||||
__builtin_ia32_scatterpfqps((__mmask8)(mask), (__v8di)(__m512i)(index), \
|
||||
(void *)(addr), (int)(scale), (int)(hint))
|
||||
|
||||
#endif
|
102
lib/include/avxintrin.h
vendored
102
lib/include/avxintrin.h
vendored
@ -207,6 +207,8 @@ _mm256_div_ps(__m256 __a, __m256 __b)
|
||||
/// Compares two 256-bit vectors of [4 x double] and returns the greater
|
||||
/// of each pair of values.
|
||||
///
|
||||
/// If either value in a comparison is NaN, returns the value from \a __b.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> VMAXPD </c> instruction.
|
||||
@ -226,6 +228,8 @@ _mm256_max_pd(__m256d __a, __m256d __b)
|
||||
/// Compares two 256-bit vectors of [8 x float] and returns the greater
|
||||
/// of each pair of values.
|
||||
///
|
||||
/// If either value in a comparison is NaN, returns the value from \a __b.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> VMAXPS </c> instruction.
|
||||
@ -245,6 +249,8 @@ _mm256_max_ps(__m256 __a, __m256 __b)
|
||||
/// Compares two 256-bit vectors of [4 x double] and returns the lesser
|
||||
/// of each pair of values.
|
||||
///
|
||||
/// If either value in a comparison is NaN, returns the value from \a __b.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> VMINPD </c> instruction.
|
||||
@ -264,6 +270,8 @@ _mm256_min_pd(__m256d __a, __m256d __b)
|
||||
/// Compares two 256-bit vectors of [8 x float] and returns the lesser
|
||||
/// of each pair of values.
|
||||
///
|
||||
/// If either value in a comparison is NaN, returns the value from \a __b.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> VMINPS </c> instruction.
|
||||
@ -832,6 +840,7 @@ _mm256_permutevar_pd(__m256d __a, __m256i __c)
|
||||
|
||||
/// Copies the values stored in a 128-bit vector of [4 x float] as
|
||||
/// specified by the 128-bit integer vector operand.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> VPERMILPS </c> instruction.
|
||||
@ -1574,14 +1583,6 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
|
||||
(__v4df)(__m256d)(b), (int)(mask)))
|
||||
|
||||
/* Compare */
|
||||
#define _CMP_EQ_OQ 0x00 /* Equal (ordered, non-signaling) */
|
||||
#define _CMP_LT_OS 0x01 /* Less-than (ordered, signaling) */
|
||||
#define _CMP_LE_OS 0x02 /* Less-than-or-equal (ordered, signaling) */
|
||||
#define _CMP_UNORD_Q 0x03 /* Unordered (non-signaling) */
|
||||
#define _CMP_NEQ_UQ 0x04 /* Not-equal (unordered, non-signaling) */
|
||||
#define _CMP_NLT_US 0x05 /* Not-less-than (unordered, signaling) */
|
||||
#define _CMP_NLE_US 0x06 /* Not-less-than-or-equal (unordered, signaling) */
|
||||
#define _CMP_ORD_Q 0x07 /* Ordered (non-signaling) */
|
||||
#define _CMP_EQ_UQ 0x08 /* Equal (unordered, non-signaling) */
|
||||
#define _CMP_NGE_US 0x09 /* Not-greater-than-or-equal (unordered, signaling) */
|
||||
#define _CMP_NGT_US 0x0a /* Not-greater-than (unordered, signaling) */
|
||||
@ -1607,13 +1608,14 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
|
||||
#define _CMP_GT_OQ 0x1e /* Greater-than (ordered, non-signaling) */
|
||||
#define _CMP_TRUE_US 0x1f /* True (unordered, signaling) */
|
||||
|
||||
/* Below intrinsic defined in emmintrin.h can be used for AVX */
|
||||
/// Compares each of the corresponding double-precision values of two
|
||||
/// 128-bit vectors of [2 x double], using the operation specified by the
|
||||
/// immediate integer operand.
|
||||
///
|
||||
/// Returns a [2 x double] vector consisting of two doubles corresponding to
|
||||
/// the two comparison results: zero if the comparison is false, and all 1's
|
||||
/// if the comparison is true.
|
||||
/// Each comparison returns 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
|
||||
/// If either value in a comparison is NaN, comparisons that are ordered
|
||||
/// return false, and comparisons that are unordered return true.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@ -1663,17 +1665,16 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
|
||||
/// 0x1E: Greater-than (ordered, non-signaling) \n
|
||||
/// 0x1F: True (unordered, signaling)
|
||||
/// \returns A 128-bit vector of [2 x double] containing the comparison results.
|
||||
#define _mm_cmp_pd(a, b, c) \
|
||||
((__m128d)__builtin_ia32_cmppd((__v2df)(__m128d)(a), \
|
||||
(__v2df)(__m128d)(b), (c)))
|
||||
/// \fn __m128d _mm_cmp_pd(__m128d a, __m128d b, const int c)
|
||||
|
||||
/* Below intrinsic defined in xmmintrin.h can be used for AVX */
|
||||
/// Compares each of the corresponding values of two 128-bit vectors of
|
||||
/// [4 x float], using the operation specified by the immediate integer
|
||||
/// operand.
|
||||
///
|
||||
/// Returns a [4 x float] vector consisting of four floats corresponding to
|
||||
/// the four comparison results: zero if the comparison is false, and all 1's
|
||||
/// if the comparison is true.
|
||||
/// Each comparison returns 0x0 for false, 0xFFFFFFFF for true.
|
||||
/// If either value in a comparison is NaN, comparisons that are ordered
|
||||
/// return false, and comparisons that are unordered return true.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@ -1723,17 +1724,15 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
|
||||
/// 0x1E: Greater-than (ordered, non-signaling) \n
|
||||
/// 0x1F: True (unordered, signaling)
|
||||
/// \returns A 128-bit vector of [4 x float] containing the comparison results.
|
||||
#define _mm_cmp_ps(a, b, c) \
|
||||
((__m128)__builtin_ia32_cmpps((__v4sf)(__m128)(a), \
|
||||
(__v4sf)(__m128)(b), (c)))
|
||||
/// \fn __m128 _mm_cmp_ps(__m128 a, __m128 b, const int c)
|
||||
|
||||
/// Compares each of the corresponding double-precision values of two
|
||||
/// 256-bit vectors of [4 x double], using the operation specified by the
|
||||
/// immediate integer operand.
|
||||
///
|
||||
/// Returns a [4 x double] vector consisting of four doubles corresponding to
|
||||
/// the four comparison results: zero if the comparison is false, and all 1's
|
||||
/// if the comparison is true.
|
||||
/// Each comparison returns 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
|
||||
/// If either value in a comparison is NaN, comparisons that are ordered
|
||||
/// return false, and comparisons that are unordered return true.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@ -1791,9 +1790,9 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
|
||||
/// [8 x float], using the operation specified by the immediate integer
|
||||
/// operand.
|
||||
///
|
||||
/// Returns a [8 x float] vector consisting of eight floats corresponding to
|
||||
/// the eight comparison results: zero if the comparison is false, and all
|
||||
/// 1's if the comparison is true.
|
||||
/// Each comparison returns 0x0 for false, 0xFFFFFFFF for true.
|
||||
/// If either value in a comparison is NaN, comparisons that are ordered
|
||||
/// return false, and comparisons that are unordered return true.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@ -1847,12 +1846,14 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
|
||||
((__m256)__builtin_ia32_cmpps256((__v8sf)(__m256)(a), \
|
||||
(__v8sf)(__m256)(b), (c)))
|
||||
|
||||
/* Below intrinsic defined in emmintrin.h can be used for AVX */
|
||||
/// Compares each of the corresponding scalar double-precision values of
|
||||
/// two 128-bit vectors of [2 x double], using the operation specified by the
|
||||
/// immediate integer operand.
|
||||
///
|
||||
/// If the result is true, all 64 bits of the destination vector are set;
|
||||
/// otherwise they are cleared.
|
||||
/// Each comparison returns 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
|
||||
/// If either value in a comparison is NaN, comparisons that are ordered
|
||||
/// return false, and comparisons that are unordered return true.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@ -1902,16 +1903,16 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
|
||||
/// 0x1E: Greater-than (ordered, non-signaling) \n
|
||||
/// 0x1F: True (unordered, signaling)
|
||||
/// \returns A 128-bit vector of [2 x double] containing the comparison results.
|
||||
#define _mm_cmp_sd(a, b, c) \
|
||||
((__m128d)__builtin_ia32_cmpsd((__v2df)(__m128d)(a), \
|
||||
(__v2df)(__m128d)(b), (c)))
|
||||
/// \fn __m128d _mm_cmp_sd(__m128d a, __m128d b, const int c)
|
||||
|
||||
/* Below intrinsic defined in xmmintrin.h can be used for AVX */
|
||||
/// Compares each of the corresponding scalar values of two 128-bit
|
||||
/// vectors of [4 x float], using the operation specified by the immediate
|
||||
/// integer operand.
|
||||
///
|
||||
/// If the result is true, all 32 bits of the destination vector are set;
|
||||
/// otherwise they are cleared.
|
||||
/// Each comparison returns 0x0 for false, 0xFFFFFFFF for true.
|
||||
/// If either value in a comparison is NaN, comparisons that are ordered
|
||||
/// return false, and comparisons that are unordered return true.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@ -1961,9 +1962,7 @@ _mm256_blendv_ps(__m256 __a, __m256 __b, __m256 __c)
|
||||
/// 0x1E: Greater-than (ordered, non-signaling) \n
|
||||
/// 0x1F: True (unordered, signaling)
|
||||
/// \returns A 128-bit vector of [4 x float] containing the comparison results.
|
||||
#define _mm_cmp_ss(a, b, c) \
|
||||
((__m128)__builtin_ia32_cmpss((__v4sf)(__m128)(a), \
|
||||
(__v4sf)(__m128)(b), (c)))
|
||||
/// \fn __m128 _mm_cmp_ss(__m128 a, __m128 b, const int c)
|
||||
|
||||
/// Takes a [8 x i32] vector and returns the vector element value
|
||||
/// indexed by the immediate constant operand.
|
||||
@ -2213,6 +2212,10 @@ _mm256_cvtpd_ps(__m256d __a)
|
||||
|
||||
/// Converts a vector of [8 x float] into a vector of [8 x i32].
|
||||
///
|
||||
/// If a converted value does not fit in a 32-bit integer, raises a
|
||||
/// floating-point invalid exception. If the exception is masked, returns
|
||||
/// the most negative integer.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> VCVTPS2DQ </c> instruction.
|
||||
@ -2242,9 +2245,13 @@ _mm256_cvtps_pd(__m128 __a)
|
||||
return (__m256d)__builtin_convertvector((__v4sf)__a, __v4df);
|
||||
}
|
||||
|
||||
/// Converts a 256-bit vector of [4 x double] into a 128-bit vector of [4
|
||||
/// x i32], truncating the result by rounding towards zero when it is
|
||||
/// inexact.
|
||||
/// Converts a 256-bit vector of [4 x double] into four signed truncated
|
||||
/// (rounded toward zero) 32-bit integers returned in a 128-bit vector of
|
||||
/// [4 x i32].
|
||||
///
|
||||
/// If a converted value does not fit in a 32-bit integer, raises a
|
||||
/// floating-point invalid exception. If the exception is masked, returns
|
||||
/// the most negative integer.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@ -2259,9 +2266,12 @@ _mm256_cvttpd_epi32(__m256d __a)
|
||||
return (__m128i)__builtin_ia32_cvttpd2dq256((__v4df) __a);
|
||||
}
|
||||
|
||||
/// Converts a 256-bit vector of [4 x double] into a 128-bit vector of [4
|
||||
/// x i32]. When a conversion is inexact, the value returned is rounded
|
||||
/// according to the rounding control bits in the MXCSR register.
|
||||
/// Converts a 256-bit vector of [4 x double] into a 128-bit vector of
|
||||
/// [4 x i32].
|
||||
///
|
||||
/// If a converted value does not fit in a 32-bit integer, raises a
|
||||
/// floating-point invalid exception. If the exception is masked, returns
|
||||
/// the most negative integer.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@ -2276,8 +2286,12 @@ _mm256_cvtpd_epi32(__m256d __a)
|
||||
return (__m128i)__builtin_ia32_cvtpd2dq256((__v4df) __a);
|
||||
}
|
||||
|
||||
/// Converts a vector of [8 x float] into a vector of [8 x i32],
|
||||
/// truncating the result by rounding towards zero when it is inexact.
|
||||
/// Converts a vector of [8 x float] into eight signed truncated (rounded
|
||||
/// toward zero) 32-bit integers returned in a vector of [8 x i32].
|
||||
///
|
||||
/// If a converted value does not fit in a 32-bit integer, raises a
|
||||
/// floating-point invalid exception. If the exception is masked, returns
|
||||
/// the most negative integer.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
|
6
lib/include/bmiintrin.h
vendored
6
lib/include/bmiintrin.h
vendored
@ -161,8 +161,7 @@ _mm_tzcnt_64(unsigned long long __X)
|
||||
|
||||
#undef __RELAXED_FN_ATTRS
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__BMI__)
|
||||
#if !defined(__SCE__) || __has_feature(modules) || defined(__BMI__)
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("bmi")))
|
||||
@ -610,7 +609,6 @@ __blsr_u64(unsigned long long __X)
|
||||
|
||||
#undef __DEFAULT_FN_ATTRS
|
||||
|
||||
#endif /* !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) \
|
||||
|| defined(__BMI__) */
|
||||
#endif /* !defined(__SCE__) || __has_feature(modules) || defined(__BMI__) */
|
||||
|
||||
#endif /* __BMIINTRIN_H */
|
||||
|
3
lib/include/builtins.h
vendored
3
lib/include/builtins.h
vendored
@ -13,4 +13,7 @@
|
||||
#ifndef __BUILTINS_H
|
||||
#define __BUILTINS_H
|
||||
|
||||
#if defined(__MVS__) && __has_include_next(<builtins.h>)
|
||||
#include_next <builtins.h>
|
||||
#endif /* __MVS__ */
|
||||
#endif /* __BUILTINS_H */
|
||||
|
26
lib/include/cpuid.h
vendored
26
lib/include/cpuid.h
vendored
@ -10,7 +10,7 @@
|
||||
#ifndef __CPUID_H
|
||||
#define __CPUID_H
|
||||
|
||||
#if !(__x86_64__ || __i386__)
|
||||
#if !defined(__x86_64__) && !defined(__i386__)
|
||||
#error this header is for x86 only
|
||||
#endif
|
||||
|
||||
@ -200,6 +200,9 @@
|
||||
#define bit_AMXINT8 0x02000000
|
||||
|
||||
/* Features in %eax for leaf 7 sub-leaf 1 */
|
||||
#define bit_SHA512 0x00000001
|
||||
#define bit_SM3 0x00000002
|
||||
#define bit_SM4 0x00000004
|
||||
#define bit_RAOINT 0x00000008
|
||||
#define bit_AVXVNNI 0x00000010
|
||||
#define bit_AVX512BF16 0x00000020
|
||||
@ -211,7 +214,12 @@
|
||||
/* Features in %edx for leaf 7 sub-leaf 1 */
|
||||
#define bit_AVXVNNIINT8 0x00000010
|
||||
#define bit_AVXNECONVERT 0x00000020
|
||||
#define bit_AMXCOMPLEX 0x00000100
|
||||
#define bit_AVXVNNIINT16 0x00000400
|
||||
#define bit_PREFETCHI 0x00004000
|
||||
#define bit_USERMSR 0x00008000
|
||||
#define bit_AVX10 0x00080000
|
||||
#define bit_APXF 0x00200000
|
||||
|
||||
/* Features in %eax for leaf 13 sub-leaf 1 */
|
||||
#define bit_XSAVEOPT 0x00000001
|
||||
@ -244,8 +252,11 @@
|
||||
#define bit_RDPRU 0x00000010
|
||||
#define bit_WBNOINVD 0x00000200
|
||||
|
||||
/* Features in %ebx for leaf 0x24 */
|
||||
#define bit_AVX10_256 0x00020000
|
||||
#define bit_AVX10_512 0x00040000
|
||||
|
||||
#if __i386__
|
||||
#ifdef __i386__
|
||||
#define __cpuid(__leaf, __eax, __ebx, __ecx, __edx) \
|
||||
__asm("cpuid" : "=a"(__eax), "=b" (__ebx), "=c"(__ecx), "=d"(__edx) \
|
||||
: "0"(__leaf))
|
||||
@ -274,7 +285,7 @@ static __inline unsigned int __get_cpuid_max (unsigned int __leaf,
|
||||
unsigned int *__sig)
|
||||
{
|
||||
unsigned int __eax, __ebx, __ecx, __edx;
|
||||
#if __i386__
|
||||
#ifdef __i386__
|
||||
int __cpuid_supported;
|
||||
|
||||
__asm(" pushfl\n"
|
||||
@ -328,4 +339,13 @@ static __inline int __get_cpuid_count (unsigned int __leaf,
|
||||
return 1;
|
||||
}
|
||||
|
||||
// In some configurations, __cpuidex is defined as a builtin (primarily
|
||||
// -fms-extensions) which will conflict with the __cpuidex definition below.
|
||||
#if !(__has_builtin(__cpuidex))
|
||||
static __inline void __cpuidex(int __cpu_info[4], int __leaf, int __subleaf) {
|
||||
__cpuid_count(__leaf, __subleaf, __cpu_info[0], __cpu_info[1], __cpu_info[2],
|
||||
__cpu_info[3]);
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* __CPUID_H */
|
||||
|
2
lib/include/cuda_wrappers/algorithm
vendored
2
lib/include/cuda_wrappers/algorithm
vendored
@ -99,7 +99,7 @@ template <class __T>
|
||||
__attribute__((enable_if(true, "")))
|
||||
inline _CPP14_CONSTEXPR __host__ __device__ const __T &
|
||||
min(const __T &__a, const __T &__b) {
|
||||
return __a < __b ? __a : __b;
|
||||
return __b < __a ? __b : __a;
|
||||
}
|
||||
|
||||
#pragma pop_macro("_CPP14_CONSTEXPR")
|
||||
|
471
lib/include/emmintrin.h
vendored
471
lib/include/emmintrin.h
vendored
File diff suppressed because it is too large
Load Diff
28
lib/include/float.h
vendored
28
lib/include/float.h
vendored
@ -10,6 +10,10 @@
|
||||
#ifndef __CLANG_FLOAT_H
|
||||
#define __CLANG_FLOAT_H
|
||||
|
||||
#if defined(__MVS__) && __has_include_next(<float.h>)
|
||||
#include_next <float.h>
|
||||
#else
|
||||
|
||||
/* If we're on MinGW, fall back to the system's float.h, which might have
|
||||
* additional definitions provided for Windows.
|
||||
* For more details see http://msdn.microsoft.com/en-us/library/y0ybw9fy.aspx
|
||||
@ -82,6 +86,18 @@
|
||||
# undef DBL_HAS_SUBNORM
|
||||
# undef LDBL_HAS_SUBNORM
|
||||
# endif
|
||||
#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L) || \
|
||||
!defined(__STRICT_ANSI__)
|
||||
# undef FLT_NORM_MAX
|
||||
# undef DBL_NORM_MAX
|
||||
# undef LDBL_NORM_MAX
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L) || \
|
||||
!defined(__STRICT_ANSI__)
|
||||
# undef INFINITY
|
||||
# undef NAN
|
||||
#endif
|
||||
|
||||
/* Characteristics of floating point types, C99 5.2.4.2.2 */
|
||||
@ -151,6 +167,17 @@
|
||||
# define LDBL_HAS_SUBNORM __LDBL_HAS_DENORM__
|
||||
#endif
|
||||
|
||||
#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L) || \
|
||||
!defined(__STRICT_ANSI__)
|
||||
/* C23 5.2.5.3.3p29-30 */
|
||||
# define INFINITY (__builtin_inff())
|
||||
# define NAN (__builtin_nanf(""))
|
||||
/* C23 5.2.5.3.3p32 */
|
||||
# define FLT_NORM_MAX __FLT_NORM_MAX__
|
||||
# define DBL_NORM_MAX __DBL_NORM_MAX__
|
||||
# define LDBL_NORM_MAX __LDBL_NORM_MAX__
|
||||
#endif
|
||||
|
||||
#ifdef __STDC_WANT_IEC_60559_TYPES_EXT__
|
||||
# define FLT16_MANT_DIG __FLT16_MANT_DIG__
|
||||
# define FLT16_DECIMAL_DIG __FLT16_DECIMAL_DIG__
|
||||
@ -165,4 +192,5 @@
|
||||
# define FLT16_TRUE_MIN __FLT16_TRUE_MIN__
|
||||
#endif /* __STDC_WANT_IEC_60559_TYPES_EXT__ */
|
||||
|
||||
#endif /* __MVS__ */
|
||||
#endif /* __CLANG_FLOAT_H */
|
||||
|
48
lib/include/fmaintrin.h
vendored
48
lib/include/fmaintrin.h
vendored
@ -60,7 +60,8 @@ _mm_fmadd_pd(__m128d __A, __m128d __B, __m128d __C)
|
||||
|
||||
/// Computes a scalar multiply-add of the single-precision values in the
|
||||
/// low 32 bits of 128-bit vectors of [4 x float].
|
||||
/// \code
|
||||
///
|
||||
/// \code{.operation}
|
||||
/// result[31:0] = (__A[31:0] * __B[31:0]) + __C[31:0]
|
||||
/// result[127:32] = __A[127:32]
|
||||
/// \endcode
|
||||
@ -88,7 +89,8 @@ _mm_fmadd_ss(__m128 __A, __m128 __B, __m128 __C)
|
||||
|
||||
/// Computes a scalar multiply-add of the double-precision values in the
|
||||
/// low 64 bits of 128-bit vectors of [2 x double].
|
||||
/// \code
|
||||
///
|
||||
/// \code{.operation}
|
||||
/// result[63:0] = (__A[63:0] * __B[63:0]) + __C[63:0]
|
||||
/// result[127:64] = __A[127:64]
|
||||
/// \endcode
|
||||
@ -156,7 +158,8 @@ _mm_fmsub_pd(__m128d __A, __m128d __B, __m128d __C)
|
||||
|
||||
/// Computes a scalar multiply-subtract of the single-precision values in
|
||||
/// the low 32 bits of 128-bit vectors of [4 x float].
|
||||
/// \code
|
||||
///
|
||||
/// \code{.operation}
|
||||
/// result[31:0] = (__A[31:0] * __B[31:0]) - __C[31:0]
|
||||
/// result[127:32] = __A[127:32]
|
||||
/// \endcode
|
||||
@ -184,7 +187,8 @@ _mm_fmsub_ss(__m128 __A, __m128 __B, __m128 __C)
|
||||
|
||||
/// Computes a scalar multiply-subtract of the double-precision values in
|
||||
/// the low 64 bits of 128-bit vectors of [2 x double].
|
||||
/// \code
|
||||
///
|
||||
/// \code{.operation}
|
||||
/// result[63:0] = (__A[63:0] * __B[63:0]) - __C[63:0]
|
||||
/// result[127:64] = __A[127:64]
|
||||
/// \endcode
|
||||
@ -252,7 +256,8 @@ _mm_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C)
|
||||
|
||||
/// Computes a scalar negated multiply-add of the single-precision values in
|
||||
/// the low 32 bits of 128-bit vectors of [4 x float].
|
||||
/// \code
|
||||
///
|
||||
/// \code{.operation}
|
||||
/// result[31:0] = -(__A[31:0] * __B[31:0]) + __C[31:0]
|
||||
/// result[127:32] = __A[127:32]
|
||||
/// \endcode
|
||||
@ -280,7 +285,8 @@ _mm_fnmadd_ss(__m128 __A, __m128 __B, __m128 __C)
|
||||
|
||||
/// Computes a scalar negated multiply-add of the double-precision values
|
||||
/// in the low 64 bits of 128-bit vectors of [2 x double].
|
||||
/// \code
|
||||
///
|
||||
/// \code{.operation}
|
||||
/// result[63:0] = -(__A[63:0] * __B[63:0]) + __C[63:0]
|
||||
/// result[127:64] = __A[127:64]
|
||||
/// \endcode
|
||||
@ -348,7 +354,8 @@ _mm_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C)
|
||||
|
||||
/// Computes a scalar negated multiply-subtract of the single-precision
|
||||
/// values in the low 32 bits of 128-bit vectors of [4 x float].
|
||||
/// \code
|
||||
///
|
||||
/// \code{.operation}
|
||||
/// result[31:0] = -(__A[31:0] * __B[31:0]) - __C[31:0]
|
||||
/// result[127:32] = __A[127:32]
|
||||
/// \endcode
|
||||
@ -376,7 +383,8 @@ _mm_fnmsub_ss(__m128 __A, __m128 __B, __m128 __C)
|
||||
|
||||
/// Computes a scalar negated multiply-subtract of the double-precision
|
||||
/// values in the low 64 bits of 128-bit vectors of [2 x double].
|
||||
/// \code
|
||||
///
|
||||
/// \code{.operation}
|
||||
/// result[63:0] = -(__A[63:0] * __B[63:0]) - __C[63:0]
|
||||
/// result[127:64] = __A[127:64]
|
||||
/// \endcode
|
||||
@ -404,7 +412,8 @@ _mm_fnmsub_sd(__m128d __A, __m128d __B, __m128d __C)
|
||||
|
||||
/// Computes a multiply with alternating add/subtract of 128-bit vectors of
|
||||
/// [4 x float].
|
||||
/// \code
|
||||
///
|
||||
/// \code{.operation}
|
||||
/// result[31:0] = (__A[31:0] * __B[31:0]) - __C[31:0]
|
||||
/// result[63:32] = (__A[63:32] * __B[63:32]) + __C[63:32]
|
||||
/// result[95:64] = (__A[95:64] * __B[95:64]) - __C[95:64]
|
||||
@ -430,7 +439,8 @@ _mm_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C)
|
||||
|
||||
/// Computes a multiply with alternating add/subtract of 128-bit vectors of
|
||||
/// [2 x double].
|
||||
/// \code
|
||||
///
|
||||
/// \code{.operation}
|
||||
/// result[63:0] = (__A[63:0] * __B[63:0]) - __C[63:0]
|
||||
/// result[127:64] = (__A[127:64] * __B[127:64]) + __C[127:64]
|
||||
/// \endcode
|
||||
@ -454,7 +464,8 @@ _mm_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C)
|
||||
|
||||
/// Computes a multiply with alternating add/subtract of 128-bit vectors of
|
||||
/// [4 x float].
|
||||
/// \code
|
||||
///
|
||||
/// \code{.operation}
|
||||
/// result[31:0] = (__A[31:0] * __B[31:0]) + __C[31:0]
|
||||
/// result[63:32] = (__A[63:32] * __B[63:32]) - __C[63:32]
|
||||
/// result[95:64] = (__A[95:64] * __B[95:64]) + __C[95:64]
|
||||
@ -480,7 +491,8 @@ _mm_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C)
|
||||
|
||||
/// Computes a multiply with alternating add/subtract of 128-bit vectors of
|
||||
/// [2 x double].
|
||||
/// \code
|
||||
///
|
||||
/// \code{.operation}
|
||||
/// result[63:0] = (__A[63:0] * __B[63:0]) + __C[63:0]
|
||||
/// result[127:64] = (__A[127:64] * __B[127:64]) - __C[127:64]
|
||||
/// \endcode
|
||||
@ -664,7 +676,8 @@ _mm256_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C)
|
||||
|
||||
/// Computes a multiply with alternating add/subtract of 256-bit vectors of
|
||||
/// [8 x float].
|
||||
/// \code
|
||||
///
|
||||
/// \code{.operation}
|
||||
/// result[31:0] = (__A[31:0] * __B[31:0]) - __C[31:0]
|
||||
/// result[63:32] = (__A[63:32] * __B[63:32]) + __C[63:32]
|
||||
/// result[95:64] = (__A[95:64] * __B[95:64]) - __C[95:64]
|
||||
@ -694,7 +707,8 @@ _mm256_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C)
|
||||
|
||||
/// Computes a multiply with alternating add/subtract of 256-bit vectors of
|
||||
/// [4 x double].
|
||||
/// \code
|
||||
///
|
||||
/// \code{.operation}
|
||||
/// result[63:0] = (__A[63:0] * __B[63:0]) - __C[63:0]
|
||||
/// result[127:64] = (__A[127:64] * __B[127:64]) + __C[127:64]
|
||||
/// result[191:128] = (__A[191:128] * __B[191:128]) - __C[191:128]
|
||||
@ -720,7 +734,8 @@ _mm256_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C)
|
||||
|
||||
/// Computes a vector multiply with alternating add/subtract of 256-bit
|
||||
/// vectors of [8 x float].
|
||||
/// \code
|
||||
///
|
||||
/// \code{.operation}
|
||||
/// result[31:0] = (__A[31:0] * __B[31:0]) + __C[31:0]
|
||||
/// result[63:32] = (__A[63:32] * __B[63:32]) - __C[63:32]
|
||||
/// result[95:64] = (__A[95:64] * __B[95:64]) + __C[95:64]
|
||||
@ -750,7 +765,8 @@ _mm256_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C)
|
||||
|
||||
/// Computes a vector multiply with alternating add/subtract of 256-bit
|
||||
/// vectors of [4 x double].
|
||||
/// \code
|
||||
///
|
||||
/// \code{.operation}
|
||||
/// result[63:0] = (__A[63:0] * __B[63:0]) + __C[63:0]
|
||||
/// result[127:64] = (__A[127:64] * __B[127:64]) - __C[127:64]
|
||||
/// result[191:128] = (__A[191:128] * __B[191:128]) + __C[191:128]
|
||||
|
72
lib/include/ia32intrin.h
vendored
72
lib/include/ia32intrin.h
vendored
@ -26,8 +26,8 @@
|
||||
#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS
|
||||
#endif
|
||||
|
||||
/// Find the first set bit starting from the lsb. Result is undefined if
|
||||
/// input is 0.
|
||||
/// Finds the first set bit starting from the least significant bit. The result
|
||||
/// is undefined if the input is 0.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@ -43,8 +43,8 @@ __bsfd(int __A) {
|
||||
return __builtin_ctz((unsigned int)__A);
|
||||
}
|
||||
|
||||
/// Find the first set bit starting from the msb. Result is undefined if
|
||||
/// input is 0.
|
||||
/// Finds the first set bit starting from the most significant bit. The result
|
||||
/// is undefined if the input is 0.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@ -90,8 +90,8 @@ _bswap(int __A) {
|
||||
return (int)__builtin_bswap32((unsigned int)__A);
|
||||
}
|
||||
|
||||
/// Find the first set bit starting from the lsb. Result is undefined if
|
||||
/// input is 0.
|
||||
/// Finds the first set bit starting from the least significant bit. The result
|
||||
/// is undefined if the input is 0.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@ -108,8 +108,8 @@ _bswap(int __A) {
|
||||
/// \see __bsfd
|
||||
#define _bit_scan_forward(A) __bsfd((A))
|
||||
|
||||
/// Find the first set bit starting from the msb. Result is undefined if
|
||||
/// input is 0.
|
||||
/// Finds the first set bit starting from the most significant bit. The result
|
||||
/// is undefined if the input is 0.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@ -127,8 +127,8 @@ _bswap(int __A) {
|
||||
#define _bit_scan_reverse(A) __bsrd((A))
|
||||
|
||||
#ifdef __x86_64__
|
||||
/// Find the first set bit starting from the lsb. Result is undefined if
|
||||
/// input is 0.
|
||||
/// Finds the first set bit starting from the least significant bit. The result
|
||||
/// is undefined if the input is 0.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@ -143,8 +143,8 @@ __bsfq(long long __A) {
|
||||
return (long long)__builtin_ctzll((unsigned long long)__A);
|
||||
}
|
||||
|
||||
/// Find the first set bit starting from the msb. Result is undefined if
|
||||
/// input is 0.
|
||||
/// Finds the first set bit starting from the most significant bit. The result
|
||||
/// is undefined if input is 0.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@ -159,7 +159,7 @@ __bsrq(long long __A) {
|
||||
return 63 - __builtin_clzll((unsigned long long)__A);
|
||||
}
|
||||
|
||||
/// Swaps the bytes in the input. Converting little endian to big endian or
|
||||
/// Swaps the bytes in the input, converting little endian to big endian or
|
||||
/// vice versa.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
@ -175,7 +175,7 @@ __bswapq(long long __A) {
|
||||
return (long long)__builtin_bswap64((unsigned long long)__A);
|
||||
}
|
||||
|
||||
/// Swaps the bytes in the input. Converting little endian to big endian or
|
||||
/// Swaps the bytes in the input, converting little endian to big endian or
|
||||
/// vice versa.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
@ -198,7 +198,7 @@ __bswapq(long long __A) {
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the \c POPCNT instruction or a
|
||||
/// a sequence of arithmetic and logic ops to calculate it.
|
||||
/// sequence of arithmetic and logic operations to calculate it.
|
||||
///
|
||||
/// \param __A
|
||||
/// An unsigned 32-bit integer operand.
|
||||
@ -220,7 +220,7 @@ __popcntd(unsigned int __A)
|
||||
/// \endcode
|
||||
///
|
||||
/// This intrinsic corresponds to the \c POPCNT instruction or a
|
||||
/// a sequence of arithmetic and logic ops to calculate it.
|
||||
/// sequence of arithmetic and logic operations to calculate it.
|
||||
///
|
||||
/// \param A
|
||||
/// An unsigned 32-bit integer operand.
|
||||
@ -235,7 +235,7 @@ __popcntd(unsigned int __A)
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the \c POPCNT instruction or a
|
||||
/// a sequence of arithmetic and logic ops to calculate it.
|
||||
/// sequence of arithmetic and logic operations to calculate it.
|
||||
///
|
||||
/// \param __A
|
||||
/// An unsigned 64-bit integer operand.
|
||||
@ -257,7 +257,7 @@ __popcntq(unsigned long long __A)
|
||||
/// \endcode
|
||||
///
|
||||
/// This intrinsic corresponds to the \c POPCNT instruction or a
|
||||
/// a sequence of arithmetic and logic ops to calculate it.
|
||||
/// sequence of arithmetic and logic operations to calculate it.
|
||||
///
|
||||
/// \param A
|
||||
/// An unsigned 64-bit integer operand.
|
||||
@ -268,7 +268,7 @@ __popcntq(unsigned long long __A)
|
||||
#endif /* __x86_64__ */
|
||||
|
||||
#ifdef __x86_64__
|
||||
/// Returns the program status and control \c RFLAGS register with the \c VM
|
||||
/// Returns the program status-and-control \c RFLAGS register with the \c VM
|
||||
/// and \c RF flags cleared.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
@ -282,7 +282,7 @@ __readeflags(void)
|
||||
return __builtin_ia32_readeflags_u64();
|
||||
}
|
||||
|
||||
/// Writes the specified value to the program status and control \c RFLAGS
|
||||
/// Writes the specified value to the program status-and-control \c RFLAGS
|
||||
/// register. Reserved bits are not affected.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
@ -298,7 +298,7 @@ __writeeflags(unsigned long long __f)
|
||||
}
|
||||
|
||||
#else /* !__x86_64__ */
|
||||
/// Returns the program status and control \c EFLAGS register with the \c VM
|
||||
/// Returns the program status-and-control \c EFLAGS register with the \c VM
|
||||
/// and \c RF flags cleared.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
@ -312,7 +312,7 @@ __readeflags(void)
|
||||
return __builtin_ia32_readeflags_u32();
|
||||
}
|
||||
|
||||
/// Writes the specified value to the program status and control \c EFLAGS
|
||||
/// Writes the specified value to the program status-and-control \c EFLAGS
|
||||
/// register. Reserved bits are not affected.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
@ -328,7 +328,7 @@ __writeeflags(unsigned int __f)
|
||||
}
|
||||
#endif /* !__x86_64__ */
|
||||
|
||||
/// Cast a 32-bit float value to a 32-bit unsigned integer value.
|
||||
/// Casts a 32-bit float value to a 32-bit unsigned integer value.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@ -337,13 +337,13 @@ __writeeflags(unsigned int __f)
|
||||
///
|
||||
/// \param __A
|
||||
/// A 32-bit float value.
|
||||
/// \returns a 32-bit unsigned integer containing the converted value.
|
||||
/// \returns A 32-bit unsigned integer containing the converted value.
|
||||
static __inline__ unsigned int __DEFAULT_FN_ATTRS_CAST
|
||||
_castf32_u32(float __A) {
|
||||
return __builtin_bit_cast(unsigned int, __A);
|
||||
}
|
||||
|
||||
/// Cast a 64-bit float value to a 64-bit unsigned integer value.
|
||||
/// Casts a 64-bit float value to a 64-bit unsigned integer value.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@ -352,13 +352,13 @@ _castf32_u32(float __A) {
|
||||
///
|
||||
/// \param __A
|
||||
/// A 64-bit float value.
|
||||
/// \returns a 64-bit unsigned integer containing the converted value.
|
||||
/// \returns A 64-bit unsigned integer containing the converted value.
|
||||
static __inline__ unsigned long long __DEFAULT_FN_ATTRS_CAST
|
||||
_castf64_u64(double __A) {
|
||||
return __builtin_bit_cast(unsigned long long, __A);
|
||||
}
|
||||
|
||||
/// Cast a 32-bit unsigned integer value to a 32-bit float value.
|
||||
/// Casts a 32-bit unsigned integer value to a 32-bit float value.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@ -367,13 +367,13 @@ _castf64_u64(double __A) {
|
||||
///
|
||||
/// \param __A
|
||||
/// A 32-bit unsigned integer value.
|
||||
/// \returns a 32-bit float value containing the converted value.
|
||||
/// \returns A 32-bit float value containing the converted value.
|
||||
static __inline__ float __DEFAULT_FN_ATTRS_CAST
|
||||
_castu32_f32(unsigned int __A) {
|
||||
return __builtin_bit_cast(float, __A);
|
||||
}
|
||||
|
||||
/// Cast a 64-bit unsigned integer value to a 64-bit float value.
|
||||
/// Casts a 64-bit unsigned integer value to a 64-bit float value.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@ -382,7 +382,7 @@ _castu32_f32(unsigned int __A) {
|
||||
///
|
||||
/// \param __A
|
||||
/// A 64-bit unsigned integer value.
|
||||
/// \returns a 64-bit float value containing the converted value.
|
||||
/// \returns A 64-bit float value containing the converted value.
|
||||
static __inline__ double __DEFAULT_FN_ATTRS_CAST
|
||||
_castu64_f64(unsigned long long __A) {
|
||||
return __builtin_bit_cast(double, __A);
|
||||
@ -470,7 +470,7 @@ __crc32q(unsigned long long __C, unsigned long long __D)
|
||||
}
|
||||
#endif /* __x86_64__ */
|
||||
|
||||
/// Reads the specified performance monitoring counter. Refer to your
|
||||
/// Reads the specified performance-monitoring counter. Refer to your
|
||||
/// processor's documentation to determine which performance counters are
|
||||
/// supported.
|
||||
///
|
||||
@ -487,7 +487,7 @@ __rdpmc(int __A) {
|
||||
return __builtin_ia32_rdpmc(__A);
|
||||
}
|
||||
|
||||
/// Reads the processor's time stamp counter and the \c IA32_TSC_AUX MSR
|
||||
/// Reads the processor's time-stamp counter and the \c IA32_TSC_AUX MSR
|
||||
/// \c (0xc0000103).
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
@ -495,14 +495,14 @@ __rdpmc(int __A) {
|
||||
/// This intrinsic corresponds to the \c RDTSCP instruction.
|
||||
///
|
||||
/// \param __A
|
||||
/// Address of where to store the 32-bit \c IA32_TSC_AUX value.
|
||||
/// \returns The 64-bit value of the time stamp counter.
|
||||
/// The address of where to store the 32-bit \c IA32_TSC_AUX value.
|
||||
/// \returns The 64-bit value of the time-stamp counter.
|
||||
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
|
||||
__rdtscp(unsigned int *__A) {
|
||||
return __builtin_ia32_rdtscp(__A);
|
||||
}
|
||||
|
||||
/// Reads the processor's time stamp counter.
|
||||
/// Reads the processor's time-stamp counter.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@ -512,7 +512,7 @@ __rdtscp(unsigned int *__A) {
|
||||
///
|
||||
/// This intrinsic corresponds to the \c RDTSC instruction.
|
||||
///
|
||||
/// \returns The 64-bit value of the time stamp counter.
|
||||
/// \returns The 64-bit value of the time-stamp counter.
|
||||
#define _rdtsc() __rdtsc()
|
||||
|
||||
/// Reads the specified performance monitoring counter. Refer to your
|
||||
|
244
lib/include/immintrin.h
vendored
244
lib/include/immintrin.h
vendored
@ -16,281 +16,231 @@
|
||||
|
||||
#include <x86gprintrin.h>
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__MMX__)
|
||||
#if !defined(__SCE__) || __has_feature(modules) || defined(__MMX__)
|
||||
#include <mmintrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__SSE__)
|
||||
#if !defined(__SCE__) || __has_feature(modules) || defined(__SSE__)
|
||||
#include <xmmintrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__SSE2__)
|
||||
#if !defined(__SCE__) || __has_feature(modules) || defined(__SSE2__)
|
||||
#include <emmintrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__SSE3__)
|
||||
#if !defined(__SCE__) || __has_feature(modules) || defined(__SSE3__)
|
||||
#include <pmmintrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__SSSE3__)
|
||||
#if !defined(__SCE__) || __has_feature(modules) || defined(__SSSE3__)
|
||||
#include <tmmintrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
#if !defined(__SCE__) || __has_feature(modules) || \
|
||||
(defined(__SSE4_2__) || defined(__SSE4_1__))
|
||||
#include <smmintrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
#if !defined(__SCE__) || __has_feature(modules) || \
|
||||
(defined(__AES__) || defined(__PCLMUL__))
|
||||
#include <wmmintrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__CLFLUSHOPT__)
|
||||
#if !defined(__SCE__) || __has_feature(modules) || defined(__CLFLUSHOPT__)
|
||||
#include <clflushoptintrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__CLWB__)
|
||||
#if !defined(__SCE__) || __has_feature(modules) || defined(__CLWB__)
|
||||
#include <clwbintrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__AVX__)
|
||||
#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX__)
|
||||
#include <avxintrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__AVX2__)
|
||||
#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX2__)
|
||||
#include <avx2intrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__F16C__)
|
||||
#if !defined(__SCE__) || __has_feature(modules) || defined(__F16C__)
|
||||
#include <f16cintrin.h>
|
||||
#endif
|
||||
|
||||
/* No feature check desired due to internal checks */
|
||||
#include <bmiintrin.h>
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__BMI2__)
|
||||
#if !defined(__SCE__) || __has_feature(modules) || defined(__BMI2__)
|
||||
#include <bmi2intrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__LZCNT__)
|
||||
#if !defined(__SCE__) || __has_feature(modules) || defined(__LZCNT__)
|
||||
#include <lzcntintrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__POPCNT__)
|
||||
#if !defined(__SCE__) || __has_feature(modules) || defined(__POPCNT__)
|
||||
#include <popcntintrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__FMA__)
|
||||
#if !defined(__SCE__) || __has_feature(modules) || defined(__FMA__)
|
||||
#include <fmaintrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__AVX512F__)
|
||||
#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512F__)
|
||||
#include <avx512fintrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__AVX512VL__)
|
||||
#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512VL__)
|
||||
#include <avx512vlintrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__AVX512BW__)
|
||||
#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512BW__)
|
||||
#include <avx512bwintrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__AVX512BITALG__)
|
||||
#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512BITALG__)
|
||||
#include <avx512bitalgintrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__AVX512CD__)
|
||||
#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512CD__)
|
||||
#include <avx512cdintrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__AVX512VPOPCNTDQ__)
|
||||
#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512VPOPCNTDQ__)
|
||||
#include <avx512vpopcntdqintrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
#if !defined(__SCE__) || __has_feature(modules) || \
|
||||
(defined(__AVX512VL__) && defined(__AVX512VPOPCNTDQ__))
|
||||
#include <avx512vpopcntdqvlintrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__AVX512VNNI__)
|
||||
#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512VNNI__)
|
||||
#include <avx512vnniintrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
#if !defined(__SCE__) || __has_feature(modules) || \
|
||||
(defined(__AVX512VL__) && defined(__AVX512VNNI__))
|
||||
#include <avx512vlvnniintrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__AVXVNNI__)
|
||||
#if !defined(__SCE__) || __has_feature(modules) || defined(__AVXVNNI__)
|
||||
#include <avxvnniintrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__AVX512DQ__)
|
||||
#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512DQ__)
|
||||
#include <avx512dqintrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
#if !defined(__SCE__) || __has_feature(modules) || \
|
||||
(defined(__AVX512VL__) && defined(__AVX512BITALG__))
|
||||
#include <avx512vlbitalgintrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
#if !defined(__SCE__) || __has_feature(modules) || \
|
||||
(defined(__AVX512VL__) && defined(__AVX512BW__))
|
||||
#include <avx512vlbwintrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
#if !defined(__SCE__) || __has_feature(modules) || \
|
||||
(defined(__AVX512VL__) && defined(__AVX512CD__))
|
||||
#include <avx512vlcdintrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
#if !defined(__SCE__) || __has_feature(modules) || \
|
||||
(defined(__AVX512VL__) && defined(__AVX512DQ__))
|
||||
#include <avx512vldqintrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__AVX512ER__)
|
||||
#include <avx512erintrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__AVX512IFMA__)
|
||||
#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512IFMA__)
|
||||
#include <avx512ifmaintrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
#if !defined(__SCE__) || __has_feature(modules) || \
|
||||
(defined(__AVX512IFMA__) && defined(__AVX512VL__))
|
||||
#include <avx512ifmavlintrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__AVXIFMA__)
|
||||
#if !defined(__SCE__) || __has_feature(modules) || defined(__AVXIFMA__)
|
||||
#include <avxifmaintrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__AVX512VBMI__)
|
||||
#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512VBMI__)
|
||||
#include <avx512vbmiintrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
#if !defined(__SCE__) || __has_feature(modules) || \
|
||||
(defined(__AVX512VBMI__) && defined(__AVX512VL__))
|
||||
#include <avx512vbmivlintrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__AVX512VBMI2__)
|
||||
#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512VBMI2__)
|
||||
#include <avx512vbmi2intrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
#if !defined(__SCE__) || __has_feature(modules) || \
|
||||
(defined(__AVX512VBMI2__) && defined(__AVX512VL__))
|
||||
#include <avx512vlvbmi2intrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__AVX512PF__)
|
||||
#include <avx512pfintrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__AVX512FP16__)
|
||||
#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512FP16__)
|
||||
#include <avx512fp16intrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
#if !defined(__SCE__) || __has_feature(modules) || \
|
||||
(defined(__AVX512VL__) && defined(__AVX512FP16__))
|
||||
#include <avx512vlfp16intrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__AVX512BF16__)
|
||||
#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512BF16__)
|
||||
#include <avx512bf16intrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
#if !defined(__SCE__) || __has_feature(modules) || \
|
||||
(defined(__AVX512VL__) && defined(__AVX512BF16__))
|
||||
#include <avx512vlbf16intrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__PKU__)
|
||||
#if !defined(__SCE__) || __has_feature(modules) || defined(__PKU__)
|
||||
#include <pkuintrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__VPCLMULQDQ__)
|
||||
#if !defined(__SCE__) || __has_feature(modules) || defined(__VPCLMULQDQ__)
|
||||
#include <vpclmulqdqintrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__VAES__)
|
||||
#if !defined(__SCE__) || __has_feature(modules) || defined(__VAES__)
|
||||
#include <vaesintrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__GFNI__)
|
||||
#if !defined(__SCE__) || __has_feature(modules) || defined(__GFNI__)
|
||||
#include <gfniintrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__AVXVNNIINT8__)
|
||||
#if !defined(__SCE__) || __has_feature(modules) || defined(__AVXVNNIINT8__)
|
||||
#include <avxvnniint8intrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__AVXNECONVERT__)
|
||||
#if !defined(__SCE__) || __has_feature(modules) || defined(__AVXNECONVERT__)
|
||||
#include <avxneconvertintrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__SHA512__)
|
||||
#if !defined(__SCE__) || __has_feature(modules) || defined(__SHA512__)
|
||||
#include <sha512intrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__SM3__)
|
||||
#if !defined(__SCE__) || __has_feature(modules) || defined(__SM3__)
|
||||
#include <sm3intrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__SM4__)
|
||||
#if !defined(__SCE__) || __has_feature(modules) || defined(__SM4__)
|
||||
#include <sm4intrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__AVXVNNIINT16__)
|
||||
#if !defined(__SCE__) || __has_feature(modules) || defined(__AVXVNNIINT16__)
|
||||
#include <avxvnniint16intrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__RDPID__)
|
||||
#if !defined(__SCE__) || __has_feature(modules) || defined(__RDPID__)
|
||||
/// Reads the value of the IA32_TSC_AUX MSR (0xc0000103).
|
||||
///
|
||||
/// \headerfile <immintrin.h>
|
||||
@ -304,8 +254,7 @@ _rdpid_u32(void) {
|
||||
}
|
||||
#endif // __RDPID__
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__RDRND__)
|
||||
#if !defined(__SCE__) || __has_feature(modules) || defined(__RDRND__)
|
||||
/// Returns a 16-bit hardware-generated random value.
|
||||
///
|
||||
/// \headerfile <immintrin.h>
|
||||
@ -367,8 +316,7 @@ _rdrand64_step(unsigned long long *__p)
|
||||
}
|
||||
#endif /* __RDRND__ */
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__FSGSBASE__)
|
||||
#if !defined(__SCE__) || __has_feature(modules) || defined(__FSGSBASE__)
|
||||
#ifdef __x86_64__
|
||||
/// Reads the FS base register.
|
||||
///
|
||||
@ -481,8 +429,7 @@ _writegsbase_u64(unsigned long long __V)
|
||||
#endif
|
||||
#endif /* __FSGSBASE__ */
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__MOVBE__)
|
||||
#if !defined(__SCE__) || __has_feature(modules) || defined(__MOVBE__)
|
||||
|
||||
/* The structs used below are to force the load/store to be unaligned. This
|
||||
* is accomplished with the __packed__ attribute. The __may_alias__ prevents
|
||||
@ -598,139 +545,118 @@ _storebe_i64(void * __P, long long __D) {
|
||||
#endif
|
||||
#endif /* __MOVBE__ */
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__RTM__)
|
||||
#if !defined(__SCE__) || __has_feature(modules) || defined(__RTM__)
|
||||
#include <rtmintrin.h>
|
||||
#include <xtestintrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__SHA__)
|
||||
#if !defined(__SCE__) || __has_feature(modules) || defined(__SHA__)
|
||||
#include <shaintrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__FXSR__)
|
||||
#if !defined(__SCE__) || __has_feature(modules) || defined(__FXSR__)
|
||||
#include <fxsrintrin.h>
|
||||
#endif
|
||||
|
||||
/* No feature check desired due to internal MSC_VER checks */
|
||||
#include <xsaveintrin.h>
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__XSAVEOPT__)
|
||||
#if !defined(__SCE__) || __has_feature(modules) || defined(__XSAVEOPT__)
|
||||
#include <xsaveoptintrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__XSAVEC__)
|
||||
#if !defined(__SCE__) || __has_feature(modules) || defined(__XSAVEC__)
|
||||
#include <xsavecintrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__XSAVES__)
|
||||
#if !defined(__SCE__) || __has_feature(modules) || defined(__XSAVES__)
|
||||
#include <xsavesintrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__SHSTK__)
|
||||
#if !defined(__SCE__) || __has_feature(modules) || defined(__SHSTK__)
|
||||
#include <cetintrin.h>
|
||||
#endif
|
||||
|
||||
/* Intrinsics inside adcintrin.h are available at all times. */
|
||||
#include <adcintrin.h>
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__ADX__)
|
||||
#if !defined(__SCE__) || __has_feature(modules) || defined(__ADX__)
|
||||
#include <adxintrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__RDSEED__)
|
||||
#if !defined(__SCE__) || __has_feature(modules) || defined(__RDSEED__)
|
||||
#include <rdseedintrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__WBNOINVD__)
|
||||
#if !defined(__SCE__) || __has_feature(modules) || defined(__WBNOINVD__)
|
||||
#include <wbnoinvdintrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__CLDEMOTE__)
|
||||
#if !defined(__SCE__) || __has_feature(modules) || defined(__CLDEMOTE__)
|
||||
#include <cldemoteintrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__WAITPKG__)
|
||||
#if !defined(__SCE__) || __has_feature(modules) || defined(__WAITPKG__)
|
||||
#include <waitpkgintrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__MOVDIRI__) || defined(__MOVDIR64B__)
|
||||
#if !defined(__SCE__) || __has_feature(modules) || defined(__MOVDIRI__) || \
|
||||
defined(__MOVDIR64B__)
|
||||
#include <movdirintrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__PCONFIG__)
|
||||
#if !defined(__SCE__) || __has_feature(modules) || defined(__PCONFIG__)
|
||||
#include <pconfigintrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__SGX__)
|
||||
#if !defined(__SCE__) || __has_feature(modules) || defined(__SGX__)
|
||||
#include <sgxintrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__PTWRITE__)
|
||||
#if !defined(__SCE__) || __has_feature(modules) || defined(__PTWRITE__)
|
||||
#include <ptwriteintrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__INVPCID__)
|
||||
#if !defined(__SCE__) || __has_feature(modules) || defined(__INVPCID__)
|
||||
#include <invpcidintrin.h>
|
||||
#endif
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__AMX_FP16__)
|
||||
#if !defined(__SCE__) || __has_feature(modules) || defined(__AMX_FP16__)
|
||||
#include <amxfp16intrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__KL__) || defined(__WIDEKL__)
|
||||
#if !defined(__SCE__) || __has_feature(modules) || defined(__KL__) || \
|
||||
defined(__WIDEKL__)
|
||||
#include <keylockerintrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__AMX_TILE__) || defined(__AMX_INT8__) || defined(__AMX_BF16__)
|
||||
#if !defined(__SCE__) || __has_feature(modules) || defined(__AMX_TILE__) || \
|
||||
defined(__AMX_INT8__) || defined(__AMX_BF16__)
|
||||
#include <amxintrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__AMX_COMPLEX__)
|
||||
#if !defined(__SCE__) || __has_feature(modules) || defined(__AMX_COMPLEX__)
|
||||
#include <amxcomplexintrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
#if !defined(__SCE__) || __has_feature(modules) || \
|
||||
defined(__AVX512VP2INTERSECT__)
|
||||
#include <avx512vp2intersectintrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
#if !defined(__SCE__) || __has_feature(modules) || \
|
||||
(defined(__AVX512VL__) && defined(__AVX512VP2INTERSECT__))
|
||||
#include <avx512vlvp2intersectintrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__ENQCMD__)
|
||||
#if !defined(__SCE__) || __has_feature(modules) || defined(__ENQCMD__)
|
||||
#include <enqcmdintrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__SERIALIZE__)
|
||||
#if !defined(__SCE__) || __has_feature(modules) || defined(__SERIALIZE__)
|
||||
#include <serializeintrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__TSXLDTRK__)
|
||||
#if !defined(__SCE__) || __has_feature(modules) || defined(__TSXLDTRK__)
|
||||
#include <tsxldtrkintrin.h>
|
||||
#endif
|
||||
|
||||
|
272
lib/include/intrin.h
vendored
272
lib/include/intrin.h
vendored
@ -15,8 +15,10 @@
|
||||
#ifndef __INTRIN_H
|
||||
#define __INTRIN_H
|
||||
|
||||
#include <intrin0.h>
|
||||
|
||||
/* First include the standard intrinsics. */
|
||||
#if defined(__i386__) || defined(__x86_64__)
|
||||
#if defined(__i386__) || (defined(__x86_64__) && !defined(__arm64ec__))
|
||||
#include <x86intrin.h>
|
||||
#endif
|
||||
|
||||
@ -24,7 +26,7 @@
|
||||
#include <armintr.h>
|
||||
#endif
|
||||
|
||||
#if defined(__aarch64__)
|
||||
#if defined(__aarch64__) || defined(__arm64ec__)
|
||||
#include <arm64intr.h>
|
||||
#endif
|
||||
|
||||
@ -131,8 +133,6 @@ void __writefsqword(unsigned long, unsigned __int64);
|
||||
void __writefsword(unsigned long, unsigned short);
|
||||
void __writemsr(unsigned long, unsigned __int64);
|
||||
void *_AddressOfReturnAddress(void);
|
||||
unsigned char _BitScanForward(unsigned long *_Index, unsigned long _Mask);
|
||||
unsigned char _BitScanReverse(unsigned long *_Index, unsigned long _Mask);
|
||||
unsigned char _bittest(long const *, long);
|
||||
unsigned char _bittestandcomplement(long *, long);
|
||||
unsigned char _bittestandreset(long *, long);
|
||||
@ -151,7 +151,6 @@ long _InterlockedExchangeAdd_HLERelease(long volatile *, long);
|
||||
__int64 _InterlockedExchangeAdd64_HLEAcquire(__int64 volatile *, __int64);
|
||||
__int64 _InterlockedExchangeAdd64_HLERelease(__int64 volatile *, __int64);
|
||||
void _ReadBarrier(void);
|
||||
void _ReadWriteBarrier(void);
|
||||
unsigned int _rorx_u32(unsigned int, const unsigned int);
|
||||
int _sarx_i32(int, unsigned int);
|
||||
#if __STDC_HOSTED__
|
||||
@ -167,7 +166,7 @@ unsigned __int32 xbegin(void);
|
||||
void _xend(void);
|
||||
|
||||
/* These additional intrinsics are turned on in x64/amd64/x86_64 mode. */
|
||||
#ifdef __x86_64__
|
||||
#if defined(__x86_64__) && !defined(__arm64ec__)
|
||||
void __addgsbyte(unsigned long, unsigned char);
|
||||
void __addgsdword(unsigned long, unsigned long);
|
||||
void __addgsqword(unsigned long, unsigned __int64);
|
||||
@ -182,12 +181,6 @@ unsigned char __readgsbyte(unsigned long);
|
||||
unsigned long __readgsdword(unsigned long);
|
||||
unsigned __int64 __readgsqword(unsigned long);
|
||||
unsigned short __readgsword(unsigned long);
|
||||
unsigned __int64 __shiftleft128(unsigned __int64 _LowPart,
|
||||
unsigned __int64 _HighPart,
|
||||
unsigned char _Shift);
|
||||
unsigned __int64 __shiftright128(unsigned __int64 _LowPart,
|
||||
unsigned __int64 _HighPart,
|
||||
unsigned char _Shift);
|
||||
void __stosq(unsigned __int64 *, unsigned __int64, size_t);
|
||||
unsigned char __vmx_on(unsigned __int64 *);
|
||||
unsigned char __vmx_vmclear(unsigned __int64 *);
|
||||
@ -236,216 +229,15 @@ unsigned __int64 _shlx_u64(unsigned __int64, unsigned int);
|
||||
unsigned __int64 _shrx_u64(unsigned __int64, unsigned int);
|
||||
__int64 __mulh(__int64, __int64);
|
||||
unsigned __int64 __umulh(unsigned __int64, unsigned __int64);
|
||||
__int64 _mul128(__int64, __int64, __int64*);
|
||||
unsigned __int64 _umul128(unsigned __int64,
|
||||
unsigned __int64,
|
||||
unsigned __int64*);
|
||||
__int64 _mul128(__int64, __int64, __int64 *);
|
||||
|
||||
#endif /* __x86_64__ */
|
||||
|
||||
#if defined(__x86_64__) || defined(__arm__) || defined(__aarch64__)
|
||||
|
||||
unsigned char _BitScanForward64(unsigned long *_Index, unsigned __int64 _Mask);
|
||||
unsigned char _BitScanReverse64(unsigned long *_Index, unsigned __int64 _Mask);
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(__i386__) || defined(__x86_64__) || defined(__arm__) || defined(__aarch64__)
|
||||
__int64 _InterlockedDecrement64(__int64 volatile *_Addend);
|
||||
__int64 _InterlockedExchange64(__int64 volatile *_Target, __int64 _Value);
|
||||
__int64 _InterlockedExchangeAdd64(__int64 volatile *_Addend, __int64 _Value);
|
||||
__int64 _InterlockedExchangeSub64(__int64 volatile *_Subend, __int64 _Value);
|
||||
__int64 _InterlockedIncrement64(__int64 volatile *_Addend);
|
||||
__int64 _InterlockedOr64(__int64 volatile *_Value, __int64 _Mask);
|
||||
__int64 _InterlockedXor64(__int64 volatile *_Value, __int64 _Mask);
|
||||
__int64 _InterlockedAnd64(__int64 volatile *_Value, __int64 _Mask);
|
||||
|
||||
#endif
|
||||
|
||||
/*----------------------------------------------------------------------------*\
|
||||
|* Interlocked Exchange Add
|
||||
\*----------------------------------------------------------------------------*/
|
||||
#if defined(__arm__) || defined(__aarch64__)
|
||||
char _InterlockedExchangeAdd8_acq(char volatile *_Addend, char _Value);
|
||||
char _InterlockedExchangeAdd8_nf(char volatile *_Addend, char _Value);
|
||||
char _InterlockedExchangeAdd8_rel(char volatile *_Addend, char _Value);
|
||||
short _InterlockedExchangeAdd16_acq(short volatile *_Addend, short _Value);
|
||||
short _InterlockedExchangeAdd16_nf(short volatile *_Addend, short _Value);
|
||||
short _InterlockedExchangeAdd16_rel(short volatile *_Addend, short _Value);
|
||||
long _InterlockedExchangeAdd_acq(long volatile *_Addend, long _Value);
|
||||
long _InterlockedExchangeAdd_nf(long volatile *_Addend, long _Value);
|
||||
long _InterlockedExchangeAdd_rel(long volatile *_Addend, long _Value);
|
||||
__int64 _InterlockedExchangeAdd64_acq(__int64 volatile *_Addend, __int64 _Value);
|
||||
__int64 _InterlockedExchangeAdd64_nf(__int64 volatile *_Addend, __int64 _Value);
|
||||
__int64 _InterlockedExchangeAdd64_rel(__int64 volatile *_Addend, __int64 _Value);
|
||||
#endif
|
||||
/*----------------------------------------------------------------------------*\
|
||||
|* Interlocked Increment
|
||||
\*----------------------------------------------------------------------------*/
|
||||
#if defined(__arm__) || defined(__aarch64__)
|
||||
short _InterlockedIncrement16_acq(short volatile *_Value);
|
||||
short _InterlockedIncrement16_nf(short volatile *_Value);
|
||||
short _InterlockedIncrement16_rel(short volatile *_Value);
|
||||
long _InterlockedIncrement_acq(long volatile *_Value);
|
||||
long _InterlockedIncrement_nf(long volatile *_Value);
|
||||
long _InterlockedIncrement_rel(long volatile *_Value);
|
||||
__int64 _InterlockedIncrement64_acq(__int64 volatile *_Value);
|
||||
__int64 _InterlockedIncrement64_nf(__int64 volatile *_Value);
|
||||
__int64 _InterlockedIncrement64_rel(__int64 volatile *_Value);
|
||||
#endif
|
||||
/*----------------------------------------------------------------------------*\
|
||||
|* Interlocked Decrement
|
||||
\*----------------------------------------------------------------------------*/
|
||||
#if defined(__arm__) || defined(__aarch64__)
|
||||
short _InterlockedDecrement16_acq(short volatile *_Value);
|
||||
short _InterlockedDecrement16_nf(short volatile *_Value);
|
||||
short _InterlockedDecrement16_rel(short volatile *_Value);
|
||||
long _InterlockedDecrement_acq(long volatile *_Value);
|
||||
long _InterlockedDecrement_nf(long volatile *_Value);
|
||||
long _InterlockedDecrement_rel(long volatile *_Value);
|
||||
__int64 _InterlockedDecrement64_acq(__int64 volatile *_Value);
|
||||
__int64 _InterlockedDecrement64_nf(__int64 volatile *_Value);
|
||||
__int64 _InterlockedDecrement64_rel(__int64 volatile *_Value);
|
||||
#endif
|
||||
/*----------------------------------------------------------------------------*\
|
||||
|* Interlocked And
|
||||
\*----------------------------------------------------------------------------*/
|
||||
#if defined(__arm__) || defined(__aarch64__)
|
||||
char _InterlockedAnd8_acq(char volatile *_Value, char _Mask);
|
||||
char _InterlockedAnd8_nf(char volatile *_Value, char _Mask);
|
||||
char _InterlockedAnd8_rel(char volatile *_Value, char _Mask);
|
||||
short _InterlockedAnd16_acq(short volatile *_Value, short _Mask);
|
||||
short _InterlockedAnd16_nf(short volatile *_Value, short _Mask);
|
||||
short _InterlockedAnd16_rel(short volatile *_Value, short _Mask);
|
||||
long _InterlockedAnd_acq(long volatile *_Value, long _Mask);
|
||||
long _InterlockedAnd_nf(long volatile *_Value, long _Mask);
|
||||
long _InterlockedAnd_rel(long volatile *_Value, long _Mask);
|
||||
__int64 _InterlockedAnd64_acq(__int64 volatile *_Value, __int64 _Mask);
|
||||
__int64 _InterlockedAnd64_nf(__int64 volatile *_Value, __int64 _Mask);
|
||||
__int64 _InterlockedAnd64_rel(__int64 volatile *_Value, __int64 _Mask);
|
||||
#endif
|
||||
/*----------------------------------------------------------------------------*\
|
||||
|* Bit Counting and Testing
|
||||
\*----------------------------------------------------------------------------*/
|
||||
#if defined(__arm__) || defined(__aarch64__)
|
||||
unsigned char _interlockedbittestandset_acq(long volatile *_BitBase,
|
||||
long _BitPos);
|
||||
unsigned char _interlockedbittestandset_nf(long volatile *_BitBase,
|
||||
long _BitPos);
|
||||
unsigned char _interlockedbittestandset_rel(long volatile *_BitBase,
|
||||
long _BitPos);
|
||||
unsigned char _interlockedbittestandreset_acq(long volatile *_BitBase,
|
||||
long _BitPos);
|
||||
unsigned char _interlockedbittestandreset_nf(long volatile *_BitBase,
|
||||
long _BitPos);
|
||||
unsigned char _interlockedbittestandreset_rel(long volatile *_BitBase,
|
||||
long _BitPos);
|
||||
#endif
|
||||
/*----------------------------------------------------------------------------*\
|
||||
|* Interlocked Or
|
||||
\*----------------------------------------------------------------------------*/
|
||||
#if defined(__arm__) || defined(__aarch64__)
|
||||
char _InterlockedOr8_acq(char volatile *_Value, char _Mask);
|
||||
char _InterlockedOr8_nf(char volatile *_Value, char _Mask);
|
||||
char _InterlockedOr8_rel(char volatile *_Value, char _Mask);
|
||||
short _InterlockedOr16_acq(short volatile *_Value, short _Mask);
|
||||
short _InterlockedOr16_nf(short volatile *_Value, short _Mask);
|
||||
short _InterlockedOr16_rel(short volatile *_Value, short _Mask);
|
||||
long _InterlockedOr_acq(long volatile *_Value, long _Mask);
|
||||
long _InterlockedOr_nf(long volatile *_Value, long _Mask);
|
||||
long _InterlockedOr_rel(long volatile *_Value, long _Mask);
|
||||
__int64 _InterlockedOr64_acq(__int64 volatile *_Value, __int64 _Mask);
|
||||
__int64 _InterlockedOr64_nf(__int64 volatile *_Value, __int64 _Mask);
|
||||
__int64 _InterlockedOr64_rel(__int64 volatile *_Value, __int64 _Mask);
|
||||
#endif
|
||||
/*----------------------------------------------------------------------------*\
|
||||
|* Interlocked Xor
|
||||
\*----------------------------------------------------------------------------*/
|
||||
#if defined(__arm__) || defined(__aarch64__)
|
||||
char _InterlockedXor8_acq(char volatile *_Value, char _Mask);
|
||||
char _InterlockedXor8_nf(char volatile *_Value, char _Mask);
|
||||
char _InterlockedXor8_rel(char volatile *_Value, char _Mask);
|
||||
short _InterlockedXor16_acq(short volatile *_Value, short _Mask);
|
||||
short _InterlockedXor16_nf(short volatile *_Value, short _Mask);
|
||||
short _InterlockedXor16_rel(short volatile *_Value, short _Mask);
|
||||
long _InterlockedXor_acq(long volatile *_Value, long _Mask);
|
||||
long _InterlockedXor_nf(long volatile *_Value, long _Mask);
|
||||
long _InterlockedXor_rel(long volatile *_Value, long _Mask);
|
||||
__int64 _InterlockedXor64_acq(__int64 volatile *_Value, __int64 _Mask);
|
||||
__int64 _InterlockedXor64_nf(__int64 volatile *_Value, __int64 _Mask);
|
||||
__int64 _InterlockedXor64_rel(__int64 volatile *_Value, __int64 _Mask);
|
||||
#endif
|
||||
/*----------------------------------------------------------------------------*\
|
||||
|* Interlocked Exchange
|
||||
\*----------------------------------------------------------------------------*/
|
||||
#if defined(__arm__) || defined(__aarch64__)
|
||||
char _InterlockedExchange8_acq(char volatile *_Target, char _Value);
|
||||
char _InterlockedExchange8_nf(char volatile *_Target, char _Value);
|
||||
char _InterlockedExchange8_rel(char volatile *_Target, char _Value);
|
||||
short _InterlockedExchange16_acq(short volatile *_Target, short _Value);
|
||||
short _InterlockedExchange16_nf(short volatile *_Target, short _Value);
|
||||
short _InterlockedExchange16_rel(short volatile *_Target, short _Value);
|
||||
long _InterlockedExchange_acq(long volatile *_Target, long _Value);
|
||||
long _InterlockedExchange_nf(long volatile *_Target, long _Value);
|
||||
long _InterlockedExchange_rel(long volatile *_Target, long _Value);
|
||||
__int64 _InterlockedExchange64_acq(__int64 volatile *_Target, __int64 _Value);
|
||||
__int64 _InterlockedExchange64_nf(__int64 volatile *_Target, __int64 _Value);
|
||||
__int64 _InterlockedExchange64_rel(__int64 volatile *_Target, __int64 _Value);
|
||||
#endif
|
||||
/*----------------------------------------------------------------------------*\
|
||||
|* Interlocked Compare Exchange
|
||||
\*----------------------------------------------------------------------------*/
|
||||
#if defined(__arm__) || defined(__aarch64__)
|
||||
char _InterlockedCompareExchange8_acq(char volatile *_Destination,
|
||||
char _Exchange, char _Comparand);
|
||||
char _InterlockedCompareExchange8_nf(char volatile *_Destination,
|
||||
char _Exchange, char _Comparand);
|
||||
char _InterlockedCompareExchange8_rel(char volatile *_Destination,
|
||||
char _Exchange, char _Comparand);
|
||||
short _InterlockedCompareExchange16_acq(short volatile *_Destination,
|
||||
short _Exchange, short _Comparand);
|
||||
short _InterlockedCompareExchange16_nf(short volatile *_Destination,
|
||||
short _Exchange, short _Comparand);
|
||||
short _InterlockedCompareExchange16_rel(short volatile *_Destination,
|
||||
short _Exchange, short _Comparand);
|
||||
long _InterlockedCompareExchange_acq(long volatile *_Destination,
|
||||
long _Exchange, long _Comparand);
|
||||
long _InterlockedCompareExchange_nf(long volatile *_Destination,
|
||||
long _Exchange, long _Comparand);
|
||||
long _InterlockedCompareExchange_rel(long volatile *_Destination,
|
||||
long _Exchange, long _Comparand);
|
||||
__int64 _InterlockedCompareExchange64_acq(__int64 volatile *_Destination,
|
||||
__int64 _Exchange, __int64 _Comparand);
|
||||
__int64 _InterlockedCompareExchange64_nf(__int64 volatile *_Destination,
|
||||
__int64 _Exchange, __int64 _Comparand);
|
||||
__int64 _InterlockedCompareExchange64_rel(__int64 volatile *_Destination,
|
||||
__int64 _Exchange, __int64 _Comparand);
|
||||
#endif
|
||||
#if defined(__x86_64__) || defined(__aarch64__)
|
||||
unsigned char _InterlockedCompareExchange128(__int64 volatile *_Destination,
|
||||
__int64 _ExchangeHigh,
|
||||
__int64 _ExchangeLow,
|
||||
__int64 *_ComparandResult);
|
||||
#endif
|
||||
#if defined(__aarch64__)
|
||||
unsigned char _InterlockedCompareExchange128_acq(__int64 volatile *_Destination,
|
||||
__int64 _ExchangeHigh,
|
||||
__int64 _ExchangeLow,
|
||||
__int64 *_ComparandResult);
|
||||
unsigned char _InterlockedCompareExchange128_nf(__int64 volatile *_Destination,
|
||||
__int64 _ExchangeHigh,
|
||||
__int64 _ExchangeLow,
|
||||
__int64 *_ComparandResult);
|
||||
unsigned char _InterlockedCompareExchange128_rel(__int64 volatile *_Destination,
|
||||
__int64 _ExchangeHigh,
|
||||
__int64 _ExchangeLow,
|
||||
__int64 *_ComparandResult);
|
||||
#endif
|
||||
|
||||
/*----------------------------------------------------------------------------*\
|
||||
|* movs, stos
|
||||
\*----------------------------------------------------------------------------*/
|
||||
#if defined(__i386__) || defined(__x86_64__)
|
||||
|
||||
#if defined(__i386__) || (defined(__x86_64__) && !defined(__arm64ec__))
|
||||
static __inline__ void __DEFAULT_FN_ATTRS __movsb(unsigned char *__dst,
|
||||
unsigned char const *__src,
|
||||
size_t __n) {
|
||||
@ -514,7 +306,7 @@ static __inline__ void __DEFAULT_FN_ATTRS __stosw(unsigned short *__dst,
|
||||
: "memory");
|
||||
}
|
||||
#endif
|
||||
#ifdef __x86_64__
|
||||
#if defined(__x86_64__) && !defined(__arm64ec__)
|
||||
static __inline__ void __DEFAULT_FN_ATTRS __movsq(
|
||||
unsigned long long *__dst, unsigned long long const *__src, size_t __n) {
|
||||
__asm__ __volatile__("rep movsq"
|
||||
@ -533,10 +325,40 @@ static __inline__ void __DEFAULT_FN_ATTRS __stosq(unsigned __int64 *__dst,
|
||||
/*----------------------------------------------------------------------------*\
|
||||
|* Misc
|
||||
\*----------------------------------------------------------------------------*/
|
||||
#if defined(__i386__) || defined(__x86_64__)
|
||||
#if defined(__i386__) || (defined(__x86_64__) && !defined(__arm64ec__))
|
||||
static __inline__ void __DEFAULT_FN_ATTRS __halt(void) {
|
||||
__asm__ volatile("hlt");
|
||||
}
|
||||
|
||||
static inline unsigned char __inbyte(unsigned short port) {
|
||||
unsigned char ret;
|
||||
__asm__ __volatile__("inb %w1, %b0" : "=a"(ret) : "Nd"(port));
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline unsigned short __inword(unsigned short port) {
|
||||
unsigned short ret;
|
||||
__asm__ __volatile__("inw %w1, %w0" : "=a"(ret) : "Nd"(port));
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline unsigned long __indword(unsigned short port) {
|
||||
unsigned long ret;
|
||||
__asm__ __volatile__("inl %w1, %k0" : "=a"(ret) : "Nd"(port));
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline void __outbyte(unsigned short port, unsigned char data) {
|
||||
__asm__ __volatile__("outb %b0, %w1" : : "a"(data), "Nd"(port));
|
||||
}
|
||||
|
||||
static inline void __outword(unsigned short port, unsigned short data) {
|
||||
__asm__ __volatile__("outw %w0, %w1" : : "a"(data), "Nd"(port));
|
||||
}
|
||||
|
||||
static inline void __outdword(unsigned short port, unsigned long data) {
|
||||
__asm__ __volatile__("outl %k0, %w1" : : "a"(data), "Nd"(port));
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(__i386__) || defined(__x86_64__) || defined(__aarch64__)
|
||||
@ -548,9 +370,10 @@ static __inline__ void __DEFAULT_FN_ATTRS __nop(void) {
|
||||
/*----------------------------------------------------------------------------*\
|
||||
|* MS AArch64 specific
|
||||
\*----------------------------------------------------------------------------*/
|
||||
#if defined(__aarch64__)
|
||||
#if defined(__aarch64__) || defined(__arm64ec__)
|
||||
unsigned __int64 __getReg(int);
|
||||
long _InterlockedAdd(long volatile *Addend, long Value);
|
||||
__int64 _InterlockedAdd64(__int64 volatile *Addend, __int64 Value);
|
||||
__int64 _ReadStatusReg(int);
|
||||
void _WriteStatusReg(int, __int64);
|
||||
|
||||
@ -582,18 +405,19 @@ unsigned int _CountLeadingOnes(unsigned long);
|
||||
unsigned int _CountLeadingOnes64(unsigned __int64);
|
||||
unsigned int _CountLeadingSigns(long);
|
||||
unsigned int _CountLeadingSigns64(__int64);
|
||||
unsigned int _CountLeadingZeros(unsigned long);
|
||||
unsigned int _CountLeadingZeros64(unsigned _int64);
|
||||
unsigned int _CountOneBits(unsigned long);
|
||||
unsigned int _CountOneBits64(unsigned __int64);
|
||||
|
||||
void __cdecl __prefetch(void *);
|
||||
unsigned int __hlt(unsigned int, ...);
|
||||
|
||||
void __cdecl __prefetch(const void *);
|
||||
|
||||
#endif
|
||||
|
||||
/*----------------------------------------------------------------------------*\
|
||||
|* Privileged intrinsics
|
||||
\*----------------------------------------------------------------------------*/
|
||||
#if defined(__i386__) || defined(__x86_64__)
|
||||
#if defined(__i386__) || (defined(__x86_64__) && !defined(__arm64ec__))
|
||||
static __inline__ unsigned __int64 __DEFAULT_FN_ATTRS
|
||||
__readmsr(unsigned long __register) {
|
||||
// Loads the contents of a 64-bit model specific register (MSR) specified in
|
||||
@ -607,7 +431,6 @@ __readmsr(unsigned long __register) {
|
||||
__asm__ ("rdmsr" : "=d"(__edx), "=a"(__eax) : "c"(__register));
|
||||
return (((unsigned __int64)__edx) << 32) | (unsigned __int64)__eax;
|
||||
}
|
||||
#endif
|
||||
|
||||
static __inline__ unsigned __LPTRINT_TYPE__ __DEFAULT_FN_ATTRS __readcr3(void) {
|
||||
unsigned __LPTRINT_TYPE__ __cr3_val;
|
||||
@ -623,6 +446,7 @@ static __inline__ void __DEFAULT_FN_ATTRS
|
||||
__writecr3(unsigned __INTPTR_TYPE__ __cr3_val) {
|
||||
__asm__ ("mov {%0, %%cr3|cr3, %0}" : : "r"(__cr3_val) : "memory");
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
247
lib/include/intrin0.h
vendored
Normal file
247
lib/include/intrin0.h
vendored
Normal file
@ -0,0 +1,247 @@
|
||||
/* ===-------- intrin0.h --------------------------------------------------===
|
||||
*
|
||||
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
* See https://llvm.org/LICENSE.txt for license information.
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
|
||||
/* Only include this if we're compiling for the windows platform. */
|
||||
#ifndef _MSC_VER
|
||||
#include_next <intrin0.h>
|
||||
#else
|
||||
|
||||
#ifndef __INTRIN0_H
|
||||
#define __INTRIN0_H
|
||||
|
||||
#if defined(__x86_64__) && !defined(__arm64ec__)
|
||||
#include <adcintrin.h>
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
unsigned char _BitScanForward(unsigned long *_Index, unsigned long _Mask);
|
||||
unsigned char _BitScanReverse(unsigned long *_Index, unsigned long _Mask);
|
||||
void _ReadWriteBarrier(void);
|
||||
|
||||
#if defined(__aarch64__) || defined(__arm64ec__)
|
||||
unsigned int _CountLeadingZeros(unsigned long);
|
||||
unsigned int _CountLeadingZeros64(unsigned _int64);
|
||||
unsigned char _InterlockedCompareExchange128_acq(__int64 volatile *_Destination,
|
||||
__int64 _ExchangeHigh,
|
||||
__int64 _ExchangeLow,
|
||||
__int64 *_ComparandResult);
|
||||
unsigned char _InterlockedCompareExchange128_nf(__int64 volatile *_Destination,
|
||||
__int64 _ExchangeHigh,
|
||||
__int64 _ExchangeLow,
|
||||
__int64 *_ComparandResult);
|
||||
unsigned char _InterlockedCompareExchange128_rel(__int64 volatile *_Destination,
|
||||
__int64 _ExchangeHigh,
|
||||
__int64 _ExchangeLow,
|
||||
__int64 *_ComparandResult);
|
||||
#endif
|
||||
|
||||
#ifdef __x86_64__ && !defined(__arm64ec__)
|
||||
unsigned __int64 _umul128(unsigned __int64, unsigned __int64,
|
||||
unsigned __int64 *);
|
||||
unsigned __int64 __shiftleft128(unsigned __int64 _LowPart,
|
||||
unsigned __int64 _HighPart,
|
||||
unsigned char _Shift);
|
||||
unsigned __int64 __shiftright128(unsigned __int64 _LowPart,
|
||||
unsigned __int64 _HighPart,
|
||||
unsigned char _Shift);
|
||||
#endif
|
||||
|
||||
#if defined(__i386__) || (defined(__x86_64__) && !defined(__arm64ec__))
|
||||
void _mm_pause(void);
|
||||
#endif
|
||||
|
||||
#if defined(__x86_64__) || defined(__aarch64__)
|
||||
unsigned char _InterlockedCompareExchange128(__int64 volatile *_Destination,
|
||||
__int64 _ExchangeHigh,
|
||||
__int64 _ExchangeLow,
|
||||
__int64 *_ComparandResult);
|
||||
#endif
|
||||
|
||||
#if defined(__x86_64__) || defined(__arm__) || defined(__aarch64__)
|
||||
unsigned char _BitScanForward64(unsigned long *_Index, unsigned __int64 _Mask);
|
||||
unsigned char _BitScanReverse64(unsigned long *_Index, unsigned __int64 _Mask);
|
||||
#endif
|
||||
|
||||
#if defined(__i386__) || defined(__x86_64__) || defined(__arm__) || \
|
||||
defined(__aarch64__)
|
||||
__int64 _InterlockedDecrement64(__int64 volatile *_Addend);
|
||||
__int64 _InterlockedExchange64(__int64 volatile *_Target, __int64 _Value);
|
||||
__int64 _InterlockedExchangeAdd64(__int64 volatile *_Addend, __int64 _Value);
|
||||
__int64 _InterlockedExchangeSub64(__int64 volatile *_Subend, __int64 _Value);
|
||||
__int64 _InterlockedIncrement64(__int64 volatile *_Addend);
|
||||
__int64 _InterlockedOr64(__int64 volatile *_Value, __int64 _Mask);
|
||||
__int64 _InterlockedXor64(__int64 volatile *_Value, __int64 _Mask);
|
||||
__int64 _InterlockedAnd64(__int64 volatile *_Value, __int64 _Mask);
|
||||
#endif
|
||||
|
||||
#if defined(__arm__) || defined(__aarch64__) || defined(__arm64ec__)
|
||||
/*----------------------------------------------------------------------------*\
|
||||
|* Interlocked Exchange Add
|
||||
\*----------------------------------------------------------------------------*/
|
||||
char _InterlockedExchangeAdd8_acq(char volatile *_Addend, char _Value);
|
||||
char _InterlockedExchangeAdd8_nf(char volatile *_Addend, char _Value);
|
||||
char _InterlockedExchangeAdd8_rel(char volatile *_Addend, char _Value);
|
||||
short _InterlockedExchangeAdd16_acq(short volatile *_Addend, short _Value);
|
||||
short _InterlockedExchangeAdd16_nf(short volatile *_Addend, short _Value);
|
||||
short _InterlockedExchangeAdd16_rel(short volatile *_Addend, short _Value);
|
||||
long _InterlockedExchangeAdd_acq(long volatile *_Addend, long _Value);
|
||||
long _InterlockedExchangeAdd_nf(long volatile *_Addend, long _Value);
|
||||
long _InterlockedExchangeAdd_rel(long volatile *_Addend, long _Value);
|
||||
__int64 _InterlockedExchangeAdd64_acq(__int64 volatile *_Addend,
|
||||
__int64 _Value);
|
||||
__int64 _InterlockedExchangeAdd64_nf(__int64 volatile *_Addend, __int64 _Value);
|
||||
__int64 _InterlockedExchangeAdd64_rel(__int64 volatile *_Addend,
|
||||
__int64 _Value);
|
||||
|
||||
/*----------------------------------------------------------------------------*\
|
||||
|* Interlocked Increment
|
||||
\*----------------------------------------------------------------------------*/
|
||||
short _InterlockedIncrement16_acq(short volatile *_Value);
|
||||
short _InterlockedIncrement16_nf(short volatile *_Value);
|
||||
short _InterlockedIncrement16_rel(short volatile *_Value);
|
||||
long _InterlockedIncrement_acq(long volatile *_Value);
|
||||
long _InterlockedIncrement_nf(long volatile *_Value);
|
||||
long _InterlockedIncrement_rel(long volatile *_Value);
|
||||
__int64 _InterlockedIncrement64_acq(__int64 volatile *_Value);
|
||||
__int64 _InterlockedIncrement64_nf(__int64 volatile *_Value);
|
||||
__int64 _InterlockedIncrement64_rel(__int64 volatile *_Value);
|
||||
|
||||
/*----------------------------------------------------------------------------*\
|
||||
|* Interlocked Decrement
|
||||
\*----------------------------------------------------------------------------*/
|
||||
short _InterlockedDecrement16_acq(short volatile *_Value);
|
||||
short _InterlockedDecrement16_nf(short volatile *_Value);
|
||||
short _InterlockedDecrement16_rel(short volatile *_Value);
|
||||
long _InterlockedDecrement_acq(long volatile *_Value);
|
||||
long _InterlockedDecrement_nf(long volatile *_Value);
|
||||
long _InterlockedDecrement_rel(long volatile *_Value);
|
||||
__int64 _InterlockedDecrement64_acq(__int64 volatile *_Value);
|
||||
__int64 _InterlockedDecrement64_nf(__int64 volatile *_Value);
|
||||
__int64 _InterlockedDecrement64_rel(__int64 volatile *_Value);
|
||||
|
||||
/*----------------------------------------------------------------------------*\
|
||||
|* Interlocked And
|
||||
\*----------------------------------------------------------------------------*/
|
||||
char _InterlockedAnd8_acq(char volatile *_Value, char _Mask);
|
||||
char _InterlockedAnd8_nf(char volatile *_Value, char _Mask);
|
||||
char _InterlockedAnd8_rel(char volatile *_Value, char _Mask);
|
||||
short _InterlockedAnd16_acq(short volatile *_Value, short _Mask);
|
||||
short _InterlockedAnd16_nf(short volatile *_Value, short _Mask);
|
||||
short _InterlockedAnd16_rel(short volatile *_Value, short _Mask);
|
||||
long _InterlockedAnd_acq(long volatile *_Value, long _Mask);
|
||||
long _InterlockedAnd_nf(long volatile *_Value, long _Mask);
|
||||
long _InterlockedAnd_rel(long volatile *_Value, long _Mask);
|
||||
__int64 _InterlockedAnd64_acq(__int64 volatile *_Value, __int64 _Mask);
|
||||
__int64 _InterlockedAnd64_nf(__int64 volatile *_Value, __int64 _Mask);
|
||||
__int64 _InterlockedAnd64_rel(__int64 volatile *_Value, __int64 _Mask);
|
||||
|
||||
/*----------------------------------------------------------------------------*\
|
||||
|* Bit Counting and Testing
|
||||
\*----------------------------------------------------------------------------*/
|
||||
unsigned char _interlockedbittestandset_acq(long volatile *_BitBase,
|
||||
long _BitPos);
|
||||
unsigned char _interlockedbittestandset_nf(long volatile *_BitBase,
|
||||
long _BitPos);
|
||||
unsigned char _interlockedbittestandset_rel(long volatile *_BitBase,
|
||||
long _BitPos);
|
||||
unsigned char _interlockedbittestandreset_acq(long volatile *_BitBase,
|
||||
long _BitPos);
|
||||
unsigned char _interlockedbittestandreset_nf(long volatile *_BitBase,
|
||||
long _BitPos);
|
||||
unsigned char _interlockedbittestandreset_rel(long volatile *_BitBase,
|
||||
long _BitPos);
|
||||
|
||||
/*----------------------------------------------------------------------------*\
|
||||
|* Interlocked Or
|
||||
\*----------------------------------------------------------------------------*/
|
||||
char _InterlockedOr8_acq(char volatile *_Value, char _Mask);
|
||||
char _InterlockedOr8_nf(char volatile *_Value, char _Mask);
|
||||
char _InterlockedOr8_rel(char volatile *_Value, char _Mask);
|
||||
short _InterlockedOr16_acq(short volatile *_Value, short _Mask);
|
||||
short _InterlockedOr16_nf(short volatile *_Value, short _Mask);
|
||||
short _InterlockedOr16_rel(short volatile *_Value, short _Mask);
|
||||
long _InterlockedOr_acq(long volatile *_Value, long _Mask);
|
||||
long _InterlockedOr_nf(long volatile *_Value, long _Mask);
|
||||
long _InterlockedOr_rel(long volatile *_Value, long _Mask);
|
||||
__int64 _InterlockedOr64_acq(__int64 volatile *_Value, __int64 _Mask);
|
||||
__int64 _InterlockedOr64_nf(__int64 volatile *_Value, __int64 _Mask);
|
||||
__int64 _InterlockedOr64_rel(__int64 volatile *_Value, __int64 _Mask);
|
||||
|
||||
/*----------------------------------------------------------------------------*\
|
||||
|* Interlocked Xor
|
||||
\*----------------------------------------------------------------------------*/
|
||||
char _InterlockedXor8_acq(char volatile *_Value, char _Mask);
|
||||
char _InterlockedXor8_nf(char volatile *_Value, char _Mask);
|
||||
char _InterlockedXor8_rel(char volatile *_Value, char _Mask);
|
||||
short _InterlockedXor16_acq(short volatile *_Value, short _Mask);
|
||||
short _InterlockedXor16_nf(short volatile *_Value, short _Mask);
|
||||
short _InterlockedXor16_rel(short volatile *_Value, short _Mask);
|
||||
long _InterlockedXor_acq(long volatile *_Value, long _Mask);
|
||||
long _InterlockedXor_nf(long volatile *_Value, long _Mask);
|
||||
long _InterlockedXor_rel(long volatile *_Value, long _Mask);
|
||||
__int64 _InterlockedXor64_acq(__int64 volatile *_Value, __int64 _Mask);
|
||||
__int64 _InterlockedXor64_nf(__int64 volatile *_Value, __int64 _Mask);
|
||||
__int64 _InterlockedXor64_rel(__int64 volatile *_Value, __int64 _Mask);
|
||||
|
||||
/*----------------------------------------------------------------------------*\
|
||||
|* Interlocked Exchange
|
||||
\*----------------------------------------------------------------------------*/
|
||||
char _InterlockedExchange8_acq(char volatile *_Target, char _Value);
|
||||
char _InterlockedExchange8_nf(char volatile *_Target, char _Value);
|
||||
char _InterlockedExchange8_rel(char volatile *_Target, char _Value);
|
||||
short _InterlockedExchange16_acq(short volatile *_Target, short _Value);
|
||||
short _InterlockedExchange16_nf(short volatile *_Target, short _Value);
|
||||
short _InterlockedExchange16_rel(short volatile *_Target, short _Value);
|
||||
long _InterlockedExchange_acq(long volatile *_Target, long _Value);
|
||||
long _InterlockedExchange_nf(long volatile *_Target, long _Value);
|
||||
long _InterlockedExchange_rel(long volatile *_Target, long _Value);
|
||||
__int64 _InterlockedExchange64_acq(__int64 volatile *_Target, __int64 _Value);
|
||||
__int64 _InterlockedExchange64_nf(__int64 volatile *_Target, __int64 _Value);
|
||||
__int64 _InterlockedExchange64_rel(__int64 volatile *_Target, __int64 _Value);
|
||||
|
||||
/*----------------------------------------------------------------------------*\
|
||||
|* Interlocked Compare Exchange
|
||||
\*----------------------------------------------------------------------------*/
|
||||
char _InterlockedCompareExchange8_acq(char volatile *_Destination,
|
||||
char _Exchange, char _Comparand);
|
||||
char _InterlockedCompareExchange8_nf(char volatile *_Destination,
|
||||
char _Exchange, char _Comparand);
|
||||
char _InterlockedCompareExchange8_rel(char volatile *_Destination,
|
||||
char _Exchange, char _Comparand);
|
||||
short _InterlockedCompareExchange16_acq(short volatile *_Destination,
|
||||
short _Exchange, short _Comparand);
|
||||
short _InterlockedCompareExchange16_nf(short volatile *_Destination,
|
||||
short _Exchange, short _Comparand);
|
||||
short _InterlockedCompareExchange16_rel(short volatile *_Destination,
|
||||
short _Exchange, short _Comparand);
|
||||
long _InterlockedCompareExchange_acq(long volatile *_Destination,
|
||||
long _Exchange, long _Comparand);
|
||||
long _InterlockedCompareExchange_nf(long volatile *_Destination, long _Exchange,
|
||||
long _Comparand);
|
||||
long _InterlockedCompareExchange_rel(long volatile *_Destination,
|
||||
long _Exchange, long _Comparand);
|
||||
__int64 _InterlockedCompareExchange64_acq(__int64 volatile *_Destination,
|
||||
__int64 _Exchange,
|
||||
__int64 _Comparand);
|
||||
__int64 _InterlockedCompareExchange64_nf(__int64 volatile *_Destination,
|
||||
__int64 _Exchange, __int64 _Comparand);
|
||||
__int64 _InterlockedCompareExchange64_rel(__int64 volatile *_Destination,
|
||||
__int64 _Exchange,
|
||||
__int64 _Comparand);
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* __INTRIN0_H */
|
||||
#endif /* _MSC_VER */
|
4
lib/include/inttypes.h
vendored
4
lib/include/inttypes.h
vendored
@ -13,6 +13,9 @@
|
||||
#if !defined(_AIX) || !defined(_STD_TYPES_T)
|
||||
#define __CLANG_INTTYPES_H
|
||||
#endif
|
||||
#if defined(__MVS__) && __has_include_next(<inttypes.h>)
|
||||
#include_next <inttypes.h>
|
||||
#else
|
||||
|
||||
#if defined(_MSC_VER) && _MSC_VER < 1800
|
||||
#error MSVC does not have inttypes.h prior to Visual Studio 2013
|
||||
@ -94,4 +97,5 @@
|
||||
#define SCNxFAST32 "x"
|
||||
#endif
|
||||
|
||||
#endif /* __MVS__ */
|
||||
#endif /* __CLANG_INTTYPES_H */
|
||||
|
4
lib/include/iso646.h
vendored
4
lib/include/iso646.h
vendored
@ -9,6 +9,9 @@
|
||||
|
||||
#ifndef __ISO646_H
|
||||
#define __ISO646_H
|
||||
#if defined(__MVS__) && __has_include_next(<iso646.h>)
|
||||
#include_next <iso646.h>
|
||||
#else
|
||||
|
||||
#ifndef __cplusplus
|
||||
#define and &&
|
||||
@ -24,4 +27,5 @@
|
||||
#define xor_eq ^=
|
||||
#endif
|
||||
|
||||
#endif /* __MVS__ */
|
||||
#endif /* __ISO646_H */
|
||||
|
13
lib/include/keylockerintrin.h
vendored
13
lib/include/keylockerintrin.h
vendored
@ -28,8 +28,7 @@
|
||||
#ifndef _KEYLOCKERINTRIN_H
|
||||
#define _KEYLOCKERINTRIN_H
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__KL__)
|
||||
#if !defined(__SCE__) || __has_feature(modules) || defined(__KL__)
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS \
|
||||
@ -327,11 +326,9 @@ _mm_aesdec256kl_u8(__m128i* __odata, __m128i __idata, const void *__h) {
|
||||
|
||||
#undef __DEFAULT_FN_ATTRS
|
||||
|
||||
#endif /* !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) \
|
||||
|| defined(__KL__) */
|
||||
#endif /* !defined(__SCE__) || __has_feature(modules) || defined(__KL__) */
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__WIDEKL__)
|
||||
#if !defined(__SCE__) || __has_feature(modules) || defined(__WIDEKL__)
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS \
|
||||
@ -524,7 +521,7 @@ _mm_aesdecwide256kl_u8(__m128i __odata[8], const __m128i __idata[8], const void*
|
||||
|
||||
#undef __DEFAULT_FN_ATTRS
|
||||
|
||||
#endif /* !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) \
|
||||
|| defined(__WIDEKL__) */
|
||||
#endif /* !defined(__SCE__) || __has_feature(modules) || defined(__WIDEKL__) \
|
||||
*/
|
||||
|
||||
#endif /* _KEYLOCKERINTRIN_H */
|
||||
|
5
lib/include/limits.h
vendored
5
lib/include/limits.h
vendored
@ -9,6 +9,10 @@
|
||||
#ifndef __CLANG_LIMITS_H
|
||||
#define __CLANG_LIMITS_H
|
||||
|
||||
#if defined(__MVS__) && __has_include_next(<limits.h>)
|
||||
#include_next <limits.h>
|
||||
#else
|
||||
|
||||
/* The system's limits.h may, in turn, try to #include_next GCC's limits.h.
|
||||
Avert this #include_next madness. */
|
||||
#if defined __GNUC__ && !defined _GCC_LIMITS_H_
|
||||
@ -122,4 +126,5 @@
|
||||
#define ULONG_LONG_MAX (__LONG_LONG_MAX__*2ULL+1ULL)
|
||||
#endif
|
||||
|
||||
#endif /* __MVS__ */
|
||||
#endif /* __CLANG_LIMITS_H */
|
||||
|
2
lib/include/llvm_libc_wrappers/assert.h
vendored
2
lib/include/llvm_libc_wrappers/assert.h
vendored
@ -1,4 +1,4 @@
|
||||
//===-- Wrapper for C standard assert.h declarations on the GPU ------------===//
|
||||
//===-- Wrapper for C standard assert.h declarations on the GPU -*- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
|
147
lib/include/mm3dnow.h
vendored
147
lib/include/mm3dnow.h
vendored
@ -7,151 +7,16 @@
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
|
||||
// 3dNow intrinsics are no longer supported.
|
||||
|
||||
#ifndef _MM3DNOW_H_INCLUDED
|
||||
#define _MM3DNOW_H_INCLUDED
|
||||
|
||||
#ifndef _CLANG_DISABLE_CRT_DEPRECATION_WARNINGS
|
||||
#warning "The <mm3dnow.h> header is deprecated, and 3dNow! intrinsics are unsupported. For other intrinsics, include <x86intrin.h>, instead."
|
||||
#endif
|
||||
|
||||
#include <mmintrin.h>
|
||||
#include <prfchwintrin.h>
|
||||
|
||||
typedef float __v2sf __attribute__((__vector_size__(8)));
|
||||
|
||||
/* Define the default attributes for the functions in this file. */
|
||||
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("3dnow"), __min_vector_width__(64)))
|
||||
|
||||
static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("3dnow")))
|
||||
_m_femms(void) {
|
||||
__builtin_ia32_femms();
|
||||
}
|
||||
|
||||
static __inline__ __m64 __DEFAULT_FN_ATTRS
|
||||
_m_pavgusb(__m64 __m1, __m64 __m2) {
|
||||
return (__m64)__builtin_ia32_pavgusb((__v8qi)__m1, (__v8qi)__m2);
|
||||
}
|
||||
|
||||
static __inline__ __m64 __DEFAULT_FN_ATTRS
|
||||
_m_pf2id(__m64 __m) {
|
||||
return (__m64)__builtin_ia32_pf2id((__v2sf)__m);
|
||||
}
|
||||
|
||||
static __inline__ __m64 __DEFAULT_FN_ATTRS
|
||||
_m_pfacc(__m64 __m1, __m64 __m2) {
|
||||
return (__m64)__builtin_ia32_pfacc((__v2sf)__m1, (__v2sf)__m2);
|
||||
}
|
||||
|
||||
static __inline__ __m64 __DEFAULT_FN_ATTRS
|
||||
_m_pfadd(__m64 __m1, __m64 __m2) {
|
||||
return (__m64)__builtin_ia32_pfadd((__v2sf)__m1, (__v2sf)__m2);
|
||||
}
|
||||
|
||||
static __inline__ __m64 __DEFAULT_FN_ATTRS
|
||||
_m_pfcmpeq(__m64 __m1, __m64 __m2) {
|
||||
return (__m64)__builtin_ia32_pfcmpeq((__v2sf)__m1, (__v2sf)__m2);
|
||||
}
|
||||
|
||||
static __inline__ __m64 __DEFAULT_FN_ATTRS
|
||||
_m_pfcmpge(__m64 __m1, __m64 __m2) {
|
||||
return (__m64)__builtin_ia32_pfcmpge((__v2sf)__m1, (__v2sf)__m2);
|
||||
}
|
||||
|
||||
static __inline__ __m64 __DEFAULT_FN_ATTRS
|
||||
_m_pfcmpgt(__m64 __m1, __m64 __m2) {
|
||||
return (__m64)__builtin_ia32_pfcmpgt((__v2sf)__m1, (__v2sf)__m2);
|
||||
}
|
||||
|
||||
static __inline__ __m64 __DEFAULT_FN_ATTRS
|
||||
_m_pfmax(__m64 __m1, __m64 __m2) {
|
||||
return (__m64)__builtin_ia32_pfmax((__v2sf)__m1, (__v2sf)__m2);
|
||||
}
|
||||
|
||||
static __inline__ __m64 __DEFAULT_FN_ATTRS
|
||||
_m_pfmin(__m64 __m1, __m64 __m2) {
|
||||
return (__m64)__builtin_ia32_pfmin((__v2sf)__m1, (__v2sf)__m2);
|
||||
}
|
||||
|
||||
static __inline__ __m64 __DEFAULT_FN_ATTRS
|
||||
_m_pfmul(__m64 __m1, __m64 __m2) {
|
||||
return (__m64)__builtin_ia32_pfmul((__v2sf)__m1, (__v2sf)__m2);
|
||||
}
|
||||
|
||||
static __inline__ __m64 __DEFAULT_FN_ATTRS
|
||||
_m_pfrcp(__m64 __m) {
|
||||
return (__m64)__builtin_ia32_pfrcp((__v2sf)__m);
|
||||
}
|
||||
|
||||
static __inline__ __m64 __DEFAULT_FN_ATTRS
|
||||
_m_pfrcpit1(__m64 __m1, __m64 __m2) {
|
||||
return (__m64)__builtin_ia32_pfrcpit1((__v2sf)__m1, (__v2sf)__m2);
|
||||
}
|
||||
|
||||
static __inline__ __m64 __DEFAULT_FN_ATTRS
|
||||
_m_pfrcpit2(__m64 __m1, __m64 __m2) {
|
||||
return (__m64)__builtin_ia32_pfrcpit2((__v2sf)__m1, (__v2sf)__m2);
|
||||
}
|
||||
|
||||
static __inline__ __m64 __DEFAULT_FN_ATTRS
|
||||
_m_pfrsqrt(__m64 __m) {
|
||||
return (__m64)__builtin_ia32_pfrsqrt((__v2sf)__m);
|
||||
}
|
||||
|
||||
static __inline__ __m64 __DEFAULT_FN_ATTRS
|
||||
_m_pfrsqrtit1(__m64 __m1, __m64 __m2) {
|
||||
return (__m64)__builtin_ia32_pfrsqit1((__v2sf)__m1, (__v2sf)__m2);
|
||||
}
|
||||
|
||||
static __inline__ __m64 __DEFAULT_FN_ATTRS
|
||||
_m_pfsub(__m64 __m1, __m64 __m2) {
|
||||
return (__m64)__builtin_ia32_pfsub((__v2sf)__m1, (__v2sf)__m2);
|
||||
}
|
||||
|
||||
static __inline__ __m64 __DEFAULT_FN_ATTRS
|
||||
_m_pfsubr(__m64 __m1, __m64 __m2) {
|
||||
return (__m64)__builtin_ia32_pfsubr((__v2sf)__m1, (__v2sf)__m2);
|
||||
}
|
||||
|
||||
static __inline__ __m64 __DEFAULT_FN_ATTRS
|
||||
_m_pi2fd(__m64 __m) {
|
||||
return (__m64)__builtin_ia32_pi2fd((__v2si)__m);
|
||||
}
|
||||
|
||||
static __inline__ __m64 __DEFAULT_FN_ATTRS
|
||||
_m_pmulhrw(__m64 __m1, __m64 __m2) {
|
||||
return (__m64)__builtin_ia32_pmulhrw((__v4hi)__m1, (__v4hi)__m2);
|
||||
}
|
||||
|
||||
/* Handle the 3dnowa instructions here. */
|
||||
#undef __DEFAULT_FN_ATTRS
|
||||
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("3dnowa"), __min_vector_width__(64)))
|
||||
|
||||
static __inline__ __m64 __DEFAULT_FN_ATTRS
|
||||
_m_pf2iw(__m64 __m) {
|
||||
return (__m64)__builtin_ia32_pf2iw((__v2sf)__m);
|
||||
}
|
||||
|
||||
static __inline__ __m64 __DEFAULT_FN_ATTRS
|
||||
_m_pfnacc(__m64 __m1, __m64 __m2) {
|
||||
return (__m64)__builtin_ia32_pfnacc((__v2sf)__m1, (__v2sf)__m2);
|
||||
}
|
||||
|
||||
static __inline__ __m64 __DEFAULT_FN_ATTRS
|
||||
_m_pfpnacc(__m64 __m1, __m64 __m2) {
|
||||
return (__m64)__builtin_ia32_pfpnacc((__v2sf)__m1, (__v2sf)__m2);
|
||||
}
|
||||
|
||||
static __inline__ __m64 __DEFAULT_FN_ATTRS
|
||||
_m_pi2fw(__m64 __m) {
|
||||
return (__m64)__builtin_ia32_pi2fw((__v2si)__m);
|
||||
}
|
||||
|
||||
static __inline__ __m64 __DEFAULT_FN_ATTRS
|
||||
_m_pswapdsf(__m64 __m) {
|
||||
return (__m64)__builtin_ia32_pswapdsf((__v2sf)__m);
|
||||
}
|
||||
|
||||
static __inline__ __m64 __DEFAULT_FN_ATTRS
|
||||
_m_pswapdsi(__m64 __m) {
|
||||
return (__m64)__builtin_ia32_pswapdsi((__v2si)__m);
|
||||
}
|
||||
|
||||
#undef __DEFAULT_FN_ATTRS
|
||||
|
||||
#endif
|
||||
|
160
lib/include/mmintrin.h
vendored
160
lib/include/mmintrin.h
vendored
@ -105,28 +105,23 @@ _mm_cvtm64_si64(__m64 __m)
|
||||
return (long long)__m;
|
||||
}
|
||||
|
||||
/// Converts 16-bit signed integers from both 64-bit integer vector
|
||||
/// parameters of [4 x i16] into 8-bit signed integer values, and constructs
|
||||
/// a 64-bit integer vector of [8 x i8] as the result. Positive values
|
||||
/// greater than 0x7F are saturated to 0x7F. Negative values less than 0x80
|
||||
/// are saturated to 0x80.
|
||||
/// Converts, with saturation, 16-bit signed integers from both 64-bit integer
|
||||
/// vector parameters of [4 x i16] into 8-bit signed integer values, and
|
||||
/// constructs a 64-bit integer vector of [8 x i8] as the result.
|
||||
///
|
||||
/// Positive values greater than 0x7F are saturated to 0x7F. Negative values
|
||||
/// less than 0x80 are saturated to 0x80.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> PACKSSWB </c> instruction.
|
||||
///
|
||||
/// \param __m1
|
||||
/// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a
|
||||
/// 16-bit signed integer and is converted to an 8-bit signed integer with
|
||||
/// saturation. Positive values greater than 0x7F are saturated to 0x7F.
|
||||
/// Negative values less than 0x80 are saturated to 0x80. The converted
|
||||
/// [4 x i8] values are written to the lower 32 bits of the result.
|
||||
/// A 64-bit integer vector of [4 x i16]. The converted [4 x i8] values are
|
||||
/// written to the lower 32 bits of the result.
|
||||
/// \param __m2
|
||||
/// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a
|
||||
/// 16-bit signed integer and is converted to an 8-bit signed integer with
|
||||
/// saturation. Positive values greater than 0x7F are saturated to 0x7F.
|
||||
/// Negative values less than 0x80 are saturated to 0x80. The converted
|
||||
/// [4 x i8] values are written to the upper 32 bits of the result.
|
||||
/// A 64-bit integer vector of [4 x i16]. The converted [4 x i8] values are
|
||||
/// written to the upper 32 bits of the result.
|
||||
/// \returns A 64-bit integer vector of [8 x i8] containing the converted
|
||||
/// values.
|
||||
static __inline__ __m64 __DEFAULT_FN_ATTRS
|
||||
@ -135,28 +130,23 @@ _mm_packs_pi16(__m64 __m1, __m64 __m2)
|
||||
return (__m64)__builtin_ia32_packsswb((__v4hi)__m1, (__v4hi)__m2);
|
||||
}
|
||||
|
||||
/// Converts 32-bit signed integers from both 64-bit integer vector
|
||||
/// parameters of [2 x i32] into 16-bit signed integer values, and constructs
|
||||
/// a 64-bit integer vector of [4 x i16] as the result. Positive values
|
||||
/// greater than 0x7FFF are saturated to 0x7FFF. Negative values less than
|
||||
/// 0x8000 are saturated to 0x8000.
|
||||
/// Converts, with saturation, 32-bit signed integers from both 64-bit integer
|
||||
/// vector parameters of [2 x i32] into 16-bit signed integer values, and
|
||||
/// constructs a 64-bit integer vector of [4 x i16] as the result.
|
||||
///
|
||||
/// Positive values greater than 0x7FFF are saturated to 0x7FFF. Negative
|
||||
/// values less than 0x8000 are saturated to 0x8000.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> PACKSSDW </c> instruction.
|
||||
///
|
||||
/// \param __m1
|
||||
/// A 64-bit integer vector of [2 x i32]. Each 32-bit element is treated as a
|
||||
/// 32-bit signed integer and is converted to a 16-bit signed integer with
|
||||
/// saturation. Positive values greater than 0x7FFF are saturated to 0x7FFF.
|
||||
/// Negative values less than 0x8000 are saturated to 0x8000. The converted
|
||||
/// [2 x i16] values are written to the lower 32 bits of the result.
|
||||
/// A 64-bit integer vector of [2 x i32]. The converted [2 x i16] values are
|
||||
/// written to the lower 32 bits of the result.
|
||||
/// \param __m2
|
||||
/// A 64-bit integer vector of [2 x i32]. Each 32-bit element is treated as a
|
||||
/// 32-bit signed integer and is converted to a 16-bit signed integer with
|
||||
/// saturation. Positive values greater than 0x7FFF are saturated to 0x7FFF.
|
||||
/// Negative values less than 0x8000 are saturated to 0x8000. The converted
|
||||
/// [2 x i16] values are written to the upper 32 bits of the result.
|
||||
/// A 64-bit integer vector of [2 x i32]. The converted [2 x i16] values are
|
||||
/// written to the upper 32 bits of the result.
|
||||
/// \returns A 64-bit integer vector of [4 x i16] containing the converted
|
||||
/// values.
|
||||
static __inline__ __m64 __DEFAULT_FN_ATTRS
|
||||
@ -165,28 +155,23 @@ _mm_packs_pi32(__m64 __m1, __m64 __m2)
|
||||
return (__m64)__builtin_ia32_packssdw((__v2si)__m1, (__v2si)__m2);
|
||||
}
|
||||
|
||||
/// Converts 16-bit signed integers from both 64-bit integer vector
|
||||
/// parameters of [4 x i16] into 8-bit unsigned integer values, and
|
||||
/// constructs a 64-bit integer vector of [8 x i8] as the result. Values
|
||||
/// greater than 0xFF are saturated to 0xFF. Values less than 0 are saturated
|
||||
/// to 0.
|
||||
/// Converts, with saturation, 16-bit signed integers from both 64-bit integer
|
||||
/// vector parameters of [4 x i16] into 8-bit unsigned integer values, and
|
||||
/// constructs a 64-bit integer vector of [8 x i8] as the result.
|
||||
///
|
||||
/// Values greater than 0xFF are saturated to 0xFF. Values less than 0 are
|
||||
/// saturated to 0.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> PACKUSWB </c> instruction.
|
||||
///
|
||||
/// \param __m1
|
||||
/// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a
|
||||
/// 16-bit signed integer and is converted to an 8-bit unsigned integer with
|
||||
/// saturation. Values greater than 0xFF are saturated to 0xFF. Values less
|
||||
/// than 0 are saturated to 0. The converted [4 x i8] values are written to
|
||||
/// the lower 32 bits of the result.
|
||||
/// A 64-bit integer vector of [4 x i16]. The converted [4 x i8] values are
|
||||
/// written to the lower 32 bits of the result.
|
||||
/// \param __m2
|
||||
/// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a
|
||||
/// 16-bit signed integer and is converted to an 8-bit unsigned integer with
|
||||
/// saturation. Values greater than 0xFF are saturated to 0xFF. Values less
|
||||
/// than 0 are saturated to 0. The converted [4 x i8] values are written to
|
||||
/// the upper 32 bits of the result.
|
||||
/// A 64-bit integer vector of [4 x i16]. The converted [4 x i8] values are
|
||||
/// written to the upper 32 bits of the result.
|
||||
/// \returns A 64-bit integer vector of [8 x i8] containing the converted
|
||||
/// values.
|
||||
static __inline__ __m64 __DEFAULT_FN_ATTRS
|
||||
@ -400,11 +385,13 @@ _mm_add_pi32(__m64 __m1, __m64 __m2)
|
||||
return (__m64)__builtin_ia32_paddd((__v2si)__m1, (__v2si)__m2);
|
||||
}
|
||||
|
||||
/// Adds each 8-bit signed integer element of the first 64-bit integer
|
||||
/// vector of [8 x i8] to the corresponding 8-bit signed integer element of
|
||||
/// the second 64-bit integer vector of [8 x i8]. Positive sums greater than
|
||||
/// 0x7F are saturated to 0x7F. Negative sums less than 0x80 are saturated to
|
||||
/// 0x80. The results are packed into a 64-bit integer vector of [8 x i8].
|
||||
/// Adds, with saturation, each 8-bit signed integer element of the first
|
||||
/// 64-bit integer vector of [8 x i8] to the corresponding 8-bit signed
|
||||
/// integer element of the second 64-bit integer vector of [8 x i8].
|
||||
///
|
||||
/// Positive sums greater than 0x7F are saturated to 0x7F. Negative sums
|
||||
/// less than 0x80 are saturated to 0x80. The results are packed into a
|
||||
/// 64-bit integer vector of [8 x i8].
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@ -422,12 +409,13 @@ _mm_adds_pi8(__m64 __m1, __m64 __m2)
|
||||
return (__m64)__builtin_ia32_paddsb((__v8qi)__m1, (__v8qi)__m2);
|
||||
}
|
||||
|
||||
/// Adds each 16-bit signed integer element of the first 64-bit integer
|
||||
/// vector of [4 x i16] to the corresponding 16-bit signed integer element of
|
||||
/// the second 64-bit integer vector of [4 x i16]. Positive sums greater than
|
||||
/// 0x7FFF are saturated to 0x7FFF. Negative sums less than 0x8000 are
|
||||
/// saturated to 0x8000. The results are packed into a 64-bit integer vector
|
||||
/// of [4 x i16].
|
||||
/// Adds, with saturation, each 16-bit signed integer element of the first
|
||||
/// 64-bit integer vector of [4 x i16] to the corresponding 16-bit signed
|
||||
/// integer element of the second 64-bit integer vector of [4 x i16].
|
||||
///
|
||||
/// Positive sums greater than 0x7FFF are saturated to 0x7FFF. Negative sums
|
||||
/// less than 0x8000 are saturated to 0x8000. The results are packed into a
|
||||
/// 64-bit integer vector of [4 x i16].
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@ -445,11 +433,12 @@ _mm_adds_pi16(__m64 __m1, __m64 __m2)
|
||||
return (__m64)__builtin_ia32_paddsw((__v4hi)__m1, (__v4hi)__m2);
|
||||
}
|
||||
|
||||
/// Adds each 8-bit unsigned integer element of the first 64-bit integer
|
||||
/// vector of [8 x i8] to the corresponding 8-bit unsigned integer element of
|
||||
/// the second 64-bit integer vector of [8 x i8]. Sums greater than 0xFF are
|
||||
/// saturated to 0xFF. The results are packed into a 64-bit integer vector of
|
||||
/// [8 x i8].
|
||||
/// Adds, with saturation, each 8-bit unsigned integer element of the first
|
||||
/// 64-bit integer vector of [8 x i8] to the corresponding 8-bit unsigned
|
||||
/// integer element of the second 64-bit integer vector of [8 x i8].
|
||||
///
|
||||
/// Sums greater than 0xFF are saturated to 0xFF. The results are packed
|
||||
/// into a 64-bit integer vector of [8 x i8].
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@ -467,11 +456,12 @@ _mm_adds_pu8(__m64 __m1, __m64 __m2)
|
||||
return (__m64)__builtin_ia32_paddusb((__v8qi)__m1, (__v8qi)__m2);
|
||||
}
|
||||
|
||||
/// Adds each 16-bit unsigned integer element of the first 64-bit integer
|
||||
/// vector of [4 x i16] to the corresponding 16-bit unsigned integer element
|
||||
/// of the second 64-bit integer vector of [4 x i16]. Sums greater than
|
||||
/// 0xFFFF are saturated to 0xFFFF. The results are packed into a 64-bit
|
||||
/// integer vector of [4 x i16].
|
||||
/// Adds, with saturation, each 16-bit unsigned integer element of the first
|
||||
/// 64-bit integer vector of [4 x i16] to the corresponding 16-bit unsigned
|
||||
/// integer element of the second 64-bit integer vector of [4 x i16].
|
||||
///
|
||||
/// Sums greater than 0xFFFF are saturated to 0xFFFF. The results are packed
|
||||
/// into a 64-bit integer vector of [4 x i16].
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@ -552,12 +542,13 @@ _mm_sub_pi32(__m64 __m1, __m64 __m2)
|
||||
return (__m64)__builtin_ia32_psubd((__v2si)__m1, (__v2si)__m2);
|
||||
}
|
||||
|
||||
/// Subtracts each 8-bit signed integer element of the second 64-bit
|
||||
/// integer vector of [8 x i8] from the corresponding 8-bit signed integer
|
||||
/// element of the first 64-bit integer vector of [8 x i8]. Positive results
|
||||
/// greater than 0x7F are saturated to 0x7F. Negative results less than 0x80
|
||||
/// are saturated to 0x80. The results are packed into a 64-bit integer
|
||||
/// vector of [8 x i8].
|
||||
/// Subtracts, with saturation, each 8-bit signed integer element of the second
|
||||
/// 64-bit integer vector of [8 x i8] from the corresponding 8-bit signed
|
||||
/// integer element of the first 64-bit integer vector of [8 x i8].
|
||||
///
|
||||
/// Positive results greater than 0x7F are saturated to 0x7F. Negative
|
||||
/// results less than 0x80 are saturated to 0x80. The results are packed
|
||||
/// into a 64-bit integer vector of [8 x i8].
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@ -575,12 +566,13 @@ _mm_subs_pi8(__m64 __m1, __m64 __m2)
|
||||
return (__m64)__builtin_ia32_psubsb((__v8qi)__m1, (__v8qi)__m2);
|
||||
}
|
||||
|
||||
/// Subtracts each 16-bit signed integer element of the second 64-bit
|
||||
/// integer vector of [4 x i16] from the corresponding 16-bit signed integer
|
||||
/// element of the first 64-bit integer vector of [4 x i16]. Positive results
|
||||
/// greater than 0x7FFF are saturated to 0x7FFF. Negative results less than
|
||||
/// 0x8000 are saturated to 0x8000. The results are packed into a 64-bit
|
||||
/// integer vector of [4 x i16].
|
||||
/// Subtracts, with saturation, each 16-bit signed integer element of the
|
||||
/// second 64-bit integer vector of [4 x i16] from the corresponding 16-bit
|
||||
/// signed integer element of the first 64-bit integer vector of [4 x i16].
|
||||
///
|
||||
/// Positive results greater than 0x7FFF are saturated to 0x7FFF. Negative
|
||||
/// results less than 0x8000 are saturated to 0x8000. The results are packed
|
||||
/// into a 64-bit integer vector of [4 x i16].
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@ -1149,7 +1141,7 @@ _mm_xor_si64(__m64 __m1, __m64 __m2)
|
||||
/// [8 x i8] to determine if the element of the first vector is equal to the
|
||||
/// corresponding element of the second vector.
|
||||
///
|
||||
/// The comparison yields 0 for false, 0xFF for true.
|
||||
/// Each comparison returns 0 for false, 0xFF for true.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@ -1171,7 +1163,7 @@ _mm_cmpeq_pi8(__m64 __m1, __m64 __m2)
|
||||
/// [4 x i16] to determine if the element of the first vector is equal to the
|
||||
/// corresponding element of the second vector.
|
||||
///
|
||||
/// The comparison yields 0 for false, 0xFFFF for true.
|
||||
/// Each comparison returns 0 for false, 0xFFFF for true.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@ -1193,7 +1185,7 @@ _mm_cmpeq_pi16(__m64 __m1, __m64 __m2)
|
||||
/// [2 x i32] to determine if the element of the first vector is equal to the
|
||||
/// corresponding element of the second vector.
|
||||
///
|
||||
/// The comparison yields 0 for false, 0xFFFFFFFF for true.
|
||||
/// Each comparison returns 0 for false, 0xFFFFFFFF for true.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@ -1215,7 +1207,7 @@ _mm_cmpeq_pi32(__m64 __m1, __m64 __m2)
|
||||
/// [8 x i8] to determine if the element of the first vector is greater than
|
||||
/// the corresponding element of the second vector.
|
||||
///
|
||||
/// The comparison yields 0 for false, 0xFF for true.
|
||||
/// Each comparison returns 0 for false, 0xFF for true.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@ -1237,7 +1229,7 @@ _mm_cmpgt_pi8(__m64 __m1, __m64 __m2)
|
||||
/// [4 x i16] to determine if the element of the first vector is greater than
|
||||
/// the corresponding element of the second vector.
|
||||
///
|
||||
/// The comparison yields 0 for false, 0xFFFF for true.
|
||||
/// Each comparison returns 0 for false, 0xFFFF for true.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@ -1259,7 +1251,7 @@ _mm_cmpgt_pi16(__m64 __m1, __m64 __m2)
|
||||
/// [2 x i32] to determine if the element of the first vector is greater than
|
||||
/// the corresponding element of the second vector.
|
||||
///
|
||||
/// The comparison yields 0 for false, 0xFFFFFFFF for true.
|
||||
/// Each comparison returns 0 for false, 0xFFFFFFFF for true.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
|
15
lib/include/module.modulemap
vendored
15
lib/include/module.modulemap
vendored
@ -44,7 +44,6 @@ module _Builtin_intrinsics [system] [extern_c] {
|
||||
textual header "avxintrin.h"
|
||||
textual header "avx2intrin.h"
|
||||
textual header "avx512fintrin.h"
|
||||
textual header "avx512erintrin.h"
|
||||
textual header "fmaintrin.h"
|
||||
|
||||
header "x86intrin.h"
|
||||
@ -203,6 +202,11 @@ module _Builtin_stdarg [system] {
|
||||
export *
|
||||
}
|
||||
|
||||
explicit module header_macro {
|
||||
header "__stdarg_header_macro.h"
|
||||
export *
|
||||
}
|
||||
|
||||
explicit module va_arg {
|
||||
header "__stdarg_va_arg.h"
|
||||
export *
|
||||
@ -232,6 +236,10 @@ module _Builtin_stdbool [system] {
|
||||
module _Builtin_stddef [system] {
|
||||
textual header "stddef.h"
|
||||
|
||||
explicit module header_macro {
|
||||
header "__stddef_header_macro.h"
|
||||
export *
|
||||
}
|
||||
// __stddef_max_align_t.h is always in this module, even if
|
||||
// -fbuiltin-headers-in-system-modules is passed.
|
||||
explicit module max_align_t {
|
||||
@ -315,3 +323,8 @@ module opencl_c {
|
||||
header "opencl-c.h"
|
||||
header "opencl-c-base.h"
|
||||
}
|
||||
|
||||
module ptrauth {
|
||||
header "ptrauth.h"
|
||||
export *
|
||||
}
|
||||
|
4
lib/include/opencl-c-base.h
vendored
4
lib/include/opencl-c-base.h
vendored
@ -46,6 +46,10 @@
|
||||
#define __opencl_c_ext_fp32_global_atomic_min_max 1
|
||||
#define __opencl_c_ext_fp32_local_atomic_min_max 1
|
||||
#define __opencl_c_ext_image_raw10_raw12 1
|
||||
#define cl_khr_kernel_clock 1
|
||||
#define __opencl_c_kernel_clock_scope_device 1
|
||||
#define __opencl_c_kernel_clock_scope_work_group 1
|
||||
#define __opencl_c_kernel_clock_scope_sub_group 1
|
||||
|
||||
#endif // defined(__SPIR__) || defined(__SPIRV__)
|
||||
#endif // (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200)
|
||||
|
15
lib/include/opencl-c.h
vendored
15
lib/include/opencl-c.h
vendored
@ -17314,6 +17314,21 @@ half __ovld __conv sub_group_clustered_rotate(half, int, uint);
|
||||
#endif // cl_khr_fp16
|
||||
#endif // cl_khr_subgroup_rotate
|
||||
|
||||
#if defined(cl_khr_kernel_clock)
|
||||
#if defined(__opencl_c_kernel_clock_scope_device)
|
||||
ulong __ovld clock_read_device();
|
||||
uint2 __ovld clock_read_hilo_device();
|
||||
#endif // __opencl_c_kernel_clock_scope_device
|
||||
#if defined(__opencl_c_kernel_clock_scope_work_group)
|
||||
ulong __ovld clock_read_work_group();
|
||||
uint2 __ovld clock_read_hilo_work_group();
|
||||
#endif // __opencl_c_kernel_clock_scope_work_group
|
||||
#if defined(__opencl_c_kernel_clock_scope_sub_group)
|
||||
ulong __ovld clock_read_sub_group();
|
||||
uint2 __ovld clock_read_hilo_sub_group();
|
||||
#endif // __opencl_c_kernel_clock_scope_sub_group
|
||||
#endif // cl_khr_kernel_clock
|
||||
|
||||
#if defined(cl_intel_subgroups)
|
||||
// Intel-Specific Sub Group Functions
|
||||
float __ovld __conv intel_sub_group_shuffle( float , uint );
|
||||
|
18
lib/include/prfchwintrin.h
vendored
18
lib/include/prfchwintrin.h
vendored
@ -8,16 +8,17 @@
|
||||
*/
|
||||
|
||||
#if !defined(__X86INTRIN_H) && !defined(_MM3DNOW_H_INCLUDED)
|
||||
#error "Never use <prfchwintrin.h> directly; include <x86intrin.h> or <mm3dnow.h> instead."
|
||||
#error "Never use <prfchwintrin.h> directly; include <x86intrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef __PRFCHWINTRIN_H
|
||||
#define __PRFCHWINTRIN_H
|
||||
|
||||
/// Loads a memory sequence containing the specified memory address into
|
||||
/// all data cache levels. The cache-coherency state is set to exclusive.
|
||||
/// Data can be read from and written to the cache line without additional
|
||||
/// delay.
|
||||
/// all data cache levels.
|
||||
///
|
||||
/// The cache-coherency state is set to exclusive. Data can be read from
|
||||
/// and written to the cache line without additional delay.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@ -32,10 +33,11 @@ _m_prefetch(void *__P)
|
||||
}
|
||||
|
||||
/// Loads a memory sequence containing the specified memory address into
|
||||
/// the L1 data cache and sets the cache-coherency to modified. This
|
||||
/// provides a hint to the processor that the cache line will be modified.
|
||||
/// It is intended for use when the cache line will be written to shortly
|
||||
/// after the prefetch is performed.
|
||||
/// the L1 data cache and sets the cache-coherency state to modified.
|
||||
///
|
||||
/// This provides a hint to the processor that the cache line will be
|
||||
/// modified. It is intended for use when the cache line will be written to
|
||||
/// shortly after the prefetch is performed.
|
||||
///
|
||||
/// Note that the effect of this intrinsic is dependent on the processor
|
||||
/// implementation.
|
||||
|
330
lib/include/ptrauth.h
vendored
Normal file
330
lib/include/ptrauth.h
vendored
Normal file
@ -0,0 +1,330 @@
|
||||
/*===---- ptrauth.h - Pointer authentication -------------------------------===
|
||||
*
|
||||
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
* See https://llvm.org/LICENSE.txt for license information.
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
|
||||
#ifndef __PTRAUTH_H
|
||||
#define __PTRAUTH_H
|
||||
|
||||
typedef enum {
|
||||
ptrauth_key_asia = 0,
|
||||
ptrauth_key_asib = 1,
|
||||
ptrauth_key_asda = 2,
|
||||
ptrauth_key_asdb = 3,
|
||||
|
||||
/* A process-independent key which can be used to sign code pointers. */
|
||||
ptrauth_key_process_independent_code = ptrauth_key_asia,
|
||||
|
||||
/* A process-specific key which can be used to sign code pointers. */
|
||||
ptrauth_key_process_dependent_code = ptrauth_key_asib,
|
||||
|
||||
/* A process-independent key which can be used to sign data pointers. */
|
||||
ptrauth_key_process_independent_data = ptrauth_key_asda,
|
||||
|
||||
/* A process-specific key which can be used to sign data pointers. */
|
||||
ptrauth_key_process_dependent_data = ptrauth_key_asdb,
|
||||
|
||||
/* The key used to sign return addresses on the stack.
|
||||
The extra data is based on the storage address of the return address.
|
||||
On AArch64, that is always the storage address of the return address + 8
|
||||
(or, in other words, the value of the stack pointer on function entry) */
|
||||
ptrauth_key_return_address = ptrauth_key_process_dependent_code,
|
||||
|
||||
/* The key used to sign C function pointers.
|
||||
The extra data is always 0. */
|
||||
ptrauth_key_function_pointer = ptrauth_key_process_independent_code,
|
||||
|
||||
/* The key used to sign C++ v-table pointers.
|
||||
The extra data is always 0. */
|
||||
ptrauth_key_cxx_vtable_pointer = ptrauth_key_process_independent_data,
|
||||
|
||||
/* Other pointers signed under the ABI use private ABI rules. */
|
||||
|
||||
} ptrauth_key;
|
||||
|
||||
/* An integer type of the appropriate size for a discriminator argument. */
|
||||
typedef __UINTPTR_TYPE__ ptrauth_extra_data_t;
|
||||
|
||||
/* An integer type of the appropriate size for a generic signature. */
|
||||
typedef __UINTPTR_TYPE__ ptrauth_generic_signature_t;
|
||||
|
||||
/* A signed pointer value embeds the original pointer together with
|
||||
a signature that attests to the validity of that pointer. Because
|
||||
this signature must use only "spare" bits of the pointer, a
|
||||
signature's validity is probabilistic in practice: it is unlikely
|
||||
but still plausible that an invalidly-derived signature will
|
||||
somehow equal the correct signature and therefore successfully
|
||||
authenticate. Nonetheless, this scheme provides a strong degree
|
||||
of protection against certain kinds of attacks. */
|
||||
|
||||
/* Authenticating a pointer that was not signed with the given key
|
||||
and extra-data value will (likely) fail by trapping. */
|
||||
|
||||
/* The null function pointer is always the all-zero bit pattern.
|
||||
Signing an all-zero bit pattern will embed a (likely) non-zero
|
||||
signature in the result, and so the result will not seem to be
|
||||
a null function pointer. Authenticating this value will yield
|
||||
a null function pointer back. However, authenticating an
|
||||
all-zero bit pattern will probably fail, because the
|
||||
authentication will expect a (likely) non-zero signature to
|
||||
be embedded in the value.
|
||||
|
||||
Because of this, if a pointer may validly be null, you should
|
||||
check for null before attempting to authenticate it with one
|
||||
of these intrinsics. This is not necessary when using the
|
||||
__ptrauth qualifier; the compiler will perform this check
|
||||
automatically. */
|
||||
|
||||
#if __has_feature(ptrauth_intrinsics)
|
||||
|
||||
/* Strip the signature from a value without authenticating it.
|
||||
|
||||
If the value is a function pointer, the result will not be a
|
||||
legal function pointer because of the missing signature, and
|
||||
attempting to call it will result in an authentication failure.
|
||||
|
||||
The value must be an expression of pointer type.
|
||||
The key must be a constant expression of type ptrauth_key.
|
||||
The result will have the same type as the original value. */
|
||||
#define ptrauth_strip(__value, __key) __builtin_ptrauth_strip(__value, __key)
|
||||
|
||||
/* Blend a constant discriminator into the given pointer-like value
|
||||
to form a new discriminator. Not all bits of the inputs are
|
||||
guaranteed to contribute to the result.
|
||||
|
||||
On arm64e, the integer must fall within the range of a uint16_t;
|
||||
other bits may be ignored.
|
||||
|
||||
For the purposes of ptrauth_sign_constant, the result of calling
|
||||
this function is considered a constant expression if the arguments
|
||||
are constant. Some restrictions may be imposed on the pointer.
|
||||
|
||||
The first argument must be an expression of pointer type.
|
||||
The second argument must be an expression of integer type.
|
||||
The result will have type uintptr_t. */
|
||||
#define ptrauth_blend_discriminator(__pointer, __integer) \
|
||||
__builtin_ptrauth_blend_discriminator(__pointer, __integer)
|
||||
|
||||
/* Return a signed pointer for a constant address in a manner which guarantees
|
||||
a non-attackable sequence.
|
||||
|
||||
The value must be a constant expression of pointer type which evaluates to
|
||||
a non-null pointer.
|
||||
The key must be a constant expression of type ptrauth_key.
|
||||
The extra data must be a constant expression of pointer or integer type;
|
||||
if an integer, it will be coerced to ptrauth_extra_data_t.
|
||||
The result will have the same type as the original value.
|
||||
|
||||
This can be used in constant expressions. */
|
||||
#define ptrauth_sign_constant(__value, __key, __data) \
|
||||
__builtin_ptrauth_sign_constant(__value, __key, __data)
|
||||
|
||||
/* Add a signature to the given pointer value using a specific key,
|
||||
using the given extra data as a salt to the signing process.
|
||||
|
||||
This operation does not authenticate the original value and is
|
||||
therefore potentially insecure if an attacker could possibly
|
||||
control that value.
|
||||
|
||||
The value must be an expression of pointer type.
|
||||
The key must be a constant expression of type ptrauth_key.
|
||||
The extra data must be an expression of pointer or integer type;
|
||||
if an integer, it will be coerced to ptrauth_extra_data_t.
|
||||
The result will have the same type as the original value. */
|
||||
#define ptrauth_sign_unauthenticated(__value, __key, __data) \
|
||||
__builtin_ptrauth_sign_unauthenticated(__value, __key, __data)
|
||||
|
||||
/* Authenticate a pointer using one scheme and resign it using another.
|
||||
|
||||
If the result is subsequently authenticated using the new scheme, that
|
||||
authentication is guaranteed to fail if and only if the initial
|
||||
authentication failed.
|
||||
|
||||
The value must be an expression of pointer type.
|
||||
The key must be a constant expression of type ptrauth_key.
|
||||
The extra data must be an expression of pointer or integer type;
|
||||
if an integer, it will be coerced to ptrauth_extra_data_t.
|
||||
The result will have the same type as the original value.
|
||||
|
||||
This operation is guaranteed to not leave the intermediate value
|
||||
available for attack before it is re-signed.
|
||||
|
||||
Do not pass a null pointer to this function. A null pointer
|
||||
will not successfully authenticate.
|
||||
|
||||
This operation traps if the authentication fails. */
|
||||
#define ptrauth_auth_and_resign(__value, __old_key, __old_data, __new_key, \
|
||||
__new_data) \
|
||||
__builtin_ptrauth_auth_and_resign(__value, __old_key, __old_data, __new_key, \
|
||||
__new_data)
|
||||
|
||||
/* Authenticate a pointer using one scheme and resign it as a C
|
||||
function pointer.
|
||||
|
||||
If the result is subsequently authenticated using the new scheme, that
|
||||
authentication is guaranteed to fail if and only if the initial
|
||||
authentication failed.
|
||||
|
||||
The value must be an expression of function pointer type.
|
||||
The key must be a constant expression of type ptrauth_key.
|
||||
The extra data must be an expression of pointer or integer type;
|
||||
if an integer, it will be coerced to ptrauth_extra_data_t.
|
||||
The result will have the same type as the original value.
|
||||
|
||||
This operation is guaranteed to not leave the intermediate value
|
||||
available for attack before it is re-signed. Additionally, if this
|
||||
expression is used syntactically as the function expression in a
|
||||
call, only a single authentication will be performed. */
|
||||
#define ptrauth_auth_function(__value, __old_key, __old_data) \
|
||||
ptrauth_auth_and_resign(__value, __old_key, __old_data, \
|
||||
ptrauth_key_function_pointer, 0)
|
||||
|
||||
/* Authenticate a data pointer.
|
||||
|
||||
The value must be an expression of non-function pointer type.
|
||||
The key must be a constant expression of type ptrauth_key.
|
||||
The extra data must be an expression of pointer or integer type;
|
||||
if an integer, it will be coerced to ptrauth_extra_data_t.
|
||||
The result will have the same type as the original value.
|
||||
|
||||
This operation traps if the authentication fails. */
|
||||
#define ptrauth_auth_data(__value, __old_key, __old_data) \
|
||||
__builtin_ptrauth_auth(__value, __old_key, __old_data)
|
||||
|
||||
/* Compute a constant discriminator from the given string.
|
||||
|
||||
The argument must be a string literal of char character type. The result
|
||||
has type ptrauth_extra_data_t.
|
||||
|
||||
The result value is never zero and always within range for both the
|
||||
__ptrauth qualifier and ptrauth_blend_discriminator.
|
||||
|
||||
This can be used in constant expressions.
|
||||
*/
|
||||
#define ptrauth_string_discriminator(__string) \
|
||||
__builtin_ptrauth_string_discriminator(__string)
|
||||
|
||||
/* Compute a constant discriminator from the given type.
|
||||
|
||||
The result can be used as the second argument to
|
||||
ptrauth_blend_discriminator or the third argument to the
|
||||
__ptrauth qualifier. It has type size_t.
|
||||
|
||||
If the type is a C++ member function pointer type, the result is
|
||||
the discriminator used to sign member function pointers of that
|
||||
type. If the type is a function, function pointer, or function
|
||||
reference type, the result is the discriminator used to sign
|
||||
functions of that type. It is ill-formed to use this macro with any
|
||||
other type.
|
||||
|
||||
A call to this function is an integer constant expression. */
|
||||
#define ptrauth_type_discriminator(__type) \
|
||||
__builtin_ptrauth_type_discriminator(__type)
|
||||
|
||||
/* Compute a signature for the given pair of pointer-sized values.
|
||||
The order of the arguments is significant.
|
||||
|
||||
Like a pointer signature, the resulting signature depends on
|
||||
private key data and therefore should not be reliably reproducible
|
||||
by attackers. That means that this can be used to validate the
|
||||
integrity of arbitrary data by storing a signature for that data
|
||||
alongside it, then checking that the signature is still valid later.
|
||||
Data which exceeds two pointers in size can be signed by either
|
||||
computing a tree of generic signatures or just signing an ordinary
|
||||
cryptographic hash of the data.
|
||||
|
||||
The result has type ptrauth_generic_signature_t. However, it may
|
||||
not have as many bits of entropy as that type's width would suggest;
|
||||
some implementations are known to compute a compressed signature as
|
||||
if the arguments were a pointer and a discriminator.
|
||||
|
||||
The arguments must be either pointers or integers; if integers, they
|
||||
will be coerced to uintptr_t. */
|
||||
#define ptrauth_sign_generic_data(__value, __data) \
|
||||
__builtin_ptrauth_sign_generic_data(__value, __data)
|
||||
|
||||
/* C++ vtable pointer signing class attribute */
|
||||
#define ptrauth_cxx_vtable_pointer(key, address_discrimination, \
|
||||
extra_discrimination...) \
|
||||
[[clang::ptrauth_vtable_pointer(key, address_discrimination, \
|
||||
extra_discrimination)]]
|
||||
|
||||
#else
|
||||
|
||||
#define ptrauth_strip(__value, __key) \
|
||||
({ \
|
||||
(void)__key; \
|
||||
__value; \
|
||||
})
|
||||
|
||||
#define ptrauth_blend_discriminator(__pointer, __integer) \
|
||||
({ \
|
||||
(void)__pointer; \
|
||||
(void)__integer; \
|
||||
((ptrauth_extra_data_t)0); \
|
||||
})
|
||||
|
||||
#define ptrauth_sign_constant(__value, __key, __data) \
|
||||
({ \
|
||||
(void)__key; \
|
||||
(void)__data; \
|
||||
__value; \
|
||||
})
|
||||
|
||||
#define ptrauth_sign_unauthenticated(__value, __key, __data) \
|
||||
({ \
|
||||
(void)__key; \
|
||||
(void)__data; \
|
||||
__value; \
|
||||
})
|
||||
|
||||
#define ptrauth_auth_and_resign(__value, __old_key, __old_data, __new_key, \
|
||||
__new_data) \
|
||||
({ \
|
||||
(void)__old_key; \
|
||||
(void)__old_data; \
|
||||
(void)__new_key; \
|
||||
(void)__new_data; \
|
||||
__value; \
|
||||
})
|
||||
|
||||
#define ptrauth_auth_function(__value, __old_key, __old_data) \
|
||||
({ \
|
||||
(void)__old_key; \
|
||||
(void)__old_data; \
|
||||
__value; \
|
||||
})
|
||||
|
||||
#define ptrauth_auth_data(__value, __old_key, __old_data) \
|
||||
({ \
|
||||
(void)__old_key; \
|
||||
(void)__old_data; \
|
||||
__value; \
|
||||
})
|
||||
|
||||
#define ptrauth_string_discriminator(__string) \
|
||||
({ \
|
||||
(void)__string; \
|
||||
((ptrauth_extra_data_t)0); \
|
||||
})
|
||||
|
||||
#define ptrauth_type_discriminator(__type) ((ptrauth_extra_data_t)0)
|
||||
|
||||
#define ptrauth_sign_generic_data(__value, __data) \
|
||||
({ \
|
||||
(void)__value; \
|
||||
(void)__data; \
|
||||
((ptrauth_generic_signature_t)0); \
|
||||
})
|
||||
|
||||
|
||||
#define ptrauth_cxx_vtable_pointer(key, address_discrimination, \
|
||||
extra_discrimination...)
|
||||
|
||||
#endif /* __has_feature(ptrauth_intrinsics) */
|
||||
|
||||
#endif /* __PTRAUTH_H */
|
4
lib/include/riscv_vector.h
vendored
4
lib/include/riscv_vector.h
vendored
@ -14,10 +14,6 @@
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
|
||||
#ifndef __riscv_vector
|
||||
#error "Vector intrinsics require the vector extension."
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
102
lib/include/sifive_vector.h
vendored
102
lib/include/sifive_vector.h
vendored
@ -13,4 +13,106 @@
|
||||
|
||||
#pragma clang riscv intrinsic sifive_vector
|
||||
|
||||
#define __riscv_sf_vc_x_se_u8mf4(p27_26, p24_20, p11_7, rs1, vl) \
|
||||
__riscv_sf_vc_x_se(p27_26, p24_20, p11_7, (uint8_t)rs1, 8, 6, vl)
|
||||
#define __riscv_sf_vc_x_se_u8mf2(p27_26, p24_20, p11_7, rs1, vl) \
|
||||
__riscv_sf_vc_x_se(p27_26, p24_20, p11_7, (uint8_t)rs1, 8, 7, vl)
|
||||
#define __riscv_sf_vc_x_se_u8m1(p27_26, p24_20, p11_7, rs1, vl) \
|
||||
__riscv_sf_vc_x_se(p27_26, p24_20, p11_7, (uint8_t)rs1, 8, 0, vl)
|
||||
#define __riscv_sf_vc_x_se_u8m2(p27_26, p24_20, p11_7, rs1, vl) \
|
||||
__riscv_sf_vc_x_se(p27_26, p24_20, p11_7, (uint8_t)rs1, 8, 1, vl)
|
||||
#define __riscv_sf_vc_x_se_u8m4(p27_26, p24_20, p11_7, rs1, vl) \
|
||||
__riscv_sf_vc_x_se(p27_26, p24_20, p11_7, (uint8_t)rs1, 8, 2, vl)
|
||||
#define __riscv_sf_vc_x_se_u8m8(p27_26, p24_20, p11_7, rs1, vl) \
|
||||
__riscv_sf_vc_x_se(p27_26, p24_20, p11_7, (uint8_t)rs1, 8, 3, vl)
|
||||
|
||||
#define __riscv_sf_vc_x_se_u16mf2(p27_26, p24_20, p11_7, rs1, vl) \
|
||||
__riscv_sf_vc_x_se(p27_26, p24_20, p11_7, (uint16_t)rs1, 16, 7, vl)
|
||||
#define __riscv_sf_vc_x_se_u16m1(p27_26, p24_20, p11_7, rs1, vl) \
|
||||
__riscv_sf_vc_x_se(p27_26, p24_20, p11_7, (uint16_t)rs1, 16, 0, vl)
|
||||
#define __riscv_sf_vc_x_se_u16m2(p27_26, p24_20, p11_7, rs1, vl) \
|
||||
__riscv_sf_vc_x_se(p27_26, p24_20, p11_7, (uint16_t)rs1, 16, 1, vl)
|
||||
#define __riscv_sf_vc_x_se_u16m4(p27_26, p24_20, p11_7, rs1, vl) \
|
||||
__riscv_sf_vc_x_se(p27_26, p24_20, p11_7, (uint16_t)rs1, 16, 2, vl)
|
||||
#define __riscv_sf_vc_x_se_u16m8(p27_26, p24_20, p11_7, rs1, vl) \
|
||||
__riscv_sf_vc_x_se(p27_26, p24_20, p11_7, (uint16_t)rs1, 16, 3, vl)
|
||||
|
||||
#define __riscv_sf_vc_x_se_u32m1(p27_26, p24_20, p11_7, rs1, vl) \
|
||||
__riscv_sf_vc_x_se(p27_26, p24_20, p11_7, (uint32_t)rs1, 32, 0, vl)
|
||||
#define __riscv_sf_vc_x_se_u32m2(p27_26, p24_20, p11_7, rs1, vl) \
|
||||
__riscv_sf_vc_x_se(p27_26, p24_20, p11_7, (uint32_t)rs1, 32, 1, vl)
|
||||
#define __riscv_sf_vc_x_se_u32m4(p27_26, p24_20, p11_7, rs1, vl) \
|
||||
__riscv_sf_vc_x_se(p27_26, p24_20, p11_7, (uint32_t)rs1, 32, 2, vl)
|
||||
#define __riscv_sf_vc_x_se_u32m8(p27_26, p24_20, p11_7, rs1, vl) \
|
||||
__riscv_sf_vc_x_se(p27_26, p24_20, p11_7, (uint32_t)rs1, 32, 3, vl)
|
||||
|
||||
#define __riscv_sf_vc_i_se_u8mf4(p27_26, p24_20, p11_7, simm5, vl) \
|
||||
__riscv_sf_vc_i_se(p27_26, p24_20, p11_7, simm5, 8, 7, vl)
|
||||
#define __riscv_sf_vc_i_se_u8mf2(p27_26, p24_20, p11_7, simm5, vl) \
|
||||
__riscv_sf_vc_i_se(p27_26, p24_20, p11_7, simm5, 8, 6, vl)
|
||||
#define __riscv_sf_vc_i_se_u8m1(p27_26, p24_20, p11_7, simm5, vl) \
|
||||
__riscv_sf_vc_i_se(p27_26, p24_20, p11_7, simm5, 8, 0, vl)
|
||||
#define __riscv_sf_vc_i_se_u8m2(p27_26, p24_20, p11_7, simm5, vl) \
|
||||
__riscv_sf_vc_i_se(p27_26, p24_20, p11_7, simm5, 8, 1, vl)
|
||||
#define __riscv_sf_vc_i_se_u8m4(p27_26, p24_20, p11_7, simm5, vl) \
|
||||
__riscv_sf_vc_i_se(p27_26, p24_20, p11_7, simm5, 8, 2, vl)
|
||||
#define __riscv_sf_vc_i_se_u8m8(p27_26, p24_20, p11_7, simm5, vl) \
|
||||
__riscv_sf_vc_i_se(p27_26, p24_20, p11_7, simm5, 8, 3, vl)
|
||||
|
||||
#define __riscv_sf_vc_i_se_u16mf2(p27_26, p24_20, p11_7, simm5, vl) \
|
||||
__riscv_sf_vc_i_se(p27_26, p24_20, p11_7, simm5, 16, 7, vl)
|
||||
#define __riscv_sf_vc_i_se_u16m1(p27_26, p24_20, p11_7, simm5, vl) \
|
||||
__riscv_sf_vc_i_se(p27_26, p24_20, p11_7, simm5, 16, 0, vl)
|
||||
#define __riscv_sf_vc_i_se_u16m2(p27_26, p24_20, p11_7, simm5, vl) \
|
||||
__riscv_sf_vc_i_se(p27_26, p24_20, p11_7, simm5, 16, 1, vl)
|
||||
#define __riscv_sf_vc_i_se_u16m4(p27_26, p24_20, p11_7, simm5, vl) \
|
||||
__riscv_sf_vc_i_se(p27_26, p24_20, p11_7, simm5, 16, 2, vl)
|
||||
#define __riscv_sf_vc_i_se_u16m8(p27_26, p24_20, p11_7, simm5, vl) \
|
||||
__riscv_sf_vc_i_se(p27_26, p24_20, p11_7, simm5, 16, 3, vl)
|
||||
|
||||
#define __riscv_sf_vc_i_se_u32m1(p27_26, p24_20, p11_7, simm5, vl) \
|
||||
__riscv_sf_vc_i_se(p27_26, p24_20, p11_7, simm5, 32, 0, vl)
|
||||
#define __riscv_sf_vc_i_se_u32m2(p27_26, p24_20, p11_7, simm5, vl) \
|
||||
__riscv_sf_vc_i_se(p27_26, p24_20, p11_7, simm5, 32, 1, vl)
|
||||
#define __riscv_sf_vc_i_se_u32m4(p27_26, p24_20, p11_7, simm5, vl) \
|
||||
__riscv_sf_vc_i_se(p27_26, p24_20, p11_7, simm5, 32, 2, vl)
|
||||
#define __riscv_sf_vc_i_se_u32m8(p27_26, p24_20, p11_7, simm5, vl) \
|
||||
__riscv_sf_vc_i_se(p27_26, p24_20, p11_7, simm5, 32, 3, vl)
|
||||
|
||||
#if __riscv_v_elen >= 64
|
||||
#define __riscv_sf_vc_x_se_u8mf8(p27_26, p24_20, p11_7, rs1, vl) \
|
||||
__riscv_sf_vc_x_se(p27_26, p24_20, p11_7, (uint8_t)rs1, 8, 5, vl)
|
||||
#define __riscv_sf_vc_x_se_u16mf4(p27_26, p24_20, p11_7, rs1, vl) \
|
||||
__riscv_sf_vc_x_se(p27_26, p24_20, p11_7, (uint16_t)rs1, 16, 6, vl)
|
||||
#define __riscv_sf_vc_x_se_u32mf2(p27_26, p24_20, p11_7, rs1, vl) \
|
||||
__riscv_sf_vc_x_se(p27_26, p24_20, p11_7, (uint32_t)rs1, 32, 7, vl)
|
||||
|
||||
#define __riscv_sf_vc_i_se_u8mf8(p27_26, p24_20, p11_7, simm5, vl) \
|
||||
__riscv_sf_vc_i_se(p27_26, p24_20, p11_7, simm5, 8, 5, vl)
|
||||
#define __riscv_sf_vc_i_se_u16mf4(p27_26, p24_20, p11_7, simm5, vl) \
|
||||
__riscv_sf_vc_i_se(p27_26, p24_20, p11_7, simm5, 16, 6, vl)
|
||||
#define __riscv_sf_vc_i_se_u32mf2(p27_26, p24_20, p11_7, simm5, vl) \
|
||||
__riscv_sf_vc_i_se(p27_26, p24_20, p11_7, simm5, 32, 7, vl)
|
||||
|
||||
#define __riscv_sf_vc_i_se_u64m1(p27_26, p24_20, p11_7, simm5, vl) \
|
||||
__riscv_sf_vc_i_se(p27_26, p24_20, p11_7, simm5, 64, 0, vl)
|
||||
#define __riscv_sf_vc_i_se_u64m2(p27_26, p24_20, p11_7, simm5, vl) \
|
||||
__riscv_sf_vc_i_se(p27_26, p24_20, p11_7, simm5, 64, 1, vl)
|
||||
#define __riscv_sf_vc_i_se_u64m4(p27_26, p24_20, p11_7, simm5, vl) \
|
||||
__riscv_sf_vc_i_se(p27_26, p24_20, p11_7, simm5, 64, 2, vl)
|
||||
#define __riscv_sf_vc_i_se_u64m8(p27_26, p24_20, p11_7, simm5, vl) \
|
||||
__riscv_sf_vc_i_se(p27_26, p24_20, p11_7, simm5, 64, 3, vl)
|
||||
|
||||
#if __riscv_xlen >= 64
|
||||
#define __riscv_sf_vc_x_se_u64m1(p27_26, p24_20, p11_7, rs1, vl) \
|
||||
__riscv_sf_vc_x_se(p27_26, p24_20, p11_7, (uint64_t)rs1, 64, 0, vl)
|
||||
#define __riscv_sf_vc_x_se_u64m2(p27_26, p24_20, p11_7, rs1, vl) \
|
||||
__riscv_sf_vc_x_se(p27_26, p24_20, p11_7, (uint64_t)rs1, 64, 1, vl)
|
||||
#define __riscv_sf_vc_x_se_u64m4(p27_26, p24_20, p11_7, rs1, vl) \
|
||||
__riscv_sf_vc_x_se(p27_26, p24_20, p11_7, (uint64_t)rs1, 64, 2, vl)
|
||||
#define __riscv_sf_vc_x_se_u64m8(p27_26, p24_20, p11_7, rs1, vl) \
|
||||
__riscv_sf_vc_x_se(p27_26, p24_20, p11_7, (uint64_t)rs1, 64, 3, vl)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#endif //_SIFIVE_VECTOR_H_
|
||||
|
24
lib/include/smmintrin.h
vendored
24
lib/include/smmintrin.h
vendored
@ -1188,6 +1188,8 @@ static __inline__ int __DEFAULT_FN_ATTRS _mm_testnzc_si128(__m128i __M,
|
||||
/// Compares each of the corresponding 64-bit values of the 128-bit
|
||||
/// integer vectors for equality.
|
||||
///
|
||||
/// Each comparison returns 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> VPCMPEQQ / PCMPEQQ </c> instruction.
|
||||
@ -1431,8 +1433,10 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu32_epi64(__m128i __V) {
|
||||
}
|
||||
|
||||
/* SSE4 Pack with Unsigned Saturation. */
|
||||
/// Converts 32-bit signed integers from both 128-bit integer vector
|
||||
/// operands into 16-bit unsigned integers, and returns the packed result.
|
||||
/// Converts, with saturation, 32-bit signed integers from both 128-bit integer
|
||||
/// vector operands into 16-bit unsigned integers, and returns the packed
|
||||
/// result.
|
||||
///
|
||||
/// Values greater than 0xFFFF are saturated to 0xFFFF. Values less than
|
||||
/// 0x0000 are saturated to 0x0000.
|
||||
///
|
||||
@ -1441,17 +1445,11 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cvtepu32_epi64(__m128i __V) {
|
||||
/// This intrinsic corresponds to the <c> VPACKUSDW / PACKUSDW </c> instruction.
|
||||
///
|
||||
/// \param __V1
|
||||
/// A 128-bit vector of [4 x i32]. Each 32-bit element is treated as a
|
||||
/// signed integer and is converted to a 16-bit unsigned integer with
|
||||
/// saturation. Values greater than 0xFFFF are saturated to 0xFFFF. Values
|
||||
/// less than 0x0000 are saturated to 0x0000. The converted [4 x i16] values
|
||||
/// are written to the lower 64 bits of the result.
|
||||
/// A 128-bit vector of [4 x i32]. The converted [4 x i16] values are
|
||||
/// written to the lower 64 bits of the result.
|
||||
/// \param __V2
|
||||
/// A 128-bit vector of [4 x i32]. Each 32-bit element is treated as a
|
||||
/// signed integer and is converted to a 16-bit unsigned integer with
|
||||
/// saturation. Values greater than 0xFFFF are saturated to 0xFFFF. Values
|
||||
/// less than 0x0000 are saturated to 0x0000. The converted [4 x i16] values
|
||||
/// are written to the higher 64 bits of the result.
|
||||
/// A 128-bit vector of [4 x i32]. The converted [4 x i16] values are
|
||||
/// written to the higher 64 bits of the result.
|
||||
/// \returns A 128-bit vector of [8 x i16] containing the converted values.
|
||||
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_packus_epi32(__m128i __V1,
|
||||
__m128i __V2) {
|
||||
@ -2305,6 +2303,8 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_minpos_epu16(__m128i __V) {
|
||||
/// integer vectors to determine if the values in the first operand are
|
||||
/// greater than those in the second operand.
|
||||
///
|
||||
/// Each comparison returns 0x0 for false, 0xFFFFFFFFFFFFFFFF for true.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> VPCMPGTQ / PCMPGTQ </c> instruction.
|
||||
|
5
lib/include/stdalign.h
vendored
5
lib/include/stdalign.h
vendored
@ -10,6 +10,10 @@
|
||||
#ifndef __STDALIGN_H
|
||||
#define __STDALIGN_H
|
||||
|
||||
#if defined(__MVS__) && __has_include_next(<stdalign.h>)
|
||||
#include_next <stdalign.h>
|
||||
#else
|
||||
|
||||
#if defined(__cplusplus) || \
|
||||
(defined(__STDC_VERSION__) && __STDC_VERSION__ < 202311L)
|
||||
#ifndef __cplusplus
|
||||
@ -21,4 +25,5 @@
|
||||
#define __alignof_is_defined 1
|
||||
#endif /* __STDC_VERSION__ */
|
||||
|
||||
#endif /* __MVS__ */
|
||||
#endif /* __STDALIGN_H */
|
||||
|
34
lib/include/stdarg.h
vendored
34
lib/include/stdarg.h
vendored
@ -14,29 +14,24 @@
|
||||
* need to use some of its interfaces. Otherwise this header provides all of
|
||||
* the expected interfaces.
|
||||
*
|
||||
* When clang modules are enabled, this header is a textual header. It ignores
|
||||
* its header guard so that multiple submodules can export its interfaces.
|
||||
* Take module SM with submodules A and B, whose headers both include stdarg.h
|
||||
* When SM.A builds, __STDARG_H will be defined. When SM.B builds, the
|
||||
* definition from SM.A will leak when building without local submodule
|
||||
* visibility. stdarg.h wouldn't include any of its implementation headers, and
|
||||
* SM.B wouldn't import any of the stdarg modules, and SM.B's `export *`
|
||||
* wouldn't export any stdarg interfaces as expected. However, since stdarg.h
|
||||
* ignores its header guard when building with modules, it all works as
|
||||
* expected.
|
||||
*
|
||||
* When clang modules are not enabled, the header guards can function in the
|
||||
* normal simple fashion.
|
||||
* When clang modules are enabled, this header is a textual header to support
|
||||
* the multiple include behavior. As such, it doesn't directly declare anything
|
||||
* so that it doesn't add duplicate declarations to all of its includers'
|
||||
* modules.
|
||||
*/
|
||||
#if !defined(__STDARG_H) || __has_feature(modules) || \
|
||||
defined(__need___va_list) || defined(__need_va_list) || \
|
||||
defined(__need_va_arg) || defined(__need___va_copy) || \
|
||||
defined(__need_va_copy)
|
||||
#if defined(__MVS__) && __has_include_next(<stdarg.h>)
|
||||
#undef __need___va_list
|
||||
#undef __need_va_list
|
||||
#undef __need_va_arg
|
||||
#undef __need___va_copy
|
||||
#undef __need_va_copy
|
||||
#include <__stdarg_header_macro.h>
|
||||
#include_next <stdarg.h>
|
||||
|
||||
#else
|
||||
#if !defined(__need___va_list) && !defined(__need_va_list) && \
|
||||
!defined(__need_va_arg) && !defined(__need___va_copy) && \
|
||||
!defined(__need_va_copy)
|
||||
#define __STDARG_H
|
||||
#define __need___va_list
|
||||
#define __need_va_list
|
||||
#define __need_va_arg
|
||||
@ -49,6 +44,7 @@
|
||||
!defined(__STRICT_ANSI__)
|
||||
#define __need_va_copy
|
||||
#endif
|
||||
#include <__stdarg_header_macro.h>
|
||||
#endif
|
||||
|
||||
#ifdef __need___va_list
|
||||
@ -76,4 +72,4 @@
|
||||
#undef __need_va_copy
|
||||
#endif /* defined(__need_va_copy) */
|
||||
|
||||
#endif
|
||||
#endif /* __MVS__ */
|
||||
|
12
lib/include/stdatomic.h
vendored
12
lib/include/stdatomic.h
vendored
@ -16,7 +16,7 @@
|
||||
* Exclude the MSVC path as well as the MSVC header as of the 14.31.30818
|
||||
* explicitly disallows `stdatomic.h` in the C mode via an `#error`. Fallback
|
||||
* to the clang resource header until that is fully supported. The
|
||||
* `stdatomic.h` header requires C++ 23 or newer.
|
||||
* `stdatomic.h` header requires C++23 or newer.
|
||||
*/
|
||||
#if __STDC_HOSTED__ && \
|
||||
__has_include_next(<stdatomic.h>) && \
|
||||
@ -35,6 +35,9 @@ extern "C" {
|
||||
|
||||
#define ATOMIC_BOOL_LOCK_FREE __CLANG_ATOMIC_BOOL_LOCK_FREE
|
||||
#define ATOMIC_CHAR_LOCK_FREE __CLANG_ATOMIC_CHAR_LOCK_FREE
|
||||
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
|
||||
#define ATOMIC_CHAR8_T_LOCK_FREE __CLANG_ATOMIC_CHAR8_T_LOCK_FREE
|
||||
#endif
|
||||
#define ATOMIC_CHAR16_T_LOCK_FREE __CLANG_ATOMIC_CHAR16_T_LOCK_FREE
|
||||
#define ATOMIC_CHAR32_T_LOCK_FREE __CLANG_ATOMIC_CHAR32_T_LOCK_FREE
|
||||
#define ATOMIC_WCHAR_T_LOCK_FREE __CLANG_ATOMIC_WCHAR_T_LOCK_FREE
|
||||
@ -104,6 +107,9 @@ typedef _Atomic(long) atomic_long;
|
||||
typedef _Atomic(unsigned long) atomic_ulong;
|
||||
typedef _Atomic(long long) atomic_llong;
|
||||
typedef _Atomic(unsigned long long) atomic_ullong;
|
||||
#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L
|
||||
typedef _Atomic(unsigned char) atomic_char8_t;
|
||||
#endif
|
||||
typedef _Atomic(uint_least16_t) atomic_char16_t;
|
||||
typedef _Atomic(uint_least32_t) atomic_char32_t;
|
||||
typedef _Atomic(wchar_t) atomic_wchar_t;
|
||||
@ -166,7 +172,11 @@ typedef _Atomic(uintmax_t) atomic_uintmax_t;
|
||||
|
||||
typedef struct atomic_flag { atomic_bool _Value; } atomic_flag;
|
||||
|
||||
#ifdef __cplusplus
|
||||
#define ATOMIC_FLAG_INIT {false}
|
||||
#else
|
||||
#define ATOMIC_FLAG_INIT { 0 }
|
||||
#endif
|
||||
|
||||
/* These should be provided by the libc implementation. */
|
||||
#ifdef __cplusplus
|
||||
|
5
lib/include/stdbool.h
vendored
5
lib/include/stdbool.h
vendored
@ -12,6 +12,10 @@
|
||||
|
||||
#define __bool_true_false_are_defined 1
|
||||
|
||||
#if defined(__MVS__) && __has_include_next(<stdbool.h>)
|
||||
#include_next <stdbool.h>
|
||||
#else
|
||||
|
||||
#if defined(__STDC_VERSION__) && __STDC_VERSION__ > 201710L
|
||||
/* FIXME: We should be issuing a deprecation warning here, but cannot yet due
|
||||
* to system headers which include this header file unconditionally.
|
||||
@ -31,4 +35,5 @@
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#endif /* __MVS__ */
|
||||
#endif /* __STDBOOL_H */
|
||||
|
60
lib/include/stddef.h
vendored
60
lib/include/stddef.h
vendored
@ -14,34 +14,32 @@
|
||||
* need to use some of its interfaces. Otherwise this header provides all of
|
||||
* the expected interfaces.
|
||||
*
|
||||
* When clang modules are enabled, this header is a textual header. It ignores
|
||||
* its header guard so that multiple submodules can export its interfaces.
|
||||
* Take module SM with submodules A and B, whose headers both include stddef.h
|
||||
* When SM.A builds, __STDDEF_H will be defined. When SM.B builds, the
|
||||
* definition from SM.A will leak when building without local submodule
|
||||
* visibility. stddef.h wouldn't include any of its implementation headers, and
|
||||
* SM.B wouldn't import any of the stddef modules, and SM.B's `export *`
|
||||
* wouldn't export any stddef interfaces as expected. However, since stddef.h
|
||||
* ignores its header guard when building with modules, it all works as
|
||||
* expected.
|
||||
*
|
||||
* When clang modules are not enabled, the header guards can function in the
|
||||
* normal simple fashion.
|
||||
* When clang modules are enabled, this header is a textual header to support
|
||||
* the multiple include behavior. As such, it doesn't directly declare anything
|
||||
* so that it doesn't add duplicate declarations to all of its includers'
|
||||
* modules.
|
||||
*/
|
||||
#if !defined(__STDDEF_H) || __has_feature(modules) || \
|
||||
(defined(__STDC_WANT_LIB_EXT1__) && __STDC_WANT_LIB_EXT1__ >= 1) || \
|
||||
defined(__need_ptrdiff_t) || defined(__need_size_t) || \
|
||||
defined(__need_rsize_t) || defined(__need_wchar_t) || \
|
||||
defined(__need_NULL) || defined(__need_nullptr_t) || \
|
||||
defined(__need_unreachable) || defined(__need_max_align_t) || \
|
||||
defined(__need_offsetof) || defined(__need_wint_t)
|
||||
#if defined(__MVS__) && __has_include_next(<stddef.h>)
|
||||
#undef __need_ptrdiff_t
|
||||
#undef __need_size_t
|
||||
#undef __need_rsize_t
|
||||
#undef __need_wchar_t
|
||||
#undef __need_NULL
|
||||
#undef __need_nullptr_t
|
||||
#undef __need_unreachable
|
||||
#undef __need_max_align_t
|
||||
#undef __need_offsetof
|
||||
#undef __need_wint_t
|
||||
#include <__stddef_header_macro.h>
|
||||
#include_next <stddef.h>
|
||||
|
||||
#else
|
||||
|
||||
#if !defined(__need_ptrdiff_t) && !defined(__need_size_t) && \
|
||||
!defined(__need_rsize_t) && !defined(__need_wchar_t) && \
|
||||
!defined(__need_NULL) && !defined(__need_nullptr_t) && \
|
||||
!defined(__need_unreachable) && !defined(__need_max_align_t) && \
|
||||
!defined(__need_offsetof) && !defined(__need_wint_t)
|
||||
#define __STDDEF_H
|
||||
#define __need_ptrdiff_t
|
||||
#define __need_size_t
|
||||
/* ISO9899:2011 7.20 (C11 Annex K): Define rsize_t if __STDC_WANT_LIB_EXT1__ is
|
||||
@ -50,7 +48,24 @@
|
||||
#define __need_rsize_t
|
||||
#endif
|
||||
#define __need_wchar_t
|
||||
#if !defined(__STDDEF_H) || __has_feature(modules)
|
||||
/*
|
||||
* __stddef_null.h is special when building without modules: if __need_NULL is
|
||||
* set, then it will unconditionally redefine NULL. To avoid stepping on client
|
||||
* definitions of NULL, __need_NULL should only be set the first time this
|
||||
* header is included, that is when __STDDEF_H is not defined. However, when
|
||||
* building with modules, this header is a textual header and needs to
|
||||
* unconditionally include __stddef_null.h to support multiple submodules
|
||||
* exporting _Builtin_stddef.null. Take module SM with submodules A and B, whose
|
||||
* headers both include stddef.h. When SM.A builds, __STDDEF_H will be defined.
|
||||
* When SM.B builds, the definition from SM.A will leak when building without
|
||||
* local submodule visibility. stddef.h wouldn't include __stddef_null.h, and
|
||||
* SM.B wouldn't import _Builtin_stddef.null, and SM.B's `export *` wouldn't
|
||||
* export NULL as expected. When building with modules, always include
|
||||
* __stddef_null.h so that everything works as expected.
|
||||
*/
|
||||
#define __need_NULL
|
||||
#endif
|
||||
#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L) || \
|
||||
defined(__cplusplus)
|
||||
#define __need_nullptr_t
|
||||
@ -66,6 +81,7 @@
|
||||
/* wint_t is provided by <wchar.h> and not <stddef.h>. It's here
|
||||
* for compatibility, but must be explicitly requested. Therefore
|
||||
* __need_wint_t is intentionally not defined here. */
|
||||
#include <__stddef_header_macro.h>
|
||||
#endif
|
||||
|
||||
#if defined(__need_ptrdiff_t)
|
||||
@ -120,4 +136,4 @@ __WINT_TYPE__ directly; accommodate both by requiring __need_wint_t */
|
||||
#undef __need_wint_t
|
||||
#endif /* __need_wint_t */
|
||||
|
||||
#endif
|
||||
#endif /* __MVS__ */
|
||||
|
5
lib/include/stdint.h
vendored
5
lib/include/stdint.h
vendored
@ -14,6 +14,10 @@
|
||||
#define __CLANG_STDINT_H
|
||||
#endif
|
||||
|
||||
#if defined(__MVS__) && __has_include_next(<stdint.h>)
|
||||
#include_next <stdint.h>
|
||||
#else
|
||||
|
||||
/* If we're hosted, fall back to the system's stdint.h, which might have
|
||||
* additional definitions.
|
||||
*/
|
||||
@ -947,4 +951,5 @@ typedef __UINTMAX_TYPE__ uintmax_t;
|
||||
#endif
|
||||
|
||||
#endif /* __STDC_HOSTED__ */
|
||||
#endif /* __MVS__ */
|
||||
#endif /* __CLANG_STDINT_H */
|
||||
|
6
lib/include/stdnoreturn.h
vendored
6
lib/include/stdnoreturn.h
vendored
@ -10,9 +10,15 @@
|
||||
#ifndef __STDNORETURN_H
|
||||
#define __STDNORETURN_H
|
||||
|
||||
#if defined(__MVS__) && __has_include_next(<stdnoreturn.h>)
|
||||
#include_next <stdnoreturn.h>
|
||||
#else
|
||||
|
||||
#define noreturn _Noreturn
|
||||
#define __noreturn_is_defined 1
|
||||
|
||||
#endif /* __MVS__ */
|
||||
|
||||
#if (defined(__STDC_VERSION__) && __STDC_VERSION__ > 201710L) && \
|
||||
!defined(_CLANG_DISABLE_CRT_DEPRECATION_WARNINGS)
|
||||
/* The noreturn macro is deprecated in C23. We do not mark it as such because
|
||||
|
36
lib/include/tmmintrin.h
vendored
36
lib/include/tmmintrin.h
vendored
@ -271,10 +271,11 @@ _mm_hadd_pi32(__m64 __a, __m64 __b)
|
||||
return (__m64)__builtin_ia32_phaddd((__v2si)__a, (__v2si)__b);
|
||||
}
|
||||
|
||||
/// Horizontally adds the adjacent pairs of values contained in 2 packed
|
||||
/// 128-bit vectors of [8 x i16]. Positive sums greater than 0x7FFF are
|
||||
/// saturated to 0x7FFF. Negative sums less than 0x8000 are saturated to
|
||||
/// 0x8000.
|
||||
/// Horizontally adds, with saturation, the adjacent pairs of values contained
|
||||
/// in two packed 128-bit vectors of [8 x i16].
|
||||
///
|
||||
/// Positive sums greater than 0x7FFF are saturated to 0x7FFF. Negative sums
|
||||
/// less than 0x8000 are saturated to 0x8000.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@ -296,10 +297,11 @@ _mm_hadds_epi16(__m128i __a, __m128i __b)
|
||||
return (__m128i)__builtin_ia32_phaddsw128((__v8hi)__a, (__v8hi)__b);
|
||||
}
|
||||
|
||||
/// Horizontally adds the adjacent pairs of values contained in 2 packed
|
||||
/// 64-bit vectors of [4 x i16]. Positive sums greater than 0x7FFF are
|
||||
/// saturated to 0x7FFF. Negative sums less than 0x8000 are saturated to
|
||||
/// 0x8000.
|
||||
/// Horizontally adds, with saturation, the adjacent pairs of values contained
|
||||
/// in two packed 64-bit vectors of [4 x i16].
|
||||
///
|
||||
/// Positive sums greater than 0x7FFF are saturated to 0x7FFF. Negative sums
|
||||
/// less than 0x8000 are saturated to 0x8000.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@ -413,10 +415,11 @@ _mm_hsub_pi32(__m64 __a, __m64 __b)
|
||||
return (__m64)__builtin_ia32_phsubd((__v2si)__a, (__v2si)__b);
|
||||
}
|
||||
|
||||
/// Horizontally subtracts the adjacent pairs of values contained in 2
|
||||
/// packed 128-bit vectors of [8 x i16]. Positive differences greater than
|
||||
/// 0x7FFF are saturated to 0x7FFF. Negative differences less than 0x8000 are
|
||||
/// saturated to 0x8000.
|
||||
/// Horizontally subtracts, with saturation, the adjacent pairs of values
|
||||
/// contained in two packed 128-bit vectors of [8 x i16].
|
||||
///
|
||||
/// Positive differences greater than 0x7FFF are saturated to 0x7FFF.
|
||||
/// Negative differences less than 0x8000 are saturated to 0x8000.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@ -438,10 +441,11 @@ _mm_hsubs_epi16(__m128i __a, __m128i __b)
|
||||
return (__m128i)__builtin_ia32_phsubsw128((__v8hi)__a, (__v8hi)__b);
|
||||
}
|
||||
|
||||
/// Horizontally subtracts the adjacent pairs of values contained in 2
|
||||
/// packed 64-bit vectors of [4 x i16]. Positive differences greater than
|
||||
/// 0x7FFF are saturated to 0x7FFF. Negative differences less than 0x8000 are
|
||||
/// saturated to 0x8000.
|
||||
/// Horizontally subtracts, with saturation, the adjacent pairs of values
|
||||
/// contained in two packed 64-bit vectors of [4 x i16].
|
||||
///
|
||||
/// Positive differences greater than 0x7FFF are saturated to 0x7FFF.
|
||||
/// Negative differences less than 0x8000 are saturated to 0x8000.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
|
6
lib/include/varargs.h
vendored
6
lib/include/varargs.h
vendored
@ -8,5 +8,9 @@
|
||||
*/
|
||||
#ifndef __VARARGS_H
|
||||
#define __VARARGS_H
|
||||
#error "Please use <stdarg.h> instead of <varargs.h>"
|
||||
#if defined(__MVS__) && __has_include_next(<varargs.h>)
|
||||
#include_next <varargs.h>
|
||||
#else
|
||||
#error "Please use <stdarg.h> instead of <varargs.h>"
|
||||
#endif /* __MVS__ */
|
||||
#endif
|
||||
|
21
lib/include/x86gprintrin.h
vendored
21
lib/include/x86gprintrin.h
vendored
@ -10,38 +10,31 @@
|
||||
#ifndef __X86GPRINTRIN_H
|
||||
#define __X86GPRINTRIN_H
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__HRESET__)
|
||||
#if !defined(__SCE__) || __has_feature(modules) || defined(__HRESET__)
|
||||
#include <hresetintrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__UINTR__)
|
||||
#if !defined(__SCE__) || __has_feature(modules) || defined(__UINTR__)
|
||||
#include <uintrintrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__USERMSR__)
|
||||
#if !defined(__SCE__) || __has_feature(modules) || defined(__USERMSR__)
|
||||
#include <usermsrintrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__CRC32__)
|
||||
#if !defined(__SCE__) || __has_feature(modules) || defined(__CRC32__)
|
||||
#include <crc32intrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__PRFCHI__)
|
||||
#if !defined(__SCE__) || __has_feature(modules) || defined(__PRFCHI__)
|
||||
#include <prfchiintrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__RAOINT__)
|
||||
#if !defined(__SCE__) || __has_feature(modules) || defined(__RAOINT__)
|
||||
#include <raointintrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__CMPCCXADD__)
|
||||
#if !defined(__SCE__) || __has_feature(modules) || defined(__CMPCCXADD__)
|
||||
#include <cmpccxaddintrin.h>
|
||||
#endif
|
||||
|
||||
|
32
lib/include/x86intrin.h
vendored
32
lib/include/x86intrin.h
vendored
@ -14,53 +14,39 @@
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__3dNOW__)
|
||||
#include <mm3dnow.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__PRFCHW__)
|
||||
#if !defined(__SCE__) || __has_feature(modules) || defined(__PRFCHW__)
|
||||
#include <prfchwintrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__SSE4A__)
|
||||
#if !defined(__SCE__) || __has_feature(modules) || defined(__SSE4A__)
|
||||
#include <ammintrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__FMA4__)
|
||||
#if !defined(__SCE__) || __has_feature(modules) || defined(__FMA4__)
|
||||
#include <fma4intrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__XOP__)
|
||||
#if !defined(__SCE__) || __has_feature(modules) || defined(__XOP__)
|
||||
#include <xopintrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__TBM__)
|
||||
#if !defined(__SCE__) || __has_feature(modules) || defined(__TBM__)
|
||||
#include <tbmintrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__LWP__)
|
||||
#if !defined(__SCE__) || __has_feature(modules) || defined(__LWP__)
|
||||
#include <lwpintrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__MWAITX__)
|
||||
#if !defined(__SCE__) || __has_feature(modules) || defined(__MWAITX__)
|
||||
#include <mwaitxintrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__CLZERO__)
|
||||
#if !defined(__SCE__) || __has_feature(modules) || defined(__CLZERO__)
|
||||
#include <clzerointrin.h>
|
||||
#endif
|
||||
|
||||
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
|
||||
defined(__RDPRU__)
|
||||
#if !defined(__SCE__) || __has_feature(modules) || defined(__RDPRU__)
|
||||
#include <rdpruintrin.h>
|
||||
#endif
|
||||
|
||||
|
384
lib/include/xmmintrin.h
vendored
384
lib/include/xmmintrin.h
vendored
@ -316,6 +316,8 @@ _mm_rsqrt_ps(__m128 __a)
|
||||
/// operands and returns the lesser value in the low-order bits of the
|
||||
/// vector of [4 x float].
|
||||
///
|
||||
/// If either value in a comparison is NaN, returns the value from \a __b.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> VMINSS / MINSS </c> instructions.
|
||||
@ -338,6 +340,8 @@ _mm_min_ss(__m128 __a, __m128 __b)
|
||||
/// Compares two 128-bit vectors of [4 x float] and returns the lesser
|
||||
/// of each pair of values.
|
||||
///
|
||||
/// If either value in a comparison is NaN, returns the value from \a __b.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> VMINPS / MINPS </c> instructions.
|
||||
@ -358,6 +362,8 @@ _mm_min_ps(__m128 __a, __m128 __b)
|
||||
/// operands and returns the greater value in the low-order bits of a 128-bit
|
||||
/// vector of [4 x float].
|
||||
///
|
||||
/// If either value in a comparison is NaN, returns the value from \a __b.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> VMAXSS / MAXSS </c> instructions.
|
||||
@ -380,6 +386,8 @@ _mm_max_ss(__m128 __a, __m128 __b)
|
||||
/// Compares two 128-bit vectors of [4 x float] and returns the greater
|
||||
/// of each pair of values.
|
||||
///
|
||||
/// If either value in a comparison is NaN, returns the value from \a __b.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> VMAXPS / MAXPS </c> instructions.
|
||||
@ -474,8 +482,11 @@ _mm_xor_ps(__m128 __a, __m128 __b)
|
||||
}
|
||||
|
||||
/// Compares two 32-bit float values in the low-order bits of both
|
||||
/// operands for equality and returns the result of the comparison in the
|
||||
/// operands for equality.
|
||||
///
|
||||
/// The comparison returns 0x0 for false, 0xFFFFFFFF for true, in the
|
||||
/// low-order bits of a vector [4 x float].
|
||||
/// If either value in a comparison is NaN, returns false.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@ -498,6 +509,9 @@ _mm_cmpeq_ss(__m128 __a, __m128 __b)
|
||||
/// Compares each of the corresponding 32-bit float values of the
|
||||
/// 128-bit vectors of [4 x float] for equality.
|
||||
///
|
||||
/// Each comparison returns 0x0 for false, 0xFFFFFFFF for true.
|
||||
/// If either value in a comparison is NaN, returns false.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> VCMPEQPS / CMPEQPS </c> instructions.
|
||||
@ -515,8 +529,11 @@ _mm_cmpeq_ps(__m128 __a, __m128 __b)
|
||||
|
||||
/// Compares two 32-bit float values in the low-order bits of both
|
||||
/// operands to determine if the value in the first operand is less than the
|
||||
/// corresponding value in the second operand and returns the result of the
|
||||
/// comparison in the low-order bits of a vector of [4 x float].
|
||||
/// corresponding value in the second operand.
|
||||
///
|
||||
/// The comparison returns 0x0 for false, 0xFFFFFFFF for true, in the
|
||||
/// low-order bits of a vector of [4 x float].
|
||||
/// If either value in a comparison is NaN, returns false.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@ -540,6 +557,9 @@ _mm_cmplt_ss(__m128 __a, __m128 __b)
|
||||
/// 128-bit vectors of [4 x float] to determine if the values in the first
|
||||
/// operand are less than those in the second operand.
|
||||
///
|
||||
/// Each comparison returns 0x0 for false, 0xFFFFFFFF for true.
|
||||
/// If either value in a comparison is NaN, returns false.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> VCMPLTPS / CMPLTPS </c> instructions.
|
||||
@ -557,9 +577,11 @@ _mm_cmplt_ps(__m128 __a, __m128 __b)
|
||||
|
||||
/// Compares two 32-bit float values in the low-order bits of both
|
||||
/// operands to determine if the value in the first operand is less than or
|
||||
/// equal to the corresponding value in the second operand and returns the
|
||||
/// result of the comparison in the low-order bits of a vector of
|
||||
/// [4 x float].
|
||||
/// equal to the corresponding value in the second operand.
|
||||
///
|
||||
/// The comparison returns 0x0 for false, 0xFFFFFFFF for true, in
|
||||
/// the low-order bits of a vector of [4 x float].
|
||||
/// If either value in a comparison is NaN, returns false.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@ -583,6 +605,9 @@ _mm_cmple_ss(__m128 __a, __m128 __b)
|
||||
/// 128-bit vectors of [4 x float] to determine if the values in the first
|
||||
/// operand are less than or equal to those in the second operand.
|
||||
///
|
||||
/// Each comparison returns 0x0 for false, 0xFFFFFFFF for true.
|
||||
/// If either value in a comparison is NaN, returns false.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> VCMPLEPS / CMPLEPS </c> instructions.
|
||||
@ -600,8 +625,11 @@ _mm_cmple_ps(__m128 __a, __m128 __b)
|
||||
|
||||
/// Compares two 32-bit float values in the low-order bits of both
|
||||
/// operands to determine if the value in the first operand is greater than
|
||||
/// the corresponding value in the second operand and returns the result of
|
||||
/// the comparison in the low-order bits of a vector of [4 x float].
|
||||
/// the corresponding value in the second operand.
|
||||
///
|
||||
/// The comparison returns 0x0 for false, 0xFFFFFFFF for true, in the
|
||||
/// low-order bits of a vector of [4 x float].
|
||||
/// If either value in a comparison is NaN, returns false.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@ -627,6 +655,9 @@ _mm_cmpgt_ss(__m128 __a, __m128 __b)
|
||||
/// 128-bit vectors of [4 x float] to determine if the values in the first
|
||||
/// operand are greater than those in the second operand.
|
||||
///
|
||||
/// Each comparison returns 0x0 for false, 0xFFFFFFFF for true.
|
||||
/// If either value in a comparison is NaN, returns false.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> VCMPLTPS / CMPLTPS </c> instructions.
|
||||
@ -644,9 +675,11 @@ _mm_cmpgt_ps(__m128 __a, __m128 __b)
|
||||
|
||||
/// Compares two 32-bit float values in the low-order bits of both
|
||||
/// operands to determine if the value in the first operand is greater than
|
||||
/// or equal to the corresponding value in the second operand and returns
|
||||
/// the result of the comparison in the low-order bits of a vector of
|
||||
/// [4 x float].
|
||||
/// or equal to the corresponding value in the second operand.
|
||||
///
|
||||
/// The comparison returns 0x0 for false, 0xFFFFFFFF for true, in the
|
||||
/// low-order bits of a vector of [4 x float].
|
||||
/// If either value in a comparison is NaN, returns false.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@ -672,6 +705,9 @@ _mm_cmpge_ss(__m128 __a, __m128 __b)
|
||||
/// 128-bit vectors of [4 x float] to determine if the values in the first
|
||||
/// operand are greater than or equal to those in the second operand.
|
||||
///
|
||||
/// Each comparison returns 0x0 for false, 0xFFFFFFFF for true.
|
||||
/// If either value in a comparison is NaN, returns false.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> VCMPLEPS / CMPLEPS </c> instructions.
|
||||
@ -687,9 +723,12 @@ _mm_cmpge_ps(__m128 __a, __m128 __b)
|
||||
return (__m128)__builtin_ia32_cmpleps((__v4sf)__b, (__v4sf)__a);
|
||||
}
|
||||
|
||||
/// Compares two 32-bit float values in the low-order bits of both
|
||||
/// operands for inequality and returns the result of the comparison in the
|
||||
/// Compares two 32-bit float values in the low-order bits of both operands
|
||||
/// for inequality.
|
||||
///
|
||||
/// The comparison returns 0x0 for false, 0xFFFFFFFF for true, in the
|
||||
/// low-order bits of a vector of [4 x float].
|
||||
/// If either value in a comparison is NaN, returns true.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@ -713,6 +752,9 @@ _mm_cmpneq_ss(__m128 __a, __m128 __b)
|
||||
/// Compares each of the corresponding 32-bit float values of the
|
||||
/// 128-bit vectors of [4 x float] for inequality.
|
||||
///
|
||||
/// Each comparison returns 0x0 for false, 0xFFFFFFFF for true.
|
||||
/// If either value in a comparison is NaN, returns true.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> VCMPNEQPS / CMPNEQPS </c>
|
||||
@ -731,8 +773,11 @@ _mm_cmpneq_ps(__m128 __a, __m128 __b)
|
||||
|
||||
/// Compares two 32-bit float values in the low-order bits of both
|
||||
/// operands to determine if the value in the first operand is not less than
|
||||
/// the corresponding value in the second operand and returns the result of
|
||||
/// the comparison in the low-order bits of a vector of [4 x float].
|
||||
/// the corresponding value in the second operand.
|
||||
///
|
||||
/// The comparison returns 0x0 for false, 0xFFFFFFFF for true, in the
|
||||
/// low-order bits of a vector of [4 x float].
|
||||
/// If either value in a comparison is NaN, returns true.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@ -757,6 +802,9 @@ _mm_cmpnlt_ss(__m128 __a, __m128 __b)
|
||||
/// 128-bit vectors of [4 x float] to determine if the values in the first
|
||||
/// operand are not less than those in the second operand.
|
||||
///
|
||||
/// Each comparison returns 0x0 for false, 0xFFFFFFFF for true.
|
||||
/// If either value in a comparison is NaN, returns true.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> VCMPNLTPS / CMPNLTPS </c>
|
||||
@ -775,9 +823,11 @@ _mm_cmpnlt_ps(__m128 __a, __m128 __b)
|
||||
|
||||
/// Compares two 32-bit float values in the low-order bits of both
|
||||
/// operands to determine if the value in the first operand is not less than
|
||||
/// or equal to the corresponding value in the second operand and returns
|
||||
/// the result of the comparison in the low-order bits of a vector of
|
||||
/// [4 x float].
|
||||
/// or equal to the corresponding value in the second operand.
|
||||
///
|
||||
/// The comparison returns 0x0 for false, 0xFFFFFFFF for true, in the
|
||||
/// low-order bits of a vector of [4 x float].
|
||||
/// If either value in a comparison is NaN, returns true.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@ -802,6 +852,9 @@ _mm_cmpnle_ss(__m128 __a, __m128 __b)
|
||||
/// 128-bit vectors of [4 x float] to determine if the values in the first
|
||||
/// operand are not less than or equal to those in the second operand.
|
||||
///
|
||||
/// Each comparison returns 0x0 for false, 0xFFFFFFFF for true.
|
||||
/// If either value in a comparison is NaN, returns true.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> VCMPNLEPS / CMPNLEPS </c>
|
||||
@ -820,9 +873,11 @@ _mm_cmpnle_ps(__m128 __a, __m128 __b)
|
||||
|
||||
/// Compares two 32-bit float values in the low-order bits of both
|
||||
/// operands to determine if the value in the first operand is not greater
|
||||
/// than the corresponding value in the second operand and returns the
|
||||
/// result of the comparison in the low-order bits of a vector of
|
||||
/// [4 x float].
|
||||
/// than the corresponding value in the second operand.
|
||||
///
|
||||
/// The comparison returns 0x0 for false, 0xFFFFFFFF for true, in the
|
||||
/// low-order bits of a vector of [4 x float].
|
||||
/// If either value in a comparison is NaN, returns true.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@ -849,6 +904,9 @@ _mm_cmpngt_ss(__m128 __a, __m128 __b)
|
||||
/// 128-bit vectors of [4 x float] to determine if the values in the first
|
||||
/// operand are not greater than those in the second operand.
|
||||
///
|
||||
/// Each comparison returns 0x0 for false, 0xFFFFFFFF for true.
|
||||
/// If either value in a comparison is NaN, returns true.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> VCMPNLTPS / CMPNLTPS </c>
|
||||
@ -867,9 +925,11 @@ _mm_cmpngt_ps(__m128 __a, __m128 __b)
|
||||
|
||||
/// Compares two 32-bit float values in the low-order bits of both
|
||||
/// operands to determine if the value in the first operand is not greater
|
||||
/// than or equal to the corresponding value in the second operand and
|
||||
/// returns the result of the comparison in the low-order bits of a vector
|
||||
/// of [4 x float].
|
||||
/// than or equal to the corresponding value in the second operand.
|
||||
///
|
||||
/// Each comparison returns 0x0 for false, 0xFFFFFFFF for true, in the
|
||||
/// low-order bits of a vector of [4 x float].
|
||||
/// If either value in a comparison is NaN, returns true.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@ -896,6 +956,9 @@ _mm_cmpnge_ss(__m128 __a, __m128 __b)
|
||||
/// 128-bit vectors of [4 x float] to determine if the values in the first
|
||||
/// operand are not greater than or equal to those in the second operand.
|
||||
///
|
||||
/// Each comparison returns 0x0 for false, 0xFFFFFFFF for true.
|
||||
/// If either value in a comparison is NaN, returns true.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> VCMPNLEPS / CMPNLEPS </c>
|
||||
@ -914,9 +977,11 @@ _mm_cmpnge_ps(__m128 __a, __m128 __b)
|
||||
|
||||
/// Compares two 32-bit float values in the low-order bits of both
|
||||
/// operands to determine if the value in the first operand is ordered with
|
||||
/// respect to the corresponding value in the second operand and returns the
|
||||
/// result of the comparison in the low-order bits of a vector of
|
||||
/// [4 x float].
|
||||
/// respect to the corresponding value in the second operand.
|
||||
///
|
||||
/// A pair of floating-point values are ordered with respect to each
|
||||
/// other if neither value is a NaN. Each comparison returns 0x0 for false,
|
||||
/// 0xFFFFFFFF for true.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@ -941,6 +1006,10 @@ _mm_cmpord_ss(__m128 __a, __m128 __b)
|
||||
/// 128-bit vectors of [4 x float] to determine if the values in the first
|
||||
/// operand are ordered with respect to those in the second operand.
|
||||
///
|
||||
/// A pair of floating-point values are ordered with respect to each
|
||||
/// other if neither value is a NaN. Each comparison returns 0x0 for false,
|
||||
/// 0xFFFFFFFF for true.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> VCMPORDPS / CMPORDPS </c>
|
||||
@ -959,9 +1028,11 @@ _mm_cmpord_ps(__m128 __a, __m128 __b)
|
||||
|
||||
/// Compares two 32-bit float values in the low-order bits of both
|
||||
/// operands to determine if the value in the first operand is unordered
|
||||
/// with respect to the corresponding value in the second operand and
|
||||
/// returns the result of the comparison in the low-order bits of a vector
|
||||
/// of [4 x float].
|
||||
/// with respect to the corresponding value in the second operand.
|
||||
///
|
||||
/// A pair of floating-point values are unordered with respect to each
|
||||
/// other if one or both values are NaN. Each comparison returns 0x0 for
|
||||
/// false, 0xFFFFFFFF for true.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@ -986,6 +1057,10 @@ _mm_cmpunord_ss(__m128 __a, __m128 __b)
|
||||
/// 128-bit vectors of [4 x float] to determine if the values in the first
|
||||
/// operand are unordered with respect to those in the second operand.
|
||||
///
|
||||
/// A pair of floating-point values are unordered with respect to each
|
||||
/// other if one or both values are NaN. Each comparison returns 0x0 for
|
||||
/// false, 0xFFFFFFFF for true.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> VCMPUNORDPS / CMPUNORDPS </c>
|
||||
@ -1003,9 +1078,10 @@ _mm_cmpunord_ps(__m128 __a, __m128 __b)
|
||||
}
|
||||
|
||||
/// Compares two 32-bit float values in the low-order bits of both
|
||||
/// operands for equality and returns the result of the comparison.
|
||||
/// operands for equality.
|
||||
///
|
||||
/// If either of the two lower 32-bit values is NaN, 0 is returned.
|
||||
/// The comparison returns 0 for false, 1 for true. If either value in a
|
||||
/// comparison is NaN, returns 0.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@ -1018,8 +1094,7 @@ _mm_cmpunord_ps(__m128 __a, __m128 __b)
|
||||
/// \param __b
|
||||
/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
|
||||
/// used in the comparison.
|
||||
/// \returns An integer containing the comparison results. If either of the
|
||||
/// two lower 32-bit values is NaN, 0 is returned.
|
||||
/// \returns An integer containing the comparison results.
|
||||
static __inline__ int __DEFAULT_FN_ATTRS
|
||||
_mm_comieq_ss(__m128 __a, __m128 __b)
|
||||
{
|
||||
@ -1028,9 +1103,10 @@ _mm_comieq_ss(__m128 __a, __m128 __b)
|
||||
|
||||
/// Compares two 32-bit float values in the low-order bits of both
|
||||
/// operands to determine if the first operand is less than the second
|
||||
/// operand and returns the result of the comparison.
|
||||
/// operand.
|
||||
///
|
||||
/// If either of the two lower 32-bit values is NaN, 0 is returned.
|
||||
/// The comparison returns 0 for false, 1 for true. If either value in a
|
||||
/// comparison is NaN, returns 0.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@ -1043,8 +1119,7 @@ _mm_comieq_ss(__m128 __a, __m128 __b)
|
||||
/// \param __b
|
||||
/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
|
||||
/// used in the comparison.
|
||||
/// \returns An integer containing the comparison results. If either of the two
|
||||
/// lower 32-bit values is NaN, 0 is returned.
|
||||
/// \returns An integer containing the comparison results.
|
||||
static __inline__ int __DEFAULT_FN_ATTRS
|
||||
_mm_comilt_ss(__m128 __a, __m128 __b)
|
||||
{
|
||||
@ -1053,9 +1128,10 @@ _mm_comilt_ss(__m128 __a, __m128 __b)
|
||||
|
||||
/// Compares two 32-bit float values in the low-order bits of both
|
||||
/// operands to determine if the first operand is less than or equal to the
|
||||
/// second operand and returns the result of the comparison.
|
||||
/// second operand.
|
||||
///
|
||||
/// If either of the two lower 32-bit values is NaN, 0 is returned.
|
||||
/// The comparison returns 0 for false, 1 for true. If either value in a
|
||||
/// comparison is NaN, returns 0.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@ -1067,8 +1143,7 @@ _mm_comilt_ss(__m128 __a, __m128 __b)
|
||||
/// \param __b
|
||||
/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
|
||||
/// used in the comparison.
|
||||
/// \returns An integer containing the comparison results. If either of the two
|
||||
/// lower 32-bit values is NaN, 0 is returned.
|
||||
/// \returns An integer containing the comparison results.
|
||||
static __inline__ int __DEFAULT_FN_ATTRS
|
||||
_mm_comile_ss(__m128 __a, __m128 __b)
|
||||
{
|
||||
@ -1077,9 +1152,10 @@ _mm_comile_ss(__m128 __a, __m128 __b)
|
||||
|
||||
/// Compares two 32-bit float values in the low-order bits of both
|
||||
/// operands to determine if the first operand is greater than the second
|
||||
/// operand and returns the result of the comparison.
|
||||
/// operand.
|
||||
///
|
||||
/// If either of the two lower 32-bit values is NaN, 0 is returned.
|
||||
/// The comparison returns 0 for false, 1 for true. If either value in a
|
||||
/// comparison is NaN, returns 0.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@ -1091,8 +1167,7 @@ _mm_comile_ss(__m128 __a, __m128 __b)
|
||||
/// \param __b
|
||||
/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
|
||||
/// used in the comparison.
|
||||
/// \returns An integer containing the comparison results. If either of the
|
||||
/// two lower 32-bit values is NaN, 0 is returned.
|
||||
/// \returns An integer containing the comparison results.
|
||||
static __inline__ int __DEFAULT_FN_ATTRS
|
||||
_mm_comigt_ss(__m128 __a, __m128 __b)
|
||||
{
|
||||
@ -1101,9 +1176,10 @@ _mm_comigt_ss(__m128 __a, __m128 __b)
|
||||
|
||||
/// Compares two 32-bit float values in the low-order bits of both
|
||||
/// operands to determine if the first operand is greater than or equal to
|
||||
/// the second operand and returns the result of the comparison.
|
||||
/// the second operand.
|
||||
///
|
||||
/// If either of the two lower 32-bit values is NaN, 0 is returned.
|
||||
/// The comparison returns 0 for false, 1 for true. If either value in a
|
||||
/// comparison is NaN, returns 0.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@ -1115,8 +1191,7 @@ _mm_comigt_ss(__m128 __a, __m128 __b)
|
||||
/// \param __b
|
||||
/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
|
||||
/// used in the comparison.
|
||||
/// \returns An integer containing the comparison results. If either of the two
|
||||
/// lower 32-bit values is NaN, 0 is returned.
|
||||
/// \returns An integer containing the comparison results.
|
||||
static __inline__ int __DEFAULT_FN_ATTRS
|
||||
_mm_comige_ss(__m128 __a, __m128 __b)
|
||||
{
|
||||
@ -1125,9 +1200,10 @@ _mm_comige_ss(__m128 __a, __m128 __b)
|
||||
|
||||
/// Compares two 32-bit float values in the low-order bits of both
|
||||
/// operands to determine if the first operand is not equal to the second
|
||||
/// operand and returns the result of the comparison.
|
||||
/// operand.
|
||||
///
|
||||
/// If either of the two lower 32-bit values is NaN, 1 is returned.
|
||||
/// The comparison returns 0 for false, 1 for true. If either value in a
|
||||
/// comparison is NaN, returns 1.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@ -1139,8 +1215,7 @@ _mm_comige_ss(__m128 __a, __m128 __b)
|
||||
/// \param __b
|
||||
/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
|
||||
/// used in the comparison.
|
||||
/// \returns An integer containing the comparison results. If either of the
|
||||
/// two lower 32-bit values is NaN, 1 is returned.
|
||||
/// \returns An integer containing the comparison results.
|
||||
static __inline__ int __DEFAULT_FN_ATTRS
|
||||
_mm_comineq_ss(__m128 __a, __m128 __b)
|
||||
{
|
||||
@ -1148,10 +1223,10 @@ _mm_comineq_ss(__m128 __a, __m128 __b)
|
||||
}
|
||||
|
||||
/// Performs an unordered comparison of two 32-bit float values using
|
||||
/// the low-order bits of both operands to determine equality and returns
|
||||
/// the result of the comparison.
|
||||
/// the low-order bits of both operands to determine equality.
|
||||
///
|
||||
/// If either of the two lower 32-bit values is NaN, 0 is returned.
|
||||
/// The comparison returns 0 for false, 1 for true. If either value in a
|
||||
/// comparison is NaN, returns 0.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@ -1163,8 +1238,7 @@ _mm_comineq_ss(__m128 __a, __m128 __b)
|
||||
/// \param __b
|
||||
/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
|
||||
/// used in the comparison.
|
||||
/// \returns An integer containing the comparison results. If either of the two
|
||||
/// lower 32-bit values is NaN, 0 is returned.
|
||||
/// \returns An integer containing the comparison results.
|
||||
static __inline__ int __DEFAULT_FN_ATTRS
|
||||
_mm_ucomieq_ss(__m128 __a, __m128 __b)
|
||||
{
|
||||
@ -1173,9 +1247,10 @@ _mm_ucomieq_ss(__m128 __a, __m128 __b)
|
||||
|
||||
/// Performs an unordered comparison of two 32-bit float values using
|
||||
/// the low-order bits of both operands to determine if the first operand is
|
||||
/// less than the second operand and returns the result of the comparison.
|
||||
/// less than the second operand.
|
||||
///
|
||||
/// If either of the two lower 32-bit values is NaN, 0 is returned.
|
||||
/// The comparison returns 0 for false, 1 for true. If either value in a
|
||||
/// comparison is NaN, returns 0.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@ -1187,8 +1262,7 @@ _mm_ucomieq_ss(__m128 __a, __m128 __b)
|
||||
/// \param __b
|
||||
/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
|
||||
/// used in the comparison.
|
||||
/// \returns An integer containing the comparison results. If either of the two
|
||||
/// lower 32-bit values is NaN, 0 is returned.
|
||||
/// \returns An integer containing the comparison results.
|
||||
static __inline__ int __DEFAULT_FN_ATTRS
|
||||
_mm_ucomilt_ss(__m128 __a, __m128 __b)
|
||||
{
|
||||
@ -1197,10 +1271,10 @@ _mm_ucomilt_ss(__m128 __a, __m128 __b)
|
||||
|
||||
/// Performs an unordered comparison of two 32-bit float values using
|
||||
/// the low-order bits of both operands to determine if the first operand is
|
||||
/// less than or equal to the second operand and returns the result of the
|
||||
/// comparison.
|
||||
/// less than or equal to the second operand.
|
||||
///
|
||||
/// If either of the two lower 32-bit values is NaN, 0 is returned.
|
||||
/// The comparison returns 0 for false, 1 for true. If either value in a
|
||||
/// comparison is NaN, returns 0.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@ -1212,8 +1286,7 @@ _mm_ucomilt_ss(__m128 __a, __m128 __b)
|
||||
/// \param __b
|
||||
/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
|
||||
/// used in the comparison.
|
||||
/// \returns An integer containing the comparison results. If either of the two
|
||||
/// lower 32-bit values is NaN, 0 is returned.
|
||||
/// \returns An integer containing the comparison results.
|
||||
static __inline__ int __DEFAULT_FN_ATTRS
|
||||
_mm_ucomile_ss(__m128 __a, __m128 __b)
|
||||
{
|
||||
@ -1222,10 +1295,10 @@ _mm_ucomile_ss(__m128 __a, __m128 __b)
|
||||
|
||||
/// Performs an unordered comparison of two 32-bit float values using
|
||||
/// the low-order bits of both operands to determine if the first operand is
|
||||
/// greater than the second operand and returns the result of the
|
||||
/// comparison.
|
||||
/// greater than the second operand.
|
||||
///
|
||||
/// If either of the two lower 32-bit values is NaN, 0 is returned.
|
||||
/// The comparison returns 0 for false, 1 for true. If either value in a
|
||||
/// comparison is NaN, returns 0.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@ -1237,8 +1310,7 @@ _mm_ucomile_ss(__m128 __a, __m128 __b)
|
||||
/// \param __b
|
||||
/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
|
||||
/// used in the comparison.
|
||||
/// \returns An integer containing the comparison results. If either of the two
|
||||
/// lower 32-bit values is NaN, 0 is returned.
|
||||
/// \returns An integer containing the comparison results.
|
||||
static __inline__ int __DEFAULT_FN_ATTRS
|
||||
_mm_ucomigt_ss(__m128 __a, __m128 __b)
|
||||
{
|
||||
@ -1247,10 +1319,10 @@ _mm_ucomigt_ss(__m128 __a, __m128 __b)
|
||||
|
||||
/// Performs an unordered comparison of two 32-bit float values using
|
||||
/// the low-order bits of both operands to determine if the first operand is
|
||||
/// greater than or equal to the second operand and returns the result of
|
||||
/// the comparison.
|
||||
/// greater than or equal to the second operand.
|
||||
///
|
||||
/// If either of the two lower 32-bit values is NaN, 0 is returned.
|
||||
/// The comparison returns 0 for false, 1 for true. If either value in a
|
||||
/// comparison is NaN, returns 0.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@ -1262,8 +1334,7 @@ _mm_ucomigt_ss(__m128 __a, __m128 __b)
|
||||
/// \param __b
|
||||
/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
|
||||
/// used in the comparison.
|
||||
/// \returns An integer containing the comparison results. If either of the two
|
||||
/// lower 32-bit values is NaN, 0 is returned.
|
||||
/// \returns An integer containing the comparison results.
|
||||
static __inline__ int __DEFAULT_FN_ATTRS
|
||||
_mm_ucomige_ss(__m128 __a, __m128 __b)
|
||||
{
|
||||
@ -1271,10 +1342,10 @@ _mm_ucomige_ss(__m128 __a, __m128 __b)
|
||||
}
|
||||
|
||||
/// Performs an unordered comparison of two 32-bit float values using
|
||||
/// the low-order bits of both operands to determine inequality and returns
|
||||
/// the result of the comparison.
|
||||
/// the low-order bits of both operands to determine inequality.
|
||||
///
|
||||
/// If either of the two lower 32-bit values is NaN, 1 is returned.
|
||||
/// The comparison returns 0 for false, 1 for true. If either value in a
|
||||
/// comparison is NaN, returns 1.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@ -1286,8 +1357,7 @@ _mm_ucomige_ss(__m128 __a, __m128 __b)
|
||||
/// \param __b
|
||||
/// A 128-bit vector of [4 x float]. The lower 32 bits of this operand are
|
||||
/// used in the comparison.
|
||||
/// \returns An integer containing the comparison results. If either of the two
|
||||
/// lower 32-bit values is NaN, 1 is returned.
|
||||
/// \returns An integer containing the comparison results.
|
||||
static __inline__ int __DEFAULT_FN_ATTRS
|
||||
_mm_ucomineq_ss(__m128 __a, __m128 __b)
|
||||
{
|
||||
@ -1297,6 +1367,10 @@ _mm_ucomineq_ss(__m128 __a, __m128 __b)
|
||||
/// Converts a float value contained in the lower 32 bits of a vector of
|
||||
/// [4 x float] into a 32-bit integer.
|
||||
///
|
||||
/// If the converted value does not fit in a 32-bit integer, raises a
|
||||
/// floating-point invalid exception. If the exception is masked, returns
|
||||
/// the most negative integer.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> VCVTSS2SI / CVTSS2SI </c>
|
||||
@ -1315,6 +1389,10 @@ _mm_cvtss_si32(__m128 __a)
|
||||
/// Converts a float value contained in the lower 32 bits of a vector of
|
||||
/// [4 x float] into a 32-bit integer.
|
||||
///
|
||||
/// If the converted value does not fit in a 32-bit integer, raises a
|
||||
/// floating-point invalid exception. If the exception is masked, returns
|
||||
/// the most negative integer.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> VCVTSS2SI / CVTSS2SI </c>
|
||||
@ -1335,6 +1413,10 @@ _mm_cvt_ss2si(__m128 __a)
|
||||
/// Converts a float value contained in the lower 32 bits of a vector of
|
||||
/// [4 x float] into a 64-bit integer.
|
||||
///
|
||||
/// If the converted value does not fit in a 64-bit integer, raises a
|
||||
/// floating-point invalid exception. If the exception is masked, returns
|
||||
/// the most negative integer.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> VCVTSS2SI / CVTSS2SI </c>
|
||||
@ -1355,6 +1437,10 @@ _mm_cvtss_si64(__m128 __a)
|
||||
/// Converts two low-order float values in a 128-bit vector of
|
||||
/// [4 x float] into a 64-bit vector of [2 x i32].
|
||||
///
|
||||
/// If a converted value does not fit in a 32-bit integer, raises a
|
||||
/// floating-point invalid exception. If the exception is masked, returns
|
||||
/// the most negative integer.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> CVTPS2PI </c> instruction.
|
||||
@ -1371,6 +1457,10 @@ _mm_cvtps_pi32(__m128 __a)
|
||||
/// Converts two low-order float values in a 128-bit vector of
|
||||
/// [4 x float] into a 64-bit vector of [2 x i32].
|
||||
///
|
||||
/// If a converted value does not fit in a 32-bit integer, raises a
|
||||
/// floating-point invalid exception. If the exception is masked, returns
|
||||
/// the most negative integer.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> CVTPS2PI </c> instruction.
|
||||
@ -1384,9 +1474,12 @@ _mm_cvt_ps2pi(__m128 __a)
|
||||
return _mm_cvtps_pi32(__a);
|
||||
}
|
||||
|
||||
/// Converts a float value contained in the lower 32 bits of a vector of
|
||||
/// [4 x float] into a 32-bit integer, truncating the result when it is
|
||||
/// inexact.
|
||||
/// Converts the lower (first) element of a vector of [4 x float] into a signed
|
||||
/// truncated (rounded toward zero) 32-bit integer.
|
||||
///
|
||||
/// If the converted value does not fit in a 32-bit integer, raises a
|
||||
/// floating-point invalid exception. If the exception is masked, returns
|
||||
/// the most negative integer.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@ -1403,9 +1496,12 @@ _mm_cvttss_si32(__m128 __a)
|
||||
return __builtin_ia32_cvttss2si((__v4sf)__a);
|
||||
}
|
||||
|
||||
/// Converts a float value contained in the lower 32 bits of a vector of
|
||||
/// [4 x float] into a 32-bit integer, truncating the result when it is
|
||||
/// inexact.
|
||||
/// Converts the lower (first) element of a vector of [4 x float] into a signed
|
||||
/// truncated (rounded toward zero) 32-bit integer.
|
||||
///
|
||||
/// If the converted value does not fit in a 32-bit integer, raises a
|
||||
/// floating-point invalid exception. If the exception is masked, returns
|
||||
/// the most negative integer.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@ -1423,9 +1519,12 @@ _mm_cvtt_ss2si(__m128 __a)
|
||||
}
|
||||
|
||||
#ifdef __x86_64__
|
||||
/// Converts a float value contained in the lower 32 bits of a vector of
|
||||
/// [4 x float] into a 64-bit integer, truncating the result when it is
|
||||
/// inexact.
|
||||
/// Converts the lower (first) element of a vector of [4 x float] into a signed
|
||||
/// truncated (rounded toward zero) 64-bit integer.
|
||||
///
|
||||
/// If the converted value does not fit in a 64-bit integer, raises a
|
||||
/// floating-point invalid exception. If the exception is masked, returns
|
||||
/// the most negative integer.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@ -1443,9 +1542,13 @@ _mm_cvttss_si64(__m128 __a)
|
||||
}
|
||||
#endif
|
||||
|
||||
/// Converts two low-order float values in a 128-bit vector of
|
||||
/// [4 x float] into a 64-bit vector of [2 x i32], truncating the result
|
||||
/// when it is inexact.
|
||||
/// Converts the lower (first) two elements of a 128-bit vector of [4 x float]
|
||||
/// into two signed truncated (rounded toward zero) 32-bit integers,
|
||||
/// returned in a 64-bit vector of [2 x i32].
|
||||
///
|
||||
/// If a converted value does not fit in a 32-bit integer, raises a
|
||||
/// floating-point invalid exception. If the exception is masked, returns
|
||||
/// the most negative integer.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@ -1461,9 +1564,13 @@ _mm_cvttps_pi32(__m128 __a)
|
||||
return (__m64)__builtin_ia32_cvttps2pi((__v4sf)__a);
|
||||
}
|
||||
|
||||
/// Converts two low-order float values in a 128-bit vector of [4 x
|
||||
/// float] into a 64-bit vector of [2 x i32], truncating the result when it
|
||||
/// is inexact.
|
||||
/// Converts the lower (first) two elements of a 128-bit vector of [4 x float]
|
||||
/// into two signed truncated (rounded toward zero) 32-bit integers,
|
||||
/// returned in a 64-bit vector of [2 x i32].
|
||||
///
|
||||
/// If a converted value does not fit in a 32-bit integer, raises a
|
||||
/// floating-point invalid exception. If the exception is masked, returns
|
||||
/// the most negative integer.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
@ -1803,7 +1910,7 @@ _mm_undefined_ps(void)
|
||||
static __inline__ __m128 __DEFAULT_FN_ATTRS
|
||||
_mm_set_ss(float __w)
|
||||
{
|
||||
return __extension__ (__m128){ __w, 0, 0, 0 };
|
||||
return __extension__ (__m128){ __w, 0.0f, 0.0f, 0.0f };
|
||||
}
|
||||
|
||||
/// Constructs a 128-bit floating-point vector of [4 x float], with each
|
||||
@ -2940,6 +3047,85 @@ _mm_movemask_ps(__m128 __a)
|
||||
return __builtin_ia32_movmskps((__v4sf)__a);
|
||||
}
|
||||
|
||||
/* Compare */
|
||||
#define _CMP_EQ_OQ 0x00 /* Equal (ordered, non-signaling) */
|
||||
#define _CMP_LT_OS 0x01 /* Less-than (ordered, signaling) */
|
||||
#define _CMP_LE_OS 0x02 /* Less-than-or-equal (ordered, signaling) */
|
||||
#define _CMP_UNORD_Q 0x03 /* Unordered (non-signaling) */
|
||||
#define _CMP_NEQ_UQ 0x04 /* Not-equal (unordered, non-signaling) */
|
||||
#define _CMP_NLT_US 0x05 /* Not-less-than (unordered, signaling) */
|
||||
#define _CMP_NLE_US 0x06 /* Not-less-than-or-equal (unordered, signaling) */
|
||||
#define _CMP_ORD_Q 0x07 /* Ordered (non-signaling) */
|
||||
|
||||
/// Compares each of the corresponding values of two 128-bit vectors of
|
||||
/// [4 x float], using the operation specified by the immediate integer
|
||||
/// operand.
|
||||
///
|
||||
/// Each comparison returns 0x0 for false, 0xFFFFFFFF for true.
|
||||
/// If either value in a comparison is NaN, comparisons that are ordered
|
||||
/// return false, and comparisons that are unordered return true.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// \code
|
||||
/// __m128 _mm_cmp_ps(__m128 a, __m128 b, const int c);
|
||||
/// \endcode
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> (V)CMPPS </c> instruction.
|
||||
///
|
||||
/// \param a
|
||||
/// A 128-bit vector of [4 x float].
|
||||
/// \param b
|
||||
/// A 128-bit vector of [4 x float].
|
||||
/// \param c
|
||||
/// An immediate integer operand, with bits [4:0] specifying which comparison
|
||||
/// operation to use: \n
|
||||
/// 0x00: Equal (ordered, non-signaling) \n
|
||||
/// 0x01: Less-than (ordered, signaling) \n
|
||||
/// 0x02: Less-than-or-equal (ordered, signaling) \n
|
||||
/// 0x03: Unordered (non-signaling) \n
|
||||
/// 0x04: Not-equal (unordered, non-signaling) \n
|
||||
/// 0x05: Not-less-than (unordered, signaling) \n
|
||||
/// 0x06: Not-less-than-or-equal (unordered, signaling) \n
|
||||
/// 0x07: Ordered (non-signaling) \n
|
||||
/// \returns A 128-bit vector of [4 x float] containing the comparison results.
|
||||
#define _mm_cmp_ps(a, b, c) \
|
||||
((__m128)__builtin_ia32_cmpps((__v4sf)(__m128)(a), (__v4sf)(__m128)(b), (c)))
|
||||
|
||||
/// Compares each of the corresponding scalar values of two 128-bit
|
||||
/// vectors of [4 x float], using the operation specified by the immediate
|
||||
/// integer operand.
|
||||
///
|
||||
/// Each comparison returns 0x0 for false, 0xFFFFFFFF for true.
|
||||
/// If either value in a comparison is NaN, comparisons that are ordered
|
||||
/// return false, and comparisons that are unordered return true.
|
||||
///
|
||||
/// \headerfile <x86intrin.h>
|
||||
///
|
||||
/// \code
|
||||
/// __m128 _mm_cmp_ss(__m128 a, __m128 b, const int c);
|
||||
/// \endcode
|
||||
///
|
||||
/// This intrinsic corresponds to the <c> (V)CMPSS </c> instruction.
|
||||
///
|
||||
/// \param a
|
||||
/// A 128-bit vector of [4 x float].
|
||||
/// \param b
|
||||
/// A 128-bit vector of [4 x float].
|
||||
/// \param c
|
||||
/// An immediate integer operand, with bits [4:0] specifying which comparison
|
||||
/// operation to use: \n
|
||||
/// 0x00: Equal (ordered, non-signaling) \n
|
||||
/// 0x01: Less-than (ordered, signaling) \n
|
||||
/// 0x02: Less-than-or-equal (ordered, signaling) \n
|
||||
/// 0x03: Unordered (non-signaling) \n
|
||||
/// 0x04: Not-equal (unordered, non-signaling) \n
|
||||
/// 0x05: Not-less-than (unordered, signaling) \n
|
||||
/// 0x06: Not-less-than-or-equal (unordered, signaling) \n
|
||||
/// 0x07: Ordered (non-signaling) \n
|
||||
/// \returns A 128-bit vector of [4 x float] containing the comparison results.
|
||||
#define _mm_cmp_ss(a, b, c) \
|
||||
((__m128)__builtin_ia32_cmpss((__v4sf)(__m128)(a), (__v4sf)(__m128)(b), (c)))
|
||||
|
||||
#define _MM_ALIGN16 __attribute__((aligned(16)))
|
||||
|
||||
|
25
lib/include/yvals_core.h
vendored
Normal file
25
lib/include/yvals_core.h
vendored
Normal file
@ -0,0 +1,25 @@
|
||||
//===----- yvals_core.h - Internal MSVC STL core header -------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// Only include this if we are aiming for MSVC compatibility.
|
||||
#ifndef _MSC_VER
|
||||
#include_next <yvals_core.h>
|
||||
#else
|
||||
|
||||
#ifndef __clang_yvals_core_h
|
||||
#define __clang_yvals_core_h
|
||||
|
||||
#include_next <yvals_core.h>
|
||||
|
||||
#ifdef _STL_INTRIN_HEADER
|
||||
#undef _STL_INTRIN_HEADER
|
||||
#define _STL_INTRIN_HEADER <intrin0.h>
|
||||
#endif
|
||||
|
||||
#endif
|
||||
#endif
|
18
lib/include/zos_wrappers/builtins.h
vendored
Normal file
18
lib/include/zos_wrappers/builtins.h
vendored
Normal file
@ -0,0 +1,18 @@
|
||||
/*===---- builtins.h - z/Architecture Builtin Functions --------------------===
|
||||
*
|
||||
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
* See https://llvm.org/LICENSE.txt for license information.
|
||||
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
*
|
||||
*===-----------------------------------------------------------------------===
|
||||
*/
|
||||
|
||||
#ifndef __ZOS_WRAPPERS_BUILTINS_H
|
||||
#define __ZOS_WRAPPERS_BUILTINS_H
|
||||
#if defined(__MVS__)
|
||||
#include_next <builtins.h>
|
||||
#if defined(__VEC__)
|
||||
#include <vecintrin.h>
|
||||
#endif
|
||||
#endif /* defined(__MVS__) */
|
||||
#endif /* __ZOS_WRAPPERS_BUILTINS_H */
|
@ -26,7 +26,7 @@ _LIBCPP_PUSH_MACROS
|
||||
_LIBCPP_BEGIN_NAMESPACE_STD
|
||||
|
||||
template <class _Iter, class _Sent, class _BinaryPredicate>
|
||||
_LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Iter
|
||||
_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Iter
|
||||
__adjacent_find(_Iter __first, _Sent __last, _BinaryPredicate&& __pred) {
|
||||
if (__first == __last)
|
||||
return __first;
|
||||
@ -40,13 +40,13 @@ __adjacent_find(_Iter __first, _Sent __last, _BinaryPredicate&& __pred) {
|
||||
}
|
||||
|
||||
template <class _ForwardIterator, class _BinaryPredicate>
|
||||
_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator
|
||||
_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator
|
||||
adjacent_find(_ForwardIterator __first, _ForwardIterator __last, _BinaryPredicate __pred) {
|
||||
return std::__adjacent_find(std::move(__first), std::move(__last), __pred);
|
||||
}
|
||||
|
||||
template <class _ForwardIterator>
|
||||
_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator
|
||||
_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator
|
||||
adjacent_find(_ForwardIterator __first, _ForwardIterator __last) {
|
||||
return std::adjacent_find(std::move(__first), std::move(__last), __equal_to());
|
||||
}
|
||||
|
2
lib/libcxx/include/__algorithm/all_of.h
vendored
2
lib/libcxx/include/__algorithm/all_of.h
vendored
@ -19,7 +19,7 @@
|
||||
_LIBCPP_BEGIN_NAMESPACE_STD
|
||||
|
||||
template <class _InputIterator, class _Predicate>
|
||||
_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
|
||||
_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
|
||||
all_of(_InputIterator __first, _InputIterator __last, _Predicate __pred) {
|
||||
for (; __first != __last; ++__first)
|
||||
if (!__pred(*__first))
|
||||
|
2
lib/libcxx/include/__algorithm/any_of.h
vendored
2
lib/libcxx/include/__algorithm/any_of.h
vendored
@ -19,7 +19,7 @@
|
||||
_LIBCPP_BEGIN_NAMESPACE_STD
|
||||
|
||||
template <class _InputIterator, class _Predicate>
|
||||
_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
|
||||
_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
|
||||
any_of(_InputIterator __first, _InputIterator __last, _Predicate __pred) {
|
||||
for (; __first != __last; ++__first)
|
||||
if (__pred(*__first))
|
||||
|
@ -22,14 +22,14 @@
|
||||
_LIBCPP_BEGIN_NAMESPACE_STD
|
||||
|
||||
template <class _ForwardIterator, class _Tp, class _Compare>
|
||||
_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
|
||||
_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
|
||||
binary_search(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value, _Compare __comp) {
|
||||
__first = std::lower_bound<_ForwardIterator, _Tp, __comp_ref_type<_Compare> >(__first, __last, __value, __comp);
|
||||
return __first != __last && !__comp(__value, *__first);
|
||||
}
|
||||
|
||||
template <class _ForwardIterator, class _Tp>
|
||||
_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
|
||||
_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
|
||||
binary_search(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value) {
|
||||
return std::binary_search(__first, __last, __value, __less<>());
|
||||
}
|
||||
|
4
lib/libcxx/include/__algorithm/clamp.h
vendored
4
lib/libcxx/include/__algorithm/clamp.h
vendored
@ -21,7 +21,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD
|
||||
|
||||
#if _LIBCPP_STD_VER >= 17
|
||||
template <class _Tp, class _Compare>
|
||||
_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI constexpr const _Tp&
|
||||
[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI constexpr const _Tp&
|
||||
clamp(_LIBCPP_LIFETIMEBOUND const _Tp& __v,
|
||||
_LIBCPP_LIFETIMEBOUND const _Tp& __lo,
|
||||
_LIBCPP_LIFETIMEBOUND const _Tp& __hi,
|
||||
@ -31,7 +31,7 @@ clamp(_LIBCPP_LIFETIMEBOUND const _Tp& __v,
|
||||
}
|
||||
|
||||
template <class _Tp>
|
||||
_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI constexpr const _Tp&
|
||||
[[nodiscard]] inline _LIBCPP_HIDE_FROM_ABI constexpr const _Tp&
|
||||
clamp(_LIBCPP_LIFETIMEBOUND const _Tp& __v,
|
||||
_LIBCPP_LIFETIMEBOUND const _Tp& __lo,
|
||||
_LIBCPP_LIFETIMEBOUND const _Tp& __hi) {
|
||||
|
8
lib/libcxx/include/__algorithm/comp.h
vendored
8
lib/libcxx/include/__algorithm/comp.h
vendored
@ -10,8 +10,7 @@
|
||||
#define _LIBCPP___ALGORITHM_COMP_H
|
||||
|
||||
#include <__config>
|
||||
#include <__type_traits/integral_constant.h>
|
||||
#include <__type_traits/operation_traits.h>
|
||||
#include <__type_traits/desugars_to.h>
|
||||
|
||||
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
|
||||
# pragma GCC system_header
|
||||
@ -27,7 +26,7 @@ struct __equal_to {
|
||||
};
|
||||
|
||||
template <class _Tp, class _Up>
|
||||
struct __desugars_to<__equal_tag, __equal_to, _Tp, _Up> : true_type {};
|
||||
inline const bool __desugars_to_v<__equal_tag, __equal_to, _Tp, _Up> = true;
|
||||
|
||||
// The definition is required because __less is part of the ABI, but it's empty
|
||||
// because all comparisons should be transparent.
|
||||
@ -42,6 +41,9 @@ struct __less<void, void> {
|
||||
}
|
||||
};
|
||||
|
||||
template <class _Tp>
|
||||
inline const bool __desugars_to_v<__less_tag, __less<>, _Tp, _Tp> = true;
|
||||
|
||||
_LIBCPP_END_NAMESPACE_STD
|
||||
|
||||
#endif // _LIBCPP___ALGORITHM_COMP_H
|
||||
|
@ -41,9 +41,9 @@ struct __debug_less {
|
||||
}
|
||||
|
||||
template <class _LHS, class _RHS>
|
||||
_LIBCPP_CONSTEXPR_SINCE_CXX14 inline _LIBCPP_HIDE_FROM_ABI decltype((void)std::declval<_Compare&>()(
|
||||
std::declval<_LHS&>(), std::declval<_RHS&>()))
|
||||
__do_compare_assert(int, _LHS& __l, _RHS& __r) {
|
||||
_LIBCPP_CONSTEXPR_SINCE_CXX14 inline
|
||||
_LIBCPP_HIDE_FROM_ABI decltype((void)std::declval<_Compare&>()(std::declval<_LHS&>(), std::declval<_RHS&>()))
|
||||
__do_compare_assert(int, _LHS& __l, _RHS& __r) {
|
||||
_LIBCPP_ASSERT_SEMANTIC_REQUIREMENT(!__comp_(__l, __r), "Comparator does not induce a strict weak ordering");
|
||||
(void)__l;
|
||||
(void)__r;
|
||||
|
6
lib/libcxx/include/__algorithm/copy.h
vendored
6
lib/libcxx/include/__algorithm/copy.h
vendored
@ -32,7 +32,7 @@ template <class, class _InIter, class _Sent, class _OutIter>
|
||||
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_InIter, _OutIter> __copy(_InIter, _Sent, _OutIter);
|
||||
|
||||
template <class _AlgPolicy>
|
||||
struct __copy_loop {
|
||||
struct __copy_impl {
|
||||
template <class _InIter, class _Sent, class _OutIter>
|
||||
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_InIter, _OutIter>
|
||||
operator()(_InIter __first, _Sent __last, _OutIter __result) const {
|
||||
@ -94,9 +94,7 @@ struct __copy_loop {
|
||||
__local_first = _Traits::__begin(++__segment_iterator);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
struct __copy_trivial {
|
||||
// At this point, the iterators have been unwrapped so any `contiguous_iterator` has been unwrapped to a pointer.
|
||||
template <class _In, class _Out, __enable_if_t<__can_lower_copy_assignment_to_memmove<_In, _Out>::value, int> = 0>
|
||||
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_In*, _Out*>
|
||||
@ -108,7 +106,7 @@ struct __copy_trivial {
|
||||
template <class _AlgPolicy, class _InIter, class _Sent, class _OutIter>
|
||||
pair<_InIter, _OutIter> inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14
|
||||
__copy(_InIter __first, _Sent __last, _OutIter __result) {
|
||||
return std::__dispatch_copy_or_move<_AlgPolicy, __copy_loop<_AlgPolicy>, __copy_trivial>(
|
||||
return std::__copy_move_unwrap_iters<__copy_impl<_AlgPolicy> >(
|
||||
std::move(__first), std::move(__last), std::move(__result));
|
||||
}
|
||||
|
||||
|
@ -15,7 +15,7 @@
|
||||
#include <__config>
|
||||
#include <__iterator/segmented_iterator.h>
|
||||
#include <__type_traits/common_type.h>
|
||||
#include <__type_traits/is_copy_constructible.h>
|
||||
#include <__type_traits/is_constructible.h>
|
||||
#include <__utility/move.h>
|
||||
#include <__utility/pair.h>
|
||||
|
||||
@ -33,7 +33,7 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_InIter, _OutIter>
|
||||
__copy_backward(_InIter __first, _Sent __last, _OutIter __result);
|
||||
|
||||
template <class _AlgPolicy>
|
||||
struct __copy_backward_loop {
|
||||
struct __copy_backward_impl {
|
||||
template <class _InIter, class _Sent, class _OutIter>
|
||||
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_InIter, _OutIter>
|
||||
operator()(_InIter __first, _Sent __last, _OutIter __result) const {
|
||||
@ -104,9 +104,7 @@ struct __copy_backward_loop {
|
||||
__local_last = _Traits::__end(__segment_iterator);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
struct __copy_backward_trivial {
|
||||
// At this point, the iterators have been unwrapped so any `contiguous_iterator` has been unwrapped to a pointer.
|
||||
template <class _In, class _Out, __enable_if_t<__can_lower_copy_assignment_to_memmove<_In, _Out>::value, int> = 0>
|
||||
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 pair<_In*, _Out*>
|
||||
@ -118,7 +116,7 @@ struct __copy_backward_trivial {
|
||||
template <class _AlgPolicy, class _BidirectionalIterator1, class _Sentinel, class _BidirectionalIterator2>
|
||||
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_BidirectionalIterator1, _BidirectionalIterator2>
|
||||
__copy_backward(_BidirectionalIterator1 __first, _Sentinel __last, _BidirectionalIterator2 __result) {
|
||||
return std::__dispatch_copy_or_move<_AlgPolicy, __copy_backward_loop<_AlgPolicy>, __copy_backward_trivial>(
|
||||
return std::__copy_move_unwrap_iters<__copy_backward_impl<_AlgPolicy> >(
|
||||
std::move(__first), std::move(__last), std::move(__result));
|
||||
}
|
||||
|
||||
|
@ -19,9 +19,8 @@
|
||||
#include <__type_traits/enable_if.h>
|
||||
#include <__type_traits/is_always_bitcastable.h>
|
||||
#include <__type_traits/is_constant_evaluated.h>
|
||||
#include <__type_traits/is_copy_constructible.h>
|
||||
#include <__type_traits/is_constructible.h>
|
||||
#include <__type_traits/is_trivially_assignable.h>
|
||||
#include <__type_traits/is_trivially_copyable.h>
|
||||
#include <__type_traits/is_volatile.h>
|
||||
#include <__utility/move.h>
|
||||
#include <__utility/pair.h>
|
||||
@ -81,30 +80,17 @@ __copy_backward_trivial_impl(_In* __first, _In* __last, _Out* __result) {
|
||||
|
||||
// Iterator unwrapping and dispatching to the correct overload.
|
||||
|
||||
template <class _F1, class _F2>
|
||||
struct __overload : _F1, _F2 {
|
||||
using _F1::operator();
|
||||
using _F2::operator();
|
||||
};
|
||||
|
||||
template <class _InIter, class _Sent, class _OutIter, class = void>
|
||||
struct __can_rewrap : false_type {};
|
||||
|
||||
template <class _InIter, class _Sent, class _OutIter>
|
||||
struct __can_rewrap<_InIter,
|
||||
_Sent,
|
||||
_OutIter,
|
||||
// Note that sentinels are always copy-constructible.
|
||||
__enable_if_t< is_copy_constructible<_InIter>::value && is_copy_constructible<_OutIter>::value > >
|
||||
: true_type {};
|
||||
template <class _InIter, class _OutIter>
|
||||
struct __can_rewrap
|
||||
: integral_constant<bool, is_copy_constructible<_InIter>::value && is_copy_constructible<_OutIter>::value> {};
|
||||
|
||||
template <class _Algorithm,
|
||||
class _InIter,
|
||||
class _Sent,
|
||||
class _OutIter,
|
||||
__enable_if_t<__can_rewrap<_InIter, _Sent, _OutIter>::value, int> = 0>
|
||||
__enable_if_t<__can_rewrap<_InIter, _OutIter>::value, int> = 0>
|
||||
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 pair<_InIter, _OutIter>
|
||||
__unwrap_and_dispatch(_InIter __first, _Sent __last, _OutIter __out_first) {
|
||||
__copy_move_unwrap_iters(_InIter __first, _Sent __last, _OutIter __out_first) {
|
||||
auto __range = std::__unwrap_range(__first, std::move(__last));
|
||||
auto __result = _Algorithm()(std::move(__range.first), std::move(__range.second), std::__unwrap_iter(__out_first));
|
||||
return std::make_pair(std::__rewrap_range<_Sent>(std::move(__first), std::move(__result.first)),
|
||||
@ -115,24 +101,12 @@ template <class _Algorithm,
|
||||
class _InIter,
|
||||
class _Sent,
|
||||
class _OutIter,
|
||||
__enable_if_t<!__can_rewrap<_InIter, _Sent, _OutIter>::value, int> = 0>
|
||||
__enable_if_t<!__can_rewrap<_InIter, _OutIter>::value, int> = 0>
|
||||
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 pair<_InIter, _OutIter>
|
||||
__unwrap_and_dispatch(_InIter __first, _Sent __last, _OutIter __out_first) {
|
||||
__copy_move_unwrap_iters(_InIter __first, _Sent __last, _OutIter __out_first) {
|
||||
return _Algorithm()(std::move(__first), std::move(__last), std::move(__out_first));
|
||||
}
|
||||
|
||||
template <class _AlgPolicy,
|
||||
class _NaiveAlgorithm,
|
||||
class _OptimizedAlgorithm,
|
||||
class _InIter,
|
||||
class _Sent,
|
||||
class _OutIter>
|
||||
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX17 pair<_InIter, _OutIter>
|
||||
__dispatch_copy_or_move(_InIter __first, _Sent __last, _OutIter __out_first) {
|
||||
using _Algorithm = __overload<_NaiveAlgorithm, _OptimizedAlgorithm>;
|
||||
return std::__unwrap_and_dispatch<_Algorithm>(std::move(__first), std::move(__last), std::move(__out_first));
|
||||
}
|
||||
|
||||
_LIBCPP_END_NAMESPACE_STD
|
||||
|
||||
_LIBCPP_POP_MACROS
|
||||
|
2
lib/libcxx/include/__algorithm/count.h
vendored
2
lib/libcxx/include/__algorithm/count.h
vendored
@ -79,7 +79,7 @@ __count(__bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __l
|
||||
}
|
||||
|
||||
template <class _InputIterator, class _Tp>
|
||||
_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __iter_diff_t<_InputIterator>
|
||||
_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __iter_diff_t<_InputIterator>
|
||||
count(_InputIterator __first, _InputIterator __last, const _Tp& __value) {
|
||||
__identity __proj;
|
||||
return std::__count<_ClassicAlgPolicy>(__first, __last, __value, __proj);
|
||||
|
6
lib/libcxx/include/__algorithm/count_if.h
vendored
6
lib/libcxx/include/__algorithm/count_if.h
vendored
@ -20,9 +20,9 @@
|
||||
_LIBCPP_BEGIN_NAMESPACE_STD
|
||||
|
||||
template <class _InputIterator, class _Predicate>
|
||||
_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20
|
||||
typename iterator_traits<_InputIterator>::difference_type
|
||||
count_if(_InputIterator __first, _InputIterator __last, _Predicate __pred) {
|
||||
_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20
|
||||
typename iterator_traits<_InputIterator>::difference_type
|
||||
count_if(_InputIterator __first, _InputIterator __last, _Predicate __pred) {
|
||||
typename iterator_traits<_InputIterator>::difference_type __r(0);
|
||||
for (; __first != __last; ++__first)
|
||||
if (__pred(*__first))
|
||||
|
81
lib/libcxx/include/__algorithm/equal.h
vendored
81
lib/libcxx/include/__algorithm/equal.h
vendored
@ -18,12 +18,11 @@
|
||||
#include <__iterator/distance.h>
|
||||
#include <__iterator/iterator_traits.h>
|
||||
#include <__string/constexpr_c_functions.h>
|
||||
#include <__type_traits/desugars_to.h>
|
||||
#include <__type_traits/enable_if.h>
|
||||
#include <__type_traits/integral_constant.h>
|
||||
#include <__type_traits/is_constant_evaluated.h>
|
||||
#include <__type_traits/is_equality_comparable.h>
|
||||
#include <__type_traits/is_volatile.h>
|
||||
#include <__type_traits/operation_traits.h>
|
||||
#include <__utility/move.h>
|
||||
|
||||
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
|
||||
@ -47,7 +46,7 @@ _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 boo
|
||||
template <class _Tp,
|
||||
class _Up,
|
||||
class _BinaryPredicate,
|
||||
__enable_if_t<__desugars_to<__equal_tag, _BinaryPredicate, _Tp, _Up>::value && !is_volatile<_Tp>::value &&
|
||||
__enable_if_t<__desugars_to_v<__equal_tag, _BinaryPredicate, _Tp, _Up> && !is_volatile<_Tp>::value &&
|
||||
!is_volatile<_Up>::value && __libcpp_is_trivially_equality_comparable<_Tp, _Up>::value,
|
||||
int> = 0>
|
||||
_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
|
||||
@ -56,33 +55,19 @@ __equal_iter_impl(_Tp* __first1, _Tp* __last1, _Up* __first2, _BinaryPredicate&)
|
||||
}
|
||||
|
||||
template <class _InputIterator1, class _InputIterator2, class _BinaryPredicate>
|
||||
_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
|
||||
_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
|
||||
equal(_InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first2, _BinaryPredicate __pred) {
|
||||
return std::__equal_iter_impl(
|
||||
std::__unwrap_iter(__first1), std::__unwrap_iter(__last1), std::__unwrap_iter(__first2), __pred);
|
||||
}
|
||||
|
||||
template <class _InputIterator1, class _InputIterator2>
|
||||
_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
|
||||
_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
|
||||
equal(_InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first2) {
|
||||
return std::equal(__first1, __last1, __first2, __equal_to());
|
||||
}
|
||||
|
||||
#if _LIBCPP_STD_VER >= 14
|
||||
template <class _BinaryPredicate, class _InputIterator1, class _InputIterator2>
|
||||
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
|
||||
__equal(_InputIterator1 __first1,
|
||||
_InputIterator1 __last1,
|
||||
_InputIterator2 __first2,
|
||||
_InputIterator2 __last2,
|
||||
_BinaryPredicate __pred,
|
||||
input_iterator_tag,
|
||||
input_iterator_tag) {
|
||||
for (; __first1 != __last1 && __first2 != __last2; ++__first1, (void)++__first2)
|
||||
if (!__pred(*__first1, *__first2))
|
||||
return false;
|
||||
return __first1 == __last1 && __first2 == __last2;
|
||||
}
|
||||
|
||||
template <class _Iter1, class _Sent1, class _Iter2, class _Sent2, class _Pred, class _Proj1, class _Proj2>
|
||||
_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool __equal_impl(
|
||||
@ -101,7 +86,7 @@ template <class _Tp,
|
||||
class _Pred,
|
||||
class _Proj1,
|
||||
class _Proj2,
|
||||
__enable_if_t<__desugars_to<__equal_tag, _Pred, _Tp, _Up>::value && __is_identity<_Proj1>::value &&
|
||||
__enable_if_t<__desugars_to_v<__equal_tag, _Pred, _Tp, _Up> && __is_identity<_Proj1>::value &&
|
||||
__is_identity<_Proj2>::value && !is_volatile<_Tp>::value && !is_volatile<_Up>::value &&
|
||||
__libcpp_is_trivially_equality_comparable<_Tp, _Up>::value,
|
||||
int> = 0>
|
||||
@ -110,17 +95,18 @@ __equal_impl(_Tp* __first1, _Tp* __last1, _Up* __first2, _Up*, _Pred&, _Proj1&,
|
||||
return std::__constexpr_memcmp_equal(__first1, __first2, __element_count(__last1 - __first1));
|
||||
}
|
||||
|
||||
template <class _BinaryPredicate, class _RandomAccessIterator1, class _RandomAccessIterator2>
|
||||
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
|
||||
__equal(_RandomAccessIterator1 __first1,
|
||||
_RandomAccessIterator1 __last1,
|
||||
_RandomAccessIterator2 __first2,
|
||||
_RandomAccessIterator2 __last2,
|
||||
_BinaryPredicate __pred,
|
||||
random_access_iterator_tag,
|
||||
random_access_iterator_tag) {
|
||||
if (std::distance(__first1, __last1) != std::distance(__first2, __last2))
|
||||
return false;
|
||||
template <class _InputIterator1, class _InputIterator2, class _BinaryPredicate>
|
||||
_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
|
||||
equal(_InputIterator1 __first1,
|
||||
_InputIterator1 __last1,
|
||||
_InputIterator2 __first2,
|
||||
_InputIterator2 __last2,
|
||||
_BinaryPredicate __pred) {
|
||||
if constexpr (__has_random_access_iterator_category<_InputIterator1>::value &&
|
||||
__has_random_access_iterator_category<_InputIterator2>::value) {
|
||||
if (std::distance(__first1, __last1) != std::distance(__first2, __last2))
|
||||
return false;
|
||||
}
|
||||
__identity __proj;
|
||||
return std::__equal_impl(
|
||||
std::__unwrap_iter(__first1),
|
||||
@ -132,36 +118,13 @@ __equal(_RandomAccessIterator1 __first1,
|
||||
__proj);
|
||||
}
|
||||
|
||||
template <class _InputIterator1, class _InputIterator2, class _BinaryPredicate>
|
||||
_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
|
||||
equal(_InputIterator1 __first1,
|
||||
_InputIterator1 __last1,
|
||||
_InputIterator2 __first2,
|
||||
_InputIterator2 __last2,
|
||||
_BinaryPredicate __pred) {
|
||||
return std::__equal<_BinaryPredicate&>(
|
||||
__first1,
|
||||
__last1,
|
||||
__first2,
|
||||
__last2,
|
||||
__pred,
|
||||
typename iterator_traits<_InputIterator1>::iterator_category(),
|
||||
typename iterator_traits<_InputIterator2>::iterator_category());
|
||||
template <class _InputIterator1, class _InputIterator2>
|
||||
_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
|
||||
equal(_InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first2, _InputIterator2 __last2) {
|
||||
return std::equal(__first1, __last1, __first2, __last2, __equal_to());
|
||||
}
|
||||
|
||||
template <class _InputIterator1, class _InputIterator2>
|
||||
_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
|
||||
equal(_InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first2, _InputIterator2 __last2) {
|
||||
return std::__equal(
|
||||
__first1,
|
||||
__last1,
|
||||
__first2,
|
||||
__last2,
|
||||
__equal_to(),
|
||||
typename iterator_traits<_InputIterator1>::iterator_category(),
|
||||
typename iterator_traits<_InputIterator2>::iterator_category());
|
||||
}
|
||||
#endif
|
||||
#endif // _LIBCPP_STD_VER >= 14
|
||||
|
||||
_LIBCPP_END_NAMESPACE_STD
|
||||
|
||||
|
6
lib/libcxx/include/__algorithm/equal_range.h
vendored
6
lib/libcxx/include/__algorithm/equal_range.h
vendored
@ -23,7 +23,7 @@
|
||||
#include <__iterator/iterator_traits.h>
|
||||
#include <__iterator/next.h>
|
||||
#include <__type_traits/is_callable.h>
|
||||
#include <__type_traits/is_copy_constructible.h>
|
||||
#include <__type_traits/is_constructible.h>
|
||||
#include <__utility/move.h>
|
||||
#include <__utility/pair.h>
|
||||
|
||||
@ -60,7 +60,7 @@ __equal_range(_Iter __first, _Sent __last, const _Tp& __value, _Compare&& __comp
|
||||
}
|
||||
|
||||
template <class _ForwardIterator, class _Tp, class _Compare>
|
||||
_LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_ForwardIterator, _ForwardIterator>
|
||||
_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_ForwardIterator, _ForwardIterator>
|
||||
equal_range(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value, _Compare __comp) {
|
||||
static_assert(__is_callable<_Compare, decltype(*__first), const _Tp&>::value, "The comparator has to be callable");
|
||||
static_assert(is_copy_constructible<_ForwardIterator>::value, "Iterator has to be copy constructible");
|
||||
@ -73,7 +73,7 @@ equal_range(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __valu
|
||||
}
|
||||
|
||||
template <class _ForwardIterator, class _Tp>
|
||||
_LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_ForwardIterator, _ForwardIterator>
|
||||
_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_ForwardIterator, _ForwardIterator>
|
||||
equal_range(_ForwardIterator __first, _ForwardIterator __last, const _Tp& __value) {
|
||||
return std::equal_range(std::move(__first), std::move(__last), __value, __less<>());
|
||||
}
|
||||
|
58
lib/libcxx/include/__algorithm/fill_n.h
vendored
58
lib/libcxx/include/__algorithm/fill_n.h
vendored
@ -9,18 +9,74 @@
|
||||
#ifndef _LIBCPP___ALGORITHM_FILL_N_H
|
||||
#define _LIBCPP___ALGORITHM_FILL_N_H
|
||||
|
||||
#include <__algorithm/min.h>
|
||||
#include <__config>
|
||||
#include <__fwd/bit_reference.h>
|
||||
#include <__iterator/iterator_traits.h>
|
||||
#include <__memory/pointer_traits.h>
|
||||
#include <__utility/convert_to_integral.h>
|
||||
|
||||
#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
|
||||
# pragma GCC system_header
|
||||
#endif
|
||||
|
||||
_LIBCPP_PUSH_MACROS
|
||||
#include <__undef_macros>
|
||||
|
||||
_LIBCPP_BEGIN_NAMESPACE_STD
|
||||
|
||||
// fill_n isn't specialized for std::memset, because the compiler already optimizes the loop to a call to std::memset.
|
||||
|
||||
template <class _OutputIterator, class _Size, class _Tp>
|
||||
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _OutputIterator
|
||||
__fill_n(_OutputIterator __first, _Size __n, const _Tp& __value);
|
||||
|
||||
template <bool _FillVal, class _Cp>
|
||||
_LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI void
|
||||
__fill_n_bool(__bit_iterator<_Cp, false> __first, typename _Cp::size_type __n) {
|
||||
using _It = __bit_iterator<_Cp, false>;
|
||||
using __storage_type = typename _It::__storage_type;
|
||||
|
||||
const int __bits_per_word = _It::__bits_per_word;
|
||||
// do first partial word
|
||||
if (__first.__ctz_ != 0) {
|
||||
__storage_type __clz_f = static_cast<__storage_type>(__bits_per_word - __first.__ctz_);
|
||||
__storage_type __dn = std::min(__clz_f, __n);
|
||||
__storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn));
|
||||
if (_FillVal)
|
||||
*__first.__seg_ |= __m;
|
||||
else
|
||||
*__first.__seg_ &= ~__m;
|
||||
__n -= __dn;
|
||||
++__first.__seg_;
|
||||
}
|
||||
// do middle whole words
|
||||
__storage_type __nw = __n / __bits_per_word;
|
||||
std::__fill_n(std::__to_address(__first.__seg_), __nw, _FillVal ? static_cast<__storage_type>(-1) : 0);
|
||||
__n -= __nw * __bits_per_word;
|
||||
// do last partial word
|
||||
if (__n > 0) {
|
||||
__first.__seg_ += __nw;
|
||||
__storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n);
|
||||
if (_FillVal)
|
||||
*__first.__seg_ |= __m;
|
||||
else
|
||||
*__first.__seg_ &= ~__m;
|
||||
}
|
||||
}
|
||||
|
||||
template <class _Cp, class _Size>
|
||||
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __bit_iterator<_Cp, false>
|
||||
__fill_n(__bit_iterator<_Cp, false> __first, _Size __n, const bool& __value) {
|
||||
if (__n > 0) {
|
||||
if (__value)
|
||||
std::__fill_n_bool<true>(__first, __n);
|
||||
else
|
||||
std::__fill_n_bool<false>(__first, __n);
|
||||
}
|
||||
return __first + __n;
|
||||
}
|
||||
|
||||
template <class _OutputIterator, class _Size, class _Tp>
|
||||
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _OutputIterator
|
||||
__fill_n(_OutputIterator __first, _Size __n, const _Tp& __value) {
|
||||
@ -37,4 +93,6 @@ fill_n(_OutputIterator __first, _Size __n, const _Tp& __value) {
|
||||
|
||||
_LIBCPP_END_NAMESPACE_STD
|
||||
|
||||
_LIBCPP_POP_MACROS
|
||||
|
||||
#endif // _LIBCPP___ALGORITHM_FILL_N_H
|
||||
|
22
lib/libcxx/include/__algorithm/find.h
vendored
22
lib/libcxx/include/__algorithm/find.h
vendored
@ -43,7 +43,7 @@ _LIBCPP_BEGIN_NAMESPACE_STD
|
||||
// generic implementation
|
||||
template <class _Iter, class _Sent, class _Tp, class _Proj>
|
||||
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Iter
|
||||
__find_impl(_Iter __first, _Sent __last, const _Tp& __value, _Proj& __proj) {
|
||||
__find(_Iter __first, _Sent __last, const _Tp& __value, _Proj& __proj) {
|
||||
for (; __first != __last; ++__first)
|
||||
if (std::__invoke(__proj, *__first) == __value)
|
||||
break;
|
||||
@ -57,8 +57,7 @@ template <class _Tp,
|
||||
__enable_if_t<__is_identity<_Proj>::value && __libcpp_is_trivially_equality_comparable<_Tp, _Up>::value &&
|
||||
sizeof(_Tp) == 1,
|
||||
int> = 0>
|
||||
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp*
|
||||
__find_impl(_Tp* __first, _Tp* __last, const _Up& __value, _Proj&) {
|
||||
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp* __find(_Tp* __first, _Tp* __last, const _Up& __value, _Proj&) {
|
||||
if (auto __ret = std::__constexpr_memchr(__first, __value, __last - __first))
|
||||
return __ret;
|
||||
return __last;
|
||||
@ -71,8 +70,7 @@ template <class _Tp,
|
||||
__enable_if_t<__is_identity<_Proj>::value && __libcpp_is_trivially_equality_comparable<_Tp, _Up>::value &&
|
||||
sizeof(_Tp) == sizeof(wchar_t) && _LIBCPP_ALIGNOF(_Tp) >= _LIBCPP_ALIGNOF(wchar_t),
|
||||
int> = 0>
|
||||
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp*
|
||||
__find_impl(_Tp* __first, _Tp* __last, const _Up& __value, _Proj&) {
|
||||
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp* __find(_Tp* __first, _Tp* __last, const _Up& __value, _Proj&) {
|
||||
if (auto __ret = std::__constexpr_wmemchr(__first, __value, __last - __first))
|
||||
return __ret;
|
||||
return __last;
|
||||
@ -89,10 +87,10 @@ template <class _Tp,
|
||||
is_signed<_Tp>::value == is_signed<_Up>::value,
|
||||
int> = 0>
|
||||
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Tp*
|
||||
__find_impl(_Tp* __first, _Tp* __last, const _Up& __value, _Proj& __proj) {
|
||||
__find(_Tp* __first, _Tp* __last, const _Up& __value, _Proj& __proj) {
|
||||
if (__value < numeric_limits<_Tp>::min() || __value > numeric_limits<_Tp>::max())
|
||||
return __last;
|
||||
return std::__find_impl(__first, __last, _Tp(__value), __proj);
|
||||
return std::__find(__first, __last, _Tp(__value), __proj);
|
||||
}
|
||||
|
||||
// __bit_iterator implementation
|
||||
@ -134,7 +132,7 @@ __find_bool(__bit_iterator<_Cp, _IsConst> __first, typename _Cp::size_type __n)
|
||||
|
||||
template <class _Cp, bool _IsConst, class _Tp, class _Proj, __enable_if_t<__is_identity<_Proj>::value, int> = 0>
|
||||
inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 __bit_iterator<_Cp, _IsConst>
|
||||
__find_impl(__bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, const _Tp& __value, _Proj&) {
|
||||
__find(__bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, const _Tp& __value, _Proj&) {
|
||||
if (static_cast<bool>(__value))
|
||||
return std::__find_bool<true>(__first, static_cast<typename _Cp::size_type>(__last - __first));
|
||||
return std::__find_bool<false>(__first, static_cast<typename _Cp::size_type>(__last - __first));
|
||||
@ -150,7 +148,7 @@ template <class _SegmentedIterator,
|
||||
class _Proj,
|
||||
__enable_if_t<__is_segmented_iterator<_SegmentedIterator>::value, int> = 0>
|
||||
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _SegmentedIterator
|
||||
__find_impl(_SegmentedIterator __first, _SegmentedIterator __last, const _Tp& __value, _Proj& __proj) {
|
||||
__find(_SegmentedIterator __first, _SegmentedIterator __last, const _Tp& __value, _Proj& __proj) {
|
||||
return std::__find_segment_if(std::move(__first), std::move(__last), __find_segment<_Tp>(__value), __proj);
|
||||
}
|
||||
|
||||
@ -163,17 +161,17 @@ struct __find_segment {
|
||||
template <class _InputIterator, class _Proj>
|
||||
_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR _InputIterator
|
||||
operator()(_InputIterator __first, _InputIterator __last, _Proj& __proj) const {
|
||||
return std::__find_impl(__first, __last, __value_, __proj);
|
||||
return std::__find(__first, __last, __value_, __proj);
|
||||
}
|
||||
};
|
||||
|
||||
// public API
|
||||
template <class _InputIterator, class _Tp>
|
||||
_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _InputIterator
|
||||
_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _InputIterator
|
||||
find(_InputIterator __first, _InputIterator __last, const _Tp& __value) {
|
||||
__identity __proj;
|
||||
return std::__rewrap_iter(
|
||||
__first, std::__find_impl(std::__unwrap_iter(__first), std::__unwrap_iter(__last), __value, __proj));
|
||||
__first, std::__find(std::__unwrap_iter(__first), std::__unwrap_iter(__last), __value, __proj));
|
||||
}
|
||||
|
||||
_LIBCPP_END_NAMESPACE_STD
|
||||
|
4
lib/libcxx/include/__algorithm/find_end.h
vendored
4
lib/libcxx/include/__algorithm/find_end.h
vendored
@ -205,7 +205,7 @@ _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _Fo
|
||||
}
|
||||
|
||||
template <class _ForwardIterator1, class _ForwardIterator2, class _BinaryPredicate>
|
||||
_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator1 find_end(
|
||||
_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator1 find_end(
|
||||
_ForwardIterator1 __first1,
|
||||
_ForwardIterator1 __last1,
|
||||
_ForwardIterator2 __first2,
|
||||
@ -215,7 +215,7 @@ _LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20
|
||||
}
|
||||
|
||||
template <class _ForwardIterator1, class _ForwardIterator2>
|
||||
_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator1
|
||||
_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator1
|
||||
find_end(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator2 __last2) {
|
||||
return std::find_end(__first1, __last1, __first2, __last2, __equal_to());
|
||||
}
|
||||
|
@ -35,7 +35,7 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX14 _ForwardIterator1 __find_fir
|
||||
}
|
||||
|
||||
template <class _ForwardIterator1, class _ForwardIterator2, class _BinaryPredicate>
|
||||
_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator1 find_first_of(
|
||||
_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator1 find_first_of(
|
||||
_ForwardIterator1 __first1,
|
||||
_ForwardIterator1 __last1,
|
||||
_ForwardIterator2 __first2,
|
||||
@ -45,7 +45,7 @@ _LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20
|
||||
}
|
||||
|
||||
template <class _ForwardIterator1, class _ForwardIterator2>
|
||||
_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator1 find_first_of(
|
||||
_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _ForwardIterator1 find_first_of(
|
||||
_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator2 __last2) {
|
||||
return std::__find_first_of_ce(__first1, __last1, __first2, __last2, __equal_to());
|
||||
}
|
||||
|
2
lib/libcxx/include/__algorithm/find_if.h
vendored
2
lib/libcxx/include/__algorithm/find_if.h
vendored
@ -19,7 +19,7 @@
|
||||
_LIBCPP_BEGIN_NAMESPACE_STD
|
||||
|
||||
template <class _InputIterator, class _Predicate>
|
||||
_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _InputIterator
|
||||
_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _InputIterator
|
||||
find_if(_InputIterator __first, _InputIterator __last, _Predicate __pred) {
|
||||
for (; __first != __last; ++__first)
|
||||
if (__pred(*__first))
|
||||
|
2
lib/libcxx/include/__algorithm/find_if_not.h
vendored
2
lib/libcxx/include/__algorithm/find_if_not.h
vendored
@ -19,7 +19,7 @@
|
||||
_LIBCPP_BEGIN_NAMESPACE_STD
|
||||
|
||||
template <class _InputIterator, class _Predicate>
|
||||
_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _InputIterator
|
||||
_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _InputIterator
|
||||
find_if_not(_InputIterator __first, _InputIterator __last, _Predicate __pred) {
|
||||
for (; __first != __last; ++__first)
|
||||
if (!__pred(*__first))
|
||||
|
10
lib/libcxx/include/__algorithm/fold.h
vendored
10
lib/libcxx/include/__algorithm/fold.h
vendored
@ -78,8 +78,7 @@ concept __indirectly_binary_left_foldable =
|
||||
|
||||
struct __fold_left_with_iter {
|
||||
template <input_iterator _Ip, sentinel_for<_Ip> _Sp, class _Tp, __indirectly_binary_left_foldable<_Tp, _Ip> _Fp>
|
||||
_LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI static constexpr auto
|
||||
operator()(_Ip __first, _Sp __last, _Tp __init, _Fp __f) {
|
||||
[[nodiscard]] _LIBCPP_HIDE_FROM_ABI static constexpr auto operator()(_Ip __first, _Sp __last, _Tp __init, _Fp __f) {
|
||||
using _Up = decay_t<invoke_result_t<_Fp&, _Tp, iter_reference_t<_Ip>>>;
|
||||
|
||||
if (__first == __last) {
|
||||
@ -95,7 +94,7 @@ struct __fold_left_with_iter {
|
||||
}
|
||||
|
||||
template <input_range _Rp, class _Tp, __indirectly_binary_left_foldable<_Tp, iterator_t<_Rp>> _Fp>
|
||||
_LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI static constexpr auto operator()(_Rp&& __r, _Tp __init, _Fp __f) {
|
||||
[[nodiscard]] _LIBCPP_HIDE_FROM_ABI static constexpr auto operator()(_Rp&& __r, _Tp __init, _Fp __f) {
|
||||
auto __result = operator()(ranges::begin(__r), ranges::end(__r), std::move(__init), std::ref(__f));
|
||||
|
||||
using _Up = decay_t<invoke_result_t<_Fp&, _Tp, range_reference_t<_Rp>>>;
|
||||
@ -107,13 +106,12 @@ inline constexpr auto fold_left_with_iter = __fold_left_with_iter();
|
||||
|
||||
struct __fold_left {
|
||||
template <input_iterator _Ip, sentinel_for<_Ip> _Sp, class _Tp, __indirectly_binary_left_foldable<_Tp, _Ip> _Fp>
|
||||
_LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI static constexpr auto
|
||||
operator()(_Ip __first, _Sp __last, _Tp __init, _Fp __f) {
|
||||
[[nodiscard]] _LIBCPP_HIDE_FROM_ABI static constexpr auto operator()(_Ip __first, _Sp __last, _Tp __init, _Fp __f) {
|
||||
return fold_left_with_iter(std::move(__first), std::move(__last), std::move(__init), std::ref(__f)).value;
|
||||
}
|
||||
|
||||
template <input_range _Rp, class _Tp, __indirectly_binary_left_foldable<_Tp, iterator_t<_Rp>> _Fp>
|
||||
_LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI static constexpr auto operator()(_Rp&& __r, _Tp __init, _Fp __f) {
|
||||
[[nodiscard]] _LIBCPP_HIDE_FROM_ABI static constexpr auto operator()(_Rp&& __r, _Tp __init, _Fp __f) {
|
||||
return fold_left_with_iter(ranges::begin(__r), ranges::end(__r), std::move(__init), std::ref(__f)).value;
|
||||
}
|
||||
};
|
||||
|
4
lib/libcxx/include/__algorithm/includes.h
vendored
4
lib/libcxx/include/__algorithm/includes.h
vendored
@ -47,7 +47,7 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool __includes(
|
||||
}
|
||||
|
||||
template <class _InputIterator1, class _InputIterator2, class _Compare>
|
||||
_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
|
||||
_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
|
||||
includes(_InputIterator1 __first1,
|
||||
_InputIterator1 __last1,
|
||||
_InputIterator2 __first2,
|
||||
@ -67,7 +67,7 @@ includes(_InputIterator1 __first1,
|
||||
}
|
||||
|
||||
template <class _InputIterator1, class _InputIterator2>
|
||||
_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
|
||||
_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
|
||||
includes(_InputIterator1 __first1, _InputIterator1 __last1, _InputIterator2 __first2, _InputIterator2 __last2) {
|
||||
return std::includes(std::move(__first1), std::move(__last1), std::move(__first2), std::move(__last2), __less<>());
|
||||
}
|
||||
|
@ -114,8 +114,8 @@ _LIBCPP_HIDE_FROM_ABI void __buffered_inplace_merge(
|
||||
for (_BidirectionalIterator __i = __middle; __i != __last;
|
||||
__d.template __incr<value_type>(), (void)++__i, (void)++__p)
|
||||
::new ((void*)__p) value_type(_IterOps<_AlgPolicy>::__iter_move(__i));
|
||||
typedef __unconstrained_reverse_iterator<_BidirectionalIterator> _RBi;
|
||||
typedef __unconstrained_reverse_iterator<value_type*> _Rv;
|
||||
typedef reverse_iterator<_BidirectionalIterator> _RBi;
|
||||
typedef reverse_iterator<value_type*> _Rv;
|
||||
typedef __invert<_Compare> _Inverted;
|
||||
std::__half_inplace_merge<_AlgPolicy>(
|
||||
_Rv(__p), _Rv(__buff), _RBi(__middle), _RBi(__first), _RBi(__last), _Inverted(__comp));
|
||||
|
4
lib/libcxx/include/__algorithm/is_heap.h
vendored
4
lib/libcxx/include/__algorithm/is_heap.h
vendored
@ -22,13 +22,13 @@
|
||||
_LIBCPP_BEGIN_NAMESPACE_STD
|
||||
|
||||
template <class _RandomAccessIterator, class _Compare>
|
||||
_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
|
||||
_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
|
||||
is_heap(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) {
|
||||
return std::__is_heap_until(__first, __last, static_cast<__comp_ref_type<_Compare> >(__comp)) == __last;
|
||||
}
|
||||
|
||||
template <class _RandomAccessIterator>
|
||||
_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
|
||||
_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
|
||||
is_heap(_RandomAccessIterator __first, _RandomAccessIterator __last) {
|
||||
return std::is_heap(__first, __last, __less<>());
|
||||
}
|
||||
|
@ -46,13 +46,13 @@ __is_heap_until(_RandomAccessIterator __first, _RandomAccessIterator __last, _Co
|
||||
}
|
||||
|
||||
template <class _RandomAccessIterator, class _Compare>
|
||||
_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _RandomAccessIterator
|
||||
_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _RandomAccessIterator
|
||||
is_heap_until(_RandomAccessIterator __first, _RandomAccessIterator __last, _Compare __comp) {
|
||||
return std::__is_heap_until(__first, __last, static_cast<__comp_ref_type<_Compare> >(__comp));
|
||||
}
|
||||
|
||||
template <class _RandomAccessIterator>
|
||||
_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _RandomAccessIterator
|
||||
_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _RandomAccessIterator
|
||||
is_heap_until(_RandomAccessIterator __first, _RandomAccessIterator __last) {
|
||||
return std::__is_heap_until(__first, __last, __less<>());
|
||||
}
|
||||
|
@ -18,7 +18,7 @@
|
||||
_LIBCPP_BEGIN_NAMESPACE_STD
|
||||
|
||||
template <class _InputIterator, class _Predicate>
|
||||
_LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
|
||||
_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
|
||||
is_partitioned(_InputIterator __first, _InputIterator __last, _Predicate __pred) {
|
||||
for (; __first != __last; ++__first)
|
||||
if (!__pred(*__first))
|
||||
|
10
lib/libcxx/include/__algorithm/is_permutation.h
vendored
10
lib/libcxx/include/__algorithm/is_permutation.h
vendored
@ -113,7 +113,7 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool __is_permutation_impl(
|
||||
|
||||
// 2+1 iterators, predicate. Not used by range algorithms.
|
||||
template <class _AlgPolicy, class _ForwardIterator1, class _Sentinel1, class _ForwardIterator2, class _BinaryPredicate>
|
||||
_LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool __is_permutation(
|
||||
_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool __is_permutation(
|
||||
_ForwardIterator1 __first1, _Sentinel1 __last1, _ForwardIterator2 __first2, _BinaryPredicate&& __pred) {
|
||||
// Shorten sequences as much as possible by lopping of any equal prefix.
|
||||
for (; __first1 != __last1; ++__first1, (void)++__first2) {
|
||||
@ -247,7 +247,7 @@ _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool __is_permutation(
|
||||
|
||||
// 2+1 iterators, predicate
|
||||
template <class _ForwardIterator1, class _ForwardIterator2, class _BinaryPredicate>
|
||||
_LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool is_permutation(
|
||||
_LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool is_permutation(
|
||||
_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _BinaryPredicate __pred) {
|
||||
static_assert(__is_callable<_BinaryPredicate, decltype(*__first1), decltype(*__first2)>::value,
|
||||
"The predicate has to be callable");
|
||||
@ -257,7 +257,7 @@ _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool i
|
||||
|
||||
// 2+1 iterators
|
||||
template <class _ForwardIterator1, class _ForwardIterator2>
|
||||
_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
|
||||
_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
|
||||
is_permutation(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2) {
|
||||
return std::is_permutation(__first1, __last1, __first2, __equal_to());
|
||||
}
|
||||
@ -266,7 +266,7 @@ is_permutation(_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIt
|
||||
|
||||
// 2+2 iterators
|
||||
template <class _ForwardIterator1, class _ForwardIterator2>
|
||||
_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool is_permutation(
|
||||
_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool is_permutation(
|
||||
_ForwardIterator1 __first1, _ForwardIterator1 __last1, _ForwardIterator2 __first2, _ForwardIterator2 __last2) {
|
||||
return std::__is_permutation<_ClassicAlgPolicy>(
|
||||
std::move(__first1),
|
||||
@ -280,7 +280,7 @@ _LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20
|
||||
|
||||
// 2+2 iterators, predicate
|
||||
template <class _ForwardIterator1, class _ForwardIterator2, class _BinaryPredicate>
|
||||
_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool is_permutation(
|
||||
_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool is_permutation(
|
||||
_ForwardIterator1 __first1,
|
||||
_ForwardIterator1 __last1,
|
||||
_ForwardIterator2 __first2,
|
||||
|
4
lib/libcxx/include/__algorithm/is_sorted.h
vendored
4
lib/libcxx/include/__algorithm/is_sorted.h
vendored
@ -22,13 +22,13 @@
|
||||
_LIBCPP_BEGIN_NAMESPACE_STD
|
||||
|
||||
template <class _ForwardIterator, class _Compare>
|
||||
_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
|
||||
_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
|
||||
is_sorted(_ForwardIterator __first, _ForwardIterator __last, _Compare __comp) {
|
||||
return std::__is_sorted_until<__comp_ref_type<_Compare> >(__first, __last, __comp) == __last;
|
||||
}
|
||||
|
||||
template <class _ForwardIterator>
|
||||
_LIBCPP_NODISCARD_EXT inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
|
||||
_LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool
|
||||
is_sorted(_ForwardIterator __first, _ForwardIterator __last) {
|
||||
return std::is_sorted(__first, __last, __less<>());
|
||||
}
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user