Files
cli/vendor/aws-lc-sys/aws-lc/crypto/fipsmodule/cpucap/internal.h

339 lines
11 KiB
C

// Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0 OR ISC
#ifndef OPENSSL_HEADER_CPUCAP_INTERNAL_H
#define OPENSSL_HEADER_CPUCAP_INTERNAL_H
#include <openssl/base.h>
#if defined(__cplusplus)
extern "C" {
#endif
#if defined(OPENSSL_X86) || defined(OPENSSL_X86_64) || defined(OPENSSL_ARM) || \
defined(OPENSSL_AARCH64) || defined(OPENSSL_PPC64LE)
#define HAS_OPENSSL_CPUID_SETUP
// OPENSSL_cpuid_setup initializes the platform-specific feature cache.
void OPENSSL_cpuid_setup(void);
#endif
// Runtime CPU feature support
#if defined(OPENSSL_X86) || defined(OPENSSL_X86_64)
// OPENSSL_ia32cap_P contains the Intel CPUID bits when running on an x86 or
// x86-64 system.
//
// Index 0:
// EDX for CPUID where EAX = 1
// Bit 20 is always zero
// Bit 28 is adjusted to reflect whether the data cache is shared between
// multiple logical cores
// Bit 30 is used to indicate an Intel CPU
// Index 1:
// ECX for CPUID where EAX = 1
// Bit 11 is used to indicate AMD XOP support, not SDBG
// Index 2:
// EBX for CPUID where EAX = 7
// Index 3:
// ECX for CPUID where EAX = 7
//
// Note: the CPUID bits are pre-adjusted for the OSXSAVE bit and the YMM and XMM
// bits in XCR0, so it is not necessary to check those. (WARNING: See caveats
// in cpu_intel.c.)
extern uint32_t OPENSSL_ia32cap_P[4];
#if defined(BORINGSSL_FIPS) && !defined(BORINGSSL_SHARED_LIBRARY)
// The FIPS module, as a static library, requires an out-of-line version of
// |OPENSSL_ia32cap_get| so accesses can be rewritten by delocate. Mark the
// function const so multiple accesses can be optimized together.
const uint32_t *OPENSSL_ia32cap_get(void) __attribute__((const));
#else
OPENSSL_INLINE const uint32_t *OPENSSL_ia32cap_get(void) {
return OPENSSL_ia32cap_P;
}
#endif
// See Intel manual, volume 2A, table 3-11.
OPENSSL_INLINE int CRYPTO_is_FXSR_capable(void) {
return (OPENSSL_ia32cap_get()[0] & (1 << 24)) != 0;
}
OPENSSL_INLINE int CRYPTO_is_intel_cpu(void) {
// The reserved bit 30 is used to indicate an Intel CPU.
return (OPENSSL_ia32cap_get()[0] & (1 << 30)) != 0;
}
// See Intel manual, volume 2A, table 3-10.
OPENSSL_INLINE int CRYPTO_is_PCLMUL_capable(void) {
return (OPENSSL_ia32cap_get()[1] & (1 << 1)) != 0;
}
OPENSSL_INLINE int CRYPTO_is_SSSE3_capable(void) {
return (OPENSSL_ia32cap_get()[1] & (1 << 9)) != 0;
}
OPENSSL_INLINE int CRYPTO_is_SSE4_1_capable(void) {
return (OPENSSL_ia32cap_get()[1] & (1 << 19)) != 0;
}
OPENSSL_INLINE int CRYPTO_is_MOVBE_capable(void) {
return (OPENSSL_ia32cap_get()[1] & (1 << 22)) != 0;
}
OPENSSL_INLINE int CRYPTO_is_AESNI_capable(void) {
return (OPENSSL_ia32cap_get()[1] & (1 << 25)) != 0;
}
// We intentionally avoid defining a |CRYPTO_is_XSAVE_capable| function. See
// |CRYPTO_cpu_perf_is_like_silvermont|.
OPENSSL_INLINE int CRYPTO_is_AVX_capable(void) {
return (OPENSSL_ia32cap_get()[1] & (1 << 28)) != 0;
}
OPENSSL_INLINE int CRYPTO_is_RDRAND_capable(void) {
return (OPENSSL_ia32cap_get()[1] & (1u << 30)) != 0;
}
OPENSSL_INLINE int CRYPTO_is_AMD_XOP_support(void) {
return (OPENSSL_ia32cap_get()[1] & (1 << 11)) != 0;
}
// See Intel manual, volume 2A, table 3-8.
OPENSSL_INLINE int CRYPTO_is_BMI1_capable(void) {
return (OPENSSL_ia32cap_get()[2] & (1 << 3)) != 0;
}
OPENSSL_INLINE int CRYPTO_is_AVX2_capable(void) {
return (OPENSSL_ia32cap_get()[2] & (1 << 5)) != 0;
}
OPENSSL_INLINE int CRYPTO_is_BMI2_capable(void) {
return (OPENSSL_ia32cap_get()[2] & (1 << 8)) != 0;
}
OPENSSL_INLINE int CRYPTO_is_ADX_capable(void) {
return (OPENSSL_ia32cap_get()[2] & (1 << 19)) != 0;
}
OPENSSL_INLINE int CRYPTO_is_SHAEXT_capable(void) {
return (OPENSSL_ia32cap_get()[2] & (1 << 29)) != 0;
}
// AVX512VL | AVX512BW | AVX512DQ | AVX512F
// 1u << 31 | 1u << 30 | 1u << 17 | 1u << 16
// 1100_0000_0000_0011_0000_0000_0000_0000
#define CPU_CAP_AVX512_BITFLAGS 0xC0030000
OPENSSL_INLINE int CRYPTO_is_AVX512_capable(void) {
return (OPENSSL_ia32cap_get()[2] & CPU_CAP_AVX512_BITFLAGS) == CPU_CAP_AVX512_BITFLAGS;
}
OPENSSL_INLINE int CRYPTO_is_VAES_capable(void) {
return (OPENSSL_ia32cap_get()[3] & (1u << (41 - 32))) != 0;
}
OPENSSL_INLINE int CRYPTO_is_VPCLMULQDQ_capable(void) {
return (OPENSSL_ia32cap_get()[3] & (1u << (42 - 32))) != 0;
}
// AVX512VL | AVX512BW | AVX512_IFMA | AVX512DQ | AVX512F
// 1u << 31 | 1u << 30 | 1u << 21 | 1u << 17 | 1u << 16
// 1100_0000_0010_0011_0000_0000_0000_0000
#define CPU_CAP_AVX512IFMA_BITFLAGS 0xC0230000
OPENSSL_INLINE int CRYPTO_is_AVX512IFMA_capable(void) {
return (OPENSSL_ia32cap_get()[2] & CPU_CAP_AVX512IFMA_BITFLAGS) ==
CPU_CAP_AVX512IFMA_BITFLAGS;
}
OPENSSL_INLINE int CRYPTO_is_VBMI2_capable(void) {
return (OPENSSL_ia32cap_get()[3] & (1 << 6)) != 0;
}
// CRYPTO_cpu_perf_is_like_silvermont returns one if, based on a heuristic, the
// CPU has Silvermont-like performance characteristics. It is often faster to
// run different codepaths on these CPUs than the available instructions would
// otherwise select. See chacha-x86_64.pl.
//
// Bonnell, Silvermont's predecessor in the Atom lineup, will also be matched by
// this. |OPENSSL_cpuid_setup| forces Knights Landing to also be matched by
// this. Goldmont (Silvermont's successor in the Atom lineup) added XSAVE so it
// isn't matched by this. Various sources indicate AMD first implemented MOVBE
// and XSAVE at the same time in Jaguar, so it seems like AMD chips will not be
// matched by this. That seems to be the case for other x86(-64) CPUs.
OPENSSL_INLINE int CRYPTO_cpu_perf_is_like_silvermont(void) {
// WARNING: This MUST NOT be used to guard the execution of the XSAVE
// instruction. This is the "hardware supports XSAVE" bit, not the OSXSAVE bit
// that indicates whether we can safely execute XSAVE. This bit may be set
// even when XSAVE is disabled (by the operating system). See the comment in
// cpu_intel.c and check how the users of this bit use it.
//
// We do not use |__XSAVE__| for static detection because the hack in
// |OPENSSL_cpuid_setup| for Knights Landing CPUs needs to override it.
int hardware_supports_xsave = (OPENSSL_ia32cap_get()[1] & (1u << 26)) != 0;
return !hardware_supports_xsave && CRYPTO_is_MOVBE_capable();
}
#endif // OPENSSL_X86 || OPENSSL_X86_64
#if defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64)
#if defined(OPENSSL_APPLE) && defined(OPENSSL_ARM)
// We do not detect any features at runtime for Apple's 32-bit ARM platforms. On
// 64-bit ARM, we detect some post-ARMv8.0 features.
#define OPENSSL_STATIC_ARMCAP
#endif
#include <openssl/arm_arch.h>
extern uint32_t OPENSSL_armcap_P;
extern uint8_t OPENSSL_cpucap_initialized;
// Normalize some older feature flags to their modern ACLE values.
// https://developer.arm.com/architectures/system-architectures/software-standards/acle
#if defined(__ARM_NEON__) && !defined(__ARM_NEON)
#define __ARM_NEON 1
#endif
#if defined(__ARM_FEATURE_CRYPTO)
#if !defined(__ARM_FEATURE_AES)
#define __ARM_FEATURE_AES 1
#endif
#if !defined(__ARM_FEATURE_SHA2)
#define __ARM_FEATURE_SHA2 1
#endif
#endif
// CRYPTO_is_NEON_capable returns true if the current CPU has a NEON unit.
// If this is known statically, it is a constant inline function.
// Otherwise, the capability is checked at runtime by checking the corresponding
// bit in |OPENSSL_armcap_P|. This is also the same for
// |CRYPTO_is_ARMv8_AES_capable| and |CRYPTO_is_ARMv8_PMULL_capable|
// for checking the support for AES and PMULL instructions, respectively.
OPENSSL_INLINE int CRYPTO_is_NEON_capable(void) {
return (OPENSSL_armcap_P & ARMV7_NEON) != 0;
}
OPENSSL_INLINE int CRYPTO_is_ARMv8_AES_capable(void) {
return (OPENSSL_armcap_P & ARMV8_AES) != 0;
}
OPENSSL_INLINE int CRYPTO_is_ARMv8_PMULL_capable(void) {
return (OPENSSL_armcap_P & ARMV8_PMULL) != 0;
}
OPENSSL_INLINE int CRYPTO_is_ARMv8_SHA1_capable(void) {
return (OPENSSL_armcap_P & ARMV8_SHA1) != 0;
}
OPENSSL_INLINE int CRYPTO_is_ARMv8_SHA256_capable(void) {
return (OPENSSL_armcap_P & ARMV8_SHA256) != 0;
}
OPENSSL_INLINE int CRYPTO_is_ARMv8_SHA512_capable(void) {
return (OPENSSL_armcap_P & ARMV8_SHA512) != 0;
}
OPENSSL_INLINE int CRYPTO_is_ARMv8_SHA3_capable(void) {
return (OPENSSL_armcap_P & ARMV8_SHA3) != 0;
}
OPENSSL_INLINE int CRYPTO_is_ARMv8_GCM_8x_capable(void) {
return (CRYPTO_is_ARMv8_SHA3_capable() &&
((OPENSSL_armcap_P & ARMV8_NEOVERSE_V1) != 0 ||
(OPENSSL_armcap_P & ARMV8_NEOVERSE_V2) != 0 ||
(OPENSSL_armcap_P & ARMV8_APPLE_M) != 0));
}
OPENSSL_INLINE int CRYPTO_is_ARMv8_wide_multiplier_capable(void) {
return (OPENSSL_armcap_P & ARMV8_NEOVERSE_V1) != 0 ||
(OPENSSL_armcap_P & ARMV8_NEOVERSE_V2) != 0 ||
(OPENSSL_armcap_P & ARMV8_APPLE_M) != 0;
}
OPENSSL_INLINE int CRYPTO_is_ARMv8_DIT_capable(void) {
return (OPENSSL_armcap_P & (ARMV8_DIT | ARMV8_DIT_ALLOWED)) ==
(ARMV8_DIT | ARMV8_DIT_ALLOWED);
}
OPENSSL_INLINE int CRYPTO_is_ARMv8_RNDR_capable(void) {
return (OPENSSL_armcap_P & ARMV8_RNG) != 0;
}
OPENSSL_INLINE int CRYPTO_is_Neoverse_N1(void) {
// It is Neoverse N1 if only ARMV8_NEOVERSE_N1 = 1 and
// ARMV8_NEOVERSE_<V1|V2> = 0.
return ((OPENSSL_armcap_P & ARMV8_NEOVERSE_N1) != 0 &&
(OPENSSL_armcap_P & (ARMV8_NEOVERSE_V1 | ARMV8_NEOVERSE_V2)) == 0);
}
OPENSSL_INLINE int CRYPTO_is_Neoverse_V1(void) {
return (OPENSSL_armcap_P & ARMV8_NEOVERSE_V1) != 0;
}
OPENSSL_INLINE int CRYPTO_is_Neoverse_V2(void) {
return (OPENSSL_armcap_P & ARMV8_NEOVERSE_V2) != 0;
}
OPENSSL_INLINE int CRYPTO_is_ARMv8_Apple_M(void) {
return (OPENSSL_armcap_P & ARMV8_APPLE_M) != 0;
}
// This function is used only for testing; hence, not inlined
OPENSSL_EXPORT int CRYPTO_is_ARMv8_DIT_capable_for_testing(void);
#endif // OPENSSL_ARM || OPENSSL_AARCH64
#if defined(AARCH64_DIT_SUPPORTED)
// (TODO): See if we can detect the DIT capability in Windows environment
// armv8_get_dit gets the value of the DIT flag from the CPU.
OPENSSL_EXPORT uint64_t armv8_get_dit(void);
// armv8_set_dit sets the CPU DIT flag to 1 and returns its original value
// before it was called.
OPENSSL_EXPORT uint64_t armv8_set_dit(void);
// armv8_restore_dit takes as input a value to restore the CPU DIT flag to.
OPENSSL_EXPORT void armv8_restore_dit(volatile uint64_t *original_dit);
#if defined(ENABLE_AUTO_SET_RESET_DIT)
// SET_DIT_AUTO_RESET can be inserted in the caller's application at
// the beginning of the code section that makes repeated calls to AWS-LC
// functions. The flag will be automatically restored to its original value
// at the end of the scope.
// This can minimise the effect on performance of repeatedly setting and
// disabling DIT.
// Instead of the macro, the functions above can be used.
// An example of their usage is present in the benchmarking function
// `Speed()` in `tool/speed.cc` when the option `-dit` is passed in.
#define SET_DIT_AUTO_RESET \
volatile uint64_t _dit_restore_orig \
__attribute__((cleanup(armv8_restore_dit))) \
OPENSSL_UNUSED = armv8_set_dit();
#else
#define SET_DIT_AUTO_RESET
#endif // ENABLE_AUTO_SET_RESET_DIT
#else
#define SET_DIT_AUTO_RESET
#endif // AARCH64_DIT_SUPPORTED
#if defined(OPENSSL_PPC64LE)
// CRYPTO_is_PPC64LE_vcrypto_capable returns true iff the current CPU supports
// the Vector.AES category of instructions.
int CRYPTO_is_PPC64LE_vcrypto_capable(void);
extern unsigned long OPENSSL_ppc64le_hwcap2;
#endif // OPENSSL_PPC64LE
#if defined(__cplusplus)
}
#endif
#endif // OPENSSL_HEADER_CPUCAP_INTERNAL_H