chore: checkpoint before Python removal

This commit is contained in:
2026-03-26 22:33:59 +00:00
parent 683cec9307
commit e568ddf82a
29972 changed files with 11269302 additions and 2 deletions

View File

@@ -0,0 +1,114 @@
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0 OR ISC
#if defined(OPENSSL_AARCH64) && !defined(OPENSSL_STATIC_ARMCAP)
#include "cpu_aarch64.h"
// handle_cpu_env applies the |OPENSSL_armcap|-style string |in| to the
// capability word |out[0]|.
//
// |in| is an unsigned 32-bit value, written in decimal or, when it starts with
// "0x", in hexadecimal. It may be preceded by one prefix character:
//   '~'  the bits of the value are cleared in |out[0]|;
//   '|'  the bits of the value are set in |out[0]|;
//   none the value replaces |out[0]|.
//
// If no number can be parsed from |in| (including an empty string or a lone
// prefix character), |out[0]| is left unchanged. Unless the '~' prefix is
// used, the process is aborted when |out[0]| is non-zero on entry and the
// value asks for a bit that is not set in it.
void handle_cpu_env(uint32_t *out, const char *in) {
  const int invert = in[0] == '~';
  // Deliberately not called |or|: that identifier is a macro as soon as
  // <iso646.h> is included.
  const int or_mode = in[0] == '|';
  const int skip_first_byte = invert || or_mode;
  const int hex =
      in[skip_first_byte] == '0' && in[skip_first_byte + 1] == 'x';
  const uint32_t armcap = out[0];

  uint32_t v = 0;
  int sscanf_result;
  if (hex) {
    sscanf_result = sscanf(in + skip_first_byte + 2, "%" PRIx32, &v);
  } else {
    sscanf_result = sscanf(in + skip_first_byte, "%" PRIu32, &v);
  }

  // |sscanf| returns the number of converted items, or EOF (a negative and
  // therefore "true" value) when the input ends before the first conversion.
  // Only an exact count of one means |v| holds a parsed value.
  if (sscanf_result != 1) {
    return;
  }

  // Detect if the user is trying to use the environment variable to set
  // a capability that is _not_ available on the CPU:
  // If the runtime capability check (e.g via getauxval() on Linux)
  // returned a non-zero hwcap in `armcap` (out)
  // and a bit set in the requested `v` is not set in `armcap`,
  // abort instead of crashing later.
  // The case of invert cannot enable a nonexistent capability;
  // it can only disable an existing one.
  if (!invert && armcap && (~armcap & v)) {
    fprintf(stderr,
            "Fatal Error: HW capability found: 0x%02X, but HW capability requested: 0x%02X.\n",
            (unsigned)armcap, (unsigned)v);
    abort();
  }

  if (invert) {
    out[0] &= ~v;
  } else if (or_mode) {
    out[0] |= v;
  } else {
    out[0] = v;
  }
}
#if defined(AARCH64_DIT_SUPPORTED)
// "DIT" is not recognised as a register name by clang-10 (at least)
// Register's encoded name is from e.g.
// https://github.com/ashwio/arm64-sysreg-lib/blob/d421e249a026f6f14653cb6f9c4edd8c5d898595/include/sysreg/dit.h#L286
// Generic (encoded) name of the DIT system register.
// NOTE(review): the "mrs" in |armv8_get_dit| spells this encoded name out
// literally rather than going through this macro.
#define DIT_REGISTER s3_3_c4_c2_5
// Lock taken for writing by |armv8_disable_dit| and |armv8_enable_dit| around
// their updates of |OPENSSL_armcap_P|.
DEFINE_STATIC_MUTEX(OPENSSL_armcap_P_lock)
// armv8_get_dit returns the DIT flag (0 or 1), taken from bit 24 of the DIT
// system register. When |CRYPTO_is_ARMv8_DIT_capable| reports false the
// register is not read and 0 is returned.
uint64_t armv8_get_dit(void) {
  if (!CRYPTO_is_ARMv8_DIT_capable()) {
    return 0;
  }

  uint64_t dit_reg = 0;
  __asm__ volatile("mrs %0, s3_3_c4_c2_5" : "=r"(dit_reg));
  // Keep only bit 24, which carries the flag.
  return (dit_reg >> 24) & 1;
}
// armv8_set_dit sets the DIT flag to 1 and returns the value the flag had
// before the call. When |CRYPTO_is_ARMv8_DIT_capable| reports false nothing is
// changed and 0 is returned.
//
// The instruction is emitted as a raw encoding. See
// https://github.com/torvalds/linux/blob/53eaeb7fbe2702520125ae7d72742362c071a1f2/arch/arm64/include/asm/sysreg.h#L82
// As per Arm ARM for v8-A, Section "C.5.1.3 op0 == 0b00, architectural hints,
// barriers and CLREX, and PSTATE access", ARM DDI 0487 J.a, system instructions
// for accessing PSTATE fields have the following encoding
// and C5.2.4 DIT, Data Independent Timing:
// Op0 = 0, CRn = 4
// Op1 (3 for DIT) , Op2 (5 for DIT) encodes the PSTATE field modified and defines the constraints.
// CRm = Imm4 (#0 or #1 below)
// Rt = 0x1f
uint64_t armv8_set_dit(void) {
  if (!CRYPTO_is_ARMv8_DIT_capable()) {
    return 0;
  }

  // Remember the flag as it was so that the caller can restore it later.
  const uint64_t previous_dit = armv8_get_dit();
  // Encoding of "msr dit, #1"
  __asm__ volatile(".inst 0xd503415f");
  return previous_dit;
}
// armv8_restore_dit clears the DIT flag unless |*original_dit| is 1, i.e.
// unless the flag was already set before the matching |armv8_set_dit| call.
// Nothing is done when |CRYPTO_is_ARMv8_DIT_capable| reports false.
void armv8_restore_dit(volatile uint64_t *original_dit) {
  if (*original_dit == 1 || !CRYPTO_is_ARMv8_DIT_capable()) {
    return;
  }
  // Encoding of "msr dit, #0"
  __asm__ volatile(".inst 0xd503405f");
}
// armv8_disable_dit clears the |ARMV8_DIT_ALLOWED| bit in |OPENSSL_armcap_P|
// while holding |OPENSSL_armcap_P_lock| for writing.
void armv8_disable_dit(void) {
  CRYPTO_STATIC_MUTEX_lock_write(OPENSSL_armcap_P_lock_bss_get());
  OPENSSL_armcap_P = OPENSSL_armcap_P & ~ARMV8_DIT_ALLOWED;
  CRYPTO_STATIC_MUTEX_unlock_write(OPENSSL_armcap_P_lock_bss_get());
}
// armv8_enable_dit sets the |ARMV8_DIT_ALLOWED| bit in |OPENSSL_armcap_P|
// while holding |OPENSSL_armcap_P_lock| for writing.
void armv8_enable_dit(void) {
  CRYPTO_STATIC_MUTEX_lock_write(OPENSSL_armcap_P_lock_bss_get());
  OPENSSL_armcap_P = OPENSSL_armcap_P | ARMV8_DIT_ALLOWED;
  CRYPTO_STATIC_MUTEX_unlock_write(OPENSSL_armcap_P_lock_bss_get());
}
// CRYPTO_is_ARMv8_DIT_capable_for_testing forwards the result of
// |CRYPTO_is_ARMv8_DIT_capable| so that tests can call it.
int CRYPTO_is_ARMv8_DIT_capable_for_testing(void) {
  const int dit_capable = CRYPTO_is_ARMv8_DIT_capable();
  return dit_capable;
}
#endif // AARCH64_DIT_SUPPORTED
#endif // OPENSSL_AARCH64 && !OPENSSL_STATIC_ARMCAP

View File

@@ -0,0 +1,31 @@
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0 OR ISC
#ifndef OPENSSL_HEADER_CPUCAP_CPU_AARCH64_H
#define OPENSSL_HEADER_CPUCAP_CPU_AARCH64_H
#if defined(__cplusplus)
extern "C" {
#endif
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#if defined(OPENSSL_AARCH64) && !defined(OPENSSL_STATIC_ARMCAP)
// cpu_aarch64 contains common functions used across multiple cpu_aarch64_* files
//
// handle_cpu_env applies the value from |in| to the CPUID values in |out[0]|.
// |in| holds an unsigned 32-bit value in decimal, or in hexadecimal when it
// starts with "0x", optionally preceded by one prefix character:
//   '~'  clear the bits of the value in |out[0]|;
//   '|'  set the bits of the value in |out[0]|;
//   none replace |out[0]| with the value.
// Unless the '~' prefix is used, the process is aborted when |out[0]| is
// non-zero and the value contains a bit that is not set in |out[0]|.
// See the comment in |OPENSSL_cpuid_setup| about this.
void handle_cpu_env(uint32_t *out, const char *in);
#endif // OPENSSL_AARCH64 && !OPENSSL_STATIC_ARMCAP
#if defined(__cplusplus)
}
#endif
#endif // OPENSSL_HEADER_CPUCAP_CPU_AARCH64_H

View File

@@ -0,0 +1,113 @@
// Copyright (c) 2021, Google Inc.
// SPDX-License-Identifier: ISC
#include "internal.h"
#if defined(OPENSSL_AARCH64) && defined(OPENSSL_APPLE) && \
!defined(OPENSSL_STATIC_ARMCAP)
#include <sys/sysctl.h>
#include <sys/types.h>
#include <openssl/arm_arch.h>
#include "cpu_aarch64.h"
// has_hw_feature returns one if the sysctl |name| can be read as an |int| and
// its value is non-zero. It returns zero if the query fails, if the reported
// size is not that of an |int|, or if the value is zero.
static int has_hw_feature(const char *name) {
  int value;
  size_t len = sizeof(value);
  const int rc = sysctlbyname(name, &value, &len, NULL, 0);
  if (rc != 0) {
    return 0;
  }

  if (len != sizeof(int)) {
    // This should not happen. All the values queried should be integer-valued.
    assert(0);
    return 0;
  }

  // Per sys/sysctl.h:
  //
  //   Selectors that return errors are not support on the system. Supported
  //   features will return 1 if they are recommended or 0 if they are supported
  //   but are not expected to help performance. Future versions of these
  //   selectors may return larger values as necessary so it is best to test for
  //   non zero.
  return value != 0 ? 1 : 0;
}
// is_brand returns one if the "machdep.cpu.brand_string" sysctl can be read
// and its value matches |in_str| over the first min(strlen(in_str), len)
// bytes, where len is the size reported for the brand string. It returns zero
// otherwise.
static int is_brand(const char *in_str) {
  char brand[64];
  size_t len = sizeof(brand);
  if (sysctlbyname("machdep.cpu.brand_string", brand, &len, NULL, 0) != 0) {
    return 0;
  }

  const size_t compare_len = strnlen(in_str, len);
  if (strncmp(brand, in_str, compare_len) != 0) {
    return 0;
  }

  if (len > sizeof(brand)) {
    // This should not happen; too large of a brand for this function.
    assert(0);
    return 0;
  }
  return 1;
}
// OPENSSL_cpuid_setup fills |OPENSSL_armcap_P| on Apple ARM64 platforms: the
// baseline features are set statically, optional ones are probed with
// |sysctlbyname|, and finally the |OPENSSL_armcap| environment variable, if
// set, is applied on top.
void OPENSSL_cpuid_setup(void) {
  // Apple ARM64 platforms have NEON and cryptography extensions available
  // statically, so we do not need to query them. In particular, there sometimes
  // are no sysctls corresponding to such features. See below.
#if !defined(__ARM_NEON) || !defined(__ARM_FEATURE_AES) || \
    !defined(__ARM_FEATURE_SHA2)
#error "NEON and crypto extensions should be statically available."
#endif
  OPENSSL_armcap_P =
      ARMV7_NEON | ARMV8_AES | ARMV8_PMULL | ARMV8_SHA1 | ARMV8_SHA256;

  // See Apple's documentation for sysctl names:
  // https://developer.apple.com/documentation/kernel/1387446-sysctlbyname/determining_instruction_set_characteristics
  //
  // The new feature names, e.g. "hw.optional.arm.FEAT_SHA512", are only
  // available in macOS 12. For compatibility with macOS 11, we also support
  // the old names. The old names don't have values for features like FEAT_AES,
  // so instead we detect them statically above.
  //
  // If querying new sysctls, update the Chromium sandbox definition. See
  // https://crrev.com/c/4415225.
  if (has_hw_feature("hw.optional.arm.FEAT_SHA512") ||
      has_hw_feature("hw.optional.armv8_2_sha512")) {
    OPENSSL_armcap_P |= ARMV8_SHA512;
  }
  // SHA3 is probed the same way as SHA512: new name first, legacy name as a
  // fallback. A failed query simply reads as "feature absent".
  if (has_hw_feature("hw.optional.arm.FEAT_SHA3") ||
      has_hw_feature("hw.optional.armv8_2_sha3")) {
    OPENSSL_armcap_P |= ARMV8_SHA3;
  }

  if (is_brand("Apple M")) {
    OPENSSL_armcap_P |= ARMV8_APPLE_M;
  }

  if (has_hw_feature("hw.optional.arm.FEAT_DIT")) {
    OPENSSL_armcap_P |= (ARMV8_DIT | ARMV8_DIT_ALLOWED);
  }

  // OPENSSL_armcap is a 32-bit, unsigned value which may start with "0x" to
  // indicate a hex value. Prior to the 32-bit value, a '~' or '|' may be given.
  //
  // If the '~' prefix is present:
  //   the value is inverted and ANDed with the probed CPUID result
  // If the '|' prefix is present:
  //   the value is ORed with the probed CPUID result
  // Otherwise:
  //   the value is taken as the result of the CPUID
  const char *env = getenv("OPENSSL_armcap");
  if (env != NULL) {
    handle_cpu_env(&OPENSSL_armcap_P, env);
  }

  OPENSSL_cpucap_initialized = 1;
}
#endif // OPENSSL_AARCH64 && OPENSSL_APPLE && !OPENSSL_STATIC_ARMCAP

View File

@@ -0,0 +1,176 @@
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0 OR ISC
#include <openssl/crypto.h>
#include <gtest/gtest.h>
#include "internal.h"
#if defined(OPENSSL_THREADS)
#include <chrono>
#include <thread>
#endif
#if defined(AARCH64_DIT_SUPPORTED) && !defined(OPENSSL_STATIC_ARMCAP)
#if defined(ENABLE_AUTO_SET_RESET_DIT)
// NestedMacroInvocation invokes |SET_DIT_AUTO_RESET| in its own scope and
// expects the DIT flag to read |one| while inside that scope. It is meant to
// be called from a scope that has already invoked the macro.
static void NestedMacroInvocation(uint64_t one) {
  SET_DIT_AUTO_RESET;
  const uint64_t dit_in_scope = armv8_get_dit();
  EXPECT_EQ(dit_in_scope, one);
}
#endif // ENABLE_AUTO_SET_RESET_DIT
// Exercises |armv8_set_dit|, |armv8_get_dit| and |armv8_restore_dit| on a
// single thread, including a nested set, and, when enabled, the
// |SET_DIT_AUTO_RESET| scope macro.
TEST(DITTest, SetReset) {
  const uint64_t zero = 0;
  // Value the DIT flag is expected to read after being set: 1 when the CPU is
  // DIT capable, 0 otherwise.
  const uint64_t one = CRYPTO_is_ARMv8_DIT_capable_for_testing() ? 1 : 0;

  uint64_t original_dit = armv8_set_dit();
  EXPECT_EQ(original_dit, zero);

  // the case of a nested call of setting DIT
  const uint64_t original_dit_2 = armv8_set_dit();
  EXPECT_EQ(original_dit_2, one);

  EXPECT_EQ(armv8_get_dit(), one);

  armv8_restore_dit(&original_dit);
  EXPECT_EQ(armv8_get_dit(), zero);

#if defined(ENABLE_AUTO_SET_RESET_DIT)
  {  // invoke the macro within a scope
     // to test that it restores the CPU DIT flag at the end
    SET_DIT_AUTO_RESET;
    EXPECT_EQ(armv8_get_dit(), one);

    // Nested macro invocation will exit the scope leaving DIT = 1
    NestedMacroInvocation(one);
    EXPECT_EQ(armv8_get_dit(), one);
  }
  EXPECT_EQ(armv8_get_dit(), zero);
#endif  // ENABLE_AUTO_SET_RESET_DIT
}
#if defined(OPENSSL_THREADS)
// Checks two things with threads: that the DIT flag set by one thread is not
// disturbed by another thread setting and restoring it, and that the run-time
// switch toggled by |armv8_disable_dit| / |armv8_enable_dit| is observed by
// threads other than the one that toggled it.
TEST(DITTest, Threads) {
// Value the DIT flag is expected to read after being set: 1 when the CPU is
// DIT capable, 0 otherwise.
uint64_t one = CRYPTO_is_ARMv8_DIT_capable_for_testing()? (uint64_t)1 : (uint64_t)0;
{
// Test that the CPU DIT flag (bit in PSTATE register) is
// context-switched at the thread level.
std::thread thread1([&] {
uint64_t original_dit = 0, current_dit = 0;
original_dit = armv8_set_dit();
EXPECT_EQ(original_dit, (uint64_t)0);
current_dit = armv8_get_dit();
EXPECT_EQ(current_dit, one);
// Sleep until thread2 starts, sets and resets DIT
std::this_thread::sleep_for(std::chrono::milliseconds(40));
// This thread should still see DIT=1
current_dit = armv8_get_dit();
EXPECT_EQ(current_dit, one);
armv8_restore_dit(&original_dit);
current_dit = armv8_get_dit();
EXPECT_EQ(current_dit, (uint64_t)0);
});
// thread2 sets and restores DIT on its own; it is joined first, below.
std::thread thread2([&] {
uint64_t original_dit = 0, current_dit = 0;
original_dit = armv8_set_dit();
EXPECT_EQ(original_dit, (uint64_t)0);
current_dit = armv8_get_dit();
EXPECT_EQ(current_dit, one);
armv8_restore_dit(&original_dit);
current_dit = armv8_get_dit();
EXPECT_EQ(current_dit, (uint64_t)0);
});
thread2.join();
thread1.join();
}
{
// Test that the DIT runtime dis/enabler in OPENSSL_armcap_P is
// at the process level.
// (Trying to make the threads concurrent and synchronising them
// with sleep time was making the Thread Sanitizer warn about a
// a data race.)
// The threads below therefore run strictly one after another: each one is
// joined before the next is started.
std::thread thread1([&] {
uint64_t original_dit = 0, current_dit = 0;
original_dit = armv8_set_dit();
EXPECT_EQ(original_dit, (uint64_t)0);
current_dit = armv8_get_dit();
EXPECT_EQ(current_dit, one);
armv8_restore_dit(&original_dit);
current_dit = armv8_get_dit();
EXPECT_EQ(current_dit, (uint64_t)0);
armv8_disable_dit(); // disable DIT capability at run-time
});
thread1.join();
std::thread thread2([&] {
uint64_t original_dit = 0, current_dit = 0;
// DIT was disabled at runtime, so the DIT bit would be read as 0
EXPECT_EQ(CRYPTO_is_ARMv8_DIT_capable_for_testing(), 0);
original_dit = armv8_set_dit();
EXPECT_EQ(original_dit, (uint64_t)0);
current_dit = armv8_get_dit();
EXPECT_EQ(current_dit, (uint64_t)0);
armv8_restore_dit(&original_dit);
current_dit = armv8_get_dit();
EXPECT_EQ(current_dit, (uint64_t)0);
});
thread2.join();
std::thread thread3([&] {
armv8_enable_dit(); // enable back DIT capability at run-time
});
thread3.join();
// After re-enabling, set/get/restore are expected to behave as in thread1.
std::thread thread4([&] {
uint64_t original_dit = 0, current_dit = 0;
EXPECT_EQ(CRYPTO_is_ARMv8_DIT_capable_for_testing(), (int)one);
original_dit = armv8_set_dit();
EXPECT_EQ(original_dit, (uint64_t)0);
current_dit = armv8_get_dit();
EXPECT_EQ(current_dit, one);
armv8_restore_dit(&original_dit);
current_dit = armv8_get_dit();
EXPECT_EQ(current_dit, (uint64_t)0);
});
thread4.join();
}
}
#endif // OPENSSL_THREADS
#endif // AARCH64_DIT_SUPPORTED && !OPENSSL_STATIC_ARMCAP

View File

@@ -0,0 +1,52 @@
// Copyright (c) 2022, Google Inc.
// SPDX-License-Identifier: ISC
#include "internal.h"
#if defined(OPENSSL_AARCH64) && defined(OPENSSL_FREEBSD) && \
!defined(OPENSSL_STATIC_ARMCAP)
#include <machine/armreg.h>
#include <sys/types.h>
#include <openssl/arm_arch.h>
// ID_AA64ISAR0_*_VAL are defined starting FreeBSD 13.0. When FreeBSD
// 12.x is out of support, these compatibility macros can be removed.
// Each fallback maps the *_VAL name onto the macro of the same name without
// the suffix, and is only defined when the header did not provide *_VAL.
#ifndef ID_AA64ISAR0_AES_VAL
#define ID_AA64ISAR0_AES_VAL ID_AA64ISAR0_AES
#endif
#ifndef ID_AA64ISAR0_SHA1_VAL
#define ID_AA64ISAR0_SHA1_VAL ID_AA64ISAR0_SHA1
#endif
#ifndef ID_AA64ISAR0_SHA2_VAL
#define ID_AA64ISAR0_SHA2_VAL ID_AA64ISAR0_SHA2
#endif
void OPENSSL_cpuid_setup(void) {
uint64_t id_aa64isar0 = READ_SPECIALREG(id_aa64isar0_el1);
OPENSSL_armcap_P |= ARMV7_NEON;
if (ID_AA64ISAR0_AES_VAL(id_aa64isar0) >= ID_AA64ISAR0_AES_BASE) {
OPENSSL_armcap_P |= ARMV8_AES;
}
if (ID_AA64ISAR0_AES_VAL(id_aa64isar0) >= ID_AA64ISAR0_AES_PMULL) {
OPENSSL_armcap_P |= ARMV8_PMULL;
}
if (ID_AA64ISAR0_SHA1_VAL(id_aa64isar0) >= ID_AA64ISAR0_SHA1_BASE) {
OPENSSL_armcap_P |= ARMV8_SHA1;
}
if (ID_AA64ISAR0_SHA2_VAL(id_aa64isar0) >= ID_AA64ISAR0_SHA2_BASE) {
OPENSSL_armcap_P |= ARMV8_SHA256;
}
if (ID_AA64ISAR0_SHA2_VAL(id_aa64isar0) >= ID_AA64ISAR0_SHA2_512) {
OPENSSL_armcap_P |= ARMV8_SHA512;
}
OPENSSL_cpucap_initialized = 1;
}
#endif // OPENSSL_AARCH64 && OPENSSL_FREEBSD && !OPENSSL_STATIC_ARMCAP

View File

@@ -0,0 +1,119 @@
// Copyright (c) 2016, Google Inc.
// SPDX-License-Identifier: ISC
#include "internal.h"
#if defined(OPENSSL_AARCH64) && defined(OPENSSL_LINUX) && \
!defined(OPENSSL_STATIC_ARMCAP)
#include <sys/auxv.h>
#ifndef __STDC_FORMAT_MACROS
#define __STDC_FORMAT_MACROS
#endif
#include <openssl/arm_arch.h>
#include "cpu_aarch64.h"
// armv8_cpuid_probe returns the value of the MIDR_EL1 system register.
static uint64_t armv8_cpuid_probe(void) {
  uint64_t midr;
  __asm__ volatile("mrs %0, MIDR_EL1" : "=r"(midr));
  return midr;
}
void OPENSSL_cpuid_setup(void) {
unsigned long hwcap = getauxval(AT_HWCAP);
unsigned long hwcap2 = getauxval(AT_HWCAP2);
// See /usr/include/asm/hwcap.h on an aarch64 installation for the source of
// these values.
static const unsigned long kNEON = 1 << 1;
static const unsigned long kAES = 1 << 3;
static const unsigned long kPMULL = 1 << 4;
static const unsigned long kSHA1 = 1 << 5;
static const unsigned long kSHA256 = 1 << 6;
static const unsigned long kSHA512 = 1 << 21;
static const unsigned long kSHA3 = 1 << 17;
static const unsigned long kCPUID = 1 << 11;
static const unsigned long kRNGhwcap2 = 1 << 16;;
uint64_t OPENSSL_arm_midr = 0;
if ((hwcap & kNEON) == 0) {
// Matching OpenSSL, if NEON is missing, don't report other features
// either.
return;
}
OPENSSL_armcap_P |= ARMV7_NEON;
if (hwcap & kAES) {
OPENSSL_armcap_P |= ARMV8_AES;
}
if (hwcap & kPMULL) {
OPENSSL_armcap_P |= ARMV8_PMULL;
}
if (hwcap & kSHA1) {
OPENSSL_armcap_P |= ARMV8_SHA1;
}
if (hwcap & kSHA256) {
OPENSSL_armcap_P |= ARMV8_SHA256;
}
if (hwcap & kSHA512) {
OPENSSL_armcap_P |= ARMV8_SHA512;
}
if (hwcap & kSHA3) {
OPENSSL_armcap_P |= ARMV8_SHA3;
}
// Before calling armv8_cpuid_probe and reading from MIDR_EL1 check that it
// is supported. As of Valgrind 3.21 trying to read from that register will
// cause Valgrind to crash.
if (hwcap & kCPUID) {
OPENSSL_arm_midr = armv8_cpuid_probe();
if (MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_ARM, ARM_CPU_PART_N1)) {
OPENSSL_armcap_P |= ARMV8_NEOVERSE_N1;
}
if (MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_ARM, ARM_CPU_PART_V1)) {
OPENSSL_armcap_P |= ARMV8_NEOVERSE_V1;
// CPU capabilities of N1 are a subset of CPU capabilities of V1
OPENSSL_armcap_P |= ARMV8_NEOVERSE_N1;
}
if (MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_ARM, ARM_CPU_PART_V2)) {
OPENSSL_armcap_P |= ARMV8_NEOVERSE_V2;
// CPU capabilities of N1 are a subset of CPU capabilities of V2
OPENSSL_armcap_P |= ARMV8_NEOVERSE_N1;
}
}
static const unsigned long kDIT = 1 << 24;
// Before setting/resetting the DIT flag, check it's available in HWCAP
if (hwcap & kDIT) {
OPENSSL_armcap_P |= (ARMV8_DIT | ARMV8_DIT_ALLOWED);
}
if (hwcap2 & kRNGhwcap2) {
OPENSSL_armcap_P |= ARMV8_RNG;
}
// OPENSSL_armcap is a 32-bit, unsigned value which may start with "0x" to
// indicate a hex value. Prior to the 32-bit value, a '~' or '|' may be given.
//
// If the '~' prefix is present:
// the value is inverted and ANDed with the probed CPUID result
// If the '|' prefix is present:
// the value is ORed with the probed CPUID result
// Otherwise:
// the value is taken as the result of the CPUID
const char *env;
env = getenv("OPENSSL_armcap");
if (env != NULL) {
handle_cpu_env(&OPENSSL_armcap_P, env);
}
OPENSSL_cpucap_initialized = 1;
}
#endif // OPENSSL_AARCH64 && OPENSSL_LINUX && !OPENSSL_STATIC_ARMCAP

View File

@@ -0,0 +1,136 @@
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0 OR ISC
#include <openssl/cpu.h>
#if defined(OPENSSL_AARCH64) && defined(OPENSSL_NETBSD) && \
!defined(OPENSSL_STATIC_ARMCAP)
#include <aarch64/armreg.h>
#include <aarch64/cpu.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/param.h>
#include <sys/sysctl.h>
#include <openssl/arm_arch.h>
#include "internal.h"
// get_cpu_id reads the "machdep.cpu<cpu_num>.cpu_id" sysctl into |*id|. It
// returns 0 on success and -1 if the query fails or the returned size is not
// that of |*id|.
static int get_cpu_id(int cpu_num, struct aarch64_sysctl_cpu_id *id) {
  char sysctl_name[64];
  snprintf(sysctl_name, sizeof(sysctl_name), "machdep.cpu%d.cpu_id", cpu_num);

  size_t len = sizeof(*id);
  const int rc = sysctlbyname(sysctl_name, id, &len, NULL, 0);
  if (rc < 0 || len != sizeof(*id)) {
    return -1;
  }
  return 0;
}
// min_u64 returns the smaller of |a| and |b|.
static uint64_t min_u64(uint64_t a, uint64_t b) { return a < b ? a : b; }

// OPENSSL_cpuid_setup queries the ID registers of every readable CPU and
// records in |OPENSSL_armcap_P| the features that all of them support. If no
// CPU can be read, it returns without recording anything.
void OPENSSL_cpuid_setup(void) {
  struct aarch64_sysctl_cpu_id cpu_id;

  // NetBSD's machdep.cpuN.cpu_id sysctl reads each core's ID registers
  // directly, so it reflects that specific core's capabilities, not a
  // system-wide minimum.
  //
  // Each feature is a multi-bit field compared with ">=" below, so the value
  // common to all cores is the per-field *minimum*. A bitwise AND of whole
  // registers is not equivalent: field values 1 and 2 AND to 0, which would
  // drop a feature that both cores provide. Start every field at the maximum
  // and lower it as cores are read.
  uint64_t aes_detection = UINT64_MAX;
  uint64_t sha1_detection = UINT64_MAX;
  uint64_t sha2_detection = UINT64_MAX;
  uint64_t sha3_detection = UINT64_MAX;
  uint64_t rndr_detection = UINT64_MAX;
  uint64_t dit_detection = UINT64_MAX;
  int found_cpu = 0;

  // Query up to 256 CPUs (arbitrary but reasonable upper limit)
  // Scan all possible CPU indices, tolerating gaps from offline/absent CPUs.
  // On big.LITTLE systems, CPU indices may not be contiguous (e.g., an offline
  // core creates a gap), so we must not stop at the first missing index.
  // NOTE: This is still subject to a TOCTOU race if CPUs come online after
  // this scan completes. The only fully correct solution would be a
  // kernel-provided feature intersection (like Linux's AT_HWCAP).
  for (int cpu_num = 0; cpu_num < 256; cpu_num++) {
    if (get_cpu_id(cpu_num, &cpu_id) < 0) {
      // Failed to read this CPU - either it doesn't exist or is offline.
      // Continue scanning: there may be higher-indexed cores with different
      // (potentially fewer) capabilities.
      continue;
    }
    found_cpu = 1;

    // Macros below found in "armreg.h"
    // https://github.com/NetBSD/src/blame/62c785e59d064070166dab5d2a4492055effba89/sys/arch/aarch64/include/armreg.h
    aes_detection = min_u64(
        aes_detection, __SHIFTOUT(cpu_id.ac_aa64isar0, ID_AA64ISAR0_EL1_AES));
    sha1_detection = min_u64(
        sha1_detection, __SHIFTOUT(cpu_id.ac_aa64isar0, ID_AA64ISAR0_EL1_SHA1));
    sha2_detection = min_u64(
        sha2_detection, __SHIFTOUT(cpu_id.ac_aa64isar0, ID_AA64ISAR0_EL1_SHA2));
    sha3_detection = min_u64(
        sha3_detection, __SHIFTOUT(cpu_id.ac_aa64isar0, ID_AA64ISAR0_EL1_SHA3));
    rndr_detection = min_u64(
        rndr_detection, __SHIFTOUT(cpu_id.ac_aa64isar0, ID_AA64ISAR0_EL1_RNDR));
    dit_detection = min_u64(
        dit_detection, __SHIFTOUT(cpu_id.ac_aa64pfr0, ID_AA64PFR0_EL1_DIT));
  }

  // If we couldn't read any CPU info, return early without setting any features
  if (!found_cpu) {
    return;
  }

  // NEON (Advanced SIMD) is mandatory on all ARMv8-A cores
  OPENSSL_armcap_P |= ARMV7_NEON;

  // Inspired by the implementation of `cpu_identify2` here:
  // https://github.com/NetBSD/src/blob/62c785e59d064070166dab5d2a4492055effba89/sys/arch/aarch64/aarch64/cpu.c#L363

  // Check for AES and PMULL
  if (aes_detection >= ID_AA64ISAR0_EL1_AES_AES) {
    OPENSSL_armcap_P |= ARMV8_AES;
  }
  if (aes_detection >= ID_AA64ISAR0_EL1_AES_PMUL) {
    OPENSSL_armcap_P |= ARMV8_PMULL;
  }

  // Check for SHA1 support across all cores
  if (sha1_detection >= ID_AA64ISAR0_EL1_SHA1_SHA1CPMHSU) {
    OPENSSL_armcap_P |= ARMV8_SHA1;
  }

  // Check for SHA256 and SHA512 support across all cores
  if (sha2_detection >= ID_AA64ISAR0_EL1_SHA2_SHA256HSU) {
    OPENSSL_armcap_P |= ARMV8_SHA256;
  }
  if (sha2_detection >= ID_AA64ISAR0_EL1_SHA2_SHA512HSU) {
    OPENSSL_armcap_P |= ARMV8_SHA512;
  }

  // Check for SHA3 support across all cores
  if (sha3_detection >= ID_AA64ISAR0_EL1_SHA3_EOR3) {
    OPENSSL_armcap_P |= ARMV8_SHA3;
  }

  // Check for RNG (RNDR/RNDRRS) support across all cores
  if (rndr_detection >= ID_AA64ISAR0_EL1_RNDR_RNDRRS) {
    OPENSSL_armcap_P |= ARMV8_RNG;
  }

  // Check for DIT (Data Independent Timing) support across all cores
  if (dit_detection >= ID_AA64PFR0_EL1_DIT_IMPL) {
    OPENSSL_armcap_P |= (ARMV8_DIT | ARMV8_DIT_ALLOWED);
  }

  OPENSSL_cpucap_initialized = 1;
}
#endif // OPENSSL_AARCH64 && OPENSSL_NETBSD && !OPENSSL_STATIC_ARMCAP

View File

@@ -0,0 +1,49 @@
// Copyright (c) 2022, Robert Nagy <robert@openbsd.org>
// SPDX-License-Identifier: ISC
#include <openssl/cpu.h>
#if defined(OPENSSL_AARCH64) && defined(OPENSSL_OPENBSD) && \
!defined(OPENSSL_STATIC_ARMCAP)
#include <sys/sysctl.h>
#include <machine/cpu.h>
#include <machine/armreg.h>
#include <stdio.h>
#include <openssl/arm_arch.h>
#include "internal.h"
// OPENSSL_cpuid_setup reads ID_AA64ISAR0 through |sysctl| and records the
// AES, PMULL, SHA1, SHA256 and SHA512 capabilities it advertises in
// |OPENSSL_armcap_P|, together with NEON. If the sysctl fails, it returns
// without recording anything.
void OPENSSL_cpuid_setup(void) {
  // CTL_MACHDEP from sys/sysctl.h
  // CPU_ID_AA64ISAR0 from machine/cpu.h
  int isar0_mib[] = {CTL_MACHDEP, CPU_ID_AA64ISAR0};
  uint64_t isar0 = 0;
  size_t isar0_len = sizeof(isar0);

  if (sysctl(isar0_mib, 2, &isar0, &isar0_len, NULL, 0) < 0) {
    return;
  }

  // Extract each field once; the thresholds below are compared against it.
  const uint64_t aes_field = ID_AA64ISAR0_AES(isar0);
  const uint64_t sha1_field = ID_AA64ISAR0_SHA1(isar0);
  const uint64_t sha2_field = ID_AA64ISAR0_SHA2(isar0);

  OPENSSL_armcap_P |= ARMV7_NEON;

  if (aes_field >= ID_AA64ISAR0_AES_BASE) {
    OPENSSL_armcap_P |= ARMV8_AES;
  }
  if (aes_field >= ID_AA64ISAR0_AES_PMULL) {
    OPENSSL_armcap_P |= ARMV8_PMULL;
  }

  if (sha1_field >= ID_AA64ISAR0_SHA1_BASE) {
    OPENSSL_armcap_P |= ARMV8_SHA1;
  }

  if (sha2_field >= ID_AA64ISAR0_SHA2_BASE) {
    OPENSSL_armcap_P |= ARMV8_SHA256;
  }
  if (sha2_field >= ID_AA64ISAR0_SHA2_512) {
    OPENSSL_armcap_P |= ARMV8_SHA512;
  }

  OPENSSL_cpucap_initialized = 1;
}
#endif // OPENSSL_AARCH64 && OPENSSL_OPENBSD && !OPENSSL_STATIC_ARMCAP

View File

@@ -0,0 +1,78 @@
// Copyright (c) 2023, Google Inc.
// SPDX-License-Identifier: ISC
#include "../../internal.h"
#if defined(OPENSSL_AARCH64) && !defined(OPENSSL_STATIC_ARMCAP) && \
defined(ANDROID_BAREMETAL)
#include <openssl/arm_arch.h>
// Index of each ID field within its register. |get_id_field| turns an index
// into a bit offset by multiplying it by |NBITS_ID_FIELD|.
#define ID_AA64PFR0_EL1_ADVSIMD 5
#define ID_AA64ISAR0_EL1_AES 1
#define ID_AA64ISAR0_EL1_SHA1 2
#define ID_AA64ISAR0_EL1_SHA2 3
// Width, in bits, of one ID register field.
#define NBITS_ID_FIELD 4
// READ_SYSREG evaluates to the 64-bit value of the system register |name|,
// read with an "mrs" instruction. |name| must be a string literal because it
// is pasted into the instruction text. The macro is written as a
// `({ ... })` statement expression.
#define READ_SYSREG(name) \
({ \
uint64_t _r; \
__asm__("mrs %0, " name : "=r"(_r)); \
_r; \
})
// get_id_field returns the |NBITS_ID_FIELD|-bit field number |field| of |reg|
// as an unsigned value. Field 0 is the least significant one.
static unsigned get_id_field(uint64_t reg, unsigned field) {
  const unsigned shift = field * NBITS_ID_FIELD;
  const unsigned mask = (1 << NBITS_ID_FIELD) - 1;
  return (reg >> shift) & mask;
}
// get_signed_id_field returns field number |field| of |reg| interpreted as a
// two's-complement signed value of |NBITS_ID_FIELD| bits.
static int get_signed_id_field(uint64_t reg, unsigned field) {
  const unsigned raw = get_id_field(reg, field);
  const unsigned sign_bit = 1u << (NBITS_ID_FIELD - 1);
  if ((raw & sign_bit) == 0) {
    return (int)raw;
  }
  // Sign bit set: the field encodes raw - 2^NBITS_ID_FIELD.
  return (int)raw - (1 << NBITS_ID_FIELD);
}
// read_armcap builds a capability word from the ID_AA64PFR0_EL1 and
// ID_AA64ISAR0_EL1 system registers. It returns 0 when the AdvSIMD field is
// negative and otherwise NEON plus whichever of AES, PMULL, SHA1, SHA256 and
// SHA512 are advertised.
static uint32_t read_armcap(void) {
  const uint64_t pfr0 = READ_SYSREG("id_aa64pfr0_el1");
  if (get_signed_id_field(pfr0, ID_AA64PFR0_EL1_ADVSIMD) < 0) {
    // If AdvSIMD ("NEON") is missing, don't report other features either.
    // This matches OpenSSL.
    return 0;
  }

  const uint64_t isar0 = READ_SYSREG("id_aa64isar0_el1");
  const unsigned aes = get_id_field(isar0, ID_AA64ISAR0_EL1_AES);
  const unsigned sha1 = get_id_field(isar0, ID_AA64ISAR0_EL1_SHA1);
  const unsigned sha2 = get_id_field(isar0, ID_AA64ISAR0_EL1_SHA2);

  uint32_t armcap = ARMV7_NEON;
  if (aes >= 1) {
    armcap |= ARMV8_AES;
  }
  if (aes >= 2) {
    armcap |= ARMV8_PMULL;
  }
  if (sha1 >= 1) {
    armcap |= ARMV8_SHA1;
  }
  if (sha2 >= 1) {
    armcap |= ARMV8_SHA256;
  }
  if (sha2 >= 2) {
    armcap |= ARMV8_SHA512;
  }
  return armcap;
}
// OPENSSL_cpuid_setup ORs the capabilities found by |read_armcap| into
// |OPENSSL_armcap_P|.
void OPENSSL_cpuid_setup(void) {
  const uint32_t detected = read_armcap();
  OPENSSL_armcap_P |= detected;
}
#endif // OPENSSL_AARCH64 && !OPENSSL_STATIC_ARMCAP && ANDROID_BAREMETAL

View File

@@ -0,0 +1,31 @@
// Copyright (c) 2018, Google Inc.
// Copyright (c) 2020, Arm Ltd.
// SPDX-License-Identifier: ISC
#include "internal.h"
#if defined(OPENSSL_AARCH64) && defined(OPENSSL_WINDOWS) && \
!defined(OPENSSL_STATIC_ARMCAP)
#include <windows.h>
#include <openssl/arm_arch.h>
// OPENSSL_cpuid_setup records NEON unconditionally and, when Windows reports
// the ARMv8 crypto instructions as present, AES, PMULL, SHA1 and SHA256.
void OPENSSL_cpuid_setup(void) {
  // We do not need to check for the presence of NEON, as Armv8-A always has it
  OPENSSL_armcap_P |= ARMV7_NEON;

  if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE)) {
    // These are all covered by one call in Windows
    OPENSSL_armcap_P |= ARMV8_AES | ARMV8_PMULL | ARMV8_SHA1 | ARMV8_SHA256;
  }

  // As of writing, Windows does not have a |PF_*| value for ARMv8.2 SHA-512
  // extensions. When it does, add it here.

  OPENSSL_cpucap_initialized = 1;
}
#endif // OPENSSL_AARCH64 && OPENSSL_WINDOWS && !OPENSSL_STATIC_ARMCAP

View File

@@ -0,0 +1,45 @@
// Copyright (c) 2022, Google Inc.
// SPDX-License-Identifier: ISC
#include "internal.h"
#if defined(OPENSSL_ARM) && defined(OPENSSL_FREEBSD) && \
!defined(OPENSSL_STATIC_ARMCAP)
#include <sys/auxv.h>
#include <sys/types.h>
#include <openssl/arm_arch.h>
#include <openssl/mem.h>
// OPENSSL_cpuid_setup reads AT_HWCAP (and AT_HWCAP2 when it is defined) with
// |elf_aux_info| and records NEON, AES, PMULL, SHA1 and SHA256 in
// |OPENSSL_armcap_P|. Nothing is recorded unless NEON is reported.
void OPENSSL_cpuid_setup(void) {
  unsigned long hwcap = 0;
  unsigned long hwcap2 = 0;

  // |elf_aux_info| may fail, in which case |hwcap| and |hwcap2| will be
  // left at zero. The rest of this function will then gracefully report
  // the features are absent.
  elf_aux_info(AT_HWCAP, &hwcap, sizeof(hwcap));
#if defined(AT_HWCAP2)
  elf_aux_info(AT_HWCAP2, &hwcap2, sizeof(hwcap2));
#endif

  // Matching OpenSSL, only report other features if NEON is present.
  if ((hwcap & HWCAP_NEON) == 0) {
    return;
  }
  OPENSSL_armcap_P |= ARMV7_NEON;

  if (hwcap2 & HWCAP2_AES) {
    OPENSSL_armcap_P |= ARMV8_AES;
  }
  if (hwcap2 & HWCAP2_PMULL) {
    OPENSSL_armcap_P |= ARMV8_PMULL;
  }
  if (hwcap2 & HWCAP2_SHA1) {
    OPENSSL_armcap_P |= ARMV8_SHA1;
  }
  if (hwcap2 & HWCAP2_SHA2) {
    OPENSSL_armcap_P |= ARMV8_SHA256;
  }
}
#endif // OPENSSL_ARM && OPENSSL_FREEBSD && !OPENSSL_STATIC_ARMCAP

View File

@@ -0,0 +1,143 @@
// Copyright (c) 2016, Google Inc.
// SPDX-License-Identifier: ISC
#include "internal.h"
#if defined(OPENSSL_ARM) && defined(OPENSSL_LINUX) && \
!defined(OPENSSL_STATIC_ARMCAP)
#include <errno.h>
#include <fcntl.h>
#include <sys/auxv.h>
#include <sys/types.h>
#include <unistd.h>
#include <openssl/arm_arch.h>
#include <openssl/mem.h>
#include "cpu_arm_linux.h"
// open_eintr calls |open| on |path| with |flags|, retrying for as long as the
// call fails with |EINTR|. It returns the result of the last |open| call.
static int open_eintr(const char *path, int flags) {
  for (;;) {
    const int fd = open(path, flags);
    if (fd >= 0 || errno != EINTR) {
      return fd;
    }
  }
}
// read_eintr calls |read| on |fd| for up to |len| bytes into |out|, retrying
// for as long as the call fails with |EINTR|. It returns the result of the
// last |read| call.
static ssize_t read_eintr(int fd, void *out, size_t len) {
  for (;;) {
    const ssize_t got = read(fd, out, len);
    if (got >= 0 || errno != EINTR) {
      return got;
    }
  }
}
// read_file reads the whole of |path| into memory. On success it returns one
// and stores a newly-allocated buffer in |*out_ptr| and its length in
// |*out_len|; the caller releases the buffer with |OPENSSL_free|. On failure
// it returns zero and leaves both outputs untouched.
static int read_file(char **out_ptr, size_t *out_len, const char *path) {
  static const size_t kChunkSize = 1024;

  const int fd = open_eintr(path, O_RDONLY);
  if (fd < 0) {
    return 0;
  }

  int ok = 0;
  size_t capacity = kChunkSize;
  size_t used = 0;
  char *data = OPENSSL_malloc(capacity);
  if (data == NULL) {
    goto done;
  }

  while (1) {
    // Always keep room for one full chunk before reading.
    if (capacity - used < kChunkSize) {
      const size_t doubled = capacity * 2;
      if (doubled < capacity) {
        // The size computation wrapped around.
        goto done;
      }
      char *grown = OPENSSL_realloc(data, doubled);
      if (grown == NULL) {
        goto done;
      }
      data = grown;
      capacity = doubled;
    }

    const ssize_t got = read_eintr(fd, data + used, kChunkSize);
    if (got < 0) {
      goto done;
    }
    if (got == 0) {
      // End of file.
      break;
    }
    used += got;
  }

  *out_ptr = data;
  *out_len = used;
  ok = 1;
  // Ownership has moved to the caller; keep the cleanup below from freeing it.
  data = NULL;

done:
  OPENSSL_free(data);
  close(fd);
  return ok;
}
static int g_needs_hwcap2_workaround;
void OPENSSL_cpuid_setup(void) {
// We ignore the return value of |read_file| and proceed with an empty
// /proc/cpuinfo on error. If |getauxval| works, we will still detect
// capabilities.
char *cpuinfo_data = NULL;
size_t cpuinfo_len = 0;
read_file(&cpuinfo_data, &cpuinfo_len, "/proc/cpuinfo");
STRING_PIECE cpuinfo;
cpuinfo.data = cpuinfo_data;
cpuinfo.len = cpuinfo_len;
// Matching OpenSSL, only report other features if NEON is present.
unsigned long hwcap = getauxval(AT_HWCAP);
if (hwcap & HWCAP_NEON) {
OPENSSL_armcap_P |= ARMV7_NEON;
// Some ARMv8 Android devices don't expose AT_HWCAP2. Fall back to
// /proc/cpuinfo. See https://crbug.com/boringssl/46. As of February 2021,
// this is now rare (see Chrome's Net.NeedsHWCAP2Workaround metric), but AES
// and PMULL extensions are very useful, so we still carry the workaround
// for now.
#if defined(AT_HWCAP2)
unsigned long hwcap2 = getauxval(AT_HWCAP2);
#else
unsigned long hwcap2 = 0;
#endif
if (hwcap2 == 0) {
hwcap2 = crypto_get_arm_hwcap2_from_cpuinfo(&cpuinfo);
g_needs_hwcap2_workaround = hwcap2 != 0;
}
if (hwcap2 & HWCAP2_AES) {
OPENSSL_armcap_P |= ARMV8_AES;
}
if (hwcap2 & HWCAP2_PMULL) {
OPENSSL_armcap_P |= ARMV8_PMULL;
}
if (hwcap2 & HWCAP2_SHA1) {
OPENSSL_armcap_P |= ARMV8_SHA1;
}
if (hwcap2 & HWCAP2_SHA2) {
OPENSSL_armcap_P |= ARMV8_SHA256;
}
}
OPENSSL_free(cpuinfo_data);
OPENSSL_cpucap_initialized = 1;
}
// CRYPTO_has_broken_NEON always returns zero.
int CRYPTO_has_broken_NEON(void) {
  return 0;
}
// CRYPTO_needs_hwcap2_workaround returns the current value of
// |g_needs_hwcap2_workaround|.
int CRYPTO_needs_hwcap2_workaround(void) {
  const int needed = g_needs_hwcap2_workaround;
  return needed;
}
#endif // OPENSSL_ARM && OPENSSL_LINUX && !OPENSSL_STATIC_ARMCAP

View File

@@ -0,0 +1,174 @@
// Copyright (c) 2018, Google Inc.
// SPDX-License-Identifier: ISC
#ifndef OPENSSL_HEADER_CRYPTO_CPU_ARM_LINUX_H
#define OPENSSL_HEADER_CRYPTO_CPU_ARM_LINUX_H
#include <openssl/base.h>
#include "../../internal.h"
#include <string.h>
#if defined(__cplusplus)
extern "C" {
#endif
// The cpuinfo parser lives in a header file so it may be accessible from
// cross-platform fuzzers without adding code to those platforms normally.
// Each constant below follows the same pattern: if a system header already
// defined it, fail the build unless it has the expected value; if it is not
// defined, define it here with that value.
#if defined(HWCAP_NEON) && HWCAP_NEON != (1 << 12)
#error "HWCAP_NEON is defined but has wrong value (expected (1 << 12))"
#elif !defined(HWCAP_NEON)
#define HWCAP_NEON (1 << 12)
#endif
// See /usr/include/asm/hwcap.h on an ARM installation for the source of
// these values.
#if defined(HWCAP2_AES) && HWCAP2_AES != (1 << 0)
#error "HWCAP2_AES is defined but has wrong value (expected (1 << 0))"
#elif !defined(HWCAP2_AES)
#define HWCAP2_AES (1 << 0)
#endif
#if defined(HWCAP2_PMULL) && HWCAP2_PMULL != (1 << 1)
#error "HWCAP2_PMULL is defined but has wrong value (expected (1 << 1))"
#elif !defined(HWCAP2_PMULL)
#define HWCAP2_PMULL (1 << 1)
#endif
#if defined(HWCAP2_SHA1) && HWCAP2_SHA1 != (1 << 2)
#error "HWCAP2_SHA1 is defined but has wrong value (expected (1 << 2))"
#elif !defined(HWCAP2_SHA1)
#define HWCAP2_SHA1 (1 << 2)
#endif
#if defined(HWCAP2_SHA2) && HWCAP2_SHA2 != (1 << 3)
#error "HWCAP2_SHA2 is defined but has wrong value (expected (1 << 3))"
#elif !defined(HWCAP2_SHA2)
#define HWCAP2_SHA2 (1 << 3)
#endif
// STRING_PIECE is a pointer-plus-length view of a run of bytes. The helpers
// below never rely on a NUL terminator and only ever advance |data| or shrink
// |len|; they do not modify the bytes themselves.
typedef struct {
// First byte of the view.
const char *data;
// Number of bytes in the view.
size_t len;
} STRING_PIECE;
// STRING_PIECE_equals returns one if |a| holds exactly the bytes of the
// NUL-terminated string |b| (terminator excluded) and zero otherwise.
static int STRING_PIECE_equals(const STRING_PIECE *a, const char *b) {
  const size_t wanted_len = strlen(b);
  if (a->len != wanted_len) {
    return 0;
  }
  return OPENSSL_memcmp(a->data, b, wanted_len) == 0;
}
// STRING_PIECE_split finds the first occurrence of |sep| in |in| and, if found,
// sets |*out_left| and |*out_right| to the parts of |in| before and after it
// (the separator itself is dropped). It returns one if |sep| was found and
// zero otherwise, in which case neither output is written.
static int STRING_PIECE_split(STRING_PIECE *out_left, STRING_PIECE *out_right,
                              const STRING_PIECE *in, char sep) {
  // |out_left| or |out_right| may alias |in|, so read everything needed from
  // |in| before writing to either output.
  const char *const base = in->data;
  const size_t total = in->len;

  const char *hit = (const char *)OPENSSL_memchr(base, sep, total);
  if (hit == NULL) {
    return 0;
  }

  const size_t left_len = hit - base;
  out_left->data = base;
  out_left->len = left_len;
  out_right->data = base + left_len + 1;
  out_right->len = total - left_len - 1;
  return 1;
}
// STRING_PIECE_get_delimited reads a |sep|-delimited entry from |s|, writing it
// to |out| and updating |s| to point beyond it. It returns one on success and
// zero if |s| is empty. If |s| has no copies of |sep| and is non-empty, it
// reads the entire string to |out| and leaves |s| empty.
static int STRING_PIECE_get_delimited(STRING_PIECE *s, STRING_PIECE *out,
                                      char sep) {
  if (s->len == 0) {
    return 0;
  }

  if (STRING_PIECE_split(out, s, s, sep)) {
    return 1;
  }

  // |s| had no instances of |sep|. Return the entire string.
  *out = *s;
  s->data += s->len;
  s->len = 0;
  return 1;
}
// STRING_PIECE_trim strips leading and trailing spaces and tabs from |s| by
// adjusting |s->data| and |s->len|; the underlying bytes are not modified.
static void STRING_PIECE_trim(STRING_PIECE *s) {
  const char *begin = s->data;
  size_t remaining = s->len;
  // Drop whitespace from the front.
  while (remaining != 0 && (*begin == ' ' || *begin == '\t')) {
    begin++;
    remaining--;
  }
  // Drop whitespace from the back.
  while (remaining != 0 &&
         (begin[remaining - 1] == ' ' || begin[remaining - 1] == '\t')) {
    remaining--;
  }
  s->data = begin;
  s->len = remaining;
}
// extract_cpuinfo_field looks through the /proc/cpuinfo text in |in| for the
// first line whose key (the text before ':', trimmed) equals |field|. On a
// match it stores the trimmed value in |*out| and returns one. If no line
// matches, it returns zero and leaves |*out| untouched.
static int extract_cpuinfo_field(STRING_PIECE *out, const STRING_PIECE *in,
                                 const char *field) {
  STRING_PIECE rest = *in;
  STRING_PIECE line;
  // Walk the input one '\n'-terminated line at a time.
  while (STRING_PIECE_get_delimited(&rest, &line, '\n')) {
    STRING_PIECE name, contents;
    if (STRING_PIECE_split(&name, &contents, &line, ':')) {
      STRING_PIECE_trim(&name);
      if (STRING_PIECE_equals(&name, field)) {
        STRING_PIECE_trim(&contents);
        *out = contents;
        return 1;
      }
    }
    // Lines without a ':' or with a different key are ignored.
  }
  return 0;
}
// has_list_item returns one if |item| appears as an entry of the
// space-separated |list| and zero otherwise. Entries are compared whole, so a
// substring match does not count.
static int has_list_item(const STRING_PIECE *list, const char *item) {
  STRING_PIECE entry;
  for (STRING_PIECE rest = *list;
       STRING_PIECE_get_delimited(&rest, &entry, ' ');) {
    if (STRING_PIECE_equals(&entry, item)) {
      return 1;
    }
  }
  return 0;
}
// crypto_get_arm_hwcap2_from_cpuinfo builds an ARM |AT_HWCAP2|-style bitmask
// from the "Features" line of |cpuinfo|. Only the aes, pmull, sha1 and sha2
// entries are translated; it returns zero if there is no "Features" line.
static unsigned long crypto_get_arm_hwcap2_from_cpuinfo(
    const STRING_PIECE *cpuinfo) {
  // Maps each recognised feature name to its HWCAP2 bit.
  static const struct {
    const char *name;
    unsigned long bit;
  } kFeatureBits[] = {
      {"aes", HWCAP2_AES},
      {"pmull", HWCAP2_PMULL},
      {"sha1", HWCAP2_SHA1},
      {"sha2", HWCAP2_SHA2},
  };

  STRING_PIECE features;
  if (!extract_cpuinfo_field(&features, cpuinfo, "Features")) {
    return 0;
  }

  unsigned long hwcap2 = 0;
  for (size_t i = 0; i < sizeof(kFeatureBits) / sizeof(kFeatureBits[0]); i++) {
    if (has_list_item(&features, kFeatureBits[i].name)) {
      hwcap2 |= kFeatureBits[i].bit;
    }
  }
  return hwcap2;
}
#if defined(__cplusplus)
} // extern C
#endif
#endif // OPENSSL_HEADER_CRYPTO_CPU_ARM_LINUX_H

View File

@@ -0,0 +1,137 @@
// Copyright (c) 2018, Google Inc.
// SPDX-License-Identifier: ISC
#include "cpu_arm_linux.h"
#include <string.h>
#include <gtest/gtest.h>
// Checks |crypto_get_arm_hwcap2_from_cpuinfo| against a table of sample
// /proc/cpuinfo contents, each paired with the HWCAP2 bitmask it should yield.
TEST(ARMLinuxTest, CPUInfo) {
struct CPUInfoTest {
// cpuinfo is the text handed to the parser.
const char *cpuinfo;
// hwcap2 is the expected return value for |cpuinfo|.
unsigned long hwcap2;
} kTests[] = {
// Nexus 4 from https://crbug.com/341598#c43
{
"Processor : ARMv7 Processor rev 2 (v7l)\n"
"processor : 0\n"
"BogoMIPS : 13.53\n"
"\n"
"processor : 1\n"
"BogoMIPS : 13.53\n"
"\n"
"processor : 2\n"
"BogoMIPS : 13.53\n"
"\n"
"processor : 3\n"
"BogoMIPS : 13.53\n"
"\n"
"Features : swp half thumb fastmult vfp edsp neon vfpv3 tls "
"vfpv4 \n"
"CPU implementer : 0x51\n"
"CPU architecture: 7\n"
"CPU variant : 0x0\n"
"CPU part : 0x06f\n"
"CPU revision : 2\n"
"\n"
"Hardware : QCT APQ8064 MAKO\n"
"Revision : 000b\n"
"Serial : 0000000000000000\n",
0,
},
// Pixel 2 (truncated slightly)
{
"Processor : AArch64 Processor rev 1 (aarch64)\n"
"processor : 0\n"
"BogoMIPS : 38.00\n"
"Features : fp asimd evtstrm aes pmull sha1 sha2 crc32\n"
"CPU implementer : 0x51\n"
"CPU architecture: 8\n"
"CPU variant : 0xa\n"
"CPU part : 0x801\n"
"CPU revision : 4\n"
"\n"
"processor : 1\n"
"BogoMIPS : 38.00\n"
"Features : fp asimd evtstrm aes pmull sha1 sha2 crc32\n"
"CPU implementer : 0x51\n"
"CPU architecture: 8\n"
"CPU variant : 0xa\n"
"CPU part : 0x801\n"
"CPU revision : 4\n"
"\n"
"processor : 2\n"
"BogoMIPS : 38.00\n"
"Features : fp asimd evtstrm aes pmull sha1 sha2 crc32\n"
"CPU implementer : 0x51\n"
"CPU architecture: 8\n"
"CPU variant : 0xa\n"
"CPU part : 0x801\n"
"CPU revision : 4\n"
"\n"
"processor : 3\n"
"BogoMIPS : 38.00\n"
"Features : fp asimd evtstrm aes pmull sha1 sha2 crc32\n"
"CPU implementer : 0x51\n"
"CPU architecture: 8\n"
"CPU variant : 0xa\n"
"CPU part : 0x801\n"
"CPU revision : 4\n"
// (Extra processors omitted.)
"\n"
"Hardware : Qualcomm Technologies, Inc MSM8998\n",
HWCAP2_AES | HWCAP2_PMULL | HWCAP2_SHA1 | HWCAP2_SHA2,
},
// Garbage should be tolerated.
{
"Blah blah blah this is definitely an ARM CPU",
0,
},
// A hypothetical ARMv8 CPU without crc32 (and thus no trailing space
// after the last crypto entry).
{
"Features : aes pmull sha1 sha2\n"
"CPU architecture: 8\n",
HWCAP2_AES | HWCAP2_PMULL | HWCAP2_SHA1 | HWCAP2_SHA2,
},
// Various combinations of ARMv8 flags.
{
"Features : aes sha1 sha2\n"
"CPU architecture: 8\n",
HWCAP2_AES | HWCAP2_SHA1 | HWCAP2_SHA2,
},
{
"Features : pmull sha2\n"
"CPU architecture: 8\n",
HWCAP2_PMULL | HWCAP2_SHA2,
},
// Repeated entries and an entry that merely contains "aes" as a substring.
{
"Features : aes aes aes not_aes aes aes \n"
"CPU architecture: 8\n",
HWCAP2_AES,
},
// An empty feature list.
{
"Features : \n"
"CPU architecture: 8\n",
0,
},
// A feature list with no recognised entries.
{
"Features : nothing\n"
"CPU architecture: 8\n",
0,
},
// If opening /proc/cpuinfo fails, we process the empty string.
{
"",
0,
},
};
for (const auto &t : kTests) {
// Include the offending cpuinfo text in any failure message.
SCOPED_TRACE(t.cpuinfo);
STRING_PIECE sp = {t.cpuinfo, strlen(t.cpuinfo)};
EXPECT_EQ(t.hwcap2, crypto_get_arm_hwcap2_from_cpuinfo(&sp));
}
}

View File

@@ -0,0 +1,306 @@
// Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) All rights reserved.
// SPDX-License-Identifier: Apache-2.0
#include <openssl/base.h>
#include "internal.h"
#if !defined(OPENSSL_NO_ASM) && (defined(OPENSSL_X86) || defined(OPENSSL_X86_64))
#ifndef __STDC_FORMAT_MACROS
#define __STDC_FORMAT_MACROS
#endif
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#if defined(_MSC_VER)
OPENSSL_MSVC_PRAGMA(warning(push, 3))
#include <immintrin.h>
#include <intrin.h>
OPENSSL_MSVC_PRAGMA(warning(pop))
#endif
// OPENSSL_cpuid runs the cpuid instruction. |leaf| is passed in as EAX and ECX
// is set to zero. It writes EAX, EBX, ECX, and EDX to |*out_eax| through
// |*out_edx|.
//
// Three implementations are selected at compile time: the MSVC |__cpuid|
// intrinsic, an inline-assembly variant for 32-bit position-independent code
// that avoids naming EBX as an operand, and a plain inline-assembly variant.
static void OPENSSL_cpuid(uint32_t *out_eax, uint32_t *out_ebx,
uint32_t *out_ecx, uint32_t *out_edx, uint32_t leaf) {
#if defined(_MSC_VER)
// The intrinsic fills |tmp| with four values, which are copied to the EAX,
// EBX, ECX and EDX outputs in that order.
int tmp[4];
__cpuid(tmp, (int)leaf);
*out_eax = (uint32_t)tmp[0];
*out_ebx = (uint32_t)tmp[1];
*out_ecx = (uint32_t)tmp[2];
*out_edx = (uint32_t)tmp[3];
#elif defined(__pic__) && defined(OPENSSL_32_BIT)
// Inline assembly may not clobber the PIC register. For 32-bit, this is EBX.
// See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=47602.
//
// EBX is saved in EDI before cpuid runs and swapped back afterwards, so the
// EBX result is delivered through EDI (the "=D" output) and EBX itself is
// restored to its previous value.
__asm__ volatile (
"xor %%ecx, %%ecx\n"
"mov %%ebx, %%edi\n"
"cpuid\n"
"xchg %%edi, %%ebx\n"
: "=a"(*out_eax), "=D"(*out_ebx), "=c"(*out_ecx), "=d"(*out_edx)
: "a"(leaf)
);
#else
// No PIC constraint here: zero ECX, run cpuid and read all four registers
// directly.
__asm__ volatile (
"xor %%ecx, %%ecx\n"
"cpuid\n"
: "=a"(*out_eax), "=b"(*out_ebx), "=c"(*out_ecx), "=d"(*out_edx)
: "a"(leaf)
);
#endif
}
// OPENSSL_xgetbv returns the value of an Intel Extended Control Register (XCR).
// Currently only XCR0 is defined by Intel so |xcr| should always be zero.
//
// |xcr| is passed to the instruction in ECX. The result is read back as two
// 32-bit halves (EDX high, EAX low) and combined into one 64-bit value.
static uint64_t OPENSSL_xgetbv(uint32_t xcr) {
#if defined(_MSC_VER)
// MSVC provides an intrinsic for the instruction.
return (uint64_t)_xgetbv(xcr);
#else
uint32_t eax, edx;
#if defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_AVX)
// Some old assemblers don't support the xgetbv instruction so we emit
// the opcode of xgetbv directly.
__asm__ volatile (".byte 0x0f, 0x01, 0xd0" : "=a"(eax), "=d"(edx) : "c"(xcr));
#else
__asm__ volatile ("xgetbv" : "=a"(eax), "=d"(edx) : "c"(xcr));
#endif
// EDX holds the upper 32 bits and EAX the lower 32 bits.
return (((uint64_t)edx) << 32) | eax;
#endif
}
// os_supports_avx512 returns true if |xcr0| shows that AVX512 may be used.
// On Apple platforms it always returns false; elsewhere it requires every bit
// of the mask 0xe6 (bits 1, 2, 5, 6 and 7) to be set in |xcr0|.
static bool os_supports_avx512(uint64_t xcr0) {
#if defined(OPENSSL_APPLE)
  // The Darwin kernel had a bug where it could corrupt the opmask registers.
  // See
  // https://community.intel.com/t5/Software-Tuning-Performance/MacOS-Darwin-kernel-bug-clobbers-AVX-512-opmask-register-state/m-p/1327259
  // Darwin also leaves the AVX512 bits of XCR0 clear until a thread first tries
  // to use AVX512. Using AVX512 safely and consistently on macOS would
  // therefore need a kernel version check plus a macOS-specific detection
  // method. That is not worth doing, especially given Apple's transition to
  // arm64.
  return false;
#else
  const uint64_t required_state_bits = 0xe6;
  const uint64_t missing = ~xcr0 & required_state_bits;
  return missing == 0;
#endif
}
// handle_cpu_env applies the value from |in| to the CPUID values in |out[0]|
// and |out[1]|. See the comment in |OPENSSL_cpuid_setup| about this.
//
// |in| is an optional '~' or '|' prefix followed by an unsigned 64-bit value,
// either decimal or "0x"-prefixed hex. The low 32 bits of the value apply to
// |out[0]| and the high 32 bits to |out[1]|:
//   '~' prefix: the requested bits are cleared from |out|.
//   '|' prefix: the requested bits are ORed into |out|.
//   no prefix:  |out| is replaced by the requested value.
//
// If no number can be parsed from |in|, |out| is left untouched. Unless the
// prefix is '~', requesting a bit that is not already set in a non-zero |out|
// prints an error to stderr and aborts the process.
static void handle_cpu_env(uint32_t *out, const char *in) {
  const int invert = in[0] == '~';
  const int or_mode = in[0] == '|';
  const int skip_first_byte = invert || or_mode;
  const int hex = in[skip_first_byte] == '0' && in[skip_first_byte+1] == 'x';
  uint32_t intelcap0 = out[0];
  uint32_t intelcap1 = out[1];

  int sscanf_result;
  uint64_t v = 0;
  if (hex) {
    sscanf_result = sscanf(in + skip_first_byte + 2, "%" PRIx64, &v);
  } else {
    sscanf_result = sscanf(in + skip_first_byte, "%" PRIu64, &v);
  }

  // |sscanf| returns zero on a matching failure but EOF (a negative value) when
  // the input ends before any conversion, e.g. for "", "~", "|" or "0x". Accept
  // only a single successful conversion so that |v| is never used unparsed.
  if (sscanf_result != 1) {
    return;
  }

  uint32_t reqcap0 = (uint32_t)(v & UINT32_MAX);
  uint32_t reqcap1 = (uint32_t)(v >> 32);

  // Detect if the user is trying to use the environment variable to set
  // a capability that is _not_ available on the CPU.
  // The case of invert cannot enable a nonexistent capability;
  // it can only disable an existing one.
  if (!invert && (intelcap0 || intelcap1)) {
    // Allow Intel indicator bit to be set for testing
    if((~(1u << 30 | intelcap0) & reqcap0) || (~intelcap1 & reqcap1)) {
      fprintf(stderr,
              "Fatal Error: HW capability found: 0x%02X 0x%02X, but HW capability requested: 0x%02X 0x%02X.\n",
              intelcap0, intelcap1, reqcap0, reqcap1);
      abort();
    }
  }

  if (invert) {
    out[0] &= ~reqcap0;
    out[1] &= ~reqcap1;
  } else if (or_mode) {
    out[0] |= reqcap0;
    out[1] |= reqcap1;
  } else {
    out[0] = reqcap0;
    out[1] = reqcap1;
  }
}
extern uint8_t OPENSSL_cpucap_initialized;
// amd_rdrand_maybe_apply_restrictions returns one if RDRAND should be treated
// as restricted for the AMD CPU identified by |family| and |model|, and zero if
// no restriction applies.
static int amd_rdrand_maybe_apply_restrictions(const uint32_t family,
                                               const uint32_t model) {
  // Families before 0x17 (Zen): RDRAND is disabled due to reported failures
  // after suspend. https://bugzilla.redhat.com/show_bug.cgi?id=1150286
  if (family < 0x17) {
    return 1;
  }

  // Families after 0x17: no restrictions.
  if (family != 0x17) {
    return 0;
  }

  // Family 0x17, models 0x70-0x7f: also disabled, due to possible RDRAND
  // failures there too.
  if (model >= 0x70 && model <= 0x7f) {
    return 1;
  }

  // Remaining family 0x17 models. Zen2 EPYC have prohibitively slow RDRAND
  // implementations, measured specifically on the EPYC 7R32. Please see
  // q/VxC3AiwXpAjJ. The slowness is assumed to be universal to all AMD models
  // based on the Zen2 uarch, and the assumption is extended to Zen1 models too
  // because Zen1 and Zen2 share a family number.
  return 1;
}
// OPENSSL_cpuid_setup fills |OPENSSL_ia32cap_P|. It queries CPUID leaves 0, 1
// and (when available) 7, adjusts several bits as described inline, stores the
// result in |OPENSSL_ia32cap_P[0]| through |[3]|, sets
// |OPENSSL_cpucap_initialized| and finally applies the optional
// OPENSSL_ia32cap environment variable override.
void OPENSSL_cpuid_setup(void) {
// Determine the vendor and maximum input value.
uint32_t eax, ebx, ecx, edx;
OPENSSL_cpuid(&eax, &ebx, &ecx, &edx, 0);
uint32_t num_ids = eax;
int is_intel = ebx == 0x756e6547 /* Genu */ &&
edx == 0x49656e69 /* ineI */ &&
ecx == 0x6c65746e /* ntel */;
int is_amd = ebx == 0x68747541 /* Auth */ &&
edx == 0x69746e65 /* enti */ &&
ecx == 0x444d4163 /* cAMD */;
// Leaf 7 is only queried when leaf 0 reports it as available; otherwise the
// extended feature words stay zero.
uint32_t extended_features[2] = {0};
if (num_ids >= 7) {
OPENSSL_cpuid(&eax, &ebx, &ecx, &edx, 7);
extended_features[0] = ebx;
extended_features[1] = ecx;
}
// From here on |eax|, |ecx| and |edx| hold the leaf 1 results.
OPENSSL_cpuid(&eax, &ebx, &ecx, &edx, 1);
if (is_amd) {
// See https://www.amd.com/system/files/TechDocs/25481.pdf, page 10.
const uint32_t base_family = (eax >> 8) & 15;
const uint32_t base_model = (eax >> 4) & 15;
uint32_t family = base_family;
uint32_t model = base_model;
// The extended family and model fields are only folded in when the base
// family is 0xf.
if (base_family == 0xf) {
const uint32_t ext_family = (eax >> 20) & 255;
family += ext_family;
const uint32_t ext_model = (eax >> 16) & 15;
model |= ext_model << 4;
}
// Bit 30 of ECX is the bit tested by |CRYPTO_is_RDRAND_capable|.
if (amd_rdrand_maybe_apply_restrictions(family, model) != 0) {
ecx &= ~(1u << 30);
}
}
// Force the hyper-threading bit so that the more conservative path is always
// chosen.
edx |= 1u << 28;
// Reserved bit #20 was historically repurposed to control the in-memory
// representation of RC4 state. Always set it to zero.
edx &= ~(1u << 20);
// Reserved bit #30 is repurposed to signal an Intel CPU.
if (is_intel) {
edx |= (1u << 30);
// Clear the XSAVE bit on Knights Landing to mimic Silvermont. This enables
// some Silvermont-specific codepaths which perform better. See OpenSSL
// commit 64d92d74985ebb3d0be58a9718f9e080a14a8e7f and
// |CRYPTO_cpu_perf_is_like_silvermont|.
if ((eax & 0x0fff0ff0) == 0x00050670 /* Knights Landing */ ||
(eax & 0x0fff0ff0) == 0x00080650 /* Knights Mill (per SDE) */) {
ecx &= ~(1u << 26);
}
} else {
edx &= ~(1u << 30);
}
// The SDBG bit is repurposed to denote AMD XOP support. Don't ever use AMD
// XOP code paths.
ecx &= ~(1u << 11);
uint64_t xcr0 = 0;
if (ecx & (1u << 27)) {
// XCR0 may only be queried if the OSXSAVE bit is set.
xcr0 = OPENSSL_xgetbv(0);
}
// See Intel manual, volume 1, section 14.3.
if ((xcr0 & 6) != 6) {
// YMM registers cannot be used.
ecx &= ~(1u << 28); // AVX
ecx &= ~(1u << 12); // FMA
ecx &= ~(1u << 11); // AMD XOP
// Clear AVX2 and AVX512* bits.
//
// TODO(davidben): Should bits 17 and 26-28 also be cleared? Upstream
// doesn't clear those. See the comments in
// |CRYPTO_hardware_supports_XSAVE|.
extended_features[0] &=
~((1u << 5) | (1u << 16) | (1u << 21) | (1u << 30) | (1u << 31));
}
// See Intel manual, volume 1, section 15.2.
if (!os_supports_avx512(xcr0)) {
// Clear AVX512F. Note we don't touch other AVX512 extensions because they
// can be used with YMM.
extended_features[0] &= ~(1u << 16);
}
// Disable ADX instructions on Knights Landing. See OpenSSL commit
// 64d92d74985ebb3d0be58a9718f9e080a14a8e7f.
if ((ecx & (1u << 26)) == 0) {
extended_features[0] &= ~(1u << 19);
}
// Publish the adjusted words and mark the capability vector as initialized
// before the environment override is considered.
OPENSSL_ia32cap_P[0] = edx;
OPENSSL_ia32cap_P[1] = ecx;
OPENSSL_ia32cap_P[2] = extended_features[0];
OPENSSL_ia32cap_P[3] = extended_features[1];
OPENSSL_cpucap_initialized = 1;
const char *env1, *env2;
env1 = getenv("OPENSSL_ia32cap");
if (env1 == NULL) {
return;
}
// OPENSSL_ia32cap can contain zero, one or two values, separated with a ':'.
// Each value is a 64-bit, unsigned value which may start with "0x" to
// indicate a hex value. Prior to the 64-bit value, a '~' or '|' may be given.
//
// If the '~' prefix is present:
// the value is inverted and ANDed with the probed CPUID result
// If the '|' prefix is present:
// the value is ORed with the probed CPUID result
// Otherwise:
// the value is taken as the result of the CPUID
//
// The first value determines OPENSSL_ia32cap_P[0] and [1]. The second [2]
// and [3].
handle_cpu_env(&OPENSSL_ia32cap_P[0], env1);
// The second value, if any, starts right after the first ':'.
env2 = strchr(env1, ':');
if (env2 != NULL) {
handle_cpu_env(&OPENSSL_ia32cap_P[2], env2 + 1);
}
}
#endif // !OPENSSL_NO_ASM && (OPENSSL_X86 || OPENSSL_X86_64)

View File

@@ -0,0 +1,92 @@
// Copyright (c) 2016, Google Inc.
// SPDX-License-Identifier: ISC
#include <openssl/base.h>
#if defined(OPENSSL_PPC64LE)
#include <sys/auxv.h>
#if !defined(PPC_FEATURE2_HAS_VCRYPTO)
// PPC_FEATURE2_HAS_VCRYPTO was taken from section 4.1.2.3 of the “OpenPOWER
// ABI for Linux Supplement”.
#define PPC_FEATURE2_HAS_VCRYPTO 0x02000000
#endif
// handle_cpu_env applies the value from |in| to the capability word |*out|.
//
// |in| is an optional '~' or '|' prefix followed by an unsigned 64-bit value,
// either decimal or "0x"-prefixed hex:
//   '~' prefix: the requested bits are cleared from |*out|.
//   '|' prefix: the requested bits are ORed into |*out|.
//   no prefix:  |*out| is replaced by the requested value.
//
// If no number can be parsed from |in|, |*out| is left untouched. Unless the
// prefix is '~', requesting a bit that is not already set in a non-zero |*out|
// prints an error to stderr and aborts the process.
static void handle_cpu_env(unsigned long *out, const char *in) {
  OPENSSL_STATIC_ASSERT(sizeof(unsigned long) == 8, PPC64LE_UNSIGNED_LONG_NOT_8_BYTES);

  const int invert = in[0] == '~';
  const int or_mode = in[0] == '|';
  const int skip_first_byte = (invert || or_mode) ? 1 : 0;
  const int hex = in[skip_first_byte] == '0' && in[skip_first_byte+1] == 'x';
  unsigned long ppccap = *out;

  int sscanf_result;
  uint64_t reqcap = 0;
  if (hex) {
    sscanf_result = sscanf(in + skip_first_byte + 2, "%" PRIx64, &reqcap);
  } else {
    sscanf_result = sscanf(in + skip_first_byte, "%" PRIu64, &reqcap);
  }

  // |sscanf| returns zero on a matching failure but EOF (a negative value) when
  // the input ends before any conversion, e.g. for "", "~", "|" or "0x". Accept
  // only a single successful conversion so that |reqcap| is never used
  // unparsed.
  if (sscanf_result != 1) {
    return;
  }

  // Detect if the user is trying to use the environment variable to set
  // a capability that is _not_ available on the CPU.
  // The case of invert cannot enable a nonexistent capability;
  // it can only disable an existing one.
  if (!invert && ppccap && (~ppccap & reqcap)) {
    // The static assertion above guarantees the cast to |unsigned long| is
    // lossless; it makes the argument type match "%lX" exactly.
    fprintf(stderr,
            "Fatal Error: HW capability found: 0x%02lX, but HW capability requested: 0x%02lX.\n",
            ppccap, (unsigned long)reqcap);
    abort();
  }

  if (invert) {
    *out &= ~reqcap;
  } else if (or_mode) {
    *out |= reqcap;
  } else {
    *out = reqcap;
  }
}
extern uint8_t OPENSSL_cpucap_initialized;
// OPENSSL_cpuid_setup populates |OPENSSL_ppc64le_hwcap2| from the auxiliary
// vector when |AT_HWCAP2| is available, marks the capability state as
// initialized and then applies the optional OPENSSL_ppccap override.
void OPENSSL_cpuid_setup(void) {
#if defined(AT_HWCAP2) && defined(OPENSSL_LINUX)
  OPENSSL_ppc64le_hwcap2 = getauxval(AT_HWCAP2);
#elif defined(AT_HWCAP2) && defined(OPENSSL_FREEBSD)
  elf_aux_info(AT_HWCAP2, &OPENSSL_ppc64le_hwcap2, sizeof(OPENSSL_ppc64le_hwcap2));
#elif defined(AT_HWCAP2)
  OPENSSL_ppc64le_hwcap2 = 0;
#endif

  OPENSSL_cpucap_initialized = 1;

  // OPENSSL_ppccap holds an unsigned 64-bit value, which may start with "0x"
  // to indicate hex. A '~' or '|' may precede the value.
  //
  // With a '~' prefix:
  //   the value is inverted and ANDed with the probed capability value
  // With a '|' prefix:
  //   the value is ORed with the probed capability value
  // With no prefix:
  //   the value replaces the probed capability value
  const char *override_value = getenv("OPENSSL_ppccap");
  if (override_value == NULL) {
    return;
  }
  handle_cpu_env(&OPENSSL_ppc64le_hwcap2, override_value);
}
// CRYPTO_is_PPC64LE_vcrypto_capable returns one if the
// |PPC_FEATURE2_HAS_VCRYPTO| bit is set in |OPENSSL_ppc64le_hwcap2| and zero
// otherwise.
int CRYPTO_is_PPC64LE_vcrypto_capable(void) {
  return (OPENSSL_ppc64le_hwcap2 & PPC_FEATURE2_HAS_VCRYPTO) ? 1 : 0;
}
#endif // OPENSSL_PPC64LE

View File

@@ -0,0 +1,95 @@
// Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0 OR ISC
// The contents of this file were copied from crypto/crypto.c to this file
// as part of the change that moves the CPU capability logic inside the FIPS
// module to satisfy the FIPS requirements.
#include <openssl/cpu.h>
// Our assembly does not use the GOT to reference symbols, which means
// references to visible symbols will often require a TEXTREL. This is
// undesirable, so all assembly-referenced symbols should be hidden. CPU
// capabilities are the only such symbols defined in C. Explicitly hide them,
// rather than rely on being built with -fvisibility=hidden.
#if defined(OPENSSL_WINDOWS)
#define HIDDEN
#else
#define HIDDEN __attribute__((visibility("hidden")))
#endif
#if defined(OPENSSL_X86) || defined(OPENSSL_X86_64)
// This value must be explicitly initialised to zero in order to work around a
// bug in libtool or the linker on OS X.
//
// If not initialised then it becomes a "common symbol". When put into an
// archive, linking on OS X will fail to resolve common symbols. By
// initialising it to zero, it becomes a "data symbol", which isn't so
// affected.
HIDDEN uint32_t OPENSSL_ia32cap_P[4] = {0};
#elif defined(OPENSSL_PPC64LE)
HIDDEN unsigned long OPENSSL_ppc64le_hwcap2 = 0;
#elif defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64)
#include <openssl/arm_arch.h>
#if defined(OPENSSL_STATIC_ARMCAP)
// See ARM ACLE for the definitions of these macros. Note |__ARM_FEATURE_AES|
// covers both AES and PMULL and |__ARM_FEATURE_SHA2| covers SHA-1 and SHA-256.
// https://developer.arm.com/architectures/system-architectures/software-standards/acle
// https://github.com/ARM-software/acle/issues/152
//
// TODO(davidben): Do we still need |OPENSSL_STATIC_ARMCAP_*| or are the
// standard flags and -march sufficient?
HIDDEN uint32_t OPENSSL_armcap_P =
#if defined(OPENSSL_STATIC_ARMCAP_NEON) || defined(__ARM_NEON)
ARMV7_NEON |
#endif
#if defined(OPENSSL_STATIC_ARMCAP_AES) || defined(__ARM_FEATURE_AES)
ARMV8_AES |
#endif
#if defined(OPENSSL_STATIC_ARMCAP_PMULL) || defined(__ARM_FEATURE_AES)
ARMV8_PMULL |
#endif
#if defined(OPENSSL_STATIC_ARMCAP_SHA1) || defined(__ARM_FEATURE_SHA2)
ARMV8_SHA1 |
#endif
#if defined(OPENSSL_STATIC_ARMCAP_SHA256) || defined(__ARM_FEATURE_SHA2)
ARMV8_SHA256 |
#endif
#if defined(OPENSSL_STATIC_ARMCAP_SHA512) || defined(__ARM_FEATURE_SHA512)
ARMV8_SHA512 |
#endif
#if defined(OPENSSL_STATIC_ARMCAP_SHA3) || defined(__ARM_FEATURE_SHA3)
ARMV8_SHA3 |
#endif
#if defined(OPENSSL_STATIC_ARMCAP_NEOVERSE_N1) || defined(__ARM_FEATURE_NEOVERSE_N1)
ARMV8_NEOVERSE_N1 |
#endif
#if defined(OPENSSL_STATIC_ARMCAP_NEOVERSE_V1) || defined(__ARM_FEATURE_NEOVERSE_V1)
ARMV8_NEOVERSE_V1 |
#endif
#if defined(OPENSSL_STATIC_ARMCAP_NEOVERSE_V2) || defined(__ARM_FEATURE_NEOVERSE_V2)
ARMV8_NEOVERSE_V2 |
#endif
0;
#else
HIDDEN uint32_t OPENSSL_armcap_P = 0;
#endif
#endif
#if defined(BORINGSSL_DISPATCH_TEST)
// This value must be explicitly initialized to zero. See similar comment above.
HIDDEN uint8_t BORINGSSL_function_hit[15] = {0};
#endif // BORINGSSL_DISPATCH_TEST
// This variable is used only for testing purposes to ensure that the library
// constructor is executed and the capability variable is initialized.
HIDDEN uint8_t OPENSSL_cpucap_initialized = 0;

View File

@@ -0,0 +1,338 @@
// Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0 OR ISC
#ifndef OPENSSL_HEADER_CPUCAP_INTERNAL_H
#define OPENSSL_HEADER_CPUCAP_INTERNAL_H
#include <openssl/base.h>
#if defined(__cplusplus)
extern "C" {
#endif
#if defined(OPENSSL_X86) || defined(OPENSSL_X86_64) || defined(OPENSSL_ARM) || \
defined(OPENSSL_AARCH64) || defined(OPENSSL_PPC64LE)
#define HAS_OPENSSL_CPUID_SETUP
// OPENSSL_cpuid_setup initializes the platform-specific feature cache.
void OPENSSL_cpuid_setup(void);
#endif
// Runtime CPU feature support
#if defined(OPENSSL_X86) || defined(OPENSSL_X86_64)
// OPENSSL_ia32cap_P contains the Intel CPUID bits when running on an x86 or
// x86-64 system.
//
// Index 0:
// EDX for CPUID where EAX = 1
// Bit 20 is always zero
// Bit 28 is adjusted to reflect whether the data cache is shared between
// multiple logical cores
// Bit 30 is used to indicate an Intel CPU
// Index 1:
// ECX for CPUID where EAX = 1
// Bit 11 is used to indicate AMD XOP support, not SDBG
// Index 2:
// EBX for CPUID where EAX = 7
// Index 3:
// ECX for CPUID where EAX = 7
//
// Note: the CPUID bits are pre-adjusted for the OSXSAVE bit and the YMM and XMM
// bits in XCR0, so it is not necessary to check those. (WARNING: See caveats
// in cpu_intel.c.)
extern uint32_t OPENSSL_ia32cap_P[4];
#if defined(BORINGSSL_FIPS) && !defined(BORINGSSL_SHARED_LIBRARY)
// The FIPS module, as a static library, requires an out-of-line version of
// |OPENSSL_ia32cap_get| so accesses can be rewritten by delocate. Mark the
// function const so multiple accesses can be optimized together.
const uint32_t *OPENSSL_ia32cap_get(void) __attribute__((const));
#else
// Inline accessor: returns a pointer to the first of the four words of
// |OPENSSL_ia32cap_P|.
OPENSSL_INLINE const uint32_t *OPENSSL_ia32cap_get(void) {
  return &OPENSSL_ia32cap_P[0];
}
#endif
// See Intel manual, volume 2A, table 3-11.
OPENSSL_INLINE int CRYPTO_is_FXSR_capable(void) {
  // FXSR is tested via bit 24 of capability word 0.
  const uint32_t word0 = OPENSSL_ia32cap_get()[0];
  return ((word0 >> 24) & 1u) != 0;
}
OPENSSL_INLINE int CRYPTO_is_intel_cpu(void) {
  // Bit 30 of capability word 0 is a reserved CPUID bit that is reused as the
  // "this is an Intel CPU" indicator.
  const uint32_t word0 = OPENSSL_ia32cap_get()[0];
  return ((word0 >> 30) & 1u) != 0;
}
// See Intel manual, volume 2A, table 3-10.
OPENSSL_INLINE int CRYPTO_is_PCLMUL_capable(void) {
  // PCLMUL is tested via bit 1 of capability word 1.
  const uint32_t word1 = OPENSSL_ia32cap_get()[1];
  return ((word1 >> 1) & 1u) != 0;
}
OPENSSL_INLINE int CRYPTO_is_SSSE3_capable(void) {
  // SSSE3 is tested via bit 9 of capability word 1.
  const uint32_t word1 = OPENSSL_ia32cap_get()[1];
  return ((word1 >> 9) & 1u) != 0;
}
OPENSSL_INLINE int CRYPTO_is_SSE4_1_capable(void) {
  // SSE4.1 is tested via bit 19 of capability word 1.
  const uint32_t word1 = OPENSSL_ia32cap_get()[1];
  return ((word1 >> 19) & 1u) != 0;
}
OPENSSL_INLINE int CRYPTO_is_MOVBE_capable(void) {
  // MOVBE is tested via bit 22 of capability word 1.
  const uint32_t word1 = OPENSSL_ia32cap_get()[1];
  return ((word1 >> 22) & 1u) != 0;
}
OPENSSL_INLINE int CRYPTO_is_AESNI_capable(void) {
  // AES-NI is tested via bit 25 of capability word 1.
  const uint32_t word1 = OPENSSL_ia32cap_get()[1];
  return ((word1 >> 25) & 1u) != 0;
}
// We intentionally avoid defining a |CRYPTO_is_XSAVE_capable| function. See
// |CRYPTO_cpu_perf_is_like_silvermont|.
OPENSSL_INLINE int CRYPTO_is_AVX_capable(void) {
  // AVX is tested via bit 28 of capability word 1.
  const uint32_t word1 = OPENSSL_ia32cap_get()[1];
  return ((word1 >> 28) & 1u) != 0;
}
OPENSSL_INLINE int CRYPTO_is_RDRAND_capable(void) {
  // RDRAND is tested via bit 30 of capability word 1.
  const uint32_t word1 = OPENSSL_ia32cap_get()[1];
  return ((word1 >> 30) & 1u) != 0;
}
OPENSSL_INLINE int CRYPTO_is_AMD_XOP_support(void) {
  // Bit 11 of capability word 1 is used to indicate AMD XOP support.
  const uint32_t word1 = OPENSSL_ia32cap_get()[1];
  return ((word1 >> 11) & 1u) != 0;
}
// See Intel manual, volume 2A, table 3-8.
OPENSSL_INLINE int CRYPTO_is_BMI1_capable(void) {
  // BMI1 is tested via bit 3 of capability word 2.
  const uint32_t word2 = OPENSSL_ia32cap_get()[2];
  return ((word2 >> 3) & 1u) != 0;
}
OPENSSL_INLINE int CRYPTO_is_AVX2_capable(void) {
  // AVX2 is tested via bit 5 of capability word 2.
  const uint32_t word2 = OPENSSL_ia32cap_get()[2];
  return ((word2 >> 5) & 1u) != 0;
}
OPENSSL_INLINE int CRYPTO_is_BMI2_capable(void) {
  // BMI2 is tested via bit 8 of capability word 2.
  const uint32_t word2 = OPENSSL_ia32cap_get()[2];
  return ((word2 >> 8) & 1u) != 0;
}
OPENSSL_INLINE int CRYPTO_is_ADX_capable(void) {
  // ADX is tested via bit 19 of capability word 2.
  const uint32_t word2 = OPENSSL_ia32cap_get()[2];
  return ((word2 >> 19) & 1u) != 0;
}
OPENSSL_INLINE int CRYPTO_is_SHAEXT_capable(void) {
  // The SHA extensions are tested via bit 29 of capability word 2.
  const uint32_t word2 = OPENSSL_ia32cap_get()[2];
  return ((word2 >> 29) & 1u) != 0;
}
// Mask of the capability word 2 bits that must all be present for AVX512:
//   AVX512VL | AVX512BW | AVX512DQ | AVX512F
//   1u << 31 | 1u << 30 | 1u << 17 | 1u << 16
//   1100_0000_0000_0011_0000_0000_0000_0000
#define CPU_CAP_AVX512_BITFLAGS 0xC0030000
// CRYPTO_is_AVX512_capable returns one only if every bit of
// |CPU_CAP_AVX512_BITFLAGS| is set in capability word 2.
OPENSSL_INLINE int CRYPTO_is_AVX512_capable(void) {
  const uint32_t missing = ~OPENSSL_ia32cap_get()[2] & CPU_CAP_AVX512_BITFLAGS;
  return missing == 0;
}
OPENSSL_INLINE int CRYPTO_is_VAES_capable(void) {
  // VAES is tested via bit 41 - 32 = 9 of capability word 3.
  const uint32_t word3 = OPENSSL_ia32cap_get()[3];
  return ((word3 >> (41 - 32)) & 1u) != 0;
}
OPENSSL_INLINE int CRYPTO_is_VPCLMULQDQ_capable(void) {
  // VPCLMULQDQ is tested via bit 42 - 32 = 10 of capability word 3.
  const uint32_t word3 = OPENSSL_ia32cap_get()[3];
  return ((word3 >> (42 - 32)) & 1u) != 0;
}
// Mask of the capability word 2 bits that must all be present for AVX512 IFMA:
//   AVX512VL | AVX512BW | AVX512_IFMA | AVX512DQ | AVX512F
//   1u << 31 | 1u << 30 | 1u << 21    | 1u << 17 | 1u << 16
//   1100_0000_0010_0011_0000_0000_0000_0000
#define CPU_CAP_AVX512IFMA_BITFLAGS 0xC0230000
// CRYPTO_is_AVX512IFMA_capable returns one only if every bit of
// |CPU_CAP_AVX512IFMA_BITFLAGS| is set in capability word 2.
OPENSSL_INLINE int CRYPTO_is_AVX512IFMA_capable(void) {
  const uint32_t missing =
      ~OPENSSL_ia32cap_get()[2] & CPU_CAP_AVX512IFMA_BITFLAGS;
  return missing == 0;
}
OPENSSL_INLINE int CRYPTO_is_VBMI2_capable(void) {
  // VBMI2 is tested via bit 6 of capability word 3.
  const uint32_t word3 = OPENSSL_ia32cap_get()[3];
  return ((word3 >> 6) & 1u) != 0;
}
// CRYPTO_cpu_perf_is_like_silvermont returns one if a heuristic says the CPU
// performs like Silvermont. On such CPUs a different codepath is often faster
// than the one the available instructions would otherwise select. See
// chacha-x86_64.pl.
//
// The heuristic is "MOVBE without XSAVE". It also matches Bonnell
// (Silvermont's predecessor in the Atom lineup), and |OPENSSL_cpuid_setup|
// forces Knights Landing to match as well. Goldmont (Silvermont's successor in
// the Atom lineup) added XSAVE, so it is not matched. Various sources indicate
// AMD first implemented MOVBE and XSAVE together in Jaguar, so AMD chips should
// not match either; the same appears to hold for other x86(-64) CPUs.
OPENSSL_INLINE int CRYPTO_cpu_perf_is_like_silvermont(void) {
  // WARNING: This MUST NOT be used to guard the execution of the XSAVE
  // instruction. Bit 26 is the "hardware supports XSAVE" bit, not the OSXSAVE
  // bit that indicates whether XSAVE can safely be executed, and it may be set
  // even when the operating system has disabled XSAVE. See the comment in
  // cpu_intel.c and check how the users of this bit use it.
  //
  // |__XSAVE__| is not used for static detection because the Knights Landing
  // hack in |OPENSSL_cpuid_setup| needs to override it.
  if ((OPENSSL_ia32cap_get()[1] & (1u << 26)) != 0) {
    return 0;
  }
  return CRYPTO_is_MOVBE_capable() != 0;
}
#endif // OPENSSL_X86 || OPENSSL_X86_64
#if defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64)
#if defined(OPENSSL_APPLE) && defined(OPENSSL_ARM)
// We do not detect any features at runtime for Apple's 32-bit ARM platforms. On
// 64-bit ARM, we detect some post-ARMv8.0 features.
#define OPENSSL_STATIC_ARMCAP
#endif
#include <openssl/arm_arch.h>
extern uint32_t OPENSSL_armcap_P;
extern uint8_t OPENSSL_cpucap_initialized;
// Normalize some older feature flags to their modern ACLE values.
// https://developer.arm.com/architectures/system-architectures/software-standards/acle
#if defined(__ARM_NEON__) && !defined(__ARM_NEON)
#define __ARM_NEON 1
#endif
#if defined(__ARM_FEATURE_CRYPTO)
#if !defined(__ARM_FEATURE_AES)
#define __ARM_FEATURE_AES 1
#endif
#if !defined(__ARM_FEATURE_SHA2)
#define __ARM_FEATURE_SHA2 1
#endif
#endif
// CRYPTO_is_NEON_capable returns true if the current CPU has a NEON unit.
// The capability is determined by checking the corresponding bit in
// |OPENSSL_armcap_P|. This is also the same for
// |CRYPTO_is_ARMv8_AES_capable| and |CRYPTO_is_ARMv8_PMULL_capable|
// for checking the support for AES and PMULL instructions, respectively.
OPENSSL_INLINE int CRYPTO_is_NEON_capable(void) {
  // Tests the |ARMV7_NEON| bit of |OPENSSL_armcap_P|.
  return (OPENSSL_armcap_P & ARMV7_NEON) ? 1 : 0;
}
OPENSSL_INLINE int CRYPTO_is_ARMv8_AES_capable(void) {
  // Tests the |ARMV8_AES| bit of |OPENSSL_armcap_P|.
  return (OPENSSL_armcap_P & ARMV8_AES) ? 1 : 0;
}
OPENSSL_INLINE int CRYPTO_is_ARMv8_PMULL_capable(void) {
  // Tests the |ARMV8_PMULL| bit of |OPENSSL_armcap_P|.
  return (OPENSSL_armcap_P & ARMV8_PMULL) ? 1 : 0;
}
// CRYPTO_is_ARMv8_SHA1_capable returns one if the |ARMV8_SHA1| bit is set in
// |OPENSSL_armcap_P| and zero otherwise.
OPENSSL_INLINE int CRYPTO_is_ARMv8_SHA1_capable(void) {
  return (OPENSSL_armcap_P & ARMV8_SHA1) ? 1 : 0;
}
// CRYPTO_is_ARMv8_SHA256_capable returns one if the |ARMV8_SHA256| bit is set
// in |OPENSSL_armcap_P| and zero otherwise.
OPENSSL_INLINE int CRYPTO_is_ARMv8_SHA256_capable(void) {
  return (OPENSSL_armcap_P & ARMV8_SHA256) ? 1 : 0;
}
// CRYPTO_is_ARMv8_SHA512_capable returns one if the |ARMV8_SHA512| bit is set
// in |OPENSSL_armcap_P| and zero otherwise.
OPENSSL_INLINE int CRYPTO_is_ARMv8_SHA512_capable(void) {
  return (OPENSSL_armcap_P & ARMV8_SHA512) ? 1 : 0;
}
// CRYPTO_is_ARMv8_SHA3_capable returns one if the |ARMV8_SHA3| bit is set in
// |OPENSSL_armcap_P| and zero otherwise.
OPENSSL_INLINE int CRYPTO_is_ARMv8_SHA3_capable(void) {
  return (OPENSSL_armcap_P & ARMV8_SHA3) ? 1 : 0;
}
// CRYPTO_is_ARMv8_GCM_8x_capable returns one if the CPU is SHA3-capable and
// at least one of the |ARMV8_NEOVERSE_V1|, |ARMV8_NEOVERSE_V2| or
// |ARMV8_APPLE_M| bits is set in |OPENSSL_armcap_P|, and zero otherwise.
OPENSSL_INLINE int CRYPTO_is_ARMv8_GCM_8x_capable(void) {
  if (!CRYPTO_is_ARMv8_SHA3_capable()) {
    return 0;
  }
  return (OPENSSL_armcap_P &
          (ARMV8_NEOVERSE_V1 | ARMV8_NEOVERSE_V2 | ARMV8_APPLE_M)) != 0;
}
// CRYPTO_is_ARMv8_wide_multiplier_capable returns one if at least one of the
// |ARMV8_NEOVERSE_V1|, |ARMV8_NEOVERSE_V2| or |ARMV8_APPLE_M| bits is set in
// |OPENSSL_armcap_P| and zero otherwise.
OPENSSL_INLINE int CRYPTO_is_ARMv8_wide_multiplier_capable(void) {
  return (OPENSSL_armcap_P &
          (ARMV8_NEOVERSE_V1 | ARMV8_NEOVERSE_V2 | ARMV8_APPLE_M)) != 0;
}
// CRYPTO_is_ARMv8_DIT_capable returns one only if both |ARMV8_DIT| and
// |ARMV8_DIT_ALLOWED| are set in |OPENSSL_armcap_P|, and zero otherwise.
OPENSSL_INLINE int CRYPTO_is_ARMv8_DIT_capable(void) {
  if ((OPENSSL_armcap_P & (ARMV8_DIT | ARMV8_DIT_ALLOWED)) !=
      (ARMV8_DIT | ARMV8_DIT_ALLOWED)) {
    return 0;
  }
  return 1;
}
// CRYPTO_is_ARMv8_RNDR_capable returns one if the |ARMV8_RNG| bit is set in
// |OPENSSL_armcap_P| and zero otherwise.
OPENSSL_INLINE int CRYPTO_is_ARMv8_RNDR_capable(void) {
  return (OPENSSL_armcap_P & ARMV8_RNG) ? 1 : 0;
}
// CRYPTO_is_Neoverse_N1 returns one if |ARMV8_NEOVERSE_N1| is set in
// |OPENSSL_armcap_P| while both |ARMV8_NEOVERSE_V1| and |ARMV8_NEOVERSE_V2|
// are clear, and zero otherwise.
OPENSSL_INLINE int CRYPTO_is_Neoverse_N1(void) {
  // Either V-series bit rules out N1.
  if ((OPENSSL_armcap_P & (ARMV8_NEOVERSE_V1 | ARMV8_NEOVERSE_V2)) != 0) {
    return 0;
  }
  return (OPENSSL_armcap_P & ARMV8_NEOVERSE_N1) != 0;
}
// CRYPTO_is_Neoverse_V1 returns one if the |ARMV8_NEOVERSE_V1| bit is set in
// |OPENSSL_armcap_P| and zero otherwise.
OPENSSL_INLINE int CRYPTO_is_Neoverse_V1(void) {
  return (OPENSSL_armcap_P & ARMV8_NEOVERSE_V1) ? 1 : 0;
}
// CRYPTO_is_Neoverse_V2 returns one if the |ARMV8_NEOVERSE_V2| bit is set in
// |OPENSSL_armcap_P| and zero otherwise.
OPENSSL_INLINE int CRYPTO_is_Neoverse_V2(void) {
  return (OPENSSL_armcap_P & ARMV8_NEOVERSE_V2) ? 1 : 0;
}
// CRYPTO_is_ARMv8_Apple_M returns one if the |ARMV8_APPLE_M| bit is set in
// |OPENSSL_armcap_P| and zero otherwise.
OPENSSL_INLINE int CRYPTO_is_ARMv8_Apple_M(void) {
  return (OPENSSL_armcap_P & ARMV8_APPLE_M) ? 1 : 0;
}
// This function is used only for testing; hence, not inlined
OPENSSL_EXPORT int CRYPTO_is_ARMv8_DIT_capable_for_testing(void);
#endif // OPENSSL_ARM || OPENSSL_AARCH64
#if defined(AARCH64_DIT_SUPPORTED)
// (TODO): See if we can detect the DIT capability in Windows environment
// armv8_get_dit gets the value of the DIT flag from the CPU.
OPENSSL_EXPORT uint64_t armv8_get_dit(void);
// armv8_set_dit sets the CPU DIT flag to 1 and returns its original value
// before it was called.
OPENSSL_EXPORT uint64_t armv8_set_dit(void);
// armv8_restore_dit takes as input a value to restore the CPU DIT flag to.
OPENSSL_EXPORT void armv8_restore_dit(volatile uint64_t *original_dit);
#if defined(ENABLE_AUTO_SET_RESET_DIT)
// SET_DIT_AUTO_RESET can be inserted in the caller's application at
// the beginning of the code section that makes repeated calls to AWS-LC
// functions. The flag will be automatically restored to its original value
// at the end of the scope.
// This can minimise the effect on performance of repeatedly setting and
// disabling DIT.
// Instead of the macro, the functions above can be used.
// An example of their usage is present in the benchmarking function
// `Speed()` in `tool/speed.cc` when the option `-dit` is passed in.
#define SET_DIT_AUTO_RESET \
volatile uint64_t _dit_restore_orig \
__attribute__((cleanup(armv8_restore_dit))) \
OPENSSL_UNUSED = armv8_set_dit();
#else
#define SET_DIT_AUTO_RESET
#endif // ENABLE_AUTO_SET_RESET_DIT
#else
#define SET_DIT_AUTO_RESET
#endif // AARCH64_DIT_SUPPORTED
#if defined(OPENSSL_PPC64LE)
// CRYPTO_is_PPC64LE_vcrypto_capable returns true iff the current CPU supports
// the Vector.AES category of instructions.
int CRYPTO_is_PPC64LE_vcrypto_capable(void);
extern unsigned long OPENSSL_ppc64le_hwcap2;
#endif // OPENSSL_PPC64LE
#if defined(__cplusplus)
}
#endif
#endif // OPENSSL_HEADER_CPUCAP_INTERNAL_H