chore: checkpoint before Python removal

This commit is contained in:
2026-03-26 22:33:59 +00:00
parent 683cec9307
commit e568ddf82a
29972 changed files with 11269302 additions and 2 deletions

View File

@@ -0,0 +1,65 @@
// Copyright (c) 2002-2006 The OpenSSL Project. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
#include <openssl/aes.h>
#include <assert.h>
#include "internal.h"
#include "../modes/internal.h"
// Be aware that different sets of AES functions use incompatible key
// representations, varying in format of the key schedule, the |AES_KEY.rounds|
// value, or both. Therefore they cannot mix. Also, on AArch64, the plain-C
// code, above, is incompatible with the |aes_hw_*| functions.
// Encrypts a single 16-byte block from |in| to |out| under |key|, dispatching
// to the fastest AES implementation this CPU supports (hardware AES, then
// vector-permutation AES, then the portable fallback).
void AES_encrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key) {
  SET_DIT_AUTO_RESET;
  if (hwaes_capable()) {
    aes_hw_encrypt(in, out, key);
    return;
  }
  if (vpaes_capable()) {
    vpaes_encrypt(in, out, key);
    return;
  }
  aes_nohw_encrypt(in, out, key);
}
// Decrypts a single 16-byte block from |in| to |out| under |key|, using the
// same implementation-selection order as |AES_encrypt|.
void AES_decrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key) {
  SET_DIT_AUTO_RESET;
  if (hwaes_capable()) {
    aes_hw_decrypt(in, out, key);
    return;
  }
  if (vpaes_capable()) {
    vpaes_decrypt(in, out, key);
    return;
  }
  aes_nohw_decrypt(in, out, key);
}
// Expands |key| into the encryption key schedule |aeskey|. |bits| must be
// 128, 192, or 256; any other size is rejected with -2 (OpenSSL convention).
// Returns the underlying implementation's result (zero on success).
int AES_set_encrypt_key(const uint8_t *key, unsigned bits, AES_KEY *aeskey) {
  SET_DIT_AUTO_RESET;
  switch (bits) {
    case 128:
    case 192:
    case 256:
      break;
    default:
      return -2;
  }
  if (hwaes_capable()) {
    return aes_hw_set_encrypt_key(key, bits, aeskey);
  }
  if (vpaes_capable()) {
    return vpaes_set_encrypt_key(key, bits, aeskey);
  }
  return aes_nohw_set_encrypt_key(key, bits, aeskey);
}
// Expands |key| into the decryption key schedule |aeskey|. |bits| must be
// 128, 192, or 256; any other size is rejected with -2 (OpenSSL convention).
// Returns the underlying implementation's result (zero on success).
int AES_set_decrypt_key(const uint8_t *key, unsigned bits, AES_KEY *aeskey) {
  SET_DIT_AUTO_RESET;
  switch (bits) {
    case 128:
    case 192:
    case 256:
      break;
    default:
      return -2;
  }
  if (hwaes_capable()) {
    return aes_hw_set_decrypt_key(key, bits, aeskey);
  }
  if (vpaes_capable()) {
    return vpaes_set_decrypt_key(key, bits, aeskey);
  }
  return aes_nohw_set_decrypt_key(key, bits, aeskey);
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,747 @@
// Copyright (c) 2015, Google Inc.
// SPDX-License-Identifier: ISC
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <algorithm>
#include <memory>
#include <vector>
#include <gtest/gtest.h>
#include <openssl/aes.h>
#include <openssl/cipher.h>
#include <openssl/rand.h>
#include "../../internal.h"
#include "../../test/abi_test.h"
#include "../../test/file_test.h"
#include "../../test/test_util.h"
#include "../../test/wycheproof_util.h"
#include "../cpucap/internal.h"
#include "internal.h"
// All test vectors use the default IV, so test both with implicit and
// explicit IV.
//
// TODO(davidben): Find test vectors that use a different IV.
// Eight 0xA6 bytes — the default initial value defined by RFC 3394 /
// SP 800-38F for AES Key Wrap; this is what the library uses when the
// caller passes a NULL IV.
static const uint8_t kDefaultIV[] = {
0xa6, 0xa6, 0xa6, 0xa6, 0xa6, 0xa6, 0xa6, 0xa6,
};
// Checks single-block AES against one Key/Plaintext/Ciphertext vector:
// encryption and decryption, each both out-of-place and in-place (|in| and
// |out| aliasing is part of the AES_encrypt/AES_decrypt contract).
static void TestRaw(FileTest *t) {
  std::vector<uint8_t> key, plaintext, ciphertext;
  ASSERT_TRUE(t->GetBytes(&key, "Key"));
  ASSERT_TRUE(t->GetBytes(&plaintext, "Plaintext"));
  ASSERT_TRUE(t->GetBytes(&ciphertext, "Ciphertext"));
  ASSERT_EQ(static_cast<unsigned>(AES_BLOCK_SIZE), plaintext.size());
  ASSERT_EQ(static_cast<unsigned>(AES_BLOCK_SIZE), ciphertext.size());

  AES_KEY aes_key;
  uint8_t scratch[AES_BLOCK_SIZE];

  // Encryption: out-of-place, then in-place, must both give the ciphertext.
  ASSERT_EQ(0, AES_set_encrypt_key(key.data(), 8 * key.size(), &aes_key));
  AES_encrypt(plaintext.data(), scratch, &aes_key);
  EXPECT_EQ(Bytes(ciphertext), Bytes(scratch));
  OPENSSL_memcpy(scratch, plaintext.data(), AES_BLOCK_SIZE);
  AES_encrypt(scratch, scratch, &aes_key);
  EXPECT_EQ(Bytes(ciphertext), Bytes(scratch));

  // Decryption: out-of-place, then in-place, must both give the plaintext.
  ASSERT_EQ(0, AES_set_decrypt_key(key.data(), 8 * key.size(), &aes_key));
  AES_decrypt(ciphertext.data(), scratch, &aes_key);
  EXPECT_EQ(Bytes(plaintext), Bytes(scratch));
  OPENSSL_memcpy(scratch, ciphertext.data(), AES_BLOCK_SIZE);
  AES_decrypt(scratch, scratch, &aes_key);
  EXPECT_EQ(Bytes(plaintext), Bytes(scratch));
}
// Checks RFC 3394 AES Key Wrap (|AES_wrap_key| / |AES_unwrap_key|) against a
// Key/Plaintext/Ciphertext vector: wrap and unwrap with both the implicit
// (NULL → default) IV and the explicit default IV, then rejection of a
// corrupted ciphertext.
static void TestKeyWrap(FileTest *t) {
  std::vector<uint8_t> key, plaintext, ciphertext;
  ASSERT_TRUE(t->GetBytes(&key, "Key"));
  ASSERT_TRUE(t->GetBytes(&plaintext, "Plaintext"));
  ASSERT_TRUE(t->GetBytes(&ciphertext, "Ciphertext"));
  // Key wrap adds exactly one 8-byte integrity block.
  ASSERT_EQ(plaintext.size() + 8, ciphertext.size())
      << "Invalid Plaintext and Ciphertext lengths.";

  // Test encryption.
  AES_KEY aes_key;
  ASSERT_EQ(0, AES_set_encrypt_key(key.data(), 8 * key.size(), &aes_key));

  // Test with implicit IV.
  std::unique_ptr<uint8_t[]> buf(new uint8_t[ciphertext.size()]);
  int len = AES_wrap_key(&aes_key, nullptr /* iv */, buf.get(),
                         plaintext.data(), plaintext.size());
  ASSERT_GE(len, 0);
  EXPECT_EQ(Bytes(ciphertext), Bytes(buf.get(), static_cast<size_t>(len)));

  // Test with explicit IV.
  OPENSSL_memset(buf.get(), 0, ciphertext.size());
  len = AES_wrap_key(&aes_key, kDefaultIV, buf.get(), plaintext.data(),
                     plaintext.size());
  ASSERT_GE(len, 0);
  EXPECT_EQ(Bytes(ciphertext), Bytes(buf.get(), static_cast<size_t>(len)));

  // Test decryption.
  ASSERT_EQ(0, AES_set_decrypt_key(key.data(), 8 * key.size(), &aes_key));

  // Test with implicit IV.
  buf.reset(new uint8_t[plaintext.size()]);
  len = AES_unwrap_key(&aes_key, nullptr /* iv */, buf.get(), ciphertext.data(),
                       ciphertext.size());
  ASSERT_GE(len, 0);
  EXPECT_EQ(Bytes(plaintext), Bytes(buf.get(), static_cast<size_t>(len)));

  // Test with explicit IV.
  OPENSSL_memset(buf.get(), 0, plaintext.size());
  len = AES_unwrap_key(&aes_key, kDefaultIV, buf.get(), ciphertext.data(),
                       ciphertext.size());
  ASSERT_GE(len, 0);
  // Fix: previously the explicit-IV unwrap result was only length-checked and
  // never compared against the expected plaintext, so a wrong output passed.
  EXPECT_EQ(Bytes(plaintext), Bytes(buf.get(), static_cast<size_t>(len)));

  // Test corrupted ciphertext.
  ciphertext[0] ^= 1;
  EXPECT_EQ(-1, AES_unwrap_key(&aes_key, nullptr /* iv */, buf.get(),
                               ciphertext.data(), ciphertext.size()));
}
// Runs the same Key/Plaintext/Ciphertext key-wrap vector through the
// EVP_CIPHER interface (|EVP_aes_256_wrap|): encryption and decryption with
// implicit and explicit IVs, then rejection of a corrupted ciphertext.
static void TestEVPKeyWrap(FileTest *t) {
std::vector<uint8_t> key, plaintext, ciphertext;
ASSERT_TRUE(t->GetBytes(&key, "Key"));
ASSERT_TRUE(t->GetBytes(&plaintext, "Plaintext"));
ASSERT_TRUE(t->GetBytes(&ciphertext, "Ciphertext"));
// Only 256 bit keys are supported for key wrap from EVP_CIPHER at the moment.
if (key.size() != 32) {
return;
}
const EVP_CIPHER *cipher = EVP_aes_256_wrap();
// Key wrap adds exactly one 8-byte integrity block.
ASSERT_EQ(plaintext.size() + 8, ciphertext.size())
<< "Invalid Plaintext and Ciphertext lengths.";
// Test encryption.
std::vector<uint8_t> out(ciphertext.size());
int len;
// Test with implicit IV (passing NULL selects the RFC 3394 default).
bssl::ScopedEVP_CIPHER_CTX ctx;
ASSERT_TRUE(
EVP_EncryptInit_ex(ctx.get(), cipher, nullptr, key.data(), nullptr));
ASSERT_TRUE(EVP_EncryptUpdate(ctx.get(), out.data(), &len, plaintext.data(),
plaintext.size()));
ASSERT_GE(len, 0);
ASSERT_TRUE(EVP_EncryptFinal(ctx.get(), out.data(), &len));
EXPECT_EQ(Bytes(ciphertext), Bytes(out));
// Test with explicit IV.
ctx.Reset();
ASSERT_TRUE(
EVP_EncryptInit_ex(ctx.get(), cipher, nullptr, key.data(), kDefaultIV));
ASSERT_TRUE(EVP_EncryptUpdate(ctx.get(), out.data(), &len, plaintext.data(),
plaintext.size()));
ASSERT_GE(len, 0);
ASSERT_TRUE(EVP_EncryptFinal(ctx.get(), out.data(), &len));
EXPECT_EQ(Bytes(ciphertext), Bytes(out));
// Test decryption.
out.clear();
out.resize(plaintext.size());
ctx.Reset();
// Test with implicit IV.
ASSERT_TRUE(
EVP_DecryptInit_ex(ctx.get(), cipher, nullptr, key.data(), nullptr));
ASSERT_TRUE(EVP_DecryptUpdate(ctx.get(), out.data(), &len, ciphertext.data(),
ciphertext.size()));
out.resize(len);
// NOTE(review): this (and the explicit-IV path below) calls EVP_EncryptFinal
// after a Decrypt* sequence. The Wycheproof test later in this file notes
// there is no "Final" step for |EVP_aes_256_wrap| (it always returns 1), so
// this appears harmless — confirm it is intentional rather than a typo for
// EVP_DecryptFinal.
ASSERT_TRUE(EVP_EncryptFinal(ctx.get(), out.data(), &len));
EXPECT_EQ(Bytes(plaintext), Bytes(out));
// Test with explicit IV.
ctx.Reset();
ASSERT_TRUE(
EVP_DecryptInit_ex(ctx.get(), cipher, nullptr, key.data(), kDefaultIV));
ASSERT_TRUE(EVP_DecryptUpdate(ctx.get(), out.data(), &len, ciphertext.data(),
ciphertext.size()));
out.resize(len);
ASSERT_TRUE(EVP_EncryptFinal(ctx.get(), out.data(), &len));
EXPECT_EQ(Bytes(plaintext), Bytes(out));
// Test corrupted ciphertext: the integrity check must make the update fail.
ctx.Reset();
ciphertext[0] ^= 1;
ASSERT_TRUE(
EVP_DecryptInit_ex(ctx.get(), cipher, nullptr, key.data(), nullptr));
EXPECT_FALSE(EVP_DecryptUpdate(ctx.get(), out.data(), &len, ciphertext.data(),
ciphertext.size()));
}
// Checks AES Key Wrap with Padding (RFC 5649) against a
// Key/Plaintext/Ciphertext vector: a wrap must reproduce the ciphertext and
// an unwrap must recover the plaintext exactly.
static void TestKeyWrapWithPadding(FileTest *t) {
  std::vector<uint8_t> key, plaintext, ciphertext;
  ASSERT_TRUE(t->GetBytes(&key, "Key"));
  ASSERT_TRUE(t->GetBytes(&plaintext, "Plaintext"));
  ASSERT_TRUE(t->GetBytes(&ciphertext, "Ciphertext"));

  // Wrap: the output needs at most plaintext size padded up plus the 8-byte
  // integrity block, i.e. plaintext.size() + 15 is always enough.
  AES_KEY aes_key;
  ASSERT_EQ(0, AES_set_encrypt_key(key.data(), 8 * key.size(), &aes_key));
  std::unique_ptr<uint8_t[]> scratch(new uint8_t[plaintext.size() + 15]);
  size_t out_len;
  ASSERT_TRUE(AES_wrap_key_padded(&aes_key, scratch.get(), &out_len,
                                  plaintext.size() + 15, plaintext.data(),
                                  plaintext.size()));
  EXPECT_EQ(Bytes(ciphertext), Bytes(scratch.get(), static_cast<size_t>(out_len)));

  // Unwrap: the recovered plaintext fits in ciphertext.size() - 8 bytes.
  ASSERT_EQ(0, AES_set_decrypt_key(key.data(), 8 * key.size(), &aes_key));
  scratch.reset(new uint8_t[ciphertext.size() - 8]);
  ASSERT_TRUE(AES_unwrap_key_padded(&aes_key, scratch.get(), &out_len,
                                    ciphertext.size() - 8, ciphertext.data(),
                                    ciphertext.size()));
  ASSERT_EQ(out_len, plaintext.size());
  EXPECT_EQ(Bytes(plaintext), Bytes(scratch.get(), static_cast<size_t>(out_len)));
}
// Dispatches each vector in aes_tests.txt to the handler matching its mode
// parameter; an unknown mode is a test failure.
TEST(AESTest, TestVectors) {
  FileTestGTest("crypto/fipsmodule/aes/aes_tests.txt", [](FileTest *t) {
    const auto &mode = t->GetParameter();
    if (mode == "Raw") {
      TestRaw(t);
      return;
    }
    if (mode == "KeyWrap") {
      TestKeyWrap(t);
      TestEVPKeyWrap(t);
      return;
    }
    if (mode == "KeyWrapWithPadding") {
      TestKeyWrapWithPadding(t);
      return;
    }
    ADD_FAILURE() << "Unknown mode " << mode;
  });
}
// Runs AES-KW (RFC 3394) against the Wycheproof kw_test.txt vectors: valid
// cases must round-trip through |AES_unwrap_key| / |AES_wrap_key|; invalid
// cases must be rejected by |AES_unwrap_key| with -1.
TEST(AESTest, WycheproofKeyWrap) {
FileTestGTest("third_party/wycheproof_testvectors/kw_test.txt",
[](FileTest *t) {
std::string key_size;
ASSERT_TRUE(t->GetInstruction(&key_size, "keySize"));
std::vector<uint8_t> ct, key, msg;
ASSERT_TRUE(t->GetBytes(&ct, "ct"));
ASSERT_TRUE(t->GetBytes(&key, "key"));
ASSERT_TRUE(t->GetBytes(&msg, "msg"));
// The keySize instruction is in bits; it must match the key bytes read.
ASSERT_EQ(static_cast<unsigned>(atoi(key_size.c_str())), key.size() * 8);
WycheproofResult result;
ASSERT_TRUE(GetWycheproofResult(t, &result));
if (result.IsValid()) {
// A wrapped message is the plaintext plus one 8-byte integrity block.
ASSERT_GE(ct.size(), 8u);
AES_KEY aes;
ASSERT_EQ(0, AES_set_decrypt_key(key.data(), 8 * key.size(), &aes));
std::vector<uint8_t> out(ct.size() - 8);
int len = AES_unwrap_key(&aes, nullptr, out.data(), ct.data(), ct.size());
ASSERT_EQ(static_cast<int>(out.size()), len);
EXPECT_EQ(Bytes(msg), Bytes(out));
// Re-wrap the message and expect the original ciphertext back.
out.resize(msg.size() + 8);
ASSERT_EQ(0, AES_set_encrypt_key(key.data(), 8 * key.size(), &aes));
len = AES_wrap_key(&aes, nullptr, out.data(), msg.data(), msg.size());
ASSERT_EQ(static_cast<int>(out.size()), len);
EXPECT_EQ(Bytes(ct), Bytes(out));
} else {
AES_KEY aes;
ASSERT_EQ(0, AES_set_decrypt_key(key.data(), 8 * key.size(), &aes));
// |ct| may be shorter than 8 bytes in malformed vectors; clamp the buffer
// size so the subtraction cannot wrap.
std::vector<uint8_t> out(ct.size() < 8 ? 0 : ct.size() - 8);
int len = AES_unwrap_key(&aes, nullptr, out.data(), ct.data(), ct.size());
EXPECT_EQ(-1, len);
}
});
}
// Runs the Wycheproof kw_test.txt vectors through the EVP_CIPHER key-wrap
// interface (|EVP_aes_256_wrap|). Valid vectors must round-trip; invalid
// vectors must fail in |EVP_DecryptUpdate| (there is no real "final" step
// for key wrap).
TEST(AESTest, WycheproofEVPKeyWrap) {
FileTestGTest("third_party/wycheproof_testvectors/kw_test.txt",
[](FileTest *t) {
std::string key_size;
ASSERT_TRUE(t->GetInstruction(&key_size, "keySize"));
std::vector<uint8_t> ct, key, msg;
ASSERT_TRUE(t->GetBytes(&ct, "ct"));
ASSERT_TRUE(t->GetBytes(&key, "key"));
ASSERT_TRUE(t->GetBytes(&msg, "msg"));
// The keySize instruction is in bits; it must match the key bytes read.
ASSERT_EQ(static_cast<unsigned>(atoi(key_size.c_str())), key.size() * 8);
WycheproofResult result;
ASSERT_TRUE(GetWycheproofResult(t, &result));
// Only 256 bit keys are supported for key wrap from EVP_CIPHER at the
// moment.
if (key.size() != 32) {
return;
}
const EVP_CIPHER *cipher = EVP_aes_256_wrap();
if (result.IsValid()) {
// A wrapped message is the plaintext plus one 8-byte integrity block.
ASSERT_GE(ct.size(), 8u);
bssl::ScopedEVP_CIPHER_CTX ctx;
std::vector<uint8_t> out(ct.size() - 8);
int len;
ASSERT_TRUE(
EVP_DecryptInit_ex(ctx.get(), cipher, nullptr, key.data(), nullptr));
ASSERT_TRUE(EVP_DecryptUpdate(ctx.get(), out.data(), &len, ct.data(),
ct.size()));
ASSERT_EQ(static_cast<int>(out.size()), len);
ASSERT_TRUE(EVP_EncryptFinal(ctx.get(), out.data(), &len));
EXPECT_EQ(Bytes(msg), Bytes(out));
// Re-wrap the message and expect the original ciphertext back.
ctx.Reset();
out.resize(msg.size() + 8);
ASSERT_TRUE(
EVP_EncryptInit_ex(ctx.get(), cipher, nullptr, key.data(), nullptr));
ASSERT_TRUE(EVP_EncryptUpdate(ctx.get(), out.data(), &len, msg.data(),
msg.size()));
ASSERT_EQ(static_cast<int>(out.size()), len);
ASSERT_TRUE(EVP_EncryptFinal(ctx.get(), out.data(), &len));
EXPECT_EQ(Bytes(ct), Bytes(out));
} else {
bssl::ScopedEVP_CIPHER_CTX ctx;
// |ct| may be shorter than 8 bytes in malformed vectors; clamp the
// buffer size so the subtraction cannot wrap.
std::vector<uint8_t> out(ct.size() < 8 ? 0 : ct.size() - 8);
int len;
ASSERT_TRUE(
EVP_DecryptInit_ex(ctx.get(), cipher, nullptr, key.data(), nullptr));
if (!ct.empty()) {
EXPECT_FALSE(EVP_DecryptUpdate(ctx.get(), out.data(), &len, ct.data(),
ct.size()));
// There is no "Final" function for |EVP_aes_256_wrap|, so this will
// always return 1.
EXPECT_TRUE(EVP_EncryptFinal(ctx.get(), out.data(), &len));
} else {
// The EVP version of AES-KEY Wrap will return 1 if the ciphertext is
// NULL. This is consistent with OpenSSL behaviour.
EXPECT_EQ(EVP_DecryptUpdate(ctx.get(), out.data(), &len, ct.data(),
ct.size()), 1);
EXPECT_TRUE(EVP_EncryptFinal(ctx.get(), out.data(), &len));
}
}
});
}
// Runs AES-KWP (RFC 5649) against the Wycheproof kwp_test.txt vectors:
// valid cases must round-trip through |AES_unwrap_key_padded| /
// |AES_wrap_key_padded|; invalid cases must be rejected on unwrap.
TEST(AESTest, WycheproofKeyWrapWithPadding) {
FileTestGTest("third_party/wycheproof_testvectors/kwp_test.txt",
[](FileTest *t) {
std::string key_size;
ASSERT_TRUE(t->GetInstruction(&key_size, "keySize"));
std::vector<uint8_t> ct, key, msg;
ASSERT_TRUE(t->GetBytes(&ct, "ct"));
ASSERT_TRUE(t->GetBytes(&key, "key"));
ASSERT_TRUE(t->GetBytes(&msg, "msg"));
// The keySize instruction is in bits; it must match the key bytes read.
ASSERT_EQ(static_cast<unsigned>(atoi(key_size.c_str())), key.size() * 8);
WycheproofResult result;
ASSERT_TRUE(GetWycheproofResult(t, &result));
// Wycheproof contains test vectors with empty messages that it believes
// should pass. However, both RFC 5649 and SP 800-38F section 5.3.1 say that
// the minimum length is one. Therefore we consider test cases with an empty
// message to be invalid.
//
// Wycheproof marks various weak parameters as acceptable. We do not enforce
// policy in the library, so we map those flags to valid.
if (result.IsValid({"SmallKey", "WeakWrapping"}) && !msg.empty()) {
AES_KEY aes;
ASSERT_EQ(0, AES_set_decrypt_key(key.data(), 8 * key.size(), &aes));
// The recovered plaintext fits in ct.size() - 8 bytes.
std::vector<uint8_t> out(ct.size() - 8);
size_t len;
ASSERT_TRUE(AES_unwrap_key_padded(&aes, out.data(), &len, ct.size() - 8,
ct.data(), ct.size()));
EXPECT_EQ(Bytes(msg), Bytes(out.data(), len));
// Re-wrap the message (needs at most msg.size() + 15 bytes of output) and
// expect the original ciphertext back.
out.resize(msg.size() + 15);
ASSERT_EQ(0, AES_set_encrypt_key(key.data(), 8 * key.size(), &aes));
ASSERT_TRUE(AES_wrap_key_padded(&aes, out.data(), &len, msg.size() + 15,
msg.data(), msg.size()));
EXPECT_EQ(Bytes(ct), Bytes(out.data(), len));
} else {
AES_KEY aes;
ASSERT_EQ(0, AES_set_decrypt_key(key.data(), 8 * key.size(), &aes));
std::vector<uint8_t> out(ct.size());
size_t len;
ASSERT_FALSE(AES_unwrap_key_padded(&aes, out.data(), &len, ct.size(),
ct.data(), ct.size()));
}
});
}
// |AES_wrap_key| requires its input length to be a multiple of 8 and at
// least 16 bytes; every length here violates that and must fail with -1.
TEST(AESTest, WrapBadLengths) {
  uint8_t key[128 / 8] = {0};
  AES_KEY aes;
  ASSERT_EQ(0, AES_set_encrypt_key(key, 128, &aes));
  for (size_t bad_len :
       {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 20}) {
    SCOPED_TRACE(bad_len);
    std::vector<uint8_t> input(bad_len);
    std::vector<uint8_t> output(bad_len + 8);
    EXPECT_EQ(-1, AES_wrap_key(&aes, nullptr, output.data(), input.data(),
                               input.size()));
  }
}
// Key sizes other than 128/192/256 bits must be rejected with a negative
// return value by both key-schedule setters.
TEST(AESTest, InvalidKeySize) {
  static const uint8_t kZero[8] = {0};
  const unsigned kBadBits = 42;
  AES_KEY key;
  EXPECT_LT(AES_set_encrypt_key(kZero, kBadBits, &key), 0);
  EXPECT_LT(AES_set_decrypt_key(kZero, kBadBits, &key), 0);
}
#if defined(SUPPORTS_ABI_TEST)
// Exercises every assembly entry point under CHECK_ABI to verify calling
// convention / unwind correctness, for each key size and a range of block
// counts. Functional correctness is covered by the other tests; this one
// only checks that the assembly respects the platform ABI.
TEST(AESTest, ABI) {
for (int bits : {128, 192, 256}) {
SCOPED_TRACE(bits);
const uint8_t kKey[256/8] = {0};
AES_KEY key;
uint8_t block[AES_BLOCK_SIZE];
uint8_t buf[AES_BLOCK_SIZE * 64] = {0};
std::vector<int> block_counts;
if (bits == 128) {
block_counts = {0, 1, 2, 3, 4, 8, 16, 31};
} else {
// Unwind tests are very slow. Assume that the various input sizes do not
// differ significantly by round count for ABI purposes.
block_counts = {0, 1, 8};
}
// bsaes consumes keys converted from the vpaes schedule.
if (bsaes_capable()) {
ASSERT_EQ(vpaes_set_encrypt_key(kKey, bits, &key), 0);
CHECK_ABI(vpaes_encrypt_key_to_bsaes, &key, &key);
for (size_t blocks : block_counts) {
SCOPED_TRACE(blocks);
if (blocks != 0) {
CHECK_ABI(bsaes_ctr32_encrypt_blocks, buf, buf, blocks, &key, block);
}
}
ASSERT_EQ(vpaes_set_decrypt_key(kKey, bits, &key), 0);
CHECK_ABI(vpaes_decrypt_key_to_bsaes, &key, &key);
for (size_t blocks : block_counts) {
SCOPED_TRACE(blocks);
CHECK_ABI(bsaes_cbc_encrypt, buf, buf, AES_BLOCK_SIZE * blocks, &key,
block, AES_DECRYPT);
}
}
if (vpaes_capable()) {
ASSERT_EQ(CHECK_ABI(vpaes_set_encrypt_key, kKey, bits, &key), 0);
CHECK_ABI(vpaes_encrypt, block, block, &key);
for (size_t blocks : block_counts) {
SCOPED_TRACE(blocks);
#if defined(VPAES_CBC)
CHECK_ABI(vpaes_cbc_encrypt, buf, buf, AES_BLOCK_SIZE * blocks, &key,
block, AES_ENCRYPT);
#endif
#if defined(VPAES_CTR32)
CHECK_ABI(vpaes_ctr32_encrypt_blocks, buf, buf, blocks, &key, block);
#endif
}
ASSERT_EQ(CHECK_ABI(vpaes_set_decrypt_key, kKey, bits, &key), 0);
CHECK_ABI(vpaes_decrypt, block, block, &key);
#if defined(VPAES_CBC)
for (size_t blocks : block_counts) {
SCOPED_TRACE(blocks);
CHECK_ABI(vpaes_cbc_encrypt, buf, buf, AES_BLOCK_SIZE * blocks, &key,
block, AES_DECRYPT);
}
#endif  // VPAES_CBC
}
if (hwaes_capable()) {
ASSERT_EQ(CHECK_ABI(aes_hw_set_encrypt_key, kKey, bits, &key), 0);
CHECK_ABI(aes_hw_encrypt, block, block, &key);
for (size_t blocks : block_counts) {
SCOPED_TRACE(blocks);
CHECK_ABI(aes_hw_cbc_encrypt, buf, buf, AES_BLOCK_SIZE * blocks, &key,
block, AES_ENCRYPT);
if (blocks == 0) {
// Without this initialization, valgrind complains
// about using an uninitialized value.
for (size_t i = 0; i < 64; i++) {
buf[i] = i;
}
// A zero-block CTR32 call must leave the buffer untouched; compare the
// printed bytes before and after.
std::string buf_before = testing::PrintToString(Bytes(buf,64));
CHECK_ABI(aes_hw_ctr32_encrypt_blocks, buf, buf, blocks, &key, block);
EXPECT_EQ(buf_before, testing::PrintToString(Bytes(buf,64)));
}
CHECK_ABI(aes_hw_ctr32_encrypt_blocks, buf, buf, blocks, &key, block);
#if defined(HWAES_ECB)
CHECK_ABI(aes_hw_ecb_encrypt, buf, buf, AES_BLOCK_SIZE * blocks, &key,
AES_ENCRYPT);
#endif
}
ASSERT_EQ(CHECK_ABI(aes_hw_set_decrypt_key, kKey, bits, &key), 0);
CHECK_ABI(aes_hw_decrypt, block, block, &key);
for (size_t blocks : block_counts) {
SCOPED_TRACE(blocks);
CHECK_ABI(aes_hw_cbc_encrypt, buf, buf, AES_BLOCK_SIZE * blocks, &key,
block, AES_DECRYPT);
#if defined(HWAES_ECB)
CHECK_ABI(aes_hw_ecb_encrypt, buf, buf, AES_BLOCK_SIZE * blocks, &key,
AES_DECRYPT);
#endif
}
}
}
}
#endif // SUPPORTS_ABI_TEST
#if defined(BSAES) && !defined(BORINGSSL_SHARED_LIBRARY)
// Views the raw bytes of the whole |AES_KEY| struct (schedule plus rounds)
// so two key schedules can be compared for exact equality.
static Bytes AESKeyToBytes(const AES_KEY *key) {
  const uint8_t *raw = reinterpret_cast<const uint8_t *>(key);
  return Bytes(raw, sizeof(AES_KEY));
}
// Applies the AES forward S-box (FIPS 197 SubBytes) to one byte via table
// lookup. Used only by the reference key-expansion below; not constant-time.
static uint8_t aes_ref_sub_byte(uint8_t b) {
  // The full 256-entry S-box, indexed directly by the input byte.
  static const uint8_t kSBox[256] = {
      0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b,
      0xfe, 0xd7, 0xab, 0x76, 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0,
      0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0, 0xb7, 0xfd, 0x93, 0x26,
      0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
      0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2,
      0xeb, 0x27, 0xb2, 0x75, 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0,
      0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84, 0x53, 0xd1, 0x00, 0xed,
      0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
      0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f,
      0x50, 0x3c, 0x9f, 0xa8, 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5,
      0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2, 0xcd, 0x0c, 0x13, 0xec,
      0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
      0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14,
      0xde, 0x5e, 0x0b, 0xdb, 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c,
      0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79, 0xe7, 0xc8, 0x37, 0x6d,
      0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
      0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f,
      0x4b, 0xbd, 0x8b, 0x8a, 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e,
      0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e, 0xe1, 0xf8, 0x98, 0x11,
      0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
      0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f,
      0xb0, 0x54, 0xbb, 0x16,
  };
  const uint8_t substituted = kSBox[b];
  return substituted;
}
// Applies the AES S-box to each of the four bytes of |in| independently
// (FIPS 197 SubWord), preserving byte positions.
static uint32_t aes_ref_sub_word(uint32_t in) {
  uint32_t out = 0;
  for (int shift = 0; shift < 32; shift += 8) {
    const uint32_t substituted = aes_ref_sub_byte((in >> shift) & 0xff);
    out |= substituted << shift;
  }
  return out;
}
// Reference AES key expansion (FIPS 197, section 5.2). Sets |out->rounds|
// and fills |out->rd_key| with the expanded schedule, byte-swapped as the
// ARM bsaes implementation expects. Returns 0 on success, or 1 if |key_bits|
// is not 128, 192, or 256.
static int aes_ref_set_encrypt_key(const uint8_t *key, int key_bits,
                                   AES_KEY *out) {
  static const uint32_t kRCon[10] = {0x01, 0x02, 0x04, 0x08, 0x10,
                                     0x20, 0x40, 0x80, 0x1b, 0x36};
  if (key_bits == 128) {
    out->rounds = 10;
  } else if (key_bits == 192) {
    out->rounds = 12;
  } else if (key_bits == 256) {
    out->rounds = 14;
  } else {
    return 1;
  }
  const size_t words = key_bits / 32;
  const size_t num_subkey_words = (out->rounds + 1) * 4;
  OPENSSL_memcpy(out->rd_key, key, words * sizeof(uint32_t));
  for (size_t i = words; i < num_subkey_words; i++) {
    uint32_t w = out->rd_key[i - 1];
    if (i % words == 0) {
      // First word of a group: RotWord, SubWord, then XOR the round constant.
      w = aes_ref_sub_word(CRYPTO_rotr_u32(w, 8)) ^ kRCon[(i / words) - 1];
    } else if (key_bits == 256 && i % 4 == 0) {
      // AES-256 additionally applies SubWord in the middle of each group.
      w = aes_ref_sub_word(w);
    }
    out->rd_key[i] = w ^ out->rd_key[i - words];
  }
  // The ARM bsaes implementation expects all the keys to be byteswapped.
  for (size_t i = 0; i < num_subkey_words; i++) {
    out->rd_key[i] = CRYPTO_bswap4(out->rd_key[i]);
  }
  return 0;
}
// Applies the AES InvMixColumns transform (FIPS 197, section 5.3.3) to one
// state of four byte-swapped column words, in place.
static void aes_ref_inv_mix_columns(uint32_t block[4]) {
// This table was generated with the following Python script:
// clang-format off
/*
def mul_unreduced(a, b):
c = 0
for i in range(8):
if b & (1 << i):
c ^= a << i
return c
def mul(a, b):
c = mul_unreduced(a, b)
# c's highest term is at most x^14.
c = (c & 0xff) ^ mul_unreduced(c >> 8, 0b00011011)
# c's highest term is at most x^10.
c = (c & 0xff) ^ mul_unreduced(c >> 8, 0b00011011)
# c's highest term is at most x^7.
assert (c >> 8) == 0
return c
def inv_mix_column(a):
ret = 0
for b in [0x0e, 0x09, 0x0d, 0x0b]:
ret <<= 8
ret |= mul(a, b)
return ret
body = ", ".join("0x%08x" % inv_mix_column(a) for a in range(256))
print("static const uint32_t kTable[256] = {%s};\n" % body)
*/
// clang-format on
// kInvMixColumn[i] is the result of InvMixColumns applied to a column
// containing [i, 0, 0, 0]. (The contributions of the other positions are
// computed by rotating bytes.)
static const uint32_t kInvMixColumn[256] = {
0x00000000, 0x0e090d0b, 0x1c121a16, 0x121b171d, 0x3824342c, 0x362d3927,
0x24362e3a, 0x2a3f2331, 0x70486858, 0x7e416553, 0x6c5a724e, 0x62537f45,
0x486c5c74, 0x4665517f, 0x547e4662, 0x5a774b69, 0xe090d0b0, 0xee99ddbb,
0xfc82caa6, 0xf28bc7ad, 0xd8b4e49c, 0xd6bde997, 0xc4a6fe8a, 0xcaaff381,
0x90d8b8e8, 0x9ed1b5e3, 0x8ccaa2fe, 0x82c3aff5, 0xa8fc8cc4, 0xa6f581cf,
0xb4ee96d2, 0xbae79bd9, 0xdb3bbb7b, 0xd532b670, 0xc729a16d, 0xc920ac66,
0xe31f8f57, 0xed16825c, 0xff0d9541, 0xf104984a, 0xab73d323, 0xa57ade28,
0xb761c935, 0xb968c43e, 0x9357e70f, 0x9d5eea04, 0x8f45fd19, 0x814cf012,
0x3bab6bcb, 0x35a266c0, 0x27b971dd, 0x29b07cd6, 0x038f5fe7, 0x0d8652ec,
0x1f9d45f1, 0x119448fa, 0x4be30393, 0x45ea0e98, 0x57f11985, 0x59f8148e,
0x73c737bf, 0x7dce3ab4, 0x6fd52da9, 0x61dc20a2, 0xad766df6, 0xa37f60fd,
0xb16477e0, 0xbf6d7aeb, 0x955259da, 0x9b5b54d1, 0x894043cc, 0x87494ec7,
0xdd3e05ae, 0xd33708a5, 0xc12c1fb8, 0xcf2512b3, 0xe51a3182, 0xeb133c89,
0xf9082b94, 0xf701269f, 0x4de6bd46, 0x43efb04d, 0x51f4a750, 0x5ffdaa5b,
0x75c2896a, 0x7bcb8461, 0x69d0937c, 0x67d99e77, 0x3daed51e, 0x33a7d815,
0x21bccf08, 0x2fb5c203, 0x058ae132, 0x0b83ec39, 0x1998fb24, 0x1791f62f,
0x764dd68d, 0x7844db86, 0x6a5fcc9b, 0x6456c190, 0x4e69e2a1, 0x4060efaa,
0x527bf8b7, 0x5c72f5bc, 0x0605bed5, 0x080cb3de, 0x1a17a4c3, 0x141ea9c8,
0x3e218af9, 0x302887f2, 0x223390ef, 0x2c3a9de4, 0x96dd063d, 0x98d40b36,
0x8acf1c2b, 0x84c61120, 0xaef93211, 0xa0f03f1a, 0xb2eb2807, 0xbce2250c,
0xe6956e65, 0xe89c636e, 0xfa877473, 0xf48e7978, 0xdeb15a49, 0xd0b85742,
0xc2a3405f, 0xccaa4d54, 0x41ecdaf7, 0x4fe5d7fc, 0x5dfec0e1, 0x53f7cdea,
0x79c8eedb, 0x77c1e3d0, 0x65daf4cd, 0x6bd3f9c6, 0x31a4b2af, 0x3fadbfa4,
0x2db6a8b9, 0x23bfa5b2, 0x09808683, 0x07898b88, 0x15929c95, 0x1b9b919e,
0xa17c0a47, 0xaf75074c, 0xbd6e1051, 0xb3671d5a, 0x99583e6b, 0x97513360,
0x854a247d, 0x8b432976, 0xd134621f, 0xdf3d6f14, 0xcd267809, 0xc32f7502,
0xe9105633, 0xe7195b38, 0xf5024c25, 0xfb0b412e, 0x9ad7618c, 0x94de6c87,
0x86c57b9a, 0x88cc7691, 0xa2f355a0, 0xacfa58ab, 0xbee14fb6, 0xb0e842bd,
0xea9f09d4, 0xe49604df, 0xf68d13c2, 0xf8841ec9, 0xd2bb3df8, 0xdcb230f3,
0xcea927ee, 0xc0a02ae5, 0x7a47b13c, 0x744ebc37, 0x6655ab2a, 0x685ca621,
0x42638510, 0x4c6a881b, 0x5e719f06, 0x5078920d, 0x0a0fd964, 0x0406d46f,
0x161dc372, 0x1814ce79, 0x322bed48, 0x3c22e043, 0x2e39f75e, 0x2030fa55,
0xec9ab701, 0xe293ba0a, 0xf088ad17, 0xfe81a01c, 0xd4be832d, 0xdab78e26,
0xc8ac993b, 0xc6a59430, 0x9cd2df59, 0x92dbd252, 0x80c0c54f, 0x8ec9c844,
0xa4f6eb75, 0xaaffe67e, 0xb8e4f163, 0xb6edfc68, 0x0c0a67b1, 0x02036aba,
0x10187da7, 0x1e1170ac, 0x342e539d, 0x3a275e96, 0x283c498b, 0x26354480,
0x7c420fe9, 0x724b02e2, 0x605015ff, 0x6e5918f4, 0x44663bc5, 0x4a6f36ce,
0x587421d3, 0x567d2cd8, 0x37a10c7a, 0x39a80171, 0x2bb3166c, 0x25ba1b67,
0x0f853856, 0x018c355d, 0x13972240, 0x1d9e2f4b, 0x47e96422, 0x49e06929,
0x5bfb7e34, 0x55f2733f, 0x7fcd500e, 0x71c45d05, 0x63df4a18, 0x6dd64713,
0xd731dcca, 0xd938d1c1, 0xcb23c6dc, 0xc52acbd7, 0xef15e8e6, 0xe11ce5ed,
0xf307f2f0, 0xfd0efffb, 0xa779b492, 0xa970b999, 0xbb6bae84, 0xb562a38f,
0x9f5d80be, 0x91548db5, 0x834f9aa8, 0x8d4697a3};
// Note |block| is byte-swapped so block[i] >> 24 is the first element of
// block[i]. (See |aes_ref_set_encrypt_key|).
// Each output column is the XOR of the four per-byte table entries, with
// each entry byte-rotated into that byte's position.
for (size_t i = 0; i < 4; i++) {
uint32_t in = block[i];
block[i] = kInvMixColumn[in >> 24];
block[i] ^= CRYPTO_rotr_u32(kInvMixColumn[(in >> 16) & 0xff], 8);
block[i] ^= CRYPTO_rotr_u32(kInvMixColumn[(in >> 8) & 0xff], 16);
block[i] ^= CRYPTO_rotr_u32(kInvMixColumn[in & 0xff], 24);
}
}
// Builds the reference decryption schedule in the layout bsaes expects:
// expand the encryption schedule, reverse the order of the round keys, then
// apply InvMixColumns to every round key except the first and last.
// Returns 0 on success, 1 if |bits| is not a supported key size.
static int aes_ref_set_decrypt_key(const uint8_t *key, int bits, AES_KEY *out) {
  if (aes_ref_set_encrypt_key(key, bits, out) != 0) {
    return 1;
  }
  // Reverse the |out->rounds + 1| round keys, one 4-word key at a time.
  for (size_t lo = 0; lo < out->rounds / 2; lo++) {
    const size_t hi = out->rounds - lo;
    for (size_t j = 0; j < 4; j++) {
      std::swap(out->rd_key[4 * lo + j], out->rd_key[4 * hi + j]);
    }
  }
  // Inner round keys get InvMixColumns; the outermost two do not.
  for (size_t i = 1; i < out->rounds; i++) {
    aes_ref_inv_mix_columns(out->rd_key + 4 * i);
  }
  return 0;
}
// Verifies that converting a vpaes key schedule with
// |vpaes_encrypt_key_to_bsaes| / |vpaes_decrypt_key_to_bsaes| produces
// exactly the schedule the reference expansion above computes, for random
// keys, all key sizes, both directions, and both out-of-place and in-place
// conversion.
TEST(AESTest, VPAESToBSAESConvert) {
if (!vpaes_capable()) {
GTEST_SKIP();
}
const int kNumIterations = 1000;
for (int i = 0; i < kNumIterations; i++) {
uint8_t key[256 / 8];
RAND_bytes(key, sizeof(key));
SCOPED_TRACE(Bytes(key));
for (unsigned bits : {128u, 192u, 256u}) {
SCOPED_TRACE(bits);
for (bool enc : {false, true}) {
SCOPED_TRACE(enc);
AES_KEY ref, vpaes, bsaes;
// Poison all three structs so untouched padding bytes compare equal.
OPENSSL_memset(&ref, 0xaa, sizeof(ref));
OPENSSL_memset(&vpaes, 0xaa, sizeof(vpaes));
OPENSSL_memset(&bsaes, 0xaa, sizeof(bsaes));
if (enc) {
ASSERT_EQ(0, aes_ref_set_encrypt_key(key, bits, &ref));
ASSERT_EQ(0, vpaes_set_encrypt_key(key, bits, &vpaes));
vpaes_encrypt_key_to_bsaes(&bsaes, &vpaes);
} else {
ASSERT_EQ(0, aes_ref_set_decrypt_key(key, bits, &ref));
ASSERT_EQ(0, vpaes_set_decrypt_key(key, bits, &vpaes));
vpaes_decrypt_key_to_bsaes(&bsaes, &vpaes);
}
// Although not fatal, stop running if this fails, otherwise we'll spam
// the user's console.
ASSERT_EQ(AESKeyToBytes(&ref), AESKeyToBytes(&bsaes));
// Repeat the test in-place (source and destination aliased).
OPENSSL_memcpy(&bsaes, &vpaes, sizeof(AES_KEY));
if (enc) {
vpaes_encrypt_key_to_bsaes(&bsaes, &bsaes);
} else {
vpaes_decrypt_key_to_bsaes(&bsaes, &bsaes);
}
ASSERT_EQ(AESKeyToBytes(&ref), AESKeyToBytes(&bsaes));
}
}
}
}
#endif // BSAES && !SHARED_LIBRARY

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,923 @@
#! /usr/bin/env perl
# Copyright 2011-2016 The OpenSSL Project Authors. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
######################################################################
## Constant-time SSSE3 AES core implementation.
## version 0.1
##
## By Mike Hamburg (Stanford University), 2009
## Public domain.
##
## For details see http://shiftleft.org/papers/vector_aes/ and
## http://crypto.stanford.edu/vpaes/.
######################################################################
# September 2011.
#
# Port vpaes-x86_64.pl as 32-bit "almost" drop-in replacement for
# aes-586.pl. "Almost" refers to the fact that AES_cbc_encrypt
# doesn't handle partial vectors (doesn't have to if called from
# EVP only). "Drop-in" implies that this module doesn't share key
# schedule structure with the original nor does it make assumption
# about its alignment...
#
# Performance summary. aes-586.pl column lists large-block CBC
# encrypt/decrypt/with-hyper-threading-off(*) results in cycles per
# byte processed with 128-bit key, and vpaes-x86.pl column - [also
# large-block CBC] encrypt/decrypt.
#
# aes-586.pl vpaes-x86.pl
#
# Core 2(**) 28.1/41.4/18.3 21.9/25.2(***)
# Nehalem 27.9/40.4/18.1 10.2/11.9
# Atom 70.7/92.1/60.1 61.1/75.4(***)
# Silvermont 45.4/62.9/24.1 49.2/61.1(***)
#
# (*) "Hyper-threading" in the context refers rather to cache shared
# among multiple cores, than to specifically Intel HTT. As vast
# majority of contemporary cores share cache, slower code path
# is common place. In other words "with-hyper-threading-off"
# results are presented mostly for reference purposes.
#
# (**) "Core 2" refers to initial 65nm design, a.k.a. Conroe.
#
# (***) Less impressive improvement on Core 2 and Atom is due to slow
# pshufb, yet it's respectable +28%/64% improvement on Core 2
# and +15% on Atom (as implied, over "hyper-threading-safe"
# code path).
#
# <appro@openssl.org>
# The first two arguments should always be the flavour and output file path.
if ($#ARGV < 1) { die "Not enough arguments provided.
Two arguments are necessary: the flavour and the output file path."; }
# Locate this script's directory so the shared perlasm helpers can be loaded.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
push(@INC,"${dir}","${dir}../../../perlasm");
require "x86asm.pl";
$output = $ARGV[1];
open OUT,">$output";
*STDOUT=*OUT;
&asm_init($ARGV[0]);
$PREFIX="vpaes";
# Register roles used throughout: $round = round counter, $base = table base,
# $magic = rotating mc/sr index, $key = key schedule pointer, $const = PIC
# pointer to _vpaes_consts, $inp/$out = input/output pointers.
my ($round, $base, $magic, $key, $const, $inp, $out)=
("eax", "ebx", "ecx", "edx","ebp", "esi","edi");
# BUGFIX: the &preprocessor_ifdef call was missing its terminating semicolon,
# which is a Perl syntax error when followed by the next statement.
&preprocessor_ifdef("BORINGSSL_DISPATCH_TEST");
&external_label("BORINGSSL_function_hit");
&preprocessor_endif();
&static_label("_vpaes_consts");
&static_label("_vpaes_schedule_low_round");
## Constant tables for the vector-permute AES implementation. Each $k_* scalar
## is the table's byte offset relative to _vpaes_consts+0x30, which is the
## address the PIC setup leaves in $const (offsets may therefore be negative).
&set_label("_vpaes_consts",64);
$k_inv=-0x30; # inv, inva
&data_word(0x0D080180,0x0E05060F,0x0A0B0C02,0x04070309);
&data_word(0x0F0B0780,0x01040A06,0x02050809,0x030D0E0C);
$k_s0F=-0x10; # s0F
&data_word(0x0F0F0F0F,0x0F0F0F0F,0x0F0F0F0F,0x0F0F0F0F);
$k_ipt=0x00; # input transform (lo, hi)
&data_word(0x5A2A7000,0xC2B2E898,0x52227808,0xCABAE090);
&data_word(0x317C4D00,0x4C01307D,0xB0FDCC81,0xCD80B1FC);
$k_sb1=0x20; # sb1u, sb1t
&data_word(0xCB503E00,0xB19BE18F,0x142AF544,0xA5DF7A6E);
&data_word(0xFAE22300,0x3618D415,0x0D2ED9EF,0x3BF7CCC1);
$k_sb2=0x40; # sb2u, sb2t
&data_word(0x0B712400,0xE27A93C6,0xBC982FCD,0x5EB7E955);
&data_word(0x0AE12900,0x69EB8840,0xAB82234A,0xC2A163C8);
$k_sbo=0x60; # sbou, sbot
&data_word(0x6FBDC700,0xD0D26D17,0xC502A878,0x15AABF7A);
&data_word(0x5FBB6A00,0xCFE474A5,0x412B35FA,0x8E1E90D1);
$k_mc_forward=0x80; # mc_forward
&data_word(0x00030201,0x04070605,0x080B0A09,0x0C0F0E0D);
&data_word(0x04070605,0x080B0A09,0x0C0F0E0D,0x00030201);
&data_word(0x080B0A09,0x0C0F0E0D,0x00030201,0x04070605);
&data_word(0x0C0F0E0D,0x00030201,0x04070605,0x080B0A09);
$k_mc_backward=0xc0; # mc_backward
&data_word(0x02010003,0x06050407,0x0A09080B,0x0E0D0C0F);
&data_word(0x0E0D0C0F,0x02010003,0x06050407,0x0A09080B);
&data_word(0x0A09080B,0x0E0D0C0F,0x02010003,0x06050407);
&data_word(0x06050407,0x0A09080B,0x0E0D0C0F,0x02010003);
$k_sr=0x100; # sr
&data_word(0x03020100,0x07060504,0x0B0A0908,0x0F0E0D0C);
&data_word(0x0F0A0500,0x030E0904,0x07020D08,0x0B06010C);
&data_word(0x0B020900,0x0F060D04,0x030A0108,0x070E050C);
&data_word(0x070A0D00,0x0B0E0104,0x0F020508,0x0306090C);
$k_rcon=0x140; # rcon
&data_word(0xAF9DEEB6,0x1F8391B9,0x4D7C7D81,0x702A9808);
$k_s63=0x150; # s63: all equal to 0x63 transformed
&data_word(0x5B5B5B5B,0x5B5B5B5B,0x5B5B5B5B,0x5B5B5B5B);
$k_opt=0x160; # output transform
&data_word(0xD6B66000,0xFF9F4929,0xDEBE6808,0xF7974121);
&data_word(0x50BCEC00,0x01EDBD51,0xB05C0CE0,0xE10D5DB1);
$k_deskew=0x180; # deskew tables: inverts the sbox's "skew"
&data_word(0x47A4E300,0x07E4A340,0x5DBEF91A,0x1DFEB95A);
&data_word(0x83EA6900,0x5F36B5DC,0xF49D1E77,0x2841C2AB);
##
## Decryption stuff
## Key schedule constants
##
$k_dksd=0x1a0; # decryption key schedule: invskew x*D
&data_word(0xA3E44700,0xFEB91A5D,0x5A1DBEF9,0x0740E3A4);
&data_word(0xB5368300,0x41C277F4,0xAB289D1E,0x5FDC69EA);
$k_dksb=0x1c0; # decryption key schedule: invskew x*B
&data_word(0x8550D500,0x9A4FCA1F,0x1CC94C99,0x03D65386);
&data_word(0xB6FC4A00,0x115BEDA7,0x7E3482C8,0xD993256F);
$k_dkse=0x1e0; # decryption key schedule: invskew x*E + 0x63
&data_word(0x1FC9D600,0xD5031CCA,0x994F5086,0x53859A4C);
&data_word(0x4FDC7BE8,0xA2319605,0x20B31487,0xCD5EF96A);
$k_dks9=0x200; # decryption key schedule: invskew x*9
&data_word(0x7ED9A700,0xB6116FC8,0x82255BFC,0x4AED9334);
&data_word(0x27143300,0x45765162,0xE9DAFDCE,0x8BB89FAC);
##
## Decryption stuff
## Round function constants
##
$k_dipt=0x220; # decryption input transform
&data_word(0x0B545F00,0x0F505B04,0x114E451A,0x154A411E);
&data_word(0x60056500,0x86E383E6,0xF491F194,0x12771772);
$k_dsb9=0x240; # decryption sbox output *9*u, *9*t
&data_word(0x9A86D600,0x851C0353,0x4F994CC9,0xCAD51F50);
&data_word(0xECD74900,0xC03B1789,0xB2FBA565,0x725E2C9E);
$k_dsbd=0x260; # decryption sbox output *D*u, *D*t
&data_word(0xE6B1A200,0x7D57CCDF,0x882A4439,0xF56E9B13);
&data_word(0x24C6CB00,0x3CE2FAF7,0x15DEEFD3,0x2931180D);
$k_dsbb=0x280; # decryption sbox output *B*u, *B*t
&data_word(0x96B44200,0xD0226492,0xB0F2D404,0x602646F6);
&data_word(0xCD596700,0xC19498A6,0x3255AA6B,0xF3FF0C3E);
$k_dsbe=0x2a0; # decryption sbox output *E*u, *E*t
&data_word(0x26D4D000,0x46F29296,0x64B4F6B0,0x22426004);
&data_word(0xFFAAC100,0x0C55A6CD,0x98593E32,0x9467F36B);
$k_dsbo=0x2c0; # decryption sbox final output
&data_word(0x7EF94000,0x1387EA53,0xD4943E2D,0xC7AA6DB9);
&data_word(0x93441D00,0x12D7560F,0xD8C58E9C,0xCA4B8159);
&asciz ("Vector Permutation AES for x86/SSSE3, Mike Hamburg (Stanford University)");
&align (64);
## _vpaes_preheat
##
## Called immediately after the PIC "lea $const, _vpaes_consts+0x30-pic_point"
## sequence: [esp] holds the return address (i.e. the pic_point label), so
## adding it makes $const an absolute pointer into _vpaes_consts. Then caches
## the inversion table (%xmm7) and the 0x0F nibble mask (%xmm6) for the cores.
&function_begin_B("_vpaes_preheat");
&add ($const,&DWP(0,"esp"));
&movdqa ("xmm7",&QWP($k_inv,$const));
&movdqa ("xmm6",&QWP($k_s0F,$const));
&ret ();
&function_end_B("_vpaes_preheat");
##
## _aes_encrypt_core
##
## AES-encrypt %xmm0.
##
## Inputs:
## %xmm0 = input
## %xmm6-%xmm7 as in _vpaes_preheat
## (%edx) = scheduled keys
##
## Output in %xmm0
## Clobbers %xmm1-%xmm5, %eax, %ebx, %ecx, %edx
##
##
&function_begin_B("_vpaes_encrypt_core");
&mov ($magic,16);
&mov ($round,&DWP(240,$key));
# BUGFIX: this statement was missing its terminating semicolon (Perl syntax
# error). The instruction itself is unchanged.
&movdqa ("xmm1","xmm6");
&movdqa ("xmm2",&QWP($k_ipt,$const));
&pandn ("xmm1","xmm0");
&pand ("xmm0","xmm6");
&movdqu ("xmm5",&QWP(0,$key));
&pshufb ("xmm2","xmm0");
&movdqa ("xmm0",&QWP($k_ipt+16,$const));
&pxor ("xmm2","xmm5");
&psrld ("xmm1",4);
&add ($key,16);
&pshufb ("xmm0","xmm1");
&lea ($base,&DWP($k_mc_backward,$const));
&pxor ("xmm0","xmm2");
&jmp (&label("enc_entry"));
&set_label("enc_loop",16);
# middle of middle round
&movdqa ("xmm4",&QWP($k_sb1,$const)); # 4 : sb1u
&movdqa ("xmm0",&QWP($k_sb1+16,$const));# 0 : sb1t
&pshufb ("xmm4","xmm2"); # 4 = sb1u
&pshufb ("xmm0","xmm3"); # 0 = sb1t
&pxor ("xmm4","xmm5"); # 4 = sb1u + k
&movdqa ("xmm5",&QWP($k_sb2,$const)); # 4 : sb2u
&pxor ("xmm0","xmm4"); # 0 = A
&movdqa ("xmm1",&QWP(-0x40,$base,$magic));# .Lk_mc_forward[]
&pshufb ("xmm5","xmm2"); # 4 = sb2u
&movdqa ("xmm2",&QWP($k_sb2+16,$const));# 2 : sb2t
&movdqa ("xmm4",&QWP(0,$base,$magic)); # .Lk_mc_backward[]
&pshufb ("xmm2","xmm3"); # 2 = sb2t
&movdqa ("xmm3","xmm0"); # 3 = A
&pxor ("xmm2","xmm5"); # 2 = 2A
&pshufb ("xmm0","xmm1"); # 0 = B
&add ($key,16); # next key
&pxor ("xmm0","xmm2"); # 0 = 2A+B
&pshufb ("xmm3","xmm4"); # 3 = D
&add ($magic,16); # next mc
&pxor ("xmm3","xmm0"); # 3 = 2A+B+D
&pshufb ("xmm0","xmm1"); # 0 = 2B+C
&and ($magic,0x30); # ... mod 4
&sub ($round,1); # nr--
&pxor ("xmm0","xmm3"); # 0 = 2A+3B+C+D
&set_label("enc_entry");
# top of round
&movdqa ("xmm1","xmm6"); # 1 : i
&movdqa ("xmm5",&QWP($k_inv+16,$const));# 2 : a/k
&pandn ("xmm1","xmm0"); # 1 = i<<4
&psrld ("xmm1",4); # 1 = i
&pand ("xmm0","xmm6"); # 0 = k
&pshufb ("xmm5","xmm0"); # 2 = a/k
&movdqa ("xmm3","xmm7"); # 3 : 1/i
&pxor ("xmm0","xmm1"); # 0 = j
&pshufb ("xmm3","xmm1"); # 3 = 1/i
&movdqa ("xmm4","xmm7"); # 4 : 1/j
&pxor ("xmm3","xmm5"); # 3 = iak = 1/i + a/k
&pshufb ("xmm4","xmm0"); # 4 = 1/j
&movdqa ("xmm2","xmm7"); # 2 : 1/iak
&pxor ("xmm4","xmm5"); # 4 = jak = 1/j + a/k
&pshufb ("xmm2","xmm3"); # 2 = 1/iak
&movdqa ("xmm3","xmm7"); # 3 : 1/jak
&pxor ("xmm2","xmm0"); # 2 = io
&pshufb ("xmm3","xmm4"); # 3 = 1/jak
&movdqu ("xmm5",&QWP(0,$key));
&pxor ("xmm3","xmm1"); # 3 = jo
# ZF from the "sub $round,1" above decides whether another round remains.
&jnz (&label("enc_loop"));
# middle of last round
&movdqa ("xmm4",&QWP($k_sbo,$const)); # 3 : sbou .Lk_sbo
&movdqa ("xmm0",&QWP($k_sbo+16,$const));# 3 : sbot .Lk_sbo+16
&pshufb ("xmm4","xmm2"); # 4 = sbou
&pxor ("xmm4","xmm5"); # 4 = sb1u + k
&pshufb ("xmm0","xmm3"); # 0 = sb1t
&movdqa ("xmm1",&QWP(0x40,$base,$magic));# .Lk_sr[]
&pxor ("xmm0","xmm4"); # 0 = A
&pshufb ("xmm0","xmm1");
&ret ();
&function_end_B("_vpaes_encrypt_core");
##
## Decryption core
##
## Same API as encryption core.
##
&function_begin_B("_vpaes_decrypt_core");
&lea ($base,&DWP($k_dsbd,$const));
&mov ($round,&DWP(240,$key));
&movdqa ("xmm1","xmm6");
&movdqa ("xmm2",&QWP($k_dipt-$k_dsbd,$base));
&pandn ("xmm1","xmm0");
&mov ($magic,$round);
# BUGFIX: this statement was missing its terminating semicolon (Perl syntax
# error). The instruction itself is unchanged.
&psrld ("xmm1",4);
&movdqu ("xmm5",&QWP(0,$key));
&shl ($magic,4);
&pand ("xmm0","xmm6");
&pshufb ("xmm2","xmm0");
&movdqa ("xmm0",&QWP($k_dipt-$k_dsbd+16,$base));
&xor ($magic,0x30);
&pshufb ("xmm0","xmm1");
&and ($magic,0x30);
&pxor ("xmm2","xmm5");
&movdqa ("xmm5",&QWP($k_mc_forward+48,$const));
&pxor ("xmm0","xmm2");
&add ($key,16);
&lea ($magic,&DWP($k_sr-$k_dsbd,$base,$magic));
&jmp (&label("dec_entry"));
&set_label("dec_loop",16);
##
## Inverse mix columns
##
&movdqa ("xmm4",&QWP(-0x20,$base)); # 4 : sb9u
&movdqa ("xmm1",&QWP(-0x10,$base)); # 0 : sb9t
&pshufb ("xmm4","xmm2"); # 4 = sb9u
&pshufb ("xmm1","xmm3"); # 0 = sb9t
&pxor ("xmm0","xmm4");
&movdqa ("xmm4",&QWP(0,$base)); # 4 : sbdu
&pxor ("xmm0","xmm1"); # 0 = ch
&movdqa ("xmm1",&QWP(0x10,$base)); # 0 : sbdt
&pshufb ("xmm4","xmm2"); # 4 = sbdu
&pshufb ("xmm0","xmm5"); # MC ch
&pshufb ("xmm1","xmm3"); # 0 = sbdt
&pxor ("xmm0","xmm4"); # 4 = ch
&movdqa ("xmm4",&QWP(0x20,$base)); # 4 : sbbu
&pxor ("xmm0","xmm1"); # 0 = ch
&movdqa ("xmm1",&QWP(0x30,$base)); # 0 : sbbt
&pshufb ("xmm4","xmm2"); # 4 = sbbu
&pshufb ("xmm0","xmm5"); # MC ch
&pshufb ("xmm1","xmm3"); # 0 = sbbt
&pxor ("xmm0","xmm4"); # 4 = ch
&movdqa ("xmm4",&QWP(0x40,$base)); # 4 : sbeu
&pxor ("xmm0","xmm1"); # 0 = ch
&movdqa ("xmm1",&QWP(0x50,$base)); # 0 : sbet
&pshufb ("xmm4","xmm2"); # 4 = sbeu
&pshufb ("xmm0","xmm5"); # MC ch
&pshufb ("xmm1","xmm3"); # 0 = sbet
&pxor ("xmm0","xmm4"); # 4 = ch
&add ($key,16); # next round key
&palignr("xmm5","xmm5",12);
&pxor ("xmm0","xmm1"); # 0 = ch
&sub ($round,1); # nr--
&set_label("dec_entry");
# top of round
&movdqa ("xmm1","xmm6"); # 1 : i
&movdqa ("xmm2",&QWP($k_inv+16,$const));# 2 : a/k
&pandn ("xmm1","xmm0"); # 1 = i<<4
&pand ("xmm0","xmm6"); # 0 = k
&psrld ("xmm1",4); # 1 = i
&pshufb ("xmm2","xmm0"); # 2 = a/k
&movdqa ("xmm3","xmm7"); # 3 : 1/i
&pxor ("xmm0","xmm1"); # 0 = j
&pshufb ("xmm3","xmm1"); # 3 = 1/i
&movdqa ("xmm4","xmm7"); # 4 : 1/j
&pxor ("xmm3","xmm2"); # 3 = iak = 1/i + a/k
&pshufb ("xmm4","xmm0"); # 4 = 1/j
&pxor ("xmm4","xmm2"); # 4 = jak = 1/j + a/k
&movdqa ("xmm2","xmm7"); # 2 : 1/iak
&pshufb ("xmm2","xmm3"); # 2 = 1/iak
&movdqa ("xmm3","xmm7"); # 3 : 1/jak
&pxor ("xmm2","xmm0"); # 2 = io
&pshufb ("xmm3","xmm4"); # 3 = 1/jak
&movdqu ("xmm0",&QWP(0,$key));
&pxor ("xmm3","xmm1"); # 3 = jo
# ZF from the "sub $round,1" above decides whether another round remains.
&jnz (&label("dec_loop"));
# middle of last round
&movdqa ("xmm4",&QWP(0x60,$base)); # 3 : sbou
&pshufb ("xmm4","xmm2"); # 4 = sbou
&pxor ("xmm4","xmm0"); # 4 = sb1u + k
&movdqa ("xmm0",&QWP(0x70,$base)); # 0 : sbot
&movdqa ("xmm2",&QWP(0,$magic));
&pshufb ("xmm0","xmm3"); # 0 = sb1t
&pxor ("xmm0","xmm4"); # 0 = A
&pshufb ("xmm0","xmm2");
&ret ();
&function_end_B("_vpaes_decrypt_core");
########################################################
## ##
## AES key schedule ##
## ##
########################################################
## _vpaes_schedule_core: shared body of set_encrypt_key / set_decrypt_key.
## $inp = user key, $key = output schedule, $round = bits (128/192/256),
## $out = 0 for encryption, nonzero for decryption (chooses mangling),
## $magic = shiftrows index for decryption. $const is fixed up via the
## return address at [esp], exactly as in _vpaes_preheat.
&function_begin_B("_vpaes_schedule_core");
&add ($const,&DWP(0,"esp"));
&movdqu ("xmm0",&QWP(0,$inp)); # load key (unaligned)
&movdqa ("xmm2",&QWP($k_rcon,$const)); # load rcon
# input transform
&movdqa ("xmm3","xmm0");
&lea ($base,&DWP($k_ipt,$const));
# Stash rcon on the stack; 32-bit has no %xmm8, so (esp+4) stands in for it.
&movdqa (&QWP(4,"esp"),"xmm2"); # xmm8
&call ("_vpaes_schedule_transform");
&movdqa ("xmm7","xmm0");
&test ($out,$out);
&jnz (&label("schedule_am_decrypting"));
# encrypting, output zeroth round key after transform
&movdqu (&QWP(0,$key),"xmm0");
&jmp (&label("schedule_go"));
&set_label("schedule_am_decrypting");
# decrypting, output zeroth round key after shiftrows
&movdqa ("xmm1",&QWP($k_sr,$const,$magic));
&pshufb ("xmm3","xmm1");
&movdqu (&QWP(0,$key),"xmm3");
&xor ($magic,0x30);
&set_label("schedule_go");
&cmp ($round,192);
&ja (&label("schedule_256"));
&je (&label("schedule_192"));
# 128: fall though
##
## .schedule_128
##
## 128-bit specific part of key schedule.
##
## This schedule is really simple, because all its parts
## are accomplished by the subroutines.
##
&set_label("schedule_128");
&mov ($round,10);
&set_label("loop_schedule_128");
&call ("_vpaes_schedule_round");
&dec ($round);
&jz (&label("schedule_mangle_last"));
&call ("_vpaes_schedule_mangle"); # write output
&jmp (&label("loop_schedule_128"));
##
## .aes_schedule_192
##
## 192-bit specific part of key schedule.
##
## The main body of this schedule is the same as the 128-bit
## schedule, but with more smearing. The long, high side is
## stored in %xmm7 as before, and the short, low side is in
## the high bits of %xmm6.
##
## This schedule is somewhat nastier, however, because each
## round produces 192 bits of key material, or 1.5 round keys.
## Therefore, on each cycle we do 2 rounds and produce 3 round
## keys.
##
&set_label("schedule_192",16);
&movdqu ("xmm0",&QWP(8,$inp)); # load key part 2 (very unaligned)
&call ("_vpaes_schedule_transform"); # input transform
&movdqa ("xmm6","xmm0"); # save short part
&pxor ("xmm4","xmm4"); # clear 4
&movhlps("xmm6","xmm4"); # clobber low side with zeros
&mov ($round,4);
&set_label("loop_schedule_192");
&call ("_vpaes_schedule_round");
&palignr("xmm0","xmm6",8);
&call ("_vpaes_schedule_mangle"); # save key n
&call ("_vpaes_schedule_192_smear");
&call ("_vpaes_schedule_mangle"); # save key n+1
&call ("_vpaes_schedule_round");
&dec ($round);
&jz (&label("schedule_mangle_last"));
&call ("_vpaes_schedule_mangle"); # save key n+2
&call ("_vpaes_schedule_192_smear");
&jmp (&label("loop_schedule_192"));
##
## .aes_schedule_256
##
## 256-bit specific part of key schedule.
##
## The structure here is very similar to the 128-bit
## schedule, but with an additional "low side" in
## %xmm6. The low side's rounds are the same as the
## high side's, except no rcon and no rotation.
##
&set_label("schedule_256",16);
&movdqu ("xmm0",&QWP(16,$inp)); # load key part 2 (unaligned)
&call ("_vpaes_schedule_transform"); # input transform
&mov ($round,7);
&set_label("loop_schedule_256");
&call ("_vpaes_schedule_mangle"); # output low result
&movdqa ("xmm6","xmm0"); # save cur_lo in xmm6
# high round
&call ("_vpaes_schedule_round");
&dec ($round);
&jz (&label("schedule_mangle_last"));
&call ("_vpaes_schedule_mangle");
# low round. swap xmm7 and xmm6
&pshufd ("xmm0","xmm0",0xFF);
# Spill %xmm7 to the stack across the low round (no spare xmm regs on x86).
&movdqa (&QWP(20,"esp"),"xmm7");
&movdqa ("xmm7","xmm6");
&call ("_vpaes_schedule_low_round");
&movdqa ("xmm7",&QWP(20,"esp"));
&jmp (&label("loop_schedule_256"));
##
## .aes_schedule_mangle_last
##
## Mangler for last round of key schedule
## Mangles %xmm0
## when encrypting, outputs out(%xmm0) ^ 63
## when decrypting, outputs unskew(%xmm0)
##
## Always called right before return... jumps to cleanup and exits
##
&set_label("schedule_mangle_last",16);
# schedule last round key from xmm0
&lea ($base,&DWP($k_deskew,$const));
&test ($out,$out);
&jnz (&label("schedule_mangle_last_dec"));
# encrypting
&movdqa ("xmm1",&QWP($k_sr,$const,$magic));
&pshufb ("xmm0","xmm1"); # output permute
&lea ($base,&DWP($k_opt,$const)); # prepare to output transform
&add ($key,32);
&set_label("schedule_mangle_last_dec");
&add ($key,-16);
&pxor ("xmm0",&QWP($k_s63,$const));
&call ("_vpaes_schedule_transform"); # output transform
&movdqu (&QWP(0,$key),"xmm0"); # save last key
# cleanup: scrub key material from all xmm registers before returning
&pxor ("xmm0","xmm0");
&pxor ("xmm1","xmm1");
&pxor ("xmm2","xmm2");
&pxor ("xmm3","xmm3");
&pxor ("xmm4","xmm4");
&pxor ("xmm5","xmm5");
&pxor ("xmm6","xmm6");
&pxor ("xmm7","xmm7");
&ret ();
&function_end_B("_vpaes_schedule_core");
##
## .aes_schedule_192_smear
##
## Smear the short, low side in the 192-bit key schedule.
##
## Inputs:
## %xmm7: high side, b a x y
## %xmm6: low side, d c 0 0
## %xmm13: 0
##
## Outputs:
## %xmm6: b+c+d b+c 0 0
## %xmm0: b+c+d b+c b a
##
## (The x86-64 original used %xmm13 as the zero register; here a freshly
## cleared %xmm1 plays that role.)
&function_begin_B("_vpaes_schedule_192_smear");
&pshufd ("xmm1","xmm6",0x80); # d c 0 0 -> c 0 0 0
&pshufd ("xmm0","xmm7",0xFE); # b a _ _ -> b b b a
&pxor ("xmm6","xmm1"); # -> c+d c 0 0
&pxor ("xmm1","xmm1");
&pxor ("xmm6","xmm0"); # -> b+c+d b+c b a
&movdqa ("xmm0","xmm6");
&movhlps("xmm6","xmm1"); # clobber low side with zeros
&ret ();
&function_end_B("_vpaes_schedule_192_smear");
##
## .aes_schedule_round
##
## Runs one main round of the key schedule on %xmm0, %xmm7
##
## Specifically, runs subbytes on the high dword of %xmm0
## then rotates it by one byte and xors into the low dword of
## %xmm7.
##
## Adds rcon from low byte of %xmm8, then rotates %xmm8 for
## next rcon.
##
## Smears the dwords of %xmm7 by xoring the low into the
## second low, result into third, result into highest.
##
## Returns results in %xmm7 = %xmm0.
## Clobbers %xmm1-%xmm5.
##
## (On 32-bit x86 there is no %xmm8; the rcon state lives at (esp+8).)
&function_begin_B("_vpaes_schedule_round");
# extract rcon from xmm8
&movdqa ("xmm2",&QWP(8,"esp")); # xmm8
&pxor ("xmm1","xmm1");
&palignr("xmm1","xmm2",15);
&palignr("xmm2","xmm2",15);
&pxor ("xmm7","xmm1");
# rotate
&pshufd ("xmm0","xmm0",0xFF);
&palignr("xmm0","xmm0",1);
# fall through...
&movdqa (&QWP(8,"esp"),"xmm2"); # xmm8
# low round: same as high round, but no rotation and no rcon.
&set_label("_vpaes_schedule_low_round");
# smear xmm7
&movdqa ("xmm1","xmm7");
&pslldq ("xmm7",4);
&pxor ("xmm7","xmm1");
&movdqa ("xmm1","xmm7");
&pslldq ("xmm7",8);
&pxor ("xmm7","xmm1");
&pxor ("xmm7",&QWP($k_s63,$const));
# subbyte
&movdqa ("xmm4",&QWP($k_s0F,$const));
&movdqa ("xmm5",&QWP($k_inv,$const)); # 4 : 1/j
&movdqa ("xmm1","xmm4");
&pandn ("xmm1","xmm0");
&psrld ("xmm1",4); # 1 = i
&pand ("xmm0","xmm4"); # 0 = k
&movdqa ("xmm2",&QWP($k_inv+16,$const));# 2 : a/k
&pshufb ("xmm2","xmm0"); # 2 = a/k
&pxor ("xmm0","xmm1"); # 0 = j
&movdqa ("xmm3","xmm5"); # 3 : 1/i
&pshufb ("xmm3","xmm1"); # 3 = 1/i
&pxor ("xmm3","xmm2"); # 3 = iak = 1/i + a/k
&movdqa ("xmm4","xmm5"); # 4 : 1/j
&pshufb ("xmm4","xmm0"); # 4 = 1/j
&pxor ("xmm4","xmm2"); # 4 = jak = 1/j + a/k
&movdqa ("xmm2","xmm5"); # 2 : 1/iak
&pshufb ("xmm2","xmm3"); # 2 = 1/iak
&pxor ("xmm2","xmm0"); # 2 = io
&movdqa ("xmm3","xmm5"); # 3 : 1/jak
&pshufb ("xmm3","xmm4"); # 3 = 1/jak
&pxor ("xmm3","xmm1"); # 3 = jo
&movdqa ("xmm4",&QWP($k_sb1,$const)); # 4 : sbou
&pshufb ("xmm4","xmm2"); # 4 = sbou
&movdqa ("xmm0",&QWP($k_sb1+16,$const));# 0 : sbot
&pshufb ("xmm0","xmm3"); # 0 = sb1t
&pxor ("xmm0","xmm4"); # 0 = sbox output
# add in smeared stuff
&pxor ("xmm0","xmm7");
&movdqa ("xmm7","xmm0");
&ret ();
&function_end_B("_vpaes_schedule_round");
##
## .aes_schedule_transform
##
## Linear-transform %xmm0 according to tables at (%ebx)
##
## Splits %xmm0 into low/high nibbles, looks each up in the 16-byte tables
## at ($base) and ($base)+16, and xors the results together.
##
## Output in %xmm0
## Clobbers %xmm1, %xmm2
##
&function_begin_B("_vpaes_schedule_transform");
&movdqa ("xmm2",&QWP($k_s0F,$const));
&movdqa ("xmm1","xmm2");
&pandn ("xmm1","xmm0");
&psrld ("xmm1",4);
&pand ("xmm0","xmm2");
&movdqa ("xmm2",&QWP(0,$base));
&pshufb ("xmm2","xmm0");
&movdqa ("xmm0",&QWP(16,$base));
&pshufb ("xmm0","xmm1");
&pxor ("xmm0","xmm2");
&ret ();
&function_end_B("_vpaes_schedule_transform");
##
## .aes_schedule_mangle
##
## Mangle xmm0 from (basis-transformed) standard version
## to our version.
##
## On encrypt,
## xor with 0x63
## multiply by circulant 0,1,1,1
## apply shiftrows transform
##
## On decrypt,
## xor with 0x63
## multiply by "inverse mixcolumns" circulant E,B,D,9
## deskew
## apply shiftrows transform
##
##
## Writes out to (%edx), and increments or decrements it
## Keeps track of round number mod 4 in %ecx
## Preserves xmm0
## Clobbers xmm1-xmm5
##
&function_begin_B("_vpaes_schedule_mangle");
&movdqa ("xmm4","xmm0"); # save xmm0 for later
&movdqa ("xmm5",&QWP($k_mc_forward,$const));
&test ($out,$out);
&jnz (&label("schedule_mangle_dec"));
# encrypting
&add ($key,16);
&pxor ("xmm4",&QWP($k_s63,$const));
# Three rotate-and-xor steps implement the 0,1,1,1 circulant multiply.
&pshufb ("xmm4","xmm5");
&movdqa ("xmm3","xmm4");
&pshufb ("xmm4","xmm5");
&pxor ("xmm3","xmm4");
&pshufb ("xmm4","xmm5");
&pxor ("xmm3","xmm4");
&jmp (&label("schedule_mangle_both"));
&set_label("schedule_mangle_dec",16);
# inverse mix columns
&movdqa ("xmm2",&QWP($k_s0F,$const));
&lea ($inp,&DWP($k_dksd,$const));
&movdqa ("xmm1","xmm2");
&pandn ("xmm1","xmm4");
&psrld ("xmm1",4); # 1 = hi
&pand ("xmm4","xmm2"); # 4 = lo
# Accumulate the four invskew tables (x*D, x*B, x*E, x*9) at $inp+0x00..0x70,
# rotating between pairs with the mc_forward permutation in xmm5.
&movdqa ("xmm2",&QWP(0,$inp));
&pshufb ("xmm2","xmm4");
&movdqa ("xmm3",&QWP(0x10,$inp));
&pshufb ("xmm3","xmm1");
&pxor ("xmm3","xmm2");
&pshufb ("xmm3","xmm5");
&movdqa ("xmm2",&QWP(0x20,$inp));
&pshufb ("xmm2","xmm4");
&pxor ("xmm2","xmm3");
&movdqa ("xmm3",&QWP(0x30,$inp));
&pshufb ("xmm3","xmm1");
&pxor ("xmm3","xmm2");
&pshufb ("xmm3","xmm5");
&movdqa ("xmm2",&QWP(0x40,$inp));
&pshufb ("xmm2","xmm4");
&pxor ("xmm2","xmm3");
&movdqa ("xmm3",&QWP(0x50,$inp));
&pshufb ("xmm3","xmm1");
&pxor ("xmm3","xmm2");
&pshufb ("xmm3","xmm5");
&movdqa ("xmm2",&QWP(0x60,$inp));
&pshufb ("xmm2","xmm4");
&pxor ("xmm2","xmm3");
&movdqa ("xmm3",&QWP(0x70,$inp));
&pshufb ("xmm3","xmm1");
&pxor ("xmm3","xmm2");
&add ($key,-16);
&set_label("schedule_mangle_both");
&movdqa ("xmm1",&QWP($k_sr,$const,$magic));
&pshufb ("xmm3","xmm1");
&add ($magic,-16);
&and ($magic,0x30);
&movdqu (&QWP(0,$key),"xmm3");
&ret ();
&function_end_B("_vpaes_schedule_mangle");
#
# Interface to OpenSSL
#
## int vpaes_set_encrypt_key(const uint8_t *key, int bits, AES_KEY *out);
## Always returns 0. Allocates an aligned scratch area on the stack, sets
## rounds = bits/32 + 5, and runs _vpaes_schedule_core in encrypt mode.
&function_begin("${PREFIX}_set_encrypt_key");
# Marks this entry point in BORINGSSL_function_hit for dispatch testing.
record_function_hit(5);
&mov ($inp,&wparam(0)); # inp
&lea ($base,&DWP(-56,"esp"));
&mov ($round,&wparam(1)); # bits
&and ($base,-16);
&mov ($key,&wparam(2)); # key
&xchg ($base,"esp"); # alloca
&mov (&DWP(48,"esp"),$base);
&mov ($base,$round);
&shr ($base,5);
&add ($base,5);
&mov (&DWP(240,$key),$base); # AES_KEY->rounds = nbits/32+5;
&mov ($magic,0x30);
&mov ($out,0); # $out==0 selects the encryption schedule
# PIC: the call pushes pic_point's address; schedule_core adds it to $const.
&lea ($const,&DWP(&label("_vpaes_consts")."+0x30-".&label("pic_point")));
&call ("_vpaes_schedule_core");
&set_label("pic_point");
&mov ("esp",&DWP(48,"esp"));
&xor ("eax","eax");
&function_end("${PREFIX}_set_encrypt_key");
## int vpaes_set_decrypt_key(const uint8_t *key, int bits, AES_KEY *out);
## Always returns 0. Same stack setup as set_encrypt_key, but points $key at
## the end of the schedule (decryption keys are written backwards) and runs
## _vpaes_schedule_core in decrypt mode ($out==1).
&function_begin("${PREFIX}_set_decrypt_key");
&mov ($inp,&wparam(0)); # inp
&lea ($base,&DWP(-56,"esp"));
&mov ($round,&wparam(1)); # bits
&and ($base,-16);
&mov ($key,&wparam(2)); # key
&xchg ($base,"esp"); # alloca
&mov (&DWP(48,"esp"),$base);
&mov ($base,$round);
&shr ($base,5);
&add ($base,5);
&mov (&DWP(240,$key),$base); # AES_KEY->rounds = nbits/32+5;
&shl ($base,4);
&lea ($key,&DWP(16,$key,$base)); # point at the last round key
&mov ($out,1);
&mov ($magic,$round);
&shr ($magic,1);
&and ($magic,32);
&xor ($magic,32); # nbits==192?0:32;
&lea ($const,&DWP(&label("_vpaes_consts")."+0x30-".&label("pic_point")));
&call ("_vpaes_schedule_core");
&set_label("pic_point");
&mov ("esp",&DWP(48,"esp"));
&xor ("eax","eax");
&function_end("${PREFIX}_set_decrypt_key");
## void vpaes_encrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key);
## Encrypts a single 16-byte block. Preheats the nibble-mask/inv constants,
## then runs _vpaes_encrypt_core on the (possibly unaligned) input.
&function_begin("${PREFIX}_encrypt");
# Marks this entry point in BORINGSSL_function_hit for dispatch testing.
record_function_hit(4);
&lea ($const,&DWP(&label("_vpaes_consts")."+0x30-".&label("pic_point")));
&call ("_vpaes_preheat");
&set_label("pic_point");
&mov ($inp,&wparam(0)); # inp
&lea ($base,&DWP(-56,"esp"));
&mov ($out,&wparam(1)); # out
&and ($base,-16);
&mov ($key,&wparam(2)); # key
&xchg ($base,"esp"); # alloca
&mov (&DWP(48,"esp"),$base);
&movdqu ("xmm0",&QWP(0,$inp));
&call ("_vpaes_encrypt_core");
&movdqu (&QWP(0,$out),"xmm0");
&mov ("esp",&DWP(48,"esp"));
&function_end("${PREFIX}_encrypt");
## void vpaes_decrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key);
## Decrypts a single 16-byte block; mirror image of vpaes_encrypt.
&function_begin("${PREFIX}_decrypt");
&lea ($const,&DWP(&label("_vpaes_consts")."+0x30-".&label("pic_point")));
&call ("_vpaes_preheat");
&set_label("pic_point");
&mov ($inp,&wparam(0)); # inp
&lea ($base,&DWP(-56,"esp"));
&mov ($out,&wparam(1)); # out
&and ($base,-16);
&mov ($key,&wparam(2)); # key
&xchg ($base,"esp"); # alloca
&mov (&DWP(48,"esp"),$base);
&movdqu ("xmm0",&QWP(0,$inp));
&call ("_vpaes_decrypt_core");
&movdqu (&QWP(0,$out),"xmm0");
&mov ("esp",&DWP(48,"esp"));
&function_end("${PREFIX}_decrypt");
## void vpaes_cbc_encrypt(const uint8_t *in, uint8_t *out, size_t len,
## const AES_KEY *key, uint8_t *ivec, int enc);
## CBC-mode encrypt/decrypt of len bytes (len is rounded down to whole
## blocks; len < 16 returns immediately). Updates the IV in *ivec.
&function_begin("${PREFIX}_cbc_encrypt");
&mov ($inp,&wparam(0)); # inp
&mov ($out,&wparam(1)); # out
&mov ($round,&wparam(2)); # len
&mov ($key,&wparam(3)); # key
&sub ($round,16);
&jc (&label("cbc_abort")); # less than one block: nothing to do
&lea ($base,&DWP(-56,"esp"));
&mov ($const,&wparam(4)); # ivp
&and ($base,-16);
&mov ($magic,&wparam(5)); # enc
&xchg ($base,"esp"); # alloca
&movdqu ("xmm1",&QWP(0,$const)); # load IV
&sub ($out,$inp); # keep out as a delta from inp
&mov (&DWP(48,"esp"),$base);
&mov (&DWP(0,"esp"),$out); # save out
# BUGFIX: this statement was missing its terminating semicolon (Perl syntax
# error). The instruction itself is unchanged.
&mov (&DWP(4,"esp"),$key); # save key
&mov (&DWP(8,"esp"),$const); # save ivp
&mov ($out,$round); # $out works as $len
&lea ($const,&DWP(&label("_vpaes_consts")."+0x30-".&label("pic_point")));
&call ("_vpaes_preheat");
&set_label("pic_point");
&cmp ($magic,0);
&je (&label("cbc_dec_loop"));
&jmp (&label("cbc_enc_loop"));
&set_label("cbc_enc_loop",16);
&movdqu ("xmm0",&QWP(0,$inp)); # load input
&pxor ("xmm0","xmm1"); # inp^=iv
&call ("_vpaes_encrypt_core");
&mov ($base,&DWP(0,"esp")); # restore out
&mov ($key,&DWP(4,"esp")); # restore key
&movdqa ("xmm1","xmm0"); # ciphertext becomes next IV
&movdqu (&QWP(0,$base,$inp),"xmm0"); # write output
&lea ($inp,&DWP(16,$inp));
&sub ($out,16);
&jnc (&label("cbc_enc_loop"));
&jmp (&label("cbc_done"));
&set_label("cbc_dec_loop",16);
&movdqu ("xmm0",&QWP(0,$inp)); # load input
&movdqa (&QWP(16,"esp"),"xmm1"); # save IV
&movdqa (&QWP(32,"esp"),"xmm0"); # save future IV
&call ("_vpaes_decrypt_core");
&mov ($base,&DWP(0,"esp")); # restore out
&mov ($key,&DWP(4,"esp")); # restore key
&pxor ("xmm0",&QWP(16,"esp")); # out^=iv
&movdqa ("xmm1",&QWP(32,"esp")); # load next IV
&movdqu (&QWP(0,$base,$inp),"xmm0"); # write output
&lea ($inp,&DWP(16,$inp));
&sub ($out,16);
&jnc (&label("cbc_dec_loop"));
&set_label("cbc_done");
&mov ($base,&DWP(8,"esp")); # restore ivp
&mov ("esp",&DWP(48,"esp"));
&movdqu (&QWP(0,$base),"xmm1"); # write IV
&set_label("cbc_abort");
&function_end("${PREFIX}_cbc_encrypt");
&asm_finish();
close STDOUT or die "error closing STDOUT: $!";

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,286 @@
// Copyright (c) 2017, Google Inc.
// SPDX-License-Identifier: ISC
#ifndef OPENSSL_HEADER_AES_INTERNAL_H
#define OPENSSL_HEADER_AES_INTERNAL_H
#include <stdlib.h>
#include <openssl/cpu.h>
#include "../service_indicator/internal.h"
#include "../cpucap/internal.h"
#include "../../internal.h"
#if defined(__cplusplus)
extern "C" {
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(OPENSSL_X86_64)
// Reports whether every CPU extension required by the AVX-512 XTS code path
// is present: VAES, VBMI2, AVX-512, and VPCLMULQDQ. The checks run in the
// same order as before, short-circuiting on the first missing feature.
OPENSSL_INLINE int avx512_xts_available(void) {
  if (!CRYPTO_is_VAES_capable() || !CRYPTO_is_VBMI2_capable()) {
    return 0;
  }
  return CRYPTO_is_AVX512_capable() && CRYPTO_is_VPCLMULQDQ_capable();
}
#endif
// Per-platform selection of the available AES implementations. Each branch
// defines feature macros (HWAES, VPAES, BSAES, ...) and the matching
// *_capable() runtime checks used by the dispatchers in aes.c.
#if defined(OPENSSL_X86) || defined(OPENSSL_X86_64)
#define HWAES
#define HWAES_ECB
OPENSSL_INLINE int hwaes_capable(void) { return CRYPTO_is_AESNI_capable(); }
#define VPAES
#if defined(OPENSSL_X86_64)
#define VPAES_CTR32
#define HWAES_XTS
OPENSSL_INLINE int hwaes_xts_available(void) {
return CRYPTO_is_AESNI_capable();
}
#endif
#define VPAES_CBC
OPENSSL_INLINE int vpaes_capable(void) { return CRYPTO_is_SSSE3_capable(); }
#elif defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64)
#define HWAES
OPENSSL_INLINE int hwaes_capable(void) { return CRYPTO_is_ARMv8_AES_capable(); }
#if defined(OPENSSL_ARM)
// 32-bit ARM: both the bit-sliced and vector-permute fallbacks need NEON.
#define BSAES
#define VPAES
#define VPAES_CTR32
OPENSSL_INLINE int bsaes_capable(void) { return CRYPTO_is_NEON_capable(); }
OPENSSL_INLINE int vpaes_capable(void) { return CRYPTO_is_NEON_capable(); }
#endif
#if defined(OPENSSL_AARCH64)
#define VPAES
#define VPAES_CBC
#define VPAES_CTR32
#define HWAES_XTS
OPENSSL_INLINE int vpaes_capable(void) { return CRYPTO_is_NEON_capable(); }
OPENSSL_INLINE int hwaes_xts_available(void) {
// same as hwaes_capable()
return CRYPTO_is_ARMv8_AES_capable();
}
#endif
#elif defined(OPENSSL_PPC64LE)
#define HWAES
OPENSSL_INLINE int hwaes_capable(void) {
return CRYPTO_is_PPC64LE_vcrypto_capable();
}
#endif
#endif // !NO_ASM
#if defined(HWAES)
// Hardware-accelerated AES entry points, provided by assembly when HWAES is
// defined for the platform. Note aes_hw_* keys use their own schedule format
// and must not be mixed with the nohw/vpaes/bsaes representations.
int aes_hw_set_encrypt_key(const uint8_t *user_key, const int bits,
AES_KEY *key);
int aes_hw_set_decrypt_key(const uint8_t *user_key, const int bits,
AES_KEY *key);
void aes_hw_encrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key);
void aes_hw_decrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key);
void aes_hw_cbc_encrypt(const uint8_t *in, uint8_t *out, size_t length,
const AES_KEY *key, uint8_t *ivec, const int enc);
void aes_hw_ctr32_encrypt_blocks(const uint8_t *in, uint8_t *out, size_t len,
const AES_KEY *key, const uint8_t ivec[16]);
#else
// If HWAES isn't defined then we provide dummy functions for each of the hwaes
// functions. hwaes_capable() returns zero, so these are unreachable; they
// abort() to catch any dispatcher bug that calls them anyway.
OPENSSL_INLINE int hwaes_capable(void) { return 0; }
OPENSSL_INLINE int aes_hw_set_encrypt_key(const uint8_t *user_key, int bits,
AES_KEY *key) {
abort();
}
OPENSSL_INLINE int aes_hw_set_decrypt_key(const uint8_t *user_key, int bits,
AES_KEY *key) {
abort();
}
OPENSSL_INLINE void aes_hw_encrypt(const uint8_t *in, uint8_t *out,
const AES_KEY *key) {
abort();
}
OPENSSL_INLINE void aes_hw_decrypt(const uint8_t *in, uint8_t *out,
const AES_KEY *key) {
abort();
}
OPENSSL_INLINE void aes_hw_cbc_encrypt(const uint8_t *in, uint8_t *out,
size_t length, const AES_KEY *key,
uint8_t *ivec, int enc) {
abort();
}
OPENSSL_INLINE void aes_hw_ctr32_encrypt_blocks(const uint8_t *in, uint8_t *out,
size_t len, const AES_KEY *key,
const uint8_t ivec[16]) {
abort();
}
#endif // !HWAES
#if defined(HWAES_ECB)
void aes_hw_ecb_encrypt(const uint8_t *in, uint8_t *out, size_t length,
const AES_KEY *key, const int enc);
#endif // HWAES_ECB
#if defined(HWAES_XTS)
// XTS-mode entry points; |length| is in bytes and |iv| is the initial tweak.
void aes_hw_xts_encrypt(const uint8_t *in, uint8_t *out, size_t length,
const AES_KEY *key1, const AES_KEY *key2,
const uint8_t iv[16]);
void aes_hw_xts_decrypt(const uint8_t *in, uint8_t *out, size_t length,
const AES_KEY *key1, const AES_KEY *key2,
const uint8_t iv[16]);
OPENSSL_EXPORT int aes_hw_xts_cipher(const uint8_t *in, uint8_t *out, size_t length,
const AES_KEY *key1, const AES_KEY *key2,
const uint8_t iv[16], int enc);
// AVX-512 XTS variants are only assembled when the toolchain supports the
// required encodings and the target is not Windows.
#if defined(OPENSSL_X86_64) && !defined(MY_ASSEMBLER_IS_TOO_OLD_FOR_512AVX) && !defined(OPENSSL_WINDOWS)
#define AES_XTS_X86_64_AVX512
void aes_hw_xts_encrypt_avx512(const uint8_t *in, uint8_t *out, size_t length,
const AES_KEY *key1, const AES_KEY *key2,
const uint8_t iv[16]);
void aes_hw_xts_decrypt_avx512(const uint8_t *in, uint8_t *out, size_t length,
const AES_KEY *key1, const AES_KEY *key2,
const uint8_t iv[16]);
int crypto_xts_avx512_enabled(void);
#endif //AES_XTS_X86_64_AVX512
#else
// No hardware XTS: report unavailable and abort() in the unreachable stubs.
OPENSSL_INLINE int hwaes_xts_available(void) { return 0; }
OPENSSL_INLINE void aes_hw_xts_encrypt(const uint8_t *in, uint8_t *out, size_t length,
const AES_KEY *key1, const AES_KEY *key2,
const uint8_t iv[16]) {
abort();
}
OPENSSL_INLINE void aes_hw_xts_decrypt(const uint8_t *in, uint8_t *out, size_t length,
const AES_KEY *key1, const AES_KEY *key2,
const uint8_t iv[16]) {
abort();
}
OPENSSL_INLINE int aes_hw_xts_cipher(const uint8_t *in, uint8_t *out, size_t length,
const AES_KEY *key1, const AES_KEY *key2,
const uint8_t iv[16], int enc) {
abort();
}
#endif // HWAES_XTS
#if defined(BSAES)
// Bit-sliced AES assembly implementations.
// Note |bsaes_cbc_encrypt| requires |enc| to be zero.
void bsaes_cbc_encrypt(const uint8_t *in, uint8_t *out, size_t length,
                       const AES_KEY *key, uint8_t ivec[16], int enc);
void bsaes_ctr32_encrypt_blocks(const uint8_t *in, uint8_t *out, size_t len,
                                const AES_KEY *key, const uint8_t ivec[16]);
// VPAES to BSAES conversions are available on all BSAES platforms. They
// translate a VPAES-format key schedule into the BSAES representation (the
// two formats are incompatible; see the note at the top of this section).
void vpaes_encrypt_key_to_bsaes(AES_KEY *out_bsaes, const AES_KEY *vpaes);
void vpaes_decrypt_key_to_bsaes(AES_KEY *out_bsaes, const AES_KEY *vpaes);
#else
OPENSSL_INLINE char bsaes_capable(void) { return 0; }
// On other platforms, bsaes_capable() will always return false and so the
// following will never be called.
OPENSSL_INLINE void bsaes_cbc_encrypt(const uint8_t *in, uint8_t *out,
                                      size_t length, const AES_KEY *key,
                                      uint8_t ivec[16], int enc) {
  abort();
}
OPENSSL_INLINE void bsaes_ctr32_encrypt_blocks(const uint8_t *in, uint8_t *out,
                                               size_t len, const AES_KEY *key,
                                               const uint8_t ivec[16]) {
  abort();
}
OPENSSL_INLINE void vpaes_encrypt_key_to_bsaes(AES_KEY *out_bsaes,
                                               const AES_KEY *vpaes) {
  abort();
}
OPENSSL_INLINE void vpaes_decrypt_key_to_bsaes(AES_KEY *out_bsaes,
                                               const AES_KEY *vpaes) {
  abort();
}
#endif  // !BSAES
#if defined(VPAES)
// On platforms where VPAES gets defined (just above), then these functions are
// provided by asm.
int vpaes_set_encrypt_key(const uint8_t *userKey, int bits, AES_KEY *key);
int vpaes_set_decrypt_key(const uint8_t *userKey, int bits, AES_KEY *key);
void vpaes_encrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key);
void vpaes_decrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key);
#if defined(VPAES_CBC)
// vpaes_cbc_encrypt is only provided on platforms with a VPAES CBC assembly
// implementation.
void vpaes_cbc_encrypt(const uint8_t *in, uint8_t *out, size_t length,
                       const AES_KEY *key, uint8_t *ivec, int enc);
#endif
#if defined(VPAES_CTR32)
// vpaes_ctr32_encrypt_blocks is only provided on platforms with a VPAES
// CTR32 assembly implementation.
void vpaes_ctr32_encrypt_blocks(const uint8_t *in, uint8_t *out, size_t len,
                                const AES_KEY *key, const uint8_t ivec[16]);
#endif
#else
OPENSSL_INLINE char vpaes_capable(void) { return 0; }
// On other platforms, vpaes_capable() will always return false and so the
// following will never be called.
OPENSSL_INLINE int vpaes_set_encrypt_key(const uint8_t *userKey, int bits,
                                         AES_KEY *key) {
  abort();
}
OPENSSL_INLINE int vpaes_set_decrypt_key(const uint8_t *userKey, int bits,
                                         AES_KEY *key) {
  abort();
}
OPENSSL_INLINE void vpaes_encrypt(const uint8_t *in, uint8_t *out,
                                  const AES_KEY *key) {
  abort();
}
OPENSSL_INLINE void vpaes_decrypt(const uint8_t *in, uint8_t *out,
                                  const AES_KEY *key) {
  abort();
}
OPENSSL_INLINE void vpaes_cbc_encrypt(const uint8_t *in, uint8_t *out,
                                      size_t length, const AES_KEY *key,
                                      uint8_t *ivec, int enc) {
  abort();
}
#endif  // !VPAES
// Portable, constant-time-oriented C implementations of AES, used as the
// last-resort fallback when neither hardware AES nor VPAES is available.
int aes_nohw_set_encrypt_key(const uint8_t *key, unsigned bits,
                             AES_KEY *aeskey);
int aes_nohw_set_decrypt_key(const uint8_t *key, unsigned bits,
                             AES_KEY *aeskey);
void aes_nohw_encrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key);
void aes_nohw_decrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key);
void aes_nohw_ctr32_encrypt_blocks(const uint8_t *in, uint8_t *out,
                                   size_t blocks, const AES_KEY *key,
                                   const uint8_t ivec[16]);
void aes_nohw_cbc_encrypt(const uint8_t *in, uint8_t *out, size_t len,
                          const AES_KEY *key, uint8_t *ivec, const int enc);
#if defined(__cplusplus)
} // extern C
#endif
#endif // OPENSSL_HEADER_AES_INTERNAL_H

View File

@@ -0,0 +1,206 @@
// Copyright (c) 2001-2011 The OpenSSL Project. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
#include <openssl/aes.h>
#include <assert.h>
#include <limits.h>
#include <string.h>
#include <openssl/mem.h>
#include "../../internal.h"
// kDefaultIV is the default IV value given in RFC 3394, 2.2.3.1.
static const uint8_t kDefaultIV[] = {
    0xa6, 0xa6, 0xa6, 0xa6, 0xa6, 0xa6, 0xa6, 0xa6,
};

// kBound is the number of wrapping rounds, "6" in RFC 3394, section 2.2.1.
static const unsigned kBound = 6;
int AES_wrap_key(const AES_KEY *key, const uint8_t *iv, uint8_t *out,
const uint8_t *in, size_t in_len) {
// See RFC 3394, section 2.2.1. Additionally, note that section 2 requires the
// plaintext be at least two 8-byte blocks.
if (in_len > INT_MAX - 8 || in_len < 16 || in_len % 8 != 0) {
return -1;
}
if (iv == NULL) {
iv = kDefaultIV;
}
OPENSSL_memmove(out + 8, in, in_len);
uint8_t A[AES_BLOCK_SIZE];
OPENSSL_memcpy(A, iv, 8);
size_t n = in_len / 8;
for (unsigned j = 0; j < kBound; j++) {
for (size_t i = 1; i <= n; i++) {
OPENSSL_memcpy(A + 8, out + 8 * i, 8);
AES_encrypt(A, A, key);
uint32_t t = (uint32_t)(n * j + i);
A[7] ^= t & 0xff;
A[6] ^= (t >> 8) & 0xff;
A[5] ^= (t >> 16) & 0xff;
A[4] ^= (t >> 24) & 0xff;
OPENSSL_memcpy(out + 8 * i, A + 8, 8);
}
}
OPENSSL_memcpy(out, A, 8);
FIPS_service_indicator_update_state();
return (int)in_len + 8;
}
// aes_unwrap_key_inner performs steps one and two from
// https://tools.ietf.org/html/rfc3394#section-2.2.2
//
// It writes the recovered plaintext to |out| and the final A register (the
// candidate IV) to |out_iv|, returning one on success and zero if |in_len| is
// not a valid AES-KW ciphertext length. The caller is responsible for
// checking |out_iv| against the expected value.
static int aes_unwrap_key_inner(const AES_KEY *key, uint8_t *out,
                                uint8_t out_iv[8], const uint8_t *in,
                                size_t in_len) {
  // See RFC 3394, section 2.2.2. Additionally, note that section 2 requires the
  // plaintext be at least two 8-byte blocks, so the ciphertext must be at least
  // three blocks.
  if (in_len > INT_MAX || in_len < 24 || in_len % 8 != 0) {
    return 0;
  }

  // |A| holds the A register (first 8 bytes) concatenated with the current
  // R block (last 8 bytes).
  uint8_t A[AES_BLOCK_SIZE];
  OPENSSL_memcpy(A, in, 8);
  OPENSSL_memmove(out, in + 8, in_len - 8);

  size_t n = (in_len / 8) - 1;

  // |j| runs kBound-1 down to zero; the unsigned decrement wraps past zero,
  // which terminates the loop.
  for (unsigned j = kBound - 1; j < kBound; j--) {
    for (size_t i = n; i > 0; i--) {
      // Undo the big-endian counter XOR applied during wrapping.
      uint32_t t = (uint32_t)(n * j + i);
      A[7] ^= t & 0xff;
      A[6] ^= (t >> 8) & 0xff;
      A[5] ^= (t >> 16) & 0xff;
      A[4] ^= (t >> 24) & 0xff;
      OPENSSL_memcpy(A + 8, out + 8 * (i - 1), 8);
      AES_decrypt(A, A, key);
      OPENSSL_memcpy(out + 8 * (i - 1), A + 8, 8);
    }
  }

  // Use OPENSSL_memcpy for consistency with the rest of this file.
  OPENSSL_memcpy(out_iv, A, 8);
  return 1;
}
// AES_unwrap_key reverses |AES_wrap_key|. When |iv| is NULL the RFC 3394
// default IV is expected. On success the (|in_len| - 8)-byte plaintext is
// written to |out| and its length returned; on any failure (bad length or IV
// mismatch) -1 is returned.
int AES_unwrap_key(const AES_KEY *key, const uint8_t *iv, uint8_t *out,
                   const uint8_t *in, size_t in_len) {
  uint8_t recovered_iv[8];
  if (!aes_unwrap_key_inner(key, out, recovered_iv, in, in_len)) {
    return -1;
  }
  // Compare in constant time against the caller's IV, or the default one.
  const uint8_t *expected_iv = (iv != NULL) ? iv : kDefaultIV;
  if (CRYPTO_memcmp(recovered_iv, expected_iv, 8) != 0) {
    return -1;
  }
  FIPS_service_indicator_update_state();
  return (int)in_len - 8;
}
// kPaddingConstant is used in Key Wrap with Padding. See
// https://tools.ietf.org/html/rfc5649#section-3
static const uint8_t kPaddingConstant[4] = {0xa6, 0x59, 0x59, 0xa6};
// AES_wrap_key_padded implements AES-KWP (RFC 5649). It wraps |in_len| bytes
// from |in| (any length from 1 to 2^32 - 1) into |out|, writing the output
// length to |*out_len|. |max_out| must be at least |in_len| rounded up to a
// multiple of eight, plus eight. Returns one on success and zero on error.
int AES_wrap_key_padded(const AES_KEY *key, uint8_t *out, size_t *out_len,
                        size_t max_out, const uint8_t *in, size_t in_len) {
  // We have to avoid the underlying |AES_wrap_key| service updating the
  // indicator state, so we lock the state here.
  FIPS_service_indicator_lock_state();
  // See https://tools.ietf.org/html/rfc5649#section-4.1
  const uint64_t in_len64 = in_len;
  const size_t padded_len = (in_len + 7) & ~7;

  int ret = 0;
  *out_len = 0;
  // Reject empty input, input longer than 2^32 - 1 bytes (the length must fit
  // in the 32-bit AIV field), and arithmetic overflow in the padding math.
  if (in_len == 0 || in_len64 > 0xffffffffu || in_len + 7 < in_len ||
      padded_len + 8 < padded_len || max_out < padded_len + 8) {
    goto end;
  }

  // |block| holds the alternative IV: the 4-byte padding constant followed by
  // the big-endian 32-bit plaintext length.
  uint8_t block[AES_BLOCK_SIZE];
  OPENSSL_memcpy(block, kPaddingConstant, sizeof(kPaddingConstant));
  CRYPTO_store_u32_be(block + 4, (uint32_t)in_len);

  if (in_len <= 8) {
    // Inputs of eight bytes or fewer are padded into a single block and
    // encrypted directly, per RFC 5649, section 4.1.
    OPENSSL_memset(block + 8, 0, 8);
    OPENSSL_memcpy(block + 8, in, in_len);
    AES_encrypt(block, out, key);
    *out_len = AES_BLOCK_SIZE;
    ret = 1;
    goto end;
  }

  uint8_t *padded_in = OPENSSL_malloc(padded_len);
  if (padded_in == NULL) {
    goto end;
  }
  // Zero the final eight bytes first so the tail padding is zero, then copy
  // the plaintext over the front.
  assert(padded_len >= 8);
  OPENSSL_memset(padded_in + padded_len - 8, 0, 8);
  OPENSSL_memcpy(padded_in, in, in_len);

  const int out_length = AES_wrap_key(key, block, out, padded_in, padded_len);
  OPENSSL_free(padded_in);
  if (out_length < 0) {
    goto end;
  }
  *out_len = out_length;
  ret = 1;

end:
  FIPS_service_indicator_unlock_state();
  if (ret) {
    FIPS_service_indicator_update_state();
  }
  return ret;
}
// AES_unwrap_key_padded reverses |AES_wrap_key_padded|. On success it writes
// the plaintext to |out|, its length to |*out_len|, and returns one; on any
// failure it returns zero. The padding and length checks run in constant
// time so that invalid ciphertexts do not leak where the mismatch occurred.
int AES_unwrap_key_padded(const AES_KEY *key, uint8_t *out, size_t *out_len,
                          size_t max_out, const uint8_t *in, size_t in_len) {
  *out_len = 0;
  if (in_len < AES_BLOCK_SIZE || max_out < in_len - 8) {
    return 0;
  }

  uint8_t iv[8];
  if (in_len == AES_BLOCK_SIZE) {
    // A single-block ciphertext was encrypted directly (see the wrap side);
    // decrypt it and split it into the candidate IV and eight data bytes.
    uint8_t block[AES_BLOCK_SIZE];
    AES_decrypt(in, block, key);
    OPENSSL_memcpy(iv, block, sizeof(iv));
    OPENSSL_memcpy(out, block + 8, 8);
  } else if (!aes_unwrap_key_inner(key, out, iv, in, in_len)) {
    return 0;
  }
  assert(in_len % 8 == 0);

  // The candidate IV must begin with the RFC 5649 padding constant...
  crypto_word_t ok = constant_time_eq_int(
      CRYPTO_memcmp(iv, kPaddingConstant, sizeof(kPaddingConstant)), 0);

  // ...and carry a non-zero claimed length that rounds up to exactly the
  // number of wrapped data bytes (in_len - 8).
  const size_t claimed_len = CRYPTO_load_u32_be(iv + 4);
  ok &= ~constant_time_is_zero_w(claimed_len);
  ok &= constant_time_eq_w((claimed_len - 1) >> 3, (in_len - 9) >> 3);

  // Check that padding bytes are all zero.
  for (size_t i = in_len - 15; i < in_len - 8; i++) {
    ok &= constant_time_is_zero_w(constant_time_ge_8(i, claimed_len) & out[i]);
  }

  *out_len = constant_time_select_w(ok, claimed_len, 0);
  const int ret = ok & 1;
  if (ret) {
    FIPS_service_indicator_update_state();
  }
  return ret;
}

View File

@@ -0,0 +1,167 @@
// Copyright (c) 2002-2006 The OpenSSL Project. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
#include <openssl/aes.h>
#include <assert.h>
#include "../aes/internal.h"
#include "../modes/internal.h"
#include "../cipher/internal.h"
// The following wrappers ensure that the delocator can handle the
// function pointer calculation in AES_ctr128_encrypt. Without it,
// on AArch64 there is risk of the calculations requiring a PC-relative
// offset outside of the range (-1MB,1MB) addressable using `ADR`.
// Thin forwarder to |aes_hw_encrypt|; exists so the delocator can take the
// function's address safely (see the comment above).
static inline void aes_hw_encrypt_wrapper(const uint8_t *in, uint8_t *out,
                                          const AES_KEY *key) {
  aes_hw_encrypt(in, out, key);
}
// Thin forwarder to |aes_nohw_encrypt| for safe address-taking under the
// delocator.
static inline void aes_nohw_encrypt_wrapper(const uint8_t *in, uint8_t *out,
                                            const AES_KEY *key) {
  aes_nohw_encrypt(in, out, key);
}
// Thin forwarder to |aes_hw_ctr32_encrypt_blocks| for safe address-taking
// under the delocator.
static inline void aes_hw_ctr32_encrypt_blocks_wrapper(const uint8_t *in,
                                                       uint8_t *out, size_t len,
                                                       const AES_KEY *key,
                                                       const uint8_t ivec[16]) {
  aes_hw_ctr32_encrypt_blocks(in, out, len, key, ivec);
}
// Thin forwarder to |aes_nohw_ctr32_encrypt_blocks| for safe address-taking
// under the delocator.
static inline void aes_nohw_ctr32_encrypt_blocks_wrapper(const uint8_t *in,
                                                         uint8_t *out, size_t len,
                                                         const AES_KEY *key,
                                                         const uint8_t ivec[16]) {
  aes_nohw_ctr32_encrypt_blocks(in, out, len, key, ivec);
}
// Thin forwarder to |vpaes_encrypt| for safe address-taking under the
// delocator.
static inline void vpaes_encrypt_wrapper(const uint8_t *in, uint8_t *out,
                                         const AES_KEY *key) {
  vpaes_encrypt(in, out, key);
}
#if defined(VPAES_CTR32)
// Thin forwarder to |vpaes_ctr32_encrypt_blocks| for safe address-taking
// under the delocator; only available where VPAES_CTR32 assembly exists.
static inline void vpaes_ctr32_encrypt_blocks_wrapper(const uint8_t *in,
                                                      uint8_t *out, size_t len,
                                                      const AES_KEY *key,
                                                      const uint8_t ivec[16]) {
  vpaes_ctr32_encrypt_blocks(in, out, len, key, ivec);
}
#endif
// AES_ctr128_encrypt processes |len| bytes in CTR mode (encryption and
// decryption are the same operation), dispatching to the fastest available
// backend. |ivec| is the counter block; |ecount_buf| and |num| carry the
// encrypted-counter state and offset across calls, per the legacy API.
void AES_ctr128_encrypt(const uint8_t *in, uint8_t *out, size_t len,
                        const AES_KEY *key, uint8_t ivec[AES_BLOCK_SIZE],
                        uint8_t ecount_buf[AES_BLOCK_SIZE], unsigned int *num) {
  if (hwaes_capable()) {
    CRYPTO_ctr128_encrypt_ctr32(in, out, len, key, ivec, ecount_buf, num,
                                aes_hw_ctr32_encrypt_blocks_wrapper);
  } else if (vpaes_capable()) {
#if defined(VPAES_CTR32)
    // TODO(davidben): On ARM, where |BSAES| is additionally defined, this could
    // use |vpaes_ctr32_encrypt_blocks_with_bsaes|.
    CRYPTO_ctr128_encrypt_ctr32(in, out, len, key, ivec, ecount_buf, num,
                                vpaes_ctr32_encrypt_blocks_wrapper);
#else
    // No multi-block VPAES CTR32 routine on this platform; fall back to the
    // generic block-at-a-time CTR construction over |vpaes_encrypt|.
    CRYPTO_ctr128_encrypt(in, out, len, key, ivec, ecount_buf, num,
                          vpaes_encrypt_wrapper);
#endif
  } else {
    CRYPTO_ctr128_encrypt_ctr32(in, out, len, key, ivec, ecount_buf, num,
                                aes_nohw_ctr32_encrypt_blocks_wrapper);
  }
  FIPS_service_indicator_update_state();
}
// AES_ecb_encrypt processes exactly one 16-byte block. |enc| selects between
// AES_ENCRYPT and AES_DECRYPT.
void AES_ecb_encrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key,
                     const int enc) {
  assert(in && out && key);
  assert((AES_ENCRYPT == enc) || (AES_DECRYPT == enc));

  if (enc == AES_ENCRYPT) {
    AES_encrypt(in, out, key);
  } else {
    AES_decrypt(in, out, key);
  }

  FIPS_service_indicator_update_state();
}
// AES_cbc_encrypt processes |len| bytes in CBC mode, updating |ivec| in
// place, using the fastest available backend. |enc| selects encryption or
// decryption.
void AES_cbc_encrypt(const uint8_t *in, uint8_t *out, size_t len,
                     const AES_KEY *key, uint8_t *ivec, const int enc) {
  if (hwaes_capable()) {
    // The hardware routine handles both directions itself.
    aes_hw_cbc_encrypt(in, out, len, key, ivec, enc);
  } else if (vpaes_capable()) {
    // Build CBC generically on top of the VPAES block functions.
    if (enc) {
      CRYPTO_cbc128_encrypt(in, out, len, key, ivec, AES_encrypt);
    } else {
      CRYPTO_cbc128_decrypt(in, out, len, key, ivec, AES_decrypt);
    }
  } else {
    aes_nohw_cbc_encrypt(in, out, len, key, ivec, enc);
  }
  FIPS_service_indicator_update_state();
}
// AES_ofb128_encrypt processes |length| bytes in OFB mode. The generic OFB
// helper tracks the keystream offset as unsigned, so bridge the legacy
// int-typed |num| parameter across the call.
void AES_ofb128_encrypt(const uint8_t *in, uint8_t *out, size_t length,
                        const AES_KEY *key, uint8_t *ivec, int *num) {
  unsigned offset = (unsigned)(*num);
  CRYPTO_ofb128_encrypt(in, out, length, key, ivec, &offset, AES_encrypt);
  *num = (int)offset;
}
// AES_cfb1_encrypt processes |bits| bits in CFB-1 mode; |enc| selects the
// direction. |num| is bridged through an unsigned temporary for the generic
// helper.
void AES_cfb1_encrypt(const uint8_t *in, uint8_t *out, size_t bits,
                      const AES_KEY *key, uint8_t *ivec, int *num,
                      int enc) {
  unsigned offset = (unsigned)(*num);
  CRYPTO_cfb128_1_encrypt(in, out, bits, key, ivec, &offset, enc, AES_encrypt);
  *num = (int)offset;
}
// AES_cfb8_encrypt processes |length| bytes in CFB-8 mode; |enc| selects the
// direction. |num| is bridged through an unsigned temporary for the generic
// helper.
void AES_cfb8_encrypt(const uint8_t *in, uint8_t *out, size_t length,
                      const AES_KEY *key, uint8_t *ivec, int *num,
                      int enc) {
  unsigned offset = (unsigned)(*num);
  CRYPTO_cfb128_8_encrypt(in, out, length, key, ivec, &offset, enc,
                          AES_encrypt);
  *num = (int)offset;
}
// AES_cfb128_encrypt processes |length| bytes in CFB-128 mode; |enc| selects
// the direction. |num| is bridged through an unsigned temporary for the
// generic helper.
void AES_cfb128_encrypt(const uint8_t *in, uint8_t *out, size_t length,
                        const AES_KEY *key, uint8_t *ivec, int *num,
                        int enc) {
  unsigned offset = (unsigned)(*num);
  CRYPTO_cfb128_encrypt(in, out, length, key, ivec, &offset, enc, AES_encrypt);
  *num = (int)offset;
}
#if defined(HWAES_XTS)
// aes_hw_xts_cipher runs AES-XTS over |length| bytes, using the AVX-512
// implementation when available and the baseline hardware routines
// otherwise. |enc| selects encryption or decryption. Returns one on success
// and zero if |length| is shorter than one block.
int aes_hw_xts_cipher(const uint8_t *in, uint8_t *out, size_t length,
                      const AES_KEY *key1, const AES_KEY *key2,
                      const uint8_t iv[16], int enc) {
  // The assembly functions abort on the following condition.
  // They can be modified to return 0/1 instead of void, but
  // this is the easy way out for now.
  if (length < 16) {
    return 0;
  }

#if defined(AES_XTS_X86_64_AVX512)
  if (avx512_xts_available()) {
    if (enc) {
      aes_hw_xts_encrypt_avx512(in, out, length, key1, key2, iv);
    } else {
      aes_hw_xts_decrypt_avx512(in, out, length, key1, key2, iv);
    }
    return 1;
  }
#endif

  if (enc) {
    aes_hw_xts_encrypt(in, out, length, key1, key2, iv);
  } else {
    aes_hw_xts_decrypt(in, out, length, key1, key2, iv);
  }
  return 1;
}
#endif  // HWAES_XTS