//! AES encryption support
|
|
|
|
use crate::{Block, Block8};
|
|
use cipher::inout::InOut;
|
|
use core::arch::aarch64::*;
|
|
|
|
// Stable "polyfills" for unstable core::arch::aarch64 intrinsics
|
|
// TODO(tarcieri): remove when these intrinsics have been stabilized
|
|
use super::intrinsics::{
|
|
vaesdq_u8, vaesdq_u8_and_vaesimcq_u8, vaeseq_u8, vaeseq_u8_and_vaesmcq_u8,
|
|
};
|
|
|
|
/// Perform AES encryption using the given expanded keys.
|
|
#[target_feature(enable = "aes")]
|
|
#[target_feature(enable = "neon")]
|
|
pub(super) unsafe fn encrypt1<const N: usize>(
|
|
expanded_keys: &[uint8x16_t; N],
|
|
block: InOut<'_, '_, Block>,
|
|
) {
|
|
let rounds = N - 1;
|
|
assert!(rounds == 10 || rounds == 12 || rounds == 14);
|
|
|
|
let (in_ptr, out_ptr) = block.into_raw();
|
|
|
|
let mut state = vld1q_u8(in_ptr as *const u8);
|
|
|
|
for k in expanded_keys.iter().take(rounds - 1) {
|
|
// AES single round encryption and mix columns
|
|
state = vaeseq_u8_and_vaesmcq_u8(state, *k);
|
|
}
|
|
|
|
// AES single round encryption
|
|
state = vaeseq_u8(state, expanded_keys[rounds - 1]);
|
|
|
|
// Final add (bitwise XOR)
|
|
state = veorq_u8(state, expanded_keys[rounds]);
|
|
|
|
vst1q_u8(out_ptr as *mut u8, state);
|
|
}
|
|
|
|
/// Perform parallel AES encryption 8-blocks-at-a-time using the given expanded keys.
|
|
#[target_feature(enable = "aes")]
|
|
#[target_feature(enable = "neon")]
|
|
pub(super) unsafe fn encrypt8<const N: usize>(
|
|
expanded_keys: &[uint8x16_t; N],
|
|
blocks: InOut<'_, '_, Block8>,
|
|
) {
|
|
let rounds = N - 1;
|
|
assert!(rounds == 10 || rounds == 12 || rounds == 14);
|
|
|
|
let (in_ptr, out_ptr) = blocks.into_raw();
|
|
let in_ptr = in_ptr as *const Block;
|
|
let out_ptr = out_ptr as *const Block;
|
|
|
|
let mut state = [
|
|
vld1q_u8(in_ptr.add(0) as *const u8),
|
|
vld1q_u8(in_ptr.add(1) as *const u8),
|
|
vld1q_u8(in_ptr.add(2) as *const u8),
|
|
vld1q_u8(in_ptr.add(3) as *const u8),
|
|
vld1q_u8(in_ptr.add(4) as *const u8),
|
|
vld1q_u8(in_ptr.add(5) as *const u8),
|
|
vld1q_u8(in_ptr.add(6) as *const u8),
|
|
vld1q_u8(in_ptr.add(7) as *const u8),
|
|
];
|
|
|
|
for k in expanded_keys.iter().take(rounds - 1) {
|
|
for i in 0..8 {
|
|
// AES single round encryption and mix columns
|
|
state[i] = vaeseq_u8_and_vaesmcq_u8(state[i], *k);
|
|
}
|
|
}
|
|
|
|
for i in 0..8 {
|
|
// AES single round encryption
|
|
state[i] = vaeseq_u8(state[i], expanded_keys[rounds - 1]);
|
|
|
|
// Final add (bitwise XOR)
|
|
state[i] = veorq_u8(state[i], expanded_keys[rounds]);
|
|
|
|
vst1q_u8(out_ptr.add(i) as *mut u8, state[i]);
|
|
}
|
|
}
|
|
|
|
/// Perform AES decryption using the given expanded keys.
|
|
#[target_feature(enable = "aes")]
|
|
#[target_feature(enable = "neon")]
|
|
pub(super) unsafe fn decrypt1<const N: usize>(
|
|
expanded_keys: &[uint8x16_t; N],
|
|
block: InOut<'_, '_, Block>,
|
|
) {
|
|
let rounds = N - 1;
|
|
assert!(rounds == 10 || rounds == 12 || rounds == 14);
|
|
|
|
let (in_ptr, out_ptr) = block.into_raw();
|
|
let mut state = vld1q_u8(in_ptr as *const u8);
|
|
|
|
for k in expanded_keys.iter().take(rounds - 1) {
|
|
// AES single round decryption and inverse mix columns
|
|
state = vaesdq_u8_and_vaesimcq_u8(state, *k);
|
|
}
|
|
|
|
// AES single round decryption
|
|
state = vaesdq_u8(state, expanded_keys[rounds - 1]);
|
|
|
|
// Final add (bitwise XOR)
|
|
state = veorq_u8(state, expanded_keys[rounds]);
|
|
|
|
vst1q_u8(out_ptr as *mut u8, state);
|
|
}
|
|
|
|
/// Perform parallel AES decryption 8-blocks-at-a-time using the given expanded keys.
|
|
#[target_feature(enable = "aes")]
|
|
#[target_feature(enable = "neon")]
|
|
pub(super) unsafe fn decrypt8<const N: usize>(
|
|
expanded_keys: &[uint8x16_t; N],
|
|
blocks: InOut<'_, '_, Block8>,
|
|
) {
|
|
let rounds = N - 1;
|
|
assert!(rounds == 10 || rounds == 12 || rounds == 14);
|
|
|
|
let (in_ptr, out_ptr) = blocks.into_raw();
|
|
let in_ptr = in_ptr as *const Block;
|
|
let out_ptr = out_ptr as *const Block;
|
|
|
|
let mut state = [
|
|
vld1q_u8(in_ptr.add(0) as *const u8),
|
|
vld1q_u8(in_ptr.add(1) as *const u8),
|
|
vld1q_u8(in_ptr.add(2) as *const u8),
|
|
vld1q_u8(in_ptr.add(3) as *const u8),
|
|
vld1q_u8(in_ptr.add(4) as *const u8),
|
|
vld1q_u8(in_ptr.add(5) as *const u8),
|
|
vld1q_u8(in_ptr.add(6) as *const u8),
|
|
vld1q_u8(in_ptr.add(7) as *const u8),
|
|
];
|
|
|
|
for k in expanded_keys.iter().take(rounds - 1) {
|
|
for i in 0..8 {
|
|
// AES single round decryption and inverse mix columns
|
|
state[i] = vaesdq_u8_and_vaesimcq_u8(state[i], *k);
|
|
}
|
|
}
|
|
|
|
for i in 0..8 {
|
|
// AES single round decryption
|
|
state[i] = vaesdq_u8(state[i], expanded_keys[rounds - 1]);
|
|
|
|
// Final add (bitwise XOR)
|
|
state[i] = veorq_u8(state[i], expanded_keys[rounds]);
|
|
|
|
vst1q_u8(out_ptr.add(i) as *mut u8, state[i]);
|
|
}
|
|
}
|