chore: checkpoint before Python removal

This commit is contained in:
2026-03-26 22:33:59 +00:00
parent 683cec9307
commit e568ddf82a
29972 changed files with 11269302 additions and 2 deletions

107
vendor/sha2/src/consts.rs vendored Normal file
View File

@@ -0,0 +1,107 @@
#![allow(dead_code, clippy::unreadable_literal)]
/// Number of words in a SHA-2 hash state (eight for both the 32-bit and
/// 64-bit variants).
pub const STATE_LEN: usize = 8;
/// Number of 32-bit words in a SHA-256 message block (64 bytes).
pub const BLOCK_LEN: usize = 16;
/// SHA-256 hash state: eight 32-bit working variables.
pub type State256 = [u32; STATE_LEN];
/// SHA-512 hash state: eight 64-bit working variables.
pub type State512 = [u64; STATE_LEN];
/// Constants necessary for SHA-256 family of digests.
pub const K32: [u32; 64] = [
0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2,
];
// Same constants as `K32`, regrouped four per row with lanes reversed to
// match the vector layout used by the SSE-style software schedule.
/// Constants necessary for SHA-256 family of digests.
pub const K32X4: [[u32; 4]; 16] = [
[K32[3], K32[2], K32[1], K32[0]],
[K32[7], K32[6], K32[5], K32[4]],
[K32[11], K32[10], K32[9], K32[8]],
[K32[15], K32[14], K32[13], K32[12]],
[K32[19], K32[18], K32[17], K32[16]],
[K32[23], K32[22], K32[21], K32[20]],
[K32[27], K32[26], K32[25], K32[24]],
[K32[31], K32[30], K32[29], K32[28]],
[K32[35], K32[34], K32[33], K32[32]],
[K32[39], K32[38], K32[37], K32[36]],
[K32[43], K32[42], K32[41], K32[40]],
[K32[47], K32[46], K32[45], K32[44]],
[K32[51], K32[50], K32[49], K32[48]],
[K32[55], K32[54], K32[53], K32[52]],
[K32[59], K32[58], K32[57], K32[56]],
[K32[63], K32[62], K32[61], K32[60]],
];
/// Constants necessary for SHA-512 family of digests.
pub const K64: [u64; 80] = [
0x428a2f98d728ae22, 0x7137449123ef65cd, 0xb5c0fbcfec4d3b2f, 0xe9b5dba58189dbbc,
0x3956c25bf348b538, 0x59f111f1b605d019, 0x923f82a4af194f9b, 0xab1c5ed5da6d8118,
0xd807aa98a3030242, 0x12835b0145706fbe, 0x243185be4ee4b28c, 0x550c7dc3d5ffb4e2,
0x72be5d74f27b896f, 0x80deb1fe3b1696b1, 0x9bdc06a725c71235, 0xc19bf174cf692694,
0xe49b69c19ef14ad2, 0xefbe4786384f25e3, 0x0fc19dc68b8cd5b5, 0x240ca1cc77ac9c65,
0x2de92c6f592b0275, 0x4a7484aa6ea6e483, 0x5cb0a9dcbd41fbd4, 0x76f988da831153b5,
0x983e5152ee66dfab, 0xa831c66d2db43210, 0xb00327c898fb213f, 0xbf597fc7beef0ee4,
0xc6e00bf33da88fc2, 0xd5a79147930aa725, 0x06ca6351e003826f, 0x142929670a0e6e70,
0x27b70a8546d22ffc, 0x2e1b21385c26c926, 0x4d2c6dfc5ac42aed, 0x53380d139d95b3df,
0x650a73548baf63de, 0x766a0abb3c77b2a8, 0x81c2c92e47edaee6, 0x92722c851482353b,
0xa2bfe8a14cf10364, 0xa81a664bbc423001, 0xc24b8b70d0f89791, 0xc76c51a30654be30,
0xd192e819d6ef5218, 0xd69906245565a910, 0xf40e35855771202a, 0x106aa07032bbd1b8,
0x19a4c116b8d2d0c8, 0x1e376c085141ab53, 0x2748774cdf8eeb99, 0x34b0bcb5e19b48a8,
0x391c0cb3c5c95a63, 0x4ed8aa4ae3418acb, 0x5b9cca4f7763e373, 0x682e6ff3d6b2b8a3,
0x748f82ee5defb2fc, 0x78a5636f43172f60, 0x84c87814a1f0ab72, 0x8cc702081a6439ec,
0x90befffa23631e28, 0xa4506cebde82bde9, 0xbef9a3f7b2c67915, 0xc67178f2e372532b,
0xca273eceea26619c, 0xd186b8c721c0c207, 0xeada7dd6cde0eb1e, 0xf57d4f7fee6ed178,
0x06f067aa72176fba, 0x0a637dc5a2c898a6, 0x113f9804bef90dae, 0x1b710b35131c471b,
0x28db77f523047d84, 0x32caab7b40c72493, 0x3c9ebe0a15c9bebc, 0x431d67c49c100d4c,
0x4cc5d4becb3e42b6, 0x597f299cfc657e2a, 0x5fcb6fab3ad6faec, 0x6c44198c4a475817,
];
// `K64` regrouped two per row with lanes reversed, for two-lane vector use.
/// Constants necessary for SHA-512 family of digests.
pub const K64X2: [[u64; 2]; 40] = [
[K64[1], K64[0]], [K64[3], K64[2]], [K64[5], K64[4]], [K64[7], K64[6]],
[K64[9], K64[8]], [K64[11], K64[10]], [K64[13], K64[12]], [K64[15], K64[14]],
[K64[17], K64[16]], [K64[19], K64[18]], [K64[21], K64[20]], [K64[23], K64[22]],
[K64[25], K64[24]], [K64[27], K64[26]], [K64[29], K64[28]], [K64[31], K64[30]],
[K64[33], K64[32]], [K64[35], K64[34]], [K64[37], K64[36]], [K64[39], K64[38]],
[K64[41], K64[40]], [K64[43], K64[42]], [K64[45], K64[44]], [K64[47], K64[46]],
[K64[49], K64[48]], [K64[51], K64[50]], [K64[53], K64[52]], [K64[55], K64[54]],
[K64[57], K64[56]], [K64[59], K64[58]], [K64[61], K64[60]], [K64[63], K64[62]],
[K64[65], K64[64]], [K64[67], K64[66]], [K64[69], K64[68]], [K64[71], K64[70]],
[K64[73], K64[72]], [K64[75], K64[74]], [K64[77], K64[76]], [K64[79], K64[78]],
];
// Initial hash values for each digest variant (FIPS 180-4, section 5.3).
/// Initial state for SHA-224.
pub const H256_224: State256 = [
0xc1059ed8, 0x367cd507, 0x3070dd17, 0xf70e5939,
0xffc00b31, 0x68581511, 0x64f98fa7, 0xbefa4fa4,
];
/// Initial state for SHA-256.
pub const H256_256: State256 = [
0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19,
];
/// Initial state for SHA-512/224.
pub const H512_224: State512 = [
0x8c3d37c819544da2, 0x73e1996689dcd4d6, 0x1dfab7ae32ff9c82, 0x679dd514582f9fcf,
0x0f6d2b697bd44da8, 0x77e36f7304c48942, 0x3f9d85a86a1d36c8, 0x1112e6ad91d692a1,
];
/// Initial state for SHA-512/256.
pub const H512_256: State512 = [
0x22312194fc2bf72c, 0x9f555fa3c84c64c2, 0x2393b86b6f53b151, 0x963877195940eabd,
0x96283ee2a88effe3, 0xbe5e1e2553863992, 0x2b0199fc2c85b8aa, 0x0eb72ddc81c52ca2,
];
/// Initial state for SHA-384.
pub const H512_384: State512 = [
0xcbbb9d5dc1059ed8, 0x629a292a367cd507, 0x9159015a3070dd17, 0x152fecd8f70e5939,
0x67332667ffc00b31, 0x8eb44a8768581511, 0xdb0c2e0d64f98fa7, 0x47b5481dbefa4fa4,
];
/// Initial state for SHA-512.
pub const H512_512: State512 = [
0x6a09e667f3bcc908, 0xbb67ae8584caa73b, 0x3c6ef372fe94f82b, 0xa54ff53a5f1d36f1,
0x510e527fade682d1, 0x9b05688c2b3e6c1f, 0x1f83d9abfb41bd6b, 0x5be0cd19137e2179,
];

157
vendor/sha2/src/core_api.rs vendored Normal file
View File

@@ -0,0 +1,157 @@
use crate::{consts, sha256::compress256, sha512::compress512};
use core::{fmt, slice::from_ref};
use digest::{
block_buffer::Eager,
core_api::{
AlgorithmName, Block, BlockSizeUser, Buffer, BufferKindUser, OutputSizeUser, TruncSide,
UpdateCore, VariableOutputCore,
},
typenum::{Unsigned, U128, U32, U64},
HashMarker, InvalidOutputSize, Output,
};
/// Core block-level SHA-256 hasher with variable output size.
///
/// Initialization accepts only 28- and 32-byte output sizes, i.e. the
/// SHA-224 and SHA-256 digest lengths.
#[derive(Clone)]
pub struct Sha256VarCore {
    // Current chaining value: eight 32-bit words.
    state: consts::State256,
    // Count of 64-byte blocks compressed so far.
    block_len: u64,
}

impl HashMarker for Sha256VarCore {}

impl BlockSizeUser for Sha256VarCore {
    type BlockSize = U64;
}

impl BufferKindUser for Sha256VarCore {
    type BufferKind = Eager;
}

impl UpdateCore for Sha256VarCore {
    #[inline]
    fn update_blocks(&mut self, blocks: &[Block<Self>]) {
        // Compress the blocks and bump the processed-block counter; the two
        // steps are independent of each other.
        compress256(&mut self.state, blocks);
        self.block_len += blocks.len() as u64;
    }
}

impl OutputSizeUser for Sha256VarCore {
    type OutputSize = U32;
}

impl VariableOutputCore for Sha256VarCore {
    const TRUNC_SIDE: TruncSide = TruncSide::Left;

    #[inline]
    fn new(output_size: usize) -> Result<Self, InvalidOutputSize> {
        // Select the initial hash value matching the requested digest size.
        let state = match output_size {
            28 => consts::H256_224,
            32 => consts::H256_256,
            _ => return Err(InvalidOutputSize),
        };
        Ok(Self { state, block_len: 0 })
    }

    #[inline]
    fn finalize_variable_core(&mut self, buffer: &mut Buffer<Self>, out: &mut Output<Self>) {
        // Total message length in bits: full blocks plus buffered bytes.
        let total_bytes = Self::BlockSize::U64 * self.block_len + buffer.get_pos() as u64;
        buffer.len64_padding_be(8 * total_bytes, |b| compress256(&mut self.state, from_ref(b)));
        // Serialize the final state big-endian into the output buffer.
        for (chunk, word) in out.chunks_exact_mut(4).zip(&self.state) {
            chunk.copy_from_slice(&word.to_be_bytes());
        }
    }
}

impl AlgorithmName for Sha256VarCore {
    #[inline]
    fn write_alg_name(f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str("Sha256")
    }
}

impl fmt::Debug for Sha256VarCore {
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str("Sha256VarCore { ... }")
    }
}
/// Core block-level SHA-512 hasher with variable output size.
///
/// Initialization accepts only 28-, 32-, 48-, and 64-byte output sizes,
/// i.e. the SHA-512/224, SHA-512/256, SHA-384, and SHA-512 digest lengths.
#[derive(Clone)]
pub struct Sha512VarCore {
    // Current chaining value: eight 64-bit words.
    state: consts::State512,
    // Count of 128-byte blocks compressed so far; u128 because the SHA-512
    // padding carries a 128-bit message length.
    block_len: u128,
}

impl HashMarker for Sha512VarCore {}

impl BlockSizeUser for Sha512VarCore {
    type BlockSize = U128;
}

impl BufferKindUser for Sha512VarCore {
    type BufferKind = Eager;
}

impl UpdateCore for Sha512VarCore {
    #[inline]
    fn update_blocks(&mut self, blocks: &[Block<Self>]) {
        // Compress the blocks and bump the processed-block counter; the two
        // steps are independent of each other.
        compress512(&mut self.state, blocks);
        self.block_len += blocks.len() as u128;
    }
}

impl OutputSizeUser for Sha512VarCore {
    type OutputSize = U64;
}

impl VariableOutputCore for Sha512VarCore {
    const TRUNC_SIDE: TruncSide = TruncSide::Left;

    #[inline]
    fn new(output_size: usize) -> Result<Self, InvalidOutputSize> {
        // Select the initial hash value matching the requested digest size.
        let state = match output_size {
            28 => consts::H512_224,
            32 => consts::H512_256,
            48 => consts::H512_384,
            64 => consts::H512_512,
            _ => return Err(InvalidOutputSize),
        };
        Ok(Self { state, block_len: 0 })
    }

    #[inline]
    fn finalize_variable_core(&mut self, buffer: &mut Buffer<Self>, out: &mut Output<Self>) {
        // Total message length in bits, computed in u128 to feed the 16-byte
        // length field of the SHA-512 padding.
        let total_bytes =
            Self::BlockSize::U64 as u128 * self.block_len + buffer.get_pos() as u128;
        buffer.len128_padding_be(8 * total_bytes, |b| compress512(&mut self.state, from_ref(b)));
        // Serialize the final state big-endian into the output buffer.
        for (chunk, word) in out.chunks_exact_mut(8).zip(&self.state) {
            chunk.copy_from_slice(&word.to_be_bytes());
        }
    }
}

impl AlgorithmName for Sha512VarCore {
    #[inline]
    fn write_alg_name(f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str("Sha512")
    }
}

impl fmt::Debug for Sha512VarCore {
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str("Sha512VarCore { ... }")
    }
}

110
vendor/sha2/src/lib.rs vendored Normal file
View File

@@ -0,0 +1,110 @@
//! An implementation of the [SHA-2][1] cryptographic hash algorithms.
//!
//! There are 6 standard algorithms specified in the SHA-2 standard: [`Sha224`],
//! [`Sha256`], [`Sha512_224`], [`Sha512_256`], [`Sha384`], and [`Sha512`].
//!
//! Algorithmically, there are only 2 core algorithms: SHA-256 and SHA-512.
//! All other algorithms are just applications of these with different initial
//! hash values, and truncated to different digest bit lengths. The first two
//! algorithms in the list are based on SHA-256, while the last four are based
//! on SHA-512.
//!
//! # Usage
//!
//! ## One-shot API
//!
//! ```rust
//! use hex_literal::hex;
//! use sha2::{Sha256, Digest};
//!
//! let result = Sha256::digest(b"hello world");
//! assert_eq!(result[..], hex!("
//! b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9
//! ")[..]);
//! ```
//!
//! ## Incremental API
//!
//! ```rust
//! use hex_literal::hex;
//! use sha2::{Sha256, Sha512, Digest};
//!
//! // create a Sha256 object
//! let mut hasher = Sha256::new();
//!
//! // write input message
//! hasher.update(b"hello world");
//!
//! // read hash digest and consume hasher
//! let result = hasher.finalize();
//!
//! assert_eq!(result[..], hex!("
//! b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9
//! ")[..]);
//!
//! // same for Sha512
//! let mut hasher = Sha512::new();
//! hasher.update(b"hello world");
//! let result = hasher.finalize();
//!
//! assert_eq!(result[..], hex!("
//! 309ecc489c12d6eb4cc40f50c902f2b4d0ed77ee511a7c7a9bcd3ca86d4cd86f
//! 989dd35bc5ff499670da34255b45b0cfd830e81f605dcf7dc5542e93ae9cd76f
//! ")[..]);
//! ```
//!
//! Also see [RustCrypto/hashes][2] readme.
//!
//! [1]: https://en.wikipedia.org/wiki/SHA-2
//! [2]: https://github.com/RustCrypto/hashes
#![no_std]
#![cfg_attr(docsrs, feature(doc_cfg))]
#![doc(
html_logo_url = "https://raw.githubusercontent.com/RustCrypto/media/6ee8e381/logo.svg",
html_favicon_url = "https://raw.githubusercontent.com/RustCrypto/media/6ee8e381/logo.svg"
)]
#![warn(missing_docs, rust_2018_idioms)]
// Re-export `digest` so downstream crates can name its traits without an
// explicit dependency of their own.
pub use digest::{self, Digest};
#[cfg(feature = "oid")]
use digest::const_oid::{AssociatedOid, ObjectIdentifier};
use digest::{
consts::{U28, U32, U48, U64},
core_api::{CoreWrapper, CtVariableCoreWrapper},
impl_oid_carrier,
};
// `consts` contains hand-aligned constant tables; keep rustfmt away from it.
#[rustfmt::skip]
mod consts;
mod core_api;
mod sha256;
mod sha512;
// Low-level "hazmat" compression functions, exposed only behind the
// `compress` feature.
#[cfg(feature = "compress")]
pub use sha256::compress256;
#[cfg(feature = "compress")]
pub use sha512::compress512;
pub use core_api::{Sha256VarCore, Sha512VarCore};
// NIST algorithm object identifiers for each digest variant; used by the
// optional `oid` feature via `impl_oid_carrier!`.
impl_oid_carrier!(OidSha256, "2.16.840.1.101.3.4.2.1");
impl_oid_carrier!(OidSha384, "2.16.840.1.101.3.4.2.2");
impl_oid_carrier!(OidSha512, "2.16.840.1.101.3.4.2.3");
impl_oid_carrier!(OidSha224, "2.16.840.1.101.3.4.2.4");
impl_oid_carrier!(OidSha512_224, "2.16.840.1.101.3.4.2.5");
impl_oid_carrier!(OidSha512_256, "2.16.840.1.101.3.4.2.6");
// Public hasher types: the variable-output cores wrapped to fixed output
// sizes with left truncation.
/// SHA-224 hasher.
pub type Sha224 = CoreWrapper<CtVariableCoreWrapper<Sha256VarCore, U28, OidSha224>>;
/// SHA-256 hasher.
pub type Sha256 = CoreWrapper<CtVariableCoreWrapper<Sha256VarCore, U32, OidSha256>>;
/// SHA-512/224 hasher.
pub type Sha512_224 = CoreWrapper<CtVariableCoreWrapper<Sha512VarCore, U28, OidSha512_224>>;
/// SHA-512/256 hasher.
pub type Sha512_256 = CoreWrapper<CtVariableCoreWrapper<Sha512VarCore, U32, OidSha512_256>>;
/// SHA-384 hasher.
pub type Sha384 = CoreWrapper<CtVariableCoreWrapper<Sha512VarCore, U48, OidSha384>>;
/// SHA-512 hasher.
pub type Sha512 = CoreWrapper<CtVariableCoreWrapper<Sha512VarCore, U64, OidSha512>>;

43
vendor/sha2/src/sha256.rs vendored Normal file
View File

@@ -0,0 +1,43 @@
use digest::{generic_array::GenericArray, typenum::U64};
// Compile-time backend selection:
// * `force-soft-compact` -> minimal portable implementation
// * `force-soft`         -> portable unrolled implementation
// * x86/x86_64           -> `x86` backend, with either the portable soft
//                           implementation or external assembly (`asm`
//                           feature) as its fallback
// * aarch64 + `asm`      -> crypto-extension backend with soft fallback
// * loongarch64 + `loongarch64_asm` -> assembly backend
// * anything else        -> portable soft implementation
cfg_if::cfg_if! {
if #[cfg(feature = "force-soft-compact")] {
mod soft_compact;
use soft_compact::compress;
} else if #[cfg(feature = "force-soft")] {
mod soft;
use soft::compress;
} else if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] {
#[cfg(not(feature = "asm"))]
mod soft;
#[cfg(feature = "asm")]
mod soft {
pub(crate) use sha2_asm::compress256 as compress;
}
mod x86;
use x86::compress;
} else if #[cfg(all(feature = "asm", target_arch = "aarch64"))] {
mod soft;
mod aarch64;
use aarch64::compress;
} else if #[cfg(all(feature = "loongarch64_asm", target_arch = "loongarch64"))] {
mod loongarch64_asm;
use loongarch64_asm::compress;
} else {
mod soft;
use soft::compress;
}
}
/// Raw SHA-256 compression function.
///
/// This is a low-level "hazmat" API which provides direct access to the core
/// functionality of SHA-256.
#[cfg_attr(docsrs, doc(cfg(feature = "compress")))]
pub fn compress256(state: &mut [u32; 8], blocks: &[GenericArray<u8, U64>]) {
// SAFETY: GenericArray<u8, U64> and [u8; 64] have
// exactly the same memory layout
let p = blocks.as_ptr() as *const [u8; 64];
let blocks = unsafe { core::slice::from_raw_parts(p, blocks.len()) };
compress(state, blocks)
}

159
vendor/sha2/src/sha256/aarch64.rs vendored Normal file
View File

@@ -0,0 +1,159 @@
//! SHA-256 `aarch64` backend.
// Implementation adapted from mbedtls.
// TODO: stdarch intrinsics: RustCrypto/hashes#257
use core::arch::{aarch64::*, asm};
use crate::consts::K32;
cpufeatures::new!(sha2_hwcap, "sha2");
/// Compress `blocks` into `state`, dispatching at runtime between the
/// hardware crypto-extension implementation and the portable software
/// fallback based on the detected CPU features.
pub fn compress(state: &mut [u32; 8], blocks: &[[u8; 64]]) {
// TODO: Replace with https://github.com/rust-lang/rfcs/pull/2725
// after stabilization
if sha2_hwcap::get() {
// SAFETY: the runtime check above confirms the CPU advertises the
// `sha2` feature required by `sha256_compress`.
unsafe { sha256_compress(state, blocks) }
} else {
super::soft::compress(state, blocks);
}
}
/// SHA-256 compression using the ARMv8 SHA-2 crypto extensions.
///
/// # Safety
/// The caller must guarantee the CPU supports the `sha2` target feature.
#[target_feature(enable = "sha2")]
unsafe fn sha256_compress(state: &mut [u32; 8], blocks: &[[u8; 64]]) {
// SAFETY: Requires the sha2 feature.
// Load state into vectors.
let mut abcd = vld1q_u32(state[0..4].as_ptr());
let mut efgh = vld1q_u32(state[4..8].as_ptr());
// Iterate through the message blocks.
for block in blocks {
// Keep original state values.
let abcd_orig = abcd;
let efgh_orig = efgh;
// Load the message block into vectors, assuming little endianness.
let mut s0 = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(block[0..16].as_ptr())));
let mut s1 = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(block[16..32].as_ptr())));
let mut s2 = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(block[32..48].as_ptr())));
let mut s3 = vreinterpretq_u32_u8(vrev32q_u8(vld1q_u8(block[48..64].as_ptr())));
// Rounds 0 to 15 use the message words directly; each 4-round step
// adds the round constants, then updates both state halves.
// Rounds 0 to 3
let mut tmp = vaddq_u32(s0, vld1q_u32(&K32[0]));
let mut abcd_prev = abcd;
abcd = vsha256hq_u32(abcd_prev, efgh, tmp);
efgh = vsha256h2q_u32(efgh, abcd_prev, tmp);
// Rounds 4 to 7
tmp = vaddq_u32(s1, vld1q_u32(&K32[4]));
abcd_prev = abcd;
abcd = vsha256hq_u32(abcd_prev, efgh, tmp);
efgh = vsha256h2q_u32(efgh, abcd_prev, tmp);
// Rounds 8 to 11
tmp = vaddq_u32(s2, vld1q_u32(&K32[8]));
abcd_prev = abcd;
abcd = vsha256hq_u32(abcd_prev, efgh, tmp);
efgh = vsha256h2q_u32(efgh, abcd_prev, tmp);
// Rounds 12 to 15
tmp = vaddq_u32(s3, vld1q_u32(&K32[12]));
abcd_prev = abcd;
abcd = vsha256hq_u32(abcd_prev, efgh, tmp);
efgh = vsha256h2q_u32(efgh, abcd_prev, tmp);
// Rounds 16 to 63, 16 rounds per iteration: extend the message
// schedule with SU0/SU1 and keep updating the state halves.
for t in (16..64).step_by(16) {
// Rounds t to t + 3
s0 = vsha256su1q_u32(vsha256su0q_u32(s0, s1), s2, s3);
tmp = vaddq_u32(s0, vld1q_u32(&K32[t]));
abcd_prev = abcd;
abcd = vsha256hq_u32(abcd_prev, efgh, tmp);
efgh = vsha256h2q_u32(efgh, abcd_prev, tmp);
// Rounds t + 4 to t + 7
s1 = vsha256su1q_u32(vsha256su0q_u32(s1, s2), s3, s0);
tmp = vaddq_u32(s1, vld1q_u32(&K32[t + 4]));
abcd_prev = abcd;
abcd = vsha256hq_u32(abcd_prev, efgh, tmp);
efgh = vsha256h2q_u32(efgh, abcd_prev, tmp);
// Rounds t + 8 to t + 11
s2 = vsha256su1q_u32(vsha256su0q_u32(s2, s3), s0, s1);
tmp = vaddq_u32(s2, vld1q_u32(&K32[t + 8]));
abcd_prev = abcd;
abcd = vsha256hq_u32(abcd_prev, efgh, tmp);
efgh = vsha256h2q_u32(efgh, abcd_prev, tmp);
// Rounds t + 12 to t + 15
s3 = vsha256su1q_u32(vsha256su0q_u32(s3, s0), s1, s2);
tmp = vaddq_u32(s3, vld1q_u32(&K32[t + 12]));
abcd_prev = abcd;
abcd = vsha256hq_u32(abcd_prev, efgh, tmp);
efgh = vsha256h2q_u32(efgh, abcd_prev, tmp);
}
// Add the block-specific state to the original state.
abcd = vaddq_u32(abcd, abcd_orig);
efgh = vaddq_u32(efgh, efgh_orig);
}
// Store vectors into state.
vst1q_u32(state[0..4].as_mut_ptr(), abcd);
vst1q_u32(state[4..8].as_mut_ptr(), efgh);
}
// TODO remove these polyfills once SHA2 intrinsics land
// Emulates the `vsha256hq_u32` NEON intrinsic by issuing the SHA256H
// instruction directly through inline asm.
#[inline(always)]
unsafe fn vsha256hq_u32(
mut hash_efgh: uint32x4_t,
hash_abcd: uint32x4_t,
wk: uint32x4_t,
) -> uint32x4_t {
asm!(
"SHA256H {:q}, {:q}, {:v}.4S",
inout(vreg) hash_efgh, in(vreg) hash_abcd, in(vreg) wk,
options(pure, nomem, nostack, preserves_flags)
);
hash_efgh
}
// Emulates the `vsha256h2q_u32` NEON intrinsic by issuing the SHA256H2
// instruction directly through inline asm.
#[inline(always)]
unsafe fn vsha256h2q_u32(
mut hash_efgh: uint32x4_t,
hash_abcd: uint32x4_t,
wk: uint32x4_t,
) -> uint32x4_t {
asm!(
"SHA256H2 {:q}, {:q}, {:v}.4S",
inout(vreg) hash_efgh, in(vreg) hash_abcd, in(vreg) wk,
options(pure, nomem, nostack, preserves_flags)
);
hash_efgh
}
// Emulates the `vsha256su0q_u32` NEON intrinsic (SHA256SU0, first step of
// the message-schedule update) through inline asm.
#[inline(always)]
unsafe fn vsha256su0q_u32(mut w0_3: uint32x4_t, w4_7: uint32x4_t) -> uint32x4_t {
asm!(
"SHA256SU0 {:v}.4S, {:v}.4S",
inout(vreg) w0_3, in(vreg) w4_7,
options(pure, nomem, nostack, preserves_flags)
);
w0_3
}
// Emulates the `vsha256su1q_u32` NEON intrinsic (SHA256SU1, second step of
// the message-schedule update) through inline asm.
#[inline(always)]
unsafe fn vsha256su1q_u32(
mut tw0_3: uint32x4_t,
w8_11: uint32x4_t,
w12_15: uint32x4_t,
) -> uint32x4_t {
asm!(
"SHA256SU1 {:v}.4S, {:v}.4S, {:v}.4S",
inout(vreg) tw0_3, in(vreg) w8_11, in(vreg) w12_15,
options(pure, nomem, nostack, preserves_flags)
);
tw0_3
}

View File

@@ -0,0 +1,227 @@
//! LoongArch64 assembly backend
// Concatenate string literals into a single asm template fragment.
macro_rules! c {
($($l:expr)*) => {
concat!($($l ,)*)
};
}
// Rounds 0..15: load message word $i straight from the input block ($a1),
// byte-swap it to big-endian (halfword byte-reverse + 16-bit rotate), then
// run the shared round tail.
macro_rules! rounda {
($i:literal, $a:literal, $b:literal, $c:literal, $d:literal, $e:literal, $f:literal, $g:literal, $h:literal) => {
c!(
"ld.w $a5, $a1, (" $i " * 4);"
"revb.2h $a5, $a5;"
"rotri.w $a5, $a5, 16;"
roundtail!($i, $a, $b, $c, $d, $e, $f, $g, $h)
)
};
}
// Rounds 16..63: extend the message schedule from the 16-word ring buffer
// kept on the stack: w[i] = w[i-16] + s0(w[i-15]) + w[i-7] + s1(w[i-2]),
// with s0 = rotr7 ^ rotr18 ^ shr3 and s1 = rotr17 ^ rotr19 ^ shr10, then
// run the shared round tail.
macro_rules! roundb {
($i:literal, $a:literal, $b:literal, $c:literal, $d:literal, $e:literal, $f:literal, $g:literal, $h:literal) => {
c!(
"ld.w $a4, $sp, (((" $i " - 15) & 0xF) * 4);"
"ld.w $a5, $sp, (((" $i " - 16) & 0xF) * 4);"
"ld.w $a6, $sp, (((" $i " - 7) & 0xF) * 4);"
"add.w $a5, $a5, $a6;"
"rotri.w $a6, $a4, 18;"
"srli.w $a7, $a4, 3;"
"rotri.w $a4, $a4, 7;"
"xor $a6, $a6, $a7;"
"xor $a4, $a4, $a6;"
"add.w $a5, $a5, $a4;"
"ld.w $a4, $sp, (((" $i " - 2) & 0xF) * 4);"
"rotri.w $a6, $a4, 19;"
"srli.w $a7, $a4, 10;"
"rotri.w $a4, $a4, 17;"
"xor $a6, $a6, $a7;"
"xor $a4, $a4, $a6;"
"add.w $a5, $a5, $a4;"
roundtail!($i, $a, $b, $c, $d, $e, $f, $g, $h)
)
};
}
// Shared round body for working registers $a..$h, with the current schedule
// word in $a5 and the K constant table base in $a3. Part 0 computes
// h += S1(e) + Ch(e,f,g) + K[i] + w[i]; Part 1 adds h into d; Part 2 adds
// S0(a) + Maj(a,b,c) into h and stores w[i] into the stack ring buffer.
macro_rules! roundtail {
($i:literal, $a:literal, $b:literal, $c:literal, $d:literal, $e:literal, $f:literal, $g:literal, $h:literal) => {
c!(
// Part 0
"rotri.w $a6, " $e ", 11;"
"rotri.w $a7, " $e ", 25;"
"rotri.w $a4, " $e ", 6;"
"xor $a6, $a6, $a7;"
"xor $a4, $a4, $a6;"
"xor $a6, " $g ", " $f ";"
"ld.w $a7, $a3, " $i " * 4;"
"and $a6, $a6, " $e ";"
"xor $a6, $a6, " $g ";"
"add.w $a4, $a4, $a6;"
"add.w $a4, $a4, $a7;"
"add.w " $h ", " $h ", $a5;"
"add.w " $h ", " $h ", $a4;"
// Part 1
"add.w " $d ", " $d ", " $h ";"
// Part 2
"rotri.w $a6, " $a ", 13;"
"rotri.w $a7, " $a ", 22;"
"rotri.w $a4, " $a ", 2;"
"xor $a6, $a6, $a7;"
"xor $a4, $a4, $a6;"
"add.w " $h ", " $h ", $a4;"
"or $a4, " $c ", " $b ";"
"and $a6, " $c ", " $b ";"
"and $a4, $a4, " $a ";"
"or $a4, $a4, $a6;"
"add.w " $h ", " $h ", $a4;"
"st.w $a5, $sp, ((" $i " & 0xF) * 4);"
)
};
}
/// SHA-256 compression function implemented in LoongArch64 assembly.
///
/// Registers: $a0 = state pointer, $a1 = block pointer, $a2 = remaining
/// block count, $a3 = K32 table, $t0..$t7 = working variables a..h, and a
/// 64-byte stack area holds the 16-word message-schedule ring buffer.
pub fn compress(state: &mut [u32; 8], blocks: &[[u8; 64]]) {
// The asm loop is do-while shaped; bail out before entering it when there
// is nothing to process.
if blocks.is_empty() {
return;
}
unsafe {
core::arch::asm!(
// Allocate scratch stack space
"addi.d $sp, $sp, -64;",
// Load state
"ld.w $t0, $a0, 0",
"ld.w $t1, $a0, 4",
"ld.w $t2, $a0, 8",
"ld.w $t3, $a0, 12",
"ld.w $t4, $a0, 16",
"ld.w $t5, $a0, 20",
"ld.w $t6, $a0, 24",
"ld.w $t7, $a0, 28",
"42:",
// Do 64 rounds of hashing
rounda!( 0, "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7"),
rounda!( 1, "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6"),
rounda!( 2, "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5"),
rounda!( 3, "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4"),
rounda!( 4, "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3"),
rounda!( 5, "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2"),
rounda!( 6, "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1"),
rounda!( 7, "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0"),
rounda!( 8, "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7"),
rounda!( 9, "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6"),
rounda!(10, "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5"),
rounda!(11, "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4"),
rounda!(12, "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3"),
rounda!(13, "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2"),
rounda!(14, "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1"),
rounda!(15, "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0"),
roundb!(16, "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7"),
roundb!(17, "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6"),
roundb!(18, "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5"),
roundb!(19, "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4"),
roundb!(20, "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3"),
roundb!(21, "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2"),
roundb!(22, "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1"),
roundb!(23, "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0"),
roundb!(24, "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7"),
roundb!(25, "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6"),
roundb!(26, "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5"),
roundb!(27, "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4"),
roundb!(28, "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3"),
roundb!(29, "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2"),
roundb!(30, "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1"),
roundb!(31, "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0"),
roundb!(32, "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7"),
roundb!(33, "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6"),
roundb!(34, "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5"),
roundb!(35, "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4"),
roundb!(36, "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3"),
roundb!(37, "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2"),
roundb!(38, "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1"),
roundb!(39, "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0"),
roundb!(40, "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7"),
roundb!(41, "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6"),
roundb!(42, "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5"),
roundb!(43, "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4"),
roundb!(44, "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3"),
roundb!(45, "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2"),
roundb!(46, "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1"),
roundb!(47, "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0"),
roundb!(48, "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7"),
roundb!(49, "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6"),
roundb!(50, "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5"),
roundb!(51, "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4"),
roundb!(52, "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3"),
roundb!(53, "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2"),
roundb!(54, "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1"),
roundb!(55, "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0"),
roundb!(56, "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7"),
roundb!(57, "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6"),
roundb!(58, "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5"),
roundb!(59, "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4"),
roundb!(60, "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3"),
roundb!(61, "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2"),
roundb!(62, "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1"),
roundb!(63, "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0"),
// Update state registers
"ld.w $a4, $a0, 0", // a
"ld.w $a5, $a0, 4", // b
"ld.w $a6, $a0, 8", // c
"ld.w $a7, $a0, 12", // d
"add.w $t0, $t0, $a4",
"add.w $t1, $t1, $a5",
"add.w $t2, $t2, $a6",
"add.w $t3, $t3, $a7",
"ld.w $a4, $a0, 16", // e
"ld.w $a5, $a0, 20", // f
"ld.w $a6, $a0, 24", // g
"ld.w $a7, $a0, 28", // h
"add.w $t4, $t4, $a4",
"add.w $t5, $t5, $a5",
"add.w $t6, $t6, $a6",
"add.w $t7, $t7, $a7",
// Save updated state
"st.w $t0, $a0, 0",
"st.w $t1, $a0, 4",
"st.w $t2, $a0, 8",
"st.w $t3, $a0, 12",
"st.w $t4, $a0, 16",
"st.w $t5, $a0, 20",
"st.w $t6, $a0, 24",
"st.w $t7, $a0, 28",
// Looping over blocks
"addi.d $a1, $a1, 64",
"addi.d $a2, $a2, -1",
"bnez $a2, 42b",
// Restore stack register
"addi.d $sp, $sp, 64",
in("$a0") state,
inout("$a1") blocks.as_ptr() => _,
inout("$a2") blocks.len() => _,
in("$a3") crate::consts::K32.as_ptr(),
// Clobbers
out("$a4") _,
out("$a5") _,
out("$a6") _,
out("$a7") _,
out("$t0") _,
out("$t1") _,
out("$t2") _,
out("$t3") _,
out("$t4") _,
out("$t5") _,
out("$t6") _,
out("$t7") _,
options(preserves_flags),
);
}
}

218
vendor/sha2/src/sha256/soft.rs vendored Normal file
View File

@@ -0,0 +1,218 @@
#![allow(clippy::many_single_char_names)]
use crate::consts::BLOCK_LEN;
use core::convert::TryInto;
#[inline(always)]
fn shl(v: [u32; 4], o: u32) -> [u32; 4] {
    // Lane-wise logical *right* shift: the helper is named after the data
    // direction of the SSE intrinsic it stands in for, not the Rust operator.
    let mut out = v;
    for lane in out.iter_mut() {
        *lane >>= o;
    }
    out
}
#[inline(always)]
fn shr(v: [u32; 4], o: u32) -> [u32; 4] {
    // Lane-wise logical *left* shift: the helper is named after the data
    // direction of the SSE intrinsic it stands in for, not the Rust operator.
    let mut out = v;
    for lane in out.iter_mut() {
        *lane <<= o;
    }
    out
}
#[inline(always)]
fn or(a: [u32; 4], b: [u32; 4]) -> [u32; 4] {
    // Lane-wise bitwise OR.
    let mut out = a;
    for (lane, rhs) in out.iter_mut().zip(b.iter()) {
        *lane |= *rhs;
    }
    out
}
#[inline(always)]
fn xor(a: [u32; 4], b: [u32; 4]) -> [u32; 4] {
    // Lane-wise bitwise XOR.
    let mut out = a;
    for (lane, rhs) in out.iter_mut().zip(b.iter()) {
        *lane ^= *rhs;
    }
    out
}
#[inline(always)]
fn add(a: [u32; 4], b: [u32; 4]) -> [u32; 4] {
    // Lane-wise modular (wrapping) addition.
    let mut out = a;
    for (lane, rhs) in out.iter_mut().zip(b.iter()) {
        *lane = lane.wrapping_add(*rhs);
    }
    out
}
fn sha256load(v2: [u32; 4], v3: [u32; 4]) -> [u32; 4] {
    // Shift the top lane of `v3` in ahead of the low three lanes of `v2`,
    // mirroring the data movement of the SSE-style schedule "load".
    let [a, b, c, _] = v2;
    [v3[3], a, b, c]
}
fn sha256swap(v0: [u32; 4]) -> [u32; 4] {
    // Swap the two 64-bit halves of the vector.
    let [a, b, c, d] = v0;
    [c, d, a, b]
}
fn sha256msg1(v0: [u32; 4], v1: [u32; 4]) -> [u32; 4] {
    // Software analogue of the SHA256MSG1 step: add sigma0 of the
    // shifted-by-one word window (low three lanes of v0 preceded by the top
    // lane of v1) into v0, lane by lane.
    #[inline]
    fn sigma0(x: u32) -> u32 {
        // sigma0(x) = ROTR^7(x) ^ ROTR^18(x) ^ SHR^3(x)
        x.rotate_right(7) ^ x.rotate_right(18) ^ (x >> 3)
    }
    let window = [v1[3], v0[0], v0[1], v0[2]];
    [
        v0[0].wrapping_add(sigma0(window[0])),
        v0[1].wrapping_add(sigma0(window[1])),
        v0[2].wrapping_add(sigma0(window[2])),
        v0[3].wrapping_add(sigma0(window[3])),
    ]
}
fn sha256msg2(v4: [u32; 4], v3: [u32; 4]) -> [u32; 4] {
    // Software analogue of the SHA256MSG2 step: finish four schedule words
    // by adding sigma1 of the two most recent words; note the feed-forward
    // inside the vector (w18 depends on w16, w19 on w17).
    #[inline]
    fn sigma1(x: u32) -> u32 {
        // sigma1(x) = ROTR^17(x) ^ ROTR^19(x) ^ SHR^10(x)
        x.rotate_right(17) ^ x.rotate_right(19) ^ (x >> 10)
    }
    let [s3, s2, s1, s0] = v4;
    let (w15, w14) = (v3[0], v3[1]);
    let mut out = [0u32; 4];
    out[3] = s0.wrapping_add(sigma1(w14)); // w16
    out[2] = s1.wrapping_add(sigma1(w15)); // w17
    out[1] = s2.wrapping_add(sigma1(out[3])); // w18
    out[0] = s3.wrapping_add(sigma1(out[2])); // w19
    out
}
fn sha256_digest_round_x2(cdgh: [u32; 4], abef: [u32; 4], wk: [u32; 4]) -> [u32; 4] {
    // Run two consecutive SHA-256 rounds on state packed SHA-NI style:
    // `abef` holds (a, b, e, f) and `cdgh` holds (c, d, g, h); the two
    // pre-added (K + w) round inputs sit in the low lanes of `wk`.
    #[inline]
    fn big_sigma0(a: u32) -> u32 {
        a.rotate_right(2) ^ a.rotate_right(13) ^ a.rotate_right(22)
    }
    #[inline]
    fn big_sigma1(e: u32) -> u32 {
        e.rotate_right(6) ^ e.rotate_right(11) ^ e.rotate_right(25)
    }
    // Ch(e, f, g): select f or g bit-wise, controlled by e.
    #[inline]
    fn choose(e: u32, f: u32, g: u32) -> u32 {
        g ^ (e & (f ^ g))
    }
    // Maj(a, b, c): bit-wise majority vote.
    #[inline]
    fn majority(a: u32, b: u32, c: u32) -> u32 {
        (a & b) ^ (a & c) ^ (b & c)
    }
    let [mut a, mut b, mut e, mut f] = abef;
    let [mut c, mut d, mut g, mut h] = cdgh;
    // wk[3] is the first round's input, wk[2] the second's.
    for &k in &[wk[3], wk[2]] {
        let t = big_sigma1(e)
            .wrapping_add(choose(e, f, g))
            .wrapping_add(k)
            .wrapping_add(h);
        let u = big_sigma0(a).wrapping_add(majority(a, b, c));
        // Standard downshift of the eight working variables.
        h = g;
        g = f;
        f = e;
        e = d.wrapping_add(t);
        d = c;
        c = b;
        b = a;
        a = t.wrapping_add(u);
    }
    [a, b, e, f]
}
fn schedule(v0: [u32; 4], v1: [u32; 4], v2: [u32; 4], v3: [u32; 4]) -> [u32; 4] {
    // One message-schedule step: derive the next four words from the
    // previous sixteen (four words per input vector).
    sha256msg2(add(sha256msg1(v0, v1), sha256load(v2, v3)), v3)
}
// Run four SHA-256 rounds: pre-add the round constants for group `$i` to
// the schedule words, apply the two-round primitive, swap the vector halves
// of the constants, and apply it again for the remaining two rounds.
macro_rules! rounds4 {
($abef:ident, $cdgh:ident, $rest:expr, $i:expr) => {{
let t1 = add($rest, crate::consts::K32X4[$i]);
$cdgh = sha256_digest_round_x2($cdgh, $abef, t1);
let t2 = sha256swap(t1);
$abef = sha256_digest_round_x2($abef, $cdgh, t2);
}};
}
// Extend the message schedule by four words into `$w4`, then run four
// rounds with them via `rounds4!`.
macro_rules! schedule_rounds4 {
(
$abef:ident, $cdgh:ident,
$w0:expr, $w1:expr, $w2:expr, $w3:expr, $w4:expr,
$i: expr
) => {{
$w4 = schedule($w0, $w1, $w2, $w3);
rounds4!($abef, $cdgh, $w4, $i);
}};
}
/// Process a block with the SHA-256 algorithm.
fn sha256_digest_block_u32(state: &mut [u32; 8], block: &[u32; 16]) {
// State is tracked in two vectors laid out SHA-NI style: (a, b, e, f)
// and (c, d, g, h).
let mut abef = [state[0], state[1], state[4], state[5]];
let mut cdgh = [state[2], state[3], state[6], state[7]];
// Rounds 0..64
// Message words are placed in reversed lane order, matching K32X4.
let mut w0 = [block[3], block[2], block[1], block[0]];
let mut w1 = [block[7], block[6], block[5], block[4]];
let mut w2 = [block[11], block[10], block[9], block[8]];
let mut w3 = [block[15], block[14], block[13], block[12]];
let mut w4;
// First 16 rounds consume the message words directly; the remaining 48
// extend the schedule four words at a time, rotating through w0..w4.
rounds4!(abef, cdgh, w0, 0);
rounds4!(abef, cdgh, w1, 1);
rounds4!(abef, cdgh, w2, 2);
rounds4!(abef, cdgh, w3, 3);
schedule_rounds4!(abef, cdgh, w0, w1, w2, w3, w4, 4);
schedule_rounds4!(abef, cdgh, w1, w2, w3, w4, w0, 5);
schedule_rounds4!(abef, cdgh, w2, w3, w4, w0, w1, 6);
schedule_rounds4!(abef, cdgh, w3, w4, w0, w1, w2, 7);
schedule_rounds4!(abef, cdgh, w4, w0, w1, w2, w3, 8);
schedule_rounds4!(abef, cdgh, w0, w1, w2, w3, w4, 9);
schedule_rounds4!(abef, cdgh, w1, w2, w3, w4, w0, 10);
schedule_rounds4!(abef, cdgh, w2, w3, w4, w0, w1, 11);
schedule_rounds4!(abef, cdgh, w3, w4, w0, w1, w2, 12);
schedule_rounds4!(abef, cdgh, w4, w0, w1, w2, w3, 13);
schedule_rounds4!(abef, cdgh, w0, w1, w2, w3, w4, 14);
schedule_rounds4!(abef, cdgh, w1, w2, w3, w4, w0, 15);
// Unpack the lanes and fold the compressed block into the caller state.
let [a, b, e, f] = abef;
let [c, d, g, h] = cdgh;
state[0] = state[0].wrapping_add(a);
state[1] = state[1].wrapping_add(b);
state[2] = state[2].wrapping_add(c);
state[3] = state[3].wrapping_add(d);
state[4] = state[4].wrapping_add(e);
state[5] = state[5].wrapping_add(f);
state[6] = state[6].wrapping_add(g);
state[7] = state[7].wrapping_add(h);
}
/// Portable SHA-256 compression: fold every 64-byte block into `state`.
pub fn compress(state: &mut [u32; 8], blocks: &[[u8; 64]]) {
    // Work on a local copy of the state; LLVM otherwise emits redundant
    // stores because it cannot prove `state` and `blocks` do not alias.
    let mut working = *state;
    let mut words = [0u32; BLOCK_LEN];
    for block in blocks.iter() {
        // Decode the block into sixteen big-endian 32-bit words.
        for (word, bytes) in words.iter_mut().zip(block.chunks_exact(4)) {
            *word = u32::from_be_bytes(bytes.try_into().unwrap());
        }
        sha256_digest_block_u32(&mut working, &words);
    }
    *state = working;
}

66
vendor/sha2/src/sha256/soft_compact.rs vendored Normal file
View File

@@ -0,0 +1,66 @@
use crate::consts::K32;
/// Splits a 64-byte block into sixteen big-endian `u32` words.
fn to_u32s(block: &[u8; 64]) -> [u32; 16] {
    use core::convert::TryInto;
    let mut words = [0u32; 16];
    for (word, bytes) in words.iter_mut().zip(block.chunks_exact(4)) {
        *word = u32::from_be_bytes(bytes.try_into().unwrap());
    }
    words
}
// Compression function for a single SHA-256 block (FIPS 180-4, section 6.2.2).
// `state` is the 8-word chaining value, updated in place with feed-forward.
fn compress_u32(state: &mut [u32; 8], block: [u32; 16]) {
    let [mut a, mut b, mut c, mut d, mut e, mut f, mut g, mut h] = *state;

    // Message schedule: expand the 16 block words into 64 round words.
    let mut w = [0; 64];
    w[..16].copy_from_slice(&block);
    for i in 16..64 {
        // small sigma-0: rotr7 ^ rotr18 ^ shr3
        let w15 = w[i - 15];
        let s0 = (w15.rotate_right(7)) ^ (w15.rotate_right(18)) ^ (w15 >> 3);
        // small sigma-1: rotr17 ^ rotr19 ^ shr10
        let w2 = w[i - 2];
        let s1 = (w2.rotate_right(17)) ^ (w2.rotate_right(19)) ^ (w2 >> 10);
        w[i] = w[i - 16]
            .wrapping_add(s0)
            .wrapping_add(w[i - 7])
            .wrapping_add(s1);
    }

    // 64 rounds of the working-variable update.
    for i in 0..64 {
        // big sigma-1 of e
        let s1 = e.rotate_right(6) ^ e.rotate_right(11) ^ e.rotate_right(25);
        // "choose": bits of f where e is set, bits of g elsewhere
        let ch = (e & f) ^ ((!e) & g);
        let t1 = s1
            .wrapping_add(ch)
            .wrapping_add(K32[i])
            .wrapping_add(w[i])
            .wrapping_add(h);
        // big sigma-0 of a
        let s0 = a.rotate_right(2) ^ a.rotate_right(13) ^ a.rotate_right(22);
        // "majority" vote of a, b, c
        let maj = (a & b) ^ (a & c) ^ (b & c);
        let t2 = s0.wrapping_add(maj);
        h = g;
        g = f;
        f = e;
        e = d.wrapping_add(t1);
        d = c;
        c = b;
        b = a;
        a = t1.wrapping_add(t2);
    }

    // Feed-forward: add the working variables back into the chaining state.
    state[0] = state[0].wrapping_add(a);
    state[1] = state[1].wrapping_add(b);
    state[2] = state[2].wrapping_add(c);
    state[3] = state[3].wrapping_add(d);
    state[4] = state[4].wrapping_add(e);
    state[5] = state[5].wrapping_add(f);
    state[6] = state[6].wrapping_add(g);
    state[7] = state[7].wrapping_add(h);
}
/// Compact portable SHA-256 compression over a sequence of blocks.
pub fn compress(state: &mut [u32; 8], blocks: &[[u8; 64]]) {
    blocks
        .iter()
        .for_each(|block| compress_u32(state, to_u32s(block)));
}

112
vendor/sha2/src/sha256/x86.rs vendored Normal file
View File

@@ -0,0 +1,112 @@
//! SHA-256 `x86`/`x86_64` backend
#![allow(clippy::many_single_char_names)]
#[cfg(target_arch = "x86")]
use core::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::*;
// One step of the SHA-256 message schedule: derives the next four message
// words from the previous sixteen using the SHA-NI msg1/msg2 intrinsics.
//
// SAFETY: requires SHA-NI / SSE4.1-level support; only called from
// `digest_blocks`, which is gated by `#[target_feature]` and a runtime check.
unsafe fn schedule(v0: __m128i, v1: __m128i, v2: __m128i, v3: __m128i) -> __m128i {
    let t1 = _mm_sha256msg1_epu32(v0, v1);
    // Pull in w[i-7]..w[i-4] by shifting across the last two word groups.
    let t2 = _mm_alignr_epi8(v3, v2, 4);
    let t3 = _mm_add_epi32(t1, t2);
    _mm_sha256msg2_epu32(t3, v3)
}
// Performs four SHA-256 rounds on the (abef, cdgh) state vectors via the
// SHA-NI `sha256rnds2` instruction. `$rest` holds four message words and
// `$i` indexes the round-constant quadruple in `K32X4`.
macro_rules! rounds4 {
    ($abef:ident, $cdgh:ident, $rest:expr, $i:expr) => {{
        let k = crate::consts::K32X4[$i];
        // K32X4 groups are stored reversed, so `_mm_set_epi32` (which takes
        // lanes high-to-low) puts each constant in the right lane.
        let kv = _mm_set_epi32(k[0] as i32, k[1] as i32, k[2] as i32, k[3] as i32);
        let t1 = _mm_add_epi32($rest, kv);
        // Each sha256rnds2 performs two rounds; the 0x0E shuffle exposes the
        // upper two w+k values for the second instruction.
        $cdgh = _mm_sha256rnds2_epu32($cdgh, $abef, t1);
        let t2 = _mm_shuffle_epi32(t1, 0x0E);
        $abef = _mm_sha256rnds2_epu32($abef, $cdgh, t2);
    }};
}
// Fused "schedule + rounds" step: computes the next four message words into
// `$w4` with `schedule`, then consumes them with `rounds4!` for group `$i`.
macro_rules! schedule_rounds4 {
    (
        $abef:ident, $cdgh:ident,
        $w0:expr, $w1:expr, $w2:expr, $w3:expr, $w4:expr,
        $i: expr
    ) => {{
        $w4 = schedule($w0, $w1, $w2, $w3);
        rounds4!($abef, $cdgh, $w4, $i);
    }};
}
// we use unaligned loads with `__m128i` pointers
#[allow(clippy::cast_ptr_alignment)]
#[target_feature(enable = "sha,sse2,ssse3,sse4.1")]
unsafe fn digest_blocks(state: &mut [u32; 8], blocks: &[[u8; 64]]) {
    // Byte-shuffle mask: converts each 16-byte message chunk from big-endian
    // byte order into the word/lane order the SHA-NI instructions expect.
    #[allow(non_snake_case)]
    let MASK: __m128i = _mm_set_epi64x(
        0x0C0D_0E0F_0809_0A0Bu64 as i64,
        0x0405_0607_0001_0203u64 as i64,
    );
    // Load the linear (a..h) state and rearrange it into the (abef, cdgh)
    // register layout required by `sha256rnds2`.
    let state_ptr = state.as_ptr() as *const __m128i;
    let dcba = _mm_loadu_si128(state_ptr.add(0));
    let efgh = _mm_loadu_si128(state_ptr.add(1));
    let cdab = _mm_shuffle_epi32(dcba, 0xB1);
    let efgh = _mm_shuffle_epi32(efgh, 0x1B);
    let mut abef = _mm_alignr_epi8(cdab, efgh, 8);
    let mut cdgh = _mm_blend_epi16(efgh, cdab, 0xF0);
    for block in blocks {
        // Snapshot for the feed-forward addition after all 64 rounds.
        let abef_save = abef;
        let cdgh_save = cdgh;
        let data_ptr = block.as_ptr() as *const __m128i;
        let mut w0 = _mm_shuffle_epi8(_mm_loadu_si128(data_ptr.add(0)), MASK);
        let mut w1 = _mm_shuffle_epi8(_mm_loadu_si128(data_ptr.add(1)), MASK);
        let mut w2 = _mm_shuffle_epi8(_mm_loadu_si128(data_ptr.add(2)), MASK);
        let mut w3 = _mm_shuffle_epi8(_mm_loadu_si128(data_ptr.add(3)), MASK);
        let mut w4;
        // Rounds 0..16 use raw message words; the rest interleave the
        // message-schedule expansion with the round computation.
        rounds4!(abef, cdgh, w0, 0);
        rounds4!(abef, cdgh, w1, 1);
        rounds4!(abef, cdgh, w2, 2);
        rounds4!(abef, cdgh, w3, 3);
        schedule_rounds4!(abef, cdgh, w0, w1, w2, w3, w4, 4);
        schedule_rounds4!(abef, cdgh, w1, w2, w3, w4, w0, 5);
        schedule_rounds4!(abef, cdgh, w2, w3, w4, w0, w1, 6);
        schedule_rounds4!(abef, cdgh, w3, w4, w0, w1, w2, 7);
        schedule_rounds4!(abef, cdgh, w4, w0, w1, w2, w3, 8);
        schedule_rounds4!(abef, cdgh, w0, w1, w2, w3, w4, 9);
        schedule_rounds4!(abef, cdgh, w1, w2, w3, w4, w0, 10);
        schedule_rounds4!(abef, cdgh, w2, w3, w4, w0, w1, 11);
        schedule_rounds4!(abef, cdgh, w3, w4, w0, w1, w2, 12);
        schedule_rounds4!(abef, cdgh, w4, w0, w1, w2, w3, 13);
        schedule_rounds4!(abef, cdgh, w0, w1, w2, w3, w4, 14);
        schedule_rounds4!(abef, cdgh, w1, w2, w3, w4, w0, 15);
        // Feed-forward.
        abef = _mm_add_epi32(abef, abef_save);
        cdgh = _mm_add_epi32(cdgh, cdgh_save);
    }
    // Shuffle the working registers back into linear (a..h) order and store.
    let feba = _mm_shuffle_epi32(abef, 0x1B);
    let dchg = _mm_shuffle_epi32(cdgh, 0xB1);
    let dcba = _mm_blend_epi16(feba, dchg, 0xF0);
    let hgef = _mm_alignr_epi8(dchg, feba, 8);
    let state_ptr_mut = state.as_mut_ptr() as *mut __m128i;
    _mm_storeu_si128(state_ptr_mut.add(0), dcba);
    _mm_storeu_si128(state_ptr_mut.add(1), hgef);
}
cpufeatures::new!(shani_cpuid, "sha", "sse2", "ssse3", "sse4.1");
/// SHA-256 compression entry point with runtime CPU-feature dispatch:
/// uses the SHA-NI backend when available, otherwise the portable fallback.
pub fn compress(state: &mut [u32; 8], blocks: &[[u8; 64]]) {
    // TODO: Replace with https://github.com/rust-lang/rfcs/pull/2725
    // after stabilization
    if shani_cpuid::get() {
        // SAFETY: the cpuid check above guarantees the required `sha`,
        // `sse2`, `ssse3` and `sse4.1` features are present.
        unsafe {
            digest_blocks(state, blocks);
        }
    } else {
        super::soft::compress(state, blocks);
    }
}

45
vendor/sha2/src/sha512.rs vendored Normal file
View File

@@ -0,0 +1,45 @@
use digest::{generic_array::GenericArray, typenum::U128};
// Compile-time selection of the SHA-512 compression backend.
cfg_if::cfg_if! {
    if #[cfg(feature = "force-soft-compact")] {
        // Smallest code size: compact portable implementation.
        mod soft_compact;
        use soft_compact::compress;
    } else if #[cfg(feature = "force-soft")] {
        // Unconditional portable implementation.
        mod soft;
        use soft::compress;
    } else if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] {
        #[cfg(not(feature = "asm"))]
        mod soft;
        // With `asm`, the "soft" fallback is routed to sha2-asm instead of
        // the portable Rust implementation.
        #[cfg(feature = "asm")]
        mod soft {
            pub(crate) fn compress(state: &mut [u64; 8], blocks: &[[u8; 128]]) {
                sha2_asm::compress512(state, blocks);
            }
        }
        mod x86;
        use x86::compress;
    } else if #[cfg(all(feature = "asm", target_arch = "aarch64"))] {
        // aarch64 backend dispatches at runtime and falls back to `soft`.
        mod soft;
        mod aarch64;
        use aarch64::compress;
    } else if #[cfg(all(feature = "loongarch64_asm", target_arch = "loongarch64"))] {
        mod loongarch64_asm;
        use loongarch64_asm::compress;
    } else {
        mod soft;
        use soft::compress;
    }
}
/// Raw SHA-512 compression function.
///
/// This is a low-level "hazmat" API which provides direct access to the core
/// functionality of SHA-512.
#[cfg_attr(docsrs, doc(cfg(feature = "compress")))]
pub fn compress512(state: &mut [u64; 8], blocks: &[GenericArray<u8, U128>]) {
    // SAFETY: GenericArray<u8, U128> and [u8; 128] have
    // exactly the same memory layout
    let p = blocks.as_ptr() as *const [u8; 128];
    let blocks = unsafe { core::slice::from_raw_parts(p, blocks.len()) };
    compress(state, blocks)
}

235
vendor/sha2/src/sha512/aarch64.rs vendored Normal file
View File

@@ -0,0 +1,235 @@
// Implementation adapted from mbedtls.
use core::arch::{aarch64::*, asm};
use crate::consts::K64;
cpufeatures::new!(sha3_hwcap, "sha3");
/// SHA-512 compression entry point for aarch64 with runtime dispatch:
/// the SHA-512 instructions are gated behind the `sha3` hwcap, so fall
/// back to the portable implementation when they are absent.
pub fn compress(state: &mut [u64; 8], blocks: &[[u8; 128]]) {
    // TODO: Replace with https://github.com/rust-lang/rfcs/pull/2725
    // after stabilization
    if sha3_hwcap::get() {
        // SAFETY: the hwcap check guarantees the sha3 instructions exist.
        unsafe { sha512_compress(state, blocks) }
    } else {
        super::soft::compress(state, blocks);
    }
}
#[target_feature(enable = "sha3")]
unsafe fn sha512_compress(state: &mut [u64; 8], blocks: &[[u8; 128]]) {
    // SAFETY: Requires the sha3 feature.
    // Load state into vectors. State is kept as four 2-lane vectors:
    // ab, cd, ef, gh — pairs of adjacent working variables.
    let mut ab = vld1q_u64(state[0..2].as_ptr());
    let mut cd = vld1q_u64(state[2..4].as_ptr());
    let mut ef = vld1q_u64(state[4..6].as_ptr());
    let mut gh = vld1q_u64(state[6..8].as_ptr());

    // Iterate through the message blocks.
    for block in blocks {
        // Keep original state values for the feed-forward addition.
        let ab_orig = ab;
        let cd_orig = cd;
        let ef_orig = ef;
        let gh_orig = gh;

        // Load the message block into vectors, assuming little endianness.
        // vrev64q_u8 byte-swaps each 64-bit lane to big-endian word order.
        let mut s0 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(block[0..16].as_ptr())));
        let mut s1 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(block[16..32].as_ptr())));
        let mut s2 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(block[32..48].as_ptr())));
        let mut s3 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(block[48..64].as_ptr())));
        let mut s4 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(block[64..80].as_ptr())));
        let mut s5 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(block[80..96].as_ptr())));
        let mut s6 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(block[96..112].as_ptr())));
        let mut s7 = vreinterpretq_u64_u8(vrev64q_u8(vld1q_u8(block[112..128].as_ptr())));

        // Rounds 0 and 1: each SHA512H/SHA512H2 pair advances two rounds,
        // with the (ab, cd, ef, gh) roles rotating every step below.
        let mut initial_sum = vaddq_u64(s0, vld1q_u64(&K64[0]));
        let mut sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), gh);
        let mut intermed = vsha512hq_u64(sum, vextq_u64(ef, gh, 1), vextq_u64(cd, ef, 1));
        gh = vsha512h2q_u64(intermed, cd, ab);
        cd = vaddq_u64(cd, intermed);

        // Rounds 2 and 3
        initial_sum = vaddq_u64(s1, vld1q_u64(&K64[2]));
        sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), ef);
        intermed = vsha512hq_u64(sum, vextq_u64(cd, ef, 1), vextq_u64(ab, cd, 1));
        ef = vsha512h2q_u64(intermed, ab, gh);
        ab = vaddq_u64(ab, intermed);

        // Rounds 4 and 5
        initial_sum = vaddq_u64(s2, vld1q_u64(&K64[4]));
        sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), cd);
        intermed = vsha512hq_u64(sum, vextq_u64(ab, cd, 1), vextq_u64(gh, ab, 1));
        cd = vsha512h2q_u64(intermed, gh, ef);
        gh = vaddq_u64(gh, intermed);

        // Rounds 6 and 7
        initial_sum = vaddq_u64(s3, vld1q_u64(&K64[6]));
        sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), ab);
        intermed = vsha512hq_u64(sum, vextq_u64(gh, ab, 1), vextq_u64(ef, gh, 1));
        ab = vsha512h2q_u64(intermed, ef, cd);
        ef = vaddq_u64(ef, intermed);

        // Rounds 8 and 9
        initial_sum = vaddq_u64(s4, vld1q_u64(&K64[8]));
        sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), gh);
        intermed = vsha512hq_u64(sum, vextq_u64(ef, gh, 1), vextq_u64(cd, ef, 1));
        gh = vsha512h2q_u64(intermed, cd, ab);
        cd = vaddq_u64(cd, intermed);

        // Rounds 10 and 11
        initial_sum = vaddq_u64(s5, vld1q_u64(&K64[10]));
        sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), ef);
        intermed = vsha512hq_u64(sum, vextq_u64(cd, ef, 1), vextq_u64(ab, cd, 1));
        ef = vsha512h2q_u64(intermed, ab, gh);
        ab = vaddq_u64(ab, intermed);

        // Rounds 12 and 13
        initial_sum = vaddq_u64(s6, vld1q_u64(&K64[12]));
        sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), cd);
        intermed = vsha512hq_u64(sum, vextq_u64(ab, cd, 1), vextq_u64(gh, ab, 1));
        cd = vsha512h2q_u64(intermed, gh, ef);
        gh = vaddq_u64(gh, intermed);

        // Rounds 14 and 15
        initial_sum = vaddq_u64(s7, vld1q_u64(&K64[14]));
        sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), ab);
        intermed = vsha512hq_u64(sum, vextq_u64(gh, ab, 1), vextq_u64(ef, gh, 1));
        ab = vsha512h2q_u64(intermed, ef, cd);
        ef = vaddq_u64(ef, intermed);

        // Rounds 16..80: identical round structure, but each pair of message
        // words is first updated with the SHA512SU0/SU1 schedule instructions.
        for t in (16..80).step_by(16) {
            // Rounds t and t + 1
            s0 = vsha512su1q_u64(vsha512su0q_u64(s0, s1), s7, vextq_u64(s4, s5, 1));
            initial_sum = vaddq_u64(s0, vld1q_u64(&K64[t]));
            sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), gh);
            intermed = vsha512hq_u64(sum, vextq_u64(ef, gh, 1), vextq_u64(cd, ef, 1));
            gh = vsha512h2q_u64(intermed, cd, ab);
            cd = vaddq_u64(cd, intermed);

            // Rounds t + 2 and t + 3
            s1 = vsha512su1q_u64(vsha512su0q_u64(s1, s2), s0, vextq_u64(s5, s6, 1));
            initial_sum = vaddq_u64(s1, vld1q_u64(&K64[t + 2]));
            sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), ef);
            intermed = vsha512hq_u64(sum, vextq_u64(cd, ef, 1), vextq_u64(ab, cd, 1));
            ef = vsha512h2q_u64(intermed, ab, gh);
            ab = vaddq_u64(ab, intermed);

            // Rounds t + 4 and t + 5
            s2 = vsha512su1q_u64(vsha512su0q_u64(s2, s3), s1, vextq_u64(s6, s7, 1));
            initial_sum = vaddq_u64(s2, vld1q_u64(&K64[t + 4]));
            sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), cd);
            intermed = vsha512hq_u64(sum, vextq_u64(ab, cd, 1), vextq_u64(gh, ab, 1));
            cd = vsha512h2q_u64(intermed, gh, ef);
            gh = vaddq_u64(gh, intermed);

            // Rounds t + 6 and t + 7
            s3 = vsha512su1q_u64(vsha512su0q_u64(s3, s4), s2, vextq_u64(s7, s0, 1));
            initial_sum = vaddq_u64(s3, vld1q_u64(&K64[t + 6]));
            sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), ab);
            intermed = vsha512hq_u64(sum, vextq_u64(gh, ab, 1), vextq_u64(ef, gh, 1));
            ab = vsha512h2q_u64(intermed, ef, cd);
            ef = vaddq_u64(ef, intermed);

            // Rounds t + 8 and t + 9
            s4 = vsha512su1q_u64(vsha512su0q_u64(s4, s5), s3, vextq_u64(s0, s1, 1));
            initial_sum = vaddq_u64(s4, vld1q_u64(&K64[t + 8]));
            sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), gh);
            intermed = vsha512hq_u64(sum, vextq_u64(ef, gh, 1), vextq_u64(cd, ef, 1));
            gh = vsha512h2q_u64(intermed, cd, ab);
            cd = vaddq_u64(cd, intermed);

            // Rounds t + 10 and t + 11
            s5 = vsha512su1q_u64(vsha512su0q_u64(s5, s6), s4, vextq_u64(s1, s2, 1));
            initial_sum = vaddq_u64(s5, vld1q_u64(&K64[t + 10]));
            sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), ef);
            intermed = vsha512hq_u64(sum, vextq_u64(cd, ef, 1), vextq_u64(ab, cd, 1));
            ef = vsha512h2q_u64(intermed, ab, gh);
            ab = vaddq_u64(ab, intermed);

            // Rounds t + 12 and t + 13
            s6 = vsha512su1q_u64(vsha512su0q_u64(s6, s7), s5, vextq_u64(s2, s3, 1));
            initial_sum = vaddq_u64(s6, vld1q_u64(&K64[t + 12]));
            sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), cd);
            intermed = vsha512hq_u64(sum, vextq_u64(ab, cd, 1), vextq_u64(gh, ab, 1));
            cd = vsha512h2q_u64(intermed, gh, ef);
            gh = vaddq_u64(gh, intermed);

            // Rounds t + 14 and t + 15
            s7 = vsha512su1q_u64(vsha512su0q_u64(s7, s0), s6, vextq_u64(s3, s4, 1));
            initial_sum = vaddq_u64(s7, vld1q_u64(&K64[t + 14]));
            sum = vaddq_u64(vextq_u64(initial_sum, initial_sum, 1), ab);
            intermed = vsha512hq_u64(sum, vextq_u64(gh, ab, 1), vextq_u64(ef, gh, 1));
            ab = vsha512h2q_u64(intermed, ef, cd);
            ef = vaddq_u64(ef, intermed);
        }

        // Add the block-specific state to the original state.
        ab = vaddq_u64(ab, ab_orig);
        cd = vaddq_u64(cd, cd_orig);
        ef = vaddq_u64(ef, ef_orig);
        gh = vaddq_u64(gh, gh_orig);
    }

    // Store vectors into state.
    vst1q_u64(state[0..2].as_mut_ptr(), ab);
    vst1q_u64(state[2..4].as_mut_ptr(), cd);
    vst1q_u64(state[4..6].as_mut_ptr(), ef);
    vst1q_u64(state[6..8].as_mut_ptr(), gh);
}
// TODO remove these polyfills once SHA3 intrinsics land
// TODO remove these polyfills once SHA3 intrinsics land
// Inline-asm polyfill for the `vsha512hq_u64` intrinsic (SHA512H).
#[inline(always)]
unsafe fn vsha512hq_u64(
    mut hash_ed: uint64x2_t,
    hash_gf: uint64x2_t,
    kwh_kwh2: uint64x2_t,
) -> uint64x2_t {
    asm!(
        "SHA512H {:q}, {:q}, {:v}.2D",
        inout(vreg) hash_ed, in(vreg) hash_gf, in(vreg) kwh_kwh2,
        options(pure, nomem, nostack, preserves_flags)
    );
    hash_ed
}
// Inline-asm polyfill for the `vsha512h2q_u64` intrinsic (SHA512H2).
#[inline(always)]
unsafe fn vsha512h2q_u64(
    mut sum_ab: uint64x2_t,
    hash_c_: uint64x2_t,
    hash_ab: uint64x2_t,
) -> uint64x2_t {
    asm!(
        "SHA512H2 {:q}, {:q}, {:v}.2D",
        inout(vreg) sum_ab, in(vreg) hash_c_, in(vreg) hash_ab,
        options(pure, nomem, nostack, preserves_flags)
    );
    sum_ab
}
// Inline-asm polyfill for the `vsha512su0q_u64` intrinsic (SHA512SU0,
// first half of the message-schedule update).
#[inline(always)]
unsafe fn vsha512su0q_u64(mut w0_1: uint64x2_t, w2_: uint64x2_t) -> uint64x2_t {
    asm!(
        "SHA512SU0 {:v}.2D, {:v}.2D",
        inout(vreg) w0_1, in(vreg) w2_,
        options(pure, nomem, nostack, preserves_flags)
    );
    w0_1
}
// Inline-asm polyfill for the `vsha512su1q_u64` intrinsic (SHA512SU1,
// second half of the message-schedule update).
#[inline(always)]
unsafe fn vsha512su1q_u64(
    mut s01_s02: uint64x2_t,
    w14_15: uint64x2_t,
    w9_10: uint64x2_t,
) -> uint64x2_t {
    asm!(
        "SHA512SU1 {:v}.2D, {:v}.2D, {:v}.2D",
        inout(vreg) s01_s02, in(vreg) w14_15, in(vreg) w9_10,
        options(pure, nomem, nostack, preserves_flags)
    );
    s01_s02
}

View File

@@ -0,0 +1,242 @@
//! LoongArch64 assembly backend
// Concatenates a space-separated list of string literals into one literal:
// `c!("a" "b")` expands to `concat!("a", "b",)`. Used to build asm strings.
macro_rules! c {
    ($($l:expr)*) => {
        concat!($($l ,)*)
    };
}
// Rounds 0..15: load message word `$i` from the block pointer ($a1) into
// $a5, byte-swap it to big-endian, then run the shared round tail.
macro_rules! rounda {
    ($i:literal, $a:literal, $b:literal, $c:literal, $d:literal, $e:literal, $f:literal, $g:literal, $h:literal) => {
        c!(
            "ld.d $a5, $a1, (" $i " * 8);"
            "revb.d $a5, $a5;"
            roundtail!($i, $a, $b, $c, $d, $e, $f, $g, $h)
        )
    };
}
// Rounds 16..79: compute message word `$i` from the 16-entry circular
// schedule buffer on the stack ($sp): w[i] = w[i-16] + s0(w[i-15]) +
// w[i-7] + s1(w[i-2]), then run the shared round tail.
macro_rules! roundb {
    ($i:literal, $a:literal, $b:literal, $c:literal, $d:literal, $e:literal, $f:literal, $g:literal, $h:literal) => {
        c!(
            "ld.d $a4, $sp, (((" $i " - 15) & 0xF) * 8);"
            "ld.d $a5, $sp, (((" $i " - 16) & 0xF) * 8);"
            "ld.d $a6, $sp, (((" $i " - 7) & 0xF) * 8);"
            "add.d $a5, $a5, $a6;"
            // small sigma-0: rotr1 ^ rotr8 ^ shr7
            "rotri.d $a6, $a4, 8;"
            "srli.d $a7, $a4, 7;"
            "rotri.d $a4, $a4, 1;"
            "xor $a6, $a6, $a7;"
            "xor $a4, $a4, $a6;"
            "add.d $a5, $a5, $a4;"
            // small sigma-1: rotr19 ^ rotr61 ^ shr6
            "ld.d $a4, $sp, (((" $i " - 2) & 0xF) * 8);"
            "rotri.d $a6, $a4, 61;"
            "srli.d $a7, $a4, 6;"
            "rotri.d $a4, $a4, 19;"
            "xor $a6, $a6, $a7;"
            "xor $a4, $a4, $a6;"
            "add.d $a5, $a5, $a4;"
            roundtail!($i, $a, $b, $c, $d, $e, $f, $g, $h)
        )
    };
}
// Shared tail of every round: expects the message word w[i] in $a5 and the
// round-constant table pointer in $a3. Computes t1/t2 of the SHA-512 round,
// updates $h and $d, and stores w[i] into the circular schedule buffer.
macro_rules! roundtail {
    ($i:literal, $a:literal, $b:literal, $c:literal, $d:literal, $e:literal, $f:literal, $g:literal, $h:literal) => {
        c!(
            // Part 0
            // big sigma-1(e) = rotr14 ^ rotr18 ^ rotr41
            "rotri.d $a6, " $e ", 18;"
            "rotri.d $a7, " $e ", 41;"
            "rotri.d $a4, " $e ", 14;"
            "xor $a6, $a6, $a7;"
            "xor $a4, $a4, $a6;"
            // ch(e, f, g) = g ^ (e & (f ^ g))
            "xor $a6, " $g ", " $f ";"
            "ld.d $a7, $a3, " $i " * 8;"
            "and $a6, $a6, " $e ";"
            "xor $a6, $a6, " $g ";"
            "add.d $a4, $a4, $a6;"
            "add.d $a4, $a4, $a7;"
            "add.d " $h ", " $h ", $a5;"
            "add.d " $h ", " $h ", $a4;"
            // Part 1
            "add.d " $d ", " $d ", " $h ";"
            // Part 2
            // big sigma-0(a) = rotr28 ^ rotr34 ^ rotr39
            "rotri.d $a6, " $a ", 39;"
            "rotri.d $a7, " $a ", 34;"
            "rotri.d $a4, " $a ", 28;"
            "xor $a6, $a6, $a7;"
            "xor $a4, $a4, $a6;"
            "add.d " $h ", " $h ", $a4;"
            // maj(a, b, c) = (a & (b | c)) | (b & c)
            "or $a4, " $c ", " $b ";"
            "and $a6, " $c ", " $b ";"
            "and $a4, $a4, " $a ";"
            "or $a4, $a4, $a6;"
            "add.d " $h ", " $h ", $a4;"
            // Stash w[i] into the 16-entry circular schedule buffer.
            "st.d $a5, $sp, ((" $i " & 0xF) * 8);"
        )
    };
}
/// SHA-512 compression for loongarch64, implemented entirely in inline asm.
///
/// Register conventions inside the asm block: $a0 = state pointer,
/// $a1 = current block pointer, $a2 = remaining block count, $a3 = K64
/// round constants; $t0..$t7 hold the working variables a..h and
/// $a4..$a7, $a5 are scratch. A 128-byte stack frame holds the 16-entry
/// circular message-schedule buffer.
pub fn compress(state: &mut [u64; 8], blocks: &[[u8; 128]]) {
    // The asm loop decrements $a2 and would underflow on zero blocks.
    if blocks.is_empty() {
        return;
    }
    unsafe {
        core::arch::asm!(
            // Allocate scratch stack space
            "addi.d $sp, $sp, -128;",
            // Load state
            "ld.d $t0, $a0, 0",
            "ld.d $t1, $a0, 8",
            "ld.d $t2, $a0, 16",
            "ld.d $t3, $a0, 24",
            "ld.d $t4, $a0, 32",
            "ld.d $t5, $a0, 40",
            "ld.d $t6, $a0, 48",
            "ld.d $t7, $a0, 56",
            // Top of the per-block loop.
            "42:",
            // Do 64 rounds of hashing
            // (80 rounds for SHA-512; the register roles rotate each round.)
            rounda!( 0, "$t0", "$t1", "$t2", "$t3", "$t4", "$t5", "$t6", "$t7"),
            rounda!( 1, "$t7", "$t0", "$t1", "$t2", "$t3", "$t4", "$t5", "$t6"),
            rounda!( 2, "$t6", "$t7", "$t0", "$t1", "$t2", "$t3", "$t4", "$t5"),
            rounda!( 3, "$t5", "$t6", "$t7", "$t0", "$t1", "$t2", "$t3", "$t4"),
            rounda!( 4, "$t4", "$t5", "$t6", "$t7", "$t0", "$t1", "$t2", "$t3"),
            rounda!( 5, "$t3", "$t4", "$t5", "$t6", "$t7", "$t0", "$t1", "$t2"),
            rounda!( 6, "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0", "$t1"),
            rounda!( 7, "$t1", "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0"),
            rounda!( 8, "$t0", "$t1", "$t2", "$t3", "$t4", "$t5", "$t6", "$t7"),
            rounda!( 9, "$t7", "$t0", "$t1", "$t2", "$t3", "$t4", "$t5", "$t6"),
            rounda!(10, "$t6", "$t7", "$t0", "$t1", "$t2", "$t3", "$t4", "$t5"),
            rounda!(11, "$t5", "$t6", "$t7", "$t0", "$t1", "$t2", "$t3", "$t4"),
            rounda!(12, "$t4", "$t5", "$t6", "$t7", "$t0", "$t1", "$t2", "$t3"),
            rounda!(13, "$t3", "$t4", "$t5", "$t6", "$t7", "$t0", "$t1", "$t2"),
            rounda!(14, "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0", "$t1"),
            rounda!(15, "$t1", "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0"),
            roundb!(16, "$t0", "$t1", "$t2", "$t3", "$t4", "$t5", "$t6", "$t7"),
            roundb!(17, "$t7", "$t0", "$t1", "$t2", "$t3", "$t4", "$t5", "$t6"),
            roundb!(18, "$t6", "$t7", "$t0", "$t1", "$t2", "$t3", "$t4", "$t5"),
            roundb!(19, "$t5", "$t6", "$t7", "$t0", "$t1", "$t2", "$t3", "$t4"),
            roundb!(20, "$t4", "$t5", "$t6", "$t7", "$t0", "$t1", "$t2", "$t3"),
            roundb!(21, "$t3", "$t4", "$t5", "$t6", "$t7", "$t0", "$t1", "$t2"),
            roundb!(22, "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0", "$t1"),
            roundb!(23, "$t1", "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0"),
            roundb!(24, "$t0", "$t1", "$t2", "$t3", "$t4", "$t5", "$t6", "$t7"),
            roundb!(25, "$t7", "$t0", "$t1", "$t2", "$t3", "$t4", "$t5", "$t6"),
            roundb!(26, "$t6", "$t7", "$t0", "$t1", "$t2", "$t3", "$t4", "$t5"),
            roundb!(27, "$t5", "$t6", "$t7", "$t0", "$t1", "$t2", "$t3", "$t4"),
            roundb!(28, "$t4", "$t5", "$t6", "$t7", "$t0", "$t1", "$t2", "$t3"),
            roundb!(29, "$t3", "$t4", "$t5", "$t6", "$t7", "$t0", "$t1", "$t2"),
            roundb!(30, "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0", "$t1"),
            roundb!(31, "$t1", "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0"),
            roundb!(32, "$t0", "$t1", "$t2", "$t3", "$t4", "$t5", "$t6", "$t7"),
            roundb!(33, "$t7", "$t0", "$t1", "$t2", "$t3", "$t4", "$t5", "$t6"),
            roundb!(34, "$t6", "$t7", "$t0", "$t1", "$t2", "$t3", "$t4", "$t5"),
            roundb!(35, "$t5", "$t6", "$t7", "$t0", "$t1", "$t2", "$t3", "$t4"),
            roundb!(36, "$t4", "$t5", "$t6", "$t7", "$t0", "$t1", "$t2", "$t3"),
            roundb!(37, "$t3", "$t4", "$t5", "$t6", "$t7", "$t0", "$t1", "$t2"),
            roundb!(38, "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0", "$t1"),
            roundb!(39, "$t1", "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0"),
            roundb!(40, "$t0", "$t1", "$t2", "$t3", "$t4", "$t5", "$t6", "$t7"),
            roundb!(41, "$t7", "$t0", "$t1", "$t2", "$t3", "$t4", "$t5", "$t6"),
            roundb!(42, "$t6", "$t7", "$t0", "$t1", "$t2", "$t3", "$t4", "$t5"),
            roundb!(43, "$t5", "$t6", "$t7", "$t0", "$t1", "$t2", "$t3", "$t4"),
            roundb!(44, "$t4", "$t5", "$t6", "$t7", "$t0", "$t1", "$t2", "$t3"),
            roundb!(45, "$t3", "$t4", "$t5", "$t6", "$t7", "$t0", "$t1", "$t2"),
            roundb!(46, "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0", "$t1"),
            roundb!(47, "$t1", "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0"),
            roundb!(48, "$t0", "$t1", "$t2", "$t3", "$t4", "$t5", "$t6", "$t7"),
            roundb!(49, "$t7", "$t0", "$t1", "$t2", "$t3", "$t4", "$t5", "$t6"),
            roundb!(50, "$t6", "$t7", "$t0", "$t1", "$t2", "$t3", "$t4", "$t5"),
            roundb!(51, "$t5", "$t6", "$t7", "$t0", "$t1", "$t2", "$t3", "$t4"),
            roundb!(52, "$t4", "$t5", "$t6", "$t7", "$t0", "$t1", "$t2", "$t3"),
            roundb!(53, "$t3", "$t4", "$t5", "$t6", "$t7", "$t0", "$t1", "$t2"),
            roundb!(54, "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0", "$t1"),
            roundb!(55, "$t1", "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0"),
            roundb!(56, "$t0", "$t1", "$t2", "$t3", "$t4", "$t5", "$t6", "$t7"),
            roundb!(57, "$t7", "$t0", "$t1", "$t2", "$t3", "$t4", "$t5", "$t6"),
            roundb!(58, "$t6", "$t7", "$t0", "$t1", "$t2", "$t3", "$t4", "$t5"),
            roundb!(59, "$t5", "$t6", "$t7", "$t0", "$t1", "$t2", "$t3", "$t4"),
            roundb!(60, "$t4", "$t5", "$t6", "$t7", "$t0", "$t1", "$t2", "$t3"),
            roundb!(61, "$t3", "$t4", "$t5", "$t6", "$t7", "$t0", "$t1", "$t2"),
            roundb!(62, "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0", "$t1"),
            roundb!(63, "$t1", "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0"),
            roundb!(64, "$t0", "$t1", "$t2", "$t3", "$t4", "$t5", "$t6", "$t7"),
            roundb!(65, "$t7", "$t0", "$t1", "$t2", "$t3", "$t4", "$t5", "$t6"),
            roundb!(66, "$t6", "$t7", "$t0", "$t1", "$t2", "$t3", "$t4", "$t5"),
            roundb!(67, "$t5", "$t6", "$t7", "$t0", "$t1", "$t2", "$t3", "$t4"),
            roundb!(68, "$t4", "$t5", "$t6", "$t7", "$t0", "$t1", "$t2", "$t3"),
            roundb!(69, "$t3", "$t4", "$t5", "$t6", "$t7", "$t0", "$t1", "$t2"),
            roundb!(70, "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0", "$t1"),
            roundb!(71, "$t1", "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0"),
            roundb!(72, "$t0", "$t1", "$t2", "$t3", "$t4", "$t5", "$t6", "$t7"),
            roundb!(73, "$t7", "$t0", "$t1", "$t2", "$t3", "$t4", "$t5", "$t6"),
            roundb!(74, "$t6", "$t7", "$t0", "$t1", "$t2", "$t3", "$t4", "$t5"),
            roundb!(75, "$t5", "$t6", "$t7", "$t0", "$t1", "$t2", "$t3", "$t4"),
            roundb!(76, "$t4", "$t5", "$t6", "$t7", "$t0", "$t1", "$t2", "$t3"),
            roundb!(77, "$t3", "$t4", "$t5", "$t6", "$t7", "$t0", "$t1", "$t2"),
            roundb!(78, "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0", "$t1"),
            roundb!(79, "$t1", "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0"),
            // Update state registers (feed-forward addition)
            "ld.d $a4, $a0, 0", // a
            "ld.d $a5, $a0, 8", // b
            "ld.d $a6, $a0, 16", // c
            "ld.d $a7, $a0, 24", // d
            "add.d $t0, $t0, $a4",
            "add.d $t1, $t1, $a5",
            "add.d $t2, $t2, $a6",
            "add.d $t3, $t3, $a7",
            "ld.d $a4, $a0, 32", // e
            "ld.d $a5, $a0, 40", // f
            "ld.d $a6, $a0, 48", // g
            "ld.d $a7, $a0, 56", // h
            "add.d $t4, $t4, $a4",
            "add.d $t5, $t5, $a5",
            "add.d $t6, $t6, $a6",
            "add.d $t7, $t7, $a7",
            // Save updated state
            "st.d $t0, $a0, 0",
            "st.d $t1, $a0, 8",
            "st.d $t2, $a0, 16",
            "st.d $t3, $a0, 24",
            "st.d $t4, $a0, 32",
            "st.d $t5, $a0, 40",
            "st.d $t6, $a0, 48",
            "st.d $t7, $a0, 56",
            // Looping over blocks
            "addi.d $a1, $a1, 128",
            "addi.d $a2, $a2, -1",
            "bnez $a2, 42b",
            // Restore stack register
            "addi.d $sp, $sp, 128",
            in("$a0") state,
            inout("$a1") blocks.as_ptr() => _,
            inout("$a2") blocks.len() => _,
            in("$a3") crate::consts::K64.as_ptr(),
            // Clobbers
            out("$a4") _,
            out("$a5") _,
            out("$a6") _,
            out("$a7") _,
            out("$t0") _,
            out("$t1") _,
            out("$t2") _,
            out("$t3") _,
            out("$t4") _,
            out("$t5") _,
            out("$t6") _,
            out("$t7") _,
            options(preserves_flags),
        );
    }
}

215
vendor/sha2/src/sha512/soft.rs vendored Normal file
View File

@@ -0,0 +1,215 @@
#![allow(clippy::many_single_char_names)]
use crate::consts::{BLOCK_LEN, K64X2};
use core::convert::TryInto;
/// Lane-wise wrapping addition of two 2-lane `u64` vectors.
fn add(a: [u64; 2], b: [u64; 2]) -> [u64; 2] {
    let mut out = a;
    for (o, x) in out.iter_mut().zip(b) {
        *o = o.wrapping_add(x);
    }
    out
}
/// Not an intrinsic, but behaves like an unaligned load that straddles the
/// two input vectors: yields `[v1[1], v0[0]]`.
fn sha512load(v0: [u64; 2], v1: [u64; 2]) -> [u64; 2] {
    let [lo0, _] = v0;
    let [_, hi1] = v1;
    [hi1, lo0]
}
/// Performs 2 rounds of the SHA-512 message schedule update, producing the
/// next two message words `[w17, w16]` from the packed inputs.
pub fn sha512_schedule_x2(v0: [u64; 2], v1: [u64; 2], v4to5: [u64; 2], v7: [u64; 2]) -> [u64; 2] {
    // Small sigma functions from FIPS 180-4, written with rotate_right.
    fn sigma0(x: u64) -> u64 {
        x.rotate_right(1) ^ x.rotate_right(8) ^ (x >> 7)
    }
    fn sigma1(x: u64) -> u64 {
        x.rotate_right(19) ^ x.rotate_right(61) ^ (x >> 6)
    }

    // Each input packs two schedule words, higher index in lane 0.
    let [w1, w0] = v0;
    let [_, w2] = v1;
    let [w10, w9] = v4to5;
    let [w15, w14] = v7;

    // w[i] = w[i-16] + s0(w[i-15]) + w[i-7] + s1(w[i-2])
    let w16 = w0
        .wrapping_add(sigma0(w1))
        .wrapping_add(w9)
        .wrapping_add(sigma1(w14));
    let w17 = w1
        .wrapping_add(sigma0(w2))
        .wrapping_add(w10)
        .wrapping_add(sigma1(w15));
    [w17, w16]
}
/// Performs one round of the SHA-512 message block digest.
///
/// Inputs pack the working variables two per vector (`ae` = [a, e], etc.);
/// `wk0` is the pre-added round constant plus message word. Returns the new
/// `[a, e]` pair; the remaining variables simply shift position at the caller.
pub fn sha512_digest_round(
    ae: [u64; 2],
    bf: [u64; 2],
    cg: [u64; 2],
    dh: [u64; 2],
    wk0: u64,
) -> [u64; 2] {
    // Big sigma functions from FIPS 180-4.
    fn big_sigma0(x: u64) -> u64 {
        x.rotate_right(28) ^ x.rotate_right(34) ^ x.rotate_right(39)
    }
    fn big_sigma1(x: u64) -> u64 {
        x.rotate_right(14) ^ x.rotate_right(18) ^ x.rotate_right(41)
    }
    // "Choose": bits of f where e is set, bits of g elsewhere.
    fn ch(e: u64, f: u64, g: u64) -> u64 {
        g ^ (e & (f ^ g))
    }
    // "Majority": bitwise majority vote of a, b, c.
    fn maj(a: u64, b: u64, c: u64) -> u64 {
        (a & b) ^ (a & c) ^ (b & c)
    }

    let [a0, e0] = ae;
    let [b0, f0] = bf;
    let [c0, g0] = cg;
    let [d0, h0] = dh;

    // t1 = Σ1(e) + Ch(e, f, g) + (K + W) + h
    let t1 = big_sigma1(e0)
        .wrapping_add(ch(e0, f0, g0))
        .wrapping_add(wk0)
        .wrapping_add(h0);
    // a' = t1 + Σ0(a) + Maj(a, b, c);  e' = t1 + d
    let new_a = t1
        .wrapping_add(big_sigma0(a0))
        .wrapping_add(maj(a0, b0, c0));
    let new_e = t1.wrapping_add(d0);
    [new_a, new_e]
}
/// Process a block with the SHA-512 algorithm.
///
/// `state` is the 8-word chaining value (updated in place); `block` is one
/// 128-byte message block already converted to sixteen big-endian `u64` words.
/// Working variables are packed two per vector ([a, e], [b, f], ...), which
/// lets `sha512_digest_round` compute one round per call while the vectors
/// rotate roles.
pub fn sha512_digest_block_u64(state: &mut [u64; 8], block: &[u64; 16]) {
    let k = &K64X2;
    // Expand two schedule words at a time; `sha512load` supplies the
    // straddling w[i-7]/w[i-6] pair.
    macro_rules! schedule {
        ($v0:expr, $v1:expr, $v4:expr, $v5:expr, $v7:expr) => {
            sha512_schedule_x2($v0, $v1, sha512load($v4, $v5), $v7)
        };
    }
    // Four rounds: consumes two (K+W) pairs, rotating the packed vectors.
    macro_rules! rounds4 {
        ($ae:ident, $bf:ident, $cg:ident, $dh:ident, $wk0:expr, $wk1:expr) => {{
            let [u, t] = $wk0;
            let [w, v] = $wk1;
            $dh = sha512_digest_round($ae, $bf, $cg, $dh, t);
            $cg = sha512_digest_round($dh, $ae, $bf, $cg, u);
            $bf = sha512_digest_round($cg, $dh, $ae, $bf, v);
            $ae = sha512_digest_round($bf, $cg, $dh, $ae, w);
        }};
    }

    let mut ae = [state[0], state[4]];
    let mut bf = [state[1], state[5]];
    let mut cg = [state[2], state[6]];
    let mut dh = [state[3], state[7]];

    // Rounds 0..20
    let (mut w1, mut w0) = ([block[3], block[2]], [block[1], block[0]]);
    rounds4!(ae, bf, cg, dh, add(k[0], w0), add(k[1], w1));
    let (mut w3, mut w2) = ([block[7], block[6]], [block[5], block[4]]);
    rounds4!(ae, bf, cg, dh, add(k[2], w2), add(k[3], w3));
    let (mut w5, mut w4) = ([block[11], block[10]], [block[9], block[8]]);
    rounds4!(ae, bf, cg, dh, add(k[4], w4), add(k[5], w5));
    let (mut w7, mut w6) = ([block[15], block[14]], [block[13], block[12]]);
    rounds4!(ae, bf, cg, dh, add(k[6], w6), add(k[7], w7));
    let mut w8 = schedule!(w0, w1, w4, w5, w7);
    let mut w9 = schedule!(w1, w2, w5, w6, w8);
    rounds4!(ae, bf, cg, dh, add(k[8], w8), add(k[9], w9));

    // Rounds 20..40
    w0 = schedule!(w2, w3, w6, w7, w9);
    w1 = schedule!(w3, w4, w7, w8, w0);
    rounds4!(ae, bf, cg, dh, add(k[10], w0), add(k[11], w1));
    w2 = schedule!(w4, w5, w8, w9, w1);
    w3 = schedule!(w5, w6, w9, w0, w2);
    rounds4!(ae, bf, cg, dh, add(k[12], w2), add(k[13], w3));
    w4 = schedule!(w6, w7, w0, w1, w3);
    w5 = schedule!(w7, w8, w1, w2, w4);
    rounds4!(ae, bf, cg, dh, add(k[14], w4), add(k[15], w5));
    w6 = schedule!(w8, w9, w2, w3, w5);
    w7 = schedule!(w9, w0, w3, w4, w6);
    rounds4!(ae, bf, cg, dh, add(k[16], w6), add(k[17], w7));
    w8 = schedule!(w0, w1, w4, w5, w7);
    w9 = schedule!(w1, w2, w5, w6, w8);
    rounds4!(ae, bf, cg, dh, add(k[18], w8), add(k[19], w9));

    // Rounds 40..60
    w0 = schedule!(w2, w3, w6, w7, w9);
    w1 = schedule!(w3, w4, w7, w8, w0);
    rounds4!(ae, bf, cg, dh, add(k[20], w0), add(k[21], w1));
    w2 = schedule!(w4, w5, w8, w9, w1);
    w3 = schedule!(w5, w6, w9, w0, w2);
    rounds4!(ae, bf, cg, dh, add(k[22], w2), add(k[23], w3));
    w4 = schedule!(w6, w7, w0, w1, w3);
    w5 = schedule!(w7, w8, w1, w2, w4);
    rounds4!(ae, bf, cg, dh, add(k[24], w4), add(k[25], w5));
    w6 = schedule!(w8, w9, w2, w3, w5);
    w7 = schedule!(w9, w0, w3, w4, w6);
    rounds4!(ae, bf, cg, dh, add(k[26], w6), add(k[27], w7));
    w8 = schedule!(w0, w1, w4, w5, w7);
    w9 = schedule!(w1, w2, w5, w6, w8);
    rounds4!(ae, bf, cg, dh, add(k[28], w8), add(k[29], w9));

    // Rounds 60..80
    w0 = schedule!(w2, w3, w6, w7, w9);
    w1 = schedule!(w3, w4, w7, w8, w0);
    rounds4!(ae, bf, cg, dh, add(k[30], w0), add(k[31], w1));
    w2 = schedule!(w4, w5, w8, w9, w1);
    w3 = schedule!(w5, w6, w9, w0, w2);
    rounds4!(ae, bf, cg, dh, add(k[32], w2), add(k[33], w3));
    w4 = schedule!(w6, w7, w0, w1, w3);
    w5 = schedule!(w7, w8, w1, w2, w4);
    rounds4!(ae, bf, cg, dh, add(k[34], w4), add(k[35], w5));
    w6 = schedule!(w8, w9, w2, w3, w5);
    w7 = schedule!(w9, w0, w3, w4, w6);
    rounds4!(ae, bf, cg, dh, add(k[36], w6), add(k[37], w7));
    w8 = schedule!(w0, w1, w4, w5, w7);
    w9 = schedule!(w1, w2, w5, w6, w8);
    rounds4!(ae, bf, cg, dh, add(k[38], w8), add(k[39], w9));

    // Feed-forward: add the working variables back into the chaining state.
    let [a, e] = ae;
    let [b, f] = bf;
    let [c, g] = cg;
    let [d, h] = dh;
    state[0] = state[0].wrapping_add(a);
    state[1] = state[1].wrapping_add(b);
    state[2] = state[2].wrapping_add(c);
    state[3] = state[3].wrapping_add(d);
    state[4] = state[4].wrapping_add(e);
    state[5] = state[5].wrapping_add(f);
    state[6] = state[6].wrapping_add(g);
    state[7] = state[7].wrapping_add(h);
}
/// Portable SHA-512 compression: folds each 128-byte block into `state`.
pub fn compress(state: &mut [u64; 8], blocks: &[[u8; 128]]) {
    // Digest into a local copy of the state; without this copy LLVM cannot
    // prove `state` is unaliased and emits unnecessary intermediate stores.
    let mut local = *state;
    for block in blocks {
        // Convert the raw block into sixteen big-endian 64-bit words.
        let mut words = [0u64; BLOCK_LEN];
        for (word, bytes) in words.iter_mut().zip(block.chunks_exact(8)) {
            *word = u64::from_be_bytes(bytes.try_into().unwrap());
        }
        sha512_digest_block_u64(&mut local, &words);
    }
    *state = local;
}

66
vendor/sha2/src/sha512/soft_compact.rs vendored Normal file
View File

@@ -0,0 +1,66 @@
use crate::consts::K64;
/// Splits a 128-byte block into sixteen big-endian `u64` words.
fn to_u64s(block: &[u8; 128]) -> [u64; 16] {
    use core::convert::TryInto;
    let mut words = [0u64; 16];
    for (word, bytes) in words.iter_mut().zip(block.chunks_exact(8)) {
        *word = u64::from_be_bytes(bytes.try_into().unwrap());
    }
    words
}
// Compression function for a single SHA-512 block (FIPS 180-4, section 6.4.2).
// `state` is the 8-word chaining value, updated in place with feed-forward.
fn compress_u64(state: &mut [u64; 8], block: [u64; 16]) {
    let [mut a, mut b, mut c, mut d, mut e, mut f, mut g, mut h] = *state;

    // Message schedule: expand the 16 block words into 80 round words.
    let mut w = [0; 80];
    w[..16].copy_from_slice(&block);
    for i in 16..80 {
        // small sigma-0: rotr1 ^ rotr8 ^ shr7
        let w15 = w[i - 15];
        let s0 = (w15.rotate_right(1)) ^ (w15.rotate_right(8)) ^ (w15 >> 7);
        // small sigma-1: rotr19 ^ rotr61 ^ shr6
        let w2 = w[i - 2];
        let s1 = (w2.rotate_right(19)) ^ (w2.rotate_right(61)) ^ (w2 >> 6);
        w[i] = w[i - 16]
            .wrapping_add(s0)
            .wrapping_add(w[i - 7])
            .wrapping_add(s1);
    }

    // 80 rounds of the working-variable update.
    for i in 0..80 {
        // big sigma-1 of e
        let s1 = e.rotate_right(14) ^ e.rotate_right(18) ^ e.rotate_right(41);
        // "choose": bits of f where e is set, bits of g elsewhere
        let ch = (e & f) ^ ((!e) & g);
        let t1 = s1
            .wrapping_add(ch)
            .wrapping_add(K64[i])
            .wrapping_add(w[i])
            .wrapping_add(h);
        // big sigma-0 of a
        let s0 = a.rotate_right(28) ^ a.rotate_right(34) ^ a.rotate_right(39);
        // "majority" vote of a, b, c
        let maj = (a & b) ^ (a & c) ^ (b & c);
        let t2 = s0.wrapping_add(maj);
        h = g;
        g = f;
        f = e;
        e = d.wrapping_add(t1);
        d = c;
        c = b;
        b = a;
        a = t1.wrapping_add(t2);
    }

    // Feed-forward: add the working variables back into the chaining state.
    state[0] = state[0].wrapping_add(a);
    state[1] = state[1].wrapping_add(b);
    state[2] = state[2].wrapping_add(c);
    state[3] = state[3].wrapping_add(d);
    state[4] = state[4].wrapping_add(e);
    state[5] = state[5].wrapping_add(f);
    state[6] = state[6].wrapping_add(g);
    state[7] = state[7].wrapping_add(h);
}
/// Compact portable SHA-512 compression over a sequence of blocks.
pub fn compress(state: &mut [u64; 8], blocks: &[[u8; 128]]) {
    blocks
        .iter()
        .for_each(|block| compress_u64(state, to_u64s(block)));
}

357
vendor/sha2/src/sha512/x86.rs vendored Normal file
View File

@@ -0,0 +1,357 @@
//! SHA-512 `x86`/`x86_64` backend
#![allow(clippy::many_single_char_names)]
use core::mem::size_of;
#[cfg(target_arch = "x86")]
use core::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::*;
use crate::consts::K64;
cpufeatures::new!(avx2_cpuid, "avx2");
/// Dispatch SHA-512 compression to the AVX2 backend when the CPU supports
/// it, falling back to the portable software implementation otherwise.
// TODO: Replace with https://github.com/rust-lang/rfcs/pull/2725
// after stabilization
pub fn compress(state: &mut [u64; 8], blocks: &[[u8; 128]]) {
    if !avx2_cpuid::get() {
        super::soft::compress(state, blocks);
        return;
    }
    // SAFETY: AVX2 availability was just verified at runtime.
    unsafe { sha512_compress_x86_64_avx2(state, blocks) }
}
/// AVX2 compression: processes blocks two at a time, packing both blocks'
/// message schedules into the low/high 128-bit lanes of 256-bit registers.
///
/// # Safety
/// The caller must ensure the CPU supports AVX2 (checked in `compress`).
#[target_feature(enable = "avx2")]
unsafe fn sha512_compress_x86_64_avx2(state: &mut [u64; 8], blocks: &[[u8; 128]]) {
    let mut start_block = 0;

    // Odd number of blocks: consume the first one via the single-block AVX
    // path so the main loop below can always take blocks in pairs.
    if blocks.len() & 0b1 != 0 {
        sha512_compress_x86_64_avx(state, &blocks[0]);
        start_block += 1;
    }

    let mut ms: MsgSchedule = [_mm_setzero_si128(); 8];
    let mut t2: RoundStates = [_mm_setzero_si128(); 40];
    let mut x = [_mm256_setzero_si256(); 8];

    for i in (start_block..blocks.len()).step_by(2) {
        load_data_avx2(&mut x, &mut ms, &mut t2, blocks.as_ptr().add(i) as *const _);

        // First block: rounds driven by `ms`; the second block's pre-added
        // round words accumulate in `t2` along the way.
        let mut current_state = *state;
        rounds_0_63_avx2(&mut current_state, &mut x, &mut ms, &mut t2);
        rounds_64_79(&mut current_state, &ms);
        accumulate_state(state, &current_state);

        // Second block: replay all 80 saved round words from `t2`.
        current_state = *state;
        process_second_block(&mut current_state, &t2);
        accumulate_state(state, &current_state);
    }
}
/// Compress a single 128-byte block using 128-bit vectors; used when the
/// AVX2 path is handed an odd number of blocks.
#[inline(always)]
unsafe fn sha512_compress_x86_64_avx(state: &mut [u64; 8], block: &[u8; 128]) {
    let mut ms = [_mm_setzero_si128(); 8];
    let mut x = [_mm_setzero_si128(); 8];

    // Reduced to single iteration
    let mut current_state = *state;
    load_data_avx(&mut x, &mut ms, block.as_ptr() as *const _);
    rounds_0_63_avx(&mut current_state, &mut x, &mut ms);
    rounds_64_79(&mut current_state, &ms);
    accumulate_state(state, &current_state);
}
/// Load one block into `x`, byte-swapping each 64-bit lane to native order
/// (the message is big-endian), and pre-add the first 16 round constants
/// into `ms` (two schedule words per 128-bit vector).
#[inline(always)]
unsafe fn load_data_avx(x: &mut [__m128i; 8], ms: &mut MsgSchedule, data: *const __m128i) {
    // Shuffle mask reversing the byte order of each u64 lane.
    #[allow(non_snake_case)]
    let MASK = _mm_setr_epi32(0x04050607, 0x00010203, 0x0c0d0e0f, 0x08090a0b);

    macro_rules! unrolled_iterations {
        ($($i:literal),*) => {$(
            x[$i] = _mm_loadu_si128(data.add($i) as *const _);
            x[$i] = _mm_shuffle_epi8(x[$i], MASK);

            // ms[i] = w[2i..2i+2] + K64[2i..2i+2]
            let y = _mm_add_epi64(
                x[$i],
                _mm_loadu_si128(&K64[2 * $i] as *const u64 as *const _),
            );

            ms[$i] = y;
        )*};
    }

    unrolled_iterations!(0, 1, 2, 3, 4, 5, 6, 7);
}
/// Load two consecutive blocks into `x` (first block in the low 128-bit
/// lane, second block in the high lane), byte-swap to native word order, and
/// pre-add the first 16 round constants: the first block's sums go to `ms`,
/// the second block's to `t2[0..8]`.
#[inline(always)]
unsafe fn load_data_avx2(
    x: &mut [__m256i; 8],
    ms: &mut MsgSchedule,
    t2: &mut RoundStates,
    data: *const __m128i,
) {
    // Per-lane byte-reversal mask (big-endian u64 -> native).
    #[allow(non_snake_case)]
    let MASK = _mm256_set_epi64x(
        0x0809_0A0B_0C0D_0E0F_i64,
        0x0001_0203_0405_0607_i64,
        0x0809_0A0B_0C0D_0E0F_i64,
        0x0001_0203_0405_0607_i64,
    );

    macro_rules! unrolled_iterations {
        ($($i:literal),*) => {$(
            // High lane: chunk $i of the second block (8 vectors further on).
            x[$i] = _mm256_insertf128_si256(x[$i], _mm_loadu_si128(data.add(8 + $i) as *const _), 1);
            // Low lane: chunk $i of the first block.
            x[$i] = _mm256_insertf128_si256(x[$i], _mm_loadu_si128(data.add($i) as *const _), 0);
            x[$i] = _mm256_shuffle_epi8(x[$i], MASK);

            // Broadcast K64[2i..2i+2] into both lanes and pre-add.
            let t = _mm_loadu_si128(K64.as_ptr().add($i * 2) as *const u64 as *const _);
            let y = _mm256_add_epi64(x[$i], _mm256_set_m128i(t, t));

            ms[$i] = _mm256_extracti128_si256(y, 0);
            t2[$i] = _mm256_extracti128_si256(y, 1);
        )*};
    }

    unrolled_iterations!(0, 1, 2, 3, 4, 5, 6, 7);
}
/// Rounds 0..63 for the single-block path: each inner step consumes two
/// pre-added schedule words from `ms` while computing the next pair of
/// schedule words (with their constants folded in) to refill that slot.
#[inline(always)]
unsafe fn rounds_0_63_avx(current_state: &mut State, x: &mut [__m128i; 8], ms: &mut MsgSchedule) {
    // The first 16 constants were already added by `load_data_avx`, so the
    // constant index starts at word 16.
    let mut k64_idx: usize = SHA512_BLOCK_WORDS_NUM;

    // 4 outer * 8 inner * 2 rounds each = 64 rounds.
    for _ in 0..4 {
        for j in 0..8 {
            let k64 = _mm_loadu_si128(&K64[k64_idx] as *const u64 as *const _);
            // Advance the message schedule by two words, constants pre-added.
            let y = sha512_update_x_avx(x, k64);

            {
                let ms = cast_ms(ms);
                sha_round(current_state, ms[2 * j]);
                sha_round(current_state, ms[2 * j + 1]);
            }

            ms[j] = y;
            k64_idx += 2;
        }
    }
}
/// Rounds 0..63 for the two-block path. The low 128-bit lane drives the
/// first block's rounds via `ms`; the high lane's pre-added words are saved
/// into `t2[8..40]` for later replay by `process_second_block`.
#[inline(always)]
unsafe fn rounds_0_63_avx2(
    current_state: &mut State,
    x: &mut [__m256i; 8],
    ms: &mut MsgSchedule,
    t2: &mut RoundStates,
) {
    // Constants 0..15 were consumed by `load_data_avx2`.
    let mut k64x4_idx: usize = SHA512_BLOCK_WORDS_NUM;

    // `i` starts at 1 because `t2[0..8]` already holds the load-time sums.
    for i in 1..5 {
        for j in 0..8 {
            // Broadcast the next two round constants into both lanes.
            let t = _mm_loadu_si128(K64.as_ptr().add(k64x4_idx) as *const u64 as *const _);
            let y = sha512_update_x_avx2(x, _mm256_set_m128i(t, t));

            {
                let ms = cast_ms(ms);
                sha_round(current_state, ms[2 * j]);
                sha_round(current_state, ms[2 * j + 1]);
            }

            ms[j] = _mm256_extracti128_si256(y, 0);
            t2[8 * i + j] = _mm256_extracti128_si256(y, 1);
            k64x4_idx += 2;
        }
    }
}
/// Final 16 rounds (64..79): consume the remaining pre-added schedule words.
/// For i in 64..80, `i & 0xf` walks slots 0..16 in order, so this is a plain
/// in-order pass over the 16 words in `ms`.
#[inline(always)]
fn rounds_64_79(current_state: &mut State, ms: &MsgSchedule) {
    let words = cast_ms(ms);
    for word in words.iter() {
        sha_round(current_state, *word);
    }
}
/// Run all 80 rounds of the second block from the pre-added words in `t2`.
#[inline(always)]
fn process_second_block(current_state: &mut State, t2: &RoundStates) {
    let words = cast_rs(t2);
    for i in 0..words.len() {
        sha_round(current_state, words[i]);
    }
}
/// One scalar SHA-512 round. `x` is the pre-added `w[t] + K64[t]` value and
/// `s` holds the working variables a..h in `s[0]..s[7]`.
#[inline(always)]
fn sha_round(s: &mut State, x: u64) {
    let (a, b, c, d, e, f, g, h) = (s[0], s[1], s[2], s[3], s[4], s[5], s[6], s[7]);

    // T1 = h + BigSigma1(e) + Ch(e, f, g) + (w + K)
    let big_sigma1 = e.rotate_right(14) ^ e.rotate_right(18) ^ e.rotate_right(41);
    let ch = g ^ (e & (f ^ g));
    let t1 = x.wrapping_add(h).wrapping_add(big_sigma1).wrapping_add(ch);

    // T2 = BigSigma0(a) + Maj(a, b, c)
    let big_sigma0 = a.rotate_right(28) ^ a.rotate_right(34) ^ a.rotate_right(39);
    let maj = (a & b) ^ (a & c) ^ (b & c);
    let t2 = big_sigma0.wrapping_add(maj);

    // New state: a' = T1 + T2, e' = d + T1, everything else shifts down one.
    *s = [t1.wrapping_add(t2), a, b, c, d.wrapping_add(t1), e, f, g];
}
#[inline(always)]
fn accumulate_state(dst: &mut State, src: &State) {
for i in 0..SHA512_HASH_WORDS_NUM {
dst[i] = dst[i].wrapping_add(src[i]);
}
}
/// Generates a SHA-512 message-schedule update function over either 128-bit
/// (`__m128i`) or 256-bit (`__m256i`) vectors. Each call advances the
/// 16-word sliding window `x` by two words (computing
/// `w[t] = w[t-16] + sigma0(w[t-15]) + w[t-7] + sigma1(w[t-2])` for two `t`
/// at once) and returns the new words with the round constants `k64`
/// pre-added. The sigma rotations are built from shift/xor pairs because
/// these vector ISAs have no 64-bit lane rotate.
macro_rules! fn_sha512_update_x {
    ($name:ident, $ty:ident, {
        ADD64 = $ADD64:ident,
        ALIGNR8 = $ALIGNR8:ident,
        SRL64 = $SRL64:ident,
        SLL64 = $SLL64:ident,
        XOR = $XOR:ident,
    }) => {
        unsafe fn $name(x: &mut [$ty; 8], k64: $ty) -> $ty {
            // q[2:1]
            let mut t0 = $ALIGNR8(x[1], x[0], 8);
            // q[10:9]
            let mut t3 = $ALIGNR8(x[5], x[4], 8);
            // q[2:1] >> s0[0]
            let mut t2 = $SRL64(t0, 1);
            // q[1:0] + q[10:9]
            x[0] = $ADD64(x[0], t3);
            // q[2:1] >> s0[2]
            t3 = $SRL64(t0, 7);
            // q[2:1] << (64 - s0[1])
            let mut t1 = $SLL64(t0, 64 - 8);
            // (q[2:1] >> s0[2]) ^
            // (q[2:1] >> s0[0])
            t0 = $XOR(t3, t2);
            // q[2:1] >> s0[1]
            t2 = $SRL64(t2, 8 - 1);
            // (q[2:1] >> s0[2]) ^
            // (q[2:1] >> s0[0]) ^
            // q[2:1] << (64 - s0[1])
            t0 = $XOR(t0, t1);
            // q[2:1] << (64 - s0[0])
            t1 = $SLL64(t1, 8 - 1);
            // sigma1(q[2:1])
            t0 = $XOR(t0, t2);
            t0 = $XOR(t0, t1);
            // q[15:14] >> s1[2]
            t3 = $SRL64(x[7], 6);
            // q[15:14] >> (64 - s1[1])
            t2 = $SLL64(x[7], 64 - 61);
            // q[1:0] + sigma0(q[2:1])
            x[0] = $ADD64(x[0], t0);
            // q[15:14] >> s1[0]
            t1 = $SRL64(x[7], 19);
            // q[15:14] >> s1[2] ^
            // q[15:14] >> (64 - s1[1])
            t3 = $XOR(t3, t2);
            // q[15:14] >> (64 - s1[0])
            t2 = $SLL64(t2, 61 - 19);
            // q[15:14] >> s1[2] ^
            // q[15:14] >> (64 - s1[1] ^
            // q[15:14] >> s1[0]
            t3 = $XOR(t3, t1);
            // q[15:14] >> s1[1]
            t1 = $SRL64(t1, 61 - 19);
            // sigma1(q[15:14])
            t3 = $XOR(t3, t2);
            t3 = $XOR(t3, t1);
            // q[1:0] + q[10:9] + sigma1(q[15:14]) + sigma0(q[2:1])
            x[0] = $ADD64(x[0], t3);
            // rotate the 16-word window down by two words (one vector)
            let temp = x[0];
            x[0] = x[1];
            x[1] = x[2];
            x[2] = x[3];
            x[3] = x[4];
            x[4] = x[5];
            x[5] = x[6];
            x[6] = x[7];
            x[7] = temp;
            // return the new words with round constants pre-added
            $ADD64(x[7], k64)
        }
    };
}
// 128-bit (single-block) schedule update.
fn_sha512_update_x!(sha512_update_x_avx, __m128i, {
    ADD64 = _mm_add_epi64,
    ALIGNR8 = _mm_alignr_epi8,
    SRL64 = _mm_srli_epi64,
    SLL64 = _mm_slli_epi64,
    XOR = _mm_xor_si128,
});

// 256-bit (two-block) schedule update.
fn_sha512_update_x!(sha512_update_x_avx2, __m256i, {
    ADD64 = _mm256_add_epi64,
    ALIGNR8 = _mm256_alignr_epi8,
    SRL64 = _mm256_srli_epi64,
    SLL64 = _mm256_slli_epi64,
    XOR = _mm256_xor_si256,
});
/// View the message schedule (8 x `__m128i`) as 16 scalar u64 round inputs.
#[inline(always)]
fn cast_ms(ms: &MsgSchedule) -> &[u64; SHA512_BLOCK_WORDS_NUM] {
    // SAFETY: `MsgSchedule` is 8 x 16 bytes = 128 bytes, the same size as
    // `[u64; 16]`, and `__m128i` has stricter alignment than `u64`.
    unsafe { &*(ms as *const MsgSchedule as *const _) }
}
/// View the saved round states (40 x `__m128i`) as 80 scalar u64 round inputs.
#[inline(always)]
fn cast_rs(rs: &RoundStates) -> &[u64; SHA512_ROUNDS_NUM] {
    // SAFETY: `RoundStates` is 40 x 16 bytes = 640 bytes, the same size as
    // `[u64; 80]`, and `__m128i` has stricter alignment than `u64`.
    unsafe { &*(rs as *const RoundStates as *const _) }
}
// Eight 64-bit chaining/working hash words.
type State = [u64; SHA512_HASH_WORDS_NUM];
// Pre-added (w + K) schedule words for one block: two u64s per vector.
type MsgSchedule = [__m128i; SHA512_BLOCK_WORDS_NUM / 2];
// Saved round inputs for the second block of an AVX2 pair: 80 rounds, two per vector.
type RoundStates = [__m128i; SHA512_ROUNDS_NUM / 2];

const SHA512_BLOCK_BYTE_LEN: usize = 128;
const SHA512_ROUNDS_NUM: usize = 80;
const SHA512_HASH_BYTE_LEN: usize = 64;
const SHA512_HASH_WORDS_NUM: usize = SHA512_HASH_BYTE_LEN / size_of::<u64>();
const SHA512_BLOCK_WORDS_NUM: usize = SHA512_BLOCK_BYTE_LEN / size_of::<u64>();