chore: checkpoint before Python removal

2026-03-26 22:33:59 +00:00
parent 683cec9307
commit e568ddf82a
29972 changed files with 11269302 additions and 2 deletions

199
vendor/httparse/src/iter.rs vendored Normal file

@@ -0,0 +1,199 @@
use core::convert::TryFrom;
use core::convert::TryInto;
#[allow(missing_docs)]
pub struct Bytes<'a> {
start: *const u8,
end: *const u8,
/// INVARIANT: start <= cursor && cursor <= end
cursor: *const u8,
phantom: core::marker::PhantomData<&'a ()>,
}
#[allow(missing_docs)]
impl<'a> Bytes<'a> {
#[inline]
pub fn new(slice: &'a [u8]) -> Bytes<'a> {
let start = slice.as_ptr();
// SAFETY: obtain pointer to slice end; start points to slice start.
let end = unsafe { start.add(slice.len()) };
let cursor = start;
Bytes {
start,
end,
cursor,
phantom: core::marker::PhantomData,
}
}
#[inline]
pub fn pos(&self) -> usize {
self.cursor as usize - self.start as usize
}
#[inline]
pub fn peek(&self) -> Option<u8> {
if self.cursor < self.end {
// SAFETY: bounds checked
Some(unsafe { *self.cursor })
} else {
None
}
}
/// Peek at byte `n` ahead of cursor
///
/// # Safety
///
/// Caller must ensure that `n <= self.len()`, otherwise `self.cursor.add(n)` is UB.
/// That means there are at least `n` bytes between `self.cursor` and `self.end`,
/// so `self.cursor.add(n)` is either `self.end` or points to a valid byte.
#[inline]
pub unsafe fn peek_ahead(&self, n: usize) -> Option<u8> {
debug_assert!(n <= self.len());
// SAFETY: by preconditions
let p = unsafe { self.cursor.add(n) };
if p < self.end {
// SAFETY: by preconditions, if this is not `self.end`,
// then it is safe to dereference
Some(unsafe { *p })
} else {
None
}
}
#[inline]
pub fn peek_n<'b: 'a, U: TryFrom<&'a [u8]>>(&'b self, n: usize) -> Option<U> {
// TODO: once we bump MSRV, use const generics to allow only [u8; N] reads
// TODO: drop `n` arg in favour of const
// let n = core::mem::size_of::<U>();
self.as_ref().get(..n)?.try_into().ok()
}
/// Advance by 1, equivalent to calling `advance(1)`.
///
/// # Safety
///
/// Caller must ensure at least one byte remains, i.e. `self.len() >= 1`.
#[inline]
pub unsafe fn bump(&mut self) {
self.advance(1)
}
/// Advance cursor by `n`
///
/// # Safety
///
/// Caller must ensure that `n <= self.len()`, so the cursor is not advanced past `self.end`.
#[inline]
pub unsafe fn advance(&mut self, n: usize) {
self.cursor = self.cursor.add(n);
debug_assert!(self.cursor <= self.end, "overflow");
}
#[inline]
pub fn len(&self) -> usize {
self.end as usize - self.cursor as usize
}
#[inline]
pub fn is_empty(&self) -> bool {
self.len() == 0
}
#[inline]
pub fn slice(&mut self) -> &'a [u8] {
// SAFETY: start <= cursor per the struct invariant, and both point into the original slice
let slice = unsafe { slice_from_ptr_range(self.start, self.cursor) };
self.commit();
slice
}
// TODO: this is an anti-pattern, should be removed
/// Deprecated. Do not use!
/// # Safety
///
/// Caller must ensure that `skip` is at most the number of bytes advanced since the last
/// commit (i.e., after `bytes.advance(3)`, `skip` may be at most 3).
#[inline]
pub unsafe fn slice_skip(&mut self, skip: usize) -> &'a [u8] {
debug_assert!(skip <= self.cursor.offset_from(self.start) as usize);
let head = slice_from_ptr_range(self.start, self.cursor.sub(skip));
self.commit();
head
}
#[inline]
pub fn commit(&mut self) {
self.start = self.cursor
}
/// # Safety
///
/// see [`Bytes::advance`] safety comment.
#[inline]
pub unsafe fn advance_and_commit(&mut self, n: usize) {
self.advance(n);
self.commit();
}
#[inline]
pub fn as_ptr(&self) -> *const u8 {
self.cursor
}
#[inline]
pub fn start(&self) -> *const u8 {
self.start
}
#[inline]
pub fn end(&self) -> *const u8 {
self.end
}
/// # Safety
///
/// Must ensure invariant `bytes.start() <= ptr && ptr <= bytes.end()`.
#[inline]
pub unsafe fn set_cursor(&mut self, ptr: *const u8) {
debug_assert!(ptr >= self.start);
debug_assert!(ptr <= self.end);
self.cursor = ptr;
}
}
impl AsRef<[u8]> for Bytes<'_> {
#[inline]
fn as_ref(&self) -> &[u8] {
// SAFETY: cursor <= end per the struct invariant, and both point into the original slice
unsafe { slice_from_ptr_range(self.cursor, self.end) }
}
}
/// # Safety
///
/// Must ensure start and end point to the same memory object to uphold memory safety.
#[inline]
unsafe fn slice_from_ptr_range<'a>(start: *const u8, end: *const u8) -> &'a [u8] {
debug_assert!(start <= end);
core::slice::from_raw_parts(start, end as usize - start as usize)
}
impl Iterator for Bytes<'_> {
type Item = u8;
#[inline]
fn next(&mut self) -> Option<u8> {
if self.cursor < self.end {
// SAFETY: bounds checked dereference
unsafe {
let b = *self.cursor;
self.bump();
Some(b)
}
} else {
None
}
}
}
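
Note: `Bytes` follows a peek/advance/slice protocol: `peek` bounds-checks, `advance`/`bump` moves the cursor inside `unsafe` once the check is done, and `slice()` hands back the consumed prefix and commits. A minimal in-crate sketch of that protocol (a hypothetical test, not part of the vendored file):

#[test]
fn bytes_peek_advance_slice_protocol() {
    let mut bytes = Bytes::new(b"GET /index");
    // Consume the leading uppercase run.
    while let Some(b) = bytes.peek() {
        if !b.is_ascii_uppercase() {
            break;
        }
        // SAFETY: peek() returned Some, so at least one byte remains.
        unsafe { bytes.bump() };
    }
    assert_eq!(bytes.slice(), &b"GET"[..]);      // consumed prefix; commits
    assert_eq!(bytes.as_ref(), &b" /index"[..]); // unconsumed remainder
}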

2798
vendor/httparse/src/lib.rs vendored Normal file

File diff suppressed because it is too large.

68
vendor/httparse/src/macros.rs vendored Normal file

@@ -0,0 +1,68 @@
//! Utility macros
macro_rules! next {
($bytes:ident) => ({
match $bytes.next() {
Some(b) => b,
None => return Ok(Status::Partial)
}
})
}
macro_rules! expect {
($bytes:ident.next() == $pat:pat => $ret:expr) => {
expect!(next!($bytes) => $pat |? $ret)
};
($e:expr => $pat:pat |? $ret:expr) => {
match $e {
v@$pat => v,
_ => return $ret
}
};
}
macro_rules! complete {
($e:expr) => {
match $e? {
Status::Complete(v) => v,
Status::Partial => return Ok(Status::Partial)
}
}
}
macro_rules! byte_map {
($($p:pat)|+) => {{
const fn make_map() -> [bool; 256] {
let mut ret = [false; 256];
let mut i = 0;
while i < 256 {
ret[i] = matches!(i as u8, $($p)|+);
i += 1;
}
ret
}
make_map()
}}
}
macro_rules! space {
($bytes:ident or $err:expr) => ({
expect!($bytes.next() == b' ' => Err($err));
$bytes.slice();
})
}
macro_rules! newline {
($bytes:ident) => ({
match next!($bytes) {
b'\r' => {
expect!($bytes.next() == b'\n' => Err(Error::NewLine));
$bytes.slice();
},
b'\n' => {
$bytes.slice();
},
_ => return Err(Error::NewLine)
}
})
}
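
Note: `byte_map!` builds a 256-entry `bool` lookup table at compile time from a pattern list, trading a per-byte `match` for a single indexed load. A hedged usage sketch (the table name and character set are illustrative, not the crate's actual tables):

// Hypothetical table: unreserved URI characters per RFC 3986.
static UNRESERVED: [bool; 256] = byte_map!(
    b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'-' | b'.' | b'_' | b'~'
);

fn is_unreserved(b: u8) -> bool {
    UNRESERVED[b as usize]
}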

206
vendor/httparse/src/simd/avx2.rs vendored Normal file

@@ -0,0 +1,206 @@
use crate::iter::Bytes;
#[inline]
#[target_feature(enable = "avx2")]
pub unsafe fn match_uri_vectored(bytes: &mut Bytes) {
while bytes.as_ref().len() >= 32 {
let advance = match_url_char_32_avx(bytes.as_ref());
bytes.advance(advance);
if advance != 32 {
return;
}
}
// NOTE: use SWAR for <32B, more efficient than falling back to SSE4.2
super::swar::match_uri_vectored(bytes)
}
#[inline(always)]
#[allow(non_snake_case, overflowing_literals)]
#[allow(unused)]
unsafe fn match_url_char_32_avx(buf: &[u8]) -> usize {
// NOTE: This check might not be necessary since this function is only used in
// `match_uri_vectored`, where the buffer length is already checked.
debug_assert!(buf.len() >= 32);
#[cfg(target_arch = "x86")]
use core::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::*;
// pointer to buffer
let ptr = buf.as_ptr();
// %x21-%x7e %x80-%xff
//
// Character ranges allowed by this function; can also be read as:
// 33 <= x <= 255, excluding 127
//
// Create a vector full of DEL (0x7f) characters.
let DEL: __m256i = _mm256_set1_epi8(0x7f);
// Create a vector full of exclamation mark (!) (0x21) characters.
// Used as lower threshold, characters in URLs cannot be smaller than this.
let LOW: __m256i = _mm256_set1_epi8(0x21);
// Load a chunk of 32 bytes from `ptr` as a vector.
// AVX2 lets us check at most 32 bytes in parallel, since
// YMM registers are 256 bits wide.
let dat = _mm256_lddqu_si256(ptr as *const _);
// unsigned comparison dat >= LOW
//
// `_mm256_max_epu8` builds a new vector from the byte-wise maximum
// of `dat` and `LOW`, so any byte <= 32 is replaced by 33,
// the smallest valid character.
//
// Then, we compare the new vector with `dat` for equality.
//
// `_mm256_cmpeq_epi8` returns a new vector where;
// * matching bytes are set to 0xFF (all bits set),
// * nonmatching bytes are set to 0 (no bits set).
let low = _mm256_cmpeq_epi8(_mm256_max_epu8(dat, LOW), dat);
// Similar to what we did before, but now invalid characters are set to 0xFF.
let del = _mm256_cmpeq_epi8(dat, DEL);
// We combine both comparisons via `_mm256_andnot_si256`.
//
// Since truthiness is represented differently in the two comparisons,
// a bitwise NOT is needed to flip the valid characters of `del`.
let bit = _mm256_andnot_si256(del, low);
// This creates a bitmask from the most significant bit of each byte.
// In effect, this converts a vector value into a scalar value.
let res = _mm256_movemask_epi8(bit) as u32;
// Count trailing zeros to find the first encountered invalid character.
// Bitwise NOT is required once again to flip truthiness.
// TODO: use .trailing_ones() once MSRV >= 1.46
(!res).trailing_zeros() as usize
}
#[target_feature(enable = "avx2")]
pub unsafe fn match_header_value_vectored(bytes: &mut Bytes) {
while bytes.as_ref().len() >= 32 {
let advance = match_header_value_char_32_avx(bytes.as_ref());
bytes.advance(advance);
if advance != 32 {
return;
}
}
// NOTE: use SWAR for <32B, more efficient than falling back to SSE4.2
super::swar::match_header_value_vectored(bytes)
}
#[inline(always)]
#[allow(non_snake_case)]
#[allow(unused)]
unsafe fn match_header_value_char_32_avx(buf: &[u8]) -> usize {
debug_assert!(buf.len() >= 32);
#[cfg(target_arch = "x86")]
use core::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::*;
let ptr = buf.as_ptr();
// %x09 %x20-%x7e %x80-%xff
// Create a vector full of horizontal tab (\t) (0x09) characters.
let TAB: __m256i = _mm256_set1_epi8(0x09);
// Create a vector full of DEL (0x7f) characters.
let DEL: __m256i = _mm256_set1_epi8(0x7f);
// Create a vector full of space (0x20) characters.
let LOW: __m256i = _mm256_set1_epi8(0x20);
// Load a chunk of 32 bytes from `ptr` as a vector.
let dat = _mm256_lddqu_si256(ptr as *const _);
// unsigned comparison dat >= LOW
//
// Same as what we do in `match_url_char_32_avx`.
// This time the lower threshold is set to space character though.
let low = _mm256_cmpeq_epi8(_mm256_max_epu8(dat, LOW), dat);
// Check if `dat` includes `TAB` characters.
let tab = _mm256_cmpeq_epi8(dat, TAB);
// Check if `dat` includes `DEL` characters.
let del = _mm256_cmpeq_epi8(dat, DEL);
// Combine all comparisons together, notice that we're also using OR
// to connect `low` and `tab` but flip bits of `del`.
//
// In the end, this is simply:
// ~del & (low | tab)
let bit = _mm256_andnot_si256(del, _mm256_or_si256(low, tab));
// This creates a bitmask from the most significant bit of each byte.
// Creates a scalar value from vector value.
let res = _mm256_movemask_epi8(bit) as u32;
// Count trailing zeros to find the first encountered invalid character.
// Bitwise NOT is required once again to flip truthiness.
// TODO: use .trailing_ones() once MSRV >= 1.46
(!res).trailing_zeros() as usize
}
#[test]
fn avx2_code_matches_uri_chars_table() {
if !is_x86_feature_detected!("avx2") {
return;
}
#[allow(clippy::undocumented_unsafe_blocks)]
unsafe {
assert!(byte_is_allowed(b'_', match_uri_vectored));
for (b, allowed) in crate::URI_MAP.iter().cloned().enumerate() {
assert_eq!(
byte_is_allowed(b as u8, match_uri_vectored), allowed,
"byte_is_allowed({:?}) should be {:?}", b, allowed,
);
}
}
}
#[test]
fn avx2_code_matches_header_value_chars_table() {
if !is_x86_feature_detected!("avx2") {
return;
}
#[allow(clippy::undocumented_unsafe_blocks)]
unsafe {
assert!(byte_is_allowed(b'_', match_header_value_vectored));
for (b, allowed) in crate::HEADER_VALUE_MAP.iter().cloned().enumerate() {
assert_eq!(
byte_is_allowed(b as u8, match_header_value_vectored), allowed,
"byte_is_allowed({:?}) should be {:?}", b, allowed,
);
}
}
}
#[cfg(test)]
unsafe fn byte_is_allowed(byte: u8, f: unsafe fn(bytes: &mut Bytes<'_>)) -> bool {
let slice = [
b'_', b'_', b'_', b'_',
b'_', b'_', b'_', b'_',
b'_', b'_', b'_', b'_',
b'_', b'_', b'_', b'_',
b'_', b'_', b'_', b'_',
b'_', b'_', b'_', b'_',
b'_', b'_', byte, b'_',
b'_', b'_', b'_', b'_',
];
let mut bytes = Bytes::new(&slice);
f(&mut bytes);
match bytes.pos() {
32 => true,
26 => false,
_ => unreachable!(),
}
}
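
Note: the movemask + trailing-zeros trick above reduces to this scalar model (illustrative only; `match_url_char_32_scalar` is a hypothetical name): set one mask bit per valid byte, then count trailing ones to locate the first invalid byte. The SSE4.2 path further below is the same scheme with 16-byte chunks and a u16 mask.

fn match_url_char_32_scalar(chunk: &[u8; 32]) -> usize {
    let mut mask: u32 = 0;
    for (i, &b) in chunk.iter().enumerate() {
        // Same predicate as the vector code: %x21-%x7e %x80-%xff.
        if b >= 0x21 && b != 0x7f {
            mask |= 1 << i;
        }
    }
    // All 32 bits set => whole chunk valid => returns 32.
    (!mask).trailing_zeros() as usize
}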

153
vendor/httparse/src/simd/mod.rs vendored Normal file

@@ -0,0 +1,153 @@
mod swar;
#[cfg(not(all(
httparse_simd,
any(
target_arch = "x86",
target_arch = "x86_64",
all(
target_arch = "aarch64",
httparse_simd_neon_intrinsics,
)
),
)))]
pub use self::swar::*;
#[cfg(all(
httparse_simd,
not(httparse_simd_target_feature_avx2),
any(
target_arch = "x86",
target_arch = "x86_64",
),
))]
mod sse42;
#[cfg(all(
httparse_simd,
any(
httparse_simd_target_feature_avx2,
not(httparse_simd_target_feature_sse42),
),
any(
target_arch = "x86",
target_arch = "x86_64",
),
))]
mod avx2;
#[cfg(all(
httparse_simd,
not(any(
httparse_simd_target_feature_sse42,
httparse_simd_target_feature_avx2,
)),
any(
target_arch = "x86",
target_arch = "x86_64",
),
))]
mod runtime;
#[cfg(all(
httparse_simd,
not(any(
httparse_simd_target_feature_sse42,
httparse_simd_target_feature_avx2,
)),
any(
target_arch = "x86",
target_arch = "x86_64",
),
))]
pub use self::runtime::*;
#[cfg(all(
httparse_simd,
httparse_simd_target_feature_sse42,
not(httparse_simd_target_feature_avx2),
any(
target_arch = "x86",
target_arch = "x86_64",
),
))]
mod sse42_compile_time {
#[inline(always)]
pub fn match_header_name_vectored(b: &mut crate::iter::Bytes<'_>) {
super::swar::match_header_name_vectored(b);
}
#[inline(always)]
pub fn match_uri_vectored(b: &mut crate::iter::Bytes<'_>) {
// SAFETY: calls are guarded by a compile time feature check
unsafe { crate::simd::sse42::match_uri_vectored(b) }
}
#[inline(always)]
pub fn match_header_value_vectored(b: &mut crate::iter::Bytes<'_>) {
// SAFETY: calls are guarded by a compile time feature check
unsafe { crate::simd::sse42::match_header_value_vectored(b) }
}
}
#[cfg(all(
httparse_simd,
httparse_simd_target_feature_sse42,
not(httparse_simd_target_feature_avx2),
any(
target_arch = "x86",
target_arch = "x86_64",
),
))]
pub use self::sse42_compile_time::*;
#[cfg(all(
httparse_simd,
httparse_simd_target_feature_avx2,
any(
target_arch = "x86",
target_arch = "x86_64",
),
))]
mod avx2_compile_time {
#[inline(always)]
pub fn match_header_name_vectored(b: &mut crate::iter::Bytes<'_>) {
super::swar::match_header_name_vectored(b);
}
#[inline(always)]
pub fn match_uri_vectored(b: &mut crate::iter::Bytes<'_>) {
// SAFETY: calls are guarded by a compile time feature check
unsafe { crate::simd::avx2::match_uri_vectored(b) }
}
#[inline(always)]
pub fn match_header_value_vectored(b: &mut crate::iter::Bytes<'_>) {
// SAFETY: calls are guarded by a compile time feature check
unsafe { crate::simd::avx2::match_header_value_vectored(b) }
}
}
#[cfg(all(
httparse_simd,
httparse_simd_target_feature_avx2,
any(
target_arch = "x86",
target_arch = "x86_64",
),
))]
pub use self::avx2_compile_time::*;
#[cfg(all(
httparse_simd,
target_arch = "aarch64",
httparse_simd_neon_intrinsics,
))]
mod neon;
#[cfg(all(
httparse_simd,
target_arch = "aarch64",
httparse_simd_neon_intrinsics,
))]
pub use self::neon::*;
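
Note: the `httparse_simd*` identifiers above are custom cfg flags, not Cargo features; they are emitted by the crate's build script. A sketch of that build.rs pattern (the flag names come from the gates above, but the probing logic here is an assumption, not necessarily the crate's actual script):

// build.rs (illustrative)
fn main() {
    println!("cargo:rustc-cfg=httparse_simd");
    // If the target baseline already guarantees a feature, emit the matching
    // *_target_feature_* cfg so the compile-time dispatch modules are used.
    let features = std::env::var("CARGO_CFG_TARGET_FEATURE").unwrap_or_default();
    if features.split(',').any(|f| f == "sse4.2") {
        println!("cargo:rustc-cfg=httparse_simd_target_feature_sse42");
    }
    if features.split(',').any(|f| f == "avx2") {
        println!("cargo:rustc-cfg=httparse_simd_target_feature_avx2");
    }
    // aarch64 would analogously gate httparse_simd_neon_intrinsics.
}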

258
vendor/httparse/src/simd/neon.rs vendored Normal file

@@ -0,0 +1,258 @@
use crate::iter::Bytes;
use core::arch::aarch64::*;
#[inline]
pub fn match_header_name_vectored(bytes: &mut Bytes) {
while bytes.as_ref().len() >= 16 {
// SAFETY: ensured that there are at least 16 bytes remaining
unsafe {
let advance = match_header_name_char_16_neon(bytes.as_ref().as_ptr());
bytes.advance(advance);
if advance != 16 {
return;
}
}
}
super::swar::match_header_name_vectored(bytes);
}
#[inline]
pub fn match_header_value_vectored(bytes: &mut Bytes) {
while bytes.as_ref().len() >= 16 {
// SAFETY: ensured that there are at least 16 bytes remaining
unsafe {
let advance = match_header_value_char_16_neon(bytes.as_ref().as_ptr());
bytes.advance(advance);
if advance != 16 {
return;
}
}
}
super::swar::match_header_value_vectored(bytes);
}
#[inline]
pub fn match_uri_vectored(bytes: &mut Bytes) {
while bytes.as_ref().len() >= 16 {
// SAFETY: ensured that there are at least 16 bytes remaining
unsafe {
let advance = match_url_char_16_neon(bytes.as_ref().as_ptr());
bytes.advance(advance);
if advance != 16 {
return;
}
}
}
super::swar::match_uri_vectored(bytes);
}
const fn bit_set(x: u8) -> bool {
// Validates if a byte is a valid header name character
// https://tools.ietf.org/html/rfc7230#section-3.2.6
matches!(x, b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z' | b'!' | b'#' | b'$' | b'%' | b'&' | b'\'' | b'*' | b'+' | b'-' | b'.' | b'^' | b'_' | b'`' | b'|' | b'~')
}
// A 256-bit bitmap, split into two halves:
// the lower half covers bytes whose high nibble is <= 7,
// the higher half covers bytes whose high nibble is >= 8.
const fn build_bitmap() -> ([u8; 16], [u8; 16]) {
let mut bitmap_0_7 = [0u8; 16]; // 0x00..0x7F
let mut bitmap_8_15 = [0u8; 16]; // 0x80..0xFF
let mut i = 0;
while i < 256 {
if bit_set(i as u8) {
// Nibbles
let (lo, hi) = (i & 0x0F, i >> 4);
if i < 128 {
bitmap_0_7[lo] |= 1 << hi;
} else {
bitmap_8_15[lo] |= 1 << hi;
}
}
i += 1;
}
(bitmap_0_7, bitmap_8_15)
}
const BITMAPS: ([u8; 16], [u8; 16]) = build_bitmap();
// NOTE: adapted from 256-bit version, with upper 128-bit ops commented out
#[inline]
unsafe fn match_header_name_char_16_neon(ptr: *const u8) -> usize {
let bitmaps = BITMAPS;
// NOTE: ideally compile-time constants
let (bitmap_0_7, _bitmap_8_15) = bitmaps;
let bitmap_0_7 = vld1q_u8(bitmap_0_7.as_ptr());
// let bitmap_8_15 = vld1q_u8(bitmap_8_15.as_ptr());
// Initialize the bitmask_lookup.
const BITMASK_LOOKUP_DATA: [u8; 16] =
[1, 2, 4, 8, 16, 32, 64, 128, 1, 2, 4, 8, 16, 32, 64, 128];
let bitmask_lookup = vld1q_u8(BITMASK_LOOKUP_DATA.as_ptr());
// Load 16 input bytes.
let input = vld1q_u8(ptr);
// Extract indices for row_0_7.
let indices_0_7 = vandq_u8(input, vdupq_n_u8(0x8F)); // 0b1000_1111;
// Extract indices for row_8_15.
// let msb = vandq_u8(input, vdupq_n_u8(0x80));
// let indices_8_15 = veorq_u8(indices_0_7, msb);
// Fetch row_0_7 and row_8_15.
let row_0_7 = vqtbl1q_u8(bitmap_0_7, indices_0_7);
// let row_8_15 = vqtbl1q_u8(bitmap_8_15, indices_8_15);
// Calculate a bitmask, i.e. (1 << hi_nibble % 8).
let bitmask = vqtbl1q_u8(bitmask_lookup, vshrq_n_u8(input, 4));
// Choose row halves depending on the high nibbles.
// let bitsets = vorrq_u8(row_0_7, row_8_15);
let bitsets = row_0_7;
// Finally check which bytes belong to the set.
let tmp = vandq_u8(bitsets, bitmask);
let result = vceqq_u8(tmp, bitmask);
offsetz(result) as usize
}
#[inline]
unsafe fn match_url_char_16_neon(ptr: *const u8) -> usize {
let input = vld1q_u8(ptr);
// Check that b'!' <= b and b != 127
let result = vcleq_u8(vdupq_n_u8(b'!'), input);
// Disallow del
let del = vceqq_u8(input, vdupq_n_u8(0x7F));
let result = vbicq_u8(result, del);
offsetz(result) as usize
}
#[inline]
unsafe fn match_header_value_char_16_neon(ptr: *const u8) -> usize {
let input = vld1q_u8(ptr);
// Check that b' ' <= b or b == 9, and that b != 127
let result = vcleq_u8(vdupq_n_u8(b' '), input);
// Allow tab
let tab = vceqq_u8(input, vdupq_n_u8(0x09));
let result = vorrq_u8(result, tab);
// Disallow del
let del = vceqq_u8(input, vdupq_n_u8(0x7F));
let result = vbicq_u8(result, del);
offsetz(result) as usize
}
#[inline]
unsafe fn offsetz(x: uint8x16_t) -> u32 {
// NOT the vector since it's faster to operate with zeros instead
offsetnz(vmvnq_u8(x))
}
#[inline]
unsafe fn offsetnz(x: uint8x16_t) -> u32 {
// Extract two u64
let x = vreinterpretq_u64_u8(x);
// Extract to general purpose registers to perform clz
let low: u64 = vgetq_lane_u64::<0>(x);
let high: u64 = vgetq_lane_u64::<1>(x);
#[inline]
fn clz(x: u64) -> u32 {
// perf: rust will unroll this loop
// and it's much faster than rbit + clz so voila
for (i, b) in x.to_ne_bytes().iter().copied().enumerate() {
if b != 0 {
return i as u32;
}
}
8 // Technically not reachable since zero-guarded
}
if low != 0 {
clz(low)
} else if high != 0 {
8 + clz(high)
} else {
16
}
}
#[test]
fn neon_code_matches_uri_chars_table() {
#[allow(clippy::undocumented_unsafe_blocks)]
unsafe {
assert!(byte_is_allowed(b'_', match_uri_vectored));
for (b, allowed) in crate::URI_MAP.iter().cloned().enumerate() {
assert_eq!(
byte_is_allowed(b as u8, match_uri_vectored),
allowed,
"byte_is_allowed({:?}) should be {:?}",
b,
allowed,
);
}
}
}
#[test]
fn neon_code_matches_header_value_chars_table() {
#[allow(clippy::undocumented_unsafe_blocks)]
unsafe {
assert!(byte_is_allowed(b'_', match_header_value_vectored));
for (b, allowed) in crate::HEADER_VALUE_MAP.iter().cloned().enumerate() {
assert_eq!(
byte_is_allowed(b as u8, match_header_value_vectored),
allowed,
"byte_is_allowed({:?}) should be {:?}",
b,
allowed,
);
}
}
}
#[test]
fn neon_code_matches_header_name_chars_table() {
#[allow(clippy::undocumented_unsafe_blocks)]
unsafe {
assert!(byte_is_allowed(b'_', match_header_name_vectored));
for (b, allowed) in crate::TOKEN_MAP.iter().cloned().enumerate() {
assert_eq!(
byte_is_allowed(b as u8, match_header_name_vectored),
allowed,
"byte_is_allowed({:?}) should be {:?}",
b,
allowed,
);
}
}
}
#[cfg(test)]
unsafe fn byte_is_allowed(byte: u8, f: unsafe fn(bytes: &mut Bytes<'_>)) -> bool {
let mut slice = [b'_'; 16];
slice[10] = byte;
let mut bytes = Bytes::new(&slice);
f(&mut bytes);
match bytes.pos() {
16 => true,
10 => false,
x => panic!("unexpected pos: {}", x),
}
}
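
Note: the table lookups above are easier to follow against a scalar model of the nibble-bitmap scheme (illustrative only; `is_token_byte_model` is a hypothetical name). Header-name tokens are all ASCII, so the upper-half bitmap stays empty and bytes >= 0x80 never match, which is why the row_8_15 operations can stay commented out:

fn is_token_byte_model(b: u8) -> bool {
    if b >= 0x80 {
        return false; // bitmap_8_15 is all zeros for header-name tokens
    }
    let (bitmap_0_7, _) = BITMAPS;
    // Low nibble selects the row; high nibble selects a bit within it,
    // mirroring the vqtbl1q_u8 row fetch and the bitmask lookup.
    let (lo, hi) = ((b & 0x0f) as usize, b >> 4);
    (bitmap_0_7[lo] & (1 << hi)) != 0
}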

57
vendor/httparse/src/simd/runtime.rs vendored Normal file

@@ -0,0 +1,57 @@
use std::sync::atomic::{AtomicU8, Ordering};
use crate::iter::Bytes;
use super::avx2;
use super::sse42;
const AVX2: u8 = 1;
const SSE42: u8 = 2;
const NOP: u8 = 3;
fn detect_runtime_feature() -> u8 {
if is_x86_feature_detected!("avx2") {
AVX2
} else if is_x86_feature_detected!("sse4.2") {
SSE42
} else {
NOP
}
}
static RUNTIME_FEATURE: AtomicU8 = AtomicU8::new(0);
#[inline]
fn get_runtime_feature() -> u8 {
let mut feature = RUNTIME_FEATURE.load(Ordering::Relaxed);
if feature == 0 {
feature = detect_runtime_feature();
RUNTIME_FEATURE.store(feature, Ordering::Relaxed);
}
feature
}
pub fn match_header_name_vectored(bytes: &mut Bytes) {
super::swar::match_header_name_vectored(bytes);
}
pub fn match_uri_vectored(bytes: &mut Bytes) {
// SAFETY: calls are guarded by a feature check
unsafe {
match get_runtime_feature() {
AVX2 => avx2::match_uri_vectored(bytes),
SSE42 => sse42::match_uri_vectored(bytes),
_ /* NOP */ => super::swar::match_uri_vectored(bytes),
}
}
}
pub fn match_header_value_vectored(bytes: &mut Bytes) {
// SAFETY: calls are guarded by a feature check
unsafe {
match get_runtime_feature() {
AVX2 => avx2::match_header_value_vectored(bytes),
SSE42 => sse42::match_header_value_vectored(bytes),
_ /* NOP */ => super::swar::match_header_value_vectored(bytes),
}
}
}
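
Note: the detection cache above is a deliberately benign race: two threads may both run `detect_runtime_feature`, but they store the same value, so `Relaxed` suffices and no ordering with other memory is needed. The same pattern in isolation (a sketch with hypothetical names):

use std::sync::atomic::{AtomicU8, Ordering};

static CACHED: AtomicU8 = AtomicU8::new(0); // 0 = not yet detected

fn cached_feature(detect: fn() -> u8) -> u8 {
    match CACHED.load(Ordering::Relaxed) {
        0 => {
            let f = detect(); // must return a nonzero code
            CACHED.store(f, Ordering::Relaxed);
            f
        }
        f => f,
    }
}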

142
vendor/httparse/src/simd/sse42.rs vendored Normal file

@@ -0,0 +1,142 @@
use crate::iter::Bytes;
#[target_feature(enable = "sse4.2")]
pub unsafe fn match_uri_vectored(bytes: &mut Bytes) {
while bytes.as_ref().len() >= 16 {
let advance = match_url_char_16_sse(bytes.as_ref());
bytes.advance(advance);
if advance != 16 {
return;
}
}
super::swar::match_uri_vectored(bytes);
}
#[inline(always)]
#[allow(non_snake_case)]
unsafe fn match_url_char_16_sse(buf: &[u8]) -> usize {
debug_assert!(buf.len() >= 16);
#[cfg(target_arch = "x86")]
use core::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::*;
let ptr = buf.as_ptr();
// %x21-%x7e %x80-%xff
let DEL: __m128i = _mm_set1_epi8(0x7f);
let LOW: __m128i = _mm_set1_epi8(0x21);
let dat = _mm_lddqu_si128(ptr as *const _);
// unsigned comparison dat >= LOW
let low = _mm_cmpeq_epi8(_mm_max_epu8(dat, LOW), dat);
let del = _mm_cmpeq_epi8(dat, DEL);
let bit = _mm_andnot_si128(del, low);
let res = _mm_movemask_epi8(bit) as u16;
// TODO: use .trailing_ones() once MSRV >= 1.46
(!res).trailing_zeros() as usize
}
#[target_feature(enable = "sse4.2")]
pub unsafe fn match_header_value_vectored(bytes: &mut Bytes) {
while bytes.as_ref().len() >= 16 {
let advance = match_header_value_char_16_sse(bytes.as_ref());
bytes.advance(advance);
if advance != 16 {
return;
}
}
super::swar::match_header_value_vectored(bytes);
}
#[inline(always)]
#[allow(non_snake_case)]
unsafe fn match_header_value_char_16_sse(buf: &[u8]) -> usize {
debug_assert!(buf.len() >= 16);
#[cfg(target_arch = "x86")]
use core::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::*;
let ptr = buf.as_ptr();
// %x09 %x20-%x7e %x80-%xff
let TAB: __m128i = _mm_set1_epi8(0x09);
let DEL: __m128i = _mm_set1_epi8(0x7f);
let LOW: __m128i = _mm_set1_epi8(0x20);
let dat = _mm_lddqu_si128(ptr as *const _);
// unsigned comparison dat >= LOW
let low = _mm_cmpeq_epi8(_mm_max_epu8(dat, LOW), dat);
let tab = _mm_cmpeq_epi8(dat, TAB);
let del = _mm_cmpeq_epi8(dat, DEL);
let bit = _mm_andnot_si128(del, _mm_or_si128(low, tab));
let res = _mm_movemask_epi8(bit) as u16;
// TODO: use .trailing_ones() once MSRV >= 1.46
(!res).trailing_zeros() as usize
}
#[test]
fn sse_code_matches_uri_chars_table() {
if !is_x86_feature_detected!("sse4.2") {
return;
}
#[allow(clippy::undocumented_unsafe_blocks)]
unsafe {
assert!(byte_is_allowed(b'_', match_uri_vectored));
for (b, allowed) in crate::URI_MAP.iter().cloned().enumerate() {
assert_eq!(
byte_is_allowed(b as u8, match_uri_vectored), allowed,
"byte_is_allowed({:?}) should be {:?}", b, allowed,
);
}
}
}
#[test]
fn sse_code_matches_header_value_chars_table() {
if !is_x86_feature_detected!("sse4.2") {
return;
}
#[allow(clippy::undocumented_unsafe_blocks)]
unsafe {
assert!(byte_is_allowed(b'_', match_header_value_vectored));
for (b, allowed) in crate::HEADER_VALUE_MAP.iter().cloned().enumerate() {
assert_eq!(
byte_is_allowed(b as u8, match_header_value_vectored), allowed,
"byte_is_allowed({:?}) should be {:?}", b, allowed,
);
}
}
}
#[allow(clippy::missing_safety_doc)]
#[cfg(test)]
unsafe fn byte_is_allowed(byte: u8, f: unsafe fn(bytes: &mut Bytes<'_>)) -> bool {
let slice = [
b'_', b'_', b'_', b'_',
b'_', b'_', b'_', b'_',
b'_', b'_', byte, b'_',
b'_', b'_', b'_', b'_',
];
let mut bytes = Bytes::new(&slice);
f(&mut bytes);
match bytes.pos() {
16 => true,
10 => false,
_ => unreachable!(),
}
}

235
vendor/httparse/src/simd/swar.rs vendored Normal file

@@ -0,0 +1,235 @@
//! SWAR: SIMD Within A Register
//! SIMD validator backend that validates register-sized chunks of data at a time.
use crate::{is_header_name_token, is_header_value_token, is_uri_token, Bytes};
// Adapt block-size to match native register size, i.e: 32bit => 4, 64bit => 8
const BLOCK_SIZE: usize = core::mem::size_of::<usize>();
type ByteBlock = [u8; BLOCK_SIZE];
#[inline]
pub fn match_uri_vectored(bytes: &mut Bytes) {
loop {
if let Some(bytes8) = bytes.peek_n::<ByteBlock>(BLOCK_SIZE) {
let n = match_uri_char_8_swar(bytes8);
// SAFETY: using peek_n to retrieve the bytes ensures that there are at least n more bytes
// in `bytes`, so calling `advance(n)` is safe.
unsafe {
bytes.advance(n);
}
if n == BLOCK_SIZE {
continue;
}
}
if let Some(b) = bytes.peek() {
if is_uri_token(b) {
// SAFETY: using peek to retrieve the byte ensures that there is at least 1 more byte
// in bytes, so calling advance is safe.
unsafe {
bytes.advance(1);
}
continue;
}
}
break;
}
}
#[inline]
pub fn match_header_value_vectored(bytes: &mut Bytes) {
loop {
if let Some(bytes8) = bytes.peek_n::<ByteBlock>(BLOCK_SIZE) {
let n = match_header_value_char_8_swar(bytes8);
// SAFETY: using peek_n to retrieve the bytes ensures that there are at least n more bytes
// in `bytes`, so calling `advance(n)` is safe.
unsafe {
bytes.advance(n);
}
if n == BLOCK_SIZE {
continue;
}
}
if let Some(b) = bytes.peek() {
if is_header_value_token(b) {
// SAFETY: using peek to retrieve the byte ensures that there is at least 1 more byte
// in bytes, so calling advance is safe.
unsafe {
bytes.advance(1);
}
continue;
}
}
break;
}
}
#[inline]
pub fn match_header_name_vectored(bytes: &mut Bytes) {
while let Some(block) = bytes.peek_n::<ByteBlock>(BLOCK_SIZE) {
let n = match_block(is_header_name_token, block);
// SAFETY: using peek_n to retrieve the bytes ensures that there are at least n more bytes
// in `bytes`, so calling `advance(n)` is safe.
unsafe {
bytes.advance(n);
}
if n != BLOCK_SIZE {
return;
}
}
// SAFETY: match_tail processes at most the remaining data in `bytes`. advances `bytes` to the
// end, but no further.
unsafe { bytes.advance(match_tail(is_header_name_token, bytes.as_ref())) };
}
// Matches "tail", i.e: when we have <BLOCK_SIZE bytes in the buffer, should be uncommon
#[cold]
#[inline]
fn match_tail(f: impl Fn(u8) -> bool, bytes: &[u8]) -> usize {
for (i, &b) in bytes.iter().enumerate() {
if !f(b) {
return i;
}
}
bytes.len()
}
// Naive fallback block matcher
#[inline(always)]
fn match_block(f: impl Fn(u8) -> bool, block: ByteBlock) -> usize {
for (i, &b) in block.iter().enumerate() {
if !f(b) {
return i;
}
}
BLOCK_SIZE
}
// A const alternative to u64::from_ne_bytes, to avoid bumping MSRV (1.36 => 1.44);
// creates a word whose bytes are each equal to `b`.
const fn uniform_block(b: u8) -> usize {
(b as u64 * 0x01_01_01_01_01_01_01_01 /* [1_u8; 8] */) as usize
}
// A byte-wise range-check on an entire word/block,
// ensuring all bytes in the word satisfy 33 <= x <= 255, excluding 127
#[inline]
fn match_uri_char_8_swar(block: ByteBlock) -> usize {
// 33 <= x <= 255, excluding 127
const M: u8 = 0x21;
// uniform block full of exclamation mark (!) (33).
const BM: usize = uniform_block(M);
// uniform block full of 1.
const ONE: usize = uniform_block(0x01);
// uniform block full of DEL (127).
const DEL: usize = uniform_block(0x7f);
// uniform block full of 128.
const M128: usize = uniform_block(128);
let x = usize::from_ne_bytes(block); // Really just a transmute
let lt = x.wrapping_sub(BM) & !x; // high bit set where byte < M
let xor_del = x ^ DEL;
let eq_del = xor_del.wrapping_sub(ONE) & !xor_del; // == DEL
offsetnz((lt | eq_del) & M128)
}
// A byte-wise range-check on an entire word/block,
// ensuring all bytes in the word satisfy 32 <= x <= 255, excluding 127
#[inline]
fn match_header_value_char_8_swar(block: ByteBlock) -> usize {
// 32 <= x <= 255, excluding 127
const M: u8 = 0x20;
// uniform block full of space (0x20, 32).
const BM: usize = uniform_block(M);
// uniform block full of 1.
const ONE: usize = uniform_block(0x01);
// uniform block full of DEL (127).
const DEL: usize = uniform_block(0x7f);
// uniform block full of 128.
const M128: usize = uniform_block(128);
let x = usize::from_ne_bytes(block); // Really just a transmute
let lt = x.wrapping_sub(BM) & !x; // high bit set where byte < M
let xor_del = x ^ DEL;
let eq_del = xor_del.wrapping_sub(ONE) & !xor_del; // == DEL
offsetnz((lt | eq_del) & M128)
}
/// Check block to find offset of first non-zero byte
// NOTE: Curiously `block.trailing_zeros() >> 3` appears to be slower, maybe revisit
#[inline]
fn offsetnz(block: usize) -> usize {
// fast path optimistic case (common for long valid sequences)
if block == 0 {
return BLOCK_SIZE;
}
// perf: rust will unroll this loop
for (i, b) in block.to_ne_bytes().iter().copied().enumerate() {
if b != 0 {
return i;
}
}
unreachable!()
}
#[test]
fn test_is_header_value_block() {
let is_header_value_block = |b| match_header_value_char_8_swar(b) == BLOCK_SIZE;
// 0..32 => false
for b in 0..32_u8 {
assert!(!is_header_value_block([b; BLOCK_SIZE]), "b={}", b);
}
// 32..=126 => true
for b in 32..=126_u8 {
assert!(is_header_value_block([b; BLOCK_SIZE]), "b={}", b);
}
// 127 => false
assert!(!is_header_value_block([b'\x7F'; BLOCK_SIZE]), "b={}", b'\x7F');
// 128..=255 => true
for b in 128..=255_u8 {
assert!(is_header_value_block([b; BLOCK_SIZE]), "b={}", b);
}
#[cfg(target_pointer_width = "64")]
{
// A few sanity checks on non-uniform bytes for safe-measure
assert!(!is_header_value_block(*b"foo.com\n"));
assert!(!is_header_value_block(*b"o.com\r\nU"));
}
}
#[test]
fn test_is_uri_block() {
let is_uri_block = |b| match_uri_char_8_swar(b) == BLOCK_SIZE;
// 0..33 => false
for b in 0..33_u8 {
assert!(!is_uri_block([b; BLOCK_SIZE]), "b={}", b);
}
// 33..=126 => true
for b in 33..=126_u8 {
assert!(is_uri_block([b; BLOCK_SIZE]), "b={}", b);
}
// 127 => false
assert!(!is_uri_block([b'\x7F'; BLOCK_SIZE]), "b={}", b'\x7F');
// 128..=255 => true
for b in 128..=255_u8 {
assert!(is_uri_block([b; BLOCK_SIZE]), "b={}", b);
}
}
#[test]
fn test_offsetnz() {
let seq = [0_u8; BLOCK_SIZE];
for i in 0..BLOCK_SIZE {
let mut seq = seq;
seq[i] = 1;
let x = usize::from_ne_bytes(seq);
assert_eq!(offsetnz(x), i);
}
}
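
Note: a worked example of the URI classifier (a hypothetical test; 64-bit only so each byte-string literal fills exactly one block). DEL at index 3 yields offset 3, a fully valid block yields BLOCK_SIZE, and a space at index 1 trips the lower bound:

#[cfg(target_pointer_width = "64")]
#[test]
fn swar_reports_first_invalid_offset() {
    assert_eq!(match_uri_char_8_swar(*b"GET\x7fpath"), 3); // DEL at index 3
    assert_eq!(match_uri_char_8_swar(*b"GET/path"), BLOCK_SIZE); // all valid
    assert_eq!(match_uri_char_8_swar(*b"a b_c_d_"), 1); // space (0x20) < 0x21
}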