chore: checkpoint before Python removal

This commit is contained in:
2026-03-26 22:33:59 +00:00
parent 683cec9307
commit e568ddf82a
29972 changed files with 11269302 additions and 2 deletions

View File

@@ -0,0 +1,117 @@
use super::group::{
BitMaskWord, NonZeroBitMaskWord, BITMASK_ITER_MASK, BITMASK_MASK, BITMASK_STRIDE,
};
/// A bit mask which contains the result of a `Match` operation on a `Group` and
/// allows iterating through the matching elements.
///
/// The bit mask is arranged so that low-order bits represent lower memory
/// addresses for group match results.
///
/// For implementation reasons, the bits in the set may be sparsely packed with
/// groups of 8 bits representing one element. If any of these bits are non-zero
/// then this element is considered to be true in the mask. If this is the
/// case, `BITMASK_STRIDE` will be 8 to indicate a divide-by-8 should be
/// performed on counts/indices to normalize this difference. `BITMASK_MASK` is
/// similarly a mask of all the actually-used bits.
///
/// To iterate over a bit mask, it must be converted to a form where only 1 bit
/// is set per element. This is done by applying `BITMASK_ITER_MASK` on the
/// mask bits.
///
/// The wrapped `BitMaskWord` type and the `BITMASK_*` constants are imported
/// from the `group` module.
#[derive(Copy, Clone)]
pub(crate) struct BitMask(pub(crate) BitMaskWord);
#[allow(clippy::use_self)]
impl BitMask {
    /// Returns a new `BitMask` with all bits inverted.
    ///
    /// The inversion is an XOR with `BITMASK_MASK`, so only the bits that the
    /// mask constant covers are flipped.
    #[inline]
    #[must_use]
    #[allow(dead_code)]
    pub(crate) fn invert(self) -> Self {
        BitMask(self.0 ^ BITMASK_MASK)
    }

    /// Returns a new `BitMask` with the lowest set bit removed.
    ///
    /// An empty mask is returned unchanged.
    #[inline]
    #[must_use]
    fn remove_lowest_bit(self) -> Self {
        // `x & (x - 1)` clears the lowest set bit of `x`. The subtraction is
        // done with `wrapping_sub` so that an empty mask yields an empty mask
        // in every build profile instead of tripping the overflow check in
        // builds that have it enabled.
        BitMask(self.0 & self.0.wrapping_sub(1))
    }

    /// Returns whether the `BitMask` has at least one set bit.
    #[inline]
    pub(crate) fn any_bit_set(self) -> bool {
        self.0 != 0
    }

    /// Returns the element index of the first set bit in the `BitMask`, or
    /// `None` if the mask is empty.
    #[inline]
    pub(crate) fn lowest_set_bit(self) -> Option<usize> {
        NonZeroBitMaskWord::new(self.0).map(Self::nonzero_trailing_zeros)
    }

    /// Returns the number of trailing zeroes in the `BitMask`, expressed in
    /// elements (the raw bit count divided by `BITMASK_STRIDE`).
    #[inline]
    pub(crate) fn trailing_zeros(self) -> usize {
        // ARM doesn't have a trailing_zeroes instruction, and instead uses
        // reverse_bits (RBIT) + leading_zeroes (CLZ). However older ARM
        // versions (pre-ARMv7) don't have RBIT and need to emulate it
        // instead. Since we only have 1 bit set in each byte on ARM, we can
        // use swap_bytes (REV) + leading_zeroes instead.
        if cfg!(target_arch = "arm") && BITMASK_STRIDE % 8 == 0 {
            self.0.swap_bytes().leading_zeros() as usize / BITMASK_STRIDE
        } else {
            self.0.trailing_zeros() as usize / BITMASK_STRIDE
        }
    }

    /// Same as `trailing_zeros` but takes a `NonZeroBitMaskWord`.
    #[inline]
    fn nonzero_trailing_zeros(nonzero: NonZeroBitMaskWord) -> usize {
        if cfg!(target_arch = "arm") && BITMASK_STRIDE % 8 == 0 {
            // SAFETY: A byte-swapped non-zero value is still non-zero.
            let swapped = unsafe { NonZeroBitMaskWord::new_unchecked(nonzero.get().swap_bytes()) };
            swapped.leading_zeros() as usize / BITMASK_STRIDE
        } else {
            nonzero.trailing_zeros() as usize / BITMASK_STRIDE
        }
    }

    /// Returns the number of leading zeroes in the `BitMask`, expressed in
    /// elements (the raw bit count divided by `BITMASK_STRIDE`).
    #[inline]
    pub(crate) fn leading_zeros(self) -> usize {
        self.0.leading_zeros() as usize / BITMASK_STRIDE
    }
}
impl IntoIterator for BitMask {
    type Item = usize;
    type IntoIter = BitMaskIter;

    /// Turns the mask into an iterator over the indices of its set elements.
    #[inline]
    fn into_iter(self) -> Self::IntoIter {
        // A `BitMask` only guarantees that a set element has *some* non-zero
        // bit; iteration needs exactly one bit per element, which is what
        // masking with `BITMASK_ITER_MASK` leaves behind.
        let one_bit_per_element = self.0 & BITMASK_ITER_MASK;
        BitMaskIter(BitMask(one_bit_per_element))
    }
}
/// Iterator over the contents of a `BitMask`, returning the indices of set
/// bits.
///
/// The wrapped mask holds the bits that have not been yielded yet; each call
/// to `next` reports the lowest set bit and removes it from the mask. The mask
/// is expected to carry at most one bit per element, which is the form that
/// `BitMask::into_iter` produces by applying `BITMASK_ITER_MASK`.
#[derive(Clone)]
pub(crate) struct BitMaskIter(pub(crate) BitMask);
impl Iterator for BitMaskIter {
    type Item = usize;

    /// Yields the index of the lowest remaining set bit and removes that bit
    /// from the pending mask; returns `None` once the mask is empty.
    #[inline]
    fn next(&mut self) -> Option<usize> {
        let pending = self.0;
        pending.lowest_set_bit().map(|index| {
            // Only consume the bit when there actually was one to report.
            self.0 = pending.remove_lowest_bit();
            index
        })
    }
}

View File

@@ -0,0 +1,154 @@
use super::super::{BitMask, Tag};
use core::{mem, ptr};
// Use the native word size as the group size. Using a 64-bit group size on
// a 32-bit architecture will just end up being more expensive because
// shifts and multiplies will need to be emulated.
//
// `GroupWord` is the integer type that holds one group of tags and
// `NonZeroGroupWord` is its non-zero counterpart; both are re-exported right
// below under the `BitMaskWord` / `NonZeroBitMaskWord` names.
cfg_if! {
// 64-bit words are selected for 64-bit pointer widths and for the
// architectures listed explicitly; everything else gets 32-bit words.
// NOTE(review): `wasm32` has 32-bit pointers; presumably it is listed because
// it still has native 64-bit integer operations - confirm.
if #[cfg(any(
target_pointer_width = "64",
target_arch = "aarch64",
target_arch = "x86_64",
target_arch = "wasm32",
))] {
type GroupWord = u64;
type NonZeroGroupWord = core::num::NonZeroU64;
} else {
type GroupWord = u32;
type NonZeroGroupWord = core::num::NonZeroU32;
}
}
/// Integer type backing a `BitMask`; for this implementation it is the group
/// word itself.
pub(crate) type BitMaskWord = GroupWord;
/// Non-zero counterpart of `BitMaskWord`.
pub(crate) type NonZeroBitMaskWord = NonZeroGroupWord;
/// Number of mask bits per element: every tag occupies one full byte of the
/// mask word.
pub(crate) const BITMASK_STRIDE: usize = 8;
// We only care about the highest bit of each tag for the mask.
// The constant is built as a `u64` with `Tag::DELETED` in every byte and then
// cast to `GroupWord`, which truncates it when `GroupWord` is `u32`.
#[allow(clippy::cast_possible_truncation, clippy::unnecessary_cast)]
pub(crate) const BITMASK_MASK: BitMaskWord = u64::from_ne_bytes([Tag::DELETED.0; 8]) as GroupWord;
/// Mask applied before iterating over a `BitMask`; all ones here, so it leaves
/// the mask bits untouched.
pub(crate) const BITMASK_ITER_MASK: BitMaskWord = !0;
/// Broadcasts `tag` into every byte of a `GroupWord`.
#[inline]
fn repeat(tag: Tag) -> GroupWord {
    // All bytes are identical, so the byte order used to assemble the word
    // does not affect the result.
    let lanes = [tag.0; Group::WIDTH];
    GroupWord::from_ne_bytes(lanes)
}
/// Abstraction over a group of control tags which can be scanned in
/// parallel.
///
/// This implementation uses a word-sized integer: the group is a single
/// `GroupWord` in which every byte holds one tag.
#[derive(Copy, Clone)]
pub(crate) struct Group(GroupWord);
// We perform all operations in the native endianness, and convert to
// little-endian just before creating a BitMask. The can potentially
// enable the compiler to eliminate unnecessary byte swaps if we are
// only checking whether a BitMask is empty.
#[allow(clippy::use_self)]
impl Group {
/// Number of bytes in the group.
pub(crate) const WIDTH: usize = mem::size_of::<Self>();
/// Returns a full group of empty tags, suitable for use as the initial
/// value for an empty hash table.
///
/// This is guaranteed to be aligned to the group size.
#[inline]
pub(crate) const fn static_empty() -> &'static [Tag; Group::WIDTH] {
#[repr(C)]
struct AlignedTags {
_align: [Group; 0],
tags: [Tag; Group::WIDTH],
}
const ALIGNED_TAGS: AlignedTags = AlignedTags {
_align: [],
tags: [Tag::EMPTY; Group::WIDTH],
};
&ALIGNED_TAGS.tags
}
/// Loads a group of tags starting at the given address.
#[inline]
#[allow(clippy::cast_ptr_alignment)] // unaligned load
pub(crate) unsafe fn load(ptr: *const Tag) -> Self {
Group(ptr::read_unaligned(ptr.cast()))
}
/// Loads a group of tags starting at the given address, which must be
/// aligned to `mem::align_of::<Group>()`.
#[inline]
#[allow(clippy::cast_ptr_alignment)]
pub(crate) unsafe fn load_aligned(ptr: *const Tag) -> Self {
debug_assert_eq!(ptr.align_offset(mem::align_of::<Self>()), 0);
Group(ptr::read(ptr.cast()))
}
/// Stores the group of tags to the given address, which must be
/// aligned to `mem::align_of::<Group>()`.
#[inline]
#[allow(clippy::cast_ptr_alignment)]
pub(crate) unsafe fn store_aligned(self, ptr: *mut Tag) {
debug_assert_eq!(ptr.align_offset(mem::align_of::<Self>()), 0);
ptr::write(ptr.cast(), self.0);
}
/// Returns a `BitMask` indicating all tags in the group which *may*
/// have the given value.
///
/// This function may return a false positive in certain cases where
/// the tag in the group differs from the searched value only in its
/// lowest bit. This is fine because:
/// - This never happens for `EMPTY` and `DELETED`, only full entries.
/// - The check for key equality will catch these.
/// - This only happens if there is at least 1 true match.
/// - The chance of this happening is very low (< 1% chance per byte).
#[inline]
pub(crate) fn match_tag(self, tag: Tag) -> BitMask {
// This algorithm is derived from
// https://graphics.stanford.edu/~seander/bithacks.html##ValueInWord
let cmp = self.0 ^ repeat(tag);
BitMask((cmp.wrapping_sub(repeat(Tag(0x01))) & !cmp & repeat(Tag::DELETED)).to_le())
}
/// Returns a `BitMask` indicating all tags in the group which are
/// `EMPTY`.
#[inline]
pub(crate) fn match_empty(self) -> BitMask {
// If the high bit is set, then the tag must be either:
// 1111_1111 (EMPTY) or 1000_0000 (DELETED).
// So we can just check if the top two bits are 1 by ANDing them.
BitMask((self.0 & (self.0 << 1) & repeat(Tag::DELETED)).to_le())
}
/// Returns a `BitMask` indicating all tags in the group which are
/// `EMPTY` or `DELETED`.
#[inline]
pub(crate) fn match_empty_or_deleted(self) -> BitMask {
// A tag is EMPTY or DELETED iff the high bit is set
BitMask((self.0 & repeat(Tag::DELETED)).to_le())
}
/// Returns a `BitMask` indicating all tags in the group which are full.
#[inline]
pub(crate) fn match_full(self) -> BitMask {
self.match_empty_or_deleted().invert()
}
/// Performs the following transformation on all tags in the group:
/// - `EMPTY => EMPTY`
/// - `DELETED => EMPTY`
/// - `FULL => DELETED`
#[inline]
pub(crate) fn convert_special_to_empty_and_full_to_deleted(self) -> Self {
// Map high_bit = 1 (EMPTY or DELETED) to 1111_1111
// and high_bit = 0 (FULL) to 1000_0000
//
// Here's this logic expanded to concrete values:
// let full = 1000_0000 (true) or 0000_0000 (false)
// !1000_0000 + 1 = 0111_1111 + 1 = 1000_0000 (no carry)
// !0000_0000 + 0 = 1111_1111 + 0 = 1111_1111 (no carry)
let full = !self.0 & repeat(Tag::DELETED);
Group(!full + (full >> 7))
}
}

View File

@@ -0,0 +1,137 @@
use super::super::{BitMask, Tag};
use core::mem;
use core::num::NonZeroU16;
use core::arch::loongarch64::*;
use mem::transmute;
/// Integer type backing a `BitMask` for the LSX implementation.
pub(crate) type BitMaskWord = u16;
/// Non-zero counterpart of `BitMaskWord`.
pub(crate) type NonZeroBitMaskWord = NonZeroU16;
/// Number of mask bits per element: the mask is densely packed, one bit per
/// tag.
pub(crate) const BITMASK_STRIDE: usize = 1;
/// Mask of all bits that are actually used in a `BitMaskWord`.
pub(crate) const BITMASK_MASK: BitMaskWord = 0xffff;
/// Mask applied before iterating over a `BitMask`; all ones here, so it leaves
/// the mask bits untouched.
pub(crate) const BITMASK_ITER_MASK: BitMaskWord = !0;
/// Abstraction over a group of control tags which can be scanned in
/// parallel.
///
/// This implementation uses a 128-bit LSX value, stored as a `v16i8` vector
/// with one tag per byte lane.
#[derive(Copy, Clone)]
pub(crate) struct Group(v16i8);
// FIXME: https://github.com/rust-lang/rust-clippy/issues/3859
#[allow(clippy::use_self)]
impl Group {
/// Number of bytes in the group.
pub(crate) const WIDTH: usize = mem::size_of::<Self>();
/// Returns a full group of empty tags, suitable for use as the initial
/// value for an empty hash table.
///
/// This is guaranteed to be aligned to the group size.
#[inline]
#[allow(clippy::items_after_statements)]
pub(crate) const fn static_empty() -> &'static [Tag; Group::WIDTH] {
// The zero-length `[Group; 0]` field takes no space but gives the struct
// the alignment of `Group`.
#[repr(C)]
struct AlignedTags {
_align: [Group; 0],
tags: [Tag; Group::WIDTH],
}
const ALIGNED_TAGS: AlignedTags = AlignedTags {
_align: [],
tags: [Tag::EMPTY; Group::WIDTH],
};
&ALIGNED_TAGS.tags
}
/// Loads a group of tags starting at the given address.
#[inline]
#[allow(clippy::cast_ptr_alignment)] // unaligned load
pub(crate) unsafe fn load(ptr: *const Tag) -> Self {
Group(lsx_vld::<0>(ptr.cast()))
}
/// Loads a group of tags starting at the given address, which must be
/// aligned to `mem::align_of::<Group>()`.
///
/// The alignment is only checked in debug builds; the load itself uses the
/// same `lsx_vld` intrinsic as `load`.
#[inline]
#[allow(clippy::cast_ptr_alignment)]
pub(crate) unsafe fn load_aligned(ptr: *const Tag) -> Self {
debug_assert_eq!(ptr.align_offset(mem::align_of::<Self>()), 0);
Group(lsx_vld::<0>(ptr.cast()))
}
/// Stores the group of tags to the given address, which must be
/// aligned to `mem::align_of::<Group>()`.
///
/// The alignment is only checked in debug builds.
#[inline]
#[allow(clippy::cast_ptr_alignment)]
pub(crate) unsafe fn store_aligned(self, ptr: *mut Tag) {
debug_assert_eq!(ptr.align_offset(mem::align_of::<Self>()), 0);
lsx_vst::<0>(self.0, ptr.cast());
}
/// Returns a `BitMask` indicating all tags in the group which have
/// the given value.
#[inline]
pub(crate) fn match_tag(self, tag: Tag) -> BitMask {
#[allow(clippy::missing_transmute_annotations)]
unsafe {
// Compare every lane against the broadcast tag, then collect the sign
// bit of each lane into the low 16 bits of the result.
let cmp = lsx_vseq_b(self.0, lsx_vreplgr2vr_b(tag.0 as i32));
BitMask(lsx_vpickve2gr_hu::<0>(transmute(lsx_vmskltz_b(cmp))) as u16)
}
}
/// Returns a `BitMask` indicating all tags in the group which are
/// `EMPTY`.
#[inline]
pub(crate) fn match_empty(self) -> BitMask {
#[allow(clippy::missing_transmute_annotations)]
unsafe {
// `Tag::EMPTY` is passed as a sign-extended immediate to the compare.
let cmp = lsx_vseqi_b::<{ Tag::EMPTY.0 as i8 as i32 }>(self.0);
BitMask(lsx_vpickve2gr_hu::<0>(transmute(lsx_vmskltz_b(cmp))) as u16)
}
}
/// Returns a `BitMask` indicating all tags in the group which are
/// `EMPTY` or `DELETED`.
#[inline]
pub(crate) fn match_empty_or_deleted(self) -> BitMask {
#[allow(clippy::missing_transmute_annotations)]
unsafe {
// A tag is EMPTY or DELETED iff the high bit is set
BitMask(lsx_vpickve2gr_hu::<0>(transmute(lsx_vmskltz_b(self.0))) as u16)
}
}
/// Returns a `BitMask` indicating all tags in the group which are full.
#[inline]
pub(crate) fn match_full(&self) -> BitMask {
#[allow(clippy::missing_transmute_annotations)]
unsafe {
// A tag is full iff the high bit is clear, i.e. the byte is >= 0 when
// read as a signed value, which is what `lsx_vmskgez_b` selects.
BitMask(lsx_vpickve2gr_hu::<0>(transmute(lsx_vmskgez_b(self.0))) as u16)
}
}
/// Performs the following transformation on all tags in the group:
/// - `EMPTY => EMPTY`
/// - `DELETED => EMPTY`
/// - `FULL => DELETED`
#[inline]
pub(crate) fn convert_special_to_empty_and_full_to_deleted(self) -> Self {
// Map high_bit = 1 (EMPTY or DELETED) to 1111_1111
// and high_bit = 0 (FULL) to 1000_0000
//
// Here's this logic expanded to concrete values:
// let special = 0 > tag = 1111_1111 (true) or 0000_0000 (false)
// 1111_1111 | 1000_0000 = 1111_1111
// 0000_0000 | 1000_0000 = 1000_0000
#[allow(clippy::missing_transmute_annotations)]
unsafe {
let zero = lsx_vreplgr2vr_b(0);
let special = lsx_vslt_b(self.0, zero);
Group(transmute(lsx_vor_v(
transmute(special),
transmute(lsx_vreplgr2vr_b(Tag::DELETED.0 as i32)),
)))
}
}
}

View File

@@ -0,0 +1,43 @@
cfg_if! {
// Use the SSE2 implementation if possible: it allows us to scan 16 buckets
// at once instead of 8. We don't bother with AVX since it would require
// runtime dispatch and wouldn't gain us much anyways: the probability of
// finding a match drops off drastically after the first few buckets.
//
// I attempted an implementation on ARM using NEON instructions, but it
// turns out that most NEON instructions have multi-cycle latency, which in
// the end outweighs any gains over the generic implementation.
//
// NOTE(review): the NEON paragraph above looks out of date, since the second
// branch below does select a `neon` module on little-endian AArch64 -
// confirm and reword.
//
// The branches are tried in order and exactly one module ends up aliased as
// `imp`. Every SIMD branch requires `not(miri)`, so Miri falls through to
// the generic implementation.
if #[cfg(all(
target_feature = "sse2",
any(target_arch = "x86", target_arch = "x86_64"),
not(miri),
))] {
mod sse2;
use sse2 as imp;
} else if #[cfg(all(
target_arch = "aarch64",
target_feature = "neon",
// NEON intrinsics are currently broken on big-endian targets.
// See https://github.com/rust-lang/stdarch/issues/1484.
target_endian = "little",
not(miri),
))] {
mod neon;
use neon as imp;
} else if #[cfg(all(
// The LSX implementation is additionally gated on the crate's `nightly`
// feature.
feature = "nightly",
target_arch = "loongarch64",
target_feature = "lsx",
not(miri),
))] {
mod lsx;
use lsx as imp;
} else {
// Portable fallback for every other configuration.
mod generic;
use generic as imp;
}
}
pub(crate) use self::imp::Group;
pub(super) use self::imp::{
BitMaskWord, NonZeroBitMaskWord, BITMASK_ITER_MASK, BITMASK_MASK, BITMASK_STRIDE,
};

View File

@@ -0,0 +1,121 @@
use super::super::{BitMask, Tag};
use core::arch::aarch64 as neon;
use core::mem;
use core::num::NonZeroU64;
/// Integer type backing a `BitMask` for the NEON implementation.
pub(crate) type BitMaskWord = u64;
/// Non-zero counterpart of `BitMaskWord`.
pub(crate) type NonZeroBitMaskWord = NonZeroU64;
/// Number of mask bits per element: every tag occupies one full byte of the
/// 64-bit mask.
pub(crate) const BITMASK_STRIDE: usize = 8;
/// Mask of all bits that are actually used in a `BitMaskWord`; every bit is
/// used here.
pub(crate) const BITMASK_MASK: BitMaskWord = !0;
/// Mask applied before iterating over a `BitMask`: keeps only the highest bit
/// of every byte so that each element contributes a single bit.
pub(crate) const BITMASK_ITER_MASK: BitMaskWord = 0x8080_8080_8080_8080;
/// Abstraction over a group of control tags which can be scanned in
/// parallel.
///
/// This implementation uses a 64-bit NEON value, stored as a `uint8x8_t`
/// vector with one tag per byte lane.
#[derive(Copy, Clone)]
pub(crate) struct Group(neon::uint8x8_t);
#[allow(clippy::use_self)]
impl Group {
    /// Number of bytes (and therefore tags) in the group.
    pub(crate) const WIDTH: usize = mem::size_of::<Self>();

    /// Returns a full group of empty tags, suitable for use as the initial
    /// value for an empty hash table.
    ///
    /// This is guaranteed to be aligned to the group size.
    #[inline]
    pub(crate) const fn static_empty() -> &'static [Tag; Group::WIDTH] {
        // The zero-length `[Group; 0]` field takes no space but gives the
        // struct the alignment of `Group`.
        #[repr(C)]
        struct GroupAligned {
            _align: [Group; 0],
            tags: [Tag; Group::WIDTH],
        }
        const EMPTY_GROUP: GroupAligned = GroupAligned {
            _align: [],
            tags: [Tag::EMPTY; Group::WIDTH],
        };
        &EMPTY_GROUP.tags
    }

    /// Loads a group of tags starting at the given address.
    #[inline]
    #[allow(clippy::cast_ptr_alignment)] // unaligned load
    pub(crate) unsafe fn load(ptr: *const Tag) -> Self {
        let bytes = ptr.cast::<u8>();
        Group(neon::vld1_u8(bytes))
    }

    /// Loads a group of tags starting at the given address, which must be
    /// aligned to `mem::align_of::<Group>()`.
    ///
    /// The alignment is only checked in debug builds; the load itself uses
    /// the same `vld1_u8` intrinsic as `load`.
    #[inline]
    #[allow(clippy::cast_ptr_alignment)]
    pub(crate) unsafe fn load_aligned(ptr: *const Tag) -> Self {
        debug_assert_eq!(ptr.align_offset(mem::align_of::<Self>()), 0);
        let bytes = ptr.cast::<u8>();
        Group(neon::vld1_u8(bytes))
    }

    /// Stores the group of tags to the given address, which must be
    /// aligned to `mem::align_of::<Group>()`.
    #[inline]
    #[allow(clippy::cast_ptr_alignment)]
    pub(crate) unsafe fn store_aligned(self, ptr: *mut Tag) {
        debug_assert_eq!(ptr.align_offset(mem::align_of::<Self>()), 0);
        let bytes = ptr.cast::<u8>();
        neon::vst1_u8(bytes, self.0);
    }

    /// Returns a `BitMask` indicating all tags in the group which have the
    /// given value.
    ///
    /// Each matching element is reported through its whole byte of the mask,
    /// because the lane-wise compare produces one byte per lane.
    #[inline]
    pub(crate) fn match_tag(self, tag: Tag) -> BitMask {
        unsafe {
            let needle = neon::vdup_n_u8(tag.0);
            let equal = neon::vceq_u8(self.0, needle);
            let word = neon::vreinterpret_u64_u8(equal);
            BitMask(neon::vget_lane_u64(word, 0))
        }
    }

    /// Returns a `BitMask` indicating all tags in the group which are
    /// `EMPTY`.
    #[inline]
    pub(crate) fn match_empty(self) -> BitMask {
        let empty = Tag::EMPTY;
        self.match_tag(empty)
    }

    /// Returns a `BitMask` indicating all tags in the group which are
    /// `EMPTY` or `DELETED`.
    #[inline]
    pub(crate) fn match_empty_or_deleted(self) -> BitMask {
        unsafe {
            // Special tags have their high bit set, i.e. they are negative
            // when the lanes are read as signed bytes.
            let signed = neon::vreinterpret_s8_u8(self.0);
            let negative = neon::vcltz_s8(signed);
            let word = neon::vreinterpret_u64_u8(negative);
            BitMask(neon::vget_lane_u64(word, 0))
        }
    }

    /// Returns a `BitMask` indicating all tags in the group which are full.
    #[inline]
    pub(crate) fn match_full(self) -> BitMask {
        unsafe {
            // Full tags have their high bit clear, i.e. they are non-negative
            // when the lanes are read as signed bytes.
            let signed = neon::vreinterpret_s8_u8(self.0);
            let non_negative = neon::vcgez_s8(signed);
            let word = neon::vreinterpret_u64_u8(non_negative);
            BitMask(neon::vget_lane_u64(word, 0))
        }
    }

    /// Performs the following transformation on all tags in the group:
    /// - `EMPTY => EMPTY`
    /// - `DELETED => EMPTY`
    /// - `FULL => DELETED`
    #[inline]
    pub(crate) fn convert_special_to_empty_and_full_to_deleted(self) -> Self {
        // Map high_bit = 1 (EMPTY or DELETED) to 1111_1111
        // and high_bit = 0 (FULL) to 1000_0000
        //
        // Here's this logic expanded to concrete values:
        //   let special = 0 > tag = 1111_1111 (true) or 0000_0000 (false)
        //   1111_1111 | 1000_0000 = 1111_1111
        //   0000_0000 | 1000_0000 = 1000_0000
        unsafe {
            let signed = neon::vreinterpret_s8_u8(self.0);
            let special = neon::vcltz_s8(signed);
            let deleted = neon::vdup_n_u8(0x80);
            Group(neon::vorr_u8(special, deleted))
        }
    }
}

View File

@@ -0,0 +1,146 @@
use super::super::{BitMask, Tag};
use core::mem;
use core::num::NonZeroU16;
#[cfg(target_arch = "x86")]
use core::arch::x86;
#[cfg(target_arch = "x86_64")]
use core::arch::x86_64 as x86;
/// Integer type backing a `BitMask` for the SSE2 implementation.
pub(crate) type BitMaskWord = u16;
/// Non-zero counterpart of `BitMaskWord`.
pub(crate) type NonZeroBitMaskWord = NonZeroU16;
/// Number of mask bits per element: the mask is densely packed, one bit per
/// tag.
pub(crate) const BITMASK_STRIDE: usize = 1;
/// Mask of all bits that are actually used in a `BitMaskWord`.
pub(crate) const BITMASK_MASK: BitMaskWord = 0xffff;
/// Mask applied before iterating over a `BitMask`; all ones here, so it leaves
/// the mask bits untouched.
pub(crate) const BITMASK_ITER_MASK: BitMaskWord = !0;
/// Abstraction over a group of control tags which can be scanned in
/// parallel.
///
/// This implementation uses a 128-bit SSE value, stored as an `__m128i` with
/// one tag per byte.
#[derive(Copy, Clone)]
pub(crate) struct Group(x86::__m128i);
// FIXME: https://github.com/rust-lang/rust-clippy/issues/3859
#[allow(clippy::use_self)]
impl Group {
    /// Number of bytes (and therefore tags) in the group.
    pub(crate) const WIDTH: usize = mem::size_of::<Self>();

    /// Returns a full group of empty tags, suitable for use as the initial
    /// value for an empty hash table.
    ///
    /// This is guaranteed to be aligned to the group size.
    #[inline]
    #[allow(clippy::items_after_statements)]
    pub(crate) const fn static_empty() -> &'static [Tag; Group::WIDTH] {
        // The zero-length `[Group; 0]` field takes no space but gives the
        // struct the alignment of `Group`.
        #[repr(C)]
        struct GroupAligned {
            _align: [Group; 0],
            tags: [Tag; Group::WIDTH],
        }
        const EMPTY_GROUP: GroupAligned = GroupAligned {
            _align: [],
            tags: [Tag::EMPTY; Group::WIDTH],
        };
        &EMPTY_GROUP.tags
    }

    /// Loads a group of tags starting at the given address.
    #[inline]
    #[allow(clippy::cast_ptr_alignment)] // unaligned load
    pub(crate) unsafe fn load(ptr: *const Tag) -> Self {
        let src = ptr.cast::<x86::__m128i>();
        Group(x86::_mm_loadu_si128(src))
    }

    /// Loads a group of tags starting at the given address, which must be
    /// aligned to `mem::align_of::<Group>()`.
    #[inline]
    #[allow(clippy::cast_ptr_alignment)]
    pub(crate) unsafe fn load_aligned(ptr: *const Tag) -> Self {
        debug_assert_eq!(ptr.align_offset(mem::align_of::<Self>()), 0);
        let src = ptr.cast::<x86::__m128i>();
        Group(x86::_mm_load_si128(src))
    }

    /// Stores the group of tags to the given address, which must be
    /// aligned to `mem::align_of::<Group>()`.
    #[inline]
    #[allow(clippy::cast_ptr_alignment)]
    pub(crate) unsafe fn store_aligned(self, ptr: *mut Tag) {
        debug_assert_eq!(ptr.align_offset(mem::align_of::<Self>()), 0);
        let dst = ptr.cast::<x86::__m128i>();
        x86::_mm_store_si128(dst, self.0);
    }

    /// Returns a `BitMask` indicating all tags in the group which have
    /// the given value.
    #[inline]
    pub(crate) fn match_tag(self, tag: Tag) -> BitMask {
        #[allow(
            clippy::cast_possible_wrap, // tag.0: Tag as i8
            // tag: i32 as u16
            //   note: _mm_movemask_epi8 returns a 16-bit mask in a i32, the
            //   upper 16-bits of the i32 are zeroed:
            clippy::cast_sign_loss,
            clippy::cast_possible_truncation
        )]
        unsafe {
            let needle = x86::_mm_set1_epi8(tag.0 as i8);
            let equal = x86::_mm_cmpeq_epi8(self.0, needle);
            BitMask(x86::_mm_movemask_epi8(equal) as u16)
        }
    }

    /// Returns a `BitMask` indicating all tags in the group which are
    /// `EMPTY`.
    #[inline]
    pub(crate) fn match_empty(self) -> BitMask {
        let empty = Tag::EMPTY;
        self.match_tag(empty)
    }

    /// Returns a `BitMask` indicating all tags in the group which are
    /// `EMPTY` or `DELETED`.
    #[inline]
    pub(crate) fn match_empty_or_deleted(self) -> BitMask {
        #[allow(
            // tag: i32 as u16
            //   note: _mm_movemask_epi8 returns a 16-bit mask in a i32, the
            //   upper 16-bits of the i32 are zeroed:
            clippy::cast_sign_loss,
            clippy::cast_possible_truncation
        )]
        unsafe {
            // A tag is EMPTY or DELETED iff the high bit is set, and the
            // movemask gathers exactly that bit from every byte.
            let high_bits = x86::_mm_movemask_epi8(self.0);
            BitMask(high_bits as u16)
        }
    }

    /// Returns a `BitMask` indicating all tags in the group which are full.
    #[inline]
    pub(crate) fn match_full(&self) -> BitMask {
        let special = self.match_empty_or_deleted();
        special.invert()
    }

    /// Performs the following transformation on all tags in the group:
    /// - `EMPTY => EMPTY`
    /// - `DELETED => EMPTY`
    /// - `FULL => DELETED`
    #[inline]
    pub(crate) fn convert_special_to_empty_and_full_to_deleted(self) -> Self {
        // Map high_bit = 1 (EMPTY or DELETED) to 1111_1111
        // and high_bit = 0 (FULL) to 1000_0000
        //
        // Here's this logic expanded to concrete values:
        //   let special = 0 > tag = 1111_1111 (true) or 0000_0000 (false)
        //   1111_1111 | 1000_0000 = 1111_1111
        //   0000_0000 | 1000_0000 = 1000_0000
        #[allow(
            clippy::cast_possible_wrap, // tag: Tag::DELETED.0 as i8
        )]
        unsafe {
            let deleted = x86::_mm_set1_epi8(Tag::DELETED.0 as i8);
            let special = x86::_mm_cmpgt_epi8(x86::_mm_setzero_si128(), self.0);
            Group(x86::_mm_or_si128(special, deleted))
        }
    }
}

View File

@@ -0,0 +1,10 @@
mod bitmask;
mod group;
mod tag;
use self::bitmask::BitMask;
pub(crate) use self::{
bitmask::BitMaskIter,
group::Group,
tag::{Tag, TagSliceExt},
};

View File

@@ -0,0 +1,83 @@
use core::{fmt, mem};
/// Single tag in a control group.
///
/// A tag is one byte. When its top bit is set it is a special value (`EMPTY`
/// or `DELETED`); when the top bit is clear it marks a full bucket and the
/// low 7 bits carry part of the hash (see `Tag::full`).
///
/// `#[repr(transparent)]` gives the tag the same layout as the `u8` it wraps.
#[derive(Copy, Clone, PartialEq, Eq)]
#[repr(transparent)]
pub(crate) struct Tag(pub(super) u8);
impl Tag {
    /// Control tag value for an empty bucket.
    pub(crate) const EMPTY: Tag = Tag(0xFF);

    /// Control tag value for a deleted bucket.
    pub(crate) const DELETED: Tag = Tag(0x80);

    /// Checks whether a control tag represents a full bucket (top bit is clear).
    #[inline]
    pub(crate) const fn is_full(self) -> bool {
        (self.0 >> 7) == 0
    }

    /// Checks whether a control tag represents a special value (top bit is set).
    #[inline]
    pub(crate) const fn is_special(self) -> bool {
        (self.0 >> 7) != 0
    }

    /// Checks whether a special control value is EMPTY (just check 1 bit).
    ///
    /// Only meaningful for special tags; this is asserted in debug builds.
    #[inline]
    pub(crate) const fn special_is_empty(self) -> bool {
        debug_assert!(self.is_special());
        // EMPTY has its lowest bit set, DELETED has it clear.
        (self.0 & 0x01) == 0x01
    }

    /// Creates a control tag representing a full bucket with the given hash.
    #[inline]
    #[allow(clippy::cast_possible_truncation)]
    pub(crate) const fn full(hash: u64) -> Tag {
        // Number of hash bytes that are guaranteed to be populated: the
        // smaller of `usize` and `u64`.
        const MIN_HASH_LEN: usize = {
            let usize_bytes = mem::size_of::<usize>();
            let u64_bytes = mem::size_of::<u64>();
            if usize_bytes < u64_bytes {
                usize_bytes
            } else {
                u64_bytes
            }
        };
        // Shift that brings the top 7 bits of those bytes down to bit 0.
        const TOP7_SHIFT: usize = MIN_HASH_LEN * 8 - 7;

        // Grab the top 7 bits of the hash. While the hash is normally a full 64-bit
        // value, some hash functions (such as FxHash) produce a usize result
        // instead, which means that the top 32 bits are 0 on 32-bit platforms.
        // So we use MIN_HASH_LEN constant to handle this.
        let top7 = (hash >> TOP7_SHIFT) & 0x7f;
        Tag(top7 as u8) // truncation
    }
}
impl fmt::Debug for Tag {
    /// Formats special tags as `EMPTY` / `DELETED` (honouring padding) and
    /// full tags as `full(<low 7 bits>)`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        if !self.is_special() {
            let hash_bits = self.0 & 0x7F;
            return f.debug_tuple("full").field(&hash_bits).finish();
        }
        let label = if self.special_is_empty() {
            "EMPTY"
        } else {
            "DELETED"
        };
        f.pad(label)
    }
}
/// Extension trait for slices of tags.
pub(crate) trait TagSliceExt {
    /// Overwrites every element of the control slice with `tag`.
    fn fill_tag(&mut self, tag: Tag);

    /// Clears out the control slice by overwriting every element with
    /// `Tag::EMPTY`.
    #[inline]
    fn fill_empty(&mut self) {
        let empty = Tag::EMPTY;
        self.fill_tag(empty);
    }
}

impl TagSliceExt for [Tag] {
    #[inline]
    fn fill_tag(&mut self, tag: Tag) {
        let start = self.as_mut_ptr();
        let count = self.len();
        // SAFETY: `start` and `count` describe exactly this exclusively
        // borrowed slice, so the whole range is ours to write to.
        unsafe { start.write_bytes(tag.0, count) }
    }
}