889 lines
32 KiB
Rust
889 lines
32 KiB
Rust
// This file is part of ICU4X. For terms of use, please see the file
|
|
// called LICENSE at the top level of the ICU4X source tree
|
|
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
|
|
|
|
use crate::reader;
|
|
|
|
use core::borrow::Borrow;
|
|
|
|
#[cfg(feature = "alloc")]
|
|
use crate::{
|
|
builder::bytestr::ByteStr, builder::nonconst::ZeroTrieBuilder, error::ZeroTrieBuildError,
|
|
};
|
|
#[cfg(feature = "alloc")]
|
|
use alloc::{boxed::Box, collections::BTreeMap, collections::VecDeque, string::String, vec::Vec};
|
|
#[cfg(feature = "litemap")]
|
|
use litemap::LiteMap;
|
|
|
|
/// A data structure that compactly maps from byte sequences to integers.
|
|
///
|
|
/// There are several variants of `ZeroTrie` which are very similar but are optimized
|
|
/// for different use cases:
|
|
///
|
|
/// - [`ZeroTrieSimpleAscii`] is the most compact structure. Very fast for small data.
|
|
/// Only stores ASCII-encoded strings. Can be const-constructed!
|
|
/// - [`ZeroTriePerfectHash`] is also compact, but it also supports arbitrary binary
|
|
/// strings. It also scales better to large data. Cannot be const-constructed.
|
|
/// - [`ZeroTrieExtendedCapacity`] can be used if more than 2^32 bytes are required.
|
|
///
|
|
/// You can create a `ZeroTrie` directly, in which case the most appropriate
|
|
/// backing implementation will be chosen.
|
|
///
|
|
/// # Backing Store
|
|
///
|
|
/// The data structure has a flexible backing data store. The only requirement for most
|
|
/// functionality is that it implement `AsRef<[u8]>`. All of the following are valid
|
|
/// ZeroTrie types:
|
|
///
|
|
/// - `ZeroTrie<[u8]>` (dynamically sized type: must be stored in a reference or Box)
|
|
/// - `ZeroTrie<&[u8]>` (borrows its data from a u8 buffer)
|
|
/// - `ZeroTrie<Vec<u8>>` (fully owned data)
|
|
/// - `ZeroTrie<ZeroVec<u8>>` (the recommended borrowed-or-owned signature)
|
|
/// - `Cow<ZeroTrie<[u8]>>` (another borrowed-or-owned signature)
|
|
/// - `ZeroTrie<Cow<[u8]>>` (another borrowed-or-owned signature)
|
|
///
|
|
/// # Examples
|
|
///
|
|
/// ```
|
|
/// use litemap::LiteMap;
|
|
/// use zerotrie::ZeroTrie;
|
|
///
|
|
/// let mut map = LiteMap::<&[u8], usize>::new_vec();
|
|
/// map.insert("foo".as_bytes(), 1);
|
|
/// map.insert("bar".as_bytes(), 2);
|
|
/// map.insert("bazzoo".as_bytes(), 3);
|
|
///
|
|
/// let trie = ZeroTrie::try_from(&map)?;
|
|
///
|
|
/// assert_eq!(trie.get("foo"), Some(1));
|
|
/// assert_eq!(trie.get("bar"), Some(2));
|
|
/// assert_eq!(trie.get("bazzoo"), Some(3));
|
|
/// assert_eq!(trie.get("unknown"), None);
|
|
///
|
|
/// # Ok::<_, zerotrie::ZeroTrieBuildError>(())
|
|
/// ```
|
|
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
|
// Note: The absence of the following derive does not cause any test failures in this crate
|
|
#[cfg_attr(feature = "yoke", derive(yoke::Yokeable))]
|
|
pub struct ZeroTrie<Store>(pub(crate) ZeroTrieFlavor<Store>);
|
|
|
|
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
|
pub(crate) enum ZeroTrieFlavor<Store> {
|
|
SimpleAscii(ZeroTrieSimpleAscii<Store>),
|
|
PerfectHash(ZeroTriePerfectHash<Store>),
|
|
ExtendedCapacity(ZeroTrieExtendedCapacity<Store>),
|
|
}
|
|
|
|
/// A data structure that compactly maps from ASCII strings to integers.
|
|
///
|
|
/// For more information, see [`ZeroTrie`].
|
|
///
|
|
/// # Examples
|
|
///
|
|
/// ```
|
|
/// use litemap::LiteMap;
|
|
/// use zerotrie::ZeroTrieSimpleAscii;
|
|
///
|
|
/// let mut map = LiteMap::new_vec();
|
|
/// map.insert(&b"foo"[..], 1);
|
|
/// map.insert(b"bar", 2);
|
|
/// map.insert(b"bazzoo", 3);
|
|
///
|
|
/// let trie = ZeroTrieSimpleAscii::try_from(&map)?;
|
|
///
|
|
/// assert_eq!(trie.get(b"foo"), Some(1));
|
|
/// assert_eq!(trie.get(b"bar"), Some(2));
|
|
/// assert_eq!(trie.get(b"bazzoo"), Some(3));
|
|
/// assert_eq!(trie.get(b"unknown"), None);
|
|
///
|
|
/// # Ok::<_, zerotrie::ZeroTrieBuildError>(())
|
|
/// ```
|
|
///
|
|
/// The trie can only store ASCII bytes; a string with non-ASCII always returns None:
|
|
///
|
|
/// ```
|
|
/// use zerotrie::ZeroTrieSimpleAscii;
|
|
///
|
|
/// // A trie with two values: "abc" and "abcdef"
|
|
/// let trie = ZeroTrieSimpleAscii::from_bytes(b"abc\x80def\x81");
|
|
///
|
|
/// assert!(trie.get(b"ab\xFF").is_none());
|
|
/// ```
|
|
#[repr(transparent)]
|
|
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
|
|
#[cfg_attr(feature = "databake", derive(databake::Bake))]
|
|
#[cfg_attr(feature = "databake", databake(path = zerotrie))]
|
|
#[allow(clippy::exhaustive_structs)] // databake hidden fields
|
|
pub struct ZeroTrieSimpleAscii<Store: ?Sized> {
|
|
#[doc(hidden)] // for databake, but there are no invariants
|
|
pub store: Store,
|
|
}
|
|
|
|
impl<Store: ?Sized> ZeroTrieSimpleAscii<Store> {
|
|
fn transparent_ref_from_store(s: &Store) -> &Self {
|
|
unsafe {
|
|
// Safety: Self is transparent over Store
|
|
core::mem::transmute(s)
|
|
}
|
|
}
|
|
}
|
|
|
|
impl<Store> ZeroTrieSimpleAscii<Store> {
|
|
/// Wrap this specific ZeroTrie variant into a ZeroTrie.
|
|
#[inline]
|
|
pub const fn into_zerotrie(self) -> ZeroTrie<Store> {
|
|
ZeroTrie(ZeroTrieFlavor::SimpleAscii(self))
|
|
}
|
|
}
|
|
|
|
/// A data structure that compactly maps from ASCII strings to integers
|
|
/// in a case-insensitive way.
|
|
///
|
|
/// # Examples
|
|
///
|
|
/// ```
|
|
/// use litemap::LiteMap;
|
|
/// use zerotrie::ZeroAsciiIgnoreCaseTrie;
|
|
///
|
|
/// let mut map = LiteMap::new_vec();
|
|
/// map.insert(&b"foo"[..], 1);
|
|
/// map.insert(b"Bar", 2);
|
|
/// map.insert(b"Bazzoo", 3);
|
|
///
|
|
/// let trie = ZeroAsciiIgnoreCaseTrie::try_from(&map)?;
|
|
///
|
|
/// assert_eq!(trie.get(b"foo"), Some(1));
|
|
/// assert_eq!(trie.get(b"bar"), Some(2));
|
|
/// assert_eq!(trie.get(b"BAR"), Some(2));
|
|
/// assert_eq!(trie.get(b"bazzoo"), Some(3));
|
|
/// assert_eq!(trie.get(b"unknown"), None);
|
|
///
|
|
/// # Ok::<_, zerotrie::ZeroTrieBuildError>(())
|
|
/// ```
|
|
///
|
|
/// Strings with different cases of the same character at the same offset are not allowed:
|
|
///
|
|
/// ```
|
|
/// use litemap::LiteMap;
|
|
/// use zerotrie::ZeroAsciiIgnoreCaseTrie;
|
|
///
|
|
/// let mut map = LiteMap::new_vec();
|
|
/// map.insert(&b"bar"[..], 1);
|
|
/// // OK: 'r' and 'Z' are different letters
|
|
/// map.insert(b"baZ", 2);
|
|
/// // Bad: we already inserted 'r' so we cannot also insert 'R' at the same position
|
|
/// map.insert(b"baR", 2);
|
|
///
|
|
/// ZeroAsciiIgnoreCaseTrie::try_from(&map).expect_err("mixed-case strings!");
|
|
/// ```
|
|
#[repr(transparent)]
|
|
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
|
|
#[cfg_attr(feature = "databake", derive(databake::Bake))]
|
|
#[cfg_attr(feature = "databake", databake(path = zerotrie))]
|
|
#[allow(clippy::exhaustive_structs)] // databake hidden fields
|
|
pub struct ZeroAsciiIgnoreCaseTrie<Store: ?Sized> {
|
|
#[doc(hidden)] // for databake, but there are no invariants
|
|
pub store: Store,
|
|
}
|
|
|
|
impl<Store: ?Sized> ZeroAsciiIgnoreCaseTrie<Store> {
|
|
fn transparent_ref_from_store(s: &Store) -> &Self {
|
|
unsafe {
|
|
// Safety: Self is transparent over Store
|
|
core::mem::transmute(s)
|
|
}
|
|
}
|
|
}
|
|
|
|
// Note: ZeroAsciiIgnoreCaseTrie is not a variant of ZeroTrie so there is no `into_zerotrie`
|
|
|
|
/// A data structure that compactly maps from byte strings to integers.
|
|
///
|
|
/// For more information, see [`ZeroTrie`].
|
|
///
|
|
/// # Examples
|
|
///
|
|
/// ```
|
|
/// use litemap::LiteMap;
|
|
/// use zerotrie::ZeroTriePerfectHash;
|
|
///
|
|
/// let mut map = LiteMap::<&[u8], usize>::new_vec();
|
|
/// map.insert("foo".as_bytes(), 1);
|
|
/// map.insert("bår".as_bytes(), 2);
|
|
/// map.insert("båzzøø".as_bytes(), 3);
|
|
///
|
|
/// let trie = ZeroTriePerfectHash::try_from(&map)?;
|
|
///
|
|
/// assert_eq!(trie.get("foo".as_bytes()), Some(1));
|
|
/// assert_eq!(trie.get("bår".as_bytes()), Some(2));
|
|
/// assert_eq!(trie.get("båzzøø".as_bytes()), Some(3));
|
|
/// assert_eq!(trie.get("bazzoo".as_bytes()), None);
|
|
///
|
|
/// # Ok::<_, zerotrie::ZeroTrieBuildError>(())
|
|
/// ```
|
|
#[repr(transparent)]
|
|
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
|
|
#[cfg_attr(feature = "databake", derive(databake::Bake))]
|
|
#[cfg_attr(feature = "databake", databake(path = zerotrie))]
|
|
#[allow(clippy::exhaustive_structs)] // databake hidden fields
|
|
pub struct ZeroTriePerfectHash<Store: ?Sized> {
|
|
#[doc(hidden)] // for databake, but there are no invariants
|
|
pub store: Store,
|
|
}
|
|
|
|
impl<Store: ?Sized> ZeroTriePerfectHash<Store> {
|
|
fn transparent_ref_from_store(s: &Store) -> &Self {
|
|
unsafe {
|
|
// Safety: Self is transparent over Store
|
|
core::mem::transmute(s)
|
|
}
|
|
}
|
|
}
|
|
|
|
impl<Store> ZeroTriePerfectHash<Store> {
|
|
/// Wrap this specific ZeroTrie variant into a ZeroTrie.
|
|
#[inline]
|
|
pub const fn into_zerotrie(self) -> ZeroTrie<Store> {
|
|
ZeroTrie(ZeroTrieFlavor::PerfectHash(self))
|
|
}
|
|
}
|
|
|
|
/// A data structure that maps from a large number of byte strings to integers.
|
|
///
|
|
/// For more information, see [`ZeroTrie`].
|
|
#[repr(transparent)]
|
|
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
|
|
#[cfg_attr(feature = "databake", derive(databake::Bake))]
|
|
#[cfg_attr(feature = "databake", databake(path = zerotrie))]
|
|
#[allow(clippy::exhaustive_structs)] // databake hidden fields
|
|
pub struct ZeroTrieExtendedCapacity<Store: ?Sized> {
|
|
#[doc(hidden)] // for databake, but there are no invariants
|
|
pub store: Store,
|
|
}
|
|
|
|
impl<Store: ?Sized> ZeroTrieExtendedCapacity<Store> {
|
|
fn transparent_ref_from_store(s: &Store) -> &Self {
|
|
unsafe {
|
|
// Safety: Self is transparent over Store
|
|
core::mem::transmute(s)
|
|
}
|
|
}
|
|
}
|
|
|
|
impl<Store> ZeroTrieExtendedCapacity<Store> {
|
|
/// Wrap this specific ZeroTrie variant into a ZeroTrie.
|
|
#[inline]
|
|
pub const fn into_zerotrie(self) -> ZeroTrie<Store> {
|
|
ZeroTrie(ZeroTrieFlavor::ExtendedCapacity(self))
|
|
}
|
|
}
|
|
|
|
macro_rules! impl_zerotrie_subtype {
|
|
($name:ident, $iter_element:ty, $iter_fn:path, $iter_ty:ty, $cnv_fn:path) => {
|
|
impl<Store> $name<Store> {
|
|
/// Create a trie directly from a store.
|
|
///
|
|
/// If the store does not contain valid bytes, unexpected behavior may occur.
|
|
#[inline]
|
|
pub const fn from_store(store: Store) -> Self {
|
|
Self { store }
|
|
}
|
|
/// Takes the byte store from this trie.
|
|
#[inline]
|
|
pub fn into_store(self) -> Store {
|
|
self.store
|
|
}
|
|
/// Converts this trie's store to a different store implementing the `From` trait.
|
|
///
|
|
#[doc = concat!("For example, use this to change `", stringify!($name), "<Vec<u8>>` to `", stringify!($name), "<Cow<[u8]>>`.")]
|
|
///
|
|
/// # Examples
|
|
///
|
|
/// ```
|
|
/// use std::borrow::Cow;
|
|
#[doc = concat!("use zerotrie::", stringify!($name), ";")]
|
|
///
|
|
#[doc = concat!("let trie: ", stringify!($name), "<Vec<u8>> = ", stringify!($name), "::from_bytes(b\"abc\\x85\").to_owned();")]
|
|
#[doc = concat!("let cow: ", stringify!($name), "<Cow<[u8]>> = trie.convert_store();")]
|
|
///
|
|
/// assert_eq!(cow.get(b"abc"), Some(5));
|
|
/// ```
|
|
pub fn convert_store<X: From<Store>>(self) -> $name<X> {
|
|
$name::<X>::from_store(X::from(self.store))
|
|
}
|
|
}
|
|
impl<Store> $name<Store>
|
|
where
|
|
Store: AsRef<[u8]> + ?Sized,
|
|
{
|
|
/// Queries the trie for a string.
|
|
// Note: We do not need the Borrow trait's guarantees, so we use
|
|
// the more general AsRef trait.
|
|
pub fn get<K>(&self, key: K) -> Option<usize> where K: AsRef<[u8]> {
|
|
reader::get_parameterized::<Self>(self.store.as_ref(), key.as_ref())
|
|
}
|
|
/// Returns `true` if the trie is empty.
|
|
#[inline]
|
|
pub fn is_empty(&self) -> bool {
|
|
self.store.as_ref().is_empty()
|
|
}
|
|
/// Returns the size of the trie in number of bytes.
|
|
///
|
|
/// To get the number of keys in the trie, use `.iter().count()`:
|
|
///
|
|
/// ```
|
|
#[doc = concat!("use zerotrie::", stringify!($name), ";")]
|
|
///
|
|
/// // A trie with two values: "abc" and "abcdef"
|
|
#[doc = concat!("let trie: &", stringify!($name), "<[u8]> = ", stringify!($name), "::from_bytes(b\"abc\\x80def\\x81\");")]
|
|
///
|
|
/// assert_eq!(8, trie.byte_len());
|
|
/// assert_eq!(2, trie.iter().count());
|
|
/// ```
|
|
#[inline]
|
|
pub fn byte_len(&self) -> usize {
|
|
self.store.as_ref().len()
|
|
}
|
|
/// Returns the bytes contained in the underlying store.
|
|
#[inline]
|
|
pub fn as_bytes(&self) -> &[u8] {
|
|
self.store.as_ref()
|
|
}
|
|
/// Returns this trie as a reference transparent over a byte slice.
|
|
#[inline]
|
|
pub fn as_borrowed(&self) -> &$name<[u8]> {
|
|
$name::from_bytes(self.store.as_ref())
|
|
}
|
|
/// Returns a trie with a store borrowing from this trie.
|
|
#[inline]
|
|
pub fn as_borrowed_slice(&self) -> $name<&[u8]> {
|
|
$name::from_store(self.store.as_ref())
|
|
}
|
|
}
|
|
impl<Store> AsRef<$name<[u8]>> for $name<Store>
|
|
where
|
|
Store: AsRef<[u8]> + ?Sized,
|
|
{
|
|
#[inline]
|
|
fn as_ref(&self) -> &$name<[u8]> {
|
|
self.as_borrowed()
|
|
}
|
|
}
|
|
#[cfg(feature = "alloc")]
|
|
impl<Store> $name<Store>
|
|
where
|
|
Store: AsRef<[u8]> + ?Sized,
|
|
{
|
|
/// Converts a possibly-borrowed $name to an owned one.
|
|
///
|
|
/// ✨ *Enabled with the `alloc` Cargo feature.*
|
|
///
|
|
/// # Examples
|
|
///
|
|
/// ```
|
|
#[doc = concat!("use zerotrie::", stringify!($name), ";")]
|
|
///
|
|
#[doc = concat!("let trie: &", stringify!($name), "<[u8]> = ", stringify!($name), "::from_bytes(b\"abc\\x85\");")]
|
|
#[doc = concat!("let owned: ", stringify!($name), "<Vec<u8>> = trie.to_owned();")]
|
|
///
|
|
/// assert_eq!(trie.get(b"abc"), Some(5));
|
|
/// assert_eq!(owned.get(b"abc"), Some(5));
|
|
/// ```
|
|
#[inline]
|
|
pub fn to_owned(&self) -> $name<Vec<u8>> {
|
|
$name::from_store(
|
|
Vec::from(self.store.as_ref()),
|
|
)
|
|
}
|
|
/// Returns an iterator over the key/value pairs in this trie.
|
|
///
|
|
/// ✨ *Enabled with the `alloc` Cargo feature.*
|
|
///
|
|
/// # Examples
|
|
///
|
|
/// ```
|
|
#[doc = concat!("use zerotrie::", stringify!($name), ";")]
|
|
///
|
|
/// // A trie with two values: "abc" and "abcdef"
|
|
#[doc = concat!("let trie: &", stringify!($name), "<[u8]> = ", stringify!($name), "::from_bytes(b\"abc\\x80def\\x81\");")]
|
|
///
|
|
/// let mut it = trie.iter();
|
|
/// assert_eq!(it.next(), Some(("abc".into(), 0)));
|
|
/// assert_eq!(it.next(), Some(("abcdef".into(), 1)));
|
|
/// assert_eq!(it.next(), None);
|
|
/// ```
|
|
#[inline]
|
|
pub fn iter(&self) -> $iter_ty {
|
|
$iter_fn(self.as_bytes())
|
|
}
|
|
}
|
|
impl $name<[u8]> {
|
|
/// Casts from a byte slice to a reference to a trie with the same lifetime.
|
|
///
|
|
/// If the bytes are not a valid trie, unexpected behavior may occur.
|
|
#[inline]
|
|
pub fn from_bytes(trie: &[u8]) -> &Self {
|
|
Self::transparent_ref_from_store(trie)
|
|
}
|
|
}
|
|
#[cfg(feature = "alloc")]
|
|
impl $name<Vec<u8>> {
|
|
pub(crate) fn try_from_tuple_slice(items: &[(&ByteStr, usize)]) -> Result<Self, ZeroTrieBuildError> {
|
|
use crate::options::ZeroTrieWithOptions;
|
|
ZeroTrieBuilder::<VecDeque<u8>>::from_sorted_tuple_slice(
|
|
items,
|
|
Self::OPTIONS,
|
|
)
|
|
.map(|s| Self {
|
|
store: s.to_bytes(),
|
|
})
|
|
}
|
|
}
|
|
#[cfg(feature = "alloc")]
|
|
impl<'a, K> FromIterator<(K, usize)> for $name<Vec<u8>>
|
|
where
|
|
K: AsRef<[u8]>
|
|
{
|
|
fn from_iter<T: IntoIterator<Item = (K, usize)>>(iter: T) -> Self {
|
|
use crate::options::ZeroTrieWithOptions;
|
|
use crate::builder::nonconst::ZeroTrieBuilder;
|
|
ZeroTrieBuilder::<VecDeque<u8>>::from_bytes_iter(
|
|
iter,
|
|
Self::OPTIONS
|
|
)
|
|
.map(|s| Self {
|
|
store: s.to_bytes(),
|
|
})
|
|
.unwrap()
|
|
}
|
|
}
|
|
#[cfg(feature = "alloc")]
|
|
impl<'a, K> TryFrom<&'a BTreeMap<K, usize>> for $name<Vec<u8>>
|
|
where
|
|
K: Borrow<[u8]>
|
|
{
|
|
type Error = crate::error::ZeroTrieBuildError;
|
|
fn try_from(map: &'a BTreeMap<K, usize>) -> Result<Self, Self::Error> {
|
|
let tuples: Vec<(&[u8], usize)> = map
|
|
.iter()
|
|
.map(|(k, v)| (k.borrow(), *v))
|
|
.collect();
|
|
let byte_str_slice = ByteStr::from_byte_slice_with_value(&tuples);
|
|
Self::try_from_tuple_slice(byte_str_slice)
|
|
}
|
|
}
|
|
#[cfg(feature = "alloc")]
|
|
impl<Store> $name<Store>
|
|
where
|
|
Store: AsRef<[u8]> + ?Sized
|
|
{
|
|
/// Exports the data from this ZeroTrie type into a BTreeMap.
|
|
///
|
|
/// ✨ *Enabled with the `alloc` Cargo feature.*
|
|
///
|
|
/// # Examples
|
|
///
|
|
/// ```
|
|
#[doc = concat!("use zerotrie::", stringify!($name), ";")]
|
|
/// use std::collections::BTreeMap;
|
|
///
|
|
#[doc = concat!("let trie = ", stringify!($name), "::from_bytes(b\"abc\\x81def\\x82\");")]
|
|
/// let items = trie.to_btreemap();
|
|
///
|
|
/// assert_eq!(items.len(), 2);
|
|
///
|
|
#[doc = concat!("let recovered_trie: ", stringify!($name), "<Vec<u8>> = items")]
|
|
/// .into_iter()
|
|
/// .collect();
|
|
/// assert_eq!(trie.as_bytes(), recovered_trie.as_bytes());
|
|
/// ```
|
|
pub fn to_btreemap(&self) -> BTreeMap<$iter_element, usize> {
|
|
self.iter().collect()
|
|
}
|
|
#[allow(dead_code)] // not needed for ZeroAsciiIgnoreCaseTrie
|
|
pub(crate) fn to_btreemap_bytes(&self) -> BTreeMap<Box<[u8]>, usize> {
|
|
self.iter().map(|(k, v)| ($cnv_fn(k), v)).collect()
|
|
}
|
|
}
|
|
#[cfg(feature = "alloc")]
|
|
impl<Store> From<&$name<Store>> for BTreeMap<$iter_element, usize>
|
|
where
|
|
Store: AsRef<[u8]> + ?Sized,
|
|
{
|
|
#[inline]
|
|
fn from(other: &$name<Store>) -> Self {
|
|
other.to_btreemap()
|
|
}
|
|
}
|
|
#[cfg(feature = "litemap")]
|
|
impl<'a, K, S> TryFrom<&'a LiteMap<K, usize, S>> for $name<Vec<u8>>
|
|
where
|
|
K: Borrow<[u8]>,
|
|
S: litemap::store::StoreIterable<'a, K, usize>,
|
|
{
|
|
type Error = crate::error::ZeroTrieBuildError;
|
|
fn try_from(map: &'a LiteMap<K, usize, S>) -> Result<Self, Self::Error> {
|
|
let tuples: Vec<(&[u8], usize)> = map
|
|
.iter()
|
|
.map(|(k, v)| (k.borrow(), *v))
|
|
.collect();
|
|
let byte_str_slice = ByteStr::from_byte_slice_with_value(&tuples);
|
|
Self::try_from_tuple_slice(byte_str_slice)
|
|
}
|
|
}
|
|
#[cfg(feature = "litemap")]
|
|
impl<Store> $name<Store>
|
|
where
|
|
Store: AsRef<[u8]> + ?Sized,
|
|
{
|
|
/// Exports the data from this ZeroTrie type into a LiteMap.
|
|
///
|
|
/// ✨ *Enabled with the `litemap` Cargo feature.*
|
|
///
|
|
/// # Examples
|
|
///
|
|
/// ```
|
|
#[doc = concat!("use zerotrie::", stringify!($name), ";")]
|
|
/// use litemap::LiteMap;
|
|
///
|
|
#[doc = concat!("let trie = ", stringify!($name), "::from_bytes(b\"abc\\x81def\\x82\");")]
|
|
///
|
|
/// let items = trie.to_litemap();
|
|
/// assert_eq!(items.len(), 2);
|
|
///
|
|
#[doc = concat!("let recovered_trie: ", stringify!($name), "<Vec<u8>> = items")]
|
|
/// .iter()
|
|
/// .map(|(k, v)| (k, *v))
|
|
/// .collect();
|
|
/// assert_eq!(trie.as_bytes(), recovered_trie.as_bytes());
|
|
/// ```
|
|
pub fn to_litemap(&self) -> LiteMap<$iter_element, usize> {
|
|
self.iter().collect()
|
|
}
|
|
#[allow(dead_code)] // not needed for ZeroAsciiIgnoreCaseTrie
|
|
pub(crate) fn to_litemap_bytes(&self) -> LiteMap<Box<[u8]>, usize> {
|
|
self.iter().map(|(k, v)| ($cnv_fn(k), v)).collect()
|
|
}
|
|
}
|
|
#[cfg(feature = "litemap")]
|
|
impl<Store> From<&$name<Store>> for LiteMap<$iter_element, usize>
|
|
where
|
|
Store: AsRef<[u8]> + ?Sized,
|
|
{
|
|
#[inline]
|
|
fn from(other: &$name<Store>) -> Self {
|
|
other.to_litemap()
|
|
}
|
|
}
|
|
#[cfg(feature = "litemap")]
|
|
impl $name<Vec<u8>>
|
|
{
|
|
#[cfg(feature = "serde")]
|
|
pub(crate) fn try_from_serde_litemap(items: &LiteMap<Box<ByteStr>, usize>) -> Result<Self, ZeroTrieBuildError> {
|
|
let lm_borrowed: LiteMap<&ByteStr, usize> = items.to_borrowed_keys();
|
|
Self::try_from_tuple_slice(lm_borrowed.as_slice())
|
|
}
|
|
}
|
|
// Note: Can't generalize this impl due to the `core::borrow::Borrow` blanket impl.
|
|
impl Borrow<$name<[u8]>> for $name<&[u8]> {
|
|
#[inline]
|
|
fn borrow(&self) -> &$name<[u8]> {
|
|
self.as_borrowed()
|
|
}
|
|
}
|
|
// Note: Can't generalize this impl due to the `core::borrow::Borrow` blanket impl.
|
|
#[cfg(feature = "alloc")]
|
|
impl Borrow<$name<[u8]>> for $name<Box<[u8]>> {
|
|
#[inline]
|
|
fn borrow(&self) -> &$name<[u8]> {
|
|
self.as_borrowed()
|
|
}
|
|
}
|
|
// Note: Can't generalize this impl due to the `core::borrow::Borrow` blanket impl.
|
|
#[cfg(feature = "alloc")]
|
|
impl Borrow<$name<[u8]>> for $name<Vec<u8>> {
|
|
#[inline]
|
|
fn borrow(&self) -> &$name<[u8]> {
|
|
self.as_borrowed()
|
|
}
|
|
}
|
|
#[cfg(feature = "alloc")]
|
|
impl alloc::borrow::ToOwned for $name<[u8]> {
|
|
type Owned = $name<Box<[u8]>>;
|
|
#[doc = concat!("This impl allows [`", stringify!($name), "`] to be used inside of a [`Cow`](alloc::borrow::Cow).")]
|
|
///
|
|
#[doc = concat!("Note that it is also possible to use `", stringify!($name), "<ZeroVec<u8>>` for a similar result.")]
|
|
///
|
|
/// ✨ *Enabled with the `alloc` Cargo feature.*
|
|
///
|
|
/// # Examples
|
|
///
|
|
/// ```
|
|
/// use std::borrow::Cow;
|
|
#[doc = concat!("use zerotrie::", stringify!($name), ";")]
|
|
///
|
|
#[doc = concat!("let trie: Cow<", stringify!($name), "<[u8]>> = Cow::Borrowed(", stringify!($name), "::from_bytes(b\"abc\\x85\"));")]
|
|
/// assert_eq!(trie.get(b"abc"), Some(5));
|
|
/// ```
|
|
fn to_owned(&self) -> Self::Owned {
|
|
let bytes: &[u8] = self.store.as_ref();
|
|
$name::from_store(
|
|
Vec::from(bytes).into_boxed_slice(),
|
|
)
|
|
}
|
|
}
|
|
// TODO(#2778): Auto-derive these impls based on the repr(transparent).
|
|
//
|
|
// Safety (based on the safety checklist on the VarULE trait):
|
|
// 1. `$name` does not include any uninitialized or padding bytes as it is `repr(transparent)`
|
|
// over a `VarULE` type, `Store`, as evidenced by the existence of `transparent_ref_from_store()`
|
|
// 2. `$name` is aligned to 1 byte for the same reason
|
|
// 3. The impl of `validate_bytes()` returns an error if any byte is not valid (passed down to `VarULE` impl of `Store`)
|
|
// 4. The impl of `validate_bytes()` returns an error if the slice cannot be used in its entirety (passed down to `VarULE` impl of `Store`)
|
|
// 5. The impl of `from_bytes_unchecked()` returns a reference to the same data.
|
|
// 6. `parse_bytes()` is left to its default impl
|
|
// 7. byte equality is semantic equality
|
|
#[cfg(feature = "zerovec")]
|
|
unsafe impl<Store> zerovec::ule::VarULE for $name<Store>
|
|
where
|
|
Store: zerovec::ule::VarULE,
|
|
{
|
|
#[inline]
|
|
fn validate_bytes(bytes: &[u8]) -> Result<(), zerovec::ule::UleError> {
|
|
Store::validate_bytes(bytes)
|
|
}
|
|
#[inline]
|
|
unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Self {
|
|
// Safety: we can pass down the validity invariant to Store
|
|
Self::transparent_ref_from_store(Store::from_bytes_unchecked(bytes))
|
|
}
|
|
}
|
|
#[cfg(feature = "zerofrom")]
|
|
impl<'zf, Store1, Store2> zerofrom::ZeroFrom<'zf, $name<Store1>> for $name<Store2>
|
|
where
|
|
Store2: zerofrom::ZeroFrom<'zf, Store1>,
|
|
{
|
|
#[inline]
|
|
fn zero_from(other: &'zf $name<Store1>) -> Self {
|
|
$name::from_store(zerofrom::ZeroFrom::zero_from(&other.store))
|
|
}
|
|
}
|
|
};
|
|
}
|
|
|
|
#[cfg(feature = "alloc")]
|
|
fn string_to_box_u8(input: String) -> Box<[u8]> {
|
|
input.into_boxed_str().into_boxed_bytes()
|
|
}
|
|
|
|
#[doc(hidden)] // subject to change
|
|
#[cfg(feature = "alloc")]
|
|
pub type ZeroTrieStringIterator<'a> =
|
|
core::iter::Map<reader::ZeroTrieIterator<'a>, fn((Vec<u8>, usize)) -> (String, usize)>;
|
|
|
|
impl_zerotrie_subtype!(
|
|
ZeroTrieSimpleAscii,
|
|
String,
|
|
reader::get_iter_ascii_or_panic,
|
|
ZeroTrieStringIterator<'_>,
|
|
string_to_box_u8
|
|
);
|
|
impl_zerotrie_subtype!(
|
|
ZeroAsciiIgnoreCaseTrie,
|
|
String,
|
|
reader::get_iter_ascii_or_panic,
|
|
ZeroTrieStringIterator<'_>,
|
|
string_to_box_u8
|
|
);
|
|
impl_zerotrie_subtype!(
|
|
ZeroTriePerfectHash,
|
|
Vec<u8>,
|
|
reader::get_iter_phf,
|
|
reader::ZeroTrieIterator<'_>,
|
|
Vec::into_boxed_slice
|
|
);
|
|
impl_zerotrie_subtype!(
|
|
ZeroTrieExtendedCapacity,
|
|
Vec<u8>,
|
|
reader::get_iter_phf,
|
|
reader::ZeroTrieIterator<'_>,
|
|
Vec::into_boxed_slice
|
|
);
|
|
|
|
macro_rules! impl_dispatch {
|
|
($self:ident, $inner_fn:ident()) => {
|
|
match $self.0 {
|
|
ZeroTrieFlavor::SimpleAscii(subtype) => subtype.$inner_fn(),
|
|
ZeroTrieFlavor::PerfectHash(subtype) => subtype.$inner_fn(),
|
|
ZeroTrieFlavor::ExtendedCapacity(subtype) => subtype.$inner_fn(),
|
|
}
|
|
};
|
|
($self:ident, $inner_fn:ident().into_zerotrie()) => {
|
|
match $self.0 {
|
|
ZeroTrieFlavor::SimpleAscii(subtype) => subtype.$inner_fn().into_zerotrie(),
|
|
ZeroTrieFlavor::PerfectHash(subtype) => subtype.$inner_fn().into_zerotrie(),
|
|
ZeroTrieFlavor::ExtendedCapacity(subtype) => subtype.$inner_fn().into_zerotrie(),
|
|
}
|
|
};
|
|
(&$self:ident, $inner_fn:ident()) => {
|
|
match &$self.0 {
|
|
ZeroTrieFlavor::SimpleAscii(subtype) => subtype.$inner_fn(),
|
|
ZeroTrieFlavor::PerfectHash(subtype) => subtype.$inner_fn(),
|
|
ZeroTrieFlavor::ExtendedCapacity(subtype) => subtype.$inner_fn(),
|
|
}
|
|
};
|
|
($self:ident, $inner_fn:ident($arg:ident)) => {
|
|
match $self.0 {
|
|
ZeroTrieFlavor::SimpleAscii(subtype) => subtype.$inner_fn($arg),
|
|
ZeroTrieFlavor::PerfectHash(subtype) => subtype.$inner_fn($arg),
|
|
ZeroTrieFlavor::ExtendedCapacity(subtype) => subtype.$inner_fn($arg),
|
|
}
|
|
};
|
|
(&$self:ident, $inner_fn:ident($arg:ident)) => {
|
|
match &$self.0 {
|
|
ZeroTrieFlavor::SimpleAscii(subtype) => subtype.$inner_fn($arg),
|
|
ZeroTrieFlavor::PerfectHash(subtype) => subtype.$inner_fn($arg),
|
|
ZeroTrieFlavor::ExtendedCapacity(subtype) => subtype.$inner_fn($arg),
|
|
}
|
|
};
|
|
(&$self:ident, $trait:ident::$inner_fn:ident()) => {
|
|
match &$self.0 {
|
|
ZeroTrieFlavor::SimpleAscii(subtype) => {
|
|
ZeroTrie(ZeroTrieFlavor::SimpleAscii($trait::$inner_fn(subtype)))
|
|
}
|
|
ZeroTrieFlavor::PerfectHash(subtype) => {
|
|
ZeroTrie(ZeroTrieFlavor::PerfectHash($trait::$inner_fn(subtype)))
|
|
}
|
|
ZeroTrieFlavor::ExtendedCapacity(subtype) => {
|
|
ZeroTrie(ZeroTrieFlavor::ExtendedCapacity($trait::$inner_fn(subtype)))
|
|
}
|
|
}
|
|
};
|
|
}
|
|
|
|
impl<Store> ZeroTrie<Store> {
|
|
/// Takes the byte store from this trie.
|
|
pub fn into_store(self) -> Store {
|
|
impl_dispatch!(self, into_store())
|
|
}
|
|
/// Converts this trie's store to a different store implementing the `From` trait.
|
|
///
|
|
/// For example, use this to change `ZeroTrie<Vec<u8>>` to `ZeroTrie<Cow<[u8]>>`.
|
|
pub fn convert_store<NewStore>(self) -> ZeroTrie<NewStore>
|
|
where
|
|
NewStore: From<Store>,
|
|
{
|
|
impl_dispatch!(self, convert_store().into_zerotrie())
|
|
}
|
|
}
|
|
|
|
impl<Store> ZeroTrie<Store>
|
|
where
|
|
Store: AsRef<[u8]>,
|
|
{
|
|
/// Queries the trie for a string.
|
|
pub fn get<K>(&self, key: K) -> Option<usize>
|
|
where
|
|
K: AsRef<[u8]>,
|
|
{
|
|
impl_dispatch!(&self, get(key))
|
|
}
|
|
/// Returns `true` if the trie is empty.
|
|
pub fn is_empty(&self) -> bool {
|
|
impl_dispatch!(&self, is_empty())
|
|
}
|
|
/// Returns the size of the trie in number of bytes.
|
|
///
|
|
/// To get the number of keys in the trie, use `.iter().count()`.
|
|
pub fn byte_len(&self) -> usize {
|
|
impl_dispatch!(&self, byte_len())
|
|
}
|
|
}
|
|
|
|
#[cfg(feature = "alloc")]
|
|
impl<Store> ZeroTrie<Store>
|
|
where
|
|
Store: AsRef<[u8]>,
|
|
{
|
|
/// Exports the data from this ZeroTrie into a BTreeMap.
|
|
///
|
|
/// ✨ *Enabled with the `alloc` Cargo feature.*
|
|
pub fn to_btreemap(&self) -> BTreeMap<Box<[u8]>, usize> {
|
|
impl_dispatch!(&self, to_btreemap_bytes())
|
|
}
|
|
}
|
|
|
|
#[cfg(feature = "litemap")]
|
|
impl<Store> ZeroTrie<Store>
|
|
where
|
|
Store: AsRef<[u8]>,
|
|
{
|
|
/// Exports the data from this ZeroTrie into a LiteMap.
|
|
pub fn to_litemap(&self) -> LiteMap<Box<[u8]>, usize> {
|
|
impl_dispatch!(&self, to_litemap_bytes())
|
|
}
|
|
}
|
|
|
|
#[cfg(feature = "alloc")]
|
|
impl ZeroTrie<Vec<u8>> {
|
|
pub(crate) fn try_from_tuple_slice(
|
|
items: &[(&ByteStr, usize)],
|
|
) -> Result<Self, ZeroTrieBuildError> {
|
|
let is_all_ascii = items.iter().all(|(s, _)| s.is_all_ascii());
|
|
if is_all_ascii && items.len() < 512 {
|
|
ZeroTrieSimpleAscii::try_from_tuple_slice(items).map(|x| x.into_zerotrie())
|
|
} else {
|
|
ZeroTriePerfectHash::try_from_tuple_slice(items).map(|x| x.into_zerotrie())
|
|
}
|
|
}
|
|
}
|
|
|
|
#[cfg(feature = "alloc")]
|
|
impl<K> FromIterator<(K, usize)> for ZeroTrie<Vec<u8>>
|
|
where
|
|
K: AsRef<[u8]>,
|
|
{
|
|
fn from_iter<T: IntoIterator<Item = (K, usize)>>(iter: T) -> Self {
|
|
// We need two Vecs because the first one anchors the `K`s that the second one borrows.
|
|
let items = Vec::from_iter(iter);
|
|
let mut items: Vec<(&[u8], usize)> = items.iter().map(|(k, v)| (k.as_ref(), *v)).collect();
|
|
items.sort();
|
|
let byte_str_slice = ByteStr::from_byte_slice_with_value(&items);
|
|
#[expect(clippy::unwrap_used)] // FromIterator is panicky
|
|
Self::try_from_tuple_slice(byte_str_slice).unwrap()
|
|
}
|
|
}
|
|
|
|
#[cfg(feature = "databake")]
|
|
impl<Store> databake::Bake for ZeroTrie<Store>
|
|
where
|
|
Store: databake::Bake,
|
|
{
|
|
fn bake(&self, env: &databake::CrateEnv) -> databake::TokenStream {
|
|
use databake::*;
|
|
let inner = impl_dispatch!(&self, bake(env));
|
|
quote! { #inner.into_zerotrie() }
|
|
}
|
|
}
|
|
|
|
#[cfg(feature = "databake")]
|
|
impl<Store> databake::BakeSize for ZeroTrie<Store>
|
|
where
|
|
Store: databake::BakeSize,
|
|
{
|
|
fn borrows_size(&self) -> usize {
|
|
impl_dispatch!(&self, borrows_size())
|
|
}
|
|
}
|
|
|
|
#[cfg(feature = "zerofrom")]
|
|
impl<'zf, Store1, Store2> zerofrom::ZeroFrom<'zf, ZeroTrie<Store1>> for ZeroTrie<Store2>
|
|
where
|
|
Store2: zerofrom::ZeroFrom<'zf, Store1>,
|
|
{
|
|
fn zero_from(other: &'zf ZeroTrie<Store1>) -> Self {
|
|
use zerofrom::ZeroFrom;
|
|
impl_dispatch!(&other, ZeroFrom::zero_from())
|
|
}
|
|
}
|