chore: checkpoint before Python removal

This commit is contained in:
2026-03-26 22:33:59 +00:00
parent 683cec9307
commit e568ddf82a
29972 changed files with 11269302 additions and 2 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,30 @@
// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
//! Custom error type(s) for the parent module.
use displaydoc::Display;
/// A custom error type for [`CodePointTrie`](super::CodePointTrie).
#[derive(Copy, Clone, Display, Debug, PartialEq)]
#[non_exhaustive]
pub enum Error {
/// Could not construct [`CodePointTrie`](super::CodePointTrie) from deserialized values
#[displaydoc("Could not construct CodePointTrie from deserialized values: {reason}")]
FromDeserialized {
/// Reason for inability to deserialize values.
reason: &'static str,
},
/// [`CodePointTrie`](super::CodePointTrie) must be constructed from data vector with at least one element
#[displaydoc("CodePointTrie must be constructed from data vector with at least one element")]
EmptyDataVector,
/// [`CodePointTrie`](super::CodePointTrie) must be constructed from index vector long enough to accommodate fast-path access
#[displaydoc("CodePointTrie must be constructed from index vector long enough to accommodate fast-path access")]
IndexTooShortForFastAccess,
/// [`CodePointTrie`](super::CodePointTrie) must be constructed from data vector long enough to accommodate fast-path access
#[displaydoc("CodePointTrie must be constructed from data vector long enough to accommodate fast-path access")]
DataTooShortForFastAccess,
}
impl core::error::Error for Error {}

View File

@@ -0,0 +1,76 @@
// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
pub const FAST_TYPE_SHIFT: i32 = 6;
/// Number of entries in a data block for code points below the fast limit. 64=0x40
pub const FAST_TYPE_DATA_BLOCK_LENGTH: u32 = 1 << FAST_TYPE_SHIFT;
/// Mask for getting the lower bits for the in-fast-data-block offset.
pub const FAST_TYPE_DATA_MASK: u32 = FAST_TYPE_DATA_BLOCK_LENGTH - 1;
/// Fast indexing limit for "fast"-type trie
pub const FAST_TYPE_FAST_INDEXING_MAX: u32 = 0xffff;
/// Fast indexing limit for "small"-type trie
pub const SMALL_TYPE_FAST_INDEXING_MAX: u32 = 0xfff;
/// Offset from dataLength (to be subtracted) for fetching the
/// value returned for out-of-range code points and ill-formed UTF-8/16.
pub const ERROR_VALUE_NEG_DATA_OFFSET: u32 = 1;
/// Offset from dataLength (to be subtracted) for fetching the
/// value returned for code points highStart..U+10FFFF.
pub const HIGH_VALUE_NEG_DATA_OFFSET: u32 = 2;
/// The length of the BMP index table. 1024=0x400
pub const BMP_INDEX_LENGTH: u32 = 0x10000 >> FAST_TYPE_SHIFT;
pub const SMALL_LIMIT: u32 = 0x1000;
pub const SMALL_INDEX_LENGTH: u32 = SMALL_LIMIT >> FAST_TYPE_SHIFT;
/// Shift size for getting the index-3 table offset.
pub const SHIFT_3: u32 = 4;
/// Shift size for getting the index-2 table offset.
pub const SHIFT_2: u32 = 5 + SHIFT_3;
/// Shift size for getting the index-1 table offset.
pub const SHIFT_1: u32 = 5 + SHIFT_2;
/// Difference between two shift sizes,
/// for getting an index-2 offset from an index-3 offset. 5=9-4
pub const SHIFT_2_3: u32 = SHIFT_2 - SHIFT_3;
/// Difference between two shift sizes,
/// for getting an index-1 offset from an index-2 offset. 5=14-9
pub const SHIFT_1_2: u32 = SHIFT_1 - SHIFT_2;
/// Number of index-1 entries for the BMP. (4)
/// This part of the index-1 table is omitted from the serialized form.
pub const OMITTED_BMP_INDEX_1_LENGTH: u32 = 0x10000 >> SHIFT_1;
/// Number of entries in an index-2 block. 32=0x20
pub const INDEX_2_BLOCK_LENGTH: u32 = 1 << SHIFT_1_2;
/// Mask for getting the lower bits for the in-index-2-block offset.
pub const INDEX_2_MASK: u32 = INDEX_2_BLOCK_LENGTH - 1;
/// Number of code points per index-2 table entry. 512=0x200
pub const CP_PER_INDEX_2_ENTRY: u32 = 1 << SHIFT_2;
/// Number of entries in an index-3 block. 32=0x20
pub const INDEX_3_BLOCK_LENGTH: u32 = 1 << SHIFT_2_3;
/// Mask for getting the lower bits for the in-index-3-block offset.
pub const INDEX_3_MASK: u32 = INDEX_3_BLOCK_LENGTH - 1;
/// Number of entries in a small data block. 16=0x10
pub const SMALL_DATA_BLOCK_LENGTH: u32 = 1 << SHIFT_3;
/// Mask for getting the lower bits for the in-small-data-block offset.
pub const SMALL_DATA_MASK: u32 = SMALL_DATA_BLOCK_LENGTH - 1;
pub const CODE_POINT_MAX: u32 = 0x10ffff;

View File

@@ -0,0 +1,53 @@
// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
//! This module provides a data structure for an time-efficient lookup of values
//! associated to code points.
//!
//! It is an implementation of the existing [ICU4C UCPTrie](https://unicode-org.github.io/icu-docs/apidoc/released/icu4c/ucptrie_8h.html)
//! / [ICU4J CodePointTrie](https://unicode-org.github.io/icu-docs/apidoc/dev/icu4j/) API.
//!
//! # Architecture
//!
//! ICU4X [`CodePointTrie`] is designed to provide a read-only view of [`CodePointTrie`] data that is exported
//! from ICU4C. Detailed information about the design of the data structure can be found in the documentation
//! for the [`CodePointTrie`] struct.
//!
//! # Examples
//!
//! ## Querying a `CodePointTrie`
//!
//! ```
//! use icu::collections::codepointtrie::planes;
//! let trie = planes::get_planes_trie();
//!
//! assert_eq!(0, trie.get32(0x41)); // 'A' as u32
//! assert_eq!(0, trie.get32(0x13E0)); // 'Ꮰ' as u32
//! assert_eq!(1, trie.get32(0x10044)); // '𐁄' as u32
//! ```
//!
//! [`ICU4X`]: ../icu/index.html
mod cptrie;
mod error;
mod impl_const;
pub mod planes;
#[cfg(feature = "serde")]
pub mod toml;
#[cfg(feature = "serde")]
mod serde;
pub use cptrie::CodePointMapRange;
pub use cptrie::CodePointMapRangeIterator;
pub use cptrie::CodePointTrie;
pub use cptrie::CodePointTrieHeader;
pub use cptrie::FastCodePointTrie;
pub use cptrie::SmallCodePointTrie;
pub use cptrie::TrieType;
pub use cptrie::TrieValue;
pub use cptrie::Typed;
pub use cptrie::TypedCodePointTrie;
pub use error::Error as CodePointTrieError;

View File

@@ -0,0 +1,296 @@
// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
//! Sample data for [`CodePointTrie`] that returns the code point's plane number.
use crate::codepointtrie::cptrie::*;
use zerovec::ZeroVec;
const INDEX_ARRAY_AS_BYTES: &[u8] = &[
0x0, 0x0, 0x40, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x88, 0x2, 0x90, 0x2,
0x90, 0x2, 0x90, 0x2, 0xb0, 0x2, 0xb0, 0x2, 0xb0, 0x2, 0xb0, 0x2, 0xd0, 0x2, 0xd0, 0x2, 0xd0,
0x2, 0xd0, 0x2, 0xf0, 0x2, 0xf0, 0x2, 0xf0, 0x2, 0xf0, 0x2, 0x10, 0x3, 0x10, 0x3, 0x10, 0x3,
0x10, 0x3, 0x30, 0x3, 0x30, 0x3, 0x30, 0x3, 0x30, 0x3, 0x50, 0x3, 0x50, 0x3, 0x50, 0x3, 0x50,
0x3, 0x70, 0x3, 0x70, 0x3, 0x70, 0x3, 0x70, 0x3, 0x90, 0x3, 0x90, 0x3, 0x90, 0x3, 0x90, 0x3,
0xb0, 0x3, 0xb0, 0x3, 0xb0, 0x3, 0xb0, 0x3, 0xd0, 0x3, 0xd0, 0x3, 0xd0, 0x3, 0xd0, 0x3, 0xf0,
0x3, 0xf0, 0x3, 0xf0, 0x3, 0xf0, 0x3, 0x10, 0x4, 0x10, 0x4, 0x10, 0x4, 0x10, 0x4, 0x30, 0x4,
0x30, 0x4, 0x30, 0x4, 0x30, 0x4, 0x50, 0x4, 0x50, 0x4, 0x50, 0x4, 0x50, 0x4, 0x70, 0x4, 0x70,
0x4, 0x70, 0x4, 0x70, 0x4, 0x0, 0x0, 0x10, 0x0, 0x20, 0x0, 0x30, 0x0, 0x40, 0x0, 0x50, 0x0,
0x60, 0x0, 0x70, 0x0, 0x0, 0x0, 0x10, 0x0, 0x20, 0x0, 0x30, 0x0, 0x0, 0x0, 0x10, 0x0, 0x20,
0x0, 0x30, 0x0, 0x0, 0x0, 0x10, 0x0, 0x20, 0x0, 0x30, 0x0, 0x0, 0x0, 0x10, 0x0, 0x20, 0x0,
0x30, 0x0, 0x0, 0x0, 0x10, 0x0, 0x20, 0x0, 0x30, 0x0, 0x0, 0x0, 0x10, 0x0, 0x20, 0x0, 0x30,
0x0, 0x0, 0x0, 0x10, 0x0, 0x20, 0x0, 0x30, 0x0, 0x0, 0x0, 0x10, 0x0, 0x20, 0x0, 0x30, 0x0,
0x80, 0x0, 0x80, 0x0, 0x80, 0x0, 0x80, 0x0, 0x80, 0x0, 0x80, 0x0, 0x80, 0x0, 0x80, 0x0, 0x80,
0x0, 0x80, 0x0, 0x80, 0x0, 0x80, 0x0, 0x80, 0x0, 0x80, 0x0, 0x80, 0x0, 0x80, 0x0, 0x80, 0x0,
0x80, 0x0, 0x80, 0x0, 0x80, 0x0, 0x80, 0x0, 0x80, 0x0, 0x80, 0x0, 0x80, 0x0, 0x80, 0x0, 0x80,
0x0, 0x80, 0x0, 0x80, 0x0, 0x80, 0x0, 0x80, 0x0, 0x80, 0x0, 0x80, 0x0, 0x90, 0x0, 0x90, 0x0,
0x90, 0x0, 0x90, 0x0, 0x90, 0x0, 0x90, 0x0, 0x90, 0x0, 0x90, 0x0, 0x90, 0x0, 0x90, 0x0, 0x90,
0x0, 0x90, 0x0, 0x90, 0x0, 0x90, 0x0, 0x90, 0x0, 0x90, 0x0, 0x90, 0x0, 0x90, 0x0, 0x90, 0x0,
0x90, 0x0, 0x90, 0x0, 0x90, 0x0, 0x90, 0x0, 0x90, 0x0, 0x90, 0x0, 0x90, 0x0, 0x90, 0x0, 0x90,
0x0, 0x90, 0x0, 0x90, 0x0, 0x90, 0x0, 0x90, 0x0, 0xa0, 0x0, 0xa0, 0x0, 0xa0, 0x0, 0xa0, 0x0,
0xa0, 0x0, 0xa0, 0x0, 0xa0, 0x0, 0xa0, 0x0, 0xa0, 0x0, 0xa0, 0x0, 0xa0, 0x0, 0xa0, 0x0, 0xa0,
0x0, 0xa0, 0x0, 0xa0, 0x0, 0xa0, 0x0, 0xa0, 0x0, 0xa0, 0x0, 0xa0, 0x0, 0xa0, 0x0, 0xa0, 0x0,
0xa0, 0x0, 0xa0, 0x0, 0xa0, 0x0, 0xa0, 0x0, 0xa0, 0x0, 0xa0, 0x0, 0xa0, 0x0, 0xa0, 0x0, 0xa0,
0x0, 0xa0, 0x0, 0xa0, 0x0, 0xb0, 0x0, 0xb0, 0x0, 0xb0, 0x0, 0xb0, 0x0, 0xb0, 0x0, 0xb0, 0x0,
0xb0, 0x0, 0xb0, 0x0, 0xb0, 0x0, 0xb0, 0x0, 0xb0, 0x0, 0xb0, 0x0, 0xb0, 0x0, 0xb0, 0x0, 0xb0,
0x0, 0xb0, 0x0, 0xb0, 0x0, 0xb0, 0x0, 0xb0, 0x0, 0xb0, 0x0, 0xb0, 0x0, 0xb0, 0x0, 0xb0, 0x0,
0xb0, 0x0, 0xb0, 0x0, 0xb0, 0x0, 0xb0, 0x0, 0xb0, 0x0, 0xb0, 0x0, 0xb0, 0x0, 0xb0, 0x0, 0xb0,
0x0, 0xc0, 0x0, 0xc0, 0x0, 0xc0, 0x0, 0xc0, 0x0, 0xc0, 0x0, 0xc0, 0x0, 0xc0, 0x0, 0xc0, 0x0,
0xc0, 0x0, 0xc0, 0x0, 0xc0, 0x0, 0xc0, 0x0, 0xc0, 0x0, 0xc0, 0x0, 0xc0, 0x0, 0xc0, 0x0, 0xc0,
0x0, 0xc0, 0x0, 0xc0, 0x0, 0xc0, 0x0, 0xc0, 0x0, 0xc0, 0x0, 0xc0, 0x0, 0xc0, 0x0, 0xc0, 0x0,
0xc0, 0x0, 0xc0, 0x0, 0xc0, 0x0, 0xc0, 0x0, 0xc0, 0x0, 0xc0, 0x0, 0xc0, 0x0, 0xd0, 0x0, 0xd0,
0x0, 0xd0, 0x0, 0xd0, 0x0, 0xd0, 0x0, 0xd0, 0x0, 0xd0, 0x0, 0xd0, 0x0, 0xd0, 0x0, 0xd0, 0x0,
0xd0, 0x0, 0xd0, 0x0, 0xd0, 0x0, 0xd0, 0x0, 0xd0, 0x0, 0xd0, 0x0, 0xd0, 0x0, 0xd0, 0x0, 0xd0,
0x0, 0xd0, 0x0, 0xd0, 0x0, 0xd0, 0x0, 0xd0, 0x0, 0xd0, 0x0, 0xd0, 0x0, 0xd0, 0x0, 0xd0, 0x0,
0xd0, 0x0, 0xd0, 0x0, 0xd0, 0x0, 0xd0, 0x0, 0xd0, 0x0, 0xe0, 0x0, 0xe0, 0x0, 0xe0, 0x0, 0xe0,
0x0, 0xe0, 0x0, 0xe0, 0x0, 0xe0, 0x0, 0xe0, 0x0, 0xe0, 0x0, 0xe0, 0x0, 0xe0, 0x0, 0xe0, 0x0,
0xe0, 0x0, 0xe0, 0x0, 0xe0, 0x0, 0xe0, 0x0, 0xe0, 0x0, 0xe0, 0x0, 0xe0, 0x0, 0xe0, 0x0, 0xe0,
0x0, 0xe0, 0x0, 0xe0, 0x0, 0xe0, 0x0, 0xe0, 0x0, 0xe0, 0x0, 0xe0, 0x0, 0xe0, 0x0, 0xe0, 0x0,
0xe0, 0x0, 0xe0, 0x0, 0xe0, 0x0, 0xf0, 0x0, 0xf0, 0x0, 0xf0, 0x0, 0xf0, 0x0, 0xf0, 0x0, 0xf0,
0x0, 0xf0, 0x0, 0xf0, 0x0, 0xf0, 0x0, 0xf0, 0x0, 0xf0, 0x0, 0xf0, 0x0, 0xf0, 0x0, 0xf0, 0x0,
0xf0, 0x0, 0xf0, 0x0, 0xf0, 0x0, 0xf0, 0x0, 0xf0, 0x0, 0xf0, 0x0, 0xf0, 0x0, 0xf0, 0x0, 0xf0,
0x0, 0xf0, 0x0, 0xf0, 0x0, 0xf0, 0x0, 0xf0, 0x0, 0xf0, 0x0, 0xf0, 0x0, 0xf0, 0x0, 0xf0, 0x0,
0xf0, 0x0, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x0,
0x1, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1,
0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x0,
0x1, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x0, 0x1, 0x10, 0x1, 0x10, 0x1, 0x10, 0x1, 0x10, 0x1, 0x10,
0x1, 0x10, 0x1, 0x10, 0x1, 0x10, 0x1, 0x10, 0x1, 0x10, 0x1, 0x10, 0x1, 0x10, 0x1, 0x10, 0x1,
0x10, 0x1, 0x10, 0x1, 0x10, 0x1, 0x10, 0x1, 0x10, 0x1, 0x10, 0x1, 0x10, 0x1, 0x10, 0x1, 0x10,
0x1, 0x10, 0x1, 0x10, 0x1, 0x10, 0x1, 0x10, 0x1, 0x10, 0x1, 0x10, 0x1, 0x10, 0x1, 0x10, 0x1,
0x10, 0x1, 0x10, 0x1, 0x20, 0x1, 0x20, 0x1, 0x20, 0x1, 0x20, 0x1, 0x20, 0x1, 0x20, 0x1, 0x20,
0x1, 0x20, 0x1, 0x20, 0x1, 0x20, 0x1, 0x20, 0x1, 0x20, 0x1, 0x20, 0x1, 0x20, 0x1, 0x20, 0x1,
0x20, 0x1, 0x20, 0x1, 0x20, 0x1, 0x20, 0x1, 0x20, 0x1, 0x20, 0x1, 0x20, 0x1, 0x20, 0x1, 0x20,
0x1, 0x20, 0x1, 0x20, 0x1, 0x20, 0x1, 0x20, 0x1, 0x20, 0x1, 0x20, 0x1, 0x20, 0x1, 0x20, 0x1,
0x30, 0x1, 0x30, 0x1, 0x30, 0x1, 0x30, 0x1, 0x30, 0x1, 0x30, 0x1, 0x30, 0x1, 0x30, 0x1, 0x30,
0x1, 0x30, 0x1, 0x30, 0x1, 0x30, 0x1, 0x30, 0x1, 0x30, 0x1, 0x30, 0x1, 0x30, 0x1, 0x30, 0x1,
0x30, 0x1, 0x30, 0x1, 0x30, 0x1, 0x30, 0x1, 0x30, 0x1, 0x30, 0x1, 0x30, 0x1, 0x30, 0x1, 0x30,
0x1, 0x30, 0x1, 0x30, 0x1, 0x30, 0x1, 0x30, 0x1, 0x30, 0x1, 0x30, 0x1, 0x40, 0x1, 0x40, 0x1,
0x40, 0x1, 0x40, 0x1, 0x40, 0x1, 0x40, 0x1, 0x40, 0x1, 0x40, 0x1, 0x40, 0x1, 0x40, 0x1, 0x40,
0x1, 0x40, 0x1, 0x40, 0x1, 0x40, 0x1, 0x40, 0x1, 0x40, 0x1, 0x40, 0x1, 0x40, 0x1, 0x40, 0x1,
0x40, 0x1, 0x40, 0x1, 0x40, 0x1, 0x40, 0x1, 0x40, 0x1, 0x40, 0x1, 0x40, 0x1, 0x40, 0x1, 0x40,
0x1, 0x40, 0x1, 0x40, 0x1, 0x40, 0x1, 0x40, 0x1, 0x50, 0x1, 0x50, 0x1, 0x50, 0x1, 0x50, 0x1,
0x50, 0x1, 0x50, 0x1, 0x50, 0x1, 0x50, 0x1, 0x50, 0x1, 0x50, 0x1, 0x50, 0x1, 0x50, 0x1, 0x50,
0x1, 0x50, 0x1, 0x50, 0x1, 0x50, 0x1, 0x50, 0x1, 0x50, 0x1, 0x50, 0x1, 0x50, 0x1, 0x50, 0x1,
0x50, 0x1, 0x50, 0x1, 0x50, 0x1, 0x50, 0x1, 0x50, 0x1, 0x50, 0x1, 0x50, 0x1, 0x50, 0x1, 0x50,
0x1, 0x50, 0x1, 0x50, 0x1, 0x60, 0x1, 0x60, 0x1, 0x60, 0x1, 0x60, 0x1, 0x60, 0x1, 0x60, 0x1,
0x60, 0x1, 0x60, 0x1, 0x60, 0x1, 0x60, 0x1, 0x60, 0x1, 0x60, 0x1, 0x60, 0x1, 0x60, 0x1, 0x60,
0x1, 0x60, 0x1, 0x60, 0x1, 0x60, 0x1, 0x60, 0x1, 0x60, 0x1, 0x60, 0x1, 0x60, 0x1, 0x60, 0x1,
0x60, 0x1, 0x60, 0x1, 0x60, 0x1, 0x60, 0x1, 0x60, 0x1, 0x60, 0x1, 0x60, 0x1, 0x60, 0x1, 0x60,
0x1, 0x80, 0x0, 0x88, 0x0, 0x88, 0x0, 0x88, 0x0, 0x88, 0x0, 0x88, 0x0, 0x88, 0x0, 0x88, 0x0,
0x2, 0x0, 0x2, 0x0, 0x2, 0x0, 0x2, 0x0, 0x2, 0x0, 0x2, 0x0, 0x2, 0x0, 0x2, 0x0, 0x2, 0x0, 0x2,
0x0, 0x2, 0x0, 0x2, 0x0, 0x2, 0x0, 0x2, 0x0, 0x2, 0x0, 0x2, 0x0, 0x2, 0x0, 0x2, 0x0, 0x2, 0x0,
0x2, 0x0, 0x2, 0x0, 0x2, 0x0, 0x2, 0x0, 0x2, 0x0, 0x2, 0x0, 0x2, 0x0, 0x2, 0x0, 0x2, 0x0, 0x2,
0x0, 0x2, 0x0, 0x2, 0x0, 0x2, 0x0, 0xa8, 0x0, 0xa8, 0x0, 0xa8, 0x0, 0xa8, 0x0, 0xa8, 0x0, 0xa8,
0x0, 0xa8, 0x0, 0xa8, 0x0, 0xa8, 0x0, 0xa8, 0x0, 0xa8, 0x0, 0xa8, 0x0, 0xa8, 0x0, 0xa8, 0x0,
0xa8, 0x0, 0xa8, 0x0, 0xa8, 0x0, 0xa8, 0x0, 0xa8, 0x0, 0xa8, 0x0, 0xa8, 0x0, 0xa8, 0x0, 0xa8,
0x0, 0xa8, 0x0, 0xa8, 0x0, 0xa8, 0x0, 0xa8, 0x0, 0xa8, 0x0, 0xa8, 0x0, 0xa8, 0x0, 0xa8, 0x0,
0xa8, 0x0, 0xc8, 0x0, 0xc8, 0x0, 0xc8, 0x0, 0xc8, 0x0, 0xc8, 0x0, 0xc8, 0x0, 0xc8, 0x0, 0xc8,
0x0, 0xc8, 0x0, 0xc8, 0x0, 0xc8, 0x0, 0xc8, 0x0, 0xc8, 0x0, 0xc8, 0x0, 0xc8, 0x0, 0xc8, 0x0,
0xc8, 0x0, 0xc8, 0x0, 0xc8, 0x0, 0xc8, 0x0, 0xc8, 0x0, 0xc8, 0x0, 0xc8, 0x0, 0xc8, 0x0, 0xc8,
0x0, 0xc8, 0x0, 0xc8, 0x0, 0xc8, 0x0, 0xc8, 0x0, 0xc8, 0x0, 0xc8, 0x0, 0xc8, 0x0, 0xe8, 0x0,
0xe8, 0x0, 0xe8, 0x0, 0xe8, 0x0, 0xe8, 0x0, 0xe8, 0x0, 0xe8, 0x0, 0xe8, 0x0, 0xe8, 0x0, 0xe8,
0x0, 0xe8, 0x0, 0xe8, 0x0, 0xe8, 0x0, 0xe8, 0x0, 0xe8, 0x0, 0xe8, 0x0, 0xe8, 0x0, 0xe8, 0x0,
0xe8, 0x0, 0xe8, 0x0, 0xe8, 0x0, 0xe8, 0x0, 0xe8, 0x0, 0xe8, 0x0, 0xe8, 0x0, 0xe8, 0x0, 0xe8,
0x0, 0xe8, 0x0, 0xe8, 0x0, 0xe8, 0x0, 0xe8, 0x0, 0xe8, 0x0, 0x8, 0x1, 0x8, 0x1, 0x8, 0x1, 0x8,
0x1, 0x8, 0x1, 0x8, 0x1, 0x8, 0x1, 0x8, 0x1, 0x8, 0x1, 0x8, 0x1, 0x8, 0x1, 0x8, 0x1, 0x8, 0x1,
0x8, 0x1, 0x8, 0x1, 0x8, 0x1, 0x8, 0x1, 0x8, 0x1, 0x8, 0x1, 0x8, 0x1, 0x8, 0x1, 0x8, 0x1, 0x8,
0x1, 0x8, 0x1, 0x8, 0x1, 0x8, 0x1, 0x8, 0x1, 0x8, 0x1, 0x8, 0x1, 0x8, 0x1, 0x8, 0x1, 0x8, 0x1,
0x28, 0x1, 0x28, 0x1, 0x28, 0x1, 0x28, 0x1, 0x28, 0x1, 0x28, 0x1, 0x28, 0x1, 0x28, 0x1, 0x28,
0x1, 0x28, 0x1, 0x28, 0x1, 0x28, 0x1, 0x28, 0x1, 0x28, 0x1, 0x28, 0x1, 0x28, 0x1, 0x28, 0x1,
0x28, 0x1, 0x28, 0x1, 0x28, 0x1, 0x28, 0x1, 0x28, 0x1, 0x28, 0x1, 0x28, 0x1, 0x28, 0x1, 0x28,
0x1, 0x28, 0x1, 0x28, 0x1, 0x28, 0x1, 0x28, 0x1, 0x28, 0x1, 0x28, 0x1, 0x48, 0x1, 0x48, 0x1,
0x48, 0x1, 0x48, 0x1, 0x48, 0x1, 0x48, 0x1, 0x48, 0x1, 0x48, 0x1, 0x48, 0x1, 0x48, 0x1, 0x48,
0x1, 0x48, 0x1, 0x48, 0x1, 0x48, 0x1, 0x48, 0x1, 0x48, 0x1, 0x48, 0x1, 0x48, 0x1, 0x48, 0x1,
0x48, 0x1, 0x48, 0x1, 0x48, 0x1, 0x48, 0x1, 0x48, 0x1, 0x48, 0x1, 0x48, 0x1, 0x48, 0x1, 0x48,
0x1, 0x48, 0x1, 0x48, 0x1, 0x48, 0x1, 0x48, 0x1, 0x68, 0x1, 0x68, 0x1, 0x68, 0x1, 0x68, 0x1,
0x68, 0x1, 0x68, 0x1, 0x68, 0x1, 0x68, 0x1, 0x68, 0x1, 0x68, 0x1, 0x68, 0x1, 0x68, 0x1, 0x68,
0x1, 0x68, 0x1, 0x68, 0x1, 0x68, 0x1, 0x68, 0x1, 0x68, 0x1, 0x68, 0x1, 0x68, 0x1, 0x68, 0x1,
0x68, 0x1, 0x68, 0x1, 0x68, 0x1, 0x68, 0x1, 0x68, 0x1, 0x68, 0x1, 0x68, 0x1, 0x68, 0x1, 0x68,
0x1, 0x68, 0x1, 0x68, 0x1, 0x88, 0x1, 0x88, 0x1, 0x88, 0x1, 0x88, 0x1, 0x88, 0x1, 0x88, 0x1,
0x88, 0x1, 0x88, 0x1, 0x88, 0x1, 0x88, 0x1, 0x88, 0x1, 0x88, 0x1, 0x88, 0x1, 0x88, 0x1, 0x88,
0x1, 0x88, 0x1, 0x88, 0x1, 0x88, 0x1, 0x88, 0x1, 0x88, 0x1, 0x88, 0x1, 0x88, 0x1, 0x88, 0x1,
0x88, 0x1, 0x88, 0x1, 0x88, 0x1, 0x88, 0x1, 0x88, 0x1, 0x88, 0x1, 0x88, 0x1, 0x88, 0x1, 0x88,
0x1, 0xa8, 0x1, 0xa8, 0x1, 0xa8, 0x1, 0xa8, 0x1, 0xa8, 0x1, 0xa8, 0x1, 0xa8, 0x1, 0xa8, 0x1,
0xa8, 0x1, 0xa8, 0x1, 0xa8, 0x1, 0xa8, 0x1, 0xa8, 0x1, 0xa8, 0x1, 0xa8, 0x1, 0xa8, 0x1, 0xa8,
0x1, 0xa8, 0x1, 0xa8, 0x1, 0xa8, 0x1, 0xa8, 0x1, 0xa8, 0x1, 0xa8, 0x1, 0xa8, 0x1, 0xa8, 0x1,
0xa8, 0x1, 0xa8, 0x1, 0xa8, 0x1, 0xa8, 0x1, 0xa8, 0x1, 0xa8, 0x1, 0xa8, 0x1, 0xc8, 0x1, 0xc8,
0x1, 0xc8, 0x1, 0xc8, 0x1, 0xc8, 0x1, 0xc8, 0x1, 0xc8, 0x1, 0xc8, 0x1, 0xc8, 0x1, 0xc8, 0x1,
0xc8, 0x1, 0xc8, 0x1, 0xc8, 0x1, 0xc8, 0x1, 0xc8, 0x1, 0xc8, 0x1, 0xc8, 0x1, 0xc8, 0x1, 0xc8,
0x1, 0xc8, 0x1, 0xc8, 0x1, 0xc8, 0x1, 0xc8, 0x1, 0xc8, 0x1, 0xc8, 0x1, 0xc8, 0x1, 0xc8, 0x1,
0xc8, 0x1, 0xc8, 0x1, 0xc8, 0x1, 0xc8, 0x1, 0xc8, 0x1, 0xe8, 0x1, 0xe8, 0x1, 0xe8, 0x1, 0xe8,
0x1, 0xe8, 0x1, 0xe8, 0x1, 0xe8, 0x1, 0xe8, 0x1, 0xe8, 0x1, 0xe8, 0x1, 0xe8, 0x1, 0xe8, 0x1,
0xe8, 0x1, 0xe8, 0x1, 0xe8, 0x1, 0xe8, 0x1, 0xe8, 0x1, 0xe8, 0x1, 0xe8, 0x1, 0xe8, 0x1, 0xe8,
0x1, 0xe8, 0x1, 0xe8, 0x1, 0xe8, 0x1, 0xe8, 0x1, 0xe8, 0x1, 0xe8, 0x1, 0xe8, 0x1, 0xe8, 0x1,
0xe8, 0x1, 0xe8, 0x1, 0xe8, 0x1, 0x8, 0x2, 0x8, 0x2, 0x8, 0x2, 0x8, 0x2, 0x8, 0x2, 0x8, 0x2,
0x8, 0x2, 0x8, 0x2, 0x8, 0x2, 0x8, 0x2, 0x8, 0x2, 0x8, 0x2, 0x8, 0x2, 0x8, 0x2, 0x8, 0x2, 0x8,
0x2, 0x8, 0x2, 0x8, 0x2, 0x8, 0x2, 0x8, 0x2, 0x8, 0x2, 0x8, 0x2, 0x8, 0x2, 0x8, 0x2, 0x8, 0x2,
0x8, 0x2, 0x8, 0x2, 0x8, 0x2, 0x8, 0x2, 0x8, 0x2, 0x8, 0x2, 0x8, 0x2, 0x28, 0x2, 0x28, 0x2,
0x28, 0x2, 0x28, 0x2, 0x28, 0x2, 0x28, 0x2, 0x28, 0x2, 0x28, 0x2, 0x28, 0x2, 0x28, 0x2, 0x28,
0x2, 0x28, 0x2, 0x28, 0x2, 0x28, 0x2, 0x28, 0x2, 0x28, 0x2, 0x28, 0x2, 0x28, 0x2, 0x28, 0x2,
0x28, 0x2, 0x28, 0x2, 0x28, 0x2, 0x28, 0x2, 0x28, 0x2, 0x28, 0x2, 0x28, 0x2, 0x28, 0x2, 0x28,
0x2, 0x28, 0x2, 0x28, 0x2, 0x28, 0x2, 0x28, 0x2, 0x48, 0x2, 0x48, 0x2, 0x48, 0x2, 0x48, 0x2,
0x48, 0x2, 0x48, 0x2, 0x48, 0x2, 0x48, 0x2, 0x48, 0x2, 0x48, 0x2, 0x48, 0x2, 0x48, 0x2, 0x48,
0x2, 0x48, 0x2, 0x48, 0x2, 0x48, 0x2, 0x48, 0x2, 0x48, 0x2, 0x48, 0x2, 0x48, 0x2, 0x48, 0x2,
0x48, 0x2, 0x48, 0x2, 0x48, 0x2, 0x48, 0x2, 0x48, 0x2, 0x48, 0x2, 0x48, 0x2, 0x48, 0x2, 0x48,
0x2, 0x48, 0x2, 0x48, 0x2, 0x68, 0x2, 0x68, 0x2, 0x68, 0x2, 0x68, 0x2, 0x68, 0x2, 0x68, 0x2,
0x68, 0x2, 0x68, 0x2, 0x68, 0x2, 0x68, 0x2, 0x68, 0x2, 0x68, 0x2, 0x68, 0x2, 0x68, 0x2, 0x68,
0x2, 0x68, 0x2, 0x68, 0x2, 0x68, 0x2, 0x68, 0x2, 0x68, 0x2, 0x68, 0x2, 0x68, 0x2, 0x68, 0x2,
0x68, 0x2, 0x68, 0x2, 0x68, 0x2, 0x68, 0x2, 0x68, 0x2, 0x68, 0x2, 0x68, 0x2, 0x68, 0x2, 0x68,
0x2,
];
/// Return a [`CodePointTrie`] that returns the Unicode plane number, an
/// integer from 0-16 inclusive, for each code point.
///
/// This `CodePointTrie`
/// does not actually represent any Unicode property, but it is provided in
/// case it is useful to users of `CodePointTrie` for testing or other
/// purposes. See <https://www.unicode.org/glossary/#plane>.
pub fn get_planes_trie() -> CodePointTrie<'static, u8> {
let index_array_as_bytes: &[u8] = INDEX_ARRAY_AS_BYTES;
let data_8_array: &[u8] = &[
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 0xa, 0xa, 0xa, 0xa, 0xa, 0xa, 0xa, 0xa, 0xa, 0xa, 0xa, 0xa, 0xa, 0xa, 0xa, 0xa, 0xb,
0xb, 0xb, 0xb, 0xb, 0xb, 0xb, 0xb, 0xb, 0xb, 0xb, 0xb, 0xb, 0xb, 0xb, 0xb, 0xc, 0xc, 0xc,
0xc, 0xc, 0xc, 0xc, 0xc, 0xc, 0xc, 0xc, 0xc, 0xc, 0xc, 0xc, 0xc, 0xd, 0xd, 0xd, 0xd, 0xd,
0xd, 0xd, 0xd, 0xd, 0xd, 0xd, 0xd, 0xd, 0xd, 0xd, 0xd, 0xe, 0xe, 0xe, 0xe, 0xe, 0xe, 0xe,
0xe, 0xe, 0xe, 0xe, 0xe, 0xe, 0xe, 0xe, 0xe, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf,
0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0x10, 0x10, 0x10, 0,
];
#[expect(clippy::unwrap_used)] // valid bytes
let index: ZeroVec<u16> = ZeroVec::parse_bytes(index_array_as_bytes).unwrap();
#[expect(clippy::unwrap_used)] // valid bytes
let data: ZeroVec<u8> = ZeroVec::parse_bytes(data_8_array).unwrap();
let high_start = 0x100000;
let shifted12_high_start = 0x100;
let index3_null_offset = 0x2;
let data_null_offset = 0x0;
let null_value = 0x0;
let trie_type = TrieType::Small;
let trie_header = CodePointTrieHeader {
high_start,
shifted12_high_start,
index3_null_offset,
data_null_offset,
null_value,
trie_type,
};
#[expect(clippy::unwrap_used)] // valid data
CodePointTrie::try_new(trie_header, index, data).unwrap()
}
#[cfg(test)]
mod tests {
use zerovec::ZeroVec;
const INDEX_ARRAY: &[u16] = &[
0, 0x40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0x288, 0x290, 0x290, 0x290, 0x2b0, 0x2b0, 0x2b0, 0x2b0, 0x2d0, 0x2d0, 0x2d0,
0x2d0, 0x2f0, 0x2f0, 0x2f0, 0x2f0, 0x310, 0x310, 0x310, 0x310, 0x330, 0x330, 0x330, 0x330,
0x350, 0x350, 0x350, 0x350, 0x370, 0x370, 0x370, 0x370, 0x390, 0x390, 0x390, 0x390, 0x3b0,
0x3b0, 0x3b0, 0x3b0, 0x3d0, 0x3d0, 0x3d0, 0x3d0, 0x3f0, 0x3f0, 0x3f0, 0x3f0, 0x410, 0x410,
0x410, 0x410, 0x430, 0x430, 0x430, 0x430, 0x450, 0x450, 0x450, 0x450, 0x470, 0x470, 0x470,
0x470, 0, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70, 0, 0x10, 0x20, 0x30, 0, 0x10, 0x20,
0x30, 0, 0x10, 0x20, 0x30, 0, 0x10, 0x20, 0x30, 0, 0x10, 0x20, 0x30, 0, 0x10, 0x20, 0x30,
0, 0x10, 0x20, 0x30, 0, 0x10, 0x20, 0x30, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0xa0, 0xa0, 0xa0, 0xa0,
0xa0, 0xa0, 0xa0, 0xa0, 0xa0, 0xa0, 0xa0, 0xa0, 0xa0, 0xa0, 0xa0, 0xa0, 0xa0, 0xa0, 0xa0,
0xa0, 0xa0, 0xa0, 0xa0, 0xa0, 0xa0, 0xa0, 0xa0, 0xa0, 0xa0, 0xa0, 0xa0, 0xa0, 0xb0, 0xb0,
0xb0, 0xb0, 0xb0, 0xb0, 0xb0, 0xb0, 0xb0, 0xb0, 0xb0, 0xb0, 0xb0, 0xb0, 0xb0, 0xb0, 0xb0,
0xb0, 0xb0, 0xb0, 0xb0, 0xb0, 0xb0, 0xb0, 0xb0, 0xb0, 0xb0, 0xb0, 0xb0, 0xb0, 0xb0, 0xb0,
0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0,
0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0, 0xc0,
0xc0, 0xc0, 0xd0, 0xd0, 0xd0, 0xd0, 0xd0, 0xd0, 0xd0, 0xd0, 0xd0, 0xd0, 0xd0, 0xd0, 0xd0,
0xd0, 0xd0, 0xd0, 0xd0, 0xd0, 0xd0, 0xd0, 0xd0, 0xd0, 0xd0, 0xd0, 0xd0, 0xd0, 0xd0, 0xd0,
0xd0, 0xd0, 0xd0, 0xd0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0,
0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0,
0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xe0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0,
0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0xf0, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100,
0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100,
0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100,
0x110, 0x110, 0x110, 0x110, 0x110, 0x110, 0x110, 0x110, 0x110, 0x110, 0x110, 0x110, 0x110,
0x110, 0x110, 0x110, 0x110, 0x110, 0x110, 0x110, 0x110, 0x110, 0x110, 0x110, 0x110, 0x110,
0x110, 0x110, 0x110, 0x110, 0x110, 0x110, 0x120, 0x120, 0x120, 0x120, 0x120, 0x120, 0x120,
0x120, 0x120, 0x120, 0x120, 0x120, 0x120, 0x120, 0x120, 0x120, 0x120, 0x120, 0x120, 0x120,
0x120, 0x120, 0x120, 0x120, 0x120, 0x120, 0x120, 0x120, 0x120, 0x120, 0x120, 0x120, 0x130,
0x130, 0x130, 0x130, 0x130, 0x130, 0x130, 0x130, 0x130, 0x130, 0x130, 0x130, 0x130, 0x130,
0x130, 0x130, 0x130, 0x130, 0x130, 0x130, 0x130, 0x130, 0x130, 0x130, 0x130, 0x130, 0x130,
0x130, 0x130, 0x130, 0x130, 0x130, 0x140, 0x140, 0x140, 0x140, 0x140, 0x140, 0x140, 0x140,
0x140, 0x140, 0x140, 0x140, 0x140, 0x140, 0x140, 0x140, 0x140, 0x140, 0x140, 0x140, 0x140,
0x140, 0x140, 0x140, 0x140, 0x140, 0x140, 0x140, 0x140, 0x140, 0x140, 0x140, 0x150, 0x150,
0x150, 0x150, 0x150, 0x150, 0x150, 0x150, 0x150, 0x150, 0x150, 0x150, 0x150, 0x150, 0x150,
0x150, 0x150, 0x150, 0x150, 0x150, 0x150, 0x150, 0x150, 0x150, 0x150, 0x150, 0x150, 0x150,
0x150, 0x150, 0x150, 0x150, 0x160, 0x160, 0x160, 0x160, 0x160, 0x160, 0x160, 0x160, 0x160,
0x160, 0x160, 0x160, 0x160, 0x160, 0x160, 0x160, 0x160, 0x160, 0x160, 0x160, 0x160, 0x160,
0x160, 0x160, 0x160, 0x160, 0x160, 0x160, 0x160, 0x160, 0x160, 0x160, 0x80, 0x88, 0x88,
0x88, 0x88, 0x88, 0x88, 0x88, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0xa8, 0xa8, 0xa8, 0xa8, 0xa8, 0xa8, 0xa8, 0xa8, 0xa8,
0xa8, 0xa8, 0xa8, 0xa8, 0xa8, 0xa8, 0xa8, 0xa8, 0xa8, 0xa8, 0xa8, 0xa8, 0xa8, 0xa8, 0xa8,
0xa8, 0xa8, 0xa8, 0xa8, 0xa8, 0xa8, 0xa8, 0xa8, 0xc8, 0xc8, 0xc8, 0xc8, 0xc8, 0xc8, 0xc8,
0xc8, 0xc8, 0xc8, 0xc8, 0xc8, 0xc8, 0xc8, 0xc8, 0xc8, 0xc8, 0xc8, 0xc8, 0xc8, 0xc8, 0xc8,
0xc8, 0xc8, 0xc8, 0xc8, 0xc8, 0xc8, 0xc8, 0xc8, 0xc8, 0xc8, 0xe8, 0xe8, 0xe8, 0xe8, 0xe8,
0xe8, 0xe8, 0xe8, 0xe8, 0xe8, 0xe8, 0xe8, 0xe8, 0xe8, 0xe8, 0xe8, 0xe8, 0xe8, 0xe8, 0xe8,
0xe8, 0xe8, 0xe8, 0xe8, 0xe8, 0xe8, 0xe8, 0xe8, 0xe8, 0xe8, 0xe8, 0xe8, 0x108, 0x108,
0x108, 0x108, 0x108, 0x108, 0x108, 0x108, 0x108, 0x108, 0x108, 0x108, 0x108, 0x108, 0x108,
0x108, 0x108, 0x108, 0x108, 0x108, 0x108, 0x108, 0x108, 0x108, 0x108, 0x108, 0x108, 0x108,
0x108, 0x108, 0x108, 0x108, 0x128, 0x128, 0x128, 0x128, 0x128, 0x128, 0x128, 0x128, 0x128,
0x128, 0x128, 0x128, 0x128, 0x128, 0x128, 0x128, 0x128, 0x128, 0x128, 0x128, 0x128, 0x128,
0x128, 0x128, 0x128, 0x128, 0x128, 0x128, 0x128, 0x128, 0x128, 0x128, 0x148, 0x148, 0x148,
0x148, 0x148, 0x148, 0x148, 0x148, 0x148, 0x148, 0x148, 0x148, 0x148, 0x148, 0x148, 0x148,
0x148, 0x148, 0x148, 0x148, 0x148, 0x148, 0x148, 0x148, 0x148, 0x148, 0x148, 0x148, 0x148,
0x148, 0x148, 0x148, 0x168, 0x168, 0x168, 0x168, 0x168, 0x168, 0x168, 0x168, 0x168, 0x168,
0x168, 0x168, 0x168, 0x168, 0x168, 0x168, 0x168, 0x168, 0x168, 0x168, 0x168, 0x168, 0x168,
0x168, 0x168, 0x168, 0x168, 0x168, 0x168, 0x168, 0x168, 0x168, 0x188, 0x188, 0x188, 0x188,
0x188, 0x188, 0x188, 0x188, 0x188, 0x188, 0x188, 0x188, 0x188, 0x188, 0x188, 0x188, 0x188,
0x188, 0x188, 0x188, 0x188, 0x188, 0x188, 0x188, 0x188, 0x188, 0x188, 0x188, 0x188, 0x188,
0x188, 0x188, 0x1a8, 0x1a8, 0x1a8, 0x1a8, 0x1a8, 0x1a8, 0x1a8, 0x1a8, 0x1a8, 0x1a8, 0x1a8,
0x1a8, 0x1a8, 0x1a8, 0x1a8, 0x1a8, 0x1a8, 0x1a8, 0x1a8, 0x1a8, 0x1a8, 0x1a8, 0x1a8, 0x1a8,
0x1a8, 0x1a8, 0x1a8, 0x1a8, 0x1a8, 0x1a8, 0x1a8, 0x1a8, 0x1c8, 0x1c8, 0x1c8, 0x1c8, 0x1c8,
0x1c8, 0x1c8, 0x1c8, 0x1c8, 0x1c8, 0x1c8, 0x1c8, 0x1c8, 0x1c8, 0x1c8, 0x1c8, 0x1c8, 0x1c8,
0x1c8, 0x1c8, 0x1c8, 0x1c8, 0x1c8, 0x1c8, 0x1c8, 0x1c8, 0x1c8, 0x1c8, 0x1c8, 0x1c8, 0x1c8,
0x1c8, 0x1e8, 0x1e8, 0x1e8, 0x1e8, 0x1e8, 0x1e8, 0x1e8, 0x1e8, 0x1e8, 0x1e8, 0x1e8, 0x1e8,
0x1e8, 0x1e8, 0x1e8, 0x1e8, 0x1e8, 0x1e8, 0x1e8, 0x1e8, 0x1e8, 0x1e8, 0x1e8, 0x1e8, 0x1e8,
0x1e8, 0x1e8, 0x1e8, 0x1e8, 0x1e8, 0x1e8, 0x1e8, 0x208, 0x208, 0x208, 0x208, 0x208, 0x208,
0x208, 0x208, 0x208, 0x208, 0x208, 0x208, 0x208, 0x208, 0x208, 0x208, 0x208, 0x208, 0x208,
0x208, 0x208, 0x208, 0x208, 0x208, 0x208, 0x208, 0x208, 0x208, 0x208, 0x208, 0x208, 0x208,
0x228, 0x228, 0x228, 0x228, 0x228, 0x228, 0x228, 0x228, 0x228, 0x228, 0x228, 0x228, 0x228,
0x228, 0x228, 0x228, 0x228, 0x228, 0x228, 0x228, 0x228, 0x228, 0x228, 0x228, 0x228, 0x228,
0x228, 0x228, 0x228, 0x228, 0x228, 0x228, 0x248, 0x248, 0x248, 0x248, 0x248, 0x248, 0x248,
0x248, 0x248, 0x248, 0x248, 0x248, 0x248, 0x248, 0x248, 0x248, 0x248, 0x248, 0x248, 0x248,
0x248, 0x248, 0x248, 0x248, 0x248, 0x248, 0x248, 0x248, 0x248, 0x248, 0x248, 0x248, 0x268,
0x268, 0x268, 0x268, 0x268, 0x268, 0x268, 0x268, 0x268, 0x268, 0x268, 0x268, 0x268, 0x268,
0x268, 0x268, 0x268, 0x268, 0x268, 0x268, 0x268, 0x268, 0x268, 0x268, 0x268, 0x268, 0x268,
0x268, 0x268, 0x268, 0x268, 0x268,
];
#[test]
fn test_index_byte_array_literal() {
let index_array_as_bytes: &[u8] = super::INDEX_ARRAY_AS_BYTES;
let index_zv_bytes: ZeroVec<u16> =
ZeroVec::parse_bytes(index_array_as_bytes).expect("infallible");
let index_zv_aligned: ZeroVec<u16> = ZeroVec::from_slice_or_alloc(INDEX_ARRAY);
assert_eq!(index_zv_bytes, index_zv_aligned);
}
}

View File

@@ -0,0 +1,74 @@
// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
use crate::codepointtrie::{CodePointTrie, CodePointTrieHeader, TrieValue};
use serde::{de::Error, Deserialize, Deserializer, Serialize, Serializer};
use zerofrom::ZeroFrom;
use zerovec::ZeroVec;
#[derive(Serialize, Deserialize)]
pub struct CodePointTrieSerde<'trie, T: TrieValue> {
header: CodePointTrieHeader,
#[serde(borrow)]
index: ZeroVec<'trie, u16>,
#[serde(borrow)]
data: ZeroVec<'trie, T>,
}
impl<T: TrieValue + Serialize> Serialize for CodePointTrie<'_, T> {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
let ser = CodePointTrieSerde {
header: self.header,
index: ZeroFrom::zero_from(&self.index),
data: ZeroFrom::zero_from(&self.data),
};
ser.serialize(serializer)
}
}
impl<'de, 'trie, T: TrieValue + Deserialize<'de>> Deserialize<'de> for CodePointTrie<'trie, T>
where
'de: 'trie,
{
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
let de = CodePointTrieSerde::deserialize(deserializer)?;
// SAFETY:
// `validate_fields` upholds the invariants for the fields that
// fast-path access without bound checks relies on.
let error_value = match CodePointTrie::validate_fields(&de.header, &de.index, &de.data) {
Ok(v) => v,
Err(e) => {
match e {
super::CodePointTrieError::FromDeserialized { reason } => {
// Not supposed to be returned by `validate_fields`.
debug_assert!(false);
return Err(D::Error::custom(reason));
}
super::CodePointTrieError::EmptyDataVector => {
return Err(D::Error::custom("CodePointTrie must be constructed from data vector with at least one element"));
}
super::CodePointTrieError::IndexTooShortForFastAccess => {
return Err(D::Error::custom("CodePointTrie must be constructed from index vector long enough to accommodate fast-path access"));
}
super::CodePointTrieError::DataTooShortForFastAccess => {
return Err(D::Error::custom("CodePointTrie must be constructed from data vector long enough to accommodate fast-path access"));
}
}
}
};
// Field invariants upheld: Checked by `validate_fields` above.
Ok(CodePointTrie {
header: de.header,
index: de.index,
data: de.data,
error_value,
})
}
}

View File

@@ -0,0 +1,123 @@
// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
//! Utilities for reading CodePointTrie data from TOML files.
use crate::codepointtrie::error::Error;
use crate::codepointtrie::CodePointTrie;
use crate::codepointtrie::CodePointTrieHeader;
use crate::codepointtrie::TrieType;
use crate::codepointtrie::TrieValue;
use alloc::string::String;
use alloc::vec::Vec;
use core::convert::TryFrom;
use zerovec::ZeroVec;
/// A Serde-compatible struct for reading serialized [`CodePointTrie`] TOML files
/// generated by ICU4C.
///
/// Use `TryInto` to convert [`CodePointTrieToml`] to a proper [`CodePointTrie`].
#[derive(serde::Deserialize)]
pub struct CodePointTrieToml {
#[serde(skip)]
_short_name: String,
#[serde(skip)]
_long_name: String,
#[serde(skip)]
_name: String,
index: Vec<u16>,
data_8: Option<Vec<u8>>,
data_16: Option<Vec<u16>>,
data_32: Option<Vec<u32>>,
#[serde(skip)]
_index_length: u32,
#[serde(skip)]
_data_length: u32,
#[serde(rename = "highStart")]
high_start: u32,
#[serde(rename = "shifted12HighStart")]
shifted12_high_start: u16,
#[serde(rename = "type")]
trie_type_enum_val: u8,
#[serde(rename = "valueWidth")]
_value_width_enum_val: u8,
#[serde(rename = "index3NullOffset")]
index3_null_offset: u16,
#[serde(rename = "dataNullOffset")]
data_null_offset: u32,
#[serde(rename = "nullValue")]
null_value: u32,
}
/// Data slice from a [`CodePointTrie`] TOML.
///
/// ICU4C exports data as either `u8`, `u16`, or `u32`, which may be converted
/// to other types as appropriate.
#[allow(clippy::exhaustive_enums)] // based on a stable serialized form
pub enum CodePointDataSlice<'a> {
/// A serialized [`CodePointTrie`] data array 8-bit values.
U8(&'a [u8]),
/// A serialized [`CodePointTrie`] data array 16-bit values.
U16(&'a [u16]),
/// A serialized [`CodePointTrie`] data array 32-bit values.
U32(&'a [u32]),
}
impl CodePointTrieToml {
/// Gets the `index` slice.
pub fn index_slice(&self) -> &[u16] {
self.index.as_slice()
}
/// Gets the `data` slice.
pub fn data_slice(&self) -> Result<CodePointDataSlice<'_>, Error> {
if let Some(data_8) = &self.data_8 {
Ok(CodePointDataSlice::U8(data_8.as_slice()))
} else if let Some(data_16) = &self.data_16 {
Ok(CodePointDataSlice::U16(data_16.as_slice()))
} else if let Some(data_32) = &self.data_32 {
Ok(CodePointDataSlice::U32(data_32.as_slice()))
} else {
Err(Error::FromDeserialized {
reason: "Did not find data array for CodePointTrie in TOML",
})
}
}
}
impl TryFrom<&CodePointTrieToml> for CodePointTrieHeader {
type Error = Error;
fn try_from(cpt_data: &CodePointTrieToml) -> Result<Self, Self::Error> {
let trie_type_enum: TrieType = TrieType::try_from(cpt_data.trie_type_enum_val)?;
Ok(CodePointTrieHeader {
high_start: cpt_data.high_start,
shifted12_high_start: cpt_data.shifted12_high_start,
index3_null_offset: cpt_data.index3_null_offset,
data_null_offset: cpt_data.data_null_offset,
null_value: cpt_data.null_value,
trie_type: trie_type_enum,
})
}
}
impl<T: TrieValue> TryFrom<&CodePointTrieToml> for CodePointTrie<'static, T> {
type Error = Error;
fn try_from(cpt_data: &CodePointTrieToml) -> Result<CodePointTrie<'static, T>, Self::Error> {
use CodePointDataSlice::*;
let header = CodePointTrieHeader::try_from(cpt_data)?;
let index: ZeroVec<u16> = ZeroVec::alloc_from_slice(&cpt_data.index);
let data: Result<ZeroVec<'static, T>, T::TryFromU32Error> = match cpt_data.data_slice()? {
U8(s) => s.iter().map(|i| T::try_from_u32(*i as u32)).collect(),
U16(s) => s.iter().map(|i| T::try_from_u32(*i as u32)).collect(),
U32(s) => s.iter().map(|i| T::try_from_u32(*i)).collect(),
};
let data = data.map_err(|_| Error::FromDeserialized {
reason: "Could not parse data array to typed array",
})?;
CodePointTrie::<T>::try_new(header, index, data)
}
}