chore: checkpoint before Python removal

This commit is contained in:
2026-03-26 22:33:59 +00:00
parent 683cec9307
commit e568ddf82a
29972 changed files with 11269302 additions and 2 deletions

204
vendor/zerovec/benches/vzv.rs vendored Normal file
View File

@@ -0,0 +1,204 @@
// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use rand::SeedableRng;
use rand_distr::{Alphanumeric, Distribution, Uniform};
use rand_pcg::Lcg64Xsh32;
use std::ops::RangeInclusive;
use zerovec::VarZeroVec;
/// Builds a reproducible list of random alphanumeric strings.
///
/// - `lengths`: inclusive range from which each string's length is drawn uniformly
/// - `count`: number of strings to produce
/// - `seed`: seed for the PRNG
///
/// Returns the strings together with a `u64` drawn from the same PRNG, which the caller can
/// use to seed a follow-up call.
fn random_alphanums(lengths: RangeInclusive<usize>, count: usize, seed: u64) -> (Vec<String>, u64) {
    // Lcg64Xsh32 is a small, fast PRNG for reproducible benchmarks.
    // `len_rng` drives the string lengths; `char_rng` (seeded from it) drives the characters.
    let mut len_rng = Lcg64Xsh32::seed_from_u64(seed);
    let mut char_rng = Lcg64Xsh32::seed_from_u64(rand::Rng::random(&mut len_rng));
    let len_dist = Uniform::try_from(lengths).expect("range out of bounds");
    let mut strings = Vec::with_capacity(count);
    for len in len_dist.sample_iter(&mut len_rng).take(count) {
        let s: String = Alphanumeric
            .sample_iter(&mut char_rng)
            .take(len)
            .map(char::from)
            .collect();
        strings.push(s);
    }
    let next_seed = rand::Rng::random(&mut len_rng);
    (strings, next_seed)
}
/// Criterion entry point: runs the headline "vzv/overview" benchmark, then every other
/// benchmark group in this file.
fn overview_bench(c: &mut Criterion) {
    // Same as vzv/char_count/vzv but with different inputs
    let (strings, _) = random_alphanums(2..=10, 100, 42);
    let encoded: Vec<u8> = VarZeroVec::<str>::from(&strings).into_bytes();
    let vzv = VarZeroVec::<str>::parse_bytes(black_box(encoded.as_slice())).unwrap();
    c.bench_function("vzv/overview", |b| {
        b.iter(|| {
            black_box(&vzv)
                .iter()
                .fold(0, |total, s| total + s.chars().count())
        });
    });

    char_count_benches(c);
    binary_search_benches(c);
    vzv_precompute_bench(c);

    // The serde benches only exist when the `serde` feature is enabled.
    #[cfg(feature = "serde")]
    serde_benches(c);
}
/// Compares counting the chars of 100 random strings held in a `Vec<String>` against the same
/// strings held in a `VarZeroVec<str>` parsed from bytes.
fn char_count_benches(c: &mut Criterion) {
    let (strings, _) = random_alphanums(2..=20, 100, 2021);
    let encoded: Vec<u8> = VarZeroVec::<str>::from(&strings).into_bytes();
    let vzv = VarZeroVec::<str>::parse_bytes(black_box(encoded.as_slice())).unwrap();

    // *** Count chars in vec of 100 strings ***
    c.bench_function("vzv/char_count/slice", |b| {
        b.iter(|| {
            black_box(&strings)
                .iter()
                .map(|s| s.chars().count())
                .sum::<usize>()
        });
    });

    // *** Count chars in vec of 100 strings ***
    c.bench_function("vzv/char_count/vzv", |b| {
        b.iter(|| {
            black_box(&vzv)
                .iter()
                .map(|s| s.chars().count())
                .sum::<usize>()
        });
    });
}
/// Benchmarks binary search over 500 random strings, comparing a `Vec<String>` with a
/// `VarZeroVec<str>` built from the same data.
///
/// The haystack is sorted before use: binary search over unsorted input returns unspecified
/// results, so the timings would otherwise not correspond to a real search.
fn binary_search_benches(c: &mut Criterion) {
    let seed = 2021;
    let (mut string_vec, seed) = random_alphanums(2..=20, 500, seed);
    // Sort once up front; the VarZeroVec below is encoded from the sorted data, so both
    // haystacks satisfy the precondition of `binary_search`.
    string_vec.sort_unstable();
    let (needles, _) = random_alphanums(2..=20, 10, seed);
    let bytes: Vec<u8> = VarZeroVec::<str>::from(&string_vec).into_bytes();
    let vzv = VarZeroVec::<str>::parse_bytes(black_box(bytes.as_slice())).unwrap();
    let single_needle = "lmnop".to_owned();

    // *** Binary search vec of 500 strings 10 times ***
    c.bench_function("vzv/binary_search/slice", |b| {
        b.iter(|| {
            black_box(&needles)
                .iter()
                .map(|needle| black_box(&string_vec).binary_search(needle))
                .filter(|r| r.is_ok())
                .count()
        });
    });

    // *** Binary search vec of 500 strings 10 times ***
    c.bench_function("vzv/binary_search/vzv", |b| {
        b.iter(|| {
            black_box(&needles)
                .iter()
                .map(|needle| black_box(&vzv).binary_search(needle))
                .filter(|r| r.is_ok())
                .count()
        });
    });

    // *** Binary search vec of 500 strings once, with a fixed needle ***
    c.bench_function("vzv/binary_search/single/slice", |b| {
        b.iter(|| black_box(&string_vec).binary_search(black_box(&single_needle)));
    });
    c.bench_function("vzv/binary_search/single/vzv", |b| {
        b.iter(|| black_box(&vzv).binary_search(black_box(&single_needle)));
    });
}
/// Compares bincode deserialization of 100 random strings into `Vec<String>`, `Vec<&str>` and
/// `VarZeroVec<str>`. Only compiled when the `serde` feature is enabled.
#[cfg(feature = "serde")]
fn serde_benches(c: &mut Criterion) {
    let (strings, _) = random_alphanums(2..=20, 100, 2021);
    let vzv: VarZeroVec<str> = VarZeroVec::from(&*strings);
    let vec_bytes = bincode::serialize(&strings).unwrap();
    let vzv_bytes = bincode::serialize(&vzv).unwrap();

    // *** Deserialize vec of 100 strings ***
    c.bench_function("vzv/deserialize/string/vec_owned", |b| {
        b.iter(|| bincode::deserialize::<Vec<String>>(black_box(&vec_bytes)));
    });

    // *** Deserialize vec of 100 strings ***
    c.bench_function("vzv/deserialize/string/vec_borrowed", |b| {
        b.iter(|| bincode::deserialize::<Vec<&str>>(black_box(&vec_bytes)));
    });

    // *** Deserialize vec of 100 strings ***
    c.bench_function("vzv/deserialize/string/vzv", |b| {
        b.iter(|| bincode::deserialize::<VarZeroVec<str>>(black_box(&vzv_bytes)));
    });
}
// Testing differences between operating on slices with precomputed/non-precomputed indexing info
//
// The haystack is sorted before encoding: the search benchmarks below call `binary_search`,
// whose result is unspecified on unsorted input.
fn vzv_precompute_bench(c: &mut Criterion) {
    let seed = 2021;
    let (mut string_vec, seed) = random_alphanums(2..=20, 500, seed);
    // Establish the sorted-input precondition of `binary_search` once, up front.
    string_vec.sort_unstable();
    let (needles, _) = random_alphanums(2..=20, 10, seed);
    let bytes: Vec<u8> = VarZeroVec::<str>::from(&string_vec).into_bytes();
    let vzv = VarZeroVec::<str>::parse_bytes(black_box(bytes.as_slice())).unwrap();
    // Two views of the same data: `as_components()` (the "precomputed" variant in the bench
    // names) and `as_slice()`.
    let borrowed = vzv.as_components();
    let slice = vzv.as_slice();
    let single_needle = "lmnop";

    c.bench_function("vzv_precompute/get/precomputed", |b| {
        b.iter(|| black_box(&borrowed).get(100));
    });
    c.bench_function("vzv_precompute/get/slice", |b| {
        b.iter(|| black_box(&slice).get(100));
    });

    c.bench_function("vzv_precompute/search/precomputed", |b| {
        b.iter(|| black_box(&borrowed).binary_search(single_needle));
    });
    c.bench_function("vzv_precompute/search/slice", |b| {
        b.iter(|| black_box(&slice).binary_search(single_needle));
    });

    c.bench_function("vzv_precompute/search_multi/precomputed", |b| {
        b.iter(|| {
            black_box(&needles)
                .iter()
                .map(|needle| black_box(&borrowed).binary_search(needle))
                .filter(|r| r.is_ok())
                .count()
        });
    });
    c.bench_function("vzv_precompute/search_multi/slice", |b| {
        b.iter(|| {
            black_box(&needles)
                .iter()
                .map(|needle| black_box(&slice).binary_search(needle))
                .filter(|r| r.is_ok())
                .count()
        });
    });
}
// `overview_bench` is the only registered Criterion target; it calls the other bench
// functions in this file itself.
criterion_group!(benches, overview_bench,);
criterion_main!(benches);

381
vendor/zerovec/benches/zeromap.rs vendored Normal file
View File

@@ -0,0 +1,381 @@
// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
use std::collections::HashMap;
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use zerovec::maps::ZeroMapKV;
use zerovec::vecs::{Index32, VarZeroSlice, VarZeroVec};
use zerovec::{ZeroHashMap, ZeroMap};
/// Benchmark data set: (language code, English language name) pairs, listed in ascending
/// order of the language code.
const DATA: [(&str, &str); 16] = [
("ar", "Arabic"),
("bn", "Bangla"),
("ccp", "Chakma"),
("chr", "Cherokee"),
("el", "Greek"),
("en", "English"),
("eo", "Esperanto"),
("es", "Spanish"),
("fr", "French"),
("iu", "Inuktitut"),
("ja", "Japanese"),
("ru", "Russian"),
("sr", "Serbian"),
("th", "Thai"),
("tr", "Turkish"),
("zh", "Chinese"),
];
/// Postcard bytes that the small `ZeroMap` benches deserialize into
/// `ZeroMap<Index32Str, Index32Str>`.
/// Presumably the output of `generate_zeromap()` — regenerate with it if the format changes.
const POSTCARD: [u8; 274] = [
98, 16, 0, 0, 0, 2, 0, 0, 0, 4, 0, 0, 0, 7, 0, 0, 0, 10, 0, 0, 0, 12, 0, 0, 0, 14, 0, 0, 0, 16,
0, 0, 0, 18, 0, 0, 0, 20, 0, 0, 0, 22, 0, 0, 0, 24, 0, 0, 0, 26, 0, 0, 0, 28, 0, 0, 0, 30, 0,
0, 0, 32, 0, 0, 0, 97, 114, 98, 110, 99, 99, 112, 99, 104, 114, 101, 108, 101, 110, 101, 111,
101, 115, 102, 114, 105, 117, 106, 97, 114, 117, 115, 114, 116, 104, 116, 114, 122, 104, 173,
1, 16, 0, 0, 0, 6, 0, 0, 0, 12, 0, 0, 0, 18, 0, 0, 0, 26, 0, 0, 0, 31, 0, 0, 0, 38, 0, 0, 0,
47, 0, 0, 0, 54, 0, 0, 0, 60, 0, 0, 0, 69, 0, 0, 0, 77, 0, 0, 0, 84, 0, 0, 0, 91, 0, 0, 0, 95,
0, 0, 0, 102, 0, 0, 0, 65, 114, 97, 98, 105, 99, 66, 97, 110, 103, 108, 97, 67, 104, 97, 107,
109, 97, 67, 104, 101, 114, 111, 107, 101, 101, 71, 114, 101, 101, 107, 69, 110, 103, 108, 105,
115, 104, 69, 115, 112, 101, 114, 97, 110, 116, 111, 83, 112, 97, 110, 105, 115, 104, 70, 114,
101, 110, 99, 104, 73, 110, 117, 107, 116, 105, 116, 117, 116, 74, 97, 112, 97, 110, 101, 115,
101, 82, 117, 115, 115, 105, 97, 110, 83, 101, 114, 98, 105, 97, 110, 84, 104, 97, 105, 84,
117, 114, 107, 105, 115, 104, 67, 104, 105, 110, 101, 115, 101,
];
/// Postcard bytes that the small `HashMap` benches deserialize into `HashMap<String, String>`.
/// Presumably the output of `generate_hashmap()` — regenerate with it if the format changes.
const POSTCARD_HASHMAP: [u8; 176] = [
16, 2, 114, 117, 7, 82, 117, 115, 115, 105, 97, 110, 3, 99, 99, 112, 6, 67, 104, 97, 107, 109,
97, 3, 99, 104, 114, 8, 67, 104, 101, 114, 111, 107, 101, 101, 2, 116, 114, 7, 84, 117, 114,
107, 105, 115, 104, 2, 116, 104, 4, 84, 104, 97, 105, 2, 106, 97, 8, 74, 97, 112, 97, 110, 101,
115, 101, 2, 101, 115, 7, 83, 112, 97, 110, 105, 115, 104, 2, 101, 111, 9, 69, 115, 112, 101,
114, 97, 110, 116, 111, 2, 122, 104, 7, 67, 104, 105, 110, 101, 115, 101, 2, 115, 114, 7, 83,
101, 114, 98, 105, 97, 110, 2, 101, 110, 7, 69, 110, 103, 108, 105, 115, 104, 2, 105, 117, 9,
73, 110, 117, 107, 116, 105, 116, 117, 116, 2, 102, 114, 6, 70, 114, 101, 110, 99, 104, 2, 98,
110, 6, 66, 97, 110, 103, 108, 97, 2, 101, 108, 5, 71, 114, 101, 101, 107, 2, 97, 114, 6, 65,
114, 97, 98, 105, 99,
];
/// Postcard bytes that the small `ZeroHashMap` benches deserialize into
/// `ZeroHashMap<Index32Str, Index32Str>`.
/// Presumably the output of `generate_zerohashmap()` — regenerate with it if the format changes.
const POSTCARD_ZEROHASHMAP: [u8; 404] = [
128, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2,
0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1,
0, 0, 0, 98, 16, 0, 0, 0, 2, 0, 0, 0, 4, 0, 0, 0, 6, 0, 0, 0, 8, 0, 0, 0, 10, 0, 0, 0, 13, 0,
0, 0, 15, 0, 0, 0, 17, 0, 0, 0, 19, 0, 0, 0, 21, 0, 0, 0, 24, 0, 0, 0, 26, 0, 0, 0, 28, 0, 0,
0, 30, 0, 0, 0, 32, 0, 0, 0, 115, 114, 101, 111, 116, 114, 97, 114, 105, 117, 99, 99, 112, 102,
114, 101, 115, 106, 97, 122, 104, 99, 104, 114, 98, 110, 101, 110, 101, 108, 114, 117, 116,
104, 173, 1, 16, 0, 0, 0, 7, 0, 0, 0, 16, 0, 0, 0, 23, 0, 0, 0, 29, 0, 0, 0, 38, 0, 0, 0, 44,
0, 0, 0, 50, 0, 0, 0, 57, 0, 0, 0, 65, 0, 0, 0, 72, 0, 0, 0, 80, 0, 0, 0, 86, 0, 0, 0, 93, 0,
0, 0, 98, 0, 0, 0, 105, 0, 0, 0, 83, 101, 114, 98, 105, 97, 110, 69, 115, 112, 101, 114, 97,
110, 116, 111, 84, 117, 114, 107, 105, 115, 104, 65, 114, 97, 98, 105, 99, 73, 110, 117, 107,
116, 105, 116, 117, 116, 67, 104, 97, 107, 109, 97, 70, 114, 101, 110, 99, 104, 83, 112, 97,
110, 105, 115, 104, 74, 97, 112, 97, 110, 101, 115, 101, 67, 104, 105, 110, 101, 115, 101, 67,
104, 101, 114, 111, 107, 101, 101, 66, 97, 110, 103, 108, 97, 69, 110, 103, 108, 105, 115, 104,
71, 114, 101, 101, 107, 82, 117, 115, 115, 105, 97, 110, 84, 104, 97, 105,
];
/// Prints the postcard encoding of the small `ZeroMap` to the console, for pasting into the
/// const data above.
/// Requires the optional `serde` Cargo feature.
#[allow(dead_code)]
fn generate_zeromap() {
    let buf = postcard::to_stdvec(&build_zeromap(false)).unwrap();
    println!("{buf:?}");
}
/// Prints the postcard encoding of the small `HashMap` to the console, for pasting into the
/// const data above.
/// Requires the optional `serde` Cargo feature.
#[allow(dead_code)]
fn generate_hashmap() {
    let buf = postcard::to_stdvec(&build_hashmap(false)).unwrap();
    println!("{buf:?}");
}
/// Prints the postcard encoding of the small `ZeroHashMap` to the console, for pasting into
/// the const data above.
/// Requires the optional `serde` Cargo feature.
#[allow(dead_code)]
fn generate_zerohashmap() {
    let buf = postcard::to_stdvec(&build_zerohashmap(false)).unwrap();
    println!("{buf:?}");
}
fn overview_bench(c: &mut Criterion) {
bench_zeromap(c);
bench_hashmap(c);
bench_zerohashmap(c);
}
/// Runs every `ZeroMap` benchmark: deserialize and lookup, each on the small and large data.
fn bench_zeromap(c: &mut Criterion) {
    // Uncomment the following line to re-generate the const data.
    // generate_zeromap();
    bench_deserialize(c);
    bench_deserialize_large(c);
    bench_lookup(c);
    bench_lookup_large(c);
}
/// Builds the benchmark `ZeroMap` from `DATA`.
///
/// With `large == false` there is one entry per `DATA` pair. With `large == true` each pair
/// is expanded into 8192 entries whose keys carry a zero-padded four-digit suffix.
fn build_zeromap(large: bool) -> ZeroMap<'static, Index32Str, Index32Str> {
    // TODO(#2826): This should use ZeroMap::from_iter, however that currently takes
    // *minutes*, whereas this code runs in milliseconds
    let mut sorted = DATA.to_vec();
    sorted.sort();

    let mut owned_keys: Vec<String> = Vec::new();
    let mut values = Vec::new();
    for &(key, value) in &sorted {
        if !large {
            owned_keys.push(key.to_owned());
            values.push(indexify(value));
            continue;
        }
        for n in 0..8192 {
            owned_keys.push(format!("{key}{n:04}"));
            values.push(indexify(value));
        }
    }

    let keys: Vec<_> = owned_keys.iter().map(|k| indexify(k)).collect();
    // keys are sorted by construction
    unsafe { ZeroMap::from_parts_unchecked(VarZeroVec::from(&keys), VarZeroVec::from(&values)) }
}
/// Times postcard-deserializing the small `ZeroMap` from `POSTCARD`, followed by one sanity
/// lookup.
fn bench_deserialize(c: &mut Criterion) {
    c.bench_function("zeromap/deserialize/small", |b| {
        b.iter(|| {
            let map: ZeroMap<Index32Str, Index32Str> =
                postcard::from_bytes(black_box(&POSTCARD)).unwrap();
            let found = map.get(indexify("iu")).map(|x| &x.0);
            assert_eq!(found, Some("Inuktitut"));
        })
    });
}
/// Times postcard-deserializing the large `ZeroMap`, followed by one sanity lookup.
fn bench_deserialize_large(c: &mut Criterion) {
    // Serialized once, outside the timed closure.
    let serialized = large_zeromap_postcard_bytes();
    c.bench_function("zeromap/deserialize/large", |b| {
        b.iter(|| {
            let map: ZeroMap<Index32Str, Index32Str> =
                postcard::from_bytes(black_box(&serialized)).unwrap();
            let found = map.get(indexify("iu3333")).map(|x| &x.0);
            assert_eq!(found, Some("Inuktitut"));
        })
    });
}
/// Times one hit and one miss lookup in the small `ZeroMap`.
fn bench_lookup(c: &mut Criterion) {
    let map: ZeroMap<Index32Str, Index32Str> = postcard::from_bytes(black_box(&POSTCARD)).unwrap();
    c.bench_function("zeromap/lookup/small", |b| {
        b.iter(|| {
            let hit = map.get(black_box(indexify("iu"))).map(|x| &x.0);
            assert_eq!(hit, Some("Inuktitut"));
            let miss = map.get(black_box(indexify("zz"))).map(|x| &x.0);
            assert_eq!(miss, None);
        });
    });
}
/// Times one hit and one miss lookup in the large `ZeroMap`.
fn bench_lookup_large(c: &mut Criterion) {
    let serialized = large_zeromap_postcard_bytes();
    let map: ZeroMap<Index32Str, Index32Str> = postcard::from_bytes(&serialized).unwrap();
    c.bench_function("zeromap/lookup/large", |b| {
        b.iter(|| {
            let hit = map.get(black_box(indexify("iu3333"))).map(|x| &x.0);
            assert_eq!(hit, Some("Inuktitut"));
            let miss = map.get(black_box(indexify("zz"))).map(|x| &x.0);
            assert_eq!(miss, None);
        });
    });
}
/// Serializes the large `ZeroMap` to postcard bytes.
fn large_zeromap_postcard_bytes() -> Vec<u8> {
    let map = build_zeromap(true);
    postcard::to_stdvec(&map).unwrap()
}
fn bench_hashmap(c: &mut Criterion) {
// Uncomment the following line to re-generate the const data.
// generate_hashmap();
bench_deserialize_hashmap(c);
bench_deserialize_large_hashmap(c);
bench_lookup_hashmap(c);
bench_lookup_large_hashmap(c);
}
/// Builds the benchmark `HashMap` from `DATA`.
///
/// With `large == false` there is one entry per `DATA` pair. With `large == true` each pair
/// is expanded into 8192 entries whose keys carry an unpadded decimal suffix.
fn build_hashmap(large: bool) -> HashMap<String, String> {
    let mut map = HashMap::new();
    for &(key, value) in &DATA {
        if large {
            map.extend((0..8192).map(|n| (format!("{key}{n}"), value.to_owned())));
        } else {
            map.insert(key.to_owned(), value.to_owned());
        }
    }
    map
}
/// Times postcard-deserializing the small `HashMap` from `POSTCARD_HASHMAP`, followed by one
/// sanity lookup.
fn bench_deserialize_hashmap(c: &mut Criterion) {
    c.bench_function("zeromap/deserialize/small/hashmap", |b| {
        b.iter(|| {
            let map: HashMap<String, String> =
                postcard::from_bytes(black_box(&POSTCARD_HASHMAP)).unwrap();
            let found = map.get("iu");
            assert_eq!(found, Some(&"Inuktitut".to_owned()));
        })
    });
}
/// Times postcard-deserializing the large `HashMap`, followed by one sanity lookup.
fn bench_deserialize_large_hashmap(c: &mut Criterion) {
    // Serialized once, outside the timed closure.
    let serialized = large_hashmap_postcard_bytes();
    c.bench_function("zeromap/deserialize/large/hashmap", |b| {
        b.iter(|| {
            let map: HashMap<String, String> =
                postcard::from_bytes(black_box(&serialized)).unwrap();
            let found = map.get("iu3333");
            assert_eq!(found, Some(&"Inuktitut".to_owned()));
        })
    });
}
/// Times one hit and one miss lookup in the small `HashMap`.
fn bench_lookup_hashmap(c: &mut Criterion) {
    let map: HashMap<String, String> = postcard::from_bytes(black_box(&POSTCARD_HASHMAP)).unwrap();
    c.bench_function("zeromap/lookup/small/hashmap", |b| {
        b.iter(|| {
            // Compare as `&str` so that no `String` is allocated inside the timed loop;
            // an allocation here would be measured as part of the lookup.
            assert_eq!(
                map.get(black_box("iu")).map(String::as_str),
                Some("Inuktitut")
            );
            assert_eq!(map.get(black_box("zz")), None);
        });
    });
}
/// Times one hit and one miss lookup in the large `HashMap`.
fn bench_lookup_large_hashmap(c: &mut Criterion) {
    let buf = large_hashmap_postcard_bytes();
    let map: HashMap<String, String> = postcard::from_bytes(&buf).unwrap();
    c.bench_function("zeromap/lookup/large/hashmap", |b| {
        b.iter(|| {
            // Compare as `&str` so that no `String` is allocated inside the timed loop;
            // an allocation here would be measured as part of the lookup.
            assert_eq!(
                map.get(black_box("iu3333")).map(String::as_str),
                Some("Inuktitut")
            );
            assert_eq!(map.get(black_box("zz")), None);
        });
    });
}
/// Serializes the large `HashMap` to postcard bytes.
fn large_hashmap_postcard_bytes() -> Vec<u8> {
    let map = build_hashmap(true);
    postcard::to_stdvec(&map).unwrap()
}
fn bench_zerohashmap(c: &mut Criterion) {
// Uncomment the following line to re-generate the const data.
// generate_zerohashmap();
bench_deserialize_zerohashmap(c);
bench_deserialize_large_zerohashmap(c);
bench_zerohashmap_lookup(c);
bench_zerohashmap_lookup_large(c);
}
/// Builds the benchmark `ZeroHashMap` from `DATA`.
///
/// With `large == false` there is one entry per `DATA` pair. With `large == true` each pair
/// is expanded into 512 entries whose keys carry an unpadded decimal suffix.
fn build_zerohashmap(large: bool) -> ZeroHashMap<'static, Index32Str, Index32Str> {
    let mut entries = Vec::new();
    for (key, value) in DATA.iter() {
        if !large {
            entries.push((key.to_string(), indexify(value)));
            continue;
        }
        for n in 0..512 {
            entries.push((format!("{key}{n}"), indexify(value)));
        }
    }
    // The owned key strings live in `entries`; hand out borrowed views of them.
    let borrowed = entries.iter().map(|(k, v)| (indexify(k), *v));
    ZeroHashMap::from_iter(borrowed)
}
/// Times postcard-deserializing the small `ZeroHashMap` from `POSTCARD_ZEROHASHMAP`, followed
/// by one sanity lookup.
fn bench_deserialize_zerohashmap(c: &mut Criterion) {
    c.bench_function("zerohashmap/deserialize/small", |b| {
        b.iter(|| {
            let map: ZeroHashMap<Index32Str, Index32Str> =
                postcard::from_bytes(black_box(&POSTCARD_ZEROHASHMAP)).unwrap();
            let found = map.get(indexify("iu")).map(|x| &x.0);
            assert_eq!(found, Some("Inuktitut"));
        })
    });
}
/// Times postcard-deserializing the large `ZeroHashMap`, followed by one sanity lookup.
fn bench_deserialize_large_zerohashmap(c: &mut Criterion) {
    // Serialized once, outside the timed closure.
    let serialized = large_zerohashmap_postcard_bytes();
    c.bench_function("zerohashmap/deserialize/large", |b| {
        b.iter(|| {
            let map: ZeroHashMap<Index32Str, Index32Str> =
                postcard::from_bytes(black_box(&serialized)).unwrap();
            let found = map.get(indexify("iu333")).map(|x| &x.0);
            assert_eq!(found, Some("Inuktitut"));
        })
    });
}
/// Times one hit and one miss lookup in the small `ZeroHashMap`.
fn bench_zerohashmap_lookup(c: &mut Criterion) {
    let map: ZeroHashMap<Index32Str, Index32Str> =
        postcard::from_bytes(black_box(&POSTCARD_ZEROHASHMAP)).unwrap();
    c.bench_function("zerohashmap/lookup/small", |b| {
        b.iter(|| {
            let hit = map.get(black_box(indexify("iu"))).map(|x| &x.0);
            assert_eq!(hit, Some("Inuktitut"));
            let miss = map.get(black_box(indexify("zz"))).map(|x| &x.0);
            assert_eq!(miss, None);
        });
    });
}
/// Times one hit and one miss lookup in the large `ZeroHashMap`.
fn bench_zerohashmap_lookup_large(c: &mut Criterion) {
    let serialized = large_zerohashmap_postcard_bytes();
    let map: ZeroHashMap<Index32Str, Index32Str> = postcard::from_bytes(&serialized).unwrap();
    c.bench_function("zerohashmap/lookup/large", |b| {
        b.iter(|| {
            let hit = map.get(black_box(indexify("iu333"))).map(|x| &x.0);
            assert_eq!(hit, Some("Inuktitut"));
            let miss = map.get(black_box(indexify("zz"))).map(|x| &x.0);
            assert_eq!(miss, None);
        });
    });
}
/// Serializes the large `ZeroHashMap` to postcard bytes.
fn large_zerohashmap_postcard_bytes() -> Vec<u8> {
    let map = build_zerohashmap(true);
    postcard::to_stdvec(&map).unwrap()
}
// `overview_bench` is the only registered Criterion target; it calls the ZeroMap, HashMap
// and ZeroHashMap bench groups itself.
criterion_group!(benches, overview_bench);
criterion_main!(benches);
/// This type lets us use a u32-index-format VarZeroVec with the ZeroMap.
///
/// Eventually we will have a FormatSelector type that lets us do `ZeroMap<FormatSelector<K, Index32>, V>`
/// (https://github.com/unicode-org/icu4x/issues/2312)
///
/// NOTE(review): the start of the next sentence was lost in the original comment; it
/// presumably refers to this borrowed wrapper struct — confirm against upstream.
/// The wrapper struct itself isn't actually important; it's just more convenient to use make_varule to get the
/// full suite of traits instead of `#[derive(VarULE)]`. (With `#[derive(VarULE)]` we would have to manually
/// define a Serialize implementation, and that would be gnarly)
/// https://github.com/unicode-org/icu4x/issues/2310 tracks being able to do this with derive(ULE)
#[zerovec::make_varule(Index32Str)]
#[zerovec::skip_derive(ZeroMapKV)]
#[derive(Eq, PartialEq, Ord, PartialOrd, serde::Serialize, serde::Deserialize)]
#[zerovec::derive(Serialize, Deserialize, Hash)]
pub(crate) struct Index32StrBorrowed<'a>(#[serde(borrow)] pub &'a str);
// Hand-written `ZeroMapKV` impl (the derive is skipped on the struct above) so that maps
// store `Index32Str` in a `VarZeroVec`/`VarZeroSlice` using the `Index32` index format.
impl<'a> ZeroMapKV<'a> for Index32Str {
type Container = VarZeroVec<'a, Index32Str, Index32>;
type Slice = VarZeroSlice<Index32Str, Index32>;
type GetType = Index32Str;
type OwnedType = Box<Index32Str>;
}
/// Reinterprets a `&str` as a `&Index32Str` without copying.
#[inline]
fn indexify(s: &str) -> &Index32Str {
    let ptr = s as *const str as *const Index32Str;
    // SAFETY: assumes the `Index32Str` type generated by `make_varule` has the same layout
    // as `str` — TODO confirm against the `make_varule` documentation.
    unsafe { &*ptr }
}

164
vendor/zerovec/benches/zerovec.rs vendored Normal file
View File

@@ -0,0 +1,164 @@
// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use rand::SeedableRng;
use rand_distr::{Distribution, LogNormal};
use rand_pcg::Lcg64Xsh32;
use std::fmt;
#[path = "../src/samples.rs"]
mod samples;
use samples::*;
use zerovec::ule::*;
use zerovec::ZeroVec;
/// Byte buffer that `vec_to_unaligned_uvec` fills with a one-byte pad followed by `ZeroVec` bytes.
// NOTE(review): `repr(align(8))` aligns this wrapper struct itself, not the heap allocation
// owned by the inner `Vec<u8>` — confirm the unaligned-offset trick does not depend on it.
#[repr(align(8))]
#[derive(Default)]
struct AlignedBuffer(Vec<u8>);
/// Generate a large list of u32s for stress testing.
///
/// Returns `(needles, haystack)`: 100 unsorted needles and a sorted haystack of 1000 values,
/// all drawn from the same seeded PRNG (haystack first, then needles).
#[allow(dead_code)]
fn get_needles_and_haystack() -> (Vec<u32>, Vec<u32>) {
    // Lcg64Xsh32 is a small, fast PRNG for reproducible benchmarks.
    // LogNormal(10, 1) generates numbers with mean 36315 and mode 8103, a distribution that, in
    // spirit, correlates with Unicode properties (many low values and a long tail of high values)
    let mut rng = Lcg64Xsh32::seed_from_u64(2021);
    let dist = LogNormal::new(10.0, 1.0).unwrap();
    // Draws the next `n` samples from the shared PRNG, truncated to `u32`.
    let mut draw = |n: usize| -> Vec<u32> {
        (&dist)
            .sample_iter(&mut rng)
            .take(n)
            .map(|f| f as u32)
            .collect()
    };

    let mut haystack = draw(1000);
    haystack.sort_unstable();
    let needles = draw(100);
    (needles, haystack)
}
/// Writes a pad byte followed by the `ZeroVec` bytes of `vec` into `buffer`, then returns a
/// `ZeroVec` parsed from the bytes after the pad.
#[allow(dead_code, clippy::ptr_arg)]
fn vec_to_unaligned_uvec<'a, T>(vec: &Vec<T>, buffer: &'a mut AlignedBuffer) -> ZeroVec<'a, T>
where
    T: EqULE + Copy + PartialEq + fmt::Debug,
{
    let bytes = &mut buffer.0;
    // Pad with zero to ensure it is not aligned
    bytes.push(0);
    let encoded = ZeroVec::from_slice_or_alloc(vec.as_slice());
    bytes.extend(encoded.as_bytes());
    ZeroVec::<T>::parse_bytes(&bytes[1..]).unwrap()
}
/// Criterion entry point: runs the headline "zerovec/overview" benchmark (parse and sum),
/// then the sum and binary-search groups.
fn overview_bench(c: &mut Criterion) {
    c.bench_function("zerovec/overview", |b| {
        b.iter(|| {
            let parsed = ZeroVec::<u32>::parse_bytes(black_box(TEST_BUFFER_LE)).unwrap();
            parsed.iter().sum::<u32>()
        });
    });

    sum_benches(c);
    binary_search_benches(c);
}
/// Compares summing 19 sample `u32`s from a plain slice against `ZeroVec`s borrowed from ULE
/// slices that start at byte offset 0 and at byte offset 1 of `TEST_BUFFER_LE`.
fn sum_benches(c: &mut Criterion) {
    let plain = &TEST_SLICE[..19];
    let ule_at_0 = <u32 as AsULE>::ULE::parse_bytes_to_slice(&TEST_BUFFER_LE[..76]).unwrap();
    let ule_at_1 = <u32 as AsULE>::ULE::parse_bytes_to_slice(&TEST_BUFFER_LE[1..77]).unwrap();
    // All three inputs must hold the same number of elements.
    assert_eq!(plain.len(), ule_at_0.len());
    assert_eq!(plain.len(), ule_at_1.len());

    c.bench_function("zerovec/sum/sample/slice", |b| {
        b.iter(|| {
            black_box(plain)
                .iter()
                .copied()
                .fold(0u32, |acc, v| acc.wrapping_add(v))
        })
    });

    c.bench_function("zerovec/sum/sample/zerovec_aligned", |b| {
        b.iter(|| {
            let zv = ZeroVec::<u32>::new_borrowed(black_box(ule_at_0));
            zv.iter().fold(0u32, |acc, v| acc.wrapping_add(v))
        });
    });

    c.bench_function("zerovec/sum/sample/zerovec_unaligned", |b| {
        b.iter(|| {
            let zv = ZeroVec::<u32>::new_borrowed(black_box(ule_at_1));
            zv.iter().fold(0u32, |acc, v| acc.wrapping_add(v))
        });
    });
}
/// Compares binary search on plain slices against `ZeroVec`, first on the sample data and then
/// on a sorted haystack of 1000 log-normally distributed `u32`s.
fn binary_search_benches(c: &mut Criterion) {
    c.bench_function("zerovec/binary_search/sample/slice", |b| {
        b.iter(|| black_box(&TEST_SLICE).binary_search(&0x0c0d0c));
    });

    c.bench_function("zerovec/binary_search/sample/zerovec", |b| {
        let parsed = ZeroVec::<u32>::parse_bytes(black_box(TEST_BUFFER_LE)).unwrap();
        b.iter(|| parsed.binary_search(&0x0c0d0c));
    });

    let (all_needles, haystack) = get_needles_and_haystack();
    // Only search for 50 needles to put all figures in nanoseconds
    let needles_50 = &all_needles[..50];

    // *** Binary search vec of 1000 `u32` 50 times ***
    c.bench_function("zerovec/binary_search/log_normal/slice", |b| {
        b.iter(|| {
            black_box(&needles_50)
                .iter()
                .map(|n| black_box(&haystack).binary_search(n))
                .filter(|found| found.is_ok())
                .count()
        });
    });

    let mut backing = AlignedBuffer::default();
    let zerovec = vec_to_unaligned_uvec(black_box(&haystack), &mut backing);
    assert_eq!(zerovec, haystack.as_slice());

    // *** Binary search vec of 1000 `u32` 50 times ***
    c.bench_function("zerovec/binary_search/log_normal/zerovec", |b| {
        b.iter(|| {
            black_box(&needles_50)
                .iter()
                .map(|n| black_box(&zerovec).binary_search(n))
                .filter(|found| found.is_ok())
                .count()
        });
    });

    // Single fixed needle; 36315 is the distribution mean quoted in `get_needles_and_haystack`.
    let single_needle = 36315;
    c.bench_function("zerovec/binary_search/log_normal/single/slice", |b| {
        b.iter(|| black_box(&haystack).binary_search(&single_needle));
    });
    c.bench_function("zerovec/binary_search/log_normal/single/zerovec", |b| {
        b.iter(|| black_box(&zerovec).binary_search(&single_needle));
    });
}
// `overview_bench` is the only registered Criterion target; it calls `sum_benches` and
// `binary_search_benches` itself.
criterion_group!(benches, overview_bench,);
criterion_main!(benches);

64
vendor/zerovec/benches/zerovec_iai.rs vendored Normal file
View File

@@ -0,0 +1,64 @@
// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
use iai::black_box;
#[path = "../src/samples.rs"]
mod samples;
use samples::*;
use zerovec::VarZeroSlice;
use zerovec::ZeroVec;
/// Baseline: sums the sample `u32` slice directly.
fn sum_slice() -> u32 {
    let values = black_box(TEST_SLICE);
    values.iter().sum()
}
/// Parses the sample bytes into a `ZeroVec<u32>` and sums its elements.
fn sum_zerovec() -> u32 {
    let parsed = ZeroVec::<u32>::parse_bytes(black_box(TEST_BUFFER_LE)).unwrap();
    parsed.iter().sum()
}
/// Baseline: binary-searches the sample `u32` slice for a fixed needle.
fn binarysearch_slice() -> Result<usize, usize> {
    let haystack = black_box(TEST_SLICE);
    haystack.binary_search(&0x0c0d0c)
}
/// Parses the sample bytes into a `ZeroVec<u32>` and binary-searches it for a fixed needle.
fn binarysearch_zerovec() -> Result<usize, usize> {
    let haystack = ZeroVec::<u32>::parse_bytes(black_box(TEST_BUFFER_LE)).unwrap();
    haystack.binary_search(&0x0c0d0c)
}
/// Validates the sample bytes as a `VarZeroSlice<str>` and then fetches element 1.
fn varzeroslice_parse_get() -> Option<&'static str> {
    let bytes = black_box(TEST_VARZEROSLICE_BYTES);
    let parsed: &'static VarZeroSlice<str> = VarZeroSlice::parse_bytes(bytes).unwrap();
    parsed.get(black_box(1))
}
/// Fetches element 1 from the sample `VarZeroSlice<str>`, skipping byte validation.
fn varzeroslice_get() -> Option<&'static str> {
    let bytes = black_box(TEST_VARZEROSLICE_BYTES);
    // Safety: The bytes are valid.
    let unchecked: &'static VarZeroSlice<str> =
        unsafe { VarZeroSlice::from_bytes_unchecked(bytes) };
    unchecked.get(black_box(1))
}
/// Fetches element 1 from the sample `VarZeroSlice<str>`, skipping both byte validation and
/// the bounds check.
fn varzeroslice_get_unchecked() -> &'static str {
    let bytes = black_box(TEST_VARZEROSLICE_BYTES);
    // Safety: The bytes are valid.
    let unchecked: &'static VarZeroSlice<str> =
        unsafe { VarZeroSlice::from_bytes_unchecked(bytes) };
    let index = black_box(1);
    // Safety: The VarZeroVec has length 4.
    unsafe { unchecked.get_unchecked(index) }
}
// Registers every benchmark function defined above with the iai harness.
iai::main!(
sum_slice,
sum_zerovec,
binarysearch_slice,
binarysearch_zerovec,
varzeroslice_parse_get,
varzeroslice_get,
varzeroslice_get_unchecked,
);

139
vendor/zerovec/benches/zerovec_serde.rs vendored Normal file
View File

@@ -0,0 +1,139 @@
// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use rand::SeedableRng;
use rand_distr::{Distribution, LogNormal};
use rand_pcg::Lcg64Xsh32;
#[path = "../src/samples.rs"]
mod samples;
use samples::*;
use zerovec::ZeroVec;
/// Generate a large list of u32s for stress testing.
///
/// Returns `count` values drawn from a fixed-seed PRNG, so repeated calls yield the same list.
#[allow(dead_code)]
fn random_numbers(count: usize) -> Vec<u32> {
    // Lcg64Xsh32 is a small, fast PRNG for reproducible benchmarks.
    // LogNormal(10, 1) generates numbers with mean 36315 and mode 8103, a distribution that, in
    // spirit, correlates with Unicode properties (many low values and a long tail of high values)
    let mut rng = Lcg64Xsh32::seed_from_u64(2021);
    let dist = LogNormal::new(10.0, 1.0).unwrap();
    let samples = dist.sample_iter(&mut rng);
    samples.take(count).map(|f| f as u32).collect()
}
/// Criterion entry point: runs the headline "zerovec_serde/overview" benchmark, then the u32,
/// char and stress groups.
fn overview_bench(c: &mut Criterion) {
    c.bench_function("zerovec_serde/overview", |b| {
        // Same as "zerovec_serde/deserialize_sum/u32/zerovec"
        let source = ZeroVec::<u32>::parse_bytes(black_box(TEST_BUFFER_LE)).unwrap();
        let buffer = bincode::serialize(&source).unwrap();
        b.iter(|| {
            let decoded: ZeroVec<u32> = bincode::deserialize(&buffer).unwrap();
            decoded.iter().sum::<u32>()
        });
    });

    u32_benches(c);
    char_benches(c);
    stress_benches(c);
}
/// Compares bincode serialization, and deserialization followed by a sum, of the sample `u32`
/// data as a `Vec<u32>` versus a `ZeroVec<u32>`.
fn u32_benches(c: &mut Criterion) {
    c.bench_function("zerovec_serde/serialize/u32/slice", |b| {
        b.iter(|| {
            let owned = Vec::from(black_box(TEST_SLICE));
            bincode::serialize(&owned)
        });
    });

    c.bench_function("zerovec_serde/deserialize_sum/u32/slice", |b| {
        let owned = Vec::from(black_box(TEST_SLICE));
        let buffer = bincode::serialize(&owned).unwrap();
        b.iter(|| {
            let decoded: Vec<u32> = bincode::deserialize(&buffer).unwrap();
            decoded.iter().sum::<u32>()
        });
    });

    c.bench_function("zerovec_serde/serialize/u32/zerovec", |b| {
        b.iter(|| {
            let zv = ZeroVec::from_slice_or_alloc(black_box(TEST_SLICE));
            bincode::serialize(&zv)
        });
    });

    c.bench_function("zerovec_serde/deserialize_sum/u32/zerovec", |b| {
        let source = ZeroVec::<u32>::parse_bytes(black_box(TEST_BUFFER_LE)).unwrap();
        let buffer = bincode::serialize(&source).unwrap();
        b.iter(|| {
            let decoded: ZeroVec<u32> = bincode::deserialize(&buffer).unwrap();
            decoded.iter().sum::<u32>()
        });
    });
}
/// Compares bincode serialization and deserialization of a fixed list of chars as a
/// `Vec<char>` versus a `ZeroVec<char>`.
fn char_benches(c: &mut Criterion) {
    const ORIGINAL_CHARS: &[char] = &[
        'ⶢ', '⺇', 'Ⱜ', '◁', '◩', '⌂', '⼅', '⏻', '⢜', '◊', 'ⲫ', '⏷', '◢', '⟉', '℞',
    ];
    let char_zero_vec = ZeroVec::alloc_from_slice(ORIGINAL_CHARS);

    c.bench_function("zerovec_serde/serialize/char/slice", |b| {
        b.iter(|| {
            let owned = Vec::from(ORIGINAL_CHARS);
            bincode::serialize(black_box(&owned))
        });
    });

    c.bench_function("zerovec_serde/deserialize/char/slice", |b| {
        let owned = Vec::from(ORIGINAL_CHARS);
        let buffer = bincode::serialize(black_box(&owned)).unwrap();
        b.iter(|| bincode::deserialize::<Vec<char>>(&buffer));
    });

    c.bench_function("zerovec_serde/serialize/char/zerovec", |b| {
        b.iter(|| bincode::serialize(black_box(&char_zero_vec)));
    });

    c.bench_function("zerovec_serde/deserialize/char/zerovec", |b| {
        let buffer = bincode::serialize(black_box(&char_zero_vec)).unwrap();
        b.iter(|| bincode::deserialize::<ZeroVec<char>>(&buffer));
    });
}
/// Compares deserializing and summing 100 random `u32`s as a `Vec<u32>` versus a `ZeroVec<u32>`.
fn stress_benches(c: &mut Criterion) {
    let numbers = random_numbers(100);
    let zerovec_owned = ZeroVec::from_slice_or_alloc(numbers.as_slice());
    let vec_bytes = bincode::serialize(&numbers).unwrap();
    let zerovec_bytes = bincode::serialize(&zerovec_owned).unwrap();

    // *** Deserialize vec of 100 `u32` ***
    c.bench_function("zerovec_serde/deserialize/stress/vec", |b| {
        b.iter(|| bincode::deserialize::<Vec<u32>>(&vec_bytes));
    });

    // *** Deserialize vec of 100 `u32` ***
    c.bench_function("zerovec_serde/deserialize/stress/zerovec", |b| {
        b.iter(|| bincode::deserialize::<ZeroVec<u32>>(&zerovec_bytes));
    });

    // *** Compute sum of vec of 100 `u32` ***
    c.bench_function("zerovec_serde/sum/stress/vec", |b| {
        b.iter(|| black_box(&numbers).iter().sum::<u32>());
    });

    // *** Compute sum of vec of 100 `u32` ***
    let reparsed = ZeroVec::<u32>::parse_bytes(zerovec_owned.as_bytes()).unwrap();
    c.bench_function("zerovec_serde/sum/stress/zerovec", |b| {
        b.iter(|| black_box(&reparsed).iter().sum::<u32>());
    });
}
// `overview_bench` is the only registered Criterion target; it calls the u32, char and
// stress bench groups itself.
criterion_group!(benches, overview_bench,);
criterion_main!(benches);