chore: checkpoint before Python removal

This commit is contained in:
2026-03-26 22:33:59 +00:00
parent 683cec9307
commit e568ddf82a
29972 changed files with 11269302 additions and 2 deletions

55
vendor/tinystr/benches/common/mod.rs vendored Normal file
View File

@@ -0,0 +1,55 @@
// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
// This file was adapted from parts of https://github.com/zbraniecki/tinystr
/// Benchmark inputs whose entries are each at most four bytes long.
///
/// The repeated entries (`"AT"`, `"FR"`) are part of the data set and are kept
/// in their original positions.
pub static STRINGS_4: &[&str] = &[
    "US",
    "GB",
    "AR",
    "Hans",
    "CN",
    "AT",
    "PL",
    "FR",
    "AT",
    "Cyrl",
    "SR",
    "NO",
    "FR",
    "MK",
    "UK",
];
/// Benchmark inputs whose entries are each at most eight bytes long.
///
/// The longest entry in this set is `"windows"` (seven bytes).
pub static STRINGS_8: &[&str] = &[
    "Latn",
    "windows",
    "AR",
    "Hans",
    "macos",
    "AT",
    "pl",
    "FR",
    "en",
    "Cyrl",
    "SR",
    "NO",
    "419",
    "und",
    "UK",
];
/// Benchmark inputs whose entries are each at most sixteen bytes long.
///
/// `"MacintoshOSX2019"` is exactly sixteen bytes, so this set reaches the
/// upper bound.
pub static STRINGS_16: &[&str] = &[
    "Latn", "windows", "AR", "Hans", "macos",
    "AT", "infiniband", "FR", "en", "Cyrl",
    "FromIntegral", "NO", "419", "MacintoshOSX2019", "UK",
];
/// Registers the shared `String` / `TinyAsciiStr` benchmark matrix.
///
/// * `$c` - expression on which `benchmark_group` is called, once per group.
/// * `$name` - label formatted into the three group names `{name}/4`,
///   `{name}/8` and `{name}/16`.
/// * `$action` - identifier of a macro invoked as `$action!(Type, INPUTS)`;
///   whatever it expands to is passed as the second argument of
///   `bench_function`.
///
/// `STRINGS_4`, `STRINGS_8`, `STRINGS_16` and `TinyAsciiStr` are referenced by
/// bare name, so they must be in scope wherever this macro is expanded.
#[macro_export]
macro_rules! bench_block {
    ($c:expr, $name:expr, $action:ident) => {
        // Inputs of at most 4 bytes: `String` plus all three TinyAsciiStr widths.
        let mut up_to_4 = $c.benchmark_group(&format!("{}/4", $name));
        up_to_4.bench_function("String", $action!(String, STRINGS_4));
        up_to_4.bench_function("TinyAsciiStr<4>", $action!(TinyAsciiStr<4>, STRINGS_4));
        up_to_4.bench_function("TinyAsciiStr<8>", $action!(TinyAsciiStr<8>, STRINGS_4));
        up_to_4.bench_function("TinyAsciiStr<16>", $action!(TinyAsciiStr<16>, STRINGS_4));
        up_to_4.finish();

        // Inputs of at most 8 bytes: the 4-wide type is no longer listed.
        let mut up_to_8 = $c.benchmark_group(&format!("{}/8", $name));
        up_to_8.bench_function("String", $action!(String, STRINGS_8));
        up_to_8.bench_function("TinyAsciiStr<8>", $action!(TinyAsciiStr<8>, STRINGS_8));
        up_to_8.bench_function("TinyAsciiStr<16>", $action!(TinyAsciiStr<16>, STRINGS_8));
        up_to_8.finish();

        // Inputs of at most 16 bytes: only `String` and the 16-wide type remain.
        let mut up_to_16 = $c.benchmark_group(&format!("{}/16", $name));
        up_to_16.bench_function("String", $action!(String, STRINGS_16));
        up_to_16.bench_function("TinyAsciiStr<16>", $action!(TinyAsciiStr<16>, STRINGS_16));
        up_to_16.finish();
    };
}

65
vendor/tinystr/benches/construct.rs vendored Normal file
View File

@@ -0,0 +1,65 @@
// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
// This file was adapted from https://github.com/zbraniecki/tinystr
mod common;
use common::*;
use criterion::black_box;
use criterion::criterion_group;
use criterion::criterion_main;
use criterion::Bencher;
use criterion::Criterion;
use tinystr::TinyAsciiStr;
/// Benchmarks constructing each string type from `&str` via `str::parse`.
///
/// A local macro builds the per-type routine; `bench_block!` then registers it
/// for every type/input combination under the `construct_from_str` prefix.
fn construct_from_str(c: &mut Criterion) {
    // Expands to a routine that parses every entry of `$inputs` into `$target`
    // inside the timed closure and panics if any parse fails.
    macro_rules! parse_each {
        ($target:ty, $inputs:expr) => {
            |bencher: &mut Bencher| {
                bencher.iter(|| {
                    for text in $inputs {
                        let _: $target = black_box(text.parse().unwrap());
                    }
                })
            }
        };
    }

    bench_block!(c, "construct_from_str", parse_each);
}
fn construct_from_utf8(c: &mut Criterion) {
macro_rules! cfu {
($r:ty, $inputs:expr) => {
|b| {
let raw: Vec<&[u8]> = $inputs.iter().map(|s| s.as_bytes()).collect();
b.iter(move || {
for u in &raw {
let _ = black_box(<$r>::try_from_utf8(*u).unwrap());
}
})
}
};
}
let mut group4 = c.benchmark_group("construct_from_utf8/4");
group4.bench_function("TinyAsciiStr<4>", cfu!(TinyAsciiStr<4>, STRINGS_4));
group4.bench_function("TinyAsciiStr<8>", cfu!(TinyAsciiStr<8>, STRINGS_4));
group4.bench_function("TinyAsciiStr<16>", cfu!(TinyAsciiStr<16>, STRINGS_4));
group4.finish();
let mut group8 = c.benchmark_group("construct_from_utf8/8");
group8.bench_function("TinyAsciiStr<8>", cfu!(TinyAsciiStr<8>, STRINGS_8));
group8.bench_function("TinyAsciiStr<16>", cfu!(TinyAsciiStr<16>, STRINGS_8));
group8.finish();
let mut group16 = c.benchmark_group("construct_from_utf8/16");
group16.bench_function("TinyAsciiStr<16>", cfu!(TinyAsciiStr<16>, STRINGS_16));
group16.finish();
}
// Collect both construction benchmarks into the `benches` group and hand that
// group to `criterion_main!`.
criterion_group!(benches, construct_from_str, construct_from_utf8,);
criterion_main!(benches);

129
vendor/tinystr/benches/overview.rs vendored Normal file
View File

@@ -0,0 +1,129 @@
// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
mod common;
use common::*;
use criterion::black_box;
use criterion::criterion_group;
use criterion::criterion_main;
use criterion::Criterion;
use tinystr::TinyAsciiStr;
/// Registers the four summary benchmarks of the `overview` group.
///
/// * `construct/utf8/TinyAsciiStr` - `try_from_utf8` over all three input
///   sets, at the widths listed for each set.
/// * `construct/utf16/TinyAsciiStr` - the same matrix through
///   `try_from_utf16`, using UTF-16 copies prepared outside the timed closure.
/// * `read/TinyAsciiStr` - sums the bytes of pre-parsed values.
/// * `compare/TinyAsciiStr` - compares adjacent pre-parsed values.
///
/// Any failed conversion or parse panics through `unwrap`. The group is
/// closed explicitly with `finish()`, matching the other benchmark groups
/// that call `finish()` on every group they open.
fn overview(c: &mut Criterion) {
    let mut g = c.benchmark_group("overview");

    g.bench_function("construct/utf8/TinyAsciiStr", |b| {
        b.iter(|| {
            for s in STRINGS_4 {
                let _: TinyAsciiStr<4> =
                    TinyAsciiStr::try_from_utf8(black_box(s.as_bytes())).unwrap();
                let _: TinyAsciiStr<8> =
                    TinyAsciiStr::try_from_utf8(black_box(s.as_bytes())).unwrap();
                let _: TinyAsciiStr<16> =
                    TinyAsciiStr::try_from_utf8(black_box(s.as_bytes())).unwrap();
            }
            for s in STRINGS_8 {
                let _: TinyAsciiStr<8> =
                    TinyAsciiStr::try_from_utf8(black_box(s.as_bytes())).unwrap();
                let _: TinyAsciiStr<16> =
                    TinyAsciiStr::try_from_utf8(black_box(s.as_bytes())).unwrap();
            }
            for s in STRINGS_16 {
                let _: TinyAsciiStr<16> =
                    TinyAsciiStr::try_from_utf8(black_box(s.as_bytes())).unwrap();
            }
        });
    });

    // UTF-16 copies of the inputs, built once so the encoding step is not timed.
    let to_utf16 = |inputs: &[&str]| -> Vec<Vec<u16>> {
        inputs.iter().map(|s| s.encode_utf16().collect()).collect()
    };
    let strings_4_utf16 = to_utf16(STRINGS_4);
    let strings_8_utf16 = to_utf16(STRINGS_8);
    let strings_16_utf16 = to_utf16(STRINGS_16);

    g.bench_function("construct/utf16/TinyAsciiStr", |b| {
        b.iter(|| {
            for s in strings_4_utf16.iter() {
                let _: TinyAsciiStr<4> = TinyAsciiStr::try_from_utf16(black_box(s)).unwrap();
                let _: TinyAsciiStr<8> = TinyAsciiStr::try_from_utf16(black_box(s)).unwrap();
                let _: TinyAsciiStr<16> = TinyAsciiStr::try_from_utf16(black_box(s)).unwrap();
            }
            for s in strings_8_utf16.iter() {
                let _: TinyAsciiStr<8> = TinyAsciiStr::try_from_utf16(black_box(s)).unwrap();
                let _: TinyAsciiStr<16> = TinyAsciiStr::try_from_utf16(black_box(s)).unwrap();
            }
            for s in strings_16_utf16.iter() {
                let _: TinyAsciiStr<16> = TinyAsciiStr::try_from_utf16(black_box(s)).unwrap();
            }
        });
    });

    // Pre-parsed values for the read/compare benchmarks. Each wider vector
    // also contains the inputs of the narrower sets, chained in front.
    let parsed_ascii_4: Vec<TinyAsciiStr<4>> = STRINGS_4
        .iter()
        .map(|s| s.parse::<TinyAsciiStr<4>>().unwrap())
        .collect();
    let parsed_ascii_8: Vec<TinyAsciiStr<8>> = STRINGS_4
        .iter()
        .chain(STRINGS_8)
        .map(|s| s.parse::<TinyAsciiStr<8>>().unwrap())
        .collect();
    let parsed_ascii_16: Vec<TinyAsciiStr<16>> = STRINGS_4
        .iter()
        .chain(STRINGS_8)
        .chain(STRINGS_16)
        .map(|s| s.parse::<TinyAsciiStr<16>>().unwrap())
        .collect();

    g.bench_function("read/TinyAsciiStr", |b| {
        b.iter(|| {
            // The byte sum is returned from the timed closure.
            let mut collector: usize = 0;
            for t in black_box(&parsed_ascii_4) {
                let s: &str = t;
                collector += s.bytes().map(usize::from).sum::<usize>();
            }
            for t in black_box(&parsed_ascii_8) {
                let s: &str = t;
                collector += s.bytes().map(usize::from).sum::<usize>();
            }
            for t in black_box(&parsed_ascii_16) {
                let s: &str = t;
                collector += s.bytes().map(usize::from).sum::<usize>();
            }
            collector
        });
    });

    g.bench_function("compare/TinyAsciiStr", |b| {
        b.iter(|| {
            // Each `Ordering` is cast to an integer and XOR-folded into the
            // value returned from the timed closure.
            let mut collector: usize = 0;
            for ts in black_box(&parsed_ascii_4).windows(2) {
                let o = ts[0].cmp(&ts[1]);
                collector ^= o as usize;
            }
            for ts in black_box(&parsed_ascii_8).windows(2) {
                let o = ts[0].cmp(&ts[1]);
                collector ^= o as usize;
            }
            for ts in black_box(&parsed_ascii_16).windows(2) {
                let o = ts[0].cmp(&ts[1]);
                collector ^= o as usize;
            }
            collector
        });
    });

    g.finish();
}
// Collect the `overview` benchmark into the `benches` group and hand that
// group to `criterion_main!`.
criterion_group!(benches, overview,);
criterion_main!(benches);

34
vendor/tinystr/benches/read.rs vendored Normal file
View File

@@ -0,0 +1,34 @@
// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
mod common;
use common::*;
use criterion::black_box;
use criterion::criterion_group;
use criterion::criterion_main;
use criterion::Bencher;
use criterion::Criterion;
use tinystr::TinyAsciiStr;
/// Benchmarks borrowing a `&str` out of already-constructed values.
///
/// A local macro builds the per-type routine; `bench_block!` then registers it
/// for every type/input combination under the `read` prefix.
fn read(c: &mut Criterion) {
    // Expands to a routine that parses `$inputs` into `$target` once, outside
    // the timed closure, and then times only the `&str` reborrow of each value.
    macro_rules! borrow_each {
        ($target:ty, $inputs:expr) => {
            |bencher: &mut Bencher| {
                let values: Vec<$target> =
                    $inputs.iter().map(|text| text.parse().unwrap()).collect();
                bencher.iter(|| {
                    for value in &values {
                        let _: &str = black_box(&**value);
                    }
                })
            }
        };
    }

    bench_block!(c, "read", borrow_each);
}
// Collect the `read` benchmark into the `benches` group and hand that group
// to `criterion_main!`.
criterion_group!(benches, read,);
criterion_main!(benches);

37
vendor/tinystr/benches/serde.rs vendored Normal file
View File

@@ -0,0 +1,37 @@
// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
mod common;
use common::*;
use criterion::black_box;
use criterion::criterion_group;
use criterion::criterion_main;
use criterion::Bencher;
use criterion::Criterion;
use tinystr::TinyAsciiStr;
/// Benchmarks decoding each string type from its postcard encoding.
///
/// A local macro builds the per-type routine; `bench_block!` then registers it
/// for every type/input combination under the `deserialize` prefix.
fn deserialize(c: &mut Criterion) {
    // Expands to a routine that parses and postcard-encodes `$inputs` once,
    // outside the timed closure, and then times `postcard::from_bytes` on each
    // payload. The decode `Result` is passed to `black_box` without unwrapping.
    macro_rules! decode_each {
        ($target:ty, $inputs:expr) => {
            |bencher: &mut Bencher| {
                let payloads: Vec<Vec<u8>> = $inputs
                    .iter()
                    .map(|text| {
                        let value = text.parse::<$target>().unwrap();
                        postcard::to_stdvec(&value).unwrap()
                    })
                    .collect();
                bencher.iter(|| {
                    for payload in &payloads {
                        let _: Result<$target, _> = black_box(postcard::from_bytes(payload));
                    }
                })
            }
        };
    }

    bench_block!(c, "deserialize", decode_each);
}
// Collect the `deserialize` benchmark into the `benches` group and hand that
// group to `criterion_main!`.
criterion_group!(benches, deserialize,);
criterion_main!(benches);