chore: checkpoint before Python removal

This commit is contained in:
2026-03-26 22:33:59 +00:00
parent 683cec9307
commit e568ddf82a
29972 changed files with 11269302 additions and 2 deletions

43
vendor/zerovec-derive/src/lib.rs vendored Normal file
View File

@@ -0,0 +1,43 @@
// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
//! Proc macros for generating `ULE`, `VarULE` impls and types for the `zerovec` crate
use proc_macro::TokenStream;
use syn::{parse_macro_input, DeriveInput, Ident};
mod make_ule;
mod make_varule;
pub(crate) mod ule;
mod utils;
mod varule;
/// Derive macro for `ULE`.
///
/// Parses the annotated item as a `DeriveInput` and forwards it to
/// `ule::derive_impl`. The user-facing documentation for this macro lives on
/// the [`zerovec`](https://docs.rs/zerovec) crate.
#[proc_macro_derive(ULE)]
pub fn ule_derive(input: TokenStream) -> TokenStream {
    let derive_input = parse_macro_input!(input as DeriveInput);
    ule::derive_impl(&derive_input).into()
}
/// Derive macro for `VarULE`.
///
/// Parses the annotated item as a `DeriveInput` and forwards it to
/// `varule::derive_impl` without a custom validator (`None`). The user-facing
/// documentation for this macro lives on the
/// [`zerovec`](https://docs.rs/zerovec) crate.
#[proc_macro_derive(VarULE)]
pub fn varule_derive(input: TokenStream) -> TokenStream {
    let derive_input = parse_macro_input!(input as DeriveInput);
    varule::derive_impl(&derive_input, None).into()
}
/// Attribute macro that generates a ULE type for the annotated item.
///
/// The annotated item is parsed first (as a `DeriveInput`), then the attribute
/// argument is parsed as the single identifier naming the ULE type to generate.
/// The user-facing documentation for this macro lives on the
/// [`zerovec`](https://docs.rs/zerovec) crate.
#[proc_macro_attribute]
pub fn make_ule(attr: TokenStream, item: TokenStream) -> TokenStream {
    let annotated = parse_macro_input!(item as DeriveInput);
    let ule_name = parse_macro_input!(attr as Ident);
    make_ule::make_ule_impl(ule_name, annotated).into()
}
/// Attribute macro that generates a VarULE type for the annotated item.
///
/// The annotated item is parsed first (as a `DeriveInput`), then the attribute
/// argument is parsed as the single identifier naming the VarULE type to
/// generate. The user-facing documentation for this macro lives on the
/// [`zerovec`](https://docs.rs/zerovec) crate.
#[proc_macro_attribute]
pub fn make_varule(attr: TokenStream, item: TokenStream) -> TokenStream {
    let annotated = parse_macro_input!(item as DeriveInput);
    let varule_name = parse_macro_input!(attr as Ident);
    make_varule::make_varule_impl(varule_name, annotated).into()
}

373
vendor/zerovec-derive/src/make_ule.rs vendored Normal file
View File

@@ -0,0 +1,373 @@
// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
use proc_macro2::TokenStream as TokenStream2;
use quote::quote;
use crate::utils::{self, FieldInfo, ZeroVecAttrs};
use std::collections::HashSet;
use syn::spanned::Spanned;
use syn::{parse_quote, Data, DataEnum, DataStruct, DeriveInput, Error, Expr, Fields, Ident, Lit};
/// Expands `#[make_ule(ule_name)]` for the annotated struct or enum.
///
/// Items declaring any type, lifetime or const parameter are rejected. The
/// shared attributes are then extracted from the item and generation is
/// delegated to the struct or enum generator depending on the item kind.
///
/// The output is the original item followed by the generated ULE code, a
/// `Debug` impl for the ULE type when `attrs.debug` is set, and a `ZeroMapKV`
/// impl for the item unless `attrs.skip_kv` is set. Failures are returned as
/// compile-error tokens.
pub fn make_ule_impl(ule_name: Ident, mut input: DeriveInput) -> TokenStream2 {
    let generics = &input.generics;
    let has_generics = generics.type_params().next().is_some()
        || generics.lifetimes().next().is_some()
        || generics.const_params().next().is_some();
    if has_generics {
        return Error::new(
            generics.span(),
            "#[make_ule] must be applied to a struct without any generics",
        )
        .to_compile_error();
    }

    let item_span = input.span();
    let attrs = match utils::extract_attributes_common(&mut input.attrs, item_span, false) {
        Err(err) => return err.to_compile_error(),
        Ok(parsed) => parsed,
    };

    let name = &input.ident;
    let ule_stuff = match &input.data {
        Data::Struct(struc) => make_ule_struct_impl(name, &ule_name, &input, struc, &attrs),
        Data::Enum(enu) => make_ule_enum_impl(name, &ule_name, &input, enu, &attrs),
        _ => {
            return Error::new(input.span(), "#[make_ule] must be applied to a struct")
                .to_compile_error();
        }
    };

    // `None` interpolates to nothing, so each optional impl is simply absent
    // from the output when its flag says so.
    let zmkv = (!attrs.skip_kv).then(|| {
        quote!(
            impl<'a> zerovec::maps::ZeroMapKV<'a> for #name {
                type Container = zerovec::ZeroVec<'a, #name>;
                type Slice = zerovec::ZeroSlice<#name>;
                type GetType = #ule_name;
                type OwnedType = #name;
            }
        )
    });
    let maybe_debug = attrs.debug.then(|| {
        quote!(
            impl core::fmt::Debug for #ule_name {
                fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
                    let this = <#name as zerovec::ule::AsULE>::from_unaligned(*self);
                    <#name as core::fmt::Debug>::fmt(&this, f)
                }
            }
        )
    });

    quote!(
        #input
        #ule_stuff
        #maybe_debug
        #zmkv
    )
}
fn make_ule_enum_impl(
name: &Ident,
ule_name: &Ident,
input: &DeriveInput,
enu: &DataEnum,
attrs: &ZeroVecAttrs,
) -> TokenStream2 {
// We could support more int reprs in the future if needed
if !utils::ReprInfo::compute(&input.attrs).u8 {
return Error::new(
input.span(),
"#[make_ule] can only be applied to #[repr(u8)] enums",
)
.to_compile_error();
}
if enu.variants.is_empty() {
return Error::new(input.span(), "#[make_ule] cannot be applied to empty enums")
.to_compile_error();
}
// the smallest discriminant seen
let mut min = None;
// the largest discriminant seen
let mut max = None;
// Discriminants that have not been found in series (we might find them later)
let mut not_found = HashSet::new();
for (i, variant) in enu.variants.iter().enumerate() {
if !matches!(variant.fields, Fields::Unit) {
// This can be supported in the future, see zerovec/design_doc.md
return Error::new(
variant.span(),
"#[make_ule] can only be applied to enums with dataless variants",
)
.to_compile_error();
}
if let Some((_, ref discr)) = variant.discriminant {
if let Some(n) = get_expr_int(discr) {
let n = match u8::try_from(n) {
Ok(n) => n,
Err(_) => {
return Error::new(
variant.span(),
"#[make_ule] only supports discriminants from 0 to 255",
)
.to_compile_error();
}
};
match min {
Some(x) if x < n => {}
_ => {
min = Some(n);
}
}
match max {
Some(x) if x >= n => {}
_ => {
let old_max = max.unwrap_or(0u8);
for missing in (old_max + 1)..n {
not_found.insert(missing);
}
max = Some(n);
}
}
not_found.remove(&n);
// We require explicit discriminants so that it is clear that reordering
// fields would be a breaking change. Furthermore, using explicit discriminants helps ensure that
// platform-specific C ABI choices do not matter.
// We could potentially add in explicit discriminants on the user's behalf in the future, or support
// more complicated sets of explicit discriminant values.
if n as usize != i {}
} else {
return Error::new(
discr.span(),
"#[make_ule] must be applied to enums with explicit integer discriminants",
)
.to_compile_error();
}
} else {
return Error::new(
variant.span(),
"#[make_ule] must be applied to enums with explicit discriminants",
)
.to_compile_error();
}
}
let not_found = not_found.iter().collect::<Vec<_>>();
let min = min.unwrap();
let max = max.unwrap();
if not_found.len() > min as usize {
return Error::new(input.span(), format!("#[make_ule] must be applied to enums with discriminants \
filling the range from a minimum to a maximum; could not find {not_found:?}"))
.to_compile_error();
}
let maybe_ord_derives = if attrs.skip_ord {
quote!()
} else {
quote!(#[derive(Ord, PartialOrd)])
};
let vis = &input.vis;
let doc = format!("[`ULE`](zerovec::ule::ULE) type for {name}");
// Safety (based on the safety checklist on the ULE trait):
// 1. ULE type does not include any uninitialized or padding bytes.
// (achieved by `#[repr(transparent)]` on a type that satisfies this invariant
// 2. ULE type is aligned to 1 byte.
// (achieved by `#[repr(transparent)]` on a type that satisfies this invariant)
// 3. The impl of validate_bytes() returns an error if any byte is not valid.
// (Guarantees that the byte is in range of the corresponding enum.)
// 4. The impl of validate_bytes() returns an error if there are extra bytes.
// (This does not happen since we are backed by 1 byte.)
// 5. The other ULE methods use the default impl.
// 6. ULE type byte equality is semantic equality
quote!(
#[repr(transparent)]
#[derive(Copy, Clone, PartialEq, Eq)]
#maybe_ord_derives
#[doc = #doc]
#vis struct #ule_name(u8);
unsafe impl zerovec::ule::ULE for #ule_name {
#[inline]
fn validate_bytes(bytes: &[u8]) -> Result<(), zerovec::ule::UleError> {
for byte in bytes {
if *byte < #min || *byte > #max {
return Err(zerovec::ule::UleError::parse::<Self>())
}
}
Ok(())
}
}
impl zerovec::ule::AsULE for #name {
type ULE = #ule_name;
fn to_unaligned(self) -> Self::ULE {
// safety: the enum is repr(u8) and can be cast to a u8
unsafe {
::core::mem::transmute(self)
}
}
fn from_unaligned(other: Self::ULE) -> Self {
// safety: the enum is repr(u8) and can be cast from a u8,
// and `#ule_name` guarantees a valid value for this enum.
unsafe {
::core::mem::transmute(other)
}
}
}
impl #name {
/// Attempt to construct the value from its corresponding integer,
/// returning `None` if not possible
pub(crate) fn new_from_u8(value: u8) -> Option<Self> {
if value <= #max {
unsafe {
Some(::core::mem::transmute(value))
}
} else {
None
}
}
}
)
}
fn get_expr_int(e: &Expr) -> Option<u64> {
if let Ok(Lit::Int(ref i)) = syn::parse2(quote!(#e)) {
return i.base10_parse().ok();
}
None
}
/// Generates the ULE struct and trait impls for a `#[make_ule]` struct.
///
/// Structs without fields are rejected with a compile error. Otherwise the
/// output contains, in order: the `AsULE` impl converting field by field, the
/// generated ULE struct, the impls produced by `crate::ule::derive_impl` for
/// that struct, `PartialOrd`/`Ord` impls that compare via the original type
/// (unless `attrs.skip_ord`), and a byte-based `Hash` impl (when `attrs.hash`).
fn make_ule_struct_impl(
    name: &Ident,
    ule_name: &Ident,
    input: &DeriveInput,
    struc: &DataStruct,
    attrs: &ZeroVecAttrs,
) -> TokenStream2 {
    let fields = &struc.fields;
    if fields.is_empty() {
        return Error::new(
            input.span(),
            "#[make_ule] must be applied to a non-empty struct",
        )
        .to_compile_error();
    }

    // Declaration of the ULE struct, mirroring the shape of the input struct.
    let sized_fields = FieldInfo::make_list(fields.iter());
    let ule_field_decls = crate::ule::make_ule_fields(&sized_fields);
    let field_inits = utils::wrap_field_inits(&ule_field_decls, fields);
    let semi = utils::semi_for(fields);
    let repr_attr = utils::repr_for(fields);
    let vis = &input.vis;
    let doc = format!("[`ULE`](zerovec::ule::ULE) type for [`{name}`]");
    let ule_struct: DeriveInput = parse_quote!(
        #[repr(#repr_attr)]
        #[derive(Copy, Clone, PartialEq, Eq)]
        #[doc = #doc]
        // We suppress the `missing_docs` lint for the fields of the struct.
        #[allow(missing_docs)]
        #vis struct #ule_name #field_inits #semi
    );
    let derived = crate::ule::derive_impl(&ule_struct);

    // One initializer per field for each conversion direction; named fields
    // are addressed by identifier, tuple fields by position.
    let (to_ule_inits, from_ule_inits): (Vec<_>, Vec<_>) = fields
        .iter()
        .enumerate()
        .map(|(position, field)| {
            let ty = &field.ty;
            match field.ident {
                Some(ref ident) => (
                    quote!(#ident: <#ty as zerovec::ule::AsULE>::to_unaligned(self.#ident)),
                    quote!(#ident: <#ty as zerovec::ule::AsULE>::from_unaligned(unaligned.#ident)),
                ),
                None => {
                    let i = syn::Index::from(position);
                    (
                        quote!(<#ty as zerovec::ule::AsULE>::to_unaligned(self.#i)),
                        quote!(<#ty as zerovec::ule::AsULE>::from_unaligned(unaligned.#i)),
                    )
                }
            }
        })
        .unzip();
    let as_ule_conversions = utils::wrap_field_inits(&to_ule_inits, fields);
    let from_ule_conversions = utils::wrap_field_inits(&from_ule_inits, fields);

    let asule_impl = quote!(
        impl zerovec::ule::AsULE for #name {
            type ULE = #ule_name;
            fn to_unaligned(self) -> Self::ULE {
                #ule_name #as_ule_conversions
            }
            fn from_unaligned(unaligned: Self::ULE) -> Self {
                Self #from_ule_conversions
            }
        }
    );

    // `None` interpolates to nothing, so a disabled impl is simply omitted.
    let maybe_ord_impls = (!attrs.skip_ord).then(|| {
        quote!(
            impl core::cmp::PartialOrd for #ule_name {
                fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
                    Some(self.cmp(other))
                }
            }
            impl core::cmp::Ord for #ule_name {
                fn cmp(&self, other: &Self) -> core::cmp::Ordering {
                    let this = <#name as zerovec::ule::AsULE>::from_unaligned(*self);
                    let other = <#name as zerovec::ule::AsULE>::from_unaligned(*other);
                    <#name as core::cmp::Ord>::cmp(&this, &other)
                }
            }
        )
    });
    let maybe_hash = attrs.hash.then(|| {
        quote!(
            #[expect(clippy::derive_hash_xor_eq)]
            impl core::hash::Hash for #ule_name {
                fn hash<H>(&self, state: &mut H) where H: core::hash::Hasher {
                    state.write(<#ule_name as zerovec::ule::ULE>::slice_as_bytes(&[*self]));
                }
            }
        )
    });

    quote!(
        #asule_impl
        #ule_struct
        #derived
        #maybe_ord_impls
        #maybe_hash
    )
}

887
vendor/zerovec-derive/src/make_varule.rs vendored Normal file
View File

@@ -0,0 +1,887 @@
// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
use crate::utils::{self, FieldInfo};
use proc_macro2::Span;
use proc_macro2::TokenStream as TokenStream2;
use quote::{quote, ToTokens};
use syn::spanned::Spanned;
use syn::{
parse_quote, Data, DeriveInput, Error, Field, Fields, GenericArgument, Ident, Lifetime,
PathArguments, Type, TypePath,
};
/// Expands `#[make_varule(ule_name)]` for the annotated struct.
///
/// Validation performed here (each failure is returned as compile-error tokens):
/// - no type or const parameters and at most one lifetime, without bounds;
/// - the item is a struct with at least one field;
/// - at least one field is recognized as unsized by `UnsizedField::new`
///   (otherwise the error produced for the last field is reported);
/// - for tuple structs, the first unsized field sits where a trailing run of
///   unsized fields would start.
///
/// The output is the original item followed by the generated VarULE struct,
/// the getters for multi-field layouts, the `EncodeAsVarULE` impls, the
/// `ZeroFrom`/`From` impls, the derived `VarULE` impl, and the optional
/// `Ord`, `ZeroMapKV`, serde, `Debug`, `ToOwned` and `Hash` impls selected by
/// the extracted attributes. `PartialEq`/`Eq` are always emitted.
pub fn make_varule_impl(ule_name: Ident, mut input: DeriveInput) -> TokenStream2 {
if input.generics.type_params().next().is_some()
|| input.generics.const_params().next().is_some()
|| input.generics.lifetimes().count() > 1
{
return Error::new(
input.generics.span(),
"#[make_varule] must be applied to a struct without any type or const parameters and at most one lifetime",
)
.to_compile_error();
}
let sp = input.span();
let attrs = match utils::extract_attributes_common(&mut input.attrs, sp, true) {
Ok(val) => val,
Err(e) => return e.to_compile_error(),
};
let lt = input.generics.lifetimes().next();
if let Some(lt) = lt {
if lt.colon_token.is_some() || !lt.bounds.is_empty() {
return Error::new(
input.generics.span(),
"#[make_varule] must be applied to a struct without lifetime bounds",
)
.to_compile_error();
}
}
let lt = lt.map(|l| &l.lifetime);
let name = &input.ident;
// Captured before `input.data` is mutably borrowed below.
let input_span = input.span();
let fields = match input.data {
Data::Struct(ref mut s) => &mut s.fields,
_ => {
return Error::new(input.span(), "#[make_varule] must be applied to a struct")
.to_compile_error();
}
};
if fields.is_empty() {
return Error::new(
input.span(),
"#[make_varule] must be applied to a struct with at least one field",
)
.to_compile_error();
}
let mut sized_fields = vec![];
let mut unsized_fields = vec![];
let mut custom_varule_idents = vec![];
// First pass: extract the per-field attributes (this needs mutable access to
// the fields), collecting one entry per field in declaration order.
for field in fields.iter_mut() {
match utils::extract_field_attributes(&mut field.attrs) {
Ok(i) => custom_varule_idents.push(i),
Err(e) => return e.to_compile_error(),
}
}
// Second pass: a field is unsized if `UnsizedField::new` accepts it, and
// sized otherwise.
for (i, field) in fields.iter().enumerate() {
match UnsizedField::new(field, i, custom_varule_idents[i].clone()) {
Ok(o) => unsized_fields.push(o),
Err(_) => sized_fields.push(FieldInfo::new_for_field(field, i)),
}
}
if unsized_fields.is_empty() {
// Re-run detection on the last field purely to recover its error message.
let last_field_index = fields.len() - 1;
let last_field = fields.iter().next_back().unwrap();
let e = UnsizedField::new(
last_field,
last_field_index,
custom_varule_idents[last_field_index].clone(),
)
.unwrap_err();
return Error::new(last_field.span(), e).to_compile_error();
}
// Only the position of the first unsized field is compared against the
// position a trailing run of unsized fields would start at.
if unsized_fields[0].field.index != fields.len() - unsized_fields.len()
&& unsized_fields[0].field.field.ident.is_none()
{
return Error::new(
unsized_fields.first().unwrap().field.field.span(),
"#[make_varule] requires its unsized fields to be at the end for tuple structs",
)
.to_compile_error();
}
let unsized_field_info = UnsizedFields::new(unsized_fields, attrs.vzv_format);
// The VarULE struct consists of the ULE form of every sized field followed
// by a single field holding all unsized data.
let mut field_inits = crate::ule::make_ule_fields(&sized_fields);
let last_field_ule = unsized_field_info.varule_ty();
let setter = unsized_field_info.varule_setter();
let vis = &unsized_field_info.varule_vis();
field_inits.push(quote!(#vis #setter #last_field_ule));
let semi = utils::semi_for(fields);
let repr_attr = utils::repr_for(fields);
let field_inits = utils::wrap_field_inits(&field_inits, fields);
let vis = &input.vis;
let doc = format!(
"[`VarULE`](zerovec::ule::VarULE) type for [`{name}`]. See [`{name}`] for documentation."
);
let varule_struct: DeriveInput = parse_quote!(
#[repr(#repr_attr)]
#[doc = #doc]
#[allow(missing_docs)]
#vis struct #ule_name #field_inits #semi
);
let derived = crate::varule::derive_impl(&varule_struct, unsized_field_info.varule_validator());
let maybe_lt_bound = lt.as_ref().map(|lt| quote!(<#lt>));
let encode_impl = make_encode_impl(
&sized_fields,
&unsized_field_info,
name,
&ule_name,
&maybe_lt_bound,
);
let zf_and_from_impl = make_zf_and_from_impl(
&sized_fields,
&unsized_field_info,
fields,
name,
&ule_name,
lt,
input_span,
attrs.skip_from,
);
let eq_impl = quote!(
impl core::cmp::PartialEq for #ule_name {
fn eq(&self, other: &Self) -> bool {
// The VarULE invariants allow us to assume that equality is byte equality
// in non-safety-critical contexts
<Self as zerovec::ule::VarULE>::as_bytes(&self)
== <Self as zerovec::ule::VarULE>::as_bytes(&other)
}
}
impl core::cmp::Eq for #ule_name {}
);
// Fully-qualified `ZeroFrom` path reused by the impls below that first
// convert the VarULE value back into the original type.
let zerofrom_fq_path =
quote!(<#name as zerovec::__zerovec_internal_reexport::ZeroFrom<#ule_name>>);
let maybe_ord_impls = if attrs.skip_ord {
quote!()
} else {
quote!(
impl core::cmp::PartialOrd for #ule_name {
fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
Some(self.cmp(other))
}
}
impl core::cmp::Ord for #ule_name {
fn cmp(&self, other: &Self) -> core::cmp::Ordering {
let this = #zerofrom_fq_path::zero_from(self);
let other = #zerofrom_fq_path::zero_from(other);
<#name as core::cmp::Ord>::cmp(&this, &other)
}
}
)
};
let maybe_debug = if attrs.debug {
quote!(
impl core::fmt::Debug for #ule_name {
fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
let this = #zerofrom_fq_path::zero_from(self);
<#name as core::fmt::Debug>::fmt(&this, f)
}
}
)
} else {
quote!()
};
let maybe_toowned = if !attrs.skip_toowned {
quote!(
impl zerovec::__zerovec_internal_reexport::borrow::ToOwned for #ule_name {
type Owned = zerovec::__zerovec_internal_reexport::boxed::Box<Self>;
fn to_owned(&self) -> Self::Owned {
zerovec::ule::encode_varule_to_box(self)
}
}
)
} else {
quote!()
};
let zmkv = if attrs.skip_kv {
quote!()
} else {
quote!(
impl<'a> zerovec::maps::ZeroMapKV<'a> for #ule_name {
type Container = zerovec::VarZeroVec<'a, #ule_name>;
type Slice = zerovec::VarZeroSlice<#ule_name>;
type GetType = #ule_name;
type OwnedType = zerovec::__zerovec_internal_reexport::boxed::Box<#ule_name>;
}
)
};
let serde_path = quote!(zerovec::__zerovec_internal_reexport::serde);
// Human-readable serializers go through the original type; other
// serializers receive the raw VarULE bytes.
let maybe_ser = if attrs.serialize {
quote!(
impl #serde_path::Serialize for #ule_name {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> where S: #serde_path::Serializer {
if serializer.is_human_readable() {
let this = #zerofrom_fq_path::zero_from(self);
<#name as #serde_path::Serialize>::serialize(&this, serializer)
} else {
serializer.serialize_bytes(zerovec::ule::VarULE::as_bytes(self))
}
}
}
)
} else {
quote!()
};
let deserialize_error = format!("&{ule_name} can only deserialize in zero-copy ways");
// Two impls are generated: one for the boxed VarULE type, and one for a
// borrowed reference that refuses human-readable formats.
let maybe_de = if attrs.deserialize {
quote!(
impl<'de> #serde_path::Deserialize<'de> for zerovec::__zerovec_internal_reexport::boxed::Box<#ule_name> {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> where D: #serde_path::Deserializer<'de> {
if deserializer.is_human_readable() {
let this = <#name as #serde_path::Deserialize>::deserialize(deserializer)?;
Ok(zerovec::ule::encode_varule_to_box(&this))
} else {
// This branch should usually not be hit, since Cow-like use cases will hit the Deserialize impl for &'a ULEType instead.
let deserialized = <& #ule_name>::deserialize(deserializer)?;
Ok(zerovec::ule::VarULE::to_boxed(deserialized))
}
}
}
impl<'a, 'de: 'a> #serde_path::Deserialize<'de> for &'a #ule_name {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> where D: #serde_path::Deserializer<'de> {
if !deserializer.is_human_readable() {
let bytes = <&[u8]>::deserialize(deserializer)?;
<#ule_name as zerovec::ule::VarULE>::parse_bytes(bytes).map_err(#serde_path::de::Error::custom)
} else {
Err(#serde_path::de::Error::custom(#deserialize_error))
}
}
}
)
} else {
quote!()
};
let maybe_hash = if attrs.hash {
quote!(
#[expect(clippy::derive_hash_xor_eq)]
impl core::hash::Hash for #ule_name {
fn hash<H>(&self, state: &mut H) where H: core::hash::Hasher {
state.write(<#ule_name as zerovec::ule::VarULE>::as_bytes(&self));
}
}
)
} else {
quote!()
};
// Per-field getters exist only when several unsized fields share the
// trailing VarULE field.
let maybe_multi_getters = if let Some(getters) = unsized_field_info.maybe_multi_getters() {
quote! {
impl #ule_name {
#getters
}
}
} else {
quote!()
};
quote!(
#input
#varule_struct
#maybe_multi_getters
#encode_impl
#zf_and_from_impl
#derived
#maybe_ord_impls
#eq_impl
#zmkv
#maybe_ser
#maybe_de
#maybe_debug
#maybe_toowned
#maybe_hash
)
}
/// Generates the `ZeroFrom<'lt, ULE>` impl for the original type and, unless
/// `skip_from` is set, a `From<&'lt ULE>` impl that forwards to it.
///
/// Produces no tokens when any unsized field lacks `ZeroFrom` support, and a
/// compile error at `span` when the type declares no lifetime.
#[expect(clippy::too_many_arguments)] // Internal function. Could refactor later to use some kind of context type.
fn make_zf_and_from_impl(
    sized_fields: &[FieldInfo],
    unsized_field_info: &UnsizedFields,
    fields: &Fields,
    name: &Ident,
    ule_name: &Ident,
    maybe_lt: Option<&Lifetime>,
    span: Span,
    skip_from: bool,
) -> TokenStream2 {
    if !unsized_field_info.has_zf() {
        return quote!();
    }
    let Some(lt) = maybe_lt else {
        return Error::new(
            span,
            "Can only generate ZeroFrom impls for types with lifetimes",
        )
        .to_compile_error();
    };

    // Sized fields are converted back from their ULE form one by one; the
    // unsized fields append their own initializers afterwards.
    let mut initializers = Vec::new();
    for sized in sized_fields {
        let ty = &sized.field.ty;
        let accessor = &sized.accessor;
        let setter = sized.setter();
        initializers
            .push(quote!(#setter <#ty as zerovec::ule::AsULE>::from_unaligned(other.#accessor)));
    }
    unsized_field_info.push_zf_setters(lt, &mut initializers);
    let field_inits = utils::wrap_field_inits(&initializers, fields);

    let zerofrom_trait = quote!(zerovec::__zerovec_internal_reexport::ZeroFrom);
    // `None` interpolates to nothing, which omits the `From` impl.
    let maybe_from = (!skip_from).then(|| {
        quote!(
            impl<#lt> From<&#lt #ule_name> for #name<#lt> {
                fn from(other: &#lt #ule_name) -> Self {
                    <Self as #zerofrom_trait<#lt, #ule_name>>::zero_from(other)
                }
            }
        )
    });

    quote!(
        impl <#lt> #zerofrom_trait <#lt, #ule_name> for #name <#lt> {
            fn zero_from(other: &#lt #ule_name) -> Self {
                Self #field_inits
            }
        }
        #maybe_from
    )
}
/// Generates the `EncodeAsVarULE<ULE>` impls for the original type and for
/// references to it.
///
/// The encoded length is the sum of the ULE sizes of the sized fields plus the
/// encoded length of the unsized data. Writing copies each sized field's ULE
/// bytes to its offset and hands the rest of the buffer to the unsized fields.
fn make_encode_impl(
    sized_fields: &[FieldInfo],
    unsized_field_info: &UnsizedFields,
    name: &Ident,
    ule_name: &Ident,
    maybe_lt_bound: &Option<TokenStream2>,
) -> TokenStream2 {
    // One `size_of` term per sized field.
    let lengths: Vec<TokenStream2> = sized_fields
        .iter()
        .map(|sized| {
            let ty = &sized.field.ty;
            quote!(::core::mem::size_of::<<#ty as zerovec::ule::AsULE>::ULE>())
        })
        .collect();

    let (encoders, remaining_offset) =
        utils::generate_per_field_offsets(sized_fields, true, |sized, start, width| {
            let ty = &sized.field.ty;
            let accessor = &sized.accessor;
            quote!(
                // generate_per_field_offsets produces valid indices,
                // and we don't care about panics in Encode impls
                #[expect(clippy::indexing_slicing)]
                let out = &mut dst[#start .. #start + #width];
                let unaligned = zerovec::ule::AsULE::to_unaligned(self.#accessor);
                let unaligned_slice = &[unaligned];
                let src = <<#ty as zerovec::ule::AsULE>::ULE as zerovec::ule::ULE>::slice_as_bytes(unaligned_slice);
                out.copy_from_slice(src);
            )
        });

    let last_encode_len = unsized_field_info.encode_len();
    let last_encode_write = unsized_field_info.encode_write(quote!(out));

    quote!(
        unsafe impl #maybe_lt_bound zerovec::ule::EncodeAsVarULE<#ule_name> for #name #maybe_lt_bound {
            // Safety: unimplemented as the other two are implemented
            fn encode_var_ule_as_slices<R>(&self, cb: impl FnOnce(&[&[u8]]) -> R) -> R {
                unreachable!("other two methods implemented")
            }

            // Safety: returns the total length of the ULE form by adding up the lengths of each element's ULE forms
            fn encode_var_ule_len(&self) -> usize {
                #(#lengths +)* #last_encode_len
            }

            // Safety: converts each element to ULE form and writes them in sequence
            fn encode_var_ule_write(&self, mut dst: &mut [u8]) {
                debug_assert_eq!(self.encode_var_ule_len(), dst.len());
                #encoders

                // generate_per_field_offsets produces valid remainders,
                // and we don't care about panics in Encode impls
                #[expect(clippy::indexing_slicing)]
                let out = &mut dst[#remaining_offset..];
                #last_encode_write
            }
        }

        // This second impl exists to allow for using EncodeAsVarULE without cloning
        //
        // A blanket impl cannot exist without coherence issues
        unsafe impl #maybe_lt_bound zerovec::ule::EncodeAsVarULE<#ule_name> for &'_ #name #maybe_lt_bound {
            // Safety: unimplemented as the other two are implemented
            fn encode_var_ule_as_slices<R>(&self, cb: impl FnOnce(&[&[u8]]) -> R) -> R {
                unreachable!("other two methods implemented")
            }

            // Safety: returns the total length of the ULE form by adding up the lengths of each element's ULE forms
            fn encode_var_ule_len(&self) -> usize {
                (**self).encode_var_ule_len()
            }

            // Safety: converts each element to ULE form and writes them in sequence
            fn encode_var_ule_write(&self, mut dst: &mut [u8]) {
                (**self).encode_var_ule_write(dst)
            }
        }
    )
}
/// Represents a VarULE-compatible type that would typically
/// be found behind a `Cow<'a, _>` in the last field, and is represented
/// roughly the same in owned and borrowed versions.
///
/// It is the payload of the `Cow`, `VarZeroCow`, `Growable`, `Boxed` and `Ref`
/// variants of `UnsizedFieldKind`.
#[derive(Copy, Clone, Debug)]
enum OwnULETy<'a> {
/// `[T]` where `T: AsULE<ULE = Self>`; the payload is the element type `T`.
Slice(&'a Type),
/// `str`
Str,
}
/// Represents the type of an unsized field of the struct (typically the last
/// field), as classified by `UnsizedFieldKind::new`.
#[derive(Clone, Debug)]
enum UnsizedFieldKind<'a> {
// `Cow<'a, _>` around a recognized own-ULE type
Cow(OwnULETy<'a>),
// `VarZeroCow<'a, _>` around a recognized own-ULE type
VarZeroCow(OwnULETy<'a>),
// `ZeroVec<'a, T>`; the payload is the type argument `T`
ZeroVec(&'a Type),
// `VarZeroVec<'a, T>`; the payload is the type argument `T`
VarZeroVec(&'a Type),
/// Custom VarULE type, and the identifier corresponding to the VarULE type
Custom(&'a TypePath, Ident),
// Generally you should be using the above ones for maximum zero-copy, but these will still work
// `String` (as `OwnULETy::Str`) or `Vec<T>` (as `OwnULETy::Slice(T)`)
Growable(OwnULETy<'a>),
// `Box<_>` around a recognized own-ULE type
Boxed(OwnULETy<'a>),
// A reference type whose referent is a recognized own-ULE type
Ref(OwnULETy<'a>),
}
/// A single struct field that was recognized as unsized, pairing the detected
/// kind of its type with the general information about the field.
#[derive(Clone, Debug)]
struct UnsizedField<'a> {
// How the field's type maps to a VarULE type
kind: UnsizedFieldKind<'a>,
// The underlying field together with its index, as built by `FieldInfo::new_for_field`
field: FieldInfo<'a>,
}
/// All unsized fields of a struct, which together are stored in the single
/// trailing field of the generated VarULE type.
struct UnsizedFields<'a> {
// The unsized fields; `UnsizedFields::new` asserts this is non-empty
fields: Vec<UnsizedField<'a>>,
// Second generic argument passed to `MultiFieldsULE` when there is more than
// one unsized field; defaults to `zerovec::vecs::Index16`
format_param: TokenStream2,
}
impl<'a> UnsizedFields<'a> {
    /// Bundles the unsized fields of a struct.
    ///
    /// `format_param` is an optional token stream naming the format argument
    /// passed to `MultiFieldsULE`; `zerovec::vecs::Index16` is used when absent.
    ///
    /// # Panics
    ///
    /// Panics if `fields` is empty.
    fn new(fields: Vec<UnsizedField<'a>>, format_param: Option<TokenStream2>) -> Self {
        assert!(!fields.is_empty(), "Must have at least one unsized field");
        Self {
            fields,
            format_param: format_param.unwrap_or_else(|| quote!(zerovec::vecs::Index16)),
        }
    }

    /// The VarULE type able to store all of these fields: the field's own
    /// VarULE type for a single field, `MultiFieldsULE` otherwise.
    fn varule_ty(&self) -> TokenStream2 {
        match self.fields.as_slice() {
            [only] => only.kind.varule_ty(),
            several => {
                let len = several.len();
                let format_param = &self.format_param;
                quote!(zerovec::ule::MultiFieldsULE::<#len, #format_param>)
            }
        }
    }

    /// The name under which the unsized data is accessed on the VarULE type.
    ///
    /// Several named fields share the synthetic name `unsized_fields`; in every
    /// other case the accessor of the first unsized field is reused.
    fn varule_accessor(&self) -> TokenStream2 {
        let first = &self.fields[0].field;
        if self.fields.len() != 1 && first.field.ident.is_some() {
            quote!(unsized_fields)
        } else {
            first.accessor.clone()
        }
    }

    /// The setter for the unsized data, for use in struct definition/creation
    /// syntax. Empty for multi-field tuple structs.
    fn varule_setter(&self) -> TokenStream2 {
        let first = &self.fields[0].field;
        match (self.fields.len(), first.field.ident.is_some()) {
            (1, _) => first.setter(),
            (_, true) => quote!(unsized_fields: ),
            (_, false) => quote!(),
        }
    }

    /// Visibility of the unsized data on the VarULE type: that of the field
    /// itself for a single field, private (no tokens) otherwise.
    fn varule_vis(&self) -> TokenStream2 {
        match self.fields.as_slice() {
            [only] => only.field.field.vis.to_token_stream(),
            _ => quote!(),
        }
    }

    /// Whether every unsized field reports `ZeroFrom` support.
    fn has_zf(&self) -> bool {
        !self.fields.iter().any(|f| !f.kind.has_zf())
    }

    /// Generates code encoding all unsized fields of `self` into the byte
    /// slice expression `out`.
    fn encode_write(&self, out: TokenStream2) -> TokenStream2 {
        if let [only] = self.fields.as_slice() {
            return only.encode_func(quote!(encode_var_ule_write), quote!(#out));
        }

        let len = self.fields.len();
        let format_param = &self.format_param;
        let lengths: Vec<TokenStream2> = self
            .fields
            .iter()
            .map(|f| f.encode_func(quote!(encode_var_ule_len), quote!()))
            .collect();
        let writers: Vec<TokenStream2> = self
            .fields
            .iter()
            .enumerate()
            .map(|(i, f)| {
                let (encodeable_ty, encodeable) = f.encodeable_tokens();
                let varule_ty = f.kind.varule_ty();
                quote!(multi.set_field_at::<#varule_ty, #encodeable_ty>(#i, #encodeable))
            })
            .collect();

        quote!(
            let lengths = [#(#lengths),*];
            let mut multi = zerovec::ule::MultiFieldsULE::<#len, #format_param>::new_from_lengths_partially_initialized(lengths, #out);
            unsafe {
                #(#writers;)*
            }
        )
    }

    /// Generates an expression for the number of bytes needed to encode all
    /// unsized fields of `self`.
    fn encode_len(&self) -> TokenStream2 {
        if let [only] = self.fields.as_slice() {
            return only.encode_func(quote!(encode_var_ule_len), quote!());
        }

        let len = self.fields.len();
        let format_param = &self.format_param;
        let lengths: Vec<TokenStream2> = self
            .fields
            .iter()
            .map(|f| f.encode_func(quote!(encode_var_ule_len), quote!()))
            .collect();
        quote!(zerovec::ule::MultiFieldsULE::<#len, #format_param>::compute_encoded_len_for([#(#lengths),*]))
    }

    /// Appends one `ZeroFrom`-based initializer per unsized field of the stack
    /// type to `field_inits`.
    ///
    /// A single field is read through its accessor; several fields are read
    /// through their generated getters.
    fn push_zf_setters(&self, lt: &Lifetime, field_inits: &mut Vec<TokenStream2>) {
        let zerofrom_trait = quote!(zerovec::__zerovec_internal_reexport::ZeroFrom);

        if let [only] = self.fields.as_slice() {
            let accessor = only.field.accessor.clone();
            let setter = only.field.setter();
            let last_field_ty = &only.field.field.ty;
            let last_field_ule_ty = only.kind.varule_ty();
            field_inits.push(quote!(#setter <#last_field_ty as #zerofrom_trait <#lt, #last_field_ule_ty>>::zero_from(&other.#accessor) ));
            return;
        }

        field_inits.extend(self.fields.iter().map(|f| {
            let setter = f.field.setter();
            let getter = f.field.getter();
            let field_ty = &f.field.field.ty;
            let field_ule_ty = f.kind.varule_ty();
            quote!(#setter
                <#field_ty as #zerofrom_trait <#lt, #field_ule_ty>>::zero_from(&other.#getter())
            )
        }));
    }

    /// Getter methods for the individual unsized fields, generated only when
    /// there is more than one of them.
    fn maybe_multi_getters(&self) -> Option<TokenStream2> {
        (self.fields.len() != 1).then(|| {
            let multi_accessor = self.varule_accessor();
            let field_getters: Vec<TokenStream2> = self
                .fields
                .iter()
                .enumerate()
                .map(|(i, f)| {
                    let getter = f.field.getter();
                    let field_ule_ty = f.kind.varule_ty();
                    let doc_name = f.field.getter_doc_name();
                    let doc = format!("Access the VarULE type behind {doc_name}");
                    quote!(
                        #[doc = #doc]
                        pub fn #getter<'a>(&'a self) -> &'a #field_ule_ty {
                            unsafe {
                                self.#multi_accessor.get_field::<#field_ule_ty>(#i)
                            }
                        }
                    )
                })
                .collect();
            quote!(#(#field_getters)*)
        })
    }

    /// In case this needs custom validation code, return it.
    ///
    /// The code will validate a variable known as `last_field_bytes`. It is
    /// only produced when there is more than one unsized field.
    fn varule_validator(&self) -> Option<TokenStream2> {
        let len = self.fields.len();
        (len != 1).then(|| {
            let format_param = &self.format_param;
            let validators: Vec<TokenStream2> = self
                .fields
                .iter()
                .enumerate()
                .map(|(i, f)| {
                    let varule_ty = f.kind.varule_ty();
                    quote!(multi.validate_field::<#varule_ty>(#i)?;)
                })
                .collect();
            quote!(
                let multi = zerovec::ule::MultiFieldsULE::<#len, #format_param>::parse_bytes(last_field_bytes)?;
                unsafe {
                    #(#validators)*
                }
            )
        })
    }
}
impl<'a> UnsizedField<'a> {
    /// Classifies `field` as an unsized field.
    ///
    /// Returns the error message from `UnsizedFieldKind::new` when the field's
    /// type is not recognized.
    fn new(
        field: &'a Field,
        index: usize,
        custom_varule_ident: Option<Ident>,
    ) -> Result<Self, String> {
        let kind = UnsizedFieldKind::new(&field.ty, custom_varule_ident)?;
        let field = FieldInfo::new_for_field(field, index);
        Ok(Self { kind, field })
    }

    /// Call `<Self as EncodeAsVarULE<V>>::#method(self.accessor #additional_args)` after adjusting
    /// Self and self.accessor to be the right types
    fn encode_func(&self, method: TokenStream2, additional_args: TokenStream2) -> TokenStream2 {
        let (encodeable_ty, encodeable) = self.encodeable_tokens();
        let varule_ty = self.kind.varule_ty();
        quote!(<#encodeable_ty as zerovec::ule::EncodeAsVarULE<#varule_ty>>::#method(#encodeable, #additional_args))
    }

    /// Returns `(encodeable_ty, encodeable)`: the type to encode through and
    /// the expression producing a value of it from `self.<accessor>`.
    fn encodeable_tokens(&self) -> (TokenStream2, TokenStream2) {
        let accessor = &self.field.accessor;
        let encodeable = self.kind.encodeable_value(quote!(self.#accessor));
        (self.kind.encodeable_ty(), encodeable)
    }
}
impl<'a> UnsizedFieldKind<'a> {
    /// Construct an `UnsizedFieldKind` for a field of type `ty`, if possible.
    ///
    /// * Reference types (`&T`) become `Ref`, provided `T` is a slice or `str`.
    /// * Path types with a `custom_varule_ident` become `Custom` without further inspection.
    /// * Otherwise the path must be a single segment naming one of `String`, `Vec<T>`,
    ///   `Box<T>`, `Cow<'a, T>`, `VarZeroCow<'a, T>`, `ZeroVec<'a, T>` or `VarZeroVec<'a, T>`.
    ///
    /// Returns `Err` with a human-readable message for every other shape of type.
    /// Note that `custom_varule_ident` is only consulted for path types.
    fn new(
        ty: &'a Type,
        custom_varule_ident: Option<Ident>,
    ) -> Result<UnsizedFieldKind<'a>, String> {
        // Every line but the last ends in `\` so that the messages stay on a single line
        // when they are shown as compile errors.
        static PATH_TYPE_IDENTITY_ERROR: &str =
            "Can only automatically detect corresponding VarULE types for path types \
             that are Cow, VarZeroCow, ZeroVec, VarZeroVec, Box, String, or Vec";
        static PATH_TYPE_GENERICS_ERROR: &str =
            "Can only automatically detect corresponding VarULE types for path \
             types with at most one lifetime and at most one generic parameter. VarZeroVecFormat \
             types are not currently supported";
        match *ty {
            Type::Reference(ref tyref) => {
                OwnULETy::new(&tyref.elem, "reference").map(UnsizedFieldKind::Ref)
            }
            Type::Path(ref typath) => {
                if let Some(custom_varule_ident) = custom_varule_ident {
                    return Ok(UnsizedFieldKind::Custom(typath, custom_varule_ident));
                }
                if typath.path.segments.len() != 1 {
                    return Err("Can only automatically detect corresponding VarULE types for \
                                path types with a single path segment"
                        .into());
                }
                let segment = typath.path.segments.first().unwrap();
                match segment.arguments {
                    PathArguments::None => {
                        if segment.ident == "String" {
                            Ok(UnsizedFieldKind::Growable(OwnULETy::Str))
                        } else {
                            Err(PATH_TYPE_IDENTITY_ERROR.into())
                        }
                    }
                    PathArguments::AngleBracketed(ref params) => {
                        // At most one lifetime and exactly one generic parameter
                        let mut lifetime = None;
                        let mut generic = None;
                        for param in &params.args {
                            match param {
                                GenericArgument::Lifetime(ref lt) if lifetime.is_none() => {
                                    lifetime = Some(lt)
                                }
                                GenericArgument::Type(ref ty) if generic.is_none() => {
                                    generic = Some(ty)
                                }
                                // A second lifetime, a second type, or any other kind of argument
                                _ => return Err(PATH_TYPE_GENERICS_ERROR.into()),
                            }
                        }
                        // Must be exactly one generic parameter
                        // (we've handled the zero generics case already)
                        let generic = if let Some(g) = generic {
                            g
                        } else {
                            return Err(PATH_TYPE_GENERICS_ERROR.into());
                        };
                        let ident = segment.ident.to_string();
                        // Types written with a lifetime and types written without one are
                        // looked up in separate tables.
                        if lifetime.is_some() {
                            match &*ident {
                                "ZeroVec" => Ok(UnsizedFieldKind::ZeroVec(generic)),
                                "VarZeroVec" => Ok(UnsizedFieldKind::VarZeroVec(generic)),
                                "Cow" => OwnULETy::new(generic, "Cow").map(UnsizedFieldKind::Cow),
                                "VarZeroCow" => OwnULETy::new(generic, "VarZeroCow")
                                    .map(UnsizedFieldKind::VarZeroCow),
                                _ => Err(PATH_TYPE_IDENTITY_ERROR.into()),
                            }
                        } else {
                            match &*ident {
                                "Vec" => Ok(UnsizedFieldKind::Growable(OwnULETy::Slice(generic))),
                                "Box" => OwnULETy::new(generic, "Box").map(UnsizedFieldKind::Boxed),
                                _ => Err(PATH_TYPE_IDENTITY_ERROR.into()),
                            }
                        }
                    }
                    _ => Err("Can only automatically detect corresponding VarULE types for path types \
                              with none or angle bracketed generics"
                        .into()),
                }
            }
            _ => Err(
                "Can only automatically detect corresponding VarULE types for path and reference types"
                    .into(),
            ),
        }
    }

    /// Get the tokens for the corresponding VarULE type
    fn varule_ty(&self) -> TokenStream2 {
        match *self {
            Self::Ref(ref inner)
            | Self::Cow(ref inner)
            | Self::VarZeroCow(ref inner)
            | Self::Boxed(ref inner)
            | Self::Growable(ref inner) => inner.varule_ty(),
            Self::Custom(_, ref name) => quote!(#name),
            Self::ZeroVec(ref inner) => quote!(zerovec::ZeroSlice<#inner>),
            Self::VarZeroVec(ref inner) => quote!(zerovec::VarZeroSlice<#inner>),
        }
    }

    /// Takes expr `value` and returns it as a value that can be encoded via EncodeAsVarULE.
    ///
    /// `Custom` fields are borrowed as-is (`&value`); every other kind is reborrowed
    /// through a dereference (`&*value`).
    fn encodeable_value(&self, value: TokenStream2) -> TokenStream2 {
        match *self {
            Self::Ref(_)
            | Self::Cow(_)
            | Self::VarZeroCow(_)
            | Self::Growable(_)
            | Self::Boxed(_) => quote!(&*#value),
            Self::Custom(..) => quote!(&#value),
            Self::ZeroVec(_) | Self::VarZeroVec(_) => quote!(&*#value),
        }
    }

    /// Returns the EncodeAsVarULE type this can be represented as, the same returned by encodeable_value()
    fn encodeable_ty(&self) -> TokenStream2 {
        match *self {
            Self::Ref(ref inner)
            | Self::Cow(ref inner)
            | Self::VarZeroCow(ref inner)
            | Self::Growable(ref inner)
            | Self::Boxed(ref inner) => inner.varule_ty(),
            Self::Custom(ref path, _) => quote!(#path),
            Self::ZeroVec(ref ty) => quote!(zerovec::ZeroSlice<#ty>),
            Self::VarZeroVec(ref ty) => quote!(zerovec::VarZeroSlice<#ty>),
        }
    }

    /// Returns `true` for every kind except `Boxed` and `Growable`.
    fn has_zf(&self) -> bool {
        matches!(
            *self,
            Self::Ref(_)
                | Self::Cow(_)
                | Self::VarZeroCow(_)
                | Self::ZeroVec(_)
                | Self::VarZeroVec(_)
                | Self::Custom(..)
        )
    }
}
impl<'a> OwnULETy<'a> {
    /// Classifies `ty` as either a slice (`[T]`) or `str`.
    ///
    /// `context` names the wrapper the type was found in and is only used in error messages.
    fn new(ty: &'a Type, context: &str) -> Result<Self, String> {
        match ty {
            Type::Slice(slice) => Ok(Self::Slice(&slice.elem)),
            Type::Path(typath) if typath.path.is_ident("str") => Ok(Self::Str),
            Type::Path(_) => Err(format!(
                "Cannot automatically detect corresponding VarULE type for non-str path type inside a {context}"
            )),
            _ => Err(format!(
                "Cannot automatically detect corresponding VarULE type for non-slice/path type inside a {context}"
            )),
        }
    }

    /// Tokens for the VarULE type: `[T]` for a slice, `str` for a string.
    fn varule_ty(&self) -> TokenStream2 {
        match self {
            Self::Slice(elem) => quote!([#elem]),
            Self::Str => quote!(str),
        }
    }
}

110
vendor/zerovec-derive/src/ule.rs vendored Normal file
View File

@@ -0,0 +1,110 @@
// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
use proc_macro2::TokenStream as TokenStream2;
use quote::quote;
use crate::utils::{self, FieldInfo};
use syn::spanned::Spanned;
use syn::{Data, DeriveInput, Error};
/// Implementation for derive(ULE).
///
/// Emits a compile error instead of an impl when the input is not a non-empty,
/// non-generic struct marked `#[repr(C, packed)]` or `#[repr(transparent)]`.
/// Otherwise emits an `unsafe impl zerovec::ule::ULE` whose `validate_bytes()` checks that
/// the input length is a multiple of the struct size and validates every field of every
/// element.
pub fn derive_impl(input: &DeriveInput) -> TokenStream2 {
    if !utils::ReprInfo::compute(&input.attrs).cpacked_or_transparent() {
        return Error::new(
            input.span(),
            "derive(ULE) must be applied to a #[repr(C, packed)] or #[repr(transparent)] type",
        )
        .to_compile_error();
    }
    if input.generics.type_params().next().is_some()
        || input.generics.lifetimes().next().is_some()
        || input.generics.const_params().next().is_some()
    {
        return Error::new(
            input.generics.span(),
            "derive(ULE) must be applied to a struct without any generics",
        )
        .to_compile_error();
    }
    let struc = if let Data::Struct(ref s) = input.data {
        if s.fields.iter().next().is_none() {
            return Error::new(
                input.span(),
                "derive(ULE) must be applied to a non-empty struct",
            )
            .to_compile_error();
        }
        s
    } else {
        return Error::new(input.span(), "derive(ULE) must be applied to a struct")
            .to_compile_error();
    };
    let fields = FieldInfo::make_list(struc.fields.iter());
    let (validators, remaining_offset) = generate_ule_validators(&fields);
    let name = &input.ident;
    // Safety (based on the safety checklist on the ULE trait):
    //  1. #name does not include any uninitialized or padding bytes.
    //     (achieved by enforcing #[repr(transparent)] or #[repr(C, packed)] on a struct of only ULE types)
    //  2. #name is aligned to 1 byte.
    //     (achieved by enforcing #[repr(transparent)] or #[repr(C, packed)] on a struct of only ULE types)
    //  3. The impl of validate_bytes() returns an error if any byte is not valid.
    //  4. The impl of validate_bytes() returns an error if there are extra bytes.
    //  5. The other ULE methods use the default impl.
    //  6. [This impl does not enforce the non-safety equality constraint, it is up to the user to do so, ideally via a custom derive]
    quote! {
        unsafe impl zerovec::ule::ULE for #name {
            #[inline]
            fn validate_bytes(bytes: &[u8]) -> Result<(), zerovec::ule::UleError> {
                const SIZE: usize = ::core::mem::size_of::<#name>();
                #[expect(clippy::modulo_one)]
                if bytes.len() % SIZE != 0 {
                    return Err(zerovec::ule::UleError::length::<Self>(bytes.len()));
                }
                // Validate the bytes
                //
                // The per-field validators slice a variable named `bytes` at offsets relative
                // to the start of ONE element, so the loop variable deliberately shadows the
                // full input with the current element's chunk. Binding it under any other
                // name would re-validate the first element on every iteration and leave the
                // rest of the input unchecked.
                #[expect(clippy::indexing_slicing)] // We're slicing a chunk of known size
                for bytes in bytes.chunks_exact(SIZE) {
                    #validators
                    debug_assert_eq!(#remaining_offset, SIZE);
                }
                Ok(())
            }
        }
    }
}
/// Given a slice of ULE struct fields, returns code validating that a slice variable `bytes`
/// contains valid instances of those ULE types in order, plus the identifier of the const
/// holding the byte offset just past the last validated field.
///
/// For a ULE struct that offset should equal the struct size; for a VarULE struct it is
/// where the trailing unsized field begins.
///
/// The generated code returns `UleError::parse::<Self>()` when `bytes` is too short for a field.
pub(crate) fn generate_ule_validators(
    fields: &[FieldInfo],
    // (validators, remaining_offset)
) -> (TokenStream2, syn::Ident) {
    utils::generate_per_field_offsets(fields, false, |info, start, len| {
        let field_ty = &info.field.ty;
        quote! {
            if let Some(bytes) = bytes.get(#start .. #start + #len) {
                <#field_ty as zerovec::ule::ULE>::validate_bytes(bytes)?;
            } else {
                return Err(zerovec::ule::UleError::parse::<Self>());
            }
        }
    })
}
/// Produces one field declaration per input field, with the same visibility and name
/// (if any) but with the type replaced by `<T as zerovec::ule::AsULE>::ULE`.
pub(crate) fn make_ule_fields(fields: &[FieldInfo]) -> Vec<TokenStream2> {
    let mut declarations = Vec::with_capacity(fields.len());
    for info in fields {
        let field_ty = &info.field.ty;
        let ule_ty = quote!(<#field_ty as zerovec::ule::AsULE>::ULE);
        let visibility = &info.field.vis;
        let setter = info.setter();
        declarations.push(quote!(#visibility #setter #ule_ty));
    }
    declarations
}

403
vendor/zerovec-derive/src/utils.rs vendored Normal file
View File

@@ -0,0 +1,403 @@
// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
use quote::{quote, ToTokens};
use proc_macro2::Span;
use proc_macro2::TokenStream as TokenStream2;
use syn::parse::{Parse, ParseStream};
use syn::punctuated::Punctuated;
use syn::spanned::Spanned;
use syn::{Attribute, Error, Field, Fields, Ident, Index, Result, Token};
/// Records which options were seen in a type's `#[repr(..)]` attributes.
/// All flags default to `false`.
#[derive(Default)]
pub struct ReprInfo {
    /// `C` (or the lowercase spelling `c`) was listed
    pub c: bool,
    /// `transparent` was listed
    pub transparent: bool,
    /// `u8` was listed
    pub u8: bool,
    /// `packed` was listed
    pub packed: bool,
}
impl ReprInfo {
    /// Scans `attrs` for `#[repr(..)]` attributes and records which of `C`/`c`,
    /// `transparent`, `packed` and `u8` are listed.
    ///
    /// A `repr` attribute whose arguments are not a plain comma-separated list of
    /// identifiers is ignored, as are identifiers other than the ones above.
    pub fn compute(attrs: &[Attribute]) -> Self {
        let mut info = ReprInfo::default();
        let repr_lists = attrs
            .iter()
            .filter(|attr| attr.path().is_ident("repr"))
            .filter_map(|attr| attr.parse_args::<IdentListAttribute>().ok());
        for list in repr_lists {
            for piece in &list.idents {
                match piece.to_string().as_str() {
                    "C" | "c" => info.c = true,
                    "transparent" => info.transparent = true,
                    "packed" => info.packed = true,
                    "u8" => info.u8 = true,
                    _ => {}
                }
            }
        }
        info
    }

    /// True when the type is `repr(transparent)`, or both `repr(C)` and `repr(packed)`.
    pub fn cpacked_or_transparent(self) -> bool {
        self.transparent || (self.c && self.packed)
    }
}
// An attribute that is a list of idents
//
// Used as the target type of `Attribute::parse_args`, so it describes the contents
// between the attribute's parentheses, e.g. the `C, packed` in `#[repr(C, packed)]`.
struct IdentListAttribute {
    // The identifiers, in source order, together with their separating commas
    idents: Punctuated<Ident, Token![,]>,
}
impl Parse for IdentListAttribute {
    /// Parses the whole input as comma-separated identifiers, failing if anything
    /// other than identifiers and commas is present.
    fn parse(input: ParseStream) -> Result<Self> {
        input
            .parse_terminated(Ident::parse, Token![,])
            .map(|idents| IdentListAttribute { idents })
    }
}
/// Joins `streams` with commas and wraps the result in the delimiter matching the kind
/// of `fields`: `{ .. }` for named fields, `( .. )` for tuple fields.
///
/// # Panics
///
/// Panics for unit structs; callers are expected to have rejected those already.
pub fn wrap_field_inits(streams: &[TokenStream2], fields: &Fields) -> TokenStream2 {
    let joined = quote!( #(#streams),* );
    match fields {
        Fields::Named(_) => quote!( { #joined } ),
        Fields::Unnamed(_) => quote!( ( #joined ) ),
        Fields::Unit => {
            unreachable!("#[make_(var)ule] should have already checked that there are fields")
        }
    }
}
/// Returns the `;` that must follow a tuple struct definition, or nothing for
/// any other kind of struct.
pub fn semi_for(f: &Fields) -> TokenStream2 {
    match f {
        Fields::Unnamed(..) => quote!(;),
        Fields::Named(..) | Fields::Unit => quote!(),
    }
}
/// Returns the contents of the `#[repr(..)]` attribute for the resultant ULE or VarULE
/// type: `transparent` for a single-field struct, `C, packed` otherwise.
pub fn repr_for(f: &Fields) -> TokenStream2 {
    match f.len() {
        1 => quote!(transparent),
        _ => quote!(C, packed),
    }
}
/// Creates the identifier `{name}_{suffix}` with span `s`.
fn suffixed_ident(name: &str, suffix: usize, s: Span) -> Ident {
    let text = format!("{name}_{suffix}");
    Ident::new(&text, s)
}
/// Given a slice of ULE or AsULE struct fields, returns code that declares, for each field
/// `i`, the consts `SIZE_i` (size of the field's ULE type) and `OFFSET_i` (offset just past
/// the field), followed by the code produced by `per_field_code` for that field.
///
/// `per_field_code` receives the field, the identifier of the const holding the offset at
/// which the field starts (`ZERO` for the first field), and the identifier of the const
/// holding the field's size.
///
/// Returns the generated code together with the identifier of the last offset const
/// (`ZERO` when `fields` is empty).
pub(crate) fn generate_per_field_offsets<'a>(
    fields: &[FieldInfo<'a>],
    // Whether the fields are ULE types or AsULE (and need conversion)
    fields_are_asule: bool,
    // (field, prev_offset_ident, size_ident)
    mut per_field_code: impl FnMut(&FieldInfo<'a>, &Ident, &Ident) -> TokenStream2, /* (code, remaining_offset) */
) -> (TokenStream2, syn::Ident) {
    let mut last_offset = Ident::new("ZERO", Span::call_site());
    let mut code = quote!(
        const ZERO: usize = 0;
    );
    for (i, field_info) in fields.iter().enumerate() {
        let span = field_info.field.span();
        let declared_ty = &field_info.field.ty;
        let ule_ty = if fields_are_asule {
            quote!(<#declared_ty as zerovec::ule::AsULE>::ULE)
        } else {
            quote!(#declared_ty)
        };
        let next_offset = suffixed_ident("OFFSET", i, span);
        let size_ident = suffixed_ident("SIZE", i, span);
        let pf_code = per_field_code(field_info, &last_offset, &size_ident);
        // Append to the accumulated code; the leading `;` separates this field's
        // items from whatever was emitted before them.
        code.extend(quote! {
            ;
            const #size_ident: usize = ::core::mem::size_of::<#ule_ty>();
            const #next_offset: usize = #last_offset + #size_ident;
            #pf_code;
        });
        last_offset = next_offset;
    }
    (code, last_offset)
}
/// A struct field together with the information needed to refer to it in generated code.
#[derive(Clone, Debug)]
pub(crate) struct FieldInfo<'a> {
    /// Tokens to place after a `.` to access the field: the field's name for named
    /// fields, its numeric index for tuple fields
    pub accessor: TokenStream2,
    /// The field as parsed by `syn`
    pub field: &'a Field,
    /// The index this `FieldInfo` was constructed with (its position in the list when
    /// built via `make_list`)
    pub index: usize,
}
impl<'a> FieldInfo<'a> {
    /// Builds a `FieldInfo` for every field yielded by `iter`, numbering them from zero.
    pub fn make_list(iter: impl Iterator<Item = &'a Field>) -> Vec<Self> {
        let mut infos = Vec::new();
        for (position, field) in iter.enumerate() {
            infos.push(Self::new_for_field(field, position));
        }
        infos
    }

    /// Builds a `FieldInfo` for `f`. The accessor is the field's name when it has one,
    /// and the tuple index `index` otherwise.
    pub fn new_for_field(f: &'a Field, index: usize) -> Self {
        let accessor = match f.ident {
            Some(ref name) => quote!(#name),
            None => {
                let tuple_index = Index::from(index);
                quote!(#tuple_index)
            }
        };
        FieldInfo {
            accessor,
            field: f,
            index,
        }
    }

    /// Get the code for setting this field in struct decl/brace syntax:
    /// `name:` for a named field, nothing for a tuple field.
    ///
    /// Use self.accessor for dot-notation accesses
    pub fn setter(&self) -> TokenStream2 {
        match self.field.ident {
            Some(ref name) => quote!(#name: ),
            None => quote!(),
        }
    }

    /// Produce a name for a getter for the field: the field's own name, or
    /// `field_<index>` for a tuple field.
    pub fn getter(&self) -> TokenStream2 {
        match self.field.ident {
            Some(ref name) => quote!(#name),
            None => suffixed_ident("field", self.index, self.field.span()).into_token_stream(),
        }
    }

    /// Produce a prose name for the field for use in docs
    pub fn getter_doc_name(&self) -> String {
        match self.field.ident {
            Some(ref name) => format!("the unsized `{name}` field"),
            None => format!("tuple struct field #{}", self.index),
        }
    }
}
/// Extracts all `zerovec::name(..)` attribute
pub fn extract_parenthetical_zerovec_attrs(
attrs: &mut Vec<Attribute>,
name: &str,
) -> Result<Vec<Ident>> {
let mut ret = vec![];
let mut error = None;
attrs.retain(|a| {
// skip the "zerovec" part
let second_segment = a.path().segments.iter().nth(1);
if let Some(second) = second_segment {
if second.ident == name {
let list = match a.parse_args::<IdentListAttribute>() {
Ok(l) => l,
Err(_) => {
error = Some(Error::new(
a.span(),
format!("#[zerovec::{name}(..)] takes in a comma separated list of identifiers"),
));
return false;
}
};
ret.extend(list.idents.iter().cloned());
return false;
}
}
true
});
if let Some(error) = error {
return Err(error);
}
Ok(ret)
}
/// Removes every attribute whose second path segment is `name` from `attrs` and returns
/// the tokens found inside the parentheses of the first one, or `None` if there is no
/// such attribute.
///
/// The first path segment is not inspected here.
///
/// # Errors
///
/// Returns an error if the attribute appears more than once, or if it is not written in
/// the parenthesized `#[zerovec::name(..)]` form.
pub fn extract_single_tt_attr(
    attrs: &mut Vec<Attribute>,
    name: &str,
) -> Result<Option<TokenStream2>> {
    let mut ret = None;
    let mut error = None;
    attrs.retain(|a| {
        // skip the "zerovec" part
        let second_segment = a.path().segments.iter().nth(1);
        if let Some(second) = second_segment {
            if second.ident == name {
                if ret.is_some() {
                    error = Some(Error::new(
                        a.span(),
                        "Can only specify a single VarZeroVecFormat via #[zerovec::format(..)]",
                    ));
                    return false;
                }
                ret = match a.parse_args::<TokenStream2>() {
                    Ok(l) => Some(l),
                    Err(_) => {
                        // Any token sequence is accepted inside the parentheses, so this
                        // only fails when the parenthesized argument is missing.
                        error = Some(Error::new(
                            a.span(),
                            format!("#[zerovec::{name}(..)] takes in a single parenthesized argument"),
                        ));
                        return false;
                    }
                };
                return false;
            }
        }
        true
    });
    if let Some(error) = error {
        return Err(error);
    }
    Ok(ret)
}
/// Moves every attribute with a two-segment path starting with `zerovec`
/// (i.e. `#[zerovec::something]`) out of `attrs` and returns them in their original order.
pub fn extract_zerovec_attributes(attrs: &mut Vec<Attribute>) -> Vec<Attribute> {
    let mut extracted = vec![];
    attrs.retain(|attr| {
        let segments = &attr.path().segments;
        let is_zerovec = segments.len() == 2 && segments[0].ident == "zerovec";
        if is_zerovec {
            extracted.push(attr.clone());
        }
        !is_zerovec
    });
    extracted
}
/// Extract attributes from field, and return them
///
/// Only current field attribute is `zerovec::varule(VarUleType)`
///
/// All `#[zerovec::..]` attributes are removed from `attrs`. Returns the identifier given
/// to `#[zerovec::varule(..)]`, or `None` if the field has no such attribute.
///
/// # Errors
///
/// Returns an error if more than one VarULE type is specified, or if the field carries
/// any other `#[zerovec::..]` attribute.
pub fn extract_field_attributes(attrs: &mut Vec<Attribute>) -> Result<Option<Ident>> {
    let mut zerovec_attrs = extract_zerovec_attributes(attrs);
    let varule = extract_parenthetical_zerovec_attrs(&mut zerovec_attrs, "varule")?;
    if let Some(extra) = varule.get(1) {
        return Err(Error::new(
            extra.span(),
            "Found multiple #[zerovec::varule()] on one field",
        ));
    }
    // Anything still left is unsupported. Point at the first leftover attribute; there
    // may be only one, so it must not be looked up by a fixed later index.
    if let Some(unusable) = zerovec_attrs.first() {
        return Err(Error::new(
            unusable.span(),
            "Found unusable #[zerovec::] attrs on field, only #[zerovec::varule()] supported",
        ));
    }
    Ok(varule.into_iter().next())
}
/// The parsed `#[zerovec::..]` attributes of a struct passed to `#[make_ule]` / `#[make_varule]`.
/// Every flag defaults to `false` and `vzv_format` to `None`.
#[derive(Default, Clone)]
pub struct ZeroVecAttrs {
    /// Set by `#[zerovec::skip_derive(ZeroMapKV)]`
    pub skip_kv: bool,
    /// Set by `#[zerovec::skip_derive(Ord)]`
    pub skip_ord: bool,
    /// Set by `#[zerovec::skip_derive(ToOwned)]` (only accepted for `#[make_varule]`)
    pub skip_toowned: bool,
    /// Set by `#[zerovec::skip_derive(From)]` (only accepted for `#[make_varule]`)
    pub skip_from: bool,
    /// Set by `#[zerovec::derive(Serialize)]`
    pub serialize: bool,
    /// Set by `#[zerovec::derive(Deserialize)]`
    pub deserialize: bool,
    /// Set by `#[zerovec::derive(Debug)]`
    pub debug: bool,
    /// Set by `#[zerovec::derive(Hash)]`
    pub hash: bool,
    /// The tokens given to `#[zerovec::format(..)]`, if any (only accepted for `#[make_varule]`)
    pub vzv_format: Option<TokenStream2>,
}
/// Removes all `#[zerovec::..]` attributes from the struct's `attrs`, validates them, and
/// returns the settings they describe.
///
/// `is_var` selects between `#[make_varule]` (`true`) and `#[make_ule]` (`false`) rules;
/// `span` is used for errors that concern the struct as a whole.
///
/// # Errors
///
/// Returns an error for malformed, unknown or duplicate attributes, for unknown names in
/// `derive(..)` / `skip_derive(..)`, and for options that `#[make_ule]` does not accept
/// (`skip_derive(ToOwned)`, `skip_derive(From)`, `format(..)`, `derive(Serialize, Deserialize)`).
pub fn extract_attributes_common(
    attrs: &mut Vec<Attribute>,
    span: Span,
    is_var: bool,
) -> Result<ZeroVecAttrs> {
    let mut zerovec_attrs = extract_zerovec_attributes(attrs);
    let derive = extract_parenthetical_zerovec_attrs(&mut zerovec_attrs, "derive")?;
    let skip = extract_parenthetical_zerovec_attrs(&mut zerovec_attrs, "skip_derive")?;
    let format = extract_single_tt_attr(&mut zerovec_attrs, "format")?;
    let name = if is_var { "make_varule" } else { "make_ule" };

    // Everything recognised has been pulled out above; anything left is an error.
    if let Some(leftover) = zerovec_attrs.first() {
        return Err(Error::new(
            leftover.span(),
            format!("Found unknown or duplicate attribute for #[{name}]"),
        ));
    }

    let mut parsed = ZeroVecAttrs::default();
    for ident in derive {
        match ident.to_string().as_str() {
            "Serialize" => parsed.serialize = true,
            "Deserialize" => parsed.deserialize = true,
            "Debug" => parsed.debug = true,
            "Hash" => parsed.hash = true,
            _ => {
                return Err(Error::new(
                    ident.span(),
                    format!(
                        "Found unknown derive attribute for #[{name}]: #[zerovec::derive({ident})]"
                    ),
                ));
            }
        }
    }
    for ident in skip {
        match ident.to_string().as_str() {
            "ZeroMapKV" => parsed.skip_kv = true,
            "Ord" => parsed.skip_ord = true,
            "ToOwned" if is_var => parsed.skip_toowned = true,
            "From" if is_var => parsed.skip_from = true,
            _ => {
                return Err(Error::new(
                    ident.span(),
                    format!("Found unknown derive attribute for #[{name}]: #[zerovec::skip_derive({ident})]"),
                ));
            }
        }
    }
    if !is_var {
        if let Some(ref format) = format {
            return Err(Error::new(
                format.span(),
                format!(
                    "Found unknown derive attribute for #[{name}]: #[zerovec::format({format})]"
                ),
            ));
        }
    }
    parsed.vzv_format = format;
    if !is_var && (parsed.serialize || parsed.deserialize) {
        return Err(Error::new(
            span,
            "#[make_ule] does not support #[zerovec::derive(Serialize, Deserialize)]",
        ));
    }
    Ok(parsed)
}

129
vendor/zerovec-derive/src/varule.rs vendored Normal file
View File

@@ -0,0 +1,129 @@
// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
use crate::utils::{self, FieldInfo};
use proc_macro2::Span;
use proc_macro2::TokenStream as TokenStream2;
use quote::quote;
use syn::spanned::Spanned;
use syn::{Data, DeriveInput, Error, Ident};
/// Implementation for derive(VarULE).
///
/// All fields but the last are treated as sized ULE fields; the last field is the unsized
/// one. If `custom_varule_validator` is given, it is spliced in as the code validating the
/// last field's bytes (available as `last_field_bytes`); otherwise the last field's own
/// `VarULE::validate_bytes` is used.
///
/// Emits a compile error instead of an impl when the input is not a non-empty,
/// non-generic struct marked `#[repr(C, packed)]` or `#[repr(transparent)]`.
pub fn derive_impl(
    input: &DeriveInput,
    custom_varule_validator: Option<TokenStream2>,
) -> TokenStream2 {
    let repr = utils::ReprInfo::compute(&input.attrs);
    if !repr.cpacked_or_transparent() {
        return Error::new(
            input.span(),
            "derive(VarULE) must be applied to a #[repr(C, packed)] or #[repr(transparent)] type",
        )
        .to_compile_error();
    }

    let generics = &input.generics;
    let is_generic = generics.type_params().next().is_some()
        || generics.lifetimes().next().is_some()
        || generics.const_params().next().is_some();
    if is_generic {
        return Error::new(
            generics.span(),
            "derive(VarULE) must be applied to a struct without any generics",
        )
        .to_compile_error();
    }

    let struc = match input.data {
        Data::Struct(ref s) if s.fields.iter().next().is_none() => {
            return Error::new(
                input.span(),
                "derive(VarULE) must be applied to a non-empty struct",
            )
            .to_compile_error();
        }
        Data::Struct(ref s) => s,
        _ => {
            return Error::new(input.span(), "derive(VarULE) must be applied to a struct")
                .to_compile_error();
        }
    };

    // Everything except the trailing field is a sized ULE field.
    let sized_count = struc.fields.len() - 1;
    let ule_fields = FieldInfo::make_list(struc.fields.iter().take(sized_count));
    let sizes: Vec<TokenStream2> = ule_fields
        .iter()
        .map(|info| {
            let ty = &info.field.ty;
            quote!(::core::mem::size_of::<#ty>())
        })
        .collect();

    let (validators, remaining_offset) = if ule_fields.is_empty() {
        // no ULE subfields
        (
            quote!(
                const ZERO: usize = 0;
            ),
            Ident::new("ZERO", Span::call_site()),
        )
    } else {
        // generate ULE validators
        crate::ule::generate_ule_validators(&ule_fields)
    };

    let last_field = struc
        .fields
        .iter()
        .next_back()
        .expect("Already verified that struct is not empty");
    let unsized_field = &last_field.ty;

    let name = &input.ident;
    let ule_size = Ident::new(
        &format!("__IMPL_VarULE_FOR_{name}_ULE_SIZE"),
        Span::call_site(),
    );

    let last_field_validator = custom_varule_validator.unwrap_or_else(|| {
        quote!(<#unsized_field as zerovec::ule::VarULE>::validate_bytes(last_field_bytes)?;)
    });

    // Safety (based on the safety checklist on the ULE trait):
    //  1. #name does not include any uninitialized or padding bytes
    //     (achieved by enforcing #[repr(transparent)] or #[repr(C, packed)] on a struct of only ULE types)
    //  2. #name is aligned to 1 byte.
    //     (achieved by enforcing #[repr(transparent)] or #[repr(C, packed)] on a struct of only ULE types)
    //  3. The impl of `validate_bytes()` returns an error if any byte is not valid.
    //  4. The impl of `validate_bytes()` returns an error if the slice cannot be used in its entirety
    //  5. The impl of `from_bytes_unchecked()` returns a reference to the same data.
    //  6. The other VarULE methods use the default impl
    //  7. [This impl does not enforce the non-safety equality constraint, it is up to the user to do so, ideally via a custom derive]
    quote! {
        // The size of the ULE section of this type
        const #ule_size: usize = 0 #(+ #sizes)*;
        unsafe impl zerovec::ule::VarULE for #name {
            #[inline]
            fn validate_bytes(bytes: &[u8]) -> Result<(), zerovec::ule::UleError> {
                debug_assert_eq!(#remaining_offset, #ule_size);
                let Some(last_field_bytes) = bytes.get(#remaining_offset..) else {
                    return Err(zerovec::ule::UleError::parse::<Self>());
                };
                #validators
                #last_field_validator
                Ok(())
            }
            #[inline]
            unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Self {
                // just the unsized part
                // Safety: The invariants of this function allow us to assume bytes is valid, and
                // having at least #ule_size bytes is a validity constraint for the ULE type.
                let unsized_bytes = bytes.get_unchecked(#ule_size..);
                let unsized_ref = <#unsized_field as zerovec::ule::VarULE>::from_bytes_unchecked(unsized_bytes);
                // We should use the pointer metadata APIs here when they are stable: https://github.com/rust-lang/rust/issues/81513
                // For now we rely on all DST metadata being a usize to extract it via a fake slice pointer
                let (_ptr, metadata): (usize, usize) = ::core::mem::transmute(unsized_ref);
                let entire_struct_as_slice: *const [u8] = ::core::slice::from_raw_parts(bytes.as_ptr(), metadata);
                &*(entire_struct_as_slice as *const Self)
            }
        }
    }
}