chore: checkpoint before Python removal

This commit is contained in:
2026-03-26 22:33:59 +00:00
parent 683cec9307
commit e568ddf82a
29972 changed files with 11269302 additions and 2 deletions

33
vendor/potential_utf/src/lib.rs vendored Normal file
View File

@@ -0,0 +1,33 @@
// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
#![cfg_attr(not(any(test, doc)), no_std)]
#![cfg_attr(
not(test),
deny(
clippy::indexing_slicing,
clippy::unwrap_used,
clippy::expect_used,
clippy::panic,
clippy::exhaustive_structs,
clippy::exhaustive_enums,
clippy::trivially_copy_pass_by_ref,
missing_debug_implementations,
)
)]
//! A crate providing unvalidated string and character types.
// The `alloc` crate is linked only when the `alloc` Cargo feature is enabled.
#[cfg(feature = "alloc")]
extern crate alloc;
// Private implementation modules; their public types are re-exported just below.
mod uchar;
mod ustr;
pub use uchar::PotentialCodePoint;
pub use ustr::PotentialUtf16;
pub use ustr::PotentialUtf8;
// This module is compiled only when the `writeable` Cargo feature is enabled.
#[cfg(feature = "writeable")]
mod writeable;

375
vendor/potential_utf/src/uchar.rs vendored Normal file
View File

@@ -0,0 +1,375 @@
// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
use core::cmp::Ordering;
use core::fmt;
/// A three-byte (24-bit) integer that is *expected* to hold a Unicode scalar value, but is
/// never checked to be one.
///
/// Reach for this type in place of `char` when the data should consist of valid Unicode
/// scalar values and you want to decide yourself when, or whether, that is validated.
///
/// # Examples
///
/// ```
/// use potential_utf::PotentialCodePoint;
///
/// assert_eq!(PotentialCodePoint::from_u24(0x68).try_to_char(), Ok('h'));
/// assert_eq!(PotentialCodePoint::from_char('i').try_to_char(), Ok('i'));
/// assert_eq!(
///     PotentialCodePoint::from_u24(0x1F44B).try_to_char(),
///     Ok('👋')
/// );
///
/// assert!(PotentialCodePoint::from_u24(0xDE01).try_to_char().is_err());
/// assert_eq!(
///     PotentialCodePoint::from_u24(0xDE01).to_char_lossy(),
///     char::REPLACEMENT_CHARACTER
/// );
/// ```
#[derive(Clone, Copy, PartialEq, Eq, Hash)]
#[allow(clippy::exhaustive_structs)] // transparent newtype
#[repr(transparent)]
pub struct PotentialCodePoint([u8; 3]);
impl PotentialCodePoint {
    /// Create a [`PotentialCodePoint`] from a `char`.
    ///
    /// # Examples
    ///
    /// ```
    /// use potential_utf::PotentialCodePoint;
    ///
    /// let a = PotentialCodePoint::from_char('a');
    /// assert_eq!(a.try_to_char().unwrap(), 'a');
    /// ```
    #[inline]
    pub const fn from_char(c: char) -> Self {
        // A `char` never exceeds 0x10FFFF, so the byte dropped by `from_u24` is always zero.
        Self::from_u24(c as u32)
    }

    /// Create [`PotentialCodePoint`] from a u32 value, ignoring the most significant 8 bits.
    ///
    /// # Examples
    ///
    /// ```
    /// use potential_utf::PotentialCodePoint;
    ///
    /// let h = PotentialCodePoint::from_u24(0x68);
    /// assert_eq!(h.try_to_char(), Ok('h'));
    ///
    /// // The most significant byte is discarded.
    /// assert_eq!(PotentialCodePoint::from_u24(0xFF00_0068), h);
    /// ```
    #[inline]
    pub const fn from_u24(c: u32) -> Self {
        let [u0, u1, u2, _u3] = c.to_le_bytes();
        Self([u0, u1, u2])
    }

    /// Attempt to convert a [`PotentialCodePoint`] to a `char`.
    ///
    /// # Errors
    ///
    /// Returns an error if the stored value is not a valid Unicode scalar value.
    ///
    /// # Examples
    ///
    /// ```
    /// use potential_utf::PotentialCodePoint;
    /// use zerovec::ule::AsULE;
    ///
    /// let a = PotentialCodePoint::from_char('a');
    /// assert_eq!(a.try_to_char(), Ok('a'));
    ///
    /// let b = PotentialCodePoint::from_unaligned([0xFF, 0xFF, 0xFF].into());
    /// assert!(b.try_to_char().is_err());
    /// ```
    #[inline]
    pub fn try_to_char(self) -> Result<char, core::char::CharTryFromError> {
        char::try_from(u32::from(self))
    }

    /// Convert a [`PotentialCodePoint`] to a `char`, returning [`char::REPLACEMENT_CHARACTER`]
    /// if the `PotentialCodePoint` does not represent a valid Unicode scalar value.
    ///
    /// # Examples
    ///
    /// ```
    /// use potential_utf::PotentialCodePoint;
    /// use zerovec::ule::AsULE;
    ///
    /// let a = PotentialCodePoint::from_unaligned([0xFF, 0xFF, 0xFF].into());
    /// assert_eq!(a.to_char_lossy(), char::REPLACEMENT_CHARACTER);
    /// ```
    #[inline]
    pub fn to_char_lossy(self) -> char {
        self.try_to_char().unwrap_or(char::REPLACEMENT_CHARACTER)
    }

    /// Convert a [`PotentialCodePoint`] to a `char` without checking that it is
    /// a valid Unicode scalar value.
    ///
    /// # Safety
    ///
    /// The `PotentialCodePoint` must be a valid Unicode scalar value in little-endian order.
    ///
    /// # Examples
    ///
    /// ```
    /// use potential_utf::PotentialCodePoint;
    ///
    /// let a = PotentialCodePoint::from_char('a');
    /// assert_eq!(unsafe { a.to_char_unchecked() }, 'a');
    /// ```
    #[inline]
    pub unsafe fn to_char_unchecked(self) -> char {
        // SAFETY: the caller guarantees that `self` holds a valid Unicode scalar value.
        unsafe { char::from_u32_unchecked(u32::from(self)) }
    }

    /// For converting to the ULE type in a const context
    ///
    /// Can be removed once const traits are a thing
    #[inline]
    #[cfg(feature = "zerovec")]
    pub const fn to_unaligned(self) -> zerovec::ule::RawBytesULE<3> {
        zerovec::ule::RawBytesULE(self.0)
    }
}
/// This impl requires enabling the optional `zerovec` Cargo feature
#[cfg(feature = "zerovec")]
impl zerovec::ule::AsULE for PotentialCodePoint {
    type ULE = zerovec::ule::RawBytesULE<3>;

    // Both directions only move the three stored bytes from one wrapper to the other.
    #[inline]
    fn to_unaligned(self) -> Self::ULE {
        let Self(bytes) = self;
        zerovec::ule::RawBytesULE(bytes)
    }

    #[inline]
    fn from_unaligned(unaligned: Self::ULE) -> Self {
        let zerovec::ule::RawBytesULE(bytes) = unaligned;
        Self(bytes)
    }
}
// Safety: PotentialCodePoint stores its three bytes in little-endian order (it is not
// required to hold a valid `char`), and its AsULE::ULE type wraps those same three bytes:
// `to_unaligned`/`from_unaligned` only move them from one wrapper to the other.
/// This impl requires enabling the optional `zerovec` Cargo feature
#[cfg(feature = "zerovec")]
unsafe impl zerovec::ule::EqULE for PotentialCodePoint {}
impl fmt::Debug for PotentialCodePoint {
    /// Prints the value as a `char` when it is a valid scalar value, and as the raw
    /// three-byte array otherwise.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        if let Ok(scalar) = self.try_to_char() {
            return fmt::Debug::fmt(&scalar, f);
        }
        fmt::Debug::fmt(&self.0, f)
    }
}
impl PartialOrd for PotentialCodePoint {
    /// Always `Some`: defers to the total order defined by the `Ord` impl.
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(Ord::cmp(self, other))
    }
}
impl PartialEq<char> for PotentialCodePoint {
    /// Equal when the stored bytes match the encoding of `other`.
    fn eq(&self, other: &char) -> bool {
        let rhs = Self::from_char(*other);
        *self == rhs
    }
}
impl PartialOrd<char> for PotentialCodePoint {
    /// Compares against `other` after converting it to a [`PotentialCodePoint`].
    fn partial_cmp(&self, other: &char) -> Option<Ordering> {
        let rhs = Self::from_char(*other);
        <Self as PartialOrd<Self>>::partial_cmp(self, &rhs)
    }
}
impl PartialEq<PotentialCodePoint> for char {
    /// Equal when the encoding of `self` matches the bytes stored in `other`.
    fn eq(&self, other: &PotentialCodePoint) -> bool {
        let lhs = PotentialCodePoint::from_char(*self);
        lhs == *other
    }
}
impl PartialOrd<PotentialCodePoint> for char {
    /// Compares `self`, converted to a [`PotentialCodePoint`], against `other`.
    fn partial_cmp(&self, other: &PotentialCodePoint) -> Option<Ordering> {
        let lhs = PotentialCodePoint::from_char(*self);
        <PotentialCodePoint as PartialOrd>::partial_cmp(&lhs, other)
    }
}
impl Ord for PotentialCodePoint {
    // Written by hand on purpose: a derived `Ord` would compare the stored bytes
    // lexicographically, whereas this compares the numeric values.
    fn cmp(&self, other: &Self) -> Ordering {
        u32::from(*self).cmp(&u32::from(*other))
    }
}
impl From<PotentialCodePoint> for u32 {
fn from(x: PotentialCodePoint) -> Self {
let [a0, a1, a2] = x.0;
u32::from_le_bytes([a0, a1, a2, 0])
}
}
impl TryFrom<u32> for PotentialCodePoint {
    type Error = ();

    /// Succeeds only when `x` fits in 24 bits, i.e. its most significant byte is zero.
    fn try_from(x: u32) -> Result<Self, ()> {
        match x.to_le_bytes() {
            [u0, u1, u2, 0] => Ok(Self([u0, u1, u2])),
            _ => Err(()),
        }
    }
}
impl From<char> for PotentialCodePoint {
#[inline]
fn from(value: char) -> Self {
Self::from_char(value)
}
}
impl TryFrom<PotentialCodePoint> for char {
type Error = core::char::CharTryFromError;
#[inline]
fn try_from(value: PotentialCodePoint) -> Result<char, Self::Error> {
value.try_to_char()
}
}
/// This impl requires enabling the optional `serde` Cargo feature
#[cfg(feature = "serde")]
impl serde_core::Serialize for PotentialCodePoint {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde_core::Serializer,
    {
        use serde_core::ser::Error;
        // Validation comes first, so an invalid value is rejected in every format.
        let c = match self.try_to_char() {
            Ok(c) => c,
            Err(_) => {
                return Err(S::Error::custom(
                    "invalid Unicode scalar value in PotentialCodePoint",
                ))
            }
        };
        if !serializer.is_human_readable() {
            // Non-human-readable formats get the raw three-byte array.
            return serde_core::Serialize::serialize(&self.0, serializer);
        }
        serializer.serialize_char(c)
    }
}
/// This impl requires enabling the optional `serde` Cargo feature
#[cfg(feature = "serde")]
impl<'de> serde_core::Deserialize<'de> for PotentialCodePoint {
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde_core::Deserializer<'de>,
    {
        if !deserializer.is_human_readable() {
            // The three bytes are stored as read, without checking that they form a `char`.
            return <[u8; 3] as serde_core::Deserialize<'de>>::deserialize(deserializer)
                .map(PotentialCodePoint);
        }
        <char as serde_core::Deserialize<'de>>::deserialize(deserializer)
            .map(PotentialCodePoint::from_char)
    }
}
/// This impl requires enabling the optional `databake` Cargo feature
#[cfg(feature = "databake")]
impl databake::Bake for PotentialCodePoint {
    fn bake(&self, env: &databake::CrateEnv) -> databake::TokenStream {
        // Both output forms refer to this crate, so it is registered up front.
        env.insert("potential_utf");
        if let Ok(ch) = self.try_to_char() {
            let ch = ch.bake(env);
            return databake::quote! {
                potential_utf::PotentialCodePoint::from_char(#ch)
            };
        }
        // Not a valid `char`: emit the numeric value instead.
        let u24 = u32::from(*self);
        databake::quote! {
            potential_utf::PotentialCodePoint::from_u24(#u24)
        }
    }
}
// Unit tests for `PotentialCodePoint`: serde round-trips, in-memory byte layout, and
// databake output.
#[cfg(test)]
mod test {
use super::*;
use zerovec::ZeroVec;
// 0xFFFFFF is not a valid `char`, so serialization must fail in both the human-readable
// (JSON) and the binary (bincode) format.
#[test]
fn test_serde_fail() {
let uc = PotentialCodePoint([0xFF, 0xFF, 0xFF]);
serde_json::to_string(&uc).expect_err("serialize invalid char bytes");
bincode::serialize(&uc).expect_err("serialize invalid char bytes");
}
// Human-readable formats use the character itself.
#[test]
fn test_serde_json() {
let c = '🙃';
let uc = PotentialCodePoint::from_char(c);
let json_ser = serde_json::to_string(&uc).unwrap();
assert_eq!(json_ser, r#""🙃""#);
let json_de: PotentialCodePoint = serde_json::from_str(&json_ser).unwrap();
assert_eq!(uc, json_de);
}
// Binary formats use the three little-endian bytes (U+1F643 -> 43 F6 01).
#[test]
fn test_serde_bincode() {
let c = '🙃';
let uc = PotentialCodePoint::from_char(c);
let bytes_ser = bincode::serialize(&uc).unwrap();
assert_eq!(bytes_ser, [0x43, 0xF6, 0x01]);
let bytes_de: PotentialCodePoint = bincode::deserialize(&bytes_ser).unwrap();
assert_eq!(uc, bytes_de);
}
// Checks that a slice of `PotentialCodePoint` and the `ZeroVec` built from it hold the
// same bytes.
#[test]
fn test_representation() {
let chars = ['w', 'ω', '文', '𑄃', '🙃'];
// backed by [PotentialCodePoint]
let uvchars: Vec<_> = chars
.iter()
.copied()
.map(PotentialCodePoint::from_char)
.collect();
// backed by [RawBytesULE<3>]
let zvec: ZeroVec<_> = uvchars.clone().into_iter().collect();
let ule_bytes = zvec.as_bytes();
let uvbytes;
// NOTE(review): reads the `Vec<PotentialCodePoint>` storage as raw bytes. This relies on
// `PotentialCodePoint` being `repr(transparent)` over `[u8; 3]`, and assumes the ZeroVec
// also uses 3 bytes per element so that `ule_bytes.len()` stays in bounds.
unsafe {
let ptr = &uvchars[..] as *const _ as *const u8;
uvbytes = core::slice::from_raw_parts(ptr, ule_bytes.len());
}
// PotentialCodePoint is defined as little-endian, so this must be true on all platforms
// also asserts that to_unaligned/from_unaligned are no-ops
assert_eq!(uvbytes, ule_bytes);
assert_eq!(
&[119, 0, 0, 201, 3, 0, 135, 101, 0, 3, 17, 1, 67, 246, 1],
ule_bytes
);
}
// A valid scalar value bakes to `from_char`; anything else bakes to `from_u24`.
#[test]
fn test_char_bake() {
databake::test_bake!(
PotentialCodePoint,
const,
crate::PotentialCodePoint::from_char('b'),
potential_utf
);
// surrogate code point
databake::test_bake!(
PotentialCodePoint,
const,
crate::PotentialCodePoint::from_u24(55296u32),
potential_utf
);
}
}

278
vendor/potential_utf/src/ustr.rs vendored Normal file
View File

@@ -0,0 +1,278 @@
// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
#[cfg(feature = "alloc")]
use alloc::boxed::Box;
use core::cmp::Ordering;
use core::fmt;
use core::ops::Deref;
/// A byte slice that is *expected* to contain UTF-8 text, without that being enforced.
///
/// Prefer this type to `str` when UTF-8 validation during deserialization is unnecessary.
/// Map keys are a typical case: they never have to be turned into a `str`.
///
/// [`PotentialUtf8`] derefs to `[u8]`. To obtain a `str`, use [`Self::try_as_str()`].
///
/// Compared with a plain `[u8]`, the benefit is that human-readable formats such as JSON
/// serialize this type as a string.
///
/// # Examples
///
/// Using a [`PotentialUtf8`] as the key of a [`ZeroMap`]:
///
/// ```
/// use potential_utf::PotentialUtf8;
/// use zerovec::ZeroMap;
///
/// // This map is cheap to deserialize, as we don't need to perform UTF-8 validation.
/// let map: ZeroMap<PotentialUtf8, u8> = [
///     (PotentialUtf8::from_bytes(b"abc"), 11),
///     (PotentialUtf8::from_bytes(b"def"), 22),
///     (PotentialUtf8::from_bytes(b"ghi"), 33),
/// ]
/// .into_iter()
/// .collect();
///
/// let key = "abc";
/// let value = map.get_copied(PotentialUtf8::from_str(key));
/// assert_eq!(Some(11), value);
/// ```
///
/// [`ZeroMap`]: zerovec::ZeroMap
#[derive(PartialEq, Eq, PartialOrd, Ord)]
#[allow(clippy::exhaustive_structs)] // transparent newtype
#[repr(transparent)]
pub struct PotentialUtf8(pub [u8]);
impl fmt::Debug for PotentialUtf8 {
    /// Prints the content as a string when it is valid UTF-8, and as the raw byte slice
    /// otherwise.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        if let Ok(text) = self.try_as_str() {
            return fmt::Debug::fmt(text, f);
        }
        fmt::Debug::fmt(&self.0, f)
    }
}
impl PotentialUtf8 {
    /// Create a [`PotentialUtf8`] from a byte slice.
    ///
    /// No validation is performed; the bytes are reinterpreted in place.
    #[inline]
    pub const fn from_bytes(other: &[u8]) -> &Self {
        // SAFETY: PotentialUtf8 is transparent over [u8], so the pointer cast keeps the
        // same address, length, and lifetime.
        unsafe { &*(other as *const [u8] as *const Self) }
    }

    /// Create a [`PotentialUtf8`] from a string slice.
    #[inline]
    pub const fn from_str(s: &str) -> &Self {
        Self::from_bytes(s.as_bytes())
    }

    /// Create a [`PotentialUtf8`] from boxed bytes.
    ///
    /// ✨ *Enabled with the `alloc` Cargo feature.*
    #[inline]
    #[cfg(feature = "alloc")]
    pub fn from_boxed_bytes(other: Box<[u8]>) -> Box<Self> {
        let raw = Box::into_raw(other) as *mut Self;
        // SAFETY: `raw` comes from `Box::into_raw` on a `Box<[u8]>`, and PotentialUtf8 is
        // transparent over [u8], so the allocation and its layout are unchanged.
        unsafe { Box::from_raw(raw) }
    }

    /// Create a [`PotentialUtf8`] from a boxed `str`.
    ///
    /// ✨ *Enabled with the `alloc` Cargo feature.*
    #[inline]
    #[cfg(feature = "alloc")]
    pub fn from_boxed_str(other: Box<str>) -> Box<Self> {
        Self::from_boxed_bytes(other.into_boxed_bytes())
    }

    /// Get the bytes from a [`PotentialUtf8`].
    #[inline]
    pub const fn as_bytes(&self) -> &[u8] {
        &self.0
    }

    /// Attempt to convert a [`PotentialUtf8`] to a `str`.
    ///
    /// # Errors
    ///
    /// Returns the [`core::str::Utf8Error`] from validation if the bytes are not valid UTF-8.
    ///
    /// # Examples
    ///
    /// ```
    /// use potential_utf::PotentialUtf8;
    ///
    /// static A: &PotentialUtf8 = PotentialUtf8::from_bytes(b"abc");
    ///
    /// let b = A.try_as_str().unwrap();
    /// assert_eq!(b, "abc");
    /// ```
    // Note: `core::str::from_utf8` is `const` starting in Rust 1.63, so this method could
    // become a `const fn` once the minimum supported Rust version allows it.
    #[inline]
    pub fn try_as_str(&self) -> Result<&str, core::str::Utf8Error> {
        core::str::from_utf8(&self.0)
    }
}
impl<'a> From<&'a str> for &'a PotentialUtf8 {
    /// Borrows the string's bytes as a [`PotentialUtf8`]; nothing is copied.
    #[inline]
    fn from(other: &'a str) -> Self {
        PotentialUtf8::from_bytes(other.as_bytes())
    }
}
impl PartialEq<str> for PotentialUtf8 {
fn eq(&self, other: &str) -> bool {
self.eq(Self::from_str(other))
}
}
impl PartialOrd<str> for PotentialUtf8 {
    /// Lexicographic byte comparison with the UTF-8 encoding of `other`.
    fn partial_cmp(&self, other: &str) -> Option<Ordering> {
        <[u8] as PartialOrd>::partial_cmp(&self.0, other.as_bytes())
    }
}
impl PartialEq<PotentialUtf8> for str {
fn eq(&self, other: &PotentialUtf8) -> bool {
PotentialUtf8::from_str(self).eq(other)
}
}
impl PartialOrd<PotentialUtf8> for str {
fn partial_cmp(&self, other: &PotentialUtf8) -> Option<Ordering> {
PotentialUtf8::from_str(self).partial_cmp(other)
}
}
#[cfg(feature = "alloc")]
impl From<Box<str>> for Box<PotentialUtf8> {
    /// Converts the boxed string via its boxed bytes.
    #[inline]
    fn from(other: Box<str>) -> Self {
        let bytes = other.into_boxed_bytes();
        PotentialUtf8::from_boxed_bytes(bytes)
    }
}
impl Deref for PotentialUtf8 {
    type Target = [u8];

    /// Exposes the underlying bytes, giving access to the `[u8]` methods.
    fn deref(&self) -> &Self::Target {
        self.as_bytes()
    }
}
/// This impl requires enabling both the optional `zerovec` and `alloc` Cargo features
// (`alloc` is needed because `OwnedType` is a `Box`.)
#[cfg(all(feature = "zerovec", feature = "alloc"))]
impl<'a> zerovec::maps::ZeroMapKV<'a> for PotentialUtf8 {
type Container = zerovec::VarZeroVec<'a, PotentialUtf8>;
type Slice = zerovec::VarZeroSlice<PotentialUtf8>;
type GetType = PotentialUtf8;
type OwnedType = Box<PotentialUtf8>;
}
// Safety (based on the safety checklist on the VarULE trait):
// 1. PotentialUtf8 does not include any uninitialized or padding bytes (transparent over a ULE)
// 2. PotentialUtf8 is aligned to 1 byte (transparent over a ULE)
// 3. The impl of `validate_bytes()` returns an error if any byte is not valid (impossible:
//    every byte value is accepted, so `validate_bytes()` always returns `Ok`)
// 4. The impl of `validate_bytes()` returns an error if the slice cannot be used in its
//    entirety (impossible: a byte slice of any length is accepted)
// 5. The impl of `from_bytes_unchecked()` returns a reference to the same data (returns the argument directly)
// 6. All other methods are defaulted
// 7. `[T]` byte equality is semantic equality (transparent over a ULE)
/// This impl requires enabling the optional `zerovec` Cargo feature
#[cfg(feature = "zerovec")]
unsafe impl zerovec::ule::VarULE for PotentialUtf8 {
// Nothing to check: any byte slice is an acceptable `PotentialUtf8`.
#[inline]
fn validate_bytes(_: &[u8]) -> Result<(), zerovec::ule::UleError> {
Ok(())
}
// Wraps `bytes` in place through `PotentialUtf8::from_bytes`.
#[inline]
unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Self {
PotentialUtf8::from_bytes(bytes)
}
}
/// This impl requires enabling the optional `serde` Cargo feature
#[cfg(feature = "serde")]
impl serde_core::Serialize for PotentialUtf8 {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde_core::Serializer,
    {
        use serde_core::ser::Error;
        // Validation comes first, so invalid UTF-8 is rejected in every format.
        let text = match self.try_as_str() {
            Ok(text) => text,
            Err(_) => return Err(S::Error::custom("invalid UTF-8 in PotentialUtf8")),
        };
        if !serializer.is_human_readable() {
            return serializer.serialize_bytes(text.as_bytes());
        }
        serializer.serialize_str(text)
    }
}
/// This impl requires enabling the optional `serde` Cargo feature
#[cfg(all(feature = "serde", feature = "alloc"))]
impl<'de> serde_core::Deserialize<'de> for Box<PotentialUtf8> {
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde_core::Deserializer<'de>,
    {
        if !deserializer.is_human_readable() {
            // Binary formats carry raw bytes, which are taken as they are.
            return Box::<[u8]>::deserialize(deserializer).map(PotentialUtf8::from_boxed_bytes);
        }
        Box::<str>::deserialize(deserializer).map(PotentialUtf8::from_boxed_str)
    }
}
/// This impl requires enabling the optional `serde` Cargo feature
#[cfg(feature = "serde")]
impl<'de, 'a> serde_core::Deserialize<'de> for &'a PotentialUtf8
where
    'de: 'a,
{
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde_core::Deserializer<'de>,
    {
        if !deserializer.is_human_readable() {
            // Binary formats carry raw bytes, which are borrowed as they are.
            return <&[u8]>::deserialize(deserializer).map(PotentialUtf8::from_bytes);
        }
        <&str>::deserialize(deserializer).map(PotentialUtf8::from_str)
    }
}
/// A `u16` slice that is expected to be a UTF-16 string but does not enforce that invariant.
///
/// The code units are stored as given; the code in this crate that reads them decodes
/// with `char::decode_utf16` and handles unpaired surrogates at that point.
#[repr(transparent)]
#[derive(PartialEq, Eq, PartialOrd, Ord)]
#[allow(clippy::exhaustive_structs)] // transparent newtype
pub struct PotentialUtf16(pub [u16]);
impl fmt::Debug for PotentialUtf16 {
    /// Writes each decodable character as-is and each unpaired surrogate as `\0x` followed
    /// by its hexadecimal value.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        char::decode_utf16(self.0.iter().copied()).try_for_each(|unit| match unit {
            Ok(c) => write!(f, "{c}"),
            Err(e) => write!(f, "\\0x{:x}", e.unpaired_surrogate()),
        })
    }
}
impl PotentialUtf16 {
    /// Create a [`PotentialUtf16`] from a u16 slice.
    ///
    /// No validation is performed; the code units are reinterpreted in place.
    #[inline]
    pub const fn from_slice(other: &[u16]) -> &Self {
        // SAFETY: PotentialUtf16 is transparent over [u16]
        unsafe { &*(other as *const [u16] as *const Self) }
    }

    /// Iterate over the decoded characters, yielding [`char::REPLACEMENT_CHARACTER`] in
    /// place of every unpaired surrogate.
    pub fn chars(&self) -> impl Iterator<Item = char> + '_ {
        let decoded = char::decode_utf16(self.0.iter().copied());
        decoded.map(|unit| match unit {
            Ok(c) => c,
            Err(_) => char::REPLACEMENT_CHARACTER,
        })
    }
}

128
vendor/potential_utf/src/writeable.rs vendored Normal file
View File

@@ -0,0 +1,128 @@
// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
use crate::{PotentialUtf16, PotentialUtf8};
use core::fmt::Write;
use writeable::{LengthHint, Part, PartsWrite, TryWriteable};
use core::{char::DecodeUtf16Error, fmt, str::Utf8Error};
/// This impl requires enabling the optional `writeable` Cargo feature
impl TryWriteable for &'_ PotentialUtf8 {
    type Error = Utf8Error;

    /// Writes the valid stretches unchanged and one replacement character (tagged
    /// `Part::ERROR`) per invalid sequence; the inner result carries the first UTF-8 error.
    fn try_write_to_parts<S: PartsWrite + ?Sized>(
        &self,
        sink: &mut S,
    ) -> Result<Result<(), Self::Error>, fmt::Error> {
        let mut outcome = Ok(());
        let mut rest = &self.0;
        loop {
            let e = match core::str::from_utf8(rest) {
                Ok(tail) => {
                    sink.write_str(tail)?;
                    break;
                }
                Err(e) => e,
            };
            let good_len = e.valid_up_to();
            // SAFETY: By Utf8Error invariants
            let good = unsafe { core::str::from_utf8_unchecked(rest.get_unchecked(..good_len)) };
            sink.write_str(good)?;
            sink.with_part(Part::ERROR, |s| s.write_char(char::REPLACEMENT_CHARACTER))?;
            // `and` keeps an earlier error, so only the first one is reported.
            outcome = outcome.and(Err(e));
            match e.error_len() {
                // SAFETY: By Utf8Error invariants
                Some(bad_len) => rest = unsafe { rest.get_unchecked(good_len + bad_len..) },
                None => break, // end of string
            }
        }
        Ok(outcome)
    }

    fn writeable_length_hint(&self) -> LengthHint {
        // Lower bound is all valid UTF-8, upper bound is all bytes with the high bit, which become replacement characters.
        let byte_len = self.0.len();
        LengthHint::between(byte_len, byte_len * 3)
    }
}
/// This impl requires enabling the optional `writeable` Cargo feature
impl TryWriteable for &'_ PotentialUtf16 {
    type Error = DecodeUtf16Error;

    /// Writes every decodable character and one replacement character (tagged
    /// `Part::ERROR`) per unpaired surrogate; the inner result carries the first error.
    fn try_write_to_parts<S: PartsWrite + ?Sized>(
        &self,
        sink: &mut S,
    ) -> Result<Result<(), Self::Error>, fmt::Error> {
        let mut outcome = Ok(());
        for unit in core::char::decode_utf16(self.0.iter().copied()) {
            let e = match unit {
                Ok(c) => {
                    sink.write_char(c)?;
                    continue;
                }
                Err(e) => e,
            };
            // `and` keeps an earlier error, so only the first one is reported.
            outcome = outcome.and(Err(e));
            sink.with_part(Part::ERROR, |s| s.write_char(char::REPLACEMENT_CHARACTER))?;
        }
        Ok(outcome)
    }

    fn writeable_length_hint(&self) -> LengthHint {
        // Lower bound is all ASCII, upper bound is all 3-byte code points (including replacement character)
        let unit_len = self.0.len();
        LengthHint::between(unit_len, unit_len * 3)
    }
}
// Tests for the `TryWriteable` impls. The expected strings spell U+FFFD as an escape so
// that they stay three bytes long, which is what the asserted `Part::ERROR` ranges require.
#[cfg(test)]
mod test {
    #![allow(invalid_from_utf8)] // only way to construct the error
    use super::*;
    use writeable::assert_try_writeable_parts_eq;

    #[test]
    fn test_utf8() {
        assert_try_writeable_parts_eq!(
            PotentialUtf8::from_bytes(b"Foo Bar"),
            "Foo Bar",
            Ok(()),
            []
        );
        assert_try_writeable_parts_eq!(
            PotentialUtf8::from_bytes(b"Foo\xFDBar"),
            "Foo\u{FFFD}Bar",
            Err(core::str::from_utf8(b"Foo\xFDBar").unwrap_err()),
            [(3, 6, Part::ERROR)]
        );
        assert_try_writeable_parts_eq!(
            PotentialUtf8::from_bytes(b"Foo\xFDBar\xff"),
            "Foo\u{FFFD}Bar\u{FFFD}",
            Err(core::str::from_utf8(b"Foo\xFDBar\xff").unwrap_err()),
            [(3, 6, Part::ERROR), (9, 12, Part::ERROR)],
        );
    }

    #[test]
    fn test_utf16() {
        assert_try_writeable_parts_eq!(
            PotentialUtf16::from_slice(&[0xD83E, 0xDD73]),
            "🥳",
            Ok(()),
            []
        );
        // Two unpaired surrogates around a space: each becomes one U+FFFD.
        assert_try_writeable_parts_eq!(
            PotentialUtf16::from_slice(&[0xD83E, 0x20, 0xDD73]),
            "\u{FFFD} \u{FFFD}",
            Err(core::char::decode_utf16([0xD83E].into_iter())
                .next()
                .unwrap()
                .unwrap_err()),
            [(0, 3, Part::ERROR), (4, 7, Part::ERROR)]
        );
    }
}