chore: checkpoint before Python removal

2026-03-26 22:33:59 +00:00
parent 683cec9307
commit e568ddf82a
29972 changed files with 11269302 additions and 2 deletions
--- a/vendor/utf8_iter/src/indices.rs
+++ b/vendor/utf8_iter/src/indices.rs
@@ -0,0 +1,120 @@
+// The code in this file was adapted from the CharIndices implementation of
+// the Rust standard library at revision ab32548539ec38a939c1b58599249f3b54130026
+// (https://github.com/rust-lang/rust/blob/ab32548539ec38a939c1b58599249f3b54130026/library/core/src/str/iter.rs).
+//
+// Excerpt from https://github.com/rust-lang/rust/blob/ab32548539ec38a939c1b58599249f3b54130026/COPYRIGHT ,
+// which refers to
+// https://github.com/rust-lang/rust/blob/ab32548539ec38a939c1b58599249f3b54130026/LICENSE-APACHE
+// and
+// https://github.com/rust-lang/rust/blob/ab32548539ec38a939c1b58599249f3b54130026/LICENSE-MIT
+// :
+//
+// For full authorship information, see the version control history or
+// https://thanks.rust-lang.org
+//
+// Except as otherwise noted (below and/or in individual files), Rust is
+// licensed under the Apache License, Version 2.0 <LICENSE-APACHE> or
+// <http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT> or <http://opensource.org/licenses/MIT>, at your option.
+
+use super::Utf8Chars;
+use core::iter::FusedIterator;
+
+/// An iterator over the [`char`]s and their byte positions.
+///
+/// Each item is `(offset, char)`; offsets are counted in bytes from the
+/// start of the slice the iterator was created from.
+#[derive(Clone, Debug)]
+#[must_use = "iterators are lazy and do nothing unless consumed"]
+pub struct Utf8CharIndices<'a> {
+    // Number of bytes consumed from the front so far; starts at 0 in `new`.
+    front_offset: usize,
+    // Underlying `char` iterator over the bytes that have not been consumed.
+    iter: Utf8Chars<'a>,
+}
+
+impl<'a> Iterator for Utf8CharIndices<'a> {
+    type Item = (usize, char);
+
+    /// Yields the next `char` together with the byte offset at which it
+    /// started, or `None` when the input is exhausted.
+    #[inline]
+    fn next(&mut self) -> Option<(usize, char)> {
+        let remaining_before = self.as_slice().len();
+        let ch = self.iter.next()?;
+        let start = self.front_offset;
+        // The inner iterator shrank its slice by exactly the number of bytes
+        // it consumed for this item; advance the front offset by that much.
+        let remaining_after = self.as_slice().len();
+        self.front_offset += remaining_before - remaining_after;
+        Some((start, ch))
+    }
+
+    /// Counts the remaining items; offsets are irrelevant for counting, so
+    /// this forwards to the inner iterator.
+    #[inline]
+    fn count(self) -> usize {
+        let Self { iter, .. } = self;
+        iter.count()
+    }
+
+    /// Forwards the size hint of the inner `char` iterator.
+    #[inline]
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        let inner = &self.iter;
+        inner.size_hint()
+    }
+
+    /// Returns the final item by stepping once from the back.
+    #[inline]
+    fn last(mut self) -> Option<(usize, char)> {
+        // Taking one item from the back avoids walking the whole input.
+        self.next_back()
+    }
+}
+
+impl<'a> DoubleEndedIterator for Utf8CharIndices<'a> {
+    /// Yields the last remaining `char` together with its starting byte
+    /// offset, or `None` when the input is exhausted.
+    #[inline]
+    fn next_back(&mut self) -> Option<(usize, char)> {
+        let ch = self.iter.next_back()?;
+        // With the back item removed, the bytes still held by the inner
+        // iterator end exactly where that item began.
+        let start = self.front_offset + self.as_slice().len();
+        Some((start, ch))
+    }
+}
+
+// `next` only returns `None` when the inner `Utf8Chars` does, so this
+// iterator is fused whenever the inner one is.
+impl FusedIterator for Utf8CharIndices<'_> {}
+
+impl<'a> Utf8CharIndices<'a> {
+    /// Creates the iterator from a byte slice.
+    ///
+    /// Offsets reported by the iterator are relative to the start of `bytes`.
+    #[inline(always)]
+    pub fn new(bytes: &'a [u8]) -> Self {
+        let iter = Utf8Chars::new(bytes);
+        Self {
+            front_offset: 0,
+            iter,
+        }
+    }
+
+    /// Views the not-yet-consumed data as a subslice of the original data.
+    ///
+    /// The returned slice borrows from the original input, not from the
+    /// iterator, so the iterator can keep being used while the slice exists.
+    #[must_use]
+    #[inline]
+    pub fn as_slice(&self) -> &'a [u8] {
+        let inner = &self.iter;
+        inner.as_slice()
+    }
+
+    /// Returns the byte position of the next character, or the length
+    /// of the underlying string if there are no more characters.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use utf8_iter::Utf8CharsEx;
+    /// let mut chars = "a楽".as_bytes().char_indices();
+    ///
+    /// assert_eq!(chars.offset(), 0);
+    /// assert_eq!(chars.next(), Some((0, 'a')));
+    ///
+    /// assert_eq!(chars.offset(), 1);
+    /// assert_eq!(chars.next(), Some((1, '楽')));
+    ///
+    /// assert_eq!(chars.offset(), 4);
+    /// assert_eq!(chars.next(), None);
+    /// ```
+    #[inline]
+    #[must_use]
+    pub fn offset(&self) -> usize {
+        let Self { front_offset, .. } = self;
+        *front_offset
+    }
+}
--- a/vendor/utf8_iter/src/lib.rs
+++ b/vendor/utf8_iter/src/lib.rs
@@ -0,0 +1,282 @@
+// Copyright Mozilla Foundation
+//
+// Licensed under the Apache License (Version 2.0), or the MIT license,
+// (the "Licenses") at your option. You may not use this file except in
+// compliance with one of the Licenses. You may obtain copies of the
+// Licenses at:
+//
+//    https://www.apache.org/licenses/LICENSE-2.0
+//    https://opensource.org/licenses/MIT
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the Licenses is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the Licenses for the specific language governing permissions and
+// limitations under the Licenses.
+
+#![no_std]
+
+//! Provides iteration by `char` over `&[u8]` containing potentially-invalid
+//! UTF-8 such that errors are handled according to the [WHATWG Encoding
+//! Standard](https://encoding.spec.whatwg.org/#utf-8-decoder) (i.e. the same
+//! way as in `String::from_utf8_lossy`).
+//!
+//! The trait `Utf8CharsEx` provides the convenience method `chars()` on
+//! byte slices themselves instead of having to use the more verbose
+//! `Utf8Chars::new(slice)`.
+//!
+//! ```rust
+//! use utf8_iter::Utf8CharsEx;
+//! let data = b"\xFF\xC2\xE2\xE2\x98\xF0\xF0\x9F\xF0\x9F\x92\xE2\x98\x83";
+//! let from_iter: String = data.chars().collect();
+//! let from_std = String::from_utf8_lossy(data);
+//! assert_eq!(from_iter, from_std);
+//! ```
+
+mod indices;
+mod report;
+
+pub use crate::indices::Utf8CharIndices;
+pub use crate::report::ErrorReportingUtf8Chars;
+pub use crate::report::Utf8CharsError;
+use core::iter::FusedIterator;
+
+// Wrapper type whose only purpose is to give the 384-byte lookup table a
+// 64-byte alignment.
+#[repr(align(64))] // Align to cache lines
+struct Utf8Data {
+    pub table: [u8; 384],
+}
+
+// This is generated code copied and pasted from utf_8.rs of encoding_rs.
+// Please don't edit by hand but instead regenerate as instructed in that
+// file.
+
+// Lookup table used by the fast paths of the decoders in this crate.
+//
+// It is read at two indices per sequence: `second` (the second byte, so
+// entries 0..=255) and `first + 0x80` (the lead byte, only used for lead
+// bytes >= 0x80, so entries 256..=383). The two entries are ANDed together
+// and the result is folded into a single comparison with bits derived from
+// the trailing bytes; any mismatch sends the decoder to its slow path.
+static UTF8_DATA: Utf8Data = Utf8Data {
+    table: [
+        252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252,
+        252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252,
+        252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252,
+        252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252,
+        252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252,
+        252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252,
+        252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252,
+        252, 252, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 148, 148, 148,
+        148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 148, 164, 164, 164, 164, 164,
+        164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, 164,
+        164, 164, 164, 164, 164, 164, 164, 164, 164, 252, 252, 252, 252, 252, 252, 252, 252, 252,
+        252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252,
+        252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252,
+        252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252, 252,
+        252, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+        4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+        8, 8, 8, 8, 8, 8, 8, 16, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 32, 8, 8, 64, 8, 8, 8, 128, 4,
+        4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
+    ],
+};
+
+// End manually copypasted generated code.
+
+/// Returns `true` when `i` lies in `start..=end` (callers pass `start <= end`).
+///
+/// The test is done with one wrapping subtraction and one unsigned
+/// comparison: values below `start` wrap around to a large distance.
+#[inline(always)]
+fn in_inclusive_range8(i: u8, start: u8, end: u8) -> bool {
+    let distance = i.wrapping_sub(start);
+    let span = end - start;
+    distance <= span
+}
+
+/// Iterator by `char` over `&[u8]` that contains
+/// potentially-invalid UTF-8. See the crate documentation.
+///
+/// Ill-formed input is reported as `'\u{FFFD}'` items rather than as errors.
+#[derive(Debug, Clone)]
+pub struct Utf8Chars<'a> {
+    // Bytes not yet consumed from either end of the original slice.
+    remaining: &'a [u8],
+}
+
+impl<'a> Utf8Chars<'a> {
+    #[inline(always)]
+    /// Creates the iterator from a byte slice.
+    pub fn new(bytes: &'a [u8]) -> Self {
+        Utf8Chars::<'a> { remaining: bytes }
+    }
+
+    /// Views the current remaining data in the iterator as a subslice
+    /// of the original slice.
+    #[inline(always)]
+    pub fn as_slice(&self) -> &'a [u8] {
+        self.remaining
+    }
+
+    /// Slow path of `next()`: handles end of input, inputs shorter than four
+    /// bytes, and every ill-formed sequence.
+    ///
+    /// Returns `None` only when no bytes remain. Otherwise consumes at least
+    /// one byte and returns either the decoded `char` or `'\u{FFFD}'`.
+    #[inline(never)]
+    fn next_fallback(&mut self) -> Option<char> {
+        if self.remaining.is_empty() {
+            return None;
+        }
+        let first = self.remaining[0];
+        if first < 0x80 {
+            // ASCII: one byte, one `char`.
+            self.remaining = &self.remaining[1..];
+            return Some(char::from(first));
+        }
+        // Either `first` is not in the lead-byte range accepted here, or it
+        // is the very last byte and so cannot be followed by anything:
+        // consume just this byte.
+        if !in_inclusive_range8(first, 0xC2, 0xF4) || self.remaining.len() == 1 {
+            self.remaining = &self.remaining[1..];
+            return Some('\u{FFFD}');
+        }
+        let second = self.remaining[1];
+        // Allowed range of the second byte; four lead bytes have a narrowed
+        // range, all others accept any byte in 0x80..=0xBF.
+        let (lower_bound, upper_bound) = match first {
+            0xE0 => (0xA0, 0xBF),
+            0xED => (0x80, 0x9F),
+            0xF0 => (0x90, 0xBF),
+            0xF4 => (0x80, 0x8F),
+            _ => (0x80, 0xBF),
+        };
+        if !in_inclusive_range8(second, lower_bound, upper_bound) {
+            // Only the lead byte is consumed; `second` is examined again on
+            // the next call.
+            self.remaining = &self.remaining[1..];
+            return Some('\u{FFFD}');
+        }
+        if first < 0xE0 {
+            self.remaining = &self.remaining[2..];
+            let point = ((u32::from(first) & 0x1F) << 6) | (u32::from(second) & 0x3F);
+            // SAFETY: here `first` is in 0xC2..=0xDF and `second` is in
+            // 0x80..=0xBF, so `point` is in 0x80..=0x7FF, which contains no
+            // surrogates and is below 0x110000.
+            return Some(unsafe { char::from_u32_unchecked(point) });
+        }
+        if self.remaining.len() == 2 {
+            // Input ends after a valid two-byte prefix of a longer sequence:
+            // both bytes are consumed as a single error.
+            self.remaining = &self.remaining[2..];
+            return Some('\u{FFFD}');
+        }
+        let third = self.remaining[2];
+        if !in_inclusive_range8(third, 0x80, 0xBF) {
+            // The two-byte prefix is consumed as one error; `third` is
+            // examined again on the next call.
+            self.remaining = &self.remaining[2..];
+            return Some('\u{FFFD}');
+        }
+        if first < 0xF0 {
+            self.remaining = &self.remaining[3..];
+            let point = ((u32::from(first) & 0xF) << 12)
+                | ((u32::from(second) & 0x3F) << 6)
+                | (u32::from(third) & 0x3F);
+            // SAFETY: here `first` is in 0xE0..=0xEF, `second` passed the
+            // range check above (which keeps 0xE0 sequences at or above
+            // 0x800 and 0xED sequences at or below 0xD7FF) and `third` is in
+            // 0x80..=0xBF, so `point` is a non-surrogate value below 0x10000.
+            return Some(unsafe { char::from_u32_unchecked(point) });
+        }
+        // At this point, we have a valid 3-byte prefix of a
+        // four-byte sequence that has to be incomplete, because
+        // otherwise `next()` would have succeeded.
+        self.remaining = &self.remaining[3..];
+        Some('\u{FFFD}')
+    }
+}
+
+impl<'a> Iterator for Utf8Chars<'a> {
+    type Item = char;
+
+    /// Decodes the next `char`.
+    ///
+    /// The body is a fast path that only runs when at least four bytes
+    /// remain and only returns for well-formed sequences; everything else
+    /// (short input, ill-formed sequences, end of input) is delegated to
+    /// `next_fallback()`.
+    #[inline]
+    fn next(&mut self) -> Option<char> {
+        // Not delegating directly to `ErrorReportingUtf8Chars` to avoid
+        // an extra branch in the common case based on a cursory inspection
+        // of generated code in a similar case. Be sure to inspect the
+        // generated code as inlined into an actual usage site carefully
+        // if attempting to consolidate the source code here.
+
+        // This loop is only broken out of as goto forward
+        #[allow(clippy::never_loop)]
+        loop {
+            // With four or more bytes available, the indexing below
+            // (`[0]` through `[3]`) cannot go out of bounds.
+            if self.remaining.len() < 4 {
+                break;
+            }
+            let first = self.remaining[0];
+            if first < 0x80 {
+                self.remaining = &self.remaining[1..];
+                return Some(char::from(first));
+            }
+            let second = self.remaining[1];
+            if in_inclusive_range8(first, 0xC2, 0xDF) {
+                if !in_inclusive_range8(second, 0x80, 0xBF) {
+                    break;
+                }
+                let point = ((u32::from(first) & 0x1F) << 6) | (u32::from(second) & 0x3F);
+                self.remaining = &self.remaining[2..];
+                // SAFETY: `first` is in 0xC2..=0xDF and `second` is in
+                // 0x80..=0xBF, so `point` is in 0x80..=0x7FF.
+                return Some(unsafe { char::from_u32_unchecked(point) });
+            }
+            // This table-based formulation was benchmark-based in encoding_rs,
+            // but it hasn't been re-benchmarked in this iterator context.
+            let third = self.remaining[2];
+            if first < 0xF0 {
+                // The expression equals 2 only when the ANDed table entries
+                // contribute no bits and the top two bits of `third` are
+                // `10` (i.e. `third` is in 0x80..=0xBF).
+                if ((UTF8_DATA.table[usize::from(second)]
+                    & UTF8_DATA.table[usize::from(first) + 0x80])
+                    | (third >> 6))
+                    != 2
+                {
+                    break;
+                }
+                let point = ((u32::from(first) & 0xF) << 12)
+                    | ((u32::from(second) & 0x3F) << 6)
+                    | (u32::from(third) & 0x3F);
+                self.remaining = &self.remaining[3..];
+                // SAFETY: reached only when the table-driven check above
+                // accepted the sequence.
+                // NOTE(review): soundness rests on the generated table
+                // accepting only valid lead/second byte pairs; the table
+                // contents were not re-verified here.
+                return Some(unsafe { char::from_u32_unchecked(point) });
+            }
+            let fourth = self.remaining[3];
+            // Same idea widened to 16 bits: 0x202 requires no bits from the
+            // table entries, `third >> 6 == 2`, and the top two bits of
+            // `fourth` to be `10`.
+            if (u16::from(
+                UTF8_DATA.table[usize::from(second)] & UTF8_DATA.table[usize::from(first) + 0x80],
+            ) | u16::from(third >> 6)
+                | (u16::from(fourth & 0xC0) << 2))
+                != 0x202
+            {
+                break;
+            }
+            let point = ((u32::from(first) & 0x7) << 18)
+                | ((u32::from(second) & 0x3F) << 12)
+                | ((u32::from(third) & 0x3F) << 6)
+                | (u32::from(fourth) & 0x3F);
+            self.remaining = &self.remaining[4..];
+            // SAFETY: reached only when the table-driven check above
+            // accepted the sequence.
+            // NOTE(review): as above, this depends on the generated table
+            // being correct; confirm against encoding_rs if it is ever
+            // regenerated.
+            return Some(unsafe { char::from_u32_unchecked(point) });
+        }
+        self.next_fallback()
+    }
+}
+
+impl<'a> DoubleEndedIterator for Utf8Chars<'a> {
+    /// Decodes one item from the end of the remaining input.
+    ///
+    /// Returns `None` when no bytes remain; otherwise consumes at least one
+    /// byte from the back.
+    #[inline]
+    fn next_back(&mut self) -> Option<char> {
+        let total = self.remaining.len();
+        if total == 0 {
+            return None;
+        }
+        // Search at most the last four bytes, from the back, for the nearest
+        // byte that is not a continuation byte (`10xxxxxx`).
+        let lead_distance = self
+            .remaining
+            .iter()
+            .rev()
+            .take(4)
+            .position(|&byte| byte & 0xC0 != 0x80);
+        if let Some(distance) = lead_distance {
+            let (head, tail) = self.remaining.split_at(total - (distance + 1));
+            // Decode the tail forwards; it is accepted only if a single
+            // forward step uses up the whole tail.
+            let mut forward = Utf8Chars::new(tail);
+            let decoded = forward.next();
+            if forward.as_slice().is_empty() {
+                self.remaining = head;
+                return decoded;
+            }
+        }
+
+        // No acceptable sequence ends here: drop only the final byte and
+        // report it as an error.
+        self.remaining = &self.remaining[..total - 1];
+        Some('\u{FFFD}')
+    }
+}
+
+// `next` and `next_back` return `None` only when `remaining` is empty, and
+// nothing ever grows `remaining`, so `None` is returned forever after.
+impl FusedIterator for Utf8Chars<'_> {}
+
+/// Convenience trait that adds `chars()` and `char_indices()` methods
+/// similar to the ones on string slices to byte slices.
+pub trait Utf8CharsEx {
+    /// Returns an iterator over the `char`s of `self`.
+    fn chars(&self) -> Utf8Chars<'_>;
+    /// Returns an iterator over the `char`s of `self` paired with their
+    /// byte offsets.
+    fn char_indices(&self) -> Utf8CharIndices<'_>;
+}
+
+impl Utf8CharsEx for [u8] {
+    /// Convenience method for creating a UTF-8 iterator
+    /// for the slice. Equivalent to `Utf8Chars::new(slice)`.
+    #[inline]
+    fn chars(&self) -> Utf8Chars<'_> {
+        Utf8Chars::new(self)
+    }
+    /// Convenience method for creating a byte index and
+    /// UTF-8 iterator for the slice. Equivalent to
+    /// `Utf8CharIndices::new(slice)`.
+    #[inline]
+    fn char_indices(&self) -> Utf8CharIndices<'_> {
+        Utf8CharIndices::new(self)
+    }
+}
+
+// No manually-written tests for forward-iteration, because the code passed multiple
+// days of fuzzing comparing with known-good behavior.
--- a/vendor/utf8_iter/src/report.rs
+++ b/vendor/utf8_iter/src/report.rs
@@ -0,0 +1,234 @@
+// Copyright Mozilla Foundation
+//
+// Licensed under the Apache License (Version 2.0), or the MIT license,
+// (the "Licenses") at your option. You may not use this file except in
+// compliance with one of the Licenses. You may obtain copies of the
+// Licenses at:
+//
+//    https://www.apache.org/licenses/LICENSE-2.0
+//    https://opensource.org/licenses/MIT
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the Licenses is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the Licenses for the specific language governing permissions and
+// limitations under the Licenses.
+
+use crate::in_inclusive_range8;
+use crate::UTF8_DATA;
+use core::fmt::Formatter;
+use core::iter::FusedIterator;
+
+/// A type for signaling UTF-8 errors.
+///
+/// The type carries no data; it only marks that an item of
+/// [`ErrorReportingUtf8Chars`] was an ill-formed sequence.
+///
+/// Note: `core::error::Error` is not implemented due to implementing it
+/// being an [unstable feature][1] at the time of writing.
+///
+/// [1]: https://github.com/rust-lang/rust/issues/103765
+// NOTE(review): the linked feature has reportedly been stabilized in newer
+// Rust releases; confirm the crate's minimum supported Rust version before
+// adding the impl.
+#[derive(Debug, PartialEq)]
+#[non_exhaustive]
+pub struct Utf8CharsError;
+
+impl core::fmt::Display for Utf8CharsError {
+    /// Writes a fixed, human-readable description of the error.
+    fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), core::fmt::Error> {
+        const MESSAGE: &str = "byte sequence not well-formed UTF-8";
+        f.write_str(MESSAGE)
+    }
+}
+
+/// Iterator by `Result<char,Utf8CharsError>` over `&[u8]` that contains
+/// potentially-invalid UTF-8. There is exactly one `Utf8CharsError` per
+/// each error as defined by the WHATWG Encoding Standard.
+///
+/// Mapping every `Err` to `'\u{FFFD}'` gives the same sequence of `char`s as
+/// `Utf8Chars` over the same input:
+///
+/// ```
+/// let s = b"a\xFFb\xFF\x80c\xF0\x9F\xA4\xA6\xF0\x9F\xA4\xF0\x9F\xF0d";
+/// let plain = utf8_iter::Utf8Chars::new(s);
+/// let reporting = utf8_iter::ErrorReportingUtf8Chars::new(s);
+/// assert!(plain.eq(reporting.map(|r| r.unwrap_or('\u{FFFD}'))));
+/// ```
+#[derive(Debug, Clone)]
+pub struct ErrorReportingUtf8Chars<'a> {
+    // Bytes not yet consumed from either end of the original slice.
+    remaining: &'a [u8],
+}
+
+impl<'a> ErrorReportingUtf8Chars<'a> {
+    #[inline(always)]
+    /// Creates the iterator from a byte slice.
+    pub fn new(bytes: &'a [u8]) -> Self {
+        ErrorReportingUtf8Chars::<'a> { remaining: bytes }
+    }
+
+    /// Views the current remaining data in the iterator as a subslice
+    /// of the original slice.
+    #[inline(always)]
+    pub fn as_slice(&self) -> &'a [u8] {
+        self.remaining
+    }
+
+    /// Slow path of `next()`: handles end of input, inputs shorter than four
+    /// bytes, and every ill-formed sequence.
+    ///
+    /// Returns `None` only when no bytes remain. Otherwise consumes at least
+    /// one byte and returns either `Ok(char)` or `Err(Utf8CharsError)`.
+    #[inline(never)]
+    fn next_fallback(&mut self) -> Option<Result<char, Utf8CharsError>> {
+        if self.remaining.is_empty() {
+            return None;
+        }
+        let first = self.remaining[0];
+        if first < 0x80 {
+            // ASCII: one byte, one `char`.
+            self.remaining = &self.remaining[1..];
+            return Some(Ok(char::from(first)));
+        }
+        // Either `first` is not in the lead-byte range accepted here, or it
+        // is the very last byte and so cannot be followed by anything:
+        // consume just this byte.
+        if !in_inclusive_range8(first, 0xC2, 0xF4) || self.remaining.len() == 1 {
+            self.remaining = &self.remaining[1..];
+            return Some(Err(Utf8CharsError));
+        }
+        let second = self.remaining[1];
+        // Allowed range of the second byte; four lead bytes have a narrowed
+        // range, all others accept any byte in 0x80..=0xBF.
+        let (lower_bound, upper_bound) = match first {
+            0xE0 => (0xA0, 0xBF),
+            0xED => (0x80, 0x9F),
+            0xF0 => (0x90, 0xBF),
+            0xF4 => (0x80, 0x8F),
+            _ => (0x80, 0xBF),
+        };
+        if !in_inclusive_range8(second, lower_bound, upper_bound) {
+            // Only the lead byte is consumed; `second` is examined again on
+            // the next call.
+            self.remaining = &self.remaining[1..];
+            return Some(Err(Utf8CharsError));
+        }
+        if first < 0xE0 {
+            self.remaining = &self.remaining[2..];
+            let point = ((u32::from(first) & 0x1F) << 6) | (u32::from(second) & 0x3F);
+            // SAFETY: here `first` is in 0xC2..=0xDF and `second` is in
+            // 0x80..=0xBF, so `point` is in 0x80..=0x7FF, which contains no
+            // surrogates and is below 0x110000.
+            return Some(Ok(unsafe { char::from_u32_unchecked(point) }));
+        }
+        if self.remaining.len() == 2 {
+            // Input ends after a valid two-byte prefix of a longer sequence:
+            // both bytes are consumed as a single error.
+            self.remaining = &self.remaining[2..];
+            return Some(Err(Utf8CharsError));
+        }
+        let third = self.remaining[2];
+        if !in_inclusive_range8(third, 0x80, 0xBF) {
+            // The two-byte prefix is consumed as one error; `third` is
+            // examined again on the next call.
+            self.remaining = &self.remaining[2..];
+            return Some(Err(Utf8CharsError));
+        }
+        if first < 0xF0 {
+            self.remaining = &self.remaining[3..];
+            let point = ((u32::from(first) & 0xF) << 12)
+                | ((u32::from(second) & 0x3F) << 6)
+                | (u32::from(third) & 0x3F);
+            // SAFETY: here `first` is in 0xE0..=0xEF, `second` passed the
+            // range check above (which keeps 0xE0 sequences at or above
+            // 0x800 and 0xED sequences at or below 0xD7FF) and `third` is in
+            // 0x80..=0xBF, so `point` is a non-surrogate value below 0x10000.
+            return Some(Ok(unsafe { char::from_u32_unchecked(point) }));
+        }
+        // At this point, we have a valid 3-byte prefix of a
+        // four-byte sequence that has to be incomplete, because
+        // otherwise `next()` would have succeeded.
+        self.remaining = &self.remaining[3..];
+        Some(Err(Utf8CharsError))
+    }
+}
+
+impl<'a> Iterator for ErrorReportingUtf8Chars<'a> {
+    type Item = Result<char, Utf8CharsError>;
+
+    /// Decodes the next item.
+    ///
+    /// The body is a fast path that only runs when at least four bytes
+    /// remain and only returns `Ok` for well-formed sequences; everything
+    /// else (short input, ill-formed sequences, end of input) is delegated
+    /// to `next_fallback()`.
+    #[inline]
+    fn next(&mut self) -> Option<Result<char, Utf8CharsError>> {
+        // This loop is only broken out of as goto forward
+        #[allow(clippy::never_loop)]
+        loop {
+            // With four or more bytes available, the indexing below
+            // (`[0]` through `[3]`) cannot go out of bounds.
+            if self.remaining.len() < 4 {
+                break;
+            }
+            let first = self.remaining[0];
+            if first < 0x80 {
+                self.remaining = &self.remaining[1..];
+                return Some(Ok(char::from(first)));
+            }
+            let second = self.remaining[1];
+            if in_inclusive_range8(first, 0xC2, 0xDF) {
+                if !in_inclusive_range8(second, 0x80, 0xBF) {
+                    break;
+                }
+                let point = ((u32::from(first) & 0x1F) << 6) | (u32::from(second) & 0x3F);
+                self.remaining = &self.remaining[2..];
+                // SAFETY: `first` is in 0xC2..=0xDF and `second` is in
+                // 0x80..=0xBF, so `point` is in 0x80..=0x7FF.
+                return Some(Ok(unsafe { char::from_u32_unchecked(point) }));
+            }
+            // This table-based formulation was benchmark-based in encoding_rs,
+            // but it hasn't been re-benchmarked in this iterator context.
+            let third = self.remaining[2];
+            if first < 0xF0 {
+                // The expression equals 2 only when the ANDed table entries
+                // contribute no bits and the top two bits of `third` are
+                // `10` (i.e. `third` is in 0x80..=0xBF).
+                if ((UTF8_DATA.table[usize::from(second)]
+                    & UTF8_DATA.table[usize::from(first) + 0x80])
+                    | (third >> 6))
+                    != 2
+                {
+                    break;
+                }
+                let point = ((u32::from(first) & 0xF) << 12)
+                    | ((u32::from(second) & 0x3F) << 6)
+                    | (u32::from(third) & 0x3F);
+                self.remaining = &self.remaining[3..];
+                // SAFETY: reached only when the table-driven check above
+                // accepted the sequence.
+                // NOTE(review): soundness rests on the generated table
+                // accepting only valid lead/second byte pairs; the table
+                // contents were not re-verified here.
+                return Some(Ok(unsafe { char::from_u32_unchecked(point) }));
+            }
+            let fourth = self.remaining[3];
+            // Same idea widened to 16 bits: 0x202 requires no bits from the
+            // table entries, `third >> 6 == 2`, and the top two bits of
+            // `fourth` to be `10`.
+            if (u16::from(
+                UTF8_DATA.table[usize::from(second)] & UTF8_DATA.table[usize::from(first) + 0x80],
+            ) | u16::from(third >> 6)
+                | (u16::from(fourth & 0xC0) << 2))
+                != 0x202
+            {
+                break;
+            }
+            let point = ((u32::from(first) & 0x7) << 18)
+                | ((u32::from(second) & 0x3F) << 12)
+                | ((u32::from(third) & 0x3F) << 6)
+                | (u32::from(fourth) & 0x3F);
+            self.remaining = &self.remaining[4..];
+            // SAFETY: reached only when the table-driven check above
+            // accepted the sequence.
+            // NOTE(review): as above, this depends on the generated table
+            // being correct; confirm against encoding_rs if it is ever
+            // regenerated.
+            return Some(Ok(unsafe { char::from_u32_unchecked(point) }));
+        }
+        self.next_fallback()
+    }
+}
+
+impl<'a> DoubleEndedIterator for ErrorReportingUtf8Chars<'a> {
+    /// Decodes one item from the end of the remaining input.
+    ///
+    /// Returns `None` when no bytes remain; otherwise consumes at least one
+    /// byte from the back.
+    #[inline]
+    fn next_back(&mut self) -> Option<Result<char, Utf8CharsError>> {
+        let total = self.remaining.len();
+        if total == 0 {
+            return None;
+        }
+        // Search at most the last four bytes, from the back, for the nearest
+        // byte that is not a continuation byte (`10xxxxxx`).
+        let lead_distance = self
+            .remaining
+            .iter()
+            .rev()
+            .take(4)
+            .position(|&byte| byte & 0xC0 != 0x80);
+        if let Some(distance) = lead_distance {
+            let (head, tail) = self.remaining.split_at(total - (distance + 1));
+            // Decode the tail forwards; it is accepted only if a single
+            // forward step uses up the whole tail.
+            let mut forward = ErrorReportingUtf8Chars::new(tail);
+            let decoded = forward.next();
+            if forward.as_slice().is_empty() {
+                self.remaining = head;
+                return decoded;
+            }
+        }
+
+        // No acceptable sequence ends here: drop only the final byte and
+        // report it as an error.
+        self.remaining = &self.remaining[..total - 1];
+        Some(Err(Utf8CharsError))
+    }
+}
+
+// `next` and `next_back` return `None` only when `remaining` is empty, and
+// nothing ever grows `remaining`, so `None` is returned forever after.
+impl FusedIterator for ErrorReportingUtf8Chars<'_> {}
+
+#[cfg(test)]
+mod tests {
+    use crate::ErrorReportingUtf8Chars;
+
+    /// Item type yielded by the error-reporting iterator.
+    type ReportedItem = <ErrorReportingUtf8Chars<'static> as Iterator>::Item;
+
+    // Should be a static assert, but not taking a dependency for this.
+    /// Wrapping the item in `Option` must not cost more space than
+    /// `Option<char>` does.
+    #[test]
+    fn test_size() {
+        let reported = core::mem::size_of::<Option<ReportedItem>>();
+        let plain = core::mem::size_of::<Option<char>>();
+        assert_eq!(reported, plain);
+    }
+
+    /// Two equal `Ok` items compare equal.
+    #[test]
+    fn test_eq() {
+        let first: ReportedItem = Ok('a');
+        let second: ReportedItem = Ok('a');
+        assert_eq!(first, second);
+    }
+}