cli/vendor/jsonptr/src/token.rs

use core::str::Split;

use crate::index::{Index, ParseIndexError};
use alloc::{
    borrow::Cow,
    fmt,
    string::{String, ToString},
    vec::Vec,
};

const ENCODED_TILDE: &[u8] = b"~0";
const ENCODED_SLASH: &[u8] = b"~1";

const ENC_PREFIX: u8 = b'~';
const TILDE_ENC: u8 = b'0';
const SLASH_ENC: u8 = b'1';

/*
░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░
╔══════════════════════════════════════════════════════════════════════════════╗
║                                                                              ║
║                                    Token                                     ║
║                                   ¯¯¯¯¯¯¯                                    ║
╚══════════════════════════════════════════════════════════════════════════════╝
░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░
*/

/// A `Token` is a segment of a JSON [`Pointer`](crate::Token), preceded by `'/'` (`%x2F`).
///
/// `Token`s can represent a key in a JSON object or an index in an array.
///
/// - Indexes should not contain leading zeros.
/// - When dealing with arrays or path expansion for assignment, `"-"` represent
///   the next, non-existent index in a JSON array.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Token<'a> {
    inner: Cow<'a, str>,
}

impl<'a> Token<'a> {
    /// Constructs a `Token` from an RFC 6901 encoded string.
    ///
    /// This is like [`Self::from_encoded`], except that no validation is
    /// performed on the input string.
    ///
    /// ## Safety
    /// Input string must be RFC 6901 encoded.
    pub(crate) unsafe fn from_encoded_unchecked(inner: impl Into<Cow<'a, str>>) -> Self {
        Self {
            inner: inner.into(),
        }
    }

    /// Constructs a `Token` from an RFC 6901 encoded string.
    ///
    /// To be valid, the string must not contain any `/` characters, and any `~`
    /// characters must be followed by either `0` or `1`.
    ///
    /// This function does not allocate.
    ///
    /// # Examples
    ///
    /// ```
    /// # use jsonptr::Token;
    /// assert_eq!(Token::from_encoded("~1foo~1~0bar").unwrap().decoded(), "/foo/~bar");
    /// let err = Token::from_encoded("foo/oops~bar").unwrap_err();
    /// assert_eq!(err.offset, 3);
    /// ```
    ///
    /// ## Errors
    /// Returns `InvalidEncodingError` if the input string is not a valid RFC
    /// 6901 (`~` must be followed by `0` or `1`)
    pub fn from_encoded(s: &'a str) -> Result<Self, EncodingError> {
        let mut escaped = false;
        for (offset, b) in s.bytes().enumerate() {
            match b {
                b'/' => {
                    return Err(EncodingError {
                        offset,
                        source: InvalidEncoding::Slash,
                    })
                }
                ENC_PREFIX => {
                    escaped = true;
                }
                TILDE_ENC | SLASH_ENC if escaped => {
                    escaped = false;
                }
                _ => {
                    if escaped {
                        return Err(EncodingError {
                            offset,
                            source: InvalidEncoding::Tilde,
                        });
                    }
                }
            }
        }
        if escaped {
            return Err(EncodingError {
                offset: s.len(),
                source: InvalidEncoding::Slash,
            });
        }
        Ok(Self { inner: s.into() })
    }

    /// Constructs a `Token` from an arbitrary string.
    ///
    /// If the string contains a `/` or a `~`, then it will be assumed not
    /// encoded, in which case this function will encode it, allocating a new
    /// string.
    ///
    /// If the string is already encoded per RFC 6901, use
    /// [`Self::from_encoded`] instead, otherwise it will end up double-encoded.
    ///
    /// # Examples
    ///
    /// ```
    /// # use jsonptr::Token;
    /// assert_eq!(Token::new("/foo/~bar").encoded(), "~1foo~1~0bar");
    /// ```
    pub fn new(s: impl Into<Cow<'a, str>>) -> Self {
        let s = s.into();

        if let Some(i) = s.bytes().position(|b| b == b'/' || b == b'~') {
            let input = s.as_bytes();
            // we could take advantage of [`Cow::into_owned`] here, but it would
            // mean copying over the entire string, only to overwrite a portion
            // of it... so instead we explicitly allocate a new buffer and copy
            // only the prefix until the first encoded character
            // NOTE: the output is at least as large as the input + 1, so we
            // allocate that much capacity ahead of time
            let mut bytes = Vec::with_capacity(input.len() + 1);
            bytes.extend_from_slice(&input[..i]);
            for &b in &input[i..] {
                match b {
                    b'/' => {
                        bytes.extend_from_slice(ENCODED_SLASH);
                    }
                    b'~' => {
                        bytes.extend_from_slice(ENCODED_TILDE);
                    }
                    other => {
                        bytes.push(other);
                    }
                }
            }
            Self {
                // SAFETY: we started from a valid UTF-8 sequence of bytes,
                // and only replaced some ASCII characters with other two ASCII
                // characters, so the output is guaranteed valid UTF-8.
                inner: Cow::Owned(unsafe { String::from_utf8_unchecked(bytes) }),
            }
        } else {
            Self { inner: s }
        }
    }

    /// Converts into an owned copy of this token.
    ///
    /// If the token is not already owned, this will clone the referenced string
    /// slice.
    pub fn into_owned(self) -> Token<'static> {
        Token {
            inner: Cow::Owned(self.inner.into_owned()),
        }
    }

    /// Extracts an owned copy of this token.
    ///
    /// If the token is not already owned, this will clone the referenced string
    /// slice.
    ///
    /// This method is like [`Self::into_owned`], except it doesn't take
    /// ownership of the original `Token`.
    pub fn to_owned(&self) -> Token<'static> {
        Token {
            inner: Cow::Owned(self.inner.clone().into_owned()),
        }
    }

    /// Returns the encoded string representation of the `Token`.
    ///
    /// # Examples
    ///
    /// ```
    /// # use jsonptr::Token;
    /// assert_eq!(Token::new("~bar").encoded(), "~0bar");
    /// ```
    pub fn encoded(&self) -> &str {
        &self.inner
    }

    /// Returns the decoded string representation of the `Token`.
    ///
    /// # Examples
    ///
    /// ```
    /// # use jsonptr::Token;
    /// assert_eq!(Token::new("~bar").decoded(), "~bar");
    /// ```
    pub fn decoded(&self) -> Cow<'_, str> {
        if let Some(i) = self.inner.bytes().position(|b| b == ENC_PREFIX) {
            let input = self.inner.as_bytes();
            // we could take advantage of [`Cow::into_owned`] here, but it would
            // mean copying over the entire string, only to overwrite a portion
            // of it... so instead we explicitly allocate a new buffer and copy
            // only the prefix until the first encoded character
            // NOTE: the output is at least as large as the input + 1, so we
            // allocate that much capacity ahead of time
            let mut bytes = Vec::with_capacity(input.len() + 1);
            bytes.extend_from_slice(&input[..i]);
            // we start from the first escaped character
            let mut escaped = true;
            for &b in &input[i + 1..] {
                match b {
                    ENC_PREFIX => {
                        escaped = true;
                    }
                    TILDE_ENC if escaped => {
                        bytes.push(b'~');
                        escaped = false;
                    }
                    SLASH_ENC if escaped => {
                        bytes.push(b'/');
                        escaped = false;
                    }
                    other => {
                        bytes.push(other);
                    }
                }
            }
            // SAFETY: we start from a valid String, and only write valid UTF-8
            // byte sequences into it.
            Cow::Owned(unsafe { String::from_utf8_unchecked(bytes) })
        } else {
            // if there are no encoded characters, we don't need to allocate!
            self.inner.clone()
        }
    }

    /// Attempts to parse the given `Token` as an array index.
    ///
    /// Per [RFC 6901](https://datatracker.ietf.org/doc/html/rfc6901#section-4),
    /// the acceptable values are non-negative integers and the `-` character,
    /// which stands for the next, non-existent member after the last array
    /// element.
    ///
    /// ## Examples
    ///
    /// ```
    /// # use jsonptr::{index::Index, Token};
    /// assert_eq!(Token::new("-").to_index(), Ok(Index::Next));
    /// assert_eq!(Token::new("0").to_index(), Ok(Index::Num(0)));
    /// assert_eq!(Token::new("2").to_index(), Ok(Index::Num(2)));
    /// assert!(Token::new("a").to_index().is_err());
    /// assert!(Token::new("-1").to_index().is_err());
    /// ```
    /// ## Errors
    /// Returns [`ParseIndexError`] if the token is not a valid array index.
    pub fn to_index(&self) -> Result<Index, ParseIndexError> {
        self.try_into()
    }

    /// Returns if the `Token` is `-`, which stands for the next array index.
    ///
    /// See also [`Self::to_index`].
    pub fn is_next(&self) -> bool {
        matches!(self.to_index(), Ok(Index::Next))
    }
}

macro_rules! impl_from_num {
    ($($ty:ty),*) => {
        $(
            impl From<$ty> for Token<'static> {
                fn from(v: $ty) -> Self {
                    // SAFETY: only used for integer types, which are always valid
                    unsafe { Token::from_encoded_unchecked(v.to_string()) }
                }
            }
        )*
    };
}
impl_from_num!(u8, u16, u32, u64, u128, usize, i8, i16, i32, i64, i128, isize);

impl<'a> From<&'a str> for Token<'a> {
    fn from(value: &'a str) -> Self {
        Token::new(value)
    }
}

impl<'a> From<&'a String> for Token<'a> {
    fn from(value: &'a String) -> Self {
        Token::new(value)
    }
}

impl From<String> for Token<'static> {
    fn from(value: String) -> Self {
        Token::new(value)
    }
}

impl<'a> From<&Token<'a>> for Token<'a> {
    fn from(value: &Token<'a>) -> Self {
        value.clone()
    }
}

impl alloc::fmt::Display for Token<'_> {
    fn fmt(&self, f: &mut alloc::fmt::Formatter<'_>) -> alloc::fmt::Result {
        write!(f, "{}", self.decoded())
    }
}

/*
░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░
╔══════════════════════════════════════════════════════════════════════════════╗
║                                                                              ║
║                                    Tokens                                    ║
║                                   ¯¯¯¯¯¯¯¯                                   ║
╚══════════════════════════════════════════════════════════════════════════════╝
░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░
*/

/// An iterator over the [`Token`]s of a [`Pointer`](crate::Pointer).
#[derive(Debug)]
pub struct Tokens<'a> {
    inner: Split<'a, char>,
}

impl<'a> Iterator for Tokens<'a> {
    type Item = Token<'a>;
    fn next(&mut self) -> Option<Self::Item> {
        self.inner
            .next()
            // SAFETY: source pointer is encoded
            .map(|s| unsafe { Token::from_encoded_unchecked(s) })
    }
}
impl<'t> Tokens<'t> {
    pub(crate) fn new(inner: Split<'t, char>) -> Self {
        Self { inner }
    }
}

/*
░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░
╔══════════════════════════════════════════════════════════════════════════════╗
║                                                                              ║
║                             InvalidEncodingError                             ║
║                            ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯                            ║
╚══════════════════════════════════════════════════════════════════════════════╝
░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░
*/

#[deprecated(since = "0.7.0", note = "renamed to `EncodingError`")]
/// Deprecated alias for [`EncodingError`].
pub type InvalidEncodingError = EncodingError;

/*
░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░
╔══════════════════════════════════════════════════════════════════════════════╗
║                                                                              ║
║                                EncodingError                                 ║
║                               ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯                                ║
╚══════════════════════════════════════════════════════════════════════════════╝
░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░
*/

/// A token within a json pointer contained invalid encoding (`~` not followed
/// by `0` or `1`).
///
#[derive(Debug, PartialEq, Eq)]
pub struct EncodingError {
    /// offset of the erroneous `~` from within the `Token`
    pub offset: usize,
    /// the specific encoding error
    pub source: InvalidEncoding,
}

#[cfg(feature = "std")]
impl std::error::Error for EncodingError {
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        Some(&self.source)
    }
}

impl fmt::Display for EncodingError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(
            f,
            "token contains invalid encoding at offset {}",
            self.offset
        )
    }
}

/*
░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░
╔══════════════════════════════════════════════════════════════════════════════╗
║                                                                              ║
║                               InvalidEncoding                                ║
║                              ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯                               ║
╚══════════════════════════════════════════════════════════════════════════════╝
░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░
*/

/// Represents the specific type of invalid encoding error.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum InvalidEncoding {
    /// `~` not followed by `0` or `1`
    Tilde,
    /// non-encoded `/` found in token
    Slash,
}

impl fmt::Display for InvalidEncoding {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            InvalidEncoding::Tilde => write!(f, "tilde (~) not followed by 0 or 1"),
            InvalidEncoding::Slash => write!(f, "slash (/) found in token"),
        }
    }
}
#[cfg(feature = "std")]
impl std::error::Error for InvalidEncoding {}

/*
░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░
╔══════════════════════════════════════════════════════════════════════════════╗
║                                                                              ║
║                                    Tests                                     ║
║                                   ¯¯¯¯¯¯¯                                    ║
╚══════════════════════════════════════════════════════════════════════════════╝
░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░
*/

#[cfg(test)]
mod tests {
    use crate::Pointer;

    use super::*;
    use quickcheck_macros::quickcheck;

    #[test]
    fn from() {
        assert_eq!(Token::from("/").encoded(), "~1");
        assert_eq!(Token::from("~/").encoded(), "~0~1");
        assert_eq!(Token::from(34u32).encoded(), "34");
        assert_eq!(Token::from(34u64).encoded(), "34");
        assert_eq!(Token::from(String::from("foo")).encoded(), "foo");
        assert_eq!(Token::from(&Token::new("foo")).encoded(), "foo");
    }

    #[test]
    fn to_index() {
        assert_eq!(Token::new("-").to_index(), Ok(Index::Next));
        assert_eq!(Token::new("0").to_index(), Ok(Index::Num(0)));
        assert_eq!(Token::new("2").to_index(), Ok(Index::Num(2)));
        assert!(Token::new("a").to_index().is_err());
        assert!(Token::new("-1").to_index().is_err());
    }

    #[test]
    fn new() {
        assert_eq!(Token::new("~1").encoded(), "~01");
        assert_eq!(Token::new("a/b").encoded(), "a~1b");
    }

    #[test]
    fn from_encoded() {
        assert_eq!(Token::from_encoded("~1").unwrap().encoded(), "~1");
        assert_eq!(Token::from_encoded("~0~1").unwrap().encoded(), "~0~1");
        let t = Token::from_encoded("a~1b").unwrap();
        assert_eq!(t.decoded(), "a/b");
        assert!(Token::from_encoded("a/b").is_err());
        assert!(Token::from_encoded("a~a").is_err());
    }

    #[test]
    fn into_owned() {
        let token = Token::from_encoded("foo~0").unwrap().into_owned();
        assert_eq!(token.encoded(), "foo~0");
    }

    #[quickcheck]
    fn encode_decode(s: String) -> bool {
        let token = Token::new(s);
        let decoded = Token::from_encoded(token.encoded()).unwrap();
        token == decoded
    }

    #[test]
    fn tokens() {
        let pointer = Pointer::from_static("/a/b/c");
        let tokens: Vec<Token> = pointer.tokens().collect();
        assert_eq!(tokens, unsafe {
            vec![
                Token::from_encoded_unchecked("a"),
                Token::from_encoded_unchecked("b"),
                Token::from_encoded_unchecked("c"),
            ]
        });
    }

    #[test]
    fn is_next() {
        let token = Token::new("-");
        assert!(token.is_next());
        let token = Token::new("0");
        assert!(!token.is_next());
        let token = Token::new("a");
        assert!(!token.is_next());
        let token = Token::new("");
        assert!(!token.is_next());
    }
}