Files
cli/vendor/jsonptr/src/token.rs

521 lines
22 KiB
Rust

use core::str::Split;
use crate::index::{Index, ParseIndexError};
use alloc::{
borrow::Cow,
fmt,
string::{String, ToString},
vec::Vec,
};
/// Two-byte escape sequence written by `Token::new` in place of a literal `~`.
const ENCODED_TILDE: &[u8] = b"~0";
/// Two-byte escape sequence written by `Token::new` in place of a literal `/`.
const ENCODED_SLASH: &[u8] = b"~1";
/// Byte that introduces an escape sequence inside an encoded token.
const ENC_PREFIX: u8 = b'~';
/// Byte that, when it follows [`ENC_PREFIX`], stands for a literal `~`.
const TILDE_ENC: u8 = b'0';
/// Byte that, when it follows [`ENC_PREFIX`], stands for a literal `/`.
const SLASH_ENC: u8 = b'1';
/*
░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░
╔══════════════════════════════════════════════════════════════════════════════╗
║ ║
║ Token ║
║ ¯¯¯¯¯¯¯ ║
╚══════════════════════════════════════════════════════════════════════════════╝
░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░
*/
/// A `Token` is a segment of a JSON [`Pointer`](crate::Pointer), preceded by `'/'` (`%x2F`).
///
/// `Token`s can represent a key in a JSON object or an index in an array.
///
/// - Indexes should not contain leading zeros.
/// - When dealing with arrays or path expansion for assignment, `"-"` represents
///   the next, non-existent index in a JSON array.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Token<'a> {
// The token in its RFC 6901 *encoded* form (`~0` for `~`, `~1` for `/`);
// borrowed when constructed from a slice that needed no escaping.
inner: Cow<'a, str>,
}
impl<'a> Token<'a> {
    /// Constructs a `Token` from an RFC 6901 encoded string.
    ///
    /// This is like [`Self::from_encoded`], except that no validation is
    /// performed on the input string.
    ///
    /// ## Safety
    /// Input string must be RFC 6901 encoded: it must not contain `/`, and
    /// every `~` must be immediately followed by `0` or `1`.
    pub(crate) unsafe fn from_encoded_unchecked(inner: impl Into<Cow<'a, str>>) -> Self {
        Self {
            inner: inner.into(),
        }
    }

    /// Constructs a `Token` from an RFC 6901 encoded string.
    ///
    /// To be valid, the string must not contain any `/` characters, and any `~`
    /// characters must be followed by either `0` or `1`.
    ///
    /// This function does not allocate.
    ///
    /// # Examples
    ///
    /// ```
    /// # use jsonptr::Token;
    /// assert_eq!(Token::from_encoded("~1foo~1~0bar").unwrap().decoded(), "/foo/~bar");
    /// let err = Token::from_encoded("foo/oops~bar").unwrap_err();
    /// assert_eq!(err.offset, 3);
    /// // a `~` may only be followed by `0` or `1`, never by another `~`
    /// assert!(Token::from_encoded("~~0").is_err());
    /// // a trailing `~` is reported at the end of the input
    /// assert_eq!(Token::from_encoded("foo~").unwrap_err().offset, 4);
    /// ```
    ///
    /// ## Errors
    /// Returns [`EncodingError`] if the input string is not a valid RFC 6901
    /// token:
    ///
    /// - [`InvalidEncoding::Slash`] when an unescaped `/` is found; `offset`
    ///   is the position of that `/`.
    /// - [`InvalidEncoding::Tilde`] when a `~` is not followed by `0` or `1`;
    ///   `offset` is the position of the byte following the `~`, which is
    ///   the length of the input when the `~` is the last byte.
    pub fn from_encoded(s: &'a str) -> Result<Self, EncodingError> {
        // `true` while the previous byte was a `~` still awaiting its `0`/`1`
        let mut escaped = false;
        for (offset, b) in s.bytes().enumerate() {
            match b {
                b'/' => {
                    return Err(EncodingError {
                        offset,
                        source: InvalidEncoding::Slash,
                    });
                }
                TILDE_ENC | SLASH_ENC if escaped => escaped = false,
                // anything else after a `~` (including a second `~`) is
                // not a valid escape sequence
                _ if escaped => {
                    return Err(EncodingError {
                        offset,
                        source: InvalidEncoding::Tilde,
                    });
                }
                ENC_PREFIX => escaped = true,
                _ => {}
            }
        }
        if escaped {
            // the input ended right after a `~`
            return Err(EncodingError {
                offset: s.len(),
                source: InvalidEncoding::Tilde,
            });
        }
        Ok(Self {
            inner: Cow::Borrowed(s),
        })
    }

    /// Constructs a `Token` from an arbitrary string.
    ///
    /// If the string contains a `/` or a `~`, then it will be assumed not
    /// encoded, in which case this function will encode it, allocating a new
    /// string.
    ///
    /// If the string is already encoded per RFC 6901, use
    /// [`Self::from_encoded`] instead, otherwise it will end up double-encoded.
    ///
    /// # Examples
    ///
    /// ```
    /// # use jsonptr::Token;
    /// assert_eq!(Token::new("/foo/~bar").encoded(), "~1foo~1~0bar");
    /// ```
    pub fn new(s: impl Into<Cow<'a, str>>) -> Self {
        let s = s.into();
        let first = s.bytes().position(|b| b == b'/' || b == ENC_PREFIX);
        match first {
            // nothing needs escaping: keep the input as-is, no allocation
            None => Self { inner: s },
            Some(i) => {
                let input = s.as_bytes();
                // we could take advantage of [`Cow::into_owned`] here, but it
                // would mean copying over the entire string, only to overwrite
                // a portion of it... so instead we explicitly allocate a new
                // buffer and copy only the prefix until the first character
                // that needs escaping.
                // NOTE: at least one byte expands into two, so the output is
                // at least `input.len() + 1` bytes long.
                let mut bytes = Vec::with_capacity(input.len() + 1);
                bytes.extend_from_slice(&input[..i]);
                for &b in &input[i..] {
                    match b {
                        b'/' => bytes.extend_from_slice(ENCODED_SLASH),
                        ENC_PREFIX => bytes.extend_from_slice(ENCODED_TILDE),
                        other => bytes.push(other),
                    }
                }
                Self {
                    // SAFETY: we started from a valid UTF-8 sequence of bytes,
                    // and only replaced some ASCII characters with two other
                    // ASCII characters, so the output is guaranteed valid
                    // UTF-8.
                    inner: Cow::Owned(unsafe { String::from_utf8_unchecked(bytes) }),
                }
            }
        }
    }

    /// Converts into an owned copy of this token.
    ///
    /// If the token is not already owned, this will clone the referenced string
    /// slice.
    pub fn into_owned(self) -> Token<'static> {
        Token {
            inner: Cow::Owned(self.inner.into_owned()),
        }
    }

    /// Extracts an owned copy of this token.
    ///
    /// This always copies the underlying string into a new allocation.
    ///
    /// This method is like [`Self::into_owned`], except it doesn't take
    /// ownership of the original `Token`.
    pub fn to_owned(&self) -> Token<'static> {
        Token {
            inner: Cow::Owned(String::from(&*self.inner)),
        }
    }

    /// Returns the encoded string representation of the `Token`.
    ///
    /// # Examples
    ///
    /// ```
    /// # use jsonptr::Token;
    /// assert_eq!(Token::new("~bar").encoded(), "~0bar");
    /// ```
    pub fn encoded(&self) -> &str {
        &self.inner
    }

    /// Returns the decoded string representation of the `Token`.
    ///
    /// When the token contains no escape sequences the result borrows from
    /// the token and nothing is allocated; otherwise a new string is built.
    ///
    /// # Examples
    ///
    /// ```
    /// # use jsonptr::Token;
    /// assert_eq!(Token::new("~bar").decoded(), "~bar");
    /// assert_eq!(Token::from_encoded("a~1b~0c").unwrap().decoded(), "a/b~c");
    /// ```
    pub fn decoded(&self) -> Cow<'_, str> {
        let first = self.inner.bytes().position(|b| b == ENC_PREFIX);
        match first {
            // no escape sequences: hand out a borrow, even when the token
            // itself owns its string, so that we never allocate here
            None => Cow::Borrowed(&*self.inner),
            Some(i) => {
                let input = self.inner.as_bytes();
                // every `~` is dropped and every other byte yields exactly one
                // output byte; since there is at least one `~`, the output is
                // at most `input.len() - 1` bytes long (and `input` is
                // non-empty, so the subtraction cannot underflow)
                let mut bytes = Vec::with_capacity(input.len() - 1);
                bytes.extend_from_slice(&input[..i]);
                // we start right after the first `~`
                let mut escaped = true;
                for &b in &input[i + 1..] {
                    match b {
                        ENC_PREFIX => {
                            escaped = true;
                        }
                        TILDE_ENC if escaped => {
                            bytes.push(b'~');
                            escaped = false;
                        }
                        SLASH_ENC if escaped => {
                            bytes.push(b'/');
                            escaped = false;
                        }
                        other => {
                            bytes.push(other);
                            // only reachable with `escaped` set for a token
                            // that is not validly encoded; make sure a later
                            // plain `0`/`1` is not mistaken for an escape
                            escaped = false;
                        }
                    }
                }
                // SAFETY: we start from a valid UTF-8 string; we only drop
                // ASCII `~` bytes and replace ASCII `0`/`1` with ASCII
                // `~`/`/`, copying every other byte (including whole
                // multi-byte sequences) unchanged, so the result is valid
                // UTF-8.
                Cow::Owned(unsafe { String::from_utf8_unchecked(bytes) })
            }
        }
    }

    /// Attempts to parse the given `Token` as an array index.
    ///
    /// Per [RFC 6901](https://datatracker.ietf.org/doc/html/rfc6901#section-4),
    /// the acceptable values are non-negative integers and the `-` character,
    /// which stands for the next, non-existent member after the last array
    /// element.
    ///
    /// ## Examples
    ///
    /// ```
    /// # use jsonptr::{index::Index, Token};
    /// assert_eq!(Token::new("-").to_index(), Ok(Index::Next));
    /// assert_eq!(Token::new("0").to_index(), Ok(Index::Num(0)));
    /// assert_eq!(Token::new("2").to_index(), Ok(Index::Num(2)));
    /// assert!(Token::new("a").to_index().is_err());
    /// assert!(Token::new("-1").to_index().is_err());
    /// ```
    /// ## Errors
    /// Returns [`ParseIndexError`] if the token is not a valid array index.
    pub fn to_index(&self) -> Result<Index, ParseIndexError> {
        self.try_into()
    }

    /// Returns if the `Token` is `-`, which stands for the next array index.
    ///
    /// See also [`Self::to_index`].
    pub fn is_next(&self) -> bool {
        matches!(self.to_index(), Ok(Index::Next))
    }
}
macro_rules! impl_from_num {
($($ty:ty),*) => {
$(
impl From<$ty> for Token<'static> {
fn from(v: $ty) -> Self {
// SAFETY: only used for integer types, which are always valid
unsafe { Token::from_encoded_unchecked(v.to_string()) }
}
}
)*
};
}
impl_from_num!(u8, u16, u32, u64, u128, usize, i8, i16, i32, i64, i128, isize);
impl<'a> From<&'a str> for Token<'a> {
fn from(value: &'a str) -> Self {
Token::new(value)
}
}
impl<'a> From<&'a String> for Token<'a> {
fn from(value: &'a String) -> Self {
Token::new(value)
}
}
impl From<String> for Token<'static> {
fn from(value: String) -> Self {
Token::new(value)
}
}
impl<'a> From<&Token<'a>> for Token<'a> {
fn from(value: &Token<'a>) -> Self {
value.clone()
}
}
impl alloc::fmt::Display for Token<'_> {
fn fmt(&self, f: &mut alloc::fmt::Formatter<'_>) -> alloc::fmt::Result {
write!(f, "{}", self.decoded())
}
}
/*
░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░
╔══════════════════════════════════════════════════════════════════════════════╗
║ ║
║ Tokens ║
║ ¯¯¯¯¯¯¯¯ ║
╚══════════════════════════════════════════════════════════════════════════════╝
░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░
*/
/// An iterator over the [`Token`]s of a [`Pointer`](crate::Pointer).
///
/// Each item is one segment yielded by the wrapped [`Split`], turned into a
/// [`Token`] without re-validating its encoding.
#[derive(Debug)]
pub struct Tokens<'a> {
    inner: Split<'a, char>,
}

impl<'a> Iterator for Tokens<'a> {
    type Item = Token<'a>;

    fn next(&mut self) -> Option<Self::Item> {
        let segment = self.inner.next()?;
        // SAFETY: source pointer is encoded
        Some(unsafe { Token::from_encoded_unchecked(segment) })
    }
}

impl<'t> Tokens<'t> {
    /// Wraps a `Split` over a pointer's string.
    ///
    /// The segments are later treated as already-encoded tokens, so the split
    /// string is expected to be validly encoded.
    pub(crate) fn new(inner: Split<'t, char>) -> Self {
        Tokens { inner }
    }
}
/*
░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░
╔══════════════════════════════════════════════════════════════════════════════╗
║ ║
║ InvalidEncodingError ║
║ ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯ ║
╚══════════════════════════════════════════════════════════════════════════════╝
░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░
*/
#[deprecated(since = "0.7.0", note = "renamed to `EncodingError`")]
/// Deprecated alias for [`EncodingError`].
///
/// The two names refer to the same type; use [`EncodingError`] in new code.
pub type InvalidEncodingError = EncodingError;
/*
░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░
╔══════════════════════════════════════════════════════════════════════════════╗
║ ║
║ EncodingError ║
║ ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯ ║
╚══════════════════════════════════════════════════════════════════════════════╝
░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░
*/
/// A token within a JSON pointer is not validly encoded: it contains a `~`
/// that is not followed by `0` or `1`, or an unescaped `/`.
#[derive(Debug, PartialEq, Eq)]
pub struct EncodingError {
    /// Byte offset, within the token, at which the problem was detected.
    ///
    /// As reported by `Token::from_encoded`, this is the position of the
    /// offending byte: the unescaped `/`, or the byte found where a `0` or `1`
    /// was required after a `~` (the token length when the token ends with
    /// `~`).
    pub offset: usize,
    /// The specific kind of encoding violation.
    pub source: InvalidEncoding,
}

#[cfg(feature = "std")]
impl std::error::Error for EncodingError {
    /// Exposes the underlying [`InvalidEncoding`] as the error's cause.
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        let cause: &(dyn std::error::Error + 'static) = &self.source;
        Some(cause)
    }
}

impl fmt::Display for EncodingError {
    /// Writes a message that reports the offset only; the kind of violation
    /// is available through `source`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_fmt(format_args!(
            "token contains invalid encoding at offset {}",
            self.offset
        ))
    }
}
/*
░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░
╔══════════════════════════════════════════════════════════════════════════════╗
║ ║
║ InvalidEncoding ║
║ ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯ ║
╚══════════════════════════════════════════════════════════════════════════════╝
░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░
*/
/// The specific way in which a token's encoding is invalid.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum InvalidEncoding {
    /// A `~` that is not followed by `0` or `1`.
    Tilde,
    /// A `/` that appears unescaped inside a token.
    Slash,
}

impl fmt::Display for InvalidEncoding {
    /// Writes a short, fixed description of the violation.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let message = match *self {
            InvalidEncoding::Tilde => "tilde (~) not followed by 0 or 1",
            InvalidEncoding::Slash => "slash (/) found in token",
        };
        f.write_str(message)
    }
}

#[cfg(feature = "std")]
impl std::error::Error for InvalidEncoding {}
/*
░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░
╔══════════════════════════════════════════════════════════════════════════════╗
║ ║
║ Tests ║
║ ¯¯¯¯¯¯¯ ║
╚══════════════════════════════════════════════════════════════════════════════╝
░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░
*/
#[cfg(test)]
mod tests {
    use super::*;
    use crate::Pointer;
    use quickcheck_macros::quickcheck;

    /// `From` conversions escape raw strings and stringify integers.
    #[test]
    fn from() {
        let slash = Token::from("/");
        assert_eq!(slash.encoded(), "~1");
        let tilde_slash = Token::from("~/");
        assert_eq!(tilde_slash.encoded(), "~0~1");
        let from_u32 = Token::from(34u32);
        assert_eq!(from_u32.encoded(), "34");
        let from_u64 = Token::from(34u64);
        assert_eq!(from_u64.encoded(), "34");
        let from_string = Token::from(String::from("foo"));
        assert_eq!(from_string.encoded(), "foo");
        let from_ref = Token::from(&Token::new("foo"));
        assert_eq!(from_ref.encoded(), "foo");
    }

    /// `-` and non-negative integers parse as indexes; anything else fails.
    #[test]
    fn to_index() {
        let parse = |raw: &'static str| Token::new(raw).to_index();
        assert_eq!(parse("-"), Ok(Index::Next));
        assert_eq!(parse("0"), Ok(Index::Num(0)));
        assert_eq!(parse("2"), Ok(Index::Num(2)));
        for &raw in ["a", "-1"].iter() {
            assert!(parse(raw).is_err());
        }
    }

    /// `Token::new` always treats its input as unencoded.
    #[test]
    fn new() {
        for &(raw, expected) in [("~1", "~01"), ("a/b", "a~1b")].iter() {
            assert_eq!(Token::new(raw).encoded(), expected);
        }
    }

    /// `Token::from_encoded` keeps valid input untouched and rejects invalid
    /// input.
    #[test]
    fn from_encoded() {
        for &valid in ["~1", "~0~1"].iter() {
            assert_eq!(Token::from_encoded(valid).unwrap().encoded(), valid);
        }
        let t = Token::from_encoded("a~1b").unwrap();
        assert_eq!(t.decoded(), "a/b");
        for &invalid in ["a/b", "a~a"].iter() {
            assert!(Token::from_encoded(invalid).is_err());
        }
    }

    /// `into_owned` preserves the encoded form.
    #[test]
    fn into_owned() {
        let borrowed = Token::from_encoded("foo~0").unwrap();
        let owned = borrowed.into_owned();
        assert_eq!(owned.encoded(), "foo~0");
    }

    /// Whatever `Token::new` produces must be accepted by
    /// `Token::from_encoded` and compare equal to the original.
    #[quickcheck]
    fn encode_decode(s: String) -> bool {
        let original = Token::new(s);
        let reparsed = Token::from_encoded(original.encoded()).unwrap();
        original == reparsed
    }

    /// Iterating a pointer yields one token per segment.
    #[test]
    fn tokens() {
        let pointer = Pointer::from_static("/a/b/c");
        let actual: Vec<Token> = pointer.tokens().collect();
        let expected: Vec<Token> = ["a", "b", "c"]
            .iter()
            // SAFETY: the literals contain neither `/` nor `~`
            .map(|segment| unsafe { Token::from_encoded_unchecked(*segment) })
            .collect();
        assert_eq!(actual, expected);
    }

    /// Only the `-` token counts as "next".
    #[test]
    fn is_next() {
        assert!(Token::new("-").is_next());
        for &raw in ["0", "a", ""].iter() {
            assert!(!Token::new(raw).is_next());
        }
    }
}