chore: checkpoint before Python removal

This commit is contained in:
2026-03-26 22:33:59 +00:00
parent 683cec9307
commit e568ddf82a
29972 changed files with 11269302 additions and 2 deletions

1234
vendor/iri-string/src/build.rs vendored Normal file

File diff suppressed because it is too large Load Diff

267
vendor/iri-string/src/components.rs vendored Normal file
View File

@@ -0,0 +1,267 @@
//! Components of IRIs.
mod authority;
use core::num::NonZeroUsize;
use core::ops::{Range, RangeFrom, RangeTo};
use crate::parser::trusted as trusted_parser;
use crate::spec::Spec;
use crate::types::RiReferenceStr;
pub use self::authority::AuthorityComponents;
/// Byte offsets that delimit the major components of an IRI reference.
#[derive(Debug, Clone, Copy)]
pub(crate) struct Splitter {
    /// Offset where the scheme ends (i.e. the position of the `:` after it).
    scheme_end: Option<NonZeroUsize>,
    /// Offset where the authority ends.
    ///
    /// An absent authority (`None`) and an empty authority (`Some(_)`) are
    /// two different states.
    authority_end: Option<NonZeroUsize>,
    /// Offset of the first byte of the query (just after the leading `?`).
    query_start: Option<NonZeroUsize>,
    /// Offset of the first byte of the fragment (just after the leading `#`).
    fragment_start: Option<NonZeroUsize>,
}

impl Splitter {
    /// Builds a splitter from the four boundary offsets.
    #[inline]
    #[must_use]
    pub(crate) fn new(
        scheme_end: Option<NonZeroUsize>,
        authority_end: Option<NonZeroUsize>,
        query_start: Option<NonZeroUsize>,
        fragment_start: Option<NonZeroUsize>,
    ) -> Self {
        Self {
            scheme_end,
            authority_end,
            query_start,
            fragment_start,
        }
    }

    /// Splits `s` into `(scheme, authority, path, query, fragment)`.
    ///
    /// Delimiters (`:`, `//`, `?`, `#`) are not included in the returned slices.
    #[must_use]
    fn split_into_major(
        self,
        s: &str,
    ) -> (Option<&str>, Option<&str>, &str, Option<&str>, Option<&str>) {
        // Scheme, and the offset right after its trailing `:`.
        let (scheme, after_scheme) = if let Some(end) = self.scheme_end {
            let end = end.get();
            // +1: ":".len()
            (Some(&s[..end]), end + 1)
        } else {
            (None, 0)
        };

        // Authority, and the offset where the path begins.
        let (authority, path_start) = if let Some(end) = self.authority_end {
            let end = end.get();
            // +2: "//".len()
            (Some(&s[(after_scheme + 2)..end]), end)
        } else {
            (None, after_scheme)
        };

        // Fragment, and the offset where whatever precedes `#` ends.
        let (fragment, before_fragment) = if let Some(start) = self.fragment_start {
            let start = start.get();
            // -1: "#".len()
            (Some(&s[start..]), start - 1)
        } else {
            (None, s.len())
        };

        // Query, and the offset where the path ends.
        let (query, path_end) = if let Some(start) = self.query_start {
            let start = start.get();
            // -1: "?".len()
            (Some(&s[start..before_fragment]), start - 1)
        } else {
            (None, before_fragment)
        };

        let path = &s[path_start..path_end];
        (scheme, authority, path, query, fragment)
    }

    /// Returns the range covering the scheme, if any.
    #[inline]
    #[must_use]
    fn scheme_range(self) -> Option<RangeTo<usize>> {
        let end = self.scheme_end?.get();
        Some(..end)
    }

    /// Returns the scheme slice of `s`, if any.
    #[inline]
    #[must_use]
    pub(crate) fn scheme_str<'a>(&self, s: &'a str) -> Option<&'a str> {
        let range = self.scheme_range()?;
        Some(&s[range])
    }

    /// Tells whether a scheme is present.
    #[inline]
    #[must_use]
    pub(crate) fn has_scheme(&self) -> bool {
        self.scheme_end.is_some()
    }

    /// Returns the range covering the authority (without `//`), if any.
    #[inline]
    #[must_use]
    fn authority_range(self) -> Option<Range<usize>> {
        let end = self.authority_end?.get();
        let start = match self.scheme_end {
            // +3: "://".len()
            Some(scheme_end) => scheme_end.get() + 3,
            // 2: "//".len()
            None => 2,
        };
        Some(start..end)
    }

    /// Returns the authority slice of `s`, if any.
    #[inline]
    #[must_use]
    pub(crate) fn authority_str<'a>(&self, s: &'a str) -> Option<&'a str> {
        let range = self.authority_range()?;
        Some(&s[range])
    }

    /// Tells whether an authority is present (possibly empty).
    #[inline]
    #[must_use]
    pub(crate) fn has_authority(&self) -> bool {
        self.authority_end.is_some()
    }

    /// Returns the range covering the path, given the full string length.
    #[inline]
    #[must_use]
    fn path_range(self, full_len: usize) -> Range<usize> {
        // The path ends right before the first of `?` / `#`, or at the end.
        let end = match self.query_start.or(self.fragment_start) {
            // -1: "?".len() and "#".len()
            Some(next) => next.get() - 1,
            None => full_len,
        };
        // The path starts after the authority, else after `scheme:`, else at 0.
        let start = match (self.authority_end, self.scheme_end) {
            (Some(authority_end), _) => authority_end.get(),
            // +1: ":".len()
            (None, Some(scheme_end)) => scheme_end.get() + 1,
            (None, None) => 0,
        };
        start..end
    }

    /// Returns the path slice of `s`.
    #[inline]
    #[must_use]
    pub(crate) fn path_str<'a>(&self, s: &'a str) -> &'a str {
        let range = self.path_range(s.len());
        &s[range]
    }

    /// Tells whether the path is empty, given the full string length.
    #[inline]
    #[must_use]
    pub(crate) fn is_path_empty(&self, full_len: usize) -> bool {
        let Range { start, end } = self.path_range(full_len);
        start >= end
    }

    /// Returns the range covering the query (without the leading `?`), if any.
    #[inline]
    #[must_use]
    fn query_range(self, full_len: usize) -> Option<Range<usize>> {
        let start = self.query_start?.get();
        let end = match self.fragment_start {
            // -1: "#".len()
            Some(fragment_start) => fragment_start.get() - 1,
            None => full_len,
        };
        Some(start..end)
    }

    /// Returns the query slice of `s`, if any.
    #[inline]
    #[must_use]
    pub(crate) fn query_str<'a>(&self, s: &'a str) -> Option<&'a str> {
        let range = self.query_range(s.len())?;
        Some(&s[range])
    }

    /// Tells whether a query is present.
    #[inline]
    #[must_use]
    pub(crate) fn has_query(&self) -> bool {
        self.query_start.is_some()
    }

    /// Returns the range covering the fragment (without the leading `#`), if any.
    #[inline]
    #[must_use]
    pub(crate) fn fragment_range(self) -> Option<RangeFrom<usize>> {
        let start = self.fragment_start?.get();
        Some(start..)
    }

    /// Returns the fragment slice of `s`, if any.
    #[inline]
    #[must_use]
    pub(crate) fn fragment_str<'a>(&self, s: &'a str) -> Option<&'a str> {
        let range = self.fragment_range()?;
        Some(&s[range])
    }
}
/// An IRI reference paired with the offsets needed to split it.
///
/// See <https://www.rfc-editor.org/rfc/rfc3986.html#section-5.2.2>.
#[derive(Debug, Clone, Copy)]
pub(crate) struct RiReferenceComponents<'a, S: Spec> {
    /// The complete, unsplit IRI reference.
    pub(crate) iri: &'a RiReferenceStr<S>,
    /// Offsets used to cut `iri` into its components.
    pub(crate) splitter: Splitter,
}

impl<'a, S: Spec> RiReferenceComponents<'a, S> {
    /// Returns `(scheme, authority, path, query, fragment)`.
    #[inline]
    #[must_use]
    pub(crate) fn to_major(
        self,
    ) -> (
        Option<&'a str>,
        Option<&'a str>,
        &'a str,
        Option<&'a str>,
        Option<&'a str>,
    ) {
        let Self { iri, splitter } = self;
        splitter.split_into_major(iri.as_str())
    }

    /// Returns the original IRI reference.
    #[inline]
    #[must_use]
    pub(crate) fn iri(&self) -> &'a RiReferenceStr<S> {
        self.iri
    }

    /// Returns the scheme, if present.
    #[inline]
    #[must_use]
    pub(crate) fn scheme_str(&self) -> Option<&str> {
        let full = self.iri.as_str();
        self.splitter.scheme_str(full)
    }

    /// Returns the authority, if present.
    #[inline]
    #[must_use]
    pub(crate) fn authority_str(&self) -> Option<&str> {
        let full = self.iri.as_str();
        self.splitter.authority_str(full)
    }

    /// Returns the path.
    #[inline]
    #[must_use]
    pub(crate) fn path_str(&self) -> &str {
        let full = self.iri.as_str();
        self.splitter.path_str(full)
    }

    /// Returns the query, if present.
    #[inline]
    #[must_use]
    pub(crate) fn query_str(&self) -> Option<&str> {
        let full = self.iri.as_str();
        self.splitter.query_str(full)
    }
}
impl<'a, S: Spec> From<&'a RiReferenceStr<S>> for RiReferenceComponents<'a, S> {
/// Decomposes the IRI reference by delegating to
/// `trusted_parser::decompose_iri_reference`.
// NOTE(review): the `trusted` parser presumably relies on `s` being already
// validated (which the `RiReferenceStr` type should guarantee) -- confirm.
#[inline]
fn from(s: &'a RiReferenceStr<S>) -> Self {
trusted_parser::decompose_iri_reference(s)
}
}

View File

@@ -0,0 +1,121 @@
//! Subcomponents of authority.
use crate::parser::trusted as trusted_parser;
use crate::spec::Spec;
use crate::types::RiReferenceStr;
/// The pieces of an authority component.
///
/// Values of this type are returned by the `authority_components` method of
/// the string types (for example [`RiStr::authority_components`]).
///
/// [`RiStr::authority_components`]: `crate::types::RiStr::authority_components`
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct AuthorityComponents<'a> {
    /// The whole authority, without the leading `//`.
    pub(crate) authority: &'a str,
    /// Byte offset in `authority` where the `host` begins.
    pub(crate) host_start: usize,
    /// Byte offset in `authority` where the `host` ends.
    pub(crate) host_end: usize,
}

impl<'a> AuthorityComponents<'a> {
    /// Extracts the authority subcomponents of the given IRI.
    ///
    /// Returns `None` when `iri.authority_str()` yields `None`.
    pub fn from_iri<S: Spec>(iri: &'a RiReferenceStr<S>) -> Option<Self> {
        let authority = iri.authority_str()?;
        Some(trusted_parser::authority::decompose_authority(authority))
    }

    /// Returns the `userinfo` part, without the `@` that follows it.
    #[must_use]
    pub fn userinfo(&self) -> Option<&'a str> {
        // A host starting at offset 0 leaves no room for a `userinfo@` prefix.
        if self.host_start == 0 {
            return None;
        }
        let at_pos = self.host_start - 1;
        debug_assert_eq!(self.authority.as_bytes()[at_pos], b'@');
        Some(&self.authority[..at_pos])
    }

    /// Returns the `host` part.
    #[inline]
    #[must_use]
    pub fn host(&self) -> &'a str {
        // NOTE: RFC 6874 support may need the internal logic to change.
        let (start, end) = (self.host_start, self.host_end);
        &self.authority[start..end]
    }

    /// Returns the `port` part, without the `:` that precedes it.
    #[must_use]
    pub fn port(&self) -> Option<&'a str> {
        let colon_pos = self.host_end;
        // Nothing after the host means there is no `:port` suffix.
        if colon_pos == self.authority.len() {
            return None;
        }
        debug_assert_eq!(self.authority.as_bytes()[colon_pos], b':');
        Some(&self.authority[(colon_pos + 1)..])
    }
}
#[cfg(test)]
#[cfg(feature = "alloc")]
mod tests {
    use super::*;

    #[cfg(all(feature = "alloc", not(feature = "std")))]
    use alloc::string::String;

    use crate::types::IriReferenceStr;

    /// `userinfo` candidates (the "absent" case is added by the test itself).
    const USERINFO: &[&str] = &["", "user:password", "user"];

    /// `port` candidates (the "absent" case is added by the test itself).
    const PORT: &[&str] = &[
        "",
        "0",
        "0000",
        "80",
        "1234567890123456789012345678901234567890",
    ];

    /// `host` candidates.
    const HOST: &[&str] = &[
        "",
        "localhost",
        "example.com",
        "192.0.2.0",
        "[2001:db8::1]",
        "[2001:0db8:0:0:0:0:0:1]",
        "[2001:0db8::192.0.2.255]",
        "[v9999.this-is-futuristic-ip-address]",
    ];

    /// Builds the relative IRI reference `//[userinfo@]host[:port]`.
    fn compose_to_relative_iri(userinfo: Option<&str>, host: &str, port: Option<&str>) -> String {
        let mut buf = String::from("//");
        if let Some(userinfo) = userinfo {
            buf.push_str(userinfo);
            buf.push('@');
        }
        buf.push_str(host);
        if let Some(port) = port {
            buf.push(':');
            buf.push_str(port);
        }
        buf
    }

    /// Every combination of host / optional userinfo / optional port must
    /// round-trip through `AuthorityComponents`.
    #[test]
    fn test_decompose_authority() {
        for host in HOST.iter().copied() {
            // `.chain(None)` would append nothing here: a bare `None` is an
            // empty `Option<Option<&str>>` iterator. `once(None)` appends the
            // intended "component is absent" case.
            let userinfos = USERINFO
                .iter()
                .copied()
                .map(Some)
                .chain(core::iter::once(None));
            for userinfo in userinfos {
                let ports = PORT.iter().copied().map(Some).chain(core::iter::once(None));
                for port in ports {
                    let authority = compose_to_relative_iri(userinfo, host, port);
                    let authority =
                        IriReferenceStr::new(&authority).expect("test case should be valid");
                    let components = AuthorityComponents::from_iri(authority)
                        .expect("relative path composed for this test should contain authority");

                    assert_eq!(components.host(), host);
                    assert_eq!(components.userinfo(), userinfo);
                    assert_eq!(components.port(), port);
                }
            }
        }
    }
}

291
vendor/iri-string/src/convert.rs vendored Normal file
View File

@@ -0,0 +1,291 @@
//! Conversion between URI/IRI types.
use core::fmt;
#[cfg(feature = "alloc")]
use alloc::collections::TryReserveError;
#[cfg(all(feature = "alloc", not(feature = "std")))]
use alloc::string::String;
#[cfg(feature = "alloc")]
use crate::format::{ToDedicatedString, ToStringFallible};
use crate::spec::Spec;
use crate::types::{
RiAbsoluteStr, RiFragmentStr, RiQueryStr, RiReferenceStr, RiRelativeStr, RiStr,
};
#[cfg(feature = "alloc")]
use crate::types::{
RiAbsoluteString, RiFragmentString, RiQueryString, RiReferenceString, RiRelativeString,
RiString,
};
#[cfg(feature = "alloc")]
use crate::types::{
UriAbsoluteString, UriFragmentString, UriQueryString, UriReferenceString, UriRelativeString,
UriString,
};
/// Uppercase hexadecimal digit (as an ASCII byte) for each nibble value,
/// indexed by the nibble.
const HEXDIGITS: [u8; 16] = *b"0123456789ABCDEF";
/// A resource identifier mapped to a URI of some kind.
///
/// This is a thin wrapper around a borrowed `Src`. For the supported string
/// types, its `Display` output is the source string with every non-ASCII
/// byte percent-encoded.
///
/// Supported `Src` types are:
///
/// * IRIs:
///     + [`IriAbsoluteStr`] (alias of `RiAbsoluteStr<IriSpec>`)
///     + [`IriReferenceStr`] (alias of `RiReferenceStr<IriSpec>`)
///     + [`IriRelativeStr`] (alias of `RiRelativeStr<IriSpec>`)
///     + [`IriStr`] (alias of `RiStr<IriSpec>`)
/// * URIs:
///     + [`UriAbsoluteStr`] (alias of `RiAbsoluteStr<UriSpec>`)
///     + [`UriReferenceStr`] (alias of `RiReferenceStr<UriSpec>`)
///     + [`UriRelativeStr`] (alias of `RiRelativeStr<UriSpec>`)
///     + [`UriStr`] (alias of `RiStr<UriSpec>`)
///
/// # Examples
///
/// ```
/// use iri_string::convert::MappedToUri;
/// use iri_string::types::{IriStr, UriStr};
///
/// let src = IriStr::new("http://example.com/?alpha=\u{03B1}")?;
/// // The type is `MappedToUri<IriStr>`, but you usually don't need to specify.
/// let mapped = MappedToUri::from(src).to_string();
/// assert_eq!(mapped, "http://example.com/?alpha=%CE%B1");
/// # Ok::<_, iri_string::validate::Error>(())
/// ```
///
/// [`IriAbsoluteStr`]: crate::types::IriAbsoluteStr
/// [`IriReferenceStr`]: crate::types::IriReferenceStr
/// [`IriRelativeStr`]: crate::types::IriRelativeStr
/// [`IriStr`]: crate::types::IriStr
/// [`UriAbsoluteStr`]: crate::types::UriAbsoluteStr
/// [`UriReferenceStr`]: crate::types::UriReferenceStr
/// [`UriRelativeStr`]: crate::types::UriRelativeStr
/// [`UriStr`]: crate::types::UriStr
#[derive(Debug, Clone, Copy)]
pub struct MappedToUri<'a, Src: ?Sized>(&'a Src);
/// Implements the `MappedToUri` conversions for one IRI string type.
///
/// Arguments:
///
/// * `$borrowed`: the borrowed (slice) string type, generic over the spec.
/// * `$owned`: the owned counterpart of `$borrowed`, generic over the spec.
/// * `$owned_uri`: the owned URI string type produced by
///   `ToDedicatedString::try_to_dedicated_string`.
macro_rules! impl_for_iri {
($borrowed:ident, $owned:ident, $owned_uri:ident) => {
// Displaying the wrapper writes the percent-encoded form of the source.
impl<S: Spec> fmt::Display for MappedToUri<'_, $borrowed<S>> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write_percent_encoded(f, self.0.as_str())
}
}
// Conversion to the owned URI type; only allocation failure is reported
// as an error.
#[cfg(feature = "alloc")]
impl<S: Spec> ToDedicatedString for MappedToUri<'_, $borrowed<S>> {
type Target = $owned_uri;
fn try_to_dedicated_string(&self) -> Result<Self::Target, TryReserveError> {
let s = self.try_to_string()?;
// The encoded string is expected to always be a valid URI, so a
// failed conversion is treated as a bug (panic).
Ok(TryFrom::try_from(s)
.expect("[validity] the IRI must be encoded into a valid URI"))
}
}
// Wrap a borrowed string.
impl<'a, S: Spec> From<&'a $borrowed<S>> for MappedToUri<'a, $borrowed<S>> {
#[inline]
fn from(iri: &'a $borrowed<S>) -> Self {
Self(iri)
}
}
// Wrap a reference to an owned string by borrowing it as a slice.
#[cfg(feature = "alloc")]
impl<'a, S: Spec> From<&'a $owned<S>> for MappedToUri<'a, $borrowed<S>> {
#[inline]
fn from(iri: &'a $owned<S>) -> Self {
Self(iri.as_slice())
}
}
};
}
// One instantiation per string kind: (borrowed, owned, owned URI target).
impl_for_iri!(RiReferenceStr, RiReferenceString, UriReferenceString);
impl_for_iri!(RiStr, RiString, UriString);
impl_for_iri!(RiAbsoluteStr, RiAbsoluteString, UriAbsoluteString);
impl_for_iri!(RiRelativeStr, RiRelativeString, UriRelativeString);
impl_for_iri!(RiQueryStr, RiQueryString, UriQueryString);
impl_for_iri!(RiFragmentStr, RiFragmentString, UriFragmentString);
/// Writes `s` to the formatter, percent-encoding every non-ASCII byte.
///
/// ASCII runs are written verbatim; runs of non-ASCII bytes are written as
/// `%XX` triplets.
fn write_percent_encoded(f: &mut fmt::Formatter<'_>, mut s: &str) -> fmt::Result {
    /// Number of source bytes to encode at once.
    const NUM_BYTES_AT_ONCE: usize = 21;

    while !s.is_empty() {
        // Emit the leading ASCII run as is.
        let ascii_len = s.bytes().take_while(u8::is_ascii).count();
        if ascii_len > 0 {
            f.write_str(&s[..ascii_len])?;
            s = &s[ascii_len..];
            if s.is_empty() {
                break;
            }
        }

        // Take the following run of non-ASCII bytes.
        let nonascii_len = s.bytes().take_while(|b| !b.is_ascii()).count();
        debug_assert!(
            nonascii_len != 0,
            "string without non-ASCII characters should have caused early return"
        );
        let (nonasciis, rest) = s.split_at(nonascii_len);
        s = rest;

        // Escape non-ASCII characters as percent-encoded bytes.
        //
        // RFC 3987 (section 3.1 step 2) says "for each character in
        // 'ucschar' or 'iprivate'", but this simply means "for each
        // non-ASCII characters" since any non-ASCII characters that can
        // appear in an IRI match `ucschar` or `iprivate`.
        let mut scratch = [0_u8; NUM_BYTES_AT_ONCE * 3];
        percent_encode_bytes(f, nonasciis, &mut scratch)?;
    }

    Ok(())
}
/// Percent-encodes every byte of `s` and writes the result to `f`.
///
/// The output is produced in chunks: each round encodes as many source bytes
/// as fit into `buf` (three output bytes per source byte) and writes that
/// chunk with `f.write_str`.
///
/// `buf` is used as a temporary working buffer. It is initialized by this
/// function, so callers can pass any mutable byte slice of sufficient size.
///
/// # Precondition
///
/// The length of `buf` must be 3 bytes or more.
///
/// # Panics
///
/// Panics if `buf` is shorter than 3 bytes.
fn percent_encode_bytes(f: &mut fmt::Formatter<'_>, s: &str, buf: &mut [u8]) -> fmt::Result {
/// Fills the buffer with percent-encoded bytes and returns the filled part
/// as a string.
///
/// Consumes at most `buf.len() / 3` bytes from `bytes`.
///
/// Note that this function applies percent-encoding to every byte,
/// even if it is an ASCII alphabet character.
///
/// # Precondition
///
/// * The length of `buf` must be 3 bytes or more.
/// * All of the `buf[i * 3]` elements should already be set to `b'%'`.
// This function has many preconditions and checking them should not be
// mandatory, so it is a nested inner function.
fn fill_by_percent_encoded<'a>(buf: &'a mut [u8], bytes: &mut core::str::Bytes<'_>) -> &'a str {
let src_len = bytes.len();
// `<[u8; N]>::array_chunks_mut` is unstable as of Rust 1.58.1.
// `zip` stops at whichever side runs out first: the buffer or the source.
for (dest, byte) in buf.chunks_exact_mut(3).zip(bytes.by_ref()) {
debug_assert_eq!(
dest.len(),
3,
"[validity] `chunks_exact()` must return a slice with the exact length"
);
debug_assert_eq!(
dest[0], b'%',
"[precondition] the buffer must be properly initialized"
);
let upper = byte >> 4;
let lower = byte & 0b1111;
dest[1] = HEXDIGITS[usize::from(upper)];
dest[2] = HEXDIGITS[usize::from(lower)];
}
// Number of consumed source bytes times three output bytes each.
let num_dest_written = (src_len - bytes.len()) * 3;
let buf_filled = &buf[..num_dest_written];
// SAFETY: `b'%'` and `HEXDIGITS[_]` are all ASCII characters, so
// `buf_filled` is filled with ASCII characters and is valid UTF-8 bytes.
unsafe {
debug_assert!(core::str::from_utf8(buf_filled).is_ok());
core::str::from_utf8_unchecked(buf_filled)
}
}
assert!(
buf.len() >= 3,
"[precondition] length of `buf` must be 3 bytes or more"
);
// Drop the elements that will never be used.
// The length to be used is always a multiple of three.
let buf_len = buf.len() / 3 * 3;
let buf = &mut buf[..buf_len];
// Fill some bytes with `%`.
// This will be vectorized by optimization (especially for long buffers),
// so no need to selectively set `buf[i * 3]`.
buf.fill(b'%');
let mut bytes = s.bytes();
// `<core::str::Bytes as ExactSizeIterator>::is_empty` is unstable as of Rust 1.58.1.
// Each round consumes a buffer-full of source bytes and writes them out.
while bytes.len() != 0 {
let encoded = fill_by_percent_encoded(buf, &mut bytes);
f.write_str(encoded)?;
}
Ok(())
}
/// Percent-encodes every non-ASCII byte of `iri` in place.
///
/// The string grows by two bytes per non-ASCII byte (one byte becomes the
/// three bytes `%XX`). If there is nothing to encode, `iri` is left untouched.
///
/// # Errors
///
/// Returns the allocation error if the extra capacity cannot be reserved; in
/// that case `iri` has not been modified.
#[cfg(feature = "alloc")]
pub(crate) fn try_percent_encode_iri_inline(
    iri: &mut String,
) -> Result<(), alloc::collections::TryReserveError> {
    // Each non-ASCII byte needs two more bytes in the output.
    let num_nonascii = count_nonascii(iri);
    if num_nonascii == 0 {
        // No need to escape.
        return Ok(());
    }
    let additional = num_nonascii * 2;
    iri.try_reserve(additional)?;
    let src_len = iri.len();

    // Work on the raw bytes; `iri` is temporarily left empty.
    let mut buf = core::mem::take(iri).into_bytes();
    // `b'\0'` cannot appear in a valid IRI, so this filler value would be
    // easy to spot when debugging.
    buf.resize(src_len + additional, b'\0');

    // Copy from the tail to the head so that unread source bytes are never
    // overwritten. Once every non-ASCII byte is handled, the remaining
    // prefix is ASCII and already in its final position.
    let mut write_pos = buf.len();
    let mut read_pos = src_len;
    let mut remaining = num_nonascii;
    while remaining != 0 {
        debug_assert!(
            read_pos > 0,
            "[validity] the source position should not overrun"
        );
        debug_assert!(
            write_pos > 0,
            "[validity] the destination position should not overrun"
        );
        read_pos -= 1;
        let byte = buf[read_pos];
        if byte.is_ascii() {
            // ASCII bytes are moved unchanged.
            write_pos -= 1;
            buf[write_pos] = byte;
            continue;
        }
        // Non-ASCII bytes expand to `%XX`.
        write_pos -= 3;
        buf[write_pos] = b'%';
        buf[write_pos + 1] = HEXDIGITS[usize::from(byte >> 4)];
        buf[write_pos + 2] = HEXDIGITS[usize::from(byte & 0b1111)];
        remaining -= 1;
    }

    // Hand the encoded bytes back to the caller's string.
    *iri = String::from_utf8(buf).expect("[consistency] the encoding result is an ASCII string");
    Ok(())
}
/// Returns the number of non-ASCII bytes in `s`.
///
/// Note that this counts bytes, not characters: a multi-byte character
/// contributes one count per byte.
#[cfg(feature = "alloc")]
#[inline]
#[must_use]
fn count_nonascii(s: &str) -> usize {
    s.as_bytes().iter().filter(|byte| !byte.is_ascii()).count()
}

209
vendor/iri-string/src/format.rs vendored Normal file
View File

@@ -0,0 +1,209 @@
//! Utilities for formatting (especially `Display` trait).
//!
//! This module contains utilities for [`Display`][`core::fmt::Display`]-able
//! types.
use core::fmt::{self, Write as _};
#[cfg(feature = "alloc")]
use alloc::collections::TryReserveError;
#[cfg(all(feature = "alloc", not(feature = "std")))]
use alloc::string::String;
/// Error returned when the output does not fit into the destination buffer.
#[derive(Debug, Clone, Copy)]
pub struct CapacityOverflowError;

impl fmt::Display for CapacityOverflowError {
    #[inline]
    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
        /// Human-readable description of the error.
        const MESSAGE: &str = "buffer capacity overflow";
        formatter.write_str(MESSAGE)
    }
}

#[cfg(feature = "std")]
impl std::error::Error for CapacityOverflowError {}

/// `fmt::Write` adapter that writes into a fixed-size byte buffer.
struct ByteBufWriter<'b> {
    /// Buffer to write into.
    buffer: &'b mut [u8],
    /// Offset in `buffer` at which the next fragment will be written.
    cursor: usize,
}

impl fmt::Write for ByteBufWriter<'_> {
    fn write_str(&mut self, s: &str) -> fmt::Result {
        let fragment = s.as_bytes();
        // Refuse the whole fragment when it does not fit; nothing is copied
        // in that case.
        let available = self.buffer.len() - self.cursor;
        if fragment.len() > available {
            return Err(fmt::Error);
        }
        let end = self.cursor + fragment.len();
        self.buffer[self.cursor..end].copy_from_slice(fragment);
        self.cursor = end;
        Ok(())
    }
}

/// Formats `value` into `buf` and returns the written part as a string.
///
/// # Errors
///
/// Returns [`CapacityOverflowError`] if formatting `value` fails, which
/// happens when `buf` is too small to hold the output. The content of `buf`
/// is unspecified in that case.
pub fn write_to_slice<'a, T: fmt::Display>(
    buf: &'a mut [u8],
    value: &T,
) -> Result<&'a str, CapacityOverflowError> {
    let written = {
        let mut writer = ByteBufWriter {
            buffer: &mut *buf,
            cursor: 0,
        };
        write!(writer, "{}", value).map_err(|_| CapacityOverflowError)?;
        writer.cursor
    };
    let text = core::str::from_utf8(&buf[..written])
        .expect("[validity] fmt::Display writes valid UTF-8 byte sequence");
    Ok(text)
}
/// `fmt::Write` adapter for `String` that reports allocation failure
/// instead of panicking.
#[cfg(feature = "alloc")]
struct StringWriter<'a> {
    /// String to append to.
    buffer: &'a mut String,
    /// The allocation error, once one has happened.
    error: Option<TryReserveError>,
}

#[cfg(feature = "alloc")]
impl fmt::Write for StringWriter<'_> {
    fn write_str(&mut self, s: &str) -> fmt::Result {
        // After the first allocation failure, every later write fails too.
        if self.error.is_some() {
            return Err(fmt::Error);
        }
        match self.buffer.try_reserve(s.len()) {
            Ok(()) => {
                // This should never fail since `.try_reserve(s.len())` succeeded.
                self.buffer.push_str(s);
                Ok(())
            }
            Err(e) => {
                // Keep the cause so that the caller can return it.
                self.error = Some(e);
                Err(fmt::Error)
            }
        }
    }
}
/// Appends the formatted `value` to `dest`.
///
/// On allocation failure, whatever has already been appended stays in
/// `dest`; callers that need the original content back must truncate it
/// themselves.
///
/// # Errors
///
/// Returns the allocation error if `dest` could not grow.
///
/// # Panics
///
/// Panics if formatting fails without an allocation error having been
/// recorded.
#[cfg(feature = "alloc")]
pub fn try_append_to_string<T: fmt::Display>(
    dest: &mut String,
    value: &T,
) -> Result<(), TryReserveError> {
    let mut sink = StringWriter {
        buffer: dest,
        error: None,
    };
    match write!(sink, "{}", value) {
        Ok(()) => Ok(()),
        Err(_) => Err(sink
            .error
            .expect("[consistency] allocation error should be set on formatting failure")),
    }
}
/// Returns true if `d`, once formatted with `Display`, is exactly equal to `s`.
///
/// The comparison is done on the fly while `d` is being formatted, so no
/// intermediate string is allocated.
pub(crate) fn eq_str_display<T>(s: &str, d: &T) -> bool
where
    T: ?Sized + fmt::Display,
{
    /// Dummy writer that compares the written fragments to the expected string.
    ///
    /// The field holds the part of the expected string not matched yet.
    struct CmpWriter<'a>(&'a str);

    impl fmt::Write for CmpWriter<'_> {
        fn write_str(&mut self, s: &str) -> fmt::Result {
            // `strip_prefix` compares bytes and never splits inside a
            // multi-byte character. Cutting the expected string at
            // `s.len()` would panic when that offset is not a character
            // boundary (e.g. expected `"é"`, written `"a"`).
            match self.0.strip_prefix(s) {
                Some(rest) => {
                    self.0 = rest;
                    Ok(())
                }
                // Mismatch, or the output is longer than the expected string.
                None => Err(fmt::Error),
            }
        }
    }

    let mut writer = CmpWriter(s);
    let succeeded = write!(writer, "{}", d).is_ok();
    // The expected string must also be fully consumed; otherwise the output
    // is only a proper prefix of it.
    succeeded && writer.0.is_empty()
}
/// Placeholder whose `Debug` output stands in for sensitive information.
#[derive(Clone, Copy)]
pub(crate) struct Censored;

impl fmt::Debug for Censored {
    /// Always writes the fixed text `{censored}`.
    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
        formatter.write_str("{censored}")
    }
}
/// Fallible counterpart of [`ToString`][`alloc::string::ToString`].
#[cfg(feature = "alloc")]
pub trait ToStringFallible: alloc::string::ToString {
    /// Like [`ToString::to_string`][`alloc::string::ToString::to_string`],
    /// but returns an error instead of panicking on allocation failure.
    fn try_to_string(&self) -> Result<String, TryReserveError>;
}

#[cfg(feature = "alloc")]
impl<T: fmt::Display> ToStringFallible for T {
    /// Like [`ToString::to_string`][`alloc::string::ToString::to_string`],
    /// but returns an error instead of panicking on allocation failure.
    #[inline]
    fn try_to_string(&self) -> Result<String, TryReserveError> {
        let mut out = String::new();
        let outcome = try_append_to_string(&mut out, self);
        outcome.map(|()| out)
    }
}
/// A trait for types that can be converted to a dedicated allocated string type.
#[cfg(feature = "alloc")]
pub trait ToDedicatedString {
/// Conversion target type.
type Target;
/// Converts the value to the allocated string.
///
/// # Errors
///
/// Returns an error if memory allocation fails.
fn try_to_dedicated_string(&self) -> Result<Self::Target, TryReserveError>;
/// Converts the value to the allocated string.
///
/// This is the panicking counterpart of `try_to_dedicated_string`.
///
/// # Panics
///
/// Panics if a memory allocation error occurred.
#[inline]
#[must_use]
fn to_dedicated_string(&self) -> Self::Target {
self.try_to_dedicated_string()
.expect("failed to allocate enough memory")
}
}
#[cfg(test)]
mod tests {
use super::*;
/// Checks `eq_str_display` against equal and unequal pairs.
#[test]
fn eq_str_display_1() {
// Equal cases: the formatted value matches the string exactly.
assert!(eq_str_display("hello", "hello"));
assert!(eq_str_display("42", &42));
assert!(eq_str_display(
r#"\x00\t\r\n\xff\\"#,
&b"\x00\t\r\n\xff\\".escape_ascii()
));
// Unequal cases: different content, and one side being a proper
// prefix of the other (in both directions).
assert!(!eq_str_display("hello", "world"));
assert!(!eq_str_display("hello world", "hello"));
assert!(!eq_str_display("hello", "hello world"));
assert!(!eq_str_display("42", &4));
assert!(!eq_str_display("4", &42));
}
}

159
vendor/iri-string/src/lib.rs vendored Normal file
View File

@@ -0,0 +1,159 @@
//! String types for [RFC 3987 Internationalized Resource Identifiers (IRIs)][RFC 3987] and
//! [RFC 3986 Uniform Resource Identifiers (URIs)][RFC 3986].
//!
//! Note that this crate does not have any extra knowledge about protocols.
//! Comparisons between IRI strings by `PartialEq` and `Eq` are implemented as [simple string
//! comparison](https://www.rfc-editor.org/rfc/rfc3986.html#section-6.2.1).
//! You should implement by yourself or use another crate to use such extra knowledge to compare
//! IRIs / URIs.
//!
//! # Capability
//!
//! This crate provides many features for IRIs / URIs.
//!
//! ## String types
//!
//! [`types` module][`types`] provides various string types for IRIs and URIs.
//! The borrowed string types are unsized slice types (such as `[u8]` and `str`)
//! and not a sized struct, so they are highly interoperable with for example
//! `Cow` and `Rc`. Conversions between `&str` and borrowed IRI string types are easy.
//!
//! ## Resolvers
//!
//! [`resolve` module][`resolve`] provides IRI / URI references resolver.
//! However, you are recommended to use methods of string types such as
//! [`RiReferenceStr::resolve_against()`] or [`RiRelativeStr::resolve_against()`]
//! if you don't intend to resolve multiple IRIs against the same base.
//!
//! ## Validators
//!
//! Validator functions are provided from [`validate` module][`validate`].
//!
//! ## Percent encoding
//!
//! [`percent_encode` module][`percent_encode`] provides a converter to encode
//! user-provided string into percent-encoded one (if syntax requires so).
//!
//! ## IRI builder
//!
//! [`build` module][`build`] provides IRI builder.
//!
//! ## URI template (RFC 6570)
//!
//! [`template` module][`template`] provides an RFC 6570 URI Template processor.
//!
//! # Feature flags
//!
//! ## `std` and `alloc` support
//!
//! This crate supports `no_std` usage.
//!
//! * `alloc` feature:
//! + Std library or `alloc` crate is required.
//! + This feature enables types and functions which require memory allocation,
//! e.g. `types::IriString` and `types::IriRelativeStr::resolve_against()`.
//! * `std` feature (**enabled by default**):
//! + Std library is required.
//! + This automatically enables `alloc` feature.
//! + The feature lets the crate utilize std-specific stuff, such as `std::error::Error` trait.
//! * With neither of them:
//! + The crate can be used in `no_std` environment.
//!
//! ## Other features
//!
//! * `serde`
//! + Enables serde support.
//! + Implement `Serialize` and `Deserialize` traits for IRI / URI types.
//! * `memchr`
//! + Enables faster internal character search.
//!
//! # Rationale
//!
//! ## `foo:`, `foo:/`, `foo://`, `foo:///`, `foo:////`, ... are valid IRIs
//!
//! All of these are valid IRIs.
//! (On the other hand, all of them are invalid as relative IRI reference, because they don't
//! match `relative-part` rule, especially `path-noscheme`, as the first path component of the
//! relative path contains a colon.)
//!
//! * `foo:`
//! + Decomposed to `<scheme="foo">:<path-empty="">`.
//! * `foo:/`
//! + Decomposed to `<scheme="foo">:<path-absolute="/">`.
//! * `foo://`
//! + Decomposed to `<scheme="foo">://<authority=""><path-absolute="">`.
//! * `foo:///`
//! + Decomposed to `<scheme="foo">://<authority=""><path-absolute="/">`.
//! * `foo:////`
//! + Decomposed to `<scheme="foo">://<authority=""><path-absolute="//">`.
//! * `foo://///`
//! + Decomposed to `<scheme="foo">://<authority=""><path-absolute="///">`.
//!
//! RFC 3986 says that "if authority is absent, path cannot start with `//`".
//!
//! > When authority is present, the path must either be empty or begin with a slash ("/")
//! > character. When authority is not present, the path cannot begin with two slash characters
//! > ("//").
//! >
//! > --- [RFC 3986, section 3. Syntax Components](https://www.rfc-editor.org/rfc/rfc3986.html#section-3).
//!
//! > If a URI contains an authority component, then the path component must either be empty or
//! > begin with a slash ("/") character. If a URI does not contain an authority component, then the
//! > path cannot begin with two slash characters ("//").
//! >
//! > --- [RFC 3986, section 3.3. Path](https://www.rfc-editor.org/rfc/rfc3986.html#section-3.3)
//!
//! We should interpret them as "if `authority` rule is completely unused (i.e. does not match any
//! strings **including empty string**), path cannot start with `//`".
//! In other words, we should consider this as **explaining the ABNF of `hier-part` rule**
//! (especially why it does not use `path` rule), but **not adding extra restriction to the rule
//! written in ABNF**.
//!
//! This restriction is necessary to remove ambiguity in decomposition of some strings.
//! For example, it is natural to decompose `foo://` to `<scheme="foo">:<path="//">` or
//! `<scheme="foo">://<authority=""><path="">`.
//! The restriction, **which is already encoded to the ABNF rule**, tells us to always decompose to
//! the latter form, rather than the former one.
//!
//! Readers of the spec might be confused by "when authority is **present**" and "if a URI
//! **contains** an authority component", which are unclear.
//! However, based on the interpretation above, we should consider authority part with empty string
//! as satisfying the condition "authority is **present**".
//!
//! ## IRI resolution can fail
//!
//! For some inputs, resulting string of IRI normalization and resolution can be syntactically
//! correct but semantically wrong. In such cases, the normalizer and resolver provided by this
//! crate do not silently "fix" the IRI by non-standard processing, but just
//! fail by returning `Err(_)`.
//!
//! For details, see the documentation of [`normalize`] module.
//!
//! [RFC 3986]: https://www.rfc-editor.org/rfc/rfc3986.html
//! [RFC 3987]: https://www.rfc-editor.org/rfc/rfc3987.html
//! [`RiReferenceStr::resolve_against()`]: `types::RiReferenceStr::resolve_against`
//! [`RiRelativeStr::resolve_against()`]: `types::RiRelativeStr::resolve_against`
#![warn(missing_docs)]
#![warn(unsafe_op_in_unsafe_fn)]
#![warn(clippy::missing_docs_in_private_items)]
#![warn(clippy::undocumented_unsafe_blocks)]
#![cfg_attr(not(feature = "std"), no_std)]
#![cfg_attr(docsrs, feature(doc_cfg))]
#[cfg(feature = "alloc")]
extern crate alloc;
pub mod build;
pub mod components;
pub mod convert;
pub mod format;
pub mod mask_password;
pub mod normalize;
pub(crate) mod parser;
pub mod percent_encode;
pub(crate) mod raw;
pub mod resolve;
pub mod spec;
pub mod template;
pub mod types;
pub mod validate;

298
vendor/iri-string/src/mask_password.rs vendored Normal file
View File

@@ -0,0 +1,298 @@
//! Password masker.
use core::fmt::{self, Write as _};
use core::ops::Range;
#[cfg(all(feature = "alloc", not(feature = "std")))]
use alloc::borrow::ToOwned;
#[cfg(feature = "alloc")]
use alloc::collections::TryReserveError;
#[cfg(all(feature = "alloc", not(feature = "std")))]
use alloc::string::String;
use crate::components::AuthorityComponents;
#[cfg(feature = "alloc")]
use crate::format::ToDedicatedString;
use crate::spec::Spec;
use crate::types::{RiAbsoluteStr, RiReferenceStr, RiRelativeStr, RiStr};
#[cfg(feature = "alloc")]
use crate::types::{RiAbsoluteString, RiReferenceString, RiRelativeString, RiString};
/// Returns the byte range of the password inside the IRI, if it has userinfo.
///
/// The range covers everything after the first `:` of the userinfo. When the
/// userinfo contains no `:`, the returned range is empty and sits at the end
/// of the userinfo. `None` is returned when the IRI has no authority or no
/// userinfo.
pub(crate) fn password_range_to_hide<S: Spec>(iri: &RiReferenceStr<S>) -> Option<Range<usize>> {
    /// Spec-independent part: locates the password bytes of `userinfo` in `iri`.
    fn inner(iri: &str, userinfo: &str) -> Option<Range<usize>> {
        let slashes = iri
            .find("//")
            .expect("[validity] `authority` component must be prefixed with `//`");
        // The authority starts right after the `//`.
        // 2: `"//".len()`.
        let authority_start = 2 + slashes;
        // Offset of the password within the userinfo (just after the `:`),
        // or the userinfo length when there is no password part at all.
        let password_offset = match userinfo.find(':') {
            Some(colon) => colon + 1,
            None => userinfo.len(),
        };
        let start = authority_start + password_offset;
        let end = authority_start + userinfo.len();
        Some(start..end)
    }

    let userinfo = AuthorityComponents::from_iri(iri)?.userinfo()?;
    inner(iri.as_str(), userinfo)
}
/// Writes the string `s` with the `pw_range` part replaced by `alt`.
///
/// The content before `pw_range.start` is written first, then `alt` is
/// formatted in place of the password, and finally the content from
/// `pw_range.end` to the end of `s` is written.
fn write_with_masked_password<D>(
    f: &mut fmt::Formatter<'_>,
    s: &str,
    pw_range: Range<usize>,
    alt: &D,
) -> fmt::Result
where
    D: ?Sized + fmt::Display,
{
    debug_assert!(
        s.len() >= pw_range.end,
        "[consistency] password range must be inside the IRI"
    );

    let Range { start, end } = pw_range;
    f.write_str(&s[..start])?;
    fmt::Display::fmt(alt, f)?;
    f.write_str(&s[end..])
}
/// Writes an IRI with the password part trimmed.
///
/// This replaces the `pw_range` part of `s` with an empty string.
fn write_trim_password(f: &mut fmt::Formatter<'_>, s: &str, pw_range: Range<usize>) -> fmt::Result {
write_with_masked_password(f, s, pw_range, "")
}
/// A wrapper of an IRI string that masks the non-empty password when `Display`ed.
///
/// This is the return type of the `mask_password` method of IRI string types
/// (such as [`RiStr::mask_password`]).
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::types::UriReferenceStr;
///
/// let iri = UriReferenceStr::new("http://user:password@example.com/path?query")?;
/// let masked = iri.mask_password();
/// assert_eq!(masked.to_string(), "http://user:@example.com/path?query");
///
/// assert_eq!(
/// masked.replace_password("${password}").to_string(),
/// "http://user:${password}@example.com/path?query"
/// );
/// # }
/// # Ok::<_, Error>(())
/// ```
///
/// [`RiStr::mask_password`]: `crate::types::RiStr::mask_password`
#[derive(Clone, Copy)]
pub struct PasswordMasked<'a, T: ?Sized> {
/// IRI reference.
iri_ref: &'a T,
}
impl<'a, T: ?Sized> PasswordMasked<'a, T> {
/// Creates a new `PasswordMasked` object that wraps the given IRI reference.
#[inline]
#[must_use]
pub(crate) fn new(iri_ref: &'a T) -> Self {
Self { iri_ref }
}
}
/// Implements methods and traits for `PasswordMasked`.
///
/// `$borrowed` is the borrowed IRI string type wrapped by `PasswordMasked`,
/// and `$owned` is the owned type returned by `ToDedicatedString`.
macro_rules! impl_mask {
($borrowed:ident, $owned:ident) => {
impl<'a, S: Spec> PasswordMasked<'a, $borrowed<S>> {
/// Replaces the password with the given arbitrary content.
///
/// Note that the result might be invalid as an IRI since arbitrary string
/// can go to the place of the password.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::format::ToDedicatedString;
/// use iri_string::types::IriReferenceStr;
///
/// let iri = IriReferenceStr::new("http://user:password@example.com/path?query")?;
/// let masked = iri.mask_password();
///
/// assert_eq!(
/// masked.replace_password("${password}").to_string(),
/// "http://user:${password}@example.com/path?query"
/// );
/// # }
/// # Ok::<_, Error>(())
/// ```
#[inline]
#[must_use]
pub fn replace_password<D>(&self, alt: D) -> PasswordReplaced<'a, $borrowed<S>, D>
where
D: fmt::Display,
{
PasswordReplaced::with_replacer(self.iri_ref, move |_| alt)
}
/// Replaces the password with the content returned by the given function.
///
/// The function receives the original password as its argument.
///
/// Note that the result might be invalid as an IRI since arbitrary string
/// can go to the place of the password.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::format::ToDedicatedString;
/// use iri_string::types::IriReferenceStr;
///
/// let iri = IriReferenceStr::new("http://user:password@example.com/path?query")?;
/// let masked = iri.mask_password();
///
/// let replaced = masked
/// .replace_password_with(|password| format!("{{{} chars}}", password.len()));
/// assert_eq!(
/// replaced.to_string(),
/// "http://user:{8 chars}@example.com/path?query"
/// );
/// # }
/// # Ok::<_, Error>(())
/// ```
#[inline]
#[must_use]
pub fn replace_password_with<F, D>(
&self,
replace: F,
) -> PasswordReplaced<'a, $borrowed<S>, D>
where
F: FnOnce(&str) -> D,
D: fmt::Display,
{
PasswordReplaced::with_replacer(self.iri_ref, replace)
}
}
impl<S: Spec> fmt::Display for PasswordMasked<'_, $borrowed<S>> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match password_range_to_hide(self.iri_ref.as_ref()) {
Some(pw_range) => write_trim_password(f, self.iri_ref.as_str(), pw_range),
// Nothing to hide: write the IRI as is.
None => self.iri_ref.fmt(f),
}
}
}
impl<S: Spec> fmt::Debug for PasswordMasked<'_, $borrowed<S>> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
// Write the masked IRI (same as `Display`) enclosed by `<` and `>`.
f.write_char('<')?;
fmt::Display::fmt(self, f)?;
f.write_char('>')
}
}
#[cfg(feature = "alloc")]
impl<S: Spec> ToDedicatedString for PasswordMasked<'_, $borrowed<S>> {
type Target = $owned<S>;
fn try_to_dedicated_string(&self) -> Result<Self::Target, TryReserveError> {
let pw_range = match password_range_to_hide(self.iri_ref.as_ref()) {
Some(pw_range) => pw_range,
// Nothing to hide: return the owned copy of the IRI as is.
None => return Ok(self.iri_ref.to_owned()),
};
let mut s = String::new();
let iri_ref = self.iri_ref.as_str();
// Reserve the length of the IRI minus the length of the password to trim.
s.try_reserve(iri_ref.len() - (pw_range.end - pw_range.start))?;
s.push_str(&iri_ref[..pw_range.start]);
s.push_str(&iri_ref[pw_range.end..]);
// SAFETY: IRI remains valid and type does not change if
// the password is trimmed.
let iri = unsafe { <$owned<S>>::new_maybe_unchecked(s) };
Ok(iri)
}
}
};
}
impl_mask!(RiReferenceStr, RiReferenceString);
impl_mask!(RiStr, RiString);
impl_mask!(RiAbsoluteStr, RiAbsoluteString);
impl_mask!(RiRelativeStr, RiRelativeString);
/// A wrapper of an IRI string that replaces the non-empty password when `Display`ed.
///
/// This is the return type of [`PasswordMasked::replace_password`] and
/// `PasswordMasked::replace_password_with`.
///
/// Note that the result might be invalid as an IRI since arbitrary string can
/// go to the place of the password.
#[cfg_attr(
feature = "alloc",
doc = "Because of this, [`ToDedicatedString`] trait is not implemented for this type."
)]
///
/// [`PasswordMasked::replace_password`]: `PasswordMasked::replace_password`
pub struct PasswordReplaced<'a, T: ?Sized, D> {
/// IRI reference.
iri_ref: &'a T,
/// Password range and alternative content.
///
/// `None` if no password range to hide was found.
password: Option<(Range<usize>, D)>,
}
impl<'a, T, D> PasswordReplaced<'a, T, D>
where
    T: ?Sized,
    D: fmt::Display,
{
    /// Creates a new `PasswordReplaced` object.
    ///
    /// `replace` is called with the original password only when a password
    /// range to hide is found, and its return value is stored as the
    /// alternative content.
    ///
    /// # Precondition
    ///
    /// The given string must be a valid IRI reference.
    #[inline]
    #[must_use]
    pub(crate) fn with_replacer<S, F>(iri_ref: &'a T, replace: F) -> Self
    where
        S: Spec,
        T: AsRef<RiReferenceStr<S>>,
        F: FnOnce(&str) -> D,
    {
        let reference: &RiReferenceStr<S> = iri_ref.as_ref();
        let password = match password_range_to_hide(reference) {
            Some(pw_range) => {
                let alt = replace(&reference.as_str()[pw_range.clone()]);
                Some((pw_range, alt))
            }
            None => None,
        };
        Self { iri_ref, password }
    }
}
/// Implements traits for `PasswordReplaced`.
///
/// `$borrowed` is the borrowed IRI string type wrapped by `PasswordReplaced`.
/// `$owned` is accepted but not used by the generated code.
macro_rules! impl_replace {
($borrowed:ident, $owned:ident) => {
impl<S: Spec, D: fmt::Display> fmt::Display for PasswordReplaced<'_, $borrowed<S>, D> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match &self.password {
Some((pw_range, alt)) => {
write_with_masked_password(f, self.iri_ref.as_str(), pw_range.clone(), alt)
}
// No password to replace: write the IRI as is.
None => self.iri_ref.fmt(f),
}
}
}
impl<S: Spec, D: fmt::Display> fmt::Debug for PasswordReplaced<'_, $borrowed<S>, D> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
// Write the replaced IRI (same as `Display`) enclosed by `<` and `>`.
f.write_char('<')?;
fmt::Display::fmt(self, f)?;
f.write_char('>')
}
}
};
}
impl_replace!(RiReferenceStr, RiReferenceString);
impl_replace!(RiStr, RiString);
impl_replace!(RiAbsoluteStr, RiAbsoluteString);
impl_replace!(RiRelativeStr, RiRelativeString);

691
vendor/iri-string/src/normalize.rs vendored Normal file
View File

@@ -0,0 +1,691 @@
//! Normalization.
//!
//! # IRI normalization (and resolution) can fail
//!
//! Though this is not explicitly stated in RFC 3986, IRI normalization can fail.
//! For example, `foo:.///bar`, `foo:./..//bar`, and `foo:/..//bar` are all
//! normalized to `foo://bar` as a string. However, IRI without authority (note
//! that this is different from "with empty authority") cannot have a path
//! starting with `//`, since it is ambiguous and can be interpreted as an IRI
//! with authority. So, `foo://bar` is decomposed as scheme `foo`, authority
//! `bar`, and empty path. The expected result is the combination of scheme
//! `foo`, no authority, and path `//bar` (though this is not possible to
//! serialize), so the algorithm fails as it cannot return the intended result.
//!
//! IRI resolution can also fail since it (conditionally) invokes normalization
//! during the resolution process. For example, resolving a reference `.///bar`
//! or `/..//bar` against the base `foo:` fails.
//!
//! Thus, IRI resolution can fail for some abnormal cases.
//!
//! Note that this kind of failure can happen only when the base IRI has no
//! authority and empty path. This would be rare in the wild, since many people
//! would use an IRI with authority part, such as `http://`.
//!
//! If you are handling `scheme://`-style URIs and IRIs, don't worry about the
//! failure. Currently no cases are known to fail when at least one of the base
//! IRI or the relative IRI contains authorities.
//!
//! To know what will happen on resolution failure, see the module documentation
//! for [`resolve`][`crate::resolve`].
//!
//! ## Examples
//!
//! ### Normalization failure
//!
//! ```
//! # #[cfg(feature = "alloc")] {
//! use iri_string::normalize::Error;
//! use iri_string::types::{IriAbsoluteStr, IriReferenceStr};
//!
//! let base = IriAbsoluteStr::new("foo:.///bar")?;
//! assert!(
//! base.normalize().ensure_rfc3986_normalizable().is_err(),
//! "this normalization should fail without WHATWG URL Standard serialization"
//! );
//! # }
//! # Ok::<_, iri_string::validate::Error>(())
//! ```
//!
//! ### Resolution failure
//!
//! ```
//! # #[cfg(feature = "alloc")] {
//! use iri_string::types::{IriAbsoluteStr, IriReferenceStr};
//!
//! let base = IriAbsoluteStr::new("scheme:")?;
//! {
//! let reference = IriReferenceStr::new(".///bar")?;
//! let result = reference.resolve_against(base)
//! .ensure_rfc3986_normalizable();
//! assert!(result.is_err());
//! }
//!
//! {
//! let reference2 = IriReferenceStr::new("/..//bar")?;
//! // Resulting string will be `scheme://bar`, but `bar` should be a path
//! // segment, not a host. So, the semantically correct target IRI cannot
//! // be represented.
//! let result2 = reference2.resolve_against(base)
//! .ensure_rfc3986_normalizable();
//! assert!(result2.is_err());
//! }
//! # }
//! # Ok::<_, iri_string::validate::Error>(())
//! ```
mod error;
mod path;
mod pct_case;
use core::fmt::{self, Display as _, Write as _};
use core::marker::PhantomData;
#[cfg(feature = "alloc")]
use alloc::collections::TryReserveError;
use crate::components::{RiReferenceComponents, Splitter};
#[cfg(feature = "alloc")]
use crate::format::{ToDedicatedString, ToStringFallible};
use crate::parser::str::rfind_split_hole;
use crate::parser::trusted::is_ascii_only_host;
use crate::spec::Spec;
use crate::types::{RiAbsoluteStr, RiReferenceStr, RiStr};
#[cfg(feature = "alloc")]
use crate::types::{RiAbsoluteString, RiString};
pub use self::error::Error;
pub(crate) use self::path::{Path, PathCharacteristic, PathToNormalize};
pub(crate) use self::pct_case::{
is_pct_case_normalized, NormalizedAsciiOnlyHost, PctCaseNormalized,
};
/// Normalization algorithm.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum NormalizationMode {
    /// No normalization is applied.
    None,
    /// Default normalization mode.
    ///
    /// Applies RFC 3986 normalization whenever possible. When not possible,
    /// applies serialization algorithm defined in WHATWG URL standard.
    Default,
    /// WHATWG-like normalization mode.
    ///
    /// Preserves relative path as is (modulo case/pct normalization) when the
    /// authority component is absent.
    PreserveAuthoritylessRelativePath,
}

impl NormalizationMode {
    /// Tells whether case normalization and percent-encoding normalization
    /// should be applied in this mode.
    ///
    /// Note that even when this returns `true`, plain US-ASCII characters
    /// won't be automatically lowered. Users should apply case normalization
    /// for US-ASCII only `host` component by themselves.
    #[inline]
    #[must_use]
    fn case_pct_normalization(self) -> bool {
        match self {
            Self::Default | Self::PreserveAuthoritylessRelativePath => true,
            Self::None => false,
        }
    }
}
/// Normalizedness check algorithm.
///
/// Selects the normalization algorithm against which an IRI is checked.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum NormalizednessCheckMode {
/// Default algorithm (corresponding to [`NormalizationMode::Default`]).
Default,
/// Strict RFC 3986 normalization.
Rfc3986,
/// WHATWG-like normalization algorithm (corresponding to
/// [`NormalizationMode::PreserveAuthoritylessRelativePath`]).
PreserveAuthoritylessRelativePath,
}
/// Normalization operation.
///
/// Currently this only carries the [`NormalizationMode`] to apply.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) struct NormalizationOp {
/// Normalization mode.
pub(crate) mode: NormalizationMode,
}
/// Spec-agnostic IRI normalization/resolution input.
///
/// Holds the components of the target IRI as borrowed strings, together with
/// the normalization operation to apply when the IRI is written.
#[derive(Debug, Clone, Copy)]
pub(crate) struct NormalizationInput<'a> {
/// Target scheme.
scheme: &'a str,
/// Target authority.
authority: Option<&'a str>,
/// Target path without dot-removal.
path: Path<'a>,
/// Target query.
query: Option<&'a str>,
/// Target fragment.
fragment: Option<&'a str>,
/// Normalization type.
op: NormalizationOp,
}
impl<'a> NormalizationInput<'a> {
/// Creates a `NormalizationInput` from IRIs to resolve.
///
/// `reference` is the IRI reference to be resolved, and `base_components`
/// is the decomposed base IRI. The returned input has normalization
/// disabled (`NormalizationMode::None`).
#[inline]
#[must_use]
pub(crate) fn with_resolution_params<S: Spec>(
base_components: &RiReferenceComponents<'a, S>,
reference: &'a RiReferenceStr<S>,
) -> Self {
let r = RiReferenceComponents::from(reference);
Self::create_normalization_input(
r.iri.as_str(),
&r.splitter,
base_components.iri.as_str(),
&base_components.splitter,
)
}
/// Creates a `NormalizationInput` from components to resolve an IRI.
///
/// `r_iri` and `r` are the reference string and its splitter, and `b_iri`
/// and `b` are the base string and its splitter. Each component of the
/// result is taken from either the reference or the base, depending on
/// the topmost component the reference has.
///
/// # Panics
///
/// Panics if neither the reference nor the base has a scheme.
#[must_use]
fn create_normalization_input(
r_iri: &'a str,
r: &Splitter,
b_iri: &'a str,
b: &Splitter,
) -> Self {
/// The toplevel component the reference has.
///
/// The declaration order of the variants is significant: the derived
/// `Ord` is used by `choose_then` to compare the positions of components.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
enum RefToplevel {
/// Scheme.
Scheme,
/// Authority.
Authority,
/// Path.
Path,
/// Query.
Query,
/// Reference is empty or has only fragment.
None,
}
impl RefToplevel {
/// Choose a component from either of the reference or the base,
/// based on the toplevel component of the reference.
///
/// Calls `reference` if the toplevel component of the reference
/// (`self`) is `component` or precedes it, and calls `base` otherwise.
#[inline]
#[must_use]
fn choose_then<T, F, G>(self, component: RefToplevel, reference: F, base: G) -> T
where
F: FnOnce() -> T,
G: FnOnce() -> T,
{
if self <= component {
reference()
} else {
base()
}
}
}
// Find the first (topmost) component that the reference has.
let ref_toplevel = if r.has_scheme() {
RefToplevel::Scheme
} else if r.has_authority() {
RefToplevel::Authority
} else if !r.is_path_empty(r_iri.len()) {
RefToplevel::Path
} else if r.has_query() {
RefToplevel::Query
} else {
RefToplevel::None
};
let path = match ref_toplevel {
// The reference has a scheme or an authority: use the path of the
// reference alone.
RefToplevel::Scheme | RefToplevel::Authority => {
Path::NeedsProcessing(PathToNormalize::from_single_path(r.path_str(r_iri)))
}
RefToplevel::Path => {
let r_path = r.path_str(r_iri);
if r_path.starts_with('/') {
// Absolute path: the path of the base is not used.
Path::NeedsProcessing(PathToNormalize::from_single_path(r_path))
} else {
// About this branch, see
// <https://datatracker.ietf.org/doc/html/rfc3986#section-5.2.3>.
//
// > o If the base URI has a defined authority component and an empty
// > path, then return a string consisting of "/" concatenated with the
// > reference's path; otherwise,
let b_path = b.path_str(b_iri);
let b_path = if b.has_authority() && b_path.is_empty() {
"/"
} else {
b_path
};
Path::NeedsProcessing(PathToNormalize::from_paths_to_be_resolved(
b_path, r_path,
))
}
}
// The reference has an empty path: use the path of the base as is.
RefToplevel::Query | RefToplevel::None => Path::Done(b.path_str(b_iri)),
};
Self {
// Prefer the scheme of the reference, and fall back to that of the base.
scheme: r.scheme_str(r_iri).unwrap_or_else(|| {
b.scheme_str(b_iri)
.expect("[validity] non-relative IRI must have a scheme")
}),
authority: ref_toplevel.choose_then(
RefToplevel::Authority,
|| r.authority_str(r_iri),
|| b.authority_str(b_iri),
),
path,
query: ref_toplevel.choose_then(
RefToplevel::Query,
|| r.query_str(r_iri),
|| b.query_str(b_iri),
),
// The fragment is always taken from the reference.
fragment: r.fragment_str(r_iri),
op: NormalizationOp {
mode: NormalizationMode::None,
},
}
}
}
impl<'a, S: Spec> From<&'a RiStr<S>> for NormalizationInput<'a> {
    /// Decomposes the IRI into its major components and wraps them as a
    /// normalization input with normalization disabled.
    ///
    /// The path is marked as not yet processed.
    fn from(iri: &'a RiStr<S>) -> Self {
        let components = RiReferenceComponents::<S>::from(iri.as_ref());
        let (scheme, authority, raw_path, query, fragment) = components.to_major();

        Self {
            scheme: scheme.expect("[validity] `absolute IRI must have `scheme`"),
            authority,
            path: Path::NeedsProcessing(PathToNormalize::from_single_path(raw_path)),
            query,
            fragment,
            op: NormalizationOp {
                mode: NormalizationMode::None,
            },
        }
    }
}
#[cfg(feature = "alloc")]
impl<'a, S: Spec> From<&'a RiString<S>> for NormalizationInput<'a> {
/// Creates a normalization input from the value returned by `as_slice()`.
#[inline]
fn from(iri: &'a RiString<S>) -> Self {
Self::from(iri.as_slice())
}
}
impl<'a, S: Spec> From<&'a RiAbsoluteStr<S>> for NormalizationInput<'a> {
    /// Decomposes the absolute IRI into its major components and wraps them
    /// as a normalization input with normalization disabled.
    ///
    /// The path is marked as not yet processed.
    fn from(iri: &'a RiAbsoluteStr<S>) -> Self {
        let components = RiReferenceComponents::<S>::from(iri.as_ref());
        let (scheme, authority, raw_path, query, fragment) = components.to_major();

        Self {
            scheme: scheme.expect("[validity] `absolute IRI must have `scheme`"),
            authority,
            path: Path::NeedsProcessing(PathToNormalize::from_single_path(raw_path)),
            query,
            fragment,
            op: NormalizationOp {
                mode: NormalizationMode::None,
            },
        }
    }
}
#[cfg(feature = "alloc")]
impl<'a, S: Spec> From<&'a RiAbsoluteString<S>> for NormalizationInput<'a> {
/// Creates a normalization input from the value returned by `as_slice()`.
#[inline]
fn from(iri: &'a RiAbsoluteString<S>) -> Self {
Self::from(iri.as_slice())
}
}
impl NormalizationInput<'_> {
    /// Checks if the path is normalizable by RFC 3986 algorithm.
    ///
    /// The check is delegated to the path only when the authority is absent
    /// and the path still needs processing. In every other case this returns
    /// `Ok(())` immediately.
    ///
    /// Returns `Ok(())` when normalizable, returns `Err(_)` if not.
    pub(crate) fn ensure_rfc3986_normalizable(&self) -> Result<(), Error> {
        match (self.authority, self.path) {
            (None, Path::NeedsProcessing(path)) => {
                path.ensure_rfc3986_normalizable_with_authority_absent()
            }
            (Some(_), _) | (None, Path::Done(_)) => Ok(()),
        }
    }
}
/// Writable as a normalized IRI.
///
/// Note that this implicitly applies the serialization rule defined by the
/// WHATWG URL Standard (to handle normalization impossible by RFC 3986) because
/// `Display` should not fail for reasons other than backend I/O failure. If you
/// want the normalization to fail in such cases, check if the path starts with
/// `/./`. When the normalization succeeds by RFC 3986 algorithm, the path never
/// starts with `/./`.
struct NormalizedInner<'a, S> {
/// Spec-agnostic normalization input.
input: NormalizationInput<'a>,
/// Spec.
_spec: PhantomData<fn() -> S>,
}
impl<S: Spec> fmt::Debug for NormalizedInner<'_, S> {
/// Writes the value as a struct named `Normalized` with the `input` field only.
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_struct("Normalized")
.field("input", &self.input)
.finish()
}
}
impl<'a, S: Spec> NormalizedInner<'a, S> {
/// Creates a new `NormalizedInner` object from the given input.
#[inline]
#[must_use]
fn from_input(input: NormalizationInput<'a>) -> Self {
Self {
input,
_spec: PhantomData,
}
}
}
impl<S: Spec> fmt::Display for NormalizedInner<'_, S> {
    /// Writes the scheme, authority, path, query, and fragment in this order.
    ///
    /// Each component is normalized on the fly when the normalization mode
    /// requests case/percent-encoding normalization, and written as is
    /// otherwise.
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let input = &self.input;
        let normalize = input.op.mode.case_pct_normalization();
        let has_authority = input.authority.is_some();

        // Scheme and the following colon.
        if normalize {
            normalize_scheme(f, input.scheme)?;
        } else {
            f.write_str(input.scheme)?;
        }
        f.write_str(":")?;

        // Authority, if available.
        if let Some(authority) = input.authority {
            f.write_str("//")?;
            if normalize {
                normalize_authority::<S>(f, authority)?;
            } else {
                // No case/pct normalization.
                f.write_str(authority)?;
            }
        }

        // Path.
        match input.path {
            // Already processed and no normalization requested: write as is.
            Path::Done(done) if !normalize => f.write_str(done)?,
            // Already resolved, but normalization is still necessary.
            Path::Done(done) => PathToNormalize::from_single_path(done)
                .fmt_write_normalize::<S, _>(f, input.op, has_authority)?,
            Path::NeedsProcessing(path) => {
                path.fmt_write_normalize::<S, _>(f, input.op, has_authority)?
            }
        }

        // Query, if available.
        if let Some(query) = input.query {
            f.write_char('?')?;
            if normalize {
                normalize_query::<S>(f, query)?;
            } else {
                f.write_str(query)?;
            }
        }

        // Fragment, if available.
        if let Some(fragment) = input.fragment {
            f.write_char('#')?;
            if normalize {
                normalize_fragment::<S>(f, fragment)?;
            } else {
                f.write_str(fragment)?;
            }
        }

        Ok(())
    }
}
/// Writes the scheme with ASCII uppercase letters converted to lowercase.
pub(crate) fn normalize_scheme(f: &mut fmt::Formatter<'_>, scheme: &str) -> fmt::Result {
    // Apply case normalization.
    //
    // > namely, that the scheme and US-ASCII only host are case
    // > insensitive and therefore should be normalized to lowercase.
    // >
    // > --- <https://datatracker.ietf.org/doc/html/rfc3987#section-5.3.2.1>.
    //
    // Note that `scheme` consists of only ASCII characters and contains
    // no percent-encoded characters.
    for c in scheme.chars() {
        f.write_char(c.to_ascii_lowercase())?;
    }
    Ok(())
}
/// Writes the normalized authority.
///
/// If the authority contains `@`, the part before the last `@` is written as
/// userinfo followed by `@`, and the rest is written as host and port.
/// Otherwise, the whole authority is written as host and port.
fn normalize_authority<S: Spec>(f: &mut fmt::Formatter<'_>, authority: &str) -> fmt::Result {
    if let Some((userinfo, host_port)) = rfind_split_hole(authority, b'@') {
        // Don't lowercase `userinfo` even if it is ASCII only. `userinfo`
        // is not a part of `host`.
        PctCaseNormalized::<S>::new(userinfo).fmt(f)?;
        f.write_char('@')?;
        return normalize_host_port::<S>(f, host_port);
    }

    // No userinfo.
    normalize_host_port::<S>(f, authority)
}
/// Writes the normalized host and port.
///
/// A trailing `:` (the delimiter of an empty port) is dropped, and then the
/// rest is written with normalization applied.
pub(crate) fn normalize_host_port<S: Spec>(
    f: &mut fmt::Formatter<'_>,
    host_port: &str,
) -> fmt::Result {
    // If the suffix is a colon, it is a delimiter between the host and empty
    // port. An empty port should be removed during normalization (see RFC 3986
    // section 3.2.3), so strip it.
    //
    // > URI producers and normalizers should omit the port component and its
    // > ":" delimiter if port is empty or if its value would be the same as
    // > that of the scheme's default.
    // >
    // > --- [RFC 3986 section 3.2.3. Port](https://www.rfc-editor.org/rfc/rfc3986.html#section-3.2.3)
    let trimmed = match host_port.strip_suffix(':') {
        Some(without_colon) => without_colon,
        None => host_port,
    };

    // Apply case normalization and percent-encoding normalization to `host`.
    // Optional `":" port` part only consists of an ASCII colon and ASCII
    // digits, so it does not affect the result of the ASCII-only test.
    if !is_ascii_only_host(trimmed) {
        return PctCaseNormalized::<S>::new(trimmed).fmt(f);
    }

    // If the host is ASCII characters only, make plain alphabets lower case.
    NormalizedAsciiOnlyHost::new(trimmed).fmt(f)
}
/// Writes the normalized query without the '?' prefix.
///
/// `query` is the content of the query component; this function writes no `?`.
pub(crate) fn normalize_query<S: Spec>(f: &mut fmt::Formatter<'_>, query: &str) -> fmt::Result {
// Apply percent-encoding normalization.
PctCaseNormalized::<S>::new(query).fmt(f)
}
/// Writes the normalized fragment without the '#' prefix.
///
/// `fragment` is the content of the fragment component; this function writes no `#`.
pub(crate) fn normalize_fragment<S: Spec>(
f: &mut fmt::Formatter<'_>,
fragment: &str,
) -> fmt::Result {
// Apply percent-encoding normalization.
PctCaseNormalized::<S>::new(fragment).fmt(f)
}
/// Normalized OR resolved IRI.
///
/// Resolved IRI can be represented by this type. In that case, the result might
/// not be normalized. If you want the IRI resolution result to be normalized,
/// use [`enable_normalization`][`Self::enable_normalization`] method.
///
/// The result is produced when the value is written through [`Display`].
///
/// [`Display`]: `core::fmt::Display`
pub struct Normalized<'a, T: ?Sized> {
/// Spec-agnostic normalization input.
input: NormalizationInput<'a>,
/// Expected result type.
_ty_str: PhantomData<fn() -> T>,
}
impl<T: ?Sized> fmt::Debug for Normalized<'_, T> {
/// Writes the value as a struct named `Normalized` with the `input` field only.
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_struct("Normalized")
.field("input", &self.input)
.finish()
}
}
impl<'a, T: ?Sized> Normalized<'a, T> {
/// Creates a new `Normalized` object from the given input.
///
/// The normalization mode of `input` is kept as is.
#[inline]
#[must_use]
pub(crate) fn from_input(input: NormalizationInput<'a>) -> Self {
Self {
input,
_ty_str: PhantomData,
}
}
/// Enables the normalization.
///
/// This lets the normalizer apply the case normalization, percent-encoding
/// normalization, and dot segments removal.
///
/// This sets the normalization mode to `NormalizationMode::Default`.
#[inline]
pub fn enable_normalization(&mut self) {
self.input.op.mode = NormalizationMode::Default;
}
/// Enables the normalization that preserves relative path under some condition.
///
/// Note that this normalization algorithm is not compatible with RFC 3986
/// algorithm for some inputs.
///
/// See [`RiStr::normalize_but_preserve_authorityless_relative_path()`]
/// for detail.
#[inline]
pub fn enable_normalization_preserving_authorityless_relative_path(&mut self) {
self.input.op.mode = NormalizationMode::PreserveAuthoritylessRelativePath;
}
/// Returns `Self` with normalization enabled.
///
/// This is the by-value variant of
/// [`enable_normalization`][`Self::enable_normalization`].
#[inline]
#[must_use]
pub fn and_normalize(mut self) -> Self {
self.enable_normalization();
self
}
/// Returns `Self` with special normalization enabled.
///
/// Note that this normalization algorithm is not compatible with RFC 3986
/// algorithm for some inputs.
///
/// See [`RiStr::normalize_but_preserve_authorityless_relative_path()`]
/// for detail.
#[inline]
#[must_use]
pub fn and_normalize_but_preserve_authorityless_relative_path(mut self) -> Self {
self.enable_normalization_preserving_authorityless_relative_path();
self
}
/// Checks if the path is normalizable by RFC 3986 algorithm.
///
/// Returns `Ok(())` when normalizable, returns `Err(_)` if not.
#[inline]
pub fn ensure_rfc3986_normalizable(&self) -> Result<(), Error> {
self.input.ensure_rfc3986_normalizable()
}
}
impl<S: Spec> fmt::Display for Normalized<'_, RiStr<S>> {
/// Writes the IRI by delegating to `NormalizedInner` with the spec `S`.
#[inline]
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
NormalizedInner::<S>::from_input(self.input).fmt(f)
}
}
impl<S: Spec> fmt::Display for Normalized<'_, RiAbsoluteStr<S>> {
/// Writes the IRI by delegating to `NormalizedInner` with the spec `S`.
#[inline]
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
NormalizedInner::<S>::from_input(self.input).fmt(f)
}
}
#[cfg(feature = "alloc")]
impl<S: Spec> ToDedicatedString for Normalized<'_, RiStr<S>> {
type Target = RiString<S>;
/// Writes the result into a string and converts it to the owned IRI type.
///
/// # Panics
///
/// Panics if the written string is rejected by the conversion to `RiString<S>`.
fn try_to_dedicated_string(&self) -> Result<Self::Target, TryReserveError> {
let s = self.try_to_string()?;
Ok(TryFrom::try_from(s).expect("[validity] the normalization result must be a valid IRI"))
}
}
#[cfg(feature = "alloc")]
impl<S: Spec> From<Normalized<'_, RiStr<S>>> for RiString<S> {
/// Converts the result into an owned IRI string via `to_dedicated_string()`.
#[inline]
fn from(v: Normalized<'_, RiStr<S>>) -> Self {
v.to_dedicated_string()
}
}
#[cfg(feature = "alloc")]
impl<S: Spec> From<&Normalized<'_, RiStr<S>>> for RiString<S> {
/// Converts the borrowed result into an owned IRI string via `to_dedicated_string()`.
#[inline]
fn from(v: &Normalized<'_, RiStr<S>>) -> Self {
v.to_dedicated_string()
}
}
#[cfg(feature = "alloc")]
impl<S: Spec> ToDedicatedString for Normalized<'_, RiAbsoluteStr<S>> {
type Target = RiAbsoluteString<S>;
/// Writes the result into a string and converts it to the owned IRI type.
///
/// # Panics
///
/// Panics if the written string is rejected by the conversion to
/// `RiAbsoluteString<S>`.
fn try_to_dedicated_string(&self) -> Result<Self::Target, TryReserveError> {
let s = self.try_to_string()?;
Ok(TryFrom::try_from(s).expect("[validity] the normalization result must be a valid IRI"))
}
}
#[cfg(feature = "alloc")]
impl<S: Spec> From<Normalized<'_, RiAbsoluteStr<S>>> for RiAbsoluteString<S> {
/// Converts the result into an owned IRI string via `to_dedicated_string()`.
#[inline]
fn from(v: Normalized<'_, RiAbsoluteStr<S>>) -> Self {
v.to_dedicated_string()
}
}
#[cfg(feature = "alloc")]
impl<S: Spec> From<&Normalized<'_, RiAbsoluteStr<S>>> for RiAbsoluteString<S> {
/// Converts the borrowed result into an owned IRI string via `to_dedicated_string()`.
#[inline]
fn from(v: &Normalized<'_, RiAbsoluteStr<S>>) -> Self {
v.to_dedicated_string()
}
}

View File

@@ -0,0 +1,26 @@
//! Normalization and resolution error.
use core::fmt;
/// IRI normalization and resolution error.
///
/// This type carries no additional information about the failure.
///
/// For detail about resolution failure, see [the module documentation][`crate::resolve`].
#[derive(Debug, Clone)]
pub struct Error(());
impl Error {
/// Creates a new error.
pub(crate) fn new() -> Self {
Self(())
}
}
impl fmt::Display for Error {
/// Writes the fixed message `unresolvable IRI`.
#[inline]
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.write_str("unresolvable IRI")
}
}
#[cfg(feature = "std")]
impl std::error::Error for Error {}

620
vendor/iri-string/src/normalize/path.rs vendored Normal file
View File

@@ -0,0 +1,620 @@
//! Path normalization.
use core::fmt;
use core::ops::Range;
use crate::parser::str::{find_split_hole, rfind};
use crate::spec::{Spec, UriSpec};
use super::pct_case::PctCaseNormalized;
use super::{Error, NormalizationMode, NormalizationOp};
/// Path that is (possibly) not yet processed or being processed.
#[derive(Debug, Clone, Copy)]
pub(crate) enum Path<'a> {
/// The result. No more processing is needed.
Done(&'a str),
/// Not yet completely processed path.
NeedsProcessing(PathToNormalize<'a>),
}
/// Path that needs merge and/or dot segment removal.
///
/// The first field is an optional prefix, and the second field is the rest of
/// the path. The path as a whole is the concatenation of them.
///
/// # Invariants
///
/// If the first field (prefix field) is not `None`, it must end with a slash.
#[derive(Debug, Clone, Copy)]
pub(crate) struct PathToNormalize<'a>(Option<&'a str>, &'a str);
impl<'a> PathToNormalize<'a> {
    /// Wraps the given single path, without a prefix part.
    #[inline]
    #[must_use]
    pub(crate) fn from_single_path(path: &'a str) -> Self {
        Self(None, path)
    }

    /// Creates a `PathToNormalize` from the given base and reference paths to be resolved.
    ///
    /// The prefix is the base path up to and including its last slash. No
    /// prefix is used when the reference starts with a slash or when the base
    /// has no slash.
    #[must_use]
    pub(crate) fn from_paths_to_be_resolved(base: &'a str, reference: &'a str) -> Self {
        let prefix = if reference.starts_with('/') {
            None
        } else {
            rfind(base.as_bytes(), b'/').map(|last_slash_pos| &base[..=last_slash_pos])
        };
        Self(prefix, reference)
    }

    /// Returns true if the path is empty string.
    #[inline]
    #[must_use]
    fn is_empty(&self) -> bool {
        match self.0 {
            // The prefix ends with a slash, i.e. it is not empty.
            Some(_) => false,
            None => self.1.is_empty(),
        }
    }

    /// Returns the length of the not yet normalized path.
    #[inline]
    #[must_use]
    pub(super) fn len(&self) -> usize {
        self.1.len() + self.len_prefix()
    }

    /// Returns the length of the prefix part.
    ///
    /// Returns 0 if the prefix part is empty.
    #[inline]
    #[must_use]
    fn len_prefix(&self) -> usize {
        match self.0 {
            Some(prefix) => prefix.len(),
            None => 0,
        }
    }

    /// Returns a byte at the given position.
    ///
    /// The position is counted from the beginning of the prefix part (if any).
    #[must_use]
    fn byte_at(&self, i: usize) -> Option<u8> {
        let prefix = self.0.unwrap_or("").as_bytes();
        match i.checked_sub(prefix.len()) {
            // The position is after the prefix part.
            Some(rel) => self.1.as_bytes().get(rel).copied(),
            // The position is inside the prefix part.
            None => Some(prefix[i]),
        }
    }

    /// Returns the position of the next slash of the byte at the given position.
    ///
    /// When the scan starts inside the prefix part, only the prefix part is
    /// searched.
    #[must_use]
    fn find_next_slash(&self, scan_start: usize) -> Option<usize> {
        let haystack = match self.0 {
            Some(prefix) if scan_start < prefix.len() => &prefix[scan_start..],
            Some(prefix) => &self.1[scan_start - prefix.len()..],
            None => &self.1[scan_start..],
        };
        haystack.find('/').map(|rel| rel + scan_start)
    }

    /// Removes the `len` characters from the beginning of `self`.
    fn remove_start(&mut self, len: usize) {
        match self.0 {
            // Only a part of the prefix is removed.
            Some(prefix) if len < prefix.len() => self.0 = Some(&prefix[len..]),
            // The whole prefix (and possibly more) is removed.
            Some(prefix) => {
                self.0 = None;
                self.1 = &self.1[len - prefix.len()..];
            }
            None => self.1 = &self.1[len..],
        }
    }

    /// Removes the prefix that are ignorable on normalization.
    ///
    /// Skips the prefix dot segments without leading slashes (such as `./`,
    /// `../`, and `../.././`).
    /// This is necessary because such segments should be removed with the
    /// FOLLOWING slashes, not leading slashes.
    fn remove_ignorable_prefix(&mut self) {
        loop {
            let first = match PathSegmentsIter::new(self).next() {
                Some(seg) if !seg.has_leading_slash => seg,
                // No segments, or the first segment starts with a slash
                // (which is not a target).
                _ => return,
            };
            match first.kind(self) {
                SegmentKind::Normal => return,
                SegmentKind::Dot | SegmentKind::DotDot => {
                    // Attempt to skip the following slash by `+ 1`.
                    let skip = (first.range.end + 1).min(self.len());
                    self.remove_start(skip);
                }
            }
        }
    }
}
impl PathToNormalize<'_> {
/// Writes the normalized path.
///
/// * `f`: the destination writer.
/// * `op`: the normalization operation; `op.mode` selects the behavior.
/// * `authority_is_present`: whether the authority component is present.
///   This affects how a path consisting only of dot segments is written and
///   whether a leading `//` in the result needs to be guarded.
///
/// Errors returned by `f` are propagated to the caller.
pub(crate) fn fmt_write_normalize<S: Spec, W: fmt::Write>(
&self,
f: &mut W,
op: NormalizationOp,
authority_is_present: bool,
) -> fmt::Result {
debug_assert!(
self.0.map_or(true, |s| s.ends_with('/')),
"[validity] the prefix field of `PathToNormalize` should end with a slash"
);
// Nothing to write for an empty path.
if self.is_empty() {
return Ok(());
}
if (op.mode == NormalizationMode::PreserveAuthoritylessRelativePath)
&& !authority_is_present
&& self.byte_at(0) != Some(b'/')
{
// Treat the path as "opaque", i.e. do not apply dot segments removal.
// See <https://github.com/lo48576/iri-string/issues/29>.
debug_assert!(
op.mode.case_pct_normalization(),
"[consistency] case/pct normalization should still be applied"
);
if let Some(prefix) = self.0 {
write!(f, "{}", PctCaseNormalized::<S>::new(prefix))?;
}
write!(f, "{}", PctCaseNormalized::<S>::new(self.1))?;
return Ok(());
}
// Work on a copy; `self` is left untouched.
let mut rest = *self;
// Skip the prefix dot segments without leading slashes (such as `./`,
// `../`, and `../.././`).
// This is necessary because such segments should be removed with the
// FOLLOWING slashes, not leading slashes.
rest.remove_ignorable_prefix();
if rest.is_empty() {
// The whole path has been removed as ignorable dot segments.
// In this case, if the authority component is present, the result
// should be `/`, not empty.
if authority_is_present {
f.write_char('/')?;
}
return Ok(());
}
// None: No segments are written yet.
// Some(false): Something other than `/` is already written as the path.
// Some(true): Only a `/` is written as the path.
let mut only_a_slash_is_written = None;
// Each iteration resolves the dot segments for at most `QUEUE_SIZE` levels
// of the path, writes the surviving segments, and removes the processed
// part from `rest`.
let mut too_deep_area_may_have_dot_segments = true;
while !rest.is_empty() && too_deep_area_may_have_dot_segments {
/// The size of the queue to track the path segments.
///
/// This should be nonzero.
const QUEUE_SIZE: usize = 8;
{
// Skip `/.` and `/..` segments at the head.
let mut skipped_len = 0;
for seg in PathSegmentsIter::new(&rest) {
match seg.kind(&rest) {
SegmentKind::Dot | SegmentKind::DotDot => {
debug_assert!(
seg.has_leading_slash,
"[consistency] `.` or `..` segments without a
leading slash have already been skipped"
);
skipped_len = seg.range.end;
}
_ => break,
}
}
rest.remove_start(skipped_len);
if rest.is_empty() {
// Finished with a dot segment.
// The last `/.` or `/..` should be replaced to `/`.
if !authority_is_present && (only_a_slash_is_written == Some(true)) {
// Insert a dot segment to break the prefix `//`.
// Without this, the path starts with `//` and it may
// be confused with the prefix of an authority.
f.write_str(".//")?;
} else {
f.write_char('/')?;
}
break;
}
}
// `queue[i]` holds the segment currently surviving at depth `i`.
// Segments deeper than `QUEUE_SIZE` levels are only counted by `level`,
// not stored.
let mut queue: [Option<&'_ str>; QUEUE_SIZE] = Default::default();
let mut level: usize = 0;
let mut first_segment_has_leading_slash = false;
// Find higher path segments.
// `end` is the end position of the last segment stored in the queue.
let mut end = 0;
for seg in PathSegmentsIter::new(&rest) {
let kind = seg.kind(&rest);
match kind {
SegmentKind::Dot => {
too_deep_area_may_have_dot_segments = true;
}
SegmentKind::DotDot => {
// Go up one level and discard the segment stored there (if any).
level = level.saturating_sub(1);
too_deep_area_may_have_dot_segments = true;
if level < queue.len() {
queue[level] = None;
}
}
SegmentKind::Normal => {
if level < queue.len() {
queue[level] = Some(seg.segment(&rest));
too_deep_area_may_have_dot_segments = false;
end = seg.range.end;
if level == 0 {
first_segment_has_leading_slash = seg.has_leading_slash;
}
}
level += 1;
}
}
}
// Write the path segments as possible, and update the internal state.
for segname in queue.iter().flatten() {
Self::emit_segment::<S, _>(
f,
&mut only_a_slash_is_written,
first_segment_has_leading_slash,
segname,
authority_is_present,
op,
)?;
}
// Drop everything up to the end of the last stored segment.
rest.remove_start(end);
}
if !rest.is_empty() {
// No need of searching dot segments anymore.
assert!(
!too_deep_area_may_have_dot_segments,
"[consistency] loop condition of the previous loop"
);
// Apply only normalization (if needed).
for seg in PathSegmentsIter::new(&rest) {
assert_eq!(
seg.kind(&rest),
SegmentKind::Normal,
"[consistency] already confirmed that there are no more dot segments"
);
let segname = seg.segment(&rest);
Self::emit_segment::<S, _>(
f,
&mut only_a_slash_is_written,
seg.has_leading_slash,
segname,
authority_is_present,
op,
)?;
}
}
Ok(())
}
/// Emits a non-dot segment and updates the current state.
///
/// `only_a_slash_is_written` is the state shared among the calls:
/// `None` if nothing is written yet, `Some(true)` if only a `/` is written
/// as the path so far, and `Some(false)` otherwise.
//
// `first_segment_has_leading_slash` can be any value if the segment is not the first one.
fn emit_segment<S: Spec, W: fmt::Write>(
    f: &mut W,
    only_a_slash_is_written: &mut Option<bool>,
    first_segment_has_leading_slash: bool,
    segname: &str,
    authority_is_present: bool,
    op: NormalizationOp,
) -> fmt::Result {
    if let Some(only_a_slash) = *only_a_slash_is_written {
        // Second or later segment: always separated by a slash.
        if only_a_slash && !authority_is_present {
            // Apply serialization like WHATWG URL Standard.
            // Without this, `<scheme=foo>:<path=//bar>` would be written as
            // `foo://bar`, which is interpreted as
            // `<scheme=foo>://<authority=bar>`. By prepending `./`, the
            // serialization result becomes `foo:/.//bar`, which is safe.
            f.write_str("./")?;
            *only_a_slash_is_written = Some(false);
        }
        f.write_char('/')?;
    } else {
        // First segment: the leading slash is written only if the segment
        // is marked as having one.
        if first_segment_has_leading_slash {
            f.write_char('/')?;
        }
        // Only a slash is written when the segment name itself is empty.
        let only_a_slash = first_segment_has_leading_slash && segname.is_empty();
        *only_a_slash_is_written = Some(only_a_slash);
    }

    // Write the segment name.
    if !op.mode.case_pct_normalization() {
        return f.write_str(segname);
    }
    write!(f, "{}", PctCaseNormalized::<S>::new(segname))
}
/// Checks if the path is normalizable by RFC 3986 algorithm when the authority is absent.
///
/// Returns `Ok(())` when normalizable, returns `Err(_)` if not.
pub(crate) fn ensure_rfc3986_normalizable_with_authority_absent(&self) -> Result<(), Error> {
/// A sink to get the prefix of the input.
#[derive(Default)]
struct PrefixRetriever {
/// The buffer to remember the prefix of the input.
buf: [u8; 3],
/// The next write position in the buffer.
cursor: usize,
}
impl PrefixRetriever {
/// Returns the read prefix data.
#[inline]
#[must_use]
fn as_bytes(&self) -> &[u8] {
&self.buf[..self.cursor]
}
}
impl fmt::Write for PrefixRetriever {
fn write_str(&mut self, s: &str) -> fmt::Result {
if !s.is_empty() && (self.cursor >= self.buf.len()) {
// Enough bytes are read.
return Err(fmt::Error);
}
self.buf[self.cursor..]
.iter_mut()
.zip(s.bytes())
.for_each(|(dest, src)| *dest = src);
self.cursor = self.cursor.saturating_add(s.len()).min(self.buf.len());
Ok(())
}
}
let mut prefix = PrefixRetriever::default();
// The failure of this write indicates more than 3 characters are read.
// This is safe to ignore since the check needs only 3 characters.
let _ = self.fmt_write_normalize::<UriSpec, _>(
&mut prefix,
NormalizationOp {
mode: NormalizationMode::None,
},
// Assume the authority is absent.
false,
);
if prefix.as_bytes() == b"/./" {
Err(Error::new())
} else {
Ok(())
}
}
}
/// Characteristic of a path.
#[derive(Debug, Clone, Copy)]
pub(crate) enum PathCharacteristic {
/// Absolute path, not special.
CommonAbsolute,
/// Relative path, not special.
CommonRelative,
/// The first path segment of the relative path has one or more colon characters.
RelativeFirstSegmentHasColon,
/// The path starts with the double slash.
StartsWithDoubleSlash,
}
impl PathCharacteristic {
/// Returns true if the path is absolute.
///
/// `CommonAbsolute` and `StartsWithDoubleSlash` are the absolute variants.
#[inline]
#[must_use]
pub(crate) fn is_absolute(self) -> bool {
matches!(self, Self::CommonAbsolute | Self::StartsWithDoubleSlash)
}
/// Returns the characteristic of the path.
///
/// The path is written through `fmt_write_normalize` into a dummy writer
/// that inspects only the beginning of the output and aborts the
/// formatting (by returning `Err`) as soon as the characteristic is decided.
pub(crate) fn from_path_to_display<S: Spec>(
path: &PathToNormalize<'_>,
op: NormalizationOp,
authority_is_present: bool,
) -> Self {
/// Dummy writer to get necessary values.
#[derive(Default, Clone, Copy)]
struct Writer {
/// Result.
result: Option<PathCharacteristic>,
/// Whether the normalized path is absolute.
///
/// `None` until the first non-empty input is given.
is_absolute: Option<bool>,
}
impl fmt::Write for Writer {
fn write_str(&mut self, mut s: &str) -> fmt::Result {
if self.result.is_some() {
// Nothing more to do.
return Err(fmt::Error);
}
while !s.is_empty() {
if self.is_absolute.is_none() {
// The first input.
match s.strip_prefix('/') {
Some(rest) => {
self.is_absolute = Some(true);
s = rest;
}
None => {
self.is_absolute = Some(false);
}
}
continue;
}
if self.is_absolute == Some(true) {
// The character following the leading slash decides the result.
let result = if s.starts_with('/') {
PathCharacteristic::StartsWithDoubleSlash
} else {
PathCharacteristic::CommonAbsolute
};
self.result = Some(result);
return Err(fmt::Error);
}
// Processing the first segment of the relative path.
match find_split_hole(s, b'/') {
Some((first_seg, _rest)) => {
// The first segment ends inside `s`.
let result = if first_seg.contains(':') {
PathCharacteristic::RelativeFirstSegmentHasColon
} else {
PathCharacteristic::CommonRelative
};
self.result = Some(result);
return Err(fmt::Error);
}
None => {
// `s` might not be the complete first segment.
if s.contains(':') {
self.result =
Some(PathCharacteristic::RelativeFirstSegmentHasColon);
return Err(fmt::Error);
}
// Undecided yet; wait for the next input.
break;
}
}
}
Ok(())
}
}
let mut writer = Writer::default();
match path.fmt_write_normalize::<S, _>(&mut writer, op, authority_is_present) {
// The writer never reached a decision: the output was empty, or it was a
// relative path of a single segment without colons.
// NOTE(review): this arm also seems reachable when the whole output is
// just `/` (`writer.is_absolute == Some(true)` with nothing following),
// which is then reported as `CommonRelative` -- confirm this is intended.
Ok(_) => PathCharacteristic::CommonRelative,
Err(_) => writer
.result
.expect("[consistency] the formatting quits early by `Err` when the check is done"),
}
}
}
/// Kind of a path segment.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum SegmentKind {
    /// `.` or the equivalents.
    Dot,
    /// `..` or the equivalents.
    DotDot,
    /// Other normal (not special) segments.
    Normal,
}

impl SegmentKind {
    /// Creates a new `SegmentKind` from the given segment name.
    ///
    /// A dot may be written raw (`.`) or percent-encoded (`%2E` or `%2e`).
    /// A segment that consists of exactly one dot is `Dot`, exactly two dots
    /// is `DotDot`, and anything else (including the empty segment) is
    /// `Normal`.
    #[must_use]
    fn from_segment(s: &str) -> Self {
        let mut rest = s;
        let mut dots = 0_usize;
        while !rest.is_empty() {
            // Strip one (raw or percent-encoded) dot from the head.
            let stripped = rest
                .strip_prefix('.')
                .or_else(|| rest.strip_prefix("%2E"))
                .or_else(|| rest.strip_prefix("%2e"));
            rest = match stripped {
                Some(after_dot) => after_dot,
                // Something other than a dot is found.
                None => return SegmentKind::Normal,
            };
            dots += 1;
            if dots > 2 {
                return SegmentKind::Normal;
            }
        }
        match dots {
            1 => SegmentKind::Dot,
            2 => SegmentKind::DotDot,
            _ => SegmentKind::Normal,
        }
    }
}
/// A path segment, with the information about its leading slash.
#[derive(Debug, Clone)]
struct PathSegment {
    /// Whether the segment is preceded by a slash.
    has_leading_slash: bool,
    /// Range of the segment name (without any slashes) in the whole path.
    range: Range<usize>,
}

impl PathSegment {
    /// Returns the segment name, without any slashes.
    ///
    /// `self.range` is a range in the whole path, i.e. in the prefix part of
    /// `path` (if any) followed by its second part.
    #[inline]
    #[must_use]
    fn segment<'a>(&self, path: &PathToNormalize<'a>) -> &'a str {
        let (start, end) = (self.range.start, self.range.end);
        match path.0 {
            // The segment is inside the prefix part.
            Some(prefix) if end <= prefix.len() => &prefix[start..end],
            // The segment is inside the second part; shift the range.
            Some(prefix) => {
                let offset = prefix.len();
                &path.1[(start - offset)..(end - offset)]
            }
            None => &path.1[start..end],
        }
    }

    /// Returns the kind of the segment.
    #[inline]
    #[must_use]
    fn kind(&self, path: &PathToNormalize<'_>) -> SegmentKind {
        let name = self.segment(path);
        SegmentKind::from_segment(name)
    }
}
/// Iterator over the segments of a path.
struct PathSegmentsIter<'a> {
    /// The path to iterate.
    path: &'a PathToNormalize<'a>,
    /// Position (in the whole path) where the next segment starts.
    cursor: usize,
}

impl<'a> PathSegmentsIter<'a> {
    /// Creates a new iterator that starts from the beginning of the path.
    #[inline]
    #[must_use]
    fn new(path: &'a PathToNormalize<'a>) -> Self {
        PathSegmentsIter { path, cursor: 0 }
    }
}

impl Iterator for PathSegmentsIter<'_> {
    type Item = PathSegment;

    fn next(&mut self) -> Option<Self::Item> {
        let total_len = self.path.len();
        let seg_head = self.cursor;
        if seg_head >= total_len {
            return None;
        }

        let has_leading_slash = matches!(self.path.byte_at(seg_head), Some(b'/'));
        let prefix_len = self.path.len_prefix();
        let (start, end) = if (prefix_len != 0) && (seg_head + 1 == prefix_len) {
            // The cursor is on the last byte of the prefix, so the segment
            // name is at the head of the second part.
            debug_assert!(has_leading_slash);
            let suffix = self.path.1;
            let name_len = suffix.find('/').unwrap_or(suffix.len());
            (prefix_len, prefix_len + name_len)
        } else {
            // Skip the leading slash (if any), then search for the next one.
            let start = seg_head + usize::from(has_leading_slash);
            let end = self.path.find_next_slash(start).unwrap_or(total_len);
            (start, end)
        };

        self.cursor = end;
        Some(PathSegment {
            has_leading_slash,
            range: start..end,
        })
    }
}

View File

@@ -0,0 +1,360 @@
//! Percent-encoding normalization and case normalization.
use core::fmt::{self, Write as _};
use core::marker::PhantomData;
use crate::format::eq_str_display;
use crate::parser::char::{is_ascii_unreserved, is_unreserved, is_utf8_byte_continue};
use crate::parser::str::{find_split_hole, take_first_char};
use crate::parser::trusted::take_xdigits2;
use crate::spec::Spec;
/// Returns true if the given string is already percent-encoding normalized
/// and case normalized.
///
/// The check compares the string with what `PctCaseNormalized` would write
/// for it.
///
/// Note that normalization of ASCII-only host requires additional case
/// normalization, so checking by this function is not sufficient for that case.
pub(crate) fn is_pct_case_normalized<S: Spec>(s: &str) -> bool {
    let normalized = PctCaseNormalized::<S>::new(s);
    eq_str_display(s, &normalized)
}
/// Decodes the bytes of a single UTF-8 encoded character.
///
/// This function trusts that the first byte is a valid leading byte for a
/// sequence of `bytes.len()` bytes and that the other bytes are continue
/// bytes; only the payload bits are read from them.
///
/// Returns `Err(())` if the sequence is a redundant (overlong) encoding or
/// if the decoded value is not a valid Unicode scalar value.
///
/// # Panics
///
/// Panics if the length of `bytes` is not 2, 3, or 4.
fn into_char_trusted(bytes: &[u8]) -> Result<char, ()> {
    /// The bit mask to get the content part in a continue byte.
    const CONTINUE_BYTE_MASK: u8 = 0b_0011_1111;

    // Payload mask for the first byte, and the minimum code point that
    // needs a sequence of this length.
    let (lead_mask, min_code_point): (u8, u32) = match bytes.len() {
        2 => (0b_0001_1111, 0x80),
        3 => (0b_0000_1111, 0x800),
        4 => (0b_0000_0111, 0x1_0000),
        len => unreachable!(
            "[consistency] expected 2, 3, or 4 bytes for a character, but got {len} as the length"
        ),
    };

    // Every continue byte contributes 6 bits.
    let code_point = bytes[1..]
        .iter()
        .fold(u32::from(bytes[0] & lead_mask), |acc, &byte| {
            (acc << 6) | u32::from(byte & CONTINUE_BYTE_MASK)
        });

    if code_point < min_code_point {
        // Redundant UTF-8 encoding.
        return Err(());
    }
    // Can be an invalid Unicode code point.
    char::from_u32(code_point).ok_or(())
}
/// Wrapper that writes a string with percent-encoding normalization applied.
///
/// When formatted, this wrapper does the things below:
///
/// * Decode unnecessarily percent-encoded characters.
/// * Convert alphabetic characters uppercase in percent-encoded triplets.
///
/// Note that this does not newly encode raw characters.
///
/// # Preconditions
///
/// The given string should be the valid path segment.
#[derive(Debug, Clone, Copy)]
pub(crate) struct PctCaseNormalized<'a, S> {
    /// Valid segment name to normalize.
    segname: &'a str,
    /// Spec.
    _spec: PhantomData<fn() -> S>,
}

impl<'a, S: Spec> PctCaseNormalized<'a, S> {
    /// Wraps the given string to make a new `PctCaseNormalized` value.
    #[inline]
    #[must_use]
    pub(crate) fn new(source: &'a str) -> Self {
        PctCaseNormalized {
            segname: source,
            _spec: PhantomData,
        }
    }
}
impl<S: Spec> fmt::Display for PctCaseNormalized<'_, S> {
/// Writes the string with percent-encoded triplets normalized.
///
/// Text outside of percent-encoded triplets is written as is. A triplet (or
/// a run of triplets that decodes to one character) is written decoded if
/// the character is unreserved, and otherwise written as triplets with
/// uppercase hexadecimal digits.
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let mut rest = self.segname;
'outer_loop: while !rest.is_empty() {
// Scan the next percent-encoded triplet.
let (prefix, after_percent) = match find_split_hole(rest, b'%') {
Some(v) => v,
None => return f.write_str(rest),
};
// Write the string before the percent-encoded triplet.
f.write_str(prefix)?;
// Decode the percent-encoded triplet.
let (first_decoded, after_first_triplet) = take_xdigits2(after_percent);
rest = after_first_triplet;
// The first byte tells the length of the UTF-8 sequence it starts.
let expected_char_len = match first_decoded {
0x00..=0x7F => {
// An ASCII character.
debug_assert!(first_decoded.is_ascii());
if is_ascii_unreserved(first_decoded) {
// Unreserved. Print the decoded.
f.write_char(char::from(first_decoded))?;
} else {
write!(f, "%{:02X}", first_decoded)?;
}
continue 'outer_loop;
}
0xC2..=0xDF => 2,
0xE0..=0xEF => 3,
0xF0..=0xF4 => 4,
0x80..=0xC1 | 0xF5..=0xFF => {
// Cannot appear as a first byte.
//
// * 0x80..=0xBF: continue byte.
// * 0xC0..=0xC1: redundant encoding.
// * 0xF5..=0xFF: above the maximum value for U+10FFFF.
write!(f, "%{:02X}", first_decoded)?;
continue 'outer_loop;
}
};
// Get continue bytes.
// `c_buf[..=i]` are the bytes decoded so far while the `i`-th continue
// byte is being read.
let c_buf = &mut [first_decoded, 0, 0, 0][..expected_char_len];
for (i, buf_dest) in c_buf[1..].iter_mut().enumerate() {
match take_first_char(rest) {
Some(('%', after_percent)) => {
let (byte, after_triplet) = take_xdigits2(after_percent);
if !is_utf8_byte_continue(byte) {
// Note that `byte` can start the new string.
// Leave the byte in the `rest` for next try (i.e.
// don't update `rest` in this case).
c_buf[..=i]
.iter()
.try_for_each(|b| write!(f, "%{:02X}", b))?;
continue 'outer_loop;
}
*buf_dest = byte;
rest = after_triplet;
}
// If the next character is not `%`, decoded bytes so far
// won't be valid UTF-8 byte sequence.
// Write the read percent-encoded triplets without decoding.
// Note that all characters in `&c_buf[1..]` (if available)
// will be decoded to "continue byte" of UTF-8, so they
// cannot be the start of a valid UTF-8 byte sequence if
// decoded.
Some((c, after_percent)) => {
c_buf[..=i]
.iter()
.try_for_each(|b| write!(f, "%{:02X}", b))?;
f.write_char(c)?;
rest = after_percent;
continue 'outer_loop;
}
None => {
c_buf[..=i]
.iter()
.try_for_each(|b| write!(f, "%{:02X}", b))?;
// Reached the end of the string.
break 'outer_loop;
}
}
}
// Decode the bytes into a character.
match into_char_trusted(&c_buf[..expected_char_len]) {
Ok(decoded_c) => {
if is_unreserved::<S>(decoded_c) {
// Unreserved. Print the decoded.
f.write_char(decoded_c)?;
} else {
c_buf[0..expected_char_len]
.iter()
.try_for_each(|b| write!(f, "%{:02X}", b))?;
}
}
Err(_) => {
// Skip decoding of the entire sequence of pct-encoded triplets loaded
// in `c_buf`. This is valid from the reasons below.
//
// * The first byte in `c_buf` is valid as the first byte, and it tells the
// expected number of bytes for a code point. The cases the bytes being too
// short and the sequence being incomplete have already been handled, and
// the execution does not reach here then.
// * All of the non-first bytes are checked if they are valid as UTF-8 continue
// bytes by `is_utf8_byte_continue()`. If they're not, the decoding of
// that codepoint is aborted and the bytes in the buffer are immediately
// emitted as pct-encoded, and the execution does not reach here. This
// means that the bytes in the current `c_buf` have passed these tests.
// * Since all of the non-first bytes are UTF-8 continue bytes, any of
// them cannot start the new valid UTF-8 byte sequence. This means that
// if the bytes in the buffer do not constitute a valid UTF-8 byte
// sequence, the whole buffer can immediately be emitted as pct-encoded.
debug_assert!(
c_buf[1..expected_char_len]
.iter()
.copied()
.all(is_utf8_byte_continue),
"[consistency] all non-first bytes have been \
confirmed that they are UTF-8 continue bytes"
);
// Note that the first pct-encoded triplet is stripped from
// `after_first_triplet`.
rest = &after_first_triplet[((expected_char_len - 1) * 3)..];
c_buf[0..expected_char_len]
.iter()
.try_for_each(|b| write!(f, "%{:02X}", b))?;
}
}
}
Ok(())
}
}
/// Wrapper that writes an ASCII-only `host` (optionally followed by a `port`)
/// in the normalized form.
#[derive(Debug, Clone, Copy)]
pub(crate) struct NormalizedAsciiOnlyHost<'a> {
    /// Valid host (and additionally port) to normalize.
    host_port: &'a str,
}

impl<'a> NormalizedAsciiOnlyHost<'a> {
    /// Wraps the given string to make a new `NormalizedAsciiOnlyHost` value.
    ///
    /// # Preconditions
    ///
    /// The given string should be the valid ASCII-only `host` or
    /// `host ":" port` after percent-encoding normalization.
    /// In other words, [`parser::trusted::is_ascii_only_host`] should return
    /// true for the given value.
    ///
    /// [`parser::trusted::is_ascii_only_host`]: `crate::parser::trusted::is_ascii_only_host`
    #[inline]
    #[must_use]
    pub(crate) fn new(host_port: &'a str) -> Self {
        NormalizedAsciiOnlyHost { host_port }
    }
}
impl fmt::Display for NormalizedAsciiOnlyHost<'_> {
    /// Writes the host in lowercase, with percent-encoded triplets normalized.
    ///
    /// A triplet of an unreserved character is written decoded (and lowered),
    /// and the other triplets are written with uppercase hexadecimal digits.
    ///
    /// # Panics
    ///
    /// Panics if a percent-encoded triplet decodes to a non-ASCII byte.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        /// Writes the string with ASCII letters converted to lowercase.
        fn write_lowercased(f: &mut fmt::Formatter<'_>, s: &str) -> fmt::Result {
            s.chars()
                .try_for_each(|c| f.write_char(c.to_ascii_lowercase()))
        }

        let mut unprocessed = self.host_port;
        while !unprocessed.is_empty() {
            // Scan the next percent-encoded triplet.
            let (literal, after_percent) = match find_split_hole(unprocessed, b'%') {
                Some(parts) => parts,
                // No more triplets.
                None => return write_lowercased(f, unprocessed),
            };
            // Write the string before the percent-encoded triplet.
            write_lowercased(f, literal)?;

            // Decode the percent-encoded triplet.
            let (decoded, after_triplet) = take_xdigits2(after_percent);
            unprocessed = after_triplet;
            assert!(
                decoded.is_ascii(),
                "[consistency] this function requires ASCII-only host as an argument"
            );

            if !is_ascii_unreserved(decoded) {
                // Keep it encoded, with uppercase hexadecimal digits.
                write!(f, "%{:02X}", decoded)?;
            } else {
                // Unreserved. Convert to lowercase and print.
                f.write_char(char::from(decoded.to_ascii_lowercase()))?;
            }
        }
        Ok(())
    }
}
// Tests for `PctCaseNormalized`, for both `UriSpec` and `IriSpec`.
#[cfg(test)]
#[cfg(feature = "alloc")]
mod tests {
use super::*;
#[cfg(all(feature = "alloc", not(feature = "std")))]
use alloc::string::ToString;
use crate::spec::{IriSpec, UriSpec};
// Triplets that do not form valid UTF-8 are kept encoded; only the hex
// digits are uppercased.
#[test]
fn invalid_utf8() {
assert_eq!(
PctCaseNormalized::<UriSpec>::new("%80%cc%cc%cc").to_string(),
"%80%CC%CC%CC"
);
assert_eq!(
PctCaseNormalized::<IriSpec>::new("%80%cc%cc%cc").to_string(),
"%80%CC%CC%CC"
);
}
// An encoded non-ASCII character (U+03B1) is decoded only for `IriSpec`.
#[test]
fn iri_unreserved() {
assert_eq!(
PctCaseNormalized::<UriSpec>::new("%ce%b1").to_string(),
"%CE%B1"
);
assert_eq!(
PctCaseNormalized::<IriSpec>::new("%ce%b1").to_string(),
"\u{03B1}"
);
}
// A decodable sequence surrounded by stray bytes: only the decodable part
// in the middle is decoded (for `IriSpec`).
#[test]
fn iri_middle_decode() {
assert_eq!(
PctCaseNormalized::<UriSpec>::new("%ce%ce%b1%b1").to_string(),
"%CE%CE%B1%B1"
);
assert_eq!(
PctCaseNormalized::<IriSpec>::new("%ce%ce%b1%b1").to_string(),
"%CE\u{03B1}%B1"
);
}
// An encoded reserved character (`?`) stays encoded.
#[test]
fn ascii_reserved() {
assert_eq!(PctCaseNormalized::<UriSpec>::new("%3f").to_string(), "%3F");
assert_eq!(PctCaseNormalized::<IriSpec>::new("%3f").to_string(), "%3F");
}
// Encoded `<` and `>` stay encoded.
#[test]
fn ascii_forbidden() {
assert_eq!(
PctCaseNormalized::<UriSpec>::new("%3c%3e").to_string(),
"%3C%3E"
);
assert_eq!(
PctCaseNormalized::<IriSpec>::new("%3c%3e").to_string(),
"%3C%3E"
);
}
// An encoded unreserved ASCII character (`~`) is decoded.
#[test]
fn ascii_unreserved() {
assert_eq!(PctCaseNormalized::<UriSpec>::new("%7ea").to_string(), "~a");
assert_eq!(PctCaseNormalized::<IriSpec>::new("%7ea").to_string(), "~a");
}
}

6
vendor/iri-string/src/parser.rs vendored Normal file
View File

@@ -0,0 +1,6 @@
//! Common stuff for parsing.
pub(crate) mod char;
pub(crate) mod str;
pub(crate) mod trusted;
pub(crate) mod validate;

323
vendor/iri-string/src/parser/char.rs vendored Normal file
View File

@@ -0,0 +1,323 @@
//! Characters.
use crate::spec::Spec;
// Each mask below selects one bit of the entries of `TABLE`.
/// A mask to test whether the character is continue character of `scheme`.
// `ALPHA / DIGIT / "+" / "-" / "."`
const MASK_SCHEME_CONTINUE: u8 = 1 << 0;
/// A mask to test whether the character matches `unreserved`.
// `unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"`
const MASK_UNRESERVED: u8 = 1 << 1;
/// A mask to test whether the character matches `gen-delims`.
// `gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"`
const MASK_GEN_DELIMS: u8 = 1 << 2;
/// A mask to test whether the character matches `sub-delims`.
// `sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="`
const MASK_SUB_DELIMS: u8 = 1 << 3;
/// A mask to test whether the character matches `pchar` (modulo percent-encoded bytes).
// `pchar = unreserved / pct-encoded / sub-delims / ":" / "@"`
const MASK_PCHAR: u8 = 1 << 4;
/// A mask to test whether the character can appear in `query` and `fragment`.
// `query = *( pchar / "/" / "?" )`
// `fragment = *( pchar / "/" / "?" )`
const MASK_FRAG_QUERY: u8 = 1 << 5;
/// A mask to test whether the character can appear in `userinfo` and address of `IPvFuture`.
// `userinfo = *( unreserved / pct-encoded / sub-delims / ":" )`
const MASK_USERINFO_IPVFUTUREADDR: u8 = 1 << 6;
/// A mask to test whether the character matches `pchar` (modulo percent-encoded bytes) or slash.
// `pchar / "/"`
const MASK_PCHAR_SLASH: u8 = 1 << 7;
/// ASCII characters' properties.
///
/// The table is indexed by the ASCII code of the character, and each entry
/// is the set of the `MASK_*` bits that apply to the character.
/// From the least significant bit: `scheme` continue, `unreserved`,
/// `gen-delims`, `sub-delims`, `pchar`, `query`/`fragment`,
/// `userinfo`/`IPvFuture` address, and `pchar` or slash.
const TABLE: [u8; 128] = [
0b_0000_0000, // NUL
0b_0000_0000, // SOH
0b_0000_0000, // STX
0b_0000_0000, // ETX
0b_0000_0000, // EOT
0b_0000_0000, // ENQ
0b_0000_0000, // ACK
0b_0000_0000, // BEL
0b_0000_0000, // BS
0b_0000_0000, // HT
0b_0000_0000, // LF
0b_0000_0000, // VT
0b_0000_0000, // FF
0b_0000_0000, // CR
0b_0000_0000, // SO
0b_0000_0000, // SI
0b_0000_0000, // DLE
0b_0000_0000, // DC1
0b_0000_0000, // DC2
0b_0000_0000, // DC3
0b_0000_0000, // DC4
0b_0000_0000, // NAK
0b_0000_0000, // SYN
0b_0000_0000, // ETB
0b_0000_0000, // CAN
0b_0000_0000, // EM
0b_0000_0000, // SUB
0b_0000_0000, // ESC
0b_0000_0000, // FS
0b_0000_0000, // GS
0b_0000_0000, // RS
0b_0000_0000, // US
0b_0000_0000, // SPACE
0b_1111_1000, // !
0b_0000_0000, // "
0b_0000_0100, // #
0b_1111_1000, // $
0b_0000_0000, // %
0b_1111_1000, // &
0b_1111_1000, // '
0b_1111_1000, // (
0b_1111_1000, // )
0b_1111_1000, // *
0b_1111_1001, // +
0b_1111_1000, // ,
0b_1111_0011, // -
0b_1111_0011, // .
0b_1010_0100, // /
0b_1111_0011, // 0
0b_1111_0011, // 1
0b_1111_0011, // 2
0b_1111_0011, // 3
0b_1111_0011, // 4
0b_1111_0011, // 5
0b_1111_0011, // 6
0b_1111_0011, // 7
0b_1111_0011, // 8
0b_1111_0011, // 9
0b_1111_0100, // :
0b_1111_1000, // ;
0b_0000_0000, // <
0b_1111_1000, // =
0b_0000_0000, // >
0b_0010_0100, // ?
0b_1011_0100, // @
0b_1111_0011, // A
0b_1111_0011, // B
0b_1111_0011, // C
0b_1111_0011, // D
0b_1111_0011, // E
0b_1111_0011, // F
0b_1111_0011, // G
0b_1111_0011, // H
0b_1111_0011, // I
0b_1111_0011, // J
0b_1111_0011, // K
0b_1111_0011, // L
0b_1111_0011, // M
0b_1111_0011, // N
0b_1111_0011, // O
0b_1111_0011, // P
0b_1111_0011, // Q
0b_1111_0011, // R
0b_1111_0011, // S
0b_1111_0011, // T
0b_1111_0011, // U
0b_1111_0011, // V
0b_1111_0011, // W
0b_1111_0011, // X
0b_1111_0011, // Y
0b_1111_0011, // Z
0b_0000_0100, // [
0b_0000_0000, // \
0b_0000_0100, // ]
0b_0000_0000, // ^
0b_1111_0010, // _
0b_0000_0000, // `
0b_1111_0011, // a
0b_1111_0011, // b
0b_1111_0011, // c
0b_1111_0011, // d
0b_1111_0011, // e
0b_1111_0011, // f
0b_1111_0011, // g
0b_1111_0011, // h
0b_1111_0011, // i
0b_1111_0011, // j
0b_1111_0011, // k
0b_1111_0011, // l
0b_1111_0011, // m
0b_1111_0011, // n
0b_1111_0011, // o
0b_1111_0011, // p
0b_1111_0011, // q
0b_1111_0011, // r
0b_1111_0011, // s
0b_1111_0011, // t
0b_1111_0011, // u
0b_1111_0011, // v
0b_1111_0011, // w
0b_1111_0011, // x
0b_1111_0011, // y
0b_1111_0011, // z
0b_0000_0000, // {
0b_0000_0000, // |
0b_0000_0000, // }
0b_1111_0010, // ~
0b_0000_0000, // DEL
];
/// Returns `true` if the given ASCII character can follow the first
/// character of the `scheme` part.
///
/// `c` should be ASCII: the lookup table has only 128 entries, and the
/// lookup panics for the larger values.
#[inline]
#[must_use]
pub(crate) const fn is_ascii_scheme_continue(c: u8) -> bool {
    let props = TABLE[c as usize];
    (props & MASK_SCHEME_CONTINUE) != 0
}
/// Returns `true` if the given ASCII character is an `unreserved` character.
///
/// `c` should be ASCII: the lookup table has only 128 entries, and the
/// lookup panics for the larger values.
#[inline]
#[must_use]
pub(crate) const fn is_ascii_unreserved(c: u8) -> bool {
    let props = TABLE[c as usize];
    (props & MASK_UNRESERVED) != 0
}
/// Returns true if the character is unreserved.
///
/// ASCII characters are checked by `is_ascii_unreserved`, and the decision
/// for the other characters is left to the spec `S`.
#[inline]
#[must_use]
pub(crate) fn is_unreserved<S: Spec>(c: char) -> bool {
    if !c.is_ascii() {
        return S::is_nonascii_char_unreserved(c);
    }
    is_ascii_unreserved(c as u8)
}
///// Returns `true` if the given ASCII character matches `gen-delims`.
//#[inline]
//#[must_use]
//pub(crate) const fn is_ascii_gen_delims(c: u8) -> bool {
// (TABLE[c as usize] & MASK_GEN_DELIMS) != 0
//}
///// Returns `true` if the given ASCII character matches `sub-delims`.
//#[inline]
//#[must_use]
//pub(crate) const fn is_ascii_sub_delims(c: u8) -> bool {
// (TABLE[c as usize] & MASK_SUB_DELIMS) != 0
//}
///// Returns `true` if the given ASCII character matches `reserved`.
//#[inline]
//#[must_use]
//pub(crate) const fn is_ascii_reserved(c: u8) -> bool {
// (TABLE[c as usize] & (MASK_GEN_DELIMS | MASK_SUB_DELIMS)) != 0
//}
/// Returns `true` if the given ASCII character matches `pchar`, except for
/// the `pct-encoded` alternative.
///
/// `c` should be ASCII: the lookup table has only 128 entries, and the
/// lookup panics for the larger values.
#[inline]
#[must_use]
pub(crate) const fn is_ascii_pchar(c: u8) -> bool {
    let props = TABLE[c as usize];
    (props & MASK_PCHAR) != 0
}
/// Returns `true` if the given ASCII character can appear in `query` and
/// `fragment`.
///
/// `c` should be ASCII: the lookup table has only 128 entries, and the
/// lookup panics for the larger values.
#[inline]
#[must_use]
pub(crate) const fn is_ascii_frag_query(c: u8) -> bool {
    let props = TABLE[c as usize];
    (props & MASK_FRAG_QUERY) != 0
}
/// Returns `true` if the given non-ASCII character is allowed to appear in `iquery`.
///
/// A character is allowed if the spec `S` treats it as either an unreserved
/// character or a private character.
#[inline]
#[must_use]
pub(crate) fn is_nonascii_query<S: Spec>(c: char) -> bool {
S::is_nonascii_char_unreserved(c) || S::is_nonascii_char_private(c)
}
/// Returns `true` if the given non-ASCII character is allowed to appear in `ifragment`.
///
/// A character is allowed if the spec `S` treats it as an unreserved character.
#[inline]
#[must_use]
pub(crate) fn is_nonascii_fragment<S: Spec>(c: char) -> bool {
S::is_nonascii_char_unreserved(c)
}
/// Returns `true` if the given ASCII character can appear in `userinfo` and
/// in the address part of `IPvFuture`.
///
/// `c` should be ASCII: the lookup table has only 128 entries, and the
/// lookup panics for the larger values.
#[inline]
#[must_use]
pub(crate) const fn is_ascii_userinfo_ipvfutureaddr(c: u8) -> bool {
    let props = TABLE[c as usize];
    (props & MASK_USERINFO_IPVFUTUREADDR) != 0
}
/// Returns `true` if the given non-ASCII character is allowed to appear in `iuserinfo`.
///
/// A character is allowed if the spec `S` treats it as an unreserved character.
#[inline]
#[must_use]
pub(crate) fn is_nonascii_userinfo<S: Spec>(c: char) -> bool {
S::is_nonascii_char_unreserved(c)
}
/// Returns `true` if the given ASCII character can appear in `reg-name`,
/// i.e. if it is an `unreserved` or `sub-delims` character.
///
/// `c` should be ASCII: the lookup table has only 128 entries, and the
/// lookup panics for the larger values.
#[inline]
#[must_use]
pub(crate) const fn is_ascii_regname(c: u8) -> bool {
    /// Properties that make a character acceptable.
    const ACCEPTED: u8 = MASK_UNRESERVED | MASK_SUB_DELIMS;
    let props = TABLE[c as usize];
    (props & ACCEPTED) != 0
}
/// Returns `true` if the given non-ASCII character is allowed to appear in `ireg-name`.
///
/// A character is allowed if the spec `S` treats it as an unreserved character.
#[inline]
#[must_use]
pub(crate) fn is_nonascii_regname<S: Spec>(c: char) -> bool {
S::is_nonascii_char_unreserved(c)
}
/// Returns `true` if the given ASCII character is a slash or matches `pchar`
/// (except for the `pct-encoded` alternative).
///
/// `c` should be ASCII: the lookup table has only 128 entries, and the
/// lookup panics for the larger values.
#[inline]
#[must_use]
pub(crate) const fn is_ascii_pchar_slash(c: u8) -> bool {
    let props = TABLE[c as usize];
    (props & MASK_PCHAR_SLASH) != 0
}
/// Checks if the given character matches `ucschar` rule.
#[must_use]
pub(crate) fn is_ucschar(c: char) -> bool {
    let code_point = u32::from(c);
    match code_point {
        // Ranges in the Basic Multilingual Plane.
        0xA0..=0xD7FF | 0xF900..=0xFDCF | 0xFDF0..=0xFFEF => true,
        // Planes 1 to 13: everything but the last two code points of each plane.
        0x1_0000..=0xD_FFFF => (code_point & 0xFFFF) <= 0xFFFD,
        // Plane 14: starts at U+E1000, and the last two code points are excluded.
        0xE_1000..=0xE_FFFD => true,
        _ => false,
    }
}
/// Returns true if the given value is a continue byte of UTF-8.
#[inline(always)]
#[must_use]
pub(crate) fn is_utf8_byte_continue(byte: u8) -> bool {
    // The continue bytes are exactly the bytes of the form `0b10xx_xxxx`
    // (i.e. `0x80..=0xBF`), so testing the two most significant bits is
    // enough.
    (byte & 0b_1100_0000) == 0b_1000_0000
}
/// Returns true if the given ASCII character is `unreserved` or `reserved`
/// (i.e. `gen-delims` or `sub-delims`).
///
/// `c` should be ASCII: the lookup table has only 128 entries, and the
/// lookup panics for the larger values.
#[inline]
#[must_use]
pub(crate) const fn is_ascii_unreserved_or_reserved(c: u8) -> bool {
    /// Properties that make a character acceptable.
    const ACCEPTED: u8 = MASK_UNRESERVED | MASK_GEN_DELIMS | MASK_SUB_DELIMS;
    let props = TABLE[c as usize];
    (props & ACCEPTED) != 0
}

390
vendor/iri-string/src/parser/str.rs vendored Normal file
View File

@@ -0,0 +1,390 @@
//! Functions for common string operations.
pub(crate) use self::maybe_pct_encoded::{
process_percent_encoded_best_effort, PctEncodedFragments,
};
mod maybe_pct_encoded;
/// Returns the inner string if wrapped.
///
/// The string is considered wrapped when it has two or more bytes, its first
/// byte is `open`, and its last byte is `close`. The returned string is the
/// input without these two bytes.
///
/// Returns `None` if the string is not wrapped. `None` is also returned
/// (instead of panicking) if removing the two bytes would split a multi-byte
/// character, which can only happen for non-ASCII `open` or `close`.
#[must_use]
pub(crate) fn get_wrapped_inner(s: &str, open: u8, close: u8) -> Option<&str> {
    match s.as_bytes() {
        // Checked slicing: the boundaries are always valid for ASCII
        // delimiters, but not necessarily for arbitrary byte values.
        [first, .., last] if (*first == open) && (*last == close) => s.get(1..(s.len() - 1)),
        _ => None,
    }
}
/// Returns whichever of the two needles appears first in the haystack.
///
/// Returns `None` if neither of the needles is found.
#[cfg(not(feature = "memchr"))]
#[inline]
#[must_use]
pub(crate) fn prior_byte2(haystack: &[u8], needle1: u8, needle2: u8) -> Option<u8> {
    for &byte in haystack {
        if byte == needle1 || byte == needle2 {
            return Some(byte);
        }
    }
    None
}
/// Returns whichever of the two needles appears first in the haystack.
///
/// Returns `None` if neither of the needles is found.
#[cfg(feature = "memchr")]
#[inline]
#[must_use]
pub(crate) fn prior_byte2(haystack: &[u8], needle1: u8, needle2: u8) -> Option<u8> {
    let pos = memchr::memchr2(needle1, needle2, haystack)?;
    Some(haystack[pos])
}
/// (Possibly) faster version of `haystack.rfind(needle)` when `needle` is an ASCII character.
///
/// Returns the position of the last occurrence of `needle`, if any.
#[cfg(not(feature = "memchr"))]
#[inline]
#[must_use]
pub(crate) fn rfind(haystack: &[u8], needle: u8) -> Option<usize> {
    // Check the positions from the last one.
    (0..haystack.len()).rev().find(|&i| haystack[i] == needle)
}
/// (Possibly) faster version of `haystack.rfind(needle)` when `needle` is an ASCII character.
///
/// This variant delegates the search to `memchr::memrchr`.
#[cfg(feature = "memchr")]
#[inline]
#[must_use]
pub(crate) fn rfind(haystack: &[u8], needle: u8) -> Option<usize> {
memchr::memrchr(needle, haystack)
}
/// Finds the first needle, and splits the string at its position.
///
/// The first returned string is the part before the needle, and the second
/// one is the rest, which starts with the needle.
///
/// If `needle` is not found, returns `None`.
#[cfg(not(feature = "memchr"))]
#[must_use]
pub(crate) fn find_split(haystack: &str, needle: u8) -> Option<(&str, &str)> {
    let pos = haystack.as_bytes().iter().position(|&b| b == needle)?;
    Some(haystack.split_at(pos))
}
/// Splits the string just before the first occurrence of `needle`.
///
/// Returns `(before, rest)` where `rest` starts with the needle byte.
///
/// If `needle` is not found, returns `None`.
#[cfg(feature = "memchr")]
#[must_use]
pub(crate) fn find_split(haystack: &str, needle: u8) -> Option<(&str, &str)> {
    let pos = memchr::memchr(needle, haystack.as_bytes())?;
    Some(haystack.split_at(pos))
}
/// Splits the string just before the last occurrence of either needle.
///
/// Returns `(before, rest)` where `rest` starts with the needle byte found.
///
/// If no needles are found, returns `None`.
#[cfg(not(feature = "memchr"))]
#[must_use]
pub(crate) fn rfind_split2(haystack: &str, needle1: u8, needle2: u8) -> Option<(&str, &str)> {
    let pos = haystack
        .as_bytes()
        .iter()
        .rposition(|&b| b == needle1 || b == needle2)?;
    Some(haystack.split_at(pos))
}
/// Splits the string just before the last occurrence of either needle.
///
/// Returns `(before, rest)` where `rest` starts with the needle byte found.
///
/// If no needles are found, returns `None`.
#[cfg(feature = "memchr")]
#[must_use]
pub(crate) fn rfind_split2(haystack: &str, needle1: u8, needle2: u8) -> Option<(&str, &str)> {
    let pos = memchr::memrchr2(needle1, needle2, haystack.as_bytes())?;
    Some(haystack.split_at(pos))
}
/// Splits the string just before the first occurrence of either needle.
///
/// Returns `(before, rest)` where `rest` starts with the needle byte found.
///
/// If no needles are found, returns `None`.
#[cfg(not(feature = "memchr"))]
#[must_use]
pub(crate) fn find_split2(haystack: &str, needle1: u8, needle2: u8) -> Option<(&str, &str)> {
    let pos = haystack
        .as_bytes()
        .iter()
        .position(|&b| b == needle1 || b == needle2)?;
    Some(haystack.split_at(pos))
}
/// Splits the string just before the first occurrence of either needle.
///
/// Returns `(before, rest)` where `rest` starts with the needle byte found.
///
/// If no needles are found, returns `None`.
#[cfg(feature = "memchr")]
#[must_use]
pub(crate) fn find_split2(haystack: &str, needle1: u8, needle2: u8) -> Option<(&str, &str)> {
    let pos = memchr::memchr2(needle1, needle2, haystack.as_bytes())?;
    Some(haystack.split_at(pos))
}
/// Splits the string just before the first occurrence of any of the three needles.
///
/// Returns `(before, rest)` where `rest` starts with the needle byte found.
///
/// If no needles are found, returns `None`.
#[cfg(not(feature = "memchr"))]
#[must_use]
pub(crate) fn find_split3(
    haystack: &str,
    needle1: u8,
    needle2: u8,
    needle3: u8,
) -> Option<(&str, &str)> {
    let pos = haystack
        .as_bytes()
        .iter()
        .position(|&b| b == needle1 || b == needle2 || b == needle3)?;
    Some(haystack.split_at(pos))
}
/// Splits the string just before the first occurrence of any of the three needles.
///
/// Returns `(before, rest)` where `rest` starts with the needle byte found.
///
/// If no needles are found, returns `None`.
#[cfg(feature = "memchr")]
#[must_use]
pub(crate) fn find_split3(
    haystack: &str,
    needle1: u8,
    needle2: u8,
    needle3: u8,
) -> Option<(&str, &str)> {
    let pos = memchr::memchr3(needle1, needle2, needle3, haystack.as_bytes())?;
    Some(haystack.split_at(pos))
}
/// Splits the string around the first occurrence of `needle`.
///
/// Returns `(before, after)`; the needle byte itself is in neither part.
///
/// If `needle` is not found, returns `None`.
#[cfg(not(feature = "memchr"))]
#[must_use]
pub(crate) fn find_split_hole(haystack: &str, needle: u8) -> Option<(&str, &str)> {
    let pos = haystack.as_bytes().iter().position(|&b| b == needle)?;
    let before = &haystack[..pos];
    let after = &haystack[(pos + 1)..];
    Some((before, after))
}
/// Splits the string around the first occurrence of `needle`.
///
/// Returns `(before, after)`; the needle byte itself is in neither part.
///
/// If `needle` is not found, returns `None`.
#[cfg(feature = "memchr")]
#[must_use]
pub(crate) fn find_split_hole(haystack: &str, needle: u8) -> Option<(&str, &str)> {
    let pos = memchr::memchr(needle, haystack.as_bytes())?;
    let before = &haystack[..pos];
    let after = &haystack[(pos + 1)..];
    Some((before, after))
}
/// Splits the string around the first occurrence of either needle.
///
/// Returns `(before, needle_found, after)`.
///
/// If no needles are found, returns `None`.
#[cfg(not(feature = "memchr"))]
#[must_use]
pub(crate) fn find_split2_hole(
    haystack: &str,
    needle1: u8,
    needle2: u8,
) -> Option<(&str, u8, &str)> {
    let bytes = haystack.as_bytes();
    let pos = bytes
        .iter()
        .position(|&b| b == needle1 || b == needle2)?;
    let before = &haystack[..pos];
    let found = bytes[pos];
    let after = &haystack[(pos + 1)..];
    Some((before, found, after))
}
/// Splits the string around the first occurrence of either needle.
///
/// Returns `(before, needle_found, after)`.
///
/// If no needles are found, returns `None`.
#[cfg(feature = "memchr")]
#[must_use]
pub(crate) fn find_split2_hole(
    haystack: &str,
    needle1: u8,
    needle2: u8,
) -> Option<(&str, u8, &str)> {
    let bytes = haystack.as_bytes();
    let pos = memchr::memchr2(needle1, needle2, bytes)?;
    let before = &haystack[..pos];
    let found = bytes[pos];
    let after = &haystack[(pos + 1)..];
    Some((before, found, after))
}
/// Splits the string around the first occurrence of any of the four needles.
///
/// Returns `(before, needle_found, after)`.
///
/// If no needles are found, returns `None`.
#[cfg(not(feature = "memchr"))]
#[must_use]
pub(crate) fn find_split4_hole(
    haystack: &str,
    needle1: u8,
    needle2: u8,
    needle3: u8,
    needle4: u8,
) -> Option<(&str, u8, &str)> {
    let needles = [needle1, needle2, needle3, needle4];
    let bytes = haystack.as_bytes();
    let pos = bytes.iter().position(|b| needles.contains(b))?;
    let before = &haystack[..pos];
    let found = bytes[pos];
    let after = &haystack[(pos + 1)..];
    Some((before, found, after))
}
/// Splits the string around the first occurrence of any of the four needles.
///
/// Returns `(before, needle_found, after)`.
///
/// If no needles are found, returns `None`.
#[cfg(feature = "memchr")]
#[must_use]
pub(crate) fn find_split4_hole(
    haystack: &str,
    needle1: u8,
    needle2: u8,
    needle3: u8,
    needle4: u8,
) -> Option<(&str, u8, &str)> {
    let bytes = haystack.as_bytes();
    // Only three needles can be searched at once, so look for the first three,
    // then check whether the fourth one appears before that position.
    let first_of_three = memchr::memchr3(needle1, needle2, needle3, bytes);
    let pos = if let Some(prefix_len) = first_of_three {
        memchr::memchr(needle4, &bytes[..prefix_len]).unwrap_or(prefix_len)
    } else {
        memchr::memchr(needle4, bytes)?
    };
    let before = &haystack[..pos];
    let found = bytes[pos];
    let after = &haystack[(pos + 1)..];
    Some((before, found, after))
}
/// Splits the string around the last occurrence of `needle`.
///
/// Returns `(before, after)`; the needle byte itself is in neither part.
///
/// If `needle` is not found, returns `None`.
#[cfg(not(feature = "memchr"))]
#[must_use]
pub(crate) fn rfind_split_hole(haystack: &str, needle: u8) -> Option<(&str, &str)> {
    let pos = haystack.as_bytes().iter().rposition(|&b| b == needle)?;
    let before = &haystack[..pos];
    let after = &haystack[(pos + 1)..];
    Some((before, after))
}
/// Splits the string around the last occurrence of `needle`.
///
/// Returns `(before, after)`; the needle byte itself is in neither part.
///
/// If `needle` is not found, returns `None`.
#[cfg(feature = "memchr")]
#[must_use]
pub(crate) fn rfind_split_hole(haystack: &str, needle: u8) -> Option<(&str, &str)> {
    let pos = memchr::memrchr(needle, haystack.as_bytes())?;
    let before = &haystack[..pos];
    let after = &haystack[(pos + 1)..];
    Some((before, after))
}
/// Returns `true` if the string only contains the allowed characters.
///
/// The string is processed as alternating runs of ASCII bytes and non-ASCII
/// characters: every ASCII byte must satisfy `pred_ascii`, and every non-ASCII
/// character must satisfy `pred_nonascii`. An empty string is accepted.
#[must_use]
fn satisfy_chars<F, G>(mut s: &str, pred_ascii: F, pred_nonascii: G) -> bool
where
    F: Copy + Fn(u8) -> bool,
    G: Copy + Fn(char) -> bool,
{
    loop {
        // Validate the (possibly empty) leading ASCII run.
        let ascii_len = s.bytes().take_while(u8::is_ascii).count();
        let (ascii_run, tail) = s.split_at(ascii_len);
        if !ascii_run.bytes().all(pred_ascii) {
            return false;
        }
        if tail.is_empty() {
            return true;
        }

        // `tail` starts with a non-ASCII character. Validate the whole
        // non-ASCII run, which ends at the next ASCII byte (if any).
        let nonascii_len = tail.bytes().take_while(|b| !b.is_ascii()).count();
        let (nonascii_run, tail) = tail.split_at(nonascii_len);
        if !nonascii_run.chars().all(pred_nonascii) {
            return false;
        }
        s = tail;
    }
}
/// Returns `true` if the string only contains the allowed characters and percent-encoded char.
///
/// Every `%` must be followed by two hexadecimal digits; everything outside
/// such triplets is checked with `pred_ascii` (ASCII bytes) and
/// `pred_nonascii` (non-ASCII characters).
#[must_use]
pub(crate) fn satisfy_chars_with_pct_encoded<F, G>(
    mut s: &str,
    pred_ascii: F,
    pred_nonascii: G,
) -> bool
where
    F: Copy + Fn(u8) -> bool,
    G: Copy + Fn(char) -> bool,
{
    loop {
        let (plain, after_percent) = match find_split_hole(s, b'%') {
            Some(parts) => parts,
            // No more `%`: only the plain remainder is left to verify.
            None => return satisfy_chars(s, pred_ascii, pred_nonascii),
        };
        // The text before the percent sign must consist of allowed characters.
        let plain_ok = plain.is_empty() || satisfy_chars(plain, pred_ascii, pred_nonascii);
        // The percent sign must introduce a complete triplet.
        if !(plain_ok && starts_with_double_hexdigits(after_percent.as_bytes())) {
            return false;
        }
        // Skip the two hexadecimal digits.
        s = &after_percent[2..];
    }
}
/// Returns `true` if the first two bytes of the slice are both ASCII hexadecimal digits.
///
/// Returns `false` when the slice is shorter than two bytes.
#[must_use]
pub(crate) fn starts_with_double_hexdigits(s: &[u8]) -> bool {
    s.get(..2)
        .map_or(false, |pair| pair.iter().all(u8::is_ascii_hexdigit))
}
/// Removes the leading byte of the string if it equals `prefix`, and returns the rest.
///
/// Returns `None` if the string is empty or starts with a different byte.
///
/// # Precondition
///
/// `prefix` should be an ASCII character. This is checked only by a debug
/// assertion.
#[must_use]
pub(crate) fn strip_ascii_char_prefix(s: &str, prefix: u8) -> Option<&str> {
    debug_assert!(prefix.is_ascii());
    match s.as_bytes() {
        [first, ..] if *first == prefix => Some(&s[1..]),
        _ => None,
    }
}
/// Separates the first character of the string from the remainder.
///
/// Returns `(first_char, rest_str)`, or `None` if the string is empty.
#[must_use]
pub(crate) fn take_first_char(s: &str) -> Option<(char, &str)> {
    let first = s.chars().next()?;
    Some((first, &s[first.len_utf8()..]))
}

View File

@@ -0,0 +1,369 @@
//! Processor for possibly- or invalidly-percent-encoded strings.
use core::fmt::{self, Write as _};
use core::marker::PhantomData;
use core::num::NonZeroU8;
use core::ops::ControlFlow;
use crate::parser::str::find_split;
use crate::parser::trusted::hexdigits_to_byte;
/// Fragment in a possibly percent-encoded (and possibly broken) string.
///
/// Values of this type are what `process_percent_encoded_best_effort` hands to
/// its callback, in input order.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum PctEncodedFragments<'a> {
/// String fragment without percent-encoded triplets.
NoPctStr(&'a str),
/// Stray `%` (percent) character, i.e. one that does not start a complete
/// percent-encoded triplet.
StrayPercent,
/// Valid percent-encoded triplets for a character.
///
/// `.0` is the percent-encoded text, and `.1` is the character it decodes to.
Char(&'a str, char),
/// Percent-encoded triplets that do not consist of a valid UTF-8 sequence.
///
/// The string may be empty when only an incomplete triplet was undecodable.
InvalidUtf8PctTriplets(&'a str),
}
/// Processes characters in a string which may contain (possibly invalid) percent-encoded triplets.
///
/// `v` is formatted through its `Display` implementation, and the produced
/// text is decomposed into [`PctEncodedFragments`] that are passed to `f` one
/// by one, in input order.
///
/// # Returns
///
/// * `Ok(ControlFlow::Continue(()))` if the whole input has been processed.
/// * `Ok(ControlFlow::Break(_))` if `f` returned `Break`; no further fragments
///   are passed to `f` after that.
/// * `Err(fmt::Error)` if formatting `v` failed while `f` had not requested a break.
pub(crate) fn process_percent_encoded_best_effort<T, F, B>(
v: T,
mut f: F,
) -> Result<ControlFlow<B>, fmt::Error>
where
T: fmt::Display,
F: FnMut(PctEncodedFragments<'_>) -> ControlFlow<B>,
{
// Scratch space the decoder writes percent-encoded text into.
// 12 bytes: four triplets of three ASCII characters.
let mut buf = [0_u8; 12];
let mut writer = DecomposeWriter {
f: &mut f,
decoder: Default::default(),
buf: &mut buf,
result: ControlFlow::Continue(()),
_r: PhantomData,
};
if write!(writer, "{v}").is_err() {
// The writer reports `Break` from `f` as a formatting error, so tell
// that case apart from a genuine failure of `Display`.
match writer.result {
ControlFlow::Continue(_) => return Err(fmt::Error),
ControlFlow::Break(v) => return Ok(ControlFlow::Break(v)),
}
}
// Flush the internal buffer of the decoder.
if let Some(len) = writer.decoder.flush(&mut buf).map(|v| usize::from(v.get())) {
// The leftover consists of zero or more complete triplets followed by
// at most one incomplete triplet (`%` or `%` and one more byte).
let len_suffix = len % 3;
let triplets_end = len - len_suffix;
let triplets = core::str::from_utf8(&buf[..triplets_end])
.expect("[validity] percent-encoded triplets consist of ASCII characters");
if let ControlFlow::Break(v) = f(PctEncodedFragments::InvalidUtf8PctTriplets(triplets)) {
return Ok(ControlFlow::Break(v));
}
if len_suffix > 0 {
// The incomplete triplet starts with a `%`.
if let ControlFlow::Break(v) = f(PctEncodedFragments::StrayPercent) {
return Ok(ControlFlow::Break(v));
}
}
if len_suffix > 1 {
// The byte following the stray `%` is reported as a plain string.
let after_percent = core::str::from_utf8(
&buf[(triplets_end + 1)..(triplets_end + len_suffix)],
)
.expect("[consistency] percent-encoded triplets contains only ASCII characters");
if let ControlFlow::Break(v) = f(PctEncodedFragments::NoPctStr(after_percent)) {
return Ok(ControlFlow::Break(v));
}
}
}
Ok(ControlFlow::Continue(()))
}
/// Writer to decompose the input into fragments.
///
/// Text written to this type through `core::fmt::Write` is split into
/// `PctEncodedFragments`, which are passed to the output function `f`.
struct DecomposeWriter<'a, F, B> {
/// Output function.
f: &'a mut F,
/// Decoder that holds percent-encoded triplets which are not yet decodable.
decoder: DecoderBuffer,
/// Buffer the decoder writes percent-encoded text into before it is passed to `f`.
buf: &'a mut [u8],
/// Result of the last output function call.
///
/// Once this becomes `Break`, writing is refused with `fmt::Error`.
result: ControlFlow<B>,
/// Dummy field for the type parameter of the return type of the function `f`.
_r: PhantomData<fn() -> B>,
}
impl<F, B> DecomposeWriter<'_, F, B>
where
    F: FnMut(PctEncodedFragments<'_>) -> ControlFlow<B>,
{
    /// Converts the stored result into a `fmt::Result`: `Continue` maps to
    /// `Ok(_)` and `Break` maps to `Err(_)`.
    #[inline(always)]
    fn result_continue_or_err(&self) -> fmt::Result {
        match &self.result {
            ControlFlow::Continue(_) => Ok(()),
            ControlFlow::Break(_) => Err(fmt::Error),
        }
    }

    /// Passes the first `len_undecodable` bytes of the buffer to the output
    /// function as undecodable fragments.
    ///
    /// The complete triplets are reported as `InvalidUtf8PctTriplets`. A
    /// trailing incomplete triplet is reported as `StrayPercent`, followed by
    /// `NoPctStr` for the byte after the percent sign when there is one.
    ///
    /// Returns `Err(_)` as soon as the output function returns `Break`.
    fn output_as_undecodable(&mut self, len_undecodable: u8) -> fmt::Result {
        let total_len = usize::from(len_undecodable);
        let written = core::str::from_utf8(&self.buf[..total_len])
            .expect("[validity] `DecoderBuffer` writes a valid ASCII string");
        let tail_len = total_len % 3;
        let (triplets, tail) = written.split_at(total_len - tail_len);

        self.result = (self.f)(PctEncodedFragments::InvalidUtf8PctTriplets(triplets));
        self.result_continue_or_err()?;
        if tail_len == 0 {
            return Ok(());
        }

        // At least the first `%` exists.
        self.result = (self.f)(PctEncodedFragments::StrayPercent);
        self.result_continue_or_err()?;
        if tail_len == 1 {
            return Ok(());
        }

        // A following hexdigit is available.
        debug_assert_eq!(
            tail_len, 2,
            "[consistency] the length of incomplete percent-encoded \
             triplet must be less than 2 bytes"
        );
        self.result = (self.f)(PctEncodedFragments::NoPctStr(&tail[1..]));
        self.result_continue_or_err()
    }
}
impl<F, B> fmt::Write for DecomposeWriter<'_, F, B>
where
    F: FnMut(PctEncodedFragments<'_>) -> ControlFlow<B>,
{
    /// Decomposes `s` into fragments and passes them to the output function.
    ///
    /// Incomplete percent-encoded sequences at the end of `s` stay in the
    /// decoder, so that they can be completed by a following `write_str` call.
    ///
    /// # Errors
    ///
    /// Returns `Err(_)` if the output function has returned `Break` (during
    /// this call or a previous one). The break value is kept in `self.result`.
    fn write_str(&mut self, s: &str) -> fmt::Result {
        self.result_continue_or_err()?;
        let mut rest = s;
        while !rest.is_empty() {
            let (len_consumed, result) = self.decoder.push_encoded(self.buf, rest);
            if len_consumed == 0 {
                // `rest` does not continue the percent-encoded sequence.
                //
                // The decoder may have popped a fragment buffered by a
                // previous call (for example `%C3` written separately from a
                // following `abc`). It has been written to `self.buf` and must
                // be emitted here; otherwise it would be silently lost.
                if let PushResult::Undecodable(len_popped) = result {
                    if len_popped > 0 {
                        self.output_as_undecodable(len_popped)?;
                    }
                }
                // Flush whatever remains in the decoder before attempting to
                // decode more data. The flushed bytes were consumed from
                // earlier input, so `rest` must not be advanced for them.
                if let Some(len_flushed) = self.decoder.flush(self.buf).map(NonZeroU8::get) {
                    self.output_as_undecodable(len_flushed)?;
                }

                // Write plain string prefix (if found).
                //
                // The prefix is empty when `rest` starts with `%` right after
                // an incomplete triplet from a previous call. The decoder is
                // empty now, so the next iteration consumes that `%`.
                let (plain_prefix, suffix) = find_split(rest, b'%').unwrap_or((rest, ""));
                if !plain_prefix.is_empty() {
                    self.result = (self.f)(PctEncodedFragments::NoPctStr(plain_prefix));
                    self.result_continue_or_err()?;
                }
                rest = suffix;
                continue;
            }

            // Process decoding result.
            match result {
                PushResult::Decoded(len_written, c) => {
                    let len_written = usize::from(len_written.get());
                    let frag = core::str::from_utf8(&self.buf[..len_written])
                        .expect("[validity] `DecoderBuffer` writes a valid ASCII string");
                    self.result = (self.f)(PctEncodedFragments::Char(frag, c));
                    self.result_continue_or_err()?;
                }
                PushResult::Undecodable(len_written) => {
                    self.output_as_undecodable(len_written)?;
                }
                PushResult::NeedMoreBytes => {
                    // Nothing to write at this time.
                }
            }
            rest = &rest[len_consumed..];
        }
        Ok(())
    }
}
/// A type for result of feeding data to [`DecoderBuffer`].
///
/// The lengths in the variants are the number of bytes of percent-encoded text
/// the decoder has written to the head of the destination buffer.
#[derive(Debug, Clone, Copy)]
enum PushResult {
/// Input is still incomplete, needs more bytes to get the decoding result.
NeedMoreBytes,
/// Bytes decodable to valid UTF-8 sequence.
// `.0`: Length of decodable fragment.
// `.1`: Decoded character.
Decoded(NonZeroU8, char),
/// Percent-encoded text (complete triplets and possibly an incomplete one)
/// that is not decodable to valid UTF-8 sequence.
// `.0`: Length of undecodable fragment. This can be zero when the decoder
// had nothing buffered.
Undecodable(u8),
}
/// Buffer to contain (and to decode) incomplete percent-encoded triplets.
#[derive(Default, Debug, Clone, Copy)]
struct DecoderBuffer {
/// Percent-encoded triplets that possibly constitute a valid UTF-8 sequence after being decoded.
//
// `3 * 4`: 3 ASCII characters for single percent-encoded triplet, and
// 4 triplets at most for single Unicode codepoint in UTF-8.
encoded: [u8; 12],
/// Decoded bytes.
decoded: [u8; 4],
/// Number of bytes available in the `encoded` buffer.
///
/// `len_encoded / 3` also indicates the length of data in `decoded`.
len_encoded: u8,
}
impl DecoderBuffer {
    /// Writes the data of the given length to the destination, and removes that part from buffer.
    ///
    /// The first `remove_len` bytes of the `encoded` buffer are copied to the
    /// head of `dest`, and the remaining encoded and decoded data is moved to
    /// the front of the internal buffers.
    ///
    /// # Panics
    ///
    /// Panics if `dest` is shorter than `remove_len` bytes.
    fn write_and_pop(&mut self, dest: &mut [u8], remove_len: u8) {
        let new_len = self.len_encoded - remove_len;
        let remove_len = usize::from(remove_len);
        let src_range = remove_len..usize::from(self.len_encoded);
        dest[..remove_len].copy_from_slice(&self.encoded[..remove_len]);

        if new_len == 0 {
            *self = Self::default();
            return;
        }
        self.encoded.copy_within(src_range, 0);
        self.decoded
            .copy_within((remove_len / 3)..usize::from(self.len_encoded / 3), 0);
        self.len_encoded = new_len;
    }

    /// Pushes a byte of a (possible) percent-encoded triplet to the buffer.
    ///
    /// When the byte completes a triplet, the triplet is decoded and the
    /// resulting byte is stored in the `decoded` buffer.
    fn push_single_encoded_byte(&mut self, byte: u8) {
        debug_assert!(
            self.len_encoded < 12,
            "[consistency] four percent-encoded triplets are enough for a unicode code point"
        );
        let pos_enc = usize::from(self.len_encoded);
        self.len_encoded += 1;
        self.encoded[pos_enc] = byte;
        if self.len_encoded % 3 == 0 {
            // A new percent-encoded triplet is read. Decode and remember.
            let pos_dec = usize::from(self.len_encoded / 3 - 1);
            let upper = self.encoded[pos_enc - 1];
            let lower = byte;
            debug_assert!(
                upper.is_ascii_hexdigit() && lower.is_ascii_hexdigit(),
                "[consistency] the `encoded` buffer should contain valid percent-encoded triplets"
            );
            self.decoded[pos_dec] = hexdigits_to_byte([upper, lower]);
        }
    }

    /// Pushes the (possibly) encoded string to the buffer.
    ///
    /// Returns `(len_consumed, result)`, where `len_consumed` is the number of
    /// bytes of `s` that have been taken into the decoder. It is always a
    /// character boundary of `s`.
    ///
    /// When the result is `PushResult::Decoded` or `PushResult::Undecodable`,
    /// the corresponding percent-encoded text has been written to the head of
    /// `buf` and removed from the decoder. The caller should process it before
    /// pushing more data, since the next call may overwrite `buf`. Note that
    /// `PushResult::Undecodable` can be returned with `len_consumed == 0` and
    /// a non-zero length, when the decoder pops data buffered by previous calls.
    ///
    /// # Preconditions
    ///
    /// * `buf` should be at least 12 bytes. If not, this method may panic.
    #[must_use]
    pub(crate) fn push_encoded(&mut self, buf: &mut [u8], s: &str) -> (usize, PushResult) {
        debug_assert!(
            buf.len() >= 12,
            "[internal precondition] destination buffer should be at least 12 bytes"
        );
        let mut chars = s.chars();
        let mut len_triplet_incomplete = self.len_encoded % 3;
        for c in &mut chars {
            if len_triplet_incomplete == 0 {
                // Expect `%`.
                if c != '%' {
                    // Undecodable.
                    // `- c.len_utf8()`: the last character is peeked but not
                    // consumed. The character can be non-ASCII here, so its
                    // encoded length is not necessarily one byte.
                    let len_consumed = s.len() - chars.as_str().len() - c.len_utf8();
                    let len_result = self.len_encoded;
                    self.write_and_pop(buf, len_result);
                    return (len_consumed, PushResult::Undecodable(len_result));
                }
                self.push_single_encoded_byte(b'%');
                len_triplet_incomplete = 1;
                continue;
            }

            // Expect a nibble.
            if !c.is_ascii_hexdigit() {
                // Undecodable.
                // `- c.len_utf8()`: the last character is peeked but not
                // consumed. The character can be non-ASCII here, so its
                // encoded length is not necessarily one byte.
                let len_consumed = s.len() - chars.as_str().len() - c.len_utf8();
                let len_result = self.len_encoded;
                self.write_and_pop(buf, len_result);
                return (len_consumed, PushResult::Undecodable(len_result));
            }
            // `c` is an ASCII hexdigit, so the cast is lossless.
            self.push_single_encoded_byte(c as u8);
            if len_triplet_incomplete == 1 {
                len_triplet_incomplete = 2;
                continue;
            } else {
                // Now a new percent-encoded triplet is read!
                debug_assert_eq!(len_triplet_incomplete, 2);
                len_triplet_incomplete = 0;
            }

            // Now a new percent-encoded triplet is read.
            // Check if the buffer contains a valid decodable content.
            let len_decoded = usize::from(self.len_encoded) / 3;
            match core::str::from_utf8(&self.decoded[..len_decoded]) {
                Ok(decoded_str) => {
                    // Successfully decoded.
                    let len_consumed = s.len() - chars.as_str().len();
                    let c = decoded_str
                        .chars()
                        .next()
                        .expect("[validity] `decoded` buffer is nonempty");
                    let len_result = NonZeroU8::new(self.len_encoded).expect(
                        "[consistency] `encoded` buffer is nonempty since \
                         `push_single_encoded_byte()` was called",
                    );
                    self.write_and_pop(buf, len_result.get());
                    return (len_consumed, PushResult::Decoded(len_result, c));
                }
                Err(e) => {
                    // Undecodable.
                    assert_eq!(
                        e.valid_up_to(),
                        0,
                        "[consistency] `decoded` buffer contains at most one character"
                    );
                    let skip_len_decoded = match e.error_len() {
                        // Unexpected EOF. Wait for remaining input.
                        None => continue,
                        // Skip invalid bytes.
                        Some(v) => v,
                    };
                    let len_consumed = s.len() - chars.as_str().len();
                    let len_result = skip_len_decoded as u8 * 3;
                    assert_ne!(
                        skip_len_decoded, 0,
                        "[consistency] empty bytes cannot be invalid"
                    );
                    self.write_and_pop(buf, len_result);
                    return (len_consumed, PushResult::Undecodable(len_result));
                }
            };
        }
        let len_consumed = s.len() - chars.as_str().len();
        (len_consumed, PushResult::NeedMoreBytes)
    }

    /// Writes the incomplete data completely to the destination, and clears the internal buffer.
    ///
    /// Returns the number of bytes written to the head of `buf`, or `None` if
    /// the internal buffer was empty.
    #[must_use]
    pub(crate) fn flush(&mut self, buf: &mut [u8]) -> Option<NonZeroU8> {
        let len_result = NonZeroU8::new(self.len_encoded)?;
        // Emit the current (undecodable) buffer as is.
        self.write_and_pop(buf, len_result.get());
        debug_assert_eq!(
            self.len_encoded, 0,
            "[consistency] the buffer should be cleared after flushed"
        );
        Some(len_result)
    }
}

476
vendor/iri-string/src/parser/trusted.rs vendored Normal file
View File

@@ -0,0 +1,476 @@
//! Fast parsers for trusted (already validated) input.
//!
//! Using these parsers on input that has not been validated leads to unspecified (wrong) results.
pub(crate) mod authority;
use core::cmp::Ordering;
use core::num::NonZeroUsize;
use crate::components::{RiReferenceComponents, Splitter};
use crate::format::eq_str_display;
use crate::normalize::{is_pct_case_normalized, NormalizedAsciiOnlyHost, NormalizednessCheckMode};
use crate::parser::str::{find_split2, find_split3, find_split4_hole, find_split_hole};
use crate::spec::Spec;
use crate::types::RiReferenceStr;
/// Consumes the leading `scheme` and the colon that follows it.
///
/// Returns `(rest, scheme)`.
///
/// This should be called at the head of an absolute IRIs/URIs.
///
/// # Panics
///
/// Panics if the string does not contain a colon.
#[must_use]
fn scheme_colon(i: &str) -> (&str, &str) {
    find_split_hole(i, b':')
        .map(|(scheme, rest)| (rest, scheme))
        .expect("[precondition] absolute IRIs must have `scheme` part")
}
/// Consumes the leading `scheme` and the colon that follows it, if present.
///
/// Returns `(rest, Some(scheme))` when a colon appears before any of `/`, `?`,
/// and `#`. Otherwise returns the input unchanged together with `None`.
///
/// This should be called at the head of an `IRI-reference` or similar.
#[must_use]
fn scheme_colon_opt(i: &str) -> (&str, Option<&str>) {
    if let Some((scheme, b':', rest)) = find_split4_hole(i, b':', b'/', b'?', b'#') {
        return (rest, Some(scheme));
    }
    (i, None)
}
/// Consumes a leading double slash and the authority that follows it, if present.
///
/// Returns `(rest, Some(authority))` when the input starts with `//`, and the
/// input unchanged together with `None` otherwise.
///
/// This should be called at the head of an `IRI-reference`, or at the result of `scheme_colon`.
#[must_use]
fn slash_slash_authority_opt(i: &str) -> (&str, Option<&str>) {
    match i.strip_prefix("//") {
        None => (i, None),
        Some(after_slashes) => {
            // `i` might match `path-abempty` (which can start with `//`), but it is not
            // allowed as `relative-part`, so no need to care `path-abempty` rule here.
            // A slash, question mark, and hash character won't appear in `authority`.
            let (authority, rest) =
                find_split3(after_slashes, b'/', b'?', b'#').unwrap_or((after_slashes, ""));
            (rest, Some(authority))
        }
    }
}
/// Consumes everything before the query or the fragment.
///
/// Returns `(rest, before_query)`, where `rest` is empty or starts with `?` or `#`.
#[must_use]
fn until_query(i: &str) -> (&str, &str) {
    // `?` won't appear before the query part.
    let (before_query, rest) = find_split2(i, b'?', b'#').unwrap_or((i, ""));
    (rest, before_query)
}
/// Splits the tail of an IRI into the query and the fragment, if available.
///
/// Returns `(query, fragment)` without the leading `?` and `#`.
///
/// The string must start with `?`, or `#`, or be empty.
#[must_use]
fn decompose_query_and_fragment(i: &str) -> (Option<&str>, Option<&str>) {
    let first = match i.as_bytes().first() {
        Some(&byte) => byte,
        None => return (None, None),
    };
    if first != b'?' {
        // No query: the whole rest is the fragment.
        debug_assert_eq!(first, b'#');
        return (None, Some(&i[1..]));
    }
    let after_question = &i[1..];
    find_split_hole(after_question, b'#').map_or(
        (Some(after_question), None),
        |(query, fragment)| (Some(query), Some(fragment)),
    )
}
/// Decomposes the given valid `IRI-reference`.
///
/// The positions of the components are computed once and stored in a
/// `Splitter`, which is returned together with the reference to the IRI.
#[must_use]
pub(crate) fn decompose_iri_reference<S: Spec>(
i: &RiReferenceStr<S>,
) -> RiReferenceComponents<'_, S> {
/// Inner function to avoid unnecessary monomorphizations on `S`.
fn decompose(i: &str) -> Splitter {
// `i` is shadowed by the unparsed remainder below, so remember the total
// length to convert remainder lengths back into absolute positions.
let len = i.len();
let (i, scheme_end) = {
let (i, scheme) = scheme_colon_opt(i);
let end = scheme.and_then(|s| NonZeroUsize::new(s.len()));
(i, end)
};
let (i, authority_end) = {
// 2: "//".len()
let start = len - i.len() + 2;
// `authority` does not contain the two slashes of `://`.
let (i, authority) = slash_slash_authority_opt(i);
let end = authority.and_then(|s| NonZeroUsize::new(start + s.len()));
(i, end)
};
let (i, _path) = until_query(i);
let (query_start, fragment_start) = {
// Position just after the first `?` or `#` of the remainder.
//
// This could theoretically be zero if `len` is `usize::MAX` and
// `i` has neither a query nor a fragment. However, this is
// practically impossible.
let after_first_prefix = NonZeroUsize::new((len - i.len()).wrapping_add(1));
let (query, fragment) = decompose_query_and_fragment(i);
match (query.is_some(), fragment) {
(true, Some(fragment)) => {
(after_first_prefix, NonZeroUsize::new(len - fragment.len()))
}
(true, None) => (after_first_prefix, None),
(false, Some(_fragment)) => (None, after_first_prefix),
(false, None) => (None, None),
}
};
Splitter::new(scheme_end, authority_end, query_start, fragment_start)
}
RiReferenceComponents {
iri: i,
splitter: decompose(i.as_str()),
}
}
/// Returns the `scheme` part of an IRI reference, if it has one.
///
/// # Precondition
///
/// The given string must be a valid IRI reference.
#[inline]
#[must_use]
pub(crate) fn extract_scheme(i: &str) -> Option<&str> {
    let (_rest, scheme) = scheme_colon_opt(i);
    scheme
}
/// Returns the `scheme` part of an absolute IRI.
///
/// # Precondition
///
/// The given string must be a valid absolute IRI.
#[inline]
#[must_use]
pub(crate) fn extract_scheme_absolute(i: &str) -> &str {
    let (_rest, scheme) = scheme_colon(i);
    scheme
}
/// Returns the `authority` part of an IRI reference, if it has one.
///
/// # Precondition
///
/// The given string must be a valid IRI reference.
#[inline]
#[must_use]
pub(crate) fn extract_authority(i: &str) -> Option<&str> {
    let (after_scheme, _scheme) = scheme_colon_opt(i);
    let (_rest, authority) = slash_slash_authority_opt(after_scheme);
    authority
}
/// Returns the `authority` part of an absolute IRI, if it has one.
///
/// # Precondition
///
/// The given string must be a valid absolute IRI.
#[inline]
#[must_use]
pub(crate) fn extract_authority_absolute(i: &str) -> Option<&str> {
    let (after_scheme, _scheme) = scheme_colon(i);
    let (_rest, authority) = slash_slash_authority_opt(after_scheme);
    authority
}
/// Returns the `authority` part of a relative IRI, if it has one.
///
/// # Precondition
///
/// The given string must be a valid relative IRI.
#[inline]
#[must_use]
pub(crate) fn extract_authority_relative(i: &str) -> Option<&str> {
    let (_rest, authority) = slash_slash_authority_opt(i);
    authority
}
/// Returns the `path` part of an IRI reference.
///
/// # Precondition
///
/// The given string must be a valid IRI reference.
#[inline]
#[must_use]
pub(crate) fn extract_path(i: &str) -> &str {
    let (after_scheme, _scheme) = scheme_colon_opt(i);
    let (after_authority, _authority) = slash_slash_authority_opt(after_scheme);
    let (_rest, path) = until_query(after_authority);
    path
}
/// Returns the `path` part of an absolute IRI.
///
/// # Precondition
///
/// The given string must be a valid absolute IRI.
#[inline]
#[must_use]
pub(crate) fn extract_path_absolute(i: &str) -> &str {
    let (after_scheme, _scheme) = scheme_colon(i);
    let (after_authority, _authority) = slash_slash_authority_opt(after_scheme);
    let (_rest, path) = until_query(after_authority);
    path
}
/// Returns the `path` part of a relative IRI.
///
/// # Precondition
///
/// The given string must be a valid relative IRI.
#[inline]
#[must_use]
pub(crate) fn extract_path_relative(i: &str) -> &str {
    let (after_authority, _authority) = slash_slash_authority_opt(i);
    let (_rest, path) = until_query(after_authority);
    path
}
/// Returns the `query` part of an IRI reference, if it has one.
///
/// The leading `?` is not included.
///
/// # Precondition
///
/// The given string must be a valid IRI reference.
#[inline]
#[must_use]
pub(crate) fn extract_query(i: &str) -> Option<&str> {
    let (query_and_fragment, _before_query) = until_query(i);
    let (query, _fragment) = decompose_query_and_fragment(query_and_fragment);
    query
}
/// Returns the `query` part of an `absolute-IRI` string, if it has one.
///
/// The leading `?` is not included.
///
/// # Precondition
///
/// The given string must be a valid `absolute-IRI` string.
#[must_use]
pub(crate) fn extract_query_absolute_iri(i: &str) -> Option<&str> {
    let (after_path, _before_query) = until_query(i);
    if after_path.is_empty() {
        return None;
    }
    debug_assert_eq!(
        after_path.as_bytes().first(),
        Some(&b'?'),
        "`absolute-IRI` string must not have `fragment part"
    );
    Some(&after_path[1..])
}
/// Separates the fragment part of an IRI string from everything before it.
///
/// Returns `(prefix, fragment)`. The `#` character that introduces the
/// fragment is included in neither part.
///
/// # Precondition
///
/// The given string must be a valid IRI reference.
#[inline]
#[must_use]
pub(crate) fn split_fragment(iri: &str) -> (&str, Option<&str>) {
    // It is completely OK to find the first `#` character from valid IRI to get fragment part,
    // because the spec says that there are no `#` characters before the fragment part.
    //
    // > ```
    // > scheme      = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
    // > ```
    // >
    // > --- [RFC 3986, section 3.1. Scheme](https://www.rfc-editor.org/rfc/rfc3986.html#section-3.1)
    //
    // > The authority component is preceded by a double slash ("//") and is terminated by the
    // > next slash ("/"), question mark ("?"), or number sign ("#") character, or by the end
    // > of the URI.
    // >
    // > --- [RFC 3986, section 3.2. Authority](https://www.rfc-editor.org/rfc/rfc3986.html#section-3.2)
    //
    // > The path is terminated by the first question mark ("?") or number sign ("#")
    // > character, or by the end of the URI.
    // >
    // > --- [RFC 3986, section 3.3. Path](https://www.rfc-editor.org/rfc/rfc3986.html#section-3.3)
    //
    // > The query component is indicated by the first question mark ("?") character and
    // > terminated by a number sign ("#") character or by the end of the URI.
    // >
    // > --- [RFC 3986, section 3.4. Query](https://www.rfc-editor.org/rfc/rfc3986.html#section-3.4)
    find_split_hole(iri, b'#').map_or((iri, None), |(prefix, fragment)| {
        (prefix, Some(fragment))
    })
}
/// Returns the fragment part of the given IRI, if it has one.
///
/// The `#` character that introduces the fragment is not included.
#[inline]
#[must_use]
pub(crate) fn extract_fragment(iri: &str) -> Option<&str> {
    let (_prefix, fragment) = split_fragment(iri);
    fragment
}
/// Returns `true` if the string is normalized.
///
/// If this function returns `true`, normalization input and output will be identical.
///
/// In this function, "normalized" means that any of the normalization below
/// won't change the input on normalization:
///
/// * syntax-based normalization,
/// * case normalization,
/// * percent-encoding normalization, and
/// * path segment normalization.
///
/// Note that scheme-based normalization is not considered.
///
/// `mode` selects how the path is checked for dot segments when the IRI has no
/// authority.
///
/// # Panics
///
/// Panics if the string does not contain a colon, since the scheme is
/// extracted with `scheme_colon`.
#[must_use]
pub(crate) fn is_normalized<S: Spec>(i: &str, mode: NormalizednessCheckMode) -> bool {
let (i, scheme) = scheme_colon(i);
let (after_authority, authority) = slash_slash_authority_opt(i);
let (_after_path, path) = until_query(after_authority);
// Syntax-based normalization: uppercase chars in `scheme` should be
// converted to lowercase.
if scheme.bytes().any(|b| b.is_ascii_uppercase()) {
return false;
}
// Case normalization: ASCII alphabets in US-ASCII only `host` should be
// normalized to lowercase.
// Case normalization: ASCII alphabets in percent-encoding triplet should be
// normalized to uppercase.
// Percent-encoding normalization: unreserved characters should be decoded
// in `userinfo`, `host`, `path`, `query`, and `fragments`.
// Path segment normalization: the path should not have dot segments (`.`
// and/or `..`).
//
// Note that `authority` can have percent-encoded `userinfo`.
if let Some(authority) = authority {
let authority_components = authority::decompose_authority(authority);
// Check `host`.
let host = authority_components.host();
let host_is_normalized = if is_ascii_only_host(host) {
eq_str_display(host, &NormalizedAsciiOnlyHost::new(host))
} else {
// If the host is not ASCII-only, conversion to lowercase is not performed.
is_pct_case_normalized::<S>(host)
};
if !host_is_normalized {
return false;
}
// Check percent encodings in `userinfo`.
if let Some(userinfo) = authority_components.userinfo() {
if !is_pct_case_normalized::<S>(userinfo) {
return false;
}
}
}
// Check `path`.
//
// Syntax-based normalization: Dot segments might be removed.
// Note that we don't have to care `%2e` and `%2E` since `.` is unreserved
// and they will be decoded if not normalized.
// Also note that WHATWG serialization will use `/.//` as a path prefix if
// the path is absolute and won't modify the path if the path is relative.
//
// Percent-encoding normalization: unreserved characters should be decoded
// in `path`, `query`, and `fragments`.
//
// `None` means that the path is not checked for dot segments at all.
let path_span_no_dot_segments = if authority.is_some() {
Some(path)
} else {
match mode {
NormalizednessCheckMode::Default => Some(path.strip_prefix("/.//").unwrap_or(path)),
NormalizednessCheckMode::Rfc3986 => Some(path),
NormalizednessCheckMode::PreserveAuthoritylessRelativePath => {
if path.starts_with('/') {
// Absolute.
Some(path.strip_prefix("/.//").unwrap_or(path))
} else {
// Relative. Treat the path as "opaque". No span to check.
None
}
}
}
};
if let Some(path_span_no_dot_segments) = path_span_no_dot_segments {
if path_span_no_dot_segments
.split('/')
.any(|segment| matches!(segment, "." | ".."))
{
return false;
}
}
// Check percent encodings in everything after the authority (the path and
// whatever follows it).
is_pct_case_normalized::<S>(after_authority)
}
/// Converts a pair of ASCII hexadecimal digits into the byte they represent.
///
/// The first element of the array is the upper nibble, and the second is the
/// lower nibble.
///
/// # Preconditions
///
/// The parameters `upper` and `lower` should be an ASCII hexadecimal digit.
#[must_use]
pub(super) fn hexdigits_to_byte([upper, lower]: [u8; 2]) -> u8 {
    /// Returns the value of a single ASCII hexadecimal digit.
    fn nibble(digit: u8) -> u8 {
        // The high four bits tell digits (`0x3_`), uppercase letters
        // (`0x4_`), and lowercase letters (`0x6_`) apart.
        let base = match (digit & 0xf0).cmp(&0x40) {
            Ordering::Less => b'0',
            Ordering::Equal => b'A' - 10,
            Ordering::Greater => b'a' - 10,
        };
        digit - base
    }

    let i_upper = nibble(upper);
    let i_lower = nibble(lower);
    (i_upper << 4) + i_lower
}
/// Decodes the two leading hexdigit bytes of `s` into a single byte.
///
/// Returns the decoded byte together with the remainder of the string after
/// those two bytes.
///
/// # Panics
///
/// Panics if the string is shorter than two bytes. The two bytes are expected
/// to be ASCII hexdigits (see the preconditions of `hexdigits_to_byte`).
#[must_use]
pub(crate) fn take_xdigits2(s: &str) -> (u8, &str) {
    let mut bytes = s.bytes();
    // Both digits share the same failure mode, so fetch them the same way.
    let mut next_xdigit = || {
        bytes
            .next()
            .expect("[validity] at least two bytes should follow the `%` in a valid IRI reference")
    };
    let upper_xdigit = next_xdigit();
    let lower_xdigit = next_xdigit();

    (hexdigits_to_byte([upper_xdigit, lower_xdigit]), &s[2..])
}
/// Tests whether the given `host`/`ihost` string stays within US-ASCII.
///
/// Returns `false` when the string contains a raw non-ASCII character or a
/// percent-encoded triplet that decodes to a non-ASCII byte, and `true`
/// otherwise.
///
/// # Precondition
///
/// The given string should be valid `host` or `host ":" port` string.
#[must_use]
pub(crate) fn is_ascii_only_host(mut host: &str) -> bool {
    loop {
        // Look for the next character that needs a closer look.
        let suspicious = host
            .char_indices()
            .find(|&(_, ch)| ch == '%' || !ch.is_ascii());
        let pct_pos = match suspicious {
            // Neither non-ASCII characters nor percent-encoded characters remain.
            None => return true,
            // Percent-encoded character found.
            Some((pos, '%')) => pos,
            // Non-ASCII character found.
            Some(_) => return false,
        };

        let (decoded, tail) = take_xdigits2(&host[(pct_pos + 1)..]);
        if !decoded.is_ascii() {
            return false;
        }
        // Continue scanning after the triplet.
        host = tail;
    }
}

View File

@@ -0,0 +1,32 @@
//! Parsers for trusted `authority` string.
use crate::components::AuthorityComponents;
use crate::parser::str::{find_split_hole, rfind_split2};
/// Locates the `host` part inside a trusted authority string.
///
/// The returned [`AuthorityComponents`] keeps the whole authority together
/// with the byte offsets where the host starts (just after `userinfo "@"`, or
/// `0` when no `@` is found) and where it ends (just before the trailing
/// `":" port`, or the end of the string when no such suffix is found).
///
/// # Precondition
///
/// The given string must be a valid IRI reference.
#[inline]
#[must_use]
pub(crate) fn decompose_authority(authority: &str) -> AuthorityComponents<'_> {
    // Skip `userinfo "@"` if present. `+ 1` is for the `@` itself.
    let (after_userinfo, host_start) = match find_split_hole(authority, b'@') {
        Some((userinfo, rest)) => (rest, userinfo.len() + 1),
        None => (authority, 0),
    };

    // Length of the trailing `":" port`, or zero when the last delimiter found
    // is not a colon.
    let colon_port_len = rfind_split2(after_userinfo, b':', b']')
        .filter(|(_, suffix)| suffix.starts_with(':'))
        .map_or(0, |(_, suffix)| suffix.len());

    AuthorityComponents {
        authority,
        host_start,
        host_end: authority.len() - colon_port_len,
    }
}

223
vendor/iri-string/src/parser/validate.rs vendored Normal file
View File

@@ -0,0 +1,223 @@
//! Validating parsers for non-trusted (possibly invalid) input.
mod authority;
mod path;
use crate::parser::char;
use crate::parser::str::{
find_split, find_split2_hole, find_split_hole, satisfy_chars_with_pct_encoded,
};
use crate::spec::Spec;
use crate::validate::{Error, ErrorKind};
pub(crate) use self::authority::{validate_authority, validate_host, validate_userinfo};
pub(crate) use self::path::{validate_path, validate_path_segment};
use self::path::{
validate_path_abempty, validate_path_absolute_authority_absent,
validate_path_relative_authority_absent,
};
/// Checks that the string is a `scheme`.
///
/// The string must be non-empty, start with an ASCII alphabetic character,
/// and continue with ASCII bytes accepted by `char::is_ascii_scheme_continue`.
/// Anything else is reported as `ErrorKind::InvalidScheme`.
pub(crate) fn validate_scheme(i: &str) -> Result<(), Error> {
    let is_scheme = match i.as_bytes().split_first() {
        Some((head, tail)) => {
            head.is_ascii_alphabetic()
                && tail
                    .iter()
                    .all(|&b| b.is_ascii() && char::is_ascii_scheme_continue(b))
        }
        // An empty string is never a scheme.
        None => false,
    };

    if is_scheme {
        Ok(())
    } else {
        Err(Error::with_kind(ErrorKind::InvalidScheme))
    }
}
/// Checks that the string is a `query` (or an `iquery`, depending on the spec).
///
/// The string must not include the leading `?`. A mismatch is reported as
/// `ErrorKind::InvalidQuery`.
pub(crate) fn validate_query<S: Spec>(i: &str) -> Result<(), Error> {
    if satisfy_chars_with_pct_encoded(i, char::is_ascii_frag_query, char::is_nonascii_query::<S>) {
        return Ok(());
    }
    Err(Error::with_kind(ErrorKind::InvalidQuery))
}
/// Checks that the string is an `authority` immediately followed by a
/// `path-abempty`.
///
/// The input is split at the first `/`; when there is no slash, the whole
/// input is treated as the authority and the path is empty.
fn validate_authority_path_abempty<S: Spec>(i: &str) -> Result<(), Error> {
    let (maybe_authority, maybe_path) = find_split(i, b'/').unwrap_or((i, ""));
    validate_authority::<S>(maybe_authority)?;
    validate_path_abempty::<S>(maybe_path)
}
/// Checks that the string is a `URI` (or an `IRI`, depending on the spec).
///
/// Such a string must be absolute and may have a fragment.
#[inline]
pub(crate) fn validate_uri<S: Spec>(i: &str) -> Result<(), Error> {
    let rule = UriReferenceRule::Absolute;
    validate_uri_reference_common::<S>(i, rule)
}
/// Checks that the string is a `URI-reference` (or an `IRI-reference`,
/// depending on the spec).
///
/// Both absolute and relative references are accepted.
#[inline]
pub(crate) fn validate_uri_reference<S: Spec>(i: &str) -> Result<(), Error> {
    let rule = UriReferenceRule::Any;
    validate_uri_reference_common::<S>(i, rule)
}
/// Checks that the string is an `absolute-URI` (or an `absolute-IRI`,
/// depending on the spec).
///
/// Such a string must be absolute and cannot have a fragment.
#[inline]
pub(crate) fn validate_absolute_uri<S: Spec>(i: &str) -> Result<(), Error> {
    let rule = UriReferenceRule::AbsoluteWithoutFragment;
    validate_uri_reference_common::<S>(i, rule)
}
/// Syntax rule to validate a URI/IRI reference against.
#[derive(Clone, Copy, PartialEq, Eq, Hash)]
enum UriReferenceRule {
    /// `URI` and `IRI`.
    ///
    /// Must be absolute, and may have a fragment.
    Absolute,
    /// `absolute-URI` and `absolute-IRI`.
    ///
    /// Must be absolute, and cannot have a fragment.
    AbsoluteWithoutFragment,
    /// `URI-reference` and `IRI-reference`.
    ///
    /// May be relative, and may have a fragment.
    Any,
}

impl UriReferenceRule {
    /// Returns `true` if a relative reference is allowed by this rule.
    #[inline]
    #[must_use]
    fn is_relative_allowed(self) -> bool {
        matches!(self, Self::Any)
    }

    /// Returns `true` if the fragment part is allowed by this rule.
    #[inline]
    #[must_use]
    fn is_fragment_allowed(self) -> bool {
        // Only `absolute-URI`/`absolute-IRI` forbid the fragment.
        self != Self::AbsoluteWithoutFragment
    }
}
/// Returns `Ok(_)` if the string matches `URI-reference`/`IRI-reference` rules.
fn validate_uri_reference_common<S: Spec>(
i: &str,
ref_rule: UriReferenceRule,
) -> Result<(), Error> {
// Validate `scheme ":"`.
let (i, _scheme) = match find_split_hole(i, b':') {
None => {
if ref_rule.is_relative_allowed() {
return validate_relative_ref::<S>(i);
} else {
return Err(Error::with_kind(ErrorKind::UnexpectedRelative));
}
}
Some((maybe_scheme, rest)) => {
if validate_scheme(maybe_scheme).is_err() {
// The string before the first colon is not a scheme.
// Falling back to `relative-ref` parsing.
if ref_rule.is_relative_allowed() {
return validate_relative_ref::<S>(i);
} else {
return Err(Error::with_kind(ErrorKind::InvalidScheme));
}
}
(rest, maybe_scheme)
}
};
// Validate `hier-part`.
let after_path = match i.strip_prefix("//") {
Some(i) => {
let (maybe_authority_path, after_path) = match find_split2_hole(i, b'?', b'#') {
Some((maybe_authority_path, c, rest)) => (maybe_authority_path, Some((c, rest))),
None => (i, None),
};
validate_authority_path_abempty::<S>(maybe_authority_path)?;
after_path
}
None => {
let (maybe_path, after_path) = match find_split2_hole(i, b'?', b'#') {
Some((maybe_path, c, rest)) => (maybe_path, Some((c, rest))),
None => (i, None),
};
// Authority is absent.
validate_path_absolute_authority_absent::<S>(maybe_path)?;
after_path
}
};
// Validate `[ "?" query ] [ "#" fragment ]`.
if let Some((first, rest)) = after_path {
validate_after_path::<S>(first, rest, ref_rule.is_fragment_allowed())?;
}
Ok(())
}
/// Checks that the string is a `relative-ref` (or an `irelative-ref`,
/// depending on the spec).
///
/// A relative reference may always have a fragment.
pub(crate) fn validate_relative_ref<S: Spec>(i: &str) -> Result<(), Error> {
    // Separate `relative-part` from `[ "?" query ] [ "#" fragment ]`.
    // The leading `//` contains neither `?` nor `#`, so splitting before
    // testing for the authority marker yields the same parts.
    let (relative_part, after_path) = match find_split2_hole(i, b'?', b'#') {
        Some((relative_part, delim, rest)) => (relative_part, Some((delim, rest))),
        None => (i, None),
    };

    // Validate `relative-part`.
    match relative_part.strip_prefix("//") {
        Some(maybe_authority_path) => {
            validate_authority_path_abempty::<S>(maybe_authority_path)?
        }
        // Authority is absent.
        None => validate_path_relative_authority_absent::<S>(relative_part)?,
    }

    // Validate `[ "?" query ] [ "#" fragment ]`.
    match after_path {
        Some((delim, rest)) => validate_after_path::<S>(delim, rest, true),
        None => Ok(()),
    }
}
/// Returns `Ok(_)` if the string matches `[ "?" query ] [ "#" fragment ]` (or IRI version).
fn validate_after_path<S: Spec>(first: u8, rest: &str, accept_fragment: bool) -> Result<(), Error> {
let (maybe_query, maybe_fragment) = if first == b'?' {
match find_split_hole(rest, b'#') {
Some(v) => v,
None => (rest, ""),
}
} else {
debug_assert_eq!(first, b'#');
("", rest)
};
validate_query::<S>(maybe_query)?;
if !accept_fragment && !maybe_fragment.is_empty() {
return Err(Error::with_kind(ErrorKind::UnexpectedFragment));
}
validate_fragment::<S>(maybe_fragment)
}
/// Returns `Ok(_)` if the string matches `fragment`/`ifragment` rules.
pub(crate) fn validate_fragment<S: Spec>(i: &str) -> Result<(), Error> {
let is_valid = satisfy_chars_with_pct_encoded(
i,
char::is_ascii_frag_query,
char::is_nonascii_fragment::<S>,
);
if is_valid {
Ok(())
} else {
Err(Error::with_kind(ErrorKind::InvalidFragment))
}
}

View File

@@ -0,0 +1,302 @@
//! Parsers for authority.
use core::mem;
use crate::parser::char;
use crate::parser::str::{
find_split_hole, get_wrapped_inner, rfind_split_hole, satisfy_chars_with_pct_encoded,
strip_ascii_char_prefix,
};
use crate::spec::Spec;
use crate::validate::{Error, ErrorKind};
/// Returns `Ok(_)` if the string matches `userinfo` or `iuserinfo`.
pub(crate) fn validate_userinfo<S: Spec>(i: &str) -> Result<(), Error> {
let is_valid = satisfy_chars_with_pct_encoded(
i,
char::is_ascii_userinfo_ipvfutureaddr,
char::is_nonascii_userinfo::<S>,
);
if is_valid {
Ok(())
} else {
Err(Error::with_kind(ErrorKind::InvalidUserInfo))
}
}
/// Tests whether the string is a `dec-octet`.
///
/// That is, a decimal number from "0" to "255" without superfluous leading
/// zeros.
#[must_use]
fn is_dec_octet(i: &str) -> bool {
    match i.as_bytes() {
        // "0" to "9".
        [ones] => ones.is_ascii_digit(),
        // "10" to "99".
        [b'1'..=b'9', ones] => ones.is_ascii_digit(),
        // "100" to "199".
        [b'1', tens, ones] => tens.is_ascii_digit() && ones.is_ascii_digit(),
        // "200" to "249".
        [b'2', b'0'..=b'4', ones] => ones.is_ascii_digit(),
        // "250" to "255".
        [b'2', b'5', b'0'..=b'5'] => true,
        _ => false,
    }
}
/// Returns `Ok(_)` if the string matches `IPv4address`.
fn validate_ipv4address(i: &str) -> Result<(), Error> {
/// Returns `Ok(_)` if the string matches `IPv4address`, or `Err(())` if not.
fn validate_ipv4address_impl(i: &str) -> Result<(), ()> {
let (first, rest) = find_split_hole(i, b'.').ok_or(())?;
if !is_dec_octet(first) {
return Err(());
}
let (second, rest) = find_split_hole(rest, b'.').ok_or(())?;
if !is_dec_octet(second) {
return Err(());
}
let (third, fourth) = find_split_hole(rest, b'.').ok_or(())?;
if is_dec_octet(third) && is_dec_octet(fourth) {
Ok(())
} else {
Err(())
}
}
validate_ipv4address_impl(i).map_err(|_| Error::with_kind(ErrorKind::InvalidHost))
}
/// A part of an IPv6 address, as recognized while scanning it from left to right.
///
/// `split_v6_addr_part` classifies the leading part of the remaining address
/// string into one of these variants.
#[derive(Clone, Copy)]
enum V6AddrPart {
/// `[0-9a-fA-F]{1,4}::`.
///
/// A 16-bit group immediately followed by the `::` omission marker.
H16Omit,
/// `[0-9a-fA-F]{1,4}:`.
///
/// A 16-bit group followed by a single colon; another part should follow.
H16Cont,
/// `[0-9a-fA-F]{1,4}`.
///
/// A 16-bit group at the end of the address.
H16End,
/// IPv4 address.
///
/// Counted as two 16-bit groups by `validate_ipv6address`.
V4,
/// `::`.
///
/// The omission marker without a preceding 16-bit group.
Omit,
}
/// Splits the IPv6 address string into the next component and the rest substring.
fn split_v6_addr_part(i: &str) -> Result<(&str, V6AddrPart), Error> {
debug_assert!(!i.is_empty());
match find_split_hole(i, b':') {
Some((prefix, rest)) => {
if prefix.len() >= 5 {
return Err(Error::with_kind(ErrorKind::InvalidHost));
}
if prefix.is_empty() {
return match strip_ascii_char_prefix(rest, b':') {
Some(rest) => Ok((rest, V6AddrPart::Omit)),
None => Err(Error::with_kind(ErrorKind::InvalidHost)),
};
}
// Should be `h16`.
debug_assert!((1..=4).contains(&prefix.len()));
if !prefix.bytes().all(|b| b.is_ascii_hexdigit()) {
return Err(Error::with_kind(ErrorKind::InvalidHost));
}
match strip_ascii_char_prefix(rest, b':') {
Some(rest) => Ok((rest, V6AddrPart::H16Omit)),
None => Ok((rest, V6AddrPart::H16Cont)),
}
}
None => {
if i.len() >= 5 {
// Possibly `IPv4address`.
validate_ipv4address(i)?;
return Ok(("", V6AddrPart::V4));
}
if i.bytes().all(|b| b.is_ascii_hexdigit()) {
Ok(("", V6AddrPart::H16End))
} else {
Err(Error::with_kind(ErrorKind::InvalidHost))
}
}
}
}
/// Checks that the string is an `IPv6address`.
///
/// The address is consumed part by part while counting 16-bit groups (an
/// embedded IPv4 address counts as two). Without `::` exactly eight groups
/// are required; with `::` (allowed only once) fewer than eight.
/// A mismatch is reported as `ErrorKind::InvalidHost`.
fn validate_ipv6address(mut i: &str) -> Result<(), Error> {
    let mut group_count = 0;
    let mut seen_omission = false;

    while !i.is_empty() {
        let (rest, part) = split_v6_addr_part(i)?;

        // Count the 16-bit groups covered by this part.
        group_count += match part {
            V6AddrPart::H16Omit | V6AddrPart::H16Cont | V6AddrPart::H16End => 1,
            V6AddrPart::V4 => 2,
            V6AddrPart::Omit => 0,
        };

        match part {
            V6AddrPart::H16Omit | V6AddrPart::Omit => {
                if mem::replace(&mut seen_omission, true) {
                    // Omitted twice.
                    return Err(Error::with_kind(ErrorKind::InvalidHost));
                }
            }
            V6AddrPart::H16Cont => {
                if rest.is_empty() {
                    // `H16Cont` cannot be the last part of an IPv6 address.
                    return Err(Error::with_kind(ErrorKind::InvalidHost));
                }
            }
            V6AddrPart::H16End => break,
            V6AddrPart::V4 => {
                debug_assert!(rest.is_empty());
                break;
            }
        }

        if group_count > 8 {
            return Err(Error::with_kind(ErrorKind::InvalidHost));
        }
        i = rest;
    }

    // `::` stands for at least one group.
    let has_valid_length = if seen_omission {
        group_count < 8
    } else {
        group_count == 8
    };
    if !has_valid_length {
        return Err(Error::with_kind(ErrorKind::InvalidHost));
    }
    Ok(())
}
/// Checks that the string is an `authority` (or an `iauthority`, depending on
/// the spec).
///
/// The optional `userinfo "@"` prefix and the optional `":" port` suffix are
/// stripped, and then the remaining part is validated as a host.
pub(crate) fn validate_authority<S: Spec>(i: &str) -> Result<(), Error> {
    // Strip and validate `userinfo`.
    let after_userinfo = match find_split_hole(i, b'@') {
        Some((maybe_userinfo, rest)) => {
            validate_userinfo::<S>(maybe_userinfo)?;
            rest
        }
        None => i,
    };

    // `host` can contain colons, but `port` cannot.
    // Strip `port` only if the string after the last colon consists of digits;
    // otherwise the colon is considered to belong to the host.
    let maybe_host = match rfind_split_hole(after_userinfo, b':') {
        Some((before_colon, maybe_port)) if maybe_port.bytes().all(|b| b.is_ascii_digit()) => {
            before_colon
        }
        _ => after_userinfo,
    };

    // Validate `host`.
    validate_host::<S>(maybe_host)
}
/// Checks that the string is a `host` (or an `ihost`, depending on the spec).
///
/// A string wrapped by `[` and `]` is validated as an `IP-literal` (either
/// `IPvFuture` or `IPv6address`), and anything else as a `reg-name`.
/// A mismatch is reported as `ErrorKind::InvalidHost`.
pub(crate) fn validate_host<S: Spec>(i: &str) -> Result<(), Error> {
    let ip_literal = match get_wrapped_inner(i, b'[', b']') {
        Some(inner) => inner,
        None => {
            // `IPv4address` or `reg-name`. No need to distinguish them here
            // because `IPv4address` is also syntactically valid as `reg-name`.
            let is_reg_name = satisfy_chars_with_pct_encoded(
                i,
                char::is_ascii_regname,
                char::is_nonascii_regname::<S>,
            );
            return if is_reg_name {
                Ok(())
            } else {
                Err(Error::with_kind(ErrorKind::InvalidHost))
            };
        }
    };

    // Note that `v` here is case insensitive. See RFC 3987 section 3.2.2.
    let ipvfuture_body = strip_ascii_char_prefix(ip_literal, b'v')
        .or_else(|| strip_ascii_char_prefix(ip_literal, b'V'));
    let ver_dot_addr = match ipvfuture_body {
        Some(v) => v,
        // `IPv6address`.
        None => return validate_ipv6address(ip_literal),
    };

    // `IPvFuture`: a non-empty hexadecimal version, a dot, and a non-empty address.
    let is_ipvfuture = match find_split_hole(ver_dot_addr, b'.') {
        Some((maybe_ver, maybe_addr)) => {
            !maybe_ver.is_empty()
                && maybe_ver.bytes().all(|b| b.is_ascii_hexdigit())
                && !maybe_addr.is_empty()
                && maybe_addr.is_ascii()
                && maybe_addr
                    .bytes()
                    .all(char::is_ascii_userinfo_ipvfutureaddr)
        }
        None => false,
    };
    if is_ipvfuture {
        Ok(())
    } else {
        Err(Error::with_kind(ErrorKind::InvalidHost))
    }
}
// Tests for the IPv6 address validator. `format!` requires `alloc`.
#[cfg(test)]
#[cfg(feature = "alloc")]
mod tests {
use super::*;
use alloc::format;
/// Asserts that the parser returns `Ok(_)` for every given input.
///
/// The failing input is included in the panic message.
macro_rules! assert_validate {
($parser:expr, $($input:expr),* $(,)?) => {{
$({
let input = $input;
let input: &str = input.as_ref();
assert!($parser(input).is_ok(), "input={:?}", input);
})*
}};
}
/// Checks that various valid IPv6 addresses are accepted.
#[test]
fn test_ipv6address() {
use core::cmp::Ordering;
// Full forms, including mixed-case digits and a trailing IPv4 address.
assert_validate!(validate_ipv6address, "a:bB:cCc:dDdD:e:F:a:B");
assert_validate!(validate_ipv6address, "1:1:1:1:1:1:1:1");
assert_validate!(validate_ipv6address, "1:1:1:1:1:1:1.1.1.1");
assert_validate!(validate_ipv6address, "2001:db8::7");
// Generate IPv6 addresses with `::`.
// `make_sub(n)` creates `n` groups of `1` joined by colons (empty if `n` is 0).
let make_sub = |n: usize| {
let mut s = "1:".repeat(n);
s.pop();
s
};
// Try every combination of prefix and suffix lengths with at most 7 groups in total.
for len_pref in 0..=7 {
let prefix = make_sub(len_pref);
for len_suf in 1..=(7 - len_pref) {
assert_validate!(
validate_ipv6address,
&format!("{}::{}", prefix, make_sub(len_suf))
);
// Replace the last two groups of the suffix with an IPv4 address, if possible.
match len_suf.cmp(&2) {
Ordering::Greater => assert_validate!(
validate_ipv6address,
&format!("{}::{}:1.1.1.1", prefix, make_sub(len_suf - 2))
),
Ordering::Equal => {
assert_validate!(validate_ipv6address, &format!("{}::1.1.1.1", prefix))
}
Ordering::Less => {}
}
}
}
}
}

View File

@@ -0,0 +1,99 @@
//! Parsers for path.
use crate::parser::char;
use crate::parser::str::{find_split2_hole, satisfy_chars_with_pct_encoded};
use crate::spec::Spec;
use crate::validate::{Error, ErrorKind};
/// Returns `Ok(_)` if the string matches `path-abempty` or `ipath-abempty`.
pub(super) fn validate_path_abempty<S: Spec>(i: &str) -> Result<(), Error> {
if i.is_empty() {
return Ok(());
}
let i = match i.strip_prefix('/') {
Some(rest) => rest,
None => return Err(Error::with_kind(ErrorKind::InvalidPath)),
};
let is_valid = satisfy_chars_with_pct_encoded(
i,
char::is_ascii_pchar_slash,
S::is_nonascii_char_unreserved,
);
if is_valid {
Ok(())
} else {
Err(Error::with_kind(ErrorKind::InvalidPath))
}
}
/// Returns `Ok(_)` if the string matches `hier-part` or `ihier-part` modulo
/// `"//" authority path-abempty`.
pub(super) fn validate_path_absolute_authority_absent<S: Spec>(i: &str) -> Result<(), Error> {
if i.is_empty() {
// `path-empty`.
return Ok(());
}
if i.starts_with("//") {
unreachable!("this case should be handled by the caller");
}
let is_valid = satisfy_chars_with_pct_encoded(
i,
char::is_ascii_pchar_slash,
S::is_nonascii_char_unreserved,
);
if is_valid {
Ok(())
} else {
Err(Error::with_kind(ErrorKind::InvalidPath))
}
}
/// Checks that the string is a `relative-part` (or an `irelative-part`) of
/// the kind that has no authority, i.e. anything but
/// `"//" authority path-abempty`.
///
/// A colon is rejected when it appears before the first slash, since such a
/// path does not match `path-noscheme`. A mismatch is reported as
/// `ErrorKind::InvalidPath`.
///
/// # Panics
///
/// Panics if the string starts with `//`. Callers are expected to handle that
/// case as the one with an authority.
pub(super) fn validate_path_relative_authority_absent<S: Spec>(i: &str) -> Result<(), Error> {
    if i.starts_with("//") {
        unreachable!("this case should be handled by the caller");
    }

    // Find out which of `/` and `:` comes first, if any.
    let first_delim = find_split2_hole(i, b'/', b':').map(|(_, delim, _)| delim);
    match first_delim {
        None | Some(b'/') => {}
        Some(delim) => {
            debug_assert_eq!(delim, b':');
            // `foo:bar`-style. This does not match `path-noscheme`.
            return Err(Error::with_kind(ErrorKind::InvalidPath));
        }
    }

    let is_valid = satisfy_chars_with_pct_encoded(
        i,
        char::is_ascii_pchar_slash,
        S::is_nonascii_char_unreserved,
    );
    if !is_valid {
        return Err(Error::with_kind(ErrorKind::InvalidPath));
    }
    Ok(())
}
/// Returns `Ok(_)` if the string matches `path`/`ipath` rules.
pub(crate) fn validate_path<S: Spec>(i: &str) -> Result<(), Error> {
let is_valid = satisfy_chars_with_pct_encoded(
i,
char::is_ascii_pchar_slash,
S::is_nonascii_char_unreserved,
);
if is_valid {
Ok(())
} else {
Err(Error::with_kind(ErrorKind::InvalidPath))
}
}
/// Checks that the string is a `segment` (or an `isegment`, depending on the
/// spec).
///
/// Unlike `validate_path`, the ASCII check used here is
/// `char::is_ascii_pchar`, not `char::is_ascii_pchar_slash`. A mismatch is
/// reported as `ErrorKind::InvalidPath`.
pub(crate) fn validate_path_segment<S: Spec>(i: &str) -> Result<(), Error> {
    if satisfy_chars_with_pct_encoded(i, char::is_ascii_pchar, S::is_nonascii_char_unreserved) {
        return Ok(());
    }
    Err(Error::with_kind(ErrorKind::InvalidPath))
}

378
vendor/iri-string/src/percent_encode.rs vendored Normal file
View File

@@ -0,0 +1,378 @@
//! Percent encoding.
use core::fmt::{self, Write as _};
use core::marker::PhantomData;
use crate::parser::char;
use crate::spec::{IriSpec, Spec, UriSpec};
/// A proxy to percent-encode a string as a part of URI.
///
/// This is [`PercentEncoded`] with the spec fixed to [`UriSpec`].
pub type PercentEncodedForUri<T> = PercentEncoded<T, UriSpec>;
/// A proxy to percent-encode a string as a part of IRI.
///
/// This is [`PercentEncoded`] with the spec fixed to [`IriSpec`].
pub type PercentEncodedForIri<T> = PercentEncoded<T, IriSpec>;
/// Context for percent encoding.
///
/// The context selects which characters are written as is and which ones are
/// percent-encoded when a [`PercentEncoded`] value is formatted.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[non_exhaustive]
enum Context {
/// Encode the string as a reg-name (usually called a "hostname").
RegName,
/// Encode the string as a user name or a password (inside the `userinfo` component).
///
/// A colon (`:`) will be encoded to `%3A`.
UserOrPassword,
/// Encode the string as a path segment.
///
/// A slash (`/`) will be encoded to `%2F`.
PathSegment,
/// Encode the string as path segments joined with `/`.
///
/// A slash (`/`) will be used as is.
Path,
/// Encode the string as a query string (without the `?` prefix).
Query,
/// Encode the string as a fragment string (without the `#` prefix).
Fragment,
/// Encode all characters except for `unreserved` characters.
Unreserve,
/// Encode characters only if they cannot appear anywhere in an IRI reference.
///
/// `%` character will be always encoded.
Character,
}
/// A proxy to percent-encode a string.
///
/// The value itself only holds the raw source and the encoding context.
/// The percent-encoded form is produced when the value is formatted via
/// [`core::fmt::Display`].
///
/// Type aliases [`PercentEncodedForIri`] and [`PercentEncodedForUri`] are provided.
/// You can use them to make the expression simpler, for example write
/// `PercentEncodedForUri::from_path(foo)` instead of
/// `PercentEncoded::<_, UriSpec>::from_path(foo)`.
#[derive(Debug, Clone, Copy)]
pub struct PercentEncoded<T, S> {
/// Source string context.
context: Context,
/// Raw string before being encoded.
raw: T,
/// Spec.
// `fn() -> S` is used so that no value of `S` needs to be stored.
_spec: PhantomData<fn() -> S>,
}
impl<T: fmt::Display, S: Spec> PercentEncoded<T, S> {
/// Creates an encoded string from a raw reg-name (i.e. hostname or domain).
///
/// # Examples
///
/// ```
/// # #[cfg(feature = "alloc")] {
/// use iri_string::percent_encode::PercentEncoded;
/// use iri_string::spec::UriSpec;
///
/// let raw = "alpha.\u{03B1}.example.com";
/// let encoded = "alpha.%CE%B1.example.com";
/// assert_eq!(
/// PercentEncoded::<_, UriSpec>::from_reg_name(raw).to_string(),
/// encoded
/// );
/// # }
/// ```
pub fn from_reg_name(raw: T) -> Self {
Self {
context: Context::RegName,
raw,
_spec: PhantomData,
}
}
/// Creates an encoded string from a raw user name (inside `userinfo` component).
///
/// # Examples
///
/// ```
/// # #[cfg(feature = "alloc")] {
/// use iri_string::percent_encode::PercentEncoded;
/// use iri_string::spec::UriSpec;
///
/// let raw = "user:\u{03B1}";
/// // The first `:` will be interpreted as a delimiter, so colons will be escaped.
/// let encoded = "user%3A%CE%B1";
/// assert_eq!(
/// PercentEncoded::<_, UriSpec>::from_user(raw).to_string(),
/// encoded
/// );
/// # }
/// ```
pub fn from_user(raw: T) -> Self {
Self {
context: Context::UserOrPassword,
raw,
_spec: PhantomData,
}
}
/// Creates an encoded string from a raw user name (inside `userinfo` component).
///
/// # Examples
///
/// ```
/// # #[cfg(feature = "alloc")] {
/// use iri_string::percent_encode::PercentEncoded;
/// use iri_string::spec::UriSpec;
///
/// let raw = "password:\u{03B1}";
/// // The first `:` will be interpreted as a delimiter, and the colon
/// // inside the password will be the first one if the user name is empty,
/// // so colons will be escaped.
/// let encoded = "password%3A%CE%B1";
/// assert_eq!(
/// PercentEncoded::<_, UriSpec>::from_password(raw).to_string(),
/// encoded
/// );
/// # }
/// ```
pub fn from_password(raw: T) -> Self {
Self {
context: Context::UserOrPassword,
raw,
_spec: PhantomData,
}
}
/// Creates an encoded string from a raw path segment.
///
/// # Examples
///
/// ```
/// # #[cfg(feature = "alloc")] {
/// use iri_string::percent_encode::PercentEncoded;
/// use iri_string::spec::UriSpec;
///
/// let raw = "alpha/\u{03B1}?#";
/// // Note that `/` is encoded to `%2F`.
/// let encoded = "alpha%2F%CE%B1%3F%23";
/// assert_eq!(
/// PercentEncoded::<_, UriSpec>::from_path_segment(raw).to_string(),
/// encoded
/// );
/// # }
/// ```
pub fn from_path_segment(raw: T) -> Self {
Self {
context: Context::PathSegment,
raw,
_spec: PhantomData,
}
}
/// Creates an encoded string from a raw path.
///
/// # Examples
///
/// ```
/// # #[cfg(feature = "alloc")] {
/// use iri_string::percent_encode::PercentEncoded;
/// use iri_string::spec::UriSpec;
///
/// let raw = "alpha/\u{03B1}?#";
/// // Note that `/` is NOT percent encoded.
/// let encoded = "alpha/%CE%B1%3F%23";
/// assert_eq!(
/// PercentEncoded::<_, UriSpec>::from_path(raw).to_string(),
/// encoded
/// );
/// # }
/// ```
pub fn from_path(raw: T) -> Self {
Self {
context: Context::Path,
raw,
_spec: PhantomData,
}
}
/// Creates an encoded string from a raw query.
///
/// # Examples
///
/// ```
/// # #[cfg(feature = "alloc")] {
/// use iri_string::percent_encode::PercentEncoded;
/// use iri_string::spec::UriSpec;
///
/// let raw = "alpha/\u{03B1}?#";
/// let encoded = "alpha/%CE%B1?%23";
/// assert_eq!(
/// PercentEncoded::<_, UriSpec>::from_query(raw).to_string(),
/// encoded
/// );
/// # }
/// ```
pub fn from_query(raw: T) -> Self {
Self {
context: Context::Query,
raw,
_spec: PhantomData,
}
}
/// Creates an encoded string from a raw fragment.
///
/// # Examples
///
/// ```
/// # #[cfg(feature = "alloc")] {
/// use iri_string::percent_encode::PercentEncoded;
/// use iri_string::spec::UriSpec;
///
/// let raw = "alpha/\u{03B1}?#";
/// let encoded = "alpha/%CE%B1?%23";
/// assert_eq!(
/// PercentEncoded::<_, UriSpec>::from_fragment(raw).to_string(),
/// encoded
/// );
/// # }
/// ```
pub fn from_fragment(raw: T) -> Self {
Self {
context: Context::Fragment,
raw,
_spec: PhantomData,
}
}
/// Creates a string consists of only `unreserved` string and percent-encoded triplets.
///
/// # Examples
///
/// ```
/// # #[cfg(feature = "alloc")] {
/// use iri_string::percent_encode::PercentEncoded;
/// use iri_string::spec::UriSpec;
///
/// let unreserved = "%a0-._~\u{03B1}";
/// let unreserved_encoded = "%25a0-._~%CE%B1";
/// assert_eq!(
/// PercentEncoded::<_, UriSpec>::unreserve(unreserved).to_string(),
/// unreserved_encoded
/// );
///
/// let reserved = ":/?#[]@ !$&'()*+,;=";
/// let reserved_encoded =
/// "%3A%2F%3F%23%5B%5D%40%20%21%24%26%27%28%29%2A%2B%2C%3B%3D";
/// assert_eq!(
/// PercentEncoded::<_, UriSpec>::unreserve(reserved).to_string(),
/// reserved_encoded
/// );
/// # }
/// ```
#[inline]
#[must_use]
pub fn unreserve(raw: T) -> Self {
Self {
context: Context::Unreserve,
raw,
_spec: PhantomData,
}
}
/// Percent-encodes characters only if they cannot appear anywhere in an IRI reference.
///
/// `%` character will be always encoded. In other words, this conversion
/// is not aware of percent-encoded triplets.
///
/// Note that this encoding process does not guarantee that the resulting
/// string is a valid IRI reference.
///
/// # Examples
///
/// ```
/// # #[cfg(feature = "alloc")] {
/// use iri_string::percent_encode::PercentEncoded;
/// use iri_string::spec::UriSpec;
///
/// let unreserved = "%a0-._~\u{03B1}";
/// let unreserved_encoded = "%25a0-._~%CE%B1";
/// assert_eq!(
/// PercentEncoded::<_, UriSpec>::characters(unreserved).to_string(),
/// unreserved_encoded
/// );
///
/// let reserved = ":/?#[]@ !$&'()*+,;=";
/// // Note that `%20` cannot appear directly in an IRI reference.
/// let expected = ":/?#[]@%20!$&'()*+,;=";
/// assert_eq!(
/// PercentEncoded::<_, UriSpec>::characters(reserved).to_string(),
/// expected
/// );
/// # }
/// ```
#[inline]
#[must_use]
pub fn characters(raw: T) -> Self {
Self {
context: Context::Character,
raw,
_spec: PhantomData,
}
}
}
impl<T: fmt::Display, S: Spec> fmt::Display for PercentEncoded<T, S> {
    /// Writes the raw value, percent-encoding every character that is not
    /// allowed as is in the encoding context.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        /// Writer adapter that percent-encodes characters on the fly.
        struct Filter<'a, 'b, S> {
            /// Encoding context.
            context: Context,
            /// Destination writer.
            writer: &'a mut fmt::Formatter<'b>,
            /// Spec.
            _spec: PhantomData<fn() -> S>,
        }

        impl<S: Spec> fmt::Write for Filter<'_, '_, S> {
            fn write_str(&mut self, s: &str) -> fmt::Result {
                for c in s.chars() {
                    self.write_char(c)?;
                }
                Ok(())
            }

            fn write_char(&mut self, c: char) -> fmt::Result {
                // Decide whether the character can be written without encoding.
                let is_allowed_raw = if c.is_ascii() {
                    let byte = c as u8;
                    match self.context {
                        Context::RegName => char::is_ascii_regname(byte),
                        // A colon would be taken as the user-password delimiter.
                        Context::UserOrPassword => {
                            c != ':' && char::is_ascii_userinfo_ipvfutureaddr(byte)
                        }
                        Context::PathSegment => char::is_ascii_pchar(byte),
                        Context::Path => c == '/' || char::is_ascii_pchar(byte),
                        Context::Query | Context::Fragment => {
                            c == '/' || char::is_ascii_frag_query(byte)
                        }
                        Context::Unreserve => char::is_ascii_unreserved(byte),
                        Context::Character => char::is_ascii_unreserved_or_reserved(byte),
                    }
                } else {
                    match self.context {
                        Context::RegName => char::is_nonascii_regname::<S>(c),
                        Context::UserOrPassword => char::is_nonascii_userinfo::<S>(c),
                        Context::PathSegment | Context::Path | Context::Unreserve => {
                            S::is_nonascii_char_unreserved(c)
                        }
                        Context::Query => char::is_nonascii_query::<S>(c),
                        Context::Fragment => char::is_nonascii_fragment::<S>(c),
                        Context::Character => {
                            S::is_nonascii_char_unreserved(c) || S::is_nonascii_char_private(c)
                        }
                    }
                };

                if !is_allowed_raw {
                    return write_pct_encoded_char(&mut self.writer, c);
                }
                self.writer.write_char(c)
            }
        }

        // Let the raw value format itself through the encoding filter.
        let mut filter = Filter {
            context: self.context,
            writer: f,
            _spec: PhantomData::<fn() -> S>,
        };
        filter.write_fmt(format_args!("{}", self.raw))
    }
}
/// Writes the given character in percent-encoded form.
///
/// Every byte of the UTF-8 encoding of the character is written as `%`
/// followed by two uppercase hexadecimal digits.
#[inline]
fn write_pct_encoded_char<W: fmt::Write>(writer: &mut W, c: char) -> fmt::Result {
    // A `char` takes at most four bytes in UTF-8.
    let mut utf8 = [0_u8; 4];
    for byte in c.encode_utf8(&mut utf8).bytes() {
        write!(writer, "%{:02X}", byte)?;
    }
    Ok(())
}

55
vendor/iri-string/src/raw.rs vendored Normal file
View File

@@ -0,0 +1,55 @@
//! Raw IRI strings manipulation.
//!
//! Note that functions in this module may operate on raw `&str` types.
//! It is the caller's responsibility to guarantee that the given string satisfies the precondition.
#[cfg(all(feature = "alloc", not(feature = "std")))]
use alloc::string::String;
#[cfg(feature = "alloc")]
use crate::parser::trusted as trusted_parser;
/// Replaces the fragment part of the string with the given one.
///
/// The existing fragment part (and its leading `#` character) is removed
/// first. If `None` is given, nothing is appended afterwards.
#[cfg(feature = "alloc")]
pub(crate) fn set_fragment(s: &mut String, fragment: Option<&str>) {
    remove_fragment(s);
    let new_fragment = match fragment {
        Some(v) => v,
        None => return,
    };
    // `+ 1` is for the leading `#` character.
    s.reserve(new_fragment.len() + 1);
    s.push('#');
    s.push_str(new_fragment);
}
/// Removes the fragment part from the string.
#[cfg(feature = "alloc")]
#[inline]
pub(crate) fn remove_fragment(s: &mut String) {
    // Everything from the first `#` (inclusive) to the end is dropped.
    match s.find('#') {
        Some(hash_pos) => s.truncate(hash_pos),
        None => {}
    }
}
/// Splits the owned string into the prefix and the fragment part.
///
/// The `#` character between them is dropped. If the string has no fragment
/// part, the string is returned as is, along with `None`.
///
/// # Panics
///
/// Panics if the character right after the prefix is not `#`.
#[cfg(feature = "alloc")]
pub(crate) fn split_fragment_owned(mut s: String) -> (String, Option<String>) {
    let (prefix_len, has_fragment) = {
        let (prefix, fragment) = trusted_parser::split_fragment(&s);
        (prefix.len(), fragment.is_some())
    };
    if !has_fragment {
        return (s, None);
    }

    // `+ 1` is for leading `#` character.
    let fragment = s.split_off(prefix_len + 1);

    // Current `s` contains a trailing `#` character, which should be removed.
    let hash = s.pop();
    assert_eq!(hash, Some('#'));
    assert_eq!(s.len(), prefix_len);

    (s, Some(fragment))
}

344
vendor/iri-string/src/resolve.rs vendored Normal file
View File

@@ -0,0 +1,344 @@
//! URI and IRI resolvers.
//!
//! # IRI resolution can fail without WHATWG URL Standard serialization
//!
//! ## Pure RFC 3986 algorithm
//!
//! Though this is not explicitly stated in RFC 3986, IRI resolution can fail.
//! Below are examples:
//!
//! * base=`scheme:`, ref=`.///bar`.
//! + Resulting IRI should have scheme `scheme` and path `//bar`, but does not have authority.
//! * base=`scheme:foo`, ref=`.///bar`.
//! + Resulting IRI should have scheme `scheme` and path `//bar`, but does not have authority.
//! * base=`scheme:`, ref=`/..//baz`.
//! + Resulting IRI should have scheme `scheme` and path `//baz`, but does not have authority.
//! * base=`scheme:foo/bar`, ref=`..//baz`.
//! + Resulting IRI should have scheme `scheme` and path `//baz`, but does not have authority.
//!
//! IRI without authority (note that this is different from "with empty authority")
//! cannot have a path starting with `//`, since it is ambiguous and can be
//! interpreted as an IRI with authority. For the above examples, `scheme://bar`
//! is not valid output, as `bar` in `scheme://bar` will be interpreted as an
//! authority, not a path.
//!
//! Thus, IRI resolution by pure RFC 3986 algorithm can fail for some abnormal
//! cases.
//!
//! Note that this kind of failure can happen only when the base IRI has no
//! authority and empty path. This would be rare in the wild, since many people
//! would use an IRI with authority part, such as `http://`.
//!
//! If you are handling `scheme://`-style URIs and IRIs, don't worry about the
//! failure. Currently no cases are known to fail when at least one of the base
//! IRI or the relative IRI contains authorities.
//!
//! If you want this kind of abnormal IRI resolution to succeed and to be
//! idempotent, check the resolution result using
//! [`Normalized::ensure_rfc3986_normalizable`] (see the section below).
//!
//! ## WHATWG serialization
//!
//! To handle IRI resolution failure, WHATWG URL Standard defines serialization
//! algorithm for this kind of result, and it makes IRI resolution (and even
//! normalization) infallible and idempotent.
//!
//! IRI resolution and normalization provided by this crate automatically
//! applies this special rule if necessary, so they are infallible. If you want
//! to detect resolution/normalization failure, use
//! [`Normalized::ensure_rfc3986_normalizable`] method.
//!
//! ## Examples
//!
//! ```
//! # #[cfg(feature = "alloc")] {
//! use iri_string::format::ToDedicatedString;
//! use iri_string::types::{IriAbsoluteStr, IriReferenceStr};
//!
//! let base = IriAbsoluteStr::new("scheme:")?;
//! {
//! let reference = IriReferenceStr::new(".///not-a-host")?;
//! let result = reference.resolve_against(base);
//! assert!(result.ensure_rfc3986_normalizable().is_err());
//! assert_eq!(result.to_dedicated_string(), "scheme:/.//not-a-host");
//! }
//!
//! {
//! let reference2 = IriReferenceStr::new("/..//not-a-host")?;
//! // Resulting string will be `scheme://not-a-host`, but `not-a-host`
//! // should be a path segment, not a host. So, the semantically correct
//! // target IRI cannot be represented by RFC 3986 IRI resolution.
//! let result2 = reference2.resolve_against(base);
//! assert!(result2.ensure_rfc3986_normalizable().is_err());
//!
//! // Algorithm defined in WHATWG URL Standard addresses this case.
//! assert_eq!(result2.to_dedicated_string(), "scheme:/.//not-a-host");
//! }
//! # }
//! # Ok::<_, iri_string::validate::Error>(())
//! ```
use crate::components::RiReferenceComponents;
use crate::normalize::{NormalizationInput, Normalized};
use crate::spec::Spec;
use crate::types::{RiAbsoluteStr, RiQueryStr, RiReferenceStr, RiStr};
/// A resolver against the fixed base.
///
/// The base IRI is split into its components when the resolver is created
/// (see `new`), and those stored components are reused by every call to
/// `resolve`.
#[derive(Debug, Clone, Copy)]
pub struct FixedBaseResolver<'a, S: Spec> {
/// Components of the base IRI.
///
/// Only ever built from a `&RiAbsoluteStr<S>`; `base` relies on this to
/// hand the original string back without validating it again.
base_components: RiReferenceComponents<'a, S>,
}
impl<'a, S: Spec> FixedBaseResolver<'a, S> {
/// Creates a new resolver with the given base.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// # // `ToDedicatedString` is available only when
/// # // `alloc` feature is enabled.
/// #[cfg(feature = "alloc")] {
/// use iri_string::format::ToDedicatedString;
/// use iri_string::resolve::FixedBaseResolver;
/// use iri_string::types::{IriAbsoluteStr, IriReferenceStr};
///
/// let base = IriAbsoluteStr::new("http://example.com/base/")?;
/// let resolver = FixedBaseResolver::new(base);
///
/// let reference = IriReferenceStr::new("../there")?;
/// let resolved = resolver.resolve(reference);
///
/// assert_eq!(resolved.to_dedicated_string(), "http://example.com/there");
/// # }
/// # Ok::<_, Error>(())
/// ```
#[must_use]
pub fn new(base: &'a RiAbsoluteStr<S>) -> Self {
Self {
base_components: RiReferenceComponents::from(base.as_ref()),
}
}
/// Returns the base.
///
/// # Examples
///
/// ```
/// use iri_string::resolve::FixedBaseResolver;
/// use iri_string::types::{IriAbsoluteStr, IriReferenceStr};
///
/// let base = IriAbsoluteStr::new("http://example.com/base/")?;
/// let resolver = FixedBaseResolver::new(base);
///
/// assert_eq!(resolver.base(), base);
/// # Ok::<_, iri_string::validate::Error>(())
/// ```
#[must_use]
pub fn base(&self) -> &'a RiAbsoluteStr<S> {
    // Fetch the raw string the stored components were parsed from. These
    // calls are safe, so they stay outside the `unsafe` block.
    let raw_base = self.base_components.iri().as_str();
    // SAFETY: `base_components` can only be created from `&RiAbsoluteStr<S>`,
    // and the type of `base_components` does not allow modification of the
    // content after it is created.
    unsafe { RiAbsoluteStr::new_maybe_unchecked(raw_base) }
}
}
/// Components getters.
///
/// These getters are more efficient than calling through the result of `.base()`.
impl<S: Spec> FixedBaseResolver<'_, S> {
/// Returns the scheme.
///
/// The following colon is truncated.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::resolve::FixedBaseResolver;
/// use iri_string::types::IriAbsoluteStr;
///
/// let base = IriAbsoluteStr::new("http://example.com/base/?query")?;
/// let resolver = FixedBaseResolver::new(base);
///
/// assert_eq!(resolver.scheme_str(), "http");
/// assert_eq!(base.scheme_str(), "http");
/// # Ok::<_, Error>(())
/// ```
#[inline]
#[must_use]
pub fn scheme_str(&self) -> &str {
    // The components come from an absolute IRI, so a missing scheme would
    // be a broken invariant rather than a recoverable condition.
    let scheme = self.base_components.scheme_str();
    scheme.expect("[validity] absolute IRI should have the scheme part")
}
/// Returns the authority.
///
/// The leading `//` is truncated.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::resolve::FixedBaseResolver;
/// use iri_string::types::IriAbsoluteStr;
///
/// let base = IriAbsoluteStr::new("http://user:pass@example.com/base/?query")?;
/// let resolver = FixedBaseResolver::new(base);
///
/// assert_eq!(resolver.authority_str(), Some("user:pass@example.com"));
/// assert_eq!(base.authority_str(), Some("user:pass@example.com"));
/// # Ok::<_, Error>(())
/// ```
#[inline]
#[must_use]
pub fn authority_str(&self) -> Option<&str> {
    // Read straight from the stored components of the base.
    let components = &self.base_components;
    components.authority_str()
}
/// Returns the path.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::resolve::FixedBaseResolver;
/// use iri_string::types::IriAbsoluteStr;
///
/// let base = IriAbsoluteStr::new("http://user:pass@example.com/base/?query")?;
/// let resolver = FixedBaseResolver::new(base);
///
/// assert_eq!(resolver.path_str(), "/base/");
/// assert_eq!(base.path_str(), "/base/");
/// # Ok::<_, Error>(())
/// ```
#[inline]
#[must_use]
pub fn path_str(&self) -> &str {
    // Read straight from the stored components of the base.
    let components = &self.base_components;
    components.path_str()
}
/// Returns the query.
///
/// The leading question mark (`?`) is truncated.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::resolve::FixedBaseResolver;
/// use iri_string::types::{IriAbsoluteStr, IriQueryStr};
///
/// let base = IriAbsoluteStr::new("http://user:pass@example.com/base/?query")?;
/// let resolver = FixedBaseResolver::new(base);
/// let query = IriQueryStr::new("query")?;
///
/// assert_eq!(resolver.query(), Some(query));
/// assert_eq!(base.query(), Some(query));
/// # Ok::<_, Error>(())
/// ```
#[inline]
#[must_use]
pub fn query(&self) -> Option<&RiQueryStr<S>> {
    // No query in the base means there is nothing to convert.
    self.query_str().map(|raw| {
        RiQueryStr::new(raw)
            .expect("[validity] must be valid query if present in an absolute-IRI")
    })
}
/// Returns the query in a raw string slice.
///
/// The leading question mark (`?`) is truncated.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::resolve::FixedBaseResolver;
/// use iri_string::types::IriAbsoluteStr;
///
/// let base = IriAbsoluteStr::new("http://user:pass@example.com/base/?query")?;
/// let resolver = FixedBaseResolver::new(base);
///
/// assert_eq!(resolver.query_str(), Some("query"));
/// assert_eq!(base.query_str(), Some("query"));
/// # Ok::<_, Error>(())
/// ```
#[inline]
#[must_use]
pub fn query_str(&self) -> Option<&str> {
    // Read straight from the stored components of the base.
    let components = &self.base_components;
    components.query_str()
}
}
impl<'a, S: Spec> FixedBaseResolver<'a, S> {
/// Resolves the given reference against the fixed base.
///
/// The task returned by this method does **not** normalize the resolution
/// result. However, `..` and `.` are recognized even when they are
/// percent-encoded.
///
/// # Failures
///
/// This function itself does not fail, but resolution algorithm defined by
/// RFC 3986 can fail. In that case, serialization algorithm defined by
/// WHATWG URL Standard would be automatically applied.
///
/// See the documentation of [`Normalized`].
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// # // `ToDedicatedString` is available only when
/// # // `alloc` feature is enabled.
/// # #[cfg(feature = "alloc")] {
/// use iri_string::format::ToDedicatedString;
/// use iri_string::resolve::FixedBaseResolver;
/// use iri_string::types::{IriAbsoluteStr, IriReferenceStr};
///
/// let base = IriAbsoluteStr::new("http://example.com/base/")?;
/// let resolver = FixedBaseResolver::new(base);
///
/// let reference = IriReferenceStr::new("../there")?;
/// let resolved = resolver.resolve(reference);
///
/// assert_eq!(resolved.to_dedicated_string(), "http://example.com/there");
/// # }
/// # Ok::<_, Error>(())
/// ```
///
/// Note that `..` and `.` path segments are recognized even when they are
/// percent-encoded.
///
/// ```
/// # use iri_string::validate::Error;
/// # // `ToDedicatedString` is available only when
/// # // `alloc` feature is enabled.
/// # #[cfg(feature = "alloc")] {
/// use iri_string::format::ToDedicatedString;
/// use iri_string::resolve::FixedBaseResolver;
/// use iri_string::types::{IriAbsoluteStr, IriReferenceStr};
///
/// let base = IriAbsoluteStr::new("HTTP://example.COM/base/base2/")?;
/// let resolver = FixedBaseResolver::new(base);
///
/// // `%2e%2e` is recognized as `..`.
/// // However, `dot%2edot` is NOT normalized into `dot.dot`.
/// let reference = IriReferenceStr::new("%2e%2e/../dot%2edot")?;
/// let resolved = resolver.resolve(reference);
///
/// // Resolved but not normalized.
/// assert_eq!(resolved.to_dedicated_string(), "HTTP://example.COM/dot%2edot");
/// # }
/// # Ok::<_, Error>(())
/// ```
#[inline]
#[must_use]
pub fn resolve(&self, reference: &'a RiReferenceStr<S>) -> Normalized<'a, RiStr<S>> {
    // Pair the stored base components with the reference and hand the
    // combination to `Normalized`.
    Normalized::from_input(NormalizationInput::with_resolution_params(
        &self.base_components,
        reference,
    ))
}
}

34
vendor/iri-string/src/spec.rs vendored Normal file
View File

@@ -0,0 +1,34 @@
//! IRI specs.
use core::fmt;
// Note that this MUST be private module.
// See <https://rust-lang.github.io/api-guidelines/future-proofing.html> about
// sealed trait.
mod internal;
/// A trait for spec types.
///
/// This trait is not intended to be implemented by crate users.
///
/// The `internal::Sealed` supertrait lives in a private module, so code
/// outside this crate cannot name it and therefore cannot implement `Spec`.
// Note that all types which implement `Spec` also implement `SpecInternal`.
pub trait Spec: internal::Sealed + Copy + fmt::Debug {}
/// A type that represents specification of IRI.
///
/// About IRI, see [RFC 3987].
///
/// This enum has no variants, so no value of it can ever be created; it is
/// only meaningful as a type-level marker.
///
/// [RFC 3987]: https://www.rfc-editor.org/rfc/rfc3987.html
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum IriSpec {}
impl Spec for IriSpec {}
/// A type that represents specification of URI.
///
/// About URI, see [RFC 3986].
///
/// This enum has no variants, so no value of it can ever be created; it is
/// only meaningful as a type-level marker.
///
/// [RFC 3986]: https://www.rfc-editor.org/rfc/rfc3986.html
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum UriSpec {}
impl Spec for UriSpec {}

58
vendor/iri-string/src/spec/internal.rs vendored Normal file
View File

@@ -0,0 +1,58 @@
//! A private module for sealed trait and internal implementations.
//!
//! Note that this MUST be a private module.
//! See [Rust API Guidelines][sealed-trait] about the necessity of being private.
//!
//! [sealed-trait]:
//! https://rust-lang.github.io/api-guidelines/future-proofing.html#sealed-traits-protect-against-downstream-implementations-c-sealed
use crate::parser::char::is_ucschar;
use crate::spec::{IriSpec, UriSpec};
/// A trait to prohibit user-defined types from implementing `Spec`.
///
/// Because `SpecInternal` is a supertrait, every sealed type also provides
/// the internal character-class checks declared there.
///
/// About sealed trait, see [Rust API Guidelines][future-proofing].
///
/// [future-proofing]: https://rust-lang.github.io/api-guidelines/future-proofing.html
pub trait Sealed: SpecInternal {}
// Implemented in this module for the two spec marker types.
impl Sealed for IriSpec {}
impl Sealed for UriSpec {}
/// Internal implementations for spec types.
///
/// Both functions are associated functions without a receiver, so they are
/// called on the spec type itself rather than on a value.
// NOTE(review): the names suggest callers pass only non-ASCII characters;
// confirm against the call sites.
pub trait SpecInternal: Sized {
/// Checks if the given non-ASCII character matches `unreserved` or `iunreserved` rule.
#[must_use]
fn is_nonascii_char_unreserved(c: char) -> bool;
/// Checks if the given character matches `iprivate` rule.
#[must_use]
fn is_nonascii_char_private(c: char) -> bool;
}
impl SpecInternal for IriSpec {
#[inline]
fn is_nonascii_char_unreserved(c: char) -> bool {
// For the IRI spec this check is delegated entirely to `is_ucschar`.
is_ucschar(c)
}
fn is_nonascii_char_private(c: char) -> bool {
    // Compare by code point against the three inclusive ranges accepted as
    // `iprivate`.
    let code_point = u32::from(c);
    let in_bmp_range = (0xE000..=0xF8FF).contains(&code_point);
    let in_plane_15_range = (0xF_0000..=0xF_FFFD).contains(&code_point);
    let in_plane_16_range = (0x10_0000..=0x10_FFFD).contains(&code_point);
    in_bmp_range || in_plane_15_range || in_plane_16_range
}
}
// For the URI spec both checks ignore their argument and always answer
// `false`: no non-ASCII character is accepted as unreserved or private.
impl SpecInternal for UriSpec {
#[inline]
fn is_nonascii_char_unreserved(_: char) -> bool {
false
}
#[inline]
fn is_nonascii_char_private(_: char) -> bool {
false
}
}

200
vendor/iri-string/src/template.rs vendored Normal file
View File

@@ -0,0 +1,200 @@
//! Processor for [RFC 6570] URI Template.
//!
//! [RFC 6570]: https://www.rfc-editor.org/rfc/rfc6570.html
//!
//! # Usage
//!
//! 1. Prepare a template.
//! * You can create a template as [`UriTemplateStr`]
#![cfg_attr(
feature = "alloc",
doc = " type (borrowed) or [`UriTemplateString`] type (owned)."
)]
#![cfg_attr(not(feature = "alloc"), doc = " type.")]
//! 2. Prepare a context.
//! * Create a value of type that implements [`Context`] trait.
#![cfg_attr(
feature = "alloc",
doc = " * Or, if you use [`SimpleContext`], insert key-value pairs into it."
)]
//! 3. Expand.
//! * Pass the context to [`UriTemplateStr::expand`] method of the template.
//! 4. Use the result.
//! * Returned [`Expanded`] object can be directly printed since it
//! implements [`Display`][`core::fmt::Display`] trait. Or, you can call
//! `.to_string()` method of the `alloc::string::ToString` trait to
//! convert it to a `String`.
//!
//! # Examples
//!
//! ## Custom context type
//!
//! For details, see [the documentation of `context` module][`context`].
//!
//! ```
//! # use iri_string::template::Error;
//! use core::fmt;
//! use iri_string::spec::{IriSpec, Spec, UriSpec};
//! use iri_string::template::UriTemplateStr;
//! use iri_string::template::context::{Context, VarName, Visitor};
//!
//! struct UserInfo {
//! username: &'static str,
//! utf8_available: bool,
//! }
//!
//! impl Context for UserInfo {
//! fn visit<V: Visitor>(
//! &self,
//! visitor: V,
//! ) -> V::Result {
//! match visitor.var_name().as_str() {
//! "username" => visitor.visit_string(self.username),
//! "utf8" => {
//! if self.utf8_available {
//! // U+2713 CHECK MARK
//! visitor.visit_string("\u{2713}")
//! } else {
//! visitor.visit_undefined()
//! }
//! }
//! _ => visitor.visit_undefined()
//! }
//! }
//! }
//!
//! let context = UserInfo {
//! username: "foo",
//! utf8_available: true,
//! };
//!
//! let template = UriTemplateStr::new("/users/{username}{?utf8}")?;
//!
//! # #[cfg(feature = "alloc")] {
//! assert_eq!(
//! template.expand::<UriSpec, _>(&context)?.to_string(),
//! "/users/foo?utf8=%E2%9C%93"
//! );
//! assert_eq!(
//! template.expand::<IriSpec, _>(&context)?.to_string(),
//! "/users/foo?utf8=\u{2713}"
//! );
//! # }
//! # Ok::<_, Error>(())
//! ```
//!
//! ## `SimpleContext` type (enabled by `alloc` feature flag)
//!
//! ```
//! # use iri_string::template::Error;
//! # #[cfg(feature = "alloc")] {
//! use iri_string::spec::{IriSpec, UriSpec};
//! use iri_string::template::UriTemplateStr;
//! use iri_string::template::simple_context::SimpleContext;
//!
//! let mut context = SimpleContext::new();
//! context.insert("username", "foo");
//! // U+2713 CHECK MARK
//! context.insert("utf8", "\u{2713}");
//!
//! let template = UriTemplateStr::new("/users/{username}{?utf8}")?;
//!
//! assert_eq!(
//! template.expand::<UriSpec, _>(&context)?.to_string(),
//! "/users/foo?utf8=%E2%9C%93"
//! );
//! assert_eq!(
//! template.expand::<IriSpec, _>(&context)?.to_string(),
//! "/users/foo?utf8=\u{2713}"
//! );
//! # }
//! # Ok::<_, Error>(())
//! ```
//!
#![cfg_attr(
feature = "alloc",
doc = "[`SimpleContext`]: `simple_context::SimpleContext`"
)]
mod components;
pub mod context;
mod error;
mod expand;
mod parser;
#[cfg(feature = "alloc")]
pub mod simple_context;
mod string;
pub use self::context::{Context, DynamicContext};
#[cfg(feature = "alloc")]
pub use self::error::CreationError;
pub use self::error::Error;
pub use self::expand::Expanded;
#[cfg(feature = "alloc")]
pub use self::string::UriTemplateString;
pub use self::string::{UriTemplateStr, UriTemplateVariables};
/// Deprecated old name of [`template::context::VarName`].
///
/// This is a plain type alias, so it names exactly the same type as the new
/// path does.
///
/// [`template::context::VarName`]: `components::VarName`
#[deprecated(
since = "0.7.1",
note = "renamed (moved) to `template::context::VarName`"
)]
// Points at the definition in the private `components` module.
pub type VarName<'a> = self::components::VarName<'a>;
/// Kind of value supplied for a template variable.
#[derive(Debug, Clone, Copy)]
enum ValueType {
    /// Undefined (i.e. null).
    Undefined,
    /// String value.
    String,
    /// List.
    List,
    /// Associative array.
    Assoc,
}

impl ValueType {
    /// Value type reported for a variable that is not defined.
    #[inline]
    #[must_use]
    pub const fn undefined() -> Self {
        Self::Undefined
    }

    /// Value type reported for a string variable.
    #[inline]
    #[must_use]
    pub const fn string() -> Self {
        Self::String
    }

    /// Value type reported for a list without items.
    ///
    /// Note that this is `Undefined`, not `List`.
    #[inline]
    #[must_use]
    pub const fn empty_list() -> Self {
        Self::Undefined
    }

    /// Value type reported for a list with at least one item.
    #[inline]
    #[must_use]
    pub const fn nonempty_list() -> Self {
        Self::List
    }

    /// Value type reported for an associative array without entries.
    ///
    /// Note that this is `Undefined`, not `Assoc`.
    #[inline]
    #[must_use]
    pub const fn empty_assoc() -> Self {
        Self::Undefined
    }

    /// Value type reported for an associative array with at least one entry.
    #[inline]
    #[must_use]
    pub const fn nonempty_assoc() -> Self {
        Self::Assoc
    }
}

View File

@@ -0,0 +1,332 @@
//! Syntax components of URI templates.
use core::mem;
use crate::parser::str::find_split_hole;
use crate::template::error::Error;
use crate::template::parser::validate as validate_parser;
/// Body of a template expression.
///
/// The wrapping braces (`{` and `}`) are not part of the stored string.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(super) struct ExprBody<'a>(&'a str);

impl<'a> ExprBody<'a> {
    /// Wraps the given string as an expression body.
    ///
    /// # Precondition
    ///
    /// The given string should be a valid expression body. Only
    /// non-emptiness is checked here, and only by a debug assertion.
    #[inline]
    #[must_use]
    pub(super) fn new(s: &'a str) -> Self {
        debug_assert!(
            !s.is_empty(),
            "[precondition] valid expression body is not empty"
        );
        Self(s)
    }

    /// Splits the expression into an `operator` and a `variable-list`.
    ///
    /// When the first byte can start a variable name, there is no operator
    /// character and `Operator::String` is returned with the whole body.
    ///
    /// # Panics
    ///
    /// May panic if the input is invalid.
    #[must_use]
    pub(super) fn decompose(&self) -> (Operator, VarListStr<'a>) {
        let body: &'a str = self.0;
        debug_assert!(
            !body.is_empty(),
            "[precondition] valid expression body is not empty"
        );
        let first = body.as_bytes()[0];
        if first.is_ascii_alphanumeric() || matches!(first, b'_' | b'%') {
            // The first byte already belongs to the variable list.
            return (Operator::String, VarListStr::new(body));
        }
        // Otherwise the first byte is the operator and the list follows it.
        match Operator::from_byte(first) {
            Some(op) => (op, VarListStr::new(&body[1..])),
            None => unreachable!(
                "[precondition] valid expression has (optional) \
                 valid operator, but got a byte {first:#02x?}"
            ),
        }
    }

    /// Returns the expression body as a raw string slice.
    #[inline]
    #[must_use]
    pub(super) fn as_str(&self) -> &'a str {
        let Self(raw) = *self;
        raw
    }
}
/// Name of a template variable.
// QUESTION: Should hexdigits in percent-encoded triplets be compared case sensitively?
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct VarName<'a>(&'a str);

impl<'a> VarName<'a> {
    /// Wraps a string that is already known to be a variable name.
    ///
    /// # Precondition
    ///
    /// The given string should be a valid variable name. No check is done.
    #[inline]
    #[must_use]
    pub(super) fn from_trusted(s: &'a str) -> Self {
        Self(s)
    }

    /// Validates the string and creates a `VarName` from it.
    ///
    /// # Errors
    ///
    /// Returns the validator's error when `s` is not accepted as a variable
    /// name.
    ///
    /// # Examples
    ///
    /// ```
    /// # use iri_string::template::Error;
    /// use iri_string::template::context::VarName;
    ///
    /// let name = VarName::new("hello")?;
    /// assert_eq!(name.as_str(), "hello");
    ///
    /// assert!(VarName::new("0+non-variable-name").is_err());
    ///
    /// # Ok::<_, Error>(())
    /// ```
    #[inline]
    pub fn new(s: &'a str) -> Result<Self, Error> {
        // The validator's success value is not needed; only the verdict is.
        validate_parser::validate_varname(s, 0).map(|_| Self::from_trusted(s))
    }

    /// Returns the variable name.
    #[inline]
    #[must_use]
    pub fn as_str(&self) -> &'a str {
        let Self(name) = *self;
        name
    }
}
/// Variable specifier: a variable name plus its modifier.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct VarSpec<'a> {
    /// Variable name.
    name: VarName<'a>,
    /// Variable modifier.
    modifier: Modifier,
}

impl<'a> VarSpec<'a> {
    /// Returns the variable name.
    #[inline]
    #[must_use]
    pub(super) fn name(&self) -> VarName<'a> {
        self.name
    }

    /// Returns the modifier.
    #[inline]
    #[must_use]
    pub(super) fn modifier(&self) -> Modifier {
        self.modifier
    }

    /// Parses a varspec string that is trusted to be valid.
    ///
    /// A trailing `*` is tried first, then a `:` separator, and anything
    /// else is taken as a bare variable name.
    ///
    /// # Panics
    ///
    /// May panic if the input is invalid.
    #[must_use]
    pub(super) fn parse_trusted(s: &'a str) -> Self {
        let (varname, modifier) = if let Some(varname) = s.strip_suffix('*') {
            // `varname "*"`.
            (varname, Modifier::Explode)
        } else if let Some((varname, max_len)) = find_split_hole(s, b':') {
            // `varname ":" max-length`.
            let max_len: u16 = max_len
                .parse()
                .expect("[precondition] the input should be valid `varspec`");
            (varname, Modifier::MaxLen(max_len))
        } else {
            // Bare `varname`.
            (s, Modifier::None)
        };
        Self {
            name: VarName::from_trusted(varname),
            modifier,
        }
    }
}
/// Variable list.
///
/// A thin wrapper around the raw list string; iterating it (via
/// `IntoIterator`) yields the individual variable specs.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(super) struct VarListStr<'a>(&'a str);
impl<'a> VarListStr<'a> {
/// Creates a new variable list.
///
/// # Precondition
///
/// The given string should be a valid variable list.
/// This constructor performs no validation itself.
#[inline]
#[must_use]
pub(super) fn new(s: &'a str) -> Self {
Self(s)
}
}
impl<'a> IntoIterator for VarListStr<'a> {
type IntoIter = VarListIter<'a>;
type Item = (usize, VarSpec<'a>);
#[inline]
fn into_iter(self) -> Self::IntoIter {
VarListIter { rest: self.0 }
}
}
/// Iterator over the variable specs of a variable list.
#[derive(Debug, Clone)]
pub(super) struct VarListIter<'a> {
    /// Input that has not been consumed yet.
    rest: &'a str,
}

impl<'a> Iterator for VarListIter<'a> {
    /// A pair of the length of the varspec and the varspec itself.
    type Item = (usize, VarSpec<'a>);

    fn next(&mut self) -> Option<Self::Item> {
        // A comma was found: yield what precedes it and keep what follows.
        if let Some((varspec, tail)) = find_split_hole(self.rest, b',') {
            self.rest = tail;
            return Some((varspec.len(), VarSpec::parse_trusted(varspec)));
        }
        // No comma and nothing left: the iteration is over.
        if self.rest.is_empty() {
            return None;
        }
        // No comma: the remainder is the last varspec. Taking it leaves an
        // empty string behind, so the next call returns `None`.
        let last = mem::take(&mut self.rest);
        Some((last.len(), VarSpec::parse_trusted(last)))
    }
}
/// Variable modifier.
///
/// Produced by `VarSpec::parse_trusted` from the tail of a varspec.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub(super) enum Modifier {
/// No modifiers.
None,
/// Max length, greater than 0 and less than 10000.
///
/// Written as `:` followed by the number after the variable name.
MaxLen(u16),
/// Explode the variable, e.g. the var spec has `*`.
Explode,
}
/// Operator that is possibly reserved for future extension.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub(super) enum MaybeOperator {
/// Working operator.
Operator(Operator),
/// Reserved for future extensions.
Reserved(OperatorReservedForFuture),
}
impl MaybeOperator {
/// Returns the operator for the given character.
pub(super) fn from_byte(b: u8) -> Option<Self> {
match b {
b'+' => Some(Self::Operator(Operator::Reserved)),
b'#' => Some(Self::Operator(Operator::Fragment)),
b'.' => Some(Self::Operator(Operator::Label)),
b'/' => Some(Self::Operator(Operator::PathSegments)),
b';' => Some(Self::Operator(Operator::PathParams)),
b'?' => Some(Self::Operator(Operator::FormQuery)),
b'&' => Some(Self::Operator(Operator::FormQueryCont)),
b'=' => Some(Self::Reserved(OperatorReservedForFuture::Equals)),
b',' => Some(Self::Reserved(OperatorReservedForFuture::Comma)),
b'!' => Some(Self::Reserved(OperatorReservedForFuture::Exclamation)),
b'@' => Some(Self::Reserved(OperatorReservedForFuture::AtSign)),
b'|' => Some(Self::Reserved(OperatorReservedForFuture::Pipe)),
_ => None,
}
}
}
/// Operator that is actually supported by the expansion.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub(super) enum Operator {
    /// No operator. String expansion.
    String,
    /// Reserved expansion by `+`.
    Reserved,
    /// Fragment expansion by `#`.
    Fragment,
    /// Label expansion by `.`.
    Label,
    /// Path segments by `/`.
    PathSegments,
    /// Path-style parameters by `;`.
    PathParams,
    /// Form-style query by `?`.
    FormQuery,
    /// Form-style query continuation by `&`.
    FormQueryCont,
}

impl Operator {
    /// Returns the operator for the given character.
    ///
    /// `Operator::String` has no character of its own, so it is never
    /// returned from here; unknown bytes give `None`.
    #[must_use]
    pub(super) fn from_byte(b: u8) -> Option<Self> {
        let op = match b {
            b'+' => Self::Reserved,
            b'#' => Self::Fragment,
            b'.' => Self::Label,
            b'/' => Self::PathSegments,
            b';' => Self::PathParams,
            b'?' => Self::FormQuery,
            b'&' => Self::FormQueryCont,
            _ => return None,
        };
        Some(op)
    }

    /// Returns the string length of the operator.
    ///
    /// This is `0` for `String` (no operator character) and `1` for every
    /// other operator.
    #[inline]
    #[must_use]
    pub(super) const fn len(self) -> usize {
        match self {
            Self::String => 0,
            Self::Reserved
            | Self::Fragment
            | Self::Label
            | Self::PathSegments
            | Self::PathParams
            | Self::FormQuery
            | Self::FormQueryCont => 1,
        }
    }
}
/// Operator reserved for future extension.
///
/// These are recognized by `MaybeOperator::from_byte` but are not part of
/// the working `Operator` set.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub(super) enum OperatorReservedForFuture {
/// Reserved `=` operator.
Equals,
/// Reserved `,` operator.
Comma,
/// Reserved `!` operator.
Exclamation,
/// Reserved `@` operator.
AtSign,
/// Reserved `|` operator.
Pipe,
}

View File

@@ -0,0 +1,337 @@
//! Template expansion context.
//!
//! # Examples
//!
//! 1. Define your context type.
//! 2. Implement [`Context`] trait (and [`Context::visit`] method) for the type.
//! 1. Get variable name by [`Visitor::var_name`] method.
//! 2. Feed the corresponding value(s) by one of `Visitor::visit_*` methods.
//!
//! Note that contexts should return consistent results across multiple visits for
//! the same variable. In other words, `Context::visit` should return the same
//! result for the same `Visitor::var_name()` while the context is borrowed.
//! If this condition is violated, the URI template processor can return
//! an invalid result or, at worst, panic.
//!
//! ```
//! use iri_string::template::context::{Context, Visitor, ListVisitor, AssocVisitor};
//!
//! struct MyContext {
//! name: &'static str,
//! id: u64,
//! tags: &'static [&'static str],
//! children: &'static [(&'static str, usize)],
//! }
//!
//! impl Context for MyContext {
//! fn visit<V: Visitor>(&self, visitor: V) -> V::Result {
//! let name = visitor.var_name().as_str();
//! match name {
//! "name" => visitor.visit_string(self.name),
//! "id" => visitor.visit_string(self.id),
//! "tags" => visitor.visit_list().visit_items_and_finish(self.tags),
//! "children" => visitor
//! .visit_assoc()
//! .visit_entries_and_finish(self.children.iter().copied()),
//! _ => visitor.visit_undefined(),
//! }
//! }
//! }
//! ```
//
// # Developers note
//
// Visitor types **should not** be cloneable in order to enforce just one
// visitor is used to visit a variable. If visitors are cloneable, it can make
// the wrong usage to be available, i.e. storing cloned visitors somewhere and
// using the wrong one.
//
// However, if visitors are made cloneable by any chance, it does not indicate
// the whole implementation will be broken. Users can only use the visitors
// through visitor traits (and their API do not allow cloning), so the logic
// would work as expected if the internal usage of the visitors are correct.
// Making visitors noncloneable is an optional safety guard (with no overhead).
use core::fmt;
use core::ops::ControlFlow;
pub use crate::template::components::VarName;
/// A trait for types that can behave as a static URI template expansion context.
///
/// This type is for use with [`UriTemplateStr::expand`] method.
///
/// `visit` takes `&self`, so a visit cannot mutate the context. For a
/// context that needs `&mut self`, see `DynamicContext`.
///
/// See [the module documentation][`crate::template`] for usage.
///
/// [`UriTemplateStr::expand`]: `crate::template::UriTemplateStr::expand`
pub trait Context: Sized {
/// Visits a variable.
///
/// The visitor is taken by value and its `Result` type is what this
/// method returns.
///
/// To get variable name, use [`Visitor::var_name()`].
#[must_use]
fn visit<V: Visitor>(&self, visitor: V) -> V::Result;
}
/// A trait for types that can behave as a dynamic (mutable) URI template expansion context.
///
/// This type is for use with [`UriTemplateStr::expand_dynamic`] method and its
/// family.
///
/// Note that "dynamic" here does not mean that the value of variables can
/// change during a template expansion. The value should be fixed and consistent
/// during each expansion, but the context is allowed to mutate itself if it
/// does not break this rule.
///
/// # Examples
///
/// ```
/// # #[cfg(feature = "alloc")]
/// # extern crate alloc;
/// # use iri_string::template::Error;
/// # #[cfg(feature = "alloc")] {
/// # use alloc::string::String;
/// use iri_string::template::UriTemplateStr;
/// use iri_string::template::context::{DynamicContext, Visitor, VisitPurpose};
/// use iri_string::spec::UriSpec;
///
/// struct MyContext<'a> {
/// /// Target path.
/// target: &'a str,
/// /// Username.
/// username: Option<&'a str>,
/// /// A flag to remember whether the URI template
/// /// attempted to use `username` variable.
/// username_visited: bool,
/// }
///
/// impl DynamicContext for MyContext<'_> {
/// fn on_expansion_start(&mut self) {
/// // Reset the state.
/// self.username_visited = false;
/// }
/// fn visit_dynamic<V: Visitor>(&mut self, visitor: V) -> V::Result {
/// match visitor.var_name().as_str() {
/// "target" => visitor.visit_string(self.target),
/// "username" => {
/// if visitor.purpose() == VisitPurpose::Expand {
/// // The variable `username` is being used
/// // on the template expansion.
/// // Don't care whether `username` is defined or not.
/// self.username_visited = true;
/// }
/// if let Some(username) = &self.username {
/// visitor.visit_string(username)
/// } else {
/// visitor.visit_undefined()
/// }
/// }
/// _ => visitor.visit_undefined(),
/// }
/// }
/// }
///
/// let mut context = MyContext {
/// target: "/posts/1",
/// username: Some("the_admin"),
/// username_visited: false,
/// };
/// let mut buf = String::new();
///
/// // No access to the variable `username`.
/// let template1 = UriTemplateStr::new("{+target}")?;
/// template1.expand_dynamic::<UriSpec, _, _>(&mut buf, &mut context)?;
/// assert_eq!(buf, "/posts/1");
/// assert!(!context.username_visited);
///
/// buf.clear();
/// // Will access the variable `username`.
/// let template2 = UriTemplateStr::new("{+target}{?username}")?;
/// template2.expand_dynamic::<UriSpec, _, _>(&mut buf, &mut context)?;
/// assert_eq!(buf, "/posts/1?username=the_admin");
/// assert!(context.username_visited);
///
/// buf.clear();
/// context.username = None;
/// // Will access the variable `username`, but it is undefined.
/// template2.expand_dynamic::<UriSpec, _, _>(&mut buf, &mut context)?;
/// assert_eq!(buf, "/posts/1");
/// assert!(
/// context.username_visited,
/// "`MyContext` can know and remember whether `visit_dynamic()` is called
/// for `username`, even if its value is undefined"
/// );
/// # }
/// # Ok::<_, Error>(())
/// ```
///
/// [`UriTemplateStr::expand_dynamic`]: `crate::template::UriTemplateStr::expand_dynamic`
pub trait DynamicContext: Sized {
/// Visits a variable.
///
/// To get variable name, use [`Visitor::var_name()`].
///
/// # Restriction
///
/// The visit results should be consistent and unchanged between the last
/// time [`on_expansion_start`][`Self::on_expansion_start`] was called and
/// the next time [`on_expansion_end`][`Self::on_expansion_end`] will be
/// called. If this condition is violated, template expansion will produce
/// wrong result or may panic at worst.
#[must_use]
fn visit_dynamic<V: Visitor>(&mut self, visitor: V) -> V::Result;
/// A callback that is called before the expansion of a URI template.
///
/// The default implementation does nothing.
#[inline]
fn on_expansion_start(&mut self) {}
/// A callback that is called after the expansion of a URI template.
///
/// The default implementation does nothing.
#[inline]
fn on_expansion_end(&mut self) {}
}
impl<C: Context> DynamicContext for C {
    /// Forwards to [`Context::visit`], using the context immutably.
    ///
    /// The expansion callbacks keep their default (empty) implementations.
    #[inline]
    fn visit_dynamic<V: Visitor>(&mut self, visitor: V) -> V::Result {
        <C as Context>::visit(&*self, visitor)
    }
}
/// A purpose of a visit.
///
/// A visitor reports its purpose through `Visitor::purpose`.
///
/// This enum is nonexhaustive since this partially exposes the internal
/// implementation of the template expansion, and thus this is subject to
/// change.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[non_exhaustive]
pub enum VisitPurpose {
/// A visit for type checking.
Typecheck,
/// A visit for template expansion to retrieve the value.
Expand,
}
/// Variable visitor.
///
/// Every `visit_*` method takes the visitor by value, so one visitor can
/// report a value for its variable only once.
///
/// See [the module documentation][self] for usage.
// NOTE (internal): Visitor types **should not** be cloneable.
// NOTE(review): the `private::Sealed` bound presumably prevents
// implementations outside this crate; confirm in the `private` module.
pub trait Visitor: Sized + private::Sealed {
/// Result of the visit.
type Result;
/// List visitor.
///
/// Its result type must be the same as this visitor's.
type ListVisitor: ListVisitor<Result = Self::Result>;
/// Associative array visitor.
///
/// Its result type must be the same as this visitor's.
type AssocVisitor: AssocVisitor<Result = Self::Result>;
/// Returns the name of the variable to visit.
#[must_use]
fn var_name(&self) -> VarName<'_>;
/// Returns the purpose of the visit.
///
/// The template expansion algorithm checks the types for some variables
/// depending on its usage. To get the usage count correctly, you should
/// only count visits with [`VisitPurpose::Expand`].
///
/// If you need to know whether the variable is accessed and does not
/// need dynamic context generation or access counts, consider using
/// [`UriTemplateStr::variables`] method to iterate the variables in the
/// URI template.
///
/// [`UriTemplateStr::variables`]: `crate::template::UriTemplateStr::variables`
#[must_use]
fn purpose(&self) -> VisitPurpose;
/// Visits an undefined variable, i.e. indicates that the requested variable is unavailable.
#[must_use]
fn visit_undefined(self) -> Self::Result;
/// Visits a string variable.
///
/// Any value that implements `Display` can be passed.
#[must_use]
fn visit_string<T: fmt::Display>(self, v: T) -> Self::Result;
/// Visits a list variable.
///
/// Returns the list visitor that the items are then fed to.
#[must_use]
fn visit_list(self) -> Self::ListVisitor;
/// Visits an associative array variable.
///
/// Returns the associative array visitor that the entries are then fed to.
#[must_use]
fn visit_assoc(self) -> Self::AssocVisitor;
}
/// Visitor for the items of a list variable.
///
/// See [the module documentation][self] for usage.
// NOTE (internal): Visitor types **should not** be cloneable.
pub trait ListVisitor: Sized + private::Sealed {
    /// Result of the visit.
    type Result;
    /// Visits a single item.
    ///
    /// If this returns `ControlFlow::Break(v)`, [`Context::visit`] should also
    /// return this `v`.
    ///
    /// To feed multiple items at once, do
    /// `items.into_iter().try_for_each(|item| self.visit_item(item))` for example.
    fn visit_item<T: fmt::Display>(&mut self, item: T) -> ControlFlow<Self::Result>;
    /// Finishes visiting the list.
    #[must_use]
    fn finish(self) -> Self::Result;
    /// Visits all the given items in order and then finishes the visit.
    ///
    /// Stops early and returns the break value as soon as
    /// [`visit_item`][`Self::visit_item`] returns `ControlFlow::Break`.
    #[must_use]
    fn visit_items_and_finish<T, I>(mut self, items: I) -> Self::Result
    where
        T: fmt::Display,
        I: IntoIterator<Item = T>,
    {
        for item in items {
            if let ControlFlow::Break(early) = self.visit_item(item) {
                return early;
            }
        }
        self.finish()
    }
}
/// Visitor for the entries of an associative array variable.
///
/// See [the module documentation][self] for usage.
// NOTE (internal): Visitor types **should not** be cloneable.
pub trait AssocVisitor: Sized + private::Sealed {
    /// Result of the visit.
    type Result;
    /// Visits a single key-value entry.
    ///
    /// If this returns `ControlFlow::Break(v)`, [`Context::visit`] should also
    /// return this `v`.
    ///
    /// To feed multiple items at once, do
    /// `entries.into_iter().try_for_each(|(key, value)| self.visit_entry(key, value))`
    /// for example.
    fn visit_entry<K: fmt::Display, V: fmt::Display>(
        &mut self,
        key: K,
        value: V,
    ) -> ControlFlow<Self::Result>;
    /// Finishes visiting the associative array.
    #[must_use]
    fn finish(self) -> Self::Result;
    /// Visits all the given entries in order and then finishes the visit.
    ///
    /// Stops early and returns the break value as soon as
    /// [`visit_entry`][`Self::visit_entry`] returns `ControlFlow::Break`.
    #[must_use]
    fn visit_entries_and_finish<K, V, I>(mut self, entries: I) -> Self::Result
    where
        K: fmt::Display,
        V: fmt::Display,
        I: IntoIterator<Item = (K, V)>,
    {
        for (key, value) in entries {
            if let ControlFlow::Break(early) = self.visit_entry(key, value) {
                return early;
            }
        }
        self.finish()
    }
}
/// Private module that holds the trait used to seal the visitor traits.
///
/// The module is only visible to the parent module, so the visitor traits
/// that require [`Sealed`] as a supertrait cannot be implemented by code
/// that is unable to name it.
pub(super) mod private {
/// A trait for visitor types of variables in a context.
pub trait Sealed {}
}

154
vendor/iri-string/src/template/error.rs vendored Normal file
View File

@@ -0,0 +1,154 @@
//! Errors related to URI templates.
use core::fmt;
#[cfg(feature = "std")]
use std::error;
/// Template construction and expansion error kind.
///
/// Each variant has a fixed message, returned by `ErrorKind::as_str`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(super) enum ErrorKind {
/// Cannot write to the backend.
WriteFailed,
/// Expression is not closed.
ExpressionNotClosed,
/// Invalid character.
InvalidCharacter,
/// Invalid expression.
InvalidExpression,
/// Invalid percent-encoded triplets.
InvalidPercentEncoding,
/// Invalid UTF-8 bytes.
InvalidUtf8,
/// Unexpected value type for the variable.
UnexpectedValueType,
/// Unsupported operator, including operators reserved for future.
UnsupportedOperator,
}
impl ErrorKind {
/// Returns the error message.
#[must_use]
fn as_str(self) -> &'static str {
match self {
Self::WriteFailed => "failed to write to the backend writer",
Self::ExpressionNotClosed => "expression not closed",
Self::InvalidCharacter => "invalid character",
Self::InvalidExpression => "invalid expression",
Self::InvalidPercentEncoding => "invalid percent-encoded triplets",
Self::InvalidUtf8 => "invalid utf-8 byte sequence",
Self::UnexpectedValueType => "unexpected value type for the variable",
Self::UnsupportedOperator => "unsupported operator",
}
}
}
/// Template construction and expansion error.
///
/// When displayed, the error shows the message of its kind and the byte
/// position where the error was detected.
///
// Note that this type should implement `Copy` trait.
// To return additional non-`Copy` data as an error, use wrapper type
// (as `std::string::FromUtf8Error` contains `std::str::Utf8Error`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Error {
/// Error kind.
kind: ErrorKind,
/// Location (byte position of the error).
location: usize,
}
impl Error {
/// Creates a new `Error`.
///
/// For internal use.
#[inline]
#[must_use]
pub(super) fn new(kind: ErrorKind, location: usize) -> Self {
Self { kind, location }
}
/// Returns the byte position the error is detected.
///
/// NOTE: This is not a part of the public API since the value to be
/// returned (i.e., the definition of the "position" of an error) is not
/// guaranteed to be stable.
#[cfg(test)]
pub(super) fn location(&self) -> usize {
self.location
}
}
impl fmt::Display for Error {
    /// Writes the message of the error kind followed by the byte position.
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self { kind, location } = *self;
        let reason = kind.as_str();
        write!(
            f,
            "invalid URI template: {} (at {}-th byte)",
            reason, location
        )
    }
}
// Relies only on the default methods of `std::error::Error`; the impl is
// available only when the `std` feature is enabled.
#[cfg(feature = "std")]
impl error::Error for Error {}
/// Error on conversion into a URI template type.
///
/// Keeps the rejected source data together with the validation error, so
/// that the source can be taken back by the caller.
// TODO: Unifiable to `types::CreationError`?
#[cfg(feature = "alloc")]
pub struct CreationError<T> {
/// Source data.
source: T,
/// Validation error.
error: Error,
}
#[cfg(feature = "alloc")]
impl<T> CreationError<T> {
    /// Consumes the error and returns the source data.
    #[must_use]
    pub fn into_source(self) -> T {
        let Self { source, .. } = self;
        source
    }
    /// Returns a copy of the validation error.
    #[must_use]
    pub fn validation_error(&self) -> Error {
        let Self { error, .. } = self;
        *error
    }
    /// Creates a new `CreationError` from the validation error and the
    /// source data that failed the validation.
    #[must_use]
    pub(crate) fn new(error: Error, source: T) -> Self {
        CreationError { error, source }
    }
}
#[cfg(feature = "alloc")]
impl<T: fmt::Debug> fmt::Debug for CreationError<T> {
    /// Formats the error as a struct with `source` and `error` fields.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let mut out = f.debug_struct("CreationError");
        out.field("source", &self.source);
        out.field("error", &self.error);
        out.finish()
    }
}
#[cfg(feature = "alloc")]
impl<T: Clone> Clone for CreationError<T> {
    /// Clones the source data and copies the validation error.
    fn clone(&self) -> Self {
        let Self { source, error } = self;
        Self {
            source: T::clone(source),
            error: *error,
        }
    }
}
#[cfg(feature = "alloc")]
impl<T> fmt::Display for CreationError<T> {
    /// Shows the validation error only; the source data is not displayed.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        fmt::Display::fmt(&self.error, f)
    }
}
// Relies only on the default methods of `std::error::Error`; the impl is
// available only when the `std` feature is enabled.
#[cfg(feature = "std")]
impl<T: fmt::Debug> error::Error for CreationError<T> {}

1036
vendor/iri-string/src/template/expand.rs vendored Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,6 @@
//! URI Template parser.
pub(super) mod char;
pub(super) mod validate;
pub(super) use self::validate::validate_template_str;

View File

@@ -0,0 +1,190 @@
//! Characters.
/// Properties of ASCII characters.
///
/// The table is indexed by the ASCII code of a character, and each entry is
/// a set of bit flags to be tested with the `CHARS_TABLE_MASK_*` constants:
///
/// * bit 0: the character is allowed in a literal,
/// * bit 1: the character is allowed at the beginning of a `varname`,
/// * bit 2: the character is allowed at a non-beginning position of a `varname`.
///
/// About `'` (single quote) being considered as a literal: see
/// [Errata ID 6937](https://www.rfc-editor.org/errata/eid6937).
const CHARS_TABLE: [u8; 128] = [
0b_0000_0000, // NUL
0b_0000_0000, // SOH
0b_0000_0000, // STX
0b_0000_0000, // ETX
0b_0000_0000, // EOT
0b_0000_0000, // ENQ
0b_0000_0000, // ACK
0b_0000_0000, // BEL
0b_0000_0000, // BS
0b_0000_0000, // HT
0b_0000_0000, // LF
0b_0000_0000, // VT
0b_0000_0000, // FF
0b_0000_0000, // CR
0b_0000_0000, // SO
0b_0000_0000, // SI
0b_0000_0000, // DLE
0b_0000_0000, // DC1
0b_0000_0000, // DC2
0b_0000_0000, // DC3
0b_0000_0000, // DC4
0b_0000_0000, // NAK
0b_0000_0000, // SYN
0b_0000_0000, // ETB
0b_0000_0000, // CAN
0b_0000_0000, // EM
0b_0000_0000, // SUB
0b_0000_0000, // ESC
0b_0000_0000, // FS
0b_0000_0000, // GS
0b_0000_0000, // RS
0b_0000_0000, // US
0b_0000_0000, // SPACE
0b_0000_0001, // !
0b_0000_0000, // "
0b_0000_0001, // #
0b_0000_0001, // $
0b_0000_0000, // %
0b_0000_0001, // &
0b_0000_0001, // '
0b_0000_0001, // (
0b_0000_0001, // )
0b_0000_0001, // *
0b_0000_0001, // +
0b_0000_0001, // ,
0b_0000_0001, // -
0b_0000_0101, // .
0b_0000_0001, // /
0b_0000_0111, // 0
0b_0000_0111, // 1
0b_0000_0111, // 2
0b_0000_0111, // 3
0b_0000_0111, // 4
0b_0000_0111, // 5
0b_0000_0111, // 6
0b_0000_0111, // 7
0b_0000_0111, // 8
0b_0000_0111, // 9
0b_0000_0001, // :
0b_0000_0001, // ;
0b_0000_0000, // <
0b_0000_0001, // =
0b_0000_0000, // >
0b_0000_0001, // ?
0b_0000_0001, // @
0b_0000_0111, // A
0b_0000_0111, // B
0b_0000_0111, // C
0b_0000_0111, // D
0b_0000_0111, // E
0b_0000_0111, // F
0b_0000_0111, // G
0b_0000_0111, // H
0b_0000_0111, // I
0b_0000_0111, // J
0b_0000_0111, // K
0b_0000_0111, // L
0b_0000_0111, // M
0b_0000_0111, // N
0b_0000_0111, // O
0b_0000_0111, // P
0b_0000_0111, // Q
0b_0000_0111, // R
0b_0000_0111, // S
0b_0000_0111, // T
0b_0000_0111, // U
0b_0000_0111, // V
0b_0000_0111, // W
0b_0000_0111, // X
0b_0000_0111, // Y
0b_0000_0111, // Z
0b_0000_0001, // [
0b_0000_0000, // \
0b_0000_0001, // ]
0b_0000_0000, // ^
0b_0000_0111, // _
0b_0000_0000, // `
0b_0000_0111, // a
0b_0000_0111, // b
0b_0000_0111, // c
0b_0000_0111, // d
0b_0000_0111, // e
0b_0000_0111, // f
0b_0000_0111, // g
0b_0000_0111, // h
0b_0000_0111, // i
0b_0000_0111, // j
0b_0000_0111, // k
0b_0000_0111, // l
0b_0000_0111, // m
0b_0000_0111, // n
0b_0000_0111, // o
0b_0000_0111, // p
0b_0000_0111, // q
0b_0000_0111, // r
0b_0000_0111, // s
0b_0000_0111, // t
0b_0000_0111, // u
0b_0000_0111, // v
0b_0000_0111, // w
0b_0000_0111, // x
0b_0000_0111, // y
0b_0000_0111, // z
0b_0000_0000, // {
0b_0000_0000, // |
0b_0000_0000, // }
0b_0000_0001, // ~
0b_0000_0000, // DEL
];
/// A mask to test whether the character matches `literals` rule defined in [RFC 6570].
///
/// This is bit 0 of a `CHARS_TABLE` entry.
///
/// [RFC 6570]: https://www.rfc-editor.org/rfc/rfc6570.html#section-2.1
const CHARS_TABLE_MASK_LITERAL: u8 = 1 << 0;
/// A mask to test whether the character matches `varchar` rule defined in [RFC 6570].
///
/// This is bit 1 of a `CHARS_TABLE` entry, and is used for the first
/// character of a `varname`.
///
/// [RFC 6570]: https://www.rfc-editor.org/rfc/rfc6570.html#section-2.3
const CHARS_TABLE_MASK_VARCHAR_START: u8 = 1 << 1;
/// A mask to test whether the character matches `varchar` rule defined in [RFC 6570] or a period.
///
/// This is bit 2 of a `CHARS_TABLE` entry, and is used for the non-first
/// characters of a `varname`.
///
/// [RFC 6570]: https://www.rfc-editor.org/rfc/rfc6570.html#section-2.3
const CHARS_TABLE_MASK_VARCHAR_CONTINUE: u8 = 1 << 2;
/// Tests whether the given ASCII character may appear in a literal string.
///
/// # Precondition
///
/// The given byte should be an ASCII character, i.e. should be less than 128.
/// A larger value is out of the bounds of the lookup table.
#[inline]
#[must_use]
pub(super) const fn is_ascii_literal_char(c: u8) -> bool {
    let flags = CHARS_TABLE[c as usize];
    (flags & CHARS_TABLE_MASK_LITERAL) != 0
}
/// Tests whether the given ASCII character may be the first character of a `varname`.
///
/// Note that this does not return true for `%` character. It is caller's
/// responsibility to test validity of percent-encoded triplets.
///
/// # Precondition
///
/// The given byte should be an ASCII character, i.e. should be less than 128.
/// A larger value is out of the bounds of the lookup table.
#[inline]
#[must_use]
pub(super) const fn is_ascii_varchar_start(c: u8) -> bool {
    let flags = CHARS_TABLE[c as usize];
    (flags & CHARS_TABLE_MASK_VARCHAR_START) != 0
}
/// Tests whether the given ASCII character may be a non-first character of a `varname`.
///
/// Note that this does not return true for `%` character. It is caller's
/// responsibility to test validity of percent-encoded triplets.
///
/// # Precondition
///
/// The given byte should be an ASCII character, i.e. should be less than 128.
/// A larger value is out of the bounds of the lookup table.
#[inline]
#[must_use]
pub(super) const fn is_ascii_varchar_continue(c: u8) -> bool {
    let flags = CHARS_TABLE[c as usize];
    (flags & CHARS_TABLE_MASK_VARCHAR_CONTINUE) != 0
}

View File

@@ -0,0 +1,161 @@
//! Validating parsers.
use crate::parser::str::{
find_split2_hole, find_split_hole, satisfy_chars_with_pct_encoded, starts_with_double_hexdigits,
};
use crate::template::components::MaybeOperator;
use crate::template::error::{Error, ErrorKind};
use crate::template::parser::char::{
is_ascii_literal_char, is_ascii_varchar_continue, is_ascii_varchar_start,
};
/// Returns `Ok(())` if the given string is a valid literal.
fn validate_literal(s: &str, offset: usize) -> Result<(), Error> {
match s
.chars()
.position(|c| !c.is_ascii() || !is_ascii_literal_char(c as u8))
{
Some(pos) => Err(Error::new(ErrorKind::InvalidCharacter, offset + pos)),
None => Ok(()),
}
}
/// Returns `Ok(())` if the given string is a valid varspec.
fn validate_varspec(s: &str, offset: usize) -> Result<(), Error> {
match find_split2_hole(s, b':', b'*') {
Some((maybe_varname, b':', maybe_len)) => {
validate_varname(maybe_varname, offset)?;
if !(1..=5).contains(&maybe_len.len()) {
return Err(Error::new(
ErrorKind::InvalidExpression,
offset + maybe_varname.len() + 2,
));
}
if let Some(pos) = maybe_len.bytes().position(|b| !b.is_ascii_digit()) {
return Err(Error::new(
ErrorKind::InvalidExpression,
offset + maybe_varname.len() + 2 + pos,
));
}
}
Some((maybe_varname, b'*', extra)) => {
validate_varname(maybe_varname, offset)?;
if !extra.is_empty() {
return Err(Error::new(
ErrorKind::InvalidExpression,
offset + maybe_varname.len() + 1,
));
}
}
Some((_, sep, _)) => unreachable!("[consistency] the byte {sep:#02x} is not searched"),
None => validate_varname(s, offset)?,
}
Ok(())
}
/// Returns `Ok(())` if the given string is a valid varname.
pub(crate) fn validate_varname(s: &str, offset: usize) -> Result<(), Error> {
let rest = match s.as_bytes().first() {
Some(b'%') if starts_with_double_hexdigits(&s.as_bytes()[1..]) => &s[3..],
Some(b) if b.is_ascii() && is_ascii_varchar_start(*b) => &s[1..],
_ => return Err(Error::new(ErrorKind::InvalidExpression, offset)),
};
let is_valid = satisfy_chars_with_pct_encoded(rest, is_ascii_varchar_continue, |_| false);
if !is_valid {
return Err(Error::new(ErrorKind::InvalidExpression, offset));
}
Ok(())
}
/// Returns `Ok(())` if the given string is a valid expression.
///
/// "Expression" here is the expression body inside `{` and `}`, but not including braces.
fn validate_expr_body(s: &str, mut offset: usize) -> Result<(), Error> {
if s.is_empty() {
return Err(Error::new(ErrorKind::InvalidExpression, offset));
}
// Skip the operator.
let maybe_variable_list = match MaybeOperator::from_byte(s.as_bytes()[0]) {
Some(MaybeOperator::Operator(_)) => {
offset += 1;
&s[1..]
}
Some(MaybeOperator::Reserved(_)) => {
return Err(Error::new(ErrorKind::UnsupportedOperator, offset));
}
None => s,
};
// Validate varspecs.
for (spec_i, maybe_varspec) in maybe_variable_list.split(',').enumerate() {
if spec_i != 0 {
// Add the length of the leading separator `,`.
offset += 1;
}
validate_varspec(maybe_varspec, offset)?;
offset += maybe_varspec.len();
}
Ok(())
}
/// Validates whether the given string is valid as a URI template.
///
/// Returns `Ok(())` if the given string is a valid URI template.
pub(in crate::template) fn validate_template_str(s: &str) -> Result<(), Error> {
let mut rest = s;
let mut offset = 0;
while !rest.is_empty() {
rest = match find_split2_hole(rest, b'%', b'{') {
Some((literal, b'%', xdigits2_and_rest)) => {
validate_literal(literal, offset)?;
if xdigits2_and_rest.len() < 2 {
return Err(Error::new(
ErrorKind::InvalidPercentEncoding,
offset + literal.len(),
));
}
let (xdigits2, new_rest) = xdigits2_and_rest.split_at(2);
if !xdigits2.as_bytes()[0].is_ascii_hexdigit() {
return Err(Error::new(
ErrorKind::InvalidPercentEncoding,
offset + literal.len() + 1,
));
}
if !xdigits2.as_bytes()[1].is_ascii_hexdigit() {
return Err(Error::new(
ErrorKind::InvalidPercentEncoding,
offset + literal.len() + 2,
));
}
new_rest
}
Some((literal, b'{', expr_and_rest)) => {
validate_literal(literal, offset)?;
let (expr, new_rest) = match find_split_hole(expr_and_rest, b'}') {
Some(v) => v,
None => {
return Err(Error::new(
ErrorKind::ExpressionNotClosed,
offset + literal.len(),
))
}
};
// +1 is `+ "{".len()`.
validate_expr_body(expr, offset + literal.len() + 1)?;
new_rest
}
Some(_) => unreachable!("[consistency] searching only `%` and `{{`"),
None => return validate_literal(rest, offset),
};
offset = s.len() - rest.len();
}
Ok(())
}

View File

@@ -0,0 +1,218 @@
//! Simple general-purpose context type.
use core::ops::ControlFlow;
use alloc::collections::BTreeMap;
#[cfg(all(feature = "alloc", not(feature = "std")))]
use alloc::string::String;
#[cfg(all(feature = "alloc", not(feature = "std")))]
use alloc::vec::Vec;
use crate::template::context::{Context, VarName, Visitor};
/// Value of a variable in a [`SimpleContext`].
#[derive(Debug, Clone)]
pub enum Value {
/// Undefined (i.e. null).
Undefined,
/// String value.
String(String),
/// List.
List(Vec<String>),
/// Associative array, as a sequence of key-value pairs.
Assoc(Vec<(String, String)>),
}
impl From<&str> for Value {
#[inline]
fn from(v: &str) -> Self {
Self::String(v.into())
}
}
impl From<String> for Value {
#[inline]
fn from(v: String) -> Self {
Self::String(v)
}
}
/// Simple template expansion context.
///
/// This is a map from variable names to their values.
#[derive(Default, Debug, Clone)]
pub struct SimpleContext {
/// Variable values.
// Any map types (including `HashMap`) is ok, but the hash map is not provided by `alloc`.
//
// QUESTION: Should hexdigits in percent-encoded triplets in varnames be
// compared case sensitively?
variables: BTreeMap<String, Value>,
}
impl SimpleContext {
/// Creates a new empty context.
///
/// # Examples
///
/// ```
/// # use iri_string::template::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::spec::UriSpec;
/// use iri_string::template::UriTemplateStr;
/// use iri_string::template::simple_context::SimpleContext;
///
/// let empty_ctx = SimpleContext::new();
/// let template = UriTemplateStr::new("{no_such_variable}")?;
/// let expanded = template.expand::<UriSpec, _>(&empty_ctx)?;
///
/// assert_eq!(
/// expanded.to_string(),
/// ""
/// );
/// # }
/// # Ok::<_, Error>(())
/// ```
#[inline]
#[must_use]
pub fn new() -> Self {
Self::default()
}
/// Inserts a variable.
///
/// Passing [`Value::Undefined`] removes the value from the context.
///
/// The entry will be inserted or removed even if the key is invalid as a
/// variable name. Such entries will be simply ignored on expansion.
///
/// # Examples
///
/// ```
/// # use iri_string::template::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::spec::UriSpec;
/// use iri_string::template::UriTemplateStr;
/// use iri_string::template::simple_context::SimpleContext;
///
/// let mut context = SimpleContext::new();
/// context.insert("username", "foo");
///
/// let template = UriTemplateStr::new("/users/{username}")?;
/// let expanded = template.expand::<UriSpec, _>(&context)?;
///
/// assert_eq!(
/// expanded.to_string(),
/// "/users/foo"
/// );
/// # }
/// # Ok::<_, Error>(())
/// ```
///
/// Passing [`Value::Undefined`] removes the value from the context.
///
/// ```
/// # use iri_string::template::Error;
/// ## [cfg(feature = "alloc")] {
/// use iri_string::spec::UriSpec;
/// use iri_string::template::UriTemplateStr;
/// use iri_string::template::simple_context::{SimpleContext, Value};
///
/// let mut context = SimpleContext::new();
/// context.insert("username", "foo");
/// context.insert("username", Value::Undefined);
///
/// let template = UriTemplateStr::new("/users/{username}")?;
/// let expanded = template.expand::<UriSpec, _>(&context)?;
///
/// assert_eq!(
/// expanded.to_string(),
/// "/users/"
/// );
/// # }
/// # Ok::<_, Error>(())
/// ```
pub fn insert<K, V>(&mut self, key: K, value: V) -> Option<Value>
where
K: Into<String>,
V: Into<Value>,
{
let key = key.into();
match value.into() {
Value::Undefined => self.variables.remove(&key),
value => self.variables.insert(key, value),
}
}
/// Removes all entries in the context.
///
/// # Examples
///
/// ```
/// # use iri_string::template::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::spec::UriSpec;
/// use iri_string::template::UriTemplateStr;
/// use iri_string::template::simple_context::SimpleContext;
///
/// let template = UriTemplateStr::new("{foo,bar}")?;
/// let mut context = SimpleContext::new();
///
/// context.insert("foo", "FOO");
/// context.insert("bar", "BAR");
/// assert_eq!(
/// template.expand::<UriSpec, _>(&context)?.to_string(),
/// "FOO,BAR"
/// );
///
/// context.clear();
/// assert_eq!(
/// template.expand::<UriSpec, _>(&context)?.to_string(),
/// ""
/// );
/// # }
/// # Ok::<_, Error>(())
/// ```
#[inline]
pub fn clear(&mut self) {
self.variables.clear();
}
/// Returns a reference to the value for the key.
//
// QUESTION: Should hexdigits in percent-encoded triplets in varnames be
// compared case sensitively?
#[inline]
#[must_use]
pub fn get(&self, key: VarName<'_>) -> Option<&Value> {
self.variables.get(key.as_str())
}
}
impl Context for SimpleContext {
    /// Looks up the variable requested by the visitor and feeds its value.
    ///
    /// A variable without an entry is visited as undefined. Lists and
    /// associative arrays are fed item by item, and the visit is aborted with
    /// the break value as soon as the visitor requests it.
    fn visit<V: Visitor>(&self, visitor: V) -> V::Result {
        use crate::template::context::{AssocVisitor, ListVisitor};

        let name = visitor.var_name().as_str();
        match self.variables.get(name) {
            None | Some(Value::Undefined) => visitor.visit_undefined(),
            Some(Value::String(s)) => visitor.visit_string(s),
            Some(Value::List(items)) => {
                let mut list_visitor = visitor.visit_list();
                for item in items {
                    if let ControlFlow::Break(res) = list_visitor.visit_item(item) {
                        return res;
                    }
                }
                list_visitor.finish()
            }
            Some(Value::Assoc(entries)) => {
                let mut assoc_visitor = visitor.visit_assoc();
                for (k, v) in entries {
                    if let ControlFlow::Break(res) = assoc_visitor.visit_entry(k, v) {
                        return res;
                    }
                }
                assoc_visitor.finish()
            }
        }
    }
}

647
vendor/iri-string/src/template/string.rs vendored Normal file
View File

@@ -0,0 +1,647 @@
//! Template string types.
use core::fmt;
#[cfg(feature = "alloc")]
use alloc::borrow::Cow;
#[cfg(all(feature = "alloc", not(feature = "std")))]
use alloc::boxed::Box;
#[cfg(feature = "alloc")]
use alloc::rc::Rc;
#[cfg(feature = "alloc")]
use alloc::string::String;
#[cfg(feature = "alloc")]
use alloc::sync::Arc;
use crate::spec::Spec;
use crate::template::components::{VarListIter, VarName};
use crate::template::context::{Context, DynamicContext};
use crate::template::error::{Error, ErrorKind};
use crate::template::expand::{expand_whole_dynamic, Chunk, Chunks, Expanded};
use crate::template::parser::validate_template_str;
#[cfg(feature = "alloc")]
pub use self::owned::UriTemplateString;
/// Implements `PartialEq` and `PartialOrd` between two types, in both directions.
///
/// Both operands are converted to `&$ty_common` by `AsRef`, and then compared
/// as `$ty_common`.
macro_rules! impl_cmp {
    ($ty_common:ty, $ty_lhs:ty, $ty_rhs:ty) => {
        impl PartialEq<$ty_rhs> for $ty_lhs {
            #[inline]
            fn eq(&self, other: &$ty_rhs) -> bool {
                let lhs: &$ty_common = self.as_ref();
                let rhs: &$ty_common = other.as_ref();
                <$ty_common as PartialEq<$ty_common>>::eq(lhs, rhs)
            }
        }
        impl PartialEq<$ty_lhs> for $ty_rhs {
            #[inline]
            fn eq(&self, other: &$ty_lhs) -> bool {
                let lhs: &$ty_common = self.as_ref();
                let rhs: &$ty_common = other.as_ref();
                <$ty_common as PartialEq<$ty_common>>::eq(lhs, rhs)
            }
        }
        impl PartialOrd<$ty_rhs> for $ty_lhs {
            #[inline]
            fn partial_cmp(&self, other: &$ty_rhs) -> Option<core::cmp::Ordering> {
                let lhs: &$ty_common = self.as_ref();
                let rhs: &$ty_common = other.as_ref();
                <$ty_common as PartialOrd<$ty_common>>::partial_cmp(lhs, rhs)
            }
        }
        impl PartialOrd<$ty_lhs> for $ty_rhs {
            #[inline]
            fn partial_cmp(&self, other: &$ty_lhs) -> Option<core::cmp::Ordering> {
                let lhs: &$ty_common = self.as_ref();
                let rhs: &$ty_common = other.as_ref();
                <$ty_common as PartialOrd<$ty_common>>::partial_cmp(lhs, rhs)
            }
        }
    };
}
#[cfg(feature = "alloc")]
mod owned;
/// A borrowed slice of a URI template.
///
/// URI Template is defined by [RFC 6570].
///
/// Note that "URI Template" can also be used for IRI.
///
/// [RFC 6570]: https://www.rfc-editor.org/rfc/rfc6570.html
///
/// # Valid values
///
/// This type can have a URI template string.
///
/// # Applied errata
///
/// [Errata ID 6937](https://www.rfc-editor.org/errata/eid6937) is applied, so
/// single quotes are allowed to appear in a URI template.
///
/// ```
/// # use iri_string::template::Error;
/// use iri_string::template::UriTemplateStr;
///
/// let template = UriTemplateStr::new("'quoted'")?;
/// # Ok::<_, Error>(())
/// ```
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
#[cfg_attr(feature = "serde", serde(transparent))]
// NOTE: `repr(transparent)` is relied on by the pointer casts between `str`
// and this type.
#[repr(transparent)]
#[derive(PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct UriTemplateStr {
/// The raw string.
inner: str,
}
impl UriTemplateStr {
/// Creates a new string.
///
/// The conversion is delegated to the `TryFrom<&str>` implementation.
///
/// # Examples
///
/// ```
/// # use iri_string::template::Error;
/// use iri_string::template::UriTemplateStr;
///
/// let template = UriTemplateStr::new("/users/{username}")?;
/// # Ok::<_, Error>(())
/// ```
#[inline]
pub fn new(s: &str) -> Result<&Self, Error> {
TryFrom::try_from(s)
}
/// Creates a new string without validation.
///
/// This does not validate the given string, so it is caller's
/// responsibility to ensure the given string is valid.
///
/// # Safety
///
/// The given string must be syntactically valid as `Self` type.
/// If not, any use of the returned value or the call of this
/// function itself may result in undefined behavior.
#[inline]
#[must_use]
pub unsafe fn new_unchecked(s: &str) -> &Self {
// SAFETY: `new_always_unchecked` requires the same precondition
// as `new_unchecked`, which the caller of this function guarantees.
unsafe { Self::new_always_unchecked(s) }
}
/// Creates a new string without any validation.
///
/// This does not validate the given string at any time.
///
/// Intended for internal use.
///
/// # Safety
///
/// The given string must be valid.
#[inline]
#[must_use]
unsafe fn new_always_unchecked(s: &str) -> &Self {
// SAFETY: the cast is safe since `Self` type has `repr(transparent)`
// attribute and the content is guaranteed as valid by the
// precondition of the function.
unsafe { &*(s as *const str as *const Self) }
}
/// Returns the template as a plain `&str`.
///
/// # Examples
///
/// ```
/// # use iri_string::template::Error;
/// use iri_string::template::UriTemplateStr;
///
/// let template = UriTemplateStr::new("/users/{username}")?;
/// assert_eq!(template.as_str(), "/users/{username}");
/// # Ok::<_, Error>(())
/// ```
#[inline]
#[must_use]
pub fn as_str(&self) -> &str {
self.as_ref()
}
/// Returns the template string length.
///
/// This is the length of the template as a `str`, i.e. in bytes.
///
/// # Examples
///
/// ```
/// # use iri_string::template::Error;
/// use iri_string::template::UriTemplateStr;
///
/// let template = UriTemplateStr::new("/users/{username}")?;
/// assert_eq!(template.len(), "/users/{username}".len());
/// # Ok::<_, Error>(())
/// ```
#[inline]
#[must_use]
pub fn len(&self) -> usize {
self.as_str().len()
}
/// Returns whether the string is empty.
///
/// # Examples
///
/// ```
/// # use iri_string::template::Error;
/// use iri_string::template::UriTemplateStr;
///
/// let template = UriTemplateStr::new("/users/{username}")?;
/// assert!(!template.is_empty());
///
/// let empty = UriTemplateStr::new("")?;
/// assert!(empty.is_empty());
/// # Ok::<_, Error>(())
/// ```
#[inline]
#[must_use]
pub fn is_empty(&self) -> bool {
self.as_str().is_empty()
}
}
impl UriTemplateStr {
    /// Expands the template with the given context.
    ///
    /// # Examples
    ///
    /// ```
    /// # use iri_string::template::Error;
    /// # #[cfg(feature = "alloc")] {
    /// use iri_string::spec::UriSpec;
    /// use iri_string::template::UriTemplateStr;
    /// use iri_string::template::simple_context::SimpleContext;
    ///
    /// let mut context = SimpleContext::new();
    /// context.insert("username", "foo");
    ///
    /// let template = UriTemplateStr::new("/users/{username}")?;
    /// let expanded = template.expand::<UriSpec, _>(&context)?;
    ///
    /// assert_eq!(
    ///     expanded.to_string(),
    ///     "/users/foo"
    /// );
    /// # }
    /// # Ok::<_, Error>(())
    /// ```
    ///
    /// You can control allowed characters in the output by changing spec type.
    ///
    /// ```
    /// # use iri_string::template::Error;
    /// # #[cfg(feature = "alloc")] {
    /// use iri_string::spec::{IriSpec, UriSpec};
    /// use iri_string::template::UriTemplateStr;
    /// use iri_string::template::simple_context::SimpleContext;
    ///
    /// let mut context = SimpleContext::new();
    /// context.insert("alpha", "\u{03B1}");
    ///
    /// let template = UriTemplateStr::new("{?alpha}")?;
    ///
    /// assert_eq!(
    ///     template.expand::<UriSpec, _>(&context)?.to_string(),
    ///     "?alpha=%CE%B1",
    ///     "a URI cannot contain Unicode alpha (U+03B1), so it should be escaped"
    /// );
    /// assert_eq!(
    ///     template.expand::<IriSpec, _>(&context)?.to_string(),
    ///     "?alpha=\u{03B1}",
    ///     "an IRI can contain Unicode alpha (U+03B1), so it is written as is"
    /// );
    /// # }
    /// # Ok::<_, Error>(())
    /// ```
    #[inline]
    pub fn expand<'a, S: Spec, C: Context>(
        &'a self,
        context: &'a C,
    ) -> Result<Expanded<'a, S, C>, Error> {
        Expanded::new(self, context)
    }
    /// Expands the template with the given dynamic context.
    ///
    /// The result is written to the given writer.
    ///
    // NOTE: The fragments below are concatenated as is, so the first one
    // should end with a space to separate the words in the rendered docs.
    #[cfg_attr(
        feature = "alloc",
        doc = concat!(
            "If you need the allocated [`String`], use ",
            "[`expand_dynamic_to_string`][`Self::expand_dynamic_to_string`]."
        )
    )]
    ///
    /// See the documentation for [`DynamicContext`] for usage.
    pub fn expand_dynamic<S: Spec, W: fmt::Write, C: DynamicContext>(
        &self,
        writer: &mut W,
        context: &mut C,
    ) -> Result<(), Error> {
        expand_whole_dynamic::<S, _, _>(self, writer, context)
    }
    /// Expands the template into a string, with the given dynamic context.
    ///
    /// This is basically [`expand_dynamic`][`Self::expand_dynamic`] method
    /// that returns an owned string instead of writing to the given writer.
    ///
    /// See the documentation for [`DynamicContext`] for usage.
    ///
    /// # Examples
    ///
    /// ```
    /// # #[cfg(feature = "alloc")]
    /// # extern crate alloc;
    /// # use iri_string::template::Error;
    /// # #[cfg(feature = "alloc")] {
    /// # use alloc::string::String;
    /// use iri_string::template::UriTemplateStr;
    /// # use iri_string::template::context::{DynamicContext, Visitor, VisitPurpose};
    /// use iri_string::spec::UriSpec;
    ///
    /// struct MyContext<'a> {
    ///     // See the documentation for `DynamicContext`.
    /// #     /// Target path.
    /// #     target: &'a str,
    /// #     /// Username.
    /// #     username: Option<&'a str>,
    /// #     /// A flag to remember whether the URI template
    /// #     /// attempted to use `username` variable.
    /// #     username_visited: bool,
    /// }
    /// #
    /// # impl DynamicContext for MyContext<'_> {
    /// #     fn on_expansion_start(&mut self) {
    /// #         // Reset the state.
    /// #         self.username_visited = false;
    /// #     }
    /// #     fn visit_dynamic<V: Visitor>(&mut self, visitor: V) -> V::Result {
    /// #         match visitor.var_name().as_str() {
    /// #             "target" => visitor.visit_string(self.target),
    /// #             "username" => {
    /// #                 if visitor.purpose() == VisitPurpose::Expand {
    /// #                     // The variable `username` is being used
    /// #                     // on the template expansion.
    /// #                     // Don't care whether `username` is defined or not.
    /// #                     self.username_visited = true;
    /// #                 }
    /// #                 if let Some(username) = &self.username {
    /// #                     visitor.visit_string(username)
    /// #                 } else {
    /// #                     visitor.visit_undefined()
    /// #                 }
    /// #             }
    /// #             _ => visitor.visit_undefined(),
    /// #         }
    /// #     }
    /// # }
    ///
    /// let mut context = MyContext {
    ///     target: "/posts/1",
    ///     username: Some("the_admin"),
    ///     username_visited: false,
    /// };
    ///
    /// // The template accesses the variable `username`.
    /// let template = UriTemplateStr::new("{+target}{?username}")?;
    /// let s = template.expand_dynamic_to_string::<UriSpec, _>(&mut context)?;
    /// assert_eq!(s, "/posts/1?username=the_admin");
    /// assert!(context.username_visited);
    /// # }
    /// # Ok::<_, Error>(())
    /// ```
    #[cfg(feature = "alloc")]
    pub fn expand_dynamic_to_string<S: Spec, C: DynamicContext>(
        &self,
        context: &mut C,
    ) -> Result<String, Error> {
        let mut buf = String::new();
        expand_whole_dynamic::<S, _, _>(self, &mut buf, context)?;
        Ok(buf)
    }
    /// Returns an iterator of variables in the template.
    ///
    /// As the example shows, a variable that appears more than once in the
    /// template is returned once per appearance.
    ///
    /// # Examples
    ///
    /// ```
    /// # use iri_string::template::Error;
    /// use iri_string::template::UriTemplateStr;
    ///
    /// let template = UriTemplateStr::new("foo{/bar*,baz:4}{?qux}{&bar*}")?;
    /// let mut vars = template.variables();
    /// assert_eq!(vars.next().map(|var| var.as_str()), Some("bar"));
    /// assert_eq!(vars.next().map(|var| var.as_str()), Some("baz"));
    /// assert_eq!(vars.next().map(|var| var.as_str()), Some("qux"));
    /// assert_eq!(vars.next().map(|var| var.as_str()), Some("bar"));
    /// # Ok::<_, Error>(())
    /// ```
    #[inline]
    #[must_use]
    pub fn variables(&self) -> UriTemplateVariables<'_> {
        UriTemplateVariables::new(self)
    }
}
impl fmt::Debug for UriTemplateStr {
    /// Formats the template as a one-field tuple struct named `UriTemplateStr`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // A reference to the `&str` is passed because the field value has to
        // be usable as a sized `Debug` object.
        let inner: &str = &self.inner;
        let mut tuple = f.debug_tuple("UriTemplateStr");
        tuple.field(&inner);
        tuple.finish()
    }
}
impl AsRef<str> for UriTemplateStr {
    /// Borrows the template as a plain string slice.
    #[inline]
    fn as_ref(&self) -> &str {
        // Borrow the field directly rather than going through another
        // accessor, so this impl does not depend on any other conversion.
        let Self { inner, .. } = self;
        inner
    }
}
// Identity conversion: lets APIs that take `impl AsRef<UriTemplateStr>`
// accept a borrowed template directly.
impl AsRef<UriTemplateStr> for UriTemplateStr {
#[inline]
fn as_ref(&self) -> &UriTemplateStr {
self
}
}
#[cfg(feature = "alloc")]
impl<'a> From<&'a UriTemplateStr> for Cow<'a, UriTemplateStr> {
    /// Wraps the borrowed template without copying it.
    #[inline]
    fn from(s: &'a UriTemplateStr) -> Self {
        Self::Borrowed(s)
    }
}
#[cfg(feature = "alloc")]
impl From<&UriTemplateStr> for Arc<UriTemplateStr> {
    /// Copies the template into a newly allocated `Arc`.
    fn from(s: &UriTemplateStr) -> Self {
        let shared: Arc<str> = Arc::from(s.as_str());
        let ptr = Arc::into_raw(shared) as *const UriTemplateStr;
        // SAFETY: `UriTemplateStr` has `repr(transparent)` attribute, so
        // the memory layouts of `Arc<str>` and `Arc<UriTemplateStr>` are
        // compatible. The pointer comes from `Arc::into_raw` just above.
        unsafe { Self::from_raw(ptr) }
    }
}
#[cfg(feature = "alloc")]
impl From<&UriTemplateStr> for Box<UriTemplateStr> {
    /// Copies the template into a newly allocated `Box`.
    fn from(s: &UriTemplateStr) -> Self {
        let boxed: Box<str> = Box::from(s.as_str());
        let ptr = Box::into_raw(boxed) as *mut UriTemplateStr;
        // SAFETY: `UriTemplateStr` has `repr(transparent)` attribute, so
        // the memory layouts of `Box<str>` and `Box<UriTemplateStr>` are
        // compatible. The pointer comes from `Box::into_raw` just above.
        unsafe { Self::from_raw(ptr) }
    }
}
#[cfg(feature = "alloc")]
impl From<&UriTemplateStr> for Rc<UriTemplateStr> {
    /// Copies the template into a newly allocated `Rc`.
    fn from(s: &UriTemplateStr) -> Self {
        let shared: Rc<str> = Rc::from(s.as_str());
        let ptr = Rc::into_raw(shared) as *const UriTemplateStr;
        // SAFETY: `UriTemplateStr` has `repr(transparent)` attribute, so
        // the memory layouts of `Rc<str>` and `Rc<UriTemplateStr>` are
        // compatible. The pointer comes from `Rc::into_raw` just above.
        unsafe { Self::from_raw(ptr) }
    }
}
impl<'a> From<&'a UriTemplateStr> for &'a str {
    /// Borrows the template as a plain string slice with the same lifetime.
    #[inline]
    fn from(s: &'a UriTemplateStr) -> &'a str {
        AsRef::<str>::as_ref(s)
    }
}
impl<'a> TryFrom<&'a str> for &'a UriTemplateStr {
    type Error = Error;

    /// Validates `s` as a URI template and reinterprets it in place.
    #[inline]
    fn try_from(s: &'a str) -> Result<Self, Self::Error> {
        validate_template_str(s).map(|()| {
            // SAFETY: just checked the string is valid.
            unsafe { UriTemplateStr::new_always_unchecked(s) }
        })
    }
}
impl<'a> TryFrom<&'a [u8]> for &'a UriTemplateStr {
type Error = Error;
#[inline]
fn try_from(bytes: &'a [u8]) -> Result<Self, Self::Error> {
let s = core::str::from_utf8(bytes)
.map_err(|e| Error::new(ErrorKind::InvalidUtf8, e.valid_up_to()))?;
match validate_template_str(s) {
// SAFETY: just checked the string is valid.
Ok(()) => Ok(unsafe { UriTemplateStr::new_always_unchecked(s) }),
Err(e) => Err(e),
}
}
}
// Comparison impls between `UriTemplateStr` and plain string types.
// NOTE(review): `impl_cmp!` is defined outside this part of the file;
// presumably it generates `PartialEq`/`PartialOrd` impls for each listed
// pair of types, compared as `str` -- confirm against the macro definition.
impl_cmp!(str, str, UriTemplateStr);
impl_cmp!(str, &str, UriTemplateStr);
impl_cmp!(str, str, &UriTemplateStr);
impl fmt::Display for UriTemplateStr {
    /// Writes the template text verbatim.
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let text: &str = self.as_str();
        f.write_str(text)
    }
}
/// Serde deserializer implementation.
#[cfg(feature = "serde")]
mod __serde_slice {
    use super::UriTemplateStr;

    use core::fmt;

    use serde::{
        de::{self, Visitor},
        Deserialize, Deserializer,
    };

    /// Custom borrowed string visitor.
    ///
    /// Only borrowed input (`visit_borrowed_str`) is accepted, since the
    /// produced value borrows from the deserializer input.
    #[derive(Debug, Clone, Copy)]
    struct CustomStrVisitor;

    impl<'de> Visitor<'de> for CustomStrVisitor {
        type Value = &'de UriTemplateStr;

        #[inline]
        fn expecting(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
            f.write_str("URI template string")
        }

        /// Validates the borrowed string as a URI template.
        #[inline]
        fn visit_borrowed_str<E>(self, v: &'de str) -> Result<Self::Value, E>
        where
            E: de::Error,
        {
            <&'de UriTemplateStr as TryFrom<&'de str>>::try_from(v).map_err(E::custom)
        }
    }

    // About `'de` and `'a`, see
    // <https://serde.rs/lifetimes.html#the-deserializede-lifetime>.
    impl<'a, 'de: 'a> Deserialize<'de> for &'a UriTemplateStr {
        #[inline]
        fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
        where
            D: Deserializer<'de>,
        {
            // Hint `str`, not `string`: this type cannot take ownership of a
            // buffer, and the visitor only implements `visit_borrowed_str`.
            // A deserializer that answers the `string` hint with
            // `visit_string` would otherwise always be rejected.
            deserializer.deserialize_str(CustomStrVisitor)
        }
    }
}
/// An iterator of variables in a URI template.
///
/// Yields the name of each variable found in the expressions of the
/// template, one expression at a time.
#[derive(Debug, Clone)]
pub struct UriTemplateVariables<'a> {
/// Chunks iterator.
///
/// Source of the expressions that have not been visited yet.
chunks: Chunks<'a>,
/// Variables in the last chunk.
///
/// `None` until the first expression is found, and again once the
/// variables of the current expression are exhausted.
vars_in_chunk: Option<VarListIter<'a>>,
}
impl<'a> UriTemplateVariables<'a> {
    /// Builds a variables iterator positioned at the start of the template.
    #[inline]
    #[must_use]
    fn new(template: &'a UriTemplateStr) -> Self {
        let chunks = Chunks::new(template);
        // No expression has been entered yet.
        Self {
            chunks,
            vars_in_chunk: None,
        }
    }
}
impl<'a> Iterator for UriTemplateVariables<'a> {
    type Item = VarName<'a>;

    /// Returns the next variable name, moving on to later expressions as needed.
    fn next(&mut self) -> Option<Self::Item> {
        loop {
            // First drain the variables of the expression being visited.
            if let Some(vars) = self.vars_in_chunk.as_mut() {
                if let Some((_len, spec)) = vars.next() {
                    return Some(spec.name());
                }
                // Exhausted: forget it so that it is never polled again.
                self.vars_in_chunk = None;
            }

            // Skip literals until the next expression; stop when the
            // template has no more chunks.
            let expr = self.chunks.find_map(|chunk| match chunk {
                Chunk::Expr(v) => Some(v),
                Chunk::Literal(_) => None,
            })?;
            self.vars_in_chunk = Some(expr.decompose().1.into_iter());
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    use crate::spec::IriSpec;
    use crate::template::context::{AssocVisitor, ListVisitor, Visitor};

    /// Context defining one string, one list, and one associative array.
    struct TestContext;

    impl Context for TestContext {
        fn visit<V: Visitor>(&self, visitor: V) -> V::Result {
            match visitor.var_name().as_str() {
                "str" => visitor.visit_string("string"),
                "list" => visitor
                    .visit_list()
                    .visit_items_and_finish(["item0", "item1", "item2"]),
                "assoc" => visitor
                    .visit_assoc()
                    .visit_entries_and_finish([("key0", "value0"), ("key1", "value1")]),
                _ => visitor.visit_undefined(),
            }
        }
    }

    /// Expands `template` with `TestContext` and returns the location
    /// reported by the expansion error, if the expansion failed.
    fn expand_error_location(template: &str) -> Option<usize> {
        UriTemplateStr::new(template)
            .unwrap()
            .expand::<IriSpec, _>(&TestContext)
            .err()
            .map(|e| e.location())
    }

    #[test]
    fn expand_error_pos() {
        // Each case pairs a template with the prefix that precedes the
        // variable expected to be reported by the error.
        let cases = [
            ("foo{list:4}", "foo{"),
            ("foo{/list*,list:4}", "foo{/list*,"),
            ("foo{/str:3,list*,assoc:4}", "foo{/str:3,list*,"),
        ];
        for &(template, prefix) in &cases {
            assert_eq!(expand_error_location(template), Some(prefix.len()));
        }
    }
}

View File

@@ -0,0 +1,296 @@
//! Owned `UriTemplateString`.
use core::fmt;
use alloc::borrow::Cow;
#[cfg(all(feature = "alloc", not(feature = "std")))]
use alloc::borrow::ToOwned;
#[cfg(all(feature = "alloc", not(feature = "std")))]
use alloc::boxed::Box;
#[cfg(all(feature = "alloc", not(feature = "std")))]
use alloc::string::String;
use crate::template::error::{CreationError, Error, ErrorKind};
use crate::template::parser::validate_template_str;
use crate::template::string::UriTemplateStr;
/// An owned string of a URI template.
///
/// URI Template is defined by [RFC 6570].
///
/// Note that "URI Template" can also be used for IRI.
///
/// [RFC 6570]: https://www.rfc-editor.org/rfc/rfc6570.html
///
/// # Valid values
///
/// This type can have a URI template string.
// Note that `From<$ty> for {Arc,Rc}<$slice>` is currently not implemented since
// this won't reuse allocated memory and hides internal memory reallocation. See
// <https://github.com/lo48576/iri-string/issues/20#issuecomment-1105207849>.
// However, this is not decided with firm belief or opinion, so there would be
// a chance that they are implemented in future.
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
#[cfg_attr(feature = "serde", serde(transparent))]
#[derive(Default, Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct UriTemplateString {
/// Inner data.
///
/// Other code relies on this always being a syntactically valid template.
inner: String,
}
impl UriTemplateString {
    /// Wraps the given string as a URI template without validating it.
    ///
    /// No check is performed here, so the caller is responsible for
    /// ensuring that the given string is a valid template.
    ///
    /// # Safety
    ///
    /// The given string must be syntactically valid as `Self` type.
    /// If not, any use of the returned value or the call of this
    /// function itself may result in undefined behavior.
    #[inline]
    #[must_use]
    pub unsafe fn new_unchecked(s: alloc::string::String) -> Self {
        // Building the value needs no unsafe operation by itself. The
        // function is `unsafe` because every other place (including unsafe
        // functions) expects `self.inner` to be syntactically valid as
        // `Self`, and here only the caller can guarantee that.
        let inner = s;
        Self { inner }
    }

    /// Releases unused capacity of the inner buffer.
    #[inline]
    pub fn shrink_to_fit(&mut self) {
        String::shrink_to_fit(&mut self.inner);
    }

    /// Returns the capacity of the inner buffer, in bytes.
    #[inline]
    #[must_use]
    pub fn capacity(&self) -> usize {
        String::capacity(&self.inner)
    }

    /// Returns the borrowed URI template string slice.
    ///
    /// This is equivalent to `&*self`.
    #[inline]
    #[must_use]
    pub fn as_slice(&self) -> &UriTemplateStr {
        AsRef::<UriTemplateStr>::as_ref(self)
    }

    /// Appends another template to the end of this one.
    #[inline]
    pub fn append(&mut self, other: &UriTemplateStr) {
        let suffix: &str = other.as_str();
        self.inner.push_str(suffix);
        // Debug builds re-validate the concatenated template.
        debug_assert!(validate_template_str(self.as_str()).is_ok());
    }
}
impl AsRef<str> for UriTemplateString {
    /// Borrows the template as a plain string slice.
    #[inline]
    fn as_ref(&self) -> &str {
        self.inner.as_str()
    }
}
impl AsRef<UriTemplateStr> for UriTemplateString {
    /// Borrows the owned template as a template slice.
    #[inline]
    fn as_ref(&self) -> &UriTemplateStr {
        let raw: &str = self.inner.as_str();
        // SAFETY: `UriTemplateString` and `UriTemplateStr` require the same
        // validation, so the content of `self: &UriTemplateString` must be
        // valid as `UriTemplateStr`.
        unsafe { UriTemplateStr::new_always_unchecked(raw) }
    }
}
impl core::borrow::Borrow<str> for UriTemplateString {
    /// Borrows the template as a plain string slice.
    #[inline]
    fn borrow(&self) -> &str {
        AsRef::<str>::as_ref(self)
    }
}
impl core::borrow::Borrow<UriTemplateStr> for UriTemplateString {
    /// Borrows the owned template as a template slice.
    #[inline]
    fn borrow(&self) -> &UriTemplateStr {
        AsRef::<UriTemplateStr>::as_ref(self)
    }
}
impl ToOwned for UriTemplateStr {
type Owned = UriTemplateString;
#[inline]
fn to_owned(&self) -> Self::Owned {
self.into()
}
}
impl From<&'_ UriTemplateStr> for UriTemplateString {
    /// Copies the borrowed template into a new owned string.
    #[inline]
    fn from(s: &UriTemplateStr) -> Self {
        // No validation is needed: `s` is already a valid template.
        let inner = alloc::string::String::from(s.as_str());
        Self { inner }
    }
}
impl From<UriTemplateString> for alloc::string::String {
#[inline]
fn from(s: UriTemplateString) -> Self {
s.inner
}
}
impl<'a> From<UriTemplateString> for Cow<'a, UriTemplateStr> {
    /// Wraps the owned template without copying it.
    #[inline]
    fn from(s: UriTemplateString) -> Cow<'a, UriTemplateStr> {
        let owned: Cow<'a, UriTemplateStr> = Cow::Owned(s);
        owned
    }
}
impl From<UriTemplateString> for Box<UriTemplateStr> {
    /// Converts the owned template into a boxed template slice.
    #[inline]
    fn from(s: UriTemplateString) -> Box<UriTemplateStr> {
        let boxed: Box<str> = String::from(s).into_boxed_str();
        let ptr = Box::into_raw(boxed) as *mut UriTemplateStr;
        // SAFETY: `UriTemplateStr` has `repr(transparent)` attribute, so
        // the memory layouts of `Box<str>` and `Box<UriTemplateStr>` are
        // compatible. Additionally, `UriTemplateString` and `UriTemplateStr`
        // require the same syntax.
        unsafe { Box::<UriTemplateStr>::from_raw(ptr) }
    }
}
impl TryFrom<&'_ str> for UriTemplateString {
    type Error = Error;

    /// Validates `s` as a URI template and copies it into an owned string.
    #[inline]
    fn try_from(s: &str) -> Result<Self, Self::Error> {
        let template = <&UriTemplateStr>::try_from(s)?;
        Ok(Self::from(template))
    }
}
impl TryFrom<&'_ [u8]> for UriTemplateString {
type Error = Error;
#[inline]
fn try_from(bytes: &[u8]) -> Result<Self, Self::Error> {
let s = core::str::from_utf8(bytes)
.map_err(|e| Error::new(ErrorKind::InvalidUtf8, e.valid_up_to()))?;
<&UriTemplateStr>::try_from(s).map(Into::into)
}
}
impl core::convert::TryFrom<alloc::string::String> for UriTemplateString {
    type Error = CreationError<String>;

    /// Validates `s` as a URI template and takes ownership of it.
    ///
    /// On failure, the string is handed back inside the error.
    #[inline]
    fn try_from(s: alloc::string::String) -> Result<Self, Self::Error> {
        // Only the validation outcome is needed; the borrowed slice is dropped.
        match <&UriTemplateStr>::try_from(s.as_str()).err() {
            Some(e) => Err(CreationError::new(e, s)),
            // The conversion above succeeded, so `s` is a valid template.
            None => Ok(Self { inner: s }),
        }
    }
}
impl alloc::str::FromStr for UriTemplateString {
    type Err = Error;

    /// Parses a URI template; same as the `TryFrom<&str>` conversion.
    #[inline]
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        <Self as TryFrom<&str>>::try_from(s)
    }
}
impl core::ops::Deref for UriTemplateString {
    type Target = UriTemplateStr;

    /// Dereferences to the borrowed template slice.
    #[inline]
    fn deref(&self) -> &UriTemplateStr {
        AsRef::<UriTemplateStr>::as_ref(self)
    }
}
// Comparison impls involving `UriTemplateStr`, `UriTemplateString`, and
// plain string types.
// NOTE(review): `impl_cmp!` is defined outside this part of the file;
// presumably it generates `PartialEq`/`PartialOrd` impls for each listed
// pair of types, compared as `str` -- confirm against the macro definition.
impl_cmp!(str, UriTemplateStr, Cow<'_, str>);
impl_cmp!(str, &UriTemplateStr, Cow<'_, str>);
impl_cmp!(str, str, UriTemplateString);
impl_cmp!(str, &str, UriTemplateString);
impl_cmp!(str, Cow<'_, str>, UriTemplateString);
impl_cmp!(str, String, UriTemplateString);
impl fmt::Display for UriTemplateString {
    /// Writes the template text verbatim.
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let text: &str = self.as_str();
        f.write_str(text)
    }
}
/// Serde deserializer implementation.
#[cfg(feature = "serde")]
mod __serde_owned {
    use super::UriTemplateString;

    use core::fmt;

    #[cfg(all(feature = "alloc", feature = "serde", not(feature = "std")))]
    use alloc::string::String;

    use serde::{
        de::{self, Visitor},
        Deserialize, Deserializer,
    };

    /// Custom owned string visitor.
    ///
    /// Accepts both borrowed and owned input; owned input is reused as the
    /// buffer of the resulting template.
    #[derive(Debug, Clone, Copy)]
    struct CustomStringVisitor;

    impl Visitor<'_> for CustomStringVisitor {
        type Value = UriTemplateString;

        #[inline]
        fn expecting(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
            f.write_str("URI template string")
        }

        /// Validates the string and copies it into an owned template.
        #[inline]
        fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
        where
            E: de::Error,
        {
            <UriTemplateString as TryFrom<&str>>::try_from(v).map_err(E::custom)
        }

        /// Validates the string and takes ownership of it.
        #[inline]
        fn visit_string<E>(self, v: String) -> Result<Self::Value, E>
        where
            E: de::Error,
        {
            <UriTemplateString as TryFrom<String>>::try_from(v).map_err(E::custom)
        }
    }

    impl<'de> Deserialize<'de> for UriTemplateString {
        #[inline]
        fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
        where
            D: Deserializer<'de>,
        {
            // Hint `string`: this type benefits from taking ownership of a
            // buffer the deserializer already holds (`visit_string` above).
            // The visitor still accepts `visit_str` from deserializers that
            // only have borrowed data.
            deserializer.deserialize_string(CustomStringVisitor)
        }
    }
}

224
vendor/iri-string/src/types.rs vendored Normal file
View File

@@ -0,0 +1,224 @@
//! URI and IRI types.
//!
//! # URI and IRI
//!
//! IRIs (Internationalized Resource Identifiers) are defined in [RFC 3987],
//! and URIs (Uniform Resource Identifiers) are defined in [RFC 3986].
//!
//! URI consists of only ASCII characters, and is a subset of IRI.
//!
//! IRIs are defined as below:
//!
//! ```text
//! IRI = scheme ":" ihier-part [ "?" iquery ] [ "#" ifragment ]
//! IRI-reference = IRI / irelative-ref
//! absolute-IRI = scheme ":" ihier-part [ "?" iquery ]
//! irelative-ref = irelative-part [ "?" iquery ] [ "#" ifragment ]
//! (`irelative-part` is roughly same as `ihier-part`.)
//! ```
//!
//! Definitions for URIs are almost same, but they cannot have non-ASCII characters.
//!
//! # Types
//!
//! Types can be categorized by:
//!
//! * syntax,
//! * spec, and
//! * ownership.
//!
//! ## Syntax
//!
//! Since URIs and IRIs have almost same syntax and share algorithms, they are implemented by
//! generic types.
//!
//! * [`RiStr`] and [`RiString`]
//! + String types for `IRI` and `URI` rules.
//! * [`RiAbsoluteStr`] and [`RiAbsoluteString`]
//! + String types for `absolute-IRI` and `absolute-URI` rules.
//! * [`RiReferenceStr`] and [`RiReferenceString`]
//! + String types for `IRI-reference` and `URI-reference` rules.
//! * [`RiRelativeStr`] and [`RiRelativeString`]
//! + String types for `irelative-ref` and `relative-ref` rules.
//! * [`RiFragmentStr`] and [`RiFragmentString`]
//! + String types for `ifragment` and `fragment` rules.
//! + Note that these types represent a substring of an IRI / URI reference.
//! They are not intended to be used directly as an IRI / URI reference.
//!
//! "Ri" stands for "Resource Identifier".
//!
//! ## Spec
//!
//! These types have a type parameter, which represents RFC specification.
//! [`IriSpec`] represents [RFC 3987] spec, and [`UriSpec`] represents [RFC 3986] spec.
//! For example, `RiAbsoluteStr<IriSpec>` can have `absolute-IRI` string value,
//! and `RiReferenceStr<UriSpec>` can have `URI-reference` string value.
//!
//! ## Ownership
//!
//! String-like types usually have two variations, borrowed and owned.
//!
//! Borrowed types (such as `str`, `Path`, `OsStr`) are unsized, and used by reference style.
//! Owned types (such as `String`, `PathBuf`, `OsString`) are sized, and require heap allocation.
//! Owned types can be coerced to a borrowed type (for example, `&String` is automatically coerced
//! to `&str` in many context).
//!
//! IRI / URI types have same variations, `RiFooStr` and `RiFooString`
//! (`Foo` part represents syntax).
//! They are very similar to `&str` and `String`.
//! `Deref` is implemented, `RiFooStr::len()` is available, `&RiFooString` can be coerced to
//! `&RiFooStr`, `Cow<'_, RiFooStr>` and `Box<RiFooStr>` are available, and so on.
//!
//! # Hierarchy and safe conversion
//!
//! IRI syntaxes have the hierarchy below.
//!
//! ```text
//! RiReferenceStr
//! |-- RiStr
//! | `-- RiAbsoluteStr
//! `-- RiRelativeStr
//! ```
//!
//! Therefore, the conversions below are safe and cheap:
//!
//! * `RiStr -> RiReferenceStr`
//! * `RiAbsoluteStr -> RiStr`
//! * `RiAbsoluteStr -> RiReferenceStr`
//! * `RiRelativeStr -> RiReferenceStr`
//!
//! For safely convertible types (consider `FooStr -> BarStr` is safe), traits
//! below are implemented:
//!
//! * `AsRef<BarStr> for FooStr`
//! * `AsRef<BarStr> for FooString`
//! * `From<FooString> for BarString`
//! * `PartialEq<FooStr> for BarStr`, and lots of impls like that
//! + `PartialEq` and `PartialOrd`.
//! + Slice, owned, `Cow`, reference, etc...
//!
//! ## Fallible conversions
//!
//! Fallible conversions are implemented from plain string into IRI strings.
//!
//! * `TryFrom<&str> for &FooStr`
//! * `TryFrom<&str> for FooString`
//! * `TryFrom<String> for FooString`
//! * `FromStr for FooString`
//!
//! Some IRI string types provide more convenient methods to convert between IRI types.
//! For example, [`RiReferenceString::into_iri()`] tries to convert an IRI reference into an IRI,
//! and returns `Result<IriString, IriRelativeString>`.
//! This is because an IRI reference is valid as an IRI or a relative IRI reference.
//! Such methods are usually more efficient than using `TryFrom` for plain strings, because they
//! prevent you from losing ownership of a string, and do a conversion without extra memory
//! allocation.
//!
//! # Aliases
//!
//! This module contains type aliases for RFC 3986 URI types and RFC 3987 IRI types.
//!
//! `IriFooStr{,ing}` are aliases of `RiFooStr{,ing}<IriSpec>`, and `UriFooStr{,ing}` are aliases
//! of `RiFooStr{,ing}<UriSpec>`.
//!
//! # Wrapped string types
//!
//! Similar to string types in std (such as `str`, `std::path::Path`, and `std::ffi::OsStr`),
//! IRI string types in this crate provide convenient conversions to:
//!
//! * `std::boxed::Box`,
//! * `std::borrow::Cow`,
//! * `std::rc::Rc`, and
//! * `std::sync::Arc`.
//!
//! ```
//! # use iri_string::validate::Error;
//! # #[cfg(feature = "std")] {
//! use std::borrow::Cow;
//! use std::rc::Rc;
//! use std::sync::Arc;
//!
//! use iri_string::types::IriStr;
//!
//! let iri = IriStr::new("http://example.com/")?;
//! let iri_owned = iri.to_owned();
//!
//! // From slice.
//! let cow_1_1: Cow<'_, IriStr> = iri.into();
//! let cow_1_2 = Cow::<'_, IriStr>::from(iri);
//! assert!(matches!(cow_1_1, Cow::Borrowed(_)));
//! assert!(matches!(cow_1_2, Cow::Borrowed(_)));
//! // From owned.
//! let cow_2_1: Cow<'_, IriStr> = iri_owned.clone().into();
//! let cow_2_2 = Cow::<'_, IriStr>::from(iri_owned.clone());
//! assert!(matches!(cow_2_1, Cow::Owned(_)));
//! assert!(matches!(cow_2_2, Cow::Owned(_)));
//!
//! // From slice.
//! let box_1_1: Box<IriStr> = iri.into();
//! let box_1_2 = Box::<IriStr>::from(iri);
//! // From owned.
//! let box_2_1: Box<IriStr> = iri_owned.clone().into();
//! let box_2_2 = Box::<IriStr>::from(iri_owned.clone());
//!
//! // From slice.
//! let rc_1_1: Rc<IriStr> = iri.into();
//! let rc_1_2 = Rc::<IriStr>::from(iri);
//! // From owned.
//! // Note that `From<owned> for Rc<borrowed>` is not implemented for now.
//! // Get borrowed string by `.as_slice()` and convert it.
//! let rc_2_1: Rc<IriStr> = iri_owned.clone().as_slice().into();
//! let rc_2_2 = Rc::<IriStr>::from(iri_owned.clone().as_slice());
//!
//! // From slice.
//! let arc_1_1: Arc<IriStr> = iri.into();
//! let arc_1_2 = Arc::<IriStr>::from(iri);
//! // From owned.
//! // Note that `From<owned> for Arc<borrowed>` is not implemented for now.
//! // Get borrowed string by `.as_slice()` and convert it.
//! let arc_2_1: Arc<IriStr> = iri_owned.clone().as_slice().into();
//! let arc_2_2 = Arc::<IriStr>::from(iri_owned.clone().as_slice());
//! # }
//! # Ok::<_, Error>(())
//! ```
//!
//! [RFC 3986]: https://www.rfc-editor.org/rfc/rfc3986.html
//! [RFC 3987]: https://www.rfc-editor.org/rfc/rfc3987.html
//! [`RiStr`]: struct.RiStr.html
//! [`RiString`]: struct.RiString.html
//! [`RiAbsoluteStr`]: struct.RiAbsoluteStr.html
//! [`RiAbsoluteString`]: struct.RiAbsoluteString.html
//! [`RiFragmentStr`]: struct.RiFragmentStr.html
//! [`RiFragmentString`]: struct.RiFragmentString.html
//! [`RiReferenceStr`]: struct.RiReferenceStr.html
//! [`RiReferenceString`]: struct.RiReferenceString.html
//! [`RiReferenceString::into_iri()`]: struct.RiReferenceString.html#method.into_iri
//! [`RiRelativeStr`]: struct.RiRelativeStr.html
//! [`RiRelativeString`]: struct.RiRelativeString.html
//! [`IriSpec`]: ../spec/enum.IriSpec.html
//! [`UriSpec`]: ../spec/enum.UriSpec.html
#[cfg(feature = "alloc")]
pub use self::{
generic::{
CreationError, RiAbsoluteString, RiFragmentString, RiQueryString, RiReferenceString,
RiRelativeString, RiString,
},
iri::{
IriAbsoluteString, IriFragmentString, IriQueryString, IriReferenceString,
IriRelativeString, IriString,
},
uri::{
UriAbsoluteString, UriFragmentString, UriQueryString, UriReferenceString,
UriRelativeString, UriString,
},
};
pub use self::{
generic::{RiAbsoluteStr, RiFragmentStr, RiQueryStr, RiReferenceStr, RiRelativeStr, RiStr},
iri::{IriAbsoluteStr, IriFragmentStr, IriQueryStr, IriReferenceStr, IriRelativeStr, IriStr},
uri::{UriAbsoluteStr, UriFragmentStr, UriQueryStr, UriReferenceStr, UriRelativeStr, UriStr},
};
pub(crate) mod generic;
mod iri;
mod uri;

57
vendor/iri-string/src/types/generic.rs vendored Normal file
View File

@@ -0,0 +1,57 @@
//! Generic resource identifier types.
//!
//! ```text
//! IRI = scheme ":" ihier-part [ "?" iquery ] [ "#" ifragment ]
//! IRI-reference = IRI / irelative-ref
//! absolute-IRI = scheme ":" ihier-part [ "?" iquery ]
//! irelative-ref = irelative-part [ "?" iquery ] [ "#" ifragment ]
//! (`irelative-part` is roughly same as `ihier-part`.)
//! ```
//!
//! Hierarchy:
//!
//! ```text
//! RiReferenceStr
//! |-- RiStr
//! | `-- RiAbsoluteStr
//! `-- RiRelativeStr
//! ```
//!
//! Therefore, the conversions below are safe and cheap:
//!
//! * `RiStr -> RiReferenceStr`
//! * `RiAbsoluteStr -> RiStr`
//! * `RiAbsoluteStr -> RiReferenceStr`
//! * `RiRelativeStr -> RiReferenceStr`
//!
//! For safely convertible types (consider `FooStr -> BarStr` is safe), traits
//! below are implemented:
//!
//! * `AsRef<BarStr> for FooStr`
//! * `AsRef<BarStr> for FooString`
//! * `From<FooString> for BarString`
//! * `PartialEq<FooStr> for BarStr` and lots of impls like that
//! + `PartialEq` and `PartialOrd`.
//! + Slice, owned, `Cow`, reference, etc...
pub use self::{
absolute::RiAbsoluteStr, fragment::RiFragmentStr, normal::RiStr, query::RiQueryStr,
reference::RiReferenceStr, relative::RiRelativeStr,
};
#[cfg(feature = "alloc")]
pub use self::{
absolute::RiAbsoluteString, error::CreationError, fragment::RiFragmentString, normal::RiString,
query::RiQueryString, reference::RiReferenceString, relative::RiRelativeString,
};
#[macro_use]
mod macros;
mod absolute;
#[cfg(feature = "alloc")]
mod error;
mod fragment;
mod normal;
mod query;
mod reference;
mod relative;

View File

@@ -0,0 +1,728 @@
//! Absolute IRI (without fragment part).
use crate::components::AuthorityComponents;
#[cfg(feature = "alloc")]
use crate::mask_password::password_range_to_hide;
use crate::mask_password::PasswordMasked;
use crate::normalize::{Error, NormalizationInput, Normalized, NormalizednessCheckMode};
use crate::parser::trusted as trusted_parser;
use crate::spec::Spec;
use crate::types::{RiQueryStr, RiReferenceStr, RiStr};
#[cfg(feature = "alloc")]
use crate::types::{RiReferenceString, RiString};
use crate::validate::absolute_iri;
define_custom_string_slice! {
/// A borrowed slice of an absolute IRI without fragment part.
///
/// This corresponds to [`absolute-IRI` rule] in [RFC 3987]
/// (and [`absolute-URI` rule] in [RFC 3986]).
/// In other words, this is [`RiStr`] without fragment part.
///
/// If you want to accept fragment part, use [`RiStr`].
///
/// # Valid values
///
/// This type can have an absolute IRI without fragment part.
///
/// ```
/// # use iri_string::types::IriAbsoluteStr;
/// assert!(IriAbsoluteStr::new("https://example.com/foo?bar=baz").is_ok());
/// assert!(IriAbsoluteStr::new("foo:bar").is_ok());
/// // Scheme `foo` and empty path.
/// assert!(IriAbsoluteStr::new("foo:").is_ok());
/// // `foo://.../` below are all allowed. See the crate documentation for detail.
/// assert!(IriAbsoluteStr::new("foo:/").is_ok());
/// assert!(IriAbsoluteStr::new("foo://").is_ok());
/// assert!(IriAbsoluteStr::new("foo:///").is_ok());
/// assert!(IriAbsoluteStr::new("foo:////").is_ok());
/// assert!(IriAbsoluteStr::new("foo://///").is_ok());
///
/// ```
///
/// Relative IRI is not allowed.
///
/// ```
/// # use iri_string::types::IriAbsoluteStr;
/// // This is relative path.
/// assert!(IriAbsoluteStr::new("foo/bar").is_err());
/// // `/foo/bar` is an absolute path, but it is authority-relative.
/// assert!(IriAbsoluteStr::new("/foo/bar").is_err());
/// // `//foo/bar` is termed "network-path reference",
/// // or usually called "protocol-relative reference".
/// assert!(IriAbsoluteStr::new("//foo/bar").is_err());
/// // Empty string is not a valid absolute IRI.
/// assert!(IriAbsoluteStr::new("").is_err());
/// ```
///
/// Fragment part (such as trailing `#foo`) is not allowed.
///
/// ```
/// # use iri_string::types::IriAbsoluteStr;
/// // Fragment part is not allowed.
/// assert!(IriAbsoluteStr::new("https://example.com/foo?bar=baz#qux").is_err());
/// ```
///
/// Some characters and sequences cannot be used in an absolute IRI.
///
/// ```
/// # use iri_string::types::IriAbsoluteStr;
/// // `<` and `>` cannot directly appear in an absolute IRI.
/// assert!(IriAbsoluteStr::new("<not allowed>").is_err());
/// // Broken percent encoding cannot appear in an absolute IRI.
/// assert!(IriAbsoluteStr::new("%").is_err());
/// assert!(IriAbsoluteStr::new("%GG").is_err());
/// ```
///
/// [RFC 3986]: https://www.rfc-editor.org/rfc/rfc3986.html
/// [RFC 3987]: https://www.rfc-editor.org/rfc/rfc3987.html
/// [`absolute-IRI` rule]: https://www.rfc-editor.org/rfc/rfc3987.html#section-2.2
/// [`absolute-URI` rule]: https://www.rfc-editor.org/rfc/rfc3986.html#section-4.3
/// [`RiStr`]: struct.RiStr.html
struct RiAbsoluteStr {
// `absolute_iri` is the validation function imported from `crate::validate`.
validator = absolute_iri,
// NOTE(review): presumably used as the "expecting" text of the generated
// serde visitor -- confirm against the macro definition.
expecting_msg = "Absolute IRI string",
}
}
#[cfg(feature = "alloc")]
define_custom_string_owned! {
/// An owned string of an absolute IRI without fragment part.
///
/// This corresponds to [`absolute-IRI` rule] in [RFC 3987]
/// (and [`absolute-URI` rule] in [RFC 3986]).
/// The rule for `absolute-IRI` is `scheme ":" ihier-part [ "?" iquery ]`.
/// In other words, this is [`RiString`] without fragment part.
///
/// If you want to accept fragment part, use [`RiString`].
///
/// For details, see the document for [`RiAbsoluteStr`].
///
/// Enabled by `alloc` or `std` feature.
///
/// [RFC 3986]: https://www.rfc-editor.org/rfc/rfc3986.html
/// [RFC 3987]: https://www.rfc-editor.org/rfc/rfc3987.html
/// [`absolute-IRI` rule]: https://www.rfc-editor.org/rfc/rfc3987.html#section-2.2
/// [`absolute-URI` rule]: https://www.rfc-editor.org/rfc/rfc3986.html#section-4.3
/// [`RiAbsoluteStr`]: struct.RiAbsoluteStr.html
/// [`RiString`]: struct.RiString.html
struct RiAbsoluteString {
// Same validation function as the borrowed counterpart `RiAbsoluteStr`.
validator = absolute_iri,
// The borrowed slice type paired with this owned type.
slice = RiAbsoluteStr,
// NOTE(review): presumably used as the "expecting" text of the generated
// serde visitor -- confirm against the macro definition.
expecting_msg = "Absolute IRI string",
}
}
impl<S: Spec> RiAbsoluteStr<S> {
/// Returns `Ok(())` if the IRI is normalizable by the RFC 3986 algorithm.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::types::IriAbsoluteStr;
///
/// let iri = IriAbsoluteStr::new("HTTP://example.COM/foo/%2e/bar/..")?;
/// assert!(iri.ensure_rfc3986_normalizable().is_ok());
///
/// let iri2 = IriAbsoluteStr::new("scheme:/..//bar")?;
/// // The normalization result would be `scheme://bar` according to RFC
/// // 3986, but it is unintended and should be treated as a failure.
/// // This crate automatically handles this case so that `.normalize()` won't fail.
/// assert!(iri2.ensure_rfc3986_normalizable().is_err());
/// # Ok::<_, Error>(())
/// ```
#[inline]
pub fn ensure_rfc3986_normalizable(&self) -> Result<(), Error> {
NormalizationInput::from(self).ensure_rfc3986_normalizable()
}
/// Checks whether the IRI is already in normalized form.
///
/// The result is the same as that of `self.normalize().to_string() == self`,
/// but it is computed more efficiently and without heap allocation.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::format::ToDedicatedString;
/// use iri_string::types::IriAbsoluteStr;
///
/// let iri = IriAbsoluteStr::new("HTTP://example.COM/foo/./bar/%2e%2e/../baz?query")?;
/// assert!(!iri.is_normalized());
///
/// let normalized = iri.normalize().to_dedicated_string();
/// assert_eq!(normalized, "http://example.com/baz?query");
/// assert!(normalized.is_normalized());
/// # }
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::format::ToDedicatedString;
/// use iri_string::types::IriAbsoluteStr;
///
/// let iri = IriAbsoluteStr::new("scheme:/.///foo")?;
/// // Already normalized.
/// assert!(iri.is_normalized());
/// # }
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::format::ToDedicatedString;
/// use iri_string::types::IriAbsoluteStr;
///
/// let iri = IriAbsoluteStr::new("scheme:relative/..//not-a-host")?;
/// // Default normalization algorithm assumes the path part to be NOT opaque.
/// assert!(!iri.is_normalized());
///
/// let normalized = iri.normalize().to_dedicated_string();
/// assert_eq!(normalized, "scheme:/.//not-a-host");
/// # }
/// # Ok::<_, Error>(())
/// ```
#[inline]
#[must_use]
pub fn is_normalized(&self) -> bool {
    let raw: &str = self.as_str();
    trusted_parser::is_normalized::<S>(raw, NormalizednessCheckMode::Default)
}
/// Checks whether the IRI is already normalized in the sense of RFC 3986.
///
/// The result is the same as that of
/// `self.ensure_rfc3986_normalizable().is_ok() && (self.normalize().to_string() == self)`,
/// but it is computed more efficiently and without heap allocation.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::format::ToDedicatedString;
/// use iri_string::types::IriAbsoluteStr;
///
/// let iri = IriAbsoluteStr::new("HTTP://example.COM/foo/./bar/%2e%2e/../baz?query")?;
/// assert!(!iri.is_normalized_rfc3986());
///
/// let normalized = iri.normalize().to_dedicated_string();
/// assert_eq!(normalized, "http://example.com/baz?query");
/// assert!(normalized.is_normalized_rfc3986());
/// # }
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::format::ToDedicatedString;
/// use iri_string::types::IriAbsoluteStr;
///
/// let iri = IriAbsoluteStr::new("scheme:/.///foo")?;
/// // Not normalized in the sense of RFC 3986.
/// assert!(!iri.is_normalized_rfc3986());
/// # }
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::format::ToDedicatedString;
/// use iri_string::types::IriAbsoluteStr;
///
/// let iri = IriAbsoluteStr::new("scheme:relative/..//not-a-host")?;
/// // RFC 3986 normalization algorithm assumes the path part to be NOT opaque.
/// assert!(!iri.is_normalized_rfc3986());
///
/// let normalized = iri.normalize().to_dedicated_string();
/// assert_eq!(normalized, "scheme:/.//not-a-host");
/// # }
/// # Ok::<_, Error>(())
/// ```
#[inline]
#[must_use]
pub fn is_normalized_rfc3986(&self) -> bool {
    let raw: &str = self.as_str();
    trusted_parser::is_normalized::<S>(raw, NormalizednessCheckMode::Rfc3986)
}
/// Returns `true` if the IRI is already normalized in the sense of
/// [`normalize_but_preserve_authorityless_relative_path`] method.
///
/// This returns the same result as
/// `self.normalize_but_preserve_authorityless_relative_path().to_string() == self`,
/// but does this more efficiently without heap allocation.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::format::ToDedicatedString;
/// use iri_string::types::IriAbsoluteStr;
///
/// let iri = IriAbsoluteStr::new("HTTP://example.COM/foo/./bar/%2e%2e/../baz?query")?;
/// assert!(!iri.is_normalized_but_authorityless_relative_path_preserved());
///
/// let normalized = iri
/// .normalize_but_preserve_authorityless_relative_path()
/// .to_dedicated_string();
/// assert_eq!(normalized, "http://example.com/baz?query");
/// assert!(normalized.is_normalized());
/// # }
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::format::ToDedicatedString;
/// use iri_string::types::IriAbsoluteStr;
///
/// let iri = IriAbsoluteStr::new("scheme:/.///foo")?;
/// // Already normalized in the sense of
/// // `normalize_but_preserve_authorityless_relative_path()` method.
/// assert!(iri.is_normalized_but_authorityless_relative_path_preserved());
/// # }
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::format::ToDedicatedString;
/// use iri_string::types::IriAbsoluteStr;
///
/// let iri = IriAbsoluteStr::new("scheme:relative/..//not-a-host")?;
/// // Relative path is treated as opaque since the authority component is absent.
/// assert!(iri.is_normalized_but_authorityless_relative_path_preserved());
/// # }
/// # Ok::<_, Error>(())
/// ```
///
/// [`normalize_but_preserve_authorityless_relative_path`]:
/// `Self::normalize_but_preserve_authorityless_relative_path`
#[inline]
#[must_use]
pub fn is_normalized_but_authorityless_relative_path_preserved(&self) -> bool {
    // Check the raw string against the `PreserveAuthoritylessRelativePath` mode.
    let mode = NormalizednessCheckMode::PreserveAuthoritylessRelativePath;
    trusted_parser::is_normalized::<S>(self.as_str(), mode)
}
/// Returns the normalized IRI.
///
/// # Notes
///
/// For some abnormal IRIs, the normalization can produce semantically
/// incorrect string that looks syntactically valid. To avoid security
/// issues by this trap, the normalization algorithm by this crate
/// automatically applies the workaround.
///
/// If you worry about this, test by
/// [`RiAbsoluteStr::ensure_rfc3986_normalizable`] method or
/// [`Normalized::ensure_rfc3986_normalizable`] before using the result
/// string.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::format::ToDedicatedString;
/// use iri_string::types::IriAbsoluteStr;
///
/// let iri = IriAbsoluteStr::new("HTTP://example.COM/foo/./bar/%2e%2e/../baz?query")?;
///
/// let normalized = iri.normalize().to_dedicated_string();
/// assert_eq!(normalized, "http://example.com/baz?query");
/// # }
/// # Ok::<_, Error>(())
/// ```
#[inline]
#[must_use]
pub fn normalize(&self) -> Normalized<'_, Self> {
    // Build the normalization input from this IRI first, then request normalization.
    let input = NormalizationInput::from(self);
    Normalized::from_input(input).and_normalize()
}
/// Returns the normalized IRI, but preserving dot segments in relative path
/// if the authority component is absent.
///
/// This normalization would be similar to that of [WHATWG URL Standard]
/// while this implementation is not guaranteed to strictly follow the spec.
///
/// Note that this normalization algorithm is not compatible with RFC 3986
/// algorithm for some inputs.
///
/// Note that case normalization and percent-encoding normalization will
/// still be applied to any path.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::format::ToDedicatedString;
/// use iri_string::types::IriAbsoluteStr;
///
/// let iri = IriAbsoluteStr::new("HTTP://example.COM/foo/./bar/%2e%2e/../baz?query")?;
///
/// let normalized = iri
/// .normalize_but_preserve_authorityless_relative_path()
/// .to_dedicated_string();
/// assert_eq!(normalized, "http://example.com/baz?query");
/// # }
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::format::ToDedicatedString;
/// use iri_string::types::IriAbsoluteStr;
///
/// let iri = IriAbsoluteStr::new("scheme:relative/../f%6f%6f")?;
///
/// let normalized = iri
/// .normalize_but_preserve_authorityless_relative_path()
/// .to_dedicated_string();
/// assert_eq!(normalized, "scheme:relative/../foo");
/// // `.normalize()` would normalize this to `scheme:/foo`.
/// # assert_eq!(iri.normalize().to_dedicated_string(), "scheme:/foo");
/// # }
/// # Ok::<_, Error>(())
/// ```
///
/// [WHATWG URL Standard]: https://url.spec.whatwg.org/
#[inline]
#[must_use]
pub fn normalize_but_preserve_authorityless_relative_path(&self) -> Normalized<'_, Self> {
    // Build the normalization input from this IRI first, then pick the
    // variant that preserves authority-less relative paths.
    let input = NormalizationInput::from(self);
    Normalized::from_input(input).and_normalize_but_preserve_authorityless_relative_path()
}
/// Returns the proxy to the IRI with password masking feature.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::format::ToDedicatedString;
/// use iri_string::types::IriAbsoluteStr;
///
/// let iri = IriAbsoluteStr::new("http://user:password@example.com/path?query")?;
/// let masked = iri.mask_password();
/// assert_eq!(masked.to_dedicated_string(), "http://user:@example.com/path?query");
///
/// assert_eq!(
/// masked.replace_password("${password}").to_string(),
/// "http://user:${password}@example.com/path?query"
/// );
/// # }
/// # Ok::<_, Error>(())
/// ```
#[inline]
#[must_use]
pub fn mask_password(&self) -> PasswordMasked<'_, Self> {
// The proxy is constructed directly from a borrow of `self`.
PasswordMasked::new(self)
}
}
/// Components getters.
impl<S: Spec> RiAbsoluteStr<S> {
/// Returns the scheme.
///
/// The following colon is truncated.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::types::IriAbsoluteStr;
///
/// let iri = IriAbsoluteStr::new("http://example.com/pathpath?queryquery")?;
/// assert_eq!(iri.scheme_str(), "http");
/// # Ok::<_, Error>(())
/// ```
#[inline]
#[must_use]
pub fn scheme_str(&self) -> &str {
    // Delegate to the trusted parser's scheme extractor for absolute IRIs.
    let iri = self.as_str();
    trusted_parser::extract_scheme_absolute(iri)
}
/// Returns the authority.
///
/// The leading `//` is truncated.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::types::IriAbsoluteStr;
///
/// let iri = IriAbsoluteStr::new("http://example.com/pathpath?queryquery")?;
/// assert_eq!(iri.authority_str(), Some("example.com"));
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::types::IriAbsoluteStr;
///
/// let iri = IriAbsoluteStr::new("urn:uuid:10db315b-fcd1-4428-aca8-15babc9a2da2")?;
/// assert_eq!(iri.authority_str(), None);
/// # Ok::<_, Error>(())
/// ```
#[inline]
#[must_use]
pub fn authority_str(&self) -> Option<&str> {
    // Delegate to the trusted parser's authority extractor for absolute IRIs.
    let iri = self.as_str();
    trusted_parser::extract_authority_absolute(iri)
}
/// Returns the path.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::types::IriAbsoluteStr;
///
/// let iri = IriAbsoluteStr::new("http://example.com/pathpath?queryquery")?;
/// assert_eq!(iri.path_str(), "/pathpath");
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::types::IriAbsoluteStr;
///
/// let iri = IriAbsoluteStr::new("urn:uuid:10db315b-fcd1-4428-aca8-15babc9a2da2")?;
/// assert_eq!(iri.path_str(), "uuid:10db315b-fcd1-4428-aca8-15babc9a2da2");
/// # Ok::<_, Error>(())
/// ```
#[inline]
#[must_use]
pub fn path_str(&self) -> &str {
    // Delegate to the trusted parser's path extractor for absolute IRIs.
    let iri = self.as_str();
    trusted_parser::extract_path_absolute(iri)
}
/// Returns the query.
///
/// The leading question mark (`?`) is truncated.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::types::{IriAbsoluteStr, IriQueryStr};
///
/// let iri = IriAbsoluteStr::new("http://example.com/pathpath?queryquery")?;
/// let query = IriQueryStr::new("queryquery")?;
/// assert_eq!(iri.query(), Some(query));
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::types::IriAbsoluteStr;
///
/// let iri = IriAbsoluteStr::new("urn:uuid:10db315b-fcd1-4428-aca8-15babc9a2da2")?;
/// assert_eq!(iri.query(), None);
/// # Ok::<_, Error>(())
/// ```
#[inline]
#[must_use]
pub fn query(&self) -> Option<&RiQueryStr<S>> {
trusted_parser::extract_query_absolute_iri(self.as_str()).map(|query| {
// SAFETY: `trusted_parser::extract_query_absolute_iri()` must return
// the query part of an IRI (without the leading `?` character),
// and the returned string consists of allowed characters since it
// is a substring of the source IRI.
unsafe { RiQueryStr::new_maybe_unchecked(query) }
})
}
/// Returns the query in a raw string slice.
///
/// The leading question mark (`?`) is truncated.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::types::IriAbsoluteStr;
///
/// let iri = IriAbsoluteStr::new("http://example.com/pathpath?queryquery")?;
/// assert_eq!(iri.query_str(), Some("queryquery"));
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::types::IriAbsoluteStr;
///
/// let iri = IriAbsoluteStr::new("urn:uuid:10db315b-fcd1-4428-aca8-15babc9a2da2")?;
/// assert_eq!(iri.query_str(), None);
/// # Ok::<_, Error>(())
/// ```
#[inline]
#[must_use]
pub fn query_str(&self) -> Option<&str> {
    // Delegate to the trusted parser's query extractor for absolute IRIs.
    let iri = self.as_str();
    trusted_parser::extract_query_absolute_iri(iri)
}
/// Returns the authority components.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::types::IriAbsoluteStr;
///
/// let iri = IriAbsoluteStr::new("http://user:pass@example.com:8080/pathpath?queryquery")?;
/// let authority = iri.authority_components()
/// .expect("authority is available");
/// assert_eq!(authority.userinfo(), Some("user:pass"));
/// assert_eq!(authority.host(), "example.com");
/// assert_eq!(authority.port(), Some("8080"));
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::types::IriAbsoluteStr;
///
/// let iri = IriAbsoluteStr::new("urn:uuid:10db315b-fcd1-4428-aca8-15babc9a2da2")?;
/// assert!(iri.authority_components().is_none());
/// # Ok::<_, Error>(())
/// ```
#[inline]
#[must_use]
pub fn authority_components(&self) -> Option<AuthorityComponents<'_>> {
// `as_ref()` supplies the reference type that `from_iri` expects
// (presumably `&RiReferenceStr<S>` — confirm against `AuthorityComponents::from_iri`).
AuthorityComponents::from_iri(self.as_ref())
}
}
#[cfg(feature = "alloc")]
impl<S: Spec> RiAbsoluteString<S> {
/// Removes the password completely (including separator colon) from `self` even if it is empty.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::types::IriAbsoluteString;
///
/// let mut iri = IriAbsoluteString::try_from("http://user:password@example.com/path?query")?;
/// iri.remove_password_inline();
/// assert_eq!(iri, "http://user@example.com/path?query");
/// # }
/// # Ok::<_, Error>(())
/// ```
///
/// Even if the password is empty, the password and separator will be removed.
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::types::IriAbsoluteString;
///
/// let mut iri = IriAbsoluteString::try_from("http://user:@example.com/path?query")?;
/// iri.remove_password_inline();
/// assert_eq!(iri, "http://user@example.com/path?query");
/// # }
/// # Ok::<_, Error>(())
/// ```
pub fn remove_password_inline(&mut self) {
// Range of the password to strip; `None` means there is nothing to remove.
let pw_range = match password_range_to_hide(self.as_slice().as_ref()) {
Some(v) => v,
None => return,
};
// The separator colon sits immediately before the password range.
let separator_colon = pw_range.start - 1;
// SAFETY: the IRI must still be valid after the password component and
// the leading separator colon is removed.
unsafe {
let buf = self.as_inner_mut();
// Remove the colon and the password together as one contiguous range.
buf.drain(separator_colon..pw_range.end);
debug_assert!(
RiAbsoluteStr::<S>::new(buf).is_ok(),
"[validity] the IRI must be valid after the password component is removed"
);
}
}
/// Replaces the non-empty password in `self` with the empty password.
///
/// This leaves the separator colon if the password part was available.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::types::IriAbsoluteString;
///
/// let mut iri = IriAbsoluteString::try_from("http://user:password@example.com/path?query")?;
/// iri.remove_nonempty_password_inline();
/// assert_eq!(iri, "http://user:@example.com/path?query");
/// # }
/// # Ok::<_, Error>(())
/// ```
///
/// If the password is empty, it is left as is.
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::types::IriAbsoluteString;
///
/// let mut iri = IriAbsoluteString::try_from("http://user:@example.com/path?query")?;
/// iri.remove_nonempty_password_inline();
/// assert_eq!(iri, "http://user:@example.com/path?query");
/// # }
/// # Ok::<_, Error>(())
/// ```
pub fn remove_nonempty_password_inline(&mut self) {
// Proceed only when a password range exists and is non-empty; an absent or
// already-empty password leaves the IRI untouched.
let pw_range = match password_range_to_hide(self.as_slice().as_ref()) {
Some(v) if !v.is_empty() => v,
_ => return,
};
// Sanity check (debug builds only): the byte just before the password is `:`.
debug_assert_eq!(
self.as_str().as_bytes().get(pw_range.start - 1).copied(),
Some(b':'),
"[validity] the password component must be prefixed with a separator colon"
);
// SAFETY: the IRI must be valid after the password is replaced with empty string.
unsafe {
let buf = self.as_inner_mut();
// Only the password itself is drained; the separator colon is kept.
buf.drain(pw_range);
debug_assert!(
RiAbsoluteStr::<S>::new(buf).is_ok(),
"[validity] the IRI must be valid after the password component is removed"
);
}
}
}
// Conversions from the absolute-IRI types to the IRI types
// (presumably generated by the macro from the four type names — see its definition).
impl_trivial_conv_between_iri! {
from_slice: RiAbsoluteStr,
from_owned: RiAbsoluteString,
to_slice: RiStr,
to_owned: RiString,
}
// Conversions from the absolute-IRI types to the IRI-reference types
// (presumably generated by the macro from the four type names — see its definition).
impl_trivial_conv_between_iri! {
from_slice: RiAbsoluteStr,
from_owned: RiAbsoluteString,
to_slice: RiReferenceStr,
to_owned: RiReferenceString,
}

View File

@@ -0,0 +1,70 @@
//! Resource identifier creation error.
use core::fmt;
#[cfg(feature = "std")]
use std::error;
use crate::validate::Error;
/// Error on conversion into an IRI type.
///
/// Enabled by `alloc` or `std` feature.
// This type itself does not require `alloc` or `std`, but the type is used only when `alloc`
// feature is enabled. To avoid exporting unused stuff, the type (and the `types::generic::error`
// module) is available only when necessary.
//
// Note that all types which implement `Spec` also implement `SpecInternal`.
pub struct CreationError<T> {
/// Source data.
source: T,
/// Validation error.
error: Error,
}
impl<T> CreationError<T> {
    /// Consumes the error and returns the source data it carries.
    #[must_use]
    pub fn into_source(self) -> T {
        let Self { source, .. } = self;
        source
    }

    /// Returns a copy of the validation error stored in this value.
    #[must_use]
    pub fn validation_error(&self) -> Error {
        let Self { error, .. } = self;
        *error
    }

    /// Creates a new `CreationError` from a validation error and the source data.
    #[must_use]
    pub(crate) fn new(error: Error, source: T) -> Self {
        Self { error, source }
    }
}
impl<T: fmt::Debug> fmt::Debug for CreationError<T> {
    /// Formats the value as a `CreationError` struct with `source` and `error` fields.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let mut builder = f.debug_struct("CreationError");
        builder.field("source", &self.source);
        builder.field("error", &self.error);
        builder.finish()
    }
}
impl<T: Clone> Clone for CreationError<T> {
    /// Clones the source data and copies the validation error.
    fn clone(&self) -> Self {
        let source = self.source.clone();
        let error = self.error;
        Self { source, error }
    }
}
impl<T> fmt::Display for CreationError<T> {
// Forwards formatting to the wrapped validation error; the source data is not
// included, which is why no bound on `T` is needed here.
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
self.error.fmt(f)
}
}
// Available only with the `std` feature; the empty body keeps the trait's default methods.
#[cfg(feature = "std")]
impl<T: fmt::Debug> error::Error for CreationError<T> {}

View File

@@ -0,0 +1,106 @@
//! Fragment string.
use crate::spec::Spec;
use crate::validate::{fragment, Error, ErrorKind};
define_custom_string_slice! {
/// A borrowed slice of an IRI fragment (i.e. after the first `#` character).
///
/// This corresponds to [`ifragment` rule] in [RFC 3987] (and [`fragment` rule] in [RFC 3986]).
/// The rule for `ifragment` is `*( ipchar / "/" / "?" )`.
///
/// # Valid values
///
/// This type can have an IRI fragment.
/// Note that the IRI `foo://bar/baz#qux` has the fragment `qux`, **not** `#qux`.
///
/// ```
/// # use iri_string::types::IriFragmentStr;
/// assert!(IriFragmentStr::new("").is_ok());
/// assert!(IriFragmentStr::new("foo").is_ok());
/// assert!(IriFragmentStr::new("foo/bar").is_ok());
/// assert!(IriFragmentStr::new("/foo/bar").is_ok());
/// assert!(IriFragmentStr::new("//foo/bar").is_ok());
/// assert!(IriFragmentStr::new("https://user:pass@example.com:8080").is_ok());
/// assert!(IriFragmentStr::new("https://example.com/").is_ok());
/// ```
///
/// Some characters and sequences cannot be used in a fragment.
///
/// ```
/// # use iri_string::types::IriFragmentStr;
/// // `<` and `>` cannot directly appear in an IRI reference.
/// assert!(IriFragmentStr::new("<not allowed>").is_err());
/// // Broken percent encoding cannot appear in an IRI reference.
/// assert!(IriFragmentStr::new("%").is_err());
/// assert!(IriFragmentStr::new("%GG").is_err());
/// // Hash sign `#` cannot appear in an IRI fragment.
/// assert!(IriFragmentStr::new("#hash").is_err());
/// ```
///
/// [RFC 3986]: https://www.rfc-editor.org/rfc/rfc3986.html
/// [RFC 3987]: https://www.rfc-editor.org/rfc/rfc3987.html
/// [`fragment` rule]: https://www.rfc-editor.org/rfc/rfc3986.html#section-3.5
/// [`ifragment` rule]: https://www.rfc-editor.org/rfc/rfc3987.html#section-2.2
struct RiFragmentStr {
validator = fragment,
expecting_msg = "IRI fragment string",
}
}
#[cfg(feature = "alloc")]
define_custom_string_owned! {
/// An owned string of an IRI fragment (i.e. after the first `#` character).
///
/// This corresponds to [`ifragment` rule] in [RFC 3987] (and [`fragment` rule] in [RFC 3986]).
/// The rule for `ifragment` is `*( ipchar / "/" / "?" )`.
///
/// For details, see the documentation for [`RiFragmentStr`].
///
/// Enabled by `alloc` or `std` feature.
///
/// [RFC 3986]: https://www.rfc-editor.org/rfc/rfc3986.html
/// [RFC 3987]: https://www.rfc-editor.org/rfc/rfc3987.html
/// [`fragment` rule]: https://www.rfc-editor.org/rfc/rfc3986.html#section-3.5
/// [`ifragment` rule]: https://www.rfc-editor.org/rfc/rfc3987.html#section-2.2
/// [`RiFragmentStr`]: struct.RiFragmentStr.html
struct RiFragmentString {
validator = fragment,
slice = RiFragmentStr,
expecting_msg = "IRI fragment string",
}
}
impl<S: Spec> RiFragmentStr<S> {
/// Creates a new `&RiFragmentStr` from the fragment part prefixed by `#`.
///
/// # Examples
///
/// ```
/// # use iri_string::types::IriFragmentStr;
/// assert!(IriFragmentStr::from_prefixed("#").is_ok());
/// assert!(IriFragmentStr::from_prefixed("#foo").is_ok());
/// assert!(IriFragmentStr::from_prefixed("#foo/bar").is_ok());
/// assert!(IriFragmentStr::from_prefixed("#/foo/bar").is_ok());
/// assert!(IriFragmentStr::from_prefixed("#//foo/bar").is_ok());
/// assert!(IriFragmentStr::from_prefixed("#https://user:pass@example.com:8080").is_ok());
/// assert!(IriFragmentStr::from_prefixed("#https://example.com/").is_ok());
///
/// // `<` and `>` cannot directly appear in an IRI.
/// assert!(IriFragmentStr::from_prefixed("#<not allowed>").is_err());
/// // Broken percent encoding cannot appear in an IRI.
/// assert!(IriFragmentStr::new("#%").is_err());
/// assert!(IriFragmentStr::new("#%GG").is_err());
/// // `#` prefix is expected.
/// assert!(IriFragmentStr::from_prefixed("").is_err());
/// assert!(IriFragmentStr::from_prefixed("foo").is_err());
/// // Hash sign `#` cannot appear in an IRI fragment.
/// assert!(IriFragmentStr::from_prefixed("##hash").is_err());
/// ```
pub fn from_prefixed(s: &str) -> Result<&Self, Error> {
if !s.starts_with('#') {
return Err(Error::with_kind(ErrorKind::InvalidFragment));
}
TryFrom::try_from(&s[1..])
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,944 @@
//! Usual absolute IRI (fragment part being allowed).
#[cfg(all(feature = "alloc", not(feature = "std")))]
use alloc::string::String;
use crate::components::AuthorityComponents;
#[cfg(feature = "alloc")]
use crate::mask_password::password_range_to_hide;
use crate::mask_password::PasswordMasked;
use crate::normalize::{Error, NormalizationInput, Normalized, NormalizednessCheckMode};
use crate::parser::trusted as trusted_parser;
#[cfg(feature = "alloc")]
use crate::raw;
use crate::spec::Spec;
use crate::types::{RiAbsoluteStr, RiFragmentStr, RiQueryStr, RiReferenceStr};
#[cfg(feature = "alloc")]
use crate::types::{RiAbsoluteString, RiFragmentString, RiReferenceString};
use crate::validate::iri;
define_custom_string_slice! {
/// A borrowed string of an absolute IRI possibly with fragment part.
///
/// This corresponds to [`IRI` rule] in [RFC 3987] (and [`URI` rule] in [RFC 3986]).
/// The rule for `IRI` is `scheme ":" ihier-part [ "?" iquery ] [ "#" ifragment ]`.
/// In other words, this is [`RiAbsoluteStr`] with fragment part allowed.
///
/// # Valid values
///
/// This type can have an IRI (which is absolute, and may have fragment part).
///
/// ```
/// # use iri_string::types::IriStr;
/// assert!(IriStr::new("https://user:pass@example.com:8080").is_ok());
/// assert!(IriStr::new("https://example.com/").is_ok());
/// assert!(IriStr::new("https://example.com/foo?bar=baz").is_ok());
/// assert!(IriStr::new("https://example.com/foo?bar=baz#qux").is_ok());
/// assert!(IriStr::new("foo:bar").is_ok());
/// assert!(IriStr::new("foo:").is_ok());
/// // `foo://.../` below are all allowed. See the crate documentation for detail.
/// assert!(IriStr::new("foo:/").is_ok());
/// assert!(IriStr::new("foo://").is_ok());
/// assert!(IriStr::new("foo:///").is_ok());
/// assert!(IriStr::new("foo:////").is_ok());
/// assert!(IriStr::new("foo://///").is_ok());
/// ```
///
/// Relative IRI reference is not allowed.
///
/// ```
/// # use iri_string::types::IriStr;
/// // This is relative path.
/// assert!(IriStr::new("foo/bar").is_err());
/// // `/foo/bar` is an absolute path, but it is authority-relative.
/// assert!(IriStr::new("/foo/bar").is_err());
/// // `//foo/bar` is termed "network-path reference",
/// // or usually called "protocol-relative reference".
/// assert!(IriStr::new("//foo/bar").is_err());
/// // Same-document reference is relative.
/// assert!(IriStr::new("#foo").is_err());
/// // Empty string is not a valid absolute IRI.
/// assert!(IriStr::new("").is_err());
/// ```
///
/// Some characters and sequences cannot be used in an IRI.
///
/// ```
/// # use iri_string::types::IriStr;
/// // `<` and `>` cannot directly appear in an IRI.
/// assert!(IriStr::new("<not allowed>").is_err());
/// // Broken percent encoding cannot appear in an IRI.
/// assert!(IriStr::new("%").is_err());
/// assert!(IriStr::new("%GG").is_err());
/// ```
///
/// [RFC 3986]: https://www.rfc-editor.org/rfc/rfc3986.html
/// [RFC 3987]: https://www.rfc-editor.org/rfc/rfc3987.html
/// [`IRI` rule]: https://www.rfc-editor.org/rfc/rfc3987.html#section-2.2
/// [`URI` rule]: https://www.rfc-editor.org/rfc/rfc3986.html#section-3
/// [`RiAbsoluteStr`]: struct.RiAbsoluteStr.html
struct RiStr {
validator = iri,
expecting_msg = "IRI string",
}
}
#[cfg(feature = "alloc")]
define_custom_string_owned! {
/// An owned string of an absolute IRI possibly with fragment part.
///
/// This corresponds to [`IRI` rule] in [RFC 3987] (and [`URI` rule] in [RFC 3986]).
/// The rule for `IRI` is `scheme ":" ihier-part [ "?" iquery ] [ "#" ifragment ]`.
/// In other words, this is [`RiAbsoluteString`] with fragment part allowed.
///
/// For details, see the documentation for [`RiStr`].
///
/// Enabled by `alloc` or `std` feature.
///
/// [RFC 3986]: https://www.rfc-editor.org/rfc/rfc3986.html
/// [RFC 3987]: https://www.rfc-editor.org/rfc/rfc3987.html
/// [`IRI` rule]: https://www.rfc-editor.org/rfc/rfc3987.html#section-2.2
/// [`URI` rule]: https://www.rfc-editor.org/rfc/rfc3986.html#section-3
/// [`RiAbsoluteString`]: struct.RiAbsoluteString.html
struct RiString {
validator = iri,
slice = RiStr,
expecting_msg = "IRI string",
}
}
impl<S: Spec> RiStr<S> {
/// Splits the IRI into an absolute IRI part and a fragment part.
///
/// A leading `#` character is truncated if the fragment part exists.
///
/// # Examples
///
/// If the IRI has a fragment part, `Some(_)` is returned.
///
/// ```
/// # use iri_string::{spec::IriSpec, types::{IriFragmentStr, IriStr}, validate::Error};
/// let iri = IriStr::new("foo://bar/baz?qux=quux#corge")?;
/// let (absolute, fragment) = iri.to_absolute_and_fragment();
/// let fragment_expected = IriFragmentStr::new("corge")?;
/// assert_eq!(absolute, "foo://bar/baz?qux=quux");
/// assert_eq!(fragment, Some(fragment_expected));
/// # Ok::<_, Error>(())
/// ```
///
/// When the fragment part exists but is empty string, `Some(_)` is returned.
///
/// ```
/// # use iri_string::{spec::IriSpec, types::{IriFragmentStr, IriStr}, validate::Error};
/// let iri = IriStr::new("foo://bar/baz?qux=quux#")?;
/// let (absolute, fragment) = iri.to_absolute_and_fragment();
/// let fragment_expected = IriFragmentStr::new("")?;
/// assert_eq!(absolute, "foo://bar/baz?qux=quux");
/// assert_eq!(fragment, Some(fragment_expected));
/// # Ok::<_, Error>(())
/// ```
///
/// If the IRI has no fragment, `None` is returned.
///
/// ```
/// # use iri_string::{spec::IriSpec, types::IriStr, validate::Error};
/// let iri = IriStr::new("foo://bar/baz?qux=quux")?;
/// let (absolute, fragment) = iri.to_absolute_and_fragment();
/// assert_eq!(absolute, "foo://bar/baz?qux=quux");
/// assert_eq!(fragment, None);
/// # Ok::<_, Error>(())
/// ```
#[must_use]
pub fn to_absolute_and_fragment(&self) -> (&RiAbsoluteStr<S>, Option<&RiFragmentStr<S>>) {
// Split into the part before the fragment and the optional fragment itself.
let (prefix, fragment) = trusted_parser::split_fragment(self.as_str());
// SAFETY: an IRI without fragment part is also an absolute IRI.
let prefix = unsafe { RiAbsoluteStr::new_maybe_unchecked(prefix) };
// Wrap the fragment (when present) in its typed slice without re-validating.
let fragment = fragment.map(|fragment| {
// SAFETY: `trusted_parser::split_fragment()` must return a valid fragment component.
unsafe { RiFragmentStr::new_maybe_unchecked(fragment) }
});
(prefix, fragment)
}
/// Strips the fragment part if exists, and returns [`&RiAbsoluteStr`][`RiAbsoluteStr`].
///
/// # Examples
///
/// ```
/// # use iri_string::{spec::IriSpec, types::IriStr, validate::Error};
/// let iri = IriStr::new("foo://bar/baz?qux=quux#corge")?;
/// assert_eq!(iri.to_absolute(), "foo://bar/baz?qux=quux");
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::{spec::IriSpec, types::IriStr, validate::Error};
/// let iri = IriStr::new("foo://bar/baz?qux=quux")?;
/// assert_eq!(iri.to_absolute(), "foo://bar/baz?qux=quux");
/// # Ok::<_, Error>(())
/// ```
///
/// [`RiAbsoluteStr`]: struct.RiAbsoluteStr.html
#[must_use]
pub fn to_absolute(&self) -> &RiAbsoluteStr<S> {
// Only the length of the pre-fragment part is kept; the fragment itself is discarded.
let prefix_len = trusted_parser::split_fragment(self.as_str()).0.len();
// SAFETY: IRI without the fragment part (including a leading `#` character)
// is also an absolute IRI.
unsafe { RiAbsoluteStr::new_maybe_unchecked(&self.as_str()[..prefix_len]) }
}
/// Returns `Ok(())` if the IRI is normalizable by the RFC 3986 algorithm.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::types::IriStr;
///
/// let iri = IriStr::new("HTTP://example.COM/foo/%2e/bar/..")?;
/// assert!(iri.ensure_rfc3986_normalizable().is_ok());
///
/// let iri2 = IriStr::new("scheme:/..//bar")?;
/// // The normalization result would be `scheme://bar` according to RFC
/// // 3986, but it is unintended and should be treated as a failure.
/// // This crate automatically handles this case so that `.normalize()` won't fail.
/// assert!(iri2.ensure_rfc3986_normalizable().is_err());
/// # Ok::<_, Error>(())
/// ```
#[inline]
pub fn ensure_rfc3986_normalizable(&self) -> Result<(), Error> {
    // Build the normalization input from this IRI, then let it run the check.
    let input = NormalizationInput::from(self);
    input.ensure_rfc3986_normalizable()
}
/// Returns `true` if the IRI is already normalized.
///
/// This returns the same result as `self.normalize().to_string() == self`,
/// but does this more efficiently without heap allocation.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::format::ToDedicatedString;
/// use iri_string::types::IriStr;
///
/// let iri = IriStr::new("HTTP://example.COM/foo/./bar/%2e%2e/../baz?query#fragment")?;
/// assert!(!iri.is_normalized());
///
/// let normalized = iri.normalize().to_dedicated_string();
/// assert_eq!(normalized, "http://example.com/baz?query#fragment");
/// assert!(normalized.is_normalized());
/// # }
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::format::ToDedicatedString;
/// use iri_string::types::IriStr;
///
/// let iri = IriStr::new("scheme:/.///foo")?;
/// // Already normalized.
/// assert!(iri.is_normalized());
/// # }
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::format::ToDedicatedString;
/// use iri_string::types::IriStr;
///
/// let iri = IriStr::new("scheme:relative/..//not-a-host")?;
/// // Default normalization algorithm assumes the path part to be NOT opaque.
/// assert!(!iri.is_normalized());
///
/// let normalized = iri.normalize().to_dedicated_string();
/// assert_eq!(normalized, "scheme:/.//not-a-host");
/// # }
/// # Ok::<_, Error>(())
/// ```
#[must_use]
#[inline]
pub fn is_normalized(&self) -> bool {
    // Check the raw string against the `Default` normalizedness mode.
    let mode = NormalizednessCheckMode::Default;
    trusted_parser::is_normalized::<S>(self.as_str(), mode)
}
/// Returns `true` if the IRI is already normalized in the sense of RFC 3986.
///
/// This returns the same result as
/// `self.ensure_rfc3986_normalizable().is_ok() && (self.normalize().to_string() == self)`,
/// but does this more efficiently without heap allocation.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::format::ToDedicatedString;
/// use iri_string::types::IriStr;
///
/// let iri = IriStr::new("HTTP://example.COM/foo/./bar/%2e%2e/../baz?query#fragment")?;
/// assert!(!iri.is_normalized_rfc3986());
///
/// let normalized = iri.normalize().to_dedicated_string();
/// assert_eq!(normalized, "http://example.com/baz?query#fragment");
/// assert!(normalized.is_normalized_rfc3986());
/// # }
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::format::ToDedicatedString;
/// use iri_string::types::IriStr;
///
/// let iri = IriStr::new("scheme:/.///foo")?;
/// // Not normalized in the sense of RFC 3986.
/// assert!(!iri.is_normalized_rfc3986());
/// # }
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::format::ToDedicatedString;
/// use iri_string::types::IriStr;
///
/// let iri = IriStr::new("scheme:relative/..//not-a-host")?;
/// // RFC 3986 normalization algorithm assumes the path part to be NOT opaque.
/// assert!(!iri.is_normalized_rfc3986());
///
/// let normalized = iri.normalize().to_dedicated_string();
/// assert_eq!(normalized, "scheme:/.//not-a-host");
/// # }
/// # Ok::<_, Error>(())
/// ```
#[must_use]
#[inline]
pub fn is_normalized_rfc3986(&self) -> bool {
    // Check the raw string against the `Rfc3986` normalizedness mode.
    let mode = NormalizednessCheckMode::Rfc3986;
    trusted_parser::is_normalized::<S>(self.as_str(), mode)
}
/// Returns `true` if the IRI is already normalized in the sense of the
/// [`normalize_but_preserve_authorityless_relative_path`] method.
///
/// The answer is the same as that of
/// `self.normalize_but_preserve_authorityless_relative_path().to_string() == self`,
/// but it is computed more efficiently and without heap allocation.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::format::ToDedicatedString;
/// use iri_string::types::IriStr;
///
/// let iri = IriStr::new("HTTP://example.COM/foo/./bar/%2e%2e/../baz?query#fragment")?;
/// assert!(!iri.is_normalized_but_authorityless_relative_path_preserved());
///
/// let normalized = iri
///     .normalize_but_preserve_authorityless_relative_path()
///     .to_dedicated_string();
/// assert_eq!(normalized, "http://example.com/baz?query#fragment");
/// assert!(normalized.is_normalized_but_authorityless_relative_path_preserved());
/// # }
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::format::ToDedicatedString;
/// use iri_string::types::IriStr;
///
/// let iri = IriStr::new("scheme:/.///foo")?;
/// // Already normalized in the sense of
/// // `normalize_but_preserve_authorityless_relative_path()` method.
/// assert!(iri.is_normalized_but_authorityless_relative_path_preserved());
/// # }
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::format::ToDedicatedString;
/// use iri_string::types::IriStr;
///
/// let iri = IriStr::new("scheme:relative/..//not-a-host")?;
/// // Relative path is treated as opaque since the authority component is absent.
/// assert!(iri.is_normalized_but_authorityless_relative_path_preserved());
/// # }
/// # Ok::<_, Error>(())
/// ```
///
/// [`normalize_but_preserve_authorityless_relative_path`]:
///     `Self::normalize_but_preserve_authorityless_relative_path`
#[must_use]
#[inline]
pub fn is_normalized_but_authorityless_relative_path_preserved(&self) -> bool {
    // Same check as `is_normalized_rfc3986`, only the check mode differs.
    let mode = NormalizednessCheckMode::PreserveAuthoritylessRelativePath;
    trusted_parser::is_normalized::<S>(self.as_str(), mode)
}
/// Returns the normalized IRI.
///
/// # Notes
///
/// For some abnormal IRIs, the normalization can produce a semantically
/// incorrect string that looks syntactically valid. To avoid security
/// issues caused by this trap, the normalization algorithm of this crate
/// automatically applies a workaround.
///
/// If you worry about this, test by [`RiStr::ensure_rfc3986_normalizable`]
/// method or [`Normalized::ensure_rfc3986_normalizable`] before using the
/// result string.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::format::ToDedicatedString;
/// use iri_string::types::IriStr;
///
/// let iri = IriStr::new("HTTP://example.COM/foo/./bar/%2e%2e/../baz?query#fragment")?;
///
/// let normalized = iri.normalize().to_dedicated_string();
/// assert_eq!(normalized, "http://example.com/baz?query#fragment");
/// # }
/// # Ok::<_, Error>(())
/// ```
#[inline]
#[must_use]
pub fn normalize(&self) -> Normalized<'_, Self> {
    // Wrap `self` as the normalization input, then request normalization
    // through `and_normalize()`.
    let input = NormalizationInput::from(self);
    Normalized::from_input(input).and_normalize()
}
/// Returns the normalized IRI, but preserves dot segments in a relative path
/// if the authority component is absent.
///
/// This normalization is similar to that of the [WHATWG URL Standard],
/// although this implementation is not guaranteed to strictly follow the spec.
///
/// Note that this normalization algorithm is not compatible with the
/// RFC 3986 algorithm for some inputs.
///
/// Note that case normalization and percent-encoding normalization will
/// still be applied to any path.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::format::ToDedicatedString;
/// use iri_string::types::IriStr;
///
/// let iri = IriStr::new("HTTP://example.COM/foo/./bar/%2e%2e/../baz?query#fragment")?;
///
/// let normalized = iri
///     .normalize_but_preserve_authorityless_relative_path()
///     .to_dedicated_string();
/// assert_eq!(normalized, "http://example.com/baz?query#fragment");
/// # }
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::format::ToDedicatedString;
/// use iri_string::types::IriStr;
///
/// let iri = IriStr::new("scheme:relative/../f%6f%6f")?;
///
/// let normalized = iri
///     .normalize_but_preserve_authorityless_relative_path()
///     .to_dedicated_string();
/// assert_eq!(normalized, "scheme:relative/../foo");
/// // `.normalize()` would normalize this to `scheme:/foo`.
/// # assert_eq!(iri.normalize().to_dedicated_string(), "scheme:/foo");
/// # }
/// # Ok::<_, Error>(())
/// ```
///
/// [WHATWG URL Standard]: https://url.spec.whatwg.org/
#[inline]
#[must_use]
pub fn normalize_but_preserve_authorityless_relative_path(&self) -> Normalized<'_, Self> {
    // Same input wrapping as `normalize()`, but a different mode is requested.
    let input = NormalizationInput::from(self);
    Normalized::from_input(input).and_normalize_but_preserve_authorityless_relative_path()
}
/// Returns a proxy to the IRI that provides the password masking feature.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::format::ToDedicatedString;
/// use iri_string::types::IriStr;
///
/// let iri = IriStr::new("http://user:password@example.com/path?query")?;
/// let masked = iri.mask_password();
/// assert_eq!(masked.to_dedicated_string(), "http://user:@example.com/path?query");
///
/// assert_eq!(
///     masked.replace_password("${password}").to_string(),
///     "http://user:${password}@example.com/path?query"
/// );
/// # }
/// # Ok::<_, Error>(())
/// ```
#[inline]
#[must_use]
pub fn mask_password(&self) -> PasswordMasked<'_, Self> {
    // The proxy only borrows the IRI.
    let iri: &Self = self;
    PasswordMasked::new(iri)
}
}
/// Components getters.
impl<S: Spec> RiStr<S> {
    /// Returns the scheme.
    ///
    /// The colon that follows the scheme is not included.
    ///
    /// # Examples
    ///
    /// ```
    /// # use iri_string::validate::Error;
    /// use iri_string::types::IriStr;
    ///
    /// let iri = IriStr::new("http://example.com/pathpath?queryquery#fragfrag")?;
    /// assert_eq!(iri.scheme_str(), "http");
    /// # Ok::<_, Error>(())
    /// ```
    #[inline]
    #[must_use]
    pub fn scheme_str(&self) -> &str {
        let raw = self.as_str();
        trusted_parser::extract_scheme_absolute(raw)
    }

    /// Returns the authority.
    ///
    /// The leading `//` is not included.
    ///
    /// # Examples
    ///
    /// ```
    /// # use iri_string::validate::Error;
    /// use iri_string::types::IriStr;
    ///
    /// let iri = IriStr::new("http://example.com/pathpath?queryquery#fragfrag")?;
    /// assert_eq!(iri.authority_str(), Some("example.com"));
    /// # Ok::<_, Error>(())
    /// ```
    ///
    /// ```
    /// # use iri_string::validate::Error;
    /// use iri_string::types::IriStr;
    ///
    /// let iri = IriStr::new("urn:uuid:10db315b-fcd1-4428-aca8-15babc9a2da2")?;
    /// assert_eq!(iri.authority_str(), None);
    /// # Ok::<_, Error>(())
    /// ```
    #[inline]
    #[must_use]
    pub fn authority_str(&self) -> Option<&str> {
        let raw = self.as_str();
        trusted_parser::extract_authority_absolute(raw)
    }

    /// Returns the path.
    ///
    /// # Examples
    ///
    /// ```
    /// # use iri_string::validate::Error;
    /// use iri_string::types::IriStr;
    ///
    /// let iri = IriStr::new("http://example.com/pathpath?queryquery#fragfrag")?;
    /// assert_eq!(iri.path_str(), "/pathpath");
    /// # Ok::<_, Error>(())
    /// ```
    ///
    /// ```
    /// # use iri_string::validate::Error;
    /// use iri_string::types::IriStr;
    ///
    /// let iri = IriStr::new("urn:uuid:10db315b-fcd1-4428-aca8-15babc9a2da2")?;
    /// assert_eq!(iri.path_str(), "uuid:10db315b-fcd1-4428-aca8-15babc9a2da2");
    /// # Ok::<_, Error>(())
    /// ```
    #[inline]
    #[must_use]
    pub fn path_str(&self) -> &str {
        let raw = self.as_str();
        trusted_parser::extract_path_absolute(raw)
    }

    /// Returns the query.
    ///
    /// The leading question mark (`?`) is not included.
    ///
    /// # Examples
    ///
    /// ```
    /// # use iri_string::validate::Error;
    /// use iri_string::types::{IriQueryStr, IriStr};
    ///
    /// let iri = IriStr::new("http://example.com/pathpath?queryquery#fragfrag")?;
    /// let query = IriQueryStr::new("queryquery")?;
    /// assert_eq!(iri.query(), Some(query));
    /// # Ok::<_, Error>(())
    /// ```
    ///
    /// ```
    /// # use iri_string::validate::Error;
    /// use iri_string::types::IriStr;
    ///
    /// let iri = IriStr::new("urn:uuid:10db315b-fcd1-4428-aca8-15babc9a2da2")?;
    /// assert_eq!(iri.query(), None);
    /// # Ok::<_, Error>(())
    /// ```
    #[inline]
    #[must_use]
    pub fn query(&self) -> Option<&RiQueryStr<S>> {
        // Delegates to the getter of the IRI reference type.
        let reference = AsRef::<RiReferenceStr<S>>::as_ref(self);
        reference.query()
    }

    /// Returns the query as a raw string slice.
    ///
    /// The leading question mark (`?`) is not included.
    ///
    /// # Examples
    ///
    /// ```
    /// # use iri_string::validate::Error;
    /// use iri_string::types::IriStr;
    ///
    /// let iri = IriStr::new("http://example.com/pathpath?queryquery#fragfrag")?;
    /// assert_eq!(iri.query_str(), Some("queryquery"));
    /// # Ok::<_, Error>(())
    /// ```
    ///
    /// ```
    /// # use iri_string::validate::Error;
    /// use iri_string::types::IriStr;
    ///
    /// let iri = IriStr::new("urn:uuid:10db315b-fcd1-4428-aca8-15babc9a2da2")?;
    /// assert_eq!(iri.query_str(), None);
    /// # Ok::<_, Error>(())
    /// ```
    #[inline]
    #[must_use]
    pub fn query_str(&self) -> Option<&str> {
        let raw = self.as_str();
        trusted_parser::extract_query(raw)
    }

    /// Returns the fragment part, if it exists.
    ///
    /// The leading `#` character is not included.
    ///
    /// # Examples
    ///
    /// ```
    /// # use iri_string::{spec::IriSpec, types::{IriFragmentStr, IriStr}, validate::Error};
    /// let iri = IriStr::new("foo://bar/baz?qux=quux#corge")?;
    /// let fragment = IriFragmentStr::new("corge")?;
    /// assert_eq!(iri.fragment(), Some(fragment));
    /// # Ok::<_, Error>(())
    /// ```
    ///
    /// ```
    /// # use iri_string::{spec::IriSpec, types::{IriFragmentStr, IriStr}, validate::Error};
    /// let iri = IriStr::new("foo://bar/baz?qux=quux#")?;
    /// let fragment = IriFragmentStr::new("")?;
    /// assert_eq!(iri.fragment(), Some(fragment));
    /// # Ok::<_, Error>(())
    /// ```
    ///
    /// ```
    /// # use iri_string::{spec::IriSpec, types::IriStr, validate::Error};
    /// let iri = IriStr::new("foo://bar/baz?qux=quux")?;
    /// assert_eq!(iri.fragment(), None);
    /// # Ok::<_, Error>(())
    /// ```
    #[inline]
    #[must_use]
    pub fn fragment(&self) -> Option<&RiFragmentStr<S>> {
        // Delegates to the getter of the IRI reference type.
        let reference = AsRef::<RiReferenceStr<S>>::as_ref(self);
        reference.fragment()
    }

    /// Returns the fragment part as a raw string slice, if it exists.
    ///
    /// The leading `#` character is not included.
    ///
    /// # Examples
    ///
    /// ```
    /// # use iri_string::{spec::IriSpec, types::IriStr, validate::Error};
    /// let iri = IriStr::new("foo://bar/baz?qux=quux#corge")?;
    /// assert_eq!(iri.fragment_str(), Some("corge"));
    /// # Ok::<_, Error>(())
    /// ```
    ///
    /// ```
    /// # use iri_string::{spec::IriSpec, types::IriStr, validate::Error};
    /// let iri = IriStr::new("foo://bar/baz?qux=quux#")?;
    /// assert_eq!(iri.fragment_str(), Some(""));
    /// # Ok::<_, Error>(())
    /// ```
    ///
    /// ```
    /// # use iri_string::{spec::IriSpec, types::IriStr, validate::Error};
    /// let iri = IriStr::new("foo://bar/baz?qux=quux")?;
    /// assert_eq!(iri.fragment_str(), None);
    /// # Ok::<_, Error>(())
    /// ```
    #[inline]
    #[must_use]
    pub fn fragment_str(&self) -> Option<&str> {
        // Delegates to the getter of the IRI reference type.
        let reference = AsRef::<RiReferenceStr<S>>::as_ref(self);
        reference.fragment_str()
    }

    /// Returns the authority components.
    ///
    /// # Examples
    ///
    /// ```
    /// # use iri_string::validate::Error;
    /// use iri_string::types::IriStr;
    ///
    /// let iri = IriStr::new("http://user:pass@example.com:8080/pathpath?queryquery")?;
    /// let authority = iri.authority_components()
    ///     .expect("authority is available");
    /// assert_eq!(authority.userinfo(), Some("user:pass"));
    /// assert_eq!(authority.host(), "example.com");
    /// assert_eq!(authority.port(), Some("8080"));
    /// # Ok::<_, Error>(())
    /// ```
    ///
    /// ```
    /// # use iri_string::validate::Error;
    /// use iri_string::types::IriStr;
    ///
    /// let iri = IriStr::new("urn:uuid:10db315b-fcd1-4428-aca8-15babc9a2da2")?;
    /// assert!(iri.authority_components().is_none());
    /// # Ok::<_, Error>(())
    /// ```
    #[inline]
    #[must_use]
    pub fn authority_components(&self) -> Option<AuthorityComponents<'_>> {
        // The conversion target of `as_ref()` is determined by the parameter
        // type of `AuthorityComponents::from_iri`.
        let iri = self.as_ref();
        AuthorityComponents::from_iri(iri)
    }
}
#[cfg(feature = "alloc")]
impl<S: Spec> RiString<S> {
    /// Splits the IRI into an absolute IRI part and a fragment part.
    ///
    /// The leading `#` character of the fragment part is not included in the
    /// returned fragment.
    ///
    /// # Examples
    ///
    /// ```
    /// use std::convert::TryFrom;
    /// # use iri_string::{spec::IriSpec, types::{IriFragmentString, IriString}, validate::Error};
    /// let iri = "foo://bar/baz?qux=quux#corge".parse::<IriString>()?;
    /// let (absolute, fragment) = iri.into_absolute_and_fragment();
    /// let fragment_expected = IriFragmentString::try_from("corge".to_owned())
    ///     .map_err(|e| e.validation_error())?;
    /// assert_eq!(absolute, "foo://bar/baz?qux=quux");
    /// assert_eq!(fragment, Some(fragment_expected));
    /// # Ok::<_, Error>(())
    /// ```
    ///
    /// ```
    /// use std::convert::TryFrom;
    /// # use iri_string::{spec::IriSpec, types::{IriFragmentString, IriString}, validate::Error};
    /// let iri = "foo://bar/baz?qux=quux#".parse::<IriString>()?;
    /// let (absolute, fragment) = iri.into_absolute_and_fragment();
    /// let fragment_expected = IriFragmentString::try_from("".to_owned())
    ///     .map_err(|e| e.validation_error())?;
    /// assert_eq!(absolute, "foo://bar/baz?qux=quux");
    /// assert_eq!(fragment, Some(fragment_expected));
    /// # Ok::<_, Error>(())
    /// ```
    ///
    /// ```
    /// # use iri_string::{spec::IriSpec, types::IriString, validate::Error};
    /// let iri = "foo://bar/baz?qux=quux".parse::<IriString>()?;
    /// let (absolute, fragment) = iri.into_absolute_and_fragment();
    /// assert_eq!(absolute, "foo://bar/baz?qux=quux");
    /// assert_eq!(fragment, None);
    /// # Ok::<_, Error>(())
    /// ```
    #[must_use]
    pub fn into_absolute_and_fragment(self) -> (RiAbsoluteString<S>, Option<RiFragmentString<S>>) {
        let (absolute_raw, fragment_raw) = raw::split_fragment_owned(self.into());
        // SAFETY: an IRI without fragment part is also an absolute IRI.
        let absolute = unsafe { RiAbsoluteString::new_maybe_unchecked(absolute_raw) };
        // SAFETY: the string returned by `raw::split_fragment_owned()` must
        // be the fragment part, and must also be a substring of the source IRI.
        let fragment =
            fragment_raw.map(|text| unsafe { RiFragmentString::new_maybe_unchecked(text) });
        (absolute, fragment)
    }

    /// Strips the fragment part if it exists, and returns an [`RiAbsoluteString`].
    ///
    /// # Examples
    ///
    /// ```
    /// # use iri_string::{spec::IriSpec, types::IriString, validate::Error};
    /// let iri = "foo://bar/baz?qux=quux#corge".parse::<IriString>()?;
    /// assert_eq!(iri.into_absolute(), "foo://bar/baz?qux=quux");
    /// # Ok::<_, Error>(())
    /// ```
    ///
    /// ```
    /// # use iri_string::{spec::IriSpec, types::IriString, validate::Error};
    /// let iri = "foo://bar/baz?qux=quux".parse::<IriString>()?;
    /// assert_eq!(iri.into_absolute(), "foo://bar/baz?qux=quux");
    /// # Ok::<_, Error>(())
    /// ```
    ///
    /// [`RiAbsoluteString`]: struct.RiAbsoluteString.html
    #[must_use]
    pub fn into_absolute(self) -> RiAbsoluteString<S> {
        // Reuse the owned buffer of `self` and cut the fragment off in place.
        let mut buf: String = self.into();
        raw::remove_fragment(&mut buf);
        // SAFETY: an IRI without fragment part is also an absolute IRI.
        unsafe { RiAbsoluteString::new_maybe_unchecked(buf) }
    }

    /// Sets the fragment part to the given string.
    ///
    /// Removes the fragment part (and its leading `#` character) if `None` is given.
    pub fn set_fragment(&mut self, fragment: Option<&RiFragmentStr<S>>) {
        let new_fragment = fragment.map(AsRef::as_ref);
        raw::set_fragment(&mut self.inner, new_fragment);
        // Debug builds re-validate the whole string after the in-place edit.
        debug_assert!(iri::<S>(&self.inner).is_ok());
    }

    /// Removes the password completely (including the separator colon) from
    /// `self`, even if the password is empty.
    ///
    /// # Examples
    ///
    /// ```
    /// # use iri_string::validate::Error;
    /// # #[cfg(feature = "alloc")] {
    /// use iri_string::types::IriString;
    ///
    /// let mut iri = IriString::try_from("http://user:password@example.com/path?query")?;
    /// iri.remove_password_inline();
    /// assert_eq!(iri, "http://user@example.com/path?query");
    /// # }
    /// # Ok::<_, Error>(())
    /// ```
    ///
    /// Even if the password is empty, the password and separator will be removed.
    ///
    /// ```
    /// # use iri_string::validate::Error;
    /// # #[cfg(feature = "alloc")] {
    /// use iri_string::types::IriString;
    ///
    /// let mut iri = IriString::try_from("http://user:@example.com/path?query")?;
    /// iri.remove_password_inline();
    /// assert_eq!(iri, "http://user@example.com/path?query");
    /// # }
    /// # Ok::<_, Error>(())
    /// ```
    pub fn remove_password_inline(&mut self) {
        let password = match password_range_to_hide(self.as_slice().as_ref()) {
            None => return,
            Some(range) => range,
        };
        // The separator colon is the byte right before the password.
        let removal_start = password.start - 1;
        // SAFETY: the IRI must still be valid after the password component and
        // the leading separator colon is removed.
        unsafe {
            let buf = self.as_inner_mut();
            buf.drain(removal_start..password.end);
            debug_assert!(
                RiStr::<S>::new(buf).is_ok(),
                "[validity] the IRI must be valid after the password component is removed"
            );
        }
    }

    /// Replaces the non-empty password in `self` with the empty password.
    ///
    /// The separator colon is left in place if the password part was available.
    ///
    /// # Examples
    ///
    /// ```
    /// # use iri_string::validate::Error;
    /// # #[cfg(feature = "alloc")] {
    /// use iri_string::types::IriString;
    ///
    /// let mut iri = IriString::try_from("http://user:password@example.com/path?query")?;
    /// iri.remove_nonempty_password_inline();
    /// assert_eq!(iri, "http://user:@example.com/path?query");
    /// # }
    /// # Ok::<_, Error>(())
    /// ```
    ///
    /// If the password is empty, it is left as is.
    ///
    /// ```
    /// # use iri_string::validate::Error;
    /// # #[cfg(feature = "alloc")] {
    /// use iri_string::types::IriString;
    ///
    /// let mut iri = IriString::try_from("http://user:@example.com/path?query")?;
    /// iri.remove_nonempty_password_inline();
    /// assert_eq!(iri, "http://user:@example.com/path?query");
    /// # }
    /// # Ok::<_, Error>(())
    /// ```
    pub fn remove_nonempty_password_inline(&mut self) {
        let password = match password_range_to_hide(self.as_slice().as_ref()) {
            None => return,
            // Nothing to do for an empty password.
            Some(range) if range.is_empty() => return,
            Some(range) => range,
        };
        debug_assert_eq!(
            self.as_str().as_bytes().get(password.start - 1).copied(),
            Some(b':'),
            "[validity] the password component must be prefixed with a separator colon"
        );
        // SAFETY: the IRI must still be valid if the password is replaced with
        // empty string.
        unsafe {
            let buf = self.as_inner_mut();
            buf.drain(password);
            debug_assert!(
                RiStr::<S>::new(buf).is_ok(),
                "[validity] the IRI must be valid after the password component is removed"
            );
        }
    }
}
// Conversions from `RiStr` / `RiString` to `RiReferenceStr` / `RiReferenceString`.
// NOTE(review): the macro definition is not visible here; presumably the
// generated conversions are infallible because every IRI is also a valid
// IRI reference -- confirm against the macro.
impl_trivial_conv_between_iri! {
from_slice: RiStr,
from_owned: RiString,
to_slice: RiReferenceStr,
to_owned: RiReferenceString,
}

View File

@@ -0,0 +1,133 @@
//! Query string.
use crate::spec::Spec;
use crate::validate::{query, Error, ErrorKind};
define_custom_string_slice! {
/// A borrowed slice of an IRI query (i.e. after the first `?` and before the first `#`).
///
/// This corresponds to [`iquery` rule] in [RFC 3987] (and [`query` rule] in [RFC 3986]).
/// The rule for `iquery` is `*( ipchar / iprivate / "/" / "?" )`.
///
/// # Valid values
///
/// This type can have an IRI query.
/// Note that the IRI `foo://bar/baz?qux=quux#corge` has the query `qux=quux`,
/// **not** `?qux=quux`.
///
/// ```
/// use iri_string::types::IriQueryStr;
/// assert!(IriQueryStr::new("").is_ok());
/// assert!(IriQueryStr::new("foo").is_ok());
/// assert!(IriQueryStr::new("foo/bar").is_ok());
/// assert!(IriQueryStr::new("/foo/bar").is_ok());
/// assert!(IriQueryStr::new("//foo/bar").is_ok());
/// assert!(IriQueryStr::new("https://user:pass@example.com:8080").is_ok());
/// assert!(IriQueryStr::new("https://example.com/").is_ok());
/// // Question sign `?` can appear in an IRI query.
/// assert!(IriQueryStr::new("query?again").is_ok());
/// ```
///
/// Some characters and sequences cannot be used in a query.
///
/// ```
/// use iri_string::types::IriQueryStr;
/// // `<` and `>` cannot directly appear in an IRI reference.
/// assert!(IriQueryStr::new("<not allowed>").is_err());
/// // Broken percent encoding cannot appear in an IRI reference.
/// assert!(IriQueryStr::new("%").is_err());
/// assert!(IriQueryStr::new("%GG").is_err());
/// // Hash sign `#` cannot appear in an IRI query.
/// assert!(IriQueryStr::new("#hash").is_err());
/// ```
///
/// [RFC 3986]: https://www.rfc-editor.org/rfc/rfc3986.html
/// [RFC 3987]: https://www.rfc-editor.org/rfc/rfc3987.html
/// [`query` rule]: https://www.rfc-editor.org/rfc/rfc3986.html#section-3.4
/// [`iquery` rule]: https://www.rfc-editor.org/rfc/rfc3987.html#section-2.2
struct RiQueryStr {
validator = query,
expecting_msg = "IRI query string",
}
}
#[cfg(feature = "alloc")]
define_custom_string_owned! {
/// An owned string of an IRI query (i.e. after the first `?` and before the first `#`).
///
/// This corresponds to [`iquery` rule] in [RFC 3987] (and [`query` rule] in [RFC 3986]).
/// The rule for `iquery` is `*( ipchar / iprivate / "/" / "?" )`.
///
/// For details, see the documentation for [`RiQueryStr`].
///
/// Enabled by `alloc` or `std` feature.
///
/// [RFC 3986]: https://www.rfc-editor.org/rfc/rfc3986.html
/// [RFC 3987]: https://www.rfc-editor.org/rfc/rfc3987.html
/// [`query` rule]: https://www.rfc-editor.org/rfc/rfc3986.html#section-3.4
/// [`iquery` rule]: https://www.rfc-editor.org/rfc/rfc3987.html#section-2.2
/// [`RiQueryStr`]: struct.RiQueryStr.html
struct RiQueryString {
validator = query,
slice = RiQueryStr,
expecting_msg = "IRI query string",
}
}
impl<S: Spec> RiQueryStr<S> {
/// Creates a new `&RiQueryStr` from the query part prefixed by `?`.
///
/// # Examples
///
/// ```
/// # use iri_string::types::IriQueryStr;
/// assert!(IriQueryStr::from_prefixed("?").is_ok());
/// assert!(IriQueryStr::from_prefixed("?foo").is_ok());
/// assert!(IriQueryStr::from_prefixed("?foo/bar").is_ok());
/// assert!(IriQueryStr::from_prefixed("?/foo/bar").is_ok());
/// assert!(IriQueryStr::from_prefixed("?//foo/bar").is_ok());
/// assert!(IriQueryStr::from_prefixed("?https://user:pass@example.com:8080").is_ok());
/// assert!(IriQueryStr::from_prefixed("?https://example.com/").is_ok());
/// // Question sign `?` can appear in an IRI query.
/// assert!(IriQueryStr::from_prefixed("?query?again").is_ok());
///
/// // `<` and `>` cannot directly appear in an IRI.
/// assert!(IriQueryStr::from_prefixed("?<not allowed>").is_err());
/// // Broken percent encoding cannot appear in an IRI.
/// assert!(IriQueryStr::new("?%").is_err());
/// assert!(IriQueryStr::new("?%GG").is_err());
/// // `?` prefix is expected.
/// assert!(IriQueryStr::from_prefixed("").is_err());
/// assert!(IriQueryStr::from_prefixed("foo").is_err());
/// // Hash sign `#` cannot appear in an IRI query.
/// assert!(IriQueryStr::from_prefixed("?#hash").is_err());
/// ```
pub fn from_prefixed(s: &str) -> Result<&Self, Error> {
if !s.starts_with('?') {
return Err(Error::with_kind(ErrorKind::InvalidQuery));
}
TryFrom::try_from(&s[1..])
}
}

View File

@@ -0,0 +1,697 @@
//! IRI reference.
#[cfg(all(feature = "alloc", not(feature = "std")))]
use alloc::string::String;
use crate::components::AuthorityComponents;
#[cfg(feature = "alloc")]
use crate::mask_password::password_range_to_hide;
use crate::mask_password::PasswordMasked;
use crate::normalize::Normalized;
use crate::parser::trusted as trusted_parser;
#[cfg(feature = "alloc")]
use crate::raw;
use crate::resolve::FixedBaseResolver;
use crate::spec::Spec;
use crate::types::{RiAbsoluteStr, RiFragmentStr, RiQueryStr, RiRelativeStr, RiStr};
#[cfg(feature = "alloc")]
use crate::types::{RiRelativeString, RiString};
#[cfg(feature = "alloc")]
use crate::validate::iri;
use crate::validate::iri_reference;
define_custom_string_slice! {
/// A borrowed string of an IRI reference (either an IRI or a relative IRI reference).
///
/// This corresponds to [`IRI-reference` rule] in [RFC 3987]
/// (and [`URI-reference` rule] in [RFC 3986]).
/// The rule for `IRI-reference` is `IRI / irelative-ref`.
/// In other words, this is the union of [`RiStr`] and [`RiRelativeStr`].
///
/// # Valid values
///
/// This type can have an IRI reference (which can be absolute or relative).
///
/// ```
/// # use iri_string::types::IriReferenceStr;
/// assert!(IriReferenceStr::new("https://user:pass@example.com:8080").is_ok());
/// assert!(IriReferenceStr::new("https://example.com/").is_ok());
/// assert!(IriReferenceStr::new("https://example.com/foo?bar=baz").is_ok());
/// assert!(IriReferenceStr::new("https://example.com/foo?bar=baz#qux").is_ok());
/// assert!(IriReferenceStr::new("foo:bar").is_ok());
/// assert!(IriReferenceStr::new("foo:").is_ok());
/// // `foo://.../` below are all allowed. See the crate documentation for detail.
/// assert!(IriReferenceStr::new("foo:/").is_ok());
/// assert!(IriReferenceStr::new("foo://").is_ok());
/// assert!(IriReferenceStr::new("foo:///").is_ok());
/// assert!(IriReferenceStr::new("foo:////").is_ok());
/// assert!(IriReferenceStr::new("foo://///").is_ok());
/// assert!(IriReferenceStr::new("foo/bar").is_ok());
/// assert!(IriReferenceStr::new("/foo/bar").is_ok());
/// assert!(IriReferenceStr::new("//foo/bar").is_ok());
/// assert!(IriReferenceStr::new("#foo").is_ok());
/// ```
///
/// Some characters and sequences cannot be used in an IRI reference.
///
/// ```
/// # use iri_string::types::IriReferenceStr;
/// // `<` and `>` cannot directly appear in an IRI reference.
/// assert!(IriReferenceStr::new("<not allowed>").is_err());
/// // Broken percent encoding cannot appear in an IRI reference.
/// assert!(IriReferenceStr::new("%").is_err());
/// assert!(IriReferenceStr::new("%GG").is_err());
/// ```
///
/// [RFC 3986]: https://www.rfc-editor.org/rfc/rfc3986.html
/// [RFC 3987]: https://www.rfc-editor.org/rfc/rfc3987.html
/// [`IRI-reference` rule]: https://www.rfc-editor.org/rfc/rfc3987.html#section-2.2
/// [`URI-reference` rule]: https://www.rfc-editor.org/rfc/rfc3986.html#section-4.1
/// [`RiRelativeStr`]: struct.RiRelativeStr.html
/// [`RiStr`]: struct.RiStr.html
struct RiReferenceStr {
validator = iri_reference,
expecting_msg = "IRI reference string",
}
}
#[cfg(feature = "alloc")]
define_custom_string_owned! {
/// An owned string of an IRI reference (either an IRI or a relative IRI reference).
///
/// This corresponds to [`IRI-reference` rule] in [RFC 3987]
/// (and [`URI-reference` rule] in [RFC 3986]).
/// The rule for `IRI-reference` is `IRI / irelative-ref`.
/// In other words, this is the union of [`RiString`] and [`RiRelativeString`].
///
/// For details, see the document for [`RiReferenceStr`].
///
/// Enabled by `alloc` or `std` feature.
///
/// [RFC 3986]: https://www.rfc-editor.org/rfc/rfc3986.html
/// [RFC 3987]: https://www.rfc-editor.org/rfc/rfc3987.html
/// [`IRI-reference` rule]: https://www.rfc-editor.org/rfc/rfc3987.html#section-2.2
/// [`URI-reference` rule]: https://www.rfc-editor.org/rfc/rfc3986.html#section-4.1
/// [`RiReferenceStr`]: struct.RiReferenceStr.html
/// [`RiRelativeString`]: struct.RiRelativeString.html
/// [`RiString`]: struct.RiString.html
struct RiReferenceString {
validator = iri_reference,
slice = RiReferenceStr,
expecting_msg = "IRI reference string",
}
}
impl<S: Spec> RiReferenceStr<S> {
    /// Returns the string as [`&RiStr`][`RiStr`], if it is valid as an IRI.
    ///
    /// If it is not an IRI, then [`&RiRelativeStr`][`RiRelativeStr`] is returned as `Err(_)`.
    ///
    /// [`RiRelativeStr`]: struct.RiRelativeStr.html
    /// [`RiStr`]: struct.RiStr.html
    pub fn to_iri(&self) -> Result<&RiStr<S>, &RiRelativeStr<S>> {
        // Check with `IRI` rule first, because the syntax rule for `IRI-reference` is
        // `IRI / irelative-ref`.
        //
        // > Some productions are ambiguous. The "first-match-wins" (a.k.a.
        // > "greedy") algorithm applies. For details, see [RFC3986].
        // >
        // > --- <https://www.rfc-editor.org/rfc/rfc3987.html#section-2.2>.
        let raw = self.as_str();
        match <&RiStr<S>>::try_from(raw) {
            Ok(iri) => Ok(iri),
            // SAFETY: if an IRI reference is not an IRI, then it is a relative IRI.
            // See the RFC 3987 syntax rule `IRI-reference = IRI / irelative-ref`.
            Err(_) => Err(unsafe { RiRelativeStr::new_maybe_unchecked(raw) }),
        }
    }

    /// Returns the string as [`&RiRelativeStr`][`RiRelativeStr`], if it is valid
    /// as a relative IRI reference.
    ///
    /// If it is not a relative IRI reference (i.e. it is an IRI), then
    /// [`&RiStr`][`RiStr`] is returned as `Err(_)`.
    ///
    /// [`RiRelativeStr`]: struct.RiRelativeStr.html
    /// [`RiStr`]: struct.RiStr.html
    pub fn to_relative_iri(&self) -> Result<&RiRelativeStr<S>, &RiStr<S>> {
        // Swap the variants of `to_iri()`: its `Err` (relative reference)
        // becomes `Ok`, and its `Ok` (IRI) becomes `Err`.
        self.to_iri().map_or_else(Ok, Err)
    }

    /// Returns resolved IRI against the given base IRI.
    ///
    /// For IRI reference resolution output examples, see [RFC 3986 section 5.4].
    ///
    /// If you are going to resolve multiple references against the common base,
    /// consider using [`FixedBaseResolver`].
    ///
    /// # Strictness
    ///
    /// The IRI parsers provided by this crate are strict (e.g. `http:g` is
    /// always interpreted as a composition of the scheme `http` and the path
    /// `g`), so backward compatible parsing and resolution are not provided.
    /// About parser and resolver strictness, see [RFC 3986 section 5.4.2]:
    ///
    /// > Some parsers allow the scheme name to be present in a relative
    /// > reference if it is the same as the base URI scheme. This is considered
    /// > to be a loophole in prior specifications of partial URI
    /// > [RFC1630](https://www.rfc-editor.org/rfc/rfc1630.html). Its use should be
    /// > avoided but is allowed for backward compatibility.
    /// >
    /// > --- <https://www.rfc-editor.org/rfc/rfc3986.html#section-5.4.2>
    ///
    /// # Failures
    ///
    /// This method itself does not fail, but IRI resolution without WHATWG URL
    /// Standard serialization can fail in some minor cases.
    ///
    /// To see examples of such unresolvable IRIs, visit the documentation
    /// for [`normalize`][`crate::normalize`] module.
    ///
    /// [RFC 3986 section 5.4]: https://www.rfc-editor.org/rfc/rfc3986.html#section-5.4
    /// [RFC 3986 section 5.4.2]: https://www.rfc-editor.org/rfc/rfc3986.html#section-5.4.2
    pub fn resolve_against<'a>(&'a self, base: &'a RiAbsoluteStr<S>) -> Normalized<'a, RiStr<S>> {
        // A one-shot resolver for `base`.
        let resolver = FixedBaseResolver::new(base);
        resolver.resolve(self.as_ref())
    }

    /// Returns a proxy to the IRI that provides the password masking feature.
    ///
    /// # Examples
    ///
    /// ```
    /// # use iri_string::validate::Error;
    /// # #[cfg(feature = "alloc")] {
    /// use iri_string::format::ToDedicatedString;
    /// use iri_string::types::IriReferenceStr;
    ///
    /// let iri = IriReferenceStr::new("http://user:password@example.com/path?query")?;
    /// let masked = iri.mask_password();
    /// assert_eq!(masked.to_dedicated_string(), "http://user:@example.com/path?query");
    ///
    /// assert_eq!(
    ///     masked.replace_password("${password}").to_string(),
    ///     "http://user:${password}@example.com/path?query"
    /// );
    /// # }
    /// # Ok::<_, Error>(())
    /// ```
    #[inline]
    #[must_use]
    pub fn mask_password(&self) -> PasswordMasked<'_, Self> {
        // The proxy only borrows the IRI reference.
        let iri: &Self = self;
        PasswordMasked::new(iri)
    }
}
/// Components getters.
impl<S: Spec> RiReferenceStr<S> {
/// Returns the scheme, if it exists.
///
/// The colon that follows the scheme is not included.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::types::IriReferenceStr;
///
/// let iri = IriReferenceStr::new("http://example.com/pathpath?queryquery#fragfrag")?;
/// assert_eq!(iri.scheme_str(), Some("http"));
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::types::IriReferenceStr;
///
/// let iri = IriReferenceStr::new("foo/bar:baz")?;
/// assert_eq!(iri.scheme_str(), None);
/// # Ok::<_, Error>(())
/// ```
#[inline]
#[must_use]
pub fn scheme_str(&self) -> Option<&str> {
    let raw = self.as_str();
    trusted_parser::extract_scheme(raw)
}
/// Returns the authority.
///
/// The leading `//` is truncated.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::types::IriReferenceStr;
///
/// let iri = IriReferenceStr::new("http://example.com/pathpath?queryquery#fragfrag")?;
/// assert_eq!(iri.authority_str(), Some("example.com"));
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::types::IriReferenceStr;
///
/// let iri = IriReferenceStr::new("urn:uuid:10db315b-fcd1-4428-aca8-15babc9a2da2")?;
/// assert_eq!(iri.authority_str(), None);
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::types::IriReferenceStr;
///
/// let iri = IriReferenceStr::new("foo/bar:baz")?;
/// assert_eq!(iri.authority_str(), None);
/// # Ok::<_, Error>(())
/// ```
#[inline]
#[must_use]
pub fn authority_str(&self) -> Option<&str> {
    // Hand the raw string form to the trusted parser.
    let raw = self.as_str();
    trusted_parser::extract_authority(raw)
}
/// Returns the path.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::types::IriReferenceStr;
///
/// let iri = IriReferenceStr::new("http://example.com/pathpath?queryquery#fragfrag")?;
/// assert_eq!(iri.path_str(), "/pathpath");
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::types::IriReferenceStr;
///
/// let iri = IriReferenceStr::new("urn:uuid:10db315b-fcd1-4428-aca8-15babc9a2da2")?;
/// assert_eq!(iri.path_str(), "uuid:10db315b-fcd1-4428-aca8-15babc9a2da2");
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::types::IriReferenceStr;
///
/// let iri = IriReferenceStr::new("foo/bar:baz")?;
/// assert_eq!(iri.path_str(), "foo/bar:baz");
/// # Ok::<_, Error>(())
/// ```
#[inline]
#[must_use]
pub fn path_str(&self) -> &str {
    // Hand the raw string form to the trusted parser.
    let raw = self.as_str();
    trusted_parser::extract_path(raw)
}
/// Returns the query.
///
/// The leading question mark (`?`) is truncated.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::types::{IriQueryStr, IriReferenceStr};
///
/// let iri = IriReferenceStr::new("http://example.com/pathpath?queryquery#fragfrag")?;
/// let query = IriQueryStr::new("queryquery")?;
/// assert_eq!(iri.query(), Some(query));
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::types::IriReferenceStr;
///
/// let iri = IriReferenceStr::new("urn:uuid:10db315b-fcd1-4428-aca8-15babc9a2da2")?;
/// assert_eq!(iri.query(), None);
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::types::{IriQueryStr, IriReferenceStr};
///
/// let iri = IriReferenceStr::new("foo/bar:baz?")?;
/// let query = IriQueryStr::new("")?;
/// assert_eq!(iri.query(), Some(query));
/// # Ok::<_, Error>(())
/// ```
#[inline]
#[must_use]
pub fn query(&self) -> Option<&RiQueryStr<S>> {
    // No query component means there is nothing to wrap.
    let raw_query = trusted_parser::extract_query(self.as_str())?;
    // SAFETY: `extract_query` yields the query part of the source IRI; being
    // a substring of that IRI, it should contain only valid characters.
    Some(unsafe { RiQueryStr::new_maybe_unchecked(raw_query) })
}
/// Returns the query as a raw string slice.
///
/// The leading question mark (`?`) is truncated.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::types::IriReferenceStr;
///
/// let iri = IriReferenceStr::new("http://example.com/pathpath?queryquery#fragfrag")?;
/// assert_eq!(iri.query_str(), Some("queryquery"));
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::types::IriReferenceStr;
///
/// let iri = IriReferenceStr::new("urn:uuid:10db315b-fcd1-4428-aca8-15babc9a2da2")?;
/// assert_eq!(iri.query_str(), None);
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::types::IriReferenceStr;
///
/// let iri = IriReferenceStr::new("foo/bar:baz?")?;
/// assert_eq!(iri.query_str(), Some(""));
/// # Ok::<_, Error>(())
/// ```
#[inline]
#[must_use]
pub fn query_str(&self) -> Option<&str> {
    // Hand the raw string form to the trusted parser.
    let raw = self.as_str();
    trusted_parser::extract_query(raw)
}
/// Returns the fragment part if exists.
///
/// A leading `#` character is truncated if the fragment part exists.
///
/// # Examples
///
/// If the IRI has a fragment part, `Some(_)` is returned.
///
/// ```
/// # use iri_string::{spec::IriSpec, types::{IriFragmentStr, IriReferenceStr}, validate::Error};
/// let iri = IriReferenceStr::new("foo://bar/baz?qux=quux#corge")?;
/// let fragment = IriFragmentStr::new("corge")?;
/// assert_eq!(iri.fragment(), Some(fragment));
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::{spec::IriSpec, types::{IriFragmentStr, IriReferenceStr}, validate::Error};
/// let iri = IriReferenceStr::new("#foo")?;
/// let fragment = IriFragmentStr::new("foo")?;
/// assert_eq!(iri.fragment(), Some(fragment));
/// # Ok::<_, Error>(())
/// ```
///
/// When the fragment part exists but is empty string, `Some(_)` is returned.
///
/// ```
/// # use iri_string::{spec::IriSpec, types::{IriFragmentStr, IriReferenceStr}, validate::Error};
/// let iri = IriReferenceStr::new("foo://bar/baz?qux=quux#")?;
/// let fragment = IriFragmentStr::new("")?;
/// assert_eq!(iri.fragment(), Some(fragment));
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::{spec::IriSpec, types::{IriFragmentStr, IriReferenceStr}, validate::Error};
/// let iri = IriReferenceStr::new("#")?;
/// let fragment = IriFragmentStr::new("")?;
/// assert_eq!(iri.fragment(), Some(fragment));
/// # Ok::<_, Error>(())
/// ```
///
/// If the IRI has no fragment, `None` is returned.
///
/// ```
/// # use iri_string::{spec::IriSpec, types::IriReferenceStr, validate::Error};
/// let iri = IriReferenceStr::new("foo://bar/baz?qux=quux")?;
/// assert_eq!(iri.fragment(), None);
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::{spec::IriSpec, types::IriReferenceStr, validate::Error};
/// let iri = IriReferenceStr::new("")?;
/// assert_eq!(iri.fragment(), None);
/// # Ok::<_, Error>(())
/// ```
#[must_use]
pub fn fragment(&self) -> Option<&RiFragmentStr<S>> {
    // No fragment component means there is nothing to wrap.
    let raw_fragment = trusted_parser::extract_fragment(self.as_str())?;
    // SAFETY: `extract_fragment` yields the fragment part of the source IRI;
    // being a substring of that IRI, it should contain only valid characters.
    Some(unsafe { RiFragmentStr::new_maybe_unchecked(raw_fragment) })
}
/// Returns the fragment part as a raw string slice if exists.
///
/// A leading `#` character is truncated if the fragment part exists.
///
/// # Examples
///
/// If the IRI has a fragment part, `Some(_)` is returned.
///
/// ```
/// # use iri_string::{spec::IriSpec, types::IriReferenceStr, validate::Error};
/// let iri = IriReferenceStr::new("foo://bar/baz?qux=quux#corge")?;
/// assert_eq!(iri.fragment_str(), Some("corge"));
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::{spec::IriSpec, types::IriReferenceStr, validate::Error};
/// let iri = IriReferenceStr::new("#foo")?;
/// assert_eq!(iri.fragment_str(), Some("foo"));
/// # Ok::<_, Error>(())
/// ```
///
/// When the fragment part exists but is empty string, `Some(_)` is returned.
///
/// ```
/// # use iri_string::{spec::IriSpec, types::IriReferenceStr, validate::Error};
/// let iri = IriReferenceStr::new("foo://bar/baz?qux=quux#")?;
/// assert_eq!(iri.fragment_str(), Some(""));
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::{spec::IriSpec, types::IriReferenceStr, validate::Error};
/// let iri = IriReferenceStr::new("#")?;
/// assert_eq!(iri.fragment_str(), Some(""));
/// # Ok::<_, Error>(())
/// ```
///
/// If the IRI has no fragment, `None` is returned.
///
/// ```
/// # use iri_string::{spec::IriSpec, types::IriReferenceStr, validate::Error};
/// let iri = IriReferenceStr::new("foo://bar/baz?qux=quux")?;
/// assert_eq!(iri.fragment_str(), None);
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::{spec::IriSpec, types::IriReferenceStr, validate::Error};
/// let iri = IriReferenceStr::new("")?;
/// assert_eq!(iri.fragment_str(), None);
/// # Ok::<_, Error>(())
/// ```
#[must_use]
pub fn fragment_str(&self) -> Option<&str> {
    // Hand the raw string form to the trusted parser.
    let raw = self.as_str();
    trusted_parser::extract_fragment(raw)
}
/// Returns the authority components.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::types::IriReferenceStr;
///
/// let iri = IriReferenceStr::new("http://user:pass@example.com:8080/pathpath?queryquery")?;
/// let authority = iri.authority_components()
/// .expect("authority is available");
/// assert_eq!(authority.userinfo(), Some("user:pass"));
/// assert_eq!(authority.host(), "example.com");
/// assert_eq!(authority.port(), Some("8080"));
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::types::IriReferenceStr;
///
/// let iri = IriReferenceStr::new("foo//bar:baz")?;
/// assert_eq!(iri.authority_str(), None);
/// assert!(iri.authority_components().is_none());
/// # Ok::<_, Error>(())
/// ```
#[inline]
#[must_use]
pub fn authority_components(&self) -> Option<AuthorityComponents<'_>> {
AuthorityComponents::from_iri(self)
}
}
#[cfg(feature = "alloc")]
impl<S: Spec> RiReferenceString<S> {
/// Returns the string as [`RiString`], if it is valid as an IRI.
///
/// If it is not an IRI, then [`RiRelativeString`] is returned as `Err(_)`.
///
/// [`RiRelativeString`]: struct.RiRelativeString.html
/// [`RiString`]: struct.RiString.html
pub fn into_iri(self) -> Result<RiString<S>, RiRelativeString<S>> {
    let raw: String = self.into();
    // The `IRI` rule is tried first because of how the syntax is specified:
    //
    // > Some productions are ambiguous. The "first-match-wins" (a.k.a.
    // > "greedy") algorithm applies. For details, see [RFC3986].
    // >
    // > --- <https://www.rfc-editor.org/rfc/rfc3987.html#section-2.2>.
    if iri::<S>(&raw).is_err() {
        // SAFETY: an IRI reference that is not an IRI is a relative IRI,
        // per the RFC 3987 rule `IRI-reference = IRI / irelative-ref`.
        return Err(unsafe { RiRelativeString::new_maybe_unchecked(raw) });
    }
    // SAFETY: `raw` has just been validated as an IRI.
    Ok(unsafe { RiString::new_always_unchecked(raw) })
}
/// Returns the string as [`RiRelativeString`], if it is valid as a relative IRI reference.
///
/// If it is not a relative IRI reference (i.e. it is an IRI), then [`RiString`] is returned as `Err(_)`.
///
/// [`RiRelativeString`]: struct.RiRelativeString.html
/// [`RiString`]: struct.RiString.html
pub fn into_relative_iri(self) -> Result<RiRelativeString<S>, RiString<S>> {
    // This is `into_iri` with the two outcomes swapped: the relative
    // reference (its `Err`) becomes `Ok`, and the IRI (its `Ok`) becomes `Err`.
    self.into_iri().map_or_else(Ok, Err)
}
/// Sets the fragment part to the given string.
///
/// Removes the fragment part (and its leading `#` character) if `None` is given.
pub fn set_fragment(&mut self, fragment: Option<&RiFragmentStr<S>>) {
    // Lower the typed fragment to the form the raw string editor accepts.
    let replacement = fragment.map(AsRef::as_ref);
    raw::set_fragment(&mut self.inner, replacement);
    // Debug builds re-validate the edited string.
    debug_assert!(iri_reference::<S>(&self.inner).is_ok());
}
/// Removes the password completely (including separator colon) from `self` even if it is empty.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::types::IriReferenceString;
///
/// let mut iri = IriReferenceString::try_from("http://user:password@example.com/path?query")?;
/// iri.remove_password_inline();
/// assert_eq!(iri, "http://user@example.com/path?query");
/// # }
/// # Ok::<_, Error>(())
/// ```
///
/// Even if the password is empty, the password and separator will be removed.
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::types::IriReferenceString;
///
/// let mut iri = IriReferenceString::try_from("http://user:@example.com/path?query")?;
/// iri.remove_password_inline();
/// assert_eq!(iri, "http://user@example.com/path?query");
/// # }
/// # Ok::<_, Error>(())
/// ```
pub fn remove_password_inline(&mut self) {
    if let Some(password) = password_range_to_hide(self.as_slice()) {
        // The separator colon sits immediately before the password, so the
        // removed span starts one byte earlier to take it out as well.
        let removed = (password.start - 1)..password.end;
        // SAFETY: the IRI must remain valid once the password component and
        // its leading separator colon are gone.
        unsafe {
            let inner = self.as_inner_mut();
            inner.drain(removed);
            debug_assert!(
                RiReferenceStr::<S>::new(inner).is_ok(),
                "[validity] the IRI must be valid after the password component is removed"
            );
        }
    }
}
/// Replaces the non-empty password in `self` to the empty password.
///
/// This leaves the separator colon if the password part was available.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::types::IriReferenceString;
///
/// let mut iri = IriReferenceString::try_from("http://user:password@example.com/path?query")?;
/// iri.remove_nonempty_password_inline();
/// assert_eq!(iri, "http://user:@example.com/path?query");
/// # }
/// # Ok::<_, Error>(())
/// ```
///
/// If the password is empty, it is left as is.
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::types::IriReferenceString;
///
/// let mut iri = IriReferenceString::try_from("http://user:@example.com/path?query")?;
/// iri.remove_nonempty_password_inline();
/// assert_eq!(iri, "http://user:@example.com/path?query");
/// # }
/// # Ok::<_, Error>(())
/// ```
pub fn remove_nonempty_password_inline(&mut self) {
    let password = match password_range_to_hide(self.as_slice()) {
        Some(range) => range,
        None => return,
    };
    // An empty password is left as is.
    if password.is_empty() {
        return;
    }
    debug_assert_eq!(
        self.as_str().as_bytes().get(password.start - 1).copied(),
        Some(b':'),
        "[validity] the password component must be prefixed with a separator colon"
    );
    // SAFETY: the IRI must remain valid once the password is replaced with
    // the empty password (the separator colon is kept).
    unsafe {
        let inner = self.as_inner_mut();
        inner.drain(password);
        debug_assert!(
            RiReferenceStr::<S>::new(inner).is_ok(),
            "[validity] the IRI must be valid after the password component \
             is replaced with the empty password"
        );
    }
}
}

View File

@@ -0,0 +1,571 @@
//! Relative IRI reference.
use crate::components::AuthorityComponents;
#[cfg(feature = "alloc")]
use crate::mask_password::password_range_to_hide;
use crate::mask_password::PasswordMasked;
use crate::normalize::Normalized;
use crate::parser::trusted as trusted_parser;
#[cfg(feature = "alloc")]
use crate::raw;
use crate::resolve::FixedBaseResolver;
use crate::spec::Spec;
#[cfg(feature = "alloc")]
use crate::types::RiReferenceString;
use crate::types::{RiAbsoluteStr, RiFragmentStr, RiQueryStr, RiReferenceStr, RiStr};
use crate::validate::relative_ref;
define_custom_string_slice! {
/// A borrowed slice of a relative IRI reference.
///
/// This corresponds to [`irelative-ref` rule] in [RFC 3987]
/// (and [`relative-ref` rule] in [RFC 3986]).
/// The rule for `irelative-ref` is `irelative-part [ "?" iquery ] [ "#" ifragment ]`.
///
/// # Valid values
///
/// This type can have a relative IRI reference.
///
/// ```
/// # use iri_string::types::IriRelativeStr;
/// assert!(IriRelativeStr::new("foo").is_ok());
/// assert!(IriRelativeStr::new("foo/bar").is_ok());
/// assert!(IriRelativeStr::new("/foo").is_ok());
/// assert!(IriRelativeStr::new("//foo/bar").is_ok());
/// assert!(IriRelativeStr::new("?foo").is_ok());
/// assert!(IriRelativeStr::new("#foo").is_ok());
/// assert!(IriRelativeStr::new("foo/bar?baz#qux").is_ok());
/// // The first path component can have colon if the path is absolute.
/// assert!(IriRelativeStr::new("/foo:bar/").is_ok());
/// // Second or following path components can have colon.
/// assert!(IriRelativeStr::new("foo/bar://baz/").is_ok());
/// assert!(IriRelativeStr::new("./foo://bar").is_ok());
/// ```
///
/// Absolute form of a reference is not allowed.
///
/// ```
/// # use iri_string::types::IriRelativeStr;
/// assert!(IriRelativeStr::new("https://example.com/").is_err());
/// // The first path component cannot have colon, if the path is not absolute.
/// assert!(IriRelativeStr::new("foo:bar").is_err());
/// assert!(IriRelativeStr::new("foo:").is_err());
/// assert!(IriRelativeStr::new("foo:/").is_err());
/// assert!(IriRelativeStr::new("foo://").is_err());
/// assert!(IriRelativeStr::new("foo:///").is_err());
/// assert!(IriRelativeStr::new("foo:////").is_err());
/// assert!(IriRelativeStr::new("foo://///").is_err());
/// ```
///
/// Some characters and sequences cannot be used in a relative IRI reference.
///
/// ```
/// # use iri_string::types::IriRelativeStr;
/// // `<` and `>` cannot directly appear in a relative IRI reference.
/// assert!(IriRelativeStr::new("<not allowed>").is_err());
/// // Broken percent encoding cannot appear in a relative IRI reference.
/// assert!(IriRelativeStr::new("%").is_err());
/// assert!(IriRelativeStr::new("%GG").is_err());
/// ```
///
/// [RFC 3986]: https://www.rfc-editor.org/rfc/rfc3986.html
/// [RFC 3987]: https://www.rfc-editor.org/rfc/rfc3987.html
/// [`irelative-ref` rule]: https://www.rfc-editor.org/rfc/rfc3987.html#section-2.2
/// [`relative-ref` rule]: https://www.rfc-editor.org/rfc/rfc3986.html#section-4.2
struct RiRelativeStr {
validator = relative_ref,
expecting_msg = "Relative IRI reference string",
}
}
#[cfg(feature = "alloc")]
define_custom_string_owned! {
/// An owned string of a relative IRI reference.
///
/// This corresponds to [`irelative-ref` rule] in [RFC 3987]
/// (and [`relative-ref` rule] in [RFC 3986]).
/// The rule for `irelative-ref` is `irelative-part [ "?" iquery ] [ "#" ifragment ]`.
///
/// For details, see the document for [`RiRelativeStr`].
///
/// Enabled by `alloc` or `std` feature.
///
/// [RFC 3986]: https://www.rfc-editor.org/rfc/rfc3986.html
/// [RFC 3987]: https://www.rfc-editor.org/rfc/rfc3987.html
/// [`irelative-ref` rule]: https://www.rfc-editor.org/rfc/rfc3987.html#section-2.2
/// [`relative-ref` rule]: https://www.rfc-editor.org/rfc/rfc3986.html#section-4.2
/// [`RiRelativeStr`]: struct.RiRelativeStr.html
struct RiRelativeString {
validator = relative_ref,
slice = RiRelativeStr,
expecting_msg = "Relative IRI reference string",
}
}
impl<S: Spec> RiRelativeStr<S> {
/// Returns resolved IRI against the given base IRI.
///
/// For IRI reference resolution output examples, see [RFC 3986 section 5.4].
///
/// If you are going to resolve multiple references against the common base,
/// consider using [`FixedBaseResolver`].
///
/// # Strictness
///
/// The IRI parsers provided by this crate are strict (e.g. `http:g` is
/// always interpreted as a composition of the scheme `http` and the path
/// `g`), so backward compatible parsing and resolution are not provided.
/// About parser and resolver strictness, see [RFC 3986 section 5.4.2]:
///
/// > Some parsers allow the scheme name to be present in a relative
/// > reference if it is the same as the base URI scheme. This is considered
/// > to be a loophole in prior specifications of partial URI
/// > [RFC1630](https://www.rfc-editor.org/rfc/rfc1630.html). Its use should be
/// > avoided but is allowed for backward compatibility.
/// >
/// > --- <https://www.rfc-editor.org/rfc/rfc3986.html#section-5.4.2>
///
/// # Failures
///
/// This method itself does not fail, but IRI resolution without WHATWG URL
/// Standard serialization can fail in some minor cases.
///
/// To see examples of such unresolvable IRIs, visit the documentation
/// for [`normalize`][`crate::normalize`] module.
///
/// [RFC 3986 section 5.4]: https://www.rfc-editor.org/rfc/rfc3986.html#section-5.4
/// [RFC 3986 section 5.4.2]: https://www.rfc-editor.org/rfc/rfc3986.html#section-5.4.2
pub fn resolve_against<'a>(&'a self, base: &'a RiAbsoluteStr<S>) -> Normalized<'a, RiStr<S>> {
    // A one-shot resolver bound to `base`; `self` is passed to it as a
    // general IRI reference.
    let resolver = FixedBaseResolver::new(base);
    resolver.resolve(self.as_ref())
}
/// Returns the proxy to the IRI with password masking feature.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::format::ToDedicatedString;
/// use iri_string::types::IriRelativeStr;
///
/// let iri = IriRelativeStr::new("//user:password@example.com/path?query")?;
/// let masked = iri.mask_password();
/// assert_eq!(masked.to_dedicated_string(), "//user:@example.com/path?query");
///
/// assert_eq!(
/// masked.replace_password("${password}").to_string(),
/// "//user:${password}@example.com/path?query"
/// );
/// # }
/// # Ok::<_, Error>(())
/// ```
#[inline]
#[must_use]
pub fn mask_password(&self) -> PasswordMasked<'_, Self> {
    // The proxy only borrows the IRI; nothing is copied or modified here.
    let target: &Self = self;
    PasswordMasked::new(target)
}
}
/// Components getters.
impl<S: Spec> RiRelativeStr<S> {
/// Returns the authority.
///
/// The leading `//` is truncated.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::types::IriRelativeStr;
///
/// let iri = IriRelativeStr::new("//example.com/pathpath?queryquery#fragfrag")?;
/// assert_eq!(iri.authority_str(), Some("example.com"));
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::types::IriRelativeStr;
///
/// let iri = IriRelativeStr::new("foo//bar:baz")?;
/// assert_eq!(iri.authority_str(), None);
/// # Ok::<_, Error>(())
/// ```
#[inline]
#[must_use]
pub fn authority_str(&self) -> Option<&str> {
    // Uses the parser entry point dedicated to relative references.
    let raw = self.as_str();
    trusted_parser::extract_authority_relative(raw)
}
/// Returns the path.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::types::IriRelativeStr;
///
/// let iri = IriRelativeStr::new("//example.com/pathpath?queryquery#fragfrag")?;
/// assert_eq!(iri.path_str(), "/pathpath");
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::types::IriRelativeStr;
///
/// let iri = IriRelativeStr::new("foo//bar:baz")?;
/// assert_eq!(iri.path_str(), "foo//bar:baz");
/// # Ok::<_, Error>(())
/// ```
#[inline]
#[must_use]
pub fn path_str(&self) -> &str {
    // Uses the parser entry point dedicated to relative references.
    let raw = self.as_str();
    trusted_parser::extract_path_relative(raw)
}
/// Returns the query.
///
/// The leading question mark (`?`) is truncated.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::types::{IriQueryStr, IriRelativeStr};
///
/// let iri = IriRelativeStr::new("//example.com/pathpath?queryquery#fragfrag")?;
/// let query = IriQueryStr::new("queryquery")?;
/// assert_eq!(iri.query(), Some(query));
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::types::{IriQueryStr, IriRelativeStr};
///
/// let iri = IriRelativeStr::new("foo//bar:baz?")?;
/// let query = IriQueryStr::new("")?;
/// assert_eq!(iri.query(), Some(query));
/// # Ok::<_, Error>(())
/// ```
#[inline]
#[must_use]
pub fn query(&self) -> Option<&RiQueryStr<S>> {
    // No query component means there is nothing to wrap.
    let raw_query = trusted_parser::extract_query(self.as_str())?;
    // SAFETY: `extract_query` yields the query part of the source IRI; being
    // a substring of that IRI, it should contain only valid characters.
    Some(unsafe { RiQueryStr::new_maybe_unchecked(raw_query) })
}
/// Returns the query as a raw string slice.
///
/// The leading question mark (`?`) is truncated.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::types::IriRelativeStr;
///
/// let iri = IriRelativeStr::new("//example.com/pathpath?queryquery#fragfrag")?;
/// assert_eq!(iri.query_str(), Some("queryquery"));
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::types::IriRelativeStr;
///
/// let iri = IriRelativeStr::new("foo//bar:baz?")?;
/// assert_eq!(iri.query_str(), Some(""));
/// # Ok::<_, Error>(())
/// ```
#[inline]
#[must_use]
pub fn query_str(&self) -> Option<&str> {
    // Hand the raw string form to the trusted parser.
    let raw = self.as_str();
    trusted_parser::extract_query(raw)
}
/// Returns the fragment part if exists.
///
/// A leading `#` character is truncated if the fragment part exists.
///
/// # Examples
///
/// If the IRI has a fragment part, `Some(_)` is returned.
///
/// ```
/// # use iri_string::{spec::IriSpec, types::{IriFragmentStr, IriRelativeStr}, validate::Error};
/// let iri = IriRelativeStr::new("?foo#bar")?;
/// let fragment = IriFragmentStr::new("bar")?;
/// assert_eq!(iri.fragment(), Some(fragment));
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::{spec::IriSpec, types::{IriFragmentStr, IriRelativeStr}, validate::Error};
/// let iri = IriRelativeStr::new("#foo")?;
/// let fragment = IriFragmentStr::new("foo")?;
/// assert_eq!(iri.fragment(), Some(fragment));
/// # Ok::<_, Error>(())
/// ```
///
/// When the fragment part exists but is empty string, `Some(_)` is returned.
///
/// ```
/// # use iri_string::{spec::IriSpec, types::{IriFragmentStr, IriRelativeStr}, validate::Error};
/// let iri = IriRelativeStr::new("#")?;
/// let fragment = IriFragmentStr::new("")?;
/// assert_eq!(iri.fragment(), Some(fragment));
/// # Ok::<_, Error>(())
/// ```
///
/// If the IRI has no fragment, `None` is returned.
///
/// ```
/// # use iri_string::{spec::IriSpec, types::IriRelativeStr, validate::Error};
/// let iri = IriRelativeStr::new("")?;
/// assert_eq!(iri.fragment(), None);
/// # Ok::<_, Error>(())
/// ```
#[must_use]
pub fn fragment(&self) -> Option<&RiFragmentStr<S>> {
    // View `self` as a general IRI reference and reuse its getter.
    let reference: &RiReferenceStr<S> = self.as_ref();
    reference.fragment()
}
/// Returns the fragment part as a raw string slice if exists.
///
/// A leading `#` character is truncated if the fragment part exists.
///
/// # Examples
///
/// If the IRI has a fragment part, `Some(_)` is returned.
///
/// ```
/// # use iri_string::{spec::IriSpec, types::IriRelativeStr, validate::Error};
/// let iri = IriRelativeStr::new("?foo#bar")?;
/// assert_eq!(iri.fragment_str(), Some("bar"));
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::{spec::IriSpec, types::IriRelativeStr, validate::Error};
/// let iri = IriRelativeStr::new("#foo")?;
/// assert_eq!(iri.fragment_str(), Some("foo"));
/// # Ok::<_, Error>(())
/// ```
///
/// When the fragment part exists but is empty string, `Some(_)` is returned.
///
/// ```
/// # use iri_string::{spec::IriSpec, types::IriRelativeStr, validate::Error};
/// let iri = IriRelativeStr::new("#")?;
/// assert_eq!(iri.fragment_str(), Some(""));
/// # Ok::<_, Error>(())
/// ```
///
/// If the IRI has no fragment, `None` is returned.
///
/// ```
/// # use iri_string::{spec::IriSpec, types::IriRelativeStr, validate::Error};
/// let iri = IriRelativeStr::new("")?;
/// assert_eq!(iri.fragment_str(), None);
/// # Ok::<_, Error>(())
/// ```
#[must_use]
pub fn fragment_str(&self) -> Option<&str> {
    // View `self` as a general IRI reference and reuse its getter.
    let reference: &RiReferenceStr<S> = self.as_ref();
    reference.fragment_str()
}
/// Returns the authority components.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::types::IriRelativeStr;
///
/// let iri = IriRelativeStr::new("//user:pass@example.com:8080/pathpath?queryquery")?;
/// let authority = iri.authority_components()
/// .expect("authority is available");
/// assert_eq!(authority.userinfo(), Some("user:pass"));
/// assert_eq!(authority.host(), "example.com");
/// assert_eq!(authority.port(), Some("8080"));
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::types::IriRelativeStr;
///
/// let iri = IriRelativeStr::new("foo//bar:baz")?;
/// assert_eq!(iri.authority_str(), None);
/// assert!(iri.authority_components().is_none());
/// # Ok::<_, Error>(())
/// ```
#[inline]
#[must_use]
pub fn authority_components(&self) -> Option<AuthorityComponents<'_>> {
AuthorityComponents::from_iri(self.as_ref())
}
}
#[cfg(feature = "alloc")]
impl<S: Spec> RiRelativeString<S> {
/// Sets the fragment part to the given string.
///
/// Removes the fragment part (and its leading `#` character) if `None` is given.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::types::{IriFragmentStr, IriRelativeString};
///
/// let mut iri = IriRelativeString::try_from("//user:password@example.com/path?query#frag.old")?;
/// assert_eq!(iri.fragment_str(), Some("frag.old"));
///
/// iri.set_fragment(None);
/// assert_eq!(iri.fragment(), None);
///
/// let frag_new = IriFragmentStr::new("frag-new")?;
/// iri.set_fragment(Some(frag_new));
/// assert_eq!(iri.fragment_str(), Some("frag-new"));
/// # }
/// # Ok::<_, Error>(())
/// ```
///
/// Fragment can be empty, and it is distinguished from the absence of a fragment.
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::types::IriRelativeString;
///
/// let mut iri = IriRelativeString::try_from("/path#")?;
/// assert_eq!(iri, "/path#");
/// assert_eq!(iri.fragment_str(), Some(""), "Fragment is present and empty");
///
/// iri.set_fragment(None);
/// assert_eq!(iri, "/path", "Note that # is now removed");
/// assert_eq!(iri.fragment_str(), None, "Fragment is absent");
/// # }
/// # Ok::<_, Error>(())
/// ```
pub fn set_fragment(&mut self, fragment: Option<&RiFragmentStr<S>>) {
    // Lower the typed fragment to the form the raw string editor accepts.
    let replacement = fragment.map(AsRef::as_ref);
    raw::set_fragment(&mut self.inner, replacement);
    // Debug builds re-validate the edited string.
    debug_assert!(relative_ref::<S>(&self.inner).is_ok());
}
/// Removes the password completely (including separator colon) from `self` even if it is empty.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::types::IriRelativeString;
///
/// let mut iri = IriRelativeString::try_from("//user:password@example.com/path?query")?;
/// iri.remove_password_inline();
/// assert_eq!(iri, "//user@example.com/path?query");
/// # }
/// # Ok::<_, Error>(())
/// ```
///
/// Even if the password is empty, the password and separator will be removed.
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::types::IriRelativeString;
///
/// let mut iri = IriRelativeString::try_from("//user:@example.com/path?query")?;
/// iri.remove_password_inline();
/// assert_eq!(iri, "//user@example.com/path?query");
/// # }
/// # Ok::<_, Error>(())
/// ```
pub fn remove_password_inline(&mut self) {
    if let Some(password) = password_range_to_hide(self.as_slice().as_ref()) {
        // The separator colon sits immediately before the password, so the
        // removed span starts one byte earlier to take it out as well.
        let removed = (password.start - 1)..password.end;
        // SAFETY: taking out the password component together with its
        // leading colon keeps the IRI syntactically valid.
        unsafe {
            let inner = self.as_inner_mut();
            inner.drain(removed);
            debug_assert!(
                RiRelativeStr::<S>::new(inner).is_ok(),
                "[validity] the IRI must be valid after the password component is removed"
            );
        }
    }
}
/// Replaces the non-empty password in `self` to the empty password.
///
/// This leaves the separator colon if the password part was available.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::types::IriRelativeString;
///
/// let mut iri = IriRelativeString::try_from("//user:password@example.com/path?query")?;
/// iri.remove_nonempty_password_inline();
/// assert_eq!(iri, "//user:@example.com/path?query");
/// # }
/// # Ok::<_, Error>(())
/// ```
///
/// If the password is empty, it is left as is.
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::types::IriRelativeString;
///
/// let mut iri = IriRelativeString::try_from("//user:@example.com/path?query")?;
/// iri.remove_nonempty_password_inline();
/// assert_eq!(iri, "//user:@example.com/path?query");
/// # }
/// # Ok::<_, Error>(())
/// ```
pub fn remove_nonempty_password_inline(&mut self) {
    let password = match password_range_to_hide(self.as_slice().as_ref()) {
        Some(range) => range,
        None => return,
    };
    // An empty password is left as is.
    if password.is_empty() {
        return;
    }
    debug_assert_eq!(
        self.as_str().as_bytes().get(password.start - 1).copied(),
        Some(b':'),
        "[validity] the password component must be prefixed with a separator colon"
    );
    // SAFETY: the IRI must remain valid once the password is replaced with
    // the empty password (the separator colon is kept).
    unsafe {
        let inner = self.as_inner_mut();
        inner.drain(password);
        debug_assert!(
            RiRelativeStr::<S>::new(inner).is_ok(),
            "[validity] the IRI must be valid after the password component \
             is replaced with the empty password"
        );
    }
}
}
// Wires up the conversions from the relative-reference types
// (`RiRelativeStr` / `RiRelativeString`) to the general reference types
// (`RiReferenceStr` / `RiReferenceString`).
// NOTE(review): presumably infallible, since the RFC 3987 rule
// `IRI-reference = IRI / irelative-ref` makes every relative reference a
// valid IRI reference -- confirm against the macro definition.
impl_trivial_conv_between_iri! {
from_slice: RiRelativeStr,
from_owned: RiRelativeString,
to_slice: RiReferenceStr,
to_owned: RiReferenceString,
}

382
vendor/iri-string/src/types/iri.rs vendored Normal file
View File

@@ -0,0 +1,382 @@
//! IRI-specific implementations.
#[cfg(feature = "alloc")]
use alloc::collections::TryReserveError;
#[cfg(all(feature = "alloc", not(feature = "std")))]
use alloc::string::String;
#[cfg(feature = "alloc")]
use crate::convert::try_percent_encode_iri_inline;
use crate::convert::MappedToUri;
use crate::spec::IriSpec;
use crate::types::{
RiAbsoluteStr, RiFragmentStr, RiQueryStr, RiReferenceStr, RiRelativeStr, RiStr,
};
#[cfg(feature = "alloc")]
use crate::types::{
RiAbsoluteString, RiFragmentString, RiQueryString, RiReferenceString, RiRelativeString,
RiString,
};
use crate::types::{
UriAbsoluteStr, UriFragmentStr, UriQueryStr, UriReferenceStr, UriRelativeStr, UriStr,
};
#[cfg(feature = "alloc")]
use crate::types::{
UriAbsoluteString, UriFragmentString, UriQueryString, UriReferenceString, UriRelativeString,
UriString,
};
// Convenience aliases: each one fixes the spec parameter of the corresponding
// generic `Ri*` type to `IriSpec`. The owned (`*String`) aliases are only
// available when the `alloc` feature is enabled.
/// A type alias for [`RiAbsoluteStr`]`<`[`IriSpec`]`>`.
pub type IriAbsoluteStr = RiAbsoluteStr<IriSpec>;
/// A type alias for [`RiAbsoluteString`]`<`[`IriSpec`]`>`.
#[cfg(feature = "alloc")]
pub type IriAbsoluteString = RiAbsoluteString<IriSpec>;
/// A type alias for [`RiFragmentStr`]`<`[`IriSpec`]`>`.
pub type IriFragmentStr = RiFragmentStr<IriSpec>;
/// A type alias for [`RiFragmentString`]`<`[`IriSpec`]`>`.
#[cfg(feature = "alloc")]
pub type IriFragmentString = RiFragmentString<IriSpec>;
/// A type alias for [`RiStr`]`<`[`IriSpec`]`>`.
pub type IriStr = RiStr<IriSpec>;
/// A type alias for [`RiString`]`<`[`IriSpec`]`>`.
#[cfg(feature = "alloc")]
pub type IriString = RiString<IriSpec>;
/// A type alias for [`RiReferenceStr`]`<`[`IriSpec`]`>`.
pub type IriReferenceStr = RiReferenceStr<IriSpec>;
/// A type alias for [`RiReferenceString`]`<`[`IriSpec`]`>`.
#[cfg(feature = "alloc")]
pub type IriReferenceString = RiReferenceString<IriSpec>;
/// A type alias for [`RiRelativeStr`]`<`[`IriSpec`]`>`.
pub type IriRelativeStr = RiRelativeStr<IriSpec>;
/// A type alias for [`RiRelativeString`]`<`[`IriSpec`]`>`.
#[cfg(feature = "alloc")]
pub type IriRelativeString = RiRelativeString<IriSpec>;
/// A type alias for [`RiQueryStr`]`<`[`IriSpec`]`>`.
pub type IriQueryStr = RiQueryStr<IriSpec>;
/// A type alias for [`RiQueryString`]`<`[`IriSpec`]`>`.
#[cfg(feature = "alloc")]
pub type IriQueryString = RiQueryString<IriSpec>;
/// Implements the conversion from an IRI into a URI.
macro_rules! impl_conversion_between_uri {
(
$ty_owned_iri:ident,
$ty_owned_uri:ident,
$ty_borrowed_iri:ident,
$ty_borrowed_uri:ident,
$example_iri:expr,
$example_uri:expr
) => {
/// Conversion from an IRI into a URI.
impl $ty_borrowed_iri {
/// Percent-encodes the IRI into a valid URI that identifies the equivalent resource.
///
/// If you need more precise control over memory allocation and buffer
/// handling, use [`MappedToUri`][`crate::convert::MappedToUri`] type.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
#[doc = concat!("use iri_string::format::ToDedicatedString;")]
#[doc = concat!("use iri_string::types::{", stringify!($ty_borrowed_iri), ", ", stringify!($ty_owned_uri), "};")]
///
#[doc = concat!("let iri = ", stringify!($ty_borrowed_iri), "::new(", stringify!($example_iri), ")?;")]
/// // Type annotation here is not necessary.
#[doc = concat!("let uri: ", stringify!($ty_owned_uri), " = iri.encode_to_uri().to_dedicated_string();")]
#[doc = concat!("assert_eq!(uri, ", stringify!($example_uri), ");")]
/// # }
/// # Ok::<_, Error>(())
/// ```
#[inline]
#[must_use]
pub fn encode_to_uri(&self) -> MappedToUri<'_, Self> {
// Only wraps a borrow of `self` in `MappedToUri`; the doc example above
// turns that wrapper into an owned URI with `to_dedicated_string()`.
MappedToUri::from(self)
}
/// Converts an IRI into a URI without modification, if possible.
///
/// This is semantically equivalent to
#[doc = concat!("`", stringify!($ty_borrowed_uri), "::new(self.as_str()).ok()`.")]
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
#[doc = concat!("use iri_string::types::{", stringify!($ty_borrowed_iri), ", ", stringify!($ty_borrowed_uri), "};")]
///
#[doc = concat!("let ascii_iri = ", stringify!($ty_borrowed_iri), "::new(", stringify!($example_uri), ")?;")]
/// assert_eq!(
/// ascii_iri.as_uri().map(AsRef::as_ref),
#[doc = concat!(" Some(", stringify!($example_uri), ")")]
/// );
///
#[doc = concat!("let nonascii_iri = ", stringify!($ty_borrowed_iri), "::new(", stringify!($example_iri), ")?;")]
/// assert_eq!(nonascii_iri.as_uri(), None);
/// # Ok::<_, Error>(())
/// ```
#[must_use]
pub fn as_uri(&self) -> Option<&$ty_borrowed_uri> {
    let text = self.as_str();
    if text.is_ascii() {
        debug_assert!(
            <$ty_borrowed_uri>::new(text).is_ok(),
            "[consistency] the ASCII-only IRI must also be a valid URI"
        );
        // SAFETY: An ASCII-only IRI is a URI.
        // URI (by `UriSpec`) is a subset of IRI (by `IriSpec`),
        // and the difference is that URIs can only have ASCII characters.
        Some(unsafe { <$ty_borrowed_uri>::new_maybe_unchecked(text) })
    } else {
        // At least one non-ASCII character: not representable as a URI
        // without modification.
        None
    }
}
}
/// Conversion from an IRI into a URI.
#[cfg(feature = "alloc")]
impl $ty_owned_iri {
/// Percent-encodes the IRI into a valid URI that identifies the equivalent resource.
///
/// After the encode, the IRI is also a valid URI.
///
/// If you want a new URI string rather than modifying the IRI
/// string, or if you need more precise control over memory
/// allocation and buffer handling, use
#[doc = concat!("[`encode_to_uri`][`", stringify!($ty_borrowed_iri), "::encode_to_uri`]")]
/// method.
///
/// # Panics
///
/// Panics if the memory allocation failed.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
#[doc = concat!("use iri_string::types::", stringify!($ty_owned_iri), ";")]
///
#[doc = concat!("let mut iri = ", stringify!($ty_owned_iri), "::try_from(", stringify!($example_iri), ")?;")]
/// iri.encode_to_uri_inline();
#[doc = concat!("assert_eq!(iri, ", stringify!($example_uri), ");")]
/// # }
/// # Ok::<_, Error>(())
/// ```
#[inline]
pub fn encode_to_uri_inline(&mut self) {
// Panicking counterpart of `try_encode_to_uri_inline`: an `Err` from it
// (a `TryReserveError`) aborts with the message below.
self.try_encode_to_uri_inline()
.expect("failed to allocate memory");
}
/// Percent-encodes the IRI into a valid URI that identifies the equivalent resource.
///
/// After the encode, the IRI is also a valid URI.
///
/// If you want a new URI string rather than modifying the IRI
/// string, or if you need more precise control over memory
/// allocation and buffer handling, use
#[doc = concat!("[`encode_to_uri`][`", stringify!($ty_borrowed_iri), "::encode_to_uri`]")]
/// method.
///
// TODO: This seems true as of this writing, but is this guaranteed? See
// <https://users.rust-lang.org/t/does-try-reserve-guarantees-that-the-content-is-preserved-on-allocation-failure/77446>.
// /// If the memory allocation failed, the content is preserved without modification.
// ///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
#[doc = concat!("use iri_string::types::", stringify!($ty_owned_iri), ";")]
///
#[doc = concat!("let mut iri = ", stringify!($ty_owned_iri), "::try_from(", stringify!($example_iri), ")?;")]
/// iri.try_encode_to_uri_inline()
/// .expect("failed to allocate memory");
#[doc = concat!("assert_eq!(iri, ", stringify!($example_uri), ");")]
/// # }
/// # Ok::<_, Error>(())
/// ```
#[inline]
pub fn try_encode_to_uri_inline(&mut self) -> Result<(), TryReserveError> {
// SAFETY: IRI is valid after it is encoded to URI (by percent encoding).
unsafe {
let buf = self.as_inner_mut();
// On failure `?` returns the `TryReserveError` right away, skipping the
// debug check below. Whether the buffer is left unmodified in that case
// is deliberately not promised (see the TODO in the docs above).
try_percent_encode_iri_inline(buf)?;
}
// Debug-build check that the rewritten content still validates as this
// IRI type.
debug_assert!(
<$ty_borrowed_iri>::new(self.as_str()).is_ok(),
"[consistency] the content must be valid at any time"
);
Ok(())
}
/// Percent-encodes the IRI into a valid URI that identifies the equivalent resource.
///
/// If you want a new URI string rather than modifying the IRI
/// string, or if you need more precise control over memory
/// allocation and buffer handling, use
#[doc = concat!("[`encode_to_uri`][`", stringify!($ty_borrowed_iri), "::encode_to_uri`]")]
/// method.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
#[doc = concat!("use iri_string::types::{", stringify!($ty_owned_iri), ", ", stringify!($ty_owned_uri), "};")]
///
#[doc = concat!("let iri = ", stringify!($ty_owned_iri), "::try_from(", stringify!($example_iri), ")?;")]
/// // Type annotation here is not necessary.
#[doc = concat!("let uri: ", stringify!($ty_owned_uri), " = iri.encode_into_uri();")]
#[doc = concat!("assert_eq!(uri, ", stringify!($example_uri), ");")]
/// # }
/// # Ok::<_, Error>(())
/// ```
#[inline]
#[must_use]
pub fn encode_into_uri(self) -> $ty_owned_uri {
// Panicking counterpart of `try_encode_into_uri`: an `Err` from it
// (a `TryReserveError`) aborts with the message below.
self.try_encode_into_uri()
.expect("failed to allocate memory")
}
/// Percent-encodes the IRI into a valid URI that identifies the equivalent resource.
///
/// If you want a new URI string rather than modifying the IRI
/// string, or if you need more precise control over memory
/// allocation and buffer handling, use
#[doc = concat!("[`encode_to_uri`][`", stringify!($ty_borrowed_iri), "::encode_to_uri`]")]
/// method.
///
// TODO: This seems true as of this writing, but is this guaranteed? See
// <https://users.rust-lang.org/t/does-try-reserve-guarantees-that-the-content-is-preserved-on-allocation-failure/77446>.
// /// If the memory allocation failed, the content is preserved without modification.
// ///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
#[doc = concat!("use iri_string::types::{", stringify!($ty_owned_iri), ", ", stringify!($ty_owned_uri), "};")]
///
#[doc = concat!("let iri = ", stringify!($ty_owned_iri), "::try_from(", stringify!($example_iri), ")?;")]
/// // Type annotation here is not necessary.
#[doc = concat!("let uri: ", stringify!($ty_owned_uri), " = iri.try_encode_into_uri()")]
/// .expect("failed to allocate memory");
#[doc = concat!("assert_eq!(uri, ", stringify!($example_uri), ");")]
/// # }
/// # Ok::<_, Error>(())
/// ```
pub fn try_encode_into_uri(mut self) -> Result<$ty_owned_uri, TryReserveError> {
// Encode in place first. If that fails, the error is returned to the caller
// and `self` (taken by value) is dropped.
self.try_encode_to_uri_inline()?;
// Take the underlying `String` out of the IRI wrapper so that it can be
// re-wrapped as the URI type.
let s: String = self.into();
// Debug-build check that the encoded content really validates as a URI.
debug_assert!(
<$ty_borrowed_uri>::new(s.as_str()).is_ok(),
"[consistency] the encoded IRI must also be a valid URI"
);
// SAFETY: An ASCII-only IRI is a URI.
// URI (by `UriSpec`) is a subset of IRI (by `IriSpec`),
// and the difference is that URIs can only have ASCII characters.
// Per the documented contract of `try_encode_to_uri_inline`, the content
// is also a valid URI once the encode above has succeeded.
let uri = unsafe { <$ty_owned_uri>::new_maybe_unchecked(s) };
Ok(uri)
}
/// Converts an IRI into a URI without modification, if possible.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
#[doc = concat!("use iri_string::types::{", stringify!($ty_owned_iri), ", ", stringify!($ty_owned_uri), "};")]
///
#[doc = concat!("let ascii_iri = ", stringify!($ty_owned_iri), "::try_from(", stringify!($example_uri), ")?;")]
/// assert_eq!(
/// ascii_iri.try_into_uri().map(|uri| uri.to_string()),
#[doc = concat!(" Ok(", stringify!($example_uri), ".to_string())")]
/// );
///
#[doc = concat!("let nonascii_iri = ", stringify!($ty_owned_iri), "::try_from(", stringify!($example_iri), ")?;")]
/// assert_eq!(
/// nonascii_iri.try_into_uri().map_err(|iri| iri.to_string()),
#[doc = concat!(" Err(", stringify!($example_iri), ".to_string())")]
/// );
/// # Ok::<_, Error>(())
/// ```
pub fn try_into_uri(self) -> Result<$ty_owned_uri, $ty_owned_iri> {
    if self.as_str().is_ascii() {
        let ascii: String = self.into();
        debug_assert!(
            <$ty_borrowed_uri>::new(ascii.as_str()).is_ok(),
            "[consistency] the ASCII-only IRI must also be a valid URI"
        );
        // SAFETY: An ASCII-only IRI is a URI.
        // URI (by `UriSpec`) is a subset of IRI (by `IriSpec`),
        // and the difference is that URIs can only have ASCII characters.
        Ok(unsafe { <$ty_owned_uri>::new_maybe_unchecked(ascii) })
    } else {
        // Non-ASCII content cannot be a URI as is: hand the untouched IRI
        // back to the caller.
        Err(self)
    }
}
}
};
}
impl_conversion_between_uri!(
IriAbsoluteString,
UriAbsoluteString,
IriAbsoluteStr,
UriAbsoluteStr,
"http://example.com/?alpha=\u{03B1}",
"http://example.com/?alpha=%CE%B1"
);
impl_conversion_between_uri!(
IriReferenceString,
UriReferenceString,
IriReferenceStr,
UriReferenceStr,
"http://example.com/?alpha=\u{03B1}",
"http://example.com/?alpha=%CE%B1"
);
impl_conversion_between_uri!(
IriRelativeString,
UriRelativeString,
IriRelativeStr,
UriRelativeStr,
"../?alpha=\u{03B1}",
"../?alpha=%CE%B1"
);
impl_conversion_between_uri!(
IriString,
UriString,
IriStr,
UriStr,
"http://example.com/?alpha=\u{03B1}",
"http://example.com/?alpha=%CE%B1"
);
impl_conversion_between_uri!(
IriQueryString,
UriQueryString,
IriQueryStr,
UriQueryStr,
"alpha-is-\u{03B1}",
"alpha-is-%CE%B1"
);
impl_conversion_between_uri!(
IriFragmentString,
UriFragmentString,
IriFragmentStr,
UriFragmentStr,
"alpha-is-\u{03B1}",
"alpha-is-%CE%B1"
);

115
vendor/iri-string/src/types/uri.rs vendored Normal file
View File

@@ -0,0 +1,115 @@
//! URI-specific implementations.
use crate::spec::UriSpec;
use crate::types::{
IriAbsoluteStr, IriFragmentStr, IriQueryStr, IriReferenceStr, IriRelativeStr, IriStr,
RiAbsoluteStr, RiFragmentStr, RiQueryStr, RiReferenceStr, RiRelativeStr, RiStr,
};
#[cfg(feature = "alloc")]
use crate::types::{
IriAbsoluteString, IriFragmentString, IriQueryString, IriReferenceString, IriRelativeString,
IriString, RiAbsoluteString, RiFragmentString, RiQueryString, RiReferenceString,
RiRelativeString, RiString,
};
// Convenience aliases: each one fixes the spec parameter of the corresponding
// generic `Ri*` type to `UriSpec`. The owned (`*String`) aliases are only
// available when the `alloc` feature is enabled.
/// A type alias for [`RiAbsoluteStr`]`<`[`UriSpec`]`>`.
pub type UriAbsoluteStr = RiAbsoluteStr<UriSpec>;
/// A type alias for [`RiAbsoluteString`]`<`[`UriSpec`]`>`.
#[cfg(feature = "alloc")]
pub type UriAbsoluteString = RiAbsoluteString<UriSpec>;
/// A type alias for [`RiFragmentStr`]`<`[`UriSpec`]`>`.
pub type UriFragmentStr = RiFragmentStr<UriSpec>;
/// A type alias for [`RiFragmentString`]`<`[`UriSpec`]`>`.
#[cfg(feature = "alloc")]
pub type UriFragmentString = RiFragmentString<UriSpec>;
/// A type alias for [`RiStr`]`<`[`UriSpec`]`>`.
pub type UriStr = RiStr<UriSpec>;
/// A type alias for [`RiString`]`<`[`UriSpec`]`>`.
#[cfg(feature = "alloc")]
pub type UriString = RiString<UriSpec>;
/// A type alias for [`RiReferenceStr`]`<`[`UriSpec`]`>`.
pub type UriReferenceStr = RiReferenceStr<UriSpec>;
/// A type alias for [`RiReferenceString`]`<`[`UriSpec`]`>`.
#[cfg(feature = "alloc")]
pub type UriReferenceString = RiReferenceString<UriSpec>;
/// A type alias for [`RiRelativeStr`]`<`[`UriSpec`]`>`.
pub type UriRelativeStr = RiRelativeStr<UriSpec>;
/// A type alias for [`RiRelativeString`]`<`[`UriSpec`]`>`.
#[cfg(feature = "alloc")]
pub type UriRelativeString = RiRelativeString<UriSpec>;
/// A type alias for [`RiQueryStr`]`<`[`UriSpec`]`>`.
pub type UriQueryStr = RiQueryStr<UriSpec>;
/// A type alias for [`RiQueryString`]`<`[`UriSpec`]`>`.
#[cfg(feature = "alloc")]
pub type UriQueryString = RiQueryString<UriSpec>;
/// Implements the trivial conversions between a URI and an IRI.
///
/// For one URI/IRI type pair, the expansion provides:
///
/// * `AsRef<borrowed IRI>` for the borrowed URI type,
/// * `From<owned URI>` for the owned IRI type (`alloc` feature only), and
/// * `AsRef<borrowed IRI>` for the owned URI type (`alloc` feature only).
///
/// Arguments are positional: borrowed URI, owned URI, borrowed IRI, owned IRI,
/// each followed by a comma (the trailing comma is required).
macro_rules! impl_conversions_between_iri {
    (
        $uri_slice:ident,
        $uri_owned:ident,
        $iri_slice:ident,
        $iri_owned:ident,
    ) => {
        impl AsRef<$iri_slice> for $uri_slice {
            fn as_ref(&self) -> &$iri_slice {
                let raw = self.as_str();
                // SAFETY: A valid URI is also a valid IRI.
                unsafe { <$iri_slice>::new_maybe_unchecked(raw) }
            }
        }

        #[cfg(feature = "alloc")]
        impl From<$uri_owned> for $iri_owned {
            #[inline]
            fn from(uri: $uri_owned) -> Self {
                // SAFETY: A valid URI is also a valid IRI.
                unsafe { <$iri_owned>::new_maybe_unchecked(uri.into()) }
            }
        }

        #[cfg(feature = "alloc")]
        impl AsRef<$iri_slice> for $uri_owned {
            fn as_ref(&self) -> &$iri_slice {
                // Go through the borrowed URI type, then reuse its
                // `AsRef<borrowed IRI>` implementation generated above.
                let borrowed: &$uri_slice = AsRef::as_ref(self);
                borrowed.as_ref()
            }
        }
    };
}
impl_conversions_between_iri!(
UriAbsoluteStr,
UriAbsoluteString,
IriAbsoluteStr,
IriAbsoluteString,
);
impl_conversions_between_iri!(
UriReferenceStr,
UriReferenceString,
IriReferenceStr,
IriReferenceString,
);
impl_conversions_between_iri!(
UriRelativeStr,
UriRelativeString,
IriRelativeStr,
IriRelativeString,
);
impl_conversions_between_iri!(UriStr, UriString, IriStr, IriString,);
impl_conversions_between_iri!(UriQueryStr, UriQueryString, IriQueryStr, IriQueryString,);
impl_conversions_between_iri!(
UriFragmentStr,
UriFragmentString,
IriFragmentStr,
IriFragmentString,
);

607
vendor/iri-string/src/validate.rs vendored Normal file
View File

@@ -0,0 +1,607 @@
//! Validators.
//!
//! Validators are functions that receive a string and check whether the entire
//! string is syntactically valid.
use core::fmt;
#[cfg(feature = "std")]
use std::error;
use crate::parser::validate as parser;
use crate::spec::Spec;
/// Resource identifier validation error.
///
/// The value only records which kind of failure happened; its `Display`
/// output is `invalid IRI: ` followed by the description of that kind.
// Note that this type should implement `Copy` trait.
// To return additional non-`Copy` data as an error, use wrapper type
// (as `std::string::FromUtf8Error` contains `std::str::Utf8Error`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Error {
    /// Error kind.
    kind: ErrorKind,
}

impl Error {
    /// Wraps the given error kind into an `Error`.
    #[inline]
    #[must_use]
    pub(crate) fn with_kind(kind: ErrorKind) -> Self {
        Error { kind }
    }
}

impl fmt::Display for Error {
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Fixed prefix first, then the kind-specific description.
        f.write_str("invalid IRI: ")?;
        f.write_str(self.kind.description())
    }
}

#[cfg(feature = "std")]
impl error::Error for Error {}

/// Error kind.
///
/// This type may be reorganized between minor version bumps, so users should
/// not expect specific error kind (or specific error message) to be returned
/// for a specific error.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[non_exhaustive]
pub(crate) enum ErrorKind {
    /// The scheme is invalid.
    InvalidScheme,
    /// The userinfo is invalid.
    InvalidUserInfo,
    /// The host is invalid.
    InvalidHost,
    /// The port is invalid.
    InvalidPort,
    /// The path contains an invalid character.
    InvalidPath,
    /// The query is invalid.
    InvalidQuery,
    /// The fragment is invalid.
    InvalidFragment,
    /// A fragment was found where none is expected.
    UnexpectedFragment,
    /// A relative IRI was expected, but an absolute IRI was found.
    UnexpectedAbsolute,
    /// An absolute IRI was expected, but a relative IRI was found.
    UnexpectedRelative,
    /// The bytes are not valid UTF-8.
    InvalidUtf8,
}

impl ErrorKind {
    /// Returns the human-friendly description for the error kind.
    #[must_use]
    fn description(self) -> &'static str {
        // Deliberately no catch-all arm: a newly added variant must get its
        // own message before this compiles again.
        match self {
            ErrorKind::InvalidScheme => "invalid scheme",
            ErrorKind::InvalidUserInfo => "invalid userinfo",
            ErrorKind::InvalidHost => "invalid host",
            ErrorKind::InvalidPort => "invalid port",
            ErrorKind::InvalidPath => "invalid path",
            ErrorKind::InvalidQuery => "invalid query",
            ErrorKind::InvalidFragment => "invalid fragment",
            ErrorKind::UnexpectedFragment => "unexpected fragment",
            ErrorKind::UnexpectedAbsolute => "expected a relative IRI but got an absolute IRI",
            ErrorKind::UnexpectedRelative => "expected an absolute IRI but got a relative IRI",
            ErrorKind::InvalidUtf8 => "invalid utf-8 bytes",
        }
    }
}
/// Validates [IRI][uri].
///
/// This validator corresponds to [`RiStr`] and [`RiString`] types.
///
/// # Examples
///
/// This type can have an IRI (which is absolute, and may have fragment part).
///
/// ```
/// use iri_string::{spec::UriSpec, validate::iri};
/// assert!(iri::<UriSpec>("https://user:pass@example.com:8080").is_ok());
/// assert!(iri::<UriSpec>("https://example.com/").is_ok());
/// assert!(iri::<UriSpec>("https://example.com/foo?bar=baz").is_ok());
/// assert!(iri::<UriSpec>("https://example.com/foo?bar=baz#qux").is_ok());
/// assert!(iri::<UriSpec>("foo:bar").is_ok());
/// assert!(iri::<UriSpec>("foo:").is_ok());
/// // `foo://.../` below are all allowed. See the crate documentation for detail.
/// assert!(iri::<UriSpec>("foo:/").is_ok());
/// assert!(iri::<UriSpec>("foo://").is_ok());
/// assert!(iri::<UriSpec>("foo:///").is_ok());
/// assert!(iri::<UriSpec>("foo:////").is_ok());
/// assert!(iri::<UriSpec>("foo://///").is_ok());
/// ```
///
/// Relative IRI reference is not allowed.
///
/// ```
/// use iri_string::{spec::UriSpec, validate::iri};
/// // This is relative path.
/// assert!(iri::<UriSpec>("foo/bar").is_err());
/// // `/foo/bar` is an absolute path, but it is authority-relative.
/// assert!(iri::<UriSpec>("/foo/bar").is_err());
/// // `//foo/bar` is termed "network-path reference",
/// // or usually called "protocol-relative reference".
/// assert!(iri::<UriSpec>("//foo/bar").is_err());
/// // Same-document reference is relative.
/// assert!(iri::<UriSpec>("#foo").is_err());
/// // Empty string is not a valid absolute IRI.
/// assert!(iri::<UriSpec>("").is_err());
/// ```
///
/// Some characters and sequences cannot be used in an IRI.
///
/// ```
/// use iri_string::{spec::UriSpec, validate::iri};
/// // `<` and `>` cannot directly appear in an IRI.
/// assert!(iri::<UriSpec>("<not allowed>").is_err());
/// // Broken percent encoding cannot appear in an IRI.
/// assert!(iri::<UriSpec>("%").is_err());
/// assert!(iri::<UriSpec>("%GG").is_err());
/// ```
///
/// [uri]: https://www.rfc-editor.org/rfc/rfc3986.html#section-3
/// [`RiStr`]: ../types/struct.RiStr.html
/// [`RiString`]: ../types/struct.RiString.html
pub fn iri<S: Spec>(s: &str) -> Result<(), Error> {
// Thin public entry point: the whole check is done by
// `crate::parser::validate` (imported here as `parser`), with the spec `S`
// forwarded unchanged.
parser::validate_uri::<S>(s)
}
/// Validates [IRI reference][uri-reference].
///
/// This validator corresponds to [`RiReferenceStr`] and [`RiReferenceString`] types.
///
/// # Examples
///
/// This type can have an IRI reference (which can be absolute or relative).
///
/// ```
/// use iri_string::{spec::UriSpec, validate::iri_reference};
/// assert!(iri_reference::<UriSpec>("https://user:pass@example.com:8080").is_ok());
/// assert!(iri_reference::<UriSpec>("https://example.com/").is_ok());
/// assert!(iri_reference::<UriSpec>("https://example.com/foo?bar=baz").is_ok());
/// assert!(iri_reference::<UriSpec>("https://example.com/foo?bar=baz#qux").is_ok());
/// assert!(iri_reference::<UriSpec>("foo:bar").is_ok());
/// assert!(iri_reference::<UriSpec>("foo:").is_ok());
/// // `foo://.../` below are all allowed. See the crate documentation for detail.
/// assert!(iri_reference::<UriSpec>("foo:/").is_ok());
/// assert!(iri_reference::<UriSpec>("foo://").is_ok());
/// assert!(iri_reference::<UriSpec>("foo:///").is_ok());
/// assert!(iri_reference::<UriSpec>("foo:////").is_ok());
/// assert!(iri_reference::<UriSpec>("foo://///").is_ok());
/// assert!(iri_reference::<UriSpec>("foo/bar").is_ok());
/// assert!(iri_reference::<UriSpec>("/foo/bar").is_ok());
/// assert!(iri_reference::<UriSpec>("//foo/bar").is_ok());
/// assert!(iri_reference::<UriSpec>("#foo").is_ok());
/// ```
///
/// Some characters and sequences cannot be used in an IRI reference.
///
/// ```
/// use iri_string::{spec::UriSpec, validate::iri_reference};
/// // `<` and `>` cannot directly appear in an IRI reference.
/// assert!(iri_reference::<UriSpec>("<not allowed>").is_err());
/// // Broken percent encoding cannot appear in an IRI reference.
/// assert!(iri_reference::<UriSpec>("%").is_err());
/// assert!(iri_reference::<UriSpec>("%GG").is_err());
/// ```
///
/// [uri-reference]: https://www.rfc-editor.org/rfc/rfc3986.html#section-4.1
/// [`RiReferenceStr`]: ../types/struct.RiReferenceStr.html
/// [`RiReferenceString`]: ../types/struct.RiReferenceString.html
pub fn iri_reference<S: Spec>(s: &str) -> Result<(), Error> {
// Delegates entirely to the parser-level validator for references; the
// spec `S` is forwarded unchanged.
parser::validate_uri_reference::<S>(s)
}
/// Validates [absolute IRI][absolute-uri].
///
/// This validator corresponds to [`RiAbsoluteStr`] and [`RiAbsoluteString`] types.
///
/// # Examples
///
/// This type can have an absolute IRI without fragment part.
///
/// ```
/// use iri_string::{spec::UriSpec, validate::absolute_iri};
/// assert!(absolute_iri::<UriSpec>("https://example.com/foo?bar=baz").is_ok());
/// assert!(absolute_iri::<UriSpec>("foo:bar").is_ok());
/// // Scheme `foo` and empty path.
/// assert!(absolute_iri::<UriSpec>("foo:").is_ok());
/// // `foo://.../` below are all allowed. See the crate documentation for detail.
/// assert!(absolute_iri::<UriSpec>("foo:/").is_ok());
/// assert!(absolute_iri::<UriSpec>("foo://").is_ok());
/// assert!(absolute_iri::<UriSpec>("foo:///").is_ok());
/// assert!(absolute_iri::<UriSpec>("foo:////").is_ok());
/// assert!(absolute_iri::<UriSpec>("foo://///").is_ok());
///
/// ```
///
/// Relative IRI is not allowed.
///
/// ```
/// use iri_string::{spec::UriSpec, validate::absolute_iri};
/// // This is relative path.
/// assert!(absolute_iri::<UriSpec>("foo/bar").is_err());
/// // `/foo/bar` is an absolute path, but it is authority-relative.
/// assert!(absolute_iri::<UriSpec>("/foo/bar").is_err());
/// // `//foo/bar` is termed "network-path reference",
/// // or usually called "protocol-relative reference".
/// assert!(absolute_iri::<UriSpec>("//foo/bar").is_err());
/// // Empty string is not a valid absolute IRI.
/// assert!(absolute_iri::<UriSpec>("").is_err());
/// ```
///
/// Fragment part (such as trailing `#foo`) is not allowed.
///
/// ```
/// use iri_string::{spec::UriSpec, validate::absolute_iri};
/// // Fragment part is not allowed.
/// assert!(absolute_iri::<UriSpec>("https://example.com/foo?bar=baz#qux").is_err());
/// ```
///
/// Some characters and sequences cannot be used in an absolute IRI.
///
/// ```
/// use iri_string::{spec::UriSpec, validate::absolute_iri};
/// // `<` and `>` cannot directly appear in an absolute IRI.
/// assert!(absolute_iri::<UriSpec>("<not allowed>").is_err());
/// // Broken percent encoding cannot appear in an absolute IRI.
/// assert!(absolute_iri::<UriSpec>("%").is_err());
/// assert!(absolute_iri::<UriSpec>("%GG").is_err());
/// ```
///
/// [absolute-uri]: https://www.rfc-editor.org/rfc/rfc3986.html#section-4.3
/// [`RiAbsoluteStr`]: ../types/struct.RiAbsoluteStr.html
/// [`RiAbsoluteString`]: ../types/struct.RiAbsoluteString.html
pub fn absolute_iri<S: Spec>(s: &str) -> Result<(), Error> {
// Delegates entirely to the parser-level validator for absolute
// identifiers; the spec `S` is forwarded unchanged.
parser::validate_absolute_uri::<S>(s)
}
/// Validates [relative reference][relative-ref].
///
/// This validator corresponds to [`RiRelativeStr`] and [`RiRelativeString`] types.
///
/// # Valid values
///
/// This type can have a relative IRI reference.
///
/// ```
/// use iri_string::{spec::UriSpec, validate::relative_ref};
/// assert!(relative_ref::<UriSpec>("foo").is_ok());
/// assert!(relative_ref::<UriSpec>("foo/bar").is_ok());
/// assert!(relative_ref::<UriSpec>("/foo").is_ok());
/// assert!(relative_ref::<UriSpec>("//foo/bar").is_ok());
/// assert!(relative_ref::<UriSpec>("?foo").is_ok());
/// assert!(relative_ref::<UriSpec>("#foo").is_ok());
/// assert!(relative_ref::<UriSpec>("foo/bar?baz#qux").is_ok());
/// // The first path component can have colon if the path is absolute.
/// assert!(relative_ref::<UriSpec>("/foo:bar/").is_ok());
/// // Second or following path components can have colon.
/// assert!(relative_ref::<UriSpec>("foo/bar://baz/").is_ok());
/// assert!(relative_ref::<UriSpec>("./foo://bar").is_ok());
/// ```
///
/// Absolute form of a reference is not allowed.
///
/// ```
/// use iri_string::{spec::UriSpec, validate::relative_ref};
/// assert!(relative_ref::<UriSpec>("https://example.com/").is_err());
/// // The first path component cannot have colon, if the path is not absolute.
/// assert!(relative_ref::<UriSpec>("foo:bar").is_err());
/// assert!(relative_ref::<UriSpec>("foo:").is_err());
/// assert!(relative_ref::<UriSpec>("foo:/").is_err());
/// assert!(relative_ref::<UriSpec>("foo://").is_err());
/// assert!(relative_ref::<UriSpec>("foo:///").is_err());
/// assert!(relative_ref::<UriSpec>("foo:////").is_err());
/// assert!(relative_ref::<UriSpec>("foo://///").is_err());
/// ```
///
/// Some characters and sequences cannot be used in an IRI reference.
///
/// ```
/// use iri_string::{spec::UriSpec, validate::relative_ref};
/// // `<` and `>` cannot directly appear in a relative IRI reference.
/// assert!(relative_ref::<UriSpec>("<not allowed>").is_err());
/// // Broken percent encoding cannot appear in a relative IRI reference.
/// assert!(relative_ref::<UriSpec>("%").is_err());
/// assert!(relative_ref::<UriSpec>("%GG").is_err());
/// ```
///
/// [relative-ref]: https://www.rfc-editor.org/rfc/rfc3986.html#section-4.2
/// [`RiRelativeStr`]: ../types/struct.RiRelativeStr.html
/// [`RiRelativeString`]: ../types/struct.RiRelativeString.html
pub fn relative_ref<S: Spec>(s: &str) -> Result<(), Error> {
// Delegates entirely to the parser-level validator for relative
// references; the spec `S` is forwarded unchanged.
parser::validate_relative_ref::<S>(s)
}
/// Validates [IRI scheme][scheme].
///
/// Note that this function does not accept a trailing colon.
///
/// Also note that the syntax of the scheme is common between RFC 3986 (URIs)
/// and RFC 3987 (IRIs).
///
/// # Examples
///
/// ```
/// use iri_string::validate::scheme;
/// assert!(scheme("https").is_ok());
/// assert!(scheme("file").is_ok());
/// assert!(scheme("git+ssh").is_ok());
///
/// // Colon is syntactically not part of the scheme.
/// assert!(scheme("colon:").is_err());
/// // Scheme cannot be empty.
/// assert!(scheme("").is_err());
/// // The first character should be alphabetic character.
/// assert!(scheme("0abc").is_err());
/// assert!(scheme("+a").is_err());
/// assert!(scheme("-a").is_err());
/// ```
///
/// [scheme]: https://www.rfc-editor.org/rfc/rfc3986.html#section-3.1
pub fn scheme(s: &str) -> Result<(), Error> {
// No `Spec` parameter here: as the docs above note, the scheme syntax is
// common between RFC 3986 and RFC 3987.
parser::validate_scheme(s)
}
/// Validates [IRI authority][authority].
///
/// # Examples
///
/// ```
/// use iri_string::{spec::UriSpec, validate::authority};
/// assert!(authority::<UriSpec>("example.com").is_ok());
/// assert!(authority::<UriSpec>("subdomain.example.com").is_ok());
/// assert!(authority::<UriSpec>("no-period").is_ok());
/// // Though strongly discouraged, this percent-encoded reg-name with
/// // non-UTF-8 bytes is considered syntactically valid.
/// assert!(authority::<UriSpec>("non-%99-utf-8").is_ok());
/// // Empty authority is valid. Remember `file:///` has empty authority.
/// assert!(authority::<UriSpec>("").is_ok());
/// assert!(authority::<UriSpec>("127.0.0.1:8080").is_ok());
/// assert!(authority::<UriSpec>("[::127.0.0.1]:8088").is_ok());
/// // URI/IRI syntax itself does not have limit on the port number.
/// assert!(authority::<UriSpec>("[::1]:9999999999").is_ok());
/// // Syntax for future versions of IP addresses.
/// assert!(authority::<UriSpec>("[v89ab.1+2,3(4)5&6]").is_ok());
/// assert!(authority::<UriSpec>("user:password@host").is_ok());
/// assert!(authority::<UriSpec>("co%3Alon:at%40sign@host:8888").is_ok());
/// // Percent-encoded non-UTF8 (or even non-ASCII) bytes are valid.
/// // Users are responsible to validate or reject such unusual input if needed.
/// assert!(authority::<UriSpec>("not-a-%80-utf8@host").is_ok());
///
/// // Invalid percent encodings.
/// assert!(authority::<UriSpec>("invalid%GGescape@host").is_err());
/// // Invalid characters.
/// assert!(authority::<UriSpec>("foo@bar@host").is_err());
/// assert!(authority::<UriSpec>("slash/is-not-allowed").is_err());
/// ```
///
/// [authority]: https://www.rfc-editor.org/rfc/rfc3986.html#section-3.2
pub fn authority<S: Spec>(s: &str) -> Result<(), Error> {
// Delegates entirely to the parser-level authority validator; the spec `S`
// is forwarded unchanged.
parser::validate_authority::<S>(s)
}
/// Validates [IRI host][host].
///
/// # Examples
///
/// ```
/// use iri_string::{spec::UriSpec, validate::host};
/// assert!(host::<UriSpec>("example.com").is_ok());
/// assert!(host::<UriSpec>("subdomain.example.com").is_ok());
/// assert!(host::<UriSpec>("no-period").is_ok());
/// // Though strongly discouraged, this percent-encoded reg-name with
/// // non-UTF-8 bytes is considered syntactically valid.
/// assert!(host::<UriSpec>("non-%99-utf-8").is_ok());
/// // Empty host is valid. Remember `file:///` has empty authority (and empty host).
/// assert!(host::<UriSpec>("").is_ok());
/// assert!(host::<UriSpec>("127.0.0.1").is_ok());
/// assert!(host::<UriSpec>("[::1]").is_ok());
/// assert!(host::<UriSpec>("[::127.0.0.1]").is_ok());
/// // Syntax for future versions of IP addresses.
/// assert!(host::<UriSpec>("[v89ab.1+2,3(4)5&6]").is_ok());
///
/// // `port` is not a part of the host.
/// assert!(host::<UriSpec>("host:8080").is_err());
/// // `userinfo` is not a part of the host.
/// assert!(host::<UriSpec>("user:password@host").is_err());
/// ```
///
/// [host]: https://www.rfc-editor.org/rfc/rfc3986.html#section-3.2.2
pub fn host<S: Spec>(s: &str) -> Result<(), Error> {
// Delegates entirely to the parser-level host validator; the spec `S` is
// forwarded unchanged.
parser::validate_host::<S>(s)
}
/// Validates [IRI port][port].
///
/// Note that the syntax of the port is common between RFC 3986 (URIs) and
/// RFC 3987 (IRIs).
///
/// Also note that this function does not accept a leading colon.
///
/// [port]: https://www.rfc-editor.org/rfc/rfc3986.html#section-3.2.3
///
/// # Examples
///
/// ```
/// use iri_string::validate::port;
/// assert!(port("0").is_ok());
/// assert!(port("8080").is_ok());
/// assert!(port("0000080").is_ok());
/// // URI/IRI syntax itself does not have limit on the port number.
/// assert!(port("999999999").is_ok());
///
/// // The leading colon is not a part of the `port`.
/// assert!(port(":443").is_err());
/// ```
pub fn port(s: &str) -> Result<(), Error> {
if s.bytes().all(|b| b.is_ascii_digit()) {
Ok(())
} else {
Err(Error::with_kind(ErrorKind::InvalidPort))
}
}
/// Validates [IRI userinfo][userinfo].
///
/// Only the userinfo itself is accepted: the `@` that separates it from the
/// host is a delimiter and must not be included in `s`.
///
/// # Examples
///
/// ```
/// use iri_string::{spec::UriSpec, validate::userinfo};
/// assert!(userinfo::<UriSpec>("user").is_ok());
/// assert!(userinfo::<UriSpec>("user:password").is_ok());
/// assert!(userinfo::<UriSpec>("non-%99-utf-8").is_ok());
/// // Special characters can be included if they are percent-encoded.
/// assert!(userinfo::<UriSpec>("co%3Alon:at%40sign").is_ok());
///
/// // The trailing atsign is not a part of the userinfo.
/// assert!(userinfo::<UriSpec>("user:password@").is_err());
/// // Invalid characters.
/// assert!(userinfo::<UriSpec>("foo@bar").is_err());
/// assert!(userinfo::<UriSpec>("slash/is-not-allowed").is_err());
/// ```
///
/// # Errors
///
/// Returns the error reported by `parser::validate_userinfo` when `s` is not
/// accepted as a userinfo under the spec `S`.
///
/// [userinfo]: https://www.rfc-editor.org/rfc/rfc3986.html#section-3.2.1
pub fn userinfo<S: Spec>(s: &str) -> Result<(), Error> {
parser::validate_userinfo::<S>(s)
}
/// Validates [IRI path][path].
///
/// The string is checked as a path in isolation. As the examples below show,
/// some valid paths look like other kinds of IRI references when read out of
/// context, so a successful validation says nothing about how the string would
/// be parsed as a whole IRI reference.
///
/// # Examples
///
/// ```
/// use iri_string::{spec::UriSpec, validate::path};
/// assert!(path::<UriSpec>("").is_ok());
/// assert!(path::<UriSpec>("foo/bar").is_ok());
/// assert!(path::<UriSpec>("foo/bar/").is_ok());
/// assert!(path::<UriSpec>("/foo/bar").is_ok());
/// assert!(path::<UriSpec>("non-%99-utf-8").is_ok());
/// // Be careful! This is a completely valid (absolute) path, but may be confused
/// // with a protocol-relative URI, with the authority `foo` and the path `/bar`.
/// assert!(path::<UriSpec>("//foo/bar").is_ok());
/// // Be careful! This is a completely valid (relative) path, but may be confused
/// // with an absolute URI, with the scheme `foo` and the path `bar`.
/// assert!(path::<UriSpec>("foo:bar").is_ok());
///
/// // Invalid characters.
/// assert!(path::<UriSpec>("foo?bar").is_err());
/// assert!(path::<UriSpec>("foo#bar").is_err());
/// ```
///
/// # Errors
///
/// Returns the error reported by `parser::validate_path` when `s` is not
/// accepted as a path under the spec `S`.
///
/// [path]: https://www.rfc-editor.org/rfc/rfc3986.html#section-3.3
pub fn path<S: Spec>(s: &str) -> Result<(), Error> {
parser::validate_path::<S>(s)
}
/// Validates [IRI path segment][segment].
///
/// A segment is a single piece of a path between slashes, so `s` must not
/// contain an unescaped `/`; a percent-encoded slash (`%2F`) is accepted.
///
/// # Examples
///
/// ```
/// use iri_string::{spec::UriSpec, validate::path_segment};
/// assert!(path_segment::<UriSpec>("").is_ok());
/// assert!(path_segment::<UriSpec>("escaped-%2F-slash").is_ok());
/// assert!(path_segment::<UriSpec>("non-%99-utf-8").is_ok());
///
/// // A path segment itself cannot contain an unescaped slash.
/// assert!(path_segment::<UriSpec>("foo/bar").is_err());
/// ```
///
/// # Errors
///
/// Returns the error reported by `parser::validate_path_segment` when `s` is
/// not accepted as a path segment under the spec `S`.
///
/// [segment]: https://www.rfc-editor.org/rfc/rfc3986.html#section-3.3
pub fn path_segment<S: Spec>(s: &str) -> Result<(), Error> {
parser::validate_path_segment::<S>(s)
}
/// Validates [IRI query][query].
///
/// This validator corresponds to [`RiQueryStr`] and [`RiQueryString`] types.
///
/// Note that the first `?` character in an IRI is not a part of a query.
/// For example, `https://example.com/?foo#bar` has a query `foo`, **not** `?foo`.
///
/// # Examples
///
/// Any string that forms an IRI query is accepted.
/// Note that the IRI `foo://bar/baz?qux#quux` has the query `qux`, **not** `?qux`.
///
/// ```
/// use iri_string::{spec::UriSpec, validate::query};
/// assert!(query::<UriSpec>("").is_ok());
/// assert!(query::<UriSpec>("foo").is_ok());
/// assert!(query::<UriSpec>("foo/bar").is_ok());
/// assert!(query::<UriSpec>("/foo/bar").is_ok());
/// assert!(query::<UriSpec>("//foo/bar").is_ok());
/// assert!(query::<UriSpec>("https://user:pass@example.com:8080").is_ok());
/// assert!(query::<UriSpec>("https://example.com/").is_ok());
/// // Question sign `?` can appear in an IRI query.
/// assert!(query::<UriSpec>("query?again").is_ok());
/// ```
///
/// Some characters and sequences cannot be used in a query.
///
/// ```
/// use iri_string::{spec::UriSpec, validate::query};
/// // `<` and `>` cannot directly appear in an IRI reference.
/// assert!(query::<UriSpec>("<not allowed>").is_err());
/// // Broken percent encoding cannot appear in an IRI reference.
/// assert!(query::<UriSpec>("%").is_err());
/// assert!(query::<UriSpec>("%GG").is_err());
/// // Hash sign `#` cannot appear in an IRI query.
/// assert!(query::<UriSpec>("#hash").is_err());
/// ```
///
/// # Errors
///
/// Returns the error reported by `parser::validate_query` when `s` is not
/// accepted as a query under the spec `S`.
///
/// [query]: https://www.rfc-editor.org/rfc/rfc3986.html#section-3.4
/// [`RiQueryStr`]: ../types/struct.RiQueryStr.html
/// [`RiQueryString`]: ../types/struct.RiQueryString.html
pub fn query<S: Spec>(s: &str) -> Result<(), Error> {
parser::validate_query::<S>(s)
}
/// Validates [IRI fragment][fragment].
///
/// This validator corresponds to [`RiFragmentStr`] and [`RiFragmentString`] types.
///
/// Note that the first `#` character in an IRI is not a part of a fragment.
/// For example, `https://example.com/#foo` has a fragment `foo`, **not** `#foo`.
///
/// # Examples
///
/// Any string that forms an IRI fragment is accepted.
/// Note that the IRI `foo://bar/baz#qux` has the fragment `qux`, **not** `#qux`.
///
/// ```
/// use iri_string::{spec::UriSpec, validate::fragment};
/// assert!(fragment::<UriSpec>("").is_ok());
/// assert!(fragment::<UriSpec>("foo").is_ok());
/// assert!(fragment::<UriSpec>("foo/bar").is_ok());
/// assert!(fragment::<UriSpec>("/foo/bar").is_ok());
/// assert!(fragment::<UriSpec>("//foo/bar").is_ok());
/// assert!(fragment::<UriSpec>("https://user:pass@example.com:8080").is_ok());
/// assert!(fragment::<UriSpec>("https://example.com/").is_ok());
/// ```
///
/// Some characters and sequences cannot be used in a fragment.
///
/// ```
/// use iri_string::{spec::UriSpec, validate::fragment};
/// // `<` and `>` cannot directly appear in an IRI reference.
/// assert!(fragment::<UriSpec>("<not allowed>").is_err());
/// // Broken percent encoding cannot appear in an IRI reference.
/// assert!(fragment::<UriSpec>("%").is_err());
/// assert!(fragment::<UriSpec>("%GG").is_err());
/// // Hash sign `#` cannot appear in an IRI fragment.
/// assert!(fragment::<UriSpec>("#hash").is_err());
/// ```
///
/// # Errors
///
/// Returns the error reported by `parser::validate_fragment` when `s` is not
/// accepted as a fragment under the spec `S`.
///
/// [fragment]: https://www.rfc-editor.org/rfc/rfc3986.html#section-3.5
/// [`RiFragmentStr`]: ../types/struct.RiFragmentStr.html
/// [`RiFragmentString`]: ../types/struct.RiFragmentString.html
pub fn fragment<S: Spec>(s: &str) -> Result<(), Error> {
parser::validate_fragment::<S>(s)
}