chore: checkpoint before Python removal

This commit is contained in:
2026-03-26 22:33:59 +00:00
parent 683cec9307
commit e568ddf82a
29972 changed files with 11269302 additions and 2 deletions

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,6 @@
{
"git": {
"sha1": "ca622228dd1a899e3a14c95ac0fc34b80c17a03d"
},
"path_in_vcs": ""
}

16
vendor/iri-string/.editorconfig vendored Normal file
View File

@@ -0,0 +1,16 @@
root = true
# Use rustfmt (not editorconfig) to format Rust sources.
[*.{toml,yaml,yml}]
charset = utf-8
end_of_line = lf
insert_final_newline = true
trim_trailing_whitespace = true
[*.toml]
indent_size = 4
[*.{yaml,yml}]
indent_size = 2
indent_style = space

3
vendor/iri-string/.gitmodules vendored Normal file
View File

@@ -0,0 +1,3 @@
[submodule "iri-string-tests/uritemplate-test"]
path = iri-string-tests/assets/uritemplate-test
url = https://github.com/uri-templates/uritemplate-test.git

View File

@@ -0,0 +1,11 @@
[env]
# To disable this, simply export the variable with the value 0 or give cargo
# `--config=env.RUST_BACKTRACE=\'0\'` option.
# Note that the value should have a string type, not an integer.
RUST_BACKTRACE = '1'
[term]
quiet = false
verbose = true
color = 'always'
progress.when = 'never'

View File

@@ -0,0 +1,27 @@
---
# Checks if doc comments will have no errors on `docs.rs`.
when:
- event: [manual, push, tag]
variables:
rust_nightly_image: &rust_nightly_image 'rustlang/rust:nightly-slim'
clone:
git:
image: woodpeckerci/plugin-git
settings:
# Custom root CA certificate will be used in some local networks.
# The global environment variable WP_GIT_SKIP_VERIFY will be
# set to `true` in such environment.
skip-verify: ${WP_GIT_SKIP_VERIFY:-false}
lfs: false
steps:
- name: doc (docs.rs)
image: *rust_nightly_image
commands:
- export RUSTDOCFLAGS="-D warnings"
# Workaround to avoid build failure caused by too-old dependencies.
- cargo update
- cargo rustdoc --all-features -- --cfg docsrs

View File

@@ -0,0 +1,55 @@
---
# Checks if doc comments have no errors.
when:
- event: [manual, push, tag]
variables:
msrv_channel: &msrv_channel '1.60.0'
# Docker image of the MSRV.
rust_msrv_image: &rust_image 'library/rust:1.60.0-slim'
configure_toolchain_cmds: &configure_toolchain_cmds
- cp .woodpecker/cargo-config.toml $${CARGO_HOME}/config.toml
# Use the Rust toolchain(s) cached in the CI workspace directory.
- export RUSTUP_HOME=$${CI_WORKSPACE}/.tmp-rustup-home && test -d $${RUSTUP_HOME}
- rustc --version && cargo --version
matrix:
OPT_FEATURES:
- --features= # no features
- --features=default
- --all-features
- --features=alloc
- --features=std
clone:
git:
image: woodpeckerci/plugin-git
settings:
# Custom root CA certificate will be used in some local networks.
# The global environment variable WP_GIT_SKIP_VERIFY will be
# set to `true` in such environment.
skip-verify: ${WP_GIT_SKIP_VERIFY:-false}
lfs: false
steps:
- name: install Rust toolchain
image: *rust_image
commands:
- .woodpecker/scripts/prepare-toolchain.sh $${RUST_CHANNEL}
environment:
RUST_CHANNEL: *msrv_channel
- name: doc
image: *rust_image
commands:
- <<: *configure_toolchain_cmds
- export RUSTDOCFLAGS="-D warnings"
- cargo doc --workspace --no-deps ${OPT_FEATURES}
- name: doc private-items
image: *rust_image
commands:
- <<: *configure_toolchain_cmds
- export RUSTDOCFLAGS="-D warnings"
- cargo doc --workspace --no-deps ${OPT_FEATURES} --document-private-items

View File

@@ -0,0 +1,46 @@
---
# Checks if doc comments have no errors.
when:
- event: [manual, push, tag]
variables:
# Docker image of the latest stable Rust.
rust_image: &rust_image 'library/rust:slim'
configure_toolchain_cmds: &configure_toolchain_cmds
- cp .woodpecker/cargo-config.toml $${CARGO_HOME}/config.toml
# Use the Rust toolchain(s) cached in the CI workspace directory.
- export RUSTUP_HOME=$${CI_WORKSPACE}/.tmp-rustup-home && test -d $${RUSTUP_HOME}
- rustc --version && cargo --version
clone:
git:
image: woodpeckerci/plugin-git
settings:
# Custom root CA certificate will be used in some local networks.
# The global environment variable WP_GIT_SKIP_VERIFY will be
# set to `true` in such environment.
skip-verify: ${WP_GIT_SKIP_VERIFY:-false}
lfs: false
steps:
- name: install Rust toolchain
image: *rust_image
commands:
- .woodpecker/scripts/prepare-toolchain.sh $${RUST_CHANNEL}
environment:
RUST_CHANNEL: stable
- name: doc
image: *rust_image
commands:
- <<: *configure_toolchain_cmds
- export RUSTDOCFLAGS="-D warnings"
- cargo doc --workspace --no-deps ${OPT_FEATURES}
- name: doc private-items
image: *rust_image
commands:
- <<: *configure_toolchain_cmds
- export RUSTDOCFLAGS="-D warnings"
- cargo doc --workspace --no-deps ${OPT_FEATURES} --document-private-items

View File

@@ -0,0 +1,42 @@
---
# Checks if the sources are properly formatted.
when:
- event: [manual, push, tag]
variables:
rust_image: &rust_image 'library/rust:1.89.0-slim'
format_cmds: &format_cmds
- cp .woodpecker/cargo-config.toml $${CARGO_HOME}/config.toml
- rustup update --no-self-update $${RUST_CHANNEL} && rustup default $${RUST_CHANNEL}
- rustc --version && cargo --version
- rustup component add rustfmt
- cargo fmt --all -- --check
clone:
git:
image: woodpeckerci/plugin-git
settings:
# Custom root CA certificate will be used in some local networks.
# The global environment variable WP_GIT_SKIP_VERIFY will be
# set to `true` in such environment.
skip-verify: ${WP_GIT_SKIP_VERIFY:-false}
lfs: false
steps:
- name: rust format stable
image: *rust_image
commands:
- <<: *format_cmds
environment:
RUST_CHANNEL: stable
depends_on: []
- name: rust format beta
failure: ignore
image: *rust_image
commands:
- <<: *format_cmds
environment:
RUST_CHANNEL: beta
depends_on: []

View File

@@ -0,0 +1,24 @@
---
# Checks if MSRV is consistent with the toplevel `Cargo.toml`.
when:
- event: [manual, push, tag]
variables:
base_image: &base_image 'library/debian:12-slim'
clone:
git:
image: woodpeckerci/plugin-git
settings:
# Custom root CA certificate will be used in some local networks.
# The global environment variable WP_GIT_SKIP_VERIFY will be
# set to `true` in such environment.
skip-verify: ${WP_GIT_SKIP_VERIFY:-false}
lfs: false
steps:
- name: check
image: *base_image
commands:
- .woodpecker/scripts/check-msrv-consistency.sh

View File

@@ -0,0 +1,34 @@
#!/bin/sh
# Verifies that the MSRV declared in the toplevel `Cargo.toml` is stated
# consistently in `README.md` and in the `.woodpecker/*.yml` pipeline files.
set -eu
# Run from the repository root (two levels above this script's directory).
cd "$(readlink -f "$(dirname "$0")/../..")"
# Get MSRV from the toplevel Cargo.toml
MSRV="$(sed -ne 's/^rust-version\s*=\s*[^0-9#]\([0-9.]\+\).*$/\1/p' Cargo.toml)"
# `sed -n` exits successfully even when no line matched. An empty MSRV would
# make every pattern below match regardless of the version written in the
# checked files, so fail loudly instead of reporting a bogus success.
if [ -z "$MSRV" ] ; then
    echo 'failed to extract rust-version (MSRV) from Cargo.toml' >&2
    exit 1
fi
echo "MSRV=${MSRV}"
# Escape the dots so that the version is matched literally inside regexes.
MSRV_REGEX="$(echo "$MSRV" | sed -e 's/\./\\./g')"
# Checks that README.md states the MSRV.
#
# Searches (case-insensitively) for a "minimum supported rust version" or
# "minimum supported rustc version" phrase followed on the same line by the
# MSRV. grep prints the matching line; when nothing matches, its non-zero
# status aborts the script since `set -e` is in effect.
check_readme() {
    echo "checking README.md"
    # The trailing `\>` (end of word) keeps a shorter MSRV such as `1.6` from
    # being satisfied by a longer version such as `1.60`, in the same way as
    # the patterns used for the pipeline files.
    grep --color=always --with-filename --line-number --ignore-case 'minimum supported rust\(c\)\? version.*'"${MSRV_REGEX}"'\>' README.md
}
check_readme
# Checks one variable definition in one pipeline file.
#
# $1: name of the YAML variable (e.g. `msrv_channel`).
# $2: path to the pipeline file.
#
# Files that do not define the variable are accepted as they are. When the
# variable is defined, its value (the part before any `#`) must contain the
# MSRV; otherwise grep fails and `set -e` aborts the script.
check_woodpecker_variable() {
    if grep --quiet '^\s*'"$1"':' "$2" ; then
        grep --color=always --with-filename --line-number '^\s*'"$1"':[^#]*'"${MSRV_REGEX}"'\>' "$2"
    fi
}
# Checks the MSRV-related variables of every Woodpecker pipeline definition.
check_woodpecker() {
    for pipeline_yml in .woodpecker/*.yml ; do
        echo "checking ${pipeline_yml}"
        check_woodpecker_variable msrv_channel "$pipeline_yml"
        check_woodpecker_variable rust_msrv_image "$pipeline_yml"
    done
}
check_woodpecker
# vim: set expandtab tabstop=4 :

View File

@@ -0,0 +1,25 @@
#!/bin/sh
# Prepares the Rust toolchain of the requested channel.
#
# Usage: prepare-toolchain.sh RUST_CHANNEL
#
# The environment variables `CARGO_HOME` and `CI_WORKSPACE` must be set
# (`set -u` makes the script fail otherwise).
#
# NOTE: Commands are intentionally not chained with `&&`. Under `set -e`, the
# failure of any command in an AND-OR list other than the last one does not
# abort the script, so a failing `cp` or `rustup update` would go unnoticed.
set -eu
# Run from the repository root (two levels above this script's directory).
cd "$(readlink -f "$(dirname "$0")/../..")"
set -x
RUST_CHANNEL="$1"
# Use the preset for CI environment.
cp .woodpecker/cargo-config.toml "${CARGO_HOME}/config.toml"
# The repository should not have a directory which would be used as the cached `$RUSTUP_HOME`.
CACHED_RUSTUP_HOME="${CI_WORKSPACE}/.tmp-rustup-home"
test ! -e "${CACHED_RUSTUP_HOME}"
# Copy the pre-installed toolchains of the Docker container into the new `$RUSTUP_HOME`.
cp -a /usr/local/rustup "${CACHED_RUSTUP_HOME}"
test -d "${CACHED_RUSTUP_HOME}"
# Use the new `$RUSTUP_HOME` variable.
export RUSTUP_HOME="${CACHED_RUSTUP_HOME}"
# Install the toolchain of the specified version (if not yet available locally).
rustup update --no-self-update "${RUST_CHANNEL}"
rustup default "${RUST_CHANNEL}"
# Show the versions actually in use.
rustc --version
cargo --version

View File

@@ -0,0 +1,170 @@
---
# Lint, build, and test.
# Clippy lint will make the subsequent build faster, and the result of the build
# is necessary for the test.
when:
- event: [manual, push, tag]
variables:
# Docker image of the latest stable Rust.
rust_image: &rust_image 'library/rust:slim'
rust_nightly_image: &rust_nightly_image 'rustlang/rust:nightly-slim'
configure_toolchain_cmds: &configure_toolchain_cmds
- cp .woodpecker/cargo-config.toml $${CARGO_HOME}/config.toml
# Use the Rust toolchain(s) cached in the CI workspace directory.
- export RUSTUP_HOME=$${CI_WORKSPACE}/.tmp-rustup-home && test -d $${RUSTUP_HOME}
- rustc --version && cargo --version
matrix:
RUST_CHANNEL:
- stable
BUILD_PROFILE:
- dev # equivalent to no --release flag (debug build)
- release # equivalent to --release flag
OPT_FEATURES:
- --features= # no features
- --features=default
- --all-features
# `minimal`: minimal dependency versions
# `direct-minimal`: minimal direct dependency versions
# `default`: default dependency versions (just ensures `Cargo.lock` exists)
# `updated`: explicitly updated dependency versions (cargo update)
RUST_DEP_VERSIONS:
- direct-minimal
- default
include:
# To see if updated dependencies break something.
- RUST_CHANNEL: stable
BUILD_PROFILE: dev
OPT_FEATURES: --all-features
RUST_DEP_VERSIONS: updated
# To see if the updated toolchain rejects something.
- RUST_CHANNEL: beta
BUILD_PROFILE: dev
OPT_FEATURES: --all-features
RUST_DEP_VERSIONS: direct-minimal
# To see if the updated toolchain and dependencies break something.
- RUST_CHANNEL: beta
BUILD_PROFILE: dev
OPT_FEATURES: --all-features
RUST_DEP_VERSIONS: updated
clone:
git:
image: woodpeckerci/plugin-git
settings:
# Custom root CA certificate will be used in some local networks.
# The global environment variable WP_GIT_SKIP_VERIFY will be
# set to `true` in such environment.
skip-verify: ${WP_GIT_SKIP_VERIFY:-false}
lfs: false
steps:
- name: install Rust toolchain
image: *rust_image
commands:
- .woodpecker/scripts/prepare-toolchain.sh $${RUST_CHANNEL}
- name: prepare deps
image: *rust_image
commands:
# Ensure `Cargo.lock` exists.
- if [ ! -f Cargo.lock ] ; then echo "Cargo.lock is missing" >&2 ; exit 1 ; fi
when:
- matrix:
RUST_DEP_VERSIONS: default
- name: prepare deps
image: *rust_image
commands:
- <<: *configure_toolchain_cmds
- cargo update
when:
- matrix:
RUST_DEP_VERSIONS: updated
- name: prepare deps
image: *rust_nightly_image
commands:
- rustc --version && cargo --version
- cargo update -Zminimal-versions
when:
- matrix:
RUST_DEP_VERSIONS: minimal
- name: prepare deps
image: *rust_nightly_image
commands:
- rustc --version && cargo --version
- cargo update -Zdirect-minimal-versions
when:
- matrix:
RUST_DEP_VERSIONS: direct-minimal
- name: fetch deps
image: *rust_image
commands:
- <<: *configure_toolchain_cmds
- cargo fetch --locked
- mkdir -p .cargo
- cargo vendor --color=always --locked >> .cargo/config.toml
- name: clippy (allow warning)
image: *rust_image
commands:
- <<: *configure_toolchain_cmds
- rustup component add clippy
- cargo clippy --version
# TODO: Deny warnings for all the workspace members once the MSRV and deps are bumped.
#- cargo clippy --color=always --frozen --workspace --all-targets --no-deps --profile=${BUILD_PROFILE} --no-default-features ${OPT_FEATURES} -- --deny warnings
- cargo clippy --color=always --frozen --all-targets --no-deps --profile=${BUILD_PROFILE} --no-default-features ${OPT_FEATURES}
when:
- matrix:
RUST_DEP_VERSIONS: minimal
- matrix:
RUST_DEP_VERSIONS: direct-minimal
- matrix:
RUST_DEP_VERSIONS: default
- name: clippy
image: *rust_image
commands:
- <<: *configure_toolchain_cmds
- rustup component add clippy
- cargo clippy --version
# TODO: Deny warnings for all the workspace members once the MSRV and deps are bumped.
#- cargo clippy --color=always --frozen --workspace --all-targets --no-deps --profile=${BUILD_PROFILE} --no-default-features ${OPT_FEATURES} -- --deny warnings
- cargo clippy --color=always --frozen --all-targets --no-deps --profile=${BUILD_PROFILE} --no-default-features ${OPT_FEATURES} -- --deny warnings
when:
- matrix:
RUST_DEP_VERSIONS: updated
- name: build
image: *rust_image
commands:
- <<: *configure_toolchain_cmds
# TODO: Build all the workspace members once the MSRV and deps are bumped.
#- cargo build --color=always --frozen --workspace --all-targets --profile=${BUILD_PROFILE} --no-default-features ${OPT_FEATURES}
- cargo build --color=always --frozen --all-targets --profile=${BUILD_PROFILE} --no-default-features ${OPT_FEATURES}
- name: run tests
image: *rust_image
commands:
- <<: *configure_toolchain_cmds
- export RUST_BACKTRACE=1
# TODO: Test all the workspace members once the MSRV and deps are bumped.
#- cargo test --color=always --frozen --workspace --all-targets --profile=${BUILD_PROFILE} --no-default-features ${OPT_FEATURES}
- cargo test --color=always --frozen --all-targets --profile=${BUILD_PROFILE} --no-default-features ${OPT_FEATURES}
- name: run doctests
image: *rust_image
commands:
- <<: *configure_toolchain_cmds
- export RUST_BACKTRACE=1
# TODO: Test all the workspace members once the MSRV and deps are bumped.
#- cargo test --color=always --frozen --workspace --doc --profile=${BUILD_PROFILE} --no-default-features ${OPT_FEATURES}
- cargo test --color=always --frozen --doc --profile=${BUILD_PROFILE} --no-default-features ${OPT_FEATURES}

View File

@@ -0,0 +1,220 @@
---
# Lint, build, and test.
# Clippy lint will make the subsequent build faster, and the result of the build
# is necessary for the test.
when:
- event: [manual, push, tag]
variables:
msrv_channel: &msrv_channel '1.60.0'
# Docker image of the MSRV.
rust_msrv_image: &rust_image 'library/rust:1.60.0-slim'
rust_nightly_image: &rust_nightly_image 'rustlang/rust:nightly-slim'
configure_toolchain_cmds: &configure_toolchain_cmds
- cp .woodpecker/cargo-config.toml $${CARGO_HOME}/config.toml
# Use the Rust toolchain(s) cached in the CI workspace directory.
- export RUSTUP_HOME=$${CI_WORKSPACE}/.tmp-rustup-home && test -d $${RUSTUP_HOME}
- rustc --version && cargo --version
matrix:
BUILD_PROFILE:
- dev # equivalent to no --release flag (debug build)
- release # equivalent to --release flag
OPT_FEATURES:
- --features= # no features
- --features=default
- --all-features
- --features=alloc
- --features=std
- --features=alloc,serde
# `minimal`: minimal dependency versions
# `direct-minimal`: minimal direct dependency versions
# `default`: default dependency versions (just ensures `Cargo.lock` exists)
# `updated`: explicitly updated dependency versions (cargo update)
RUST_DEP_VERSIONS:
- direct-minimal
- default
- updated
clone:
git:
image: woodpeckerci/plugin-git
settings:
# Custom root CA certificate will be used in some local networks.
# The global environment variable WP_GIT_SKIP_VERIFY will be
# set to `true` in such environment.
skip-verify: ${WP_GIT_SKIP_VERIFY:-false}
lfs: false
steps:
- name: install Rust toolchain
image: *rust_image
commands:
- .woodpecker/scripts/prepare-toolchain.sh $${RUST_CHANNEL}
environment:
RUST_CHANNEL: *msrv_channel
- name: prepare deps
image: *rust_image
commands:
# Ensure `Cargo.lock` exists.
- if [ ! -f Cargo.lock ] ; then echo "Cargo.lock is missing" >&2 ; exit 1 ; fi
when:
- matrix:
RUST_DEP_VERSIONS: default
- name: prepare deps
image: *rust_image
commands:
- <<: *configure_toolchain_cmds
- cargo update
when:
- matrix:
RUST_DEP_VERSIONS: updated
- name: prepare deps
image: *rust_nightly_image
commands:
- rustc --version && cargo --version
- cargo update --workspace -Zminimal-versions
when:
- matrix:
RUST_DEP_VERSIONS: minimal
- name: prepare deps
image: *rust_nightly_image
commands:
- rustc --version && cargo --version
- cargo update -Zdirect-minimal-versions
when:
- matrix:
RUST_DEP_VERSIONS: direct-minimal
- name: fetch deps
image: *rust_image
commands:
- <<: *configure_toolchain_cmds
- cargo fetch --locked
- mkdir -p .cargo
- cargo vendor --color=always --locked >> .cargo/config.toml
- name: clippy (allow warning)
image: *rust_image
commands:
- <<: *configure_toolchain_cmds
- rustup component add clippy
- cargo clippy --version
# TODO: Deny warnings for all the workspace members once the MSRV and deps are bumped.
#- cargo clippy --color=always --frozen --workspace --all-targets --no-deps --profile=${BUILD_PROFILE} --no-default-features ${OPT_FEATURES} -- --deny warnings
- cargo clippy --color=always --frozen --all-targets --no-deps --profile=${BUILD_PROFILE} --no-default-features ${OPT_FEATURES}
when:
- matrix:
RUST_DEP_VERSIONS: minimal
- matrix:
RUST_DEP_VERSIONS: default
- name: clippy (allow warning, allow failure)
image: *rust_image
commands:
- <<: *configure_toolchain_cmds
- rustup component add clippy
- cargo clippy --version
# TODO: Deny warnings for all the workspace members once the MSRV and deps are bumped.
#- cargo clippy --color=always --frozen --workspace --all-targets --no-deps --profile=${BUILD_PROFILE} --no-default-features ${OPT_FEATURES} -- --deny warnings
- cargo clippy --color=always --frozen --all-targets --no-deps --profile=${BUILD_PROFILE} --no-default-features ${OPT_FEATURES}
# Build may fail because updated crates will have higher MSRV.
failure: ignore
when:
- matrix:
RUST_DEP_VERSIONS: direct-minimal
- matrix:
RUST_DEP_VERSIONS: updated
- name: build (allow failure)
image: *rust_image
commands:
- <<: *configure_toolchain_cmds
# TODO: Build all the workspace members once the MSRV and deps are bumped.
#- cargo build --color=always --frozen --workspace --all-targets --profile=${BUILD_PROFILE} --no-default-features ${OPT_FEATURES}
- cargo build --color=always --frozen --all-targets --profile=${BUILD_PROFILE} --no-default-features ${OPT_FEATURES}
# Build may fail because updated crates will have higher MSRV.
failure: ignore
when:
- matrix:
RUST_DEP_VERSIONS: direct-minimal
- matrix:
RUST_DEP_VERSIONS: updated
- name: build
image: *rust_image
commands:
- <<: *configure_toolchain_cmds
# TODO: Build all the workspace members once the MSRV and deps are bumped.
#- cargo build --color=always --frozen --workspace --all-targets --profile=${BUILD_PROFILE} --no-default-features ${OPT_FEATURES}
- cargo build --color=always --frozen --all-targets --profile=${BUILD_PROFILE} --no-default-features ${OPT_FEATURES}
when:
- matrix:
RUST_DEP_VERSIONS: minimal
- matrix:
RUST_DEP_VERSIONS: default
- name: run tests (allow failure)
image: *rust_image
commands:
- <<: *configure_toolchain_cmds
- export RUST_BACKTRACE=1
# TODO: Test all the workspace members once the MSRV and deps are bumped.
#- cargo test --color=always --frozen --workspace --all-targets --profile=${BUILD_PROFILE} --no-default-features ${OPT_FEATURES}
- cargo test --color=always --frozen --all-targets --profile=${BUILD_PROFILE} --no-default-features ${OPT_FEATURES}
# Build may fail because updated crates will have higher MSRV.
failure: ignore
when:
- matrix:
RUST_DEP_VERSIONS: direct-minimal
- matrix:
RUST_DEP_VERSIONS: updated
- name: run tests
image: *rust_image
commands:
- <<: *configure_toolchain_cmds
- export RUST_BACKTRACE=1
# TODO: Test all the workspace members once the MSRV and deps are bumped.
#- cargo test --color=always --frozen --workspace --all-targets --profile=${BUILD_PROFILE} --no-default-features ${OPT_FEATURES}
- cargo test --color=always --frozen --all-targets --profile=${BUILD_PROFILE} --no-default-features ${OPT_FEATURES}
when:
- matrix:
RUST_DEP_VERSIONS: minimal
- matrix:
RUST_DEP_VERSIONS: default
- name: run doctests (allow failure)
image: *rust_image
commands:
- <<: *configure_toolchain_cmds
- export RUST_BACKTRACE=1
# TODO: Test all the workspace members once the MSRV and deps are bumped.
#- cargo test --color=always --frozen --workspace --doc --profile=${BUILD_PROFILE} --no-default-features ${OPT_FEATURES}
- cargo test --color=always --frozen --doc --profile=${BUILD_PROFILE} --no-default-features ${OPT_FEATURES}
# Build may fail because updated crates will have higher MSRV.
failure: ignore
when:
- matrix:
RUST_DEP_VERSIONS: direct-minimal
- matrix:
RUST_DEP_VERSIONS: updated
- name: run doctests
image: *rust_image
commands:
- <<: *configure_toolchain_cmds
- export RUST_BACKTRACE=1
# TODO: Test all the workspace members once the MSRV and deps are bumped.
#- cargo test --color=always --frozen --workspace --doc --profile=${BUILD_PROFILE} --no-default-features ${OPT_FEATURES}
- cargo test --color=always --frozen --doc --profile=${BUILD_PROFILE} --no-default-features ${OPT_FEATURES}
when:
- matrix:
RUST_DEP_VERSIONS: minimal
- matrix:
RUST_DEP_VERSIONS: default

1099
vendor/iri-string/CHANGELOG.md vendored Normal file

File diff suppressed because it is too large Load Diff

82
vendor/iri-string/Cargo.lock generated vendored Normal file
View File

@@ -0,0 +1,82 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "iri-string"
version = "0.7.10"
dependencies = [
"memchr",
"serde",
"serde_test",
]
[[package]]
name = "memchr"
version = "2.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a"
[[package]]
name = "proc-macro2"
version = "1.0.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "51ef7cd2518ead700af67bf9d1a658d90b6037d77110fd9c0445429d0ba1c6c9"
dependencies = [
"unicode-xid",
]
[[package]]
name = "quote"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7ab938ebe6f1c82426b5fb82eaf10c3e3028c53deaa3fbe38f5904b37cf4d767"
dependencies = [
"proc-macro2",
]
[[package]]
name = "serde"
version = "1.0.103"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1217f97ab8e8904b57dd22eb61cde455fa7446a9c1cf43966066da047c1f3702"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.103"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a8c6faef9a2e64b0064f48570289b4bf8823b7581f1d6157c1b52152306651d0"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "serde_test"
version = "1.0.104"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "33f96dff8c3744387b53404ea33e834073b0791dcc1ea9c85b805745f9324704"
dependencies = [
"serde",
]
[[package]]
name = "syn"
version = "1.0.67"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6498a9efc342871f91cc2d0d694c674368b4ceb40f62b65a7a08c3792935e702"
dependencies = [
"proc-macro2",
"quote",
"unicode-xid",
]
[[package]]
name = "unicode-xid"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "826e7639553986605ec5979c7dd957c7895e93eabed50ab2ffa7f6128a75097c"

129
vendor/iri-string/Cargo.toml vendored Normal file
View File

@@ -0,0 +1,129 @@
# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
#
# When uploading crates to the registry Cargo will automatically
# "normalize" Cargo.toml files for maximal compatibility
# with all versions of Cargo and also rewrite `path` dependencies
# to registry (e.g., crates.io) dependencies.
#
# If you are reading this file be aware that the original Cargo.toml
# will likely look very different (and much more reasonable).
# See Cargo.toml.orig for the original contents.
[package]
edition = "2021"
rust-version = "1.60"
name = "iri-string"
version = "0.7.10"
authors = ["YOSHIOKA Takuma <nop_thread@nops.red>"]
build = false
autolib = false
autobins = false
autoexamples = false
autotests = false
autobenches = false
description = "IRI as string types"
readme = "README.md"
keywords = [
"IRI",
"URI",
]
license = "MIT OR Apache-2.0"
repository = "https://github.com/lo48576/iri-string"
[package.metadata.docs.rs]
all-features = true
rustdoc-args = [
"--cfg",
"docsrs",
]
[badges.maintenance]
status = "actively-developed"
[features]
alloc = ["serde?/alloc"]
default = ["std"]
std = [
"alloc",
"memchr?/std",
"serde?/std",
]
[lib]
name = "iri_string"
path = "src/lib.rs"
bench = false
[[example]]
name = "flamegraph-parse"
path = "examples/flamegraph-parse.rs"
[[example]]
name = "flamegraph-resolve"
path = "examples/flamegraph-resolve.rs"
required-features = ["alloc"]
[[example]]
name = "normalize"
path = "examples/normalize.rs"
required-features = ["std"]
[[example]]
name = "parse"
path = "examples/parse.rs"
required-features = ["std"]
[[example]]
name = "resolve"
path = "examples/resolve.rs"
required-features = ["std"]
[[test]]
name = "build"
path = "tests/build.rs"
[[test]]
name = "gh-issues"
path = "tests/gh-issues.rs"
[[test]]
name = "iri"
path = "tests/iri.rs"
[[test]]
name = "normalize"
path = "tests/normalize.rs"
[[test]]
name = "percent_encode"
path = "tests/percent_encode.rs"
[[test]]
name = "resolve"
path = "tests/resolve.rs"
[[test]]
name = "serde"
path = "tests/serde.rs"
[[test]]
name = "string_types_interop"
path = "tests/string_types_interop.rs"
[[test]]
name = "template"
path = "tests/template.rs"
[dependencies.memchr]
version = "2.4.1"
optional = true
default-features = false
[dependencies.serde]
version = "1.0.103"
features = ["derive"]
optional = true
default-features = false
[dev-dependencies.serde_test]
version = "1.0.104"

202
vendor/iri-string/LICENSE-APACHE.txt vendored Normal file
View File

@@ -0,0 +1,202 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

25
vendor/iri-string/LICENSE-MIT.txt vendored Normal file
View File

@@ -0,0 +1,25 @@
Copyright 2019-2024 YOSHIOKA Takuma
Permission is hereby granted, free of charge, to any
person obtaining a copy of this software and associated
documentation files (the "Software"), to deal in the
Software without restriction, including without
limitation the rights to use, copy, modify, merge,
publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software
is furnished to do so, subject to the following
conditions:
The above copyright notice and this permission notice
shall be included in all copies or substantial portions
of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.

82
vendor/iri-string/README.md vendored Normal file
View File

@@ -0,0 +1,82 @@
# iri-string
[![Latest version](https://img.shields.io/crates/v/iri-string.svg)](https://crates.io/crates/iri-string)
[![Documentation](https://docs.rs/iri-string/badge.svg)](https://docs.rs/iri-string)
* Minimum supported Rust version: 1.60
String types for [IRI](https://www.rfc-editor.org/rfc/rfc3987.html)s (Internationalized Resource
Identifiers) and [URI](https://www.rfc-editor.org/rfc/rfc3986.html)s (Uniform Resource Identifiers).
See the [documentation](https://docs.rs/iri-string) for details.
## Features
* `no_std` support.
* String types (both owned and borrowed) for RFC 3986 URIs and RFC 3987 IRIs.
+ Native slice types, so highly operable with `Cow`, `ToOwned`, etc.
+ URIs/IRIs validation.
+ Conversions between URIs and IRIs.
+ Decomposition into components.
* IRI reference resolution algorithm.
* IRI normalization algorithm.
* Masking password part of an IRI (optional and not automatic).
* Percent encoding of user-provided strings.
* IRI builder.
* RFC 6570 URI Template.
### Feature flags
#### Direct
* `alloc` (enabled by default)
+ Enables types and functions which require memory allocation.
+ Requires `std` or `alloc` crate available.
* `std` (enabled by default)
+ Enables all `std` features (such as memory allocations and `std::error::Error` trait).
+ Requires `std` crate available.
+ This automatically enables `alloc` feature.
#### memchr
* `memchr`
+ Enables optimization for internal parsers, using [`memchr`] crate.
[`memchr`]: https://crates.io/crates/memchr
#### serde
* `serde`
+ Implements `Serialize` and `Deserialize` traits for string types.
## CI
CI is running on the main author's private instance of
[Woodpecker CI](https://woodpecker-ci.org/), and CI runs should pass on
`master` and `develop` branches.
While the instance is not public, anyone who has (or deploys) their own
Woodpecker CI instance can run the CI tests.
The reasons for not using free CI services are:
* Running tests for multiple combinations of feature flags and toolchain
versions can cause service credits to be consumed very quickly,
* I (the main author) don't like to depend on proprietary services, and
* I'm not using git repository hosting services (including GitHub and GitLab) as
a primary remote, and don't like to depend on CI runners tied to them even if
they are free software.
## License
Licensed under either of
* Apache License, Version 2.0, ([LICENSE-APACHE.txt](LICENSE-APACHE.txt) or
<https://www.apache.org/licenses/LICENSE-2.0>)
* MIT license ([LICENSE-MIT.txt](LICENSE-MIT.txt) or
<https://opensource.org/licenses/MIT>)
at your option.
### Contribution
Unless you explicitly state otherwise, any contribution intentionally submitted
for inclusion in the work by you, as defined in the Apache-2.0 license, shall be
dual licensed as above, without any additional terms or conditions.

View File

@@ -0,0 +1,26 @@
use iri_string::types::IriReferenceStr;
/// Benchmark driver: validates a fixed set of IRI references one million times.
///
/// The inputs cover a long IRI with every component present, a plain domain
/// host, an IPv4 host, two IPv6 forms, and an `IPvFuture` host.
fn main() {
    /// Long IRI with userinfo, port, percent-encoded octets, dot segments,
    /// non-ASCII path segments, a query, and a fragment.
    const COMPLEX: &str = concat!(
        "scheme://user:pw@sub.example.com:8080/a/b/c/%30/%31/%32%33%34",
        "/foo/foo/../../../foo.foo/foo/foo/././././//////foo",
        "/\u{03B1}\u{03B2}\u{03B3}/\u{03B1}\u{03B2}\u{03B3}/\u{03B1}\u{03B2}\u{03B3}",
        "?k1=v1&k2=v2&k3=v3#fragment"
    );
    /// Registered-name host.
    const DOMAIN: &str = "scheme://sub.sub.sub.example.com:8080/a/b/c";
    /// IPv4 literal host.
    const V4: &str = "scheme://198.51.100.23:8080/a/b/c";
    /// IPv6 literal host.
    const V6: &str = "scheme://[2001:db8:0123::cafe]:8080/a/b/c";
    /// IPv6 literal host with an embedded IPv4 tail.
    const V6V4: &str = "scheme://[2001:db8::198.51.100.23]:8080/a/b/c";
    /// `IPvFuture` literal host.
    const VFUTURE: &str = "scheme://[v2.ipv2-does-not-exist]:8080/a/b/c";

    for _ in 0..1_000_000 {
        // The results are built and immediately discarded; only the
        // validation work itself is of interest.
        let _ = (
            IriReferenceStr::new(COMPLEX),
            IriReferenceStr::new(DOMAIN),
            IriReferenceStr::new(V4),
            IriReferenceStr::new(V6),
            IriReferenceStr::new(V6V4),
            IriReferenceStr::new(VFUTURE),
        );
    }
}

View File

@@ -0,0 +1,17 @@
#![cfg(feature = "alloc")]
use iri_string::format::ToDedicatedString;
use iri_string::types::{IriAbsoluteStr, IriReferenceStr};
/// Benchmark driver: resolves one dot-segment-heavy relative reference
/// against a fixed base IRI one million times, allocating the result each time.
fn main() {
    let base = IriAbsoluteStr::new("https://sub.example.com/foo1/foo2/foo3/foo4/foo5")
        .expect("should be valid IRI");
    let reference_text = concat!(
        "bar1/bar2/bar3/../bar4/../../bar5/bar6/bar7/../../../../..",
        "/bar8/../../../bar9/././././././bar10/bar11",
    );
    let rel = IriReferenceStr::new(reference_text).expect("should be valid IRI");

    for _ in 0..1_000_000 {
        // Build the owned string and release it right away.
        drop(rel.resolve_against(base).to_dedicated_string());
    }
}

145
vendor/iri-string/examples/normalize.rs vendored Normal file
View File

@@ -0,0 +1,145 @@
//! An example to normalize an IRI from the CLI argument.
use iri_string::format::ToDedicatedString;
use iri_string::types::{RiStr, RiString};
/// Usage text for the `normalize` example, written to stderr by `print_help`.
const USAGE: &str = "\
USAGE:
normalize [FLAGS] [--] IRI
FLAGS:
-h, --help Prints this help
-i, --iri Handle the input as an IRI (RFC 3987)
-u, --uri Handle the input as an URI (RFC 3986)
-a, --ascii Converts the output to an URI (RFC 3986)
-w, --whatwg Serialize normalization result according to WHATWG URL Standard.
ARGS:
<IRI> IRI
";
/// Writes the usage text to standard error.
fn print_help() {
    eprintln!("{}", USAGE);
}
/// Prints the usage text and terminates the process with exit status 1.
fn help_and_exit() -> ! {
    /// Exit status used after printing the help.
    const EXIT_STATUS: i32 = 1;

    print_help();
    std::process::exit(EXIT_STATUS)
}
/// Reports `msg` as an error on standard error, followed by a blank line and
/// the usage text, then terminates the process with exit status 1.
fn die(msg: impl std::fmt::Display) -> ! {
    // The trailing `\n` plus the one added by `eprintln!` yields the blank
    // separator line before the usage text.
    eprintln!("ERROR: {}\n", msg);
    print_help();
    std::process::exit(1)
}
/// Which syntax the input should be validated against.
#[derive(Debug, Clone, Copy)]
enum Spec {
    /// URI as defined by RFC 3986.
    Uri,
    /// IRI as defined by RFC 3987.
    Iri,
}

/// The IRI syntax is used when the user selects nothing explicitly.
impl Default for Spec {
    #[inline]
    fn default() -> Self {
        Spec::Iri
    }
}
/// CLI options.
#[derive(Default, Debug, Clone)]
struct CliOpt {
    /// IRI.
    iri: String,
    /// Syntax spec.
    spec: Spec,
    /// Whether to convert output to ASCII URI or not.
    output_ascii: bool,
    /// Whether to serialize in WHATWG URL Standard way.
    whatwg_serialization: bool,
}

impl CliOpt {
    /// Parses the process arguments into options.
    ///
    /// Flags are recognized until the first `--`; every argument after `--`
    /// is taken as the positional IRI even if it starts with `-`.
    ///
    /// Exits the process (through `die` / `help_and_exit`) when an unknown
    /// flag is given, when `--help` is requested, when more than one IRI is
    /// given, or when no IRI is given.
    fn parse() -> Self {
        let mut args = std::env::args();
        // Skip `argv[0]`.
        args.next();

        let mut iri = None;
        let mut spec = None;
        let mut output_ascii = false;
        let mut whatwg_serialization = false;

        for arg in args.by_ref() {
            match arg.as_str() {
                // End-of-flags marker advertised as `[--]` in the usage text.
                // The remaining arguments are handled by the loop below.
                "--" => break,
                "--ascii" | "-a" => output_ascii = true,
                "--iri" | "-i" => spec = Some(Spec::Iri),
                "--uri" | "-u" => spec = Some(Spec::Uri),
                "--whatwg" | "-w" => whatwg_serialization = true,
                "--help" | "-h" => help_and_exit(),
                opt if opt.starts_with('-') => die(format_args!("Unknown option: {opt}")),
                _ => {
                    if iri.replace(arg).is_some() {
                        die("IRI can be specified at most once");
                    }
                }
            }
        }

        // Arguments after `--` are always positional.
        for arg in args {
            if iri.replace(arg).is_some() {
                // Same policy as before `--`: a second IRI is a hard error.
                die("IRI can be specified at most once");
            }
        }

        let iri = iri.unwrap_or_else(|| die("IRI should be specified"));
        let spec = spec.unwrap_or_default();
        Self {
            iri,
            spec,
            output_ascii,
            whatwg_serialization,
        }
    }
}
fn main() {
let opt = CliOpt::parse();
match opt.spec {
Spec::Iri => process_iri(&opt),
Spec::Uri => process_uri(&opt),
}
}
/// Normalizes the input as an IRI and prints it, converting the result to a
/// URI first when ASCII output was requested.
fn process_iri(opt: &CliOpt) {
    let normalized = {
        let mut iri = normalize::<iri_string::spec::IriSpec>(opt);
        if opt.output_ascii {
            iri.encode_to_uri_inline();
        }
        iri
    };
    println!("{}", normalized);
}
/// Normalizes the input as a URI and prints it.
fn process_uri(opt: &CliOpt) {
    println!("{}", normalize::<iri_string::spec::UriSpec>(opt));
}
/// Validates the input under spec `S` and returns its normalized form as an
/// owned string.
///
/// Exits through `die` when the input does not parse, or — unless WHATWG
/// serialization was requested — when the normalization result is reported
/// as not RFC 3986 normalizable.
fn normalize<S: iri_string::spec::Spec>(opt: &CliOpt) -> RiString<S> {
    let raw: &str = opt.iri.as_str();
    let iri = RiStr::<S>::new(raw)
        .unwrap_or_else(|e| die(format_args!("Failed to parse {raw:?}: {e:?}")));

    let normalized = iri.normalize();
    let require_rfc3986 = !opt.whatwg_serialization;
    if require_rfc3986 {
        if let Err(e) = normalized.ensure_rfc3986_normalizable() {
            die(format_args!("Failed to normalize: {e:?}"));
        }
    }

    normalized.to_dedicated_string()
}

159
vendor/iri-string/examples/parse.rs vendored Normal file
View File

@@ -0,0 +1,159 @@
//! An example to parse IRI from the CLI argument.
use iri_string::types::{IriStr, RiReferenceStr, RiStr};
/// Usage text for the `parse` example, written to stderr by `print_help`.
const USAGE: &str = "\
USAGE:
parse [FLAGS] [--] IRI
FLAGS:
-h, --help Prints this help
-i, --iri Handle the input as an IRI (RFC 3987)
-u, --uri Handle the input as an URI (RFC 3986)
ARGS:
<IRI> IRI or URI
";
/// Writes the usage text to standard error.
fn print_help() {
    eprintln!("{USAGE}");
}
/// Prints the usage text and terminates the process with exit status 1.
fn help_and_exit() -> ! {
    /// Exit status used after printing the help.
    const EXIT_STATUS: i32 = 1;

    print_help();
    std::process::exit(EXIT_STATUS)
}
/// Reports `msg` as an error on standard error, followed by a blank line and
/// the usage text, then terminates the process with exit status 1.
fn die(msg: impl std::fmt::Display) -> ! {
    // The trailing `\n` plus the one added by `eprintln!` yields the blank
    // separator line before the usage text.
    eprintln!("ERROR: {msg}\n");
    print_help();
    std::process::exit(1)
}
/// Which syntax the input should be validated against.
#[derive(Debug, Clone, Copy)]
enum Spec {
    /// URI as defined by RFC 3986.
    Uri,
    /// IRI as defined by RFC 3987.
    Iri,
}

/// The IRI syntax is used when the user selects nothing explicitly.
impl Default for Spec {
    #[inline]
    fn default() -> Self {
        Spec::Iri
    }
}
/// CLI options.
#[derive(Default, Debug, Clone)]
struct CliOpt {
    /// IRI.
    iri: String,
    /// Syntax spec.
    spec: Spec,
}

impl CliOpt {
    /// Parses the process arguments into options.
    ///
    /// Flags are recognized until the first `--`; every argument after `--`
    /// is taken as the positional IRI even if it starts with `-`.
    ///
    /// Exits the process (through `die` / `help_and_exit`) when an unknown
    /// flag is given, when `--help` is requested, when more than one IRI is
    /// given, or when no IRI is given.
    fn parse() -> Self {
        let mut args = std::env::args();
        // Skip `argv[0]`.
        args.next();

        let mut iri = None;
        let mut spec = None;

        for arg in args.by_ref() {
            match arg.as_str() {
                // End-of-flags marker advertised as `[--]` in the usage text.
                // The remaining arguments are handled by the loop below.
                "--" => break,
                "--iri" | "-i" => spec = Some(Spec::Iri),
                "--uri" | "-u" => spec = Some(Spec::Uri),
                "--help" | "-h" => help_and_exit(),
                opt if opt.starts_with('-') => die(format_args!("Unknown option: {}", opt)),
                _ => {
                    if iri.replace(arg).is_some() {
                        die("IRI can be specified at most once");
                    }
                }
            }
        }

        // Arguments after `--` are always positional.
        for arg in args {
            if iri.replace(arg).is_some() {
                // Same policy as before `--`: a second IRI is a hard error.
                die("IRI can be specified at most once");
            }
        }

        let iri = iri.unwrap_or_else(|| die("IRI should be specified"));
        let spec = spec.unwrap_or_default();
        Self { iri, spec }
    }
}
fn main() {
let opt = CliOpt::parse();
match opt.spec {
Spec::Iri => parse_iri(&opt),
Spec::Uri => parse_uri(&opt),
}
}
/// Parses the input as an IRI reference, prints its details, and then prints
/// its URI-encoded form.
fn parse_iri(opt: &CliOpt) {
    let encoded = parse::<iri_string::spec::IriSpec>(opt).encode_to_uri();
    println!("ASCII: {encoded:?}");
}
/// Parses the input as a URI reference, prints its details, and then prints
/// the reference itself as the ASCII form.
fn parse_uri(opt: &CliOpt) {
    let uri = parse::<iri_string::spec::UriSpec>(opt);
    println!("ASCII: {uri:?}");
}
/// Validates the input as a reference under spec `S` and prints what was found.
///
/// Prints the parsed reference, whether it is absolute or relative, and its
/// components. For an absolute reference the normalization details are
/// printed as well. Exits through `die` when the input does not parse.
///
/// Returns the parsed reference, borrowed from `opt`.
fn parse<S: iri_string::spec::Spec>(opt: &CliOpt) -> &RiReferenceStr<S>
where
    RiStr<S>: AsRef<RiStr<iri_string::spec::IriSpec>>,
{
    let raw = opt.iri.as_str();
    let iri = match RiReferenceStr::<S>::new(raw) {
        Ok(v) => v,
        Err(e) => die(format_args!("Failed to parse {:?}: {}", raw, e)),
    };
    println!("Successfully parsed: {:?}", iri);

    // `to_iri()` succeeds only when the reference is not relative.
    let absolute = iri.to_iri().ok();
    match absolute {
        // Fixed typo: this message used to read "ablolute".
        Some(_) => println!("IRI is absolute."),
        None => println!("IRI is relative."),
    }

    print_components(iri);

    if let Some(absolute) = absolute {
        print_normalized(absolute.as_ref());
    }

    iri
}
/// Prints each component of the reference on its own line, including the
/// authority subcomponents when an authority is present.
fn print_components<S: iri_string::spec::Spec>(iri: &RiReferenceStr<S>) {
    let scheme = iri.scheme_str();
    println!("scheme: {:?}", scheme);

    let authority = iri.authority_str();
    println!("authority: {:?}", authority);
    match iri.authority_components() {
        Some(components) => {
            println!(" userinfo: {:?}", components.userinfo());
            println!(" host: {:?}", components.host());
            println!(" port: {:?}", components.port());
        }
        None => {}
    }

    let path = iri.path_str();
    println!("path: {:?}", path);
    let query = iri.query_str();
    println!("query: {:?}", query);
    let fragment = iri.fragment();
    println!("fragment: {:?}", fragment);
}
/// Prints both normalization checks for the IRI, followed by its normalized form.
pub fn print_normalized(iri: &IriStr) {
    let strict = iri.is_normalized_rfc3986();
    println!("is_normalized_rfc3986: {strict}");

    let relaxed = iri.is_normalized_but_authorityless_relative_path_preserved();
    println!("is_normalized_but_authorityless_relative_path_preserved: {relaxed}");

    println!("normalized: {}", iri.normalize());
}

154
vendor/iri-string/examples/resolve.rs vendored Normal file
View File

@@ -0,0 +1,154 @@
//! An example to resolve an IRI reference against a base IRI, both given as CLI arguments.
use iri_string::types::{RiAbsoluteStr, RiReferenceStr};
/// Usage text for the `resolve` example, written to stderr by `print_help`.
const USAGE: &str = "\
USAGE:
resolve [FLAGS] [--] BASE REFERENCE
FLAGS:
-h, --help Prints this help
-i, --iri Handle the input as an IRI (RFC 3987)
-u, --uri Handle the input as an URI (RFC 3986)
-w, --whatwg Serialize normalization result according to WHATWG URL Standard.
ARGS:
<BASE> Base IRI or URI to resolve REFERENCE against
<REFERENCE> IRI or URI to resolve
";
/// Writes the usage text to standard error.
fn print_help() {
    eprintln!("{USAGE}");
}
/// Prints the usage text and terminates the process with exit status 1.
fn help_and_exit() -> ! {
    /// Exit status used after printing the help.
    const EXIT_STATUS: i32 = 1;

    print_help();
    std::process::exit(EXIT_STATUS)
}
/// Reports `msg` as an error on standard error, followed by a blank line and
/// the usage text, then terminates the process with exit status 1.
fn die(msg: impl std::fmt::Display) -> ! {
    // The trailing `\n` plus the one added by `eprintln!` yields the blank
    // separator line before the usage text.
    eprintln!("ERROR: {msg}\n");
    print_help();
    std::process::exit(1)
}
/// Which syntax the inputs should be validated against.
#[derive(Debug, Clone, Copy)]
enum Spec {
    /// URI as defined by RFC 3986.
    Uri,
    /// IRI as defined by RFC 3987.
    Iri,
}

/// The IRI syntax is used when the user selects nothing explicitly.
impl Default for Spec {
    #[inline]
    fn default() -> Self {
        Spec::Iri
    }
}
/// CLI options.
#[derive(Default, Debug, Clone)]
struct CliOpt {
    /// Base IRI.
    base: String,
    /// Reference IRI.
    reference: String,
    /// Syntax spec.
    spec: Spec,
    /// Whether to serialize in WHATWG URL Standard way.
    whatwg_serialization: bool,
}

impl CliOpt {
    /// Parses the process arguments into options.
    ///
    /// Flags are recognized until the first `--`; every argument after `--`
    /// is taken as positional even if it starts with `-`. The first
    /// positional argument is the base and the second is the reference.
    ///
    /// Exits the process (through `die` / `help_and_exit`) when an unknown
    /// flag is given, when `--help` is requested, when more than two
    /// positional arguments are given, or when either of them is missing.
    fn parse() -> Self {
        /// Stores a positional argument in the first free slot, or exits
        /// when both slots are already taken.
        fn store_positional(
            base: &mut Option<String>,
            reference: &mut Option<String>,
            arg: String,
        ) {
            if base.is_none() {
                *base = Some(arg);
            } else if reference.is_none() {
                *reference = Some(arg);
            } else {
                die("IRI can be specified at most twice");
            }
        }

        let mut args = std::env::args();
        // Skip `argv[0]`.
        args.next();

        let mut base = None;
        let mut reference = None;
        let mut spec = None;
        let mut whatwg_serialization = false;

        for arg in args.by_ref() {
            match arg.as_str() {
                // End-of-flags marker advertised as `[--]` in the usage text.
                // The remaining arguments are handled by the loop below.
                "--" => break,
                "--iri" | "-i" => spec = Some(Spec::Iri),
                "--uri" | "-u" => spec = Some(Spec::Uri),
                "--whatwg" | "-w" => whatwg_serialization = true,
                "--help" | "-h" => help_and_exit(),
                opt if opt.starts_with('-') => die(format_args!("Unknown option: {}", opt)),
                _ => store_positional(&mut base, &mut reference, arg),
            }
        }

        // Arguments after `--` are always positional.
        for arg in args {
            store_positional(&mut base, &mut reference, arg);
        }

        let base = base.unwrap_or_else(|| die("Base IRI should be specified"));
        let reference = reference.unwrap_or_else(|| die("Reference IRI should be specified"));
        let spec = spec.unwrap_or_default();
        Self {
            base,
            reference,
            spec,
            whatwg_serialization,
        }
    }
}
fn main() {
let opt = CliOpt::parse();
match opt.spec {
Spec::Iri => parse::<iri_string::spec::IriSpec>(&opt),
Spec::Uri => parse::<iri_string::spec::UriSpec>(&opt),
}
}
/// Validates the base and the reference under spec `S`, resolves the
/// reference against the base, and prints the result.
///
/// Exits through `die` when either input does not parse, or — unless WHATWG
/// serialization was requested — when the resolution result is reported as
/// not RFC 3986 normalizable.
fn parse<S: iri_string::spec::Spec>(opt: &CliOpt) {
    let base_raw = opt.base.as_str();
    let reference_raw = opt.reference.as_str();

    let base = match RiAbsoluteStr::<S>::new(base_raw) {
        Ok(v) => v,
        // Report the base itself; this message used to show the reference.
        Err(e) => die(format_args!(
            "Failed to parse {:?} as an IRI (without fragment): {}",
            base_raw, e
        )),
    };
    let reference = match RiReferenceStr::<S>::new(reference_raw) {
        Ok(v) => v,
        Err(e) => die(format_args!(
            "Failed to parse {:?} as an IRI reference: {}",
            reference_raw, e
        )),
    };

    let resolved = reference.resolve_against(base);
    if !opt.whatwg_serialization {
        if let Err(e) = resolved.ensure_rfc3986_normalizable() {
            die(format_args!(
                "Failed to resolve {:?} against {:?}: {}",
                reference_raw, base_raw, e
            ));
        }
    }

    println!("{}", resolved);
}

1234
vendor/iri-string/src/build.rs vendored Normal file

File diff suppressed because it is too large Load Diff

267
vendor/iri-string/src/components.rs vendored Normal file
View File

@@ -0,0 +1,267 @@
//! Components of IRIs.
mod authority;
use core::num::NonZeroUsize;
use core::ops::{Range, RangeFrom, RangeTo};
use crate::parser::trusted as trusted_parser;
use crate::spec::Spec;
use crate::types::RiReferenceStr;
pub use self::authority::AuthorityComponents;
/// Byte positions at which an IRI string is cut into its components.
#[derive(Debug, Clone, Copy)]
pub(crate) struct Splitter {
    /// Where the scheme ends, i.e. the position of the `:` following it.
    scheme_end: Option<NonZeroUsize>,
    /// Where the authority ends, which is also where the path starts.
    ///
    /// An absent authority (`None`) and an empty authority are distinct.
    authority_end: Option<NonZeroUsize>,
    /// Where the query starts, just after its leading `?`.
    query_start: Option<NonZeroUsize>,
    /// Where the fragment starts, just after its leading `#`.
    fragment_start: Option<NonZeroUsize>,
}

impl Splitter {
    /// Builds a splitter from the four boundary positions.
    #[inline]
    #[must_use]
    pub(crate) fn new(
        scheme_end: Option<NonZeroUsize>,
        authority_end: Option<NonZeroUsize>,
        query_start: Option<NonZeroUsize>,
        fragment_start: Option<NonZeroUsize>,
    ) -> Self {
        Self {
            scheme_end,
            authority_end,
            query_start,
            fragment_start,
        }
    }

    /// Splits `s` into `(scheme, authority, path, query, fragment)`.
    ///
    /// Delimiters (`:`, `//`, `?`, `#`) are not part of any returned slice.
    #[must_use]
    fn split_into_major(
        self,
        s: &str,
    ) -> (Option<&str>, Option<&str>, &str, Option<&str>, Option<&str>) {
        let scheme = self.scheme_end.map(|end| &s[..end.get()]);
        // Skip the `:` that terminates the scheme.
        let after_scheme = self.scheme_end.map_or(0, |end| end.get() + 1);

        // Skip the `//` that introduces the authority.
        let authority = self
            .authority_end
            .map(|end| &s[(after_scheme + 2)..end.get()]);
        let path_start = self.authority_end.map_or(after_scheme, NonZeroUsize::get);

        let fragment = self.fragment_start.map(|start| &s[start.get()..]);
        // Step back over the `#` that introduces the fragment.
        let before_fragment = self.fragment_start.map_or(s.len(), |start| start.get() - 1);

        let query = self
            .query_start
            .map(|start| &s[start.get()..before_fragment]);
        // Step back over the `?` that introduces the query.
        let path_end = self
            .query_start
            .map_or(before_fragment, |start| start.get() - 1);

        (scheme, authority, &s[path_start..path_end], query, fragment)
    }

    /// Range covering the scheme, if there is one.
    #[inline]
    #[must_use]
    fn scheme_range(self) -> Option<RangeTo<usize>> {
        let end = self.scheme_end?.get();
        Some(..end)
    }

    /// Scheme slice of `s`, if there is one.
    #[inline]
    #[must_use]
    pub(crate) fn scheme_str<'a>(&self, s: &'a str) -> Option<&'a str> {
        let range = self.scheme_range()?;
        Some(&s[range])
    }

    /// Whether a scheme is present.
    #[inline]
    #[must_use]
    pub(crate) fn has_scheme(&self) -> bool {
        self.scheme_end.is_some()
    }

    /// Range covering the authority (without the leading `//`), if there is one.
    #[inline]
    #[must_use]
    fn authority_range(self) -> Option<Range<usize>> {
        match self.authority_end {
            None => None,
            Some(authority_end) => {
                let start = match self.scheme_end {
                    // Skip `://` after the scheme.
                    Some(scheme_end) => scheme_end.get() + 3,
                    // Skip the leading `//`.
                    None => 2,
                };
                Some(start..authority_end.get())
            }
        }
    }

    /// Authority slice of `s`, if there is one.
    #[inline]
    #[must_use]
    pub(crate) fn authority_str<'a>(&self, s: &'a str) -> Option<&'a str> {
        let range = self.authority_range()?;
        Some(&s[range])
    }

    /// Whether an authority is present.
    #[inline]
    #[must_use]
    pub(crate) fn has_authority(&self) -> bool {
        self.authority_end.is_some()
    }

    /// Range covering the path of a string that is `full_len` bytes long.
    #[inline]
    #[must_use]
    fn path_range(self, full_len: usize) -> Range<usize> {
        let end = match (self.query_start, self.fragment_start) {
            // Step back over the `?` or `#` that follows the path.
            (Some(next), _) | (None, Some(next)) => next.get() - 1,
            (None, None) => full_len,
        };
        let start = match (self.authority_end, self.scheme_end) {
            (Some(authority_end), _) => authority_end.get(),
            // Skip the `:` after the scheme.
            (None, Some(scheme_end)) => scheme_end.get() + 1,
            (None, None) => 0,
        };
        start..end
    }

    /// Path slice of `s`.
    #[inline]
    #[must_use]
    pub(crate) fn path_str<'a>(&self, s: &'a str) -> &'a str {
        let range = self.path_range(s.len());
        &s[range]
    }

    /// Whether the path of a string that is `full_len` bytes long is empty.
    #[inline]
    #[must_use]
    pub(crate) fn is_path_empty(&self, full_len: usize) -> bool {
        let range = self.path_range(full_len);
        range.is_empty()
    }

    /// Range covering the query (without the leading `?`), if there is one.
    #[inline]
    #[must_use]
    fn query_range(self, full_len: usize) -> Option<Range<usize>> {
        let start = self.query_start?.get();
        let end = match self.fragment_start {
            // Step back over the `#` that follows the query.
            Some(fragment_start) => fragment_start.get() - 1,
            None => full_len,
        };
        Some(start..end)
    }

    /// Query slice of `s`, if there is one.
    #[inline]
    #[must_use]
    pub(crate) fn query_str<'a>(&self, s: &'a str) -> Option<&'a str> {
        let range = self.query_range(s.len())?;
        Some(&s[range])
    }

    /// Whether a query is present.
    #[inline]
    #[must_use]
    pub(crate) fn has_query(&self) -> bool {
        self.query_start.is_some()
    }

    /// Range covering the fragment (without the leading `#`), if there is one.
    #[inline]
    #[must_use]
    pub(crate) fn fragment_range(self) -> Option<RangeFrom<usize>> {
        let start = self.fragment_start?.get();
        Some(start..)
    }

    /// Fragment slice of `s`, if there is one.
    #[inline]
    #[must_use]
    pub(crate) fn fragment_str<'a>(&self, s: &'a str) -> Option<&'a str> {
        let range = self.fragment_range()?;
        Some(&s[range])
    }
}
/// An IRI reference paired with the positions that split it into components.
///
/// See <https://www.rfc-editor.org/rfc/rfc3986.html#section-5.2.2>.
#[derive(Debug, Clone, Copy)]
pub(crate) struct RiReferenceComponents<'a, S: Spec> {
    /// The complete, original IRI reference.
    pub(crate) iri: &'a RiReferenceStr<S>,
    /// Component boundaries within `iri`.
    pub(crate) splitter: Splitter,
}

impl<'a, S: Spec> RiReferenceComponents<'a, S> {
    /// Splits the reference into `(scheme, authority, path, query, fragment)`.
    #[inline]
    #[must_use]
    pub(crate) fn to_major(
        self,
    ) -> (
        Option<&'a str>,
        Option<&'a str>,
        &'a str,
        Option<&'a str>,
        Option<&'a str>,
    ) {
        let Self { iri, splitter } = self;
        splitter.split_into_major(iri.as_str())
    }

    /// The IRI reference these components belong to.
    #[inline]
    #[must_use]
    pub(crate) fn iri(&self) -> &'a RiReferenceStr<S> {
        self.iri
    }

    /// The scheme, if present.
    #[inline]
    #[must_use]
    pub(crate) fn scheme_str(&self) -> Option<&str> {
        let whole = self.iri.as_str();
        self.splitter.scheme_str(whole)
    }

    /// The authority, if present.
    #[inline]
    #[must_use]
    pub(crate) fn authority_str(&self) -> Option<&str> {
        let whole = self.iri.as_str();
        self.splitter.authority_str(whole)
    }

    /// The path.
    #[inline]
    #[must_use]
    pub(crate) fn path_str(&self) -> &str {
        let whole = self.iri.as_str();
        self.splitter.path_str(whole)
    }

    /// The query, if present.
    #[inline]
    #[must_use]
    pub(crate) fn query_str(&self) -> Option<&str> {
        let whole = self.iri.as_str();
        self.splitter.query_str(whole)
    }
}
/// Decomposes a borrowed IRI reference into its components.
impl<'a, S: Spec> From<&'a RiReferenceStr<S>> for RiReferenceComponents<'a, S> {
#[inline]
fn from(s: &'a RiReferenceStr<S>) -> Self {
// Delegates to the `trusted` parser module.
// NOTE(review): presumably that parser relies on `s` already being a
// validated reference and does not re-validate — confirm.
trusted_parser::decompose_iri_reference(s)
}
}

View File

@@ -0,0 +1,121 @@
//! Subcomponents of authority.
use crate::parser::trusted as trusted_parser;
use crate::spec::Spec;
use crate::types::RiReferenceStr;
/// Subcomponents of authority.
///
/// This is a return type of the `authority_components` method of the string
/// types (for example [`RiStr::authority_components`]).
///
/// [`RiStr::authority_components`]: `crate::types::RiStr::authority_components`
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct AuthorityComponents<'a> {
/// Authority string, excluding the leading `//`.
pub(crate) authority: &'a str,
/// Start position of the `host`, as a byte offset into `authority`.
///
/// When this is nonzero, the byte just before it is the `@` that
/// terminates the `userinfo`.
pub(crate) host_start: usize,
/// End position (exclusive) of the `host`, as a byte offset into `authority`.
///
/// This equals `authority.len()` when there is no port; otherwise it is
/// the position of the `:` that introduces the port.
pub(crate) host_end: usize,
}
impl<'a> AuthorityComponents<'a> {
    /// Extracts the authority subcomponents of the given IRI reference.
    ///
    /// Returns `None` when the reference has no authority.
    pub fn from_iri<S: Spec>(iri: &'a RiReferenceStr<S>) -> Option<Self> {
        let authority = iri.authority_str()?;
        Some(trusted_parser::authority::decompose_authority(authority))
    }

    /// Returns the `userinfo` part, without the `@` that follows it.
    ///
    /// Returns `None` when the authority has no `userinfo`.
    #[must_use]
    pub fn userinfo(&self) -> Option<&'a str> {
        match self.host_start {
            // The host starts the authority, so nothing precedes it.
            0 => None,
            host_start => {
                let at_pos = host_start - 1;
                debug_assert_eq!(self.authority.as_bytes()[at_pos], b'@');
                Some(&self.authority[..at_pos])
            }
        }
    }

    /// Returns the `host` part.
    #[inline]
    #[must_use]
    pub fn host(&self) -> &'a str {
        // NOTE: RFC 6874 support may need the internal logic to change.
        let host_range = self.host_start..self.host_end;
        &self.authority[host_range]
    }

    /// Returns the `port` part, without the `:` that precedes it.
    ///
    /// Returns `None` when the authority has no port.
    #[must_use]
    pub fn port(&self) -> Option<&'a str> {
        let colon_pos = self.host_end;
        let has_port = colon_pos != self.authority.len();
        has_port.then(|| {
            debug_assert_eq!(self.authority.as_bytes()[colon_pos], b':');
            &self.authority[(colon_pos + 1)..]
        })
    }
}
#[cfg(test)]
#[cfg(feature = "alloc")]
mod tests {
    use super::*;

    #[cfg(all(feature = "alloc", not(feature = "std")))]
    use alloc::string::String;

    use crate::types::IriReferenceStr;

    /// `userinfo` values to test (an absent `userinfo` is added by the test).
    const USERINFO: &[&str] = &["", "user:password", "user"];

    /// `port` values to test (an absent `port` is added by the test).
    const PORT: &[&str] = &[
        "",
        "0",
        "0000",
        "80",
        "1234567890123456789012345678901234567890",
    ];

    /// `host` values to test.
    const HOST: &[&str] = &[
        "",
        "localhost",
        "example.com",
        "192.0.2.0",
        "[2001:db8::1]",
        "[2001:0db8:0:0:0:0:0:1]",
        "[2001:0db8::192.0.2.255]",
        "[v9999.this-is-futuristic-ip-address]",
    ];

    /// Builds a network-path reference (`//authority`) from the subcomponents.
    fn compose_to_relative_iri(userinfo: Option<&str>, host: &str, port: Option<&str>) -> String {
        let mut buf = String::from("//");
        if let Some(userinfo) = userinfo {
            buf.push_str(userinfo);
            buf.push('@');
        }
        buf.push_str(host);
        if let Some(port) = port {
            buf.push(':');
            buf.push_str(port);
        }
        buf
    }

    /// Checks that every composed authority decomposes back into its inputs.
    #[test]
    fn test_decompose_authority() {
        // `.chain(None)` would chain an *empty* iterator (`None` being an
        // `Option<Option<&str>>`), so the absent case would never be tested.
        // `once(None)` appends the absent case as an actual item.
        let userinfos = || {
            USERINFO
                .iter()
                .map(|s| Some(*s))
                .chain(core::iter::once(None))
        };
        let ports = || PORT.iter().map(|s| Some(*s)).chain(core::iter::once(None));

        for host in HOST.iter().copied() {
            for userinfo in userinfos() {
                for port in ports() {
                    let authority = compose_to_relative_iri(userinfo, host, port);
                    let authority =
                        IriReferenceStr::new(&authority).expect("test case should be valid");
                    let components = AuthorityComponents::from_iri(authority)
                        .expect("relative path composed for this test should contain authority");

                    assert_eq!(components.host(), host);
                    assert_eq!(components.userinfo(), userinfo);
                    assert_eq!(components.port(), port);
                }
            }
        }
    }
}

291
vendor/iri-string/src/convert.rs vendored Normal file
View File

@@ -0,0 +1,291 @@
//! Conversion between URI/IRI types.
use core::fmt;
#[cfg(feature = "alloc")]
use alloc::collections::TryReserveError;
#[cfg(all(feature = "alloc", not(feature = "std")))]
use alloc::string::String;
#[cfg(feature = "alloc")]
use crate::format::{ToDedicatedString, ToStringFallible};
use crate::spec::Spec;
use crate::types::{
RiAbsoluteStr, RiFragmentStr, RiQueryStr, RiReferenceStr, RiRelativeStr, RiStr,
};
#[cfg(feature = "alloc")]
use crate::types::{
RiAbsoluteString, RiFragmentString, RiQueryString, RiReferenceString, RiRelativeString,
RiString,
};
#[cfg(feature = "alloc")]
use crate::types::{
UriAbsoluteString, UriFragmentString, UriQueryString, UriReferenceString, UriRelativeString,
UriString,
};
/// Uppercase hexadecimal digit for each nibble value, indexed by the nibble.
const HEXDIGITS: [u8; 16] = *b"0123456789ABCDEF";
/// A resource identifier mapped to a URI of some kind.
///
/// Formatting this value with `Display` writes the wrapped string with every
/// non-ASCII byte percent-encoded.
///
/// Supported `Src` types are:
///
/// * IRIs:
/// + [`IriAbsoluteStr`] (alias of `RiAbsoluteStr<IriSpec>`)
/// + [`IriReferenceStr`] (alias of `RiReferenceStr<IriSpec>`)
/// + [`IriRelativeStr`] (alias of `RiRelativeStr<IriSpec>`)
/// + [`IriStr`] (alias of `RiStr<IriSpec>`)
/// * URIs:
/// + [`UriAbsoluteStr`] (alias of `RiAbsoluteStr<UriSpec>`)
/// + [`UriReferenceStr`] (alias of `RiReferenceStr<UriSpec>`)
/// + [`UriRelativeStr`] (alias of `RiRelativeStr<UriSpec>`)
/// + [`UriStr`] (alias of `RiStr<UriSpec>`)
/// * Components (for any spec `S`):
/// + `RiQueryStr<S>`
/// + `RiFragmentStr<S>`
///
/// # Examples
///
/// ```
/// use iri_string::convert::MappedToUri;
/// use iri_string::types::{IriStr, UriStr};
///
/// let src = IriStr::new("http://example.com/?alpha=\u{03B1}")?;
/// // The type is `MappedToUri<IriStr>`, but you usually don't need to specify.
/// let mapped = MappedToUri::from(src).to_string();
/// assert_eq!(mapped, "http://example.com/?alpha=%CE%B1");
/// # Ok::<_, iri_string::validate::Error>(())
/// ```
///
/// [`IriAbsoluteStr`]: crate::types::IriAbsoluteStr
/// [`IriReferenceStr`]: crate::types::IriReferenceStr
/// [`IriRelativeStr`]: crate::types::IriRelativeStr
/// [`IriStr`]: crate::types::IriStr
/// [`UriAbsoluteStr`]: crate::types::UriAbsoluteStr
/// [`UriReferenceStr`]: crate::types::UriReferenceStr
/// [`UriRelativeStr`]: crate::types::UriRelativeStr
/// [`UriStr`]: crate::types::UriStr
#[derive(Debug, Clone, Copy)]
pub struct MappedToUri<'a, Src: ?Sized>(&'a Src);
/// Implements conversions for an IRI string type.
///
/// Arguments:
///
/// * `$borrowed`: the borrowed string type wrapped by `MappedToUri`.
/// * `$owned`: the owned counterpart of `$borrowed`, accepted by reference
///   in `From` (requires the `alloc` feature).
/// * `$owned_uri`: the owned URI type produced by `to_dedicated_string`
///   (requires the `alloc` feature).
macro_rules! impl_for_iri {
($borrowed:ident, $owned:ident, $owned_uri:ident) => {
// Writes the wrapped string with non-ASCII bytes percent-encoded.
impl<S: Spec> fmt::Display for MappedToUri<'_, $borrowed<S>> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write_percent_encoded(f, self.0.as_str())
}
}
// Collects the encoded output into the dedicated owned URI type.
#[cfg(feature = "alloc")]
impl<S: Spec> ToDedicatedString for MappedToUri<'_, $borrowed<S>> {
type Target = $owned_uri;
fn try_to_dedicated_string(&self) -> Result<Self::Target, TryReserveError> {
let s = self.try_to_string()?;
// The conversion is expected to always succeed; a failure here is
// treated as a bug and panics.
Ok(TryFrom::try_from(s)
.expect("[validity] the IRI must be encoded into a valid URI"))
}
}
// Wraps a borrowed string.
impl<'a, S: Spec> From<&'a $borrowed<S>> for MappedToUri<'a, $borrowed<S>> {
#[inline]
fn from(iri: &'a $borrowed<S>) -> Self {
Self(iri)
}
}
// Wraps a reference to an owned string through its borrowed slice.
#[cfg(feature = "alloc")]
impl<'a, S: Spec> From<&'a $owned<S>> for MappedToUri<'a, $borrowed<S>> {
#[inline]
fn from(iri: &'a $owned<S>) -> Self {
Self(iri.as_slice())
}
}
};
}
// One instantiation per string kind: (borrowed, owned, owned URI).
impl_for_iri!(RiReferenceStr, RiReferenceString, UriReferenceString);
impl_for_iri!(RiStr, RiString, UriString);
impl_for_iri!(RiAbsoluteStr, RiAbsoluteString, UriAbsoluteString);
impl_for_iri!(RiRelativeStr, RiRelativeString, UriRelativeString);
impl_for_iri!(RiQueryStr, RiQueryString, UriQueryString);
impl_for_iri!(RiFragmentStr, RiFragmentString, UriFragmentString);
/// Writes the IRI string to the formatter, percent-encoding every non-ASCII byte.
///
/// ASCII runs are written through unchanged; each maximal run of non-ASCII
/// bytes is handed to `percent_encode_bytes`.
fn write_percent_encoded(f: &mut fmt::Formatter<'_>, mut s: &str) -> fmt::Result {
    /// Number of source bytes to encode at once.
    const NUM_BYTES_AT_ONCE: usize = 21;

    loop {
        // Pass the leading ASCII run through verbatim.
        let ascii_len = s.bytes().take_while(u8::is_ascii).count();
        if ascii_len > 0 {
            f.write_str(&s[..ascii_len])?;
            s = &s[ascii_len..];
        }
        if s.is_empty() {
            return Ok(());
        }

        // Take everything up to the next ASCII character.
        let nonascii_len = s.bytes().take_while(|b| !b.is_ascii()).count();
        let (nonasciis, rest) = s.split_at(nonascii_len);
        debug_assert!(
            !nonasciis.is_empty(),
            "string without non-ASCII characters should have caused early return"
        );
        s = rest;

        // Escape non-ASCII characters as percent-encoded bytes.
        //
        // RFC 3987 (section 3.1 step 2) says "for each character in
        // 'ucschar' or 'iprivate'", but this simply means "for each
        // non-ASCII character" since any non-ASCII character that can
        // appear in an IRI matches `ucschar` or `iprivate`.
        percent_encode_bytes(f, nonasciis, &mut [0_u8; NUM_BYTES_AT_ONCE * 3])?;
    }
}
/// Percent-encodes every byte of the string and writes the encoded chunks to
/// the given formatter.
///
/// `buf` is used as a temporary working buffer. It is initialized by this
/// function, so users can pass any mutable byte slice with enough size.
/// A larger buffer means fewer `write_str` calls on `f`.
///
/// # Precondition
///
/// The length of `buf` must be 3 bytes or more.
///
/// # Panics
///
/// Panics if `buf` is shorter than 3 bytes.
fn percent_encode_bytes(f: &mut fmt::Formatter<'_>, s: &str, buf: &mut [u8]) -> fmt::Result {
/// Fills the buffer with percent-encoded bytes and returns the filled part.
///
/// Consumes as many bytes from `bytes` as fit into `buf` (one source byte
/// per three buffer bytes) and leaves the rest in the iterator.
///
/// Note that this function applies percent-encoding to every byte,
/// even if it is an ASCII alphabet.
///
/// # Precondition
///
/// * The length of `buf` must be 3 bytes or more.
/// * All of the `buf[i * 3]` elements should already be set to `b'%'`.
// This function has many preconditions and I don't want checks for them
// to be mandatory, so make this nested inner function.
fn fill_by_percent_encoded<'a>(buf: &'a mut [u8], bytes: &mut core::str::Bytes<'_>) -> &'a str {
let src_len = bytes.len();
// `<[u8; N]>::array_chunks_mut` is unstable as of Rust 1.58.1.
for (dest, byte) in buf.chunks_exact_mut(3).zip(bytes.by_ref()) {
debug_assert_eq!(
dest.len(),
3,
"[validity] `chunks_exact()` must return a slice with the exact length"
);
debug_assert_eq!(
dest[0], b'%',
"[precondition] the buffer must be properly initialized"
);
// `dest[0]` already holds `%`; only the two hex digits are written.
let upper = byte >> 4;
let lower = byte & 0b1111;
dest[1] = HEXDIGITS[usize::from(upper)];
dest[2] = HEXDIGITS[usize::from(lower)];
}
// Three buffer bytes were produced for each source byte consumed.
let num_dest_written = (src_len - bytes.len()) * 3;
let buf_filled = &buf[..num_dest_written];
// SAFETY: `b'%'` and `HEXDIGITS[_]` are all ASCII characters, so
// `buf_filled` is filled with ASCII characters and is valid UTF-8 bytes.
unsafe {
debug_assert!(core::str::from_utf8(buf_filled).is_ok());
core::str::from_utf8_unchecked(buf_filled)
}
}
assert!(
buf.len() >= 3,
"[precondition] length of `buf` must be 3 bytes or more"
);
// Drop the elements that will never be used.
// The length to be used is always a multiple of three.
let buf_len = buf.len() / 3 * 3;
let buf = &mut buf[..buf_len];
// Fill some bytes with `%`.
// This will be vectorized by optimization (especially for long buffers),
// so no need to selectively set `buf[i * 3]`.
buf.fill(b'%');
let mut bytes = s.bytes();
// `<core::str::Bytes as ExactSizeIterator>::is_empty` is unstable as of Rust 1.58.1.
// Each round encodes at most `buf.len() / 3` source bytes.
while bytes.len() != 0 {
let encoded = fill_by_percent_encoded(buf, &mut bytes);
f.write_str(encoded)?;
}
Ok(())
}
/// Percent-encodes the non-ASCII bytes of the given IRI in place.
///
/// ASCII bytes are kept as they are; every non-ASCII byte is replaced by `%`
/// followed by two digits taken from `HEXDIGITS`. If the string contains no
/// non-ASCII bytes, it is left untouched and no memory is reserved.
///
/// # Errors
///
/// Returns the error from `String::try_reserve` when the additional capacity
/// cannot be reserved. The content of `iri` is not modified in that case.
#[cfg(feature = "alloc")]
pub(crate) fn try_percent_encode_iri_inline(
iri: &mut String,
) -> Result<(), alloc::collections::TryReserveError> {
// Calculate the result length and extend the buffer.
let num_nonascii = count_nonascii(iri);
if num_nonascii == 0 {
// No need to escape.
return Ok(());
}
// Each non-ASCII byte grows from one byte to three (`%XX`), i.e. two extra
// bytes per encoded byte.
let additional = num_nonascii * 2;
iri.try_reserve(additional)?;
let src_len = iri.len();
// Temporarily take the ownership of the internal buffer.
// (`iri` holds an empty string until the result is stored back below.)
let mut buf = core::mem::take(iri).into_bytes();
// `b'\0'` cannot appear in a valid IRI, so this default value would be
// useful in case of debugging.
buf.extend(core::iter::repeat(b'\0').take(additional));
// Fill the buffer from the tail to the head.
// Writing backwards means a source byte is always read before the
// destination cursor can overwrite it (`dest_end >= src_end` throughout).
let mut dest_end = buf.len();
let mut src_end = src_len;
let mut rest_nonascii = num_nonascii;
// Once the last non-ASCII byte has been rewritten, both cursors coincide
// and the remaining ASCII prefix is already in its final position.
while rest_nonascii > 0 {
debug_assert!(
src_end > 0,
"[validity] the source position should not overrun"
);
debug_assert!(
dest_end > 0,
"[validity] the destination position should not overrun"
);
src_end -= 1;
dest_end -= 1;
let byte = buf[src_end];
if byte.is_ascii() {
// Use the ASCII character directly.
buf[dest_end] = byte;
} else {
// Percent-encode the byte.
// One slot was already claimed above; claim two more for `%XX`.
dest_end -= 2;
buf[dest_end] = b'%';
let upper = byte >> 4;
let lower = byte & 0b1111;
buf[dest_end + 1] = HEXDIGITS[usize::from(upper)];
buf[dest_end + 2] = HEXDIGITS[usize::from(lower)];
rest_nonascii -= 1;
}
}
// Move the result from the temporary buffer to the destination.
let s = String::from_utf8(buf).expect("[consistency] the encoding result is an ASCII string");
*iri = s;
Ok(())
}
/// Returns the number of non-ASCII bytes (not characters) in the string.
#[cfg(feature = "alloc")]
#[inline]
#[must_use]
fn count_nonascii(s: &str) -> usize {
    // This counts bytes, not `char`s: every byte of a multi-byte UTF-8
    // sequence is non-ASCII and contributes one to the total.
    s.as_bytes()
        .iter()
        .map(|byte| usize::from(!byte.is_ascii()))
        .sum()
}

209
vendor/iri-string/src/format.rs vendored Normal file
View File

@@ -0,0 +1,209 @@
//! Utilities for formatting (especially `Display` trait).
//!
//! This module contains utilities for [`Display`][`core::fmt::Display`]-able
//! types.
use core::fmt::{self, Write as _};
#[cfg(feature = "alloc")]
use alloc::collections::TryReserveError;
#[cfg(all(feature = "alloc", not(feature = "std")))]
use alloc::string::String;
/// Output buffer capacity overflow error.
///
/// [`write_to_slice`] returns this when formatting into the destination slice
/// fails.
#[derive(Debug, Clone, Copy)]
pub struct CapacityOverflowError;
impl fmt::Display for CapacityOverflowError {
/// Writes the fixed message `buffer capacity overflow`.
#[inline]
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.write_str("buffer capacity overflow")
}
}
// `std::error::Error` is named through `std`, so this impl is only compiled
// when the `std` feature is enabled.
#[cfg(feature = "std")]
impl std::error::Error for CapacityOverflowError {}
/// `fmt::Write` sink backed by a caller-provided, fixed-size byte slice.
struct ByteBufWriter<'b> {
    /// Slice that receives the written bytes.
    buffer: &'b mut [u8],
    /// Number of bytes written so far, i.e. the offset of the next write.
    cursor: usize,
}

impl fmt::Write for ByteBufWriter<'_> {
    /// Copies `s` right after the previously written content.
    ///
    /// Fails without writing anything when the free space is smaller than `s`.
    fn write_str(&mut self, s: &str) -> fmt::Result {
        let src = s.as_bytes();
        let free = &mut self.buffer[self.cursor..];
        match free.get_mut(..src.len()) {
            Some(dest) => {
                dest.copy_from_slice(src);
                self.cursor += src.len();
                Ok(())
            }
            // The fragment does not fit into the remaining space.
            None => Err(fmt::Error),
        }
    }
}
/// Formats `value` with its `Display` implementation into `buf`.
///
/// On success, returns the written part of `buf` as a string slice.
///
/// # Errors
///
/// Returns [`CapacityOverflowError`] when formatting fails, which happens when
/// the formatted text does not fit into `buf`. Any other formatting error is
/// reported the same way.
pub fn write_to_slice<'a, T: fmt::Display>(
    buf: &'a mut [u8],
    value: &T,
) -> Result<&'a str, CapacityOverflowError> {
    let mut sink = ByteBufWriter {
        buffer: buf,
        cursor: 0,
    };
    // Read the final cursor before touching `buf` again, so that the sink's
    // borrow of the buffer has ended by then.
    let written_len = match write!(sink, "{}", value) {
        Ok(()) => sink.cursor,
        Err(_) => return Err(CapacityOverflowError),
    };
    let written = core::str::from_utf8(&buf[..written_len])
        .expect("[validity] fmt::Display writes valid UTF-8 byte sequence");
    Ok(written)
}
/// `fmt::Write` sink that appends to a `String` and reports allocation
/// failure as an error instead of panicking.
#[cfg(feature = "alloc")]
struct StringWriter<'a> {
    /// String that receives the written fragments.
    buffer: &'a mut String,
    /// First allocation error encountered, if any.
    error: Option<TryReserveError>,
}

#[cfg(feature = "alloc")]
impl fmt::Write for StringWriter<'_> {
    /// Appends `s`, reserving the required capacity fallibly first.
    ///
    /// After the first allocation failure, every later call fails right away
    /// and the stored error is kept as it is.
    fn write_str(&mut self, s: &str) -> fmt::Result {
        if self.error.is_some() {
            return Err(fmt::Error);
        }
        match self.buffer.try_reserve(s.len()) {
            Ok(()) => {
                // This should never fail since `.try_reserve(s.len())` succeeded.
                self.buffer.push_str(s);
                Ok(())
            }
            Err(e) => {
                // `fmt::Error` carries no payload, so keep the cause aside.
                self.error = Some(e);
                Err(fmt::Error)
            }
        }
    }
}
/// Appends the `Display` output of `value` to `dest` without panicking on
/// allocation failure.
///
/// When allocation failure happens, the part that has already been appended
/// is not removed. Callers are responsible for cleaning up the destination if
/// necessary.
///
/// # Errors
///
/// Returns the `TryReserveError` recorded by the internal writer.
///
/// # Panics
///
/// Panics if formatting fails although no allocation error was recorded.
#[cfg(feature = "alloc")]
pub fn try_append_to_string<T: fmt::Display>(
    dest: &mut String,
    value: &T,
) -> Result<(), TryReserveError> {
    let mut sink = StringWriter {
        buffer: dest,
        error: None,
    };
    match write!(sink, "{}", value) {
        Ok(()) => Ok(()),
        // `fmt::Error` itself is opaque; the real cause is kept in the sink.
        Err(_) => Err(sink
            .error
            .expect("[consistency] allocation error should be set on formatting failure")),
    }
}
/// Returns true if `s` is equal to the string `d` would be formatted into.
///
/// The comparison is done on the fly while `d` is being formatted, so no
/// intermediate buffer is needed. Formatting stops at the first mismatch.
///
/// This never panics, even when a written fragment ends in the middle of a
/// multi-byte character of `s`.
pub(crate) fn eq_str_display<T>(s: &str, d: &T) -> bool
where
    T: ?Sized + fmt::Display,
{
    /// Dummy writer to compare the formatted object to the given string.
    ///
    /// Holds the part of the expected string that has not been matched yet.
    struct CmpWriter<'a>(&'a str);

    impl fmt::Write for CmpWriter<'_> {
        fn write_str(&mut self, s: &str) -> fmt::Result {
            // `strip_prefix` handles both "too long" and "different content",
            // and (unlike `split_at(s.len())`) cannot panic when `s.len()`
            // is not a character boundary of the expected string.
            match self.0.strip_prefix(s) {
                Some(rest) => {
                    self.0 = rest;
                    Ok(())
                }
                None => Err(fmt::Error),
            }
        }
    }

    let mut writer = CmpWriter(s);
    let succeeded = write!(writer, "{}", d).is_ok();
    // Everything written matched; equality also needs nothing to be left over.
    succeeded && writer.0.is_empty()
}
/// A debug-printable type to hide the sensitive information.
///
/// Its `Debug` output is always the fixed placeholder `{censored}`.
#[derive(Clone, Copy)]
pub(crate) struct Censored;
impl core::fmt::Debug for Censored {
/// Writes the placeholder text; the value carries no data to print.
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> core::fmt::Result {
f.write_str("{censored}")
}
}
/// [`ToString`][`alloc::string::ToString`], but without panic.
///
/// This module implements the trait for every type that implements
/// `fmt::Display`.
#[cfg(feature = "alloc")]
pub trait ToStringFallible: alloc::string::ToString {
/// [`ToString::to_string`][`alloc::string::ToString::to_string`], but without panic on OOM.
///
/// # Errors
///
/// Returns `TryReserveError` when memory allocation fails.
fn try_to_string(&self) -> Result<String, TryReserveError>;
}
#[cfg(feature = "alloc")]
impl<T: fmt::Display> ToStringFallible for T {
    /// Formats `self` into a fresh `String`, returning the allocation error
    /// instead of panicking on OOM.
    #[inline]
    fn try_to_string(&self) -> Result<String, TryReserveError> {
        let mut out = String::new();
        // Hand the string over only when appending succeeded.
        try_append_to_string(&mut out, self).map(|()| out)
    }
}
/// A trait for types that can be converted to a dedicated allocated string type.
#[cfg(feature = "alloc")]
pub trait ToDedicatedString {
/// Conversion target type.
type Target;
/// Converts the value to the allocated string.
///
/// # Errors
///
/// Returns `TryReserveError` when memory allocation fails.
fn try_to_dedicated_string(&self) -> Result<Self::Target, TryReserveError>;
/// Converts the value to the allocated string.
///
/// This is the panicking counterpart of
/// [`try_to_dedicated_string`][`Self::try_to_dedicated_string`].
///
/// # Panics
///
/// Panics if a memory allocation error occurred.
#[inline]
#[must_use]
fn to_dedicated_string(&self) -> Self::Target {
self.try_to_dedicated_string()
.expect("failed to allocate enough memory")
}
}
// Unit tests for the formatting utilities.
#[cfg(test)]
mod tests {
use super::*;
// Checks `eq_str_display` against equal, different, shorter, and longer
// formatted values.
#[test]
fn eq_str_display_1() {
// Equal cases.
assert!(eq_str_display("hello", "hello"));
assert!(eq_str_display("42", &42));
assert!(eq_str_display(
r#"\x00\t\r\n\xff\\"#,
&b"\x00\t\r\n\xff\\".escape_ascii()
));
// Same length but different content.
assert!(!eq_str_display("hello", "world"));
// The formatted value is a strict prefix of the expected string.
assert!(!eq_str_display("hello world", "hello"));
// The formatted value is longer than the expected string.
assert!(!eq_str_display("hello", "hello world"));
assert!(!eq_str_display("42", &4));
assert!(!eq_str_display("4", &42));
}
}

159
vendor/iri-string/src/lib.rs vendored Normal file
View File

@@ -0,0 +1,159 @@
//! String types for [RFC 3987 Internationalized Resource Identifiers (IRIs)][RFC 3987] and
//! [RFC 3986 Uniform Resource Identifiers (URIs)][RFC 3986].
//!
//! Note that this crate does not have any extra knowledge about protocols.
//! Comparisons between IRI strings by `PartialEq` and `Eq` are implemented as [simple string
//! comparison](https://www.rfc-editor.org/rfc/rfc3986.html#section-6.2.1).
//! You should implement by yourself or use another crate to use such extra knowledge to compare
//! IRIs / URIs.
//!
//! # Capability
//!
//! This crate provides many features for IRIs / URIs.
//!
//! ## String types
//!
//! [`types` module][`types`] provides various string types for IRIs and URIs.
//! The borrowed string types are unsized slice types (such as `[u8]` and `str`)
//! and not a sized struct, so they are highly interoperable with for example
//! `Cow` and `Rc`. Conversions between `&str` and borrowed IRI string types are easy.
//!
//! ## Resolvers
//!
//! [`resolve` module][`resolve`] provides IRI / URI references resolver.
//! However, you are recommended to use methods of string types such as
//! [`RiReferenceStr::resolve_against()`] or [`RiRelativeStr::resolve_against()`]
//! if you don't intend to resolve multiple IRIs against the same base.
//!
//! ## Validators
//!
//! Validator functions are provided from [`validate` module][`validate`].
//!
//! ## Percent encoding
//!
//! [`percent_encode` module][`percent_encode`] provides a converter to encode
//! user-provided string into percent-encoded one (if syntax requires so).
//!
//! ## IRI builder
//!
//! [`build` module][`build`] provides IRI builder.
//!
//! ## URI template (RFC 6570)
//!
//! [`template` module][`template`] provides an RFC 6570 URI Template processor.
//!
//! # Feature flags
//!
//! ## `std` and `alloc` support
//!
//! This crate supports `no_std` usage.
//!
//! * `alloc` feature:
//! + Std library or `alloc` crate is required.
//! + This feature enables types and functions which require memory allocation,
//! e.g. `types::IriString` and `types::IriRelativeStr::resolve_against()`.
//! * `std` feature (**enabled by default**):
//! + Std library is required.
//! + This automatically enables `alloc` feature.
//! + The feature lets the crate utilize std-specific stuff, such as `std::error::Error` trait.
//! * With neither of them:
//! + The crate can be used in `no_std` environment.
//!
//! ## Other features
//!
//! * `serde`
//! + Enables serde support.
//! + Implements `Serialize` and `Deserialize` traits for IRI / URI types.
//! * `memchr`
//! + Enables faster internal character search.
//!
//! # Rationale
//!
//! ## `foo:`, `foo:/`, `foo://`, `foo:///`, `foo:////`, ... are valid IRIs
//!
//! All of these are valid IRIs.
//! (On the other hand, all of them are invalid as relative IRI reference, because they don't
//! match `relative-part` rule, especially `path-noscheme`, as the first path component of the
//! relative path contains a colon.)
//!
//! * `foo:`
//! + Decomposed to `<scheme="foo">:<path-empty="">`.
//! * `foo:/`
//! + Decomposed to `<scheme="foo">:<path-absolute="/">`.
//! * `foo://`
//! + Decomposed to `<scheme="foo">://<authority=""><path-absolute="">`.
//! * `foo:///`
//! + Decomposed to `<scheme="foo">://<authority=""><path-absolute="/">`.
//! * `foo:////`
//! + Decomposed to `<scheme="foo">://<authority=""><path-absolute="//">`.
//! * `foo://///`
//! + Decomposed to `<scheme="foo">://<authority=""><path-absolute="///">`.
//!
//! RFC 3986 says that "if authority is absent, path cannot start with `//`".
//!
//! > When authority is present, the path must either be empty or begin with a slash ("/")
//! > character. When authority is not present, the path cannot begin with two slash characters
//! > ("//").
//! >
//! > --- [RFC 3986, section 3. Syntax Components](https://www.rfc-editor.org/rfc/rfc3986.html#section-3).
//!
//! > If a URI contains an authority component, then the path component must either be empty or
//! > begin with a slash ("/") character. If a URI does not contain an authority component, then the
//! > path cannot begin with two slash characters ("//").
//! >
//! > --- [RFC 3986, section 3.3. Path](https://www.rfc-editor.org/rfc/rfc3986.html#section-3.3)
//!
//! We should interpret them as "if `authority` rule is completely unused (i.e. does not match any
//! strings **including empty string**), path cannot start with `//`".
//! In other words, we should consider this as **explaining the ABNF of `hier-part` rule**
//! (especially why it does not use `path` rule), but **not adding extra restriction to the rule
//! written in ABNF**.
//!
//! This restriction is necessary to remove ambiguity in decomposition of some strings.
//! For example, it is natural to decompose `foo://` to `<scheme="foo">:<path="//">` or
//! `<scheme="foo">://<authority=""><path="">`.
//! The restriction, **which is already encoded to the ABNF rule**, tells us to always decompose to
//! the latter form, rather than the former one.
//!
//! Readers of the spec might be confused by "when authority is **present**" and "if a URI
//! **contains** an authority component", which is unclear.
//! However, based on the interpretation above, we should consider authority part with empty string
//! as satisfying the condition "authority is **present**".
//!
//! ## IRI resolution can fail
//!
//! For some inputs, resulting string of IRI normalization and resolution can be syntactically
//! correct but semantically wrong. In such cases, the normalizer and resolver provided by this
//! crate do not silently "fix" the IRI by non-standard processing, but just
//! fail by returning `Err(_)`.
//!
//! For details, see the documentation of [`normalize`] module.
//!
//! [RFC 3986]: https://www.rfc-editor.org/rfc/rfc3986.html
//! [RFC 3987]: https://www.rfc-editor.org/rfc/rfc3987.html
//! [`RiReferenceStr::resolve_against()`]: `types::RiReferenceStr::resolve_against`
//! [`RiRelativeStr::resolve_against()`]: `types::RiRelativeStr::resolve_against`
#![warn(missing_docs)]
#![warn(unsafe_op_in_unsafe_fn)]
#![warn(clippy::missing_docs_in_private_items)]
#![warn(clippy::undocumented_unsafe_blocks)]
#![cfg_attr(not(feature = "std"), no_std)]
#![cfg_attr(docsrs, feature(doc_cfg))]
#[cfg(feature = "alloc")]
extern crate alloc;
pub mod build;
pub mod components;
pub mod convert;
pub mod format;
pub mod mask_password;
pub mod normalize;
pub(crate) mod parser;
pub mod percent_encode;
pub(crate) mod raw;
pub mod resolve;
pub mod spec;
pub mod template;
pub mod types;
pub mod validate;

298
vendor/iri-string/src/mask_password.rs vendored Normal file
View File

@@ -0,0 +1,298 @@
//! Password masker.
use core::fmt::{self, Write as _};
use core::ops::Range;
#[cfg(all(feature = "alloc", not(feature = "std")))]
use alloc::borrow::ToOwned;
#[cfg(feature = "alloc")]
use alloc::collections::TryReserveError;
#[cfg(all(feature = "alloc", not(feature = "std")))]
use alloc::string::String;
use crate::components::AuthorityComponents;
#[cfg(feature = "alloc")]
use crate::format::ToDedicatedString;
use crate::spec::Spec;
use crate::types::{RiAbsoluteStr, RiReferenceStr, RiRelativeStr, RiStr};
#[cfg(feature = "alloc")]
use crate::types::{RiAbsoluteString, RiReferenceString, RiRelativeString, RiString};
/// Returns the byte range of the password inside the given IRI reference.
///
/// Returns `None` when the IRI has no authority or the authority has no
/// userinfo. When the userinfo contains no `:`, the returned range is empty
/// and located at the end of the userinfo.
// NOTE(review): for a userinfo without `:`, callers that insert a replacement
// would put it right after the user name -- confirm this is intended.
pub(crate) fn password_range_to_hide<S: Spec>(iri: &RiReferenceStr<S>) -> Option<Range<usize>> {
    /// Spec-agnostic part: computes the password range from plain strings.
    fn locate(iri: &str, userinfo: &str) -> Range<usize> {
        let slashes_pos = iri
            .find("//")
            .expect("[validity] `authority` component must be prefixed with `//`");
        // The `authority` component (and so the userinfo) starts right after
        // the `//`. 2: `"//".len()`.
        let authority_start = 2 + slashes_pos;
        // The password follows the first `:` in the userinfo, if any.
        let pw_offset = match userinfo.find(':') {
            Some(colon_pos) => colon_pos + 1,
            None => userinfo.len(),
        };
        (authority_start + pw_offset)..(authority_start + userinfo.len())
    }

    let authority_components = AuthorityComponents::from_iri(iri)?;
    let userinfo = authority_components.userinfo()?;
    Some(locate(iri.as_str(), userinfo))
}
/// Writes `s` with the `pw_range` part replaced by `alt`.
///
/// The output is the text before the range, then `alt` as formatted by its
/// `Display` implementation, then the text after the range.
///
/// # Panics
///
/// Panics if `pw_range` is not a valid range of `s`.
fn write_with_masked_password<D>(
    f: &mut fmt::Formatter<'_>,
    s: &str,
    pw_range: Range<usize>,
    alt: &D,
) -> fmt::Result
where
    D: ?Sized + fmt::Display,
{
    debug_assert!(
        s.len() >= pw_range.end,
        "[consistency] password range must be inside the IRI"
    );
    // Each step runs only when the previous one has succeeded.
    f.write_str(&s[..pw_range.start])
        .and_then(|()| fmt::Display::fmt(alt, f))
        .and_then(|()| f.write_str(&s[pw_range.end..]))
}
/// Writes an IRI with the password part trimmed.
///
/// This is `write_with_masked_password` with an empty replacement, so the
/// text before and after `pw_range` is written back to back.
fn write_trim_password(f: &mut fmt::Formatter<'_>, s: &str, pw_range: Range<usize>) -> fmt::Result {
write_with_masked_password(f, s, pw_range, "")
}
/// A wrapper of an IRI string that masks the non-empty password when `Display`ed.
///
/// This is a return type of `mask_password` method of IRI string types (such as
/// [`RiStr::mask_password`]).
///
/// When displayed, the password part is removed while the rest of the IRI
/// (including the `:` before the password) is written as is.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::types::UriReferenceStr;
///
/// let iri = UriReferenceStr::new("http://user:password@example.com/path?query")?;
/// let masked = iri.mask_password();
/// assert_eq!(masked.to_string(), "http://user:@example.com/path?query");
///
/// assert_eq!(
/// masked.replace_password("${password}").to_string(),
/// "http://user:${password}@example.com/path?query"
/// );
/// # }
/// # Ok::<_, Error>(())
/// ```
///
/// [`RiStr::mask_password`]: `crate::types::RiStr::mask_password`
#[derive(Clone, Copy)]
pub struct PasswordMasked<'a, T: ?Sized> {
/// IRI reference.
iri_ref: &'a T,
}
impl<'a, T: ?Sized> PasswordMasked<'a, T> {
/// Creates a new `PasswordMasked` object.
///
/// This only stores the reference; the password is located when the value
/// is formatted or converted.
#[inline]
#[must_use]
pub(crate) fn new(iri_ref: &'a T) -> Self {
Self { iri_ref }
}
}
/// Implements methods and traits for `PasswordMasked`.
///
/// `$borrowed` is the borrowed IRI string type to be wrapped, and `$owned` is
/// the owned type used as the `ToDedicatedString` conversion target.
macro_rules! impl_mask {
($borrowed:ident, $owned:ident) => {
impl<'a, S: Spec> PasswordMasked<'a, $borrowed<S>> {
/// Replaces the password with the given arbitrary content.
///
/// Note that the result might be invalid as an IRI since arbitrary string
/// can go to the place of the password.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::format::ToDedicatedString;
/// use iri_string::types::IriReferenceStr;
///
/// let iri = IriReferenceStr::new("http://user:password@example.com/path?query")?;
/// let masked = iri.mask_password();
///
/// assert_eq!(
/// masked.replace_password("${password}").to_string(),
/// "http://user:${password}@example.com/path?query"
/// );
/// # }
/// # Ok::<_, Error>(())
/// ```
#[inline]
#[must_use]
pub fn replace_password<D>(&self, alt: D) -> PasswordReplaced<'a, $borrowed<S>, D>
where
D: fmt::Display,
{
// The original password is ignored; `alt` is used as it is.
PasswordReplaced::with_replacer(self.iri_ref, move |_| alt)
}
/// Replaces the password with the content returned by the given function.
///
/// The function receives the original password.
///
/// Note that the result might be invalid as an IRI since arbitrary string
/// can go to the place of the password.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::format::ToDedicatedString;
/// use iri_string::types::IriReferenceStr;
///
/// let iri = IriReferenceStr::new("http://user:password@example.com/path?query")?;
/// let masked = iri.mask_password();
///
/// let replaced = masked
/// .replace_password_with(|password| format!("{{{} chars}}", password.len()));
/// assert_eq!(
/// replaced.to_string(),
/// "http://user:{8 chars}@example.com/path?query"
/// );
/// # }
/// # Ok::<_, Error>(())
/// ```
#[inline]
#[must_use]
pub fn replace_password_with<F, D>(
&self,
replace: F,
) -> PasswordReplaced<'a, $borrowed<S>, D>
where
F: FnOnce(&str) -> D,
D: fmt::Display,
{
PasswordReplaced::with_replacer(self.iri_ref, replace)
}
}
impl<S: Spec> fmt::Display for PasswordMasked<'_, $borrowed<S>> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match password_range_to_hide(self.iri_ref.as_ref()) {
// A password range is available: write the IRI without that part.
Some(pw_range) => write_trim_password(f, self.iri_ref.as_str(), pw_range),
// Nothing to hide: write the IRI as it is.
None => self.iri_ref.fmt(f),
}
}
}
impl<S: Spec> fmt::Debug for PasswordMasked<'_, $borrowed<S>> {
// Writes the masked IRI (same as `Display`) enclosed in `<` and `>`.
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.write_char('<')?;
fmt::Display::fmt(self, f)?;
f.write_char('>')
}
}
#[cfg(feature = "alloc")]
impl<S: Spec> ToDedicatedString for PasswordMasked<'_, $borrowed<S>> {
type Target = $owned<S>;
fn try_to_dedicated_string(&self) -> Result<Self::Target, TryReserveError> {
let pw_range = match password_range_to_hide(self.iri_ref.as_ref()) {
Some(pw_range) => pw_range,
// NOTE(review): `to_owned()` presumably allocates infallibly, unlike
// the `try_reserve` path below -- confirm whether OOM can panic here.
None => return Ok(self.iri_ref.to_owned()),
};
let mut s = String::new();
let iri_ref = self.iri_ref.as_str();
// Reserve the exact result length (the IRI minus the password) up
// front, so the pushes below fit into the reserved capacity.
s.try_reserve(iri_ref.len() - (pw_range.end - pw_range.start))?;
s.push_str(&iri_ref[..pw_range.start]);
s.push_str(&iri_ref[pw_range.end..]);
// SAFETY: IRI remains valid and type does not change if
// the password is trimmed.
let iri = unsafe { <$owned<S>>::new_maybe_unchecked(s) };
Ok(iri)
}
}
};
}
impl_mask!(RiReferenceStr, RiReferenceString);
impl_mask!(RiStr, RiString);
impl_mask!(RiAbsoluteStr, RiAbsoluteString);
impl_mask!(RiRelativeStr, RiRelativeString);
/// A wrapper of an IRI string that replaces the non-empty password when `Display`ed.
///
/// This is a return type of the [`PasswordMasked::replace_password`] and
/// `PasswordMasked::replace_password_with` methods.
///
/// Note that the result might be invalid as an IRI since arbitrary string can
/// go to the place of the password.
#[cfg_attr(
feature = "alloc",
doc = "Because of this, [`ToDedicatedString`] trait is not implemented for this type."
)]
///
/// [`PasswordMasked::replace_password`]: `PasswordMasked::replace_password`
pub struct PasswordReplaced<'a, T: ?Sized, D> {
/// IRI reference.
iri_ref: &'a T,
/// Password range and alternative content.
///
/// `None` when no password range was found in the IRI.
password: Option<(Range<usize>, D)>,
}
impl<'a, T, D> PasswordReplaced<'a, T, D>
where
    T: ?Sized,
    D: fmt::Display,
{
    /// Creates a new `PasswordReplaced` object.
    ///
    /// `replace` is called at most once, with the original password, and only
    /// when a password range is found in the IRI. Its result is stored and
    /// used as the alternative content.
    ///
    /// # Precondition
    ///
    /// The given string must be a valid IRI reference.
    #[inline]
    #[must_use]
    pub(crate) fn with_replacer<S, F>(iri_ref: &'a T, replace: F) -> Self
    where
        S: Spec,
        T: AsRef<RiReferenceStr<S>>,
        F: FnOnce(&str) -> D,
    {
        let reference = iri_ref.as_ref();
        let password = match password_range_to_hide(reference) {
            Some(pw_range) => {
                let original = &reference.as_str()[pw_range.clone()];
                Some((pw_range, replace(original)))
            }
            None => None,
        };
        Self { iri_ref, password }
    }
}
/// Implements traits for `PasswordReplaced`.
///
/// `$borrowed` is the wrapped borrowed IRI string type. `$owned` is accepted
/// but not used by the generated code.
macro_rules! impl_replace {
($borrowed:ident, $owned:ident) => {
impl<S: Spec, D: fmt::Display> fmt::Display for PasswordReplaced<'_, $borrowed<S>, D> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match &self.password {
// Write the IRI with the stored range replaced by the alternative content.
Some((pw_range, alt)) => {
write_with_masked_password(f, self.iri_ref.as_str(), pw_range.clone(), alt)
}
// No password range was found: write the IRI as it is.
None => self.iri_ref.fmt(f),
}
}
}
impl<S: Spec, D: fmt::Display> fmt::Debug for PasswordReplaced<'_, $borrowed<S>, D> {
// Writes the replaced IRI (same as `Display`) enclosed in `<` and `>`.
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.write_char('<')?;
fmt::Display::fmt(self, f)?;
f.write_char('>')
}
}
};
}
impl_replace!(RiReferenceStr, RiReferenceString);
impl_replace!(RiStr, RiString);
impl_replace!(RiAbsoluteStr, RiAbsoluteString);
impl_replace!(RiRelativeStr, RiRelativeString);

691
vendor/iri-string/src/normalize.rs vendored Normal file
View File

@@ -0,0 +1,691 @@
//! Normalization.
//!
//! # IRI normalization (and resolution) can fail
//!
//! Though this is not explicitly stated in RFC 3986, IRI normalization can fail.
//! For example, `foo:.///bar`, `foo:./..//bar`, and `foo:/..//bar` are all
//! normalized to `foo://bar` as a string. However, IRI without authority (note
//! that this is different from "with empty authority") cannot have a path
//! starting with `//`, since it is ambiguous and can be interpreted as an IRI
//! with authority. So, `foo://bar` is decomposed as scheme `foo`, authority
//! `bar`, and empty path. The expected result is the combination of scheme
//! `foo`, no authority, and path `//bar` (though this is not possible to
//! serialize), so the algorithm fails as it cannot return the intended result.
//!
//! IRI resolution can also fail since it (conditionally) invokes normalization
//! during the resolution process. For example, resolving a reference `.///bar`
//! or `/..//bar` against the base `foo:` fails.
//!
//! Thus, IRI resolution can fail for some abnormal cases.
//!
//! Note that this kind of failure can happen only when the base IRI has no
//! authority and empty path. This would be rare in the wild, since many people
//! would use an IRI with authority part, such as `http://`.
//!
//! If you are handling `scheme://`-style URIs and IRIs, don't worry about the
//! failure. Currently no cases are known to fail when at least one of the base
//! IRI or the relative IRI contains authorities.
//!
//! To know what will happen on resolution failure, see the module documentation
//! for [`resolve`][`crate::resolve`].
//!
//! ## Examples
//!
//! ### Normalization failure
//!
//! ```
//! # #[cfg(feature = "alloc")] {
//! use iri_string::normalize::Error;
//! use iri_string::types::{IriAbsoluteStr, IriReferenceStr};
//!
//! let base = IriAbsoluteStr::new("foo:.///bar")?;
//! assert!(
//! base.normalize().ensure_rfc3986_normalizable().is_err(),
//! "this normalization should fail without WHATWG URL Standard serialization"
//! );
//! # }
//! # Ok::<_, iri_string::validate::Error>(())
//! ```
//!
//! ### Resolution failure
//!
//! ```
//! # #[cfg(feature = "alloc")] {
//! use iri_string::types::{IriAbsoluteStr, IriReferenceStr};
//!
//! let base = IriAbsoluteStr::new("scheme:")?;
//! {
//! let reference = IriReferenceStr::new(".///bar")?;
//! let result = reference.resolve_against(base)
//! .ensure_rfc3986_normalizable();
//! assert!(result.is_err());
//! }
//!
//! {
//! let reference2 = IriReferenceStr::new("/..//bar")?;
//! // Resulting string will be `scheme://bar`, but `bar` should be a path
//! // segment, not a host. So, the semantically correct target IRI cannot
//! // be represented.
//! let result2 = reference2.resolve_against(base)
//! .ensure_rfc3986_normalizable();
//! assert!(result2.is_err());
//! }
//! # }
//! # Ok::<_, iri_string::validate::Error>(())
//! ```
mod error;
mod path;
mod pct_case;
use core::fmt::{self, Display as _, Write as _};
use core::marker::PhantomData;
#[cfg(feature = "alloc")]
use alloc::collections::TryReserveError;
use crate::components::{RiReferenceComponents, Splitter};
#[cfg(feature = "alloc")]
use crate::format::{ToDedicatedString, ToStringFallible};
use crate::parser::str::rfind_split_hole;
use crate::parser::trusted::is_ascii_only_host;
use crate::spec::Spec;
use crate::types::{RiAbsoluteStr, RiReferenceStr, RiStr};
#[cfg(feature = "alloc")]
use crate::types::{RiAbsoluteString, RiString};
pub use self::error::Error;
pub(crate) use self::path::{Path, PathCharacteristic, PathToNormalize};
pub(crate) use self::pct_case::{
is_pct_case_normalized, NormalizedAsciiOnlyHost, PctCaseNormalized,
};
/// Normalization algorithm.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum NormalizationMode {
    /// No normalization.
    None,
    /// Default normalization mode.
    ///
    /// Applies RFC 3986 normalization whenever possible. When not possible,
    /// applies serialization algorithm defined in WHATWG URL standard.
    Default,
    /// WHATWG-like normalization mode.
    ///
    /// Preserves relative path as is (modulo case/pct normalization) when the
    /// authority component is absent.
    PreserveAuthoritylessRelativePath,
}

impl NormalizationMode {
    /// Tells whether this mode applies case normalization and
    /// percent-encoding normalization.
    ///
    /// This is `true` for every mode except [`NormalizationMode::None`].
    ///
    /// Note that even when this returns `true`, plain US-ASCII characters
    /// won't be automatically lowered. Users should apply case normalization
    /// for US-ASCII only `host` component by themselves.
    #[inline]
    #[must_use]
    fn case_pct_normalization(self) -> bool {
        matches!(
            self,
            Self::Default | Self::PreserveAuthoritylessRelativePath
        )
    }
}
/// Normalizedness check algorithm.
///
/// Selects the normalization result an IRI is compared against when checking
/// whether it is already normalized.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum NormalizednessCheckMode {
/// Default algorithm (corresponding to [`NormalizationMode::Default`]).
Default,
/// Strict RFC 3986 normalization.
Rfc3986,
/// WHATWG-like normalization algorithm (corresponding to
/// [`NormalizationMode::PreserveAuthoritylessRelativePath`]).
PreserveAuthoritylessRelativePath,
}
/// Normalization operation.
///
/// Currently this only carries the normalization mode.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) struct NormalizationOp {
/// Normalization mode.
pub(crate) mode: NormalizationMode,
}
/// Spec-agnostic IRI normalization/resolution input.
///
/// Holds the components of the target IRI as borrowed strings, together with
/// the normalization operation to apply.
#[derive(Debug, Clone, Copy)]
pub(crate) struct NormalizationInput<'a> {
/// Target scheme.
scheme: &'a str,
/// Target authority.
///
/// `None` means the authority is absent, which is different from an empty
/// authority (`Some("")`).
authority: Option<&'a str>,
/// Target path without dot-removal.
path: Path<'a>,
/// Target query.
query: Option<&'a str>,
/// Target fragment.
fragment: Option<&'a str>,
/// Normalization type.
op: NormalizationOp,
}
impl<'a> NormalizationInput<'a> {
/// Creates a `NormalizationInput` from IRIs to resolve.
///
/// The reference is split into components here; the base is expected to be
/// already split by the caller.
#[inline]
#[must_use]
pub(crate) fn with_resolution_params<S: Spec>(
base_components: &RiReferenceComponents<'a, S>,
reference: &'a RiReferenceStr<S>,
) -> Self {
let r = RiReferenceComponents::from(reference);
Self::create_normalization_input(
r.iri.as_str(),
&r.splitter,
base_components.iri.as_str(),
&base_components.splitter,
)
}
/// Creates a `NormalizationInput` from components to resolve an IRI.
///
/// `r_iri` and `r` are the reference and its splitter, `b_iri` and `b` are
/// the base and its splitter. Each target component is taken from the
/// reference or from the base depending on the topmost component the
/// reference has. The resulting mode is [`NormalizationMode::None`].
///
/// # Panics
///
/// Panics if neither the reference nor the base has a scheme.
#[must_use]
fn create_normalization_input(
r_iri: &'a str,
r: &Splitter,
b_iri: &'a str,
b: &Splitter,
) -> Self {
/// The toplevel component the reference has.
// The variant order matters: the derived `Ord` is used by `choose_then`.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
enum RefToplevel {
/// Scheme.
Scheme,
/// Authority.
Authority,
/// Path.
Path,
/// Query.
Query,
/// Reference is empty or has only fragment.
None,
}
impl RefToplevel {
/// Choose a component from either of the reference or the base,
/// based on the toplevel component of the reference.
///
/// The reference is chosen when its toplevel component is `component`
/// or one declared before it; otherwise the base is chosen.
#[inline]
#[must_use]
fn choose_then<T, F, G>(self, component: RefToplevel, reference: F, base: G) -> T
where
F: FnOnce() -> T,
G: FnOnce() -> T,
{
if self <= component {
reference()
} else {
base()
}
}
}
// Find the first component that the reference actually has.
let ref_toplevel = if r.has_scheme() {
RefToplevel::Scheme
} else if r.has_authority() {
RefToplevel::Authority
} else if !r.is_path_empty(r_iri.len()) {
RefToplevel::Path
} else if r.has_query() {
RefToplevel::Query
} else {
RefToplevel::None
};
let path = match ref_toplevel {
// The reference path is used alone, without merging with the base path.
RefToplevel::Scheme | RefToplevel::Authority => {
Path::NeedsProcessing(PathToNormalize::from_single_path(r.path_str(r_iri)))
}
RefToplevel::Path => {
let r_path = r.path_str(r_iri);
if r_path.starts_with('/') {
// Absolute path: the base path is not used.
Path::NeedsProcessing(PathToNormalize::from_single_path(r_path))
} else {
// About this branch, see
// <https://datatracker.ietf.org/doc/html/rfc3986#section-5.2.3>.
//
// > o If the base URI has a defined authority component and an empty
// > path, then return a string consisting of "/" concatenated with the
// > reference's path; otherwise,
let b_path = b.path_str(b_iri);
let b_path = if b.has_authority() && b_path.is_empty() {
"/"
} else {
b_path
};
Path::NeedsProcessing(PathToNormalize::from_paths_to_be_resolved(
b_path, r_path,
))
}
}
// The reference has no path: the base path is used as it is.
RefToplevel::Query | RefToplevel::None => Path::Done(b.path_str(b_iri)),
};
Self {
// The reference's scheme takes precedence over the base's one.
scheme: r.scheme_str(r_iri).unwrap_or_else(|| {
b.scheme_str(b_iri)
.expect("[validity] non-relative IRI must have a scheme")
}),
authority: ref_toplevel.choose_then(
RefToplevel::Authority,
|| r.authority_str(r_iri),
|| b.authority_str(b_iri),
),
path,
query: ref_toplevel.choose_then(
RefToplevel::Query,
|| r.query_str(r_iri),
|| b.query_str(b_iri),
),
// The fragment always comes from the reference.
fragment: r.fragment_str(r_iri),
op: NormalizationOp {
mode: NormalizationMode::None,
},
}
}
}
impl<'a, S: Spec> From<&'a RiStr<S>> for NormalizationInput<'a> {
    /// Splits the IRI into components and wraps them as a normalization input.
    ///
    /// The path is marked as needing processing, and the mode is
    /// [`NormalizationMode::None`].
    fn from(iri: &'a RiStr<S>) -> Self {
        let components = RiReferenceComponents::<S>::from(iri.as_ref());
        let (scheme, authority, path, query, fragment) = components.to_major();
        Self {
            scheme: scheme.expect("[validity] `absolute IRI must have `scheme`"),
            authority,
            path: Path::NeedsProcessing(PathToNormalize::from_single_path(path)),
            query,
            fragment,
            op: NormalizationOp {
                mode: NormalizationMode::None,
            },
        }
    }
}
#[cfg(feature = "alloc")]
impl<'a, S: Spec> From<&'a RiString<S>> for NormalizationInput<'a> {
/// Converts through the borrowed slice of the owned IRI string.
#[inline]
fn from(iri: &'a RiString<S>) -> Self {
Self::from(iri.as_slice())
}
}
impl<'a, S: Spec> From<&'a RiAbsoluteStr<S>> for NormalizationInput<'a> {
    /// Splits the absolute IRI into components and wraps them as a
    /// normalization input.
    ///
    /// The path is marked as needing processing, and the mode is
    /// [`NormalizationMode::None`].
    fn from(iri: &'a RiAbsoluteStr<S>) -> Self {
        let components = RiReferenceComponents::<S>::from(iri.as_ref());
        let (scheme, authority, path, query, fragment) = components.to_major();
        Self {
            scheme: scheme.expect("[validity] `absolute IRI must have `scheme`"),
            authority,
            path: Path::NeedsProcessing(PathToNormalize::from_single_path(path)),
            query,
            fragment,
            op: NormalizationOp {
                mode: NormalizationMode::None,
            },
        }
    }
}
// Only available with the `alloc` feature.
#[cfg(feature = "alloc")]
impl<'a, S: Spec> From<&'a RiAbsoluteString<S>> for NormalizationInput<'a> {
/// Builds the normalization input from a borrowed `RiAbsoluteString`.
#[inline]
fn from(iri: &'a RiAbsoluteString<S>) -> Self {
// Delegate to the `From` implementation for whatever `as_slice()`
// returns (presumably `&RiAbsoluteStr<S>` -- confirm against
// `RiAbsoluteString`).
Self::from(iri.as_slice())
}
}
impl NormalizationInput<'_> {
    /// Tests whether the RFC 3986 algorithm alone can normalize this input.
    ///
    /// The check is only delegated to the path when the authority is absent
    /// and the path still needs processing; every other combination is
    /// reported as normalizable.
    ///
    /// Returns `Ok(())` when normalizable, returns `Err(_)` if not.
    pub(crate) fn ensure_rfc3986_normalizable(&self) -> Result<(), Error> {
        match (self.authority, self.path) {
            (None, Path::NeedsProcessing(path)) => {
                path.ensure_rfc3986_normalizable_with_authority_absent()
            }
            (Some(_), _) | (None, Path::Done(_)) => Ok(()),
        }
    }
}
/// Writable as a normalized IRI.
///
/// Note that this implicitly applies the serialization rule defined by the
/// WHATWG URL Standard (to handle normalization impossible by RFC 3986)
/// because `Display` should not fail for reasons other than backend I/O
/// failure. If you want the normalization to fail in such cases, check if the
/// resulting path starts with `/./`.
/// When the normalization succeeds by RFC 3986 algorithm, the path never starts
/// with `/./`.
struct NormalizedInner<'a, S> {
/// Spec-agnostic normalization input.
input: NormalizationInput<'a>,
/// Spec.
// `fn() -> S` is used so that no value of `S` needs to be stored.
_spec: PhantomData<fn() -> S>,
}
impl<S: Spec> fmt::Debug for NormalizedInner<'_, S> {
    /// Formats the value as a struct named `Normalized` exposing only the
    /// `input` field (the spec marker is omitted).
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let mut builder = f.debug_struct("Normalized");
        builder.field("input", &self.input);
        builder.finish()
    }
}
impl<'a, S: Spec> NormalizedInner<'a, S> {
/// Creates a new `NormalizedInner` object from the given input.
///
/// The input is stored as is; the spec `S` is only recorded at the type level.
#[inline]
#[must_use]
fn from_input(input: NormalizationInput<'a>) -> Self {
Self {
input,
_spec: PhantomData,
}
}
}
impl<S: Spec> fmt::Display for NormalizedInner<'_, S> {
    /// Serializes the IRI as `scheme ":" ["//" authority] path ["?" query]
    /// ["#" fragment]`.
    ///
    /// Each component is case/percent-encoding normalized only when the
    /// current mode asks for it; otherwise it is written verbatim. A path
    /// that still needs processing is always routed through
    /// `PathToNormalize::fmt_write_normalize`.
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let input = self.input;
        let normalize = input.op.mode.case_pct_normalization();
        let has_authority = input.authority.is_some();

        // Scheme and its delimiter.
        if normalize {
            normalize_scheme(f, input.scheme)?;
        } else {
            f.write_str(input.scheme)?;
        }
        f.write_str(":")?;

        // Authority, when present.
        if let Some(authority) = input.authority {
            f.write_str("//")?;
            if normalize {
                normalize_authority::<S>(f, authority)?;
            } else {
                f.write_str(authority)?;
            }
        }

        // Path: a finished path is copied unless normalization is requested.
        match input.path {
            Path::Done(done) if !normalize => f.write_str(done)?,
            Path::Done(done) => PathToNormalize::from_single_path(done)
                .fmt_write_normalize::<S, _>(f, input.op, has_authority)?,
            Path::NeedsProcessing(pending) => {
                pending.fmt_write_normalize::<S, _>(f, input.op, has_authority)?
            }
        }

        // Query, when present.
        if let Some(query) = input.query {
            f.write_char('?')?;
            if normalize {
                normalize_query::<S>(f, query)?;
            } else {
                f.write_str(query)?;
            }
        }

        // Fragment, when present.
        if let Some(fragment) = input.fragment {
            f.write_char('#')?;
            if normalize {
                normalize_fragment::<S>(f, fragment)?;
            } else {
                f.write_str(fragment)?;
            }
        }

        Ok(())
    }
}
/// Writes the scheme with every ASCII letter lowercased.
///
/// Characters are emitted one by one, so a write failure stops the output
/// immediately and is returned to the caller.
pub(crate) fn normalize_scheme(f: &mut fmt::Formatter<'_>, scheme: &str) -> fmt::Result {
    // Case normalization only:
    //
    // > namely, that the scheme and US-ASCII only host are case
    // > insensitive and therefore should be normalized to lowercase.
    // >
    // > --- <https://datatracker.ietf.org/doc/html/rfc3987#section-5.3.2.1>.
    //
    // A scheme is ASCII-only and never contains percent-encoded characters,
    // so no percent-encoding normalization is needed here.
    for c in scheme.chars() {
        f.write_char(c.to_ascii_lowercase())?;
    }
    Ok(())
}
/// Writes the normalized authority (`[userinfo "@"] host [":" port]`).
///
/// The authority is split at its last `@`: the part before it is written
/// through `PctCaseNormalized`, and the remainder is handed to
/// [`normalize_host_port`].
fn normalize_authority<S: Spec>(f: &mut fmt::Formatter<'_>, authority: &str) -> fmt::Result {
    let host_port = if let Some((userinfo, host_port)) = rfind_split_hole(authority, b'@') {
        // `userinfo` is not a part of `host`, so it must not be lowercased
        // even when it is ASCII only.
        fmt::Display::fmt(&PctCaseNormalized::<S>::new(userinfo), f)?;
        f.write_char('@')?;
        host_port
    } else {
        authority
    };
    normalize_host_port::<S>(f, host_port)
}
/// Writes the normalized `host [":" port]` part of an authority.
///
/// A trailing `:` (an empty port) is dropped. ASCII-only hosts are written
/// through `NormalizedAsciiOnlyHost`; any other host is written through
/// `PctCaseNormalized`.
pub(crate) fn normalize_host_port<S: Spec>(
    f: &mut fmt::Formatter<'_>,
    host_port: &str,
) -> fmt::Result {
    // A colon at the very end separates the host from an empty port, and an
    // empty port is to be removed by normalizers:
    //
    // > URI producers and normalizers should omit the port component and its
    // > ":" delimiter if port is empty or if its value would be the same as
    // > that of the scheme's default.
    // >
    // > --- [RFC 3986 section 3.2.3. Port](https://www.rfc-editor.org/rfc/rfc3986.html#section-3.2.3)
    let host_port = match host_port.strip_suffix(':') {
        Some(without_empty_port) => without_empty_port,
        None => host_port,
    };
    // The optional `":" port` suffix consists of an ASCII colon and ASCII
    // digits only, so it does not influence the ASCII-only test below.
    if !is_ascii_only_host(host_port) {
        return fmt::Display::fmt(&PctCaseNormalized::<S>::new(host_port), f);
    }
    // ASCII-only host: plain alphabets are additionally made lower case.
    fmt::Display::fmt(&NormalizedAsciiOnlyHost::new(host_port), f)
}
/// Writes the normalized query without the '?' prefix.
///
/// The caller is responsible for writing the `?` delimiter itself.
pub(crate) fn normalize_query<S: Spec>(f: &mut fmt::Formatter<'_>, query: &str) -> fmt::Result {
// Apply percent-encoding normalization.
PctCaseNormalized::<S>::new(query).fmt(f)
}
/// Writes the normalized fragment without the '#' prefix.
///
/// The caller is responsible for writing the `#` delimiter itself.
pub(crate) fn normalize_fragment<S: Spec>(
f: &mut fmt::Formatter<'_>,
fragment: &str,
) -> fmt::Result {
// Apply percent-encoding normalization.
PctCaseNormalized::<S>::new(fragment).fmt(f)
}
/// Normalized OR resolved IRI.
///
/// Resolved IRI can be represented by this type. In that case, the result might
/// not be normalized. If you want the IRI resolution result to be normalized,
/// use [`enable_normalization`][`Self::enable_normalization`] method.
///
/// The value is written out through its [`Display`] implementation.
///
/// [`Display`]: `core::fmt::Display`
pub struct Normalized<'a, T: ?Sized> {
/// Spec-agnostic normalization input.
input: NormalizationInput<'a>,
/// Expected result type.
// `fn() -> T` is used so that no value of `T` needs to be stored.
_ty_str: PhantomData<fn() -> T>,
}
impl<T: ?Sized> fmt::Debug for Normalized<'_, T> {
    /// Formats the value as a struct named `Normalized` exposing only the
    /// `input` field (the result-type marker is omitted).
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let mut builder = f.debug_struct("Normalized");
        builder.field("input", &self.input);
        builder.finish()
    }
}
impl<'a, T: ?Sized> Normalized<'a, T> {
    /// Wraps the given input into a new `Normalized` object.
    ///
    /// The normalization mode carried by `input` is kept as is.
    #[inline]
    #[must_use]
    pub(crate) fn from_input(input: NormalizationInput<'a>) -> Self {
        let _ty_str = PhantomData;
        Self { input, _ty_str }
    }

    /// Enables the normalization.
    ///
    /// After this call the normalizer applies the case normalization, the
    /// percent-encoding normalization, and the dot segments removal.
    #[inline]
    pub fn enable_normalization(&mut self) {
        let op = &mut self.input.op;
        op.mode = NormalizationMode::Default;
    }

    /// Enables the normalization that preserves relative paths under some
    /// conditions.
    ///
    /// Note that this normalization algorithm is not compatible with RFC 3986
    /// algorithm for some inputs.
    ///
    /// See [`RiStr::normalize_but_preserve_authorityless_relative_path()`]
    /// for detail.
    #[inline]
    pub fn enable_normalization_preserving_authorityless_relative_path(&mut self) {
        let op = &mut self.input.op;
        op.mode = NormalizationMode::PreserveAuthoritylessRelativePath;
    }

    /// Consumes `self` and returns it with the normalization enabled.
    ///
    /// This is the by-value counterpart of
    /// [`enable_normalization`][`Self::enable_normalization`].
    #[inline]
    #[must_use]
    pub fn and_normalize(mut self) -> Self {
        self.input.op.mode = NormalizationMode::Default;
        self
    }

    /// Consumes `self` and returns it with the special normalization enabled.
    ///
    /// Note that this normalization algorithm is not compatible with RFC 3986
    /// algorithm for some inputs.
    ///
    /// See [`RiStr::normalize_but_preserve_authorityless_relative_path()`]
    /// for detail.
    #[inline]
    #[must_use]
    pub fn and_normalize_but_preserve_authorityless_relative_path(mut self) -> Self {
        self.input.op.mode = NormalizationMode::PreserveAuthoritylessRelativePath;
        self
    }

    /// Tests whether the RFC 3986 algorithm alone can normalize the path.
    ///
    /// Returns `Ok(())` when normalizable, returns `Err(_)` if not.
    #[inline]
    pub fn ensure_rfc3986_normalizable(&self) -> Result<(), Error> {
        let input = &self.input;
        input.ensure_rfc3986_normalizable()
    }
}
impl<S: Spec> fmt::Display for Normalized<'_, RiStr<S>> {
/// Writes the (possibly normalized) IRI.
#[inline]
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
// The actual serialization lives in the spec-parameterized
// `NormalizedInner`; this only forwards a copy of the input to it.
NormalizedInner::<S>::from_input(self.input).fmt(f)
}
}
impl<S: Spec> fmt::Display for Normalized<'_, RiAbsoluteStr<S>> {
/// Writes the (possibly normalized) absolute IRI.
#[inline]
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
// The actual serialization lives in the spec-parameterized
// `NormalizedInner`; this only forwards a copy of the input to it.
NormalizedInner::<S>::from_input(self.input).fmt(f)
}
}
#[cfg(feature = "alloc")]
impl<S: Spec> ToDedicatedString for Normalized<'_, RiStr<S>> {
    type Target = RiString<S>;

    /// Serializes the IRI into a fresh string and converts it into the
    /// dedicated owned type.
    ///
    /// # Errors
    ///
    /// Propagates the error of `try_to_string()`.
    ///
    /// # Panics
    ///
    /// Panics if the serialized text is rejected by the target type, which
    /// would mean the normalizer produced an invalid IRI.
    fn try_to_dedicated_string(&self) -> Result<Self::Target, TryReserveError> {
        let serialized = self.try_to_string()?;
        let dedicated = RiString::<S>::try_from(serialized)
            .expect("[validity] the normalization result must be a valid IRI");
        Ok(dedicated)
    }
}
// Only available with the `alloc` feature.
#[cfg(feature = "alloc")]
impl<S: Spec> From<Normalized<'_, RiStr<S>>> for RiString<S> {
/// Converts the normalization result into an owned IRI string.
#[inline]
fn from(v: Normalized<'_, RiStr<S>>) -> Self {
// Forwards to `to_dedicated_string()`.
v.to_dedicated_string()
}
}
// Only available with the `alloc` feature.
#[cfg(feature = "alloc")]
impl<S: Spec> From<&Normalized<'_, RiStr<S>>> for RiString<S> {
/// Converts the borrowed normalization result into an owned IRI string.
#[inline]
fn from(v: &Normalized<'_, RiStr<S>>) -> Self {
// Forwards to `to_dedicated_string()`.
v.to_dedicated_string()
}
}
#[cfg(feature = "alloc")]
impl<S: Spec> ToDedicatedString for Normalized<'_, RiAbsoluteStr<S>> {
    type Target = RiAbsoluteString<S>;

    /// Serializes the IRI into a fresh string and converts it into the
    /// dedicated owned type.
    ///
    /// # Errors
    ///
    /// Propagates the error of `try_to_string()`.
    ///
    /// # Panics
    ///
    /// Panics if the serialized text is rejected by the target type, which
    /// would mean the normalizer produced an invalid IRI.
    fn try_to_dedicated_string(&self) -> Result<Self::Target, TryReserveError> {
        let serialized = self.try_to_string()?;
        let dedicated = RiAbsoluteString::<S>::try_from(serialized)
            .expect("[validity] the normalization result must be a valid IRI");
        Ok(dedicated)
    }
}
// Only available with the `alloc` feature.
#[cfg(feature = "alloc")]
impl<S: Spec> From<Normalized<'_, RiAbsoluteStr<S>>> for RiAbsoluteString<S> {
/// Converts the normalization result into an owned absolute IRI string.
#[inline]
fn from(v: Normalized<'_, RiAbsoluteStr<S>>) -> Self {
// Forwards to `to_dedicated_string()`.
v.to_dedicated_string()
}
}
// Only available with the `alloc` feature.
#[cfg(feature = "alloc")]
impl<S: Spec> From<&Normalized<'_, RiAbsoluteStr<S>>> for RiAbsoluteString<S> {
/// Converts the borrowed normalization result into an owned absolute IRI string.
#[inline]
fn from(v: &Normalized<'_, RiAbsoluteStr<S>>) -> Self {
// Forwards to `to_dedicated_string()`.
v.to_dedicated_string()
}
}

View File

@@ -0,0 +1,26 @@
//! Normalization and resolution error.
use core::fmt;
/// Error of IRI normalization and resolution.
///
/// The type carries no payload; it only signals that the operation failed.
///
/// For detail about resolution failure, see [the module documentation][`crate::resolve`].
#[derive(Debug, Clone)]
pub struct Error(());

impl Error {
    /// Text written by the `Display` implementation.
    const MESSAGE: &'static str = "unresolvable IRI";

    /// Creates a new error value.
    pub(crate) fn new() -> Self {
        Error(())
    }
}

impl fmt::Display for Error {
    /// Writes the fixed error message verbatim (formatter flags such as
    /// width or alignment are not applied).
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(Self::MESSAGE)
    }
}

#[cfg(feature = "std")]
impl std::error::Error for Error {}

620
vendor/iri-string/src/normalize/path.rs vendored Normal file
View File

@@ -0,0 +1,620 @@
//! Path normalization.
use core::fmt;
use core::ops::Range;
use crate::parser::str::{find_split_hole, rfind};
use crate::spec::{Spec, UriSpec};
use super::pct_case::PctCaseNormalized;
use super::{Error, NormalizationMode, NormalizationOp};
/// Path that is (possibly) not yet processed or being processed.
///
/// The type is `Copy`, so it can be passed around by value.
#[derive(Debug, Clone, Copy)]
pub(crate) enum Path<'a> {
/// The result. No more processing is needed.
Done(&'a str),
/// Not yet completely processed path.
NeedsProcessing(PathToNormalize<'a>),
}
/// Path that needs merge and/or dot segment removal.
///
/// The logical path is the concatenation of the optional first field (the
/// prefix) and the second field.
///
/// # Invariants
///
/// If the first field (prefix field) is not `None`, it must end with a slash.
#[derive(Debug, Clone, Copy)]
pub(crate) struct PathToNormalize<'a>(Option<&'a str>, &'a str);
impl<'a> PathToNormalize<'a> {
    /// Wraps a single path; no prefix part is used.
    #[inline]
    #[must_use]
    pub(crate) fn from_single_path(path: &'a str) -> Self {
        Self(None, path)
    }

    /// Creates a `PathToNormalize` from the given base and reference paths to be resolved.
    ///
    /// A reference starting with `/` is used alone. Otherwise the base is cut
    /// right after its last slash and that part becomes the prefix; a base
    /// without any slash contributes nothing.
    #[must_use]
    pub(crate) fn from_paths_to_be_resolved(base: &'a str, reference: &'a str) -> Self {
        let prefix = if reference.starts_with('/') {
            None
        } else {
            rfind(base.as_bytes(), b'/').map(|last_slash_pos| &base[..=last_slash_pos])
        };
        Self(prefix, reference)
    }

    /// Returns true if the whole path is the empty string.
    #[inline]
    #[must_use]
    fn is_empty(&self) -> bool {
        // A present prefix ends with a slash, so it can never be empty.
        match self.0 {
            Some(_) => false,
            None => self.1.is_empty(),
        }
    }

    /// Returns the length in bytes of the not yet normalized path.
    #[inline]
    #[must_use]
    pub(super) fn len(&self) -> usize {
        let suffix_len = self.1.len();
        self.len_prefix() + suffix_len
    }

    /// Returns the length in bytes of the prefix part.
    ///
    /// Returns 0 if the prefix part is absent.
    #[inline]
    #[must_use]
    fn len_prefix(&self) -> usize {
        self.0.map_or(0, str::len)
    }

    /// Returns the byte at the given position of the concatenated path.
    ///
    /// Returns `None` when the position is past the end.
    #[must_use]
    fn byte_at(&self, i: usize) -> Option<u8> {
        let prefix = self.0.unwrap_or("");
        match i.checked_sub(prefix.len()) {
            // `i` points into the prefix.
            None => Some(prefix.as_bytes()[i]),
            // `i` points into (or past) the second part.
            Some(local_i) => self.1.as_bytes().get(local_i).copied(),
        }
    }

    /// Returns the position of the first slash at or after `scan_start`.
    ///
    /// Only the part that contains `scan_start` is searched.
    #[must_use]
    fn find_next_slash(&self, scan_start: usize) -> Option<usize> {
        let prefix = self.0.unwrap_or("");
        let haystack = match scan_start.checked_sub(prefix.len()) {
            Some(local_i) => &self.1[local_i..],
            None => &prefix[scan_start..],
        };
        haystack.find('/').map(|rel| rel + scan_start)
    }

    /// Removes the first `len` bytes from the beginning of `self`.
    fn remove_start(&mut self, len: usize) {
        match self.0 {
            // Only a part of the prefix is removed.
            Some(prefix) if len < prefix.len() => {
                self.0 = Some(&prefix[len..]);
            }
            // The whole prefix goes away, possibly with a part of the rest.
            Some(prefix) => {
                self.0 = None;
                self.1 = &self.1[(len - prefix.len())..];
            }
            None => {
                self.1 = &self.1[len..];
            }
        }
    }

    /// Removes the prefix that is ignorable on normalization.
    // Leading dot segments that have no leading slash (such as `./`, `../`,
    // and `../.././`) are dropped here, because they must be removed
    // together with the FOLLOWING slash rather than a leading one.
    fn remove_ignorable_prefix(&mut self) {
        loop {
            let first = match PathSegmentsIter::new(self).next() {
                // A first segment that starts with a slash is not a target.
                Some(seg) if !seg.has_leading_slash => seg,
                _ => break,
            };
            if first.kind(self) == SegmentKind::Normal {
                break;
            }
            // `+ 1` attempts to also skip the slash after the dot segment.
            let skip = (first.range.end + 1).min(self.len());
            self.remove_start(skip);
        }
    }
}
impl PathToNormalize<'_> {
/// Writes the normalized path.
///
/// * `f`: the sink that receives the normalized path.
/// * `op`: the normalization operation; its mode selects whether case and
///   percent-encoding normalization is applied and whether authority-less
///   relative paths are preserved.
/// * `authority_is_present`: whether the IRI that owns this path has an
///   authority component.
///
/// Errors returned by the sink are propagated as is.
pub(crate) fn fmt_write_normalize<S: Spec, W: fmt::Write>(
&self,
f: &mut W,
op: NormalizationOp,
authority_is_present: bool,
) -> fmt::Result {
debug_assert!(
self.0.map_or(true, |s| s.ends_with('/')),
"[validity] the prefix field of `PathToNormalize` should end with a slash"
);
// An empty path produces no output at all.
if self.is_empty() {
return Ok(());
}
if (op.mode == NormalizationMode::PreserveAuthoritylessRelativePath)
&& !authority_is_present
&& self.byte_at(0) != Some(b'/')
{
// Treat the path as "opaque", i.e. do not apply dot segments removal.
// See <https://github.com/lo48576/iri-string/issues/29>.
debug_assert!(
op.mode.case_pct_normalization(),
"[consistency] case/pct normalization should still be applied"
);
if let Some(prefix) = self.0 {
write!(f, "{}", PctCaseNormalized::<S>::new(prefix))?;
}
write!(f, "{}", PctCaseNormalized::<S>::new(self.1))?;
return Ok(());
}
// Work on a copy so that `self` stays untouched.
let mut rest = *self;
// Skip the prefix dot segments without leading slashes (such as `./`,
// `../`, and `../.././`).
// This is necessary because such segments should be removed with the
// FOLLOWING slashes, not leading slashes.
rest.remove_ignorable_prefix();
if rest.is_empty() {
// Path consists of only `/.`s and `/..`s.
// In this case, if the authority component is present, the result
// should be `/`, not empty.
if authority_is_present {
f.write_char('/')?;
}
return Ok(());
}
// None: No segments are written yet.
// Some(false): Something other than `/` is already written as the path.
// Some(true): Only a `/` is written as the path.
let mut only_a_slash_is_written = None;
// Whether segments beyond the tracked depth of the queue may still
// contain dot segments, i.e. whether another pass is required.
let mut too_deep_area_may_have_dot_segments = true;
while !rest.is_empty() && too_deep_area_may_have_dot_segments {
/// The size of the queue to track the path segments.
///
/// This should be nonzero.
const QUEUE_SIZE: usize = 8;
{
// Skip `/.` and `/..` segments at the head.
let mut skipped_len = 0;
for seg in PathSegmentsIter::new(&rest) {
match seg.kind(&rest) {
SegmentKind::Dot | SegmentKind::DotDot => {
debug_assert!(
seg.has_leading_slash,
"[consistency] `.` or `..` segments without a
leading slash have already been skipped"
);
skipped_len = seg.range.end;
}
_ => break,
}
}
rest.remove_start(skipped_len);
if rest.is_empty() {
// Finished with a dot segment.
// The last `/.` or `/..` should be replaced to `/`.
if !authority_is_present && (only_a_slash_is_written == Some(true)) {
// Insert a dot segment to break the prefix `//`.
// Without this, the path starts with `//` and it may
// be confused with the prefix of an authority.
f.write_str(".//")?;
} else {
f.write_char('/')?;
}
break;
}
}
// Segments that survive this pass, indexed by their depth.
let mut queue: [Option<&'_ str>; QUEUE_SIZE] = Default::default();
// Current depth while scanning the segments of `rest`.
let mut level: usize = 0;
let mut first_segment_has_leading_slash = false;
// Find higher path segments.
// `end` is the end position of the last segment stored in the queue.
let mut end = 0;
for seg in PathSegmentsIter::new(&rest) {
let kind = seg.kind(&rest);
match kind {
SegmentKind::Dot => {
too_deep_area_may_have_dot_segments = true;
}
SegmentKind::DotDot => {
// Go one level up (never above the root) and forget the
// segment recorded at that level, if it is tracked.
level = level.saturating_sub(1);
too_deep_area_may_have_dot_segments = true;
if level < queue.len() {
queue[level] = None;
}
}
SegmentKind::Normal => {
// Only segments within the queue depth are recorded; deeper
// ones are left in `rest` for a later pass.
if level < queue.len() {
queue[level] = Some(seg.segment(&rest));
too_deep_area_may_have_dot_segments = false;
end = seg.range.end;
if level == 0 {
first_segment_has_leading_slash = seg.has_leading_slash;
}
}
level += 1;
}
}
}
// Write the path segments as possible, and update the internal state.
for segname in queue.iter().flatten() {
Self::emit_segment::<S, _>(
f,
&mut only_a_slash_is_written,
first_segment_has_leading_slash,
segname,
authority_is_present,
op,
)?;
}
// Drop what has been consumed by this pass.
rest.remove_start(end);
}
if !rest.is_empty() {
// No need of searching dot segments anymore.
assert!(
!too_deep_area_may_have_dot_segments,
"[consistency] loop condition of the previous loop"
);
// Apply only normalization (if needed).
for seg in PathSegmentsIter::new(&rest) {
assert_eq!(
seg.kind(&rest),
SegmentKind::Normal,
"[consistency] already confirmed that there are no more dot segments"
);
let segname = seg.segment(&rest);
Self::emit_segment::<S, _>(
f,
&mut only_a_slash_is_written,
seg.has_leading_slash,
segname,
authority_is_present,
op,
)?;
}
}
Ok(())
}
/// Emits a non-dot segment and updates the current state.
///
/// `only_a_slash_is_written` carries the state described in
/// `fmt_write_normalize` and is updated in place.
//
// `first_segment_has_leading_slash` can be any value if the segment is not the first one.
fn emit_segment<S: Spec, W: fmt::Write>(
f: &mut W,
only_a_slash_is_written: &mut Option<bool>,
first_segment_has_leading_slash: bool,
segname: &str,
authority_is_present: bool,
op: NormalizationOp,
) -> fmt::Result {
// Omit the leading slash of the segment only if the segment is
// the first one and marked as not having a leading slash.
match *only_a_slash_is_written {
None => {
// First segment.
// This path can be taken if `./` is repeated `QUEUE_SIZE`
// times at the beginning.
if first_segment_has_leading_slash {
f.write_char('/')?;
}
// Only a slash has been written when the first segment has a
// leading slash and an empty name.
*only_a_slash_is_written =
Some(first_segment_has_leading_slash && segname.is_empty());
}
Some(only_a_slash) => {
if only_a_slash && !authority_is_present {
// Apply serialization like WHATWG URL Standard.
// This prevents `<scheme=foo>:<path=//bar>` from being written as
// `foo://bar`, which is interpreted as
// `<scheme=foo>://<authority=bar>`. Prepending `./`, the
// serialization result would be `foo:/.//bar`, which is safe.
f.write_str("./")?;
*only_a_slash_is_written = Some(false);
}
f.write_char('/')?;
}
}
// Write the segment name.
if op.mode.case_pct_normalization() {
write!(f, "{}", PctCaseNormalized::<S>::new(segname))
} else {
f.write_str(segname)
}
}
/// Checks if the path is normalizable by RFC 3986 algorithm when the authority is absent.
///
/// The path is normalized (without case/percent-encoding normalization) into
/// a sink that keeps only the first three bytes; the path is rejected when
/// those bytes are `/./`.
///
/// Returns `Ok(())` when normalizable, returns `Err(_)` if not.
pub(crate) fn ensure_rfc3986_normalizable_with_authority_absent(&self) -> Result<(), Error> {
/// A sink to get the prefix of the input.
#[derive(Default)]
struct PrefixRetriever {
/// The buffer to remember the prefix of the input.
buf: [u8; 3],
/// The next write position in the buffer.
cursor: usize,
}
impl PrefixRetriever {
/// Returns the read prefix data.
#[inline]
#[must_use]
fn as_bytes(&self) -> &[u8] {
&self.buf[..self.cursor]
}
}
impl fmt::Write for PrefixRetriever {
fn write_str(&mut self, s: &str) -> fmt::Result {
if !s.is_empty() && (self.cursor >= self.buf.len()) {
// Enough bytes are read.
return Err(fmt::Error);
}
// Copy as many bytes as fit into the remaining space of the buffer.
self.buf[self.cursor..]
.iter_mut()
.zip(s.bytes())
.for_each(|(dest, src)| *dest = src);
// Advance the cursor, clamped to the buffer length.
self.cursor = self.cursor.saturating_add(s.len()).min(self.buf.len());
Ok(())
}
}
let mut prefix = PrefixRetriever::default();
// The failure of this write indicates more than 3 characters are read.
// This is safe to ignore since the check needs only 3 characters.
let _ = self.fmt_write_normalize::<UriSpec, _>(
&mut prefix,
NormalizationOp {
mode: NormalizationMode::None,
},
// Assume the authority is absent.
false,
);
// `/./` is only emitted as the WHATWG-style workaround, which means
// the RFC 3986 algorithm alone could not serialize the path.
if prefix.as_bytes() == b"/./" {
Err(Error::new())
} else {
Ok(())
}
}
}
/// Characteristic of a path.
#[derive(Debug, Clone, Copy)]
pub(crate) enum PathCharacteristic {
/// Absolute path, not special.
CommonAbsolute,
/// Relative path, not special.
CommonRelative,
/// The first path segment of the relative path has one or more colon characters.
RelativeFirstSegmentHasColon,
/// The path starts with the double slash.
StartsWithDoubleSlash,
}
impl PathCharacteristic {
/// Returns true if the path is absolute.
#[inline]
#[must_use]
pub(crate) fn is_absolute(self) -> bool {
matches!(self, Self::CommonAbsolute | Self::StartsWithDoubleSlash)
}
/// Returns the characteristic of the path.
///
/// The path is normalized into a dummy writer that inspects only the
/// beginning of the output and aborts the formatting (by returning an error)
/// as soon as the characteristic is decided.
pub(crate) fn from_path_to_display<S: Spec>(
path: &PathToNormalize<'_>,
op: NormalizationOp,
authority_is_present: bool,
) -> Self {
/// Dummy writer to get necessary values.
#[derive(Default, Clone, Copy)]
struct Writer {
/// Result.
result: Option<PathCharacteristic>,
/// Whether the normalized path is absolute.
is_absolute: Option<bool>,
}
impl fmt::Write for Writer {
fn write_str(&mut self, mut s: &str) -> fmt::Result {
if self.result.is_some() {
// Nothing more to do.
return Err(fmt::Error);
}
while !s.is_empty() {
if self.is_absolute.is_none() {
// The first input.
match s.strip_prefix('/') {
Some(rest) => {
self.is_absolute = Some(true);
s = rest;
}
None => {
self.is_absolute = Some(false);
}
}
// Re-check the loop condition: `s` may have become empty.
continue;
}
if self.is_absolute == Some(true) {
// The first slash has been consumed; a second one right after
// it means the path starts with `//`.
let result = if s.starts_with('/') {
PathCharacteristic::StartsWithDoubleSlash
} else {
PathCharacteristic::CommonAbsolute
};
self.result = Some(result);
return Err(fmt::Error);
}
// Processing the first segment of the relative path.
match find_split_hole(s, b'/') {
Some((first_seg, _rest)) => {
let result = if first_seg.contains(':') {
PathCharacteristic::RelativeFirstSegmentHasColon
} else {
PathCharacteristic::CommonRelative
};
self.result = Some(result);
return Err(fmt::Error);
}
None => {
// `s` might not be the complete first segment.
if s.contains(':') {
self.result =
Some(PathCharacteristic::RelativeFirstSegmentHasColon);
return Err(fmt::Error);
}
// Wait for the next chunk of the first segment.
break;
}
}
}
Ok(())
}
}
let mut writer = Writer::default();
match path.fmt_write_normalize::<S, _>(&mut writer, op, authority_is_present) {
// The writer never aborted the formatting, i.e. no result was decided
// (for example, the path is empty).
Ok(_) => PathCharacteristic::CommonRelative,
Err(_) => writer
.result
.expect("[consistency] the formatting quits early by `Err` when the check is done"),
}
}
}
/// Path segment kind.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum SegmentKind {
    /// `.` or the equivalents.
    Dot,
    /// `..` or the equivalents.
    DotDot,
    /// Other normal (not special) segments.
    Normal,
}

impl SegmentKind {
    /// Classifies the given segment name.
    ///
    /// A "dot" is either the raw `.` or its percent-encoded form (`%2E` or
    /// `%2e`). A segment made of exactly one dot is `Dot`, a segment made of
    /// exactly two dots is `DotDot`, and anything else is `Normal`.
    #[must_use]
    fn from_segment(s: &str) -> Self {
        /// Strips one leading dot (raw or percent-encoded) from the string.
        fn strip_dot(s: &str) -> Option<&str> {
            s.strip_prefix('.')
                .or_else(|| s.strip_prefix("%2E"))
                .or_else(|| s.strip_prefix("%2e"))
        }

        match strip_dot(s) {
            None => Self::Normal,
            Some("") => Self::Dot,
            Some(after_first_dot) => match strip_dot(after_first_dot) {
                Some("") => Self::DotDot,
                Some(_) | None => Self::Normal,
            },
        }
    }
}
/// A segment with optional leading slash.
///
/// The segment does not hold the text itself; it only remembers where the
/// segment name is located in a `PathToNormalize`.
#[derive(Debug, Clone)]
struct PathSegment {
/// Presence of a leading slash.
has_leading_slash: bool,
/// Range of the segment name (without any slashes).
// The positions are offsets into the concatenation of the prefix part and
// the second part of the path.
range: Range<usize>,
}
impl PathSegment {
    /// Returns the segment name (without any slashes) taken from `path`.
    ///
    /// `path` must be the path this segment was created for; the stored range
    /// is interpreted as offsets into the concatenation of its two parts.
    #[inline]
    #[must_use]
    fn segment<'a>(&self, path: &PathToNormalize<'a>) -> &'a str {
        match path.0 {
            // The segment lies entirely inside the prefix part.
            Some(prefix) if self.range.end <= prefix.len() => &prefix[self.range.clone()],
            // The segment lies in the second part: shift the range so that
            // it becomes relative to that part.
            maybe_prefix => {
                let offset = maybe_prefix.map_or(0, str::len);
                &path.1[(self.range.start - offset)..(self.range.end - offset)]
            }
        }
    }

    /// Returns the kind (dot, dot-dot, or normal) of this segment.
    #[inline]
    #[must_use]
    fn kind(&self, path: &PathToNormalize<'_>) -> SegmentKind {
        let name = self.segment(path);
        SegmentKind::from_segment(name)
    }
}
/// Iterator of path segments.
///
/// Yields `PathSegment` values that refer to positions in the borrowed path.
struct PathSegmentsIter<'a> {
/// Path.
path: &'a PathToNormalize<'a>,
/// Current cursor position.
// This is an offset into the concatenation of the two parts of `path`.
cursor: usize,
}
impl<'a> PathSegmentsIter<'a> {
/// Creates a new iterator of path segments.
///
/// The iteration starts at the beginning of the path.
#[inline]
#[must_use]
fn new(path: &'a PathToNormalize<'a>) -> Self {
Self { path, cursor: 0 }
}
}
impl Iterator for PathSegmentsIter<'_> {
    type Item = PathSegment;

    /// Yields the next segment, or `None` once the cursor reaches the end of
    /// the path.
    fn next(&mut self) -> Option<Self::Item> {
        let total_len = self.path.len();
        if self.cursor >= total_len {
            return None;
        }
        let has_leading_slash = self.path.byte_at(self.cursor) == Some(b'/');
        let prefix_len = self.path.len_prefix();

        let range = if (prefix_len != 0) && (self.cursor == prefix_len - 1) {
            // The cursor is on the last byte of the prefix, so the segment
            // name is the head of the second part.
            debug_assert!(has_leading_slash);
            let name_len = self.path.1.find('/').unwrap_or(self.path.1.len());
            prefix_len..(name_len + prefix_len)
        } else {
            // Step over the leading slash (if any), then run to the next
            // slash or to the end of the path.
            let start = self.cursor + usize::from(has_leading_slash);
            let end = self.path.find_next_slash(start).unwrap_or(total_len);
            start..end
        };

        self.cursor = range.end;
        Some(PathSegment {
            has_leading_slash,
            range,
        })
    }
}

View File

@@ -0,0 +1,360 @@
//! Percent-encoding normalization and case normalization.
use core::fmt::{self, Write as _};
use core::marker::PhantomData;
use crate::format::eq_str_display;
use crate::parser::char::{is_ascii_unreserved, is_unreserved, is_utf8_byte_continue};
use crate::parser::str::{find_split_hole, take_first_char};
use crate::parser::trusted::take_xdigits2;
use crate::spec::Spec;
/// Returns true if the given string is percent-encoding normalized and case
/// normalized.
///
/// Note that normalization of ASCII-only host requires additional case
/// normalization, so checking by this function is not sufficient for that case.
pub(crate) fn is_pct_case_normalized<S: Spec>(s: &str) -> bool {
// Compare the input with what the normalizer would print for it.
eq_str_display(s, &PctCaseNormalized::<S>::new(s))
}
/// Decodes a multi-byte UTF-8 sequence of exactly one character.
///
/// The input is trusted to have the shape of a 2-, 3-, or 4-byte UTF-8
/// sequence: the leading byte and the continue bytes are not validated, only
/// their payload bits are combined.
///
/// # Errors
///
/// Returns `Err(())` when the sequence is an overlong (redundant) encoding or
/// when the decoded value is not a valid Unicode scalar value.
///
/// # Panics
///
/// Panics if the slice length is not 2, 3, or 4.
fn into_char_trusted(bytes: &[u8]) -> Result<char, ()> {
    /// The bit mask to get the content part in a continue byte.
    const CONTINUE_BYTE_MASK: u8 = 0b_0011_1111;

    // Payload mask of the leading byte, and the smallest code point that
    // legitimately needs a sequence of this length.
    let (lead_mask, min_code_point): (u8, u32) = match bytes.len() {
        2 => (0b_0001_1111, 0x80),
        3 => (0b_0000_1111, 0x800),
        4 => (0b_0000_0111, 0x1_0000),
        len => unreachable!(
            "[consistency] expected 2, 3, or 4 bytes for a character, but got {len} as the length"
        ),
    };

    // Each continue byte contributes six more bits.
    let code_point = bytes[1..]
        .iter()
        .fold(u32::from(bytes[0] & lead_mask), |acc, &byte| {
            (acc << 6) | u32::from(byte & CONTINUE_BYTE_MASK)
        });

    if code_point < min_code_point {
        // Redundant UTF-8 encoding.
        return Err(());
    }
    // Can be an invalid Unicode code point.
    char::from_u32(code_point).ok_or(())
}
/// Writable as a percent-encoding normalized IRI component (such as a path
/// segment).
///
/// This wrapper does the things below when being formatted:
///
/// * Decode unnecessarily percent-encoded characters.
/// * Convert alphabetic characters to uppercase in percent-encoded triplets.
///
/// Note that this does not newly encode raw characters.
///
/// # Preconditions
///
/// The given string should be a valid component; in particular, the formatting
/// reads two hex digits after every `%` without re-validating them.
#[derive(Debug, Clone, Copy)]
pub(crate) struct PctCaseNormalized<'a, S> {
/// Valid segment name to normalize.
segname: &'a str,
/// Spec.
// `fn() -> S` is used so that no value of `S` needs to be stored.
_spec: PhantomData<fn() -> S>,
}
impl<'a, S: Spec> PctCaseNormalized<'a, S> {
/// Creates a new `PctCaseNormalized` value.
///
/// The string is only borrowed; normalization happens when the value is
/// formatted.
#[inline]
#[must_use]
pub(crate) fn new(source: &'a str) -> Self {
Self {
segname: source,
_spec: PhantomData,
}
}
}
impl<S: Spec> fmt::Display for PctCaseNormalized<'_, S> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let mut rest = self.segname;
'outer_loop: while !rest.is_empty() {
// Scan the next percent-encoded triplet.
let (prefix, after_percent) = match find_split_hole(rest, b'%') {
Some(v) => v,
None => return f.write_str(rest),
};
// Write the string before the percent-encoded triplet.
f.write_str(prefix)?;
// Decode the percent-encoded triplet.
let (first_decoded, after_first_triplet) = take_xdigits2(after_percent);
rest = after_first_triplet;
let expected_char_len = match first_decoded {
0x00..=0x7F => {
// An ASCII character.
debug_assert!(first_decoded.is_ascii());
if is_ascii_unreserved(first_decoded) {
// Unreserved. Print the decoded.
f.write_char(char::from(first_decoded))?;
} else {
write!(f, "%{:02X}", first_decoded)?;
}
continue 'outer_loop;
}
0xC2..=0xDF => 2,
0xE0..=0xEF => 3,
0xF0..=0xF4 => 4,
0x80..=0xC1 | 0xF5..=0xFF => {
// Cannot appear as a first byte.
//
// * 0x80..=0xBF: continue byte.
// * 0xC0..=0xC1: redundant encoding.
// * 0xF5..=0xFF: above the maximum value for U+10FFFF.
write!(f, "%{:02X}", first_decoded)?;
continue 'outer_loop;
}
};
// Get continue bytes.
let c_buf = &mut [first_decoded, 0, 0, 0][..expected_char_len];
for (i, buf_dest) in c_buf[1..].iter_mut().enumerate() {
match take_first_char(rest) {
Some(('%', after_percent)) => {
let (byte, after_triplet) = take_xdigits2(after_percent);
if !is_utf8_byte_continue(byte) {
// Note that `byte` can start the new string.
// Leave the byte in the `rest` for next try (i.e.
// don't update `rest` in this case).
c_buf[..=i]
.iter()
.try_for_each(|b| write!(f, "%{:02X}", b))?;
continue 'outer_loop;
}
*buf_dest = byte;
rest = after_triplet;
}
// If the next character is not `%`, decoded bytes so far
// won't be valid UTF-8 byte sequence.
// Write the read percent-encoded triplets without decoding.
// Note that all characters in `&c_buf[1..]` (if available)
// will be decoded to "continue byte" of UTF-8, so they
// cannot be the start of a valid UTF-8 byte sequence if
// decoded.
Some((c, after_percent)) => {
c_buf[..=i]
.iter()
.try_for_each(|b| write!(f, "%{:02X}", b))?;
f.write_char(c)?;
rest = after_percent;
continue 'outer_loop;
}
None => {
c_buf[..=i]
.iter()
.try_for_each(|b| write!(f, "%{:02X}", b))?;
// Reached the end of the string.
break 'outer_loop;
}
}
}
// Decode the bytes into a character.
match into_char_trusted(&c_buf[..expected_char_len]) {
Ok(decoded_c) => {
if is_unreserved::<S>(decoded_c) {
// Unreserved. Print the decoded.
f.write_char(decoded_c)?;
} else {
c_buf[0..expected_char_len]
.iter()
.try_for_each(|b| write!(f, "%{:02X}", b))?;
}
}
Err(_) => {
// Skip decoding of the entire sequence of pct-encoded triplets loaded
// in `c_buf`. This is valid from the reasons below.
//
// * The first byte in `c_buf` is valid as the first byte, and it tells the
// expected number of bytes for a code unit. The cases the bytes being too
// short and the sequence being incomplete have already been handled, and
// the execution does not reach here then.
// * All of the non-first bytes are checked if they are valid as UTF8 continue
// bytes by `is_utf8_byte_continue()`. If they're not, the decoding of
// that codepoint is aborted and the bytes in the buffer are immediately
// emitted as pct-encoded, and the execution does not reach here. This
// means that the bytes in the current `c_buf` have passed these tests.
// * Since all of the non-first bytes are UTF-8 continue bytes, none of
// them can start a new valid UTF-8 byte sequence. This means that
// if the bytes in the buffer do not constitute a valid UTF-8 byte
// sequence, the whole buffer can immediately be emitted as pct-encoded.
debug_assert!(
c_buf[1..expected_char_len]
.iter()
.copied()
.all(is_utf8_byte_continue),
"[consistency] all non-first bytes have been \
confirmed that they are UTF-8 continue bytes"
);
// Note that the first pct-encoded triplet is stripped from
// `after_first_triplet`.
rest = &after_first_triplet[((expected_char_len - 1) * 3)..];
c_buf[0..expected_char_len]
.iter()
.try_for_each(|b| write!(f, "%{:02X}", b))?;
}
}
}
Ok(())
}
}
/// Display adaptor for an ASCII-only `host` (optionally followed by `":" port`)
/// that is written out in normalized form.
#[derive(Debug, Clone, Copy)]
pub(crate) struct NormalizedAsciiOnlyHost<'a> {
    /// Valid host (and optionally port) to normalize.
    host_port: &'a str,
}

impl<'a> NormalizedAsciiOnlyHost<'a> {
    /// Wraps the given string without validating or copying it.
    ///
    /// # Preconditions
    ///
    /// The given string should be the valid ASCII-only `host` or
    /// `host ":" port` after percent-encoding normalization.
    /// In other words, [`parser::trusted::is_ascii_only_host`] should return
    /// true for the given value.
    ///
    /// [`parser::trusted::is_ascii_only_host`]: `crate::parser::trusted::is_ascii_only_host`
    #[inline]
    #[must_use]
    pub(crate) fn new(host_port: &'a str) -> Self {
        NormalizedAsciiOnlyHost { host_port }
    }
}
impl fmt::Display for NormalizedAsciiOnlyHost<'_> {
    /// Writes the wrapped host (and port) in normalized form.
    ///
    /// Characters outside percent-encoded triplets are written in ASCII
    /// lowercase. A triplet that decodes to an `unreserved` ASCII character is
    /// replaced by that character (lowercased); any other triplet is
    /// re-emitted with uppercase hexadecimal digits.
    ///
    /// # Panics
    ///
    /// Panics if a triplet decodes to a non-ASCII byte, since that violates
    /// the precondition of [`NormalizedAsciiOnlyHost::new`].
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let mut remaining = self.host_port;
        // Each iteration consumes one literal run and the triplet after it.
        while let Some((literal, after_percent)) = find_split_hole(remaining, b'%') {
            for c in literal.chars() {
                f.write_char(c.to_ascii_lowercase())?;
            }
            let (decoded, after_triplet) = take_xdigits2(after_percent);
            remaining = after_triplet;
            assert!(
                decoded.is_ascii(),
                "[consistency] this function requires ASCII-only host as an argument"
            );
            if is_ascii_unreserved(decoded) {
                // Safe to decode: emit the character itself in lowercase.
                f.write_char(char::from(decoded.to_ascii_lowercase()))?;
            } else {
                // Must stay encoded: only the hex digits' case is normalized.
                write!(f, "%{:02X}", decoded)?;
            }
        }
        // No more `%`: everything left is a literal run.
        for c in remaining.chars() {
            f.write_char(c.to_ascii_lowercase())?;
        }
        Ok(())
    }
}
#[cfg(test)]
#[cfg(feature = "alloc")]
mod tests {
    use super::*;

    #[cfg(all(feature = "alloc", not(feature = "std")))]
    use alloc::string::ToString;

    use crate::spec::{IriSpec, UriSpec};

    /// Triplets that do not form valid UTF-8 are kept encoded (uppercased).
    #[test]
    fn invalid_utf8() {
        let input = "%80%cc%cc%cc";
        let as_uri = PctCaseNormalized::<UriSpec>::new(input).to_string();
        assert_eq!(as_uri, "%80%CC%CC%CC");
        let as_iri = PctCaseNormalized::<IriSpec>::new(input).to_string();
        assert_eq!(as_iri, "%80%CC%CC%CC");
    }

    /// A non-ASCII unreserved character is decoded only for IRIs.
    #[test]
    fn iri_unreserved() {
        let input = "%ce%b1";
        let as_uri = PctCaseNormalized::<UriSpec>::new(input).to_string();
        assert_eq!(as_uri, "%CE%B1");
        let as_iri = PctCaseNormalized::<IriSpec>::new(input).to_string();
        assert_eq!(as_iri, "\u{03B1}");
    }

    /// A decodable sequence surrounded by undecodable triplets is still decoded.
    #[test]
    fn iri_middle_decode() {
        let input = "%ce%ce%b1%b1";
        let as_uri = PctCaseNormalized::<UriSpec>::new(input).to_string();
        assert_eq!(as_uri, "%CE%CE%B1%B1");
        let as_iri = PctCaseNormalized::<IriSpec>::new(input).to_string();
        assert_eq!(as_iri, "%CE\u{03B1}%B1");
    }

    /// Reserved ASCII characters stay percent-encoded.
    #[test]
    fn ascii_reserved() {
        let input = "%3f";
        let as_uri = PctCaseNormalized::<UriSpec>::new(input).to_string();
        assert_eq!(as_uri, "%3F");
        let as_iri = PctCaseNormalized::<IriSpec>::new(input).to_string();
        assert_eq!(as_iri, "%3F");
    }

    /// ASCII characters that are neither reserved nor unreserved stay encoded.
    #[test]
    fn ascii_forbidden() {
        let input = "%3c%3e";
        let as_uri = PctCaseNormalized::<UriSpec>::new(input).to_string();
        assert_eq!(as_uri, "%3C%3E");
        let as_iri = PctCaseNormalized::<IriSpec>::new(input).to_string();
        assert_eq!(as_iri, "%3C%3E");
    }

    /// Unreserved ASCII characters are decoded.
    #[test]
    fn ascii_unreserved() {
        let input = "%7ea";
        let as_uri = PctCaseNormalized::<UriSpec>::new(input).to_string();
        assert_eq!(as_uri, "~a");
        let as_iri = PctCaseNormalized::<IriSpec>::new(input).to_string();
        assert_eq!(as_iri, "~a");
    }
}

6
vendor/iri-string/src/parser.rs vendored Normal file
View File

@@ -0,0 +1,6 @@
//! Common stuff for parsing.
pub(crate) mod char;
pub(crate) mod str;
pub(crate) mod trusted;
pub(crate) mod validate;

323
vendor/iri-string/src/parser/char.rs vendored Normal file
View File

@@ -0,0 +1,323 @@
//! Characters.
use crate::spec::Spec;
// Each mask below occupies one distinct bit of a `u8`, so a single `TABLE`
// entry can record all eight properties of an ASCII character.

/// A mask to test whether the character is continue character of `scheme`.
// `ALPHA / DIGIT / "+" / "-" / "."`
const MASK_SCHEME_CONTINUE: u8 = 1 << 0;
/// A mask to test whether the character matches `unreserved`.
// `unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"`
const MASK_UNRESERVED: u8 = 1 << 1;
/// A mask to test whether the character matches `gen-delims`.
// `gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"`
const MASK_GEN_DELIMS: u8 = 1 << 2;
/// A mask to test whether the character matches `sub-delims`.
// `sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="`
const MASK_SUB_DELIMS: u8 = 1 << 3;
/// A mask to test whether the character matches `pchar` (modulo percent-encoded bytes).
// `pchar = unreserved / pct-encoded / sub-delims / ":" / "@"`
const MASK_PCHAR: u8 = 1 << 4;
/// A mask to test whether the character can appear in `query` and `fragment`.
// `query = *( pchar / "/" / "?" )`
// `fragment = *( pchar / "/" / "?" )`
const MASK_FRAG_QUERY: u8 = 1 << 5;
/// A mask to test whether the character can appear in `userinfo` and address of `IPvFuture`.
// `userinfo = *( unreserved / pct-encoded / sub-delims / ":" )`
const MASK_USERINFO_IPVFUTUREADDR: u8 = 1 << 6;
/// A mask to test whether the character matches `pchar` (modulo percent-encoded bytes) or slash.
// `pchar / "/"`
const MASK_PCHAR_SLASH: u8 = 1 << 7;
/// ASCII characters' properties.
///
/// The table is indexed by the ASCII code of a character (`0..=127`). Each
/// entry is the bitwise OR of the `MASK_*` flags that apply to the character;
/// bit 0 (`MASK_SCHEME_CONTINUE`) is the rightmost digit of each literal and
/// bit 7 (`MASK_PCHAR_SLASH`) is the leftmost one.
///
/// The table has exactly 128 entries, so indexing it with a non-ASCII byte
/// panics.
const TABLE: [u8; 128] = [
0b_0000_0000, // NUL
0b_0000_0000, // SOH
0b_0000_0000, // STX
0b_0000_0000, // ETX
0b_0000_0000, // EOT
0b_0000_0000, // ENQ
0b_0000_0000, // ACK
0b_0000_0000, // BEL
0b_0000_0000, // BS
0b_0000_0000, // HT
0b_0000_0000, // LF
0b_0000_0000, // VT
0b_0000_0000, // FF
0b_0000_0000, // CR
0b_0000_0000, // SO
0b_0000_0000, // SI
0b_0000_0000, // DLE
0b_0000_0000, // DC1
0b_0000_0000, // DC2
0b_0000_0000, // DC3
0b_0000_0000, // DC4
0b_0000_0000, // NAK
0b_0000_0000, // SYN
0b_0000_0000, // ETB
0b_0000_0000, // CAN
0b_0000_0000, // EM
0b_0000_0000, // SUB
0b_0000_0000, // ESC
0b_0000_0000, // FS
0b_0000_0000, // GS
0b_0000_0000, // RS
0b_0000_0000, // US
0b_0000_0000, // SPACE
0b_1111_1000, // !
0b_0000_0000, // "
0b_0000_0100, // #
0b_1111_1000, // $
0b_0000_0000, // %
0b_1111_1000, // &
0b_1111_1000, // '
0b_1111_1000, // (
0b_1111_1000, // )
0b_1111_1000, // *
0b_1111_1001, // +
0b_1111_1000, // ,
0b_1111_0011, // -
0b_1111_0011, // .
0b_1010_0100, // /
0b_1111_0011, // 0
0b_1111_0011, // 1
0b_1111_0011, // 2
0b_1111_0011, // 3
0b_1111_0011, // 4
0b_1111_0011, // 5
0b_1111_0011, // 6
0b_1111_0011, // 7
0b_1111_0011, // 8
0b_1111_0011, // 9
0b_1111_0100, // :
0b_1111_1000, // ;
0b_0000_0000, // <
0b_1111_1000, // =
0b_0000_0000, // >
0b_0010_0100, // ?
0b_1011_0100, // @
0b_1111_0011, // A
0b_1111_0011, // B
0b_1111_0011, // C
0b_1111_0011, // D
0b_1111_0011, // E
0b_1111_0011, // F
0b_1111_0011, // G
0b_1111_0011, // H
0b_1111_0011, // I
0b_1111_0011, // J
0b_1111_0011, // K
0b_1111_0011, // L
0b_1111_0011, // M
0b_1111_0011, // N
0b_1111_0011, // O
0b_1111_0011, // P
0b_1111_0011, // Q
0b_1111_0011, // R
0b_1111_0011, // S
0b_1111_0011, // T
0b_1111_0011, // U
0b_1111_0011, // V
0b_1111_0011, // W
0b_1111_0011, // X
0b_1111_0011, // Y
0b_1111_0011, // Z
0b_0000_0100, // [
0b_0000_0000, // \
0b_0000_0100, // ]
0b_0000_0000, // ^
0b_1111_0010, // _
0b_0000_0000, // `
0b_1111_0011, // a
0b_1111_0011, // b
0b_1111_0011, // c
0b_1111_0011, // d
0b_1111_0011, // e
0b_1111_0011, // f
0b_1111_0011, // g
0b_1111_0011, // h
0b_1111_0011, // i
0b_1111_0011, // j
0b_1111_0011, // k
0b_1111_0011, // l
0b_1111_0011, // m
0b_1111_0011, // n
0b_1111_0011, // o
0b_1111_0011, // p
0b_1111_0011, // q
0b_1111_0011, // r
0b_1111_0011, // s
0b_1111_0011, // t
0b_1111_0011, // u
0b_1111_0011, // v
0b_1111_0011, // w
0b_1111_0011, // x
0b_1111_0011, // y
0b_1111_0011, // z
0b_0000_0000, // {
0b_0000_0000, // |
0b_0000_0000, // }
0b_1111_0010, // ~
0b_0000_0000, // DEL
];
/// Tests whether the ASCII character may appear after the first character of a `scheme`.
///
/// `c` must be ASCII: the lookup table has 128 entries, so a larger value
/// makes the index out of range.
#[inline]
#[must_use]
pub(crate) const fn is_ascii_scheme_continue(c: u8) -> bool {
    let props = TABLE[c as usize];
    (props & MASK_SCHEME_CONTINUE) != 0
}
/// Tests whether the ASCII character matches the `unreserved` rule.
///
/// `c` must be ASCII: the lookup table has 128 entries, so a larger value
/// makes the index out of range.
#[inline]
#[must_use]
pub(crate) const fn is_ascii_unreserved(c: u8) -> bool {
    let props = TABLE[c as usize];
    (props & MASK_UNRESERVED) != 0
}
/// Tests whether the character is unreserved under the spec `S`.
///
/// ASCII characters are checked by `is_ascii_unreserved`; every other
/// character is delegated to `S::is_nonascii_char_unreserved`.
#[inline]
#[must_use]
pub(crate) fn is_unreserved<S: Spec>(c: char) -> bool {
    if !c.is_ascii() {
        return S::is_nonascii_char_unreserved(c);
    }
    // The cast is lossless because `c` is known to be ASCII here.
    is_ascii_unreserved(c as u8)
}
///// Returns `true` if the given ASCII character matches `gen-delims`.
//#[inline]
//#[must_use]
//pub(crate) const fn is_ascii_gen_delims(c: u8) -> bool {
// (TABLE[c as usize] & MASK_GEN_DELIMS) != 0
//}
///// Returns `true` if the given ASCII character matches `sub-delims`.
//#[inline]
//#[must_use]
//pub(crate) const fn is_ascii_sub_delims(c: u8) -> bool {
// (TABLE[c as usize] & MASK_SUB_DELIMS) != 0
//}
///// Returns `true` if the given ASCII character matches `reserved`.
//#[inline]
//#[must_use]
//pub(crate) const fn is_ascii_reserved(c: u8) -> bool {
// (TABLE[c as usize] & (MASK_GEN_DELIMS | MASK_SUB_DELIMS)) != 0
//}
/// Tests whether the ASCII character matches `pchar`, not counting `pct-encoded`.
///
/// `c` must be ASCII: the lookup table has 128 entries, so a larger value
/// makes the index out of range.
#[inline]
#[must_use]
pub(crate) const fn is_ascii_pchar(c: u8) -> bool {
    let props = TABLE[c as usize];
    (props & MASK_PCHAR) != 0
}
/// Tests whether the ASCII character may appear in `query` and `fragment`.
///
/// `c` must be ASCII: the lookup table has 128 entries, so a larger value
/// makes the index out of range.
#[inline]
#[must_use]
pub(crate) const fn is_ascii_frag_query(c: u8) -> bool {
    let props = TABLE[c as usize];
    (props & MASK_FRAG_QUERY) != 0
}
/// Tests whether the non-ASCII character may appear in `iquery`.
///
/// A character is accepted when the spec `S` classifies it as either
/// unreserved or private.
#[inline]
#[must_use]
pub(crate) fn is_nonascii_query<S: Spec>(c: char) -> bool {
    if S::is_nonascii_char_unreserved(c) {
        return true;
    }
    S::is_nonascii_char_private(c)
}
/// Returns `true` if the given non-ASCII character is allowed to appear in `ifragment`.
#[inline]
#[must_use]
pub(crate) fn is_nonascii_fragment<S: Spec>(c: char) -> bool {
S::is_nonascii_char_unreserved(c)
}
/// Tests whether the ASCII character may appear in `userinfo` and in the address of `IPvFuture`.
///
/// `c` must be ASCII: the lookup table has 128 entries, so a larger value
/// makes the index out of range.
#[inline]
#[must_use]
pub(crate) const fn is_ascii_userinfo_ipvfutureaddr(c: u8) -> bool {
    let props = TABLE[c as usize];
    (props & MASK_USERINFO_IPVFUTUREADDR) != 0
}
/// Returns `true` if the given non-ASCII character is allowed to appear in `iuserinfo`.
#[inline]
#[must_use]
pub(crate) fn is_nonascii_userinfo<S: Spec>(c: char) -> bool {
S::is_nonascii_char_unreserved(c)
}
/// Tests whether the ASCII character may appear in `reg-name`.
///
/// Accepted characters are those flagged as `unreserved` or `sub-delims`.
/// `c` must be ASCII: the lookup table has 128 entries, so a larger value
/// makes the index out of range.
#[inline]
#[must_use]
pub(crate) const fn is_ascii_regname(c: u8) -> bool {
    /// Flags of the character classes allowed in `reg-name`.
    const ALLOWED: u8 = MASK_UNRESERVED | MASK_SUB_DELIMS;
    let props = TABLE[c as usize];
    (props & ALLOWED) != 0
}
/// Returns `true` if the given non-ASCII character is allowed to appear in `ireg-name`.
#[inline]
#[must_use]
pub(crate) fn is_nonascii_regname<S: Spec>(c: char) -> bool {
S::is_nonascii_char_unreserved(c)
}
/// Tests whether the ASCII character is a slash or matches `pchar`, not counting `pct-encoded`.
///
/// `c` must be ASCII: the lookup table has 128 entries, so a larger value
/// makes the index out of range.
#[inline]
#[must_use]
pub(crate) const fn is_ascii_pchar_slash(c: u8) -> bool {
    let props = TABLE[c as usize];
    (props & MASK_PCHAR_SLASH) != 0
}
/// Checks if the given character matches the `ucschar` rule.
///
/// The code point is split into its plane (upper bits) and its offset inside
/// the plane (lower 16 bits):
///
/// * plane 0: `U+00A0..=U+D7FF`, `U+F900..=U+FDCF` and `U+FDF0..=U+FFEF`,
/// * planes 1 to 13: every offset up to `0xFFFD`,
/// * plane 14: offsets `0x1000..=0xFFFD`,
/// * planes 15 and 16: nothing.
#[must_use]
pub(crate) fn is_ucschar(c: char) -> bool {
    let code_point = u32::from(c);
    let plane = code_point >> 16;
    let offset = code_point & 0xFFFF;
    match plane {
        0x0 => matches!(
            offset,
            0x00A0..=0xD7FF | 0xF900..=0xFDCF | 0xFDF0..=0xFFEF
        ),
        0x1..=0xD => offset <= 0xFFFD,
        0xE => (0x1000..=0xFFFD).contains(&offset),
        _ => false,
    }
}
/// Returns true if the given value is a continue byte of UTF-8.
///
/// Continue bytes are exactly the bytes of the form `0b10xx_xxxx`, i.e. the
/// range `0x80..=0xBF`.
#[inline(always)]
#[must_use]
pub(crate) fn is_utf8_byte_continue(byte: u8) -> bool {
    // Keep only the two most significant bits and compare them with `10`.
    // ASCII bytes start with `0`, and the first byte of a multi-byte
    // sequence starts with `11`, so neither matches.
    (byte & 0b1100_0000) == 0b1000_0000
}
/// Tests whether the ASCII character is `unreserved` or `reserved`.
///
/// `reserved` here means flagged as `gen-delims` or `sub-delims`.
/// `c` must be ASCII: the lookup table has 128 entries, so a larger value
/// makes the index out of range.
#[inline]
#[must_use]
pub(crate) const fn is_ascii_unreserved_or_reserved(c: u8) -> bool {
    /// Flags of the `unreserved`, `gen-delims`, and `sub-delims` classes.
    const ALLOWED: u8 = MASK_UNRESERVED | MASK_GEN_DELIMS | MASK_SUB_DELIMS;
    let props = TABLE[c as usize];
    (props & ALLOWED) != 0
}

390
vendor/iri-string/src/parser/str.rs vendored Normal file
View File

@@ -0,0 +1,390 @@
//! Functions for common string operations.
pub(crate) use self::maybe_pct_encoded::{
process_percent_encoded_best_effort, PctEncodedFragments,
};
mod maybe_pct_encoded;
/// Returns the string between `open` and `close` if `s` is wrapped by them.
///
/// `s` must be at least two bytes long, start with the byte `open`, and end
/// with the byte `close`; otherwise `None` is returned. The opening and
/// closing bytes must be distinct positions, so a one-byte string is never
/// considered wrapped.
#[must_use]
pub(crate) fn get_wrapped_inner(s: &str, open: u8, close: u8) -> Option<&str> {
    let bytes = s.as_bytes();
    let (first, last) = (*bytes.first()?, *bytes.last()?);
    if bytes.len() < 2 || first != open || last != close {
        return None;
    }
    Some(&s[1..(bytes.len() - 1)])
}
/// Returns whichever of the two needles occurs first in `haystack`.
///
/// Returns `None` if neither needle is present.
#[cfg(not(feature = "memchr"))]
#[inline]
#[must_use]
pub(crate) fn prior_byte2(haystack: &[u8], needle1: u8, needle2: u8) -> Option<u8> {
    for &byte in haystack {
        if byte == needle1 || byte == needle2 {
            return Some(byte);
        }
    }
    None
}

/// Returns whichever of the two needles occurs first in `haystack`.
///
/// Returns `None` if neither needle is present.
#[cfg(feature = "memchr")]
#[inline]
#[must_use]
pub(crate) fn prior_byte2(haystack: &[u8], needle1: u8, needle2: u8) -> Option<u8> {
    let found_at = memchr::memchr2(needle1, needle2, haystack)?;
    Some(haystack[found_at])
}
/// Returns the index of the last occurrence of the byte `needle`.
///
/// This is a (possibly) faster version of `haystack.rfind(needle)` for the
/// case where `needle` is an ASCII character.
#[cfg(not(feature = "memchr"))]
#[inline]
#[must_use]
pub(crate) fn rfind(haystack: &[u8], needle: u8) -> Option<usize> {
    (0..haystack.len())
        .rev()
        .find(|&index| haystack[index] == needle)
}

/// Returns the index of the last occurrence of the byte `needle`.
///
/// This is a (possibly) faster version of `haystack.rfind(needle)` for the
/// case where `needle` is an ASCII character.
#[cfg(feature = "memchr")]
#[inline]
#[must_use]
pub(crate) fn rfind(haystack: &[u8], needle: u8) -> Option<usize> {
    let last_index = memchr::memrchr(needle, haystack);
    last_index
}
/// Splits the string right before the first occurrence of `needle`.
///
/// The second element of the returned pair starts with the needle itself.
/// Returns `None` if `needle` is not found.
#[cfg(not(feature = "memchr"))]
#[must_use]
pub(crate) fn find_split(haystack: &str, needle: u8) -> Option<(&str, &str)> {
    let split_at = haystack.as_bytes().iter().position(|&b| b == needle)?;
    Some(haystack.split_at(split_at))
}

/// Splits the string right before the first occurrence of `needle`.
///
/// The second element of the returned pair starts with the needle itself.
/// Returns `None` if `needle` is not found.
#[cfg(feature = "memchr")]
#[must_use]
pub(crate) fn find_split(haystack: &str, needle: u8) -> Option<(&str, &str)> {
    let split_at = memchr::memchr(needle, haystack.as_bytes())?;
    Some(haystack.split_at(split_at))
}
/// Splits the string right before the last occurrence of either needle.
///
/// The second element of the returned pair starts with the needle found.
/// Returns `None` if neither needle is found.
#[cfg(not(feature = "memchr"))]
#[must_use]
pub(crate) fn rfind_split2(haystack: &str, needle1: u8, needle2: u8) -> Option<(&str, &str)> {
    let split_at = haystack
        .as_bytes()
        .iter()
        .rposition(|&b| b == needle1 || b == needle2)?;
    Some(haystack.split_at(split_at))
}

/// Splits the string right before the last occurrence of either needle.
///
/// The second element of the returned pair starts with the needle found.
/// Returns `None` if neither needle is found.
#[cfg(feature = "memchr")]
#[must_use]
pub(crate) fn rfind_split2(haystack: &str, needle1: u8, needle2: u8) -> Option<(&str, &str)> {
    let split_at = memchr::memrchr2(needle1, needle2, haystack.as_bytes())?;
    Some(haystack.split_at(split_at))
}
/// Splits the string right before the first occurrence of either needle.
///
/// The second element of the returned pair starts with the needle found.
/// Returns `None` if neither needle is found.
#[cfg(not(feature = "memchr"))]
#[must_use]
pub(crate) fn find_split2(haystack: &str, needle1: u8, needle2: u8) -> Option<(&str, &str)> {
    let split_at = haystack
        .as_bytes()
        .iter()
        .position(|&b| b == needle1 || b == needle2)?;
    Some(haystack.split_at(split_at))
}

/// Splits the string right before the first occurrence of either needle.
///
/// The second element of the returned pair starts with the needle found.
/// Returns `None` if neither needle is found.
#[cfg(feature = "memchr")]
#[must_use]
pub(crate) fn find_split2(haystack: &str, needle1: u8, needle2: u8) -> Option<(&str, &str)> {
    let split_at = memchr::memchr2(needle1, needle2, haystack.as_bytes())?;
    Some(haystack.split_at(split_at))
}
/// Splits the string right before the first occurrence of any of the three needles.
///
/// The second element of the returned pair starts with the needle found.
/// Returns `None` if none of the needles is found.
#[cfg(not(feature = "memchr"))]
#[must_use]
pub(crate) fn find_split3(
    haystack: &str,
    needle1: u8,
    needle2: u8,
    needle3: u8,
) -> Option<(&str, &str)> {
    let needles = [needle1, needle2, needle3];
    let split_at = haystack
        .as_bytes()
        .iter()
        .position(|b| needles.contains(b))?;
    Some(haystack.split_at(split_at))
}

/// Splits the string right before the first occurrence of any of the three needles.
///
/// The second element of the returned pair starts with the needle found.
/// Returns `None` if none of the needles is found.
#[cfg(feature = "memchr")]
#[must_use]
pub(crate) fn find_split3(
    haystack: &str,
    needle1: u8,
    needle2: u8,
    needle3: u8,
) -> Option<(&str, &str)> {
    let split_at = memchr::memchr3(needle1, needle2, needle3, haystack.as_bytes())?;
    Some(haystack.split_at(split_at))
}
/// Splits the string around the first occurrence of `needle`.
///
/// The needle itself is contained in neither of the returned strings.
/// Returns `None` if `needle` is not found.
#[cfg(not(feature = "memchr"))]
#[must_use]
pub(crate) fn find_split_hole(haystack: &str, needle: u8) -> Option<(&str, &str)> {
    let hole = haystack.as_bytes().iter().position(|&b| b == needle)?;
    let before = &haystack[..hole];
    let after = &haystack[(hole + 1)..];
    Some((before, after))
}

/// Splits the string around the first occurrence of `needle`.
///
/// The needle itself is contained in neither of the returned strings.
/// Returns `None` if `needle` is not found.
#[cfg(feature = "memchr")]
#[must_use]
pub(crate) fn find_split_hole(haystack: &str, needle: u8) -> Option<(&str, &str)> {
    let hole = memchr::memchr(needle, haystack.as_bytes())?;
    let before = &haystack[..hole];
    let after = &haystack[(hole + 1)..];
    Some((before, after))
}
/// Splits the string around the first occurrence of either needle.
///
/// Returns the string before the needle, the needle that was found, and the
/// string after it. Returns `None` if neither needle is found.
#[cfg(not(feature = "memchr"))]
#[must_use]
pub(crate) fn find_split2_hole(
    haystack: &str,
    needle1: u8,
    needle2: u8,
) -> Option<(&str, u8, &str)> {
    let bytes = haystack.as_bytes();
    let hole = bytes
        .iter()
        .position(|&b| b == needle1 || b == needle2)?;
    Some((&haystack[..hole], bytes[hole], &haystack[(hole + 1)..]))
}

/// Splits the string around the first occurrence of either needle.
///
/// Returns the string before the needle, the needle that was found, and the
/// string after it. Returns `None` if neither needle is found.
#[cfg(feature = "memchr")]
#[must_use]
pub(crate) fn find_split2_hole(
    haystack: &str,
    needle1: u8,
    needle2: u8,
) -> Option<(&str, u8, &str)> {
    let bytes = haystack.as_bytes();
    let hole = memchr::memchr2(needle1, needle2, bytes)?;
    Some((&haystack[..hole], bytes[hole], &haystack[(hole + 1)..]))
}
/// Splits the string around the first occurrence of any of the four needles.
///
/// Returns the string before the needle, the needle that was found, and the
/// string after it. Returns `None` if none of the needles is found.
#[cfg(not(feature = "memchr"))]
#[must_use]
pub(crate) fn find_split4_hole(
    haystack: &str,
    needle1: u8,
    needle2: u8,
    needle3: u8,
    needle4: u8,
) -> Option<(&str, u8, &str)> {
    let needles = [needle1, needle2, needle3, needle4];
    let bytes = haystack.as_bytes();
    let hole = bytes.iter().position(|b| needles.contains(b))?;
    Some((&haystack[..hole], bytes[hole], &haystack[(hole + 1)..]))
}

/// Splits the string around the first occurrence of any of the four needles.
///
/// Returns the string before the needle, the needle that was found, and the
/// string after it. Returns `None` if none of the needles is found.
#[cfg(feature = "memchr")]
#[must_use]
pub(crate) fn find_split4_hole(
    haystack: &str,
    needle1: u8,
    needle2: u8,
    needle3: u8,
    needle4: u8,
) -> Option<(&str, u8, &str)> {
    let bytes = haystack.as_bytes();
    // The first three needles are searched together; the fourth needle then
    // only needs to be searched in the part before that hit (if any).
    let hole = match memchr::memchr3(needle1, needle2, needle3, bytes) {
        Some(limit) => memchr::memchr(needle4, &bytes[..limit]).unwrap_or(limit),
        None => memchr::memchr(needle4, bytes)?,
    };
    Some((&haystack[..hole], bytes[hole], &haystack[(hole + 1)..]))
}
/// Splits the string around the last occurrence of `needle`.
///
/// The needle itself is contained in neither of the returned strings.
/// Returns `None` if `needle` is not found.
#[cfg(not(feature = "memchr"))]
#[must_use]
pub(crate) fn rfind_split_hole(haystack: &str, needle: u8) -> Option<(&str, &str)> {
    let hole = haystack.as_bytes().iter().rposition(|&b| b == needle)?;
    let before = &haystack[..hole];
    let after = &haystack[(hole + 1)..];
    Some((before, after))
}

/// Splits the string around the last occurrence of `needle`.
///
/// The needle itself is contained in neither of the returned strings.
/// Returns `None` if `needle` is not found.
#[cfg(feature = "memchr")]
#[must_use]
pub(crate) fn rfind_split_hole(haystack: &str, needle: u8) -> Option<(&str, &str)> {
    let hole = memchr::memrchr(needle, haystack.as_bytes())?;
    let before = &haystack[..hole];
    let after = &haystack[(hole + 1)..];
    Some((before, after))
}
/// Returns `true` if every character of the string is allowed.
///
/// ASCII characters are tested by `pred_ascii` (as bytes) and non-ASCII
/// characters by `pred_nonascii`. The string is processed from the front as
/// alternating runs of ASCII and non-ASCII characters, and the check stops at
/// the first rejected character. An empty string is accepted.
#[must_use]
fn satisfy_chars<F, G>(mut s: &str, pred_ascii: F, pred_nonascii: G) -> bool
where
    F: Copy + Fn(u8) -> bool,
    G: Copy + Fn(char) -> bool,
{
    loop {
        // Validate the (possibly empty) leading ASCII run.
        let ascii_len = s.bytes().position(|b| !b.is_ascii()).unwrap_or(s.len());
        let (ascii_run, tail) = s.split_at(ascii_len);
        if !ascii_run.bytes().all(pred_ascii) {
            return false;
        }
        if tail.is_empty() {
            // Nothing but ASCII was left.
            return true;
        }
        // `tail` starts with a non-ASCII character. Validate the run up to
        // the next ASCII byte (or the end of the string).
        let nonascii_len = tail
            .bytes()
            .position(|b| b.is_ascii())
            .unwrap_or(tail.len());
        let (nonascii_run, tail) = tail.split_at(nonascii_len);
        if !nonascii_run.chars().all(pred_nonascii) {
            return false;
        }
        s = tail;
    }
}
/// Returns `true` if the string consists only of allowed characters and
/// percent-encoded triplets.
///
/// Every `%` must be followed by two hexadecimal digits. The parts between
/// the triplets are checked with `pred_ascii` (for ASCII characters) and
/// `pred_nonascii` (for the others).
#[must_use]
pub(crate) fn satisfy_chars_with_pct_encoded<F, G>(
    mut s: &str,
    pred_ascii: F,
    pred_nonascii: G,
) -> bool
where
    F: Copy + Fn(u8) -> bool,
    G: Copy + Fn(char) -> bool,
{
    loop {
        let (plain, after_percent) = match find_split_hole(s, b'%') {
            Some(parts) => parts,
            // No more triplets: only the plain remainder is left to check.
            None => return satisfy_chars(s, pred_ascii, pred_nonascii),
        };
        // Check the plain part in front of the `%`.
        if !plain.is_empty() && !satisfy_chars(plain, pred_ascii, pred_nonascii) {
            return false;
        }
        // Check that the `%` starts a complete triplet.
        if !starts_with_double_hexdigits(after_percent.as_bytes()) {
            return false;
        }
        // Skip the two hexadecimal digits.
        s = &after_percent[2..];
    }
}
/// Returns `true` if the first two bytes exist and are both ASCII hexadecimal digits.
#[must_use]
pub(crate) fn starts_with_double_hexdigits(s: &[u8]) -> bool {
    match s.get(..2) {
        Some(pair) => pair.iter().all(u8::is_ascii_hexdigit),
        None => false,
    }
}
/// Removes the leading `prefix` byte from the string and returns the rest.
///
/// Returns `None` if the string is empty or starts with another byte.
///
/// # Precondition
///
/// `prefix` should be an ASCII character. This is checked only by a debug
/// assertion.
#[must_use]
pub(crate) fn strip_ascii_char_prefix(s: &str, prefix: u8) -> Option<&str> {
    debug_assert!(prefix.is_ascii());
    match s.as_bytes() {
        [first, ..] if *first == prefix => Some(&s[1..]),
        _ => None,
    }
}
/// Splits off the first character of the string.
///
/// Returns `(first_char, rest_str)`, or `None` if the string is empty.
#[must_use]
pub(crate) fn take_first_char(s: &str) -> Option<(char, &str)> {
    let first = s.chars().next()?;
    // The rest starts right after the UTF-8 encoding of the first character.
    Some((first, &s[first.len_utf8()..]))
}

View File

@@ -0,0 +1,369 @@
//! Processor for possibly- or invalidly-percent-encoded strings.
use core::fmt::{self, Write as _};
use core::marker::PhantomData;
use core::num::NonZeroU8;
use core::ops::ControlFlow;
use crate::parser::str::find_split;
use crate::parser::trusted::hexdigits_to_byte;
/// Fragment in a possibly percent-encoded (and possibly broken) string.
///
/// Values of this type are passed to the callback given to
/// `process_percent_encoded_best_effort`, one fragment at a time.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum PctEncodedFragments<'a> {
/// String fragment without percent-encoded triplets.
NoPctStr(&'a str),
/// Stray `%` (percent) character.
StrayPercent,
/// Valid percent-encoded triplets for a character.
// `.0`: The percent-encoded triplets as written in the input.
// `.1`: The character the triplets decode to.
Char(&'a str, char),
/// Percent-encoded triplets that do not consist of a valid UTF-8 sequence.
InvalidUtf8PctTriplets(&'a str),
}
/// Processes characters in a string which may contain (possibly invalid) percent-encoded triplets.
///
/// The value `v` is formatted through its `Display` implementation, and the
/// output is decomposed into [`PctEncodedFragments`] that are passed to `f`
/// in order.
///
/// # Returns
///
/// * `Ok(ControlFlow::Continue(()))` if the whole value has been processed.
/// * `Ok(ControlFlow::Break(v))` as soon as `f` returns `ControlFlow::Break(v)`;
///   no more fragments are passed to `f` after that.
/// * `Err(fmt::Error)` if formatting `v` failed although `f` did not break.
pub(crate) fn process_percent_encoded_best_effort<T, F, B>(
v: T,
mut f: F,
) -> Result<ControlFlow<B>, fmt::Error>
where
T: fmt::Display,
F: FnMut(PctEncodedFragments<'_>) -> ControlFlow<B>,
{
// Scratch space for the encoded bytes: 4 triplets of 3 bytes each.
let mut buf = [0_u8; 12];
let mut writer = DecomposeWriter {
f: &mut f,
decoder: Default::default(),
buf: &mut buf,
result: ControlFlow::Continue(()),
_r: PhantomData,
};
if write!(writer, "{v}").is_err() {
// The writer reports a break requested by `f` as a formatting error, so
// the stored result tells a real formatting failure from an early exit.
match writer.result {
ControlFlow::Continue(_) => return Err(fmt::Error),
ControlFlow::Break(v) => return Ok(ControlFlow::Break(v)),
}
}
// Flush the internal buffer of the decoder.
// NOTE(review): `flush` is defined outside this view; it presumably writes the
// pending encoded bytes to `buf` and returns their length — confirm.
if let Some(len) = writer.decoder.flush(&mut buf).map(|v| usize::from(v.get())) {
// Bytes after the last complete triplet belong to an incomplete triplet.
let len_suffix = len % 3;
let triplets_end = len - len_suffix;
let triplets = core::str::from_utf8(&buf[..triplets_end])
.expect("[validity] percent-encoded triplets consist of ASCII characters");
// Note that `triplets` is empty when fewer than three bytes were pending.
if let ControlFlow::Break(v) = f(PctEncodedFragments::InvalidUtf8PctTriplets(triplets)) {
return Ok(ControlFlow::Break(v));
}
if len_suffix > 0 {
// The incomplete triplet starts with `%`.
if let ControlFlow::Break(v) = f(PctEncodedFragments::StrayPercent) {
return Ok(ControlFlow::Break(v));
}
}
if len_suffix > 1 {
// Whatever followed the stray `%` is reported as a plain string.
let after_percent = core::str::from_utf8(
&buf[(triplets_end + 1)..(triplets_end + len_suffix)],
)
.expect("[consistency] percent-encoded triplets contains only ASCII characters");
if let ControlFlow::Break(v) = f(PctEncodedFragments::NoPctStr(after_percent)) {
return Ok(ControlFlow::Break(v));
}
}
}
Ok(ControlFlow::Continue(()))
}
/// Writer to decompose the input into fragments.
///
/// Strings written through `fmt::Write` are split into
/// [`PctEncodedFragments`], and each fragment is passed to the output
/// function `f`.
struct DecomposeWriter<'a, F, B> {
/// Output function.
f: &'a mut F,
/// Decoder.
// Holds percent-encoded triplets that have not been reported yet.
decoder: DecoderBuffer,
/// Buffer.
// Receives the encoded bytes taken out of `decoder`, so that they can be
// passed to `f` as a string.
buf: &'a mut [u8],
/// Result of the last output function call.
// Once this becomes `Break`, writing fails with `fmt::Error`.
result: ControlFlow<B>,
/// Dummy field for the type parameter of the return type of the function `f`.
_r: PhantomData<fn() -> B>,
}
impl<F, B> DecomposeWriter<'_, F, B>
where
    F: FnMut(PctEncodedFragments<'_>) -> ControlFlow<B>,
{
    /// Returns `Ok(_)` if the stored result is `Continue`, and `Err(_)` otherwise.
    ///
    /// `fmt::Write` can only report `fmt::Error`, so a break requested by the
    /// output function is reported as an error, and the break value itself is
    /// kept in `self.result`.
    #[inline(always)]
    fn result_continue_or_err(&self) -> fmt::Result {
        if self.result.is_break() {
            return Err(fmt::Error);
        }
        Ok(())
    }

    /// Calls the output function with the undecodable fragments.
    ///
    /// The first `len_undecodable` bytes of `self.buf` are reported as
    /// follows, in this order:
    ///
    /// 1. the complete triplets as `InvalidUtf8PctTriplets` (this fragment is
    ///    emitted even if there is no complete triplet and it is empty),
    /// 2. `StrayPercent` if an incomplete triplet follows, and
    /// 3. the single byte after that `%` (if any) as `NoPctStr`.
    ///
    /// # Errors
    ///
    /// Returns `Err(fmt::Error)` as soon as the output function returns
    /// `ControlFlow::Break(_)`. The remaining fragments are not reported then.
    ///
    /// # Panics
    ///
    /// Panics if the first `len_undecodable` bytes of `self.buf` are not
    /// valid UTF-8.
    fn output_as_undecodable(&mut self, len_undecodable: u8) -> fmt::Result {
        let len_written = usize::from(len_undecodable);
        let frag = core::str::from_utf8(&self.buf[..len_written])
            .expect("[validity] `DecoderBuffer` writes a valid ASCII string");
        // A triplet takes three bytes, so the remainder is the incomplete part.
        let len_incomplete = len_written % 3;
        let len_complete = len_written - len_incomplete;
        self.result = (self.f)(PctEncodedFragments::InvalidUtf8PctTriplets(
            &frag[..len_complete],
        ));
        self.result_continue_or_err()?;
        if len_incomplete > 0 {
            // At least the first `%` exists.
            self.result = (self.f)(PctEncodedFragments::StrayPercent);
            self.result_continue_or_err()?;
            if len_incomplete > 1 {
                // A following hexdigit is available.
                debug_assert_eq!(
                    len_incomplete, 2,
                    "[consistency] the length of incomplete percent-encoded \
                     triplet must be less than 3 bytes"
                );
                self.result = (self.f)(PctEncodedFragments::NoPctStr(
                    &frag[(len_complete + 1)..len_written],
                ));
                self.result_continue_or_err()?;
            }
        }
        Ok(())
    }
}
// Feeds the written strings to the decoder and reports the resulting
// fragments to the output function.
impl<F, B> fmt::Write for DecomposeWriter<'_, F, B>
where
F: FnMut(PctEncodedFragments<'_>) -> ControlFlow<B>,
{
// Decomposes `s` into fragments and passes them to the output function.
//
// Fails with `fmt::Error` without doing anything if the output function has
// already requested a break, and fails as soon as it requests one.
// Incomplete percent-encoded triplets at the end of `s` stay in the decoder
// and are carried over to the next call.
fn write_str(&mut self, s: &str) -> fmt::Result {
self.result_continue_or_err()?;
let mut rest = s;
while !rest.is_empty() {
let (len_consumed, result) = self.decoder.push_encoded(self.buf, rest);
if len_consumed == 0 {
// `rest` does not start with the percent-encoded triplets.
// Flush the decoder before attempting to decode more data.
// NOTE(review): `result` is not inspected on this path. When the decoder
// held bytes from a previous call, `push_encoded` appears to move them to
// `self.buf` and to report them as `Undecodable(len)` with `len > 0`
// while consuming nothing; confirm that such bytes are not dropped here.
// NOTE(review): `flush` is defined outside this view. The flushed bytes
// come from the decoder rather than from `rest`, so it is unclear why
// `rest` is advanced by the flushed length below — confirm.
if let Some(len_written) = self.decoder.flush(self.buf).map(NonZeroU8::get) {
self.output_as_undecodable(len_written)?;
rest = &rest[usize::from(len_written)..];
}
// Write plain string prefix (if found).
// The prefix ends right before the next `%`, or at the end of `rest`.
let (plain_prefix, suffix) = find_split(rest, b'%').unwrap_or((rest, ""));
debug_assert!(
!plain_prefix.is_empty(),
"[consistency] `len_consumed == 0` indicates non-empty \
`rest` not starting with `%`"
);
self.result = (self.f)(PctEncodedFragments::NoPctStr(plain_prefix));
self.result_continue_or_err()?;
rest = suffix;
continue;
}
// Process decoding result.
match result {
PushResult::Decoded(len_written, c) => {
// The encoded form of the character is in `self.buf[..len_written]`.
let len_written = usize::from(len_written.get());
let frag = core::str::from_utf8(&self.buf[..len_written])
.expect("[validity] `DecoderBuffer` writes a valid ASCII string");
self.result = (self.f)(PctEncodedFragments::Char(frag, c));
self.result_continue_or_err()?;
}
PushResult::Undecodable(len_written) => {
self.output_as_undecodable(len_written)?;
}
PushResult::NeedMoreBytes => {
// Nothing to write at this time.
}
}
rest = &rest[len_consumed..];
}
Ok(())
}
}
/// A type for result of feeding data to [`DecoderBuffer`].
///
/// The lengths carried by the variants count the encoded bytes that have
/// been written to the destination buffer passed to the decoder.
#[derive(Debug, Clone, Copy)]
enum PushResult {
/// Input is still incomplete, needs more bytes to get the decoding result.
NeedMoreBytes,
/// Bytes decodable to valid UTF-8 sequence.
// `.0`: Length of decodable fragment.
// `.1`: Decoded character.
Decoded(NonZeroU8, char),
/// Valid percent-encoded triplets but not decodable to valid UTF-8 sequence.
// `.0`: Length of undecodable fragment.
// The length can be zero, and it is not necessarily a multiple of 3: the
// fragment may end with an incomplete triplet (`%` or `%` and a hexdigit).
Undecodable(u8),
}
/// Buffer to contain (and to decode) incomplete percent-encoded triplets.
#[derive(Default, Debug, Clone, Copy)]
struct DecoderBuffer {
/// Percent-encoded triplets that possibly constitute a valid UTF-8 sequence after being decoded.
//
// `3 * 4`: 3 ASCII characters for single percent-encoded triplet, and
// 4 triplets at most for single Unicode codepoint in UTF-8.
encoded: [u8; 12],
/// Decoded bytes.
// One byte for each complete triplet in `encoded`.
decoded: [u8; 4],
/// Number of bytes available in `encoded` buffer.
///
/// `len_encoded / 3` also indicates the length of data in `decoded`.
len_encoded: u8,
}
impl DecoderBuffer {
/// Copies the first `remove_len` encoded bytes to `dest`, and removes them from the buffer.
///
/// The decoded bytes that correspond to the removed triplets are removed
/// as well, and the remaining content is moved to the front. When nothing
/// remains, the whole buffer is reset to the default (empty) state.
///
/// `dest` must be at least `remove_len` bytes long, and `remove_len` must
/// not be greater than the number of buffered bytes.
fn write_and_pop(&mut self, dest: &mut [u8], remove_len: u8) {
    let old_len = usize::from(self.len_encoded);
    let remaining = self.len_encoded - remove_len;
    let popped = usize::from(remove_len);
    dest[..popped].copy_from_slice(&self.encoded[..popped]);
    if remaining == 0 {
        *self = Self::default();
        return;
    }
    // Move the rest to the front. Every three encoded bytes correspond to
    // a single decoded byte.
    self.encoded.copy_within(popped..old_len, 0);
    self.decoded.copy_within((popped / 3)..(old_len / 3), 0);
    self.len_encoded = remaining;
}
/// Appends a byte of a (possible) percent-encoded triplet to the buffer.
///
/// When the byte completes a triplet, the two hexadecimal digits of the
/// triplet are decoded and the result is stored into `decoded`.
fn push_single_encoded_byte(&mut self, byte: u8) {
    debug_assert!(
        self.len_encoded < 12,
        "[consistency] four percent-encoded triplets are enough for a unicode code point"
    );
    let index = usize::from(self.len_encoded);
    self.len_encoded += 1;
    self.encoded[index] = byte;
    if self.len_encoded % 3 != 0 {
        // The current triplet is still incomplete.
        return;
    }
    // `byte` is the lower hexdigit of the triplet that has just completed,
    // and the upper hexdigit is the byte pushed right before it.
    let slot = usize::from(self.len_encoded / 3 - 1);
    let high = self.encoded[index - 1];
    let low = byte;
    debug_assert!(
        high.is_ascii_hexdigit() && low.is_ascii_hexdigit(),
        "[consistency] the `encoded` buffer should contain valid percent-encoded triplets"
    );
    self.decoded[slot] = hexdigits_to_byte([high, low]);
}
/// Pushes the (possibly) encoded string to the buffer.
///
/// When the push result is not `PctTripletPushResult::NeedMoreBytes`, the
/// caller should call `Self::clear()` before pushing more bytes.
///
/// # Preconditions
///
/// * `buf` should be more than 12 bytes. If not, this method may panic.
#[must_use]
pub(crate) fn push_encoded(&mut self, buf: &mut [u8], s: &str) -> (usize, PushResult) {
debug_assert!(
buf.len() >= 12,
"[internal precondition] destination buffer should be at least 12 bytes"
);
let mut chars = s.chars();
let mut len_triplet_incomplete = self.len_encoded % 3;
for c in &mut chars {
if len_triplet_incomplete == 0 {
// Expect `%`.
if c != '%' {
// Undecodable.
// `-1`: the last byte is peeked but not consumed.
let len_consumed = s.len() - chars.as_str().len() - 1;
let len_result = self.len_encoded;
self.write_and_pop(buf, len_result);
return (len_consumed, PushResult::Undecodable(len_result));
}
self.push_single_encoded_byte(b'%');
len_triplet_incomplete = 1;
continue;
}
// Expect a nibble.
if !c.is_ascii_hexdigit() {
// Undecodable.
// `-1`: the last byte is peeked but not consumed.
let len_consumed = s.len() - chars.as_str().len() - 1;
let len_result = self.len_encoded;
self.write_and_pop(buf, len_result);
return (len_consumed, PushResult::Undecodable(len_result));
}
self.push_single_encoded_byte(c as u8);
if len_triplet_incomplete == 1 {
len_triplet_incomplete = 2;
continue;
} else {
// Now a new percent-encoded triplet is read!
debug_assert_eq!(len_triplet_incomplete, 2);
len_triplet_incomplete = 0;
}
// Now a new percent-encoded triplet is read.
// Check if the buffer contains a valid decodable content.
let len_decoded = usize::from(self.len_encoded) / 3;
match core::str::from_utf8(&self.decoded[..len_decoded]) {
Ok(decoded_str) => {
// Successfully decoded.
let len_consumed = s.len() - chars.as_str().len();
let c = decoded_str
.chars()
.next()
.expect("[validity] `decoded` buffer is nonempty");
let len_result = NonZeroU8::new(self.len_encoded).expect(
"[consistency] `encoded` buffer is nonempty since \
`push_single_encoded_byte()` was called",
);
self.write_and_pop(buf, len_result.get());
return (len_consumed, PushResult::Decoded(len_result, c));
}
Err(e) => {
// Undecodable.
assert_eq!(
e.valid_up_to(),
0,
"[consistency] `decoded` buffer contains at most one character"
);
let skip_len_decoded = match e.error_len() {
// Unexpected EOF. Wait for remaining input.
None => continue,
// Skip invalid bytes.
Some(v) => v,
};
let len_consumed = s.len() - chars.as_str().len();
let len_result = skip_len_decoded as u8 * 3;
assert_ne!(
skip_len_decoded, 0,
"[consistency] empty bytes cannot be invalid"
);
self.write_and_pop(buf, len_result);
return (len_consumed, PushResult::Undecodable(len_result));
}
};
}
let len_consumed = s.len() - chars.as_str().len();
(len_consumed, PushResult::NeedMoreBytes)
}
/// Writes the incomplete data completely to the destination, and clears the internal buffer.
#[must_use]
pub(crate) fn flush(&mut self, buf: &mut [u8]) -> Option<NonZeroU8> {
let len_result = NonZeroU8::new(self.len_encoded)?;
// Emit the current (undecodable) buffer as is.
self.write_and_pop(buf, len_result.get());
debug_assert_eq!(
self.len_encoded, 0,
"[consistency] the buffer should be cleared after flushed"
);
Some(len_result)
}
}

476
vendor/iri-string/src/parser/trusted.rs vendored Normal file
View File

@@ -0,0 +1,476 @@
//! Fast parsers for trusted (already validated) input.
//!
//! Using these parsers in the wrong way will lead to unexpected, wrong results.
pub(crate) mod authority;
use core::cmp::Ordering;
use core::num::NonZeroUsize;
use crate::components::{RiReferenceComponents, Splitter};
use crate::format::eq_str_display;
use crate::normalize::{is_pct_case_normalized, NormalizedAsciiOnlyHost, NormalizednessCheckMode};
use crate::parser::str::{find_split2, find_split3, find_split4_hole, find_split_hole};
use crate::spec::Spec;
use crate::types::RiReferenceStr;
/// Consumes the `scheme` and the colon that follows it.
///
/// Returns `(rest, scheme)`.
///
/// This should be called at the head of an absolute IRI/URI.
///
/// # Panics
///
/// Panics if the input has no colon.
#[must_use]
fn scheme_colon(i: &str) -> (&str, &str) {
    find_split_hole(i, b':')
        .map(|(scheme, after_colon)| (after_colon, scheme))
        .expect("[precondition] absolute IRIs must have `scheme` part")
}
/// Consumes the `scheme` and the colon that follows it, if there is one.
///
/// Returns `(rest, Some(scheme))` when a scheme is found, and `(i, None)` otherwise.
///
/// This should be called at the head of an `IRI-reference` or similar.
#[must_use]
fn scheme_colon_opt(i: &str) -> (&str, Option<&str>) {
    // A scheme exists only when `:` is the first delimiter found among `:`, `/`, `?`, `#`.
    if let Some((scheme, b':', after_colon)) = find_split4_hole(i, b':', b'/', b'?', b'#') {
        return (after_colon, Some(scheme));
    }
    (i, None)
}
/// Consumes a leading `//` and the authority that follows it, if present.
///
/// Returns `(rest, Some(authority))` when the input starts with `//`, and
/// `(i, None)` otherwise.
///
/// This should be called at the head of an `IRI-reference`, or at the result of `scheme_colon`.
#[must_use]
fn slash_slash_authority_opt(i: &str) -> (&str, Option<&str>) {
    let after_slashes = if let Some(rest) = i.strip_prefix("//") {
        rest
    } else {
        return (i, None);
    };
    // `i` might match `path-abempty` (which can start with `//`), but that is not
    // allowed as `relative-part`, so the `path-abempty` rule needs no care here.
    // A slash, question mark, and hash character won't appear in `authority`.
    if let Some((authority, rest)) = find_split3(after_slashes, b'/', b'?', b'#') {
        (rest, Some(authority))
    } else {
        // Nothing follows the authority.
        ("", Some(after_slashes))
    }
}
/// Consumes the input up to (but excluding) the `?` or `#` that ends the path.
///
/// Returns `(rest, before_query)`.
#[must_use]
fn until_query(i: &str) -> (&str, &str) {
    // `?` won't appear before the query part.
    if let Some((before_query, rest)) = find_split2(i, b'?', b'#') {
        (rest, before_query)
    } else {
        ("", i)
    }
}
/// Splits the input into its query and fragment, if available.
///
/// Returns `(query, fragment)`, both without their leading `?` / `#`.
///
/// The string must start with `?` or `#`, or be empty.
#[must_use]
fn decompose_query_and_fragment(i: &str) -> (Option<&str>, Option<&str>) {
    let first = match i.as_bytes().first() {
        Some(&byte) => byte,
        None => return (None, None),
    };
    if first != b'?' {
        // No query: everything after the `#` is the fragment.
        debug_assert_eq!(first, b'#');
        return (None, Some(&i[1..]));
    }
    let after_mark = &i[1..];
    let (query, fragment) = match find_split_hole(after_mark, b'#') {
        Some((query, fragment)) => (query, Some(fragment)),
        None => (after_mark, None),
    };
    (Some(query), fragment)
}
/// Decomposes the given valid `IRI-reference` into its components.
#[must_use]
pub(crate) fn decompose_iri_reference<S: Spec>(
    i: &RiReferenceStr<S>,
) -> RiReferenceComponents<'_, S> {
    /// Computes the component boundaries of the string.
    ///
    /// This is a separate inner function to avoid unnecessary monomorphizations on `S`.
    fn decompose(i: &str) -> Splitter {
        let len = i.len();

        let (after_scheme, scheme) = scheme_colon_opt(i);
        let scheme_end = scheme.and_then(|s| NonZeroUsize::new(s.len()));

        // 2: "//".len()
        // `authority` does not contain the two slashes of `://`.
        let authority_start = len - after_scheme.len() + 2;
        let (after_authority, authority) = slash_slash_authority_opt(after_scheme);
        let authority_end = authority.and_then(|s| NonZeroUsize::new(authority_start + s.len()));

        let (after_path, _path) = until_query(after_authority);

        // Position just after the first `?` or `#` that follows the path.
        //
        // This could theoretically be zero if `len` is `usize::MAX` and the
        // string has neither a query nor a fragment. However, this is
        // practically impossible.
        let after_first_prefix = NonZeroUsize::new((len - after_path.len()).wrapping_add(1));
        let (query_start, fragment_start) = match decompose_query_and_fragment(after_path) {
            (Some(_), Some(fragment)) => {
                (after_first_prefix, NonZeroUsize::new(len - fragment.len()))
            }
            (Some(_), None) => (after_first_prefix, None),
            (None, Some(_)) => (None, after_first_prefix),
            (None, None) => (None, None),
        };

        Splitter::new(scheme_end, authority_end, query_start, fragment_start)
    }

    let splitter = decompose(i.as_str());
    RiReferenceComponents { iri: i, splitter }
}
/// Returns the `scheme` part of an IRI reference, if it has one.
///
/// # Precondition
///
/// The given string must be a valid IRI reference.
#[inline]
#[must_use]
pub(crate) fn extract_scheme(i: &str) -> Option<&str> {
    let (_rest, scheme) = scheme_colon_opt(i);
    scheme
}
/// Returns the `scheme` part of an absolute IRI.
///
/// # Precondition
///
/// The given string must be a valid absolute IRI.
#[inline]
#[must_use]
pub(crate) fn extract_scheme_absolute(i: &str) -> &str {
    let (_rest, scheme) = scheme_colon(i);
    scheme
}
/// Returns the `authority` part of an IRI reference, if it has one.
///
/// # Precondition
///
/// The given string must be a valid IRI reference.
#[inline]
#[must_use]
pub(crate) fn extract_authority(i: &str) -> Option<&str> {
    let after_scheme = scheme_colon_opt(i).0;
    let (_rest, authority) = slash_slash_authority_opt(after_scheme);
    authority
}
/// Returns the `authority` part of an absolute IRI, if it has one.
///
/// # Precondition
///
/// The given string must be a valid absolute IRI.
#[inline]
#[must_use]
pub(crate) fn extract_authority_absolute(i: &str) -> Option<&str> {
    let after_scheme = scheme_colon(i).0;
    let (_rest, authority) = slash_slash_authority_opt(after_scheme);
    authority
}
/// Returns the `authority` part of a relative IRI, if it has one.
///
/// # Precondition
///
/// The given string must be a valid relative IRI.
#[inline]
#[must_use]
pub(crate) fn extract_authority_relative(i: &str) -> Option<&str> {
    let (_rest, authority) = slash_slash_authority_opt(i);
    authority
}
/// Returns the `path` part of an IRI reference.
///
/// # Precondition
///
/// The given string must be a valid IRI reference.
#[inline]
#[must_use]
pub(crate) fn extract_path(i: &str) -> &str {
    let after_scheme = scheme_colon_opt(i).0;
    let after_authority = slash_slash_authority_opt(after_scheme).0;
    let (_rest, path) = until_query(after_authority);
    path
}
/// Returns the `path` part of an absolute IRI.
///
/// # Precondition
///
/// The given string must be a valid absolute IRI.
#[inline]
#[must_use]
pub(crate) fn extract_path_absolute(i: &str) -> &str {
    let after_scheme = scheme_colon(i).0;
    let after_authority = slash_slash_authority_opt(after_scheme).0;
    let (_rest, path) = until_query(after_authority);
    path
}
/// Returns the `path` part of a relative IRI.
///
/// # Precondition
///
/// The given string must be a valid relative IRI.
#[inline]
#[must_use]
pub(crate) fn extract_path_relative(i: &str) -> &str {
    let after_authority = slash_slash_authority_opt(i).0;
    let (_rest, path) = until_query(after_authority);
    path
}
/// Returns the `query` part of an IRI reference, if it has one.
///
/// The leading `?` is not included.
///
/// # Precondition
///
/// The given string must be a valid IRI reference.
#[inline]
#[must_use]
pub(crate) fn extract_query(i: &str) -> Option<&str> {
    let after_path = until_query(i).0;
    let (query, _fragment) = decompose_query_and_fragment(after_path);
    query
}
/// Returns the `query` part of an `absolute-IRI` string, if it has one.
///
/// The leading `?` is not included.
///
/// # Precondition
///
/// The given string must be a valid `absolute-IRI` string.
#[must_use]
pub(crate) fn extract_query_absolute_iri(i: &str) -> Option<&str> {
    let (after_path, _before_query) = until_query(i);
    if !after_path.is_empty() {
        // An `absolute-IRI` has no fragment, so the only possible delimiter is `?`.
        debug_assert_eq!(
            after_path.as_bytes().first(),
            Some(&b'?'),
            "`absolute-IRI` string must not have `fragment part"
        );
        return Some(&after_path[1..]);
    }
    None
}
/// Splits an IRI string into the part before the fragment and the fragment itself.
///
/// The leading `#` character is dropped from the fragment if the fragment part exists.
///
/// # Precondition
///
/// The given string must be a valid IRI reference.
#[inline]
#[must_use]
pub(crate) fn split_fragment(iri: &str) -> (&str, Option<&str>) {
    // Searching a valid IRI for the first `#` is enough to locate the fragment,
    // because the spec leaves no room for `#` in any component before it:
    //
    // * scheme: `scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )`
    //   --- [RFC 3986, section 3.1. Scheme](https://www.rfc-editor.org/rfc/rfc3986.html#section-3.1)
    // * authority: terminated by the next slash ("/"), question mark ("?"), or
    //   number sign ("#") character, or by the end of the URI.
    //   --- [RFC 3986, section 3.2. Authority](https://www.rfc-editor.org/rfc/rfc3986.html#section-3.2)
    // * path: terminated by the first question mark ("?") or number sign ("#")
    //   character, or by the end of the URI.
    //   --- [RFC 3986, section 3.3. Path](https://www.rfc-editor.org/rfc/rfc3986.html#section-3.3)
    // * query: indicated by the first question mark ("?") character and terminated
    //   by a number sign ("#") character or by the end of the URI.
    //   --- [RFC 3986, section 3.4. Query](https://www.rfc-editor.org/rfc/rfc3986.html#section-3.4)
    if let Some((prefix, fragment)) = find_split_hole(iri, b'#') {
        (prefix, Some(fragment))
    } else {
        (iri, None)
    }
}
/// Returns the fragment part of the given IRI, if it has one.
///
/// The leading `#` character of the fragment is not included.
#[inline]
#[must_use]
pub(crate) fn extract_fragment(iri: &str) -> Option<&str> {
    let (_prefix, fragment) = split_fragment(iri);
    fragment
}
/// Returns `true` if the string is normalized.
///
/// If this function returns `true`, normalization input and output will be identical.
///
/// In this function, "normalized" means that none of the normalizations below
/// would change the input:
///
/// * syntax-based normalization,
/// * case normalization,
/// * percent-encoding normalization, and
/// * path segment normalization.
///
/// Note that scheme-based normalization is not considered.
///
/// # Panics
///
/// Panics if the input has no `scheme` part (i.e. contains no colon).
#[must_use]
pub(crate) fn is_normalized<S: Spec>(i: &str, mode: NormalizednessCheckMode) -> bool {
    /// Drops the `/.//` path prefix that WHATWG serialization may emit, if present.
    fn without_whatwg_prefix(path: &str) -> &str {
        path.strip_prefix("/.//").unwrap_or(path)
    }

    let (after_scheme, scheme) = scheme_colon(i);
    let (after_authority, authority) = slash_slash_authority_opt(after_scheme);
    let (_after_path, path) = until_query(after_authority);

    // Syntax-based normalization: uppercase chars in `scheme` should be
    // converted to lowercase.
    if scheme.as_bytes().iter().any(u8::is_ascii_uppercase) {
        return false;
    }

    // Case normalization: ASCII alphabets in a US-ASCII only `host` should be
    // normalized to lowercase, and ASCII alphabets in percent-encoding
    // triplets should be normalized to uppercase.
    // Percent-encoding normalization: unreserved characters should be decoded
    // in `userinfo`, `host`, `path`, `query`, and `fragment`.
    //
    // Note that `authority` can have a percent-encoded `userinfo`.
    if let Some(authority) = authority {
        let components = authority::decompose_authority(authority);

        // Check `host`.
        let host = components.host();
        let host_is_normalized = match is_ascii_only_host(host) {
            true => eq_str_display(host, &NormalizedAsciiOnlyHost::new(host)),
            // If the host is not ASCII-only, conversion to lowercase is not performed.
            false => is_pct_case_normalized::<S>(host),
        };
        if !host_is_normalized {
            return false;
        }

        // Check percent encodings in `userinfo`.
        let userinfo_is_normalized = components
            .userinfo()
            .map_or(true, |userinfo| is_pct_case_normalized::<S>(userinfo));
        if !userinfo_is_normalized {
            return false;
        }
    }

    // Check `path`.
    //
    // Path segment normalization: the path should not have dot segments (`.`
    // and/or `..`). `%2e` and `%2E` need no special care here since `.` is
    // unreserved and they will be decoded if not normalized.
    // Also note that WHATWG serialization will use `/.//` as a path prefix if
    // the path is absolute and won't modify the path if the path is relative.
    let dot_segment_check_span = match (authority, mode) {
        (Some(_), _) | (None, NormalizednessCheckMode::Rfc3986) => Some(path),
        (None, NormalizednessCheckMode::Default) => Some(without_whatwg_prefix(path)),
        (None, NormalizednessCheckMode::PreserveAuthoritylessRelativePath) => {
            if path.starts_with('/') {
                // Absolute.
                Some(without_whatwg_prefix(path))
            } else {
                // Relative. Treat the path as "opaque". No span to check.
                None
            }
        }
    };
    let has_dot_segment = dot_segment_check_span.map_or(false, |span| {
        span.split('/').any(|segment| segment == "." || segment == "..")
    });
    if has_dot_segment {
        return false;
    }

    // Percent-encoding normalization of everything after the authority
    // (`path`, `query`, and `fragment`).
    is_pct_case_normalized::<S>(after_authority)
}
/// Decodes a pair of hexdigits into the byte they represent.
///
/// # Preconditions
///
/// Both `upper` and `lower` should be ASCII hexadecimal digits.
#[must_use]
pub(super) fn hexdigits_to_byte([upper, lower]: [u8; 2]) -> u8 {
    /// Converts one ASCII hexdigit into its 4-bit value.
    fn nibble(digit: u8) -> u8 {
        // The high nibble of the ASCII code tells the character class apart:
        // `0x3_` for digits, `0x4_` for `A`-`F`, and `0x6_` for `a`-`f`.
        let base = match (digit & 0xf0).cmp(&0x40) {
            Ordering::Less => b'0',
            Ordering::Equal => b'A' - 10,
            Ordering::Greater => b'a' - 10,
        };
        digit - base
    }

    let high = nibble(upper);
    let low = nibble(lower);
    (high << 4) + low
}
/// Decodes the first two hexdigit bytes of the string into a byte.
///
/// Returns the decoded byte and the rest of the string after the two digits.
///
/// # Panics
///
/// Panics if the string does not start with two hexdigits.
#[must_use]
pub(crate) fn take_xdigits2(s: &str) -> (u8, &str) {
    let mut bytes = s.bytes();
    let mut next_xdigit = || {
        bytes
            .next()
            .expect("[validity] at least two bytes should follow the `%` in a valid IRI reference")
    };
    let upper_xdigit = next_xdigit();
    let lower_xdigit = next_xdigit();
    (hexdigits_to_byte([upper_xdigit, lower_xdigit]), &s[2..])
}
/// Returns `true` if the given `host`/`ihost` string consists of only US-ASCII characters.
///
/// Percent-encoded triplets are decoded before being checked, so a triplet that
/// encodes a non-ASCII byte makes the host non-ASCII.
///
/// # Precondition
///
/// The given string should be valid `host` or `host ":" port` string.
#[must_use]
pub(crate) fn is_ascii_only_host(mut host: &str) -> bool {
    loop {
        let next_interesting = host
            .char_indices()
            .find(|&(_, c)| c == '%' || !c.is_ascii());
        let (pos, found) = match next_interesting {
            Some(hit) => hit,
            // Neither non-ASCII characters nor percent-encoded characters remain.
            None => return true,
        };
        if found != '%' {
            // Non-ASCII character found.
            debug_assert!(!found.is_ascii());
            return false;
        }

        // Percent-encoded character found.
        let (byte, rest) = take_xdigits2(&host[(pos + 1)..]);
        if !byte.is_ascii() {
            return false;
        }
        host = rest;
    }
}

View File

@@ -0,0 +1,32 @@
//! Parsers for trusted `authority` string.
use crate::components::AuthorityComponents;
use crate::parser::str::{find_split_hole, rfind_split2};
/// Decomposes the authority into `(userinfo, host, port)`.
///
/// The result records where the host starts and ends inside `authority`.
/// The `:` that leads the port is not part of the host.
///
/// # Precondition
///
/// The given string must be a valid `authority` of an IRI reference.
#[inline]
#[must_use]
pub(crate) fn decompose_authority(authority: &str) -> AuthorityComponents<'_> {
    // Skip `userinfo "@"` if present.
    let (host_start, host_and_port) =
        if let Some((userinfo, rest)) = find_split_hole(authority, b'@') {
            (userinfo.len() + 1, rest)
        } else {
            (0, authority)
        };

    // Search backwards for `:` or `]`. A port exists only when `:` is the one found.
    let colon_port_len = match rfind_split2(host_and_port, b':', b']') {
        Some((_, suffix)) => {
            if suffix.starts_with(':') {
                suffix.len()
            } else {
                0
            }
        }
        None => 0,
    };
    let host_end = authority.len() - colon_port_len;

    AuthorityComponents {
        authority,
        host_start,
        host_end,
    }
}

223
vendor/iri-string/src/parser/validate.rs vendored Normal file
View File

@@ -0,0 +1,223 @@
//! Validating parsers for non-trusted (possibly invalid) input.
mod authority;
mod path;
use crate::parser::char;
use crate::parser::str::{
find_split, find_split2_hole, find_split_hole, satisfy_chars_with_pct_encoded,
};
use crate::spec::Spec;
use crate::validate::{Error, ErrorKind};
pub(crate) use self::authority::{validate_authority, validate_host, validate_userinfo};
pub(crate) use self::path::{validate_path, validate_path_segment};
use self::path::{
validate_path_abempty, validate_path_absolute_authority_absent,
validate_path_relative_authority_absent,
};
/// Returns `Ok(_)` if the string matches `scheme`.
pub(crate) fn validate_scheme(i: &str) -> Result<(), Error> {
let bytes = i.as_bytes();
if !i.is_empty()
&& bytes[0].is_ascii_alphabetic()
&& bytes[1..]
.iter()
.all(|&b| b.is_ascii() && char::is_ascii_scheme_continue(b))
{
Ok(())
} else {
Err(Error::with_kind(ErrorKind::InvalidScheme))
}
}
/// Returns `Ok(_)` if the string matches `query` or `iquery`.
pub(crate) fn validate_query<S: Spec>(i: &str) -> Result<(), Error> {
let is_valid =
satisfy_chars_with_pct_encoded(i, char::is_ascii_frag_query, char::is_nonascii_query::<S>);
if is_valid {
Ok(())
} else {
Err(Error::with_kind(ErrorKind::InvalidQuery))
}
}
/// Returns `Ok(_)` if the string matches `authority path-abempty` rule sequence.
///
/// The authority is validated first, then the path.
fn validate_authority_path_abempty<S: Spec>(i: &str) -> Result<(), Error> {
    // Without any slash, the whole string is the authority and the path is empty.
    let (maybe_authority, maybe_path) = find_split(i, b'/').unwrap_or((i, ""));
    validate_authority::<S>(maybe_authority)
        .and_then(|()| validate_path_abempty::<S>(maybe_path))
}
/// Returns `Ok(_)` if the string matches `URI`/`IRI` rules.
///
/// The string must be absolute, and may have a fragment.
#[inline]
pub(crate) fn validate_uri<S: Spec>(i: &str) -> Result<(), Error> {
    let rule = UriReferenceRule::Absolute;
    validate_uri_reference_common::<S>(i, rule)
}
/// Returns `Ok(_)` if the string matches `URI-reference`/`IRI-reference` rules.
///
/// The string may be either absolute or relative.
#[inline]
pub(crate) fn validate_uri_reference<S: Spec>(i: &str) -> Result<(), Error> {
    let rule = UriReferenceRule::Any;
    validate_uri_reference_common::<S>(i, rule)
}
/// Returns `Ok(_)` if the string matches `absolute-URI`/`absolute-IRI` rules.
///
/// The string must be absolute, and must not have a fragment.
#[inline]
pub(crate) fn validate_absolute_uri<S: Spec>(i: &str) -> Result<(), Error> {
    let rule = UriReferenceRule::AbsoluteWithoutFragment;
    validate_uri_reference_common::<S>(i, rule)
}
/// Syntax rule to validate URI/IRI references against.
#[derive(Clone, Copy, PartialEq, Eq, Hash)]
enum UriReferenceRule {
    /// `URI` and `IRI`.
    ///
    /// Must be absolute. This can have a fragment.
    Absolute,
    /// `absolute-URI` and `absolute-IRI`.
    ///
    /// Must be absolute. This cannot have a fragment.
    AbsoluteWithoutFragment,
    /// `URI-reference` and `IRI-reference`.
    ///
    /// This can be relative, and can have a fragment.
    Any,
}

impl UriReferenceRule {
    /// Returns `true` if a relative reference is allowed by the rule.
    #[inline]
    #[must_use]
    fn is_relative_allowed(self) -> bool {
        matches!(self, Self::Any)
    }

    /// Returns `true` if the fragment part is allowed by the rule.
    #[inline]
    #[must_use]
    fn is_fragment_allowed(self) -> bool {
        self != Self::AbsoluteWithoutFragment
    }
}
/// Returns `Ok(_)` if the string matches `URI-reference`/`IRI-reference` rules.
fn validate_uri_reference_common<S: Spec>(
i: &str,
ref_rule: UriReferenceRule,
) -> Result<(), Error> {
// Validate `scheme ":"`.
let (i, _scheme) = match find_split_hole(i, b':') {
None => {
if ref_rule.is_relative_allowed() {
return validate_relative_ref::<S>(i);
} else {
return Err(Error::with_kind(ErrorKind::UnexpectedRelative));
}
}
Some((maybe_scheme, rest)) => {
if validate_scheme(maybe_scheme).is_err() {
// The string before the first colon is not a scheme.
// Falling back to `relative-ref` parsing.
if ref_rule.is_relative_allowed() {
return validate_relative_ref::<S>(i);
} else {
return Err(Error::with_kind(ErrorKind::InvalidScheme));
}
}
(rest, maybe_scheme)
}
};
// Validate `hier-part`.
let after_path = match i.strip_prefix("//") {
Some(i) => {
let (maybe_authority_path, after_path) = match find_split2_hole(i, b'?', b'#') {
Some((maybe_authority_path, c, rest)) => (maybe_authority_path, Some((c, rest))),
None => (i, None),
};
validate_authority_path_abempty::<S>(maybe_authority_path)?;
after_path
}
None => {
let (maybe_path, after_path) = match find_split2_hole(i, b'?', b'#') {
Some((maybe_path, c, rest)) => (maybe_path, Some((c, rest))),
None => (i, None),
};
// Authority is absent.
validate_path_absolute_authority_absent::<S>(maybe_path)?;
after_path
}
};
// Validate `[ "?" query ] [ "#" fragment ]`.
if let Some((first, rest)) = after_path {
validate_after_path::<S>(first, rest, ref_rule.is_fragment_allowed())?;
}
Ok(())
}
/// Returns `Ok(_)` if the string matches `relative-ref`/`irelative-ref` rules.
pub(crate) fn validate_relative_ref<S: Spec>(i: &str) -> Result<(), Error> {
    // Validate `relative-part`.
    let (relative_part, has_authority) = match i.strip_prefix("//") {
        Some(after_slashes) => (after_slashes, true),
        None => (i, false),
    };
    let (before_query, after_path) = match find_split2_hole(relative_part, b'?', b'#') {
        Some((before_query, delim, rest)) => (before_query, Some((delim, rest))),
        None => (relative_part, None),
    };
    if has_authority {
        validate_authority_path_abempty::<S>(before_query)?;
    } else {
        // Authority is absent.
        validate_path_relative_authority_absent::<S>(before_query)?;
    }

    // Validate `[ "?" query ] [ "#" fragment ]`.
    // A relative reference can always have a fragment.
    match after_path {
        Some((first, rest)) => validate_after_path::<S>(first, rest, true),
        None => Ok(()),
    }
}
/// Returns `Ok(_)` if the string matches `[ "?" query ] [ "#" fragment ]` (or IRI version).
fn validate_after_path<S: Spec>(first: u8, rest: &str, accept_fragment: bool) -> Result<(), Error> {
let (maybe_query, maybe_fragment) = if first == b'?' {
match find_split_hole(rest, b'#') {
Some(v) => v,
None => (rest, ""),
}
} else {
debug_assert_eq!(first, b'#');
("", rest)
};
validate_query::<S>(maybe_query)?;
if !accept_fragment && !maybe_fragment.is_empty() {
return Err(Error::with_kind(ErrorKind::UnexpectedFragment));
}
validate_fragment::<S>(maybe_fragment)
}
/// Returns `Ok(_)` if the string matches `fragment`/`ifragment` rules.
pub(crate) fn validate_fragment<S: Spec>(i: &str) -> Result<(), Error> {
let is_valid = satisfy_chars_with_pct_encoded(
i,
char::is_ascii_frag_query,
char::is_nonascii_fragment::<S>,
);
if is_valid {
Ok(())
} else {
Err(Error::with_kind(ErrorKind::InvalidFragment))
}
}

View File

@@ -0,0 +1,302 @@
//! Parsers for authority.
use core::mem;
use crate::parser::char;
use crate::parser::str::{
find_split_hole, get_wrapped_inner, rfind_split_hole, satisfy_chars_with_pct_encoded,
strip_ascii_char_prefix,
};
use crate::spec::Spec;
use crate::validate::{Error, ErrorKind};
/// Returns `Ok(_)` if the string matches `userinfo` or `iuserinfo`.
pub(crate) fn validate_userinfo<S: Spec>(i: &str) -> Result<(), Error> {
let is_valid = satisfy_chars_with_pct_encoded(
i,
char::is_ascii_userinfo_ipvfutureaddr,
char::is_nonascii_userinfo::<S>,
);
if is_valid {
Ok(())
} else {
Err(Error::with_kind(ErrorKind::InvalidUserInfo))
}
}
/// Returns `true` if the string matches `dec-octet`.
///
/// In other words, this tests whether the string is a decimal number from "0"
/// to "255" written without any leading zeros.
#[must_use]
fn is_dec_octet(i: &str) -> bool {
    let digits = i.as_bytes();
    // One to three ASCII digits only.
    if digits.is_empty() || digits.len() > 3 || !digits.iter().all(u8::is_ascii_digit) {
        return false;
    }
    // A leading zero is allowed only for the single-digit "0".
    if digits.len() > 1 && digits[0] == b'0' {
        return false;
    }
    let value = digits
        .iter()
        .fold(0_u16, |acc, &digit| acc * 10 + u16::from(digit - b'0'));
    value <= 255
}
/// Returns `Ok(_)` if the string matches `IPv4address`.
fn validate_ipv4address(i: &str) -> Result<(), Error> {
/// Returns `Ok(_)` if the string matches `IPv4address`, or `Err(())` if not.
fn validate_ipv4address_impl(i: &str) -> Result<(), ()> {
let (first, rest) = find_split_hole(i, b'.').ok_or(())?;
if !is_dec_octet(first) {
return Err(());
}
let (second, rest) = find_split_hole(rest, b'.').ok_or(())?;
if !is_dec_octet(second) {
return Err(());
}
let (third, fourth) = find_split_hole(rest, b'.').ok_or(())?;
if is_dec_octet(third) && is_dec_octet(fourth) {
Ok(())
} else {
Err(())
}
}
validate_ipv4address_impl(i).map_err(|_| Error::with_kind(ErrorKind::InvalidHost))
}
/// A part of IPv6 addr.
///
/// Each variant describes the shape of one component taken from the head of
/// an IPv6 address string.
#[derive(Clone, Copy)]
enum V6AddrPart {
/// `[0-9a-fA-F]{1,4}::`.
///
/// A hexadecimal group immediately followed by the `::` omission marker.
H16Omit,
/// `[0-9a-fA-F]{1,4}:`.
///
/// A hexadecimal group followed by a single colon.
H16Cont,
/// `[0-9a-fA-F]{1,4}`.
///
/// A hexadecimal group with no colon after it.
H16End,
/// IPv4 address.
V4,
/// `::`.
///
/// The omission marker with no hexadecimal group in front of it.
Omit,
}
/// Splits the IPv6 address string into the next component and the rest substring.
fn split_v6_addr_part(i: &str) -> Result<(&str, V6AddrPart), Error> {
debug_assert!(!i.is_empty());
match find_split_hole(i, b':') {
Some((prefix, rest)) => {
if prefix.len() >= 5 {
return Err(Error::with_kind(ErrorKind::InvalidHost));
}
if prefix.is_empty() {
return match strip_ascii_char_prefix(rest, b':') {
Some(rest) => Ok((rest, V6AddrPart::Omit)),
None => Err(Error::with_kind(ErrorKind::InvalidHost)),
};
}
// Should be `h16`.
debug_assert!((1..=4).contains(&prefix.len()));
if !prefix.bytes().all(|b| b.is_ascii_hexdigit()) {
return Err(Error::with_kind(ErrorKind::InvalidHost));
}
match strip_ascii_char_prefix(rest, b':') {
Some(rest) => Ok((rest, V6AddrPart::H16Omit)),
None => Ok((rest, V6AddrPart::H16Cont)),
}
}
None => {
if i.len() >= 5 {
// Possibly `IPv4address`.
validate_ipv4address(i)?;
return Ok(("", V6AddrPart::V4));
}
if i.bytes().all(|b| b.is_ascii_hexdigit()) {
Ok(("", V6AddrPart::H16End))
} else {
Err(Error::with_kind(ErrorKind::InvalidHost))
}
}
}
}
/// Returns `Ok(_)` if the string matches `IPv6address`.
fn validate_ipv6address(mut i: &str) -> Result<(), Error> {
let mut h16_count = 0;
let mut is_omitted = false;
while !i.is_empty() {
let (rest, part) = split_v6_addr_part(i)?;
match part {
V6AddrPart::H16Omit => {
h16_count += 1;
if mem::replace(&mut is_omitted, true) {
// Omitted twice.
return Err(Error::with_kind(ErrorKind::InvalidHost));
}
}
V6AddrPart::H16Cont => {
h16_count += 1;
if rest.is_empty() {
// `H16Cont` cannot be the last part of an IPv6 address.
return Err(Error::with_kind(ErrorKind::InvalidHost));
}
}
V6AddrPart::H16End => {
h16_count += 1;
break;
}
V6AddrPart::V4 => {
debug_assert!(rest.is_empty());
h16_count += 2;
break;
}
V6AddrPart::Omit => {
if mem::replace(&mut is_omitted, true) {
// Omitted twice.
return Err(Error::with_kind(ErrorKind::InvalidHost));
}
}
}
if h16_count > 8 {
return Err(Error::with_kind(ErrorKind::InvalidHost));
}
i = rest;
}
let is_valid = if is_omitted {
h16_count < 8
} else {
h16_count == 8
};
if is_valid {
Ok(())
} else {
Err(Error::with_kind(ErrorKind::InvalidHost))
}
}
/// Returns `Ok(_)` if the string matches `authority` or `iauthority`.
///
/// The `userinfo` (if any) is validated first, then the port is stripped, and
/// finally the host is validated.
pub(crate) fn validate_authority<S: Spec>(i: &str) -> Result<(), Error> {
    // Strip and validate `userinfo`.
    let host_and_port = match find_split_hole(i, b'@') {
        Some((maybe_userinfo, rest)) => {
            validate_userinfo::<S>(maybe_userinfo)?;
            rest
        }
        None => i,
    };

    // `host` can contain colons, but `port` cannot.
    // Strip `port` only when everything after the last colon is ASCII digits;
    // otherwise the colon belongs to the host.
    let maybe_host = match rfind_split_hole(host_and_port, b':') {
        Some((before_colon, maybe_port)) if maybe_port.bytes().all(|b| b.is_ascii_digit()) => {
            before_colon
        }
        _ => host_and_port,
    };

    // Validate `host`.
    validate_host::<S>(maybe_host)
}
/// Validates `host`.
pub(crate) fn validate_host<S: Spec>(i: &str) -> Result<(), Error> {
match get_wrapped_inner(i, b'[', b']') {
Some(maybe_addr) => {
// `IP-literal`.
// Note that `v` here is case insensitive. See RFC 3987 section 3.2.2.
if let Some(maybe_addr_rest) = strip_ascii_char_prefix(maybe_addr, b'v')
.or_else(|| strip_ascii_char_prefix(maybe_addr, b'V'))
{
// `IPvFuture`.
let (maybe_ver, maybe_addr) = find_split_hole(maybe_addr_rest, b'.')
.ok_or(Error::with_kind(ErrorKind::InvalidHost))?;
// Validate version.
if maybe_ver.is_empty() || !maybe_ver.bytes().all(|b| b.is_ascii_hexdigit()) {
return Err(Error::with_kind(ErrorKind::InvalidHost));
}
// Validate address.
if !maybe_addr.is_empty()
&& maybe_addr.is_ascii()
&& maybe_addr
.bytes()
.all(char::is_ascii_userinfo_ipvfutureaddr)
{
Ok(())
} else {
Err(Error::with_kind(ErrorKind::InvalidHost))
}
} else {
// `IPv6address`.
validate_ipv6address(maybe_addr)
}
}
None => {
// `IPv4address` or `reg-name`. No need to distinguish them here
// because `IPv4address` is also syntactically valid as `reg-name`.
let is_valid = satisfy_chars_with_pct_encoded(
i,
char::is_ascii_regname,
char::is_nonascii_regname::<S>,
);
if is_valid {
Ok(())
} else {
Err(Error::with_kind(ErrorKind::InvalidHost))
}
}
}
}
#[cfg(test)]
#[cfg(feature = "alloc")]
mod tests {
    use super::*;

    use alloc::format;

    /// Asserts that the parser accepts every given input.
    macro_rules! assert_validate {
        ($parser:expr, $($input:expr),* $(,)?) => {{
            $({
                let input = $input;
                let input: &str = input.as_ref();
                assert!($parser(input).is_ok(), "input={:?}", input);
            })*
        }};
    }

    #[test]
    fn test_ipv6address() {
        assert_validate!(validate_ipv6address, "a:bB:cCc:dDdD:e:F:a:B");
        assert_validate!(validate_ipv6address, "1:1:1:1:1:1:1:1");
        assert_validate!(validate_ipv6address, "1:1:1:1:1:1:1.1.1.1");
        assert_validate!(validate_ipv6address, "2001:db8::7");

        // Generate IPv6 addresses with `::`.

        // Builds `n` groups of `1` joined by colons (an empty string for `n == 0`).
        let groups = |n: usize| {
            let mut joined = "1:".repeat(n);
            // Drop the trailing colon.
            joined.pop();
            joined
        };

        for len_pref in 0..=7 {
            let prefix = groups(len_pref);
            for len_suf in 1..=(7 - len_pref) {
                let with_h16_suffix = format!("{}::{}", prefix, groups(len_suf));
                assert_validate!(validate_ipv6address, &with_h16_suffix);

                // An IPv4 address takes the room of two 16-bit groups.
                if len_suf > 2 {
                    let with_v4_suffix =
                        format!("{}::{}:1.1.1.1", prefix, groups(len_suf - 2));
                    assert_validate!(validate_ipv6address, &with_v4_suffix);
                } else if len_suf == 2 {
                    let with_v4_suffix = format!("{}::1.1.1.1", prefix);
                    assert_validate!(validate_ipv6address, &with_v4_suffix);
                }
            }
        }
    }
}

View File

@@ -0,0 +1,99 @@
//! Parsers for path.
use crate::parser::char;
use crate::parser::str::{find_split2_hole, satisfy_chars_with_pct_encoded};
use crate::spec::Spec;
use crate::validate::{Error, ErrorKind};
/// Returns `Ok(_)` if the string matches `path-abempty` or `ipath-abempty`.
pub(super) fn validate_path_abempty<S: Spec>(i: &str) -> Result<(), Error> {
if i.is_empty() {
return Ok(());
}
let i = match i.strip_prefix('/') {
Some(rest) => rest,
None => return Err(Error::with_kind(ErrorKind::InvalidPath)),
};
let is_valid = satisfy_chars_with_pct_encoded(
i,
char::is_ascii_pchar_slash,
S::is_nonascii_char_unreserved,
);
if is_valid {
Ok(())
} else {
Err(Error::with_kind(ErrorKind::InvalidPath))
}
}
/// Returns `Ok(_)` if the string matches `hier-part` or `ihier-part` modulo
/// `"//" authority path-abempty`.
pub(super) fn validate_path_absolute_authority_absent<S: Spec>(i: &str) -> Result<(), Error> {
if i.is_empty() {
// `path-empty`.
return Ok(());
}
if i.starts_with("//") {
unreachable!("this case should be handled by the caller");
}
let is_valid = satisfy_chars_with_pct_encoded(
i,
char::is_ascii_pchar_slash,
S::is_nonascii_char_unreserved,
);
if is_valid {
Ok(())
} else {
Err(Error::with_kind(ErrorKind::InvalidPath))
}
}
/// Validates the string against `relative-part` / `irelative-part`, excluding
/// the `"//" authority path-abempty` alternative.
///
/// A colon that appears before the first slash (as in `foo:bar`) is rejected,
/// since such a string does not match `path-noscheme`.
///
/// # Panics
///
/// Panics if the input starts with `//`; the caller is expected to have
/// handled that case already.
pub(super) fn validate_path_relative_authority_absent<S: Spec>(i: &str) -> Result<(), Error> {
    if i.starts_with("//") {
        unreachable!("this case should be handled by the caller");
    }
    // Look at whichever of `/` and `:` comes first, if any.
    if let Some((_, delim, _)) = find_split2_hole(i, b'/', b':') {
        if delim != b'/' {
            debug_assert_eq!(delim, b':');
            // `foo:bar`-style. This does not match `path-noscheme`.
            return Err(Error::with_kind(ErrorKind::InvalidPath));
        }
    }
    if !satisfy_chars_with_pct_encoded(
        i,
        char::is_ascii_pchar_slash,
        S::is_nonascii_char_unreserved,
    ) {
        return Err(Error::with_kind(ErrorKind::InvalidPath));
    }
    Ok(())
}
/// Returns `Ok(_)` if the string matches `path`/`ipath` rules.
pub(crate) fn validate_path<S: Spec>(i: &str) -> Result<(), Error> {
let is_valid = satisfy_chars_with_pct_encoded(
i,
char::is_ascii_pchar_slash,
S::is_nonascii_char_unreserved,
);
if is_valid {
Ok(())
} else {
Err(Error::with_kind(ErrorKind::InvalidPath))
}
}
/// Returns `Ok(_)` if the string matches `segment`/`isegment` rules.
pub(crate) fn validate_path_segment<S: Spec>(i: &str) -> Result<(), Error> {
let is_valid =
satisfy_chars_with_pct_encoded(i, char::is_ascii_pchar, S::is_nonascii_char_unreserved);
if is_valid {
Ok(())
} else {
Err(Error::with_kind(ErrorKind::InvalidPath))
}
}

378
vendor/iri-string/src/percent_encode.rs vendored Normal file
View File

@@ -0,0 +1,378 @@
//! Percent encoding.
use core::fmt::{self, Write as _};
use core::marker::PhantomData;
use crate::parser::char;
use crate::spec::{IriSpec, Spec, UriSpec};
/// A proxy to percent-encode a string as a part of URI.
pub type PercentEncodedForUri<T> = PercentEncoded<T, UriSpec>;
/// A proxy to percent-encode a string as a part of IRI.
pub type PercentEncodedForIri<T> = PercentEncoded<T, IriSpec>;
/// Context for percent encoding.
///
/// Selects which characters are written as is and which are percent-encoded
/// when a [`PercentEncoded`] value is formatted.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[non_exhaustive]
enum Context {
/// Encode the string as a reg-name (usually called a "hostname").
RegName,
/// Encode the string as a user name or a password (inside the `userinfo` component).
///
/// A colon (`:`) will always be encoded.
UserOrPassword,
/// Encode the string as a path segment.
///
/// A slash (`/`) will be encoded to `%2F`.
PathSegment,
/// Encode the string as path segments joined with `/`.
///
/// A slash (`/`) will be used as is.
Path,
/// Encode the string as a query string (without the `?` prefix).
///
/// A slash (`/`) will be used as is.
Query,
/// Encode the string as a fragment string (without the `#` prefix).
///
/// A slash (`/`) will be used as is.
Fragment,
/// Encode all characters except for `unreserved` characters.
Unreserve,
/// Encode characters only if they cannot appear anywhere in an IRI reference.
///
/// `%` character will be always encoded.
Character,
}
/// A proxy to percent-encode a string.
///
/// The wrapped value is not encoded eagerly: encoding happens while the proxy
/// is being formatted through its `Display` implementation.
///
/// Type aliases [`PercentEncodedForIri`] and [`PercentEncodedForUri`] are provided.
/// You can use them to make the expression simpler, for example write
/// `PercentEncodedForUri::from_path(foo)` instead of
/// `PercentEncoded::<_, UriSpec>::from_path(foo)`.
#[derive(Debug, Clone, Copy)]
pub struct PercentEncoded<T, S> {
/// Source string context (decides which characters are left unencoded).
context: Context,
/// Raw string before being encoded.
raw: T,
/// Spec marker. No value of type `S` is stored.
_spec: PhantomData<fn() -> S>,
}
impl<T: fmt::Display, S: Spec> PercentEncoded<T, S> {
/// Creates an encoded string from a raw reg-name (i.e. hostname or domain).
///
/// # Examples
///
/// ```
/// # #[cfg(feature = "alloc")] {
/// use iri_string::percent_encode::PercentEncoded;
/// use iri_string::spec::UriSpec;
///
/// let raw = "alpha.\u{03B1}.example.com";
/// let encoded = "alpha.%CE%B1.example.com";
/// assert_eq!(
/// PercentEncoded::<_, UriSpec>::from_reg_name(raw).to_string(),
/// encoded
/// );
/// # }
/// ```
pub fn from_reg_name(raw: T) -> Self {
Self {
context: Context::RegName,
raw,
_spec: PhantomData,
}
}
/// Creates an encoded string from a raw user name (inside `userinfo` component).
///
/// # Examples
///
/// ```
/// # #[cfg(feature = "alloc")] {
/// use iri_string::percent_encode::PercentEncoded;
/// use iri_string::spec::UriSpec;
///
/// let raw = "user:\u{03B1}";
/// // The first `:` will be interpreted as a delimiter, so colons will be escaped.
/// let encoded = "user%3A%CE%B1";
/// assert_eq!(
/// PercentEncoded::<_, UriSpec>::from_user(raw).to_string(),
/// encoded
/// );
/// # }
/// ```
pub fn from_user(raw: T) -> Self {
Self {
context: Context::UserOrPassword,
raw,
_spec: PhantomData,
}
}
/// Creates an encoded string from a raw password (inside `userinfo` component).
///
/// The same encoding context as [`from_user`][`Self::from_user`] is used.
///
/// # Examples
///
/// ```
/// # #[cfg(feature = "alloc")] {
/// use iri_string::percent_encode::PercentEncoded;
/// use iri_string::spec::UriSpec;
///
/// let raw = "password:\u{03B1}";
/// // The first `:` will be interpreted as a delimiter, and the colon
/// // inside the password will be the first one if the user name is empty,
/// // so colons will be escaped.
/// let encoded = "password%3A%CE%B1";
/// assert_eq!(
/// PercentEncoded::<_, UriSpec>::from_password(raw).to_string(),
/// encoded
/// );
/// # }
/// ```
pub fn from_password(raw: T) -> Self {
Self {
context: Context::UserOrPassword,
raw,
_spec: PhantomData,
}
}
/// Creates an encoded string from a raw path segment.
///
/// # Examples
///
/// ```
/// # #[cfg(feature = "alloc")] {
/// use iri_string::percent_encode::PercentEncoded;
/// use iri_string::spec::UriSpec;
///
/// let raw = "alpha/\u{03B1}?#";
/// // Note that `/` is encoded to `%2F`.
/// let encoded = "alpha%2F%CE%B1%3F%23";
/// assert_eq!(
/// PercentEncoded::<_, UriSpec>::from_path_segment(raw).to_string(),
/// encoded
/// );
/// # }
/// ```
pub fn from_path_segment(raw: T) -> Self {
Self {
context: Context::PathSegment,
raw,
_spec: PhantomData,
}
}
/// Creates an encoded string from a raw path.
///
/// # Examples
///
/// ```
/// # #[cfg(feature = "alloc")] {
/// use iri_string::percent_encode::PercentEncoded;
/// use iri_string::spec::UriSpec;
///
/// let raw = "alpha/\u{03B1}?#";
/// // Note that `/` is NOT percent encoded.
/// let encoded = "alpha/%CE%B1%3F%23";
/// assert_eq!(
/// PercentEncoded::<_, UriSpec>::from_path(raw).to_string(),
/// encoded
/// );
/// # }
/// ```
pub fn from_path(raw: T) -> Self {
Self {
context: Context::Path,
raw,
_spec: PhantomData,
}
}
/// Creates an encoded string from a raw query.
///
/// # Examples
///
/// ```
/// # #[cfg(feature = "alloc")] {
/// use iri_string::percent_encode::PercentEncoded;
/// use iri_string::spec::UriSpec;
///
/// let raw = "alpha/\u{03B1}?#";
/// let encoded = "alpha/%CE%B1?%23";
/// assert_eq!(
/// PercentEncoded::<_, UriSpec>::from_query(raw).to_string(),
/// encoded
/// );
/// # }
/// ```
pub fn from_query(raw: T) -> Self {
Self {
context: Context::Query,
raw,
_spec: PhantomData,
}
}
/// Creates an encoded string from a raw fragment.
///
/// # Examples
///
/// ```
/// # #[cfg(feature = "alloc")] {
/// use iri_string::percent_encode::PercentEncoded;
/// use iri_string::spec::UriSpec;
///
/// let raw = "alpha/\u{03B1}?#";
/// let encoded = "alpha/%CE%B1?%23";
/// assert_eq!(
/// PercentEncoded::<_, UriSpec>::from_fragment(raw).to_string(),
/// encoded
/// );
/// # }
/// ```
pub fn from_fragment(raw: T) -> Self {
Self {
context: Context::Fragment,
raw,
_spec: PhantomData,
}
}
/// Creates a string that consists only of `unreserved` characters and
/// percent-encoded triplets.
///
/// # Examples
///
/// ```
/// # #[cfg(feature = "alloc")] {
/// use iri_string::percent_encode::PercentEncoded;
/// use iri_string::spec::UriSpec;
///
/// let unreserved = "%a0-._~\u{03B1}";
/// let unreserved_encoded = "%25a0-._~%CE%B1";
/// assert_eq!(
/// PercentEncoded::<_, UriSpec>::unreserve(unreserved).to_string(),
/// unreserved_encoded
/// );
///
/// let reserved = ":/?#[]@ !$&'()*+,;=";
/// let reserved_encoded =
/// "%3A%2F%3F%23%5B%5D%40%20%21%24%26%27%28%29%2A%2B%2C%3B%3D";
/// assert_eq!(
/// PercentEncoded::<_, UriSpec>::unreserve(reserved).to_string(),
/// reserved_encoded
/// );
/// # }
/// ```
#[inline]
#[must_use]
pub fn unreserve(raw: T) -> Self {
Self {
context: Context::Unreserve,
raw,
_spec: PhantomData,
}
}
/// Percent-encodes characters only if they cannot appear anywhere in an IRI reference.
///
/// `%` character will be always encoded. In other words, this conversion
/// is not aware of percent-encoded triplets.
///
/// Note that this encoding process does not guarantee that the resulting
/// string is a valid IRI reference.
///
/// # Examples
///
/// ```
/// # #[cfg(feature = "alloc")] {
/// use iri_string::percent_encode::PercentEncoded;
/// use iri_string::spec::UriSpec;
///
/// let unreserved = "%a0-._~\u{03B1}";
/// let unreserved_encoded = "%25a0-._~%CE%B1";
/// assert_eq!(
/// PercentEncoded::<_, UriSpec>::characters(unreserved).to_string(),
/// unreserved_encoded
/// );
///
/// let reserved = ":/?#[]@ !$&'()*+,;=";
/// // Note that a space (U+0020) cannot appear directly in an IRI reference,
/// // so it is encoded to `%20`.
/// let expected = ":/?#[]@%20!$&'()*+,;=";
/// assert_eq!(
/// PercentEncoded::<_, UriSpec>::characters(reserved).to_string(),
/// expected
/// );
/// # }
/// ```
#[inline]
#[must_use]
pub fn characters(raw: T) -> Self {
Self {
context: Context::Character,
raw,
_spec: PhantomData,
}
}
}
impl<T: fmt::Display, S: Spec> fmt::Display for PercentEncoded<T, S> {
    /// Formats the raw value, percent-encoding every character that is not
    /// allowed as is in the configured context.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        /// Writer adaptor that encodes each character before forwarding it,
        /// when the context requires so.
        struct Filter<'a, 'b, S> {
            /// Encoding context.
            context: Context,
            /// Destination formatter.
            writer: &'a mut fmt::Formatter<'b>,
            /// Spec marker.
            _spec: PhantomData<fn() -> S>,
        }

        impl<S: Spec> fmt::Write for Filter<'_, '_, S> {
            fn write_str(&mut self, s: &str) -> fmt::Result {
                for c in s.chars() {
                    self.write_char(c)?;
                }
                Ok(())
            }

            fn write_char(&mut self, c: char) -> fmt::Result {
                // Decide whether the character may be emitted without encoding.
                let keep_as_is = if c.is_ascii() {
                    let byte = c as u8;
                    match self.context {
                        Context::RegName => char::is_ascii_regname(byte),
                        Context::UserOrPassword => {
                            c != ':' && char::is_ascii_userinfo_ipvfutureaddr(byte)
                        }
                        Context::PathSegment => char::is_ascii_pchar(byte),
                        Context::Path => c == '/' || char::is_ascii_pchar(byte),
                        Context::Query | Context::Fragment => {
                            c == '/' || char::is_ascii_frag_query(byte)
                        }
                        Context::Unreserve => char::is_ascii_unreserved(byte),
                        Context::Character => char::is_ascii_unreserved_or_reserved(byte),
                    }
                } else {
                    match self.context {
                        Context::RegName => char::is_nonascii_regname::<S>(c),
                        Context::UserOrPassword => char::is_nonascii_userinfo::<S>(c),
                        Context::PathSegment | Context::Path | Context::Unreserve => {
                            S::is_nonascii_char_unreserved(c)
                        }
                        Context::Query => char::is_nonascii_query::<S>(c),
                        Context::Fragment => char::is_nonascii_fragment::<S>(c),
                        Context::Character => {
                            S::is_nonascii_char_unreserved(c) || S::is_nonascii_char_private(c)
                        }
                    }
                };

                if !keep_as_is {
                    return write_pct_encoded_char(&mut self.writer, c);
                }
                self.writer.write_char(c)
            }
        }

        // Route the `Display` output of the raw value through the filter.
        let mut encoder = Filter {
            context: self.context,
            writer: f,
            _spec: PhantomData::<fn() -> S>,
        };
        write!(encoder, "{}", self.raw)
    }
}
/// Writes the character as percent-encoded triplets.
///
/// The character is encoded to UTF-8, and every resulting byte is written as
/// `%XX` with two uppercase hexadecimal digits.
#[inline]
fn write_pct_encoded_char<W: fmt::Write>(writer: &mut W, c: char) -> fmt::Result {
    // A `char` takes at most 4 bytes in UTF-8.
    let mut utf8 = [0_u8; 4];
    for byte in c.encode_utf8(&mut utf8).as_bytes() {
        write!(writer, "%{:02X}", byte)?;
    }
    Ok(())
}

55
vendor/iri-string/src/raw.rs vendored Normal file
View File

@@ -0,0 +1,55 @@
//! Raw IRI strings manipulation.
//!
//! Note that functions in this module may operate on raw `&str` types.
//! It is the caller's responsibility to guarantee that the given string satisfies the precondition.
#[cfg(all(feature = "alloc", not(feature = "std")))]
use alloc::string::String;
#[cfg(feature = "alloc")]
use crate::parser::trusted as trusted_parser;
/// Replaces the fragment part of the string with the given one.
///
/// Any existing fragment (including its leading `#`) is removed first. When
/// `fragment` is `None`, nothing is appended afterwards, so the fragment is
/// simply removed.
#[cfg(feature = "alloc")]
pub(crate) fn set_fragment(s: &mut String, fragment: Option<&str>) {
    remove_fragment(s);
    let fragment = match fragment {
        Some(fragment) => fragment,
        None => return,
    };
    // `+ 1` is for the `#` delimiter.
    s.reserve(fragment.len() + 1);
    s.push('#');
    s.push_str(fragment);
}
/// Removes the fragment part from the string.
#[cfg(feature = "alloc")]
#[inline]
pub(crate) fn remove_fragment(s: &mut String) {
    // Cut at the first `#`. When there is none, truncating to the current
    // length leaves the string untouched.
    let hash_pos = s.find('#').unwrap_or(s.len());
    s.truncate(hash_pos);
}
/// Splits an owned string into the part before the fragment and the fragment.
///
/// The `#` delimiter is included in neither of the returned strings. When the
/// input has no fragment, the input is returned unchanged along with `None`.
///
/// # Panics
///
/// Panics if the character right after the prefix is not `#`, or if the
/// remaining prefix length is inconsistent with the parser result.
#[cfg(feature = "alloc")]
pub(crate) fn split_fragment_owned(mut s: String) -> (String, Option<String>) {
    let (prefix, fragment) = trusted_parser::split_fragment(&s);
    if fragment.is_none() {
        return (s, None);
    }
    let prefix_len = prefix.len();

    // `+ 1` skips the `#` delimiter, so the tail holds only the fragment.
    let tail = s.split_off(prefix_len + 1);
    // The head still ends with the `#` delimiter; drop it.
    let hash = s.pop();
    assert_eq!(hash, Some('#'));
    assert_eq!(s.len(), prefix_len);

    (s, Some(tail))
}

344
vendor/iri-string/src/resolve.rs vendored Normal file
View File

@@ -0,0 +1,344 @@
//! URI and IRI resolvers.
//!
//! # IRI resolution can fail without WHATWG URL Standard serialization
//!
//! ## Pure RFC 3986 algorithm
//!
//! Though this is not explicitly stated in RFC 3986, IRI resolution can fail.
//! Below are examples:
//!
//! * base=`scheme:`, ref=`.///bar`.
//! + Resulting IRI should have scheme `scheme` and path `//bar`, but does not have authority.
//! * base=`scheme:foo`, ref=`.///bar`.
//! + Resulting IRI should have scheme `scheme` and path `//bar`, but does not have authority.
//! * base=`scheme:`, ref=`/..//baz`.
//! + Resulting IRI should have scheme `scheme` and path `//baz`, but does not have authority.
//! * base=`scheme:foo/bar`, ref=`..//baz`.
//! + Resulting IRI should have scheme `scheme` and path `//baz`, but does not have authority.
//!
//! IRI without authority (note that this is different from "with empty authority")
//! cannot have a path starting with `//`, since it is ambiguous and can be
//! interpreted as an IRI with authority. For the above examples, `scheme://bar`
//! is not valid output, as `bar` in `scheme://bar` will be interpreted as an
//! authority, not a path.
//!
//! Thus, IRI resolution by pure RFC 3986 algorithm can fail for some abnormal
//! cases.
//!
//! Note that this kind of failure can happen only when the base IRI has no
//! authority (as in all of the examples above). This would be rare in the wild,
//! since many people would use an IRI with authority part, such as `http://`.
//!
//! If you are handling `scheme://`-style URIs and IRIs, don't worry about the
//! failure. Currently no cases are known to fail when at least one of the base
//! IRI or the relative IRI contains authorities.
//!
//! If you want this kind of abnormal IRI resolution to succeed and to be
//! idempotent, check the resolution result using
//! [`Normalized::ensure_rfc3986_normalizable`] (see the section below).
//!
//! ## WHATWG serialization
//!
//! To handle IRI resolution failure, WHATWG URL Standard defines serialization
//! algorithm for this kind of result, and it makes IRI resolution (and even
//! normalization) infallible and idempotent.
//!
//! IRI resolution and normalization provided by this crate automatically
//! applies this special rule if necessary, so they are infallible. If you want
//! to detect resolution/normalization failure, use
//! [`Normalized::ensure_rfc3986_normalizable`] method.
//!
//! ## Examples
//!
//! ```
//! # #[cfg(feature = "alloc")] {
//! use iri_string::format::ToDedicatedString;
//! use iri_string::types::{IriAbsoluteStr, IriReferenceStr};
//!
//! let base = IriAbsoluteStr::new("scheme:")?;
//! {
//! let reference = IriReferenceStr::new(".///not-a-host")?;
//! let result = reference.resolve_against(base);
//! assert!(result.ensure_rfc3986_normalizable().is_err());
//! assert_eq!(result.to_dedicated_string(), "scheme:/.//not-a-host");
//! }
//!
//! {
//! let reference2 = IriReferenceStr::new("/..//not-a-host")?;
//! // Resulting string will be `scheme://not-a-host`, but `not-a-host`
//! // should be a path segment, not a host. So, the semantically correct
//! // target IRI cannot be represented by RFC 3986 IRI resolution.
//! let result2 = reference2.resolve_against(base);
//! assert!(result2.ensure_rfc3986_normalizable().is_err());
//!
//! // Algorithm defined in WHATWG URL Standard addresses this case.
//! assert_eq!(result2.to_dedicated_string(), "scheme:/.//not-a-host");
//! }
//! # }
//! # Ok::<_, iri_string::validate::Error>(())
//! ```
use crate::components::RiReferenceComponents;
use crate::normalize::{NormalizationInput, Normalized};
use crate::spec::Spec;
use crate::types::{RiAbsoluteStr, RiQueryStr, RiReferenceStr, RiStr};
/// A resolver against the fixed base.
///
/// The base IRI is decomposed into components once, when the resolver is
/// created, and the stored components are reused for each resolution.
#[derive(Debug, Clone, Copy)]
pub struct FixedBaseResolver<'a, S: Spec> {
/// Components of the base IRI.
base_components: RiReferenceComponents<'a, S>,
}
impl<'a, S: Spec> FixedBaseResolver<'a, S> {
/// Creates a new resolver with the given base.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// # // `ToDedicatedString` is available only when
/// # // `alloc` feature is enabled.
/// # #[cfg(feature = "alloc")] {
/// use iri_string::format::ToDedicatedString;
/// use iri_string::resolve::FixedBaseResolver;
/// use iri_string::types::{IriAbsoluteStr, IriReferenceStr};
///
/// let base = IriAbsoluteStr::new("http://example.com/base/")?;
/// let resolver = FixedBaseResolver::new(base);
///
/// let reference = IriReferenceStr::new("../there")?;
/// let resolved = resolver.resolve(reference);
///
/// assert_eq!(resolved.to_dedicated_string(), "http://example.com/there");
/// # }
/// # Ok::<_, Error>(())
/// ```
#[must_use]
pub fn new(base: &'a RiAbsoluteStr<S>) -> Self {
Self {
base_components: RiReferenceComponents::from(base.as_ref()),
}
}
/// Returns the base.
///
/// The returned reference has the lifetime `'a` of the base passed to
/// [`new`][`Self::new`], not the lifetime of the resolver itself.
///
/// # Examples
///
/// ```
/// use iri_string::resolve::FixedBaseResolver;
/// use iri_string::types::IriAbsoluteStr;
///
/// let base = IriAbsoluteStr::new("http://example.com/base/")?;
/// let resolver = FixedBaseResolver::new(base);
///
/// assert_eq!(resolver.base(), base);
/// # Ok::<_, iri_string::validate::Error>(())
/// ```
#[must_use]
pub fn base(&self) -> &'a RiAbsoluteStr<S> {
// SAFETY: `base_components` can only be created from `&RiAbsoluteStr<S>`,
// and the type of `base_components` does not allow modification of the
// content after it is created.
unsafe { RiAbsoluteStr::new_maybe_unchecked(self.base_components.iri().as_str()) }
}
}
/// Getters for the components of the base IRI.
///
/// These getters are more efficient than calling through the result of `.base()`.
impl<S: Spec> FixedBaseResolver<'_, S> {
    /// Returns the scheme of the base IRI.
    ///
    /// The colon following the scheme is not included.
    ///
    /// # Examples
    ///
    /// ```
    /// # use iri_string::validate::Error;
    /// use iri_string::resolve::FixedBaseResolver;
    /// use iri_string::types::IriAbsoluteStr;
    ///
    /// let base = IriAbsoluteStr::new("http://example.com/base/?query")?;
    /// let resolver = FixedBaseResolver::new(base);
    ///
    /// assert_eq!(resolver.scheme_str(), "http");
    /// assert_eq!(base.scheme_str(), "http");
    /// # Ok::<_, Error>(())
    /// ```
    #[inline]
    #[must_use]
    pub fn scheme_str(&self) -> &str {
        let scheme = self.base_components.scheme_str();
        scheme.expect("[validity] absolute IRI should have the scheme part")
    }

    /// Returns the authority of the base IRI, if present.
    ///
    /// The leading `//` is not included.
    ///
    /// # Examples
    ///
    /// ```
    /// # use iri_string::validate::Error;
    /// use iri_string::resolve::FixedBaseResolver;
    /// use iri_string::types::IriAbsoluteStr;
    ///
    /// let base = IriAbsoluteStr::new("http://user:pass@example.com/base/?query")?;
    /// let resolver = FixedBaseResolver::new(base);
    ///
    /// assert_eq!(resolver.authority_str(), Some("user:pass@example.com"));
    /// assert_eq!(base.authority_str(), Some("user:pass@example.com"));
    /// # Ok::<_, Error>(())
    /// ```
    #[inline]
    #[must_use]
    pub fn authority_str(&self) -> Option<&str> {
        self.base_components.authority_str()
    }

    /// Returns the path of the base IRI.
    ///
    /// # Examples
    ///
    /// ```
    /// # use iri_string::validate::Error;
    /// use iri_string::resolve::FixedBaseResolver;
    /// use iri_string::types::IriAbsoluteStr;
    ///
    /// let base = IriAbsoluteStr::new("http://user:pass@example.com/base/?query")?;
    /// let resolver = FixedBaseResolver::new(base);
    ///
    /// assert_eq!(resolver.path_str(), "/base/");
    /// assert_eq!(base.path_str(), "/base/");
    /// # Ok::<_, Error>(())
    /// ```
    #[inline]
    #[must_use]
    pub fn path_str(&self) -> &str {
        self.base_components.path_str()
    }

    /// Returns the query of the base IRI as a typed query string, if present.
    ///
    /// The leading question mark (`?`) is not included.
    ///
    /// # Examples
    ///
    /// ```
    /// # use iri_string::validate::Error;
    /// use iri_string::resolve::FixedBaseResolver;
    /// use iri_string::types::{IriAbsoluteStr, IriQueryStr};
    ///
    /// let base = IriAbsoluteStr::new("http://user:pass@example.com/base/?query")?;
    /// let resolver = FixedBaseResolver::new(base);
    /// let query = IriQueryStr::new("query")?;
    ///
    /// assert_eq!(resolver.query(), Some(query));
    /// assert_eq!(base.query(), Some(query));
    /// # Ok::<_, Error>(())
    /// ```
    #[inline]
    #[must_use]
    pub fn query(&self) -> Option<&RiQueryStr<S>> {
        self.query_str().map(|raw| {
            RiQueryStr::new(raw)
                .expect("[validity] must be valid query if present in an absolute-IRI")
        })
    }

    /// Returns the query of the base IRI as a raw string slice, if present.
    ///
    /// The leading question mark (`?`) is not included.
    ///
    /// # Examples
    ///
    /// ```
    /// # use iri_string::validate::Error;
    /// use iri_string::resolve::FixedBaseResolver;
    /// use iri_string::types::IriAbsoluteStr;
    ///
    /// let base = IriAbsoluteStr::new("http://user:pass@example.com/base/?query")?;
    /// let resolver = FixedBaseResolver::new(base);
    ///
    /// assert_eq!(resolver.query_str(), Some("query"));
    /// assert_eq!(base.query_str(), Some("query"));
    /// # Ok::<_, Error>(())
    /// ```
    #[inline]
    #[must_use]
    pub fn query_str(&self) -> Option<&str> {
        self.base_components.query_str()
    }
}
impl<'a, S: Spec> FixedBaseResolver<'a, S> {
    /// Resolves the given reference against the fixed base.
    ///
    /// The returned task does **not** normalize the resolution result.
    /// However, `..` and `.` path segments are recognized even when they are
    /// percent-encoded.
    ///
    /// # Failures
    ///
    /// This function itself does not fail, but resolution algorithm defined by
    /// RFC 3986 can fail. In that case, serialization algorithm defined by
    /// WHATWG URL Standard would be automatically applied.
    ///
    /// See the documentation of [`Normalized`].
    ///
    /// # Examples
    ///
    /// ```
    /// # use iri_string::validate::Error;
    /// # // `ToDedicatedString` is available only when
    /// # // `alloc` feature is enabled.
    /// # #[cfg(feature = "alloc")] {
    /// use iri_string::format::ToDedicatedString;
    /// use iri_string::resolve::FixedBaseResolver;
    /// use iri_string::types::{IriAbsoluteStr, IriReferenceStr};
    ///
    /// let base = IriAbsoluteStr::new("http://example.com/base/")?;
    /// let resolver = FixedBaseResolver::new(base);
    ///
    /// let reference = IriReferenceStr::new("../there")?;
    /// let resolved = resolver.resolve(reference);
    ///
    /// assert_eq!(resolved.to_dedicated_string(), "http://example.com/there");
    /// # }
    /// # Ok::<_, Error>(())
    /// ```
    ///
    /// Percent-encoded dot segments are handled as dot segments, while other
    /// percent-encoded triplets are left untouched:
    ///
    /// ```
    /// # use iri_string::validate::Error;
    /// # // `ToDedicatedString` is available only when
    /// # // `alloc` feature is enabled.
    /// # #[cfg(feature = "alloc")] {
    /// use iri_string::format::ToDedicatedString;
    /// use iri_string::resolve::FixedBaseResolver;
    /// use iri_string::types::{IriAbsoluteStr, IriReferenceStr};
    ///
    /// let base = IriAbsoluteStr::new("HTTP://example.COM/base/base2/")?;
    /// let resolver = FixedBaseResolver::new(base);
    ///
    /// // `%2e%2e` is recognized as `..`.
    /// // However, `dot%2edot` is NOT normalized into `dot.dot`.
    /// let reference = IriReferenceStr::new("%2e%2e/../dot%2edot")?;
    /// let resolved = resolver.resolve(reference);
    ///
    /// // Resolved but not normalized.
    /// assert_eq!(resolved.to_dedicated_string(), "HTTP://example.COM/dot%2edot");
    /// # }
    /// # Ok::<_, Error>(())
    /// ```
    #[inline]
    #[must_use]
    pub fn resolve(&self, reference: &'a RiReferenceStr<S>) -> Normalized<'a, RiStr<S>> {
        Normalized::from_input(NormalizationInput::with_resolution_params(
            &self.base_components,
            reference,
        ))
    }
}

34
vendor/iri-string/src/spec.rs vendored Normal file
View File

@@ -0,0 +1,34 @@
//! IRI specs.
use core::fmt;
// Note that this MUST be private module.
// See <https://rust-lang.github.io/api-guidelines/future-proofing.html> about
// sealed trait.
mod internal;
/// A trait for spec types.
///
/// This trait is not intended to be implemented by crate users: it is sealed
/// by the `internal::Sealed` supertrait, which lives in a private module.
// Note that all types which implement `Spec` also implement `SpecInternal`.
pub trait Spec: internal::Sealed + Copy + fmt::Debug {}
/// A type that represents specification of IRI.
///
/// This is an enum without variants, so no value of this type can exist;
/// it is used only as a type-level marker.
///
/// About IRI, see [RFC 3987].
///
/// [RFC 3987]: https://www.rfc-editor.org/rfc/rfc3987.html
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum IriSpec {}
impl Spec for IriSpec {}
/// A type that represents specification of URI.
///
/// This is an enum without variants, so no value of this type can exist;
/// it is used only as a type-level marker.
///
/// About URI, see [RFC 3986].
///
/// [RFC 3986]: https://www.rfc-editor.org/rfc/rfc3986.html
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum UriSpec {}
impl Spec for UriSpec {}

58
vendor/iri-string/src/spec/internal.rs vendored Normal file
View File

@@ -0,0 +1,58 @@
//! A private module for sealed trait and internal implementations.
//!
//! Note that this MUST be a private module.
//! See [Rust API Guidelines][sealed-trait] about the necessity of being private.
//!
//! [sealed-trait]:
//! https://rust-lang.github.io/api-guidelines/future-proofing.html#sealed-traits-protect-against-downstream-implementations-c-sealed
use crate::parser::char::is_ucschar;
use crate::spec::{IriSpec, UriSpec};
/// A trait to prohibit user-defined types from implementing `Spec`.
///
/// Every implementor must also implement [`SpecInternal`], since it is a
/// supertrait of this trait.
///
/// About sealed trait, see [Rust API Guidelines][future-proofing].
///
/// [future-proofing]: https://rust-lang.github.io/api-guidelines/future-proofing.html
pub trait Sealed: SpecInternal {}
// The spec marker types defined by this crate.
impl Sealed for IriSpec {}
impl Sealed for UriSpec {}
/// Internal implementations for spec types.
///
/// The functions are associated functions (no `self`), so they are called
/// through the spec type itself.
pub trait SpecInternal: Sized {
/// Checks if the given non-ASCII character matches `unreserved` or `iunreserved` rule.
#[must_use]
fn is_nonascii_char_unreserved(c: char) -> bool;
/// Checks if the given character matches `iprivate` rule.
#[must_use]
fn is_nonascii_char_private(c: char) -> bool;
}
impl SpecInternal for IriSpec {
    /// For IRI, the decision is delegated to `is_ucschar`.
    #[inline]
    fn is_nonascii_char_unreserved(c: char) -> bool {
        is_ucschar(c)
    }

    /// Returns `true` for characters in U+E000..=U+F8FF,
    /// U+F0000..=U+FFFFD, and U+100000..=U+10FFFD.
    fn is_nonascii_char_private(c: char) -> bool {
        matches!(
            c,
            '\u{E000}'..='\u{F8FF}' | '\u{F0000}'..='\u{FFFFD}' | '\u{100000}'..='\u{10FFFD}'
        )
    }
}
impl SpecInternal for UriSpec {
/// Always returns `false`: for the URI spec, no non-ASCII character is
/// treated as unreserved.
#[inline]
fn is_nonascii_char_unreserved(_: char) -> bool {
false
}
/// Always returns `false`: for the URI spec, no character is treated as
/// private.
#[inline]
fn is_nonascii_char_private(_: char) -> bool {
false
}
}

200
vendor/iri-string/src/template.rs vendored Normal file
View File

@@ -0,0 +1,200 @@
//! Processor for [RFC 6570] URI Template.
//!
//! [RFC 6570]: https://www.rfc-editor.org/rfc/rfc6570.html
//!
//! # Usage
//!
//! 1. Prepare a template.
//! * You can create a template as [`UriTemplateStr`]
#![cfg_attr(
feature = "alloc",
doc = " type (borrowed) or [`UriTemplateString`] type (owned)."
)]
#![cfg_attr(not(feature = "alloc"), doc = " type.")]
//! 2. Prepare a context.
//! * Create a value of type that implements [`Context`] trait.
#![cfg_attr(
feature = "alloc",
doc = " * Or, if you use [`SimpleContext`], insert key-value pairs into it."
)]
//! 3. Expand.
//! * Pass the context to [`UriTemplateStr::expand`] method of the template.
//! 4. Use the result.
//! * Returned [`Expanded`] object can be directly printed since it
//! implements [`Display`][`core::fmt::Display`] trait. Or, you can call
//! `.to_string()` method of the `alloc::string::ToString` trait to
//! convert it to a `String`.
//!
//! # Examples
//!
//! ## Custom context type
//!
//! For details, see [the documentation of `context` module][`context`].
//!
//! ```
//! # use iri_string::template::Error;
//! use core::fmt;
//! use iri_string::spec::{IriSpec, Spec, UriSpec};
//! use iri_string::template::UriTemplateStr;
//! use iri_string::template::context::{Context, VarName, Visitor};
//!
//! struct UserInfo {
//! username: &'static str,
//! utf8_available: bool,
//! }
//!
//! impl Context for UserInfo {
//! fn visit<V: Visitor>(
//! &self,
//! visitor: V,
//! ) -> V::Result {
//! match visitor.var_name().as_str() {
//! "username" => visitor.visit_string(self.username),
//! "utf8" => {
//! if self.utf8_available {
//! // U+2713 CHECK MARK
//! visitor.visit_string("\u{2713}")
//! } else {
//! visitor.visit_undefined()
//! }
//! }
//! _ => visitor.visit_undefined()
//! }
//! }
//! }
//!
//! let context = UserInfo {
//! username: "foo",
//! utf8_available: true,
//! };
//!
//! let template = UriTemplateStr::new("/users/{username}{?utf8}")?;
//!
//! # #[cfg(feature = "alloc")] {
//! assert_eq!(
//! template.expand::<UriSpec, _>(&context)?.to_string(),
//! "/users/foo?utf8=%E2%9C%93"
//! );
//! assert_eq!(
//! template.expand::<IriSpec, _>(&context)?.to_string(),
//! "/users/foo?utf8=\u{2713}"
//! );
//! # }
//! # Ok::<_, Error>(())
//! ```
//!
//! ## `SimpleContext` type (enabled by `alloc` feature flag)
//!
//! ```
//! # use iri_string::template::Error;
//! # #[cfg(feature = "alloc")] {
//! use iri_string::spec::{IriSpec, UriSpec};
//! use iri_string::template::UriTemplateStr;
//! use iri_string::template::simple_context::SimpleContext;
//!
//! let mut context = SimpleContext::new();
//! context.insert("username", "foo");
//! // U+2713 CHECK MARK
//! context.insert("utf8", "\u{2713}");
//!
//! let template = UriTemplateStr::new("/users/{username}{?utf8}")?;
//!
//! assert_eq!(
//! template.expand::<UriSpec, _>(&context)?.to_string(),
//! "/users/foo?utf8=%E2%9C%93"
//! );
//! assert_eq!(
//! template.expand::<IriSpec, _>(&context)?.to_string(),
//! "/users/foo?utf8=\u{2713}"
//! );
//! # }
//! # Ok::<_, Error>(())
//! ```
//!
#![cfg_attr(
feature = "alloc",
doc = "[`SimpleContext`]: `simple_context::SimpleContext`"
)]
mod components;
pub mod context;
mod error;
mod expand;
mod parser;
#[cfg(feature = "alloc")]
pub mod simple_context;
mod string;
pub use self::context::{Context, DynamicContext};
#[cfg(feature = "alloc")]
pub use self::error::CreationError;
pub use self::error::Error;
pub use self::expand::Expanded;
#[cfg(feature = "alloc")]
pub use self::string::UriTemplateString;
pub use self::string::{UriTemplateStr, UriTemplateVariables};
/// Deprecated old name of [`template::context::VarName`].
///
/// [`template::context::VarName`]: `components::VarName`
#[deprecated(
since = "0.7.1",
note = "renamed (moved) to `template::context::VarName`"
)]
pub type VarName<'a> = self::components::VarName<'a>;
/// Kind of a value bound to a template variable.
#[derive(Debug, Clone, Copy)]
enum ValueType {
    /// Undefined (i.e. null).
    Undefined,
    /// String value.
    String,
    /// List.
    List,
    /// Associative array.
    Assoc,
}

impl ValueType {
    /// Value type of an undefined variable.
    #[inline]
    #[must_use]
    pub const fn undefined() -> Self {
        Self::Undefined
    }

    /// Value type of a string variable.
    #[inline]
    #[must_use]
    pub const fn string() -> Self {
        Self::String
    }

    /// Value type of an empty list variable.
    ///
    /// Note that this is `Undefined`, not `List`.
    #[inline]
    #[must_use]
    pub const fn empty_list() -> Self {
        Self::Undefined
    }

    /// Value type of a nonempty list variable.
    #[inline]
    #[must_use]
    pub const fn nonempty_list() -> Self {
        Self::List
    }

    /// Value type of an empty associative array variable.
    ///
    /// Note that this is `Undefined`, not `Assoc`.
    #[inline]
    #[must_use]
    pub const fn empty_assoc() -> Self {
        Self::Undefined
    }

    /// Value type of a nonempty associative array variable.
    #[inline]
    #[must_use]
    pub const fn nonempty_assoc() -> Self {
        Self::Assoc
    }
}

View File

@@ -0,0 +1,332 @@
//! Syntax components of URI templates.
use core::mem;
use crate::parser::str::find_split_hole;
use crate::template::error::Error;
use crate::template::parser::validate as validate_parser;
/// Body of a template expression.
///
/// The wrapping braces (`{` and `}`) are not part of the stored string.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(super) struct ExprBody<'a>(&'a str);

impl<'a> ExprBody<'a> {
    /// Wraps the given string as an expression body.
    ///
    /// # Precondition
    ///
    /// The given string should be a valid expression body. Only non-emptiness
    /// is asserted here, and only when debug assertions are enabled.
    #[inline]
    #[must_use]
    pub(super) fn new(s: &'a str) -> Self {
        debug_assert!(
            !s.is_empty(),
            "[precondition] valid expression body is not empty"
        );
        ExprBody(s)
    }

    /// Splits the expression into its `operator` and `variable-list` parts.
    ///
    /// When the body starts with a byte that belongs to a variable name, the
    /// operator is [`Operator::String`] and the whole body is the variable
    /// list. Otherwise the first byte is the operator and the remainder is
    /// the variable list.
    ///
    /// # Panics
    ///
    /// May panic if the input is invalid.
    #[must_use]
    pub(super) fn decompose(&self) -> (Operator, VarListStr<'a>) {
        let body = self.0;
        debug_assert!(
            !body.is_empty(),
            "[precondition] valid expression body is not empty"
        );

        let first = body.as_bytes()[0];
        let starts_with_varname = first.is_ascii_alphanumeric() || first == b'_' || first == b'%';
        if starts_with_varname {
            // No operator: the first byte already belongs to the variable list.
            return (Operator::String, VarListStr::new(body));
        }

        match Operator::from_byte(first) {
            Some(op) => (op, VarListStr::new(&body[1..])),
            None => unreachable!(
                "[precondition] valid expression has (optional) \
                 valid operator, but got a byte {first:#02x?}"
            ),
        }
    }

    /// Returns the raw expression body as a string slice.
    #[inline]
    #[must_use]
    pub(super) fn as_str(&self) -> &'a str {
        let Self(raw) = *self;
        raw
    }
}
/// Name of a template variable.
// QUESTION: Should hexdigits in percent-encoded triplets be compared case sensitively?
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct VarName<'a>(&'a str);

impl<'a> VarName<'a> {
    /// Wraps a string that is already known to be a variable name.
    ///
    /// No validation is performed.
    ///
    /// # Precondition
    ///
    /// The given string should be a valid variable name.
    #[inline]
    #[must_use]
    pub(super) fn from_trusted(s: &'a str) -> Self {
        VarName(s)
    }

    /// Creates a `VarName` from the string, validating it first.
    ///
    /// # Errors
    ///
    /// Returns the validation error when the string is not a valid variable
    /// name.
    ///
    /// # Examples
    ///
    /// ```
    /// # use iri_string::template::Error;
    /// use iri_string::template::context::VarName;
    ///
    /// let name = VarName::new("hello")?;
    /// assert_eq!(name.as_str(), "hello");
    ///
    /// assert!(VarName::new("0+non-variable-name").is_err());
    ///
    /// # Ok::<_, Error>(())
    /// ```
    #[inline]
    pub fn new(s: &'a str) -> Result<Self, Error> {
        validate_parser::validate_varname(s, 0)?;
        Ok(Self::from_trusted(s))
    }

    /// Returns the variable name.
    #[inline]
    #[must_use]
    pub fn as_str(&self) -> &'a str {
        let Self(name) = *self;
        name
    }
}
/// Variable specifier: a variable name together with its optional modifier.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct VarSpec<'a> {
    /// Variable name.
    name: VarName<'a>,
    /// Variable modifier.
    modifier: Modifier,
}

impl<'a> VarSpec<'a> {
    /// Returns the variable name.
    #[inline]
    #[must_use]
    pub(super) fn name(&self) -> VarName<'a> {
        self.name
    }

    /// Returns the modifier.
    #[inline]
    #[must_use]
    pub(super) fn modifier(&self) -> Modifier {
        self.modifier
    }

    /// Parses the trusted varspec string.
    ///
    /// Three forms are recognized, in this order: `varname "*"` (explode),
    /// `varname ":" max-length` (prefix), and a bare `varname`.
    ///
    /// # Panics
    ///
    /// May panic if the input is invalid.
    #[must_use]
    pub(super) fn parse_trusted(s: &'a str) -> Self {
        // `varname "*"`.
        if let Some(varname) = s.strip_suffix('*') {
            return Self {
                name: VarName::from_trusted(varname),
                modifier: Modifier::Explode,
            };
        }

        // `varname ":" max-length`, or a bare `varname` when there is no colon.
        let (varname, modifier) = match find_split_hole(s, b':') {
            Some((varname, digits)) => {
                let max_len = digits
                    .parse::<u16>()
                    .expect("[precondition] the input should be valid `varspec`");
                (varname, Modifier::MaxLen(max_len))
            }
            None => (s, Modifier::None),
        };
        Self {
            name: VarName::from_trusted(varname),
            modifier,
        }
    }
}
/// Variable list, i.e. the part of an expression body after the operator.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(super) struct VarListStr<'a>(&'a str);

impl<'a> VarListStr<'a> {
    /// Wraps the given string as a variable list without validating it.
    ///
    /// # Precondition
    ///
    /// The given string should be a valid variable list.
    #[inline]
    #[must_use]
    pub(super) fn new(s: &'a str) -> Self {
        VarListStr(s)
    }
}

impl<'a> IntoIterator for VarListStr<'a> {
    type IntoIter = VarListIter<'a>;
    type Item = (usize, VarSpec<'a>);

    /// Starts iterating over the varspecs, with the whole list still pending.
    #[inline]
    fn into_iter(self) -> Self::IntoIter {
        let Self(rest) = self;
        VarListIter { rest }
    }
}
/// Iterator over the variable specs of a variable list.
#[derive(Debug, Clone)]
pub(super) struct VarListIter<'a> {
    /// Part of the variable list that has not been yielded yet.
    rest: &'a str,
}

impl<'a> Iterator for VarListIter<'a> {
    /// A pair of the length of the varspec and the varspec itself.
    type Item = (usize, VarSpec<'a>);

    fn next(&mut self) -> Option<Self::Item> {
        // A comma is still ahead: yield what precedes it and keep the tail.
        if let Some((spec, tail)) = find_split_hole(self.rest, b',') {
            self.rest = tail;
            return Some((spec.len(), VarSpec::parse_trusted(spec)));
        }

        // No comma left: either everything has been consumed, or the
        // remaining input is the last varspec.
        if self.rest.is_empty() {
            return None;
        }
        let last = mem::take(&mut self.rest);
        Some((last.len(), VarSpec::parse_trusted(last)))
    }
}
/// Variable modifier.
///
/// This is the optional suffix of a varspec: nothing, `":" max-length`, or `"*"`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub(super) enum Modifier {
/// No modifiers, i.e. the varspec is a bare `varname`.
None,
/// Max length, greater than 0 and less than 10000.
///
/// Written as `varname ":" max-length` in the template.
MaxLen(u16),
/// Explode the variable, e.g. the var spec has `*`.
Explode,
}
/// Operator that is possibly reserved for future extension.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub(super) enum MaybeOperator {
/// Working operator.
Operator(Operator),
/// Reserved for future extensions.
Reserved(OperatorReservedForFuture),
}
impl MaybeOperator {
/// Returns the operator for the given character.
pub(super) fn from_byte(b: u8) -> Option<Self> {
match b {
b'+' => Some(Self::Operator(Operator::Reserved)),
b'#' => Some(Self::Operator(Operator::Fragment)),
b'.' => Some(Self::Operator(Operator::Label)),
b'/' => Some(Self::Operator(Operator::PathSegments)),
b';' => Some(Self::Operator(Operator::PathParams)),
b'?' => Some(Self::Operator(Operator::FormQuery)),
b'&' => Some(Self::Operator(Operator::FormQueryCont)),
b'=' => Some(Self::Reserved(OperatorReservedForFuture::Equals)),
b',' => Some(Self::Reserved(OperatorReservedForFuture::Comma)),
b'!' => Some(Self::Reserved(OperatorReservedForFuture::Exclamation)),
b'@' => Some(Self::Reserved(OperatorReservedForFuture::AtSign)),
b'|' => Some(Self::Reserved(OperatorReservedForFuture::Pipe)),
_ => None,
}
}
}
/// Operator that the template processor can actually expand.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub(super) enum Operator {
    /// No operator. String expansion.
    String,
    /// Reserved expansion by `+`.
    Reserved,
    /// Fragment expansion by `#`.
    Fragment,
    /// Label expansion by `.`.
    Label,
    /// Path segments by `/`.
    PathSegments,
    /// Path-style parameters by `;`.
    PathParams,
    /// Form-style query by `?`.
    FormQuery,
    /// Form-style query continuation by `&`.
    FormQueryCont,
}

impl Operator {
    /// Returns the operator written as the given byte, or `None` when the
    /// byte is not an operator.
    ///
    /// [`Operator::String`] is never returned since it has no operator byte.
    #[must_use]
    pub(super) fn from_byte(b: u8) -> Option<Self> {
        let op = match b {
            b'+' => Self::Reserved,
            b'#' => Self::Fragment,
            b'.' => Self::Label,
            b'/' => Self::PathSegments,
            b';' => Self::PathParams,
            b'?' => Self::FormQuery,
            b'&' => Self::FormQueryCont,
            _ => return None,
        };
        Some(op)
    }

    /// Returns the length of the operator as written in a template: zero for
    /// [`Operator::String`] and one for every other operator.
    #[inline]
    #[must_use]
    pub(super) const fn len(self) -> usize {
        match self {
            Self::String => 0,
            Self::Reserved
            | Self::Fragment
            | Self::Label
            | Self::PathSegments
            | Self::PathParams
            | Self::FormQuery
            | Self::FormQueryCont => 1,
        }
    }
}
/// Operator reserved for future extension.
///
/// These bytes are recognized as operators by `MaybeOperator::from_byte`, but
/// they have no corresponding [`Operator`] variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub(super) enum OperatorReservedForFuture {
/// Reserved `=` operator.
Equals,
/// Reserved `,` operator.
Comma,
/// Reserved `!` operator.
Exclamation,
/// Reserved `@` operator.
AtSign,
/// Reserved `|` operator.
Pipe,
}

View File

@@ -0,0 +1,337 @@
//! Template expansion context.
//!
//! # Examples
//!
//! 1. Define your context type.
//! 2. Implement [`Context`] trait (and [`Context::visit`] method) for the type.
//! 1. Get variable name by [`Visitor::var_name`] method.
//! 2. Feed the corresponding value(s) by one of `Visitor::visit_*` methods.
//!
//! Note that contexts should return consistent results across multiple visits for
//! the same variable. In other words, `Context::visit` should return the same
//! result for the same `Visitor::var_name()` while the context is borrowed.
//! If this condition is violated, the URI template processor can return
//! an invalid result or panic at worst.
//!
//! ```
//! use iri_string::template::context::{Context, Visitor, ListVisitor, AssocVisitor};
//!
//! struct MyContext {
//! name: &'static str,
//! id: u64,
//! tags: &'static [&'static str],
//! children: &'static [(&'static str, usize)],
//! }
//!
//! impl Context for MyContext {
//! fn visit<V: Visitor>(&self, visitor: V) -> V::Result {
//! let name = visitor.var_name().as_str();
//! match name {
//! "name" => visitor.visit_string(self.name),
//! "id" => visitor.visit_string(self.id),
//! "tags" => visitor.visit_list().visit_items_and_finish(self.tags),
//! "children" => visitor
//! .visit_assoc()
//! .visit_entries_and_finish(self.children.iter().copied()),
//! _ => visitor.visit_undefined(),
//! }
//! }
//! }
//! ```
//
// # Developers note
//
// Visitor types **should not** be cloneable in order to enforce just one
// visitor is used to visit a variable. If visitors are cloneable, it can make
// the wrong usage to be available, i.e. storing cloned visitors somewhere and
// using the wrong one.
//
// However, if visitors are made cloneable by any chance, it does not indicate
// the whole implementation will be broken. Users can only use the visitors
// through visitor traits (and their API do not allow cloning), so the logic
// would work as expected if the internal usage of the visitors are correct.
// Making visitors noncloneable is an optional safety guard (with no overhead).
use core::fmt;
use core::ops::ControlFlow;
pub use crate::template::components::VarName;
/// A trait for types that can behave as a static URI template expansion context.
///
/// This trait is for use with the [`UriTemplateStr::expand`] method.
///
/// Implementors only receive `&self` when visited. A context that needs to
/// mutate itself while being visited can implement [`DynamicContext`] instead.
///
/// See [the module documentation][`crate::template`] for usage.
///
/// [`UriTemplateStr::expand`]: `crate::template::UriTemplateStr::expand`
pub trait Context: Sized {
/// Visits a variable.
///
/// The value of the variable is reported by calling one of the `visit_*`
/// methods of the given visitor and returning what that call produces.
///
/// To get variable name, use [`Visitor::var_name()`].
#[must_use]
fn visit<V: Visitor>(&self, visitor: V) -> V::Result;
}
/// A trait for types that can behave as a dynamic (mutable) URI template expansion context.
///
/// This type is for use with [`UriTemplateStr::expand_dynamic`] method and its
/// family.
///
/// Note that "dynamic" here does not mean that the value of variables can
/// change during a template expansion. The value should be fixed and consistent
/// during each expansion, but the context is allowed to mutate itself if it
/// does not break this rule.
///
/// # Examples
///
/// ```
/// # #[cfg(feature = "alloc")]
/// # extern crate alloc;
/// # use iri_string::template::Error;
/// # #[cfg(feature = "alloc")] {
/// # use alloc::string::String;
/// use iri_string::template::UriTemplateStr;
/// use iri_string::template::context::{DynamicContext, Visitor, VisitPurpose};
/// use iri_string::spec::UriSpec;
///
/// struct MyContext<'a> {
/// /// Target path.
/// target: &'a str,
/// /// Username.
/// username: Option<&'a str>,
/// /// A flag to remember whether the URI template
/// /// attempted to use `username` variable.
/// username_visited: bool,
/// }
///
/// impl DynamicContext for MyContext<'_> {
/// fn on_expansion_start(&mut self) {
/// // Reset the state.
/// self.username_visited = false;
/// }
/// fn visit_dynamic<V: Visitor>(&mut self, visitor: V) -> V::Result {
/// match visitor.var_name().as_str() {
/// "target" => visitor.visit_string(self.target),
/// "username" => {
/// if visitor.purpose() == VisitPurpose::Expand {
/// // The variable `username` is being used
/// // on the template expansion.
/// // Don't care whether `username` is defined or not.
/// self.username_visited = true;
/// }
/// if let Some(username) = &self.username {
/// visitor.visit_string(username)
/// } else {
/// visitor.visit_undefined()
/// }
/// }
/// _ => visitor.visit_undefined(),
/// }
/// }
/// }
///
/// let mut context = MyContext {
/// target: "/posts/1",
/// username: Some("the_admin"),
/// username_visited: false,
/// };
/// let mut buf = String::new();
///
/// // No access to the variable `username`.
/// let template1 = UriTemplateStr::new("{+target}")?;
/// template1.expand_dynamic::<UriSpec, _, _>(&mut buf, &mut context)?;
/// assert_eq!(buf, "/posts/1");
/// assert!(!context.username_visited);
///
/// buf.clear();
/// // Will access to the variable `username`.
/// let template2 = UriTemplateStr::new("{+target}{?username}")?;
/// template2.expand_dynamic::<UriSpec, _, _>(&mut buf, &mut context)?;
/// assert_eq!(buf, "/posts/1?username=the_admin");
/// assert!(context.username_visited);
///
/// buf.clear();
/// context.username = None;
/// // Will access to the variable `username` but it is undefined.
/// template2.expand_dynamic::<UriSpec, _, _>(&mut buf, &mut context)?;
/// assert_eq!(buf, "/posts/1");
/// assert!(
/// context.username_visited,
/// "`MyContext` can know and remember whether `visit_dynamic()` is called
/// for `username`, even if its value is undefined"
/// );
/// # }
/// # Ok::<_, Error>(())
/// ```
///
/// [`UriTemplateStr::expand_dynamic`]: `crate::template::UriTemplateStr::expand_dynamic`
pub trait DynamicContext: Sized {
/// Visits a variable.
///
/// The value of the variable is reported by calling one of the `visit_*`
/// methods of the given visitor and returning what that call produces.
///
/// To get variable name, use [`Visitor::var_name()`].
///
/// # Restriction
///
/// The visit results should be consistent and unchanged between the last
/// time [`on_expansion_start`][`Self::on_expansion_start`] was called and
/// the next time [`on_expansion_end`][`Self::on_expansion_end`] will be
/// called. If this condition is violated, template expansion will produce
/// wrong result or may panic at worst.
#[must_use]
fn visit_dynamic<V: Visitor>(&mut self, visitor: V) -> V::Result;
/// A callback that is called before the expansion of a URI template.
///
/// The default implementation does nothing.
#[inline]
fn on_expansion_start(&mut self) {}
/// A callback that is called after the expansion of a URI template.
///
/// The default implementation does nothing.
#[inline]
fn on_expansion_end(&mut self) {}
}
// Every static `Context` is also usable as a `DynamicContext`: visits are
// forwarded to `Context::visit`, and the expansion callbacks keep their
// default (empty) bodies.
impl<C: Context> DynamicContext for C {
    #[inline]
    fn visit_dynamic<V: Visitor>(&mut self, visitor: V) -> V::Result {
        <C as Context>::visit(self, visitor)
    }
}
/// A purpose of a visit.
///
/// A visitor reports its purpose through [`Visitor::purpose`].
///
/// This enum is nonexhaustive since this partially exposes the internal
/// implementation of the template expansion, and thus this is subject to
/// change.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[non_exhaustive]
pub enum VisitPurpose {
/// A visit for type checking.
Typecheck,
/// A visit for template expansion to retrieve the value.
///
/// Only visits with this purpose should be counted when tracking how a
/// variable is used by an expansion.
Expand,
}
/// Variable visitor.
///
/// A visitor is handed to [`Context::visit`] (or
/// [`DynamicContext::visit_dynamic`]) and is consumed by exactly one of the
/// `visit_*` methods, which tells the visitor the value of the variable.
///
/// This trait has a private supertrait, so the set of implementors is
/// controlled by this crate.
///
/// See [the module documentation][self] for usage.
// NOTE (internal): Visitor types **should not** be cloneable.
pub trait Visitor: Sized + private::Sealed {
/// Result of the visit.
type Result;
/// List visitor.
///
/// Returned by [`visit_list`][`Self::visit_list`]; it yields the same
/// result type as this visitor.
type ListVisitor: ListVisitor<Result = Self::Result>;
/// Associative array visitor.
///
/// Returned by [`visit_assoc`][`Self::visit_assoc`]; it yields the same
/// result type as this visitor.
type AssocVisitor: AssocVisitor<Result = Self::Result>;
/// Returns the name of the variable to visit.
#[must_use]
fn var_name(&self) -> VarName<'_>;
/// Returns the purpose of the visit.
///
/// The template expansion algorithm checks the types for some variables
/// depending on its usage. To get the usage count correctly, you should
/// only count visits with [`VisitPurpose::Expand`].
///
/// If you need to know whether the variable is accessed and does not
/// need dynamic context generation or access counts, consider using
/// [`UriTemplateStr::variables`] method to iterate the variables in the
/// URI template.
///
/// [`UriTemplateStr::variables`]: `crate::template::UriTemplateStr::variables`
#[must_use]
fn purpose(&self) -> VisitPurpose;
/// Visits an undefined variable, i.e. indicates that the requested variable is unavailable.
#[must_use]
fn visit_undefined(self) -> Self::Result;
/// Visits a string variable.
///
/// Any value implementing [`fmt::Display`] can be passed.
#[must_use]
fn visit_string<T: fmt::Display>(self, v: T) -> Self::Result;
/// Visits a list variable.
///
/// The items are fed through the returned [`ListVisitor`].
#[must_use]
fn visit_list(self) -> Self::ListVisitor;
/// Visits an associative array variable.
///
/// The entries are fed through the returned [`AssocVisitor`].
#[must_use]
fn visit_assoc(self) -> Self::AssocVisitor;
}
/// Visitor for the items of a list variable.
///
/// See [the module documentation][self] for usage.
// NOTE (internal): Visitor types **should not** be cloneable.
pub trait ListVisitor: Sized + private::Sealed {
    /// Result of the visit.
    type Result;

    /// Visits an item.
    ///
    /// If this returned `ControlFlow::Break(v)`, [`Context::visit`] should also
    /// return this `v`.
    ///
    /// To feed multiple items at once, do
    /// `items.into_iter().try_for_each(|item| self.visit_item(item))` for example.
    fn visit_item<T: fmt::Display>(&mut self, item: T) -> ControlFlow<Self::Result>;

    /// Finishes visiting the list.
    #[must_use]
    fn finish(self) -> Self::Result;

    /// Visits the given items in order, then finishes the list.
    ///
    /// When visiting an item breaks, the remaining items are not visited and
    /// the break value is returned instead of calling
    /// [`finish`][`Self::finish`].
    #[must_use]
    fn visit_items_and_finish<T, I>(mut self, items: I) -> Self::Result
    where
        T: fmt::Display,
        I: IntoIterator<Item = T>,
    {
        for item in items {
            if let ControlFlow::Break(result) = self.visit_item(item) {
                return result;
            }
        }
        self.finish()
    }
}
/// Visitor for the entries of an associative array variable.
///
/// See [the module documentation][self] for usage.
// NOTE (internal): Visitor types **should not** be cloneable.
pub trait AssocVisitor: Sized + private::Sealed {
    /// Result of the visit.
    type Result;

    /// Visits an entry.
    ///
    /// If this returned `ControlFlow::Break(v)`, [`Context::visit`] should also
    /// return this `v`.
    ///
    /// To feed multiple items at once, do
    /// `entries.into_iter().try_for_each(|(key, value)| self.visit_entry(key, value))`
    /// for example.
    fn visit_entry<K: fmt::Display, V: fmt::Display>(
        &mut self,
        key: K,
        value: V,
    ) -> ControlFlow<Self::Result>;

    /// Finishes visiting the associative array.
    #[must_use]
    fn finish(self) -> Self::Result;

    /// Visits the given entries in order, then finishes the associative array.
    ///
    /// When visiting an entry breaks, the remaining entries are not visited
    /// and the break value is returned instead of calling
    /// [`finish`][`Self::finish`].
    #[must_use]
    fn visit_entries_and_finish<K, V, I>(mut self, entries: I) -> Self::Result
    where
        K: fmt::Display,
        V: fmt::Display,
        I: IntoIterator<Item = (K, V)>,
    {
        for (key, value) in entries {
            if let ControlFlow::Break(result) = self.visit_entry(key, value) {
                return result;
            }
        }
        self.finish()
    }
}
/// Private module to put the trait to seal.
///
/// The visitor traits of the parent module require `Sealed` as a supertrait,
/// while this module itself is only visible to the parent of the enclosing
/// module.
pub(super) mod private {
/// A trait for visitor types of variables in a context.
pub trait Sealed {}
}

154
vendor/iri-string/src/template/error.rs vendored Normal file
View File

@@ -0,0 +1,154 @@
//! Errors related to URI templates.
use core::fmt;
#[cfg(feature = "std")]
use std::error;
/// Kind of a template construction or expansion error.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(super) enum ErrorKind {
    /// Cannot write to the backend.
    WriteFailed,
    /// Expression is not closed.
    ExpressionNotClosed,
    /// Invalid character.
    InvalidCharacter,
    /// Invalid expression.
    InvalidExpression,
    /// Invalid percent-encoded triplets.
    InvalidPercentEncoding,
    /// Invalid UTF-8 bytes.
    InvalidUtf8,
    /// Unexpected value type for the variable.
    UnexpectedValueType,
    /// Unsupported operator, including operators reserved for future.
    UnsupportedOperator,
}

impl ErrorKind {
    /// Returns the human-readable message for this kind of error.
    #[must_use]
    fn as_str(self) -> &'static str {
        let message: &'static str = match self {
            ErrorKind::WriteFailed => "failed to write to the backend writer",
            ErrorKind::ExpressionNotClosed => "expression not closed",
            ErrorKind::InvalidCharacter => "invalid character",
            ErrorKind::InvalidExpression => "invalid expression",
            ErrorKind::InvalidPercentEncoding => "invalid percent-encoded triplets",
            ErrorKind::InvalidUtf8 => "invalid utf-8 byte sequence",
            ErrorKind::UnexpectedValueType => "unexpected value type for the variable",
            ErrorKind::UnsupportedOperator => "unsupported operator",
        };
        message
    }
}
/// Error raised while constructing or expanding a URI template.
///
// Note that this type should implement `Copy` trait.
// To return additional non-`Copy` data as an error, use wrapper type
// (as `std::string::FromUtf8Error` contains `std::str::Utf8Error`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Error {
    /// Error kind.
    kind: ErrorKind,
    /// Location (byte position of the error).
    location: usize,
}

impl Error {
    /// Creates a new `Error` from its kind and the byte position it refers to.
    ///
    /// For internal use.
    #[inline]
    #[must_use]
    pub(super) fn new(kind: ErrorKind, location: usize) -> Self {
        Error { kind, location }
    }

    /// Returns the byte position at which the error was detected.
    ///
    /// NOTE: This is not a part of the public API since the value to be
    /// returned (i.e., the definition of the "position" of an error) is not
    /// guaranteed to be stable.
    #[cfg(test)]
    pub(super) fn location(&self) -> usize {
        let Self { location, .. } = *self;
        location
    }
}
impl fmt::Display for Error {
    /// Writes the message of the error kind followed by the byte position.
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let message = self.kind.as_str();
        let position = self.location;
        f.write_fmt(format_args!(
            "invalid URI template: {} (at {}-th byte)",
            message, position
        ))
    }
}

#[cfg(feature = "std")]
impl error::Error for Error {}
/// Error on conversion into a URI template type.
///
/// Besides the validation error, this keeps the value that failed to convert
/// so that the caller can take it back.
// TODO: Unifiable to `types::CreationError`?
#[cfg(feature = "alloc")]
pub struct CreationError<T> {
    /// Source data.
    source: T,
    /// Validation error.
    error: Error,
}

#[cfg(feature = "alloc")]
impl<T> CreationError<T> {
    /// Consumes the error and returns the source data.
    #[must_use]
    pub fn into_source(self) -> T {
        let Self { source, .. } = self;
        source
    }

    /// Returns the validation error.
    #[must_use]
    pub fn validation_error(&self) -> Error {
        let Self { error, .. } = self;
        *error
    }

    /// Creates a new `CreationError` from the validation error and the source data.
    #[must_use]
    pub(crate) fn new(error: Error, source: T) -> Self {
        CreationError { source, error }
    }
}
#[cfg(feature = "alloc")]
impl<T: fmt::Debug> fmt::Debug for CreationError<T> {
    /// Formats the error as a struct with its `source` and `error` fields.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self { source, error } = self;
        let mut repr = f.debug_struct("CreationError");
        repr.field("source", source);
        repr.field("error", error);
        repr.finish()
    }
}
#[cfg(feature = "alloc")]
impl<T: Clone> Clone for CreationError<T> {
    /// Clones the source data and copies the validation error.
    fn clone(&self) -> Self {
        let Self { source, error } = self;
        CreationError {
            source: T::clone(source),
            error: *error,
        }
    }
}
#[cfg(feature = "alloc")]
impl<T> fmt::Display for CreationError<T> {
    /// Displays the validation error; the source data is not printed.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Self { error, .. } = self;
        error.fmt(f)
    }
}

#[cfg(feature = "std")]
impl<T: fmt::Debug> error::Error for CreationError<T> {}

1036
vendor/iri-string/src/template/expand.rs vendored Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,6 @@
//! URI Template parser.
pub(super) mod char;
pub(super) mod validate;
pub(super) use self::validate::validate_template_str;

View File

@@ -0,0 +1,190 @@
//! Characters.
/// Properties of ASCII characters.
///
/// The table is indexed by the ASCII code of a character (0 to 127). Each
/// entry is a set of flags that are tested with the `CHARS_TABLE_MASK_*`
/// constants of this module:
///
/// * bit 0 (`CHARS_TABLE_MASK_LITERAL`): allowed in a literal,
/// * bit 1 (`CHARS_TABLE_MASK_VARCHAR_START`): allowed as the first character
///   of a variable name,
/// * bit 2 (`CHARS_TABLE_MASK_VARCHAR_CONTINUE`): allowed as a non-first
///   character of a variable name.
///
/// `%` has no flags set: percent-encoded triplets are checked separately by
/// the callers.
///
/// About `'` (single quote) being considered as a literal: see
/// [Errata ID 6937](https://www.rfc-editor.org/errata/eid6937).
const CHARS_TABLE: [u8; 128] = [
0b_0000_0000, // NUL
0b_0000_0000, // SOH
0b_0000_0000, // STX
0b_0000_0000, // ETX
0b_0000_0000, // EOT
0b_0000_0000, // ENQ
0b_0000_0000, // ACK
0b_0000_0000, // BEL
0b_0000_0000, // BS
0b_0000_0000, // HT
0b_0000_0000, // LF
0b_0000_0000, // VT
0b_0000_0000, // FF
0b_0000_0000, // CR
0b_0000_0000, // SO
0b_0000_0000, // SI
0b_0000_0000, // DLE
0b_0000_0000, // DC1
0b_0000_0000, // DC2
0b_0000_0000, // DC3
0b_0000_0000, // DC4
0b_0000_0000, // NAK
0b_0000_0000, // SYN
0b_0000_0000, // ETB
0b_0000_0000, // CAN
0b_0000_0000, // EM
0b_0000_0000, // SUB
0b_0000_0000, // ESC
0b_0000_0000, // FS
0b_0000_0000, // GS
0b_0000_0000, // RS
0b_0000_0000, // US
0b_0000_0000, // SPACE
0b_0000_0001, // !
0b_0000_0000, // "
0b_0000_0001, // #
0b_0000_0001, // $
0b_0000_0000, // %
0b_0000_0001, // &
0b_0000_0001, // '
0b_0000_0001, // (
0b_0000_0001, // )
0b_0000_0001, // *
0b_0000_0001, // +
0b_0000_0001, // ,
0b_0000_0001, // -
0b_0000_0101, // .
0b_0000_0001, // /
0b_0000_0111, // 0
0b_0000_0111, // 1
0b_0000_0111, // 2
0b_0000_0111, // 3
0b_0000_0111, // 4
0b_0000_0111, // 5
0b_0000_0111, // 6
0b_0000_0111, // 7
0b_0000_0111, // 8
0b_0000_0111, // 9
0b_0000_0001, // :
0b_0000_0001, // ;
0b_0000_0000, // <
0b_0000_0001, // =
0b_0000_0000, // >
0b_0000_0001, // ?
0b_0000_0001, // @
0b_0000_0111, // A
0b_0000_0111, // B
0b_0000_0111, // C
0b_0000_0111, // D
0b_0000_0111, // E
0b_0000_0111, // F
0b_0000_0111, // G
0b_0000_0111, // H
0b_0000_0111, // I
0b_0000_0111, // J
0b_0000_0111, // K
0b_0000_0111, // L
0b_0000_0111, // M
0b_0000_0111, // N
0b_0000_0111, // O
0b_0000_0111, // P
0b_0000_0111, // Q
0b_0000_0111, // R
0b_0000_0111, // S
0b_0000_0111, // T
0b_0000_0111, // U
0b_0000_0111, // V
0b_0000_0111, // W
0b_0000_0111, // X
0b_0000_0111, // Y
0b_0000_0111, // Z
0b_0000_0001, // [
0b_0000_0000, // \
0b_0000_0001, // ]
0b_0000_0000, // ^
0b_0000_0111, // _
0b_0000_0000, // `
0b_0000_0111, // a
0b_0000_0111, // b
0b_0000_0111, // c
0b_0000_0111, // d
0b_0000_0111, // e
0b_0000_0111, // f
0b_0000_0111, // g
0b_0000_0111, // h
0b_0000_0111, // i
0b_0000_0111, // j
0b_0000_0111, // k
0b_0000_0111, // l
0b_0000_0111, // m
0b_0000_0111, // n
0b_0000_0111, // o
0b_0000_0111, // p
0b_0000_0111, // q
0b_0000_0111, // r
0b_0000_0111, // s
0b_0000_0111, // t
0b_0000_0111, // u
0b_0000_0111, // v
0b_0000_0111, // w
0b_0000_0111, // x
0b_0000_0111, // y
0b_0000_0111, // z
0b_0000_0000, // {
0b_0000_0000, // |
0b_0000_0000, // }
0b_0000_0001, // ~
0b_0000_0000, // DEL
];
/// Flag of `CHARS_TABLE` entries for characters matching the `literals` rule
/// defined in [RFC 6570].
///
/// [RFC 6570]: https://www.rfc-editor.org/rfc/rfc6570.html#section-2.1
const CHARS_TABLE_MASK_LITERAL: u8 = 0b_0000_0001;

/// Flag of `CHARS_TABLE` entries for characters matching the `varchar` rule
/// defined in [RFC 6570], i.e. characters that can start a variable name.
///
/// [RFC 6570]: https://www.rfc-editor.org/rfc/rfc6570.html#section-2.3
const CHARS_TABLE_MASK_VARCHAR_START: u8 = 0b_0000_0010;

/// Flag of `CHARS_TABLE` entries for characters matching the `varchar` rule
/// defined in [RFC 6570] or a period, i.e. characters that can continue a
/// variable name.
///
/// [RFC 6570]: https://www.rfc-editor.org/rfc/rfc6570.html#section-2.3
const CHARS_TABLE_MASK_VARCHAR_CONTINUE: u8 = 0b_0000_0100;
/// Tells whether the given ASCII character may appear in a literal string.
///
/// # Precondition
///
/// The given byte should be an ASCII character, i.e. should be less than 128.
/// Larger values are out of range for the lookup table.
#[inline]
#[must_use]
pub(super) const fn is_ascii_literal_char(c: u8) -> bool {
    let props = CHARS_TABLE[c as usize];
    (props & CHARS_TABLE_MASK_LITERAL) != 0
}
/// Tells whether the given ASCII character may be the first character of a `varname`.
///
/// Note that this does not return true for `%` character. It is caller's
/// responsibility to test validity of percent-encoded triplets.
///
/// # Precondition
///
/// The given byte should be an ASCII character, i.e. should be less than 128.
/// Larger values are out of range for the lookup table.
#[inline]
#[must_use]
pub(super) const fn is_ascii_varchar_start(c: u8) -> bool {
    let props = CHARS_TABLE[c as usize];
    (props & CHARS_TABLE_MASK_VARCHAR_START) != 0
}
/// Tells whether the given ASCII character may be a non-first character of a `varname`.
///
/// Note that this does not return true for `%` character. It is caller's
/// responsibility to test validity of percent-encoded triplets.
///
/// # Precondition
///
/// The given byte should be an ASCII character, i.e. should be less than 128.
/// Larger values are out of range for the lookup table.
#[inline]
#[must_use]
pub(super) const fn is_ascii_varchar_continue(c: u8) -> bool {
    let props = CHARS_TABLE[c as usize];
    (props & CHARS_TABLE_MASK_VARCHAR_CONTINUE) != 0
}

View File

@@ -0,0 +1,161 @@
//! Validating parsers.
use crate::parser::str::{
find_split2_hole, find_split_hole, satisfy_chars_with_pct_encoded, starts_with_double_hexdigits,
};
use crate::template::components::MaybeOperator;
use crate::template::error::{Error, ErrorKind};
use crate::template::parser::char::{
is_ascii_literal_char, is_ascii_varchar_continue, is_ascii_varchar_start,
};
/// Returns `Ok(())` if the given string is a valid literal.
fn validate_literal(s: &str, offset: usize) -> Result<(), Error> {
match s
.chars()
.position(|c| !c.is_ascii() || !is_ascii_literal_char(c as u8))
{
Some(pos) => Err(Error::new(ErrorKind::InvalidCharacter, offset + pos)),
None => Ok(()),
}
}
/// Returns `Ok(())` if the given string is a valid varspec.
///
/// A varspec is a `varname`, optionally followed by either `":" max-length`
/// (prefix modifier) or `"*"` (explode modifier). `offset` is the position of
/// `s` inside the whole template and is used for the reported error position.
///
/// # Errors
///
/// Returns an `InvalidExpression` error when the variable name is invalid,
/// when `max-length` is empty, longer than four digits or contains a
/// non-digit, or when anything follows the `*` modifier.
fn validate_varspec(s: &str, offset: usize) -> Result<(), Error> {
    /// Maximum number of digits of `max-length`.
    ///
    /// RFC 6570 defines `max-length` as a positive integer less than 10000.
    /// Accepting a fifth digit would let values such as `99999` pass
    /// validation although they cannot be parsed into the `u16` used for
    /// the parsed modifier, which makes the trusted parser panic later.
    const MAX_LEN_DIGITS: usize = 4;

    match find_split2_hole(s, b':', b'*') {
        Some((maybe_varname, b':', maybe_len)) => {
            validate_varname(maybe_varname, offset)?;
            if !(1..=MAX_LEN_DIGITS).contains(&maybe_len.len()) {
                return Err(Error::new(
                    ErrorKind::InvalidExpression,
                    offset + maybe_varname.len() + 2,
                ));
            }
            if let Some(pos) = maybe_len.bytes().position(|b| !b.is_ascii_digit()) {
                return Err(Error::new(
                    ErrorKind::InvalidExpression,
                    offset + maybe_varname.len() + 2 + pos,
                ));
            }
        }
        Some((maybe_varname, b'*', extra)) => {
            validate_varname(maybe_varname, offset)?;
            // Nothing is allowed after the explode modifier.
            if !extra.is_empty() {
                return Err(Error::new(
                    ErrorKind::InvalidExpression,
                    offset + maybe_varname.len() + 1,
                ));
            }
        }
        Some((_, sep, _)) => unreachable!("[consistency] the byte {sep:#02x} is not searched"),
        None => validate_varname(s, offset)?,
    }
    Ok(())
}
/// Returns `Ok(())` if the given string is a valid varname.
pub(crate) fn validate_varname(s: &str, offset: usize) -> Result<(), Error> {
let rest = match s.as_bytes().first() {
Some(b'%') if starts_with_double_hexdigits(&s.as_bytes()[1..]) => &s[3..],
Some(b) if b.is_ascii() && is_ascii_varchar_start(*b) => &s[1..],
_ => return Err(Error::new(ErrorKind::InvalidExpression, offset)),
};
let is_valid = satisfy_chars_with_pct_encoded(rest, is_ascii_varchar_continue, |_| false);
if !is_valid {
return Err(Error::new(ErrorKind::InvalidExpression, offset));
}
Ok(())
}
/// Returns `Ok(())` if the given string is a valid expression.
///
/// "Expression" here is the expression body inside `{` and `}`, but not including braces.
fn validate_expr_body(s: &str, mut offset: usize) -> Result<(), Error> {
if s.is_empty() {
return Err(Error::new(ErrorKind::InvalidExpression, offset));
}
// Skip the operator.
let maybe_variable_list = match MaybeOperator::from_byte(s.as_bytes()[0]) {
Some(MaybeOperator::Operator(_)) => {
offset += 1;
&s[1..]
}
Some(MaybeOperator::Reserved(_)) => {
return Err(Error::new(ErrorKind::UnsupportedOperator, offset));
}
None => s,
};
// Validate varspecs.
for (spec_i, maybe_varspec) in maybe_variable_list.split(',').enumerate() {
if spec_i != 0 {
// Add the length of the leading separator `,`.
offset += 1;
}
validate_varspec(maybe_varspec, offset)?;
offset += maybe_varspec.len();
}
Ok(())
}
/// Validates whether the given string is valid as a URI template.
///
/// Returns `Ok(())` if the given string is a valid URI template.
pub(in crate::template) fn validate_template_str(s: &str) -> Result<(), Error> {
let mut rest = s;
let mut offset = 0;
while !rest.is_empty() {
rest = match find_split2_hole(rest, b'%', b'{') {
Some((literal, b'%', xdigits2_and_rest)) => {
validate_literal(literal, offset)?;
if xdigits2_and_rest.len() < 2 {
return Err(Error::new(
ErrorKind::InvalidPercentEncoding,
offset + literal.len(),
));
}
let (xdigits2, new_rest) = xdigits2_and_rest.split_at(2);
if !xdigits2.as_bytes()[0].is_ascii_hexdigit() {
return Err(Error::new(
ErrorKind::InvalidPercentEncoding,
offset + literal.len() + 1,
));
}
if !xdigits2.as_bytes()[1].is_ascii_hexdigit() {
return Err(Error::new(
ErrorKind::InvalidPercentEncoding,
offset + literal.len() + 2,
));
}
new_rest
}
Some((literal, b'{', expr_and_rest)) => {
validate_literal(literal, offset)?;
let (expr, new_rest) = match find_split_hole(expr_and_rest, b'}') {
Some(v) => v,
None => {
return Err(Error::new(
ErrorKind::ExpressionNotClosed,
offset + literal.len(),
))
}
};
// +1 is `+ "{".len()`.
validate_expr_body(expr, offset + literal.len() + 1)?;
new_rest
}
Some(_) => unreachable!("[consistency] searching only `%` and `{{`"),
None => return validate_literal(rest, offset),
};
offset = s.len() - rest.len();
}
Ok(())
}

View File

@@ -0,0 +1,218 @@
//! Simple general-purpose context type.
use core::ops::ControlFlow;
use alloc::collections::BTreeMap;
#[cfg(all(feature = "alloc", not(feature = "std")))]
use alloc::string::String;
#[cfg(all(feature = "alloc", not(feature = "std")))]
use alloc::vec::Vec;
use crate::template::context::{Context, VarName, Visitor};
/// Value of a variable stored in a [`SimpleContext`].
#[derive(Debug, Clone)]
pub enum Value {
    /// Undefined (i.e. null).
    Undefined,
    /// String value.
    String(String),
    /// List.
    List(Vec<String>),
    /// Associative array, as a sequence of key-value pairs.
    Assoc(Vec<(String, String)>),
}

impl From<&str> for Value {
    /// Creates a string value holding a copy of the given string slice.
    #[inline]
    fn from(v: &str) -> Self {
        Value::String(String::from(v))
    }
}

impl From<String> for Value {
    /// Creates a string value that takes ownership of the given string.
    #[inline]
    fn from(v: String) -> Self {
        Value::String(v)
    }
}
/// Simple template expansion context.
///
/// Stores owned [`Value`]s keyed by variable name.
#[derive(Default, Debug, Clone)]
pub struct SimpleContext {
/// Variable values, keyed by variable name.
// Any map type (including `HashMap`) would be ok, but the hash map is not
// provided by `alloc`.
//
// QUESTION: Should hexdigits in percent-encoded triplets in varnames be
// compared case sensitively?
variables: BTreeMap<String, Value>,
}
impl SimpleContext {
/// Creates a new empty context.
///
/// # Examples
///
/// ```
/// # use iri_string::template::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::spec::UriSpec;
/// use iri_string::template::UriTemplateStr;
/// use iri_string::template::simple_context::SimpleContext;
///
/// let empty_ctx = SimpleContext::new();
/// let template = UriTemplateStr::new("{no_such_variable}")?;
/// let expanded = template.expand::<UriSpec, _>(&empty_ctx)?;
///
/// assert_eq!(
/// expanded.to_string(),
/// ""
/// );
/// # }
/// # Ok::<_, Error>(())
/// ```
#[inline]
#[must_use]
pub fn new() -> Self {
Self::default()
}
/// Inserts a variable.
///
/// Passing [`Value::Undefined`] removes the value from the context.
///
/// The entry will be inserted or removed even if the key is invalid as a
/// variable name. Such entries will be simply ignored on expansion.
///
/// # Examples
///
/// ```
/// # use iri_string::template::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::spec::UriSpec;
/// use iri_string::template::UriTemplateStr;
/// use iri_string::template::simple_context::SimpleContext;
///
/// let mut context = SimpleContext::new();
/// context.insert("username", "foo");
///
/// let template = UriTemplateStr::new("/users/{username}")?;
/// let expanded = template.expand::<UriSpec, _>(&context)?;
///
/// assert_eq!(
/// expanded.to_string(),
/// "/users/foo"
/// );
/// # }
/// # Ok::<_, Error>(())
/// ```
///
/// Passing [`Value::Undefined`] removes the value from the context.
///
/// ```
/// # use iri_string::template::Error;
/// ## [cfg(feature = "alloc")] {
/// use iri_string::spec::UriSpec;
/// use iri_string::template::UriTemplateStr;
/// use iri_string::template::simple_context::{SimpleContext, Value};
///
/// let mut context = SimpleContext::new();
/// context.insert("username", "foo");
/// context.insert("username", Value::Undefined);
///
/// let template = UriTemplateStr::new("/users/{username}")?;
/// let expanded = template.expand::<UriSpec, _>(&context)?;
///
/// assert_eq!(
/// expanded.to_string(),
/// "/users/"
/// );
/// # }
/// # Ok::<_, Error>(())
/// ```
pub fn insert<K, V>(&mut self, key: K, value: V) -> Option<Value>
where
K: Into<String>,
V: Into<Value>,
{
let key = key.into();
match value.into() {
Value::Undefined => self.variables.remove(&key),
value => self.variables.insert(key, value),
}
}
/// Removes all entries in the context.
///
/// # Examples
///
/// ```
/// # use iri_string::template::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::spec::UriSpec;
/// use iri_string::template::UriTemplateStr;
/// use iri_string::template::simple_context::SimpleContext;
///
/// let template = UriTemplateStr::new("{foo,bar}")?;
/// let mut context = SimpleContext::new();
///
/// context.insert("foo", "FOO");
/// context.insert("bar", "BAR");
/// assert_eq!(
/// template.expand::<UriSpec, _>(&context)?.to_string(),
/// "FOO,BAR"
/// );
///
/// context.clear();
/// assert_eq!(
/// template.expand::<UriSpec, _>(&context)?.to_string(),
/// ""
/// );
/// # }
/// # Ok::<_, Error>(())
/// ```
#[inline]
pub fn clear(&mut self) {
self.variables.clear();
}
/// Returns a reference to the value for the key.
//
// QUESTION: Should hexdigits in percent-encoded triplets in varnames be
// compared case sensitively?
#[inline]
#[must_use]
pub fn get(&self, key: VarName<'_>) -> Option<&Value> {
self.variables.get(key.as_str())
}
}
impl Context for SimpleContext {
fn visit<V: Visitor>(&self, visitor: V) -> V::Result {
use crate::template::context::{AssocVisitor, ListVisitor};
let name = visitor.var_name().as_str();
match self.variables.get(name) {
None | Some(Value::Undefined) => visitor.visit_undefined(),
Some(Value::String(s)) => visitor.visit_string(s),
Some(Value::List(list)) => {
let mut visitor = visitor.visit_list();
if let ControlFlow::Break(res) =
list.iter().try_for_each(|item| visitor.visit_item(item))
{
return res;
}
visitor.finish()
}
Some(Value::Assoc(list)) => {
let mut visitor = visitor.visit_assoc();
if let ControlFlow::Break(res) =
list.iter().try_for_each(|(k, v)| visitor.visit_entry(k, v))
{
return res;
}
visitor.finish()
}
}
}
}

647
vendor/iri-string/src/template/string.rs vendored Normal file
View File

@@ -0,0 +1,647 @@
//! Template string types.
use core::fmt;
#[cfg(feature = "alloc")]
use alloc::borrow::Cow;
#[cfg(all(feature = "alloc", not(feature = "std")))]
use alloc::boxed::Box;
#[cfg(feature = "alloc")]
use alloc::rc::Rc;
#[cfg(feature = "alloc")]
use alloc::string::String;
#[cfg(feature = "alloc")]
use alloc::sync::Arc;
use crate::spec::Spec;
use crate::template::components::{VarListIter, VarName};
use crate::template::context::{Context, DynamicContext};
use crate::template::error::{Error, ErrorKind};
use crate::template::expand::{expand_whole_dynamic, Chunk, Chunks, Expanded};
use crate::template::parser::validate_template_str;
#[cfg(feature = "alloc")]
pub use self::owned::UriTemplateString;
/// Implements `PartialEq` and `PartialOrd`.
///
/// Generates the comparisons in both directions (`$ty_lhs` against `$ty_rhs`
/// and `$ty_rhs` against `$ty_lhs`). Both operands are converted with
/// `as_ref()` and then compared as `$ty_common`.
macro_rules! impl_cmp {
($ty_common:ty, $ty_lhs:ty, $ty_rhs:ty) => {
// `$ty_lhs == $ty_rhs`
impl PartialEq<$ty_rhs> for $ty_lhs {
#[inline]
fn eq(&self, o: &$ty_rhs) -> bool {
<$ty_common as PartialEq<$ty_common>>::eq(self.as_ref(), o.as_ref())
}
}
// `$ty_rhs == $ty_lhs`
impl PartialEq<$ty_lhs> for $ty_rhs {
#[inline]
fn eq(&self, o: &$ty_lhs) -> bool {
<$ty_common as PartialEq<$ty_common>>::eq(self.as_ref(), o.as_ref())
}
}
// Ordering of `$ty_lhs` against `$ty_rhs`.
impl PartialOrd<$ty_rhs> for $ty_lhs {
#[inline]
fn partial_cmp(&self, o: &$ty_rhs) -> Option<core::cmp::Ordering> {
<$ty_common as PartialOrd<$ty_common>>::partial_cmp(self.as_ref(), o.as_ref())
}
}
// Ordering of `$ty_rhs` against `$ty_lhs`.
impl PartialOrd<$ty_lhs> for $ty_rhs {
#[inline]
fn partial_cmp(&self, o: &$ty_lhs) -> Option<core::cmp::Ordering> {
<$ty_common as PartialOrd<$ty_common>>::partial_cmp(self.as_ref(), o.as_ref())
}
}
};
}
#[cfg(feature = "alloc")]
mod owned;
/// A borrowed slice of a URI template.
///
/// URI Template is defined by [RFC 6570].
///
/// Note that "URI Template" can also be used for IRI.
///
/// [RFC 6570]: https://www.rfc-editor.org/rfc/rfc6570.html
///
/// # Valid values
///
/// This type can have a URI template string.
///
/// # Applied errata
///
/// [Errata ID 6937](https://www.rfc-editor.org/errata/eid6937) is applied, so
/// single quotes are allowed to appear in a URI template.
///
/// ```
/// # use iri_string::template::Error;
/// use iri_string::template::UriTemplateStr;
///
/// let template = UriTemplateStr::new("'quoted'")?;
/// # Ok::<_, Error>(())
/// ```
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
#[cfg_attr(feature = "serde", serde(transparent))]
// `repr(transparent)` is what allows a `&str` to be reinterpreted as
// `&UriTemplateStr` by the unchecked constructors.
#[repr(transparent)]
#[derive(PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct UriTemplateStr {
/// The raw string.
inner: str,
}
impl UriTemplateStr {
/// Creates a new string.
///
/// # Errors
///
/// Returns an error if `s` is not a valid URI template.
///
/// # Examples
///
/// ```
/// # use iri_string::template::Error;
/// use iri_string::template::UriTemplateStr;
///
/// let template = UriTemplateStr::new("/users/{username}")?;
/// # Ok::<_, Error>(())
/// ```
#[inline]
pub fn new(s: &str) -> Result<&Self, Error> {
TryFrom::try_from(s)
}
/// Creates a new string without validation.
///
/// This does not validate the given string, so it is the caller's
/// responsibility to ensure the given string is valid.
///
/// # Safety
///
/// The given string must be syntactically valid as `Self` type.
/// If not, any use of the returned value or the call of this
/// function itself may result in undefined behavior.
#[inline]
#[must_use]
pub unsafe fn new_unchecked(s: &str) -> &Self {
// SAFETY: `new_always_unchecked` requires the same precondition
// as `new_unchecked`, which the caller of this function must uphold.
unsafe { Self::new_always_unchecked(s) }
}
/// Creates a new string without any validation.
///
/// This does not validate the given string at any time.
///
/// Intended for internal use.
///
/// # Safety
///
/// The given string must be valid.
#[inline]
#[must_use]
unsafe fn new_always_unchecked(s: &str) -> &Self {
// SAFETY: the cast is safe since `Self` type has `repr(transparent)`
// attribute and the content is guaranteed as valid by the
// precondition of the function.
unsafe { &*(s as *const str as *const Self) }
}
/// Returns the template as a plain `&str`.
///
/// # Examples
///
/// ```
/// # use iri_string::template::Error;
/// use iri_string::template::UriTemplateStr;
///
/// let template = UriTemplateStr::new("/users/{username}")?;
/// assert_eq!(template.as_str(), "/users/{username}");
/// # Ok::<_, Error>(())
/// ```
#[inline]
#[must_use]
pub fn as_str(&self) -> &str {
self.as_ref()
}
/// Returns the template string length in bytes.
///
/// # Examples
///
/// ```
/// # use iri_string::template::Error;
/// use iri_string::template::UriTemplateStr;
///
/// let template = UriTemplateStr::new("/users/{username}")?;
/// assert_eq!(template.len(), "/users/{username}".len());
/// # Ok::<_, Error>(())
/// ```
#[inline]
#[must_use]
pub fn len(&self) -> usize {
self.as_str().len()
}
/// Returns whether the string is empty.
///
/// # Examples
///
/// ```
/// # use iri_string::template::Error;
/// use iri_string::template::UriTemplateStr;
///
/// let template = UriTemplateStr::new("/users/{username}")?;
/// assert!(!template.is_empty());
///
/// let empty = UriTemplateStr::new("")?;
/// assert!(empty.is_empty());
/// # Ok::<_, Error>(())
/// ```
#[inline]
#[must_use]
pub fn is_empty(&self) -> bool {
self.as_str().is_empty()
}
}
impl UriTemplateStr {
    /// Expands the template with the given context.
    ///
    /// The returned [`Expanded`] borrows both the template and the context.
    ///
    /// # Examples
    ///
    /// ```
    /// # use iri_string::template::Error;
    /// # #[cfg(feature = "alloc")] {
    /// use iri_string::spec::UriSpec;
    /// use iri_string::template::UriTemplateStr;
    /// use iri_string::template::simple_context::SimpleContext;
    ///
    /// let mut context = SimpleContext::new();
    /// context.insert("username", "foo");
    ///
    /// let template = UriTemplateStr::new("/users/{username}")?;
    /// let expanded = template.expand::<UriSpec, _>(&context)?;
    ///
    /// assert_eq!(
    ///     expanded.to_string(),
    ///     "/users/foo"
    /// );
    /// # }
    /// # Ok::<_, Error>(())
    /// ```
    ///
    /// You can control allowed characters in the output by changing spec type.
    ///
    /// ```
    /// # use iri_string::template::Error;
    /// # #[cfg(feature = "alloc")] {
    /// use iri_string::spec::{IriSpec, UriSpec};
    /// use iri_string::template::UriTemplateStr;
    /// use iri_string::template::simple_context::SimpleContext;
    ///
    /// let mut context = SimpleContext::new();
    /// context.insert("alpha", "\u{03B1}");
    ///
    /// let template = UriTemplateStr::new("{?alpha}")?;
    ///
    /// assert_eq!(
    ///     template.expand::<UriSpec, _>(&context)?.to_string(),
    ///     "?alpha=%CE%B1",
    ///     "a URI cannot contain Unicode alpha (U+03B1), so it should be escaped"
    /// );
    /// assert_eq!(
    ///     template.expand::<IriSpec, _>(&context)?.to_string(),
    ///     "?alpha=\u{03B1}",
    ///     "an IRI can contain Unicode alpha (U+03B1), so it is written as is"
    /// );
    /// # }
    /// # Ok::<_, Error>(())
    /// ```
    #[inline]
    pub fn expand<'a, S: Spec, C: Context>(
        &'a self,
        context: &'a C,
    ) -> Result<Expanded<'a, S, C>, Error> {
        Expanded::new(self, context)
    }

    /// Expands the template with the given dynamic context.
    ///
    /// The expansion result is written to `writer`.
    ///
    // The trailing space after "use" keeps the two concatenated fragments
    // from being glued together in the rendered documentation.
    #[cfg_attr(
        feature = "alloc",
        doc = concat!(
            "If you need the allocated [`String`], use ",
            "[`expand_dynamic_to_string`][`Self::expand_dynamic_to_string`]."
        )
    )]
    ///
    /// See the documentation for [`DynamicContext`] for usage.
    pub fn expand_dynamic<S: Spec, W: fmt::Write, C: DynamicContext>(
        &self,
        writer: &mut W,
        context: &mut C,
    ) -> Result<(), Error> {
        expand_whole_dynamic::<S, _, _>(self, writer, context)
    }

    /// Expands the template into a string, with the given dynamic context.
    ///
    /// This is basically [`expand_dynamic`][`Self::expand_dynamic`] method
    /// that returns an owned string instead of writing to the given writer.
    ///
    /// See the documentation for [`DynamicContext`] for usage.
    ///
    /// # Examples
    ///
    /// ```
    /// # #[cfg(feature = "alloc")]
    /// # extern crate alloc;
    /// # use iri_string::template::Error;
    /// # #[cfg(feature = "alloc")] {
    /// # use alloc::string::String;
    /// use iri_string::template::UriTemplateStr;
    /// # use iri_string::template::context::{DynamicContext, Visitor, VisitPurpose};
    /// use iri_string::spec::UriSpec;
    ///
    /// struct MyContext<'a> {
    ///     // See the documentation for `DynamicContext`.
    /// #   /// Target path.
    /// #   target: &'a str,
    /// #   /// Username.
    /// #   username: Option<&'a str>,
    /// #   /// A flag to remember whether the URI template
    /// #   /// attempted to use `username` variable.
    /// #   username_visited: bool,
    /// }
    /// #
    /// # impl DynamicContext for MyContext<'_> {
    /// #     fn on_expansion_start(&mut self) {
    /// #         // Reset the state.
    /// #         self.username_visited = false;
    /// #     }
    /// #     fn visit_dynamic<V: Visitor>(&mut self, visitor: V) -> V::Result {
    /// #         match visitor.var_name().as_str() {
    /// #             "target" => visitor.visit_string(self.target),
    /// #             "username" => {
    /// #                 if visitor.purpose() == VisitPurpose::Expand {
    /// #                     // The variable `username` is being used
    /// #                     // on the template expansion.
    /// #                     // Don't care whether `username` is defined or not.
    /// #                     self.username_visited = true;
    /// #                 }
    /// #                 if let Some(username) = &self.username {
    /// #                     visitor.visit_string(username)
    /// #                 } else {
    /// #                     visitor.visit_undefined()
    /// #                 }
    /// #             }
    /// #             _ => visitor.visit_undefined(),
    /// #         }
    /// #     }
    /// # }
    ///
    /// let mut context = MyContext {
    ///     target: "/posts/1",
    ///     username: Some("the_admin"),
    ///     username_visited: false,
    /// };
    ///
    /// // This template uses the variable `username`, so the context
    /// // records the access.
    /// let template = UriTemplateStr::new("{+target}{?username}")?;
    /// let s = template.expand_dynamic_to_string::<UriSpec, _>(&mut context)?;
    /// assert_eq!(s, "/posts/1?username=the_admin");
    /// assert!(context.username_visited);
    /// # }
    /// # Ok::<_, Error>(())
    /// ```
    #[cfg(feature = "alloc")]
    pub fn expand_dynamic_to_string<S: Spec, C: DynamicContext>(
        &self,
        context: &mut C,
    ) -> Result<String, Error> {
        let mut buf = String::new();
        // Reuse the writer-based entry point so both paths stay in sync.
        self.expand_dynamic::<S, _, _>(&mut buf, context)?;
        Ok(buf)
    }

    /// Returns an iterator of variables in the template.
    ///
    /// Variables are yielded in order of appearance; a variable that appears
    /// more than once is yielded each time.
    ///
    /// # Examples
    ///
    /// ```
    /// # use iri_string::template::Error;
    /// use iri_string::template::UriTemplateStr;
    ///
    /// let template = UriTemplateStr::new("foo{/bar*,baz:4}{?qux}{&bar*}")?;
    /// let mut vars = template.variables();
    /// assert_eq!(vars.next().map(|var| var.as_str()), Some("bar"));
    /// assert_eq!(vars.next().map(|var| var.as_str()), Some("baz"));
    /// assert_eq!(vars.next().map(|var| var.as_str()), Some("qux"));
    /// assert_eq!(vars.next().map(|var| var.as_str()), Some("bar"));
    /// # Ok::<_, Error>(())
    /// ```
    #[inline]
    #[must_use]
    pub fn variables(&self) -> UriTemplateVariables<'_> {
        UriTemplateVariables::new(self)
    }
}
impl fmt::Debug for UriTemplateStr {
    /// Formats the template as a `UriTemplateStr` tuple wrapping the raw string.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let mut tuple = f.debug_tuple("UriTemplateStr");
        tuple.field(&&self.inner);
        tuple.finish()
    }
}
impl AsRef<str> for UriTemplateStr {
/// Returns the raw template string.
#[inline]
fn as_ref(&self) -> &str {
&self.inner
}
}
impl AsRef<UriTemplateStr> for UriTemplateStr {
/// Returns `self` unchanged (reflexive conversion).
#[inline]
fn as_ref(&self) -> &UriTemplateStr {
self
}
}
#[cfg(feature = "alloc")]
impl<'a> From<&'a UriTemplateStr> for Cow<'a, UriTemplateStr> {
/// Wraps the borrowed template in `Cow::Borrowed`; nothing is copied.
#[inline]
fn from(s: &'a UriTemplateStr) -> Self {
Cow::Borrowed(s)
}
}
#[cfg(feature = "alloc")]
impl From<&UriTemplateStr> for Arc<UriTemplateStr> {
/// Copies the template string into a new `Arc`.
fn from(s: &UriTemplateStr) -> Self {
let inner: &str = s.as_str();
// Build an `Arc<str>` first, then reinterpret it as `Arc<UriTemplateStr>`.
let buf = Arc::<str>::from(inner);
// SAFETY: `UriTemplateStr` has `repr(transparent)` attribute, so
// the memory layouts of `Arc<str>` and `Arc<UriTemplateStr>` are
// compatible.
unsafe {
let raw: *const str = Arc::into_raw(buf);
Self::from_raw(raw as *const UriTemplateStr)
}
}
}
#[cfg(feature = "alloc")]
impl From<&UriTemplateStr> for Box<UriTemplateStr> {
/// Copies the template string into a new `Box`.
fn from(s: &UriTemplateStr) -> Self {
let inner: &str = s.as_str();
// Build a `Box<str>` first, then reinterpret it as `Box<UriTemplateStr>`.
let buf = Box::<str>::from(inner);
// SAFETY: `UriTemplateStr` has `repr(transparent)` attribute, so
// the memory layouts of `Box<str>` and `Box<UriTemplateStr>` are
// compatible.
unsafe {
let raw: *mut str = Box::into_raw(buf);
Self::from_raw(raw as *mut UriTemplateStr)
}
}
}
#[cfg(feature = "alloc")]
impl From<&UriTemplateStr> for Rc<UriTemplateStr> {
/// Copies the template string into a new `Rc`.
fn from(s: &UriTemplateStr) -> Self {
let inner: &str = s.as_str();
// Build an `Rc<str>` first, then reinterpret it as `Rc<UriTemplateStr>`.
let buf = Rc::<str>::from(inner);
// SAFETY: `UriTemplateStr` has `repr(transparent)` attribute, so
// the memory layouts of `Rc<str>` and `Rc<UriTemplateStr>` are
// compatible.
unsafe {
let raw: *const str = Rc::into_raw(buf);
Self::from_raw(raw as *const UriTemplateStr)
}
}
}
impl<'a> From<&'a UriTemplateStr> for &'a str {
    /// Returns the raw template string, keeping the original lifetime.
    #[inline]
    fn from(s: &'a UriTemplateStr) -> &'a str {
        s.as_str()
    }
}
impl<'a> TryFrom<&'a str> for &'a UriTemplateStr {
    type Error = Error;

    /// Validates `s` as a URI template and reinterprets it without copying.
    #[inline]
    fn try_from(s: &'a str) -> Result<Self, Self::Error> {
        validate_template_str(s)?;
        // SAFETY: the validation above succeeded, so the string is valid.
        Ok(unsafe { UriTemplateStr::new_always_unchecked(s) })
    }
}
impl<'a> TryFrom<&'a [u8]> for &'a UriTemplateStr {
type Error = Error;
#[inline]
fn try_from(bytes: &'a [u8]) -> Result<Self, Self::Error> {
let s = core::str::from_utf8(bytes)
.map_err(|e| Error::new(ErrorKind::InvalidUtf8, e.valid_up_to()))?;
match validate_template_str(s) {
// SAFETY: just checked the string is valid.
Ok(()) => Ok(unsafe { UriTemplateStr::new_always_unchecked(s) }),
Err(e) => Err(e),
}
}
}
// Comparisons between plain string slices and `UriTemplateStr`, all performed
// on the underlying `str`.
impl_cmp!(str, str, UriTemplateStr);
impl_cmp!(str, &str, UriTemplateStr);
impl_cmp!(str, str, &UriTemplateStr);
impl fmt::Display for UriTemplateStr {
    /// Writes the raw template string as is.
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(&self.inner)
    }
}
/// Serde deserializer implementation.
#[cfg(feature = "serde")]
mod __serde_slice {
    use super::UriTemplateStr;

    use core::fmt;

    use serde::{
        de::{self, Visitor},
        Deserialize, Deserializer,
    };

    /// Custom borrowed string visitor.
    ///
    /// Only borrowed input can be accepted, because the produced value points
    /// into the deserializer's input.
    #[derive(Debug, Clone, Copy)]
    struct CustomStrVisitor;

    impl<'de> Visitor<'de> for CustomStrVisitor {
        type Value = &'de UriTemplateStr;

        #[inline]
        fn expecting(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
            f.write_str("URI template string")
        }

        /// Validates the borrowed string and reinterprets it as a template.
        #[inline]
        fn visit_borrowed_str<E>(self, v: &'de str) -> Result<Self::Value, E>
        where
            E: de::Error,
        {
            <&'de UriTemplateStr as TryFrom<&'de str>>::try_from(v).map_err(E::custom)
        }
    }

    // About `'de` and `'a`, see
    // <https://serde.rs/lifetimes.html#the-deserializede-lifetime>.
    impl<'a, 'de: 'a> Deserialize<'de> for &'a UriTemplateStr {
        #[inline]
        fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
        where
            D: Deserializer<'de>,
        {
            // The visitor cannot make use of an owned string, so give the
            // `str` hint. Hinting `deserialize_string` invites deserializers
            // to hand over an owned `String`, which this visitor rejects.
            deserializer.deserialize_str(CustomStrVisitor)
        }
    }
}
/// An iterator of variables in a URI template.
///
/// Created by [`UriTemplateStr::variables`].
#[derive(Debug, Clone)]
pub struct UriTemplateVariables<'a> {
/// Chunks iterator.
chunks: Chunks<'a>,
/// Variables in the last chunk.
///
/// `None` until the first expression chunk is found, and again once the
/// variables of the current expression are exhausted.
vars_in_chunk: Option<VarListIter<'a>>,
}
impl<'a> UriTemplateVariables<'a> {
    /// Builds an iterator positioned before the first chunk of `template`.
    #[inline]
    #[must_use]
    fn new(template: &'a UriTemplateStr) -> Self {
        let chunks = Chunks::new(template);
        Self {
            chunks,
            vars_in_chunk: None,
        }
    }
}
impl<'a> Iterator for UriTemplateVariables<'a> {
    type Item = VarName<'a>;

    /// Yields the next variable name, moving on to the following expression
    /// chunk whenever the current one runs out of variables.
    fn next(&mut self) -> Option<Self::Item> {
        loop {
            // First, drain the variable list of the current expression.
            if let Some(vars) = self.vars_in_chunk.as_mut() {
                if let Some((_len, spec)) = vars.next() {
                    return Some(spec.name());
                }
                self.vars_in_chunk = None;
            }

            // Then skip literal chunks up to the next expression. Running out
            // of chunks ends the iteration.
            let expr = self.chunks.find_map(|chunk| match chunk {
                Chunk::Expr(expr) => Some(expr),
                Chunk::Literal(_) => None,
            })?;
            self.vars_in_chunk = Some(expr.decompose().1.into_iter());
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    use crate::spec::IriSpec;
    use crate::template::context::{AssocVisitor, ListVisitor, Visitor};

    /// Context that defines one string, one list, and one associative array.
    struct TestContext;

    impl Context for TestContext {
        fn visit<V: Visitor>(&self, visitor: V) -> V::Result {
            match visitor.var_name().as_str() {
                "str" => visitor.visit_string("string"),
                "list" => {
                    let list = visitor.visit_list();
                    list.visit_items_and_finish(["item0", "item1", "item2"])
                }
                "assoc" => {
                    let assoc = visitor.visit_assoc();
                    assoc.visit_entries_and_finish([("key0", "value0"), ("key1", "value1")])
                }
                _ => visitor.visit_undefined(),
            }
        }
    }

    /// Expands `template` with [`TestContext`] and returns the location of
    /// the expansion error, if the expansion fails.
    fn expand_error_location(template: &str) -> Option<usize> {
        UriTemplateStr::new(template)
            .unwrap()
            .expand::<IriSpec, _>(&TestContext)
            .err()
            .map(|e| e.location())
    }

    #[test]
    fn expand_error_pos() {
        assert_eq!(expand_error_location("foo{list:4}"), Some("foo{".len()));
        assert_eq!(
            expand_error_location("foo{/list*,list:4}"),
            Some("foo{/list*,".len())
        );
        assert_eq!(
            expand_error_location("foo{/str:3,list*,assoc:4}"),
            Some("foo{/str:3,list*,".len())
        );
    }
}

View File

@@ -0,0 +1,296 @@
//! Owned `UriTemplateString`.
use core::fmt;
use alloc::borrow::Cow;
#[cfg(all(feature = "alloc", not(feature = "std")))]
use alloc::borrow::ToOwned;
#[cfg(all(feature = "alloc", not(feature = "std")))]
use alloc::boxed::Box;
#[cfg(all(feature = "alloc", not(feature = "std")))]
use alloc::string::String;
use crate::template::error::{CreationError, Error, ErrorKind};
use crate::template::parser::validate_template_str;
use crate::template::string::UriTemplateStr;
/// An owned string of a URI template.
///
/// URI Template is defined by [RFC 6570].
///
/// Note that "URI Template" can also be used for IRI.
///
/// [RFC 6570]: https://www.rfc-editor.org/rfc/rfc6570.html
///
/// # Valid values
///
/// This type can have a URI template string.
// Note that `From<$ty> for {Arc,Rc}<$slice>` is currently not implemented since
// this won't reuse allocated memory and hides internal memory reallocation. See
// <https://github.com/lo48576/iri-string/issues/20#issuecomment-1105207849>.
// However, this is not a firm decision, so there is a chance that they will
// be implemented in the future.
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
#[cfg_attr(feature = "serde", serde(transparent))]
#[derive(Default, Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct UriTemplateString {
/// Inner data.
///
/// Expected to always hold a valid URI template.
inner: String,
}
impl UriTemplateString {
/// Creates a new string without validation.
///
/// This does not validate the given string, so it is the caller's
/// responsibility to ensure the given string is valid.
///
/// # Safety
///
/// The given string must be syntactically valid as `Self` type.
/// If not, any use of the returned value or the call of this
/// function itself may result in undefined behavior.
#[inline]
#[must_use]
pub unsafe fn new_unchecked(s: alloc::string::String) -> Self {
// The construction itself can be written in safe Rust, but
// every other place including unsafe functions expects
// `self.inner` to be syntactically valid as `Self`. In order to
// make them safe, the construction should validate the value
// or at least should require users to validate the value by
// making the function `unsafe`.
Self { inner: s }
}
/// Shrinks the capacity of the inner buffer to match its length.
#[inline]
pub fn shrink_to_fit(&mut self) {
self.inner.shrink_to_fit()
}
/// Returns the internal buffer capacity in bytes.
#[inline]
#[must_use]
pub fn capacity(&self) -> usize {
self.inner.capacity()
}
/// Returns the borrowed URI template string slice.
///
/// This is equivalent to `&*self`.
#[inline]
#[must_use]
pub fn as_slice(&self) -> &UriTemplateStr {
self.as_ref()
}
/// Appends the template string.
///
/// In debug builds, the concatenated result is re-validated.
#[inline]
pub fn append(&mut self, other: &UriTemplateStr) {
self.inner.push_str(other.as_str());
debug_assert!(validate_template_str(self.as_str()).is_ok());
}
}
impl AsRef<str> for UriTemplateString {
    /// Returns the raw template string.
    #[inline]
    fn as_ref(&self) -> &str {
        self.inner.as_str()
    }
}
impl AsRef<UriTemplateStr> for UriTemplateString {
/// Borrows the owned template as a `UriTemplateStr` without revalidating it.
#[inline]
fn as_ref(&self) -> &UriTemplateStr {
// SAFETY: `UriTemplateString` and `UriTemplateStr` require the same validation,
// so the content of `self: &UriTemplateString` must be valid as `UriTemplateStr`.
unsafe { UriTemplateStr::new_always_unchecked(AsRef::<str>::as_ref(self)) }
}
}
impl core::borrow::Borrow<str> for UriTemplateString {
    /// Borrows the raw template string.
    #[inline]
    fn borrow(&self) -> &str {
        &self.inner
    }
}
impl core::borrow::Borrow<UriTemplateStr> for UriTemplateString {
    /// Borrows the owned template as a template slice.
    #[inline]
    fn borrow(&self) -> &UriTemplateStr {
        self.as_slice()
    }
}
impl ToOwned for UriTemplateStr {
type Owned = UriTemplateString;
#[inline]
fn to_owned(&self) -> Self::Owned {
self.into()
}
}
impl From<&'_ UriTemplateStr> for UriTemplateString {
    /// Copies the template slice into a newly allocated string.
    #[inline]
    fn from(s: &UriTemplateStr) -> Self {
        // No validation is needed: `s` is already a valid template.
        let inner = alloc::string::String::from(s.as_str());
        Self { inner }
    }
}
impl From<UriTemplateString> for alloc::string::String {
#[inline]
fn from(s: UriTemplateString) -> Self {
s.inner
}
}
impl<'a> From<UriTemplateString> for Cow<'a, UriTemplateStr> {
/// Wraps the owned template in `Cow::Owned`.
#[inline]
fn from(s: UriTemplateString) -> Cow<'a, UriTemplateStr> {
Cow::Owned(s)
}
}
impl From<UriTemplateString> for Box<UriTemplateStr> {
/// Converts the owned template into a boxed template slice.
#[inline]
fn from(s: UriTemplateString) -> Box<UriTemplateStr> {
let inner: String = s.into();
// Build a `Box<str>` first, then reinterpret it as `Box<UriTemplateStr>`.
let buf = Box::<str>::from(inner);
// SAFETY: `UriTemplateStr` has `repr(transparent)` attribute, so
// the memory layouts of `Box<str>` and `Box<UriTemplateStr>` are
// compatible. Additionally, `UriTemplateString` and `UriTemplateStr`
// require the same syntax.
unsafe {
let raw: *mut str = Box::into_raw(buf);
Box::<UriTemplateStr>::from_raw(raw as *mut UriTemplateStr)
}
}
}
impl TryFrom<&'_ str> for UriTemplateString {
    type Error = Error;

    /// Validates `s` as a URI template and copies it into an owned string.
    #[inline]
    fn try_from(s: &str) -> Result<Self, Self::Error> {
        let borrowed = <&UriTemplateStr>::try_from(s)?;
        Ok(Self::from(borrowed))
    }
}
impl TryFrom<&'_ [u8]> for UriTemplateString {
type Error = Error;
#[inline]
fn try_from(bytes: &[u8]) -> Result<Self, Self::Error> {
let s = core::str::from_utf8(bytes)
.map_err(|e| Error::new(ErrorKind::InvalidUtf8, e.valid_up_to()))?;
<&UriTemplateStr>::try_from(s).map(Into::into)
}
}
impl core::convert::TryFrom<alloc::string::String> for UriTemplateString {
    type Error = CreationError<String>;

    /// Validates the string as a URI template and takes ownership of it.
    ///
    /// On failure the original string is handed back inside the error.
    #[inline]
    fn try_from(s: alloc::string::String) -> Result<Self, Self::Error> {
        if let Err(e) = <&UriTemplateStr>::try_from(s.as_str()) {
            return Err(CreationError::new(e, s));
        }
        // The check above ensures that the string `s` is a valid template.
        Ok(Self { inner: s })
    }
}
impl alloc::str::FromStr for UriTemplateString {
    type Err = Error;

    /// Parses `s` as a URI template; same as `TryFrom<&str>`.
    #[inline]
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        <Self as TryFrom<&str>>::try_from(s)
    }
}
impl core::ops::Deref for UriTemplateString {
    type Target = UriTemplateStr;

    /// Dereferences to the borrowed template slice.
    #[inline]
    fn deref(&self) -> &UriTemplateStr {
        self.as_slice()
    }
}
// Comparisons involving `Cow<'_, str>` and the borrowed template type, all
// performed on the underlying `str`.
impl_cmp!(str, UriTemplateStr, Cow<'_, str>);
impl_cmp!(str, &UriTemplateStr, Cow<'_, str>);
// Comparisons between plain string types and the owned template type.
impl_cmp!(str, str, UriTemplateString);
impl_cmp!(str, &str, UriTemplateString);
impl_cmp!(str, Cow<'_, str>, UriTemplateString);
impl_cmp!(str, String, UriTemplateString);
impl fmt::Display for UriTemplateString {
    /// Writes the raw template string as is.
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(&self.inner)
    }
}
/// Serde deserializer implementation.
#[cfg(feature = "serde")]
mod __serde_owned {
    use super::UriTemplateString;

    use core::fmt;

    #[cfg(all(feature = "alloc", feature = "serde", not(feature = "std")))]
    use alloc::string::String;

    use serde::{
        de::{self, Visitor},
        Deserialize, Deserializer,
    };

    /// Custom owned string visitor.
    ///
    /// Accepts both borrowed and owned strings; an owned string is reused
    /// as the buffer of the result instead of being copied.
    #[derive(Debug, Clone, Copy)]
    struct CustomStringVisitor;

    impl Visitor<'_> for CustomStringVisitor {
        type Value = UriTemplateString;

        #[inline]
        fn expecting(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
            f.write_str("URI template string")
        }

        /// Validates the string and copies it into an owned template.
        #[inline]
        fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
        where
            E: de::Error,
        {
            <UriTemplateString as TryFrom<&str>>::try_from(v).map_err(E::custom)
        }

        /// Validates the string and takes ownership of it.
        // No extra `cfg(feature = "serde")` is needed here: the whole module
        // is already gated on that feature.
        #[inline]
        fn visit_string<E>(self, v: String) -> Result<Self::Value, E>
        where
            E: de::Error,
        {
            <UriTemplateString as TryFrom<String>>::try_from(v).map_err(E::custom)
        }
    }

    impl<'de> Deserialize<'de> for UriTemplateString {
        #[inline]
        fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
        where
            D: Deserializer<'de>,
        {
            // The visitor benefits from owned data (`visit_string` reuses the
            // buffer), so give the `string` hint rather than the `str` hint.
            deserializer.deserialize_string(CustomStringVisitor)
        }
    }
}

224
vendor/iri-string/src/types.rs vendored Normal file
View File

@@ -0,0 +1,224 @@
//! URI and IRI types.
//!
//! # URI and IRI
//!
//! IRIs (Internationalized Resource Identifiers) are defined in [RFC 3987],
//! and URIs (Uniform Resource Identifiers) are defined in [RFC 3986].
//!
//! URI consists of only ASCII characters, and is a subset of IRI.
//!
//! IRIs are defined as below:
//!
//! ```text
//! IRI = scheme ":" ihier-part [ "?" iquery ] [ "#" ifragment ]
//! IRI-reference = IRI / irelative-ref
//! absolute-IRI = scheme ":" ihier-part [ "?" iquery ]
//! irelative-ref = irelative-part [ "?" iquery ] [ "#" ifragment ]
//! (`irelative-part` is roughly same as `ihier-part`.)
//! ```
//!
//! Definitions for URIs are almost same, but they cannot have non-ASCII characters.
//!
//! # Types
//!
//! Types can be categorized by:
//!
//! * syntax,
//! * spec, and
//! * ownership.
//!
//! ## Syntax
//!
//! Since URIs and IRIs have almost same syntax and share algorithms, they are implemented by
//! generic types.
//!
//! * [`RiStr`] and [`RiString`]
//! + String types for `IRI` and `URI` rules.
//! * [`RiAbsoluteStr`] and [`RiAbsoluteString`]
//! + String types for `absolute-IRI` and `absolute-URI` rules.
//! * [`RiReferenceStr`] and [`RiReferenceString`]
//! + String types for `IRI-reference` and `URI-reference` rules.
//! * [`RiRelativeStr`] and [`RiRelativeString`]
//! + String types for `irelative-ref` and `relative-ref` rules.
//! * [`RiFragmentStr`] and [`RiFragmentString`]
//! + String types for `ifragment` and `fragment` rules.
//! + Note that these types represent a substring of an IRI / URI reference.
//! They are not intended to be used directly as an IRI / URI reference.
//!
//! "Ri" stands for "Resource Identifier".
//!
//! ## Spec
//!
//! These types have a type parameter, which represents RFC specification.
//! [`IriSpec`] represents [RFC 3987] spec, and [`UriSpec`] represents [RFC 3986] spec.
//! For example, `RiAbsoluteStr<IriSpec>` can have `absolute-IRI` string value,
//! and `RiReferenceStr<UriSpec>` can have `URI-reference` string value.
//!
//! ## Ownership
//!
//! String-like types usually have two variations, borrowed and owned.
//!
//! Borrowed types (such as `str`, `Path`, `OsStr`) are unsized, and used by reference style.
//! Owned types (such as `String`, `PathBuf`, `OsString`) are sized, and require heap allocation.
//! Owned types can be coerced to a borrowed type (for example, `&String` is automatically coerced
//! to `&str` in many context).
//!
//! IRI / URI types have same variations, `RiFooStr` and `RiFooString`
//! (`Foo` part represents syntax).
//! They are very similar to `&str` and `String`.
//! `Deref` is implemented, `RiFooStr::len()` is available, `&RiFooString` can be coerced to
//! `&RiFooStr`, `Cow<'_, RiFooStr>` and `Box<RiFooStr>` are available, and so on.
//!
//! # Hierarchy and safe conversion
//!
//! IRI syntaxes have the hierarchy below.
//!
//! ```text
//! RiReferenceStr
//! |-- RiStr
//! | `-- RiAbsoluteStr
//! `-- RiRelativeStr
//! ```
//!
//! Therefore, the conversions below are safe and cheap:
//!
//! * `RiStr -> RiReferenceStr`
//! * `RiAbsoluteStr -> RiStr`
//! * `RiAbsoluteStr -> RiReferenceStr`
//! * `RiRelativeStr -> RiReferenceStr`
//!
//! For safely convertible types (consider `FooStr -> BarStr` is safe), traits
//! below are implemented:
//!
//! * `AsRef<BarStr> for FooStr`
//! * `AsRef<BarStr> for FooString`
//! * `From<FooString> for BarString`
//! * `PartialEq<FooStr> for BarStr`, and lots of impls like that
//! + `PartialEq` and `PartialOrd`.
//! + Slice, owned, `Cow`, reference, etc...
//!
//! ## Fallible conversions
//!
//! Fallible conversions are implemented from plain string into IRI strings.
//!
//! * `TryFrom<&str> for &FooStr`
//! * `TryFrom<&str> for FooString`
//! * `TryFrom<String> for FooString`
//! * `FromStr for FooString`
//!
//! Some IRI string types provide more convenient methods to convert between IRI types.
//! For example, [`RiReferenceString::into_iri()`] tries to convert an IRI reference into an IRI,
//! and returns `Result<IriString, IriRelativeString>`.
//! This is because an IRI reference is valid as an IRI or a relative IRI reference.
//! Such methods are usually more efficient than using `TryFrom` for plain strings, because they
//! prevent you from losing ownership of a string, and do the conversion without extra memory
//! allocation.
//!
//! # Aliases
//!
//! This module contains type aliases for RFC 3986 URI types and RFC 3987 IRI types.
//!
//! `IriFooStr{,ing}` are aliases of `RiFooStr{,ing}<IriSpec>`, and `UriFooStr{,ing}` are aliases
//! of `RiFooStr{,ing}<UriSpec>`.
//!
//! # Wrapped string types
//!
//! Similar to string types in std (such as `str`, `std::path::Path`, and `std::ffi::OsStr`),
//! IRI string types in this crate provide convenient conversions to:
//!
//! * `std::boxed::Box`,
//! * `std::borrow::Cow`,
//! * `std::rc::Rc`, and
//! * `std::sync::Arc`.
//!
//! ```
//! # use iri_string::validate::Error;
//! # #[cfg(feature = "std")] {
//! use std::borrow::Cow;
//! use std::rc::Rc;
//! use std::sync::Arc;
//!
//! use iri_string::types::IriStr;
//!
//! let iri = IriStr::new("http://example.com/")?;
//! let iri_owned = iri.to_owned();
//!
//! // From slice.
//! let cow_1_1: Cow<'_, IriStr> = iri.into();
//! let cow_1_2 = Cow::<'_, IriStr>::from(iri);
//! assert!(matches!(cow_1_1, Cow::Borrowed(_)));
//! assert!(matches!(cow_1_2, Cow::Borrowed(_)));
//! // From owned.
//! let cow_2_1: Cow<'_, IriStr> = iri_owned.clone().into();
//! let cow_2_2 = Cow::<'_, IriStr>::from(iri_owned.clone());
//! assert!(matches!(cow_2_1, Cow::Owned(_)));
//! assert!(matches!(cow_2_2, Cow::Owned(_)));
//!
//! // From slice.
//! let box_1_1: Box<IriStr> = iri.into();
//! let box_1_2 = Box::<IriStr>::from(iri);
//! // From owned.
//! let box_2_1: Box<IriStr> = iri_owned.clone().into();
//! let box_2_2 = Box::<IriStr>::from(iri_owned.clone());
//!
//! // From slice.
//! let rc_1_1: Rc<IriStr> = iri.into();
//! let rc_1_2 = Rc::<IriStr>::from(iri);
//! // From owned.
//! // Note that `From<owned> for Rc<borrowed>` is not implemented for now.
//! // Get borrowed string by `.as_slice()` and convert it.
//! let rc_2_1: Rc<IriStr> = iri_owned.clone().as_slice().into();
//! let rc_2_2 = Rc::<IriStr>::from(iri_owned.clone().as_slice());
//!
//! // From slice.
//! let arc_1_1: Arc<IriStr> = iri.into();
//! let arc_1_2 = Arc::<IriStr>::from(iri);
//! // From owned.
//! // Note that `From<owned> for Arc<borrowed>` is not implemented for now.
//! // Get borrowed string by `.as_slice()` and convert it.
//! let arc_2_1: Arc<IriStr> = iri_owned.clone().as_slice().into();
//! let arc_2_2 = Arc::<IriStr>::from(iri_owned.clone().as_slice());
//! # }
//! # Ok::<_, Error>(())
//! ```
//!
//! [RFC 3986]: https://www.rfc-editor.org/rfc/rfc3986.html
//! [RFC 3987]: https://www.rfc-editor.org/rfc/rfc3987.html
//! [`RiStr`]: struct.RiStr.html
//! [`RiString`]: struct.RiString.html
//! [`RiAbsoluteStr`]: struct.RiAbsoluteStr.html
//! [`RiAbsoluteString`]: struct.RiAbsoluteString.html
//! [`RiFragmentStr`]: struct.RiFragmentStr.html
//! [`RiFragmentString`]: struct.RiFragmentString.html
//! [`RiReferenceStr`]: struct.RiReferenceStr.html
//! [`RiReferenceString`]: struct.RiReferenceString.html
//! [`RiReferenceString::into_iri()`]: struct.RiReferenceString.html#method.into_iri
//! [`RiRelativeStr`]: struct.RiRelativeStr.html
//! [`RiRelativeString`]: struct.RiRelativeString.html
//! [`IriSpec`]: ../spec/enum.IriSpec.html
//! [`UriSpec`]: ../spec/enum.UriSpec.html
#[cfg(feature = "alloc")]
pub use self::{
generic::{
CreationError, RiAbsoluteString, RiFragmentString, RiQueryString, RiReferenceString,
RiRelativeString, RiString,
},
iri::{
IriAbsoluteString, IriFragmentString, IriQueryString, IriReferenceString,
IriRelativeString, IriString,
},
uri::{
UriAbsoluteString, UriFragmentString, UriQueryString, UriReferenceString,
UriRelativeString, UriString,
},
};
pub use self::{
generic::{RiAbsoluteStr, RiFragmentStr, RiQueryStr, RiReferenceStr, RiRelativeStr, RiStr},
iri::{IriAbsoluteStr, IriFragmentStr, IriQueryStr, IriReferenceStr, IriRelativeStr, IriStr},
uri::{UriAbsoluteStr, UriFragmentStr, UriQueryStr, UriReferenceStr, UriRelativeStr, UriStr},
};
pub(crate) mod generic;
mod iri;
mod uri;

57
vendor/iri-string/src/types/generic.rs vendored Normal file
View File

@@ -0,0 +1,57 @@
//! Generic resource identifier types.
//!
//! ```text
//! IRI = scheme ":" ihier-part [ "?" iquery ] [ "#" ifragment ]
//! IRI-reference = IRI / irelative-ref
//! absolute-IRI = scheme ":" ihier-part [ "?" iquery ]
//! irelative-ref = irelative-part [ "?" iquery ] [ "#" ifragment ]
//! (`irelative-part` is roughly same as `ihier-part`.)
//! ```
//!
//! Hierarchy:
//!
//! ```text
//! RiReferenceStr
//! |-- RiStr
//! | `-- RiAbsoluteStr
//! `-- RiRelativeStr
//! ```
//!
//! Therefore, the conversions below are safe and cheap:
//!
//! * `RiStr -> RiReferenceStr`
//! * `RiAbsoluteStr -> RiStr`
//! * `RiAbsoluteStr -> RiReferenceStr`
//! * `RiRelativeStr -> RiReferenceStr`
//!
//! For safely convertible types (consider `FooStr -> BarStr` is safe), traits
//! below are implemented:
//!
//! * `AsRef<BarStr> for FooStr`
//! * `AsRef<BarStr> for FooString`
//! * `From<FooString> for BarString`
//! * `PartialEq<FooStr> for BarStr` and lots of impls like that
//! + `PartialEq` and `PartialOrd`.
//! + Slice, owned, `Cow`, reference, etc...
pub use self::{
absolute::RiAbsoluteStr, fragment::RiFragmentStr, normal::RiStr, query::RiQueryStr,
reference::RiReferenceStr, relative::RiRelativeStr,
};
#[cfg(feature = "alloc")]
pub use self::{
absolute::RiAbsoluteString, error::CreationError, fragment::RiFragmentString, normal::RiString,
query::RiQueryString, reference::RiReferenceString, relative::RiRelativeString,
};
#[macro_use]
mod macros;
mod absolute;
#[cfg(feature = "alloc")]
mod error;
mod fragment;
mod normal;
mod query;
mod reference;
mod relative;

View File

@@ -0,0 +1,728 @@
//! Absolute IRI (without fragment part).
use crate::components::AuthorityComponents;
#[cfg(feature = "alloc")]
use crate::mask_password::password_range_to_hide;
use crate::mask_password::PasswordMasked;
use crate::normalize::{Error, NormalizationInput, Normalized, NormalizednessCheckMode};
use crate::parser::trusted as trusted_parser;
use crate::spec::Spec;
use crate::types::{RiQueryStr, RiReferenceStr, RiStr};
#[cfg(feature = "alloc")]
use crate::types::{RiReferenceString, RiString};
use crate::validate::absolute_iri;
define_custom_string_slice! {
/// A borrowed slice of an absolute IRI without fragment part.
///
/// This corresponds to [`absolute-IRI` rule] in [RFC 3987]
/// (and [`absolute-URI` rule] in [RFC 3986]).
/// In other words, this is [`RiStr`] without fragment part.
///
/// If you want to accept fragment part, use [`RiStr`].
///
/// # Valid values
///
/// This type can have an absolute IRI without fragment part.
///
/// ```
/// # use iri_string::types::IriAbsoluteStr;
/// assert!(IriAbsoluteStr::new("https://example.com/foo?bar=baz").is_ok());
/// assert!(IriAbsoluteStr::new("foo:bar").is_ok());
/// // Scheme `foo` and empty path.
/// assert!(IriAbsoluteStr::new("foo:").is_ok());
/// // `foo://.../` below are all allowed. See the crate documentation for detail.
/// assert!(IriAbsoluteStr::new("foo:/").is_ok());
/// assert!(IriAbsoluteStr::new("foo://").is_ok());
/// assert!(IriAbsoluteStr::new("foo:///").is_ok());
/// assert!(IriAbsoluteStr::new("foo:////").is_ok());
/// assert!(IriAbsoluteStr::new("foo://///").is_ok());
/// ```
///
/// A relative IRI is not allowed.
///
/// ```
/// # use iri_string::types::IriAbsoluteStr;
/// // This is a relative path.
/// assert!(IriAbsoluteStr::new("foo/bar").is_err());
/// // `/foo/bar` is an absolute path, but it is authority-relative.
/// assert!(IriAbsoluteStr::new("/foo/bar").is_err());
/// // `//foo/bar` is termed "network-path reference",
/// // or usually called "protocol-relative reference".
/// assert!(IriAbsoluteStr::new("//foo/bar").is_err());
/// // Empty string is not a valid absolute IRI.
/// assert!(IriAbsoluteStr::new("").is_err());
/// ```
///
/// Fragment part (such as trailing `#foo`) is not allowed.
///
/// ```
/// # use iri_string::types::IriAbsoluteStr;
/// // Fragment part is not allowed.
/// assert!(IriAbsoluteStr::new("https://example.com/foo?bar=baz#qux").is_err());
/// ```
///
/// Some characters and sequences cannot be used in an absolute IRI.
///
/// ```
/// # use iri_string::types::IriAbsoluteStr;
/// // `<` and `>` cannot directly appear in an absolute IRI.
/// assert!(IriAbsoluteStr::new("<not allowed>").is_err());
/// // Broken percent encoding cannot appear in an absolute IRI.
/// assert!(IriAbsoluteStr::new("%").is_err());
/// assert!(IriAbsoluteStr::new("%GG").is_err());
/// ```
///
/// [RFC 3986]: https://www.rfc-editor.org/rfc/rfc3986.html
/// [RFC 3987]: https://www.rfc-editor.org/rfc/rfc3987.html
/// [`absolute-IRI` rule]: https://www.rfc-editor.org/rfc/rfc3987.html#section-2.2
/// [`absolute-URI` rule]: https://www.rfc-editor.org/rfc/rfc3986.html#section-4.3
/// [`RiStr`]: struct.RiStr.html
struct RiAbsoluteStr {
// NOTE(review): presumably `validator` is the function the generated constructors run on
// the input and `expecting_msg` is used in diagnostics -- confirm against the macro definition.
validator = absolute_iri,
expecting_msg = "Absolute IRI string",
}
}
#[cfg(feature = "alloc")]
define_custom_string_owned! {
/// An owned string of an absolute IRI without fragment part.
///
/// This corresponds to [`absolute-IRI` rule] in [RFC 3987]
/// (and [`absolute-URI` rule] in [RFC 3986]).
/// The rule for `absolute-IRI` is `scheme ":" ihier-part [ "?" iquery ]`.
/// In other words, this is [`RiString`] without fragment part.
///
/// If you want to accept fragment part, use [`RiString`].
///
/// For details, see the documentation for [`RiAbsoluteStr`].
///
/// Enabled by `alloc` or `std` feature.
///
/// [RFC 3986]: https://www.rfc-editor.org/rfc/rfc3986.html
/// [RFC 3987]: https://www.rfc-editor.org/rfc/rfc3987.html
/// [`absolute-IRI` rule]: https://www.rfc-editor.org/rfc/rfc3987.html#section-2.2
/// [`absolute-URI` rule]: https://www.rfc-editor.org/rfc/rfc3986.html#section-4.3
/// [`RiAbsoluteStr`]: struct.RiAbsoluteStr.html
/// [`RiString`]: struct.RiString.html
struct RiAbsoluteString {
// Same validator as the borrowed counterpart named in `slice`.
validator = absolute_iri,
slice = RiAbsoluteStr,
expecting_msg = "Absolute IRI string",
}
}
impl<S: Spec> RiAbsoluteStr<S> {
/// Returns `Ok(())` if the IRI is normalizable by the RFC 3986 algorithm.
///
/// # Errors
///
/// Returns `Err(_)` if the IRI is not normalizable by the RFC 3986 algorithm.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::types::IriAbsoluteStr;
///
/// let iri = IriAbsoluteStr::new("HTTP://example.COM/foo/%2e/bar/..")?;
/// assert!(iri.ensure_rfc3986_normalizable().is_ok());
///
/// let iri2 = IriAbsoluteStr::new("scheme:/..//bar")?;
/// // The normalization result would be `scheme://bar` according to RFC
/// // 3986, but it is unintended and should be treated as a failure.
/// // This crate automatically handles this case so that `.normalize()` won't fail.
/// assert!(iri2.ensure_rfc3986_normalizable().is_err());
/// # Ok::<_, Error>(())
/// ```
#[inline]
pub fn ensure_rfc3986_normalizable(&self) -> Result<(), Error> {
    NormalizationInput::from(self).ensure_rfc3986_normalizable()
}
/// Checks whether the IRI is already in normalized form.
///
/// The result is the same as that of `self.normalize().to_string() == self`,
/// but it is computed more efficiently, without heap allocation.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::format::ToDedicatedString;
/// use iri_string::types::IriAbsoluteStr;
///
/// let iri = IriAbsoluteStr::new("HTTP://example.COM/foo/./bar/%2e%2e/../baz?query")?;
/// assert!(!iri.is_normalized());
///
/// let normalized = iri.normalize().to_dedicated_string();
/// assert_eq!(normalized, "http://example.com/baz?query");
/// assert!(normalized.is_normalized());
/// # }
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::format::ToDedicatedString;
/// use iri_string::types::IriAbsoluteStr;
///
/// let iri = IriAbsoluteStr::new("scheme:/.///foo")?;
/// // Already normalized.
/// assert!(iri.is_normalized());
/// # }
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::format::ToDedicatedString;
/// use iri_string::types::IriAbsoluteStr;
///
/// let iri = IriAbsoluteStr::new("scheme:relative/..//not-a-host")?;
/// // Default normalization algorithm assumes the path part to be NOT opaque.
/// assert!(!iri.is_normalized());
///
/// let normalized = iri.normalize().to_dedicated_string();
/// assert_eq!(normalized, "scheme:/.//not-a-host");
/// # }
/// # Ok::<_, Error>(())
/// ```
#[inline]
#[must_use]
pub fn is_normalized(&self) -> bool {
    let raw = self.as_str();
    trusted_parser::is_normalized::<S>(raw, NormalizednessCheckMode::Default)
}
/// Checks whether the IRI is already normalized in the sense of RFC 3986.
///
/// The result is the same as that of
/// `self.ensure_rfc3986_normalizable().is_ok() && (self.normalize().to_string() == self)`,
/// but it is computed more efficiently, without heap allocation.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::format::ToDedicatedString;
/// use iri_string::types::IriAbsoluteStr;
///
/// let iri = IriAbsoluteStr::new("HTTP://example.COM/foo/./bar/%2e%2e/../baz?query")?;
/// assert!(!iri.is_normalized_rfc3986());
///
/// let normalized = iri.normalize().to_dedicated_string();
/// assert_eq!(normalized, "http://example.com/baz?query");
/// assert!(normalized.is_normalized_rfc3986());
/// # }
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::format::ToDedicatedString;
/// use iri_string::types::IriAbsoluteStr;
///
/// let iri = IriAbsoluteStr::new("scheme:/.///foo")?;
/// // Not normalized in the sense of RFC 3986.
/// assert!(!iri.is_normalized_rfc3986());
/// # }
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::format::ToDedicatedString;
/// use iri_string::types::IriAbsoluteStr;
///
/// let iri = IriAbsoluteStr::new("scheme:relative/..//not-a-host")?;
/// // RFC 3986 normalization algorithm assumes the path part to be NOT opaque.
/// assert!(!iri.is_normalized_rfc3986());
///
/// let normalized = iri.normalize().to_dedicated_string();
/// assert_eq!(normalized, "scheme:/.//not-a-host");
/// # }
/// # Ok::<_, Error>(())
/// ```
#[inline]
#[must_use]
pub fn is_normalized_rfc3986(&self) -> bool {
    let raw = self.as_str();
    trusted_parser::is_normalized::<S>(raw, NormalizednessCheckMode::Rfc3986)
}
/// Checks whether the IRI is already normalized in the sense of the
/// [`normalize_but_preserve_authorityless_relative_path`] method.
///
/// The result is the same as that of
/// `self.normalize_but_preserve_authorityless_relative_path().to_string() == self`,
/// but it is computed more efficiently, without heap allocation.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::format::ToDedicatedString;
/// use iri_string::types::IriAbsoluteStr;
///
/// let iri = IriAbsoluteStr::new("HTTP://example.COM/foo/./bar/%2e%2e/../baz?query")?;
/// assert!(!iri.is_normalized_but_authorityless_relative_path_preserved());
///
/// let normalized = iri
/// .normalize_but_preserve_authorityless_relative_path()
/// .to_dedicated_string();
/// assert_eq!(normalized, "http://example.com/baz?query");
/// assert!(normalized.is_normalized_but_authorityless_relative_path_preserved());
/// # }
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::format::ToDedicatedString;
/// use iri_string::types::IriAbsoluteStr;
///
/// let iri = IriAbsoluteStr::new("scheme:/.///foo")?;
/// // Already normalized in the sense of
/// // `normalize_but_preserve_authorityless_relative_path()` method.
/// assert!(iri.is_normalized_but_authorityless_relative_path_preserved());
/// # }
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::format::ToDedicatedString;
/// use iri_string::types::IriAbsoluteStr;
///
/// let iri = IriAbsoluteStr::new("scheme:relative/..//not-a-host")?;
/// // Relative path is treated as opaque since the authority component is absent.
/// assert!(iri.is_normalized_but_authorityless_relative_path_preserved());
/// # }
/// # Ok::<_, Error>(())
/// ```
///
/// [`normalize_but_preserve_authorityless_relative_path`]:
/// `Self::normalize_but_preserve_authorityless_relative_path`
#[inline]
#[must_use]
pub fn is_normalized_but_authorityless_relative_path_preserved(&self) -> bool {
    let mode = NormalizednessCheckMode::PreserveAuthoritylessRelativePath;
    trusted_parser::is_normalized::<S>(self.as_str(), mode)
}
/// Returns a normalized form of the IRI.
///
/// # Notes
///
/// For some abnormal IRIs, normalization can produce a semantically
/// incorrect string that still looks syntactically valid. To avoid the
/// security issues this trap could cause, the normalization algorithm of
/// this crate automatically applies a workaround.
///
/// If you worry about this, check with the
/// [`RiAbsoluteStr::ensure_rfc3986_normalizable`] method or
/// [`Normalized::ensure_rfc3986_normalizable`] before using the result
/// string.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::format::ToDedicatedString;
/// use iri_string::types::IriAbsoluteStr;
///
/// let iri = IriAbsoluteStr::new("HTTP://example.COM/foo/./bar/%2e%2e/../baz?query")?;
///
/// let normalized = iri.normalize().to_dedicated_string();
/// assert_eq!(normalized, "http://example.com/baz?query");
/// # }
/// # Ok::<_, Error>(())
/// ```
#[inline]
#[must_use]
pub fn normalize(&self) -> Normalized<'_, Self> {
    let input = NormalizationInput::from(self);
    Normalized::from_input(input).and_normalize()
}
/// Returns a normalized form of the IRI, leaving dot segments of a relative
/// path untouched when the authority component is absent.
///
/// This normalization is similar to that of the [WHATWG URL Standard],
/// although this implementation is not guaranteed to strictly follow the spec.
///
/// Note that this normalization algorithm is not compatible with the RFC 3986
/// algorithm for some inputs.
///
/// Note also that case normalization and percent-encoding normalization are
/// still applied to any path.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::format::ToDedicatedString;
/// use iri_string::types::IriAbsoluteStr;
///
/// let iri = IriAbsoluteStr::new("HTTP://example.COM/foo/./bar/%2e%2e/../baz?query")?;
///
/// let normalized = iri
/// .normalize_but_preserve_authorityless_relative_path()
/// .to_dedicated_string();
/// assert_eq!(normalized, "http://example.com/baz?query");
/// # }
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::format::ToDedicatedString;
/// use iri_string::types::IriAbsoluteStr;
///
/// let iri = IriAbsoluteStr::new("scheme:relative/../f%6f%6f")?;
///
/// let normalized = iri
/// .normalize_but_preserve_authorityless_relative_path()
/// .to_dedicated_string();
/// assert_eq!(normalized, "scheme:relative/../foo");
/// // `.normalize()` would normalize this to `scheme:/foo`.
/// # assert_eq!(iri.normalize().to_dedicated_string(), "scheme:/foo");
/// # }
/// # Ok::<_, Error>(())
/// ```
///
/// [WHATWG URL Standard]: https://url.spec.whatwg.org/
#[inline]
#[must_use]
pub fn normalize_but_preserve_authorityless_relative_path(&self) -> Normalized<'_, Self> {
    let input = NormalizationInput::from(self);
    Normalized::from_input(input).and_normalize_but_preserve_authorityless_relative_path()
}
/// Returns the proxy to the IRI with password masking feature.
///
/// The proxy borrows `self`; the IRI itself is not modified. As the example
/// below shows, converting the proxy to a string yields the IRI with the
/// password omitted (the separator colon is kept), and `replace_password`
/// substitutes the given text for the password instead.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::format::ToDedicatedString;
/// use iri_string::types::IriAbsoluteStr;
///
/// let iri = IriAbsoluteStr::new("http://user:password@example.com/path?query")?;
/// let masked = iri.mask_password();
/// assert_eq!(masked.to_dedicated_string(), "http://user:@example.com/path?query");
///
/// assert_eq!(
/// masked.replace_password("${password}").to_string(),
/// "http://user:${password}@example.com/path?query"
/// );
/// # }
/// # Ok::<_, Error>(())
/// ```
#[inline]
#[must_use]
pub fn mask_password(&self) -> PasswordMasked<'_, Self> {
PasswordMasked::new(self)
}
}
/// Components getters.
impl<S: Spec> RiAbsoluteStr<S> {
/// Returns the scheme component as a string slice.
///
/// The colon that follows the scheme is not included.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::types::IriAbsoluteStr;
///
/// let iri = IriAbsoluteStr::new("http://example.com/pathpath?queryquery")?;
/// assert_eq!(iri.scheme_str(), "http");
/// # Ok::<_, Error>(())
/// ```
#[inline]
#[must_use]
pub fn scheme_str(&self) -> &str {
    let raw = self.as_str();
    trusted_parser::extract_scheme_absolute(raw)
}
/// Returns the authority component as a string slice, if the IRI has one.
///
/// The leading `//` is not included.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::types::IriAbsoluteStr;
///
/// let iri = IriAbsoluteStr::new("http://example.com/pathpath?queryquery")?;
/// assert_eq!(iri.authority_str(), Some("example.com"));
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::types::IriAbsoluteStr;
///
/// let iri = IriAbsoluteStr::new("urn:uuid:10db315b-fcd1-4428-aca8-15babc9a2da2")?;
/// assert_eq!(iri.authority_str(), None);
/// # Ok::<_, Error>(())
/// ```
#[inline]
#[must_use]
pub fn authority_str(&self) -> Option<&str> {
    let raw = self.as_str();
    trusted_parser::extract_authority_absolute(raw)
}
/// Returns the path component as a string slice.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::types::IriAbsoluteStr;
///
/// let iri = IriAbsoluteStr::new("http://example.com/pathpath?queryquery")?;
/// assert_eq!(iri.path_str(), "/pathpath");
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::types::IriAbsoluteStr;
///
/// let iri = IriAbsoluteStr::new("urn:uuid:10db315b-fcd1-4428-aca8-15babc9a2da2")?;
/// assert_eq!(iri.path_str(), "uuid:10db315b-fcd1-4428-aca8-15babc9a2da2");
/// # Ok::<_, Error>(())
/// ```
#[inline]
#[must_use]
pub fn path_str(&self) -> &str {
    let raw = self.as_str();
    trusted_parser::extract_path_absolute(raw)
}
/// Returns the query component as a typed query slice, if the IRI has one.
///
/// The leading question mark (`?`) is not included.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::types::{IriAbsoluteStr, IriQueryStr};
///
/// let iri = IriAbsoluteStr::new("http://example.com/pathpath?queryquery")?;
/// let query = IriQueryStr::new("queryquery")?;
/// assert_eq!(iri.query(), Some(query));
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::types::IriAbsoluteStr;
///
/// let iri = IriAbsoluteStr::new("urn:uuid:10db315b-fcd1-4428-aca8-15babc9a2da2")?;
/// assert_eq!(iri.query(), None);
/// # Ok::<_, Error>(())
/// ```
#[inline]
#[must_use]
pub fn query(&self) -> Option<&RiQueryStr<S>> {
    let raw_query = trusted_parser::extract_query_absolute_iri(self.as_str())?;
    // SAFETY: `trusted_parser::extract_query_absolute_iri()` must return
    // the query part of an IRI (without the leading `?` character, as the
    // examples on this method show), and the returned string consists of
    // allowed characters since it is a substring of the source IRI.
    Some(unsafe { RiQueryStr::new_maybe_unchecked(raw_query) })
}
/// Returns the query component as a plain string slice, if the IRI has one.
///
/// The leading question mark (`?`) is not included.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::types::IriAbsoluteStr;
///
/// let iri = IriAbsoluteStr::new("http://example.com/pathpath?queryquery")?;
/// assert_eq!(iri.query_str(), Some("queryquery"));
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::types::IriAbsoluteStr;
///
/// let iri = IriAbsoluteStr::new("urn:uuid:10db315b-fcd1-4428-aca8-15babc9a2da2")?;
/// assert_eq!(iri.query_str(), None);
/// # Ok::<_, Error>(())
/// ```
#[inline]
#[must_use]
pub fn query_str(&self) -> Option<&str> {
    let raw = self.as_str();
    trusted_parser::extract_query_absolute_iri(raw)
}
/// Returns the authority components.
///
/// Returns `None` when the IRI has no authority component.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::types::IriAbsoluteStr;
///
/// let iri = IriAbsoluteStr::new("http://user:pass@example.com:8080/pathpath?queryquery")?;
/// let authority = iri.authority_components()
/// .expect("authority is available");
/// assert_eq!(authority.userinfo(), Some("user:pass"));
/// assert_eq!(authority.host(), "example.com");
/// assert_eq!(authority.port(), Some("8080"));
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::types::IriAbsoluteStr;
///
/// let iri = IriAbsoluteStr::new("urn:uuid:10db315b-fcd1-4428-aca8-15babc9a2da2")?;
/// assert!(iri.authority_components().is_none());
/// # Ok::<_, Error>(())
/// ```
#[inline]
#[must_use]
pub fn authority_components(&self) -> Option<AuthorityComponents<'_>> {
    AuthorityComponents::from_iri(self.as_ref())
}
}
#[cfg(feature = "alloc")]
impl<S: Spec> RiAbsoluteString<S> {
/// Strips the password, together with its separator colon, from `self` in place.
///
/// The removal happens even when the password is empty. If there is no
/// password to hide, `self` is left untouched.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::types::IriAbsoluteString;
///
/// let mut iri = IriAbsoluteString::try_from("http://user:password@example.com/path?query")?;
/// iri.remove_password_inline();
/// assert_eq!(iri, "http://user@example.com/path?query");
/// # }
/// # Ok::<_, Error>(())
/// ```
///
/// Even if the password is empty, the password and separator will be removed.
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::types::IriAbsoluteString;
///
/// let mut iri = IriAbsoluteString::try_from("http://user:@example.com/path?query")?;
/// iri.remove_password_inline();
/// assert_eq!(iri, "http://user@example.com/path?query");
/// # }
/// # Ok::<_, Error>(())
/// ```
pub fn remove_password_inline(&mut self) {
    let found = password_range_to_hide(self.as_slice().as_ref());
    if let Some(pw_range) = found {
        // Start one byte earlier so that the separator colon is removed too.
        let removal = (pw_range.start - 1)..pw_range.end;
        // SAFETY: the IRI must still be valid after the password component and
        // the leading separator colon is removed.
        unsafe {
            let buf = self.as_inner_mut();
            buf.drain(removal);
            debug_assert!(
                RiAbsoluteStr::<S>::new(buf).is_ok(),
                "[validity] the IRI must be valid after the password component is removed"
            );
        }
    }
}
/// Empties a non-empty password in `self` in place.
///
/// The separator colon is kept whenever a password part was present.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::types::IriAbsoluteString;
///
/// let mut iri = IriAbsoluteString::try_from("http://user:password@example.com/path?query")?;
/// iri.remove_nonempty_password_inline();
/// assert_eq!(iri, "http://user:@example.com/path?query");
/// # }
/// # Ok::<_, Error>(())
/// ```
///
/// If the password is empty, it is left as is.
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::types::IriAbsoluteString;
///
/// let mut iri = IriAbsoluteString::try_from("http://user:@example.com/path?query")?;
/// iri.remove_nonempty_password_inline();
/// assert_eq!(iri, "http://user:@example.com/path?query");
/// # }
/// # Ok::<_, Error>(())
/// ```
pub fn remove_nonempty_password_inline(&mut self) {
    let pw_range = match password_range_to_hide(self.as_slice().as_ref()) {
        Some(range) => range,
        None => return,
    };
    // Nothing to do for an empty password.
    if pw_range.is_empty() {
        return;
    }
    debug_assert_eq!(
        self.as_str().as_bytes().get(pw_range.start - 1).copied(),
        Some(b':'),
        "[validity] the password component must be prefixed with a separator colon"
    );
    // SAFETY: the IRI must be valid after the password is replaced with empty string.
    unsafe {
        let buf = self.as_inner_mut();
        buf.drain(pw_range);
        debug_assert!(
            RiAbsoluteStr::<S>::new(buf).is_ok(),
            "[validity] the IRI must be valid after the password component is removed"
        );
    }
}
}
// Conversions from an absolute IRI to an IRI (`RiAbsoluteStr{,ing}` -> `RiStr{,ing}`).
// NOTE(review): the macro body is not visible here; presumably it generates the
// infallible conversion and comparison impls listed in the module docs -- confirm
// against the macro definition.
impl_trivial_conv_between_iri! {
from_slice: RiAbsoluteStr,
from_owned: RiAbsoluteString,
to_slice: RiStr,
to_owned: RiString,
}
// Conversions from an absolute IRI to an IRI reference
// (`RiAbsoluteStr{,ing}` -> `RiReferenceStr{,ing}`).
impl_trivial_conv_between_iri! {
from_slice: RiAbsoluteStr,
from_owned: RiAbsoluteString,
to_slice: RiReferenceStr,
to_owned: RiReferenceString,
}

View File

@@ -0,0 +1,70 @@
//! Resource identifier creation error.
use core::fmt;
#[cfg(feature = "std")]
use std::error;
use crate::validate::Error;
/// Error on conversion into an IRI type.
///
/// Holds the validation error together with the source data that was being
/// converted, so that the source can be taken back with `into_source()`.
///
/// Enabled by `alloc` or `std` feature.
// This type itself does not require `alloc` or `std`, but the type is used only when `alloc`
// feature is enabled. To avoid exporting unused stuff, the type (and the `types::generic::error`
// module) is available only when necessary.
//
// Note that all types which implement `Spec` also implement `SpecInternal`.
// NOTE(review): the remark above does not obviously relate to this type (it has no
// `Spec` parameter) -- possibly a leftover; confirm before relying on it.
pub struct CreationError<T> {
/// Source data.
source: T,
/// Validation error.
error: Error,
}
impl<T> CreationError<T> {
/// Returns the source data.
#[must_use]
pub fn into_source(self) -> T {
self.source
}
/// Returns the validation error.
#[must_use]
pub fn validation_error(&self) -> Error {
self.error
}
/// Creates a new `CreationError`.
#[must_use]
pub(crate) fn new(error: Error, source: T) -> Self {
Self { source, error }
}
}
impl<T: fmt::Debug> fmt::Debug for CreationError<T> {
    /// Formats the error as a struct listing the `source` and `error` fields.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let mut builder = f.debug_struct("CreationError");
        builder.field("source", &self.source);
        builder.field("error", &self.error);
        builder.finish()
    }
}
impl<T: Clone> Clone for CreationError<T> {
    /// Clones the source data and copies the validation error.
    fn clone(&self) -> Self {
        let source = self.source.clone();
        let error = self.error;
        Self { source, error }
    }
}
impl<T> fmt::Display for CreationError<T> {
// Only the validation error is formatted; the source data is not part of the
// message, which is why no bound on `T` is needed here.
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
self.error.fmt(f)
}
}
// No methods are overridden, so the default `std::error::Error` behavior applies.
// `T: Debug` is required because `Error` needs `Debug`, and the `Debug` impl of
// `CreationError<T>` is only available for `T: Debug`.
#[cfg(feature = "std")]
impl<T: fmt::Debug> error::Error for CreationError<T> {}

View File

@@ -0,0 +1,106 @@
//! Fragment string.
use crate::spec::Spec;
use crate::validate::{fragment, Error, ErrorKind};
define_custom_string_slice! {
/// A borrowed slice of an IRI fragment (i.e. after the first `#` character).
///
/// This corresponds to [`ifragment` rule] in [RFC 3987] (and [`fragment` rule] in [RFC 3986]).
/// The rule for `ifragment` is `*( ipchar / "/" / "?" )`.
///
/// # Valid values
///
/// This type can have an IRI fragment.
/// Note that the IRI `foo://bar/baz#qux` has the fragment `qux`, **not** `#qux`.
///
/// ```
/// # use iri_string::types::IriFragmentStr;
/// assert!(IriFragmentStr::new("").is_ok());
/// assert!(IriFragmentStr::new("foo").is_ok());
/// assert!(IriFragmentStr::new("foo/bar").is_ok());
/// assert!(IriFragmentStr::new("/foo/bar").is_ok());
/// assert!(IriFragmentStr::new("//foo/bar").is_ok());
/// assert!(IriFragmentStr::new("https://user:pass@example.com:8080").is_ok());
/// assert!(IriFragmentStr::new("https://example.com/").is_ok());
/// ```
///
/// Some characters and sequences cannot be used in a fragment.
///
/// ```
/// # use iri_string::types::IriFragmentStr;
/// // `<` and `>` cannot directly appear in an IRI reference.
/// assert!(IriFragmentStr::new("<not allowed>").is_err());
/// // Broken percent encoding cannot appear in an IRI reference.
/// assert!(IriFragmentStr::new("%").is_err());
/// assert!(IriFragmentStr::new("%GG").is_err());
/// // Hash sign `#` cannot appear in an IRI fragment.
/// assert!(IriFragmentStr::new("#hash").is_err());
/// ```
///
/// [RFC 3986]: https://www.rfc-editor.org/rfc/rfc3986.html
/// [RFC 3987]: https://www.rfc-editor.org/rfc/rfc3987.html
/// [`fragment` rule]: https://www.rfc-editor.org/rfc/rfc3986.html#section-3.5
/// [`ifragment` rule]: https://www.rfc-editor.org/rfc/rfc3987.html#section-2.2
struct RiFragmentStr {
// NOTE(review): presumably `validator` is the function the generated constructors run on
// the input and `expecting_msg` is used in diagnostics -- confirm against the macro definition.
validator = fragment,
expecting_msg = "IRI fragment string",
}
}
#[cfg(feature = "alloc")]
define_custom_string_owned! {
/// An owned string of an IRI fragment (i.e. after the first `#` character).
///
/// This corresponds to [`ifragment` rule] in [RFC 3987] (and [`fragment` rule] in [RFC 3986]).
/// The rule for `ifragment` is `*( ipchar / "/" / "?" )`.
///
/// For details, see the documentation for [`RiFragmentStr`].
///
/// Enabled by `alloc` or `std` feature.
///
/// [RFC 3986]: https://www.rfc-editor.org/rfc/rfc3986.html
/// [RFC 3987]: https://www.rfc-editor.org/rfc/rfc3987.html
/// [`fragment` rule]: https://www.rfc-editor.org/rfc/rfc3986.html#section-3.5
/// [`ifragment` rule]: https://www.rfc-editor.org/rfc/rfc3987.html#section-2.2
/// [`RiFragmentStr`]: struct.RiFragmentStr.html
struct RiFragmentString {
validator = fragment,
slice = RiFragmentStr,
expecting_msg = "IRI fragment string",
}
}
impl<S: Spec> RiFragmentStr<S> {
/// Creates a new `&RiFragmentStr` from the fragment part prefixed by `#`.
///
/// # Examples
///
/// ```
/// # use iri_string::types::IriFragmentStr;
/// assert!(IriFragmentStr::from_prefixed("#").is_ok());
/// assert!(IriFragmentStr::from_prefixed("#foo").is_ok());
/// assert!(IriFragmentStr::from_prefixed("#foo/bar").is_ok());
/// assert!(IriFragmentStr::from_prefixed("#/foo/bar").is_ok());
/// assert!(IriFragmentStr::from_prefixed("#//foo/bar").is_ok());
/// assert!(IriFragmentStr::from_prefixed("#https://user:pass@example.com:8080").is_ok());
/// assert!(IriFragmentStr::from_prefixed("#https://example.com/").is_ok());
///
/// // `<` and `>` cannot directly appear in an IRI.
/// assert!(IriFragmentStr::from_prefixed("#<not allowed>").is_err());
/// // Broken percent encoding cannot appear in an IRI.
/// assert!(IriFragmentStr::new("#%").is_err());
/// assert!(IriFragmentStr::new("#%GG").is_err());
/// // `#` prefix is expected.
/// assert!(IriFragmentStr::from_prefixed("").is_err());
/// assert!(IriFragmentStr::from_prefixed("foo").is_err());
/// // Hash sign `#` cannot appear in an IRI fragment.
/// assert!(IriFragmentStr::from_prefixed("##hash").is_err());
/// ```
pub fn from_prefixed(s: &str) -> Result<&Self, Error> {
if !s.starts_with('#') {
return Err(Error::with_kind(ErrorKind::InvalidFragment));
}
TryFrom::try_from(&s[1..])
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,944 @@
//! Usual absolute IRI (fragment part being allowed).
#[cfg(all(feature = "alloc", not(feature = "std")))]
use alloc::string::String;
use crate::components::AuthorityComponents;
#[cfg(feature = "alloc")]
use crate::mask_password::password_range_to_hide;
use crate::mask_password::PasswordMasked;
use crate::normalize::{Error, NormalizationInput, Normalized, NormalizednessCheckMode};
use crate::parser::trusted as trusted_parser;
#[cfg(feature = "alloc")]
use crate::raw;
use crate::spec::Spec;
use crate::types::{RiAbsoluteStr, RiFragmentStr, RiQueryStr, RiReferenceStr};
#[cfg(feature = "alloc")]
use crate::types::{RiAbsoluteString, RiFragmentString, RiReferenceString};
use crate::validate::iri;
define_custom_string_slice! {
/// A borrowed string of an absolute IRI possibly with fragment part.
///
/// This corresponds to [`IRI` rule] in [RFC 3987] (and [`URI` rule] in [RFC 3986]).
/// The rule for `IRI` is `scheme ":" ihier-part [ "?" iquery ] [ "#" ifragment ]`.
/// In other words, this is [`RiAbsoluteStr`] with fragment part allowed.
///
/// # Valid values
///
/// This type can have an IRI (which is absolute, and may have fragment part).
///
/// ```
/// # use iri_string::types::IriStr;
/// assert!(IriStr::new("https://user:pass@example.com:8080").is_ok());
/// assert!(IriStr::new("https://example.com/").is_ok());
/// assert!(IriStr::new("https://example.com/foo?bar=baz").is_ok());
/// assert!(IriStr::new("https://example.com/foo?bar=baz#qux").is_ok());
/// assert!(IriStr::new("foo:bar").is_ok());
/// assert!(IriStr::new("foo:").is_ok());
/// // `foo://.../` below are all allowed. See the crate documentation for detail.
/// assert!(IriStr::new("foo:/").is_ok());
/// assert!(IriStr::new("foo://").is_ok());
/// assert!(IriStr::new("foo:///").is_ok());
/// assert!(IriStr::new("foo:////").is_ok());
/// assert!(IriStr::new("foo://///").is_ok());
/// ```
///
/// Relative IRI reference is not allowed.
///
/// ```
/// # use iri_string::types::IriStr;
/// // This is relative path.
/// assert!(IriStr::new("foo/bar").is_err());
/// // `/foo/bar` is an absolute path, but it is authority-relative.
/// assert!(IriStr::new("/foo/bar").is_err());
/// // `//foo/bar` is termed "network-path reference",
/// // or usually called "protocol-relative reference".
/// assert!(IriStr::new("//foo/bar").is_err());
/// // Same-document reference is relative.
/// assert!(IriStr::new("#foo").is_err());
/// // Empty string is not a valid absolute IRI.
/// assert!(IriStr::new("").is_err());
/// ```
///
/// Some characters and sequences cannot be used in an IRI.
///
/// ```
/// # use iri_string::types::IriStr;
/// // `<` and `>` cannot directly appear in an IRI.
/// assert!(IriStr::new("<not allowed>").is_err());
/// // Broken percent encoding cannot appear in an IRI.
/// assert!(IriStr::new("%").is_err());
/// assert!(IriStr::new("%GG").is_err());
/// ```
///
/// [RFC 3986]: https://www.rfc-editor.org/rfc/rfc3986.html
/// [RFC 3987]: https://www.rfc-editor.org/rfc/rfc3987.html
/// [`IRI` rule]: https://www.rfc-editor.org/rfc/rfc3987.html#section-2.2
/// [`URI` rule]: https://www.rfc-editor.org/rfc/rfc3986.html#section-3
/// [`RiAbsoluteStr`]: struct.RiAbsoluteStr.html
struct RiStr {
validator = iri,
expecting_msg = "IRI string",
}
}
#[cfg(feature = "alloc")]
define_custom_string_owned! {
/// An owned string of an absolute IRI possibly with fragment part.
///
/// This corresponds to [`IRI` rule] in [RFC 3987] (and [`URI` rule] in [RFC 3986]).
/// The rule for `IRI` is `scheme ":" ihier-part [ "?" iquery ] [ "#" ifragment ]`.
/// In other words, this is [`RiAbsoluteString`] with fragment part allowed.
///
/// For details, see the documentation for [`RiStr`].
///
/// Enabled by `alloc` or `std` feature.
///
/// [RFC 3986]: https://www.rfc-editor.org/rfc/rfc3986.html
/// [RFC 3987]: https://www.rfc-editor.org/rfc/rfc3987.html
/// [`IRI` rule]: https://www.rfc-editor.org/rfc/rfc3987.html#section-2.2
/// [`URI` rule]: https://www.rfc-editor.org/rfc/rfc3986.html#section-3
/// [`RiAbsoluteString`]: struct.RiAbsoluteString.html
struct RiString {
validator = iri,
slice = RiStr,
expecting_msg = "IRI string",
}
}
impl<S: Spec> RiStr<S> {
/// Splits the IRI into an absolute IRI part and a fragment part.
///
/// A leading `#` character is truncated if the fragment part exists.
///
/// # Examples
///
/// If the IRI has a fragment part, `Some(_)` is returned.
///
/// ```
/// # use iri_string::{spec::IriSpec, types::{IriFragmentStr, IriStr}, validate::Error};
/// let iri = IriStr::new("foo://bar/baz?qux=quux#corge")?;
/// let (absolute, fragment) = iri.to_absolute_and_fragment();
/// let fragment_expected = IriFragmentStr::new("corge")?;
/// assert_eq!(absolute, "foo://bar/baz?qux=quux");
/// assert_eq!(fragment, Some(fragment_expected));
/// # Ok::<_, Error>(())
/// ```
///
/// When the fragment part exists but is empty string, `Some(_)` is returned.
///
/// ```
/// # use iri_string::{spec::IriSpec, types::{IriFragmentStr, IriStr}, validate::Error};
/// let iri = IriStr::new("foo://bar/baz?qux=quux#")?;
/// let (absolute, fragment) = iri.to_absolute_and_fragment();
/// let fragment_expected = IriFragmentStr::new("")?;
/// assert_eq!(absolute, "foo://bar/baz?qux=quux");
/// assert_eq!(fragment, Some(fragment_expected));
/// # Ok::<_, Error>(())
/// ```
///
/// If the IRI has no fragment, `None` is returned.
///
/// ```
/// # use iri_string::{spec::IriSpec, types::IriStr, validate::Error};
/// let iri = IriStr::new("foo://bar/baz?qux=quux")?;
/// let (absolute, fragment) = iri.to_absolute_and_fragment();
/// assert_eq!(absolute, "foo://bar/baz?qux=quux");
/// assert_eq!(fragment, None);
/// # Ok::<_, Error>(())
/// ```
#[must_use]
pub fn to_absolute_and_fragment(&self) -> (&RiAbsoluteStr<S>, Option<&RiFragmentStr<S>>) {
    // Cut the string at the fragment delimiter; `tail` is `None` when the
    // IRI carries no fragment at all.
    let (head, tail) = trusted_parser::split_fragment(self.as_str());
    // SAFETY: an IRI without fragment part is also an absolute IRI.
    let absolute = unsafe { RiAbsoluteStr::new_maybe_unchecked(head) };
    let fragment = match tail {
        // SAFETY: `trusted_parser::split_fragment()` must return a valid fragment component.
        Some(raw_fragment) => Some(unsafe { RiFragmentStr::new_maybe_unchecked(raw_fragment) }),
        None => None,
    };
    (absolute, fragment)
}
/// Strips the fragment part if exists, and returns [`&RiAbsoluteStr`][`RiAbsoluteStr`].
///
/// # Examples
///
/// ```
/// # use iri_string::{spec::IriSpec, types::IriStr, validate::Error};
/// let iri = IriStr::new("foo://bar/baz?qux=quux#corge")?;
/// assert_eq!(iri.to_absolute(), "foo://bar/baz?qux=quux");
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::{spec::IriSpec, types::IriStr, validate::Error};
/// let iri = IriStr::new("foo://bar/baz?qux=quux")?;
/// assert_eq!(iri.to_absolute(), "foo://bar/baz?qux=quux");
/// # Ok::<_, Error>(())
/// ```
///
/// [`RiAbsoluteStr`]: struct.RiAbsoluteStr.html
#[must_use]
pub fn to_absolute(&self) -> &RiAbsoluteStr<S> {
    let whole = self.as_str();
    // Only the length of the fragment-less prefix is needed here.
    let (without_fragment, _) = trusted_parser::split_fragment(whole);
    let absolute_part = &whole[..without_fragment.len()];
    // SAFETY: IRI without the fragment part (including a leading `#` character)
    // is also an absolute IRI.
    unsafe { RiAbsoluteStr::new_maybe_unchecked(absolute_part) }
}
/// Returns `Ok(())` if the IRI is normalizable by the RFC 3986 algorithm.
///
/// # Errors
///
/// Returns an error when the plain RFC 3986 normalization algorithm would
/// produce an unintended result for this IRI (see the second example below).
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::types::IriStr;
///
/// let iri = IriStr::new("HTTP://example.COM/foo/%2e/bar/..")?;
/// assert!(iri.ensure_rfc3986_normalizable().is_ok());
///
/// let iri2 = IriStr::new("scheme:/..//bar")?;
/// // The normalization result would be `scheme://bar` according to RFC
/// // 3986, but it is unintended and should be treated as a failure.
/// // This crate automatically handles this case so that `.normalize()` won't fail.
/// assert!(iri2.ensure_rfc3986_normalizable().is_err());
/// # Ok::<_, Error>(())
/// ```
#[inline]
pub fn ensure_rfc3986_normalizable(&self) -> Result<(), Error> {
    // The check itself lives on the normalization input built from `self`.
    NormalizationInput::from(self).ensure_rfc3986_normalizable()
}
/// Returns `true` if the IRI is already normalized.
///
/// This returns the same result as `self.normalize().to_string() == self`,
/// but does this more efficiently without heap allocation.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::format::ToDedicatedString;
/// use iri_string::types::IriStr;
///
/// let iri = IriStr::new("HTTP://example.COM/foo/./bar/%2e%2e/../baz?query#fragment")?;
/// assert!(!iri.is_normalized());
///
/// let normalized = iri.normalize().to_dedicated_string();
/// assert_eq!(normalized, "http://example.com/baz?query#fragment");
/// assert!(normalized.is_normalized());
/// # }
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::format::ToDedicatedString;
/// use iri_string::types::IriStr;
///
/// let iri = IriStr::new("scheme:/.///foo")?;
/// // Already normalized.
/// assert!(iri.is_normalized());
/// # }
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::format::ToDedicatedString;
/// use iri_string::types::IriStr;
///
/// let iri = IriStr::new("scheme:relative/..//not-a-host")?;
/// // Default normalization algorithm assumes the path part to be NOT opaque.
/// assert!(!iri.is_normalized());
///
/// let normalized = iri.normalize().to_dedicated_string();
/// assert_eq!(normalized, "scheme:/.//not-a-host");
/// # }
/// # Ok::<_, Error>(())
/// ```
#[must_use]
#[inline]
pub fn is_normalized(&self) -> bool {
    // The check runs directly on the raw string, using the default rules.
    let raw = self.as_str();
    let mode = NormalizednessCheckMode::Default;
    trusted_parser::is_normalized::<S>(raw, mode)
}
/// Returns `true` if the IRI is already normalized in the sense of RFC 3986.
///
/// This returns the same result as
/// `self.ensure_rfc3986_normalizable().is_ok() && (self.normalize().to_string() == self)`,
/// but does this more efficiently without heap allocation.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::format::ToDedicatedString;
/// use iri_string::types::IriStr;
///
/// let iri = IriStr::new("HTTP://example.COM/foo/./bar/%2e%2e/../baz?query#fragment")?;
/// assert!(!iri.is_normalized_rfc3986());
///
/// let normalized = iri.normalize().to_dedicated_string();
/// assert_eq!(normalized, "http://example.com/baz?query#fragment");
/// assert!(normalized.is_normalized_rfc3986());
/// # }
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::format::ToDedicatedString;
/// use iri_string::types::IriStr;
///
/// let iri = IriStr::new("scheme:/.///foo")?;
/// // Not normalized in the sense of RFC 3986.
/// assert!(!iri.is_normalized_rfc3986());
/// # }
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::format::ToDedicatedString;
/// use iri_string::types::IriStr;
///
/// let iri = IriStr::new("scheme:relative/..//not-a-host")?;
/// // RFC 3986 normalization algorithm assumes the path part to be NOT opaque.
/// assert!(!iri.is_normalized_rfc3986());
///
/// let normalized = iri.normalize().to_dedicated_string();
/// assert_eq!(normalized, "scheme:/.//not-a-host");
/// # }
/// # Ok::<_, Error>(())
/// ```
#[must_use]
#[inline]
pub fn is_normalized_rfc3986(&self) -> bool {
    // Same check as `is_normalized`, but in the RFC 3986 mode.
    let raw = self.as_str();
    let mode = NormalizednessCheckMode::Rfc3986;
    trusted_parser::is_normalized::<S>(raw, mode)
}
/// Returns `true` if the IRI is already normalized in the sense of
/// [`normalize_but_preserve_authorityless_relative_path`] method.
///
/// This returns the same result as
/// `self.normalize_but_preserve_authorityless_relative_path().to_string() == self`,
/// but does this more efficiently without heap allocation.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::format::ToDedicatedString;
/// use iri_string::types::IriStr;
///
/// let iri = IriStr::new("HTTP://example.COM/foo/./bar/%2e%2e/../baz?query#fragment")?;
/// assert!(!iri.is_normalized_but_authorityless_relative_path_preserved());
///
/// let normalized = iri
/// .normalize_but_preserve_authorityless_relative_path()
/// .to_dedicated_string();
/// assert_eq!(normalized, "http://example.com/baz?query#fragment");
/// assert!(normalized.is_normalized());
/// # }
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::format::ToDedicatedString;
/// use iri_string::types::IriStr;
///
/// let iri = IriStr::new("scheme:/.///foo")?;
/// // Already normalized in the sense of
/// // `normalize_but_preserve_authorityless_relative_path()` method.
/// assert!(iri.is_normalized_but_authorityless_relative_path_preserved());
/// # }
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::format::ToDedicatedString;
/// use iri_string::types::IriStr;
///
/// let iri = IriStr::new("scheme:relative/..//not-a-host")?;
/// // Relative path is treated as opaque since the authority component is absent.
/// assert!(iri.is_normalized_but_authorityless_relative_path_preserved());
/// # }
/// # Ok::<_, Error>(())
/// ```
///
/// [`normalize_but_preserve_authorityless_relative_path`]:
/// `Self::normalize_but_preserve_authorityless_relative_path`
#[must_use]
#[inline]
pub fn is_normalized_but_authorityless_relative_path_preserved(&self) -> bool {
    // Same check as `is_normalized`, but in the mode that preserves
    // authority-less relative paths.
    let raw = self.as_str();
    let mode = NormalizednessCheckMode::PreserveAuthoritylessRelativePath;
    trusted_parser::is_normalized::<S>(raw, mode)
}
/// Returns the normalized IRI.
///
/// # Notes
///
/// For some abnormal IRIs, the normalization can produce semantically
/// incorrect string that looks syntactically valid. To avoid security
/// issues by this trap, the normalization algorithm by this crate
/// automatically applies the workaround.
///
/// If you worry about this, test by [`RiStr::ensure_rfc3986_normalizable`]
/// method or [`Normalized::ensure_rfc3986_normalizable`] before using the
/// result string.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::format::ToDedicatedString;
/// use iri_string::types::IriStr;
///
/// let iri = IriStr::new("HTTP://example.COM/foo/./bar/%2e%2e/../baz?query#fragment")?;
///
/// let normalized = iri.normalize().to_dedicated_string();
/// assert_eq!(normalized, "http://example.com/baz?query#fragment");
/// # }
/// # Ok::<_, Error>(())
/// ```
#[inline]
#[must_use]
pub fn normalize(&self) -> Normalized<'_, Self> {
    // Build the normalization input first, then select the default
    // normalization on the wrapper created from it.
    let input = NormalizationInput::from(self);
    let pending = Normalized::from_input(input);
    pending.and_normalize()
}
/// Returns the normalized IRI, but preserving dot segments in relative path
/// if the authority component is absent.
///
/// This normalization would be similar to that of [WHATWG URL Standard]
/// while this implementation is not guaranteed to strictly follow the spec.
///
/// Note that this normalization algorithm is not compatible with RFC 3986
/// algorithm for some inputs.
///
/// Note that case normalization and percent-encoding normalization will
/// still be applied to any path.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::format::ToDedicatedString;
/// use iri_string::types::IriStr;
///
/// let iri = IriStr::new("HTTP://example.COM/foo/./bar/%2e%2e/../baz?query#fragment")?;
///
/// let normalized = iri
/// .normalize_but_preserve_authorityless_relative_path()
/// .to_dedicated_string();
/// assert_eq!(normalized, "http://example.com/baz?query#fragment");
/// # }
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::format::ToDedicatedString;
/// use iri_string::types::IriStr;
///
/// let iri = IriStr::new("scheme:relative/../f%6f%6f")?;
///
/// let normalized = iri
/// .normalize_but_preserve_authorityless_relative_path()
/// .to_dedicated_string();
/// assert_eq!(normalized, "scheme:relative/../foo");
/// // `.normalize()` would normalize this to `scheme:/foo`.
/// # assert_eq!(iri.normalize().to_dedicated_string(), "scheme:/foo");
/// # }
/// # Ok::<_, Error>(())
/// ```
///
/// [WHATWG URL Standard]: https://url.spec.whatwg.org/
#[inline]
#[must_use]
pub fn normalize_but_preserve_authorityless_relative_path(&self) -> Normalized<'_, Self> {
    // Same construction as `normalize()`, but selecting the variant that
    // preserves authority-less relative paths.
    let input = NormalizationInput::from(self);
    let pending = Normalized::from_input(input);
    pending.and_normalize_but_preserve_authorityless_relative_path()
}
/// Returns the proxy to the IRI with password masking feature.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::format::ToDedicatedString;
/// use iri_string::types::IriStr;
///
/// let iri = IriStr::new("http://user:password@example.com/path?query")?;
/// let masked = iri.mask_password();
/// assert_eq!(masked.to_dedicated_string(), "http://user:@example.com/path?query");
///
/// assert_eq!(
/// masked.replace_password("${password}").to_string(),
/// "http://user:${password}@example.com/path?query"
/// );
/// # }
/// # Ok::<_, Error>(())
/// ```
#[inline]
#[must_use]
pub fn mask_password(&self) -> PasswordMasked<'_, Self> {
// Only wraps a borrow of `self` in the proxy; nothing is modified here.
PasswordMasked::new(self)
}
}
/// Components getters.
impl<S: Spec> RiStr<S> {
/// Returns the scheme.
///
/// The following colon is truncated.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::types::IriStr;
///
/// let iri = IriStr::new("http://example.com/pathpath?queryquery#fragfrag")?;
/// assert_eq!(iri.scheme_str(), "http");
/// # Ok::<_, Error>(())
/// ```
#[inline]
#[must_use]
pub fn scheme_str(&self) -> &str {
    // No validation happens here; the raw string goes straight to the
    // `trusted_parser` helper.
    let raw = self.as_str();
    trusted_parser::extract_scheme_absolute(raw)
}
/// Returns the authority.
///
/// The leading `//` is truncated.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::types::IriStr;
///
/// let iri = IriStr::new("http://example.com/pathpath?queryquery#fragfrag")?;
/// assert_eq!(iri.authority_str(), Some("example.com"));
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::types::IriStr;
///
/// let iri = IriStr::new("urn:uuid:10db315b-fcd1-4428-aca8-15babc9a2da2")?;
/// assert_eq!(iri.authority_str(), None);
/// # Ok::<_, Error>(())
/// ```
#[inline]
#[must_use]
pub fn authority_str(&self) -> Option<&str> {
    // No validation happens here; the raw string goes straight to the
    // `trusted_parser` helper.
    let raw = self.as_str();
    trusted_parser::extract_authority_absolute(raw)
}
/// Returns the path.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::types::IriStr;
///
/// let iri = IriStr::new("http://example.com/pathpath?queryquery#fragfrag")?;
/// assert_eq!(iri.path_str(), "/pathpath");
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::types::IriStr;
///
/// let iri = IriStr::new("urn:uuid:10db315b-fcd1-4428-aca8-15babc9a2da2")?;
/// assert_eq!(iri.path_str(), "uuid:10db315b-fcd1-4428-aca8-15babc9a2da2");
/// # Ok::<_, Error>(())
/// ```
#[inline]
#[must_use]
pub fn path_str(&self) -> &str {
    // No validation happens here; the raw string goes straight to the
    // `trusted_parser` helper.
    let raw = self.as_str();
    trusted_parser::extract_path_absolute(raw)
}
/// Returns the query.
///
/// The leading question mark (`?`) is truncated.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::types::{IriQueryStr, IriStr};
///
/// let iri = IriStr::new("http://example.com/pathpath?queryquery#fragfrag")?;
/// let query = IriQueryStr::new("queryquery")?;
/// assert_eq!(iri.query(), Some(query));
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::types::IriStr;
///
/// let iri = IriStr::new("urn:uuid:10db315b-fcd1-4428-aca8-15babc9a2da2")?;
/// assert_eq!(iri.query(), None);
/// # Ok::<_, Error>(())
/// ```
#[inline]
#[must_use]
pub fn query(&self) -> Option<&RiQueryStr<S>> {
    // View `self` as an IRI reference and reuse that type's getter.
    let reference = AsRef::<RiReferenceStr<S>>::as_ref(self);
    reference.query()
}
/// Returns the query in a raw string slice.
///
/// The leading question mark (`?`) is truncated.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::types::IriStr;
///
/// let iri = IriStr::new("http://example.com/pathpath?queryquery#fragfrag")?;
/// assert_eq!(iri.query_str(), Some("queryquery"));
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::types::IriStr;
///
/// let iri = IriStr::new("urn:uuid:10db315b-fcd1-4428-aca8-15babc9a2da2")?;
/// assert_eq!(iri.query_str(), None);
/// # Ok::<_, Error>(())
/// ```
#[inline]
#[must_use]
pub fn query_str(&self) -> Option<&str> {
    // No validation happens here; the raw string goes straight to the
    // `trusted_parser` helper.
    let raw = self.as_str();
    trusted_parser::extract_query(raw)
}
/// Returns the fragment part if exists.
///
/// A leading `#` character is truncated if the fragment part exists.
///
/// # Examples
///
/// ```
/// # use iri_string::{spec::IriSpec, types::{IriFragmentStr, IriStr}, validate::Error};
/// let iri = IriStr::new("foo://bar/baz?qux=quux#corge")?;
/// let fragment = IriFragmentStr::new("corge")?;
/// assert_eq!(iri.fragment(), Some(fragment));
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::{spec::IriSpec, types::{IriFragmentStr, IriStr}, validate::Error};
/// let iri = IriStr::new("foo://bar/baz?qux=quux#")?;
/// let fragment = IriFragmentStr::new("")?;
/// assert_eq!(iri.fragment(), Some(fragment));
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::{spec::IriSpec, types::IriStr, validate::Error};
/// let iri = IriStr::new("foo://bar/baz?qux=quux")?;
/// assert_eq!(iri.fragment(), None);
/// # Ok::<_, Error>(())
/// ```
#[inline]
#[must_use]
pub fn fragment(&self) -> Option<&RiFragmentStr<S>> {
    // View `self` as an IRI reference and reuse that type's getter.
    let reference = AsRef::<RiReferenceStr<S>>::as_ref(self);
    reference.fragment()
}
/// Returns the fragment part as a raw string slice if exists.
///
/// A leading `#` character is truncated if the fragment part exists.
///
/// # Examples
///
/// ```
/// # use iri_string::{spec::IriSpec, types::IriStr, validate::Error};
/// let iri = IriStr::new("foo://bar/baz?qux=quux#corge")?;
/// assert_eq!(iri.fragment_str(), Some("corge"));
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::{spec::IriSpec, types::IriStr, validate::Error};
/// let iri = IriStr::new("foo://bar/baz?qux=quux#")?;
/// assert_eq!(iri.fragment_str(), Some(""));
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::{spec::IriSpec, types::IriStr, validate::Error};
/// let iri = IriStr::new("foo://bar/baz?qux=quux")?;
/// assert_eq!(iri.fragment_str(), None);
/// # Ok::<_, Error>(())
/// ```
#[inline]
#[must_use]
pub fn fragment_str(&self) -> Option<&str> {
    // View `self` as an IRI reference and reuse that type's getter.
    let reference = AsRef::<RiReferenceStr<S>>::as_ref(self);
    reference.fragment_str()
}
/// Returns the authority components.
///
/// Returns `None` if the IRI has no authority component.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::types::IriStr;
///
/// let iri = IriStr::new("http://user:pass@example.com:8080/pathpath?queryquery")?;
/// let authority = iri.authority_components()
///     .expect("authority is available");
/// assert_eq!(authority.userinfo(), Some("user:pass"));
/// assert_eq!(authority.host(), "example.com");
/// assert_eq!(authority.port(), Some("8080"));
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::types::IriStr;
///
/// let iri = IriStr::new("urn:uuid:10db315b-fcd1-4428-aca8-15babc9a2da2")?;
/// assert!(iri.authority_components().is_none());
/// # Ok::<_, Error>(())
/// ```
#[inline]
#[must_use]
pub fn authority_components(&self) -> Option<AuthorityComponents<'_>> {
    // The component splitter takes `self` through its `AsRef` conversion.
    AuthorityComponents::from_iri(self.as_ref())
}
}
#[cfg(feature = "alloc")]
impl<S: Spec> RiString<S> {
/// Splits the IRI into an absolute IRI part and a fragment part.
///
/// A leading `#` character is truncated if the fragment part exists.
///
/// # Examples
///
/// ```
/// use std::convert::TryFrom;
/// # use iri_string::{spec::IriSpec, types::{IriFragmentString, IriString}, validate::Error};
/// let iri = "foo://bar/baz?qux=quux#corge".parse::<IriString>()?;
/// let (absolute, fragment) = iri.into_absolute_and_fragment();
/// let fragment_expected = IriFragmentString::try_from("corge".to_owned())
/// .map_err(|e| e.validation_error())?;
/// assert_eq!(absolute, "foo://bar/baz?qux=quux");
/// assert_eq!(fragment, Some(fragment_expected));
/// # Ok::<_, Error>(())
///
/// ```
///
/// ```
/// use std::convert::TryFrom;
/// # use iri_string::{spec::IriSpec, types::{IriFragmentString, IriString}, validate::Error};
/// let iri = "foo://bar/baz?qux=quux#".parse::<IriString>()?;
/// let (absolute, fragment) = iri.into_absolute_and_fragment();
/// let fragment_expected = IriFragmentString::try_from("".to_owned())
/// .map_err(|e| e.validation_error())?;
/// assert_eq!(absolute, "foo://bar/baz?qux=quux");
/// assert_eq!(fragment, Some(fragment_expected));
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// use std::convert::TryFrom;
/// # use iri_string::{spec::IriSpec, types::IriString, validate::Error};
/// let iri = "foo://bar/baz?qux=quux".parse::<IriString>()?;
/// let (absolute, fragment) = iri.into_absolute_and_fragment();
/// assert_eq!(absolute, "foo://bar/baz?qux=quux");
/// assert_eq!(fragment, None);
/// # Ok::<_, Error>(())
/// ```
#[must_use]
pub fn into_absolute_and_fragment(self) -> (RiAbsoluteString<S>, Option<RiFragmentString<S>>) {
    // Consume `self` and cut the owned buffer at the fragment delimiter.
    let (head, tail) = raw::split_fragment_owned(self.into());
    // SAFETY: an IRI without fragment part is also an absolute IRI.
    let absolute = unsafe { RiAbsoluteString::new_maybe_unchecked(head) };
    let fragment = match tail {
        // SAFETY: the string returned by `raw::split_fragment_owned()` must
        // be the fragment part, and must also be a substring of the source IRI.
        Some(raw_fragment) => Some(unsafe { RiFragmentString::new_maybe_unchecked(raw_fragment) }),
        None => None,
    };
    (absolute, fragment)
}
/// Strips the fragment part if exists, and returns an [`RiAbsoluteString`].
///
/// # Examples
///
/// ```
/// # use iri_string::{spec::IriSpec, types::IriString, validate::Error};
/// let iri = "foo://bar/baz?qux=quux#corge".parse::<IriString>()?;
/// assert_eq!(iri.into_absolute(), "foo://bar/baz?qux=quux");
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::{spec::IriSpec, types::IriString, validate::Error};
/// let iri = "foo://bar/baz?qux=quux".parse::<IriString>()?;
/// assert_eq!(iri.into_absolute(), "foo://bar/baz?qux=quux");
/// # Ok::<_, Error>(())
/// ```
///
/// [`RiAbsoluteString`]: struct.RiAbsoluteString.html
#[must_use]
pub fn into_absolute(self) -> RiAbsoluteString<S> {
    // Reuse the existing allocation: take the inner buffer and drop the
    // fragment from it in place.
    let stripped: String = {
        let mut buf: String = self.into();
        raw::remove_fragment(&mut buf);
        buf
    };
    // SAFETY: an IRI without fragment part is also an absolute IRI.
    unsafe { RiAbsoluteString::new_maybe_unchecked(stripped) }
}
/// Sets the fragment part to the given string.
///
/// Removes the fragment part (and the preceding `#` character) if `None` is given.
pub fn set_fragment(&mut self, fragment: Option<&RiFragmentStr<S>>) {
// The inner buffer is edited in place by the raw helper.
raw::set_fragment(&mut self.inner, fragment.map(AsRef::as_ref));
// Debug-only sanity check: the edited buffer must still validate as an IRI.
debug_assert!(iri::<S>(&self.inner).is_ok());
}
/// Removes the password completely (including separator colon) from `self` even if it is empty.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::types::IriString;
///
/// let mut iri = IriString::try_from("http://user:password@example.com/path?query")?;
/// iri.remove_password_inline();
/// assert_eq!(iri, "http://user@example.com/path?query");
/// # }
/// # Ok::<_, Error>(())
/// ```
///
/// Even if the password is empty, the password and separator will be removed.
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::types::IriString;
///
/// let mut iri = IriString::try_from("http://user:@example.com/path?query")?;
/// iri.remove_password_inline();
/// assert_eq!(iri, "http://user@example.com/path?query");
/// # }
/// # Ok::<_, Error>(())
/// ```
pub fn remove_password_inline(&mut self) {
    let hidden = password_range_to_hide(self.as_slice().as_ref());
    if let Some(password) = hidden {
        // Widen the range by one byte to the left so that the separator
        // colon is removed together with the password.
        let doomed = (password.start - 1)..password.end;
        // SAFETY: the IRI must still be valid after the password component and
        // the leading separator colon is removed.
        unsafe {
            let inner = self.as_inner_mut();
            inner.drain(doomed);
            debug_assert!(
                RiStr::<S>::new(inner).is_ok(),
                "[validity] the IRI must be valid after the password component is removed"
            );
        }
    }
}
/// Replaces the non-empty password in `self` to the empty password.
///
/// This leaves the separator colon if the password part was available.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::types::IriString;
///
/// let mut iri = IriString::try_from("http://user:password@example.com/path?query")?;
/// iri.remove_nonempty_password_inline();
/// assert_eq!(iri, "http://user:@example.com/path?query");
/// # }
/// # Ok::<_, Error>(())
/// ```
///
/// If the password is empty, it is left as is.
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::types::IriString;
///
/// let mut iri = IriString::try_from("http://user:@example.com/path?query")?;
/// iri.remove_nonempty_password_inline();
/// assert_eq!(iri, "http://user:@example.com/path?query");
/// # }
/// # Ok::<_, Error>(())
/// ```
pub fn remove_nonempty_password_inline(&mut self) {
    // Nothing to do when there is no password or when it is already empty.
    let nonempty =
        password_range_to_hide(self.as_slice().as_ref()).filter(|range| !range.is_empty());
    if let Some(password) = nonempty {
        debug_assert_eq!(
            self.as_str().as_bytes().get(password.start - 1).copied(),
            Some(b':'),
            "[validity] the password component must be prefixed with a separator colon"
        );
        // SAFETY: the IRI must still be valid if the password is replaced with
        // empty string.
        unsafe {
            let inner = self.as_inner_mut();
            // Only the password bytes go; the separator colon stays.
            inner.drain(password);
            debug_assert!(
                RiStr::<S>::new(inner).is_ok(),
                "[validity] the IRI must be valid after the password component is removed"
            );
        }
    }
}
}
// Conversions from the IRI types (`RiStr` / `RiString`) to the IRI reference
// types (`RiReferenceStr` / `RiReferenceString`).
// NOTE(review): presumably this generates the infallible "widening" impls
// (every IRI is also an IRI reference); the exact set of generated impls is
// defined by `impl_trivial_conv_between_iri!`, which is not visible here.
impl_trivial_conv_between_iri! {
from_slice: RiStr,
from_owned: RiString,
to_slice: RiReferenceStr,
to_owned: RiReferenceString,
}

View File

@@ -0,0 +1,133 @@
//! Query string.
use crate::spec::Spec;
use crate::validate::{query, Error, ErrorKind};
define_custom_string_slice! {
/// A borrowed slice of an IRI query (i.e. after the first `?` and before the first `#`).
///
/// This corresponds to [`iquery` rule] in [RFC 3987] (and [`query` rule] in [RFC 3986]).
/// The rule for `iquery` is `*( ipchar / iprivate / "/" / "?" )`.
///
/// # Valid values
///
/// This type can have an IRI query.
/// Note that the IRI `foo://bar/baz?qux=quux#corge` has the query `qux=quux`, **not** `?qux=quux`.
///
/// ```
/// use iri_string::types::IriQueryStr;
/// assert!(IriQueryStr::new("").is_ok());
/// assert!(IriQueryStr::new("foo").is_ok());
/// assert!(IriQueryStr::new("foo/bar").is_ok());
/// assert!(IriQueryStr::new("/foo/bar").is_ok());
/// assert!(IriQueryStr::new("//foo/bar").is_ok());
/// assert!(IriQueryStr::new("https://user:pass@example.com:8080").is_ok());
/// assert!(IriQueryStr::new("https://example.com/").is_ok());
/// // Question sign `?` can appear in an IRI query.
/// assert!(IriQueryStr::new("query?again").is_ok());
/// ```
///
/// Some characters and sequences cannot be used in a query.
///
/// ```
/// use iri_string::types::IriQueryStr;
/// // `<` and `>` cannot directly appear in an IRI reference.
/// assert!(IriQueryStr::new("<not allowed>").is_err());
/// // Broken percent encoding cannot appear in an IRI reference.
/// assert!(IriQueryStr::new("%").is_err());
/// assert!(IriQueryStr::new("%GG").is_err());
/// // Hash sign `#` cannot appear in an IRI query.
/// assert!(IriQueryStr::new("#hash").is_err());
/// ```
///
/// [RFC 3986]: https://www.rfc-editor.org/rfc/rfc3986.html
/// [RFC 3987]: https://www.rfc-editor.org/rfc/rfc3987.html
/// [`query` rule]: https://www.rfc-editor.org/rfc/rfc3986.html#section-3.4
/// [`iquery` rule]: https://www.rfc-editor.org/rfc/rfc3987.html#section-2.2
struct RiQueryStr {
validator = query,
expecting_msg = "IRI query string",
}
}
#[cfg(feature = "alloc")]
define_custom_string_owned! {
/// An owned string of an IRI query (i.e. after the first `?` and before the first `#`).
///
/// This corresponds to [`iquery` rule] in [RFC 3987] (and [`query` rule] in [RFC 3986]).
/// The rule for `iquery` is `*( ipchar / iprivate / "/" / "?" )`.
///
/// For details, see the documentation for [`RiQueryStr`].
///
/// Enabled by `alloc` or `std` feature.
///
/// [RFC 3986]: https://www.rfc-editor.org/rfc/rfc3986.html
/// [RFC 3987]: https://www.rfc-editor.org/rfc/rfc3987.html
/// [`query` rule]: https://www.rfc-editor.org/rfc/rfc3986.html#section-3.4
/// [`iquery` rule]: https://www.rfc-editor.org/rfc/rfc3987.html#section-2.2
/// [`RiQueryStr`]: struct.RiQueryStr.html
struct RiQueryString {
validator = query,
slice = RiQueryStr,
expecting_msg = "IRI query string",
}
}
impl<S: Spec> RiQueryStr<S> {
/// Creates a new `&RiQueryStr` from the query part prefixed by `?`.
///
/// # Examples
///
/// ```
/// # use iri_string::types::IriQueryStr;
/// assert!(IriQueryStr::from_prefixed("?").is_ok());
/// assert!(IriQueryStr::from_prefixed("?foo").is_ok());
/// assert!(IriQueryStr::from_prefixed("?foo/bar").is_ok());
/// assert!(IriQueryStr::from_prefixed("?/foo/bar").is_ok());
/// assert!(IriQueryStr::from_prefixed("?//foo/bar").is_ok());
/// assert!(IriQueryStr::from_prefixed("?https://user:pass@example.com:8080").is_ok());
/// assert!(IriQueryStr::from_prefixed("?https://example.com/").is_ok());
/// // Question sign `?` can appear in an IRI query.
/// assert!(IriQueryStr::from_prefixed("?query?again").is_ok());
///
/// // `<` and `>` cannot directly appear in an IRI.
/// assert!(IriQueryStr::from_prefixed("?<not allowed>").is_err());
/// // Broken percent encoding cannot appear in an IRI.
/// assert!(IriQueryStr::new("?%").is_err());
/// assert!(IriQueryStr::new("?%GG").is_err());
/// // `?` prefix is expected.
/// assert!(IriQueryStr::from_prefixed("").is_err());
/// assert!(IriQueryStr::from_prefixed("foo").is_err());
/// // Hash sign `#` cannot appear in an IRI query.
/// assert!(IriQueryStr::from_prefixed("?#hash").is_err());
/// ```
pub fn from_prefixed(s: &str) -> Result<&Self, Error> {
if !s.starts_with('?') {
return Err(Error::with_kind(ErrorKind::InvalidQuery));
}
TryFrom::try_from(&s[1..])
}
}

View File

@@ -0,0 +1,697 @@
//! IRI reference.
#[cfg(all(feature = "alloc", not(feature = "std")))]
use alloc::string::String;
use crate::components::AuthorityComponents;
#[cfg(feature = "alloc")]
use crate::mask_password::password_range_to_hide;
use crate::mask_password::PasswordMasked;
use crate::normalize::Normalized;
use crate::parser::trusted as trusted_parser;
#[cfg(feature = "alloc")]
use crate::raw;
use crate::resolve::FixedBaseResolver;
use crate::spec::Spec;
use crate::types::{RiAbsoluteStr, RiFragmentStr, RiQueryStr, RiRelativeStr, RiStr};
#[cfg(feature = "alloc")]
use crate::types::{RiRelativeString, RiString};
#[cfg(feature = "alloc")]
use crate::validate::iri;
use crate::validate::iri_reference;
define_custom_string_slice! {
/// A borrowed string of an IRI reference (i.e. either an IRI or a relative IRI reference).
///
/// This corresponds to [`IRI-reference` rule] in [RFC 3987]
/// (and [`URI-reference` rule] in [RFC 3986]).
/// The rule for `IRI-reference` is `IRI / irelative-ref`.
/// In other words, this is the union of [`RiStr`] and [`RiRelativeStr`].
///
/// # Valid values
///
/// This type can have an IRI reference (which can be absolute or relative).
///
/// ```
/// # use iri_string::types::IriReferenceStr;
/// assert!(IriReferenceStr::new("https://user:pass@example.com:8080").is_ok());
/// assert!(IriReferenceStr::new("https://example.com/").is_ok());
/// assert!(IriReferenceStr::new("https://example.com/foo?bar=baz").is_ok());
/// assert!(IriReferenceStr::new("https://example.com/foo?bar=baz#qux").is_ok());
/// assert!(IriReferenceStr::new("foo:bar").is_ok());
/// assert!(IriReferenceStr::new("foo:").is_ok());
/// // `foo://.../` below are all allowed. See the crate documentation for detail.
/// assert!(IriReferenceStr::new("foo:/").is_ok());
/// assert!(IriReferenceStr::new("foo://").is_ok());
/// assert!(IriReferenceStr::new("foo:///").is_ok());
/// assert!(IriReferenceStr::new("foo:////").is_ok());
/// assert!(IriReferenceStr::new("foo://///").is_ok());
/// assert!(IriReferenceStr::new("foo/bar").is_ok());
/// assert!(IriReferenceStr::new("/foo/bar").is_ok());
/// assert!(IriReferenceStr::new("//foo/bar").is_ok());
/// assert!(IriReferenceStr::new("#foo").is_ok());
/// ```
///
/// Some characters and sequences cannot be used in an IRI reference.
///
/// ```
/// # use iri_string::types::IriReferenceStr;
/// // `<` and `>` cannot directly appear in an IRI reference.
/// assert!(IriReferenceStr::new("<not allowed>").is_err());
/// // Broken percent encoding cannot appear in an IRI reference.
/// assert!(IriReferenceStr::new("%").is_err());
/// assert!(IriReferenceStr::new("%GG").is_err());
/// ```
///
/// [RFC 3986]: https://www.rfc-editor.org/rfc/rfc3986.html
/// [RFC 3987]: https://www.rfc-editor.org/rfc/rfc3987.html
/// [`IRI-reference` rule]: https://www.rfc-editor.org/rfc/rfc3987.html#section-2.2
/// [`URI-reference` rule]: https://www.rfc-editor.org/rfc/rfc3986.html#section-4.1
/// [`RiRelativeStr`]: struct.RiRelativeStr.html
/// [`RiStr`]: struct.RiStr.html
struct RiReferenceStr {
validator = iri_reference,
expecting_msg = "IRI reference string",
}
}
#[cfg(feature = "alloc")]
define_custom_string_owned! {
/// An owned string of an IRI reference (i.e. either an IRI or a relative IRI reference).
///
/// This corresponds to [`IRI-reference` rule] in [RFC 3987]
/// (and [`URI-reference` rule] in [RFC 3986]).
/// The rule for `IRI-reference` is `IRI / irelative-ref`.
/// In other words, this is the union of [`RiString`] and [`RiRelativeString`].
///
/// For details, see the document for [`RiReferenceStr`].
///
/// Enabled by `alloc` or `std` feature.
///
/// [RFC 3986]: https://www.rfc-editor.org/rfc/rfc3986.html
/// [RFC 3987]: https://www.rfc-editor.org/rfc/rfc3987.html
/// [`IRI-reference` rule]: https://www.rfc-editor.org/rfc/rfc3987.html#section-2.2
/// [`URI-reference` rule]: https://www.rfc-editor.org/rfc/rfc3986.html#section-4.1
/// [`RiReferenceStr`]: struct.RiReferenceStr.html
/// [`RiRelativeString`]: struct.RiRelativeString.html
/// [`RiString`]: struct.RiString.html
struct RiReferenceString {
validator = iri_reference,
slice = RiReferenceStr,
expecting_msg = "IRI reference string",
}
}
impl<S: Spec> RiReferenceStr<S> {
/// Tries to view this IRI reference as an IRI ([`&RiStr`][`RiStr`]).
///
/// When the string is not valid as an IRI, the very same string is handed
/// back as [`&RiRelativeStr`][`RiRelativeStr`] inside `Err(_)`.
///
/// [`RiRelativeStr`]: struct.RiRelativeStr.html
/// [`RiStr`]: struct.RiStr.html
pub fn to_iri(&self) -> Result<&RiStr<S>, &RiRelativeStr<S>> {
    let raw = self.as_str();
    // The `IRI` rule is tried first, because the syntax rule for
    // `IRI-reference` is `IRI / irelative-ref`.
    //
    // > Some productions are ambiguous. The "first-match-wins" (a.k.a.
    // > "greedy") algorithm applies. For details, see [RFC3986].
    // >
    // > --- <https://www.rfc-editor.org/rfc/rfc3987.html#section-2.2>.
    match <&RiStr<S>>::try_from(raw) {
        Ok(iri) => Ok(iri),
        Err(_) => {
            // SAFETY: if an IRI reference is not an IRI, then it is a relative IRI.
            // See the RFC 3987 syntax rule `IRI-reference = IRI / irelative-ref`.
            Err(unsafe { RiRelativeStr::new_maybe_unchecked(raw) })
        }
    }
}
/// Returns the string as [`&RiRelativeStr`][`RiRelativeStr`], if it is not valid as an IRI
/// (i.e. it is a relative IRI reference).
///
/// If it is an IRI, then [`&RiStr`][`RiStr`] is returned as `Err(_)`.
///
/// This is the mirror image of `to_iri`: the same check is performed and
/// only the roles of `Ok` and `Err` are swapped.
///
/// [`RiRelativeStr`]: struct.RiRelativeStr.html
/// [`RiStr`]: struct.RiStr.html
pub fn to_relative_iri(&self) -> Result<&RiRelativeStr<S>, &RiStr<S>> {
    // `to_iri` reports a relative reference as `Err(_)`, so swap the roles.
    match self.to_iri() {
        Err(relative) => Ok(relative),
        Ok(iri) => Err(iri),
    }
}
/// Resolves this IRI reference against the given base IRI.
///
/// Example outputs of IRI reference resolution are listed in
/// [RFC 3986 section 5.4].
///
/// When several references are resolved against one common base, consider
/// using [`FixedBaseResolver`] directly.
///
/// # Strictness
///
/// The IRI parsers provided by this crate are strict (e.g. `http:g` is
/// always interpreted as a composition of the scheme `http` and the path
/// `g`), so backward compatible parsing and resolution are not provided.
/// About parser and resolver strictness, see [RFC 3986 section 5.4.2]:
///
/// > Some parsers allow the scheme name to be present in a relative
/// > reference if it is the same as the base URI scheme. This is considered
/// > to be a loophole in prior specifications of partial URI
/// > [RFC1630](https://www.rfc-editor.org/rfc/rfc1630.html). Its use should be
/// > avoided but is allowed for backward compatibility.
/// >
/// > --- <https://www.rfc-editor.org/rfc/rfc3986.html#section-5.4.2>
///
/// # Failures
///
/// This method itself does not fail, but IRI resolution without WHATWG URL
/// Standard serialization can fail in some minor cases.
///
/// To see examples of such unresolvable IRIs, visit the documentation
/// for [`normalize`][`crate::normalize`] module.
///
/// [RFC 3986 section 5.4]: https://www.rfc-editor.org/rfc/rfc3986.html#section-5.4
/// [RFC 3986 section 5.4.2]: https://www.rfc-editor.org/rfc/rfc3986.html#section-5.4.2
pub fn resolve_against<'a>(&'a self, base: &'a RiAbsoluteStr<S>) -> Normalized<'a, RiStr<S>> {
    // One-shot resolution: build a resolver for `base` and use it once.
    let resolver = FixedBaseResolver::new(base);
    resolver.resolve(self.as_ref())
}
/// Returns the proxy to the IRI with password masking feature.
///
/// The returned [`PasswordMasked`] borrows `self`; `self` is not modified.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::format::ToDedicatedString;
/// use iri_string::types::IriReferenceStr;
///
/// let iri = IriReferenceStr::new("http://user:password@example.com/path?query")?;
/// let masked = iri.mask_password();
/// assert_eq!(masked.to_dedicated_string(), "http://user:@example.com/path?query");
///
/// assert_eq!(
///     masked.replace_password("${password}").to_string(),
///     "http://user:${password}@example.com/path?query"
/// );
/// # }
/// # Ok::<_, Error>(())
/// ```
#[inline]
#[must_use]
pub fn mask_password(&self) -> PasswordMasked<'_, Self> {
PasswordMasked::new(self)
}
}
/// Components getters.
impl<S: Spec> RiReferenceStr<S> {
/// Returns the scheme.
///
/// The following colon is truncated.
///
/// Returns `None` if the IRI reference has no scheme (see the second
/// example below).
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::types::IriReferenceStr;
///
/// let iri = IriReferenceStr::new("http://example.com/pathpath?queryquery#fragfrag")?;
/// assert_eq!(iri.scheme_str(), Some("http"));
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::types::IriReferenceStr;
///
/// let iri = IriReferenceStr::new("foo/bar:baz")?;
/// assert_eq!(iri.scheme_str(), None);
/// # Ok::<_, Error>(())
/// ```
#[inline]
#[must_use]
pub fn scheme_str(&self) -> Option<&str> {
trusted_parser::extract_scheme(self.as_str())
}
/// Returns the authority.
///
/// The leading `//` is truncated.
///
/// Returns `None` if the IRI reference has no authority component (see the
/// second and third examples below).
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::types::IriReferenceStr;
///
/// let iri = IriReferenceStr::new("http://example.com/pathpath?queryquery#fragfrag")?;
/// assert_eq!(iri.authority_str(), Some("example.com"));
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::types::IriReferenceStr;
///
/// let iri = IriReferenceStr::new("urn:uuid:10db315b-fcd1-4428-aca8-15babc9a2da2")?;
/// assert_eq!(iri.authority_str(), None);
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::types::IriReferenceStr;
///
/// let iri = IriReferenceStr::new("foo/bar:baz")?;
/// assert_eq!(iri.authority_str(), None);
/// # Ok::<_, Error>(())
/// ```
#[inline]
#[must_use]
pub fn authority_str(&self) -> Option<&str> {
trusted_parser::extract_authority(self.as_str())
}
/// Returns the path.
///
/// The path is returned as a plain `&str` rather than an `Option`: a string
/// slice is returned for every IRI reference.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::types::IriReferenceStr;
///
/// let iri = IriReferenceStr::new("http://example.com/pathpath?queryquery#fragfrag")?;
/// assert_eq!(iri.path_str(), "/pathpath");
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::types::IriReferenceStr;
///
/// let iri = IriReferenceStr::new("urn:uuid:10db315b-fcd1-4428-aca8-15babc9a2da2")?;
/// assert_eq!(iri.path_str(), "uuid:10db315b-fcd1-4428-aca8-15babc9a2da2");
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::types::IriReferenceStr;
///
/// let iri = IriReferenceStr::new("foo/bar:baz")?;
/// assert_eq!(iri.path_str(), "foo/bar:baz");
/// # Ok::<_, Error>(())
/// ```
#[inline]
#[must_use]
pub fn path_str(&self) -> &str {
trusted_parser::extract_path(self.as_str())
}
/// Returns the query as a typed [`RiQueryStr`] slice, if present.
///
/// The leading question mark (`?`) is not included in the result.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::types::{IriQueryStr, IriReferenceStr};
///
/// let iri = IriReferenceStr::new("http://example.com/pathpath?queryquery#fragfrag")?;
/// let query = IriQueryStr::new("queryquery")?;
/// assert_eq!(iri.query(), Some(query));
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::types::IriReferenceStr;
///
/// let iri = IriReferenceStr::new("urn:uuid:10db315b-fcd1-4428-aca8-15babc9a2da2")?;
/// assert_eq!(iri.query(), None);
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::types::{IriQueryStr, IriReferenceStr};
///
/// let iri = IriReferenceStr::new("foo/bar:baz?")?;
/// let query = IriQueryStr::new("")?;
/// assert_eq!(iri.query(), Some(query));
/// # Ok::<_, Error>(())
/// ```
#[inline]
#[must_use]
pub fn query(&self) -> Option<&RiQueryStr<S>> {
    let raw = trusted_parser::extract_query(self.as_str())?;
    // SAFETY: `extract_query` returns the query part of an IRI, and the
    // returned string should have only valid characters since it is a
    // substring of the source IRI.
    Some(unsafe { RiQueryStr::new_maybe_unchecked(raw) })
}
/// Returns the query as a raw string slice.
///
/// The leading question mark (`?`) is truncated.
///
/// Returns `None` if the IRI reference has no query part. A query that is
/// present but empty is returned as `Some("")` (see the last example below).
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::types::IriReferenceStr;
///
/// let iri = IriReferenceStr::new("http://example.com/pathpath?queryquery#fragfrag")?;
/// assert_eq!(iri.query_str(), Some("queryquery"));
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::types::IriReferenceStr;
///
/// let iri = IriReferenceStr::new("urn:uuid:10db315b-fcd1-4428-aca8-15babc9a2da2")?;
/// assert_eq!(iri.query_str(), None);
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::types::IriReferenceStr;
///
/// let iri = IriReferenceStr::new("foo/bar:baz?")?;
/// assert_eq!(iri.query_str(), Some(""));
/// # Ok::<_, Error>(())
/// ```
#[inline]
#[must_use]
pub fn query_str(&self) -> Option<&str> {
trusted_parser::extract_query(self.as_str())
}
/// Returns the fragment part as a typed [`RiFragmentStr`] slice, if present.
///
/// The leading `#` character is not included in the result.
///
/// # Examples
///
/// If the IRI has a fragment part, `Some(_)` is returned.
///
/// ```
/// # use iri_string::{spec::IriSpec, types::{IriFragmentStr, IriReferenceStr}, validate::Error};
/// let iri = IriReferenceStr::new("foo://bar/baz?qux=quux#corge")?;
/// let fragment = IriFragmentStr::new("corge")?;
/// assert_eq!(iri.fragment(), Some(fragment));
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::{spec::IriSpec, types::{IriFragmentStr, IriReferenceStr}, validate::Error};
/// let iri = IriReferenceStr::new("#foo")?;
/// let fragment = IriFragmentStr::new("foo")?;
/// assert_eq!(iri.fragment(), Some(fragment));
/// # Ok::<_, Error>(())
/// ```
///
/// When the fragment part exists but is an empty string, `Some(_)` is still returned.
///
/// ```
/// # use iri_string::{spec::IriSpec, types::{IriFragmentStr, IriReferenceStr}, validate::Error};
/// let iri = IriReferenceStr::new("foo://bar/baz?qux=quux#")?;
/// let fragment = IriFragmentStr::new("")?;
/// assert_eq!(iri.fragment(), Some(fragment));
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::{spec::IriSpec, types::{IriFragmentStr, IriReferenceStr}, validate::Error};
/// let iri = IriReferenceStr::new("#")?;
/// let fragment = IriFragmentStr::new("")?;
/// assert_eq!(iri.fragment(), Some(fragment));
/// # Ok::<_, Error>(())
/// ```
///
/// If the IRI has no fragment, `None` is returned.
///
/// ```
/// # use iri_string::{spec::IriSpec, types::IriReferenceStr, validate::Error};
/// let iri = IriReferenceStr::new("foo://bar/baz?qux=quux")?;
/// assert_eq!(iri.fragment(), None);
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::{spec::IriSpec, types::IriReferenceStr, validate::Error};
/// let iri = IriReferenceStr::new("")?;
/// assert_eq!(iri.fragment(), None);
/// # Ok::<_, Error>(())
/// ```
#[must_use]
pub fn fragment(&self) -> Option<&RiFragmentStr<S>> {
    let raw = trusted_parser::extract_fragment(self.as_str())?;
    // SAFETY: `extract_fragment` returns the fragment part of an IRI,
    // and the returned string should have only valid characters since
    // it is a substring of the source IRI.
    Some(unsafe { RiFragmentStr::new_maybe_unchecked(raw) })
}
/// Returns the fragment part as a raw string slice if exists.
///
/// A leading `#` character is truncated if the fragment part exists.
///
/// # Examples
///
/// If the IRI has a fragment part, `Some(_)` is returned.
///
/// ```
/// # use iri_string::{spec::IriSpec, types::IriReferenceStr, validate::Error};
/// let iri = IriReferenceStr::new("foo://bar/baz?qux=quux#corge")?;
/// assert_eq!(iri.fragment_str(), Some("corge"));
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::{spec::IriSpec, types::IriReferenceStr, validate::Error};
/// let iri = IriReferenceStr::new("#foo")?;
/// assert_eq!(iri.fragment_str(), Some("foo"));
/// # Ok::<_, Error>(())
/// ```
///
/// When the fragment part exists but is an empty string, `Some(_)` is returned.
///
/// ```
/// # use iri_string::{spec::IriSpec, types::IriReferenceStr, validate::Error};
/// let iri = IriReferenceStr::new("foo://bar/baz?qux=quux#")?;
/// assert_eq!(iri.fragment_str(), Some(""));
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::{spec::IriSpec, types::IriReferenceStr, validate::Error};
/// let iri = IriReferenceStr::new("#")?;
/// assert_eq!(iri.fragment_str(), Some(""));
/// # Ok::<_, Error>(())
/// ```
///
/// If the IRI has no fragment, `None` is returned.
///
/// ```
/// # use iri_string::{spec::IriSpec, types::IriReferenceStr, validate::Error};
/// let iri = IriReferenceStr::new("foo://bar/baz?qux=quux")?;
/// assert_eq!(iri.fragment_str(), None);
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::{spec::IriSpec, types::IriReferenceStr, validate::Error};
/// let iri = IriReferenceStr::new("")?;
/// assert_eq!(iri.fragment_str(), None);
/// # Ok::<_, Error>(())
/// ```
#[must_use]
pub fn fragment_str(&self) -> Option<&str> {
trusted_parser::extract_fragment(self.as_str())
}
/// Returns the authority components.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::types::IriReferenceStr;
///
/// let iri = IriReferenceStr::new("http://user:pass@example.com:8080/pathpath?queryquery")?;
/// let authority = iri.authority_components()
///     .expect("authority is available");
/// assert_eq!(authority.userinfo(), Some("user:pass"));
/// assert_eq!(authority.host(), "example.com");
/// assert_eq!(authority.port(), Some("8080"));
/// # Ok::<_, Error>(())
/// ```
///
/// If the IRI reference has no authority, `None` is returned.
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::types::IriReferenceStr;
///
/// let iri = IriReferenceStr::new("foo//bar:baz")?;
/// assert!(iri.authority_components().is_none());
/// # Ok::<_, Error>(())
/// ```
#[inline]
#[must_use]
pub fn authority_components(&self) -> Option<AuthorityComponents<'_>> {
AuthorityComponents::from_iri(self)
}
}
#[cfg(feature = "alloc")]
impl<S: Spec> RiReferenceString<S> {
/// Converts this IRI reference into an [`RiString`], if it is valid as an IRI.
///
/// When the string is not valid as an IRI, the very same string is handed
/// back as [`RiRelativeString`] inside `Err(_)`.
///
/// [`RiRelativeString`]: struct.RiRelativeString.html
/// [`RiString`]: struct.RiString.html
pub fn into_iri(self) -> Result<RiString<S>, RiRelativeString<S>> {
    let raw: String = self.into();
    // The `IRI` rule is checked first, because of the syntax.
    //
    // > Some productions are ambiguous. The "first-match-wins" (a.k.a.
    // > "greedy") algorithm applies. For details, see [RFC3986].
    // >
    // > --- <https://www.rfc-editor.org/rfc/rfc3987.html#section-2.2>.
    let is_iri = iri::<S>(&raw).is_ok();
    if !is_iri {
        // SAFETY: if an IRI reference is not an IRI, then it is a relative IRI.
        // See the RFC 3987 syntax rule `IRI-reference = IRI / irelative-ref`.
        return Err(unsafe { RiRelativeString::new_maybe_unchecked(raw) });
    }
    // SAFETY: just checked `raw` is valid as an IRI.
    Ok(unsafe { RiString::new_always_unchecked(raw) })
}
/// Returns the string as [`RiRelativeString`], if it is not valid as an IRI
/// (i.e. it is a relative IRI reference).
///
/// If it is an IRI, then [`RiString`] is returned as `Err(_)`.
///
/// This is the mirror image of `into_iri`: the same check is performed and
/// only the roles of `Ok` and `Err` are swapped.
///
/// [`RiRelativeString`]: struct.RiRelativeString.html
/// [`RiString`]: struct.RiString.html
pub fn into_relative_iri(self) -> Result<RiRelativeString<S>, RiString<S>> {
    // `into_iri` reports a relative reference as `Err(_)`, so swap the roles.
    match self.into_iri() {
        Err(relative) => Ok(relative),
        Ok(iri) => Err(iri),
    }
}
/// Sets the fragment part to the given string.
///
/// Removes fragment part (and the leading `#` character) if `None` is given.
pub fn set_fragment(&mut self, fragment: Option<&RiFragmentStr<S>>) {
raw::set_fragment(&mut self.inner, fragment.map(AsRef::as_ref));
// Debug-only sanity check: the edited string must still be a valid IRI reference.
debug_assert!(iri_reference::<S>(&self.inner).is_ok());
}
/// Strips the password from `self` together with its separator colon.
///
/// The removal happens even when the password is empty, so no trailing
/// colon is left in the userinfo. If there is no password to hide, `self`
/// is not modified.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::types::IriReferenceString;
///
/// let mut iri = IriReferenceString::try_from("http://user:password@example.com/path?query")?;
/// iri.remove_password_inline();
/// assert_eq!(iri, "http://user@example.com/path?query");
/// # }
/// # Ok::<_, Error>(())
/// ```
///
/// Even if the password is empty, the password and separator will be removed.
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::types::IriReferenceString;
///
/// let mut iri = IriReferenceString::try_from("http://user:@example.com/path?query")?;
/// iri.remove_password_inline();
/// assert_eq!(iri, "http://user@example.com/path?query");
/// # }
/// # Ok::<_, Error>(())
/// ```
pub fn remove_password_inline(&mut self) {
    let hidden = password_range_to_hide(self.as_slice());
    if let Some(password) = hidden {
        // The separator colon is the byte right before the password, so the
        // removed span starts one byte earlier than the password itself.
        let doomed = (password.start - 1)..password.end;
        // SAFETY: the IRI must be valid after the password component and
        // the leading separator colon is removed.
        unsafe {
            let inner = self.as_inner_mut();
            inner.drain(doomed);
            debug_assert!(
                RiReferenceStr::<S>::new(inner).is_ok(),
                "[validity] the IRI must be valid after the password component is removed"
            );
        }
    }
}
/// Replaces the non-empty password in `self` with the empty password.
///
/// The separator colon is kept if the password part was available. When
/// there is no password to hide, or the password is already empty, `self`
/// is not modified.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::types::IriReferenceString;
///
/// let mut iri = IriReferenceString::try_from("http://user:password@example.com/path?query")?;
/// iri.remove_nonempty_password_inline();
/// assert_eq!(iri, "http://user:@example.com/path?query");
/// # }
/// # Ok::<_, Error>(())
/// ```
///
/// If the password is empty, it is left as is.
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::types::IriReferenceString;
///
/// let mut iri = IriReferenceString::try_from("http://user:@example.com/path?query")?;
/// iri.remove_nonempty_password_inline();
/// assert_eq!(iri, "http://user:@example.com/path?query");
/// # }
/// # Ok::<_, Error>(())
/// ```
pub fn remove_nonempty_password_inline(&mut self) {
    // Only a present *and* non-empty password needs any work.
    let candidate = password_range_to_hide(self.as_slice()).filter(|range| !range.is_empty());
    let password = match candidate {
        Some(range) => range,
        None => return,
    };
    debug_assert_eq!(
        self.as_str().as_bytes().get(password.start - 1).copied(),
        Some(b':'),
        "[validity] the password component must be prefixed with a separator colon"
    );
    // SAFETY: the IRI must be valid after the password component is
    // replaced with the empty password.
    unsafe {
        let inner = self.as_inner_mut();
        inner.drain(password);
        debug_assert!(
            RiReferenceStr::<S>::new(inner).is_ok(),
            "[validity] the IRI must be valid after the password component \
             is replaced with the empty password"
        );
    }
}
}

View File

@@ -0,0 +1,571 @@
//! Relative IRI reference.
use crate::components::AuthorityComponents;
#[cfg(feature = "alloc")]
use crate::mask_password::password_range_to_hide;
use crate::mask_password::PasswordMasked;
use crate::normalize::Normalized;
use crate::parser::trusted as trusted_parser;
#[cfg(feature = "alloc")]
use crate::raw;
use crate::resolve::FixedBaseResolver;
use crate::spec::Spec;
#[cfg(feature = "alloc")]
use crate::types::RiReferenceString;
use crate::types::{RiAbsoluteStr, RiFragmentStr, RiQueryStr, RiReferenceStr, RiStr};
use crate::validate::relative_ref;
define_custom_string_slice! {
/// A borrowed slice of a relative IRI reference.
///
/// This corresponds to [`irelative-ref` rule] in [RFC 3987]
/// (and [`relative-ref` rule] in [RFC 3986]).
/// The rule for `irelative-ref` is `irelative-part [ "?" iquery ] [ "#" ifragment ]`.
///
/// # Valid values
///
/// This type can have a relative IRI reference.
///
/// ```
/// # use iri_string::types::IriRelativeStr;
/// assert!(IriRelativeStr::new("foo").is_ok());
/// assert!(IriRelativeStr::new("foo/bar").is_ok());
/// assert!(IriRelativeStr::new("/foo").is_ok());
/// assert!(IriRelativeStr::new("//foo/bar").is_ok());
/// assert!(IriRelativeStr::new("?foo").is_ok());
/// assert!(IriRelativeStr::new("#foo").is_ok());
/// assert!(IriRelativeStr::new("foo/bar?baz#qux").is_ok());
/// // The first path component can have colon if the path is absolute.
/// assert!(IriRelativeStr::new("/foo:bar/").is_ok());
/// // Second or following path components can have colon.
/// assert!(IriRelativeStr::new("foo/bar://baz/").is_ok());
/// assert!(IriRelativeStr::new("./foo://bar").is_ok());
/// ```
///
/// Absolute form of a reference is not allowed.
///
/// ```
/// # use iri_string::types::IriRelativeStr;
/// assert!(IriRelativeStr::new("https://example.com/").is_err());
/// // The first path component cannot have colon, if the path is not absolute.
/// assert!(IriRelativeStr::new("foo:bar").is_err());
/// assert!(IriRelativeStr::new("foo:").is_err());
/// assert!(IriRelativeStr::new("foo:/").is_err());
/// assert!(IriRelativeStr::new("foo://").is_err());
/// assert!(IriRelativeStr::new("foo:///").is_err());
/// assert!(IriRelativeStr::new("foo:////").is_err());
/// assert!(IriRelativeStr::new("foo://///").is_err());
/// ```
///
/// Some characters and sequences cannot be used in a relative IRI reference.
///
/// ```
/// # use iri_string::types::IriRelativeStr;
/// // `<` and `>` cannot directly appear in a relative IRI reference.
/// assert!(IriRelativeStr::new("<not allowed>").is_err());
/// // Broken percent encoding cannot appear in a relative IRI reference.
/// assert!(IriRelativeStr::new("%").is_err());
/// assert!(IriRelativeStr::new("%GG").is_err());
/// ```
///
/// [RFC 3986]: https://www.rfc-editor.org/rfc/rfc3986.html
/// [RFC 3987]: https://www.rfc-editor.org/rfc/rfc3987.html
/// [`irelative-ref` rule]: https://www.rfc-editor.org/rfc/rfc3987.html#section-2.2
/// [`relative-ref` rule]: https://www.rfc-editor.org/rfc/rfc3986.html#section-4.2
struct RiRelativeStr {
validator = relative_ref,
expecting_msg = "Relative IRI reference string",
}
}
#[cfg(feature = "alloc")]
define_custom_string_owned! {
/// An owned string of a relative IRI reference.
///
/// This corresponds to [`irelative-ref` rule] in [RFC 3987]
/// (and [`relative-ref` rule] in [RFC 3986]).
/// The rule for `irelative-ref` is `irelative-part [ "?" iquery ] [ "#" ifragment ]`.
///
/// For details, see the document for [`RiRelativeStr`].
///
/// Enabled by `alloc` or `std` feature.
///
/// [RFC 3986]: https://www.rfc-editor.org/rfc/rfc3986.html
/// [RFC 3987]: https://www.rfc-editor.org/rfc/rfc3987.html
/// [`irelative-ref` rule]: https://www.rfc-editor.org/rfc/rfc3987.html#section-2.2
/// [`relative-ref` rule]: https://www.rfc-editor.org/rfc/rfc3986.html#section-4.2
/// [`RiRelativeStr`]: struct.RiRelativeStr.html
struct RiRelativeString {
validator = relative_ref,
slice = RiRelativeStr,
expecting_msg = "Relative IRI reference string",
}
}
impl<S: Spec> RiRelativeStr<S> {
/// Resolves this relative reference against the given base IRI.
///
/// For examples of IRI reference resolution output, see [RFC 3986 section 5.4].
///
/// If several references are going to be resolved against one common base,
/// consider creating a [`FixedBaseResolver`] once and reusing it.
///
/// # Strictness
///
/// The IRI parsers provided by this crate are strict (e.g. `http:g` is
/// always interpreted as a composition of the scheme `http` and the path
/// `g`), so backward compatible parsing and resolution are not provided.
/// About parser and resolver strictness, see [RFC 3986 section 5.4.2]:
///
/// > Some parsers allow the scheme name to be present in a relative
/// > reference if it is the same as the base URI scheme. This is considered
/// > to be a loophole in prior specifications of partial URI
/// > [RFC1630](https://www.rfc-editor.org/rfc/rfc1630.html). Its use should be
/// > avoided but is allowed for backward compatibility.
/// >
/// > --- <https://www.rfc-editor.org/rfc/rfc3986.html#section-5.4.2>
///
/// # Failures
///
/// This method itself does not fail, but IRI resolution without WHATWG URL
/// Standard serialization can fail in some minor cases.
///
/// To see examples of such unresolvable IRIs, visit the documentation
/// for [`normalize`][`crate::normalize`] module.
///
/// [RFC 3986 section 5.4]: https://www.rfc-editor.org/rfc/rfc3986.html#section-5.4
/// [RFC 3986 section 5.4.2]: https://www.rfc-editor.org/rfc/rfc3986.html#section-5.4.2
pub fn resolve_against<'a>(&'a self, base: &'a RiAbsoluteStr<S>) -> Normalized<'a, RiStr<S>> {
    // A one-shot resolver bound to `base`; `self` is passed as a generic IRI reference.
    let resolver = FixedBaseResolver::new(base);
    resolver.resolve(self.as_ref())
}
/// Returns the proxy to the IRI with password masking feature.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::format::ToDedicatedString;
/// use iri_string::types::IriRelativeStr;
///
/// let iri = IriRelativeStr::new("//user:password@example.com/path?query")?;
/// let masked = iri.mask_password();
/// assert_eq!(masked.to_dedicated_string(), "//user:@example.com/path?query");
///
/// assert_eq!(
/// masked.replace_password("${password}").to_string(),
/// "//user:${password}@example.com/path?query"
/// );
/// # }
/// # Ok::<_, Error>(())
/// ```
#[inline]
#[must_use]
pub fn mask_password(&self) -> PasswordMasked<'_, Self> {
// Builds the `PasswordMasked` proxy from a borrow of `self`.
PasswordMasked::new(self)
}
}
/// Components getters.
impl<S: Spec> RiRelativeStr<S> {
/// Returns the authority.
///
/// The leading `//` is truncated.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::types::IriRelativeStr;
///
/// let iri = IriRelativeStr::new("//example.com/pathpath?queryquery#fragfrag")?;
/// assert_eq!(iri.authority_str(), Some("example.com"));
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::types::IriRelativeStr;
///
/// let iri = IriRelativeStr::new("foo//bar:baz")?;
/// assert_eq!(iri.authority_str(), None);
/// # Ok::<_, Error>(())
/// ```
#[inline]
#[must_use]
pub fn authority_str(&self) -> Option<&str> {
// Delegates to the extractor dedicated to relative references.
// NOTE(review): the `trusted_parser` name suggests it relies on the string
// having been validated already; confirm in that module.
trusted_parser::extract_authority_relative(self.as_str())
}
/// Returns the path.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::types::IriRelativeStr;
///
/// let iri = IriRelativeStr::new("//example.com/pathpath?queryquery#fragfrag")?;
/// assert_eq!(iri.path_str(), "/pathpath");
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::types::IriRelativeStr;
///
/// let iri = IriRelativeStr::new("foo//bar:baz")?;
/// assert_eq!(iri.path_str(), "foo//bar:baz");
/// # Ok::<_, Error>(())
/// ```
#[inline]
#[must_use]
pub fn path_str(&self) -> &str {
// Delegates to the extractor dedicated to relative references.
// NOTE(review): the `trusted_parser` name suggests it relies on the string
// having been validated already; confirm in that module.
trusted_parser::extract_path_relative(self.as_str())
}
/// Returns the query.
///
/// The leading question mark (`?`) is truncated.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::types::{IriQueryStr, IriRelativeStr};
///
/// let iri = IriRelativeStr::new("//example.com/pathpath?queryquery#fragfrag")?;
/// let query = IriQueryStr::new("queryquery")?;
/// assert_eq!(iri.query(), Some(query));
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::types::{IriQueryStr, IriRelativeStr};
///
/// let iri = IriRelativeStr::new("foo//bar:baz?")?;
/// let query = IriQueryStr::new("")?;
/// assert_eq!(iri.query(), Some(query));
/// # Ok::<_, Error>(())
/// ```
#[inline]
#[must_use]
pub fn query(&self) -> Option<&RiQueryStr<S>> {
// `None` from the extractor is passed through; a found query is wrapped
// into the typed query string without being validated again.
trusted_parser::extract_query(self.as_str()).map(|query| {
// SAFETY: `extract_query` returns the query part of an IRI, and the
// returned string should have only valid characters since it is a
// substring of the source IRI.
unsafe { RiQueryStr::new_maybe_unchecked(query) }
})
}
/// Returns the query in a raw string slice.
///
/// The leading question mark (`?`) is truncated.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::types::IriRelativeStr;
///
/// let iri = IriRelativeStr::new("//example.com/pathpath?queryquery#fragfrag")?;
/// assert_eq!(iri.query_str(), Some("queryquery"));
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::types::IriRelativeStr;
///
/// let iri = IriRelativeStr::new("foo//bar:baz?")?;
/// assert_eq!(iri.query_str(), Some(""));
/// # Ok::<_, Error>(())
/// ```
#[inline]
#[must_use]
pub fn query_str(&self) -> Option<&str> {
// Same extraction as `query`, but returned as a plain `&str`.
trusted_parser::extract_query(self.as_str())
}
/// Returns the fragment part if exists.
///
/// A leading `#` character is truncated if the fragment part exists.
///
/// # Examples
///
/// If the IRI has a fragment part, `Some(_)` is returned.
///
/// ```
/// # use iri_string::{spec::IriSpec, types::{IriFragmentStr, IriRelativeStr}, validate::Error};
/// let iri = IriRelativeStr::new("?foo#bar")?;
/// let fragment = IriFragmentStr::new("bar")?;
/// assert_eq!(iri.fragment(), Some(fragment));
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::{spec::IriSpec, types::{IriFragmentStr, IriRelativeStr}, validate::Error};
/// let iri = IriRelativeStr::new("#foo")?;
/// let fragment = IriFragmentStr::new("foo")?;
/// assert_eq!(iri.fragment(), Some(fragment));
/// # Ok::<_, Error>(())
/// ```
///
/// When the fragment part exists but is empty string, `Some(_)` is returned.
///
/// ```
/// # use iri_string::{spec::IriSpec, types::{IriFragmentStr, IriRelativeStr}, validate::Error};
/// let iri = IriRelativeStr::new("#")?;
/// let fragment = IriFragmentStr::new("")?;
/// assert_eq!(iri.fragment(), Some(fragment));
/// # Ok::<_, Error>(())
/// ```
///
/// If the IRI has no fragment, `None` is returned.
///
/// ```
/// # use iri_string::{spec::IriSpec, types::IriRelativeStr, validate::Error};
/// let iri = IriRelativeStr::new("")?;
/// assert_eq!(iri.fragment(), None);
/// # Ok::<_, Error>(())
/// ```
#[must_use]
pub fn fragment(&self) -> Option<&RiFragmentStr<S>> {
// View `self` as a generic IRI reference and reuse its fragment getter.
AsRef::<RiReferenceStr<S>>::as_ref(self).fragment()
}
/// Returns the fragment part as a raw string slice if it exists.
///
/// A leading `#` character is truncated if the fragment part exists.
///
/// # Examples
///
/// If the IRI has a fragment part, `Some(_)` is returned.
///
/// ```
/// # use iri_string::{spec::IriSpec, types::IriRelativeStr, validate::Error};
/// let iri = IriRelativeStr::new("?foo#bar")?;
/// assert_eq!(iri.fragment_str(), Some("bar"));
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::{spec::IriSpec, types::IriRelativeStr, validate::Error};
/// let iri = IriRelativeStr::new("#foo")?;
/// assert_eq!(iri.fragment_str(), Some("foo"));
/// # Ok::<_, Error>(())
/// ```
///
/// When the fragment part exists but is an empty string, `Some(_)` is returned.
///
/// ```
/// # use iri_string::{spec::IriSpec, types::IriRelativeStr, validate::Error};
/// let iri = IriRelativeStr::new("#")?;
/// assert_eq!(iri.fragment_str(), Some(""));
/// # Ok::<_, Error>(())
/// ```
///
/// If the IRI has no fragment, `None` is returned.
///
/// ```
/// # use iri_string::{spec::IriSpec, types::IriRelativeStr, validate::Error};
/// let iri = IriRelativeStr::new("")?;
/// assert_eq!(iri.fragment_str(), None);
/// # Ok::<_, Error>(())
/// ```
#[must_use]
pub fn fragment_str(&self) -> Option<&str> {
// View `self` as a generic IRI reference and reuse its raw fragment getter.
AsRef::<RiReferenceStr<S>>::as_ref(self).fragment_str()
}
/// Returns the authority components.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::types::IriRelativeStr;
///
/// let iri = IriRelativeStr::new("//user:pass@example.com:8080/pathpath?queryquery")?;
/// let authority = iri.authority_components()
/// .expect("authority is available");
/// assert_eq!(authority.userinfo(), Some("user:pass"));
/// assert_eq!(authority.host(), "example.com");
/// assert_eq!(authority.port(), Some("8080"));
/// # Ok::<_, Error>(())
/// ```
///
/// ```
/// # use iri_string::validate::Error;
/// use iri_string::types::IriRelativeStr;
///
/// let iri = IriRelativeStr::new("foo//bar:baz")?;
/// assert!(iri.authority_components().is_none());
/// # Ok::<_, Error>(())
/// ```
#[inline]
#[must_use]
pub fn authority_components(&self) -> Option<AuthorityComponents<'_>> {
// `self.as_ref()` hands the relative reference over in the form `from_iri` accepts.
AuthorityComponents::from_iri(self.as_ref())
}
}
#[cfg(feature = "alloc")]
impl<S: Spec> RiRelativeString<S> {
/// Sets the fragment part to the given string.
///
/// Removes fragment part (and following `#` character) if `None` is given.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::types::{IriFragmentStr, IriRelativeString};
///
/// let mut iri = IriRelativeString::try_from("//user:password@example.com/path?query#frag.old")?;
/// assert_eq!(iri.fragment_str(), Some("frag.old"));
///
/// iri.set_fragment(None);
/// assert_eq!(iri.fragment(), None);
///
/// let frag_new = IriFragmentStr::new("frag-new")?;
/// iri.set_fragment(Some(frag_new));
/// assert_eq!(iri.fragment_str(), Some("frag-new"));
/// # }
/// # Ok::<_, Error>(())
/// ```
///
/// Fragment can be empty, and it is distinguished from the absence of a fragment.
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::types::IriRelativeString;
///
/// let mut iri = IriRelativeString::try_from("/path#")?;
/// assert_eq!(iri, "/path#");
/// assert_eq!(iri.fragment_str(), Some(""), "Fragment is present and empty");
///
/// iri.set_fragment(None);
/// assert_eq!(iri, "/path", "Note that # is now removed");
/// assert_eq!(iri.fragment_str(), None, "Fragment is absent");
/// # }
/// # Ok::<_, Error>(())
/// ```
pub fn set_fragment(&mut self, fragment: Option<&RiFragmentStr<S>>) {
// The typed fragment is converted through `AsRef` before being handed to
// the raw string editing helper.
raw::set_fragment(&mut self.inner, fragment.map(AsRef::as_ref));
// Debug builds re-validate the whole string after the edit.
debug_assert!(relative_ref::<S>(&self.inner).is_ok());
}
/// Removes the password completely (including separator colon) from `self` even if it is empty.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::types::IriRelativeString;
///
/// let mut iri = IriRelativeString::try_from("//user:password@example.com/path?query")?;
/// iri.remove_password_inline();
/// assert_eq!(iri, "//user@example.com/path?query");
/// # }
/// # Ok::<_, Error>(())
/// ```
///
/// Even if the password is empty, the password and separator will be removed.
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::types::IriRelativeString;
///
/// let mut iri = IriRelativeString::try_from("//user:@example.com/path?query")?;
/// iri.remove_password_inline();
/// assert_eq!(iri, "//user@example.com/path?query");
/// # }
/// # Ok::<_, Error>(())
/// ```
pub fn remove_password_inline(&mut self) {
// Nothing to do when no password range is reported.
let pw_range = match password_range_to_hide(self.as_slice().as_ref()) {
Some(v) => v,
None => return,
};
// The separator colon is the byte right before the password range
// (`remove_nonempty_password_inline` asserts this in debug builds).
let separator_colon = pw_range.start - 1;
// SAFETY: removing the password component together with its leading colon
// keeps the IRI syntactically valid.
unsafe {
let buf = self.as_inner_mut();
buf.drain(separator_colon..pw_range.end);
debug_assert!(
RiRelativeStr::<S>::new(buf).is_ok(),
"[validity] the IRI must be valid after the password component is removed"
);
}
}
/// Replaces the non-empty password in `self` to the empty password.
///
/// This leaves the separator colon if the password part was available.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::types::IriRelativeString;
///
/// let mut iri = IriRelativeString::try_from("//user:password@example.com/path?query")?;
/// iri.remove_nonempty_password_inline();
/// assert_eq!(iri, "//user:@example.com/path?query");
/// # }
/// # Ok::<_, Error>(())
/// ```
///
/// If the password is empty, it is left as is.
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
/// use iri_string::types::IriRelativeString;
///
/// let mut iri = IriRelativeString::try_from("//user:@example.com/path?query")?;
/// iri.remove_nonempty_password_inline();
/// assert_eq!(iri, "//user:@example.com/path?query");
/// # }
/// # Ok::<_, Error>(())
/// ```
pub fn remove_nonempty_password_inline(&mut self) {
// Return early both when no password range is reported and when the
// reported range is empty (an empty password is left as is).
let pw_range = match password_range_to_hide(self.as_slice().as_ref()) {
Some(v) if !v.is_empty() => v,
_ => return,
};
debug_assert_eq!(
self.as_str().as_bytes().get(pw_range.start - 1).copied(),
Some(b':'),
"[validity] the password component must be prefixed with a separator colon"
);
// SAFETY: the IRI must be valid after the password component is
// replaced with the empty password.
unsafe {
let buf = self.as_inner_mut();
// Only the password bytes are removed; the separator colon stays.
buf.drain(pw_range);
debug_assert!(
RiRelativeStr::<S>::new(buf).is_ok(),
"[validity] the IRI must be valid after the password component \
is replaced with the empty password"
);
}
}
}
// Conversions from the relative-reference types (`from_*`) to the more general
// IRI-reference types (`to_*`).
// NOTE(review): the exact set of generated impls is defined by the macro, which
// is not visible from here; confirm there.
impl_trivial_conv_between_iri! {
from_slice: RiRelativeStr,
from_owned: RiRelativeString,
to_slice: RiReferenceStr,
to_owned: RiReferenceString,
}

382
vendor/iri-string/src/types/iri.rs vendored Normal file
View File

@@ -0,0 +1,382 @@
//! IRI-specific implementations.
#[cfg(feature = "alloc")]
use alloc::collections::TryReserveError;
#[cfg(all(feature = "alloc", not(feature = "std")))]
use alloc::string::String;
#[cfg(feature = "alloc")]
use crate::convert::try_percent_encode_iri_inline;
use crate::convert::MappedToUri;
use crate::spec::IriSpec;
use crate::types::{
RiAbsoluteStr, RiFragmentStr, RiQueryStr, RiReferenceStr, RiRelativeStr, RiStr,
};
#[cfg(feature = "alloc")]
use crate::types::{
RiAbsoluteString, RiFragmentString, RiQueryString, RiReferenceString, RiRelativeString,
RiString,
};
use crate::types::{
UriAbsoluteStr, UriFragmentStr, UriQueryStr, UriReferenceStr, UriRelativeStr, UriStr,
};
#[cfg(feature = "alloc")]
use crate::types::{
UriAbsoluteString, UriFragmentString, UriQueryString, UriReferenceString, UriRelativeString,
UriString,
};
// `IriSpec` instantiations of the spec-generic string types.
// The owned (`*String`) aliases are compiled only with the `alloc` feature.
/// A type alias for [`RiAbsoluteStr`]`<`[`IriSpec`]`>`.
pub type IriAbsoluteStr = RiAbsoluteStr<IriSpec>;
/// A type alias for [`RiAbsoluteString`]`<`[`IriSpec`]`>`.
#[cfg(feature = "alloc")]
pub type IriAbsoluteString = RiAbsoluteString<IriSpec>;
/// A type alias for [`RiFragmentStr`]`<`[`IriSpec`]`>`.
pub type IriFragmentStr = RiFragmentStr<IriSpec>;
/// A type alias for [`RiFragmentString`]`<`[`IriSpec`]`>`.
#[cfg(feature = "alloc")]
pub type IriFragmentString = RiFragmentString<IriSpec>;
/// A type alias for [`RiStr`]`<`[`IriSpec`]`>`.
pub type IriStr = RiStr<IriSpec>;
/// A type alias for [`RiString`]`<`[`IriSpec`]`>`.
#[cfg(feature = "alloc")]
pub type IriString = RiString<IriSpec>;
/// A type alias for [`RiReferenceStr`]`<`[`IriSpec`]`>`.
pub type IriReferenceStr = RiReferenceStr<IriSpec>;
/// A type alias for [`RiReferenceString`]`<`[`IriSpec`]`>`.
#[cfg(feature = "alloc")]
pub type IriReferenceString = RiReferenceString<IriSpec>;
/// A type alias for [`RiRelativeStr`]`<`[`IriSpec`]`>`.
pub type IriRelativeStr = RiRelativeStr<IriSpec>;
/// A type alias for [`RiRelativeString`]`<`[`IriSpec`]`>`.
#[cfg(feature = "alloc")]
pub type IriRelativeString = RiRelativeString<IriSpec>;
/// A type alias for [`RiQueryStr`]`<`[`IriSpec`]`>`.
pub type IriQueryStr = RiQueryStr<IriSpec>;
/// A type alias for [`RiQueryString`]`<`[`IriSpec`]`>`.
#[cfg(feature = "alloc")]
pub type IriQueryString = RiQueryString<IriSpec>;
/// Implements the conversion from an IRI into a URI.
macro_rules! impl_conversion_between_uri {
(
$ty_owned_iri:ident,
$ty_owned_uri:ident,
$ty_borrowed_iri:ident,
$ty_borrowed_uri:ident,
$example_iri:expr,
$example_uri:expr
) => {
/// Conversion from an IRI into a URI.
impl $ty_borrowed_iri {
/// Percent-encodes the IRI into a valid URI that identifies the equivalent resource.
///
/// If you need more precise control over memory allocation and buffer
/// handling, use [`MappedToUri`][`crate::convert::MappedToUri`] type.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
#[doc = concat!("use iri_string::format::ToDedicatedString;")]
#[doc = concat!("use iri_string::types::{", stringify!($ty_borrowed_iri), ", ", stringify!($ty_owned_uri), "};")]
///
#[doc = concat!("let iri = ", stringify!($ty_borrowed_iri), "::new(", stringify!($example_iri), ")?;")]
/// // Type annotation here is not necessary.
#[doc = concat!("let uri: ", stringify!($ty_owned_uri), " = iri.encode_to_uri().to_dedicated_string();")]
#[doc = concat!("assert_eq!(uri, ", stringify!($example_uri), ");")]
/// # }
/// # Ok::<_, Error>(())
/// ```
#[inline]
#[must_use]
pub fn encode_to_uri(&self) -> MappedToUri<'_, Self> {
// Builds the `MappedToUri` proxy from a borrow of `self`.
MappedToUri::from(self)
}
/// Converts an IRI into a URI without modification, if possible.
///
/// This is semantically equivalent to
#[doc = concat!("`", stringify!($ty_borrowed_uri), "::new(self.as_str()).ok()`.")]
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
#[doc = concat!("use iri_string::types::{", stringify!($ty_borrowed_iri), ", ", stringify!($ty_borrowed_uri), "};")]
///
#[doc = concat!("let ascii_iri = ", stringify!($ty_borrowed_iri), "::new(", stringify!($example_uri), ")?;")]
/// assert_eq!(
/// ascii_iri.as_uri().map(AsRef::as_ref),
#[doc = concat!(" Some(", stringify!($example_uri), ")")]
/// );
///
#[doc = concat!("let nonascii_iri = ", stringify!($ty_borrowed_iri), "::new(", stringify!($example_iri), ")?;")]
/// assert_eq!(nonascii_iri.as_uri(), None);
/// # Ok::<_, Error>(())
/// ```
#[must_use]
pub fn as_uri(&self) -> Option<&$ty_borrowed_uri> {
// Any non-ASCII character means the string cannot be reinterpreted as a
// URI without modification.
if !self.as_str().is_ascii() {
return None;
}
// Debug builds double-check the claim made in the SAFETY comment below.
debug_assert!(
<$ty_borrowed_uri>::new(self.as_str()).is_ok(),
"[consistency] the ASCII-only IRI must also be a valid URI"
);
// SAFETY: An ASCII-only IRI is a URI.
// URI (by `UriSpec`) is a subset of IRI (by `IriSpec`),
// and the difference is that URIs can only have ASCII characters.
let uri = unsafe { <$ty_borrowed_uri>::new_maybe_unchecked(self.as_str()) };
Some(uri)
}
}
/// Conversion from an IRI into a URI.
#[cfg(feature = "alloc")]
impl $ty_owned_iri {
/// Percent-encodes the IRI into a valid URI that identifies the equivalent resource.
///
/// After the encoding, the IRI is also a valid URI.
///
/// If you want a new URI string rather than modifying the IRI
/// string, or if you need more precise control over memory
/// allocation and buffer handling, use
#[doc = concat!("[`encode_to_uri`][`", stringify!($ty_borrowed_iri), "::encode_to_uri`]")]
/// method.
///
/// # Panics
///
/// Panics if the memory allocation failed.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
#[doc = concat!("use iri_string::types::", stringify!($ty_owned_iri), ";")]
///
#[doc = concat!("let mut iri = ", stringify!($ty_owned_iri), "::try_from(", stringify!($example_iri), ")?;")]
/// iri.encode_to_uri_inline();
#[doc = concat!("assert_eq!(iri, ", stringify!($example_uri), ");")]
/// # }
/// # Ok::<_, Error>(())
/// ```
#[inline]
pub fn encode_to_uri_inline(&mut self) {
// Panicking wrapper around the fallible variant.
self.try_encode_to_uri_inline()
.expect("failed to allocate memory");
}
/// Percent-encodes the IRI into a valid URI that identifies the equivalent resource.
///
/// After the encoding, the IRI is also a valid URI.
///
/// If you want a new URI string rather than modifying the IRI
/// string, or if you need more precise control over memory
/// allocation and buffer handling, use
#[doc = concat!("[`encode_to_uri`][`", stringify!($ty_borrowed_iri), "::encode_to_uri`]")]
/// method.
///
// TODO: This seems true as of this writing, but is this guaranteed? See
// <https://users.rust-lang.org/t/does-try-reserve-guarantees-that-the-content-is-preserved-on-allocation-failure/77446>.
// /// If the memory allocation failed, the content is preserved without modification.
// ///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
#[doc = concat!("use iri_string::types::", stringify!($ty_owned_iri), ";")]
///
#[doc = concat!("let mut iri = ", stringify!($ty_owned_iri), "::try_from(", stringify!($example_iri), ")?;")]
/// iri.try_encode_to_uri_inline()
/// .expect("failed to allocate memory");
#[doc = concat!("assert_eq!(iri, ", stringify!($example_uri), ");")]
/// # }
/// # Ok::<_, Error>(())
/// ```
#[inline]
pub fn try_encode_to_uri_inline(&mut self) -> Result<(), TryReserveError> {
// SAFETY: IRI is valid after it is encoded to URI (by percent encoding).
unsafe {
let buf = self.as_inner_mut();
// An allocation failure is propagated to the caller via `?`.
try_percent_encode_iri_inline(buf)?;
}
// Debug builds re-validate the edited string.
debug_assert!(
<$ty_borrowed_iri>::new(self.as_str()).is_ok(),
"[consistency] the content must be valid at any time"
);
Ok(())
}
/// Percent-encodes the IRI into a valid URI that identifies the equivalent resource.
///
/// If you want a new URI string rather than modifying the IRI
/// string, or if you need more precise control over memory
/// allocation and buffer handling, use
#[doc = concat!("[`encode_to_uri`][`", stringify!($ty_borrowed_iri), "::encode_to_uri`]")]
/// method.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
#[doc = concat!("use iri_string::types::{", stringify!($ty_owned_iri), ", ", stringify!($ty_owned_uri), "};")]
///
#[doc = concat!("let iri = ", stringify!($ty_owned_iri), "::try_from(", stringify!($example_iri), ")?;")]
/// // Type annotation here is not necessary.
#[doc = concat!("let uri: ", stringify!($ty_owned_uri), " = iri.encode_into_uri();")]
#[doc = concat!("assert_eq!(uri, ", stringify!($example_uri), ");")]
/// # }
/// # Ok::<_, Error>(())
/// ```
#[inline]
#[must_use]
pub fn encode_into_uri(self) -> $ty_owned_uri {
// Panicking wrapper around the fallible variant.
self.try_encode_into_uri()
.expect("failed to allocate memory")
}
/// Percent-encodes the IRI into a valid URI that identifies the equivalent resource.
///
/// If you want a new URI string rather than modifying the IRI
/// string, or if you need more precise control over memory
/// allocation and buffer handling, use
#[doc = concat!("[`encode_to_uri`][`", stringify!($ty_borrowed_iri), "::encode_to_uri`]")]
/// method.
///
// TODO: This seems true as of this writing, but is this guaranteed? See
// <https://users.rust-lang.org/t/does-try-reserve-guarantees-that-the-content-is-preserved-on-allocation-failure/77446>.
// /// If the memory allocation failed, the content is preserved without modification.
// ///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
/// # #[cfg(feature = "alloc")] {
#[doc = concat!("use iri_string::types::{", stringify!($ty_owned_iri), ", ", stringify!($ty_owned_uri), "};")]
///
#[doc = concat!("let iri = ", stringify!($ty_owned_iri), "::try_from(", stringify!($example_iri), ")?;")]
/// // Type annotation here is not necessary.
#[doc = concat!("let uri: ", stringify!($ty_owned_uri), " = iri.try_encode_into_uri()")]
/// .expect("failed to allocate memory");
#[doc = concat!("assert_eq!(uri, ", stringify!($example_uri), ");")]
/// # }
/// # Ok::<_, Error>(())
/// ```
pub fn try_encode_into_uri(mut self) -> Result<$ty_owned_uri, TryReserveError> {
// Encode in place first, then move the buffer into the URI type.
self.try_encode_to_uri_inline()?;
let s: String = self.into();
debug_assert!(
<$ty_borrowed_uri>::new(s.as_str()).is_ok(),
"[consistency] the encoded IRI must also be a valid URI"
);
// SAFETY: The string has just been percent-encoded in place, and
// an ASCII-only IRI is a URI.
// URI (by `UriSpec`) is a subset of IRI (by `IriSpec`),
// and the difference is that URIs can only have ASCII characters.
let uri = unsafe { <$ty_owned_uri>::new_maybe_unchecked(s) };
Ok(uri)
}
/// Converts an IRI into a URI without modification, if possible.
///
/// # Examples
///
/// ```
/// # use iri_string::validate::Error;
#[doc = concat!("use iri_string::types::{", stringify!($ty_owned_iri), ", ", stringify!($ty_owned_uri), "};")]
///
#[doc = concat!("let ascii_iri = ", stringify!($ty_owned_iri), "::try_from(", stringify!($example_uri), ")?;")]
/// assert_eq!(
/// ascii_iri.try_into_uri().map(|uri| uri.to_string()),
#[doc = concat!(" Ok(", stringify!($example_uri), ".to_string())")]
/// );
///
#[doc = concat!("let nonascii_iri = ", stringify!($ty_owned_iri), "::try_from(", stringify!($example_iri), ")?;")]
/// assert_eq!(
/// nonascii_iri.try_into_uri().map_err(|iri| iri.to_string()),
#[doc = concat!(" Err(", stringify!($example_iri), ".to_string())")]
/// );
/// # Ok::<_, Error>(())
/// ```
pub fn try_into_uri(self) -> Result<$ty_owned_uri, $ty_owned_iri> {
// On failure the IRI is handed back unchanged, so the caller keeps ownership.
if !self.as_str().is_ascii() {
return Err(self);
}
// Move the buffer out; it is reused as is for the URI.
let s: String = self.into();
debug_assert!(
<$ty_borrowed_uri>::new(s.as_str()).is_ok(),
"[consistency] the ASCII-only IRI must also be a valid URI"
);
// SAFETY: An ASCII-only IRI is a URI.
// URI (by `UriSpec`) is a subset of IRI (by `IriSpec`),
// and the difference is that URIs can only have ASCII characters.
let uri = unsafe { <$ty_owned_uri>::new_maybe_unchecked(s) };
Ok(uri)
}
}
};
}
// Instantiates the IRI-to-URI conversions for every IRI/URI type pair.
// The two string literals are the `$example_iri` / `$example_uri` values
// spliced into the generated doc examples: an IRI containing U+03B1 and the
// same string with that character percent-encoded as `%CE%B1`.
impl_conversion_between_uri!(
IriAbsoluteString,
UriAbsoluteString,
IriAbsoluteStr,
UriAbsoluteStr,
"http://example.com/?alpha=\u{03B1}",
"http://example.com/?alpha=%CE%B1"
);
impl_conversion_between_uri!(
IriReferenceString,
UriReferenceString,
IriReferenceStr,
UriReferenceStr,
"http://example.com/?alpha=\u{03B1}",
"http://example.com/?alpha=%CE%B1"
);
impl_conversion_between_uri!(
IriRelativeString,
UriRelativeString,
IriRelativeStr,
UriRelativeStr,
"../?alpha=\u{03B1}",
"../?alpha=%CE%B1"
);
impl_conversion_between_uri!(
IriString,
UriString,
IriStr,
UriStr,
"http://example.com/?alpha=\u{03B1}",
"http://example.com/?alpha=%CE%B1"
);
impl_conversion_between_uri!(
IriQueryString,
UriQueryString,
IriQueryStr,
UriQueryStr,
"alpha-is-\u{03B1}",
"alpha-is-%CE%B1"
);
impl_conversion_between_uri!(
IriFragmentString,
UriFragmentString,
IriFragmentStr,
UriFragmentStr,
"alpha-is-\u{03B1}",
"alpha-is-%CE%B1"
);

115
vendor/iri-string/src/types/uri.rs vendored Normal file
View File

@@ -0,0 +1,115 @@
//! URI-specific implementations.
use crate::spec::UriSpec;
use crate::types::{
IriAbsoluteStr, IriFragmentStr, IriQueryStr, IriReferenceStr, IriRelativeStr, IriStr,
RiAbsoluteStr, RiFragmentStr, RiQueryStr, RiReferenceStr, RiRelativeStr, RiStr,
};
#[cfg(feature = "alloc")]
use crate::types::{
IriAbsoluteString, IriFragmentString, IriQueryString, IriReferenceString, IriRelativeString,
IriString, RiAbsoluteString, RiFragmentString, RiQueryString, RiReferenceString,
RiRelativeString, RiString,
};
// `UriSpec` instantiations of the spec-generic string types.
// The owned (`*String`) aliases are compiled only with the `alloc` feature.
/// A type alias for [`RiAbsoluteStr`]`<`[`UriSpec`]`>`.
pub type UriAbsoluteStr = RiAbsoluteStr<UriSpec>;
/// A type alias for [`RiAbsoluteString`]`<`[`UriSpec`]`>`.
#[cfg(feature = "alloc")]
pub type UriAbsoluteString = RiAbsoluteString<UriSpec>;
/// A type alias for [`RiFragmentStr`]`<`[`UriSpec`]`>`.
pub type UriFragmentStr = RiFragmentStr<UriSpec>;
/// A type alias for [`RiFragmentString`]`<`[`UriSpec`]`>`.
#[cfg(feature = "alloc")]
pub type UriFragmentString = RiFragmentString<UriSpec>;
/// A type alias for [`RiStr`]`<`[`UriSpec`]`>`.
pub type UriStr = RiStr<UriSpec>;
/// A type alias for [`RiString`]`<`[`UriSpec`]`>`.
#[cfg(feature = "alloc")]
pub type UriString = RiString<UriSpec>;
/// A type alias for [`RiReferenceStr`]`<`[`UriSpec`]`>`.
pub type UriReferenceStr = RiReferenceStr<UriSpec>;
/// A type alias for [`RiReferenceString`]`<`[`UriSpec`]`>`.
#[cfg(feature = "alloc")]
pub type UriReferenceString = RiReferenceString<UriSpec>;
/// A type alias for [`RiRelativeStr`]`<`[`UriSpec`]`>`.
pub type UriRelativeStr = RiRelativeStr<UriSpec>;
/// A type alias for [`RiRelativeString`]`<`[`UriSpec`]`>`.
#[cfg(feature = "alloc")]
pub type UriRelativeString = RiRelativeString<UriSpec>;
/// A type alias for [`RiQueryStr`]`<`[`UriSpec`]`>`.
pub type UriQueryStr = RiQueryStr<UriSpec>;
/// A type alias for [`RiQueryString`]`<`[`UriSpec`]`>`.
#[cfg(feature = "alloc")]
pub type UriQueryString = RiQueryString<UriSpec>;
/// Implements the trivial conversions between a URI and an IRI.
// Arguments, in order: borrowed URI type, owned URI type, borrowed IRI type,
// owned IRI type. The matcher requires the trailing comma after the last one.
//
// Generated impls:
// - `AsRef<borrowed IRI>` for the borrowed URI type,
// - `From<owned URI>` for the owned IRI type (`alloc` only),
// - `AsRef<borrowed IRI>` for the owned URI type (`alloc` only).
macro_rules! impl_conversions_between_iri {
(
$borrowed_uri:ident,
$owned_uri:ident,
$borrowed_iri:ident,
$owned_iri:ident,
) => {
impl AsRef<$borrowed_iri> for $borrowed_uri {
fn as_ref(&self) -> &$borrowed_iri {
// SAFETY: A valid URI is also a valid IRI.
unsafe { <$borrowed_iri>::new_maybe_unchecked(self.as_str()) }
}
}
#[cfg(feature = "alloc")]
impl From<$owned_uri> for $owned_iri {
#[inline]
fn from(uri: $owned_uri) -> Self {
// SAFETY: A valid URI is also a valid IRI.
unsafe { Self::new_maybe_unchecked(uri.into()) }
}
}
#[cfg(feature = "alloc")]
impl AsRef<$borrowed_iri> for $owned_uri {
fn as_ref(&self) -> &$borrowed_iri {
// Goes through the borrowed URI type so that the `unsafe` conversion
// above stays the only URI-to-IRI reinterpretation of a borrow.
AsRef::<$borrowed_uri>::as_ref(self).as_ref()
}
}
};
}
// Instantiates the URI-to-IRI conversions for every URI/IRI type pair.
// The trailing comma inside each invocation is required by the macro matcher.
impl_conversions_between_iri!(
UriAbsoluteStr,
UriAbsoluteString,
IriAbsoluteStr,
IriAbsoluteString,
);
impl_conversions_between_iri!(
UriReferenceStr,
UriReferenceString,
IriReferenceStr,
IriReferenceString,
);
impl_conversions_between_iri!(
UriRelativeStr,
UriRelativeString,
IriRelativeStr,
IriRelativeString,
);
impl_conversions_between_iri!(UriStr, UriString, IriStr, IriString,);
impl_conversions_between_iri!(UriQueryStr, UriQueryString, IriQueryStr, IriQueryString,);
impl_conversions_between_iri!(
UriFragmentStr,
UriFragmentString,
IriFragmentStr,
IriFragmentString,
);

607
vendor/iri-string/src/validate.rs vendored Normal file
View File

@@ -0,0 +1,607 @@
//! Validators.
//!
//! Validators are functions that receive the string and checks if the entire
//! string is syntactically valid.
use core::fmt;
#[cfg(feature = "std")]
use std::error;
use crate::parser::validate as parser;
use crate::spec::Spec;
/// An error reported when a string fails resource identifier validation.
// Note that this type should implement `Copy` trait.
// To return additional non-`Copy` data as an error, use wrapper type
// (as `std::string::FromUtf8Error` contains `std::str::Utf8Error`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Error {
    /// The category of the validation failure.
    kind: ErrorKind,
}

impl Error {
    /// Wraps the given error kind into an `Error`.
    #[inline]
    #[must_use]
    pub(crate) fn with_kind(kind: ErrorKind) -> Self {
        Error { kind }
    }
}

impl fmt::Display for Error {
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // The fixed prefix comes first, then the kind-specific description.
        f.write_str("invalid IRI: ")?;
        f.write_str(self.kind.description())
    }
}

#[cfg(feature = "std")]
impl error::Error for Error {}

/// The category of a validation failure.
///
/// This type may be reorganized between minor version bumps, so users should
/// not expect specific error kind (or specific error message) to be returned
/// for a specific error.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[non_exhaustive]
pub(crate) enum ErrorKind {
    /// The scheme is invalid.
    InvalidScheme,
    /// The userinfo is invalid.
    InvalidUserInfo,
    /// The host is invalid.
    InvalidHost,
    /// The port is invalid.
    InvalidPort,
    /// The path has an invalid character.
    InvalidPath,
    /// The query is invalid.
    InvalidQuery,
    /// The fragment is invalid.
    InvalidFragment,
    /// A fragment appeared where none was expected.
    UnexpectedFragment,
    /// A relative IRI was expected but an absolute IRI was given.
    UnexpectedAbsolute,
    /// An absolute IRI was expected but a relative IRI was given.
    UnexpectedRelative,
    /// The bytes are not valid UTF-8.
    InvalidUtf8,
}

impl ErrorKind {
    /// Gives the human-readable message that belongs to this error kind.
    #[must_use]
    fn description(self) -> &'static str {
        match self {
            ErrorKind::InvalidScheme => "invalid scheme",
            ErrorKind::InvalidUserInfo => "invalid userinfo",
            ErrorKind::InvalidHost => "invalid host",
            ErrorKind::InvalidPort => "invalid port",
            ErrorKind::InvalidPath => "invalid path",
            ErrorKind::InvalidQuery => "invalid query",
            ErrorKind::InvalidFragment => "invalid fragment",
            ErrorKind::UnexpectedFragment => "unexpected fragment",
            ErrorKind::UnexpectedAbsolute => "expected a relative IRI but got an absolute IRI",
            ErrorKind::UnexpectedRelative => "expected an absolute IRI but got a relative IRI",
            ErrorKind::InvalidUtf8 => "invalid utf-8 bytes",
        }
    }
}
/// Validates [IRI][uri].
///
/// This validator corresponds to [`RiStr`] and [`RiString`] types.
///
/// # Errors
///
/// Returns an [`Error`] if `s` is not a syntactically valid IRI for the spec `S`.
///
/// # Examples
///
/// This type can have an IRI (which is absolute, and may have fragment part).
///
/// ```
/// use iri_string::{spec::UriSpec, validate::iri};
/// assert!(iri::<UriSpec>("https://user:pass@example.com:8080").is_ok());
/// assert!(iri::<UriSpec>("https://example.com/").is_ok());
/// assert!(iri::<UriSpec>("https://example.com/foo?bar=baz").is_ok());
/// assert!(iri::<UriSpec>("https://example.com/foo?bar=baz#qux").is_ok());
/// assert!(iri::<UriSpec>("foo:bar").is_ok());
/// assert!(iri::<UriSpec>("foo:").is_ok());
/// // `foo://.../` below are all allowed. See the crate documentation for detail.
/// assert!(iri::<UriSpec>("foo:/").is_ok());
/// assert!(iri::<UriSpec>("foo://").is_ok());
/// assert!(iri::<UriSpec>("foo:///").is_ok());
/// assert!(iri::<UriSpec>("foo:////").is_ok());
/// assert!(iri::<UriSpec>("foo://///").is_ok());
/// ```
///
/// Relative IRI reference is not allowed.
///
/// ```
/// use iri_string::{spec::UriSpec, validate::iri};
/// // This is relative path.
/// assert!(iri::<UriSpec>("foo/bar").is_err());
/// // `/foo/bar` is an absolute path, but it is authority-relative.
/// assert!(iri::<UriSpec>("/foo/bar").is_err());
/// // `//foo/bar` is termed "network-path reference",
/// // or usually called "protocol-relative reference".
/// assert!(iri::<UriSpec>("//foo/bar").is_err());
/// // Same-document reference is relative.
/// assert!(iri::<UriSpec>("#foo").is_err());
/// // Empty string is not a valid absolute IRI.
/// assert!(iri::<UriSpec>("").is_err());
/// ```
///
/// Some characters and sequences cannot be used in an IRI.
///
/// ```
/// use iri_string::{spec::UriSpec, validate::iri};
/// // `<` and `>` cannot directly appear in an IRI.
/// assert!(iri::<UriSpec>("<not allowed>").is_err());
/// // Broken percent encoding cannot appear in an IRI.
/// assert!(iri::<UriSpec>("%").is_err());
/// assert!(iri::<UriSpec>("%GG").is_err());
/// ```
///
/// [uri]: https://www.rfc-editor.org/rfc/rfc3986.html#section-3
/// [`RiStr`]: ../types/struct.RiStr.html
/// [`RiString`]: ../types/struct.RiString.html
pub fn iri<S: Spec>(s: &str) -> Result<(), Error> {
parser::validate_uri::<S>(s)
}
/// Validates [IRI reference][uri-reference].
///
/// This validator corresponds to [`RiReferenceStr`] and [`RiReferenceString`] types.
///
/// # Errors
///
/// Returns an [`Error`] if `s` is not a syntactically valid IRI reference for
/// the spec `S`.
///
/// # Examples
///
/// This type can have an IRI reference (which can be absolute or relative).
///
/// ```
/// use iri_string::{spec::UriSpec, validate::iri_reference};
/// assert!(iri_reference::<UriSpec>("https://user:pass@example.com:8080").is_ok());
/// assert!(iri_reference::<UriSpec>("https://example.com/").is_ok());
/// assert!(iri_reference::<UriSpec>("https://example.com/foo?bar=baz").is_ok());
/// assert!(iri_reference::<UriSpec>("https://example.com/foo?bar=baz#qux").is_ok());
/// assert!(iri_reference::<UriSpec>("foo:bar").is_ok());
/// assert!(iri_reference::<UriSpec>("foo:").is_ok());
/// // `foo://.../` below are all allowed. See the crate documentation for detail.
/// assert!(iri_reference::<UriSpec>("foo:/").is_ok());
/// assert!(iri_reference::<UriSpec>("foo://").is_ok());
/// assert!(iri_reference::<UriSpec>("foo:///").is_ok());
/// assert!(iri_reference::<UriSpec>("foo:////").is_ok());
/// assert!(iri_reference::<UriSpec>("foo://///").is_ok());
/// assert!(iri_reference::<UriSpec>("foo/bar").is_ok());
/// assert!(iri_reference::<UriSpec>("/foo/bar").is_ok());
/// assert!(iri_reference::<UriSpec>("//foo/bar").is_ok());
/// assert!(iri_reference::<UriSpec>("#foo").is_ok());
/// ```
///
/// Some characters and sequences cannot be used in an IRI reference.
///
/// ```
/// use iri_string::{spec::UriSpec, validate::iri_reference};
/// // `<` and `>` cannot directly appear in an IRI reference.
/// assert!(iri_reference::<UriSpec>("<not allowed>").is_err());
/// // Broken percent encoding cannot appear in an IRI reference.
/// assert!(iri_reference::<UriSpec>("%").is_err());
/// assert!(iri_reference::<UriSpec>("%GG").is_err());
/// ```
///
/// [uri-reference]: https://www.rfc-editor.org/rfc/rfc3986.html#section-4.1
/// [`RiReferenceStr`]: ../types/struct.RiReferenceStr.html
/// [`RiReferenceString`]: ../types/struct.RiReferenceString.html
pub fn iri_reference<S: Spec>(s: &str) -> Result<(), Error> {
parser::validate_uri_reference::<S>(s)
}
/// Validates [absolute IRI][absolute-uri].
///
/// This validator corresponds to [`RiAbsoluteStr`] and [`RiAbsoluteString`] types.
///
/// # Errors
///
/// Returns an [`Error`] if `s` is not a syntactically valid absolute IRI
/// (an IRI without a fragment part) for the spec `S`.
///
/// # Examples
///
/// This type can have an absolute IRI without fragment part.
///
/// ```
/// use iri_string::{spec::UriSpec, validate::absolute_iri};
/// assert!(absolute_iri::<UriSpec>("https://example.com/foo?bar=baz").is_ok());
/// assert!(absolute_iri::<UriSpec>("foo:bar").is_ok());
/// // Scheme `foo` and empty path.
/// assert!(absolute_iri::<UriSpec>("foo:").is_ok());
/// // `foo://.../` below are all allowed. See the crate documentation for detail.
/// assert!(absolute_iri::<UriSpec>("foo:/").is_ok());
/// assert!(absolute_iri::<UriSpec>("foo://").is_ok());
/// assert!(absolute_iri::<UriSpec>("foo:///").is_ok());
/// assert!(absolute_iri::<UriSpec>("foo:////").is_ok());
/// assert!(absolute_iri::<UriSpec>("foo://///").is_ok());
///
/// ```
///
/// Relative IRI is not allowed.
///
/// ```
/// use iri_string::{spec::UriSpec, validate::absolute_iri};
/// // This is relative path.
/// assert!(absolute_iri::<UriSpec>("foo/bar").is_err());
/// // `/foo/bar` is an absolute path, but it is authority-relative.
/// assert!(absolute_iri::<UriSpec>("/foo/bar").is_err());
/// // `//foo/bar` is termed "network-path reference",
/// // or usually called "protocol-relative reference".
/// assert!(absolute_iri::<UriSpec>("//foo/bar").is_err());
/// // Empty string is not a valid absolute IRI.
/// assert!(absolute_iri::<UriSpec>("").is_err());
/// ```
///
/// Fragment part (such as trailing `#foo`) is not allowed.
///
/// ```
/// use iri_string::{spec::UriSpec, validate::absolute_iri};
/// // Fragment part is not allowed.
/// assert!(absolute_iri::<UriSpec>("https://example.com/foo?bar=baz#qux").is_err());
/// ```
///
/// Some characters and sequences cannot be used in an absolute IRI.
///
/// ```
/// use iri_string::{spec::UriSpec, validate::absolute_iri};
/// // `<` and `>` cannot directly appear in an absolute IRI.
/// assert!(absolute_iri::<UriSpec>("<not allowed>").is_err());
/// // Broken percent encoding cannot appear in an absolute IRI.
/// assert!(absolute_iri::<UriSpec>("%").is_err());
/// assert!(absolute_iri::<UriSpec>("%GG").is_err());
/// ```
///
/// [absolute-uri]: https://www.rfc-editor.org/rfc/rfc3986.html#section-4.3
/// [`RiAbsoluteStr`]: ../types/struct.RiAbsoluteStr.html
/// [`RiAbsoluteString`]: ../types/struct.RiAbsoluteString.html
pub fn absolute_iri<S: Spec>(s: &str) -> Result<(), Error> {
parser::validate_absolute_uri::<S>(s)
}
/// Validates [relative reference][relative-ref].
///
/// This validator corresponds to [`RiRelativeStr`] and [`RiRelativeString`] types.
///
/// # Errors
///
/// Returns an [`Error`] if `s` is not a syntactically valid relative IRI
/// reference for the spec `S`.
///
/// # Valid values
///
/// This type can have a relative IRI reference.
///
/// ```
/// use iri_string::{spec::UriSpec, validate::relative_ref};
/// assert!(relative_ref::<UriSpec>("foo").is_ok());
/// assert!(relative_ref::<UriSpec>("foo/bar").is_ok());
/// assert!(relative_ref::<UriSpec>("/foo").is_ok());
/// assert!(relative_ref::<UriSpec>("//foo/bar").is_ok());
/// assert!(relative_ref::<UriSpec>("?foo").is_ok());
/// assert!(relative_ref::<UriSpec>("#foo").is_ok());
/// assert!(relative_ref::<UriSpec>("foo/bar?baz#qux").is_ok());
/// // The first path component can have colon if the path is absolute.
/// assert!(relative_ref::<UriSpec>("/foo:bar/").is_ok());
/// // Second or following path components can have colon.
/// assert!(relative_ref::<UriSpec>("foo/bar://baz/").is_ok());
/// assert!(relative_ref::<UriSpec>("./foo://bar").is_ok());
/// ```
///
/// Absolute form of a reference is not allowed.
///
/// ```
/// use iri_string::{spec::UriSpec, validate::relative_ref};
/// assert!(relative_ref::<UriSpec>("https://example.com/").is_err());
/// // The first path component cannot have colon, if the path is not absolute.
/// assert!(relative_ref::<UriSpec>("foo:bar").is_err());
/// assert!(relative_ref::<UriSpec>("foo:").is_err());
/// assert!(relative_ref::<UriSpec>("foo:/").is_err());
/// assert!(relative_ref::<UriSpec>("foo://").is_err());
/// assert!(relative_ref::<UriSpec>("foo:///").is_err());
/// assert!(relative_ref::<UriSpec>("foo:////").is_err());
/// assert!(relative_ref::<UriSpec>("foo://///").is_err());
/// ```
///
/// Some characters and sequences cannot be used in an IRI reference.
///
/// ```
/// use iri_string::{spec::UriSpec, validate::relative_ref};
/// // `<` and `>` cannot directly appear in a relative IRI reference.
/// assert!(relative_ref::<UriSpec>("<not allowed>").is_err());
/// // Broken percent encoding cannot appear in a relative IRI reference.
/// assert!(relative_ref::<UriSpec>("%").is_err());
/// assert!(relative_ref::<UriSpec>("%GG").is_err());
/// ```
///
/// [relative-ref]: https://www.rfc-editor.org/rfc/rfc3986.html#section-4.2
/// [`RiRelativeStr`]: ../types/struct.RiRelativeStr.html
/// [`RiRelativeString`]: ../types/struct.RiRelativeString.html
pub fn relative_ref<S: Spec>(s: &str) -> Result<(), Error> {
parser::validate_relative_ref::<S>(s)
}
/// Validates [IRI scheme][scheme].
///
/// Note that this function does not accept a trailing colon.
///
/// Also note that the syntax of the scheme is common between RFC 3986 (URIs)
/// and RFC 3987 (IRIs).
///
/// # Errors
///
/// Returns an [`Error`] if `s` is not a syntactically valid scheme.
///
/// # Examples
///
/// ```
/// use iri_string::validate::scheme;
/// assert!(scheme("https").is_ok());
/// assert!(scheme("file").is_ok());
/// assert!(scheme("git+ssh").is_ok());
///
/// // Colon is syntactically not part of the scheme.
/// assert!(scheme("colon:").is_err());
/// // Scheme cannot be empty.
/// assert!(scheme("").is_err());
/// // The first character should be alphabetic character.
/// assert!(scheme("0abc").is_err());
/// assert!(scheme("+a").is_err());
/// assert!(scheme("-a").is_err());
/// ```
///
/// [scheme]: https://www.rfc-editor.org/rfc/rfc3986.html#section-3.1
pub fn scheme(s: &str) -> Result<(), Error> {
parser::validate_scheme(s)
}
/// Validates [IRI authority][authority].
///
/// # Errors
///
/// Returns an [`Error`] if `s` is not a syntactically valid authority for the
/// spec `S`.
///
/// # Examples
///
/// ```
/// use iri_string::{spec::UriSpec, validate::authority};
/// assert!(authority::<UriSpec>("example.com").is_ok());
/// assert!(authority::<UriSpec>("subdomain.example.com").is_ok());
/// assert!(authority::<UriSpec>("no-period").is_ok());
/// // Though strongly discouraged, this percent-encoded reg-name with
/// // non-UTF-8 bytes is considered syntactically valid.
/// assert!(authority::<UriSpec>("non-%99-utf-8").is_ok());
/// // Empty authority is valid. Remember `file:///` has empty authority.
/// assert!(authority::<UriSpec>("").is_ok());
/// assert!(authority::<UriSpec>("127.0.0.1:8080").is_ok());
/// assert!(authority::<UriSpec>("[::127.0.0.1]:8088").is_ok());
/// // URI/IRI syntax itself does not have limit on the port number.
/// assert!(authority::<UriSpec>("[::1]:9999999999").is_ok());
/// // Syntax for future versions of IP addresses.
/// assert!(authority::<UriSpec>("[v89ab.1+2,3(4)5&6]").is_ok());
/// assert!(authority::<UriSpec>("user:password@host").is_ok());
/// assert!(authority::<UriSpec>("co%3Alon:at%40sign@host:8888").is_ok());
/// // Percent-encoded non-UTF8 (or even non-ASCII) bytes are valid.
/// // Users are responsible to validate or reject such unusual input if needed.
/// assert!(authority::<UriSpec>("not-a-%80-utf8@host").is_ok());
///
/// // Invalid percent encodings.
/// assert!(authority::<UriSpec>("invalid%GGescape@host").is_err());
/// // Invalid characters.
/// assert!(authority::<UriSpec>("foo@bar@host").is_err());
/// assert!(authority::<UriSpec>("slash/is-not-allowed").is_err());
/// ```
///
/// [authority]: https://www.rfc-editor.org/rfc/rfc3986.html#section-3.2
pub fn authority<S: Spec>(s: &str) -> Result<(), Error> {
parser::validate_authority::<S>(s)
}
/// Validates [IRI host][host].
///
/// # Errors
///
/// Returns an [`Error`] if `s` is not a syntactically valid host for the spec
/// `S`.
///
/// # Examples
///
/// ```
/// use iri_string::{spec::UriSpec, validate::host};
/// assert!(host::<UriSpec>("example.com").is_ok());
/// assert!(host::<UriSpec>("subdomain.example.com").is_ok());
/// assert!(host::<UriSpec>("no-period").is_ok());
/// // Though strongly discouraged, this percent-encoded reg-name with
/// // non-UTF-8 bytes is considered syntactically valid.
/// assert!(host::<UriSpec>("non-%99-utf-8").is_ok());
/// // Empty host is valid. Remember `file:///` has empty authority (and empty host).
/// assert!(host::<UriSpec>("").is_ok());
/// assert!(host::<UriSpec>("127.0.0.1").is_ok());
/// assert!(host::<UriSpec>("[::1]").is_ok());
/// assert!(host::<UriSpec>("[::127.0.0.1]").is_ok());
/// // Syntax for future versions of IP addresses.
/// assert!(host::<UriSpec>("[v89ab.1+2,3(4)5&6]").is_ok());
///
/// // `port` is not a part of the host.
/// assert!(host::<UriSpec>("host:8080").is_err());
/// // `userinfo` is not a part of the host.
/// assert!(host::<UriSpec>("user:password@host").is_err());
/// ```
///
/// [host]: https://www.rfc-editor.org/rfc/rfc3986.html#section-3.2.2
pub fn host<S: Spec>(s: &str) -> Result<(), Error> {
parser::validate_host::<S>(s)
}
/// Validates [IRI port][port].
///
/// Note that the syntax of the port is common between RFC 3986 (URIs) and
/// RFC 3987 (IRIs).
///
/// Also note that this function does not accept a leading colon.
///
/// A port is any number of ASCII digits, so the empty string is accepted too.
///
/// # Errors
///
/// Returns an [`Error`] if `s` contains anything other than ASCII digits.
///
/// # Examples
///
/// ```
/// use iri_string::validate::port;
/// assert!(port("0").is_ok());
/// assert!(port("8080").is_ok());
/// assert!(port("0000080").is_ok());
/// // URI/IRI syntax itself does not have limit on the port number.
/// assert!(port("999999999").is_ok());
/// // The port may be empty.
/// assert!(port("").is_ok());
///
/// // The leading colon is not a part of the `port`.
/// assert!(port(":443").is_err());
/// ```
///
/// [port]: https://www.rfc-editor.org/rfc/rfc3986.html#section-3.2.3
pub fn port(s: &str) -> Result<(), Error> {
    // Every byte must be an ASCII digit; this is vacuously true for `""`.
    if s.bytes().all(|b| b.is_ascii_digit()) {
        Ok(())
    } else {
        Err(Error::with_kind(ErrorKind::InvalidPort))
    }
}
/// Validates [IRI userinfo][userinfo].
///
/// # Errors
///
/// Returns an [`Error`] if `s` is not a syntactically valid userinfo for the
/// spec `S`.
///
/// # Examples
///
/// ```
/// use iri_string::{spec::UriSpec, validate::userinfo};
/// assert!(userinfo::<UriSpec>("user").is_ok());
/// assert!(userinfo::<UriSpec>("user:password").is_ok());
/// assert!(userinfo::<UriSpec>("non-%99-utf-8").is_ok());
/// // Special characters can be included if they are percent-encoded.
/// assert!(userinfo::<UriSpec>("co%3Alon:at%40sign").is_ok());
///
/// // The trailing atsign is not a part of the userinfo.
/// assert!(userinfo::<UriSpec>("user:password@").is_err());
/// // Invalid characters.
/// assert!(userinfo::<UriSpec>("foo@bar").is_err());
/// assert!(userinfo::<UriSpec>("slash/is-not-allowed").is_err());
/// ```
///
/// [userinfo]: https://www.rfc-editor.org/rfc/rfc3986.html#section-3.2.1
pub fn userinfo<S: Spec>(s: &str) -> Result<(), Error> {
parser::validate_userinfo::<S>(s)
}
/// Validates [IRI path][path].
///
/// # Errors
///
/// Returns an [`Error`] if `s` is not a syntactically valid path for the spec
/// `S`.
///
/// # Examples
///
/// ```
/// use iri_string::{spec::UriSpec, validate::path};
/// assert!(path::<UriSpec>("").is_ok());
/// assert!(path::<UriSpec>("foo/bar").is_ok());
/// assert!(path::<UriSpec>("foo/bar/").is_ok());
/// assert!(path::<UriSpec>("/foo/bar").is_ok());
/// assert!(path::<UriSpec>("non-%99-utf-8").is_ok());
/// // Be careful! This is completely valid (absolute) path, but may be confused
/// // with a protocol-relative URI, with the authority `foo` and the path `/bar`.
/// assert!(path::<UriSpec>("//foo/bar").is_ok());
/// // Be careful! This is completely valid (relative) path, but may be confused
/// // with an absolute URI, with the scheme `foo` and the path `bar`.
/// assert!(path::<UriSpec>("foo:bar").is_ok());
///
/// // Invalid characters.
/// assert!(path::<UriSpec>("foo?bar").is_err());
/// assert!(path::<UriSpec>("foo#bar").is_err());
/// ```
///
/// [path]: https://www.rfc-editor.org/rfc/rfc3986.html#section-3.3
pub fn path<S: Spec>(s: &str) -> Result<(), Error> {
parser::validate_path::<S>(s)
}
/// Validates [IRI path segment][segment].
///
/// # Errors
///
/// Returns an [`Error`] if `s` is not a syntactically valid path segment for
/// the spec `S`.
///
/// # Examples
///
/// ```
/// use iri_string::{spec::UriSpec, validate::path_segment};
/// assert!(path_segment::<UriSpec>("").is_ok());
/// assert!(path_segment::<UriSpec>("escaped-%2F-slash").is_ok());
/// assert!(path_segment::<UriSpec>("non-%99-utf-8").is_ok());
///
/// // A path segment itself cannot contain an unescaped slash.
/// assert!(path_segment::<UriSpec>("foo/bar").is_err());
/// ```
///
/// [segment]: https://www.rfc-editor.org/rfc/rfc3986.html#section-3.3
pub fn path_segment<S: Spec>(s: &str) -> Result<(), Error> {
parser::validate_path_segment::<S>(s)
}
/// Validates [IRI query][query].
///
/// This validator corresponds to [`RiQueryStr`] and [`RiQueryString`] types.
///
/// Note that the first `?` character in an IRI is not a part of a query.
/// For example, `https://example.com/?foo#bar` has a query `foo`, **not** `?foo`.
///
/// # Errors
///
/// Returns an [`Error`] if `s` is not a syntactically valid query for the spec
/// `S`.
///
/// # Examples
///
/// This type can have an IRI query.
/// Note that the IRI `foo://bar/baz?qux#quux` has the query `qux`, **not** `?qux`.
///
/// ```
/// use iri_string::{spec::UriSpec, validate::query};
/// assert!(query::<UriSpec>("").is_ok());
/// assert!(query::<UriSpec>("foo").is_ok());
/// assert!(query::<UriSpec>("foo/bar").is_ok());
/// assert!(query::<UriSpec>("/foo/bar").is_ok());
/// assert!(query::<UriSpec>("//foo/bar").is_ok());
/// assert!(query::<UriSpec>("https://user:pass@example.com:8080").is_ok());
/// assert!(query::<UriSpec>("https://example.com/").is_ok());
/// // Question sign `?` can appear in an IRI query.
/// assert!(query::<UriSpec>("query?again").is_ok());
/// ```
///
/// Some characters and sequences cannot be used in a query.
///
/// ```
/// use iri_string::{spec::UriSpec, validate::query};
/// // `<` and `>` cannot directly appear in an IRI reference.
/// assert!(query::<UriSpec>("<not allowed>").is_err());
/// // Broken percent encoding cannot appear in an IRI reference.
/// assert!(query::<UriSpec>("%").is_err());
/// assert!(query::<UriSpec>("%GG").is_err());
/// // Hash sign `#` cannot appear in an IRI query.
/// assert!(query::<UriSpec>("#hash").is_err());
/// ```
///
/// [query]: https://www.rfc-editor.org/rfc/rfc3986.html#section-3.4
/// [`RiQueryStr`]: ../types/struct.RiQueryStr.html
/// [`RiQueryString`]: ../types/struct.RiQueryString.html
pub fn query<S: Spec>(s: &str) -> Result<(), Error> {
parser::validate_query::<S>(s)
}
/// Validates [IRI fragment][fragment].
///
/// This validator corresponds to [`RiFragmentStr`] and [`RiFragmentString`] types.
///
/// Note that the first `#` character in an IRI is not a part of a fragment.
/// For example, `https://example.com/#foo` has a fragment `foo`, **not** `#foo`.
///
/// # Errors
///
/// Returns an [`Error`] if `s` is not a syntactically valid fragment for the
/// spec `S`.
///
/// # Examples
///
/// This type can have an IRI fragment.
/// Note that the IRI `foo://bar/baz#qux` has the fragment `qux`, **not** `#qux`.
///
/// ```
/// use iri_string::{spec::UriSpec, validate::fragment};
/// assert!(fragment::<UriSpec>("").is_ok());
/// assert!(fragment::<UriSpec>("foo").is_ok());
/// assert!(fragment::<UriSpec>("foo/bar").is_ok());
/// assert!(fragment::<UriSpec>("/foo/bar").is_ok());
/// assert!(fragment::<UriSpec>("//foo/bar").is_ok());
/// assert!(fragment::<UriSpec>("https://user:pass@example.com:8080").is_ok());
/// assert!(fragment::<UriSpec>("https://example.com/").is_ok());
/// ```
///
/// Some characters and sequences cannot be used in a fragment.
///
/// ```
/// use iri_string::{spec::UriSpec, validate::fragment};
/// // `<` and `>` cannot directly appear in an IRI reference.
/// assert!(fragment::<UriSpec>("<not allowed>").is_err());
/// // Broken percent encoding cannot appear in an IRI reference.
/// assert!(fragment::<UriSpec>("%").is_err());
/// assert!(fragment::<UriSpec>("%GG").is_err());
/// // Hash sign `#` cannot appear in an IRI fragment.
/// assert!(fragment::<UriSpec>("#hash").is_err());
/// ```
///
/// [fragment]: https://www.rfc-editor.org/rfc/rfc3986.html#section-3.5
/// [`RiFragmentStr`]: ../types/struct.RiFragmentStr.html
/// [`RiFragmentString`]: ../types/struct.RiFragmentString.html
pub fn fragment<S: Spec>(s: &str) -> Result<(), Error> {
parser::validate_fragment::<S>(s)
}

Binary file not shown.

View File

@@ -0,0 +1 @@
{"name":"iri-string","vers":"0.7.10","deps":[{"name":"memchr","req":"^2.4.1","features":[],"optional":true,"default_features":false,"target":null,"kind":"normal","registry":"https://github.com/rust-lang/crates.io-index","package":null,"public":null,"artifact":null,"bindep_target":null,"lib":false},{"name":"serde","req":"^1.0.103","features":["derive"],"optional":true,"default_features":false,"target":null,"kind":"normal","registry":"https://github.com/rust-lang/crates.io-index","package":null,"public":null,"artifact":null,"bindep_target":null,"lib":false},{"name":"serde_test","req":"^1.0.104","features":[],"optional":false,"default_features":true,"target":null,"kind":"dev","registry":"https://github.com/rust-lang/crates.io-index","package":null,"public":null,"artifact":null,"bindep_target":null,"lib":false}],"features":{"alloc":["serde?/alloc"],"default":["std"],"std":["alloc","memchr?/std","serde?/std"]},"features2":null,"cksum":"7292a91e8152d742a15aaaa12e8bb58b53fc6f7e52f2caed08cbe48459af4124","yanked":null,"links":null,"rust_version":null,"v":2}

576
vendor/iri-string/tests/build.rs vendored Normal file
View File

@@ -0,0 +1,576 @@
//! Tests for builder.
mod components;
#[macro_use]
mod utils;
use iri_string::build::Builder;
use iri_string::format::write_to_slice;
use iri_string::types::*;
use self::components::{Components, TestCase, TEST_CASES};
/// Pairs of components and composed IRI should be consistent.
///
/// This also (implicitly) tests that build-and-decompose and decompose-and-build
/// operations are identity conversions.
#[test]
fn consistent_components_and_composed() {
    for case in TEST_CASES.iter().copied() {
        // components -> composed: feed the components to a builder and compare
        // the built string with the expected composed form.
        let mut builder = Builder::new();
        case.components.feed_builder(&mut builder, false);
        let built = builder
            .build::<IriReferenceStr>()
            .expect("should be valid IRI reference");
        assert_eq_display!(built, case.composed);

        // composed -> components: parse the composed form and take it apart
        // again, then compare with the expected components.
        let composed = IriReferenceStr::new(case.composed).expect("should be valid IRI reference");
        let scheme = composed.scheme_str();
        let (user, password, host, port) = match composed.authority_components() {
            None => (None, None, None, None),
            Some(authority) => {
                // The userinfo is split at the first colon: the part before it
                // is the user, the part after it (colon excluded) the password.
                let (user, password) = match authority.userinfo() {
                    None => (None, None),
                    Some(userinfo) => match userinfo.split_once(':') {
                        Some((user, password)) => (Some(user), Some(password)),
                        None => (Some(userinfo), None),
                    },
                };
                (user, password, Some(authority.host()), authority.port())
            }
        };
        let path = composed.path_str();
        let query = composed.query().map(|s| s.as_str());
        let fragment = composed.fragment().map(|s| s.as_str());

        let roundtrip_result = Components {
            scheme,
            user,
            password,
            host,
            port,
            path,
            query,
            fragment,
        };
        assert_eq!(roundtrip_result, case.components, "case={case:#?}");
    }
}
fn assert_builds_for_case(case: &TestCase<'_>, builder: &Builder<'_>) {
if case.is_iri_class() {
{
let built = builder
.clone()
.build::<IriReferenceStr>()
.expect("should be valid IRI reference");
assert_eq_display!(built, case.composed);
}
{
let built = builder.clone().build::<IriStr>();
if case.is_absolute() {
let built = built.expect("should be valid IRI");
assert_eq_display!(built, case.composed);
} else {
assert!(built.is_err(), "should be invalid as IRI");
}
}
{
let built = builder.clone().build::<IriAbsoluteStr>();
if case.is_absolute_without_fragment() {
let built = built.expect("should be valid absolute IRI");
assert_eq_display!(built, case.composed);
} else {
assert!(built.is_err(), "should be invalid as absolute IRI");
}
}
{
let built = builder.clone().build::<IriRelativeStr>();
if case.is_relative() {
let built = built.expect("should be valid relative IRI reference");
assert_eq_display!(built, case.composed);
} else {
assert!(
built.is_err(),
"should be invalid as relative IRI reference"
);
}
}
}
if case.is_uri_class() {
{
let built = builder
.clone()
.build::<UriReferenceStr>()
.expect("should be valid URI reference");
assert_eq_display!(built, case.composed);
}
{
let built = builder.clone().build::<UriStr>();
if case.is_absolute() {
let built = built.expect("should be valid URI");
assert_eq_display!(built, case.composed);
} else {
assert!(built.is_err(), "should be invalid as URI");
}
}
{
let built = builder.clone().build::<UriAbsoluteStr>();
if case.is_absolute_without_fragment() {
let built = built.expect("should be valid absolute URI");
assert_eq_display!(built, case.composed);
} else {
assert!(built.is_err(), "should be invalid as absolute URI");
}
}
{
let built = builder.clone().build::<UriRelativeStr>();
if case.is_relative() {
let built = built.expect("should be valid relative URI reference");
assert_eq_display!(built, case.composed);
} else {
assert!(
built.is_err(),
"should be invalid as relative URI reference"
);
}
}
}
}
/// A freshly created builder fed with each test case should build (or fail to
/// build) every target type as the case prescribes.
#[test]
fn build_simple() {
    TEST_CASES.iter().for_each(|case| {
        let mut fresh = Builder::new();
        case.components.feed_builder(&mut fresh, false);
        assert_builds_for_case(case, &fresh);
    });
}
/// A builder whose fields were all set beforehand can be reused for another case.
#[test]
fn reuse_dirty_builder() {
    // Template builder with every component already populated.
    let mut dirty = Builder::new();
    dirty.scheme("scheme");
    dirty.userinfo(("user", "password"));
    dirty.host("host");
    dirty.port("90127");
    dirty.path("/path/path-again");
    dirty.query("query");
    dirty.fragment("fragment");

    TEST_CASES.iter().for_each(|case| {
        // Start from the populated template and overwrite it with the case.
        let mut recycled = dirty.clone();
        case.components.feed_builder(&mut recycled, true);
        assert_builds_for_case(case, &recycled);
    });
}
/// A normalizing builder should produce the expected normalized absolute IRI
/// (and URI, for cases in the URI class).
#[test]
fn build_normalized_absolute() {
    for case in TEST_CASES.iter() {
        // Only absolute cases are relevant here.
        if !case.is_absolute() {
            continue;
        }
        assert!(
            !case.is_relative(),
            "every IRI is absolute or relative, but not both"
        );

        let mut builder = Builder::new();
        case.components.feed_builder(&mut builder, false);
        builder.normalize();

        let iri_outcome = builder.clone().build::<IriStr>();
        let built_iri = iri_outcome.expect("should be valid IRI reference");
        assert_eq_display!(built_iri, case.normalized_iri, "case={case:#?}");

        if !case.is_uri_class() {
            continue;
        }
        let uri_outcome = builder.build::<UriStr>();
        let built_uri = uri_outcome.expect("should be valid URI reference");
        assert_eq_display!(built_uri, case.normalized_uri, "case={case:#?}");
    }
}
/// A normalizing builder should produce the expected normalized relative IRI
/// reference (and URI reference, for cases in the URI class).
#[test]
fn build_normalized_relative() {
    for case in TEST_CASES.iter() {
        // Only relative cases are relevant here.
        if !case.is_relative() {
            continue;
        }
        assert!(
            !case.is_absolute(),
            "every IRI is absolute or relative, but not both"
        );

        let mut builder = Builder::new();
        case.components.feed_builder(&mut builder, false);
        builder.normalize();

        let iri_outcome = builder.clone().build::<IriRelativeStr>();
        let built = iri_outcome.expect("should be valid relative IRI reference");
        assert_eq_display!(built, case.normalized_iri, "case={case:#?}");

        if !case.is_uri_class() {
            continue;
        }
        let uri_outcome = builder.build::<UriReferenceStr>();
        let built_uri = uri_outcome.expect("should be valid relative URI reference");
        assert_eq_display!(built_uri, case.normalized_uri, "case={case:#?}");
    }
}
/// The build result and the string written from it should agree with the test
/// case (and with each other) about RFC 3986 normalizedness.
#[test]
fn build_normalizedness() {
    for case in TEST_CASES.iter() {
        // Only absolute cases are relevant here.
        if !case.is_absolute() {
            continue;
        }

        let mut builder = Builder::new();
        case.components.feed_builder(&mut builder, false);
        builder.normalize();
        let build_outcome = builder.clone().build::<IriStr>();
        let built = build_outcome.expect("should be valid IRI reference");

        // Judgement made on the build result itself.
        let built_judge = built.ensure_rfc3986_normalizable().is_ok();
        assert_eq!(
            built_judge,
            case.is_rfc3986_normalizable(),
            "RFC3986-normalizedness should be correctly judged: case={case:#?}"
        );

        // Judgement made on the string written out from the build result.
        let mut storage = [0_u8; 512];
        let written = write_to_slice(&mut storage, &built).expect("not enough buffer");
        let built_slice = IriStr::new(written).expect("should be valid IRI reference");
        assert!(
            built_slice.is_normalized_but_authorityless_relative_path_preserved(),
            "should be normalized"
        );

        let slice_judge = built_slice.is_normalized_rfc3986();
        assert_eq!(
            slice_judge, built_judge,
            "RFC3986-normalizedness should be consistently judged: case={case:#?}"
        );
    }
}
/// A `u8` value can be passed to `Builder::port`.
#[test]
fn set_port_u8() {
    let mut b = Builder::new();
    b.port(8_u8);

    let outcome = b.clone().build::<UriReferenceStr>();
    let built = outcome.expect("should be valid URI reference");
    assert_eq_display!(built, "//:8", "should accept `u8`");
}
/// A `u16` value can be passed to `Builder::port`.
#[test]
fn set_port_u16() {
    let mut b = Builder::new();
    b.port(65535_u16);

    let outcome = b.clone().build::<UriReferenceStr>();
    let built = outcome.expect("should be valid URI reference");
    assert_eq_display!(built, "//:65535", "should accept `u16`");
}
/// A `&str` value can be passed to `Builder::port`.
#[test]
fn set_port_str() {
    let mut b = Builder::new();
    b.port("8080");

    let outcome = b.clone().build::<UriReferenceStr>();
    let built = outcome.expect("should be valid URI reference");
    assert_eq_display!(built, "//:8080", "should accept `&str`");
}
/// A `&str` value can be passed to `Builder::port` even if the number it spells
/// is quite large.
#[test]
fn set_port_str_large() {
    let mut b = Builder::new();
    b.port("12345678901234567890");

    let outcome = b.clone().build::<UriReferenceStr>();
    let built = outcome.expect("should be valid URI reference");
    assert_eq_display!(
        built,
        "//:12345678901234567890",
        "should accept `&str` even it is quite large"
    );
}
/// A `std::net::Ipv4Addr` value can be passed to `Builder::ip_address`.
#[test]
#[cfg(feature = "std")]
fn set_ip_address_ipv4addr() {
    let address = std::net::Ipv4Addr::new(192, 0, 2, 0);
    let mut b = Builder::new();
    b.ip_address(address);

    let outcome = b.clone().build::<UriReferenceStr>();
    let built = outcome.expect("should be valid URI reference");
    assert_eq_display!(built, "//192.0.2.0", "should accept `std::net::Ipv4Addr`");
}
/// A `std::net::Ipv6Addr` value can be passed to `Builder::ip_address`.
#[test]
#[cfg(feature = "std")]
fn set_ip_address_ipv6addr() {
    let address = std::net::Ipv6Addr::new(0x2001, 0xdb8, 0, 0, 0, 0, 0, 1);
    let mut b = Builder::new();
    b.ip_address(address);

    let outcome = b.clone().build::<UriReferenceStr>();
    let built = outcome.expect("should be valid URI reference");
    assert_eq_display!(
        built,
        "//[2001:db8::1]",
        "should accept `std::net::Ipv6Addr`"
    );
}
/// A `std::net::IpAddr` value can be passed to `Builder::ip_address`.
#[test]
#[cfg(feature = "std")]
fn set_ip_address_ipaddr() {
    let address = std::net::IpAddr::V4(std::net::Ipv4Addr::new(192, 0, 2, 0));
    let mut b = Builder::new();
    b.ip_address(address);

    let outcome = b.clone().build::<UriReferenceStr>();
    let built = outcome.expect("should be valid URI reference");
    assert_eq_display!(built, "//192.0.2.0", "should accept `std::net::IpAddr`");
}
/// A `&str` value can be passed to `Builder::userinfo`.
#[test]
fn set_userinfo_str() {
    // (userinfo to set, expected display of the built URI reference).
    let table: [(&str, &str); 3] = [
        ("user:password", "//user:password@"),
        ("arbitrary-valid-string", "//arbitrary-valid-string@"),
        ("arbitrary:valid:string", "//arbitrary:valid:string@"),
    ];

    // The same builder is reused, so each entry overwrites the previous userinfo.
    let mut builder = Builder::new();
    for &(userinfo, expected) in table.iter() {
        builder.userinfo(userinfo);
        let outcome = builder.clone().build::<UriReferenceStr>();
        let built = outcome.expect("should be valid URI reference");
        assert_eq_display!(built, expected, "should accept `&str`");
    }
}
/// `Builder::userinfo` should accept `(&str, &str)`.
///
/// The pair is a user name and a password; both are written out even when
/// they are empty strings.
#[test]
fn set_userinfo_pair_str_str() {
    let mut builder = Builder::new();
    {
        builder.userinfo(("user", "password"));
        let built = builder
            .clone()
            .build::<UriReferenceStr>()
            .expect("should be valid URI reference");
        // The failure message names the type this test is actually about.
        assert_eq_display!(built, "//user:password@", "should accept `(&str, &str)`");
    }
    {
        // Reusing the builder: the new pair replaces the previous userinfo.
        builder.userinfo(("", ""));
        let built = builder
            .clone()
            .build::<UriReferenceStr>()
            .expect("should be valid URI reference");
        assert_eq_display!(built, "//:@", "empty user and password should be preserved");
    }
}
/// `Builder::userinfo` should accept `(&str, Option<&str>)`.
///
/// A `Some` password is written after a colon (even when empty), while a
/// `None` password leaves both the colon and the password out.
#[test]
fn set_userinfo_pair_str_optstr() {
    let mut b = Builder::new();

    // User and password both present.
    b.userinfo(("user", Some("password")));
    let with_password = b.clone().build::<UriReferenceStr>();
    let with_password = with_password.expect("should be valid URI reference");
    assert_eq_display!(
        with_password,
        "//user:password@",
        "should accept `(&str, Option<&str>)`"
    );

    // Both present but empty.
    b.userinfo(("", Some("")));
    let both_empty = b.clone().build::<UriReferenceStr>();
    let both_empty = both_empty.expect("should be valid URI reference");
    assert_eq_display!(both_empty, "//:@", "empty user and password should be preserved");

    // Password absent.
    b.userinfo(("user", None));
    let without_password = b.clone().build::<UriReferenceStr>();
    let without_password = without_password.expect("should be valid URI reference");
    assert_eq_display!(
        without_password,
        "//user@",
        "password given as `None` should be absent"
    );
}
/// Building must fail when the user part of a userinfo pair contains a colon.
#[test]
fn user_with_colon() {
    let mut b = Builder::new();
    b.userinfo(("us:er", Some("password")));
    assert!(
        b.clone().build::<UriReferenceStr>().is_err(),
        "`user` part cannot have a colon"
    );
}
/// With normalization enabled, the builder should still produce an IRI for
/// `scheme` + `/..//bar`, an input that hits an edge case of RFC 3986
/// normalization.
#[test]
fn normalize_double_slash_prefix() {
    let mut b = Builder::new();
    b.scheme("scheme");
    b.path("/..//bar");
    b.normalize();
    let iri = b.build::<IriStr>().expect("normalizable by `/.` path prefix");

    // Applying the RFC 3986 normalization/resolution algorithm naively would
    // give `scheme://bar`, which turns `bar` into a host although it is meant
    // to be the second path segment. The strict check must therefore fail.
    let strict_check = iri.ensure_rfc3986_normalizable();
    assert!(strict_check.is_err(), "not normalizable by RFC 3986 algorithm");

    // The WHATWG URL Standard, unlike RFC 3986, defines a serialization that
    // special-cases this situation and yields `scheme:/.//bar`. RFC 3986
    // would not call that fully normalized, but nothing more can be
    // normalized and the result works in most practical situations.
    assert_eq_display!(iri, "scheme:/.//bar");
}
/// A path starting with `//` and no authority is rejected without
/// normalization, and is built as `scheme:/.//bar` with normalization.
#[test]
fn absolute_double_slash_path_without_authority() {
    let mut b = Builder::new();
    b.scheme("scheme");
    b.path("//bar");

    // Without normalization the build must fail.
    let unnormalized = b.clone().build::<IriStr>();
    assert!(
        unnormalized.is_err(),
        "`scheme://bar` is unintended so the build should fail"
    );

    // Once normalization is requested the build goes through.
    b.normalize();
    let iri = b.build::<IriStr>().expect("normalizable by `/.` path prefix");

    // Applying the RFC 3986 normalization/resolution algorithm naively would
    // give `scheme://bar`, which turns `bar` into a host although it is meant
    // to be the second path segment. The strict check must therefore fail.
    let strict_check = iri.ensure_rfc3986_normalizable();
    assert!(strict_check.is_err(), "not normalizable by RFC 3986 algorithm");

    // The WHATWG URL Standard, unlike RFC 3986, defines a serialization that
    // special-cases this situation and yields `scheme:/.//bar`. RFC 3986
    // would not call that fully normalized, but nothing more can be
    // normalized and the result works in most practical situations.
    assert_eq_display!(iri, "scheme:/.//bar");
}
/// When an authority is present the path must be empty or absolute, whether
/// or not normalization is enabled.
#[test]
fn authority_and_relative_path() {
    let mut b = Builder::new();
    b.host("example.com");
    b.path("relative/path");

    let plain = b.clone().build::<IriReferenceStr>();
    assert!(
        plain.is_err(),
        "authority requires the path to be empty or absolute"
    );

    // Turning normalization on does not make the relative path acceptable.
    b.normalize();
    let normalized = b.build::<IriReferenceStr>();
    assert!(
        normalized.is_err(),
        "authority requires the path to be empty or absolute"
    );
}
/// Without an authority and without normalization, a path starting with `//`
/// must be rejected.
#[test]
fn no_authority_and_double_slash_prefix_without_normalization() {
    let mut b = Builder::new();
    // Such a string would be read back as a "network-path reference"
    // (RFC 3986 section 4.2), so the build has to fail.
    b.path("//double-slash");
    let outcome = b.build::<IriReferenceStr>();
    assert!(outcome.is_err());
}
/// Without an authority but with normalization, a path starting with `//` is
/// built with a `/.` prefix and is not RFC 3986 normalizable.
#[test]
fn no_authority_and_double_slash_prefix_with_normalization() {
    let mut b = Builder::new();
    b.path("//double-slash");
    b.normalize();
    let outcome = b.build::<IriReferenceStr>();
    let iri_ref = outcome.expect("normalizable by `/.` path prefix");
    assert_eq_display!(iri_ref, "/.//double-slash");
    let strict_check = iri_ref.ensure_rfc3986_normalizable();
    assert!(strict_check.is_err());
}
/// A relative path whose first segment contains a colon must be rejected when
/// there is no authority, with or without normalization.
#[test]
fn no_authority_and_relative_first_segment_colon() {
    let mut b = Builder::new();
    // Written out as-is, the `foo` before the colon would be mistaken for a
    // scheme name (see RFC 3986 section 4.2), so the build has to fail.
    b.path("foo:bar");
    let plain = b.clone().build::<IriReferenceStr>();
    assert!(plain.is_err());

    // Enabling normalization does not help.
    b.normalize();
    let normalized = b.build::<IriReferenceStr>();
    assert!(normalized.is_err());
}

1771
vendor/iri-string/tests/components/mod.rs vendored Normal file

File diff suppressed because it is too large Load Diff

215
vendor/iri-string/tests/gh-issues.rs vendored Normal file
View File

@@ -0,0 +1,215 @@
//! Test cases for issues reported on GitHub.
#[macro_use]
mod utils;
use iri_string::types::UriReferenceStr;
/// Regression tests about IP literals in the authority.
mod issue_17 {
    use super::*;

    /// The host of `//[::1]` is reported with its brackets.
    #[test]
    fn ipv6_literal_authority_host() {
        let reference = UriReferenceStr::new("//[::1]").expect("valid relative URI");
        let components = reference.authority_components();
        let authority = components.expect("the URI has authority `[::1]`");
        assert_eq!(authority.host(), "[::1]");
    }

    /// An IPv6 literal with an extra trailing colon is invalid.
    #[test]
    fn extra_trailing_colon_in_ipv6_literal() {
        let parsed = UriReferenceStr::new("//[::1:]");
        assert!(parsed.is_err());
    }

    /// The IPvFuture prefix is accepted in both lower and upper case.
    #[test]
    fn ipvfuture_literal_capital_v() {
        let lower = UriReferenceStr::new("//[v0.0]");
        assert!(lower.is_ok());
        let upper = UriReferenceStr::new("//[V0.0]");
        assert!(upper.is_ok());
    }

    /// An IPvFuture literal needs both a version and an address.
    #[test]
    fn ipvfuture_empty_part() {
        let missing_address = UriReferenceStr::new("//[v0.]");
        assert!(missing_address.is_err(), "address should not be empty");

        let missing_version = UriReferenceStr::new("//[v.0]");
        assert!(missing_version.is_err(), "version should not be empty");

        let missing_both = UriReferenceStr::new("//[v.]");
        assert!(
            missing_both.is_err(),
            "neither address nor version should be empty"
        );
    }
}
/// Regression tests for resolution/normalization results whose path would
/// start with `//` although the IRI has no authority.
mod issue_36 {
use super::*;
#[cfg(feature = "alloc")]
use iri_string::format::ToDedicatedString;
use iri_string::types::UriAbsoluteStr;
// "/.//.".resolve_against("a:/")
// => "a:" + remove_dot_segments("/.//.")
//
// STEP  OUTPUT BUFFER  INPUT BUFFER
//  1 :                 /.//.
//  2B:                 //.
//  2E:  /              /.
//  2B:  /              /
//  2E:  //
// (see RFC 3986 section 5.2.4 for this notation.)
//
// => "a://"
//
// However, this is invalid since it should be semantically
// `<scheme="a">:<path="//">` but this string will be parsed as
// `<scheme="a">://<path="">`. So, `./` should be inserted to break
// `//` at the beginning of the path part.
/// Resolving `/.//.` or `.//.` against `a:/` must be reported as not
/// RFC 3986 normalizable, and must be displayed as `a:/.//`.
#[test]
fn abnormal_resolution() {
let base = UriAbsoluteStr::new("a:/").expect("valid absolute URI");
// Reference with an absolute path.
{
let relative = UriReferenceStr::new("/.//.").expect("valid relative URI");
let result = relative.resolve_against(base);
assert!(
result.ensure_rfc3986_normalizable().is_err(),
"strict RFC 3986 resolution should fail for base={:?}, ref={:?}",
base,
relative
);
assert_eq_display!(
result,
"a:/.//",
"resolution result will be modified using serialization by WHATWG URL Standard"
);
}
// Reference with a relative path; the expected outcome is the same.
{
let relative = UriReferenceStr::new(".//.").expect("valid relative URI");
let result = relative.resolve_against(base);
assert!(
result.ensure_rfc3986_normalizable().is_err(),
"strict RFC 3986 resolution should fail for base={:?}, ref={:?}",
base,
relative
);
assert_eq_display!(
result,
"a:/.//",
"resolution result will be modified using serialization by WHATWG URL Standard"
);
}
}
/// Normalizing `a:/.//.` must be reported as not RFC 3986 normalizable, and
/// must be displayed as `a:/.//`.
#[test]
fn abnormal_normalization() {
let uri = UriAbsoluteStr::new("a:/.//.").expect("valid absolute URI");
let normalized = uri.normalize();
assert!(
normalized.ensure_rfc3986_normalizable().is_err(),
"strict RFC 3986 normalization should fail for uri={:?}",
uri
);
assert_eq_display!(
normalized,
"a:/.//",
"normalization result will be modified using serialization by WHATWG URL Standard"
);
// The owned result must not claim to be normalized in the strict sense.
#[cfg(feature = "alloc")]
{
assert!(
!normalized.to_dedicated_string().is_normalized_rfc3986(),
"not normalizable by strict RFC 3986 algorithm"
);
}
}
/// More inputs whose normalized path contains `//`: the `/.` prefix is
/// expected only when the `//` ends up at the very start of the path.
#[test]
fn abnormal_normalization2() {
// `//` in the middle of the path: no prefix expected.
{
let uri = UriAbsoluteStr::new("a:/bar//.").expect("valid absolute URI");
assert_eq_display!(uri.normalize(), "a:/bar//");
}
{
let uri = UriAbsoluteStr::new("a:/bar/..//.").expect("valid absolute URI");
assert_eq_display!(
uri.normalize(),
"a:/.//",
"normalization result will be modified using serialization by WHATWG URL Standard"
);
}
{
let uri = UriAbsoluteStr::new("a:/.//bar/.").expect("valid absolute URI");
assert_eq_display!(
uri.normalize(),
"a:/.//bar/",
"normalization result will be modified using serialization by WHATWG URL Standard"
);
}
{
let uri = UriAbsoluteStr::new("a:/././././././foo/./.././././././././././/.")
.expect("valid absolute URI");
assert_eq_display!(
uri.normalize(),
"a:/.//",
"normalization result will be modified using serialization by WHATWG URL Standard"
);
}
}
/// Normalization must keep the percent-encoded triplets `%92%99` in the
/// authority unchanged.
#[test]
fn normalization_pct_triplet_loss() {
let uri = UriAbsoluteStr::new("a://%92%99").expect("valid absolute URI");
assert_eq_display!(uri.normalize(), "a://%92%99");
// Other problems are found during fixing this bug. The test cases for
// them have been added to generic test case data source.
}
}
/// <https://github.com/lo48576/iri-string/pull/46>
#[cfg(feature = "alloc")]
mod issue_46 {
    use iri_string::types::{UriFragmentStr, UriRelativeString};

    /// The fragment of an owned relative URI can be removed and then replaced.
    #[test]
    fn set_fragment_to_relative() {
        let mut relative =
            UriRelativeString::try_from("//user:password@example.com/path?query#frag.old")
                .expect("valid relative URI");
        assert_eq!(relative, "//user:password@example.com/path?query#frag.old");
        assert_eq!(relative.fragment_str(), Some("frag.old"));

        // Drop the fragment.
        relative.set_fragment(None);
        assert_eq!(relative, "//user:password@example.com/path?query");
        assert_eq!(relative.fragment(), None);

        // Attach a new one.
        let replacement = UriFragmentStr::new("frag-new").expect("valid URI fragment");
        relative.set_fragment(Some(replacement));
        assert_eq!(relative.fragment_str(), Some("frag-new"));
    }
}
/// <https://github.com/lo48576/iri-string/issues/48>
mod issue_48 {
    use super::*;
    use iri_string::types::IriStr;

    /// Percent-encoded bytes that are not valid UTF-8 must survive
    /// normalization unchanged.
    #[test]
    fn percent_decode_invalid_utf8() {
        let input = "a:%F9%A8%8E%80";
        let parsed = IriStr::new(input);
        let iri = parsed.expect("valid URI with percent encoded invalid UTF-8 bytes");
        let normalized = iri.normalize();
        assert_eq_display!(
            normalized,
            input,
            "percent-encoded invalid UTF-8 bytes should be left as is without being decoded"
        );
    }
}

95
vendor/iri-string/tests/iri.rs vendored Normal file
View File

@@ -0,0 +1,95 @@
//! Tests specific to IRIs (not URIs).
#[macro_use]
mod utils;
use iri_string::format::write_to_slice;
#[cfg(feature = "alloc")]
use iri_string::format::ToDedicatedString;
#[cfg(feature = "alloc")]
use iri_string::types::IriReferenceString;
use iri_string::types::{IriReferenceStr, UriReferenceStr};
/// One IRI-to-URI conversion fixture.
#[derive(Debug, Clone, Copy)]
struct TestCase {
/// IRI reference used as the conversion input.
iri: &'static str,
/// URI reference the conversion is expected to produce.
uri: &'static str,
}
/// Conversion fixtures: each `iri` has a non-ASCII character in its query, and
/// the matching `uri` has that character percent-encoded.
const CASES: &[TestCase] = &[
// U+03B1 (two percent-encoded bytes).
TestCase {
iri: "?alpha=\u{03B1}",
uri: "?alpha=%CE%B1",
},
// U+30A4 (three percent-encoded bytes).
TestCase {
iri: "?katakana-letter-i=\u{30A4}",
uri: "?katakana-letter-i=%E3%82%A4",
},
// U+1F363 (four percent-encoded bytes).
TestCase {
iri: "?sushi=\u{1f363}",
uri: "?sushi=%F0%9F%8D%A3",
},
];
/// Encoding an IRI reference to a URI reference should give the expected URI,
/// and encoding that result again should change nothing.
///
/// Results are written into fixed-size stack buffers with `write_to_slice`.
#[test]
fn iri_to_uri() {
    let mut buf = [0_u8; 256];
    let mut buf2 = [0_u8; 256];

    for case in CASES.iter().copied() {
        let expected = UriReferenceStr::new(case.uri).expect("should be valid URI reference");
        // The input is an IRI reference, and the failure message says so.
        let iri = IriReferenceStr::new(case.iri).expect("should be valid IRI reference");

        // First pass: IRI -> URI.
        let encoded = iri.encode_to_uri();
        assert_eq_display!(encoded, expected);
        let encoded_uri = write_to_slice(&mut buf, &encoded).expect("not enough buffer");
        let encoded_uri = UriReferenceStr::new(encoded_uri).expect("should be valid URI reference");
        assert_eq!(encoded_uri, expected);

        // Second pass: encoding the already-encoded URI should be a no-op.
        let encoded_again = AsRef::<IriReferenceStr>::as_ref(encoded_uri).encode_to_uri();
        assert_eq_display!(encoded_again, expected);
        let encoded_again_uri =
            write_to_slice(&mut buf2, &encoded_again).expect("not enough buffer");
        let encoded_again_uri =
            UriReferenceStr::new(encoded_again_uri).expect("should be valid URI reference");
        assert_eq!(encoded_again_uri, expected);
    }
}
/// Same check as the buffer-based conversion test, but collecting the results
/// into owned strings with `to_dedicated_string`.
#[cfg(feature = "alloc")]
#[test]
fn iri_to_uri_allocated() {
    for case in CASES.iter().copied() {
        let expected = UriReferenceStr::new(case.uri).expect("should be valid URI reference");
        // The input is an IRI reference, and the failure message says so.
        let iri = IriReferenceStr::new(case.iri).expect("should be valid IRI reference");

        let encoded = iri.encode_to_uri().to_dedicated_string();
        assert_eq!(encoded, expected);

        // Encoding the already-encoded URI should be a no-op.
        let encoded_again = AsRef::<IriReferenceStr>::as_ref(&encoded)
            .encode_to_uri()
            .to_dedicated_string();
        assert_eq!(encoded_again, expected);
    }
}
/// In-place encoding of an owned IRI reference should give the expected URI
/// and should be idempotent.
#[cfg(feature = "alloc")]
#[test]
fn iri_to_uri_inline() {
    for case in CASES.iter().copied() {
        let expected = UriReferenceStr::new(case.uri).expect("should be valid URI reference");
        // The input is an IRI reference, and the failure message says so.
        let mut iri =
            IriReferenceString::try_from(case.iri).expect("should be valid IRI reference");

        iri.encode_to_uri_inline();
        assert_eq!(iri, expected);

        // A second call must leave the string untouched.
        iri.encode_to_uri_inline();
        assert_eq!(
            iri, expected,
            "`encode_to_uri_inline()` method should be idempotent"
        );
    }
}

218
vendor/iri-string/tests/normalize.rs vendored Normal file
View File

@@ -0,0 +1,218 @@
//! Tests for normalization.
mod components;
#[macro_use]
mod utils;
#[cfg(feature = "alloc")]
use iri_string::format::ToDedicatedString;
use iri_string::types::*;
use self::components::TEST_CASES;
/// A normalized IRI must never be equal to any IRI listed as semantically
/// different for the same test case.
#[test]
fn different_iris() {
    for case in TEST_CASES.iter() {
        // Cases without any "different" IRIs have nothing to check.
        if case.different_iris.is_empty() {
            continue;
        }
        let normalized = IriStr::new(case.normalized_iri).expect("should be valid IRI reference");
        for &other in case.different_iris.iter() {
            let other = IriStr::new(other).expect("should be valid IRI reference");
            assert_ne!(
                normalized, other,
                "<{}> should not be normalized to <{other}>, case={case:#?}",
                case.composed
            );
        }
    }
}
/// Normalization should work for URI.
///
/// Runs over every absolute, URI-class test case.
#[test]
fn normalize_uri() {
    let applicable = TEST_CASES
        .iter()
        .filter(|case| case.is_uri_class() && case.is_absolute());
    for case in applicable {
        let input = UriStr::new(case.composed).expect("should be valid URI");
        let normalized = input.normalize();
        let expected = UriStr::new(case.normalized_uri).expect("should be valid URI");

        assert_eq_display!(normalized, expected, "case={case:#?}");
        #[cfg(feature = "alloc")]
        {
            assert_eq!(normalized.to_string(), expected.as_str(), "case={case:#?}");
            assert_eq!(normalized.to_dedicated_string(), expected, "case={case:#?}");
        }

        // The strict-normalizability verdict must match the fixture.
        let should_be_normalizable = case.is_rfc3986_normalizable();
        let is_normalizable = normalized.ensure_rfc3986_normalizable().is_ok();
        assert_eq!(should_be_normalizable, is_normalizable, "case={case:#?}");
    }
}
/// Normalization should work for IRI.
///
/// Runs over every absolute, IRI-class test case.
#[test]
fn normalize_iri() {
    let applicable = TEST_CASES
        .iter()
        .filter(|case| case.is_iri_class() && case.is_absolute());
    for case in applicable {
        let input = IriStr::new(case.composed).expect("should be valid IRI");
        let normalized = input.normalize();
        let expected = IriStr::new(case.normalized_iri).expect("should be valid IRI");

        assert_eq_display!(normalized, expected, "case={case:#?}");
        #[cfg(feature = "alloc")]
        {
            assert_eq!(normalized.to_string(), expected.as_str(), "case={case:#?}");
            assert_eq!(normalized.to_dedicated_string(), expected, "case={case:#?}");
        }

        // The strict-normalizability verdict must match the fixture.
        let should_be_normalizable = case.is_rfc3986_normalizable();
        let is_normalizable = normalized.ensure_rfc3986_normalizable().is_ok();
        assert_eq!(should_be_normalizable, is_normalizable, "case={case:#?}");
    }
}
/// WHATWG-like normalization should work for URI.
///
/// The expected value is the case's WHATWG-like result when it has one, and
/// the regular normalized URI otherwise.
#[test]
fn normalize_uri_whatwg_like() {
    let applicable = TEST_CASES
        .iter()
        .filter(|case| case.is_uri_class() && case.is_absolute());
    for case in applicable {
        let input = UriStr::new(case.composed).expect("should be valid URI");
        let normalized = input.normalize_but_preserve_authorityless_relative_path();
        let expected_str = case
            .normalized_uri_whatwg_like
            .unwrap_or(case.normalized_uri);
        let expected = UriStr::new(expected_str).expect("should be valid URI");

        assert_eq_display!(normalized, expected, "case={case:#?}");
        #[cfg(feature = "alloc")]
        {
            assert_eq!(normalized.to_string(), expected.as_str(), "case={case:#?}");
            assert_eq!(normalized.to_dedicated_string(), expected, "case={case:#?}");
        }

        // The strict-normalizability verdict must match the fixture.
        let should_be_normalizable = case.is_rfc3986_normalizable();
        let is_normalizable = normalized.ensure_rfc3986_normalizable().is_ok();
        assert_eq!(should_be_normalizable, is_normalizable, "case={case:#?}");
    }
}
/// WHATWG-like normalization should work for IRI.
///
/// The expected value is the case's WHATWG-like result when it has one, and
/// the regular normalized IRI otherwise.
#[test]
fn normalize_iri_whatwg_like() {
    let applicable = TEST_CASES
        .iter()
        .filter(|case| case.is_iri_class() && case.is_absolute());
    for case in applicable {
        let input = IriStr::new(case.composed).expect("should be valid IRI");
        let normalized = input.normalize_but_preserve_authorityless_relative_path();
        let expected_str = case
            .normalized_iri_whatwg_like
            .unwrap_or(case.normalized_iri);
        let expected = IriStr::new(expected_str).expect("should be valid IRI");

        assert_eq_display!(normalized, expected, "case={case:#?}");
        #[cfg(feature = "alloc")]
        {
            assert_eq!(normalized.to_string(), expected.as_str(), "case={case:#?}");
            assert_eq!(normalized.to_dedicated_string(), expected, "case={case:#?}");
        }

        // The strict-normalizability verdict must match the fixture.
        let should_be_normalizable = case.is_rfc3986_normalizable();
        let is_normalizable = normalized.ensure_rfc3986_normalizable().is_ok();
        assert_eq!(should_be_normalizable, is_normalizable, "case={case:#?}");
    }
}
/// Normalizing an already normalized IRI should give the same IRI again.
#[test]
fn normalize_idempotent() {
    // Scratch space that holds the first normalization result as a string.
    let mut scratch = [0_u8; 512];

    let applicable = TEST_CASES
        .iter()
        .filter(|case| case.is_iri_class() && case.is_absolute());
    for case in applicable {
        let input = IriStr::new(case.composed).expect("should be valid IRI");
        let first_pass = input.normalize();
        let expected = IriStr::new(case.normalized_iri).expect("should be valid IRI");

        let written = iri_string::format::write_to_slice(&mut scratch, &first_pass)
            .expect("not enough buffer");
        let reparsed = IriStr::new(written).expect("should be valid IRI reference");

        // Second normalization pass.
        let second_pass = reparsed.normalize();
        assert_eq_display!(second_pass, expected, "case={case:#?}");
    }
}
/// Each of the three normalizedness predicates should give the expected
/// answer for a handful of hand-picked IRIs.
#[test]
fn normalizedness() {
    /// Expected predicate results for one IRI.
    #[derive(Debug, Clone, Copy)]
    struct Case {
        iri: &'static str,
        is_normalized_default: bool,
        is_normalized_rfc3986: bool,
        is_normalized_whatwg_like: bool,
    }

    const CASES: &[Case] = &[
        Case {
            iri: "scheme:/.//foo",
            is_normalized_default: true,
            is_normalized_rfc3986: false,
            is_normalized_whatwg_like: true,
        },
        Case {
            iri: "scheme:.///foo",
            is_normalized_default: false,
            is_normalized_rfc3986: false,
            is_normalized_whatwg_like: true,
        },
        Case {
            iri: "scheme://authority/.//foo",
            is_normalized_default: false,
            is_normalized_rfc3986: false,
            is_normalized_whatwg_like: false,
        },
        Case {
            iri: "scheme:relative/..//foo",
            is_normalized_default: false,
            is_normalized_rfc3986: false,
            is_normalized_whatwg_like: true,
        },
    ];

    for case in CASES {
        let iri = IriStr::new(case.iri).expect("should be valid IRI");

        let default_verdict = iri.is_normalized();
        assert_eq!(default_verdict, case.is_normalized_default, "case={case:?}");

        let rfc3986_verdict = iri.is_normalized_rfc3986();
        assert_eq!(rfc3986_verdict, case.is_normalized_rfc3986, "case={case:?}");

        let whatwg_like_verdict = iri.is_normalized_but_authorityless_relative_path_preserved();
        assert_eq!(
            whatwg_like_verdict, case.is_normalized_whatwg_like,
            "case={case:?}"
        );
    }
}

View File

@@ -0,0 +1,174 @@
//! Tests for percent encoding.
#[cfg(feature = "alloc")]
extern crate alloc;
#[macro_use]
mod utils;
#[cfg(all(feature = "alloc", not(feature = "std")))]
use alloc::string::ToString;
use iri_string::percent_encode::{PercentEncodedForIri, PercentEncodedForUri};
/// For URIs, a non-ASCII character in a reg-name is percent-encoded.
#[test]
fn regname_uri() {
    let want = "alpha.%CE%B1.reg.name";
    let got = PercentEncodedForUri::from_reg_name("alpha.\u{03B1}.reg.name");
    assert_eq_display!(got, want);
    #[cfg(feature = "alloc")]
    assert_eq!(got.to_string(), want);
}
/// For IRIs, a non-ASCII character in a reg-name is kept as is.
#[test]
fn regname_iri() {
    let want = "alpha.\u{03B1}.reg.name";
    let got = PercentEncodedForIri::from_reg_name("alpha.\u{03B1}.reg.name");
    assert_eq_display!(got, want);
    #[cfg(feature = "alloc")]
    assert_eq!(got.to_string(), want);
}
/// For URIs, a path segment gets `/`, `<`, `>`, `?`, `#` and the non-ASCII
/// character percent-encoded.
#[test]
fn path_segment_uri() {
    let want = "%CE%B1%2F%3Calpha%3E%3F%23";
    let got = PercentEncodedForUri::from_path_segment("\u{03B1}/<alpha>?#");
    assert_eq_display!(got, want);
    #[cfg(feature = "alloc")]
    assert_eq!(got.to_string(), want);
}
/// For IRIs, a path segment gets `/`, `<`, `>`, `?` and `#` percent-encoded
/// while the non-ASCII character is kept.
#[test]
fn path_segment_iri() {
    let want = "\u{03B1}%2F%3Calpha%3E%3F%23";
    let got = PercentEncodedForIri::from_path_segment("\u{03B1}/<alpha>?#");
    assert_eq_display!(got, want);
    #[cfg(feature = "alloc")]
    assert_eq!(got.to_string(), want);
}
/// For URIs, a path keeps `/` but gets `<`, `>`, `?`, `#` and the non-ASCII
/// character percent-encoded.
#[test]
fn path_uri() {
    let want = "%CE%B1/%3Calpha%3E%3F%23";
    let got = PercentEncodedForUri::from_path("\u{03B1}/<alpha>?#");
    assert_eq_display!(got, want);
    #[cfg(feature = "alloc")]
    assert_eq!(got.to_string(), want);
}
/// For IRIs, a path keeps `/` and the non-ASCII character but gets `<`, `>`,
/// `?` and `#` percent-encoded.
#[test]
fn path_iri() {
    let want = "\u{03B1}/%3Calpha%3E%3F%23";
    let got = PercentEncodedForIri::from_path("\u{03B1}/<alpha>?#");
    assert_eq_display!(got, want);
    #[cfg(feature = "alloc")]
    assert_eq!(got.to_string(), want);
}
/// For URIs, a query keeps `/` and `?` but gets `<`, `>`, `#` and the
/// non-ASCII character percent-encoded.
#[test]
fn query_uri() {
    let want = "%CE%B1/%3Calpha%3E?%23";
    let got = PercentEncodedForUri::from_query("\u{03B1}/<alpha>?#");
    assert_eq_display!(got, want);
    #[cfg(feature = "alloc")]
    assert_eq!(got.to_string(), want);
}
/// For IRIs, a query keeps `/`, `?` and the non-ASCII character but gets `<`,
/// `>` and `#` percent-encoded.
#[test]
fn query_iri() {
    let want = "\u{03B1}/%3Calpha%3E?%23";
    let got = PercentEncodedForIri::from_query("\u{03B1}/<alpha>?#");
    assert_eq_display!(got, want);
    #[cfg(feature = "alloc")]
    assert_eq!(got.to_string(), want);
}
/// For URIs, a fragment keeps `/` and `?` but gets `<`, `>`, `#` and the
/// non-ASCII character percent-encoded.
#[test]
fn fragment_uri() {
    let want = "%CE%B1/%3Calpha%3E?%23";
    let got = PercentEncodedForUri::from_fragment("\u{03B1}/<alpha>?#");
    assert_eq_display!(got, want);
    #[cfg(feature = "alloc")]
    assert_eq!(got.to_string(), want);
}
/// For IRIs, a fragment keeps `/`, `?` and the non-ASCII character but gets
/// `<`, `>` and `#` percent-encoded.
#[test]
fn fragment_iri() {
    let want = "\u{03B1}/%3Calpha%3E?%23";
    let got = PercentEncodedForIri::from_fragment("\u{03B1}/<alpha>?#");
    assert_eq_display!(got, want);
    #[cfg(feature = "alloc")]
    assert_eq!(got.to_string(), want);
}
/// For URIs, `unreserve` keeps `a0-._~`, encodes `%` as `%25`, and
/// percent-encodes the non-ASCII character.
#[test]
fn unreserve_uri_unreserved() {
    let want = "%25a0-._~%CE%B1";
    let got = PercentEncodedForUri::unreserve("%a0-._~\u{03B1}");
    assert_eq_display!(got, want);
    #[cfg(feature = "alloc")]
    assert_eq!(got.to_string(), want);
}
/// For IRIs, `unreserve` keeps `a0-._~` and the non-ASCII character, and
/// encodes `%` as `%25`.
#[test]
fn unreserve_iri_unreserved() {
    let want = "%25a0-._~\u{03B1}";
    let got = PercentEncodedForIri::unreserve("%a0-._~\u{03B1}");
    assert_eq_display!(got, want);
    #[cfg(feature = "alloc")]
    assert_eq!(got.to_string(), want);
}
/// For URIs, `unreserve` percent-encodes every character of
/// `:/?#[]@ !$&'()*+,;=`, the space included.
#[test]
fn unreserve_uri_reserved() {
    let want = "%3A%2F%3F%23%5B%5D%40%20%21%24%26%27%28%29%2A%2B%2C%3B%3D";
    let got = PercentEncodedForUri::unreserve(":/?#[]@ !$&'()*+,;=");
    assert_eq_display!(got, want);
    #[cfg(feature = "alloc")]
    assert_eq!(got.to_string(), want);
}
/// For IRIs, `unreserve` percent-encodes every character of
/// `:/?#[]@ !$&'()*+,;=`, the space included.
#[test]
fn unreserve_iri_reserved() {
    let want = "%3A%2F%3F%23%5B%5D%40%20%21%24%26%27%28%29%2A%2B%2C%3B%3D";
    let got = PercentEncodedForIri::unreserve(":/?#[]@ !$&'()*+,;=");
    assert_eq_display!(got, want);
    #[cfg(feature = "alloc")]
    assert_eq!(got.to_string(), want);
}
/// For URIs, `characters` keeps `a0-._~`, encodes `%` as `%25`, and
/// percent-encodes the non-ASCII character.
#[test]
fn characters_uri_unreserved() {
    let want = "%25a0-._~%CE%B1";
    let got = PercentEncodedForUri::characters("%a0-._~\u{03B1}");
    assert_eq_display!(got, want);
    #[cfg(feature = "alloc")]
    assert_eq!(got.to_string(), want);
}
/// For IRIs, `characters` keeps `a0-._~` and the non-ASCII character, and
/// encodes `%` as `%25`.
#[test]
fn characters_iri_unreserved() {
    let want = "%25a0-._~\u{03B1}";
    let got = PercentEncodedForIri::characters("%a0-._~\u{03B1}");
    assert_eq_display!(got, want);
    #[cfg(feature = "alloc")]
    assert_eq!(got.to_string(), want);
}
/// For URIs, `characters` leaves `:/?#[]@!$&'()*+,;=` untouched and only
/// percent-encodes the space.
#[test]
fn characters_uri_reserved() {
    let want = ":/?#[]@%20!$&'()*+,;=";
    let got = PercentEncodedForUri::characters(":/?#[]@ !$&'()*+,;=");
    assert_eq_display!(got, want);
    #[cfg(feature = "alloc")]
    assert_eq!(got.to_string(), want);
}
/// For IRIs, `characters` leaves `:/?#[]@!$&'()*+,;=` untouched and only
/// percent-encodes the space.
#[test]
fn characters_iri_reserved() {
    let want = ":/?#[]@%20!$&'()*+,;=";
    let got = PercentEncodedForIri::characters(":/?#[]@ !$&'()*+,;=");
    assert_eq_display!(got, want);
    #[cfg(feature = "alloc")]
    assert_eq!(got.to_string(), want);
}

473
vendor/iri-string/tests/resolve.rs vendored Normal file
View File

@@ -0,0 +1,473 @@
//! Tests for IRI resolution.
mod components;
#[macro_use]
mod utils;
mod resolve_refimpl;
use iri_string::format::write_to_slice;
#[cfg(feature = "alloc")]
use iri_string::format::ToDedicatedString;
use iri_string::resolve::FixedBaseResolver;
use iri_string::types::*;
#[cfg(feature = "alloc")]
use self::resolve_refimpl::resolve as resolve_refimpl;
/// Test cases for strict resolvers.
///
/// Each entry pairs a base IRI with a list of
/// `(reference, resolved, normalized)` tuples: `resolved` is the expected
/// result of resolving `reference` against the base, and `normalized` is the
/// expected result of resolving and then normalizing. A `normalized` of `None`
/// means the normalized result is expected to equal `resolved`.
// [(base, [(reference, output, Option<output_normalized>)])]
#[allow(clippy::type_complexity)]
const TEST_CASES: &[(&str, &[(&str, &str, Option<&str>)])] = &[
// RFC 3986, section 5.2.4.
("scheme:///a/b/c/./../../", &[("g", "scheme:///a/g", None)]),
("scheme:///a/b/c/./../", &[("../g", "scheme:///a/g", None)]),
("scheme:///a/b/c/./", &[("../../g", "scheme:///a/g", None)]),
("scheme:///a/b/c/", &[("./../../g", "scheme:///a/g", None)]),
("scheme:///a/b/", &[("c/./../../g", "scheme:///a/g", None)]),
("scheme:///a/", &[("b/c/./../../g", "scheme:///a/g", None)]),
("scheme:///", &[("a/b/c/./../../g", "scheme:///a/g", None)]),
("scheme:mid/content=5/../", &[("6", "scheme:mid/6", None)]),
("scheme:mid/content=5/", &[("../6", "scheme:mid/6", None)]),
("scheme:mid/", &[("content=5/../6", "scheme:mid/6", None)]),
("scheme:", &[("mid/content=5/../6", "scheme:mid/6", None)]),
// RFC 3986, section 5.4.1.
(
"http://a/b/c/d;p?q",
&[
("g:h", "g:h", None),
("g", "http://a/b/c/g", None),
("./g", "http://a/b/c/g", None),
("g/", "http://a/b/c/g/", None),
("/g", "http://a/g", None),
("//g", "http://g", None),
("?y", "http://a/b/c/d;p?y", None),
("g?y", "http://a/b/c/g?y", None),
("#s", "http://a/b/c/d;p?q#s", None),
("g#s", "http://a/b/c/g#s", None),
("g?y#s", "http://a/b/c/g?y#s", None),
(";x", "http://a/b/c/;x", None),
("g;x", "http://a/b/c/g;x", None),
("g;x?y#s", "http://a/b/c/g;x?y#s", None),
("", "http://a/b/c/d;p?q", None),
(".", "http://a/b/c/", None),
("./", "http://a/b/c/", None),
("..", "http://a/b/", None),
("../", "http://a/b/", None),
("../g", "http://a/b/g", None),
("../..", "http://a/", None),
("../../", "http://a/", None),
("../../g", "http://a/g", None),
],
),
// RFC 3986, section 5.4.2.
(
"http://a/b/c/d;p?q",
&[
("../../../g", "http://a/g", None),
("../../../../g", "http://a/g", None),
("/./g", "http://a/g", None),
("/../g", "http://a/g", None),
("g.", "http://a/b/c/g.", None),
(".g", "http://a/b/c/.g", None),
("g..", "http://a/b/c/g..", None),
("..g", "http://a/b/c/..g", None),
("./../g", "http://a/b/g", None),
("./g/.", "http://a/b/c/g/", None),
("g/./h", "http://a/b/c/g/h", None),
("g/../h", "http://a/b/c/h", None),
("g;x=1/./y", "http://a/b/c/g;x=1/y", None),
("g;x=1/../y", "http://a/b/c/y", None),
("g?y/./x", "http://a/b/c/g?y/./x", None),
("g?y/../x", "http://a/b/c/g?y/../x", None),
("g#s/./x", "http://a/b/c/g#s/./x", None),
("g#s/../x", "http://a/b/c/g#s/../x", None),
("http:g", "http:g", None),
],
),
// Custom cases.
(
"http://a/b/c/d/e/../..",
&[
// `/a/b/c/d/e/../..` but without dot segments removal.
("", "http://a/b/c/d/e/../..", Some("http://a/b/c/")),
// `/a/b/c/d/e/../..`
("..", "http://a/b/c/", None),
// `/a/b/c/d/e/../../`
("../", "http://a/b/c/", None),
// `/a/b/c/d/e/../.`
(".", "http://a/b/c/d/", None),
// `/a/b/c/d/e/.././`
("./", "http://a/b/c/d/", None),
// `/a/b/c/d/e/../..?query` but without dot segments removal.
(
"?query",
"http://a/b/c/d/e/../..?query",
Some("http://a/b/c/?query"),
),
// `/a/b/c/d/e/../..#frag` but without dot segments removal.
(
"#frag",
"http://a/b/c/d/e/../..#frag",
Some("http://a/b/c/#frag"),
),
// If the authority is specified, paths won't be merged.
("http://example.com", "http://example.com", None),
("http://example.com/", "http://example.com/", None),
// If the path of the reference is not empty, remove_dot_segments is applied.
("http://example.com/..", "http://example.com/", None),
// If the scheme is specified, paths won't be merged.
("scheme:", "scheme:", None),
("scheme:foo#frag", "scheme:foo#frag", None),
],
),
// Custom cases.
(
"https://a/b/c",
&[
("", "https://a/b/c", None),
("x/", "https://a/b/x/", None),
("x//", "https://a/b/x//", None),
("x///", "https://a/b/x///", None),
("x//y", "https://a/b/x//y", None),
("x//y/", "https://a/b/x//y/", None),
("x//y//", "https://a/b/x//y//", None),
// `/b/x//..//y//`.
// STEP  OUTPUT BUFFER  INPUT BUFFER
//  1 :                 /b/x//..//y//
//  2E:  /b             /x//..//y//
//  2E:  /b/x           //..//y//
//  2E:  /b/x/          /..//y//
//  2C:  /b/x           //y//
//  2E:  /b/x/          /y//
//  2E:  /b/x//y        //
//  2E:  /b/x//y/       /
//  2E:  /b/x//y//
("x//..//y//", "https://a/b/x//y//", None),
],
),
// Custom cases.
(
"scheme:a/b/c",
&[
("", "scheme:a/b/c", None),
("x/", "scheme:a/b/x/", None),
("x//", "scheme:a/b/x//", None),
("x///", "scheme:a/b/x///", None),
("x//y", "scheme:a/b/x//y", None),
("x//y/", "scheme:a/b/x//y/", None),
// `a/b/x//..//y//`.
// STEP  OUTPUT BUFFER  INPUT BUFFER
//  1 :                 a/b/x//..//y//
//  2E:  a              /b/x//..//y//
//  2E:  a/b            /x//..//y//
//  2E:  a/b/x          //..//y//
//  2E:  a/b/x/         /..//y//
//  2C:  a/b/x          //y//
//  2E:  a/b/x/         /y//
//  2E:  a/b/x//y       //
//  2E:  a/b/x//y/      /
//  2E:  a/b/x//y//
("x//..//y//", "scheme:a/b/x//y//", None),
],
),
// Custom cases.
(
"scheme:a",
&[
// `x/../..`.
// STEP  OUTPUT BUFFER  INPUT BUFFER
//  1 :                 x/../..
//  2E:  x              /../..
//  2C:                 /..
//  2C:                 /
//  2E:  /
("x/../..", "scheme:/", None),
// `x/../../y`.
// STEP  OUTPUT BUFFER  INPUT BUFFER
//  1 :                 x/../../y
//  2E:  x              /../../y
//  2C:                 /../y
//  2C:                 /y
//  2E:  /y
("x/../../y", "scheme:/y", None),
],
),
// Custom cases.
// Empty base path should be considered as `/` when the base authority is present.
(
"scheme://host",
&[
("", "scheme://host", None),
(".", "scheme://host/", None),
("..", "scheme://host/", None),
("foo", "scheme://host/foo", None),
],
),
// Custom cases.
(
"HTTP://USER:PASS@EXAMPLE.COM:80/1/2/3/4/.././5/../6/?QUERY",
&[(
"A/b/c/d/e/f/g/h/i/../../../j/k/l/../../../../m/n/./o",
"HTTP://USER:PASS@EXAMPLE.COM:80/1/2/3/6/A/b/c/d/e/m/n/o",
Some("http://USER:PASS@example.com:80/1/2/3/6/A/b/c/d/e/m/n/o"),
)],
),
(
"HTTP://USER:PASS@EXAMPLE.COM:/%7e/2/beta=%CE%B2/4/.././5/../6/",
&[(
"a/b/alpha=%CE%B1/d/e/f/g/h/i/../../../j/k/l/../../../../%3c/%7e/./%3e?query#fragment",
"HTTP://USER:PASS@EXAMPLE.COM:/%7e/2/beta=%CE%B2/6/a/b/alpha=%CE%B1/d/e/%3c/%7e/%3e?query#fragment",
Some("http://USER:PASS@example.com/~/2/beta=\u{03B2}/6/a/b/alpha=\u{03B1}/d/e/%3C/~/%3E?query#fragment")
)],
),
(
"http://user:pass@example.com:/%7e/2/beta=%ce%b2/4/.././5/../6/",
&[(
"a/b/alpha=%ce%b1/d/e/f/g/h/i/../../../j/k/l/../../../../%3c/%7e/./%3e?query#fragment",
"http://user:pass@example.com:/%7e/2/beta=%ce%b2/6/a/b/alpha=%ce%b1/d/e/%3c/%7e/%3e?query#fragment",
Some("http://user:pass@example.com/~/2/beta=\u{03B2}/6/a/b/alpha=\u{03B1}/d/e/%3C/~/%3E?query#fragment")
)],
),
];
/// Resolving each reference against its base should give the expected
/// (not normalized) result.
#[test]
fn resolve() {
    for &(base, pairs) in TEST_CASES {
        let base = IriAbsoluteStr::new(base).expect("should be valid base IRI");
        for &(target, expected, _) in pairs {
            let target = IriReferenceStr::new(target).expect("should be valid IRI reference");
            let actual = target.resolve_against(base);
            assert_eq_display!(actual, expected, "base={base:?}, target={target:?}");
            #[cfg(feature = "alloc")]
            assert_eq!(
                actual.to_dedicated_string().as_str(),
                expected,
                "base={base:?}, target={target:?}"
            );
        }
    }
}
/// Resolves every reference in `TEST_CASES`, normalizes the result, and
/// compares it with the normalized expectation.
#[test]
fn resolve_normalize() {
for (base, pairs) in TEST_CASES {
let base = IriAbsoluteStr::new(base).expect("should be valid base IRI");
for (target, expected, expected_normalized) in *pairs {
let target = IriReferenceStr::new(target).expect("should be valid IRI reference");
let resolved_normalized = target.resolve_against(base).and_normalize();
// When no separate normalized expectation is given (`None`), the plain
// expected string is used instead.
let expected = expected_normalized.unwrap_or(*expected);
assert_eq_display!(
resolved_normalized,
expected,
"base={base:?}, target={target:?}"
);
// The owned-string conversion is only checked when `alloc` is enabled.
#[cfg(feature = "alloc")]
assert_eq!(
resolved_normalized.to_dedicated_string().as_str(),
expected,
"base={base:?}, target={target:?}"
);
}
}
}
/// Same checks as the plain resolution test, but going through a
/// `FixedBaseResolver` that is created once per base IRI.
#[test]
fn fixed_base_resolver() {
for (base, pairs) in TEST_CASES {
let base = IriAbsoluteStr::new(base).expect("should be valid base IRI");
// One resolver is reused for all references that share this base.
let resolver = FixedBaseResolver::new(base);
for (target, expected, _normalized_expected) in *pairs {
let target = IriReferenceStr::new(target).expect("should be valid IRI reference");
let resolved = resolver.resolve(target);
assert_eq_display!(resolved, expected, "base={base:?}, target={target:?}");
// The owned-string conversion is only checked when `alloc` is enabled.
#[cfg(feature = "alloc")]
assert_eq!(
resolved.to_dedicated_string().as_str(),
*expected,
"base={base:?}, target={target:?}"
);
}
}
}
/// Cross-checks both the expectation table and the library's resolution
/// result against `resolve_refimpl`.
///
/// Only built when `alloc` is enabled.
#[cfg(feature = "alloc")]
#[test]
fn same_result_as_reference_impl() {
for (base, pairs) in TEST_CASES {
let base = IriAbsoluteStr::new(base).expect("should be valid base IRI");
for (target, expected, _normalized_expected) in *pairs {
let target = IriReferenceStr::new(target).expect("should be valid IRI reference");
let resolved = target.resolve_against(base).to_dedicated_string();
let expected_refimpl = resolve_refimpl(target, base);
// First make sure the hand-written expectation agrees with the reference
// implementation...
assert_eq!(
*expected, expected_refimpl,
"base={base:?}, target={target:?}"
);
// ...then make sure the library agrees with it as well.
assert_eq!(
resolved, expected_refimpl,
"base={base:?}, target={target:?}"
);
}
}
}
/// Checks resolution of references whose dot segments are percent-encoded.
///
/// In every row of the table, the `%2e` / `%2E` spellings are expected to give
/// the same result as the literal `.` / `..` spelling.
#[test]
fn percent_encoded_dots() {
// [(base, ref, result)]
const TEST_CASES: &[(&str, &str, &str)] = &[
("scheme:", ".", "scheme:"),
("scheme:", "%2e", "scheme:"),
("scheme:", "%2E", "scheme:"),
("scheme://a", ".", "scheme://a/"),
("scheme://a", "%2e", "scheme://a/"),
("scheme://a", "%2E", "scheme://a/"),
("scheme://a/b/c", ".", "scheme://a/b/"),
("scheme://a/b/c", "%2e", "scheme://a/b/"),
("scheme://a/b/c", "%2E", "scheme://a/b/"),
("scheme://a/b/c", "./g", "scheme://a/b/g"),
("scheme://a/b/c", "%2e/g", "scheme://a/b/g"),
("scheme://a/b/c", "%2E/g", "scheme://a/b/g"),
("scheme://a/b/c/d/e/f", "../../../g", "scheme://a/b/g"),
(
"scheme://a/b/c/d/e/f",
"%2E%2E/%2E%2e/%2E./g",
"scheme://a/b/g",
),
(
"scheme://a/b/c/d/e/f",
"%2e%2E/%2e%2e/%2e./g",
"scheme://a/b/g",
),
("scheme://a/b/c/d/e/f", ".%2E/.%2e/../g", "scheme://a/b/g"),
];
// Note: this local `TEST_CASES` shadows the file-level table inside this function.
for (base, reference, expected) in TEST_CASES {
let base = IriAbsoluteStr::new(base).expect("should be valid base IRI");
let reference = IriReferenceStr::new(reference).expect("should be valid IRI reference");
let resolved = reference.resolve_against(base);
assert_eq_display!(resolved, *expected);
// The owned-string conversion is only checked when `alloc` is enabled.
#[cfg(feature = "alloc")]
assert_eq!(resolved.to_dedicated_string(), *expected);
}
}
/// Checks that writing a resolution result into a byte slice needs exactly
/// as many bytes as the expected string: one byte less must fail, the exact
/// length must succeed.
#[test]
fn write_to_slice_dont_require_extra_capacity() {
let mut buf = [0_u8; 128];
for (base, pairs) in TEST_CASES {
let base = IriAbsoluteStr::new(base).expect("should be valid base IRI");
let resolver = FixedBaseResolver::new(base);
for (target, expected, _normalized_expected) in *pairs {
// An empty expectation has no "one byte too small" buffer; skipping it
// also keeps `expected.len() - 1` below from underflowing.
if expected.is_empty() {
continue;
}
let target = IriReferenceStr::new(target).expect("should be valid IRI reference");
let resolved = resolver.resolve(target);
// Buffer that is one byte too short.
let result_small = write_to_slice(&mut buf[..expected.len() - 1], &resolved);
assert!(result_small.is_err(), "should fail due to too small buffer");
// Buffer of exactly the expected length.
let result_enough = write_to_slice(&mut buf[..expected.len()], &resolved);
assert!(result_enough.is_ok(), "buffer should have enough size");
assert_eq!(
result_enough.unwrap(),
*expected,
"correct result should be written"
);
}
}
}
/// Checks that a value returned by `FixedBaseResolver::resolve` can still be
/// used after the resolver itself has gone out of scope.
#[test]
fn resolution_result_live_longer_than_fixed_base_resolver() {
let mut buf = [0_u8; 128];
let base = IriAbsoluteStr::new("http://example.com/").expect("should be valid base IRI");
let reference = IriReferenceStr::new("foo/bar").expect("should be valid IRI reference");
// The resolver only lives inside this inner block.
let resolved = {
let resolver = FixedBaseResolver::new(base);
resolver.resolve(reference)
};
// Note that the result of `resolver.resolve()` is still alive here.
let result = write_to_slice(&mut buf, &resolved).expect("`buf` should have enough capacity");
assert_eq!(result, "http://example.com/foo/bar");
}
/// For every absolute URI-class case in `components::TEST_CASES`, resolves the
/// URI against itself (converted with `to_absolute()`), normalizes the result,
/// and compares it with the case's `normalized_uri`.
#[test]
fn uri_resolution_against_self_with_normalization() {
for case in components::TEST_CASES
.iter()
.filter(|case| case.is_uri_class() && case.is_absolute())
{
let reference = UriStr::new(case.composed).expect("should be valid URI");
let resolved_normalized = AsRef::<UriReferenceStr>::as_ref(reference)
.resolve_against(reference.to_absolute())
.and_normalize();
assert_eq_display!(resolved_normalized, case.normalized_uri, "case={case:#?}");
// The string conversions below are only checked when `alloc` is enabled.
#[cfg(feature = "alloc")]
assert_eq!(
resolved_normalized.to_string(),
case.normalized_uri,
"case={case:#?}"
);
#[cfg(feature = "alloc")]
assert_eq!(
resolved_normalized.to_dedicated_string(),
case.normalized_uri,
"case={case:#?}"
);
// The normalizability check must agree with what the test case declares.
assert_eq!(
case.is_rfc3986_normalizable(),
resolved_normalized.ensure_rfc3986_normalizable().is_ok(),
"case={case:#?}"
);
}
}
/// For every absolute IRI-class case in `components::TEST_CASES`, resolves the
/// IRI against itself (converted with `to_absolute()`), normalizes the result,
/// and compares it with the case's `normalized_iri`.
#[test]
fn iri_resolution_against_self_with_normalization() {
for case in components::TEST_CASES
.iter()
.filter(|case| case.is_iri_class() && case.is_absolute())
{
let reference = IriStr::new(case.composed).expect("should be valid IRI");
let resolved_normalized = AsRef::<IriReferenceStr>::as_ref(reference)
.resolve_against(reference.to_absolute())
.and_normalize();
assert_eq_display!(resolved_normalized, case.normalized_iri, "case={case:#?}");
// The string conversions below are only checked when `alloc` is enabled.
#[cfg(feature = "alloc")]
assert_eq!(
resolved_normalized.to_string(),
case.normalized_iri,
"case={case:#?}"
);
#[cfg(feature = "alloc")]
assert_eq!(
resolved_normalized.to_dedicated_string(),
case.normalized_iri,
"case={case:#?}"
);
// The normalizability check must agree with what the test case declares.
assert_eq!(
case.is_rfc3986_normalizable(),
resolved_normalized.ensure_rfc3986_normalizable().is_ok(),
"case={case:#?}"
);
}
}

View File

@@ -0,0 +1,179 @@
//! Reference implementation based on RFC 3986 section 5.
#![cfg(feature = "alloc")]
extern crate alloc;
use alloc::format;
#[cfg(not(feature = "std"))]
use alloc::string::String;
use iri_string::spec::Spec;
use iri_string::types::{RiAbsoluteStr, RiReferenceStr, RiString};
/// Splits an IRI reference into its major components.
///
/// The returned tuple is `(scheme, authority, path, query, fragment)`; every
/// component except the path is optional.
fn to_major_components<S: Spec>(
    s: &RiReferenceStr<S>,
) -> (Option<&str>, Option<&str>, &str, Option<&str>, Option<&str>) {
    let scheme = s.scheme_str();
    let authority = s.authority_str();
    let path = s.path_str();
    let query = s.query().map(|query| query.as_str());
    let fragment = s.fragment().map(|fragment| fragment.as_str());
    (scheme, authority, path, query, fragment)
}
/// Resolves a (possibly relative) IRI reference against an absolute base IRI.
///
/// This follows the "Transform References" pseudo-code of
/// <https://www.rfc-editor.org/rfc/rfc3986.html#section-5.2.2>: each target
/// component is taken from either the reference or the base, and the pieces
/// are then recomposed into a single string.
///
/// # Panics
///
/// Panics if the base has no scheme or if the recomposed string is not a
/// valid IRI; both indicate a bug rather than bad input.
pub(super) fn resolve<S: Spec>(
    reference: &RiReferenceStr<S>,
    base: &RiAbsoluteStr<S>,
) -> RiString<S> {
    let (r_scheme, r_authority, r_path, r_query, r_fragment) = to_major_components(reference);
    let (b_scheme, b_authority, b_path, b_query, _) = to_major_components(base.as_ref());

    let (t_scheme, t_authority, t_path, t_query): (&str, Option<&str>, String, Option<&str>) =
        if let Some(scheme) = r_scheme {
            // The reference is absolute: only dot segments need to be removed.
            (
                scheme,
                r_authority,
                remove_dot_segments(r_path.into()),
                r_query,
            )
        } else {
            let (authority, path, query) = if r_authority.is_some() {
                // Network-path reference: everything but the scheme comes from it.
                (r_authority, remove_dot_segments(r_path.into()), r_query)
            } else if r_path.is_empty() {
                // Same-document style reference: keep the base path untouched and
                // fall back to the base query when the reference has none.
                (b_authority, String::from(b_path), r_query.or(b_query))
            } else {
                let path = if r_path.starts_with('/') {
                    remove_dot_segments(r_path.into())
                } else {
                    remove_dot_segments(merge(b_path, r_path, b_authority.is_some()))
                };
                (b_authority, path, r_query)
            };
            let scheme = b_scheme.expect("non-relative IRI must have a scheme");
            (scheme, authority, path, query)
        };

    // The fragment always comes from the reference.
    let recomposed = recompose(t_scheme, t_authority, &t_path, t_query, r_fragment);
    RiString::<S>::try_from(recomposed).expect("resolution result must be a valid IRI")
}
/// Merges a relative-path reference with the path of the base.
///
/// When the base has an authority and an empty path, the result is the
/// reference path prefixed with `/`. Otherwise the result is the base path up
/// to and including its last `/` (or nothing if it has no `/`), followed by the
/// reference path.
///
/// See <https://www.rfc-editor.org/rfc/rfc3986.html#section-5.2.3>.
fn merge(base_path: &str, ref_path: &str, base_authority_defined: bool) -> String {
    let prefix = if base_authority_defined && base_path.is_empty() {
        "/"
    } else {
        match base_path.rfind('/') {
            // `/` is a single byte, so the inclusive range ends right after it.
            Some(last_slash) => &base_path[..=last_slash],
            None => "",
        }
    };
    let mut merged = String::with_capacity(prefix.len() + ref_path.len());
    merged.push_str(prefix);
    merged.push_str(ref_path);
    merged
}
/// Removes `.` and `..` segments from the path.
///
/// Implements steps 2A to 2E of
/// <https://www.rfc-editor.org/rfc/rfc3986.html#section-5.2.4>: the input is
/// consumed front to back while the output buffer is built up.
fn remove_dot_segments(input: String) -> String {
    let mut output = String::new();
    // The part of the input buffer that has not been processed yet.
    let mut rest: &str = input.as_str();
    while !rest.is_empty() {
        if let Some(tail) = rest
            .strip_prefix("../")
            .or_else(|| rest.strip_prefix("./"))
        {
            // 2A: a leading `../` or `./` is simply dropped.
            rest = tail;
        } else if rest.starts_with("/./") {
            // 2B: `/./` becomes `/` (keep the second slash).
            rest = &rest[2..];
        } else if rest == "/." {
            // 2B: a trailing `/.` becomes `/`.
            rest = "/";
        } else if rest.starts_with("/../") {
            // 2C: `/../` becomes `/` and the last output segment is removed.
            rest = &rest[3..];
            remove_last_segment_and_preceding_slash(&mut output);
        } else if rest == "/.." {
            // 2C: a trailing `/..` becomes `/` and the last output segment is removed.
            rest = "/";
            remove_last_segment_and_preceding_slash(&mut output);
        } else if rest == "." || rest == ".." {
            // 2D: a lone `.` or `..` is dropped.
            rest = "";
        } else {
            // 2E: move the first segment (with its leading slash, if any) to the output.
            let segment_len = match rest.strip_prefix('/') {
                // `pos + 1` accounts for the leading slash that was stripped.
                Some(after_slash) => after_slash.find('/').map_or(rest.len(), |pos| pos + 1),
                None => rest.find('/').unwrap_or(rest.len()),
            };
            let (segment, tail) = rest.split_at(segment_len);
            output.push_str(segment);
            rest = tail;
        }
    }
    output
}
/// Drops the last path segment of `output` together with the slash in front
/// of it; if `output` contains no slash at all, it is emptied.
///
/// See <https://www.rfc-editor.org/rfc/rfc3986.html#section-5.2.4>,
/// step 2C.
fn remove_last_segment_and_preceding_slash(output: &mut String) {
    // Keep everything before the last slash, or nothing when there is none.
    let keep_len = output.rfind('/').unwrap_or(0);
    output.truncate(keep_len);
}
/// Recomposes the components into a single IRI reference string.
///
/// The layout is `scheme ":" ["//" authority] path ["?" query] ["#" fragment]`;
/// a delimiter is written only when its component is `Some` (even if empty).
///
/// See <https://www.rfc-editor.org/rfc/rfc3986.html#section-5.3>.
fn recompose(
    scheme: &str,
    authority: Option<&str>,
    path: &str,
    query: Option<&str>,
    fragment: Option<&str>,
) -> String {
    // Pair each optional component with its delimiter; absent ones contribute nothing.
    let (authority_mark, authority) = authority.map_or(("", ""), |a| ("//", a));
    let (query_mark, query) = query.map_or(("", ""), |q| ("?", q));
    let (fragment_mark, fragment) = fragment.map_or(("", ""), |f| ("#", f));
    [
        scheme,
        ":",
        authority_mark,
        authority,
        path,
        query_mark,
        query,
        fragment_mark,
        fragment,
    ]
    .concat()
}

99
vendor/iri-string/tests/serde.rs vendored Normal file
View File

@@ -0,0 +1,99 @@
//! Serde test.
#![cfg(feature = "serde")]
use serde_test::{assert_tokens, Token};
use iri_string::types::*;
mod utils;
// Generates a serde token test for one pair of borrowed/owned string types.
//
// Arguments, in order: name of the positive test, name of the negative test,
// `(spec, kind)` selecting the sample strings from `utils`, the borrowed slice
// type, and the owned string type.
//
// NOTE: only the positive test is generated. The `$negative` identifier is
// accepted but not used, and `$owned` is captured but the owned value is
// obtained through `to_owned()` rather than by naming the type.
macro_rules! define_tests {
// Public entry point: forwards to the internal `@positive` rule.
($positive:ident, $negative:ident, ($spec:ident, $kind:ident), $slice:ty, $owned:ty,) => {
define_tests! {
@positive,
$positive,
($spec, $kind),
$slice,
$owned,
}
};
// Internal rule: emits a `#[test]` function named `$name`.
(@positive, $name:ident, ($spec:ident, $kind:ident), $slice:ty, $owned:ty,) => {
#[test]
fn $name() {
for raw in utils::positive(utils::Spec::$spec, utils::Kind::$kind) {
let s = <$slice>::new(raw).expect("Should not fail: valid string");
// The borrowed type must correspond to a single borrowed-string token.
assert_tokens(&s, &[Token::BorrowedStr(raw)]);
// The owned type is only checked when `alloc` is enabled.
#[cfg(all(feature = "serde", feature = "alloc"))]
{
let s = s.to_owned();
assert_tokens(&s, &[Token::BorrowedStr(raw)]);
}
}
}
};
}
// One invocation per string type pair: four kinds (normal, absolute,
// reference, relative) for each of the URI and IRI specs.
// Each invocation passes: positive test name, negative test name,
// `(spec, kind)`, borrowed type, owned type.

// URI types.
define_tests! {
uri,
not_uri,
(Uri, Normal),
UriStr,
UriString,
}
define_tests! {
uri_absolute,
not_uri_absolute,
(Uri, Absolute),
UriAbsoluteStr,
UriAbsoluteString,
}
define_tests! {
uri_reference,
not_uri_reference,
(Uri, Reference),
UriReferenceStr,
UriReferenceString,
}
define_tests! {
uri_relative,
not_uri_relative,
(Uri, Relative),
UriRelativeStr,
UriRelativeString,
}
// IRI types.
define_tests! {
iri,
not_iri,
(Iri, Normal),
IriStr,
IriString,
}
define_tests! {
iri_absolute,
not_iri_absolute,
(Iri, Absolute),
IriAbsoluteStr,
IriAbsoluteString,
}
define_tests! {
iri_reference,
not_iri_reference,
(Iri, Reference),
IriReferenceStr,
IriReferenceString,
}
define_tests! {
iri_relative,
not_iri_relative,
(Iri, Relative),
IriRelativeStr,
IriRelativeString,
}

View File

@@ -0,0 +1,342 @@
//! Conversions between types.
use iri_string::types::*;
/// Asserts that `source` converts to `&T` and that the converted value
/// compares equal to the original string.
///
/// # Panics
///
/// Panics if the conversion fails or if the parsed value differs from `source`.
fn assert_convertible<T>(source: &str)
where
    T: ?Sized + PartialEq<str> + core::fmt::Debug,
    for<'a> &'a T: TryFrom<&'a str>,
    for<'a> <&'a T as TryFrom<&'a str>>::Error: core::fmt::Debug,
{
    let parsed = <&T>::try_from(source)
        .unwrap_or_else(|e| panic!("should be convertible: source={:?}: {:?}", source, e));
    assert_eq!(parsed, source);
}
/// Asserts that `source` can NOT be converted to `&T`.
///
/// # Panics
///
/// Panics if the conversion unexpectedly succeeds.
fn assert_non_convertible<T>(source: &str)
where
    T: ?Sized + PartialEq<str> + core::fmt::Debug,
    for<'a> &'a T: TryFrom<&'a str>,
    for<'a> <&'a T as TryFrom<&'a str>>::Error: core::fmt::Debug,
{
    match <&T>::try_from(source) {
        // A conversion error is the expected outcome.
        Err(_) => {}
        Ok(parsed) => panic!(
            "should not be convertible: source={:?}, parsed={:?}",
            source, parsed
        ),
    }
}
/// Absolute URIs without a fragment, collected from RFC 3986 and RFC 3987.
///
/// Each one must be accepted by the reference, normal and absolute types of
/// both the IRI and URI families, and rejected by the relative types.
#[test]
fn rfc3986_uris_absolute_without_fragment() {
const URIS: &[&str] = &[
// RFC 3986 itself.
"https://www.rfc-editor.org/rfc/rfc3986.html",
"https://datatracker.ietf.org/doc/html/rfc3986",
// RFC 3986 section 1.1.2.
"ftp://ftp.is.co.za/rfc/rfc1808.txt",
"http://www.ietf.org/rfc/rfc2396.txt",
"ldap://[2001:db8::7]/c=GB?objectClass?one",
"mailto:John.Doe@example.com",
"news:comp.infosystems.www.servers.unix",
"tel:+1-816-555-1212",
"telnet://192.0.2.16:80/",
"urn:oasis:names:specification:docbook:dtd:xml:4.1.2",
// RFC 3986 section 3.
"urn:example:animal:ferret:nose",
// RFC 3986 section 3.3.
"mailto:fred@example.com",
"foo://info.example.com?fred",
// RFC 3986 section 5.4.
"http://a/b/c/d;p?q",
// RFC 3986 section 5.4.1.
"g:h",
"http://a/b/c/g",
"http://a/b/c/g/",
"http://a/g",
"http://g",
"http://a/b/c/d;p?y",
"http://a/b/c/g?y",
"http://a/b/c/;x",
"http://a/b/c/g;x",
"http://a/b/c/d;p?q",
"http://a/b/c/",
"http://a/b/",
"http://a/b/g",
"http://a/",
// RFC 3986 section 5.4.2.
"http://a/b/c/g.",
"http://a/b/c/.g",
"http://a/b/c/g..",
"http://a/b/c/..g",
"http://a/b/c/g/h",
"http://a/b/c/h",
"http://a/b/c/g;x=1/y",
"http://a/b/c/y",
"http://a/b/c/g?y/./x",
"http://a/b/c/g?y/../x",
// RFC 3986 section 6.2.2.
"example://a/b/c/%7Bfoo%7D",
"eXAMPLE://a/./b/../b/%63/%7bfoo%7d",
// RFC 3986 section 6.2.2.1.
"HTTP://www.EXAMPLE.com/",
"http://www.example.com/",
// RFC 3986 section 6.2.3.
"http://example.com",
"http://example.com/",
"http://example.com:/",
"http://example.com:80/",
"http://example.com/?",
"mailto:Joe@Example.COM",
"mailto:Joe@example.com",
// RFC 3986 section 6.2.4.
"http://example.com/data",
"http://example.com/data/",
"ftp://cnn.example.com&story=breaking_news@10.0.0.1/top_story.htm",
// RFC 3986 section Appendix C.
"http://www.w3.org/Addressing/",
"ftp://foo.example.com/rfc/",
// RFC 3987 itself.
"https://www.rfc-editor.org/rfc/rfc3987.html",
"https://datatracker.ietf.org/doc/html/rfc3987",
// RFC 3987 section 3.1.
"http://xn--rsum-bpad.example.org",
"http://r%C3%A9sum%C3%A9.example.org",
// RFC 3987 section 3.2.
"http://example.com/%F0%90%8C%80%F0%90%8C%81%F0%90%8C%82",
// RFC 3987 section 3.2.1.
"http://www.example.org/r%C3%A9sum%C3%A9.html",
"http://www.example.org/r%E9sum%E9.html",
"http://www.example.org/D%C3%BCrst",
"http://www.example.org/D%FCrst",
"http://xn--99zt52a.example.org/%e2%80%ae",
"http://xn--99zt52a.example.org/%E2%80%AE",
// RFC 3987 section 4.4.
"http://ab.CDEFGH.ij/kl/mn/op.html",
"http://ab.CDE.FGH/ij/kl/mn/op.html",
"http://AB.CD.ef/gh/IJ/KL.html",
"http://ab.cd.EF/GH/ij/kl.html",
"http://ab.CD.EF/GH/IJ/kl.html",
"http://ab.CDE123FGH.ij/kl/mn/op.html",
"http://ab.cd.ef/GH1/2IJ/KL.html",
"http://ab.cd.ef/GH%31/%32IJ/KL.html",
"http://ab.CDEFGH.123/kl/mn/op.html",
// RFC 3987 section 5.3.2.
"eXAMPLE://a/./b/../b/%63/%7bfoo%7d/ros%C3%A9",
// RFC 3987 section 5.3.2.1.
"HTTP://www.EXAMPLE.com/",
"http://www.example.com/",
// RFC 3987 section 5.3.2.3.
"http://example.org/~user",
"http://example.org/%7euser",
"http://example.org/%7Euser",
// RFC 3987 section 5.3.3.
"http://example.com",
"http://example.com/",
"http://example.com:/",
"http://example.com:80/",
//"http://xn--rsum-bpad.example.org", // duplicate
// RFC 3987 section 5.3.4.
"http://example.com/data",
"http://example.com/data/",
// RFC 3987 section 6.4.
//"http://www.example.org/r%C3%A9sum%C3%A9.html", // duplicate
//"http://www.example.org/r%E9sum%E9.html", // duplicate
];
for uri in URIS {
// Accepted: reference, normal and absolute types.
assert_convertible::<IriReferenceStr>(uri);
assert_convertible::<UriReferenceStr>(uri);
assert_convertible::<IriStr>(uri);
assert_convertible::<UriStr>(uri);
assert_convertible::<IriAbsoluteStr>(uri);
assert_convertible::<UriAbsoluteStr>(uri);
// Rejected: relative types.
assert_non_convertible::<IriRelativeStr>(uri);
assert_non_convertible::<UriRelativeStr>(uri);
}
}
/// URIs with a scheme and a fragment, collected from RFC 3986 and RFC 3987.
///
/// Each one must be accepted by the reference and normal types, and rejected
/// by the absolute types (which do not allow a fragment) and the relative types.
#[test]
fn rfc3986_uris_absolute_with_fragment() {
const URIS: &[&str] = &[
// RFC 3986 section 3.
"foo://example.com:8042/over/there?name=ferret#nose",
// RFC 3986 section 5.4.1.
"http://a/b/c/d;p?q#s",
"http://a/b/c/g#s",
"http://a/b/c/g?y#s",
"http://a/b/c/g;x?y#s",
// RFC 3986 section 5.4.2.
"http://a/b/c/g#s/./x",
"http://a/b/c/g#s/../x",
// RFC 3986 section Appendix B.
"http://www.ics.uci.edu/pub/ietf/uri/#Related",
// RFC 3986 section Appendix C.
"http://www.ics.uci.edu/pub/ietf/uri/historical.html#WARNING",
// RFC 3987 section 3.1.
"http://www.example.org/red%09ros%C3%A9#red",
// RFC 3987 section 4.4.
"http://AB.CD.EF/GH/IJ/KL?MN=OP;QR=ST#UV",
];
for uri in URIS {
// Accepted: reference and normal types.
assert_convertible::<IriReferenceStr>(uri);
assert_convertible::<UriReferenceStr>(uri);
assert_convertible::<IriStr>(uri);
assert_convertible::<UriStr>(uri);
// Rejected: absolute and relative types.
assert_non_convertible::<IriAbsoluteStr>(uri);
assert_non_convertible::<UriAbsoluteStr>(uri);
assert_non_convertible::<IriRelativeStr>(uri);
assert_non_convertible::<UriRelativeStr>(uri);
}
}
/// Relative references from the resolution examples of RFC 3986 section 5.4.
///
/// Each one must be accepted by the reference and relative types, and rejected
/// by the normal and absolute types.
#[test]
fn rfc3986_uris_relative() {
const URIS: &[&str] = &[
// RFC 3986 section 5.4.1.
"g",
"./g",
"g/",
"/g",
"//g",
"?y",
"g?y",
"#s",
"g#s",
"g?y#s",
";x",
"g;x",
"g;x?y#s",
"",
".",
"./",
"..",
"../",
"../g",
"../..",
"../../",
"../../g",
// RFC 3986 section 5.4.2.
"/./g",
"/../g",
"g.",
".g",
"g..",
"..g",
"./../g",
"./g/.",
"g/./h",
"g/../h",
"g;x=1/./y",
"g;x=1/../y",
"g?y/./x",
"g?y/../x",
"g#s/./x",
"g#s/../x",
];
for uri in URIS {
// Accepted: reference types.
assert_convertible::<IriReferenceStr>(uri);
assert_convertible::<UriReferenceStr>(uri);
// Rejected: normal and absolute types.
assert_non_convertible::<IriStr>(uri);
assert_non_convertible::<UriStr>(uri);
assert_non_convertible::<IriAbsoluteStr>(uri);
assert_non_convertible::<UriAbsoluteStr>(uri);
// Accepted: relative types.
assert_convertible::<IriRelativeStr>(uri);
assert_convertible::<UriRelativeStr>(uri);
}
}
/// Absolute IRIs without a fragment that contain non-ASCII characters,
/// collected from RFC 3987.
///
/// Each one must be accepted by the IRI reference, normal and absolute types,
/// and rejected by every URI type and by the IRI relative type.
#[test]
fn rfc3987_iris_absolute_without_fragment() {
const URIS: &[&str] = &[
// RFC 3987 section 3.1.
"http://r\u{E9}sum\u{E9}.example.org",
// RFC 3987 section 3.2.
"http://example.com/\u{10300}\u{10301}\u{10302}",
"http://www.example.org/D\u{FC}rst",
"http://\u{7D0D}\u{8C46}.example.org/%E2%80%AE",
// RFC 3987 section 5.2.
"http://example.org/ros\u{E9}",
// RFC 3987 section 5.3.2.
"example://a/b/c/%7Bfoo%7D/ros\u{E9}",
// RFC 3987 section 5.3.2.2.
"http://www.example.org/r\u{E9}sum\u{E9}.html",
"http://www.example.org/re\u{301}sume\u{301}.html",
// RFC 3987 section 5.3.3.
//"http://r\u{E9}sum\u{E9}.example.org", // duplicate
// RFC 3987 section 6.4.
//"http://www.example.org/r\u{E9}sum\u{E9}.html", // duplicate
];
for uri in URIS {
// In each pair below the IRI type accepts the input and the URI type rejects it,
// except for the relative types, which both reject it.
assert_convertible::<IriReferenceStr>(uri);
assert_non_convertible::<UriReferenceStr>(uri);
assert_convertible::<IriStr>(uri);
assert_non_convertible::<UriStr>(uri);
assert_convertible::<IriAbsoluteStr>(uri);
assert_non_convertible::<UriAbsoluteStr>(uri);
assert_non_convertible::<IriRelativeStr>(uri);
assert_non_convertible::<UriRelativeStr>(uri);
}
}
/// IRIs with a scheme, a fragment and non-ASCII characters must be accepted
/// only by the IRI reference and normal types.
#[test]
fn rfc3987_iris_absolute_with_fragment() {
    const IRIS_WITH_FRAGMENT: &[&str] = &[
        // RFC 3987 section 6.4.
        "http://www.example.org/r%E9sum%E9.xml#r\u{E9}sum\u{E9}",
    ];
    for &iri in IRIS_WITH_FRAGMENT {
        // Reference and normal types: IRI accepts, URI rejects.
        assert_convertible::<IriReferenceStr>(iri);
        assert_non_convertible::<UriReferenceStr>(iri);
        assert_convertible::<IriStr>(iri);
        assert_non_convertible::<UriStr>(iri);
        // Absolute and relative types reject the input for both families.
        assert_non_convertible::<IriAbsoluteStr>(iri);
        assert_non_convertible::<UriAbsoluteStr>(iri);
        assert_non_convertible::<IriRelativeStr>(iri);
        assert_non_convertible::<UriRelativeStr>(iri);
    }
}
/// Strings containing characters that are invalid in their position must be
/// rejected by every string type.
#[test]
fn test_invalid_char() {
    const INVALID_INPUTS: &[&str] = &[
        "##", // Fragment cannot have `#`.
        "<",  // `<` cannot appear in an IRI reference.
        ">",  // `>` cannot appear in an IRI reference.
    ];
    for &input in INVALID_INPUTS {
        assert_non_convertible::<IriReferenceStr>(input);
        assert_non_convertible::<UriReferenceStr>(input);
        assert_non_convertible::<IriStr>(input);
        assert_non_convertible::<UriStr>(input);
        assert_non_convertible::<IriAbsoluteStr>(input);
        assert_non_convertible::<UriAbsoluteStr>(input);
        assert_non_convertible::<IriRelativeStr>(input);
        assert_non_convertible::<UriRelativeStr>(input);
    }
}
/// Truncated or non-hexadecimal percent-encoded triplets must be rejected by
/// every string type.
#[test]
fn invalid_percent_encoding() {
    const MALFORMED: &[&str] = &["%", "%0", "%0g", "%f", "%fg", "%g", "%g0", "%gf", "%gg"];
    for &input in MALFORMED {
        assert_non_convertible::<IriReferenceStr>(input);
        assert_non_convertible::<UriReferenceStr>(input);
        assert_non_convertible::<IriStr>(input);
        assert_non_convertible::<UriStr>(input);
        assert_non_convertible::<IriAbsoluteStr>(input);
        assert_non_convertible::<UriAbsoluteStr>(input);
        assert_non_convertible::<IriRelativeStr>(input);
        assert_non_convertible::<UriRelativeStr>(input);
    }
}
/// Checks that string types of different specs and kinds can be compared
/// with each other.
///
/// The `where` clause names the cross-type `PartialEq` impls; the body is
/// empty, so this test is effectively a compile-time check that those impls
/// exist.
#[test]
fn compare_different_types()
where
UriAbsoluteStr: PartialEq<IriReferenceStr>,
IriReferenceStr: PartialEq<UriAbsoluteStr>,
IriAbsoluteStr: PartialEq<UriReferenceStr>,
UriReferenceStr: PartialEq<IriAbsoluteStr>,
{
}

404
vendor/iri-string/tests/template.rs vendored Normal file
View File

@@ -0,0 +1,404 @@
//! Tests for URI template.
#![cfg(feature = "alloc")]
#[macro_use]
mod utils;
use std::cell::Cell;
use iri_string::spec::UriSpec;
use iri_string::template::context::{Context, DynamicContext, Visitor};
use iri_string::template::simple_context::{SimpleContext, Value};
use iri_string::template::UriTemplateStr;
/// Returns the context used by examples in RFC 6570 section 3.2.
fn rfc6570_context() -> SimpleContext {
let mut ctx = SimpleContext::new();
ctx.insert(
"count",
Value::List(vec!["one".to_owned(), "two".to_owned(), "three".to_owned()]),
);
ctx.insert(
"dom",
Value::List(vec!["example".to_owned(), "com".to_owned()]),
);
ctx.insert("dub", Value::String("me/too".to_owned()));
ctx.insert("hello", Value::String("Hello World!".to_owned()));
ctx.insert("half", Value::String("50%".to_owned()));
ctx.insert("var", Value::String("value".to_owned()));
ctx.insert("who", Value::String("fred".to_owned()));
ctx.insert("base", Value::String("http://example.com/home/".to_owned()));
ctx.insert("path", Value::String("/foo/bar".to_owned()));
ctx.insert(
"list",
Value::List(vec![
"red".to_owned(),
"green".to_owned(),
"blue".to_owned(),
]),
);
ctx.insert(
"keys",
Value::Assoc(vec![
("semi".to_owned(), ";".to_owned()),
("dot".to_owned(), ".".to_owned()),
("comma".to_owned(), ",".to_owned()),
]),
);
ctx.insert("v", Value::String("6".to_owned()));
ctx.insert("x", Value::String("1024".to_owned()));
ctx.insert("y", Value::String("768".to_owned()));
ctx.insert("empty", Value::String("".to_owned()));
ctx.insert("empty_keys", Value::Assoc(vec![]));
ctx.insert("undef", Value::Undefined);
ctx
}
/// Expression and expected expansion.
const SUCCESS_CASES: &[(&str, &str)] = &[
// Section 3.2.1. Variable Expansion.
("{count}", "one,two,three"),
("{count*}", "one,two,three"),
("{/count}", "/one,two,three"),
("{/count*}", "/one/two/three"),
("{;count}", ";count=one,two,three"),
("{;count*}", ";count=one;count=two;count=three"),
("{?count}", "?count=one,two,three"),
("{?count*}", "?count=one&count=two&count=three"),
("{&count*}", "&count=one&count=two&count=three"),
// Section 3.2.2. Simple String Expansion.
("{var}", "value"),
("{hello}", "Hello%20World%21"),
("{half}", "50%25"),
("O{empty}X", "OX"),
("O{undef}X", "OX"),
("{x,y}", "1024,768"),
("{x,hello,y}", "1024,Hello%20World%21,768"),
("?{x,empty}", "?1024,"),
("?{x,undef}", "?1024"),
("?{undef,y}", "?768"),
("{var:3}", "val"),
("{var:30}", "value"),
("{list}", "red,green,blue"),
("{list*}", "red,green,blue"),
("{keys}", "semi,%3B,dot,.,comma,%2C"),
("{keys*}", "semi=%3B,dot=.,comma=%2C"),
// Section 3.2.3. Reserved Expansion.
("{+var}", "value"),
("{+hello}", "Hello%20World!"),
("{+half}", "50%25"),
("{base}index", "http%3A%2F%2Fexample.com%2Fhome%2Findex"),
("{+base}index", "http://example.com/home/index"),
("O{+empty}X", "OX"),
("O{+undef}X", "OX"),
("{+path}/here", "/foo/bar/here"),
("here?ref={+path}", "here?ref=/foo/bar"),
("up{+path}{var}/here", "up/foo/barvalue/here"),
("{+x,hello,y}", "1024,Hello%20World!,768"),
("{+path,x}/here", "/foo/bar,1024/here"),
("{+path:6}/here", "/foo/b/here"),
("{+list}", "red,green,blue"),
("{+list*}", "red,green,blue"),
("{+keys}", "semi,;,dot,.,comma,,"),
("{+keys*}", "semi=;,dot=.,comma=,"),
// Section 3.2.4. Fragment Expansion.
("{#var}", "#value"),
("{#hello}", "#Hello%20World!"),
("{#half}", "#50%25"),
("foo{#empty}", "foo#"),
("foo{#undef}", "foo"),
("{#x,hello,y}", "#1024,Hello%20World!,768"),
("{#path,x}/here", "#/foo/bar,1024/here"),
("{#path:6}/here", "#/foo/b/here"),
("{#list}", "#red,green,blue"),
("{#list*}", "#red,green,blue"),
("{#keys}", "#semi,;,dot,.,comma,,"),
("{#keys*}", "#semi=;,dot=.,comma=,"),
// Section 3.2.5. Label Expansion with Dot-Prefix.
("{.who}", ".fred"),
("{.who,who}", ".fred.fred"),
("{.half,who}", ".50%25.fred"),
("www{.dom*}", "www.example.com"),
("X{.var}", "X.value"),
("X{.empty}", "X."),
("X{.undef}", "X"),
("X{.var:3}", "X.val"),
("X{.list}", "X.red,green,blue"),
("X{.list*}", "X.red.green.blue"),
("X{.keys}", "X.semi,%3B,dot,.,comma,%2C"),
("X{.keys*}", "X.semi=%3B.dot=..comma=%2C"),
("X{.empty_keys}", "X"),
("X{.empty_keys*}", "X"),
// Section 3.2.6. Path Segment Expansion.
("{/who}", "/fred"),
("{/who,who}", "/fred/fred"),
("{/half,who}", "/50%25/fred"),
("{/who,dub}", "/fred/me%2Ftoo"),
("{/var}", "/value"),
("{/var,empty}", "/value/"),
("{/var,undef}", "/value"),
("{/var,x}/here", "/value/1024/here"),
("{/var:1,var}", "/v/value"),
("{/list}", "/red,green,blue"),
("{/list*}", "/red/green/blue"),
("{/list*,path:4}", "/red/green/blue/%2Ffoo"),
("{/keys}", "/semi,%3B,dot,.,comma,%2C"),
("{/keys*}", "/semi=%3B/dot=./comma=%2C"),
// Section 3.2.7. Path-Style Parameter Expansion.
("{;who}", ";who=fred"),
("{;half}", ";half=50%25"),
("{;empty}", ";empty"),
("{;v,empty,who}", ";v=6;empty;who=fred"),
("{;v,bar,who}", ";v=6;who=fred"),
("{;x,y}", ";x=1024;y=768"),
("{;x,y,empty}", ";x=1024;y=768;empty"),
("{;x,y,undef}", ";x=1024;y=768"),
("{;hello:5}", ";hello=Hello"),
("{;list}", ";list=red,green,blue"),
("{;list*}", ";list=red;list=green;list=blue"),
("{;keys}", ";keys=semi,%3B,dot,.,comma,%2C"),
("{;keys*}", ";semi=%3B;dot=.;comma=%2C"),
// Section 3.2.8. Form-Style Query Expansion.
("{?who}", "?who=fred"),
("{?half}", "?half=50%25"),
("{?x,y}", "?x=1024&y=768"),
("{?x,y,empty}", "?x=1024&y=768&empty="),
("{?x,y,undef}", "?x=1024&y=768"),
("{?var:3}", "?var=val"),
("{?list}", "?list=red,green,blue"),
("{?list*}", "?list=red&list=green&list=blue"),
("{?keys}", "?keys=semi,%3B,dot,.,comma,%2C"),
("{?keys*}", "?semi=%3B&dot=.&comma=%2C"),
// Section 3.2.9. Form-Style Query Continuation.
("{&who}", "&who=fred"),
("{&half}", "&half=50%25"),
("?fixed=yes{&x}", "?fixed=yes&x=1024"),
("{&x,y,empty}", "&x=1024&y=768&empty="),
("{&x,y,undef}", "&x=1024&y=768"),
("{&var:3}", "&var=val"),
("{&list}", "&list=red,green,blue"),
("{&list*}", "&list=red&list=green&list=blue"),
("{&keys}", "&keys=semi,%3B,dot,.,comma,%2C"),
("{&keys*}", "&semi=%3B&dot=.&comma=%2C"),
];
/// Tests for examples in RFC 6570 section 3.2.
///
/// Every template in `SUCCESS_CASES` is expanded with the context returned by
/// `rfc6570_context()` and compared with its expected expansion.
#[test]
fn rfc6570_section3_2() {
let context = rfc6570_context();
for (template, expected) in SUCCESS_CASES {
let template = UriTemplateStr::new(template).expect("must be valid template");
let expanded = template
.expand::<UriSpec, _>(&context)
.expect("must not have variable type error");
// Check both the `assert_eq_display!` comparison and the `to_string()` result.
assert_eq_display!(expanded, expected, "template={template:?}");
assert_eq!(expanded.to_string(), *expected, "template={template:?}");
}
}
/// Checks how the prefix modifier (`:N`) truncates values that contain
/// percent-encoded triplets.
///
/// As the expectations show, simple expansion (`{var:N}`) cuts the raw text
/// and encodes `%` as `%25`, while reserved expansion (`{+var:N}`) keeps `N`
/// whole triplets.
#[test]
fn prefix_modifier_for_percent_encoded_content() {
let mut context = SimpleContext::new();
context.insert("abcdef", "%61%62%63%64%65%66");
// `%CE`, `%CE%B1`, `%B1`, `%CE`, `%CE%B2`, `%B2`.
context.insert("invalid1", "%CE%CE%B1%B1%CE%CE%B2%B2");
// Each `%ff` is considered as an independent "character".
context.insert("invalid2", "%ff%ff%ff%ff%ff%ff");
// `&[(template, expected)]`.
const CASES: &[(&str, &str)] = &[
("{abcdef:4}", "%2561%25"),
("{+abcdef:4}", "%61%62%63%64"),
("{invalid1:2}", "%25C"),
("{invalid1:4}", "%25CE%25"),
("{+invalid1:2}", "%CE%CE%B1"),
("{+invalid1:4}", "%CE%CE%B1%B1%CE"),
("{invalid2:2}", "%25f"),
("{invalid2:4}", "%25ff%25"),
("{+invalid2:2}", "%ff%ff"),
("{+invalid2:4}", "%ff%ff%ff%ff"),
];
for (template, expected) in CASES {
let template = UriTemplateStr::new(template).expect("must be valid template");
// Expansion with a shared (immutable) context.
let expanded = template
.expand::<UriSpec, _>(&context)
.expect("must not have variable type error");
assert_eq_display!(expanded, *expected, "template={template:?}");
assert_eq!(expanded.to_string(), *expected, "template={template:?}");
// Expansion through the dynamic-context API must give the same result.
let expanded_dynamic = template
.expand_dynamic_to_string::<UriSpec, _>(&mut context.clone())
.expect("must not have variable type error");
assert_eq!(
expanded_dynamic, *expected,
"dynamic, template={template:?}"
);
}
}
/// Checks the prefix modifier on values that end with an incomplete
/// percent-encoded triplet (`%`, `%c`) or with a lone `%ce` triplet.
#[test]
fn incomplete_percent_encode() {
let mut context = SimpleContext::new();
context.insert("incomplete1", "%ce%b1%");
context.insert("incomplete2", "%ce%b1%c");
context.insert("incomplete3", "%ce%b1%ce");
// `&[(template, expected)]`.
const CASES: &[(&str, &str)] = &[
("{incomplete1:1}", "%25"),
("{incomplete1:2}", "%25c"),
("{incomplete1:3}", "%25ce"),
("{incomplete1:4}", "%25ce%25"),
("{+incomplete1:1}", "%ce%b1"),
("{+incomplete1:2}", "%ce%b1%25"),
("{+incomplete2:1}", "%ce%b1"),
("{+incomplete2:2}", "%ce%b1%25"),
("{+incomplete2:3}", "%ce%b1%25c"),
("{+incomplete3:1}", "%ce%b1"),
("{+incomplete3:2}", "%ce%b1%ce"),
("{+incomplete3:3}", "%ce%b1%ce"),
];
for (template, expected) in CASES {
let template = UriTemplateStr::new(template).expect("must be valid template");
// Expansion with a shared (immutable) context.
let expanded = template
.expand::<UriSpec, _>(&context)
.expect("must not have variable type error");
assert_eq_display!(expanded, *expected, "template={template:?}");
assert_eq!(expanded.to_string(), *expected, "template={template:?}");
// Expansion through the dynamic-context API must give the same result.
let expanded_dynamic = template
.expand_dynamic_to_string::<UriSpec, _>(&mut context.clone())
.expect("must not have variable type error");
assert_eq!(
expanded_dynamic, *expected,
"dynamic, template={template:?}"
);
}
}
/// Same expectations as the incomplete-percent-encoding cases, but the
/// variable values are produced by a `Display` impl that emits its output one
/// character at a time, so percent-encoded triplets arrive split across
/// several writes.
#[test]
fn fragmented_write() {
use core::fmt;
// Value whose `Display` output is `%ce%b1%` followed by nothing, `c`, or `ce`.
#[derive(Clone)]
enum Foo {
Incomplete1,
Incomplete2,
Incomplete3,
}
impl fmt::Display for Foo {
#[inline]
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
use core::fmt::Write;
// Deliberately written character by character instead of with one `write_str`.
f.write_char('%')?;
f.write_char('c')?;
f.write_char('e')?;
f.write_char('%')?;
f.write_char('b')?;
f.write_char('1')?;
f.write_char('%')?;
match self {
Foo::Incomplete1 => {}
Foo::Incomplete2 => {
f.write_char('c')?;
}
Foo::Incomplete3 => {
f.write_char('c')?;
f.write_char('e')?;
}
}
Ok(())
}
}
// Context exposing the three values under the names `incomplete1..3`.
#[derive(Clone)]
struct MyContext {
incomplete1: Foo,
incomplete2: Foo,
incomplete3: Foo,
}
impl Context for MyContext {
fn visit<V: Visitor>(&self, visitor: V) -> V::Result {
let name = visitor.var_name().as_str();
match name {
"incomplete1" => visitor.visit_string(&self.incomplete1),
"incomplete2" => visitor.visit_string(&self.incomplete2),
"incomplete3" => visitor.visit_string(&self.incomplete3),
// Any other variable name is reported as undefined.
_ => visitor.visit_undefined(),
}
}
}
let context = MyContext {
incomplete1: Foo::Incomplete1,
incomplete2: Foo::Incomplete2,
incomplete3: Foo::Incomplete3,
};
// `&[(template, expected)]`.
const CASES: &[(&str, &str)] = &[
("{incomplete1:1}", "%25"),
("{incomplete1:2}", "%25c"),
("{incomplete1:3}", "%25ce"),
("{incomplete1:4}", "%25ce%25"),
("{+incomplete1:1}", "%ce%b1"),
("{+incomplete1:2}", "%ce%b1%25"),
("{+incomplete2:1}", "%ce%b1"),
("{+incomplete2:2}", "%ce%b1%25"),
("{+incomplete2:3}", "%ce%b1%25c"),
("{+incomplete3:1}", "%ce%b1"),
("{+incomplete3:2}", "%ce%b1%ce"),
("{+incomplete3:3}", "%ce%b1%ce"),
];
for (template, expected) in CASES {
let template = UriTemplateStr::new(template).expect("must be valid template");
// Expansion with a shared (immutable) context.
let expanded = template
.expand::<UriSpec, _>(&context)
.expect("must not have variable type error");
assert_eq_display!(expanded, *expected, "template={template:?}");
assert_eq!(expanded.to_string(), *expected, "template={template:?}");
// Expansion through the dynamic-context API must give the same result.
let expanded_dynamic = template
.expand_dynamic_to_string::<UriSpec, _>(&mut context.clone())
.expect("must not have variable type error");
assert_eq!(
expanded_dynamic, *expected,
"dynamic, template={template:?}"
);
}
}
#[test]
fn github_issue_39() {
    /// Dynamic context that defines no variables and records whether
    /// `on_expansion_start` and `on_expansion_end` have been invoked.
    #[derive(Default)]
    struct MyContext {
        on_expansion_start_called: Cell<bool>,
        on_expansion_end_called: Cell<bool>,
    }

    impl DynamicContext for MyContext {
        fn visit_dynamic<V: Visitor>(&mut self, visitor: V) -> V::Result {
            // Every variable is reported as undefined.
            visitor.visit_undefined()
        }

        fn on_expansion_start(&mut self) {
            self.on_expansion_start_called.set(true);
        }

        fn on_expansion_end(&mut self) {
            self.on_expansion_end_called.set(true);
        }
    }

    let mut recorder = MyContext::default();
    let expanded = UriTemplateStr::new("hello/{world}")
        .expect("valid template string")
        .expand_dynamic_to_string::<UriSpec, _>(&mut recorder)
        .expect("must not have variable type error");

    // The undefined variable expands to nothing, leaving only the literal part.
    assert_eq!(expanded, "hello/");
    // Both callbacks must have run during the expansion.
    assert!(recorder.on_expansion_start_called.get());
    assert!(recorder.on_expansion_end_called.get());
}

212
vendor/iri-string/tests/utils/mod.rs vendored Normal file
View File

@@ -0,0 +1,212 @@
//! Utilities.
#![allow(dead_code)]
use core::fmt;
use RawKind::*;
/// Raw kind (exclusive).
///
/// Every entry of `STRINGS` is tagged with exactly one of these variants.
/// `spec_is` and `kind_is` decide which `Spec` / `Kind` categories a variant
/// is counted under.
#[derive(Clone, Copy, PartialEq, Eq)]
enum RawKind {
/// Invalid string: matches no `Spec` and no `Kind`.
Invalid,
/// IRI: matches `Spec::Iri` only; kinds `Normal` and `Reference`.
Iri,
/// Absolute IRI: matches `Spec::Iri` only; kinds `Absolute`, `Normal` and
/// `Reference`.
IriAbsolute,
/// Relative IRI: matches `Spec::Iri` only; kinds `Relative` and `Reference`.
IriRelative,
/// URI: matches both specs; kinds `Normal` and `Reference`.
Uri,
/// Absolute URI: matches both specs; kinds `Absolute`, `Normal` and
/// `Reference`.
UriAbsolute,
/// Relative URI: matches both specs; kinds `Relative` and `Reference`.
UriRelative,
}
impl RawKind {
    /// Returns whether a string of this raw kind is accepted under `spec`.
    ///
    /// Only the `Uri*` variants are accepted as URIs; every variant except
    /// `Invalid` is accepted as an IRI.
    fn spec_is(self, spec: Spec) -> bool {
        let is_uri = matches!(self, Self::Uri | Self::UriAbsolute | Self::UriRelative);
        match spec {
            Spec::Uri => is_uri,
            Spec::Iri => !matches!(self, Self::Invalid),
        }
    }

    /// Returns whether a string of this raw kind belongs to the category `kind`.
    fn kind_is(self, kind: Kind) -> bool {
        match self {
            // Invalid strings belong to no category.
            Self::Invalid => false,
            // Absolute ones are also "normal" and also references.
            Self::UriAbsolute | Self::IriAbsolute => !matches!(kind, Kind::Relative),
            // Non-absolute, non-relative ones are "normal" and references.
            Self::Uri | Self::Iri => matches!(kind, Kind::Normal | Kind::Reference),
            // Relative ones are relative and references.
            Self::UriRelative | Self::IriRelative => {
                matches!(kind, Kind::Relative | Kind::Reference)
            }
        }
    }

    /// Returns whether this raw kind satisfies both `spec` and `kind`.
    fn is(self, spec: Spec, kind: Kind) -> bool {
        self.spec_is(spec) && self.kind_is(kind)
    }
}
/// Sample strings, each tagged with the single `RawKind` it is classified as.
///
/// `positive` and `negative` select entries from this table.
///
/// ```
/// # use iri_string::types::IriReferenceStr;
/// // `<` and `>` cannot directly appear in an IRI reference.
/// assert!(IriReferenceStr::new("<not allowed>").is_err());
/// // Broken percent encoding cannot appear in an IRI reference.
/// assert!(IriReferenceStr::new("%").is_err());
/// assert!(IriReferenceStr::new("%GG").is_err());
/// ```
const STRINGS: &[(RawKind, &str)] = &[
(UriAbsolute, "https://user:pass@example.com:8080"),
(UriAbsolute, "https://example.com/"),
(UriAbsolute, "https://example.com/foo?bar=baz"),
(Uri, "https://example.com/foo?bar=baz#qux"),
(UriAbsolute, "foo:bar"),
(UriAbsolute, "foo:"),
(UriAbsolute, "foo:/"),
(UriAbsolute, "foo://"),
(UriAbsolute, "foo:///"),
(UriAbsolute, "foo:////"),
(UriAbsolute, "foo://///"),
(UriRelative, "foo"),
(UriRelative, "foo/bar"),
(UriRelative, "foo//bar"),
(UriRelative, "/"),
(UriRelative, "/foo"),
(UriRelative, "/foo/bar"),
(UriRelative, "//foo/bar"),
(UriRelative, "/foo//bar"),
(UriRelative, "?"),
(UriRelative, "???"),
(UriRelative, "?foo"),
(UriRelative, "#"),
(UriRelative, "#foo"),
// `#` cannot appear inside a fragment.
(Invalid, "##"),
(Invalid, "fragment#cannot#have#hash#char"),
// `<` cannot appear in an IRI reference.
(Invalid, "<"),
// `>` cannot appear in an IRI reference.
(Invalid, ">"),
// `<` and `>` cannot appear in an IRI reference.
(Invalid, "lt<and-gt>not-allowed"),
// Incomplete percent encoding.
(Invalid, "%"),
(Invalid, "%0"),
(Invalid, "%f"),
(Invalid, "%F"),
// Invalid percent encoding.
(Invalid, "%0g"),
(Invalid, "%0G"),
(Invalid, "%GG"),
(Invalid, "%G0"),
];
/// Spec.
///
/// Selects which grammar `positive` / `negative` classify the sample strings
/// against.
#[derive(Clone, Copy, PartialEq, Eq)]
pub enum Spec {
/// URI: matched only by the `Uri*` raw kinds.
Uri,
/// IRI and URI: matched by every raw kind except `Invalid`.
Iri,
}
/// Kind.
///
/// Selects which category `positive` / `negative` classify the sample strings
/// against. The categories overlap: an absolute sample also counts as
/// `Normal` and `Reference`.
#[derive(Clone, Copy, PartialEq, Eq)]
pub enum Kind {
/// Absolute IRI / URI: matched only by the `*Absolute` raw kinds.
Absolute,
/// IRI / URI: matched by the `*Absolute` raw kinds and by plain `Uri` / `Iri`.
Normal,
/// IRI / URI reference: matched by every raw kind except `Invalid`.
Reference,
/// Relative IRI / URI reference: matched only by the `*Relative` raw kinds.
Relative,
}
/// Iterates, in table order, over the sample strings that satisfy both `spec`
/// and `kind`.
pub fn positive(spec: Spec, kind: Kind) -> impl Iterator<Item = &'static str> {
    STRINGS.iter().filter_map(move |&(raw_kind, sample)| {
        if raw_kind.is(spec, kind) {
            Some(sample)
        } else {
            None
        }
    })
}
/// Iterates, in table order, over the sample strings that do NOT satisfy the
/// combination of `spec` and `kind`.
pub fn negative(spec: Spec, kind: Kind) -> impl Iterator<Item = &'static str> {
    STRINGS.iter().filter_map(move |&(raw_kind, sample)| {
        if raw_kind.is(spec, kind) {
            None
        } else {
            Some(sample)
        }
    })
}
/// Returns true if the `Display` output of `d` is exactly the string `s`.
///
/// The comparison is done incrementally while formatting, so no intermediate
/// string is allocated. Formatting stops at the first fragment that does not
/// match the remaining expected text.
///
/// A mismatch never panics: a written fragment whose byte length would fall in
/// the middle of a multi-byte character of `s` is simply reported as unequal.
pub(crate) fn eq_display_str<T>(d: &T, s: &str) -> bool
where
    T: ?Sized + fmt::Display,
{
    /// Dummy writer holding the part of the expected string not yet matched.
    struct CmpWriter<'a>(&'a str);

    impl fmt::Write for CmpWriter<'_> {
        fn write_str(&mut self, s: &str) -> fmt::Result {
            let remaining = self.0;
            // `strip_prefix` compares bytes and only ever splits on a proven
            // match, so it cannot hit a non-char-boundary the way a blind
            // `split_at(s.len())` can.
            match remaining.strip_prefix(s) {
                Some(rest) => {
                    self.0 = rest;
                    Ok(())
                }
                None => Err(fmt::Error),
            }
        }
    }

    let mut writer = CmpWriter(s);
    let succeeded = fmt::write(&mut writer, format_args!("{}", d)).is_ok();
    // Equal only if every fragment matched AND nothing expected is left over.
    succeeded && writer.0.is_empty()
}
/// Asserts that the `Display` output of `$left` equals the string `$right`.
///
/// The check goes through `utils::eq_display_str`, so the `utils` module must
/// be reachable under that path at the call site. `$right` is converted with
/// `as_ref()` to the `&str` that helper expects.
///
/// When the `alloc` feature is enabled, both sides are additionally converted
/// with `to_string()` and compared with `assert_eq!`.
///
/// An optional format string with arguments may follow `$right`; it is
/// appended to the failure message.
// Not every test file that includes this module uses the macro.
#[allow(unused_macros)]
macro_rules! assert_eq_display {
    ($left:expr, $right:expr $(,)?) => {{
        // Bind through `match` so each operand is evaluated exactly once and
        // temporaries live for the whole assertion.
        match (&$left, &$right) {
            (left, right) => {
                assert!(
                    utils::eq_display_str(left, right.as_ref()),
                    "`eq_display_str(left, right)`\n  left: `{left}`,\n right: `{right}`",
                );
                #[cfg(feature = "alloc")]
                {
                    let left = left.to_string();
                    let right = right.to_string();
                    assert_eq!(left, right);
                }
            }
        }
    }};
    ($left:expr, $right:expr, $($args:tt)*) => {{
        match (&$left, &$right) {
            (left, right) => {
                assert!(
                    utils::eq_display_str(left, right.as_ref()),
                    "{}",
                    // Standard header first, then the caller-supplied context.
                    format_args!(
                        "{}: {}",
                        format_args!(
                            "`eq_display_str(left, right)`\n  left: `{left}`,\n right: `{right}`",
                        ),
                        format_args!($($args)*)
                    )
                );
                #[cfg(feature = "alloc")]
                {
                    let left = left.to_string();
                    let right = right.to_string();
                    assert_eq!(left, right, $($args)*);
                }
            }
        }
    }};
}