chore: checkpoint before Python removal

This commit is contained in:
2026-03-26 22:33:59 +00:00
parent 683cec9307
commit e568ddf82a
29972 changed files with 11269302 additions and 2 deletions

708
vendor/pest_meta/src/ast.rs vendored Normal file
View File

@@ -0,0 +1,708 @@
// pest. The Elegant Parser
// Copyright (c) 2018 Dragoș Tiselice
//
// Licensed under the Apache License, Version 2.0
// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. All files in the project carrying such notice may not be copied,
// modified, or distributed except according to those terms.
//! Types for pest's abstract syntax tree.
/// A grammar rule: a name, a rule type, and the expression that forms the
/// rule's body.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Rule {
/// The name of the rule
pub name: String,
/// The rule's type (silent, atomic, ...)
pub ty: RuleType,
/// The rule's expression
pub expr: Expr,
}
/// All possible rule types
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum RuleType {
/// The normal rule type
Normal,
/// Silent rules are just like normal rules
/// — when run, they function the same way —
/// except they do not produce pairs or tokens.
/// If a rule is silent, it will never appear in a parse result.
/// (their syntax is `_{ ... }`)
Silent,
/// Atomic rules prevent implicit whitespace: inside an atomic rule,
/// the tilde `~` means "immediately followed by",
/// and repetition operators (asterisk `*` and plus sign `+`)
/// have no implicit separation. In addition, all other rules
/// called from an atomic rule are also treated as atomic.
/// In an atomic rule, interior matching rules are silent.
/// (their syntax is `@{ ... }`)
Atomic,
/// Compound atomic rules are similar to atomic rules,
/// but they produce inner tokens as normal.
/// (their syntax is `${ ... }`)
CompoundAtomic,
/// Non-atomic rules cancel the effect of atomic rules.
/// (their syntax is `!{ ... }`)
NonAtomic,
}
/// All possible rule expressions
///
/// # Warning: Semantic Versioning
/// There may be non-breaking changes to the meta-grammar
/// between minor versions. Those non-breaking changes, however,
/// may translate into semver-breaking changes due to the additional variants
/// propagated from the `Rule` enum. This is a known issue and will be fixed in the
/// future (e.g. by increasing MSRV and non_exhaustive annotations).
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum Expr {
/// Matches an exact string, e.g. `"a"`
Str(String),
/// Matches an exact string, case insensitively (ASCII only), e.g. `^"a"`
Insens(String),
/// Matches one character in the range, e.g. `'a'..'z'`
Range(String, String),
/// Matches the rule with the given name, e.g. `a`
Ident(String),
/// Matches a custom part of the stack, e.g. `PEEK[..]`
PeekSlice(i32, Option<i32>),
/// Positive lookahead; matches expression without making progress, e.g. `&e`
PosPred(Box<Expr>),
/// Negative lookahead; matches if expression doesn't match, without making progress, e.g. `!e`
NegPred(Box<Expr>),
/// Matches a sequence of two expressions, e.g. `e1 ~ e2`
Seq(Box<Expr>, Box<Expr>),
/// Matches either of two expressions, e.g. `e1 | e2`
Choice(Box<Expr>, Box<Expr>),
/// Optionally matches an expression, e.g. `e?`
Opt(Box<Expr>),
/// Matches an expression zero or more times, e.g. `e*`
Rep(Box<Expr>),
/// Matches an expression one or more times, e.g. `e+`
RepOnce(Box<Expr>),
/// Matches an expression an exact number of times, e.g. `e{n}`
RepExact(Box<Expr>, u32),
/// Matches an expression at least a number of times, e.g. `e{n,}`
RepMin(Box<Expr>, u32),
/// Matches an expression at most a number of times, e.g. `e{,n}`
RepMax(Box<Expr>, u32),
/// Matches an expression a number of times within a range, e.g. `e{m, n}`
RepMinMax(Box<Expr>, u32, u32),
/// Continues to match any input until one of the strings in the `Vec` is found;
/// displayed as `(!("a" | "b") ~ ANY)*`
Skip(Vec<String>),
/// Matches an expression and pushes it to the stack, e.g. `push(e)`
Push(Box<Expr>),
/// Pushes a literal string to the stack, e.g. `push_literal("a")`
/// (only available with the `grammar-extras` feature)
#[cfg(feature = "grammar-extras")]
PushLiteral(String),
/// Matches an expression and assigns a label to it, e.g. `#label = exp`
/// (only available with the `grammar-extras` feature)
#[cfg(feature = "grammar-extras")]
NodeTag(Box<Expr>, String),
}
impl Expr {
    /// Returns the iterator that steps the expression from top to bottom.
    pub fn iter_top_down(&self) -> ExprTopDownIterator {
        ExprTopDownIterator::new(self)
    }

    /// Rebuilds this node with `recurse` applied to each of its direct children.
    ///
    /// Children are visited left to right. Variants that hold no boxed
    /// sub-expression are returned unchanged. This is the single place that
    /// knows which variants have children; both `map_top_down` and
    /// `map_bottom_up` are built on it.
    fn map_children<G>(self, recurse: &mut G) -> Expr
    where
        G: FnMut(Expr) -> Expr,
    {
        let mut boxed = |child: Box<Expr>| Box::new(recurse(*child));

        match self {
            Expr::PosPred(expr) => Expr::PosPred(boxed(expr)),
            Expr::NegPred(expr) => Expr::NegPred(boxed(expr)),
            Expr::Seq(lhs, rhs) => {
                // Explicit bindings keep the left-before-right order obvious.
                let lhs = boxed(lhs);
                let rhs = boxed(rhs);
                Expr::Seq(lhs, rhs)
            }
            Expr::Choice(lhs, rhs) => {
                let lhs = boxed(lhs);
                let rhs = boxed(rhs);
                Expr::Choice(lhs, rhs)
            }
            Expr::Rep(expr) => Expr::Rep(boxed(expr)),
            Expr::RepOnce(expr) => Expr::RepOnce(boxed(expr)),
            Expr::RepExact(expr, num) => Expr::RepExact(boxed(expr), num),
            Expr::RepMin(expr, min) => Expr::RepMin(boxed(expr), min),
            Expr::RepMax(expr, max) => Expr::RepMax(boxed(expr), max),
            Expr::RepMinMax(expr, min, max) => Expr::RepMinMax(boxed(expr), min, max),
            Expr::Opt(expr) => Expr::Opt(boxed(expr)),
            Expr::Push(expr) => Expr::Push(boxed(expr)),
            #[cfg(feature = "grammar-extras")]
            Expr::NodeTag(expr, tag) => Expr::NodeTag(boxed(expr), tag),
            leaf => leaf,
        }
    }

    /// Applies `f` to the expression and all its children (top to bottom).
    ///
    /// `f` is called on a node first; the children of the expression that `f`
    /// returns are then mapped recursively, left to right.
    pub fn map_top_down<F>(self, mut f: F) -> Expr
    where
        F: FnMut(Expr) -> Expr,
    {
        fn map_internal<F>(expr: Expr, f: &mut F) -> Expr
        where
            F: FnMut(Expr) -> Expr,
        {
            let rewritten = f(expr);
            rewritten.map_children(&mut |child| map_internal(child, &mut *f))
        }

        map_internal(self, &mut f)
    }

    /// Applies `f` to the expression and all its children (bottom up).
    ///
    /// The children of a node are mapped recursively, left to right, and `f`
    /// is then called on the node rebuilt from the mapped children.
    pub fn map_bottom_up<F>(self, mut f: F) -> Expr
    where
        F: FnMut(Expr) -> Expr,
    {
        fn map_internal<F>(expr: Expr, f: &mut F) -> Expr
        where
            F: FnMut(Expr) -> Expr,
        {
            let mapped = expr.map_children(&mut |child| map_internal(child, &mut *f));
            f(mapped)
        }

        map_internal(self, &mut f)
    }
}
impl core::fmt::Display for Expr {
    /// Writes a textual rendering of the expression.
    ///
    /// Right-nested `Seq` and `Choice` chains are flattened into a single
    /// parenthesized group (`(a ~ b ~ c)`, `(a | b | c)`). Each element is
    /// written straight into the formatter instead of being collected into
    /// intermediate `String`s first.
    ///
    /// # Panics
    ///
    /// Panics if either bound of an `Expr::Range` is an empty string.
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        match self {
            Expr::Str(s) => write!(f, "{:?}", s),
            Expr::Insens(s) => write!(f, "^{:?}", s),
            Expr::Range(start, end) => {
                // Only the first character of each bound is displayed.
                let start = start.chars().next().expect("Empty range start.");
                let end = end.chars().next().expect("Empty range end.");
                write!(f, "({:?}..{:?})", start, end)
            }
            Expr::Ident(id) => write!(f, "{}", id),
            Expr::PeekSlice(start, end) => match end {
                Some(end) => write!(f, "PEEK[{}..{}]", start, end),
                None => write!(f, "PEEK[{}..]", start),
            },
            Expr::PosPred(expr) => write!(f, "&{}", expr),
            Expr::NegPred(expr) => write!(f, "!{}", expr),
            Expr::Seq(lhs, rhs) => {
                write!(f, "({}", lhs)?;
                // Walk the right spine so `a ~ (b ~ c)` prints as `(a ~ b ~ c)`.
                let mut current = rhs;
                while let Expr::Seq(head, tail) = current.as_ref() {
                    write!(f, " ~ {}", head)?;
                    current = tail;
                }
                write!(f, " ~ {})", current)
            }
            Expr::Choice(lhs, rhs) => {
                write!(f, "({}", lhs)?;
                // Walk the right spine so `a | (b | c)` prints as `(a | b | c)`.
                let mut current = rhs;
                while let Expr::Choice(head, tail) = current.as_ref() {
                    write!(f, " | {}", head)?;
                    current = tail;
                }
                write!(f, " | {})", current)
            }
            Expr::Opt(expr) => write!(f, "{}?", expr),
            Expr::Rep(expr) => write!(f, "{}*", expr),
            Expr::RepOnce(expr) => write!(f, "{}+", expr),
            Expr::RepExact(expr, n) => write!(f, "{}{{{}}}", expr, n),
            Expr::RepMin(expr, min) => write!(f, "{}{{{},}}", expr, min),
            Expr::RepMax(expr, max) => write!(f, "{}{{,{}}}", expr, max),
            Expr::RepMinMax(expr, min, max) => write!(f, "{}{{{}, {}}}", expr, min, max),
            Expr::Skip(strings) => {
                let strings = strings
                    .iter()
                    .map(|s| format!("{:?}", s))
                    .collect::<Vec<_>>()
                    .join(" | ");
                write!(f, "(!({}) ~ ANY)*", strings)
            }
            Expr::Push(expr) => write!(f, "PUSH({})", expr),
            #[cfg(feature = "grammar-extras")]
            Expr::PushLiteral(s) => write!(f, "PUSH_LITERAL({:?})", s),
            #[cfg(feature = "grammar-extras")]
            Expr::NodeTag(expr, tag) => {
                write!(f, "(#{} = {})", tag, expr)
            }
        }
    }
}
/// The top down iterator for an expression.
///
/// The iterator owns clones of the expressions it walks and yields each node
/// before its children.
pub struct ExprTopDownIterator {
// The expression that the next call to `next()` will yield.
current: Option<Expr>,
// The child to descend into after `current` has been yielded, if any.
next: Option<Expr>,
// Right-hand sides of `Seq`/`Choice` nodes still waiting to be visited
// (used as a stack: the most recently pushed branch is visited first).
right_branches: Vec<Expr>,
}
impl ExprTopDownIterator {
    /// Constructs a top-down iterator from the expression.
    ///
    /// The expression is cloned, so the iterator does not borrow from `expr`.
    pub fn new(expr: &Expr) -> Self {
        let mut iter = ExprTopDownIterator {
            current: None,
            next: None,
            right_branches: vec![],
        };
        iter.iterate_expr(expr.clone());
        iter
    }

    /// Makes `expr` the node to be yielded next and records where to go after it:
    /// the left/only child becomes `next`, and the right operand of a binary
    /// node is pushed onto `right_branches` for later.
    fn iterate_expr(&mut self, expr: Expr) {
        self.current = Some(expr.clone());
        self.next = match expr {
            Expr::Seq(lhs, rhs) | Expr::Choice(lhs, rhs) => {
                self.right_branches.push(*rhs);
                Some(*lhs)
            }
            Expr::PosPred(inner)
            | Expr::NegPred(inner)
            | Expr::Rep(inner)
            | Expr::RepOnce(inner)
            | Expr::RepExact(inner, _)
            | Expr::RepMin(inner, _)
            | Expr::RepMax(inner, _)
            | Expr::RepMinMax(inner, ..)
            | Expr::Opt(inner)
            | Expr::Push(inner) => Some(*inner),
            #[cfg(feature = "grammar-extras")]
            Expr::NodeTag(inner, _) => Some(*inner),
            // Leaf expressions have nothing to descend into.
            _ => None,
        };
    }
}
impl Iterator for ExprTopDownIterator {
    type Item = Expr;

    /// Yields the current expression and advances: first into the pending
    /// child, otherwise into the most recently deferred right branch.
    fn next(&mut self) -> Option<Self::Item> {
        let yielded = self.current.take();
        // `or_else` is lazy: a right branch is popped only when there is no child.
        let upcoming = self.next.take().or_else(|| self.right_branches.pop());
        if let Some(expr) = upcoming {
            self.iterate_expr(expr);
        }
        yielded
    }
}
#[cfg(test)]
mod tests {
use super::*;
// A binary `Choice` must be yielded first, then its left operand, then its
// right operand, after which the iterator is exhausted.
#[test]
fn top_down_iterator() {
let expr = Expr::Choice(
Box::new(Expr::Str(String::from("a"))),
Box::new(Expr::Str(String::from("b"))),
);
let mut top_down = expr.iter_top_down();
assert_eq!(top_down.next(), Some(expr));
assert_eq!(top_down.next(), Some(Expr::Str(String::from("a"))));
assert_eq!(top_down.next(), Some(Expr::Str(String::from("b"))));
assert_eq!(top_down.next(), None);
}
// Mapping with the identity closure, bottom-up and then top-down, must give
// back an expression equal to the input. The tree nests many different
// variants so that most arms of both mappers are exercised.
#[test]
fn identity() {
let expr = Expr::Choice(
Box::new(Expr::Seq(
Box::new(Expr::Ident("a".to_owned())),
Box::new(Expr::Str("b".to_owned())),
)),
Box::new(Expr::PosPred(Box::new(Expr::NegPred(Box::new(Expr::Rep(
Box::new(Expr::RepOnce(Box::new(Expr::Opt(Box::new(Expr::Choice(
Box::new(Expr::Insens("c".to_owned())),
Box::new(Expr::Push(Box::new(Expr::Range(
"'d'".to_owned(),
"'e'".to_owned(),
)))),
)))))),
)))))),
);
assert_eq!(
expr.clone()
.map_bottom_up(|expr| expr)
.map_top_down(|expr| expr),
expr,
);
}
// Each test below pins the exact `Display` output of one `Expr` variant.
mod display {
use super::super::*;
#[test]
fn string() {
assert_eq!(Expr::Str("a".to_owned()).to_string(), r#""a""#);
}
#[test]
fn insens() {
assert_eq!(Expr::Insens("a".to_owned()).to_string(), r#"^"a""#);
}
#[test]
fn range() {
// The bounds are stored as bare characters; `Display` adds the single quotes.
assert_eq!(
Expr::Range("a".to_owned(), "z".to_owned()).to_string(),
r#"('a'..'z')"#,
);
}
#[test]
fn ident() {
assert_eq!(Expr::Ident("a".to_owned()).to_string(), "a");
}
#[test]
fn peek_slice() {
assert_eq!(Expr::PeekSlice(0, None).to_string(), "PEEK[0..]");
assert_eq!(Expr::PeekSlice(0, Some(-1)).to_string(), "PEEK[0..-1]");
}
#[test]
fn pos_pred() {
assert_eq!(
Expr::PosPred(Box::new(Expr::Ident("e".to_owned()))).to_string(),
"&e",
);
}
#[test]
fn neg_pred() {
assert_eq!(
Expr::NegPred(Box::new(Expr::Ident("e".to_owned()))).to_string(),
"!e",
);
}
#[test]
fn seq() {
// Two operands.
assert_eq!(
Expr::Seq(
Box::new(Expr::Ident("e1".to_owned())),
Box::new(Expr::Ident("e2".to_owned())),
)
.to_string(),
"(e1 ~ e2)",
);
// Right-nested sequences are flattened into one parenthesized group.
assert_eq!(
Expr::Seq(
Box::new(Expr::Ident("e1".to_owned())),
Box::new(Expr::Seq(
Box::new(Expr::Ident("e2".to_owned())),
Box::new(Expr::Ident("e3".to_owned())),
)),
)
.to_string(),
"(e1 ~ e2 ~ e3)",
);
assert_eq!(
Expr::Seq(
Box::new(Expr::Ident("e1".to_owned())),
Box::new(Expr::Seq(
Box::new(Expr::Ident("e2".to_owned())),
Box::new(Expr::Seq(
Box::new(Expr::Ident("e3".to_owned())),
Box::new(Expr::Ident("e4".to_owned())),
)),
)),
)
.to_string(),
"(e1 ~ e2 ~ e3 ~ e4)",
);
// A nested `Choice` stops the flattening and keeps its own parentheses.
assert_eq!(
Expr::Seq(
Box::new(Expr::Ident("e1".to_owned())),
Box::new(Expr::Choice(
Box::new(Expr::Ident("e2".to_owned())),
Box::new(Expr::Seq(
Box::new(Expr::Ident("e3".to_owned())),
Box::new(Expr::Ident("e4".to_owned())),
)),
)),
)
.to_string(),
"(e1 ~ (e2 | (e3 ~ e4)))",
);
assert_eq!(
Expr::Seq(
Box::new(Expr::Ident("e1".to_owned())),
Box::new(Expr::Seq(
Box::new(Expr::Ident("e2".to_owned())),
Box::new(Expr::Choice(
Box::new(Expr::Ident("e3".to_owned())),
Box::new(Expr::Ident("e4".to_owned())),
)),
)),
)
.to_string(),
"(e1 ~ e2 ~ (e3 | e4))",
);
}
#[test]
fn choice() {
// Two operands.
assert_eq!(
Expr::Choice(
Box::new(Expr::Ident("e1".to_owned())),
Box::new(Expr::Ident("e2".to_owned())),
)
.to_string(),
"(e1 | e2)",
);
// Right-nested choices are flattened into one parenthesized group.
assert_eq!(
Expr::Choice(
Box::new(Expr::Ident("e1".to_owned())),
Box::new(Expr::Choice(
Box::new(Expr::Ident("e2".to_owned())),
Box::new(Expr::Ident("e3".to_owned())),
)),
)
.to_string(),
"(e1 | e2 | e3)",
);
assert_eq!(
Expr::Choice(
Box::new(Expr::Ident("e1".to_owned())),
Box::new(Expr::Choice(
Box::new(Expr::Ident("e2".to_owned())),
Box::new(Expr::Choice(
Box::new(Expr::Ident("e3".to_owned())),
Box::new(Expr::Ident("e4".to_owned())),
)),
)),
)
.to_string(),
"(e1 | e2 | e3 | e4)",
);
// A nested `Seq` stops the flattening and keeps its own parentheses.
assert_eq!(
Expr::Choice(
Box::new(Expr::Ident("e1".to_owned())),
Box::new(Expr::Seq(
Box::new(Expr::Ident("e2".to_owned())),
Box::new(Expr::Choice(
Box::new(Expr::Ident("e3".to_owned())),
Box::new(Expr::Ident("e4".to_owned())),
)),
)),
)
.to_string(),
"(e1 | (e2 ~ (e3 | e4)))",
);
}
#[test]
fn opt() {
assert_eq!(
Expr::Opt(Box::new(Expr::Ident("e".to_owned()))).to_string(),
"e?",
);
}
#[test]
fn rep() {
assert_eq!(
Expr::Rep(Box::new(Expr::Ident("e".to_owned()))).to_string(),
"e*",
);
}
#[test]
fn rep_once() {
assert_eq!(
Expr::RepOnce(Box::new(Expr::Ident("e".to_owned()))).to_string(),
"e+",
);
}
#[test]
fn rep_exact() {
assert_eq!(
Expr::RepExact(Box::new(Expr::Ident("e".to_owned())), 1).to_string(),
"e{1}",
);
}
#[test]
fn rep_min() {
assert_eq!(
Expr::RepMin(Box::new(Expr::Ident("e".to_owned())), 1).to_string(),
"e{1,}",
);
}
#[test]
fn rep_max() {
assert_eq!(
Expr::RepMax(Box::new(Expr::Ident("e".to_owned())), 1).to_string(),
"e{,1}",
);
}
#[test]
fn rep_min_max() {
assert_eq!(
Expr::RepMinMax(Box::new(Expr::Ident("e".to_owned())), 1, 2).to_string(),
"e{1, 2}",
);
}
#[test]
fn skip() {
assert_eq!(
Expr::Skip(
["a", "bc"]
.into_iter()
.map(|s| s.to_owned())
.collect::<Vec<_>>(),
)
.to_string(),
r#"(!("a" | "bc") ~ ANY)*"#,
);
}
#[test]
fn push() {
assert_eq!(
Expr::Push(Box::new(Expr::Ident("e".to_owned()))).to_string(),
"PUSH(e)",
);
}
// The two tests below only exist when the `grammar-extras` feature is enabled.
#[test]
#[cfg(feature = "grammar-extras")]
fn push_literal() {
assert_eq!(
Expr::PushLiteral("one \" ' two".to_string()).to_string(),
r#"PUSH_LITERAL("one \" ' two")"#
)
}
#[test]
#[cfg(feature = "grammar-extras")]
fn node_tag() {
assert_eq!(
Expr::NodeTag(Box::new(Expr::Ident("expr".to_owned())), "label".to_owned())
.to_string(),
"(#label = expr)",
);
}
}
}

228
vendor/pest_meta/src/grammar.pest vendored Normal file
View File

@@ -0,0 +1,228 @@
// pest. The Elegant Parser
// Copyright (c) 2018 Dragoș Tiselice
//
// Licensed under the Apache License, Version 2.0
// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. All files in the project carrying such notice may not be copied,
// modified, or distributed except according to those terms.
//! Pest meta-grammar
//!
//! # Warning: Semantic Versioning
//! There may be non-breaking changes to the meta-grammar
//! between minor versions. Those non-breaking changes, however,
//! may translate into semver-breaking changes due to the additional variants
//! added to the `Rule` enum. This is a known issue and will be fixed in the
//! future (e.g. by increasing MSRV and non_exhaustive annotations).
/// The top-level rule of a grammar: any number of `//!` grammar docs followed by any number of rules.
grammar_rules = _{ SOI ~ grammar_doc* ~ grammar_rule* ~ EOI }
/// A rule definition, or a `///` documentation line.
grammar_rule = {
identifier ~ assignment_operator ~ modifier? ~ opening_brace ~ expression ~ closing_brace
| line_doc
}
/// Assignment operator.
assignment_operator = { "=" }
/// Opening brace for a rule.
opening_brace = { "{" }
/// Closing brace for a rule.
closing_brace = { "}" }
/// Opening parenthesis for a branch, PUSH, etc.
opening_paren = { "(" }
/// Closing parenthesis for a branch, PUSH, etc.
closing_paren = { ")" }
/// Opening bracket for PEEK (slice inside).
opening_brack = { "[" }
/// Closing bracket for PEEK (slice inside).
closing_brack = { "]" }
/// A rule modifier (the optional prefix before a rule's opening brace).
modifier = _{
silent_modifier
| atomic_modifier
| compound_atomic_modifier
| non_atomic_modifier
}
/// Silent rule prefix.
silent_modifier = { "_" }
/// Atomic rule prefix.
atomic_modifier = { "@" }
/// Compound atomic rule prefix.
compound_atomic_modifier = { "$" }
/// Non-atomic rule prefix.
non_atomic_modifier = { "!" }
/// A tag label: `#` followed by an identifier-like name.
tag_id = @{ "#" ~ ("_" | alpha) ~ ("_" | alpha_num)* }
/// For assigning labels to nodes.
node_tag = _{ tag_id ~ assignment_operator }
/// A rule expression: terms joined by infix operators, with an optional leading `|`.
expression = { choice_operator? ~ term ~ (infix_operator ~ term)* }
/// A rule term: a node with an optional tag, prefix operators and postfix operators.
term = { node_tag? ~ prefix_operator* ~ node ~ postfix_operator* }
/// A rule node (inside terms).
node = _{ opening_paren ~ expression ~ closing_paren | terminal }
/// A terminal expression.
terminal = _{ _push_literal | _push | peek_slice | identifier | string | insensitive_string | range }
/// Possible predicate (lookahead) prefixes for a term.
prefix_operator = _{ positive_predicate_operator | negative_predicate_operator }
/// Branches or sequences.
infix_operator = _{ sequence_operator | choice_operator }
/// Possible postfix operators (optional and repetition) for a term.
postfix_operator = _{
optional_operator
| repeat_operator
| repeat_once_operator
| repeat_exact
| repeat_min
| repeat_max
| repeat_min_max
}
/// A positive predicate.
positive_predicate_operator = { "&" }
/// A negative predicate.
negative_predicate_operator = { "!" }
/// A sequence operator.
sequence_operator = { "~" }
/// A choice operator.
choice_operator = { "|" }
/// An optional operator.
optional_operator = { "?" }
/// A repeat operator.
repeat_operator = { "*" }
/// A repeat at least once operator.
repeat_once_operator = { "+" }
/// A repeat exact times.
repeat_exact = { opening_brace ~ number ~ closing_brace }
/// A repeat at least times.
repeat_min = { opening_brace ~ number ~ comma ~ closing_brace }
/// A repeat at most times.
repeat_max = { opening_brace ~ comma ~ number ~ closing_brace }
/// A repeat in a range.
repeat_min_max = { opening_brace ~ number ~ comma ~ number ~ closing_brace }
/// A number (one or more decimal digits).
number = @{ '0'..'9'+ }
/// An integer number (positive or negative).
integer = @{ number | "-" ~ "0"* ~ '1'..'9' ~ number? }
/// A comma terminal.
comma = { "," }
/// A PUSH expression.
_push = { "PUSH" ~ opening_paren ~ expression ~ closing_paren }
/// A PUSH_LITERAL expression with one argument, which must be a literal string.
_push_literal = { "PUSH_LITERAL" ~ opening_paren ~ string ~ closing_paren }
/// A PEEK expression with a slice; both slice bounds are optional.
peek_slice = { "PEEK" ~ opening_brack ~ integer? ~ range_operator ~ integer? ~ closing_brack }
// NOTE: the `!"PUSH"` lookahead rejects every identifier that begins with `PUSH`.
/// An identifier.
identifier = @{ !"PUSH" ~ ("_" | alpha) ~ ("_" | alpha_num)* }
/// An alpha character.
alpha = _{ 'a'..'z' | 'A'..'Z' }
/// An alphanumeric character.
alpha_num = _{ alpha | '0'..'9' }
/// A string.
string = ${ quote ~ inner_str ~ quote }
/// An insensitive string.
insensitive_string = { "^" ~ string }
/// A character range.
range = { character ~ range_operator ~ character }
/// A single quoted character
character = ${ single_quote ~ inner_chr ~ single_quote }
/// The contents of a string (the part between the quotes), including escape sequences.
inner_str = @{ (!("\"" | "\\") ~ ANY)* ~ (escape ~ inner_str)? }
/// An escaped or any character.
inner_chr = @{ escape | ANY }
/// An escape sequence.
escape = @{ "\\" ~ ("\"" | "\\" | "r" | "n" | "t" | "0" | "'" | code | unicode) }
/// A hexadecimal code (`x` followed by two hex digits).
code = @{ "x" ~ hex_digit{2} }
/// A unicode code (`u{...}` with two to six hex digits).
unicode = @{ "u" ~ opening_brace ~ hex_digit{2, 6} ~ closing_brace }
/// A hexadecimal digit.
hex_digit = @{ '0'..'9' | 'a'..'f' | 'A'..'F' }
/// A double quote.
quote = { "\"" }
/// A single quote.
single_quote = { "'" }
/// A range operator.
range_operator = { ".." }
/// A newline character.
newline = _{ "\n" | "\r\n" }
/// A whitespace character.
WHITESPACE = _{ " " | "\t" | newline }
// The `!("/" | "!")` lookahead keeps `///` and `//!` lines out of this rule,
// so they are matched as `line_doc` / `grammar_doc` rather than as comments.
/// A single line comment.
line_comment = _{ ("//" ~ !("/" | "!") ~ (!newline ~ ANY)*) }
/// A multi-line comment (may be nested).
block_comment = _{ "/*" ~ (block_comment | !"*/" ~ ANY)* ~ "*/" }
/// A grammar comment.
COMMENT = _{ block_comment | line_comment }
// ref: https://doc.rust-lang.org/reference/comments.html
/// A space character.
space = _{ " " | "\t" }
/// A top-level comment.
grammar_doc = ${ "//!" ~ space? ~ inner_doc }
/// A rule comment.
line_doc = ${ "///" ~ space? ~ inner_doc }
/// A comment content (everything up to the end of the line).
inner_doc = @{ (!newline ~ ANY)* }

2
vendor/pest_meta/src/grammar.rs vendored Normal file

File diff suppressed because one or more lines are too long

76
vendor/pest_meta/src/lib.rs vendored Normal file
View File

@@ -0,0 +1,76 @@
// pest. The Elegant Parser
// Copyright (c) 2018 Dragoș Tiselice
//
// Licensed under the Apache License, Version 2.0
// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. All files in the project carrying such notice may not be copied,
// modified, or distributed except according to those terms.
//! # pest meta
//!
//! This crate parses, validates, optimizes, and converts pest's own grammars to ASTs.
#![doc(
html_logo_url = "https://raw.githubusercontent.com/pest-parser/pest/master/pest-logo.svg",
html_favicon_url = "https://raw.githubusercontent.com/pest-parser/pest/master/pest-logo.svg"
)]
#![warn(missing_docs, rust_2018_idioms, unused_qualifications)]
#[cfg(test)]
#[macro_use]
extern crate pest;
use std::fmt::Display;
use std::sync::LazyLock;
use pest::error::Error;
use pest::unicode::unicode_property_names;
pub mod ast;
pub mod optimizer;
pub mod parser;
pub mod validator;
/// A helper that will unwrap the result or panic
/// with the nicely formatted error message.
///
/// On `Ok`, the contained value is returned unchanged.
///
/// # Panics
///
/// On `Err`, panics with the message `grammar error`, a blank line, and then
/// every item of the error collection rendered with `Display`, separated by
/// blank lines.
pub fn unwrap_or_report<T, E>(result: Result<T, E>) -> T
where
    E: IntoIterator,
    E::Item: Display,
{
    result.unwrap_or_else(|errors| {
        let report = errors
            .into_iter()
            .map(|error| error.to_string())
            .collect::<Vec<_>>()
            .join("\n\n");
        panic!("grammar error\n\n{}", report)
    })
}
/// A tuple returned by the validation and processing of the parsed grammar.
/// The first element is the vector of used builtin rule names,
/// the second element is the vector of optimized rules.
///
/// The rule names are borrowed for `'i`; in `parse_and_optimize` that is the
/// lifetime of the grammar string passed in.
type UsedBuiltinAndOptimized<'i> = (Vec<&'i str>, Vec<optimizer::OptimizedRule>);
/// Parses, validates, processes and optimizes the provided grammar.
pub fn parse_and_optimize(
grammar: &str,
) -> Result<UsedBuiltinAndOptimized<'_>, Vec<Error<parser::Rule>>> {
let pairs = match parser::parse(parser::Rule::grammar_rules, grammar) {
Ok(pairs) => Ok(pairs),
Err(error) => Err(vec![error]),
}?;
let defaults = validator::validate_pairs(pairs.clone())?;
let ast = parser::consume_rules(pairs)?;
Ok((defaults, optimizer::optimize(ast)))
}
// Deprecated, lazily built list of the names yielded by
// `pest::unicode::unicode_property_names`; collected on first access.
#[doc(hidden)]
#[deprecated(note = "use `pest::unicode::unicode_property_names` instead")]
pub static UNICODE_PROPERTY_NAMES: LazyLock<Vec<&str>> =
    LazyLock::new(|| unicode_property_names().collect());

View File

@@ -0,0 +1,32 @@
// pest. The Elegant Parser
// Copyright (c) 2018 Dragoș Tiselice
//
// Licensed under the Apache License, Version 2.0
// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. All files in the project carrying such notice may not be copied,
// modified, or distributed except according to those terms.
use crate::ast::*;
/// Merges adjacent string literals inside atomic rules.
///
/// For a rule of type `RuleType::Atomic`, every sequence of two `Str`
/// expressions (or of two `Insens` expressions) is replaced, bottom-up, by a
/// single literal holding the concatenated text. Rules of any other type come
/// back with an unchanged expression.
pub fn concatenate(rule: Rule) -> Rule {
    let Rule { name, ty, expr } = rule;
    let is_atomic = ty == RuleType::Atomic;

    let expr = expr.map_bottom_up(|node| match node {
        Expr::Seq(lhs, rhs) if is_atomic => match (*lhs, *rhs) {
            (Expr::Str(first), Expr::Str(second)) => Expr::Str(first + &second),
            (Expr::Insens(first), Expr::Insens(second)) => Expr::Insens(first + &second),
            // Any other pair is put back together untouched.
            (first, second) => Expr::Seq(Box::new(first), Box::new(second)),
        },
        other => other,
    });

    Rule { name, ty, expr }
}

View File

@@ -0,0 +1,54 @@
// pest. The Elegant Parser
// Copyright (c) 2018 Dragoș Tiselice
//
// Licensed under the Apache License, Version 2.0
// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. All files in the project carrying such notice may not be copied,
// modified, or distributed except according to those terms.
use crate::ast::*;
/// Factors common prefixes out of choices, top-down.
///
/// Three rewrites are attempted on every `Choice`, in this order:
/// 1. `(a ~ b) | (a ~ c)` becomes `a ~ (b | c)`;
/// 2. `(a ~ rest) | a` becomes `a ~ rest?` (atomic and compound-atomic rules only);
/// 3. `a | (a ~ rest)` becomes `a`.
///
/// The first pattern that matches the shape of the two operands decides the
/// outcome; if its equality check fails the choice is rebuilt unchanged and
/// the later patterns are not tried.
pub fn factor(rule: Rule) -> Rule {
    let Rule { name, ty, expr } = rule;
    let is_atomic = matches!(ty, RuleType::Atomic | RuleType::CompoundAtomic);

    let expr = expr.map_top_down(|node| {
        let (left, right) = match node {
            Expr::Choice(left, right) => (*left, *right),
            other => return other,
        };

        match (left, right) {
            (Expr::Seq(head1, tail1), Expr::Seq(head2, tail2)) => {
                if head1 == head2 {
                    Expr::Seq(head1, Box::new(Expr::Choice(tail1, tail2)))
                } else {
                    Expr::Choice(
                        Box::new(Expr::Seq(head1, tail1)),
                        Box::new(Expr::Seq(head2, tail2)),
                    )
                }
            }
            // Converts `(rule ~ rest) | rule` to `rule ~ rest?`, avoiding trying to match `rule` twice.
            // This is only done for atomic rules, because other rule types have implicit whitespaces.
            // FIXME: "desugar" implicit whitespace rules before applying any optimizations
            (Expr::Seq(head, rest), alternative) if is_atomic => {
                if *head == alternative {
                    Expr::Seq(head, Box::new(Expr::Opt(rest)))
                } else {
                    Expr::Choice(Box::new(Expr::Seq(head, rest)), Box::new(alternative))
                }
            }
            // Converts `rule | (rule ~ rest)` to `rule` since `(rule ~ rest)`
            // will never match if `rule` didn't.
            (alternative, Expr::Seq(head, rest)) => {
                if alternative == *head {
                    alternative
                } else {
                    Expr::Choice(Box::new(alternative), Box::new(Expr::Seq(head, rest)))
                }
            }
            (left, right) => Expr::Choice(Box::new(left), Box::new(right)),
        }
    });

    Rule { name, ty, expr }
}

View File

@@ -0,0 +1,41 @@
// pest. The Elegant Parser
// Copyright (c) 2018 Dragoș Tiselice
//
// Licensed under the Apache License, Version 2.0
// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. All files in the project carrying such notice may not be copied,
// modified, or distributed except according to those terms.
use crate::ast::*;
/// Rewrites list-like repetitions, bottom-up.
///
/// Converts `(rule ~ rest)* ~ rule` to `rule ~ (rest ~ rule)*`, avoiding
/// matching the last `rule` twice. Every other expression is rebuilt
/// unchanged.
pub fn list(rule: Rule) -> Rule {
    let Rule { name, ty, expr } = rule;

    let expr = expr.map_bottom_up(|node| match node {
        Expr::Seq(head, tail) => match *head {
            Expr::Rep(repeated) => match *repeated {
                // The repeated sequence starts with the expression that follows the repetition.
                Expr::Seq(first, rest) if first == tail => {
                    Expr::Seq(first, Box::new(Expr::Rep(Box::new(Expr::Seq(rest, tail)))))
                }
                inner => Expr::Seq(Box::new(Expr::Rep(Box::new(inner))), tail),
            },
            other => Expr::Seq(Box::new(other), tail),
        },
        other => other,
    });

    Rule { name, ty, expr }
}

1146
vendor/pest_meta/src/optimizer/mod.rs vendored Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,153 @@
// pest. The Elegant Parser
// Copyright (c) 2018 Dragoș Tiselice
//
// Licensed under the Apache License, Version 2.0
// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. All files in the project carrying such notice may not be copied,
// modified, or distributed except according to those terms.
use std::collections::HashMap;
use crate::optimizer::*;
/// Wraps the state-modifying branches of `rule` in `RestoreOnErr`.
///
/// The rule's expression is mapped bottom-up through `wrap_branching_exprs`;
/// `rules` maps rule names to their expressions so that references to other
/// rules can be followed.
pub fn restore_on_err(
    rule: OptimizedRule,
    rules: &HashMap<String, OptimizedExpr>,
) -> OptimizedRule {
    let OptimizedRule { name, ty, expr } = rule;
    OptimizedRule {
        name,
        ty,
        expr: expr.map_bottom_up(|node| wrap_branching_exprs(node, rules)),
    }
}
/// Wraps the operands of `Opt`, `Choice` and `Rep` in `RestoreOnErr` when
/// `child_modifies_state` reports that they modify state.
///
/// Each operand is checked independently with a fresh cache; all other
/// expressions are returned unchanged.
fn wrap_branching_exprs(
    expr: OptimizedExpr,
    rules: &HashMap<String, OptimizedExpr>,
) -> OptimizedExpr {
    /// Returns `branch` wrapped in `RestoreOnErr` if it modifies state, else `branch` itself.
    fn guard(
        branch: Box<OptimizedExpr>,
        rules: &HashMap<String, OptimizedExpr>,
    ) -> Box<OptimizedExpr> {
        if child_modifies_state(&branch, rules, &mut HashMap::new()) {
            Box::new(OptimizedExpr::RestoreOnErr(branch))
        } else {
            branch
        }
    }

    match expr {
        OptimizedExpr::Opt(inner) => OptimizedExpr::Opt(guard(inner, rules)),
        OptimizedExpr::Choice(lhs, rhs) => {
            // Left operand is checked before the right one.
            let lhs = guard(lhs, rules);
            let rhs = guard(rhs, rules);
            OptimizedExpr::Choice(lhs, rhs)
        }
        OptimizedExpr::Rep(inner) => OptimizedExpr::Rep(guard(inner, rules)),
        other => other,
    }
}
/// Reports whether `expr`, or any rule it references, contains a `Push` or a
/// call to the `DROP` or `POP` rules.
///
/// `cache` records the answer per rule name. An entry of `None` marks a rule
/// whose body is currently being examined; meeting such a rule again (a
/// recursive reference) records and returns `false` for it instead of
/// recursing further. Names missing from `rules` count as not modifying state.
fn child_modifies_state(
    expr: &OptimizedExpr,
    rules: &HashMap<String, OptimizedExpr>,
    cache: &mut HashMap<String, Option<bool>>,
) -> bool {
    expr.iter_top_down().any(|node| match node {
        OptimizedExpr::Push(_) => true,
        OptimizedExpr::Ident(ref name) if name == "DROP" || name == "POP" => true,
        OptimizedExpr::Ident(ref name) => match cache.get(name).cloned() {
            // Already decided.
            Some(Some(known)) => known,
            // In progress further up the call stack: break the cycle.
            Some(None) => {
                cache.insert(name.to_owned(), Some(false));
                false
            }
            // First visit: mark as in progress, examine the rule body, store the verdict.
            None => {
                cache.insert(name.to_owned(), None);
                let verdict = match rules.get(name) {
                    Some(body) => child_modifies_state(body, rules, cache),
                    None => false,
                };
                cache.insert(name.to_owned(), Some(verdict));
                verdict
            }
        },
        _ => false,
    })
}
// Tests for `restore_on_err`: each builds a single rule, runs the pass with a
// rule map derived from that same rule, and compares against the expected tree.
#[cfg(test)]
mod tests {
use super::*;
use crate::optimizer::OptimizedExpr::*;
// No `Push` anywhere in the rule: the expression must come back unchanged.
#[test]
fn restore_no_stack_children() {
let rules = vec![OptimizedRule {
name: "rule".to_owned(),
ty: RuleType::Normal,
expr: box_tree!(Opt(Str("a".to_string()))),
}];
assert_eq!(
restore_on_err(rules[0].clone(), &to_optimized_hash_map(&rules)),
rules[0].clone()
);
}
// A `Push` under `Rep`: the repeated child must be wrapped in `RestoreOnErr`.
#[test]
fn restore_with_child_stack_ops() {
let rules = vec![OptimizedRule {
name: "rule".to_owned(),
ty: RuleType::Normal,
expr: box_tree!(Rep(Push(Str("a".to_string())))),
}];
let restored = OptimizedRule {
name: "rule".to_owned(),
ty: RuleType::Normal,
expr: box_tree!(Rep(RestoreOnErr(Push(Str("a".to_string()))))),
};
assert_eq!(
restore_on_err(rules[0].clone(), &to_optimized_hash_map(&rules)),
restored
);
}
// Only the `Choice` branch containing a `Push` is wrapped; the other is left alone.
#[test]
fn restore_choice_branch_with_and_branch_without() {
let rules = vec![OptimizedRule {
name: "rule".to_owned(),
ty: RuleType::Normal,
expr: box_tree!(Choice(Push(Str("a".to_string())), Str("a".to_string()))),
}];
let restored = OptimizedRule {
name: "rule".to_owned(),
ty: RuleType::Normal,
expr: box_tree!(Choice(
RestoreOnErr(Push(Str("a".to_string()))),
Str("a".to_string())
)),
};
assert_eq!(
restore_on_err(rules[0].clone(), &to_optimized_hash_map(&rules)),
restored
);
}
}

View File

@@ -0,0 +1,43 @@
// pest. The Elegant Parser
// Copyright (c) 2018 Dragoș Tiselice
//
// Licensed under the Apache License, Version 2.0
// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. All files in the project carrying such notice may not be copied,
// modified, or distributed except according to those terms.
use crate::ast::*;
/// Re-associates left-nested sequences and choices in `rule` so that they nest to the
/// right.
///
/// `Seq(Seq(a, b), c)` becomes `Seq(a, Seq(b, c))`, and `Choice` is handled the same
/// way. The rewrite is applied through `Expr::map_top_down`; the rule's name and type
/// are carried over unchanged.
pub fn rotate(rule: Rule) -> Rule {
    /// Rotates the root of `node` until its left child no longer uses the same
    /// operator as the root. Any expression other than `Seq`/`Choice` is returned as is.
    fn rotate_internal(node: Expr) -> Expr {
        match node {
            Expr::Seq(left, right) => match *left {
                Expr::Seq(inner_left, inner_right) => {
                    // Push the right half of the left child down into the tail, then
                    // retry: the new left child may itself still be a sequence.
                    let tail = Expr::Seq(inner_right, right);
                    rotate_internal(Expr::Seq(inner_left, Box::new(tail)))
                }
                other => Expr::Seq(Box::new(other), right),
            },
            Expr::Choice(left, right) => match *left {
                Expr::Choice(inner_left, inner_right) => {
                    let tail = Expr::Choice(inner_right, right);
                    rotate_internal(Expr::Choice(inner_left, Box::new(tail)))
                }
                other => Expr::Choice(Box::new(other), right),
            },
            other => other,
        }
    }

    let Rule { name, ty, expr } = rule;
    let expr = expr.map_top_down(rotate_internal);
    Rule { name, ty, expr }
}

View File

@@ -0,0 +1,76 @@
// pest. The Elegant Parser
// Copyright (c) 2018 Dragoș Tiselice
//
// Licensed under the Apache License, Version 2.0
// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. All files in the project carrying such notice may not be copied,
// modified, or distributed except according to those terms.
use std::collections::HashMap;
use crate::ast::*;
/// Rewrites "consume anything until one of these strings" loops in atomic rules into
/// a single `Expr::Skip`.
///
/// Only rules of type `RuleType::Atomic` are touched. Inside them, every expression of
/// the shape `Rep(Seq(NegPred(p), Ident("ANY")))` is replaced by `Expr::Skip(strings)`
/// when `p` consists solely of string literals joined by choices, possibly reached
/// through identifiers that `map` resolves to such expressions. Anything else is left
/// unchanged.
///
/// `map` maps rule names to their expressions and is used to inline identifiers.
pub fn skip(rule: Rule, map: &HashMap<String, Expr>) -> Rule {
    /// Collects the string literals of `expr` into `choices` and returns them wrapped
    /// in `Expr::Skip`, or `None` as soon as something other than a string, a choice
    /// or a resolvable identifier is found.
    ///
    /// Works on borrowed expressions so that only the collected strings are cloned,
    /// never whole subtrees or rule bodies.
    fn populate_choices(
        expr: &Expr,
        map: &HashMap<String, Expr>,
        mut choices: Vec<String>,
    ) -> Option<Expr> {
        match expr {
            Expr::Choice(lhs, rhs) => match &**lhs {
                Expr::Str(string) => {
                    choices.push(string.clone());
                    populate_choices(rhs, map, choices)
                }
                // Try inlining rule in choices
                Expr::Ident(name) => {
                    let inlined = map
                        .get(name)
                        .and_then(|body| populate_choices(body, map, vec![]));
                    match inlined {
                        Some(Expr::Skip(mut inlined_choices)) => {
                            choices.append(&mut inlined_choices);
                            populate_choices(rhs, map, choices)
                        }
                        _ => None,
                    }
                }
                _ => None,
            },
            Expr::Str(string) => {
                choices.push(string.clone());
                Some(Expr::Skip(choices))
            }
            // Try inlining single rule
            Expr::Ident(name) => map
                .get(name)
                .and_then(|body| populate_choices(body, map, choices)),
            _ => None,
        }
    }

    let Rule { name, ty, expr } = rule;
    let expr = if ty == RuleType::Atomic {
        expr.map_top_down(|expr| {
            // Inspect the node by reference; it is only given up when a replacement
            // has actually been built.
            if let Expr::Rep(repeated) = &expr {
                if let Expr::Seq(lhs, rhs) = &**repeated {
                    if let (Expr::NegPred(negated), Expr::Ident(ident)) = (&**lhs, &**rhs) {
                        if ident == "ANY" {
                            if let Some(skipped) = populate_choices(negated, map, vec![]) {
                                return skipped;
                            }
                        }
                    }
                }
            }
            expr
        })
    } else {
        expr
    };

    Rule { name, ty, expr }
}

View File

@@ -0,0 +1,67 @@
// pest. The Elegant Parser
// Copyright (c) 2018 Dragoș Tiselice
//
// Licensed under the Apache License, Version 2.0
// <LICENSE-APACHE or http://www.apache.org/licenses/LICENSE-2.0> or the MIT
// license <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. All files in the project carrying such notice may not be copied,
// modified, or distributed except according to those terms.
use crate::ast::*;
pub fn unroll(rule: Rule) -> Rule {
let Rule { name, ty, expr } = rule;
Rule {
name,
ty,
expr: expr.map_bottom_up(|expr| match expr {
#[cfg(not(feature = "grammar-extras"))]
Expr::RepOnce(expr) => Expr::Seq(expr.clone(), Box::new(Expr::Rep(expr))),
Expr::RepExact(expr, num) => (1..num + 1)
.map(|_| *expr.clone())
.rev()
.fold(None, |rep, expr| match rep {
None => Some(expr),
Some(rep) => Some(Expr::Seq(Box::new(expr), Box::new(rep))),
})
.unwrap(),
Expr::RepMin(expr, min) => (1..min + 2)
.map(|i| {
if i <= min {
*expr.clone()
} else {
Expr::Rep(expr.clone())
}
})
.rev()
.fold(None, |rep, expr| match rep {
None => Some(expr),
Some(rep) => Some(Expr::Seq(Box::new(expr), Box::new(rep))),
})
.unwrap(),
Expr::RepMax(expr, max) => (1..max + 1)
.map(|_| Expr::Opt(expr.clone()))
.rev()
.fold(None, |rep, expr| match rep {
None => Some(expr),
Some(rep) => Some(Expr::Seq(Box::new(expr), Box::new(rep))),
})
.unwrap(),
Expr::RepMinMax(expr, min, max) => (1..max + 1)
.map(|i| {
if i <= min {
*expr.clone()
} else {
Expr::Opt(expr.clone())
}
})
.rev()
.fold(None, |rep, expr| match rep {
None => Some(expr),
Some(rep) => Some(Expr::Seq(Box::new(expr), Box::new(rep))),
})
.unwrap(),
expr => expr,
}),
}
}

1847
vendor/pest_meta/src/parser.rs vendored Normal file

File diff suppressed because it is too large Load Diff

1967
vendor/pest_meta/src/validator.rs vendored Normal file

File diff suppressed because it is too large Load Diff