feat: implement storybook DSL with template composition and validation
Add a complete domain-specific language for authoring narrative content for agent simulations.

Features:
- Complete parser using LALRPOP + logos lexer
- Template composition (includes + multiple inheritance)
- Strict-mode validation for templates
- Reserved-keyword protection
- Semantic validators (trait ranges, schedule overlaps, life arcs, behaviors)
- Name resolution and cross-reference tracking
- CLI tool (validate, inspect, query commands)
- Query API with filtering
- 260 comprehensive tests (unit, integration, property-based)

Implementation phases:
- Phase 1 (Parser): Complete
- Phase 2 (Resolution + Validation): Complete
- Phase 3 (Public API + CLI): Complete

BREAKING CHANGE: Initial implementation
This commit is contained in:
282
src/syntax/ast.rs
Normal file
282
src/syntax/ast.rs
Normal file
@@ -0,0 +1,282 @@
|
||||
/// Source location for error reporting.
///
/// `start`/`end` are byte offsets into the original source text
/// (`start..end`, end exclusive — the convention of the `logos` spans
/// this is built from). Two `usize`s are trivially copyable, so the
/// type derives `Copy`, plus `Eq`/`Hash` so spans can key maps/sets.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct Span {
    pub start: usize,
    pub end: usize,
}

impl Span {
    /// Create a span covering byte offsets `start..end`.
    pub fn new(start: usize, end: usize) -> Self {
        Self { start, end }
    }

    /// Length of the span in bytes (0 if `end < start`).
    pub fn len(&self) -> usize {
        self.end.saturating_sub(self.start)
    }

    /// True when the span covers no bytes.
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }
}
|
||||
|
||||
/// Top-level file containing multiple declarations
#[derive(Debug, Clone, PartialEq)]
pub struct File {
    pub declarations: Vec<Declaration>,
}

/// Any top-level declaration
///
/// One variant per declaration keyword the grammar accepts
/// (`use`, `character`, `template`, `life_arc`, `schedule`, `behavior`,
/// `institution`, `relationship`, `location`, `species`, `enum`).
#[derive(Debug, Clone, PartialEq)]
pub enum Declaration {
    Use(UseDecl),
    Character(Character),
    Template(Template),
    LifeArc(LifeArc),
    Schedule(Schedule),
    Behavior(Behavior),
    Institution(Institution),
    Relationship(Relationship),
    Location(Location),
    Species(Species),
    Enum(EnumDecl),
}

/// Use statement for importing definitions
#[derive(Debug, Clone, PartialEq)]
pub struct UseDecl {
    /// Qualified path segments: `foo::bar` -> `["foo", "bar"]`.
    pub path: Vec<String>,
    pub kind: UseKind,
    pub span: Span,
}

/// Shape of a `use` target.
#[derive(Debug, Clone, PartialEq)]
pub enum UseKind {
    Single, // use foo::bar
    Grouped(Vec<String>), // use foo::{bar, baz}
    Wildcard, // use foo::*
}
|
||||
|
||||
/// Character definition
#[derive(Debug, Clone, PartialEq)]
pub struct Character {
    pub name: String,
    pub fields: Vec<Field>,
    /// Templates this character instantiates via a `from` clause;
    /// `None` when no clause was written.
    pub template: Option<Vec<String>>, // `from Template1, Template2`
    pub span: Span,
}

/// Template definition (like Character but allows range values)
#[derive(Debug, Clone, PartialEq)]
pub struct Template {
    pub name: String,
    pub fields: Vec<Field>,
    /// True when declared with the `strict` modifier.
    pub strict: bool,
    /// Other templates pulled in via `include Name` lines (composition).
    pub includes: Vec<String>,
    pub span: Span,
}

/// Field in a structured definition
#[derive(Debug, Clone, PartialEq)]
pub struct Field {
    pub name: String,
    pub value: Value,
    pub span: Span,
}
|
||||
|
||||
/// Field value types
#[derive(Debug, Clone, PartialEq)]
pub enum Value {
    Int(i64),
    Float(f64),
    String(String),
    Bool(bool),
    /// Both ends are `Int` or both `Float`; the grammar does not mix them.
    Range(Box<Value>, Box<Value>), // For templates: 20..40
    Time(Time),
    Duration(Duration),
    Identifier(Vec<String>), // Qualified path reference
    List(Vec<Value>),
    /// Nested `{ field: value, ... }` block.
    Object(Vec<Field>),
    ProseBlock(ProseBlock),
    /// Template instantiation with overrides: `@Template { ... }`.
    Override(Override),
}
|
||||
|
||||
/// Time literal (HH:MM or HH:MM:SS).
///
/// All-integer POD, so it derives `Copy`/`Eq`/`Hash`. Field order
/// (hour, minute, second) makes the derived `Ord` chronological, which
/// is what schedule-overlap checks need.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct Time {
    pub hour: u8,
    pub minute: u8,
    pub second: u8,
}

/// Duration literal (e.g., 2h30m).
///
/// Components are stored as written by the parser, not normalized
/// (`90m` stays `minutes: 90`), so the derived `Ord` is lexicographic
/// over (hours, minutes, seconds) rather than by total length for
/// denormalized values.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub struct Duration {
    pub hours: u32,
    pub minutes: u32,
    pub seconds: u32,
}
|
||||
|
||||
/// Prose block with tag
///
/// Free-form text delimited by `---tag ... ---` in the source.
#[derive(Debug, Clone, PartialEq)]
pub struct ProseBlock {
    pub tag: String,
    /// Raw text between the markers (trailing whitespace trimmed by the lexer).
    pub content: String,
    pub span: Span,
}

/// Override specification for template instantiation
///
/// Syntax: `@Template { field: value, remove x, append y: v }`.
#[derive(Debug, Clone, PartialEq)]
pub struct Override {
    pub base: Vec<String>, // Template path
    pub overrides: Vec<OverrideOp>,
    pub span: Span,
}

/// A single operation inside an override block.
#[derive(Debug, Clone, PartialEq)]
pub enum OverrideOp {
    Set(Field), // field: value
    Remove(String), // remove field
    Append(Field), // append field
}
|
||||
|
||||
/// Life arc state machine
#[derive(Debug, Clone, PartialEq)]
pub struct LifeArc {
    pub name: String,
    pub states: Vec<ArcState>,
    pub span: Span,
}

/// One named state and its outgoing transitions.
#[derive(Debug, Clone, PartialEq)]
pub struct ArcState {
    pub name: String,
    pub transitions: Vec<Transition>,
    pub span: Span,
}

/// An `on <condition> -> <state>` edge between arc states.
#[derive(Debug, Clone, PartialEq)]
pub struct Transition {
    /// Target state name (resolved against sibling `ArcState`s later).
    pub to: String,
    pub condition: Expr,
    pub span: Span,
}

/// Schedule definition
#[derive(Debug, Clone, PartialEq)]
pub struct Schedule {
    pub name: String,
    pub blocks: Vec<ScheduleBlock>,
    pub span: Span,
}

/// One `HH:MM -> HH:MM : activity` entry within a schedule.
#[derive(Debug, Clone, PartialEq)]
pub struct ScheduleBlock {
    pub start: Time,
    pub end: Time,
    pub activity: String,
    pub span: Span,
}
|
||||
|
||||
/// Behavior tree definition
#[derive(Debug, Clone, PartialEq)]
pub struct Behavior {
    pub name: String,
    /// Single root node; children hang off Selector/Sequence variants.
    pub root: BehaviorNode,
    pub span: Span,
}

/// Node kinds of a behavior tree.
#[derive(Debug, Clone, PartialEq)]
pub enum BehaviorNode {
    Selector(Vec<BehaviorNode>), // ? operator
    Sequence(Vec<BehaviorNode>), // > operator (context-dependent)
    Condition(Expr),
    Action(String, Vec<Field>), // Action name + parameters
    Decorator(String, Box<BehaviorNode>),
    SubTree(Vec<String>), // Reference to another behavior
}

/// Institution definition
#[derive(Debug, Clone, PartialEq)]
pub struct Institution {
    pub name: String,
    pub fields: Vec<Field>,
    pub span: Span,
}

/// Relationship definition
#[derive(Debug, Clone, PartialEq)]
pub struct Relationship {
    pub name: String,
    pub participants: Vec<Participant>,
    pub fields: Vec<Field>,
    pub span: Span,
}

/// One party to a relationship, optionally with a role and per-side blocks.
#[derive(Debug, Clone, PartialEq)]
pub struct Participant {
    pub role: Option<String>, // "as parent"
    pub name: Vec<String>, // Qualified path
    /// Fields from an optional `self { ... }` block.
    pub self_block: Option<Vec<Field>>,
    /// Fields from an optional `other { ... }` block.
    pub other_block: Option<Vec<Field>>,
    pub span: Span,
}
|
||||
|
||||
/// Location definition
#[derive(Debug, Clone, PartialEq)]
pub struct Location {
    pub name: String,
    pub fields: Vec<Field>,
    pub span: Span,
}

/// Species definition
#[derive(Debug, Clone, PartialEq)]
pub struct Species {
    pub name: String,
    pub fields: Vec<Field>,
    pub span: Span,
}

/// Enum definition
#[derive(Debug, Clone, PartialEq)]
pub struct EnumDecl {
    pub name: String,
    pub variants: Vec<String>,
    pub span: Span,
}

/// Expression AST for conditions and queries
#[derive(Debug, Clone, PartialEq)]
pub enum Expr {
    IntLit(i64),
    FloatLit(f64),
    StringLit(String),
    BoolLit(bool),
    Identifier(Vec<String>),
    /// `base.field` projection.
    FieldAccess(Box<Expr>, String),
    Comparison(Box<Expr>, CompOp, Box<Expr>),
    Logical(Box<Expr>, LogicalOp, Box<Expr>),
    Unary(UnaryOp, Box<Expr>),
    Quantifier(QuantifierKind, String, Box<Expr>, Box<Expr>), /* forall/exists x in collection:
                                                               * predicate */
}
|
||||
|
||||
/// Comparison operators for [`Expr::Comparison`].
///
/// Fieldless `Copy` enums cost nothing to also derive `Eq` and `Hash`,
/// which lets operators be used in sets/maps (e.g. by validators).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum CompOp {
    Eq, // ==
    Ne, // !=
    Lt, // <
    Le, // <=
    Gt, // >
    Ge, // >=
}

/// Binary logical connectives for [`Expr::Logical`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum LogicalOp {
    And,
    Or,
}

/// Unary operators for [`Expr::Unary`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum UnaryOp {
    Not,
    Neg,
}

/// Quantifier kinds for [`Expr::Quantifier`] (`forall` / `exists`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum QuantifierKind {
    ForAll,
    Exists,
}
|
||||
424
src/syntax/lexer.rs
Normal file
424
src/syntax/lexer.rs
Normal file
@@ -0,0 +1,424 @@
|
||||
use std::fmt;
|
||||
|
||||
use logos::Logos;
|
||||
|
||||
/// Token types for the Storybook language
///
/// Whitespace and both comment forms are skipped at the logos level.
/// The `ProseBlock` variant is never produced by logos itself: the
/// wrapper `Lexer` below synthesizes it after seeing a `---` marker.
#[derive(Logos, Debug, Clone, PartialEq)]
#[logos(skip r"[ \t\n\f]+")] // Skip whitespace
#[logos(skip r"//[^\n]*")] // Skip line comments
#[logos(skip r"/\*([^*]|\*[^/])*\*/")] // Skip block comments
pub enum Token {
    // Keywords
    #[token("use")]
    Use,
    #[token("character")]
    Character,
    #[token("template")]
    Template,
    #[token("life_arc")]
    LifeArc,
    #[token("schedule")]
    Schedule,
    #[token("behavior")]
    Behavior,
    #[token("institution")]
    Institution,
    #[token("relationship")]
    Relationship,
    #[token("location")]
    Location,
    #[token("species")]
    Species,
    #[token("enum")]
    Enum,
    #[token("state")]
    State,
    #[token("on")]
    On,
    #[token("as")]
    As,
    #[token("self")]
    SelfKw,
    #[token("other")]
    Other,
    #[token("remove")]
    Remove,
    #[token("append")]
    Append,
    #[token("forall")]
    ForAll,
    #[token("exists")]
    Exists,
    #[token("in")]
    In,
    #[token("where")]
    Where,
    #[token("and")]
    And,
    #[token("or")]
    Or,
    #[token("not")]
    Not,
    #[token("strict")]
    Strict,
    #[token("include")]
    Include,
    #[token("from")]
    From,
    #[token("is")]
    Is,
    #[token("true")]
    True,
    #[token("false")]
    False,

    // Identifiers and literals
    // (keywords win over Ident because logos prefers #[token] matches
    // of the same length)
    #[regex(r"[a-zA-Z_][a-zA-Z0-9_]*", |lex| lex.slice().to_string())]
    Ident(String),

    // NOTE(review): the leading `-?` makes a minus adjacent to digits
    // part of the number, so `a-1` lexes as Ident("a"), IntLit(-1) —
    // confirm the grammar never needs binary minus.
    #[regex(r"-?[0-9]+", |lex| lex.slice().parse::<i64>().ok())]
    IntLit(i64),

    #[regex(r"-?[0-9]+\.[0-9]+", |lex| lex.slice().parse::<f64>().ok())]
    FloatLit(f64),

    // Quoted string; escapes are kept verbatim (no unescaping here).
    #[regex(r#""([^"\\]|\\.)*""#, |lex| {
        let s = lex.slice();
        s[1..s.len()-1].to_string()
    })]
    StringLit(String),

    // Time literal: HH:MM or HH:MM:SS
    // (stored raw; parsed into `ast::Time` by the grammar)
    #[regex(r"[0-9]{2}:[0-9]{2}(:[0-9]{2})?", |lex| lex.slice().to_string())]
    TimeLit(String),

    // Duration literal: e.g., 2h30m, 45m, 1h
    // (stored raw; parsed into `ast::Duration` by the grammar)
    #[regex(r"[0-9]+[hms]([0-9]+[hms])*", |lex| lex.slice().to_string())]
    DurationLit(String),

    // Punctuation
    #[token("{")]
    LBrace,
    #[token("}")]
    RBrace,
    #[token("(")]
    LParen,
    #[token(")")]
    RParen,
    #[token("[")]
    LBracket,
    #[token("]")]
    RBracket,
    #[token(":")]
    Colon,
    #[token("::")]
    ColonColon,
    #[token(";")]
    Semicolon,
    #[token(",")]
    Comma,
    #[token(".")]
    Dot,
    // Longest-match means `..` beats `.` and `::` beats `:`.
    #[token("..")]
    DotDot,
    #[token("*")]
    Star,
    #[token("?")]
    Question,
    #[token("@")]
    At,

    // Operators
    #[token(">")]
    Gt,
    #[token(">=")]
    Ge,
    #[token("<")]
    Lt,
    #[token("<=")]
    Le,
    #[token("->")]
    Arrow,

    // Special markers
    #[token("---")]
    ProseMarker,

    // Prose block (handled specially)
    // Synthesized by the wrapper `Lexer`, never by logos.
    ProseBlock(super::ast::ProseBlock),

    // Error token
    // Emitted by the wrapper for logos lex errors and unrecognized input.
    Error,
}
|
||||
|
||||
impl fmt::Display for Token {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match self {
|
||||
| Token::Ident(s) => write!(f, "identifier '{}'", s),
|
||||
| Token::IntLit(n) => write!(f, "integer {}", n),
|
||||
| Token::FloatLit(n) => write!(f, "float {}", n),
|
||||
| Token::StringLit(s) => write!(f, "string \"{}\"", s),
|
||||
| Token::TimeLit(s) => write!(f, "time {}", s),
|
||||
| Token::DurationLit(s) => write!(f, "duration {}", s),
|
||||
| Token::ProseBlock(pb) => write!(f, "prose block ---{}", pb.tag),
|
||||
| _ => write!(f, "{:?}", self),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Lexer state machine for handling prose blocks
#[derive(Debug, Clone)]
enum LexerState {
    /// Regular token scanning via the logos lexer.
    Normal,
    ProseTag, // After seeing first ---
    ProseContent(String, usize), // Tag + content start position
}

/// Wrapper lexer that handles two-mode scanning
///
/// Delegates to logos for normal tokens; when a `---` marker appears it
/// takes over to scan the tag line and raw prose body itself, then hands
/// control back to a fresh logos lexer.
pub struct Lexer<'a> {
    source: &'a str,
    /// Current byte offset into `source`.
    position: usize,
    state: LexerState,
    /// `None` while inside a prose block (logos is suspended).
    normal_lexer: Option<logos::Lexer<'a, Token>>,
}
|
||||
|
||||
impl<'a> Lexer<'a> {
    /// Create a lexer over `source`, starting in normal (logos) mode.
    pub fn new(source: &'a str) -> Self {
        Self {
            source,
            position: 0,
            state: LexerState::Normal,
            normal_lexer: Some(Token::lexer(source)),
        }
    }

    /// Scan the `---tag` line. Precondition: `self.position` points at
    /// the opening `---` (set by `next()` when it saw ProseMarker).
    /// Transitions to `ProseContent` and re-enters `next()`.
    fn scan_prose_tag(&mut self) -> Option<(usize, Token, usize)> {
        let _start = self.position;
        self.position += 3; // Skip ---

        // Skip whitespace
        // NOTE(review): advances one byte per matched char; a multi-byte
        // Unicode whitespace (e.g. U+00A0) would leave `position`
        // mid-codepoint and panic on the next slice — confirm inputs are
        // ASCII-whitespace only here.
        while self.position < self.source.len() &&
            self.source[self.position..].starts_with(|c: char| c.is_whitespace())
        {
            self.position += 1;
        }

        // Read tag until whitespace or newline
        let tag_start = self.position;
        while self.position < self.source.len() {
            let ch = self.source[self.position..].chars().next().unwrap();
            if ch.is_whitespace() {
                break;
            }
            self.position += ch.len_utf8();
        }

        let tag = self.source[tag_start..self.position].to_string();

        // Skip to end of line
        while self.position < self.source.len() {
            let ch = self.source[self.position..].chars().next().unwrap();
            if ch == '\n' {
                self.position += 1;
                break;
            }
            self.position += ch.len_utf8();
        }

        self.state = LexerState::ProseContent(tag, self.position);
        self.next()
    }

    /// Scan raw prose until a closing `---` at the start of a line,
    /// then emit a single `Token::ProseBlock` and resume normal mode.
    /// Returns `None` (end of token stream) if the block never closes.
    fn scan_prose_content(
        &mut self,
        tag: String,
        content_start: usize,
    ) -> Option<(usize, Token, usize)> {
        let remaining = &self.source[content_start..];
        let mut byte_offset = 0;

        // Scan until we find closing ---
        while byte_offset < remaining.len() {
            if remaining[byte_offset..].starts_with("---") {
                // Check if it's at start of line (or after whitespace)
                // so dashes inside prose ("well-known") don't terminate.
                let is_line_start = byte_offset == 0 ||
                    remaining[..byte_offset]
                        .chars()
                        .rev()
                        .take_while(|&c| c != '\n')
                        .all(|c| c.is_whitespace());

                if is_line_start {
                    // Found closing marker
                    let content_end = content_start + byte_offset;
                    let content = self.source[content_start..content_end]
                        .trim_end()
                        .to_string();
                    // NOTE(review): assumes the opening line was exactly
                    // "---tag\n" (tag.len() + 4 bytes); extra whitespace on
                    // that line makes this span start slightly off — confirm.
                    let start = content_start.saturating_sub(tag.len() + 4); // Include opening ---tag
                    self.position = content_end + 3; // Skip closing ---
                    self.state = LexerState::Normal;
                    // NOTE(review): restarting logos on a suffix makes all
                    // subsequent `lexer.span()` values relative to this
                    // suffix, not the whole file, so spans after the first
                    // prose block are shifted — verify against error
                    // reporting expectations.
                    self.normal_lexer = Some(Token::lexer(&self.source[self.position..]));

                    let prose_block = super::ast::ProseBlock {
                        tag,
                        content,
                        span: super::ast::Span::new(start, self.position),
                    };
                    return Some((start, Token::ProseBlock(prose_block), self.position));
                }
            }

            // Advance by one UTF-8 character to avoid char boundary issues
            if let Some(ch) = remaining[byte_offset..].chars().next() {
                byte_offset += ch.len_utf8();
            } else {
                break;
            }
        }

        // EOF reached without closing marker - treat as error
        // NOTE(review): this silently ends the token stream rather than
        // emitting Token::Error / UnclosedProseBlock — confirm the parser
        // surfaces this as UnexpectedEof.
        None
    }
}
|
||||
|
||||
impl<'a> Iterator for Lexer<'a> {
    // (start, token, end) triples in the shape LALRPOP's external
    // lexer interface expects.
    type Item = (usize, Token, usize);

    fn next(&mut self) -> Option<Self::Item> {
        match &self.state {
            | LexerState::Normal => {
                let lexer = self.normal_lexer.as_mut()?;

                let token = lexer.next()?;
                // NOTE(review): after a prose block the logos lexer is
                // rebuilt over a suffix of `source`, so this span is
                // relative to that suffix rather than the file — confirm.
                let span = lexer.span();

                match token {
                    | Ok(Token::ProseMarker) => {
                        // Switch to prose mode
                        let marker_pos = span.start;
                        self.position = marker_pos;
                        self.state = LexerState::ProseTag;
                        self.normal_lexer = None;
                        self.scan_prose_tag()
                    },
                    | Ok(tok) => {
                        self.position = span.end;
                        Some((span.start, tok, span.end))
                    },
                    | Err(_) => {
                        // Surface logos lex failures as an explicit token
                        // instead of silently stopping.
                        self.position = span.end;
                        Some((span.start, Token::Error, span.end))
                    },
                }
            },
            | LexerState::ProseTag => {
                // Should not happen - scan_prose_tag transitions state
                None
            },
            | LexerState::ProseContent(tag, content_start) => {
                // Clone out of the borrowed state before the &mut call.
                let tag = tag.clone();
                let content_start = *content_start;
                self.scan_prose_content(tag, content_start)
            },
        }
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// A plain declaration lexes to the expected token sequence.
    #[test]
    fn test_basic_tokens() {
        let input = "character Martha { age: 34 }";
        let lexer = Lexer::new(input);
        let tokens: Vec<Token> = lexer.map(|(_, tok, _)| tok).collect();

        assert_eq!(
            tokens,
            vec![
                Token::Character,
                Token::Ident("Martha".to_string()),
                Token::LBrace,
                Token::Ident("age".to_string()),
                Token::Colon,
                Token::IntLit(34),
                Token::RBrace,
            ]
        );
    }

    /// A `---tag ... ---` section collapses into one ProseBlock token.
    #[test]
    fn test_prose_block() {
        let input = r#"
---backstory
Martha grew up in a small town.
She loved baking from a young age.
---
"#;
        let lexer = Lexer::new(input.trim());
        let tokens: Vec<Token> = lexer.map(|(_, tok, _)| tok).collect();

        assert_eq!(tokens.len(), 1);
        match &tokens[0] {
            | Token::ProseBlock(pb) => {
                assert_eq!(pb.tag, "backstory");
                assert!(pb.content.contains("Martha grew up"));
                assert!(pb.content.contains("young age"));
            },
            | _ => panic!("Expected ProseBlock, got {:?}", tokens[0]),
        }
    }

    /// Hyphens inside prose must not be mistaken for the closing `---`.
    #[test]
    fn test_prose_with_dashes_in_content() {
        let input = r#"
---description
She was well-known for her kind-hearted nature.
The bakery had a no-nonsense policy.
---
"#;
        let lexer = Lexer::new(input.trim());
        let tokens: Vec<Token> = lexer.map(|(_, tok, _)| tok).collect();

        assert_eq!(tokens.len(), 1);
        match &tokens[0] {
            | Token::ProseBlock(pb) => {
                assert_eq!(pb.tag, "description");
                assert!(pb.content.contains("well-known"));
                assert!(pb.content.contains("kind-hearted"));
                assert!(pb.content.contains("no-nonsense"));
            },
            | _ => panic!("Expected ProseBlock"),
        }
    }

    /// Time and duration literals keep their raw text.
    #[test]
    fn test_time_duration_literals() {
        let input = "08:30 14:45:00 2h30m 45m";
        let lexer = Lexer::new(input);
        let tokens: Vec<Token> = lexer.map(|(_, tok, _)| tok).collect();

        assert_eq!(
            tokens,
            vec![
                Token::TimeLit("08:30".to_string()),
                Token::TimeLit("14:45:00".to_string()),
                Token::DurationLit("2h30m".to_string()),
                Token::DurationLit("45m".to_string()),
            ]
        );
    }

    /// `20..40` lexes as int, DotDot, int (not as floats).
    #[test]
    fn test_range_syntax() {
        let input = "20..40";
        let lexer = Lexer::new(input);
        let tokens: Vec<Token> = lexer.map(|(_, tok, _)| tok).collect();

        assert_eq!(
            tokens,
            vec![Token::IntLit(20), Token::DotDot, Token::IntLit(40),]
        );
    }
}
|
||||
56
src/syntax/mod.rs
Normal file
56
src/syntax/mod.rs
Normal file
@@ -0,0 +1,56 @@
|
||||
#![allow(unused_assignments)] // False positives in error enum fields used by thiserror
// NOTE(review): `unused_assignments` fires on local variables, not on
// struct/enum fields; this module-wide allow (and the per-field ones
// below) look vestigial — confirm against a clean `cargo clippy` run.

pub mod ast;
pub mod lexer;

// Parser is generated by LALRPOP
#[allow(clippy::all)]
#[allow(unused)]
mod parser;

pub use parser::FileParser;

#[cfg(test)]
mod prop_tests;

use miette::Diagnostic;
use thiserror::Error;

/// Errors surfaced by the parser, with miette labels/help for rich
/// terminal diagnostics.
#[derive(Error, Debug, Diagnostic)]
pub enum ParseError {
    /// The token stream contained something the grammar did not expect.
    #[error("Unexpected token: {token}")]
    #[diagnostic(help("Check for syntax errors like missing braces, colons, or semicolons. Common issues: forgetting ':' after field names, missing '}}' to close a block, or using reserved keywords as names."))]
    UnexpectedToken {
        #[allow(dead_code)]
        #[allow(unused_assignments)]
        token: String,
        #[label("unexpected token here")]
        span: miette::SourceSpan,
    },

    /// Input ended mid-declaration.
    #[error("Unexpected end of file")]
    #[diagnostic(help("The file ended before a declaration was complete. Check that all blocks are properly closed with '}}', all strings are closed with quotes, and all prose blocks end with '---'."))]
    UnexpectedEof {
        #[label("file ended here, but expected more input")]
        span: miette::SourceSpan,
    },

    /// The lexer produced an error token (unrecognized input).
    #[error("Invalid token")]
    #[diagnostic(help("This character or sequence is not valid in Storybook syntax. Common issues: special characters in names (use letters, numbers, and underscores only), unescaped quotes in strings, or invalid time formats."))]
    InvalidToken {
        #[label("invalid token here")]
        span: miette::SourceSpan,
    },

    /// A `---tag` prose block ran to EOF without a closing `---`.
    #[error("Unclosed prose block starting with ---{tag}")]
    #[diagnostic(help("Prose blocks must be closed with '---' on its own line. Make sure the closing '---' is at the start of a line with no other text before it."))]
    UnclosedProseBlock {
        #[allow(dead_code)]
        #[allow(unused_assignments)]
        tag: String,
        #[label("prose block starts here but never closes")]
        span: miette::SourceSpan,
    },
}

/// Convenience alias used throughout the syntax layer.
pub type ParseResult<T> = Result<T, ParseError>;
|
||||
520
src/syntax/parser.lalrpop
Normal file
520
src/syntax/parser.lalrpop
Normal file
@@ -0,0 +1,520 @@
|
||||
use crate::syntax::ast::*;
|
||||
use crate::syntax::lexer::Token;
|
||||
|
||||
grammar;
|
||||
|
||||
// ===== Top-level =====
|
||||
|
||||
pub File: File = {
|
||||
<declarations:Declaration*> => File { declarations }
|
||||
};
|
||||
|
||||
Declaration: Declaration = {
|
||||
<u:UseDecl> => Declaration::Use(u),
|
||||
<c:Character> => Declaration::Character(c),
|
||||
<t:Template> => Declaration::Template(t),
|
||||
<l:LifeArc> => Declaration::LifeArc(l),
|
||||
<s:Schedule> => Declaration::Schedule(s),
|
||||
<b:Behavior> => Declaration::Behavior(b),
|
||||
<i:Institution> => Declaration::Institution(i),
|
||||
<r:Relationship> => Declaration::Relationship(r),
|
||||
<loc:Location> => Declaration::Location(loc),
|
||||
<sp:Species> => Declaration::Species(sp),
|
||||
<e:EnumDecl> => Declaration::Enum(e),
|
||||
};
|
||||
|
||||
// ===== Use declarations =====
|
||||
|
||||
UseDecl: UseDecl = {
|
||||
"use" <path:Path> ";" => UseDecl {
|
||||
path,
|
||||
kind: UseKind::Single,
|
||||
span: Span::new(0, 0), // TODO: track actual spans
|
||||
},
|
||||
"use" <base:PathSegments> "::" "{" <items:Comma<Ident>> "}" ";" => UseDecl {
|
||||
path: base,
|
||||
kind: UseKind::Grouped(items),
|
||||
span: Span::new(0, 0),
|
||||
},
|
||||
"use" <path:PathSegments> "::" "*" ";" => UseDecl {
|
||||
path,
|
||||
kind: UseKind::Wildcard,
|
||||
span: Span::new(0, 0),
|
||||
},
|
||||
};
|
||||
|
||||
Path: Vec<String> = {
|
||||
<PathSegments>
|
||||
};
|
||||
|
||||
PathSegments: Vec<String> = {
|
||||
<Ident> => vec![<>],
|
||||
<mut v:PathSegments> "::" <i:Ident> => {
|
||||
v.push(i);
|
||||
v
|
||||
}
|
||||
};
|
||||
|
||||
// ===== Character =====
|
||||
|
||||
Character: Character = {
|
||||
"character" <name:Ident> <template:TemplateClause?> "{" <fields:Field*> "}" => Character {
|
||||
name,
|
||||
fields,
|
||||
template,
|
||||
span: Span::new(0, 0),
|
||||
}
|
||||
};
|
||||
|
||||
TemplateClause: Vec<String> = {
|
||||
"from" <t:Ident> <rest:("," <Ident>)*> => {
|
||||
let mut templates = vec![t];
|
||||
templates.extend(rest);
|
||||
templates
|
||||
}
|
||||
};
|
||||
|
||||
// ===== Template =====
|
||||
|
||||
Template: Template = {
|
||||
"template" <name:Ident> <strict:"strict"?> "{" <includes:Include*> <fields:Field*> "}" => Template {
|
||||
name,
|
||||
fields,
|
||||
strict: strict.is_some(),
|
||||
includes,
|
||||
span: Span::new(0, 0),
|
||||
}
|
||||
};
|
||||
|
||||
Include: String = {
|
||||
"include" <name:Ident> => name
|
||||
};
|
||||
|
||||
// ===== Fields =====
|
||||
|
||||
Field: Field = {
|
||||
<name:Ident> ":" <value:Value> => Field {
|
||||
name,
|
||||
value,
|
||||
span: Span::new(0, 0),
|
||||
}
|
||||
};
|
||||
|
||||
Value: Value = {
|
||||
<IntLit> => Value::Int(<>),
|
||||
<FloatLit> => Value::Float(<>),
|
||||
<StringLit> => Value::String(<>),
|
||||
<BoolLit> => Value::Bool(<>),
|
||||
<lo:IntLit> ".." <hi:IntLit> => Value::Range(
|
||||
Box::new(Value::Int(lo)),
|
||||
Box::new(Value::Int(hi))
|
||||
),
|
||||
<lo:FloatLit> ".." <hi:FloatLit> => Value::Range(
|
||||
Box::new(Value::Float(lo)),
|
||||
Box::new(Value::Float(hi))
|
||||
),
|
||||
<t:Time> => Value::Time(t),
|
||||
<d:Duration> => Value::Duration(d),
|
||||
<p:Path> => Value::Identifier(p),
|
||||
<ProseBlock> => Value::ProseBlock(<>),
|
||||
"[" <values:Comma<Value>> "]" => Value::List(values),
|
||||
"{" <fields:Field*> "}" => Value::Object(fields),
|
||||
<Override> => Value::Override(<>),
|
||||
};
|
||||
|
||||
BoolLit: bool = {
|
||||
"true" => true,
|
||||
"false" => false,
|
||||
};
|
||||
|
||||
// Convert a raw TimeLit ("HH:MM" or "HH:MM:SS") into an ast::Time.
// The lexer regex guarantees at least two numeric components, so
// parts[0]/parts[1] always exist.
Time: Time = {
    <s:TimeLit> => {
        let parts: Vec<&str> = s.split(':').collect();
        // NOTE(review): unwrap_or(0) silently maps components that don't
        // fit the field type to 0, and nothing here rejects out-of-range
        // values like 99:99 — presumably left to a later semantic
        // validation pass; confirm.
        let hour = parts[0].parse().unwrap_or(0);
        let minute = parts[1].parse().unwrap_or(0);
        let second = if parts.len() > 2 {
            parts[2].parse().unwrap_or(0)
        } else {
            0
        };
        Time { hour, minute, second }
    }
};
|
||||
|
||||
// Convert a raw DurationLit (e.g. "2h30m") into an ast::Duration by
// accumulating "<digits><unit>" groups.
Duration: Duration = {
    <s:DurationLit> => {
        let mut hours = 0;
        let mut minutes = 0;
        let mut seconds = 0;

        let mut num = String::new();
        for ch in s.chars() {
            if ch.is_ascii_digit() {
                num.push(ch);
            } else {
                // A unit letter terminates the current digit run.
                let val: u32 = num.parse().unwrap_or(0);
                match ch {
                    'h' => hours = val,
                    'm' => minutes = val,
                    's' => seconds = val,
                    // Unreachable: the lexer regex only admits h/m/s.
                    _ => {}
                }
                num.clear();
            }
        }

        // NOTE(review): a repeated unit ("1h2h") overwrites rather than
        // accumulates, and the lexer regex permits such input — confirm
        // this is rejected elsewhere or intended.
        Duration { hours, minutes, seconds }
    }
};
|
||||
|
||||
ProseBlock: ProseBlock = {
|
||||
ProseBlockToken
|
||||
};
|
||||
|
||||
Override: Override = {
|
||||
"@" <base:Path> "{" <overrides:OverrideOp*> "}" => Override {
|
||||
base,
|
||||
overrides,
|
||||
span: Span::new(0, 0),
|
||||
}
|
||||
};
|
||||
|
||||
OverrideOp: OverrideOp = {
|
||||
"remove" <name:Ident> => OverrideOp::Remove(name),
|
||||
"append" <f:Field> => OverrideOp::Append(f),
|
||||
<f:Field> => OverrideOp::Set(f),
|
||||
};
|
||||
|
||||
// ===== Life Arc =====
|
||||
|
||||
LifeArc: LifeArc = {
|
||||
"life_arc" <name:Ident> "{" <states:ArcState*> "}" => LifeArc {
|
||||
name,
|
||||
states,
|
||||
span: Span::new(0, 0),
|
||||
}
|
||||
};
|
||||
|
||||
ArcState: ArcState = {
|
||||
"state" <name:Ident> "{" <transitions:Transition*> "}" => ArcState {
|
||||
name,
|
||||
transitions,
|
||||
span: Span::new(0, 0),
|
||||
}
|
||||
};
|
||||
|
||||
Transition: Transition = {
|
||||
"on" <cond:Expr> "->" <to:Ident> => Transition {
|
||||
to,
|
||||
condition: cond,
|
||||
span: Span::new(0, 0),
|
||||
}
|
||||
};
|
||||
|
||||
// ===== Schedule =====
|
||||
|
||||
Schedule: Schedule = {
|
||||
"schedule" <name:Ident> "{" <blocks:ScheduleBlock*> "}" => Schedule {
|
||||
name,
|
||||
blocks,
|
||||
span: Span::new(0, 0),
|
||||
}
|
||||
};
|
||||
|
||||
ScheduleBlock: ScheduleBlock = {
|
||||
<start:Time> "->" <end:Time> ":" <activity:Ident> => ScheduleBlock {
|
||||
start,
|
||||
end,
|
||||
activity,
|
||||
span: Span::new(0, 0),
|
||||
}
|
||||
};
|
||||
|
||||
// ===== Behavior Trees =====
|
||||
|
||||
Behavior: Behavior = {
|
||||
"behavior" <name:Ident> "{" <root:BehaviorNode> "}" => Behavior {
|
||||
name,
|
||||
root,
|
||||
span: Span::new(0, 0),
|
||||
}
|
||||
};
|
||||
|
||||
BehaviorNode: BehaviorNode = {
|
||||
<SelectorNode>,
|
||||
<SequenceNode>,
|
||||
<ActionNode>,
|
||||
<SubTreeNode>,
|
||||
};
|
||||
|
||||
SelectorNode: BehaviorNode = {
|
||||
"?" "{" <nodes:BehaviorNode+> "}" => BehaviorNode::Selector(nodes),
|
||||
};
|
||||
|
||||
SequenceNode: BehaviorNode = {
|
||||
">" "{" <nodes:BehaviorNode+> "}" => BehaviorNode::Sequence(nodes),
|
||||
};
|
||||
|
||||
ActionNode: BehaviorNode = {
|
||||
<name:Ident> "(" <params:Comma<Field>> ")" => BehaviorNode::Action(name, params),
|
||||
<name:Ident> => BehaviorNode::Action(name, vec![]),
|
||||
};
|
||||
|
||||
SubTreeNode: BehaviorNode = {
|
||||
"@" <path:Path> => BehaviorNode::SubTree(path),
|
||||
};
|
||||
|
||||
// ===== Institution =====
|
||||
|
||||
Institution: Institution = {
|
||||
"institution" <name:Ident> "{" <fields:Field*> "}" => Institution {
|
||||
name,
|
||||
fields,
|
||||
span: Span::new(0, 0),
|
||||
}
|
||||
};
|
||||
|
||||
// ===== Relationship =====
|
||||
|
||||
Relationship: Relationship = {
|
||||
"relationship" <name:Ident> "{" <participants:Participant+> <fields:Field*> "}" => Relationship {
|
||||
name,
|
||||
participants,
|
||||
fields,
|
||||
span: Span::new(0, 0),
|
||||
}
|
||||
};
|
||||
|
||||
Participant: Participant = {
|
||||
<name:Path> <role:("as" <Ident>)?> <self_block:SelfBlock?> <other_block:OtherBlock?> => Participant {
|
||||
role,
|
||||
name,
|
||||
self_block,
|
||||
other_block,
|
||||
span: Span::new(0, 0),
|
||||
}
|
||||
};
|
||||
|
||||
SelfBlock: Vec<Field> = {
|
||||
"self" "{" <fields:Field*> "}" => fields
|
||||
};
|
||||
|
||||
OtherBlock: Vec<Field> = {
|
||||
"other" "{" <fields:Field*> "}" => fields
|
||||
};
|
||||
|
||||
// ===== Location =====
|
||||
|
||||
Location: Location = {
|
||||
"location" <name:Ident> "{" <fields:Field*> "}" => Location {
|
||||
name,
|
||||
fields,
|
||||
span: Span::new(0, 0),
|
||||
}
|
||||
};
|
||||
|
||||
// ===== Species =====
|
||||
|
||||
Species: Species = {
|
||||
"species" <name:Ident> "{" <fields:Field*> "}" => Species {
|
||||
name,
|
||||
fields,
|
||||
span: Span::new(0, 0),
|
||||
}
|
||||
};
|
||||
|
||||
// ===== Enum =====
|
||||
|
||||
EnumDecl: EnumDecl = {
|
||||
"enum" <name:Ident> "{" <variants:Comma<Ident>> "}" => EnumDecl {
|
||||
name,
|
||||
variants,
|
||||
span: Span::new(0, 0),
|
||||
}
|
||||
};
|
||||
|
||||
// ===== Expressions =====
|
||||
// Expression grammar with proper precedence:
|
||||
// or > and > not > field_access > comparison > term
|
||||
|
||||
Expr: Expr = {
|
||||
<OrExpr>,
|
||||
};
|
||||
|
||||
// Logical OR (lowest precedence)
|
||||
OrExpr: Expr = {
|
||||
<left:OrExpr> "or" <right:AndExpr> => {
|
||||
Expr::Logical(
|
||||
Box::new(left),
|
||||
LogicalOp::Or,
|
||||
Box::new(right)
|
||||
)
|
||||
},
|
||||
<AndExpr>,
|
||||
};
|
||||
|
||||
// Logical AND
|
||||
AndExpr: Expr = {
|
||||
<left:AndExpr> "and" <right:NotExpr> => {
|
||||
Expr::Logical(
|
||||
Box::new(left),
|
||||
LogicalOp::And,
|
||||
Box::new(right)
|
||||
)
|
||||
},
|
||||
<NotExpr>,
|
||||
};
|
||||
|
||||
// Unary NOT
|
||||
NotExpr: Expr = {
|
||||
"not" <expr:NotExpr> => {
|
||||
Expr::Unary(
|
||||
UnaryOp::Not,
|
||||
Box::new(expr)
|
||||
)
|
||||
},
|
||||
<ComparisonExpr>,
|
||||
};
|
||||
|
||||
// Comparison expressions
|
||||
// Comparison level of the expression grammar — a single, non-chaining
// comparison between two field-access operands.
ComparisonExpr: Expr = {
    // `is` spells equality: field access / path / literal on each side.
    <lhs:FieldAccessExpr> "is" <rhs:FieldAccessExpr> =>
        Expr::Comparison(Box::new(lhs), CompOp::Eq, Box::new(rhs)),
    // Ordering comparisons (>, >=, <, <=) via InequalityOp.
    <lhs:FieldAccessExpr> <op:InequalityOp> <rhs:FieldAccessExpr> =>
        Expr::Comparison(Box::new(lhs), op, Box::new(rhs)),
    // A bare operand with no comparison operator.
    <FieldAccessExpr>,
};
|
||||
|
||||
// Field access with dot notation (binds tightest)
|
||||
// Dot-notation field access — the tightest-binding composite form.
// Left-recursive, so `a.b.c` groups as `(a.b).c`.
FieldAccessExpr: Expr = {
    <receiver:FieldAccessExpr> "." <field:Ident> =>
        Expr::FieldAccess(Box::new(receiver), field),
    <PrimaryExpr>,
};
|
||||
|
||||
// Primary expressions (atoms)
|
||||
// Primary expressions (atoms): the `self`/`other` keywords (represented
// as single-segment identifier paths), literals, and dotted paths.
PrimaryExpr: Expr = {
    "self" => Expr::Identifier(vec![String::from("self")]),
    "other" => Expr::Identifier(vec![String::from("other")]),
    <Literal>,
    <path:Path> => Expr::Identifier(path),
};
|
||||
|
||||
// Maps ordering-operator tokens to their CompOp variants.
// Equality is handled separately via the `is` keyword in ComparisonExpr,
// so CompOp::Eq never appears here.
InequalityOp: CompOp = {
    ">" => CompOp::Gt,
    ">=" => CompOp::Ge,
    "<" => CompOp::Lt,
    "<=" => CompOp::Le,
};
|
||||
|
||||
// Literal atoms, each wrapped into its corresponding Expr variant.
// Time/duration/prose tokens are not expressions and are consumed by
// field-value rules instead.
Literal: Expr = {
    <IntLit> => Expr::IntLit(<>),
    <FloatLit> => Expr::FloatLit(<>),
    <StringLit> => Expr::StringLit(<>),
    <BoolLit> => Expr::BoolLit(<>),
};
|
||||
|
||||
// ===== Helpers =====
|
||||
|
||||
// Generic comma-separated list of T, allowing an optional trailing comma:
// matches zero or more `T ,` pairs followed by an optional final T.
Comma<T>: Vec<T> = {
    <mut items:(<T> ",")*> <tail:T?> => {
        if let Some(last) = tail {
            items.push(last);
        }
        items
    }
};
|
||||
|
||||
// ===== Token conversion =====
|
||||
|
||||
// External token declaration: the parser consumes tokens produced by the
// logos lexer (see HEAD: "LALRPOP + logos lexer") rather than LALRPOP's
// built-in lexer. `Location` is the byte-offset type used for spans;
// `Error` is this crate's parse-error type.
extern {
    type Location = usize;
    type Error = crate::syntax::ParseError;

    // Maps each quoted terminal used in the grammar rules above to the
    // corresponding lexer token variant. Any terminal referenced in a
    // rule must appear here, and vice versa.
    enum Token {
        // Keywords
        "use" => Token::Use,
        "character" => Token::Character,
        "template" => Token::Template,
        "life_arc" => Token::LifeArc,
        "schedule" => Token::Schedule,
        "behavior" => Token::Behavior,
        "institution" => Token::Institution,
        "relationship" => Token::Relationship,
        "location" => Token::Location,
        "species" => Token::Species,
        "enum" => Token::Enum,
        "state" => Token::State,
        "on" => Token::On,
        "as" => Token::As,
        // `self` needs a distinct variant name to avoid clashing with
        // Rust's `Self` keyword in the generated code.
        "self" => Token::SelfKw,
        "other" => Token::Other,
        "remove" => Token::Remove,
        "append" => Token::Append,
        "forall" => Token::ForAll,
        "exists" => Token::Exists,
        "in" => Token::In,
        "where" => Token::Where,
        "and" => Token::And,
        "or" => Token::Or,
        "not" => Token::Not,
        "strict" => Token::Strict,
        "include" => Token::Include,
        "from" => Token::From,
        "is" => Token::Is,
        "true" => Token::True,
        "false" => Token::False,

        // Literals — tokens carrying a payload; the `<...>` type names
        // the payload extracted when the terminal is captured in a rule.
        Ident => Token::Ident(<String>),
        IntLit => Token::IntLit(<i64>),
        FloatLit => Token::FloatLit(<f64>),
        StringLit => Token::StringLit(<String>),
        TimeLit => Token::TimeLit(<String>),
        DurationLit => Token::DurationLit(<String>),
        ProseBlockToken => Token::ProseBlock(<ProseBlock>),

        // Punctuation
        "{" => Token::LBrace,
        "}" => Token::RBrace,
        "(" => Token::LParen,
        ")" => Token::RParen,
        "[" => Token::LBracket,
        "]" => Token::RBracket,
        ":" => Token::Colon,
        "::" => Token::ColonColon,
        ";" => Token::Semicolon,
        "," => Token::Comma,
        "." => Token::Dot,
        ".." => Token::DotDot,
        "*" => Token::Star,
        "?" => Token::Question,
        "@" => Token::At,

        // Operators
        ">" => Token::Gt,
        ">=" => Token::Ge,
        "<" => Token::Lt,
        "<=" => Token::Le,
        "->" => Token::Arrow,
    }
}
|
||||
10846
src/syntax/parser.rs
Normal file
10846
src/syntax/parser.rs
Normal file
File diff suppressed because it is too large
Load Diff
1441
src/syntax/prop_tests.rs
Normal file
1441
src/syntax/prop_tests.rs
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user