Files
storybook/src/syntax/lexer.rs
Sienna Meridian Satterwhite 37793cea0d fix(deps): update dependencies and fix all compilation errors
Updated dependencies to latest versions causing breaking changes:
- logos: 0.14 -> 0.16
- lalrpop: 0.21 -> 0.23
- thiserror: 1.0 -> 2.0
- petgraph: 0.6 -> 0.8
- notify: 6.0 -> 8
- toml: 0.8 -> 1.0.2
- tree-sitter (grammar): 0.20 -> 0.26

Fixed compilation issues:
1. logos 0.16: Added allow_greedy for unbounded repetitions in lexer
2. lalrpop 0.23: Changed from process_current_dir() to process()
3. tree-sitter 0.26: Updated bindings to use &Language reference

Also fixed Zed extension:
- Removed local highlights.scm override that had diverged from source
- Added regression test to prevent future divergence
2026-02-16 23:49:29 +00:00

539 lines
15 KiB
Rust

use std::fmt;
use logos::Logos;
/// Token types for the Storybook language.
///
/// Lexing is driven by `logos`. Exact keywords are declared with `#[token]`
/// and so take precedence over the generic `Ident` regex for the same input
/// (per logos' matching rules — longest match, then priority).
#[derive(Logos, Debug, Clone, PartialEq)]
#[logos(skip r"[ \t\n\f]+")] // Skip whitespace
#[allow(clippy::duplicated_attributes)]
#[logos(skip(r"//[^\n]*", allow_greedy = true))] // Skip line comments
#[logos(skip(r"/\*([^*]|\*[^/])*\*/", allow_greedy = true))] // Skip block comments
pub enum Token {
    // Keywords
    #[token("use")]
    Use,
    #[token("character")]
    Character,
    #[token("template")]
    Template,
    #[token("life_arc")]
    LifeArc,
    #[token("schedule")]
    Schedule,
    #[token("behavior")]
    Behavior,
    #[token("institution")]
    Institution,
    #[token("relationship")]
    Relationship,
    #[token("location")]
    Location,
    #[token("species")]
    Species,
    #[token("concept")]
    Concept,
    #[token("sub_concept")]
    SubConcept,
    #[token("concept_comparison")]
    ConceptComparison,
    #[token("any")]
    Any,
    #[token("requires")]
    Requires,
    #[token("state")]
    State,
    #[token("on")]
    On,
    #[token("enter")]
    Enter,
    #[token("as")]
    As,
    // Named `SelfKw` because `Self` is reserved in Rust.
    #[token("self")]
    SelfKw,
    #[token("other")]
    Other,
    #[token("remove")]
    Remove,
    #[token("append")]
    Append,
    #[token("forall")]
    ForAll,
    #[token("exists")]
    Exists,
    #[token("in")]
    In,
    #[token("where")]
    Where,
    #[token("and")]
    And,
    #[token("or")]
    Or,
    #[token("not")]
    Not,
    #[token("strict")]
    Strict,
    #[token("include")]
    Include,
    #[token("from")]
    From,
    #[token("is")]
    Is,
    #[token("uses")]
    Uses,
    #[token("behaviors")]
    Behaviors,
    #[token("schedules")]
    Schedules,
    #[token("tree")]
    Tree,
    #[token("priority")]
    Priority,
    #[token("modifies")]
    Modifies,
    #[token("override")]
    Override,
    #[token("recurrence")]
    Recurrence,
    #[token("season")]
    Season,
    #[token("block")]
    Block,
    #[token("true")]
    True,
    #[token("false")]
    False,
    // Behavior tree keywords
    #[token("choose")]
    Choose,
    #[token("then")]
    Then,
    #[token("if")]
    If,
    #[token("when")]
    When,
    #[token("repeat")]
    Repeat,
    #[token("invert")]
    Invert,
    #[token("retry")]
    Retry,
    #[token("timeout")]
    Timeout,
    #[token("cooldown")]
    Cooldown,
    // "guard" keyword removed - use "if" instead (Token::If)
    #[token("succeed_always")]
    SucceedAlways,
    #[token("fail_always")]
    FailAlways,
    // Identifiers and literals
    /// Bare identifier: letter or underscore, then letters/digits/underscores.
    #[regex(r"[a-zA-Z_][a-zA-Z0-9_]*", |lex| lex.slice().to_string())]
    Ident(String),
    /// Signed integer literal. `parse().ok()` means an out-of-i64-range
    /// literal makes the callback return `None`, which logos reports as a
    /// lexing error rather than a truncated value.
    #[regex(r"-?[0-9]+", |lex| lex.slice().parse::<i64>().ok())]
    NumberLit(i64),
    /// Signed decimal literal; requires digits on both sides of the dot.
    #[regex(r"-?[0-9]+\.[0-9]+", |lex| lex.slice().parse::<f64>().ok())]
    DecimalLit(f64),
    /// Double-quoted string. The callback strips only the surrounding
    /// quotes; escape sequences (e.g. `\"`, `\n`) are kept verbatim —
    /// presumably unescaped downstream, TODO confirm.
    #[regex(r#""([^"\\]|\\.)*""#, |lex| {
        let s = lex.slice();
        s[1..s.len()-1].to_string()
    })]
    TextLit(String),
    // Time literal: HH:MM or HH:MM:SS
    #[regex(r"[0-9]{2}:[0-9]{2}(:[0-9]{2})?", |lex| lex.slice().to_string())]
    TimeLit(String),
    // Duration literal: e.g., 2h30m, 45m, 1h
    #[regex(r"[0-9]+[hms]([0-9]+[hms])*", |lex| lex.slice().to_string())]
    DurationLit(String),
    // Punctuation
    #[token("{")]
    LBrace,
    #[token("}")]
    RBrace,
    #[token("(")]
    LParen,
    #[token(")")]
    RParen,
    #[token("[")]
    LBracket,
    #[token("]")]
    RBracket,
    #[token(":")]
    Colon,
    #[token("::")]
    ColonColon,
    #[token(";")]
    Semicolon,
    #[token(",")]
    Comma,
    #[token(".")]
    Dot,
    #[token("..")]
    DotDot,
    #[token("*")]
    Star,
    #[token("?")]
    Question,
    #[token("@")]
    At,
    // Operators
    #[token(">")]
    Gt,
    #[token(">=")]
    Ge,
    #[token("<")]
    Lt,
    #[token("<=")]
    Le,
    #[token("->")]
    Arrow,
    // Special markers
    /// Opening/closing `---` of a prose block. When the wrapper `Lexer`
    /// sees this it leaves logos and scans the prose block by hand.
    #[token("---")]
    ProseMarker,
    // Prose block (handled specially)
    /// Never produced by logos directly; constructed by
    /// `Lexer::scan_prose_content` in this file.
    ProseBlock(super::ast::ProseBlock),
    // Error token
    /// Emitted by the wrapper `Lexer` when logos reports a lexing error.
    Error,
}
impl fmt::Display for Token {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
| Token::Ident(s) => write!(f, "identifier '{}'", s),
| Token::NumberLit(n) => write!(f, "number {}", n),
| Token::DecimalLit(n) => write!(f, "decimal {}", n),
| Token::TextLit(s) => write!(f, "text \"{}\"", s),
| Token::TimeLit(s) => write!(f, "time {}", s),
| Token::DurationLit(s) => write!(f, "duration {}", s),
| Token::ProseBlock(pb) => write!(f, "prose block ---{}", pb.tag),
| _ => write!(f, "{:?}", self),
}
}
}
/// Lexer state machine for handling prose blocks
#[derive(Debug, Clone)]
enum LexerState {
    /// Regular token scanning, delegated to the logos-generated lexer.
    Normal,
    /// Transient state entered when a leading `---` is seen; `scan_prose_tag`
    /// immediately transitions out of it.
    ProseTag, // After seeing first ---
    /// Inside a prose block: carries the tag text and the byte offset in the
    /// full source where the prose content begins.
    ProseContent(String, usize), // Tag + content start position
}
/// Wrapper lexer that handles two-mode scanning: logos-driven token
/// scanning in normal mode, and hand-rolled scanning inside `---tag ... ---`
/// prose blocks.
pub struct Lexer<'a> {
    /// The complete input; all public spans are byte offsets into this.
    source: &'a str,
    /// Current absolute byte offset into `source`.
    position: usize,
    /// Which scanning mode we are in (see `LexerState`).
    state: LexerState,
    /// Present only in `Normal` state; dropped while scanning prose and
    /// recreated over the remaining source afterwards.
    normal_lexer: Option<logos::Lexer<'a, Token>>,
    /// logos spans are relative to the substring given to `normal_lexer`;
    /// adding this offset converts them back to absolute offsets.
    lexer_base_offset: usize, // Offset of the substring that normal_lexer is lexing
}
impl<'a> Lexer<'a> {
    /// Creates a lexer over the full `source`, starting in normal
    /// (logos-driven) mode at offset 0.
    pub fn new(source: &'a str) -> Self {
        Self {
            source,
            position: 0,
            state: LexerState::Normal,
            normal_lexer: Some(Token::lexer(source)),
            lexer_base_offset: 0,
        }
    }

    /// Scans the tag line of a prose block (`---tag`), transitions to
    /// `ProseContent`, and delegates back to `next()` to produce the
    /// resulting `ProseBlock` token.
    ///
    /// Precondition: `self.position` points at the opening `---`.
    fn scan_prose_tag(&mut self) -> Option<(usize, Token, usize)> {
        self.position += 3; // Skip ---
        // Skip whitespace between the marker and the tag.
        // BUGFIX: advance by the character's UTF-8 width, not by one byte.
        // `char::is_whitespace` matches multi-byte characters (e.g. U+00A0),
        // and a one-byte step would leave `position` on a non-char boundary,
        // panicking on the next slice of `self.source`.
        while self.position < self.source.len() {
            let ch = self.source[self.position..].chars().next().unwrap();
            if !ch.is_whitespace() {
                break;
            }
            self.position += ch.len_utf8();
        }
        // Read tag until whitespace or newline.
        let tag_start = self.position;
        while self.position < self.source.len() {
            let ch = self.source[self.position..].chars().next().unwrap();
            if ch.is_whitespace() {
                break;
            }
            self.position += ch.len_utf8();
        }
        let tag = self.source[tag_start..self.position].to_string();
        // Discard the rest of the tag line; content starts after the newline.
        while self.position < self.source.len() {
            let ch = self.source[self.position..].chars().next().unwrap();
            if ch == '\n' {
                self.position += 1;
                break;
            }
            self.position += ch.len_utf8();
        }
        self.state = LexerState::ProseContent(tag, self.position);
        self.next()
    }

    /// Scans prose content until a closing `---` that is the first
    /// non-whitespace text on its line, producing a single
    /// `Token::ProseBlock` and switching back to normal mode.
    ///
    /// Returns `None` (end of stream) if the block is never closed.
    fn scan_prose_content(
        &mut self,
        tag: String,
        content_start: usize,
    ) -> Option<(usize, Token, usize)> {
        let remaining = &self.source[content_start..];
        let mut byte_offset = 0;
        // Scan until we find closing ---
        while byte_offset < remaining.len() {
            if remaining[byte_offset..].starts_with("---") {
                // Only a marker at the start of a line (ignoring leading
                // whitespace) closes the block; dashes inside prose such as
                // "well-known" must not terminate it.
                let is_line_start = byte_offset == 0 ||
                    remaining[..byte_offset]
                        .chars()
                        .rev()
                        .take_while(|&c| c != '\n')
                        .all(|c| c.is_whitespace());
                if is_line_start {
                    // Found closing marker
                    let content_end = content_start + byte_offset;
                    let content = self.source[content_start..content_end]
                        .trim_end()
                        .to_string();
                    // Approximate span start so it covers the opening
                    // `---tag` + newline (extra whitespace between marker
                    // and tag is not accounted for).
                    let start = content_start.saturating_sub(tag.len() + 4);
                    self.position = content_end + 3; // Skip closing ---
                    self.state = LexerState::Normal;
                    // Re-lex the remainder with logos; record the offset so
                    // its substring-relative spans can be made absolute.
                    self.lexer_base_offset = self.position;
                    self.normal_lexer = Some(Token::lexer(&self.source[self.position..]));
                    let prose_block = super::ast::ProseBlock {
                        tag,
                        content,
                        span: super::ast::Span::new(start, self.position),
                    };
                    return Some((start, Token::ProseBlock(prose_block), self.position));
                }
            }
            // Advance by one UTF-8 character to avoid char boundary issues
            if let Some(ch) = remaining[byte_offset..].chars().next() {
                byte_offset += ch.len_utf8();
            } else {
                break;
            }
        }
        // EOF reached without closing marker - treat as error
        None
    }
}
impl<'a> Iterator for Lexer<'a> {
type Item = (usize, Token, usize);
fn next(&mut self) -> Option<Self::Item> {
match &self.state {
| LexerState::Normal => {
let lexer = self.normal_lexer.as_mut()?;
let token = lexer.next()?;
let span = lexer.span();
match token {
| Ok(Token::ProseMarker) => {
// Switch to prose mode
// span is relative to the substring that logos is lexing; add base offset
self.position = self.lexer_base_offset + span.start;
self.state = LexerState::ProseTag;
self.normal_lexer = None;
self.scan_prose_tag()
},
| Ok(tok) => {
// Adjust span to be relative to original source
let absolute_start = self.lexer_base_offset + span.start;
let absolute_end = self.lexer_base_offset + span.end;
self.position = absolute_end;
Some((absolute_start, tok, absolute_end))
},
| Err(_) => {
// Adjust span to be relative to original source
let absolute_start = self.lexer_base_offset + span.start;
let absolute_end = self.lexer_base_offset + span.end;
self.position = absolute_end;
Some((absolute_start, Token::Error, absolute_end))
},
}
},
| LexerState::ProseTag => {
// Should not happen - scan_prose_tag transitions state
None
},
| LexerState::ProseContent(tag, content_start) => {
let tag = tag.clone();
let content_start = *content_start;
self.scan_prose_content(tag, content_start)
},
}
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs the full two-mode lexer over `input`, discarding span offsets.
    fn lex(input: &str) -> Vec<Token> {
        Lexer::new(input).map(|(_, tok, _)| tok).collect()
    }

    #[test]
    fn test_basic_tokens() {
        let toks = lex("character Martha { age: 34 }");
        assert_eq!(
            toks,
            vec![
                Token::Character,
                Token::Ident("Martha".to_string()),
                Token::LBrace,
                Token::Ident("age".to_string()),
                Token::Colon,
                Token::NumberLit(34),
                Token::RBrace,
            ]
        );
    }

    #[test]
    fn test_prose_block() {
        let input = r#"
---backstory
Martha grew up in a small town.
She loved baking from a young age.
---
"#;
        let toks = lex(input.trim());
        assert_eq!(toks.len(), 1);
        match &toks[0] {
            Token::ProseBlock(pb) => {
                assert_eq!(pb.tag, "backstory");
                assert!(pb.content.contains("Martha grew up"));
                assert!(pb.content.contains("young age"));
            }
            _ => panic!("Expected ProseBlock, got {:?}", toks[0]),
        }
    }

    #[test]
    fn test_prose_with_dashes_in_content() {
        // Hyphens inside prose must not be mistaken for the closing marker.
        let input = r#"
---description
She was well-known for her kind-hearted nature.
The bakery had a no-nonsense policy.
---
"#;
        let toks = lex(input.trim());
        assert_eq!(toks.len(), 1);
        match &toks[0] {
            Token::ProseBlock(pb) => {
                assert_eq!(pb.tag, "description");
                assert!(pb.content.contains("well-known"));
                assert!(pb.content.contains("kind-hearted"));
                assert!(pb.content.contains("no-nonsense"));
            }
            _ => panic!("Expected ProseBlock"),
        }
    }

    #[test]
    fn test_multiple_prose_blocks() {
        let input = r#"
---description
First prose block content.
---
---details
Second prose block content.
---
"#;
        let toks = lex(input);
        assert_eq!(toks.len(), 2, "Should have exactly 2 prose block tokens");
        match &toks[0] {
            Token::ProseBlock(pb) => {
                assert_eq!(pb.tag, "description");
                assert!(pb.content.contains("First prose block"));
            }
            _ => panic!("Expected first ProseBlock, got {:?}", toks[0]),
        }
        match &toks[1] {
            Token::ProseBlock(pb) => {
                assert_eq!(pb.tag, "details");
                assert!(pb.content.contains("Second prose block"));
            }
            _ => panic!("Expected second ProseBlock, got {:?}", toks[1]),
        }
    }

    #[test]
    fn test_time_duration_literals() {
        let toks = lex("08:30 14:45:00 2h30m 45m");
        assert_eq!(
            toks,
            vec![
                Token::TimeLit("08:30".to_string()),
                Token::TimeLit("14:45:00".to_string()),
                Token::DurationLit("2h30m".to_string()),
                Token::DurationLit("45m".to_string()),
            ]
        );
    }

    #[test]
    fn test_range_syntax() {
        // `..` must win over two separate dots between the numbers.
        assert_eq!(
            lex("20..40"),
            vec![Token::NumberLit(20), Token::DotDot, Token::NumberLit(40),]
        );
    }

    #[test]
    fn test_type_system_keywords() {
        assert_eq!(
            lex("concept sub_concept concept_comparison any"),
            vec![
                Token::Concept,
                Token::SubConcept,
                Token::ConceptComparison,
                Token::Any,
            ]
        );
    }
}