Files
celery/sexp_effects/parser.py
giles bb458aa924 Replace batch DAG system with streaming architecture
- Remove legacy_tasks.py, hybrid_state.py, render.py
- Remove old task modules (analyze, execute, execute_sexp, orchestrate)
- Add streaming interpreter from test repo
- Add sexp_effects with primitives and video effects
- Add streaming Celery task with CID-based asset resolution
- Support both CID and friendly name references for assets
- Add .dockerignore to prevent local clones from conflicting

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-02 19:10:11 +00:00

169 lines
4.4 KiB
Python

"""
S-Expression Parser
Parses S-expressions into Python data structures:
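- Lists, written as (...) or [...], become Python lists
- Symbols become Symbol objects
- Numbers become int/float
- Strings become str
- Booleans (#t, #f, true, false) become bool
- Keywords (:foo) become Keyword objects
- A quoted form 'x becomes the list [Symbol('quote'), x]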
"""
import re
from dataclasses import dataclass
from typing import Any, List, Union
@dataclass(frozen=True)
class Symbol:
    """Identifier appearing in an S-expression.

    Instances are immutable (frozen dataclass); equality and hashing are
    the dataclass-generated ones, based on ``name``.
    """

    # Text of the identifier exactly as it was written.
    name: str

    def __repr__(self):
        # Display the symbol as its bare name rather than ``Symbol(name=...)``.
        return self.name
@dataclass(frozen=True)
class Keyword:
    """Keyword such as ``:foo`` appearing in an S-expression.

    Instances are immutable (frozen dataclass); equality and hashing are
    the dataclass-generated ones, based on ``name``.
    """

    # Keyword text; ``__repr__`` puts a colon in front of it.
    name: str

    def __repr__(self):
        # Display in source form, colon first.
        return f":{self.name}"
# Token patterns as (regex, token name) pairs.  They are joined into one
# alternation below, so at every position the FIRST pattern that matches
# wins: order is significant.  A name of None marks text that is matched
# only to be discarded.
TOKEN_PATTERNS = [
    (r'\s+', None),       # Whitespace (skip)
    (r';[^\n]*', None),   # Comments (skip): from ';' to end of line
    (r'\(', 'LPAREN'),
    (r'\)', 'RPAREN'),
    (r'\[', 'LBRACKET'),
    (r'\]', 'RBRACKET'),
    (r"'", 'QUOTE'),
    # Double-quoted string; a backslash always consumes the next character,
    # so an escaped quote does not terminate the literal.
    (r'"([^"\\]|\\.)*"', 'STRING'),
    (r':[a-zA-Z_][a-zA-Z0-9_\-]*', 'KEYWORD'),
    # FLOAT must come before INT, otherwise "1.5" would start with INT "1".
    (r'-?[0-9]+\.[0-9]+', 'FLOAT'),
    (r'-?[0-9]+', 'INT'),
    # BOOL is tried before SYMBOL, so the word forms need a lookahead that
    # rejects a following symbol character; without it "trueish" or
    # "false-color" would be split into a BOOL and a leftover SYMBOL.
    (r'#t|#f|(?:true|false)(?![a-zA-Z0-9_+\-*/<>=!?])', 'BOOL'),
    (r'[a-zA-Z_+\-*/<>=!?][a-zA-Z0-9_+\-*/<>=!?]*', 'SYMBOL'),
]
# Single combined regex: named patterns become named groups (the group name
# is the token kind), skipped patterns become non-capturing groups.
TOKEN_REGEX = '|'.join(
    f'(?P<{name}>{pattern})' if name else f'(?:{pattern})'
    for pattern, name in TOKEN_PATTERNS
)
def tokenize(source: str) -> List[tuple]:
    """Split S-expression source text into tokens.

    Returns a list of ``(kind, text)`` tuples in source order, where
    ``kind`` is the name of the ``TOKEN_REGEX`` group that matched and
    ``text`` is the matched source text.  Matches that belong to no named
    group (whitespace and comments) are left out.

    NOTE(review): ``re.finditer`` simply moves past text that no pattern
    matches, so such characters are dropped without any error -- confirm
    that this leniency is intended.
    """
    return [
        (found.lastgroup, found.group())
        for found in re.finditer(TOKEN_REGEX, source)
        if found.lastgroup
    ]
def parse(source: str) -> Any:
    """Parse S-expression source into Python data structures.

    Both ``(...)`` and ``[...]`` become Python lists, a quoted form ``'x``
    becomes ``[Symbol('quote'), x]``, and atoms become ``str``, ``int``,
    ``float``, ``bool``, ``Keyword`` or ``Symbol`` values.

    Inside string literals the escapes backslash-quote, backslash-n and
    backslash-backslash are decoded to a double quote, a newline and a
    single backslash; a backslash followed by any other character is kept
    as written.

    Returns:
        The parsed value when the source holds exactly one top-level
        expression, otherwise a list of the parsed top-level expressions.

    Raises:
        SyntaxError: on empty input, a missing or unexpected closing
            parenthesis/bracket, or a token kind this parser does not know.
    """
    tokens = tokenize(source)
    pos = 0  # Index of the next unread token.

    # Opening token kind -> (matching closing kind, noun for error messages).
    closers = {
        'LPAREN': ('RPAREN', 'parenthesis'),
        'LBRACKET': ('RBRACKET', 'bracket'),
    }
    # Character after the backslash -> decoded character.
    escapes = {'"': '"', 'n': '\n', '\\': '\\'}

    def unescape(body):
        # Decode in a single left-to-right pass.  Chained str.replace calls
        # would re-read the second half of an escaped backslash as the start
        # of a new escape (turning backslash-backslash-n into a newline).
        return re.sub(
            r'\\(.)',
            lambda m: escapes.get(m.group(1), m.group(0)),
            body,
        )

    def parse_expr():
        nonlocal pos
        if pos >= len(tokens):
            raise SyntaxError("Unexpected end of input")
        kind, value = tokens[pos]

        if kind in closers:
            closing, noun = closers[kind]
            pos += 1
            items = []
            while pos < len(tokens) and tokens[pos][0] != closing:
                items.append(parse_expr())
            if pos >= len(tokens):
                raise SyntaxError(f"Missing closing {noun}")
            pos += 1  # Skip the closing token.
            return items
        if kind == 'RPAREN':
            raise SyntaxError("Unexpected closing parenthesis")
        if kind == 'RBRACKET':
            raise SyntaxError("Unexpected closing bracket")

        # Everything below consumes exactly this one token first.
        pos += 1
        if kind == 'QUOTE':
            return [Symbol('quote'), parse_expr()]
        if kind == 'STRING':
            return unescape(value[1:-1])  # Drop the surrounding quotes.
        if kind == 'INT':
            return int(value)
        if kind == 'FLOAT':
            return float(value)
        if kind == 'BOOL':
            return value in ('#t', 'true')
        if kind == 'KEYWORD':
            return Keyword(value[1:])  # Remove leading ':'.
        if kind == 'SYMBOL':
            return Symbol(value)
        raise SyntaxError(f"Unknown token: {kind} {value}")

    result = parse_expr()
    if pos >= len(tokens):
        return result
    # More tokens remain: collect every top-level expression into a list.
    results = [result]
    while pos < len(tokens):
        results.append(parse_expr())
    return results
def parse_file(path: str) -> Any:
    """Parse the S-expression file at *path*.

    The file is decoded as UTF-8 explicitly, so the result does not depend
    on the locale's default encoding.  The whole file is read and handed to
    ``parse``; its return value is returned unchanged.

    Raises:
        OSError: if the file cannot be opened or read.
        UnicodeDecodeError: if the file is not valid UTF-8.
    """
    with open(path, 'r', encoding='utf-8') as f:
        return parse(f.read())
# Convenience for pretty-printing
def to_sexp(obj: Any) -> str:
    """Convert a Python object back to an S-expression string.

    Lists are rendered recursively as ``(item item ...)``, strings as
    double-quoted literals, booleans as ``#t``/``#f``, ints and floats via
    ``str()``, ``Symbol`` as its name and ``Keyword`` as ``:name``.  Any
    other object falls back to ``repr()``.

    A double quote inside a string is written as backslash-quote so it does
    not terminate the literal.  Backslashes and newlines are written through
    unchanged, which keeps the output for such strings as it was.

    NOTE(review): a string that ends in a backslash, or has a backslash
    directly before a double quote, still yields text that does not read
    back as one string literal; escaping backslashes as well would change
    how existing output is read back and needs a coordinated change.
    """
    if isinstance(obj, list):
        return '(' + ' '.join(to_sexp(x) for x in obj) + ')'
    if isinstance(obj, str):
        escaped = obj.replace('"', '\\"')
        return f'"{escaped}"'
    # bool must be tested before int: bool is a subclass of int.
    if isinstance(obj, bool):
        return '#t' if obj else '#f'
    if isinstance(obj, (int, float)):
        return str(obj)
    if isinstance(obj, Symbol):
        return obj.name
    if isinstance(obj, Keyword):
        return f':{obj.name}'
    return repr(obj)