Files
celery/sexp_effects/parser.py
gilesb fc9597456f
All checks were successful
Build and Deploy / build-and-deploy (push) Successful in 1m28s
Add JAX typography, xector primitives, deferred effect chains, and GPU streaming
- Add JAX text rendering with font atlas, styled text placement, and typography primitives
- Add xector (element-wise/reduction) operations library and sexp effects
- Add deferred effect chain fusion for JIT-compiled effect pipelines
- Expand drawing primitives with font management, alignment, shadow, and outline
- Add interpreter support for function-style define and require
- Add GPU persistence mode and hardware decode support to streaming
- Add new sexp effects: cell_pattern, halftone, mosaic, and derived definitions
- Add path registry for asset resolution
- Add integration, primitives, and xector tests

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-06 17:41:19 +00:00

397 lines
12 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
S-expression parser for ArtDAG recipes and plans.
Supports:
- Lists: (a b c)
- Symbols: foo, bar-baz, ->
- Keywords: :key
- Strings: "hello world"
- Numbers: 42, 3.14, -1.5
- Comments: ; to end of line
- Vectors: [a b c] (syntactic sugar for lists)
- Maps: {:key1 val1 :key2 val2} (parsed as Python dicts)
"""
from dataclasses import dataclass
from typing import Any, Dict, List, Union
import re
@dataclass
class Symbol:
    """An unquoted symbol/identifier.

    Compares equal both to other Symbols with the same name and to the
    plain string of its name, so symbol dispatch can key on strings.
    """
    name: str

    def __repr__(self):
        return "Symbol({!r})".format(self.name)

    def __eq__(self, other):
        # Accept either another Symbol or a bare string.
        if isinstance(other, Symbol):
            other = other.name
        if isinstance(other, str):
            return self.name == other
        return False

    def __hash__(self):
        # Hash like the bare name so Symbol/str equality stays consistent
        # with dict/set lookups.
        return hash(self.name)
@dataclass
class Keyword:
    """A keyword token (``:name`` in source); stores the name without the colon.

    Unlike Symbol, a Keyword compares equal only to other Keywords.
    """
    name: str

    def __repr__(self):
        return "Keyword({!r})".format(self.name)

    def __eq__(self, other):
        return isinstance(other, Keyword) and self.name == other.name

    def __hash__(self):
        # Include a ':' marker so a Keyword never shares a hash bucket
        # with the Symbol/str of the same name by construction.
        return hash((':', self.name))
class ParseError(Exception):
    """Raised when S-expression tokenizing or parsing fails.

    Carries the absolute offset plus 1-based line/column so callers
    can point at the offending spot in the source text.
    """

    def __init__(self, message: str, position: int = 0, line: int = 1, col: int = 1):
        # Keep location details accessible as attributes for error reporting.
        self.position = position
        self.line = line
        self.col = col
        super().__init__("{} at line {}, column {}".format(message, line, col))
class Tokenizer:
"""Tokenize S-expression text into tokens."""
# Token patterns
WHITESPACE = re.compile(r'\s+')
COMMENT = re.compile(r';[^\n]*')
STRING = re.compile(r'"(?:[^"\\]|\\.)*"')
NUMBER = re.compile(r'-?(?:\d+\.?\d*|\.\d+)(?:[eE][+-]?\d+)?')
KEYWORD = re.compile(r':[a-zA-Z_][a-zA-Z0-9_-]*')
# Symbol pattern includes Greek letters α (alpha) and β (beta) for xector operations
SYMBOL = re.compile(r'[a-zA-Z_*+\-><=/!?αβ²λ][a-zA-Z0-9_*+\-><=/!?.:αβ²λ]*')
def __init__(self, text: str):
self.text = text
self.pos = 0
self.line = 1
self.col = 1
def _advance(self, count: int = 1):
"""Advance position, tracking line/column."""
for _ in range(count):
if self.pos < len(self.text):
if self.text[self.pos] == '\n':
self.line += 1
self.col = 1
else:
self.col += 1
self.pos += 1
def _skip_whitespace_and_comments(self):
"""Skip whitespace and comments."""
while self.pos < len(self.text):
# Whitespace
match = self.WHITESPACE.match(self.text, self.pos)
if match:
self._advance(match.end() - self.pos)
continue
# Comments
match = self.COMMENT.match(self.text, self.pos)
if match:
self._advance(match.end() - self.pos)
continue
break
def peek(self) -> str | None:
"""Peek at current character."""
self._skip_whitespace_and_comments()
if self.pos >= len(self.text):
return None
return self.text[self.pos]
def next_token(self) -> Any:
"""Get the next token."""
self._skip_whitespace_and_comments()
if self.pos >= len(self.text):
return None
char = self.text[self.pos]
start_line, start_col = self.line, self.col
# Single-character tokens (parens, brackets, braces)
if char in '()[]{}':
self._advance()
return char
# String
if char == '"':
match = self.STRING.match(self.text, self.pos)
if not match:
raise ParseError("Unterminated string", self.pos, self.line, self.col)
self._advance(match.end() - self.pos)
# Parse escape sequences
content = match.group()[1:-1]
content = content.replace('\\n', '\n')
content = content.replace('\\t', '\t')
content = content.replace('\\"', '"')
content = content.replace('\\\\', '\\')
return content
# Keyword
if char == ':':
match = self.KEYWORD.match(self.text, self.pos)
if match:
self._advance(match.end() - self.pos)
return Keyword(match.group()[1:]) # Strip leading colon
raise ParseError(f"Invalid keyword", self.pos, self.line, self.col)
# Number (must check before symbol due to - prefix)
if char.isdigit() or (char == '-' and self.pos + 1 < len(self.text) and
(self.text[self.pos + 1].isdigit() or self.text[self.pos + 1] == '.')):
match = self.NUMBER.match(self.text, self.pos)
if match:
self._advance(match.end() - self.pos)
num_str = match.group()
if '.' in num_str or 'e' in num_str or 'E' in num_str:
return float(num_str)
return int(num_str)
# Symbol
match = self.SYMBOL.match(self.text, self.pos)
if match:
self._advance(match.end() - self.pos)
return Symbol(match.group())
raise ParseError(f"Unexpected character: {char!r}", self.pos, self.line, self.col)
def parse(text: str) -> Any:
    """
    Parse a single S-expression string into Python data structures.

    Returns:
        Nested Python structures:
        - Lists become Python lists
        - Symbols become Symbol objects
        - Keywords become Keyword objects
        - Strings become Python strings
        - Numbers become int/float

    Raises:
        ParseError: On malformed input or trailing content after the
            first expression.

    Example:
        >>> parse('(recipe "test" :version "1.0")')
        [Symbol('recipe'), 'test', Keyword('version'), '1.0']
    """
    tok = Tokenizer(text)
    expr = _parse_expr(tok)
    # Anything left besides whitespace/comments means extra input.
    if tok.peek() is not None:
        raise ParseError("Unexpected content after expression",
                         tok.pos, tok.line, tok.col)
    return expr
def parse_all(text: str) -> List[Any]:
    """
    Parse every top-level S-expression in *text*.

    Returns:
        A list of parsed expressions, in source order (empty list for
        input that is only whitespace/comments).
    """
    tok = Tokenizer(text)
    expressions: List[Any] = []
    while tok.peek() is not None:
        expressions.append(_parse_expr(tok))
    return expressions
def _parse_expr(tokenizer: Tokenizer) -> Any:
    """Parse and return one expression from *tokenizer*.

    Raises:
        ParseError: At end of input or on a stray closing delimiter.
    """
    token = tokenizer.next_token()
    if token is None:
        raise ParseError("Unexpected end of input",
                         tokenizer.pos, tokenizer.line, tokenizer.col)
    if isinstance(token, str):
        # Compound forms: list, vector (sugar for list), and map.
        openers = {'(': ')', '[': ']'}
        if token in openers:
            return _parse_list(tokenizer, openers[token])
        if token == '{':
            return _parse_map(tokenizer)
        # A closing delimiter with no matching opener.
        if token in ')]}':
            raise ParseError(f"Unexpected {token!r}",
                             tokenizer.pos, tokenizer.line, tokenizer.col)
    # Atom: str, int, float, Symbol, or Keyword.
    return token
def _parse_list(tokenizer: Tokenizer, closer: str) -> List[Any]:
    """Parse expressions until *closer* is seen; return them as a list.

    The opening delimiter has already been consumed by the caller.

    Raises:
        ParseError: If input ends before *closer* appears.
    """
    items: List[Any] = []
    while tokenizer.peek() != closer:
        if tokenizer.peek() is None:
            raise ParseError(f"Unterminated list, expected {closer!r}",
                             tokenizer.pos, tokenizer.line, tokenizer.col)
        items.append(_parse_expr(tokenizer))
    tokenizer.next_token()  # Consume the closing delimiter
    return items
def _parse_map(tokenizer: Tokenizer) -> Dict[str, Any]:
    """Parse a map body into a plain dict.

    ``{:key1 val1 :key2 val2}`` becomes ``{"key1": val1, "key2": val2}``.
    Keys must be keywords or strings; the opening ``{`` has already
    been consumed by the caller.

    Raises:
        ParseError: On unterminated input or a non-keyword/string key.
    """
    mapping: Dict[str, Any] = {}
    while True:
        lookahead = tokenizer.peek()
        if lookahead is None:
            raise ParseError("Unterminated map, expected '}'",
                             tokenizer.pos, tokenizer.line, tokenizer.col)
        if lookahead == '}':
            tokenizer.next_token()  # Consume closer
            break
        key_expr = _parse_expr(tokenizer)
        if isinstance(key_expr, Keyword):
            key = key_expr.name
        elif isinstance(key_expr, str):
            key = key_expr
        else:
            raise ParseError(f"Map key must be keyword or string, got {type(key_expr).__name__}",
                             tokenizer.pos, tokenizer.line, tokenizer.col)
        # The value immediately follows its key.
        mapping[key] = _parse_expr(tokenizer)
    return mapping
def serialize(expr: Any, indent: int = 0, pretty: bool = False) -> str:
    """
    Render a Python data structure as S-expression text.

    Args:
        expr: Value to render (list, Symbol, Keyword, str, bool,
            int, float, None, or dict).
        indent: Current indentation level, used by the pretty printer.
        pretty: When True, long lists are broken across lines.

    Returns:
        The S-expression string.

    Raises:
        ValueError: If *expr* has an unsupported type.
    """
    if isinstance(expr, list):
        if not expr:
            return "()"
        if pretty:
            return _serialize_pretty(expr, indent)
        return "({})".format(" ".join(serialize(e, indent, False) for e in expr))
    if isinstance(expr, Symbol):
        return expr.name
    if isinstance(expr, Keyword):
        return f":{expr.name}"
    if isinstance(expr, str):
        # Backslash must be escaped first so later escapes aren't doubled.
        escaped = (expr.replace('\\', '\\\\')
                       .replace('"', '\\"')
                       .replace('\n', '\\n')
                       .replace('\t', '\\t'))
        return f'"{escaped}"'
    if isinstance(expr, bool):
        # Checked before int/float: bool is an int subclass.
        return "true" if expr else "false"
    if isinstance(expr, (int, float)):
        return str(expr)
    if expr is None:
        return "nil"
    if isinstance(expr, dict):
        # Serialize dict as property list: {:key1 val1 :key2 val2}
        pairs = []
        for key, value in expr.items():
            pairs.append(f":{key}")
            pairs.append(serialize(value, indent, pretty))
        return "{" + " ".join(pairs) + "}"
    raise ValueError(f"Cannot serialize {type(expr).__name__}: {expr!r}")
def _serialize_pretty(expr: List, indent: int) -> str:
    """Pretty-print a non-empty list expression with smart formatting.

    Short lists (under 60 chars, no newlines) stay on one line.
    Otherwise the head goes on the first line and each following item
    on its own indented line, with a Keyword and its short value kept
    together on one line.
    """
    if not expr:
        return "()"
    # One extra space of indent for continuation lines under the head.
    inner_prefix = " " * (indent + 1)
    # Check if this is a simple list that fits on one line
    simple = serialize(expr, indent, False)
    if len(simple) < 60 and '\n' not in simple:
        return simple
    # Start building multiline output
    head = serialize(expr[0], indent + 1, False)
    parts = [f"({head}"]
    i = 1
    while i < len(expr):
        item = expr[i]
        # Group keyword-value pairs on same line
        if isinstance(item, Keyword) and i + 1 < len(expr):
            key = serialize(item, 0, False)
            val = serialize(expr[i + 1], indent + 1, False)
            # If value is short, put on same line
            if len(val) < 50 and '\n' not in val:
                parts.append(f"{inner_prefix}{key} {val}")
            else:
                # Value is complex, serialize it pretty
                val_pretty = serialize(expr[i + 1], indent + 1, True)
                parts.append(f"{inner_prefix}{key} {val_pretty}")
            i += 2
        else:
            # Regular item
            item_str = serialize(item, indent + 1, True)
            parts.append(f"{inner_prefix}{item_str}")
            i += 1
    return "\n".join(parts) + ")"
def parse_file(path: str) -> Any:
    """Parse an S-expression file (supports multiple top-level expressions).

    Reads the file as UTF-8: the SYMBOL grammar allows non-ASCII
    characters (α, β, ², λ), which a platform-default encoding such as
    cp1252 could mis-decode.

    Returns:
        A list of the parsed top-level expressions.
    """
    with open(path, 'r', encoding='utf-8') as f:
        return parse_all(f.read())
def to_sexp(obj: Any) -> str:
    """Render *obj* as an S-expression string.

    Thin convenience alias for :func:`serialize` with default options.
    """
    return serialize(obj)