All checks were successful
Build and Deploy / build-and-deploy (push) Successful in 1m28s
- Add JAX text rendering with font atlas, styled text placement, and typography primitives - Add xector (element-wise/reduction) operations library and sexp effects - Add deferred effect chain fusion for JIT-compiled effect pipelines - Expand drawing primitives with font management, alignment, shadow, and outline - Add interpreter support for function-style define and require - Add GPU persistence mode and hardware decode support to streaming - Add new sexp effects: cell_pattern, halftone, mosaic, and derived definitions - Add path registry for asset resolution - Add integration, primitives, and xector tests Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
397 lines
12 KiB
Python
397 lines
12 KiB
Python
"""
S-expression parser for ArtDAG recipes and plans.

Supports:
- Lists: (a b c)
- Symbols: foo, bar-baz, ->
- Keywords: :key
- Strings: "hello world"
- Numbers: 42, 3.14, -1.5
- Comments: ; to end of line
- Vectors: [a b c] (syntactic sugar for lists)
- Maps: {:key1 val1 :key2 val2} (parsed as Python dicts)
"""
|
||
|
||
from dataclasses import dataclass
|
||
from typing import Any, Dict, List, Union
|
||
import re
|
||
|
||
|
||
@dataclass
class Symbol:
    """A bare (unquoted) identifier appearing in an S-expression."""

    name: str

    def __repr__(self):
        return "Symbol({!r})".format(self.name)

    def __eq__(self, other):
        # A Symbol equals another Symbol with the same name, or a plain
        # string spelling that name; anything else compares unequal.
        if isinstance(other, Symbol):
            other = other.name
        elif not isinstance(other, str):
            return False
        return self.name == other

    def __hash__(self):
        # Same hash as the bare name, consistent with equality against str.
        return hash(self.name)
|
||
|
||
|
||
@dataclass
class Keyword:
    """A ``:name`` keyword; ``name`` is stored without the leading colon."""

    name: str

    def __repr__(self):
        return "Keyword({!r})".format(self.name)

    def __eq__(self, other):
        # Keywords only ever equal other Keywords (never plain strings).
        return isinstance(other, Keyword) and self.name == other.name

    def __hash__(self):
        # The ':' tag keeps a Keyword's hash distinct from that of its name.
        return hash((':', self.name))
|
||
|
||
|
||
class ParseError(Exception):
    """Raised when S-expression text cannot be tokenized or parsed.

    Attributes:
        position: Offset into the source text that was passed in.
        line: Line number that was passed in (defaults to 1).
        col: Column number that was passed in (defaults to 1).
    """

    def __init__(self, message: str, position: int = 0, line: int = 1, col: int = 1):
        # The location is appended to the message so it shows up in tracebacks.
        located = "{} at line {}, column {}".format(message, line, col)
        super().__init__(located)
        self.position, self.line, self.col = position, line, col
|
||
|
||
|
||
class Tokenizer:
|
||
"""Tokenize S-expression text into tokens."""
|
||
|
||
# Token patterns
|
||
WHITESPACE = re.compile(r'\s+')
|
||
COMMENT = re.compile(r';[^\n]*')
|
||
STRING = re.compile(r'"(?:[^"\\]|\\.)*"')
|
||
NUMBER = re.compile(r'-?(?:\d+\.?\d*|\.\d+)(?:[eE][+-]?\d+)?')
|
||
KEYWORD = re.compile(r':[a-zA-Z_][a-zA-Z0-9_-]*')
|
||
# Symbol pattern includes Greek letters α (alpha) and β (beta) for xector operations
|
||
SYMBOL = re.compile(r'[a-zA-Z_*+\-><=/!?αβ²λ][a-zA-Z0-9_*+\-><=/!?.:αβ²λ]*')
|
||
|
||
def __init__(self, text: str):
|
||
self.text = text
|
||
self.pos = 0
|
||
self.line = 1
|
||
self.col = 1
|
||
|
||
def _advance(self, count: int = 1):
|
||
"""Advance position, tracking line/column."""
|
||
for _ in range(count):
|
||
if self.pos < len(self.text):
|
||
if self.text[self.pos] == '\n':
|
||
self.line += 1
|
||
self.col = 1
|
||
else:
|
||
self.col += 1
|
||
self.pos += 1
|
||
|
||
def _skip_whitespace_and_comments(self):
|
||
"""Skip whitespace and comments."""
|
||
while self.pos < len(self.text):
|
||
# Whitespace
|
||
match = self.WHITESPACE.match(self.text, self.pos)
|
||
if match:
|
||
self._advance(match.end() - self.pos)
|
||
continue
|
||
|
||
# Comments
|
||
match = self.COMMENT.match(self.text, self.pos)
|
||
if match:
|
||
self._advance(match.end() - self.pos)
|
||
continue
|
||
|
||
break
|
||
|
||
def peek(self) -> str | None:
|
||
"""Peek at current character."""
|
||
self._skip_whitespace_and_comments()
|
||
if self.pos >= len(self.text):
|
||
return None
|
||
return self.text[self.pos]
|
||
|
||
def next_token(self) -> Any:
|
||
"""Get the next token."""
|
||
self._skip_whitespace_and_comments()
|
||
|
||
if self.pos >= len(self.text):
|
||
return None
|
||
|
||
char = self.text[self.pos]
|
||
start_line, start_col = self.line, self.col
|
||
|
||
# Single-character tokens (parens, brackets, braces)
|
||
if char in '()[]{}':
|
||
self._advance()
|
||
return char
|
||
|
||
# String
|
||
if char == '"':
|
||
match = self.STRING.match(self.text, self.pos)
|
||
if not match:
|
||
raise ParseError("Unterminated string", self.pos, self.line, self.col)
|
||
self._advance(match.end() - self.pos)
|
||
# Parse escape sequences
|
||
content = match.group()[1:-1]
|
||
content = content.replace('\\n', '\n')
|
||
content = content.replace('\\t', '\t')
|
||
content = content.replace('\\"', '"')
|
||
content = content.replace('\\\\', '\\')
|
||
return content
|
||
|
||
# Keyword
|
||
if char == ':':
|
||
match = self.KEYWORD.match(self.text, self.pos)
|
||
if match:
|
||
self._advance(match.end() - self.pos)
|
||
return Keyword(match.group()[1:]) # Strip leading colon
|
||
raise ParseError(f"Invalid keyword", self.pos, self.line, self.col)
|
||
|
||
# Number (must check before symbol due to - prefix)
|
||
if char.isdigit() or (char == '-' and self.pos + 1 < len(self.text) and
|
||
(self.text[self.pos + 1].isdigit() or self.text[self.pos + 1] == '.')):
|
||
match = self.NUMBER.match(self.text, self.pos)
|
||
if match:
|
||
self._advance(match.end() - self.pos)
|
||
num_str = match.group()
|
||
if '.' in num_str or 'e' in num_str or 'E' in num_str:
|
||
return float(num_str)
|
||
return int(num_str)
|
||
|
||
# Symbol
|
||
match = self.SYMBOL.match(self.text, self.pos)
|
||
if match:
|
||
self._advance(match.end() - self.pos)
|
||
return Symbol(match.group())
|
||
|
||
raise ParseError(f"Unexpected character: {char!r}", self.pos, self.line, self.col)
|
||
|
||
|
||
def parse(text: str) -> Any:
    """
    Parse exactly one S-expression from *text*.

    Returns:
        The parsed expression as nested Python values:
        - Lists become Python lists
        - Symbols become Symbol objects
        - Keywords become Keyword objects
        - Strings become Python strings
        - Numbers become int/float

    Raises:
        ParseError: If anything other than whitespace or comments follows
            the first expression.

    Example:
        >>> parse('(recipe "test" :version "1.0")')
        [Symbol('recipe'), 'test', Keyword('version'), '1.0']
    """
    tokenizer = Tokenizer(text)
    expression = _parse_expr(tokenizer)

    # Only whitespace/comments may remain once the expression is read.
    leftover = tokenizer.peek()
    if leftover is None:
        return expression

    raise ParseError("Unexpected content after expression",
                     tokenizer.pos, tokenizer.line, tokenizer.col)
|
||
|
||
|
||
def parse_all(text: str) -> List[Any]:
    """
    Parse every top-level S-expression found in *text*.

    Returns the expressions in source order; the list is empty when the
    text holds nothing but whitespace and comments.
    """
    tokenizer = Tokenizer(text)
    expressions = []

    while True:
        # peek() skips whitespace/comments and yields None at end of input.
        if tokenizer.peek() is None:
            return expressions
        expressions.append(_parse_expr(tokenizer))
|
||
|
||
|
||
def _parse_expr(tokenizer: Tokenizer) -> Any:
    """Parse a single expression (atom, list, vector or map).

    The kind of expression is decided from the raw character returned by
    ``tokenizer.peek()`` *before* the token is consumed. Delimiter tokens and
    decoded string literals are both plain ``str`` values, so looking at the
    token alone would mistake the literals ``"("`` or ``")"`` for delimiters
    and would reject ``""`` (the empty string is a substring of ``')]}'``).

    Raises:
        ParseError: At end of input, or when the next character is a closing
            delimiter with no matching opener. For a stray closer the error
            position points at that character.
    """
    char = tokenizer.peek()

    if char is None:
        raise ParseError("Unexpected end of input", tokenizer.pos, tokenizer.line, tokenizer.col)

    # List
    if char == '(':
        tokenizer.next_token()  # Consume opener
        return _parse_list(tokenizer, ')')

    # Vector (sugar for list)
    if char == '[':
        tokenizer.next_token()  # Consume opener
        return _parse_list(tokenizer, ']')

    # Map/dict: {:key1 val1 :key2 val2}
    if char == '{':
        tokenizer.next_token()  # Consume opener
        return _parse_map(tokenizer)

    # Unexpected closers
    if char in ')]}':
        raise ParseError(f"Unexpected {char!r}", tokenizer.pos, tokenizer.line, tokenizer.col)

    # Atom: string, keyword, number or symbol
    return tokenizer.next_token()
|
||
|
||
|
||
def _parse_list(tokenizer: Tokenizer, closer: str) -> List[Any]:
    """Collect expressions until *closer* is reached, then consume it.

    The opening delimiter must already have been consumed by the caller.

    Raises:
        ParseError: If the input ends before *closer* is seen.
    """
    elements = []

    upcoming = tokenizer.peek()
    while upcoming != closer:
        if upcoming is None:
            raise ParseError(f"Unterminated list, expected {closer!r}",
                             tokenizer.pos, tokenizer.line, tokenizer.col)
        elements.append(_parse_expr(tokenizer))
        upcoming = tokenizer.peek()

    tokenizer.next_token()  # Consume closer
    return elements
|
||
|
||
|
||
def _parse_map(tokenizer: Tokenizer) -> Dict[str, Any]:
    """Parse the body of a map: {:key1 val1 :key2 val2} -> {"key1": val1, "key2": val2}.

    The opening ``{`` must already have been consumed by the caller. Keys may
    be keywords (stored by name, without the colon) or strings; a repeated
    key keeps the last value.

    Raises:
        ParseError: If the input ends before ``}``, or a key is neither a
            keyword nor a string.
    """
    entries = {}

    while (upcoming := tokenizer.peek()) != '}':
        if upcoming is None:
            raise ParseError("Unterminated map, expected '}'",
                             tokenizer.pos, tokenizer.line, tokenizer.col)

        # Key: a keyword such as :key, or a plain string
        raw_key = _parse_expr(tokenizer)
        if isinstance(raw_key, Keyword):
            raw_key = raw_key.name
        elif not isinstance(raw_key, str):
            raise ParseError(f"Map key must be keyword or string, got {type(raw_key).__name__}",
                             tokenizer.pos, tokenizer.line, tokenizer.col)

        # Value: any expression
        entries[raw_key] = _parse_expr(tokenizer)

    tokenizer.next_token()  # Consume closer
    return entries
|
||
|
||
|
||
def _quote_string(text: str) -> str:
    """Return *text* as a double-quoted S-expression string literal."""
    # Backslashes are doubled first so the escapes added afterwards are not
    # themselves escaped again.
    escaped = (
        text.replace('\\', '\\\\')
        .replace('"', '\\"')
        .replace('\n', '\\n')
        .replace('\t', '\\t')
    )
    return f'"{escaped}"'


def _serialize_map_key(key: Any) -> str:
    """Render a dict key in a form the parser reads back as the same key.

    Keys that are valid keyword names are written as ``:name``; any other key
    is written as a quoted string, because ``:my key`` would not parse back
    as a single key.
    """
    if isinstance(key, str):
        name = key
    elif isinstance(key, Keyword):
        name = key.name
    else:
        name = str(key)

    # Same grammar as a keyword token, minus the leading colon.
    if re.fullmatch(r'[a-zA-Z_][a-zA-Z0-9_-]*', name):
        return f":{name}"
    return _quote_string(name)


def serialize(expr: Any, indent: int = 0, pretty: bool = False) -> str:
    """
    Serialize a Python data structure back to S-expression format.

    Args:
        expr: The expression to serialize (list, dict, Symbol, Keyword, str,
            bool, int, float or None, nested arbitrarily).
        indent: Current indentation level (for pretty printing)
        pretty: Whether to use pretty printing with newlines

    Returns:
        S-expression string. Booleans become ``true``/``false``, ``None``
        becomes ``nil``, and dicts become ``{:key value ...}`` (keys that are
        not valid keyword names are written as quoted strings).

    Raises:
        ValueError: If *expr* is of a type that cannot be serialized.
    """
    if isinstance(expr, list):
        if not expr:
            return "()"
        if pretty:
            return _serialize_pretty(expr, indent)
        return "(" + " ".join(serialize(item, indent, False) for item in expr) + ")"

    if isinstance(expr, str):
        return _quote_string(expr)

    # bool must be tested before int: bool is a subclass of int.
    if isinstance(expr, bool):
        return "true" if expr else "false"

    if isinstance(expr, (int, float)):
        return str(expr)

    if expr is None:
        return "nil"

    if isinstance(expr, dict):
        # Serialize dict as property list: {:key1 val1 :key2 val2}
        parts = []
        for key, value in expr.items():
            parts.append(_serialize_map_key(key))
            parts.append(serialize(value, indent, pretty))
        return "{" + " ".join(parts) + "}"

    if isinstance(expr, Symbol):
        return expr.name

    if isinstance(expr, Keyword):
        return f":{expr.name}"

    raise ValueError(f"Cannot serialize {type(expr).__name__}: {expr!r}")
|
||
|
||
|
||
def _serialize_pretty(expr: List, indent: int) -> str:
    """Render a list over several lines unless its compact form is short.

    A list whose single-line form is under 60 characters is returned as-is.
    Otherwise the head stays on the opening line and each later element goes
    on its own line, indented by ``indent + 1`` spaces. A keyword followed by
    a value shares one line with that value.
    """
    if not expr:
        return "()"

    # Prefer the compact form whenever it is short enough.
    flat = serialize(expr, indent, False)
    if len(flat) < 60 and '\n' not in flat:
        return flat

    child_indent = indent + 1
    pad = " " * child_indent
    lines = ["(" + serialize(expr[0], child_indent, False)]

    pos, total = 1, len(expr)
    while pos < total:
        current = expr[pos]

        # Plain element, or a trailing keyword with no value after it.
        if not (isinstance(current, Keyword) and pos + 1 < total):
            lines.append(pad + serialize(current, child_indent, True))
            pos += 1
            continue

        # Keyword/value pair: keep both on the same line.
        key_text = serialize(current, 0, False)
        value = expr[pos + 1]
        value_text = serialize(value, child_indent, False)
        if len(value_text) >= 50 or '\n' in value_text:
            # Long value: let it break over several lines itself.
            value_text = serialize(value, child_indent, True)
        lines.append(f"{pad}{key_text} {value_text}")
        pos += 2

    return "\n".join(lines) + ")"
|
||
|
||
|
||
def parse_file(path: str) -> List[Any]:
    """Parse an S-expression file (supports multiple top-level expressions).

    Args:
        path: Path of the file to read.

    Returns:
        List of the parsed top-level expressions, in file order.
    """
    # Read as UTF-8 explicitly: the symbol grammar accepts non-ASCII
    # characters (α, β, ², λ), which a locale-dependent default encoding
    # could fail to decode or decode incorrectly.
    with open(path, 'r', encoding='utf-8') as f:
        return parse_all(f.read())
|
||
|
||
|
||
def to_sexp(obj: Any) -> str:
    """Return *obj* as a compact S-expression string (alias for serialize)."""
    sexp_text = serialize(obj)
    return sexp_text
|