Files
celery/sexp_effects/parser.py
giles d20eef76ad Fix completed runs not appearing in list + add purge-failed endpoint
- Update save_run_cache to also update actor_id, recipe, inputs on conflict
- Add logging for actor_id when saving runs to run_cache
- Add admin endpoint DELETE /runs/admin/purge-failed to delete all failed runs

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-02 23:24:39 +00:00

396 lines
12 KiB
Python

"""
S-expression parser for ArtDAG recipes and plans.
Supports:
- Lists: (a b c)
- Symbols: foo, bar-baz, ->
- Keywords: :key
- Strings: "hello world"
- Numbers: 42, 3.14, -1.5
- Comments: ; to end of line
- Vectors: [a b c] (syntactic sugar for lists)
- Maps: {:key1 val1 :key2 val2} (parsed as Python dicts)
"""
from dataclasses import dataclass
from typing import Any, Dict, List, Union
import re
@dataclass
class Symbol:
"""An unquoted symbol/identifier."""
name: str
def __repr__(self):
return f"Symbol({self.name!r})"
def __eq__(self, other):
if isinstance(other, Symbol):
return self.name == other.name
if isinstance(other, str):
return self.name == other
return False
def __hash__(self):
return hash(self.name)
@dataclass
class Keyword:
"""A keyword starting with colon."""
name: str
def __repr__(self):
return f"Keyword({self.name!r})"
def __eq__(self, other):
if isinstance(other, Keyword):
return self.name == other.name
return False
def __hash__(self):
return hash((':' , self.name))
class ParseError(Exception):
"""Error during S-expression parsing."""
def __init__(self, message: str, position: int = 0, line: int = 1, col: int = 1):
self.position = position
self.line = line
self.col = col
super().__init__(f"{message} at line {line}, column {col}")
class Tokenizer:
"""Tokenize S-expression text into tokens."""
# Token patterns
WHITESPACE = re.compile(r'\s+')
COMMENT = re.compile(r';[^\n]*')
STRING = re.compile(r'"(?:[^"\\]|\\.)*"')
NUMBER = re.compile(r'-?(?:\d+\.?\d*|\.\d+)(?:[eE][+-]?\d+)?')
KEYWORD = re.compile(r':[a-zA-Z_][a-zA-Z0-9_-]*')
SYMBOL = re.compile(r'[a-zA-Z_*+\-><=/!?][a-zA-Z0-9_*+\-><=/!?.:]*')
def __init__(self, text: str):
self.text = text
self.pos = 0
self.line = 1
self.col = 1
def _advance(self, count: int = 1):
"""Advance position, tracking line/column."""
for _ in range(count):
if self.pos < len(self.text):
if self.text[self.pos] == '\n':
self.line += 1
self.col = 1
else:
self.col += 1
self.pos += 1
def _skip_whitespace_and_comments(self):
"""Skip whitespace and comments."""
while self.pos < len(self.text):
# Whitespace
match = self.WHITESPACE.match(self.text, self.pos)
if match:
self._advance(match.end() - self.pos)
continue
# Comments
match = self.COMMENT.match(self.text, self.pos)
if match:
self._advance(match.end() - self.pos)
continue
break
def peek(self) -> str | None:
"""Peek at current character."""
self._skip_whitespace_and_comments()
if self.pos >= len(self.text):
return None
return self.text[self.pos]
def next_token(self) -> Any:
"""Get the next token."""
self._skip_whitespace_and_comments()
if self.pos >= len(self.text):
return None
char = self.text[self.pos]
start_line, start_col = self.line, self.col
# Single-character tokens (parens, brackets, braces)
if char in '()[]{}':
self._advance()
return char
# String
if char == '"':
match = self.STRING.match(self.text, self.pos)
if not match:
raise ParseError("Unterminated string", self.pos, self.line, self.col)
self._advance(match.end() - self.pos)
# Parse escape sequences
content = match.group()[1:-1]
content = content.replace('\\n', '\n')
content = content.replace('\\t', '\t')
content = content.replace('\\"', '"')
content = content.replace('\\\\', '\\')
return content
# Keyword
if char == ':':
match = self.KEYWORD.match(self.text, self.pos)
if match:
self._advance(match.end() - self.pos)
return Keyword(match.group()[1:]) # Strip leading colon
raise ParseError(f"Invalid keyword", self.pos, self.line, self.col)
# Number (must check before symbol due to - prefix)
if char.isdigit() or (char == '-' and self.pos + 1 < len(self.text) and
(self.text[self.pos + 1].isdigit() or self.text[self.pos + 1] == '.')):
match = self.NUMBER.match(self.text, self.pos)
if match:
self._advance(match.end() - self.pos)
num_str = match.group()
if '.' in num_str or 'e' in num_str or 'E' in num_str:
return float(num_str)
return int(num_str)
# Symbol
match = self.SYMBOL.match(self.text, self.pos)
if match:
self._advance(match.end() - self.pos)
return Symbol(match.group())
raise ParseError(f"Unexpected character: {char!r}", self.pos, self.line, self.col)
def parse(text: str) -> Any:
"""
Parse an S-expression string into Python data structures.
Returns:
Parsed S-expression as nested Python structures:
- Lists become Python lists
- Symbols become Symbol objects
- Keywords become Keyword objects
- Strings become Python strings
- Numbers become int/float
Example:
>>> parse('(recipe "test" :version "1.0")')
[Symbol('recipe'), 'test', Keyword('version'), '1.0']
"""
tokenizer = Tokenizer(text)
result = _parse_expr(tokenizer)
# Check for trailing content
if tokenizer.peek() is not None:
raise ParseError("Unexpected content after expression",
tokenizer.pos, tokenizer.line, tokenizer.col)
return result
def parse_all(text: str) -> List[Any]:
"""
Parse multiple S-expressions from a string.
Returns list of parsed expressions.
"""
tokenizer = Tokenizer(text)
results = []
while tokenizer.peek() is not None:
results.append(_parse_expr(tokenizer))
return results
def _parse_expr(tokenizer: Tokenizer) -> Any:
"""Parse a single expression."""
token = tokenizer.next_token()
if token is None:
raise ParseError("Unexpected end of input", tokenizer.pos, tokenizer.line, tokenizer.col)
# List
if token == '(':
return _parse_list(tokenizer, ')')
# Vector (sugar for list)
if token == '[':
return _parse_list(tokenizer, ']')
# Map/dict: {:key1 val1 :key2 val2}
if token == '{':
return _parse_map(tokenizer)
# Unexpected closers
if isinstance(token, str) and token in ')]}':
raise ParseError(f"Unexpected {token!r}", tokenizer.pos, tokenizer.line, tokenizer.col)
# Atom
return token
def _parse_list(tokenizer: Tokenizer, closer: str) -> List[Any]:
"""Parse a list until the closing delimiter."""
items = []
while True:
char = tokenizer.peek()
if char is None:
raise ParseError(f"Unterminated list, expected {closer!r}",
tokenizer.pos, tokenizer.line, tokenizer.col)
if char == closer:
tokenizer.next_token() # Consume closer
return items
items.append(_parse_expr(tokenizer))
def _parse_map(tokenizer: Tokenizer) -> Dict[str, Any]:
"""Parse a map/dict: {:key1 val1 :key2 val2} -> {"key1": val1, "key2": val2}."""
result = {}
while True:
char = tokenizer.peek()
if char is None:
raise ParseError("Unterminated map, expected '}'",
tokenizer.pos, tokenizer.line, tokenizer.col)
if char == '}':
tokenizer.next_token() # Consume closer
return result
# Parse key (should be a keyword like :key)
key_token = _parse_expr(tokenizer)
if isinstance(key_token, Keyword):
key = key_token.name
elif isinstance(key_token, str):
key = key_token
else:
raise ParseError(f"Map key must be keyword or string, got {type(key_token).__name__}",
tokenizer.pos, tokenizer.line, tokenizer.col)
# Parse value
value = _parse_expr(tokenizer)
result[key] = value
def serialize(expr: Any, indent: int = 0, pretty: bool = False) -> str:
"""
Serialize a Python data structure back to S-expression format.
Args:
expr: The expression to serialize
indent: Current indentation level (for pretty printing)
pretty: Whether to use pretty printing with newlines
Returns:
S-expression string
"""
if isinstance(expr, list):
if not expr:
return "()"
if pretty:
return _serialize_pretty(expr, indent)
else:
items = [serialize(item, indent, False) for item in expr]
return "(" + " ".join(items) + ")"
if isinstance(expr, Symbol):
return expr.name
if isinstance(expr, Keyword):
return f":{expr.name}"
if isinstance(expr, str):
# Escape special characters
escaped = expr.replace('\\', '\\\\').replace('"', '\\"').replace('\n', '\\n').replace('\t', '\\t')
return f'"{escaped}"'
if isinstance(expr, bool):
return "true" if expr else "false"
if isinstance(expr, (int, float)):
return str(expr)
if expr is None:
return "nil"
if isinstance(expr, dict):
# Serialize dict as property list: {:key1 val1 :key2 val2}
items = []
for k, v in expr.items():
items.append(f":{k}")
items.append(serialize(v, indent, pretty))
return "{" + " ".join(items) + "}"
raise ValueError(f"Cannot serialize {type(expr).__name__}: {expr!r}")
def _serialize_pretty(expr: List, indent: int) -> str:
"""Pretty-print a list expression with smart formatting."""
if not expr:
return "()"
prefix = " " * indent
inner_prefix = " " * (indent + 1)
# Check if this is a simple list that fits on one line
simple = serialize(expr, indent, False)
if len(simple) < 60 and '\n' not in simple:
return simple
# Start building multiline output
head = serialize(expr[0], indent + 1, False)
parts = [f"({head}"]
i = 1
while i < len(expr):
item = expr[i]
# Group keyword-value pairs on same line
if isinstance(item, Keyword) and i + 1 < len(expr):
key = serialize(item, 0, False)
val = serialize(expr[i + 1], indent + 1, False)
# If value is short, put on same line
if len(val) < 50 and '\n' not in val:
parts.append(f"{inner_prefix}{key} {val}")
else:
# Value is complex, serialize it pretty
val_pretty = serialize(expr[i + 1], indent + 1, True)
parts.append(f"{inner_prefix}{key} {val_pretty}")
i += 2
else:
# Regular item
item_str = serialize(item, indent + 1, True)
parts.append(f"{inner_prefix}{item_str}")
i += 1
return "\n".join(parts) + ")"
def parse_file(path: str) -> Any:
"""Parse an S-expression file."""
with open(path, 'r') as f:
return parse(f.read())
def to_sexp(obj: Any) -> str:
"""Convert Python object back to S-expression string (alias for serialize)."""
return serialize(obj)