Rebrand sexp → sx across web platform (173 files)
Rename all sexp directories, files, identifiers, and references to sx. artdag/ excluded (separate media processing DSL). Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
347
shared/sx/parser.py
Normal file
347
shared/sx/parser.py
Normal file
@@ -0,0 +1,347 @@
|
||||
"""
|
||||
S-expression parser.
|
||||
|
||||
Supports:
|
||||
- Lists: (a b c)
|
||||
- Vectors: [a b c] (sugar for lists)
|
||||
- Maps: {:key1 val1 :key2 val2}
|
||||
- Symbols: foo, bar-baz, ->, ~card
|
||||
- Keywords: :class, :id
|
||||
- Strings: "hello world" (with \\n, \\t, \\", \\\\ escapes)
|
||||
- Numbers: 42, 3.14, -1.5, 1e-3
|
||||
- Comments: ; to end of line
|
||||
- Fragment: <> (empty-tag symbol for fragment groups)
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from typing import Any
|
||||
|
||||
from .types import Keyword, Symbol, NIL
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# SxExpr — pre-built sx source marker
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class SxExpr:
    """Marker for pre-built sx source that serialize() emits verbatim.

    Wrap an already-rendered sx call string in SxExpr so that nesting it
    inside another sx_call() does not re-quote it as a string literal::

        sx_call("parent", child=SxExpr(sx_call("child", x=1)))
        # => (~parent :child (~child :x 1))
    """

    __slots__ = ("source",)

    def __init__(self, source: str):
        # Raw sx text, stored exactly as given.
        self.source = source

    def __repr__(self) -> str:
        return f"SxExpr({self.source!r})"

    def __str__(self) -> str:
        return self.source

    def __add__(self, other: object) -> "SxExpr":
        # Concatenation keeps the SxExpr wrapper so the result stays unquoted.
        return SxExpr(f"{self.source}{other}")

    def __radd__(self, other: object) -> "SxExpr":
        return SxExpr(f"{other}{self.source}")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Errors
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class ParseError(Exception):
    """Raised when s-expression text cannot be parsed.

    Carries the character offset plus 1-based line/column of the failure;
    the rendered message embeds the line and column.
    """

    def __init__(self, message: str, position: int = 0, line: int = 1, col: int = 1):
        located = f"{message} at line {line}, column {col}"
        super().__init__(located)
        self.position = position
        self.line = line
        self.col = col
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tokenizer
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class Tokenizer:
    """Stateful tokenizer that walks an s-expression string.

    ``next_token()`` returns one of:
      * a one-character delimiter string: ``( ) [ ] { }``
      * a decoded string literal (``str``)
      * an ``int`` or ``float`` for numbers
      * a ``Keyword`` for ``:keywords``
      * ``True`` / ``False`` / ``NIL`` for the literals true/false/nil
      * a ``Symbol`` for any other symbol
      * ``None`` at end of input
    """

    WHITESPACE = re.compile(r"\s+")
    COMMENT = re.compile(r";[^\n]*")            # ; to end of line
    STRING = re.compile(r'"(?:[^"\\]|\\.)*"')   # a backslash always escapes the next char
    NUMBER = re.compile(r"-?(?:\d+\.?\d*|\.\d+)(?:[eE][+-]?\d+)?")
    KEYWORD = re.compile(r":[a-zA-Z_][a-zA-Z0-9_>:-]*")
    # Symbols may start with alpha, _, or common operator chars, plus ~ for components,
    # <> for the fragment symbol, and & for &key/&rest.
    SYMBOL = re.compile(r"[a-zA-Z_~*+\-><=/!?&][a-zA-Z0-9_~*+\-><=/!?.:&]*")

    # Recognized string escapes; any other backslash sequence is kept verbatim.
    _STRING_ESCAPES = {"n": "\n", "t": "\t", '"': '"', "\\": "\\"}

    def __init__(self, text: str):
        self.text = text
        self.pos = 0    # absolute character offset into text
        self.line = 1   # 1-based line, for error reporting
        self.col = 1    # 1-based column, for error reporting

    def _advance(self, count: int = 1):
        """Move forward *count* characters, keeping line/column in sync."""
        for _ in range(count):
            if self.pos < len(self.text):
                if self.text[self.pos] == "\n":
                    self.line += 1
                    self.col = 1
                else:
                    self.col += 1
                self.pos += 1

    def _skip_whitespace_and_comments(self):
        """Advance past any run of whitespace and ;-comments."""
        while self.pos < len(self.text):
            m = self.WHITESPACE.match(self.text, self.pos)
            if m:
                self._advance(m.end() - self.pos)
                continue
            m = self.COMMENT.match(self.text, self.pos)
            if m:
                self._advance(m.end() - self.pos)
                continue
            break

    def peek(self) -> str | None:
        """Return the next significant character without consuming it, or None at EOF."""
        self._skip_whitespace_and_comments()
        if self.pos >= len(self.text):
            return None
        return self.text[self.pos]

    def _decode_string(self, raw: str) -> str:
        """Decode escape sequences in a string-literal body (quotes already stripped).

        This is a single left-to-right pass, so an escaped backslash followed
        by 'n' (source characters: backslash, backslash, n) decodes to a
        literal backslash plus 'n' — NOT a newline. A chain of str.replace
        calls gets that case wrong, because the second replace re-reads the
        output of the first.
        """
        return re.sub(
            r"\\(.)",
            lambda m: self._STRING_ESCAPES.get(m.group(1), m.group(0)),
            raw,
            flags=re.DOTALL,
        )

    def next_token(self) -> Any:
        """Consume and return the next token (see class docstring), or None at EOF."""
        self._skip_whitespace_and_comments()
        if self.pos >= len(self.text):
            return None

        char = self.text[self.pos]

        # Delimiters are returned as single-character strings.
        if char in "()[]{}":
            self._advance()
            return char

        # String literal.
        if char == '"':
            m = self.STRING.match(self.text, self.pos)
            if not m:
                raise ParseError("Unterminated string", self.pos, self.line, self.col)
            self._advance(m.end() - self.pos)
            return self._decode_string(m.group()[1:-1])

        # Keyword (":name").
        if char == ":":
            m = self.KEYWORD.match(self.text, self.pos)
            if m:
                self._advance(m.end() - self.pos)
                return Keyword(m.group()[1:])
            raise ParseError("Invalid keyword", self.pos, self.line, self.col)

        # Number — checked before symbols because "-" may start either.
        if char.isdigit() or (
            char == "-"
            and self.pos + 1 < len(self.text)
            and (self.text[self.pos + 1].isdigit() or self.text[self.pos + 1] == ".")
        ):
            m = self.NUMBER.match(self.text, self.pos)
            if m:
                self._advance(m.end() - self.pos)
                num_str = m.group()
                # A dot or exponent marker means float; otherwise int.
                if "." in num_str or "e" in num_str or "E" in num_str:
                    return float(num_str)
                return int(num_str)

        # Symbol, including the built-in literals true/false/nil.
        m = self.SYMBOL.match(self.text, self.pos)
        if m:
            self._advance(m.end() - self.pos)
            name = m.group()
            if name == "true":
                return True
            if name == "false":
                return False
            if name == "nil":
                return NIL
            return Symbol(name)

        raise ParseError(f"Unexpected character: {char!r}", self.pos, self.line, self.col)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Parsing
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def parse(text: str) -> Any:
    """Parse exactly one s-expression from *text*.

    Raises ParseError if the text is empty or if anything other than
    whitespace/comments follows the first expression.

    >>> parse('(div :class "main" (p "hello"))')
    [Symbol('div'), Keyword('class'), 'main', [Symbol('p'), 'hello']]
    """
    tokenizer = Tokenizer(text)
    expr = _parse_expr(tokenizer)
    if tokenizer.peek() is not None:
        raise ParseError(
            "Unexpected content after expression",
            tokenizer.pos, tokenizer.line, tokenizer.col,
        )
    return expr
|
||||
|
||||
|
||||
def parse_all(text: str) -> list[Any]:
    """Parse zero or more s-expressions from *text* into a list."""
    tokenizer = Tokenizer(text)
    expressions: list[Any] = []
    while tokenizer.peek() is not None:
        expressions.append(_parse_expr(tokenizer))
    return expressions
|
||||
|
||||
|
||||
def _parse_expr(tok: Tokenizer) -> Any:
    """Consume and return the next complete expression from *tok*."""
    token = tok.next_token()
    if token is None:
        raise ParseError("Unexpected end of input", tok.pos, tok.line, tok.col)
    if isinstance(token, str):
        # NOTE(review): delimiter tokens are plain one-char strings, so a
        # string literal whose decoded value is "(" etc. is indistinguishable
        # from a delimiter here — confirm upstream callers never rely on that.
        bracket_closers = {"(": ")", "[": "]"}
        if token in bracket_closers:
            return _parse_list(tok, bracket_closers[token])
        if token == "{":
            return _parse_map(tok)
        if token in (")", "]", "}"):
            raise ParseError(f"Unexpected {token!r}", tok.pos, tok.line, tok.col)
    return token
|
||||
|
||||
|
||||
def _parse_list(tok: Tokenizer, closer: str) -> list[Any]:
    """Collect expressions until *closer*; the opening bracket is already consumed."""
    elements: list[Any] = []
    while True:
        upcoming = tok.peek()
        if upcoming is None:
            raise ParseError(f"Unterminated list, expected {closer!r}", tok.pos, tok.line, tok.col)
        if upcoming == closer:
            tok.next_token()  # consume the closing bracket
            return elements
        elements.append(_parse_expr(tok))
|
||||
|
||||
|
||||
def _parse_map(tok: Tokenizer) -> dict[str, Any]:
    """Read alternating key/value pairs until '}'; the opening brace is already consumed.

    Keys must be keywords or strings; keyword keys are stored by their bare name.
    """
    mapping: dict[str, Any] = {}
    while True:
        upcoming = tok.peek()
        if upcoming is None:
            raise ParseError("Unterminated map, expected '}'", tok.pos, tok.line, tok.col)
        if upcoming == "}":
            tok.next_token()  # consume the closing brace
            return mapping
        raw_key = _parse_expr(tok)
        if isinstance(raw_key, Keyword):
            key = raw_key.name
        elif isinstance(raw_key, str):
            key = raw_key
        else:
            raise ParseError(
                f"Map key must be keyword or string, got {type(raw_key).__name__}",
                tok.pos, tok.line, tok.col,
            )
        mapping[key] = _parse_expr(tok)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Serialization
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def serialize(expr: Any, indent: int = 0, pretty: bool = False) -> str:
    """Render *expr* back into s-expression source text.

    SxExpr values are emitted verbatim; lists become parenthesized forms
    (pretty-printed when *pretty* is set), dicts become {:key val ...} maps,
    and Python scalars map onto the sx literals (true/false/nil, numbers,
    escaped strings). Callables are logged and rendered as nil; anything
    else falls back to repr().
    """
    # Order matters below: bool must be tested before int (True is an int),
    # and SxExpr before everything so pre-built source passes through raw.
    if isinstance(expr, SxExpr):
        return expr.source

    if isinstance(expr, list):
        if not expr:
            return "()"
        if pretty:
            return _serialize_pretty(expr, indent)
        rendered = " ".join(serialize(element, indent, False) for element in expr)
        return f"({rendered})"

    if isinstance(expr, Symbol):
        return expr.name

    if isinstance(expr, Keyword):
        return ":" + expr.name

    if isinstance(expr, str):
        # Single-pass escaping of backslash, quote, newline, and tab.
        table = str.maketrans({"\\": "\\\\", '"': '\\"', "\n": "\\n", "\t": "\\t"})
        return '"' + expr.translate(table) + '"'

    if isinstance(expr, bool):
        return "true" if expr else "false"

    if isinstance(expr, (int, float)):
        return str(expr)

    if expr is None or isinstance(expr, type(NIL)):
        return "nil"

    if isinstance(expr, dict):
        pairs: list[str] = []
        for key, value in expr.items():
            pairs.append(f":{key}")
            pairs.append(serialize(value, indent, pretty))
        return "{" + " ".join(pairs) + "}"

    # Catch callables (Python functions leaked into sx data)
    if callable(expr):
        import logging
        logging.getLogger("sx").error(
            "serialize: callable leaked into sx data: %r", expr)
        return "nil"

    # Fallback for Lambda/Component — show repr
    return repr(expr)
|
||||
|
||||
|
||||
def _serialize_pretty(expr: list, indent: int) -> str:
    """Multi-line rendering of a non-empty list at the given *indent* depth.

    Short forms (under 72 chars, no newline) stay on one line; otherwise the
    head goes first and every following element gets its own indented line,
    with keyword/value pairs kept together on a single line when the value
    itself is short enough.
    """
    if not expr:
        return "()"
    pad = " " * (indent + 1)

    # Prefer the compact single-line rendering when it fits.
    one_line = serialize(expr, indent, False)
    if len(one_line) < 72 and "\n" not in one_line:
        return one_line

    lines = ["(" + serialize(expr[0], indent + 1, False)]
    pos = 1
    total = len(expr)
    while pos < total:
        element = expr[pos]
        if isinstance(element, Keyword) and pos + 1 < total:
            # Keep each :key with its value; re-render the value pretty
            # only when its compact form is too long.
            key_text = serialize(element, 0, False)
            value_text = serialize(expr[pos + 1], indent + 1, False)
            if len(value_text) >= 50 or "\n" in value_text:
                value_text = serialize(expr[pos + 1], indent + 1, True)
            lines.append(f"{pad}{key_text} {value_text}")
            pos += 2
        else:
            lines.append(pad + serialize(element, indent + 1, True))
            pos += 1

    return "\n".join(lines) + ")"
|
||||
Reference in New Issue
Block a user