Files
rose-ash/shared/sx/parser.py
giles 19d59f5f4b
Some checks failed
Build and Deploy / build-and-deploy (push) Has been cancelled
Implement CSSX Phase 2: native SX style primitives
Replace Tailwind class strings with native SX expressions:
(css :flex :gap-4 :hover:bg-sky-200) instead of :class "flex gap-4 ..."

- Add style_dict.py: 516 atoms, variants, breakpoints, keyframes, patterns
- Add style_resolver.py: memoized resolver with variant splitting
- Add StyleValue type to types.py (frozen dataclass with class_name, declarations, etc.)
- Add css and merge-styles primitives to primitives.py
- Add defstyle and defkeyframes special forms to evaluator.py and async_eval.py
- Integrate StyleValue into html.py and async_eval.py render paths
- Add register_generated_rule() to css_registry.py, fix media query selector
- Add style dict JSON delivery with localStorage caching to helpers.py
- Add client-side css primitive, resolver, and style injection to sx.js

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-04 12:47:51 +00:00

397 lines
13 KiB
Python

"""
S-expression parser.
Supports:
- Lists: (a b c)
- Vectors: [a b c] (sugar for lists)
- Maps: {:key1 val1 :key2 val2}
- Symbols: foo, bar-baz, ->, ~card
- Keywords: :class, :id
- Strings: "hello world" (with \\n, \\t, \\", \\/, \\\\ escapes)
- Numbers: 42, 3.14, -1.5, 1e-3
- Comments: ; to end of line
- Fragment: <> (empty-tag symbol for fragment groups)
"""
from __future__ import annotations
import re
from typing import Any
from .types import Keyword, Symbol, NIL
# ---------------------------------------------------------------------------
# SxExpr — pre-built sx source marker
# ---------------------------------------------------------------------------
class SxExpr:
    """Marker wrapping ready-made sx source text.

    ``serialize()`` emits the wrapped text as-is instead of quoting it as a
    string literal, which lets one ``sx_call()`` result be nested inside
    another::

        sx_call("parent", child=SxExpr(sx_call("child", x=1)))
        # => (~parent :child (~child :x 1))

    Concatenation with ``+`` (on either side) stringifies the other operand
    and yields a new ``SxExpr``.
    """

    __slots__ = ("source",)

    def __init__(self, source: str):
        # Stored verbatim; nothing here validates or re-parses the text.
        self.source = source

    def __repr__(self) -> str:
        return "SxExpr({!r})".format(self.source)

    def __str__(self) -> str:
        return self.source

    def __add__(self, other: object) -> "SxExpr":
        # ``expr + x``: append the string form of ``x``.
        suffix = str(other)
        return SxExpr(self.source + suffix)

    def __radd__(self, other: object) -> "SxExpr":
        # ``x + expr``: prepend the string form of ``x``.
        prefix = str(other)
        return SxExpr(prefix + self.source)
# ---------------------------------------------------------------------------
# Errors
# ---------------------------------------------------------------------------
class ParseError(Exception):
    """Raised when s-expression text cannot be parsed.

    The exception message has the form
    ``"<message> at line <line>, column <col>"``. The raw ``position``,
    ``line`` and ``col`` values are also stored as attributes.
    """

    def __init__(self, message: str, position: int = 0, line: int = 1, col: int = 1):
        described = "{} at line {}, column {}".format(message, line, col)
        super().__init__(described)
        self.position, self.line, self.col = position, line, col
# ---------------------------------------------------------------------------
# Tokenizer
# ---------------------------------------------------------------------------
class Tokenizer:
"""Stateful tokenizer that walks an s-expression string."""
WHITESPACE = re.compile(r"\s+")
COMMENT = re.compile(r";[^\n]*")
STRING = re.compile(r'"(?:[^"\\]|\\.)*"')
NUMBER = re.compile(r"-?(?:\d+\.?\d*|\.\d+)(?:[eE][+-]?\d+)?")
KEYWORD = re.compile(r":[a-zA-Z_][a-zA-Z0-9_>:-]*")
# Symbols may start with alpha, _, or common operator chars, plus ~ for components,
# <> for the fragment symbol, and & for &key/&rest.
SYMBOL = re.compile(r"[a-zA-Z_~*+\-><=/!?&][a-zA-Z0-9_~*+\-><=/!?.:&]*")
def __init__(self, text: str):
self.text = text
self.pos = 0
self.line = 1
self.col = 1
def _advance(self, count: int = 1):
for _ in range(count):
if self.pos < len(self.text):
if self.text[self.pos] == "\n":
self.line += 1
self.col = 1
else:
self.col += 1
self.pos += 1
def _skip_whitespace_and_comments(self):
while self.pos < len(self.text):
m = self.WHITESPACE.match(self.text, self.pos)
if m:
self._advance(m.end() - self.pos)
continue
m = self.COMMENT.match(self.text, self.pos)
if m:
self._advance(m.end() - self.pos)
continue
break
def peek(self) -> str | None:
self._skip_whitespace_and_comments()
if self.pos >= len(self.text):
return None
return self.text[self.pos]
def next_token(self) -> Any:
self._skip_whitespace_and_comments()
if self.pos >= len(self.text):
return None
char = self.text[self.pos]
# Delimiters
if char in "()[]{}":
self._advance()
return char
# String
if char == '"':
m = self.STRING.match(self.text, self.pos)
if not m:
raise ParseError("Unterminated string", self.pos, self.line, self.col)
self._advance(m.end() - self.pos)
content = m.group()[1:-1]
content = content.replace("\\n", "\n")
content = content.replace("\\t", "\t")
content = content.replace('\\"', '"')
content = content.replace("\\/", "/")
content = content.replace("\\\\", "\\")
return content
# Keyword
if char == ":":
m = self.KEYWORD.match(self.text, self.pos)
if m:
self._advance(m.end() - self.pos)
return Keyword(m.group()[1:])
raise ParseError("Invalid keyword", self.pos, self.line, self.col)
# Number (check before symbol because of leading -)
if char.isdigit() or (
char == "-"
and self.pos + 1 < len(self.text)
and (self.text[self.pos + 1].isdigit() or self.text[self.pos + 1] == ".")
):
m = self.NUMBER.match(self.text, self.pos)
if m:
self._advance(m.end() - self.pos)
num_str = m.group()
if "." in num_str or "e" in num_str or "E" in num_str:
return float(num_str)
return int(num_str)
# Symbol
m = self.SYMBOL.match(self.text, self.pos)
if m:
self._advance(m.end() - self.pos)
name = m.group()
# Built-in literal symbols
if name == "true":
return True
if name == "false":
return False
if name == "nil":
return NIL
return Symbol(name)
raise ParseError(f"Unexpected character: {char!r}", self.pos, self.line, self.col)
# ---------------------------------------------------------------------------
# Parsing
# ---------------------------------------------------------------------------
def parse(text: str) -> Any:
    """Parse exactly one s-expression from *text*.

    Anything other than whitespace or comments after the first expression is
    rejected with a ``ParseError``.

    >>> parse('(div :class "main" (p "hello"))')
    [Symbol('div'), Keyword('class'), 'main', [Symbol('p'), 'hello']]
    """
    tokenizer = Tokenizer(text)
    expr = _parse_expr(tokenizer)
    if tokenizer.peek() is None:
        return expr
    raise ParseError(
        "Unexpected content after expression",
        tokenizer.pos,
        tokenizer.line,
        tokenizer.col,
    )
def parse_all(text: str) -> list[Any]:
    """Parse every s-expression in *text* and return them in order.

    Returns an empty list when *text* holds only whitespace and comments.
    """
    tokenizer = Tokenizer(text)
    expressions: list[Any] = []
    while True:
        if tokenizer.peek() is None:
            return expressions
        expressions.append(_parse_expr(tokenizer))
def _parse_expr(tok: Tokenizer) -> Any:
    """Parse one expression starting at the tokenizer's current position.

    Returns a ``list`` for ``(...)`` and ``[...]``, a ``dict`` for ``{...}``,
    a two-element ``[Symbol(...), inner]`` list for the quasiquote reader
    forms (backtick, ``,`` and ``,@``), or the atom produced by
    ``tok.next_token()``.

    Raises:
        ParseError: at end of input or on a closing delimiter that has no
            matching opener; the error location points at the offending
            character itself.
    """
    # Use peek() (raw character) for structural decisions so that string
    # values like ")" or "(" don't get confused with actual delimiters.
    raw = tok.peek()
    if raw is None:
        raise ParseError("Unexpected end of input", tok.pos, tok.line, tok.col)

    if raw in ")]}":
        # Capture the location before consuming, otherwise the error would
        # point one column past the stray delimiter.
        pos, line, col = tok.pos, tok.line, tok.col
        tok.next_token()  # consume the delimiter
        raise ParseError(f"Unexpected {raw!r}", pos, line, col)

    if raw == "(":
        tok.next_token()  # consume the '('
        return _parse_list(tok, ")")
    if raw == "[":
        tok.next_token()  # consume the '['
        return _parse_list(tok, "]")
    if raw == "{":
        tok.next_token()  # consume the '{'
        return _parse_map(tok)

    # Quasiquote syntax: ` , ,@
    if raw == "`":
        tok._advance(1)  # consume the backtick
        return [Symbol("quasiquote"), _parse_expr(tok)]
    if raw == ",":
        tok._advance(1)  # consume the comma
        # Check for splice-unquote (,@) — no whitespace between , and @
        if tok.pos < len(tok.text) and tok.text[tok.pos] == "@":
            tok._advance(1)  # consume the @
            return [Symbol("splice-unquote"), _parse_expr(tok)]
        return [Symbol("unquote"), _parse_expr(tok)]

    # Everything else: strings, keywords, symbols, numbers
    return tok.next_token()
def _parse_list(tok: Tokenizer, closer: str) -> list[Any]:
    """Collect expressions until *closer* is reached and return them as a list.

    The opening delimiter must already have been consumed by the caller; the
    closing one is consumed here.

    Raises:
        ParseError: if the input ends before *closer* is found.
    """
    elements: list[Any] = []
    upcoming = tok.peek()
    while upcoming is not None:
        if upcoming == closer:
            tok.next_token()  # swallow the closing delimiter
            return elements
        elements.append(_parse_expr(tok))
        upcoming = tok.peek()
    raise ParseError(f"Unterminated list, expected {closer!r}", tok.pos, tok.line, tok.col)
def _parse_map(tok: Tokenizer) -> dict[str, Any]:
    """Parse alternating key/value expressions up to ``}`` into a dict.

    The opening ``{`` must already have been consumed by the caller. Keys
    must be keywords (stored under their ``name``) or strings (stored
    unchanged).

    Raises:
        ParseError: if the input ends before ``}`` or a key is neither a
            keyword nor a string.
    """
    mapping: dict[str, Any] = {}
    upcoming = tok.peek()
    while upcoming is not None:
        if upcoming == "}":
            tok.next_token()  # swallow the closing brace
            return mapping
        key_expr = _parse_expr(tok)
        if not isinstance(key_expr, (Keyword, str)):
            raise ParseError(
                f"Map key must be keyword or string, got {type(key_expr).__name__}",
                tok.pos, tok.line, tok.col,
            )
        key = key_expr.name if isinstance(key_expr, Keyword) else key_expr
        mapping[key] = _parse_expr(tok)
        upcoming = tok.peek()
    raise ParseError("Unterminated map, expected '}'", tok.pos, tok.line, tok.col)
# ---------------------------------------------------------------------------
# Serialization
# ---------------------------------------------------------------------------
# Start of a closing script tag in any letter case. HTML matches end-tag names
# case-insensitively, so "</SCRIPT" must be neutralised just like "</script".
_SCRIPT_CLOSE_RE = re.compile(r"</(script)", re.IGNORECASE)


def _escape_sx_string(text: str, *, escape_tabs: bool) -> str:
    """Escape *text* for use between double quotes in sx source.

    Backslashes, double quotes and newlines are always escaped; tabs only
    when *escape_tabs* is true. Any ``</script`` (in any letter case) becomes
    ``<\\/script`` with its original casing preserved.
    """
    # Backslashes go first so the escapes added below are not doubled.
    escaped = text.replace("\\", "\\\\").replace('"', '\\"').replace("\n", "\\n")
    if escape_tabs:
        escaped = escaped.replace("\t", "\\t")
    return _SCRIPT_CLOSE_RE.sub(r"<\\/\1", escaped)


def serialize(expr: Any, indent: int = 0, pretty: bool = False) -> str:
    """Serialize a value back to s-expression text.

    Args:
        expr: The value to serialize. Handles ``SxExpr`` (emitted verbatim),
            lists, ``Symbol``, ``Keyword``, ``str``, ``bool``, ``int``,
            ``float``, ``None``/``NIL``, ``dict``, ``StyleValue`` and
            ``_RawHTML``.
        indent: Nesting depth forwarded to the pretty-printer.
        pretty: When true, lists are laid out by ``_serialize_pretty``.

    Returns:
        The sx source text. Callables are logged as an error and emitted as
        ``nil``; any other unrecognised object falls back to ``repr()``.
    """
    if isinstance(expr, SxExpr):
        return expr.source

    if isinstance(expr, list):
        if not expr:
            return "()"
        # Quasiquote sugar: [Symbol("quasiquote"), x] → `x
        if len(expr) == 2 and isinstance(expr[0], Symbol):
            name = expr[0].name
            if name == "quasiquote":
                return "`" + serialize(expr[1], indent, pretty)
            if name == "unquote":
                return "," + serialize(expr[1], indent, pretty)
            if name == "splice-unquote":
                return ",@" + serialize(expr[1], indent, pretty)
        if pretty:
            return _serialize_pretty(expr, indent)
        return "(" + " ".join(serialize(item, indent, False) for item in expr) + ")"

    if isinstance(expr, Symbol):
        return expr.name
    if isinstance(expr, Keyword):
        return f":{expr.name}"
    if isinstance(expr, str):
        return '"' + _escape_sx_string(expr, escape_tabs=True) + '"'

    # bool must be tested before int: bool is a subclass of int.
    if isinstance(expr, bool):
        return "true" if expr else "false"
    if isinstance(expr, (int, float)):
        return str(expr)
    if expr is None or isinstance(expr, type(NIL)):
        return "nil"

    if isinstance(expr, dict):
        pairs: list[str] = []
        for k, v in expr.items():
            pairs.append(f":{k}")
            pairs.append(serialize(v, indent, pretty))
        return "{" + " ".join(pairs) + "}"

    # StyleValue — serialize as class name string.
    # NOTE(review): imported here rather than at module level, presumably to
    # avoid an import cycle — confirm before hoisting.
    from .types import StyleValue
    if isinstance(expr, StyleValue):
        return f'"{expr.class_name}"'

    # _RawHTML — pre-rendered HTML; wrap as (raw! "...") for SX wire format
    from .html import _RawHTML
    if isinstance(expr, _RawHTML):
        # Tabs are left unescaped here, matching the existing wire format.
        return '(raw! "' + _escape_sx_string(expr.html, escape_tabs=False) + '")'

    # Catch callables (Python functions leaked into sx data)
    if callable(expr):
        import logging
        logging.getLogger("sx").error(
            "serialize: callable leaked into sx data: %r", expr)
        return "nil"

    # Fallback for Lambda/Component — show repr
    return repr(expr)
def _serialize_pretty(expr: list, indent: int) -> str:
if not expr:
return "()"
inner_prefix = " " * (indent + 1)
# Try compact first
compact = serialize(expr, indent, False)
if len(compact) < 72 and "\n" not in compact:
return compact
head = serialize(expr[0], indent + 1, False)
parts = [f"({head}"]
i = 1
while i < len(expr):
item = expr[i]
if isinstance(item, Keyword) and i + 1 < len(expr):
key = serialize(item, 0, False)
val = serialize(expr[i + 1], indent + 1, False)
if len(val) < 50 and "\n" not in val:
parts.append(f"{inner_prefix}{key} {val}")
else:
val_p = serialize(expr[i + 1], indent + 1, True)
parts.append(f"{inner_prefix}{key} {val_p}")
i += 2
else:
item_str = serialize(item, indent + 1, True)
parts.append(f"{inner_prefix}{item_str}")
i += 1
return "\n".join(parts) + ")"