Files
rose-ash/shared/sx/ref/bootstrap_py.py
giles d8cddbd971 Replace hand-written evaluator with bootstrapped spec, emit flat Python
- evaluator.py: replace 1200 lines of hand-written eval with thin shim
  that re-exports from bootstrapped sx_ref.py
- bootstrap_py.py: emit all fn-bodied defines as `def` (not `lambda`),
  flatten tail-position if/cond/case/when to if/elif with returns,
  fix &rest handling in _emit_define_as_def
- platform_py.py: EvalError imports from evaluator.py so catches work
- __init__.py: remove SX_USE_REF conditional, always use bootstrapped
- tests/run.py: reset render_active after render tests for isolation
- Removes setrecursionlimit(5000) hack — no longer needed with flat code

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-11 09:18:17 +00:00

1305 lines
53 KiB
Python

#!/usr/bin/env python3
"""
Bootstrap compiler: reference SX evaluator -> Python.
Reads the .sx reference specification and emits a standalone Python
evaluator module (sx_ref.py) that can be compared against the hand-written
evaluator.py / html.py / async_eval.py.
The compiler translates the restricted SX subset used in eval.sx/render.sx
into idiomatic Python. Platform interface functions are emitted as
native Python implementations.
Usage:
python bootstrap_py.py > sx_ref.py
"""
from __future__ import annotations
import os
import sys
# Add project root to path for imports
_HERE = os.path.dirname(os.path.abspath(__file__))
_PROJECT = os.path.abspath(os.path.join(_HERE, "..", "..", ".."))
sys.path.insert(0, _PROJECT)
from shared.sx.parser import parse_all
from shared.sx.types import Symbol, Keyword, NIL as SX_NIL
# ---------------------------------------------------------------------------
# SX -> Python transpiler
# ---------------------------------------------------------------------------
# Python reserved words — SX names that collide get _ suffix
# Excludes names we intentionally shadow (list, dict, range, filter, map)
# PyEmitter._mangle consults this set after its general kebab-case ->
# snake_case conversion and appends "_" to any result found here.
_PY_RESERVED = frozenset({
"False", "None", "True", "and", "as", "assert", "async", "await",
"break", "class", "continue", "def", "del", "elif", "else", "except",
"finally", "for", "from", "global", "if", "import", "in", "is",
"lambda", "nonlocal", "not", "or", "pass", "raise", "return", "try",
"while", "with", "yield",
# builtins (and other common names) we don't want to shadow
"default", "type", "id", "input", "open", "print", "set", "super",
})
class PyEmitter:
"""Transpile an SX AST node to Python source code."""
def __init__(self):
self.indent = 0
def emit(self, expr) -> str:
    """Return Python expression source for the SX AST node *expr*.

    Atoms are rendered directly; symbols, keywords, dicts and lists are
    delegated to the dedicated helpers. Anything unrecognised falls back
    to ``str(expr)``.
    """
    # bool, int and float all render through str(); for bools that yields
    # exactly "True"/"False", so one check covers the three numeric kinds.
    if isinstance(expr, (bool, int, float)):
        return str(expr)
    if isinstance(expr, str):
        return self._py_string(expr)
    if expr is None or expr is SX_NIL:
        return "NIL"
    if isinstance(expr, Symbol):
        return self._emit_symbol(expr.name)
    if isinstance(expr, Keyword):
        # Keywords are emitted as plain Python string literals.
        return self._py_string(expr.name)
    if isinstance(expr, dict):
        return self._emit_native_dict(expr)
    if isinstance(expr, list):
        return self._emit_list(expr)
    return str(expr)
def emit_statement(self, expr, indent: int = 0) -> str:
    """Emit a Python statement from an SX AST node.

    Recognised statement forms (``define``, ``set!``, ``when``,
    ``do``/``begin``, ``for-each`` and the mutation forms) get dedicated
    statement syntax; everything else is emitted as an expression
    statement prefixed with the indentation padding.

    ``set!`` honours ``_current_cell_vars``: when the target is a cell
    variable, symbol reads are emitted as ``_cells[...]`` lookups (see
    ``_emit_symbol``), so the write must target the same slot rather than
    a plain local.
    """
    pad = " " * indent
    if isinstance(expr, list) and expr:
        head = expr[0]
        if isinstance(head, Symbol):
            name = head.name
            if name == "define":
                return self._emit_define(expr, indent)
            if name == "set!":
                target = expr[1]
                raw = target.name if isinstance(target, Symbol) else str(target)
                py_var = self._mangle(raw)
                if py_var in getattr(self, '_current_cell_vars', set()):
                    return f"{pad}_cells[{self._py_string(py_var)}] = {self.emit(expr[2])}"
                return f"{pad}{py_var} = {self.emit(expr[2])}"
            if name == "when":
                return self._emit_when_stmt(expr, indent)
            if name == "do" or name == "begin":
                return "\n".join(self.emit_statement(e, indent) for e in expr[1:])
            if name == "for-each":
                return self._emit_for_each_stmt(expr, indent)
            if name == "dict-set!":
                return f"{pad}{self.emit(expr[1])}[{self.emit(expr[2])}] = {self.emit(expr[3])}"
            if name == "append!":
                return f"{pad}{self.emit(expr[1])}.append({self.emit(expr[2])})"
            if name == "env-set!":
                return f"{pad}{self.emit(expr[1])}[{self.emit(expr[2])}] = {self.emit(expr[3])}"
            if name == "set-lambda-name!":
                return f"{pad}{self.emit(expr[1])}.name = {self.emit(expr[2])}"
    return f"{pad}{self.emit(expr)}"
# --- Symbol emission ---
def _emit_symbol(self, name: str) -> str:
mangled = self._mangle(name)
cell_vars = getattr(self, '_current_cell_vars', set())
if mangled in cell_vars:
return f"_cells[{self._py_string(mangled)}]"
return mangled
def _mangle(self, name: str) -> str:
"""Convert SX identifier to valid Python identifier."""
RENAMES = {
"nil": "NIL",
"true": "True",
"false": "False",
"nil?": "is_nil",
"type-of": "type_of",
"symbol-name": "symbol_name",
"keyword-name": "keyword_name",
"make-lambda": "make_lambda",
"make-component": "make_component",
"make-macro": "make_macro",
"make-thunk": "make_thunk",
"make-handler-def": "make_handler_def",
"make-query-def": "make_query_def",
"make-action-def": "make_action_def",
"make-page-def": "make_page_def",
"make-symbol": "make_symbol",
"make-keyword": "make_keyword",
"lambda-params": "lambda_params",
"lambda-body": "lambda_body",
"lambda-closure": "lambda_closure",
"lambda-name": "lambda_name",
"set-lambda-name!": "set_lambda_name",
"component-params": "component_params",
"component-body": "component_body",
"component-closure": "component_closure",
"component-has-children?": "component_has_children",
"component-name": "component_name",
"component-affinity": "component_affinity",
"macro-params": "macro_params",
"macro-rest-param": "macro_rest_param",
"macro-body": "macro_body",
"macro-closure": "macro_closure",
"thunk?": "is_thunk",
"thunk-expr": "thunk_expr",
"thunk-env": "thunk_env",
"callable?": "is_callable",
"lambda?": "is_lambda",
"component?": "is_component",
"island?": "is_island",
"make-island": "make_island",
"make-signal": "make_signal",
"signal?": "is_signal",
"signal-value": "signal_value",
"signal-set-value!": "signal_set_value",
"signal-subscribers": "signal_subscribers",
"signal-add-sub!": "signal_add_sub",
"signal-remove-sub!": "signal_remove_sub",
"signal-deps": "signal_deps",
"signal-set-deps!": "signal_set_deps",
"set-tracking-context!": "set_tracking_context",
"get-tracking-context": "get_tracking_context",
"make-tracking-context": "make_tracking_context",
"tracking-context-deps": "tracking_context_deps",
"tracking-context-add-dep!": "tracking_context_add_dep",
"tracking-context-notify-fn": "tracking_context_notify_fn",
"identical?": "is_identical",
"notify-subscribers": "notify_subscribers",
"flush-subscribers": "flush_subscribers",
"dispose-computed": "dispose_computed",
"with-island-scope": "with_island_scope",
"register-in-scope": "register_in_scope",
"*batch-depth*": "_batch_depth",
"*batch-queue*": "_batch_queue",
"*island-scope*": "_island_scope",
"*store-registry*": "_store_registry",
"def-store": "def_store",
"use-store": "use_store",
"clear-stores": "clear_stores",
"emit-event": "emit_event",
"on-event": "on_event",
"bridge-event": "bridge_event",
"dom-listen": "dom_listen",
"dom-dispatch": "dom_dispatch",
"event-detail": "event_detail",
"macro?": "is_macro",
"primitive?": "is_primitive",
"get-primitive": "get_primitive",
"env-has?": "env_has",
"env-get": "env_get",
"env-set!": "env_set",
"env-extend": "env_extend",
"env-merge": "env_merge",
"dict-set!": "dict_set",
"dict-get": "dict_get",
"dict-has?": "dict_has",
"dict-delete!": "dict_delete",
"eval-expr": "eval_expr",
"eval-list": "eval_list",
"eval-call": "eval_call",
"is-render-expr?": "is_render_expr",
"render-expr": "render_expr",
"call-lambda": "call_lambda",
"call-component": "call_component",
"parse-keyword-args": "parse_keyword_args",
"parse-comp-params": "parse_comp_params",
"parse-macro-params": "parse_macro_params",
"expand-macro": "expand_macro",
"render-to-html": "render_to_html",
"render-to-sx": "render_to_sx",
"render-value-to-html": "render_value_to_html",
"render-list-to-html": "render_list_to_html",
"render-html-element": "render_html_element",
"render-html-component": "render_html_component",
"parse-element-args": "parse_element_args",
"render-attrs": "render_attrs",
"aser-list": "aser_list",
"aser-fragment": "aser_fragment",
"aser-call": "aser_call",
"aser-special": "aser_special",
"sf-if": "sf_if",
"sf-when": "sf_when",
"sf-cond": "sf_cond",
"sf-cond-scheme": "sf_cond_scheme",
"sf-cond-clojure": "sf_cond_clojure",
"sf-case": "sf_case",
"sf-case-loop": "sf_case_loop",
"sf-and": "sf_and",
"sf-or": "sf_or",
"sf-let": "sf_let",
"sf-lambda": "sf_lambda",
"sf-define": "sf_define",
"sf-defcomp": "sf_defcomp",
"defcomp-kwarg": "defcomp_kwarg",
"sf-defmacro": "sf_defmacro",
"sf-begin": "sf_begin",
"sf-quote": "sf_quote",
"sf-quasiquote": "sf_quasiquote",
"sf-thread-first": "sf_thread_first",
"sf-set!": "sf_set_bang",
"sf-reset": "sf_reset",
"sf-shift": "sf_shift",
"qq-expand": "qq_expand",
"ho-map": "ho_map",
"ho-map-indexed": "ho_map_indexed",
"ho-filter": "ho_filter",
"ho-reduce": "ho_reduce",
"ho-some": "ho_some",
"ho-every": "ho_every",
"ho-for-each": "ho_for_each",
"sf-defstyle": "sf_defstyle",
"special-form?": "is_special_form",
"ho-form?": "is_ho_form",
"strip-prefix": "strip_prefix",
"escape-html": "escape_html",
"escape-attr": "escape_attr",
"escape-string": "escape_string",
"raw-html-content": "raw_html_content",
"HTML_TAGS": "HTML_TAGS",
"VOID_ELEMENTS": "VOID_ELEMENTS",
"BOOLEAN_ATTRS": "BOOLEAN_ATTRS",
# render.sx core
"definition-form?": "is_definition_form",
# adapter-html.sx
"RENDER_HTML_FORMS": "RENDER_HTML_FORMS",
"render-html-form?": "is_render_html_form",
"dispatch-html-form": "dispatch_html_form",
"render-lambda-html": "render_lambda_html",
"make-raw-html": "make_raw_html",
"render-html-island": "render_html_island",
"serialize-island-state": "serialize_island_state",
"json-serialize": "json_serialize",
"empty-dict?": "is_empty_dict",
"sf-defisland": "sf_defisland",
# adapter-sx.sx
"render-to-sx": "render_to_sx",
"aser": "aser",
"eval-case-aser": "eval_case_aser",
"sx-serialize": "sx_serialize",
"sx-serialize-dict": "sx_serialize_dict",
"sx-expr-source": "sx_expr_source",
# Primitives that need exact aliases
"contains?": "contains_p",
"starts-with?": "starts_with_p",
"ends-with?": "ends_with_p",
"empty?": "empty_p",
"every?": "every_p",
"for-each": "for_each",
"for-each-indexed": "for_each_indexed",
"map-indexed": "map_indexed",
"map-dict": "map_dict",
"eval-cond": "eval_cond",
"eval-cond-scheme": "eval_cond_scheme",
"eval-cond-clojure": "eval_cond_clojure",
"process-bindings": "process_bindings",
# deps.sx
"scan-refs": "scan_refs",
"scan-refs-walk": "scan_refs_walk",
"transitive-deps": "transitive_deps",
"compute-all-deps": "compute_all_deps",
"scan-components-from-source": "scan_components_from_source",
"components-needed": "components_needed",
"page-component-bundle": "page_component_bundle",
"page-css-classes": "page_css_classes",
"component-deps": "component_deps",
"component-set-deps!": "component_set_deps",
"component-css-classes": "component_css_classes",
"component-io-refs": "component_io_refs",
"component-set-io-refs!": "component_set_io_refs",
"env-components": "env_components",
"regex-find-all": "regex_find_all",
"scan-css-classes": "scan_css_classes",
# deps.sx IO detection
"scan-io-refs": "scan_io_refs",
"scan-io-refs-walk": "scan_io_refs_walk",
"transitive-io-refs": "transitive_io_refs",
"compute-all-io-refs": "compute_all_io_refs",
"component-io-refs-cached": "component_io_refs_cached",
"component-pure?": "component_pure_p",
"render-target": "render_target",
"page-render-plan": "page_render_plan",
# router.sx
"split-path-segments": "split_path_segments",
"make-route-segment": "make_route_segment",
"parse-route-pattern": "parse_route_pattern",
"match-route-segments": "match_route_segments",
"match-route": "match_route",
"find-matching-route": "find_matching_route",
}
if name in RENAMES:
return RENAMES[name]
# General mangling
result = name
# Handle trailing ? and !
if result.endswith("?"):
result = result[:-1] + "_p"
elif result.endswith("!"):
result = result[:-1] + "_b"
# Kebab to snake_case
result = result.replace("-", "_")
# Escape Python reserved words
if result in _PY_RESERVED:
result = result + "_"
return result
# --- List emission ---
def _emit_list(self, expr: list) -> str:
if not expr:
return "[]"
head = expr[0]
if not isinstance(head, Symbol):
# Data list
return "[" + ", ".join(self.emit(x) for x in expr) + "]"
name = head.name
handler = getattr(
self,
f"_sf_{name.replace('-', '_').replace('!', '_b').replace('?', '_p')}",
None,
)
if handler:
return handler(expr)
# Built-in forms
if name in ("fn", "lambda"):
return self._emit_fn(expr)
if name in ("let", "let*"):
return self._emit_let(expr)
if name == "if":
return self._emit_if(expr)
if name == "when":
return self._emit_when(expr)
if name == "cond":
return self._emit_cond(expr)
if name == "case":
return self._emit_case(expr)
if name == "and":
return self._emit_and(expr)
if name == "or":
return self._emit_or(expr)
if name == "not":
return f"(not sx_truthy({self.emit(expr[1])}))"
if name in ("do", "begin"):
return self._emit_do(expr)
if name == "list":
return "[" + ", ".join(self.emit(x) for x in expr[1:]) + "]"
if name == "dict":
return self._emit_dict_literal(expr)
if name == "quote":
return self._emit_quote(expr[1])
if name == "set!":
# set! in expression context — use nonlocal_cells dict for mutation
# from nested lambdas (Python closures can read but not rebind outer vars)
varname = expr[1].name if isinstance(expr[1], Symbol) else str(expr[1])
py_var = self._mangle(varname)
return f"_sx_cell_set(_cells, {self._py_string(py_var)}, {self.emit(expr[2])})"
if name == "str":
parts = [self.emit(x) for x in expr[1:]]
return "sx_str(" + ", ".join(parts) + ")"
# Mutation forms that can appear in expression context
if name == "append!":
return f"_sx_append({self.emit(expr[1])}, {self.emit(expr[2])})"
if name == "dict-set!":
return f"_sx_dict_set({self.emit(expr[1])}, {self.emit(expr[2])}, {self.emit(expr[3])})"
if name == "env-set!":
return f"_sx_dict_set({self.emit(expr[1])}, {self.emit(expr[2])}, {self.emit(expr[3])})"
if name == "set-lambda-name!":
return f"_sx_set_attr({self.emit(expr[1])}, 'name', {self.emit(expr[2])})"
# Infix operators
if name in ("+", "-", "*", "/", "=", "!=", "<", ">", "<=", ">=", "mod"):
return self._emit_infix(name, expr[1:])
if name == "inc":
return f"({self.emit(expr[1])} + 1)"
if name == "dec":
return f"({self.emit(expr[1])} - 1)"
# Regular function call
fn_name = self._mangle(name)
args = ", ".join(self.emit(x) for x in expr[1:])
return f"{fn_name}({args})"
# --- Special form emitters ---
def _emit_fn(self, expr) -> str:
params = expr[1]
body = expr[2:]
param_names = []
rest_name = None
i = 0
while i < len(params):
p = params[i]
if isinstance(p, Symbol) and p.name == "&rest":
# Next param is the rest parameter
if i + 1 < len(params):
rest_name = self._mangle(params[i + 1].name if isinstance(params[i + 1], Symbol) else str(params[i + 1]))
i += 2
continue
else:
i += 1
continue
if isinstance(p, Symbol):
param_names.append(self._mangle(p.name))
else:
param_names.append(str(p))
i += 1
if rest_name:
param_names.append(f"*{rest_name}")
params_str = ", ".join(param_names)
if len(body) == 1:
body_py = self.emit(body[0])
return f"lambda {params_str}: {body_py}"
# Multi-expression body: need a local function
lines = []
lines.append(f"_sx_fn(lambda {params_str}: (")
for b in body[:-1]:
lines.append(f" {self.emit(b)},")
lines.append(f" {self.emit(body[-1])}")
lines.append(")[-1])")
return "\n".join(lines)
def _emit_let(self, expr) -> str:
bindings = expr[1]
body = expr[2:]
assignments = []
if isinstance(bindings, list):
if bindings and isinstance(bindings[0], list):
# Scheme-style: ((name val) ...)
for b in bindings:
vname = b[0].name if isinstance(b[0], Symbol) else str(b[0])
assignments.append((self._mangle(vname), self.emit(b[1])))
else:
# Clojure-style: (name val name val ...)
for i in range(0, len(bindings), 2):
vname = bindings[i].name if isinstance(bindings[i], Symbol) else str(bindings[i])
assignments.append((self._mangle(vname), self.emit(bindings[i + 1])))
# Nested IIFE for sequential let (each binding can see previous ones):
# (lambda a: (lambda b: body)(val_b))(val_a)
# Cell variables (mutated by nested set!) are initialized in _cells dict
# instead of lambda params, since the body reads _cells[name].
cell_vars = getattr(self, '_current_cell_vars', set())
body_parts = [self.emit(b) for b in body]
if len(body) == 1:
body_str = body_parts[0]
else:
body_str = f"_sx_begin({', '.join(body_parts)})"
# Build from inside out
result = body_str
for name, val in reversed(assignments):
if name in cell_vars:
# Cell var: initialize in _cells dict, not as lambda param
result = f"_sx_begin(_sx_cell_set(_cells, {self._py_string(name)}, {val}), {result})"
else:
result = f"(lambda {name}: {result})({val})"
return result
def _emit_if(self, expr) -> str:
cond = self.emit(expr[1])
then = self.emit(expr[2])
els = self.emit(expr[3]) if len(expr) > 3 else "NIL"
return f"({then} if sx_truthy({cond}) else {els})"
def _emit_when(self, expr) -> str:
cond = self.emit(expr[1])
body_parts = expr[2:]
if len(body_parts) == 1:
return f"({self.emit(body_parts[0])} if sx_truthy({cond}) else NIL)"
body = ", ".join(self.emit(b) for b in body_parts)
return f"(_sx_begin({body}) if sx_truthy({cond}) else NIL)"
def _emit_when_stmt(self, expr, indent: int = 0) -> str:
pad = " " * indent
cond = self.emit(expr[1])
body_parts = expr[2:]
lines = [f"{pad}if sx_truthy({cond}):"]
for b in body_parts:
lines.append(self.emit_statement(b, indent + 1))
return "\n".join(lines)
def _emit_cond(self, expr) -> str:
clauses = expr[1:]
if not clauses:
return "NIL"
is_scheme = (
all(isinstance(c, list) and len(c) == 2 for c in clauses)
and not any(isinstance(c, Keyword) for c in clauses)
)
if is_scheme:
return self._cond_scheme(clauses)
return self._cond_clojure(clauses)
def _cond_scheme(self, clauses) -> str:
if not clauses:
return "NIL"
clause = clauses[0]
test = clause[0]
body = clause[1]
if isinstance(test, Symbol) and test.name in ("else", ":else"):
return self.emit(body)
if isinstance(test, Keyword) and test.name == "else":
return self.emit(body)
return f"({self.emit(body)} if sx_truthy({self.emit(test)}) else {self._cond_scheme(clauses[1:])})"
def _cond_clojure(self, clauses) -> str:
if len(clauses) < 2:
return "NIL"
test = clauses[0]
body = clauses[1]
if isinstance(test, Keyword) and test.name == "else":
return self.emit(body)
if isinstance(test, Symbol) and test.name in ("else", ":else"):
return self.emit(body)
return f"({self.emit(body)} if sx_truthy({self.emit(test)}) else {self._cond_clojure(clauses[2:])})"
def _emit_case(self, expr) -> str:
match_expr = self.emit(expr[1])
clauses = expr[2:]
return f"_sx_case({match_expr}, [{self._case_pairs(clauses)}])"
def _case_pairs(self, clauses) -> str:
pairs = []
i = 0
while i < len(clauses) - 1:
test = clauses[i]
body = clauses[i + 1]
if isinstance(test, Keyword) and test.name == "else":
pairs.append(f"(None, lambda: {self.emit(body)})")
elif isinstance(test, Symbol) and test.name in ("else", ":else"):
pairs.append(f"(None, lambda: {self.emit(body)})")
else:
pairs.append(f"({self.emit(test)}, lambda: {self.emit(body)})")
i += 2
return ", ".join(pairs)
def _emit_and(self, expr) -> str:
parts = [self.emit(x) for x in expr[1:]]
if len(parts) == 1:
return parts[0]
# Use Python's native and for short-circuit evaluation.
# Last value returned as-is; prior values tested with sx_truthy.
# (and a b c) -> (a if not sx_truthy(a) else (b if not sx_truthy(b) else c))
result = parts[-1]
for p in reversed(parts[:-1]):
result = f"({p} if not sx_truthy({p}) else {result})"
return result
def _emit_or(self, expr) -> str:
if len(expr) == 2:
return self.emit(expr[1])
parts = [self.emit(x) for x in expr[1:]]
# Use Python's short-circuit pattern:
# (or a b c) -> (a if sx_truthy(a) else (b if sx_truthy(b) else c))
result = parts[-1]
for p in reversed(parts[:-1]):
result = f"({p} if sx_truthy({p}) else {result})"
return result
def _emit_do(self, expr) -> str:
return self._emit_do_inner(expr[1:])
def _emit_do_inner(self, exprs) -> str:
if len(exprs) == 1:
return self.emit(exprs[0])
parts = [self.emit(e) for e in exprs]
return "_sx_begin(" + ", ".join(parts) + ")"
def _emit_native_dict(self, expr: dict) -> str:
"""Emit a native Python dict (from parser's {:key val} syntax)."""
parts = []
for key, val in expr.items():
parts.append(f"{self._py_string(key)}: {self.emit(val)}")
return "{" + ", ".join(parts) + "}"
def _emit_dict_literal(self, expr) -> str:
pairs = expr[1:]
parts = []
i = 0
while i < len(pairs) - 1:
key = pairs[i]
val = pairs[i + 1]
if isinstance(key, Keyword):
parts.append(f"{self._py_string(key.name)}: {self.emit(val)}")
else:
parts.append(f"{self.emit(key)}: {self.emit(val)}")
i += 2
return "{" + ", ".join(parts) + "}"
def _emit_infix(self, op: str, args: list) -> str:
PY_OPS = {"=": "==", "!=": "!=", "mod": "%"}
py_op = PY_OPS.get(op, op)
if len(args) == 1 and op == "-":
return f"(-{self.emit(args[0])})"
return f"({self.emit(args[0])} {py_op} {self.emit(args[1])})"
def _emit_define(self, expr, indent: int = 0) -> str:
    """Emit a ``define`` form.

    A value that is an ``fn``/``lambda`` form is emitted as a ``def``
    statement (flat control flow); any other value becomes a plain
    assignment to the mangled name.
    """
    target = expr[1]
    name = target.name if isinstance(target, Symbol) else str(target)
    val_expr = expr[2]
    is_fn_form = (
        isinstance(val_expr, list)
        and bool(val_expr)
        and isinstance(val_expr[0], Symbol)
        and val_expr[0].name in ("fn", "lambda")
    )
    if is_fn_form:
        return self._emit_define_as_def(name, val_expr, indent)
    value_src = self.emit(val_expr)
    pad = " " * indent
    return f"{pad}{self._mangle(name)} = {value_src}"
def _body_uses_set(self, fn_expr) -> bool:
"""Check if a fn expression's body (recursively) uses set!."""
def _has_set(node):
if not isinstance(node, list) or not node:
return False
head = node[0]
if isinstance(head, Symbol) and head.name == "set!":
return True
return any(_has_set(child) for child in node if isinstance(child, list))
body = fn_expr[2:]
return any(_has_set(b) for b in body)
def _emit_define_as_def(self, name: str, fn_expr, indent: int = 0) -> str:
"""Emit a define with fn value as a proper def statement.
This is used for functions that contain set! — Python closures can't
rebind outer lambda params, so we need proper def + local variables.
Variables mutated by set! from nested lambdas use a _cells dict.
"""
pad = " " * indent
params = fn_expr[1]
body = fn_expr[2:]
param_names = []
i = 0
while i < len(params):
p = params[i]
if isinstance(p, Symbol) and p.name == "&rest":
if i + 1 < len(params):
rest_name = self._mangle(params[i + 1].name if isinstance(params[i + 1], Symbol) else str(params[i + 1]))
param_names.append(f"*{rest_name}")
i += 2
continue
else:
i += 1
continue
if isinstance(p, Symbol):
param_names.append(self._mangle(p.name))
else:
param_names.append(str(p))
i += 1
params_str = ", ".join(param_names)
py_name = self._mangle(name)
# Find set! target variables that are used from nested lambda scopes
nested_set_vars = self._find_nested_set_vars(body)
lines = [f"{pad}def {py_name}({params_str}):"]
if nested_set_vars:
lines.append(f"{pad} _cells = {{}}")
# Emit body with cell var tracking
old_cells = getattr(self, '_current_cell_vars', set())
self._current_cell_vars = nested_set_vars
self._emit_body_stmts(body, lines, indent + 1)
self._current_cell_vars = old_cells
return "\n".join(lines)
def _find_nested_set_vars(self, body) -> set[str]:
"""Find variable names that are set! from within nested fn/lambda bodies."""
result = set()
def _scan(node, in_nested_fn=False):
if not isinstance(node, list) or not node:
return
head = node[0]
if isinstance(head, Symbol):
if head.name in ("fn", "lambda") and in_nested_fn:
# Already nested, keep scanning
for child in node[2:]:
_scan(child, True)
return
if head.name in ("fn", "lambda"):
# Entering nested fn
for child in node[2:]:
_scan(child, True)
return
if head.name == "set!" and in_nested_fn:
var = node[1].name if isinstance(node[1], Symbol) else str(node[1])
result.add(self._mangle(var))
for child in node:
if isinstance(child, list):
_scan(child, in_nested_fn)
for b in body:
_scan(b)
return result
def _emit_body_stmts(self, body: list, lines: list, indent: int) -> None:
"""Emit body expressions as statements into lines list.
Handles let as local variable declarations, and returns the last
expression. Control flow in tail position (if, cond, case, when)
is flattened to if/elif statements with returns in each branch.
"""
pad = " " * indent
for i, expr in enumerate(body):
is_last = (i == len(body) - 1)
if isinstance(expr, list) and expr and isinstance(expr[0], Symbol):
name = expr[0].name
if name in ("let", "let*"):
self._emit_let_as_stmts(expr, lines, indent, is_last)
continue
if name in ("do", "begin"):
sub_body = expr[1:]
if is_last:
self._emit_body_stmts(sub_body, lines, indent)
else:
for sub in sub_body:
lines.append(self.emit_statement(sub, indent))
continue
if is_last:
self._emit_return_expr(expr, lines, indent)
else:
lines.append(self.emit_statement(expr, indent))
def _emit_return_expr(self, expr, lines: list, indent: int) -> None:
"""Emit an expression in return position, flattening control flow."""
pad = " " * indent
if isinstance(expr, list) and expr and isinstance(expr[0], Symbol):
name = expr[0].name
if name == "if":
self._emit_if_return(expr, lines, indent)
return
if name == "cond":
self._emit_cond_return(expr, lines, indent)
return
if name == "case":
self._emit_case_return(expr, lines, indent)
return
if name == "when":
self._emit_when_return(expr, lines, indent)
return
if name in ("let", "let*"):
self._emit_let_as_stmts(expr, lines, indent, True)
return
if name in ("do", "begin"):
self._emit_body_stmts(expr[1:], lines, indent)
return
lines.append(f"{pad}return {self.emit(expr)}")
def _emit_if_return(self, expr, lines: list, indent: int) -> None:
"""Emit if as statement with returns in each branch."""
pad = " " * indent
lines.append(f"{pad}if sx_truthy({self.emit(expr[1])}):")
self._emit_return_expr(expr[2], lines, indent + 1)
if len(expr) > 3:
lines.append(f"{pad}else:")
self._emit_return_expr(expr[3], lines, indent + 1)
else:
lines.append(f"{pad}return NIL")
def _emit_when_return(self, expr, lines: list, indent: int) -> None:
"""Emit when as statement with return in body, else return NIL."""
pad = " " * indent
lines.append(f"{pad}if sx_truthy({self.emit(expr[1])}):")
body_parts = expr[2:]
if len(body_parts) == 1:
self._emit_return_expr(body_parts[0], lines, indent + 1)
else:
for b in body_parts[:-1]:
lines.append(self.emit_statement(b, indent + 1))
self._emit_return_expr(body_parts[-1], lines, indent + 1)
lines.append(f"{pad}return NIL")
def _emit_cond_return(self, expr, lines: list, indent: int) -> None:
"""Emit cond as if/elif/else with returns in each branch."""
pad = " " * indent
clauses = expr[1:]
if not clauses:
lines.append(f"{pad}return NIL")
return
is_scheme = (
all(isinstance(c, list) and len(c) == 2 for c in clauses)
and not any(isinstance(c, Keyword) for c in clauses)
)
has_else = False
first_clause = True
if is_scheme:
for clause in clauses:
test, body = clause[0], clause[1]
if ((isinstance(test, Symbol) and test.name in ("else", ":else")) or
(isinstance(test, Keyword) and test.name == "else")):
lines.append(f"{pad}else:")
has_else = True
else:
kw = "if" if first_clause else "elif"
lines.append(f"{pad}{kw} sx_truthy({self.emit(test)}):")
first_clause = False
self._emit_return_expr(body, lines, indent + 1)
else:
i = 0
while i < len(clauses) - 1:
test, body = clauses[i], clauses[i + 1]
if ((isinstance(test, Keyword) and test.name == "else") or
(isinstance(test, Symbol) and test.name in ("else", ":else"))):
lines.append(f"{pad}else:")
has_else = True
else:
kw = "if" if first_clause else "elif"
lines.append(f"{pad}{kw} sx_truthy({self.emit(test)}):")
first_clause = False
self._emit_return_expr(body, lines, indent + 1)
i += 2
if not has_else:
lines.append(f"{pad}return NIL")
def _emit_case_return(self, expr, lines: list, indent: int) -> None:
"""Emit case as if/elif/else with returns in each branch."""
pad = " " * indent
match_val = self.emit(expr[1])
clauses = expr[2:]
lines.append(f"{pad}_match = {match_val}")
has_else = False
first_clause = True
i = 0
while i < len(clauses) - 1:
test = clauses[i]
body = clauses[i + 1]
if ((isinstance(test, Keyword) and test.name == "else") or
(isinstance(test, Symbol) and test.name in ("else", ":else"))):
lines.append(f"{pad}else:")
has_else = True
else:
kw = "if" if first_clause else "elif"
lines.append(f"{pad}{kw} _match == {self.emit(test)}:")
first_clause = False
self._emit_return_expr(body, lines, indent + 1)
i += 2
if not has_else:
lines.append(f"{pad}return NIL")
def _emit_let_as_stmts(self, expr, lines: list, indent: int, is_last: bool) -> None:
"""Emit a let expression as local variable declarations."""
pad = " " * indent
bindings = expr[1]
body = expr[2:]
cell_vars = getattr(self, '_current_cell_vars', set())
if isinstance(bindings, list):
if bindings and isinstance(bindings[0], list):
# Scheme-style: ((name val) ...)
for b in bindings:
vname = b[0].name if isinstance(b[0], Symbol) else str(b[0])
mangled = self._mangle(vname)
if mangled in cell_vars:
lines.append(f"{pad}_cells[{self._py_string(mangled)}] = {self.emit(b[1])}")
else:
lines.append(f"{pad}{mangled} = {self.emit(b[1])}")
else:
# Clojure-style: (name val name val ...)
for j in range(0, len(bindings), 2):
vname = bindings[j].name if isinstance(bindings[j], Symbol) else str(bindings[j])
mangled = self._mangle(vname)
if mangled in cell_vars:
lines.append(f"{pad}_cells[{self._py_string(mangled)}] = {self.emit(bindings[j + 1])}")
else:
lines.append(f"{pad}{mangled} = {self.emit(bindings[j + 1])}")
if is_last:
self._emit_body_stmts(body, lines, indent)
else:
for b in body:
self._emit_stmt_recursive(b, lines, indent)
def _emit_for_each_stmt(self, expr, indent: int = 0) -> str:
pad = " " * indent
fn_expr = expr[1]
coll_expr = expr[2]
coll = self.emit(coll_expr)
# If fn is an inline lambda, emit a for loop
if isinstance(fn_expr, list) and isinstance(fn_expr[0], Symbol) and fn_expr[0].name == "fn":
params = fn_expr[1]
body = fn_expr[2:]
p = params[0].name if isinstance(params[0], Symbol) else str(params[0])
p_py = self._mangle(p)
lines = [f"{pad}for {p_py} in {coll}:"]
# Emit body as statements with proper let/set! handling
self._emit_loop_body(body, lines, indent + 1)
return "\n".join(lines)
fn = self.emit(fn_expr)
return f"{pad}for _item in {coll}:\n{pad} {fn}(_item)"
def _emit_loop_body(self, body: list, lines: list, indent: int) -> None:
"""Emit loop body as statements. Handles let, when, set!, cond properly."""
pad = " " * indent
for expr in body:
self._emit_stmt_recursive(expr, lines, indent)
def _emit_stmt_recursive(self, expr, lines: list, indent: int) -> None:
    """Append *expr* to *lines* as statement(s), recursing into control flow.

    ``set!``, ``let``, ``when``, ``cond``, ``do``/``begin``, ``if`` and the
    mutation forms are emitted as native statements; everything else is
    delegated to ``emit_statement``.
    """
    pad = " " * indent
    head = expr[0] if isinstance(expr, list) and expr else None
    if not isinstance(head, Symbol):
        # Atoms, empty lists and data lists are plain expression statements.
        lines.append(self.emit_statement(expr, indent))
        return

    name = head.name
    inner = indent + 1
    if name == "set!":
        target = expr[1]
        raw = target.name if isinstance(target, Symbol) else str(target)
        mangled = self._mangle(raw)
        if mangled in getattr(self, '_current_cell_vars', set()):
            lines.append(f"{pad}_cells[{self._py_string(mangled)}] = {self.emit(expr[2])}")
        else:
            lines.append(f"{pad}{mangled} = {self.emit(expr[2])}")
    elif name in ("let", "let*"):
        self._emit_let_as_stmts(expr, lines, indent, False)
    elif name == "when":
        lines.append(f"{pad}if sx_truthy({self.emit(expr[1])}):")
        for form in expr[2:]:
            self._emit_stmt_recursive(form, lines, inner)
    elif name == "cond":
        self._emit_cond_stmt(expr, lines, indent)
    elif name in ("do", "begin"):
        for form in expr[1:]:
            self._emit_stmt_recursive(form, lines, indent)
    elif name == "if":
        lines.append(f"{pad}if sx_truthy({self.emit(expr[1])}):")
        self._emit_stmt_recursive(expr[2], lines, inner)
        if len(expr) > 3:
            lines.append(f"{pad}else:")
            self._emit_stmt_recursive(expr[3], lines, inner)
    elif name == "append!":
        lines.append(f"{pad}{self.emit(expr[1])}.append({self.emit(expr[2])})")
    elif name in ("dict-set!", "env-set!"):
        lines.append(f"{pad}{self.emit(expr[1])}[{self.emit(expr[2])}] = {self.emit(expr[3])}")
    else:
        lines.append(self.emit_statement(expr, indent))
def _emit_cond_stmt(self, expr, lines: list, indent: int) -> None:
    """Emit a ``cond`` form as an if/elif/else chain.

    Two clause layouts are recognised:

    * scheme style -- every clause is a two-element list ``(test body)``;
    * clojure style -- a flat sequence ``test body test body ...``
      (a trailing unpaired item is ignored).

    An ``else`` test (Symbol ``else``/``:else`` or Keyword ``else``) ends the
    chain: clauses after it are unreachable and are not emitted. If ``else``
    is the very first clause its body is emitted unconditionally, because an
    ``else:`` without a preceding ``if`` is not valid Python. A clause whose
    body emits no lines gets a ``pass`` so the suite is never empty.

    Args:
        expr: The full ``cond`` form, head symbol included.
        lines: Output list; generated source lines are appended to it.
        indent: Indentation level of the emitted chain.
    """
    pad = " " * indent
    clauses = expr[1:]
    # Detect scheme vs clojure style
    is_scheme = (
        all(isinstance(c, list) and len(c) == 2 for c in clauses)
        and not any(isinstance(c, Keyword) for c in clauses)
    )
    if is_scheme:
        pairs = [(clause[0], clause[1]) for clause in clauses]
    else:
        pairs = list(zip(clauses[0::2], clauses[1::2]))

    def is_else(test) -> bool:
        if isinstance(test, Symbol) and test.name in ("else", ":else"):
            return True
        return isinstance(test, Keyword) and test.name == "else"

    first_clause = True
    for test, body in pairs:
        else_clause = is_else(test)
        body_indent = indent + 1
        if else_clause and first_clause:
            # No `if` to attach to: the body simply always runs.
            body_indent = indent
        elif else_clause:
            lines.append(f"{pad}else:")
        else:
            kw = "if" if first_clause else "elif"
            lines.append(f"{pad}{kw} sx_truthy({self.emit(test)}):")
            first_clause = False

        start = len(lines)
        self._emit_stmt_recursive(body, lines, body_indent)
        if body_indent > indent and len(lines) == start:
            # A header was written but the body produced nothing.
            lines.append(" " * body_indent + "pass")

        if else_clause:
            # Nothing after `else` can ever run, and Python forbids
            # further branches after `else:`.
            break
def _emit_quote(self, expr) -> str:
    """Render a quoted SX datum as Python source that rebuilds it.

    Booleans, numbers and strings become Python literals, nil becomes
    ``NIL``, symbols and keywords become constructor calls, and lists are
    rendered element by element. Any other value falls back to ``str()``.
    """
    # bool is tested ahead of the numeric check because bool is an int subclass.
    if isinstance(expr, bool):
        return ("False", "True")[expr]
    if isinstance(expr, (int, float)):
        return str(expr)
    if isinstance(expr, str):
        return self._py_string(expr)
    if expr is None or expr is SX_NIL:
        return "NIL"
    for ctor, node_type in (("Symbol", Symbol), ("Keyword", Keyword)):
        if isinstance(expr, node_type):
            return f"{ctor}({self._py_string(expr.name)})"
    if isinstance(expr, list):
        rendered = [self._emit_quote(item) for item in expr]
        return f"[{', '.join(rendered)}]"
    return str(expr)
def _py_string(self, s: str) -> str:
    """Return *s* as Python string-literal source text (its ``repr``)."""
    return f"{s!r}"
# ---------------------------------------------------------------------------
# Bootstrap compiler
# ---------------------------------------------------------------------------
def extract_defines(source: str) -> list[tuple[str, list]]:
    """Collect the top-level ``define`` forms found in .sx source text.

    Returns a list of ``(name, form)`` pairs in source order, where ``form``
    is the whole parsed ``define`` expression. The name is the symbol's name
    when the second element is a Symbol, otherwise its ``str()``.
    """
    found = []
    for form in parse_all(source):
        is_define = (
            isinstance(form, list)
            and form
            and isinstance(form[0], Symbol)
            and form[0].name == "define"
        )
        if not is_define:
            continue
        target = form[1]
        label = target.name if isinstance(target, Symbol) else str(target)
        found.append((label, form))
    return found
# Build config and static platform sections — canonical source is platform_py.py
try:
from .platform_py import (
PREAMBLE, PLATFORM_PY, PRIMITIVES_PY_PRE, PRIMITIVES_PY_POST,
PRIMITIVES_PY_MODULES, _ALL_PY_MODULES,
PLATFORM_DEPS_PY, PLATFORM_ASYNC_PY, FIXUPS_PY, CONTINUATIONS_PY,
_assemble_primitives_py, public_api_py,
ADAPTER_FILES, SPEC_MODULES, EXTENSION_NAMES, EXTENSION_FORMS,
)
except ImportError:
from shared.sx.ref.platform_py import (
PREAMBLE, PLATFORM_PY, PRIMITIVES_PY_PRE, PRIMITIVES_PY_POST,
PRIMITIVES_PY_MODULES, _ALL_PY_MODULES,
PLATFORM_DEPS_PY, PLATFORM_ASYNC_PY, FIXUPS_PY, CONTINUATIONS_PY,
_assemble_primitives_py, public_api_py,
ADAPTER_FILES, SPEC_MODULES, EXTENSION_NAMES, EXTENSION_FORMS,
)
def _parse_special_forms_spec(ref_dir: str) -> set[str]:
    """Parse special-forms.sx to extract declared form names.

    Args:
        ref_dir: Directory expected to contain ``special-forms.sx``.

    Returns:
        The names declared by top-level ``(define-special-form "name" ...)``
        forms whose second element is a string; an empty set when the spec
        file does not exist.
    """
    filepath = os.path.join(ref_dir, "special-forms.sx")
    if not os.path.exists(filepath):
        return set()
    # Read as UTF-8 explicitly so the result does not depend on the
    # platform's locale encoding.
    with open(filepath, encoding="utf-8") as f:
        src = f.read()
    return {
        expr[1]
        for expr in parse_all(src)
        if (isinstance(expr, list) and len(expr) >= 2
            and isinstance(expr[0], Symbol)
            and expr[0].name == "define-special-form"
            and isinstance(expr[1], str))
    }
def _extract_eval_dispatch_names(all_sections: list) -> set[str]:
    """Collect form names implied by ``sf-*`` and ``ho-*`` defines.

    ``all_sections`` is a list of ``(label, defines)`` pairs, each ``defines``
    being a list of ``(name, expr)`` pairs. The three-character prefix is
    stripped from every matching define name; a few ``sf-`` helper names are
    skipped.
    """
    skipped_helpers = {"cond-scheme", "cond-clojure", "case-loop"}
    found: set[str] = set()
    for _, section_defines in all_sections:
        for define_name, _ in section_defines:
            prefix, form = define_name[:3], define_name[3:]
            if prefix == "ho-" or (prefix == "sf-" and form not in skipped_helpers):
                found.add(form)
    return found
def _validate_special_forms(ref_dir: str, all_sections: list,
                            has_continuations: bool) -> None:
    """Compare the forms declared in special-forms.sx with the dispatched ones.

    Mismatches in either direction are reported as ``# WARNING`` lines on
    stderr; nothing is raised. No check is performed when the spec yields no
    names.
    """
    declared = _parse_special_forms_spec(ref_dir)
    if not declared:
        return

    dispatched = _extract_eval_dispatch_names(all_sections)
    if has_continuations:
        dispatched |= EXTENSION_FORMS["continuations"]

    # Map define-derived names onto the spelling used by the spec.
    aliases = {
        "thread-first": "->",
        "every": "every?",
        "set-bang": "set!",
    }
    # "named-let" is dropped from the dispatched side before comparing.
    dispatched = {aliases.get(n, n) for n in dispatched} - {"named-let"}

    # Names excluded from both directions of the comparison.
    exempt = {"fn", "let*", "do", "defrelation"}
    undispatched = (declared - dispatched) - exempt
    unspecced = (dispatched - declared) - exempt

    if undispatched:
        print(f"# WARNING: special-forms.sx declares forms not in eval.sx: "
              f"{', '.join(sorted(undispatched))}", file=sys.stderr)
    if unspecced:
        print(f"# WARNING: eval.sx dispatches forms not in special-forms.sx: "
              f"{', '.join(sorted(unspecced))}", file=sys.stderr)
def compile_ref_to_py(
    adapters: list[str] | None = None,
    modules: list[str] | None = None,
    extensions: list[str] | None = None,
    spec_modules: list[str] | None = None,
) -> str:
    """Read reference .sx files and emit Python.

    Args:
        adapters: List of adapter names to include.
            Valid names: html, sx.
            None = include all server-side adapters.
        modules: List of primitive module names to include.
            core.* are always included. stdlib.* are opt-in.
            None = include all modules (backward compatible).
        extensions: List of optional extensions to include.
            Valid names: continuations.
            None = no extensions.
        spec_modules: List of spec module names to include; each must be a
            key of SPEC_MODULES (e.g. deps, engine).
            None = no spec modules. When the html adapter is selected,
            "deps" and "signals" are added automatically if SPEC_MODULES
            defines them.

    Returns:
        The generated Python module source as one string.

    Raises:
        ValueError: If an unknown module, adapter, spec module or extension
            name is given.
    """
    # Determine which primitive modules to include
    prim_modules = None  # None = all
    if modules is not None:
        # core.* modules come first; requested extras follow in request order.
        prim_modules = [m for m in _ALL_PY_MODULES if m.startswith("core.")]
        for m in modules:
            if m not in prim_modules:
                if m not in PRIMITIVES_PY_MODULES:
                    raise ValueError(f"Unknown module: {m!r}. Valid: {', '.join(PRIMITIVES_PY_MODULES)}")
                prim_modules.append(m)

    ref_dir = os.path.dirname(os.path.abspath(__file__))
    emitter = PyEmitter()

    # Resolve adapter set
    if adapters is None:
        adapter_set = set(ADAPTER_FILES.keys())
    else:
        adapter_set = set()
        for a in adapters:
            if a not in ADAPTER_FILES:
                raise ValueError(f"Unknown adapter: {a!r}. Valid: {', '.join(ADAPTER_FILES)}")
            adapter_set.add(a)

    # Resolve spec modules
    spec_mod_set = set()
    if spec_modules:
        for sm in spec_modules:
            if sm not in SPEC_MODULES:
                raise ValueError(f"Unknown spec module: {sm!r}. Valid: {', '.join(SPEC_MODULES)}")
            spec_mod_set.add(sm)
    # html adapter needs deps (component analysis) and signals (island rendering)
    if "html" in adapter_set:
        if "deps" in SPEC_MODULES:
            spec_mod_set.add("deps")
        if "signals" in SPEC_MODULES:
            spec_mod_set.add("signals")
    has_deps = "deps" in spec_mod_set

    # Core files always included, then selected adapters, then spec modules
    sx_files = [
        ("eval.sx", "eval"),
        ("forms.sx", "forms (server definition forms)"),
        ("render.sx", "render (core)"),
    ]
    for name in ("html", "sx"):
        if name in adapter_set:
            sx_files.append(ADAPTER_FILES[name])
    # Sorted so the emitted section order does not depend on set ordering.
    for name in sorted(spec_mod_set):
        sx_files.append(SPEC_MODULES[name])

    all_sections = []
    for filename, label in sx_files:
        filepath = os.path.join(ref_dir, filename)
        # Files that are not present on disk are skipped silently.
        if not os.path.exists(filepath):
            continue
        # Read as UTF-8 explicitly so the result does not depend on the
        # platform's locale encoding.
        with open(filepath, encoding="utf-8") as f:
            src = f.read()
        defines = extract_defines(src)
        all_sections.append((label, defines))

    # Resolve extensions
    ext_set = set()
    if extensions:
        for e in extensions:
            if e not in EXTENSION_NAMES:
                raise ValueError(f"Unknown extension: {e!r}. Valid: {', '.join(EXTENSION_NAMES)}")
            ext_set.add(e)
    has_continuations = "continuations" in ext_set

    # Validate special forms
    _validate_special_forms(ref_dir, all_sections, has_continuations)

    # Build output
    has_html = "html" in adapter_set
    has_sx = "sx" in adapter_set
    parts = []
    parts.append(PREAMBLE)
    parts.append(PLATFORM_PY)
    parts.append(PRIMITIVES_PY_PRE)
    parts.append(_assemble_primitives_py(prim_modules))
    parts.append(PRIMITIVES_PY_POST)
    if has_deps:
        parts.append(PLATFORM_DEPS_PY)
    # One commented section per source file, one commented entry per define.
    for label, defines in all_sections:
        parts.append(f"\n# === Transpiled from {label} ===\n")
        for name, expr in defines:
            parts.append(f"# {name}")
            parts.append(emitter.emit_statement(expr))
            parts.append("")
    parts.append(FIXUPS_PY)
    if has_continuations:
        parts.append(CONTINUATIONS_PY)
    parts.append(public_api_py(has_html, has_sx, has_deps))
    return "\n".join(parts)
# NOTE: Static platform sections (PREAMBLE, PLATFORM_PY, PRIMITIVES_*, etc.)
# are now imported from platform_py.py above. Do not redefine them here.
# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------
def main():
    """Parse the command-line options and print the generated module to stdout."""
    import argparse

    # (flag, help text) for every comma-separated list option.
    list_options = (
        ("--adapters",
         "Comma-separated adapter names (html,sx). Default: all server-side."),
        ("--modules",
         "Comma-separated primitive modules (core.* always included). Default: all."),
        ("--extensions",
         "Comma-separated extensions (continuations). Default: none."),
        ("--spec-modules",
         "Comma-separated spec modules (deps,engine). Default: none."),
    )
    parser = argparse.ArgumentParser(description="Bootstrap SX spec -> Python")
    for flag, help_text in list_options:
        parser.add_argument(flag, default=None, help=help_text)
    args = parser.parse_args()

    def split_csv(value):
        # An omitted or empty option means "use the default" (None).
        return value.split(",") if value else None

    print(compile_ref_to_py(
        split_csv(args.adapters),
        split_csv(args.modules),
        split_csv(args.extensions),
        split_csv(args.spec_modules),
    ))
# Script entry point: run the generator only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()