Files
rose-ash/hosts/python/bootstrap.py
giles 06666ac8c4 Decouple core evaluator from web platform, extract libraries
The core evaluator (spec/evaluator.sx) is now the irreducible computational
core with zero web, rendering, or type-system knowledge. 2531 → 2313 lines.

- Add extensible special form registry (*custom-special-forms* + register-special-form!)
- Add render dispatch hooks (*render-check* / *render-fn*) replacing hardcoded render-active?/is-render-expr?/render-expr
- Extract freeze scopes → spec/freeze.sx (library, not core)
- Extract content addressing → spec/content.sx (library, not core)
- Move sf-deftype/sf-defeffect → spec/types.sx (self-registering)
- Move sf-defstyle → web/forms.sx (self-registering with all web forms)
- Move web tests (defpage, streaming) → web/tests/test-forms.sx
- Add is-else-clause? helper (replaces 5 inline patterns)
- Make escape-html/escape-attr library functions in render.sx (pure SX, not platform-provided)
- Add foundations plan: Step 3.5 (data representations), Step 3.7 (verified components), OCaml for Step 4d
- Update all three bootstrappers (JS 957/957, Python 744/744, OCaml 952/952)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-03-16 08:37:50 +00:00

1644 lines
68 KiB
Python

#!/usr/bin/env python3
"""
Bootstrap compiler: reference SX evaluator -> Python.
Reads the .sx reference specification and emits a standalone Python
evaluator module (sx_ref.py) that can be compared against the hand-written
evaluator.py / html.py / async_eval.py.
The compiler translates the restricted SX subset used in eval.sx/render.sx
into idiomatic Python. Platform interface functions are emitted as
native Python implementations.
Usage:
python bootstrap_py.py > sx_ref.py
"""
from __future__ import annotations
import os
import sys
# Add project root to path for imports
_HERE = os.path.dirname(os.path.abspath(__file__))
_PROJECT = os.path.abspath(os.path.join(_HERE, "..", ".."))
sys.path.insert(0, _PROJECT)
from shared.sx.parser import parse_all
from shared.sx.types import Symbol, Keyword, NIL as SX_NIL
# ---------------------------------------------------------------------------
# SX -> Python transpiler
# ---------------------------------------------------------------------------
# Names that must never be emitted verbatim as Python identifiers.
# _mangle() appends a trailing "_" to any mangled SX name found in this set.
# Names the generated code intentionally shadows (list, dict, range, filter,
# map) are deliberately left out.
_PY_RESERVED = frozenset({
    # Python keywords and keyword-like constants
    "False", "None", "True", "and", "as", "assert", "async", "await",
    "break", "class", "continue", "def", "del", "elif", "else", "except",
    "finally", "for", "from", "global", "if", "import", "in", "is",
    "lambda", "nonlocal", "not", "or", "pass", "raise", "return", "try",
    "while", "with", "yield",
    # Builtins we don't want to shadow.
    # NOTE(review): "default" is not a Python builtin; presumably it is listed
    # to avoid clashing with a name used by the generated runtime -- confirm.
    "default", "type", "id", "input", "open", "print", "set", "super",
})
class PyEmitter:
"""Transpile an SX AST node to Python source code."""
def __init__(self):
self.indent = 0
self._async_names: set[str] = set() # SX names of define-async functions
self._in_async: bool = False # Currently emitting async def body?
def emit(self, expr) -> str:
"""Emit a Python expression from an SX AST node."""
# Bool MUST be checked before int (bool is subclass of int in Python)
if isinstance(expr, bool):
return "True" if expr else "False"
if isinstance(expr, (int, float)):
return str(expr)
if isinstance(expr, str):
return self._py_string(expr)
if expr is None or expr is SX_NIL:
return "NIL"
if isinstance(expr, Symbol):
return self._emit_symbol(expr.name)
if isinstance(expr, Keyword):
return self._py_string(expr.name)
if isinstance(expr, dict):
return self._emit_native_dict(expr)
if isinstance(expr, list):
return self._emit_list(expr)
return str(expr)
def emit_statement(self, expr, indent: int = 0) -> str:
"""Emit a Python statement from an SX AST node."""
pad = " " * indent
if isinstance(expr, list) and expr:
head = expr[0]
if isinstance(head, Symbol):
name = head.name
if name == "define":
return self._emit_define(expr, indent)
if name == "define-async":
return self._emit_define_async(expr, indent)
if name == "set!":
varname = expr[1].name if isinstance(expr[1], Symbol) else str(expr[1])
py_var = self._mangle(varname)
cell_vars = getattr(self, '_current_cell_vars', set())
if py_var in cell_vars:
return f"{pad}_cells[{self._py_string(py_var)}] = {self.emit(expr[2])}"
return f"{pad}{py_var} = {self.emit(expr[2])}"
if name == "when":
return self._emit_when_stmt(expr, indent)
if name == "do" or name == "begin":
return "\n".join(self.emit_statement(e, indent) for e in expr[1:])
if name == "for-each":
return self._emit_for_each_stmt(expr, indent)
if name == "dict-set!":
return f"{pad}{self.emit(expr[1])}[{self.emit(expr[2])}] = {self.emit(expr[3])}"
if name == "append!":
return f"{pad}{self.emit(expr[1])}.append({self.emit(expr[2])})"
if name == "env-set!":
return f"{pad}{self.emit(expr[1])}[{self.emit(expr[2])}] = {self.emit(expr[3])}"
if name == "set-lambda-name!":
return f"{pad}{self.emit(expr[1])}.name = {self.emit(expr[2])}"
return f"{pad}{self.emit(expr)}"
# --- Symbol emission ---
def _emit_symbol(self, name: str) -> str:
mangled = self._mangle(name)
cell_vars = getattr(self, '_current_cell_vars', set())
if mangled in cell_vars:
return f"_cells[{self._py_string(mangled)}]"
return mangled
def _mangle(self, name: str) -> str:
"""Convert SX identifier to valid Python identifier."""
RENAMES = {
"nil": "NIL",
"true": "True",
"false": "False",
"nil?": "is_nil",
"type-of": "type_of",
"symbol-name": "symbol_name",
"keyword-name": "keyword_name",
"make-lambda": "make_lambda",
"make-component": "make_component",
"make-macro": "make_macro",
"make-thunk": "make_thunk",
"make-handler-def": "make_handler_def",
"make-query-def": "make_query_def",
"make-action-def": "make_action_def",
"make-page-def": "make_page_def",
"make-symbol": "make_symbol",
"make-keyword": "make_keyword",
"lambda-params": "lambda_params",
"lambda-body": "lambda_body",
"lambda-closure": "lambda_closure",
"lambda-name": "lambda_name",
"set-lambda-name!": "set_lambda_name",
"component-params": "component_params",
"component-body": "component_body",
"component-closure": "component_closure",
"component-has-children?": "component_has_children",
"component-name": "component_name",
"component-affinity": "component_affinity",
"component-param-types": "component_param_types",
"component-set-param-types!": "component_set_param_types",
"macro-params": "macro_params",
"macro-rest-param": "macro_rest_param",
"macro-body": "macro_body",
"macro-closure": "macro_closure",
"thunk?": "is_thunk",
"thunk-expr": "thunk_expr",
"thunk-env": "thunk_env",
"callable?": "is_callable",
"lambda?": "is_lambda",
"component?": "is_component",
"island?": "is_island",
"make-island": "make_island",
"make-signal": "make_signal",
"signal?": "is_signal",
"signal-value": "signal_value",
"signal-set-value!": "signal_set_value",
"signal-subscribers": "signal_subscribers",
"signal-add-sub!": "signal_add_sub",
"signal-remove-sub!": "signal_remove_sub",
"signal-deps": "signal_deps",
"signal-set-deps!": "signal_set_deps",
"identical?": "is_identical",
"notify-subscribers": "notify_subscribers",
"flush-subscribers": "flush_subscribers",
"dispose-computed": "dispose_computed",
"with-island-scope": "with_island_scope",
"register-in-scope": "register_in_scope",
"*batch-depth*": "_batch_depth",
"*batch-queue*": "_batch_queue",
"*store-registry*": "_store_registry",
"*custom-special-forms*": "_custom_special_forms",
"*render-check*": "_render_check",
"*render-fn*": "_render_fn",
"register-special-form!": "register_special_form_b",
"is-else-clause?": "is_else_clause_p",
"def-store": "def_store",
"use-store": "use_store",
"clear-stores": "clear_stores",
"emit-event": "emit_event",
"on-event": "on_event",
"bridge-event": "bridge_event",
"dom-listen": "dom_listen",
"dom-dispatch": "dom_dispatch",
"event-detail": "event_detail",
"macro?": "is_macro",
"primitive?": "is_primitive",
"get-primitive": "get_primitive",
"env-has?": "env_has",
"env-get": "env_get",
"env-set!": "env_set",
"env-extend": "env_extend",
"env-merge": "env_merge",
"dict-set!": "dict_set",
"dict-get": "dict_get",
"dict-has?": "dict_has",
"dict-delete!": "dict_delete",
"eval-expr": "eval_expr",
"eval-list": "eval_list",
"eval-call": "eval_call",
"is-render-expr?": "is_render_expr",
"render-expr": "render_expr",
"call-lambda": "call_lambda",
"call-component": "call_component",
"parse-keyword-args": "parse_keyword_args",
"parse-comp-params": "parse_comp_params",
"parse-macro-params": "parse_macro_params",
"expand-macro": "expand_macro",
"render-to-html": "render_to_html",
"render-to-sx": "render_to_sx",
"render-value-to-html": "render_value_to_html",
"render-list-to-html": "render_list_to_html",
"render-html-element": "render_html_element",
"render-html-component": "render_html_component",
"parse-element-args": "parse_element_args",
"render-attrs": "render_attrs",
"aser-list": "aser_list",
"aser-fragment": "aser_fragment",
"aser-call": "aser_call",
"aser-special": "aser_special",
"sf-if": "sf_if",
"sf-when": "sf_when",
"sf-cond": "sf_cond",
"sf-cond-scheme": "sf_cond_scheme",
"sf-cond-clojure": "sf_cond_clojure",
"sf-case": "sf_case",
"sf-case-loop": "sf_case_loop",
"sf-and": "sf_and",
"sf-or": "sf_or",
"sf-let": "sf_let",
"sf-lambda": "sf_lambda",
"sf-define": "sf_define",
"sf-defcomp": "sf_defcomp",
"defcomp-kwarg": "defcomp_kwarg",
"sf-defmacro": "sf_defmacro",
"sf-begin": "sf_begin",
"sf-quote": "sf_quote",
"sf-quasiquote": "sf_quasiquote",
"sf-thread-first": "sf_thread_first",
"sf-set!": "sf_set_bang",
"sf-reset": "sf_reset",
"sf-shift": "sf_shift",
"qq-expand": "qq_expand",
"ho-map": "ho_map",
"ho-map-indexed": "ho_map_indexed",
"ho-filter": "ho_filter",
"ho-reduce": "ho_reduce",
"ho-some": "ho_some",
"ho-every": "ho_every",
"ho-for-each": "ho_for_each",
"sf-defstyle": "sf_defstyle",
"special-form?": "is_special_form",
"ho-form?": "is_ho_form",
"strip-prefix": "strip_prefix",
"escape-html": "escape_html",
"escape-attr": "escape_attr",
"escape-string": "escape_string",
"raw-html-content": "raw_html_content",
"HTML_TAGS": "HTML_TAGS",
"VOID_ELEMENTS": "VOID_ELEMENTS",
"BOOLEAN_ATTRS": "BOOLEAN_ATTRS",
# render.sx core
"definition-form?": "is_definition_form",
# adapter-html.sx
"RENDER_HTML_FORMS": "RENDER_HTML_FORMS",
"render-html-form?": "is_render_html_form",
"dispatch-html-form": "dispatch_html_form",
"render-lambda-html": "render_lambda_html",
"make-raw-html": "make_raw_html",
"render-html-island": "render_html_island",
"serialize-island-state": "serialize_island_state",
"json-serialize": "json_serialize",
"empty-dict?": "is_empty_dict",
"sf-defisland": "sf_defisland",
# adapter-sx.sx
"render-to-sx": "render_to_sx",
# adapter-async.sx platform primitives
"svg-context-set!": "svg_context_set",
"svg-context-reset!": "svg_context_reset",
"css-class-collect!": "css_class_collect",
# spread + collect primitives
"make-spread": "make_spread",
"spread?": "is_spread",
"spread-attrs": "spread_attrs",
"merge-spread-attrs": "merge_spread_attrs",
"collect!": "sx_collect",
"collected": "sx_collected",
"clear-collected!": "sx_clear_collected",
"scope-push!": "scope_push",
"scope-pop!": "scope_pop",
"provide-push!": "provide_push",
"provide-pop!": "provide_pop",
"context": "sx_context",
"emit!": "sx_emit",
"emitted": "sx_emitted",
"is-raw-html?": "is_raw_html",
"async-coroutine?": "is_async_coroutine",
"async-await!": "async_await",
"is-sx-expr?": "is_sx_expr",
"sx-expr?": "is_sx_expr",
"io-primitive?": "io_primitive_p",
"expand-components?": "expand_components_p",
"svg-context?": "svg_context_p",
"make-sx-expr": "make_sx_expr",
"aser": "aser",
"eval-case-aser": "eval_case_aser",
"sx-serialize": "sx_serialize",
"sx-serialize-dict": "sx_serialize_dict",
"sx-expr-source": "sx_expr_source",
# Primitives that need exact aliases
"contains?": "contains_p",
"starts-with?": "starts_with_p",
"ends-with?": "ends_with_p",
"empty?": "empty_p",
"every?": "every_p",
"for-each": "for_each",
"for-each-indexed": "for_each_indexed",
"map-indexed": "map_indexed",
"map-dict": "map_dict",
"eval-cond": "eval_cond",
"eval-cond-scheme": "eval_cond_scheme",
"eval-cond-clojure": "eval_cond_clojure",
"process-bindings": "process_bindings",
# deps.sx
"scan-refs": "scan_refs",
"scan-refs-walk": "scan_refs_walk",
"transitive-deps": "transitive_deps",
"compute-all-deps": "compute_all_deps",
"scan-components-from-source": "scan_components_from_source",
"components-needed": "components_needed",
"page-component-bundle": "page_component_bundle",
"page-css-classes": "page_css_classes",
"component-deps": "component_deps",
"component-set-deps!": "component_set_deps",
"component-css-classes": "component_css_classes",
"component-io-refs": "component_io_refs",
"component-set-io-refs!": "component_set_io_refs",
"env-components": "env_components",
"regex-find-all": "regex_find_all",
"scan-css-classes": "scan_css_classes",
# deps.sx IO detection
"scan-io-refs": "scan_io_refs",
"scan-io-refs-walk": "scan_io_refs_walk",
"transitive-io-refs": "transitive_io_refs",
"compute-all-io-refs": "compute_all_io_refs",
"component-io-refs-cached": "component_io_refs_cached",
"component-pure?": "component_pure_p",
"render-target": "render_target",
"page-render-plan": "page_render_plan",
# router.sx
"split-path-segments": "split_path_segments",
"make-route-segment": "make_route_segment",
"parse-route-pattern": "parse_route_pattern",
"match-route-segments": "match_route_segments",
"match-route": "match_route",
"find-matching-route": "find_matching_route",
}
if name in RENAMES:
return RENAMES[name]
# General mangling
result = name
# Handle trailing ? and !
if result.endswith("?"):
result = result[:-1] + "_p"
elif result.endswith("!"):
result = result[:-1] + "_b"
# Kebab to snake_case
result = result.replace("-", "_")
# Escape Python reserved words
if result in _PY_RESERVED:
result = result + "_"
return result
# --- List emission ---
def _emit_list(self, expr: list) -> str:
if not expr:
return "[]"
head = expr[0]
if not isinstance(head, Symbol):
# Data list
return "[" + ", ".join(self.emit(x) for x in expr) + "]"
name = head.name
handler = getattr(
self,
f"_sf_{name.replace('-', '_').replace('!', '_b').replace('?', '_p')}",
None,
)
if handler:
return handler(expr)
# Built-in forms
if name in ("fn", "lambda"):
return self._emit_fn(expr)
if name in ("let", "let*"):
return self._emit_let(expr)
if name == "if":
return self._emit_if(expr)
if name == "when":
return self._emit_when(expr)
if name == "cond":
return self._emit_cond(expr)
if name == "case":
return self._emit_case(expr)
if name == "and":
return self._emit_and(expr)
if name == "or":
return self._emit_or(expr)
if name == "not":
return f"(not sx_truthy({self.emit(expr[1])}))"
if name in ("do", "begin"):
return self._emit_do(expr)
if name == "list":
return "[" + ", ".join(self.emit(x) for x in expr[1:]) + "]"
if name == "dict":
return self._emit_dict_literal(expr)
if name == "quote":
return self._emit_quote(expr[1])
if name == "set!":
# set! in expression context — use nonlocal_cells dict for mutation
# from nested lambdas (Python closures can read but not rebind outer vars)
varname = expr[1].name if isinstance(expr[1], Symbol) else str(expr[1])
py_var = self._mangle(varname)
return f"_sx_cell_set(_cells, {self._py_string(py_var)}, {self.emit(expr[2])})"
if name == "str":
parts = [self.emit(x) for x in expr[1:]]
return "sx_str(" + ", ".join(parts) + ")"
# Mutation forms that can appear in expression context
if name == "append!":
return f"_sx_append({self.emit(expr[1])}, {self.emit(expr[2])})"
if name == "dict-set!":
return f"_sx_dict_set({self.emit(expr[1])}, {self.emit(expr[2])}, {self.emit(expr[3])})"
if name == "env-set!":
return f"_sx_dict_set({self.emit(expr[1])}, {self.emit(expr[2])}, {self.emit(expr[3])})"
if name == "set-lambda-name!":
return f"_sx_set_attr({self.emit(expr[1])}, 'name', {self.emit(expr[2])})"
# Infix operators
if name in ("+", "-", "*", "/", "=", "!=", "<", ">", "<=", ">=", "mod"):
return self._emit_infix(name, expr[1:])
if name == "inc":
return f"({self.emit(expr[1])} + 1)"
if name == "dec":
return f"({self.emit(expr[1])} - 1)"
# Regular function call
fn_name = self._mangle(name)
args = ", ".join(self.emit(x) for x in expr[1:])
if self._in_async and name in self._async_names:
return f"(await {fn_name}({args}))"
return f"{fn_name}({args})"
# --- Special form emitters ---
@staticmethod
def _extract_param_name(p):
    """Return the bare name of a parameter node.

    A three-element list whose middle item is the keyword ``as`` is a
    ``(name :as type)`` annotation; only its first element is the name.
    Symbols yield their ``name``; anything else is stringified.
    """
    annotated = (
        isinstance(p, list)
        and len(p) == 3
        and isinstance(p[1], Keyword)
        and p[1].name == "as"
    )
    target = p[0] if annotated else p
    return target.name if isinstance(target, Symbol) else str(target)
def _emit_fn(self, expr) -> str:
params = expr[1]
body = expr[2:]
param_names = []
rest_name = None
i = 0
while i < len(params):
p = params[i]
if isinstance(p, Symbol) and p.name == "&rest":
# Next param is the rest parameter
if i + 1 < len(params):
rest_name = self._mangle(self._extract_param_name(params[i + 1]))
i += 2
continue
else:
i += 1
continue
param_names.append(self._mangle(self._extract_param_name(p)))
i += 1
if rest_name:
param_names.append(f"*{rest_name}")
params_str = ", ".join(param_names)
if len(body) == 1:
body_py = self.emit(body[0])
return f"lambda {params_str}: {body_py}"
# Multi-expression body: need a local function
lines = []
lines.append(f"_sx_fn(lambda {params_str}: (")
for b in body[:-1]:
lines.append(f" {self.emit(b)},")
lines.append(f" {self.emit(body[-1])}")
lines.append(")[-1])")
return "\n".join(lines)
def _emit_let(self, expr) -> str:
bindings = expr[1]
body = expr[2:]
assignments = []
if isinstance(bindings, list):
if bindings and isinstance(bindings[0], list):
# Scheme-style: ((name val) ...)
for b in bindings:
vname = b[0].name if isinstance(b[0], Symbol) else str(b[0])
assignments.append((self._mangle(vname), self.emit(b[1])))
else:
# Clojure-style: (name val name val ...)
for i in range(0, len(bindings), 2):
vname = bindings[i].name if isinstance(bindings[i], Symbol) else str(bindings[i])
assignments.append((self._mangle(vname), self.emit(bindings[i + 1])))
# Nested IIFE for sequential let (each binding can see previous ones):
# (lambda a: (lambda b: body)(val_b))(val_a)
# Cell variables (mutated by nested set!) are initialized in _cells dict
# instead of lambda params, since the body reads _cells[name].
cell_vars = getattr(self, '_current_cell_vars', set())
body_parts = [self.emit(b) for b in body]
if len(body) == 1:
body_str = body_parts[0]
else:
body_str = f"_sx_begin({', '.join(body_parts)})"
# Build from inside out
result = body_str
for name, val in reversed(assignments):
if name in cell_vars:
# Cell var: initialize in _cells dict, not as lambda param
result = f"_sx_begin(_sx_cell_set(_cells, {self._py_string(name)}, {val}), {result})"
else:
result = f"(lambda {name}: {result})({val})"
return result
def _emit_if(self, expr) -> str:
cond = self.emit(expr[1])
then = self.emit(expr[2])
els = self.emit(expr[3]) if len(expr) > 3 else "NIL"
return f"({then} if sx_truthy({cond}) else {els})"
def _emit_when(self, expr) -> str:
cond = self.emit(expr[1])
body_parts = expr[2:]
if len(body_parts) == 1:
return f"({self.emit(body_parts[0])} if sx_truthy({cond}) else NIL)"
body = ", ".join(self.emit(b) for b in body_parts)
return f"(_sx_begin({body}) if sx_truthy({cond}) else NIL)"
def _emit_when_stmt(self, expr, indent: int = 0) -> str:
pad = " " * indent
cond = self.emit(expr[1])
body_parts = expr[2:]
lines = [f"{pad}if sx_truthy({cond}):"]
for b in body_parts:
self._emit_stmt_recursive(b, lines, indent + 1)
return "\n".join(lines)
def _emit_cond(self, expr) -> str:
clauses = expr[1:]
if not clauses:
return "NIL"
# Check ALL clauses are 2-element lists (scheme-style).
# Checking only the first is ambiguous — (nil? x) is a 2-element
# function call, not a scheme clause ((test body)).
is_scheme = (
all(isinstance(c, list) and len(c) == 2 for c in clauses)
and not any(isinstance(c, Keyword) for c in clauses)
)
if is_scheme:
return self._cond_scheme(clauses)
return self._cond_clojure(clauses)
def _cond_scheme(self, clauses) -> str:
if not clauses:
return "NIL"
clause = clauses[0]
test = clause[0]
body = clause[1]
if isinstance(test, Symbol) and test.name in ("else", ":else"):
return self.emit(body)
if isinstance(test, Keyword) and test.name == "else":
return self.emit(body)
return f"({self.emit(body)} if sx_truthy({self.emit(test)}) else {self._cond_scheme(clauses[1:])})"
def _cond_clojure(self, clauses) -> str:
if len(clauses) < 2:
return "NIL"
test = clauses[0]
body = clauses[1]
if isinstance(test, Keyword) and test.name == "else":
return self.emit(body)
if isinstance(test, Symbol) and test.name in ("else", ":else"):
return self.emit(body)
return f"({self.emit(body)} if sx_truthy({self.emit(test)}) else {self._cond_clojure(clauses[2:])})"
def _emit_case(self, expr) -> str:
match_expr = self.emit(expr[1])
clauses = expr[2:]
return f"_sx_case({match_expr}, [{self._case_pairs(clauses)}])"
def _case_pairs(self, clauses) -> str:
pairs = []
i = 0
while i < len(clauses) - 1:
test = clauses[i]
body = clauses[i + 1]
if isinstance(test, Keyword) and test.name == "else":
pairs.append(f"(None, lambda: {self.emit(body)})")
elif isinstance(test, Symbol) and test.name in ("else", ":else"):
pairs.append(f"(None, lambda: {self.emit(body)})")
else:
pairs.append(f"({self.emit(test)}, lambda: {self.emit(body)})")
i += 2
return ", ".join(pairs)
def _emit_and(self, expr) -> str:
parts = [self.emit(x) for x in expr[1:]]
if len(parts) == 1:
return parts[0]
# Use Python's native and for short-circuit evaluation.
# Last value returned as-is; prior values tested with sx_truthy.
# (and a b c) -> (a if not sx_truthy(a) else (b if not sx_truthy(b) else c))
result = parts[-1]
for p in reversed(parts[:-1]):
result = f"({p} if not sx_truthy({p}) else {result})"
return result
def _emit_or(self, expr) -> str:
if len(expr) == 2:
return self.emit(expr[1])
parts = [self.emit(x) for x in expr[1:]]
# Use Python's short-circuit pattern:
# (or a b c) -> (a if sx_truthy(a) else (b if sx_truthy(b) else c))
result = parts[-1]
for p in reversed(parts[:-1]):
result = f"({p} if sx_truthy({p}) else {result})"
return result
def _emit_do(self, expr) -> str:
return self._emit_do_inner(expr[1:])
def _emit_do_inner(self, exprs) -> str:
if len(exprs) == 1:
return self.emit(exprs[0])
parts = [self.emit(e) for e in exprs]
return "_sx_begin(" + ", ".join(parts) + ")"
def _emit_native_dict(self, expr: dict) -> str:
"""Emit a native Python dict (from parser's {:key val} syntax)."""
parts = []
for key, val in expr.items():
parts.append(f"{self._py_string(key)}: {self.emit(val)}")
return "{" + ", ".join(parts) + "}"
def _emit_dict_literal(self, expr) -> str:
pairs = expr[1:]
parts = []
i = 0
while i < len(pairs) - 1:
key = pairs[i]
val = pairs[i + 1]
if isinstance(key, Keyword):
parts.append(f"{self._py_string(key.name)}: {self.emit(val)}")
else:
parts.append(f"{self.emit(key)}: {self.emit(val)}")
i += 2
return "{" + ", ".join(parts) + "}"
def _emit_infix(self, op: str, args: list) -> str:
PY_OPS = {"=": "==", "!=": "!=", "mod": "%"}
py_op = PY_OPS.get(op, op)
if len(args) == 1 and op == "-":
return f"(-{self.emit(args[0])})"
return f"({self.emit(args[0])} {py_op} {self.emit(args[1])})"
def _emit_define(self, expr, indent: int = 0) -> str:
    """Emit a ``define`` form as a Python statement.

    An ``:effects [...]`` annotation between the name and the value is
    skipped.  A value that is an ``fn``/``lambda`` form is emitted as a
    ``def`` statement (for flat control flow); anything else becomes a
    simple assignment.
    """
    prefix = " " * indent
    target = expr[1]
    name = target.name if isinstance(target, Symbol) else str(target)

    # (define name :effects [...] value) -- the value sits at index 4.
    has_effects = (
        len(expr) >= 5
        and isinstance(expr[2], Keyword)
        and expr[2].name == "effects"
    )
    val_expr = expr[4] if has_effects else expr[2]

    is_fn_value = (
        isinstance(val_expr, list)
        and val_expr
        and isinstance(val_expr[0], Symbol)
        and val_expr[0].name in ("fn", "lambda")
    )
    if is_fn_value:
        return self._emit_define_as_def(name, val_expr, indent)

    value_py = self.emit(val_expr)
    return f"{prefix}{self._mangle(name)} = {value_py}"
def _emit_define_async(self, expr, indent: int = 0) -> str:
    """Emit a ``define-async`` form as an ``async def`` statement.

    An ``:effects [...]`` annotation between the name and the value is
    skipped.  If the value is not an ``fn``/``lambda`` form (not expected)
    the form is handed to ``_emit_define`` instead.
    """
    target = expr[1]
    name = target.name if isinstance(target, Symbol) else str(target)

    # (define-async name :effects [...] value) -- the value sits at index 4.
    has_effects = (
        len(expr) >= 5
        and isinstance(expr[2], Keyword)
        and expr[2].name == "effects"
    )
    val_expr = expr[4] if has_effects else expr[2]

    is_fn_value = (
        isinstance(val_expr, list)
        and val_expr
        and isinstance(val_expr[0], Symbol)
        and val_expr[0].name in ("fn", "lambda")
    )
    if not is_fn_value:
        # Shouldn't happen -- define-async should always wrap fn/lambda.
        return self._emit_define(expr, indent)
    return self._emit_define_as_def(name, val_expr, indent, is_async=True)
def _body_uses_set(self, fn_expr) -> bool:
"""Check if a fn expression's body (recursively) uses set!."""
def _has_set(node):
if not isinstance(node, list) or not node:
return False
head = node[0]
if isinstance(head, Symbol) and head.name == "set!":
return True
return any(_has_set(child) for child in node if isinstance(child, list))
body = fn_expr[2:]
return any(_has_set(b) for b in body)
def _emit_define_as_def(self, name: str, fn_expr, indent: int = 0,
is_async: bool = False) -> str:
"""Emit a define with fn value as a proper def statement.
This is used for functions that contain set! — Python closures can't
rebind outer lambda params, so we need proper def + local variables.
Variables mutated by set! from nested lambdas use a _cells dict.
When is_async=True, emits 'async def' and sets _in_async so that
calls to other async functions receive 'await'.
"""
pad = " " * indent
params = fn_expr[1]
body = fn_expr[2:]
param_names = []
i = 0
while i < len(params):
p = params[i]
if isinstance(p, Symbol) and p.name == "&rest":
if i + 1 < len(params):
rest_name = self._mangle(self._extract_param_name(params[i + 1]))
param_names.append(f"*{rest_name}")
i += 2
continue
else:
i += 1
continue
param_names.append(self._mangle(self._extract_param_name(p)))
i += 1
params_str = ", ".join(param_names)
py_name = self._mangle(name)
# Find set! target variables that are used from nested lambda scopes
nested_set_vars = self._find_nested_set_vars(body)
def_kw = "async def" if is_async else "def"
lines = [f"{pad}{def_kw} {py_name}({params_str}):"]
# Emit body with cell var tracking (and async context if needed)
old_cells = getattr(self, '_current_cell_vars', set())
if nested_set_vars and not old_cells:
lines.append(f"{pad} _cells = {{}}")
old_async = self._in_async
self._current_cell_vars = old_cells | nested_set_vars
if is_async:
self._in_async = True
# Self-tail-recursive 0-param functions: wrap body in while True
if (not param_names and not is_async
and self._has_self_tail_call(body, name)):
lines.append(f"{pad} while True:")
old_loop = getattr(self, '_current_loop_name', None)
self._current_loop_name = name
self._emit_body_stmts(body, lines, indent + 2)
self._current_loop_name = old_loop
else:
self._emit_body_stmts(body, lines, indent + 1)
self._current_cell_vars = old_cells
self._in_async = old_async
return "\n".join(lines)
def _find_nested_set_vars(self, body) -> set[str]:
"""Find variable names that are set! from within nested fn/lambda bodies."""
result = set()
def _scan(node, in_nested_fn=False):
if not isinstance(node, list) or not node:
return
head = node[0]
if isinstance(head, Symbol):
if head.name in ("fn", "lambda") and in_nested_fn:
# Already nested, keep scanning
for child in node[2:]:
_scan(child, True)
return
if head.name in ("fn", "lambda"):
# Entering nested fn
for child in node[2:]:
_scan(child, True)
return
if head.name == "set!" and in_nested_fn:
var = node[1].name if isinstance(node[1], Symbol) else str(node[1])
result.add(self._mangle(var))
for child in node:
if isinstance(child, list):
_scan(child, in_nested_fn)
for b in body:
_scan(b)
return result
def _emit_body_stmts(self, body: list, lines: list, indent: int) -> None:
    """Emit the expressions of a function body as statements into ``lines``.

    Parameters:
        body: the SX forms making up the body, in order.
        lines: output list; emitted source lines are appended in place.
        indent: nesting level passed on to the statement emitters.

    Non-final forms are emitted as plain statements; the final form is
    handed to ``_emit_return_expr`` so that it is emitted in return
    position.  ``let``/``let*`` forms are delegated to
    ``_emit_let_as_stmts`` and ``do``/``begin`` forms are flattened into
    the surrounding body.

    A ``(define name (fn () ...))`` immediately followed by ``(name)`` is
    emitted as a ``while True:`` loop instead of a recursive def, provided
    the function is self-tail-recursive and its name is not referenced by
    any later form in the body.
    """
    pad = " " * indent
    idx = 0
    while idx < len(body):
        expr = body[idx]
        is_last = (idx == len(body) - 1)
        if isinstance(expr, list) and expr and isinstance(expr[0], Symbol):
            name = expr[0].name
            if name in ("let", "let*"):
                # is_last tells the let emitter whether it is in return position.
                self._emit_let_as_stmts(expr, lines, indent, is_last)
                idx += 1
                continue
            if name in ("do", "begin"):
                sub_body = expr[1:]
                if is_last:
                    # Tail do/begin: its own last form is in return position.
                    self._emit_body_stmts(sub_body, lines, indent)
                else:
                    for sub in sub_body:
                        lines.append(self.emit_statement(sub, indent))
                idx += 1
                continue
            # Detect self-tail-recursive loop pattern:
            # (define loop-name (fn () body...))
            # (loop-name)
            # Emit as: while True: <body with self-calls as continue>
            if (name == "define" and not is_last
                    and idx + 1 < len(body)):
                loop_info = self._detect_tail_loop(expr, body[idx + 1])
                if loop_info:
                    loop_name, fn_body = loop_info
                    remaining = body[idx + 2:]
                    # Only optimize if the function isn't called again later
                    if not self._name_in_exprs(loop_name, remaining):
                        self._emit_while_loop(loop_name, fn_body, lines, indent)
                        # Skip the invocation; emit remaining body
                        for j, rem in enumerate(remaining):
                            if j == len(remaining) - 1:
                                self._emit_return_expr(rem, lines, indent)
                            else:
                                self._emit_stmt_recursive(rem, lines, indent)
                        # The rest of the body has been emitted above.
                        return
        if is_last:
            self._emit_return_expr(expr, lines, indent)
        else:
            self._emit_stmt_recursive(expr, lines, indent)
        idx += 1
def _detect_tail_loop(self, define_expr, next_expr):
    """Recognise ``(define name (fn () body...))`` followed by ``(name)``.

    Returns ``(loop_name, fn_body)`` when the define binds a zero-parameter
    fn/lambda, ``next_expr`` is a bare call of that name, and the body
    passes ``_has_self_tail_call``.  Returns None otherwise.
    """
    target = define_expr[1]
    loop_name = target.name if isinstance(target, Symbol) else None
    if not loop_name:
        return None

    # Skip an :effects annotation when locating the value expression.
    annotated = (
        len(define_expr) >= 5
        and isinstance(define_expr[2], Keyword)
        and define_expr[2].name == "effects"
    )
    if annotated:
        fn_expr = define_expr[4]
    elif len(define_expr) > 2:
        fn_expr = define_expr[2]
    else:
        fn_expr = None

    is_fn = (
        isinstance(fn_expr, list)
        and fn_expr
        and isinstance(fn_expr[0], Symbol)
        and fn_expr[0].name in ("fn", "lambda")
    )
    if not is_fn:
        return None

    params = fn_expr[1]
    if not (isinstance(params, list) and len(params) == 0):
        return None  # Must be 0-param function

    # The very next form has to be the bare invocation (name).
    is_invocation = (
        isinstance(next_expr, list)
        and len(next_expr) == 1
        and isinstance(next_expr[0], Symbol)
        and next_expr[0].name == loop_name
    )
    if not is_invocation:
        return None

    fn_body = fn_expr[2:]
    if self._has_self_tail_call(fn_body, loop_name):
        return (loop_name, fn_body)
    return None
def _has_self_tail_call(self, body, name):
"""Check if body is safe for while-loop optimization.
Returns True only when ALL tail positions are either:
- self-calls (name) → will become continue
- nil/void returns → will become break
- error() calls → raise, don't return
- when blocks → implicit nil else is fine
No tail position may return a computed value, since while-loop
break discards return values.
"""
if not body:
return False
last = body[-1]
# Non-list terminal: nil is ok, anything else is a value return
if not isinstance(last, list) or not last:
return (last is None or last is SX_NIL
or (isinstance(last, Symbol) and last.name == "nil"))
head = last[0] if isinstance(last[0], Symbol) else None
if not head:
return False
# Direct self-call in tail position
if head.name == name and len(last) == 1:
return True
# error() — raises, safe
if head.name == "error":
return True
# if — ALL branches must be safe
if head.name == "if":
then_ok = self._has_self_tail_call(
[last[2]] if len(last) > 2 else [None], name)
else_ok = self._has_self_tail_call(
[last[3]] if len(last) > 3 else [None], name)
return then_ok and else_ok
# do/begin — check last expression
if head.name in ("do", "begin"):
return self._has_self_tail_call(last[1:], name)
# when — body must be safe (implicit nil else is ok)
if head.name == "when":
return self._has_self_tail_call(last[2:], name)
# let/let* — check body (skip bindings)
if head.name in ("let", "let*"):
return self._has_self_tail_call(last[2:], name)
# cond — ALL branches must be safe
if head.name == "cond":
clauses = last[1:]
is_scheme = (
all(isinstance(c, list) and len(c) == 2 for c in clauses)
and not any(isinstance(c, Keyword) for c in clauses)
)
if is_scheme:
for clause in clauses:
if not self._has_self_tail_call([clause[1]], name):
return False
return True
else:
i = 0
while i < len(clauses) - 1:
if not self._has_self_tail_call([clauses[i + 1]], name):
return False
i += 2
return True
return False
def _name_in_exprs(self, name, exprs):
"""Check if a symbol name appears anywhere in a list of expressions."""
for expr in exprs:
if isinstance(expr, Symbol) and expr.name == name:
return True
if isinstance(expr, list):
if self._name_in_exprs(name, expr):
return True
return False
def _emit_while_loop(self, loop_name, fn_body, lines, indent):
"""Emit a self-tail-recursive function body as a while True loop."""
pad = " " * indent
lines.append(f"{pad}while True:")
# Track the loop name so _emit_return_expr can emit 'continue'
old_loop = getattr(self, '_current_loop_name', None)
self._current_loop_name = loop_name
self._emit_body_stmts(fn_body, lines, indent + 1)
self._current_loop_name = old_loop
def _emit_nil_return(self, lines: list, indent: int) -> None:
"""Emit 'return NIL' or 'break' depending on while-loop context."""
pad = " " * indent
if getattr(self, '_current_loop_name', None):
lines.append(f"{pad}break")
else:
lines.append(f"{pad}return NIL")
def _emit_return_expr(self, expr, lines: list, indent: int) -> None:
"""Emit an expression in return position, flattening control flow."""
pad = " " * indent
# Inside a while loop (self-tail-recursive define optimization):
# self-call → continue
loop_name = getattr(self, '_current_loop_name', None)
if loop_name:
if (isinstance(expr, list) and len(expr) == 1
and isinstance(expr[0], Symbol) and expr[0].name == loop_name):
lines.append(f"{pad}continue")
return
if isinstance(expr, list) and expr and isinstance(expr[0], Symbol):
name = expr[0].name
if name == "if":
self._emit_if_return(expr, lines, indent)
return
if name == "cond":
self._emit_cond_return(expr, lines, indent)
return
if name == "case":
self._emit_case_return(expr, lines, indent)
return
if name == "when":
self._emit_when_return(expr, lines, indent)
return
if name in ("let", "let*"):
self._emit_let_as_stmts(expr, lines, indent, True)
return
if name in ("do", "begin"):
self._emit_body_stmts(expr[1:], lines, indent)
return
if name == "for-each":
# for-each in return position: emit as statement, then return/break
lines.append(self._emit_for_each_stmt(expr, indent))
self._emit_nil_return(lines, indent)
return
if loop_name:
emitted = self.emit(expr)
if emitted != "NIL":
lines.append(f"{pad}{emitted}")
lines.append(f"{pad}break")
else:
lines.append(f"{pad}return {self.emit(expr)}")
def _emit_if_return(self, expr, lines: list, indent: int) -> None:
"""Emit if as statement with returns in each branch."""
pad = " " * indent
lines.append(f"{pad}if sx_truthy({self.emit(expr[1])}):")
self._emit_return_expr(expr[2], lines, indent + 1)
if len(expr) > 3:
lines.append(f"{pad}else:")
self._emit_return_expr(expr[3], lines, indent + 1)
else:
self._emit_nil_return(lines, indent)
def _emit_when_return(self, expr, lines: list, indent: int) -> None:
"""Emit when as statement with return in body, else return NIL."""
pad = " " * indent
lines.append(f"{pad}if sx_truthy({self.emit(expr[1])}):")
body_parts = expr[2:]
if len(body_parts) == 1:
self._emit_return_expr(body_parts[0], lines, indent + 1)
else:
for b in body_parts[:-1]:
lines.append(self.emit_statement(b, indent + 1))
self._emit_return_expr(body_parts[-1], lines, indent + 1)
self._emit_nil_return(lines, indent)
def _emit_cond_return(self, expr, lines: list, indent: int) -> None:
"""Emit cond as if/elif/else with returns in each branch."""
pad = " " * indent
clauses = expr[1:]
if not clauses:
lines.append(f"{pad}return NIL")
return
is_scheme = (
all(isinstance(c, list) and len(c) == 2 for c in clauses)
and not any(isinstance(c, Keyword) for c in clauses)
)
has_else = False
first_clause = True
if is_scheme:
for clause in clauses:
test, body = clause[0], clause[1]
if ((isinstance(test, Symbol) and test.name in ("else", ":else")) or
(isinstance(test, Keyword) and test.name == "else")):
lines.append(f"{pad}else:")
has_else = True
else:
kw = "if" if first_clause else "elif"
lines.append(f"{pad}{kw} sx_truthy({self.emit(test)}):")
first_clause = False
self._emit_return_expr(body, lines, indent + 1)
else:
i = 0
while i < len(clauses) - 1:
test, body = clauses[i], clauses[i + 1]
if ((isinstance(test, Keyword) and test.name == "else") or
(isinstance(test, Symbol) and test.name in ("else", ":else"))):
lines.append(f"{pad}else:")
has_else = True
else:
kw = "if" if first_clause else "elif"
lines.append(f"{pad}{kw} sx_truthy({self.emit(test)}):")
first_clause = False
self._emit_return_expr(body, lines, indent + 1)
i += 2
if not has_else:
self._emit_nil_return(lines, indent)
def _emit_case_return(self, expr, lines: list, indent: int) -> None:
"""Emit case as if/elif/else with returns in each branch."""
pad = " " * indent
match_val = self.emit(expr[1])
clauses = expr[2:]
lines.append(f"{pad}_match = {match_val}")
has_else = False
first_clause = True
i = 0
while i < len(clauses) - 1:
test = clauses[i]
body = clauses[i + 1]
if ((isinstance(test, Keyword) and test.name == "else") or
(isinstance(test, Symbol) and test.name in ("else", ":else"))):
lines.append(f"{pad}else:")
has_else = True
else:
kw = "if" if first_clause else "elif"
lines.append(f"{pad}{kw} _match == {self.emit(test)}:")
first_clause = False
self._emit_return_expr(body, lines, indent + 1)
i += 2
if not has_else:
self._emit_nil_return(lines, indent)
def _emit_let_as_stmts(self, expr, lines: list, indent: int, is_last: bool) -> None:
"""Emit a let expression as local variable declarations."""
pad = " " * indent
bindings = expr[1]
body = expr[2:]
cell_vars = getattr(self, '_current_cell_vars', set())
if isinstance(bindings, list):
if bindings and isinstance(bindings[0], list):
# Scheme-style: ((name val) ...)
for b in bindings:
vname = b[0].name if isinstance(b[0], Symbol) else str(b[0])
mangled = self._mangle(vname)
if mangled in cell_vars:
lines.append(f"{pad}_cells[{self._py_string(mangled)}] = {self.emit(b[1])}")
else:
lines.append(f"{pad}{mangled} = {self.emit(b[1])}")
else:
# Clojure-style: (name val name val ...)
for j in range(0, len(bindings), 2):
vname = bindings[j].name if isinstance(bindings[j], Symbol) else str(bindings[j])
mangled = self._mangle(vname)
if mangled in cell_vars:
lines.append(f"{pad}_cells[{self._py_string(mangled)}] = {self.emit(bindings[j + 1])}")
else:
lines.append(f"{pad}{mangled} = {self.emit(bindings[j + 1])}")
if is_last:
self._emit_body_stmts(body, lines, indent)
else:
for b in body:
self._emit_stmt_recursive(b, lines, indent)
def _emit_for_each_stmt(self, expr, indent: int = 0) -> str:
pad = " " * indent
fn_expr = expr[1]
coll_expr = expr[2]
coll = self.emit(coll_expr)
# If fn is an inline lambda, emit a for loop
if isinstance(fn_expr, list) and isinstance(fn_expr[0], Symbol) and fn_expr[0].name == "fn":
params = fn_expr[1]
body = fn_expr[2:]
p = self._extract_param_name(params[0])
p_py = self._mangle(p)
lines = [f"{pad}for {p_py} in {coll}:"]
# Emit body as statements with proper let/set! handling
self._emit_loop_body(body, lines, indent + 1)
return "\n".join(lines)
fn = self.emit(fn_expr)
return f"{pad}for _item in {coll}:\n{pad} {fn}(_item)"
def _emit_loop_body(self, body: list, lines: list, indent: int) -> None:
"""Emit loop body as statements. Handles let, when, set!, cond properly."""
pad = " " * indent
for expr in body:
self._emit_stmt_recursive(expr, lines, indent)
def _emit_stmt_recursive(self, expr, lines: list, indent: int) -> None:
"""Emit an expression as statement(s), recursing into control flow."""
pad = " " * indent
if not isinstance(expr, list) or not expr:
lines.append(self.emit_statement(expr, indent))
return
head = expr[0]
if not isinstance(head, Symbol):
lines.append(self.emit_statement(expr, indent))
return
name = head.name
if name == "set!":
varname = expr[1].name if isinstance(expr[1], Symbol) else str(expr[1])
mangled = self._mangle(varname)
cell_vars = getattr(self, '_current_cell_vars', set())
if mangled in cell_vars:
lines.append(f"{pad}_cells[{self._py_string(mangled)}] = {self.emit(expr[2])}")
else:
lines.append(f"{pad}{mangled} = {self.emit(expr[2])}")
elif name in ("let", "let*"):
self._emit_let_as_stmts(expr, lines, indent, False)
elif name == "when":
cond = self.emit(expr[1])
lines.append(f"{pad}if sx_truthy({cond}):")
for b in expr[2:]:
self._emit_stmt_recursive(b, lines, indent + 1)
elif name == "cond":
self._emit_cond_stmt(expr, lines, indent)
elif name in ("do", "begin"):
for b in expr[1:]:
self._emit_stmt_recursive(b, lines, indent)
elif name == "if":
cond = self.emit(expr[1])
lines.append(f"{pad}if sx_truthy({cond}):")
self._emit_stmt_recursive(expr[2], lines, indent + 1)
if len(expr) > 3:
lines.append(f"{pad}else:")
self._emit_stmt_recursive(expr[3], lines, indent + 1)
elif name == "append!":
lines.append(f"{pad}{self.emit(expr[1])}.append({self.emit(expr[2])})")
elif name == "dict-set!":
lines.append(f"{pad}{self.emit(expr[1])}[{self.emit(expr[2])}] = {self.emit(expr[3])}")
elif name == "env-set!":
lines.append(f"{pad}{self.emit(expr[1])}[{self.emit(expr[2])}] = {self.emit(expr[3])}")
else:
lines.append(self.emit_statement(expr, indent))
def _emit_cond_stmt(self, expr, lines: list, indent: int) -> None:
"""Emit cond as if/elif/else chain."""
pad = " " * indent
clauses = expr[1:]
# Detect scheme vs clojure style
is_scheme = (
all(isinstance(c, list) and len(c) == 2 for c in clauses)
and not any(isinstance(c, Keyword) for c in clauses)
)
first_clause = True
if is_scheme:
for clause in clauses:
test, body = clause[0], clause[1]
if isinstance(test, Symbol) and test.name in ("else", ":else"):
lines.append(f"{pad}else:")
elif isinstance(test, Keyword) and test.name == "else":
lines.append(f"{pad}else:")
else:
kw = "if" if first_clause else "elif"
lines.append(f"{pad}{kw} sx_truthy({self.emit(test)}):")
first_clause = False
self._emit_stmt_recursive(body, lines, indent + 1)
else:
i = 0
while i < len(clauses) - 1:
test, body = clauses[i], clauses[i + 1]
if isinstance(test, Keyword) and test.name == "else":
lines.append(f"{pad}else:")
elif isinstance(test, Symbol) and test.name in ("else", ":else"):
lines.append(f"{pad}else:")
else:
kw = "if" if first_clause else "elif"
lines.append(f"{pad}{kw} sx_truthy({self.emit(test)}):")
first_clause = False
self._emit_stmt_recursive(body, lines, indent + 1)
i += 2
def _emit_quote(self, expr) -> str:
"""Emit a quoted expression as a Python literal AST."""
if isinstance(expr, bool):
return "True" if expr else "False"
if isinstance(expr, (int, float)):
return str(expr)
if isinstance(expr, str):
return self._py_string(expr)
if expr is None or expr is SX_NIL:
return "NIL"
if isinstance(expr, Symbol):
return f"Symbol({self._py_string(expr.name)})"
if isinstance(expr, Keyword):
return f"Keyword({self._py_string(expr.name)})"
if isinstance(expr, list):
return "[" + ", ".join(self._emit_quote(x) for x in expr) + "]"
return str(expr)
def _py_string(self, s: str) -> str:
return repr(s)
# ---------------------------------------------------------------------------
# Bootstrap compiler
# ---------------------------------------------------------------------------
def extract_defines(source: str) -> list[tuple[str, list]]:
    """Parse .sx source and collect its top-level definitions.

    Returns one ``(name, form)`` pair per top-level ``(define ...)`` or
    ``(define-async ...)`` form, in source order. ``name`` is the symbol's
    name, or ``str()`` of the second element when it is not a Symbol.
    """
    def _is_define(form):
        return (isinstance(form, list) and form
                and isinstance(form[0], Symbol)
                and form[0].name in ("define", "define-async"))

    def _defined_name(form):
        target = form[1]
        return target.name if isinstance(target, Symbol) else str(target)

    return [(_defined_name(form), form)
            for form in parse_all(source)
            if _is_define(form)]
# Build config and static platform sections — canonical source is platform_py.py
try:
from .platform_py import (
PREAMBLE, PLATFORM_PY, PRIMITIVES_PY_PRE, PRIMITIVES_PY_POST,
PRIMITIVES_PY_MODULES, _ALL_PY_MODULES,
PLATFORM_PARSER_PY,
PLATFORM_DEPS_PY, PLATFORM_CEK_PY, CEK_FIXUPS_PY, PLATFORM_ASYNC_PY,
FIXUPS_PY, CONTINUATIONS_PY,
_assemble_primitives_py, public_api_py,
ADAPTER_FILES, SPEC_MODULES, SPEC_MODULE_ORDER,
EXTENSION_NAMES, EXTENSION_FORMS,
)
except ImportError:
from hosts.python.platform import (
PREAMBLE, PLATFORM_PY, PRIMITIVES_PY_PRE, PRIMITIVES_PY_POST,
PRIMITIVES_PY_MODULES, _ALL_PY_MODULES,
PLATFORM_PARSER_PY,
PLATFORM_DEPS_PY, PLATFORM_CEK_PY, CEK_FIXUPS_PY, PLATFORM_ASYNC_PY,
FIXUPS_PY, CONTINUATIONS_PY,
_assemble_primitives_py, public_api_py,
ADAPTER_FILES, SPEC_MODULES, SPEC_MODULE_ORDER,
EXTENSION_NAMES, EXTENSION_FORMS,
)
def _parse_special_forms_spec(ref_dir: str, source_dirs=None) -> set[str]:
"""Parse special-forms.sx to extract declared form names."""
filepath = None
if source_dirs:
for d in source_dirs:
p = os.path.join(d, "special-forms.sx")
if os.path.exists(p):
filepath = p
break
if not filepath:
filepath = os.path.join(ref_dir, "special-forms.sx")
if not os.path.exists(filepath):
return set()
with open(filepath) as f:
src = f.read()
names = set()
for expr in parse_all(src):
if (isinstance(expr, list) and len(expr) >= 2
and isinstance(expr[0], Symbol)
and expr[0].name == "define-special-form"
and isinstance(expr[1], str)):
names.add(expr[1])
return names
def _extract_eval_dispatch_names(all_sections: list) -> set[str]:
"""Extract special form names dispatched in eval-list from transpiled sections."""
names = set()
for _label, defines in all_sections:
for name, _expr in defines:
if name.startswith("sf-"):
form = name[3:]
if form in ("cond-scheme", "cond-clojure", "case-loop"):
continue
names.add(form)
if name.startswith("ho-"):
form = name[3:]
names.add(form)
return names
def _validate_special_forms(ref_dir: str, all_sections: list,
                            has_continuations: bool, source_dirs=None) -> None:
    """Cross-check special-forms.sx against the transpiled dispatch names.

    Mismatches in either direction are reported as ``# WARNING`` lines on
    stderr; nothing is raised. Does nothing when no spec names are found.
    """
    spec_names = _parse_special_forms_spec(ref_dir, source_dirs=source_dirs)
    if not spec_names:
        return

    dispatch_names = _extract_eval_dispatch_names(all_sections)
    if has_continuations:
        dispatch_names |= EXTENSION_FORMS["continuations"]

    # Define-name suffixes that differ from the form name used in the spec.
    name_aliases = {
        "thread-first": "->",
        "every": "every?",
        "set-bang": "set!",
    }
    normalized_dispatch = {name_aliases.get(n, n) for n in dispatch_names}
    normalized_dispatch -= {"named-let"}  # internal

    ignore = {"fn", "let*", "do", "defrelation"}
    undispatched = spec_names - normalized_dispatch - ignore
    unspecced = normalized_dispatch - spec_names - ignore

    reports = (
        ("# WARNING: special-forms.sx declares forms not in eval.sx: ", undispatched),
        ("# WARNING: eval.sx dispatches forms not in special-forms.sx: ", unspecced),
    )
    for prefix, names in reports:
        if names:
            print(f"{prefix}{', '.join(sorted(names))}", file=sys.stderr)
def compile_ref_to_py(
    adapters: list[str] | None = None,
    modules: list[str] | None = None,
    extensions: list[str] | None = None,
    spec_modules: list[str] | None = None,
) -> str:
    """Read the reference .sx files and emit a standalone Python module.

    Args:
        adapters: Adapter names to include; each must be a key of
            ADAPTER_FILES (this function refers to parser, html, sx and
            async). None = include every adapter in ADAPTER_FILES.
        modules: Primitive module names to include. core.* modules are
            always included; anything else is opt-in.
            None = include all modules (backward compatible).
        extensions: Optional extensions to include; each must be in
            EXTENSION_NAMES (this function refers to continuations).
            None = no extensions.
        spec_modules: Spec module names to include; each must be a key of
            SPEC_MODULES. None = no spec modules, except that selecting the
            html adapter pulls in deps, signals, page-helpers and router
            when SPEC_MODULES defines them.

    Returns:
        The generated Python source as one newline-joined string.

    Raises:
        ValueError: If an unknown adapter, module, extension or spec
            module name is given.
    """
    # Determine which primitive modules to include
    prim_modules = None  # None = all
    if modules is not None:
        prim_modules = [m for m in _ALL_PY_MODULES if m.startswith("core.")]
        for m in modules:
            if m not in prim_modules:
                if m not in PRIMITIVES_PY_MODULES:
                    raise ValueError(f"Unknown module: {m!r}. Valid: {', '.join(PRIMITIVES_PY_MODULES)}")
                prim_modules.append(m)

    ref_dir = os.path.join(os.path.abspath(os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "..")), "shared", "sx", "ref")
    _project = os.path.abspath(os.path.join(ref_dir, "..", "..", ".."))
    # Search order for .sx sources: spec/, then web/, then the ref dir.
    _source_dirs = [
        os.path.join(_project, "spec"),
        os.path.join(_project, "web"),
        ref_dir,
    ]

    def _find_sx(filename):
        """Return the first existing path for ``filename``, or None."""
        for d in _source_dirs:
            p = os.path.join(d, filename)
            if os.path.exists(p):
                return p
        return None

    emitter = PyEmitter()

    # Resolve adapter set
    if adapters is None:
        adapter_set = set(ADAPTER_FILES.keys())
    else:
        adapter_set = set()
        for a in adapters:
            if a not in ADAPTER_FILES:
                raise ValueError(f"Unknown adapter: {a!r}. Valid: {', '.join(ADAPTER_FILES)}")
            adapter_set.add(a)

    # Resolve spec modules
    spec_mod_set = set()
    if spec_modules:
        for sm in spec_modules:
            if sm not in SPEC_MODULES:
                raise ValueError(f"Unknown spec module: {sm!r}. Valid: {', '.join(SPEC_MODULES)}")
            spec_mod_set.add(sm)

    # html adapter needs deps (component analysis), signals (island rendering),
    # router (URL-to-expression evaluation), and page-helpers
    if "html" in adapter_set:
        for required in ("deps", "signals", "page-helpers", "router"):
            if required in SPEC_MODULES:
                spec_mod_set.add(required)

    # CEK is always included (part of evaluator.sx core file)
    has_cek = True
    has_deps = "deps" in spec_mod_set

    # Core files always included, then selected adapters, then spec modules
    # evaluator.sx = merged frames + eval utilities + CEK machine
    sx_files = [
        ("evaluator.sx", "evaluator (frames + eval + CEK)"),
        ("forms.sx", "forms (server definition forms)"),
        ("render.sx", "render (core)"),
    ]
    # Parser before html/sx — provides serialize used by adapters
    if "parser" in adapter_set:
        sx_files.append(ADAPTER_FILES["parser"])
    for name in ("html", "sx"):
        if name in adapter_set:
            sx_files.append(ADAPTER_FILES[name])
    # Use explicit ordering for spec modules (respects dependencies)
    for name in SPEC_MODULE_ORDER:
        if name in spec_mod_set:
            sx_files.append(SPEC_MODULES[name])
    # Any spec modules not in the order list (future-proofing)
    for name in sorted(spec_mod_set):
        if name not in SPEC_MODULE_ORDER:
            sx_files.append(SPEC_MODULES[name])

    # Pre-scan define-async names (needed before transpilation so emitter
    # knows which calls require 'await')
    has_async = "async" in adapter_set
    if has_async:
        async_filename = ADAPTER_FILES["async"][0]
        async_filepath = _find_sx(async_filename) or os.path.join(ref_dir, async_filename)
        if os.path.exists(async_filepath):
            # Explicit UTF-8: do not depend on the platform's locale encoding.
            with open(async_filepath, encoding="utf-8") as f:
                async_src = f.read()
            for aexpr in parse_all(async_src):
                if (isinstance(aexpr, list) and aexpr
                        and isinstance(aexpr[0], Symbol)
                        and aexpr[0].name == "define-async"):
                    aname = aexpr[1].name if isinstance(aexpr[1], Symbol) else str(aexpr[1])
                    emitter._async_names.add(aname)
        # Platform async primitives (provided by host, also need await)
        emitter._async_names.update({
            "async-eval", "execute-io", "async-await!",
        })
        # Async adapter is transpiled last (after sync adapters)
        sx_files.append(ADAPTER_FILES["async"])

    all_sections = []
    for filename, label in sx_files:
        filepath = _find_sx(filename) or os.path.join(ref_dir, filename)
        if not os.path.exists(filepath):
            # Files that cannot be found are skipped silently.
            continue
        with open(filepath, encoding="utf-8") as f:
            src = f.read()
        defines = extract_defines(src)
        all_sections.append((label, defines))

    # Resolve extensions
    ext_set = set()
    if extensions:
        for e in extensions:
            if e not in EXTENSION_NAMES:
                raise ValueError(f"Unknown extension: {e!r}. Valid: {', '.join(EXTENSION_NAMES)}")
            ext_set.add(e)
    has_continuations = "continuations" in ext_set

    # Validate special forms
    _validate_special_forms(ref_dir, all_sections, has_continuations, source_dirs=_source_dirs)

    # Build output
    has_html = "html" in adapter_set
    has_sx = "sx" in adapter_set
    has_parser = "parser" in adapter_set

    parts = []
    parts.append(PREAMBLE)
    parts.append(PLATFORM_PY)
    parts.append(PRIMITIVES_PY_PRE)
    parts.append(_assemble_primitives_py(prim_modules))
    parts.append(PRIMITIVES_PY_POST)
    if has_parser:
        parts.append(PLATFORM_PARSER_PY)
    if has_deps:
        parts.append(PLATFORM_DEPS_PY)
    if has_cek:
        parts.append(PLATFORM_CEK_PY)
    if has_async:
        parts.append(PLATFORM_ASYNC_PY)

    # Transpiled definitions, one labelled section per source file.
    for label, defines in all_sections:
        parts.append(f"\n# === Transpiled from {label} ===\n")
        for name, expr in defines:
            parts.append(f"# {name}")
            parts.append(emitter.emit_statement(expr))
            parts.append("")

    parts.append(FIXUPS_PY)
    if has_cek:
        parts.append(CEK_FIXUPS_PY)
    if has_continuations:
        parts.append(CONTINUATIONS_PY)
    parts.append(public_api_py(has_html, has_sx, has_deps, has_async))
    return "\n".join(parts)
# NOTE: Static platform sections (PREAMBLE, PLATFORM_PY, PRIMITIVES_*, etc.)
# are now imported from platform_py.py above. Do not redefine them here.
# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------
def main():
    """Command-line entry point: parse options and print the generated module.

    Each option takes a comma-separated list; an omitted or empty option
    is passed to ``compile_ref_to_py`` as None.
    """
    import argparse

    parser = argparse.ArgumentParser(description="Bootstrap SX spec -> Python")
    option_help = (
        ("--adapters",
         "Comma-separated adapter names (html,sx). Default: all server-side."),
        ("--modules",
         "Comma-separated primitive modules (core.* always included). Default: all."),
        ("--extensions",
         "Comma-separated extensions (continuations). Default: none."),
        ("--spec-modules",
         "Comma-separated spec modules (deps,engine). Default: none."),
    )
    for flag, text in option_help:
        parser.add_argument(flag, default=None, help=text)
    args = parser.parse_args()

    def _csv(value):
        return value.split(",") if value else None

    print(compile_ref_to_py(
        _csv(args.adapters),
        _csv(args.modules),
        _csv(args.extensions),
        _csv(args.spec_modules),
    ))
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()