#!/usr/bin/env python3 """ Bootstrap compiler: reference SX evaluator -> Python. Reads the .sx reference specification and emits a standalone Python evaluator module (sx_ref.py) that can be compared against the hand-written evaluator.py / html.py / async_eval.py. The compiler translates the restricted SX subset used in eval.sx/render.sx into idiomatic Python. Platform interface functions are emitted as native Python implementations. Usage: python bootstrap_py.py > sx_ref.py """ from __future__ import annotations import os import sys # Add project root to path for imports _HERE = os.path.dirname(os.path.abspath(__file__)) _PROJECT = os.path.abspath(os.path.join(_HERE, "..", "..")) sys.path.insert(0, _PROJECT) from shared.sx.parser import parse_all from shared.sx.types import Symbol, Keyword, NIL as SX_NIL # --------------------------------------------------------------------------- # SX -> Python transpiler # --------------------------------------------------------------------------- # Python reserved words — SX names that collide get _ suffix # Excludes names we intentionally shadow (list, dict, range, filter, map) _PY_RESERVED = frozenset({ "False", "None", "True", "and", "as", "assert", "async", "await", "break", "class", "continue", "def", "del", "elif", "else", "except", "finally", "for", "from", "global", "if", "import", "in", "is", "lambda", "nonlocal", "not", "or", "pass", "raise", "return", "try", "while", "with", "yield", # builtins we don't want to shadow "default", "type", "id", "input", "open", "print", "set", "super", }) class PyEmitter: """Transpile an SX AST node to Python source code.""" def __init__(self): self.indent = 0 self._async_names: set[str] = set() # SX names of define-async functions self._in_async: bool = False # Currently emitting async def body? def emit(self, expr) -> str: """Emit a Python expression from an SX AST node.""" # Bool MUST be checked before int (bool is subclass of int in Python) if isinstance(expr, bool): return "True" if expr else "False" if isinstance(expr, (int, float)): return str(expr) if isinstance(expr, str): return self._py_string(expr) if expr is None or expr is SX_NIL: return "NIL" if isinstance(expr, Symbol): return self._emit_symbol(expr.name) if isinstance(expr, Keyword): return self._py_string(expr.name) if isinstance(expr, dict): return self._emit_native_dict(expr) if isinstance(expr, list): return self._emit_list(expr) return str(expr) def emit_statement(self, expr, indent: int = 0) -> str: """Emit a Python statement from an SX AST node.""" pad = " " * indent if isinstance(expr, list) and expr: head = expr[0] if isinstance(head, Symbol): name = head.name if name == "define": return self._emit_define(expr, indent) if name == "define-async": return self._emit_define_async(expr, indent) if name == "set!": varname = expr[1].name if isinstance(expr[1], Symbol) else str(expr[1]) py_var = self._mangle(varname) cell_vars = getattr(self, '_current_cell_vars', set()) if py_var in cell_vars: return f"{pad}_cells[{self._py_string(py_var)}] = {self.emit(expr[2])}" return f"{pad}{py_var} = {self.emit(expr[2])}" if name == "when": return self._emit_when_stmt(expr, indent) if name == "do" or name == "begin": return "\n".join(self.emit_statement(e, indent) for e in expr[1:]) if name == "for-each": return self._emit_for_each_stmt(expr, indent) if name == "dict-set!": return f"{pad}{self.emit(expr[1])}[{self.emit(expr[2])}] = {self.emit(expr[3])}" if name == "append!": return f"{pad}{self.emit(expr[1])}.append({self.emit(expr[2])})" if name == "env-set!": return f"{pad}{self.emit(expr[1])}[{self.emit(expr[2])}] = {self.emit(expr[3])}" if name == "set-lambda-name!": return f"{pad}{self.emit(expr[1])}.name = {self.emit(expr[2])}" return f"{pad}{self.emit(expr)}" # --- Symbol emission --- def _emit_symbol(self, name: str) -> str: mangled = self._mangle(name) cell_vars = getattr(self, '_current_cell_vars', set()) if mangled in cell_vars: return f"_cells[{self._py_string(mangled)}]" return mangled def _mangle(self, name: str) -> str: """Convert SX identifier to valid Python identifier.""" RENAMES = { "nil": "NIL", "true": "True", "false": "False", "nil?": "is_nil", "type-of": "type_of", "symbol-name": "symbol_name", "keyword-name": "keyword_name", "make-lambda": "make_lambda", "make-component": "make_component", "make-macro": "make_macro", "make-thunk": "make_thunk", "make-handler-def": "make_handler_def", "make-query-def": "make_query_def", "make-action-def": "make_action_def", "make-page-def": "make_page_def", "make-symbol": "make_symbol", "make-keyword": "make_keyword", "lambda-params": "lambda_params", "lambda-body": "lambda_body", "lambda-closure": "lambda_closure", "lambda-name": "lambda_name", "set-lambda-name!": "set_lambda_name", "component-params": "component_params", "component-body": "component_body", "component-closure": "component_closure", "component-has-children?": "component_has_children", "component-name": "component_name", "component-affinity": "component_affinity", "component-param-types": "component_param_types", "component-set-param-types!": "component_set_param_types", "macro-params": "macro_params", "macro-rest-param": "macro_rest_param", "macro-body": "macro_body", "macro-closure": "macro_closure", "thunk?": "is_thunk", "thunk-expr": "thunk_expr", "thunk-env": "thunk_env", "callable?": "is_callable", "lambda?": "is_lambda", "component?": "is_component", "island?": "is_island", "make-island": "make_island", "make-signal": "make_signal", "signal?": "is_signal", "signal-value": "signal_value", "signal-set-value!": "signal_set_value", "signal-subscribers": "signal_subscribers", "signal-add-sub!": "signal_add_sub", "signal-remove-sub!": "signal_remove_sub", "signal-deps": "signal_deps", "signal-set-deps!": "signal_set_deps", "identical?": "is_identical", "notify-subscribers": "notify_subscribers", "flush-subscribers": "flush_subscribers", "dispose-computed": "dispose_computed", "with-island-scope": "with_island_scope", "register-in-scope": "register_in_scope", "*batch-depth*": "_batch_depth", "*batch-queue*": "_batch_queue", "*store-registry*": "_store_registry", "*custom-special-forms*": "_custom_special_forms", "*render-check*": "_render_check", "*render-fn*": "_render_fn", "register-special-form!": "register_special_form_b", "is-else-clause?": "is_else_clause_p", "def-store": "def_store", "use-store": "use_store", "clear-stores": "clear_stores", "emit-event": "emit_event", "on-event": "on_event", "bridge-event": "bridge_event", "dom-listen": "dom_listen", "dom-dispatch": "dom_dispatch", "event-detail": "event_detail", "macro?": "is_macro", "primitive?": "is_primitive", "get-primitive": "get_primitive", "env-has?": "env_has", "env-get": "env_get", "env-set!": "env_set", "env-extend": "env_extend", "env-merge": "env_merge", "dict-set!": "dict_set", "dict-get": "dict_get", "dict-has?": "dict_has", "dict-delete!": "dict_delete", "eval-expr": "eval_expr", "eval-list": "eval_list", "eval-call": "eval_call", "is-render-expr?": "is_render_expr", "render-expr": "render_expr", "call-lambda": "call_lambda", "call-component": "call_component", "parse-keyword-args": "parse_keyword_args", "parse-comp-params": "parse_comp_params", "parse-macro-params": "parse_macro_params", "expand-macro": "expand_macro", "render-to-html": "render_to_html", "render-to-sx": "render_to_sx", "render-value-to-html": "render_value_to_html", "render-list-to-html": "render_list_to_html", "render-html-element": "render_html_element", "render-html-component": "render_html_component", "parse-element-args": "parse_element_args", "render-attrs": "render_attrs", "aser-list": "aser_list", "aser-fragment": "aser_fragment", "aser-call": "aser_call", "aser-special": "aser_special", "sf-if": "sf_if", "sf-when": "sf_when", "sf-cond": "sf_cond", "sf-cond-scheme": "sf_cond_scheme", "sf-cond-clojure": "sf_cond_clojure", "sf-case": "sf_case", "sf-case-loop": "sf_case_loop", "sf-and": "sf_and", "sf-or": "sf_or", "sf-let": "sf_let", "sf-lambda": "sf_lambda", "sf-define": "sf_define", "sf-defcomp": "sf_defcomp", "defcomp-kwarg": "defcomp_kwarg", "sf-defmacro": "sf_defmacro", "sf-begin": "sf_begin", "sf-quote": "sf_quote", "sf-quasiquote": "sf_quasiquote", "sf-thread-first": "sf_thread_first", "sf-set!": "sf_set_bang", "sf-reset": "sf_reset", "sf-shift": "sf_shift", "qq-expand": "qq_expand", "ho-map": "ho_map", "ho-map-indexed": "ho_map_indexed", "ho-filter": "ho_filter", "ho-reduce": "ho_reduce", "ho-some": "ho_some", "ho-every": "ho_every", "ho-for-each": "ho_for_each", "sf-defstyle": "sf_defstyle", "special-form?": "is_special_form", "ho-form?": "is_ho_form", "strip-prefix": "strip_prefix", "escape-html": "escape_html", "escape-attr": "escape_attr", "escape-string": "escape_string", "raw-html-content": "raw_html_content", "HTML_TAGS": "HTML_TAGS", "VOID_ELEMENTS": "VOID_ELEMENTS", "BOOLEAN_ATTRS": "BOOLEAN_ATTRS", # render.sx core "definition-form?": "is_definition_form", # adapter-html.sx "RENDER_HTML_FORMS": "RENDER_HTML_FORMS", "render-html-form?": "is_render_html_form", "dispatch-html-form": "dispatch_html_form", "render-lambda-html": "render_lambda_html", "make-raw-html": "make_raw_html", "render-html-island": "render_html_island", "serialize-island-state": "serialize_island_state", "json-serialize": "json_serialize", "empty-dict?": "is_empty_dict", "sf-defisland": "sf_defisland", # adapter-sx.sx "render-to-sx": "render_to_sx", # adapter-async.sx platform primitives "svg-context-set!": "svg_context_set", "svg-context-reset!": "svg_context_reset", "css-class-collect!": "css_class_collect", # spread + collect primitives "make-spread": "make_spread", "spread?": "is_spread", "spread-attrs": "spread_attrs", "merge-spread-attrs": "merge_spread_attrs", "collect!": "sx_collect", "collected": "sx_collected", "clear-collected!": "sx_clear_collected", "scope-push!": "scope_push", "scope-pop!": "scope_pop", "provide-push!": "provide_push", "provide-pop!": "provide_pop", "context": "sx_context", "emit!": "sx_emit", "emitted": "sx_emitted", "is-raw-html?": "is_raw_html", "async-coroutine?": "is_async_coroutine", "async-await!": "async_await", "is-sx-expr?": "is_sx_expr", "sx-expr?": "is_sx_expr", "io-primitive?": "io_primitive_p", "expand-components?": "expand_components_p", "svg-context?": "svg_context_p", "make-sx-expr": "make_sx_expr", "aser": "aser", "eval-case-aser": "eval_case_aser", "sx-serialize": "sx_serialize", "sx-serialize-dict": "sx_serialize_dict", "sx-expr-source": "sx_expr_source", # Primitives that need exact aliases "contains?": "contains_p", "starts-with?": "starts_with_p", "ends-with?": "ends_with_p", "empty?": "empty_p", "every?": "every_p", "for-each": "for_each", "for-each-indexed": "for_each_indexed", "map-indexed": "map_indexed", "map-dict": "map_dict", "eval-cond": "eval_cond", "eval-cond-scheme": "eval_cond_scheme", "eval-cond-clojure": "eval_cond_clojure", "process-bindings": "process_bindings", # deps.sx "scan-refs": "scan_refs", "scan-refs-walk": "scan_refs_walk", "transitive-deps": "transitive_deps", "compute-all-deps": "compute_all_deps", "scan-components-from-source": "scan_components_from_source", "components-needed": "components_needed", "page-component-bundle": "page_component_bundle", "page-css-classes": "page_css_classes", "component-deps": "component_deps", "component-set-deps!": "component_set_deps", "component-css-classes": "component_css_classes", "component-io-refs": "component_io_refs", "component-set-io-refs!": "component_set_io_refs", "env-components": "env_components", "regex-find-all": "regex_find_all", "scan-css-classes": "scan_css_classes", # deps.sx IO detection "scan-io-refs": "scan_io_refs", "scan-io-refs-walk": "scan_io_refs_walk", "transitive-io-refs": "transitive_io_refs", "compute-all-io-refs": "compute_all_io_refs", "component-io-refs-cached": "component_io_refs_cached", "component-pure?": "component_pure_p", "render-target": "render_target", "page-render-plan": "page_render_plan", # router.sx "split-path-segments": "split_path_segments", "make-route-segment": "make_route_segment", "parse-route-pattern": "parse_route_pattern", "match-route-segments": "match_route_segments", "match-route": "match_route", "find-matching-route": "find_matching_route", } if name in RENAMES: return RENAMES[name] # General mangling result = name # Handle trailing ? and ! if result.endswith("?"): result = result[:-1] + "_p" elif result.endswith("!"): result = result[:-1] + "_b" # Kebab to snake_case result = result.replace("-", "_") # Escape Python reserved words if result in _PY_RESERVED: result = result + "_" return result # --- List emission --- def _emit_list(self, expr: list) -> str: if not expr: return "[]" head = expr[0] if not isinstance(head, Symbol): # Data list return "[" + ", ".join(self.emit(x) for x in expr) + "]" name = head.name handler = getattr( self, f"_sf_{name.replace('-', '_').replace('!', '_b').replace('?', '_p')}", None, ) if handler: return handler(expr) # Built-in forms if name in ("fn", "lambda"): return self._emit_fn(expr) if name in ("let", "let*"): return self._emit_let(expr) if name == "if": return self._emit_if(expr) if name == "when": return self._emit_when(expr) if name == "cond": return self._emit_cond(expr) if name == "case": return self._emit_case(expr) if name == "and": return self._emit_and(expr) if name == "or": return self._emit_or(expr) if name == "not": return f"(not sx_truthy({self.emit(expr[1])}))" if name in ("do", "begin"): return self._emit_do(expr) if name == "list": return "[" + ", ".join(self.emit(x) for x in expr[1:]) + "]" if name == "dict": return self._emit_dict_literal(expr) if name == "quote": return self._emit_quote(expr[1]) if name == "set!": # set! in expression context — use nonlocal_cells dict for mutation # from nested lambdas (Python closures can read but not rebind outer vars) varname = expr[1].name if isinstance(expr[1], Symbol) else str(expr[1]) py_var = self._mangle(varname) return f"_sx_cell_set(_cells, {self._py_string(py_var)}, {self.emit(expr[2])})" if name == "str": parts = [self.emit(x) for x in expr[1:]] return "sx_str(" + ", ".join(parts) + ")" # Mutation forms that can appear in expression context if name == "append!": return f"_sx_append({self.emit(expr[1])}, {self.emit(expr[2])})" if name == "dict-set!": return f"_sx_dict_set({self.emit(expr[1])}, {self.emit(expr[2])}, {self.emit(expr[3])})" if name == "env-set!": return f"_sx_dict_set({self.emit(expr[1])}, {self.emit(expr[2])}, {self.emit(expr[3])})" if name == "set-lambda-name!": return f"_sx_set_attr({self.emit(expr[1])}, 'name', {self.emit(expr[2])})" # Infix operators if name in ("+", "-", "*", "/", "=", "!=", "<", ">", "<=", ">=", "mod"): return self._emit_infix(name, expr[1:]) if name == "inc": return f"({self.emit(expr[1])} + 1)" if name == "dec": return f"({self.emit(expr[1])} - 1)" # Regular function call fn_name = self._mangle(name) args = ", ".join(self.emit(x) for x in expr[1:]) if self._in_async and name in self._async_names: return f"(await {fn_name}({args}))" return f"{fn_name}({args})" # --- Special form emitters --- @staticmethod def _extract_param_name(p): """Extract the name from a param, handling (name :as type) annotations.""" if isinstance(p, list) and len(p) == 3 and isinstance(p[1], Keyword) and p[1].name == "as": return p[0].name if isinstance(p[0], Symbol) else str(p[0]) if isinstance(p, Symbol): return p.name return str(p) def _emit_fn(self, expr) -> str: params = expr[1] body = expr[2:] param_names = [] rest_name = None i = 0 while i < len(params): p = params[i] if isinstance(p, Symbol) and p.name == "&rest": # Next param is the rest parameter if i + 1 < len(params): rest_name = self._mangle(self._extract_param_name(params[i + 1])) i += 2 continue else: i += 1 continue param_names.append(self._mangle(self._extract_param_name(p))) i += 1 if rest_name: param_names.append(f"*{rest_name}") params_str = ", ".join(param_names) if len(body) == 1: body_py = self.emit(body[0]) return f"lambda {params_str}: {body_py}" # Multi-expression body: need a local function lines = [] lines.append(f"_sx_fn(lambda {params_str}: (") for b in body[:-1]: lines.append(f" {self.emit(b)},") lines.append(f" {self.emit(body[-1])}") lines.append(")[-1])") return "\n".join(lines) def _emit_let(self, expr) -> str: bindings = expr[1] body = expr[2:] assignments = [] if isinstance(bindings, list): if bindings and isinstance(bindings[0], list): # Scheme-style: ((name val) ...) for b in bindings: vname = b[0].name if isinstance(b[0], Symbol) else str(b[0]) assignments.append((self._mangle(vname), self.emit(b[1]))) else: # Clojure-style: (name val name val ...) for i in range(0, len(bindings), 2): vname = bindings[i].name if isinstance(bindings[i], Symbol) else str(bindings[i]) assignments.append((self._mangle(vname), self.emit(bindings[i + 1]))) # Nested IIFE for sequential let (each binding can see previous ones): # (lambda a: (lambda b: body)(val_b))(val_a) # Cell variables (mutated by nested set!) are initialized in _cells dict # instead of lambda params, since the body reads _cells[name]. cell_vars = getattr(self, '_current_cell_vars', set()) body_parts = [self.emit(b) for b in body] if len(body) == 1: body_str = body_parts[0] else: body_str = f"_sx_begin({', '.join(body_parts)})" # Build from inside out result = body_str for name, val in reversed(assignments): if name in cell_vars: # Cell var: initialize in _cells dict, not as lambda param result = f"_sx_begin(_sx_cell_set(_cells, {self._py_string(name)}, {val}), {result})" else: result = f"(lambda {name}: {result})({val})" return result def _emit_if(self, expr) -> str: cond = self.emit(expr[1]) then = self.emit(expr[2]) els = self.emit(expr[3]) if len(expr) > 3 else "NIL" return f"({then} if sx_truthy({cond}) else {els})" def _emit_when(self, expr) -> str: cond = self.emit(expr[1]) body_parts = expr[2:] if len(body_parts) == 1: return f"({self.emit(body_parts[0])} if sx_truthy({cond}) else NIL)" body = ", ".join(self.emit(b) for b in body_parts) return f"(_sx_begin({body}) if sx_truthy({cond}) else NIL)" def _emit_when_stmt(self, expr, indent: int = 0) -> str: pad = " " * indent cond = self.emit(expr[1]) body_parts = expr[2:] lines = [f"{pad}if sx_truthy({cond}):"] for b in body_parts: self._emit_stmt_recursive(b, lines, indent + 1) return "\n".join(lines) def _emit_cond(self, expr) -> str: clauses = expr[1:] if not clauses: return "NIL" # Check ALL clauses are 2-element lists (scheme-style). # Checking only the first is ambiguous — (nil? x) is a 2-element # function call, not a scheme clause ((test body)). is_scheme = ( all(isinstance(c, list) and len(c) == 2 for c in clauses) and not any(isinstance(c, Keyword) for c in clauses) ) if is_scheme: return self._cond_scheme(clauses) return self._cond_clojure(clauses) def _cond_scheme(self, clauses) -> str: if not clauses: return "NIL" clause = clauses[0] test = clause[0] body = clause[1] if isinstance(test, Symbol) and test.name in ("else", ":else"): return self.emit(body) if isinstance(test, Keyword) and test.name == "else": return self.emit(body) return f"({self.emit(body)} if sx_truthy({self.emit(test)}) else {self._cond_scheme(clauses[1:])})" def _cond_clojure(self, clauses) -> str: if len(clauses) < 2: return "NIL" test = clauses[0] body = clauses[1] if isinstance(test, Keyword) and test.name == "else": return self.emit(body) if isinstance(test, Symbol) and test.name in ("else", ":else"): return self.emit(body) return f"({self.emit(body)} if sx_truthy({self.emit(test)}) else {self._cond_clojure(clauses[2:])})" def _emit_case(self, expr) -> str: match_expr = self.emit(expr[1]) clauses = expr[2:] return f"_sx_case({match_expr}, [{self._case_pairs(clauses)}])" def _case_pairs(self, clauses) -> str: pairs = [] i = 0 while i < len(clauses) - 1: test = clauses[i] body = clauses[i + 1] if isinstance(test, Keyword) and test.name == "else": pairs.append(f"(None, lambda: {self.emit(body)})") elif isinstance(test, Symbol) and test.name in ("else", ":else"): pairs.append(f"(None, lambda: {self.emit(body)})") else: pairs.append(f"({self.emit(test)}, lambda: {self.emit(body)})") i += 2 return ", ".join(pairs) def _emit_and(self, expr) -> str: parts = [self.emit(x) for x in expr[1:]] if len(parts) == 1: return parts[0] # Use Python's native and for short-circuit evaluation. # Last value returned as-is; prior values tested with sx_truthy. # (and a b c) -> (a if not sx_truthy(a) else (b if not sx_truthy(b) else c)) result = parts[-1] for p in reversed(parts[:-1]): result = f"({p} if not sx_truthy({p}) else {result})" return result def _emit_or(self, expr) -> str: if len(expr) == 2: return self.emit(expr[1]) parts = [self.emit(x) for x in expr[1:]] # Use Python's short-circuit pattern: # (or a b c) -> (a if sx_truthy(a) else (b if sx_truthy(b) else c)) result = parts[-1] for p in reversed(parts[:-1]): result = f"({p} if sx_truthy({p}) else {result})" return result def _emit_do(self, expr) -> str: return self._emit_do_inner(expr[1:]) def _emit_do_inner(self, exprs) -> str: if len(exprs) == 1: return self.emit(exprs[0]) parts = [self.emit(e) for e in exprs] return "_sx_begin(" + ", ".join(parts) + ")" def _emit_native_dict(self, expr: dict) -> str: """Emit a native Python dict (from parser's {:key val} syntax).""" parts = [] for key, val in expr.items(): parts.append(f"{self._py_string(key)}: {self.emit(val)}") return "{" + ", ".join(parts) + "}" def _emit_dict_literal(self, expr) -> str: pairs = expr[1:] parts = [] i = 0 while i < len(pairs) - 1: key = pairs[i] val = pairs[i + 1] if isinstance(key, Keyword): parts.append(f"{self._py_string(key.name)}: {self.emit(val)}") else: parts.append(f"{self.emit(key)}: {self.emit(val)}") i += 2 return "{" + ", ".join(parts) + "}" def _emit_infix(self, op: str, args: list) -> str: PY_OPS = {"=": "==", "!=": "!=", "mod": "%"} py_op = PY_OPS.get(op, op) if len(args) == 1 and op == "-": return f"(-{self.emit(args[0])})" return f"({self.emit(args[0])} {py_op} {self.emit(args[1])})" def _emit_define(self, expr, indent: int = 0) -> str: pad = " " * indent name = expr[1].name if isinstance(expr[1], Symbol) else str(expr[1]) # Handle (define name :effects [...] value) — skip :effects annotation if (len(expr) >= 5 and isinstance(expr[2], Keyword) and expr[2].name == "effects"): val_expr = expr[4] else: val_expr = expr[2] # Always emit fn-bodied defines as def statements for flat control flow if (isinstance(val_expr, list) and val_expr and isinstance(val_expr[0], Symbol) and val_expr[0].name in ("fn", "lambda")): return self._emit_define_as_def(name, val_expr, indent) val = self.emit(val_expr) return f"{pad}{self._mangle(name)} = {val}" def _emit_define_async(self, expr, indent: int = 0) -> str: """Emit a define-async form as an async def statement.""" name = expr[1].name if isinstance(expr[1], Symbol) else str(expr[1]) # Handle (define-async name :effects [...] value) — skip :effects annotation if (len(expr) >= 5 and isinstance(expr[2], Keyword) and expr[2].name == "effects"): val_expr = expr[4] else: val_expr = expr[2] if (isinstance(val_expr, list) and val_expr and isinstance(val_expr[0], Symbol) and val_expr[0].name in ("fn", "lambda")): return self._emit_define_as_def(name, val_expr, indent, is_async=True) # Shouldn't happen — define-async should always wrap fn/lambda return self._emit_define(expr, indent) def _body_uses_set(self, fn_expr) -> bool: """Check if a fn expression's body (recursively) uses set!.""" def _has_set(node): if not isinstance(node, list) or not node: return False head = node[0] if isinstance(head, Symbol) and head.name == "set!": return True return any(_has_set(child) for child in node if isinstance(child, list)) body = fn_expr[2:] return any(_has_set(b) for b in body) def _emit_define_as_def(self, name: str, fn_expr, indent: int = 0, is_async: bool = False) -> str: """Emit a define with fn value as a proper def statement. This is used for functions that contain set! — Python closures can't rebind outer lambda params, so we need proper def + local variables. Variables mutated by set! from nested lambdas use a _cells dict. When is_async=True, emits 'async def' and sets _in_async so that calls to other async functions receive 'await'. """ pad = " " * indent params = fn_expr[1] body = fn_expr[2:] param_names = [] i = 0 while i < len(params): p = params[i] if isinstance(p, Symbol) and p.name == "&rest": if i + 1 < len(params): rest_name = self._mangle(self._extract_param_name(params[i + 1])) param_names.append(f"*{rest_name}") i += 2 continue else: i += 1 continue param_names.append(self._mangle(self._extract_param_name(p))) i += 1 params_str = ", ".join(param_names) py_name = self._mangle(name) # Find set! target variables that are used from nested lambda scopes nested_set_vars = self._find_nested_set_vars(body) def_kw = "async def" if is_async else "def" lines = [f"{pad}{def_kw} {py_name}({params_str}):"] # Emit body with cell var tracking (and async context if needed) old_cells = getattr(self, '_current_cell_vars', set()) if nested_set_vars and not old_cells: lines.append(f"{pad} _cells = {{}}") old_async = self._in_async self._current_cell_vars = old_cells | nested_set_vars if is_async: self._in_async = True # Self-tail-recursive 0-param functions: wrap body in while True if (not param_names and not is_async and self._has_self_tail_call(body, name)): lines.append(f"{pad} while True:") old_loop = getattr(self, '_current_loop_name', None) self._current_loop_name = name self._emit_body_stmts(body, lines, indent + 2) self._current_loop_name = old_loop else: self._emit_body_stmts(body, lines, indent + 1) self._current_cell_vars = old_cells self._in_async = old_async return "\n".join(lines) def _find_nested_set_vars(self, body) -> set[str]: """Find variable names that are set! from within nested fn/lambda bodies.""" result = set() def _scan(node, in_nested_fn=False): if not isinstance(node, list) or not node: return head = node[0] if isinstance(head, Symbol): if head.name in ("fn", "lambda") and in_nested_fn: # Already nested, keep scanning for child in node[2:]: _scan(child, True) return if head.name in ("fn", "lambda"): # Entering nested fn for child in node[2:]: _scan(child, True) return if head.name == "set!" and in_nested_fn: var = node[1].name if isinstance(node[1], Symbol) else str(node[1]) result.add(self._mangle(var)) for child in node: if isinstance(child, list): _scan(child, in_nested_fn) for b in body: _scan(b) return result def _emit_body_stmts(self, body: list, lines: list, indent: int) -> None: """Emit body expressions as statements into lines list. Handles let as local variable declarations, and returns the last expression. Control flow in tail position (if, cond, case, when) is flattened to if/elif statements with returns in each branch. Detects self-tail-recursive (define name (fn () ...)) followed by (name) and emits as while True loop instead of recursive def. """ pad = " " * indent idx = 0 while idx < len(body): expr = body[idx] is_last = (idx == len(body) - 1) if isinstance(expr, list) and expr and isinstance(expr[0], Symbol): name = expr[0].name if name in ("let", "let*"): self._emit_let_as_stmts(expr, lines, indent, is_last) idx += 1 continue if name in ("do", "begin"): sub_body = expr[1:] if is_last: self._emit_body_stmts(sub_body, lines, indent) else: for sub in sub_body: lines.append(self.emit_statement(sub, indent)) idx += 1 continue # Detect self-tail-recursive loop pattern: # (define loop-name (fn () body...)) # (loop-name) # Emit as: while True: if (name == "define" and not is_last and idx + 1 < len(body)): loop_info = self._detect_tail_loop(expr, body[idx + 1]) if loop_info: loop_name, fn_body = loop_info remaining = body[idx + 2:] # Only optimize if the function isn't called again later if not self._name_in_exprs(loop_name, remaining): self._emit_while_loop(loop_name, fn_body, lines, indent) # Skip the invocation; emit remaining body for j, rem in enumerate(remaining): if j == len(remaining) - 1: self._emit_return_expr(rem, lines, indent) else: self._emit_stmt_recursive(rem, lines, indent) return if is_last: self._emit_return_expr(expr, lines, indent) else: self._emit_stmt_recursive(expr, lines, indent) idx += 1 def _detect_tail_loop(self, define_expr, next_expr): """Detect pattern: (define name (fn () body...)) followed by (name). Returns (loop_name, fn_body) if tail-recursive, else None. The function must have 0 params and body must end with self-call in all tail positions. """ # Extract name and fn from define dname = define_expr[1].name if isinstance(define_expr[1], Symbol) else None if not dname: return None # Skip :effects annotation if (len(define_expr) >= 5 and isinstance(define_expr[2], Keyword) and define_expr[2].name == "effects"): val_expr = define_expr[4] else: val_expr = define_expr[2] if len(define_expr) > 2 else None if not (isinstance(val_expr, list) and val_expr and isinstance(val_expr[0], Symbol) and val_expr[0].name in ("fn", "lambda")): return None params = val_expr[1] if not isinstance(params, list) or len(params) != 0: return None # Must be 0-param function fn_body = val_expr[2:] # Check next expression is (name) — invocation if not (isinstance(next_expr, list) and len(next_expr) == 1 and isinstance(next_expr[0], Symbol) and next_expr[0].name == dname): return None # Check that fn_body has self-call in tail position(s) if not self._has_self_tail_call(fn_body, dname): return None return (dname, fn_body) def _has_self_tail_call(self, body, name): """Check if body is safe for while-loop optimization. Returns True only when ALL tail positions are either: - self-calls (name) → will become continue - nil/void returns → will become break - error() calls → raise, don't return - when blocks → implicit nil else is fine No tail position may return a computed value, since while-loop break discards return values. """ if not body: return False last = body[-1] # Non-list terminal: nil is ok, anything else is a value return if not isinstance(last, list) or not last: return (last is None or last is SX_NIL or (isinstance(last, Symbol) and last.name == "nil")) head = last[0] if isinstance(last[0], Symbol) else None if not head: return False # Direct self-call in tail position if head.name == name and len(last) == 1: return True # error() — raises, safe if head.name == "error": return True # if — ALL branches must be safe if head.name == "if": then_ok = self._has_self_tail_call( [last[2]] if len(last) > 2 else [None], name) else_ok = self._has_self_tail_call( [last[3]] if len(last) > 3 else [None], name) return then_ok and else_ok # do/begin — check last expression if head.name in ("do", "begin"): return self._has_self_tail_call(last[1:], name) # when — body must be safe (implicit nil else is ok) if head.name == "when": return self._has_self_tail_call(last[2:], name) # let/let* — check body (skip bindings) if head.name in ("let", "let*"): return self._has_self_tail_call(last[2:], name) # cond — ALL branches must be safe if head.name == "cond": clauses = last[1:] is_scheme = ( all(isinstance(c, list) and len(c) == 2 for c in clauses) and not any(isinstance(c, Keyword) for c in clauses) ) if is_scheme: for clause in clauses: if not self._has_self_tail_call([clause[1]], name): return False return True else: i = 0 while i < len(clauses) - 1: if not self._has_self_tail_call([clauses[i + 1]], name): return False i += 2 return True return False def _name_in_exprs(self, name, exprs): """Check if a symbol name appears anywhere in a list of expressions.""" for expr in exprs: if isinstance(expr, Symbol) and expr.name == name: return True if isinstance(expr, list): if self._name_in_exprs(name, expr): return True return False def _emit_while_loop(self, loop_name, fn_body, lines, indent): """Emit a self-tail-recursive function body as a while True loop.""" pad = " " * indent lines.append(f"{pad}while True:") # Track the loop name so _emit_return_expr can emit 'continue' old_loop = getattr(self, '_current_loop_name', None) self._current_loop_name = loop_name self._emit_body_stmts(fn_body, lines, indent + 1) self._current_loop_name = old_loop def _emit_nil_return(self, lines: list, indent: int) -> None: """Emit 'return NIL' or 'break' depending on while-loop context.""" pad = " " * indent if getattr(self, '_current_loop_name', None): lines.append(f"{pad}break") else: lines.append(f"{pad}return NIL") def _emit_return_expr(self, expr, lines: list, indent: int) -> None: """Emit an expression in return position, flattening control flow.""" pad = " " * indent # Inside a while loop (self-tail-recursive define optimization): # self-call → continue loop_name = getattr(self, '_current_loop_name', None) if loop_name: if (isinstance(expr, list) and len(expr) == 1 and isinstance(expr[0], Symbol) and expr[0].name == loop_name): lines.append(f"{pad}continue") return if isinstance(expr, list) and expr and isinstance(expr[0], Symbol): name = expr[0].name if name == "if": self._emit_if_return(expr, lines, indent) return if name == "cond": self._emit_cond_return(expr, lines, indent) return if name == "case": self._emit_case_return(expr, lines, indent) return if name == "when": self._emit_when_return(expr, lines, indent) return if name in ("let", "let*"): self._emit_let_as_stmts(expr, lines, indent, True) return if name in ("do", "begin"): self._emit_body_stmts(expr[1:], lines, indent) return if name == "for-each": # for-each in return position: emit as statement, then return/break lines.append(self._emit_for_each_stmt(expr, indent)) self._emit_nil_return(lines, indent) return if loop_name: emitted = self.emit(expr) if emitted != "NIL": lines.append(f"{pad}{emitted}") lines.append(f"{pad}break") else: lines.append(f"{pad}return {self.emit(expr)}") def _emit_if_return(self, expr, lines: list, indent: int) -> None: """Emit if as statement with returns in each branch.""" pad = " " * indent lines.append(f"{pad}if sx_truthy({self.emit(expr[1])}):") self._emit_return_expr(expr[2], lines, indent + 1) if len(expr) > 3: lines.append(f"{pad}else:") self._emit_return_expr(expr[3], lines, indent + 1) else: self._emit_nil_return(lines, indent) def _emit_when_return(self, expr, lines: list, indent: int) -> None: """Emit when as statement with return in body, else return NIL.""" pad = " " * indent lines.append(f"{pad}if sx_truthy({self.emit(expr[1])}):") body_parts = expr[2:] if len(body_parts) == 1: self._emit_return_expr(body_parts[0], lines, indent + 1) else: for b in body_parts[:-1]: lines.append(self.emit_statement(b, indent + 1)) self._emit_return_expr(body_parts[-1], lines, indent + 1) self._emit_nil_return(lines, indent) def _emit_cond_return(self, expr, lines: list, indent: int) -> None: """Emit cond as if/elif/else with returns in each branch.""" pad = " " * indent clauses = expr[1:] if not clauses: lines.append(f"{pad}return NIL") return is_scheme = ( all(isinstance(c, list) and len(c) == 2 for c in clauses) and not any(isinstance(c, Keyword) for c in clauses) ) has_else = False first_clause = True if is_scheme: for clause in clauses: test, body = clause[0], clause[1] if ((isinstance(test, Symbol) and test.name in ("else", ":else")) or (isinstance(test, Keyword) and test.name == "else")): lines.append(f"{pad}else:") has_else = True else: kw = "if" if first_clause else "elif" lines.append(f"{pad}{kw} sx_truthy({self.emit(test)}):") first_clause = False self._emit_return_expr(body, lines, indent + 1) else: i = 0 while i < len(clauses) - 1: test, body = clauses[i], clauses[i + 1] if ((isinstance(test, Keyword) and test.name == "else") or (isinstance(test, Symbol) and test.name in ("else", ":else"))): lines.append(f"{pad}else:") has_else = True else: kw = "if" if first_clause else "elif" lines.append(f"{pad}{kw} sx_truthy({self.emit(test)}):") first_clause = False self._emit_return_expr(body, lines, indent + 1) i += 2 if not has_else: self._emit_nil_return(lines, indent) def _emit_case_return(self, expr, lines: list, indent: int) -> None: """Emit case as if/elif/else with returns in each branch.""" pad = " " * indent match_val = self.emit(expr[1]) clauses = expr[2:] lines.append(f"{pad}_match = {match_val}") has_else = False first_clause = True i = 0 while i < len(clauses) - 1: test = clauses[i] body = clauses[i + 1] if ((isinstance(test, Keyword) and test.name == "else") or (isinstance(test, Symbol) and test.name in ("else", ":else"))): lines.append(f"{pad}else:") has_else = True else: kw = "if" if first_clause else "elif" lines.append(f"{pad}{kw} _match == {self.emit(test)}:") first_clause = False self._emit_return_expr(body, lines, indent + 1) i += 2 if not has_else: self._emit_nil_return(lines, indent) def _emit_let_as_stmts(self, expr, lines: list, indent: int, is_last: bool) -> None: """Emit a let expression as local variable declarations.""" pad = " " * indent bindings = expr[1] body = expr[2:] cell_vars = getattr(self, '_current_cell_vars', set()) if isinstance(bindings, list): if bindings and isinstance(bindings[0], list): # Scheme-style: ((name val) ...) for b in bindings: vname = b[0].name if isinstance(b[0], Symbol) else str(b[0]) mangled = self._mangle(vname) if mangled in cell_vars: lines.append(f"{pad}_cells[{self._py_string(mangled)}] = {self.emit(b[1])}") else: lines.append(f"{pad}{mangled} = {self.emit(b[1])}") else: # Clojure-style: (name val name val ...) for j in range(0, len(bindings), 2): vname = bindings[j].name if isinstance(bindings[j], Symbol) else str(bindings[j]) mangled = self._mangle(vname) if mangled in cell_vars: lines.append(f"{pad}_cells[{self._py_string(mangled)}] = {self.emit(bindings[j + 1])}") else: lines.append(f"{pad}{mangled} = {self.emit(bindings[j + 1])}") if is_last: self._emit_body_stmts(body, lines, indent) else: for b in body: self._emit_stmt_recursive(b, lines, indent) def _emit_for_each_stmt(self, expr, indent: int = 0) -> str: pad = " " * indent fn_expr = expr[1] coll_expr = expr[2] coll = self.emit(coll_expr) # If fn is an inline lambda, emit a for loop if isinstance(fn_expr, list) and isinstance(fn_expr[0], Symbol) and fn_expr[0].name == "fn": params = fn_expr[1] body = fn_expr[2:] p = self._extract_param_name(params[0]) p_py = self._mangle(p) lines = [f"{pad}for {p_py} in {coll}:"] # Emit body as statements with proper let/set! handling self._emit_loop_body(body, lines, indent + 1) return "\n".join(lines) fn = self.emit(fn_expr) return f"{pad}for _item in {coll}:\n{pad} {fn}(_item)" def _emit_loop_body(self, body: list, lines: list, indent: int) -> None: """Emit loop body as statements. Handles let, when, set!, cond properly.""" pad = " " * indent for expr in body: self._emit_stmt_recursive(expr, lines, indent) def _emit_stmt_recursive(self, expr, lines: list, indent: int) -> None: """Emit an expression as statement(s), recursing into control flow.""" pad = " " * indent if not isinstance(expr, list) or not expr: lines.append(self.emit_statement(expr, indent)) return head = expr[0] if not isinstance(head, Symbol): lines.append(self.emit_statement(expr, indent)) return name = head.name if name == "set!": varname = expr[1].name if isinstance(expr[1], Symbol) else str(expr[1]) mangled = self._mangle(varname) cell_vars = getattr(self, '_current_cell_vars', set()) if mangled in cell_vars: lines.append(f"{pad}_cells[{self._py_string(mangled)}] = {self.emit(expr[2])}") else: lines.append(f"{pad}{mangled} = {self.emit(expr[2])}") elif name in ("let", "let*"): self._emit_let_as_stmts(expr, lines, indent, False) elif name == "when": cond = self.emit(expr[1]) lines.append(f"{pad}if sx_truthy({cond}):") for b in expr[2:]: self._emit_stmt_recursive(b, lines, indent + 1) elif name == "cond": self._emit_cond_stmt(expr, lines, indent) elif name in ("do", "begin"): for b in expr[1:]: self._emit_stmt_recursive(b, lines, indent) elif name == "if": cond = self.emit(expr[1]) lines.append(f"{pad}if sx_truthy({cond}):") self._emit_stmt_recursive(expr[2], lines, indent + 1) if len(expr) > 3: lines.append(f"{pad}else:") self._emit_stmt_recursive(expr[3], lines, indent + 1) elif name == "append!": lines.append(f"{pad}{self.emit(expr[1])}.append({self.emit(expr[2])})") elif name == "dict-set!": lines.append(f"{pad}{self.emit(expr[1])}[{self.emit(expr[2])}] = {self.emit(expr[3])}") elif name == "env-set!": lines.append(f"{pad}{self.emit(expr[1])}[{self.emit(expr[2])}] = {self.emit(expr[3])}") else: lines.append(self.emit_statement(expr, indent)) def _emit_cond_stmt(self, expr, lines: list, indent: int) -> None: """Emit cond as if/elif/else chain.""" pad = " " * indent clauses = expr[1:] # Detect scheme vs clojure style is_scheme = ( all(isinstance(c, list) and len(c) == 2 for c in clauses) and not any(isinstance(c, Keyword) for c in clauses) ) first_clause = True if is_scheme: for clause in clauses: test, body = clause[0], clause[1] if isinstance(test, Symbol) and test.name in ("else", ":else"): lines.append(f"{pad}else:") elif isinstance(test, Keyword) and test.name == "else": lines.append(f"{pad}else:") else: kw = "if" if first_clause else "elif" lines.append(f"{pad}{kw} sx_truthy({self.emit(test)}):") first_clause = False self._emit_stmt_recursive(body, lines, indent + 1) else: i = 0 while i < len(clauses) - 1: test, body = clauses[i], clauses[i + 1] if isinstance(test, Keyword) and test.name == "else": lines.append(f"{pad}else:") elif isinstance(test, Symbol) and test.name in ("else", ":else"): lines.append(f"{pad}else:") else: kw = "if" if first_clause else "elif" lines.append(f"{pad}{kw} sx_truthy({self.emit(test)}):") first_clause = False self._emit_stmt_recursive(body, lines, indent + 1) i += 2 def _emit_quote(self, expr) -> str: """Emit a quoted expression as a Python literal AST.""" if isinstance(expr, bool): return "True" if expr else "False" if isinstance(expr, (int, float)): return str(expr) if isinstance(expr, str): return self._py_string(expr) if expr is None or expr is SX_NIL: return "NIL" if isinstance(expr, Symbol): return f"Symbol({self._py_string(expr.name)})" if isinstance(expr, Keyword): return f"Keyword({self._py_string(expr.name)})" if isinstance(expr, list): return "[" + ", ".join(self._emit_quote(x) for x in expr) + "]" return str(expr) def _py_string(self, s: str) -> str: return repr(s) # --------------------------------------------------------------------------- # Bootstrap compiler # --------------------------------------------------------------------------- def extract_defines(source: str) -> list[tuple[str, list]]: """Parse .sx source, return list of (name, define-expr) for top-level defines. Extracts both (define ...) and (define-async ...) forms. """ exprs = parse_all(source) defines = [] for expr in exprs: if isinstance(expr, list) and expr and isinstance(expr[0], Symbol): if expr[0].name in ("define", "define-async"): name = expr[1].name if isinstance(expr[1], Symbol) else str(expr[1]) defines.append((name, expr)) return defines # Build config and static platform sections — canonical source is platform_py.py try: from .platform_py import ( PREAMBLE, PLATFORM_PY, PRIMITIVES_PY_PRE, PRIMITIVES_PY_POST, PRIMITIVES_PY_MODULES, _ALL_PY_MODULES, PLATFORM_PARSER_PY, PLATFORM_DEPS_PY, PLATFORM_CEK_PY, CEK_FIXUPS_PY, PLATFORM_ASYNC_PY, FIXUPS_PY, CONTINUATIONS_PY, _assemble_primitives_py, public_api_py, ADAPTER_FILES, SPEC_MODULES, SPEC_MODULE_ORDER, EXTENSION_NAMES, EXTENSION_FORMS, ) except ImportError: from hosts.python.platform import ( PREAMBLE, PLATFORM_PY, PRIMITIVES_PY_PRE, PRIMITIVES_PY_POST, PRIMITIVES_PY_MODULES, _ALL_PY_MODULES, PLATFORM_PARSER_PY, PLATFORM_DEPS_PY, PLATFORM_CEK_PY, CEK_FIXUPS_PY, PLATFORM_ASYNC_PY, FIXUPS_PY, CONTINUATIONS_PY, _assemble_primitives_py, public_api_py, ADAPTER_FILES, SPEC_MODULES, SPEC_MODULE_ORDER, EXTENSION_NAMES, EXTENSION_FORMS, ) def _parse_special_forms_spec(ref_dir: str, source_dirs=None) -> set[str]: """Parse special-forms.sx to extract declared form names.""" filepath = None if source_dirs: for d in source_dirs: p = os.path.join(d, "special-forms.sx") if os.path.exists(p): filepath = p break if not filepath: filepath = os.path.join(ref_dir, "special-forms.sx") if not os.path.exists(filepath): return set() with open(filepath) as f: src = f.read() names = set() for expr in parse_all(src): if (isinstance(expr, list) and len(expr) >= 2 and isinstance(expr[0], Symbol) and expr[0].name == "define-special-form" and isinstance(expr[1], str)): names.add(expr[1]) return names def _extract_eval_dispatch_names(all_sections: list) -> set[str]: """Extract special form names dispatched in eval-list from transpiled sections.""" names = set() for _label, defines in all_sections: for name, _expr in defines: if name.startswith("sf-"): form = name[3:] if form in ("cond-scheme", "cond-clojure", "case-loop"): continue names.add(form) if name.startswith("ho-"): form = name[3:] names.add(form) return names def _validate_special_forms(ref_dir: str, all_sections: list, has_continuations: bool, source_dirs=None) -> None: """Cross-check special-forms.sx against eval.sx dispatch. Warn on mismatches.""" spec_names = _parse_special_forms_spec(ref_dir, source_dirs=source_dirs) if not spec_names: return dispatch_names = _extract_eval_dispatch_names(all_sections) if has_continuations: dispatch_names |= EXTENSION_FORMS["continuations"] name_aliases = { "thread-first": "->", "every": "every?", "set-bang": "set!", } normalized_dispatch = set() for n in dispatch_names: normalized_dispatch.add(name_aliases.get(n, n)) internal = {"named-let"} normalized_dispatch -= internal undispatched = spec_names - normalized_dispatch ignore = {"fn", "let*", "do", "defrelation"} undispatched -= ignore unspecced = normalized_dispatch - spec_names unspecced -= ignore if undispatched: import sys print(f"# WARNING: special-forms.sx declares forms not in eval.sx: " f"{', '.join(sorted(undispatched))}", file=sys.stderr) if unspecced: import sys print(f"# WARNING: eval.sx dispatches forms not in special-forms.sx: " f"{', '.join(sorted(unspecced))}", file=sys.stderr) def compile_ref_to_py( adapters: list[str] | None = None, modules: list[str] | None = None, extensions: list[str] | None = None, spec_modules: list[str] | None = None, ) -> str: """Read reference .sx files and emit Python. Args: adapters: List of adapter names to include. Valid names: parser, html, sx. None = include all server-side adapters. modules: List of primitive module names to include. core.* are always included. stdlib.* are opt-in. None = include all modules (backward compatible). extensions: List of optional extensions to include. Valid names: continuations. None = no extensions. spec_modules: List of spec module names to include. Valid names: deps, engine. None = no spec modules. """ # Determine which primitive modules to include prim_modules = None # None = all if modules is not None: prim_modules = [m for m in _ALL_PY_MODULES if m.startswith("core.")] for m in modules: if m not in prim_modules: if m not in PRIMITIVES_PY_MODULES: raise ValueError(f"Unknown module: {m!r}. Valid: {', '.join(PRIMITIVES_PY_MODULES)}") prim_modules.append(m) ref_dir = os.path.join(os.path.abspath(os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "..")), "shared", "sx", "ref") _project = os.path.abspath(os.path.join(ref_dir, "..", "..", "..")) _source_dirs = [ os.path.join(_project, "spec"), os.path.join(_project, "web"), ref_dir, ] def _find_sx(filename): for d in _source_dirs: p = os.path.join(d, filename) if os.path.exists(p): return p return None emitter = PyEmitter() # Resolve adapter set if adapters is None: adapter_set = set(ADAPTER_FILES.keys()) else: adapter_set = set() for a in adapters: if a not in ADAPTER_FILES: raise ValueError(f"Unknown adapter: {a!r}. Valid: {', '.join(ADAPTER_FILES)}") adapter_set.add(a) # Resolve spec modules spec_mod_set = set() if spec_modules: for sm in spec_modules: if sm not in SPEC_MODULES: raise ValueError(f"Unknown spec module: {sm!r}. Valid: {', '.join(SPEC_MODULES)}") spec_mod_set.add(sm) # html adapter needs deps (component analysis), signals (island rendering), # router (URL-to-expression evaluation), and page-helpers if "html" in adapter_set: if "deps" in SPEC_MODULES: spec_mod_set.add("deps") if "signals" in SPEC_MODULES: spec_mod_set.add("signals") if "page-helpers" in SPEC_MODULES: spec_mod_set.add("page-helpers") if "router" in SPEC_MODULES: spec_mod_set.add("router") # CEK is always included (part of evaluator.sx core file) has_cek = True has_deps = "deps" in spec_mod_set # Core files always included, then selected adapters, then spec modules # evaluator.sx = merged frames + eval utilities + CEK machine sx_files = [ ("evaluator.sx", "evaluator (frames + eval + CEK)"), ("forms.sx", "forms (server definition forms)"), ("render.sx", "render (core)"), ] # Parser before html/sx — provides serialize used by adapters if "parser" in adapter_set: sx_files.append(ADAPTER_FILES["parser"]) for name in ("html", "sx"): if name in adapter_set: sx_files.append(ADAPTER_FILES[name]) # Use explicit ordering for spec modules (respects dependencies) for name in SPEC_MODULE_ORDER: if name in spec_mod_set: sx_files.append(SPEC_MODULES[name]) # Any spec modules not in the order list (future-proofing) for name in sorted(spec_mod_set): if name not in SPEC_MODULE_ORDER: sx_files.append(SPEC_MODULES[name]) # Pre-scan define-async names (needed before transpilation so emitter # knows which calls require 'await') has_async = "async" in adapter_set if has_async: async_filename = ADAPTER_FILES["async"][0] async_filepath = _find_sx(async_filename) or os.path.join(ref_dir, async_filename) if os.path.exists(async_filepath): with open(async_filepath) as f: async_src = f.read() for aexpr in parse_all(async_src): if (isinstance(aexpr, list) and aexpr and isinstance(aexpr[0], Symbol) and aexpr[0].name == "define-async"): aname = aexpr[1].name if isinstance(aexpr[1], Symbol) else str(aexpr[1]) emitter._async_names.add(aname) # Platform async primitives (provided by host, also need await) emitter._async_names.update({ "async-eval", "execute-io", "async-await!", }) # Async adapter is transpiled last (after sync adapters) sx_files.append(ADAPTER_FILES["async"]) all_sections = [] for filename, label in sx_files: filepath = _find_sx(filename) or os.path.join(ref_dir, filename) if not os.path.exists(filepath): continue with open(filepath) as f: src = f.read() defines = extract_defines(src) all_sections.append((label, defines)) # Resolve extensions ext_set = set() if extensions: for e in extensions: if e not in EXTENSION_NAMES: raise ValueError(f"Unknown extension: {e!r}. Valid: {', '.join(EXTENSION_NAMES)}") ext_set.add(e) has_continuations = "continuations" in ext_set # Validate special forms _validate_special_forms(ref_dir, all_sections, has_continuations, source_dirs=_source_dirs) # Build output has_html = "html" in adapter_set has_sx = "sx" in adapter_set has_parser = "parser" in adapter_set parts = [] parts.append(PREAMBLE) parts.append(PLATFORM_PY) parts.append(PRIMITIVES_PY_PRE) parts.append(_assemble_primitives_py(prim_modules)) parts.append(PRIMITIVES_PY_POST) if has_parser: parts.append(PLATFORM_PARSER_PY) if has_deps: parts.append(PLATFORM_DEPS_PY) if has_cek: parts.append(PLATFORM_CEK_PY) if has_async: parts.append(PLATFORM_ASYNC_PY) for label, defines in all_sections: parts.append(f"\n# === Transpiled from {label} ===\n") for name, expr in defines: parts.append(f"# {name}") parts.append(emitter.emit_statement(expr)) parts.append("") parts.append(FIXUPS_PY) if has_cek: parts.append(CEK_FIXUPS_PY) if has_continuations: parts.append(CONTINUATIONS_PY) parts.append(public_api_py(has_html, has_sx, has_deps, has_async)) return "\n".join(parts) # NOTE: Static platform sections (PREAMBLE, PLATFORM_PY, PRIMITIVES_*, etc.) # are now imported from platform_py.py above. Do not redefine them here. # --------------------------------------------------------------------------- # Main # --------------------------------------------------------------------------- def main(): import argparse parser = argparse.ArgumentParser(description="Bootstrap SX spec -> Python") parser.add_argument( "--adapters", default=None, help="Comma-separated adapter names (html,sx). Default: all server-side.", ) parser.add_argument( "--modules", default=None, help="Comma-separated primitive modules (core.* always included). Default: all.", ) parser.add_argument( "--extensions", default=None, help="Comma-separated extensions (continuations). Default: none.", ) parser.add_argument( "--spec-modules", default=None, help="Comma-separated spec modules (deps,engine). Default: none.", ) args = parser.parse_args() adapters = args.adapters.split(",") if args.adapters else None modules = args.modules.split(",") if args.modules else None extensions = args.extensions.split(",") if args.extensions else None spec_modules = args.spec_modules.split(",") if args.spec_modules else None print(compile_ref_to_py(adapters, modules, extensions, spec_modules)) if __name__ == "__main__": main()